@sable-ai/sdk-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -0
- package/dist/esm/index.js +2431 -0
- package/dist/sable.iife.js +1486 -0
- package/dist/types/browser-bridge/actions.d.ts +27 -0
- package/dist/types/browser-bridge/dom-state.d.ts +37 -0
- package/dist/types/browser-bridge/index.d.ts +19 -0
- package/dist/types/connection/index.d.ts +26 -0
- package/dist/types/events/index.d.ts +15 -0
- package/dist/types/global.d.ts +26 -0
- package/dist/types/index.d.ts +23 -0
- package/dist/types/rpc.d.ts +22 -0
- package/dist/types/runtime/clipboard.d.ts +14 -0
- package/dist/types/runtime/index.d.ts +36 -0
- package/dist/types/runtime/video-overlay.d.ts +14 -0
- package/dist/types/session/debug-panel.d.ts +29 -0
- package/dist/types/session/index.d.ts +41 -0
- package/dist/types/types/index.d.ts +131 -0
- package/dist/types/version.d.ts +7 -0
- package/dist/types/vision/frame-source.d.ts +34 -0
- package/dist/types/vision/index.d.ts +29 -0
- package/dist/types/vision/publisher.d.ts +44 -0
- package/dist/types/vision/wireframe.d.ts +22 -0
- package/package.json +61 -0
- package/src/assets/visible-dom.js.txt +764 -0
- package/src/assets/wireframe.js.txt +678 -0
- package/src/assets.d.ts +24 -0
- package/src/browser-bridge/actions.ts +161 -0
- package/src/browser-bridge/dom-state.ts +103 -0
- package/src/browser-bridge/index.ts +99 -0
- package/src/connection/index.ts +49 -0
- package/src/events/index.ts +50 -0
- package/src/global.ts +35 -0
- package/src/index.test.ts +6 -0
- package/src/index.ts +43 -0
- package/src/rpc.ts +31 -0
- package/src/runtime/clipboard.ts +47 -0
- package/src/runtime/index.ts +138 -0
- package/src/runtime/video-overlay.ts +94 -0
- package/src/session/debug-panel.ts +254 -0
- package/src/session/index.ts +375 -0
- package/src/types/index.ts +176 -0
- package/src/version.ts +8 -0
- package/src/vision/frame-source.ts +111 -0
- package/src/vision/index.ts +70 -0
- package/src/vision/publisher.ts +106 -0
- package/src/vision/wireframe.ts +43 -0
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session lifecycle.
|
|
3
|
+
*
|
|
4
|
+
* `Session` is the glue layer: it fetches connection details, dynamically
|
|
5
|
+
* imports `livekit-client`, connects, publishes the mic, registers the
|
|
6
|
+
* runtime + browser-bridge RPCs, and — if vision is enabled — starts the
|
|
7
|
+
* frame source + video publisher and (when debug is on) mounts the debug panel. `start()`
|
|
8
|
+
* returns once the room is live and mic is publishing; events are emitted
|
|
9
|
+
* via the `SableEventEmitter`.
|
|
10
|
+
*
|
|
11
|
+
* Only one session is allowed at a time. `start()` throws if a session is
|
|
12
|
+
* already active; callers must `stop()` first. `stop()` is idempotent.
|
|
13
|
+
*
|
|
14
|
+
* `livekit-client` is imported dynamically (not statically) so the IIFE
|
|
15
|
+
* entry bundle stays small — the heavy client only loads when a customer
|
|
16
|
+
* actually calls `start()`.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import {
|
|
20
|
+
fetchConnectionDetails,
|
|
21
|
+
DEFAULT_API_URL,
|
|
22
|
+
} from "../connection";
|
|
23
|
+
import { SableEventEmitter } from "../events";
|
|
24
|
+
import { installRuntime } from "../runtime";
|
|
25
|
+
import { registerBrowserHandlers } from "../browser-bridge";
|
|
26
|
+
import {
|
|
27
|
+
startVision,
|
|
28
|
+
type LiveKitPublishLib,
|
|
29
|
+
type VisionHandle,
|
|
30
|
+
} from "../vision";
|
|
31
|
+
import type {
|
|
32
|
+
SableAPI,
|
|
33
|
+
SableEventHandler,
|
|
34
|
+
SableEvents,
|
|
35
|
+
StartOptions,
|
|
36
|
+
} from "../types";
|
|
37
|
+
import { VERSION } from "../version";
|
|
38
|
+
import { mountDebugPanel, shouldShowDebugPanel } from "./debug-panel";
|
|
39
|
+
|
|
40
|
+
// ── livekit-client structural types ───────────────────────────────────────
|
|
41
|
+
//
|
|
42
|
+
// The client is dynamically imported so we can't use its types at the top
|
|
43
|
+
// level. These mirror only the subset we call.
|
|
44
|
+
|
|
45
|
+
/**
 * Structural stand-in for the `livekit-client` room object. Only the members
 * this SDK touches are declared; the real `Room` instance is cast to this
 * shape in `Session.start()`.
 */
interface LiveKitRoom {
  /** Called by `Session.start()` with `details.serverUrl` and `details.participantToken`. */
  connect(url: string, token: string): Promise<unknown>;
  /** Awaited by `Session.stop()` as the last teardown step. */
  disconnect(): Promise<void>;
  /** Event subscription; `Session.wireRoomEvents` passes `RoomEvent.*` values as `event`. */
  on(event: string, handler: (...args: unknown[]) => void): unknown;
  /** RPC registration; `Session.start()` uses it for the `agentReady` handshake handler. */
  registerRpcMethod(
    method: string,
    handler: (data: { payload: string }) => Promise<string>,
  ): void;
  localParticipant: {
    identity?: string;
    /** Toggled on in `Session.start()` and off in `Session.stop()`. */
    setMicrophoneEnabled(enabled: boolean): Promise<unknown>;
    /** Used by `sendUiReady` to deliver the `uiReady` RPC. */
    performRpc(opts: {
      destinationIdentity: string;
      method: string;
      payload: string;
    }): Promise<string>;
    // Not called in this file — presumably used by the vision publisher
    // that receives the room via `startVision`; confirm against vision/.
    publishTrack(
      track: unknown,
      options?: { source?: unknown; name?: string },
    ): Promise<{ trackSid?: string }>;
    unpublishTrack(track: unknown, stopOnUnpublish?: boolean): Promise<unknown>;
  };
  /** Optional: readers in this file treat a missing map as "no remote participants". */
  remoteParticipants?: Map<
    string,
    { identity?: string; trackPublications?: Map<string, unknown> }
  >;
}
|
|
72
|
+
|
|
73
|
+
// ── Agent handshake ───────────────────────────────────────────────────────
|
|
74
|
+
//
|
|
75
|
+
// Agents emit `agentReady` after joining and wait for a `uiReady` reply
|
|
76
|
+
// before generating their greeting. If we don't reply within ~10s the agent
|
|
77
|
+
// gives up and never publishes audio.
|
|
78
|
+
// Reference: parley/src/features/agent/hooks/useAgentConnection.ts.
|
|
79
|
+
|
|
80
|
+
/** Number of attempts `sendUiReady` makes before logging that retries are exhausted. */
const UI_READY_RETRY_ATTEMPTS = 5;
/** Pause, in milliseconds, that `sendUiReady` waits after each unsuccessful attempt. */
const UI_READY_RETRY_DELAY_MS = 500;
|
|
82
|
+
|
|
83
|
+
/**
 * Choose which remote participant the handshake RPC should be addressed to.
 *
 * The first remote participant whose identity is a string starting with
 * `"agent"` wins. When none matches, the identity of the first remote
 * participant (in map order) is used instead.
 *
 * @param room Room whose `remoteParticipants` map is inspected.
 * @returns The selected identity, or `null` when there are no remote
 *   participants or the fallback participant has no identity.
 */
function findAgentIdentity(room: LiveKitRoom): string | null {
  const participants = Array.from(room.remoteParticipants?.values() ?? []);

  for (const { identity } of participants) {
    if (typeof identity === "string" && identity.startsWith("agent")) {
      return identity;
    }
  }

  // No "agent"-prefixed participant: fall back to whoever is listed first.
  return participants[0]?.identity ?? null;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Deliver the `uiReady` RPC to the agent, retrying on failure.
 *
 * Each attempt looks up the agent identity via `findAgentIdentity` and, when
 * one is available, performs the `uiReady` RPC with a timestamp payload. An
 * attempt that finds no participant or whose RPC rejects is followed by a
 * `UI_READY_RETRY_DELAY_MS` pause. After `UI_READY_RETRY_ATTEMPTS` failed
 * attempts an error is logged; the function never throws for these failures.
 *
 * @param room Connected room used both to locate the agent and to send the RPC.
 */
async function sendUiReady(room: LiveKitRoom): Promise<void> {
  const pause = (): Promise<void> =>
    new Promise<void>((resolve) => setTimeout(resolve, UI_READY_RETRY_DELAY_MS));

  let attempt = 0;
  while (attempt < UI_READY_RETRY_ATTEMPTS) {
    attempt += 1;
    const identity = findAgentIdentity(room);

    if (identity) {
      try {
        await room.localParticipant.performRpc({
          destinationIdentity: identity,
          method: "uiReady",
          payload: JSON.stringify({ timestamp: Date.now() }),
        });
        console.log("[Sable] uiReady sent", { identity, attempt });
        return;
      } catch (err) {
        console.warn("[Sable] uiReady RPC failed", { attempt, err });
      }
    } else {
      console.warn("[Sable] sendUiReady: no agent participant yet", { attempt });
    }

    // Both failure modes back off for the same interval before the next try.
    await pause();
  }

  console.error("[Sable] uiReady: exhausted retries — agent will not greet");
}
|
|
116
|
+
|
|
117
|
+
// ── Session class ─────────────────────────────────────────────────────────
|
|
118
|
+
|
|
119
|
+
/**
|
|
120
|
+
* One active session at a time. The class is internal — customers interact
|
|
121
|
+
* with the `SableAPI` singleton installed on `window.Sable` (see `global.ts`).
|
|
122
|
+
* Keeping a class here (rather than a bag of module-level vars) makes the
|
|
123
|
+
* state ownership explicit and the teardown path easier to reason about.
|
|
124
|
+
*/
|
|
125
|
+
export class Session implements SableAPI {
  /** SDK version string, taken from `../version`. */
  readonly version = VERSION;
  private readonly emitter = new SableEventEmitter();
  /** The live room, or `null` when no session is active. Set late in `start()`, cleared first thing in `stop()`. */
  private activeRoom: LiveKitRoom | null = null;
  /** Handle returned by `startVision`, kept so `stop()` can shut vision down. */
  private visionHandle: VisionHandle | null = null;
  /** Teardown callback returned by `mountDebugPanel`, if the panel was mounted. */
  private unmountDebugPanel: (() => void) | null = null;
  /**
   * True while a `start()` call is in progress. `activeRoom` is only assigned
   * after several awaits, so without this flag two overlapping `start()` calls
   * would both pass the guard and each open a room.
   */
  private starting = false;

  /**
   * Subscribe to a session event.
   *
   * @returns The unsubscribe function produced by the underlying emitter.
   */
  on<E extends keyof SableEvents>(
    event: E,
    handler: SableEventHandler<E>,
  ): () => void {
    return this.emitter.on(event, handler);
  }

  /**
   * Start a session: fetch connection details, load `livekit-client`,
   * register RPC handlers, connect, enable the mic and — when
   * `opts.vision.enabled` is set — start vision.
   *
   * Resolves after `session:started` has been emitted. A vision startup
   * failure is logged and does not fail the call.
   *
   * @throws Error if a session is already active or another `start()` is
   *   still in progress.
   * @throws Error if neither `publicKey` nor `agentPublicId` is provided.
   * @throws Whatever `fetchConnectionDetails`, the dynamic import,
   *   `room.connect` or `setMicrophoneEnabled(true)` rejects with. In the
   *   latter two cases the room is disconnected before the error propagates.
   */
  async start(opts: StartOptions): Promise<void> {
    if (this.activeRoom) {
      throw new Error("Sable already started; call stop() first");
    }
    if (this.starting) {
      throw new Error("Sable.start: a start() call is already in progress");
    }

    // Public key resolution. `publicKey` wins when both are passed, but
    // `agentPublicId` remains supported during beta so customers upgrading
    // from an earlier build don't have to rename the field the same day
    // they update the package.
    const publicKey = opts.publicKey ?? opts.agentPublicId;
    if (!publicKey) {
      throw new Error("Sable.start: `publicKey` is required");
    }

    this.starting = true;
    try {
      const apiUrl = opts.apiUrl ?? DEFAULT_API_URL;
      console.log("[Sable] fetching connection details", { apiUrl });
      const details = await fetchConnectionDetails({ apiUrl, publicKey });
      console.log("[Sable] connection details received", {
        roomName: details.roomName,
        participantName: details.participantName,
      });

      // Dynamic import keeps the IIFE entry small; livekit-client is ~200KB
      // minified and only needed once a session actually starts.
      const livekit = await import("livekit-client");
      const { Room, RoomEvent, LocalVideoTrack, Track } = livekit;
      const room = new Room() as unknown as LiveKitRoom;

      const publishLib: LiveKitPublishLib = {
        LocalVideoTrack: LocalVideoTrack as unknown as LiveKitPublishLib["LocalVideoTrack"],
        Track: Track as unknown as LiveKitPublishLib["Track"],
      };

      // Handshake handler MUST be registered before room.connect() so it's
      // ready when the first RPC arrives.
      room.registerRpcMethod("agentReady", async () => {
        console.log("[Sable] RPC agentReady received");
        // Fire-and-forget: sendUiReady logs its own failures and never throws.
        void sendUiReady(room);
        return JSON.stringify({ success: true });
      });

      // Browser bridge: 6 browser.* handlers. Safe to register even for
      // voice-only sessions — the agent just won't call them.
      registerBrowserHandlers(room);

      // Runtime: default methods (clipboard, switchView) + customer overrides
      // and extensions passed via `opts.runtime`.
      installRuntime(room, opts.runtime);

      this.wireRoomEvents(room, RoomEvent);

      try {
        await room.connect(details.serverUrl, details.participantToken);
        await room.localParticipant.setMicrophoneEnabled(true);
      } catch (err) {
        // The room is not yet recorded in `activeRoom`, so `stop()` could
        // never reach it. Tear it down here or the connection leaks (e.g.
        // when the user denies the microphone permission after connect).
        try {
          await room.disconnect();
        } catch (disconnectErr) {
          console.warn("[Sable] disconnect after failed start failed", disconnectErr);
        }
        throw err;
      }

      this.activeRoom = room;

      // Vision is off by default. Opt in with `vision: { enabled: true }`.
      if (opts.vision?.enabled) {
        try {
          const handle = await startVision({
            room: room as unknown as Parameters<typeof startVision>[0]["room"],
            lib: publishLib,
            options: opts.vision,
          });
          if (this.activeRoom === room) {
            this.visionHandle = handle;
            if (shouldShowDebugPanel(opts.debug)) {
              this.unmountDebugPanel = mountDebugPanel(handle.canvas);
            }
          } else {
            // The session was stopped while vision was starting. `stop()` has
            // already run and will not run again for this room, so release
            // the handle now instead of storing it on a dead session.
            await handle.stop();
          }
        } catch (e) {
          console.warn("[Sable] failed to start vision", e);
        }
      }

      console.log("[Sable] session live", {
        roomName: details.roomName,
        participantName: details.participantName,
      });
      this.emitter.emit("session:started", {
        roomName: details.roomName,
        participantName: details.participantName,
      });

      this.scheduleAudioWatchdog(room);
    } finally {
      this.starting = false;
    }
  }

  /**
   * Tear down the active session. Does nothing when no session is active, so
   * repeated calls are safe.
   *
   * Debug panel, vision and mic teardown are best-effort (failures are
   * logged). `session:ended` is emitted even if `room.disconnect()` rejects,
   * because `activeRoom` has already been cleared by then; the rejection
   * still propagates to the caller.
   */
  async stop(): Promise<void> {
    const room = this.activeRoom;
    if (!room) return;
    // Clear first so re-entrant calls (e.g. from the Disconnected handler
    // triggered by our own disconnect) return immediately.
    this.activeRoom = null;

    if (this.unmountDebugPanel) {
      try {
        this.unmountDebugPanel();
      } catch (e) {
        console.warn("[Sable] debug panel unmount failed", e);
      }
      this.unmountDebugPanel = null;
    }

    if (this.visionHandle) {
      try {
        await this.visionHandle.stop();
      } catch (e) {
        console.warn("[Sable] vision stop failed", e);
      }
      this.visionHandle = null;
    }

    try {
      await room.localParticipant.setMicrophoneEnabled(false);
    } catch (err) {
      console.warn("[Sable] setMicrophoneEnabled(false) failed", err);
    }

    try {
      await room.disconnect();
    } finally {
      console.log("[Sable] session ended");
      this.emitter.emit("session:ended", {});
    }
  }

  /**
   * Watchdog: warn loudly if no remote audio track shows up within 10s.
   * Purely diagnostic — it only logs, and is skipped if `room` is no longer
   * the active room when the timer fires.
   */
  private scheduleAudioWatchdog(room: LiveKitRoom): void {
    setTimeout(() => {
      if (this.activeRoom !== room) return;
      // Local view with enough detail on track publications to summarise them.
      const r = room as unknown as {
        remoteParticipants?: Map<
          string,
          {
            identity?: string;
            trackPublications?: Map<
              string,
              { kind?: string; isSubscribed?: boolean }
            >;
          }
        >;
      };
      const remotes = r.remoteParticipants
        ? Array.from(r.remoteParticipants.values())
        : [];
      const summary = remotes.map((p) => ({
        identity: p.identity,
        tracks: p.trackPublications
          ? Array.from(p.trackPublications.values()).map((t) => ({
              kind: t.kind,
              subscribed: t.isSubscribed,
            }))
          : [],
      }));
      const anyAudio = summary.some((p) =>
        p.tracks.some((t) => t.kind === "audio"),
      );
      if (!anyAudio) {
        console.warn(
          "[Sable] no remote audio track after 10s — agent worker probably failed to publish. Remote participants:",
          summary,
        );
      }
    }, 10000);
  }

  /**
   * Subscribe to LiveKit room events and translate the interesting ones into
   * `SableEvents`. Keeps the Session → customer event surface decoupled from
   * the LiveKit event names so we can swap the transport later without
   * breaking subscribers.
   *
   * @param room Room to attach the listeners to.
   * @param RoomEvent Event-name lookup taken from the `livekit-client` module.
   */
  private wireRoomEvents(
    room: LiveKitRoom,
    RoomEvent: Record<string, string>,
  ): void {
    room.on(RoomEvent.ConnectionStateChanged, (state: unknown) => {
      console.log("[Sable] ConnectionStateChanged", state);
    });
    room.on(RoomEvent.Disconnected, (reason: unknown) => {
      console.log("[Sable] Disconnected", reason);
      // Forward to stop() so cleanup runs exactly once regardless of who
      // initiated the disconnect (customer call vs. server drop).
      if (this.activeRoom === room) {
        void this.stop().catch((e) =>
          console.warn("[Sable] stop on disconnect failed", e),
        );
      }
    });
    room.on(RoomEvent.ParticipantConnected, (participant: unknown) => {
      const p = participant as {
        identity?: string;
        sid?: string;
        metadata?: string;
      };
      console.log("[Sable] ParticipantConnected", {
        identity: p.identity,
        sid: p.sid,
      });
    });
    room.on(RoomEvent.ParticipantDisconnected, (participant: unknown) => {
      const p = participant as { identity?: string };
      console.warn("[Sable] ParticipantDisconnected", { identity: p.identity });
    });
    room.on(
      RoomEvent.TrackSubscribed,
      (track: unknown, _pub: unknown, participant: unknown) => {
        const t = track as {
          kind?: string;
          attach?: () => HTMLMediaElement;
        };
        const p = participant as { identity?: string };
        console.log("[Sable] TrackSubscribed", {
          kind: t.kind,
          participant: p.identity,
        });
        // Auto-attach remote audio so the agent's voice plays without the
        // customer needing to wire up an <audio> element themselves.
        if (t.kind === "audio" && typeof t.attach === "function") {
          const el = t.attach();
          el.setAttribute("data-sable", "1");
          el.setAttribute("playsinline", "");
          el.autoplay = true;
          document.body.appendChild(el);
          console.log("[Sable] attached remote audio element");
        }
      },
    );
    room.on(RoomEvent.TrackUnsubscribed, (track: unknown) => {
      const t = track as { detach?: () => HTMLMediaElement[] };
      if (typeof t.detach === "function") {
        // Remove every element the track was attached to from the document.
        for (const el of t.detach()) {
          el.remove();
        }
      }
    });
    room.on(RoomEvent.ActiveSpeakersChanged, (speakers: unknown) => {
      const list = (speakers as Array<{ identity?: string }>) ?? [];
      // Speakers are split by the "agent" identity prefix; anyone else with
      // a string identity counts as the user.
      const agentTalking = list.some(
        (s) => typeof s.identity === "string" && s.identity.startsWith("agent"),
      );
      const userTalking = list.some(
        (s) =>
          typeof s.identity === "string" && !s.identity.startsWith("agent"),
      );
      this.emitter.emit("agent:speaking", agentTalking);
      this.emitter.emit("user:speaking", userTalking);
    });
  }
}
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public type surface for @sable-ai/sdk-core.
|
|
3
|
+
*
|
|
4
|
+
* Everything a consumer can touch lives here. Internal types stay in their
|
|
5
|
+
* respective modules so we never accidentally ship them in the published
|
|
6
|
+
* `.d.ts` bundle.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// ── Frame sources ──────────────────────────────────────────────────────────
|
|
10
|
+
//
|
|
11
|
+
// Discriminated union — `type` is the tag. Adding a new source (e.g. `video`,
|
|
12
|
+
// `webgl`) means adding a new variant here and a new case in
|
|
13
|
+
// `vision/frame-source.ts`. The public API stays stable.
|
|
14
|
+
|
|
15
|
+
/**
 * `FrameSource` variant tagged `"wireframe"`: frames come from the built-in
 * wireframe renderer.
 */
export interface WireframeFrameSource {
  type: "wireframe";
  /**
   * Capture rate in frames per second. Default: 2. A value that is not a
   * positive number falls back to the default.
   */
  rate?: number;
  features?: {
    /**
     * Include rendered images in the wireframe (instead of placeholder boxes).
     * Slightly higher CPU + bandwidth. Default: false.
     */
    includeImages?: boolean;
  };
}
|
|
27
|
+
|
|
28
|
+
/**
 * `FrameSource` variant tagged `"fn"`: frames come from a customer-supplied
 * callback.
 */
export interface FnFrameSource {
  type: "fn";
  /**
   * Capture rate in frames per second. Default: 2. A value that is not a
   * positive number falls back to the default.
   */
  rate?: number;
  /**
   * Called at `rate` Hz. Return an `HTMLCanvasElement` or `ImageBitmap` that
   * the SDK will publish to the agent as a video track. Useful for feeding
   * custom sources like a 3D scene, a `<video>` element, or a WebGL surface
   * that the DOM walker can't introspect.
   *
   * The return value is drawn synchronously, so the callback must not be
   * async. An exception thrown here is logged and that frame is skipped.
   */
  captureFn: () => HTMLCanvasElement | ImageBitmap;
}
|
|
40
|
+
|
|
41
|
+
/** Union of all supported frame sources; discriminate on the `type` tag. */
export type FrameSource = WireframeFrameSource | FnFrameSource;
|
|
42
|
+
|
|
43
|
+
// ── Vision ─────────────────────────────────────────────────────────────────
|
|
44
|
+
|
|
45
|
+
/** Vision settings passed as `StartOptions.vision`. */
export interface VisionOptions {
  /**
   * Whether to publish a video track of the page to the agent. Default: false.
   * Turn this on for agents that should be able to *see* the user's screen
   * in addition to hearing them.
   */
  enabled?: boolean;
  /**
   * Where video frames come from. Defaults to the built-in wireframe renderer
   * at 2 fps with images disabled.
   */
  frameSource?: FrameSource;
}
|
|
58
|
+
|
|
59
|
+
// ── Runtime (agent → page RPC surface) ─────────────────────────────────────
|
|
60
|
+
//
|
|
61
|
+
// The agent can call methods on the page over LiveKit RPC. `sdk-core` ships
|
|
62
|
+
// default implementations for a known set of methods (clipboard copy, video
|
|
63
|
+
// overlay, and no-op placeholders for host-UI-specific methods). Customers
|
|
64
|
+
// override any of them by passing matching keys in `Sable.start({ runtime })`,
|
|
65
|
+
// and can add new methods specific to their app that become callable by the
|
|
66
|
+
// agent. One unified surface: no distinction between "SDK methods" and
|
|
67
|
+
// "customer methods" from the agent's perspective.
|
|
68
|
+
|
|
69
|
+
/**
 * Signature of a single agent-callable method. It receives a payload object
 * and may return its result directly or as a promise.
 */
export type RuntimeMethod = (
  payload: Record<string, unknown>,
) => unknown | Promise<unknown>;
|
|
72
|
+
|
|
73
|
+
/**
|
|
74
|
+
* Map of method name → handler. Used both for the user-provided overrides
|
|
75
|
+
* passed to `Sable.start({ runtime })` and for the SDK's internal defaults.
|
|
76
|
+
*/
|
|
77
|
+
export interface RuntimeMethods {
  // Open-ended on purpose: any string key is accepted as a method name.
  [method: string]: RuntimeMethod;
}
|
|
80
|
+
|
|
81
|
+
// ── Start options ──────────────────────────────────────────────────────────
|
|
82
|
+
|
|
83
|
+
/** Options accepted by `Sable.start()`. */
export interface StartOptions {
  /**
   * Publishable key for the agent (from platform.withsable.com → your agent
   * → Web SDK → Public key). Safe to ship in client-side code — the security
   * boundary is the allowed-domains list configured alongside the key.
   *
   * During beta, raw agent IDs (e.g. `agt_...`) are accepted here too.
   *
   * Optional in the type only so the deprecated `agentPublicId` alias can be
   * used instead; `start()` throws when neither is provided.
   */
  publicKey?: string;

  /**
   * @deprecated Use `publicKey` instead. Accepted as an alias during beta and
   * will be removed before 1.0. If both are set, `publicKey` wins.
   */
  agentPublicId?: string;

  /**
   * What the agent can see. Off by default — opt in for vision-enabled agents.
   */
  vision?: VisionOptions;

  /**
   * Overrides + extensions for methods the agent can RPC into the page.
   * Unspecified methods fall back to the SDK's default implementations. New
   * methods become callable by the agent as-is.
   */
  runtime?: RuntimeMethods;

  /**
   * Arbitrary metadata forwarded to the agent at session start. Surfaces
   * verbatim in the agent's initial prompt.
   *
   * NOTE(review): `Session.start()` does not read this field in the code
   * shown in session/index.ts — confirm where (or whether) it is forwarded.
   */
  context?: Record<string, unknown>;

  /**
   * Dev-only: mount a floating preview panel showing the exact wireframe
   * canvas being published to the agent. Can also be enabled via
   * `?sable-debug=1` or `localStorage["sable:debug"]="1"`.
   *
   * The panel is only mounted when vision is enabled and started
   * successfully, since it previews the vision canvas.
   */
  debug?: boolean;

  /**
   * Override the sable-api base URL. Dev/test only. Defaults to the
   * production gateway.
   * @internal
   */
  apiUrl?: string;
}
|
|
131
|
+
|
|
132
|
+
// ── Events ─────────────────────────────────────────────────────────────────
|
|
133
|
+
//
|
|
134
|
+
// Fire-and-forget — the SDK does not care whether customers subscribe.
|
|
135
|
+
|
|
136
|
+
/** Map of event name → payload type for everything `Sable.on()` can deliver. */
export interface SableEvents {
  /**
   * Fired once the room is connected, the mic is live, and vision startup
   * (if enabled) has been attempted. The `agentReady`/`uiReady` handshake
   * runs independently in an RPC handler, so it may not have completed yet
   * when this fires.
   */
  "session:started": { roomName: string; participantName: string };
  /**
   * Fired once when the session ends for any reason.
   * `Session.stop()` currently emits an empty payload (no `reason`).
   */
  "session:ended": { reason?: string };
  /** Fired whenever the agent starts or stops speaking. */
  "agent:speaking": boolean;
  /** Fired whenever the local user starts or stops speaking. */
  "user:speaking": boolean;
  /** Fired for any non-fatal error during the session. */
  error: Error;
}
|
|
148
|
+
|
|
149
|
+
/** Callback type for event `E`; it receives that event's payload from `SableEvents`. */
export type SableEventHandler<E extends keyof SableEvents> = (
  payload: SableEvents[E],
) => void;
|
|
152
|
+
|
|
153
|
+
// ── Public API surface ─────────────────────────────────────────────────────
|
|
154
|
+
|
|
155
|
+
/** The customer-facing API object (exposed as `window.Sable`). */
export interface SableAPI {
  /** SDK version string, matches the npm package version. */
  version: string;
  /**
   * Start a voice (and optionally vision) session with the agent.
   * Rejects if a session is already active; call `stop()` first.
   */
  start(opts: StartOptions): Promise<void>;
  /** Tear down the active session. No-op if none. */
  stop(): Promise<void>;
  /**
   * Subscribe to a session event. Returns an unsubscribe function. Fire-and-
   * forget — the SDK does not care whether you subscribe.
   */
  on<E extends keyof SableEvents>(
    event: E,
    handler: SableEventHandler<E>,
  ): () => void;
}
|
|
171
|
+
|
|
172
|
+
// Augment the DOM `Window` type so `window.Sable` type-checks for consumers.
// Optional because the property is absent until the SDK installs it.
declare global {
  interface Window {
    Sable?: SableAPI;
  }
}
|
package/src/version.ts
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FrameSource dispatcher.
|
|
3
|
+
*
|
|
4
|
+
* The public API lets customers choose what the agent sees via
|
|
5
|
+
* `vision: { frameSource: { type: "wireframe" | "fn", ... } }`. This module
|
|
6
|
+
* hides the strategy behind one entrypoint — `startFrameSource` — which
|
|
7
|
+
* returns a stop function. The target canvas is passed in so the caller
|
|
8
|
+
* (vision/index.ts) can hand the same canvas to `canvas.captureStream()`.
|
|
9
|
+
*
|
|
10
|
+
* Two built-in sources:
|
|
11
|
+
*
|
|
12
|
+
* 1. `{ type: "wireframe", rate, features: { includeImages } }`
|
|
13
|
+
* — Runs the Wireframe library against `document.body` at `rate` Hz.
|
|
14
|
+
* This is the default, tuned for low bandwidth + agent-readable
|
|
15
|
+
* structure. `includeImages: true` fetches cover photos/avatars/
|
|
16
|
+
* thumbnails via CORS and draws real pixels; otherwise the agent
|
|
17
|
+
* gets labelled placeholder boxes.
|
|
18
|
+
*
|
|
19
|
+
* 2. `{ type: "fn", rate, captureFn }`
|
|
20
|
+
* — The user supplies a function that returns a canvas or ImageBitmap
|
|
21
|
+
* on each tick. Useful for custom sources the DOM walker can't
|
|
22
|
+
* introspect: `<video>` elements, WebGL/3D scenes, off-screen canvases.
|
|
23
|
+
*
|
|
24
|
+
* Adding a new source (e.g. `{ type: "video" }`) is a matter of:
|
|
25
|
+
* - adding a variant in `types/index.ts`
|
|
26
|
+
* - adding a case here.
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
import type { FrameSource } from "../types";
|
|
30
|
+
import { getWireframeCtor } from "./wireframe";
|
|
31
|
+
|
|
32
|
+
/** Capture rate used when the caller supplies none (or an unusable one). */
const DEFAULT_RATE_HZ = 2;

/**
 * Largest delay `setTimeout` honours (2^31 - 1 ms). Anything larger overflows
 * the timer's 32-bit field and fires almost immediately.
 */
const MAX_TIMER_DELAY_MS = 2147483647;

/**
 * Convert a capture rate in Hz into a `setTimeout` delay in milliseconds.
 *
 * @param rate Frames per second. `undefined`, `NaN`, `±Infinity`, zero and
 *   negative values all fall back to `DEFAULT_RATE_HZ`.
 * @returns A whole number of milliseconds in `[1, MAX_TIMER_DELAY_MS]`.
 */
function intervalMs(rate: number | undefined): number {
  const hz =
    typeof rate === "number" && Number.isFinite(rate) && rate > 0
      ? rate
      : DEFAULT_RATE_HZ;
  const rounded = Math.round(1000 / hz);
  // Lower clamp: very high rates would round to 0 ms.
  // Upper clamp: very low rates would overflow the timer and turn the
  // capture loop into a tight spin instead of a slow one.
  return Math.min(MAX_TIMER_DELAY_MS, Math.max(1, rounded));
}
|
|
38
|
+
|
|
39
|
+
/**
 * Keep `canvas` at the same pixel size as the viewport (at least 1×1).
 *
 * Runs on every capture tick, so a window resize is picked up on the next
 * frame without a dedicated resize listener. Dimensions are only written when
 * they differ, because assigning a canvas dimension resets its bitmap.
 */
function syncCanvasSize(canvas: HTMLCanvasElement): void {
  const targetWidth = Math.max(1, window.innerWidth);
  const targetHeight = Math.max(1, window.innerHeight);

  const alreadySized =
    canvas.width === targetWidth && canvas.height === targetHeight;
  if (alreadySized) return;

  canvas.width = targetWidth;
  canvas.height = targetHeight;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Begin drawing frames from `source` onto `canvas` on a repeating timer.
 *
 * The first frame is attempted immediately; each later frame is scheduled
 * only after the previous one has finished, so captures never overlap. A
 * frame that throws is logged and skipped — the loop keeps going.
 *
 * @param source Which frame source to use and at what rate.
 * @param canvas Target canvas; resized to the viewport before every frame.
 * @returns A function that stops the loop. If no 2D context can be obtained,
 *   nothing is started and the returned function does nothing.
 */
export function startFrameSource(
  source: FrameSource,
  canvas: HTMLCanvasElement,
): () => void {
  const ctx = canvas.getContext("2d", { alpha: false });
  if (!ctx) {
    console.warn("[Sable] frame source: 2d context unavailable");
    return () => {};
  }

  const delayMs = intervalMs(source.rate);

  let stopped = false;
  let busy = false;
  let timer: ReturnType<typeof setTimeout> | undefined;

  // Clear to white, then stretch the frame over the full canvas.
  const paint = (frame: CanvasImageSource): void => {
    ctx.fillStyle = "#ffffff";
    ctx.fillRect(0, 0, canvas.width, canvas.height);
    ctx.drawImage(frame, 0, 0, canvas.width, canvas.height);
  };

  const tick = async (): Promise<void> => {
    if (stopped) return;

    if (!busy) {
      busy = true;
      try {
        syncCanvasSize(canvas);
        switch (source.type) {
          case "wireframe": {
            const Wireframe = getWireframeCtor();
            const wf = new Wireframe(document.body, {
              images: source.features?.includeImages === true,
            });
            const { canvas: src } = await wf.capture();
            paint(src);
            break;
          }
          case "fn": {
            // Both HTMLCanvasElement and ImageBitmap are valid drawImage sources.
            paint(source.captureFn());
            break;
          }
        }
      } catch (e) {
        console.warn("[Sable] frame source tick failed", e);
      } finally {
        busy = false;
      }
    }

    // `stop()` may have been called while a capture was awaited.
    if (stopped) return;
    timer = setTimeout(tick, delayMs);
  };

  void tick();

  return () => {
    stopped = true;
    if (timer !== undefined) clearTimeout(timer);
  };
}
|