@sable-ai/sdk-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +55 -0
  2. package/dist/esm/index.js +2431 -0
  3. package/dist/sable.iife.js +1486 -0
  4. package/dist/types/browser-bridge/actions.d.ts +27 -0
  5. package/dist/types/browser-bridge/dom-state.d.ts +37 -0
  6. package/dist/types/browser-bridge/index.d.ts +19 -0
  7. package/dist/types/connection/index.d.ts +26 -0
  8. package/dist/types/events/index.d.ts +15 -0
  9. package/dist/types/global.d.ts +26 -0
  10. package/dist/types/index.d.ts +23 -0
  11. package/dist/types/rpc.d.ts +22 -0
  12. package/dist/types/runtime/clipboard.d.ts +14 -0
  13. package/dist/types/runtime/index.d.ts +36 -0
  14. package/dist/types/runtime/video-overlay.d.ts +14 -0
  15. package/dist/types/session/debug-panel.d.ts +29 -0
  16. package/dist/types/session/index.d.ts +41 -0
  17. package/dist/types/types/index.d.ts +131 -0
  18. package/dist/types/version.d.ts +7 -0
  19. package/dist/types/vision/frame-source.d.ts +34 -0
  20. package/dist/types/vision/index.d.ts +29 -0
  21. package/dist/types/vision/publisher.d.ts +44 -0
  22. package/dist/types/vision/wireframe.d.ts +22 -0
  23. package/package.json +61 -0
  24. package/src/assets/visible-dom.js.txt +764 -0
  25. package/src/assets/wireframe.js.txt +678 -0
  26. package/src/assets.d.ts +24 -0
  27. package/src/browser-bridge/actions.ts +161 -0
  28. package/src/browser-bridge/dom-state.ts +103 -0
  29. package/src/browser-bridge/index.ts +99 -0
  30. package/src/connection/index.ts +49 -0
  31. package/src/events/index.ts +50 -0
  32. package/src/global.ts +35 -0
  33. package/src/index.test.ts +6 -0
  34. package/src/index.ts +43 -0
  35. package/src/rpc.ts +31 -0
  36. package/src/runtime/clipboard.ts +47 -0
  37. package/src/runtime/index.ts +138 -0
  38. package/src/runtime/video-overlay.ts +94 -0
  39. package/src/session/debug-panel.ts +254 -0
  40. package/src/session/index.ts +375 -0
  41. package/src/types/index.ts +176 -0
  42. package/src/version.ts +8 -0
  43. package/src/vision/frame-source.ts +111 -0
  44. package/src/vision/index.ts +70 -0
  45. package/src/vision/publisher.ts +106 -0
  46. package/src/vision/wireframe.ts +43 -0
@@ -0,0 +1,375 @@
1
+ /**
2
+ * Session lifecycle.
3
+ *
4
+ * `Session` is the glue layer: it fetches connection details, dynamically
5
+ * imports `livekit-client`, connects, publishes the mic, registers the
6
+ * runtime + browser-bridge RPCs, and — if vision is enabled — starts the
7
+ * frame source + video publisher and mounts the debug panel. `start()`
8
+ * returns once the room is live and mic is publishing; events are emitted
9
+ * via the `SableEventEmitter`.
10
+ *
11
+ * Only one session is allowed at a time. `start()` throws if a session is
12
+ * already active; callers must `stop()` first. `stop()` is idempotent.
13
+ *
14
+ * `livekit-client` is imported dynamically (not statically) so the IIFE
15
+ * entry bundle stays small — the heavy client only loads when a customer
16
+ * actually calls `start()`.
17
+ */
18
+
19
+ import {
20
+ fetchConnectionDetails,
21
+ DEFAULT_API_URL,
22
+ } from "../connection";
23
+ import { SableEventEmitter } from "../events";
24
+ import { installRuntime } from "../runtime";
25
+ import { registerBrowserHandlers } from "../browser-bridge";
26
+ import {
27
+ startVision,
28
+ type LiveKitPublishLib,
29
+ type VisionHandle,
30
+ } from "../vision";
31
+ import type {
32
+ SableAPI,
33
+ SableEventHandler,
34
+ SableEvents,
35
+ StartOptions,
36
+ } from "../types";
37
+ import { VERSION } from "../version";
38
+ import { mountDebugPanel, shouldShowDebugPanel } from "./debug-panel";
39
+
40
+ // ── livekit-client structural types ───────────────────────────────────────
41
+ //
42
+ // The client is dynamically imported so we can't use its types at the top
43
+ // level. These mirror only the subset we call.
44
+
45
/**
 * Structural stand-in for the `livekit-client` `Room` class.
 *
 * Only the members this module (and the helpers it hands the room to) call
 * are declared. The real `Room` instance is cast to this shape after the
 * dynamic import, so nothing here is checked against the library's own types.
 */
interface LiveKitRoom {
  /** Join the room at `url` using the access `token`. */
  connect(url: string, token: string): Promise<unknown>;
  /** Leave the room. */
  disconnect(): Promise<void>;
  /** Subscribe to a room event; `event` is one of the `RoomEvent` values. */
  on(event: string, handler: (...args: unknown[]) => void): unknown;
  /** Register a handler for an incoming RPC `method`; the handler returns the reply payload. */
  registerRpcMethod(
    method: string,
    handler: (data: { payload: string }) => Promise<string>,
  ): void;
  /** The participant representing this page. */
  localParticipant: {
    identity?: string;
    /** Turn microphone publishing on or off. */
    setMicrophoneEnabled(enabled: boolean): Promise<unknown>;
    /** Call `method` on the participant named by `destinationIdentity`. */
    performRpc(opts: {
      destinationIdentity: string;
      method: string;
      payload: string;
    }): Promise<string>;
    publishTrack(
      track: unknown,
      options?: { source?: unknown; name?: string },
    ): Promise<{ trackSid?: string }>;
    unpublishTrack(track: unknown, stopOnUnpublish?: boolean): Promise<unknown>;
  };
  /** Other participants in the room. Optional here, so callers guard for absence. */
  remoteParticipants?: Map<
    string,
    { identity?: string; trackPublications?: Map<string, unknown> }
  >;
}
72
+
73
+ // ── Agent handshake ───────────────────────────────────────────────────────
74
+ //
75
+ // Agents emit `agentReady` after joining and wait for a `uiReady` reply
76
+ // before generating their greeting. If we don't reply within ~10s the agent
77
+ // gives up and never publishes audio.
78
+ // Reference: parley/src/features/agent/hooks/useAgentConnection.ts.
79
+
80
/** Maximum number of attempts `sendUiReady` makes before giving up. */
const UI_READY_RETRY_ATTEMPTS = 5;
/** Pause, in milliseconds, that `sendUiReady` waits after an unsuccessful attempt. */
const UI_READY_RETRY_DELAY_MS = 500;
82
+
83
/**
 * Choose the remote participant that handshake RPCs should be addressed to.
 *
 * The first participant whose identity starts with `"agent"` wins. If none
 * matches, the identity of the first remote participant is used. Returns
 * `null` when there are no remote participants or the fallback participant
 * has no identity.
 */
function findAgentIdentity(room: LiveKitRoom): string | null {
  const participants = [...(room.remoteParticipants?.values() ?? [])];

  for (const { identity } of participants) {
    if (typeof identity === "string" && identity.startsWith("agent")) {
      return identity;
    }
  }

  // No agent-prefixed identity: fall back to whoever is listed first.
  const [first] = participants;
  return first?.identity ?? null;
}
92
+
93
/**
 * Send the `uiReady` RPC to the agent, retrying a bounded number of times.
 *
 * Each attempt looks up the agent identity and, if one is present, performs
 * the RPC. An attempt that finds no agent or whose RPC rejects is followed by
 * a fixed pause before the next one. Resolves after the first successful RPC
 * or once all attempts are used up; it never rejects because of an RPC error.
 */
async function sendUiReady(room: LiveKitRoom): Promise<void> {
  const pause = (): Promise<void> =>
    new Promise((resolve) => {
      setTimeout(resolve, UI_READY_RETRY_DELAY_MS);
    });

  let attempt = 0;
  while (attempt < UI_READY_RETRY_ATTEMPTS) {
    attempt += 1;
    const identity = findAgentIdentity(room);

    if (identity) {
      try {
        await room.localParticipant.performRpc({
          destinationIdentity: identity,
          method: "uiReady",
          payload: JSON.stringify({ timestamp: Date.now() }),
        });
        console.log("[Sable] uiReady sent", { identity, attempt });
        return;
      } catch (err) {
        console.warn("[Sable] uiReady RPC failed", { attempt, err });
      }
    } else {
      console.warn("[Sable] sendUiReady: no agent participant yet", { attempt });
    }

    // Both failure modes wait the same amount before the next attempt.
    await pause();
  }

  console.error("[Sable] uiReady: exhausted retries — agent will not greet");
}
116
+
117
+ // ── Session class ─────────────────────────────────────────────────────────
118
+
119
/**
 * One active session at a time. The class is internal — customers interact
 * with the `SableAPI` singleton installed on `window.Sable` (see `global.ts`).
 * Keeping a class here (rather than a bag of module-level vars) makes the
 * state ownership explicit and the teardown path easier to reason about.
 *
 * Lifecycle invariants:
 *  - `activeRoom` is non-null exactly while a session is live.
 *  - `starting` is true while a `start()` call is between its guard and its
 *    completion, so a second `start()` cannot slip in during the awaits.
 *  - `stop()` detaches all per-session state synchronously before its first
 *    await, so a `start()` issued while an old `stop()` is still
 *    disconnecting cannot have its state torn down by the old call.
 */
export class Session implements SableAPI {
  readonly version = VERSION;
  private readonly emitter = new SableEventEmitter();
  private activeRoom: LiveKitRoom | null = null;
  private visionHandle: VisionHandle | null = null;
  private unmountDebugPanel: (() => void) | null = null;
  /** True while a `start()` call is in flight. */
  private starting = false;
  /** Pending "no remote audio" watchdog timer, if any. */
  private audioWatchdog: ReturnType<typeof setTimeout> | null = null;
  /** Audio elements this session appended to `document.body`. */
  private readonly audioElements = new Set<HTMLMediaElement>();

  /** Subscribe to a session event. Returns the emitter's unsubscribe function. */
  on<E extends keyof SableEvents>(
    event: E,
    handler: SableEventHandler<E>,
  ): () => void {
    return this.emitter.on(event, handler);
  }

  /**
   * Start a session: fetch connection details, load `livekit-client`,
   * register RPC handlers, connect, enable the mic and (optionally) start
   * vision. Resolves once the room is connected and the mic is enabled.
   *
   * @throws Error if a session is already active or another `start()` is
   *   still in progress, or if neither `publicKey` nor `agentPublicId` is set.
   *   Errors from fetching details, importing the client, connecting or
   *   enabling the mic are rethrown after the half-open room is torn down.
   */
  async start(opts: StartOptions): Promise<void> {
    if (this.activeRoom) {
      throw new Error("Sable already started; call stop() first");
    }
    if (this.starting) {
      throw new Error("Sable.start: a start() call is already in progress");
    }

    // Public key resolution. `publicKey` wins when both are passed, but
    // `agentPublicId` remains supported during beta so customers upgrading
    // from an earlier build don't have to rename the field the same day
    // they update the package.
    const publicKey = opts.publicKey ?? opts.agentPublicId;
    if (!publicKey) {
      throw new Error("Sable.start: `publicKey` is required");
    }

    // NOTE(review): `opts.context` is declared on StartOptions but is not read
    // anywhere in this method — confirm whether it should be forwarded.

    this.starting = true;
    try {
      const apiUrl = opts.apiUrl ?? DEFAULT_API_URL;
      console.log("[Sable] fetching connection details", { apiUrl });
      const details = await fetchConnectionDetails({ apiUrl, publicKey });
      console.log("[Sable] connection details received", {
        roomName: details.roomName,
        participantName: details.participantName,
      });

      // Dynamic import keeps the IIFE entry small; livekit-client is ~200KB
      // minified and only needed once a session actually starts.
      const livekit = await import("livekit-client");
      const { Room, RoomEvent, LocalVideoTrack, Track } = livekit;
      const room = new Room() as unknown as LiveKitRoom;

      const publishLib: LiveKitPublishLib = {
        LocalVideoTrack: LocalVideoTrack as unknown as LiveKitPublishLib["LocalVideoTrack"],
        Track: Track as unknown as LiveKitPublishLib["Track"],
      };

      // Handshake handler MUST be registered before room.connect() so it's
      // ready when the first RPC arrives.
      room.registerRpcMethod("agentReady", async () => {
        console.log("[Sable] RPC agentReady received");
        void sendUiReady(room);
        return JSON.stringify({ success: true });
      });

      // Browser bridge: 6 browser.* handlers. Safe to register even for
      // voice-only sessions — the agent just won't call them.
      registerBrowserHandlers(room);

      // Runtime: default methods (clipboard, switchView) + customer overrides
      // and extensions passed via `opts.runtime`.
      installRuntime(room, opts.runtime);

      this.wireRoomEvents(room, RoomEvent);

      try {
        await room.connect(details.serverUrl, details.participantToken);
        await room.localParticipant.setMicrophoneEnabled(true);
      } catch (err) {
        // The room may already be connected (e.g. mic permission denied after
        // a successful connect). `activeRoom` is still null, so `stop()` could
        // never reach it — tear it down here before surfacing the error.
        try {
          await room.disconnect();
        } catch (disconnectErr) {
          console.warn(
            "[Sable] disconnect after failed start failed",
            disconnectErr,
          );
        }
        for (const el of this.takeAudioElements()) el.remove();
        throw err;
      }

      this.activeRoom = room;

      // Vision is off by default. Opt in with `vision: { enabled: true }`.
      // A vision failure is logged and the session continues without it.
      if (opts.vision?.enabled) {
        try {
          this.visionHandle = await startVision({
            room: room as unknown as Parameters<typeof startVision>[0]["room"],
            lib: publishLib,
            options: opts.vision,
          });
          if (shouldShowDebugPanel(opts.debug)) {
            this.unmountDebugPanel = mountDebugPanel(this.visionHandle.canvas);
          }
        } catch (e) {
          console.warn("[Sable] failed to start vision", e);
        }
      }

      console.log("[Sable] session live", {
        roomName: details.roomName,
        participantName: details.participantName,
      });
      this.emitter.emit("session:started", {
        roomName: details.roomName,
        participantName: details.participantName,
      });

      this.scheduleAudioWatchdog(room);
    } finally {
      this.starting = false;
    }
  }

  /**
   * Tear down the active session. No-op when none is active, so it is safe to
   * call repeatedly. `session:ended` is emitted even if `room.disconnect()`
   * rejects; in that case the rejection still propagates to the caller.
   */
  async stop(): Promise<void> {
    const room = this.activeRoom;
    if (!room) return;
    this.activeRoom = null;

    // Detach every piece of per-session state before the first await so a
    // start() racing with this teardown owns fresh fields.
    if (this.audioWatchdog !== null) {
      clearTimeout(this.audioWatchdog);
      this.audioWatchdog = null;
    }
    const unmount = this.unmountDebugPanel;
    this.unmountDebugPanel = null;
    const vision = this.visionHandle;
    this.visionHandle = null;
    const staleAudio = this.takeAudioElements();

    if (unmount) {
      try {
        unmount();
      } catch (e) {
        console.warn("[Sable] debug panel unmount failed", e);
      }
    }

    if (vision) {
      try {
        await vision.stop();
      } catch (e) {
        console.warn("[Sable] vision stop failed", e);
      }
    }

    try {
      await room.localParticipant.setMicrophoneEnabled(false);
    } catch (err) {
      console.warn("[Sable] setMicrophoneEnabled(false) failed", err);
    }

    try {
      await room.disconnect();
    } finally {
      // Elements already removed by the TrackUnsubscribed handler are
      // unaffected; `remove()` on a detached element does nothing.
      for (const el of staleAudio) el.remove();
      console.log("[Sable] session ended");
      this.emitter.emit("session:ended", {});
    }
  }

  /** Return the tracked audio elements and reset the tracking set. */
  private takeAudioElements(): HTMLMediaElement[] {
    const taken = Array.from(this.audioElements);
    this.audioElements.clear();
    return taken;
  }

  /**
   * Watchdog: warn loudly if no remote audio track shows up within 10s of the
   * session going live. Diagnostic only; the session is left untouched.
   */
  private scheduleAudioWatchdog(room: LiveKitRoom): void {
    type RemoteView = {
      remoteParticipants?: Map<
        string,
        {
          identity?: string;
          trackPublications?: Map<
            string,
            { kind?: string; isSubscribed?: boolean }
          >;
        }
      >;
    };

    this.audioWatchdog = setTimeout(() => {
      this.audioWatchdog = null;
      // The session this timer was armed for is gone.
      if (this.activeRoom !== room) return;

      const view = room as unknown as RemoteView;
      const remotes = view.remoteParticipants
        ? Array.from(view.remoteParticipants.values())
        : [];
      const summary = remotes.map((p) => ({
        identity: p.identity,
        tracks: p.trackPublications
          ? Array.from(p.trackPublications.values()).map((t) => ({
              kind: t.kind,
              subscribed: t.isSubscribed,
            }))
          : [],
      }));
      const anyAudio = summary.some((p) =>
        p.tracks.some((t) => t.kind === "audio"),
      );
      if (!anyAudio) {
        console.warn(
          "[Sable] no remote audio track after 10s — agent worker probably failed to publish. Remote participants:",
          summary,
        );
      }
    }, 10000);
  }

  /**
   * Subscribe to LiveKit room events and translate the interesting ones into
   * `SableEvents`. Keeps the Session → customer event surface decoupled from
   * the LiveKit event names so we can swap the transport later without
   * breaking subscribers.
   */
  private wireRoomEvents(
    room: LiveKitRoom,
    RoomEvent: Record<string, string>,
  ): void {
    room.on(RoomEvent.ConnectionStateChanged, (state: unknown) => {
      console.log("[Sable] ConnectionStateChanged", state);
    });
    room.on(RoomEvent.Disconnected, (reason: unknown) => {
      console.log("[Sable] Disconnected", reason);
      // Forward to stop() so cleanup runs exactly once regardless of who
      // initiated the disconnect (customer call vs. server drop). When stop()
      // itself triggered the disconnect, `activeRoom` is already null.
      if (this.activeRoom === room) {
        void this.stop().catch((e) =>
          console.warn("[Sable] stop on disconnect failed", e),
        );
      }
    });
    room.on(RoomEvent.ParticipantConnected, (participant: unknown) => {
      const p = participant as {
        identity?: string;
        sid?: string;
        metadata?: string;
      };
      console.log("[Sable] ParticipantConnected", {
        identity: p.identity,
        sid: p.sid,
      });
    });
    room.on(RoomEvent.ParticipantDisconnected, (participant: unknown) => {
      const p = participant as { identity?: string };
      console.warn("[Sable] ParticipantDisconnected", { identity: p.identity });
    });
    room.on(
      RoomEvent.TrackSubscribed,
      (track: unknown, _pub: unknown, participant: unknown) => {
        const t = track as {
          kind?: string;
          attach?: () => HTMLMediaElement;
        };
        const p = participant as { identity?: string };
        console.log("[Sable] TrackSubscribed", {
          kind: t.kind,
          participant: p.identity,
        });
        // Auto-attach remote audio so the agent's voice plays without the
        // customer needing to wire up an <audio> element themselves.
        if (t.kind === "audio" && typeof t.attach === "function") {
          const el = t.attach();
          el.setAttribute("data-sable", "1");
          el.setAttribute("playsinline", "");
          el.autoplay = true;
          document.body.appendChild(el);
          // Remember it so teardown can remove it even if no
          // TrackUnsubscribed event arrives for this track.
          this.audioElements.add(el);
          console.log("[Sable] attached remote audio element");
        }
      },
    );
    room.on(RoomEvent.TrackUnsubscribed, (track: unknown) => {
      const t = track as { detach?: () => HTMLMediaElement[] };
      if (typeof t.detach === "function") {
        for (const el of t.detach()) {
          el.remove();
          this.audioElements.delete(el);
        }
      }
    });
    room.on(RoomEvent.ActiveSpeakersChanged, (speakers: unknown) => {
      const list = (speakers as Array<{ identity?: string }>) ?? [];
      const agentTalking = list.some(
        (s) => typeof s.identity === "string" && s.identity.startsWith("agent"),
      );
      const userTalking = list.some(
        (s) =>
          typeof s.identity === "string" && !s.identity.startsWith("agent"),
      );
      this.emitter.emit("agent:speaking", agentTalking);
      this.emitter.emit("user:speaking", userTalking);
    });
  }
}
@@ -0,0 +1,176 @@
1
+ /**
2
+ * Public type surface for @sable-ai/sdk-core.
3
+ *
4
+ * Everything a consumer can touch lives here. Internal types stay in their
5
+ * respective modules so we never accidentally ship them in the published
6
+ * `.d.ts` bundle.
7
+ */
8
+
9
+ // ── Frame sources ──────────────────────────────────────────────────────────
10
+ //
11
+ // Discriminated union — `type` is the tag. Adding a new source (e.g. `video`,
12
+ // `webgl`) means adding a new variant here and a new case in
13
+ // `vision/frame-source.ts`. The public API stays stable.
14
+
15
/**
 * Built-in frame source: the page is captured with the Wireframe library and
 * the result is drawn onto the published canvas (see
 * `vision/frame-source.ts`).
 */
export interface WireframeFrameSource {
  /** Discriminant tag for the `FrameSource` union. */
  type: "wireframe";
  /**
   * Capture rate in frames per second. Default: 2. Values that are missing,
   * not a number, or not greater than zero fall back to the default.
   */
  rate?: number;
  features?: {
    /**
     * Include rendered images in the wireframe (instead of placeholder boxes).
     * Slightly higher CPU + bandwidth. Default: false. Only the literal value
     * `true` turns this on.
     */
    includeImages?: boolean;
  };
}
27
+
28
/**
 * Customer-supplied frame source: the SDK calls `captureFn` on every tick and
 * draws whatever it returns onto the published canvas.
 */
export interface FnFrameSource {
  /** Discriminant tag for the `FrameSource` union. */
  type: "fn";
  /**
   * Capture rate in frames per second. Default: 2. Values that are missing,
   * not a number, or not greater than zero fall back to the default.
   */
  rate?: number;
  /**
   * Called at `rate` Hz. Return an `HTMLCanvasElement` or `ImageBitmap` that
   * the SDK will publish to the agent as a video track. Useful for feeding
   * custom sources like a 3D scene, a `<video>` element, or a WebGL surface
   * that the DOM walker can't introspect.
   *
   * The return value is used synchronously and is drawn stretched to the full
   * size of the published canvas. If the function throws, the error is logged
   * and that frame is skipped; capturing continues on the next tick.
   */
  captureFn: () => HTMLCanvasElement | ImageBitmap;
}
40
+
41
/** Any supported frame source. Discriminated on the `type` field. */
export type FrameSource = WireframeFrameSource | FnFrameSource;
42
+
43
+ // ── Vision ─────────────────────────────────────────────────────────────────
44
+
45
/** Options controlling whether and how the page is shown to the agent. */
export interface VisionOptions {
  /**
   * Whether to publish a video track of the page to the agent. Default: false.
   * Turn this on for agents that should be able to *see* the user's screen
   * in addition to hearing them.
   *
   * If vision fails to start, the session logs a warning and continues
   * without it; `start()` does not reject for that reason.
   */
  enabled?: boolean;
  /**
   * Where video frames come from. Defaults to the built-in wireframe renderer
   * at 2 fps with images disabled.
   */
  frameSource?: FrameSource;
}
58
+
59
+ // ── Runtime (agent → page RPC surface) ─────────────────────────────────────
60
+ //
61
+ // The agent can call methods on the page over LiveKit RPC. `sdk-core` ships
62
+ // default implementations for a known set of methods (clipboard copy, video
63
+ // overlay, and no-op placeholders for host-UI-specific methods). Customers
64
+ // override any of them by passing matching keys in `Sable.start({ runtime })`,
65
+ // and can add new methods specific to their app that become callable by the
66
+ // agent. One unified surface: no distinction between "SDK methods" and
67
+ // "customer methods" from the agent's perspective.
68
+
69
/**
 * Handler for one agent → page RPC method. Receives the call's payload as a
 * plain object and may return its result directly or as a promise.
 */
export type RuntimeMethod = (
  payload: Record<string, unknown>,
) => unknown | Promise<unknown>;
72
+
73
/**
 * Map of method name → handler. Used both for the user-provided overrides
 * passed to `Sable.start({ runtime })` and for the SDK's internal defaults.
 * Any string key is accepted; the key is the method name.
 */
export interface RuntimeMethods {
  [method: string]: RuntimeMethod;
}
80
+
81
+ // ── Start options ──────────────────────────────────────────────────────────
82
+
83
/**
 * Options accepted by `Sable.start()`.
 *
 * Every field is optional at the type level, but `start()` throws unless at
 * least one of `publicKey` / `agentPublicId` is provided.
 */
export interface StartOptions {
  /**
   * Publishable key for the agent (from platform.withsable.com → your agent
   * → Web SDK → Public key). Safe to ship in client-side code — the security
   * boundary is the allowed-domains list configured alongside the key.
   *
   * During beta, raw agent IDs (e.g. `agt_...`) are accepted here too.
   */
  publicKey?: string;

  /**
   * @deprecated Use `publicKey` instead. Accepted as an alias during beta and
   * will be removed before 1.0. If both are set, `publicKey` wins.
   */
  agentPublicId?: string;

  /**
   * What the agent can see. Off by default — opt in for vision-enabled agents.
   */
  vision?: VisionOptions;

  /**
   * Overrides + extensions for methods the agent can RPC into the page.
   * Unspecified methods fall back to the SDK's default implementations. New
   * methods become callable by the agent as-is.
   */
  runtime?: RuntimeMethods;

  /**
   * Arbitrary metadata forwarded to the agent at session start. Surfaces
   * verbatim in the agent's initial prompt.
   *
   * NOTE(review): `Session.start()` in `session/index.ts` does not read this
   * field — confirm where (or whether) it is forwarded.
   */
  context?: Record<string, unknown>;

  /**
   * Dev-only: mount a floating preview panel showing the exact wireframe
   * canvas being published to the agent. Can also be enabled via
   * `?sable-debug=1` or `localStorage["sable:debug"]="1"`.
   *
   * The panel is only mounted when vision is enabled and started
   * successfully, since it previews the vision canvas.
   */
  debug?: boolean;

  /**
   * Override the sable-api base URL. Dev/test only. Defaults to the
   * production gateway.
   * @internal
   */
  apiUrl?: string;
}
131
+
132
+ // ── Events ─────────────────────────────────────────────────────────────────
133
+ //
134
+ // Fire-and-forget — the SDK does not care whether customers subscribe.
135
+
136
/**
 * Map of event name → payload type for everything `Sable.on()` can deliver.
 */
export interface SableEvents {
  /**
   * Fired once the room is connected and the mic is enabled (and, when vision
   * is requested, after the attempt to start it). The agent handshake
   * (`agentReady` / `uiReady`) runs asynchronously and is not awaited, so it
   * may still be pending when this fires.
   */
  "session:started": { roomName: string; participantName: string };
  /**
   * Fired once when the session ends for any reason. The session currently
   * emits an empty payload, so `reason` is `undefined`.
   */
  "session:ended": { reason?: string };
  /**
   * Whether the agent is among the active speakers. Emitted on every
   * active-speaker change, so consecutive events may carry the same value.
   * The agent is recognised by an identity starting with `"agent"`.
   */
  "agent:speaking": boolean;
  /**
   * Whether any participant whose identity does not start with `"agent"` is
   * among the active speakers. Emitted on every active-speaker change, so
   * consecutive events may carry the same value.
   */
  "user:speaking": boolean;
  /**
   * Fired for any non-fatal error during the session.
   *
   * NOTE(review): no emit site for this event exists in `session/index.ts` —
   * confirm whether another module emits it.
   */
  error: Error;
}
148
+
149
/**
 * Callback type for event `E`: receives that event's payload as declared in
 * `SableEvents`. The return value is ignored.
 */
export type SableEventHandler<E extends keyof SableEvents> = (
  payload: SableEvents[E],
) => void;
152
+
153
+ // ── Public API surface ─────────────────────────────────────────────────────
154
+
155
/**
 * The object customers interact with (installed as `window.Sable`).
 */
export interface SableAPI {
  /** SDK version string, matches the npm package version. */
  version: string;
  /**
   * Start a voice (and optionally vision) session with the agent. Resolves
   * once the room is connected and the mic is enabled. Rejects if a session
   * is already active or if no public key is supplied.
   */
  start(opts: StartOptions): Promise<void>;
  /** Tear down the active session. No-op if none. */
  stop(): Promise<void>;
  /**
   * Subscribe to a session event. Returns an unsubscribe function. Fire-and-
   * forget — the SDK does not care whether you subscribe.
   */
  on<E extends keyof SableEvents>(
    event: E,
    handler: SableEventHandler<E>,
  ): () => void;
}
171
+
172
// Augments the DOM `Window` type so `window.Sable` is typed for consumers.
// The property is optional, so callers must check that it is present.
declare global {
  interface Window {
    Sable?: SableAPI;
  }
}
package/src/version.ts ADDED
@@ -0,0 +1,8 @@
1
/**
 * Single source of truth for the SDK version string.
 *
 * Kept in a standalone file so build tooling (GitHub Actions release workflow)
 * can replace it at publish time without touching anything else.
 */

/** SDK version string. `Session` exposes it as its read-only `version` field. */
export const VERSION = "0.1.0";
@@ -0,0 +1,111 @@
1
+ /**
2
+ * FrameSource dispatcher.
3
+ *
4
+ * The public API lets customers choose what the agent sees via
5
+ * `vision: { frameSource: { type: "wireframe" | "fn", ... } }`. This module
6
+ * hides the strategy behind one entrypoint — `startFrameSource` — which
7
+ * returns a stop function. The target canvas is passed in so the caller
8
+ * (vision/index.ts) can hand the same canvas to `canvas.captureStream()`.
9
+ *
10
+ * Two built-in sources:
11
+ *
12
+ * 1. `{ type: "wireframe", rate, features: { includeImages } }`
13
+ * — Runs the Wireframe library against `document.body` at `rate` Hz.
14
+ * This is the default, tuned for low bandwidth + agent-readable
15
+ * structure. `includeImages: true` fetches cover photos/avatars/
16
+ * thumbnails via CORS and draws real pixels; otherwise the agent
17
+ * gets labelled placeholder boxes.
18
+ *
19
+ * 2. `{ type: "fn", rate, captureFn }`
20
+ * — The user supplies a function that returns a canvas or ImageBitmap
21
+ * on each tick. Useful for custom sources the DOM walker can't
22
+ * introspect: `<video>` elements, WebGL/3D scenes, off-screen canvases.
23
+ *
24
+ * Adding a new source (e.g. `{ type: "video" }`) is a matter of:
25
+ * - adding a variant in `types/index.ts`
26
+ * - adding a case here.
27
+ */
28
+
29
+ import type { FrameSource } from "../types";
30
+ import { getWireframeCtor } from "./wireframe";
31
+
32
/** Capture rate used when a frame source does not specify a usable `rate`. */
const DEFAULT_RATE_HZ = 2;

/**
 * Convert a capture rate in Hz to a timer delay in whole milliseconds.
 *
 * A `rate` that is missing, not a number, or not greater than zero is
 * replaced by `DEFAULT_RATE_HZ`. The result is rounded to the nearest
 * millisecond and never drops below 1 ms.
 */
function intervalMs(rate: number | undefined): number {
  let hz = DEFAULT_RATE_HZ;
  if (typeof rate === "number" && rate > 0) {
    hz = rate;
  }
  const periodMs = Math.round(1000 / hz);
  return Math.max(1, periodMs);
}
38
+
39
/**
 * Keep `canvas` the same pixel size as the browser viewport.
 *
 * Each dimension is clamped to at least 1. The canvas is only written to when
 * its size actually differs from the viewport. Intended to run on every tick
 * so window resizes are picked up without a dedicated resize listener.
 */
function syncCanvasSize(canvas: HTMLCanvasElement): void {
  const { innerWidth, innerHeight } = window;
  const targetWidth = Math.max(1, innerWidth);
  const targetHeight = Math.max(1, innerHeight);

  const alreadySized =
    canvas.width === targetWidth && canvas.height === targetHeight;
  if (alreadySized) return;

  canvas.width = targetWidth;
  canvas.height = targetHeight;
}
51
+
52
/**
 * Start capturing frames from `source` into `canvas`.
 *
 * A self-rescheduling timer renders one frame per tick. The next tick is
 * scheduled only after the current one has finished, so ticks never overlap
 * and the effective period is the configured interval plus the time a
 * capture takes. Errors inside a single tick are logged and skipped — one bad
 * frame shouldn't kill vision for the rest of the session.
 *
 * @param source Which frame source to run and at what rate.
 * @param canvas Target canvas; resized to the viewport on every tick and
 *   painted with an opaque white background under each frame.
 * @returns A stop function. After it runs, no further tick is scheduled and a
 *   capture that is still in flight is discarded instead of being painted.
 *   If no 2D context is available a warning is logged and a no-op stop
 *   function is returned.
 */
export function startFrameSource(
  source: FrameSource,
  canvas: HTMLCanvasElement,
): () => void {
  const ctx = canvas.getContext("2d", { alpha: false });
  if (!ctx) {
    console.warn("[Sable] frame source: 2d context unavailable");
    return () => {};
  }

  const delayMs = intervalMs(source.rate);

  let stopped = false;
  let timer: ReturnType<typeof setTimeout> | undefined;

  // White background first, then the frame stretched over the whole canvas.
  const paint = (frame: CanvasImageSource): void => {
    ctx.fillStyle = "#ffffff";
    ctx.fillRect(0, 0, canvas.width, canvas.height);
    ctx.drawImage(frame, 0, 0, canvas.width, canvas.height);
  };

  const renderFrame = async (): Promise<void> => {
    syncCanvasSize(canvas);
    if (source.type === "wireframe") {
      const includeImages = source.features?.includeImages === true;
      const Wireframe = getWireframeCtor();
      const wf = new Wireframe(document.body, { images: includeImages });
      const { canvas: src } = await wf.capture();
      // stop() may have been called while the capture was pending; do not
      // touch the canvas once the caller has shut the source down.
      if (stopped) return;
      paint(src);
    } else if (source.type === "fn") {
      // Both HTMLCanvasElement and ImageBitmap are valid drawImage sources.
      paint(source.captureFn());
    }
  };

  const tick = async (): Promise<void> => {
    if (stopped) return;
    try {
      await renderFrame();
    } catch (e) {
      console.warn("[Sable] frame source tick failed", e);
    }
    if (!stopped) {
      timer = setTimeout(() => void tick(), delayMs);
    }
  };

  void tick();

  return () => {
    stopped = true;
    if (timer !== undefined) clearTimeout(timer);
  };
}