@sable-ai/sdk-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +55 -0
  2. package/dist/esm/index.js +2431 -0
  3. package/dist/sable.iife.js +1486 -0
  4. package/dist/types/browser-bridge/actions.d.ts +27 -0
  5. package/dist/types/browser-bridge/dom-state.d.ts +37 -0
  6. package/dist/types/browser-bridge/index.d.ts +19 -0
  7. package/dist/types/connection/index.d.ts +26 -0
  8. package/dist/types/events/index.d.ts +15 -0
  9. package/dist/types/global.d.ts +26 -0
  10. package/dist/types/index.d.ts +23 -0
  11. package/dist/types/rpc.d.ts +22 -0
  12. package/dist/types/runtime/clipboard.d.ts +14 -0
  13. package/dist/types/runtime/index.d.ts +36 -0
  14. package/dist/types/runtime/video-overlay.d.ts +14 -0
  15. package/dist/types/session/debug-panel.d.ts +29 -0
  16. package/dist/types/session/index.d.ts +41 -0
  17. package/dist/types/types/index.d.ts +131 -0
  18. package/dist/types/version.d.ts +7 -0
  19. package/dist/types/vision/frame-source.d.ts +34 -0
  20. package/dist/types/vision/index.d.ts +29 -0
  21. package/dist/types/vision/publisher.d.ts +44 -0
  22. package/dist/types/vision/wireframe.d.ts +22 -0
  23. package/package.json +61 -0
  24. package/src/assets/visible-dom.js.txt +764 -0
  25. package/src/assets/wireframe.js.txt +678 -0
  26. package/src/assets.d.ts +24 -0
  27. package/src/browser-bridge/actions.ts +161 -0
  28. package/src/browser-bridge/dom-state.ts +103 -0
  29. package/src/browser-bridge/index.ts +99 -0
  30. package/src/connection/index.ts +49 -0
  31. package/src/events/index.ts +50 -0
  32. package/src/global.ts +35 -0
  33. package/src/index.test.ts +6 -0
  34. package/src/index.ts +43 -0
  35. package/src/rpc.ts +31 -0
  36. package/src/runtime/clipboard.ts +47 -0
  37. package/src/runtime/index.ts +138 -0
  38. package/src/runtime/video-overlay.ts +94 -0
  39. package/src/session/debug-panel.ts +254 -0
  40. package/src/session/index.ts +375 -0
  41. package/src/types/index.ts +176 -0
  42. package/src/version.ts +8 -0
  43. package/src/vision/frame-source.ts +111 -0
  44. package/src/vision/index.ts +70 -0
  45. package/src/vision/publisher.ts +106 -0
  46. package/src/vision/wireframe.ts +43 -0
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Action dispatcher for `browser.execute_action`.
3
+ *
4
+ * The canonical wire contract lives in the Python bridge
5
+ * (`sable_agentkit/components/browser/bridges/wire.py`). The `kind` tag and
6
+ * payload shape of each variant must stay in lock-step with it — if a new
7
+ * action lands on the Python side, mirror it here.
8
+ *
9
+ * Target resolution: actions can target an element either by CSS selector
10
+ * string or by `{ x, y }` coordinates (for vision-driven clicks where the
11
+ * agent only knows pixel positions). See `resolveTarget`.
12
+ */
13
+ export interface ActionEnvelope {
14
+ kind: string;
15
+ payload?: unknown;
16
+ button?: string;
17
+ key?: string;
18
+ text?: string;
19
+ delay?: number;
20
+ replace?: boolean;
21
+ url?: string;
22
+ expression?: string;
23
+ start?: unknown;
24
+ end?: unknown;
25
+ steps?: number;
26
+ }
27
+ export declare function dispatchAction(action: ActionEnvelope): Promise<void>;
@@ -0,0 +1,37 @@
1
+ /**
2
+ * DOM-state capture for `browser.get_dom_state`.
3
+ *
4
+ * The agent calls `browser.get_dom_state` when it needs a fresh snapshot of
5
+ * the page before deciding on the next action. The response carries three
6
+ * things:
7
+ *
8
+ * - `screenshot_jpeg_b64` — a wireframe-rendered image of `document.body`
9
+ * (the field name is historical; the bytes are PNG — Northstar treats
10
+ * it as an opaque image and doesn't enforce the codec)
11
+ * - `elements` — the visible-element list produced by `visible-dom.js`,
12
+ * an agent-friendly structured summary of the interactive DOM
13
+ * - `viewport` + `url` — so the agent can reason about pixel coordinates
14
+ * and the current page identity
15
+ *
16
+ * `visible-dom.js` is shipped as a text asset and eval'd once on first use.
17
+ * `settle()` is also here — it's a mutation-observer quiet-period wait used
18
+ * by the `browser.settle` RPC to let animations/transitions finish before
19
+ * the agent reads DOM state again.
20
+ */
21
+ export interface DomStateResponse {
22
+ screenshot_jpeg_b64: string;
23
+ elements: unknown;
24
+ viewport: {
25
+ width: number;
26
+ height: number;
27
+ };
28
+ url: string;
29
+ }
30
+ export declare function captureDomState(): Promise<DomStateResponse>;
31
+ /**
32
+ * Mutation-observer quiet-period wait. Mirrors `visible_dom.py`'s settle —
33
+ * return as soon as the DOM has been quiet for `QUIET_MS`, or after
34
+ * `MAX_MS`, whichever comes first. Bookended by two double-rAFs so any
35
+ * in-flight layout/paint work gets flushed before and after the wait.
36
+ */
37
+ export declare function settle(): Promise<void>;
@@ -0,0 +1,19 @@
1
+ /**
2
+ * SDK side of the Sable browser bridge.
3
+ *
4
+ * Registers six LiveKit RPC handlers that the agent's UserBrowserBridge
5
+ * (`sable_agentkit/components/browser/bridges/user.py`) calls into:
6
+ *
7
+ * browser.execute_action → dispatches an Action variant against the page
8
+ * browser.get_dom_state → wireframe screenshot + visible-element list
9
+ * browser.get_url → window.location.href
10
+ * browser.get_viewport → window.innerWidth/innerHeight
11
+ * browser.verify_selector → !!document.querySelector(selector)
12
+ * browser.settle → mutation-observer quiet-period wait
13
+ *
14
+ * The wire contract is the canonical Python implementation in
15
+ * `sable_agentkit/components/browser/bridges/wire.py` — every field shape
16
+ * and Action `kind` tag must match it exactly.
17
+ */
18
+ import type { RpcRoom } from "../rpc";
19
+ export declare function registerBrowserHandlers(room: RpcRoom): void;
@@ -0,0 +1,26 @@
1
+ /**
2
+ * sable-api `/connection-details` fetch.
3
+ *
4
+ * Isolated in its own file because this is the ONE thing that changes when
5
+ * the backend flips from raw agent IDs (`?agentPublicId=...`) to publishable
6
+ * keys (`?publicKey=pk_live_...` + per-agent allowed-domains origin check).
7
+ * When that lands, the diff is contained to this file.
8
+ *
9
+ * Until then: we accept `publicKey` from the customer and pass it through as
10
+ * `?agentPublicId=...` on the wire, which keeps the current sable-api
11
+ * contract working while the public-facing option name is already the one
12
+ * we want long-term.
13
+ */
14
+ export declare const DEFAULT_API_URL = "https://sable-api-gateway-9dfmhij9.wl.gateway.dev";
15
+ export interface ConnectionDetails {
16
+ serverUrl: string;
17
+ roomName: string;
18
+ participantToken: string;
19
+ participantName: string;
20
+ }
21
+ export interface FetchConnectionDetailsInput {
22
+ apiUrl: string;
23
+ /** Either a `pk_live_...` publishable key or a raw `agt_...` agent ID. */
24
+ publicKey: string;
25
+ }
26
+ export declare function fetchConnectionDetails(input: FetchConnectionDetailsInput): Promise<ConnectionDetails>;
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Typed event emitter for the SDK's public event surface.
3
+ *
4
+ * Intentionally tiny — one map, one loop, no dependency on any EventTarget
5
+ * polyfill. We keep fire-and-forget semantics: handler exceptions are caught
6
+ * and logged so one misbehaving subscriber can't break the session.
7
+ */
8
+ import type { SableEventHandler, SableEvents } from "../types";
9
+ export declare class SableEventEmitter {
10
+ private readonly listeners;
11
+ on<E extends keyof SableEvents>(event: E, handler: SableEventHandler<E>): () => void;
12
+ emit<E extends keyof SableEvents>(event: E, payload: SableEvents[E]): void;
13
+ /** Drop every handler. Called on session teardown to avoid leaks. */
14
+ clear(): void;
15
+ }
@@ -0,0 +1,26 @@
1
+ /**
2
+ * `window.Sable` installer.
3
+ *
4
+ * The SDK ships in two distribution formats that MUST present the same
5
+ * runtime singleton:
6
+ *
7
+ * 1. IIFE bundle (`<script src="https://sdk.withsable.com/v1/sable.js">`)
8
+ * — auto-installs `window.Sable` on load.
9
+ *
10
+ * 2. npm ESM (`import Sable from "@sable-ai/sdk-core"`) — the `index.ts`
11
+ * barrel calls `installGlobal()` in addition to exporting `Sable` as
12
+ * its default export. If the customer also loaded the IIFE in the
13
+ * same page, the second install is a no-op (first-write-wins), so
14
+ * framework apps and script-tag users see the exact same session
15
+ * object. This is the Stripe/Intercom pattern: one global, multiple
16
+ * ways to reach it.
17
+ */
18
+ import type { SableAPI } from "./types";
19
+ /** The process-wide Sable singleton. Used by both `index.ts` and `installGlobal`. */
20
+ export declare const Sable: SableAPI;
21
+ /**
22
+ * Attach `Sable` to `window.Sable`, unless something already claimed that
23
+ * slot. First-write-wins so mixed script-tag + npm usage doesn't swap the
24
+ * singleton mid-session.
25
+ */
26
+ export declare function installGlobal(): void;
@@ -0,0 +1,23 @@
1
+ /**
2
+ * @sable-ai/sdk-core — public entry point.
3
+ *
4
+ * Two ways to use this package, both backed by the same singleton:
5
+ *
6
+ * 1. Script tag (IIFE bundle):
7
+ * `<script src="https://sdk.withsable.com/v1/sable.js"></script>`
8
+ * — auto-installs `window.Sable`, good for no-build sites and the
9
+ * Chrome extension's inject script.
10
+ *
11
+ * 2. npm package (ESM):
12
+ * `import Sable from "@sable-ai/sdk-core"`
13
+ * — good for framework apps. Importing also installs `window.Sable`
14
+ * so mixed usage (one page, two entry points) stays coherent.
15
+ *
16
+ * This file is a barrel: all real code lives in sibling folders. Keep it
17
+ * that way — the build output is what customers see, and a lean entry
18
+ * module minimises tree-shake surprises.
19
+ */
20
+ import { Sable } from "./global";
21
+ export { VERSION } from "./version";
22
+ export type { SableAPI, SableEvents, SableEventHandler, StartOptions, VisionOptions, FrameSource, WireframeFrameSource, FnFrameSource, RuntimeMethod, RuntimeMethods, } from "./types";
23
+ export default Sable;
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Shared LiveKit RPC primitives.
3
+ *
4
+ * Both `runtime/` (agent → page method calls) and `browser-bridge/` (agent
5
+ * driving the user's browser) register handlers on the room via
6
+ * `registerRpcMethod`. They don't need the full LiveKit `Room` type — just the
7
+ * single method — so we describe the minimum shape here. This keeps the heavy
8
+ * `livekit-client` import dynamic and out of the IIFE entry bundle.
9
+ */
10
+ export interface RpcRoom {
11
+ registerRpcMethod(method: string, handler: (data: {
12
+ payload: string;
13
+ }) => Promise<string>): void;
14
+ }
15
+ /**
16
+ * Parse an RPC payload string into a plain object. RPC payloads are JSON but
17
+ * we don't want a single malformed call from the agent to throw inside a
18
+ * handler — LiveKit RPC propagates exceptions back to the caller and the
19
+ * agent's tool use logic treats that as a hard error that can derail the
20
+ * conversation. Soft-failing to `{}` lets the handler decide what to do.
21
+ */
22
+ export declare function safeParse(payload: string): Record<string, unknown>;
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Clipboard runtime methods.
3
+ *
4
+ * `sendToolMessage` and its legacy alias `sendCopyableText` carry text the
5
+ * user is supposed to act on (URLs, code snippets, prompts). Parley renders
6
+ * them as chat bubbles; the standalone SDK has no chat surface, so we copy
7
+ * to the clipboard so the user can ⌘V into whatever the agent is guiding
8
+ * them through. URL wins over message when both are present — agents put
9
+ * explanatory text in `message` and the actual thing-to-copy in `url`.
10
+ */
11
+ export declare function handleCopyable(rpcName: string, payload: Record<string, unknown>): Promise<{
12
+ success: boolean;
13
+ error?: string;
14
+ }>;
@@ -0,0 +1,36 @@
1
+ /**
2
+ * Runtime: the set of methods the agent can RPC into the page.
3
+ *
4
+ * Historically these were called "UI stubs" because they originated as
5
+ * no-op placeholders for methods parley implements in its call overlay.
6
+ * That framing no longer fits: half of them now do real work, and the
7
+ * public API lets customers replace any of them AND add new ones through
8
+ * the same `Sable.start({ runtime })` surface.
9
+ *
10
+ * The shape:
11
+ *
12
+ * 1. `DEFAULT_RUNTIME` — built-in implementations shipped with the SDK.
13
+ * A few do real work (clipboard copy, video overlay); the rest are
14
+ * no-ops for methods that only make sense in a host-app call UI.
15
+ *
16
+ * 2. `installRuntime(room, userRuntime)` — merges `userRuntime` over the
17
+ * defaults and registers every entry as a LiveKit RPC handler on
18
+ * `room`. Agent RPC calls → run the matching method → return a
19
+ * JSON-encoded result.
20
+ *
21
+ * Customers extend the runtime by passing new keys in `userRuntime`:
22
+ * anything you put in becomes callable by the agent as-is. This means
23
+ * the same surface handles both "override a built-in" and "expose a
24
+ * business-logic tool" — one concept, not two.
25
+ */
26
+ import { type RpcRoom } from "../rpc";
27
+ import type { RuntimeMethods } from "../types";
28
+ export declare const DEFAULT_RUNTIME: RuntimeMethods;
29
+ /**
30
+ * Merge the user-provided runtime over `DEFAULT_RUNTIME` and register every
31
+ * entry as a LiveKit RPC handler on `room`. Later keys win — so passing
32
+ * `{ switchView: myImpl }` replaces the default video-overlay behaviour,
33
+ * while passing `{ activateTrial: ... }` exposes a new method the agent
34
+ * can call without touching the built-ins.
35
+ */
36
+ export declare function installRuntime(room: RpcRoom, userRuntime?: RuntimeMethods): void;
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Default `switchView({ mode: "video", url })` implementation.
3
+ *
4
+ * Mounts a centred floating video clip in the page. Host apps with their own
5
+ * call UI would override `switchView` in `runtime` to render into their own
6
+ * surface; the standalone SDK uses this simple overlay as the built-in
7
+ * default so agents that call `switchView` Just Work out of the box.
8
+ *
9
+ * Module-level state (`activeViewOverlay`) keeps at most one overlay mounted
10
+ * — calling `mountVideoOverlay` while another is showing tears the old one
11
+ * down first.
12
+ */
13
+ export declare function removeViewOverlay(): void;
14
+ export declare function mountVideoOverlay(url: string): void;
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Floating debug panel — "what the agent sees".
3
+ *
4
+ * When debug is on, we mount the vision capture canvas as a draggable
5
+ * preview in the host page. The panel renders the *exact* pixels that get
6
+ * encoded into the LiveKit video track, so "what you see in the panel" is
7
+ * literally "what the agent sees". Position + minimized state persist in
8
+ * `localStorage` so customers don't have to re-place the panel every
9
+ * reload.
10
+ *
11
+ * Opt-in signals (any of these enables the panel):
12
+ * - `Sable.start({ debug: true })`
13
+ * - `?sable-debug=1` anywhere in the page URL
14
+ * - `localStorage.setItem('sable:debug', '1')`
15
+ */
16
+ /**
17
+ * Opt-in check: does ANY of the debug signals say we should show the panel?
18
+ * Called by the session before deciding whether to mount.
19
+ */
20
+ export declare function shouldShowDebugPanel(debugOpt: boolean | undefined): boolean;
21
+ /**
22
+ * Mount `canvas` as a floating preview in the page. Returns a teardown that
23
+ * removes the wrapper and detaches listeners.
24
+ *
25
+ * The wrapper is pointer-events:none by default; only the header bar and
26
+ * its minimize button re-enable pointer events, so the panel never blocks
27
+ * clicks on the underlying page.
28
+ */
29
+ export declare function mountDebugPanel(canvas: HTMLCanvasElement): () => void;
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Session lifecycle.
3
+ *
4
+ * `Session` is the glue layer: it fetches connection details, dynamically
5
+ * imports `livekit-client`, connects, publishes the mic, registers the
6
+ * runtime + browser-bridge RPCs, and — if vision is enabled — starts the
7
+ * frame source + video publisher and mounts the debug panel. `start()`
8
+ * returns once the room is live and mic is publishing; events are emitted
9
+ * via the `SableEventEmitter`.
10
+ *
11
+ * Only one session is allowed at a time. `start()` throws if a session is
12
+ * already active; callers must `stop()` first. `stop()` is idempotent.
13
+ *
14
+ * `livekit-client` is imported dynamically (not statically) so the IIFE
15
+ * entry bundle stays small — the heavy client only loads when a customer
16
+ * actually calls `start()`.
17
+ */
18
+ import type { SableAPI, SableEventHandler, SableEvents, StartOptions } from "../types";
19
+ /**
20
+ * One active session at a time. The class is internal — customers interact
21
+ * with the `SableAPI` singleton installed on `window.Sable` (see `global.ts`).
22
+ * Keeping a class here (rather than a bag of module-level vars) makes the
23
+ * state ownership explicit and the teardown path easier to reason about.
24
+ */
25
+ export declare class Session implements SableAPI {
26
+ readonly version = "0.1.0";
27
+ private readonly emitter;
28
+ private activeRoom;
29
+ private visionHandle;
30
+ private unmountDebugPanel;
31
+ on<E extends keyof SableEvents>(event: E, handler: SableEventHandler<E>): () => void;
32
+ start(opts: StartOptions): Promise<void>;
33
+ stop(): Promise<void>;
34
+ /**
35
+ * Subscribe to LiveKit room events and translate the interesting ones into
36
+ * `SableEvents`. Keeps the Session → customer event surface decoupled from
37
+ * the LiveKit event names so we can swap the transport later without
38
+ * breaking subscribers.
39
+ */
40
+ private wireRoomEvents;
41
+ }
@@ -0,0 +1,131 @@
1
+ /**
2
+ * Public type surface for @sable-ai/sdk-core.
3
+ *
4
+ * Everything a consumer can touch lives here. Internal types stay in their
5
+ * respective modules so we never accidentally ship them in the published
6
+ * `.d.ts` bundle.
7
+ */
8
+ export interface WireframeFrameSource {
9
+ type: "wireframe";
10
+ /** Capture rate in frames per second. Default: 2. */
11
+ rate?: number;
12
+ features?: {
13
+ /**
14
+ * Include rendered images in the wireframe (instead of placeholder boxes).
15
+ * Slightly higher CPU + bandwidth. Default: false.
16
+ */
17
+ includeImages?: boolean;
18
+ };
19
+ }
20
+ export interface FnFrameSource {
21
+ type: "fn";
22
+ /** Capture rate in frames per second. Default: 2. */
23
+ rate?: number;
24
+ /**
25
+ * Called at `rate` Hz. Return an `HTMLCanvasElement` or `ImageBitmap` that
26
+ * the SDK will publish to the agent as a video track. Useful for feeding
27
+ * custom sources like a 3D scene, a `<video>` element, or a WebGL surface
28
+ * that the DOM walker can't introspect.
29
+ */
30
+ captureFn: () => HTMLCanvasElement | ImageBitmap;
31
+ }
32
+ export type FrameSource = WireframeFrameSource | FnFrameSource;
33
+ export interface VisionOptions {
34
+ /**
35
+ * Whether to publish a video track of the page to the agent. Default: false.
36
+ * Turn this on for agents that should be able to *see* the user's screen
37
+ * in addition to hearing them.
38
+ */
39
+ enabled?: boolean;
40
+ /**
41
+ * Where video frames come from. Defaults to the built-in wireframe renderer
42
+ * at 2 fps with images disabled.
43
+ */
44
+ frameSource?: FrameSource;
45
+ }
46
+ export type RuntimeMethod = (payload: Record<string, unknown>) => unknown | Promise<unknown>;
47
+ /**
48
+ * Map of method name → handler. Used both for the user-provided overrides
49
+ * passed to `Sable.start({ runtime })` and for the SDK's internal defaults.
50
+ */
51
+ export interface RuntimeMethods {
52
+ [method: string]: RuntimeMethod;
53
+ }
54
+ export interface StartOptions {
55
+ /**
56
+ * Publishable key for the agent (from platform.withsable.com → your agent
57
+ * → Web SDK → Public key). Safe to ship in client-side code — the security
58
+ * boundary is the allowed-domains list configured alongside the key.
59
+ *
60
+ * During beta, raw agent IDs (e.g. `agt_...`) are accepted here too.
61
+ */
62
+ publicKey?: string;
63
+ /**
64
+ * @deprecated Use `publicKey` instead. Accepted as an alias during beta and
65
+ * will be removed before 1.0. If both are set, `publicKey` wins.
66
+ */
67
+ agentPublicId?: string;
68
+ /**
69
+ * What the agent can see. Off by default — opt in for vision-enabled agents.
70
+ */
71
+ vision?: VisionOptions;
72
+ /**
73
+ * Overrides + extensions for methods the agent can RPC into the page.
74
+ * Unspecified methods fall back to the SDK's default implementations. New
75
+ * methods become callable by the agent as-is.
76
+ */
77
+ runtime?: RuntimeMethods;
78
+ /**
79
+ * Arbitrary metadata forwarded to the agent at session start. Surfaces
80
+ * verbatim in the agent's initial prompt.
81
+ */
82
+ context?: Record<string, unknown>;
83
+ /**
84
+ * Dev-only: mount a floating preview panel showing the exact wireframe
85
+ * canvas being published to the agent. Can also be enabled via
86
+ * `?sable-debug=1` or `localStorage["sable:debug"]="1"`.
87
+ */
88
+ debug?: boolean;
89
+ /**
90
+ * Override the sable-api base URL. Dev/test only. Defaults to the
91
+ * production gateway.
92
+ * @internal
93
+ */
94
+ apiUrl?: string;
95
+ }
96
+ export interface SableEvents {
97
+ /** Fired once the room is connected, mic is live, and handshake is done. */
98
+ "session:started": {
99
+ roomName: string;
100
+ participantName: string;
101
+ };
102
+ /** Fired once when the session ends for any reason. */
103
+ "session:ended": {
104
+ reason?: string;
105
+ };
106
+ /** Fired whenever the agent starts or stops speaking. */
107
+ "agent:speaking": boolean;
108
+ /** Fired whenever the local user starts or stops speaking. */
109
+ "user:speaking": boolean;
110
+ /** Fired for any non-fatal error during the session. */
111
+ error: Error;
112
+ }
113
+ export type SableEventHandler<E extends keyof SableEvents> = (payload: SableEvents[E]) => void;
114
+ export interface SableAPI {
115
+ /** SDK version string, matches the npm package version. */
116
+ version: string;
117
+ /** Start a voice (and optionally vision) session with the agent. */
118
+ start(opts: StartOptions): Promise<void>;
119
+ /** Tear down the active session. No-op if none. */
120
+ stop(): Promise<void>;
121
+ /**
122
+ * Subscribe to a session event. Returns an unsubscribe function. Fire-and-
123
+ * forget — the SDK does not care whether you subscribe.
124
+ */
125
+ on<E extends keyof SableEvents>(event: E, handler: SableEventHandler<E>): () => void;
126
+ }
127
+ declare global {
128
+ interface Window {
129
+ Sable?: SableAPI;
130
+ }
131
+ }
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Single source of truth for the SDK version string.
3
+ *
4
+ * Kept in a standalone file so build tooling (GitHub Actions release workflow)
5
+ * can replace it at publish time without touching anything else.
6
+ */
7
+ export declare const VERSION = "0.1.0";
@@ -0,0 +1,34 @@
1
+ /**
2
+ * FrameSource dispatcher.
3
+ *
4
+ * The public API lets customers choose what the agent sees via
5
+ * `vision: { frameSource: { type: "wireframe" | "fn", ... } }`. This module
6
+ * hides the strategy behind one entrypoint — `startFrameSource` — which
7
+ * returns a stop function. The target canvas is passed in so the caller
8
+ * (vision/index.ts) can hand the same canvas to `canvas.captureStream()`.
9
+ *
10
+ * Two built-in sources:
11
+ *
12
+ * 1. `{ type: "wireframe", rate, features: { includeImages } }`
13
+ * — Runs the Wireframe library against `document.body` at `rate` Hz.
14
+ * This is the default, tuned for low bandwidth + agent-readable
15
+ * structure. `includeImages: true` fetches cover photos/avatars/
16
+ * thumbnails via CORS and draws real pixels; otherwise the agent
17
+ * gets labelled placeholder boxes.
18
+ *
19
+ * 2. `{ type: "fn", rate, captureFn }`
20
+ * — The user supplies a function that returns a canvas or ImageBitmap
21
+ * on each tick. Useful for custom sources the DOM walker can't
22
+ * introspect: `<video>` elements, WebGL/3D scenes, off-screen canvases.
23
+ *
24
+ * Adding a new source (e.g. `{ type: "video" }`) is a matter of:
25
+ * - adding a variant in `types/index.ts`
26
+ * - adding a case here.
27
+ */
28
+ import type { FrameSource } from "../types";
29
+ /**
30
+ * Start capturing frames from `source` into `canvas`. Returns a stop function
31
+ * that halts the loop. Errors inside a single tick are logged and skipped —
32
+ * one bad frame shouldn't kill vision for the rest of the session.
33
+ */
34
+ export declare function startFrameSource(source: FrameSource, canvas: HTMLCanvasElement): () => void;
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Vision entry point.
3
+ *
4
+ * `startVision` owns the full lifecycle of "what the agent sees":
5
+ *
6
+ * 1. create a capture canvas sized to the viewport
7
+ * 2. start the configured frame source, drawing into that canvas
8
+ * 3. publish the canvas as a LiveKit screenshare video track
9
+ *
10
+ * Returns both the canvas (so the session can hand it to the debug panel,
11
+ * which just renders the exact pixels we publish) and a combined async stop
12
+ * function. Everything is off by default — callers pass `vision: { enabled:
13
+ * true }` in `Sable.start({ ... })` to opt in.
14
+ */
15
+ import type { VisionOptions } from "../types";
16
+ import { type LiveKitPublishLib, type PublishCapableRoom } from "./publisher";
17
+ export type { LiveKitPublishLib, PublishCapableRoom } from "./publisher";
18
+ export interface StartVisionArgs {
19
+ room: PublishCapableRoom;
20
+ lib: LiveKitPublishLib;
21
+ options: VisionOptions;
22
+ }
23
+ export interface VisionHandle {
24
+ /** The canvas being published. Useful for the debug panel. */
25
+ canvas: HTMLCanvasElement;
26
+ /** Stop the frame loop and unpublish the track. */
27
+ stop: () => Promise<void>;
28
+ }
29
+ export declare function startVision(args: StartVisionArgs): Promise<VisionHandle>;
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Publish a canvas as a LiveKit screenshare video track.
3
+ *
4
+ * Vision is delivered as a regular LiveKit video track rather than raw
5
+ * bytes: we draw each frame into a persistent canvas (see `frame-source.ts`)
6
+ * and hand that canvas to `canvas.captureStream(fps)`. The resulting
7
+ * MediaStreamTrack gets wrapped in a LocalVideoTrack and published as
8
+ * `Track.Source.ScreenShare`, so the agent subscribes to it the same way
9
+ * it would subscribe to any screenshare — no custom byte-stream handler,
10
+ * no per-frame PNG decode, and the codec delta-compresses mostly-static
11
+ * pages so bandwidth stays low.
12
+ *
13
+ * The fps passed to `captureStream` should match the frame source's render
14
+ * rate; a mismatch either drops frames (encoder slower than source) or
15
+ * wastes bandwidth (encoder faster than source). `vision/index.ts` owns
16
+ * the pairing.
17
+ */
18
+ interface LocalTrackPublication {
19
+ trackSid?: string;
20
+ }
21
+ export interface PublishCapableRoom {
22
+ localParticipant: {
23
+ publishTrack(track: unknown, options?: {
24
+ source?: unknown;
25
+ name?: string;
26
+ }): Promise<LocalTrackPublication>;
27
+ unpublishTrack(track: unknown, stopOnUnpublish?: boolean): Promise<unknown>;
28
+ };
29
+ }
30
+ export interface LiveKitPublishLib {
31
+ LocalVideoTrack: new (mediaStreamTrack: MediaStreamTrack, constraints?: unknown, userProvidedTrack?: boolean) => unknown;
32
+ Track: {
33
+ Source: {
34
+ ScreenShare: unknown;
35
+ };
36
+ };
37
+ }
38
+ /**
39
+ * Publish `canvas` as a screenshare track at `fps` frames per second.
40
+ * Returns an async teardown that unpublishes the track and stops the
41
+ * underlying MediaStreamTrack.
42
+ */
43
+ export declare function publishCanvasAsVideoTrack(room: PublishCapableRoom, lib: LiveKitPublishLib, canvas: HTMLCanvasElement, fps: number): Promise<() => Promise<void>>;
44
+ export {};
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Lazy bootstrap for the Wireframe class.
3
+ *
4
+ * `wireframe.js` is shipped as a text asset (see `assets/wireframe.js.txt`)
5
+ * and eval'd once at first use. Evaluating inside an IIFE with a shadowed
6
+ * `console` keeps the library's per-capture "[wireframe] drew N elements"
7
+ * log out of the host page's devtools — even at the default 2 fps it would flood the console.
8
+ * `.warn`/`.error` still go through so real problems surface.
9
+ *
10
+ * Used by:
11
+ * - `vision/frame-source.ts` — to render the wireframe canvas at `rate` Hz
12
+ * - `browser-bridge/dom-state.ts` — to produce the `screenshot_jpeg_b64`
13
+ * field returned by `browser.get_dom_state`
14
+ */
15
+ export type WireframeInstance = {
16
+ toDataURL(): Promise<string>;
17
+ capture: () => Promise<{
18
+ canvas: HTMLCanvasElement;
19
+ }>;
20
+ };
21
+ export type WireframeCtor = new (root?: Element, opts?: Record<string, unknown>) => WireframeInstance;
22
+ export declare function getWireframeCtor(): WireframeCtor;