@sable-ai/sdk-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -0
- package/dist/esm/index.js +2431 -0
- package/dist/sable.iife.js +1486 -0
- package/dist/types/browser-bridge/actions.d.ts +27 -0
- package/dist/types/browser-bridge/dom-state.d.ts +37 -0
- package/dist/types/browser-bridge/index.d.ts +19 -0
- package/dist/types/connection/index.d.ts +26 -0
- package/dist/types/events/index.d.ts +15 -0
- package/dist/types/global.d.ts +26 -0
- package/dist/types/index.d.ts +23 -0
- package/dist/types/rpc.d.ts +22 -0
- package/dist/types/runtime/clipboard.d.ts +14 -0
- package/dist/types/runtime/index.d.ts +36 -0
- package/dist/types/runtime/video-overlay.d.ts +14 -0
- package/dist/types/session/debug-panel.d.ts +29 -0
- package/dist/types/session/index.d.ts +41 -0
- package/dist/types/types/index.d.ts +131 -0
- package/dist/types/version.d.ts +7 -0
- package/dist/types/vision/frame-source.d.ts +34 -0
- package/dist/types/vision/index.d.ts +29 -0
- package/dist/types/vision/publisher.d.ts +44 -0
- package/dist/types/vision/wireframe.d.ts +22 -0
- package/package.json +61 -0
- package/src/assets/visible-dom.js.txt +764 -0
- package/src/assets/wireframe.js.txt +678 -0
- package/src/assets.d.ts +24 -0
- package/src/browser-bridge/actions.ts +161 -0
- package/src/browser-bridge/dom-state.ts +103 -0
- package/src/browser-bridge/index.ts +99 -0
- package/src/connection/index.ts +49 -0
- package/src/events/index.ts +50 -0
- package/src/global.ts +35 -0
- package/src/index.test.ts +6 -0
- package/src/index.ts +43 -0
- package/src/rpc.ts +31 -0
- package/src/runtime/clipboard.ts +47 -0
- package/src/runtime/index.ts +138 -0
- package/src/runtime/video-overlay.ts +94 -0
- package/src/session/debug-panel.ts +254 -0
- package/src/session/index.ts +375 -0
- package/src/types/index.ts +176 -0
- package/src/version.ts +8 -0
- package/src/vision/frame-source.ts +111 -0
- package/src/vision/index.ts +70 -0
- package/src/vision/publisher.ts +106 -0
- package/src/vision/wireframe.ts +43 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Action dispatcher for `browser.execute_action`.
|
|
3
|
+
*
|
|
4
|
+
* The canonical wire contract lives in the Python bridge
|
|
5
|
+
* (`sable_agentkit/components/browser/bridges/wire.py`). The `kind` tag and
|
|
6
|
+
* payload shape of each variant must stay in lock-step with it — if a new
|
|
7
|
+
* action lands on the Python side, mirror it here.
|
|
8
|
+
*
|
|
9
|
+
* Target resolution: actions can target an element either by CSS selector
|
|
10
|
+
* string or by `{ x, y }` coordinates (for vision-driven clicks where the
|
|
11
|
+
* agent only knows pixel positions). See `resolveTarget`.
|
|
12
|
+
*/
|
|
13
|
+
export interface ActionEnvelope {
|
|
14
|
+
kind: string;
|
|
15
|
+
payload?: unknown;
|
|
16
|
+
button?: string;
|
|
17
|
+
key?: string;
|
|
18
|
+
text?: string;
|
|
19
|
+
delay?: number;
|
|
20
|
+
replace?: boolean;
|
|
21
|
+
url?: string;
|
|
22
|
+
expression?: string;
|
|
23
|
+
start?: unknown;
|
|
24
|
+
end?: unknown;
|
|
25
|
+
steps?: number;
|
|
26
|
+
}
|
|
27
|
+
export declare function dispatchAction(action: ActionEnvelope): Promise<void>;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOM-state capture for `browser.get_dom_state`.
|
|
3
|
+
*
|
|
4
|
+
* The agent calls `browser.get_dom_state` when it needs a fresh snapshot of
|
|
5
|
+
* the page before deciding on the next action. The response carries three
|
|
6
|
+
* things:
|
|
7
|
+
*
|
|
8
|
+
* - `screenshot_jpeg_b64` — a wireframe-rendered image of `document.body`
|
|
9
|
+
* (the field name is historical; the bytes are PNG — Northstar treats
|
|
10
|
+
* it as an opaque image and doesn't enforce the codec)
|
|
11
|
+
* - `elements` — the visible-element list produced by `visible-dom.js`,
|
|
12
|
+
* an agent-friendly structured summary of the interactive DOM
|
|
13
|
+
* - `viewport` + `url` — so the agent can reason about pixel coordinates
|
|
14
|
+
* and the current page identity
|
|
15
|
+
*
|
|
16
|
+
* `visible-dom.js` is shipped as a text asset and eval'd once on first use.
|
|
17
|
+
* `settle()` is also here — it's a mutation-observer quiet-period wait used
|
|
18
|
+
* by the `browser.settle` RPC to let animations/transitions finish before
|
|
19
|
+
* the agent reads DOM state again.
|
|
20
|
+
*/
|
|
21
|
+
export interface DomStateResponse {
|
|
22
|
+
screenshot_jpeg_b64: string;
|
|
23
|
+
elements: unknown;
|
|
24
|
+
viewport: {
|
|
25
|
+
width: number;
|
|
26
|
+
height: number;
|
|
27
|
+
};
|
|
28
|
+
url: string;
|
|
29
|
+
}
|
|
30
|
+
export declare function captureDomState(): Promise<DomStateResponse>;
|
|
31
|
+
/**
|
|
32
|
+
* Mutation-observer quiet-period wait. Mirrors `visible_dom.py`'s settle —
|
|
33
|
+
* return as soon as the DOM has been quiet for `QUIET_MS`, or after
|
|
34
|
+
* `MAX_MS`, whichever comes first. Bookended by two double-rAFs so any
|
|
35
|
+
* in-flight layout/paint work gets flushed before and after the wait.
|
|
36
|
+
*/
|
|
37
|
+
export declare function settle(): Promise<void>;
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SDK side of the Sable browser bridge.
|
|
3
|
+
*
|
|
4
|
+
* Registers six LiveKit RPC handlers that the agent's UserBrowserBridge
|
|
5
|
+
* (`sable_agentkit/components/browser/bridges/user.py`) calls into:
|
|
6
|
+
*
|
|
7
|
+
* browser.execute_action → dispatches an Action variant against the page
|
|
8
|
+
* browser.get_dom_state → wireframe screenshot + visible-element list
|
|
9
|
+
* browser.get_url → window.location.href
|
|
10
|
+
* browser.get_viewport → window.innerWidth/innerHeight
|
|
11
|
+
* browser.verify_selector → !!document.querySelector(selector)
|
|
12
|
+
* browser.settle → mutation-observer quiet-period wait
|
|
13
|
+
*
|
|
14
|
+
* The wire contract is the canonical Python implementation in
|
|
15
|
+
* `sable_agentkit/components/browser/bridges/wire.py` — every field shape
|
|
16
|
+
* and Action `kind` tag must match it exactly.
|
|
17
|
+
*/
|
|
18
|
+
import type { RpcRoom } from "../rpc";
|
|
19
|
+
export declare function registerBrowserHandlers(room: RpcRoom): void;
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sable-api `/connection-details` fetch.
|
|
3
|
+
*
|
|
4
|
+
* Isolated in its own file because this is the ONE thing that changes when
|
|
5
|
+
* the backend flips from raw agent IDs (`?agentPublicId=...`) to publishable
|
|
6
|
+
* keys (`?publicKey=pk_live_...` + per-agent allowed-domains origin check).
|
|
7
|
+
* When that lands, the diff is contained to this file.
|
|
8
|
+
*
|
|
9
|
+
* Until then: we accept `publicKey` from the customer and pass it through as
|
|
10
|
+
* `?agentPublicId=...` on the wire, which keeps the current sable-api
|
|
11
|
+
* contract working while the public-facing option name is already the one
|
|
12
|
+
* we want long-term.
|
|
13
|
+
*/
|
|
14
|
+
export declare const DEFAULT_API_URL = "https://sable-api-gateway-9dfmhij9.wl.gateway.dev";
|
|
15
|
+
export interface ConnectionDetails {
|
|
16
|
+
serverUrl: string;
|
|
17
|
+
roomName: string;
|
|
18
|
+
participantToken: string;
|
|
19
|
+
participantName: string;
|
|
20
|
+
}
|
|
21
|
+
export interface FetchConnectionDetailsInput {
|
|
22
|
+
apiUrl: string;
|
|
23
|
+
/** Either a `pk_live_...` publishable key or a raw `agt_...` agent ID. */
|
|
24
|
+
publicKey: string;
|
|
25
|
+
}
|
|
26
|
+
export declare function fetchConnectionDetails(input: FetchConnectionDetailsInput): Promise<ConnectionDetails>;
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Typed event emitter for the SDK's public event surface.
|
|
3
|
+
*
|
|
4
|
+
* Intentionally tiny — one map, one loop, no dependency on any EventTarget
|
|
5
|
+
* polyfill. We keep fire-and-forget semantics: handler exceptions are caught
|
|
6
|
+
* and logged so one misbehaving subscriber can't break the session.
|
|
7
|
+
*/
|
|
8
|
+
import type { SableEventHandler, SableEvents } from "../types";
|
|
9
|
+
export declare class SableEventEmitter {
|
|
10
|
+
private readonly listeners;
|
|
11
|
+
on<E extends keyof SableEvents>(event: E, handler: SableEventHandler<E>): () => void;
|
|
12
|
+
emit<E extends keyof SableEvents>(event: E, payload: SableEvents[E]): void;
|
|
13
|
+
/** Drop every handler. Called on session teardown to avoid leaks. */
|
|
14
|
+
clear(): void;
|
|
15
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `window.Sable` installer.
|
|
3
|
+
*
|
|
4
|
+
* The SDK ships in two distribution formats that MUST present the same
|
|
5
|
+
* runtime singleton:
|
|
6
|
+
*
|
|
7
|
+
* 1. IIFE bundle (`<script src="https://sdk.withsable.com/v1/sable.js">`)
|
|
8
|
+
* — auto-installs `window.Sable` on load.
|
|
9
|
+
*
|
|
10
|
+
* 2. npm ESM (`import Sable from "@sable-ai/sdk-core"`) — the `index.ts`
|
|
11
|
+
* barrel calls `installGlobal()` in addition to exporting `Sable` as
|
|
12
|
+
* its default export. If the customer also loaded the IIFE in the
|
|
13
|
+
* same page, the second install is a no-op (first-write-wins), so
|
|
14
|
+
* framework apps and script-tag users see the exact same session
|
|
15
|
+
* object. This is the Stripe/Intercom pattern: one global, multiple
|
|
16
|
+
* ways to reach it.
|
|
17
|
+
*/
|
|
18
|
+
import type { SableAPI } from "./types";
|
|
19
|
+
/** The process-wide Sable singleton. Used by both `index.ts` and `installGlobal`. */
|
|
20
|
+
export declare const Sable: SableAPI;
|
|
21
|
+
/**
|
|
22
|
+
* Attach `Sable` to `window.Sable`, unless something already claimed that
|
|
23
|
+
* slot. First-write-wins so mixed script-tag + npm usage doesn't swap the
|
|
24
|
+
* singleton mid-session.
|
|
25
|
+
*/
|
|
26
|
+
export declare function installGlobal(): void;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @sable-ai/sdk-core — public entry point.
|
|
3
|
+
*
|
|
4
|
+
* Two ways to use this package, both backed by the same singleton:
|
|
5
|
+
*
|
|
6
|
+
* 1. Script tag (IIFE bundle):
|
|
7
|
+
* `<script src="https://sdk.withsable.com/v1/sable.js"></script>`
|
|
8
|
+
* — auto-installs `window.Sable`, good for no-build sites and the
|
|
9
|
+
* Chrome extension's inject script.
|
|
10
|
+
*
|
|
11
|
+
* 2. npm package (ESM):
|
|
12
|
+
* `import Sable from "@sable-ai/sdk-core"`
|
|
13
|
+
* — good for framework apps. Importing also installs `window.Sable`
|
|
14
|
+
* so mixed usage (one page, two entry points) stays coherent.
|
|
15
|
+
*
|
|
16
|
+
* This file is a barrel: all real code lives in sibling folders. Keep it
|
|
17
|
+
* that way — the build output is what customers see, and a lean entry
|
|
18
|
+
* module minimises tree-shake surprises.
|
|
19
|
+
*/
|
|
20
|
+
import { Sable } from "./global";
|
|
21
|
+
export { VERSION } from "./version";
|
|
22
|
+
export type { SableAPI, SableEvents, SableEventHandler, StartOptions, VisionOptions, FrameSource, WireframeFrameSource, FnFrameSource, RuntimeMethod, RuntimeMethods, } from "./types";
|
|
23
|
+
export default Sable;
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared LiveKit RPC primitives.
|
|
3
|
+
*
|
|
4
|
+
* Both `runtime/` (agent → page method calls) and `browser-bridge/` (agent
|
|
5
|
+
* driving the user's browser) register handlers on the room via
|
|
6
|
+
* `registerRpcMethod`. They don't need the full LiveKit `Room` type — just the
|
|
7
|
+
* single method — so we describe the minimum shape here. This keeps the heavy
|
|
8
|
+
* `livekit-client` import dynamic and out of the IIFE entry bundle.
|
|
9
|
+
*/
|
|
10
|
+
export interface RpcRoom {
|
|
11
|
+
registerRpcMethod(method: string, handler: (data: {
|
|
12
|
+
payload: string;
|
|
13
|
+
}) => Promise<string>): void;
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Parse an RPC payload string into a plain object. RPC payloads are JSON but
|
|
17
|
+
* we don't want a single malformed call from the agent to throw inside a
|
|
18
|
+
* handler — LiveKit RPC propagates exceptions back to the caller and the
|
|
19
|
+
* agent's tool use logic treats that as a hard error that can derail the
|
|
20
|
+
* conversation. Soft-failing to `{}` lets the handler decide what to do.
|
|
21
|
+
*/
|
|
22
|
+
export declare function safeParse(payload: string): Record<string, unknown>;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Clipboard runtime methods.
|
|
3
|
+
*
|
|
4
|
+
* `sendToolMessage` and its legacy alias `sendCopyableText` carry text the
|
|
5
|
+
* user is supposed to act on (URLs, code snippets, prompts). Parley renders
|
|
6
|
+
* them as chat bubbles; the standalone SDK has no chat surface, so we copy
|
|
7
|
+
* to the clipboard so the user can ⌘V into whatever the agent is guiding
|
|
8
|
+
* them through. URL wins over message when both are present — agents put
|
|
9
|
+
* explanatory text in `message` and the actual thing-to-copy in `url`.
|
|
10
|
+
*/
|
|
11
|
+
export declare function handleCopyable(rpcName: string, payload: Record<string, unknown>): Promise<{
|
|
12
|
+
success: boolean;
|
|
13
|
+
error?: string;
|
|
14
|
+
}>;
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Runtime: the set of methods the agent can RPC into the page.
|
|
3
|
+
*
|
|
4
|
+
* Historically these were called "UI stubs" because they originated as
|
|
5
|
+
* no-op placeholders for methods parley implements in its call overlay.
|
|
6
|
+
* That framing no longer fits: half of them now do real work, and the
|
|
7
|
+
* public API lets customers replace any of them AND add new ones through
|
|
8
|
+
* the same `Sable.start({ runtime })` surface.
|
|
9
|
+
*
|
|
10
|
+
* The shape:
|
|
11
|
+
*
|
|
12
|
+
* 1. `DEFAULT_RUNTIME` — built-in implementations shipped with the SDK.
|
|
13
|
+
* A few do real work (clipboard copy, video overlay); the rest are
|
|
14
|
+
* no-ops for methods that only make sense in a host-app call UI.
|
|
15
|
+
*
|
|
16
|
+
* 2. `installRuntime(room, userRuntime)` — merges `userRuntime` over the
|
|
17
|
+
* defaults and registers every entry as a LiveKit RPC handler on
|
|
18
|
+
* `room`. Agent RPC calls → run the matching method → return a
|
|
19
|
+
* JSON-encoded result.
|
|
20
|
+
*
|
|
21
|
+
* Customers extend the runtime by passing new keys in `userRuntime`:
|
|
22
|
+
* anything you put in becomes callable by the agent as-is. This means
|
|
23
|
+
* the same surface handles both "override a built-in" and "expose a
|
|
24
|
+
* business-logic tool" — one concept, not two.
|
|
25
|
+
*/
|
|
26
|
+
import { type RpcRoom } from "../rpc";
|
|
27
|
+
import type { RuntimeMethods } from "../types";
|
|
28
|
+
export declare const DEFAULT_RUNTIME: RuntimeMethods;
|
|
29
|
+
/**
|
|
30
|
+
* Merge the user-provided runtime over `DEFAULT_RUNTIME` and register every
|
|
31
|
+
* entry as a LiveKit RPC handler on `room`. Later keys win — so passing
|
|
32
|
+
* `{ switchView: myImpl }` replaces the default video-overlay behaviour,
|
|
33
|
+
* while passing `{ activateTrial: ... }` exposes a new method the agent
|
|
34
|
+
* can call without touching the built-ins.
|
|
35
|
+
*/
|
|
36
|
+
export declare function installRuntime(room: RpcRoom, userRuntime?: RuntimeMethods): void;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Default `switchView({ mode: "video", url })` implementation.
|
|
3
|
+
*
|
|
4
|
+
* Mounts a centred floating video clip in the page. Host apps with their own
|
|
5
|
+
* call UI would override `switchView` in `runtime` to render into their own
|
|
6
|
+
* surface; the standalone SDK uses this simple overlay as the built-in
|
|
7
|
+
* default so agents that call `switchView` Just Work out of the box.
|
|
8
|
+
*
|
|
9
|
+
* Module-level state (`activeViewOverlay`) keeps at most one overlay mounted
|
|
10
|
+
* — calling `mountVideoOverlay` while another is showing tears the old one
|
|
11
|
+
* down first.
|
|
12
|
+
*/
|
|
13
|
+
export declare function removeViewOverlay(): void;
|
|
14
|
+
export declare function mountVideoOverlay(url: string): void;
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Floating debug panel — "what the agent sees".
|
|
3
|
+
*
|
|
4
|
+
* When debug is on, we mount the vision capture canvas as a draggable
|
|
5
|
+
* preview in the host page. The panel renders the *exact* pixels that get
|
|
6
|
+
* encoded into the LiveKit video track, so "what you see in the panel" is
|
|
7
|
+
* literally "what the agent sees". Position + minimized state persist in
|
|
8
|
+
* `localStorage` so customers don't have to re-place the panel every
|
|
9
|
+
* reload.
|
|
10
|
+
*
|
|
11
|
+
* Opt-in signals (any of these enables the panel):
|
|
12
|
+
* - `Sable.start({ debug: true })`
|
|
13
|
+
* - `?sable-debug=1` anywhere in the page URL
|
|
14
|
+
* - `localStorage.setItem('sable:debug', '1')`
|
|
15
|
+
*/
|
|
16
|
+
/**
|
|
17
|
+
* Opt-in check: does ANY of the debug signals say we should show the panel?
|
|
18
|
+
* Called by the session before deciding whether to mount.
|
|
19
|
+
*/
|
|
20
|
+
export declare function shouldShowDebugPanel(debugOpt: boolean | undefined): boolean;
|
|
21
|
+
/**
|
|
22
|
+
* Mount `canvas` as a floating preview in the page. Returns a teardown that
|
|
23
|
+
* removes the wrapper and detaches listeners.
|
|
24
|
+
*
|
|
25
|
+
* The wrapper is pointer-events:none by default; only the header bar and
|
|
26
|
+
* its minimize button re-enable pointer events, so the panel never blocks
|
|
27
|
+
* clicks on the underlying page.
|
|
28
|
+
*/
|
|
29
|
+
export declare function mountDebugPanel(canvas: HTMLCanvasElement): () => void;
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session lifecycle.
|
|
3
|
+
*
|
|
4
|
+
* `Session` is the glue layer: it fetches connection details, dynamically
|
|
5
|
+
* imports `livekit-client`, connects, publishes the mic, registers the
|
|
6
|
+
* runtime + browser-bridge RPCs, and — if vision is enabled — starts the
|
|
7
|
+
* frame source + video publisher and mounts the debug panel. `start()`
|
|
8
|
+
* returns once the room is live and mic is publishing; events are emitted
|
|
9
|
+
* via the `SableEventEmitter`.
|
|
10
|
+
*
|
|
11
|
+
* Only one session is allowed at a time. `start()` throws if a session is
|
|
12
|
+
* already active; callers must `stop()` first. `stop()` is idempotent.
|
|
13
|
+
*
|
|
14
|
+
* `livekit-client` is imported dynamically (not statically) so the IIFE
|
|
15
|
+
* entry bundle stays small — the heavy client only loads when a customer
|
|
16
|
+
* actually calls `start()`.
|
|
17
|
+
*/
|
|
18
|
+
import type { SableAPI, SableEventHandler, SableEvents, StartOptions } from "../types";
|
|
19
|
+
/**
|
|
20
|
+
* One active session at a time. The class is internal — customers interact
|
|
21
|
+
* with the `SableAPI` singleton installed on `window.Sable` (see `global.ts`).
|
|
22
|
+
* Keeping a class here (rather than a bag of module-level vars) makes the
|
|
23
|
+
* state ownership explicit and the teardown path easier to reason about.
|
|
24
|
+
*/
|
|
25
|
+
export declare class Session implements SableAPI {
|
|
26
|
+
readonly version = "0.1.0";
|
|
27
|
+
private readonly emitter;
|
|
28
|
+
private activeRoom;
|
|
29
|
+
private visionHandle;
|
|
30
|
+
private unmountDebugPanel;
|
|
31
|
+
on<E extends keyof SableEvents>(event: E, handler: SableEventHandler<E>): () => void;
|
|
32
|
+
start(opts: StartOptions): Promise<void>;
|
|
33
|
+
stop(): Promise<void>;
|
|
34
|
+
/**
|
|
35
|
+
* Subscribe to LiveKit room events and translate the interesting ones into
|
|
36
|
+
* `SableEvents`. Keeps the Session → customer event surface decoupled from
|
|
37
|
+
* the LiveKit event names so we can swap the transport later without
|
|
38
|
+
* breaking subscribers.
|
|
39
|
+
*/
|
|
40
|
+
private wireRoomEvents;
|
|
41
|
+
}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public type surface for @sable-ai/sdk-core.
|
|
3
|
+
*
|
|
4
|
+
* Everything a consumer can touch lives here. Internal types stay in their
|
|
5
|
+
* respective modules so we never accidentally ship them in the published
|
|
6
|
+
* `.d.ts` bundle.
|
|
7
|
+
*/
|
|
8
|
+
export interface WireframeFrameSource {
|
|
9
|
+
type: "wireframe";
|
|
10
|
+
/** Capture rate in frames per second. Default: 2. */
|
|
11
|
+
rate?: number;
|
|
12
|
+
features?: {
|
|
13
|
+
/**
|
|
14
|
+
* Include rendered images in the wireframe (instead of placeholder boxes).
|
|
15
|
+
* Slightly higher CPU + bandwidth. Default: false.
|
|
16
|
+
*/
|
|
17
|
+
includeImages?: boolean;
|
|
18
|
+
};
|
|
19
|
+
}
|
|
20
|
+
export interface FnFrameSource {
|
|
21
|
+
type: "fn";
|
|
22
|
+
/** Capture rate in frames per second. Default: 2. */
|
|
23
|
+
rate?: number;
|
|
24
|
+
/**
|
|
25
|
+
* Called at `rate` Hz. Return an `HTMLCanvasElement` or `ImageBitmap` that
|
|
26
|
+
* the SDK will publish to the agent as a video track. Useful for feeding
|
|
27
|
+
* custom sources like a 3D scene, a `<video>` element, or a WebGL surface
|
|
28
|
+
* that the DOM walker can't introspect.
|
|
29
|
+
*/
|
|
30
|
+
captureFn: () => HTMLCanvasElement | ImageBitmap;
|
|
31
|
+
}
|
|
32
|
+
export type FrameSource = WireframeFrameSource | FnFrameSource;
|
|
33
|
+
export interface VisionOptions {
|
|
34
|
+
/**
|
|
35
|
+
* Whether to publish a video track of the page to the agent. Default: false.
|
|
36
|
+
* Turn this on for agents that should be able to *see* the user's screen
|
|
37
|
+
* in addition to hearing them.
|
|
38
|
+
*/
|
|
39
|
+
enabled?: boolean;
|
|
40
|
+
/**
|
|
41
|
+
* Where video frames come from. Defaults to the built-in wireframe renderer
|
|
42
|
+
* at 2 fps with images disabled.
|
|
43
|
+
*/
|
|
44
|
+
frameSource?: FrameSource;
|
|
45
|
+
}
|
|
46
|
+
export type RuntimeMethod = (payload: Record<string, unknown>) => unknown | Promise<unknown>;
|
|
47
|
+
/**
|
|
48
|
+
* Map of method name → handler. Used both for the user-provided overrides
|
|
49
|
+
* passed to `Sable.start({ runtime })` and for the SDK's internal defaults.
|
|
50
|
+
*/
|
|
51
|
+
export interface RuntimeMethods {
|
|
52
|
+
[method: string]: RuntimeMethod;
|
|
53
|
+
}
|
|
54
|
+
export interface StartOptions {
|
|
55
|
+
/**
|
|
56
|
+
* Publishable key for the agent (from platform.withsable.com → your agent
|
|
57
|
+
* → Web SDK → Public key). Safe to ship in client-side code — the security
|
|
58
|
+
* boundary is the allowed-domains list configured alongside the key.
|
|
59
|
+
*
|
|
60
|
+
* During beta, raw agent IDs (e.g. `agt_...`) are accepted here too.
|
|
61
|
+
*/
|
|
62
|
+
publicKey?: string;
|
|
63
|
+
/**
|
|
64
|
+
* @deprecated Use `publicKey` instead. Accepted as an alias during beta and
|
|
65
|
+
* will be removed before 1.0. If both are set, `publicKey` wins.
|
|
66
|
+
*/
|
|
67
|
+
agentPublicId?: string;
|
|
68
|
+
/**
|
|
69
|
+
* What the agent can see. Off by default — opt in for vision-enabled agents.
|
|
70
|
+
*/
|
|
71
|
+
vision?: VisionOptions;
|
|
72
|
+
/**
|
|
73
|
+
* Overrides + extensions for methods the agent can RPC into the page.
|
|
74
|
+
* Unspecified methods fall back to the SDK's default implementations. New
|
|
75
|
+
* methods become callable by the agent as-is.
|
|
76
|
+
*/
|
|
77
|
+
runtime?: RuntimeMethods;
|
|
78
|
+
/**
|
|
79
|
+
* Arbitrary metadata forwarded to the agent at session start. Surfaces
|
|
80
|
+
* verbatim in the agent's initial prompt.
|
|
81
|
+
*/
|
|
82
|
+
context?: Record<string, unknown>;
|
|
83
|
+
/**
|
|
84
|
+
* Dev-only: mount a floating preview panel showing the exact wireframe
|
|
85
|
+
* canvas being published to the agent. Can also be enabled via
|
|
86
|
+
* `?sable-debug=1` or `localStorage["sable:debug"]="1"`.
|
|
87
|
+
*/
|
|
88
|
+
debug?: boolean;
|
|
89
|
+
/**
|
|
90
|
+
* Override the sable-api base URL. Dev/test only. Defaults to the
|
|
91
|
+
* production gateway.
|
|
92
|
+
* @internal
|
|
93
|
+
*/
|
|
94
|
+
apiUrl?: string;
|
|
95
|
+
}
|
|
96
|
+
export interface SableEvents {
|
|
97
|
+
/** Fired once the room is connected, mic is live, and handshake is done. */
|
|
98
|
+
"session:started": {
|
|
99
|
+
roomName: string;
|
|
100
|
+
participantName: string;
|
|
101
|
+
};
|
|
102
|
+
/** Fired once when the session ends for any reason. */
|
|
103
|
+
"session:ended": {
|
|
104
|
+
reason?: string;
|
|
105
|
+
};
|
|
106
|
+
/** Fired whenever the agent starts or stops speaking. */
|
|
107
|
+
"agent:speaking": boolean;
|
|
108
|
+
/** Fired whenever the local user starts or stops speaking. */
|
|
109
|
+
"user:speaking": boolean;
|
|
110
|
+
/** Fired for any non-fatal error during the session. */
|
|
111
|
+
error: Error;
|
|
112
|
+
}
|
|
113
|
+
export type SableEventHandler<E extends keyof SableEvents> = (payload: SableEvents[E]) => void;
|
|
114
|
+
export interface SableAPI {
|
|
115
|
+
/** SDK version string, matches the npm package version. */
|
|
116
|
+
version: string;
|
|
117
|
+
/** Start a voice (and optionally vision) session with the agent. */
|
|
118
|
+
start(opts: StartOptions): Promise<void>;
|
|
119
|
+
/** Tear down the active session. No-op if none. */
|
|
120
|
+
stop(): Promise<void>;
|
|
121
|
+
/**
|
|
122
|
+
* Subscribe to a session event. Returns an unsubscribe function. Fire-and-
|
|
123
|
+
* forget — the SDK does not care whether you subscribe.
|
|
124
|
+
*/
|
|
125
|
+
on<E extends keyof SableEvents>(event: E, handler: SableEventHandler<E>): () => void;
|
|
126
|
+
}
|
|
127
|
+
declare global {
|
|
128
|
+
interface Window {
|
|
129
|
+
Sable?: SableAPI;
|
|
130
|
+
}
|
|
131
|
+
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FrameSource dispatcher.
|
|
3
|
+
*
|
|
4
|
+
* The public API lets customers choose what the agent sees via
|
|
5
|
+
* `vision: { frameSource: { type: "wireframe" | "fn", ... } }`. This module
|
|
6
|
+
* hides the strategy behind one entrypoint — `startFrameSource` — which
|
|
7
|
+
* returns a stop function. The target canvas is passed in so the caller
|
|
8
|
+
* (vision/index.ts) can hand the same canvas to `canvas.captureStream()`.
|
|
9
|
+
*
|
|
10
|
+
* Two built-in sources:
|
|
11
|
+
*
|
|
12
|
+
* 1. `{ type: "wireframe", rate, features: { includeImages } }`
|
|
13
|
+
* — Runs the Wireframe library against `document.body` at `rate` Hz.
|
|
14
|
+
* This is the default, tuned for low bandwidth + agent-readable
|
|
15
|
+
* structure. `includeImages: true` fetches cover photos/avatars/
|
|
16
|
+
* thumbnails via CORS and draws real pixels; otherwise the agent
|
|
17
|
+
* gets labelled placeholder boxes.
|
|
18
|
+
*
|
|
19
|
+
* 2. `{ type: "fn", rate, captureFn }`
|
|
20
|
+
* — The user supplies a function that returns a canvas or ImageBitmap
|
|
21
|
+
* on each tick. Useful for custom sources the DOM walker can't
|
|
22
|
+
* introspect: `<video>` elements, WebGL/3D scenes, off-screen canvases.
|
|
23
|
+
*
|
|
24
|
+
* Adding a new source (e.g. `{ type: "video" }`) is a matter of:
|
|
25
|
+
* - adding a variant in `types/index.ts`
|
|
26
|
+
* - adding a case here.
|
|
27
|
+
*/
|
|
28
|
+
import type { FrameSource } from "../types";
|
|
29
|
+
/**
|
|
30
|
+
* Start capturing frames from `source` into `canvas`. Returns a stop function
|
|
31
|
+
* that halts the loop. Errors inside a single tick are logged and skipped —
|
|
32
|
+
* one bad frame shouldn't kill vision for the rest of the session.
|
|
33
|
+
*/
|
|
34
|
+
export declare function startFrameSource(source: FrameSource, canvas: HTMLCanvasElement): () => void;
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vision entry point.
|
|
3
|
+
*
|
|
4
|
+
* `startVision` owns the full lifecycle of "what the agent sees":
|
|
5
|
+
*
|
|
6
|
+
* 1. create a capture canvas sized to the viewport
|
|
7
|
+
* 2. start the configured frame source, drawing into that canvas
|
|
8
|
+
* 3. publish the canvas as a LiveKit screenshare video track
|
|
9
|
+
*
|
|
10
|
+
* Returns both the canvas (so the session can hand it to the debug panel,
|
|
11
|
+
* which just renders the exact pixels we publish) and a combined async stop
|
|
12
|
+
* function. Everything is off by default — callers pass `vision: { enabled:
|
|
13
|
+
* true }` in `Sable.start({ ... })` to opt in.
|
|
14
|
+
*/
|
|
15
|
+
import type { VisionOptions } from "../types";
|
|
16
|
+
import { type LiveKitPublishLib, type PublishCapableRoom } from "./publisher";
|
|
17
|
+
export type { LiveKitPublishLib, PublishCapableRoom } from "./publisher";
|
|
18
|
+
export interface StartVisionArgs {
|
|
19
|
+
room: PublishCapableRoom;
|
|
20
|
+
lib: LiveKitPublishLib;
|
|
21
|
+
options: VisionOptions;
|
|
22
|
+
}
|
|
23
|
+
export interface VisionHandle {
|
|
24
|
+
/** The canvas being published. Useful for the debug panel. */
|
|
25
|
+
canvas: HTMLCanvasElement;
|
|
26
|
+
/** Stop the frame loop and unpublish the track. */
|
|
27
|
+
stop: () => Promise<void>;
|
|
28
|
+
}
|
|
29
|
+
export declare function startVision(args: StartVisionArgs): Promise<VisionHandle>;
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Publish a canvas as a LiveKit screenshare video track.
|
|
3
|
+
*
|
|
4
|
+
* Vision is delivered as a regular LiveKit video track rather than raw
|
|
5
|
+
* bytes: we draw each frame into a persistent canvas (see `frame-source.ts`)
|
|
6
|
+
* and hand that canvas to `canvas.captureStream(fps)`. The resulting
|
|
7
|
+
* MediaStreamTrack gets wrapped in a LocalVideoTrack and published as
|
|
8
|
+
* `Track.Source.ScreenShare`, so the agent subscribes to it the same way
|
|
9
|
+
* it would subscribe to any screenshare — no custom byte-stream handler,
|
|
10
|
+
* no per-frame PNG decode, and the codec delta-compresses mostly-static
|
|
11
|
+
* pages so bandwidth stays low.
|
|
12
|
+
*
|
|
13
|
+
* The fps passed to `captureStream` should match the frame source's render
|
|
14
|
+
* rate; a mismatch either drops frames (encoder slower than source) or
|
|
15
|
+
* wastes bandwidth (encoder faster than source). `vision/index.ts` owns
|
|
16
|
+
* the pairing.
|
|
17
|
+
*/
|
|
18
|
+
interface LocalTrackPublication {
|
|
19
|
+
trackSid?: string;
|
|
20
|
+
}
|
|
21
|
+
export interface PublishCapableRoom {
|
|
22
|
+
localParticipant: {
|
|
23
|
+
publishTrack(track: unknown, options?: {
|
|
24
|
+
source?: unknown;
|
|
25
|
+
name?: string;
|
|
26
|
+
}): Promise<LocalTrackPublication>;
|
|
27
|
+
unpublishTrack(track: unknown, stopOnUnpublish?: boolean): Promise<unknown>;
|
|
28
|
+
};
|
|
29
|
+
}
|
|
30
|
+
export interface LiveKitPublishLib {
|
|
31
|
+
LocalVideoTrack: new (mediaStreamTrack: MediaStreamTrack, constraints?: unknown, userProvidedTrack?: boolean) => unknown;
|
|
32
|
+
Track: {
|
|
33
|
+
Source: {
|
|
34
|
+
ScreenShare: unknown;
|
|
35
|
+
};
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Publish `canvas` as a screenshare track at `fps` frames per second.
|
|
40
|
+
* Returns an async teardown that unpublishes the track and stops the
|
|
41
|
+
* underlying MediaStreamTrack.
|
|
42
|
+
*/
|
|
43
|
+
export declare function publishCanvasAsVideoTrack(room: PublishCapableRoom, lib: LiveKitPublishLib, canvas: HTMLCanvasElement, fps: number): Promise<() => Promise<void>>;
|
|
44
|
+
export {};
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lazy bootstrap for the Wireframe class.
|
|
3
|
+
*
|
|
4
|
+
* `wireframe.js` is shipped as a text asset (see `assets/wireframe.js.txt`)
|
|
5
|
+
* and eval'd once at first use. Evaluating inside an IIFE with a shadowed
|
|
6
|
+
* `console` keeps the library's per-capture "[wireframe] drew N elements"
|
|
7
|
+
* log out of the host page's devtools — even at the default 2 fps it would flood the console.
|
|
8
|
+
* `.warn`/`.error` still go through so real problems surface.
|
|
9
|
+
*
|
|
10
|
+
* Used by:
|
|
11
|
+
* - `vision/frame-source.ts` — to render the wireframe canvas at `rate` Hz
|
|
12
|
+
* - `browser-bridge/dom-state.ts` — to produce the `screenshot_jpeg_b64`
|
|
13
|
+
* field returned by `browser.get_dom_state`
|
|
14
|
+
*/
|
|
15
|
+
export type WireframeInstance = {
|
|
16
|
+
toDataURL(): Promise<string>;
|
|
17
|
+
capture: () => Promise<{
|
|
18
|
+
canvas: HTMLCanvasElement;
|
|
19
|
+
}>;
|
|
20
|
+
};
|
|
21
|
+
export type WireframeCtor = new (root?: Element, opts?: Record<string, unknown>) => WireframeInstance;
|
|
22
|
+
export declare function getWireframeCtor(): WireframeCtor;
|