@elizaos/plugin-computeruse 2.0.3-beta.2 → 2.0.3-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/actions/clipboard.d.ts +22 -0
- package/dist/actions/clipboard.d.ts.map +1 -0
- package/dist/actions/helpers.d.ts +33 -0
- package/dist/actions/helpers.d.ts.map +1 -0
- package/dist/actions/progress.d.ts +26 -0
- package/dist/actions/progress.d.ts.map +1 -0
- package/dist/actions/use-computer-agent.d.ts +113 -0
- package/dist/actions/use-computer-agent.d.ts.map +1 -0
- package/dist/actions/use-computer.d.ts +3 -0
- package/dist/actions/use-computer.d.ts.map +1 -0
- package/dist/actions/window-handlers.d.ts +11 -0
- package/dist/actions/window-handlers.d.ts.map +1 -0
- package/dist/actions/window.d.ts +11 -0
- package/dist/actions/window.d.ts.map +1 -0
- package/dist/actor/actor.d.ts +84 -0
- package/dist/actor/actor.d.ts.map +1 -0
- package/dist/actor/agent-callbacks.d.ts +128 -0
- package/dist/actor/agent-callbacks.d.ts.map +1 -0
- package/dist/actor/agent-loop.d.ts +134 -0
- package/dist/actor/agent-loop.d.ts.map +1 -0
- package/dist/actor/aosp-input-actor.d.ts +87 -0
- package/dist/actor/aosp-input-actor.d.ts.map +1 -0
- package/dist/actor/brain.d.ts +195 -0
- package/dist/actor/brain.d.ts.map +1 -0
- package/dist/actor/cascade.d.ts +92 -0
- package/dist/actor/cascade.d.ts.map +1 -0
- package/dist/actor/computer-interface.d.ts +276 -0
- package/dist/actor/computer-interface.d.ts.map +1 -0
- package/dist/actor/dispatch.d.ts +24 -0
- package/dist/actor/dispatch.d.ts.map +1 -0
- package/dist/actor/index.d.ts +12 -0
- package/dist/actor/index.d.ts.map +1 -0
- package/dist/actor/types.d.ts +94 -0
- package/dist/actor/types.d.ts.map +1 -0
- package/dist/approval-manager.d.ts +29 -0
- package/dist/approval-manager.d.ts.map +1 -0
- package/dist/index.d.ts +46 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +13649 -0
- package/dist/index.js.map +68 -0
- package/dist/mcp/index.d.ts +8 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/server.d.ts +42 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/tools.d.ts +53 -0
- package/dist/mcp/tools.d.ts.map +1 -0
- package/dist/mobile/android-bridge.d.ts +263 -0
- package/dist/mobile/android-bridge.d.ts.map +1 -0
- package/dist/mobile/android-scene.d.ts +52 -0
- package/dist/mobile/android-scene.d.ts.map +1 -0
- package/dist/mobile/android-trajectory.d.ts +66 -0
- package/dist/mobile/android-trajectory.d.ts.map +1 -0
- package/dist/mobile/index.d.ts +19 -0
- package/dist/mobile/index.d.ts.map +1 -0
- package/dist/mobile/ios-app-intent-registry.d.ts +20 -0
- package/dist/mobile/ios-app-intent-registry.d.ts.map +1 -0
- package/dist/mobile/ios-bridge.d.ts +359 -0
- package/dist/mobile/ios-bridge.d.ts.map +1 -0
- package/dist/mobile/ios-computer-interface.d.ts +160 -0
- package/dist/mobile/ios-computer-interface.d.ts.map +1 -0
- package/dist/mobile/mobile-computer-interface.d.ts +142 -0
- package/dist/mobile/mobile-computer-interface.d.ts.map +1 -0
- package/dist/mobile/mobile-screen-capture.d.ts +64 -0
- package/dist/mobile/mobile-screen-capture.d.ts.map +1 -0
- package/dist/mobile/ocr-provider.d.ts +187 -0
- package/dist/mobile/ocr-provider.d.ts.map +1 -0
- package/dist/mobile/ocr-provider.js +111 -0
- package/dist/mobile/ocr-provider.js.map +10 -0
- package/dist/osworld/action-converter.d.ts +38 -0
- package/dist/osworld/action-converter.d.ts.map +1 -0
- package/dist/osworld/adapter.d.ts +79 -0
- package/dist/osworld/adapter.d.ts.map +1 -0
- package/dist/osworld/types.d.ts +69 -0
- package/dist/osworld/types.d.ts.map +1 -0
- package/dist/parity/index.d.ts +9 -0
- package/dist/parity/index.d.ts.map +1 -0
- package/dist/parity/parity-matrix.d.ts +82 -0
- package/dist/parity/parity-matrix.d.ts.map +1 -0
- package/dist/parity/screenspot.d.ts +56 -0
- package/dist/parity/screenspot.d.ts.map +1 -0
- package/dist/platform/a11y.d.ts +64 -0
- package/dist/platform/a11y.d.ts.map +1 -0
- package/dist/platform/browser.d.ts +61 -0
- package/dist/platform/browser.d.ts.map +1 -0
- package/dist/platform/capabilities.d.ts +33 -0
- package/dist/platform/capabilities.d.ts.map +1 -0
- package/dist/platform/capture.d.ts +65 -0
- package/dist/platform/capture.d.ts.map +1 -0
- package/dist/platform/clipboard.d.ts +24 -0
- package/dist/platform/clipboard.d.ts.map +1 -0
- package/dist/platform/coords.d.ts +73 -0
- package/dist/platform/coords.d.ts.map +1 -0
- package/dist/platform/desktop.d.ts +56 -0
- package/dist/platform/desktop.d.ts.map +1 -0
- package/dist/platform/displays.d.ts +97 -0
- package/dist/platform/displays.d.ts.map +1 -0
- package/dist/platform/driver.d.ts +49 -0
- package/dist/platform/driver.d.ts.map +1 -0
- package/dist/platform/file-ops.d.ts +27 -0
- package/dist/platform/file-ops.d.ts.map +1 -0
- package/dist/platform/helpers.d.ts +60 -0
- package/dist/platform/helpers.d.ts.map +1 -0
- package/dist/platform/launch.d.ts +54 -0
- package/dist/platform/launch.d.ts.map +1 -0
- package/dist/platform/normalized-coords.d.ts +46 -0
- package/dist/platform/normalized-coords.d.ts.map +1 -0
- package/dist/platform/nut-driver.d.ts +86 -0
- package/dist/platform/nut-driver.d.ts.map +1 -0
- package/dist/platform/permissions.d.ts +33 -0
- package/dist/platform/permissions.d.ts.map +1 -0
- package/dist/platform/process-list.d.ts +32 -0
- package/dist/platform/process-list.d.ts.map +1 -0
- package/dist/platform/ps-host.d.ts +77 -0
- package/dist/platform/ps-host.d.ts.map +1 -0
- package/dist/platform/screenshot-errors.d.ts +54 -0
- package/dist/platform/screenshot-errors.d.ts.map +1 -0
- package/dist/platform/screenshot-quality.d.ts +11 -0
- package/dist/platform/screenshot-quality.d.ts.map +1 -0
- package/dist/platform/screenshot.d.ts +16 -0
- package/dist/platform/screenshot.d.ts.map +1 -0
- package/dist/platform/security.d.ts +20 -0
- package/dist/platform/security.d.ts.map +1 -0
- package/dist/platform/terminal.d.ts +38 -0
- package/dist/platform/terminal.d.ts.map +1 -0
- package/dist/platform/wayland-portal.d.ts +25 -0
- package/dist/platform/wayland-portal.d.ts.map +1 -0
- package/dist/platform/windows-list.d.ts +78 -0
- package/dist/platform/windows-list.d.ts.map +1 -0
- package/dist/providers/computer-state.d.ts +9 -0
- package/dist/providers/computer-state.d.ts.map +1 -0
- package/dist/providers/scene.d.ts +21 -0
- package/dist/providers/scene.d.ts.map +1 -0
- package/dist/register-routes.d.ts +2 -0
- package/dist/register-routes.d.ts.map +1 -0
- package/dist/register-routes.js +13836 -0
- package/dist/register-routes.js.map +71 -0
- package/dist/routes/computer-use-compat-routes.d.ts +29 -0
- package/dist/routes/computer-use-compat-routes.d.ts.map +1 -0
- package/dist/routes/computer-use-routes.d.ts +3 -0
- package/dist/routes/computer-use-routes.d.ts.map +1 -0
- package/dist/routes/sandbox-routes.d.ts +53 -0
- package/dist/routes/sandbox-routes.d.ts.map +1 -0
- package/dist/sandbox/docker-backend.d.ts +69 -0
- package/dist/sandbox/docker-backend.d.ts.map +1 -0
- package/dist/sandbox/index.d.ts +62 -0
- package/dist/sandbox/index.d.ts.map +1 -0
- package/dist/sandbox/qemu-backend.d.ts +48 -0
- package/dist/sandbox/qemu-backend.d.ts.map +1 -0
- package/dist/sandbox/remote-guest.d.ts +72 -0
- package/dist/sandbox/remote-guest.d.ts.map +1 -0
- package/dist/sandbox/sandbox-driver.d.ts +41 -0
- package/dist/sandbox/sandbox-driver.d.ts.map +1 -0
- package/dist/sandbox/surface-types.d.ts +17 -0
- package/dist/sandbox/surface-types.d.ts.map +1 -0
- package/dist/sandbox/types.d.ts +138 -0
- package/dist/sandbox/types.d.ts.map +1 -0
- package/dist/sandbox/wsb-backend.d.ts +48 -0
- package/dist/sandbox/wsb-backend.d.ts.map +1 -0
- package/dist/scene/a11y-provider.d.ts +83 -0
- package/dist/scene/a11y-provider.d.ts.map +1 -0
- package/dist/scene/apps.d.ts +39 -0
- package/dist/scene/apps.d.ts.map +1 -0
- package/dist/scene/dhash.d.ts +105 -0
- package/dist/scene/dhash.d.ts.map +1 -0
- package/dist/scene/ocr-adapter.d.ts +64 -0
- package/dist/scene/ocr-adapter.d.ts.map +1 -0
- package/dist/scene/scene-builder.d.ts +107 -0
- package/dist/scene/scene-builder.d.ts.map +1 -0
- package/dist/scene/scene-types.d.ts +70 -0
- package/dist/scene/scene-types.d.ts.map +1 -0
- package/dist/scene/screen-state.d.ts +105 -0
- package/dist/scene/screen-state.d.ts.map +1 -0
- package/dist/scene/serialize.d.ts +28 -0
- package/dist/scene/serialize.d.ts.map +1 -0
- package/dist/security/browser-script-policy.d.ts +9 -0
- package/dist/security/browser-script-policy.d.ts.map +1 -0
- package/dist/services/computer-use-service.d.ts +142 -0
- package/dist/services/computer-use-service.d.ts.map +1 -0
- package/dist/services/desktop-control.d.ts +35 -0
- package/dist/services/desktop-control.d.ts.map +1 -0
- package/dist/services/index.d.ts +7 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/vision-context-provider.d.ts +32 -0
- package/dist/services/vision-context-provider.d.ts.map +1 -0
- package/dist/types.d.ts +385 -0
- package/dist/types.d.ts.map +1 -0
- package/package.json +16 -5
- package/registry-entry.json +74 -0
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AccessibilityProvider — thin abstraction over platform-specific a11y trees.
|
|
3
|
+
*
|
|
4
|
+
* The scene-builder needs structured a11y nodes (role, label, bbox, actions)
|
|
5
|
+
* tagged with a stable id so the planner can say "click element a47" across
|
|
6
|
+
* turns. The existing `platform/a11y.ts::extractA11yTree()` returns a single
|
|
7
|
+
* flat string — useful for prompts but not structured. This module wraps it
|
|
8
|
+
* with a typed-node interface and adds:
|
|
9
|
+
*
|
|
10
|
+
* - Native impls per-OS that prefer structured JSON output where the
|
|
11
|
+
* platform supports it (AT-SPI emits structured data we just need to
|
|
12
|
+
* marshal; UIA/AX similarly).
|
|
13
|
+
* - A Wayland-compositor fallback (`hyprctl clients -j`, `swaymsg -t
|
|
14
|
+
* get_tree`) so Linux Wayland-only environments still surface windowable
|
|
15
|
+
* nodes even when AT-SPI is locked down.
|
|
16
|
+
* - A `NullAccessibilityProvider` for platforms / contexts where a11y is
|
|
17
|
+
* intentionally disabled (CI, headless runners).
|
|
18
|
+
*
|
|
19
|
+
* Stable id strategy:
|
|
20
|
+
* - Each provider emits `a<displayId>-<seq>` IDs. The same logical element
|
|
21
|
+
* keeps the same id across consecutive frames AS LONG AS the provider's
|
|
22
|
+
* in-memory map is preserved — we re-key when role + label + bbox
|
|
23
|
+
* intersect significantly with a previous frame's node. This is the
|
|
24
|
+
* contract WS7's "click element a47" depends on.
|
|
25
|
+
*
|
|
26
|
+
* The Android `AccessibilityService` impl is owned by WS8 and registers via
|
|
27
|
+
* `setAccessibilityProvider()` at runtime — this module exposes the seam
|
|
28
|
+
* but does not ship a JS-side implementation.
|
|
29
|
+
*/
|
|
30
|
+
import type { SceneAxNode } from "./scene-types.js";
|
|
31
|
+
export interface AccessibilityProvider {
|
|
32
|
+
readonly name: string;
|
|
33
|
+
/**
|
|
34
|
+
* Whether this provider can produce structured nodes on the current host.
|
|
35
|
+
* Used by `resolveAccessibilityProvider` to pick the best chain entry.
|
|
36
|
+
*/
|
|
37
|
+
available(): boolean;
|
|
38
|
+
/**
|
|
39
|
+
* Capture the live a11y tree and return per-display node lists. Returns
|
|
40
|
+
* an empty array when no nodes are reachable (vs throwing) so the
|
|
41
|
+
* scene-builder always produces a Scene.
|
|
42
|
+
*/
|
|
43
|
+
snapshot(): Promise<SceneAxNode[]>;
|
|
44
|
+
}
|
|
45
|
+
/**
 * `AccessibilityProvider` for platforms / contexts where a11y is
 * intentionally disabled (CI, headless runners). Reports itself under the
 * name `"null"`.
 *
 * NOTE(review): method bodies are not visible in this declaration file —
 * presumably `snapshot()` resolves to an empty array; confirm against the
 * implementation.
 */
declare class NullAccessibilityProvider implements AccessibilityProvider {
|
|
46
|
+
readonly name = "null";
|
|
47
|
+
available(): boolean;
|
|
48
|
+
snapshot(): Promise<SceneAxNode[]>;
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Replace the active provider (used by Android/WS8 to inject the native
|
|
52
|
+
* `AccessibilityService` adapter, and by tests).
|
|
53
|
+
*/
|
|
54
|
+
export declare function setAccessibilityProvider(provider: AccessibilityProvider): void;
|
|
55
|
+
/**
 * Return the `AccessibilityProvider` to use for a11y snapshots.
 *
 * NOTE(review): the selection logic is not visible in this declaration
 * file — presumably a provider injected via `setAccessibilityProvider()`
 * takes precedence over the per-platform default, with `available()` used
 * to pick among candidates; confirm against the implementation.
 */
export declare function resolveAccessibilityProvider(): AccessibilityProvider;
|
|
56
|
+
interface IdAssignerState {
|
|
57
|
+
seq: Map<number, number>;
|
|
58
|
+
}
|
|
59
|
+
/**
 * Create a new `IdAssignerState` to be passed to `assignAxId()`.
 *
 * NOTE(review): presumably starts with an empty `seq` map — confirm.
 */
export declare function makeIdAssigner(): IdAssignerState;
|
|
60
|
+
/**
 * Produce an a11y node id string for `displayId`, using `state` as the
 * id-assignment bookkeeping.
 *
 * NOTE(review): presumably yields the `a<displayId>-<seq>` format described
 * in the module header and advances the per-display counter held in
 * `state.seq` — confirm against the implementation.
 *
 * @param state - Assigner state created by `makeIdAssigner()`.
 * @param displayId - Display the node belongs to.
 * @returns The id string for the node.
 */
export declare function assignAxId(state: IdAssignerState, displayId: number): string;
|
|
61
|
+
/**
 * Linux `AccessibilityProvider`, reported under the name `"linux"`.
 *
 * The private members correspond to the sources named in the module header:
 * AT-SPI (`tryAtspi`) and the Wayland-compositor fallback
 * (`tryWaylandCompositor`, with `tryHyprland` / `trySway`).
 *
 * NOTE(review): the order in which these sources are tried is not visible
 * in this declaration file — presumably AT-SPI first, compositor fallback
 * second; confirm against the implementation.
 */
export declare class LinuxAccessibilityProvider implements AccessibilityProvider {
|
|
62
|
+
readonly name = "linux";
|
|
63
|
+
available(): boolean;
|
|
64
|
+
snapshot(): Promise<SceneAxNode[]>;
|
|
65
|
+
private tryAtspi;
|
|
66
|
+
private tryWaylandCompositor;
|
|
67
|
+
private tryHyprland;
|
|
68
|
+
private trySway;
|
|
69
|
+
}
|
|
70
|
+
/**
 * Parse `text` into a list of `SceneAxNode`s.
 *
 * NOTE(review): presumably `text` is the JSON output of
 * `hyprctl clients -j` (the Hyprland fallback named in the module header)
 * — confirm against the implementation, including how malformed input is
 * handled.
 */
export declare function parseHyprlandClients(text: string): SceneAxNode[];
|
|
71
|
+
/**
 * Parse `text` into a list of `SceneAxNode`s.
 *
 * NOTE(review): presumably `text` is the JSON output of
 * `swaymsg -t get_tree` (the Sway fallback named in the module header) —
 * confirm against the implementation, including how malformed input is
 * handled.
 */
export declare function parseSwayTree(text: string): SceneAxNode[];
|
|
72
|
+
/**
 * `AccessibilityProvider` reported under the name `"darwin"`.
 *
 * NOTE(review): presumably the macOS native implementation backed by the
 * AX accessibility API mentioned in the module header — the mechanism is
 * not visible in this declaration file; confirm against the implementation.
 */
export declare class DarwinAccessibilityProvider implements AccessibilityProvider {
|
|
73
|
+
readonly name = "darwin";
|
|
74
|
+
available(): boolean;
|
|
75
|
+
snapshot(): Promise<SceneAxNode[]>;
|
|
76
|
+
}
|
|
77
|
+
/**
 * `AccessibilityProvider` reported under the name `"win32"`.
 *
 * NOTE(review): presumably the Windows native implementation backed by UI
 * Automation (UIA), as mentioned in the module header — the mechanism is
 * not visible in this declaration file; confirm against the implementation.
 */
export declare class WindowsAccessibilityProvider implements AccessibilityProvider {
|
|
78
|
+
readonly name = "win32";
|
|
79
|
+
available(): boolean;
|
|
80
|
+
snapshot(): Promise<SceneAxNode[]>;
|
|
81
|
+
}
|
|
82
|
+
export { NullAccessibilityProvider };
|
|
83
|
+
//# sourceMappingURL=a11y-provider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"a11y-provider.d.ts","sourceRoot":"","sources":["../../src/scene/a11y-provider.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAIH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAEpD,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB;;;OAGG;IACH,SAAS,IAAI,OAAO,CAAC;IACrB;;;;OAIG;IACH,QAAQ,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;CACpC;AAED,cAAM,yBAA0B,YAAW,qBAAqB;IAC9D,QAAQ,CAAC,IAAI,UAAU;IACvB,SAAS,IAAI,OAAO;IAGd,QAAQ,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;CAGzC;AAID;;;GAGG;AACH,wBAAgB,wBAAwB,CACtC,QAAQ,EAAE,qBAAqB,GAC9B,IAAI,CAEN;AAED,wBAAgB,4BAA4B,IAAI,qBAAqB,CAOpE;AAED,UAAU,eAAe;IACvB,GAAG,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC1B;AAED,wBAAgB,cAAc,IAAI,eAAe,CAEhD;AAED,wBAAgB,UAAU,CAAC,KAAK,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,CAK5E;AAID,qBAAa,0BAA2B,YAAW,qBAAqB;IACtE,QAAQ,CAAC,IAAI,WAAW;IAExB,SAAS,IAAI,OAAO;IAUd,QAAQ,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;IASxC,OAAO,CAAC,QAAQ;IA0DhB,OAAO,CAAC,oBAAoB;IAa5B,OAAO,CAAC,WAAW;IAanB,OAAO,CAAC,OAAO;CAYhB;AAaD,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,MAAM,GAAG,WAAW,EAAE,CAiChE;AAcD,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,WAAW,EAAE,CA4CzD;AAuBD,qBAAa,2BAA4B,YAAW,qBAAqB;IACvE,QAAQ,CAAC,IAAI,YAAY;IAEzB,SAAS,IAAI,OAAO;IAId,QAAQ,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;CAuDzC;AAID,qBAAa,4BAA6B,YAAW,qBAAqB;IACxE,QAAQ,CAAC,IAAI,WAAW;IAExB,SAAS,IAAI,OAAO;IAId,QAAQ,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;CAqDzC;AAED,OAAO,EAAE,yBAAyB,EAAE,CAAC"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* App enumeration — join `listProcesses()` with `listWindows()` to produce
|
|
3
|
+
* a per-pid `SceneApp[]` shape with embedded windows.
|
|
4
|
+
*
|
|
5
|
+
* This is intentionally a join over what we already have. The existing
|
|
6
|
+
* `windows-list.ts` doesn't surface pid on every OS:
|
|
7
|
+
* - Windows: `Get-Process` already keys by pid; `MainWindowTitle` lives on
|
|
8
|
+
* the same row. The join is implicit.
|
|
9
|
+
* - Linux : `wmctrl -l -p` gives pid per window. We re-invoke wmctrl with
|
|
10
|
+
* `-p` here when available; otherwise the per-pid map carries an
|
|
11
|
+
* empty `windows` list.
|
|
12
|
+
* - macOS : AppleScript gives `name of proc` but not the pid. We do a
|
|
13
|
+
* name-based join on `comm`. Close enough for "click in Safari"
|
|
14
|
+
* — the planner has app-name + window-title to disambiguate.
|
|
15
|
+
*
|
|
16
|
+
* Edge cases:
|
|
17
|
+
* - A process with no visible windows still appears in `apps[]` with an
|
|
18
|
+
* empty `windows` list. The planner uses this to know "Slack is running
|
|
19
|
+
* but minimized" without firing an extra query.
|
|
20
|
+
* - A window with no resolvable pid (Linux X11 without `_NET_WM_PID`) maps
|
|
21
|
+
* to a synthetic `{ pid: 0, name: <appField> }` bucket.
|
|
22
|
+
*/
|
|
23
|
+
import { listProcesses } from "../platform/process-list.js";
|
|
24
|
+
import { listWindows } from "../platform/windows-list.js";
|
|
25
|
+
import type { WindowInfo } from "../types.js";
|
|
26
|
+
import type { SceneApp } from "./scene-types.js";
|
|
27
|
+
export interface AppEnumerationDeps {
|
|
28
|
+
/** Override for tests. */
|
|
29
|
+
processes?: typeof listProcesses;
|
|
30
|
+
windows?: typeof listWindows;
|
|
31
|
+
}
|
|
32
|
+
/**
 * Enumerate apps as a `SceneApp[]`.
 *
 * @param deps - Optional overrides for the process and window listing
 *   functions (intended for tests).
 *
 * NOTE(review): presumably calls `listProcesses()` / `listWindows()` when no
 * override is given and joins the results as described in the module
 * header — confirm against the implementation.
 */
export declare function enumerateApps(deps?: AppEnumerationDeps): SceneApp[];
|
|
33
|
+
interface RawProc {
|
|
34
|
+
pid: number;
|
|
35
|
+
name: string;
|
|
36
|
+
}
|
|
37
|
+
/**
 * Join a process list with a window list into a `SceneApp[]`.
 *
 * @param procs - Processes, each with a `pid` and `name`.
 * @param windows - Windows to attach to the processes.
 * @param platform - Platform identifier; `"linux"`, `"darwin"` and
 *   `"win32"` are the named values, but any string is accepted.
 *
 * NOTE(review): presumably `platform` selects between the pid-based and
 * name-based join strategies described in the module header — confirm
 * against the implementation, including the behavior for unknown platforms.
 */
export declare function joinAppsAndWindows(procs: RawProc[], windows: WindowInfo[], platform: "linux" | "darwin" | "win32" | string): SceneApp[];
|
|
38
|
+
export {};
|
|
39
|
+
//# sourceMappingURL=apps.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"apps.d.ts","sourceRoot":"","sources":["../../src/scene/apps.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAIH,OAAO,EAAE,aAAa,EAAE,MAAM,6BAA6B,CAAC;AAC5D,OAAO,EAAE,WAAW,EAAE,MAAM,6BAA6B,CAAC;AAC1D,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,KAAK,EAAE,QAAQ,EAAkB,MAAM,kBAAkB,CAAC;AAEjE,MAAM,WAAW,kBAAkB;IACjC,0BAA0B;IAC1B,SAAS,CAAC,EAAE,OAAO,aAAa,CAAC;IACjC,OAAO,CAAC,EAAE,OAAO,WAAW,CAAC;CAC9B;AAED,wBAAgB,aAAa,CAAC,IAAI,GAAE,kBAAuB,GAAG,QAAQ,EAAE,CAMvE;AAED,UAAU,OAAO;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;CACd;AASD,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,OAAO,EAAE,EAChB,OAAO,EAAE,UAAU,EAAE,EACrB,QAAQ,EAAE,OAAO,GAAG,QAAQ,GAAG,OAAO,GAAG,MAAM,GAC9C,QAAQ,EAAE,CA4FZ"}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Difference-hash (dHash) implementation for cheap frame-level and
|
|
3
|
+
* block-level change detection in the WS6 scene-builder.
|
|
4
|
+
*
|
|
5
|
+
* Why dHash:
|
|
6
|
+
* - 64-bit integer, Hamming-comparable in a couple of cycles, vastly cheaper than a structural
|
|
7
|
+
* similarity index.
|
|
8
|
+
* - Robust to small re-encodings (cursor jitter, anti-alias seams) which
|
|
9
|
+
* dominate "no change" frames in a real session.
|
|
10
|
+
* - Tunable: we run an 8×8 whole-frame hash for the cheap "did anything
|
|
11
|
+
* happen?" gate, and a 16×16 block grid (each block ~128×128 of source)
|
|
12
|
+
* for dirty-block re-OCR.
|
|
13
|
+
*
|
|
14
|
+
* Implementation notes:
|
|
15
|
+
* - The PNG decoder here is intentionally minimal — it handles the formats
|
|
16
|
+
* produced by every screenshot path we ship (color type 2 = RGB, 6 =
|
|
17
|
+
* RGBA, 8-bit depth, non-interlaced). Anything else returns `null` and
|
|
18
|
+
* the caller falls back to a coarser whole-frame byte hash.
|
|
19
|
+
* - Pure functions, no I/O. Safe to test deterministically.
|
|
20
|
+
*
|
|
21
|
+
* Block-grid contract:
|
|
22
|
+
* - The image is gridded into N×N blocks (default 16) covering the whole
|
|
23
|
+
* frame; remainder pixels go to the right/bottom edges.
|
|
24
|
+
* - Each block gets its own 8-bit "mini-hash" derived from row-wise grayscale
|
|
25
|
+
* differences sampled at four points per block. Two blocks compare equal
|
|
26
|
+
* iff their hashes match. This is intentionally a much coarser test than
|
|
27
|
+
* a full per-block dHash — the goal is just "did this region change at
|
|
28
|
+
* all?" not "how similar."
|
|
29
|
+
*/
|
|
30
|
+
export interface RawImage {
|
|
31
|
+
width: number;
|
|
32
|
+
height: number;
|
|
33
|
+
/** RGBA scanline data, row-major, 4 bytes/pixel, length = width*height*4. */
|
|
34
|
+
rgba: Buffer;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Decode a PNG buffer to RGBA. Returns null for unsupported variants
|
|
38
|
+
* (interlaced, palette-indexed, 16-bit depth) — caller handles that by
|
|
39
|
+
* falling back to a non-block-grid hash strategy.
|
|
40
|
+
*/
|
|
41
|
+
export declare function decodePng(png: Buffer): RawImage | null;
|
|
42
|
+
/**
|
|
43
|
+
* 64-bit dHash of the whole frame. Returns `null` if the PNG can't be
|
|
44
|
+
* decoded; callers fall back to a byte-length comparison.
|
|
45
|
+
*/
|
|
46
|
+
export declare function frameDhash(png: Buffer): bigint | null;
|
|
47
|
+
/**
|
|
48
|
+
* Hamming distance between two 64-bit dHashes. Two visually identical frames
|
|
49
|
+
* report 0. We treat anything < 5 as "no change" for the idle gate.
|
|
50
|
+
*/
|
|
51
|
+
export declare function hamming(a: bigint, b: bigint): number;
|
|
52
|
+
export interface BlockGrid {
|
|
53
|
+
/** Cols × Rows block-hash matrix. Index by `row * cols + col`. */
|
|
54
|
+
hashes: Uint32Array;
|
|
55
|
+
cols: number;
|
|
56
|
+
rows: number;
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Split a frame into a `cols x rows` grid and return a tiny per-block hash.
|
|
60
|
+
*
|
|
61
|
+
* We don't use a full dHash per block — instead we sample 4 luminance points
|
|
62
|
+
* per block and pack a 16-bit fingerprint. That's enough to spot "this block
|
|
63
|
+
* changed" with very few false-negatives in practice, and stays cheap when
|
|
64
|
+
* called every active-poll frame (4 Hz).
|
|
65
|
+
*/
|
|
66
|
+
export declare function blockGrid(png: Buffer, cols?: number, rows?: number): BlockGrid | null;
|
|
67
|
+
/**
 * Build a `BlockGrid` from an already-decoded `RawImage`. Unlike
 * `blockGrid()`, this takes RGBA pixels rather than a PNG buffer and its
 * return type is never `null`.
 *
 * @param image - Decoded RGBA frame.
 * @param cols - Optional number of grid columns.
 * @param rows - Optional number of grid rows.
 *
 * NOTE(review): the defaults for `cols` / `rows` are not visible in this
 * declaration file — presumably 16, per the block-grid contract in the
 * module header; confirm against the implementation.
 */
export declare function blockGridFromImage(image: RawImage, cols?: number, rows?: number): BlockGrid;
|
|
68
|
+
export interface DirtyBlock {
|
|
69
|
+
col: number;
|
|
70
|
+
row: number;
|
|
71
|
+
/** Pixel-space bbox `[x, y, w, h]` of this block in the source frame. */
|
|
72
|
+
bbox: [number, number, number, number];
|
|
73
|
+
}
|
|
74
|
+
/**
|
|
75
|
+
* Return the list of blocks whose hash changed between two grids.
|
|
76
|
+
*
|
|
77
|
+
* If `prev` is null (first frame) every block is dirty.
|
|
78
|
+
* If `imageWidth/Height` are passed, the result includes a pixel-space bbox
|
|
79
|
+
* for each dirty block so the caller can crop the source PNG to re-OCR only
|
|
80
|
+
* the changed regions.
|
|
81
|
+
*/
|
|
82
|
+
export declare function diffBlocks(prev: BlockGrid | null, current: BlockGrid, imageWidth?: number, imageHeight?: number): DirtyBlock[];
|
|
83
|
+
/**
|
|
84
|
+
* Coalesce a list of dirty blocks into axis-aligned rectangles using a row-
|
|
85
|
+
* sweep merge. Adjacent dirty blocks in the same row collapse into a single
|
|
86
|
+
* horizontal strip, then strips in successive rows that fully overlap collapse
|
|
87
|
+
* vertically. This keeps the OS region-capture count tiny in the common case
|
|
88
|
+
* where a single text field or a banner area changes (1 region, not 12).
|
|
89
|
+
*
|
|
90
|
+
* Returns the rectangles in display-local pixel space when `imageWidth` /
|
|
91
|
+
* `imageHeight` are provided; otherwise in `col, row, colspan, rowspan`
|
|
92
|
+
* units (1,1,1,1 = a single block at grid position 0,0).
|
|
93
|
+
*/
|
|
94
|
+
export declare function coalesceDirtyBlocks(dirty: DirtyBlock[], grid: BlockGrid, imageWidth?: number, imageHeight?: number): Array<{
|
|
95
|
+
bbox: [number, number, number, number];
|
|
96
|
+
}>;
|
|
97
|
+
/**
|
|
98
|
+
* Read PNG dimensions without inflating IDAT. Cheap — only the IHDR chunk is
|
|
99
|
+
* touched. Returns null if the buffer isn't a recognizable PNG.
|
|
100
|
+
*/
|
|
101
|
+
export declare function pngDimensions(png: Buffer): {
|
|
102
|
+
width: number;
|
|
103
|
+
height: number;
|
|
104
|
+
} | null;
|
|
105
|
+
//# sourceMappingURL=dhash.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dhash.d.ts","sourceRoot":"","sources":["../../src/scene/dhash.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAQH,MAAM,WAAW,QAAQ;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,6EAA6E;IAC7E,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;;;GAIG;AACH,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI,CAmEtD;AAwFD;;;GAGG;AACH,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAIrD;AAED;;;GAGG;AACH,wBAAgB,OAAO,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CAQpD;AAED,MAAM,WAAW,SAAS;IACxB,kEAAkE;IAClE,MAAM,EAAE,WAAW,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;;;;;;GAOG;AACH,wBAAgB,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,SAAK,EAAE,IAAI,SAAK,GAAG,SAAS,GAAG,IAAI,CAI7E;AAED,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,QAAQ,EACf,IAAI,SAAK,EACT,IAAI,SAAK,GACR,SAAS,CA2BX;AAOD,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,yEAAyE;IACzE,IAAI,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;CACxC;AAED;;;;;;;GAOG;AACH,wBAAgB,UAAU,CACxB,IAAI,EAAE,SAAS,GAAG,IAAI,EACtB,OAAO,EAAE,SAAS,EAClB,UAAU,CAAC,EAAE,MAAM,EACnB,WAAW,CAAC,EAAE,MAAM,GACnB,UAAU,EAAE,CAgCd;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,mBAAmB,CACjC,KAAK,EAAE,UAAU,EAAE,EACnB,IAAI,EAAE,SAAS,EACf,UAAU,CAAC,EAAE,MAAM,EACnB,WAAW,CAAC,EAAE,MAAM,GACnB,KAAK,CAAC;IAAE,IAAI,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAA;CAAE,CAAC,CAwEnD;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAC3B,GAAG,EAAE,MAAM,GACV;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAoB1C"}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OCR adapter for the WS6 scene-builder.
|
|
3
|
+
*
|
|
4
|
+
* The scene-builder needs text-from-image extraction on full frames and
|
|
5
|
+
* (much more often) on cropped dirty blocks. There are two registry seams it
|
|
6
|
+
* can draw from, in priority order:
|
|
7
|
+
*
|
|
8
|
+
* 1. The coord-aware `CoordOcrProvider` (`mobile/ocr-provider.ts`,
|
|
9
|
+
* `registerCoordOcrProvider` / `getCoordOcrProvider`). This is the
|
|
10
|
+
* canonical seam: `@elizaos/plugin-vision` registers a bridge to its
|
|
11
|
+
* hierarchical OCR (docTR / Apple Vision) at boot, returning blocks with
|
|
12
|
+
* bbox + words + semantic position in display-absolute coords. Preferred
|
|
13
|
+
* whenever a provider is registered.
|
|
14
|
+
* 2. The line-only `OcrProvider` registry (`listOcrProviders()`), used by
|
|
15
|
+
* the on-device iOS Apple-Vision provider and unit-test fakes. Fallback
|
|
16
|
+
* when no coord provider is registered.
|
|
17
|
+
*
|
|
18
|
+
* **Integration choice (justified):**
|
|
19
|
+
*
|
|
20
|
+
* plugin-computeruse cannot take a hard `@elizaos/plugin-vision` dependency —
|
|
21
|
+
* that creates a cycle (vision -> capture -> computeruse) and forces every
|
|
22
|
+
* computeruse consumer to install the vision OCR stack even when they only
|
|
23
|
+
* want desktop control. Instead, computeruse *publishes* both registries and
|
|
24
|
+
* a consumer (plugin-vision, or an integrator) registers a provider at
|
|
25
|
+
* startup. The chain degrades to "no OCR" when nothing is registered; the
|
|
26
|
+
* scene-builder logs that condition once.
|
|
27
|
+
*
|
|
28
|
+
* This module exposes:
|
|
29
|
+
* - `runOcrOnPng(png, displayId, idState)` — the scene-builder calls this.
|
|
30
|
+
* - `runOcrOnRegions(...)` — same, but for cropped dirty blocks. Falls back
|
|
31
|
+
* to whole-frame OCR if the provider can't crop in place.
|
|
32
|
+
* - `setOcrLoggingHook(fn)` — the scene-builder injects a logger so this
|
|
33
|
+
* module doesn't have to take a `@elizaos/core` dep itself.
|
|
34
|
+
*/
|
|
35
|
+
import type { CoordOcrProvider } from "../mobile/ocr-provider.js";
|
|
36
|
+
import type { SceneOcrBox } from "./scene-types.js";
|
|
37
|
+
/**
 * Install the callback this module uses to emit log messages. The
 * scene-builder injects its logger here so this module does not need its
 * own `@elizaos/core` dependency.
 *
 * @param fn - Receives each log message as a string.
 */
export declare function setOcrLoggingHook(fn: (message: string) => void): void;
|
|
38
|
+
export interface OcrAdapterIdState {
|
|
39
|
+
/** Per-display sequence counter so ids stay stable per Scene. */
|
|
40
|
+
perDisplay: Map<number, number>;
|
|
41
|
+
}
|
|
42
|
+
/**
 * Create a new `OcrAdapterIdState` to be passed to `nextOcrId()`,
 * `runOcrOnPng()` and `runOcrOnRegions()`.
 *
 * NOTE(review): presumably starts with an empty `perDisplay` map — confirm.
 */
export declare function makeOcrIdState(): OcrAdapterIdState;
|
|
43
|
+
/**
 * Produce an OCR box id string for `displayId`, using `state` as the
 * per-display sequence bookkeeping.
 *
 * NOTE(review): presumably yields the `t<displayId>-<seq>` format mentioned
 * in the `runOcrOnPng` documentation and advances the counter held in
 * `state.perDisplay` — confirm against the implementation.
 *
 * @param state - Id state created by `makeOcrIdState()`.
 * @param displayId - Display the OCR box belongs to.
 * @returns The id string for the box.
 */
export declare function nextOcrId(state: OcrAdapterIdState, displayId: number): string;
|
|
44
|
+
/**
|
|
45
|
+
* Run OCR on a whole PNG buffer. Prefers the coord-aware provider; falls back
|
|
46
|
+
* to the line-only registry. Returns boxes tagged with `displayId` and stable
|
|
47
|
+
* `t<displayId>-<seq>` ids drawn from `idState`. Empty array if no provider is
|
|
48
|
+
* registered.
|
|
49
|
+
*/
|
|
50
|
+
export declare function runOcrOnPng(png: Buffer, displayId: number, idState: OcrAdapterIdState): Promise<SceneOcrBox[]>;
|
|
51
|
+
/**
|
|
52
|
+
* Run OCR on a list of cropped region buffers, each tied to a bbox in the
|
|
53
|
+
* source frame. Used for dirty-block re-OCR.
|
|
54
|
+
*
|
|
55
|
+
* `crops[i].png` is a standalone PNG of the dirty region. `crops[i].bbox` is
|
|
56
|
+
* the region's location in the source frame (display-local). The returned
|
|
57
|
+
* boxes are translated back into display-local source coordinates.
|
|
58
|
+
*/
|
|
59
|
+
export declare function runOcrOnRegions(crops: Array<{
|
|
60
|
+
png: Buffer;
|
|
61
|
+
bbox: [number, number, number, number];
|
|
62
|
+
}>, displayId: number, idState: OcrAdapterIdState): Promise<SceneOcrBox[]>;
|
|
63
|
+
export type { CoordOcrProvider };
|
|
64
|
+
//# sourceMappingURL=ocr-adapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ocr-adapter.d.ts","sourceRoot":"","sources":["../../src/scene/ocr-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAEH,OAAO,KAAK,EAEV,gBAAgB,EAGjB,MAAM,2BAA2B,CAAC;AAKnC,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAGpD,wBAAgB,iBAAiB,CAAC,EAAE,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,GAAG,IAAI,CAErE;AAED,MAAM,WAAW,iBAAiB;IAChC,iEAAiE;IACjE,UAAU,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACjC;AAED,wBAAgB,cAAc,IAAI,iBAAiB,CAElD;AAED,wBAAgB,SAAS,CAAC,KAAK,EAAE,iBAAiB,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,CAK7E;AAoBD;;;;;GAKG;AACH,wBAAsB,WAAW,CAC/B,GAAG,EAAE,MAAM,EACX,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,iBAAiB,GACzB,OAAO,CAAC,WAAW,EAAE,CAAC,CAsCxB;AAED;;;;;;;GAOG;AACH,wBAAsB,eAAe,CACnC,KAAK,EAAE,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAA;CAAE,CAAC,EACrE,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,iBAAiB,GACzB,OAAO,CAAC,WAAW,EAAE,CAAC,CAsFxB;AAyCD,YAAY,EAAE,gBAAgB,EAAE,CAAC"}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WS6 — Scene Builder
|
|
3
|
+
*
|
|
4
|
+
* Produces a single compact `Scene` JSON object per scene update. The runtime
|
|
5
|
+
* lifecycle is:
|
|
6
|
+
*
|
|
7
|
+
* 1. Idle poll @ 1 Hz — capture each display, compute frame dHash.
|
|
8
|
+
* If no change observed for >2s, skip OCR + AX.
|
|
9
|
+
* 2. Active poll @ 4 Hz — capture, dHash, block-grid diff, run OCR on
|
|
10
|
+
* the dirty blocks only, fold in AX snapshot.
|
|
11
|
+
* 3. Agent turn (onAgentTurn) — full pipeline including the WS7 VLM hook.
|
|
12
|
+
* We surface a clean entry point but do NOT
|
|
13
|
+
* call the VLM here.
|
|
14
|
+
*
|
|
15
|
+
* Caches:
|
|
16
|
+
* - Whole-display scene cache keyed by `(displayId, frameDhash)`, TTL 30s.
|
|
17
|
+
* - Per-display previous BlockGrid kept in memory; dirtied on every active
|
|
18
|
+
* frame.
|
|
19
|
+
* - AX subtree cache: we re-snapshot every agent turn and every active
|
|
20
|
+
* poll when no AX focus-change notifications are available (which is
|
|
21
|
+
* the case on every desktop OS today). The AT-SPI signal route would
|
|
22
|
+
* remove that polling — call out as a follow-up.
|
|
23
|
+
*
|
|
24
|
+
* The builder is dependency-injectable for tests: pass an in-memory
|
|
25
|
+
* `captureDisplays`, `enumerateApps`, `accessibilityProvider`, and OCR adapter
|
|
26
|
+
* to assert pipeline behavior without a real screen.
|
|
27
|
+
*/
|
|
28
|
+
import { EventEmitter } from "node:events";
|
|
29
|
+
import type { DisplayCapture } from "../platform/capture.js";
|
|
30
|
+
import type { DisplayDescriptor } from "../types.js";
|
|
31
|
+
import { type AccessibilityProvider } from "./a11y-provider.js";
|
|
32
|
+
import { type OcrAdapterIdState } from "./ocr-adapter.js";
|
|
33
|
+
import type { Scene, SceneApp, SceneOcrBox, SceneVlmElement } from "./scene-types.js";
|
|
34
|
+
/**
 * Optional collaborators for `SceneBuilder`. Every member is optional, so a
 * caller (for example a test) can supply only the pieces it wants to replace.
 */
export interface SceneBuilderDeps {
|
|
35
|
+
/** Capture source resolving to an array of `DisplayCapture` (presumably one entry per display — confirm). */
captureAll?: () => Promise<DisplayCapture[]>;
|
|
36
|
+
/** Capture source for a single display, selected by `displayId`. */
captureOne?: (displayId: number) => Promise<DisplayCapture>;
|
|
37
|
+
/**
|
|
38
|
+
* Capture a region within a display in display-local coordinates. Used by
|
|
39
|
+
* the dirty-block re-OCR fast path to avoid full-frame OCR when only a
|
|
40
|
+
* small slice of the screen changed. Default: `captureDisplayRegion`.
|
|
41
|
+
*/
|
|
42
|
+
captureRegion?: (displayId: number, region: {
|
|
43
|
+
x: number;
|
|
44
|
+
y: number;
|
|
45
|
+
width: number;
|
|
46
|
+
height: number;
|
|
47
|
+
}) => Promise<DisplayCapture>;
|
|
48
|
+
/** Synchronous source of the app list (`SceneApp[]`). */
enumerateApps?: () => SceneApp[];
|
|
49
|
+
/** Synchronous source of the display list (`DisplayDescriptor[]`). */
listDisplays?: () => DisplayDescriptor[];
|
|
50
|
+
/** Accessibility provider; presumably the source of the AX snapshot folded into each scene — confirm. */
accessibilityProvider?: AccessibilityProvider;
|
|
51
|
+
/** OCR over one PNG for `displayId`; resolves to the recognized `SceneOcrBox` list. */
runOcrOnFrame?: (png: Buffer, displayId: number, idState: OcrAdapterIdState) => Promise<SceneOcrBox[]>;
|
|
52
|
+
/**
|
|
53
|
+
* Run OCR on a list of cropped PNGs, each carrying its display-local bbox.
|
|
54
|
+
* Returned boxes have their bboxes translated back into source-frame
|
|
55
|
+
* display-local coordinates. Default: `runOcrOnRegions`.
|
|
56
|
+
*/
|
|
57
|
+
runOcrOnCrops?: (crops: Array<{
|
|
58
|
+
png: Buffer;
|
|
59
|
+
bbox: [number, number, number, number];
|
|
60
|
+
}>, displayId: number, idState: OcrAdapterIdState) => Promise<SceneOcrBox[]>;
|
|
61
|
+
/** Optional callback receiving log message strings. */
log?: (msg: string) => void;
|
|
62
|
+
}
|
|
63
|
+
/** Payload delivered to handlers registered through `SceneBuilder.subscribe`. */
export interface SceneUpdateEvent {
|
|
64
|
+
/** The scene carried by this update. */
scene: Scene;
|
|
65
|
+
/** What triggered the update; the literals match the modes accepted by `SceneBuilder.tick`. */
reason: "idle" | "active" | "agent-turn";
|
|
66
|
+
}
|
|
67
|
+
/**
 * Produces `Scene` objects on demand (`tick` / `onAgentTurn`), keeps the most
 * recent one available through `getCurrentScene`, and lets callers subscribe
 * to updates. Extends Node's `EventEmitter`.
 */
export declare class SceneBuilder extends EventEmitter {
|
|
68
|
+
// Private member types are erased in this declaration file; the notes below are
// assumptions drawn from the names and must be confirmed against the source.
private readonly deps;
|
|
69
|
+
// NOTE(review): presumably per-display state (e.g. the previous block grid) — confirm.
private readonly perDisplay;
|
|
70
|
+
// NOTE(review): presumably the `OcrAdapterIdState` handed to the OCR hooks — confirm.
private readonly ocrIdState;
|
|
71
|
+
// NOTE(review): presumably the value returned by `getCurrentScene()` — confirm.
private latestScene;
|
|
72
|
+
// NOTE(review): presumably tracks a tick that is still running — confirm.
private inFlight;
|
|
73
|
+
/** @param deps Optional dependency overrides (see `SceneBuilderDeps`); may be omitted entirely. */
constructor(deps?: SceneBuilderDeps);
|
|
74
|
+
/**
|
|
75
|
+
* Pulse the pipeline. Mode chooses how much work to do:
|
|
76
|
+
* - "idle" — capture + dHash only; reuses cached OCR/AX if unchanged.
|
|
77
|
+
* - "active" — capture + dHash + dirty-block OCR + AX.
|
|
78
|
+
* - "agent-turn" — full pipeline; WS7's `onAgentTurn` should call this.
|
|
79
|
+
*
|
|
80
|
+
* Returns the produced Scene. Subscribers are notified after.
|
|
81
|
+
*/
|
|
82
|
+
tick(mode?: "idle" | "active" | "agent-turn"): Promise<Scene>;
|
|
83
|
+
/** Called by WS7 when a real agent turn starts. Always full pipeline. */
|
|
84
|
+
onAgentTurn(): Promise<Scene>;
|
|
85
|
+
/** Returns the most recently emitted Scene, or null before first tick. */
|
|
86
|
+
getCurrentScene(): Scene | null;
|
|
87
|
+
/**
|
|
88
|
+
* Populate the VLM annotations on the current scene (#9105 M3). The Brain /
|
|
89
|
+
* DirtyTileDescriber produce `vlm_scene` (a one-paragraph description) and
|
|
90
|
+
* `vlm_elements` (described tiles); these were previously always `null`
|
|
91
|
+
* because nothing ever wrote them. Persisting them here lets the next
|
|
92
|
+
* provider read carry the cheap understanding instead of re-describing, and
|
|
93
|
+
* they survive subsequent ticks via the `latestScene?.vlm_*` pass-through.
|
|
94
|
+
*/
|
|
95
|
+
setVlmAnnotations(vlmScene: string | null, vlmElements: SceneVlmElement[] | null): void;
|
|
96
|
+
/** Subscribe to scene updates. Returns an unsubscribe function. */
|
|
97
|
+
subscribe(handler: (event: SceneUpdateEvent) => void): () => void;
|
|
98
|
+
// Private helpers follow; their signatures are erased in this declaration file.
private run;
|
|
99
|
+
private captureWithFallback;
|
|
100
|
+
private safeEnumerateApps;
|
|
101
|
+
private safeSnapshotAx;
|
|
102
|
+
private ensureState;
|
|
103
|
+
}
|
|
104
|
+
/**
 * Returns a `SceneBuilder`.
 * NOTE(review): the name and the test-only reset declared next to it suggest a
 * shared module-level default instance — confirm in the implementation.
 */
export declare function getDefaultSceneBuilder(): SceneBuilder;
|
|
105
|
+
/** Test-only reset; presumably discards the builder handed out by `getDefaultSceneBuilder` — confirm in the implementation. Not intended for production callers. */
|
|
106
|
+
export declare function _resetDefaultSceneBuilderForTests(): void;
|
|
107
|
+
//# sourceMappingURL=scene-builder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scene-builder.d.ts","sourceRoot":"","sources":["../../src/scene/scene-builder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AAO7D,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAC;AACrD,OAAO,EACL,KAAK,qBAAqB,EAE3B,MAAM,oBAAoB,CAAC;AAW5B,OAAO,EAEL,KAAK,iBAAiB,EAIvB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,KAAK,EACV,KAAK,EACL,QAAQ,EAGR,WAAW,EACX,eAAe,EAChB,MAAM,kBAAkB,CAAC;AAM1B,MAAM,WAAW,gBAAgB;IAC/B,UAAU,CAAC,EAAE,MAAM,OAAO,CAAC,cAAc,EAAE,CAAC,CAAC;IAC7C,UAAU,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,KAAK,OAAO,CAAC,cAAc,CAAC,CAAC;IAC5D;;;;OAIG;IACH,aAAa,CAAC,EAAE,CACd,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE;QAAE,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,KAC5D,OAAO,CAAC,cAAc,CAAC,CAAC;IAC7B,aAAa,CAAC,EAAE,MAAM,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,MAAM,iBAAiB,EAAE,CAAC;IACzC,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IAC9C,aAAa,CAAC,EAAE,CACd,GAAG,EAAE,MAAM,EACX,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,iBAAiB,KACvB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;IAC5B;;;;OAIG;IACH,aAAa,CAAC,EAAE,CACd,KAAK,EAAE,KAAK,CAAC;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAA;KAAE,CAAC,EACrE,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,iBAAiB,KACvB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;IAC5B,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAC;CAC7B;AASD,MAAM,WAAW,gBAAgB;IAC/B,KAAK,EAAE,KAAK,CAAC;IACb,MAAM,EAAE,MAAM,GAAG,QAAQ,GAAG,YAAY,CAAC;CAC1C;AAED,qBAAa,YAAa,SAAQ,YAAY;IAC5C,OAAO,CAAC,QAAQ,CAAC,IAAI,CAEnB;IACF,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAsC;IACjE,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAuC;IAClE,OAAO,CAAC,WAAW,CAAsB;IACzC,OAAO,CAAC,QAAQ,CAA+B;gBAEnC,IAAI,GAAE,gBAAqB;IAkCvC;;;;;;;OAOG;IACG,IAAI,CACR,IAAI,GAAE,MAAM,GAAG,QAAQ,GAAG,YAAuB,GAChD,OAAO,CAAC,KAAK,CAAC;IAWjB,yEAAyE;IACnE,WAAW,IAAI,OAAO,CAAC,KAAK,CAAC;IAInC,0EAA0E;IAC1E,eAAe,IAAI,KAAK,GAAG,IAAI;IAI/B;;;;;;;OAOG;IACH,iBAAiB,CACf,QAAQ,EAAE,MAAM,GAAG,IAAI,EACvB,WAAW,EAAE,eAAe,EAAE,GAAG,IAAI,GACpC,IAAI;IASP,mEAAmE;IACnE
,SAAS,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,GAAG,MAAM,IAAI;YAQnD,GAAG;YA2LH,mBAAmB;IAuBjC,OAAO,CAAC,iBAAiB;YAWX,cAAc;IAW5B,OAAO,CAAC,WAAW;CAapB;AA2CD,wBAAgB,sBAAsB,IAAI,YAAY,CAGrD;AAED,uBAAuB;AACvB,wBAAgB,iCAAiC,IAAI,IAAI,CAExD"}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scene representation produced by the WS6 scene-builder pipeline.
|
|
3
|
+
*
|
|
4
|
+
* One `Scene` describes the agent's full visual + structural context at a
|
|
5
|
+
* single moment. It is the contract WS7 (Brain) consumes to ground every
|
|
6
|
+
* coordinate-bearing action.
|
|
7
|
+
*
|
|
8
|
+
* All bbox coordinates are LOCAL to their `displayId`. WS5's
|
|
9
|
+
* `localToGlobal` / `globalToLocal` translate to the input-driver space
|
|
10
|
+
* before any click fires.
|
|
11
|
+
*/
|
|
12
|
+
import type { DisplayDescriptor, WindowInfo } from "../types.js";
|
|
13
|
+
/** One window belonging to a `SceneApp`. */
export interface SceneAppWindow {
|
|
14
|
+
/** Window identifier (string form). */
id: string;
|
|
15
|
+
/** Window title text. */
title: string;
|
|
16
|
+
/** Window rectangle as four numbers; component order and coordinate space are not shown here — TODO confirm. */
bounds: [number, number, number, number];
|
|
17
|
+
/** Id of the display the window is attributed to. */
displayId: number;
|
|
18
|
+
}
|
|
19
|
+
/** One application entry in `Scene.apps`, with its windows. */
export interface SceneApp {
|
|
20
|
+
/** Application name. */
name: string;
|
|
21
|
+
/** Process id of the application. */
pid: number;
|
|
22
|
+
/** Windows listed for this application. */
windows: SceneAppWindow[];
|
|
23
|
+
}
|
|
24
|
+
/** Description of the focused window, exposed as `Scene.focused_window`. */
export interface SceneFocusedWindow {
|
|
25
|
+
/** Name of the owning application. */
app: string;
|
|
26
|
+
/** Process id of the owning application, or `null` (presumably when it could not be determined — confirm). */
pid: number | null;
|
|
27
|
+
/** Window rectangle as four numbers; component order and coordinate space are not shown here — TODO confirm. */
bounds: [number, number, number, number];
|
|
28
|
+
/** Window title text. */
title: string;
|
|
29
|
+
/** Id of the display the window is attributed to. */
displayId: number;
|
|
30
|
+
}
|
|
31
|
+
/** One OCR result entry in `Scene.ocr`. */
export interface SceneOcrBox {
|
|
32
|
+
/** Stable id `t<displayId>-<seq>`. */
|
|
33
|
+
id: string;
|
|
34
|
+
/** Recognized text. */
text: string;
|
|
35
|
+
/** Bounding box, local to `displayId` per this file's header; component order is not shown here — TODO confirm. */
bbox: [number, number, number, number];
|
|
36
|
+
/** OCR confidence; the scale is not shown here (presumably 0–1) — TODO confirm. */
conf: number;
|
|
37
|
+
/** Id of the display the box belongs to. */
displayId: number;
|
|
38
|
+
}
|
|
39
|
+
/** One accessibility (AX) node entry in `Scene.ax`. */
export interface SceneAxNode {
|
|
40
|
+
/** Node identifier. */
id: string;
|
|
41
|
+
/** Accessibility role, as a string. */
role: string;
|
|
42
|
+
/** Optional label; may be absent. */
label?: string;
|
|
43
|
+
/** Bounding box, local to `displayId` per this file's header; component order is not shown here — TODO confirm. */
bbox: [number, number, number, number];
|
|
44
|
+
/** Action names listed for the node (presumably the actions it supports — confirm). */
actions: string[];
|
|
45
|
+
/** Id of the display the node belongs to. */
displayId: number;
|
|
46
|
+
}
|
|
47
|
+
/** One VLM-described element entry in `Scene.vlm_elements`. */
export interface SceneVlmElement {
|
|
48
|
+
/** Element identifier. */
id: string;
|
|
49
|
+
/** Element kind, as a free-form string (the set of values is not shown here). */
kind: string;
|
|
50
|
+
/** Textual description of the element. */
desc: string;
|
|
51
|
+
/** Bounding box, local to `displayId` per this file's header; component order is not shown here — TODO confirm. */
bbox: [number, number, number, number];
|
|
52
|
+
/** Id of the display the element belongs to. */
displayId: number;
|
|
53
|
+
}
|
|
54
|
+
/** Full scene snapshot: displays, focused window, apps, OCR boxes, AX nodes and optional VLM annotations. */
export interface Scene {
|
|
55
|
+
/** Time of the snapshot (presumably ms since epoch — TODO confirm). */
timestamp: number;
|
|
56
|
+
/** Displays described by this scene. */
displays: DisplayDescriptor[];
|
|
57
|
+
/** The focused window, or `null` when none is reported. */
focused_window: SceneFocusedWindow | null;
|
|
58
|
+
/** Applications and their windows. */
apps: SceneApp[];
|
|
59
|
+
/** OCR text boxes. */
ocr: SceneOcrBox[];
|
|
60
|
+
/** Accessibility nodes. */
ax: SceneAxNode[];
|
|
61
|
+
/** VLM annotations written through `SceneBuilder.setVlmAnnotations`; `null` when none have been set. Per that method's docs the values carry over to subsequent ticks, so they are not limited to agent turns. */
|
|
62
|
+
vlm_scene: string | null;
|
|
63
|
+
vlm_elements: SceneVlmElement[] | null;
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* Window-info alias retained for completeness; the scene-builder also accepts
|
|
67
|
+
* the existing `WindowInfo` shape and folds it into `SceneAppWindow`.
|
|
68
|
+
*/
|
|
69
|
+
// Pure alias: `SceneWindowInfo` is exactly the `WindowInfo` type imported from "../types.js".
export type SceneWindowInfo = WindowInfo;
|
|
70
|
+
//# sourceMappingURL=scene-types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scene-types.d.ts","sourceRoot":"","sources":["../../src/scene/scene-types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEjE,MAAM,WAAW,cAAc;IAC7B,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;IACzC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,cAAc,EAAE,CAAC;CAC3B;AAED,MAAM,WAAW,kBAAkB;IACjC,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,GAAG,IAAI,CAAC;IACnB,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;IACzC,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,WAAW;IAC1B,sCAAsC;IACtC,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;IACvC,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,WAAW;IAC1B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;IACvC,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,eAAe;IAC9B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;IACvC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,KAAK;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,iBAAiB,EAAE,CAAC;IAC9B,cAAc,EAAE,kBAAkB,GAAG,IAAI,CAAC;IAC1C,IAAI,EAAE,QAAQ,EAAE,CAAC;IACjB,GAAG,EAAE,WAAW,EAAE,CAAC;IACnB,EAAE,EAAE,WAAW,EAAE,CAAC;IAClB,2EAA2E;IAC3E,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,YAAY,EAAE,eAAe,EAAE,GAAG,IAAI,CAAC;CACxC;AAED;;;GAGG;AACH,MAAM,MAAM,eAAe,GAAG,UAAU,CAAC"}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified `ScreenState` — one capture per turn (#9105 M3).
|
|
3
|
+
*
|
|
4
|
+
* Before this seam the same display was captured/encoded multiple times per
|
|
5
|
+
* turn: the SceneBuilder's dHash gate, the SceneBuilder's OCR pass, and the
|
|
6
|
+
* Brain's `encodeForBrain` each pulled their own frame. `ScreenStateStore`
|
|
7
|
+
* makes the capture a single shared artifact: it grabs one PNG per display,
|
|
8
|
+
* computes the frame dHash and the 16×16 block grid once, and hands every
|
|
9
|
+
* consumer (OCR adapter, DirtyTileDescriber, scene provider, Brain) the same
|
|
10
|
+
* `ScreenState`. Re-asking for a display within `freshnessMs` reuses the prior
|
|
11
|
+
* capture instead of hitting the OS again.
|
|
12
|
+
*
|
|
13
|
+
* Change detection rides the existing `scene/dhash.ts`: a refresh only emits a
|
|
14
|
+
* change event when the frame dHash moved by at least `hammingThreshold` bits,
|
|
15
|
+
* so subscribers (e.g. the continuous describer loop) don't re-run on a
|
|
16
|
+
* pixel-identical screen. `getStats()` exposes how many captures were served
|
|
17
|
+
* from cache vs taken fresh so a test can prove the per-turn saving.
|
|
18
|
+
*
|
|
19
|
+
* Pure + injectable: pass a `capture` fn to drive the store from synthetic PNGs
|
|
20
|
+
* with no real screen.
|
|
21
|
+
*/
|
|
22
|
+
import type { DisplayCapture } from "../platform/capture.js";
|
|
23
|
+
import { type BlockGrid, type DirtyBlock } from "./dhash.js";
|
|
24
|
+
/** Frame-dHash hamming distance below which two frames are "unchanged". Documented default for `ScreenStateStoreOptions.hammingThreshold`. */
|
|
25
|
+
export declare const SCREEN_STATE_HAMMING_THRESHOLD = 5;
|
|
26
|
+
/** Default reuse window in milliseconds: a capture younger than this is served from cache. Documented default for `ScreenStateStoreOptions.freshnessMs`. */
|
|
27
|
+
export declare const SCREEN_STATE_DEFAULT_FRESHNESS_MS = 400;
|
|
28
|
+
/** A single shared per-display capture for one turn. */
|
|
29
|
+
// Shape handed out by `ScreenStateStore.get` / `refresh` / `peek`.
export interface ScreenState {
|
|
30
|
+
/** Id of the display this frame belongs to. */
displayId: number;
|
|
31
|
+
/** ms epoch when the underlying frame was captured. */
|
|
32
|
+
capturedAt: number;
|
|
33
|
+
/** Frame width (presumably pixels at the PNG's backing-store resolution — TODO confirm). */
width: number;
|
|
34
|
+
/** Frame height (presumably pixels at the PNG's backing-store resolution — TODO confirm). */
height: number;
|
|
35
|
+
/** PNG bytes at backing-store resolution. */
|
|
36
|
+
png: Buffer;
|
|
37
|
+
/** 64-bit frame dHash, or null when the PNG could not be decoded. */
|
|
38
|
+
dhash: bigint | null;
|
|
39
|
+
/** 16×16 block grid of the frame, or null when undecodable. */
|
|
40
|
+
blockGrid: BlockGrid | null;
|
|
41
|
+
/**
|
|
42
|
+
* Blocks that changed vs the previously stored frame for this display.
|
|
43
|
+
* `null` on the very first capture (no prior to diff against). Bboxes are in
|
|
44
|
+
* display-local pixel space when dimensions were available.
|
|
45
|
+
*/
|
|
46
|
+
dirtyBlocks: DirtyBlock[] | null;
|
|
47
|
+
}
|
|
48
|
+
/** Payload delivered to listeners registered through `ScreenStateStore.onChange`. */
export interface ScreenStateChange {
|
|
49
|
+
/** The screen state associated with this change event. */
state: ScreenState;
|
|
50
|
+
/** dHash hamming distance from the previous frame (Infinity on first frame). */
|
|
51
|
+
distance: number;
|
|
52
|
+
}
|
|
53
|
+
/** Capture-accounting snapshot for a store. */
|
|
54
|
+
// Return type of `ScreenStateStore.getStats()`; all three members are plain counters.
export interface ScreenStateStats {
|
|
55
|
+
/** Fresh OS captures actually taken. */
|
|
56
|
+
captures: number;
|
|
57
|
+
/** Capture requests served from the freshness cache (no OS hit). */
|
|
58
|
+
cacheHits: number;
|
|
59
|
+
/** Refreshes that changed the screen enough to fire a change event. */
|
|
60
|
+
changes: number;
|
|
61
|
+
}
|
|
62
|
+
/** Construction options accepted by the `ScreenStateStore` constructor. */
export interface ScreenStateStoreOptions {
|
|
63
|
+
/**
|
|
64
|
+
* Capture one display to a PNG. Defaults to the platform `captureDisplay`.
|
|
65
|
+
* Injected in tests to drive synthetic frames. NOTE(review): the property is declared required here, so the stated default is presumably supplied by the caller rather than by the store — confirm.
|
|
66
|
+
*/
|
|
67
|
+
capture: (displayId: number) => Promise<DisplayCapture>;
|
|
68
|
+
/** Reuse window in ms. Default `SCREEN_STATE_DEFAULT_FRESHNESS_MS`. */
|
|
69
|
+
freshnessMs?: number;
|
|
70
|
+
/** Hamming threshold for "changed". Default `SCREEN_STATE_HAMMING_THRESHOLD`. */
|
|
71
|
+
hammingThreshold?: number;
|
|
72
|
+
/** Clock injection for deterministic freshness tests. Default `Date.now`. */
|
|
73
|
+
now?: () => number;
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Owns the single shared capture per display. `ComputerUseService` holds one
|
|
77
|
+
* store; SceneBuilder, the Brain, and the DirtyTileDescriber loop all read it.
|
|
78
|
+
*/
|
|
79
|
+
// Public surface: getStats, peek, get, refresh, onChange. Private member types are
// erased in this declaration file.
export declare class ScreenStateStore {
|
|
80
|
+
// The first four private fields share their names with `ScreenStateStoreOptions`
// members; presumably they hold the resolved option values — confirm.
private readonly capture;
|
|
81
|
+
private readonly freshnessMs;
|
|
82
|
+
private readonly hammingThreshold;
|
|
83
|
+
private readonly now;
|
|
84
|
+
// NOTE(review): presumably the per-display stored `ScreenState` read by `peek` — confirm.
private readonly states;
|
|
85
|
+
// NOTE(review): presumably the callbacks registered via `onChange` — confirm.
private readonly listeners;
|
|
86
|
+
// NOTE(review): presumably the counters reported by `getStats` — confirm.
private stats;
|
|
87
|
+
/** @param options Required `capture` function plus optional freshness window, hamming threshold and clock. */
constructor(options: ScreenStateStoreOptions);
|
|
88
|
+
/** Returns the capture-accounting counters (`captures`, `cacheHits`, `changes`). */
getStats(): ScreenStateStats;
|
|
89
|
+
/** Latest stored state for a display, or null if never captured. */
|
|
90
|
+
peek(displayId: number): ScreenState | null;
|
|
91
|
+
/**
|
|
92
|
+
* Return a `ScreenState` for `displayId`, reusing the last capture when it is
|
|
93
|
+
* younger than the freshness window. Pass `force` to always re-capture.
|
|
94
|
+
*/
|
|
95
|
+
get(displayId: number, force?: boolean): Promise<ScreenState>;
|
|
96
|
+
/**
|
|
97
|
+
* Force a fresh capture, recompute dHash + block grid + dirty diff against the
|
|
98
|
+
* prior frame, store it, and emit a change event when the frame moved by at
|
|
99
|
+
* least the hamming threshold.
|
|
100
|
+
*/
|
|
101
|
+
refresh(displayId: number): Promise<ScreenState>;
|
|
102
|
+
/** Subscribe to change events. Returns an unsubscribe function. */
|
|
103
|
+
onChange(listener: (change: ScreenStateChange) => void): () => void;
|
|
104
|
+
}
|
|
105
|
+
//# sourceMappingURL=screen-state.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"screen-state.d.ts","sourceRoot":"","sources":["../../src/scene/screen-state.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AAC7D,OAAO,EACL,KAAK,SAAS,EAEd,KAAK,UAAU,EAKhB,MAAM,YAAY,CAAC;AAEpB,2EAA2E;AAC3E,eAAO,MAAM,8BAA8B,IAAI,CAAC;AAChD,8EAA8E;AAC9E,eAAO,MAAM,iCAAiC,MAAM,CAAC;AAErD,wDAAwD;AACxD,MAAM,WAAW,WAAW;IAC1B,SAAS,EAAE,MAAM,CAAC;IAClB,uDAAuD;IACvD,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,6CAA6C;IAC7C,GAAG,EAAE,MAAM,CAAC;IACZ,qEAAqE;IACrE,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACrB,+DAA+D;IAC/D,SAAS,EAAE,SAAS,GAAG,IAAI,CAAC;IAC5B;;;;OAIG;IACH,WAAW,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC;CAClC;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,WAAW,CAAC;IACnB,gFAAgF;IAChF,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,+CAA+C;AAC/C,MAAM,WAAW,gBAAgB;IAC/B,wCAAwC;IACxC,QAAQ,EAAE,MAAM,CAAC;IACjB,oEAAoE;IACpE,SAAS,EAAE,MAAM,CAAC;IAClB,uEAAuE;IACvE,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,uBAAuB;IACtC;;;OAGG;IACH,OAAO,EAAE,CAAC,SAAS,EAAE,MAAM,KAAK,OAAO,CAAC,cAAc,CAAC,CAAC;IACxD,uEAAuE;IACvE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,iFAAiF;IACjF,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,6EAA6E;IAC7E,GAAG,CAAC,EAAE,MAAM,MAAM,CAAC;CACpB;AAED;;;GAGG;AACH,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAiD;IACzE,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAS;IAC1C,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAe;IACnC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAkC;IACzD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAkD;IAC5E,OAAO,CAAC,KAAK,CAA+D;gBAEhE,OAAO,EAAE,uBAAuB;IAQ5C,QAAQ,IAAI,gBAAgB;IAI5B,oEAAoE;IACpE,IAAI,CAAC,SAAS,EAAE,MAAM,GAAG,WAAW,GAAG,IAAI;IAI3C;;;OAGG;IACG,GAAG,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,UAAQ,GAAG,OAAO,CAAC,WAAW,CAAC;IASjE;;;;OAIG;IACG,OAAO,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAoCtD,mEAAmE;IACnE,QAAQ,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,iBAAiB,KAAK,IAAI,GAAG,MAAM,IAAI;CAIpE"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token-efficient Scene serializer.
|
|
3
|
+
*
|
|
4
|
+
* Used by the `scene` provider to render the current Scene into the agent
|
|
5
|
+
* prompt. We deliberately cap lists by display so a 4k-monitor session
|
|
6
|
+
* doesn't push the prompt to thousands of tokens:
|
|
7
|
+
*
|
|
8
|
+
* - displays : full list (small)
|
|
9
|
+
* - focused_window : full (small)
|
|
10
|
+
* - apps : pid, name, window count, top-2 window titles
|
|
11
|
+
* - ocr : top-N most-confident lines per display
|
|
12
|
+
* (default N = 24, configurable)
|
|
13
|
+
* - ax : limited to the focused window's display subtree
|
|
14
|
+
* (default cap = 24)
|
|
15
|
+
* - vlm_scene / elements : passed through verbatim
|
|
16
|
+
*
|
|
17
|
+
* The output is fenced JSON for predictable downstream tokenization.
|
|
18
|
+
*/
|
|
19
|
+
import type { Scene } from "./scene-types.js";
|
|
20
|
+
/** Optional caps for `serializeSceneForPrompt`; every member may be omitted. */
export interface SerializeOptions {
|
|
21
|
+
/** Number of most-confident OCR lines kept per display (file header: default N = 24). */
ocrTopN?: number;
|
|
22
|
+
/** Cap on emitted AX nodes (file header: default cap = 24). */
axMax?: number;
|
|
23
|
+
/** Number of window titles emitted per app (file header mentions "top-2"; presumably the default is 2 — confirm). */
appTopWindows?: number;
|
|
24
|
+
/** Cap total apps emitted. Prefers apps with at least one visible window. */
|
|
25
|
+
appMax?: number;
|
|
26
|
+
}
|
|
27
|
+
/**
 * Render a `Scene` into a string for the agent prompt.
 *
 * @param scene The scene to serialize.
 * @param options Optional list caps; see `SerializeOptions`.
 * @returns The serialized scene; this file's header describes the output as fenced JSON.
 */
export declare function serializeSceneForPrompt(scene: Scene, options?: SerializeOptions): string;
|
|
28
|
+
//# sourceMappingURL=serialize.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"serialize.d.ts","sourceRoot":"","sources":["../../src/scene/serialize.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAE9C,MAAM,WAAW,gBAAgB;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,6EAA6E;IAC7E,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,KAAK,EACZ,OAAO,GAAE,gBAAqB,GAC7B,MAAM,CAqFR"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/** GHSA-rcvr-766c-4phv: arbitrary page.evaluate + AsyncFunction must not run agent-supplied code. */
|
|
2
|
+
// Message string for the disabled browser-execute path.
// NOTE(review): presumably the default `message` of `BrowserExecuteDisabledError` — confirm.
export declare const BROWSER_EXECUTE_DISABLED_MESSAGE: string;
|
|
3
|
+
/** Error type for the disabled browser-execute path; identifiable by its literal `code`. */
export declare class BrowserExecuteDisabledError extends Error {
|
|
4
|
+
/** Discriminator; the type is the literal "browser_execute_disabled". */
readonly code: "browser_execute_disabled";
|
|
5
|
+
/** @param message Optional message; the fallback used when omitted is not shown in this declaration. */
constructor(message?: string);
|
|
6
|
+
}
|
|
7
|
+
/**
 * Declared to return `never`, i.e. it never returns normally.
 * NOTE(review): presumably it always throws `BrowserExecuteDisabledError` — confirm in the implementation.
 */
export declare function assertBrowserExecuteAllowed(): never;
|
|
8
|
+
/**
 * Boolean query for whether browser script execution is allowed.
 * NOTE(review): since `assertBrowserExecuteAllowed` is typed `never`, this presumably always returns `false` — confirm.
 */
export declare function isBrowserExecuteAllowed(): boolean;
|
|
9
|
+
//# sourceMappingURL=browser-script-policy.d.ts.map
|