@elizaos/plugin-computeruse 2.0.3-beta.2 → 2.0.3-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. package/dist/actions/clipboard.d.ts +22 -0
  2. package/dist/actions/clipboard.d.ts.map +1 -0
  3. package/dist/actions/helpers.d.ts +33 -0
  4. package/dist/actions/helpers.d.ts.map +1 -0
  5. package/dist/actions/progress.d.ts +26 -0
  6. package/dist/actions/progress.d.ts.map +1 -0
  7. package/dist/actions/use-computer-agent.d.ts +113 -0
  8. package/dist/actions/use-computer-agent.d.ts.map +1 -0
  9. package/dist/actions/use-computer.d.ts +3 -0
  10. package/dist/actions/use-computer.d.ts.map +1 -0
  11. package/dist/actions/window-handlers.d.ts +11 -0
  12. package/dist/actions/window-handlers.d.ts.map +1 -0
  13. package/dist/actions/window.d.ts +11 -0
  14. package/dist/actions/window.d.ts.map +1 -0
  15. package/dist/actor/actor.d.ts +84 -0
  16. package/dist/actor/actor.d.ts.map +1 -0
  17. package/dist/actor/agent-callbacks.d.ts +128 -0
  18. package/dist/actor/agent-callbacks.d.ts.map +1 -0
  19. package/dist/actor/agent-loop.d.ts +134 -0
  20. package/dist/actor/agent-loop.d.ts.map +1 -0
  21. package/dist/actor/aosp-input-actor.d.ts +87 -0
  22. package/dist/actor/aosp-input-actor.d.ts.map +1 -0
  23. package/dist/actor/brain.d.ts +195 -0
  24. package/dist/actor/brain.d.ts.map +1 -0
  25. package/dist/actor/cascade.d.ts +92 -0
  26. package/dist/actor/cascade.d.ts.map +1 -0
  27. package/dist/actor/computer-interface.d.ts +276 -0
  28. package/dist/actor/computer-interface.d.ts.map +1 -0
  29. package/dist/actor/dispatch.d.ts +24 -0
  30. package/dist/actor/dispatch.d.ts.map +1 -0
  31. package/dist/actor/index.d.ts +12 -0
  32. package/dist/actor/index.d.ts.map +1 -0
  33. package/dist/actor/types.d.ts +94 -0
  34. package/dist/actor/types.d.ts.map +1 -0
  35. package/dist/approval-manager.d.ts +29 -0
  36. package/dist/approval-manager.d.ts.map +1 -0
  37. package/dist/index.d.ts +46 -0
  38. package/dist/index.d.ts.map +1 -0
  39. package/dist/index.js +13649 -0
  40. package/dist/index.js.map +68 -0
  41. package/dist/mcp/index.d.ts +8 -0
  42. package/dist/mcp/index.d.ts.map +1 -0
  43. package/dist/mcp/server.d.ts +42 -0
  44. package/dist/mcp/server.d.ts.map +1 -0
  45. package/dist/mcp/tools.d.ts +53 -0
  46. package/dist/mcp/tools.d.ts.map +1 -0
  47. package/dist/mobile/android-bridge.d.ts +263 -0
  48. package/dist/mobile/android-bridge.d.ts.map +1 -0
  49. package/dist/mobile/android-scene.d.ts +52 -0
  50. package/dist/mobile/android-scene.d.ts.map +1 -0
  51. package/dist/mobile/android-trajectory.d.ts +66 -0
  52. package/dist/mobile/android-trajectory.d.ts.map +1 -0
  53. package/dist/mobile/index.d.ts +19 -0
  54. package/dist/mobile/index.d.ts.map +1 -0
  55. package/dist/mobile/ios-app-intent-registry.d.ts +20 -0
  56. package/dist/mobile/ios-app-intent-registry.d.ts.map +1 -0
  57. package/dist/mobile/ios-bridge.d.ts +359 -0
  58. package/dist/mobile/ios-bridge.d.ts.map +1 -0
  59. package/dist/mobile/ios-computer-interface.d.ts +160 -0
  60. package/dist/mobile/ios-computer-interface.d.ts.map +1 -0
  61. package/dist/mobile/mobile-computer-interface.d.ts +142 -0
  62. package/dist/mobile/mobile-computer-interface.d.ts.map +1 -0
  63. package/dist/mobile/mobile-screen-capture.d.ts +64 -0
  64. package/dist/mobile/mobile-screen-capture.d.ts.map +1 -0
  65. package/dist/mobile/ocr-provider.d.ts +187 -0
  66. package/dist/mobile/ocr-provider.d.ts.map +1 -0
  67. package/dist/mobile/ocr-provider.js +111 -0
  68. package/dist/mobile/ocr-provider.js.map +10 -0
  69. package/dist/osworld/action-converter.d.ts +38 -0
  70. package/dist/osworld/action-converter.d.ts.map +1 -0
  71. package/dist/osworld/adapter.d.ts +79 -0
  72. package/dist/osworld/adapter.d.ts.map +1 -0
  73. package/dist/osworld/types.d.ts +69 -0
  74. package/dist/osworld/types.d.ts.map +1 -0
  75. package/dist/parity/index.d.ts +9 -0
  76. package/dist/parity/index.d.ts.map +1 -0
  77. package/dist/parity/parity-matrix.d.ts +82 -0
  78. package/dist/parity/parity-matrix.d.ts.map +1 -0
  79. package/dist/parity/screenspot.d.ts +56 -0
  80. package/dist/parity/screenspot.d.ts.map +1 -0
  81. package/dist/platform/a11y.d.ts +64 -0
  82. package/dist/platform/a11y.d.ts.map +1 -0
  83. package/dist/platform/browser.d.ts +61 -0
  84. package/dist/platform/browser.d.ts.map +1 -0
  85. package/dist/platform/capabilities.d.ts +33 -0
  86. package/dist/platform/capabilities.d.ts.map +1 -0
  87. package/dist/platform/capture.d.ts +65 -0
  88. package/dist/platform/capture.d.ts.map +1 -0
  89. package/dist/platform/clipboard.d.ts +24 -0
  90. package/dist/platform/clipboard.d.ts.map +1 -0
  91. package/dist/platform/coords.d.ts +73 -0
  92. package/dist/platform/coords.d.ts.map +1 -0
  93. package/dist/platform/desktop.d.ts +56 -0
  94. package/dist/platform/desktop.d.ts.map +1 -0
  95. package/dist/platform/displays.d.ts +97 -0
  96. package/dist/platform/displays.d.ts.map +1 -0
  97. package/dist/platform/driver.d.ts +49 -0
  98. package/dist/platform/driver.d.ts.map +1 -0
  99. package/dist/platform/file-ops.d.ts +27 -0
  100. package/dist/platform/file-ops.d.ts.map +1 -0
  101. package/dist/platform/helpers.d.ts +60 -0
  102. package/dist/platform/helpers.d.ts.map +1 -0
  103. package/dist/platform/launch.d.ts +54 -0
  104. package/dist/platform/launch.d.ts.map +1 -0
  105. package/dist/platform/normalized-coords.d.ts +46 -0
  106. package/dist/platform/normalized-coords.d.ts.map +1 -0
  107. package/dist/platform/nut-driver.d.ts +86 -0
  108. package/dist/platform/nut-driver.d.ts.map +1 -0
  109. package/dist/platform/permissions.d.ts +33 -0
  110. package/dist/platform/permissions.d.ts.map +1 -0
  111. package/dist/platform/process-list.d.ts +32 -0
  112. package/dist/platform/process-list.d.ts.map +1 -0
  113. package/dist/platform/ps-host.d.ts +77 -0
  114. package/dist/platform/ps-host.d.ts.map +1 -0
  115. package/dist/platform/screenshot-errors.d.ts +54 -0
  116. package/dist/platform/screenshot-errors.d.ts.map +1 -0
  117. package/dist/platform/screenshot-quality.d.ts +11 -0
  118. package/dist/platform/screenshot-quality.d.ts.map +1 -0
  119. package/dist/platform/screenshot.d.ts +16 -0
  120. package/dist/platform/screenshot.d.ts.map +1 -0
  121. package/dist/platform/security.d.ts +20 -0
  122. package/dist/platform/security.d.ts.map +1 -0
  123. package/dist/platform/terminal.d.ts +38 -0
  124. package/dist/platform/terminal.d.ts.map +1 -0
  125. package/dist/platform/wayland-portal.d.ts +25 -0
  126. package/dist/platform/wayland-portal.d.ts.map +1 -0
  127. package/dist/platform/windows-list.d.ts +78 -0
  128. package/dist/platform/windows-list.d.ts.map +1 -0
  129. package/dist/providers/computer-state.d.ts +9 -0
  130. package/dist/providers/computer-state.d.ts.map +1 -0
  131. package/dist/providers/scene.d.ts +21 -0
  132. package/dist/providers/scene.d.ts.map +1 -0
  133. package/dist/register-routes.d.ts +2 -0
  134. package/dist/register-routes.d.ts.map +1 -0
  135. package/dist/register-routes.js +13836 -0
  136. package/dist/register-routes.js.map +71 -0
  137. package/dist/routes/computer-use-compat-routes.d.ts +29 -0
  138. package/dist/routes/computer-use-compat-routes.d.ts.map +1 -0
  139. package/dist/routes/computer-use-routes.d.ts +3 -0
  140. package/dist/routes/computer-use-routes.d.ts.map +1 -0
  141. package/dist/routes/sandbox-routes.d.ts +53 -0
  142. package/dist/routes/sandbox-routes.d.ts.map +1 -0
  143. package/dist/sandbox/docker-backend.d.ts +69 -0
  144. package/dist/sandbox/docker-backend.d.ts.map +1 -0
  145. package/dist/sandbox/index.d.ts +62 -0
  146. package/dist/sandbox/index.d.ts.map +1 -0
  147. package/dist/sandbox/qemu-backend.d.ts +48 -0
  148. package/dist/sandbox/qemu-backend.d.ts.map +1 -0
  149. package/dist/sandbox/remote-guest.d.ts +72 -0
  150. package/dist/sandbox/remote-guest.d.ts.map +1 -0
  151. package/dist/sandbox/sandbox-driver.d.ts +41 -0
  152. package/dist/sandbox/sandbox-driver.d.ts.map +1 -0
  153. package/dist/sandbox/surface-types.d.ts +17 -0
  154. package/dist/sandbox/surface-types.d.ts.map +1 -0
  155. package/dist/sandbox/types.d.ts +138 -0
  156. package/dist/sandbox/types.d.ts.map +1 -0
  157. package/dist/sandbox/wsb-backend.d.ts +48 -0
  158. package/dist/sandbox/wsb-backend.d.ts.map +1 -0
  159. package/dist/scene/a11y-provider.d.ts +83 -0
  160. package/dist/scene/a11y-provider.d.ts.map +1 -0
  161. package/dist/scene/apps.d.ts +39 -0
  162. package/dist/scene/apps.d.ts.map +1 -0
  163. package/dist/scene/dhash.d.ts +105 -0
  164. package/dist/scene/dhash.d.ts.map +1 -0
  165. package/dist/scene/ocr-adapter.d.ts +64 -0
  166. package/dist/scene/ocr-adapter.d.ts.map +1 -0
  167. package/dist/scene/scene-builder.d.ts +107 -0
  168. package/dist/scene/scene-builder.d.ts.map +1 -0
  169. package/dist/scene/scene-types.d.ts +70 -0
  170. package/dist/scene/scene-types.d.ts.map +1 -0
  171. package/dist/scene/screen-state.d.ts +105 -0
  172. package/dist/scene/screen-state.d.ts.map +1 -0
  173. package/dist/scene/serialize.d.ts +28 -0
  174. package/dist/scene/serialize.d.ts.map +1 -0
  175. package/dist/security/browser-script-policy.d.ts +9 -0
  176. package/dist/security/browser-script-policy.d.ts.map +1 -0
  177. package/dist/services/computer-use-service.d.ts +142 -0
  178. package/dist/services/computer-use-service.d.ts.map +1 -0
  179. package/dist/services/desktop-control.d.ts +35 -0
  180. package/dist/services/desktop-control.d.ts.map +1 -0
  181. package/dist/services/index.d.ts +7 -0
  182. package/dist/services/index.d.ts.map +1 -0
  183. package/dist/services/vision-context-provider.d.ts +32 -0
  184. package/dist/services/vision-context-provider.d.ts.map +1 -0
  185. package/dist/types.d.ts +385 -0
  186. package/dist/types.d.ts.map +1 -0
  187. package/package.json +16 -5
  188. package/registry-entry.json +74 -0
@@ -0,0 +1,87 @@
1
+ /**
2
+ * WS7 ↔ AOSP — Privileged-input actor.
3
+ *
4
+ * In the consumer build the cascade routes gestures through
5
+ * `MobileComputerInterface` → `AccessibilityGestureDescription` — which is
6
+ * coarse and blocks on touch-recognizer state in some apps (banking, DRM
7
+ * video, anything that sets `filterTouchesWhenObscured`).
8
+ *
9
+ * In an AOSP system-app build (see `docs/AOSP_SYSTEM_APP.md`), the
10
+ * privileged path uses `InputManager.injectInputEvent()` directly. That
11
+ * path lives behind `AospPrivilegedBridge.injectMotionEvent(...)` on the
12
+ * Kotlin side; the consumer-flavor `AospPrivilegedBridge` exports
13
+ * `createIfAvailable(): null` so this actor stays inert until a real AOSP
14
+ * bridge is linked in.
15
+ *
16
+ * `AospInputActor` maps a resolved WS7 `ProposedAction` (display-local
17
+ * pixel coords) into the privileged-bridge calls. It does NOT implement
18
+ * the `Actor` "grounding" contract — grounding stays with the OCR/AX or
19
+ * VLM actor; this is purely an *input-dispatch* shim. It's surfaced as
20
+ * an alternative to `ComputerInterface` for AOSP builds: the agent loop
21
+ * picks `AospInputActor.execute(action)` instead of `dispatch(action, {
22
+ * interface, ... })` when the privileged bridge is available.
23
+ */
24
+ import type { ActionResult, ProposedAction } from "./types.js";
25
+ /** Minimal Kotlin-side surface this actor needs from the AOSP build. */
26
+ export interface AospPrivilegedInputBridge {
27
+ /**
28
+ * Inject a single motion event at the InputManager level. `action` follows
29
+ * `MotionEvent.ACTION_*` constants (DOWN=0, UP=1, MOVE=2). `downTimeMs`
30
+ * is the timestamp of the gesture's initial touch-down, in `uptimeMillis`
31
+ * units. Implementations enforce the INJECT_EVENTS permission.
32
+ */
33
+ injectMotionEvent(args: {
34
+ x: number;
35
+ y: number;
36
+ action: number;
37
+ downTimeMs: number;
38
+ }): Promise<{
39
+ ok: boolean;
40
+ }>;
41
+ /** Capture the primary display frame buffer. Resolves to JPEG bytes. */
42
+ captureDisplayFrameBuffer?(): Promise<Uint8Array>;
43
+ }
44
+ export interface AospInputActorDeps {
45
+ /** Returns the AOSP bridge handle, or null in consumer builds. */
46
+ getBridge: () => AospPrivilegedInputBridge | null;
47
+ /** Override the clock for tests. */
48
+ now?: () => number;
49
+ }
50
+ /**
51
+ * Motion-event action constants matching `android.view.MotionEvent.ACTION_*`.
52
+ * Mirrored here so callers don't have to reference the Android int constants.
53
+ */
54
+ export declare const MOTION_EVENT_ACTION_DOWN: 0;
55
+ export declare const MOTION_EVENT_ACTION_UP: 1;
56
+ export declare const MOTION_EVENT_ACTION_MOVE: 2;
57
+ /**
58
+ * Translate a cascade-resolved `ProposedAction` into one or more
59
+ * `injectMotionEvent` calls. Returns the same `ActionResult` envelope the
60
+ * desktop dispatcher uses — invalid args or driver errors do not throw.
61
+ *
62
+ * Behavior parity with `dispatch.ts`:
63
+ * - unknown action.kind → invalid_args
64
+ * - missing coords → invalid_args
65
+ * - bridge throw → driver_error
66
+ * - bridge ok:false → driver_error
67
+ *
68
+ * Coverage:
69
+ * - click / double_click / right_click → tap(s)
70
+ * - drag → DOWN at start, MOVE/UP at end
71
+ * - scroll → swipe (DOWN, MOVE, UP)
72
+ * - wait / finish → success: true (no input event)
73
+ * - type / key / hotkey → invalid_args (use AccessibilityNodeInfo
74
+ * or a separate keymap actor; out of
75
+ * scope for this privileged path).
76
+ */
77
+ export declare class AospInputActor {
78
+ private readonly deps;
79
+ readonly name = "aosp-input";
80
+ constructor(deps: AospInputActorDeps);
81
+ execute(action: ProposedAction): Promise<ActionResult>;
82
+ private tap;
83
+ private swipe;
84
+ private must;
85
+ private now;
86
+ }
87
+ //# sourceMappingURL=aosp-input-actor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"aosp-input-actor.d.ts","sourceRoot":"","sources":["../../src/actor/aosp-input-actor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAK/D,wEAAwE;AACxE,MAAM,WAAW,yBAAyB;IACxC;;;;;OAKG;IACH,iBAAiB,CAAC,IAAI,EAAE;QACtB,CAAC,EAAE,MAAM,CAAC;QACV,CAAC,EAAE,MAAM,CAAC;QACV,MAAM,EAAE,MAAM,CAAC;QACf,UAAU,EAAE,MAAM,CAAC;KACpB,GAAG,OAAO,CAAC;QAAE,EAAE,EAAE,OAAO,CAAA;KAAE,CAAC,CAAC;IAC7B,0EAA0E;IAC1E,yBAAyB,CAAC,IAAI,OAAO,CAAC,UAAU,CAAC,CAAC;CACnD;AAED,MAAM,WAAW,kBAAkB;IACjC,kEAAkE;IAClE,SAAS,EAAE,MAAM,yBAAyB,GAAG,IAAI,CAAC;IAClD,oCAAoC;IACpC,GAAG,CAAC,EAAE,MAAM,MAAM,CAAC;CACpB;AAED;;;GAGG;AACH,eAAO,MAAM,wBAAwB,EAAG,CAAU,CAAC;AACnD,eAAO,MAAM,sBAAsB,EAAG,CAAU,CAAC;AACjD,eAAO,MAAM,wBAAwB,EAAG,CAAU,CAAC;AAEnD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,cAAc;IAGb,OAAO,CAAC,QAAQ,CAAC,IAAI;IAFjC,QAAQ,CAAC,IAAI,gBAAgB;gBAEA,IAAI,EAAE,kBAAkB;IAE/C,OAAO,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,YAAY,CAAC;YAwH9C,GAAG;YA0BH,KAAK;YAsCL,IAAI;IAYlB,OAAO,CAAC,GAAG;CAGZ"}
@@ -0,0 +1,195 @@
1
+ /**
2
+ * WS7 — Brain (full-screen reasoning).
3
+ *
4
+ * Sends one image per display (each downscaled to ~1.3 MP, the OS-Atlas /
5
+ * Qwen3-VL `max_pixels` convention) to `runtime.useModel(IMAGE_DESCRIPTION,
6
+ * ...)`. The model is prompted to emit a JSON `BrainOutput` describing:
7
+ * - the scene in one paragraph,
8
+ * - which display to act on,
9
+ * - up to N ROIs the Actor should zoom into,
10
+ * - a single proposed action with rationale.
11
+ *
12
+ * The Brain itself doesn't dispatch — it just produces `BrainOutput`. The
13
+ * cascade ("ScreenSeekeR") is the orchestrator that takes a `BrainOutput`,
14
+ * optionally calls the Actor on cropped ROIs, and produces a concrete
15
+ * `ProposedAction` for the dispatcher.
16
+ *
17
+ * Image transport contract: we pass `imageUrl` as a `data:image/png;base64,...`
18
+ * URL. The WS2 MemoryArbiter intercepts at `ModelType.IMAGE_DESCRIPTION` and
19
+ * routes through its content-hash cache, so identical frames don't burn
20
+ * inference budget twice.
21
+ *
22
+ * Image policy (#9105): the compact scene already carries OCR text + AX boxes,
23
+ * which can suffice to pick the next target, so the `"on-escalation"` policy
24
+ * plans from that text-only context with NO image — routed through a TEXT model,
25
+ * since every IMAGE_DESCRIPTION provider rejects an empty imageUrl — and
26
+ * attaches the ~1.3 MP frame only when the planned target cannot be grounded
27
+ * against the OCR/AX boxes. The DEFAULT is `"always"` (legacy: image on every
28
+ * call) until a real-model CUA trajectory validates imageless planning accuracy;
29
+ * operators opt into `"on-escalation"` via the `COMPUTERUSE_BRAIN_IMAGE_POLICY`
30
+ * setting. `"never"` never attaches pixels.
31
+ *
32
+ * Parse strictness:
33
+ * - We try to parse the response as JSON (either the literal string or
34
+ * `result.description`).
35
+ * - On first parse failure, retry once with a stricter prompt.
36
+ * - On second failure, throw `BrainParseError` — the cascade surfaces this
37
+ * as a structured `ActionResult.error` and aborts the turn cleanly.
38
+ */
39
+ import { type IAgentRuntime, type ImageDescriptionResult } from "@elizaos/core";
40
+ import type { DisplayCapture } from "../platform/capture.js";
41
+ import type { Scene } from "../scene/scene-types.js";
42
+ import type { BrainOutput } from "./types.js";
43
+ export declare const BRAIN_MAX_PIXELS = 1310720;
44
+ export declare const BRAIN_MAX_ROIS = 2;
45
+ /** Bound on the per-Brain dHash→BrainOutput cache (LRU-ish, oldest evicted). */
46
+ export declare const BRAIN_DHASH_CACHE_MAX = 16;
47
+ /**
48
+ * dHash Hamming threshold for cached-plan reuse (#9581 continuous-understanding
49
+ * tuning). Exact-equality (distance 0) re-burned the IMAGE_DESCRIPTION model on
50
+ * cosmetically-identical frames — cursor jitter, a blinking caret, anti-aliasing,
51
+ * and tiny scroll noise all flip a few dHash bits.
52
+ *
53
+ * This mirrors `SCREEN_STATE_HAMMING_THRESHOLD`: a frame whose distance is below
54
+ * the threshold counts as unchanged; at or above it the frame changed — re-plan.
55
+ */
56
+ export declare const BRAIN_DHASH_HAMMING_THRESHOLD = 5;
57
+ /**
58
+ * Image-token estimate per source pixel for a vision model with the Qwen3-VL /
59
+ * OS-Atlas `max_pixels` convention: one visual token ≈ a 28×28 (≈750 px) patch.
60
+ * Used only to quantify the saving when a frame is *not* attached (#9105).
61
+ */
62
+ export declare const BRAIN_PIXELS_PER_IMAGE_TOKEN = 750;
63
+ /**
64
+ * When to attach the raw screenshot to the planning model (#9105):
65
+ * - `"always"` — attach the pixels on every call (legacy behaviour). Default.
66
+ * - `"on-escalation"` — plan from the compact OCR/AX scene with no image
67
+ * first (routed through a TEXT model); attach pixels
68
+ * only when the planned target cannot be grounded
69
+ * against the OCR/AX boxes or a strict-retry fires.
70
+ * - `"never"` — never attach the screenshot; plan from the scene alone.
71
+ */
72
+ export type BrainImagePolicy = "always" | "on-escalation" | "never";
73
+ /**
74
+ * Default is `"always"` (proven legacy behaviour). `"on-escalation"` cuts the
75
+ * dominant per-frame image-token cost but plans the first pass blind to the
76
+ * pixels, so it stays opt-in (via the `COMPUTERUSE_BRAIN_IMAGE_POLICY` runtime
77
+ * setting / env var) until a real-model CUA trajectory validates its accuracy.
78
+ */
79
+ export declare const DEFAULT_BRAIN_IMAGE_POLICY: BrainImagePolicy;
80
+ /**
81
+ * Resolve the Brain image policy from the `COMPUTERUSE_BRAIN_IMAGE_POLICY`
82
+ * runtime setting / env var, falling back to {@link DEFAULT_BRAIN_IMAGE_POLICY}.
83
+ * The operator escape hatch to enable imageless planning without a code change
84
+ * once it is validated against a real model.
85
+ */
86
+ export declare function resolveBrainImagePolicy(runtime: IAgentRuntime | null): BrainImagePolicy;
87
+ /** Token-accounting snapshot for a Brain instance (#9105 M3). */
88
+ export interface BrainStats {
89
+ /** IMAGE_DESCRIPTION model calls actually issued. */
90
+ invocations: number;
91
+ /** Describe calls served from the frame-dHash cache (no model call). */
92
+ cacheHits: number;
93
+ /**
94
+ * Model calls issued with NO screenshot attached (the scene's OCR/AX text
95
+ * sufficed). Each one saved roughly one full-frame image's worth of tokens.
96
+ */
97
+ imagelessCalls: number;
98
+ /**
99
+ * Estimated image tokens NOT sent because of imageless calls — the sum of
100
+ * `(width * height) / BRAIN_PIXELS_PER_IMAGE_TOKEN` over every imageless
101
+ * call, capped per frame at `BRAIN_MAX_PIXELS`.
102
+ */
103
+ estImageTokensSaved: number;
104
+ }
105
+ export declare class BrainParseError extends Error {
106
+ readonly raw: string;
107
+ constructor(message: string, raw: string);
108
+ }
109
+ export interface BrainDeps {
110
+ /** Optional override for tests — bypasses runtime.useModel. */
111
+ invokeModel?: (args: {
112
+ imageUrl: string;
113
+ prompt: string;
114
+ displayId: number;
115
+ }) => Promise<string | ImageDescriptionResult>;
116
+ /**
117
+ * When to attach the raw screenshot to the planning model (#9105). Defaults
118
+ * to `"always"` (legacy); see {@link resolveBrainImagePolicy} for the
119
+ * operator opt-in to `"on-escalation"`.
120
+ */
121
+ imagePolicy?: BrainImagePolicy;
122
+ }
123
+ export interface BrainInput {
124
+ scene: Scene;
125
+ goal: string;
126
+ /**
127
+ * Per-display capture buffers. If a display from `scene.displays` is
128
+ * missing here, the Brain skips it. The cascade is responsible for
129
+ * supplying these alongside the scene.
130
+ */
131
+ captures: Map<number, DisplayCapture>;
132
+ }
133
+ /**
134
+ * Pure description of a "Brain" call. Created by `Cascade.run` and
135
+ * test fixtures.
136
+ */
137
+ export declare class Brain {
138
+ private readonly runtime;
139
+ private readonly deps;
140
+ /**
141
+ * Frame-dHash → BrainOutput cache. The WS2 MemoryArbiter only dedups the
142
+ * IMAGE_DESCRIPTION call for LOCAL backends; the remote/cloud path bypasses
143
+ * it, so an identical screen re-burns tokens every step. This call-site cache
144
+ * skips the model entirely when the same frame is observed for the same goal,
145
+ * cutting the dominant CUA-loop token cost regardless of backend (#9105 M3).
146
+ */
147
+ private readonly dhashCache;
148
+ private readonly imagePolicy;
149
+ private invocations;
150
+ private cacheHits;
151
+ private imagelessCalls;
152
+ private estImageTokensSaved;
153
+ constructor(runtime: IAgentRuntime | null, deps?: BrainDeps);
154
+ /** Token-accounting snapshot (model calls, cache hits, imageless savings). */
155
+ getStats(): BrainStats;
156
+ private cacheKey;
157
+ /**
158
+ * Return a cached plan for the same goal whose frame is within
159
+ * `BRAIN_DHASH_HAMMING_THRESHOLD` bits of `dh` — a near-identical screen, not
160
+ * just a byte-identical one. On a hit the entry is moved to the end (LRU), so
161
+ * a steadily-evolving screen keeps its most-recent close match warm.
162
+ */
163
+ private findCached;
164
+ private rememberOutput;
165
+ observeAndPlan(input: BrainInput): Promise<BrainOutput>;
166
+ /**
167
+ * True when the imageless plan needs no screenshot to dispatch: a
168
+ * non-coordinate action (type/hotkey/key/wait/finish) carries everything in
169
+ * its args, and a coordinate action is fine when its `ref`/rationale resolves
170
+ * to a concrete OCR/AX box. When the target cannot be grounded we escalate to
171
+ * the pixels — correctness over token saving.
172
+ */
173
+ private resolvesWithoutImage;
174
+ private recordImageless;
175
+ private invoke;
176
+ }
177
+ export declare function brainPromptFor(compactSceneJson: string, goal: string, strict: boolean): string;
178
+ export declare function parseBrainOutput(raw: string): BrainOutput;
179
+ /**
180
+ * Encode a PNG buffer for transport to the IMAGE_DESCRIPTION model. We don't
181
+ * resize here — `runtime.useModel` adapters (and any vLLM backends behind
182
+ * them) handle the `max_pixels` downscale. The constant `BRAIN_MAX_PIXELS`
183
+ * is exported for the cascade so it can crop ROIs at the right native
184
+ * resolution before invoking the Actor.
185
+ */
186
+ export declare function encodeForBrain(png: Buffer): Promise<string>;
187
+ /**
188
+ * Estimate the visual tokens a frame would have cost the planning model, for
189
+ * the "tokens saved by going imageless" telemetry (#9105). The backends apply a
190
+ * `max_pixels` downscale, so the per-frame estimate is capped at
191
+ * `BRAIN_MAX_PIXELS`. Falls back to the cap when the PNG header is unreadable —
192
+ * an attached frame would have been downscaled to that ceiling regardless.
193
+ */
194
+ export declare function estimateImageTokens(png: Buffer): number;
195
+ //# sourceMappingURL=brain.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"brain.d.ts","sourceRoot":"","sources":["../../src/actor/brain.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,EACL,KAAK,aAAa,EAClB,KAAK,sBAAsB,EAG5B,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AAE7D,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,yBAAyB,CAAC;AAGrD,OAAO,KAAK,EAAE,WAAW,EAAiC,MAAM,YAAY,CAAC;AAE7E,eAAO,MAAM,gBAAgB,UAAY,CAAC;AAC1C,eAAO,MAAM,cAAc,IAAI,CAAC;AAChC,gFAAgF;AAChF,eAAO,MAAM,qBAAqB,KAAK,CAAC;AACxC;;;;;;;;GAQG;AACH,eAAO,MAAM,6BAA6B,IAAI,CAAC;AAC/C;;;;GAIG;AACH,eAAO,MAAM,4BAA4B,MAAM,CAAC;AAEhD;;;;;;;;GAQG;AACH,MAAM,MAAM,gBAAgB,GAAG,QAAQ,GAAG,eAAe,GAAG,OAAO,CAAC;AAEpE;;;;;GAKG;AACH,eAAO,MAAM,0BAA0B,EAAE,gBAA2B,CAAC;AAErE;;;;;GAKG;AACH,wBAAgB,uBAAuB,CACrC,OAAO,EAAE,aAAa,GAAG,IAAI,GAC5B,gBAAgB,CAOlB;AAED,iEAAiE;AACjE,MAAM,WAAW,UAAU;IACzB,qDAAqD;IACrD,WAAW,EAAE,MAAM,CAAC;IACpB,wEAAwE;IACxE,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,cAAc,EAAE,MAAM,CAAC;IACvB;;;;OAIG;IACH,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAmDD,qBAAa,eAAgB,SAAQ,KAAK;aAGtB,GAAG,EAAE,MAAM;gBAD3B,OAAO,EAAE,MAAM,EACC,GAAG,EAAE,MAAM;CAK9B;AAED,MAAM,WAAW,SAAS;IACxB,+DAA+D;IAC/D,WAAW,CAAC,EAAE,CAAC,IAAI,EAAE;QACnB,QAAQ,EAAE,MAAM,CAAC;QACjB,MAAM,EAAE,MAAM,CAAC;QACf,SAAS,EAAE,MAAM,CAAC;KACnB,KAAK,OAAO,CAAC,MAAM,GAAG,sBAAsB,CAAC,CAAC;IAC/C;;;;OAIG;IACH,WAAW,CAAC,EAAE,gBAAgB,CAAC;CAChC;AAED,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,KAAK,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb;;;;OAIG;IACH,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;CACvC;AAED;;;GAGG;AACH,qBAAa,KAAK;IAqBd,OAAO,CAAC,QAAQ,CAAC,OAAO;IACxB,OAAO,CAAC,QAAQ,CAAC,IAAI;IArBvB;;;;;;OAMG;IACH,OAAO,CAAC,QAAQ,CAAC,UAAU,CAKnB;IACR,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAmB;IAC/C,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,cAAc,CAAK;IAC3B,OAAO,CAAC,mBAAmB,CAAK;gBAGb,OAAO,EAAE,aAAa,GAAG,IAAI,EAC7B,IAAI,GAAE,SAAc;IAKvC,8EAA8E;IAC9E,QAAQ,IAAI,UAAU;IAStB,OAAO,CAAC,QAAQ;IAShB;;;;;OAKG;IACH,OAAO,CAAC,UAAU;IAoBlB,OAAO,CAAC,cAAc;IAmBhB,cAAc,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC;IA4I7D;;;;;;OAMG;IACH,OAAO,CA
AC,oBAAoB;IAa5B,OAAO,CAAC,eAAe;YAKT,MAAM;CA4BrB;AAID,wBAAgB,cAAc,CAC5B,gBAAgB,EAAE,MAAM,EACxB,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,OAAO,GACd,MAAM,CAgCR;AAID,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,WAAW,CA6FzD;AAkBD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAEjE;AAED;;;;;;GAMG;AACH,wBAAgB,mBAAmB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAMvD"}
@@ -0,0 +1,92 @@
1
+ /**
2
+ * WS7 — ScreenSeekeR cascade.
3
+ *
4
+ * Step 1: Brain looks at the full-screen scene for `target_display_id`.
5
+ * Step 2: For each ROI (up to BRAIN_MAX_ROIS), crop the *native resolution*
6
+ * region from the captured PNG and hand it to the Actor for
7
+ * fine grounding. The Actor returns display-local coords.
8
+ * Step 3: Combine the Brain's proposed action with the Actor's coordinates
9
+ * (or fall back to the OCR/AX deterministic actor on `ref`) and
10
+ * produce a single `ProposedAction` for the dispatcher.
11
+ *
12
+ * Cropping notes:
13
+ * - We do NOT decode the PNG. The cropped buffer is what we hand to the
14
+ * Actor. For the built-in OCR/AX actor, the crop is just a pass-through.
15
+ * - When a real PNG cropper is wired in (sharp / native module), this
16
+ * module is the place to add it: `cropPngToRoi(frame, bbox)`.
17
+ * - The cascade tests use the actual frame bytes; assertions are on the
18
+ * resolved coords and the order of Actor calls.
19
+ */
20
+ import type { DisplayCapture } from "../platform/capture.js";
21
+ import type { Scene } from "../scene/scene-types.js";
22
+ import type { Actor } from "./actor.js";
23
+ import { type Brain } from "./brain.js";
24
+ import type { BrainRoi, CascadeResult } from "./types.js";
25
+ export interface CascadeDeps {
26
+ brain: Brain;
27
+ actor?: Actor | null;
28
+ /** Cropper override (mostly tests). Returns a Buffer for the bbox region. */
29
+ crop?: (frame: Buffer, bbox: [number, number, number, number]) => Buffer;
30
+ }
31
+ export interface CascadeInput {
32
+ scene: Scene;
33
+ goal: string;
34
+ /** Per-display PNG captures, keyed by displayId. */
35
+ captures: Map<number, DisplayCapture>;
36
+ }
37
+ /** Grounding-cache accounting (#9105 M5). */
38
+ export interface CascadeGroundStats {
39
+ /** Grounding resolutions served from the per-Scene cache. */
40
+ hits: number;
41
+ /** Grounding resolutions that ran the full resolve (OCR/AX + optional actor). */
42
+ misses: number;
43
+ }
44
+ export declare class Cascade {
45
+ private readonly deps;
46
+ /**
47
+ * Per-Scene grounding cache (predict/ground split, #9105 M5). Grounding the
48
+ * same target (ref or rationale) on the same Scene is deterministic, so the
49
+ * cheap GROUND step is memoized — re-grounding within a turn skips a repeat
50
+ * `resolveReference` (OCR/AX) scan and a repeat (possibly model-backed)
51
+ * `actor.ground` call. Keyed by Scene timestamp so a new screen invalidates.
52
+ */
53
+ private readonly groundCache;
54
+ private groundStats;
55
+ constructor(deps: CascadeDeps);
56
+ /** Grounding cache hit/miss snapshot for token/work accounting. */
57
+ getGroundStats(): CascadeGroundStats;
58
+ run(input: CascadeInput): Promise<CascadeResult>;
59
+ /**
60
+ * Grounding-only entry (the `predict_click` half of the predict/ground split,
61
+ * #9105 M5 / #9170 M10). Resolves a `ref` (OCR/AX id) or free-form
62
+ * `instruction` to a display-local coordinate WITHOUT running the Brain —
63
+ * agent loops that do their own step planning (Anthropic / OpenAI
64
+ * computer-use) call this to reuse our deterministic OCR/AX + actor grounding
65
+ * and its per-Scene cache. Returns `null` when nothing can be grounded.
66
+ */
67
+ groundTarget(args: {
68
+ scene: Scene;
69
+ captures: Map<number, DisplayCapture>;
70
+ targetDisplayId: number;
71
+ ref?: string;
72
+ instruction?: string;
73
+ /** Optional ROI to ground inside when no `ref` is available. */
74
+ roi?: BrainRoi;
75
+ }): Promise<{
76
+ displayId: number;
77
+ x: number;
78
+ y: number;
79
+ } | null>;
80
+ private resolveBrainOutput;
81
+ private resolveCoords;
82
+ /** Drop cache entries from any Scene other than `timestamp`, then store. */
83
+ private rememberGround;
84
+ private resolveCoordsUncached;
85
+ private coordsForRef;
86
+ private groundReference;
87
+ private groundRoi;
88
+ private cropFrame;
89
+ }
90
+ export declare function setActor(actor: Actor | null): void;
91
+ export declare function getRegisteredActor(): Actor | null;
92
+ //# sourceMappingURL=cascade.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cascade.d.ts","sourceRoot":"","sources":["../../src/actor/cascade.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAGH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AAC7D,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,yBAAyB,CAAC;AACrD,OAAO,KAAK,EAAE,KAAK,EAAmB,MAAM,YAAY,CAAC;AAEzD,OAAO,EAAkB,KAAK,KAAK,EAAE,MAAM,YAAY,CAAC;AACxD,OAAO,KAAK,EAEV,QAAQ,EACR,aAAa,EAEd,MAAM,YAAY,CAAC;AAEpB,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,KAAK,CAAC;IACb,KAAK,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC;IACrB,6EAA6E;IAC7E,IAAI,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,KAAK,MAAM,CAAC;CAC1E;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,KAAK,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,oDAAoD;IACpD,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;CACvC;AAED,6CAA6C;AAC7C,MAAM,WAAW,kBAAkB;IACjC,6DAA6D;IAC7D,IAAI,EAAE,MAAM,CAAC;IACb,iFAAiF;IACjF,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,qBAAa,OAAO;IAcN,OAAO,CAAC,QAAQ,CAAC,IAAI;IAbjC;;;;;;OAMG;IACH,OAAO,CAAC,QAAQ,CAAC,WAAW,CAGxB;IACJ,OAAO,CAAC,WAAW,CAA8C;gBAEpC,IAAI,EAAE,WAAW;IAE9C,mEAAmE;IACnE,cAAc,IAAI,kBAAkB;IAI9B,GAAG,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,aAAa,CAAC;IAStD;;;;;;;OAOG;IACG,YAAY,CAAC,IAAI,EAAE;QACvB,KAAK,EAAE,KAAK,CAAC;QACb,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;QACtC,eAAe,EAAE,MAAM,CAAC;QACxB,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,gEAAgE;QAChE,GAAG,CAAC,EAAE,QAAQ,CAAC;KAChB,GAAG,OAAO,CAAC;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAC;QAAC,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;YAsBjD,kBAAkB;YAwJlB,aAAa;IA2B3B,4EAA4E;IAC5E,OAAO,CAAC,cAAc;YAYR,qBAAqB;YAkCrB,YAAY;YAUZ,eAAe;YAiDf,SAAS;IAiCvB,OAAO,CAAC,SAAS;CAYlB;AAQD,wBAAgB,QAAQ,CAAC,KAAK,EAAE,KAAK,GAAG,IAAI,GAAG,IAAI,CAElD;AACD,wBAAgB,kBAAkB,IAAI,KAAK,GAAG,IAAI,CAEjD"}
@@ -0,0 +1,276 @@
1
+ /**
2
+ * WS7 — BaseComputerInterface (TypeScript port).
3
+ *
4
+ * Shape ported from trycua/cua's `BaseComputerInterface` (MIT). The original
5
+ * provides one standard API surface for screenshot + mouse + keyboard + screen
6
+ * geometry that the Brain/Actor cascade can call without knowing what OS or
7
+ * driver is underneath.
8
+ *
9
+ * Origin: https://github.com/trycua/cua/blob/main/libs/python/computer/
10
+ * computer/interface/base.py
11
+ * License: MIT, Copyright (c) 2025 trycua
12
+ *
13
+ * This is NOT a verbatim re-implementation — Eliza's underlying drivers
14
+ * (`platform/driver.ts`, `platform/capture.ts`) already cover most of the same
15
+ * surface. We give the cascade one well-typed seam so tests can swap in a
16
+ * fake driver, and so the Brain doesn't need to know about display-id
17
+ * resolution, retina scaling, or coord-source switches.
18
+ *
19
+ * Coordinate contract (matches WS5):
20
+ * - Every method that takes `(displayId, x, y)` uses LOCAL pixel coords for
21
+ * that display. The interface routes those through `localToGlobalDefault`
22
+ * before any input fires.
23
+ * - VLMs producing coords in image-space (i.e. against the downscaled max-
24
+ * pixels frame) MUST first call `toScreenCoordinates(...)` to get the
25
+ * display-LOCAL pixel coord. The inverse `toScreenshotCoordinates(...)` is for
26
+ * logging / replay.
27
+ * - `displayId` must be a known display from `listDisplays()`. Unknown ids
28
+ * throw — this is the safety net the dispatch layer relies on.
29
+ */
30
+ import { type DisplayCapture } from "../platform/capture.js";
31
+ import type { Scene, SceneAxNode } from "../scene/scene-types.js";
32
+ import type { DisplayDescriptor } from "../types.js";
33
+ /** Mouse button identifier; the optional `button` of `mouseDown` / `mouseUp` uses this type. */
34
+ export type MouseButton = "left" | "middle" | "right";
35
+ export interface ScreenshotResult { // Resolved value of `screenshot()`: one frame plus the geometry of its display.
36
+ displayId: number; // id of the display the frame belongs to
37
+ /** PNG bytes at backing-store resolution. */
38
+ frame: Buffer;
39
+ /** Backing-store / logical-pixel ratio. */
40
+ scaleFactor: number;
41
+ /** [x, y, w, h] in OS-global pixel space, for the display this frame belongs to. */
42
+ bounds: [number, number, number, number];
43
+ }
44
+ export interface DisplayPoint { // A point addressed as (display id, coordinates local to that display); argument type of the click/move methods.
45
+ displayId: number;
46
+ /** LOCAL pixel coords inside the display's logical bounds. */
47
+ x: number;
48
+ y: number;
49
+ }
50
+ export interface DragPath { // Argument of `drag()`: a list of points that all belong to one display.
51
+ displayId: number;
52
+ path: Array<{ // NOTE(review): presumably ordered, display-LOCAL points like DisplayPoint — confirm
53
+ x: number;
54
+ y: number;
55
+ }>;
56
+ }
57
+ export interface ScrollDelta { // Argument of `scroll()`.
58
+ displayId: number;
59
+ x: number; // NOTE(review): (x, y) is presumably the display-LOCAL point the scroll is applied at — confirm
60
+ y: number;
61
+ /** Negative = scroll up/left, positive = scroll down/right. */
62
+ dx: number;
63
+ dy: number;
64
+ }
65
+ export interface CursorPosition { // Return type of `getCursorPosition()`; also the shape held in `ComputerInterfaceDeps.cursorState.current`.
66
+ displayId: number;
67
+ x: number;
68
+ y: number;
69
+ }
70
+ /**
71
+ * The single seam the cascade calls. All methods are safe to await
72
+ * concurrently within one display (driver semantics permitting); callers
73
+ * serialize at the dispatch layer.
74
+ */
75
+ export interface ComputerInterface {
76
+ screenshot(opts?: {
77
+ displayId?: number; // optional; NOTE(review): presumably falls back to a default display when omitted — confirm
78
+ }): Promise<ScreenshotResult>;
79
+ mouseDown(point: DisplayPoint & {
80
+ button?: MouseButton; // optional; the default button is not visible from this declaration
81
+ }): Promise<void>;
82
+ mouseUp(point: DisplayPoint & {
83
+ button?: MouseButton; // optional; the default button is not visible from this declaration
84
+ }): Promise<void>;
85
+ leftClick(point: DisplayPoint): Promise<void>;
86
+ rightClick(point: DisplayPoint): Promise<void>;
87
+ doubleClick(point: DisplayPoint): Promise<void>;
88
+ moveCursor(point: DisplayPoint): Promise<void>;
89
+ dragTo(point: DisplayPoint): Promise<void>; // NOTE(review): only the end point is passed — the start is presumably the last cursor position; confirm
90
+ drag(path: DragPath): Promise<void>;
91
+ keyDown(args: {
92
+ key: string;
93
+ }): Promise<void>;
94
+ keyUp(args: {
95
+ key: string;
96
+ }): Promise<void>;
97
+ typeText(args: {
98
+ text: string;
99
+ }): Promise<void>;
100
+ pressKey(args: {
101
+ key: string;
102
+ }): Promise<void>;
103
+ hotkey(args: {
104
+ keys: string[];
105
+ }): Promise<void>;
106
+ scroll(delta: ScrollDelta): Promise<void>;
107
+ scrollUp(args: {
108
+ displayId: number;
109
+ clicks: number; // NOTE(review): unit (wheel notches?) and scroll location are not specified here — confirm
110
+ }): Promise<void>;
111
+ scrollDown(args: {
112
+ displayId: number;
113
+ clicks: number; // NOTE(review): unit (wheel notches?) and scroll location are not specified here — confirm
114
+ }): Promise<void>;
115
+ getScreenSize(args: {
116
+ displayId: number;
117
+ }): { // synchronous: returns the size directly, not a Promise
118
+ w: number;
119
+ h: number;
120
+ };
121
+ getCursorPosition(): CursorPosition; // synchronous; NOTE(review): possibly the remembered last target (see ComputerInterfaceDeps.cursorState) rather than a live OS query — confirm
122
+ /**
123
+ * Convert from VLM-image-space (the downscaled max-pixels frame the model
124
+ * was shown) into OS-LOCAL-pixel-space for the same display.
125
+ *
126
+ * `(imgX, imgY)` are pixel coords inside an `imgW × imgH` image. Returned
127
+ * `(x, y)` are the same physical point but in the display's logical bounds.
128
+ */
129
+ toScreenCoordinates(args: {
130
+ displayId: number;
131
+ imgX: number;
132
+ imgY: number;
133
+ imgW: number;
134
+ imgH: number;
135
+ }): {
136
+ x: number;
137
+ y: number;
138
+ };
139
+ /** Inverse of `toScreenCoordinates`: takes display-local `(x, y)` and returns `(imgX, imgY)` inside an `imgW × imgH` image. */
140
+ toScreenshotCoordinates(args: {
141
+ displayId: number;
142
+ x: number;
143
+ y: number;
144
+ imgW: number;
145
+ imgH: number;
146
+ }): {
147
+ imgX: number;
148
+ imgY: number;
149
+ };
150
+ /**
151
+ * Return the AX (accessibility) tree the scene-builder snapshot already
152
+ * collected. We don't re-snapshot here — that's WS6's job. Cascade calls
153
+ * this to enumerate clickable AX nodes by display.
154
+ */
155
+ getAccessibilityTree(args: {
156
+ displayId?: number; // optional; NOTE(review): presumably filters nodes to one display when given — confirm
157
+ }): SceneAxNode[];
158
+ }
159
+ /**
160
+ * Optional dependencies for `DefaultComputerInterface`, the reference
161
+ * implementation that delegates to `platform/driver.ts` and `platform/capture.ts`.
162
+ * `getScene` lets the AX tree be read from the latest WS6 snapshot without re-walking the OS.
163
+ */
164
+ export interface ComputerInterfaceDeps {
165
+ /** Latest scene accessor — used for `getAccessibilityTree`. */
166
+ getScene?: () => Scene | null;
167
+ /** Capture override (mostly for tests). */
168
+ capture?: (displayId: number) => Promise<DisplayCapture>;
169
+ /** Driver overrides for tests. */
170
+ driver?: Partial<{ // Partial: any subset of the driver functions below may be supplied
171
+ click: (x: number, y: number) => Promise<void>;
172
+ doubleClick: (x: number, y: number) => Promise<void>;
173
+ rightClick: (x: number, y: number) => Promise<void>;
174
+ mouseMove: (x: number, y: number) => Promise<void>;
175
+ mouseDown: (x: number, y: number, button: MouseButton) => Promise<void>;
176
+ mouseUp: (x: number, y: number, button: MouseButton) => Promise<void>;
177
+ drag: (x1: number, y1: number, x2: number, y2: number) => Promise<void>;
178
+ dragPath: (path: Array<{
179
+ x: number;
180
+ y: number;
181
+ }>) => Promise<void>;
182
+ scroll: (x: number, y: number, direction: "up" | "down" | "left" | "right", amount: number) => Promise<void>;
183
+ type: (text: string) => Promise<void>;
184
+ keyPress: (key: string) => Promise<void>;
185
+ keyCombo: (combo: string) => Promise<void>;
186
+ keyDown: (key: string) => Promise<void>;
187
+ keyUp: (key: string) => Promise<void>;
188
+ }>;
189
+ listDisplays?: () => DisplayDescriptor[]; // display enumeration override; NOTE(review): presumably what display-id validation consults — confirm
190
+ /**
191
+ * Where the implementation should remember the last cursor target.
192
+ * Set whenever a movement-bearing call resolves successfully.
193
+ */
194
+ cursorState?: {
195
+ current: CursorPosition;
196
+ };
197
+ }
198
+ export declare class DefaultComputerInterface implements ComputerInterface { // Concrete `ComputerInterface`; member contracts are documented on the interface.
199
+ private readonly deps;
200
+ constructor(deps?: ComputerInterfaceDeps); // `deps` is optional; each field of ComputerInterfaceDeps is itself optional
201
+ screenshot(opts?: {
202
+ displayId?: number;
203
+ }): Promise<ScreenshotResult>;
204
+ mouseDown(point: DisplayPoint & {
205
+ button?: MouseButton;
206
+ }): Promise<void>;
207
+ mouseUp(point: DisplayPoint & {
208
+ button?: MouseButton;
209
+ }): Promise<void>;
210
+ leftClick(point: DisplayPoint): Promise<void>;
211
+ rightClick(point: DisplayPoint): Promise<void>;
212
+ doubleClick(point: DisplayPoint): Promise<void>;
213
+ moveCursor(point: DisplayPoint): Promise<void>;
214
+ dragTo(point: DisplayPoint): Promise<void>;
215
+ drag(args: DragPath): Promise<void>; // parameter is named `args` here but `path` on the interface; the type is the same
216
+ keyDown(args: {
217
+ key: string;
218
+ }): Promise<void>;
219
+ keyUp(args: {
220
+ key: string;
221
+ }): Promise<void>;
222
+ typeText(args: {
223
+ text: string;
224
+ }): Promise<void>;
225
+ pressKey(args: {
226
+ key: string;
227
+ }): Promise<void>;
228
+ hotkey(args: {
229
+ keys: string[];
230
+ }): Promise<void>;
231
+ scroll(delta: ScrollDelta): Promise<void>;
232
+ scrollUp(args: {
233
+ displayId: number;
234
+ clicks: number;
235
+ }): Promise<void>;
236
+ scrollDown(args: {
237
+ displayId: number;
238
+ clicks: number;
239
+ }): Promise<void>;
240
+ getScreenSize(args: {
241
+ displayId: number;
242
+ }): {
243
+ w: number;
244
+ h: number;
245
+ };
246
+ getCursorPosition(): CursorPosition;
247
+ toScreenCoordinates(args: {
248
+ displayId: number;
249
+ imgX: number;
250
+ imgY: number;
251
+ imgW: number;
252
+ imgH: number;
253
+ }): {
254
+ x: number;
255
+ y: number;
256
+ };
257
+ toScreenshotCoordinates(args: {
258
+ displayId: number;
259
+ x: number;
260
+ y: number;
261
+ imgW: number;
262
+ imgH: number;
263
+ }): {
264
+ imgX: number;
265
+ imgY: number;
266
+ };
267
+ getAccessibilityTree(args: {
268
+ displayId?: number;
269
+ }): SceneAxNode[];
270
+ private primaryId; // NOTE(review): private helpers are emitted without signatures; presumably primary-display lookup — confirm in source
271
+ private requireDisplay; // NOTE(review): presumably the unknown-displayId check that throws — confirm in source
272
+ private toGlobalChecked; // NOTE(review): presumably validated local-to-global coordinate conversion — confirm in source
273
+ }
274
+ /** Convenience factory used by the cascade; takes optional deps and is typed to return the `ComputerInterface` contract, not a concrete class. */
275
+ export declare function makeComputerInterface(deps?: ComputerInterfaceDeps): ComputerInterface;
276
+ //# sourceMappingURL=computer-interface.d.ts.map