@elizaos/plugin-computeruse 2.0.3-beta.2 → 2.0.3-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. package/dist/actions/clipboard.d.ts +22 -0
  2. package/dist/actions/clipboard.d.ts.map +1 -0
  3. package/dist/actions/helpers.d.ts +33 -0
  4. package/dist/actions/helpers.d.ts.map +1 -0
  5. package/dist/actions/progress.d.ts +26 -0
  6. package/dist/actions/progress.d.ts.map +1 -0
  7. package/dist/actions/use-computer-agent.d.ts +113 -0
  8. package/dist/actions/use-computer-agent.d.ts.map +1 -0
  9. package/dist/actions/use-computer.d.ts +3 -0
  10. package/dist/actions/use-computer.d.ts.map +1 -0
  11. package/dist/actions/window-handlers.d.ts +11 -0
  12. package/dist/actions/window-handlers.d.ts.map +1 -0
  13. package/dist/actions/window.d.ts +11 -0
  14. package/dist/actions/window.d.ts.map +1 -0
  15. package/dist/actor/actor.d.ts +84 -0
  16. package/dist/actor/actor.d.ts.map +1 -0
  17. package/dist/actor/agent-callbacks.d.ts +128 -0
  18. package/dist/actor/agent-callbacks.d.ts.map +1 -0
  19. package/dist/actor/agent-loop.d.ts +134 -0
  20. package/dist/actor/agent-loop.d.ts.map +1 -0
  21. package/dist/actor/aosp-input-actor.d.ts +87 -0
  22. package/dist/actor/aosp-input-actor.d.ts.map +1 -0
  23. package/dist/actor/brain.d.ts +195 -0
  24. package/dist/actor/brain.d.ts.map +1 -0
  25. package/dist/actor/cascade.d.ts +92 -0
  26. package/dist/actor/cascade.d.ts.map +1 -0
  27. package/dist/actor/computer-interface.d.ts +276 -0
  28. package/dist/actor/computer-interface.d.ts.map +1 -0
  29. package/dist/actor/dispatch.d.ts +24 -0
  30. package/dist/actor/dispatch.d.ts.map +1 -0
  31. package/dist/actor/index.d.ts +12 -0
  32. package/dist/actor/index.d.ts.map +1 -0
  33. package/dist/actor/types.d.ts +94 -0
  34. package/dist/actor/types.d.ts.map +1 -0
  35. package/dist/approval-manager.d.ts +29 -0
  36. package/dist/approval-manager.d.ts.map +1 -0
  37. package/dist/index.d.ts +46 -0
  38. package/dist/index.d.ts.map +1 -0
  39. package/dist/index.js +13649 -0
  40. package/dist/index.js.map +68 -0
  41. package/dist/mcp/index.d.ts +8 -0
  42. package/dist/mcp/index.d.ts.map +1 -0
  43. package/dist/mcp/server.d.ts +42 -0
  44. package/dist/mcp/server.d.ts.map +1 -0
  45. package/dist/mcp/tools.d.ts +53 -0
  46. package/dist/mcp/tools.d.ts.map +1 -0
  47. package/dist/mobile/android-bridge.d.ts +263 -0
  48. package/dist/mobile/android-bridge.d.ts.map +1 -0
  49. package/dist/mobile/android-scene.d.ts +52 -0
  50. package/dist/mobile/android-scene.d.ts.map +1 -0
  51. package/dist/mobile/android-trajectory.d.ts +66 -0
  52. package/dist/mobile/android-trajectory.d.ts.map +1 -0
  53. package/dist/mobile/index.d.ts +19 -0
  54. package/dist/mobile/index.d.ts.map +1 -0
  55. package/dist/mobile/ios-app-intent-registry.d.ts +20 -0
  56. package/dist/mobile/ios-app-intent-registry.d.ts.map +1 -0
  57. package/dist/mobile/ios-bridge.d.ts +359 -0
  58. package/dist/mobile/ios-bridge.d.ts.map +1 -0
  59. package/dist/mobile/ios-computer-interface.d.ts +160 -0
  60. package/dist/mobile/ios-computer-interface.d.ts.map +1 -0
  61. package/dist/mobile/mobile-computer-interface.d.ts +142 -0
  62. package/dist/mobile/mobile-computer-interface.d.ts.map +1 -0
  63. package/dist/mobile/mobile-screen-capture.d.ts +64 -0
  64. package/dist/mobile/mobile-screen-capture.d.ts.map +1 -0
  65. package/dist/mobile/ocr-provider.d.ts +187 -0
  66. package/dist/mobile/ocr-provider.d.ts.map +1 -0
  67. package/dist/mobile/ocr-provider.js +111 -0
  68. package/dist/mobile/ocr-provider.js.map +10 -0
  69. package/dist/osworld/action-converter.d.ts +38 -0
  70. package/dist/osworld/action-converter.d.ts.map +1 -0
  71. package/dist/osworld/adapter.d.ts +79 -0
  72. package/dist/osworld/adapter.d.ts.map +1 -0
  73. package/dist/osworld/types.d.ts +69 -0
  74. package/dist/osworld/types.d.ts.map +1 -0
  75. package/dist/parity/index.d.ts +9 -0
  76. package/dist/parity/index.d.ts.map +1 -0
  77. package/dist/parity/parity-matrix.d.ts +82 -0
  78. package/dist/parity/parity-matrix.d.ts.map +1 -0
  79. package/dist/parity/screenspot.d.ts +56 -0
  80. package/dist/parity/screenspot.d.ts.map +1 -0
  81. package/dist/platform/a11y.d.ts +64 -0
  82. package/dist/platform/a11y.d.ts.map +1 -0
  83. package/dist/platform/browser.d.ts +61 -0
  84. package/dist/platform/browser.d.ts.map +1 -0
  85. package/dist/platform/capabilities.d.ts +33 -0
  86. package/dist/platform/capabilities.d.ts.map +1 -0
  87. package/dist/platform/capture.d.ts +65 -0
  88. package/dist/platform/capture.d.ts.map +1 -0
  89. package/dist/platform/clipboard.d.ts +24 -0
  90. package/dist/platform/clipboard.d.ts.map +1 -0
  91. package/dist/platform/coords.d.ts +73 -0
  92. package/dist/platform/coords.d.ts.map +1 -0
  93. package/dist/platform/desktop.d.ts +56 -0
  94. package/dist/platform/desktop.d.ts.map +1 -0
  95. package/dist/platform/displays.d.ts +97 -0
  96. package/dist/platform/displays.d.ts.map +1 -0
  97. package/dist/platform/driver.d.ts +49 -0
  98. package/dist/platform/driver.d.ts.map +1 -0
  99. package/dist/platform/file-ops.d.ts +27 -0
  100. package/dist/platform/file-ops.d.ts.map +1 -0
  101. package/dist/platform/helpers.d.ts +60 -0
  102. package/dist/platform/helpers.d.ts.map +1 -0
  103. package/dist/platform/launch.d.ts +54 -0
  104. package/dist/platform/launch.d.ts.map +1 -0
  105. package/dist/platform/normalized-coords.d.ts +46 -0
  106. package/dist/platform/normalized-coords.d.ts.map +1 -0
  107. package/dist/platform/nut-driver.d.ts +86 -0
  108. package/dist/platform/nut-driver.d.ts.map +1 -0
  109. package/dist/platform/permissions.d.ts +33 -0
  110. package/dist/platform/permissions.d.ts.map +1 -0
  111. package/dist/platform/process-list.d.ts +32 -0
  112. package/dist/platform/process-list.d.ts.map +1 -0
  113. package/dist/platform/ps-host.d.ts +77 -0
  114. package/dist/platform/ps-host.d.ts.map +1 -0
  115. package/dist/platform/screenshot-errors.d.ts +54 -0
  116. package/dist/platform/screenshot-errors.d.ts.map +1 -0
  117. package/dist/platform/screenshot-quality.d.ts +11 -0
  118. package/dist/platform/screenshot-quality.d.ts.map +1 -0
  119. package/dist/platform/screenshot.d.ts +16 -0
  120. package/dist/platform/screenshot.d.ts.map +1 -0
  121. package/dist/platform/security.d.ts +20 -0
  122. package/dist/platform/security.d.ts.map +1 -0
  123. package/dist/platform/terminal.d.ts +38 -0
  124. package/dist/platform/terminal.d.ts.map +1 -0
  125. package/dist/platform/wayland-portal.d.ts +25 -0
  126. package/dist/platform/wayland-portal.d.ts.map +1 -0
  127. package/dist/platform/windows-list.d.ts +78 -0
  128. package/dist/platform/windows-list.d.ts.map +1 -0
  129. package/dist/providers/computer-state.d.ts +9 -0
  130. package/dist/providers/computer-state.d.ts.map +1 -0
  131. package/dist/providers/scene.d.ts +21 -0
  132. package/dist/providers/scene.d.ts.map +1 -0
  133. package/dist/register-routes.d.ts +2 -0
  134. package/dist/register-routes.d.ts.map +1 -0
  135. package/dist/register-routes.js +13836 -0
  136. package/dist/register-routes.js.map +71 -0
  137. package/dist/routes/computer-use-compat-routes.d.ts +29 -0
  138. package/dist/routes/computer-use-compat-routes.d.ts.map +1 -0
  139. package/dist/routes/computer-use-routes.d.ts +3 -0
  140. package/dist/routes/computer-use-routes.d.ts.map +1 -0
  141. package/dist/routes/sandbox-routes.d.ts +53 -0
  142. package/dist/routes/sandbox-routes.d.ts.map +1 -0
  143. package/dist/sandbox/docker-backend.d.ts +69 -0
  144. package/dist/sandbox/docker-backend.d.ts.map +1 -0
  145. package/dist/sandbox/index.d.ts +62 -0
  146. package/dist/sandbox/index.d.ts.map +1 -0
  147. package/dist/sandbox/qemu-backend.d.ts +48 -0
  148. package/dist/sandbox/qemu-backend.d.ts.map +1 -0
  149. package/dist/sandbox/remote-guest.d.ts +72 -0
  150. package/dist/sandbox/remote-guest.d.ts.map +1 -0
  151. package/dist/sandbox/sandbox-driver.d.ts +41 -0
  152. package/dist/sandbox/sandbox-driver.d.ts.map +1 -0
  153. package/dist/sandbox/surface-types.d.ts +17 -0
  154. package/dist/sandbox/surface-types.d.ts.map +1 -0
  155. package/dist/sandbox/types.d.ts +138 -0
  156. package/dist/sandbox/types.d.ts.map +1 -0
  157. package/dist/sandbox/wsb-backend.d.ts +48 -0
  158. package/dist/sandbox/wsb-backend.d.ts.map +1 -0
  159. package/dist/scene/a11y-provider.d.ts +83 -0
  160. package/dist/scene/a11y-provider.d.ts.map +1 -0
  161. package/dist/scene/apps.d.ts +39 -0
  162. package/dist/scene/apps.d.ts.map +1 -0
  163. package/dist/scene/dhash.d.ts +105 -0
  164. package/dist/scene/dhash.d.ts.map +1 -0
  165. package/dist/scene/ocr-adapter.d.ts +64 -0
  166. package/dist/scene/ocr-adapter.d.ts.map +1 -0
  167. package/dist/scene/scene-builder.d.ts +107 -0
  168. package/dist/scene/scene-builder.d.ts.map +1 -0
  169. package/dist/scene/scene-types.d.ts +70 -0
  170. package/dist/scene/scene-types.d.ts.map +1 -0
  171. package/dist/scene/screen-state.d.ts +105 -0
  172. package/dist/scene/screen-state.d.ts.map +1 -0
  173. package/dist/scene/serialize.d.ts +28 -0
  174. package/dist/scene/serialize.d.ts.map +1 -0
  175. package/dist/security/browser-script-policy.d.ts +9 -0
  176. package/dist/security/browser-script-policy.d.ts.map +1 -0
  177. package/dist/services/computer-use-service.d.ts +142 -0
  178. package/dist/services/computer-use-service.d.ts.map +1 -0
  179. package/dist/services/desktop-control.d.ts +35 -0
  180. package/dist/services/desktop-control.d.ts.map +1 -0
  181. package/dist/services/index.d.ts +7 -0
  182. package/dist/services/index.d.ts.map +1 -0
  183. package/dist/services/vision-context-provider.d.ts +32 -0
  184. package/dist/services/vision-context-provider.d.ts.map +1 -0
  185. package/dist/types.d.ts +385 -0
  186. package/dist/types.d.ts.map +1 -0
  187. package/package.json +16 -5
  188. package/registry-entry.json +74 -0
@@ -0,0 +1,22 @@
1
+ /**
2
+ * CLIPBOARD parent action — read or write the host system clipboard.
3
+ *
4
+ * Routes through `driverReadClipboard` / `driverWriteClipboard`, which
5
+ * select the per-OS tool (pbcopy/pbpaste, wl-copy/wl-paste, xclip,
6
+ * PowerShell Set-Clipboard / Get-Clipboard).
7
+ *
8
+ * Subactions: `read`, `write`. The plugin index promotes them to virtual
9
+ * top-level actions (`CLIPBOARD_READ`, `CLIPBOARD_WRITE`) so the planner
10
+ * can pick a specific verb directly from the action catalogue.
11
+ */
12
+ import type { Action } from "@elizaos/core";
13
+ declare const CLIPBOARD_ACTIONS: readonly ["read", "write"];
14
+ export type ClipboardActionType = (typeof CLIPBOARD_ACTIONS)[number];
15
+ export interface ClipboardActionParams {
16
+ action: ClipboardActionType;
17
+ /** Text payload for `write`. Ignored for `read`. */
18
+ text?: string;
19
+ }
20
+ export declare const clipboardAction: Action;
21
+ export {};
22
+ //# sourceMappingURL=clipboard.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"clipboard.d.ts","sourceRoot":"","sources":["../../src/actions/clipboard.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EACV,MAAM,EAOP,MAAM,eAAe,CAAC;AASvB,QAAA,MAAM,iBAAiB,4BAA6B,CAAC;AACrD,MAAM,MAAM,mBAAmB,GAAG,CAAC,OAAO,iBAAiB,CAAC,CAAC,MAAM,CAAC,CAAC;AAErE,MAAM,WAAW,qBAAqB;IACpC,MAAM,EAAE,mBAAmB,CAAC;IAC5B,oDAAoD;IACpD,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AA+ED,eAAO,MAAM,eAAe,EAAE,MAgI7B,CAAC"}
@@ -0,0 +1,33 @@
1
+ import type { ActionResult, HandlerOptions, Memory } from "@elizaos/core";
2
+ export interface NativeComputerUseResult {
3
+ success: boolean;
4
+ message?: string;
5
+ error?: string;
6
+ permissionDenied?: boolean;
7
+ permissionType?: string;
8
+ approvalRequired?: boolean;
9
+ approvalId?: string;
10
+ screenshot?: string;
11
+ frontendScreenshot?: string;
12
+ }
13
+ export declare function resolveActionParams<T>(message: Memory, options?: HandlerOptions): T;
14
+ export declare function buildScreenshotAttachment(args: {
15
+ idPrefix: string;
16
+ screenshot: string;
17
+ title: string;
18
+ description: string;
19
+ }): {
20
+ id: string;
21
+ url: string;
22
+ title: string;
23
+ source: string;
24
+ description: string;
25
+ contentType: "image";
26
+ };
27
+ export declare function toComputerUseActionResult<T extends NativeComputerUseResult>({ action, result, text, suppressClipboard, }: {
28
+ action: string;
29
+ result: T;
30
+ text: string;
31
+ suppressClipboard?: boolean;
32
+ }): ActionResult;
33
+ //# sourceMappingURL=helpers.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../src/actions/helpers.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,eAAe,CAAC;AAE1E,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAED,wBAAgB,mBAAmB,CAAC,CAAC,EACnC,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,cAAc,GACvB,CAAC,CAiBH;AAED,wBAAgB,yBAAyB,CAAC,IAAI,EAAE;IAC9C,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;CACrB;;;;;;;EASA;AAqBD,wBAAgB,yBAAyB,CAAC,CAAC,SAAS,uBAAuB,EAAE,EAC3E,MAAM,EACN,MAAM,EACN,IAAI,EACJ,iBAAyB,GAC1B,EAAE;IACD,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,CAAC,CAAC;IACV,IAAI,EAAE,MAAM,CAAC;IACb,iBAAiB,CAAC,EAAE,OAAO,CAAC;CAC7B,GAAG,YAAY,CAYf"}
@@ -0,0 +1,26 @@
1
+ import { type Content, type HandlerCallback } from "@elizaos/core";
2
+ import type { ApprovalSnapshot, PendingApproval } from "../types.js";
3
+ export declare const ACTION_PROGRESS_SOURCE = "action_progress";
4
+ export declare const COMPUTER_USE_APPROVAL_SOURCE = "computeruse_approval";
5
+ export interface StepProgressInput {
6
+ actionName: string;
7
+ step: number;
8
+ kind: string;
9
+ rationale?: string;
10
+ success?: boolean;
11
+ error?: string;
12
+ source?: string;
13
+ }
14
+ export interface ApprovalRelayService {
15
+ getApprovalSnapshot(): ApprovalSnapshot;
16
+ subscribeApprovals(listener: (snapshot: ApprovalSnapshot) => void): () => void;
17
+ }
18
+ export interface ApprovalRelayOptions {
19
+ ownerId?: string;
20
+ }
21
+ export declare function isStreamProgressEnabled(value: unknown): value is true;
22
+ export declare function formatStepProgressText(step: number, kind: string, rationale?: string): string;
23
+ export declare function buildStepProgressContent(input: StepProgressInput): Content;
24
+ export declare function buildApprovalPromptContent(approval: PendingApproval, options?: ApprovalRelayOptions): Content;
25
+ export declare function withApprovalRelay<T>(service: ApprovalRelayService, callback: HandlerCallback | undefined, run: () => Promise<T>, options?: ApprovalRelayOptions): Promise<T>;
26
+ //# sourceMappingURL=progress.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"progress.d.ts","sourceRoot":"","sources":["../../src/actions/progress.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,OAAO,EACZ,KAAK,eAAe,EAGrB,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAErE,eAAO,MAAM,sBAAsB,oBAAoB,CAAC;AACxD,eAAO,MAAM,4BAA4B,yBAAyB,CAAC;AAEnE,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,oBAAoB;IACnC,mBAAmB,IAAI,gBAAgB,CAAC;IACxC,kBAAkB,CAChB,QAAQ,EAAE,CAAC,QAAQ,EAAE,gBAAgB,KAAK,IAAI,GAC7C,MAAM,IAAI,CAAC;CACf;AAED,MAAM,WAAW,oBAAoB;IACnC,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,IAAI,CAErE;AAED,wBAAgB,sBAAsB,CACpC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,MAAM,EACZ,SAAS,CAAC,EAAE,MAAM,GACjB,MAAM,CAGR;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,iBAAiB,GAAG,OAAO,CAoB1E;AAYD,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,eAAe,EACzB,OAAO,GAAE,oBAAyB,GACjC,OAAO,CAoBT;AAED,wBAAsB,iBAAiB,CAAC,CAAC,EACvC,OAAO,EAAE,oBAAoB,EAC7B,QAAQ,EAAE,eAAe,GAAG,SAAS,EACrC,GAAG,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,EACrB,OAAO,GAAE,oBAAyB,GACjC,OAAO,CAAC,CAAC,CAAC,CAwCZ"}
@@ -0,0 +1,113 @@
1
+ /**
2
+ * WS7 — COMPUTER_USE_AGENT action.
3
+ *
4
+ * High-level "give me a goal, I'll click my way there" entry point. The
5
+ * planner emits one of these instead of the lower-level COMPUTER_USE_CLICK
6
+ * etc. when the right action isn't obvious from the prompt.
7
+ *
8
+ * Loop:
9
+ * 1. refresh scene (`agent-turn`)
10
+ * 2. capture per-display PNGs
11
+ * 3. Brain → Cascade → ProposedAction
12
+ * 4. dispatch into ComputerInterface
13
+ * 5. observe (auto-screenshot via the existing service flow happens for
14
+ * ProposedAction.kind=click/etc; explicit captureAllDisplays after
15
+ * every step)
16
+ * 6. repeat until `finish` or `maxSteps`
17
+ *
18
+ * Trajectory events are emitted as structured `logger.info` lines with an
19
+ * `evt: "computeruse.agent.step"` payload, which the trajectory-logger app
20
+ * picks up via standard log capture. When `streamProgress` is enabled, the
21
+ * same step boundary also emits a `HandlerCallback` status to the origin chat.
22
+ * We don't take a hard dependency on the trajectory-logger plugin from here.
23
+ */
24
+ import { type Action, type Content, type IAgentRuntime } from "@elizaos/core";
25
+ import { type AgentMiddleware, type TrajectoryEntry } from "../actor/agent-callbacks.js";
26
+ import { type AgentLoop, type AgentLoopStats } from "../actor/agent-loop.js";
27
+ import type { Brain } from "../actor/brain.js";
28
+ import { type ComputerInterface } from "../actor/computer-interface.js";
29
+ import { type DisplayCapture } from "../platform/capture.js";
30
+ import type { ComputerUseService } from "../services/computer-use-service.js";
31
+ export interface ComputerUseAgentParams {
32
+ goal: string;
33
+ maxSteps?: number;
34
+ /**
35
+ * When true, emit a chat message after each dispatched step so a long-running
36
+ * goal does not leave the origin chat silent for minutes (#8912). The action
37
+ * handler wires this to the runtime HandlerCallback; the loop itself calls
38
+ * per-step progress hooks.
39
+ */
40
+ streamProgress?: boolean;
41
+ /** Wall-clock budget (ms) — the loop aborts before a step that exceeds it. */
42
+ maxDurationMs?: number;
43
+ /**
44
+ * Image-retention window (#9170 M11): keep only the N most-recent steps'
45
+ * screenshots in the bounded history. Off (unbounded) when unset.
46
+ */
47
+ imageRetentionLast?: number;
48
+ }
49
+ /** One per-step progress event, surfaced when `streamProgress` is set. */
50
+ export interface ComputerUseAgentStepProgress {
51
+ goal: string;
52
+ step: number;
53
+ maxSteps: number;
54
+ sceneSummary: string;
55
+ actionKind: string;
56
+ rationale: string;
57
+ rois: number;
58
+ result: {
59
+ success: boolean;
60
+ error?: string;
61
+ };
62
+ }
63
+ interface AgentDeps {
64
+ brain?: Brain;
65
+ /** Pre-built loop override (tests). Supersedes model-string selection. */
66
+ loop?: AgentLoop;
67
+ /** Loop model-string override (tests / explicit selection). */
68
+ loopModel?: string;
69
+ /**
70
+ * Callback middleware override (#9170 M11). When set, replaces the default
71
+ * pipeline (operator-normalizer + trajectory, plus budget/image-retention
72
+ * when configured via params).
73
+ */
74
+ middleware?: AgentMiddleware[];
75
+ /** Clock override (tests) — defaults to `Date.now`. */
76
+ now?: () => number;
77
+ computerInterface?: ComputerInterface;
78
+ captureAll?: () => Promise<DisplayCapture[]>;
79
+ /** Called after each dispatched step when `params.streamProgress` is set. */
80
+ onStepProgress?: (progress: ComputerUseAgentStepProgress) => Promise<void> | void;
81
+ /** Called with compact Content after each dispatched step when enabled. */
82
+ onCompactStepProgress?: (content: Content) => Promise<void> | void;
83
+ }
84
+ export interface ComputerUseAgentReport {
85
+ goal: string;
86
+ steps: Array<{
87
+ step: number;
88
+ sceneSummary: string;
89
+ actionKind: string;
90
+ rationale: string;
91
+ rois: number;
92
+ result: {
93
+ success: boolean;
94
+ error?: string;
95
+ };
96
+ }>;
97
+ finished: boolean;
98
+ reason: "finish" | "max_steps" | "error" | "budget";
99
+ error?: string;
100
+ /** Per-step transcript recorded by the trajectory middleware (#9170 M11). */
101
+ trajectory?: TrajectoryEntry[];
102
+ /** Per-run model-call accounting, when the loop reports it (#9105). */
103
+ modelStats?: AgentLoopStats;
104
+ }
105
+ export declare function formatComputerUseAgentProgress(progress: ComputerUseAgentStepProgress): string;
106
+ /**
107
+ * Run one Brain/Cascade/Dispatch loop. Exported so tests can drive it
108
+ * without exercising the full Action plumbing.
109
+ */
110
+ export declare function runComputerUseAgentLoop(runtime: IAgentRuntime | null, params: ComputerUseAgentParams, service: ComputerUseService, deps?: AgentDeps): Promise<ComputerUseAgentReport>;
111
+ export declare const computerUseAgentAction: Action;
112
+ export {};
113
+ //# sourceMappingURL=use-computer-agent.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"use-computer-agent.d.ts","sourceRoot":"","sources":["../../src/actions/use-computer-agent.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EACL,KAAK,MAAM,EAEX,KAAK,OAAO,EAGZ,KAAK,aAAa,EAInB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,KAAK,eAAe,EAWpB,KAAK,eAAe,EACrB,MAAM,6BAA6B,CAAC;AACrC,OAAO,EAEL,KAAK,SAAS,EACd,KAAK,cAAc,EAGpB,MAAM,wBAAwB,CAAC;AAChC,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EACL,KAAK,iBAAiB,EAEvB,MAAM,gCAAgC,CAAC;AAExC,OAAO,EAEL,KAAK,cAAc,EACpB,MAAM,wBAAwB,CAAC;AAGhC,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,qCAAqC,CAAC;AAS9E,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,8EAA8E;IAC9E,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB;;;OAGG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAED,0EAA0E;AAC1E,MAAM,WAAW,4BAA4B;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE;QAAE,OAAO,EAAE,OAAO,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CAC9C;AAED,UAAU,SAAS;IACjB,KAAK,CAAC,EAAE,KAAK,CAAC;IACd,0EAA0E;IAC1E,IAAI,CAAC,EAAE,SAAS,CAAC;IACjB,+DAA+D;IAC/D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;OAIG;IACH,UAAU,CAAC,EAAE,eAAe,EAAE,CAAC;IAC/B,uDAAuD;IACvD,GAAG,CAAC,EAAE,MAAM,MAAM,CAAC;IACnB,iBAAiB,CAAC,EAAE,iBAAiB,CAAC;IACtC,UAAU,CAAC,EAAE,MAAM,OAAO,CAAC,cAAc,EAAE,CAAC,CAAC;IAC7C,6EAA6E;IAC7E,cAAc,CAAC,EAAE,CACf,QAAQ,EAAE,4BAA4B,KACnC,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;IAC1B,2EAA2E;IAC3E,qBAAqB,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;CACpE;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,KAAK,CAAC;QACX,IAAI,EAAE,MAAM,CAAC;QACb,YAAY,EAAE,MAAM,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;QACnB,SAAS,EAAE,MAAM,CAAC;QAClB,IAAI,EAAE,MAAM,CAAC;QACb,MAAM,EAAE;YAAE,OAAO,EAAE,OAAO,CAAC;YAAC,KAAK,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC;KAC9C,CAAC,CAAC;IACH,QAAQ,EAAE,OAAO,CAAC;IAClB,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,OAAO,GAAG,QAAQ,CAAC;IACpD
,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,6EAA6E;IAC7E,UAAU,CAAC,EAAE,eAAe,EAAE,CAAC;IAC/B,uEAAuE;IACvE,UAAU,CAAC,EAAE,cAAc,CAAC;CAC7B;AAED,wBAAgB,8BAA8B,CAC5C,QAAQ,EAAE,4BAA4B,GACrC,MAAM,CAMR;AAmBD;;;GAGG;AACH,wBAAsB,uBAAuB,CAC3C,OAAO,EAAE,aAAa,GAAG,IAAI,EAC7B,MAAM,EAAE,sBAAsB,EAC9B,OAAO,EAAE,kBAAkB,EAC3B,IAAI,GAAE,SAAc,GACnB,OAAO,CAAC,sBAAsB,CAAC,CAuLjC;AAqFD,eAAO,MAAM,sBAAsB,EAAE,MA+HpC,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { Action } from "@elizaos/core";
2
+ export declare const useComputerAction: Action;
3
+ //# sourceMappingURL=use-computer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"use-computer.d.ts","sourceRoot":"","sources":["../../src/actions/use-computer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,MAAM,EAOP,MAAM,eAAe,CAAC;AAgLvB,eAAO,MAAM,iBAAiB,EAAE,MA6W/B,CAAC"}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Pure handler for the WINDOW parent action. Takes the live
3
+ * `ComputerUseService` and resolved params, executes the underlying
4
+ * window-management call, and returns an `ActionResult`.
5
+ */
6
+ import type { ActionResult, HandlerCallback } from "@elizaos/core";
7
+ import type { ComputerUseService } from "../services/computer-use-service.js";
8
+ import type { WindowActionParams } from "../types.js";
9
+ import { type ApprovalRelayOptions } from "./progress.js";
10
+ export declare function handleWindowOp(service: ComputerUseService, params: WindowActionParams, callback?: HandlerCallback, approvalOptions?: ApprovalRelayOptions): Promise<ActionResult>;
11
+ //# sourceMappingURL=window-handlers.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"window-handlers.d.ts","sourceRoot":"","sources":["../../src/actions/window-handlers.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACnE,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,qCAAqC,CAAC;AAC9E,OAAO,KAAK,EAAE,kBAAkB,EAAsB,MAAM,aAAa,CAAC;AAE1E,OAAO,EAAE,KAAK,oBAAoB,EAAqB,MAAM,eAAe,CAAC;AA0B7E,wBAAsB,cAAc,CAClC,OAAO,EAAE,kBAAkB,EAC3B,MAAM,EAAE,kBAAkB,EAC1B,QAAQ,CAAC,EAAE,eAAe,EAC1B,eAAe,GAAE,oBAAyB,GACzC,OAAO,CAAC,YAAY,CAAC,CAwBvB"}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * WINDOW parent action — manages local desktop windows (list / focus /
3
+ * switch / arrange / move / minimize / maximize / restore / close).
4
+ *
5
+ * Pointer and keyboard primitives live on COMPUTER_USE. File and shell
6
+ * operations live on the FILE and SHELL actions in their own plugins —
7
+ * this plugin no longer exposes them.
8
+ */
9
+ import type { Action } from "@elizaos/core";
10
+ export declare const windowAction: Action;
11
+ //# sourceMappingURL=window.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"window.d.ts","sourceRoot":"","sources":["../../src/actions/window.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EACV,MAAM,EAOP,MAAM,eAAe,CAAC;AAgEvB,eAAO,MAAM,YAAY,EAAE,MAkK1B,CAAC"}
@@ -0,0 +1,84 @@
1
+ /**
2
+ * WS7 — Actor (optional fine-grained grounding).
3
+ *
4
+ * The Actor is responsible for converting a Brain-issued reference
5
+ * ("click the Save button I see in this crop") into concrete display-local
6
+ * pixel coords.
7
+ *
8
+ * Primary path — deterministic (no model):
9
+ * `OcrCoordinateGroundingActor` resolves a `ref: "t<displayId>-<seq>"`
10
+ * (OCR id) or `ref: "a<displayId>-<seq>"` (AX id) directly from the
11
+ * Scene. Click point is bbox-center. No VLM call, fully reproducible,
12
+ * what 99% of the cascade should use.
13
+ *
14
+ * Optional secondary path — VLM:
15
+ * `OsAtlasProActor` is a typed adapter for an operator-provided model-server
16
+ * endpoint (e.g. an OS-Atlas-Pro vLLM service). Unless a deployment
17
+ * registers that endpoint, the cascade uses the OCR/AX grounding above.
18
+ *
19
+ * Register the active Actor on the cascade via `setActor(actor)` (see
20
+ * `cascade.ts`). If none is registered, the cascade uses the OCR/AX actor
21
+ * automatically.
22
+ */
23
+ import type { Scene } from "../scene/scene-types.js";
24
+ import type { GroundingResult, ReferenceTarget } from "./types.js";
25
+ export interface ActorGroundArgs {
26
+ /** Display the Brain wants to act on. */
27
+ displayId: number;
28
+ /**
29
+ * Cropped image of the ROI at native resolution (PNG bytes). May be an
30
+ * empty Buffer when the deterministic grounding doesn't need image bytes.
31
+ */
32
+ croppedImage: Buffer;
33
+ /** Hint from the Brain: "the Save button in the dialog footer". */
34
+ hint: string;
35
+ /** Optional reference from `BrainProposedAction.ref`. */
36
+ ref?: string;
37
+ }
38
+ export interface Actor {
39
+ readonly name: string;
40
+ ground(args: ActorGroundArgs): Promise<GroundingResult>;
41
+ }
42
+ export declare class OcrCoordinateGroundingActor implements Actor {
43
+ private readonly getScene;
44
+ readonly name = "ocr-ax-grounding";
45
+ constructor(getScene: () => Scene | null);
46
+ ground(args: ActorGroundArgs): Promise<GroundingResult>;
47
+ }
48
+ /**
49
+ * Look up a scene element by stable id, OR by case-insensitive label match
50
+ * when an id is absent. Used by both the deterministic actor and the cascade
51
+ * dispatcher to validate Brain output.
52
+ */
53
+ export declare function resolveReference(scene: Scene, ref: string | undefined, hint: string, preferredDisplay: number): ReferenceTarget | null;
54
+ export interface OsAtlasProActorOptions {
55
+ /** Endpoint of the model server, e.g. `http://localhost:8000/v1`. */
56
+ endpoint: string;
57
+ /** Optional auth header. */
58
+ apiKey?: string;
59
+ /** Model identifier on the server. */
60
+ model?: string;
61
+ /** Override the HTTP fetch (mostly for tests). */
62
+ fetcher?: (input: string, init: {
63
+ body: string;
64
+ headers: Record<string, string>;
65
+ }) => Promise<{
66
+ ok: boolean;
67
+ status: number;
68
+ text: () => Promise<string>;
69
+ }>;
70
+ }
71
+ /**
72
+ * Adapter for a server-side OS-Atlas-Pro (or compatible) grounding model.
73
+ * Not wired into the cascade by default. The contract: POST a JSON payload
74
+ * with `{ image: base64, hint }`, expect `{ x, y, confidence }` in image
75
+ * coordinates of the crop. The cascade is responsible for converting those
76
+ * crop-local coords back to display-local before dispatch.
77
+ */
78
+ export declare class OsAtlasProActor implements Actor {
79
+ private readonly opts;
80
+ readonly name = "osatlas-pro";
81
+ constructor(opts: OsAtlasProActorOptions);
82
+ ground(args: ActorGroundArgs): Promise<GroundingResult>;
83
+ }
84
+ //# sourceMappingURL=actor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"actor.d.ts","sourceRoot":"","sources":["../../src/actor/actor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH,OAAO,KAAK,EAAE,KAAK,EAA4B,MAAM,yBAAyB,CAAC;AAC/E,OAAO,KAAK,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAEnE,MAAM,WAAW,eAAe;IAC9B,yCAAyC;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,YAAY,EAAE,MAAM,CAAC;IACrB,mEAAmE;IACnE,IAAI,EAAE,MAAM,CAAC;IACb,yDAAyD;IACzD,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,KAAK;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,IAAI,EAAE,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;CACzD;AAID,qBAAa,2BAA4B,YAAW,KAAK;IAG3C,OAAO,CAAC,QAAQ,CAAC,QAAQ;IAFrC,QAAQ,CAAC,IAAI,sBAAsB;gBAEN,QAAQ,EAAE,MAAM,KAAK,GAAG,IAAI;IAEnD,MAAM,CAAC,IAAI,EAAE,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC;CAsB9D;AAED;;;;GAIG;AACH,wBAAgB,gBAAgB,CAC9B,KAAK,EAAE,KAAK,EACZ,GAAG,EAAE,MAAM,GAAG,SAAS,EACvB,IAAI,EAAE,MAAM,EACZ,gBAAgB,EAAE,MAAM,GACvB,eAAe,GAAG,IAAI,CA+BxB;AA0CD,MAAM,WAAW,sBAAsB;IACrC,qEAAqE;IACrE,QAAQ,EAAE,MAAM,CAAC;IACjB,4BAA4B;IAC5B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,sCAAsC;IACtC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,kDAAkD;IAClD,OAAO,CAAC,EAAE,CACR,KAAK,EAAE,MAAM,EACb,IAAI,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;KAAE,KACpD,OAAO,CAAC;QAAE,EAAE,EAAE,OAAO,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,OAAO,CAAC,MAAM,CAAC,CAAA;KAAE,CAAC,CAAC;CAC5E;AAED;;;;;;GAMG;AACH,qBAAa,eAAgB,YAAW,KAAK;IAG/B,OAAO,CAAC,QAAQ,CAAC,IAAI;IAFjC,QAAQ,CAAC,IAAI,iBAAiB;gBAED,IAAI,EAAE,sBAAsB;IAQnD,MAAM,CAAC,IAAI,EAAE,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC;CAkD9D"}
@@ -0,0 +1,128 @@
1
+ /**
2
+ * Agent-loop callback middleware (#9170 M11).
3
+ *
4
+ * trycua/cua threads a callback pipeline through its agent loop: budget caps,
5
+ * image-retention (keep only the N most-recent screenshots in context),
6
+ * operator-normalization (clean up the model's proposed action), and trajectory
7
+ * recording. Each callback is a thin middleware that observes (and sometimes
8
+ * transforms or aborts) the loop without the loop knowing which middlewares are
9
+ * present.
10
+ *
11
+ * This module defines the `AgentMiddleware` hook set and four built-ins. The
12
+ * runner (`use-computer-agent.ts`) fires the hooks at fixed points:
13
+ * onRunStart → [ beforeStep → onCaptures → transformProposed → afterStep ]* →
14
+ * onRunEnd
15
+ *
16
+ * Middlewares are pure-by-default and composable: the `run*` helpers fold the
17
+ * list in order, and `runBeforeStep` short-circuits on the first abort.
18
+ */
19
+ import type { DisplayCapture } from "../platform/capture.js";
20
+ import type { CascadeResult, ProposedAction } from "./types.js";
21
+ export interface AgentRunContext {
22
+ goal: string;
23
+ maxSteps: number;
24
+ }
25
+ export interface AgentStepContext {
26
+ step: number;
27
+ maxSteps: number;
28
+ goal: string;
29
+ /** Wall-clock ms since the run started (set by the runner). */
30
+ elapsedMs: number;
31
+ }
32
+ export interface AgentDispatchContext {
33
+ step: number;
34
+ goal: string;
35
+ proposed: CascadeResult;
36
+ dispatchSuccess: boolean;
37
+ error?: string;
38
+ }
39
+ /** Returned by `beforeStep` — abort halts the loop with `reason`. */
40
+ export interface AgentStepDecision {
41
+ abort?: boolean;
42
+ reason?: string;
43
+ }
44
+ export interface AgentRunSummary {
45
+ goal: string;
46
+ steps: number;
47
+ finished: boolean;
48
+ reason: string;
49
+ }
50
+ export interface AgentMiddleware {
51
+ readonly name: string;
52
+ onRunStart?(ctx: AgentRunContext): void | Promise<void>;
53
+ /** Inspect/abort before a step runs (budget caps live here). */
54
+ beforeStep?(ctx: AgentStepContext): AgentStepDecision | Promise<AgentStepDecision>;
55
+ /** Observe the captured frames (image-retention bookkeeping). */
56
+ onCaptures?(captures: Map<number, DisplayCapture>, ctx: AgentStepContext): void | Promise<void>;
57
+ /** Transform the planned step before dispatch (operator-normalizer). */
58
+ transformProposed?(proposed: CascadeResult, ctx: AgentStepContext): CascadeResult | Promise<CascadeResult>;
59
+ /** Observe a dispatched step (trajectory recording). */
60
+ afterStep?(ctx: AgentDispatchContext): void | Promise<void>;
61
+ onRunEnd?(summary: AgentRunSummary): void | Promise<void>;
62
+ }
63
+ export declare function runOnRunStart(middlewares: readonly AgentMiddleware[], ctx: AgentRunContext): Promise<void>;
64
+ /** Fold `beforeStep`; the FIRST abort wins (and names the middleware). */
65
+ export declare function runBeforeStep(middlewares: readonly AgentMiddleware[], ctx: AgentStepContext): Promise<AgentStepDecision>;
66
+ export declare function runOnCaptures(middlewares: readonly AgentMiddleware[], captures: Map<number, DisplayCapture>, ctx: AgentStepContext): Promise<void>;
67
+ /** Fold `transformProposed` left-to-right; each sees the prior's output. */
68
+ export declare function runTransformProposed(middlewares: readonly AgentMiddleware[], proposed: CascadeResult, ctx: AgentStepContext): Promise<CascadeResult>;
69
+ export declare function runAfterStep(middlewares: readonly AgentMiddleware[], ctx: AgentDispatchContext): Promise<void>;
70
+ export declare function runOnRunEnd(middlewares: readonly AgentMiddleware[], summary: AgentRunSummary): Promise<void>;
71
+ export interface BudgetCapOptions {
72
+ /** Abort once this many steps have STARTED (independent of the loop's own
73
+ * maxSteps; use to cap below it, e.g. cost control). */
74
+ maxSteps?: number;
75
+ /** Abort once wall-clock elapsed exceeds this many ms. */
76
+ maxDurationMs?: number;
77
+ }
78
+ /**
79
+ * Halts the loop when a step or time budget is exhausted. Caps below the loop's
80
+ * own `maxSteps`, and adds a wall-clock cap the loop has no notion of.
81
+ */
82
+ export declare function createBudgetCapMiddleware(options: BudgetCapOptions): AgentMiddleware;
83
+ export interface ImageRetentionMiddleware extends AgentMiddleware {
84
+ /** The retained window: one entry per recent step, listing the display ids of that step's captures. */
85
+ retained(): Array<{
86
+ step: number;
87
+ displayIds: number[];
88
+ }>;
89
+ }
90
+ /**
91
+ * Bounds the screenshot history to the `keepLast` most-recent steps, mirroring
92
+ * cua's image-retention (older frames fall out of context to cap token cost).
93
+ * The runner forwards the per-step captures; this middleware keeps the bounded
94
+ * window that a model-history consumer should send.
95
+ */
96
+ export declare function createImageRetentionMiddleware(options: {
97
+ keepLast: number;
98
+ }): ImageRetentionMiddleware;
99
+ /**
100
+ * Normalize the model's proposed action into the canonical dispatch shape:
101
+ * integer coordinates, trimmed type text, deduped/lowercased hotkey keys. Pure
102
+ * and idempotent — re-normalizing already-clean input is a no-op.
103
+ */
104
+ export declare function normalizeProposedAction(action: ProposedAction): ProposedAction;
105
+ /** Operator-normalizer middleware — cleans `proposed.proposed` before dispatch. */
106
+ export declare function createOperatorNormalizerMiddleware(): AgentMiddleware;
107
+ export interface TrajectoryEntry {
108
+ step: number;
109
+ goal: string;
110
+ actionKind: string;
111
+ rationale: string;
112
+ success: boolean;
113
+ error?: string;
114
+ }
115
+ export interface TrajectoryMiddleware extends AgentMiddleware {
116
+ /** The recorded trajectory so far. */
117
+ entries(): TrajectoryEntry[];
118
+ }
119
+ /**
120
+ * Records one entry per dispatched step. Independent of the existing
121
+ * `logger.info` trajectory events — this gives an in-memory transcript the
122
+ * caller can attach to the run report or persist.
123
+ */
124
+ export declare function createTrajectoryMiddleware(options?: {
125
+ /** Also emit a debug log line per step. Default false. */
126
+ log?: boolean;
127
+ }): TrajectoryMiddleware;
128
+ //# sourceMappingURL=agent-callbacks.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"agent-callbacks.d.ts","sourceRoot":"","sources":["../../src/actor/agent-callbacks.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAGH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AAC7D,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAEhE,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,+DAA+D;IAC/D,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,oBAAoB;IACnC,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,aAAa,CAAC;IACxB,eAAe,EAAE,OAAO,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,qEAAqE;AACrE,MAAM,WAAW,iBAAiB;IAChC,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,UAAU,CAAC,CAAC,GAAG,EAAE,eAAe,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACxD,gEAAgE;IAChE,UAAU,CAAC,CACT,GAAG,EAAE,gBAAgB,GACpB,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAC;IAClD,iEAAiE;IACjE,UAAU,CAAC,CACT,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC,EACrC,GAAG,EAAE,gBAAgB,GACpB,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACxB,wEAAwE;IACxE,iBAAiB,CAAC,CAChB,QAAQ,EAAE,aAAa,EACvB,GAAG,EAAE,gBAAgB,GACpB,aAAa,GAAG,OAAO,CAAC,aAAa,CAAC,CAAC;IAC1C,wDAAwD;IACxD,SAAS,CAAC,CAAC,GAAG,EAAE,oBAAoB,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5D,QAAQ,CAAC,CAAC,OAAO,EAAE,eAAe,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CAC3D;AAID,wBAAsB,aAAa,CACjC,WAAW,EAAE,SAAS,eAAe,EAAE,EACvC,GAAG,EAAE,eAAe,GACnB,OAAO,CAAC,IAAI,CAAC,CAEf;AAED,0EAA0E;AAC1E,wBAAsB,aAAa,CACjC,WAAW,EAAE,SAAS,eAAe,EAAE,EACvC,GAAG,EAAE,gBAAgB,GACpB,OAAO,CAAC,iBAAiB,CAAC,CAW5B;AAED,wBAAsB,aAAa,CACjC,WAAW,EAAE,SAAS,eAAe,EAAE,EACvC,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC,EACrC,GAAG,EAAE,gBAAgB,GACpB,OAAO,CAAC,IAAI,CAAC,CAEf;AAED,4EAA4E;AAC5E,wBAAsB,oBAAoB,CACxC,WAAW,EAAE,SAAS,eAAe,EAAE,EACvC,QAAQ,EAAE,aAAa,EACvB,GAAG,EAAE,gBA
AgB,GACpB,OAAO,CAAC,aAAa,CAAC,CAMxB;AAED,wBAAsB,YAAY,CAChC,WAAW,EAAE,SAAS,eAAe,EAAE,EACvC,GAAG,EAAE,oBAAoB,GACxB,OAAO,CAAC,IAAI,CAAC,CAEf;AAED,wBAAsB,WAAW,CAC/B,WAAW,EAAE,SAAS,eAAe,EAAE,EACvC,OAAO,EAAE,eAAe,GACvB,OAAO,CAAC,IAAI,CAAC,CAEf;AAID,MAAM,WAAW,gBAAgB;IAC/B;4DACwD;IACxD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,0DAA0D;IAC1D,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;;GAGG;AACH,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,gBAAgB,GACxB,eAAe,CAsBjB;AAID,MAAM,WAAW,wBAAyB,SAAQ,eAAe;IAC/D,sEAAsE;IACtE,QAAQ,IAAI,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC,CAAC;CAC3D;AAED;;;;;GAKG;AACH,wBAAgB,8BAA8B,CAAC,OAAO,EAAE;IACtD,QAAQ,EAAE,MAAM,CAAC;CAClB,GAAG,wBAAwB,CAgB3B;AASD;;;;GAIG;AACH,wBAAgB,uBAAuB,CACrC,MAAM,EAAE,cAAc,GACrB,cAAc,CAiBhB;AAED,mFAAmF;AACnF,wBAAgB,kCAAkC,IAAI,eAAe,CAUpE;AAID,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,oBAAqB,SAAQ,eAAe;IAC3D,sCAAsC;IACtC,OAAO,IAAI,eAAe,EAAE,CAAC;CAC9B;AAED;;;;GAIG;AACH,wBAAgB,0BAA0B,CAAC,OAAO,CAAC,EAAE;IACnD,0DAA0D;IAC1D,GAAG,CAAC,EAAE,OAAO,CAAC;CACf,GAAG,oBAAoB,CAwBvB"}
@@ -0,0 +1,134 @@
1
+ /**
2
+ * Agent-loop registry (#9170 M10).
3
+ *
4
+ * trycua/cua selects an agent *loop* from a model string: an `anthropic/...`
5
+ * model routes to the Claude computer-use loop, `openai/computer-use-preview`
6
+ * routes to the OpenAI operator loop, an OmniParser/grounder string routes to a
7
+ * local set-of-marks loop, etc. Each loop implements the same two-call seam —
8
+ * `predict_step` (observe + plan the next action) and `predict_click` (ground a
9
+ * target to a coordinate) — so the runner is decoupled from *how* a step is
10
+ * produced.
11
+ *
12
+ * elizaOS shipped a single hardcoded Brain→Cascade (ScreenSeekeR). This module
13
+ * replaces that hardcoding with a registry:
14
+ * - `AgentLoop` — the `predictStep` / `predictClick` seam.
15
+ * - `registerAgentLoop` — register a loop keyed by a model-string matcher.
16
+ * - `createAgentLoop(modelString, deps)` — pick the highest-priority matching
17
+ * loop and instantiate it.
18
+ *
19
+ * The built-in `local-grounder` loop wraps the existing Brain→Cascade and
20
+ * exposes the M5 grounding cache through `predictClick`. Anthropic / OpenAI
21
+ * computer-use loops are *pluggable*: a provider plugin calls
22
+ * `registerAgentLoop` with `matchesModelFamily("anthropic")` (etc.) and its own
23
+ * `predictStep`. With none registered, every model string falls through to the
24
+ * local grounder (which always matches at the lowest priority).
25
+ */
26
+ import type { IAgentRuntime } from "@elizaos/core";
27
+ import type { DisplayCapture } from "../platform/capture.js";
28
+ import type { Scene } from "../scene/scene-types.js";
29
+ import { type Actor } from "./actor.js";
30
+ import { Brain } from "./brain.js";
31
+ import type { CascadeResult, GroundingResult } from "./types.js";
32
+ /** Default loop model-string — the local OCR/AX + actor grounder. */
33
+ export declare const DEFAULT_AGENT_LOOP_MODEL = "local-grounder";
34
+ /** Setting / env key the runner reads to choose a loop. */
35
+ export declare const AGENT_LOOP_SETTING = "COMPUTER_USE_AGENT_LOOP";
36
+ export interface AgentStepInput {
37
+ scene: Scene;
38
+ goal: string;
39
+ captures: Map<number, DisplayCapture>;
40
+ }
41
+ export interface PredictClickInput {
42
+ scene: Scene;
43
+ captures: Map<number, DisplayCapture>;
44
+ targetDisplayId: number;
45
+ /** OCR/AX id to ground (`t<d>-<n>` / `a<d>-<n>`). */
46
+ ref?: string;
47
+ /** Free-form instruction when no ref is available. */
48
+ instruction?: string;
49
+ }
50
+ /**
51
+ * `AgentLoop` (declared below): the two-call seam every loop implements. `predictStep` plans the next
52
+ * concrete action; `predictClick` grounds a target to a coordinate (used by
53
+ * loops that plan elsewhere but reuse our grounding, and by callers that want
54
+ * grounding without a full step).
55
+ */
56
+ /**
57
+ * Per-run model-call accounting (#9105). `invocations` counts the token-bearing
58
+ * model calls a loop actually issued; `cacheHits` counts calls served without a
59
+ * model round-trip. Reported once per run as `evt:"computeruse.agent.tokens"`.
60
+ */
61
+ export interface AgentLoopStats {
62
+ /** Token-bearing model calls actually issued during the run. */
63
+ invocations: number;
64
+ /** Calls served from cache (no model call, no tokens). */
65
+ cacheHits: number;
66
+ /** Model calls issued with no screenshot attached (#9105). */
67
+ imagelessCalls: number;
68
+ /** Estimated image tokens not sent because of imageless calls (#9105). */
69
+ estImageTokensSaved: number;
70
+ }
71
+ export interface AgentLoop {
72
+ readonly name: string;
73
+ predictStep(input: AgentStepInput): Promise<CascadeResult>;
74
+ predictClick(input: PredictClickInput): Promise<GroundingResult | null>;
75
+ /** Per-run model-call accounting, when the loop tracks it (#9105). */
76
+ getStats?(): AgentLoopStats;
77
+ }
78
+ export interface AgentLoopDeps {
79
+ runtime: IAgentRuntime | null;
80
+ /** Latest-scene accessor for the default actor. */
81
+ getScene: () => Scene | null;
82
+ /** Brain override (mostly tests). */
83
+ brain?: Brain;
84
+ /** Actor override (mostly tests). */
85
+ actor?: Actor | null;
86
+ }
87
+ export interface AgentLoopRegistration {
88
+ /** Stable id for telemetry + explicit selection. */
89
+ readonly name: string;
90
+ /** True when this loop handles `modelString`. */
91
+ matches: (modelString: string) => boolean;
92
+ /** Instantiate the loop for a run. */
93
+ create: (deps: AgentLoopDeps) => AgentLoop;
94
+ /** Higher wins when multiple registrations match. Default 0. */
95
+ priority?: number;
96
+ }
97
+ /**
98
+ * Wraps the existing ScreenSeekeR (Brain → Cascade). `predictStep` is the full
99
+ * observe→plan→ground cascade; `predictClick` calls the cascade's grounding-only
100
+ * path so the M5 per-Scene grounding cache is shared across both.
101
+ */
102
+ export declare class LocalGrounderLoop implements AgentLoop {
103
+ readonly name = "local-grounder";
104
+ private readonly cascade;
105
+ private readonly brain;
106
+ constructor(deps: AgentLoopDeps);
107
+ predictStep(input: AgentStepInput): Promise<CascadeResult>;
108
+ predictClick(input: PredictClickInput): Promise<GroundingResult | null>;
109
+ /** Grounding cache hit/miss snapshot (delegates to the wrapped cascade). */
110
+ getGroundStats(): import("./cascade.js").CascadeGroundStats;
111
+ /** Model-call accounting from the wrapped Brain (#9105). */
112
+ getStats(): AgentLoopStats;
113
+ }
114
+ /**
115
+ * A matcher for a provider family — `anthropic`, `openai`, `google`, … A
116
+ * pluggable loop registers with `matches: matchesModelFamily("anthropic")` so a
117
+ * model string like `anthropic/claude-...` or `claude-3-7-sonnet` routes to it.
118
+ */
119
+ export declare function matchesModelFamily(family: string): (modelString: string) => boolean;
120
+ /** Register (or replace, by name) an agent-loop. */
121
+ export declare function registerAgentLoop(registration: AgentLoopRegistration): void;
122
+ export declare function unregisterAgentLoop(name: string): void;
123
+ export declare function listAgentLoops(): readonly AgentLoopRegistration[];
124
+ /**
125
+ * Pick the registration for a model string: the highest-priority one whose
126
+ * `matches` returns true. The local grounder's match-anything floor guarantees
127
+ * a result, so this never throws.
128
+ */
129
+ export declare function selectAgentLoopRegistration(modelString: string): AgentLoopRegistration;
130
+ /** Resolve + instantiate the loop for a model string. */
131
+ export declare function createAgentLoop(modelString: string, deps: AgentLoopDeps): AgentLoop;
132
+ /** Test helper — restore the registry to just the built-in local grounder. */
133
+ export declare function _resetAgentLoopsForTests(): void;
134
+ //# sourceMappingURL=agent-loop.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"agent-loop.d.ts","sourceRoot":"","sources":["../../src/actor/agent-loop.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AACnD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AAC7D,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,yBAAyB,CAAC;AACrD,OAAO,EAAE,KAAK,KAAK,EAA+B,MAAM,YAAY,CAAC;AACrE,OAAO,EAAE,KAAK,EAA2B,MAAM,YAAY,CAAC;AAE5D,OAAO,KAAK,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAEjE,qEAAqE;AACrE,eAAO,MAAM,wBAAwB,mBAAmB,CAAC;AAEzD,2DAA2D;AAC3D,eAAO,MAAM,kBAAkB,4BAA4B,CAAC;AAE5D,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,KAAK,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;CACvC;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,KAAK,CAAC;IACb,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC;IACtC,eAAe,EAAE,MAAM,CAAC;IACxB,qDAAqD;IACrD,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,sDAAsD;IACtD,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;;;;GAKG;AACH;;;;GAIG;AACH,MAAM,WAAW,cAAc;IAC7B,gEAAgE;IAChE,WAAW,EAAE,MAAM,CAAC;IACpB,0DAA0D;IAC1D,SAAS,EAAE,MAAM,CAAC;IAClB,8DAA8D;IAC9D,cAAc,EAAE,MAAM,CAAC;IACvB,0EAA0E;IAC1E,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO,CAAC,aAAa,CAAC,CAAC;IAC3D,YAAY,CAAC,KAAK,EAAE,iBAAiB,GAAG,OAAO,CAAC,eAAe,GAAG,IAAI,CAAC,CAAC;IACxE,sEAAsE;IACtE,QAAQ,CAAC,IAAI,cAAc,CAAC;CAC7B;AAED,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,aAAa,GAAG,IAAI,CAAC;IAC9B,mDAAmD;IACnD,QAAQ,EAAE,MAAM,KAAK,GAAG,IAAI,CAAC;IAC7B,qCAAqC;IACrC,KAAK,CAAC,EAAE,KAAK,CAAC;IACd,qCAAqC;IACrC,KAAK,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC;CACtB;AAED,MAAM,WAAW,qBAAqB;IACpC,oDAAoD;IACpD,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,iDAAiD;IACjD,OAAO,EAAE,CAAC,WAAW,EAAE,MAAM,KAAK,OAAO,CAAC;IAC1C,sCAAsC;IACtC,MAAM,EAAE,CAAC,IAAI,EAAE,aAAa,KAAK,SAAS,CAAC;IAC3C,gEAAgE;IAChE,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAID;;;;GAIG;AACH,qBAAa,iBAAkB,YAAW,SAAS;IACjD,QAAQ,CAAC,IAAI,oBAA4B;IACzC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAClC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAQ;gBAElB,IAAI,EAAE,aAAa;IAc/B,WAAW,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO
,CAAC,aAAa,CAAC;IAIpD,YAAY,CAChB,KAAK,EAAE,iBAAiB,GACvB,OAAO,CAAC,eAAe,GAAG,IAAI,CAAC;IAkBlC,4EAA4E;IAC5E,cAAc;IAId,4DAA4D;IAC5D,QAAQ,IAAI,cAAc;CAG3B;AAID;;;;GAIG;AACH,wBAAgB,kBAAkB,CAChC,MAAM,EAAE,MAAM,GACb,CAAC,WAAW,EAAE,MAAM,KAAK,OAAO,CAYlC;AAMD,oDAAoD;AACpD,wBAAgB,iBAAiB,CAAC,YAAY,EAAE,qBAAqB,GAAG,IAAI,CAE3E;AAED,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAEtD;AAED,wBAAgB,cAAc,IAAI,SAAS,qBAAqB,EAAE,CAIjE;AA0BD;;;;GAIG;AACH,wBAAgB,2BAA2B,CACzC,WAAW,EAAE,MAAM,GAClB,qBAAqB,CAevB;AAED,yDAAyD;AACzD,wBAAgB,eAAe,CAC7B,WAAW,EAAE,MAAM,EACnB,IAAI,EAAE,aAAa,GAClB,SAAS,CAEX;AAED,8EAA8E;AAC9E,wBAAgB,wBAAwB,IAAI,IAAI,CAG/C"}