@elizaos/plugin-computeruse 2.0.0-beta.1 → 2.0.3-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +80 -0
- package/dist/actions/clipboard.d.ts +22 -0
- package/dist/actions/clipboard.d.ts.map +1 -0
- package/dist/actions/progress.d.ts +26 -0
- package/dist/actions/progress.d.ts.map +1 -0
- package/dist/actions/use-computer-agent.d.ts +113 -0
- package/dist/actions/use-computer-agent.d.ts.map +1 -0
- package/dist/actions/use-computer.d.ts.map +1 -1
- package/dist/actions/window-handlers.d.ts +11 -0
- package/dist/actions/window-handlers.d.ts.map +1 -0
- package/dist/actions/window.d.ts +11 -0
- package/dist/actions/window.d.ts.map +1 -0
- package/dist/actor/actor.d.ts +84 -0
- package/dist/actor/actor.d.ts.map +1 -0
- package/dist/actor/agent-callbacks.d.ts +128 -0
- package/dist/actor/agent-callbacks.d.ts.map +1 -0
- package/dist/actor/agent-loop.d.ts +134 -0
- package/dist/actor/agent-loop.d.ts.map +1 -0
- package/dist/actor/aosp-input-actor.d.ts +87 -0
- package/dist/actor/aosp-input-actor.d.ts.map +1 -0
- package/dist/actor/brain.d.ts +195 -0
- package/dist/actor/brain.d.ts.map +1 -0
- package/dist/actor/cascade.d.ts +92 -0
- package/dist/actor/cascade.d.ts.map +1 -0
- package/dist/actor/computer-interface.d.ts +276 -0
- package/dist/actor/computer-interface.d.ts.map +1 -0
- package/dist/actor/dispatch.d.ts +24 -0
- package/dist/actor/dispatch.d.ts.map +1 -0
- package/dist/actor/index.d.ts +12 -0
- package/dist/actor/index.d.ts.map +1 -0
- package/dist/actor/types.d.ts +94 -0
- package/dist/actor/types.d.ts.map +1 -0
- package/dist/approval-manager.d.ts.map +1 -1
- package/dist/index.d.ts +19 -6
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +12001 -5484
- package/dist/index.js.map +59 -25
- package/dist/mcp/index.d.ts +8 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/server.d.ts +42 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/tools.d.ts +53 -0
- package/dist/mcp/tools.d.ts.map +1 -0
- package/dist/mobile/android-bridge.d.ts +263 -0
- package/dist/mobile/android-bridge.d.ts.map +1 -0
- package/dist/mobile/android-scene.d.ts +52 -0
- package/dist/mobile/android-scene.d.ts.map +1 -0
- package/dist/mobile/android-trajectory.d.ts +66 -0
- package/dist/mobile/android-trajectory.d.ts.map +1 -0
- package/dist/mobile/index.d.ts +19 -0
- package/dist/mobile/index.d.ts.map +1 -0
- package/dist/mobile/ios-app-intent-registry.d.ts +20 -0
- package/dist/mobile/ios-app-intent-registry.d.ts.map +1 -0
- package/dist/mobile/ios-bridge.d.ts +359 -0
- package/dist/mobile/ios-bridge.d.ts.map +1 -0
- package/dist/mobile/ios-computer-interface.d.ts +160 -0
- package/dist/mobile/ios-computer-interface.d.ts.map +1 -0
- package/dist/mobile/mobile-computer-interface.d.ts +142 -0
- package/dist/mobile/mobile-computer-interface.d.ts.map +1 -0
- package/dist/mobile/mobile-screen-capture.d.ts +64 -0
- package/dist/mobile/mobile-screen-capture.d.ts.map +1 -0
- package/dist/mobile/ocr-provider.d.ts +187 -0
- package/dist/mobile/ocr-provider.d.ts.map +1 -0
- package/dist/mobile/ocr-provider.js +111 -0
- package/dist/mobile/ocr-provider.js.map +10 -0
- package/dist/osworld/action-converter.d.ts +4 -1
- package/dist/osworld/action-converter.d.ts.map +1 -1
- package/dist/osworld/adapter.d.ts +1 -0
- package/dist/osworld/adapter.d.ts.map +1 -1
- package/dist/parity/index.d.ts +9 -0
- package/dist/parity/index.d.ts.map +1 -0
- package/dist/parity/parity-matrix.d.ts +82 -0
- package/dist/parity/parity-matrix.d.ts.map +1 -0
- package/dist/parity/screenspot.d.ts +56 -0
- package/dist/parity/screenspot.d.ts.map +1 -0
- package/dist/platform/a11y.d.ts +29 -1
- package/dist/platform/a11y.d.ts.map +1 -1
- package/dist/platform/browser.d.ts +1 -1
- package/dist/platform/browser.d.ts.map +1 -1
- package/dist/platform/capabilities.d.ts +23 -0
- package/dist/platform/capabilities.d.ts.map +1 -1
- package/dist/platform/capture.d.ts +65 -0
- package/dist/platform/capture.d.ts.map +1 -0
- package/dist/platform/clipboard.d.ts +24 -0
- package/dist/platform/clipboard.d.ts.map +1 -0
- package/dist/platform/coords.d.ts +73 -0
- package/dist/platform/coords.d.ts.map +1 -0
- package/dist/platform/desktop.d.ts +23 -0
- package/dist/platform/desktop.d.ts.map +1 -1
- package/dist/platform/displays.d.ts +97 -0
- package/dist/platform/displays.d.ts.map +1 -0
- package/dist/platform/driver.d.ts +22 -0
- package/dist/platform/driver.d.ts.map +1 -1
- package/dist/platform/file-ops.d.ts +17 -0
- package/dist/platform/file-ops.d.ts.map +1 -1
- package/dist/platform/helpers.d.ts +2 -3
- package/dist/platform/helpers.d.ts.map +1 -1
- package/dist/platform/launch.d.ts +54 -0
- package/dist/platform/launch.d.ts.map +1 -0
- package/dist/platform/normalized-coords.d.ts +46 -0
- package/dist/platform/normalized-coords.d.ts.map +1 -0
- package/dist/platform/nut-driver.d.ts +67 -0
- package/dist/platform/nut-driver.d.ts.map +1 -1
- package/dist/platform/permissions.d.ts +12 -0
- package/dist/platform/permissions.d.ts.map +1 -1
- package/dist/platform/process-list.d.ts +32 -0
- package/dist/platform/process-list.d.ts.map +1 -0
- package/dist/platform/ps-host.d.ts +77 -0
- package/dist/platform/ps-host.d.ts.map +1 -0
- package/dist/platform/screenshot-errors.d.ts +54 -0
- package/dist/platform/screenshot-errors.d.ts.map +1 -0
- package/dist/platform/screenshot-quality.d.ts +11 -0
- package/dist/platform/screenshot-quality.d.ts.map +1 -0
- package/dist/platform/screenshot.d.ts.map +1 -1
- package/dist/platform/security.d.ts +8 -0
- package/dist/platform/security.d.ts.map +1 -1
- package/dist/platform/wayland-portal.d.ts +25 -0
- package/dist/platform/wayland-portal.d.ts.map +1 -0
- package/dist/platform/windows-list.d.ts +43 -1
- package/dist/platform/windows-list.d.ts.map +1 -1
- package/dist/providers/computer-state.d.ts.map +1 -1
- package/dist/providers/scene.d.ts +21 -0
- package/dist/providers/scene.d.ts.map +1 -0
- package/dist/register-routes.js +11715 -4990
- package/dist/register-routes.js.map +61 -24
- package/dist/routes/computer-use-compat-routes.d.ts +1 -1
- package/dist/routes/computer-use-compat-routes.d.ts.map +1 -1
- package/dist/sandbox/docker-backend.d.ts +69 -0
- package/dist/sandbox/docker-backend.d.ts.map +1 -0
- package/dist/sandbox/index.d.ts +62 -0
- package/dist/sandbox/index.d.ts.map +1 -0
- package/dist/sandbox/qemu-backend.d.ts +48 -0
- package/dist/sandbox/qemu-backend.d.ts.map +1 -0
- package/dist/sandbox/remote-guest.d.ts +72 -0
- package/dist/sandbox/remote-guest.d.ts.map +1 -0
- package/dist/sandbox/sandbox-driver.d.ts +41 -0
- package/dist/sandbox/sandbox-driver.d.ts.map +1 -0
- package/dist/sandbox/surface-types.d.ts +17 -0
- package/dist/sandbox/surface-types.d.ts.map +1 -0
- package/dist/sandbox/types.d.ts +138 -0
- package/dist/sandbox/types.d.ts.map +1 -0
- package/dist/sandbox/wsb-backend.d.ts +48 -0
- package/dist/sandbox/wsb-backend.d.ts.map +1 -0
- package/dist/scene/a11y-provider.d.ts +83 -0
- package/dist/scene/a11y-provider.d.ts.map +1 -0
- package/dist/scene/apps.d.ts +39 -0
- package/dist/scene/apps.d.ts.map +1 -0
- package/dist/scene/dhash.d.ts +105 -0
- package/dist/scene/dhash.d.ts.map +1 -0
- package/dist/scene/ocr-adapter.d.ts +64 -0
- package/dist/scene/ocr-adapter.d.ts.map +1 -0
- package/dist/scene/scene-builder.d.ts +107 -0
- package/dist/scene/scene-builder.d.ts.map +1 -0
- package/dist/scene/scene-types.d.ts +70 -0
- package/dist/scene/scene-types.d.ts.map +1 -0
- package/dist/scene/screen-state.d.ts +105 -0
- package/dist/scene/screen-state.d.ts.map +1 -0
- package/dist/scene/serialize.d.ts +28 -0
- package/dist/scene/serialize.d.ts.map +1 -0
- package/dist/security/browser-script-policy.d.ts +9 -0
- package/dist/security/browser-script-policy.d.ts.map +1 -0
- package/dist/services/computer-use-service.d.ts +78 -2
- package/dist/services/computer-use-service.d.ts.map +1 -1
- package/dist/services/index.d.ts +7 -0
- package/dist/services/index.d.ts.map +1 -0
- package/dist/services/vision-context-provider.d.ts +32 -0
- package/dist/services/vision-context-provider.d.ts.map +1 -0
- package/dist/types.d.ts +115 -5
- package/dist/types.d.ts.map +1 -1
- package/package.json +47 -10
- package/registry-entry.json +74 -0
- package/dist/actions/desktop-handlers.d.ts +0 -20
- package/dist/actions/desktop-handlers.d.ts.map +0 -1
- package/dist/actions/desktop.d.ts +0 -11
- package/dist/actions/desktop.d.ts.map +0 -1
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Shaw Walters and elizaOS Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# `@elizaos/plugin-computeruse`
|
|
2
|
+
|
|
3
|
+
Desktop automation plugin for elizaOS agents — screenshots, mouse /
|
|
4
|
+
keyboard control, browser CDP automation, window management, clipboard,
|
|
5
|
+
and the OCR provider registry that other plugins contribute to.
|
|
6
|
+
|
|
7
|
+
Ported from
|
|
8
|
+
[`coasty-ai/open-computer-use`](https://github.com/coasty-ai/open-computer-use)
|
|
9
|
+
(Apache 2.0).
|
|
10
|
+
|
|
11
|
+
## Boundary with `@elizaos/plugin-vision`
|
|
12
|
+
|
|
13
|
+
This plugin owns the OS surfaces:
|
|
14
|
+
|
|
15
|
+
- screen / display capture (`src/platform/capture.ts`,
|
|
16
|
+
`src/platform/displays.ts`,
|
|
17
|
+
`ComputerUseService.captureScreen()`),
|
|
18
|
+
- input + windows + clipboard + accessibility,
|
|
19
|
+
- the OCR provider registries — `OcrProvider` (line-level) and
|
|
20
|
+
`CoordOcrProvider` (hierarchical with absolute coords), defined in
|
|
21
|
+
`src/mobile/ocr-provider.ts`.
|
|
22
|
+
|
|
23
|
+
`@elizaos/plugin-vision` owns the camera pipeline, scene description
|
|
24
|
+
via `runtime.useModel(IMAGE_DESCRIPTION)`, the screen tiler, the
|
|
25
|
+
detector pipeline (faces / people / objects), and the OCR
|
|
26
|
+
implementations themselves. plugin-vision *consumes* capture from this
|
|
27
|
+
plugin via `runtime.getService("computeruse")` and *contributes* the
|
|
28
|
+
hierarchical OCR adapter into this plugin's `registerCoordOcrProvider`
|
|
29
|
+
seam at boot.
|
|
30
|
+
|
|
31
|
+
Both seams are runtime feature-detected — neither package depends on
|
|
32
|
+
the other.
|
|
33
|
+
|
|
34
|
+
## Enabling
|
|
35
|
+
|
|
36
|
+
- Config: `features.computeruse: true`
|
|
37
|
+
- Env: `COMPUTER_USE_ENABLED=1`
|
|
38
|
+
|
|
39
|
+
## Platform requirements
|
|
40
|
+
|
|
41
|
+
| OS | Capture | Input |
|
|
42
|
+
|----|---------|-------|
|
|
43
|
+
| macOS | `screencapture` (built-in) | `cliclick` (`brew install cliclick`), AppleScript |
|
|
44
|
+
| Linux | `import` (ImageMagick) / `scrot` | `xdotool` (`sudo apt install xdotool`) |
|
|
45
|
+
| Windows | PowerShell + `System.Drawing` | PowerShell |
|
|
46
|
+
| Browser | — | `puppeteer-core` + Chrome / Edge / Brave |
|
|
47
|
+
|
|
48
|
+
## Surface
|
|
49
|
+
|
|
50
|
+
- **Actions** — `COMPUTER_USE` (canonical screenshot / click / key /
|
|
51
|
+
scroll / etc.), `WINDOW` (list / focus / arrange / move / ...), and
|
|
52
|
+
`COMPUTER_USE_AGENT` (high-level goal-driven autonomous desktop loop:
|
|
53
|
+
Brain → Cascade → dispatch up to `maxSteps` iterations).
|
|
54
|
+
Subactions of `COMPUTER_USE` and `WINDOW` are promoted to virtual
|
|
55
|
+
top-level actions (e.g. `COMPUTER_USE_CLICK`, `WINDOW_FOCUS`) so the
|
|
56
|
+
planner picks a specific verb directly from the catalogue.
|
|
57
|
+
- **Services** — `ComputerUseService` (`serviceType = "computeruse"`)
|
|
58
|
+
and `VisionContextProvider`.
|
|
59
|
+
- **Providers** — `computerStateProvider`, `sceneProvider`.
|
|
60
|
+
- **Routes** — approval inbox + SSE stream + approval-mode toggle under
|
|
61
|
+
`/api/computer-use/...`.
|
|
62
|
+
|
|
63
|
+
## File operations + shell
|
|
64
|
+
|
|
65
|
+
File operations live on the FILE action; shell / terminal access lives
|
|
66
|
+
on the SHELL action. They are **not** exposed by this plugin.
|
|
67
|
+
|
|
68
|
+
## Further reading
|
|
69
|
+
|
|
70
|
+
- [`docs/MULTI_MONITOR.md`](./docs/MULTI_MONITOR.md) — multi-display
|
|
71
|
+
capture and coordinate translation.
|
|
72
|
+
- [`docs/SCENE_BUILDER.md`](./docs/SCENE_BUILDER.md) — how windows,
|
|
73
|
+
a11y, screen, and OCR are composed into a single `Scene`.
|
|
74
|
+
- [`docs/IOS_CONSTRAINTS.md`](./docs/IOS_CONSTRAINTS.md) /
|
|
75
|
+
[`docs/ANDROID_CONSTRAINTS.md`](./docs/ANDROID_CONSTRAINTS.md) —
|
|
76
|
+
honest scope on mobile.
|
|
77
|
+
- [`docs/MOBILE_ASSISTANT_ROUTING.md`](./docs/MOBILE_ASSISTANT_ROUTING.md)
|
|
78
|
+
— mobile request routing.
|
|
79
|
+
- [`docs/AOSP_SYSTEM_APP.md`](./docs/AOSP_SYSTEM_APP.md) — AOSP
|
|
80
|
+
system-app deployment notes.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLIPBOARD parent action — read or write the host system clipboard.
|
|
3
|
+
*
|
|
4
|
+
* Routes through `driverReadClipboard` / `driverWriteClipboard`, which
|
|
5
|
+
* select the per-OS tool (pbcopy/pbpaste, wl-copy/wl-paste, xclip,
|
|
6
|
+
* PowerShell Set-Clipboard / Get-Clipboard).
|
|
7
|
+
*
|
|
8
|
+
* Subactions: `read`, `write`. The plugin index promotes them to virtual
|
|
9
|
+
* top-level actions (`CLIPBOARD_READ`, `CLIPBOARD_WRITE`) so the planner
|
|
10
|
+
* can pick a specific verb directly from the action catalogue.
|
|
11
|
+
*/
|
|
12
|
+
import type { Action } from "@elizaos/core";
|
|
13
|
+
declare const CLIPBOARD_ACTIONS: readonly ["read", "write"];
|
|
14
|
+
export type ClipboardActionType = (typeof CLIPBOARD_ACTIONS)[number];
|
|
15
|
+
export interface ClipboardActionParams {
|
|
16
|
+
action: ClipboardActionType;
|
|
17
|
+
/** Text payload for `write`. Ignored for `read`. */
|
|
18
|
+
text?: string;
|
|
19
|
+
}
|
|
20
|
+
export declare const clipboardAction: Action;
|
|
21
|
+
export {};
|
|
22
|
+
//# sourceMappingURL=clipboard.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"clipboard.d.ts","sourceRoot":"","sources":["../../src/actions/clipboard.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EACV,MAAM,EAOP,MAAM,eAAe,CAAC;AASvB,QAAA,MAAM,iBAAiB,4BAA6B,CAAC;AACrD,MAAM,MAAM,mBAAmB,GAAG,CAAC,OAAO,iBAAiB,CAAC,CAAC,MAAM,CAAC,CAAC;AAErE,MAAM,WAAW,qBAAqB;IACpC,MAAM,EAAE,mBAAmB,CAAC;IAC5B,oDAAoD;IACpD,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AA+ED,eAAO,MAAM,eAAe,EAAE,MAgI7B,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { type Content, type HandlerCallback } from "@elizaos/core";
|
|
2
|
+
import type { ApprovalSnapshot, PendingApproval } from "../types.js";
|
|
3
|
+
export declare const ACTION_PROGRESS_SOURCE = "action_progress";
|
|
4
|
+
export declare const COMPUTER_USE_APPROVAL_SOURCE = "computeruse_approval";
|
|
5
|
+
export interface StepProgressInput {
|
|
6
|
+
actionName: string;
|
|
7
|
+
step: number;
|
|
8
|
+
kind: string;
|
|
9
|
+
rationale?: string;
|
|
10
|
+
success?: boolean;
|
|
11
|
+
error?: string;
|
|
12
|
+
source?: string;
|
|
13
|
+
}
|
|
14
|
+
export interface ApprovalRelayService {
|
|
15
|
+
getApprovalSnapshot(): ApprovalSnapshot;
|
|
16
|
+
subscribeApprovals(listener: (snapshot: ApprovalSnapshot) => void): () => void;
|
|
17
|
+
}
|
|
18
|
+
export interface ApprovalRelayOptions {
|
|
19
|
+
ownerId?: string;
|
|
20
|
+
}
|
|
21
|
+
export declare function isStreamProgressEnabled(value: unknown): value is true;
|
|
22
|
+
export declare function formatStepProgressText(step: number, kind: string, rationale?: string): string;
|
|
23
|
+
export declare function buildStepProgressContent(input: StepProgressInput): Content;
|
|
24
|
+
export declare function buildApprovalPromptContent(approval: PendingApproval, options?: ApprovalRelayOptions): Content;
|
|
25
|
+
export declare function withApprovalRelay<T>(service: ApprovalRelayService, callback: HandlerCallback | undefined, run: () => Promise<T>, options?: ApprovalRelayOptions): Promise<T>;
|
|
26
|
+
//# sourceMappingURL=progress.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"progress.d.ts","sourceRoot":"","sources":["../../src/actions/progress.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,OAAO,EACZ,KAAK,eAAe,EAGrB,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAErE,eAAO,MAAM,sBAAsB,oBAAoB,CAAC;AACxD,eAAO,MAAM,4BAA4B,yBAAyB,CAAC;AAEnE,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,oBAAoB;IACnC,mBAAmB,IAAI,gBAAgB,CAAC;IACxC,kBAAkB,CAChB,QAAQ,EAAE,CAAC,QAAQ,EAAE,gBAAgB,KAAK,IAAI,GAC7C,MAAM,IAAI,CAAC;CACf;AAED,MAAM,WAAW,oBAAoB;IACnC,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,IAAI,CAErE;AAED,wBAAgB,sBAAsB,CACpC,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,MAAM,EACZ,SAAS,CAAC,EAAE,MAAM,GACjB,MAAM,CAGR;AAED,wBAAgB,wBAAwB,CAAC,KAAK,EAAE,iBAAiB,GAAG,OAAO,CAoB1E;AAYD,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,eAAe,EACzB,OAAO,GAAE,oBAAyB,GACjC,OAAO,CAoBT;AAED,wBAAsB,iBAAiB,CAAC,CAAC,EACvC,OAAO,EAAE,oBAAoB,EAC7B,QAAQ,EAAE,eAAe,GAAG,SAAS,EACrC,GAAG,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,EACrB,OAAO,GAAE,oBAAyB,GACjC,OAAO,CAAC,CAAC,CAAC,CAwCZ"}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WS7 — COMPUTER_USE_AGENT action.
|
|
3
|
+
*
|
|
4
|
+
* High-level "give me a goal, I'll click my way there" entry point. The
|
|
5
|
+
* planner emits one of these instead of the lower-level COMPUTER_USE_CLICK
|
|
6
|
+
* etc. when the right action isn't obvious from the prompt.
|
|
7
|
+
*
|
|
8
|
+
* Loop:
|
|
9
|
+
* 1. refresh scene (`agent-turn`)
|
|
10
|
+
* 2. capture per-display PNGs
|
|
11
|
+
* 3. Brain → Cascade → ProposedAction
|
|
12
|
+
* 4. dispatch into ComputerInterface
|
|
13
|
+
* 5. observe (auto-screenshot via the existing service flow happens for
|
|
14
|
+
* ProposedAction.kind=click/etc; explicit captureAllDisplays after
|
|
15
|
+
* every step)
|
|
16
|
+
* 6. repeat until `finish` or `maxSteps`
|
|
17
|
+
*
|
|
18
|
+
* Trajectory events are emitted as structured `logger.info` lines with a
|
|
19
|
+
* `evt: "computeruse.agent.step"` payload, which the trajectory-logger app
|
|
20
|
+
* picks up via standard log capture. When `streamProgress` is enabled, the
|
|
21
|
+
* same step boundary also emits a `HandlerCallback` status to the origin chat.
|
|
22
|
+
* We don't take a hard dependency on the trajectory-logger plugin from here.
|
|
23
|
+
*/
|
|
24
|
+
import { type Action, type Content, type IAgentRuntime } from "@elizaos/core";
|
|
25
|
+
import { type AgentMiddleware, type TrajectoryEntry } from "../actor/agent-callbacks.js";
|
|
26
|
+
import { type AgentLoop, type AgentLoopStats } from "../actor/agent-loop.js";
|
|
27
|
+
import type { Brain } from "../actor/brain.js";
|
|
28
|
+
import { type ComputerInterface } from "../actor/computer-interface.js";
|
|
29
|
+
import { type DisplayCapture } from "../platform/capture.js";
|
|
30
|
+
import type { ComputerUseService } from "../services/computer-use-service.js";
|
|
31
|
+
export interface ComputerUseAgentParams {
|
|
32
|
+
goal: string;
|
|
33
|
+
maxSteps?: number;
|
|
34
|
+
/**
|
|
35
|
+
* When true, emit a chat message after each dispatched step so a long-running
|
|
36
|
+
* goal does not leave the origin chat silent for minutes (#8912). The action
|
|
37
|
+
* handler wires this to the runtime HandlerCallback; the loop itself calls
|
|
38
|
+
* per-step progress hooks.
|
|
39
|
+
*/
|
|
40
|
+
streamProgress?: boolean;
|
|
41
|
+
/** Wall-clock budget (ms) — the loop aborts before a step that exceeds it. */
|
|
42
|
+
maxDurationMs?: number;
|
|
43
|
+
/**
|
|
44
|
+
* Image-retention window (#9170 M11): keep only the N most-recent steps'
|
|
45
|
+
* screenshots in the bounded history. Off (unbounded) when unset.
|
|
46
|
+
*/
|
|
47
|
+
imageRetentionLast?: number;
|
|
48
|
+
}
|
|
49
|
+
/** One per-step progress event, surfaced when `streamProgress` is set. */
|
|
50
|
+
export interface ComputerUseAgentStepProgress {
|
|
51
|
+
goal: string;
|
|
52
|
+
step: number;
|
|
53
|
+
maxSteps: number;
|
|
54
|
+
sceneSummary: string;
|
|
55
|
+
actionKind: string;
|
|
56
|
+
rationale: string;
|
|
57
|
+
rois: number;
|
|
58
|
+
result: {
|
|
59
|
+
success: boolean;
|
|
60
|
+
error?: string;
|
|
61
|
+
};
|
|
62
|
+
}
|
|
63
|
+
interface AgentDeps {
|
|
64
|
+
brain?: Brain;
|
|
65
|
+
/** Pre-built loop override (tests). Supersedes model-string selection. */
|
|
66
|
+
loop?: AgentLoop;
|
|
67
|
+
/** Loop model-string override (tests / explicit selection). */
|
|
68
|
+
loopModel?: string;
|
|
69
|
+
/**
|
|
70
|
+
* Callback middleware override (#9170 M11). When set, replaces the default
|
|
71
|
+
* pipeline (operator-normalizer + trajectory, plus budget/image-retention
|
|
72
|
+
* when configured via params).
|
|
73
|
+
*/
|
|
74
|
+
middleware?: AgentMiddleware[];
|
|
75
|
+
/** Clock override (tests) — defaults to `Date.now`. */
|
|
76
|
+
now?: () => number;
|
|
77
|
+
computerInterface?: ComputerInterface;
|
|
78
|
+
captureAll?: () => Promise<DisplayCapture[]>;
|
|
79
|
+
/** Called after each dispatched step when `params.streamProgress` is set. */
|
|
80
|
+
onStepProgress?: (progress: ComputerUseAgentStepProgress) => Promise<void> | void;
|
|
81
|
+
/** Called with compact Content after each dispatched step when enabled. */
|
|
82
|
+
onCompactStepProgress?: (content: Content) => Promise<void> | void;
|
|
83
|
+
}
|
|
84
|
+
export interface ComputerUseAgentReport {
|
|
85
|
+
goal: string;
|
|
86
|
+
steps: Array<{
|
|
87
|
+
step: number;
|
|
88
|
+
sceneSummary: string;
|
|
89
|
+
actionKind: string;
|
|
90
|
+
rationale: string;
|
|
91
|
+
rois: number;
|
|
92
|
+
result: {
|
|
93
|
+
success: boolean;
|
|
94
|
+
error?: string;
|
|
95
|
+
};
|
|
96
|
+
}>;
|
|
97
|
+
finished: boolean;
|
|
98
|
+
reason: "finish" | "max_steps" | "error" | "budget";
|
|
99
|
+
error?: string;
|
|
100
|
+
/** Per-step transcript recorded by the trajectory middleware (#9170 M11). */
|
|
101
|
+
trajectory?: TrajectoryEntry[];
|
|
102
|
+
/** Per-run model-call accounting, when the loop reports it (#9105). */
|
|
103
|
+
modelStats?: AgentLoopStats;
|
|
104
|
+
}
|
|
105
|
+
export declare function formatComputerUseAgentProgress(progress: ComputerUseAgentStepProgress): string;
|
|
106
|
+
/**
|
|
107
|
+
* Run one Brain/Cascade/Dispatch loop. Exported so tests can drive it
|
|
108
|
+
* without exercising the full Action plumbing.
|
|
109
|
+
*/
|
|
110
|
+
export declare function runComputerUseAgentLoop(runtime: IAgentRuntime | null, params: ComputerUseAgentParams, service: ComputerUseService, deps?: AgentDeps): Promise<ComputerUseAgentReport>;
|
|
111
|
+
export declare const computerUseAgentAction: Action;
|
|
112
|
+
export {};
|
|
113
|
+
//# sourceMappingURL=use-computer-agent.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"use-computer-agent.d.ts","sourceRoot":"","sources":["../../src/actions/use-computer-agent.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EACL,KAAK,MAAM,EAEX,KAAK,OAAO,EAGZ,KAAK,aAAa,EAInB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,KAAK,eAAe,EAWpB,KAAK,eAAe,EACrB,MAAM,6BAA6B,CAAC;AACrC,OAAO,EAEL,KAAK,SAAS,EACd,KAAK,cAAc,EAGpB,MAAM,wBAAwB,CAAC;AAChC,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EACL,KAAK,iBAAiB,EAEvB,MAAM,gCAAgC,CAAC;AAExC,OAAO,EAEL,KAAK,cAAc,EACpB,MAAM,wBAAwB,CAAC;AAGhC,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,qCAAqC,CAAC;AAS9E,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,8EAA8E;IAC9E,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB;;;OAGG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAED,0EAA0E;AAC1E,MAAM,WAAW,4BAA4B;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE;QAAE,OAAO,EAAE,OAAO,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CAC9C;AAED,UAAU,SAAS;IACjB,KAAK,CAAC,EAAE,KAAK,CAAC;IACd,0EAA0E;IAC1E,IAAI,CAAC,EAAE,SAAS,CAAC;IACjB,+DAA+D;IAC/D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;OAIG;IACH,UAAU,CAAC,EAAE,eAAe,EAAE,CAAC;IAC/B,uDAAuD;IACvD,GAAG,CAAC,EAAE,MAAM,MAAM,CAAC;IACnB,iBAAiB,CAAC,EAAE,iBAAiB,CAAC;IACtC,UAAU,CAAC,EAAE,MAAM,OAAO,CAAC,cAAc,EAAE,CAAC,CAAC;IAC7C,6EAA6E;IAC7E,cAAc,CAAC,EAAE,CACf,QAAQ,EAAE,4BAA4B,KACnC,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;IAC1B,2EAA2E;IAC3E,qBAAqB,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;CACpE;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,KAAK,CAAC;QACX,IAAI,EAAE,MAAM,CAAC;QACb,YAAY,EAAE,MAAM,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;QACnB,SAAS,EAAE,MAAM,CAAC;QAClB,IAAI,EAAE,MAAM,CAAC;QACb,MAAM,EAAE;YAAE,OAAO,EAAE,OAAO,CAAC;YAAC,KAAK,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC;KAC9C,CAAC,CAAC;IACH,QAAQ,EAAE,OAAO,CAAC;IAClB,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,OAAO,GAAG,QAAQ,CAAC;IACpD,K
AAK,CAAC,EAAE,MAAM,CAAC;IACf,6EAA6E;IAC7E,UAAU,CAAC,EAAE,eAAe,EAAE,CAAC;IAC/B,uEAAuE;IACvE,UAAU,CAAC,EAAE,cAAc,CAAC;CAC7B;AAED,wBAAgB,8BAA8B,CAC5C,QAAQ,EAAE,4BAA4B,GACrC,MAAM,CAMR;AAmBD;;;GAGG;AACH,wBAAsB,uBAAuB,CAC3C,OAAO,EAAE,aAAa,GAAG,IAAI,EAC7B,MAAM,EAAE,sBAAsB,EAC9B,OAAO,EAAE,kBAAkB,EAC3B,IAAI,GAAE,SAAc,GACnB,OAAO,CAAC,sBAAsB,CAAC,CAuLjC;AAqFD,eAAO,MAAM,sBAAsB,EAAE,MA+HpC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"use-computer.d.ts","sourceRoot":"","sources":["../../src/actions/use-computer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,MAAM,EAOP,MAAM,eAAe,CAAC;
|
|
1
|
+
{"version":3,"file":"use-computer.d.ts","sourceRoot":"","sources":["../../src/actions/use-computer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,MAAM,EAOP,MAAM,eAAe,CAAC;AAgLvB,eAAO,MAAM,iBAAiB,EAAE,MA6W/B,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure handler for the WINDOW parent action. Takes the live
|
|
3
|
+
* `ComputerUseService` and resolved params, executes the underlying
|
|
4
|
+
* window-management call, and returns an `ActionResult`.
|
|
5
|
+
*/
|
|
6
|
+
import type { ActionResult, HandlerCallback } from "@elizaos/core";
|
|
7
|
+
import type { ComputerUseService } from "../services/computer-use-service.js";
|
|
8
|
+
import type { WindowActionParams } from "../types.js";
|
|
9
|
+
import { type ApprovalRelayOptions } from "./progress.js";
|
|
10
|
+
export declare function handleWindowOp(service: ComputerUseService, params: WindowActionParams, callback?: HandlerCallback, approvalOptions?: ApprovalRelayOptions): Promise<ActionResult>;
|
|
11
|
+
//# sourceMappingURL=window-handlers.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"window-handlers.d.ts","sourceRoot":"","sources":["../../src/actions/window-handlers.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACnE,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,qCAAqC,CAAC;AAC9E,OAAO,KAAK,EAAE,kBAAkB,EAAsB,MAAM,aAAa,CAAC;AAE1E,OAAO,EAAE,KAAK,oBAAoB,EAAqB,MAAM,eAAe,CAAC;AA0B7E,wBAAsB,cAAc,CAClC,OAAO,EAAE,kBAAkB,EAC3B,MAAM,EAAE,kBAAkB,EAC1B,QAAQ,CAAC,EAAE,eAAe,EAC1B,eAAe,GAAE,oBAAyB,GACzC,OAAO,CAAC,YAAY,CAAC,CAwBvB"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WINDOW parent action — manages local desktop windows (list / focus /
|
|
3
|
+
* switch / arrange / move / minimize / maximize / restore / close).
|
|
4
|
+
*
|
|
5
|
+
* Pointer and keyboard primitives live on COMPUTER_USE. File and shell
|
|
6
|
+
* operations live on the FILE and SHELL actions in their own plugins —
|
|
7
|
+
* this plugin no longer exposes them.
|
|
8
|
+
*/
|
|
9
|
+
import type { Action } from "@elizaos/core";
|
|
10
|
+
export declare const windowAction: Action;
|
|
11
|
+
//# sourceMappingURL=window.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"window.d.ts","sourceRoot":"","sources":["../../src/actions/window.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EACV,MAAM,EAOP,MAAM,eAAe,CAAC;AAgEvB,eAAO,MAAM,YAAY,EAAE,MAkK1B,CAAC"}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WS7 — Actor (optional fine-grained grounding).
|
|
3
|
+
*
|
|
4
|
+
* The Actor is responsible for converting a Brain-issued reference
|
|
5
|
+
* ("click the Save button I see in this crop") into concrete display-local
|
|
6
|
+
* pixel coords.
|
|
7
|
+
*
|
|
8
|
+
* Primary path — deterministic (no model):
|
|
9
|
+
* `OcrCoordinateGroundingActor` resolves a `ref: "t<displayId>-<seq>"`
|
|
10
|
+
* (OCR id) or `ref: "a<displayId>-<seq>"` (AX id) directly from the
|
|
11
|
+
* Scene. Click point is bbox-center. No VLM call, fully reproducible,
|
|
12
|
+
* what 99% of the cascade should use.
|
|
13
|
+
*
|
|
14
|
+
* Optional secondary path — VLM:
|
|
15
|
+
* `OsAtlasProActor` is a typed adapter for an operator-provided model-server
|
|
16
|
+
* endpoint (e.g. an OS-Atlas-Pro vLLM service). Unless a deployment
|
|
17
|
+
* registers that endpoint, the cascade uses the OCR/AX grounding above.
|
|
18
|
+
*
|
|
19
|
+
* Register the active Actor on the cascade via `setActor(actor)` (see
|
|
20
|
+
* `cascade.ts`). If none is registered, the cascade uses the OCR/AX actor
|
|
21
|
+
* automatically.
|
|
22
|
+
*/
|
|
23
|
+
import type { Scene } from "../scene/scene-types.js";
|
|
24
|
+
import type { GroundingResult, ReferenceTarget } from "./types.js";
|
|
25
|
+
export interface ActorGroundArgs {
|
|
26
|
+
/** Display the Brain wants to act on. */
|
|
27
|
+
displayId: number;
|
|
28
|
+
/**
|
|
29
|
+
* Cropped image of the ROI at native resolution (PNG bytes). May be an
|
|
30
|
+
* empty Buffer when the deterministic grounding doesn't need image bytes.
|
|
31
|
+
*/
|
|
32
|
+
croppedImage: Buffer;
|
|
33
|
+
/** Hint from the Brain: "the Save button in the dialog footer". */
|
|
34
|
+
hint: string;
|
|
35
|
+
/** Optional reference from `BrainProposedAction.ref`. */
|
|
36
|
+
ref?: string;
|
|
37
|
+
}
|
|
38
|
+
export interface Actor {
|
|
39
|
+
readonly name: string;
|
|
40
|
+
ground(args: ActorGroundArgs): Promise<GroundingResult>;
|
|
41
|
+
}
|
|
42
|
+
export declare class OcrCoordinateGroundingActor implements Actor {
|
|
43
|
+
private readonly getScene;
|
|
44
|
+
readonly name = "ocr-ax-grounding";
|
|
45
|
+
constructor(getScene: () => Scene | null);
|
|
46
|
+
ground(args: ActorGroundArgs): Promise<GroundingResult>;
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Look up a scene element by stable id, OR by case-insensitive label match
|
|
50
|
+
* when an id is absent. Used by both the deterministic actor and the cascade
|
|
51
|
+
* dispatcher to validate Brain output.
|
|
52
|
+
*/
|
|
53
|
+
export declare function resolveReference(scene: Scene, ref: string | undefined, hint: string, preferredDisplay: number): ReferenceTarget | null;
|
|
54
|
+
export interface OsAtlasProActorOptions {
|
|
55
|
+
/** Endpoint of the model server, e.g. `http://localhost:8000/v1`. */
|
|
56
|
+
endpoint: string;
|
|
57
|
+
/** Optional auth header. */
|
|
58
|
+
apiKey?: string;
|
|
59
|
+
/** Model identifier on the server. */
|
|
60
|
+
model?: string;
|
|
61
|
+
/** Override the HTTP fetch (mostly for tests). */
|
|
62
|
+
fetcher?: (input: string, init: {
|
|
63
|
+
body: string;
|
|
64
|
+
headers: Record<string, string>;
|
|
65
|
+
}) => Promise<{
|
|
66
|
+
ok: boolean;
|
|
67
|
+
status: number;
|
|
68
|
+
text: () => Promise<string>;
|
|
69
|
+
}>;
|
|
70
|
+
}
|
|
71
|
+
/**
|
|
72
|
+
* Adapter for a server-side OS-Atlas-Pro (or compatible) grounding model.
|
|
73
|
+
* Not wired into the cascade by default. The contract: POST a JSON payload
|
|
74
|
+
* with `{ image: base64, hint }`, expect `{ x, y, confidence }` in image
|
|
75
|
+
* coordinates of the crop. The cascade is responsible for converting those
|
|
76
|
+
* crop-local coords back to display-local before dispatch.
|
|
77
|
+
*/
|
|
78
|
+
export declare class OsAtlasProActor implements Actor {
|
|
79
|
+
private readonly opts;
|
|
80
|
+
readonly name = "osatlas-pro";
|
|
81
|
+
constructor(opts: OsAtlasProActorOptions);
|
|
82
|
+
ground(args: ActorGroundArgs): Promise<GroundingResult>;
|
|
83
|
+
}
|
|
84
|
+
//# sourceMappingURL=actor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"actor.d.ts","sourceRoot":"","sources":["../../src/actor/actor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH,OAAO,KAAK,EAAE,KAAK,EAA4B,MAAM,yBAAyB,CAAC;AAC/E,OAAO,KAAK,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAEnE,MAAM,WAAW,eAAe;IAC9B,yCAAyC;IACzC,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,YAAY,EAAE,MAAM,CAAC;IACrB,mEAAmE;IACnE,IAAI,EAAE,MAAM,CAAC;IACb,yDAAyD;IACzD,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,KAAK;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,IAAI,EAAE,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;CACzD;AAID,qBAAa,2BAA4B,YAAW,KAAK;IAG3C,OAAO,CAAC,QAAQ,CAAC,QAAQ;IAFrC,QAAQ,CAAC,IAAI,sBAAsB;gBAEN,QAAQ,EAAE,MAAM,KAAK,GAAG,IAAI;IAEnD,MAAM,CAAC,IAAI,EAAE,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC;CAsB9D;AAED;;;;GAIG;AACH,wBAAgB,gBAAgB,CAC9B,KAAK,EAAE,KAAK,EACZ,GAAG,EAAE,MAAM,GAAG,SAAS,EACvB,IAAI,EAAE,MAAM,EACZ,gBAAgB,EAAE,MAAM,GACvB,eAAe,GAAG,IAAI,CA+BxB;AA0CD,MAAM,WAAW,sBAAsB;IACrC,qEAAqE;IACrE,QAAQ,EAAE,MAAM,CAAC;IACjB,4BAA4B;IAC5B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,sCAAsC;IACtC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,kDAAkD;IAClD,OAAO,CAAC,EAAE,CACR,KAAK,EAAE,MAAM,EACb,IAAI,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;KAAE,KACpD,OAAO,CAAC;QAAE,EAAE,EAAE,OAAO,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,OAAO,CAAC,MAAM,CAAC,CAAA;KAAE,CAAC,CAAC;CAC5E;AAED;;;;;;GAMG;AACH,qBAAa,eAAgB,YAAW,KAAK;IAG/B,OAAO,CAAC,QAAQ,CAAC,IAAI;IAFjC,QAAQ,CAAC,IAAI,iBAAiB;gBAED,IAAI,EAAE,sBAAsB;IAQnD,MAAM,CAAC,IAAI,EAAE,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC;CAkD9D"}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent-loop callback middleware (#9170 M11).
|
|
3
|
+
*
|
|
4
|
+
* trycua/cua threads a callback pipeline through its agent loop: budget caps,
|
|
5
|
+
* image-retention (keep only the N most-recent screenshots in context),
|
|
6
|
+
* operator-normalization (clean up the model's proposed action), and trajectory
|
|
7
|
+
* recording. Each callback is a thin middleware that observes (and sometimes
|
|
8
|
+
* transforms or aborts) the loop without the loop knowing which middlewares are
|
|
9
|
+
* present.
|
|
10
|
+
*
|
|
11
|
+
* This module defines the `AgentMiddleware` hook set and four built-ins. The
|
|
12
|
+
* runner (`use-computer-agent.ts`) fires the hooks at fixed points:
|
|
13
|
+
* onRunStart → [ beforeStep → onCaptures → transformProposed → afterStep ]* →
|
|
14
|
+
* onRunEnd
|
|
15
|
+
*
|
|
16
|
+
* Middlewares are pure-by-default and composable: `runBeforeStep`, etc.,
|
|
17
|
+
* fold the list in order and short-circuit on the first abort.
|
|
18
|
+
*/
|
|
19
|
+
import type { DisplayCapture } from "../platform/capture.js";
|
|
20
|
+
import type { CascadeResult, ProposedAction } from "./types.js";
|
|
21
|
+
export interface AgentRunContext {
|
|
22
|
+
goal: string;
|
|
23
|
+
maxSteps: number;
|
|
24
|
+
}
|
|
25
|
+
export interface AgentStepContext {
|
|
26
|
+
step: number;
|
|
27
|
+
maxSteps: number;
|
|
28
|
+
goal: string;
|
|
29
|
+
/** Wall-clock ms since the run started (set by the runner). */
|
|
30
|
+
elapsedMs: number;
|
|
31
|
+
}
|
|
32
|
+
export interface AgentDispatchContext {
|
|
33
|
+
step: number;
|
|
34
|
+
goal: string;
|
|
35
|
+
proposed: CascadeResult;
|
|
36
|
+
dispatchSuccess: boolean;
|
|
37
|
+
error?: string;
|
|
38
|
+
}
|
|
39
|
+
/** Returned by `beforeStep` — abort halts the loop with `reason`. */
|
|
40
|
+
export interface AgentStepDecision {
|
|
41
|
+
abort?: boolean;
|
|
42
|
+
reason?: string;
|
|
43
|
+
}
|
|
44
|
+
export interface AgentRunSummary {
|
|
45
|
+
goal: string;
|
|
46
|
+
steps: number;
|
|
47
|
+
finished: boolean;
|
|
48
|
+
reason: string;
|
|
49
|
+
}
|
|
50
|
+
export interface AgentMiddleware {
|
|
51
|
+
readonly name: string;
|
|
52
|
+
onRunStart?(ctx: AgentRunContext): void | Promise<void>;
|
|
53
|
+
/** Inspect/abort before a step runs (budget caps live here). */
|
|
54
|
+
beforeStep?(ctx: AgentStepContext): AgentStepDecision | Promise<AgentStepDecision>;
|
|
55
|
+
/** Observe the captured frames (image-retention bookkeeping). */
|
|
56
|
+
onCaptures?(captures: Map<number, DisplayCapture>, ctx: AgentStepContext): void | Promise<void>;
|
|
57
|
+
/** Transform the planned step before dispatch (operator-normalizer). */
|
|
58
|
+
transformProposed?(proposed: CascadeResult, ctx: AgentStepContext): CascadeResult | Promise<CascadeResult>;
|
|
59
|
+
/** Observe a dispatched step (trajectory recording). */
|
|
60
|
+
afterStep?(ctx: AgentDispatchContext): void | Promise<void>;
|
|
61
|
+
onRunEnd?(summary: AgentRunSummary): void | Promise<void>;
|
|
62
|
+
}
|
|
63
|
+
export declare function runOnRunStart(middlewares: readonly AgentMiddleware[], ctx: AgentRunContext): Promise<void>;
|
|
64
|
+
/** Fold `beforeStep`; the FIRST abort wins (and names the middleware). */
|
|
65
|
+
export declare function runBeforeStep(middlewares: readonly AgentMiddleware[], ctx: AgentStepContext): Promise<AgentStepDecision>;
|
|
66
|
+
export declare function runOnCaptures(middlewares: readonly AgentMiddleware[], captures: Map<number, DisplayCapture>, ctx: AgentStepContext): Promise<void>;
|
|
67
|
+
/** Fold `transformProposed` left-to-right; each sees the prior's output. */
|
|
68
|
+
export declare function runTransformProposed(middlewares: readonly AgentMiddleware[], proposed: CascadeResult, ctx: AgentStepContext): Promise<CascadeResult>;
|
|
69
|
+
export declare function runAfterStep(middlewares: readonly AgentMiddleware[], ctx: AgentDispatchContext): Promise<void>;
|
|
70
|
+
export declare function runOnRunEnd(middlewares: readonly AgentMiddleware[], summary: AgentRunSummary): Promise<void>;
|
|
71
|
+
export interface BudgetCapOptions {
|
|
72
|
+
/** Abort once this many steps have STARTED (independent of the loop's own
|
|
73
|
+
* maxSteps; use to cap below it, e.g. cost control). */
|
|
74
|
+
maxSteps?: number;
|
|
75
|
+
/** Abort once wall-clock elapsed exceeds this many ms. */
|
|
76
|
+
maxDurationMs?: number;
|
|
77
|
+
}
|
|
78
|
+
/**
|
|
79
|
+
* Halts the loop when a step or time budget is exhausted. Caps below the loop's
|
|
80
|
+
* own `maxSteps`, and adds a wall-clock cap the loop has no notion of.
|
|
81
|
+
*/
|
|
82
|
+
export declare function createBudgetCapMiddleware(options: BudgetCapOptions): AgentMiddleware;
|
|
83
|
+
export interface ImageRetentionMiddleware extends AgentMiddleware {
|
|
84
|
+
/** Summary of the captures retained from the most recent steps: each entry's step number and display ids. */
|
|
85
|
+
retained(): Array<{
|
|
86
|
+
step: number;
|
|
87
|
+
displayIds: number[];
|
|
88
|
+
}>;
|
|
89
|
+
}
|
|
90
|
+
/**
|
|
91
|
+
* Bounds the screenshot history to the `keepLast` most-recent steps, mirroring
|
|
92
|
+
* cua's image-retention (older frames fall out of context to cap token cost).
|
|
93
|
+
* The runner forwards the per-step captures; this middleware keeps the bounded
|
|
94
|
+
* window that a model-history consumer should send.
|
|
95
|
+
*/
|
|
96
|
+
export declare function createImageRetentionMiddleware(options: {
|
|
97
|
+
keepLast: number;
|
|
98
|
+
}): ImageRetentionMiddleware;
|
|
99
|
+
/**
|
|
100
|
+
* Normalize the model's proposed action into the canonical dispatch shape:
|
|
101
|
+
* integer coordinates, trimmed type text, deduped/lowercased hotkey keys. Pure
|
|
102
|
+
* and idempotent — re-normalizing already-clean input is a no-op.
|
|
103
|
+
*/
|
|
104
|
+
export declare function normalizeProposedAction(action: ProposedAction): ProposedAction;
|
|
105
|
+
/** Operator-normalizer middleware — cleans `proposed.proposed` before dispatch. */
|
|
106
|
+
export declare function createOperatorNormalizerMiddleware(): AgentMiddleware;
|
|
107
|
+
export interface TrajectoryEntry {
|
|
108
|
+
step: number;
|
|
109
|
+
goal: string;
|
|
110
|
+
actionKind: string;
|
|
111
|
+
rationale: string;
|
|
112
|
+
success: boolean;
|
|
113
|
+
error?: string;
|
|
114
|
+
}
|
|
115
|
+
export interface TrajectoryMiddleware extends AgentMiddleware {
|
|
116
|
+
/** The recorded trajectory so far. */
|
|
117
|
+
entries(): TrajectoryEntry[];
|
|
118
|
+
}
|
|
119
|
+
/**
|
|
120
|
+
* Records one entry per dispatched step. Independent of the existing
|
|
121
|
+
* `logger.info` trajectory events — this gives an in-memory transcript the
|
|
122
|
+
* caller can attach to the run report or persist.
|
|
123
|
+
*/
|
|
124
|
+
export declare function createTrajectoryMiddleware(options?: {
|
|
125
|
+
/** Also emit a debug log line per step. Default false. */
|
|
126
|
+
log?: boolean;
|
|
127
|
+
}): TrajectoryMiddleware;
|
|
128
|
+
//# sourceMappingURL=agent-callbacks.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-callbacks.d.ts","sourceRoot":"","sources":["../../src/actor/agent-callbacks.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAGH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AAC7D,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAEhE,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,+DAA+D;IAC/D,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,oBAAoB;IACnC,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,aAAa,CAAC;IACxB,eAAe,EAAE,OAAO,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,qEAAqE;AACrE,MAAM,WAAW,iBAAiB;IAChC,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,UAAU,CAAC,CAAC,GAAG,EAAE,eAAe,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACxD,gEAAgE;IAChE,UAAU,CAAC,CACT,GAAG,EAAE,gBAAgB,GACpB,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAC;IAClD,iEAAiE;IACjE,UAAU,CAAC,CACT,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC,EACrC,GAAG,EAAE,gBAAgB,GACpB,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACxB,wEAAwE;IACxE,iBAAiB,CAAC,CAChB,QAAQ,EAAE,aAAa,EACvB,GAAG,EAAE,gBAAgB,GACpB,aAAa,GAAG,OAAO,CAAC,aAAa,CAAC,CAAC;IAC1C,wDAAwD;IACxD,SAAS,CAAC,CAAC,GAAG,EAAE,oBAAoB,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5D,QAAQ,CAAC,CAAC,OAAO,EAAE,eAAe,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CAC3D;AAID,wBAAsB,aAAa,CACjC,WAAW,EAAE,SAAS,eAAe,EAAE,EACvC,GAAG,EAAE,eAAe,GACnB,OAAO,CAAC,IAAI,CAAC,CAEf;AAED,0EAA0E;AAC1E,wBAAsB,aAAa,CACjC,WAAW,EAAE,SAAS,eAAe,EAAE,EACvC,GAAG,EAAE,gBAAgB,GACpB,OAAO,CAAC,iBAAiB,CAAC,CAW5B;AAED,wBAAsB,aAAa,CACjC,WAAW,EAAE,SAAS,eAAe,EAAE,EACvC,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC,EACrC,GAAG,EAAE,gBAAgB,GACpB,OAAO,CAAC,IAAI,CAAC,CAEf;AAED,4EAA4E;AAC5E,wBAAsB,oBAAoB,CACxC,WAAW,EAAE,SAAS,eAAe,EAAE,EACvC,QAAQ,EAAE,aAAa,EACvB,GAAG,EAAE,gBAAg
B,GACpB,OAAO,CAAC,aAAa,CAAC,CAMxB;AAED,wBAAsB,YAAY,CAChC,WAAW,EAAE,SAAS,eAAe,EAAE,EACvC,GAAG,EAAE,oBAAoB,GACxB,OAAO,CAAC,IAAI,CAAC,CAEf;AAED,wBAAsB,WAAW,CAC/B,WAAW,EAAE,SAAS,eAAe,EAAE,EACvC,OAAO,EAAE,eAAe,GACvB,OAAO,CAAC,IAAI,CAAC,CAEf;AAID,MAAM,WAAW,gBAAgB;IAC/B;4DACwD;IACxD,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,0DAA0D;IAC1D,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;;GAGG;AACH,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,gBAAgB,GACxB,eAAe,CAsBjB;AAID,MAAM,WAAW,wBAAyB,SAAQ,eAAe;IAC/D,sEAAsE;IACtE,QAAQ,IAAI,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC,CAAC;CAC3D;AAED;;;;;GAKG;AACH,wBAAgB,8BAA8B,CAAC,OAAO,EAAE;IACtD,QAAQ,EAAE,MAAM,CAAC;CAClB,GAAG,wBAAwB,CAgB3B;AASD;;;;GAIG;AACH,wBAAgB,uBAAuB,CACrC,MAAM,EAAE,cAAc,GACrB,cAAc,CAiBhB;AAED,mFAAmF;AACnF,wBAAgB,kCAAkC,IAAI,eAAe,CAUpE;AAID,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,oBAAqB,SAAQ,eAAe;IAC3D,sCAAsC;IACtC,OAAO,IAAI,eAAe,EAAE,CAAC;CAC9B;AAED;;;;GAIG;AACH,wBAAgB,0BAA0B,CAAC,OAAO,CAAC,EAAE;IACnD,0DAA0D;IAC1D,GAAG,CAAC,EAAE,OAAO,CAAC;CACf,GAAG,oBAAoB,CAwBvB"}
|