@qwen-code/qwen-code 0.18.0 → 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/bundled/loop/SKILL.md +2 -1
  2. package/bundled/qc-helper/docs/configuration/auth.md +1 -1
  3. package/bundled/qc-helper/docs/configuration/model-providers.md +12 -5
  4. package/bundled/qc-helper/docs/configuration/settings.md +30 -27
  5. package/bundled/qc-helper/docs/features/dual-output.md +37 -3
  6. package/bundled/qc-helper/docs/features/skills.md +29 -3
  7. package/bundled/qc-helper/docs/features/sub-agents.md +2 -1
  8. package/bundled/qc-helper/docs/qwen-serve.md +26 -18
  9. package/chunks/{agent-LOTJK6AH.js → agent-XT7NHZ5H.js} +21 -20
  10. package/chunks/{agent-headless-TU3EPMYU.js → agent-headless-LNRE63ZL.js} +21 -20
  11. package/chunks/{anthropicContentGenerator-2HBRNQ3B.js → anthropicContentGenerator-DCI26OQF.js} +4 -4
  12. package/chunks/{askUserQuestion-OGCMIBQM.js → askUserQuestion-ITYUTWLR.js} +2 -2
  13. package/chunks/{ca-BARBRL6N.js → ca-RK4QPLIX.js} +18 -1
  14. package/chunks/{chunk-ZTZ4DDQE.js → chunk-3NRO6NHX.js} +2 -2
  15. package/chunks/{chunk-IWAYOW5Q.js → chunk-6T7Y7USE.js} +6566 -4195
  16. package/chunks/{chunk-MFBBBTNY.js → chunk-7KPZFE5A.js} +1 -1
  17. package/chunks/{chunk-XV4HCEVI.js → chunk-A2ZIEEGJ.js} +9 -22
  18. package/chunks/{chunk-A7B4ISQP.js → chunk-B4ZF2KSI.js} +1 -1
  19. package/chunks/chunk-BJ5HQ23U.js +178 -0
  20. package/chunks/{chunk-LBP46COL.js → chunk-BXYRCW2C.js} +83 -15
  21. package/chunks/{chunk-OHEGWO4L.js → chunk-CPVI5J2L.js} +1 -1
  22. package/chunks/{chunk-R7ODSGTK.js → chunk-DHZREJTG.js} +2 -2
  23. package/chunks/{chunk-SEGYWKIH.js → chunk-FIQECJTQ.js} +1 -1
  24. package/chunks/{chunk-HR7SV7AY.js → chunk-HA2UEYZP.js} +6 -2
  25. package/chunks/{chunk-JTQAQBTV.js → chunk-HED55F43.js} +5 -1
  26. package/chunks/{chunk-HLPLOD42.js → chunk-HQUWWSSP.js} +1 -1
  27. package/chunks/{chunk-2Y5SYSD3.js → chunk-IDYDPBBN.js} +3 -3
  28. package/chunks/{chunk-3HTIVKZE.js → chunk-IQHSD7K5.js} +1 -1
  29. package/chunks/{chunk-LEJ42GNY.js → chunk-IS7UA4W3.js} +6 -6
  30. package/chunks/{chunk-B7HXHOHU.js → chunk-LXYWINWF.js} +1 -1
  31. package/chunks/{chunk-IDX6COTE.js → chunk-LYRSMKLS.js} +2 -2
  32. package/chunks/{chunk-M6VTDSVR.js → chunk-LYSND7KR.js} +9 -4
  33. package/chunks/{chunk-EYENRK4D.js → chunk-NNIYWQIS.js} +1 -1
  34. package/chunks/chunk-OMX7CUOE.js +356 -0
  35. package/chunks/{chunk-BIVG75CP.js → chunk-QILTEBWS.js} +1 -1
  36. package/chunks/{chunk-6YIUGZTC.js → chunk-RON7LFNH.js} +281 -132
  37. package/chunks/{chunk-3DHXZ6EV.js → chunk-SFRV6BGY.js} +6 -4
  38. package/chunks/{chunk-7BCMOPIM.js → chunk-WJ3SND6W.js} +31 -12
  39. package/chunks/{chunk-J5MDQKJL.js → chunk-WPTCDQN6.js} +2 -347
  40. package/chunks/{chunk-PL3MVCWD.js → chunk-XZTNBSMW.js} +11 -11
  41. package/chunks/{chunk-72LDN5PP.js → chunk-Y7KMDUEP.js} +1 -1
  42. package/chunks/{chunk-SNGELLWX.js → chunk-ZMIBJS45.js} +1 -1
  43. package/chunks/{chunk-XBY7E2FX.js → chunk-ZOFNJQNJ.js} +6 -4
  44. package/chunks/computer-use-4YX3JGBV.js +2052 -0
  45. package/chunks/{contextCommand-K347QT6O.js → contextCommand-KS2H7MW5.js} +23 -22
  46. package/chunks/cron-create-CAPUKK7I.js +184 -0
  47. package/chunks/{cron-delete-WKWSJZQA.js → cron-delete-G3KAR26Q.js} +27 -4
  48. package/chunks/{cron-list-B52XEXAZ.js → cron-list-ZA4ZIUS5.js} +39 -6
  49. package/chunks/{de-YGKK2BC4.js → de-FGPM4KW5.js} +18 -1
  50. package/chunks/{dist-KAZ3SEBX.js → dist-7YWFWOCJ.js} +1 -1
  51. package/chunks/{dist-4LXD6L6X.js → dist-VEGFONCF.js} +2 -2
  52. package/chunks/{dist-H6ONXVLG.js → dist-X4EXN7W6.js} +1 -1
  53. package/chunks/{dist-PK7DFCAW.js → dist-YLS6NI7H.js} +1 -1
  54. package/chunks/{edit-KU4PJGEX.js → edit-2ARPEO4B.js} +22 -21
  55. package/chunks/{en-DHGYHIHX.js → en-VP6XPGEC.js} +5 -2
  56. package/chunks/{enter-worktree-PPYIDCWI.js → enter-worktree-IXNXNAW5.js} +21 -20
  57. package/chunks/{enterPlanMode-5CZDMCB4.js → enterPlanMode-TAKAGAYP.js} +21 -20
  58. package/chunks/{exit-worktree-UY3CGHKC.js → exit-worktree-LHTRV7ML.js} +21 -20
  59. package/chunks/{exitPlanMode-3DN4QNSG.js → exitPlanMode-MK5UAITL.js} +71 -31
  60. package/chunks/{fr-JXBKPJKQ.js → fr-ATYBVCLT.js} +18 -1
  61. package/chunks/{geminiContentGenerator-7A6I2RWB.js → geminiContentGenerator-HFJIGO77.js} +4 -4
  62. package/chunks/{glob-OFNQSS52.js → glob-I2USLUSC.js} +21 -20
  63. package/chunks/{grep-6J2MSUM5.js → grep-WBIF7THR.js} +30 -26
  64. package/chunks/{ja-TGPZSP2B.js → ja-W2QEA2OI.js} +18 -1
  65. package/chunks/{keychain-token-storage-6IU6ORQN.js → keychain-token-storage-QSTRHKKL.js} +2 -2
  66. package/chunks/{ls-V3O6A5PT.js → ls-2R5RHLX5.js} +3 -3
  67. package/chunks/{lsp-G2OCIFUA.js → lsp-XKH6ZIAN.js} +2 -2
  68. package/chunks/{monitor-FKLHV423.js → monitor-WU7UFATU.js} +21 -20
  69. package/chunks/{notebook-edit-KTBYFKWG.js → notebook-edit-KUHYPXEM.js} +22 -21
  70. package/chunks/{openaiContentGenerator-L5KSWQY7.js → openaiContentGenerator-5PLHYJQL.js} +11 -11
  71. package/chunks/{pt-TIBG6BIO.js → pt-ZKEWJFBW.js} +18 -1
  72. package/chunks/{qwenContentGenerator-PYOXLMBW.js → qwenContentGenerator-TSKW73KY.js} +23 -22
  73. package/chunks/{qwenOAuth2-2KCKWDCF.js → qwenOAuth2-KK433U33.js} +4 -4
  74. package/chunks/{read-file-JQVRK4NU.js → read-file-VIPF2PS6.js} +8 -8
  75. package/chunks/{ripGrep-2L4LPNAJ.js → ripGrep-XLIZTYE7.js} +21 -20
  76. package/chunks/{ru-JBCHCK4L.js → ru-VEKTPJ74.js} +18 -1
  77. package/chunks/{scheduler-FGNXY4JQ.js → scheduler-O66SLJGU.js} +21 -20
  78. package/chunks/{send-message-SZFWNOCL.js → send-message-CTME7DXD.js} +2 -2
  79. package/chunks/{serve-N2IBLA3G.js → serve-BWOLYT62.js} +998 -278
  80. package/chunks/{shell-PTEG6UX4.js → shell-XE7UYKOO.js} +21 -20
  81. package/chunks/{skill-X4NTK4NH.js → skill-RZWM6XMC.js} +10 -10
  82. package/chunks/{src-GLLQ3R5W.js → src-L5P7K4MH.js} +42 -26
  83. package/chunks/{syntheticOutput-IKAY5F6X.js → syntheticOutput-ZJGSU7OQ.js} +3 -3
  84. package/chunks/{task-create-MQICOJFV.js → task-create-EE6JEM7G.js} +7 -6
  85. package/chunks/{task-list-RIHJCH32.js → task-list-EESYAC65.js} +6 -5
  86. package/chunks/{task-stop-FWZRFANS.js → task-stop-XZVCFFYY.js} +2 -2
  87. package/chunks/{task-update-2LHPXOYM.js → task-update-EIO4HNE3.js} +7 -6
  88. package/chunks/{team-create-2E4PF4KN.js → team-create-R2H7Y3SG.js} +21 -20
  89. package/chunks/{team-delete-DAUDQS4J.js → team-delete-A7LXPGV7.js} +6 -5
  90. package/chunks/{todoWrite-HTUACZES.js → todoWrite-VRKSGAWM.js} +4 -4
  91. package/chunks/{tool-search-KTVULRES.js → tool-search-USSQMTMS.js} +8 -8
  92. package/chunks/{web-fetch-CZ7LLKPE.js → web-fetch-GHAZUA54.js} +4 -4
  93. package/chunks/{workflow-L2ZUUDT2.js → workflow-5LNNLNUR.js} +503 -49
  94. package/chunks/{write-file-ZEB2JDYH.js → write-file-2I7HP24C.js} +22 -21
  95. package/chunks/{zh-7H5OQC4I.js → zh-OIXDDQHB.js} +5 -2
  96. package/chunks/{zh-TW-P4IDHD3M.js → zh-TW-6YFNCKTA.js} +5 -2
  97. package/cli-entry.js +19 -0
  98. package/cli.js +6547 -4938
  99. package/locales/ca.js +20 -2
  100. package/locales/de.js +21 -2
  101. package/locales/en.js +7 -4
  102. package/locales/fr.js +22 -2
  103. package/locales/ja.js +22 -2
  104. package/locales/pt.js +21 -2
  105. package/locales/ru.js +20 -2
  106. package/locales/zh-TW.js +6 -4
  107. package/locales/zh.js +6 -4
  108. package/package.json +4 -3
  109. package/chunks/chunk-SKBPNJEW.js +0 -45
  110. package/chunks/computer-use-3RH2DOM6.js +0 -825
  111. package/chunks/cron-create-YJL3KFWI.js +0 -140
@@ -0,0 +1,2052 @@
1
+ // Force strict mode and setup for ESM
2
+ "use strict";
3
+ import {
4
+ Client,
5
+ StdioClientTransport,
6
+ extract
7
+ } from "./chunk-6T7Y7USE.js";
8
+ import "./chunk-K5PGHDBN.js";
9
+ import "./chunk-HQUWWSSP.js";
10
+ import "./chunk-O4PICXES.js";
11
+ import "./chunk-TW522KN6.js";
12
+ import "./chunk-BJ5HQ23U.js";
13
+ import "./chunk-SFRV6BGY.js";
14
+ import "./chunk-ZOFNJQNJ.js";
15
+ import "./chunk-WPTCDQN6.js";
16
+ import "./chunk-OMX7CUOE.js";
17
+ import "./chunk-MLZQVCF3.js";
18
+ import "./chunk-LD2XBG6Z.js";
19
+ import "./chunk-CPVI5J2L.js";
20
+ import "./chunk-ZMIBJS45.js";
21
+ import "./chunk-77WXWU44.js";
22
+ import "./chunk-B4ZF2KSI.js";
23
+ import {
24
+ safeJsonStringify
25
+ } from "./chunk-RON7LFNH.js";
26
+ import "./chunk-3PJXIDKI.js";
27
+ import "./chunk-UWCTAVOD.js";
28
+ import "./chunk-OFEVLU4C.js";
29
+ import "./chunk-IQHSD7K5.js";
30
+ import "./chunk-LYRSMKLS.js";
31
+ import "./chunk-QILTEBWS.js";
32
+ import {
33
+ BaseDeclarativeTool,
34
+ BaseToolInvocation
35
+ } from "./chunk-A2ZIEEGJ.js";
36
+ import "./chunk-IDYDPBBN.js";
37
+ import "./chunk-FIQECJTQ.js";
38
+ import "./chunk-64WXLC72.js";
39
+ import "./chunk-LXYWINWF.js";
40
+ import "./chunk-NNIYWQIS.js";
41
+ import "./chunk-LYSND7KR.js";
42
+ import "./chunk-55ZMG67I.js";
43
+ import "./chunk-H6BD2ELD.js";
44
+ import "./chunk-5IFG2VC4.js";
45
+ import "./chunk-HA2UEYZP.js";
46
+ import "./chunk-ZERZSAZL.js";
47
+ import "./chunk-QN5NZ3UQ.js";
48
+ import "./chunk-BR4QREVK.js";
49
+ import "./chunk-Z2Z3GUXZ.js";
50
+ import {
51
+ init_esbuild_shims
52
+ } from "./chunk-A4BMJM77.js";
53
+ import {
54
+ __name
55
+ } from "./chunk-J2S4EL5Y.js";
56
+
57
+ // packages/core/src/tools/computer-use/index.ts
58
+ init_esbuild_shims();
59
+
60
+ // packages/core/src/tools/computer-use/tool.ts
61
+ init_esbuild_shims();
62
+
63
+ // packages/core/src/tools/computer-use/client.ts
64
+ init_esbuild_shims();
65
+ import { homedir as homedir2 } from "node:os";
66
+
67
+ // packages/core/src/tools/computer-use/constants.ts
68
+ init_esbuild_shims();
69
+ import { join } from "node:path";
70
+ import { homedir } from "node:os";
71
// Pinned cua-driver-rs release. Used as the default `version` argument of the
// asset, URL, directory and approval-key helpers in this module.
var CUA_DRIVER_VERSION = "0.5.2";
// Base URL of the OSS mirror; resolveAssetUrls lists it ahead of GitHub.
var OSS_MIRROR_BASE = "https://qwen-code-assets.oss-cn-hangzhou.aliyuncs.com/computer-use";
// Base URL of the upstream GitHub release downloads; resolveAssetUrls lists it last.
var GITHUB_RELEASE_BASE = "https://github.com/trycua/cua/releases/download";
74
/**
 * Describe the cua-driver release artifact for a platform / architecture pair.
 *
 * @param {string} [platform=process.platform] Node platform id.
 * @param {string} [arch=process.arch] Node architecture id.
 * @param {string} [version=CUA_DRIVER_VERSION] Driver version embedded in the names.
 * @returns {{asset: string, extractDir: string, binaryRelPath: string, hasApp: boolean}}
 *   Archive file name, the directory it extracts into, the binary path
 *   relative to that directory, and whether the target ships an app bundle.
 * @throws {Error} For a Linux arch other than x64, or any other platform.
 */
function resolveAssetTarget(platform = process.platform, arch = process.arch, version = CUA_DRIVER_VERSION) {
  const isArm64 = arch === "arm64";
  switch (platform) {
    case "darwin": {
      const dirName = `cua-driver-rs-${version}-${isArm64 ? "darwin-arm64" : "darwin-x86_64"}`;
      return {
        asset: `${dirName}.tar.gz`,
        extractDir: dirName,
        // Launch the executable that lives INSIDE CuaDriver.app rather than
        // the bare binary next to it. cua-driver only performs its TCC
        // auto-relaunch (`open -a CuaDriver serve`, which attributes the
        // Accessibility / Screen-Recording grants to com.trycua.driver and
        // not to the launching terminal) when its running image resolves
        // into `/CuaDriver.app/Contents/MacOS/` (see bundle.rs
        // `is_executable_inside_cuadriver_app`). Spawning the bare
        // `cua-driver` made TCC attribute to the parent terminal (e.g.
        // iTerm): wrong identity, per-terminal, oversized privacy.
        binaryRelPath: "CuaDriver.app/Contents/MacOS/cua-driver",
        hasApp: true
      };
    }
    case "linux": {
      if (arch !== "x64") {
        throw new Error(
          `Computer Use: unsupported Linux arch '${arch}' (only x64).`
        );
      }
      return {
        asset: `cua-driver-rs-${version}-linux-x86_64-binary.tar.gz`,
        extractDir: ".",
        binaryRelPath: "cua-driver",
        hasApp: false
      };
    }
    case "win32": {
      const dirName = `cua-driver-rs-${version}-${isArm64 ? "windows-arm64" : "windows-x86_64"}`;
      return {
        asset: `${dirName}.zip`,
        extractDir: dirName,
        binaryRelPath: "cua-driver.exe",
        hasApp: false
      };
    }
    default:
      throw new Error(`Computer Use: unsupported platform '${platform}'.`);
  }
}
119
+ __name(resolveAssetTarget, "resolveAssetTarget");
120
/**
 * Build the ordered list of download URLs to try for a release asset.
 *
 * Order: the host from `QWEN_COMPUTER_USE_DOWNLOAD_HOST` (when set), then the
 * OSS mirror, then the GitHub release.
 *
 * @param {string} asset Asset file name.
 * @param {Record<string, string | undefined>} [env=process.env] Environment to read the override from.
 * @param {string} [version=CUA_DRIVER_VERSION] Driver version.
 * @returns {string[]} Candidate URLs, highest priority first.
 */
function resolveAssetUrls(asset, env = process.env, version = CUA_DRIVER_VERSION) {
  // The override host and the OSS mirror share the same path layout.
  const mirrorPath = `cua-driver-rs/v${version}/${asset}`;
  const customHost = env["QWEN_COMPUTER_USE_DOWNLOAD_HOST"];
  const overrideUrls = customHost ? [`${trimSlash(customHost)}/${mirrorPath}`] : [];
  return [
    ...overrideUrls,
    `${OSS_MIRROR_BASE}/${mirrorPath}`,
    `${GITHUB_RELEASE_BASE}/cua-driver-rs-v${version}/${asset}`
  ];
}
130
+ __name(resolveAssetUrls, "resolveAssetUrls");
131
/**
 * Candidate URLs for the release's `checksums.txt`, in the same priority
 * order resolveAssetUrls uses for any other asset.
 *
 * @param {Record<string, string | undefined>} [env=process.env]
 * @param {string} [version=CUA_DRIVER_VERSION]
 * @returns {string[]}
 */
function resolveChecksumUrls(env = process.env, version = CUA_DRIVER_VERSION) {
  const checksumAsset = "checksums.txt";
  return resolveAssetUrls(checksumAsset, env, version);
}
134
+ __name(resolveChecksumUrls, "resolveChecksumUrls");
135
+ var MAX_IMAGE_DIMENSION_ENV = "QWEN_COMPUTER_USE_MAX_IMAGE_DIMENSION";
136
/**
 * Normalize a raw image-dimension value (setting or environment string)
 * into a non-negative integer.
 *
 * Only numbers and non-blank strings are considered. Other types (`null`,
 * booleans, arrays, objects) are rejected outright: `Number()` would
 * otherwise coerce `null`, `false` and `[]` to `0` and `true` to `1`,
 * turning a missing or malformed setting into a seemingly valid dimension.
 *
 * @param {unknown} value Raw value to validate.
 * @returns {number | undefined} The integer when `value` is a non-negative
 *   integer (or a string that parses to one), otherwise `undefined`.
 */
function coerceImageDimension(value) {
  let n;
  if (typeof value === "number") {
    n = value;
  } else if (typeof value === "string") {
    // Number("") and Number("   ") are 0, so blank strings must be rejected first.
    if (value.trim() === "") return void 0;
    n = Number(value);
  } else {
    return void 0;
  }
  if (!Number.isInteger(n) || n < 0) return void 0;
  return n;
}
143
+ __name(coerceImageDimension, "coerceImageDimension");
144
/**
 * Pick the effective max image dimension. A valid value in the
 * `QWEN_COMPUTER_USE_MAX_IMAGE_DIMENSION` environment variable wins;
 * otherwise the settings value is used.
 *
 * @param {unknown} settingValue Value from settings.
 * @param {Record<string, string | undefined>} [env=process.env]
 * @returns {number | undefined} The chosen dimension, or `undefined` when
 *   neither source holds a valid value.
 */
function resolveMaxImageDimension(settingValue, env = process.env) {
  const envOverride = coerceImageDimension(env[MAX_IMAGE_DIMENSION_ENV]);
  return envOverride === void 0 ? coerceImageDimension(settingValue) : envOverride;
}
149
+ __name(resolveMaxImageDimension, "resolveMaxImageDimension");
150
/**
 * Root directory for Computer Use files: `<home>/.qwen/computer-use`.
 *
 * @param {string} [home=homedir()] Home directory to resolve against.
 * @returns {string}
 */
function computerUseRoot(home = homedir()) {
  const segments = [".qwen", "computer-use"];
  return join(home, ...segments);
}
153
+ __name(computerUseRoot, "computerUseRoot");
154
/**
 * Directory for one driver version:
 * `<computerUseRoot>/cua-driver-rs-<version>`.
 *
 * @param {string} [home=homedir()]
 * @param {string} [version=CUA_DRIVER_VERSION]
 * @returns {string}
 */
function versionDir(home = homedir(), version = CUA_DRIVER_VERSION) {
  const root = computerUseRoot(home);
  const folder = `cua-driver-rs-${version}`;
  return join(root, folder);
}
157
+ __name(versionDir, "versionDir");
158
/**
 * Full path of the driver binary for the given platform, arch and version:
 * the version directory joined with the target's extract directory and
 * relative binary path.
 *
 * @param {string} [home=homedir()]
 * @param {string} [platform=process.platform]
 * @param {string} [arch=process.arch]
 * @param {string} [version=CUA_DRIVER_VERSION]
 * @returns {string}
 * @throws {Error} Propagated from resolveAssetTarget for unsupported targets.
 */
function binaryPath(home = homedir(), platform = process.platform, arch = process.arch, version = CUA_DRIVER_VERSION) {
  const { extractDir, binaryRelPath } = resolveAssetTarget(platform, arch, version);
  const installDir = versionDir(home, version);
  return join(installDir, extractDir, binaryRelPath);
}
166
+ __name(binaryPath, "binaryPath");
167
/**
 * Key identifying a specific driver version, in the form
 * `cua-driver-rs@<version>`.
 *
 * @param {string} [version=CUA_DRIVER_VERSION]
 * @returns {string}
 */
function approvalKey(version = CUA_DRIVER_VERSION) {
  const prefix = "cua-driver-rs@";
  return prefix.concat(version);
}
170
+ __name(approvalKey, "approvalKey");
171
/**
 * Remove every trailing `/` from a string.
 *
 * Stripping only one slash would leave a host such as `https://mirror//`
 * ending in `/`, so URLs built by appending `/<path>` would contain `//`.
 *
 * @param {string} s Input string (e.g. a download host override).
 * @returns {string} `s` without any trailing slashes.
 */
function trimSlash(s) {
  let trimmed = s;
  while (trimmed.endsWith("/")) {
    trimmed = trimmed.slice(0, -1);
  }
  return trimmed;
}
174
+ __name(trimSlash, "trimSlash");
175
+
176
+ // packages/core/src/tools/computer-use/client.ts
177
/**
 * Client for the upstream cua-driver MCP server, which is spawned as a child
 * process (`<binary> mcp`) and reached over stdio.
 */
var ComputerUseClient = class _ComputerUseClient {
  static {
    __name(this, "ComputerUseClient");
  }
  /** Lazily created shared instance; see `shared()`. */
  static singleton;
  /** Path of the driver binary to spawn. */
  binary;
  /** Instance-level progress callback (no-op unless supplied). */
  onProgress;
  /** Screenshot longest-edge cap pushed on (re)connect; `undefined` = no override. */
  maxImageDimension;
  /** Connected MCP client, or `undefined` while stopped. */
  client;
  /** In-flight start promise shared by concurrent `start()` callers. */
  startPromise;
  /**
   * @param {{binary: string, onProgress?: (message: string) => void, maxImageDimension?: number}} options
   */
  constructor(options) {
    this.binary = options.binary;
    this.onProgress = options.onProgress ?? (() => {
    });
    this.maxImageDimension = options.maxImageDimension;
  }
  /**
   * Set the screenshot longest-edge cap applied on the next (re)connect via
   * `set_config`. Cheap to call before every `start()`; the value is only
   * pushed to cua-driver inside `doStart` (once per spawn, re-applied after a
   * reconnect). `undefined` means "don't override".
   */
  setMaxImageDimension(value) {
    this.maxImageDimension = value;
  }
  /**
   * Shared singleton instance, created with default options on first
   * access. Tests can replace it via `setSharedForTest()`.
   *
   * The binary path is derived from the pinned `CUA_DRIVER_VERSION` in
   * constants.ts, the single source of truth the downloaded binary +
   * generated `schemas.ts` agree on.
   */
  static shared() {
    if (!_ComputerUseClient.singleton) {
      _ComputerUseClient.singleton = new _ComputerUseClient({
        binary: binaryPath(homedir2())
      });
    }
    return _ComputerUseClient.singleton;
  }
  /** Test-only: replace the singleton. */
  static setSharedForTest(replacement) {
    _ComputerUseClient.singleton = replacement;
  }
  /** @returns {boolean} Whether a connected client is currently held. */
  isStarted() {
    return this.client !== void 0;
  }
  /**
   * Start the upstream MCP server. Idempotent: concurrent callers share
   * the same in-flight start promise.
   *
   * An optional `onProgress` callback can be supplied to receive download
   * and startup messages during this call. It overrides the instance-level
   * callback for the duration of the start operation only. A callback passed
   * while another start is already in flight is not used, because that
   * caller simply joins the existing promise.
   *
   * Throws on spawn failure (binary missing / not executable, daemon
   * launch failure, etc.). The caller (bootstrap state machine) is
   * responsible for mapping the throw into user-facing UX.
   */
  async start(onProgress) {
    if (this.client) return;
    if (this.startPromise) return this.startPromise;
    this.startPromise = this.doStart(onProgress).finally(() => {
      // Clear the marker on success and on failure so a later start() can retry.
      this.startPromise = void 0;
    });
    return this.startPromise;
  }
  /** Spawn the driver, connect the MCP client, then apply runtime config. */
  async doStart(onProgress) {
    const progress = onProgress ?? this.onProgress;
    progress("Starting Computer Use driver...");
    const transport = new StdioClientTransport({
      command: this.binary,
      args: ["mcp"],
      // Inherit env so HTTPS_PROXY / cua-driver config env flow through.
      env: { ...process.env }
    });
    const client = new Client(
      { name: "qwen-code-computer-use", version: "1.0.0" },
      { capabilities: {} }
    );
    await client.connect(transport);
    this.client = client;
    await this.applyRuntimeConfig(client, progress);
  }
  /**
   * Push session-level runtime config to a freshly connected daemon. Today
   * that is just `max_image_dimension` (the screenshot longest-edge cap),
   * applied via the `set_config` tool when an override is configured.
   *
   * Runs once per spawn — including after the reconnect in `callTool`, since a
   * daemon restart resets runtime config to its persisted default. Best-effort:
   * a failed `set_config` must NOT abort startup (the driver is still usable at
   * its default dimension), so the error is surfaced via `progress` and
   * swallowed. Calls the inner client directly to avoid recursing through
   * `callTool`'s reconnect path.
   */
  async applyRuntimeConfig(client, progress) {
    if (this.maxImageDimension === void 0) return;
    try {
      await client.callTool({
        name: "set_config",
        arguments: { max_image_dimension: this.maxImageDimension }
      });
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      progress(
        `Computer Use: could not apply max_image_dimension=${this.maxImageDimension} (${msg}); using driver default.`
      );
    }
  }
  /**
   * List the tools exposed by the upstream server. Used by the schema
   * sync script and bootstrap diagnostics.
   *
   * @throws {Error} When the client has not been started.
   */
  async listTools() {
    if (!this.client) throw new Error("ComputerUseClient not started");
    return this.client.listTools();
  }
  /**
   * Call a tool by upstream name (NOT the qwen-code-facing
   * `computer_use__` prefixed name). Returns the raw MCP result so the
   * caller can inspect `isError` and parse text content.
   *
   * On transport-closed errors (e.g. macOS kills the upstream binary after
   * the user grants Screen Recording permission), this method transparently
   * tears down the stale connection, reconnects, and retries the call. Up to
   * three reconnect-and-retry attempts are made, with a one-second pause
   * between attempts. Errors that are not transport-closed errors are
   * re-thrown immediately; if every attempt fails with a transport-closed
   * error, the last such error is thrown.
   *
   * @param {string} name Upstream tool name.
   * @param {object} [args] Tool arguments.
   * @throws {Error} When the client has not been started, when a reconnect
   *   fails, or when the call keeps failing as described above.
   */
  async callTool(name, args) {
    if (!this.client) throw new Error("ComputerUseClient not started");
    try {
      return await this.client.callTool({
        name,
        arguments: args
      });
    } catch (err) {
      if (!isTransportClosedError(err)) throw err;
      const maxReconnects = 3;
      const backoffMs = 1e3;
      let lastErr = err;
      for (let attempt = 1; attempt <= maxReconnects; attempt++) {
        await this.stop();
        await this.start();
        if (!this.client) throw new Error("ComputerUseClient reconnect failed");
        try {
          return await this.client.callTool({
            name,
            arguments: args
          });
        } catch (retryErr) {
          if (!isTransportClosedError(retryErr)) throw retryErr;
          lastErr = retryErr;
          // Back off only when another attempt follows; sleeping before the
          // final throw would just delay the error for the caller.
          if (attempt < maxReconnects) {
            await new Promise((r) => setTimeout(r, backoffMs));
          }
        }
      }
      throw lastErr;
    }
  }
  /** Tear down the child process. Safe to call multiple times. */
  async stop() {
    const client = this.client;
    // Drop the reference first so isStarted() is false even if close() throws.
    this.client = void 0;
    if (client) {
      try {
        await client.close();
      } catch {
        // Deliberately ignored: teardown is best-effort.
      }
    }
  }
};
347
/**
 * Decide whether an error indicates that the connection to the driver is
 * gone, by matching its message (case-insensitively) against a list of
 * known phrases.
 *
 * @param {unknown} err Thrown value; non-Error values are stringified.
 * @returns {boolean}
 */
function isTransportClosedError(err) {
  let text;
  if (err instanceof Error) {
    text = err.message;
  } else {
    text = String(err);
  }
  const closedPattern = /connection closed|not connected|connection refused|daemon transport error|os error 61/i;
  return closedPattern.test(text);
}
353
+ __name(isTransportClosedError, "isTransportClosedError");
354
+
355
+ // packages/core/src/tools/computer-use/schemas.ts
356
+ init_esbuild_shims();
357
// Upstream tool names, in alphabetical order.
// NOTE(review): presumably one entry per key of COMPUTER_USE_SCHEMAS — confirm
// against the full schemas object.
var COMPUTER_USE_TOOL_NAMES = [
  "bring_to_front",
  "check_for_update",
  "check_permissions",
  "click",
  "double_click",
  "drag",
  "end_session",
  "get_accessibility_tree",
  "get_agent_cursor_state",
  "get_config",
  "get_cursor_position",
  "get_recording_state",
  "get_screen_size",
  "get_window_state",
  "hotkey",
  "kill_app",
  "launch_app",
  "list_apps",
  "list_windows",
  "move_cursor",
  "page",
  "press_key",
  "replay_trajectory",
  "right_click",
  "scroll",
  "set_agent_cursor_enabled",
  "set_agent_cursor_motion",
  "set_agent_cursor_style",
  "set_config",
  "set_value",
  "start_recording",
  "start_session",
  "stop_recording",
  "type_text",
  "zoom"
];
394
+ var COMPUTER_USE_SCHEMAS = {
395
+ bring_to_front: {
396
+ description: 'Activate a window so subsequent input tools with `dispatch:"foreground"` land on it without a per-call SetForegroundWindow flash. **Windows-only:** on macOS this tool returns an error pointing to the platform-native `NSRunningApplication.activate` (which the macOS input tools don\'t need because CGEvent.postToPid reaches backgrounded windows). On Linux this tool also stubs out; use `wmctrl -a` or `xdotool windowactivate` if you need explicit activation.',
397
+ parameterSchema: {
398
+ additionalProperties: false,
399
+ properties: {
400
+ pid: {
401
+ type: "integer"
402
+ },
403
+ window_id: {
404
+ type: "integer"
405
+ }
406
+ },
407
+ required: ["pid"],
408
+ type: "object"
409
+ }
410
+ },
411
+ check_for_update: {
412
+ description: "Check whether a newer cua-driver-rs release is available on GitHub. Returns the current and latest versions, an `update_available` boolean, the install one-liner, and the release notes URL. Read-only \u2014 never installs. Mirror of `cua-driver check-update --json`.",
413
+ parameterSchema: {
414
+ additionalProperties: false,
415
+ properties: {},
416
+ type: "object"
417
+ }
418
+ },
419
+ check_permissions: {
420
+ description: "Report TCC permission status for Accessibility and Screen Recording. By default also raises the system permission dialogs for any missing grants \u2014 Apple's request APIs are no-ops when the grant is already active, so this is safe to call repeatedly. Pass {\"prompt\": false} for a purely read-only status check.\n\nReturns: `accessibility` + `screen_recording` (booleans from the TCC preflight APIs), `screen_recording_capturable` (a live ScreenCaptureKit probe \u2014 if it disagrees with `screen_recording`, the preflight grant belongs to a different process), and `source` (which TCC identity the booleans reflect: the CuaDriver daemon vs the launching terminal/IDE). macOS attributes grants to the responsible process, so a standalone call from a terminal reports the terminal's grants, not the driver's.",
421
+ parameterSchema: {
422
+ additionalProperties: false,
423
+ properties: {
424
+ prompt: {
425
+ description: "Raise the system permission prompts for missing grants. Default true.",
426
+ type: "boolean"
427
+ }
428
+ },
429
+ type: "object"
430
+ }
431
+ },
432
+ click: {
433
+ description: "Left-click against a target pid. **Prefer `element_index` over pixel coordinates** \u2014 element_index works on backgrounded / minimized / hidden / off-Space windows, surfaces a stable handle that survives rebuilds, and tells you what you're clicking via the cached element's role + label. Reach for `x, y` only when the target is a canvas / video / WebGL / custom-drawn surface that doesn't appear in the AX tree.\n\nTwo addressing modes:\n\n- element_index + window_id (from last get_window_state): AX action path. Works on backgrounded/hidden windows. No cursor move, no focus steal. element_index cache is scoped per (pid, window_id) and is replaced by the next snapshot of the same window \u2014 re-snapshot every turn before clicking.\n\n- x, y (window-local screenshot pixels, top-left origin of the PNG returned by get_window_state): CGEvent path. Synthesizes mouse events and posts to pid. Use modifier for cmd/shift/option/ctrl. Needs a visible on-screen window to anchor the conversion.\n\naction: press (default), show_menu, pick, confirm, cancel, open.\nfrom_zoom: set true after a zoom call to auto-translate zoom-image pixel coordinates to full-window space.",
434
+ parameterSchema: {
435
+ additionalProperties: false,
436
+ properties: {
437
+ action: {
438
+ description: "AX action: press, show_menu, pick, confirm, cancel, open.",
439
+ type: "string"
440
+ },
441
+ count: {
442
+ description: "Click count (pixel path only). Default 1.",
443
+ type: "integer"
444
+ },
445
+ debug_image_out: {
446
+ description: "Optional file path. When set on a pixel-addressed click, captures a fresh screenshot, draws a red crosshair at (x, y), and writes the PNG. Use to verify coordinate spaces. Requires window_id; incompatible with from_zoom.",
447
+ type: "string"
448
+ },
449
+ element_index: {
450
+ description: "Element index from last get_window_state.",
451
+ type: "integer"
452
+ },
453
+ from_zoom: {
454
+ description: "When true, x and y are in the last zoom image for this pid; driver translates back to full-window coordinates.",
455
+ type: "boolean"
456
+ },
457
+ modifier: {
458
+ description: "Modifier keys: cmd, shift, option/alt, ctrl.",
459
+ items: {
460
+ type: "string"
461
+ },
462
+ type: "array"
463
+ },
464
+ pid: {
465
+ description: "Target process ID.",
466
+ type: "integer"
467
+ },
468
+ session: {
469
+ description: "Optional session id: declares/uses the agent cursor and per-session state for this run. The same id works over MCP, the CLI, or the raw socket, and follows the run across apps/windows. Omit to run cursor-less.",
470
+ type: "string"
471
+ },
472
+ window_id: {
473
+ description: "Target window ID. Required for element_index.",
474
+ type: "integer"
475
+ },
476
+ x: {
477
+ description: "Window-local screenshot X coordinate.",
478
+ type: "number"
479
+ },
480
+ y: {
481
+ description: "Window-local screenshot Y coordinate.",
482
+ type: "number"
483
+ }
484
+ },
485
+ required: ["pid"],
486
+ type: "object"
487
+ }
488
+ },
489
+ double_click: {
490
+ description: "Double-click at (x, y) or on an AX element identified by element_index + window_id.\n\nAX path (element_index provided): performs `AXOpen` when the element advertises it (Finder items, openable list rows/cells); otherwise resolves the element's on-screen center and falls back to a pixel double-click there.\n\nPixel path (x, y provided): two down/up pairs ~80 ms apart at the given coordinates.",
491
+ parameterSchema: {
492
+ additionalProperties: false,
493
+ properties: {
494
+ element_index: {
495
+ description: "Element index from last get_window_state. Uses AX path.",
496
+ type: "integer"
497
+ },
498
+ pid: {
499
+ type: "integer"
500
+ },
501
+ session: {
502
+ description: "Optional session id: declares/uses the agent cursor and per-session state for this run. The same id works over MCP, the CLI, or the raw socket, and follows the run across apps/windows. Omit to run cursor-less.",
503
+ type: "string"
504
+ },
505
+ window_id: {
506
+ description: "CGWindowID. Required when element_index is used.",
507
+ type: "integer"
508
+ },
509
+ x: {
510
+ description: "Screen X coordinate (pixel path).",
511
+ type: "number"
512
+ },
513
+ y: {
514
+ description: "Screen Y coordinate (pixel path).",
515
+ type: "number"
516
+ }
517
+ },
518
+ required: ["pid"],
519
+ type: "object"
520
+ }
521
+ },
522
+ drag: {
523
+ description: "Press-drag-release gesture from (from_x, from_y) to (to_x, to_y) in window-local screenshot pixels \u2014 the same space get_window_state returns. Top-left origin of the target's window.\n\nUse for: marquee/lasso selection, drag-and-drop, resizing via a handle, scrubbing a slider, repositioning a panel.\n\n`duration_ms` (default 500) is the wall-clock budget for the path between mouse-down and mouse-up; `steps` (default 20) is the number of intermediate mouseDragged events linearly interpolated along the path. Increase both for slower, more human drags; decrease for snap gestures.\n\n`modifier` keys (cmd/shift/option/ctrl) are held across the entire gesture.\n\nWhen `from_zoom` is true, coordinates are in the last zoom image for this pid; the driver maps them back to window coordinates before dispatching.",
524
+ parameterSchema: {
525
+ additionalProperties: false,
526
+ properties: {
527
+ button: {
528
+ description: "Mouse button used for the drag. Default: left.",
529
+ enum: ["left", "right", "middle"],
530
+ type: "string"
531
+ },
532
+ duration_ms: {
533
+ description: "Wall-clock duration of the drag path between mouseDown and mouseUp. Default: 500.",
534
+ maximum: 1e4,
535
+ minimum: 0,
536
+ type: "integer"
537
+ },
538
+ from_x: {
539
+ description: "Drag-start X in window-local screenshot pixels. Top-left origin.",
540
+ type: "number"
541
+ },
542
+ from_y: {
543
+ description: "Drag-start Y in window-local screenshot pixels. Top-left origin.",
544
+ type: "number"
545
+ },
546
+ from_zoom: {
547
+ description: "When true, coordinates are in the last zoom image for this pid; driver maps back to window coordinates.",
548
+ type: "boolean"
549
+ },
550
+ modifier: {
551
+ description: "Modifier keys held across the entire gesture: cmd/shift/option/ctrl.",
552
+ items: {
553
+ type: "string"
554
+ },
555
+ type: "array"
556
+ },
557
+ pid: {
558
+ description: "Target process ID.",
559
+ type: "integer"
560
+ },
561
+ session: {
562
+ description: "Optional session id: declares/uses the agent cursor and per-session state for this run. The same id works over MCP, the CLI, or the raw socket, and follows the run across apps/windows. Omit to run cursor-less.",
563
+ type: "string"
564
+ },
565
+ steps: {
566
+ description: "Number of intermediate mouseDragged events linearly interpolated along the path. Default: 20.",
567
+ maximum: 200,
568
+ minimum: 1,
569
+ type: "integer"
570
+ },
571
+ to_x: {
572
+ description: "Drag-end X in window-local screenshot pixels.",
573
+ type: "number"
574
+ },
575
+ to_y: {
576
+ description: "Drag-end Y in window-local screenshot pixels.",
577
+ type: "number"
578
+ },
579
+ window_id: {
580
+ description: "CGWindowID for the window the pixel coordinates were measured against. Optional \u2014 when omitted the driver picks the frontmost window of pid.",
581
+ type: "integer"
582
+ }
583
+ },
584
+ required: ["pid", "from_x", "from_y", "to_x", "to_y"],
585
+ type: "object"
586
+ }
587
+ },
588
+ end_session: {
589
+ description: "End a session declared with `start_session`: removes its agent cursor, stops any recording it owns, and clears its per-session config. Call this when a run finishes so its cursor doesn't linger (otherwise the idle-TTL reclaims it after a period of inactivity). Idempotent.",
590
+ parameterSchema: {
591
+ additionalProperties: true,
592
+ properties: {
593
+ session: {
594
+ description: "The session id to end.",
595
+ type: "string"
596
+ }
597
+ },
598
+ required: ["session"],
599
+ type: "object"
600
+ }
601
+ },
602
+ get_accessibility_tree: {
603
+ description: "Return a lightweight snapshot of the desktop: running regular apps and on-screen visible windows with their bounds, z-order, and owner pid.\n\nFor the full AX subtree of a single window (with interactive element indices you can click by), use `get_window_state` instead \u2014 that's the heavy per-window tool. This one is a fast discovery read that needs no TCC grants.",
604
+ parameterSchema: {
605
+ additionalProperties: false,
606
+ properties: {},
607
+ type: "object"
608
+ }
609
+ },
610
+ get_agent_cursor_state: {
611
+ description: "Return the current state of THIS session's agent cursor: position, config (color, icon, label, size, opacity), enabled flag. Pass cursor_id to inspect a specific instance.",
612
+ parameterSchema: {
613
+ additionalProperties: false,
614
+ properties: {
615
+ cursor_id: {
616
+ description: "Cursor instance. Default: this session's cursor.",
617
+ type: "string"
618
+ }
619
+ },
620
+ type: "object"
621
+ }
622
+ },
623
+ get_config: {
624
+ description: "Return the current cua-driver-rs configuration.",
625
+ parameterSchema: {
626
+ additionalProperties: false,
627
+ properties: {},
628
+ type: "object"
629
+ }
630
+ },
631
+ get_cursor_position: {
632
+ description: "Return the current mouse cursor position in screen points (origin top-left).",
633
+ parameterSchema: {
634
+ additionalProperties: false,
635
+ properties: {},
636
+ type: "object"
637
+ }
638
+ },
639
+ get_recording_state: {
640
+ description: "Report the current trajectory recorder state: whether recording is enabled, the output directory (when enabled), and the 1-based counter for the next turn folder that will be written. Counter increments on every recorded action tool call and resets to 1 each time recording is (re-)enabled.\n\nPure read-only.",
641
+ parameterSchema: {
642
+ additionalProperties: false,
643
+ properties: {},
644
+ type: "object"
645
+ }
646
+ },
647
+ get_screen_size: {
648
+ description: "Return the logical size of the main display in points plus its backing scale factor. Agents click in points; Retina displays have scale_factor 2.0. Requires no TCC permissions.",
649
+ parameterSchema: {
650
+ additionalProperties: false,
651
+ properties: {},
652
+ type: "object"
653
+ }
654
+ },
655
+ get_window_state: {
656
+ description: "Walk a running app's AX tree and return a Markdown rendering of its UI, tagging every actionable element with [element_index N]. Pass those indices to click, type_text, press_key, etc.\n\nINVARIANT: call get_window_state once per turn per (pid, window_id) before any element-indexed action. The index map is replaced by the next snapshot.\n\nAlso captures a PNG screenshot of the specified window.\n\nOptional `query` filters the tree_markdown to matching lines plus their ancestor chain (case-insensitive substring). The element_index values are unchanged \u2014 filtering only trims the rendered Markdown.",
657
+ parameterSchema: {
658
+ additionalProperties: false,
659
+ properties: {
660
+ capture_mode: {
661
+ description: "som=AX+screenshot (default), vision=screenshot only (no AX walk), ax=AX only (no screenshot).",
662
+ enum: ["som", "vision", "ax"],
663
+ type: "string"
664
+ },
665
+ pid: {
666
+ description: "Target process ID.",
667
+ type: "integer"
668
+ },
669
+ query: {
670
+ description: "Case-insensitive filter for tree_markdown.",
671
+ type: "string"
672
+ },
673
+ screenshot_out_file: {
674
+ description: "When set, write the PNG to this file path (~ expanded) instead of embedding base64 in the response. The structured output will contain screenshot_file_path instead.",
675
+ type: "string"
676
+ },
677
+ session: {
678
+ description: "Optional session id: declares/uses the agent cursor and per-session state for this run. The same id works over MCP, the CLI, or the raw socket, and follows the run across apps/windows. Omit to run cursor-less.",
679
+ type: "string"
680
+ },
681
+ window_id: {
682
+ description: "Target window ID from list_windows.",
683
+ type: "integer"
684
+ }
685
+ },
686
+ required: ["pid", "window_id"],
687
+ type: "object"
688
+ }
689
+ },
690
+ hotkey: {
691
+ description: 'Press a combination of keys simultaneously \u2014 e.g. `["cmd", "c"]` for Copy, `["cmd", "shift", "4"]` for screenshot selection. The combo is posted directly to the target pid\'s event queue; the target does NOT need to be frontmost.\n\nTwo delivery paths:\n\u2022 Default (no window_id): auth-message envelope \u2014 Chromium/Electron apps accept the keystrokes as trusted live input on macOS 14+.\n\u2022 With window_id: NSMenu path \u2014 briefly activates the target WindowServer-frontmost via SLPSSetFrontProcessWithOptions (kCPSNoWindows, < 1 ms), posts WITHOUT the auth envelope so IOHIDPostEvent fires and NSApplication.sendEvent: dispatches NSMenu key equivalents (e.g. Cmd+Z undo, Cmd+W close). Restores prior frontmost immediately. Use this path when you need native menu-bar actions on non-Chromium apps.\n\nRecognized modifiers: cmd/command, shift, option/alt, ctrl/control, fn. Non-modifier keys use the same vocabulary as `press_key` (return, tab, escape, up/down/left/right, space, delete, home, end, pageup, pagedown, f1-f12, letters, digits). Order: modifiers first, one non-modifier last.',
692
+ parameterSchema: {
693
+ additionalProperties: false,
694
+ properties: {
695
+ keys: {
696
+ description: 'Modifier(s) and one non-modifier key, e.g. ["cmd", "c"].',
697
+ items: {
698
+ type: "string"
699
+ },
700
+ minItems: 2,
701
+ type: "array"
702
+ },
703
+ pid: {
704
+ description: "Target process ID.",
705
+ type: "integer"
706
+ },
707
+ session: {
708
+ description: "Optional session id: declares/uses the agent cursor and per-session state for this run. The same id works over MCP, the CLI, or the raw socket, and follows the run across apps/windows. Omit to run cursor-less.",
709
+ type: "string"
710
+ },
711
+ window_id: {
712
+ description: "When set, uses NSMenu path: briefly activates the window for menu key dispatch, then restores prior frontmost.",
713
+ type: "integer"
714
+ }
715
+ },
716
+ required: ["pid", "keys"],
717
+ type: "object"
718
+ }
719
+ },
720
+ kill_app: {
721
+ description: "Force-terminate a process by pid (kill -9 equivalent on macOS / Linux; taskkill /F equivalent on Windows). Use as escalation when the cooperative close path (hotkey cmd+q on macOS, click-the-X on Windows) failed to make the process exit. Unsaved state is lost \u2014 prefer the cooperative path first.",
722
+ parameterSchema: {
723
+ additionalProperties: false,
724
+ properties: {
725
+ pid: {
726
+ description: "PID of the process to terminate.",
727
+ type: "integer"
728
+ }
729
+ },
730
+ required: ["pid"],
731
+ type: "object"
732
+ }
733
+ },
734
+ launch_app: {
735
+ description: "Launch a macOS app in the background \u2014 the target does NOT come to the foreground.\n\nProvide either `bundle_id` (preferred \u2014 unambiguous, e.g. `com.apple.calculator`) or `name` (e.g. \"Calculator\"). If both are given, bundle_id wins.\n\nOptional `urls` are handed to the app as open targets \u2014 for Finder, pass a folder path to open a backgrounded Finder window there.\n\nOptional `electron_debugging_port`: opens a Chrome DevTools Protocol (CDP) server on the specified port (appends --remote-debugging-port=N to the app's argv). Use this to automate Electron/VS Code/Cursor via CDP.\n\nOptional `webkit_inspector_port`: opens a WebKit inspector server on the specified port (sets WEBKIT_INSPECTOR_SERVER=127.0.0.1:N + TAURI_WEBVIEW_AUTOMATION=1). Use this for Tauri/WebKit-based apps.\n\nOptional `creates_new_application_instance`: when true, forces a new app instance even if one is already running (passes -n to open). Reach for this when another agent or session may drive the SAME app concurrently \u2014 it returns a fresh pid + window so each session acts on its own isolated window instead of clobbering one shared instance. Without it, single-instance apps (Calculator, many utilities) hand every caller the same window, so two sessions fight over it.\n\nOptional `additional_arguments`: extra argv strings appended after --args.\n\nReturns the launched app's pid, bundle_id, name, and a `windows` array (same shape as `list_windows`) so callers can skip an extra round-trip before `get_window_state(pid, window_id)`. When the focus-steal belt-and-braces demotion check ran (target pid \u2260 prior frontmost), the response also includes `self_activation_suppressed: bool` \u2014 true if focus stayed with the prior frontmost, false if the launched app held focus despite the re-demote attempt.",
736
+ parameterSchema: {
737
+ additionalProperties: false,
738
+ properties: {
739
+ additional_arguments: {
740
+ description: "Extra arguments appended after --args when launching.",
741
+ items: {
742
+ type: "string"
743
+ },
744
+ type: "array"
745
+ },
746
+ bundle_id: {
747
+ description: "App bundle identifier, e.g. com.apple.calculator. Preferred over name.",
748
+ type: "string"
749
+ },
750
+ creates_new_application_instance: {
751
+ description: "When true, force a new app instance even if already running (open -n). Use for concurrent multi-agent/multi-session work so each session gets an isolated instance + window instead of sharing one \u2014 on single-instance apps (e.g. Calculator) every caller otherwise gets the same window and the sessions clobber each other.",
752
+ type: "boolean"
753
+ },
754
+ electron_debugging_port: {
755
+ description: "Open a Chrome DevTools Protocol server on this port (appends --remote-debugging-port=N).",
756
+ type: "integer"
757
+ },
758
+ name: {
759
+ description: "App display name. Used only when bundle_id is absent.",
760
+ type: "string"
761
+ },
762
+ urls: {
763
+ description: "Optional file paths or URLs to open with the app (e.g. a folder path for Finder).",
764
+ items: {
765
+ type: "string"
766
+ },
767
+ type: "array"
768
+ },
769
+ webkit_inspector_port: {
770
+ description: "Open a WebKit inspector server on this port (sets WEBKIT_INSPECTOR_SERVER env var).",
771
+ type: "integer"
772
+ }
773
+ },
774
+ type: "object"
775
+ }
776
+ },
777
+ list_apps: {
778
+ description: 'List macOS apps \u2014 both currently running and installed-but-not-running \u2014 with per-app state flags:\n\n- running: is a process for this app live? (pid is 0 when false)\n- active: is it the system-frontmost app? (implies running)\n- launch_path: filesystem path to the `.app` bundle, when known. Pass this to `launch_app` to start the app cold.\n- kind: `"desktop"` for `.app` bundles on macOS.\n- last_used: RFC3339 timestamp from the bundle\'s filesystem mtime, when readable; otherwise null.\n\nOnly apps with NSApplicationActivationPolicyRegular are included \u2014 background helpers and system UI agents are filtered out. Installed apps come from scanning /Applications, /Applications/Utilities, ~/Applications, /System/Applications, and /System/Applications/Utilities.\n\nUse this for "is X installed?" as well as "is X running?". For per-window state \u2014 on-screen, on-current-Space, minimized, window titles \u2014 call list_windows instead. For just opening an app \u2014 running or not \u2014 call launch_app({bundle_id: ...}) directly; list_apps is not a prerequisite.',
779
+ parameterSchema: {
780
+ additionalProperties: false,
781
+ properties: {},
782
+ type: "object"
783
+ }
784
+ },
785
+ list_windows: {
786
+ description: "List all layer-0 top-level windows currently known to WindowServer. Includes off-screen windows (minimized, on another Space, hidden-launched). Use this to find a window_id before calling get_window_state.\n\nPer-record fields: window_id, pid, app_name, title, bounds (x/y/width/height, top-left origin), z_index (higher = frontmost), is_on_screen, on_current_space.",
787
+ parameterSchema: {
788
+ additionalProperties: false,
789
+ properties: {
790
+ on_screen_only: {
791
+ description: "When true, drop windows not on the current Space. Default false.",
792
+ type: "boolean"
793
+ },
794
+ pid: {
795
+ description: "Optional pid filter. When set, only this pid's windows are returned.",
796
+ type: "integer"
797
+ }
798
+ },
799
+ type: "object"
800
+ }
801
+ },
802
+ move_cursor: {
803
+ description: "Move the agent cursor overlay to (x, y). Does NOT move the real mouse cursor \u2014 the user's cursor stays where it is. Useful for showing the agent's attention without interrupting the user.",
804
+ parameterSchema: {
805
+ additionalProperties: false,
806
+ properties: {
807
+ cursor_id: {
808
+ description: "Cursor instance to move. Default: 'default'.",
809
+ type: "string"
810
+ },
811
+ session: {
812
+ description: "Optional session id: declares/uses the agent cursor and per-session state for this run. The same id works over MCP, the CLI, or the raw socket, and follows the run across apps/windows. Omit to run cursor-less.",
813
+ type: "string"
814
+ },
815
+ x: {
816
+ type: "number"
817
+ },
818
+ y: {
819
+ type: "number"
820
+ }
821
+ },
822
+ required: ["x", "y"],
823
+ type: "object"
824
+ }
825
+ },
826
+ page: {
827
+ description: "Interact with the browser page loaded in a running app. Supports Chrome, Brave, Edge, Safari (via AppleScript on macOS), Electron apps (via CDP), Chromium/Firefox on Windows (via UIA for read; CDP for execute_javascript when --remote-debugging-port is set), and WKWebView/Tauri/AT-SPI fallbacks.\n\nActions:\n- execute_javascript: Run JS and return the result.\n- get_text: Extract visible text from the page.\n- query_dom: Find elements matching a CSS selector.\n- click_element: Click a CSS-selected element AND animate the agent cursor to its on-screen center first (so the user sees what the agent is doing). Prefer over `execute_javascript('el.click()')` whenever you want visible cursor feedback.\n- enable_javascript_apple_events: macOS-only \u2014 patch the browser's Preferences to allow JS from Apple Events (Chrome/Brave/Edge, requires user confirmation and a browser restart).",
828
+ parameterSchema: {
829
+ additionalProperties: false,
830
+ properties: {
831
+ action: {
832
+ description: "Action to perform.",
833
+ enum: [
834
+ "execute_javascript",
835
+ "get_text",
836
+ "query_dom",
837
+ "click_element",
838
+ "enable_javascript_apple_events"
839
+ ],
840
+ type: "string"
841
+ },
842
+ attributes: {
843
+ description: "Element attributes to include in query_dom results.",
844
+ items: {
845
+ type: "string"
846
+ },
847
+ type: "array"
848
+ },
849
+ bundle_id: {
850
+ description: "Bundle ID of the browser. Required for enable_javascript_apple_events (macOS only).",
851
+ type: "string"
852
+ },
853
+ css_selector: {
854
+ description: "CSS selector for query_dom (e.g. 'a', 'button', 'input', 'h1'-'h6', 'p', 'img', 'select', '*').",
855
+ type: "string"
856
+ },
857
+ javascript: {
858
+ description: "JavaScript to execute. Required for execute_javascript.",
859
+ type: "string"
860
+ },
861
+ pid: {
862
+ description: "Target process ID.",
863
+ type: "integer"
864
+ },
865
+ selector: {
866
+ description: "CSS selector for click_element (e.g. 'button.submit', '#login a').",
867
+ type: "string"
868
+ },
869
+ user_has_confirmed_enabling: {
870
+ description: "Must be true to proceed with enable_javascript_apple_events. This will quit and relaunch the browser.",
871
+ type: "boolean"
872
+ },
873
+ window_id: {
874
+ description: "Target window ID from list_windows.",
875
+ type: "integer"
876
+ }
877
+ },
878
+ required: ["action"],
879
+ type: "object"
880
+ }
881
+ },
882
+ press_key: {
883
+ description: "Press and release a single key, delivered to the target pid via CGEventPostToPid. No focus steal.\n\nThree delivery paths:\n\u2022 window_id + element_index: focuses the AX element first, then posts via the auth-message path (Chromium-safe).\n\u2022 window_id only (no element_index): NSMenu path \u2014 briefly activates the window WindowServer-frontmost via SLPSSetFrontProcessWithOptions (kCPSNoWindows, < 1 ms), posts WITHOUT the auth envelope so IOHIDPostEvent fires and NSApplication.sendEvent: dispatches NSMenu key equivalents. Restores prior frontmost immediately.\n\u2022 No window_id: standard auth-message path.\n\nKey names: return, tab, escape, up/down/left/right, space, delete, home, end, pageup, pagedown, f1-f12, plus any letter or digit.\nModifiers array: cmd, shift, option/alt, ctrl, fn.",
884
+ parameterSchema: {
885
+ additionalProperties: false,
886
+ properties: {
887
+ element_index: {
888
+ type: "integer"
889
+ },
890
+ key: {
891
+ description: "Key name: return, tab, escape, up, down, etc.",
892
+ type: "string"
893
+ },
894
+ modifiers: {
895
+ description: "Modifier keys: cmd, shift, option/alt, ctrl, fn.",
896
+ items: {
897
+ type: "string"
898
+ },
899
+ type: "array"
900
+ },
901
+ pid: {
902
+ type: "integer"
903
+ },
904
+ session: {
905
+ description: "Optional session id: declares/uses the agent cursor and per-session state for this run. The same id works over MCP, the CLI, or the raw socket, and follows the run across apps/windows. Omit to run cursor-less.",
906
+ type: "string"
907
+ },
908
+ window_id: {
909
+ type: "integer"
910
+ }
911
+ },
912
+ required: ["pid", "key"],
913
+ type: "object"
914
+ }
915
+ },
916
+ replay_trajectory: {
917
+ description: "Replay a recorded trajectory by re-invoking every turn's tool call in lexical order. `dir` must point at a directory previously written by `start_recording`. Each `turn-NNNNN/` is parsed for `action.json`, and the recorded tool is called with its recorded `arguments` via the same dispatch path an MCP / CLI call uses.\n\nCaveats:\n- Element-indexed actions (`click({pid, element_index})` etc.) will fail because element indices are per-snapshot and don't survive across sessions. Pixel clicks (`click({pid, x, y})`) and all keyboard tools replay cleanly. Failures are reported but don't stop replay unless `stop_on_error` is true.\n- `get_window_state` and other read-only tools are NOT currently recorded, so replays do not re-populate the per-(pid, window_id) element cache.\n- If recording is ENABLED while replay runs, the replay itself is recorded into the currently configured output directory. That's deliberate: recording a replay against a new build and diffing the two trajectories is the regression-test workflow.",
918
+ parameterSchema: {
919
+ additionalProperties: false,
920
+ properties: {
921
+ delay_ms: {
922
+ description: "Milliseconds to sleep between turns, for human-observable pacing. Default 500.",
923
+ maximum: 1e4,
924
+ minimum: 0,
925
+ type: "integer"
926
+ },
927
+ dir: {
928
+ description: "Trajectory directory previously written by `start_recording`. Absolute or ~-rooted.",
929
+ type: "string"
930
+ },
931
+ stop_on_error: {
932
+ description: "Stop replay on the first tool-call error. Default true \u2014 set false to best-effort through the full trajectory.",
933
+ type: "boolean"
934
+ }
935
+ },
936
+ required: ["dir"],
937
+ type: "object"
938
+ }
939
+ },
940
+ right_click: {
941
+ description: "Right-click against a target pid. Two addressing modes:\n\n- `element_index` + `window_id` (from the last `get_window_state` snapshot) \u2014 performs `AXShowMenu` on the cached element. Pure AX RPC, works on backgrounded / hidden windows, no cursor move or focus steal. Requires a prior `get_window_state(pid, window_id)` in this turn.\n\n- `x`, `y` \u2014 synthesizes `rightMouseDown` / `rightMouseUp` CGEvent pair posted to the pid. Driver converts image-pixel \u2192 screen-point internally. `modifier` forces the CGEvent path (AX actions don't propagate modifier keys).\n\nExactly one of `element_index` or (`x` AND `y`) must be provided. `pid` always required. `window_id` required when `element_index` is used.",
942
+ parameterSchema: {
943
+ additionalProperties: false,
944
+ properties: {
945
+ element_index: {
946
+ description: "Element index from last get_window_state. Routes through AXShowMenu. Requires window_id.",
947
+ type: "integer"
948
+ },
949
+ modifier: {
950
+ description: "Modifier keys held during the right-click: cmd/shift/option/ctrl. Pixel path only.",
951
+ items: {
952
+ type: "string"
953
+ },
954
+ type: "array"
955
+ },
956
+ pid: {
957
+ description: "Target process ID.",
958
+ type: "integer"
959
+ },
960
+ session: {
961
+ description: "Optional session id: declares/uses the agent cursor and per-session state for this run. The same id works over MCP, the CLI, or the raw socket, and follows the run across apps/windows. Omit to run cursor-less.",
962
+ type: "string"
963
+ },
964
+ window_id: {
965
+ description: "CGWindowID. Required when element_index is used.",
966
+ type: "integer"
967
+ },
968
+ x: {
969
+ description: "X in window-local screenshot pixels. Must be provided together with y.",
970
+ type: "number"
971
+ },
972
+ y: {
973
+ description: "Y in window-local screenshot pixels. Must be provided together with x.",
974
+ type: "number"
975
+ }
976
+ },
977
+ required: ["pid"],
978
+ type: "object"
979
+ }
980
+ },
981
+ scroll: {
982
+ description: "Scroll the target pid's focused region by synthesized keystrokes.\n\nMapping: by='page' \u2192 PageDown/PageUp \xD7 amount; by='line' \u2192 DownArrow/UpArrow \xD7 amount. Horizontal variants use Left/Right arrow keys.\n\nOptional element_index + window_id pre-focuses the element before scrolling.",
983
+ parameterSchema: {
984
+ additionalProperties: false,
985
+ properties: {
986
+ amount: {
987
+ description: "Number of keystroke repetitions. Default: 3.",
988
+ maximum: 50,
989
+ minimum: 1,
990
+ type: "integer"
991
+ },
992
+ by: {
993
+ description: "Scroll granularity. Default: line.",
994
+ enum: ["line", "page"],
995
+ type: "string"
996
+ },
997
+ direction: {
998
+ description: "Scroll direction.",
999
+ enum: ["up", "down", "left", "right"],
1000
+ type: "string"
1001
+ },
1002
+ element_index: {
1003
+ type: "integer"
1004
+ },
1005
+ pid: {
1006
+ type: "integer"
1007
+ },
1008
+ session: {
1009
+ description: "Optional session id: declares/uses the agent cursor and per-session state for this run. The same id works over MCP, the CLI, or the raw socket, and follows the run across apps/windows. Omit to run cursor-less.",
1010
+ type: "string"
1011
+ },
1012
+ window_id: {
1013
+ type: "integer"
1014
+ }
1015
+ },
1016
+ required: ["pid", "direction"],
1017
+ type: "object"
1018
+ }
1019
+ },
1020
+ set_agent_cursor_enabled: {
1021
+ description: "Show or hide the agent cursor for a session. A cursor exists only for a DECLARED session: pass `session` (the same id you start_session / drive actions with) and the cursor appears on that session's first action \u2014 its color is derived from the id. Without a `session`, actions run cursor-less. Use enabled=false to hide a session's cursor, enabled=true to re-show it. (`cursor_id` is a legacy alias for `session`.)",
1022
+ parameterSchema: {
1023
+ additionalProperties: false,
1024
+ properties: {
1025
+ cursor_id: {
1026
+ description: "Cursor instance. Default: 'default'.",
1027
+ type: "string"
1028
+ },
1029
+ enabled: {
1030
+ description: "true = show, false = hide.",
1031
+ type: "boolean"
1032
+ }
1033
+ },
1034
+ required: ["enabled"],
1035
+ type: "object"
1036
+ }
1037
+ },
1038
+ set_agent_cursor_motion: {
1039
+ description: "Configure the visual appearance and motion curve of an agent cursor instance.\n\nAppearance (multi-cursor customization):\n- cursor_id: instance name (default='default')\n- cursor_icon: built-in ('arrow','crosshair','hand','dot') or PNG/SVG file path\n- cursor_color: hex color e.g. '#00FFFF' or CSS name\n- cursor_label: short text shown near the cursor\n- cursor_size: dot radius in points (default=16)\n- cursor_opacity: 0.0\u20131.0 (default=0.85)\n\nMotion curve (Bezier path shape):\n- start_handle: departure control-point fraction [0,1]. Default 0.3\n- end_handle: arrival control-point fraction [0,1]. Default 0.3\n- arc_size: perpendicular deflection as fraction of path length [0,1]. Default 0.25\n- arc_flow: asymmetry [-1,1]; positive bulges toward destination. Default 0.0\n- spring: settle damping [0.3,1.0]; 1.0=no overshoot. Default 0.72\n- turn_radius: minimum turning radius of the glide path in points [1,1000]; smaller = tighter curves. Default 80\n- glide_duration_ms: fixed flight duration per move [50,5000]; omit for speed-based (the default)\n- dwell_after_click_ms: pause after click ripple [0,5000]. Default 80\n- idle_hide_ms: auto-hide delay [0,60000]; 0=never. Default 20000",
1040
+ parameterSchema: {
1041
+ additionalProperties: false,
1042
+ properties: {
1043
+ arc_flow: {
1044
+ description: "Asymmetry bias in [-1, 1]. Default 0.0.",
1045
+ type: "number"
1046
+ },
1047
+ arc_size: {
1048
+ description: "Arc deflection as fraction of path length [0, 1]. Default 0.25.",
1049
+ type: "number"
1050
+ },
1051
+ cursor_color: {
1052
+ description: "Hex color (e.g. '#00FFFF') or CSS color name.",
1053
+ type: "string"
1054
+ },
1055
+ cursor_icon: {
1056
+ description: "Built-in icon name or file path to PNG/SVG.",
1057
+ type: "string"
1058
+ },
1059
+ cursor_id: {
1060
+ description: "Cursor instance name. Default: 'default'.",
1061
+ type: "string"
1062
+ },
1063
+ cursor_label: {
1064
+ description: "Short label near the cursor dot.",
1065
+ type: "string"
1066
+ },
1067
+ cursor_opacity: {
1068
+ description: "Opacity 0.0\u20131.0. Default: 0.85.",
1069
+ type: "number"
1070
+ },
1071
+ cursor_size: {
1072
+ description: "Dot radius in points. Default: 16.",
1073
+ type: "number"
1074
+ },
1075
+ dwell_after_click_ms: {
1076
+ description: "Pause after click ripple in ms. Default 80.",
1077
+ maximum: 5e3,
1078
+ minimum: 0,
1079
+ type: "number"
1080
+ },
1081
+ end_handle: {
1082
+ description: "End-handle fraction in [0, 1]. Default 0.3.",
1083
+ type: "number"
1084
+ },
1085
+ glide_duration_ms: {
1086
+ description: "Fixed flight duration per move in ms; omit for speed-based timing (the default).",
1087
+ maximum: 5e3,
1088
+ minimum: 50,
1089
+ type: "number"
1090
+ },
1091
+ idle_hide_ms: {
1092
+ description: "Auto-hide delay in ms. 0 = never hide. Default 20000.",
1093
+ maximum: 6e4,
1094
+ minimum: 0,
1095
+ type: "number"
1096
+ },
1097
+ spring: {
1098
+ description: "Settle damping in [0.3, 1.0]. Default 0.72.",
1099
+ type: "number"
1100
+ },
1101
+ start_handle: {
1102
+ description: "Start-handle fraction in [0, 1]. Default 0.3.",
1103
+ type: "number"
1104
+ },
1105
+ turn_radius: {
1106
+ description: "Minimum turning radius of the glide path in points; smaller = tighter curves. Default 80.",
1107
+ maximum: 1e3,
1108
+ minimum: 1,
1109
+ type: "number"
1110
+ }
1111
+ },
1112
+ type: "object"
1113
+ }
1114
+ },
1115
+ set_agent_cursor_style: {
1116
+ description: 'Update the visual style of the agent cursor overlay.\n\n- gradient_colors: array of CSS hex strings (e.g. ["#FF0000","#0000FF"]) used as the arrow fill gradient from tip to tail. Empty array reverts to the default palette colours.\n- bloom_color: hex string for the radial halo/bloom behind the cursor (e.g. "#00FFFF"). Empty string reverts to the default.\n- image_path: path to a PNG, JPEG, SVG, or ICO file to use as the cursor icon instead of the default gradient arrow. Empty string reverts to the procedural arrow.\nAll parameters are optional; omit any you do not want to change.',
1117
+ parameterSchema: {
1118
+ additionalProperties: false,
1119
+ properties: {
1120
+ bloom_color: {
1121
+ description: "Hex bloom/halo colour (e.g. '#00FFFF'). '' = revert to default.",
1122
+ type: "string"
1123
+ },
1124
+ cursor_id: {
1125
+ description: "Cursor instance. Default: 'default'.",
1126
+ type: "string"
1127
+ },
1128
+ gradient_colors: {
1129
+ description: "CSS hex gradient stops tip\u2192tail. [] = revert to default.",
1130
+ items: {
1131
+ type: "string"
1132
+ },
1133
+ type: "array"
1134
+ },
1135
+ image_path: {
1136
+ description: "Path to PNG/JPEG/SVG/ICO cursor image. '' = revert to arrow.",
1137
+ type: "string"
1138
+ }
1139
+ },
1140
+ type: "object"
1141
+ }
1142
+ },
1143
+ set_config: {
1144
+ description: "Update cua-driver-rs configuration. Changes to capture_mode and max_image_dimension take effect immediately. The experimental_pip keys are persisted to ~/.cua-driver/config.json and take effect on the next daemon restart (the PiP backend is initialised once at startup).",
1145
+ parameterSchema: {
1146
+ additionalProperties: false,
1147
+ properties: {
1148
+ capture_mode: {
1149
+ description: "Default capture mode for get_window_state.",
1150
+ enum: ["som", "vision", "ax"],
1151
+ type: "string"
1152
+ },
1153
+ experimental_pip: {
1154
+ description: "Enable the experimental picture-in-picture preview window. Applies on next daemon restart.",
1155
+ type: "boolean"
1156
+ },
1157
+ experimental_pip_geometry: {
1158
+ description: "PiP window size + optional position in `WxH` or `WxH+X+Y` form (e.g. `320x200+24+24`). Applies on next daemon restart.",
1159
+ type: "string"
1160
+ },
1161
+ max_image_dimension: {
1162
+ description: "Max dimension for screenshot resizing (0 = no limit).",
1163
+ type: "integer"
1164
+ }
1165
+ },
1166
+ type: "object"
1167
+ }
1168
+ },
1169
+ set_value: {
1170
+ description: "Set a value on a UI element. Two modes depending on element role:\n\n- **AXPopUpButton / select dropdown**: finds the child option whose title or value matches `value` (case-insensitive) and AXPresses it directly \u2014 the native macOS popup menu is never opened, so focus is never stolen. Use this for HTML <select> elements in Safari or any native NSPopUpButton.\n\n- **All other elements**: writes AXValue directly (sliders, steppers, date pickers, native text fields that expose settable AXValue).\n\nFor free-form text entry into web inputs, prefer `type_text_chars` which synthesises key events \u2014 AXValue writes are ignored by WebKit.",
1171
+ parameterSchema: {
1172
+ additionalProperties: false,
1173
+ properties: {
1174
+ element_index: {
1175
+ type: "integer"
1176
+ },
1177
+ pid: {
1178
+ type: "integer"
1179
+ },
1180
+ session: {
1181
+ description: "Optional session id: declares/uses the agent cursor and per-session state for this run. The same id works over MCP, the CLI, or the raw socket, and follows the run across apps/windows. Omit to run cursor-less.",
1182
+ type: "string"
1183
+ },
1184
+ value: {
1185
+ description: "New value. AX will coerce to the element's native type.",
1186
+ type: "string"
1187
+ },
1188
+ window_id: {
1189
+ description: "CGWindowID for the window whose get_window_state produced the element_index.",
1190
+ type: "integer"
1191
+ }
1192
+ },
1193
+ required: ["pid", "window_id", "element_index", "value"],
1194
+ type: "object"
1195
+ }
1196
+ },
1197
+ start_recording: {
1198
+ description: "Start trajectory recording. Every subsequent action-tool invocation (click, right_click, scroll, type_text, press_key, hotkey, set_value) writes a turn folder under `output_dir`:\n\n- `app_state.json` \u2014 post-action AX/UIA snapshot for the target pid.\n- `screenshot.png` \u2014 post-action per-window screenshot of the target's frontmost on-screen window.\n- `action.json` \u2014 tool name, full input arguments, result summary, pid, click point (when applicable), ISO-8601 timestamp.\n- `click.png` \u2014 for click-family actions only, `screenshot.png` with a red dot drawn at the click point.\n\nTurn folders are named `turn-00001/`, `turn-00002/`, etc. Turn numbering restarts at 1 each time recording is (re-)started.\n\n**Video is off by default.** Pass `record_video: true` to also capture the main display to `<output_dir>/recording.mp4` (H.264 / 30 fps) for the lifetime of the session. The recording is torn down automatically when the MCP client disconnects.\n\n**macOS uses native ScreenCaptureKit** (in-process SCStream + SCRecordingOutput) so video inherits Cua Driver's own Screen Recording grant \u2014 no extra TCC prompt, no ffmpeg subprocess. Requires macOS 15.0+.\n\n**Windows + Linux use an ffmpeg subprocess** (`gdigrab` / `x11grab` + libx264). Requires ffmpeg on PATH (winget install Gyan.FFmpeg / apt install ffmpeg); when ffmpeg is missing or fails on startup the per-turn capture (screenshots + action.json) still runs and the session's `last_error` field carries the diagnostic.\n\nState persists for the life of the daemon / MCP session; a restart resets to disabled with no on-disk state. Call `stop_recording` to disable + finalize the mp4.",
1199
+ parameterSchema: {
1200
+ additionalProperties: false,
1201
+ properties: {
1202
+ output_dir: {
1203
+ description: "Absolute or ~-rooted directory where turn folders and (when enabled) the video file are written.",
1204
+ type: "string"
1205
+ },
1206
+ record_video: {
1207
+ description: "Capture the main display to <output_dir>/recording.mp4. Default: false. Set to true to also capture the main display to recording.mp4 (otherwise only the per-turn screenshots + JSON are recorded). On macOS this uses native ScreenCaptureKit (no extra TCC prompt, macOS 15.0+); on Windows + Linux it requires ffmpeg on PATH.",
1208
+ type: "boolean"
1209
+ }
1210
+ },
1211
+ required: ["output_dir"],
1212
+ type: "object"
1213
+ }
1214
+ },
1215
+ start_session: {
1216
+ description: "Declare a session \u2014 a named, color-coded identity for THIS agent run. Pass a stable `session` id; the agent cursor, per-session config, and recording all key on it, and it follows the run across any apps/windows. The cursor's color is derived from the id, so distinct runs are visually distinct. A cursor is shown only for a declared session \u2014 call this (or pass `session` on your first action) to opt in. Idempotent: re-calling with the same id just refreshes its idle-TTL. End it with `end_session` (or let the idle-TTL reclaim it). Concurrent runs/subagents each pass their own `session` to get their own cursor.",
1217
+ parameterSchema: {
1218
+ additionalProperties: true,
1219
+ properties: {
1220
+ session: {
1221
+ description: 'Stable session id for this run (e.g. "research-run-1").',
1222
+ type: "string"
1223
+ }
1224
+ },
1225
+ required: ["session"],
1226
+ type: "object"
1227
+ }
1228
+ },
1229
+ stop_recording: {
1230
+ description: "Stop trajectory recording. Disables further per-turn capture and, when video was enabled, gracefully terminates the ffmpeg subprocess so the mp4's moov atom is finalized (the file is playable). Calling stop on an already-stopped session is a no-op. The response carries `last_video_path` pointing at the finalized mp4 (when video was on).\n\nA manual `stop_recording` is **unconditional** \u2014 it stops whatever recording is active regardless of which session started it. Ownership-scoped teardown (so one MCP client disconnecting can't stop a recording a later client started) is handled by the daemon's `session_end` lifecycle signal, not by this tool.",
1231
+ parameterSchema: {
1232
+ additionalProperties: false,
1233
+ properties: {},
1234
+ type: "object"
1235
+ }
1236
+ },
1237
+ type_text: {
1238
+ description: "Insert text into the target pid via `AXSetAttribute(kAXSelectedText)`. Works for standard Cocoa text fields and text views. No keystrokes are synthesized \u2014 special keys (Return / Escape / arrows) go through `press_key` / `hotkey`. For Chromium / Electron inputs that don't implement `kAXSelectedText`, the tool falls back to CGEvent character synthesis automatically.\n\nOptional `element_index` + `window_id` (from the last `get_window_state` snapshot) directs the write to a specific field. Without `element_index`, the write goes to the pid's currently focused element.",
1239
+ parameterSchema: {
1240
+ additionalProperties: false,
1241
+ properties: {
1242
+ delay_ms: {
1243
+ description: "Milliseconds between characters in the CGEvent fallback path. Default 30. Ignored when the AX path succeeds.",
1244
+ maximum: 200,
1245
+ minimum: 0,
1246
+ type: "integer"
1247
+ },
1248
+ element_index: {
1249
+ description: "Element index from last get_window_state. Directs the write to a specific field. Requires window_id.",
1250
+ type: "integer"
1251
+ },
1252
+ pid: {
1253
+ description: "Target process ID.",
1254
+ type: "integer"
1255
+ },
1256
+ session: {
1257
+ description: "Optional session id: declares/uses the agent cursor and per-session state for this run. The same id works over MCP, the CLI, or the raw socket, and follows the run across apps/windows. Omit to run cursor-less.",
1258
+ type: "string"
1259
+ },
1260
+ text: {
1261
+ description: "Text to insert at the target's cursor.",
1262
+ type: "string"
1263
+ },
1264
+ window_id: {
1265
+ description: "CGWindowID. Required when element_index is used.",
1266
+ type: "integer"
1267
+ }
1268
+ },
1269
+ required: ["pid", "text"],
1270
+ type: "object"
1271
+ }
1272
+ },
1273
+ zoom: {
1274
+ description: "Capture a cropped JPEG of a window region (x1,y1)\u2013(x2,y2) in screenshot pixel coordinates, with 20% padding added on each side. The output image is at most 500 px wide.\n\nAfter a zoom, pass `from_zoom=true` to click/type_text to auto-translate coordinates back to full-window space.",
1275
+ parameterSchema: {
1276
+ additionalProperties: false,
1277
+ properties: {
1278
+ pid: {
1279
+ description: "Target pid \u2014 required for from_zoom click/type translation.",
1280
+ type: "integer"
1281
+ },
1282
+ window_id: {
1283
+ description: "CGWindowID from list_windows.",
1284
+ type: "integer"
1285
+ },
1286
+ x1: {
1287
+ description: "Left edge of region in screenshot pixels.",
1288
+ type: "number"
1289
+ },
1290
+ x2: {
1291
+ description: "Right edge of region in screenshot pixels.",
1292
+ type: "number"
1293
+ },
1294
+ y1: {
1295
+ description: "Top edge of region in screenshot pixels.",
1296
+ type: "number"
1297
+ },
1298
+ y2: {
1299
+ description: "Bottom edge of region in screenshot pixels.",
1300
+ type: "number"
1301
+ }
1302
+ },
1303
+ required: ["window_id", "x1", "y1", "x2", "y2"],
1304
+ type: "object"
1305
+ }
1306
+ }
1307
+ };
1308
+
1309
+ // packages/core/src/tools/computer-use/bootstrap.ts
1310
+ init_esbuild_shims();
1311
+ import { execFile, spawnSync } from "node:child_process";
1312
+ import { promisify } from "node:util";
1313
+ import { rmSync } from "node:fs";
1314
+ import { homedir as homedir4 } from "node:os";
1315
+ import { join as join4 } from "node:path";
1316
+
1317
+ // packages/core/src/tools/computer-use/install-state.ts
1318
+ init_esbuild_shims();
1319
+ import { readFile, writeFile, mkdir } from "node:fs/promises";
1320
+ import { homedir as homedir3 } from "node:os";
1321
+ import { join as join2, dirname } from "node:path";
1322
/**
 * Resolve where the Computer Use install-approval record lives.
 *
 * @param {string} [home] - Home directory to resolve against; defaults to the
 *   current user's home directory.
 * @returns {string} `<home>/.qwen/computer-use/installed.json`.
 */
function installStatePathFor(home = homedir3()) {
  const segments = [".qwen", "computer-use", "installed.json"];
  return join2(home, ...segments);
}
1325
+ __name(installStatePathFor, "installStatePathFor");
1326
/**
 * Read the persisted install-approval record.
 *
 * Any failure to read or parse the file (missing file included) is treated as
 * "no record", as is a record whose `approvedPackageSpec` or `approvedAtIso`
 * is not a string.
 *
 * @param {string} [home] - Home directory holding the `.qwen` folder.
 * @returns {Promise<object | undefined>} The parsed record, or `undefined`.
 */
async function loadInstallState(home = homedir3()) {
  let record;
  try {
    const raw = await readFile(installStatePathFor(home), "utf8");
    record = JSON.parse(raw);
  } catch {
    // Unreadable or malformed state is indistinguishable from "never approved".
    return void 0;
  }
  const wellFormed =
    typeof record?.approvedPackageSpec === "string" &&
    typeof record?.approvedAtIso === "string";
  return wellFormed ? record : void 0;
}
1338
+ __name(loadInstallState, "loadInstallState");
1339
/**
 * Persist the install-approval record as pretty-printed JSON, creating the
 * containing directory first when it does not exist.
 *
 * @param {string} [home] - Home directory holding the `.qwen` folder.
 * @param {object} state - Record to write.
 * @returns {Promise<void>}
 */
async function saveInstallState(home = homedir3(), state) {
  const target = installStatePathFor(home);
  const parentDir = dirname(target);
  await mkdir(parentDir, { recursive: true });
  const serialized = JSON.stringify(state, null, 2);
  await writeFile(target, serialized, "utf8");
}
1344
+ __name(saveInstallState, "saveInstallState");
1345
/**
 * Report whether the stored approval record names exactly `packageSpec`.
 *
 * @param {string} [home] - Home directory holding the `.qwen` folder.
 * @param {string} packageSpec - Approval key to compare against.
 * @returns {Promise<boolean>}
 */
async function isPackageSpecApproved(home = homedir3(), packageSpec) {
  const { approvedPackageSpec } = (await loadInstallState(home)) ?? {};
  return approvedPackageSpec === packageSpec;
}
1349
+ __name(isPackageSpecApproved, "isPackageSpecApproved");
1350
+
1351
+ // packages/core/src/tools/computer-use/downloader.ts
1352
+ init_esbuild_shims();
1353
+ import { createHash } from "node:crypto";
1354
+ import { createWriteStream } from "node:fs";
1355
+ import { mkdir as mkdir2, rm, stat, chmod, rename } from "node:fs/promises";
1356
+ import { join as join3 } from "node:path";
1357
+ import { tmpdir } from "node:os";
1358
+ import { pipeline } from "node:stream/promises";
1359
+ import { Readable } from "node:stream";
1360
/**
 * Parse a `sha256sum`-style listing into a lookup table.
 *
 * Each useful line is `<64 hex chars><whitespace>[*]<file name>`; anything
 * else is ignored. Digests are normalised to lower case.
 *
 * @param {string} body - Full text of the checksum file.
 * @returns {Map<string, string>} File name → lower-case SHA-256 hex digest.
 */
function parseChecksums(body) {
  const entryPattern = /^([0-9a-f]{64})\s+\*?(.+)$/i;
  return body.split("\n").reduce((byName, rawLine) => {
    const hit = entryPattern.exec(rawLine.trim());
    if (hit !== null) {
      const [, digest, fileName] = hit;
      byName.set(fileName.trim(), digest.toLowerCase());
    }
    return byName;
  }, /* @__PURE__ */ new Map());
}
1368
+ __name(parseChecksums, "parseChecksums");
1369
/**
 * Look for an already-installed driver binary.
 *
 * @param {string} home - Home directory the driver is installed under.
 * @param {string} [platform] - Defaults to `process.platform`.
 * @param {string} [arch] - Defaults to `process.arch`.
 * @param {string} [version] - Defaults to `CUA_DRIVER_VERSION`.
 * @returns {Promise<string | undefined>} The binary path when it exists and is
 *   a regular file, otherwise `undefined` (stat failures are swallowed).
 */
async function findInstalled(home, platform = process.platform, arch = process.arch, version = CUA_DRIVER_VERSION) {
  const candidate = binaryPath(home, platform, arch, version);
  const info = await stat(candidate).catch(() => void 0);
  return info?.isFile() ? candidate : void 0;
}
1378
+ __name(findInstalled, "findInstalled");
1379
/**
 * Try each URL in order and resolve with the first response that is both
 * successful (`res.ok`) and carries a body.
 *
 * Every attempt gets its own 30s budget for `fetchImpl` to settle; the timer
 * is cleared as soon as it does, so reading the body afterwards is not
 * time-limited here. A non-OK response has its body cancelled before moving on.
 *
 * @param {Iterable<string>} urls - Candidate sources, most preferred first.
 * @param {typeof fetch} fetchImpl - fetch-compatible function.
 * @param {(message: string) => void} [onProgress] - Told when a source threw
 *   and another source is about to be tried.
 * @returns {Promise<{ url: string, res: Response }>} The winning URL and response.
 * @throws {Error} When every source fails; the message carries the last failure.
 */
async function fetchFirst(urls, fetchImpl, onProgress) {
  const candidates = [...urls];
  let lastErr;
  for (const [index, url] of candidates.entries()) {
    const controller = new AbortController();
    const headersTimeout = setTimeout(() => {
      controller.abort(new Error(`headers timeout after 30s for ${url}`));
    }, 3e4);
    try {
      const res = await fetchImpl(url, {
        redirect: "follow",
        signal: controller.signal
      });
      if (res.ok && res.body) return { url, res };
      await res.body?.cancel();
      lastErr = new Error(`HTTP ${res.status} for ${url}`);
    } catch (err) {
      lastErr = err;
      // Only promise a fallback when there actually is another source left;
      // announcing one after the final URL failed is misleading.
      const hasFallback = index + 1 !== candidates.length;
      if (hasFallback) {
        onProgress?.(`Source unreachable, trying fallback\u2026`);
      }
    } finally {
      clearTimeout(headersTimeout);
    }
  }
  throw new Error(
    `Computer Use: all download sources failed. Last error: ${lastErr instanceof Error ? lastErr.message : String(lastErr)}`
  );
}
1407
+ __name(fetchFirst, "fetchFirst");
1408
/**
 * Make sure the Computer Use driver binary exists under `opts.home`,
 * downloading and unpacking it when it does not.
 *
 * Steps: reuse an existing install → fetch the checksum listing → download the
 * asset while hashing it (aborting after 60s without data) → verify SHA-256 →
 * extract into `<versionDir>.staging` → swap staging into place → chmod 0755
 * on non-Windows → for macOS app bundles, strip quarantine and register the
 * app with LaunchServices.
 *
 * A failed download, checksum mismatch or failed extraction removes the
 * partial archive and the staging directory before the error is re-thrown.
 *
 * @param {object} opts
 * @param {string} opts.home - Home directory the driver is installed under.
 * @param {string} [opts.platform] - Defaults to `process.platform`.
 * @param {string} [opts.arch] - Defaults to `process.arch`.
 * @param {string} [opts.version] - Defaults to `CUA_DRIVER_VERSION`.
 * @param {object} [opts.env] - Defaults to `process.env`.
 * @param {typeof fetch} [opts.fetchImpl] - Defaults to global `fetch`.
 * @param {(message: string) => void} [opts.onProgress] - Progress sink.
 * @param {(zip: string, dest: string) => Promise<void>} [opts.unzipImpl] -
 *   Replacement extractor for `.zip` assets.
 * @returns {Promise<string>} Path of the driver binary.
 * @throws {Error} When the asset is missing from the checksum listing, all
 *   sources fail, the download stalls, the checksum differs, or extraction fails.
 */
async function ensureInstalled(opts) {
  const platform = opts.platform ?? process.platform;
  const arch = opts.arch ?? process.arch;
  const version = opts.version ?? CUA_DRIVER_VERSION;
  const env = opts.env ?? process.env;
  const fetchImpl = opts.fetchImpl ?? fetch;
  const onProgress = opts.onProgress;
  const existing = await findInstalled(opts.home, platform, arch, version);
  if (existing) return existing;
  const target = resolveAssetTarget(platform, arch, version);
  onProgress?.("Downloading Computer Use driver (~20MB, one time)...");
  const { res: sumRes } = await fetchFirst(
    resolveChecksumUrls(env, version),
    fetchImpl
  );
  const checksums = parseChecksums(await sumRes.text());
  const expectedSha = checksums.get(target.asset);
  if (!expectedSha) {
    throw new Error(
      `Computer Use: ${target.asset} missing from checksums.txt.`
    );
  }
  const { res } = await fetchFirst(
    resolveAssetUrls(target.asset, env, version),
    fetchImpl,
    onProgress
  );
  await mkdir2(computerUseTmp(opts.home), { recursive: true });
  const tmpFile = join3(computerUseTmp(opts.home), target.asset);
  let dir;
  let stagingDir;
  try {
    const hash = createHash("sha256");
    const nodeStream = Readable.fromWeb(res.body);
    let idleTimer;
    // Re-armed on every chunk: the stream is destroyed only after 60s of silence.
    const armIdle = /* @__PURE__ */ __name(() => {
      clearTimeout(idleTimer);
      idleTimer = setTimeout(() => {
        nodeStream.destroy(new Error("download stalled: no data for 60s"));
      }, 6e4);
    }, "armIdle");
    nodeStream.on("data", (chunk) => {
      hash.update(chunk);
      armIdle();
    });
    armIdle();
    try {
      await pipeline(nodeStream, createWriteStream(tmpFile));
    } finally {
      clearTimeout(idleTimer);
    }
    const actualSha = hash.digest("hex");
    if (actualSha !== expectedSha) {
      throw new Error(
        `Computer Use: checksum mismatch for ${target.asset} (expected ${expectedSha.slice(0, 12)}\u2026, got ${actualSha.slice(0, 12)}\u2026).`
      );
    }
    dir = versionDir(opts.home, version);
    stagingDir = `${dir}.staging`;
    await rm(stagingDir, { recursive: true, force: true });
    await mkdir2(stagingDir, { recursive: true });
    await extractArchive(tmpFile, stagingDir, target.asset, opts.unzipImpl);
  } catch (err) {
    // Best-effort cleanup so a failed attempt leaves neither a partial archive
    // nor a half-extracted tree behind; the original failure is what callers see.
    await rm(tmpFile, { force: true }).catch(() => {
    });
    if (stagingDir !== void 0) {
      await rm(stagingDir, { recursive: true, force: true }).catch(() => {
      });
    }
    throw err;
  }
  await rm(tmpFile, { force: true });
  // Replace any previous tree for this version with the freshly extracted one.
  await rm(dir, { recursive: true, force: true });
  await rename(stagingDir, dir);
  const bin = binaryPath(opts.home, platform, arch, version);
  if (platform !== "win32") {
    await chmod(bin, 493);
  }
  if (platform === "darwin" && target.hasApp) {
    const extractRoot = join3(dir, target.extractDir);
    const appDir = join3(extractRoot, "CuaDriver.app");
    await stripQuarantine(extractRoot);
    await registerLaunchServices(appDir);
  }
  onProgress?.("Computer Use driver ready.");
  return bin;
}
1484
+ __name(ensureInstalled, "ensureInstalled");
1485
// Absolute path of the macOS `lsregister` support tool.
var LSREGISTER = "/System/Library/Frameworks/CoreServices.framework/Versions/A/Frameworks/LaunchServices.framework/Versions/A/Support/lsregister";
/**
 * Run `lsregister -f <appPath>` with a 15s timeout.
 * Best-effort: every failure (missing tool, timeout, non-zero exit) is ignored.
 *
 * @param {string} appPath - Path of the `.app` bundle to register.
 * @returns {Promise<void>}
 */
async function registerLaunchServices(appPath) {
  try {
    const childProcess = await import("node:child_process");
    const util = await import("node:util");
    const runFile = util.promisify(childProcess.execFile);
    await runFile(LSREGISTER, ["-f", appPath], { timeout: 15e3 });
  } catch {
    // Registration is optional; ignore any failure.
  }
}
1494
+ __name(registerLaunchServices, "registerLaunchServices");
1495
/**
 * Scratch directory used for driver downloads, namespaced per home directory.
 *
 * The namespace is the first 8 hex characters of the SHA-256 of `home`.
 * Hashing the whole path matters: a tag built from only the leading bytes of
 * the path is the same for every home sharing a prefix (e.g. `/home/alice`
 * and `/home/bob`), so different users would collide in a shared temp dir.
 *
 * @param {string} home - Home directory the download belongs to.
 * @returns {string} `<os tmpdir>/qwen-computer-use-dl/<8-hex tag>`.
 */
function computerUseTmp(home) {
  const homeTag = createHash("sha256").update(home).digest("hex").slice(0, 8);
  return join3(tmpdir(), "qwen-computer-use-dl", homeTag);
}
1502
+ __name(computerUseTmp, "computerUseTmp");
1503
/**
 * Run `xattr -dr com.apple.quarantine <path>` with a 10s timeout.
 * Best-effort: every failure is ignored.
 *
 * @param {string} path - File or directory to process recursively.
 * @returns {Promise<void>}
 */
async function stripQuarantine(path) {
  try {
    const childProcess = await import("node:child_process");
    const util = await import("node:util");
    const runFile = util.promisify(childProcess.execFile);
    const xattrArgs = ["-dr", "com.apple.quarantine", path];
    await runFile("xattr", xattrArgs, { timeout: 1e4 });
  } catch {
    // Nothing to do when the attribute (or xattr itself) is absent.
  }
}
1513
+ __name(stripQuarantine, "stripQuarantine");
1514
/**
 * Unpack a downloaded driver archive into `destDir`.
 *
 * `.zip` assets go through `unzipImpl` when provided, otherwise through
 * `extractZipWindows`; every other asset is handed to `extract`.
 *
 * @param {string} archivePath - Archive on disk.
 * @param {string} destDir - Directory to unpack into.
 * @param {string} asset - Asset file name; its extension selects the extractor.
 * @param {(zip: string, dest: string) => Promise<void>} [unzipImpl] - Zip extractor override.
 * @returns {Promise<void>}
 */
async function extractArchive(archivePath, destDir, asset, unzipImpl) {
  if (!asset.endsWith(".zip")) {
    await extract({ file: archivePath, cwd: destDir });
    return;
  }
  const unzip = unzipImpl ?? extractZipWindows;
  await unzip(archivePath, destDir);
}
1521
+ __name(extractArchive, "extractArchive");
1522
/**
 * Unzip an archive by shelling out, trying in order: `tar -xf`,
 * `tar --force-local -xf`, then PowerShell `Expand-Archive`. The first
 * command that succeeds wins; each attempt is limited to 180s.
 *
 * @param {string} zipPath - Zip file to extract.
 * @param {string} destDir - Destination directory.
 * @returns {Promise<void>}
 * @throws {Error} When every attempt fails; the message lists each failure.
 */
async function extractZipWindows(zipPath, destDir) {
  const childProcess = await import("node:child_process");
  const util = await import("node:util");
  const exec = util.promisify(childProcess.execFile);
  const expandArchive = `Expand-Archive -LiteralPath ${psSingleQuote(zipPath)} -DestinationPath ${psSingleQuote(destDir)} -Force`;
  const strategies = [
    ["tar", ["-xf", zipPath, "-C", destDir]],
    ["tar", ["--force-local", "-xf", zipPath, "-C", destDir]],
    ["powershell", ["-NoProfile", "-NonInteractive", "-Command", expandArchive]]
  ];
  const failures = [];
  for (const [tool, toolArgs] of strategies) {
    try {
      await exec(tool, toolArgs, { timeout: 18e4 });
      return;
    } catch (err) {
      // Label each failure with the tool and its first flag to tell the tar variants apart.
      failures.push(`${tool} ${toolArgs[0]}: ${errMsg(err)}`);
    }
  }
  throw new Error(
    `Computer Use: failed to unzip ${zipPath} on Windows (${failures.join("; ")}).`
  );
}
1548
+ __name(extractZipWindows, "extractZipWindows");
1549
/**
 * Wrap a string in single quotes for PowerShell, doubling embedded single quotes.
 *
 * @param {string} s - Raw value.
 * @returns {string} Single-quoted literal.
 */
function psSingleQuote(s) {
  const escaped = s.split("'").join("''");
  return ["'", escaped, "'"].join("");
}
1552
+ __name(psSingleQuote, "psSingleQuote");
1553
/**
 * Turn any thrown value into display text: `message` for `Error` instances,
 * `String(value)` for everything else.
 *
 * @param {unknown} e - Thrown value.
 * @returns {string}
 */
function errMsg(e) {
  if (e instanceof Error) {
    return e.message;
  }
  return String(e);
}
1556
+ __name(errMsg, "errMsg");
1557
+
1558
+ // packages/core/src/tools/computer-use/bootstrap.ts
1559
+ var execFileAsync = promisify(execFile);
1560
/**
 * Interpret the JSON printed by the driver's `permissions status --json`.
 *
 * @param {string} json - Raw stdout.
 * @returns {"ok" | "accessibility" | "screenRecording" | "unknown"}
 *   `"unknown"` when the text is not JSON or `accessibility` is not a boolean;
 *   otherwise the first permission that is not granted, or `"ok"`.
 */
function parsePermissionsStatus(json) {
  let accessibility;
  let screenRecording;
  try {
    ({ accessibility, screen_recording: screenRecording } = JSON.parse(json));
  } catch {
    // Covers invalid JSON as well as a top-level `null`.
    return "unknown";
  }
  if (typeof accessibility !== "boolean") return "unknown";
  if (accessibility === false) return "accessibility";
  return screenRecording ? "ok" : "screenRecording";
}
1571
+ __name(parsePermissionsStatus, "parsePermissionsStatus");
1572
+ var SOCKET = /* @__PURE__ */ __name(() => join4(homedir4(), "Library", "Caches", "cua-driver", "cua-driver.sock"), "SOCKET");
1573
/**
 * Stop any running `cua-driver serve` process started from the app bundle and
 * delete the driver socket file. Both steps are best-effort and independent:
 * a failure in one does not prevent the other.
 */
function killServeDaemons() {
  const attempt = (step) => {
    try {
      step();
    } catch {
      // Best-effort: ignore.
    }
  };
  attempt(() => {
    spawnSync(
      "pkill",
      ["-f", "CuaDriver.app/Contents/MacOS/cua-driver serve"],
      { stdio: "ignore" }
    );
  });
  attempt(() => {
    rmSync(SOCKET(), { force: true });
  });
}
1589
+ __name(killServeDaemons, "killServeDaemons");
1590
/**
 * Ask the installed driver binary for its permission status
 * (`permissions status --json`, 10s timeout) and classify the answer.
 *
 * @returns {Promise<"ok" | "accessibility" | "screenRecording" | "unknown">}
 *   `"unknown"` when the command cannot be run or fails.
 */
async function probePermissionsViaStatus() {
  const statusArgs = ["permissions", "status", "--json"];
  const execOptions = { timeout: 1e4, env: process.env };
  try {
    const driver = binaryPath(homedir4());
    const result = await execFileAsync(driver, statusArgs, execOptions);
    return parsePermissionsStatus(result.stdout);
  } catch {
    return "unknown";
  }
}
1602
+ __name(probePermissionsViaStatus, "probePermissionsViaStatus");
1603
/**
 * Restart the driver daemon used for permission probing: kill any running
 * instance, then launch CuaDriver via `open` with `serve --no-permissions-gate`.
 * A launch failure is ignored.
 *
 * @returns {{ kill: () => void }} Handle whose `kill` stops the daemon(s).
 */
function startStatusDaemonProcess() {
  killServeDaemons();
  const openArgs = [
    "-n",
    "-g",
    "-a",
    "CuaDriver",
    "--args",
    "serve",
    "--no-permissions-gate"
  ];
  try {
    spawnSync("open", openArgs, { stdio: "ignore" });
  } catch {
    // Best-effort launch.
  }
  return { kill: killServeDaemons };
}
1623
+ __name(startStatusDaemonProcess, "startStatusDaemonProcess");
1624
/**
 * Open the macOS System Settings privacy pane for the given permission.
 * Any `kind` other than `"accessibility"` opens the Screen Capture pane.
 * Failures are ignored.
 *
 * @param {string} kind - `"accessibility"` or `"screenRecording"`.
 */
function openPermissionPaneProcess(kind) {
  let anchor;
  if (kind === "accessibility") {
    anchor = "Privacy_Accessibility";
  } else {
    anchor = "Privacy_ScreenCapture";
  }
  const paneUrl = `x-apple.systempreferences:com.apple.preference.security?${anchor}`;
  try {
    spawnSync("open", [paneUrl], { stdio: "ignore" });
  } catch {
    // Best-effort.
  }
}
1635
+ __name(openPermissionPaneProcess, "openPermissionPaneProcess");
1636
/**
 * Build the production dependency set consumed by `runBootstrap`.
 *
 * `promptInstallApproval` cannot ask interactively: it prints a notice to
 * stderr and approves only when `QWEN_COMPUTER_USE_AUTO_APPROVE` is `"1"`.
 *
 * @returns {object} Home dir, approval key, platform and the install /
 *   daemon / permission helpers.
 */
function defaultDeps() {
  const home = homedir4();
  const promptInstallApproval = /* @__PURE__ */ __name(async (key) => {
    const notice = `
[Computer Use] First-time setup
Driver: ${key}
This downloads a ~20MB signed + notarized binary into ~/.qwen/computer-use/.
Computer Use can click, type, and read your desktop apps in the background.
On macOS you'll be guided through Accessibility and Screen Recording permissions next.
Set QWEN_COMPUTER_USE_AUTO_APPROVE=1 to skip this prompt.
`;
    process.stderr.write(notice);
    return process.env["QWEN_COMPUTER_USE_AUTO_APPROVE"] === "1";
  }, "promptInstallApproval");
  const install = /* @__PURE__ */ __name((onProgress) => ensureInstalled({ home, onProgress }), "install");
  return {
    homeDir: home,
    approvalKey: approvalKey(),
    platform: process.platform,
    promptInstallApproval,
    install,
    startStatusDaemon: startStatusDaemonProcess,
    probePermissions: probePermissionsViaStatus,
    openPermissionPane: openPermissionPaneProcess
  };
}
1661
+ __name(defaultDeps, "defaultDeps");
1662
/**
 * Bring the Computer Use client up: confirm install approval, install the
 * driver, walk through macOS permissions, then start the client.
 * Returns immediately when the client is already started.
 *
 * @param {object} client - Exposes `isStarted()` and `start(updateOutput)`.
 * @param {object} ctx - `{ signal, updateOutput?, autoApproveInstall? }`.
 * @param {object} [depsOverride] - Replacements for entries of `defaultDeps()`;
 *   may also carry `pollIntervalMs` (default 5000) and `pollTimeoutMs`
 *   (default 10 minutes).
 * @returns {Promise<void>}
 * @throws {Error} When the install prompt is declined, or any step fails.
 */
async function runBootstrap(client, ctx, depsOverride) {
  const deps = { ...defaultDeps(), ...depsOverride };
  const pollIntervalMs = deps.pollIntervalMs ?? 5e3;
  const pollTimeoutMs = deps.pollTimeoutMs ?? 10 * 6e4;
  if (client.isStarted()) return;
  const alreadyApproved = await isPackageSpecApproved(deps.homeDir, deps.approvalKey);
  if (!alreadyApproved) {
    if (!ctx.autoApproveInstall) {
      ctx.updateOutput?.(
        "Computer Use needs a one-time driver download (first use)."
      );
      const accepted = await deps.promptInstallApproval(deps.approvalKey);
      if (!accepted) {
        throw new Error(
          `Computer Use install declined by user. Re-invoke the tool to be prompted again.`
        );
      }
    } else {
      ctx.updateOutput?.("Computer Use install auto-approved (approval mode).");
    }
    // Record the approval before installing so later runs skip the prompt.
    await saveInstallState(deps.homeDir, {
      approvedPackageSpec: deps.approvalKey,
      approvedAtIso: (/* @__PURE__ */ new Date()).toISOString()
    });
  }
  await deps.install(ctx.updateOutput);
  const needsPermissionFlow = deps.platform === "darwin";
  if (needsPermissionFlow) {
    await ensurePermissions(deps, ctx, pollIntervalMs, pollTimeoutMs);
  }
  await client.start(ctx.updateOutput);
}
1693
+ __name(runBootstrap, "runBootstrap");
1694
/**
 * Poll the driver's permission status until both Accessibility and Screen
 * Recording are granted.
 *
 * Each round: check abort and timeout, wait `pollIntervalMs`, probe. An
 * `"unknown"` probe restarts the status daemon. The first time a permission is
 * reported missing its System Settings pane is opened; later rounds only
 * report the elapsed wait. The daemon is always killed on exit.
 *
 * @param {object} deps - `startStatusDaemon`, `probePermissions`, `openPermissionPane`.
 * @param {object} ctx - `{ signal, updateOutput? }`.
 * @param {number} pollIntervalMs - Delay before each probe.
 * @param {number} pollTimeoutMs - Overall budget for the grant.
 * @returns {Promise<void>}
 * @throws {Error} On abort or when the budget is exceeded.
 */
async function ensurePermissions(deps, ctx, pollIntervalMs, pollTimeoutMs) {
  let daemon = deps.startStatusDaemon();
  const paneOpened = { accessibility: false, screenRecording: false };
  const firstPrompt = {
    accessibility: "Step 1/2 \u2014 In the System Settings window that opened (Privacy & Security \u2192 Accessibility), turn ON CuaDriver. This continues automatically.",
    screenRecording: "Step 2/2 \u2014 Accessibility granted. Now in System Settings (Privacy & Security \u2192 Screen & System Audio Recording), turn ON CuaDriver. macOS will ask to restart CuaDriver \u2014 allow it; that is expected. This continues automatically."
  };
  const waitingLabel = {
    accessibility: "Accessibility",
    screenRecording: "Screen Recording"
  };
  try {
    const startedAt = Date.now();
    const secondsWaited = () => Math.round((Date.now() - startedAt) / 1e3);
    while (true) {
      if (ctx.signal.aborted) {
        throw new Error("Computer Use bootstrap aborted.");
      }
      if (Date.now() - startedAt > pollTimeoutMs) {
        throw new Error(
          `Computer Use permission grant timed out after ${Math.round(
            pollTimeoutMs / 1e3
          )}s. Re-invoke the tool to retry.`
        );
      }
      await sleep(pollIntervalMs);
      const probe = await deps.probePermissions();
      if (probe === "ok") return;
      if (probe === "unknown") {
        // No usable answer: restart the status daemon and try again.
        daemon.kill();
        daemon = deps.startStatusDaemon();
        const elapsed = secondsWaited();
        ctx.updateOutput?.(
          `Bringing up Computer Use permissions check\u2026 (${elapsed}s)`
        );
        continue;
      }
      // Anything that is not "accessibility" is handled as the screen-recording step.
      const kind = probe === "accessibility" ? "accessibility" : "screenRecording";
      if (paneOpened[kind]) {
        const elapsed = secondsWaited();
        ctx.updateOutput?.(
          `Waiting for ${waitingLabel[kind]}\u2026 (${elapsed}s) \u2014 enable CuaDriver in System Settings.`
        );
      } else {
        paneOpened[kind] = true;
        deps.openPermissionPane(kind);
        ctx.updateOutput?.(firstPrompt[kind]);
      }
    }
  } finally {
    daemon.kill();
  }
}
1754
+ __name(ensurePermissions, "ensurePermissions");
1755
+ var sleep = /* @__PURE__ */ __name((ms) => new Promise((r) => setTimeout(r, ms)), "sleep");
1756
+
1757
+ // packages/core/src/tools/computer-use/tool.ts
1758
+ import { homedir as homedir5 } from "node:os";
1759
+ var INSTALL_REASON = "This downloads the Computer Use driver (~20MB, signed + notarized) into ~/.qwen/computer-use/ the first time. Computer Use can click, type, and read your desktop apps in the background. On macOS you'll be guided through Accessibility / Screen Recording permissions next.";
1760
// Upstream tool names that always get the high-risk confirmation dialog.
var HIGH_RISK_TOOLS = /* @__PURE__ */ new Set([
  "kill_app",
  "launch_app",
  "start_recording",
  "set_config",
  "replay_trajectory"
]);
// `action` values of the `page` tool that are treated as high-risk.
var HIGH_RISK_PAGE_ACTIONS = /* @__PURE__ */ new Set([
  "execute_javascript",
  "enable_javascript_apple_events"
]);
// Fail at load time if the high-risk list names a tool with no schema.
HIGH_RISK_TOOLS.forEach((toolName) => {
  const known = toolName in COMPUTER_USE_SCHEMAS;
  if (!known) {
    throw new Error(`HIGH_RISK_TOOLS contains unknown tool: ${toolName}`);
  }
});
1776
/**
 * Decide whether a call needs the high-risk confirmation dialog.
 *
 * @param {string} upstreamName - Upstream tool name (without the `computer_use__` prefix).
 * @param {object} params - Call arguments; only `action` is inspected, and only for `page`.
 * @returns {boolean} True for a tool in `HIGH_RISK_TOOLS`, or a `page` call
 *   whose `action` is in `HIGH_RISK_PAGE_ACTIONS`.
 */
function isHighRiskCall(upstreamName, params) {
  if (HIGH_RISK_TOOLS.has(upstreamName)) {
    return true;
  }
  if (upstreamName !== "page") {
    return false;
  }
  return HIGH_RISK_PAGE_ACTIONS.has(params["action"]);
}
1780
+ __name(isHighRiskCall, "isHighRiskCall");
1781
/**
 * One concrete call of a `computer_use__*` tool: holds the upstream tool name,
 * the call arguments and the optional config.
 */
var ComputerUseInvocation = class extends BaseToolInvocation {
  static {
    __name(this, "ComputerUseInvocation");
  }
  constructor(upstreamName, params, config) {
    super(params);
    this.upstreamName = upstreamName;
    this.config = config;
  }
  /** The call arguments rendered as JSON. */
  getDescription() {
    return safeJsonStringify(this.params);
  }
  /**
   * Every desktop action defaults to "ask", so each one goes through the
   * confirmation dialog. Install approval is deliberately not treated as
   * per-action approval: having approved the driver download says nothing
   * about whether a particular click / type / drag is acceptable.
   */
  async getDefaultPermission() {
    return "ask";
  }
  /**
   * Build the confirmation dialog for this call.
   *
   * - High-risk calls (see `isHighRiskCall`) get an "mcp"-type dialog whose
   *   title also mentions the driver download when the install has not been
   *   approved yet.
   * - Everything else gets an "info"-type dialog showing the tool name and
   *   arguments, followed by either a short "acts on your desktop" note
   *   (install approved) or the install explanation (not yet approved).
   *
   * Both shapes carry a single permission rule naming this tool, and an
   * `onConfirm` that records install approval for every outcome except Cancel.
   */
  async getConfirmationDetails(_abortSignal) {
    const qualifiedName = `computer_use__${this.upstreamName}`;
    const permissionRules = [qualifiedName];
    const installApproved = await isPackageSpecApproved(
      homedir5(),
      approvalKey()
    );
    const onConfirm = /* @__PURE__ */ __name(async (outcome, _payload) => {
      if (outcome === "cancel" /* Cancel */) return;
      await saveInstallState(homedir5(), {
        approvedPackageSpec: approvalKey(),
        approvedAtIso: (/* @__PURE__ */ new Date()).toISOString()
      });
    }, "onConfirm");
    if (isHighRiskCall(this.upstreamName, this.params)) {
      let title = `Allow high-risk Computer Use (${this.upstreamName})`;
      if (!installApproved) {
        title = `${title} \u2014 first use also downloads the driver`;
      }
      return {
        type: "mcp",
        title,
        serverName: "cua-driver",
        toolName: this.upstreamName,
        toolDisplayName: qualifiedName,
        permissionRules,
        onConfirm
      };
    }
    const argsJson = safeJsonStringify(this.params);
    const closingNote = installApproved ? "This will act on your desktop via the Computer Use binary." : INSTALL_REASON;
    const prompt = `Tool: ${qualifiedName}\n\nArgs: ${argsJson}\n\n${closingNote}`;
    return {
      type: "info",
      title: `Allow Computer Use (${this.upstreamName})`,
      prompt,
      permissionRules,
      onConfirm
    };
  }
  /**
   * Bootstrap the shared client if needed, forward the call upstream and
   * translate the result. A thrown call error and an upstream `isError`
   * result both come back as a result object with `error` set rather than
   * as a rejection; bootstrap failures do reject.
   */
  async execute(signal, updateOutput) {
    const client = ComputerUseClient.shared();
    const maxDimension = resolveMaxImageDimension(
      this.config?.getComputerUseMaxImageDimension()
    );
    client.setMaxImageDimension(maxDimension);
    // With a config present the install prompt is skipped inside runBootstrap.
    const autoApproveInstall = Boolean(this.config);
    await runBootstrap(client, { signal, updateOutput, autoApproveInstall });
    let mcpResult;
    try {
      mcpResult = await client.callTool(this.upstreamName, this.params);
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      return {
        llmContent: `Computer Use tool '${this.upstreamName}' failed: ${message}`,
        returnDisplay: `Error: ${message}`,
        error: { message }
      };
    }
    const llmContent = buildLlmContent(
      mcpResult.content,
      this.upstreamName,
      mcpResult.structuredContent
    );
    const returnDisplay = buildDisplayText(mcpResult.content);
    if (!mcpResult.isError) {
      return { llmContent, returnDisplay };
    }
    const errorText = returnDisplay || `Tool '${this.upstreamName}' returned isError=true`;
    return {
      llmContent: llmContent || errorText,
      returnDisplay: errorText,
      error: { message: errorText }
    };
  }
};
1913
/**
 * Declarative wrapper exposing one upstream driver tool under the name
 * `computer_use__<upstreamName>`.
 */
var ComputerUseTool = class extends BaseDeclarativeTool {
  static {
    __name(this, "ComputerUseTool");
  }
  constructor(upstreamName, schema, config) {
    const qwenName = `computer_use__${upstreamName}`;
    const searchKeywords = "computer use desktop click type screenshot mouse keyboard scroll drag automation gui app native";
    super(
      qwenName,
      // displayName == name; no MCP branding in UI
      qwenName,
      schema.description,
      "other" /* Other */,
      schema.parameterSchema,
      // isOutputMarkdown — many results are JSON-ish text or screenshots
      true,
      // canUpdateOutput — bootstrap streams progress
      true,
      // shouldDefer — surface only via ToolSearch
      true,
      // alwaysLoad
      false,
      searchKeywords
    );
    this.upstreamName = upstreamName;
    this.config = config;
  }
  /**
   * Validate after normalising scalar types. Models sometimes send a numeric
   * string where the schema wants a number, or a number where it wants a
   * string; coercing first avoids spurious validation failures without
   * loosening the schema itself.
   */
  validateToolParams(params) {
    return super.validateToolParams(coerceTypes(params, this.parameterSchema));
  }
  /** Build an invocation from params normalised the same way as in validation. */
  build(params) {
    return super.build(coerceTypes(params, this.parameterSchema));
  }
  /** Create the invocation object for already-prepared params. */
  createInvocation(params) {
    const { upstreamName, config } = this;
    return new ComputerUseInvocation(upstreamName, params, config);
  }
};
1964
/**
 * Return a shallow copy of `params` with scalar values nudged to the type the
 * schema declares for that property:
 *
 * - a plain decimal string (`"42"`, `"-1.5"`) becomes a number for
 *   `integer` / `number` fields;
 * - a number becomes a string for `string` fields.
 *
 * For `integer` fields only whole values are coerced (`"2"`, `"2.0"`). A
 * fractional string such as `"2.7"` is left untouched so schema validation
 * can reject it instead of the value being silently truncated to `2`.
 *
 * @param {object} params - Raw tool arguments; never mutated.
 * @param {object} schema - JSON-schema-like object; only `properties[*].type` is read.
 * @returns {object} The coerced copy, or `params` itself when the schema has no `properties`.
 */
function coerceTypes(params, schema) {
  const properties = schema.properties;
  if (!properties) return params;
  const plainDecimal = /^-?\d+(\.\d+)?$/;
  const result = { ...params };
  for (const [key, value] of Object.entries(result)) {
    const fieldType = properties[key]?.type;
    const wantsNumber = fieldType === "integer" || fieldType === "number";
    if (wantsNumber && typeof value === "string") {
      const trimmed = value.trim();
      if (!plainDecimal.test(trimmed)) continue;
      const parsed = Number.parseFloat(trimmed);
      if (!Number.isFinite(parsed)) continue;
      // Never round a fractional value into an integer field.
      if (fieldType === "integer" && !Number.isInteger(parsed)) continue;
      result[key] = parsed;
    } else if (fieldType === "string" && typeof value === "number") {
      result[key] = String(value);
    }
  }
  return result;
}
1984
+ __name(coerceTypes, "coerceTypes");
1985
/**
 * Convert an upstream tool result into model-facing content.
 *
 * Text blocks become `{ text }` parts. Image and audio blocks that have both
 * a mime type and data become a short announcement part followed by an
 * `inlineData` part. Structured content, when it serialises to something, is
 * appended as a `Structured result: …` text part.
 *
 * @param {Array<object>} content - Upstream content blocks.
 * @param {string} toolName - Name used in the media announcement.
 * @param {unknown} structuredContent - Optional structured payload.
 * @returns {string | Array<object>} The text parts joined by newlines when no
 *   media is present, otherwise the full parts array.
 */
function buildLlmContent(content, toolName, structuredContent) {
  const parts = [];
  for (const block of content) {
    const isText = block.type === "text";
    const isMedia = block.type === "image" || block.type === "audio";
    if (isText && block.text) {
      parts.push({ text: block.text });
    } else if (isMedia && block.mimeType && block.data) {
      const announcement = `[Tool '${toolName}' provided the following ${block.type} data with mime-type: ${block.mimeType}]`;
      parts.push(
        { text: announcement },
        { inlineData: { mimeType: block.mimeType, data: block.data } }
      );
    }
  }
  const structuredText = stringifyStructured(structuredContent);
  if (structuredText) {
    parts.push({ text: `Structured result: ${structuredText}` });
  }
  const carriesMedia = parts.some((part) => part.inlineData !== void 0);
  if (carriesMedia) {
    return parts;
  }
  return parts.map((part) => part.text ?? "").filter(Boolean).join("\n");
}
2012
+ __name(buildLlmContent, "buildLlmContent");
2013
/**
 * Build the human-readable display text for a tool result.
 *
 * Only blocks of type "text" with non-empty `text` contribute; their text is
 * joined with newlines. All other block types are ignored.
 *
 * @param {object[]} content - Content blocks from the tool result.
 * @returns {string} Newline-joined text, or "" when nothing qualifies.
 */
function buildDisplayText(content) {
  const textBlocks = content.filter((item) => item.type === "text" && item.text);
  const lines = textBlocks.map((item) => item.text);
  return lines.join("\n");
}
2016
+ __name(buildDisplayText, "buildDisplayText");
2017
/**
 * Serialise a structured tool result for inclusion in LLM content.
 *
 * The `tree_markdown` key is omitted from the serialised output
 * (NOTE(review): presumably because that payload is already delivered
 * through the text content blocks; confirm against the upstream tool).
 *
 * The filtered copy is built with `Object.fromEntries`, which defines own
 * data properties. Plain assignment (`obj[key] = value`) would treat an
 * incoming own key named "__proto__" as a prototype setter and silently
 * drop that entry from the output.
 *
 * @param {unknown} structured - Structured content from the tool result.
 * @returns {string | undefined} Result of `safeJsonStringify` on the
 *   remaining keys, or `undefined` when `structured` is not an object or has
 *   no keys other than `tree_markdown`.
 */
function stringifyStructured(structured) {
  if (!structured || typeof structured !== "object") return void 0;

  const remaining = Object.entries(structured).filter(([key]) => key !== "tree_markdown");
  if (remaining.length === 0) return void 0;

  return safeJsonStringify(Object.fromEntries(remaining));
}
2027
+ __name(stringifyStructured, "stringifyStructured");
2028
+
2029
+ // packages/core/src/tools/computer-use/index.ts
2030
/**
 * Register one lazily-constructed tool per entry in COMPUTER_USE_TOOL_NAMES.
 *
 * Each tool is registered under the name `computer_use__<upstreamName>` with
 * a factory that constructs a `ComputerUseTool` from the upstream name, its
 * entry in COMPUTER_USE_SCHEMAS, and `config`. Registrations are awaited one
 * at a time, in list order.
 *
 * @param {(name: string, factory: () => Promise<object>) => unknown} registerLazy
 *   Callback that records a tool name together with its factory.
 * @param {object} config - Passed through to every `ComputerUseTool`.
 * @returns {Promise<void>} Resolves once every registration has completed.
 */
async function registerComputerUseTools(registerLazy, config) {
  for (const upstreamName of COMPUTER_USE_TOOL_NAMES) {
    // Resolve the schema at registration time; only construction is deferred.
    const parameterSchema = COMPUTER_USE_SCHEMAS[upstreamName];
    const createTool = async () => new ComputerUseTool(upstreamName, parameterSchema, config);
    await registerLazy(`computer_use__${upstreamName}`, createTool);
  }
}
2040
+ __name(registerComputerUseTools, "registerComputerUseTools");
2041
+ export {
2042
+ COMPUTER_USE_SCHEMAS,
2043
+ COMPUTER_USE_TOOL_NAMES,
2044
+ ComputerUseClient,
2045
+ ComputerUseTool,
2046
+ registerComputerUseTools
2047
+ };
2048
+ /**
2049
+ * @license
2050
+ * Copyright 2025 Qwen Team
2051
+ * SPDX-License-Identifier: Apache-2.0
2052
+ */