@xagent-ai/cli 1.2.2 → 1.3.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (602)
  1. package/.github/ISSUE_TEMPLATE/bug_report.md +38 -38
  2. package/.github/ISSUE_TEMPLATE/feature_request.md +20 -20
  3. package/.github/release.yml +76 -0
  4. package/.github/workflows/ci.yml +75 -0
  5. package/.github/workflows/release.yml +103 -0
  6. package/.gitmodules +3 -3
  7. package/README.md +326 -280
  8. package/README_CN.md +325 -279
  9. package/dist/agents.d.ts.map +1 -1
  10. package/dist/agents.js +7 -3
  11. package/dist/agents.js.map +1 -1
  12. package/dist/ai-client/factory.d.ts +40 -0
  13. package/dist/ai-client/factory.d.ts.map +1 -0
  14. package/dist/ai-client/factory.js +100 -0
  15. package/dist/ai-client/factory.js.map +1 -0
  16. package/dist/ai-client/index.d.ts +20 -0
  17. package/dist/ai-client/index.d.ts.map +1 -0
  18. package/dist/ai-client/index.js +49 -0
  19. package/dist/ai-client/index.js.map +1 -0
  20. package/dist/ai-client/providers/anthropic.d.ts +57 -0
  21. package/dist/ai-client/providers/anthropic.d.ts.map +1 -0
  22. package/dist/ai-client/providers/anthropic.js +406 -0
  23. package/dist/ai-client/providers/anthropic.js.map +1 -0
  24. package/dist/ai-client/providers/openai.d.ts +57 -0
  25. package/dist/ai-client/providers/openai.d.ts.map +1 -0
  26. package/dist/ai-client/providers/openai.js +290 -0
  27. package/dist/ai-client/providers/openai.js.map +1 -0
  28. package/dist/ai-client/providers/remote.d.ts +110 -0
  29. package/dist/ai-client/providers/remote.d.ts.map +1 -0
  30. package/dist/ai-client/providers/remote.js +352 -0
  31. package/dist/ai-client/providers/remote.js.map +1 -0
  32. package/dist/ai-client/registry.d.ts +51 -0
  33. package/dist/ai-client/registry.d.ts.map +1 -0
  34. package/dist/ai-client/registry.js +81 -0
  35. package/dist/ai-client/registry.js.map +1 -0
  36. package/dist/ai-client/types.d.ts +274 -0
  37. package/dist/ai-client/types.d.ts.map +1 -0
  38. package/dist/ai-client/types.js +90 -0
  39. package/dist/ai-client/types.js.map +1 -0
  40. package/dist/ai-client-factory.d.ts +62 -0
  41. package/dist/ai-client-factory.d.ts.map +1 -0
  42. package/dist/ai-client-factory.js +157 -0
  43. package/dist/ai-client-factory.js.map +1 -0
  44. package/dist/auth.d.ts +23 -1
  45. package/dist/auth.d.ts.map +1 -1
  46. package/dist/auth.js +164 -174
  47. package/dist/auth.js.map +1 -1
  48. package/dist/cancellation.d.ts +5 -4
  49. package/dist/cancellation.d.ts.map +1 -1
  50. package/dist/cancellation.js +53 -32
  51. package/dist/cancellation.js.map +1 -1
  52. package/dist/checkpoint.d.ts +2 -1
  53. package/dist/checkpoint.d.ts.map +1 -1
  54. package/dist/checkpoint.js +39 -6
  55. package/dist/checkpoint.js.map +1 -1
  56. package/dist/cli.js +742 -29
  57. package/dist/cli.js.map +1 -1
  58. package/dist/config.d.ts +10 -4
  59. package/dist/config.d.ts.map +1 -1
  60. package/dist/config.js +62 -25
  61. package/dist/config.js.map +1 -1
  62. package/dist/context-compressor.d.ts +82 -18
  63. package/dist/context-compressor.d.ts.map +1 -1
  64. package/dist/context-compressor.js +718 -154
  65. package/dist/context-compressor.js.map +1 -1
  66. package/dist/conversation.d.ts +1 -1
  67. package/dist/conversation.d.ts.map +1 -1
  68. package/dist/conversation.js +8 -7
  69. package/dist/conversation.js.map +1 -1
  70. package/dist/gui-subagent/action-parser/actionParser.d.ts.map +1 -1
  71. package/dist/gui-subagent/action-parser/actionParser.js +6 -4
  72. package/dist/gui-subagent/action-parser/actionParser.js.map +1 -1
  73. package/dist/gui-subagent/agent/gui-agent.d.ts +39 -2
  74. package/dist/gui-subagent/agent/gui-agent.d.ts.map +1 -1
  75. package/dist/gui-subagent/agent/gui-agent.js +189 -74
  76. package/dist/gui-subagent/agent/gui-agent.js.map +1 -1
  77. package/dist/gui-subagent/index.d.ts +23 -1
  78. package/dist/gui-subagent/index.d.ts.map +1 -1
  79. package/dist/gui-subagent/index.js +6 -0
  80. package/dist/gui-subagent/index.js.map +1 -1
  81. package/dist/gui-subagent/operator/base-operator.d.ts.map +1 -1
  82. package/dist/gui-subagent/operator/base-operator.js +0 -1
  83. package/dist/gui-subagent/operator/base-operator.js.map +1 -1
  84. package/dist/gui-subagent/operator/computer-operator.d.ts.map +1 -1
  85. package/dist/gui-subagent/operator/computer-operator.js +31 -8
  86. package/dist/gui-subagent/operator/computer-operator.js.map +1 -1
  87. package/dist/gui-subagent/types/actions.d.ts +1 -1
  88. package/dist/gui-subagent/types/actions.d.ts.map +1 -1
  89. package/dist/gui-subagent/types/actions.js +0 -1
  90. package/dist/gui-subagent/types/actions.js.map +1 -1
  91. package/dist/gui-subagent/types/operator.d.ts +1 -1
  92. package/dist/gui-subagent/types/operator.d.ts.map +1 -1
  93. package/dist/index.d.ts +1 -2
  94. package/dist/index.d.ts.map +1 -1
  95. package/dist/index.js +1 -2
  96. package/dist/index.js.map +1 -1
  97. package/dist/input-processor.d.ts.map +1 -1
  98. package/dist/input-processor.js +8 -5
  99. package/dist/input-processor.js.map +1 -1
  100. package/dist/logger.d.ts.map +1 -1
  101. package/dist/logger.js +1 -1
  102. package/dist/logger.js.map +1 -1
  103. package/dist/mcp.d.ts +7 -1
  104. package/dist/mcp.d.ts.map +1 -1
  105. package/dist/mcp.js +157 -49
  106. package/dist/mcp.js.map +1 -1
  107. package/dist/memory.d.ts.map +1 -1
  108. package/dist/memory.js +3 -3
  109. package/dist/memory.js.map +1 -1
  110. package/dist/output-util.d.ts +27 -0
  111. package/dist/output-util.d.ts.map +1 -0
  112. package/dist/output-util.js +74 -0
  113. package/dist/output-util.js.map +1 -0
  114. package/dist/retry.js +1 -1
  115. package/dist/retry.js.map +1 -1
  116. package/dist/ripgrep.d.ts +29 -0
  117. package/dist/ripgrep.d.ts.map +1 -0
  118. package/dist/ripgrep.js +294 -0
  119. package/dist/ripgrep.js.map +1 -0
  120. package/dist/sdk-output-adapter.d.ts +34 -1
  121. package/dist/sdk-output-adapter.d.ts.map +1 -1
  122. package/dist/sdk-output-adapter.js +67 -2
  123. package/dist/sdk-output-adapter.js.map +1 -1
  124. package/dist/sdk-session.d.ts.map +1 -1
  125. package/dist/sdk-session.js +2 -0
  126. package/dist/sdk-session.js.map +1 -1
  127. package/dist/session-manager.js +3 -3
  128. package/dist/session-manager.js.map +1 -1
  129. package/dist/session.d.ts +116 -6
  130. package/dist/session.d.ts.map +1 -1
  131. package/dist/session.js +1416 -448
  132. package/dist/session.js.map +1 -1
  133. package/dist/shell.d.ts +33 -0
  134. package/dist/shell.d.ts.map +1 -0
  135. package/dist/shell.js +126 -0
  136. package/dist/shell.js.map +1 -0
  137. package/dist/skill-installer.d.ts +38 -0
  138. package/dist/skill-installer.d.ts.map +1 -0
  139. package/dist/skill-installer.js +447 -0
  140. package/dist/skill-installer.js.map +1 -0
  141. package/dist/skill-invoker.d.ts +8 -2
  142. package/dist/skill-invoker.d.ts.map +1 -1
  143. package/dist/skill-invoker.js +36 -15
  144. package/dist/skill-invoker.js.map +1 -1
  145. package/dist/skill-loader.d.ts +8 -3
  146. package/dist/skill-loader.d.ts.map +1 -1
  147. package/dist/skill-loader.js +51 -48
  148. package/dist/skill-loader.js.map +1 -1
  149. package/dist/skill-manager.d.ts +85 -0
  150. package/dist/skill-manager.d.ts.map +1 -0
  151. package/dist/skill-manager.js +341 -0
  152. package/dist/skill-manager.js.map +1 -0
  153. package/dist/slash-commands.d.ts +39 -2
  154. package/dist/slash-commands.d.ts.map +1 -1
  155. package/dist/slash-commands.js +934 -305
  156. package/dist/slash-commands.js.map +1 -1
  157. package/dist/smart-approval.d.ts +20 -1
  158. package/dist/smart-approval.d.ts.map +1 -1
  159. package/dist/smart-approval.js +125 -56
  160. package/dist/smart-approval.js.map +1 -1
  161. package/dist/system-prompt-generator.d.ts +6 -0
  162. package/dist/system-prompt-generator.d.ts.map +1 -1
  163. package/dist/system-prompt-generator.js +86 -36
  164. package/dist/system-prompt-generator.js.map +1 -1
  165. package/dist/terminal.d.ts +28 -0
  166. package/dist/terminal.d.ts.map +1 -0
  167. package/dist/terminal.js +82 -0
  168. package/dist/terminal.js.map +1 -0
  169. package/dist/theme.d.ts.map +1 -1
  170. package/dist/theme.js +8 -7
  171. package/dist/theme.js.map +1 -1
  172. package/dist/tools.d.ts +38 -7
  173. package/dist/tools.d.ts.map +1 -1
  174. package/dist/tools.js +1249 -617
  175. package/dist/tools.js.map +1 -1
  176. package/dist/truncate.d.ts +55 -0
  177. package/dist/truncate.d.ts.map +1 -0
  178. package/dist/truncate.js +130 -0
  179. package/dist/truncate.js.map +1 -0
  180. package/dist/types.d.ts +84 -9
  181. package/dist/types.d.ts.map +1 -1
  182. package/dist/types.js +49 -0
  183. package/dist/types.js.map +1 -1
  184. package/dist/update.d.ts.map +1 -1
  185. package/dist/update.js +28 -36
  186. package/dist/update.js.map +1 -1
  187. package/dist/workflow.d.ts +5 -1
  188. package/dist/workflow.d.ts.map +1 -1
  189. package/dist/workflow.js +61 -49
  190. package/dist/workflow.js.map +1 -1
  191. package/docs/architecture/mcp-integration-guide.md +304 -194
  192. package/docs/architecture/overview.md +169 -169
  193. package/docs/architecture/tool-system-design.md +134 -134
  194. package/docs/cli/commands.md +349 -238
  195. package/docs/smart-mode.md +281 -281
  196. package/docs/third-party-models.md +440 -439
  197. package/find-skills/SKILL.md +133 -0
  198. package/package.json +91 -90
  199. package/scripts/install-ripgrep.js +241 -0
  200. package/src/agents.ts +7 -3
  201. package/src/ai-client/factory.ts +116 -0
  202. package/src/ai-client/index.ts +61 -0
  203. package/src/ai-client/providers/anthropic.ts +475 -0
  204. package/src/ai-client/providers/openai.ts +348 -0
  205. package/src/ai-client/providers/remote.ts +439 -0
  206. package/src/ai-client/registry.ts +97 -0
  207. package/src/ai-client/types.ts +364 -0
  208. package/src/ai-client-factory.ts +204 -0
  209. package/src/auth.ts +661 -614
  210. package/src/cancellation.ts +202 -176
  211. package/src/checkpoint.ts +255 -219
  212. package/src/cli.ts +1523 -743
  213. package/src/config.ts +341 -297
  214. package/src/context-compressor.ts +987 -290
  215. package/src/conversation.ts +290 -288
  216. package/src/gui-subagent/action-parser/actionParser.ts +318 -315
  217. package/src/gui-subagent/action-parser/constants.ts +14 -14
  218. package/src/gui-subagent/action-parser/index.ts +8 -8
  219. package/src/gui-subagent/action-parser/types.ts +31 -31
  220. package/src/gui-subagent/agent/gui-agent.ts +1234 -1089
  221. package/src/gui-subagent/agent/index.ts +5 -5
  222. package/src/gui-subagent/index.ts +185 -163
  223. package/src/gui-subagent/operator/base-operator.ts +244 -245
  224. package/src/gui-subagent/operator/computer-operator.ts +541 -520
  225. package/src/gui-subagent/operator/index.ts +6 -6
  226. package/src/gui-subagent/types/actions.ts +260 -262
  227. package/src/gui-subagent/types/index.ts +6 -6
  228. package/src/gui-subagent/types/operator.ts +106 -106
  229. package/src/gui-subagent/utils.ts +51 -51
  230. package/src/index.ts +17 -18
  231. package/src/input-processor.ts +8 -5
  232. package/src/logger.ts +436 -438
  233. package/src/mcp.ts +793 -682
  234. package/src/memory.ts +343 -344
  235. package/src/output-util.ts +80 -0
  236. package/src/retry.ts +1 -1
  237. package/src/ripgrep.ts +370 -0
  238. package/src/sdk-output-adapter.ts +842 -0
  239. package/src/sdk-session.ts +62 -0
  240. package/src/session-manager.ts +308 -308
  241. package/src/session.ts +1775 -573
  242. package/src/shell.ts +134 -0
  243. package/src/skill-installer.ts +518 -0
  244. package/src/skill-invoker.ts +959 -935
  245. package/src/skill-loader.ts +501 -496
  246. package/src/skill-manager.ts +385 -0
  247. package/src/slash-commands.ts +2189 -1389
  248. package/src/smart-approval.ts +193 -74
  249. package/src/system-prompt-generator.ts +91 -36
  250. package/src/terminal.ts +96 -0
  251. package/src/theme.ts +739 -738
  252. package/src/tools.ts +1790 -931
  253. package/src/truncate.ts +173 -0
  254. package/src/types.ts +337 -198
  255. package/src/update.ts +33 -40
  256. package/src/workflow.ts +521 -508
  257. package/test/cli-launch.test.ts +279 -0
  258. package/tsconfig.json +22 -22
  259. package/vitest.config.ts +21 -19
  260. package/dist/ai-client.d.ts +0 -86
  261. package/dist/ai-client.d.ts.map +0 -1
  262. package/dist/ai-client.js +0 -1372
  263. package/dist/ai-client.js.map +0 -1
  264. package/dist/gui-subagent/operator/browser-operator.d.ts +0 -36
  265. package/dist/gui-subagent/operator/browser-operator.d.ts.map +0 -1
  266. package/dist/gui-subagent/operator/browser-operator.js +0 -306
  267. package/dist/gui-subagent/operator/browser-operator.js.map +0 -1
  268. package/dist/gui-subagent/operator/desktop-operator.d.ts +0 -55
  269. package/dist/gui-subagent/operator/desktop-operator.d.ts.map +0 -1
  270. package/dist/gui-subagent/operator/desktop-operator.js +0 -527
  271. package/dist/gui-subagent/operator/desktop-operator.js.map +0 -1
  272. package/dist/hook.d.ts +0 -73
  273. package/dist/hook.d.ts.map +0 -1
  274. package/dist/hook.js +0 -156
  275. package/dist/hook.js.map +0 -1
  276. package/dist/input-history.d.ts +0 -24
  277. package/dist/input-history.d.ts.map +0 -1
  278. package/dist/input-history.js +0 -94
  279. package/dist/input-history.js.map +0 -1
  280. package/dist/keyboard-manager.d.ts +0 -151
  281. package/dist/keyboard-manager.d.ts.map +0 -1
  282. package/dist/keyboard-manager.js +0 -396
  283. package/dist/keyboard-manager.js.map +0 -1
  284. package/dist/print-system-prompt.d.ts +0 -2
  285. package/dist/print-system-prompt.d.ts.map +0 -1
  286. package/dist/print-system-prompt.js +0 -40
  287. package/dist/print-system-prompt.js.map +0 -1
  288. package/dist/remote-ai-client.d.ts +0 -104
  289. package/dist/remote-ai-client.d.ts.map +0 -1
  290. package/dist/remote-ai-client.js +0 -552
  291. package/dist/remote-ai-client.js.map +0 -1
  292. package/dist/sdk-session-v2.d.ts +0 -13
  293. package/dist/sdk-session-v2.d.ts.map +0 -1
  294. package/dist/sdk-session-v2.js +0 -46
  295. package/dist/sdk-session-v2.js.map +0 -1
  296. package/dist/test-boundary-conditions.d.ts.map +0 -1
  297. package/dist/test-boundary-conditions.js.map +0 -1
  298. package/dist/test-cancellation-fix.d.ts.map +0 -1
  299. package/dist/test-cancellation-fix.js.map +0 -1
  300. package/dist/test-input-history.d.ts.map +0 -1
  301. package/dist/test-input-history.js.map +0 -1
  302. package/dist/test-interaction-flow.d.ts.map +0 -1
  303. package/dist/test-interaction-flow.js.map +0 -1
  304. package/dist/test-quick.d.ts.map +0 -1
  305. package/dist/test-quick.js.map +0 -1
  306. package/dist/test-user-interaction.d.ts.map +0 -1
  307. package/dist/test-user-interaction.js.map +0 -1
  308. package/dist/tools/edit-diff.d.ts +0 -32
  309. package/dist/tools/edit-diff.d.ts.map +0 -1
  310. package/dist/tools/edit-diff.js +0 -185
  311. package/dist/tools/edit-diff.js.map +0 -1
  312. package/dist/tools/edit.d.ts +0 -11
  313. package/dist/tools/edit.d.ts.map +0 -1
  314. package/dist/tools/edit.js +0 -129
  315. package/dist/tools/edit.js.map +0 -1
  316. package/dist/unified-session.d.ts +0 -42
  317. package/dist/unified-session.d.ts.map +0 -1
  318. package/dist/unified-session.js +0 -271
  319. package/dist/unified-session.js.map +0 -1
  320. package/skills/.claude-plugin/marketplace.json +0 -45
  321. package/skills/README.md +0 -94
  322. package/skills/THIRD_PARTY_NOTICES.md +0 -405
  323. package/skills/skills/algorithmic-art/LICENSE.txt +0 -202
  324. package/skills/skills/algorithmic-art/SKILL.md +0 -405
  325. package/skills/skills/algorithmic-art/templates/generator_template.js +0 -223
  326. package/skills/skills/algorithmic-art/templates/viewer.html +0 -599
  327. package/skills/skills/brand-guidelines/LICENSE.txt +0 -202
  328. package/skills/skills/brand-guidelines/SKILL.md +0 -73
  329. package/skills/skills/canvas-design/LICENSE.txt +0 -202
  330. package/skills/skills/canvas-design/SKILL.md +0 -130
  331. package/skills/skills/canvas-design/canvas-fonts/ArsenalSC-OFL.txt +0 -93
  332. package/skills/skills/canvas-design/canvas-fonts/ArsenalSC-Regular.ttf +0 -0
  333. package/skills/skills/canvas-design/canvas-fonts/BigShoulders-Bold.ttf +0 -0
  334. package/skills/skills/canvas-design/canvas-fonts/BigShoulders-OFL.txt +0 -93
  335. package/skills/skills/canvas-design/canvas-fonts/BigShoulders-Regular.ttf +0 -0
  336. package/skills/skills/canvas-design/canvas-fonts/Boldonse-OFL.txt +0 -93
  337. package/skills/skills/canvas-design/canvas-fonts/Boldonse-Regular.ttf +0 -0
  338. package/skills/skills/canvas-design/canvas-fonts/BricolageGrotesque-Bold.ttf +0 -0
  339. package/skills/skills/canvas-design/canvas-fonts/BricolageGrotesque-OFL.txt +0 -93
  340. package/skills/skills/canvas-design/canvas-fonts/BricolageGrotesque-Regular.ttf +0 -0
  341. package/skills/skills/canvas-design/canvas-fonts/CrimsonPro-Bold.ttf +0 -0
  342. package/skills/skills/canvas-design/canvas-fonts/CrimsonPro-Italic.ttf +0 -0
  343. package/skills/skills/canvas-design/canvas-fonts/CrimsonPro-OFL.txt +0 -93
  344. package/skills/skills/canvas-design/canvas-fonts/CrimsonPro-Regular.ttf +0 -0
  345. package/skills/skills/canvas-design/canvas-fonts/DMMono-OFL.txt +0 -93
  346. package/skills/skills/canvas-design/canvas-fonts/DMMono-Regular.ttf +0 -0
  347. package/skills/skills/canvas-design/canvas-fonts/EricaOne-OFL.txt +0 -94
  348. package/skills/skills/canvas-design/canvas-fonts/EricaOne-Regular.ttf +0 -0
  349. package/skills/skills/canvas-design/canvas-fonts/GeistMono-Bold.ttf +0 -0
  350. package/skills/skills/canvas-design/canvas-fonts/GeistMono-OFL.txt +0 -93
  351. package/skills/skills/canvas-design/canvas-fonts/GeistMono-Regular.ttf +0 -0
  352. package/skills/skills/canvas-design/canvas-fonts/Gloock-OFL.txt +0 -93
  353. package/skills/skills/canvas-design/canvas-fonts/Gloock-Regular.ttf +0 -0
  354. package/skills/skills/canvas-design/canvas-fonts/IBMPlexMono-Bold.ttf +0 -0
  355. package/skills/skills/canvas-design/canvas-fonts/IBMPlexMono-OFL.txt +0 -93
  356. package/skills/skills/canvas-design/canvas-fonts/IBMPlexMono-Regular.ttf +0 -0
  357. package/skills/skills/canvas-design/canvas-fonts/IBMPlexSerif-Bold.ttf +0 -0
  358. package/skills/skills/canvas-design/canvas-fonts/IBMPlexSerif-BoldItalic.ttf +0 -0
  359. package/skills/skills/canvas-design/canvas-fonts/IBMPlexSerif-Italic.ttf +0 -0
  360. package/skills/skills/canvas-design/canvas-fonts/IBMPlexSerif-Regular.ttf +0 -0
  361. package/skills/skills/canvas-design/canvas-fonts/InstrumentSans-Bold.ttf +0 -0
  362. package/skills/skills/canvas-design/canvas-fonts/InstrumentSans-BoldItalic.ttf +0 -0
  363. package/skills/skills/canvas-design/canvas-fonts/InstrumentSans-Italic.ttf +0 -0
  364. package/skills/skills/canvas-design/canvas-fonts/InstrumentSans-OFL.txt +0 -93
  365. package/skills/skills/canvas-design/canvas-fonts/InstrumentSans-Regular.ttf +0 -0
  366. package/skills/skills/canvas-design/canvas-fonts/InstrumentSerif-Italic.ttf +0 -0
  367. package/skills/skills/canvas-design/canvas-fonts/InstrumentSerif-Regular.ttf +0 -0
  368. package/skills/skills/canvas-design/canvas-fonts/Italiana-OFL.txt +0 -93
  369. package/skills/skills/canvas-design/canvas-fonts/Italiana-Regular.ttf +0 -0
  370. package/skills/skills/canvas-design/canvas-fonts/JetBrainsMono-Bold.ttf +0 -0
  371. package/skills/skills/canvas-design/canvas-fonts/JetBrainsMono-OFL.txt +0 -93
  372. package/skills/skills/canvas-design/canvas-fonts/JetBrainsMono-Regular.ttf +0 -0
  373. package/skills/skills/canvas-design/canvas-fonts/Jura-Light.ttf +0 -0
  374. package/skills/skills/canvas-design/canvas-fonts/Jura-Medium.ttf +0 -0
  375. package/skills/skills/canvas-design/canvas-fonts/Jura-OFL.txt +0 -93
  376. package/skills/skills/canvas-design/canvas-fonts/LibreBaskerville-OFL.txt +0 -93
  377. package/skills/skills/canvas-design/canvas-fonts/LibreBaskerville-Regular.ttf +0 -0
  378. package/skills/skills/canvas-design/canvas-fonts/Lora-Bold.ttf +0 -0
  379. package/skills/skills/canvas-design/canvas-fonts/Lora-BoldItalic.ttf +0 -0
  380. package/skills/skills/canvas-design/canvas-fonts/Lora-Italic.ttf +0 -0
  381. package/skills/skills/canvas-design/canvas-fonts/Lora-OFL.txt +0 -93
  382. package/skills/skills/canvas-design/canvas-fonts/Lora-Regular.ttf +0 -0
  383. package/skills/skills/canvas-design/canvas-fonts/NationalPark-Bold.ttf +0 -0
  384. package/skills/skills/canvas-design/canvas-fonts/NationalPark-OFL.txt +0 -93
  385. package/skills/skills/canvas-design/canvas-fonts/NationalPark-Regular.ttf +0 -0
  386. package/skills/skills/canvas-design/canvas-fonts/NothingYouCouldDo-OFL.txt +0 -93
  387. package/skills/skills/canvas-design/canvas-fonts/NothingYouCouldDo-Regular.ttf +0 -0
  388. package/skills/skills/canvas-design/canvas-fonts/Outfit-Bold.ttf +0 -0
  389. package/skills/skills/canvas-design/canvas-fonts/Outfit-OFL.txt +0 -93
  390. package/skills/skills/canvas-design/canvas-fonts/Outfit-Regular.ttf +0 -0
  391. package/skills/skills/canvas-design/canvas-fonts/PixelifySans-Medium.ttf +0 -0
  392. package/skills/skills/canvas-design/canvas-fonts/PixelifySans-OFL.txt +0 -93
  393. package/skills/skills/canvas-design/canvas-fonts/PoiretOne-OFL.txt +0 -93
  394. package/skills/skills/canvas-design/canvas-fonts/PoiretOne-Regular.ttf +0 -0
  395. package/skills/skills/canvas-design/canvas-fonts/RedHatMono-Bold.ttf +0 -0
  396. package/skills/skills/canvas-design/canvas-fonts/RedHatMono-OFL.txt +0 -93
  397. package/skills/skills/canvas-design/canvas-fonts/RedHatMono-Regular.ttf +0 -0
  398. package/skills/skills/canvas-design/canvas-fonts/Silkscreen-OFL.txt +0 -93
  399. package/skills/skills/canvas-design/canvas-fonts/Silkscreen-Regular.ttf +0 -0
  400. package/skills/skills/canvas-design/canvas-fonts/SmoochSans-Medium.ttf +0 -0
  401. package/skills/skills/canvas-design/canvas-fonts/SmoochSans-OFL.txt +0 -93
  402. package/skills/skills/canvas-design/canvas-fonts/Tektur-Medium.ttf +0 -0
  403. package/skills/skills/canvas-design/canvas-fonts/Tektur-OFL.txt +0 -93
  404. package/skills/skills/canvas-design/canvas-fonts/Tektur-Regular.ttf +0 -0
  405. package/skills/skills/canvas-design/canvas-fonts/WorkSans-Bold.ttf +0 -0
  406. package/skills/skills/canvas-design/canvas-fonts/WorkSans-BoldItalic.ttf +0 -0
  407. package/skills/skills/canvas-design/canvas-fonts/WorkSans-Italic.ttf +0 -0
  408. package/skills/skills/canvas-design/canvas-fonts/WorkSans-OFL.txt +0 -93
  409. package/skills/skills/canvas-design/canvas-fonts/WorkSans-Regular.ttf +0 -0
  410. package/skills/skills/canvas-design/canvas-fonts/YoungSerif-OFL.txt +0 -93
  411. package/skills/skills/canvas-design/canvas-fonts/YoungSerif-Regular.ttf +0 -0
  412. package/skills/skills/doc-coauthoring/SKILL.md +0 -375
  413. package/skills/skills/docx/LICENSE.txt +0 -30
  414. package/skills/skills/docx/SKILL.md +0 -197
  415. package/skills/skills/docx/docx-js.md +0 -350
  416. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
  417. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
  418. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
  419. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
  420. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
  421. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
  422. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
  423. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
  424. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
  425. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
  426. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
  427. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
  428. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
  429. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
  430. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
  431. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
  432. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
  433. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
  434. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
  435. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
  436. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
  437. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
  438. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
  439. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
  440. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
  441. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
  442. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
  443. package/skills/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
  444. package/skills/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
  445. package/skills/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
  446. package/skills/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
  447. package/skills/skills/docx/ooxml/schemas/mce/mc.xsd +0 -75
  448. package/skills/skills/docx/ooxml/schemas/microsoft/wml-2010.xsd +0 -560
  449. package/skills/skills/docx/ooxml/schemas/microsoft/wml-2012.xsd +0 -67
  450. package/skills/skills/docx/ooxml/schemas/microsoft/wml-2018.xsd +0 -14
  451. package/skills/skills/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +0 -20
  452. package/skills/skills/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +0 -13
  453. package/skills/skills/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
  454. package/skills/skills/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +0 -8
  455. package/skills/skills/docx/ooxml/scripts/pack.py +0 -159
  456. package/skills/skills/docx/ooxml/scripts/unpack.py +0 -29
  457. package/skills/skills/docx/ooxml/scripts/validate.py +0 -69
  458. package/skills/skills/docx/ooxml/scripts/validation/__init__.py +0 -15
  459. package/skills/skills/docx/ooxml/scripts/validation/base.py +0 -951
  460. package/skills/skills/docx/ooxml/scripts/validation/docx.py +0 -274
  461. package/skills/skills/docx/ooxml/scripts/validation/pptx.py +0 -315
  462. package/skills/skills/docx/ooxml/scripts/validation/redlining.py +0 -279
  463. package/skills/skills/docx/ooxml.md +0 -610
  464. package/skills/skills/docx/scripts/__init__.py +0 -1
  465. package/skills/skills/docx/scripts/document.py +0 -1276
  466. package/skills/skills/docx/scripts/templates/comments.xml +0 -3
  467. package/skills/skills/docx/scripts/templates/commentsExtended.xml +0 -3
  468. package/skills/skills/docx/scripts/templates/commentsExtensible.xml +0 -3
  469. package/skills/skills/docx/scripts/templates/commentsIds.xml +0 -3
  470. package/skills/skills/docx/scripts/templates/people.xml +0 -3
  471. package/skills/skills/docx/scripts/utilities.py +0 -374
  472. package/skills/skills/frontend-design/LICENSE.txt +0 -177
  473. package/skills/skills/frontend-design/SKILL.md +0 -42
  474. package/skills/skills/internal-comms/LICENSE.txt +0 -202
  475. package/skills/skills/internal-comms/SKILL.md +0 -32
  476. package/skills/skills/internal-comms/examples/3p-updates.md +0 -47
  477. package/skills/skills/internal-comms/examples/company-newsletter.md +0 -65
  478. package/skills/skills/internal-comms/examples/faq-answers.md +0 -30
  479. package/skills/skills/internal-comms/examples/general-comms.md +0 -16
  480. package/skills/skills/mcp-builder/LICENSE.txt +0 -202
  481. package/skills/skills/mcp-builder/SKILL.md +0 -236
  482. package/skills/skills/mcp-builder/reference/evaluation.md +0 -602
  483. package/skills/skills/mcp-builder/reference/mcp_best_practices.md +0 -249
  484. package/skills/skills/mcp-builder/reference/node_mcp_server.md +0 -970
  485. package/skills/skills/mcp-builder/reference/python_mcp_server.md +0 -719
  486. package/skills/skills/mcp-builder/scripts/connections.py +0 -151
  487. package/skills/skills/mcp-builder/scripts/evaluation.py +0 -373
  488. package/skills/skills/mcp-builder/scripts/example_evaluation.xml +0 -22
  489. package/skills/skills/mcp-builder/scripts/requirements.txt +0 -2
  490. package/skills/skills/pdf/LICENSE.txt +0 -30
  491. package/skills/skills/pdf/SKILL.md +0 -294
  492. package/skills/skills/pdf/forms.md +0 -205
  493. package/skills/skills/pdf/reference.md +0 -612
  494. package/skills/skills/pdf/scripts/check_bounding_boxes.py +0 -70
  495. package/skills/skills/pdf/scripts/check_bounding_boxes_test.py +0 -226
  496. package/skills/skills/pdf/scripts/check_fillable_fields.py +0 -12
  497. package/skills/skills/pdf/scripts/convert_pdf_to_images.py +0 -35
  498. package/skills/skills/pdf/scripts/create_validation_image.py +0 -41
  499. package/skills/skills/pdf/scripts/extract_form_field_info.py +0 -152
  500. package/skills/skills/pdf/scripts/fill_fillable_fields.py +0 -114
  501. package/skills/skills/pdf/scripts/fill_pdf_form_with_annotations.py +0 -108
  502. package/skills/skills/pptx/LICENSE.txt +0 -30
  503. package/skills/skills/pptx/SKILL.md +0 -484
  504. package/skills/skills/pptx/html2pptx.md +0 -625
  505. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
  506. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
  507. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
  508. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
  509. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
  510. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
  511. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
  512. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
  513. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
  514. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
  515. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
  516. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
  517. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
  518. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
  519. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
  520. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
  521. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
  522. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
  523. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
  524. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
  525. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
  526. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
  527. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
  528. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
  529. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
  530. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
  531. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
  532. package/skills/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
  533. package/skills/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
  534. package/skills/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
  535. package/skills/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
  536. package/skills/skills/pptx/ooxml/schemas/mce/mc.xsd +0 -75
  537. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-2010.xsd +0 -560
  538. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-2012.xsd +0 -67
  539. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-2018.xsd +0 -14
  540. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd +0 -20
  541. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd +0 -13
  542. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
  543. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd +0 -8
  544. package/skills/skills/pptx/ooxml/scripts/pack.py +0 -159
  545. package/skills/skills/pptx/ooxml/scripts/unpack.py +0 -29
  546. package/skills/skills/pptx/ooxml/scripts/validate.py +0 -69
  547. package/skills/skills/pptx/ooxml/scripts/validation/__init__.py +0 -15
  548. package/skills/skills/pptx/ooxml/scripts/validation/base.py +0 -951
  549. package/skills/skills/pptx/ooxml/scripts/validation/docx.py +0 -274
  550. package/skills/skills/pptx/ooxml/scripts/validation/pptx.py +0 -315
  551. package/skills/skills/pptx/ooxml/scripts/validation/redlining.py +0 -279
  552. package/skills/skills/pptx/ooxml.md +0 -427
  553. package/skills/skills/pptx/scripts/html2pptx.js +0 -979
  554. package/skills/skills/pptx/scripts/inventory.py +0 -1020
  555. package/skills/skills/pptx/scripts/rearrange.py +0 -231
  556. package/skills/skills/pptx/scripts/replace.py +0 -385
  557. package/skills/skills/pptx/scripts/thumbnail.py +0 -450
  558. package/skills/skills/skill-creator/LICENSE.txt +0 -202
  559. package/skills/skills/skill-creator/SKILL.md +0 -356
  560. package/skills/skills/skill-creator/references/output-patterns.md +0 -82
  561. package/skills/skills/skill-creator/references/workflows.md +0 -28
  562. package/skills/skills/skill-creator/scripts/init_skill.py +0 -303
  563. package/skills/skills/skill-creator/scripts/package_skill.py +0 -110
  564. package/skills/skills/skill-creator/scripts/quick_validate.py +0 -95
  565. package/skills/skills/slack-gif-creator/LICENSE.txt +0 -202
  566. package/skills/skills/slack-gif-creator/SKILL.md +0 -254
  567. package/skills/skills/slack-gif-creator/core/easing.py +0 -234
  568. package/skills/skills/slack-gif-creator/core/frame_composer.py +0 -176
  569. package/skills/skills/slack-gif-creator/core/gif_builder.py +0 -269
  570. package/skills/skills/slack-gif-creator/core/validators.py +0 -136
  571. package/skills/skills/slack-gif-creator/requirements.txt +0 -4
  572. package/skills/skills/theme-factory/LICENSE.txt +0 -202
  573. package/skills/skills/theme-factory/SKILL.md +0 -59
  574. package/skills/skills/theme-factory/theme-showcase.pdf +0 -0
  575. package/skills/skills/theme-factory/themes/arctic-frost.md +0 -19
  576. package/skills/skills/theme-factory/themes/botanical-garden.md +0 -19
  577. package/skills/skills/theme-factory/themes/desert-rose.md +0 -19
  578. package/skills/skills/theme-factory/themes/forest-canopy.md +0 -19
  579. package/skills/skills/theme-factory/themes/golden-hour.md +0 -19
  580. package/skills/skills/theme-factory/themes/midnight-galaxy.md +0 -19
  581. package/skills/skills/theme-factory/themes/modern-minimalist.md +0 -19
  582. package/skills/skills/theme-factory/themes/ocean-depths.md +0 -19
  583. package/skills/skills/theme-factory/themes/sunset-boulevard.md +0 -19
  584. package/skills/skills/theme-factory/themes/tech-innovation.md +0 -19
  585. package/skills/skills/web-artifacts-builder/LICENSE.txt +0 -202
  586. package/skills/skills/web-artifacts-builder/SKILL.md +0 -74
  587. package/skills/skills/web-artifacts-builder/scripts/bundle-artifact.sh +0 -54
  588. package/skills/skills/web-artifacts-builder/scripts/init-artifact.sh +0 -322
  589. package/skills/skills/webapp-testing/LICENSE.txt +0 -202
  590. package/skills/skills/webapp-testing/SKILL.md +0 -96
  591. package/skills/skills/webapp-testing/examples/console_logging.py +0 -35
  592. package/skills/skills/webapp-testing/examples/element_discovery.py +0 -40
  593. package/skills/skills/webapp-testing/examples/static_html_automation.py +0 -33
  594. package/skills/skills/webapp-testing/scripts/with_server.py +0 -106
  595. package/skills/skills/xlsx/LICENSE.txt +0 -30
  596. package/skills/skills/xlsx/SKILL.md +0 -289
  597. package/skills/skills/xlsx/recalc.py +0 -178
  598. package/skills/spec/agent-skills-spec.md +0 -3
  599. package/skills/template/SKILL.md +0 -6
  600. package/src/ai-client.ts +0 -1560
  601. package/src/remote-ai-client.ts +0 -664
  602. /package/{.eslintrc.js → .eslintrc.cjs} +0 -0
@@ -1,520 +1,541 @@
1
- /**
2
- * Computer Operator using @computer-use/nut-js
3
- * Provides desktop automation capabilities for gui-subagent
4
- * Based on UI-TARS NutJSOperator implementation
5
- *
6
- * This implementation is aligned with packages/ui-tars/operators/nut-js/src/index.ts
7
- */
8
-
9
- import {
10
- screen,
11
- Button,
12
- Key,
13
- Point,
14
- centerOf,
15
- keyboard,
16
- mouse,
17
- sleep,
18
- straightTo,
19
- clipboard,
20
- } from '@computer-use/nut-js';
21
- import { Jimp } from 'jimp';
22
- import type { OperatorConfig, ScreenContext, ScreenshotOutput, ExecuteParams, ExecuteOutput } from '../types/operator.js';
23
- import { Operator, type OperatorManual, parseBoxToScreenCoords } from './base-operator.js';
24
- import { getLogger } from '../../logger.js';
25
-
26
- const guiLogger = getLogger();
27
-
28
- export interface ComputerOperatorOptions {
29
- config?: OperatorConfig;
30
- computerConfig?: Record<string, any>;
31
- logger?: any;
32
- }
33
-
34
- export class ComputerOperator extends Operator {
35
- private config: OperatorConfig;
36
- private logger: any;
37
- private screenCtx: ScreenContext | null = null;
38
-
39
- constructor(options: ComputerOperatorOptions = {}) {
40
- super();
41
- this.config = options.config || {};
42
- this.logger = options.logger || guiLogger;
43
- }
44
-
45
- protected async initialize(): Promise<void> {
46
- this.logger.debug('Initializing computer operator...');
47
-
48
- try {
49
- const { width, height, scaleFactor } = await this.getScreenSize();
50
- this.screenCtx = {
51
- width,
52
- height,
53
- scaleFactor,
54
- };
55
-
56
- this.logger.debug(`Computer operator initialized: ${width}x${height} @ ${scaleFactor}x`);
57
- } catch (error) {
58
- this.logger.error('Failed to initialize computer operator:', error);
59
- throw error;
60
- }
61
- }
62
-
63
- private async getScreenSize(): Promise<{ width: number; height: number; scaleFactor: number }> {
64
- try {
65
- const grabImage = await screen.grab();
66
- const screenWithScale = await grabImage.toRGB();
67
- const scaleFactor = screenWithScale.pixelDensity.scaleX;
68
- const width = screenWithScale.width / scaleFactor;
69
- const height = screenWithScale.height / scaleFactor;
70
- return { width, height, scaleFactor };
71
- } catch {
72
- return {
73
- width: this.config.viewport?.width || 1920,
74
- height: this.config.viewport?.height || 1080,
75
- scaleFactor: this.config.deviceScaleFactor || 1,
76
- };
77
- }
78
- }
79
-
80
- getSupportedActions(): string[] {
81
- return [
82
- 'click',
83
- 'left_click',
84
- 'left_single',
85
- 'left_double',
86
- 'double_click',
87
- 'right_click',
88
- 'right_single',
89
- 'middle_click',
90
- 'mouse_move',
91
- 'hover',
92
- 'drag',
93
- 'left_click_drag',
94
- 'select',
95
- 'scroll',
96
- 'type',
97
- 'hotkey',
98
- 'press',
99
- 'release',
100
- 'open_url',
101
- 'wait',
102
- 'finished',
103
- 'user_stop',
104
- 'error_env',
105
- 'call_user',
106
- ];
107
- }
108
-
109
- protected screenContext(): ScreenContext {
110
- if (!this.screenCtx) {
111
- throw new Error('Screen context not initialized');
112
- }
113
- return this.screenCtx;
114
- }
115
-
116
- protected async screenshot(): Promise<ScreenshotOutput> {
117
- try {
118
- const grabImage = await screen.grab();
119
- const screenWithScale = await grabImage.toRGB();
120
- const scaleFactor = screenWithScale.pixelDensity.scaleX;
121
-
122
- const screenWithScaleImage = await Jimp.fromBitmap({
123
- width: screenWithScale.width,
124
- height: screenWithScale.height,
125
- data: Buffer.from(screenWithScale.data),
126
- });
127
-
128
- const width = screenWithScale.width / scaleFactor;
129
- const height = screenWithScale.height / scaleFactor;
130
-
131
- const physicalScreenImage = await screenWithScaleImage
132
- .resize({
133
- w: width,
134
- h: height,
135
- })
136
- .getBuffer('image/png');
137
-
138
- this.logger.debug(`[ComputerOperator] screenshot: ${width}x${height}, scaleFactor: ${scaleFactor}`);
139
-
140
- return {
141
- status: 'success',
142
- base64: physicalScreenImage.toString('base64'),
143
- scaleFactor,
144
- };
145
- } catch (error) {
146
- const errorMsg = error instanceof Error ? error.message : 'Unknown error';
147
- this.logger.warn(`[ComputerOperator] Screenshot failed: ${errorMsg}`);
148
- return {
149
- status: 'failed',
150
- errorMessage: errorMsg,
151
- };
152
- }
153
- }
154
-
155
- protected async execute(params: ExecuteParams): Promise<ExecuteOutput> {
156
- const { parsedPrediction, screenWidth, screenHeight, scaleFactor } = params;
157
- const { action_type, action_inputs } = parsedPrediction;
158
-
159
- // Empty or invalid action should return failed to avoid infinite loop
160
- if (!action_type || action_type.trim() === '') {
161
- this.logger.warn(`[ComputerOperator] Empty action, skipping step`);
162
- return {
163
- status: 'failed',
164
- errorMessage: 'Empty or invalid action type'
165
- };
166
- }
167
-
168
- const startBoxStr = action_inputs?.start_box || '';
169
- const { x: startX, y: startY } = parseBoxToScreenCoords({
170
- boxStr: startBoxStr,
171
- screenWidth,
172
- screenHeight,
173
- });
174
-
175
- mouse.config.mouseSpeed = 3600;
176
-
177
- // this.logger.debug('[ComputerOperator] execute', { action_type, startX, startY, scaleFactor });
178
-
179
- try {
180
- const result = await this.executeAction(action_type, action_inputs, { startX, startY, screenWidth, screenHeight, scaleFactor });
181
- if (result === 'end') {
182
- return { status: 'end' };
183
- }
184
-
185
- return { status: 'success' };
186
- } catch (error) {
187
- this.logger.error(`Failed to execute action ${action_type}:`, error);
188
- return {
189
- status: 'failed',
190
- errorMessage: (error as Error).message,
191
- };
192
- }
193
- }
194
-
195
- private async executeAction(
196
- actionType: string,
197
- inputs: Record<string, any>,
198
- context: { startX: number; startY: number; screenWidth: number; screenHeight: number; scaleFactor: number }
199
- ): Promise<'end' | void> {
200
- const { startX, startY, screenWidth, screenHeight, scaleFactor } = context;
201
-
202
- const moveStraightTo = async (x: number, y: number) => {
203
- await mouse.move(straightTo(new Point(x, y)));
204
- };
205
-
206
- const getHotkeys = (keyStr: string | undefined): Key[] => {
207
- if (keyStr) {
208
- const platformCommandKey = process.platform === 'darwin' ? Key.LeftCmd : Key.LeftWin;
209
- const platformCtrlKey = process.platform === 'darwin' ? Key.LeftCmd : Key.LeftControl;
210
- const keyMap = {
211
- return: Key.Enter,
212
- ctrl: platformCtrlKey,
213
- shift: Key.LeftShift,
214
- alt: Key.LeftAlt,
215
- 'page down': Key.PageDown,
216
- 'page up': Key.PageUp,
217
- meta: platformCommandKey,
218
- win: platformCommandKey,
219
- command: platformCommandKey,
220
- cmd: platformCommandKey,
221
- ',': Key.Comma,
222
- arrowup: Key.Up,
223
- arrowdown: Key.Down,
224
- arrowleft: Key.Left,
225
- arrowright: Key.Right,
226
- } as const;
227
-
228
- const lowercaseKeyMap = Object.fromEntries(
229
- Object.entries(Key).map(([k, v]) => [k.toLowerCase(), v]),
230
- ) as {
231
- [K in keyof typeof Key as Lowercase<K>]: (typeof Key)[K];
232
- };
233
-
234
- const keys = keyStr
235
- .split(/[\s+]+/)
236
- .map((k) => k.toLowerCase())
237
- .map(
238
- (k) =>
239
- keyMap[k as keyof typeof keyMap] ??
240
- lowercaseKeyMap[k as Lowercase<keyof typeof Key>],
241
- )
242
- .filter(Boolean);
243
- this.logger.debug('[ComputerOperator] hotkey:', keys);
244
- return keys;
245
- }
246
- return [];
247
- };
248
-
249
- switch (actionType) {
250
- case 'wait':
251
- this.logger.debug('[ComputerOperator] wait', inputs);
252
- await sleep(5000);
253
- break;
254
-
255
- case 'mouse_move':
256
- case 'hover':
257
- this.logger.debug('[ComputerOperator] mouse_move');
258
- await moveStraightTo(startX, startY);
259
- break;
260
-
261
- case 'click':
262
- case 'left_click':
263
- case 'left_single':
264
- this.logger.debug('[ComputerOperator] left_click');
265
- await moveStraightTo(startX, startY);
266
- await sleep(100);
267
- await mouse.click(Button.LEFT);
268
- break;
269
-
270
- case 'left_double':
271
- case 'double_click':
272
- this.logger.debug(`[ComputerOperator] ${actionType}(${startX}, ${startY})`);
273
- await moveStraightTo(startX, startY);
274
- await sleep(100);
275
- await mouse.doubleClick(Button.LEFT);
276
- break;
277
-
278
- case 'right_click':
279
- case 'right_single':
280
- this.logger.debug('[ComputerOperator] right_click');
281
- await moveStraightTo(startX, startY);
282
- await sleep(100);
283
- await mouse.click(Button.RIGHT);
284
- break;
285
-
286
- case 'middle_click':
287
- this.logger.debug('[ComputerOperator] middle_click');
288
- await moveStraightTo(startX, startY);
289
- await mouse.click(Button.MIDDLE);
290
- break;
291
-
292
- case 'drag':
293
- case 'left_click_drag':
294
- case 'select': {
295
- const endBoxStr = inputs?.end_box || '';
296
- if (endBoxStr) {
297
- const { x: endX, y: endY } = parseBoxToScreenCoords({
298
- boxStr: endBoxStr,
299
- screenWidth,
300
- screenHeight,
301
- });
302
-
303
- if (startX && startY && endX && endY) {
304
- this.logger.debug(
305
- `[ComputerOperator] drag coordinates: startX=${startX}, startY=${startY}, endX=${endX}, endY=${endY}`,
306
- );
307
- await moveStraightTo(startX, startY);
308
- await sleep(100);
309
- await mouse.drag(straightTo(new Point(endX, endY)));
310
- }
311
- }
312
- break;
313
- }
314
-
315
- case 'type': {
316
- const content = inputs.content?.trim();
317
- this.logger.debug('[ComputerOperator] type', content);
318
- if (content) {
319
- const stripContent = content.replace(/\\n$/, '').replace(/\n$/, '');
320
- keyboard.config.autoDelayMs = 0;
321
- if (process.platform === 'win32') {
322
- const originalClipboard = await clipboard.getContent();
323
- await clipboard.setContent(stripContent);
324
- await keyboard.pressKey(Key.LeftControl, Key.V);
325
- await sleep(50);
326
- await keyboard.releaseKey(Key.LeftControl, Key.V);
327
- await sleep(50);
328
- await clipboard.setContent(originalClipboard);
329
- } else {
330
- await keyboard.type(stripContent);
331
- }
332
-
333
- if (content.endsWith('\n') || content.endsWith('\\n')) {
334
- await keyboard.pressKey(Key.Enter);
335
- await keyboard.releaseKey(Key.Enter);
336
- }
337
-
338
- keyboard.config.autoDelayMs = 500;
339
- }
340
- break;
341
- }
342
-
343
- case 'hotkey': {
344
- const keyStr = inputs?.key || inputs?.hotkey;
345
- const keys = getHotkeys(keyStr);
346
- if (keys.length > 0) {
347
- await keyboard.pressKey(...keys);
348
- await keyboard.releaseKey(...keys);
349
- }
350
- break;
351
- }
352
-
353
- case 'press': {
354
- const keyStr = inputs?.key || inputs?.hotkey;
355
- const keys = getHotkeys(keyStr);
356
- if (keys.length > 0) {
357
- await keyboard.pressKey(...keys);
358
- }
359
- break;
360
- }
361
-
362
- case 'release': {
363
- const keyStr = inputs?.key || inputs?.hotkey;
364
- const keys = getHotkeys(keyStr);
365
- if (keys.length > 0) {
366
- await keyboard.releaseKey(...keys);
367
- }
368
- break;
369
- }
370
-
371
- case 'scroll': {
372
- const { direction } = inputs;
373
- if (startX !== null && startY !== null) {
374
- await moveStraightTo(startX, startY);
375
- }
376
-
377
- switch (direction?.toLowerCase()) {
378
- case 'up':
379
- await mouse.scrollUp(5 * 100);
380
- break;
381
- case 'down':
382
- await mouse.scrollDown(5 * 100);
383
- break;
384
- default:
385
- this.logger.warn(`[ComputerOperator] Unsupported scroll direction: ${direction}`);
386
- }
387
- break;
388
- }
389
-
390
- case 'open_url': {
391
- let url = inputs?.url || inputs?.content;
392
- if (!url) {
393
- throw new Error('No URL specified for open_url action');
394
- }
395
-
396
- // Ensure URL has protocol
397
- if (!/^https?:\/\//i.test(url)) {
398
- url = 'https://' + url;
399
- }
400
-
401
- this.logger.debug(`[ComputerOperator] Opening URL: ${url}`);
402
-
403
- // Use system command to open URL in default browser
404
- const { exec } = await import('child_process');
405
- const platform = process.platform;
406
-
407
- if (platform === 'win32') {
408
- // Windows: use start command
409
- await new Promise<void>((resolve, reject) => {
410
- exec(`start "" "${url}"`, (error) => {
411
- if (error) {
412
- this.logger.warn(`[ComputerOperator] Failed to open URL with start command: ${error.message}`);
413
- // Fallback: try using PowerShell
414
- exec(`powershell -Command "Start-Process '${url}'"`, (psError) => {
415
- if (psError) {
416
- reject(psError);
417
- } else {
418
- resolve();
419
- }
420
- });
421
- } else {
422
- resolve();
423
- }
424
- });
425
- });
426
- } else if (platform === 'darwin') {
427
- // macOS: use open command
428
- await new Promise<void>((resolve, reject) => {
429
- exec(`open "${url}"`, (error) => {
430
- if (error) {
431
- reject(error);
432
- } else {
433
- resolve();
434
- }
435
- });
436
- });
437
- } else {
438
- // Linux: use xdg-open
439
- await new Promise<void>((resolve, reject) => {
440
- exec(`xdg-open "${url}"`, (error) => {
441
- if (error) {
442
- reject(error);
443
- } else {
444
- resolve();
445
- }
446
- });
447
- });
448
- }
449
-
450
- // Wait for browser to open and page to load
451
- await sleep(2000);
452
- break;
453
- }
454
-
455
- case 'error_env':
456
- case 'call_user':
457
- case 'finished':
458
- case 'user_stop':
459
- this.logger.debug(`[ComputerOperator] ${actionType}`);
460
- return 'end';
461
-
462
- default:
463
- this.logger.warn(`[ComputerOperator] Unsupported action: ${actionType}`);
464
- }
465
- }
466
-
467
- async cleanup(): Promise<void> {
468
- this.logger.debug('Cleaning up computer operator...');
469
- }
470
-
471
- async destroyInstance(): Promise<void> {
472
- this.logger.debug('Destroying computer operator instance...');
473
- await this.cleanup();
474
- }
475
-
476
- static override get MANUAL(): OperatorManual {
477
- return {
478
- ACTION_SPACES: [
479
- // Mouse actions
480
- `click(start_box='[x1, y1, x2, y2]') # Single click (taskbar icons)`,
481
- `left_double(start_box='[x1, y1, x2, y2]') # Double click (desktop icons/folders)`,
482
- `right_single(start_box='[x1, y1, x2, y2]') # Right click`,
483
- `drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]') # Drag`,
484
-
485
- // Keyboard actions
486
- `hotkey(key='') # e.g., 'ctrl c', 'alt tab' (max 3 keys)`,
487
- `type(content='') # Use "\\n" at the end to submit`,
488
- `press(key='') # Single key press: 'enter', 'esc', 'tab', 'win', etc.`,
489
-
490
- // Navigation
491
- `open_url(url='https://xxx') # Open URL in default browser`,
492
-
493
- // Scroll
494
- `scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')`,
495
-
496
- // System
497
- `wait() # Sleep 5s and take a screenshot`,
498
- `finished() # Task completed`,
499
- `call_user() # Need user's help`,
500
- ],
501
-
502
- KEY_SPACE: {
503
- 'enter': 'Enter key',
504
- 'esc': 'Escape key',
505
- 'tab': 'Tab key',
506
- 'win': 'Windows key (or Command on Mac)',
507
- 'delete': 'Delete key',
508
- 'backspace': 'Backspace key',
509
- 'page up': 'Page Up',
510
- 'page down': 'Page Down',
511
- 'home': 'Home key',
512
- 'end': 'End key',
513
- 'arrow up': 'Up arrow',
514
- 'arrow down': 'Down arrow',
515
- 'arrow left': 'Left arrow',
516
- 'arrow right': 'Right arrow',
517
- },
518
- };
519
- }
520
- }
1
+ /**
2
+ * Computer Operator using @computer-use/nut-js
3
+ * Provides desktop automation capabilities for gui-subagent
4
+ * Based on UI-TARS NutJSOperator implementation
5
+ *
6
+ * This implementation is aligned with packages/ui-tars/operators/nut-js/src/index.ts
7
+ */
8
+
9
+ import {
10
+ screen,
11
+ Button,
12
+ Key,
13
+ Point,
14
+ keyboard,
15
+ mouse,
16
+ sleep,
17
+ straightTo,
18
+ clipboard,
19
+ } from '@computer-use/nut-js';
20
+ import { Jimp } from 'jimp';
21
+ import type { OperatorConfig, ScreenContext, ScreenshotOutput, ExecuteParams, ExecuteOutput } from '../types/operator.js';
22
+ import { Operator, type OperatorManual, parseBoxToScreenCoords } from './base-operator.js';
23
+ import { getLogger } from '../../logger.js';
24
+
25
+ const guiLogger = getLogger();
26
+
27
+ export interface ComputerOperatorOptions {
28
+ config?: OperatorConfig;
29
+ computerConfig?: Record<string, any>;
30
+ logger?: any;
31
+ }
32
+
33
+ export class ComputerOperator extends Operator {
34
+ private config: OperatorConfig;
35
+ private logger: any;
36
+ private screenCtx: ScreenContext | null = null;
37
+
38
+ constructor(options: ComputerOperatorOptions = {}) {
39
+ super();
40
+ this.config = options.config || {};
41
+ this.logger = options.logger || guiLogger;
42
+ }
43
+
44
+ protected async initialize(): Promise<void> {
45
+ this.logger.debug('Initializing computer operator...');
46
+
47
+ try {
48
+ const { width, height, scaleFactor } = await this.getScreenSize();
49
+ this.screenCtx = {
50
+ width,
51
+ height,
52
+ scaleFactor,
53
+ };
54
+
55
+ this.logger.debug(`Computer operator initialized: ${width}x${height} @ ${scaleFactor}x`);
56
+ } catch (error) {
57
+ this.logger.error('Failed to initialize computer operator:', error);
58
+ throw error;
59
+ }
60
+ }
61
+
62
+ private async getScreenSize(): Promise<{ width: number; height: number; scaleFactor: number }> {
63
+ try {
64
+ const grabImage = await screen.grab();
65
+ const screenWithScale = await grabImage.toRGB();
66
+ const scaleFactor = screenWithScale.pixelDensity.scaleX;
67
+ const width = screenWithScale.width / scaleFactor;
68
+ const height = screenWithScale.height / scaleFactor;
69
+ return { width, height, scaleFactor };
70
+ } catch {
71
+ return {
72
+ width: this.config.viewport?.width || 1920,
73
+ height: this.config.viewport?.height || 1080,
74
+ scaleFactor: this.config.deviceScaleFactor || 1,
75
+ };
76
+ }
77
+ }
78
+
79
+ getSupportedActions(): string[] {
80
+ return [
81
+ 'click',
82
+ 'left_click',
83
+ 'left_single',
84
+ 'left_double',
85
+ 'double_click',
86
+ 'right_click',
87
+ 'right_single',
88
+ 'middle_click',
89
+ 'mouse_move',
90
+ 'hover',
91
+ 'drag',
92
+ 'left_click_drag',
93
+ 'select',
94
+ 'scroll',
95
+ 'type',
96
+ 'hotkey',
97
+ 'press',
98
+ 'release',
99
+ 'open_url',
100
+ 'wait',
101
+ 'finished',
102
+ 'user_stop',
103
+ 'error_env',
104
+ ];
105
+ }
106
+
107
+ protected screenContext(): ScreenContext {
108
+ if (!this.screenCtx) {
109
+ throw new Error('Screen context not initialized');
110
+ }
111
+ return this.screenCtx;
112
+ }
113
+
114
+ protected async screenshot(): Promise<ScreenshotOutput> {
115
+ try {
116
+ const grabImage = await screen.grab();
117
+ const screenWithScale = await grabImage.toRGB();
118
+ const scaleFactor = screenWithScale.pixelDensity.scaleX;
119
+
120
+ const screenWithScaleImage = await Jimp.fromBitmap({
121
+ width: screenWithScale.width,
122
+ height: screenWithScale.height,
123
+ data: Buffer.from(screenWithScale.data),
124
+ });
125
+
126
+ const width = screenWithScale.width / scaleFactor;
127
+ const height = screenWithScale.height / scaleFactor;
128
+
129
+ const physicalScreenImage = await screenWithScaleImage
130
+ .resize({
131
+ w: width,
132
+ h: height,
133
+ })
134
+ .getBuffer('image/png');
135
+
136
+ this.logger.debug(`[ComputerOperator] screenshot: ${width}x${height}, scaleFactor: ${scaleFactor}`);
137
+
138
+ return {
139
+ status: 'success',
140
+ base64: physicalScreenImage.toString('base64'),
141
+ scaleFactor,
142
+ };
143
+ } catch (error) {
144
+ const errorMsg = error instanceof Error ? error.message : 'Unknown error';
145
+ this.logger.warn(`[ComputerOperator] Screenshot failed: ${errorMsg}`);
146
+ return {
147
+ status: 'failed',
148
+ errorMessage: errorMsg,
149
+ };
150
+ }
151
+ }
152
+
153
+ protected async execute(params: ExecuteParams): Promise<ExecuteOutput> {
154
+ const { parsedPrediction, screenWidth, screenHeight, scaleFactor } = params;
155
+ const { action_type, action_inputs } = parsedPrediction;
156
+
157
+ // Empty or invalid action should return needs_input to let main agent decide
158
+ if (!action_type || action_type.trim() === '') {
159
+ this.logger.debug(`[ComputerOperator] Empty action, returning to main agent for decision`);
160
+ return {
161
+ status: 'needs_input',
162
+ errorMessage: 'Empty or invalid action type - returned to main agent for decision'
163
+ };
164
+ }
165
+
166
+ const startBoxStr = action_inputs?.start_box || '';
167
+ const { x: startX, y: startY } = parseBoxToScreenCoords({
168
+ boxStr: startBoxStr,
169
+ screenWidth,
170
+ screenHeight,
171
+ });
172
+
173
+ mouse.config.mouseSpeed = 3600;
174
+
175
+ // this.logger.debug('[ComputerOperator] execute', { action_type, startX, startY, scaleFactor });
176
+
177
+ try {
178
+ const result = await this.executeAction(action_type, action_inputs, { startX, startY, screenWidth, screenHeight, scaleFactor });
179
+ if (result === 'end') {
180
+ return { status: 'end' };
181
+ }
182
+
183
+ return { status: 'success' };
184
+ } catch (error) {
185
+ this.logger.error(`Failed to execute action ${action_type}:`, error);
186
+ return {
187
+ status: 'failed',
188
+ errorMessage: (error as Error).message,
189
+ };
190
+ }
191
+ }
192
+
193
+ private async executeAction(
194
+ actionType: string,
195
+ inputs: Record<string, any>,
196
+ context: { startX: number; startY: number; screenWidth: number; screenHeight: number; scaleFactor: number }
197
+ ): Promise<'end' | void> {
198
+ const { startX, startY, screenWidth, screenHeight, scaleFactor } = context;
199
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
200
+ void scaleFactor;
201
+
202
+ const moveStraightTo = async (x: number, y: number) => {
203
+ await mouse.move(straightTo(new Point(x, y)));
204
+ };
205
+
206
+ const getHotkeys = (keyStr: string | undefined): Key[] => {
207
+ if (keyStr) {
208
+ const platformCommandKey = process.platform === 'darwin' ? Key.LeftCmd : Key.LeftWin;
209
+ const platformCtrlKey = process.platform === 'darwin' ? Key.LeftCmd : Key.LeftControl;
210
+ const keyMap = {
211
+ return: Key.Enter,
212
+ ctrl: platformCtrlKey,
213
+ shift: Key.LeftShift,
214
+ alt: Key.LeftAlt,
215
+ 'page down': Key.PageDown,
216
+ 'page up': Key.PageUp,
217
+ meta: platformCommandKey,
218
+ win: platformCommandKey,
219
+ command: platformCommandKey,
220
+ cmd: platformCommandKey,
221
+ ',': Key.Comma,
222
+ arrowup: Key.Up,
223
+ arrowdown: Key.Down,
224
+ arrowleft: Key.Left,
225
+ arrowright: Key.Right,
226
+ } as const;
227
+
228
+ const lowercaseKeyMap = Object.fromEntries(
229
+ Object.entries(Key).map(([k, v]) => [k.toLowerCase(), v]),
230
+ ) as {
231
+ [K in keyof typeof Key as Lowercase<K>]: (typeof Key)[K];
232
+ };
233
+
234
+ const keys = keyStr
235
+ .split(/[\s+]+/)
236
+ .map((k) => k.toLowerCase())
237
+ .map(
238
+ (k) =>
239
+ keyMap[k as keyof typeof keyMap] ??
240
+ lowercaseKeyMap[k as Lowercase<keyof typeof Key>],
241
+ )
242
+ .filter(Boolean);
243
+ this.logger.debug('[ComputerOperator] hotkey:', keys);
244
+ return keys;
245
+ }
246
+ return [];
247
+ };
248
+
249
+ switch (actionType) {
250
+ case 'wait':
251
+ this.logger.debug('[ComputerOperator] wait', inputs);
252
+ await sleep(5000);
253
+ break;
254
+
255
+ case 'mouse_move':
256
+ case 'hover':
257
+ this.logger.debug('[ComputerOperator] mouse_move');
258
+ await moveStraightTo(startX, startY);
259
+ break;
260
+
261
+ case 'click':
262
+ case 'left_click':
263
+ case 'left_single':
264
+ this.logger.debug('[ComputerOperator] left_click');
265
+ await moveStraightTo(startX, startY);
266
+ await sleep(100);
267
+ await mouse.click(Button.LEFT);
268
+ break;
269
+
270
+ case 'left_double':
271
+ case 'double_click':
272
+ this.logger.debug(`[ComputerOperator] ${actionType}(${startX}, ${startY})`);
273
+ await moveStraightTo(startX, startY);
274
+ await sleep(100);
275
+ await mouse.doubleClick(Button.LEFT);
276
+ break;
277
+
278
+ case 'right_click':
279
+ case 'right_single':
280
+ this.logger.debug('[ComputerOperator] right_click');
281
+ await moveStraightTo(startX, startY);
282
+ await sleep(100);
283
+ await mouse.click(Button.RIGHT);
284
+ break;
285
+
286
+ case 'middle_click':
287
+ this.logger.debug('[ComputerOperator] middle_click');
288
+ await moveStraightTo(startX, startY);
289
+ await mouse.click(Button.MIDDLE);
290
+ break;
291
+
292
+ case 'drag':
293
+ case 'left_click_drag':
294
+ case 'select': {
295
+ const endBoxStr = inputs?.end_box || '';
296
+ if (endBoxStr) {
297
+ const { x: endX, y: endY } = parseBoxToScreenCoords({
298
+ boxStr: endBoxStr,
299
+ screenWidth,
300
+ screenHeight,
301
+ });
302
+
303
+ if (startX && startY && endX && endY) {
304
+ this.logger.debug(
305
+ `[ComputerOperator] drag coordinates: startX=${startX}, startY=${startY}, endX=${endX}, endY=${endY}`,
306
+ );
307
+ await moveStraightTo(startX, startY);
308
+ await sleep(100);
309
+ await mouse.drag(straightTo(new Point(endX, endY)));
310
+ }
311
+ }
312
+ break;
313
+ }
314
+
315
+ case 'type': {
316
+ const content = inputs.content?.trim();
317
+ this.logger.debug('[ComputerOperator] type', content);
318
+ if (content) {
319
+ const stripContent = content.replace(/\\n$/, '').replace(/\n$/, '');
320
+ keyboard.config.autoDelayMs = 0;
321
+ if (process.platform === 'win32') {
322
+ const originalClipboard = await clipboard.getContent();
323
+ await clipboard.setContent(stripContent);
324
+ await keyboard.pressKey(Key.LeftControl, Key.V);
325
+ await sleep(50);
326
+ await keyboard.releaseKey(Key.LeftControl, Key.V);
327
+ await sleep(50);
328
+ // Restore clipboard content with retry to handle clipboardy occasional panics on Windows
329
+ await restoreClipboardWithRetry(originalClipboard, this.logger);
330
+ } else {
331
+ await keyboard.type(stripContent);
332
+ }
333
+
334
+ if (content.endsWith('\n') || content.endsWith('\\n')) {
335
+ await keyboard.pressKey(Key.Enter);
336
+ await keyboard.releaseKey(Key.Enter);
337
+ }
338
+
339
+ keyboard.config.autoDelayMs = 500;
340
+ }
341
+ break;
342
+ }
343
+
344
+ case 'hotkey': {
345
+ const keyStr = inputs?.key || inputs?.hotkey;
346
+ const keys = getHotkeys(keyStr);
347
+ if (keys.length > 0) {
348
+ await keyboard.pressKey(...keys);
349
+ await keyboard.releaseKey(...keys);
350
+ }
351
+ break;
352
+ }
353
+
354
+ case 'press': {
355
+ const keyStr = inputs?.key || inputs?.hotkey;
356
+ const keys = getHotkeys(keyStr);
357
+ if (keys.length > 0) {
358
+ await keyboard.pressKey(...keys);
359
+ }
360
+ break;
361
+ }
362
+
363
+ case 'release': {
364
+ const keyStr = inputs?.key || inputs?.hotkey;
365
+ const keys = getHotkeys(keyStr);
366
+ if (keys.length > 0) {
367
+ await keyboard.releaseKey(...keys);
368
+ }
369
+ break;
370
+ }
371
+
372
+ case 'scroll': {
373
+ const { direction } = inputs;
374
+ if (startX !== null && startY !== null) {
375
+ await moveStraightTo(startX, startY);
376
+ }
377
+
378
+ switch (direction?.toLowerCase()) {
379
+ case 'up':
380
+ await mouse.scrollUp(5 * 100);
381
+ break;
382
+ case 'down':
383
+ await mouse.scrollDown(5 * 100);
384
+ break;
385
+ default:
386
+ this.logger.warn(`[ComputerOperator] Unsupported scroll direction: ${direction}`);
387
+ }
388
+ break;
389
+ }
390
+
391
+ case 'open_url': {
392
+ let url = inputs?.url || inputs?.content;
393
+ if (!url) {
394
+ throw new Error('No URL specified for open_url action');
395
+ }
396
+
397
+ // Ensure URL has protocol
398
+ if (!/^https?:\/\//i.test(url)) {
399
+ url = 'https://' + url;
400
+ }
401
+
402
+ this.logger.debug(`[ComputerOperator] Opening URL: ${url}`);
403
+
404
+ // Use system command to open URL in default browser
405
+ const { exec } = await import('child_process');
406
+ const platform = process.platform;
407
+
408
+ if (platform === 'win32') {
409
+ // Windows: use start command
410
+ await new Promise<void>((resolve, reject) => {
411
+ exec(`start "" "${url}"`, (error) => {
412
+ if (error) {
413
+ this.logger.warn(`[ComputerOperator] Failed to open URL with start command: ${error.message}`);
414
+ // Fallback: try using PowerShell
415
+ exec(`powershell -Command "Start-Process '${url}'"`, (psError) => {
416
+ if (psError) {
417
+ reject(psError);
418
+ } else {
419
+ resolve();
420
+ }
421
+ });
422
+ } else {
423
+ resolve();
424
+ }
425
+ });
426
+ });
427
+ } else if (platform === 'darwin') {
428
+ // macOS: use open command
429
+ await new Promise<void>((resolve, reject) => {
430
+ exec(`open "${url}"`, (error) => {
431
+ if (error) {
432
+ reject(error);
433
+ } else {
434
+ resolve();
435
+ }
436
+ });
437
+ });
438
+ } else {
439
+ // Linux: use xdg-open
440
+ await new Promise<void>((resolve, reject) => {
441
+ exec(`xdg-open "${url}"`, (error) => {
442
+ if (error) {
443
+ reject(error);
444
+ } else {
445
+ resolve();
446
+ }
447
+ });
448
+ });
449
+ }
450
+
451
+ // Wait for browser to open and page to load
452
+ await sleep(2000);
453
+ break;
454
+ }
455
+
456
+ case 'error_env':
457
+ case 'finished':
458
+ case 'user_stop':
459
+ this.logger.debug(`[ComputerOperator] ${actionType}`);
460
+ return 'end';
461
+
462
+ default:
463
+ this.logger.warn(`[ComputerOperator] Unsupported action: ${actionType}`);
464
+ }
465
+ }
466
+
467
/**
 * Cleanup hook for this operator.
 *
 * The current implementation holds nothing to release: it only writes a
 * debug-level log entry and resolves.
 */
async cleanup(): Promise<void> {
  const { logger } = this;
  logger.debug('Cleaning up computer operator...');
}
470
+
471
/**
 * Tears down this operator instance.
 *
 * Writes a debug-level log entry and then awaits `cleanup()`, so any
 * rejection from `cleanup()` propagates to the caller.
 */
async destroyInstance(): Promise<void> {
  const { logger } = this;
  logger.debug('Destroying computer operator instance...');
  await this.cleanup();
}
475
+
476
/**
 * Static description of the actions and key names this operator understands.
 *
 * - `ACTION_SPACES`: one call-signature string per action, each followed by a
 *   short `#` hint.
 * - `KEY_SPACE`: key name -> human-readable description.
 *
 * NOTE(review): presumably this text is handed to the model that chooses
 * actions; confirm against the caller. Entries should therefore only
 * advertise what the action handler in this class really executes.
 *
 * @returns A freshly built manual object on every access.
 */
static override get MANUAL(): OperatorManual {
  return {
    ACTION_SPACES: [
      // Mouse actions
      `click(start_box='[x1, y1, x2, y2]') # Single click (taskbar icons)`,
      `left_double(start_box='[x1, y1, x2, y2]') # Double click (desktop icons/folders)`,
      `right_single(start_box='[x1, y1, x2, y2]') # Right click`,
      `drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]') # Drag`,

      // Keyboard actions
      `hotkey(key='') # e.g., 'ctrl c', 'alt tab' (max 3 keys)`,
      `type(content='') # Use "\\n" at the end to submit`,
      `press(key='') # Single key press: 'enter', 'esc', 'tab', 'win', etc.`,

      // Navigation
      `open_url(url='https://xxx') # Open URL in default browser`,

      // Scroll
      // Only 'up' and 'down' are advertised: the scroll handler in this class
      // logs "Unsupported scroll direction" and does nothing for other values.
      `scroll(start_box='[x1, y1, x2, y2]', direction='down or up')`,

      // System
      `wait() # Sleep 5s and take a screenshot`,
      `finished() # Task completed`,
    ],

    KEY_SPACE: {
      'enter': 'Enter key',
      'esc': 'Escape key',
      'tab': 'Tab key',
      'win': 'Windows key (or Command on Mac)',
      'delete': 'Delete key',
      'backspace': 'Backspace key',
      'page up': 'Page Up',
      'page down': 'Page Down',
      'home': 'Home key',
      'end': 'End key',
      'arrow up': 'Up arrow',
      'arrow down': 'Down arrow',
      'arrow left': 'Left arrow',
      'arrow right': 'Right arrow',
    },
  };
}
519
+ }
520
+
521
/**
 * Best-effort restore of previously saved clipboard text.
 *
 * Calls `clipboard.setContent(content)` up to two times. After a failed first
 * attempt it waits via `sleep(100)` and tries again. If the last attempt also
 * fails, the error is reported through `logger.warn` and the function resolves
 * normally, so a clipboard failure never aborts the caller. This tolerates the
 * occasional clipboardy panics on Windows.
 *
 * @param content - Text to write back to the clipboard.
 * @param logger - Object exposing a `warn` method used for the failure report.
 */
async function restoreClipboardWithRetry(content: string, logger: any): Promise<void> {
  const totalAttempts = 2;
  let attempt = 0;

  while (attempt < totalAttempts) {
    attempt += 1;
    try {
      await clipboard.setContent(content);
      return;
    } catch (restoreError: any) {
      if (attempt < totalAttempts) {
        // Attempts remain: pause briefly, then go around again.
        await sleep(100);
        continue;
      }
      // Final attempt failed: report it and give up without throwing.
      logger.warn(
        '[ComputerOperator] Failed to restore clipboard content after retries:',
        restoreError?.message || restoreError,
      );
    }
  }
}