@xagent-ai/cli 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (568)
  1. package/.github/ISSUE_TEMPLATE/bug_report.md +38 -38
  2. package/.github/ISSUE_TEMPLATE/feature_request.md +20 -20
  3. package/.github/workflows/ci.yml +72 -0
  4. package/.github/workflows/release.yml +109 -0
  5. package/.gitmodules +3 -3
  6. package/README.md +326 -280
  7. package/README_CN.md +325 -279
  8. package/dist/ai-client/factory.d.ts +52 -0
  9. package/dist/ai-client/factory.d.ts.map +1 -0
  10. package/dist/ai-client/factory.js +132 -0
  11. package/dist/ai-client/factory.js.map +1 -0
  12. package/dist/ai-client/index.d.ts +20 -0
  13. package/dist/ai-client/index.d.ts.map +1 -0
  14. package/dist/ai-client/index.js +49 -0
  15. package/dist/ai-client/index.js.map +1 -0
  16. package/dist/ai-client/providers/anthropic.d.ts +57 -0
  17. package/dist/ai-client/providers/anthropic.d.ts.map +1 -0
  18. package/dist/ai-client/providers/anthropic.js +400 -0
  19. package/dist/ai-client/providers/anthropic.js.map +1 -0
  20. package/dist/ai-client/providers/openai.d.ts +57 -0
  21. package/dist/ai-client/providers/openai.d.ts.map +1 -0
  22. package/dist/ai-client/providers/openai.js +286 -0
  23. package/dist/ai-client/providers/openai.js.map +1 -0
  24. package/dist/ai-client/providers/remote.d.ts +111 -0
  25. package/dist/ai-client/providers/remote.d.ts.map +1 -0
  26. package/dist/ai-client/providers/remote.js +351 -0
  27. package/dist/ai-client/providers/remote.js.map +1 -0
  28. package/dist/ai-client/registry.d.ts +51 -0
  29. package/dist/ai-client/registry.d.ts.map +1 -0
  30. package/dist/ai-client/registry.js +81 -0
  31. package/dist/ai-client/registry.js.map +1 -0
  32. package/dist/ai-client/types.d.ts +260 -0
  33. package/dist/ai-client/types.d.ts.map +1 -0
  34. package/dist/ai-client/types.js +73 -0
  35. package/dist/ai-client/types.js.map +1 -0
  36. package/dist/ai-client-factory.d.ts +62 -0
  37. package/dist/ai-client-factory.d.ts.map +1 -0
  38. package/dist/ai-client-factory.js +157 -0
  39. package/dist/ai-client-factory.js.map +1 -0
  40. package/dist/auth.d.ts +23 -1
  41. package/dist/auth.d.ts.map +1 -1
  42. package/dist/auth.js +160 -168
  43. package/dist/auth.js.map +1 -1
  44. package/dist/cancellation.d.ts +5 -4
  45. package/dist/cancellation.d.ts.map +1 -1
  46. package/dist/cancellation.js +55 -32
  47. package/dist/cancellation.js.map +1 -1
  48. package/dist/checkpoint.d.ts +1 -1
  49. package/dist/checkpoint.d.ts.map +1 -1
  50. package/dist/checkpoint.js +2 -2
  51. package/dist/checkpoint.js.map +1 -1
  52. package/dist/cli.js +626 -13
  53. package/dist/cli.js.map +1 -1
  54. package/dist/config.d.ts +10 -4
  55. package/dist/config.d.ts.map +1 -1
  56. package/dist/config.js +62 -25
  57. package/dist/config.js.map +1 -1
  58. package/dist/context-compressor.d.ts +81 -16
  59. package/dist/context-compressor.d.ts.map +1 -1
  60. package/dist/context-compressor.js +712 -153
  61. package/dist/context-compressor.js.map +1 -1
  62. package/dist/gui-subagent/action-parser/actionParser.d.ts.map +1 -1
  63. package/dist/gui-subagent/action-parser/actionParser.js +4 -2
  64. package/dist/gui-subagent/action-parser/actionParser.js.map +1 -1
  65. package/dist/gui-subagent/agent/gui-agent.d.ts +29 -2
  66. package/dist/gui-subagent/agent/gui-agent.d.ts.map +1 -1
  67. package/dist/gui-subagent/agent/gui-agent.js +87 -45
  68. package/dist/gui-subagent/agent/gui-agent.js.map +1 -1
  69. package/dist/gui-subagent/index.d.ts +16 -1
  70. package/dist/gui-subagent/index.d.ts.map +1 -1
  71. package/dist/gui-subagent/index.js +4 -0
  72. package/dist/gui-subagent/index.js.map +1 -1
  73. package/dist/gui-subagent/operator/base-operator.d.ts.map +1 -1
  74. package/dist/gui-subagent/operator/base-operator.js +0 -1
  75. package/dist/gui-subagent/operator/base-operator.js.map +1 -1
  76. package/dist/gui-subagent/operator/computer-operator.d.ts.map +1 -1
  77. package/dist/gui-subagent/operator/computer-operator.js +29 -8
  78. package/dist/gui-subagent/operator/computer-operator.js.map +1 -1
  79. package/dist/gui-subagent/types/actions.d.ts +1 -1
  80. package/dist/gui-subagent/types/actions.d.ts.map +1 -1
  81. package/dist/gui-subagent/types/actions.js +0 -1
  82. package/dist/gui-subagent/types/actions.js.map +1 -1
  83. package/dist/gui-subagent/types/operator.d.ts +1 -1
  84. package/dist/gui-subagent/types/operator.d.ts.map +1 -1
  85. package/dist/index.d.ts +1 -2
  86. package/dist/index.d.ts.map +1 -1
  87. package/dist/index.js +1 -2
  88. package/dist/index.js.map +1 -1
  89. package/dist/input-processor.d.ts.map +1 -1
  90. package/dist/input-processor.js +6 -3
  91. package/dist/input-processor.js.map +1 -1
  92. package/dist/mcp.d.ts +5 -0
  93. package/dist/mcp.d.ts.map +1 -1
  94. package/dist/mcp.js +81 -35
  95. package/dist/mcp.js.map +1 -1
  96. package/dist/ripgrep.d.ts +29 -0
  97. package/dist/ripgrep.d.ts.map +1 -0
  98. package/dist/ripgrep.js +292 -0
  99. package/dist/ripgrep.js.map +1 -0
  100. package/dist/session.d.ts +23 -7
  101. package/dist/session.d.ts.map +1 -1
  102. package/dist/session.js +624 -243
  103. package/dist/session.js.map +1 -1
  104. package/dist/shell.d.ts +33 -0
  105. package/dist/shell.d.ts.map +1 -0
  106. package/dist/shell.js +125 -0
  107. package/dist/shell.js.map +1 -0
  108. package/dist/skill-installer.d.ts +38 -0
  109. package/dist/skill-installer.d.ts.map +1 -0
  110. package/dist/skill-installer.js +447 -0
  111. package/dist/skill-installer.js.map +1 -0
  112. package/dist/skill-invoker.d.ts +7 -1
  113. package/dist/skill-invoker.d.ts.map +1 -1
  114. package/dist/skill-invoker.js +34 -13
  115. package/dist/skill-invoker.js.map +1 -1
  116. package/dist/skill-loader.d.ts +8 -3
  117. package/dist/skill-loader.d.ts.map +1 -1
  118. package/dist/skill-loader.js +46 -44
  119. package/dist/skill-loader.js.map +1 -1
  120. package/dist/skill-manager.d.ts +85 -0
  121. package/dist/skill-manager.d.ts.map +1 -0
  122. package/dist/skill-manager.js +340 -0
  123. package/dist/skill-manager.js.map +1 -0
  124. package/dist/slash-commands.d.ts +38 -1
  125. package/dist/slash-commands.d.ts.map +1 -1
  126. package/dist/slash-commands.js +912 -296
  127. package/dist/slash-commands.js.map +1 -1
  128. package/dist/smart-approval.d.ts.map +1 -1
  129. package/dist/smart-approval.js +67 -55
  130. package/dist/smart-approval.js.map +1 -1
  131. package/dist/system-prompt-generator.d.ts +6 -0
  132. package/dist/system-prompt-generator.d.ts.map +1 -1
  133. package/dist/system-prompt-generator.js +84 -34
  134. package/dist/system-prompt-generator.js.map +1 -1
  135. package/dist/terminal.d.ts +28 -0
  136. package/dist/terminal.d.ts.map +1 -0
  137. package/dist/terminal.js +82 -0
  138. package/dist/terminal.js.map +1 -0
  139. package/dist/tools.d.ts +23 -7
  140. package/dist/tools.d.ts.map +1 -1
  141. package/dist/tools.js +797 -437
  142. package/dist/tools.js.map +1 -1
  143. package/dist/truncate.d.ts +55 -0
  144. package/dist/truncate.d.ts.map +1 -0
  145. package/dist/truncate.js +130 -0
  146. package/dist/truncate.js.map +1 -0
  147. package/dist/types.d.ts +27 -9
  148. package/dist/types.d.ts.map +1 -1
  149. package/dist/update.d.ts.map +1 -1
  150. package/dist/update.js +17 -28
  151. package/dist/update.js.map +1 -1
  152. package/dist/workflow.d.ts +5 -1
  153. package/dist/workflow.d.ts.map +1 -1
  154. package/dist/workflow.js +60 -47
  155. package/dist/workflow.js.map +1 -1
  156. package/docs/architecture/mcp-integration-guide.md +304 -194
  157. package/docs/architecture/overview.md +169 -169
  158. package/docs/architecture/tool-system-design.md +134 -134
  159. package/docs/cli/commands.md +349 -238
  160. package/docs/smart-mode.md +281 -281
  161. package/docs/third-party-models.md +439 -439
  162. package/find-skills/SKILL.md +133 -0
  163. package/package.json +89 -90
  164. package/scripts/install-ripgrep.js +241 -0
  165. package/src/ai-client/factory.ts +151 -0
  166. package/src/ai-client/index.ts +61 -0
  167. package/src/ai-client/providers/anthropic.ts +466 -0
  168. package/src/ai-client/providers/openai.ts +342 -0
  169. package/src/ai-client/providers/remote.ts +436 -0
  170. package/src/ai-client/registry.ts +97 -0
  171. package/src/ai-client/types.ts +345 -0
  172. package/src/ai-client-factory.ts +204 -0
  173. package/src/auth.ts +663 -614
  174. package/src/cancellation.ts +205 -176
  175. package/src/checkpoint.ts +219 -219
  176. package/src/cli.ts +1406 -743
  177. package/src/config.ts +341 -297
  178. package/src/context-compressor.ts +982 -290
  179. package/src/conversation.ts +288 -288
  180. package/src/gui-subagent/action-parser/actionParser.ts +318 -315
  181. package/src/gui-subagent/action-parser/constants.ts +14 -14
  182. package/src/gui-subagent/action-parser/index.ts +8 -8
  183. package/src/gui-subagent/action-parser/types.ts +31 -31
  184. package/src/gui-subagent/agent/gui-agent.ts +1151 -1089
  185. package/src/gui-subagent/agent/index.ts +5 -5
  186. package/src/gui-subagent/index.ts +177 -163
  187. package/src/gui-subagent/operator/base-operator.ts +244 -245
  188. package/src/gui-subagent/operator/computer-operator.ts +540 -520
  189. package/src/gui-subagent/operator/index.ts +6 -6
  190. package/src/gui-subagent/types/actions.ts +260 -262
  191. package/src/gui-subagent/types/index.ts +6 -6
  192. package/src/gui-subagent/types/operator.ts +106 -106
  193. package/src/gui-subagent/utils.ts +51 -51
  194. package/src/index.ts +17 -18
  195. package/src/input-processor.ts +6 -3
  196. package/src/logger.ts +438 -438
  197. package/src/mcp.ts +730 -682
  198. package/src/memory.ts +344 -344
  199. package/src/ripgrep.ts +368 -0
  200. package/src/session-manager.ts +308 -308
  201. package/src/session.ts +948 -386
  202. package/src/shell.ts +133 -0
  203. package/src/skill-installer.ts +518 -0
  204. package/src/skill-invoker.ts +960 -935
  205. package/src/skill-loader.ts +501 -496
  206. package/src/skill-manager.ts +384 -0
  207. package/src/slash-commands.ts +2181 -1389
  208. package/src/smart-approval.ts +117 -73
  209. package/src/system-prompt-generator.ts +89 -34
  210. package/src/terminal.ts +96 -0
  211. package/src/theme.ts +738 -738
  212. package/src/tools.ts +1336 -773
  213. package/src/truncate.ts +173 -0
  214. package/src/types.ts +219 -198
  215. package/src/update.ts +22 -32
  216. package/src/workflow.ts +523 -508
  217. package/tsconfig.json +22 -22
  218. package/vitest.config.ts +19 -19
  219. package/dist/ai-client.d.ts +0 -86
  220. package/dist/ai-client.d.ts.map +0 -1
  221. package/dist/ai-client.js +0 -1372
  222. package/dist/ai-client.js.map +0 -1
  223. package/dist/gui-subagent/operator/browser-operator.d.ts +0 -36
  224. package/dist/gui-subagent/operator/browser-operator.d.ts.map +0 -1
  225. package/dist/gui-subagent/operator/browser-operator.js +0 -306
  226. package/dist/gui-subagent/operator/browser-operator.js.map +0 -1
  227. package/dist/gui-subagent/operator/desktop-operator.d.ts +0 -55
  228. package/dist/gui-subagent/operator/desktop-operator.d.ts.map +0 -1
  229. package/dist/gui-subagent/operator/desktop-operator.js +0 -527
  230. package/dist/gui-subagent/operator/desktop-operator.js.map +0 -1
  231. package/dist/hook.d.ts +0 -73
  232. package/dist/hook.d.ts.map +0 -1
  233. package/dist/hook.js +0 -156
  234. package/dist/hook.js.map +0 -1
  235. package/dist/input-history.d.ts +0 -24
  236. package/dist/input-history.d.ts.map +0 -1
  237. package/dist/input-history.js +0 -94
  238. package/dist/input-history.js.map +0 -1
  239. package/dist/keyboard-manager.d.ts +0 -151
  240. package/dist/keyboard-manager.d.ts.map +0 -1
  241. package/dist/keyboard-manager.js +0 -396
  242. package/dist/keyboard-manager.js.map +0 -1
  243. package/dist/print-system-prompt.d.ts +0 -2
  244. package/dist/print-system-prompt.d.ts.map +0 -1
  245. package/dist/print-system-prompt.js +0 -40
  246. package/dist/print-system-prompt.js.map +0 -1
  247. package/dist/remote-ai-client.d.ts +0 -104
  248. package/dist/remote-ai-client.d.ts.map +0 -1
  249. package/dist/remote-ai-client.js +0 -552
  250. package/dist/remote-ai-client.js.map +0 -1
  251. package/dist/sdk-output-adapter.d.ts +0 -232
  252. package/dist/sdk-output-adapter.d.ts.map +0 -1
  253. package/dist/sdk-output-adapter.js +0 -636
  254. package/dist/sdk-output-adapter.js.map +0 -1
  255. package/dist/sdk-session-v2.d.ts +0 -13
  256. package/dist/sdk-session-v2.d.ts.map +0 -1
  257. package/dist/sdk-session-v2.js +0 -46
  258. package/dist/sdk-session-v2.js.map +0 -1
  259. package/dist/sdk-session.d.ts +0 -13
  260. package/dist/sdk-session.d.ts.map +0 -1
  261. package/dist/sdk-session.js +0 -48
  262. package/dist/sdk-session.js.map +0 -1
  263. package/dist/test-boundary-conditions.d.ts.map +0 -1
  264. package/dist/test-boundary-conditions.js.map +0 -1
  265. package/dist/test-cancellation-fix.d.ts.map +0 -1
  266. package/dist/test-cancellation-fix.js.map +0 -1
  267. package/dist/test-input-history.d.ts.map +0 -1
  268. package/dist/test-input-history.js.map +0 -1
  269. package/dist/test-interaction-flow.d.ts.map +0 -1
  270. package/dist/test-interaction-flow.js.map +0 -1
  271. package/dist/test-quick.d.ts.map +0 -1
  272. package/dist/test-quick.js.map +0 -1
  273. package/dist/test-user-interaction.d.ts.map +0 -1
  274. package/dist/test-user-interaction.js.map +0 -1
  275. package/dist/tools/edit-diff.d.ts +0 -32
  276. package/dist/tools/edit-diff.d.ts.map +0 -1
  277. package/dist/tools/edit-diff.js +0 -185
  278. package/dist/tools/edit-diff.js.map +0 -1
  279. package/dist/tools/edit.d.ts +0 -11
  280. package/dist/tools/edit.d.ts.map +0 -1
  281. package/dist/tools/edit.js +0 -129
  282. package/dist/tools/edit.js.map +0 -1
  283. package/dist/unified-session.d.ts +0 -42
  284. package/dist/unified-session.d.ts.map +0 -1
  285. package/dist/unified-session.js +0 -271
  286. package/dist/unified-session.js.map +0 -1
  287. package/skills/.claude-plugin/marketplace.json +0 -45
  288. package/skills/README.md +0 -94
  289. package/skills/THIRD_PARTY_NOTICES.md +0 -405
  290. package/skills/skills/algorithmic-art/LICENSE.txt +0 -202
  291. package/skills/skills/algorithmic-art/SKILL.md +0 -405
  292. package/skills/skills/algorithmic-art/templates/generator_template.js +0 -223
  293. package/skills/skills/algorithmic-art/templates/viewer.html +0 -599
  294. package/skills/skills/brand-guidelines/LICENSE.txt +0 -202
  295. package/skills/skills/brand-guidelines/SKILL.md +0 -73
  296. package/skills/skills/canvas-design/LICENSE.txt +0 -202
  297. package/skills/skills/canvas-design/SKILL.md +0 -130
  298. package/skills/skills/canvas-design/canvas-fonts/ArsenalSC-OFL.txt +0 -93
  299. package/skills/skills/canvas-design/canvas-fonts/ArsenalSC-Regular.ttf +0 -0
  300. package/skills/skills/canvas-design/canvas-fonts/BigShoulders-Bold.ttf +0 -0
  301. package/skills/skills/canvas-design/canvas-fonts/BigShoulders-OFL.txt +0 -93
  302. package/skills/skills/canvas-design/canvas-fonts/BigShoulders-Regular.ttf +0 -0
  303. package/skills/skills/canvas-design/canvas-fonts/Boldonse-OFL.txt +0 -93
  304. package/skills/skills/canvas-design/canvas-fonts/Boldonse-Regular.ttf +0 -0
  305. package/skills/skills/canvas-design/canvas-fonts/BricolageGrotesque-Bold.ttf +0 -0
  306. package/skills/skills/canvas-design/canvas-fonts/BricolageGrotesque-OFL.txt +0 -93
  307. package/skills/skills/canvas-design/canvas-fonts/BricolageGrotesque-Regular.ttf +0 -0
  308. package/skills/skills/canvas-design/canvas-fonts/CrimsonPro-Bold.ttf +0 -0
  309. package/skills/skills/canvas-design/canvas-fonts/CrimsonPro-Italic.ttf +0 -0
  310. package/skills/skills/canvas-design/canvas-fonts/CrimsonPro-OFL.txt +0 -93
  311. package/skills/skills/canvas-design/canvas-fonts/CrimsonPro-Regular.ttf +0 -0
  312. package/skills/skills/canvas-design/canvas-fonts/DMMono-OFL.txt +0 -93
  313. package/skills/skills/canvas-design/canvas-fonts/DMMono-Regular.ttf +0 -0
  314. package/skills/skills/canvas-design/canvas-fonts/EricaOne-OFL.txt +0 -94
  315. package/skills/skills/canvas-design/canvas-fonts/EricaOne-Regular.ttf +0 -0
  316. package/skills/skills/canvas-design/canvas-fonts/GeistMono-Bold.ttf +0 -0
  317. package/skills/skills/canvas-design/canvas-fonts/GeistMono-OFL.txt +0 -93
  318. package/skills/skills/canvas-design/canvas-fonts/GeistMono-Regular.ttf +0 -0
  319. package/skills/skills/canvas-design/canvas-fonts/Gloock-OFL.txt +0 -93
  320. package/skills/skills/canvas-design/canvas-fonts/Gloock-Regular.ttf +0 -0
  321. package/skills/skills/canvas-design/canvas-fonts/IBMPlexMono-Bold.ttf +0 -0
  322. package/skills/skills/canvas-design/canvas-fonts/IBMPlexMono-OFL.txt +0 -93
  323. package/skills/skills/canvas-design/canvas-fonts/IBMPlexMono-Regular.ttf +0 -0
  324. package/skills/skills/canvas-design/canvas-fonts/IBMPlexSerif-Bold.ttf +0 -0
  325. package/skills/skills/canvas-design/canvas-fonts/IBMPlexSerif-BoldItalic.ttf +0 -0
  326. package/skills/skills/canvas-design/canvas-fonts/IBMPlexSerif-Italic.ttf +0 -0
  327. package/skills/skills/canvas-design/canvas-fonts/IBMPlexSerif-Regular.ttf +0 -0
  328. package/skills/skills/canvas-design/canvas-fonts/InstrumentSans-Bold.ttf +0 -0
  329. package/skills/skills/canvas-design/canvas-fonts/InstrumentSans-BoldItalic.ttf +0 -0
  330. package/skills/skills/canvas-design/canvas-fonts/InstrumentSans-Italic.ttf +0 -0
  331. package/skills/skills/canvas-design/canvas-fonts/InstrumentSans-OFL.txt +0 -93
  332. package/skills/skills/canvas-design/canvas-fonts/InstrumentSans-Regular.ttf +0 -0
  333. package/skills/skills/canvas-design/canvas-fonts/InstrumentSerif-Italic.ttf +0 -0
  334. package/skills/skills/canvas-design/canvas-fonts/InstrumentSerif-Regular.ttf +0 -0
  335. package/skills/skills/canvas-design/canvas-fonts/Italiana-OFL.txt +0 -93
  336. package/skills/skills/canvas-design/canvas-fonts/Italiana-Regular.ttf +0 -0
  337. package/skills/skills/canvas-design/canvas-fonts/JetBrainsMono-Bold.ttf +0 -0
  338. package/skills/skills/canvas-design/canvas-fonts/JetBrainsMono-OFL.txt +0 -93
  339. package/skills/skills/canvas-design/canvas-fonts/JetBrainsMono-Regular.ttf +0 -0
  340. package/skills/skills/canvas-design/canvas-fonts/Jura-Light.ttf +0 -0
  341. package/skills/skills/canvas-design/canvas-fonts/Jura-Medium.ttf +0 -0
  342. package/skills/skills/canvas-design/canvas-fonts/Jura-OFL.txt +0 -93
  343. package/skills/skills/canvas-design/canvas-fonts/LibreBaskerville-OFL.txt +0 -93
  344. package/skills/skills/canvas-design/canvas-fonts/LibreBaskerville-Regular.ttf +0 -0
  345. package/skills/skills/canvas-design/canvas-fonts/Lora-Bold.ttf +0 -0
  346. package/skills/skills/canvas-design/canvas-fonts/Lora-BoldItalic.ttf +0 -0
  347. package/skills/skills/canvas-design/canvas-fonts/Lora-Italic.ttf +0 -0
  348. package/skills/skills/canvas-design/canvas-fonts/Lora-OFL.txt +0 -93
  349. package/skills/skills/canvas-design/canvas-fonts/Lora-Regular.ttf +0 -0
  350. package/skills/skills/canvas-design/canvas-fonts/NationalPark-Bold.ttf +0 -0
  351. package/skills/skills/canvas-design/canvas-fonts/NationalPark-OFL.txt +0 -93
  352. package/skills/skills/canvas-design/canvas-fonts/NationalPark-Regular.ttf +0 -0
  353. package/skills/skills/canvas-design/canvas-fonts/NothingYouCouldDo-OFL.txt +0 -93
  354. package/skills/skills/canvas-design/canvas-fonts/NothingYouCouldDo-Regular.ttf +0 -0
  355. package/skills/skills/canvas-design/canvas-fonts/Outfit-Bold.ttf +0 -0
  356. package/skills/skills/canvas-design/canvas-fonts/Outfit-OFL.txt +0 -93
  357. package/skills/skills/canvas-design/canvas-fonts/Outfit-Regular.ttf +0 -0
  358. package/skills/skills/canvas-design/canvas-fonts/PixelifySans-Medium.ttf +0 -0
  359. package/skills/skills/canvas-design/canvas-fonts/PixelifySans-OFL.txt +0 -93
  360. package/skills/skills/canvas-design/canvas-fonts/PoiretOne-OFL.txt +0 -93
  361. package/skills/skills/canvas-design/canvas-fonts/PoiretOne-Regular.ttf +0 -0
  362. package/skills/skills/canvas-design/canvas-fonts/RedHatMono-Bold.ttf +0 -0
  363. package/skills/skills/canvas-design/canvas-fonts/RedHatMono-OFL.txt +0 -93
  364. package/skills/skills/canvas-design/canvas-fonts/RedHatMono-Regular.ttf +0 -0
  365. package/skills/skills/canvas-design/canvas-fonts/Silkscreen-OFL.txt +0 -93
  366. package/skills/skills/canvas-design/canvas-fonts/Silkscreen-Regular.ttf +0 -0
  367. package/skills/skills/canvas-design/canvas-fonts/SmoochSans-Medium.ttf +0 -0
  368. package/skills/skills/canvas-design/canvas-fonts/SmoochSans-OFL.txt +0 -93
  369. package/skills/skills/canvas-design/canvas-fonts/Tektur-Medium.ttf +0 -0
  370. package/skills/skills/canvas-design/canvas-fonts/Tektur-OFL.txt +0 -93
  371. package/skills/skills/canvas-design/canvas-fonts/Tektur-Regular.ttf +0 -0
  372. package/skills/skills/canvas-design/canvas-fonts/WorkSans-Bold.ttf +0 -0
  373. package/skills/skills/canvas-design/canvas-fonts/WorkSans-BoldItalic.ttf +0 -0
  374. package/skills/skills/canvas-design/canvas-fonts/WorkSans-Italic.ttf +0 -0
  375. package/skills/skills/canvas-design/canvas-fonts/WorkSans-OFL.txt +0 -93
  376. package/skills/skills/canvas-design/canvas-fonts/WorkSans-Regular.ttf +0 -0
  377. package/skills/skills/canvas-design/canvas-fonts/YoungSerif-OFL.txt +0 -93
  378. package/skills/skills/canvas-design/canvas-fonts/YoungSerif-Regular.ttf +0 -0
  379. package/skills/skills/doc-coauthoring/SKILL.md +0 -375
  380. package/skills/skills/docx/LICENSE.txt +0 -30
  381. package/skills/skills/docx/SKILL.md +0 -197
  382. package/skills/skills/docx/docx-js.md +0 -350
  383. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
  384. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
  385. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
  386. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
  387. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
  388. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
  389. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
  390. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
  391. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
  392. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
  393. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
  394. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
  395. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
  396. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
  397. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
  398. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
  399. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
  400. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
  401. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
  402. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
  403. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
  404. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
  405. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
  406. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
  407. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
  408. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
  409. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
  410. package/skills/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
  411. package/skills/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
  412. package/skills/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
  413. package/skills/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
  414. package/skills/skills/docx/ooxml/schemas/mce/mc.xsd +0 -75
  415. package/skills/skills/docx/ooxml/schemas/microsoft/wml-2010.xsd +0 -560
  416. package/skills/skills/docx/ooxml/schemas/microsoft/wml-2012.xsd +0 -67
  417. package/skills/skills/docx/ooxml/schemas/microsoft/wml-2018.xsd +0 -14
  418. package/skills/skills/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +0 -20
  419. package/skills/skills/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +0 -13
  420. package/skills/skills/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
  421. package/skills/skills/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +0 -8
  422. package/skills/skills/docx/ooxml/scripts/pack.py +0 -159
  423. package/skills/skills/docx/ooxml/scripts/unpack.py +0 -29
  424. package/skills/skills/docx/ooxml/scripts/validate.py +0 -69
  425. package/skills/skills/docx/ooxml/scripts/validation/__init__.py +0 -15
  426. package/skills/skills/docx/ooxml/scripts/validation/base.py +0 -951
  427. package/skills/skills/docx/ooxml/scripts/validation/docx.py +0 -274
  428. package/skills/skills/docx/ooxml/scripts/validation/pptx.py +0 -315
  429. package/skills/skills/docx/ooxml/scripts/validation/redlining.py +0 -279
  430. package/skills/skills/docx/ooxml.md +0 -610
  431. package/skills/skills/docx/scripts/__init__.py +0 -1
  432. package/skills/skills/docx/scripts/document.py +0 -1276
  433. package/skills/skills/docx/scripts/templates/comments.xml +0 -3
  434. package/skills/skills/docx/scripts/templates/commentsExtended.xml +0 -3
  435. package/skills/skills/docx/scripts/templates/commentsExtensible.xml +0 -3
  436. package/skills/skills/docx/scripts/templates/commentsIds.xml +0 -3
  437. package/skills/skills/docx/scripts/templates/people.xml +0 -3
  438. package/skills/skills/docx/scripts/utilities.py +0 -374
  439. package/skills/skills/frontend-design/LICENSE.txt +0 -177
  440. package/skills/skills/frontend-design/SKILL.md +0 -42
  441. package/skills/skills/internal-comms/LICENSE.txt +0 -202
  442. package/skills/skills/internal-comms/SKILL.md +0 -32
  443. package/skills/skills/internal-comms/examples/3p-updates.md +0 -47
  444. package/skills/skills/internal-comms/examples/company-newsletter.md +0 -65
  445. package/skills/skills/internal-comms/examples/faq-answers.md +0 -30
  446. package/skills/skills/internal-comms/examples/general-comms.md +0 -16
  447. package/skills/skills/mcp-builder/LICENSE.txt +0 -202
  448. package/skills/skills/mcp-builder/SKILL.md +0 -236
  449. package/skills/skills/mcp-builder/reference/evaluation.md +0 -602
  450. package/skills/skills/mcp-builder/reference/mcp_best_practices.md +0 -249
  451. package/skills/skills/mcp-builder/reference/node_mcp_server.md +0 -970
  452. package/skills/skills/mcp-builder/reference/python_mcp_server.md +0 -719
  453. package/skills/skills/mcp-builder/scripts/connections.py +0 -151
  454. package/skills/skills/mcp-builder/scripts/evaluation.py +0 -373
  455. package/skills/skills/mcp-builder/scripts/example_evaluation.xml +0 -22
  456. package/skills/skills/mcp-builder/scripts/requirements.txt +0 -2
  457. package/skills/skills/pdf/LICENSE.txt +0 -30
  458. package/skills/skills/pdf/SKILL.md +0 -294
  459. package/skills/skills/pdf/forms.md +0 -205
  460. package/skills/skills/pdf/reference.md +0 -612
  461. package/skills/skills/pdf/scripts/check_bounding_boxes.py +0 -70
  462. package/skills/skills/pdf/scripts/check_bounding_boxes_test.py +0 -226
  463. package/skills/skills/pdf/scripts/check_fillable_fields.py +0 -12
  464. package/skills/skills/pdf/scripts/convert_pdf_to_images.py +0 -35
  465. package/skills/skills/pdf/scripts/create_validation_image.py +0 -41
  466. package/skills/skills/pdf/scripts/extract_form_field_info.py +0 -152
  467. package/skills/skills/pdf/scripts/fill_fillable_fields.py +0 -114
  468. package/skills/skills/pdf/scripts/fill_pdf_form_with_annotations.py +0 -108
  469. package/skills/skills/pptx/LICENSE.txt +0 -30
  470. package/skills/skills/pptx/SKILL.md +0 -484
  471. package/skills/skills/pptx/html2pptx.md +0 -625
  472. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
  473. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
  474. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
  475. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
  476. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
  477. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
  478. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
  479. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
  480. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
  481. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
  482. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
  483. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
  484. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
  485. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
  486. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
  487. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
  488. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
  489. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
  490. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
  491. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
  492. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
  493. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
  494. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
  495. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
  496. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
  497. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
  498. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
  499. package/skills/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
  500. package/skills/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
  501. package/skills/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
  502. package/skills/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
  503. package/skills/skills/pptx/ooxml/schemas/mce/mc.xsd +0 -75
  504. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-2010.xsd +0 -560
  505. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-2012.xsd +0 -67
  506. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-2018.xsd +0 -14
  507. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd +0 -20
  508. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd +0 -13
  509. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
  510. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd +0 -8
  511. package/skills/skills/pptx/ooxml/scripts/pack.py +0 -159
  512. package/skills/skills/pptx/ooxml/scripts/unpack.py +0 -29
  513. package/skills/skills/pptx/ooxml/scripts/validate.py +0 -69
  514. package/skills/skills/pptx/ooxml/scripts/validation/__init__.py +0 -15
  515. package/skills/skills/pptx/ooxml/scripts/validation/base.py +0 -951
  516. package/skills/skills/pptx/ooxml/scripts/validation/docx.py +0 -274
  517. package/skills/skills/pptx/ooxml/scripts/validation/pptx.py +0 -315
  518. package/skills/skills/pptx/ooxml/scripts/validation/redlining.py +0 -279
  519. package/skills/skills/pptx/ooxml.md +0 -427
  520. package/skills/skills/pptx/scripts/html2pptx.js +0 -979
  521. package/skills/skills/pptx/scripts/inventory.py +0 -1020
  522. package/skills/skills/pptx/scripts/rearrange.py +0 -231
  523. package/skills/skills/pptx/scripts/replace.py +0 -385
  524. package/skills/skills/pptx/scripts/thumbnail.py +0 -450
  525. package/skills/skills/skill-creator/LICENSE.txt +0 -202
  526. package/skills/skills/skill-creator/SKILL.md +0 -356
  527. package/skills/skills/skill-creator/references/output-patterns.md +0 -82
  528. package/skills/skills/skill-creator/references/workflows.md +0 -28
  529. package/skills/skills/skill-creator/scripts/init_skill.py +0 -303
  530. package/skills/skills/skill-creator/scripts/package_skill.py +0 -110
  531. package/skills/skills/skill-creator/scripts/quick_validate.py +0 -95
  532. package/skills/skills/slack-gif-creator/LICENSE.txt +0 -202
  533. package/skills/skills/slack-gif-creator/SKILL.md +0 -254
  534. package/skills/skills/slack-gif-creator/core/easing.py +0 -234
  535. package/skills/skills/slack-gif-creator/core/frame_composer.py +0 -176
  536. package/skills/skills/slack-gif-creator/core/gif_builder.py +0 -269
  537. package/skills/skills/slack-gif-creator/core/validators.py +0 -136
  538. package/skills/skills/slack-gif-creator/requirements.txt +0 -4
  539. package/skills/skills/theme-factory/LICENSE.txt +0 -202
  540. package/skills/skills/theme-factory/SKILL.md +0 -59
  541. package/skills/skills/theme-factory/theme-showcase.pdf +0 -0
  542. package/skills/skills/theme-factory/themes/arctic-frost.md +0 -19
  543. package/skills/skills/theme-factory/themes/botanical-garden.md +0 -19
  544. package/skills/skills/theme-factory/themes/desert-rose.md +0 -19
  545. package/skills/skills/theme-factory/themes/forest-canopy.md +0 -19
  546. package/skills/skills/theme-factory/themes/golden-hour.md +0 -19
  547. package/skills/skills/theme-factory/themes/midnight-galaxy.md +0 -19
  548. package/skills/skills/theme-factory/themes/modern-minimalist.md +0 -19
  549. package/skills/skills/theme-factory/themes/ocean-depths.md +0 -19
  550. package/skills/skills/theme-factory/themes/sunset-boulevard.md +0 -19
  551. package/skills/skills/theme-factory/themes/tech-innovation.md +0 -19
  552. package/skills/skills/web-artifacts-builder/LICENSE.txt +0 -202
  553. package/skills/skills/web-artifacts-builder/SKILL.md +0 -74
  554. package/skills/skills/web-artifacts-builder/scripts/bundle-artifact.sh +0 -54
  555. package/skills/skills/web-artifacts-builder/scripts/init-artifact.sh +0 -322
  556. package/skills/skills/webapp-testing/LICENSE.txt +0 -202
  557. package/skills/skills/webapp-testing/SKILL.md +0 -96
  558. package/skills/skills/webapp-testing/examples/console_logging.py +0 -35
  559. package/skills/skills/webapp-testing/examples/element_discovery.py +0 -40
  560. package/skills/skills/webapp-testing/examples/static_html_automation.py +0 -33
  561. package/skills/skills/webapp-testing/scripts/with_server.py +0 -106
  562. package/skills/skills/xlsx/LICENSE.txt +0 -30
  563. package/skills/skills/xlsx/SKILL.md +0 -289
  564. package/skills/skills/xlsx/recalc.py +0 -178
  565. package/skills/spec/agent-skills-spec.md +0 -3
  566. package/skills/template/SKILL.md +0 -6
  567. package/src/ai-client.ts +0 -1560
  568. package/src/remote-ai-client.ts +0 -664
@@ -1,520 +1,540 @@
1
- /**
2
- * Computer Operator using @computer-use/nut-js
3
- * Provides desktop automation capabilities for gui-subagent
4
- * Based on UI-TARS NutJSOperator implementation
5
- *
6
- * This implementation is aligned with packages/ui-tars/operators/nut-js/src/index.ts
7
- */
8
-
9
- import {
10
- screen,
11
- Button,
12
- Key,
13
- Point,
14
- centerOf,
15
- keyboard,
16
- mouse,
17
- sleep,
18
- straightTo,
19
- clipboard,
20
- } from '@computer-use/nut-js';
21
- import { Jimp } from 'jimp';
22
- import type { OperatorConfig, ScreenContext, ScreenshotOutput, ExecuteParams, ExecuteOutput } from '../types/operator.js';
23
- import { Operator, type OperatorManual, parseBoxToScreenCoords } from './base-operator.js';
24
- import { getLogger } from '../../logger.js';
25
-
26
- const guiLogger = getLogger();
27
-
28
- export interface ComputerOperatorOptions {
29
- config?: OperatorConfig;
30
- computerConfig?: Record<string, any>;
31
- logger?: any;
32
- }
33
-
34
- export class ComputerOperator extends Operator {
35
- private config: OperatorConfig;
36
- private logger: any;
37
- private screenCtx: ScreenContext | null = null;
38
-
39
- constructor(options: ComputerOperatorOptions = {}) {
40
- super();
41
- this.config = options.config || {};
42
- this.logger = options.logger || guiLogger;
43
- }
44
-
45
- protected async initialize(): Promise<void> {
46
- this.logger.debug('Initializing computer operator...');
47
-
48
- try {
49
- const { width, height, scaleFactor } = await this.getScreenSize();
50
- this.screenCtx = {
51
- width,
52
- height,
53
- scaleFactor,
54
- };
55
-
56
- this.logger.debug(`Computer operator initialized: ${width}x${height} @ ${scaleFactor}x`);
57
- } catch (error) {
58
- this.logger.error('Failed to initialize computer operator:', error);
59
- throw error;
60
- }
61
- }
62
-
63
- private async getScreenSize(): Promise<{ width: number; height: number; scaleFactor: number }> {
64
- try {
65
- const grabImage = await screen.grab();
66
- const screenWithScale = await grabImage.toRGB();
67
- const scaleFactor = screenWithScale.pixelDensity.scaleX;
68
- const width = screenWithScale.width / scaleFactor;
69
- const height = screenWithScale.height / scaleFactor;
70
- return { width, height, scaleFactor };
71
- } catch {
72
- return {
73
- width: this.config.viewport?.width || 1920,
74
- height: this.config.viewport?.height || 1080,
75
- scaleFactor: this.config.deviceScaleFactor || 1,
76
- };
77
- }
78
- }
79
-
80
- getSupportedActions(): string[] {
81
- return [
82
- 'click',
83
- 'left_click',
84
- 'left_single',
85
- 'left_double',
86
- 'double_click',
87
- 'right_click',
88
- 'right_single',
89
- 'middle_click',
90
- 'mouse_move',
91
- 'hover',
92
- 'drag',
93
- 'left_click_drag',
94
- 'select',
95
- 'scroll',
96
- 'type',
97
- 'hotkey',
98
- 'press',
99
- 'release',
100
- 'open_url',
101
- 'wait',
102
- 'finished',
103
- 'user_stop',
104
- 'error_env',
105
- 'call_user',
106
- ];
107
- }
108
-
109
- protected screenContext(): ScreenContext {
110
- if (!this.screenCtx) {
111
- throw new Error('Screen context not initialized');
112
- }
113
- return this.screenCtx;
114
- }
115
-
116
- protected async screenshot(): Promise<ScreenshotOutput> {
117
- try {
118
- const grabImage = await screen.grab();
119
- const screenWithScale = await grabImage.toRGB();
120
- const scaleFactor = screenWithScale.pixelDensity.scaleX;
121
-
122
- const screenWithScaleImage = await Jimp.fromBitmap({
123
- width: screenWithScale.width,
124
- height: screenWithScale.height,
125
- data: Buffer.from(screenWithScale.data),
126
- });
127
-
128
- const width = screenWithScale.width / scaleFactor;
129
- const height = screenWithScale.height / scaleFactor;
130
-
131
- const physicalScreenImage = await screenWithScaleImage
132
- .resize({
133
- w: width,
134
- h: height,
135
- })
136
- .getBuffer('image/png');
137
-
138
- this.logger.debug(`[ComputerOperator] screenshot: ${width}x${height}, scaleFactor: ${scaleFactor}`);
139
-
140
- return {
141
- status: 'success',
142
- base64: physicalScreenImage.toString('base64'),
143
- scaleFactor,
144
- };
145
- } catch (error) {
146
- const errorMsg = error instanceof Error ? error.message : 'Unknown error';
147
- this.logger.warn(`[ComputerOperator] Screenshot failed: ${errorMsg}`);
148
- return {
149
- status: 'failed',
150
- errorMessage: errorMsg,
151
- };
152
- }
153
- }
154
-
155
- protected async execute(params: ExecuteParams): Promise<ExecuteOutput> {
156
- const { parsedPrediction, screenWidth, screenHeight, scaleFactor } = params;
157
- const { action_type, action_inputs } = parsedPrediction;
158
-
159
- // Empty or invalid action should return failed to avoid infinite loop
160
- if (!action_type || action_type.trim() === '') {
161
- this.logger.warn(`[ComputerOperator] Empty action, skipping step`);
162
- return {
163
- status: 'failed',
164
- errorMessage: 'Empty or invalid action type'
165
- };
166
- }
167
-
168
- const startBoxStr = action_inputs?.start_box || '';
169
- const { x: startX, y: startY } = parseBoxToScreenCoords({
170
- boxStr: startBoxStr,
171
- screenWidth,
172
- screenHeight,
173
- });
174
-
175
- mouse.config.mouseSpeed = 3600;
176
-
177
- // this.logger.debug('[ComputerOperator] execute', { action_type, startX, startY, scaleFactor });
178
-
179
- try {
180
- const result = await this.executeAction(action_type, action_inputs, { startX, startY, screenWidth, screenHeight, scaleFactor });
181
- if (result === 'end') {
182
- return { status: 'end' };
183
- }
184
-
185
- return { status: 'success' };
186
- } catch (error) {
187
- this.logger.error(`Failed to execute action ${action_type}:`, error);
188
- return {
189
- status: 'failed',
190
- errorMessage: (error as Error).message,
191
- };
192
- }
193
- }
194
-
195
- private async executeAction(
196
- actionType: string,
197
- inputs: Record<string, any>,
198
- context: { startX: number; startY: number; screenWidth: number; screenHeight: number; scaleFactor: number }
199
- ): Promise<'end' | void> {
200
- const { startX, startY, screenWidth, screenHeight, scaleFactor } = context;
201
-
202
- const moveStraightTo = async (x: number, y: number) => {
203
- await mouse.move(straightTo(new Point(x, y)));
204
- };
205
-
206
- const getHotkeys = (keyStr: string | undefined): Key[] => {
207
- if (keyStr) {
208
- const platformCommandKey = process.platform === 'darwin' ? Key.LeftCmd : Key.LeftWin;
209
- const platformCtrlKey = process.platform === 'darwin' ? Key.LeftCmd : Key.LeftControl;
210
- const keyMap = {
211
- return: Key.Enter,
212
- ctrl: platformCtrlKey,
213
- shift: Key.LeftShift,
214
- alt: Key.LeftAlt,
215
- 'page down': Key.PageDown,
216
- 'page up': Key.PageUp,
217
- meta: platformCommandKey,
218
- win: platformCommandKey,
219
- command: platformCommandKey,
220
- cmd: platformCommandKey,
221
- ',': Key.Comma,
222
- arrowup: Key.Up,
223
- arrowdown: Key.Down,
224
- arrowleft: Key.Left,
225
- arrowright: Key.Right,
226
- } as const;
227
-
228
- const lowercaseKeyMap = Object.fromEntries(
229
- Object.entries(Key).map(([k, v]) => [k.toLowerCase(), v]),
230
- ) as {
231
- [K in keyof typeof Key as Lowercase<K>]: (typeof Key)[K];
232
- };
233
-
234
- const keys = keyStr
235
- .split(/[\s+]+/)
236
- .map((k) => k.toLowerCase())
237
- .map(
238
- (k) =>
239
- keyMap[k as keyof typeof keyMap] ??
240
- lowercaseKeyMap[k as Lowercase<keyof typeof Key>],
241
- )
242
- .filter(Boolean);
243
- this.logger.debug('[ComputerOperator] hotkey:', keys);
244
- return keys;
245
- }
246
- return [];
247
- };
248
-
249
- switch (actionType) {
250
- case 'wait':
251
- this.logger.debug('[ComputerOperator] wait', inputs);
252
- await sleep(5000);
253
- break;
254
-
255
- case 'mouse_move':
256
- case 'hover':
257
- this.logger.debug('[ComputerOperator] mouse_move');
258
- await moveStraightTo(startX, startY);
259
- break;
260
-
261
- case 'click':
262
- case 'left_click':
263
- case 'left_single':
264
- this.logger.debug('[ComputerOperator] left_click');
265
- await moveStraightTo(startX, startY);
266
- await sleep(100);
267
- await mouse.click(Button.LEFT);
268
- break;
269
-
270
- case 'left_double':
271
- case 'double_click':
272
- this.logger.debug(`[ComputerOperator] ${actionType}(${startX}, ${startY})`);
273
- await moveStraightTo(startX, startY);
274
- await sleep(100);
275
- await mouse.doubleClick(Button.LEFT);
276
- break;
277
-
278
- case 'right_click':
279
- case 'right_single':
280
- this.logger.debug('[ComputerOperator] right_click');
281
- await moveStraightTo(startX, startY);
282
- await sleep(100);
283
- await mouse.click(Button.RIGHT);
284
- break;
285
-
286
- case 'middle_click':
287
- this.logger.debug('[ComputerOperator] middle_click');
288
- await moveStraightTo(startX, startY);
289
- await mouse.click(Button.MIDDLE);
290
- break;
291
-
292
- case 'drag':
293
- case 'left_click_drag':
294
- case 'select': {
295
- const endBoxStr = inputs?.end_box || '';
296
- if (endBoxStr) {
297
- const { x: endX, y: endY } = parseBoxToScreenCoords({
298
- boxStr: endBoxStr,
299
- screenWidth,
300
- screenHeight,
301
- });
302
-
303
- if (startX && startY && endX && endY) {
304
- this.logger.debug(
305
- `[ComputerOperator] drag coordinates: startX=${startX}, startY=${startY}, endX=${endX}, endY=${endY}`,
306
- );
307
- await moveStraightTo(startX, startY);
308
- await sleep(100);
309
- await mouse.drag(straightTo(new Point(endX, endY)));
310
- }
311
- }
312
- break;
313
- }
314
-
315
- case 'type': {
316
- const content = inputs.content?.trim();
317
- this.logger.debug('[ComputerOperator] type', content);
318
- if (content) {
319
- const stripContent = content.replace(/\\n$/, '').replace(/\n$/, '');
320
- keyboard.config.autoDelayMs = 0;
321
- if (process.platform === 'win32') {
322
- const originalClipboard = await clipboard.getContent();
323
- await clipboard.setContent(stripContent);
324
- await keyboard.pressKey(Key.LeftControl, Key.V);
325
- await sleep(50);
326
- await keyboard.releaseKey(Key.LeftControl, Key.V);
327
- await sleep(50);
328
- await clipboard.setContent(originalClipboard);
329
- } else {
330
- await keyboard.type(stripContent);
331
- }
332
-
333
- if (content.endsWith('\n') || content.endsWith('\\n')) {
334
- await keyboard.pressKey(Key.Enter);
335
- await keyboard.releaseKey(Key.Enter);
336
- }
337
-
338
- keyboard.config.autoDelayMs = 500;
339
- }
340
- break;
341
- }
342
-
343
- case 'hotkey': {
344
- const keyStr = inputs?.key || inputs?.hotkey;
345
- const keys = getHotkeys(keyStr);
346
- if (keys.length > 0) {
347
- await keyboard.pressKey(...keys);
348
- await keyboard.releaseKey(...keys);
349
- }
350
- break;
351
- }
352
-
353
- case 'press': {
354
- const keyStr = inputs?.key || inputs?.hotkey;
355
- const keys = getHotkeys(keyStr);
356
- if (keys.length > 0) {
357
- await keyboard.pressKey(...keys);
358
- }
359
- break;
360
- }
361
-
362
- case 'release': {
363
- const keyStr = inputs?.key || inputs?.hotkey;
364
- const keys = getHotkeys(keyStr);
365
- if (keys.length > 0) {
366
- await keyboard.releaseKey(...keys);
367
- }
368
- break;
369
- }
370
-
371
- case 'scroll': {
372
- const { direction } = inputs;
373
- if (startX !== null && startY !== null) {
374
- await moveStraightTo(startX, startY);
375
- }
376
-
377
- switch (direction?.toLowerCase()) {
378
- case 'up':
379
- await mouse.scrollUp(5 * 100);
380
- break;
381
- case 'down':
382
- await mouse.scrollDown(5 * 100);
383
- break;
384
- default:
385
- this.logger.warn(`[ComputerOperator] Unsupported scroll direction: ${direction}`);
386
- }
387
- break;
388
- }
389
-
390
- case 'open_url': {
391
- let url = inputs?.url || inputs?.content;
392
- if (!url) {
393
- throw new Error('No URL specified for open_url action');
394
- }
395
-
396
- // Ensure URL has protocol
397
- if (!/^https?:\/\//i.test(url)) {
398
- url = 'https://' + url;
399
- }
400
-
401
- this.logger.debug(`[ComputerOperator] Opening URL: ${url}`);
402
-
403
- // Use system command to open URL in default browser
404
- const { exec } = await import('child_process');
405
- const platform = process.platform;
406
-
407
- if (platform === 'win32') {
408
- // Windows: use start command
409
- await new Promise<void>((resolve, reject) => {
410
- exec(`start "" "${url}"`, (error) => {
411
- if (error) {
412
- this.logger.warn(`[ComputerOperator] Failed to open URL with start command: ${error.message}`);
413
- // Fallback: try using PowerShell
414
- exec(`powershell -Command "Start-Process '${url}'"`, (psError) => {
415
- if (psError) {
416
- reject(psError);
417
- } else {
418
- resolve();
419
- }
420
- });
421
- } else {
422
- resolve();
423
- }
424
- });
425
- });
426
- } else if (platform === 'darwin') {
427
- // macOS: use open command
428
- await new Promise<void>((resolve, reject) => {
429
- exec(`open "${url}"`, (error) => {
430
- if (error) {
431
- reject(error);
432
- } else {
433
- resolve();
434
- }
435
- });
436
- });
437
- } else {
438
- // Linux: use xdg-open
439
- await new Promise<void>((resolve, reject) => {
440
- exec(`xdg-open "${url}"`, (error) => {
441
- if (error) {
442
- reject(error);
443
- } else {
444
- resolve();
445
- }
446
- });
447
- });
448
- }
449
-
450
- // Wait for browser to open and page to load
451
- await sleep(2000);
452
- break;
453
- }
454
-
455
- case 'error_env':
456
- case 'call_user':
457
- case 'finished':
458
- case 'user_stop':
459
- this.logger.debug(`[ComputerOperator] ${actionType}`);
460
- return 'end';
461
-
462
- default:
463
- this.logger.warn(`[ComputerOperator] Unsupported action: ${actionType}`);
464
- }
465
- }
466
-
467
- async cleanup(): Promise<void> {
468
- this.logger.debug('Cleaning up computer operator...');
469
- }
470
-
471
- async destroyInstance(): Promise<void> {
472
- this.logger.debug('Destroying computer operator instance...');
473
- await this.cleanup();
474
- }
475
-
476
- static override get MANUAL(): OperatorManual {
477
- return {
478
- ACTION_SPACES: [
479
- // Mouse actions
480
- `click(start_box='[x1, y1, x2, y2]') # Single click (taskbar icons)`,
481
- `left_double(start_box='[x1, y1, x2, y2]') # Double click (desktop icons/folders)`,
482
- `right_single(start_box='[x1, y1, x2, y2]') # Right click`,
483
- `drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]') # Drag`,
484
-
485
- // Keyboard actions
486
- `hotkey(key='') # e.g., 'ctrl c', 'alt tab' (max 3 keys)`,
487
- `type(content='') # Use "\\n" at the end to submit`,
488
- `press(key='') # Single key press: 'enter', 'esc', 'tab', 'win', etc.`,
489
-
490
- // Navigation
491
- `open_url(url='https://xxx') # Open URL in default browser`,
492
-
493
- // Scroll
494
- `scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')`,
495
-
496
- // System
497
- `wait() # Sleep 5s and take a screenshot`,
498
- `finished() # Task completed`,
499
- `call_user() # Need user's help`,
500
- ],
501
-
502
- KEY_SPACE: {
503
- 'enter': 'Enter key',
504
- 'esc': 'Escape key',
505
- 'tab': 'Tab key',
506
- 'win': 'Windows key (or Command on Mac)',
507
- 'delete': 'Delete key',
508
- 'backspace': 'Backspace key',
509
- 'page up': 'Page Up',
510
- 'page down': 'Page Down',
511
- 'home': 'Home key',
512
- 'end': 'End key',
513
- 'arrow up': 'Up arrow',
514
- 'arrow down': 'Down arrow',
515
- 'arrow left': 'Left arrow',
516
- 'arrow right': 'Right arrow',
517
- },
518
- };
519
- }
520
- }
1
+ /**
2
+ * Computer Operator using @computer-use/nut-js
3
+ * Provides desktop automation capabilities for gui-subagent
4
+ * Based on UI-TARS NutJSOperator implementation
5
+ *
6
+ * This implementation is aligned with packages/ui-tars/operators/nut-js/src/index.ts
7
+ */
8
+
9
+ import {
10
+ screen,
11
+ Button,
12
+ Key,
13
+ Point,
14
+ centerOf,
15
+ keyboard,
16
+ mouse,
17
+ sleep,
18
+ straightTo,
19
+ clipboard,
20
+ } from '@computer-use/nut-js';
21
+ import { Jimp } from 'jimp';
22
+ import type { OperatorConfig, ScreenContext, ScreenshotOutput, ExecuteParams, ExecuteOutput } from '../types/operator.js';
23
+ import { Operator, type OperatorManual, parseBoxToScreenCoords } from './base-operator.js';
24
+ import { getLogger } from '../../logger.js';
25
+
26
+ const guiLogger = getLogger();
27
+
28
+ export interface ComputerOperatorOptions {
29
+ config?: OperatorConfig;
30
+ computerConfig?: Record<string, any>;
31
+ logger?: any;
32
+ }
33
+
34
+ export class ComputerOperator extends Operator {
35
+ private config: OperatorConfig;
36
+ private logger: any;
37
+ private screenCtx: ScreenContext | null = null;
38
+
39
+ constructor(options: ComputerOperatorOptions = {}) {
40
+ super();
41
+ this.config = options.config || {};
42
+ this.logger = options.logger || guiLogger;
43
+ }
44
+
45
+ protected async initialize(): Promise<void> {
46
+ this.logger.debug('Initializing computer operator...');
47
+
48
+ try {
49
+ const { width, height, scaleFactor } = await this.getScreenSize();
50
+ this.screenCtx = {
51
+ width,
52
+ height,
53
+ scaleFactor,
54
+ };
55
+
56
+ this.logger.debug(`Computer operator initialized: ${width}x${height} @ ${scaleFactor}x`);
57
+ } catch (error) {
58
+ this.logger.error('Failed to initialize computer operator:', error);
59
+ throw error;
60
+ }
61
+ }
62
+
63
+ private async getScreenSize(): Promise<{ width: number; height: number; scaleFactor: number }> {
64
+ try {
65
+ const grabImage = await screen.grab();
66
+ const screenWithScale = await grabImage.toRGB();
67
+ const scaleFactor = screenWithScale.pixelDensity.scaleX;
68
+ const width = screenWithScale.width / scaleFactor;
69
+ const height = screenWithScale.height / scaleFactor;
70
+ return { width, height, scaleFactor };
71
+ } catch {
72
+ return {
73
+ width: this.config.viewport?.width || 1920,
74
+ height: this.config.viewport?.height || 1080,
75
+ scaleFactor: this.config.deviceScaleFactor || 1,
76
+ };
77
+ }
78
+ }
79
+
80
+ getSupportedActions(): string[] {
81
+ return [
82
+ 'click',
83
+ 'left_click',
84
+ 'left_single',
85
+ 'left_double',
86
+ 'double_click',
87
+ 'right_click',
88
+ 'right_single',
89
+ 'middle_click',
90
+ 'mouse_move',
91
+ 'hover',
92
+ 'drag',
93
+ 'left_click_drag',
94
+ 'select',
95
+ 'scroll',
96
+ 'type',
97
+ 'hotkey',
98
+ 'press',
99
+ 'release',
100
+ 'open_url',
101
+ 'wait',
102
+ 'finished',
103
+ 'user_stop',
104
+ 'error_env',
105
+ ];
106
+ }
107
+
108
+ protected screenContext(): ScreenContext {
109
+ if (!this.screenCtx) {
110
+ throw new Error('Screen context not initialized');
111
+ }
112
+ return this.screenCtx;
113
+ }
114
+
115
+ protected async screenshot(): Promise<ScreenshotOutput> {
116
+ try {
117
+ const grabImage = await screen.grab();
118
+ const screenWithScale = await grabImage.toRGB();
119
+ const scaleFactor = screenWithScale.pixelDensity.scaleX;
120
+
121
+ const screenWithScaleImage = await Jimp.fromBitmap({
122
+ width: screenWithScale.width,
123
+ height: screenWithScale.height,
124
+ data: Buffer.from(screenWithScale.data),
125
+ });
126
+
127
+ const width = screenWithScale.width / scaleFactor;
128
+ const height = screenWithScale.height / scaleFactor;
129
+
130
+ const physicalScreenImage = await screenWithScaleImage
131
+ .resize({
132
+ w: width,
133
+ h: height,
134
+ })
135
+ .getBuffer('image/png');
136
+
137
+ this.logger.debug(`[ComputerOperator] screenshot: ${width}x${height}, scaleFactor: ${scaleFactor}`);
138
+
139
+ return {
140
+ status: 'success',
141
+ base64: physicalScreenImage.toString('base64'),
142
+ scaleFactor,
143
+ };
144
+ } catch (error) {
145
+ const errorMsg = error instanceof Error ? error.message : 'Unknown error';
146
+ this.logger.warn(`[ComputerOperator] Screenshot failed: ${errorMsg}`);
147
+ return {
148
+ status: 'failed',
149
+ errorMessage: errorMsg,
150
+ };
151
+ }
152
+ }
153
+
154
+ protected async execute(params: ExecuteParams): Promise<ExecuteOutput> {
155
+ const { parsedPrediction, screenWidth, screenHeight, scaleFactor } = params;
156
+ const { action_type, action_inputs } = parsedPrediction;
157
+
158
+ // Empty or invalid action should return needs_input to let main agent decide
159
+ if (!action_type || action_type.trim() === '') {
160
+ this.logger.debug(`[ComputerOperator] Empty action, returning to main agent for decision`);
161
+ return {
162
+ status: 'needs_input',
163
+ errorMessage: 'Empty or invalid action type - returned to main agent for decision'
164
+ };
165
+ }
166
+
167
+ const startBoxStr = action_inputs?.start_box || '';
168
+ const { x: startX, y: startY } = parseBoxToScreenCoords({
169
+ boxStr: startBoxStr,
170
+ screenWidth,
171
+ screenHeight,
172
+ });
173
+
174
+ mouse.config.mouseSpeed = 3600;
175
+
176
+ // this.logger.debug('[ComputerOperator] execute', { action_type, startX, startY, scaleFactor });
177
+
178
+ try {
179
+ const result = await this.executeAction(action_type, action_inputs, { startX, startY, screenWidth, screenHeight, scaleFactor });
180
+ if (result === 'end') {
181
+ return { status: 'end' };
182
+ }
183
+
184
+ return { status: 'success' };
185
+ } catch (error) {
186
+ this.logger.error(`Failed to execute action ${action_type}:`, error);
187
+ return {
188
+ status: 'failed',
189
+ errorMessage: (error as Error).message,
190
+ };
191
+ }
192
+ }
193
+
194
+ private async executeAction(
195
+ actionType: string,
196
+ inputs: Record<string, any>,
197
+ context: { startX: number; startY: number; screenWidth: number; screenHeight: number; scaleFactor: number }
198
+ ): Promise<'end' | void> {
199
+ const { startX, startY, screenWidth, screenHeight, scaleFactor } = context;
200
+
201
+ const moveStraightTo = async (x: number, y: number) => {
202
+ await mouse.move(straightTo(new Point(x, y)));
203
+ };
204
+
205
+ const getHotkeys = (keyStr: string | undefined): Key[] => {
206
+ if (keyStr) {
207
+ const platformCommandKey = process.platform === 'darwin' ? Key.LeftCmd : Key.LeftWin;
208
+ const platformCtrlKey = process.platform === 'darwin' ? Key.LeftCmd : Key.LeftControl;
209
+ const keyMap = {
210
+ return: Key.Enter,
211
+ ctrl: platformCtrlKey,
212
+ shift: Key.LeftShift,
213
+ alt: Key.LeftAlt,
214
+ 'page down': Key.PageDown,
215
+ 'page up': Key.PageUp,
216
+ meta: platformCommandKey,
217
+ win: platformCommandKey,
218
+ command: platformCommandKey,
219
+ cmd: platformCommandKey,
220
+ ',': Key.Comma,
221
+ arrowup: Key.Up,
222
+ arrowdown: Key.Down,
223
+ arrowleft: Key.Left,
224
+ arrowright: Key.Right,
225
+ } as const;
226
+
227
+ const lowercaseKeyMap = Object.fromEntries(
228
+ Object.entries(Key).map(([k, v]) => [k.toLowerCase(), v]),
229
+ ) as {
230
+ [K in keyof typeof Key as Lowercase<K>]: (typeof Key)[K];
231
+ };
232
+
233
+ const keys = keyStr
234
+ .split(/[\s+]+/)
235
+ .map((k) => k.toLowerCase())
236
+ .map(
237
+ (k) =>
238
+ keyMap[k as keyof typeof keyMap] ??
239
+ lowercaseKeyMap[k as Lowercase<keyof typeof Key>],
240
+ )
241
+ .filter(Boolean);
242
+ this.logger.debug('[ComputerOperator] hotkey:', keys);
243
+ return keys;
244
+ }
245
+ return [];
246
+ };
247
+
248
+ switch (actionType) {
249
+ case 'wait':
250
+ this.logger.debug('[ComputerOperator] wait', inputs);
251
+ await sleep(5000);
252
+ break;
253
+
254
+ case 'mouse_move':
255
+ case 'hover':
256
+ this.logger.debug('[ComputerOperator] mouse_move');
257
+ await moveStraightTo(startX, startY);
258
+ break;
259
+
260
+ case 'click':
261
+ case 'left_click':
262
+ case 'left_single':
263
+ this.logger.debug('[ComputerOperator] left_click');
264
+ await moveStraightTo(startX, startY);
265
+ await sleep(100);
266
+ await mouse.click(Button.LEFT);
267
+ break;
268
+
269
+ case 'left_double':
270
+ case 'double_click':
271
+ this.logger.debug(`[ComputerOperator] ${actionType}(${startX}, ${startY})`);
272
+ await moveStraightTo(startX, startY);
273
+ await sleep(100);
274
+ await mouse.doubleClick(Button.LEFT);
275
+ break;
276
+
277
+ case 'right_click':
278
+ case 'right_single':
279
+ this.logger.debug('[ComputerOperator] right_click');
280
+ await moveStraightTo(startX, startY);
281
+ await sleep(100);
282
+ await mouse.click(Button.RIGHT);
283
+ break;
284
+
285
+ case 'middle_click':
286
+ this.logger.debug('[ComputerOperator] middle_click');
287
+ await moveStraightTo(startX, startY);
288
+ await mouse.click(Button.MIDDLE);
289
+ break;
290
+
291
+ case 'drag':
292
+ case 'left_click_drag':
293
+ case 'select': {
294
+ const endBoxStr = inputs?.end_box || '';
295
+ if (endBoxStr) {
296
+ const { x: endX, y: endY } = parseBoxToScreenCoords({
297
+ boxStr: endBoxStr,
298
+ screenWidth,
299
+ screenHeight,
300
+ });
301
+
302
+ if (startX && startY && endX && endY) {
303
+ this.logger.debug(
304
+ `[ComputerOperator] drag coordinates: startX=${startX}, startY=${startY}, endX=${endX}, endY=${endY}`,
305
+ );
306
+ await moveStraightTo(startX, startY);
307
+ await sleep(100);
308
+ await mouse.drag(straightTo(new Point(endX, endY)));
309
+ }
310
+ }
311
+ break;
312
+ }
313
+
314
+ case 'type': {
315
+ const content = inputs.content?.trim();
316
+ this.logger.debug('[ComputerOperator] type', content);
317
+ if (content) {
318
+ const stripContent = content.replace(/\\n$/, '').replace(/\n$/, '');
319
+ keyboard.config.autoDelayMs = 0;
320
+ if (process.platform === 'win32') {
321
+ const originalClipboard = await clipboard.getContent();
322
+ await clipboard.setContent(stripContent);
323
+ await keyboard.pressKey(Key.LeftControl, Key.V);
324
+ await sleep(50);
325
+ await keyboard.releaseKey(Key.LeftControl, Key.V);
326
+ await sleep(50);
327
+ // Restore clipboard content with retry to handle clipboardy occasional panics on Windows
328
+ await restoreClipboardWithRetry(originalClipboard, this.logger);
329
+ } else {
330
+ await keyboard.type(stripContent);
331
+ }
332
+
333
+ if (content.endsWith('\n') || content.endsWith('\\n')) {
334
+ await keyboard.pressKey(Key.Enter);
335
+ await keyboard.releaseKey(Key.Enter);
336
+ }
337
+
338
+ keyboard.config.autoDelayMs = 500;
339
+ }
340
+ break;
341
+ }
342
+
343
+ case 'hotkey': {
344
+ const keyStr = inputs?.key || inputs?.hotkey;
345
+ const keys = getHotkeys(keyStr);
346
+ if (keys.length > 0) {
347
+ await keyboard.pressKey(...keys);
348
+ await keyboard.releaseKey(...keys);
349
+ }
350
+ break;
351
+ }
352
+
353
+ case 'press': {
354
+ const keyStr = inputs?.key || inputs?.hotkey;
355
+ const keys = getHotkeys(keyStr);
356
+ if (keys.length > 0) {
357
+ await keyboard.pressKey(...keys);
358
+ }
359
+ break;
360
+ }
361
+
362
+ case 'release': {
363
+ const keyStr = inputs?.key || inputs?.hotkey;
364
+ const keys = getHotkeys(keyStr);
365
+ if (keys.length > 0) {
366
+ await keyboard.releaseKey(...keys);
367
+ }
368
+ break;
369
+ }
370
+
371
+ case 'scroll': {
372
+ const { direction } = inputs;
373
+ if (startX !== null && startY !== null) {
374
+ await moveStraightTo(startX, startY);
375
+ }
376
+
377
+ switch (direction?.toLowerCase()) {
378
+ case 'up':
379
+ await mouse.scrollUp(5 * 100);
380
+ break;
381
+ case 'down':
382
+ await mouse.scrollDown(5 * 100);
383
+ break;
384
+ default:
385
+ this.logger.warn(`[ComputerOperator] Unsupported scroll direction: ${direction}`);
386
+ }
387
+ break;
388
+ }
389
+
390
+ case 'open_url': {
391
+ let url = inputs?.url || inputs?.content;
392
+ if (!url) {
393
+ throw new Error('No URL specified for open_url action');
394
+ }
395
+
396
+ // Ensure URL has protocol
397
+ if (!/^https?:\/\//i.test(url)) {
398
+ url = 'https://' + url;
399
+ }
400
+
401
+ this.logger.debug(`[ComputerOperator] Opening URL: ${url}`);
402
+
403
+ // Use system command to open URL in default browser
404
+ const { exec } = await import('child_process');
405
+ const platform = process.platform;
406
+
407
+ if (platform === 'win32') {
408
+ // Windows: use start command
409
+ await new Promise<void>((resolve, reject) => {
410
+ exec(`start "" "${url}"`, (error) => {
411
+ if (error) {
412
+ this.logger.warn(`[ComputerOperator] Failed to open URL with start command: ${error.message}`);
413
+ // Fallback: try using PowerShell
414
+ exec(`powershell -Command "Start-Process '${url}'"`, (psError) => {
415
+ if (psError) {
416
+ reject(psError);
417
+ } else {
418
+ resolve();
419
+ }
420
+ });
421
+ } else {
422
+ resolve();
423
+ }
424
+ });
425
+ });
426
+ } else if (platform === 'darwin') {
427
+ // macOS: use open command
428
+ await new Promise<void>((resolve, reject) => {
429
+ exec(`open "${url}"`, (error) => {
430
+ if (error) {
431
+ reject(error);
432
+ } else {
433
+ resolve();
434
+ }
435
+ });
436
+ });
437
+ } else {
438
+ // Linux: use xdg-open
439
+ await new Promise<void>((resolve, reject) => {
440
+ exec(`xdg-open "${url}"`, (error) => {
441
+ if (error) {
442
+ reject(error);
443
+ } else {
444
+ resolve();
445
+ }
446
+ });
447
+ });
448
+ }
449
+
450
+ // Wait for browser to open and page to load
451
+ await sleep(2000);
452
+ break;
453
+ }
454
+
455
+ case 'error_env':
456
+ case 'finished':
457
+ case 'user_stop':
458
+ this.logger.debug(`[ComputerOperator] ${actionType}`);
459
+ return 'end';
460
+
461
+ default:
462
+ this.logger.warn(`[ComputerOperator] Unsupported action: ${actionType}`);
463
+ }
464
+ }
465
+
466
/**
 * Cleanup hook for the operator.
 *
 * The implementation here only writes a debug-level log entry; no other
 * work is performed.
 */
async cleanup(): Promise<void> {
  const message = 'Cleaning up computer operator...';
  this.logger.debug(message);
}
469
+
470
/**
 * Tear down this operator instance.
 *
 * Writes a debug-level log entry and then awaits `cleanup()`, so the
 * returned promise settles only after cleanup has finished.
 */
async destroyInstance(): Promise<void> {
  const { logger } = this;
  logger.debug('Destroying computer operator instance...');
  await this.cleanup();
}
474
+
475
/**
 * Manual describing what this operator can do.
 *
 * - `ACTION_SPACES` lists the action call signatures, each followed by a
 *   short `#` hint.
 * - `KEY_SPACE` maps key names to human-readable descriptions.
 *
 * The `scroll` entry advertises only `down` and `up`: the `scroll` action
 * handler in this class implements just those two directions and merely logs
 * a warning for any other value, so listing `left`/`right` would invite
 * actions that silently do nothing.
 */
static override get MANUAL(): OperatorManual {
  return {
    ACTION_SPACES: [
      // Mouse actions
      `click(start_box='[x1, y1, x2, y2]') # Single click (taskbar icons)`,
      `left_double(start_box='[x1, y1, x2, y2]') # Double click (desktop icons/folders)`,
      `right_single(start_box='[x1, y1, x2, y2]') # Right click`,
      `drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]') # Drag`,

      // Keyboard actions
      `hotkey(key='') # e.g., 'ctrl c', 'alt tab' (max 3 keys)`,
      `type(content='') # Use "\\n" at the end to submit`,
      `press(key='') # Single key press: 'enter', 'esc', 'tab', 'win', etc.`,

      // Navigation
      `open_url(url='https://xxx') # Open URL in default browser`,

      // Scroll (only vertical scrolling is implemented by the scroll handler)
      `scroll(start_box='[x1, y1, x2, y2]', direction='down or up')`,

      // System
      `wait() # Sleep 5s and take a screenshot`,
      `finished() # Task completed`,
    ],

    KEY_SPACE: {
      'enter': 'Enter key',
      'esc': 'Escape key',
      'tab': 'Tab key',
      'win': 'Windows key (or Command on Mac)',
      'delete': 'Delete key',
      'backspace': 'Backspace key',
      'page up': 'Page Up',
      'page down': 'Page Down',
      'home': 'Home key',
      'end': 'End key',
      'arrow up': 'Up arrow',
      'arrow down': 'Down arrow',
      'arrow left': 'Left arrow',
      'arrow right': 'Right arrow',
    },
  };
}
518
+ }
519
+
520
/**
 * Write `content` back to the system clipboard, tolerating transient failures.
 *
 * Up to two attempts are made. A failed attempt that is not the last one is
 * followed by a 100 ms pause before retrying. If the final attempt also fails,
 * a warning is logged and the function resolves normally; the error is never
 * rethrown. Per the original note, this guards against clipboardy's
 * occasional panics on Windows.
 *
 * @param content - Text to place on the clipboard.
 * @param logger - Logger whose `warn` method receives the final failure.
 */
async function restoreClipboardWithRetry(content: string, logger: any): Promise<void> {
  const attemptLimit = 2;
  let attemptsLeft = attemptLimit;

  while (attemptsLeft > 0) {
    attemptsLeft -= 1;
    try {
      await clipboard.setContent(content);
      return;
    } catch (failure: any) {
      if (attemptsLeft > 0) {
        // More attempts remain: pause briefly, then try again.
        await sleep(100);
        continue;
      }
      // Out of attempts: report the failure and give up without throwing.
      logger.warn('[ComputerOperator] Failed to restore clipboard content after retries:', failure?.message || failure);
    }
  }
}