@xagent-ai/cli 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (537)
  1. package/.eslintrc.js +25 -0
  2. package/.gitmodules +3 -0
  3. package/.prettierrc.json +8 -0
  4. package/CONTRIBUTING.md +167 -0
  5. package/LICENSE +21 -0
  6. package/README.md +280 -0
  7. package/README_CN.md +280 -0
  8. package/dist/agents.d.ts +21 -0
  9. package/dist/agents.d.ts.map +1 -0
  10. package/dist/agents.js +463 -0
  11. package/dist/agents.js.map +1 -0
  12. package/dist/ai-client.d.ts +83 -0
  13. package/dist/ai-client.d.ts.map +1 -0
  14. package/dist/ai-client.js +1280 -0
  15. package/dist/ai-client.js.map +1 -0
  16. package/dist/auth.d.ts +25 -0
  17. package/dist/auth.d.ts.map +1 -0
  18. package/dist/auth.js +573 -0
  19. package/dist/auth.js.map +1 -0
  20. package/dist/cancellation.d.ts +46 -0
  21. package/dist/cancellation.d.ts.map +1 -0
  22. package/dist/cancellation.js +154 -0
  23. package/dist/cancellation.js.map +1 -0
  24. package/dist/checkpoint.d.ts +28 -0
  25. package/dist/checkpoint.d.ts.map +1 -0
  26. package/dist/checkpoint.js +186 -0
  27. package/dist/checkpoint.js.map +1 -0
  28. package/dist/cli.d.ts +3 -0
  29. package/dist/cli.d.ts.map +1 -0
  30. package/dist/cli.js +364 -0
  31. package/dist/cli.js.map +1 -0
  32. package/dist/config.d.ts +49 -0
  33. package/dist/config.d.ts.map +1 -0
  34. package/dist/config.js +205 -0
  35. package/dist/config.js.map +1 -0
  36. package/dist/context-compressor.d.ts +51 -0
  37. package/dist/context-compressor.d.ts.map +1 -0
  38. package/dist/context-compressor.js +231 -0
  39. package/dist/context-compressor.js.map +1 -0
  40. package/dist/conversation.d.ts +34 -0
  41. package/dist/conversation.d.ts.map +1 -0
  42. package/dist/conversation.js +221 -0
  43. package/dist/conversation.js.map +1 -0
  44. package/dist/gui-subagent/action-parser/actionParser.d.ts +19 -0
  45. package/dist/gui-subagent/action-parser/actionParser.d.ts.map +1 -0
  46. package/dist/gui-subagent/action-parser/actionParser.js +203 -0
  47. package/dist/gui-subagent/action-parser/actionParser.js.map +1 -0
  48. package/dist/gui-subagent/action-parser/constants.d.ts +8 -0
  49. package/dist/gui-subagent/action-parser/constants.d.ts.map +1 -0
  50. package/dist/gui-subagent/action-parser/constants.js +12 -0
  51. package/dist/gui-subagent/action-parser/constants.js.map +1 -0
  52. package/dist/gui-subagent/action-parser/index.d.ts +3 -0
  53. package/dist/gui-subagent/action-parser/index.d.ts.map +1 -0
  54. package/dist/gui-subagent/action-parser/index.js +6 -0
  55. package/dist/gui-subagent/action-parser/index.js.map +1 -0
  56. package/dist/gui-subagent/action-parser/types.d.ts +24 -0
  57. package/dist/gui-subagent/action-parser/types.d.ts.map +1 -0
  58. package/dist/gui-subagent/action-parser/types.js +12 -0
  59. package/dist/gui-subagent/action-parser/types.js.map +1 -0
  60. package/dist/gui-subagent/agent/gui-agent.d.ts +126 -0
  61. package/dist/gui-subagent/agent/gui-agent.d.ts.map +1 -0
  62. package/dist/gui-subagent/agent/gui-agent.js +820 -0
  63. package/dist/gui-subagent/agent/gui-agent.js.map +1 -0
  64. package/dist/gui-subagent/agent/index.d.ts +5 -0
  65. package/dist/gui-subagent/agent/index.d.ts.map +1 -0
  66. package/dist/gui-subagent/agent/index.js +5 -0
  67. package/dist/gui-subagent/agent/index.js.map +1 -0
  68. package/dist/gui-subagent/index.d.ts +43 -0
  69. package/dist/gui-subagent/index.d.ts.map +1 -0
  70. package/dist/gui-subagent/index.js +96 -0
  71. package/dist/gui-subagent/index.js.map +1 -0
  72. package/dist/gui-subagent/operator/base-operator.d.ts +108 -0
  73. package/dist/gui-subagent/operator/base-operator.d.ts.map +1 -0
  74. package/dist/gui-subagent/operator/base-operator.js +172 -0
  75. package/dist/gui-subagent/operator/base-operator.js.map +1 -0
  76. package/dist/gui-subagent/operator/browser-operator.d.ts +36 -0
  77. package/dist/gui-subagent/operator/browser-operator.d.ts.map +1 -0
  78. package/dist/gui-subagent/operator/browser-operator.js +306 -0
  79. package/dist/gui-subagent/operator/browser-operator.js.map +1 -0
  80. package/dist/gui-subagent/operator/computer-operator.d.ts +31 -0
  81. package/dist/gui-subagent/operator/computer-operator.d.ts.map +1 -0
  82. package/dist/gui-subagent/operator/computer-operator.js +441 -0
  83. package/dist/gui-subagent/operator/computer-operator.js.map +1 -0
  84. package/dist/gui-subagent/operator/desktop-operator.d.ts +55 -0
  85. package/dist/gui-subagent/operator/desktop-operator.d.ts.map +1 -0
  86. package/dist/gui-subagent/operator/desktop-operator.js +527 -0
  87. package/dist/gui-subagent/operator/desktop-operator.js.map +1 -0
  88. package/dist/gui-subagent/operator/index.d.ts +7 -0
  89. package/dist/gui-subagent/operator/index.d.ts.map +1 -0
  90. package/dist/gui-subagent/operator/index.js +6 -0
  91. package/dist/gui-subagent/operator/index.js.map +1 -0
  92. package/dist/gui-subagent/types/actions.d.ts +108 -0
  93. package/dist/gui-subagent/types/actions.d.ts.map +1 -0
  94. package/dist/gui-subagent/types/actions.js +39 -0
  95. package/dist/gui-subagent/types/actions.js.map +1 -0
  96. package/dist/gui-subagent/types/index.d.ts +6 -0
  97. package/dist/gui-subagent/types/index.d.ts.map +1 -0
  98. package/dist/gui-subagent/types/index.js +6 -0
  99. package/dist/gui-subagent/types/index.js.map +1 -0
  100. package/dist/gui-subagent/types/operator.d.ts +95 -0
  101. package/dist/gui-subagent/types/operator.d.ts.map +1 -0
  102. package/dist/gui-subagent/types/operator.js +16 -0
  103. package/dist/gui-subagent/types/operator.js.map +1 -0
  104. package/dist/gui-subagent/utils.d.ts +19 -0
  105. package/dist/gui-subagent/utils.d.ts.map +1 -0
  106. package/dist/gui-subagent/utils.js +42 -0
  107. package/dist/gui-subagent/utils.js.map +1 -0
  108. package/dist/hook.d.ts +73 -0
  109. package/dist/hook.d.ts.map +1 -0
  110. package/dist/hook.js +156 -0
  111. package/dist/hook.js.map +1 -0
  112. package/dist/index.d.ts +19 -0
  113. package/dist/index.d.ts.map +1 -0
  114. package/dist/index.js +19 -0
  115. package/dist/index.js.map +1 -0
  116. package/dist/input-history.d.ts +24 -0
  117. package/dist/input-history.d.ts.map +1 -0
  118. package/dist/input-history.js +94 -0
  119. package/dist/input-history.js.map +1 -0
  120. package/dist/input-processor.d.ts +31 -0
  121. package/dist/input-processor.d.ts.map +1 -0
  122. package/dist/input-processor.js +233 -0
  123. package/dist/input-processor.js.map +1 -0
  124. package/dist/keyboard-manager.d.ts +151 -0
  125. package/dist/keyboard-manager.d.ts.map +1 -0
  126. package/dist/keyboard-manager.js +396 -0
  127. package/dist/keyboard-manager.js.map +1 -0
  128. package/dist/logger.d.ts +75 -0
  129. package/dist/logger.d.ts.map +1 -0
  130. package/dist/logger.js +339 -0
  131. package/dist/logger.js.map +1 -0
  132. package/dist/mcp.d.ts +57 -0
  133. package/dist/mcp.d.ts.map +1 -0
  134. package/dist/mcp.js +483 -0
  135. package/dist/mcp.js.map +1 -0
  136. package/dist/memory.d.ts +25 -0
  137. package/dist/memory.d.ts.map +1 -0
  138. package/dist/memory.js +250 -0
  139. package/dist/memory.js.map +1 -0
  140. package/dist/print-system-prompt.d.ts +2 -0
  141. package/dist/print-system-prompt.d.ts.map +1 -0
  142. package/dist/print-system-prompt.js +40 -0
  143. package/dist/print-system-prompt.js.map +1 -0
  144. package/dist/session-manager.d.ts +41 -0
  145. package/dist/session-manager.d.ts.map +1 -0
  146. package/dist/session-manager.js +234 -0
  147. package/dist/session-manager.js.map +1 -0
  148. package/dist/session.d.ts +77 -0
  149. package/dist/session.d.ts.map +1 -0
  150. package/dist/session.js +1081 -0
  151. package/dist/session.js.map +1 -0
  152. package/dist/skill-invoker.d.ts +177 -0
  153. package/dist/skill-invoker.d.ts.map +1 -0
  154. package/dist/skill-invoker.js +1643 -0
  155. package/dist/skill-invoker.js.map +1 -0
  156. package/dist/skill-loader.d.ts +76 -0
  157. package/dist/skill-loader.d.ts.map +1 -0
  158. package/dist/skill-loader.js +407 -0
  159. package/dist/skill-loader.js.map +1 -0
  160. package/dist/slash-commands.d.ts +60 -0
  161. package/dist/slash-commands.d.ts.map +1 -0
  162. package/dist/slash-commands.js +1021 -0
  163. package/dist/slash-commands.js.map +1 -0
  164. package/dist/smart-approval.d.ts +137 -0
  165. package/dist/smart-approval.d.ts.map +1 -0
  166. package/dist/smart-approval.js +512 -0
  167. package/dist/smart-approval.js.map +1 -0
  168. package/dist/system-prompt-generator.d.ts +35 -0
  169. package/dist/system-prompt-generator.d.ts.map +1 -0
  170. package/dist/system-prompt-generator.js +729 -0
  171. package/dist/system-prompt-generator.js.map +1 -0
  172. package/dist/test-boundary-conditions.d.ts.map +1 -0
  173. package/dist/test-boundary-conditions.js.map +1 -0
  174. package/dist/test-cancellation-fix.d.ts.map +1 -0
  175. package/dist/test-cancellation-fix.js.map +1 -0
  176. package/dist/test-input-history.d.ts.map +1 -0
  177. package/dist/test-input-history.js.map +1 -0
  178. package/dist/test-interaction-flow.d.ts.map +1 -0
  179. package/dist/test-interaction-flow.js.map +1 -0
  180. package/dist/test-quick.d.ts.map +1 -0
  181. package/dist/test-quick.js.map +1 -0
  182. package/dist/test-user-interaction.d.ts.map +1 -0
  183. package/dist/test-user-interaction.js.map +1 -0
  184. package/dist/theme.d.ts +353 -0
  185. package/dist/theme.d.ts.map +1 -0
  186. package/dist/theme.js +383 -0
  187. package/dist/theme.js.map +1 -0
  188. package/dist/tools.d.ts +373 -0
  189. package/dist/tools.d.ts.map +1 -0
  190. package/dist/tools.js +2906 -0
  191. package/dist/tools.js.map +1 -0
  192. package/dist/types.d.ts +180 -0
  193. package/dist/types.d.ts.map +1 -0
  194. package/dist/types.js +23 -0
  195. package/dist/types.js.map +1 -0
  196. package/dist/unified-session.d.ts +42 -0
  197. package/dist/unified-session.d.ts.map +1 -0
  198. package/dist/unified-session.js +271 -0
  199. package/dist/unified-session.js.map +1 -0
  200. package/dist/update.d.ts +30 -0
  201. package/dist/update.d.ts.map +1 -0
  202. package/dist/update.js +211 -0
  203. package/dist/update.js.map +1 -0
  204. package/dist/workflow.d.ts +53 -0
  205. package/dist/workflow.d.ts.map +1 -0
  206. package/dist/workflow.js +405 -0
  207. package/dist/workflow.js.map +1 -0
  208. package/docs/architecture/mcp-integration-guide.md +131 -0
  209. package/docs/architecture/overview.md +93 -0
  210. package/docs/architecture/tool-system-design.md +89 -0
  211. package/docs/cli/commands.md +189 -0
  212. package/docs/smart-mode.md +257 -0
  213. package/docs/third-party-models.md +449 -0
  214. package/package.json +85 -0
  215. package/scripts/init-skills-path.js +58 -0
  216. package/skills/.claude-plugin/marketplace.json +45 -0
  217. package/skills/README.md +94 -0
  218. package/skills/THIRD_PARTY_NOTICES.md +405 -0
  219. package/skills/skills/algorithmic-art/LICENSE.txt +202 -0
  220. package/skills/skills/algorithmic-art/SKILL.md +405 -0
  221. package/skills/skills/algorithmic-art/templates/generator_template.js +223 -0
  222. package/skills/skills/algorithmic-art/templates/viewer.html +599 -0
  223. package/skills/skills/brand-guidelines/LICENSE.txt +202 -0
  224. package/skills/skills/brand-guidelines/SKILL.md +73 -0
  225. package/skills/skills/canvas-design/LICENSE.txt +202 -0
  226. package/skills/skills/canvas-design/SKILL.md +130 -0
  227. package/skills/skills/canvas-design/canvas-fonts/ArsenalSC-OFL.txt +93 -0
  228. package/skills/skills/canvas-design/canvas-fonts/ArsenalSC-Regular.ttf +0 -0
  229. package/skills/skills/canvas-design/canvas-fonts/BigShoulders-Bold.ttf +0 -0
  230. package/skills/skills/canvas-design/canvas-fonts/BigShoulders-OFL.txt +93 -0
  231. package/skills/skills/canvas-design/canvas-fonts/BigShoulders-Regular.ttf +0 -0
  232. package/skills/skills/canvas-design/canvas-fonts/Boldonse-OFL.txt +93 -0
  233. package/skills/skills/canvas-design/canvas-fonts/Boldonse-Regular.ttf +0 -0
  234. package/skills/skills/canvas-design/canvas-fonts/BricolageGrotesque-Bold.ttf +0 -0
  235. package/skills/skills/canvas-design/canvas-fonts/BricolageGrotesque-OFL.txt +93 -0
  236. package/skills/skills/canvas-design/canvas-fonts/BricolageGrotesque-Regular.ttf +0 -0
  237. package/skills/skills/canvas-design/canvas-fonts/CrimsonPro-Bold.ttf +0 -0
  238. package/skills/skills/canvas-design/canvas-fonts/CrimsonPro-Italic.ttf +0 -0
  239. package/skills/skills/canvas-design/canvas-fonts/CrimsonPro-OFL.txt +93 -0
  240. package/skills/skills/canvas-design/canvas-fonts/CrimsonPro-Regular.ttf +0 -0
  241. package/skills/skills/canvas-design/canvas-fonts/DMMono-OFL.txt +93 -0
  242. package/skills/skills/canvas-design/canvas-fonts/DMMono-Regular.ttf +0 -0
  243. package/skills/skills/canvas-design/canvas-fonts/EricaOne-OFL.txt +94 -0
  244. package/skills/skills/canvas-design/canvas-fonts/EricaOne-Regular.ttf +0 -0
  245. package/skills/skills/canvas-design/canvas-fonts/GeistMono-Bold.ttf +0 -0
  246. package/skills/skills/canvas-design/canvas-fonts/GeistMono-OFL.txt +93 -0
  247. package/skills/skills/canvas-design/canvas-fonts/GeistMono-Regular.ttf +0 -0
  248. package/skills/skills/canvas-design/canvas-fonts/Gloock-OFL.txt +93 -0
  249. package/skills/skills/canvas-design/canvas-fonts/Gloock-Regular.ttf +0 -0
  250. package/skills/skills/canvas-design/canvas-fonts/IBMPlexMono-Bold.ttf +0 -0
  251. package/skills/skills/canvas-design/canvas-fonts/IBMPlexMono-OFL.txt +93 -0
  252. package/skills/skills/canvas-design/canvas-fonts/IBMPlexMono-Regular.ttf +0 -0
  253. package/skills/skills/canvas-design/canvas-fonts/IBMPlexSerif-Bold.ttf +0 -0
  254. package/skills/skills/canvas-design/canvas-fonts/IBMPlexSerif-BoldItalic.ttf +0 -0
  255. package/skills/skills/canvas-design/canvas-fonts/IBMPlexSerif-Italic.ttf +0 -0
  256. package/skills/skills/canvas-design/canvas-fonts/IBMPlexSerif-Regular.ttf +0 -0
  257. package/skills/skills/canvas-design/canvas-fonts/InstrumentSans-Bold.ttf +0 -0
  258. package/skills/skills/canvas-design/canvas-fonts/InstrumentSans-BoldItalic.ttf +0 -0
  259. package/skills/skills/canvas-design/canvas-fonts/InstrumentSans-Italic.ttf +0 -0
  260. package/skills/skills/canvas-design/canvas-fonts/InstrumentSans-OFL.txt +93 -0
  261. package/skills/skills/canvas-design/canvas-fonts/InstrumentSans-Regular.ttf +0 -0
  262. package/skills/skills/canvas-design/canvas-fonts/InstrumentSerif-Italic.ttf +0 -0
  263. package/skills/skills/canvas-design/canvas-fonts/InstrumentSerif-Regular.ttf +0 -0
  264. package/skills/skills/canvas-design/canvas-fonts/Italiana-OFL.txt +93 -0
  265. package/skills/skills/canvas-design/canvas-fonts/Italiana-Regular.ttf +0 -0
  266. package/skills/skills/canvas-design/canvas-fonts/JetBrainsMono-Bold.ttf +0 -0
  267. package/skills/skills/canvas-design/canvas-fonts/JetBrainsMono-OFL.txt +93 -0
  268. package/skills/skills/canvas-design/canvas-fonts/JetBrainsMono-Regular.ttf +0 -0
  269. package/skills/skills/canvas-design/canvas-fonts/Jura-Light.ttf +0 -0
  270. package/skills/skills/canvas-design/canvas-fonts/Jura-Medium.ttf +0 -0
  271. package/skills/skills/canvas-design/canvas-fonts/Jura-OFL.txt +93 -0
  272. package/skills/skills/canvas-design/canvas-fonts/LibreBaskerville-OFL.txt +93 -0
  273. package/skills/skills/canvas-design/canvas-fonts/LibreBaskerville-Regular.ttf +0 -0
  274. package/skills/skills/canvas-design/canvas-fonts/Lora-Bold.ttf +0 -0
  275. package/skills/skills/canvas-design/canvas-fonts/Lora-BoldItalic.ttf +0 -0
  276. package/skills/skills/canvas-design/canvas-fonts/Lora-Italic.ttf +0 -0
  277. package/skills/skills/canvas-design/canvas-fonts/Lora-OFL.txt +93 -0
  278. package/skills/skills/canvas-design/canvas-fonts/Lora-Regular.ttf +0 -0
  279. package/skills/skills/canvas-design/canvas-fonts/NationalPark-Bold.ttf +0 -0
  280. package/skills/skills/canvas-design/canvas-fonts/NationalPark-OFL.txt +93 -0
  281. package/skills/skills/canvas-design/canvas-fonts/NationalPark-Regular.ttf +0 -0
  282. package/skills/skills/canvas-design/canvas-fonts/NothingYouCouldDo-OFL.txt +93 -0
  283. package/skills/skills/canvas-design/canvas-fonts/NothingYouCouldDo-Regular.ttf +0 -0
  284. package/skills/skills/canvas-design/canvas-fonts/Outfit-Bold.ttf +0 -0
  285. package/skills/skills/canvas-design/canvas-fonts/Outfit-OFL.txt +93 -0
  286. package/skills/skills/canvas-design/canvas-fonts/Outfit-Regular.ttf +0 -0
  287. package/skills/skills/canvas-design/canvas-fonts/PixelifySans-Medium.ttf +0 -0
  288. package/skills/skills/canvas-design/canvas-fonts/PixelifySans-OFL.txt +93 -0
  289. package/skills/skills/canvas-design/canvas-fonts/PoiretOne-OFL.txt +93 -0
  290. package/skills/skills/canvas-design/canvas-fonts/PoiretOne-Regular.ttf +0 -0
  291. package/skills/skills/canvas-design/canvas-fonts/RedHatMono-Bold.ttf +0 -0
  292. package/skills/skills/canvas-design/canvas-fonts/RedHatMono-OFL.txt +93 -0
  293. package/skills/skills/canvas-design/canvas-fonts/RedHatMono-Regular.ttf +0 -0
  294. package/skills/skills/canvas-design/canvas-fonts/Silkscreen-OFL.txt +93 -0
  295. package/skills/skills/canvas-design/canvas-fonts/Silkscreen-Regular.ttf +0 -0
  296. package/skills/skills/canvas-design/canvas-fonts/SmoochSans-Medium.ttf +0 -0
  297. package/skills/skills/canvas-design/canvas-fonts/SmoochSans-OFL.txt +93 -0
  298. package/skills/skills/canvas-design/canvas-fonts/Tektur-Medium.ttf +0 -0
  299. package/skills/skills/canvas-design/canvas-fonts/Tektur-OFL.txt +93 -0
  300. package/skills/skills/canvas-design/canvas-fonts/Tektur-Regular.ttf +0 -0
  301. package/skills/skills/canvas-design/canvas-fonts/WorkSans-Bold.ttf +0 -0
  302. package/skills/skills/canvas-design/canvas-fonts/WorkSans-BoldItalic.ttf +0 -0
  303. package/skills/skills/canvas-design/canvas-fonts/WorkSans-Italic.ttf +0 -0
  304. package/skills/skills/canvas-design/canvas-fonts/WorkSans-OFL.txt +93 -0
  305. package/skills/skills/canvas-design/canvas-fonts/WorkSans-Regular.ttf +0 -0
  306. package/skills/skills/canvas-design/canvas-fonts/YoungSerif-OFL.txt +93 -0
  307. package/skills/skills/canvas-design/canvas-fonts/YoungSerif-Regular.ttf +0 -0
  308. package/skills/skills/doc-coauthoring/SKILL.md +375 -0
  309. package/skills/skills/docx/LICENSE.txt +30 -0
  310. package/skills/skills/docx/SKILL.md +197 -0
  311. package/skills/skills/docx/docx-js.md +350 -0
  312. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  313. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  314. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  315. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  316. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  317. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  318. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  319. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  320. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  321. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  322. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  323. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  324. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  325. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  326. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  327. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  328. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  329. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  330. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  331. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  332. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  333. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  334. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  335. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  336. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  337. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  338. package/skills/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  339. package/skills/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  340. package/skills/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  341. package/skills/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  342. package/skills/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  343. package/skills/skills/docx/ooxml/schemas/mce/mc.xsd +75 -0
  344. package/skills/skills/docx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
  345. package/skills/skills/docx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
  346. package/skills/skills/docx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
  347. package/skills/skills/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
  348. package/skills/skills/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
  349. package/skills/skills/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  350. package/skills/skills/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
  351. package/skills/skills/docx/ooxml/scripts/pack.py +159 -0
  352. package/skills/skills/docx/ooxml/scripts/unpack.py +29 -0
  353. package/skills/skills/docx/ooxml/scripts/validate.py +69 -0
  354. package/skills/skills/docx/ooxml/scripts/validation/__init__.py +15 -0
  355. package/skills/skills/docx/ooxml/scripts/validation/base.py +951 -0
  356. package/skills/skills/docx/ooxml/scripts/validation/docx.py +274 -0
  357. package/skills/skills/docx/ooxml/scripts/validation/pptx.py +315 -0
  358. package/skills/skills/docx/ooxml/scripts/validation/redlining.py +279 -0
  359. package/skills/skills/docx/ooxml.md +610 -0
  360. package/skills/skills/docx/scripts/__init__.py +1 -0
  361. package/skills/skills/docx/scripts/document.py +1276 -0
  362. package/skills/skills/docx/scripts/templates/comments.xml +3 -0
  363. package/skills/skills/docx/scripts/templates/commentsExtended.xml +3 -0
  364. package/skills/skills/docx/scripts/templates/commentsExtensible.xml +3 -0
  365. package/skills/skills/docx/scripts/templates/commentsIds.xml +3 -0
  366. package/skills/skills/docx/scripts/templates/people.xml +3 -0
  367. package/skills/skills/docx/scripts/utilities.py +374 -0
  368. package/skills/skills/frontend-design/LICENSE.txt +177 -0
  369. package/skills/skills/frontend-design/SKILL.md +42 -0
  370. package/skills/skills/internal-comms/LICENSE.txt +202 -0
  371. package/skills/skills/internal-comms/SKILL.md +32 -0
  372. package/skills/skills/internal-comms/examples/3p-updates.md +47 -0
  373. package/skills/skills/internal-comms/examples/company-newsletter.md +65 -0
  374. package/skills/skills/internal-comms/examples/faq-answers.md +30 -0
  375. package/skills/skills/internal-comms/examples/general-comms.md +16 -0
  376. package/skills/skills/mcp-builder/LICENSE.txt +202 -0
  377. package/skills/skills/mcp-builder/SKILL.md +236 -0
  378. package/skills/skills/mcp-builder/reference/evaluation.md +602 -0
  379. package/skills/skills/mcp-builder/reference/mcp_best_practices.md +249 -0
  380. package/skills/skills/mcp-builder/reference/node_mcp_server.md +970 -0
  381. package/skills/skills/mcp-builder/reference/python_mcp_server.md +719 -0
  382. package/skills/skills/mcp-builder/scripts/connections.py +151 -0
  383. package/skills/skills/mcp-builder/scripts/evaluation.py +373 -0
  384. package/skills/skills/mcp-builder/scripts/example_evaluation.xml +22 -0
  385. package/skills/skills/mcp-builder/scripts/requirements.txt +2 -0
  386. package/skills/skills/pdf/LICENSE.txt +30 -0
  387. package/skills/skills/pdf/SKILL.md +294 -0
  388. package/skills/skills/pdf/forms.md +205 -0
  389. package/skills/skills/pdf/reference.md +612 -0
  390. package/skills/skills/pdf/scripts/check_bounding_boxes.py +70 -0
  391. package/skills/skills/pdf/scripts/check_bounding_boxes_test.py +226 -0
  392. package/skills/skills/pdf/scripts/check_fillable_fields.py +12 -0
  393. package/skills/skills/pdf/scripts/convert_pdf_to_images.py +35 -0
  394. package/skills/skills/pdf/scripts/create_validation_image.py +41 -0
  395. package/skills/skills/pdf/scripts/extract_form_field_info.py +152 -0
  396. package/skills/skills/pdf/scripts/fill_fillable_fields.py +114 -0
  397. package/skills/skills/pdf/scripts/fill_pdf_form_with_annotations.py +108 -0
  398. package/skills/skills/pptx/LICENSE.txt +30 -0
  399. package/skills/skills/pptx/SKILL.md +484 -0
  400. package/skills/skills/pptx/html2pptx.md +625 -0
  401. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  402. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  403. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  404. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  405. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  406. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  407. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  408. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  409. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  410. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  411. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  412. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  413. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  414. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  415. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  416. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  417. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  418. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  419. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  420. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  421. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  422. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  423. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  424. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  425. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  426. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  427. package/skills/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  428. package/skills/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  429. package/skills/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  430. package/skills/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  431. package/skills/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  432. package/skills/skills/pptx/ooxml/schemas/mce/mc.xsd +75 -0
  433. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
  434. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
  435. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
  436. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
  437. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
  438. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  439. package/skills/skills/pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
  440. package/skills/skills/pptx/ooxml/scripts/pack.py +159 -0
  441. package/skills/skills/pptx/ooxml/scripts/unpack.py +29 -0
  442. package/skills/skills/pptx/ooxml/scripts/validate.py +69 -0
  443. package/skills/skills/pptx/ooxml/scripts/validation/__init__.py +15 -0
  444. package/skills/skills/pptx/ooxml/scripts/validation/base.py +951 -0
  445. package/skills/skills/pptx/ooxml/scripts/validation/docx.py +274 -0
  446. package/skills/skills/pptx/ooxml/scripts/validation/pptx.py +315 -0
  447. package/skills/skills/pptx/ooxml/scripts/validation/redlining.py +279 -0
  448. package/skills/skills/pptx/ooxml.md +427 -0
  449. package/skills/skills/pptx/scripts/html2pptx.js +979 -0
  450. package/skills/skills/pptx/scripts/inventory.py +1020 -0
  451. package/skills/skills/pptx/scripts/rearrange.py +231 -0
  452. package/skills/skills/pptx/scripts/replace.py +385 -0
  453. package/skills/skills/pptx/scripts/thumbnail.py +450 -0
  454. package/skills/skills/skill-creator/LICENSE.txt +202 -0
  455. package/skills/skills/skill-creator/SKILL.md +356 -0
  456. package/skills/skills/skill-creator/references/output-patterns.md +82 -0
  457. package/skills/skills/skill-creator/references/workflows.md +28 -0
  458. package/skills/skills/skill-creator/scripts/init_skill.py +303 -0
  459. package/skills/skills/skill-creator/scripts/package_skill.py +110 -0
  460. package/skills/skills/skill-creator/scripts/quick_validate.py +95 -0
  461. package/skills/skills/slack-gif-creator/LICENSE.txt +202 -0
  462. package/skills/skills/slack-gif-creator/SKILL.md +254 -0
  463. package/skills/skills/slack-gif-creator/core/easing.py +234 -0
  464. package/skills/skills/slack-gif-creator/core/frame_composer.py +176 -0
  465. package/skills/skills/slack-gif-creator/core/gif_builder.py +269 -0
  466. package/skills/skills/slack-gif-creator/core/validators.py +136 -0
  467. package/skills/skills/slack-gif-creator/requirements.txt +4 -0
  468. package/skills/skills/theme-factory/LICENSE.txt +202 -0
  469. package/skills/skills/theme-factory/SKILL.md +59 -0
  470. package/skills/skills/theme-factory/theme-showcase.pdf +0 -0
  471. package/skills/skills/theme-factory/themes/arctic-frost.md +19 -0
  472. package/skills/skills/theme-factory/themes/botanical-garden.md +19 -0
  473. package/skills/skills/theme-factory/themes/desert-rose.md +19 -0
  474. package/skills/skills/theme-factory/themes/forest-canopy.md +19 -0
  475. package/skills/skills/theme-factory/themes/golden-hour.md +19 -0
  476. package/skills/skills/theme-factory/themes/midnight-galaxy.md +19 -0
  477. package/skills/skills/theme-factory/themes/modern-minimalist.md +19 -0
  478. package/skills/skills/theme-factory/themes/ocean-depths.md +19 -0
  479. package/skills/skills/theme-factory/themes/sunset-boulevard.md +19 -0
  480. package/skills/skills/theme-factory/themes/tech-innovation.md +19 -0
  481. package/skills/skills/web-artifacts-builder/LICENSE.txt +202 -0
  482. package/skills/skills/web-artifacts-builder/SKILL.md +74 -0
  483. package/skills/skills/web-artifacts-builder/scripts/bundle-artifact.sh +54 -0
  484. package/skills/skills/web-artifacts-builder/scripts/init-artifact.sh +322 -0
  485. package/skills/skills/webapp-testing/LICENSE.txt +202 -0
  486. package/skills/skills/webapp-testing/SKILL.md +96 -0
  487. package/skills/skills/webapp-testing/examples/console_logging.py +35 -0
  488. package/skills/skills/webapp-testing/examples/element_discovery.py +40 -0
  489. package/skills/skills/webapp-testing/examples/static_html_automation.py +33 -0
  490. package/skills/skills/webapp-testing/scripts/with_server.py +106 -0
  491. package/skills/skills/xlsx/LICENSE.txt +30 -0
  492. package/skills/skills/xlsx/SKILL.md +289 -0
  493. package/skills/skills/xlsx/recalc.py +178 -0
  494. package/skills/spec/agent-skills-spec.md +3 -0
  495. package/skills/template/SKILL.md +6 -0
  496. package/src/agents.ts +504 -0
  497. package/src/ai-client.ts +1456 -0
  498. package/src/auth.ts +648 -0
  499. package/src/cancellation.ts +176 -0
  500. package/src/checkpoint.ts +219 -0
  501. package/src/cli.ts +384 -0
  502. package/src/config.ts +248 -0
  503. package/src/context-compressor.ts +290 -0
  504. package/src/conversation.ts +288 -0
  505. package/src/gui-subagent/action-parser/actionParser.ts +312 -0
  506. package/src/gui-subagent/action-parser/constants.ts +12 -0
  507. package/src/gui-subagent/action-parser/index.ts +6 -0
  508. package/src/gui-subagent/action-parser/types.ts +31 -0
  509. package/src/gui-subagent/agent/gui-agent.ts +982 -0
  510. package/src/gui-subagent/agent/index.ts +5 -0
  511. package/src/gui-subagent/index.ts +139 -0
  512. package/src/gui-subagent/operator/base-operator.ts +246 -0
  513. package/src/gui-subagent/operator/computer-operator.ts +520 -0
  514. package/src/gui-subagent/operator/index.ts +7 -0
  515. package/src/gui-subagent/types/actions.ts +263 -0
  516. package/src/gui-subagent/types/index.ts +6 -0
  517. package/src/gui-subagent/types/operator.ts +106 -0
  518. package/src/gui-subagent/utils.ts +51 -0
  519. package/src/index.ts +18 -0
  520. package/src/input-processor.ts +282 -0
  521. package/src/logger.ts +438 -0
  522. package/src/mcp.ts +563 -0
  523. package/src/memory.ts +303 -0
  524. package/src/session-manager.ts +308 -0
  525. package/src/session.ts +1280 -0
  526. package/src/skill-invoker.ts +1888 -0
  527. package/src/skill-loader.ts +476 -0
  528. package/src/slash-commands.ts +1150 -0
  529. package/src/smart-approval.ts +595 -0
  530. package/src/system-prompt-generator.ts +786 -0
  531. package/src/theme.ts +455 -0
  532. package/src/tools.ts +3398 -0
  533. package/src/types.ts +198 -0
  534. package/src/update.ts +270 -0
  535. package/src/workflow.ts +508 -0
  536. package/tsconfig.json +22 -0
  537. package/vitest.config.ts +19 -0
@@ -0,0 +1,982 @@
1
+ /**
2
+ * GUI Agent for xagent
3
+ * Orchestrates desktop automation with AI-powered action execution
4
+ * Based on UI-TARS architecture with computer control only
5
+ *
6
+ * This implementation is aligned with packages/ui-tars/sdk/src/GUIAgent.ts
7
+ */
8
+
9
+ import type {
10
+ ScreenContext,
11
+ ScreenshotOutput,
12
+ ExecuteParams,
13
+ ExecuteOutput,
14
+ PredictionParsed,
15
+ } from '../types/operator.js';
16
+ import type { Operator } from '../operator/base-operator.js';
17
+ import { sleep, asyncRetry } from '../utils.js';
18
+ import { actionParser } from '../action-parser/index.js';
19
+ import { colors, icons, renderMarkdown } from '../../theme.js';
20
+ import { getLogger } from '../../logger.js';
21
+
22
/**
 * Shorten `text` for display.
 *
 * Text longer than `maxLength` UTF-16 code units is cut to at most `maxLength`
 * units and suffixed with '...'. The cut never lands between the two halves of
 * a surrogate pair: if it would, the whole character is dropped instead, so the
 * result never contains a lone surrogate.
 *
 * @param text - Text to shorten; any falsy value yields ''.
 * @param maxLength - Maximum number of code units kept before the ellipsis.
 * @returns The original text when it fits, otherwise the shortened text plus '...'.
 */
function truncateText(text: string, maxLength: number = 200): string {
  if (!text) return '';
  if (!(text.length > maxLength)) return text;

  let cut = maxLength;
  if (cut > 0) {
    const before = text.charCodeAt(cut - 1);
    const after = text.charCodeAt(cut);
    const splitsSurrogatePair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsSurrogatePair) {
      // Back off by one unit so the high surrogate is not left dangling.
      cut -= 1;
    }
  }

  return text.substring(0, cut) + '...';
}
29
+
30
/**
 * Prefix every line of `text` with `indent`.
 *
 * Lines are separated by '\n' only; an empty `text` still yields one
 * (empty) line and therefore returns `indent` itself.
 *
 * @param text - Possibly multi-line text.
 * @param indent - String placed in front of each line.
 * @returns The indented text, joined with '\n'.
 */
function indentMultiline(text: string, indent: string): string {
  const lineBreakWithIndent = '\n' + indent;
  return indent + text.split('\n').join(lineBreakWithIndent);
}
36
+
37
+ const guiLogger = getLogger(); // Module-level logger; the GUIAgent constructor falls back to it when config.logger is not set.
38
+
39
+ // UI-TARS status enum: lifecycle states that GUIAgent.run() stores in GUIAgentData.status.
40
+ export enum GUIAgentStatus {
41
+ INIT = 'init', // initial value of the data object created at the start of run()
42
+ RUNNING = 'running', // set once the operator is initialized; required for the main loop to continue
43
+ PAUSE = 'paused', // reported while the loop waits on the resume promise
44
+ END = 'end', // set when the model emits a 'finished' action
45
+ ERROR = 'error', // any failure; GUIAgentData.error carries the message
46
+ USER_STOPPED = 'user_stopped', // set when the AbortSignal is aborted or an abort error is caught
47
+ CALL_USER = 'call_user', // set when the model emits a 'call_user' action
48
+ }
49
+
50
+ export interface GUIAgentConfig<T extends Operator> { // Construction options for GUIAgent; everything except `operator` is optional.
51
+ operator: T; // performs initialization, screenshots, screen-context lookup and action execution
52
+ model?: string; // sent as `model` in the chat-completions request body; defaults to ''
53
+ modelBaseUrl?: string; // falls back to process.env.MODEL_BASE_URL, then 'https://api.openai.com/v1'
54
+ modelApiKey?: string; // falls back to process.env.MODEL_API_KEY, then ''
55
+ systemPrompt?: string; // defaults to the built-in prompt from buildSystemPrompt()
56
+ loopIntervalInMs?: number; // pause between loop iterations; defaults to 0 (no pause)
57
+ maxLoopCount?: number; // loop limit; a missing or 0 value falls back to MAX_LOOP_COUNT
58
+ logger?: any; // must provide debug/warn/error; defaults to getLogger()
59
+ signal?: AbortSignal; // aborting ends the run with USER_STOPPED; also passed to fetch()
60
+ onData?: (data: GUIAgentData) => void; // receives status changes and the newest conversation entry
61
+ onError?: (error: Error) => void; // called once at the end of run() when the final status is ERROR
62
+ showAIDebugInfo?: boolean; // enables verbose debug output; defaults to false
63
+ retry?: { // NOTE(review): in the code visible here run() only reads retry.model — confirm screenshot/execute are used elsewhere
64
+ screenshot?: {
65
+ maxRetries?: number;
66
+ onRetry?: (e: Error) => void;
67
+ };
68
+ model?: { // passed to asyncRetry around the model call
69
+ maxRetries?: number; // defaults to 0 (no retry)
70
+ onRetry?: (e: Error) => void;
71
+ };
72
+ execute?: {
73
+ maxRetries?: number;
74
+ onRetry?: (e: Error) => void;
75
+ };
76
+ };
77
+ }
78
+
79
+ export interface GUIAgentData { // Result object built and returned by GUIAgent.run(); copies of it are passed to onData.
80
+ status: GUIAgentStatus; // current / final lifecycle state
81
+ conversations: Conversation[]; // instruction, screenshots and model replies in order; onData receives only the newest entry or []
82
+ error?: string; // human-readable failure message, set together with status ERROR
83
+ systemPrompt?: string; // system prompt used for the run; assigned when the run starts
84
+ }
85
+
86
+ export interface Conversation { // One entry of the agent's history: the instruction, a screenshot, or a model reply.
87
+ from: 'human' | 'assistant'; // 'human' = instruction or screenshot, 'assistant' = model prediction
88
+ value: string; // instruction text, IMAGE_PLACEHOLDER for screenshots, or the model's reply
89
+ screenshotBase64?: string; // base64 image data; embedded into a data URL when messages are built
90
+ screenshotContext?: {
91
+ size: { width: number; height: number }; // taken from the operator's screen context
92
+ mime?: string; // NOTE(review): never set or read in the code visible here
93
+ scaleFactor: number; // the screenshot's scaleFactor, else the screen context's
94
+ };
95
+ actionType?: string; // NOTE(review): not populated in the code visible here — confirm usage
96
+ actionInputs?: Record<string, any>; // NOTE(review): not populated in the code visible here — confirm usage
97
+ timing?: { // all values are Date.now() milliseconds
98
+ start: number;
99
+ end: number;
100
+ cost: number; // end - start
101
+ };
102
+ predictionParsed?: PredictionParsed[]; // parsed actions attached to assistant entries
103
+ }
104
+
105
+ // UI-TARS constants (aligned with @ui-tars/shared/constants)
106
+ const MAX_LOOP_COUNT = 100; // default for GUIAgentConfig.maxLoopCount
107
+ const MAX_SNAPSHOT_ERR_CNT = 5; // run() stops with ERROR once the screenshot failure counter reaches this value
108
+ const MAX_STEP_RETRIES = 3; // Max attempts (total, not additional retries) for a single action step before giving up
109
+ const IMAGE_PLACEHOLDER = '{{IMG_PLACEHOLDER_0}}'; // stored as the text `value` of every screenshot conversation entry
110
+
111
+ export class GUIAgent<T extends Operator> {
112
+ private operator: T; // device operator driven by the agent
113
+ private readonly model: string; // model name sent with every request ('' when not configured)
114
+ private readonly modelBaseUrl: string; // '' means: fall back to the environment / default URL at call time
115
+ private readonly modelApiKey: string; // '' means: fall back to the environment at call time
116
+ private readonly systemPrompt: string; // configured prompt or the built-in default
117
+ private readonly loopIntervalInMs: number; // pause between loop iterations (0 = none)
118
+ private readonly maxLoopCount: number; // upper bound on loop iterations
119
+ private readonly logger: Console; // config.logger or the module-level guiLogger
120
+ private readonly signal?: AbortSignal; // optional cancellation signal
121
+ private readonly onData?: (data: GUIAgentData) => void; // progress callback
122
+ private readonly onError?: (error: Error) => void; // failure callback
123
+ private readonly showAIDebugInfo: boolean; // verbose debug output switch
124
+ private readonly retry?: GUIAgentConfig<T>['retry']; // retry options as passed in the config
125
+
126
+ private isPaused = false; // checked at the top of each run() iteration; NOTE(review): the code that sets it is outside this chunk
127
+ private resumePromise: Promise<void> | null = null; // awaited by run() while paused
128
+ private resolveResume: (() => void) | null = null; // presumably resolves resumePromise — confirm against the pause/resume methods
129
+ private isStopped = false; // when true, run() leaves its loop at the next check
130
+
131
/**
 * Create an agent from `config`, applying defaults for every optional setting.
 *
 * Falsy values for `model`, `modelBaseUrl`, `modelApiKey`, `loopIntervalInMs`,
 * `maxLoopCount`, `logger` and `systemPrompt` are replaced by their defaults
 * ('' / '' / '' / 0 / MAX_LOOP_COUNT / module logger / built-in prompt);
 * `showAIDebugInfo` defaults to false only when it is null or undefined.
 *
 * @param config - Agent configuration; only `operator` is required.
 */
constructor(config: GUIAgentConfig<T>) {
  const {
    operator,
    model,
    modelBaseUrl,
    modelApiKey,
    loopIntervalInMs,
    maxLoopCount,
    logger,
    signal,
    onData,
    onError,
    showAIDebugInfo,
    retry,
    systemPrompt,
  } = config;

  // Required collaborator.
  this.operator = operator;

  // Model connection settings.
  this.model = model || '';
  this.modelBaseUrl = modelBaseUrl || '';
  this.modelApiKey = modelApiKey || '';

  // Loop behaviour.
  this.loopIntervalInMs = loopIntervalInMs || 0;
  this.maxLoopCount = maxLoopCount || MAX_LOOP_COUNT;

  // Reporting and cancellation.
  this.logger = logger || guiLogger;
  this.signal = signal;
  this.onData = onData;
  this.onError = onError;
  this.showAIDebugInfo = showAIDebugInfo ?? false;
  this.retry = retry;

  // The built-in prompt is only generated when none was supplied.
  this.systemPrompt = systemPrompt || this.buildSystemPrompt();
}
147
+
148
/**
 * Print a compact console summary of one conversation entry.
 *
 * - Assistant entries: `[iteration] <action type> (<cost>ms)`, followed - only
 *   when `showAIDebugInfo` is on - by the action text (the part after
 *   `Thought: ... Action:`) shortened to 60 characters.
 * - Human entries carrying a screenshot: a "screenshot" marker, only when
 *   `showAIDebugInfo` is on.
 * - Anything else prints nothing.
 *
 * @param conversation - Entry to summarize.
 * @param iteration - Loop iteration number shown in brackets.
 * @param indentLevel - Number of indent units placed before the line (default 1).
 */
private displayConversationResult(conversation: Conversation, iteration: number, indentLevel: number = 1): void {
  const indent = ' '.repeat(indentLevel);
  const timing = conversation.timing;

  if (conversation.from === 'assistant') {
    const actionType = conversation.predictionParsed?.[0]?.action_type || 'action';
    console.log(`${indent}${colors.primaryBright(`[${iteration}]`)} ${colors.textMuted(actionType)}${timing ? colors.textDim(` (${timing.cost}ms)`) : ''}`);

    if (this.showAIDebugInfo) {
      // Strip the leading "Thought: ... Action:" part so only the action call remains.
      const actionSummary = (conversation.value || '').replace(/Thought:[\s\S]*?Action:\s*/i, '').trim();
      if (actionSummary) {
        const innerIndent = ' '.repeat(indentLevel + 1);
        // Reuse the shared helper instead of re-implementing the truncation inline.
        console.log(`${innerIndent}${colors.textMuted(truncateText(actionSummary, 60))}`);
      }
    }
    return;
  }

  if (conversation.from === 'human' && conversation.screenshotBase64 && this.showAIDebugInfo) {
    console.log(`${indent}${colors.textMuted(`${icons.loading} screenshot${timing ? ` (${timing.cost}ms)` : ''}`)}`);
  }
}
180
+
181
/**
 * Print a one-line console message describing the agent's status.
 *
 * RUNNING, CALL_USER and USER_STOPPED always print; ERROR prints only when an
 * error message is present; every other status (including END, which the
 * caller reports itself) prints nothing.
 *
 * @param data - Agent data whose `status` / `error` are shown.
 * @param iteration - Step number included in the RUNNING message.
 * @param indentLevel - Number of indent units placed before the line (default 1).
 */
private displayStatus(data: GUIAgentData, iteration: number, indentLevel: number = 1): void {
  const pad = ' '.repeat(indentLevel);
  const { status, error } = data;

  if (status === GUIAgentStatus.RUNNING) {
    console.log(`${pad}${colors.info(`${icons.loading} Step ${iteration}: Running...`)}`);
    return;
  }

  if (status === GUIAgentStatus.ERROR) {
    if (error) {
      console.log(`${pad}${colors.error(`${icons.cross} ${error}`)}`);
    }
    return;
  }

  if (status === GUIAgentStatus.CALL_USER) {
    console.log(`${pad}${colors.warning(`${icons.warning} Needs user input`)}`);
    return;
  }

  if (status === GUIAgentStatus.USER_STOPPED) {
    console.log(`${pad}${colors.warning(`${icons.warning} Stopped`)}`);
  }
}
210
+
211
/**
 * Build the default system prompt (output format, action space and notes).
 *
 * Backslashes that must reach the model literally (`\'`, `\"`, `\n`) are
 * written as `\\` here: inside a template literal a single backslash starts an
 * escape sequence, so `\n` would otherwise become a real line break and the
 * quote escapes would lose their backslash.
 *
 * @param language - Language the model is told to use in its `Thought` part;
 *   substituted for the `{language}` placeholder. Defaults to 'English'.
 * @returns The complete prompt text.
 */
private buildSystemPrompt(language: string = 'English'): string {
  const template = `You are a GUI agent. You are given a task and your action history, with screenshots. You need to perform the next action to complete the task.

## Output Format
\`
Thought: ...
Action: ...
\`

## Action Space
click(point='<point>x1 y1</point>')
left_double(point='<point>x1 y1</point>')
right_single(point='<point>x1 y1</point>')
drag(start_point='<point>x1 y1</point>', end_point='<point>x2 y2</point>')
hotkey(key='ctrl c') # Split keys with a space and use lowercase. Also, do not use more than 3 keys in one hotkey action.
type(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format. If you want to submit your input, use \\n at the end of content.
scroll(point='<point>x1 y1</point>', direction='down or up or right or left') # Show more information on the \`direction\` side.
open_url(url='https://xxx') # Open URL in browser
wait() #Sleep for 5s and take a screenshot to check for any changes.
finished(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format.




## Note
- Use {language} in \`Thought\` part.
- Write a small plan and finally summarize your next action (with its target element) in one sentence in \`Thought\` part.

`;

  // A replacer function keeps any `$` in `language` from being read as a replacement pattern.
  return template.replace('{language}', () => language);
}
241
+
242
+
243
+
244
/**
 * Initialize the underlying operator.
 *
 * run() performs the same initialization on its own, so calling this first
 * is optional.
 */
async initialize(): Promise<void> {
  const { operator } = this;
  await operator.doInitialize();
}
247
+
248
/**
 * Run the GUI agent with a single instruction (UI-TARS style).
 * All operations are determined by the GUI model.
 *
 * Each loop iteration takes a screenshot, sends the whole conversation to the
 * model, records the prediction and executes the parsed actions through the
 * operator. The loop ends when the model emits `finished` or `call_user`, the
 * agent is stopped or its signal is aborted, the loop limit is reached, or a
 * screenshot / model / action failure occurs.
 *
 * Operator, model and action failures are reported through the returned data
 * (`status` = ERROR plus `error`), printed to the console and forwarded to
 * `onError`. An operator initialization failure returns early with ERROR and
 * does not invoke the callbacks.
 *
 * @param instruction - Task description; stored as the first conversation entry.
 * @returns The final agent data including the full conversation history.
 */
async run(instruction: string): Promise<GUIAgentData> {
  const createdAt = Date.now();
  const data: GUIAgentData = {
    status: GUIAgentStatus.INIT,
    conversations: [
      {
        from: 'human',
        value: instruction,
        timing: {
          start: createdAt,
          end: createdAt,
          cost: 0,
        },
      },
    ],
  };

  // Initialize operator for initial screenshot
  try {
    await this.operator.doInitialize();
  } catch (initError) {
    const errorMsg = initError instanceof Error ? initError.message : 'Unknown error';
    this.logger.error(`[GUIAgent] Failed to initialize operator: ${errorMsg}`);

    data.status = GUIAgentStatus.ERROR;
    // Check if it's an RDP-related issue
    if (errorMsg.includes('screen') || errorMsg.includes('capture') || errorMsg.includes('display')) {
      data.error = 'Failed to initialize screen capture. This may be caused by:\n' +
        ' 1. Remote Desktop session disconnected or minimized\n' +
        ' 2. Display driver issues\n' +
        'Suggestion: Ensure your display is active and try again.';
    } else {
      data.error = `Failed to initialize operator: ${errorMsg}`;
    }
    return data;
  }

  if (this.showAIDebugInfo) {
    this.logger.debug('[GUIAgent] run:', {
      systemPrompt: this.systemPrompt,
      model: this.model,
      maxLoopCount: this.maxLoopCount,
    });
  }

  let loopCnt = 0;
  let snapshotErrCnt = 0;

  // Shared handling for every kind of screenshot failure: count it, mark the
  // run as failed, log, and wait briefly. The caller leaves the loop afterwards.
  const recordScreenshotFailure = async (): Promise<void> => {
    snapshotErrCnt += 1;
    data.status = GUIAgentStatus.ERROR;
    data.error = `Screenshot failed ${snapshotErrCnt} times. Stopping task.`;
    this.logger.error(`[GUIAgent] ${data.error}`);
    await sleep(1000);
  };

  // Start running agent
  data.status = GUIAgentStatus.RUNNING;
  data.systemPrompt = this.systemPrompt;
  console.log(`${colors.primaryBright(`${icons.rocket} GUI Agent started`)}`);
  console.log('');
  await this.onData?.({ ...data, conversations: [] });

  try {
    // eslint-disable-next-line no-constant-condition
    while (true) {
      if (this.showAIDebugInfo) {
        this.logger.debug('[GUIAgent] loopCnt:', loopCnt);
      }

      // Check pause status
      if (this.isPaused && this.resumePromise) {
        data.status = GUIAgentStatus.PAUSE;
        await this.onData?.({ ...data, conversations: [] });
        await this.resumePromise;
        data.status = GUIAgentStatus.RUNNING;
        await this.onData?.({ ...data, conversations: [] });
      }

      // Check stop or aborted status. Any status other than RUNNING (set by the
      // action loop of the previous iteration) also ends the run here.
      if (
        this.isStopped ||
        data.status !== GUIAgentStatus.RUNNING ||
        this.signal?.aborted
      ) {
        if (this.signal?.aborted) {
          data.status = GUIAgentStatus.USER_STOPPED;
        }
        break;
      }

      // Check loop limit
      if (loopCnt >= this.maxLoopCount) {
        data.status = GUIAgentStatus.ERROR;
        data.error = `Has reached max loop count: ${loopCnt}`;
        break;
      }

      // Check screenshot error limit
      if (snapshotErrCnt >= MAX_SNAPSHOT_ERR_CNT) {
        data.status = GUIAgentStatus.ERROR;
        data.error = 'Screenshot failed too many times. Stopping task.';
        break;
      }

      loopCnt += 1;
      const start = Date.now();

      // Take screenshot (single attempt - no retry to avoid infinite loops)
      let snapshot: ScreenshotOutput;
      try {
        snapshot = await this.operator.doScreenshot();
      } catch (screenshotError) {
        const errorMsg = screenshotError instanceof Error ? screenshotError.message : 'Unknown error';
        this.logger.warn(`[GUIAgent] Screenshot exception: ${errorMsg}`);
        await recordScreenshotFailure();
        break;
      }

      // Check if screenshot returned failure status
      if (snapshot.status === 'failed') {
        const errorMsg = snapshot.errorMessage || 'Unknown error';
        this.logger.warn(`[GUIAgent] Screenshot failed: ${errorMsg}`);
        await recordScreenshotFailure();
        break;
      }

      // Check abort immediately after screenshot
      if (this.signal?.aborted) {
        data.status = GUIAgentStatus.USER_STOPPED;
        break;
      }

      // Validate screenshot
      if (!snapshot?.base64) {
        await recordScreenshotFailure();
        break;
      }

      // Reset error counter on successful screenshot
      snapshotErrCnt = 0;

      const end = Date.now();

      // Get screen context
      const screenContext = await this.operator.getScreenContext();
      const scaleFactor = snapshot.scaleFactor ?? screenContext.scaleFactor;

      // Add screenshot to conversation
      const screenshotEntry: Conversation = {
        from: 'human',
        value: IMAGE_PLACEHOLDER,
        screenshotBase64: snapshot.base64,
        screenshotContext: {
          size: {
            width: screenContext.width,
            height: screenContext.height,
          },
          scaleFactor,
        },
        timing: {
          start,
          end,
          cost: end - start,
        },
      };
      data.conversations.push(screenshotEntry);

      await this.onData?.({
        ...data,
        conversations: data.conversations.slice(-1),
      });

      // Display screenshot notification
      this.displayConversationResult(screenshotEntry, loopCnt);

      // Build messages for model (data.systemPrompt was set from this.systemPrompt above)
      const messages = this.buildModelMessages(data.conversations, this.systemPrompt);

      // Check abort before model call
      if (this.signal?.aborted) {
        data.status = GUIAgentStatus.USER_STOPPED;
        break;
      }

      // Invoke model with retry
      let prediction: string;
      let parsedPredictions: PredictionParsed[];
      try {
        const modelResult: { prediction: string; parsedPredictions: PredictionParsed[] } = await asyncRetry(
          async (bail) => {
            try {
              return await this.callModelAPI(messages, screenContext);
            } catch (error: unknown) {
              if (
                error instanceof Error &&
                (error.name === 'AbortError' ||
                  error.message?.includes('aborted'))
              ) {
                // Aborts must not be retried: bail out of asyncRetry immediately.
                bail(error as Error);
                return { prediction: '', parsedPredictions: [] };
              }
              throw error;
            }
          },
          {
            retries: this.retry?.model?.maxRetries ?? 0,
            minTimeout: 1000 * 30,
            onRetry: this.retry?.model?.onRetry,
          }
        );
        prediction = modelResult.prediction;
        parsedPredictions = modelResult.parsedPredictions;
      } catch (modelError) {
        // Handle multimodal model API errors with specific error messages
        data.status = GUIAgentStatus.ERROR;
        const errorMsg = modelError instanceof Error ? modelError.message : String(modelError);

        // Provide specific error message based on error type
        if (errorMsg.includes('401') || errorMsg.includes('authentication') || errorMsg.includes('API key') || errorMsg.includes('api_key') || errorMsg.includes('Unauthorized') || errorMsg.includes('invalid_api_key')) {
          data.error = '[Multimodal Model Authentication Failed] The guiSubagentApiKey configuration is invalid.\n' +
            'Error details: HTTP 401 - API key is invalid or expired\n' +
            'Suggested action: Please check the guiSubagentApiKey configuration in ~/.xagent/settings.json and ensure a valid API key is set';
        } else if (errorMsg.includes('429') || errorMsg.includes('rate limit') || errorMsg.includes('too many requests')) {
          data.error = '[Multimodal Model Rate Limit Exceeded] API requests exceed rate limit.\n' +
            'Error details: HTTP 429 - Too Many Requests\n' +
            'Suggested action: Please retry later, or check your API account quota settings. Wait a few minutes before retrying';
        } else if (errorMsg.includes('network') || errorMsg.includes('fetch') || errorMsg.includes('connection') || errorMsg.includes('ECONNREFUSED')) {
          data.error = '[Multimodal Model Network Error] Cannot connect to API service.\n' +
            'Error details: Network connection failed. Possible causes:\n' +
            ' 1. Network connection is lost\n' +
            ' 2. The guiSubagentBaseUrl configuration is incorrect\n' +
            ' 3. API service endpoint is unreachable\n' +
            'Suggested action: Please check the guiSubagentBaseUrl configuration in ~/.xagent/settings.json and ensure network connectivity';
        } else if (errorMsg.includes('404') || errorMsg.includes('not found') || errorMsg.includes('model not found') || errorMsg.includes('InvalidEndpointOrModel.NotFound')) {
          // Extract model name
          const modelMatch = errorMsg.match(/model[:\s]+([^\s,"]+)|"model[:"]+([^",}]+)/i);
          const modelName = modelMatch ? (modelMatch[1] || modelMatch[2]) : 'Unknown';
          data.error = '[Multimodal Model Configuration Error] The model specified in guiSubagentModel does not exist or is not accessible.\n' +
            'Error details: HTTP 404 - Model or Endpoint not found\n' +
            'Configured model name: ' + modelName + '\n' +
            'Suggested action: Please check the guiSubagentModel configuration in ~/.xagent/settings.json, remove or replace with a valid model name';
        } else {
          data.error = '[Multimodal Model API Call Failed]\n' +
            'Error details: ' + errorMsg + '\n' +
            'Please check the following configuration items:\n' +
            ' - guiSubagentApiKey: API key\n' +
            ' - guiSubagentBaseUrl: API service URL\n' +
            ' - guiSubagentModel: Model name\n' +
            'Config file location: ~/.xagent/settings.json';
        }
        break;
      }

      // Check abort immediately after model call
      if (this.signal?.aborted) {
        data.status = GUIAgentStatus.USER_STOPPED;
        break;
      }

      if (!prediction) {
        // Starts the next iteration (new screenshot, new request); bounded by maxLoopCount.
        this.logger.warn('[GUIAgent] Warning: Empty response from model, retrying...');
        continue;
      }

      if (this.showAIDebugInfo) {
        this.logger.debug('[GUIAgent] Response:', prediction);
        this.logger.debug('[GUIAgent] Parsed Predictions:', JSON.stringify(parsedPredictions));
      }

      const predictionSummary = this.getSummary(prediction);

      // One timestamp for both fields so that cost === end - start always holds.
      const predictedAt = Date.now();
      const assistantEntry: Conversation = {
        from: 'assistant',
        value: predictionSummary,
        timing: {
          start,
          end: predictedAt,
          cost: predictedAt - start,
        },
        screenshotContext: {
          size: {
            width: screenContext.width,
            height: screenContext.height,
          },
          scaleFactor,
        },
        predictionParsed: parsedPredictions,
      };
      data.conversations.push(assistantEntry);

      await this.onData?.({
        ...data,
        conversations: data.conversations.slice(-1),
      });

      // Display assistant response
      this.displayConversationResult(assistantEntry, loopCnt);

      // Execute actions
      for (const parsedPrediction of parsedPredictions) {
        const actionType = parsedPrediction.action_type;

        if (this.showAIDebugInfo) {
          this.logger.debug('[GUIAgent] Action:', actionType);
        }

        // Handle internal action spaces
        if (actionType === 'error_env') {
          data.status = GUIAgentStatus.ERROR;
          data.error = 'Environment error';
          break;
        } else if (actionType === 'max_loop') {
          data.status = GUIAgentStatus.ERROR;
          data.error = 'Reached max loop';
          break;
        }

        // Execute action with retry
        if (!this.signal?.aborted && !this.isStopped) {
          let stepRetryCount = 0;
          let stepSuccess = false;
          let lastErrorMsg = '';

          this.logger.debug(`[GUIAgent] Executing action: ${actionType}, loopCnt: ${loopCnt}`);

          while (stepRetryCount < MAX_STEP_RETRIES && !stepSuccess) {
            try {
              // Every returned status ('end', success, failed, ...) counts as a
              // completed step; only a thrown error triggers a retry.
              await this.operator.doExecute({
                prediction,
                parsedPrediction,
                screenWidth: screenContext.width,
                screenHeight: screenContext.height,
                scaleFactor,
                factors: [1000, 1000], // Default factors
              });
              stepSuccess = true;
            } catch (executeError) {
              stepRetryCount++;
              lastErrorMsg = executeError instanceof Error ? executeError.message : 'Unknown error';
              this.logger.warn(`[GUIAgent] Action failed ${stepRetryCount}/${MAX_STEP_RETRIES}: ${lastErrorMsg}`);

              if (stepRetryCount < MAX_STEP_RETRIES) {
                await sleep(1000);
                // Take new screenshot for retry. A failure here must not abort the
                // remaining attempts, so it is logged and the retry proceeds without it.
                try {
                  const retrySnapshot = await this.operator.doScreenshot();
                  if (retrySnapshot?.base64) {
                    data.conversations.push({
                      from: 'human',
                      value: IMAGE_PLACEHOLDER,
                      screenshotBase64: retrySnapshot.base64,
                      screenshotContext: {
                        size: {
                          width: screenContext.width,
                          height: screenContext.height,
                        },
                        scaleFactor: retrySnapshot.scaleFactor ?? screenContext.scaleFactor,
                      },
                    });
                  }
                } catch (retryScreenshotError) {
                  const retryMsg = retryScreenshotError instanceof Error ? retryScreenshotError.message : 'Unknown error';
                  this.logger.warn(`[GUIAgent] Retry screenshot failed: ${retryMsg}`);
                }
              }
            }
          }

          if (!stepSuccess) {
            // All retries exhausted
            this.logger.error(`[GUIAgent] Action failed after ${MAX_STEP_RETRIES} attempts: ${lastErrorMsg}`);
            data.status = GUIAgentStatus.ERROR;
            data.error = `Action failed after ${MAX_STEP_RETRIES} attempts: ${lastErrorMsg}`;
            break;
          }
        }

        // Check abort immediately after action execution
        if (this.signal?.aborted) {
          data.status = GUIAgentStatus.USER_STOPPED;
          break;
        }

        // Handle special action types
        if (actionType === 'call_user') {
          data.status = GUIAgentStatus.CALL_USER;
          break;
        } else if (actionType === 'finished') {
          data.status = GUIAgentStatus.END;
          break;
        }
      }

      // Check abort after action loop
      if (this.signal?.aborted) {
        data.status = GUIAgentStatus.USER_STOPPED;
        break;
      }

      // Wait between iterations
      if (this.loopIntervalInMs > 0) {
        await sleep(this.loopIntervalInMs);
      }
    }
  } catch (error) {
    this.logger.error('[GUIAgent] Catch error', error);
    if (
      error instanceof Error &&
      (error.name === 'AbortError' || error.message?.includes('aborted'))
    ) {
      data.status = GUIAgentStatus.USER_STOPPED;
    } else {
      data.status = GUIAgentStatus.ERROR;
      data.error = error instanceof Error ? error.message : 'Unknown error';
    }
  } finally {
    const finalStatus = data.status;
    const finalError = data.error;

    // Output error immediately if task failed
    if (finalStatus === GUIAgentStatus.ERROR && finalError) {
      console.log(`\n${colors.error('✖')} ${finalError}\n`);
    }

    // Report the final state on a later microtask. The rejection handler keeps a
    // throwing callback from surfacing as an unhandled promise rejection.
    const onDataCallback = this.onData;
    if (onDataCallback) {
      const finalSnapshot: GUIAgentData = { ...data, conversations: [] };
      void Promise.resolve()
        .then(() => onDataCallback(finalSnapshot))
        .catch((callbackError: unknown) => {
          this.logger.error('[GUIAgent] onData callback failed', callbackError);
        });
    }

    // Call onError callback if status is error
    if (finalStatus === GUIAgentStatus.ERROR && this.onError) {
      this.onError(new Error(finalError || 'Unknown error occurred'));
    }

    if (this.showAIDebugInfo) {
      this.logger.debug('[GUIAgent] Final status:', {
        status: finalStatus,
        loopCnt,
        totalConversations: data.conversations.length,
      });
    }

    this.logger.debug(`[GUIAgent] Finally: finalStatus=${finalStatus}, finalError=${finalError}, data.status=${data.status}, data.error=${data.error}`);

    // Log final status (only visible when showAIDebugInfo is enabled)
    this.logger.debug(`[GUIAgent] Final status: ${finalStatus}${finalError ? `, Error: ${finalError}` : ''}, Steps: ${loopCnt}`);
  }

  return data;
}
726
+
727
/**
 * Convert the conversation history into chat-completions messages.
 *
 * The result starts with the system prompt, followed by one message per
 * conversation entry in order:
 * - human entry with a screenshot -> `user` message with a text block and a
 *   PNG data-URL image block (`detail: 'high'`),
 * - assistant entry -> `assistant` message with the entry's text,
 * - anything else -> plain-text `user` message.
 *
 * @param conversations - History to convert; not modified.
 * @param systemPrompt - Content of the leading `system` message.
 * @returns The message list to send as `messages` in the request body.
 */
private buildModelMessages(conversations: Conversation[], systemPrompt: string): any[] {
  const history = conversations.map((entry) => {
    const isScreenshotTurn = entry.from === 'human' && Boolean(entry.screenshotBase64);

    if (isScreenshotTurn) {
      const textBlock = { type: 'text', text: entry.value };
      const imageBlock = {
        type: 'image_url',
        image_url: {
          url: `data:image/png;base64,${entry.screenshotBase64}`,
          detail: 'high',
        },
      };
      return { role: 'user', content: [textBlock, imageBlock] };
    }

    const role = entry.from === 'assistant' ? 'assistant' : 'user';
    return { role, content: entry.value };
  });

  return [{ role: 'system', content: systemPrompt }, ...history];
}
770
+
771
+ /**
772
+ * Call the model API with debug logging
773
+ */
774
+ private async callModelAPI(
775
+ messages: any[],
776
+ screenContext: ScreenContext
777
+ ): Promise<{ prediction: string; parsedPredictions: PredictionParsed[] }> {
778
+ const baseUrl = this.modelBaseUrl || process.env.MODEL_BASE_URL || 'https://api.openai.com/v1';
779
+ const apiKey = this.modelApiKey || process.env.MODEL_API_KEY || '';
780
+
781
+ const requestBody = {
782
+ model: this.model,
783
+ messages,
784
+ max_tokens: 1024,
785
+ temperature: 0.1,
786
+ };
787
+
788
+ // Debug output for model input
789
+ if (this.showAIDebugInfo) {
790
+ console.log('\n╔══════════════════════════════════════════════════════════╗');
791
+ console.log('ā•‘ GUI MODEL REQUEST DEBUG ā•‘');
792
+ console.log('ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•');
793
+ console.log(`šŸ“¦ Model: ${this.model}`);
794
+ console.log(`🌐 Base URL: ${baseUrl}`);
795
+ console.log(`šŸ’¬ Messages: ${messages.length}`);
796
+
797
+ // Show system prompt if present
798
+ const systemMsg = messages.find((m: any) => m.role === 'system');
799
+ if (systemMsg) {
800
+ console.log('\nā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”');
801
+ console.log('│ 🟫 SYSTEM │');
802
+ console.log('ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤');
803
+ const systemContent = typeof systemMsg.content === 'string'
804
+ ? systemMsg.content
805
+ : JSON.stringify(systemMsg.content);
806
+ const lines = systemContent.split('\n').slice(0, 15);
807
+ for (const line of lines) {
808
+ console.log('│ ' + line.slice(0, 62));
809
+ }
810
+ if (systemContent.split('\n').length > 15) {
811
+ console.log('│ ... (truncated)');
812
+ }
813
+ console.log('ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜');
814
+ }
815
+
816
+ // Show conversation messages
817
+ const roleColors: Record<string, string> = {
818
+ user: 'šŸ‘¤ USER',
819
+ assistant: 'šŸ¤– ASSISTANT',
820
+ };
821
+
822
+ for (let i = 0; i < messages.length; i++) {
823
+ const msg = messages[i];
824
+ if (msg.role === 'system') continue;
825
+
826
+ const roleLabel = roleColors[msg.role] || `ā— ${msg.role.toUpperCase()}`;
827
+ console.log(`\nā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”`);
828
+ console.log(`│ ${roleLabel} (${i + 1}) │`);
829
+ console.log('ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤');
830
+
831
+ if (typeof msg.content === 'string') {
832
+ const lines = msg.content.split('\n').slice(0, 20);
833
+ for (const line of lines) {
834
+ console.log('│ ' + line.slice(0, 62));
835
+ }
836
+ if (msg.content.split('\n').length > 20) {
837
+ console.log('│ ... (truncated)');
838
+ }
839
+ } else if (Array.isArray(msg.content)) {
840
+ const hasImage = msg.content.some((c: any) => c.type === 'image_url');
841
+ console.log('│ šŸ“Ž Content blocks: ' + msg.content.length);
842
+ if (hasImage) {
843
+ const imageBlock = msg.content.find((c: any) => c.type === 'image_url');
844
+ const imageSize = imageBlock?.image_url?.url?.length || 0;
845
+ console.log('│ šŸ–¼ļø Image size: ' + (imageSize / 1024).toFixed(2) + ' KB');
846
+ }
847
+ const textBlock = msg.content.find((c: any) => c.type === 'text');
848
+ if (textBlock?.text) {
849
+ const lines = textBlock.text.split('\n').slice(0, 10);
850
+ for (const line of lines) {
851
+ console.log('│ ' + line.slice(0, 62));
852
+ }
853
+ }
854
+ }
855
+ console.log('ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜');
856
+ }
857
+
858
+ console.log('\nšŸ“¤ Sending request to model API...\n');
859
+
860
+ }
861
+
862
+
863
+
864
+ let response;
865
+ try {
866
+ response = await fetch(`${baseUrl}/chat/completions`, {
867
+ method: 'POST',
868
+ headers: {
869
+ 'Content-Type': 'application/json',
870
+ 'Authorization': `Bearer ${apiKey}`,
871
+ },
872
+ body: JSON.stringify(requestBody),
873
+ signal: this.signal,
874
+ });
875
+ } catch (fetchError) {
876
+ throw fetchError;
877
+ }
878
+
879
+ // Handle non-200 responses
880
+ if (!response.ok) {
881
+ const errorText = await response.text();
882
+ throw new Error(`Model API error: ${errorText}`);
883
+ }
884
+
885
+ const result = await response.json() as { choices?: Array<{ message?: { content?: string } }>; usage?: any };
886
+ const content = result.choices?.[0]?.message?.content || '';
887
+
888
+ // Debug output for model response
889
+ if (this.showAIDebugInfo) {
890
+ console.log('\n╔══════════════════════════════════════════════════════════╗');
891
+ console.log('ā•‘ GUI MODEL RESPONSE DEBUG ā•‘');
892
+ console.log('ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•');
893
+
894
+ if (result.usage) {
895
+ console.log(`šŸ“Š Tokens: ${result.usage.prompt_tokens} (prompt) + ${result.usage.completion_tokens} (completion) = ${result.usage.total_tokens} (total)`);
896
+ }
897
+
898
+ console.log('\nā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”');
899
+ console.log('│ šŸ¤– ASSISTANT │');
900
+ console.log('ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤');
901
+ console.log('│ šŸ’¬ CONTENT:');
902
+ console.log('│ ───────────────────────────────────────────────────────────');
903
+
904
+ const lines = content.split('\n').slice(0, 30);
905
+ for (const line of lines) {
906
+ console.log('│ ' + line.slice(0, 62));
907
+ }
908
+ if (content.split('\n').length > 30) {
909
+ console.log(`│ ... (${content.split('\n').length - 30} more lines)`);
910
+ }
911
+ console.log('│ ───────────────────────────────────────────────────────────');
912
+ console.log('ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜');
913
+
914
+ console.log('\n╔══════════════════════════════════════════════════════════╗');
915
+ console.log('ā•‘ RESPONSE ENDED ā•‘');
916
+ console.log('ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•\n');
917
+ }
918
+
919
+ const { parsed: parsedPredictions } = actionParser({
920
+ prediction: content,
921
+ factor: [1000, 1000],
922
+ screenContext: {
923
+ width: screenContext.width,
924
+ height: screenContext.height,
925
+ },
926
+ });
927
+
928
+ return {
929
+ prediction: content,
930
+ parsedPredictions,
931
+ };
932
+ }
933
+
934
/**
 * Derive a short summary from a raw model prediction.
 *
 * If the prediction contains an `Action:` marker (matched case-insensitively,
 * with either an ASCII colon or a full-width colon U+FF1A), everything after
 * the first such marker up to the end of the text is returned, trimmed.
 * Otherwise the first 200 characters of the prediction are returned.
 *
 * @param prediction - Raw text produced by the model.
 * @returns The trimmed action text, or a 200-character prefix of the prediction.
 */
private getSummary(prediction: string): string {
  // The full-width colon is written as an escape so it cannot be silently
  // folded into a second ASCII ':' by editors or text-normalisation tools.
  const actionMatch = prediction.match(/Action[:\uFF1A]\s*([\s\S]+)$/i);
  if (actionMatch) {
    return actionMatch[1].trim();
  }
  return prediction.slice(0, 200);
}
945
+
946
/**
 * Pause the agent.
 *
 * Sets `isPaused` and stores a pending promise in `resumePromise`; its
 * resolver is kept in `resolveResume` so that `resume()` can settle it.
 *
 * Calling `pause()` while a pause is already pending is a no-op: replacing
 * the stored promise would orphan the previous one, and anything awaiting it
 * would never be released because its resolver would be overwritten.
 */
pause(): void {
  this.isPaused = true;
  if (this.resumePromise) {
    // Already paused: keep the existing promise/resolver pair intact.
    return;
  }
  this.resumePromise = new Promise((resolve) => {
    this.resolveResume = resolve;
  });
}
952
+
953
/**
 * Resume the agent after `pause()`.
 *
 * When a resolver is stored in `resolveResume`, it is invoked and both
 * `resumePromise` and `resolveResume` are reset to `null`. The `isPaused`
 * flag is cleared in every case, whether or not a resolver was present.
 */
resume(): void {
  const release = this.resolveResume;
  if (release) {
    release();
    this.resumePromise = null;
    this.resolveResume = null;
  }
  this.isPaused = false;
}
961
+
962
/**
 * Request that the agent stop.
 *
 * Only raises the `isStopped` flag; the pause state (`isPaused`,
 * `resumePromise`, `resolveResume`) is left untouched by this method.
 */
stop(): void {
  this.isStopped = true;
}
965
+
966
/**
 * Release resources held by the GUI agent.
 *
 * Logs a debug message, awaits `operator.cleanup()`, and then detaches the
 * `'cancelled'` listener if both `_cancelHandler` and `_cancellationManager`
 * are present on the instance.
 *
 * The listener is detached in a `finally` block so that it is removed even
 * when `operator.cleanup()` rejects; the rejection still propagates to the
 * caller.
 *
 * @returns A promise that settles once cleanup has finished.
 */
async cleanup(): Promise<void> {
  this.logger.debug('Cleaning up GUI Agent...');
  try {
    await this.operator.cleanup();
  } finally {
    // `_cancelHandler` / `_cancellationManager` are reached through an
    // untyped view of `this`, exactly as the rest of this method always did.
    const untyped = this as any;
    const cancelHandler = untyped._cancelHandler;
    const cancellationManager = untyped._cancellationManager;
    if (cancelHandler && cancellationManager) {
      cancellationManager.off('cancelled', cancelHandler);
      untyped._cancelHandler = undefined;
      untyped._cancellationManager = undefined;
    }
  }
}
979
+ }
980
+
981
+ export { GUIAgentStatus as StatusEnum };
982
+