onbuzz 4.9.13 → 4.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (451)
  1. package/node_modules/glob/README.md +31 -5
  2. package/node_modules/glob/dist/commonjs/glob.d.ts +8 -0
  3. package/node_modules/glob/dist/commonjs/glob.d.ts.map +1 -1
  4. package/node_modules/glob/dist/commonjs/glob.js +2 -1
  5. package/node_modules/glob/dist/commonjs/glob.js.map +1 -1
  6. package/node_modules/glob/dist/commonjs/index.min.js +3 -3
  7. package/node_modules/glob/dist/commonjs/index.min.js.map +4 -4
  8. package/node_modules/glob/dist/commonjs/pattern.d.ts +3 -0
  9. package/node_modules/glob/dist/commonjs/pattern.d.ts.map +1 -1
  10. package/node_modules/glob/dist/commonjs/pattern.js +4 -0
  11. package/node_modules/glob/dist/commonjs/pattern.js.map +1 -1
  12. package/node_modules/glob/dist/esm/glob.d.ts +8 -0
  13. package/node_modules/glob/dist/esm/glob.d.ts.map +1 -1
  14. package/node_modules/glob/dist/esm/glob.js +2 -1
  15. package/node_modules/glob/dist/esm/glob.js.map +1 -1
  16. package/node_modules/glob/dist/esm/index.min.js +3 -3
  17. package/node_modules/glob/dist/esm/index.min.js.map +4 -4
  18. package/node_modules/glob/dist/esm/pattern.d.ts +3 -0
  19. package/node_modules/glob/dist/esm/pattern.d.ts.map +1 -1
  20. package/node_modules/glob/dist/esm/pattern.js +4 -0
  21. package/node_modules/glob/dist/esm/pattern.js.map +1 -1
  22. package/node_modules/{@isaacs → glob/node_modules}/balanced-match/README.md +7 -10
  23. package/node_modules/{@isaacs → glob/node_modules}/balanced-match/package.json +7 -18
  24. package/node_modules/{@isaacs → glob/node_modules}/brace-expansion/README.md +3 -6
  25. package/node_modules/{@isaacs → glob/node_modules}/brace-expansion/dist/commonjs/index.js +6 -4
  26. package/node_modules/glob/node_modules/brace-expansion/dist/commonjs/index.js.map +1 -0
  27. package/node_modules/{@isaacs → glob/node_modules}/brace-expansion/dist/esm/index.js +6 -4
  28. package/node_modules/glob/node_modules/brace-expansion/dist/esm/index.js.map +1 -0
  29. package/node_modules/{@isaacs → glob/node_modules}/brace-expansion/package.json +11 -7
  30. package/node_modules/glob/node_modules/minimatch/README.md +76 -1
  31. package/node_modules/glob/node_modules/minimatch/dist/commonjs/assert-valid-pattern.d.ts +1 -1
  32. package/node_modules/glob/node_modules/minimatch/dist/commonjs/assert-valid-pattern.d.ts.map +1 -1
  33. package/node_modules/glob/node_modules/minimatch/dist/commonjs/assert-valid-pattern.js.map +1 -1
  34. package/node_modules/glob/node_modules/minimatch/dist/commonjs/ast.d.ts +4 -2
  35. package/node_modules/glob/node_modules/minimatch/dist/commonjs/ast.d.ts.map +1 -1
  36. package/node_modules/glob/node_modules/minimatch/dist/commonjs/ast.js +309 -55
  37. package/node_modules/glob/node_modules/minimatch/dist/commonjs/ast.js.map +1 -1
  38. package/node_modules/glob/node_modules/minimatch/dist/commonjs/brace-expressions.d.ts.map +1 -1
  39. package/node_modules/glob/node_modules/minimatch/dist/commonjs/brace-expressions.js +2 -4
  40. package/node_modules/glob/node_modules/minimatch/dist/commonjs/brace-expressions.js.map +1 -1
  41. package/node_modules/glob/node_modules/minimatch/dist/commonjs/escape.d.ts +1 -1
  42. package/node_modules/glob/node_modules/minimatch/dist/commonjs/escape.d.ts.map +1 -1
  43. package/node_modules/glob/node_modules/minimatch/dist/commonjs/escape.js +4 -4
  44. package/node_modules/glob/node_modules/minimatch/dist/commonjs/escape.js.map +1 -1
  45. package/node_modules/glob/node_modules/minimatch/dist/commonjs/index.d.ts +81 -1
  46. package/node_modules/glob/node_modules/minimatch/dist/commonjs/index.d.ts.map +1 -1
  47. package/node_modules/glob/node_modules/minimatch/dist/commonjs/index.js +232 -134
  48. package/node_modules/glob/node_modules/minimatch/dist/commonjs/index.js.map +1 -1
  49. package/node_modules/glob/node_modules/minimatch/dist/commonjs/unescape.d.ts +1 -1
  50. package/node_modules/glob/node_modules/minimatch/dist/commonjs/unescape.d.ts.map +1 -1
  51. package/node_modules/glob/node_modules/minimatch/dist/commonjs/unescape.js +8 -8
  52. package/node_modules/glob/node_modules/minimatch/dist/commonjs/unescape.js.map +1 -1
  53. package/node_modules/glob/node_modules/minimatch/dist/esm/assert-valid-pattern.d.ts +1 -1
  54. package/node_modules/glob/node_modules/minimatch/dist/esm/assert-valid-pattern.d.ts.map +1 -1
  55. package/node_modules/glob/node_modules/minimatch/dist/esm/assert-valid-pattern.js.map +1 -1
  56. package/node_modules/glob/node_modules/minimatch/dist/esm/ast.d.ts +4 -2
  57. package/node_modules/glob/node_modules/minimatch/dist/esm/ast.d.ts.map +1 -1
  58. package/node_modules/glob/node_modules/minimatch/dist/esm/ast.js +309 -55
  59. package/node_modules/glob/node_modules/minimatch/dist/esm/ast.js.map +1 -1
  60. package/node_modules/glob/node_modules/minimatch/dist/esm/brace-expressions.d.ts.map +1 -1
  61. package/node_modules/glob/node_modules/minimatch/dist/esm/brace-expressions.js +2 -4
  62. package/node_modules/glob/node_modules/minimatch/dist/esm/brace-expressions.js.map +1 -1
  63. package/node_modules/glob/node_modules/minimatch/dist/esm/escape.d.ts +1 -1
  64. package/node_modules/glob/node_modules/minimatch/dist/esm/escape.d.ts.map +1 -1
  65. package/node_modules/glob/node_modules/minimatch/dist/esm/escape.js +4 -4
  66. package/node_modules/glob/node_modules/minimatch/dist/esm/escape.js.map +1 -1
  67. package/node_modules/glob/node_modules/minimatch/dist/esm/index.d.ts +81 -1
  68. package/node_modules/glob/node_modules/minimatch/dist/esm/index.d.ts.map +1 -1
  69. package/node_modules/glob/node_modules/minimatch/dist/esm/index.js +232 -134
  70. package/node_modules/glob/node_modules/minimatch/dist/esm/index.js.map +1 -1
  71. package/node_modules/glob/node_modules/minimatch/dist/esm/unescape.d.ts +1 -1
  72. package/node_modules/glob/node_modules/minimatch/dist/esm/unescape.d.ts.map +1 -1
  73. package/node_modules/glob/node_modules/minimatch/dist/esm/unescape.js +8 -8
  74. package/node_modules/glob/node_modules/minimatch/dist/esm/unescape.js.map +1 -1
  75. package/node_modules/glob/node_modules/minimatch/package.json +17 -11
  76. package/node_modules/glob/package.json +10 -13
  77. package/node_modules/minipass/LICENSE.md +55 -0
  78. package/node_modules/minipass/dist/commonjs/index.d.ts +12 -16
  79. package/node_modules/minipass/dist/commonjs/index.d.ts.map +1 -1
  80. package/node_modules/minipass/dist/commonjs/index.js +13 -3
  81. package/node_modules/minipass/dist/commonjs/index.js.map +1 -1
  82. package/node_modules/minipass/dist/esm/index.d.ts +12 -16
  83. package/node_modules/minipass/dist/esm/index.d.ts.map +1 -1
  84. package/node_modules/minipass/dist/esm/index.js +3 -1
  85. package/node_modules/minipass/dist/esm/index.js.map +1 -1
  86. package/node_modules/minipass/package.json +9 -14
  87. package/node_modules/path-scurry/node_modules/lru-cache/README.md +96 -10
  88. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/browser/diagnostics-channel-browser.d.ts.map +1 -0
  89. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/browser/diagnostics-channel-browser.js.map +1 -0
  90. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/browser/diagnostics-channel.d.ts +5 -0
  91. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/browser/diagnostics-channel.js +7 -0
  92. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/browser/index.d.ts +1400 -0
  93. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/browser/index.d.ts.map +1 -0
  94. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/browser/index.js +1726 -0
  95. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/browser/index.js.map +1 -0
  96. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/browser/index.min.js +2 -0
  97. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/browser/index.min.js.map +7 -0
  98. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/browser/perf.d.ts +12 -0
  99. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/browser/perf.d.ts.map +1 -0
  100. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/browser/perf.js +10 -0
  101. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/browser/perf.js.map +1 -0
  102. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/diagnostics-channel-cjs.cjs.map +1 -0
  103. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/diagnostics-channel-cjs.d.cts.map +1 -0
  104. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/diagnostics-channel.d.ts +5 -0
  105. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/diagnostics-channel.js +7 -0
  106. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/index.d.ts +109 -32
  107. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/index.d.ts.map +1 -1
  108. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/index.js +334 -197
  109. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/index.js.map +1 -1
  110. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/index.min.js +1 -1
  111. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/index.min.js.map +4 -4
  112. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/node/diagnostics-channel-node.d.ts.map +1 -0
  113. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/node/diagnostics-channel-node.js.map +1 -0
  114. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/node/diagnostics-channel.d.ts +5 -0
  115. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/node/diagnostics-channel.js +9 -0
  116. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/node/index.d.ts +1400 -0
  117. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/node/index.d.ts.map +1 -0
  118. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/node/index.js +1726 -0
  119. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/node/index.js.map +1 -0
  120. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/node/index.min.js +2 -0
  121. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/node/index.min.js.map +7 -0
  122. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/node/perf.d.ts +12 -0
  123. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/node/perf.d.ts.map +1 -0
  124. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/node/perf.js +10 -0
  125. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/node/perf.js.map +1 -0
  126. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/perf.d.ts +12 -0
  127. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/perf.d.ts.map +1 -0
  128. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/perf.js +10 -0
  129. package/node_modules/path-scurry/node_modules/lru-cache/dist/commonjs/perf.js.map +1 -0
  130. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/browser/diagnostics-channel-browser.d.ts.map +1 -0
  131. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/browser/diagnostics-channel-browser.js.map +1 -0
  132. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/browser/diagnostics-channel.d.ts +5 -0
  133. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/browser/diagnostics-channel.js +4 -0
  134. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/browser/index.d.ts +1400 -0
  135. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/browser/index.d.ts.map +1 -0
  136. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/browser/index.js +1722 -0
  137. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/browser/index.js.map +1 -0
  138. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/browser/index.min.js +2 -0
  139. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/browser/index.min.js.map +7 -0
  140. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/browser/perf.d.ts +12 -0
  141. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/browser/perf.d.ts.map +1 -0
  142. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/browser/perf.js +7 -0
  143. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/browser/perf.js.map +1 -0
  144. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/diagnostics-channel-esm.d.mts.map +1 -0
  145. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/diagnostics-channel-esm.mjs.map +1 -0
  146. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/diagnostics-channel.d.ts +5 -0
  147. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/diagnostics-channel.js +19 -0
  148. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/index.d.ts +109 -32
  149. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/index.d.ts.map +1 -1
  150. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/index.js +333 -196
  151. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/index.js.map +1 -1
  152. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/index.min.js +1 -1
  153. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/index.min.js.map +4 -4
  154. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/node/diagnostics-channel-node.d.ts.map +1 -0
  155. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/node/diagnostics-channel-node.js.map +1 -0
  156. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/node/diagnostics-channel.d.ts +5 -0
  157. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/node/diagnostics-channel.js +6 -0
  158. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/node/index.d.ts +1400 -0
  159. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/node/index.d.ts.map +1 -0
  160. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/node/index.js +1722 -0
  161. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/node/index.js.map +1 -0
  162. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/node/index.min.js +2 -0
  163. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/node/index.min.js.map +7 -0
  164. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/node/perf.d.ts +12 -0
  165. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/node/perf.d.ts.map +1 -0
  166. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/node/perf.js +7 -0
  167. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/node/perf.js.map +1 -0
  168. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/perf.d.ts +12 -0
  169. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/perf.d.ts.map +1 -0
  170. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/perf.js +7 -0
  171. package/node_modules/path-scurry/node_modules/lru-cache/dist/esm/perf.js.map +1 -0
  172. package/node_modules/path-scurry/node_modules/lru-cache/package.json +71 -18
  173. package/node_modules/path-scurry/package.json +8 -24
  174. package/package.json +1 -1
  175. package/scripts/debug-balance-probe.mjs +35 -35
  176. package/scripts/push-image.sh +43 -43
  177. package/scripts/setup-acr.sh +65 -65
  178. package/scripts/verify-optional-deps.js +96 -1
  179. package/src/__tests__/composioCliFlags.test.js +239 -239
  180. package/src/analyzers/CSSAnalyzer.js +298 -297
  181. package/src/analyzers/ConfigValidator.js +691 -690
  182. package/src/analyzers/ESLintAnalyzer.js +320 -320
  183. package/src/analyzers/JavaScriptAnalyzer.js +260 -261
  184. package/src/analyzers/PrettierFormatter.js +246 -247
  185. package/src/analyzers/PythonAnalyzer.js +283 -283
  186. package/src/analyzers/SecurityAnalyzer.js +729 -729
  187. package/src/analyzers/SparrowAnalyzer.js +341 -341
  188. package/src/analyzers/TypeScriptAnalyzer.js +247 -247
  189. package/src/analyzers/__tests__/CSSAnalyzer.test.js +41 -41
  190. package/src/analyzers/__tests__/ConfigValidator.test.js +362 -362
  191. package/src/analyzers/__tests__/JavaScriptAnalyzer.test.js +40 -40
  192. package/src/analyzers/__tests__/PythonAnalyzer.test.js +205 -208
  193. package/src/analyzers/__tests__/SecurityAnalyzer.test.js +303 -303
  194. package/src/analyzers/__tests__/TypeScriptAnalyzer.test.js +187 -187
  195. package/src/analyzers/codeCloneDetector/analyzer.js +344 -344
  196. package/src/analyzers/codeCloneDetector/detector.js +250 -250
  197. package/src/analyzers/codeCloneDetector/index.js +194 -192
  198. package/src/analyzers/codeCloneDetector/parser.js +199 -199
  199. package/src/core/__tests__/agentPool.test.js +866 -866
  200. package/src/core/__tests__/agentPoolAutoResume.test.js +209 -209
  201. package/src/core/__tests__/agentPoolWakeOnMessage.test.js +315 -315
  202. package/src/core/__tests__/agentScheduler.emptyResponseChatStall.test.js +213 -213
  203. package/src/core/__tests__/agentScheduler.errorCategorisation.test.js +246 -246
  204. package/src/core/__tests__/agentScheduler.firstChunkTimeout.test.js +138 -138
  205. package/src/core/__tests__/agentScheduler.modeTransitions.test.js +233 -233
  206. package/src/core/__tests__/agentScheduler.nativePromptPick.test.js +319 -319
  207. package/src/core/__tests__/agentScheduler.taskLifecycleInstruction.test.js +78 -78
  208. package/src/core/__tests__/agentScheduler.visualizer.test.js +258 -258
  209. package/src/core/__tests__/flowCheckpointStore.test.js +140 -140
  210. package/src/core/__tests__/flowEndToEnd.test.js +565 -565
  211. package/src/core/__tests__/flowFieldMapping.test.js +188 -189
  212. package/src/core/__tests__/flowLintClientMirror.test.js +96 -98
  213. package/src/core/__tests__/flowSavePayload.test.js +170 -169
  214. package/src/core/__tests__/flowTemplates.test.js +311 -311
  215. package/src/core/__tests__/flowVersionStore.test.js +123 -123
  216. package/src/core/__tests__/messageProcessor.test.js +669 -669
  217. package/src/core/__tests__/stateManager.test.js +0 -1
  218. package/src/core/agentPool.js +2474 -2475
  219. package/src/core/agentScheduler.js +1 -4
  220. package/src/core/contextManager.js +708 -708
  221. package/src/core/flowExecutor.js +1510 -1510
  222. package/src/core/flowFieldMapping.js +136 -138
  223. package/src/core/messageProcessor.js +953 -954
  224. package/src/core/orchestrator.js +593 -595
  225. package/src/core/stateManager.js +1765 -1752
  226. package/src/index.js +1221 -1221
  227. package/src/interfaces/__tests__/archivedAgentDelete.test.js +207 -207
  228. package/src/interfaces/__tests__/bulkAgentRoute.test.js +361 -361
  229. package/src/interfaces/__tests__/imageServing.test.js +228 -228
  230. package/src/interfaces/__tests__/remoteSessionAuth.test.js +308 -308
  231. package/src/interfaces/__tests__/videoJobsRoutes.test.js +178 -179
  232. package/src/interfaces/__tests__/webServer.marketplace.test.js +629 -629
  233. package/src/interfaces/schedulerRoutes.js +50 -50
  234. package/src/interfaces/terminal/__tests__/smoke/connection.test.js +341 -350
  235. package/src/interfaces/terminal/__tests__/smoke/enhancements.test.js +156 -156
  236. package/src/interfaces/terminal/__tests__/smoke/imports.test.js +325 -330
  237. package/src/interfaces/terminal/__tests__/smoke/tools.test.js +385 -388
  238. package/src/interfaces/terminal/api/session.js +265 -266
  239. package/src/interfaces/terminal/api/websocket.js +496 -497
  240. package/src/interfaces/terminal/components/AgentCreator.js +691 -705
  241. package/src/interfaces/terminal/components/AgentEditor.js +676 -678
  242. package/src/interfaces/terminal/components/AgentSwitcher.js +331 -330
  243. package/src/interfaces/terminal/components/ErrorPanel.js +263 -264
  244. package/src/interfaces/terminal/components/Header.js +28 -28
  245. package/src/interfaces/terminal/components/Layout.js +598 -603
  246. package/src/interfaces/terminal/components/MessageList.js +280 -281
  247. package/src/interfaces/terminal/components/SettingsPanel.js +410 -415
  248. package/src/interfaces/terminal/components/StatusBar.js +2 -0
  249. package/src/interfaces/terminal/index.js +168 -168
  250. package/src/interfaces/terminal/state/useAgentControl.js +496 -496
  251. package/src/interfaces/terminal/state/useAgents.js +537 -537
  252. package/src/interfaces/terminal/state/useMessages.js +629 -630
  253. package/src/interfaces/terminal/state/useTools.js +554 -554
  254. package/src/interfaces/terminal/utils/debugLogger.js +44 -44
  255. package/src/interfaces/terminal/utils/settingsStorage.js +232 -232
  256. package/src/interfaces/webServer.js +7578 -7579
  257. package/src/interfaces/webServer.js.bak +7046 -7046
  258. package/src/modules/fileExplorer/__tests__/zipDownload.test.js +237 -237
  259. package/src/modules/fileExplorer/controller.js +470 -469
  260. package/src/modules/fileExplorer/routes.js +285 -286
  261. package/src/modules/widget/__tests__/isDisabled.test.js +41 -41
  262. package/src/modules/widget/__tests__/routes.test.js +677 -678
  263. package/src/modules/widget/__tests__/runtime.test.js +401 -401
  264. package/src/modules/widget/__tests__/versioning.test.js +309 -309
  265. package/src/modules/widget/__tests__/webComponentRuntime.test.js +565 -565
  266. package/src/modules/widget/__tests__/widgetTool.test.js +316 -316
  267. package/src/modules/widget/routes.js +435 -435
  268. package/src/modules/widget/runtime/bundle.js +640 -640
  269. package/src/modules/widget/runtime/webComponentBundle.js +470 -470
  270. package/src/modules/widget/schema.js +182 -181
  271. package/src/modules/widget/widgetTool.js +1389 -1389
  272. package/src/services/__tests__/agentActivityService.test.js +401 -402
  273. package/src/services/__tests__/benchmarkService.test.js +184 -184
  274. package/src/services/__tests__/contextInjectionService.test.js +246 -246
  275. package/src/services/__tests__/conversationQuery.test.js +721 -723
  276. package/src/services/__tests__/credentialVault.test.js +469 -469
  277. package/src/services/__tests__/discordService.integration.test.js +638 -639
  278. package/src/services/__tests__/flowContextService.test.js +590 -590
  279. package/src/services/__tests__/memoryService.test.js +1 -1
  280. package/src/services/__tests__/messageSource.test.js +380 -380
  281. package/src/services/__tests__/modelRouterNaming.test.js +111 -111
  282. package/src/services/__tests__/projectDetector.test.js +34 -34
  283. package/src/services/__tests__/promptService.test.js +242 -242
  284. package/src/services/__tests__/telegramService.test.js +941 -941
  285. package/src/services/__tests__/tokenCountingService.test.js +48 -48
  286. package/src/services/agentActivityService.js +419 -420
  287. package/src/services/aiService.js +2997 -3001
  288. package/src/services/apiKeyManager.js +359 -359
  289. package/src/services/benchmarkService.js +196 -196
  290. package/src/services/codebaseKnowledgeService.js +2 -2
  291. package/src/services/composioService.js +738 -738
  292. package/src/services/conversationCompactionService.js +1258 -1257
  293. package/src/services/credentialVault.js +685 -685
  294. package/src/services/discordService.js +792 -793
  295. package/src/services/embeddings/__tests__/azureCustomProvider.test.js +232 -232
  296. package/src/services/embeddings/__tests__/embeddingService.test.js +417 -417
  297. package/src/services/embeddings/__tests__/localProvider.test.js +263 -263
  298. package/src/services/embeddings/autoRecall.js +218 -219
  299. package/src/services/embeddings/indexers/__tests__/agentIndexer.test.js +232 -232
  300. package/src/services/embeddings/indexers/__tests__/memoryIndexer.test.js +418 -418
  301. package/src/services/embeddings/indexers/__tests__/reminisceIndexer.test.js +356 -357
  302. package/src/services/embeddings/indexers/__tests__/skillsIndexer.test.js +145 -145
  303. package/src/services/embeddings/indexers/__tests__/taskIndexer.test.js +146 -146
  304. package/src/services/embeddings/indexers/composioIndexer.js +279 -279
  305. package/src/services/embeddings/providerInterface.js +206 -206
  306. package/src/services/embeddings/providers/localProvider.js +11 -7
  307. package/src/services/embeddings/providers/openaiProvider.js +101 -101
  308. package/src/services/embeddings/vectorStore/inMemoryJsonStore.js +356 -356
  309. package/src/services/errorHandler.js +809 -809
  310. package/src/services/flowContextService.js +586 -586
  311. package/src/services/grounding/MockAdapter.js +125 -125
  312. package/src/services/modelRouterService.js +26 -31
  313. package/src/services/modelsService.js +322 -322
  314. package/src/services/ollamaService.js +452 -452
  315. package/src/services/projectDetector.js +403 -404
  316. package/src/services/promptService.js +418 -418
  317. package/src/services/qualityInspector.js +795 -795
  318. package/src/services/scheduleService.js +726 -726
  319. package/src/services/serviceRegistry.js +386 -386
  320. package/src/services/telegrafBot.js +174 -174
  321. package/src/services/telegramService.js +1972 -1972
  322. package/src/services/visualEditorBridge.js +1033 -1033
  323. package/src/services/visualEditorServer.js +1769 -1774
  324. package/src/services/whatsappService.js +667 -668
  325. package/src/tools/__tests__/agentCommunicationTool.findAgent.test.js +226 -226
  326. package/src/tools/__tests__/agentCommunicationTool.test.js +3 -3
  327. package/src/tools/__tests__/agentDelayTool.test.js +342 -342
  328. package/src/tools/__tests__/baseTool.test.js +3 -3
  329. package/src/tools/__tests__/codeMapTool.test.js +915 -915
  330. package/src/tools/__tests__/fileContentReplaceTool.test.js +309 -309
  331. package/src/tools/__tests__/fileTreeTool.test.js +274 -274
  332. package/src/tools/__tests__/filesystemTool.test.js +815 -815
  333. package/src/tools/__tests__/foundryWebSearchTool.test.js +252 -252
  334. package/src/tools/__tests__/imageTool.validator.test.js +194 -194
  335. package/src/tools/__tests__/jobDoneTool.test.js +580 -581
  336. package/src/tools/__tests__/memoryTool.forgetStale.test.js +272 -272
  337. package/src/tools/__tests__/memoryTool.reminisce.test.js +2 -2
  338. package/src/tools/__tests__/memoryTool.reminisceSemanticSearch.test.js +301 -301
  339. package/src/tools/__tests__/memoryTool.semanticSearch.test.js +405 -405
  340. package/src/tools/__tests__/memoryTool.teamPool.test.js +293 -293
  341. package/src/tools/__tests__/memoryTool.test.js +1 -1
  342. package/src/tools/__tests__/seekTool.test.js +282 -282
  343. package/src/tools/__tests__/skillsTool.search.test.js +164 -164
  344. package/src/tools/__tests__/skillsTool.test.js +226 -226
  345. package/src/tools/__tests__/staticAnalysisTool.test.js +509 -509
  346. package/src/tools/__tests__/taskManagerTool.discipline.test.js +137 -137
  347. package/src/tools/__tests__/taskManagerTool.search.test.js +143 -143
  348. package/src/tools/__tests__/taskManagerTool.test.js +866 -866
  349. package/src/tools/__tests__/terminalTool.test.js +448 -448
  350. package/src/tools/__tests__/toolShapeForgiveness.test.js +259 -260
  351. package/src/tools/__tests__/userPromptTool.test.js +297 -297
  352. package/src/tools/__tests__/videoTool.jobs.test.js +147 -147
  353. package/src/tools/__tests__/webTool.e2e.test.js +609 -603
  354. package/src/tools/__tests__/webTool.unit.test.js +195 -195
  355. package/src/tools/__tests__/webTool.visionModel.test.js +75 -75
  356. package/src/tools/agentCommunicationTool.js +8 -10
  357. package/src/tools/agentDelayTool.js +496 -497
  358. package/src/tools/asyncToolManager.js +602 -603
  359. package/src/tools/baseTool.js +12 -11
  360. package/src/tools/cloneDetectionTool.js +576 -581
  361. package/src/tools/codeMapTool.js +0 -6
  362. package/src/tools/composioTool.js +617 -617
  363. package/src/tools/dependencyResolverTool.js +1211 -1212
  364. package/src/tools/desktop/DesktopTool.js +629 -638
  365. package/src/tools/desktop/__tests__/DesktopTool.e2e.test.js +306 -306
  366. package/src/tools/desktop/__tests__/DesktopTool.test.js +507 -507
  367. package/src/tools/desktop/__tests__/osController.test.js +364 -364
  368. package/src/tools/desktop/osController.js +491 -491
  369. package/src/tools/docxTool.js +623 -623
  370. package/src/tools/excelTool.js +636 -636
  371. package/src/tools/fileContentReplaceTool.js +5 -7
  372. package/src/tools/fileSystemTool.js +12 -19
  373. package/src/tools/fileTreeTool.js +840 -840
  374. package/src/tools/foundryWebSearchTool.js +273 -273
  375. package/src/tools/helpTool.js +198 -198
  376. package/src/tools/imageTool.js +1397 -1397
  377. package/src/tools/importAnalyzerTool.js +1056 -1056
  378. package/src/tools/jobDoneTool.js +495 -495
  379. package/src/tools/memoryTool.js +1 -1
  380. package/src/tools/office/pres/__tests__/presSystem.test.js +365 -365
  381. package/src/tools/office/pres/archetypes/agenda.js +61 -61
  382. package/src/tools/office/pres/archetypes/bentoGrid.js +218 -219
  383. package/src/tools/office/pres/archetypes/bigStat.js +140 -142
  384. package/src/tools/office/pres/archetypes/closing.js +70 -70
  385. package/src/tools/office/pres/archetypes/hero.js +70 -70
  386. package/src/tools/office/pres/archetypes/productHero.js +93 -94
  387. package/src/tools/office/pres/archetypes/table.js +73 -74
  388. package/src/tools/office/pres/backgrounds/orb.js +66 -66
  389. package/src/tools/office/pres/components.js +422 -423
  390. package/src/tools/officeTool.js +441 -441
  391. package/src/tools/pdfTool.js +625 -627
  392. package/src/tools/platformControlTool.js +1081 -1081
  393. package/src/tools/seekTool.js +917 -918
  394. package/src/tools/skillsTool.js +1 -1
  395. package/src/tools/staticAnalysisTool.js +2143 -2146
  396. package/src/tools/taskManagerTool.js +3324 -3324
  397. package/src/tools/terminalTool.js +2615 -2618
  398. package/src/tools/videoTool.js +1303 -1303
  399. package/src/tools/visionTool.js +508 -508
  400. package/src/tools/visualEditorTool.js +1289 -1290
  401. package/src/tools/webTool.js +3368 -3368
  402. package/src/tools/whatsappTool.js +464 -464
  403. package/src/types/__tests__/agent.test.js +499 -499
  404. package/src/types/__tests__/contextReference.test.js +606 -606
  405. package/src/types/__tests__/conversation.test.js +555 -555
  406. package/src/types/__tests__/toolCommand.test.js +584 -584
  407. package/src/types/contextReference.js +974 -971
  408. package/src/types/conversation.js +729 -729
  409. package/src/types/toolCommand.js +746 -746
  410. package/src/utilities/__tests__/attachmentValidator.test.js +80 -80
  411. package/src/utilities/__tests__/auditReport.test.js +328 -328
  412. package/src/utilities/__tests__/directoryAccessManager.test.js +388 -388
  413. package/src/utilities/__tests__/jsonRepair.test.js +103 -104
  414. package/src/utilities/__tests__/modeTransitionReasons.test.js +105 -105
  415. package/src/utilities/__tests__/platformUtils.test.js +80 -87
  416. package/src/utilities/__tests__/structuredFileValidator.test.js +261 -263
  417. package/src/utilities/__tests__/toolConstants.test.js +92 -94
  418. package/src/utilities/__tests__/useIsTouchDevice.detect.test.js +114 -114
  419. package/src/utilities/__tests__/webUiUtilSync.test.js +117 -117
  420. package/src/utilities/attachmentValidator.js +284 -288
  421. package/src/utilities/authCache.js.backup-1779570472481 +121 -121
  422. package/src/utilities/browserStealth.js +631 -630
  423. package/src/utilities/configManager.js +616 -617
  424. package/src/utilities/directoryAccessManager.js +564 -565
  425. package/src/utilities/fileProcessor.js +308 -307
  426. package/src/utilities/humanBehavior.js +454 -453
  427. package/src/utilities/logger.js +479 -479
  428. package/src/utilities/structuredFileValidator.js +696 -699
  429. package/src/utilities/tagParser.js +5 -10
  430. package/src/utilities/userDataDir.js +308 -308
  431. package/node_modules/@isaacs/brace-expansion/dist/commonjs/index.js.map +0 -1
  432. package/node_modules/@isaacs/brace-expansion/dist/esm/index.js.map +0 -1
  433. package/node_modules/minipass/LICENSE +0 -15
  434. /package/node_modules/{@isaacs → glob/node_modules}/balanced-match/LICENSE.md +0 -0
  435. /package/node_modules/{@isaacs → glob/node_modules}/balanced-match/dist/commonjs/index.d.ts +0 -0
  436. /package/node_modules/{@isaacs → glob/node_modules}/balanced-match/dist/commonjs/index.d.ts.map +0 -0
  437. /package/node_modules/{@isaacs → glob/node_modules}/balanced-match/dist/commonjs/index.js +0 -0
  438. /package/node_modules/{@isaacs → glob/node_modules}/balanced-match/dist/commonjs/index.js.map +0 -0
  439. /package/node_modules/{@isaacs → glob/node_modules}/balanced-match/dist/commonjs/package.json +0 -0
  440. /package/node_modules/{@isaacs → glob/node_modules}/balanced-match/dist/esm/index.d.ts +0 -0
  441. /package/node_modules/{@isaacs → glob/node_modules}/balanced-match/dist/esm/index.d.ts.map +0 -0
  442. /package/node_modules/{@isaacs → glob/node_modules}/balanced-match/dist/esm/index.js +0 -0
  443. /package/node_modules/{@isaacs → glob/node_modules}/balanced-match/dist/esm/index.js.map +0 -0
  444. /package/node_modules/{@isaacs → glob/node_modules}/balanced-match/dist/esm/package.json +0 -0
  445. /package/node_modules/{@isaacs → glob/node_modules}/brace-expansion/LICENSE +0 -0
  446. /package/node_modules/{@isaacs → glob/node_modules}/brace-expansion/dist/commonjs/index.d.ts +0 -0
  447. /package/node_modules/{@isaacs → glob/node_modules}/brace-expansion/dist/commonjs/index.d.ts.map +0 -0
  448. /package/node_modules/{@isaacs → glob/node_modules}/brace-expansion/dist/commonjs/package.json +0 -0
  449. /package/node_modules/{@isaacs → glob/node_modules}/brace-expansion/dist/esm/index.d.ts +0 -0
  450. /package/node_modules/{@isaacs → glob/node_modules}/brace-expansion/dist/esm/index.d.ts.map +0 -0
  451. /package/node_modules/{@isaacs → glob/node_modules}/brace-expansion/dist/esm/package.json +0 -0
@@ -1,638 +1,629 @@
1
- /**
2
- * DesktopTool — agent-facing desktop-control surface.
3
- *
4
- * Pairs the OS controller (mouse / keyboard / screenshot / windows) with
5
- * a pluggable visual-grounding model (default: Kimi K2.6) so an agent
6
- * can say "click the orange Save button" and the tool figures out
7
- * `(x, y)` from a fresh screenshot before the click.
8
- *
9
- * Five guardrails are layered into every execute() call:
10
- *
11
- * 1. CAPABILITY GATE - osController refuses ops the OS can't do
12
- * (e.g. mouseInput on Wayland) and surfaces
13
- * an operator-friendly remediation message.
14
- *
15
- * 2. PER-AGENT ALLOWLIST - toolConfig.allowedActions lets an operator
16
- * disable any subset of actions per agent
17
- * (e.g. read-only screenshot bot vs. full
18
- * control). Default: ALL DISABLED — the
19
- * agent has to be explicitly granted control.
20
- *
21
- * 3. KILL SWITCH - LOXIA_DESKTOP_TOOL_DISABLED=1 env flag
22
- * short-circuits every call with a clear
23
- * error. Zero-source kill in production.
24
- *
25
- * 4. AUDIT TRAIL - every action emits a structured log line
26
- * with operation id + grounded coords so
27
- * screen-recording-free postmortems work.
28
- *
29
- * 5. INTENT GROUNDING - "click" / "scroll" / "drag" accept either
30
- * raw (x, y) OR a natural-language `intent`.
31
- * With intent, the tool grabs a screenshot,
32
- * asks the grounding model, validates the
33
- * returned coords fit the screen, and
34
- * ONLY THEN moves. Failed grounding never
35
- * silently clicks somewhere random.
36
- *
37
- * Model is swappable via toolConfig.groundingModelId (any id registered
38
- * in src/services/grounding/registry.js). Backend proxy vs. direct
39
- * Foundry is also config — see _buildAdapter below.
40
- */
41
-
42
- import { writeFile, mkdir } from 'node:fs/promises';
43
- import { join } from 'node:path';
44
- import { tmpdir } from 'node:os';
45
- import { BaseTool } from '../baseTool.js';
46
- import { createOSController, OSError } from './osController.js';
47
- import {
48
- createGroundingModel,
49
- ModelId,
50
- DEFAULT_REASONING_EFFORT,
51
- } from '../../services/grounding/index.js';
52
-
53
- /** Every action the tool can perform. */
54
- export const DesktopAction = Object.freeze({
55
- SCREENSHOT: 'screenshot',
56
- CLICK: 'click',
57
- TYPE: 'type',
58
- KEY_PRESS: 'key_press',
59
- SCROLL: 'scroll',
60
- DRAG: 'drag',
61
- LIST_WINDOWS: 'list_windows',
62
- FOCUS_WINDOW: 'focus_window',
63
- DESCRIBE_CAPS: 'describe_capabilities',
64
- });
65
-
66
- const ACTIONS_REQUIRING_INPUT = new Set([
67
- DesktopAction.CLICK, DesktopAction.TYPE, DesktopAction.KEY_PRESS,
68
- DesktopAction.SCROLL, DesktopAction.DRAG, DesktopAction.FOCUS_WINDOW,
69
- ]);
70
-
71
- const ACTIONS_THAT_CAN_USE_INTENT = new Set([
72
- DesktopAction.CLICK, DesktopAction.SCROLL, DesktopAction.DRAG,
73
- ]);
74
-
75
- /** Env-level kill switch — checked on every call. */
76
- const ENV_KILL_FLAG = 'LOXIA_DESKTOP_TOOL_DISABLED';
77
-
78
- /** Defaults — overridable via per-agent toolConfig.desktop or this.config. */
79
- const Defaults = Object.freeze({
80
- /** Default actions an agent gets WHEN allowedActions is unset.
81
- * Empty list = no control. Operator must opt in per agent. */
82
- ALLOWED_ACTIONS: [],
83
- /** Visual-grounding model (registry id). */
84
- GROUNDING_MODEL: ModelId.KIMI_K2_6,
85
- /** Grounding reasoning effort. */
86
- GROUNDING_EFFORT: DEFAULT_REASONING_EFFORT,
87
- /** Hard cap on a single grounded coord lookup. Catches hangs. */
88
- GROUNDING_TIMEOUT_MS: 180_000,
89
- /** Refuse clicks > this many px from the screen edge (sanity check
90
- * against bogus grounding outputs). */
91
- COORD_MARGIN_PX: 0,
92
- });
93
-
94
- export class DesktopTool extends BaseTool {
95
- constructor(config = {}, logger = null) {
96
- super(config, logger);
97
- this.id = 'desktop';
98
- this.name = 'Desktop Control';
99
- this.version = '1.0.0';
100
- this.requiresProject = false;
101
- this.isAsync = true;
102
- // Modest builtin delay so the OS has time to repaint between
103
- // an action and the agent's next screenshot.
104
- this.builtinDelay = 150;
105
-
106
- // Injected at registry time. The tool needs:
107
- // - aiService: source of baseUrl + apiKey for proxy-mode grounding
108
- // - osController: defaults to a fresh createOSController() instance
109
- // but tests inject a stub.
110
- this.aiService = null;
111
- this.osController = config.osController || null;
112
- this._adapterCache = null; // lazy built on first ground()
113
- }
114
-
115
- /** Called by index.js after construction. */
116
- setAIService(aiService) {
117
- this.aiService = aiService;
118
- }
119
-
120
- /** Static factory for tests / explicit DI. */
121
- static withDependencies({ aiService, osController, config = {}, logger = null }) {
122
- const tool = new DesktopTool({ ...config, osController }, logger);
123
- if (aiService) tool.setAIService(aiService);
124
- return tool;
125
- }
126
-
127
- // ─── BaseTool surface ─────────────────────────────────────────────
128
-
129
- getDescription() {
130
- // NOTE: this string lands in the agent's system prompt every turn.
131
- // Keep it tight, action-oriented, and free of meta-commentary like
132
- // "this is beta" / "we're missing safeguards" — agents read those
133
- // disclaimers as instructions to act timidly and the user's UX
134
- // suffers. Operator-facing beta indicators live in the web-UI
135
- // configurator + tool-selector pills, not here.
136
- return `
137
- Desktop Control Tool: drive the user's keyboard, mouse, screen, and
138
- windows like a human. Combines OS-level input with visual grounding
139
- ("click the orange Save button") via a vision LLM.
140
-
141
- USAGE:
142
-
143
- {
144
- "toolId": "desktop",
145
- "parameters": {
146
- "action": "click",
147
- "intent": "the orange Save button"
148
- }
149
- }
150
-
151
- PREFERRED WORKFLOW use intent-driven actions, NOT raw screenshot:
152
-
153
- For "click X" tasks, do NOT call screenshot first and then try to read
154
- the image yourself. You can't see screenshot results in plain text. The
155
- intent-driven actions screenshot + ground + act in one step:
156
-
157
- { "toolId": "desktop", "parameters": {
158
- "action": "click", "intent": "the Netflix icon in the taskbar" }}
159
-
160
- That triggers: screenshot grounding model finds the coords → click.
161
- You only need to look at the screenshot yourself if grounding fails
162
- (rare on legible UIs).
163
-
164
- ACTIONS:
165
- - click (intent OR x,y) single/double click use intent first
166
- - type (text) keyboard input into focused window
167
- - key_press (keys[]) chord like ["Control","S"]
168
- - scroll (intent OR x,y, dy) positive dy scrolls down
169
- - drag (fromIntent/toIntent OR from/to coords)
170
- - list_windows titles + bounds of open windows
171
- - focus_window (titleMatch) bring matching window to front
172
- - screenshot capture screen to disk (returns file
173
- path; you cannot read raw PNG bytes —
174
- use the vision tool on the path if you
175
- must inspect, or prefer intent actions)
176
- - describe_capabilities OS + display server + permission state
177
-
178
- INTENT FAILURE: if grounding can't find the target, you'll get a
179
- GROUNDING_FAILED or COORDS_OUT_OF_BOUNDS error with the model's raw
180
- answer. Sharpen the intent ("the red Save button in the toolbar")
181
- and retry.
182
-
183
- PERMISSION MODEL:
184
- - Every action is OFF by default per-agent until the operator adds
185
- it to toolConfig.desktop.allowedActions in the agent's config.
186
- - LOXIA_DESKTOP_TOOL_DISABLED=1 is a global kill switch (env).
187
- - On Linux Wayland, input actions are blocked by the OS; only
188
- screenshot + list_windows work. The tool surfaces a clear
189
- "Wayland blocks input injection" error for the rest.
190
-
191
- OS NOTES:
192
- - macOS: needs Accessibility + Screen Recording permissions granted
193
- to the Loxia process. Without them you'll see PERMISSION_DENIED.
194
- - First call lazily loads @nut-tree-fork/nut-js (optional dep, ~30MB
195
- native binary). If not installed, every action fails with
196
- NATIVE_UNAVAILABLE install it or run on a different machine.
197
- `.trim();
198
- }
199
-
200
- parseParameters(content) {
201
- try {
202
- const trimmed = (content || '').trim();
203
- if (trimmed.startsWith('{')) {
204
- const parsed = JSON.parse(trimmed);
205
- return parsed.parameters || parsed;
206
- }
207
- // No XML alternate form — desktop actions are too varied to
208
- // hand-author. JSON-only keeps the surface honest.
209
- throw new Error('desktop tool requires JSON parameters');
210
- } catch (err) {
211
- throw new Error(`Failed to parse desktop parameters: ${err.message}`);
212
- }
213
- }
214
-
215
- getSupportedActions() {
216
- return Object.values(DesktopAction);
217
- }
218
-
219
- getRequiredParameters() {
220
- return ['action'];
221
- }
222
-
223
- // ─── execute ──────────────────────────────────────────────────────
224
-
225
- async execute(params, context) {
226
- // 1. Kill switch
227
- if (process.env[ENV_KILL_FLAG] === '1') {
228
- return this._fail('DESKTOP_DISABLED',
229
- 'Desktop tool disabled via LOXIA_DESKTOP_TOOL_DISABLED.');
230
- }
231
-
232
- const action = params?.action;
233
- if (!action || !Object.values(DesktopAction).includes(action)) {
234
- return this._fail('INVALID_ACTION',
235
- `unknown action "${action}". Valid: ${Object.values(DesktopAction).join(', ')}`);
236
- }
237
-
238
- // 2. Per-agent allowlist
239
- const effective = this.getEffectiveConfig(context, Defaults);
240
- const allowed = effective.allowedActions || Defaults.ALLOWED_ACTIONS;
241
- if (!Array.isArray(allowed) || !allowed.includes(action)) {
242
- return this._fail('NOT_PERMITTED',
243
- `action "${action}" not in this agent's allowedActions ` +
244
- `(set toolConfig.desktop.allowedActions to enable).`);
245
- }
246
-
247
- // 3. Dispatch
248
- const osc = this._osc();
249
- try {
250
- switch (action) {
251
- case DesktopAction.SCREENSHOT: return await this._actScreenshot(osc, params);
252
- case DesktopAction.CLICK: return await this._actClick(osc, effective, params, context);
253
- case DesktopAction.TYPE: return await this._actType(osc, params);
254
- case DesktopAction.KEY_PRESS: return await this._actKeyPress(osc, params);
255
- case DesktopAction.SCROLL: return await this._actScroll(osc, effective, params, context);
256
- case DesktopAction.DRAG: return await this._actDrag(osc, effective, params, context);
257
- case DesktopAction.LIST_WINDOWS: return await this._actListWindows(osc);
258
- case DesktopAction.FOCUS_WINDOW: return await this._actFocusWindow(osc, params);
259
- case DesktopAction.DESCRIBE_CAPS: return await this._actDescribeCaps(osc);
260
- }
261
- } catch (err) {
262
- return this._fail(err.code || 'OP_FAILED', err.message, action);
263
- }
264
- }
265
-
266
- // ─── actions ──────────────────────────────────────────────────────
267
-
268
- async _actScreenshot(osc, params) {
269
- const png = await osc.screenshot({ region: params.region });
270
- const size = await osc.screenSize();
271
- // Tag the action in the output. Without this, a screenshot success
272
- // result is indistinguishable from a click/scroll/drag SUCCESS when
273
- // the agent reads the tool result text and out-of-order batched
274
- // returns can land a stale screenshot success right where a click
275
- // success would normally appear. Putting "[action: screenshot]" up
276
- // front makes the action explicit so the model can't mistake it for
277
- // a click confirmation.
278
- // Save to a temp file rather than inlining base64 into the tool
279
- // result. A 200 KB PNG becomes ~290 KB of base64 — stuffing that
280
- // into the conversation as a text tool-result derails the next
281
- // model turn (it sees a wall of characters, not an image, and
282
- // loses the original task; streaming often aborts mid-scan).
283
- // Returning a path lets vision-capable downstream tools open it
284
- // properly, and keeps the conversation token count sane.
285
- const filePath = await this._saveScreenshot(png);
286
- this._audit('screenshot', { size, bytes: png.length, filePath });
287
- return {
288
- success: true,
289
- action: 'screenshot',
290
- output:
291
- `[action: screenshot] Captured ${size.width}x${size.height} screenshot ` +
292
- `(${png.length} bytes) → ${filePath}\n` +
293
- `NOTE: a successful screenshot does NOT mean an earlier click/scroll/drag ` +
294
- `succeeded — those have separate results tagged "[action: click]" etc. ` +
295
- `Look at the most recent action-tagged result for the action you actually called.\n` +
296
- `To inspect the image use the vision tool with this path, or just call ` +
297
- `click/scroll/drag with an "intent" (one-step: screenshot + ground + act).`,
298
- screenshotPath: filePath,
299
- bytes: png.length,
300
- size,
301
- };
302
- }
303
-
304
- /**
305
- * Persist a captured PNG to disk. We use the OS temp dir under a
306
- * stable subfolder so old shots are easy to clean up by hand, and
307
- * a millisecond-precise filename so concurrent captures don't
308
- * collide. Returning the file path (not the bytes) is what keeps
309
- * the conversation text-size sane — see _actScreenshot.
310
- */
311
- async _saveScreenshot(png) {
312
- const dir = join(tmpdir(), 'loxia-desktop-screenshots');
313
- await mkdir(dir, { recursive: true });
314
- const filePath = join(dir, `screenshot-${Date.now()}.png`);
315
- await writeFile(filePath, png);
316
- return filePath;
317
- }
318
-
319
- async _actClick(osc, effective, params, context) {
320
- const { x, y, groundedFrom } = await this._resolveCoords(osc, effective, params, context);
321
- await osc.mouseClick(x, y, { button: params.button, count: params.count });
322
- this._audit('click', { x, y, button: params.button || 'left', count: params.count || 1, groundedFrom });
323
- return {
324
- success: true,
325
- action: 'click',
326
- output: `[action: click] Clicked at (${x}, ${y})${groundedFrom ? ` — grounded from "${groundedFrom}"` : ''}.`,
327
- coords: { x, y },
328
- groundedFrom,
329
- };
330
- }
331
-
332
- async _actType(osc, params) {
333
- if (typeof params.text !== 'string' || params.text.length === 0) {
334
- return this._fail('INVALID_INPUT', 'type action requires non-empty "text"', 'type');
335
- }
336
- await osc.typeText(params.text, { delayMs: params.delayMs });
337
- this._audit('type', { chars: params.text.length });
338
- return {
339
- success: true,
340
- action: 'type',
341
- output: `[action: type] Typed ${params.text.length} characters.`,
342
- };
343
- }
344
-
345
- async _actKeyPress(osc, params) {
346
- if (!Array.isArray(params.keys) || params.keys.length === 0) {
347
- return this._fail('INVALID_INPUT', 'key_press requires non-empty "keys" array', 'key_press');
348
- }
349
- await osc.keyPress(params.keys);
350
- this._audit('key_press', { keys: params.keys });
351
- return {
352
- success: true,
353
- action: 'key_press',
354
- output: `[action: key_press] Pressed ${params.keys.join('+')}.`,
355
- };
356
- }
357
-
358
- async _actScroll(osc, effective, params, context) {
359
- if (!Number.isFinite(params.dy)) {
360
- return this._fail('INVALID_INPUT', 'scroll requires numeric "dy"', 'scroll');
361
- }
362
- const { x, y, groundedFrom } = await this._resolveCoords(osc, effective, params, context);
363
- await osc.mouseScroll(x, y, params.dy);
364
- this._audit('scroll', { x, y, dy: params.dy, groundedFrom });
365
- return {
366
- success: true,
367
- action: 'scroll',
368
- output: `[action: scroll] Scrolled ${params.dy > 0 ? 'down' : 'up'} ${Math.abs(params.dy)} at (${x}, ${y}).`,
369
- coords: { x, y },
370
- groundedFrom,
371
- };
372
- }
373
-
374
- async _actDrag(osc, effective, params, context) {
375
- // Drag accepts either two coord pairs OR two intents. The two ends
376
- // are grounded independently with the SAME screenshot — avoids two
377
- // model calls when both intents reference the same view.
378
- const from = await this._resolveCoords(
379
- osc, effective,
380
- { x: params.from?.x, y: params.from?.y, intent: params.fromIntent },
381
- context,
382
- );
383
- const to = await this._resolveCoords(
384
- osc, effective,
385
- { x: params.to?.x, y: params.to?.y, intent: params.toIntent },
386
- context,
387
- from._sharedScreenshot,
388
- );
389
- await osc.mouseDrag({ x: from.x, y: from.y }, { x: to.x, y: to.y }, { button: params.button });
390
- this._audit('drag', { from, to });
391
- return {
392
- success: true,
393
- action: 'drag',
394
- output: `[action: drag] Dragged from (${from.x}, ${from.y}) to (${to.x}, ${to.y}).`,
395
- from: { x: from.x, y: from.y },
396
- to: { x: to.x, y: to.y },
397
- };
398
- }
399
-
400
- async _actListWindows(osc) {
401
- const raw = await osc.listWindows();
402
- // Most platforms (Windows especially) report hundreds of OS-internal
403
- // handles with empty titles. Surfacing the full list bloats the
404
- // conversation context (>200 KB observed on Win11 with ~1300
405
- // handles) and the model loses the original task in the noise.
406
- // Filter to titled windows and cap to a sensible top-N. Total count
407
- // stays in the output so the agent knows truncation happened.
408
- const titled = raw.filter(w => w.title && w.title.trim().length > 0);
409
- const MAX_LIST = 50;
410
- const top = titled.slice(0, MAX_LIST);
411
- const truncated = titled.length > MAX_LIST;
412
- this._audit('list_windows', { total: raw.length, titled: titled.length, returned: top.length });
413
- return {
414
- success: true,
415
- action: 'list_windows',
416
- output:
417
- `[action: list_windows] ${titled.length} titled window(s)` +
418
- (raw.length !== titled.length ? ` (filtered ${raw.length - titled.length} untitled handles)` : '') +
419
- (truncated ? ` — showing first ${MAX_LIST}` : '') + ': ' +
420
- top.map(w => `"${w.title}"`).join(', '),
421
- windows: top,
422
- totalCount: raw.length,
423
- titledCount: titled.length,
424
- truncated,
425
- };
426
- }
427
-
428
- async _actFocusWindow(osc, params) {
429
- if (!params.titleMatch) {
430
- return this._fail('INVALID_INPUT', 'focus_window requires "titleMatch"', 'focus_window');
431
- }
432
- const r = await osc.focusWindow({ titleMatch: params.titleMatch });
433
- this._audit('focus_window', r);
434
- return {
435
- success: r.focused,
436
- action: 'focus_window',
437
- output: r.focused
438
- ? `[action: focus_window] Focused window: "${r.title}".`
439
- : `[action: focus_window] No window matched "${params.titleMatch}".`,
440
- ...r,
441
- };
442
- }
443
-
444
- async _actDescribeCaps(osc) {
445
- const caps = await osc.describeCapabilities();
446
- return {
447
- success: true,
448
- action: 'describe_capabilities',
449
- output: `[action: describe_capabilities] OS: ${caps.os}${caps.display ? ` / ${caps.display}` : ''}; ` +
450
- `screenshot=${caps.screenshot} mouseInput=${caps.mouseInput} ` +
451
- `keyboardInput=${caps.keyboardInput} windowFocus=${caps.windowFocus}` +
452
- (caps.degradedReason ? `\nDegraded: ${caps.degradedReason}` : ''),
453
- capabilities: caps,
454
- };
455
- }
456
-
457
- // ─── helpers ──────────────────────────────────────────────────────
458
-
459
- /**
460
- * Resolve (x, y) for an action. Three input shapes:
461
- *
462
- * 1. {x, y} — used as-is after validation
463
- * 2. {intent: '...'} — ask the grounding model
464
- * 3. neither — INVALID_INPUT
465
- *
466
- * When grounding fires, the screenshot is returned on the result so
467
- * the caller (drag) can reuse it for the second coord without an
468
- * extra capture.
469
- */
470
- async _resolveCoords(osc, effective, params, context, reuseScreenshot = null) {
471
- if (Number.isFinite(params.x) && Number.isFinite(params.y)) {
472
- return { x: params.x, y: params.y, groundedFrom: null };
473
- }
474
- if (typeof params.intent === 'string' && params.intent.trim()) {
475
- const size = await osc.screenSize();
476
- const screenshot = reuseScreenshot || await osc.screenshot();
477
- const adapter = await this._adapter(effective, context);
478
- const result = await this._groundWithFallback(adapter, {
479
- screenshot,
480
- intent: params.intent,
481
- imageSize: { width: size.width, height: size.height },
482
- knobs: { reasoning_effort: effective.groundingEffort || Defaults.GROUNDING_EFFORT },
483
- timeoutMs: effective.groundingTimeoutMs || Defaults.GROUNDING_TIMEOUT_MS,
484
- }, effective, context);
485
- if (!result.coords) {
486
- const err = new Error(`grounding produced no coords; model said: "${(result.answer || '').slice(0, 200)}"`);
487
- err.code = 'GROUNDING_FAILED';
488
- throw err;
489
- }
490
- const { x, y } = result.coords;
491
- if (!this._coordsInScreen(x, y, size, effective.coordMarginPx ?? Defaults.COORD_MARGIN_PX)) {
492
- const err = new Error(`grounded coords (${x}, ${y}) lie outside the ${size.width}x${size.height} screen`);
493
- err.code = 'COORDS_OUT_OF_BOUNDS';
494
- throw err;
495
- }
496
- return { x, y, groundedFrom: params.intent, _sharedScreenshot: screenshot };
497
- }
498
- const err = new Error('action requires either (x, y) or "intent"');
499
- err.code = 'INVALID_INPUT';
500
- throw err;
501
- }
502
-
503
- _coordsInScreen(x, y, size, margin) {
504
- return x >= margin && y >= margin
505
- && x <= size.width - margin
506
- && y <= size.height - margin;
507
- }
508
-
509
- /**
510
- * Lazy-build the grounding adapter. The model id + transport mode
511
- * come from toolConfig so an operator can swap Kimi → some future
512
- * model without code changes.
513
- */
514
- async _adapter(effective, context) {
515
- if (this._adapterCache) return this._adapterCache;
516
- this._adapterCache = this._buildAdapter(effective, context);
517
- return this._adapterCache;
518
- }
519
-
520
- _buildAdapter(effective, context) {
521
- const modelId = effective.groundingModelId || Defaults.GROUNDING_MODEL;
522
-
523
- const foundryEndpoint = process.env.FOUNDRY_ENDPOINT;
524
- const foundryKey = process.env.FOUNDRY_KEY;
525
- const forceDirect = process.env.LOXIA_GROUNDING_DIRECT === '1';
526
- const haveDirectCreds = !!(foundryEndpoint && foundryKey);
527
-
528
- // Mode selection. In order of preference:
529
- // 1. forceDirect env flag → direct (test / CI)
530
- // 2. direct creds set AND no aiService → direct (headless / bench)
531
- // 3. otherwise → proxy via the Loxia backend (production path)
532
- // A failed proxy request (e.g. 404 because /llm/grounding isn't
533
- // deployed yet) automatically retries once in direct mode if creds
534
- // exist — `ground()` does the fallback below.
535
- if (forceDirect || (!this.aiService && haveDirectCreds)) {
536
- if (!haveDirectCreds) {
537
- throw Object.assign(new Error(
538
- 'Direct mode requested but FOUNDRY_ENDPOINT / FOUNDRY_KEY env vars are not set.'
539
- ), { code: 'NOT_INITIALISED' });
540
- }
541
- return createGroundingModel(modelId, {
542
- mode: 'direct',
543
- endpoint: foundryEndpoint,
544
- apiKey: foundryKey,
545
- });
546
- }
547
- if (!this.aiService) {
548
- throw Object.assign(new Error(
549
- 'DesktopTool has no grounding transport. Either set FOUNDRY_ENDPOINT + ' +
550
- 'FOUNDRY_KEY env vars (direct mode) OR run a Loxia backend that has ' +
551
- 'POST /llm/grounding deployed (proxy mode).'
552
- ), { code: 'NOT_INITIALISED' });
553
- }
554
- return createGroundingModel(modelId, {
555
- mode: 'proxy',
556
- backendUrl: this.aiService.baseUrl,
557
- userApiKey: this._resolveUserKey(context),
558
- });
559
- }
560
-
561
- /**
562
- * Wraps the adapter's ground() with one automatic fallback to direct
563
- * mode when proxy returns 404 (route not deployed yet) AND direct
564
- * credentials are available in env. Saves the user from "deploy the
565
- * backend before you can fun" friction during the rollout window.
566
- */
567
- async _groundWithFallback(adapter, request, effective, context) {
568
- try {
569
- return await adapter.ground(request);
570
- } catch (err) {
571
- const is404 = /\b404\b/.test(err.message || '') || /\bNot Found\b/i.test(err.message || '');
572
- const isProxy = adapter.mode === 'proxy';
573
- const haveDirectCreds = !!(process.env.FOUNDRY_ENDPOINT && process.env.FOUNDRY_KEY);
574
- if (!is404 || !isProxy || !haveDirectCreds) throw err;
575
-
576
- this.logger?.warn?.(
577
- '[desktop] proxy /llm/grounding returned 404 — falling back to direct Foundry. ' +
578
- 'Deploy the backend route to get billing + audit back.',
579
- );
580
- const directAdapter = createGroundingModel(
581
- effective.groundingModelId || Defaults.GROUNDING_MODEL,
582
- { mode: 'direct', endpoint: process.env.FOUNDRY_ENDPOINT, apiKey: process.env.FOUNDRY_KEY },
583
- );
584
- // Replace cache so subsequent calls go straight to direct.
585
- this._adapterCache = directAdapter;
586
- return await directAdapter.ground(request);
587
- }
588
- }
589
-
590
- _resolveUserKey(context) {
591
- // Pull the per-session Loxia token the same way other tools do.
592
- const km = this.aiService?.apiKeyManager;
593
- if (km && typeof km.getKeysForRequest === 'function') {
594
- const keys = km.getKeysForRequest(context?.sessionId, {
595
- platformProvided: context?.platformProvided !== false,
596
- });
597
- if (keys?.loxiaApiKey) return keys.loxiaApiKey;
598
- }
599
- return this.aiService?.config?.apiKey || process.env.LOXIA_API_KEY || null;
600
- }
601
-
602
- /**
603
- * Lazy-create an OS controller if the constructor didn't get one.
604
- * Production path: created on demand. Test path: injected at ctor.
605
- */
606
- _osc() {
607
- if (!this.osController) this.osController = createOSController();
608
- return this.osController;
609
- }
610
-
611
- _audit(action, meta) {
612
- this.logger?.info?.(`[desktop] ${action}`, { tool: this.id, action, ...meta });
613
- }
614
-
615
- /**
616
- * Build a structured failure result. `action` is optional but
617
- * strongly preferred — it lets the agent distinguish "click failed"
618
- * from "screenshot failed" at a glance when results arrive
619
- * out-of-order in a batched message, which is the only way it can
620
- * recover the correct mental model of what happened.
621
- */
622
- _fail(code, message, action = null) {
623
- this.logger?.warn?.(`[desktop] ${code}: ${message}`);
624
- const prefix = action ? `[action: ${action}] ` : '';
625
- return {
626
- success: false,
627
- ...(action ? { action } : {}),
628
- error: message,
629
- code,
630
- output: `${prefix}Desktop action failed (${code}): ${message}`,
631
- };
632
- }
633
- }
634
-
635
- // Re-export OSError so callers can match on it without two imports.
636
- export { OSError };
637
-
638
- export default DesktopTool;
1
+ /**
2
+ * DesktopTool — agent-facing desktop-control surface.
3
+ *
4
+ * Pairs the OS controller (mouse / keyboard / screenshot / windows) with
5
+ * a pluggable visual-grounding model (default: Kimi K2.6) so an agent
6
+ * can say "click the orange Save button" and the tool figures out
7
+ * `(x, y)` from a fresh screenshot before the click.
8
+ *
9
+ * Five guardrails are layered into every execute() call:
10
+ *
11
+ * 1. CAPABILITY GATE - osController refuses ops the OS can't do
12
+ * (e.g. mouseInput on Wayland) and surfaces
13
+ * an operator-friendly remediation message.
14
+ *
15
+ * 2. PER-AGENT ALLOWLIST - toolConfig.allowedActions lets an operator
16
+ * disable any subset of actions per agent
17
+ * (e.g. read-only screenshot bot vs. full
18
+ * control). Default: ALL DISABLED — the
19
+ * agent has to be explicitly granted control.
20
+ *
21
+ * 3. KILL SWITCH - LOXIA_DESKTOP_TOOL_DISABLED=1 env flag
22
+ * short-circuits every call with a clear
23
+ * error. Zero-source kill in production.
24
+ *
25
+ * 4. AUDIT TRAIL - every action emits a structured log line
26
+ * with operation id + grounded coords so
27
+ * screen-recording-free postmortems work.
28
+ *
29
+ * 5. INTENT GROUNDING - "click" / "scroll" / "drag" accept either
30
+ * raw (x, y) OR a natural-language `intent`.
31
+ * With intent, the tool grabs a screenshot,
32
+ * asks the grounding model, validates the
33
+ * returned coords fit the screen, and
34
+ * ONLY THEN moves. Failed grounding never
35
+ * silently clicks somewhere random.
36
+ *
37
+ * Model is swappable via toolConfig.groundingModelId (any id registered
38
+ * in src/services/grounding/registry.js). Backend proxy vs. direct
39
+ * Foundry is also config — see _buildAdapter below.
40
+ */
41
+
42
+ import { writeFile, mkdir } from 'node:fs/promises';
43
+ import { join } from 'node:path';
44
+ import { tmpdir } from 'node:os';
45
+ import { BaseTool } from '../baseTool.js';
46
+ import { createOSController, OSError } from './osController.js';
47
+ import {
48
+ createGroundingModel,
49
+ ModelId,
50
+ DEFAULT_REASONING_EFFORT,
51
+ } from '../../services/grounding/index.js';
52
+
53
+ /** Every action the tool can perform. */
54
+ export const DesktopAction = Object.freeze({
55
+ SCREENSHOT: 'screenshot',
56
+ CLICK: 'click',
57
+ TYPE: 'type',
58
+ KEY_PRESS: 'key_press',
59
+ SCROLL: 'scroll',
60
+ DRAG: 'drag',
61
+ LIST_WINDOWS: 'list_windows',
62
+ FOCUS_WINDOW: 'focus_window',
63
+ DESCRIBE_CAPS: 'describe_capabilities',
64
+ });
65
+
66
+ /** Env-level kill switch — checked on every call. */
67
+ const ENV_KILL_FLAG = 'LOXIA_DESKTOP_TOOL_DISABLED';
68
+
69
+ /** Defaults — overridable via per-agent toolConfig.desktop or this.config. */
70
+ const Defaults = Object.freeze({
71
+ /** Default actions an agent gets WHEN allowedActions is unset.
72
+ * Empty list = no control. Operator must opt in per agent. */
73
+ ALLOWED_ACTIONS: [],
74
+ /** Visual-grounding model (registry id). */
75
+ GROUNDING_MODEL: ModelId.KIMI_K2_6,
76
+ /** Grounding reasoning effort. */
77
+ GROUNDING_EFFORT: DEFAULT_REASONING_EFFORT,
78
+ /** Hard cap on a single grounded coord lookup. Catches hangs. */
79
+ GROUNDING_TIMEOUT_MS: 180_000,
80
+ /** Refuse clicks > this many px from the screen edge (sanity check
81
+ * against bogus grounding outputs). */
82
+ COORD_MARGIN_PX: 0,
83
+ });
84
+
85
/**
 * Desktop Control tool: exposes OS-level mouse / keyboard / window /
 * screenshot actions to an agent, optionally resolving a natural-language
 * "intent" to screen coordinates through a grounding model.
 *
 * Every public action goes through {@link DesktopTool#execute}, which
 * applies the env kill switch and the per-agent allowlist before
 * dispatching, and always resolves to a result object (never rejects for
 * an action failure).
 */
export class DesktopTool extends BaseTool {
  /**
   * @param {object} [config] - Tool config; `config.osController` may carry
   *   a pre-built OS controller (tests inject a stub).
   * @param {object|null} [logger] - Optional logger with `info` / `warn`.
   */
  constructor(config = {}, logger = null) {
    super(config, logger);
    this.id = 'desktop';
    this.name = 'Desktop Control';
    this.version = '1.0.0';
    this.requiresProject = false;
    this.isAsync = true;
    // Modest builtin delay so the OS has time to repaint between
    // an action and the agent's next screenshot.
    this.builtinDelay = 150;

    // Injected at registry time. The tool needs:
    //   - aiService:    source of baseUrl + apiKey for proxy-mode grounding
    //   - osController: created lazily by _osc() when not injected here
    this.aiService = null;
    this.osController = config.osController || null;

    // Grounding adapter cache (lazy, built on first grounding call).
    // `_adapterCacheKey` remembers which model / transport / user key the
    // cached adapter was built for. A cached adapter whose key is null is
    // treated as pinned (injected by a test, or installed by the direct
    // fallback) and is returned as-is.
    this._adapterCache = null;
    this._adapterCacheKey = null;
  }

  /**
   * Called by index.js after construction.
   * @param {object} aiService - Provides `baseUrl`, and optionally
   *   `apiKeyManager` / `config.apiKey`, for proxy-mode grounding.
   */
  setAIService(aiService) {
    this.aiService = aiService;
  }

  /**
   * Static factory for tests / explicit DI.
   * @param {object} deps
   * @param {object} [deps.aiService]
   * @param {object} [deps.osController]
   * @param {object} [deps.config]
   * @param {object|null} [deps.logger]
   * @returns {DesktopTool}
   */
  static withDependencies({ aiService, osController, config = {}, logger = null }) {
    const tool = new DesktopTool({ ...config, osController }, logger);
    if (aiService) tool.setAIService(aiService);
    return tool;
  }

  // ─── BaseTool surface ─────────────────────────────────────────────

  /**
   * Agent-facing usage text.
   * @returns {string}
   */
  getDescription() {
    // NOTE: this string lands in the agent's system prompt every turn.
    // Keep it tight, action-oriented, and free of meta-commentary like
    // "this is beta" / "we're missing safeguards" — agents read those
    // disclaimers as instructions to act timidly and the user's UX
    // suffers. Operator-facing beta indicators live in the web-UI
    // configurator + tool-selector pills, not here.
    return `
Desktop Control Tool: drive the user's keyboard, mouse, screen, and
windows like a human. Combines OS-level input with visual grounding
("click the orange Save button") via a vision LLM.

USAGE:

{
  "toolId": "desktop",
  "parameters": {
    "action": "click",
    "intent": "the orange Save button"
  }
}

PREFERRED WORKFLOW — use intent-driven actions, NOT raw screenshot:

For "click X" tasks, do NOT call screenshot first and then try to read
the image yourself. You can't see screenshot results in plain text. The
intent-driven actions screenshot + ground + act in one step:

  { "toolId": "desktop", "parameters": {
      "action": "click", "intent": "the Netflix icon in the taskbar" }}

That triggers: screenshot → grounding model finds the coords → click.
You only need to look at the screenshot yourself if grounding fails
(rare on legible UIs).

ACTIONS:
- click (intent OR x,y)           single/double click — use intent first
- type (text)                     keyboard input into focused window
- key_press (keys[])              chord like ["Control","S"]
- scroll (intent OR x,y, dy)      positive dy scrolls down
- drag (fromIntent/toIntent OR from/to coords)
- list_windows                    titles + bounds of open windows
- focus_window (titleMatch)       bring matching window to front
- screenshot                      capture screen to disk (returns file
                                  path; you cannot read raw PNG bytes —
                                  use the vision tool on the path if you
                                  must inspect, or prefer intent actions)
- describe_capabilities           OS + display server + permission state

INTENT FAILURE: if grounding can't find the target, you'll get a
GROUNDING_FAILED or COORDS_OUT_OF_BOUNDS error with the model's raw
answer. Sharpen the intent ("the red Save button in the toolbar")
and retry.

PERMISSION MODEL:
- Every action is OFF by default per-agent until the operator adds
  it to toolConfig.desktop.allowedActions in the agent's config.
- LOXIA_DESKTOP_TOOL_DISABLED=1 is a global kill switch (env).
- On Linux Wayland, input actions are blocked by the OS; only
  screenshot + list_windows work. The tool surfaces a clear
  "Wayland blocks input injection" error for the rest.

OS NOTES:
- macOS: needs Accessibility + Screen Recording permissions granted
  to the Loxia process. Without them you'll see PERMISSION_DENIED.
- First call lazily loads @nut-tree-fork/nut-js (optional dep, ~30MB
  native binary). If not installed, every action fails with
  NATIVE_UNAVAILABLE — install it or run on a different machine.
`.trim();
  }

  /**
   * Parse the raw tool-call payload. Only JSON is accepted; either the
   * full `{ toolId, parameters }` envelope or the bare parameters object.
   * @param {string} content
   * @returns {object} The parameters object.
   * @throws {Error} When the payload is not JSON or fails to parse (the
   *   original error is attached as `cause`).
   */
  parseParameters(content) {
    try {
      const trimmed = (content || '').trim();
      if (trimmed.startsWith('{')) {
        const parsed = JSON.parse(trimmed);
        return parsed.parameters || parsed;
      }
      // No XML alternate form — desktop actions are too varied to
      // hand-author. JSON-only keeps the surface honest.
      throw new Error('desktop tool requires JSON parameters');
    } catch (err) {
      throw new Error(`Failed to parse desktop parameters: ${err.message}`, { cause: err });
    }
  }

  /** @returns {string[]} Every action id declared in DesktopAction. */
  getSupportedActions() {
    return Object.values(DesktopAction);
  }

  /** @returns {string[]} Parameter names that must always be present. */
  getRequiredParameters() {
    return ['action'];
  }

  // ─── execute ──────────────────────────────────────────────────────

  /**
   * Gate and dispatch a single desktop action.
   *
   * @param {object} params - Must contain `action`; other fields depend on
   *   the action.
   * @param {object} context - Execution context, forwarded to
   *   getEffectiveConfig() and to grounding.
   * @returns {Promise<object>} A result with `success`, `output`, and
   *   action-specific fields; failures carry `error` and `code`.
   */
  async execute(params, context) {
    // 1. Kill switch
    if (process.env[ENV_KILL_FLAG] === '1') {
      return this._fail('DESKTOP_DISABLED',
        'Desktop tool disabled via LOXIA_DESKTOP_TOOL_DISABLED.');
    }

    const knownActions = Object.values(DesktopAction);
    const action = params?.action;
    if (!action || !knownActions.includes(action)) {
      return this._fail('INVALID_ACTION',
        `unknown action "${action}". Valid: ${knownActions.join(', ')}`);
    }

    // 2. Per-agent allowlist
    const effective = this.getEffectiveConfig(context, Defaults);
    const allowed = effective.allowedActions || Defaults.ALLOWED_ACTIONS;
    if (!Array.isArray(allowed) || !allowed.includes(action)) {
      return this._fail('NOT_PERMITTED',
        `action "${action}" not in this agent's allowedActions ` +
        `(set toolConfig.desktop.allowedActions to enable).`);
    }

    // 3. Dispatch. The controller is obtained inside the try so that a
    // failing controller factory is reported as a structured failure
    // like any other action error, instead of rejecting the promise.
    try {
      const osc = this._osc();
      switch (action) {
        case DesktopAction.SCREENSHOT: return await this._actScreenshot(osc, params);
        case DesktopAction.CLICK: return await this._actClick(osc, effective, params, context);
        case DesktopAction.TYPE: return await this._actType(osc, params);
        case DesktopAction.KEY_PRESS: return await this._actKeyPress(osc, params);
        case DesktopAction.SCROLL: return await this._actScroll(osc, effective, params, context);
        case DesktopAction.DRAG: return await this._actDrag(osc, effective, params, context);
        case DesktopAction.LIST_WINDOWS: return await this._actListWindows(osc);
        case DesktopAction.FOCUS_WINDOW: return await this._actFocusWindow(osc, params);
        case DesktopAction.DESCRIBE_CAPS: return await this._actDescribeCaps(osc);
        default:
          // Only reachable if DesktopAction gains a member without a
          // handler here; fail loudly rather than resolve to undefined.
          return this._fail('INVALID_ACTION', `action "${action}" has no handler`, action);
      }
    } catch (err) {
      return this._fail(err?.code || 'OP_FAILED', err?.message ?? String(err), action);
    }
  }

  // ─── actions ──────────────────────────────────────────────────────

  /**
   * Capture the screen (or `params.region`) and persist it to disk.
   * @param {object} osc - OS controller.
   * @param {object} params - May contain `region`.
   * @returns {Promise<object>} Result including `screenshotPath`,
   *   `bytes`, and `size`.
   */
  async _actScreenshot(osc, params) {
    const png = await osc.screenshot({ region: params.region });
    const size = await osc.screenSize();
    // Tag the action in the output. Without this, a screenshot success
    // result is indistinguishable from a click/scroll/drag SUCCESS when
    // the agent reads the tool result text — and out-of-order batched
    // returns can land a stale screenshot success right where a click
    // success would normally appear. Putting "[action: screenshot]" up
    // front makes the action explicit so the model can't mistake it for
    // a click confirmation.
    // Save to a temp file rather than inlining base64 into the tool
    // result. A 200 KB PNG becomes ~290 KB of base64 — stuffing that
    // into the conversation as a text tool-result derails the next
    // model turn (it sees a wall of characters, not an image, and
    // loses the original task; streaming often aborts mid-scan).
    // Returning a path lets vision-capable downstream tools open it
    // properly, and keeps the conversation token count sane.
    const filePath = await this._saveScreenshot(png);
    this._audit('screenshot', { size, bytes: png.length, filePath });
    return {
      success: true,
      action: 'screenshot',
      output:
        `[action: screenshot] Captured ${size.width}x${size.height} screenshot ` +
        `(${png.length} bytes) ${filePath}\n` +
        `NOTE: a successful screenshot does NOT mean an earlier click/scroll/drag ` +
        `succeeded — those have separate results tagged "[action: click]" etc. ` +
        `Look at the most recent action-tagged result for the action you actually called.\n` +
        `To inspect the image use the vision tool with this path, or just call ` +
        `click/scroll/drag with an "intent" (one-step: screenshot + ground + act).`,
      screenshotPath: filePath,
      bytes: png.length,
      size,
    };
  }

  /**
   * Persist a captured PNG to disk. We use the OS temp dir under a
   * stable subfolder so old shots are easy to clean up by hand.
   * Returning the file path (not the bytes) is what keeps the
   * conversation text-size sane — see _actScreenshot.
   *
   * The file is created exclusively (`wx`): if a capture taken in the
   * same millisecond already claimed `screenshot-<ms>.png`, a numeric
   * suffix is appended instead of overwriting the earlier shot.
   *
   * @param {Buffer|Uint8Array} png - Encoded image bytes.
   * @returns {Promise<string>} Absolute path of the written file.
   */
  async _saveScreenshot(png) {
    const dir = join(tmpdir(), 'loxia-desktop-screenshots');
    await mkdir(dir, { recursive: true });

    const stamp = Date.now();
    const MAX_ATTEMPTS = 100;
    for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt += 1) {
      const suffix = attempt === 0 ? '' : `-${attempt}`;
      const filePath = join(dir, `screenshot-${stamp}${suffix}.png`);
      try {
        await writeFile(filePath, png, { flag: 'wx' });
        return filePath;
      } catch (err) {
        // Only a name clash is retried; any other I/O error is real.
        if (err?.code !== 'EEXIST') throw err;
      }
    }
    throw Object.assign(
      new Error(`could not allocate a unique screenshot file name in ${dir}`),
      { code: 'OP_FAILED' },
    );
  }

  /**
   * Click at explicit coordinates or at a grounded intent.
   * @param {object} osc - OS controller.
   * @param {object} effective - Effective per-agent config.
   * @param {object} params - `x`/`y` or `intent`; optional `button`, `count`.
   * @param {object} context
   * @returns {Promise<object>}
   */
  async _actClick(osc, effective, params, context) {
    const { x, y, groundedFrom } = await this._resolveCoords(osc, effective, params, context);
    await osc.mouseClick(x, y, { button: params.button, count: params.count });
    this._audit('click', { x, y, button: params.button || 'left', count: params.count || 1, groundedFrom });
    return {
      success: true,
      action: 'click',
      output: `[action: click] Clicked at (${x}, ${y})${groundedFrom ? ` — grounded from "${groundedFrom}"` : ''}.`,
      coords: { x, y },
      groundedFrom,
    };
  }

  /**
   * Type text into the focused window.
   * @param {object} osc - OS controller.
   * @param {object} params - `text` (non-empty string); optional `delayMs`.
   * @returns {Promise<object>}
   */
  async _actType(osc, params) {
    if (typeof params.text !== 'string' || params.text.length === 0) {
      return this._fail('INVALID_INPUT', 'type action requires non-empty "text"', 'type');
    }
    await osc.typeText(params.text, { delayMs: params.delayMs });
    // Only the length is audited so typed content never reaches the log.
    this._audit('type', { chars: params.text.length });
    return {
      success: true,
      action: 'type',
      output: `[action: type] Typed ${params.text.length} characters.`,
    };
  }

  /**
   * Press a key chord.
   * @param {object} osc - OS controller.
   * @param {object} params - `keys`: non-empty array, e.g. ["Control","S"].
   * @returns {Promise<object>}
   */
  async _actKeyPress(osc, params) {
    if (!Array.isArray(params.keys) || params.keys.length === 0) {
      return this._fail('INVALID_INPUT', 'key_press requires non-empty "keys" array', 'key_press');
    }
    await osc.keyPress(params.keys);
    this._audit('key_press', { keys: params.keys });
    return {
      success: true,
      action: 'key_press',
      output: `[action: key_press] Pressed ${params.keys.join('+')}.`,
    };
  }

  /**
   * Scroll at explicit coordinates or at a grounded intent.
   * @param {object} osc - OS controller.
   * @param {object} effective - Effective per-agent config.
   * @param {object} params - `dy` (finite number, positive = down) plus
   *   `x`/`y` or `intent`.
   * @param {object} context
   * @returns {Promise<object>}
   */
  async _actScroll(osc, effective, params, context) {
    if (!Number.isFinite(params.dy)) {
      return this._fail('INVALID_INPUT', 'scroll requires numeric "dy"', 'scroll');
    }
    const { x, y, groundedFrom } = await this._resolveCoords(osc, effective, params, context);
    await osc.mouseScroll(x, y, params.dy);
    this._audit('scroll', { x, y, dy: params.dy, groundedFrom });
    return {
      success: true,
      action: 'scroll',
      output: `[action: scroll] Scrolled ${params.dy > 0 ? 'down' : 'up'} ${Math.abs(params.dy)} at (${x}, ${y}).`,
      coords: { x, y },
      groundedFrom,
    };
  }

  /**
   * Drag between two points, each given as coordinates or as an intent.
   * @param {object} osc - OS controller.
   * @param {object} effective - Effective per-agent config.
   * @param {object} params - `from`/`to` ({x, y}) or `fromIntent`/`toIntent`;
   *   optional `button`.
   * @param {object} context
   * @returns {Promise<object>}
   */
  async _actDrag(osc, effective, params, context) {
    // Drag accepts either two coord pairs OR two intents. When the start
    // point was grounded, its screenshot is reused for the end point so
    // both ends are resolved against the same view with one capture.
    const from = await this._resolveCoords(
      osc, effective,
      { x: params.from?.x, y: params.from?.y, intent: params.fromIntent },
      context,
    );
    const to = await this._resolveCoords(
      osc, effective,
      { x: params.to?.x, y: params.to?.y, intent: params.toIntent },
      context,
      from._sharedScreenshot,
    );
    await osc.mouseDrag({ x: from.x, y: from.y }, { x: to.x, y: to.y }, { button: params.button });

    // Audit only the resolved points. The raw _resolveCoords results also
    // carry the shared screenshot buffer, which must not land in the log.
    const auditPoint = ({ x, y, groundedFrom }) => ({ x, y, groundedFrom });
    this._audit('drag', { from: auditPoint(from), to: auditPoint(to) });
    return {
      success: true,
      action: 'drag',
      output: `[action: drag] Dragged from (${from.x}, ${from.y}) to (${to.x}, ${to.y}).`,
      from: { x: from.x, y: from.y },
      to: { x: to.x, y: to.y },
    };
  }

  /**
   * List open windows, filtered to titled ones and capped.
   * @param {object} osc - OS controller.
   * @returns {Promise<object>} Result with `windows`, `totalCount`,
   *   `titledCount`, and `truncated`.
   */
  async _actListWindows(osc) {
    const raw = await osc.listWindows();
    // Most platforms (Windows especially) report hundreds of OS-internal
    // handles with empty titles. Surfacing the full list bloats the
    // conversation context (>200 KB observed on Win11 with ~1300
    // handles) and the model loses the original task in the noise.
    // Filter to titled windows and cap to a sensible top-N. Total count
    // stays in the output so the agent knows truncation happened.
    // Non-string titles are treated as untitled rather than throwing.
    const titled = raw.filter(w => typeof w?.title === 'string' && w.title.trim().length > 0);
    const MAX_LIST = 50;
    const top = titled.slice(0, MAX_LIST);
    const truncated = titled.length > MAX_LIST;
    this._audit('list_windows', { total: raw.length, titled: titled.length, returned: top.length });
    return {
      success: true,
      action: 'list_windows',
      output:
        `[action: list_windows] ${titled.length} titled window(s)` +
        (raw.length !== titled.length ? ` (filtered ${raw.length - titled.length} untitled handles)` : '') +
        (truncated ? ` showing first ${MAX_LIST}` : '') + ': ' +
        top.map(w => `"${w.title}"`).join(', '),
      windows: top,
      totalCount: raw.length,
      titledCount: titled.length,
      truncated,
    };
  }

  /**
   * Bring the first window matching `titleMatch` to the front.
   * @param {object} osc - OS controller.
   * @param {object} params - `titleMatch` is required.
   * @returns {Promise<object>} The controller's result fields plus
   *   `success`, `action`, and `output`.
   */
  async _actFocusWindow(osc, params) {
    if (!params.titleMatch) {
      return this._fail('INVALID_INPUT', 'focus_window requires "titleMatch"', 'focus_window');
    }
    const r = await osc.focusWindow({ titleMatch: params.titleMatch });
    this._audit('focus_window', r);
    const focused = Boolean(r?.focused);
    return {
      // Controller fields first, so they can never overwrite the
      // standard success / action / output fields set below.
      ...r,
      success: focused,
      action: 'focus_window',
      output: focused
        ? `[action: focus_window] Focused window: "${r.title}".`
        : `[action: focus_window] No window matched "${params.titleMatch}".`,
    };
  }

  /**
   * Report what the OS controller says it can do on this machine.
   * @param {object} osc - OS controller.
   * @returns {Promise<object>} Result with the raw `capabilities` object.
   */
  async _actDescribeCaps(osc) {
    const caps = await osc.describeCapabilities();
    return {
      success: true,
      action: 'describe_capabilities',
      output: `[action: describe_capabilities] OS: ${caps.os}${caps.display ? ` / ${caps.display}` : ''}; ` +
        `screenshot=${caps.screenshot} mouseInput=${caps.mouseInput} ` +
        `keyboardInput=${caps.keyboardInput} windowFocus=${caps.windowFocus}` +
        (caps.degradedReason ? `\nDegraded: ${caps.degradedReason}` : ''),
      capabilities: caps,
    };
  }

  // ─── helpers ──────────────────────────────────────────────────────

  /**
   * Resolve (x, y) for an action. Three input shapes:
   *
   *   1. {x, y}          — used as-is once both are finite numbers
   *                        (no screen-bounds check on explicit coords)
   *   2. {intent: '...'} — ask the grounding model, then bounds-check
   *   3. neither         — INVALID_INPUT
   *
   * When grounding fires, the screenshot is returned on the result as
   * `_sharedScreenshot` so the caller (drag) can reuse it for the second
   * coord without an extra capture.
   *
   * @param {object} osc - OS controller.
   * @param {object} effective - Effective per-agent config.
   * @param {object} params - `x`/`y` or `intent`.
   * @param {object} context
   * @param {*} [reuseScreenshot] - Screenshot from an earlier grounding.
   * @returns {Promise<{x: number, y: number, groundedFrom: string|null}>}
   * @throws {Error} With `code` GROUNDING_FAILED, COORDS_OUT_OF_BOUNDS or
   *   INVALID_INPUT.
   */
  async _resolveCoords(osc, effective, params, context, reuseScreenshot = null) {
    if (Number.isFinite(params.x) && Number.isFinite(params.y)) {
      return { x: params.x, y: params.y, groundedFrom: null };
    }
    if (typeof params.intent === 'string' && params.intent.trim()) {
      const size = await osc.screenSize();
      const screenshot = reuseScreenshot || await osc.screenshot();
      const adapter = await this._adapter(effective, context);
      const result = await this._groundWithFallback(adapter, {
        screenshot,
        intent: params.intent,
        imageSize: { width: size.width, height: size.height },
        knobs: { reasoning_effort: effective.groundingEffort || Defaults.GROUNDING_EFFORT },
        timeoutMs: effective.groundingTimeoutMs || Defaults.GROUNDING_TIMEOUT_MS,
      }, effective, context);
      if (!result.coords) {
        const err = new Error(`grounding produced no coords; model said: "${(result.answer || '').slice(0, 200)}"`);
        err.code = 'GROUNDING_FAILED';
        throw err;
      }
      const { x, y } = result.coords;
      if (!this._coordsInScreen(x, y, size, effective.coordMarginPx ?? Defaults.COORD_MARGIN_PX)) {
        const err = new Error(`grounded coords (${x}, ${y}) lie outside the ${size.width}x${size.height} screen`);
        err.code = 'COORDS_OUT_OF_BOUNDS';
        throw err;
      }
      return { x, y, groundedFrom: params.intent, _sharedScreenshot: screenshot };
    }
    const err = new Error('action requires either (x, y) or "intent"');
    err.code = 'INVALID_INPUT';
    throw err;
  }

  /**
   * True when (x, y) lies at least `margin` px inside the screen
   * rectangle. Both bounds are inclusive, so with margin 0 the point
   * (size.width, size.height) is accepted.
   *
   * @param {number} x
   * @param {number} y
   * @param {{width: number, height: number}} size
   * @param {number} margin
   * @returns {boolean}
   */
  _coordsInScreen(x, y, size, margin) {
    return x >= margin && y >= margin
      && x <= size.width - margin
      && y <= size.height - margin;
  }

  /**
   * Lazy-build the grounding adapter. The model id + transport mode
   * come from toolConfig so an operator can swap Kimi → some future
   * model without code changes.
   *
   * The cached adapter is reused only while the model id, transport and
   * resolved user key are unchanged. A proxy adapter is constructed with
   * one caller's API key, so handing it to a different session would send
   * that session's grounding requests under the wrong key.
   *
   * @param {object} effective - Effective per-agent config.
   * @param {object} context
   * @returns {Promise<object>} Grounding adapter.
   */
  async _adapter(effective, context) {
    // A cached adapter with no recorded key was injected or installed by
    // the direct fallback: honour it unconditionally.
    if (this._adapterCache && this._adapterCacheKey == null) return this._adapterCache;

    const key = this._adapterKey(effective, context);
    if (this._adapterCache && this._adapterCacheKey === key) return this._adapterCache;

    this._adapterCache = this._buildAdapter(effective, context);
    this._adapterCacheKey = key;
    return this._adapterCache;
  }

  /**
   * Fingerprint of the inputs that _buildAdapter() bakes into an
   * adapter; kept in memory only and never logged.
   *
   * @param {object} effective - Effective per-agent config.
   * @param {object} context
   * @returns {string}
   */
  _adapterKey(effective, context) {
    const modelId = effective.groundingModelId || Defaults.GROUNDING_MODEL;
    const directMode = process.env.LOXIA_GROUNDING_DIRECT === '1' || !this.aiService;
    if (directMode) {
      return JSON.stringify(['direct', modelId, process.env.FOUNDRY_ENDPOINT ?? null]);
    }
    return JSON.stringify([
      'proxy', modelId, this.aiService.baseUrl ?? null, this._resolveUserKey(context),
    ]);
  }

  /**
   * Construct a grounding adapter for the current environment.
   *
   * @param {object} effective - Effective per-agent config.
   * @param {object} context
   * @returns {object} Grounding adapter.
   * @throws {Error} With `code: 'NOT_INITIALISED'` when no transport is
   *   available.
   */
  _buildAdapter(effective, context) {
    const modelId = effective.groundingModelId || Defaults.GROUNDING_MODEL;

    const foundryEndpoint = process.env.FOUNDRY_ENDPOINT;
    const foundryKey = process.env.FOUNDRY_KEY;
    const forceDirect = process.env.LOXIA_GROUNDING_DIRECT === '1';
    const haveDirectCreds = !!(foundryEndpoint && foundryKey);

    // Mode selection. In order of preference:
    //   1. forceDirect env flag → direct (test / CI)
    //   2. direct creds set AND no aiService → direct (headless / bench)
    //   3. otherwise → proxy via the Loxia backend (production path)
    // A failed proxy request (e.g. 404 because /llm/grounding isn't
    // deployed yet) automatically retries once in direct mode if creds
    // exist — _groundWithFallback() does that.
    if (forceDirect || (!this.aiService && haveDirectCreds)) {
      if (!haveDirectCreds) {
        throw Object.assign(new Error(
          'Direct mode requested but FOUNDRY_ENDPOINT / FOUNDRY_KEY env vars are not set.'
        ), { code: 'NOT_INITIALISED' });
      }
      return createGroundingModel(modelId, {
        mode: 'direct',
        endpoint: foundryEndpoint,
        apiKey: foundryKey,
      });
    }
    if (!this.aiService) {
      throw Object.assign(new Error(
        'DesktopTool has no grounding transport. Either set FOUNDRY_ENDPOINT + ' +
        'FOUNDRY_KEY env vars (direct mode) OR run a Loxia backend that has ' +
        'POST /llm/grounding deployed (proxy mode).'
      ), { code: 'NOT_INITIALISED' });
    }
    return createGroundingModel(modelId, {
      mode: 'proxy',
      backendUrl: this.aiService.baseUrl,
      userApiKey: this._resolveUserKey(context),
    });
  }

  /**
   * Wraps the adapter's ground() with one automatic fallback to direct
   * mode when proxy returns 404 (route not deployed yet) AND direct
   * credentials are available in env. Saves the user from "deploy the
   * backend before you can run" friction during the rollout window.
   *
   * @param {object} adapter - Adapter returned by _adapter().
   * @param {object} request - Grounding request passed to ground().
   * @param {object} effective - Effective per-agent config.
   * @returns {Promise<object>} Grounding result.
   * @throws Rethrows the original error when the fallback does not apply.
   */
  async _groundWithFallback(adapter, request, effective) {
    try {
      return await adapter.ground(request);
    } catch (err) {
      const is404 = /\b404\b/.test(err.message || '') || /\bNot Found\b/i.test(err.message || '');
      const isProxy = adapter.mode === 'proxy';
      const haveDirectCreds = !!(process.env.FOUNDRY_ENDPOINT && process.env.FOUNDRY_KEY);
      if (!is404 || !isProxy || !haveDirectCreds) throw err;

      this.logger?.warn?.(
        '[desktop] proxy /llm/grounding returned 404 — falling back to direct Foundry. ' +
        'Deploy the backend route to get billing + audit back.',
      );
      const directAdapter = createGroundingModel(
        effective.groundingModelId || Defaults.GROUNDING_MODEL,
        { mode: 'direct', endpoint: process.env.FOUNDRY_ENDPOINT, apiKey: process.env.FOUNDRY_KEY },
      );
      // Replace cache so subsequent calls go straight to direct. The key
      // is cleared so _adapter() treats this adapter as pinned.
      this._adapterCache = directAdapter;
      this._adapterCacheKey = null;
      return await directAdapter.ground(request);
    }
  }

  /**
   * Resolve the API key used for proxy-mode grounding. Order: the
   * aiService key manager's per-session key, then aiService.config.apiKey,
   * then the LOXIA_API_KEY env var.
   *
   * @param {object} context - `sessionId` and `platformProvided` are read.
   * @returns {string|null}
   */
  _resolveUserKey(context) {
    const km = this.aiService?.apiKeyManager;
    if (km && typeof km.getKeysForRequest === 'function') {
      const keys = km.getKeysForRequest(context?.sessionId, {
        platformProvided: context?.platformProvided !== false,
      });
      if (keys?.loxiaApiKey) return keys.loxiaApiKey;
    }
    return this.aiService?.config?.apiKey || process.env.LOXIA_API_KEY || null;
  }

  /**
   * Lazy-create an OS controller if the constructor didn't get one.
   * Production path: created on demand. Test path: injected at ctor.
   * @returns {object} OS controller.
   */
  _osc() {
    if (!this.osController) this.osController = createOSController();
    return this.osController;
  }

  /**
   * Write one audit line through the logger, if there is one.
   * @param {string} action
   * @param {object} meta - Extra fields merged into the log record.
   */
  _audit(action, meta) {
    this.logger?.info?.(`[desktop] ${action}`, { tool: this.id, action, ...meta });
  }

  /**
   * Build a structured failure result. `action` is optional but
   * strongly preferred — it lets the agent distinguish "click failed"
   * from "screenshot failed" at a glance when results arrive
   * out-of-order in a batched message, which is the only way it can
   * recover the correct mental model of what happened.
   *
   * @param {string} code - Machine-readable failure code.
   * @param {string} message - Human-readable explanation.
   * @param {string|null} [action] - Action id to tag the result with.
   * @returns {{success: false, error: string, code: string, output: string, action?: string}}
   */
  _fail(code, message, action = null) {
    this.logger?.warn?.(`[desktop] ${code}: ${message}`);
    const prefix = action ? `[action: ${action}] ` : '';
    return {
      success: false,
      ...(action ? { action } : {}),
      error: message,
      code,
      output: `${prefix}Desktop action failed (${code}): ${message}`,
    };
  }
}
625
+
626
+ // Re-export OSError so callers can match on it without two imports.
627
+ export { OSError };
628
+
629
+ export default DesktopTool;