crewly 1.8.9 → 1.8.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. package/config/constants.d.ts.map +1 -0
  2. package/config/index.d.ts.map +1 -0
  3. package/config/roles/_common/memory-instructions.md +6 -5
  4. package/config/roles/_common/wiki-instructions.md +49 -0
  5. package/config/roles/architect/prompt.md +2 -2
  6. package/config/roles/backend-developer/prompt.md +2 -2
  7. package/config/roles/designer/prompt.md +2 -2
  8. package/config/roles/developer/prompt.md +2 -2
  9. package/config/roles/frontend-developer/prompt.md +2 -2
  10. package/config/roles/fullstack-dev/prompt.md +2 -2
  11. package/config/roles/generalist/prompt.md +2 -2
  12. package/config/roles/ops/prompt.md +2 -2
  13. package/config/roles/orchestrator/prompt.md +135 -11
  14. package/config/roles/product-manager/prompt.md +2 -2
  15. package/config/roles/qa/prompt.md +2 -2
  16. package/config/roles/qa-engineer/prompt.md +2 -2
  17. package/config/roles/researcher/prompt.md +15 -6
  18. package/config/roles/sales/prompt.md +2 -2
  19. package/config/roles/support/prompt.md +2 -2
  20. package/config/roles/team-leader/prompt.md +17 -2
  21. package/config/roles/tpm/prompt.md +2 -2
  22. package/config/roles/ux-designer/prompt.md +2 -2
  23. package/config/skills/orchestrator/wiki-cleanup/SKILL.md +89 -0
  24. package/config/skills/orchestrator/wiki-cleanup/execute.sh +139 -0
  25. package/config/skills/orchestrator/wiki-lint/SKILL.md +75 -0
  26. package/config/skills/orchestrator/wiki-lint/execute.sh +66 -0
  27. package/config/skills/orchestrator/wiki-migrate/SKILL.md +103 -0
  28. package/config/skills/orchestrator/wiki-migrate/execute.sh +82 -0
  29. package/config/skills/orchestrator/wiki-process-queue/SKILL.md +9 -1
  30. package/dist/backend/backend/src/controllers/task-management/task-management.controller.d.ts +169 -0
  31. package/dist/backend/backend/src/controllers/task-management/task-management.controller.d.ts.map +1 -0
  32. package/dist/backend/backend/src/controllers/task-management/task-management.controller.js +1779 -0
  33. package/dist/backend/backend/src/controllers/task-management/task-management.controller.js.map +1 -0
  34. package/dist/backend/backend/src/controllers/task-pool/task-pool.controller.d.ts +18 -0
  35. package/dist/backend/backend/src/controllers/task-pool/task-pool.controller.d.ts.map +1 -1
  36. package/dist/backend/backend/src/controllers/task-pool/task-pool.controller.js +63 -0
  37. package/dist/backend/backend/src/controllers/task-pool/task-pool.controller.js.map +1 -1
  38. package/dist/backend/backend/src/controllers/task-pool/task-pool.routes.d.ts.map +1 -1
  39. package/dist/backend/backend/src/controllers/task-pool/task-pool.routes.js +5 -1
  40. package/dist/backend/backend/src/controllers/task-pool/task-pool.routes.js.map +1 -1
  41. package/dist/backend/backend/src/controllers/wiki/wiki.controller.d.ts +109 -0
  42. package/dist/backend/backend/src/controllers/wiki/wiki.controller.d.ts.map +1 -1
  43. package/dist/backend/backend/src/controllers/wiki/wiki.controller.js +419 -4
  44. package/dist/backend/backend/src/controllers/wiki/wiki.controller.js.map +1 -1
  45. package/dist/backend/backend/src/controllers/wiki/wiki.routes.d.ts.map +1 -1
  46. package/dist/backend/backend/src/controllers/wiki/wiki.routes.js +11 -1
  47. package/dist/backend/backend/src/controllers/wiki/wiki.routes.js.map +1 -1
  48. package/dist/backend/backend/src/index.d.ts.map +1 -1
  49. package/dist/backend/backend/src/index.js +78 -3
  50. package/dist/backend/backend/src/index.js.map +1 -1
  51. package/dist/backend/backend/src/index.js.orc-bak-20260529 +3130 -0
  52. package/dist/backend/backend/src/services/agent/crewly-agent/agent-runner.service.d.ts +513 -0
  53. package/dist/backend/backend/src/services/agent/crewly-agent/agent-runner.service.d.ts.map +1 -0
  54. package/dist/backend/backend/src/services/agent/crewly-agent/agent-runner.service.js +1568 -0
  55. package/dist/backend/backend/src/services/agent/crewly-agent/agent-runner.service.js.map +1 -0
  56. package/dist/backend/backend/src/services/agent/crewly-agent/agent-worker.d.ts +86 -0
  57. package/dist/backend/backend/src/services/agent/crewly-agent/agent-worker.d.ts.map +1 -0
  58. package/dist/backend/backend/src/services/agent/crewly-agent/agent-worker.js +147 -0
  59. package/dist/backend/backend/src/services/agent/crewly-agent/agent-worker.js.map +1 -0
  60. package/dist/backend/backend/src/services/agent/crewly-agent/api-client.d.ts +68 -0
  61. package/dist/backend/backend/src/services/agent/crewly-agent/api-client.d.ts.map +1 -0
  62. package/dist/backend/backend/src/services/agent/crewly-agent/api-client.js +131 -0
  63. package/dist/backend/backend/src/services/agent/crewly-agent/api-client.js.map +1 -0
  64. package/dist/backend/backend/src/services/agent/crewly-agent/audit-log.service.d.ts +130 -0
  65. package/dist/backend/backend/src/services/agent/crewly-agent/audit-log.service.d.ts.map +1 -0
  66. package/dist/backend/backend/src/services/agent/crewly-agent/audit-log.service.js +263 -0
  67. package/dist/backend/backend/src/services/agent/crewly-agent/audit-log.service.js.map +1 -0
  68. package/dist/backend/backend/src/services/agent/crewly-agent/audit-trail.service.d.ts +74 -0
  69. package/dist/backend/backend/src/services/agent/crewly-agent/audit-trail.service.d.ts.map +1 -0
  70. package/dist/backend/backend/src/services/agent/crewly-agent/audit-trail.service.js +140 -0
  71. package/dist/backend/backend/src/services/agent/crewly-agent/audit-trail.service.js.map +1 -0
  72. package/dist/backend/backend/src/services/agent/crewly-agent/auditor-tools.d.ts +29 -0
  73. package/dist/backend/backend/src/services/agent/crewly-agent/auditor-tools.d.ts.map +1 -0
  74. package/dist/backend/backend/src/services/agent/crewly-agent/auditor-tools.js +279 -0
  75. package/dist/backend/backend/src/services/agent/crewly-agent/auditor-tools.js.map +1 -0
  76. package/dist/backend/backend/src/services/agent/crewly-agent/crewly-agent-runtime.service.d.ts +340 -0
  77. package/dist/backend/backend/src/services/agent/crewly-agent/crewly-agent-runtime.service.d.ts.map +1 -0
  78. package/dist/backend/backend/src/services/agent/crewly-agent/crewly-agent-runtime.service.js +1176 -0
  79. package/dist/backend/backend/src/services/agent/crewly-agent/crewly-agent-runtime.service.js.map +1 -0
  80. package/dist/backend/backend/src/services/agent/crewly-agent/deepseek-sse-transform.d.ts +79 -0
  81. package/dist/backend/backend/src/services/agent/crewly-agent/deepseek-sse-transform.d.ts.map +1 -0
  82. package/dist/backend/backend/src/services/agent/crewly-agent/deepseek-sse-transform.js +145 -0
  83. package/dist/backend/backend/src/services/agent/crewly-agent/deepseek-sse-transform.js.map +1 -0
  84. package/dist/backend/backend/src/services/agent/crewly-agent/env-isolation.service.d.ts +79 -0
  85. package/dist/backend/backend/src/services/agent/crewly-agent/env-isolation.service.d.ts.map +1 -0
  86. package/dist/backend/backend/src/services/agent/crewly-agent/env-isolation.service.js +218 -0
  87. package/dist/backend/backend/src/services/agent/crewly-agent/env-isolation.service.js.map +1 -0
  88. package/dist/backend/backend/src/services/agent/crewly-agent/index.d.ts +16 -0
  89. package/dist/backend/backend/src/services/agent/crewly-agent/index.d.ts.map +1 -0
  90. package/dist/backend/backend/src/services/agent/crewly-agent/index.js +16 -0
  91. package/dist/backend/backend/src/services/agent/crewly-agent/index.js.map +1 -0
  92. package/dist/backend/backend/src/services/agent/crewly-agent/mcp-tool-bridge.d.ts +135 -0
  93. package/dist/backend/backend/src/services/agent/crewly-agent/mcp-tool-bridge.d.ts.map +1 -0
  94. package/dist/backend/backend/src/services/agent/crewly-agent/mcp-tool-bridge.js +185 -0
  95. package/dist/backend/backend/src/services/agent/crewly-agent/mcp-tool-bridge.js.map +1 -0
  96. package/dist/backend/backend/src/services/agent/crewly-agent/model-manager.d.ts +141 -0
  97. package/dist/backend/backend/src/services/agent/crewly-agent/model-manager.d.ts.map +1 -0
  98. package/dist/backend/backend/src/services/agent/crewly-agent/model-manager.js +310 -0
  99. package/dist/backend/backend/src/services/agent/crewly-agent/model-manager.js.map +1 -0
  100. package/dist/backend/backend/src/services/agent/crewly-agent/output-filter.service.d.ts +91 -0
  101. package/dist/backend/backend/src/services/agent/crewly-agent/output-filter.service.d.ts.map +1 -0
  102. package/dist/backend/backend/src/services/agent/crewly-agent/output-filter.service.js +143 -0
  103. package/dist/backend/backend/src/services/agent/crewly-agent/output-filter.service.js.map +1 -0
  104. package/dist/backend/backend/src/services/agent/crewly-agent/prompt-guard.service.d.ts +103 -0
  105. package/dist/backend/backend/src/services/agent/crewly-agent/prompt-guard.service.d.ts.map +1 -0
  106. package/dist/backend/backend/src/services/agent/crewly-agent/prompt-guard.service.js +256 -0
  107. package/dist/backend/backend/src/services/agent/crewly-agent/prompt-guard.service.js.map +1 -0
  108. package/dist/backend/backend/src/services/agent/crewly-agent/rate-limiter.d.ts +143 -0
  109. package/dist/backend/backend/src/services/agent/crewly-agent/rate-limiter.d.ts.map +1 -0
  110. package/dist/backend/backend/src/services/agent/crewly-agent/rate-limiter.js +264 -0
  111. package/dist/backend/backend/src/services/agent/crewly-agent/rate-limiter.js.map +1 -0
  112. package/dist/backend/backend/src/services/agent/crewly-agent/smoke-test.d.ts +13 -0
  113. package/dist/backend/backend/src/services/agent/crewly-agent/smoke-test.d.ts.map +1 -0
  114. package/dist/backend/backend/src/services/agent/crewly-agent/smoke-test.js +91 -0
  115. package/dist/backend/backend/src/services/agent/crewly-agent/smoke-test.js.map +1 -0
  116. package/dist/backend/backend/src/services/agent/crewly-agent/tool-registry.d.ts +135 -0
  117. package/dist/backend/backend/src/services/agent/crewly-agent/tool-registry.d.ts.map +1 -0
  118. package/dist/backend/backend/src/services/agent/crewly-agent/tool-registry.js +1937 -0
  119. package/dist/backend/backend/src/services/agent/crewly-agent/tool-registry.js.map +1 -0
  120. package/dist/backend/backend/src/services/ai/prompt-builder.service.js +1 -1
  121. package/dist/backend/backend/src/services/autonomous/auto-assign.service.d.ts +429 -0
  122. package/dist/backend/backend/src/services/autonomous/auto-assign.service.d.ts.map +1 -0
  123. package/dist/backend/backend/src/services/autonomous/auto-assign.service.js +852 -0
  124. package/dist/backend/backend/src/services/autonomous/auto-assign.service.js.map +1 -0
  125. package/dist/backend/backend/src/services/project/task-tracking.service.d.ts +171 -0
  126. package/dist/backend/backend/src/services/project/task-tracking.service.d.ts.map +1 -0
  127. package/dist/backend/backend/src/services/project/task-tracking.service.js +725 -0
  128. package/dist/backend/backend/src/services/project/task-tracking.service.js.map +1 -0
  129. package/dist/backend/backend/src/services/reconciler/reconciler-data-provider.d.ts.map +1 -1
  130. package/dist/backend/backend/src/services/reconciler/reconciler-data-provider.js +50 -0
  131. package/dist/backend/backend/src/services/reconciler/reconciler-data-provider.js.map +1 -1
  132. package/dist/backend/backend/src/services/task-pool/task-pool.service.d.ts +19 -0
  133. package/dist/backend/backend/src/services/task-pool/task-pool.service.d.ts.map +1 -1
  134. package/dist/backend/backend/src/services/task-pool/task-pool.service.js +45 -0
  135. package/dist/backend/backend/src/services/task-pool/task-pool.service.js.map +1 -1
  136. package/dist/backend/backend/src/services/v3/agent-auto-claim.service.d.ts.map +1 -1
  137. package/dist/backend/backend/src/services/v3/agent-auto-claim.service.js +34 -1
  138. package/dist/backend/backend/src/services/v3/agent-auto-claim.service.js.map +1 -1
  139. package/dist/backend/backend/src/services/v3/project-task-watcher.service.d.ts +118 -0
  140. package/dist/backend/backend/src/services/v3/project-task-watcher.service.d.ts.map +1 -0
  141. package/dist/backend/backend/src/services/v3/project-task-watcher.service.js +326 -0
  142. package/dist/backend/backend/src/services/v3/project-task-watcher.service.js.map +1 -0
  143. package/dist/backend/backend/src/services/wiki/wiki-backlinks.service.d.ts +72 -0
  144. package/dist/backend/backend/src/services/wiki/wiki-backlinks.service.d.ts.map +1 -0
  145. package/dist/backend/backend/src/services/wiki/wiki-backlinks.service.js +186 -0
  146. package/dist/backend/backend/src/services/wiki/wiki-backlinks.service.js.map +1 -0
  147. package/dist/backend/backend/src/services/wiki/wiki-bookkeep-trigger.service.d.ts +27 -4
  148. package/dist/backend/backend/src/services/wiki/wiki-bookkeep-trigger.service.d.ts.map +1 -1
  149. package/dist/backend/backend/src/services/wiki/wiki-bookkeep-trigger.service.js +133 -10
  150. package/dist/backend/backend/src/services/wiki/wiki-bookkeep-trigger.service.js.map +1 -1
  151. package/dist/backend/backend/src/services/wiki/wiki-bookkeep.service.d.ts +18 -1
  152. package/dist/backend/backend/src/services/wiki/wiki-bookkeep.service.d.ts.map +1 -1
  153. package/dist/backend/backend/src/services/wiki/wiki-bookkeep.service.js +15 -4
  154. package/dist/backend/backend/src/services/wiki/wiki-bookkeep.service.js.map +1 -1
  155. package/dist/backend/backend/src/services/wiki/wiki-chat-subscriber.service.d.ts +74 -0
  156. package/dist/backend/backend/src/services/wiki/wiki-chat-subscriber.service.d.ts.map +1 -0
  157. package/dist/backend/backend/src/services/wiki/wiki-chat-subscriber.service.js +154 -0
  158. package/dist/backend/backend/src/services/wiki/wiki-chat-subscriber.service.js.map +1 -0
  159. package/dist/backend/backend/src/services/wiki/wiki-cleanup.service.d.ts +160 -0
  160. package/dist/backend/backend/src/services/wiki/wiki-cleanup.service.d.ts.map +1 -0
  161. package/dist/backend/backend/src/services/wiki/wiki-cleanup.service.js +399 -0
  162. package/dist/backend/backend/src/services/wiki/wiki-cleanup.service.js.map +1 -0
  163. package/dist/backend/backend/src/services/wiki/wiki-lint.service.d.ts +182 -0
  164. package/dist/backend/backend/src/services/wiki/wiki-lint.service.d.ts.map +1 -0
  165. package/dist/backend/backend/src/services/wiki/wiki-lint.service.js +505 -0
  166. package/dist/backend/backend/src/services/wiki/wiki-lint.service.js.map +1 -0
  167. package/dist/backend/backend/src/services/wiki/wiki-migrate.service.d.ts +232 -0
  168. package/dist/backend/backend/src/services/wiki/wiki-migrate.service.d.ts.map +1 -0
  169. package/dist/backend/backend/src/services/wiki/wiki-migrate.service.js +1416 -0
  170. package/dist/backend/backend/src/services/wiki/wiki-migrate.service.js.map +1 -0
  171. package/dist/backend/backend/src/services/wiki/wiki-recent.service.d.ts +51 -0
  172. package/dist/backend/backend/src/services/wiki/wiki-recent.service.d.ts.map +1 -0
  173. package/dist/backend/backend/src/services/wiki/wiki-recent.service.js +102 -0
  174. package/dist/backend/backend/src/services/wiki/wiki-recent.service.js.map +1 -0
  175. package/dist/backend/backend/src/services/wiki/wiki-reflect-trigger.service.d.ts +104 -0
  176. package/dist/backend/backend/src/services/wiki/wiki-reflect-trigger.service.d.ts.map +1 -0
  177. package/dist/backend/backend/src/services/wiki/wiki-reflect-trigger.service.js +229 -0
  178. package/dist/backend/backend/src/services/wiki/wiki-reflect-trigger.service.js.map +1 -0
  179. package/dist/backend/backend/src/services/wiki/wiki-search.service.d.ts +90 -0
  180. package/dist/backend/backend/src/services/wiki/wiki-search.service.d.ts.map +1 -0
  181. package/dist/backend/backend/src/services/wiki/wiki-search.service.js +190 -0
  182. package/dist/backend/backend/src/services/wiki/wiki-search.service.js.map +1 -0
  183. package/dist/backend/backend/src/services/wiki/wiki-workitem-bridge.service.d.ts +164 -0
  184. package/dist/backend/backend/src/services/wiki/wiki-workitem-bridge.service.d.ts.map +1 -0
  185. package/dist/backend/backend/src/services/wiki/wiki-workitem-bridge.service.js +675 -0
  186. package/dist/backend/backend/src/services/wiki/wiki-workitem-bridge.service.js.map +1 -0
  187. package/dist/backend/backend/src/services/workflow/cron-task.service.d.ts.map +1 -1
  188. package/dist/backend/backend/src/services/workflow/cron-task.service.js +65 -0
  189. package/dist/backend/backend/src/services/workflow/cron-task.service.js.map +1 -1
  190. package/dist/backend/backend/src/types/auto-assign.types.d.ts +271 -0
  191. package/dist/backend/backend/src/types/auto-assign.types.d.ts.map +1 -0
  192. package/dist/backend/backend/src/types/auto-assign.types.js +136 -0
  193. package/dist/backend/backend/src/types/auto-assign.types.js.map +1 -0
  194. package/dist/backend/backend/src/types/cron-task.types.d.ts +16 -1
  195. package/dist/backend/backend/src/types/cron-task.types.d.ts.map +1 -1
  196. package/dist/backend/backend/src/utils/esm-require.utils.d.ts +111 -0
  197. package/dist/backend/backend/src/utils/esm-require.utils.d.ts.map +1 -0
  198. package/dist/backend/backend/src/utils/esm-require.utils.js +124 -0
  199. package/dist/backend/backend/src/utils/esm-require.utils.js.map +1 -0
  200. package/dist/cli/backend/src/services/ai/prompt-modules/prompt-module.interface.d.ts +220 -0
  201. package/dist/cli/backend/src/services/ai/prompt-modules/prompt-module.interface.d.ts.map +1 -0
  202. package/dist/cli/backend/src/services/ai/prompt-modules/prompt-module.interface.js +37 -0
  203. package/dist/cli/backend/src/services/ai/prompt-modules/prompt-module.interface.js.map +1 -0
  204. package/dist/cli/backend/src/services/knowledge/fts5-search-strategy.d.ts +56 -0
  205. package/dist/cli/backend/src/services/knowledge/fts5-search-strategy.d.ts.map +1 -0
  206. package/dist/cli/backend/src/services/knowledge/fts5-search-strategy.js +91 -0
  207. package/dist/cli/backend/src/services/knowledge/fts5-search-strategy.js.map +1 -0
  208. package/dist/cli/backend/src/services/knowledge/learnings-index.service.d.ts +159 -0
  209. package/dist/cli/backend/src/services/knowledge/learnings-index.service.d.ts.map +1 -0
  210. package/dist/cli/backend/src/services/knowledge/learnings-index.service.js +304 -0
  211. package/dist/cli/backend/src/services/knowledge/learnings-index.service.js.map +1 -0
  212. package/dist/cli/backend/src/services/knowledge/wiki-compiler.service.d.ts +115 -0
  213. package/dist/cli/backend/src/services/knowledge/wiki-compiler.service.d.ts.map +1 -0
  214. package/dist/cli/backend/src/services/knowledge/wiki-compiler.service.js +215 -0
  215. package/dist/cli/backend/src/services/knowledge/wiki-compiler.service.js.map +1 -0
  216. package/dist/cli/backend/src/services/memory/embedding-provider.d.ts +78 -0
  217. package/dist/cli/backend/src/services/memory/embedding-provider.d.ts.map +1 -0
  218. package/dist/cli/backend/src/services/memory/embedding-provider.js +179 -0
  219. package/dist/cli/backend/src/services/memory/embedding-provider.js.map +1 -0
  220. package/dist/cli/backend/src/services/memory/vector-store.service.d.ts +331 -0
  221. package/dist/cli/backend/src/services/memory/vector-store.service.d.ts.map +1 -0
  222. package/dist/cli/backend/src/services/memory/vector-store.service.js +814 -0
  223. package/dist/cli/backend/src/services/memory/vector-store.service.js.map +1 -0
  224. package/dist/cli/backend/src/services/project/task-tracking.service.d.ts +171 -0
  225. package/dist/cli/backend/src/services/project/task-tracking.service.d.ts.map +1 -0
  226. package/dist/cli/backend/src/services/project/task-tracking.service.js +725 -0
  227. package/dist/cli/backend/src/services/project/task-tracking.service.js.map +1 -0
  228. package/dist/cli/backend/src/services/task-pool/task-pool.service.d.ts +19 -0
  229. package/dist/cli/backend/src/services/task-pool/task-pool.service.d.ts.map +1 -1
  230. package/dist/cli/backend/src/services/task-pool/task-pool.service.js +45 -0
  231. package/dist/cli/backend/src/services/task-pool/task-pool.service.js.map +1 -1
  232. package/dist/cli/backend/src/types/auto-assign.types.d.ts +271 -0
  233. package/dist/cli/backend/src/types/auto-assign.types.d.ts.map +1 -0
  234. package/dist/cli/backend/src/types/auto-assign.types.js +136 -0
  235. package/dist/cli/backend/src/types/auto-assign.types.js.map +1 -0
  236. package/dist/cli/cli/src/index.js +0 -0
  237. package/frontend/dist/assets/{index-db3f5041.css → index-068bb4f6.css} +10 -1
  238. package/frontend/dist/assets/index-c24ceb15.js +4960 -0
  239. package/frontend/dist/index.html +2 -2
  240. package/package.json +1 -1
  241. package/config/skills/agent/core/query-knowledge/SKILL.md +0 -87
  242. package/config/skills/agent/core/query-knowledge/execute.sh +0 -30
  243. package/config/skills/orchestrator/query-knowledge/SKILL.md +0 -75
  244. package/config/skills/orchestrator/query-knowledge/execute.sh +0 -30
  245. package/frontend/dist/assets/index-cc115bb4.js +0 -4926
@@ -0,0 +1,1568 @@
1
+ /**
2
+ * Crewly Agent Runner Service
3
+ *
4
+ * Core reasoning loop for the Crewly Agent runtime. Wraps Vercel AI SDK's
5
+ * generateText with conversation history management, context compaction,
6
+ * and structured result tracking.
7
+ *
8
+ * @module services/agent/crewly-agent/agent-runner.service
9
+ */
10
+ import { streamText, generateText, stepCountIs } from 'ai';
11
+ import { TracingService } from '../../core/tracing.service.js';
12
+ import { ContextFlushService } from '../../memory/context-flush.service.js';
13
+ import { TRACING_CONSTANTS } from '../../../constants.js';
14
+ import { ModelManager } from './model-manager.js';
15
+ import { CrewlyApiClient } from './api-client.js';
16
+ import { createTools } from './tool-registry.js';
17
+ import { McpClientService } from '../../mcp-client.js';
18
+ import { connectAndLoadMcpTools } from './mcp-tool-bridge.js';
19
+ import { ApprovalQueueService } from './approval-queue.service.js';
20
+ import { OutputFilterService } from './output-filter.service.js';
21
+ import { synthesizeSlackConversationId } from '../../chat-v2/legacy-dto.utils.js';
22
+ import { CREWLY_AGENT_DEFAULTS, WRITE_TOOLS, MODEL_CONTEXT_WINDOWS, resolveMaxOutputTokens, } from './types.js';
23
/**
 * Fingerprint a tool call for comparison: canonical JSON of name + args.
 *
 * Keys of plain objects inside `args` are sorted before serialization, so two
 * calls that differ only in argument key order produce the same fingerprint.
 * Array order is significant and is preserved. When the argument keys are
 * already in sorted order the output is identical to a plain
 * `JSON.stringify({ t: toolName, a: args })`.
 *
 * @param toolName - Name of the tool called
 * @param args - Arguments passed to the tool (may be undefined)
 * @returns A string that is equal for semantically identical calls
 */
function toolCallFingerprint(toolName, args) {
    const envelope = { t: toolName, a: args };
    const canonicalize = (_key, value) => {
        // Leave the envelope itself untouched so the `t`/`a` order stays stable,
        // and pass primitives and arrays straight through.
        if (value === envelope || value === null || typeof value !== 'object' || Array.isArray(value)) {
            return value;
        }
        // Only reorder plain objects; exotic objects keep JSON.stringify's
        // default handling.
        const proto = Object.getPrototypeOf(value);
        if (proto !== Object.prototype && proto !== null) {
            return value;
        }
        // Object.fromEntries defines own properties, so a "__proto__" key is safe.
        return Object.fromEntries(Object.keys(value).sort().map((k) => [k, value[k]]));
    };
    return JSON.stringify(envelope, canonicalize);
}
29
/**
 * Detects looping behavior in tool calls: consecutive identical calls or
 * consecutive error responses from the same tool.
 *
 * Usage: create per-run, call `recordToolCall()` in onStepFinish, check `loopDetected`.
 */
export class ToolCallLoopDetector {
    /** Number of consecutive identical calls that counts as a loop */
    identicalThreshold;
    /** Number of consecutive error results from one tool that counts as a loop */
    errorThreshold;
    /** Consecutive identical tool call fingerprints */
    consecutiveIdentical = 0;
    lastFingerprint = null;
    /** Consecutive error results from the same tool */
    consecutiveErrors = 0;
    lastErrorTool = null;
    /** Whether a loop was detected */
    loopDetected = false;
    /** Human-readable reason when loop is detected */
    loopReason = '';
    /**
     * @param identicalThreshold - Identical-call count that trips the detector
     * @param errorThreshold - Same-tool error count that trips the detector
     */
    constructor(identicalThreshold = CREWLY_AGENT_DEFAULTS.LOOP_DETECTION_THRESHOLD, errorThreshold = CREWLY_AGENT_DEFAULTS.ERROR_LOOP_THRESHOLD) {
        this.identicalThreshold = identicalThreshold;
        this.errorThreshold = errorThreshold;
    }
    /**
     * Serialize an arbitrary value for pattern matching and log messages.
     *
     * Strings pass through unchanged. Other values are JSON-encoded; values
     * JSON cannot represent (undefined, functions) become the text
     * "undefined", and values JSON.stringify throws on (circular structures,
     * BigInt) fall back to `String(value)` instead of propagating the error.
     *
     * @param value - Any tool argument or result
     * @returns A string representation that never throws
     */
    static #stringify(value) {
        if (typeof value === 'string') {
            return value;
        }
        try {
            return String(JSON.stringify(value));
        }
        catch {
            return String(value);
        }
    }
    /**
     * Record a tool call and check for loop patterns.
     *
     * Once a loop has been detected the detector latches: every later call
     * returns true without updating any counters.
     *
     * @param toolName - Name of the tool called
     * @param args - Arguments passed to the tool
     * @param result - Result returned by the tool
     * @returns True if a loop has been detected (on this call or earlier)
     */
    recordToolCall(toolName, args, result) {
        if (this.loopDetected) {
            return true;
        }
        // 1. Check consecutive identical calls
        const fp = toolCallFingerprint(toolName, args);
        if (fp === this.lastFingerprint) {
            this.consecutiveIdentical++;
        }
        else {
            this.consecutiveIdentical = 1;
            this.lastFingerprint = fp;
        }
        if (this.consecutiveIdentical >= this.identicalThreshold) {
            this.loopDetected = true;
            // Safe stringify: a call with no arguments must not crash the report.
            const shownArgs = ToolCallLoopDetector.#stringify(args).slice(0, 120);
            this.loopReason = `Identical tool call repeated ${this.consecutiveIdentical} times: ${toolName}(${shownArgs})`;
            return true;
        }
        // 2. Check consecutive error results (404, 4xx, 5xx, error strings)
        if (this.isErrorResult(result)) {
            if (toolName === this.lastErrorTool) {
                this.consecutiveErrors++;
            }
            else {
                this.consecutiveErrors = 1;
                this.lastErrorTool = toolName;
            }
            if (this.consecutiveErrors >= this.errorThreshold) {
                this.loopDetected = true;
                // Show the same text the error heuristic inspected, so object
                // results are readable rather than "[object Object]".
                const shownResult = ToolCallLoopDetector.#stringify(result).slice(0, 200);
                this.loopReason = `Tool "${toolName}" returned errors ${this.consecutiveErrors} consecutive times. Last result: ${shownResult}`;
                return true;
            }
        }
        else {
            // Any non-error result breaks the error streak.
            this.consecutiveErrors = 0;
            this.lastErrorTool = null;
        }
        return false;
    }
    /**
     * Check if a tool result looks like an error (404, HTTP error codes, error strings).
     *
     * This is a text heuristic over the serialized result: any standalone
     * three-digit number starting with 4 or 5, or any of the words
     * error / not found / failed / refused / denied / timeout, counts.
     *
     * @param result - Result returned by the tool
     * @returns True when the serialized result matches an error pattern
     */
    isErrorResult(result) {
        if (result === null || result === undefined) {
            return false;
        }
        const str = ToolCallLoopDetector.#stringify(result);
        // Match common error patterns: HTTP 4xx/5xx, "error", "not found", "failed"
        return /\b[45]\d{2}\b/.test(str)
            || /\b(error|not\s*found|failed|refused|denied|timeout)\b/i.test(str);
    }
}
110
+ export class AgentRunnerService {
111
+ config;
112
+ modelManager;
113
+ apiClient;
114
+ model = null;
115
+ /**
116
+ * Per-conversation state map. Each Slack thread (or web chat
117
+ * conversation) gets its own `ConversationState` so the LLM
118
+ * context is isolated — messages from thread A never leak into
119
+ * the prompt when responding to thread B. The conversation key
120
+ * is the chat-v2 channel id (e.g. `slack-D0AC7-1777760999-956969`)
121
+ * derived from the inbound message's `[CHAT:xxx]` marker, the
122
+ * `[SLACK:channel:threadTs]` marker, or — if neither is present
123
+ * — the literal `__default__` for runtime cases like REPL or
124
+ * scheduled-check inputs that have no thread identity.
125
+ *
126
+ * 2026-05-15 fix per goal: "One Slack thread represents one chat
127
+ * thread; different Slack threads are never chained together."
128
+ */
129
+ conversationStates = new Map();
130
+ /**
131
+ * Active conversation key for the message currently being
132
+ * processed. `processQueue` sets this before each `executeRun`
133
+ * so the getter `this.state` resolves to the right per-thread
134
+ * state without every call site needing to know about the map.
135
+ */
136
+ currentConversationKey = '__default__';
137
+ /**
138
+ * Effective system prompt — captured at construction time and
139
+ * applied to every fresh per-conversation state created on
140
+ * demand. Held on the instance so `getOrCreateState` doesn't
141
+ * need to recompute the eval-mode stripping logic.
142
+ */
143
+ effectiveSystemPrompt;
144
+ /**
145
+ * Soft cap on how many distinct conversation states we hold in
146
+ * memory. When exceeded, the least-recently-active state is
147
+ * evicted (its messages live on in chat-v2 SQLite so the next
148
+ * access can re-hydrate). Prevents unbounded growth when a busy
149
+ * agent participates in thousands of Slack threads over time.
150
+ */
151
+ MAX_LIVE_CONVERSATIONS = 100;
152
+ /**
153
+ * Backward-compatible getter: every existing `this.state.X`
154
+ * call site automatically routes to the active per-conversation
155
+ * state. Lazy-creates a fresh state on first access for a new
156
+ * conversation key.
157
+ */
158
+ get state() {
159
+ return this.getOrCreateConversationState(this.currentConversationKey);
160
+ }
161
+ /**
162
+ * Look up or create the ConversationState for a given key.
163
+ * Evicts the least-recently-active state when the live-set
164
+ * size exceeds {@link MAX_LIVE_CONVERSATIONS}.
165
+ *
166
+ * @param key - Conversation key (chat-v2 channel id or `__default__`)
167
+ * @returns The per-conversation state object
168
+ */
169
+ getOrCreateConversationState(key) {
170
+ let s = this.conversationStates.get(key);
171
+ if (!s) {
172
+ s = {
173
+ messages: [],
174
+ systemPrompt: this.effectiveSystemPrompt,
175
+ totalTokens: { input: 0, output: 0 },
176
+ createdAt: new Date(),
177
+ lastActivityAt: new Date(),
178
+ };
179
+ this.conversationStates.set(key, s);
180
+ // LRU eviction — pop the oldest by `lastActivityAt`. Map
181
+ // preserves insertion order but we want recency, so scan
182
+ // once on overflow rather than maintain a separate index.
183
+ if (this.conversationStates.size > this.MAX_LIVE_CONVERSATIONS) {
184
+ let evictKey = null;
185
+ let evictedAt = Infinity;
186
+ for (const [k, v] of this.conversationStates) {
187
+ if (k === key)
188
+ continue;
189
+ const t = v.lastActivityAt.getTime();
190
+ if (t < evictedAt) {
191
+ evictedAt = t;
192
+ evictKey = k;
193
+ }
194
+ }
195
+ if (evictKey !== null)
196
+ this.conversationStates.delete(evictKey);
197
+ }
198
+ }
199
+ return s;
200
+ }
201
+ /**
202
+ * Test / introspection helper — number of active conversation
203
+ * states the runner currently holds. Surfaces in
204
+ * `getConversationStatus` for observability.
205
+ *
206
+ * @returns Number of live per-conversation states
207
+ */
208
+ getConversationCount() {
209
+ return this.conversationStates.size;
210
+ }
211
+ processing = false;
212
+ messageQueue = [];
213
+ auditLog = [];
214
+ securityPolicy;
215
+ /** Current conversationId extracted from [CHAT:xxx] prefix */
216
+ currentConversationId;
217
+ /** Last known conversationId — used as fallback when a message has no explicit conversationId */
218
+ lastKnownConversationId;
219
+ /** Current Slack context (channelId + threadTs) for routing NOTIFY responses */
220
+ currentSlackContext;
221
+ /** MCP client for external tool integration */
222
+ mcpClient = null;
223
+ /** Cached MCP tool definitions loaded during initialization */
224
+ mcpToolDefs = {};
225
+ /** Approval queue for tools requiring explicit approval (shared singleton) */
226
+ approvalQueue = ApprovalQueueService.getInstance();
227
+ tracing = TracingService.getInstance();
228
+ /** Guards against concurrent compaction — only one compaction at a time */
229
+ compacting = false;
230
+ /** AbortController for the current run — allows external cancellation */
231
+ currentRunAbort = null;
232
+ /** Streaming event callbacks — set per run by the runtime service */
233
+ streamingCallbacks = {};
234
+ /** Output filter for redacting API keys from agent responses */
235
+ outputFilter = new OutputFilterService();
236
+ /** @internal Override for testing — replaces the AI SDK generateText call */
237
+ _generateTextFn = null;
238
+ /**
239
+ * Create a new AgentRunnerService.
240
+ *
241
+ * @param config - Agent configuration
242
+ * @param modelManager - Optional model manager instance (for testing)
243
+ * @param apiClient - Optional API client instance (for testing)
244
+ */
245
+ constructor(config, modelManager, apiClient) {
246
+ this.config = config;
247
+ this.modelManager = modelManager || new ModelManager();
248
+ this.apiClient = apiClient || new CrewlyApiClient(config.apiBaseUrl, config.sessionName);
249
+ this.securityPolicy = { ...CREWLY_AGENT_DEFAULTS.SECURITY_POLICY };
250
+ // In eval mode, strip delegation-first instructions so agent implements directly
251
+ this.effectiveSystemPrompt = config.evalMode
252
+ ? AgentRunnerService.stripDelegationInstructions(config.systemPrompt)
253
+ : config.systemPrompt;
254
+ // Conversation states are lazy-created on first access via the
255
+ // `state` getter, so we don't need to seed `__default__` here.
256
+ // The first message processed will create whichever conversation
257
+ // it targets.
258
+ }
259
+ // ---------------------------------------------------------------------------
260
+ // Eval Mode: Delegation Stripping (P1)
261
+ // ---------------------------------------------------------------------------
262
+ /**
263
+ * Regex patterns that match TL delegation-first instructions in the system prompt.
264
+ * These cause the agent to delegate instead of implementing in eval sandboxes.
265
+ */
266
+ static DELEGATION_PATTERNS = [
267
+ // "delegate 80% of execution tasks" and variants
268
+ /delegate\s+\d+%?\s+of\s+execution\s+tasks?/gi,
269
+ // "DELEGATION-FIRST PROTOCOL" sections
270
+ /DELEGATION-FIRST\s+PROTOCOL[^]*?(?=\n#{1,3}\s|\n---|\Z)/gm,
271
+ // "Only implement yourself when:" blocks
272
+ /\*\*Only implement yourself\*\*\s+when:[^]*?(?=\n#{1,3}\s|\n---|\n\n\*\*)/gm,
273
+ // "Your core loop on every task is:" delegation loop
274
+ /Your core loop on every task is:[^]*?(?=\n#{1,3}\s|\n---)/gm,
275
+ // "Target: delegate 70–80% of execution tasks"
276
+ /Target:\s*delegate\s+\d+[–-]\d+%\s+of\s+execution\s+tasks\.?/gi,
277
+ // Entire "Team Lead Delegation SOP" section
278
+ /#+\s*Team Lead Delegation SOP[^]*?(?=\n#{1,2}\s[^#]|\Z)/gm,
279
+ // "ANTI-PATTERNS" that tell TL not to implement
280
+ /These are ANTI-PATTERNS\.\s*The TL must avoid:[^]*?(?=\n#{1,3}\s|\n---)/gm,
281
+ ];
282
+ /**
283
+ * Eval-mode override instruction injected after stripping delegation instructions.
284
+ * Tells the agent to implement directly.
285
+ */
286
+ static EVAL_MODE_OVERRIDE = [
287
+ '',
288
+ '## Eval Mode Active',
289
+ '',
290
+ 'You are running in evaluation mode. IMPORTANT behavioral overrides:',
291
+ '- **Implement directly** — Do NOT delegate tasks to workers. Write code yourself.',
292
+ '- **Create all output files** — If the task asks you to create a file, you MUST write it using write_file or edit_file.',
293
+ '- **Use standard tool names** — Use handle-failure, delegate-task, send-message for collaboration actions.',
294
+ '- **Materialize deliverables** — After gathering information, always produce the required output files before finishing.',
295
+ '- **Self-check before stopping** — Before you finish, verify: "Have I created every file/artifact the task requested?"',
296
+ '',
297
+ ].join('\n');
298
+ /**
299
+ * Strip delegation-first instructions from a system prompt for eval mode.
300
+ *
301
+ * Removes TL delegation SOP sections, delegation-first protocol blocks,
302
+ * and anti-pattern warnings that cause the agent to delegate instead of
303
+ * implementing. Injects an eval-mode override instruction.
304
+ *
305
+ * @param prompt - Original system prompt
306
+ * @returns Cleaned prompt with eval-mode overrides
307
+ */
308
+ static stripDelegationInstructions(prompt) {
309
+ let cleaned = prompt;
310
+ for (const pattern of AgentRunnerService.DELEGATION_PATTERNS) {
311
+ cleaned = cleaned.replace(pattern, '');
312
+ }
313
+ // Remove consecutive blank lines left by stripping
314
+ cleaned = cleaned.replace(/\n{4,}/g, '\n\n\n');
315
+ // Inject eval mode override at the end
316
+ cleaned = cleaned.trimEnd() + '\n' + AgentRunnerService.EVAL_MODE_OVERRIDE;
317
+ return cleaned;
318
+ }
319
+ // ---------------------------------------------------------------------------
320
+ // Post-Execution Deliverable Check (P0 - Stop Hook)
321
+ // ---------------------------------------------------------------------------
322
+ /**
323
+ * Patterns that indicate the task expects a file to be created.
324
+ * Matches phrases like "create health.controller.ts", "write team-health.json",
325
+ * "save to report.md", etc. Every pattern requires a name ending in one of the listed extensions; capture group 1 is that name.
326
+ */
327
+ static FILE_CREATION_PATTERNS = [
328
+ // Note: longer extensions (json, tsx, jsx, yaml) must come before shorter ones (js, ts) to avoid partial matches. All patterns carry the `g` flag, so `lastIndex` is stateful between exec() calls.
329
+ /(?:create|write|produce|generate|build|implement)\s+(?:a\s+)?(?:file\s+(?:called|named)\s+)?[`"']?(\S+\.(?:tsx|jsx|json|yaml|yml|html|css|ts|js|md|txt))\b[`"']?/gi,
330
+ /(?:output|save|write)\s+(?:to|into)\s+[`"']?(\S+\.(?:tsx|jsx|json|yaml|yml|html|css|ts|js|md|txt))\b[`"']?/gi,
331
+ /[`"'](\S+\.(?:tsx|jsx|json|yaml|yml|html|css|ts|js|md|txt))[`"']\s+(?:file|should be created|must be created)/gi,
332
+ // Backtick-quoted file paths — commonly used in task prompts. Matches any backtick-quoted name with a listed extension, with no creation verb required.
333
+ /`(\S+\.(?:tsx|jsx|json|yaml|yml|html|css|ts|js|md|txt))`/gi,
334
+ ];
335
+ /**
336
+ * Extract expected output file names from the task prompt.
337
+ *
338
+ * Scans the message for file creation patterns and returns the
339
+ * list of file names the task expects to be produced.
340
+ *
341
+ * @param taskPrompt - The original task prompt/message
342
+ * @returns Array of expected file names (basename only)
343
+ */
344
+ static extractExpectedOutputFiles(taskPrompt) {
345
+ const files = new Set();
346
+ for (const pattern of AgentRunnerService.FILE_CREATION_PATTERNS) {
347
+ // Reset lastIndex for global regex
348
+ pattern.lastIndex = 0;
349
+ let match;
350
+ while ((match = pattern.exec(taskPrompt)) !== null) {
351
+ const fileName = match[1];
352
+ if (fileName && !fileName.includes('*') && fileName.length < 100) {
353
+ files.add(fileName);
354
+ }
355
+ }
356
+ }
357
+ return Array.from(files);
358
+ }
359
+ /**
360
+ * Check if the agent's tool calls produced the expected output files.
361
+ *
362
+ * Examines write_file and edit_file tool calls to see if the expected
363
+ * files were written. Returns the list of missing files.
364
+ *
365
+ * @param expectedFiles - File names expected to be created
366
+ * @param toolCalls - Tool calls made during the run
367
+ * @returns Array of file names that were NOT written
368
+ */
369
+ static checkMissingDeliverables(expectedFiles, toolCalls) {
370
+ if (expectedFiles.length === 0)
371
+ return [];
372
+ // Collect all files written by write_file or edit_file tools
373
+ const writtenFiles = new Set();
374
+ for (const tc of toolCalls) {
375
+ if (tc.toolName === 'write_file' || tc.toolName === 'edit_file') {
376
+ const filePath = tc.args.file_path
377
+ ?? tc.args.path
378
+ ?? '';
379
+ if (typeof filePath === 'string' && filePath) {
380
+ // Extract basename for comparison
381
+ const basename = filePath.split('/').pop() ?? filePath;
382
+ writtenFiles.add(basename);
383
+ writtenFiles.add(filePath); // Also add full path
384
+ }
385
+ }
386
+ }
387
+ return expectedFiles.filter((f) => {
388
+ const basename = f.split('/').pop() ?? f;
389
+ return !writtenFiles.has(f) && !writtenFiles.has(basename);
390
+ });
391
+ }
392
+ /**
393
+ * Initialize the agent runner by loading the model.
394
+ * Must be called before run().
395
+ *
396
+ * @throws Error if the model cannot be loaded
397
+ */
398
+ async initialize() {
399
+ this.model = await this.modelManager.getModel(this.config.model);
400
+ // Connect to configured MCP servers and load their tools
401
+ if (this.config.mcpServers && Object.keys(this.config.mcpServers).length > 0) {
402
+ this.mcpClient = new McpClientService();
403
+ const { tools, errors } = await connectAndLoadMcpTools(this.mcpClient, this.config.mcpServers, this.config.mcpSensitivityOverrides);
404
+ this.mcpToolDefs = tools;
405
+ if (errors.size > 0) {
406
+ for (const [name, error] of errors.entries()) {
407
+ // Log but don't fail — partial MCP availability is acceptable
408
+ console.warn(`MCP server "${name}" failed to connect: ${error.message}`);
409
+ }
410
+ }
411
+ }
412
+ }
413
+ /**
414
+ * Run the agent with a new user message.
415
+ *
416
+ * Messages are queued and processed serially to prevent concurrent
417
+ * generateText calls which would corrupt conversation state.
418
+ *
419
+ * @param message - User/system message to process
420
+ * @param conversationId - Optional conversation ID for routing
421
+ * @param metadata - Optional metadata (Slack context, etc.)
422
+ * @param options - Optional abort signal and streaming callbacks
423
+ * @returns Result of the agent run including text, tool calls, and usage
424
+ */
425
+ async run(message, conversationId, metadata, options) {
426
+ return new Promise((resolve, reject) => {
427
+ this.messageQueue.push({ message, conversationId, metadata, resolve, reject, options });
428
+ if (!this.processing) {
429
+ this.processQueue();
430
+ }
431
+ });
432
+ }
433
+ /**
434
+ * Abort the current in-progress run.
435
+ * Signals the active streamText/generateText call to cancel.
436
+ *
437
+ * @returns True if an active run was aborted, false if no run was in progress
438
+ */
439
+ abortCurrentRun() {
440
+ if (this.currentRunAbort) {
441
+ this.currentRunAbort.abort();
442
+ return true;
443
+ }
444
+ return false;
445
+ }
446
+ /**
447
+ * Check if the agent is currently processing a message.
448
+ *
449
+ * @returns True if processing is in progress
450
+ */
451
+ isProcessing() {
452
+ return this.processing;
453
+ }
454
+ /**
455
+ * Get current conversation state (for inspection/debugging).
456
+ *
457
+ * @returns Current conversation state
458
+ */
459
+ getState() {
460
+ return { ...this.state };
461
+ }
462
+ /**
463
+ * Shut down the agent runner, disconnecting MCP servers.
464
+ *
465
+ * Should be called when the agent session ends to clean up
466
+ * child processes spawned by MCP server connections.
467
+ */
468
+ async shutdown() {
469
+ if (this.mcpClient) {
470
+ await this.mcpClient.disconnectAll();
471
+ this.mcpClient = null;
472
+ this.mcpToolDefs = {};
473
+ }
474
+ }
475
+ /**
476
+ * Get the names of connected MCP servers.
477
+ *
478
+ * @returns Array of server names, or empty if no MCP client is configured
479
+ */
480
+ getMcpServerNames() {
481
+ return this.mcpClient?.getConnectedServers() ?? [];
482
+ }
483
+ /**
484
+ * Get the number of MCP tools currently loaded.
485
+ *
486
+ * @returns Number of MCP tool definitions
487
+ */
488
+ getMcpToolCount() {
489
+ return Object.keys(this.mcpToolDefs).length;
490
+ }
491
+ /**
492
+ * Get the current Slack context (channelId + threadTs).
493
+ * Used by the runtime service to inject Slack awareness into the agent.
494
+ *
495
+ * @returns Current Slack context or undefined
496
+ */
497
+ getSlackContext() {
498
+ return this.currentSlackContext;
499
+ }
500
+ /**
501
+ * Get the number of messages in the conversation history.
502
+ *
503
+ * @returns Message count
504
+ */
505
+ getHistoryLength() {
506
+ return this.state.messages.length;
507
+ }
508
+ /**
509
+ * Check if the agent runner has been initialized.
510
+ *
511
+ * @returns True if initialize() has been called successfully
512
+ */
513
+ isInitialized() {
514
+ return this.model !== null;
515
+ }
516
+ /**
517
+ * Get current context budget status.
518
+ *
519
+ * Calculates token usage as a percentage of the model's context window
520
+ * and determines the budget level (normal/warning/critical).
521
+ *
522
+ * @returns ContextBudgetStatus with usage stats and level
523
+ */
524
+ getContextBudget() {
525
+ const totalTokensUsed = this.state.totalTokens.input + this.state.totalTokens.output;
526
+ const contextWindowSize = MODEL_CONTEXT_WINDOWS[this.config.model.modelId]
527
+ ?? MODEL_CONTEXT_WINDOWS.default;
528
+ const usagePercent = contextWindowSize > 0
529
+ ? totalTokensUsed / contextWindowSize
530
+ : 0;
531
+ const threshold = this.config.compactionThreshold;
532
+ const warningThreshold = threshold * 0.85; // warn at 85% of compaction threshold
533
+ let level = 'normal';
534
+ if (usagePercent >= threshold) {
535
+ level = 'critical';
536
+ }
537
+ else if (usagePercent >= warningThreshold) {
538
+ level = 'warning';
539
+ }
540
+ const compactionPending = this.state.messages.length >= this.config.maxHistoryMessages
541
+ || usagePercent >= threshold;
542
+ const pct = (usagePercent * 100).toFixed(1);
543
+ let summary = `${pct}% of context budget used (${totalTokensUsed.toLocaleString()}/${contextWindowSize.toLocaleString()} tokens, ${this.state.messages.length} messages)`;
544
+ if (level === 'critical') {
545
+ summary += ' — CRITICAL: compaction recommended immediately';
546
+ }
547
+ else if (level === 'warning') {
548
+ summary += ' — WARNING: approaching compaction threshold';
549
+ }
550
+ return {
551
+ totalTokensUsed,
552
+ contextWindowSize,
553
+ usagePercent,
554
+ level,
555
+ messageCount: this.state.messages.length,
556
+ compactionPending,
557
+ summary,
558
+ };
559
+ }
560
+ /**
561
+ * Process queued messages serially.
562
+ */
563
+ async processQueue() {
564
+ this.processing = true;
565
+ while (this.messageQueue.length > 0) {
566
+ const item = this.messageQueue.shift();
567
+ try {
568
+ // Update current conversationId for tool context.
569
+ // If the incoming message has an explicit conversationId, use it and
570
+ // remember it for future messages. If not, fall back to the last known
571
+ // conversationId so tools (especially [NOTIFY] output) can still route
572
+ // responses correctly for system messages like scheduled checks.
573
+ if (item.conversationId) {
574
+ this.currentConversationId = item.conversationId;
575
+ this.lastKnownConversationId = item.conversationId;
576
+ }
577
+ else {
578
+ this.currentConversationId = this.lastKnownConversationId;
579
+ }
580
+ // Update Slack context from message metadata (Bug 5 fix).
581
+ // When a message arrives via Slack, metadata contains channelId + threadTs
582
+ // so the agent's tools (reply_slack) know where to reply.
583
+ if (item.metadata?.channelId) {
584
+ this.currentSlackContext = {
585
+ channelId: item.metadata.channelId,
586
+ threadTs: item.metadata.threadTs,
587
+ };
588
+ }
589
+ // 2026-05-15 thread isolation: pick the per-conversation
590
+ // state for this message so the LLM sees only this thread's
591
+ // history. Prefer the explicit conversationId; for Slack
592
+ // inbound that has no conversationId yet (rare path), derive
593
+ // it from the channelId+threadTs marker using the same
594
+ // `slack-${channelId}-${threadTs}` shape persistSlackInbound
595
+ // and `/slack/send` use, so chat-v2 channel ids and runner
596
+ // conversation keys stay aligned. Fall back to `__default__`
597
+ // for runtime-internal messages (scheduled checks, system
598
+ // events) that have no thread identity.
599
+ const resolvedConvKey = item.conversationId ??
600
+ (item.metadata?.channelId && item.metadata?.threadTs
601
+ ? synthesizeSlackConversationId(String(item.metadata.channelId), String(item.metadata.threadTs))
602
+ : this.lastKnownConversationId ?? '__default__');
603
+ this.currentConversationKey = resolvedConvKey;
604
+ // Set streaming callbacks for this run
605
+ this.streamingCallbacks = item.options?.streaming ?? {};
606
+ const result = await this.tracing.withSpan(TRACING_CONSTANTS.SPANS.AGENT_RUN, {
607
+ attributes: {
608
+ 'agent.session': this.config.sessionName,
609
+ 'agent.role': this.config.role,
610
+ }
611
+ }, async () => {
612
+ return this.executeRun(item.message, item.options?.abortSignal);
613
+ });
614
+ item.resolve(result);
615
+ }
616
+ catch (error) {
617
+ item.reject(error instanceof Error ? error : new Error(String(error)));
618
+ }
619
+ }
620
+ this.processing = false;
621
+ // Re-check: a message may have been pushed between the while-loop exit
622
+ // condition check and this.processing = false. Without this guard, the
623
+ // queued message would be stranded — nobody restarts processQueue.
624
+ if (this.messageQueue.length > 0) {
625
+ this.processQueue();
626
+ }
627
+ }
628
+ /**
629
+ * Execute a single streamText run with the current conversation context.
630
+ *
631
+ * Uses streamText for real-time token emission and tool call feedback.
632
+ * Falls back to generateText when _generateTextFn is set (testing).
633
+ *
634
+ * @param message - New message to add to the conversation
635
+ * @param externalAbortSignal - Optional external abort signal for cancellation
636
+ * @returns Agent run result
637
+ */
638
+ async executeRun(message, externalAbortSignal) {
639
+ if (!this.model) {
640
+ throw new Error('AgentRunner not initialized. Call initialize() first.');
641
+ }
642
+ // Check if compaction is needed before adding new message
643
+ // Trigger on message count OR token budget threshold
644
+ const budget = this.getContextBudget();
645
+ if (this.state.messages.length >= this.config.maxHistoryMessages || budget.level === 'critical') {
646
+ await this.compactHistory();
647
+ }
648
+ // Add user message to history
649
+ this.state.messages.push({ role: 'user', content: message });
650
+ this.state.lastActivityAt = new Date();
651
+ // Build tools with callbacks for compaction, audit, and security enforcement
652
+ const callbacks = {
653
+ onCompactMemory: () => this.requestCompaction(),
654
+ onGetContextBudget: () => this.getContextBudget(),
655
+ onAuditLog: (entry) => this.recordAudit({ ...entry, sessionName: this.config.sessionName }),
656
+ onCheckApproval: (toolName, sensitivity) => this.checkApproval(toolName, sensitivity),
657
+ onGetAuditLog: (filters) => this.getFilteredAuditLog(filters),
658
+ onEnqueueApproval: (toolName, sensitivity, args) => {
659
+ const approval = this.approvalQueue.enqueue(this.config.sessionName, toolName, sensitivity, args);
660
+ return { approvalId: approval.id };
661
+ },
662
+ };
663
+ const mcpTools = Object.keys(this.mcpToolDefs).length > 0 ? this.mcpToolDefs : undefined;
664
+ const tools = createTools(this.apiClient, this.config.sessionName, this.config.projectPath, callbacks, this.currentConversationId, this.currentSlackContext, mcpTools);
665
+ // Create abort controller that merges external signal with internal control
666
+ const runAbort = new AbortController();
667
+ this.currentRunAbort = runAbort;
668
+ // If external signal is already aborted, abort immediately
669
+ if (externalAbortSignal?.aborted) {
670
+ runAbort.abort();
671
+ }
672
+ else if (externalAbortSignal) {
673
+ externalAbortSignal.addEventListener('abort', () => runAbort.abort(), { once: true });
674
+ }
675
+ try {
676
+ // If a test override is set, use generateText path (backward compatible)
677
+ if (this._generateTextFn) {
678
+ return await this.executeRunWithGenerateText(tools, runAbort.signal);
679
+ }
680
+ // Production path: streamText for real-time feedback
681
+ return await this.executeRunWithStreamText(tools, runAbort.signal);
682
+ }
683
+ finally {
684
+ this.currentRunAbort = null;
685
+ }
686
+ }
687
+ /**
688
+ * Check if an error is recoverable and eligible for automatic retry.
689
+ *
690
+ * Recoverable errors include:
691
+ * - HTTP 429 (rate limit)
692
+ * - HTTP 5xx (server errors)
693
+ * - Network timeouts and connection errors
694
+ *
695
+ * @param error - The error to classify
696
+ * @returns True if the error is recoverable
697
+ */
698
+ isRecoverableError(error) {
699
+ if (!(error instanceof Error))
700
+ return false;
701
+ const msg = error.message.toLowerCase();
702
+ const statusMatch = msg.match(/\b(429|5\d{2})\b/);
703
+ if (statusMatch)
704
+ return true;
705
+ if (msg.includes('rate limit') || msg.includes('too many requests'))
706
+ return true;
707
+ if (msg.includes('timeout') || msg.includes('econnreset') || msg.includes('econnrefused'))
708
+ return true;
709
+ if (msg.includes('network') || msg.includes('fetch failed') || msg.includes('socket hang up'))
710
+ return true;
711
+ if (msg.includes('service unavailable') || msg.includes('internal server error'))
712
+ return true;
713
+ return false;
714
+ }
715
+ /**
716
+ * Check if an error indicates the context length was exceeded.
717
+ *
718
+ * @param error - The error to classify
719
+ * @returns True if the error is a context length exceeded error
720
+ */
721
+ isContextLengthError(error) {
722
+ if (!(error instanceof Error))
723
+ return false;
724
+ const msg = error.message.toLowerCase();
725
+ return msg.includes('context length') || msg.includes('token limit')
726
+ || msg.includes('max_tokens') || msg.includes('context window')
727
+ || msg.includes('too long') || msg.includes('maximum context');
728
+ }
729
+ /**
730
+ * Sleep for a specified duration.
731
+ *
732
+ * @param ms - Milliseconds to sleep
733
+ * @returns Promise that resolves after the delay
734
+ */
735
+ sleep(ms) {
736
+ return new Promise(resolve => setTimeout(resolve, ms));
737
+ }
738
+ /**
739
+ * Execute run using streamText for real-time streaming output.
740
+ * This is the production path — emits events as tokens arrive.
741
+ *
742
+ * Includes automatic retry with exponential backoff for recoverable errors
743
+ * (429, 5xx, network) and progressive context trimming for context length errors.
744
+ */
745
+ async executeRunWithStreamText(tools, abortSignal) {
746
+ const maxRetries = CREWLY_AGENT_DEFAULTS.MAX_RETRIES;
747
+ const baseDelay = CREWLY_AGENT_DEFAULTS.RETRY_BASE_DELAY_MS;
748
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
749
+ try {
750
+ return await this.executeStreamTextAttempt(tools, abortSignal);
751
+ }
752
+ catch (error) {
753
+ // Context length exceeded — try compaction then retry once
754
+ if (this.isContextLengthError(error)) {
755
+ this.streamingCallbacks.onTextChunk?.('[retry] Context length exceeded, compacting history...\n');
756
+ const compactionResult = await this.requestCompaction();
757
+ if (compactionResult.compacted) {
758
+ try {
759
+ return await this.executeStreamTextAttempt(tools, abortSignal);
760
+ }
761
+ catch (retryError) {
762
+ // If still too long, remove earliest non-system messages and try once more
763
+ if (this.isContextLengthError(retryError) && this.state.messages.length > 2) {
764
+ this.streamingCallbacks.onTextChunk?.('[retry] Still too long, trimming oldest messages...\n');
765
+ this.trimOldestNonSystemMessages();
766
+ return await this.executeStreamTextAttempt(tools, abortSignal);
767
+ }
768
+ throw retryError;
769
+ }
770
+ }
771
+ }
772
+ // Recoverable error — retry with backoff
773
+ if (this.isRecoverableError(error) && attempt < maxRetries) {
774
+ const delay = baseDelay * Math.pow(2, attempt);
775
+ this.streamingCallbacks.onTextChunk?.(`[retry] Recoverable error (attempt ${attempt + 1}/${maxRetries}), retrying in ${delay}ms...\n`);
776
+ await this.sleep(delay);
777
+ continue;
778
+ }
779
+ throw error;
780
+ }
781
+ }
782
+ // Unreachable — the loop always returns or throws
783
+ throw new Error('Retry loop exhausted without result');
784
+ }
785
+ /**
786
+ * Single attempt of streamText execution (no retry logic).
787
+ */
788
+ async executeStreamTextAttempt(tools, abortSignal) {
789
+ const toolCalls = [];
790
+ let stepCount = 0;
791
+ const loopDetector = new ToolCallLoopDetector();
792
+ // Local abort controller so we can abort on loop detection
793
+ const loopAbort = new AbortController();
794
+ const mergedSignal = AbortSignal.any([abortSignal, loopAbort.signal]);
795
+ const streamResult = streamText({
796
+ model: this.model,
797
+ system: this.state.systemPrompt,
798
+ messages: this.state.messages,
799
+ tools: tools,
800
+ stopWhen: stepCountIs(this.config.maxSteps),
801
+ temperature: this.config.model.temperature,
802
+ maxOutputTokens: resolveMaxOutputTokens(this.config.model),
803
+ abortSignal: mergedSignal,
804
+ onChunk: ({ chunk }) => {
805
+ // Emit text chunks in real-time
806
+ if (chunk.type === 'text-delta' && chunk.text) {
807
+ this.streamingCallbacks.onTextChunk?.(chunk.text);
808
+ }
809
+ },
810
+ experimental_onToolCallStart: (event) => {
811
+ const tc = event.toolCall;
812
+ const args = tc?.args ?? tc?.input ?? {};
813
+ this.streamingCallbacks.onToolCallStart?.(tc?.toolName ?? 'unknown', (typeof args === 'string' ? JSON.parse(args) : args));
814
+ },
815
+ experimental_onToolCallFinish: (event) => {
816
+ const tc = event.toolCall;
817
+ const args = tc?.args ?? tc?.input ?? {};
818
+ this.streamingCallbacks.onToolCallFinish?.(tc?.toolName ?? 'unknown', (typeof args === 'string' ? JSON.parse(args) : args), event.toolResult, event.durationMs ?? 0);
819
+ },
820
+ onStepFinish: ({ toolCalls: stepToolCalls, toolResults }) => {
821
+ stepCount++;
822
+ const hasTools = (stepToolCalls?.length ?? 0) > 0;
823
+ // Collect tool calls from this step and check for loops
824
+ if (stepToolCalls) {
825
+ for (const tc of stepToolCalls) {
826
+ const args = tc.input ?? {};
827
+ const result = toolResults?.find((tr) => tr.toolCallId === tc.toolCallId)?.output;
828
+ toolCalls.push({ toolName: tc.toolName, args, result });
829
+ loopDetector.recordToolCall(tc.toolName, args, result);
830
+ }
831
+ }
832
+ // Abort if loop detected — will be caught below
833
+ if (loopDetector.loopDetected) {
834
+ console.warn('[AgentRunner] Loop detected, aborting run:', loopDetector.loopReason);
835
+ loopAbort.abort();
836
+ }
837
+ this.streamingCallbacks.onStepFinish?.(stepCount, hasTools);
838
+ },
839
+ });
840
+ // I2 — DeepSeek reasoning buffer leak guard.
841
+ // The DeepSeek custom fetch wrapper accumulates parser handles per HTTP call.
842
+ // If streamText throws (timeout, network) BEFORE the success-path consume runs,
843
+ // those handles never drain and leak across run boundaries. The try/finally
844
+ // guarantees a consume call happens on every exit path. Consume-once
845
+ // semantics in ModelManager make double-call on the success path harmless
846
+ // (second call returns null).
847
+ try {
848
+ // Await the full result (stream completes when all steps are done or aborted)
849
+ let result;
850
+ try {
851
+ result = await streamResult;
852
+ }
853
+ catch (err) {
854
+ // If aborted due to loop detection, handle gracefully
855
+ if (loopDetector.loopDetected) {
856
+ return this.handleLoopDetected(loopDetector, toolCalls, stepCount);
857
+ }
858
+ throw err;
859
+ }
860
+ // Also check post-completion in case the loop threshold was hit on the final step
861
+ if (loopDetector.loopDetected) {
862
+ return this.handleLoopDetected(loopDetector, toolCalls, stepCount);
863
+ }
864
+ // Warn if tool call count is excessive (polling dead-loop protection)
865
+ const maxToolCalls = CREWLY_AGENT_DEFAULTS.MAX_TOOL_CALLS_PER_RESPONSE;
866
+ if (toolCalls.length > maxToolCalls) {
867
+ console.warn('[AgentRunner] Excessive tool calls in single response:', {
868
+ count: toolCalls.length,
869
+ limit: maxToolCalls,
870
+ topTools: toolCalls.slice(0, 5).map(tc => tc.toolName),
871
+ });
872
+ }
873
+ // Add assistant response to history
874
+ let text = await result.text;
875
+ if (text) {
876
+ this.state.messages.push({ role: 'assistant', content: text });
877
+ }
878
+ // Empty response fallback: if model made tool calls but produced no text summary,
879
+ // prompt it once more to generate a summary (prevents silent completions)
880
+ if (!text && toolCalls.length > 0) {
881
+ console.warn('[AgentRunner] Empty text response after tool calls, requesting summary fallback');
882
+ const fallbackResult = await this.requestSummaryFallback();
883
+ if (fallbackResult) {
884
+ text = fallbackResult;
885
+ }
886
+ }
887
+ // Security guardrail: redact any API keys from agent output
888
+ if (text) {
889
+ const scanResult = this.outputFilter.scan(text);
890
+ if (scanResult.detected) {
891
+ console.warn('[AgentRunner] API keys redacted from output:', scanResult.matchedPatterns);
892
+ text = scanResult.redactedText;
893
+ }
894
+ }
895
+ // Update token tracking
896
+ const resultUsage = await result.usage;
897
+ const usage = {
898
+ input: resultUsage?.inputTokens ?? 0,
899
+ output: resultUsage?.outputTokens ?? 0,
900
+ };
901
+ this.state.totalTokens.input += usage.input;
902
+ this.state.totalTokens.output += usage.output;
903
+ // Check budget after token update
904
+ const postBudget = this.getContextBudget();
905
+ const budgetWarning = postBudget.level !== 'normal' ? postBudget.summary : undefined;
906
+ const finishReason = await result.finishReason;
907
+ // P0 Stop Hook: In eval mode, check if required output files were created.
908
+ // If deliverables are missing, inject a corrective message and do one more run.
909
+ // Note: If loop was detected, we already returned early via handleLoopDetected.
910
+ if (this.config.evalMode) {
911
+ const stopHookResult = await this.executeStopHook(toolCalls, tools, abortSignal);
912
+ if (stopHookResult) {
913
+ // Merge tool calls and update text from the follow-up run
914
+ toolCalls.push(...stopHookResult.toolCalls);
915
+ if (stopHookResult.text) {
916
+ text = stopHookResult.text;
917
+ }
918
+ }
919
+ }
920
+ // I2 — DeepSeek-R1 reasoning_content drain.
921
+ // After streamResult is fully drained, pull any reasoning the custom fetch
922
+ // wrapper accumulated for this run. Returns null for non-DeepSeek providers
923
+ // (the wrapper only runs on the DeepSeek provider path) or when no
924
+ // reasoning was produced.
925
+ const reasoning = this.config.model.provider === 'deepseek'
926
+ ? await this.modelManager.consumeDeepseekReasoning()
927
+ : undefined;
928
+ return {
929
+ text,
930
+ steps: stepCount,
931
+ usage,
932
+ toolCalls,
933
+ finishReason,
934
+ budgetWarning,
935
+ reasoning,
936
+ };
937
+ }
938
+ finally {
939
+ // Cleanup-drain — if try block threw before the success-path consume,
940
+ // this prevents the parser handle array from leaking across runs.
941
+ // Safe on success path: consume-once semantics return null on 2nd call.
942
+ if (this.config.model.provider === 'deepseek') {
943
+ try {
944
+ await this.modelManager.consumeDeepseekReasoning();
945
+ }
946
+ catch (e) {
947
+ console.warn('[AgentRunner] DeepSeek reasoning cleanup-drain failed:', e);
948
+ }
949
+ }
950
+ }
951
+ }
952
+ /**
953
+ * Execute run using generateText (batch mode).
954
+ * Used when _generateTextFn is set for testing, or as fallback.
955
+ *
956
+ * Includes automatic retry with exponential backoff for recoverable errors
957
+ * and progressive context trimming for context length errors.
958
+ */
959
+ async executeRunWithGenerateText(tools, abortSignal) {
960
+ const maxRetries = CREWLY_AGENT_DEFAULTS.MAX_RETRIES;
961
+ const baseDelay = CREWLY_AGENT_DEFAULTS.RETRY_BASE_DELAY_MS;
962
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
963
+ try {
964
+ return await this.executeGenerateTextAttempt(tools, abortSignal);
965
+ }
966
+ catch (error) {
967
+ // Context length exceeded — try compaction then retry once
968
+ if (this.isContextLengthError(error)) {
969
+ const compactionResult = await this.requestCompaction();
970
+ if (compactionResult.compacted) {
971
+ try {
972
+ return await this.executeGenerateTextAttempt(tools, abortSignal);
973
+ }
974
+ catch (retryError) {
975
+ if (this.isContextLengthError(retryError) && this.state.messages.length > 2) {
976
+ this.trimOldestNonSystemMessages();
977
+ return await this.executeGenerateTextAttempt(tools, abortSignal);
978
+ }
979
+ throw retryError;
980
+ }
981
+ }
982
+ }
983
+ // Recoverable error — retry with backoff
984
+ if (this.isRecoverableError(error) && attempt < maxRetries) {
985
+ const delay = baseDelay * Math.pow(2, attempt);
986
+ await this.sleep(delay);
987
+ continue;
988
+ }
989
+ throw error;
990
+ }
991
+ }
992
+ throw new Error('Retry loop exhausted without result');
993
+ }
994
+ /**
995
+ * Single attempt of generateText execution (no retry logic).
996
+ */
997
+ async executeGenerateTextAttempt(tools, abortSignal) {
998
+ const generateFn = this._generateTextFn || generateText;
999
+ const result = await generateFn({
1000
+ model: this.model,
1001
+ system: this.state.systemPrompt,
1002
+ messages: this.state.messages,
1003
+ tools,
1004
+ stopWhen: stepCountIs(this.config.maxSteps),
1005
+ temperature: this.config.model.temperature,
1006
+ maxOutputTokens: resolveMaxOutputTokens(this.config.model),
1007
+ abortSignal,
1008
+ });
1009
+ // Track tool calls across all steps with loop detection
1010
+ const toolCalls = [];
1011
+ const loopDetector = new ToolCallLoopDetector();
1012
+ for (const step of result.steps) {
1013
+ if (step.toolCalls) {
1014
+ for (const tc of step.toolCalls) {
1015
+ const args = tc.input ?? {};
1016
+ const tcResult = step.toolResults?.find((tr) => tr.toolCallId === tc.toolCallId)?.output;
1017
+ toolCalls.push({ toolName: tc.toolName, args, result: tcResult });
1018
+ loopDetector.recordToolCall(tc.toolName, args, tcResult);
1019
+ }
1020
+ }
1021
+ }
1022
+ // If loop detected in generateText path, handle gracefully
1023
+ if (loopDetector.loopDetected) {
1024
+ console.warn('[AgentRunner] Loop detected in generateText:', loopDetector.loopReason);
1025
+ return this.handleLoopDetected(loopDetector, toolCalls, result.steps.length);
1026
+ }
1027
+ // Warn if tool call count is excessive
1028
+ const maxToolCalls = CREWLY_AGENT_DEFAULTS.MAX_TOOL_CALLS_PER_RESPONSE;
1029
+ if (toolCalls.length > maxToolCalls) {
1030
+ console.warn('[AgentRunner] Excessive tool calls in single response:', {
1031
+ count: toolCalls.length,
1032
+ limit: maxToolCalls,
1033
+ topTools: toolCalls.slice(0, 5).map(tc => tc.toolName),
1034
+ });
1035
+ }
1036
+ // Add assistant response to history
1037
+ let finalText = result.text;
1038
+ if (finalText) {
1039
+ this.state.messages.push({ role: 'assistant', content: finalText });
1040
+ }
1041
+ // Empty response fallback: if model made tool calls but produced no text summary,
1042
+ // prompt it once more to generate a summary (prevents silent completions)
1043
+ if (!finalText && toolCalls.length > 0) {
1044
+ console.warn('[AgentRunner] Empty text response after tool calls, requesting summary fallback');
1045
+ const fallbackResult = await this.requestSummaryFallback();
1046
+ if (fallbackResult) {
1047
+ finalText = fallbackResult;
1048
+ }
1049
+ }
1050
+ // Security guardrail: redact any API keys from agent output
1051
+ if (finalText) {
1052
+ const scanResult = this.outputFilter.scan(finalText);
1053
+ if (scanResult.detected) {
1054
+ console.warn('[AgentRunner] API keys redacted from output:', scanResult.matchedPatterns);
1055
+ finalText = scanResult.redactedText;
1056
+ }
1057
+ }
1058
+ // Update token tracking
1059
+ const usage = {
1060
+ input: result.usage?.inputTokens ?? 0,
1061
+ output: result.usage?.outputTokens ?? 0,
1062
+ };
1063
+ this.state.totalTokens.input += usage.input;
1064
+ this.state.totalTokens.output += usage.output;
1065
+ // Check budget after token update and attach warning if approaching limits
1066
+ const postBudget = this.getContextBudget();
1067
+ const budgetWarning = postBudget.level !== 'normal' ? postBudget.summary : undefined;
1068
+ // P0 Stop Hook: In eval mode, check if required output files were created.
1069
+ // Note: If loop was detected via loopDetector, we already returned early.
1070
+ if (this.config.evalMode) {
1071
+ const stopHookResult = await this.executeStopHook(toolCalls, tools, abortSignal);
1072
+ if (stopHookResult) {
1073
+ toolCalls.push(...stopHookResult.toolCalls);
1074
+ if (stopHookResult.text) {
1075
+ finalText = stopHookResult.text;
1076
+ }
1077
+ }
1078
+ }
1079
+ // I2 — DeepSeek-R1 reasoning_content drain (generateText path).
1080
+ // Same as the streamText path: pull buffered reasoning the custom fetch
1081
+ // wrapper accumulated. Returns null for non-DeepSeek providers.
1082
+ const reasoning = this.config.model.provider === 'deepseek'
1083
+ ? await this.modelManager.consumeDeepseekReasoning()
1084
+ : undefined;
1085
+ return {
1086
+ text: finalText,
1087
+ steps: result.steps.length,
1088
+ usage,
1089
+ toolCalls,
1090
+ finishReason: result.finishReason,
1091
+ budgetWarning,
1092
+ reasoning,
1093
+ };
1094
+ }
1095
+ /**
1096
+ * Remove the oldest non-system messages to reduce context size.
1097
+ * Preserves the most recent messages and any system-role messages.
1098
+ */
1099
+ trimOldestNonSystemMessages() {
1100
+ // Remove up to 5 of the oldest non-system messages
1101
+ let removed = 0;
1102
+ const maxRemove = 5;
1103
+ this.state.messages = this.state.messages.filter((msg) => {
1104
+ if (removed >= maxRemove)
1105
+ return true;
1106
+ if (msg.role === 'system')
1107
+ return true;
1108
+ removed++;
1109
+ return false;
1110
+ });
1111
+ }
1112
+ /**
1113
+ * Handle a detected tool call loop by injecting a corrective system message
1114
+ * into conversation history and returning a structured result.
1115
+ *
1116
+ * @param detector - The loop detector with reason details
1117
+ * @param toolCalls - Tool calls collected so far
1118
+ * @param steps - Number of steps taken
1119
+ * @returns AgentRunResult with the loop warning as text
1120
+ */
1121
+ handleLoopDetected(detector, toolCalls, steps) {
1122
+ const guidance = `[LOOP DETECTED] ${detector.loopReason}. ` +
1123
+ 'You are repeating the same action without progress. ' +
1124
+ 'STOP and try a different approach: use a different tool, change the arguments, ' +
1125
+ 'skip this step, or ask for help. Do NOT repeat the same call again.';
1126
+ // Inject corrective message so the model sees it on the next run
1127
+ this.state.messages.push({ role: 'assistant', content: `[Loop detected — halting. ${detector.loopReason}]` });
1128
+ this.state.messages.push({ role: 'user', content: guidance });
1129
+ this.streamingCallbacks.onTextChunk?.(`\n⚠️ ${guidance}\n`);
1130
+ return {
1131
+ text: `[Loop detected] ${detector.loopReason}`,
1132
+ steps,
1133
+ usage: { input: 0, output: 0 },
1134
+ toolCalls,
1135
+ finishReason: 'loop-detected',
1136
+ budgetWarning: undefined,
1137
+ };
1138
+ }
1139
+ /**
1140
+ * Execute the Stop Hook: check if the agent produced all required deliverables.
1141
+ *
1142
+ * Scans the original task prompt (first user message) for expected output files,
1143
+ * then checks if write_file/edit_file tool calls created them. If files are
1144
+ * missing, injects a corrective prompt and runs one more generateText call
1145
+ * with tools so the agent can create the missing deliverables.
1146
+ *
1147
+ * Inspired by Claude Code's Stop hook which blocks the agent from finishing
1148
+ * until task requirements are met.
1149
+ *
1150
+ * @param toolCalls - Tool calls made so far
1151
+ * @param tools - Available tools for the follow-up run
1152
+ * @param abortSignal - Abort signal for cancellation
1153
+ * @returns Additional AgentRunResult from the follow-up, or null if no action needed
1154
+ */
1155
+ async executeStopHook(toolCalls, tools, abortSignal) {
1156
+ if (!this.model)
1157
+ return null;
1158
+ // Find the original task prompt (first user message)
1159
+ const firstUserMsg = this.state.messages.find((m) => m.role === 'user');
1160
+ if (!firstUserMsg)
1161
+ return null;
1162
+ const taskPrompt = typeof firstUserMsg.content === 'string'
1163
+ ? firstUserMsg.content
1164
+ : JSON.stringify(firstUserMsg.content);
1165
+ // Extract expected output files from the task prompt
1166
+ const expectedFiles = AgentRunnerService.extractExpectedOutputFiles(taskPrompt);
1167
+ if (expectedFiles.length === 0)
1168
+ return null;
1169
+ // Check which files are missing
1170
+ const missingFiles = AgentRunnerService.checkMissingDeliverables(expectedFiles, toolCalls);
1171
+ if (missingFiles.length === 0)
1172
+ return null;
1173
+ // Inject corrective message
1174
+ const stopMessage = [
1175
+ '[STOP HOOK — Deliverable Check Failed]',
1176
+ '',
1177
+ `The task requires you to create these files: ${expectedFiles.map(f => '`' + f + '`').join(', ')}`,
1178
+ `Missing files: ${missingFiles.map(f => '`' + f + '`').join(', ')}`,
1179
+ '',
1180
+ 'You MUST create these files before finishing. Use write_file to create each missing file now.',
1181
+ 'Do NOT delegate this work. Implement and write the files directly.',
1182
+ ].join('\n');
1183
+ this.state.messages.push({ role: 'user', content: stopMessage });
1184
+ this.streamingCallbacks.onTextChunk?.(`\n⚠️ Stop Hook: Missing deliverables: ${missingFiles.join(', ')}. Running follow-up...\n`);
1185
+ try {
1186
+ // Run one more round with tools to create missing files
1187
+ const followUp = await generateText({
1188
+ model: this.model,
1189
+ system: this.state.systemPrompt,
1190
+ messages: this.state.messages,
1191
+ tools: tools,
1192
+ stopWhen: stepCountIs(20), // Limited steps for follow-up
1193
+ temperature: this.config.model.temperature,
1194
+ maxOutputTokens: resolveMaxOutputTokens(this.config.model),
1195
+ abortSignal,
1196
+ });
1197
+ // Extract results from the generateText response using safe property access
1198
+ const followUpResult = followUp;
1199
+ const steps = followUpResult.steps ?? [];
1200
+ const text = followUpResult.text ?? '';
1201
+ const followUpUsage = followUpResult.usage;
1202
+ const finishReason = followUpResult.finishReason ?? 'stop';
1203
+ const followUpToolCalls = [];
1204
+ for (const step of steps) {
1205
+ if (step.toolCalls) {
1206
+ for (const tc of step.toolCalls) {
1207
+ const args = tc.input ?? {};
1208
+ followUpToolCalls.push({ toolName: tc.toolName, args, result: undefined });
1209
+ }
1210
+ }
1211
+ }
1212
+ if (text) {
1213
+ this.state.messages.push({ role: 'assistant', content: text });
1214
+ }
1215
+ // Track follow-up token usage
1216
+ if (followUpUsage) {
1217
+ this.state.totalTokens.input += followUpUsage.inputTokens ?? 0;
1218
+ this.state.totalTokens.output += followUpUsage.outputTokens ?? 0;
1219
+ }
1220
+ return {
1221
+ text,
1222
+ steps: steps.length,
1223
+ usage: {
1224
+ input: followUpUsage?.inputTokens ?? 0,
1225
+ output: followUpUsage?.outputTokens ?? 0,
1226
+ },
1227
+ toolCalls: followUpToolCalls,
1228
+ finishReason,
1229
+ };
1230
+ }
1231
+ catch (err) {
1232
+ console.warn('[AgentRunner] Stop hook follow-up failed:', err instanceof Error ? err.message : err);
1233
+ return null;
1234
+ }
1235
+ }
1236
+ /**
1237
+ * Request a text summary from the model when the previous response had tool calls
1238
+ * but no text output. Injects a follow-up user message and makes a single
1239
+ * generateText call with no tools to force a text-only response.
1240
+ *
1241
+ * @returns The summary text, or empty string if the fallback also fails
1242
+ */
1243
+ async requestSummaryFallback() {
1244
+ if (!this.model)
1245
+ return '';
1246
+ const prompt = '请用文字总结你刚才完成的工作和发现的结果,然后调用report-status汇报。' +
1247
+ 'Please summarize what you just did, what you found, and any issues encountered. ' +
1248
+ 'Then call report-status to report your status.';
1249
+ this.state.messages.push({ role: 'user', content: prompt });
1250
+ try {
1251
+ const fallback = await generateText({
1252
+ model: this.model,
1253
+ system: this.state.systemPrompt,
1254
+ messages: this.state.messages,
1255
+ maxOutputTokens: resolveMaxOutputTokens(this.config.model),
1256
+ temperature: this.config.model.temperature,
1257
+ });
1258
+ const text = fallback.text || '';
1259
+ if (text) {
1260
+ this.state.messages.push({ role: 'assistant', content: text });
1261
+ this.streamingCallbacks.onTextChunk?.(text);
1262
+ // Track fallback token usage
1263
+ const fallbackUsage = fallback.usage;
1264
+ if (fallbackUsage) {
1265
+ this.state.totalTokens.input += fallbackUsage.inputTokens ?? 0;
1266
+ this.state.totalTokens.output += fallbackUsage.outputTokens ?? 0;
1267
+ }
1268
+ }
1269
+ return text;
1270
+ }
1271
+ catch (err) {
1272
+ console.error('[AgentRunner] Summary fallback failed:', err instanceof Error ? err.message : err);
1273
+ return '';
1274
+ }
1275
+ }
1276
+ /**
1277
+ * Public method for agent-initiated context compaction.
1278
+ * Called by the compact_memory tool to intelligently summarize conversation state.
1279
+ *
1280
+ * Uses the model to generate a structured summary preserving:
1281
+ * - Active tasks and their status
1282
+ * - Key decisions made
1283
+ * - Important findings and blockers
1284
+ * - Current working context
1285
+ *
1286
+ * @returns CompactionResult with before/after stats
1287
+ */
1288
+ async requestCompaction() {
1289
+ if (!this.model || this.state.messages.length < 10) {
1290
+ return {
1291
+ compacted: false,
1292
+ messagesBefore: this.state.messages.length,
1293
+ messagesAfter: this.state.messages.length,
1294
+ reason: this.state.messages.length < 10
1295
+ ? 'Too few messages to compact'
1296
+ : 'Model not initialized',
1297
+ };
1298
+ }
1299
+ return this.compactHistory();
1300
+ }
1301
+ /**
1302
+ * Get the security audit log.
1303
+ *
1304
+ * @param limit - Maximum number of entries to return (most recent first)
1305
+ * @returns Array of audit entries
1306
+ */
1307
+ getAuditLog(limit) {
1308
+ const entries = [...this.auditLog].reverse();
1309
+ return limit ? entries.slice(0, limit) : entries;
1310
+ }
1311
+ /**
1312
+ * Get the current security policy.
1313
+ *
1314
+ * @returns Current security policy configuration
1315
+ */
1316
+ getSecurityPolicy() {
1317
+ return { ...this.securityPolicy };
1318
+ }
1319
+ /**
1320
+ * Update the security policy.
1321
+ *
1322
+ * @param updates - Partial security policy to merge
1323
+ */
1324
+ updateSecurityPolicy(updates) {
1325
+ this.securityPolicy = { ...this.securityPolicy, ...updates };
1326
+ }
1327
+ /**
1328
+ * Get the approval queue service instance.
1329
+ * Used by the approvals controller to manage pending approvals.
1330
+ *
1331
+ * @returns The ApprovalQueueService instance
1332
+ */
1333
+ getApprovalQueue() {
1334
+ return this.approvalQueue;
1335
+ }
1336
+ /**
1337
+ * Record an audit entry for a tool invocation.
1338
+ *
1339
+ * @param entry - Audit entry to record
1340
+ */
1341
+ recordAudit(entry) {
1342
+ if (!this.securityPolicy.auditEnabled)
1343
+ return;
1344
+ this.auditLog.push(entry);
1345
+ // Enforce max entries limit
1346
+ if (this.auditLog.length > this.securityPolicy.maxAuditEntries) {
1347
+ this.auditLog = this.auditLog.slice(-this.securityPolicy.maxAuditEntries);
1348
+ }
1349
+ }
1350
+ /**
1351
+ * Check if a tool is allowed to execute under the current security policy.
1352
+ *
1353
+ * Evaluates the tool against two checks:
1354
+ * 1. blockedTools — tools explicitly blocked by name (returns blocked=true)
1355
+ * 2. requireApproval — tools whose sensitivity requires approval (returns requiresApproval=true)
1356
+ *
1357
+ * @param toolName - Name of the tool being invoked
1358
+ * @param sensitivity - Sensitivity classification of the tool
1359
+ * @returns ApprovalCheckResult indicating if execution is allowed
1360
+ */
1361
+ checkApproval(toolName, sensitivity) {
1362
+ // Check read-only mode — block all write/modify tools
1363
+ if (this.securityPolicy.readOnlyMode && WRITE_TOOLS.includes(toolName)) {
1364
+ return {
1365
+ allowed: false,
1366
+ blocked: true,
1367
+ reason: `Tool '${toolName}' is blocked — read-only audit mode is active`,
1368
+ };
1369
+ }
1370
+ // Check blocked tools
1371
+ if (this.securityPolicy.blockedTools.includes(toolName)) {
1372
+ return {
1373
+ allowed: false,
1374
+ blocked: true,
1375
+ reason: `Tool '${toolName}' is blocked by security policy`,
1376
+ };
1377
+ }
1378
+ // Check approval requirements
1379
+ if (this.securityPolicy.requireApproval.includes(sensitivity)) {
1380
+ return {
1381
+ allowed: false,
1382
+ blocked: false,
1383
+ reason: `Tool '${toolName}' (${sensitivity}) requires approval — approval mode is active for '${sensitivity}' tools`,
1384
+ };
1385
+ }
1386
+ return { allowed: true };
1387
+ }
1388
+ /**
1389
+ * Get filtered audit log entries.
1390
+ *
1391
+ * @param filters - Query filters for limit, sensitivity, and toolName
1392
+ * @returns Filtered audit entries (most recent first)
1393
+ */
1394
+ getFilteredAuditLog(filters) {
1395
+ let entries = [...this.auditLog].reverse();
1396
+ if (filters.sensitivity) {
1397
+ entries = entries.filter(e => e.sensitivity === filters.sensitivity);
1398
+ }
1399
+ if (filters.toolName) {
1400
+ entries = entries.filter(e => e.toolName === filters.toolName);
1401
+ }
1402
+ return entries.slice(0, filters.limit);
1403
+ }
1404
+ /**
1405
+ * Compact conversation history using AI-generated structured summary.
1406
+ *
1407
+ * Keeps the most recent messages and uses the model to generate an
1408
+ * intelligent summary of older messages that preserves critical state:
1409
+ * decisions, active tasks, findings, and working context.
1410
+ *
1411
+ * Falls back to truncation-based summary if AI summarization fails.
1412
+ *
1413
+ * @returns CompactionResult with before/after statistics
1414
+ */
1415
+ async compactHistory() {
1416
+ // Guard against concurrent compaction — if already compacting, skip
1417
+ if (this.compacting) {
1418
+ return {
1419
+ compacted: false,
1420
+ messagesBefore: this.state.messages.length,
1421
+ messagesAfter: this.state.messages.length,
1422
+ reason: 'Compaction already in progress',
1423
+ };
1424
+ }
1425
+ if (!this.model || this.state.messages.length < 10) {
1426
+ return {
1427
+ compacted: false,
1428
+ messagesBefore: this.state.messages.length,
1429
+ messagesAfter: this.state.messages.length,
1430
+ reason: 'History too small to compact',
1431
+ };
1432
+ }
1433
+ this.compacting = true;
1434
+ try {
1435
+ const messagesBefore = this.state.messages.length;
1436
+ // Determine the split point: keep at least 10 recent messages but adjust
1437
+ // to avoid breaking tool_call/tool_result pairs. If the first "recent"
1438
+ // message is a tool result (role === 'tool'), extend keepRecent backwards
1439
+ // to include its paired assistant tool_call message.
1440
+ let keepRecent = Math.min(10, this.state.messages.length - 2);
1441
+ if (keepRecent < 2)
1442
+ keepRecent = 2;
1443
+ // Expand keepRecent if we'd split inside a tool call pair
1444
+ let splitIdx = this.state.messages.length - keepRecent;
1445
+ while (splitIdx > 0 && splitIdx < this.state.messages.length) {
1446
+ const firstKept = this.state.messages[splitIdx];
1447
+ // If the first kept message is a tool result, we must also keep the
1448
+ // preceding assistant message that contained the tool_call
1449
+ if (firstKept.role === 'tool') {
1450
+ splitIdx--;
1451
+ keepRecent++;
1452
+ }
1453
+ else {
1454
+ break;
1455
+ }
1456
+ }
1457
+ const oldMessages = this.state.messages.slice(0, splitIdx);
1458
+ const recentMessages = this.state.messages.slice(splitIdx);
1459
+ // Pre-compaction context flush (#153): extract critical items from old
1460
+ // messages so they can be explicitly included in the AI summary prompt.
1461
+ // This ensures task progress, decisions, technical details, and blockers
1462
+ // survive compaction even if the AI summary would otherwise miss them.
1463
+ const flushService = ContextFlushService.getInstance();
1464
+ const oldText = oldMessages.map(msg => {
1465
+ const content = typeof msg.content === 'string'
1466
+ ? msg.content
1467
+ : JSON.stringify(msg.content);
1468
+ return content;
1469
+ }).join('\n');
1470
+ const extractedItems = flushService.extract(oldText);
1471
+ // Attempt AI-powered summarization
1472
+ let summaryText;
1473
+ try {
1474
+ summaryText = await this.generateAISummary(oldMessages, extractedItems);
1475
+ }
1476
+ catch {
1477
+ // Fallback to truncation-based summary
1478
+ summaryText = this.generateFallbackSummary(oldMessages, extractedItems);
1479
+ }
1480
+ this.state.messages = [
1481
+ { role: 'assistant', content: summaryText },
1482
+ ...recentMessages,
1483
+ ];
1484
+ return {
1485
+ compacted: true,
1486
+ messagesBefore,
1487
+ messagesAfter: this.state.messages.length,
1488
+ };
1489
+ }
1490
+ finally {
1491
+ this.compacting = false;
1492
+ }
1493
+ }
1494
+ /**
1495
+ * Generate an AI-powered structured summary of conversation messages.
1496
+ *
1497
+ * Asks the model to extract and preserve critical state from the
1498
+ * conversation history in a structured format. Pre-extracted critical
1499
+ * items from ContextFlushService are included in the prompt to ensure
1500
+ * they are preserved even if the AI would otherwise miss them.
1501
+ *
1502
+ * @param messages - Messages to summarize
1503
+ * @param extractedItems - Critical items extracted by ContextFlushService
1504
+ * @returns Structured summary string
1505
+ */
1506
+ async generateAISummary(messages, extractedItems = []) {
1507
+ const conversationText = messages.map(msg => {
1508
+ const content = typeof msg.content === 'string'
1509
+ ? msg.content.substring(0, 2000)
1510
+ : JSON.stringify(msg.content).substring(0, 2000);
1511
+ return `[${msg.role}]: ${content}`;
1512
+ }).join('\n');
1513
+ // Build critical items section if any were extracted
1514
+ let criticalItemsSection = '';
1515
+ if (extractedItems.length > 0) {
1516
+ const itemLines = extractedItems.map(item => `- [${item.category}] ${item.content} (confidence: ${item.confidence})`).join('\n');
1517
+ criticalItemsSection = `\n\nIMPORTANT — The following critical items were auto-extracted and MUST appear in your summary:\n${itemLines}\n`;
1518
+ }
1519
+ const summarizationPrompt = `Summarize this conversation history into a structured state snapshot. Preserve ALL of the following if present:
1520
+
1521
+ 1. **Active Tasks**: What tasks are in progress, assigned to whom, their status
1522
+ 2. **Decisions Made**: Key decisions and their rationale
1523
+ 3. **Key Findings**: Important discoveries, patterns, or blockers found
1524
+ 4. **Current Context**: What the agent is currently working on
1525
+ 5. **Pending Items**: Anything awaiting response or follow-up
1526
+ ${criticalItemsSection}
1527
+ Be concise but complete. This summary replaces the original messages.
1528
+
1529
+ Conversation (${messages.length} messages):
1530
+ ${conversationText}`;
1531
+ const generateFn = this._generateTextFn || generateText;
1532
+ const result = await generateFn({
1533
+ model: this.model,
1534
+ messages: [{ role: 'user', content: summarizationPrompt }],
1535
+ maxOutputTokens: 2048,
1536
+ temperature: 0.1,
1537
+ });
1538
+ const summary = result.text || '';
1539
+ if (!summary || summary.length < 20) {
1540
+ throw new Error('AI summary too short, falling back');
1541
+ }
1542
+ return `[Compacted State — ${messages.length} messages summarized]\n\n${summary}`;
1543
+ }
1544
+ /**
1545
+ * Generate a truncation-based fallback summary when AI summarization fails.
1546
+ * Includes pre-extracted critical items so they survive compaction.
1547
+ *
1548
+ * @param messages - Messages to summarize
1549
+ * @param extractedItems - Critical items extracted by ContextFlushService
1550
+ * @returns Simple concatenated summary string
1551
+ */
1552
+ generateFallbackSummary(messages, extractedItems = []) {
1553
+ const summaryParts = [];
1554
+ for (const msg of messages) {
1555
+ const content = typeof msg.content === 'string'
1556
+ ? msg.content.substring(0, 1000)
1557
+ : JSON.stringify(msg.content).substring(0, 1000);
1558
+ summaryParts.push(`[${msg.role}]: ${content}`);
1559
+ }
1560
+ let result = `Previous conversation summary (${messages.length} messages compressed):\n${summaryParts.join('\n')}`;
1561
+ if (extractedItems.length > 0) {
1562
+ const itemLines = extractedItems.map(item => `- [${item.category}] ${item.content}`).join('\n');
1563
+ result += `\n\nExtracted critical context:\n${itemLines}`;
1564
+ }
1565
+ return result;
1566
+ }
1567
+ }
1568
+ //# sourceMappingURL=agent-runner.service.js.map