@namzu/sdk 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. package/CHANGELOG.md +393 -0
  2. package/dist/advisory/executor.d.ts.map +1 -1
  3. package/dist/advisory/executor.js +9 -2
  4. package/dist/advisory/executor.js.map +1 -1
  5. package/dist/advisory/executor.test.d.ts +2 -1
  6. package/dist/advisory/executor.test.d.ts.map +1 -1
  7. package/dist/advisory/executor.test.js +7 -4
  8. package/dist/advisory/executor.test.js.map +1 -1
  9. package/dist/agents/ReactiveAgent.d.ts.map +1 -1
  10. package/dist/agents/ReactiveAgent.js +2 -0
  11. package/dist/agents/ReactiveAgent.js.map +1 -1
  12. package/dist/agents/SupervisorAgent.d.ts.map +1 -1
  13. package/dist/agents/SupervisorAgent.js +13 -0
  14. package/dist/agents/SupervisorAgent.js.map +1 -1
  15. package/dist/bridge/sse/mapper.test.js +2 -2
  16. package/dist/constants/compaction/index.d.ts.map +1 -1
  17. package/dist/constants/compaction/index.js +8 -3
  18. package/dist/constants/compaction/index.js.map +1 -1
  19. package/dist/constants/sandbox/index.d.ts +21 -0
  20. package/dist/constants/sandbox/index.d.ts.map +1 -1
  21. package/dist/constants/sandbox/index.js +30 -0
  22. package/dist/constants/sandbox/index.js.map +1 -1
  23. package/dist/constants/tools/index.d.ts.map +1 -1
  24. package/dist/constants/tools/index.js +33 -2
  25. package/dist/constants/tools/index.js.map +1 -1
  26. package/dist/manager/run/persistence.d.ts.map +1 -1
  27. package/dist/manager/run/persistence.js +35 -5
  28. package/dist/manager/run/persistence.js.map +1 -1
  29. package/dist/persona/assembler.d.ts +1 -0
  30. package/dist/persona/assembler.d.ts.map +1 -1
  31. package/dist/persona/assembler.js +28 -6
  32. package/dist/persona/assembler.js.map +1 -1
  33. package/dist/provider/collect.test.js +2 -2
  34. package/dist/public-runtime.d.ts +5 -4
  35. package/dist/public-runtime.d.ts.map +1 -1
  36. package/dist/public-runtime.js +5 -4
  37. package/dist/public-runtime.js.map +1 -1
  38. package/dist/public-tools.d.ts +2 -0
  39. package/dist/public-tools.d.ts.map +1 -1
  40. package/dist/public-tools.js +2 -0
  41. package/dist/public-tools.js.map +1 -1
  42. package/dist/public-types.d.ts +3 -0
  43. package/dist/public-types.d.ts.map +1 -1
  44. package/dist/registry/index.d.ts +2 -0
  45. package/dist/registry/index.d.ts.map +1 -1
  46. package/dist/registry/index.js +1 -0
  47. package/dist/registry/index.js.map +1 -1
  48. package/dist/registry/tool/execute.d.ts.map +1 -1
  49. package/dist/registry/tool/execute.js +87 -5
  50. package/dist/registry/tool/execute.js.map +1 -1
  51. package/dist/registry/tool/execute.test.d.ts +4 -2
  52. package/dist/registry/tool/execute.test.d.ts.map +1 -1
  53. package/dist/registry/tool/execute.test.js +112 -3
  54. package/dist/registry/tool/execute.test.js.map +1 -1
  55. package/dist/registry/toolset/catalog.d.ts +42 -0
  56. package/dist/registry/toolset/catalog.d.ts.map +1 -0
  57. package/dist/registry/toolset/catalog.js +217 -0
  58. package/dist/registry/toolset/catalog.js.map +1 -0
  59. package/dist/registry/toolset/catalog.test.d.ts +2 -0
  60. package/dist/registry/toolset/catalog.test.d.ts.map +1 -0
  61. package/dist/registry/toolset/catalog.test.js +85 -0
  62. package/dist/registry/toolset/catalog.test.js.map +1 -0
  63. package/dist/runtime/query/__tests__/deferred-tools.test.d.ts +2 -0
  64. package/dist/runtime/query/__tests__/deferred-tools.test.d.ts.map +1 -0
  65. package/dist/runtime/query/__tests__/deferred-tools.test.js +147 -0
  66. package/dist/runtime/query/__tests__/deferred-tools.test.js.map +1 -0
  67. package/dist/runtime/query/__tests__/executor-concurrency.test.d.ts +2 -0
  68. package/dist/runtime/query/__tests__/executor-concurrency.test.d.ts.map +1 -0
  69. package/dist/runtime/query/__tests__/executor-concurrency.test.js +98 -0
  70. package/dist/runtime/query/__tests__/executor-concurrency.test.js.map +1 -0
  71. package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js +38 -3
  72. package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js.map +1 -1
  73. package/dist/runtime/query/__tests__/prompt.test.js +47 -2
  74. package/dist/runtime/query/__tests__/prompt.test.js.map +1 -1
  75. package/dist/runtime/query/__tests__/stream-recovery.test.d.ts +2 -0
  76. package/dist/runtime/query/__tests__/stream-recovery.test.d.ts.map +1 -0
  77. package/dist/runtime/query/__tests__/stream-recovery.test.js +126 -0
  78. package/dist/runtime/query/__tests__/stream-recovery.test.js.map +1 -0
  79. package/dist/runtime/query/continuation.d.ts +16 -0
  80. package/dist/runtime/query/continuation.d.ts.map +1 -0
  81. package/dist/runtime/query/continuation.js +16 -0
  82. package/dist/runtime/query/continuation.js.map +1 -0
  83. package/dist/runtime/query/executor.d.ts +3 -0
  84. package/dist/runtime/query/executor.d.ts.map +1 -1
  85. package/dist/runtime/query/executor.js +71 -3
  86. package/dist/runtime/query/executor.js.map +1 -1
  87. package/dist/runtime/query/index.d.ts.map +1 -1
  88. package/dist/runtime/query/index.js +19 -3
  89. package/dist/runtime/query/index.js.map +1 -1
  90. package/dist/runtime/query/iteration/index.d.ts +22 -0
  91. package/dist/runtime/query/iteration/index.d.ts.map +1 -1
  92. package/dist/runtime/query/iteration/index.js +227 -60
  93. package/dist/runtime/query/iteration/index.js.map +1 -1
  94. package/dist/runtime/query/iteration/phases/context.d.ts +10 -0
  95. package/dist/runtime/query/iteration/phases/context.d.ts.map +1 -1
  96. package/dist/runtime/query/iteration/phases/context.js.map +1 -1
  97. package/dist/runtime/query/prompt.d.ts.map +1 -1
  98. package/dist/runtime/query/prompt.js +21 -1
  99. package/dist/runtime/query/prompt.js.map +1 -1
  100. package/dist/runtime/query/tooling.d.ts +1 -0
  101. package/dist/runtime/query/tooling.d.ts.map +1 -1
  102. package/dist/runtime/query/tooling.js +1 -0
  103. package/dist/runtime/query/tooling.js.map +1 -1
  104. package/dist/sandbox/provider/local.d.ts.map +1 -1
  105. package/dist/sandbox/provider/local.js +32 -1
  106. package/dist/sandbox/provider/local.js.map +1 -1
  107. package/dist/session/workspace/__tests__/shared-run.test.d.ts +2 -0
  108. package/dist/session/workspace/__tests__/shared-run.test.d.ts.map +1 -0
  109. package/dist/session/workspace/__tests__/shared-run.test.js +147 -0
  110. package/dist/session/workspace/__tests__/shared-run.test.js.map +1 -0
  111. package/dist/session/workspace/index.d.ts +2 -0
  112. package/dist/session/workspace/index.d.ts.map +1 -1
  113. package/dist/session/workspace/index.js +1 -0
  114. package/dist/session/workspace/index.js.map +1 -1
  115. package/dist/session/workspace/shared-run.d.ts +81 -0
  116. package/dist/session/workspace/shared-run.d.ts.map +1 -0
  117. package/dist/session/workspace/shared-run.js +251 -0
  118. package/dist/session/workspace/shared-run.js.map +1 -0
  119. package/dist/skills/loader.d.ts.map +1 -1
  120. package/dist/skills/loader.js +36 -6
  121. package/dist/skills/loader.js.map +1 -1
  122. package/dist/skills/loader.test.d.ts +2 -0
  123. package/dist/skills/loader.test.d.ts.map +1 -0
  124. package/dist/skills/loader.test.js +65 -0
  125. package/dist/skills/loader.test.js.map +1 -0
  126. package/dist/streaming/coalesce.test.js +1 -1
  127. package/dist/tools/builtins/__tests__/edit.test.d.ts +2 -0
  128. package/dist/tools/builtins/__tests__/edit.test.d.ts.map +1 -0
  129. package/dist/tools/builtins/__tests__/edit.test.js +38 -0
  130. package/dist/tools/builtins/__tests__/edit.test.js.map +1 -0
  131. package/dist/tools/builtins/__tests__/payload-budget.test.d.ts +2 -0
  132. package/dist/tools/builtins/__tests__/payload-budget.test.d.ts.map +1 -0
  133. package/dist/tools/builtins/__tests__/payload-budget.test.js +22 -0
  134. package/dist/tools/builtins/__tests__/payload-budget.test.js.map +1 -0
  135. package/dist/tools/builtins/__tests__/read-file.test.d.ts +2 -0
  136. package/dist/tools/builtins/__tests__/read-file.test.d.ts.map +1 -0
  137. package/dist/tools/builtins/__tests__/read-file.test.js +24 -0
  138. package/dist/tools/builtins/__tests__/read-file.test.js.map +1 -0
  139. package/dist/tools/builtins/__tests__/verify-outputs.test.d.ts +2 -0
  140. package/dist/tools/builtins/__tests__/verify-outputs.test.d.ts.map +1 -0
  141. package/dist/tools/builtins/__tests__/verify-outputs.test.js +52 -0
  142. package/dist/tools/builtins/__tests__/verify-outputs.test.js.map +1 -0
  143. package/dist/tools/builtins/__tests__/write-file.test.d.ts +2 -0
  144. package/dist/tools/builtins/__tests__/write-file.test.d.ts.map +1 -0
  145. package/dist/tools/builtins/__tests__/write-file.test.js +74 -0
  146. package/dist/tools/builtins/__tests__/write-file.test.js.map +1 -0
  147. package/dist/tools/builtins/bash.d.ts.map +1 -1
  148. package/dist/tools/builtins/bash.js +40 -7
  149. package/dist/tools/builtins/bash.js.map +1 -1
  150. package/dist/tools/builtins/edit.d.ts +5 -2
  151. package/dist/tools/builtins/edit.d.ts.map +1 -1
  152. package/dist/tools/builtins/edit.js +114 -18
  153. package/dist/tools/builtins/edit.js.map +1 -1
  154. package/dist/tools/builtins/index.d.ts +1 -0
  155. package/dist/tools/builtins/index.d.ts.map +1 -1
  156. package/dist/tools/builtins/index.js +13 -13
  157. package/dist/tools/builtins/index.js.map +1 -1
  158. package/dist/tools/builtins/read-file.d.ts +1 -0
  159. package/dist/tools/builtins/read-file.d.ts.map +1 -1
  160. package/dist/tools/builtins/read-file.js +23 -8
  161. package/dist/tools/builtins/read-file.js.map +1 -1
  162. package/dist/tools/builtins/search-tools.d.ts.map +1 -1
  163. package/dist/tools/builtins/search-tools.js +4 -1
  164. package/dist/tools/builtins/search-tools.js.map +1 -1
  165. package/dist/tools/builtins/verify-outputs.d.ts +5 -0
  166. package/dist/tools/builtins/verify-outputs.d.ts.map +1 -0
  167. package/dist/tools/builtins/verify-outputs.js +103 -0
  168. package/dist/tools/builtins/verify-outputs.js.map +1 -0
  169. package/dist/tools/builtins/write-file.d.ts +3 -2
  170. package/dist/tools/builtins/write-file.d.ts.map +1 -1
  171. package/dist/tools/builtins/write-file.js +72 -12
  172. package/dist/tools/builtins/write-file.js.map +1 -1
  173. package/dist/tools/coordinator/__tests__/agent.test.d.ts +15 -0
  174. package/dist/tools/coordinator/__tests__/agent.test.d.ts.map +1 -0
  175. package/dist/tools/coordinator/__tests__/agent.test.js +142 -0
  176. package/dist/tools/coordinator/__tests__/agent.test.js.map +1 -0
  177. package/dist/tools/coordinator/__tests__/task-list.test.d.ts +13 -0
  178. package/dist/tools/coordinator/__tests__/task-list.test.d.ts.map +1 -0
  179. package/dist/tools/coordinator/__tests__/task-list.test.js +162 -0
  180. package/dist/tools/coordinator/__tests__/task-list.test.js.map +1 -0
  181. package/dist/tools/coordinator/agent.d.ts +34 -0
  182. package/dist/tools/coordinator/agent.d.ts.map +1 -0
  183. package/dist/tools/coordinator/agent.js +107 -0
  184. package/dist/tools/coordinator/agent.js.map +1 -0
  185. package/dist/tools/coordinator/index.d.ts +7 -0
  186. package/dist/tools/coordinator/index.d.ts.map +1 -1
  187. package/dist/tools/coordinator/index.js +111 -21
  188. package/dist/tools/coordinator/index.js.map +1 -1
  189. package/dist/types/agent/base.d.ts +8 -0
  190. package/dist/types/agent/base.d.ts.map +1 -1
  191. package/dist/types/agent/reactive.d.ts +23 -0
  192. package/dist/types/agent/reactive.d.ts.map +1 -1
  193. package/dist/types/agent/supervisor.d.ts +41 -0
  194. package/dist/types/agent/supervisor.d.ts.map +1 -1
  195. package/dist/types/message/index.d.ts +22 -1
  196. package/dist/types/message/index.d.ts.map +1 -1
  197. package/dist/types/message/index.js +7 -2
  198. package/dist/types/message/index.js.map +1 -1
  199. package/dist/types/provider/chat.d.ts +2 -9
  200. package/dist/types/provider/chat.d.ts.map +1 -1
  201. package/dist/types/run/events.d.ts +6 -0
  202. package/dist/types/run/events.d.ts.map +1 -1
  203. package/dist/types/run/events.js.map +1 -1
  204. package/dist/types/sandbox/index.d.ts +193 -0
  205. package/dist/types/sandbox/index.d.ts.map +1 -1
  206. package/dist/types/sandbox/index.js.map +1 -1
  207. package/dist/types/skills/index.d.ts +2 -0
  208. package/dist/types/skills/index.d.ts.map +1 -1
  209. package/dist/types/tool/index.d.ts +22 -0
  210. package/dist/types/tool/index.d.ts.map +1 -1
  211. package/dist/types/toolset/index.d.ts +71 -0
  212. package/dist/types/toolset/index.d.ts.map +1 -0
  213. package/dist/types/toolset/index.js +2 -0
  214. package/dist/types/toolset/index.js.map +1 -0
  215. package/dist/types/workspace/index.d.ts +1 -0
  216. package/dist/types/workspace/index.d.ts.map +1 -1
  217. package/dist/types/workspace/shared-run.d.ts +61 -0
  218. package/dist/types/workspace/shared-run.d.ts.map +1 -0
  219. package/dist/types/workspace/shared-run.js +2 -0
  220. package/dist/types/workspace/shared-run.js.map +1 -0
  221. package/dist/verification/index.d.ts +1 -0
  222. package/dist/verification/index.d.ts.map +1 -1
  223. package/dist/verification/index.js +1 -0
  224. package/dist/verification/index.js.map +1 -1
  225. package/dist/verification/presets.d.ts +53 -0
  226. package/dist/verification/presets.d.ts.map +1 -0
  227. package/dist/verification/presets.js +70 -0
  228. package/dist/verification/presets.js.map +1 -0
  229. package/dist/verification/presets.test.d.ts +16 -0
  230. package/dist/verification/presets.test.d.ts.map +1 -0
  231. package/dist/verification/presets.test.js +79 -0
  232. package/dist/verification/presets.test.js.map +1 -0
  233. package/package.json +3 -2
  234. package/src/advisory/executor.test.ts +7 -4
  235. package/src/advisory/executor.ts +11 -2
  236. package/src/agents/ReactiveAgent.ts +2 -0
  237. package/src/agents/SupervisorAgent.ts +13 -0
  238. package/src/bridge/sse/mapper.test.ts +2 -2
  239. package/src/constants/compaction/index.ts +8 -3
  240. package/src/constants/sandbox/index.ts +37 -0
  241. package/src/constants/tools/index.ts +33 -2
  242. package/src/manager/run/persistence.ts +34 -6
  243. package/src/persona/assembler.ts +31 -8
  244. package/src/provider/collect.test.ts +2 -2
  245. package/src/public-runtime.ts +14 -1
  246. package/src/public-tools.ts +2 -0
  247. package/src/public-types.ts +7 -0
  248. package/src/registry/index.ts +7 -0
  249. package/src/registry/tool/execute.test.ts +132 -3
  250. package/src/registry/tool/execute.ts +94 -9
  251. package/src/registry/toolset/catalog.test.ts +97 -0
  252. package/src/registry/toolset/catalog.ts +283 -0
  253. package/src/runtime/query/__tests__/deferred-tools.test.ts +183 -0
  254. package/src/runtime/query/__tests__/executor-concurrency.test.ts +122 -0
  255. package/src/runtime/query/__tests__/executor-plugin-hooks.test.ts +48 -3
  256. package/src/runtime/query/__tests__/prompt.test.ts +51 -2
  257. package/src/runtime/query/__tests__/stream-recovery.test.ts +156 -0
  258. package/src/runtime/query/continuation.ts +16 -0
  259. package/src/runtime/query/executor.ts +82 -13
  260. package/src/runtime/query/index.ts +24 -3
  261. package/src/runtime/query/iteration/index.ts +263 -68
  262. package/src/runtime/query/iteration/phases/context.ts +10 -0
  263. package/src/runtime/query/prompt.ts +17 -1
  264. package/src/runtime/query/tooling.ts +2 -0
  265. package/src/sandbox/provider/local.ts +33 -0
  266. package/src/session/workspace/__tests__/shared-run.test.ts +181 -0
  267. package/src/session/workspace/index.ts +6 -0
  268. package/src/session/workspace/shared-run.ts +316 -0
  269. package/src/skills/loader.test.ts +89 -0
  270. package/src/skills/loader.ts +37 -6
  271. package/src/streaming/coalesce.test.ts +1 -1
  272. package/src/tools/builtins/__tests__/edit.test.ts +57 -0
  273. package/src/tools/builtins/__tests__/payload-budget.test.ts +29 -0
  274. package/src/tools/builtins/__tests__/read-file.test.ts +31 -0
  275. package/src/tools/builtins/__tests__/verify-outputs.test.ts +71 -0
  276. package/src/tools/builtins/__tests__/write-file.test.ts +97 -0
  277. package/src/tools/builtins/bash.ts +48 -7
  278. package/src/tools/builtins/edit.ts +162 -27
  279. package/src/tools/builtins/index.ts +13 -13
  280. package/src/tools/builtins/read-file.ts +31 -8
  281. package/src/tools/builtins/search-tools.ts +5 -1
  282. package/src/tools/builtins/verify-outputs.ts +126 -0
  283. package/src/tools/builtins/write-file.ts +83 -14
  284. package/src/tools/coordinator/__tests__/agent.test.ts +172 -0
  285. package/src/tools/coordinator/__tests__/task-list.test.ts +182 -0
  286. package/src/tools/coordinator/agent.ts +157 -0
  287. package/src/tools/coordinator/index.ts +128 -22
  288. package/src/types/agent/base.ts +8 -0
  289. package/src/types/agent/reactive.ts +25 -0
  290. package/src/types/agent/supervisor.ts +45 -0
  291. package/src/types/message/index.ts +32 -2
  292. package/src/types/provider/chat.ts +2 -9
  293. package/src/types/run/events.ts +6 -0
  294. package/src/types/sandbox/index.ts +219 -0
  295. package/src/types/skills/index.ts +4 -0
  296. package/src/types/tool/index.ts +24 -0
  297. package/src/types/toolset/index.ts +86 -0
  298. package/src/types/workspace/index.ts +9 -0
  299. package/src/types/workspace/shared-run.ts +65 -0
  300. package/src/verification/index.ts +1 -0
  301. package/src/verification/presets.test.ts +112 -0
  302. package/src/verification/presets.ts +72 -0
@@ -24,6 +24,7 @@ import { toErrorMessage } from '../../../utils/error.js'
24
24
  import { generateMessageId } from '../../../utils/id.js'
25
25
  import type { Logger } from '../../../utils/logger.js'
26
26
  import type { CheckpointManager } from '../checkpoint.js'
27
+ import { AUTO_CONTINUATION_USER_MESSAGE } from '../continuation.js'
27
28
  import type { EmitEvent } from '../events.js'
28
29
  import type { ToolExecutor } from '../executor.js'
29
30
  import type { GuardCoordinator } from '../guard.js'
@@ -58,6 +59,35 @@ export interface IterationConfig {
58
59
  pluginManager?: import('../../../plugin/lifecycle.js').PluginLifecycleManager
59
60
  }
60
61
 
62
+ /**
63
+ * Escape the five XML metacharacters so an interpolated value cannot
64
+ * break out of a tag. Used for the simple identifier fields in the
65
+ * `<task-notification>` envelope (taskId, agentId, status) — values
66
+ * here are controlled enums / opaque ids in practice, but escaping
67
+ * keeps the envelope robust against any future producer that lets a
68
+ * `<` or `&` leak in.
69
+ */
70
+ function xmlEscape(value: string): string {
71
+ return value
72
+ .replace(/&/g, '&amp;')
73
+ .replace(/</g, '&lt;')
74
+ .replace(/>/g, '&gt;')
75
+ .replace(/"/g, '&quot;')
76
+ .replace(/'/g, '&apos;')
77
+ }
78
+
79
+ /**
80
+ * Wrap free-form worker output in a CDATA section. CDATA preserves
81
+ * the raw text — code, markdown angle brackets, ampersands — so the
82
+ * supervisor sees what the worker actually produced instead of an
83
+ * escape-encoded approximation. The only termination CDATA forbids
84
+ * is the literal `]]>` sequence; we split-and-rejoin around it to
85
+ * keep the section well-formed regardless of payload.
86
+ */
87
+ function cdataWrap(value: string): string {
88
+ return `<![CDATA[${value.replace(/]]>/g, ']]]]><![CDATA[>')}]]>`
89
+ }
90
+
61
91
  /**
62
92
  * Map a provider's coarse `finishReason` plus the orchestrator's
63
93
  * `forceFinalize` flag onto the per-message {@link MessageStopReason}
@@ -104,8 +134,11 @@ interface StreamingTurnResult {
104
134
  * finally-style fall-through path with `stopReason: 'refusal'`.
105
135
  * - `tool_input_delta` with no `toolUseId` registered yet: we drop
106
136
  * the fragment and log a warning (proxies seen to misorder events).
107
- * - `chunk.error`: we surface as a thrown error after emitting the
108
- * message_completed terminator so consumer cards still close.
137
+ * - `chunk.error`: when no tool input is recoverable, we surface as
138
+ * a thrown error after emitting the message_completed terminator so
139
+ * consumer cards still close. If a tool-use block was already open,
140
+ * we instead synthesize a tool call with runtime truncation metadata
141
+ * so the executor can return a model-readable retry hint.
109
142
  */
110
143
  async function* streamProviderTurn(
111
144
  provider: LLMProvider,
@@ -134,7 +167,26 @@ async function* streamProviderTurn(
134
167
  }
135
168
  const toolBuckets = new Map<
136
169
  number,
137
- { id: string; name: string; argsBuf: string; started: boolean; completed: boolean }
170
+ {
171
+ id: string
172
+ name: string
173
+ argsBuf: string
174
+ started: boolean
175
+ completed: boolean
176
+ /**
177
+ * Parsed input. `null` while the bucket is still streaming.
178
+ * The synthesized
179
+ * `ChatCompletionResponse.toolCalls[].function.arguments` is
180
+ * derived from this — never from the raw buffer — so the
181
+ * downstream executor (`runtime/query/executor.ts`) never has
182
+ * to re-parse a truncated string. A truncated tool call is
183
+ * surfaced as `arguments: "{}"` plus `metadata.inputTruncated`
184
+ * so tool args remain clean while the executor can still
185
+ * return a specific retry hint.
186
+ */
187
+ parsed: unknown | null
188
+ inputTruncated: boolean
189
+ }
138
190
  >()
139
191
  let streamError: string | undefined
140
192
 
@@ -169,6 +221,8 @@ async function* streamProviderTurn(
169
221
  argsBuf: '',
170
222
  started: false,
171
223
  completed: false,
224
+ parsed: null,
225
+ inputTruncated: false,
172
226
  }
173
227
  toolBuckets.set(tc.index, bucket)
174
228
  }
@@ -218,17 +272,20 @@ async function* streamProviderTurn(
218
272
  try {
219
273
  parsed = bucket.argsBuf ? JSON.parse(bucket.argsBuf) : {}
220
274
  } catch (err) {
275
+ bucket.inputTruncated = true
221
276
  log.warn('tool input JSON parse failed at content_block_stop', {
222
277
  runId,
223
278
  toolUseId: endId,
224
279
  error: err instanceof Error ? err.message : String(err),
225
280
  })
226
281
  }
282
+ bucket.parsed = parsed
227
283
  await emitEvent({
228
284
  type: 'tool_input_completed',
229
285
  runId,
230
286
  toolUseId: endId as ToolUseId,
231
287
  input: parsed,
288
+ ...(bucket.inputTruncated ? { inputTruncated: true } : {}),
232
289
  })
233
290
  yield* drainPending()
234
291
  }
@@ -242,29 +299,108 @@ async function* streamProviderTurn(
242
299
  }
243
300
 
244
301
  // Flush any tool buckets the provider failed to close (no toolCallEnd
245
- // arrived — defensive against providers that don't yet emit it).
302
+ // arrived — defensive against providers that don't yet emit it, and
303
+ // the load-bearing path when the provider stream ends with
304
+ // `stop_reason: "max_tokens"` mid-`input_json_delta`. In that case
305
+ // Anthropic's SSE never sends `content_block_stop` for the open
306
+ // tool_use block: the upstream model ran out of completion tokens
307
+ // before it could close the JSON literal, so the buffered
308
+ // `argsBuf` ends with something like `"content":"…some prefix` —
309
+ // not parseable.
310
+ //
311
+ // Two cases coalesce here:
312
+ // 1. The buffer parses cleanly (the provider just forgot to emit
313
+ // `content_block_stop` but the args are intact) — keep parsed.
314
+ // 2. The buffer is truncated mid-literal — `parsed = {}` is the
315
+ // safe fallback so the executor's `JSON.parse(arguments)`
316
+ // succeeds and downstream consumers don't crash. The PRICE
317
+ // we used to pay was the model getting back a generic
318
+ // "<field> is required" Zod error and not realising its
319
+ // previous tool call was truncated server-side, so it would
320
+ // retry with the SAME long input and hit the same cutoff in
321
+ // a loop. Detect the truncation case and mark the tool call
322
+ // with runtime metadata; the executor surfaces a specific
323
+ // "your tool call was cut off by max_tokens — retry with
324
+ // shorter input or split into smaller calls" message that the
325
+ // model can act on.
246
326
  for (const bucket of toolBuckets.values()) {
247
327
  if (bucket.started && !bucket.completed) {
248
328
  bucket.completed = true
249
329
  let parsed: unknown = {}
250
- try {
251
- parsed = bucket.argsBuf ? JSON.parse(bucket.argsBuf) : {}
252
- } catch {
253
- // leave parsed = {}
330
+ let truncated = false
331
+ if (bucket.argsBuf) {
332
+ try {
333
+ parsed = JSON.parse(bucket.argsBuf)
334
+ } catch {
335
+ // argsBuf had content but didn't parse — almost
336
+ // certainly the max_tokens-mid-literal cutoff. Mark
337
+ // the bucket so the executor can return a model-
338
+ // readable hint instead of a generic Zod error.
339
+ truncated = true
340
+ parsed = {}
341
+ }
342
+ }
343
+ bucket.parsed = parsed
344
+ bucket.inputTruncated = truncated
345
+ if (truncated) {
346
+ log.warn('tool input truncated by upstream cutoff (no toolCallEnd, argsBuf unparsable)', {
347
+ runId,
348
+ toolUseId: bucket.id,
349
+ toolName: bucket.name,
350
+ bufferLength: bucket.argsBuf.length,
351
+ })
254
352
  }
255
353
  await emitEvent({
256
354
  type: 'tool_input_completed',
257
355
  runId,
258
356
  toolUseId: bucket.id as ToolUseId,
259
357
  input: parsed,
358
+ ...(truncated ? { inputTruncated: true } : {}),
260
359
  })
261
360
  yield* drainPending()
262
361
  }
263
362
  }
264
363
 
364
+ // `arguments` MUST be valid JSON for the executor's `JSON.parse`
365
+ // (`runtime/query/executor.ts:executeSingle`) to succeed. We
366
+ // always serialise from the bucket's `parsed` object (filled by
367
+ // either the `toolCallEnd` branch above or the post-stream flush
368
+ // loop) instead of re-emitting `argsBuf`. When the provider
369
+ // stream truncated mid-input, `metadata.inputTruncated` carries that
370
+ // state; the executor parses cleanly and returns a specific
371
+ // model-readable retry hint instead of the generic "Invalid JSON in
372
+ // tool arguments" intercept.
373
+ const toolCalls = [...toolBuckets.entries()]
374
+ .sort(([a], [b]) => a - b)
375
+ .map(([, b]) => ({
376
+ id: b.id,
377
+ type: 'function' as const,
378
+ function: {
379
+ name: b.name,
380
+ arguments: JSON.stringify(b.parsed ?? {}),
381
+ },
382
+ ...(b.inputTruncated ? { metadata: { inputTruncated: true } } : {}),
383
+ }))
384
+
385
+ const recoveredToolInputFromStreamError =
386
+ streamError !== undefined && toolCalls.some((tc) => tc.id && tc.function.name)
387
+ const effectiveFinishReason: ChatCompletionResponse['finishReason'] =
388
+ recoveredToolInputFromStreamError ? 'tool_calls' : finishReason
389
+
390
+ if (recoveredToolInputFromStreamError) {
391
+ log.warn('provider stream failed after tool input; surfacing tool call to executor', {
392
+ runId,
393
+ iteration,
394
+ error: streamError,
395
+ toolCallCount: toolCalls.length,
396
+ })
397
+ }
398
+
265
399
  const stopReason: MessageStopReason = streamError
266
- ? 'refusal'
267
- : synthesizeMessageStopReason(finishReason, forceFinalize)
400
+ ? recoveredToolInputFromStreamError
401
+ ? 'tool_use'
402
+ : 'refusal'
403
+ : synthesizeMessageStopReason(effectiveFinishReason, forceFinalize)
268
404
 
269
405
  await emitEvent({
270
406
  type: 'message_completed',
@@ -277,18 +413,10 @@ async function* streamProviderTurn(
277
413
  })
278
414
  yield* drainPending()
279
415
 
280
- if (streamError) {
416
+ if (streamError && !recoveredToolInputFromStreamError) {
281
417
  throw new Error(`Provider stream error: ${streamError}`)
282
418
  }
283
419
 
284
- const toolCalls = [...toolBuckets.entries()]
285
- .sort(([a], [b]) => a - b)
286
- .map(([, b]) => ({
287
- id: b.id,
288
- type: 'function' as const,
289
- function: { name: b.name, arguments: b.argsBuf },
290
- }))
291
-
292
420
  const response: ChatCompletionResponse = {
293
421
  id: id || messageId,
294
422
  model: model || params.model,
@@ -297,7 +425,7 @@ async function* streamProviderTurn(
297
425
  content: textBuf.length > 0 ? textBuf : null,
298
426
  toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
299
427
  },
300
- finishReason,
428
+ finishReason: effectiveFinishReason,
301
429
  usage,
302
430
  }
303
431
  return { response, messageId }
@@ -354,17 +482,19 @@ export class IterationOrchestrator {
354
482
  const { model } = runConfig
355
483
  const tracer = getTracer()
356
484
 
485
+ // Worker-completion delivery used to fan out through a global
486
+ // onTaskCompleted listener that pushed handles onto
487
+ // `pendingNotifications`; the iteration loop then drained
488
+ // them as <task-notification> envelopes. Both `create_task`
489
+ // and the `Agent` tool are now blocking and return their
490
+ // worker output as the dispatching tool_use's canonical
491
+ // tool_result, so the listener path would only DUPLICATE
492
+ // every completion (once as tool_result, once as injected
493
+ // envelope user-message). Leaving the binding out closes
494
+ // the duplicate notification surface entirely; the dormant
495
+ // drain stays as a no-op until a follow-up tears it out.
357
496
  let unsubscribeTaskListener: (() => void) | undefined
358
- if (this.ctx.taskGateway) {
359
- unsubscribeTaskListener = this.ctx.taskGateway.onTaskCompleted((handle) => {
360
- this.ctx.pendingNotifications.push(handle)
361
- this.ctx.log.debug('Task completion queued for notification', {
362
- taskId: handle.taskId,
363
- agentId: handle.agentId,
364
- state: handle.state,
365
- })
366
- })
367
- }
497
+ void unsubscribeTaskListener
368
498
 
369
499
  try {
370
500
  const planSignal = yield* runPlanGate(this.ctx)
@@ -588,6 +718,43 @@ export class IterationOrchestrator {
588
718
  const hasContent =
589
719
  response.message.content !== null && response.message.content.length > 0
590
720
 
721
+ // Auto-continuation on `stop_reason: max_tokens`. The
722
+ // model hit its per-call output cap mid-text (NOT
723
+ // mid-tool-use — that path is handled separately
724
+ // below via `inputTruncated`). Push a synthetic
725
+ // "continue" user message and let the loop fire
726
+ // another turn. The provider receives the partial
727
+ // assistant content + the continue prompt and
728
+ // resumes from where it left off, mirroring the
729
+ // Claude.ai "Continue" affordance.
730
+ //
731
+ // Guards:
732
+ // - `hasContent` so we don't loop forever on an
733
+ // empty cutoff (Anthropic occasionally emits
734
+ // `stop_reason: max_tokens` with no content
735
+ // when an injected pre-fill blocks the model).
736
+ // - `!forceFinalize` so the forced-finalize path
737
+ // never auto-continues — that path is invoked
738
+ // specifically to extract a closing summary.
739
+ // - max_iterations bounds the loop in any case.
740
+ if (!forceFinalize && response.finishReason === 'length' && hasContent) {
741
+ this.ctx.log.info('LLM hit max_tokens mid-text — auto-continuing', {
742
+ runId: runMgr.id,
743
+ iteration: iterationNum,
744
+ completionTokens: response.usage.completionTokens,
745
+ })
746
+ runMgr.pushMessage(createUserMessage(AUTO_CONTINUATION_USER_MESSAGE))
747
+ await this.ctx.emitEvent({
748
+ type: 'iteration_completed',
749
+ runId: runMgr.id,
750
+ iteration: iterationNum,
751
+ hasToolCalls: false,
752
+ })
753
+ yield* this.ctx.drainPending()
754
+ iterSpan.end()
755
+ continue
756
+ }
757
+
591
758
  if (!hasContent && !forceFinalize) {
592
759
  this.ctx.log.warn('Empty completion detected — requesting final summary', {
593
760
  iteration: iterationNum,
@@ -686,47 +853,75 @@ export class IterationOrchestrator {
686
853
  await this.injectOneTaskNotification()
687
854
  }
688
855
 
856
+ /**
857
+ * Canonical async completion delivery (ses_009-task-notification-envelope).
858
+ *
859
+ * Drains every pending task completion in one pass and emits each as
860
+ * a plain USER text message wrapped in the `<task-notification>`
861
+ * envelope the supervisor prompt expects.
862
+ *
863
+ * Why not a `tool_result` block bound to the dispatching tool_use_id:
864
+ * `create_task` is documented as NON-BLOCKING and returns
865
+ * "Task launched: …" immediately. That immediate return is already
866
+ * recorded as the canonical tool_result for that tool_use, so a
867
+ * second tool_result for the SAME tool_use_id — emitted later, after
868
+ * intervening assistant turns — is rejected by Anthropic with
869
+ * `messages.<n>.content.0: unexpected tool_use_id found in
870
+ * tool_result blocks` because the immediately-prior assistant
871
+ * message no longer carries the matching tool_use. Wrapping as a
872
+ * user text envelope sidesteps the pairing rule entirely.
873
+ *
874
+ * Coalescing N drops into one drain replaces the previous
875
+ * one-at-a-time pattern which forced a separate orchestrator
876
+ * iteration per completed task on wide fan-outs.
877
+ */
689
878
  // Diff view of the task-completion injector: lines marked "-" are the removed
  // one-at-a-time implementation, lines marked "+" are the added batch drain.
  // NOTE(review): despite the "One" in the name, the added implementation
  // handles every queued notification in a single call.
  private async injectOneTaskNotification(): Promise<void> {
690
- const handle = this.ctx.pendingNotifications.shift()
691
- if (!handle) return
692
- const meta = this.ctx.launchedTasks.get(handle.taskId)
693
- const resultText =
694
- handle.result?.result ??
695
- handle.result?.lastError ??
696
- `Task finished with state: ${handle.state}`
697
-
698
- if (meta?.planTaskId && this.ctx.taskStore) {
699
- const success = handle.state === 'completed'
700
- await this.ctx.taskStore.update(meta.planTaskId as `task_${string}`, {
701
- status: 'completed',
702
- description: success ? undefined : `Failed: ${resultText.substring(0, 200)}`,
879
+ if (this.ctx.pendingNotifications.length === 0) return
880
  // splice(0) empties the shared queue up front and hands back its contents.
  // NOTE(review): because the queue is already empty, a rejection from the
  // awaited taskStore.update below would leave the rest of this batch
  // un-notified and not re-queued. Confirm that is acceptable.
+ const handles = this.ctx.pendingNotifications.splice(0)
881
+
882
+ for (const handle of handles) {
883
+ const meta = this.ctx.launchedTasks.get(handle.taskId)
884
  // Result text preference: explicit result, then last error, then a
  // generic line naming the final state.
+ const resultText =
885
+ handle.result?.result ??
886
+ handle.result?.lastError ??
887
+ `Task finished with state: ${handle.state}`
888
+
889
+ if (meta?.planTaskId && this.ctx.taskStore) {
890
+ const success = handle.state === 'completed'
891
+ await this.ctx.taskStore.update(meta.planTaskId as `task_${string}`, {
892
  // NOTE(review): the plan task is marked 'completed' even when the worker
  // did not succeed; failure is only visible through the description
  // (first 200 characters of the result text). Confirm this is intended.
+ status: 'completed',
893
+ description: success ? undefined : `Failed: ${resultText.substring(0, 200)}`,
894
+ })
895
+ }
896
+
897
+ this.ctx.launchedTasks.delete(handle.taskId)
898
+
899
+ // `remaining-tasks` = inflight workers still pending after this
900
+ // one drains. `launchedTasks` is the single source of truth:
901
+ // it holds every dispatched worker that has NOT yet been
902
+ // drained + delete()'d. The drain batch entries are still
903
+ // inside launchedTasks until each iteration's delete() above
904
+ // removes them, so reading the size right after that delete
905
+ // gives the honest count. Adding `handles.length - 1 - i`
906
+ // here used to double-count this same queue.
907
+ const remainingTasks = this.ctx.launchedTasks.size
908
  // Compared with the removed version further down, this envelope has no
  // description element, the id/agent/status values pass through xmlEscape,
  // and the result passes through cdataWrap. Both helpers are defined
  // outside this view, so their exact escaping rules are not visible here.
+ const envelope =
909
+ `<task-notification>\n<task-id>${xmlEscape(handle.taskId)}</task-id>\n` +
910
+ `<agent-id>${xmlEscape(handle.agentId)}</agent-id>\n` +
911
+ `<status>${xmlEscape(handle.state)}</status>\n` +
912
+ `<result>${cdataWrap(resultText)}</result>\n` +
913
+ `<remaining-tasks>${remainingTasks}</remaining-tasks>\n</task-notification>`
914
+
915
  // Each completion is delivered as its own user message.
+ this.ctx.runMgr.pushMessage(createUserMessage(envelope))
916
+
917
+ this.ctx.log.info('Task notification injected', {
918
+ taskId: handle.taskId,
919
+ agentId: handle.agentId,
920
+ state: handle.state,
921
+ planTaskId: meta?.planTaskId,
922
  // NOTE(review): this field now carries launchedTasks.size (remaining
  // tasks). The removed version logged remainingTasks separately and put
  // pendingNotifications.length here. Confirm the field name still matches
  // what log consumers expect.
+ remainingNotifications: remainingTasks,
703
923
  })
704
924
  }
705
-
706
- this.ctx.launchedTasks.delete(handle.taskId)
707
- const remainingTasks = this.ctx.launchedTasks.size
708
-
709
- const notification = [
710
- '<task-notification>',
711
- ` <task-id>${handle.taskId}</task-id>`,
712
- ` <agent-id>${handle.agentId}</agent-id>`,
713
- ` <status>${handle.state}</status>`,
714
- ` <description>${meta?.description ?? 'agent task'}</description>`,
715
- ` <result>${resultText}</result>`,
716
- ` <remaining-tasks>${remainingTasks}</remaining-tasks>`,
717
- '</task-notification>',
718
- ].join('\n')
719
-
720
- this.ctx.runMgr.pushMessage(createUserMessage(notification))
721
-
722
- this.ctx.log.info('Task notification injected', {
723
- taskId: handle.taskId,
724
- agentId: handle.agentId,
725
- state: handle.state,
726
- planTaskId: meta?.planTaskId,
727
- remainingTasks,
728
- remainingNotifications: this.ctx.pendingNotifications.length,
729
- })
730
925
  }
731
926
 
732
927
  private async requestFinalResponse(model: string, reason: StopReason): Promise<void> {
@@ -22,6 +22,16 @@ export interface LaunchedTaskMeta {
22
22
  readonly agentId: string
23
23
  readonly description: string
24
24
  readonly planTaskId?: string
25
+ /**
26
+ * The `tool_use_id` of the assistant `create_task` block that
27
+ * spawned this background task. Recorded so a completion could
28
+ * be emitted as a `tool_result` block bound to that tool_use.
29
+ * NOTE(review): the completion drain now delivers results as
30
+ * user-text envelopes (see ses_009-task-notification-envelope); confirm this field is still read. Optional because
31
+ * older call paths that don't thread `ToolContext.toolUseId`
32
+ * still publish the meta without it.
33
+ */
34
+ readonly originalToolUseId?: string
25
35
  }
26
36
 
27
37
  export interface IterationContext {
@@ -1,5 +1,5 @@
1
1
  import { FILESYSTEM_TOOLS } from '../../constants/tools/index.js'
2
- import { assembleSystemPrompt } from '../../persona/assembler.js'
2
+ import { assembleSystemPrompt, renderSkillsSection } from '../../persona/assembler.js'
3
3
  import type { AgentRuntimeContext } from '../../types/agent/base.js'
4
4
  import type { AgentContextLevel } from '../../types/agent/factory.js'
5
5
  import type { AgentPersona } from '../../types/persona/index.js'
@@ -83,6 +83,14 @@ export class PromptBuilder {
83
83
  parts.push(this.config.systemPrompt)
84
84
  } else if (this.config.persona) {
85
85
  parts.push(assembleSystemPrompt(this.config.persona, this.config.skills))
86
+ } else {
87
+ const skillSection = renderSkillsSection(this.config.skills)
88
+ if (skillSection) parts.push(skillSection)
89
+ }
90
+
91
+ if (this.config.systemPrompt) {
92
+ const skillSection = renderSkillsSection(this.config.skills)
93
+ if (skillSection) parts.push(skillSection)
86
94
  }
87
95
 
88
96
  if (contextLevel !== 'minimal') {
@@ -133,6 +141,14 @@ export class PromptBuilder {
133
141
  if (this.config.persona.sessionContext) {
134
142
  dynamicParts.push(`## Session Context\n${this.config.persona.sessionContext.trim()}`)
135
143
  }
144
+ } else {
145
+ const skillSection = renderSkillsSection(this.config.skills)
146
+ if (skillSection) staticParts.push(skillSection)
147
+ }
148
+
149
+ if (this.config.systemPrompt) {
150
+ const skillSection = renderSkillsSection(this.config.skills)
151
+ if (skillSection) staticParts.push(skillSection)
136
152
  }
137
153
 
138
154
  if (contextLevel !== 'minimal') {
@@ -17,6 +17,7 @@ export interface ToolingBootstrapConfig {
17
17
  permissionMode: PermissionMode
18
18
  env: Record<string, string>
19
19
  abortSignal: AbortSignal
20
+ allowedTools?: readonly string[]
20
21
  invocationState?: InvocationState
21
22
  pluginManager?: PluginLifecycleManager
22
23
  }
@@ -36,6 +37,7 @@ export class ToolingBootstrap {
36
37
  permissionMode: config.permissionMode,
37
38
  env: config.env,
38
39
  abortSignal: config.abortSignal,
40
+ allowedTools: config.allowedTools,
39
41
  invocationState: config.invocationState,
40
42
  pluginManager: config.pluginManager,
41
43
  },
@@ -4,8 +4,10 @@ import {
4
4
  readFile as fsReadFile,
5
5
  writeFile as fsWriteFile,
6
6
  mkdir,
7
+ readdir,
7
8
  rename,
8
9
  rm,
10
+ stat,
9
11
  } from 'node:fs/promises'
10
12
  import { tmpdir } from 'node:os'
11
13
  import { dirname, isAbsolute, join, relative, resolve } from 'node:path'
@@ -24,6 +26,7 @@ import type {
24
26
  SandboxEnvironment,
25
27
  SandboxExecOptions,
26
28
  SandboxExecResult,
29
+ SandboxFileEntry,
27
30
  SandboxProvider,
28
31
  SandboxStatus,
29
32
  } from '../../types/sandbox/index.js'
@@ -302,6 +305,36 @@ class LocalSandbox implements Sandbox {
302
305
  return fsReadFile(resolved)
303
306
  }
304
307
 
308
/**
 * Walks the directory tree under rootPath and returns one entry per regular
 * file found.
 *
 * rootPath is first passed through assertInsideSandbox together with
 * this.rootDir. That helper is defined outside this view; presumably it
 * rejects paths that escape the sandbox root (TODO confirm).
 *
 * The walk is best-effort: a root that cannot be stat-ed or is not a
 * directory yields an empty array, unreadable sub-directories are treated as
 * empty, and files whose stat fails are left out.
 *
 * @param rootPath Directory to list.
 * @returns Entries carrying the path (joined onto the resolved root, not
 *   relativized) and the size reported by stat.
 * @throws Error when the sandbox status is 'destroyed'.
 */
+ async listFiles(rootPath: string): Promise<readonly SandboxFileEntry[]> {
309
+ if (this._status === 'destroyed') {
310
+ throw new Error(`Sandbox ${this.id} is destroyed`)
311
+ }
312
+
313
+ const resolved = assertInsideSandbox(this.rootDir, rootPath)
314
// A failed stat becomes null, so a missing root returns [] instead of throwing.
+ const root = await stat(resolved).catch(() => null)
315
+ if (!root || !root.isDirectory()) return []
316
+
317
+ const entries: SandboxFileEntry[] = []
318
// Iterative traversal with an explicit stack of directories still to visit.
+ const stack: string[] = [resolved]
319
+ while (stack.length > 0) {
320
+ const dir = stack.pop()
321
// pop() is typed as possibly undefined; this guard narrows it for the compiler.
+ if (!dir) break
322
// A directory that cannot be read contributes no entries.
+ const dirents = await readdir(dir, { withFileTypes: true }).catch(() => [])
323
+ for (const ent of dirents) {
324
+ const full = join(dir, ent.name)
325
+ if (ent.isDirectory()) {
326
+ stack.push(full)
327
+ continue
328
+ }
329
// Anything that is neither a directory nor a regular file is skipped.
+ if (!ent.isFile()) continue
330
// stat is needed for the size; if it fails the file is omitted.
+ const info = await stat(full).catch(() => null)
331
+ if (!info) continue
332
+ entries.push({ path: full, size: info.size })
333
+ }
334
+ }
335
+ return entries
336
+ }
337
+
305
338
  async destroy(): Promise<void> {
306
339
  if (this._status === 'destroyed') {
307
340
  return