@namzu/sdk 0.6.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302):
  1. package/CHANGELOG.md +362 -0
  2. package/dist/advisory/executor.d.ts.map +1 -1
  3. package/dist/advisory/executor.js +9 -2
  4. package/dist/advisory/executor.js.map +1 -1
  5. package/dist/advisory/executor.test.d.ts +2 -1
  6. package/dist/advisory/executor.test.d.ts.map +1 -1
  7. package/dist/advisory/executor.test.js +7 -4
  8. package/dist/advisory/executor.test.js.map +1 -1
  9. package/dist/agents/ReactiveAgent.d.ts.map +1 -1
  10. package/dist/agents/ReactiveAgent.js +2 -0
  11. package/dist/agents/ReactiveAgent.js.map +1 -1
  12. package/dist/agents/SupervisorAgent.d.ts.map +1 -1
  13. package/dist/agents/SupervisorAgent.js +7 -0
  14. package/dist/agents/SupervisorAgent.js.map +1 -1
  15. package/dist/bridge/sse/mapper.test.js +2 -2
  16. package/dist/constants/compaction/index.d.ts.map +1 -1
  17. package/dist/constants/compaction/index.js +8 -3
  18. package/dist/constants/compaction/index.js.map +1 -1
  19. package/dist/constants/sandbox/index.d.ts +21 -0
  20. package/dist/constants/sandbox/index.d.ts.map +1 -1
  21. package/dist/constants/sandbox/index.js +30 -0
  22. package/dist/constants/sandbox/index.js.map +1 -1
  23. package/dist/constants/tools/index.d.ts.map +1 -1
  24. package/dist/constants/tools/index.js +33 -2
  25. package/dist/constants/tools/index.js.map +1 -1
  26. package/dist/manager/run/persistence.d.ts.map +1 -1
  27. package/dist/manager/run/persistence.js +35 -5
  28. package/dist/manager/run/persistence.js.map +1 -1
  29. package/dist/persona/assembler.d.ts +1 -0
  30. package/dist/persona/assembler.d.ts.map +1 -1
  31. package/dist/persona/assembler.js +28 -6
  32. package/dist/persona/assembler.js.map +1 -1
  33. package/dist/provider/collect.test.js +2 -2
  34. package/dist/public-runtime.d.ts +5 -4
  35. package/dist/public-runtime.d.ts.map +1 -1
  36. package/dist/public-runtime.js +5 -4
  37. package/dist/public-runtime.js.map +1 -1
  38. package/dist/public-tools.d.ts +2 -0
  39. package/dist/public-tools.d.ts.map +1 -1
  40. package/dist/public-tools.js +2 -0
  41. package/dist/public-tools.js.map +1 -1
  42. package/dist/public-types.d.ts +3 -0
  43. package/dist/public-types.d.ts.map +1 -1
  44. package/dist/registry/index.d.ts +2 -0
  45. package/dist/registry/index.d.ts.map +1 -1
  46. package/dist/registry/index.js +1 -0
  47. package/dist/registry/index.js.map +1 -1
  48. package/dist/registry/tool/execute.d.ts.map +1 -1
  49. package/dist/registry/tool/execute.js +87 -5
  50. package/dist/registry/tool/execute.js.map +1 -1
  51. package/dist/registry/tool/execute.test.d.ts +4 -2
  52. package/dist/registry/tool/execute.test.d.ts.map +1 -1
  53. package/dist/registry/tool/execute.test.js +112 -3
  54. package/dist/registry/tool/execute.test.js.map +1 -1
  55. package/dist/registry/toolset/catalog.d.ts +42 -0
  56. package/dist/registry/toolset/catalog.d.ts.map +1 -0
  57. package/dist/registry/toolset/catalog.js +217 -0
  58. package/dist/registry/toolset/catalog.js.map +1 -0
  59. package/dist/registry/toolset/catalog.test.d.ts +2 -0
  60. package/dist/registry/toolset/catalog.test.d.ts.map +1 -0
  61. package/dist/registry/toolset/catalog.test.js +85 -0
  62. package/dist/registry/toolset/catalog.test.js.map +1 -0
  63. package/dist/runtime/query/__tests__/deferred-tools.test.d.ts +2 -0
  64. package/dist/runtime/query/__tests__/deferred-tools.test.d.ts.map +1 -0
  65. package/dist/runtime/query/__tests__/deferred-tools.test.js +147 -0
  66. package/dist/runtime/query/__tests__/deferred-tools.test.js.map +1 -0
  67. package/dist/runtime/query/__tests__/executor-concurrency.test.d.ts +2 -0
  68. package/dist/runtime/query/__tests__/executor-concurrency.test.d.ts.map +1 -0
  69. package/dist/runtime/query/__tests__/executor-concurrency.test.js +98 -0
  70. package/dist/runtime/query/__tests__/executor-concurrency.test.js.map +1 -0
  71. package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js +38 -3
  72. package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js.map +1 -1
  73. package/dist/runtime/query/__tests__/prompt.test.js +47 -2
  74. package/dist/runtime/query/__tests__/prompt.test.js.map +1 -1
  75. package/dist/runtime/query/__tests__/stream-recovery.test.d.ts +2 -0
  76. package/dist/runtime/query/__tests__/stream-recovery.test.d.ts.map +1 -0
  77. package/dist/runtime/query/__tests__/stream-recovery.test.js +126 -0
  78. package/dist/runtime/query/__tests__/stream-recovery.test.js.map +1 -0
  79. package/dist/runtime/query/continuation.d.ts +16 -0
  80. package/dist/runtime/query/continuation.d.ts.map +1 -0
  81. package/dist/runtime/query/continuation.js +16 -0
  82. package/dist/runtime/query/continuation.js.map +1 -0
  83. package/dist/runtime/query/executor.d.ts +3 -0
  84. package/dist/runtime/query/executor.d.ts.map +1 -1
  85. package/dist/runtime/query/executor.js +71 -3
  86. package/dist/runtime/query/executor.js.map +1 -1
  87. package/dist/runtime/query/index.d.ts.map +1 -1
  88. package/dist/runtime/query/index.js +19 -3
  89. package/dist/runtime/query/index.js.map +1 -1
  90. package/dist/runtime/query/iteration/index.d.ts +22 -0
  91. package/dist/runtime/query/iteration/index.d.ts.map +1 -1
  92. package/dist/runtime/query/iteration/index.js +227 -60
  93. package/dist/runtime/query/iteration/index.js.map +1 -1
  94. package/dist/runtime/query/iteration/phases/context.d.ts +10 -0
  95. package/dist/runtime/query/iteration/phases/context.d.ts.map +1 -1
  96. package/dist/runtime/query/iteration/phases/context.js.map +1 -1
  97. package/dist/runtime/query/prompt.d.ts.map +1 -1
  98. package/dist/runtime/query/prompt.js +21 -1
  99. package/dist/runtime/query/prompt.js.map +1 -1
  100. package/dist/runtime/query/tooling.d.ts +1 -0
  101. package/dist/runtime/query/tooling.d.ts.map +1 -1
  102. package/dist/runtime/query/tooling.js +1 -0
  103. package/dist/runtime/query/tooling.js.map +1 -1
  104. package/dist/sandbox/provider/local.d.ts.map +1 -1
  105. package/dist/sandbox/provider/local.js +32 -1
  106. package/dist/sandbox/provider/local.js.map +1 -1
  107. package/dist/session/workspace/__tests__/shared-run.test.d.ts +2 -0
  108. package/dist/session/workspace/__tests__/shared-run.test.d.ts.map +1 -0
  109. package/dist/session/workspace/__tests__/shared-run.test.js +147 -0
  110. package/dist/session/workspace/__tests__/shared-run.test.js.map +1 -0
  111. package/dist/session/workspace/index.d.ts +2 -0
  112. package/dist/session/workspace/index.d.ts.map +1 -1
  113. package/dist/session/workspace/index.js +1 -0
  114. package/dist/session/workspace/index.js.map +1 -1
  115. package/dist/session/workspace/shared-run.d.ts +81 -0
  116. package/dist/session/workspace/shared-run.d.ts.map +1 -0
  117. package/dist/session/workspace/shared-run.js +251 -0
  118. package/dist/session/workspace/shared-run.js.map +1 -0
  119. package/dist/skills/loader.d.ts.map +1 -1
  120. package/dist/skills/loader.js +36 -6
  121. package/dist/skills/loader.js.map +1 -1
  122. package/dist/skills/loader.test.d.ts +2 -0
  123. package/dist/skills/loader.test.d.ts.map +1 -0
  124. package/dist/skills/loader.test.js +65 -0
  125. package/dist/skills/loader.test.js.map +1 -0
  126. package/dist/streaming/coalesce.test.js +1 -1
  127. package/dist/tools/builtins/__tests__/edit.test.d.ts +2 -0
  128. package/dist/tools/builtins/__tests__/edit.test.d.ts.map +1 -0
  129. package/dist/tools/builtins/__tests__/edit.test.js +38 -0
  130. package/dist/tools/builtins/__tests__/edit.test.js.map +1 -0
  131. package/dist/tools/builtins/__tests__/payload-budget.test.d.ts +2 -0
  132. package/dist/tools/builtins/__tests__/payload-budget.test.d.ts.map +1 -0
  133. package/dist/tools/builtins/__tests__/payload-budget.test.js +22 -0
  134. package/dist/tools/builtins/__tests__/payload-budget.test.js.map +1 -0
  135. package/dist/tools/builtins/__tests__/read-file.test.d.ts +2 -0
  136. package/dist/tools/builtins/__tests__/read-file.test.d.ts.map +1 -0
  137. package/dist/tools/builtins/__tests__/read-file.test.js +24 -0
  138. package/dist/tools/builtins/__tests__/read-file.test.js.map +1 -0
  139. package/dist/tools/builtins/__tests__/verify-outputs.test.d.ts +2 -0
  140. package/dist/tools/builtins/__tests__/verify-outputs.test.d.ts.map +1 -0
  141. package/dist/tools/builtins/__tests__/verify-outputs.test.js +52 -0
  142. package/dist/tools/builtins/__tests__/verify-outputs.test.js.map +1 -0
  143. package/dist/tools/builtins/__tests__/write-file.test.d.ts +2 -0
  144. package/dist/tools/builtins/__tests__/write-file.test.d.ts.map +1 -0
  145. package/dist/tools/builtins/__tests__/write-file.test.js +74 -0
  146. package/dist/tools/builtins/__tests__/write-file.test.js.map +1 -0
  147. package/dist/tools/builtins/bash.d.ts.map +1 -1
  148. package/dist/tools/builtins/bash.js +40 -7
  149. package/dist/tools/builtins/bash.js.map +1 -1
  150. package/dist/tools/builtins/edit.d.ts +5 -2
  151. package/dist/tools/builtins/edit.d.ts.map +1 -1
  152. package/dist/tools/builtins/edit.js +114 -18
  153. package/dist/tools/builtins/edit.js.map +1 -1
  154. package/dist/tools/builtins/index.d.ts +1 -0
  155. package/dist/tools/builtins/index.d.ts.map +1 -1
  156. package/dist/tools/builtins/index.js +13 -13
  157. package/dist/tools/builtins/index.js.map +1 -1
  158. package/dist/tools/builtins/read-file.d.ts +1 -0
  159. package/dist/tools/builtins/read-file.d.ts.map +1 -1
  160. package/dist/tools/builtins/read-file.js +23 -8
  161. package/dist/tools/builtins/read-file.js.map +1 -1
  162. package/dist/tools/builtins/search-tools.d.ts.map +1 -1
  163. package/dist/tools/builtins/search-tools.js +4 -1
  164. package/dist/tools/builtins/search-tools.js.map +1 -1
  165. package/dist/tools/builtins/verify-outputs.d.ts +5 -0
  166. package/dist/tools/builtins/verify-outputs.d.ts.map +1 -0
  167. package/dist/tools/builtins/verify-outputs.js +103 -0
  168. package/dist/tools/builtins/verify-outputs.js.map +1 -0
  169. package/dist/tools/builtins/write-file.d.ts +3 -2
  170. package/dist/tools/builtins/write-file.d.ts.map +1 -1
  171. package/dist/tools/builtins/write-file.js +72 -12
  172. package/dist/tools/builtins/write-file.js.map +1 -1
  173. package/dist/tools/coordinator/__tests__/agent.test.d.ts +15 -0
  174. package/dist/tools/coordinator/__tests__/agent.test.d.ts.map +1 -0
  175. package/dist/tools/coordinator/__tests__/agent.test.js +142 -0
  176. package/dist/tools/coordinator/__tests__/agent.test.js.map +1 -0
  177. package/dist/tools/coordinator/__tests__/task-list.test.d.ts +13 -0
  178. package/dist/tools/coordinator/__tests__/task-list.test.d.ts.map +1 -0
  179. package/dist/tools/coordinator/__tests__/task-list.test.js +162 -0
  180. package/dist/tools/coordinator/__tests__/task-list.test.js.map +1 -0
  181. package/dist/tools/coordinator/agent.d.ts +34 -0
  182. package/dist/tools/coordinator/agent.d.ts.map +1 -0
  183. package/dist/tools/coordinator/agent.js +107 -0
  184. package/dist/tools/coordinator/agent.js.map +1 -0
  185. package/dist/tools/coordinator/index.d.ts +7 -0
  186. package/dist/tools/coordinator/index.d.ts.map +1 -1
  187. package/dist/tools/coordinator/index.js +111 -21
  188. package/dist/tools/coordinator/index.js.map +1 -1
  189. package/dist/types/agent/base.d.ts +8 -0
  190. package/dist/types/agent/base.d.ts.map +1 -1
  191. package/dist/types/agent/reactive.d.ts +23 -0
  192. package/dist/types/agent/reactive.d.ts.map +1 -1
  193. package/dist/types/agent/supervisor.d.ts +14 -0
  194. package/dist/types/agent/supervisor.d.ts.map +1 -1
  195. package/dist/types/message/index.d.ts +22 -1
  196. package/dist/types/message/index.d.ts.map +1 -1
  197. package/dist/types/message/index.js +7 -2
  198. package/dist/types/message/index.js.map +1 -1
  199. package/dist/types/provider/chat.d.ts +2 -9
  200. package/dist/types/provider/chat.d.ts.map +1 -1
  201. package/dist/types/run/events.d.ts +6 -0
  202. package/dist/types/run/events.d.ts.map +1 -1
  203. package/dist/types/run/events.js.map +1 -1
  204. package/dist/types/sandbox/index.d.ts +193 -0
  205. package/dist/types/sandbox/index.d.ts.map +1 -1
  206. package/dist/types/sandbox/index.js.map +1 -1
  207. package/dist/types/skills/index.d.ts +2 -0
  208. package/dist/types/skills/index.d.ts.map +1 -1
  209. package/dist/types/tool/index.d.ts +22 -0
  210. package/dist/types/tool/index.d.ts.map +1 -1
  211. package/dist/types/toolset/index.d.ts +71 -0
  212. package/dist/types/toolset/index.d.ts.map +1 -0
  213. package/dist/types/toolset/index.js +2 -0
  214. package/dist/types/toolset/index.js.map +1 -0
  215. package/dist/types/workspace/index.d.ts +1 -0
  216. package/dist/types/workspace/index.d.ts.map +1 -1
  217. package/dist/types/workspace/shared-run.d.ts +61 -0
  218. package/dist/types/workspace/shared-run.d.ts.map +1 -0
  219. package/dist/types/workspace/shared-run.js +2 -0
  220. package/dist/types/workspace/shared-run.js.map +1 -0
  221. package/dist/verification/index.d.ts +1 -0
  222. package/dist/verification/index.d.ts.map +1 -1
  223. package/dist/verification/index.js +1 -0
  224. package/dist/verification/index.js.map +1 -1
  225. package/dist/verification/presets.d.ts +53 -0
  226. package/dist/verification/presets.d.ts.map +1 -0
  227. package/dist/verification/presets.js +70 -0
  228. package/dist/verification/presets.js.map +1 -0
  229. package/dist/verification/presets.test.d.ts +16 -0
  230. package/dist/verification/presets.test.d.ts.map +1 -0
  231. package/dist/verification/presets.test.js +79 -0
  232. package/dist/verification/presets.test.js.map +1 -0
  233. package/package.json +3 -2
  234. package/src/advisory/executor.test.ts +7 -4
  235. package/src/advisory/executor.ts +11 -2
  236. package/src/agents/ReactiveAgent.ts +2 -0
  237. package/src/agents/SupervisorAgent.ts +7 -0
  238. package/src/bridge/sse/mapper.test.ts +2 -2
  239. package/src/constants/compaction/index.ts +8 -3
  240. package/src/constants/sandbox/index.ts +37 -0
  241. package/src/constants/tools/index.ts +33 -2
  242. package/src/manager/run/persistence.ts +34 -6
  243. package/src/persona/assembler.ts +31 -8
  244. package/src/provider/collect.test.ts +2 -2
  245. package/src/public-runtime.ts +14 -1
  246. package/src/public-tools.ts +2 -0
  247. package/src/public-types.ts +7 -0
  248. package/src/registry/index.ts +7 -0
  249. package/src/registry/tool/execute.test.ts +132 -3
  250. package/src/registry/tool/execute.ts +94 -9
  251. package/src/registry/toolset/catalog.test.ts +97 -0
  252. package/src/registry/toolset/catalog.ts +283 -0
  253. package/src/runtime/query/__tests__/deferred-tools.test.ts +183 -0
  254. package/src/runtime/query/__tests__/executor-concurrency.test.ts +122 -0
  255. package/src/runtime/query/__tests__/executor-plugin-hooks.test.ts +48 -3
  256. package/src/runtime/query/__tests__/prompt.test.ts +51 -2
  257. package/src/runtime/query/__tests__/stream-recovery.test.ts +156 -0
  258. package/src/runtime/query/continuation.ts +16 -0
  259. package/src/runtime/query/executor.ts +82 -13
  260. package/src/runtime/query/index.ts +24 -3
  261. package/src/runtime/query/iteration/index.ts +263 -68
  262. package/src/runtime/query/iteration/phases/context.ts +10 -0
  263. package/src/runtime/query/prompt.ts +17 -1
  264. package/src/runtime/query/tooling.ts +2 -0
  265. package/src/sandbox/provider/local.ts +33 -0
  266. package/src/session/workspace/__tests__/shared-run.test.ts +181 -0
  267. package/src/session/workspace/index.ts +6 -0
  268. package/src/session/workspace/shared-run.ts +316 -0
  269. package/src/skills/loader.test.ts +89 -0
  270. package/src/skills/loader.ts +37 -6
  271. package/src/streaming/coalesce.test.ts +1 -1
  272. package/src/tools/builtins/__tests__/edit.test.ts +57 -0
  273. package/src/tools/builtins/__tests__/payload-budget.test.ts +29 -0
  274. package/src/tools/builtins/__tests__/read-file.test.ts +31 -0
  275. package/src/tools/builtins/__tests__/verify-outputs.test.ts +71 -0
  276. package/src/tools/builtins/__tests__/write-file.test.ts +97 -0
  277. package/src/tools/builtins/bash.ts +48 -7
  278. package/src/tools/builtins/edit.ts +162 -27
  279. package/src/tools/builtins/index.ts +13 -13
  280. package/src/tools/builtins/read-file.ts +31 -8
  281. package/src/tools/builtins/search-tools.ts +5 -1
  282. package/src/tools/builtins/verify-outputs.ts +126 -0
  283. package/src/tools/builtins/write-file.ts +83 -14
  284. package/src/tools/coordinator/__tests__/agent.test.ts +172 -0
  285. package/src/tools/coordinator/__tests__/task-list.test.ts +182 -0
  286. package/src/tools/coordinator/agent.ts +157 -0
  287. package/src/tools/coordinator/index.ts +128 -22
  288. package/src/types/agent/base.ts +8 -0
  289. package/src/types/agent/reactive.ts +25 -0
  290. package/src/types/agent/supervisor.ts +16 -0
  291. package/src/types/message/index.ts +32 -2
  292. package/src/types/provider/chat.ts +2 -9
  293. package/src/types/run/events.ts +6 -0
  294. package/src/types/sandbox/index.ts +219 -0
  295. package/src/types/skills/index.ts +4 -0
  296. package/src/types/tool/index.ts +24 -0
  297. package/src/types/toolset/index.ts +86 -0
  298. package/src/types/workspace/index.ts +9 -0
  299. package/src/types/workspace/shared-run.ts +65 -0
  300. package/src/verification/index.ts +1 -0
  301. package/src/verification/presets.test.ts +112 -0
  302. package/src/verification/presets.ts +72 -0
@@ -18,7 +18,10 @@ function makeLogger(): Logger {
18
18
  error: vi.fn(),
19
19
  debug: vi.fn(),
20
20
  }
21
- return { ...stub, child: vi.fn(() => ({ ...stub, child: vi.fn() })) } as unknown as Logger
21
+ return {
22
+ ...stub,
23
+ child: vi.fn(() => ({ ...stub, child: vi.fn() })),
24
+ } as unknown as Logger
22
25
  }
23
26
 
24
27
  function makeToolRegistry(execute: ToolRegistryContract['execute']): ToolRegistryContract {
@@ -93,6 +96,42 @@ describe('ToolExecutor plugin hooks', () => {
93
96
  expect(batch.results[0]?.output).toBe('ok')
94
97
  })
95
98
 
99
+ it('preserves tool stdout/stderr when a tool exits unsuccessfully', async () => {
100
+ const tools = makeToolRegistry(
101
+ vi.fn(async () => ({
102
+ success: false,
103
+ output: 'STDOUT:\npartial result\n\nSTDERR:\nboom',
104
+ error: 'Command exited with code 1',
105
+ })),
106
+ )
107
+ const exec = new ToolExecutor(
108
+ {
109
+ tools,
110
+ runId: mockRunId,
111
+ workingDirectory: '/tmp',
112
+ permissionMode: 'auto',
113
+ env: {},
114
+ abortSignal: new AbortController().signal,
115
+ },
116
+ activityStore,
117
+ emitEvent,
118
+ makeLogger(),
119
+ )
120
+
121
+ const batch = await exec.executeBatch(buildResponse('bash', { command: 'false' }))
122
+ expect(batch.results[0]?.output).toContain('STDOUT:\npartial result')
123
+ expect(batch.results[0]?.output).toContain('STDERR:\nboom')
124
+ expect(batch.results[0]?.output).toContain('Error: Command exited with code 1')
125
+
126
+ const completed = emitted.find((e) => e.type === 'tool_completed')
127
+ expect(completed).toMatchObject({
128
+ type: 'tool_completed',
129
+ toolName: 'bash',
130
+ result: expect.stringContaining('STDOUT:\npartial result'),
131
+ isError: true,
132
+ })
133
+ })
134
+
96
135
  it('replaces input on pre_tool_use modify', async () => {
97
136
  const executeMock = vi.fn(async () => ({ success: true, output: 'ok' }))
98
137
  const tools = makeToolRegistry(executeMock)
@@ -120,7 +159,10 @@ describe('ToolExecutor plugin hooks', () => {
120
159
  })
121
160
 
122
161
  it('skips registry execution and synthesizes output on pre_tool_use skip', async () => {
123
- const executeMock = vi.fn(async () => ({ success: true, output: 'should-not-run' }))
162
+ const executeMock = vi.fn(async () => ({
163
+ success: true,
164
+ output: 'should-not-run',
165
+ }))
124
166
  const tools = makeToolRegistry(executeMock)
125
167
  const pluginManager = makePluginManager(async (event) =>
126
168
  event === 'pre_tool_use'
@@ -223,7 +265,10 @@ describe('ToolExecutor plugin hooks', () => {
223
265
  })
224
266
 
225
267
  it('carries modified input into synthetic skip outcome (modify -> skip chain)', async () => {
226
- const executeMock = vi.fn(async () => ({ success: true, output: 'should-not-run' }))
268
+ const executeMock = vi.fn(async () => ({
269
+ success: true,
270
+ output: 'should-not-run',
271
+ }))
227
272
  const tools = makeToolRegistry(executeMock)
228
273
  const { PluginLifecycleManager } = await import('../../../plugin/lifecycle.js')
229
274
  const realManager = new PluginLifecycleManager({
@@ -25,7 +25,7 @@ describe('PromptBuilder runtime context', () => {
25
25
  label: 'test runtime',
26
26
  outputDirectory: 'outputs/',
27
27
  outputFileMarker: 'OUTPUT_FILE: <filename> - <description>',
28
- notes: ['Mirror generated files after the turn.'],
28
+ notes: ['Register generated files after the turn.'],
29
29
  },
30
30
  }).build('full', '/tmp/work')
31
31
 
@@ -33,6 +33,55 @@ describe('PromptBuilder runtime context', () => {
33
33
  expect(prompt).toContain('Working directory: /tmp/work')
34
34
  expect(prompt).toContain('Output directory: outputs/')
35
35
  expect(prompt).toContain('OUTPUT_FILE: <filename> - <description>')
36
- expect(prompt).toContain('Mirror generated files after the turn.')
36
+ expect(prompt).toContain('Register generated files after the turn.')
37
+ })
38
+
39
+ it('discloses available skills even when the host supplies a systemPrompt', () => {
40
+ const prompt = new PromptBuilder({
41
+ systemPrompt: 'You are a project assistant.',
42
+ tools: makeToolRegistry(),
43
+ skills: [
44
+ {
45
+ metadata: {
46
+ name: 'project-documents',
47
+ description: 'Draft and edit project documents from grounded inputs.',
48
+ },
49
+ dirPath: '/repo/.agents/skills/project-documents',
50
+ },
51
+ ],
52
+ }).build('full', '/tmp/work')
53
+
54
+ expect(prompt).toContain('You are a project assistant.')
55
+ expect(prompt).toContain('## Available Skills')
56
+ expect(prompt).toContain('project-documents')
57
+ expect(prompt).toContain('Draft and edit project documents')
58
+ expect(prompt).not.toContain('## Loaded Skills')
59
+ })
60
+
61
+ it('includes loaded skill bodies with systemPrompt while preserving the metadata catalogue', () => {
62
+ const prompt = new PromptBuilder({
63
+ systemPrompt: 'You are a cowork supervisor.',
64
+ tools: makeToolRegistry(),
65
+ skills: [
66
+ {
67
+ metadata: {
68
+ name: 'long-form-files',
69
+ description: 'Create long files with bounded edit chunks.',
70
+ license: 'MIT',
71
+ compatibility: 'Requires file tools',
72
+ allowedTools: 'read write edit',
73
+ },
74
+ body: 'Use skeleton-first writes and bounded edit chunks.',
75
+ dirPath: '/repo/.agents/skills/long-form-files',
76
+ },
77
+ ],
78
+ }).build('full', '/tmp/work')
79
+
80
+ expect(prompt).toContain('## Available Skills')
81
+ expect(prompt).toContain('license: MIT')
82
+ expect(prompt).toContain('compatibility: Requires file tools')
83
+ expect(prompt).toContain('allowed-tools: read write edit')
84
+ expect(prompt).toContain('## Loaded Skills')
85
+ expect(prompt).toContain('Use skeleton-first writes')
37
86
  })
38
87
  })
@@ -0,0 +1,156 @@
1
+ import { mkdtemp, rm } from 'node:fs/promises'
2
+ import { tmpdir } from 'node:os'
3
+ import { join } from 'node:path'
4
+ import { afterEach, describe, expect, it, vi } from 'vitest'
5
+ import { z } from 'zod'
6
+
7
+ import { ToolRegistry } from '../../../registry/tool/execute.js'
8
+ import type { SessionId, TenantId } from '../../../types/ids/index.js'
9
+ import { createUserMessage } from '../../../types/message/index.js'
10
+ import type { LLMProvider, StreamChunk } from '../../../types/provider/index.js'
11
+ import type { RunEvent } from '../../../types/run/index.js'
12
+ import type { ProjectId, ThreadId } from '../../../types/session/ids.js'
13
+ import { drainQuery } from '../index.js'
14
+
15
+ const ZERO_USAGE = {
16
+ promptTokens: 0,
17
+ completionTokens: 0,
18
+ totalTokens: 0,
19
+ cachedTokens: 0,
20
+ cacheWriteTokens: 0,
21
+ }
22
+
23
+ class IdleDuringToolInputProvider implements LLMProvider {
24
+ readonly id = 'idle-during-tool-input'
25
+ readonly name = 'Idle During Tool Input Provider'
26
+ calls = 0
27
+
28
+ async *chatStream(): AsyncIterable<StreamChunk> {
29
+ this.calls += 1
30
+
31
+ if (this.calls === 1) {
32
+ yield {
33
+ id: 'msg_1',
34
+ delta: {
35
+ toolCalls: [
36
+ {
37
+ index: 0,
38
+ id: 'toolu_write_1',
39
+ type: 'function',
40
+ function: { name: 'write_file' },
41
+ },
42
+ ],
43
+ },
44
+ }
45
+ yield {
46
+ id: 'msg_1',
47
+ delta: {
48
+ toolCalls: [
49
+ {
50
+ index: 0,
51
+ id: 'toolu_write_1',
52
+ function: {
53
+ arguments: '{"path":"/tmp/out.md","content":"partial',
54
+ },
55
+ },
56
+ ],
57
+ },
58
+ }
59
+ throw new Error('Anthropic stream idle for 90s')
60
+ }
61
+
62
+ yield {
63
+ id: 'msg_2',
64
+ delta: { content: 'Recovered after retry guidance.' },
65
+ }
66
+ yield {
67
+ id: 'msg_2',
68
+ delta: {},
69
+ finishReason: 'stop',
70
+ usage: ZERO_USAGE,
71
+ }
72
+ }
73
+ }
74
+
75
+ describe('query stream recovery', () => {
76
+ let workdirs: string[] = []
77
+
78
+ afterEach(async () => {
79
+ await Promise.all(workdirs.map((dir) => rm(dir, { recursive: true, force: true })))
80
+ workdirs = []
81
+ })
82
+
83
+ it('turns an idle stream with partial tool JSON into retryable tool feedback', async () => {
84
+ const provider = new IdleDuringToolInputProvider()
85
+ const actualWrite = vi.fn(async () => ({ success: true, output: 'should not run' }))
86
+ const tools = new ToolRegistry()
87
+ tools.register({
88
+ name: 'write_file',
89
+ description: 'write a file',
90
+ inputSchema: z.object({
91
+ path: z.string(),
92
+ content: z.string(),
93
+ }),
94
+ execute: actualWrite,
95
+ })
96
+ const workingDirectory = await mkdtemp(join(tmpdir(), 'namzu-stream-recovery-'))
97
+ workdirs.push(workingDirectory)
98
+ const events: RunEvent[] = []
99
+
100
+ const run = await drainQuery(
101
+ {
102
+ provider,
103
+ tools,
104
+ runConfig: {
105
+ model: 'mock-model',
106
+ timeoutMs: 5_000,
107
+ tokenBudget: 100_000,
108
+ maxIterations: 3,
109
+ maxResponseTokens: 256,
110
+ },
111
+ agentId: 'agent_test',
112
+ agentName: 'Test Agent',
113
+ messages: [createUserMessage('write the file')],
114
+ workingDirectory,
115
+ sessionId: 'ses_stream_recovery' as SessionId,
116
+ threadId: 'thd_stream_recovery' as ThreadId,
117
+ projectId: 'prj_stream_recovery' as ProjectId,
118
+ tenantId: 'tnt_stream_recovery' as TenantId,
119
+ },
120
+ (event) => {
121
+ events.push(event)
122
+ },
123
+ )
124
+
125
+ expect(run.status).toBe('completed')
126
+ expect(run.result).toBe('Recovered after retry guidance.')
127
+ expect(provider.calls).toBe(2)
128
+ expect(actualWrite).not.toHaveBeenCalled()
129
+
130
+ expect(events.some((event) => event.type === 'run_failed')).toBe(false)
131
+ expect(
132
+ events.some(
133
+ (event) =>
134
+ event.type === 'tool_input_completed' &&
135
+ event.inputTruncated === true &&
136
+ JSON.stringify(event.input) === '{}',
137
+ ),
138
+ ).toBe(true)
139
+ expect(JSON.stringify(events)).not.toContain('__namzuTruncated')
140
+
141
+ const completedTool = events.find(
142
+ (event) => event.type === 'tool_completed' && event.toolUseId === 'toolu_write_1',
143
+ )
144
+ expect(completedTool).toMatchObject({
145
+ type: 'tool_completed',
146
+ toolName: 'write_file',
147
+ isError: true,
148
+ })
149
+ expect(completedTool?.type === 'tool_completed' ? completedTool.result : '').toContain(
150
+ 'call was cut off',
151
+ )
152
+ expect(completedTool?.type === 'tool_completed' ? completedTool.result : '').toContain(
153
+ 'extend it with edit using insertLine',
154
+ )
155
+ })
156
+ })
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Synthetic user prompt injected by the iteration loop when a turn
3
+ * ends with `stop_reason: max_tokens` AND no tool_use. Mirrors
4
+ * Claude.ai's "Continue" affordance: the loop pushes this message
5
+ * back into the conversation and fires another iteration, letting
6
+ * the model pick up where it was cut off.
7
+ *
8
+ * The exact string is the marker used by `resolveResult` (in
9
+ * `manager/run/persistence.ts`) to detect auto-continuation
10
+ * boundaries: when walking the message tail it skips user messages
11
+ * that match this constant verbatim, so the run's `result` field
12
+ * concatenates the full multi-turn assistant output instead of only
13
+ * surfacing the trailing continuation chunk.
14
+ */
15
+ export const AUTO_CONTINUATION_USER_MESSAGE =
16
+ 'Continue exactly where you left off. Do not repeat content you already wrote — pick up at the next token.'
@@ -7,13 +7,18 @@ import { type ProbeRegistry, probe as defaultProbeRegistry } from '../../probe/r
7
7
  import type { ActivityStore } from '../../store/activity/memory.js'
8
8
  import type { RunId } from '../../types/ids/index.js'
9
9
  import type { InvocationState } from '../../types/invocation/index.js'
10
- import { type Message, createToolMessage } from '../../types/message/index.js'
10
+ import { type Message, type ToolCall, createToolMessage } from '../../types/message/index.js'
11
11
  import type { PermissionMode } from '../../types/permission/index.js'
12
12
  import type { PluginHookResult } from '../../types/plugin/index.js'
13
13
  import type { ChatCompletionResponse } from '../../types/provider/index.js'
14
14
  import type { RunEvent } from '../../types/run/index.js'
15
15
  import type { Sandbox } from '../../types/sandbox/index.js'
16
- import type { ToolContext, ToolRegistryContract, ToolResult } from '../../types/tool/index.js'
16
+ import type {
17
+ FileReadTracker,
18
+ ToolContext,
19
+ ToolRegistryContract,
20
+ ToolResult,
21
+ } from '../../types/tool/index.js'
17
22
  import type { Logger } from '../../utils/logger.js'
18
23
  import { compressShellOutput } from '../../utils/shell-compress.js'
19
24
 
@@ -26,6 +31,7 @@ export interface ToolExecutorConfig {
26
31
  permissionMode: PermissionMode
27
32
  env: Record<string, string>
28
33
  abortSignal: AbortSignal
34
+ allowedTools?: readonly string[]
29
35
  sandbox?: Sandbox
30
36
  invocationState?: InvocationState
31
37
  pluginManager?: PluginLifecycleManager
@@ -48,6 +54,13 @@ export class ToolExecutor {
48
54
  private log: Logger
49
55
  private workingStateManager?: WorkingStateManager
50
56
  private probes: ProbeRegistry
57
+ private readonly readPaths: Set<string> = new Set()
58
+ private readonly fileReadTracker: FileReadTracker = {
59
+ recordRead: (key: string) => {
60
+ this.readPaths.add(key)
61
+ },
62
+ hasRead: (key: string) => this.readPaths.has(key),
63
+ }
51
64
 
52
65
  constructor(
53
66
  config: ToolExecutorConfig,
@@ -83,11 +96,38 @@ export class ToolExecutor {
83
96
  tools: toolCalls.map((tc) => tc.function.name),
84
97
  })
85
98
 
86
- const toolContext = this.buildToolContext()
87
-
88
- const results = await Promise.all(
89
- toolCalls.map((toolCall) => this.executeSingle(toolCall, toolContext)),
90
- )
99
+ // One context per call so each execution can see its own
100
+ // `toolUseId`. The base context is built once; we spread + add
101
+ // per-call to keep allocations cheap.
102
+ const baseContext = this.buildToolContext()
103
+
104
+ // Respect each tool's `concurrencySafe` flag. Read-only tools
105
+ // (ls/grep/glob/…) run in parallel; tools that mutate shared state
106
+ // (edit/write/append/bash — `concurrencySafe: false`) are serialized in
107
+ // a single chain, so e.g. several `edit` calls to the SAME file in one
108
+ // turn apply one-after-another instead of racing read→modify→write
109
+ // (which let the last writer clobber the rest). Results are written by
110
+ // index to preserve the original tool-call order.
111
+ const results: Array<{ toolCallId: string; output: string }> = new Array(toolCalls.length)
112
+ const parallel: Promise<void>[] = []
113
+ let serial: Promise<void> = Promise.resolve()
114
+ toolCalls.forEach((toolCall, i) => {
115
+ const ctx = { ...baseContext, toolUseId: toolCall.id }
116
+ const run = async () => {
117
+ results[i] = await this.executeSingle(toolCall, ctx)
118
+ }
119
+ let input: unknown = {}
120
+ try {
121
+ input = JSON.parse(toolCall.function.arguments || '{}')
122
+ } catch {
123
+ // non-JSON args → treat as unsafe (serialize), the conservative path
124
+ }
125
+ const safe =
126
+ this.config.tools.get(toolCall.function.name)?.isConcurrencySafe?.(input) === true
127
+ if (safe) parallel.push(run())
128
+ else serial = serial.then(run)
129
+ })
130
+ await Promise.all([...parallel, serial])
91
131
 
92
132
  const messages: Message[] = results.map((r) => createToolMessage(r.output, r.toolCallId))
93
133
 
@@ -108,19 +148,38 @@ export class ToolExecutor {
108
148
  },
109
149
  invocationState: this.config.invocationState,
110
150
  toolRegistry: this.config.tools,
151
+ allowedTools: this.config.allowedTools,
111
152
  sandbox: this.config.sandbox,
153
+ fileReadTracker: this.fileReadTracker,
112
154
  }
113
155
  }
114
156
 
115
157
  private async executeSingle(
116
- toolCall: {
117
- id: string
118
- type: string
119
- function: { name: string; arguments: string }
120
- },
158
+ toolCall: ToolCall,
121
159
  toolContext: ToolContext,
122
160
  ): Promise<{ toolCallId: string; output: string }> {
123
161
  const toolName = toolCall.function.name
162
+
163
+ if (toolCall.metadata?.inputTruncated === true) {
164
+ const message = truncatedToolInputMessage(toolName)
165
+ await this.emitEvent({
166
+ type: 'tool_executing',
167
+ runId: this.config.runId,
168
+ toolUseId: toolCall.id,
169
+ toolName,
170
+ input: {},
171
+ })
172
+ await this.emitEvent({
173
+ type: 'tool_completed',
174
+ runId: this.config.runId,
175
+ toolUseId: toolCall.id,
176
+ toolName,
177
+ result: message,
178
+ isError: true,
179
+ })
180
+ return { toolCallId: toolCall.id, output: message }
181
+ }
182
+
124
183
  let input: unknown
125
184
 
126
185
  try {
@@ -238,7 +297,7 @@ export class ToolExecutor {
238
297
 
239
298
  const rawOutput = result.success
240
299
  ? result.output
241
- : `Error: ${result.error ?? 'Tool execution failed'}`
300
+ : formatFailedToolOutput(result.output, result.error)
242
301
 
243
302
  let output = result.success ? this.maybeCompress(toolName, rawOutput) : rawOutput
244
303
 
@@ -430,3 +489,13 @@ export class ToolExecutor {
430
489
  return compressed
431
490
  }
432
491
  }
492
+
493
+ function formatFailedToolOutput(output: string | undefined, error: string | undefined): string {
494
+ const errorText = `Error: ${error ?? 'Tool execution failed'}`
495
+ if (!output || output.trim().length === 0) return errorText
496
+ return `${output}\n\n${errorText}`
497
+ }
498
+
499
+ function truncatedToolInputMessage(toolName: string): string {
500
+ return `Error: Tool "${toolName}" call was cut off while the model was streaming JSON arguments. The tool was NOT executed. Retry with a much shorter input. Self-budget any content/newStr payload under 12000 characters before calling file tools. For long files, create a short opening with write, then extend it with edit using insertLine: "end" in bounded section chunks; for delegated work, pass a shared workspace filename/reference instead of embedding the content in the tool call.`
501
+ }
@@ -224,6 +224,8 @@ export async function* query(params: QueryParams): AsyncGenerator<RunEvent, Run>
224
224
  }
225
225
  }
226
226
 
227
+ const effectiveAllowedTools = withDeferredDiscoveryTool(params.tools, params.allowedTools)
228
+
227
229
  const toolExecutor = ToolingBootstrap.init(
228
230
  {
229
231
  tools: params.tools,
@@ -232,6 +234,7 @@ export async function* query(params: QueryParams): AsyncGenerator<RunEvent, Run>
232
234
  permissionMode: ctx.permissionMode,
233
235
  env: params.runConfig.env ?? {},
234
236
  abortSignal: ctx.abortController.signal,
237
+ allowedTools: effectiveAllowedTools,
235
238
  invocationState: params.invocationState,
236
239
  pluginManager: params.pluginManager,
237
240
  },
@@ -252,7 +255,7 @@ export async function* query(params: QueryParams): AsyncGenerator<RunEvent, Run>
252
255
  skills: params.skills,
253
256
  basePrompt: params.basePrompt,
254
257
  tools: params.tools,
255
- allowedTools: params.allowedTools,
258
+ allowedTools: effectiveAllowedTools,
256
259
  runtimeContext: params.runtimeContext,
257
260
  })
258
261
 
@@ -312,7 +315,7 @@ export async function* query(params: QueryParams): AsyncGenerator<RunEvent, Run>
312
315
  provider: params.provider,
313
316
  runConfig: params.runConfig,
314
317
  tools: params.tools,
315
- allowedTools: params.allowedTools,
318
+ allowedTools: effectiveAllowedTools,
316
319
  taskGateway: params.taskGateway,
317
320
  taskStore: params.taskStore,
318
321
  launchedTasks: params.launchedTasks,
@@ -370,7 +373,7 @@ export async function* query(params: QueryParams): AsyncGenerator<RunEvent, Run>
370
373
  skills: params.skills,
371
374
  basePrompt: contextLevel === 'full' ? params.basePrompt : undefined,
372
375
  tools: params.tools,
373
- allowedTools: params.allowedTools,
376
+ allowedTools: effectiveAllowedTools,
374
377
  runtimeContext: params.runtimeContext,
375
378
  }
376
379
 
@@ -536,3 +539,21 @@ export async function drainQuery(
536
539
 
537
540
  return result.value
538
541
  }
542
+
543
+ function withDeferredDiscoveryTool(
544
+ tools: ToolRegistryContract,
545
+ allowedTools?: string[],
546
+ ): string[] | undefined {
547
+ if (!allowedTools) return undefined
548
+ if (allowedTools.includes(SearchToolsTool.name)) return allowedTools
549
+
550
+ const allowedHasDeferred = allowedTools.some(
551
+ (name) => tools.has(name) && tools.getAvailability(name) === 'deferred',
552
+ )
553
+ if (!allowedHasDeferred) return allowedTools
554
+
555
+ if (!tools.has(SearchToolsTool.name)) return allowedTools
556
+ if (tools.getAvailability(SearchToolsTool.name) !== 'active') return allowedTools
557
+
558
+ return [...allowedTools, SearchToolsTool.name]
559
+ }