@namzu/sdk 0.6.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +362 -0
- package/dist/advisory/executor.d.ts.map +1 -1
- package/dist/advisory/executor.js +9 -2
- package/dist/advisory/executor.js.map +1 -1
- package/dist/advisory/executor.test.d.ts +2 -1
- package/dist/advisory/executor.test.d.ts.map +1 -1
- package/dist/advisory/executor.test.js +7 -4
- package/dist/advisory/executor.test.js.map +1 -1
- package/dist/agents/ReactiveAgent.d.ts.map +1 -1
- package/dist/agents/ReactiveAgent.js +2 -0
- package/dist/agents/ReactiveAgent.js.map +1 -1
- package/dist/agents/SupervisorAgent.d.ts.map +1 -1
- package/dist/agents/SupervisorAgent.js +7 -0
- package/dist/agents/SupervisorAgent.js.map +1 -1
- package/dist/bridge/sse/mapper.test.js +2 -2
- package/dist/constants/compaction/index.d.ts.map +1 -1
- package/dist/constants/compaction/index.js +8 -3
- package/dist/constants/compaction/index.js.map +1 -1
- package/dist/constants/sandbox/index.d.ts +21 -0
- package/dist/constants/sandbox/index.d.ts.map +1 -1
- package/dist/constants/sandbox/index.js +30 -0
- package/dist/constants/sandbox/index.js.map +1 -1
- package/dist/constants/tools/index.d.ts.map +1 -1
- package/dist/constants/tools/index.js +33 -2
- package/dist/constants/tools/index.js.map +1 -1
- package/dist/manager/run/persistence.d.ts.map +1 -1
- package/dist/manager/run/persistence.js +35 -5
- package/dist/manager/run/persistence.js.map +1 -1
- package/dist/persona/assembler.d.ts +1 -0
- package/dist/persona/assembler.d.ts.map +1 -1
- package/dist/persona/assembler.js +28 -6
- package/dist/persona/assembler.js.map +1 -1
- package/dist/provider/collect.test.js +2 -2
- package/dist/public-runtime.d.ts +5 -4
- package/dist/public-runtime.d.ts.map +1 -1
- package/dist/public-runtime.js +5 -4
- package/dist/public-runtime.js.map +1 -1
- package/dist/public-tools.d.ts +2 -0
- package/dist/public-tools.d.ts.map +1 -1
- package/dist/public-tools.js +2 -0
- package/dist/public-tools.js.map +1 -1
- package/dist/public-types.d.ts +3 -0
- package/dist/public-types.d.ts.map +1 -1
- package/dist/registry/index.d.ts +2 -0
- package/dist/registry/index.d.ts.map +1 -1
- package/dist/registry/index.js +1 -0
- package/dist/registry/index.js.map +1 -1
- package/dist/registry/tool/execute.d.ts.map +1 -1
- package/dist/registry/tool/execute.js +87 -5
- package/dist/registry/tool/execute.js.map +1 -1
- package/dist/registry/tool/execute.test.d.ts +4 -2
- package/dist/registry/tool/execute.test.d.ts.map +1 -1
- package/dist/registry/tool/execute.test.js +112 -3
- package/dist/registry/tool/execute.test.js.map +1 -1
- package/dist/registry/toolset/catalog.d.ts +42 -0
- package/dist/registry/toolset/catalog.d.ts.map +1 -0
- package/dist/registry/toolset/catalog.js +217 -0
- package/dist/registry/toolset/catalog.js.map +1 -0
- package/dist/registry/toolset/catalog.test.d.ts +2 -0
- package/dist/registry/toolset/catalog.test.d.ts.map +1 -0
- package/dist/registry/toolset/catalog.test.js +85 -0
- package/dist/registry/toolset/catalog.test.js.map +1 -0
- package/dist/runtime/query/__tests__/deferred-tools.test.d.ts +2 -0
- package/dist/runtime/query/__tests__/deferred-tools.test.d.ts.map +1 -0
- package/dist/runtime/query/__tests__/deferred-tools.test.js +147 -0
- package/dist/runtime/query/__tests__/deferred-tools.test.js.map +1 -0
- package/dist/runtime/query/__tests__/executor-concurrency.test.d.ts +2 -0
- package/dist/runtime/query/__tests__/executor-concurrency.test.d.ts.map +1 -0
- package/dist/runtime/query/__tests__/executor-concurrency.test.js +98 -0
- package/dist/runtime/query/__tests__/executor-concurrency.test.js.map +1 -0
- package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js +38 -3
- package/dist/runtime/query/__tests__/executor-plugin-hooks.test.js.map +1 -1
- package/dist/runtime/query/__tests__/prompt.test.js +47 -2
- package/dist/runtime/query/__tests__/prompt.test.js.map +1 -1
- package/dist/runtime/query/__tests__/stream-recovery.test.d.ts +2 -0
- package/dist/runtime/query/__tests__/stream-recovery.test.d.ts.map +1 -0
- package/dist/runtime/query/__tests__/stream-recovery.test.js +126 -0
- package/dist/runtime/query/__tests__/stream-recovery.test.js.map +1 -0
- package/dist/runtime/query/continuation.d.ts +16 -0
- package/dist/runtime/query/continuation.d.ts.map +1 -0
- package/dist/runtime/query/continuation.js +16 -0
- package/dist/runtime/query/continuation.js.map +1 -0
- package/dist/runtime/query/executor.d.ts +3 -0
- package/dist/runtime/query/executor.d.ts.map +1 -1
- package/dist/runtime/query/executor.js +71 -3
- package/dist/runtime/query/executor.js.map +1 -1
- package/dist/runtime/query/index.d.ts.map +1 -1
- package/dist/runtime/query/index.js +19 -3
- package/dist/runtime/query/index.js.map +1 -1
- package/dist/runtime/query/iteration/index.d.ts +22 -0
- package/dist/runtime/query/iteration/index.d.ts.map +1 -1
- package/dist/runtime/query/iteration/index.js +227 -60
- package/dist/runtime/query/iteration/index.js.map +1 -1
- package/dist/runtime/query/iteration/phases/context.d.ts +10 -0
- package/dist/runtime/query/iteration/phases/context.d.ts.map +1 -1
- package/dist/runtime/query/iteration/phases/context.js.map +1 -1
- package/dist/runtime/query/prompt.d.ts.map +1 -1
- package/dist/runtime/query/prompt.js +21 -1
- package/dist/runtime/query/prompt.js.map +1 -1
- package/dist/runtime/query/tooling.d.ts +1 -0
- package/dist/runtime/query/tooling.d.ts.map +1 -1
- package/dist/runtime/query/tooling.js +1 -0
- package/dist/runtime/query/tooling.js.map +1 -1
- package/dist/sandbox/provider/local.d.ts.map +1 -1
- package/dist/sandbox/provider/local.js +32 -1
- package/dist/sandbox/provider/local.js.map +1 -1
- package/dist/session/workspace/__tests__/shared-run.test.d.ts +2 -0
- package/dist/session/workspace/__tests__/shared-run.test.d.ts.map +1 -0
- package/dist/session/workspace/__tests__/shared-run.test.js +147 -0
- package/dist/session/workspace/__tests__/shared-run.test.js.map +1 -0
- package/dist/session/workspace/index.d.ts +2 -0
- package/dist/session/workspace/index.d.ts.map +1 -1
- package/dist/session/workspace/index.js +1 -0
- package/dist/session/workspace/index.js.map +1 -1
- package/dist/session/workspace/shared-run.d.ts +81 -0
- package/dist/session/workspace/shared-run.d.ts.map +1 -0
- package/dist/session/workspace/shared-run.js +251 -0
- package/dist/session/workspace/shared-run.js.map +1 -0
- package/dist/skills/loader.d.ts.map +1 -1
- package/dist/skills/loader.js +36 -6
- package/dist/skills/loader.js.map +1 -1
- package/dist/skills/loader.test.d.ts +2 -0
- package/dist/skills/loader.test.d.ts.map +1 -0
- package/dist/skills/loader.test.js +65 -0
- package/dist/skills/loader.test.js.map +1 -0
- package/dist/streaming/coalesce.test.js +1 -1
- package/dist/tools/builtins/__tests__/edit.test.d.ts +2 -0
- package/dist/tools/builtins/__tests__/edit.test.d.ts.map +1 -0
- package/dist/tools/builtins/__tests__/edit.test.js +38 -0
- package/dist/tools/builtins/__tests__/edit.test.js.map +1 -0
- package/dist/tools/builtins/__tests__/payload-budget.test.d.ts +2 -0
- package/dist/tools/builtins/__tests__/payload-budget.test.d.ts.map +1 -0
- package/dist/tools/builtins/__tests__/payload-budget.test.js +22 -0
- package/dist/tools/builtins/__tests__/payload-budget.test.js.map +1 -0
- package/dist/tools/builtins/__tests__/read-file.test.d.ts +2 -0
- package/dist/tools/builtins/__tests__/read-file.test.d.ts.map +1 -0
- package/dist/tools/builtins/__tests__/read-file.test.js +24 -0
- package/dist/tools/builtins/__tests__/read-file.test.js.map +1 -0
- package/dist/tools/builtins/__tests__/verify-outputs.test.d.ts +2 -0
- package/dist/tools/builtins/__tests__/verify-outputs.test.d.ts.map +1 -0
- package/dist/tools/builtins/__tests__/verify-outputs.test.js +52 -0
- package/dist/tools/builtins/__tests__/verify-outputs.test.js.map +1 -0
- package/dist/tools/builtins/__tests__/write-file.test.d.ts +2 -0
- package/dist/tools/builtins/__tests__/write-file.test.d.ts.map +1 -0
- package/dist/tools/builtins/__tests__/write-file.test.js +74 -0
- package/dist/tools/builtins/__tests__/write-file.test.js.map +1 -0
- package/dist/tools/builtins/bash.d.ts.map +1 -1
- package/dist/tools/builtins/bash.js +40 -7
- package/dist/tools/builtins/bash.js.map +1 -1
- package/dist/tools/builtins/edit.d.ts +5 -2
- package/dist/tools/builtins/edit.d.ts.map +1 -1
- package/dist/tools/builtins/edit.js +114 -18
- package/dist/tools/builtins/edit.js.map +1 -1
- package/dist/tools/builtins/index.d.ts +1 -0
- package/dist/tools/builtins/index.d.ts.map +1 -1
- package/dist/tools/builtins/index.js +13 -13
- package/dist/tools/builtins/index.js.map +1 -1
- package/dist/tools/builtins/read-file.d.ts +1 -0
- package/dist/tools/builtins/read-file.d.ts.map +1 -1
- package/dist/tools/builtins/read-file.js +23 -8
- package/dist/tools/builtins/read-file.js.map +1 -1
- package/dist/tools/builtins/search-tools.d.ts.map +1 -1
- package/dist/tools/builtins/search-tools.js +4 -1
- package/dist/tools/builtins/search-tools.js.map +1 -1
- package/dist/tools/builtins/verify-outputs.d.ts +5 -0
- package/dist/tools/builtins/verify-outputs.d.ts.map +1 -0
- package/dist/tools/builtins/verify-outputs.js +103 -0
- package/dist/tools/builtins/verify-outputs.js.map +1 -0
- package/dist/tools/builtins/write-file.d.ts +3 -2
- package/dist/tools/builtins/write-file.d.ts.map +1 -1
- package/dist/tools/builtins/write-file.js +72 -12
- package/dist/tools/builtins/write-file.js.map +1 -1
- package/dist/tools/coordinator/__tests__/agent.test.d.ts +15 -0
- package/dist/tools/coordinator/__tests__/agent.test.d.ts.map +1 -0
- package/dist/tools/coordinator/__tests__/agent.test.js +142 -0
- package/dist/tools/coordinator/__tests__/agent.test.js.map +1 -0
- package/dist/tools/coordinator/__tests__/task-list.test.d.ts +13 -0
- package/dist/tools/coordinator/__tests__/task-list.test.d.ts.map +1 -0
- package/dist/tools/coordinator/__tests__/task-list.test.js +162 -0
- package/dist/tools/coordinator/__tests__/task-list.test.js.map +1 -0
- package/dist/tools/coordinator/agent.d.ts +34 -0
- package/dist/tools/coordinator/agent.d.ts.map +1 -0
- package/dist/tools/coordinator/agent.js +107 -0
- package/dist/tools/coordinator/agent.js.map +1 -0
- package/dist/tools/coordinator/index.d.ts +7 -0
- package/dist/tools/coordinator/index.d.ts.map +1 -1
- package/dist/tools/coordinator/index.js +111 -21
- package/dist/tools/coordinator/index.js.map +1 -1
- package/dist/types/agent/base.d.ts +8 -0
- package/dist/types/agent/base.d.ts.map +1 -1
- package/dist/types/agent/reactive.d.ts +23 -0
- package/dist/types/agent/reactive.d.ts.map +1 -1
- package/dist/types/agent/supervisor.d.ts +14 -0
- package/dist/types/agent/supervisor.d.ts.map +1 -1
- package/dist/types/message/index.d.ts +22 -1
- package/dist/types/message/index.d.ts.map +1 -1
- package/dist/types/message/index.js +7 -2
- package/dist/types/message/index.js.map +1 -1
- package/dist/types/provider/chat.d.ts +2 -9
- package/dist/types/provider/chat.d.ts.map +1 -1
- package/dist/types/run/events.d.ts +6 -0
- package/dist/types/run/events.d.ts.map +1 -1
- package/dist/types/run/events.js.map +1 -1
- package/dist/types/sandbox/index.d.ts +193 -0
- package/dist/types/sandbox/index.d.ts.map +1 -1
- package/dist/types/sandbox/index.js.map +1 -1
- package/dist/types/skills/index.d.ts +2 -0
- package/dist/types/skills/index.d.ts.map +1 -1
- package/dist/types/tool/index.d.ts +22 -0
- package/dist/types/tool/index.d.ts.map +1 -1
- package/dist/types/toolset/index.d.ts +71 -0
- package/dist/types/toolset/index.d.ts.map +1 -0
- package/dist/types/toolset/index.js +2 -0
- package/dist/types/toolset/index.js.map +1 -0
- package/dist/types/workspace/index.d.ts +1 -0
- package/dist/types/workspace/index.d.ts.map +1 -1
- package/dist/types/workspace/shared-run.d.ts +61 -0
- package/dist/types/workspace/shared-run.d.ts.map +1 -0
- package/dist/types/workspace/shared-run.js +2 -0
- package/dist/types/workspace/shared-run.js.map +1 -0
- package/dist/verification/index.d.ts +1 -0
- package/dist/verification/index.d.ts.map +1 -1
- package/dist/verification/index.js +1 -0
- package/dist/verification/index.js.map +1 -1
- package/dist/verification/presets.d.ts +53 -0
- package/dist/verification/presets.d.ts.map +1 -0
- package/dist/verification/presets.js +70 -0
- package/dist/verification/presets.js.map +1 -0
- package/dist/verification/presets.test.d.ts +16 -0
- package/dist/verification/presets.test.d.ts.map +1 -0
- package/dist/verification/presets.test.js +79 -0
- package/dist/verification/presets.test.js.map +1 -0
- package/package.json +3 -2
- package/src/advisory/executor.test.ts +7 -4
- package/src/advisory/executor.ts +11 -2
- package/src/agents/ReactiveAgent.ts +2 -0
- package/src/agents/SupervisorAgent.ts +7 -0
- package/src/bridge/sse/mapper.test.ts +2 -2
- package/src/constants/compaction/index.ts +8 -3
- package/src/constants/sandbox/index.ts +37 -0
- package/src/constants/tools/index.ts +33 -2
- package/src/manager/run/persistence.ts +34 -6
- package/src/persona/assembler.ts +31 -8
- package/src/provider/collect.test.ts +2 -2
- package/src/public-runtime.ts +14 -1
- package/src/public-tools.ts +2 -0
- package/src/public-types.ts +7 -0
- package/src/registry/index.ts +7 -0
- package/src/registry/tool/execute.test.ts +132 -3
- package/src/registry/tool/execute.ts +94 -9
- package/src/registry/toolset/catalog.test.ts +97 -0
- package/src/registry/toolset/catalog.ts +283 -0
- package/src/runtime/query/__tests__/deferred-tools.test.ts +183 -0
- package/src/runtime/query/__tests__/executor-concurrency.test.ts +122 -0
- package/src/runtime/query/__tests__/executor-plugin-hooks.test.ts +48 -3
- package/src/runtime/query/__tests__/prompt.test.ts +51 -2
- package/src/runtime/query/__tests__/stream-recovery.test.ts +156 -0
- package/src/runtime/query/continuation.ts +16 -0
- package/src/runtime/query/executor.ts +82 -13
- package/src/runtime/query/index.ts +24 -3
- package/src/runtime/query/iteration/index.ts +263 -68
- package/src/runtime/query/iteration/phases/context.ts +10 -0
- package/src/runtime/query/prompt.ts +17 -1
- package/src/runtime/query/tooling.ts +2 -0
- package/src/sandbox/provider/local.ts +33 -0
- package/src/session/workspace/__tests__/shared-run.test.ts +181 -0
- package/src/session/workspace/index.ts +6 -0
- package/src/session/workspace/shared-run.ts +316 -0
- package/src/skills/loader.test.ts +89 -0
- package/src/skills/loader.ts +37 -6
- package/src/streaming/coalesce.test.ts +1 -1
- package/src/tools/builtins/__tests__/edit.test.ts +57 -0
- package/src/tools/builtins/__tests__/payload-budget.test.ts +29 -0
- package/src/tools/builtins/__tests__/read-file.test.ts +31 -0
- package/src/tools/builtins/__tests__/verify-outputs.test.ts +71 -0
- package/src/tools/builtins/__tests__/write-file.test.ts +97 -0
- package/src/tools/builtins/bash.ts +48 -7
- package/src/tools/builtins/edit.ts +162 -27
- package/src/tools/builtins/index.ts +13 -13
- package/src/tools/builtins/read-file.ts +31 -8
- package/src/tools/builtins/search-tools.ts +5 -1
- package/src/tools/builtins/verify-outputs.ts +126 -0
- package/src/tools/builtins/write-file.ts +83 -14
- package/src/tools/coordinator/__tests__/agent.test.ts +172 -0
- package/src/tools/coordinator/__tests__/task-list.test.ts +182 -0
- package/src/tools/coordinator/agent.ts +157 -0
- package/src/tools/coordinator/index.ts +128 -22
- package/src/types/agent/base.ts +8 -0
- package/src/types/agent/reactive.ts +25 -0
- package/src/types/agent/supervisor.ts +16 -0
- package/src/types/message/index.ts +32 -2
- package/src/types/provider/chat.ts +2 -9
- package/src/types/run/events.ts +6 -0
- package/src/types/sandbox/index.ts +219 -0
- package/src/types/skills/index.ts +4 -0
- package/src/types/tool/index.ts +24 -0
- package/src/types/toolset/index.ts +86 -0
- package/src/types/workspace/index.ts +9 -0
- package/src/types/workspace/shared-run.ts +65 -0
- package/src/verification/index.ts +1 -0
- package/src/verification/presets.test.ts +112 -0
- package/src/verification/presets.ts +72 -0
|
@@ -18,7 +18,10 @@ function makeLogger(): Logger {
|
|
|
18
18
|
error: vi.fn(),
|
|
19
19
|
debug: vi.fn(),
|
|
20
20
|
}
|
|
21
|
-
return {
|
|
21
|
+
return {
|
|
22
|
+
...stub,
|
|
23
|
+
child: vi.fn(() => ({ ...stub, child: vi.fn() })),
|
|
24
|
+
} as unknown as Logger
|
|
22
25
|
}
|
|
23
26
|
|
|
24
27
|
function makeToolRegistry(execute: ToolRegistryContract['execute']): ToolRegistryContract {
|
|
@@ -93,6 +96,42 @@ describe('ToolExecutor plugin hooks', () => {
|
|
|
93
96
|
expect(batch.results[0]?.output).toBe('ok')
|
|
94
97
|
})
|
|
95
98
|
|
|
99
|
+
it('preserves tool stdout/stderr when a tool exits unsuccessfully', async () => {
|
|
100
|
+
const tools = makeToolRegistry(
|
|
101
|
+
vi.fn(async () => ({
|
|
102
|
+
success: false,
|
|
103
|
+
output: 'STDOUT:\npartial result\n\nSTDERR:\nboom',
|
|
104
|
+
error: 'Command exited with code 1',
|
|
105
|
+
})),
|
|
106
|
+
)
|
|
107
|
+
const exec = new ToolExecutor(
|
|
108
|
+
{
|
|
109
|
+
tools,
|
|
110
|
+
runId: mockRunId,
|
|
111
|
+
workingDirectory: '/tmp',
|
|
112
|
+
permissionMode: 'auto',
|
|
113
|
+
env: {},
|
|
114
|
+
abortSignal: new AbortController().signal,
|
|
115
|
+
},
|
|
116
|
+
activityStore,
|
|
117
|
+
emitEvent,
|
|
118
|
+
makeLogger(),
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
const batch = await exec.executeBatch(buildResponse('bash', { command: 'false' }))
|
|
122
|
+
expect(batch.results[0]?.output).toContain('STDOUT:\npartial result')
|
|
123
|
+
expect(batch.results[0]?.output).toContain('STDERR:\nboom')
|
|
124
|
+
expect(batch.results[0]?.output).toContain('Error: Command exited with code 1')
|
|
125
|
+
|
|
126
|
+
const completed = emitted.find((e) => e.type === 'tool_completed')
|
|
127
|
+
expect(completed).toMatchObject({
|
|
128
|
+
type: 'tool_completed',
|
|
129
|
+
toolName: 'bash',
|
|
130
|
+
result: expect.stringContaining('STDOUT:\npartial result'),
|
|
131
|
+
isError: true,
|
|
132
|
+
})
|
|
133
|
+
})
|
|
134
|
+
|
|
96
135
|
it('replaces input on pre_tool_use modify', async () => {
|
|
97
136
|
const executeMock = vi.fn(async () => ({ success: true, output: 'ok' }))
|
|
98
137
|
const tools = makeToolRegistry(executeMock)
|
|
@@ -120,7 +159,10 @@ describe('ToolExecutor plugin hooks', () => {
|
|
|
120
159
|
})
|
|
121
160
|
|
|
122
161
|
it('skips registry execution and synthesizes output on pre_tool_use skip', async () => {
|
|
123
|
-
const executeMock = vi.fn(async () => ({
|
|
162
|
+
const executeMock = vi.fn(async () => ({
|
|
163
|
+
success: true,
|
|
164
|
+
output: 'should-not-run',
|
|
165
|
+
}))
|
|
124
166
|
const tools = makeToolRegistry(executeMock)
|
|
125
167
|
const pluginManager = makePluginManager(async (event) =>
|
|
126
168
|
event === 'pre_tool_use'
|
|
@@ -223,7 +265,10 @@ describe('ToolExecutor plugin hooks', () => {
|
|
|
223
265
|
})
|
|
224
266
|
|
|
225
267
|
it('carries modified input into synthetic skip outcome (modify -> skip chain)', async () => {
|
|
226
|
-
const executeMock = vi.fn(async () => ({
|
|
268
|
+
const executeMock = vi.fn(async () => ({
|
|
269
|
+
success: true,
|
|
270
|
+
output: 'should-not-run',
|
|
271
|
+
}))
|
|
227
272
|
const tools = makeToolRegistry(executeMock)
|
|
228
273
|
const { PluginLifecycleManager } = await import('../../../plugin/lifecycle.js')
|
|
229
274
|
const realManager = new PluginLifecycleManager({
|
|
@@ -25,7 +25,7 @@ describe('PromptBuilder runtime context', () => {
|
|
|
25
25
|
label: 'test runtime',
|
|
26
26
|
outputDirectory: 'outputs/',
|
|
27
27
|
outputFileMarker: 'OUTPUT_FILE: <filename> - <description>',
|
|
28
|
-
notes: ['
|
|
28
|
+
notes: ['Register generated files after the turn.'],
|
|
29
29
|
},
|
|
30
30
|
}).build('full', '/tmp/work')
|
|
31
31
|
|
|
@@ -33,6 +33,55 @@ describe('PromptBuilder runtime context', () => {
|
|
|
33
33
|
expect(prompt).toContain('Working directory: /tmp/work')
|
|
34
34
|
expect(prompt).toContain('Output directory: outputs/')
|
|
35
35
|
expect(prompt).toContain('OUTPUT_FILE: <filename> - <description>')
|
|
36
|
-
expect(prompt).toContain('
|
|
36
|
+
expect(prompt).toContain('Register generated files after the turn.')
|
|
37
|
+
})
|
|
38
|
+
|
|
39
|
+
it('discloses available skills even when the host supplies a systemPrompt', () => {
|
|
40
|
+
const prompt = new PromptBuilder({
|
|
41
|
+
systemPrompt: 'You are a project assistant.',
|
|
42
|
+
tools: makeToolRegistry(),
|
|
43
|
+
skills: [
|
|
44
|
+
{
|
|
45
|
+
metadata: {
|
|
46
|
+
name: 'project-documents',
|
|
47
|
+
description: 'Draft and edit project documents from grounded inputs.',
|
|
48
|
+
},
|
|
49
|
+
dirPath: '/repo/.agents/skills/project-documents',
|
|
50
|
+
},
|
|
51
|
+
],
|
|
52
|
+
}).build('full', '/tmp/work')
|
|
53
|
+
|
|
54
|
+
expect(prompt).toContain('You are a project assistant.')
|
|
55
|
+
expect(prompt).toContain('## Available Skills')
|
|
56
|
+
expect(prompt).toContain('project-documents')
|
|
57
|
+
expect(prompt).toContain('Draft and edit project documents')
|
|
58
|
+
expect(prompt).not.toContain('## Loaded Skills')
|
|
59
|
+
})
|
|
60
|
+
|
|
61
|
+
it('includes loaded skill bodies with systemPrompt while preserving the metadata catalogue', () => {
|
|
62
|
+
const prompt = new PromptBuilder({
|
|
63
|
+
systemPrompt: 'You are a cowork supervisor.',
|
|
64
|
+
tools: makeToolRegistry(),
|
|
65
|
+
skills: [
|
|
66
|
+
{
|
|
67
|
+
metadata: {
|
|
68
|
+
name: 'long-form-files',
|
|
69
|
+
description: 'Create long files with bounded edit chunks.',
|
|
70
|
+
license: 'MIT',
|
|
71
|
+
compatibility: 'Requires file tools',
|
|
72
|
+
allowedTools: 'read write edit',
|
|
73
|
+
},
|
|
74
|
+
body: 'Use skeleton-first writes and bounded edit chunks.',
|
|
75
|
+
dirPath: '/repo/.agents/skills/long-form-files',
|
|
76
|
+
},
|
|
77
|
+
],
|
|
78
|
+
}).build('full', '/tmp/work')
|
|
79
|
+
|
|
80
|
+
expect(prompt).toContain('## Available Skills')
|
|
81
|
+
expect(prompt).toContain('license: MIT')
|
|
82
|
+
expect(prompt).toContain('compatibility: Requires file tools')
|
|
83
|
+
expect(prompt).toContain('allowed-tools: read write edit')
|
|
84
|
+
expect(prompt).toContain('## Loaded Skills')
|
|
85
|
+
expect(prompt).toContain('Use skeleton-first writes')
|
|
37
86
|
})
|
|
38
87
|
})
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import { mkdtemp, rm } from 'node:fs/promises'
|
|
2
|
+
import { tmpdir } from 'node:os'
|
|
3
|
+
import { join } from 'node:path'
|
|
4
|
+
import { afterEach, describe, expect, it, vi } from 'vitest'
|
|
5
|
+
import { z } from 'zod'
|
|
6
|
+
|
|
7
|
+
import { ToolRegistry } from '../../../registry/tool/execute.js'
|
|
8
|
+
import type { SessionId, TenantId } from '../../../types/ids/index.js'
|
|
9
|
+
import { createUserMessage } from '../../../types/message/index.js'
|
|
10
|
+
import type { LLMProvider, StreamChunk } from '../../../types/provider/index.js'
|
|
11
|
+
import type { RunEvent } from '../../../types/run/index.js'
|
|
12
|
+
import type { ProjectId, ThreadId } from '../../../types/session/ids.js'
|
|
13
|
+
import { drainQuery } from '../index.js'
|
|
14
|
+
|
|
15
|
+
const ZERO_USAGE = {
|
|
16
|
+
promptTokens: 0,
|
|
17
|
+
completionTokens: 0,
|
|
18
|
+
totalTokens: 0,
|
|
19
|
+
cachedTokens: 0,
|
|
20
|
+
cacheWriteTokens: 0,
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
class IdleDuringToolInputProvider implements LLMProvider {
|
|
24
|
+
readonly id = 'idle-during-tool-input'
|
|
25
|
+
readonly name = 'Idle During Tool Input Provider'
|
|
26
|
+
calls = 0
|
|
27
|
+
|
|
28
|
+
async *chatStream(): AsyncIterable<StreamChunk> {
|
|
29
|
+
this.calls += 1
|
|
30
|
+
|
|
31
|
+
if (this.calls === 1) {
|
|
32
|
+
yield {
|
|
33
|
+
id: 'msg_1',
|
|
34
|
+
delta: {
|
|
35
|
+
toolCalls: [
|
|
36
|
+
{
|
|
37
|
+
index: 0,
|
|
38
|
+
id: 'toolu_write_1',
|
|
39
|
+
type: 'function',
|
|
40
|
+
function: { name: 'write_file' },
|
|
41
|
+
},
|
|
42
|
+
],
|
|
43
|
+
},
|
|
44
|
+
}
|
|
45
|
+
yield {
|
|
46
|
+
id: 'msg_1',
|
|
47
|
+
delta: {
|
|
48
|
+
toolCalls: [
|
|
49
|
+
{
|
|
50
|
+
index: 0,
|
|
51
|
+
id: 'toolu_write_1',
|
|
52
|
+
function: {
|
|
53
|
+
arguments: '{"path":"/tmp/out.md","content":"partial',
|
|
54
|
+
},
|
|
55
|
+
},
|
|
56
|
+
],
|
|
57
|
+
},
|
|
58
|
+
}
|
|
59
|
+
throw new Error('Anthropic stream idle for 90s')
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
yield {
|
|
63
|
+
id: 'msg_2',
|
|
64
|
+
delta: { content: 'Recovered after retry guidance.' },
|
|
65
|
+
}
|
|
66
|
+
yield {
|
|
67
|
+
id: 'msg_2',
|
|
68
|
+
delta: {},
|
|
69
|
+
finishReason: 'stop',
|
|
70
|
+
usage: ZERO_USAGE,
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
describe('query stream recovery', () => {
|
|
76
|
+
let workdirs: string[] = []
|
|
77
|
+
|
|
78
|
+
afterEach(async () => {
|
|
79
|
+
await Promise.all(workdirs.map((dir) => rm(dir, { recursive: true, force: true })))
|
|
80
|
+
workdirs = []
|
|
81
|
+
})
|
|
82
|
+
|
|
83
|
+
it('turns an idle stream with partial tool JSON into retryable tool feedback', async () => {
|
|
84
|
+
const provider = new IdleDuringToolInputProvider()
|
|
85
|
+
const actualWrite = vi.fn(async () => ({ success: true, output: 'should not run' }))
|
|
86
|
+
const tools = new ToolRegistry()
|
|
87
|
+
tools.register({
|
|
88
|
+
name: 'write_file',
|
|
89
|
+
description: 'write a file',
|
|
90
|
+
inputSchema: z.object({
|
|
91
|
+
path: z.string(),
|
|
92
|
+
content: z.string(),
|
|
93
|
+
}),
|
|
94
|
+
execute: actualWrite,
|
|
95
|
+
})
|
|
96
|
+
const workingDirectory = await mkdtemp(join(tmpdir(), 'namzu-stream-recovery-'))
|
|
97
|
+
workdirs.push(workingDirectory)
|
|
98
|
+
const events: RunEvent[] = []
|
|
99
|
+
|
|
100
|
+
const run = await drainQuery(
|
|
101
|
+
{
|
|
102
|
+
provider,
|
|
103
|
+
tools,
|
|
104
|
+
runConfig: {
|
|
105
|
+
model: 'mock-model',
|
|
106
|
+
timeoutMs: 5_000,
|
|
107
|
+
tokenBudget: 100_000,
|
|
108
|
+
maxIterations: 3,
|
|
109
|
+
maxResponseTokens: 256,
|
|
110
|
+
},
|
|
111
|
+
agentId: 'agent_test',
|
|
112
|
+
agentName: 'Test Agent',
|
|
113
|
+
messages: [createUserMessage('write the file')],
|
|
114
|
+
workingDirectory,
|
|
115
|
+
sessionId: 'ses_stream_recovery' as SessionId,
|
|
116
|
+
threadId: 'thd_stream_recovery' as ThreadId,
|
|
117
|
+
projectId: 'prj_stream_recovery' as ProjectId,
|
|
118
|
+
tenantId: 'tnt_stream_recovery' as TenantId,
|
|
119
|
+
},
|
|
120
|
+
(event) => {
|
|
121
|
+
events.push(event)
|
|
122
|
+
},
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
expect(run.status).toBe('completed')
|
|
126
|
+
expect(run.result).toBe('Recovered after retry guidance.')
|
|
127
|
+
expect(provider.calls).toBe(2)
|
|
128
|
+
expect(actualWrite).not.toHaveBeenCalled()
|
|
129
|
+
|
|
130
|
+
expect(events.some((event) => event.type === 'run_failed')).toBe(false)
|
|
131
|
+
expect(
|
|
132
|
+
events.some(
|
|
133
|
+
(event) =>
|
|
134
|
+
event.type === 'tool_input_completed' &&
|
|
135
|
+
event.inputTruncated === true &&
|
|
136
|
+
JSON.stringify(event.input) === '{}',
|
|
137
|
+
),
|
|
138
|
+
).toBe(true)
|
|
139
|
+
expect(JSON.stringify(events)).not.toContain('__namzuTruncated')
|
|
140
|
+
|
|
141
|
+
const completedTool = events.find(
|
|
142
|
+
(event) => event.type === 'tool_completed' && event.toolUseId === 'toolu_write_1',
|
|
143
|
+
)
|
|
144
|
+
expect(completedTool).toMatchObject({
|
|
145
|
+
type: 'tool_completed',
|
|
146
|
+
toolName: 'write_file',
|
|
147
|
+
isError: true,
|
|
148
|
+
})
|
|
149
|
+
expect(completedTool?.type === 'tool_completed' ? completedTool.result : '').toContain(
|
|
150
|
+
'call was cut off',
|
|
151
|
+
)
|
|
152
|
+
expect(completedTool?.type === 'tool_completed' ? completedTool.result : '').toContain(
|
|
153
|
+
'extend it with edit using insertLine',
|
|
154
|
+
)
|
|
155
|
+
})
|
|
156
|
+
})
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Synthetic user prompt injected by the iteration loop when a turn
|
|
3
|
+
* ends with `stop_reason: max_tokens` AND no tool_use. Mirrors
|
|
4
|
+
* Claude.ai's "Continue" affordance: the loop pushes this message
|
|
5
|
+
* back into the conversation and fires another iteration, letting
|
|
6
|
+
* the model pick up where it was cut off.
|
|
7
|
+
*
|
|
8
|
+
* The exact string is the marker used by `resolveResult` (in
|
|
9
|
+
* `manager/run/persistence.ts`) to detect auto-continuation
|
|
10
|
+
* boundaries: when walking the message tail it skips user messages
|
|
11
|
+
* that match this constant verbatim, so the run's `result` field
|
|
12
|
+
* concatenates the full multi-turn assistant output instead of only
|
|
13
|
+
* surfacing the trailing continuation chunk.
|
|
14
|
+
*/
|
|
15
|
+
export const AUTO_CONTINUATION_USER_MESSAGE =
|
|
16
|
+
'Continue exactly where you left off. Do not repeat content you already wrote — pick up at the next token.'
|
|
@@ -7,13 +7,18 @@ import { type ProbeRegistry, probe as defaultProbeRegistry } from '../../probe/r
|
|
|
7
7
|
import type { ActivityStore } from '../../store/activity/memory.js'
|
|
8
8
|
import type { RunId } from '../../types/ids/index.js'
|
|
9
9
|
import type { InvocationState } from '../../types/invocation/index.js'
|
|
10
|
-
import { type Message, createToolMessage } from '../../types/message/index.js'
|
|
10
|
+
import { type Message, type ToolCall, createToolMessage } from '../../types/message/index.js'
|
|
11
11
|
import type { PermissionMode } from '../../types/permission/index.js'
|
|
12
12
|
import type { PluginHookResult } from '../../types/plugin/index.js'
|
|
13
13
|
import type { ChatCompletionResponse } from '../../types/provider/index.js'
|
|
14
14
|
import type { RunEvent } from '../../types/run/index.js'
|
|
15
15
|
import type { Sandbox } from '../../types/sandbox/index.js'
|
|
16
|
-
import type {
|
|
16
|
+
import type {
|
|
17
|
+
FileReadTracker,
|
|
18
|
+
ToolContext,
|
|
19
|
+
ToolRegistryContract,
|
|
20
|
+
ToolResult,
|
|
21
|
+
} from '../../types/tool/index.js'
|
|
17
22
|
import type { Logger } from '../../utils/logger.js'
|
|
18
23
|
import { compressShellOutput } from '../../utils/shell-compress.js'
|
|
19
24
|
|
|
@@ -26,6 +31,7 @@ export interface ToolExecutorConfig {
|
|
|
26
31
|
permissionMode: PermissionMode
|
|
27
32
|
env: Record<string, string>
|
|
28
33
|
abortSignal: AbortSignal
|
|
34
|
+
allowedTools?: readonly string[]
|
|
29
35
|
sandbox?: Sandbox
|
|
30
36
|
invocationState?: InvocationState
|
|
31
37
|
pluginManager?: PluginLifecycleManager
|
|
@@ -48,6 +54,13 @@ export class ToolExecutor {
|
|
|
48
54
|
private log: Logger
|
|
49
55
|
private workingStateManager?: WorkingStateManager
|
|
50
56
|
private probes: ProbeRegistry
|
|
57
|
+
private readonly readPaths: Set<string> = new Set()
|
|
58
|
+
private readonly fileReadTracker: FileReadTracker = {
|
|
59
|
+
recordRead: (key: string) => {
|
|
60
|
+
this.readPaths.add(key)
|
|
61
|
+
},
|
|
62
|
+
hasRead: (key: string) => this.readPaths.has(key),
|
|
63
|
+
}
|
|
51
64
|
|
|
52
65
|
constructor(
|
|
53
66
|
config: ToolExecutorConfig,
|
|
@@ -83,11 +96,38 @@ export class ToolExecutor {
|
|
|
83
96
|
tools: toolCalls.map((tc) => tc.function.name),
|
|
84
97
|
})
|
|
85
98
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
99
|
+
// One context per call so each execution can see its own
|
|
100
|
+
// `toolUseId`. The base context is built once; we spread + add
|
|
101
|
+
// per-call to keep allocations cheap.
|
|
102
|
+
const baseContext = this.buildToolContext()
|
|
103
|
+
|
|
104
|
+
// Respect each tool's `concurrencySafe` flag. Read-only tools
|
|
105
|
+
// (ls/grep/glob/…) run in parallel; tools that mutate shared state
|
|
106
|
+
// (edit/write/append/bash — `concurrencySafe: false`) are serialized in
|
|
107
|
+
// a single chain, so e.g. several `edit` calls to the SAME file in one
|
|
108
|
+
// turn apply one-after-another instead of racing read→modify→write
|
|
109
|
+
// (which let the last writer clobber the rest). Results are written by
|
|
110
|
+
// index to preserve the original tool-call order.
|
|
111
|
+
const results: Array<{ toolCallId: string; output: string }> = new Array(toolCalls.length)
|
|
112
|
+
const parallel: Promise<void>[] = []
|
|
113
|
+
let serial: Promise<void> = Promise.resolve()
|
|
114
|
+
toolCalls.forEach((toolCall, i) => {
|
|
115
|
+
const ctx = { ...baseContext, toolUseId: toolCall.id }
|
|
116
|
+
const run = async () => {
|
|
117
|
+
results[i] = await this.executeSingle(toolCall, ctx)
|
|
118
|
+
}
|
|
119
|
+
let input: unknown = {}
|
|
120
|
+
try {
|
|
121
|
+
input = JSON.parse(toolCall.function.arguments || '{}')
|
|
122
|
+
} catch {
|
|
123
|
+
// non-JSON args → treat as unsafe (serialize), the conservative path
|
|
124
|
+
}
|
|
125
|
+
const safe =
|
|
126
|
+
this.config.tools.get(toolCall.function.name)?.isConcurrencySafe?.(input) === true
|
|
127
|
+
if (safe) parallel.push(run())
|
|
128
|
+
else serial = serial.then(run)
|
|
129
|
+
})
|
|
130
|
+
await Promise.all([...parallel, serial])
|
|
91
131
|
|
|
92
132
|
const messages: Message[] = results.map((r) => createToolMessage(r.output, r.toolCallId))
|
|
93
133
|
|
|
@@ -108,19 +148,38 @@ export class ToolExecutor {
|
|
|
108
148
|
},
|
|
109
149
|
invocationState: this.config.invocationState,
|
|
110
150
|
toolRegistry: this.config.tools,
|
|
151
|
+
allowedTools: this.config.allowedTools,
|
|
111
152
|
sandbox: this.config.sandbox,
|
|
153
|
+
fileReadTracker: this.fileReadTracker,
|
|
112
154
|
}
|
|
113
155
|
}
|
|
114
156
|
|
|
115
157
|
private async executeSingle(
|
|
116
|
-
toolCall:
|
|
117
|
-
id: string
|
|
118
|
-
type: string
|
|
119
|
-
function: { name: string; arguments: string }
|
|
120
|
-
},
|
|
158
|
+
toolCall: ToolCall,
|
|
121
159
|
toolContext: ToolContext,
|
|
122
160
|
): Promise<{ toolCallId: string; output: string }> {
|
|
123
161
|
const toolName = toolCall.function.name
|
|
162
|
+
|
|
163
|
+
if (toolCall.metadata?.inputTruncated === true) {
|
|
164
|
+
const message = truncatedToolInputMessage(toolName)
|
|
165
|
+
await this.emitEvent({
|
|
166
|
+
type: 'tool_executing',
|
|
167
|
+
runId: this.config.runId,
|
|
168
|
+
toolUseId: toolCall.id,
|
|
169
|
+
toolName,
|
|
170
|
+
input: {},
|
|
171
|
+
})
|
|
172
|
+
await this.emitEvent({
|
|
173
|
+
type: 'tool_completed',
|
|
174
|
+
runId: this.config.runId,
|
|
175
|
+
toolUseId: toolCall.id,
|
|
176
|
+
toolName,
|
|
177
|
+
result: message,
|
|
178
|
+
isError: true,
|
|
179
|
+
})
|
|
180
|
+
return { toolCallId: toolCall.id, output: message }
|
|
181
|
+
}
|
|
182
|
+
|
|
124
183
|
let input: unknown
|
|
125
184
|
|
|
126
185
|
try {
|
|
@@ -238,7 +297,7 @@ export class ToolExecutor {
|
|
|
238
297
|
|
|
239
298
|
const rawOutput = result.success
|
|
240
299
|
? result.output
|
|
241
|
-
:
|
|
300
|
+
: formatFailedToolOutput(result.output, result.error)
|
|
242
301
|
|
|
243
302
|
let output = result.success ? this.maybeCompress(toolName, rawOutput) : rawOutput
|
|
244
303
|
|
|
@@ -430,3 +489,13 @@ export class ToolExecutor {
|
|
|
430
489
|
return compressed
|
|
431
490
|
}
|
|
432
491
|
}
|
|
492
|
+
|
|
493
|
+
/**
 * Builds the text reported for a failed tool call.
 *
 * @param output - Whatever the tool produced before failing, if anything.
 * @param error - The failure reason reported by the tool, if any.
 * @returns `Error: <reason>` on its own when there is no meaningful output,
 * otherwise the output followed by a blank line and `Error: <reason>`.
 */
function formatFailedToolOutput(output: string | undefined, error: string | undefined): string {
  // `??` alone would let an empty or whitespace-only error through and yield a
  // bare "Error: " with no explanation, so blank reasons get the default too.
  const hasReason = error !== undefined && error !== null && error.trim().length > 0
  const errorText = `Error: ${hasReason ? error : 'Tool execution failed'}`

  // Blank output carries no information; report only the error line.
  if (!output || output.trim().length === 0) return errorText

  return `${output}\n\n${errorText}`
}
|
|
498
|
+
|
|
499
|
+
/**
 * Produces the error text returned in place of a tool result when the tool
 * call's JSON arguments were cut off mid-stream and the tool was not run.
 *
 * @param toolName - Name of the tool whose call was truncated; quoted verbatim
 * in the message.
 * @returns A single-line message explaining the failure and how to retry.
 */
function truncatedToolInputMessage(toolName: string): string {
  const sentences = [
    `Error: Tool "${toolName}" call was cut off while the model was streaming JSON arguments.`,
    'The tool was NOT executed.',
    'Retry with a much shorter input.',
    'Self-budget any content/newStr payload under 12000 characters before calling file tools.',
    'For long files, create a short opening with write, then extend it with edit using insertLine: "end" in bounded section chunks; for delegated work, pass a shared workspace filename/reference instead of embedding the content in the tool call.',
  ]
  return sentences.join(' ')
}
|
|
@@ -224,6 +224,8 @@ export async function* query(params: QueryParams): AsyncGenerator<RunEvent, Run>
|
|
|
224
224
|
}
|
|
225
225
|
}
|
|
226
226
|
|
|
227
|
+
const effectiveAllowedTools = withDeferredDiscoveryTool(params.tools, params.allowedTools)
|
|
228
|
+
|
|
227
229
|
const toolExecutor = ToolingBootstrap.init(
|
|
228
230
|
{
|
|
229
231
|
tools: params.tools,
|
|
@@ -232,6 +234,7 @@ export async function* query(params: QueryParams): AsyncGenerator<RunEvent, Run>
|
|
|
232
234
|
permissionMode: ctx.permissionMode,
|
|
233
235
|
env: params.runConfig.env ?? {},
|
|
234
236
|
abortSignal: ctx.abortController.signal,
|
|
237
|
+
allowedTools: effectiveAllowedTools,
|
|
235
238
|
invocationState: params.invocationState,
|
|
236
239
|
pluginManager: params.pluginManager,
|
|
237
240
|
},
|
|
@@ -252,7 +255,7 @@ export async function* query(params: QueryParams): AsyncGenerator<RunEvent, Run>
|
|
|
252
255
|
skills: params.skills,
|
|
253
256
|
basePrompt: params.basePrompt,
|
|
254
257
|
tools: params.tools,
|
|
255
|
-
allowedTools:
|
|
258
|
+
allowedTools: effectiveAllowedTools,
|
|
256
259
|
runtimeContext: params.runtimeContext,
|
|
257
260
|
})
|
|
258
261
|
|
|
@@ -312,7 +315,7 @@ export async function* query(params: QueryParams): AsyncGenerator<RunEvent, Run>
|
|
|
312
315
|
provider: params.provider,
|
|
313
316
|
runConfig: params.runConfig,
|
|
314
317
|
tools: params.tools,
|
|
315
|
-
allowedTools:
|
|
318
|
+
allowedTools: effectiveAllowedTools,
|
|
316
319
|
taskGateway: params.taskGateway,
|
|
317
320
|
taskStore: params.taskStore,
|
|
318
321
|
launchedTasks: params.launchedTasks,
|
|
@@ -370,7 +373,7 @@ export async function* query(params: QueryParams): AsyncGenerator<RunEvent, Run>
|
|
|
370
373
|
skills: params.skills,
|
|
371
374
|
basePrompt: contextLevel === 'full' ? params.basePrompt : undefined,
|
|
372
375
|
tools: params.tools,
|
|
373
|
-
allowedTools:
|
|
376
|
+
allowedTools: effectiveAllowedTools,
|
|
374
377
|
runtimeContext: params.runtimeContext,
|
|
375
378
|
}
|
|
376
379
|
|
|
@@ -536,3 +539,21 @@ export async function drainQuery(
|
|
|
536
539
|
|
|
537
540
|
return result.value
|
|
538
541
|
}
|
|
542
|
+
|
|
543
|
+
/**
 * Extends an allow-list with the `SearchToolsTool` name when the list contains
 * at least one registered tool whose availability is `'deferred'`.
 * (Presumably so deferred tools stay discoverable; confirm against
 * `SearchToolsTool`.)
 *
 * The input list is returned unchanged when it already names the search tool,
 * when none of its entries is a registered deferred tool, or when the search
 * tool is not registered with `'active'` availability.
 *
 * @param tools - Registry queried for tool presence and availability.
 * @param allowedTools - Optional allow-list of tool names; never mutated.
 * @returns `undefined` when no allow-list was given, the same array when no
 * change is needed, otherwise a new array with the search tool name appended.
 */
function withDeferredDiscoveryTool(
  tools: ToolRegistryContract,
  allowedTools?: string[],
): string[] | undefined {
  if (!allowedTools) return undefined

  const discoveryName = SearchToolsTool.name
  const isRegisteredDeferred = (name: string): boolean =>
    tools.has(name) && tools.getAvailability(name) === 'deferred'

  // Short-circuit order mirrors the guard sequence: already listed, nothing
  // deferred, search tool missing, search tool not active.
  const shouldAppend =
    !allowedTools.includes(discoveryName) &&
    allowedTools.some(isRegisteredDeferred) &&
    tools.has(discoveryName) &&
    tools.getAvailability(discoveryName) === 'active'

  return shouldAppend ? [...allowedTools, discoveryName] : allowedTools
}
|