@amodalai/runtime 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/__fixtures__/README.md +4 -0
- package/dist/src/{agent/user-context-fetcher.test.d.ts → __fixtures__/e2e.test.d.ts} +1 -1
- package/dist/src/__fixtures__/e2e.test.js +211 -0
- package/dist/src/__fixtures__/e2e.test.js.map +1 -0
- package/dist/src/__fixtures__/smoke-agent/automations/delivery-callback-test.json +9 -0
- package/dist/src/__fixtures__/smoke-agent/connections/mock-mcp/spec.json +1 -1
- package/dist/src/__fixtures__/smoke.test.js +715 -29
- package/dist/src/__fixtures__/smoke.test.js.map +1 -1
- package/dist/src/__fixtures__/test-env.d.ts +27 -0
- package/dist/src/__fixtures__/test-env.js +64 -0
- package/dist/src/__fixtures__/test-env.js.map +1 -0
- package/dist/src/__fixtures__/test-helpers.d.ts +30 -0
- package/dist/src/__fixtures__/test-helpers.js +120 -0
- package/dist/src/__fixtures__/test-helpers.js.map +1 -0
- package/dist/src/agent/agent-types.d.ts +22 -0
- package/dist/src/agent/agent-types.js.map +1 -1
- package/dist/src/agent/automation-bridge.d.ts +9 -0
- package/dist/src/agent/automation-bridge.js +26 -0
- package/dist/src/agent/automation-bridge.js.map +1 -1
- package/dist/src/agent/automation-bridge.test.js +63 -0
- package/dist/src/agent/automation-bridge.test.js.map +1 -1
- package/dist/src/agent/local-server.d.ts +0 -7
- package/dist/src/agent/local-server.js +274 -87
- package/dist/src/agent/local-server.js.map +1 -1
- package/dist/src/agent/local-server.test.js +14 -11
- package/dist/src/agent/local-server.test.js.map +1 -1
- package/dist/src/agent/loop-types.d.ts +81 -7
- package/dist/src/agent/loop-types.js +4 -0
- package/dist/src/agent/loop-types.js.map +1 -1
- package/dist/src/agent/loop.js +16 -3
- package/dist/src/agent/loop.js.map +1 -1
- package/dist/src/agent/loop.test.js +572 -10
- package/dist/src/agent/loop.test.js.map +1 -1
- package/dist/src/agent/page-builder.js +20 -17
- package/dist/src/agent/page-builder.js.map +1 -1
- package/dist/src/agent/proactive/delivery-router.d.ts +68 -0
- package/dist/src/agent/proactive/delivery-router.js +337 -0
- package/dist/src/agent/proactive/delivery-router.js.map +1 -0
- package/dist/src/agent/proactive/delivery-router.test.d.ts +6 -0
- package/dist/src/agent/proactive/delivery-router.test.js +455 -0
- package/dist/src/agent/proactive/delivery-router.test.js.map +1 -0
- package/dist/src/agent/proactive/proactive-runner.d.ts +23 -1
- package/dist/src/agent/proactive/proactive-runner.js +42 -10
- package/dist/src/agent/proactive/proactive-runner.js.map +1 -1
- package/dist/src/agent/proactive/proactive-runner.test.js +0 -3
- package/dist/src/agent/proactive/proactive-runner.test.js.map +1 -1
- package/dist/src/agent/routes/admin-chat-abort.test.d.ts +6 -0
- package/dist/src/agent/routes/admin-chat-abort.test.js +206 -0
- package/dist/src/agent/routes/admin-chat-abort.test.js.map +1 -0
- package/dist/src/agent/routes/admin-chat.js +0 -3
- package/dist/src/agent/routes/admin-chat.js.map +1 -1
- package/dist/src/agent/routes/files.js +46 -52
- package/dist/src/agent/routes/files.js.map +1 -1
- package/dist/src/agent/routes/inspect.js +4 -6
- package/dist/src/agent/routes/inspect.js.map +1 -1
- package/dist/src/agent/routes/task.test.js +0 -3
- package/dist/src/agent/routes/task.test.js.map +1 -1
- package/dist/src/agent/snapshot-server.js +37 -3
- package/dist/src/agent/snapshot-server.js.map +1 -1
- package/dist/src/agent/states/compacting.js +5 -3
- package/dist/src/agent/states/compacting.js.map +1 -1
- package/dist/src/agent/states/confirming.js +3 -0
- package/dist/src/agent/states/confirming.js.map +1 -1
- package/dist/src/agent/states/dispatching.js +45 -2
- package/dist/src/agent/states/dispatching.js.map +1 -1
- package/dist/src/agent/states/executing.js +225 -81
- package/dist/src/agent/states/executing.js.map +1 -1
- package/dist/src/agent/states/streaming.js +14 -0
- package/dist/src/agent/states/streaming.js.map +1 -1
- package/dist/src/agent/states/thinking.d.ts +1 -1
- package/dist/src/agent/states/thinking.js +246 -29
- package/dist/src/agent/states/thinking.js.map +1 -1
- package/dist/src/agent/token-estimate.d.ts +20 -6
- package/dist/src/agent/token-estimate.js +24 -3
- package/dist/src/agent/token-estimate.js.map +1 -1
- package/dist/src/agent/token-estimate.test.d.ts +6 -0
- package/dist/src/agent/token-estimate.test.js +44 -0
- package/dist/src/agent/token-estimate.test.js.map +1 -0
- package/dist/src/agent/tool-executor-local.test.js +0 -1
- package/dist/src/agent/tool-executor-local.test.js.map +1 -1
- package/dist/src/agent/tool-harness-template.js +0 -1
- package/dist/src/agent/tool-harness-template.js.map +1 -1
- package/dist/src/api/create-agent.js +1 -5
- package/dist/src/api/create-agent.js.map +1 -1
- package/dist/src/api/types.d.ts +1 -5
- package/dist/src/channels/bootstrap.d.ts +59 -0
- package/dist/src/channels/bootstrap.js +84 -0
- package/dist/src/channels/bootstrap.js.map +1 -0
- package/dist/src/channels/channel-session-mapper.d.ts +42 -0
- package/dist/src/channels/channel-session-mapper.js +91 -0
- package/dist/src/channels/channel-session-mapper.js.map +1 -0
- package/dist/src/channels/dedup-cache.d.ts +17 -0
- package/dist/src/channels/dedup-cache.js +51 -0
- package/dist/src/channels/dedup-cache.js.map +1 -0
- package/dist/src/channels/dedup-cache.test.d.ts +6 -0
- package/dist/src/channels/dedup-cache.test.js +51 -0
- package/dist/src/channels/dedup-cache.test.js.map +1 -0
- package/dist/src/channels/errors.d.ts +28 -0
- package/dist/src/channels/errors.js +38 -0
- package/dist/src/channels/errors.js.map +1 -0
- package/dist/src/channels/in-memory-session-mapper.d.ts +34 -0
- package/dist/src/channels/in-memory-session-mapper.js +50 -0
- package/dist/src/channels/in-memory-session-mapper.js.map +1 -0
- package/dist/src/channels/plugin-loader.d.ts +20 -0
- package/dist/src/channels/plugin-loader.js +136 -0
- package/dist/src/channels/plugin-loader.js.map +1 -0
- package/dist/src/channels/plugin-loader.test.d.ts +6 -0
- package/dist/src/channels/plugin-loader.test.js +113 -0
- package/dist/src/channels/plugin-loader.test.js.map +1 -0
- package/dist/src/channels/routes.d.ts +29 -0
- package/dist/src/channels/routes.js +165 -0
- package/dist/src/channels/routes.js.map +1 -0
- package/dist/src/config.d.ts +0 -2
- package/dist/src/config.js +0 -1
- package/dist/src/config.js.map +1 -1
- package/dist/src/config.test.js +0 -2
- package/dist/src/config.test.js.map +1 -1
- package/dist/src/context/compiler.js +11 -34
- package/dist/src/context/compiler.js.map +1 -1
- package/dist/src/context/compiler.test.js +7 -60
- package/dist/src/context/compiler.test.js.map +1 -1
- package/dist/src/context/types.d.ts +0 -4
- package/dist/src/env-ref.d.ts +13 -0
- package/dist/src/env-ref.js +31 -0
- package/dist/src/env-ref.js.map +1 -0
- package/dist/src/env-ref.test.d.ts +6 -0
- package/dist/src/env-ref.test.js +34 -0
- package/dist/src/env-ref.test.js.map +1 -0
- package/dist/src/errors.d.ts +15 -0
- package/dist/src/errors.js +22 -0
- package/dist/src/errors.js.map +1 -1
- package/dist/src/errors.test.js +2 -2
- package/dist/src/errors.test.js.map +1 -1
- package/dist/src/events/event-bus.d.ts +54 -0
- package/dist/src/events/event-bus.js +84 -0
- package/dist/src/events/event-bus.js.map +1 -0
- package/dist/src/events/event-bus.test.d.ts +6 -0
- package/dist/src/events/event-bus.test.js +112 -0
- package/dist/src/events/event-bus.test.js.map +1 -0
- package/dist/src/events/events-route.d.ts +36 -0
- package/dist/src/events/events-route.js +80 -0
- package/dist/src/events/events-route.js.map +1 -0
- package/dist/src/events/events-route.test.d.ts +6 -0
- package/dist/src/events/events-route.test.js +134 -0
- package/dist/src/events/events-route.test.js.map +1 -0
- package/dist/src/events/store-event-wrapper.d.ts +19 -0
- package/dist/src/events/store-event-wrapper.js +57 -0
- package/dist/src/events/store-event-wrapper.js.map +1 -0
- package/dist/src/events/store-event-wrapper.test.d.ts +6 -0
- package/dist/src/events/store-event-wrapper.test.js +91 -0
- package/dist/src/events/store-event-wrapper.test.js.map +1 -0
- package/dist/src/index.d.ts +13 -0
- package/dist/src/index.js +10 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/middleware/auth.d.ts +0 -2
- package/dist/src/middleware/auth.js.map +1 -1
- package/dist/src/providers/search-provider.d.ts +64 -0
- package/dist/src/providers/search-provider.js +174 -0
- package/dist/src/providers/search-provider.js.map +1 -0
- package/dist/src/providers/types.d.ts +8 -0
- package/dist/src/routes/ai-stream.d.ts +18 -4
- package/dist/src/routes/ai-stream.js +10 -2
- package/dist/src/routes/ai-stream.js.map +1 -1
- package/dist/src/routes/chat-stream.d.ts +9 -1
- package/dist/src/routes/chat-stream.js +3 -1
- package/dist/src/routes/chat-stream.js.map +1 -1
- package/dist/src/routes/chat.d.ts +6 -0
- package/dist/src/routes/chat.js +2 -1
- package/dist/src/routes/chat.js.map +1 -1
- package/dist/src/routes/session-resolver.d.ts +15 -2
- package/dist/src/routes/session-resolver.js +22 -25
- package/dist/src/routes/session-resolver.js.map +1 -1
- package/dist/src/routes/session-resolver.test.js +117 -20
- package/dist/src/routes/session-resolver.test.js.map +1 -1
- package/dist/src/server.d.ts +35 -1
- package/dist/src/server.js +33 -0
- package/dist/src/server.js.map +1 -1
- package/dist/src/session/drizzle-session-store.d.ts +57 -0
- package/dist/src/session/drizzle-session-store.js +204 -0
- package/dist/src/session/drizzle-session-store.js.map +1 -0
- package/dist/src/session/manager.d.ts +6 -3
- package/dist/src/session/manager.js +46 -19
- package/dist/src/session/manager.js.map +1 -1
- package/dist/src/session/manager.test.js +12 -18
- package/dist/src/session/manager.test.js.map +1 -1
- package/dist/src/session/pglite-session-store.d.ts +23 -0
- package/dist/src/session/pglite-session-store.js +86 -0
- package/dist/src/session/pglite-session-store.js.map +1 -0
- package/dist/src/session/postgres-session-store.d.ts +44 -0
- package/dist/src/session/postgres-session-store.js +153 -0
- package/dist/src/session/postgres-session-store.js.map +1 -0
- package/dist/src/session/session-builder.d.ts +0 -5
- package/dist/src/session/session-builder.js +22 -6
- package/dist/src/session/session-builder.js.map +1 -1
- package/dist/src/session/session-builder.test.js +3 -8
- package/dist/src/session/session-builder.test.js.map +1 -1
- package/dist/src/session/session-store-selector.d.ts +49 -0
- package/dist/src/session/session-store-selector.js +60 -0
- package/dist/src/session/session-store-selector.js.map +1 -0
- package/dist/src/session/session-store-selector.test.d.ts +6 -0
- package/dist/src/session/session-store-selector.test.js +79 -0
- package/dist/src/session/session-store-selector.test.js.map +1 -0
- package/dist/src/session/store.d.ts +146 -32
- package/dist/src/session/store.js +126 -138
- package/dist/src/session/store.js.map +1 -1
- package/dist/src/session/store.test.js +385 -107
- package/dist/src/session/store.test.js.map +1 -1
- package/dist/src/session/tool-context-factory.d.ts +3 -7
- package/dist/src/session/tool-context-factory.js +1 -3
- package/dist/src/session/tool-context-factory.js.map +1 -1
- package/dist/src/session/tool-context-factory.test.js +1 -6
- package/dist/src/session/tool-context-factory.test.js.map +1 -1
- package/dist/src/session/types.d.ts +13 -10
- package/dist/src/stores/schema.d.ts +111 -34
- package/dist/src/stores/schema.js +21 -4
- package/dist/src/stores/schema.js.map +1 -1
- package/dist/src/tools/admin-file-tools.d.ts +29 -0
- package/dist/src/tools/admin-file-tools.js +527 -13
- package/dist/src/tools/admin-file-tools.js.map +1 -1
- package/dist/src/tools/admin-file-tools.test.js +380 -9
- package/dist/src/tools/admin-file-tools.test.js.map +1 -1
- package/dist/src/tools/custom-tool-adapter.js +0 -1
- package/dist/src/tools/custom-tool-adapter.js.map +1 -1
- package/dist/src/tools/custom-tool-adapter.test.js +0 -2
- package/dist/src/tools/custom-tool-adapter.test.js.map +1 -1
- package/dist/src/tools/dispatch-tool.d.ts +4 -4
- package/dist/src/tools/fetch-url-tool.d.ts +23 -0
- package/dist/src/tools/fetch-url-tool.js +333 -0
- package/dist/src/tools/fetch-url-tool.js.map +1 -0
- package/dist/src/tools/fetch-url-tool.test.d.ts +6 -0
- package/dist/src/tools/fetch-url-tool.test.js +227 -0
- package/dist/src/tools/fetch-url-tool.test.js.map +1 -0
- package/dist/src/tools/mcp-tool-adapter.test.js +0 -2
- package/dist/src/tools/mcp-tool-adapter.test.js.map +1 -1
- package/dist/src/tools/registry.test.js +0 -2
- package/dist/src/tools/registry.test.js.map +1 -1
- package/dist/src/tools/request-tool.test.js +0 -2
- package/dist/src/tools/request-tool.test.js.map +1 -1
- package/dist/src/tools/store-tools.test.js +0 -2
- package/dist/src/tools/store-tools.test.js.map +1 -1
- package/dist/src/tools/types.d.ts +20 -7
- package/dist/src/tools/web-search-tool.d.ts +31 -0
- package/dist/src/tools/web-search-tool.js +170 -0
- package/dist/src/tools/web-search-tool.js.map +1 -0
- package/dist/src/tools/web-search-tool.test.d.ts +6 -0
- package/dist/src/tools/web-search-tool.test.js +152 -0
- package/dist/src/tools/web-search-tool.test.js.map +1 -0
- package/dist/src/tools/web-tools-shared.d.ts +21 -0
- package/dist/src/tools/web-tools-shared.js +32 -0
- package/dist/src/tools/web-tools-shared.js.map +1 -0
- package/dist/src/types.d.ts +20 -4
- package/dist/src/types.js +13 -2
- package/dist/src/types.js.map +1 -1
- package/dist/src/types.test.js +0 -3
- package/dist/src/types.test.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +17 -3
- package/dist/src/agent/session-store.d.ts +0 -71
- package/dist/src/agent/session-store.js +0 -151
- package/dist/src/agent/session-store.js.map +0 -1
- package/dist/src/agent/user-context-fetcher.d.ts +0 -25
- package/dist/src/agent/user-context-fetcher.js +0 -79
- package/dist/src/agent/user-context-fetcher.js.map +0 -1
- package/dist/src/agent/user-context-fetcher.test.js +0 -121
- package/dist/src/agent/user-context-fetcher.test.js.map +0 -1
- package/dist/src/session/admin-file-tools.d.ts +0 -136
- package/dist/src/session/admin-file-tools.js +0 -240
- package/dist/src/session/admin-file-tools.js.map +0 -1
|
@@ -97,8 +97,6 @@ function makeMockContext(overrides) {
|
|
|
97
97
|
logger,
|
|
98
98
|
signal: new AbortController().signal,
|
|
99
99
|
sessionId: 'test-session',
|
|
100
|
-
tenantId: 'test-tenant',
|
|
101
|
-
user: { roles: ['user'] },
|
|
102
100
|
systemPrompt: 'You are a helpful assistant.',
|
|
103
101
|
messages: [],
|
|
104
102
|
usage: makeUsage(),
|
|
@@ -108,16 +106,16 @@ function makeMockContext(overrides) {
|
|
|
108
106
|
config: { ...DEFAULT_LOOP_CONFIG },
|
|
109
107
|
compactionFailures: 0,
|
|
110
108
|
preExecutionCache: new Map(),
|
|
109
|
+
confirmedCallIds: new Set(),
|
|
110
|
+
disabledToolsUntilTurn: new Map(),
|
|
111
111
|
waitForConfirmation: vi.fn().mockResolvedValue(true),
|
|
112
112
|
buildToolContext: vi.fn().mockReturnValue({
|
|
113
113
|
request: vi.fn(),
|
|
114
114
|
store: vi.fn(),
|
|
115
115
|
env: vi.fn(),
|
|
116
116
|
log: vi.fn(),
|
|
117
|
-
user: { roles: [] },
|
|
118
117
|
signal: new AbortController().signal,
|
|
119
118
|
sessionId: 'test-session',
|
|
120
|
-
tenantId: 'test-tenant',
|
|
121
119
|
}),
|
|
122
120
|
...overrides,
|
|
123
121
|
};
|
|
@@ -192,12 +190,14 @@ describe('handleThinking (via transition)', () => {
|
|
|
192
190
|
expect(errorEvents.length).toBe(1);
|
|
193
191
|
});
|
|
194
192
|
it('detects loops with similar (not identical) parameters', async () => {
|
|
195
|
-
// Build messages where the same tool is called with slightly different
|
|
193
|
+
// Build messages where the same tool is called with slightly different
|
|
194
|
+
// params. Use a non-pagination key (retry_count) — pagination keys
|
|
195
|
+
// (offset/limit/page/cursor) are treated as iteration, not loops.
|
|
196
196
|
const messages = [];
|
|
197
197
|
for (let i = 0; i < 8; i++) {
|
|
198
198
|
messages.push({
|
|
199
199
|
role: 'assistant',
|
|
200
|
-
content: [{ type: 'tool-call', toolCallId: `c${i}`, toolName: 'search_api', input: { query: 'test',
|
|
200
|
+
content: [{ type: 'tool-call', toolCallId: `c${i}`, toolName: 'search_api', input: { query: 'test', retry_count: i } }],
|
|
201
201
|
});
|
|
202
202
|
messages.push({
|
|
203
203
|
role: 'tool',
|
|
@@ -212,6 +212,195 @@ describe('handleThinking (via transition)', () => {
|
|
|
212
212
|
expect(result.next.reason).toBe('loop_detected');
|
|
213
213
|
}
|
|
214
214
|
});
|
|
215
|
+
it('does NOT detect pagination as a loop (offset/limit/page variants)', async () => {
|
|
216
|
+
// Agent walking a long file in chunks is legitimate iteration — same
|
|
217
|
+
// tool, same path, different offset. Must not trip the loop detector.
|
|
218
|
+
const messages = [];
|
|
219
|
+
for (let i = 0; i < 8; i++) {
|
|
220
|
+
messages.push({
|
|
221
|
+
role: 'assistant',
|
|
222
|
+
content: [{
|
|
223
|
+
type: 'tool-call',
|
|
224
|
+
toolCallId: `c${i}`,
|
|
225
|
+
toolName: 'read_repo_file',
|
|
226
|
+
input: { path: 'knowledge/big.md', offset: 1 + i * 2000, limit: 2000 },
|
|
227
|
+
}],
|
|
228
|
+
});
|
|
229
|
+
messages.push({
|
|
230
|
+
role: 'tool',
|
|
231
|
+
content: [{ type: 'tool-result', toolCallId: `c${i}`, toolName: 'read_repo_file', output: { type: 'text', value: 'chunk' } }],
|
|
232
|
+
});
|
|
233
|
+
}
|
|
234
|
+
const ctx = makeMockContext();
|
|
235
|
+
const result = await transition({ type: 'thinking', messages }, ctx);
|
|
236
|
+
// Should NOT be done — the loop detector should have skipped these.
|
|
237
|
+
expect(result.next.type).not.toBe('done');
|
|
238
|
+
});
|
|
239
|
+
it('replaces old tool results with summarizer output when hook is set', async () => {
|
|
240
|
+
// Build 20 tool-result messages so clearing triggers (threshold=15 by default)
|
|
241
|
+
const messages = [];
|
|
242
|
+
for (let i = 0; i < 20; i++) {
|
|
243
|
+
messages.push({
|
|
244
|
+
role: 'tool',
|
|
245
|
+
content: [{
|
|
246
|
+
type: 'tool-result',
|
|
247
|
+
toolCallId: `c${i}`,
|
|
248
|
+
toolName: 'search_api',
|
|
249
|
+
output: { type: 'text', value: `result body ${i} with lots of content` },
|
|
250
|
+
}],
|
|
251
|
+
});
|
|
252
|
+
}
|
|
253
|
+
const summarizer = vi.fn().mockResolvedValue('found 3 matching records');
|
|
254
|
+
const ctx = makeMockContext({ summarizeToolResult: summarizer });
|
|
255
|
+
await transition({ type: 'thinking', messages }, ctx);
|
|
256
|
+
// Summarizer should have been called for the cleared (non-kept) messages.
|
|
257
|
+
// threshold=15, keepRecent=5 → 15 cleared (all but the last 5).
|
|
258
|
+
expect(summarizer).toHaveBeenCalled();
|
|
259
|
+
expect(summarizer.mock.calls.length).toBe(15);
|
|
260
|
+
// Verify the summary is wired through — the messages passed to streamText
|
|
261
|
+
// should include the summary text in a cleared marker.
|
|
262
|
+
const streamTextCall = vi.mocked(ctx.provider.streamText).mock.calls[0][0];
|
|
263
|
+
const passedMessages = streamTextCall.messages;
|
|
264
|
+
const clearedMsg = passedMessages[0];
|
|
265
|
+
if (clearedMsg.role === 'tool' && Array.isArray(clearedMsg.content)) {
|
|
266
|
+
const part = clearedMsg.content[0];
|
|
267
|
+
if ('output' in part && part.output && typeof part.output === 'object' && 'value' in part.output) {
|
|
268
|
+
expect(String(part.output.value)).toContain('found 3 matching records');
|
|
269
|
+
expect(String(part.output.value)).toContain('search_api');
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
});
|
|
273
|
+
it('falls back to static marker when summarizer throws', async () => {
|
|
274
|
+
const messages = [];
|
|
275
|
+
for (let i = 0; i < 20; i++) {
|
|
276
|
+
messages.push({
|
|
277
|
+
role: 'tool',
|
|
278
|
+
content: [{
|
|
279
|
+
type: 'tool-result',
|
|
280
|
+
toolCallId: `c${i}`,
|
|
281
|
+
toolName: 'flaky_tool',
|
|
282
|
+
output: { type: 'text', value: `body ${i}` },
|
|
283
|
+
}],
|
|
284
|
+
});
|
|
285
|
+
}
|
|
286
|
+
const summarizer = vi.fn().mockRejectedValue(new Error('haiku unavailable'));
|
|
287
|
+
const ctx = makeMockContext({ summarizeToolResult: summarizer });
|
|
288
|
+
await transition({ type: 'thinking', messages }, ctx);
|
|
289
|
+
// Summarizer was called but threw; we should still proceed with static marker
|
|
290
|
+
expect(summarizer).toHaveBeenCalled();
|
|
291
|
+
const streamTextCall = vi.mocked(ctx.provider.streamText).mock.calls[0][0];
|
|
292
|
+
const clearedMsg = streamTextCall.messages[0];
|
|
293
|
+
if (clearedMsg.role === 'tool' && Array.isArray(clearedMsg.content)) {
|
|
294
|
+
const part = clearedMsg.content[0];
|
|
295
|
+
if ('output' in part && part.output && typeof part.output === 'object' && 'value' in part.output) {
|
|
296
|
+
expect(String(part.output.value)).toContain('Tool result cleared');
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
// The failure should have been logged
|
|
300
|
+
expect(ctx.logger.warn).toHaveBeenCalledWith('tool_result_summarization_failed', expect.objectContaining({ tool: 'flaky_tool' }));
|
|
301
|
+
});
|
|
302
|
+
it('skips already-cleared messages (idempotent)', async () => {
|
|
303
|
+
// Already-cleared messages keep their original toolCallId (otherwise
|
|
304
|
+
// providers reject "orphaned" assistant tool-calls). Detection is by
|
|
305
|
+
// output-value prefix: "[Tool result cleared..." or "[Summary of ...".
|
|
306
|
+
const messages = [];
|
|
307
|
+
for (let i = 0; i < 20; i++) {
|
|
308
|
+
messages.push({
|
|
309
|
+
role: 'tool',
|
|
310
|
+
content: [{
|
|
311
|
+
type: 'tool-result',
|
|
312
|
+
toolCallId: `c${i}`,
|
|
313
|
+
toolName: 'search_api',
|
|
314
|
+
// First 15 are already-cleared (marker prefix); last 5 are fresh
|
|
315
|
+
output: {
|
|
316
|
+
type: 'text',
|
|
317
|
+
value: i < 15 ? '[Tool result cleared to save context space]' : `body ${i}`,
|
|
318
|
+
},
|
|
319
|
+
}],
|
|
320
|
+
});
|
|
321
|
+
}
|
|
322
|
+
const summarizer = vi.fn().mockResolvedValue('summary');
|
|
323
|
+
const ctx = makeMockContext({ summarizeToolResult: summarizer });
|
|
324
|
+
await transition({ type: 'thinking', messages }, ctx);
|
|
325
|
+
// Clearing kicks in (20 > 15), but all 15 candidates are already cleared.
|
|
326
|
+
// Summarizer should NOT be called for already-cleared messages.
|
|
327
|
+
expect(summarizer).not.toHaveBeenCalled();
|
|
328
|
+
});
|
|
329
|
+
it('escalates at loopEscalationThreshold: stronger warn + removes looping tool', async () => {
|
|
330
|
+
// Build messages with 5 tool calls so count hits escalation (default=5)
|
|
331
|
+
// but not hard-stop (default=8).
|
|
332
|
+
const messages = [];
|
|
333
|
+
for (let i = 0; i < 5; i++) {
|
|
334
|
+
messages.push({
|
|
335
|
+
role: 'assistant',
|
|
336
|
+
content: [{ type: 'tool-call', toolCallId: `c${i}`, toolName: 'stuck_api', input: { q: 'same' } }],
|
|
337
|
+
});
|
|
338
|
+
messages.push({
|
|
339
|
+
role: 'tool',
|
|
340
|
+
content: [{ type: 'tool-result', toolCallId: `c${i}`, toolName: 'stuck_api', output: { type: 'text', value: 'no progress' } }],
|
|
341
|
+
});
|
|
342
|
+
}
|
|
343
|
+
// Registry has both the looping tool and another tool
|
|
344
|
+
const stuckTool = makeMockToolDef({ description: 'Stuck tool' });
|
|
345
|
+
const otherTool = makeMockToolDef({ description: 'Other tool' });
|
|
346
|
+
const registry = makeMockRegistry({ stuck_api: stuckTool, other_tool: otherTool });
|
|
347
|
+
const ctx = makeMockContext({ toolRegistry: registry });
|
|
348
|
+
const result = await transition({ type: 'thinking', messages }, ctx);
|
|
349
|
+
// Should still stream (not hard-stop)
|
|
350
|
+
expect(result.next.type).toBe('streaming');
|
|
351
|
+
// The escalation-level warn should have been logged
|
|
352
|
+
expect(ctx.logger.warn).toHaveBeenCalledWith('agent_loop_escalation', expect.objectContaining({ tool: 'stuck_api', count: 5 }));
|
|
353
|
+
// The looping tool should be EXCLUDED from this turn's tool set
|
|
354
|
+
const streamTextCall = vi.mocked(ctx.provider.streamText).mock.calls[0][0];
|
|
355
|
+
const passedTools = streamTextCall.tools;
|
|
356
|
+
expect(passedTools['stuck_api']).toBeUndefined();
|
|
357
|
+
expect(passedTools['other_tool']).toBeDefined();
|
|
358
|
+
// Escalation message should be appended
|
|
359
|
+
const lastMsg = streamTextCall.messages[streamTextCall.messages.length - 1];
|
|
360
|
+
expect(lastMsg.role).toBe('user');
|
|
361
|
+
if (typeof lastMsg.content === 'string') {
|
|
362
|
+
expect(lastMsg.content).toContain('temporarily disabled');
|
|
363
|
+
expect(lastMsg.content).toContain('stuck_api');
|
|
364
|
+
}
|
|
365
|
+
// The looping tool should be registered in the cooldown map, not just
|
|
366
|
+
// filtered once-off — so subsequent turns also skip it.
|
|
367
|
+
expect(ctx.disabledToolsUntilTurn.has('stuck_api')).toBe(true);
|
|
368
|
+
});
|
|
369
|
+
it('escalation cooldown keeps tool disabled across subsequent turns', async () => {
|
|
370
|
+
// Simulate a session where escalation fires at turn 5 with default
|
|
371
|
+
// cooldown of 3, then the agent moves on. The looping tool should be
|
|
372
|
+
// excluded from turns 5-7 and return at turn 8.
|
|
373
|
+
const stuckTool = makeMockToolDef({ description: 'Stuck tool' });
|
|
374
|
+
const otherTool = makeMockToolDef({ description: 'Other tool' });
|
|
375
|
+
const registry = makeMockRegistry({ stuck_api: stuckTool, other_tool: otherTool });
|
|
376
|
+
const ctx = makeMockContext({ toolRegistry: registry });
|
|
377
|
+
// Pre-populate the cooldown as if escalation fired at turn 5
|
|
378
|
+
ctx.turnCount = 4; // next turn will be 5
|
|
379
|
+
ctx.disabledToolsUntilTurn.set('stuck_api', 8); // disable until turn 8
|
|
380
|
+
// Turn 5: tool still disabled
|
|
381
|
+
await transition({ type: 'thinking', messages: [] }, ctx);
|
|
382
|
+
let streamTextCall = vi.mocked(ctx.provider.streamText).mock.calls[0][0];
|
|
383
|
+
let passedTools = streamTextCall.tools;
|
|
384
|
+
expect(passedTools['stuck_api']).toBeUndefined();
|
|
385
|
+
expect(passedTools['other_tool']).toBeDefined();
|
|
386
|
+
// Turn 6: still disabled
|
|
387
|
+
await transition({ type: 'thinking', messages: [] }, ctx);
|
|
388
|
+
streamTextCall = vi.mocked(ctx.provider.streamText).mock.calls[1][0];
|
|
389
|
+
passedTools = streamTextCall.tools;
|
|
390
|
+
expect(passedTools['stuck_api']).toBeUndefined();
|
|
391
|
+
// Turn 7: still disabled (turnCount=7, untilTurn=8)
|
|
392
|
+
await transition({ type: 'thinking', messages: [] }, ctx);
|
|
393
|
+
streamTextCall = vi.mocked(ctx.provider.streamText).mock.calls[2][0];
|
|
394
|
+
passedTools = streamTextCall.tools;
|
|
395
|
+
expect(passedTools['stuck_api']).toBeUndefined();
|
|
396
|
+
// Turn 8: cooldown expired — tool back in the set
|
|
397
|
+
await transition({ type: 'thinking', messages: [] }, ctx);
|
|
398
|
+
streamTextCall = vi.mocked(ctx.provider.streamText).mock.calls[3][0];
|
|
399
|
+
passedTools = streamTextCall.tools;
|
|
400
|
+
expect(passedTools['stuck_api']).toBeDefined();
|
|
401
|
+
// Map should be cleaned up after expiry
|
|
402
|
+
expect(ctx.disabledToolsUntilTurn.has('stuck_api')).toBe(false);
|
|
403
|
+
});
|
|
215
404
|
it('injects warning when tool called 3+ times', async () => {
|
|
216
405
|
const messages = [];
|
|
217
406
|
for (let i = 0; i < 3; i++) {
|
|
@@ -231,7 +420,7 @@ describe('handleThinking (via transition)', () => {
|
|
|
231
420
|
expect(result.next.type).toBe('streaming');
|
|
232
421
|
const streamTextCall = vi.mocked(ctx.provider.streamText).mock.calls[0][0];
|
|
233
422
|
const lastMsg = streamTextCall.messages[streamTextCall.messages.length - 1];
|
|
234
|
-
expect(lastMsg.role).toBe('
|
|
423
|
+
expect(lastMsg.role).toBe('user');
|
|
235
424
|
if (typeof lastMsg.content === 'string') {
|
|
236
425
|
expect(lastMsg.content).toContain('flaky_api');
|
|
237
426
|
expect(lastMsg.content).toContain('3 times');
|
|
@@ -364,6 +553,86 @@ describe('handleExecuting (via transition)', () => {
|
|
|
364
553
|
expect(startEvents.length).toBe(1);
|
|
365
554
|
expect(resultEvents.length).toBe(1);
|
|
366
555
|
});
|
|
556
|
+
it('routes requiresConfirmation tools through CONFIRMING on first pass', async () => {
|
|
557
|
+
const destructiveTool = makeMockToolDef({
|
|
558
|
+
execute: vi.fn().mockResolvedValue({ deleted: true }),
|
|
559
|
+
requiresConfirmation: true,
|
|
560
|
+
});
|
|
561
|
+
const registry = makeMockRegistry({ delete_repo: destructiveTool });
|
|
562
|
+
const ctx = makeMockContext({ toolRegistry: registry });
|
|
563
|
+
const state = {
|
|
564
|
+
type: 'executing',
|
|
565
|
+
queue: [],
|
|
566
|
+
current: { toolCallId: 'call-danger', toolName: 'delete_repo', args: { name: 'foo' } },
|
|
567
|
+
results: [],
|
|
568
|
+
};
|
|
569
|
+
const result = await transition(state, ctx);
|
|
570
|
+
expect(result.next.type).toBe('confirming');
|
|
571
|
+
expect(destructiveTool.execute).not.toHaveBeenCalled();
|
|
572
|
+
// ConfirmationRequired SSE event should be emitted
|
|
573
|
+
const confirmEvents = result.effects.filter((e) => e.type === SSEEventType.ConfirmationRequired);
|
|
574
|
+
expect(confirmEvents.length).toBe(1);
|
|
575
|
+
});
|
|
576
|
+
it('executes requiresConfirmation tools after approval (no re-confirm loop)', async () => {
|
|
577
|
+
const destructiveTool = makeMockToolDef({
|
|
578
|
+
execute: vi.fn().mockResolvedValue({ deleted: true }),
|
|
579
|
+
requiresConfirmation: true,
|
|
580
|
+
});
|
|
581
|
+
const registry = makeMockRegistry({ delete_repo: destructiveTool });
|
|
582
|
+
// Pre-populate confirmedCallIds as if CONFIRMING already approved this call
|
|
583
|
+
const ctx = makeMockContext({ toolRegistry: registry });
|
|
584
|
+
ctx.confirmedCallIds.add('call-approved');
|
|
585
|
+
const state = {
|
|
586
|
+
type: 'executing',
|
|
587
|
+
queue: [],
|
|
588
|
+
current: { toolCallId: 'call-approved', toolName: 'delete_repo', args: { name: 'foo' } },
|
|
589
|
+
results: [],
|
|
590
|
+
};
|
|
591
|
+
const result = await transition(state, ctx);
|
|
592
|
+
// Should execute this time, not route back to CONFIRMING
|
|
593
|
+
expect(result.next.type).toBe('thinking');
|
|
594
|
+
expect(destructiveTool.execute).toHaveBeenCalledTimes(1);
|
|
595
|
+
});
|
|
596
|
+
it('connection tool does NOT re-prompt after confirmedCallIds marks the call', async () => {
|
|
597
|
+
// Regression test for the latent infinite-loop bug: a connection tool
|
|
598
|
+
// whose ACL gate returns requiresConfirmation=true used to re-route back
|
|
599
|
+
// to CONFIRMING on every pass, since the permission checker has no
|
|
600
|
+
// notion of "already approved." confirmedCallIds fixes this.
|
|
601
|
+
const connectionTool = makeMockToolDef({
|
|
602
|
+
execute: vi.fn().mockResolvedValue({ ok: true }),
|
|
603
|
+
metadata: { category: 'connection', connection: 'github' },
|
|
604
|
+
});
|
|
605
|
+
const registry = makeMockRegistry({ request: connectionTool });
|
|
606
|
+
const ctx = makeMockContext({
|
|
607
|
+
toolRegistry: registry,
|
|
608
|
+
permissionChecker: {
|
|
609
|
+
check: vi.fn().mockReturnValue({
|
|
610
|
+
allowed: true,
|
|
611
|
+
requiresConfirmation: true,
|
|
612
|
+
reason: 'Write to github requires confirmation',
|
|
613
|
+
}),
|
|
614
|
+
},
|
|
615
|
+
});
|
|
616
|
+
// Simulate: CONFIRMING has already approved this call
|
|
617
|
+
ctx.confirmedCallIds.add('call-gh-write');
|
|
618
|
+
const state = {
|
|
619
|
+
type: 'executing',
|
|
620
|
+
queue: [],
|
|
621
|
+
current: {
|
|
622
|
+
toolCallId: 'call-gh-write',
|
|
623
|
+
toolName: 'request',
|
|
624
|
+
args: { method: 'POST', endpoint: '/repos/foo', intent: 'confirmed_write' },
|
|
625
|
+
},
|
|
626
|
+
results: [],
|
|
627
|
+
};
|
|
628
|
+
const result = await transition(state, ctx);
|
|
629
|
+
// Should execute the connection tool instead of re-routing to CONFIRMING
|
|
630
|
+
expect(result.next.type).toBe('thinking');
|
|
631
|
+
expect(connectionTool.execute).toHaveBeenCalledTimes(1);
|
|
632
|
+
// No ConfirmationRequired SSE event should have been emitted
|
|
633
|
+
const confirmEvents = result.effects.filter((e) => e.type === SSEEventType.ConfirmationRequired);
|
|
634
|
+
expect(confirmEvents.length).toBe(0);
|
|
635
|
+
});
|
|
367
636
|
it('transitions to compacting when context exceeds threshold', async () => {
|
|
368
637
|
const tool = makeMockToolDef({
|
|
369
638
|
// Return a large result to inflate context
|
|
@@ -540,6 +809,161 @@ describe('handleExecuting (via transition)', () => {
|
|
|
540
809
|
// The cached result should be used — tool.execute should NOT be called again
|
|
541
810
|
expect(readTool.execute).not.toHaveBeenCalled();
|
|
542
811
|
});
|
|
812
|
+
// -------------------------------------------------------------------------
|
|
813
|
+
// Parallel tool calls: batch contiguous read-only calls
|
|
814
|
+
// -------------------------------------------------------------------------
|
|
815
|
+
it('batches contiguous read-only calls and runs them concurrently', async () => {
|
|
816
|
+
// Two read-only tools. Each sleeps before resolving. If batched in
|
|
817
|
+
// parallel, total wall time ≈ max(sleep). If sequential, ≈ sum(sleep).
|
|
818
|
+
const sleep = (ms) => new Promise((r) => setTimeout(() => r('ok'), ms));
|
|
819
|
+
const readA = makeMockToolDef({
|
|
820
|
+
readOnly: true,
|
|
821
|
+
execute: vi.fn(() => sleep(50)),
|
|
822
|
+
});
|
|
823
|
+
const readB = makeMockToolDef({
|
|
824
|
+
readOnly: true,
|
|
825
|
+
execute: vi.fn(() => sleep(50)),
|
|
826
|
+
});
|
|
827
|
+
const registry = makeMockRegistry({ read_a: readA, read_b: readB });
|
|
828
|
+
const ctx = makeMockContext({ toolRegistry: registry });
|
|
829
|
+
const state = {
|
|
830
|
+
type: 'executing',
|
|
831
|
+
queue: [{ toolCallId: 'call-b', toolName: 'read_b', args: {} }],
|
|
832
|
+
current: { toolCallId: 'call-a', toolName: 'read_a', args: {} },
|
|
833
|
+
results: [],
|
|
834
|
+
};
|
|
835
|
+
const startedAt = Date.now();
|
|
836
|
+
const result = await transition(state, ctx);
|
|
837
|
+
const elapsed = Date.now() - startedAt;
|
|
838
|
+
// Both executed
|
|
839
|
+
expect(readA.execute).toHaveBeenCalledTimes(1);
|
|
840
|
+
expect(readB.execute).toHaveBeenCalledTimes(1);
|
|
841
|
+
// Parallel: should finish in roughly one sleep, well under the sum
|
|
842
|
+
expect(elapsed).toBeLessThan(90);
|
|
843
|
+
// Batch drained the queue and transitioned to thinking in one step
|
|
844
|
+
expect(result.next.type).toBe('thinking');
|
|
845
|
+
// Both results appended to messages
|
|
846
|
+
const toolMessages = ctx.messages.filter((m) => m.role === 'tool');
|
|
847
|
+
expect(toolMessages).toHaveLength(2);
|
|
848
|
+
// Per-call SSE events emitted (2 start + 2 result)
|
|
849
|
+
const starts = result.effects.filter((e) => e.type === SSEEventType.ToolCallStart);
|
|
850
|
+
const results = result.effects.filter((e) => e.type === SSEEventType.ToolCallResult);
|
|
851
|
+
expect(starts).toHaveLength(2);
|
|
852
|
+
expect(results).toHaveLength(2);
|
|
853
|
+
});
|
|
854
|
+
it('stops batching at the first write (non-readOnly) tool', async () => {
|
|
855
|
+
const readTool = makeMockToolDef({ readOnly: true, execute: vi.fn().mockResolvedValue('r') });
|
|
856
|
+
const writeTool = makeMockToolDef({ readOnly: false, execute: vi.fn().mockResolvedValue('w') });
|
|
857
|
+
const registry = makeMockRegistry({ read: readTool, write: writeTool });
|
|
858
|
+
const ctx = makeMockContext({ toolRegistry: registry });
|
|
859
|
+
const state = {
|
|
860
|
+
type: 'executing',
|
|
861
|
+
queue: [
|
|
862
|
+
{ toolCallId: 'call-read-2', toolName: 'read', args: {} },
|
|
863
|
+
{ toolCallId: 'call-write', toolName: 'write', args: {} },
|
|
864
|
+
{ toolCallId: 'call-read-3', toolName: 'read', args: {} },
|
|
865
|
+
],
|
|
866
|
+
current: { toolCallId: 'call-read-1', toolName: 'read', args: {} },
|
|
867
|
+
results: [],
|
|
868
|
+
};
|
|
869
|
+
const result = await transition(state, ctx);
|
|
870
|
+
// Batched the two leading reads; the write stopped the batch
|
|
871
|
+
expect(readTool.execute).toHaveBeenCalledTimes(2);
|
|
872
|
+
expect(writeTool.execute).not.toHaveBeenCalled();
|
|
873
|
+
// Next state should process the write sequentially
|
|
874
|
+
expect(result.next.type).toBe('executing');
|
|
875
|
+
if (result.next.type === 'executing') {
|
|
876
|
+
expect(result.next.current.toolCallId).toBe('call-write');
|
|
877
|
+
expect(result.next.queue).toHaveLength(1);
|
|
878
|
+
expect(result.next.queue[0].toolCallId).toBe('call-read-3');
|
|
879
|
+
}
|
|
880
|
+
});
|
|
881
|
+
it('does not batch when the current call is a write', async () => {
|
|
882
|
+
const writeTool = makeMockToolDef({ readOnly: false, execute: vi.fn().mockResolvedValue('w') });
|
|
883
|
+
const readTool = makeMockToolDef({ readOnly: true, execute: vi.fn().mockResolvedValue('r') });
|
|
884
|
+
const registry = makeMockRegistry({ write: writeTool, read: readTool });
|
|
885
|
+
const ctx = makeMockContext({ toolRegistry: registry });
|
|
886
|
+
const state = {
|
|
887
|
+
type: 'executing',
|
|
888
|
+
queue: [{ toolCallId: 'call-read', toolName: 'read', args: {} }],
|
|
889
|
+
current: { toolCallId: 'call-write', toolName: 'write', args: {} },
|
|
890
|
+
results: [],
|
|
891
|
+
};
|
|
892
|
+
const result = await transition(state, ctx);
|
|
893
|
+
// Only write executes; read stays in queue for the next transition
|
|
894
|
+
expect(writeTool.execute).toHaveBeenCalledTimes(1);
|
|
895
|
+
expect(readTool.execute).not.toHaveBeenCalled();
|
|
896
|
+
expect(result.next.type).toBe('executing');
|
|
897
|
+
if (result.next.type === 'executing') {
|
|
898
|
+
expect(result.next.current.toolCallId).toBe('call-read');
|
|
899
|
+
}
|
|
900
|
+
});
|
|
901
|
+
it('does not batch connection tools (any call could transition to CONFIRMING)', async () => {
|
|
902
|
+
const connRead = makeMockToolDef({
|
|
903
|
+
readOnly: true,
|
|
904
|
+
execute: vi.fn().mockResolvedValue('x'),
|
|
905
|
+
metadata: { category: 'connection', connection: 'github' },
|
|
906
|
+
});
|
|
907
|
+
const registry = makeMockRegistry({ request: connRead });
|
|
908
|
+
const ctx = makeMockContext({ toolRegistry: registry });
|
|
909
|
+
const state = {
|
|
910
|
+
type: 'executing',
|
|
911
|
+
queue: [{ toolCallId: 'call-2', toolName: 'request', args: { method: 'GET', endpoint: '/x' } }],
|
|
912
|
+
current: { toolCallId: 'call-1', toolName: 'request', args: { method: 'GET', endpoint: '/y' } },
|
|
913
|
+
results: [],
|
|
914
|
+
};
|
|
915
|
+
const result = await transition(state, ctx);
|
|
916
|
+
// Only the first ran; the second stays queued for its own ACL check
|
|
917
|
+
expect(connRead.execute).toHaveBeenCalledTimes(1);
|
|
918
|
+
expect(result.next.type).toBe('executing');
|
|
919
|
+
});
|
|
920
|
+
it('does not batch read-only tools that require confirmation', async () => {
|
|
921
|
+
const gated = makeMockToolDef({
|
|
922
|
+
readOnly: true,
|
|
923
|
+
requiresConfirmation: true,
|
|
924
|
+
execute: vi.fn().mockResolvedValue('x'),
|
|
925
|
+
});
|
|
926
|
+
const registry = makeMockRegistry({ gated });
|
|
927
|
+
const ctx = makeMockContext({ toolRegistry: registry });
|
|
928
|
+
const state = {
|
|
929
|
+
type: 'executing',
|
|
930
|
+
queue: [{ toolCallId: 'call-2', toolName: 'gated', args: {} }],
|
|
931
|
+
current: { toolCallId: 'call-1', toolName: 'gated', args: {} },
|
|
932
|
+
results: [],
|
|
933
|
+
};
|
|
934
|
+
const result = await transition(state, ctx);
|
|
935
|
+
// Routes to CONFIRMING, no execution yet
|
|
936
|
+
expect(result.next.type).toBe('confirming');
|
|
937
|
+
expect(gated.execute).not.toHaveBeenCalled();
|
|
938
|
+
});
|
|
939
|
+
it('a failure in one batched call does not block the others', async () => {
|
|
940
|
+
const good = makeMockToolDef({ readOnly: true, execute: vi.fn().mockResolvedValue('ok') });
|
|
941
|
+
const bad = makeMockToolDef({
|
|
942
|
+
readOnly: true,
|
|
943
|
+
execute: vi.fn().mockRejectedValue(new Error('boom')),
|
|
944
|
+
});
|
|
945
|
+
const registry = makeMockRegistry({ good, bad });
|
|
946
|
+
const ctx = makeMockContext({ toolRegistry: registry });
|
|
947
|
+
const state = {
|
|
948
|
+
type: 'executing',
|
|
949
|
+
queue: [
|
|
950
|
+
{ toolCallId: 'call-bad', toolName: 'bad', args: {} },
|
|
951
|
+
{ toolCallId: 'call-good-2', toolName: 'good', args: {} },
|
|
952
|
+
],
|
|
953
|
+
current: { toolCallId: 'call-good-1', toolName: 'good', args: {} },
|
|
954
|
+
results: [],
|
|
955
|
+
};
|
|
956
|
+
const result = await transition(state, ctx);
|
|
957
|
+
// All three tried, failure surfaced as an error tool-result for the bad one
|
|
958
|
+
expect(good.execute).toHaveBeenCalledTimes(2);
|
|
959
|
+
expect(bad.execute).toHaveBeenCalledTimes(1);
|
|
960
|
+
expect(result.next.type).toBe('thinking');
|
|
961
|
+
const toolMessages = ctx.messages.filter((m) => m.role === 'tool');
|
|
962
|
+
expect(toolMessages).toHaveLength(3);
|
|
963
|
+
// Error result is present in the SSE stream
|
|
964
|
+
const errorEvents = result.effects.filter((e) => e.type === SSEEventType.ToolCallResult && e.status === 'error');
|
|
965
|
+
expect(errorEvents).toHaveLength(1);
|
|
966
|
+
});
|
|
543
967
|
});
|
|
544
968
|
describe('handleConfirming (via transition)', () => {
|
|
545
969
|
it('approved confirmation resumes executing', async () => {
|
|
@@ -571,6 +995,30 @@ describe('handleConfirming (via transition)', () => {
|
|
|
571
995
|
// A denial message should have been appended
|
|
572
996
|
expect(ctx.messages.length).toBeGreaterThan(0);
|
|
573
997
|
});
|
|
998
|
+
it('approved confirmation marks the callId in ctx.confirmedCallIds', async () => {
|
|
999
|
+
const ctx = makeMockContext({
|
|
1000
|
+
waitForConfirmation: vi.fn().mockResolvedValue(true),
|
|
1001
|
+
});
|
|
1002
|
+
const state = {
|
|
1003
|
+
type: 'confirming',
|
|
1004
|
+
call: { toolCallId: 'call-XYZ', toolName: 'delete_item', args: { id: '123' } },
|
|
1005
|
+
remainingQueue: [],
|
|
1006
|
+
};
|
|
1007
|
+
await transition(state, ctx);
|
|
1008
|
+
expect(ctx.confirmedCallIds.has('call-XYZ')).toBe(true);
|
|
1009
|
+
});
|
|
1010
|
+
it('denied confirmation does NOT mark callId as confirmed', async () => {
|
|
1011
|
+
const ctx = makeMockContext({
|
|
1012
|
+
waitForConfirmation: vi.fn().mockResolvedValue(false),
|
|
1013
|
+
});
|
|
1014
|
+
const state = {
|
|
1015
|
+
type: 'confirming',
|
|
1016
|
+
call: { toolCallId: 'call-DENIED', toolName: 'delete_item', args: { id: '123' } },
|
|
1017
|
+
remainingQueue: [],
|
|
1018
|
+
};
|
|
1019
|
+
await transition(state, ctx);
|
|
1020
|
+
expect(ctx.confirmedCallIds.has('call-DENIED')).toBe(false);
|
|
1021
|
+
});
|
|
574
1022
|
it('intercepts dispatch_task and transitions to DISPATCHING', async () => {
|
|
575
1023
|
const dispatchTool = {
|
|
576
1024
|
description: 'Dispatch sub-task',
|
|
@@ -659,9 +1107,10 @@ describe('handleCompacting (via transition)', () => {
|
|
|
659
1107
|
if (result.next.type === 'thinking') {
|
|
660
1108
|
// Should have fewer messages (summary + recent turns)
|
|
661
1109
|
expect(result.next.messages.length).toBeLessThan(messages.length);
|
|
662
|
-
// First message should be the
|
|
1110
|
+
// First message should be the compaction summary (user role, not system,
|
|
1111
|
+
// because Anthropic rejects system messages after user/assistant turns)
|
|
663
1112
|
const firstMsg = result.next.messages[0];
|
|
664
|
-
expect(firstMsg.role).toBe('system');
|
|
1113
|
+
expect(firstMsg.role).toBe('user');
|
|
665
1114
|
const firstContent = firstMsg.content;
|
|
666
1115
|
expect(typeof firstContent === 'string' && firstContent.includes('Conversation Summary')).toBe(true);
|
|
667
1116
|
}
|
|
@@ -837,6 +1286,50 @@ describe('handleDispatching (via transition)', () => {
|
|
|
837
1286
|
agent: 'broken-agent',
|
|
838
1287
|
}));
|
|
839
1288
|
});
|
|
1289
|
+
it('propagates parent remaining token budget to child', async () => {
|
|
1290
|
+
// Parent has 100 budget, 90 already used → child should get 10.
|
|
1291
|
+
// Child's first mock turn yields 150 tokens, which exceeds the child's
|
|
1292
|
+
// 10-token budget; the child's outer loop catches it on the next check
|
|
1293
|
+
// and stops with budget_exceeded. Parent usage reflects the child's
|
|
1294
|
+
// consumed tokens once the child merges back.
|
|
1295
|
+
const parentCtx = makeMockContext({
|
|
1296
|
+
maxSessionTokens: 100,
|
|
1297
|
+
usage: { inputTokens: 60, outputTokens: 30, totalTokens: 90 },
|
|
1298
|
+
});
|
|
1299
|
+
const result = await transition({
|
|
1300
|
+
type: 'dispatching',
|
|
1301
|
+
task: { agentName: 'starved-child', toolSubset: [], prompt: 'Do lots of work' },
|
|
1302
|
+
toolCallId: 'tc-starved',
|
|
1303
|
+
queue: [],
|
|
1304
|
+
results: [],
|
|
1305
|
+
}, parentCtx);
|
|
1306
|
+
// Parent should resume (doesn't crash on child budget exhaustion)
|
|
1307
|
+
expect(result.next.type).toBe('thinking');
|
|
1308
|
+
// Child should have stopped early — its merged-back token usage should
|
|
1309
|
+
// not massively exceed the original budget, because the check fires on
|
|
1310
|
+
// the next outer loop iteration after the first 150-token turn.
|
|
1311
|
+
// (It won't be zero — the first turn runs fully and merges — but it
|
|
1312
|
+
// won't compound across many turns.)
|
|
1313
|
+
expect(parentCtx.usage.totalTokens).toBeLessThanOrEqual(90 + 150 + 150);
|
|
1314
|
+
});
|
|
1315
|
+
it('child inherits unlimited budget when parent has no cap', async () => {
|
|
1316
|
+
// No maxSessionTokens on parent → child should also have no cap (undefined).
|
|
1317
|
+
// The child runs through normal termination (model_stop), not budget.
|
|
1318
|
+
const parentCtx = makeMockContext({
|
|
1319
|
+
// maxSessionTokens intentionally omitted
|
|
1320
|
+
usage: { inputTokens: 500, outputTokens: 500, totalTokens: 1000 },
|
|
1321
|
+
});
|
|
1322
|
+
const result = await transition({
|
|
1323
|
+
type: 'dispatching',
|
|
1324
|
+
task: { agentName: 'unbounded-child', toolSubset: [], prompt: 'Go' },
|
|
1325
|
+
toolCallId: 'tc-unbounded',
|
|
1326
|
+
queue: [],
|
|
1327
|
+
results: [],
|
|
1328
|
+
}, parentCtx);
|
|
1329
|
+
expect(result.next.type).toBe('thinking');
|
|
1330
|
+
// No agent_loop_budget_exceeded log should have fired for the child
|
|
1331
|
+
expect(parentCtx.logger.warn).not.toHaveBeenCalledWith('agent_loop_budget_exceeded', expect.anything());
|
|
1332
|
+
});
|
|
840
1333
|
});
|
|
841
1334
|
// ---------------------------------------------------------------------------
|
|
842
1335
|
// 3. Integration: runAgent() full flow
|
|
@@ -974,6 +1467,76 @@ describe('runAgent', () => {
|
|
|
974
1467
|
// Turn count should not exceed maxTurns
|
|
975
1468
|
expect(ctx.turnCount).toBeLessThanOrEqual(3);
|
|
976
1469
|
});
|
|
1470
|
+
it('token budget terminates the loop with reason=budget_exceeded', async () => {
|
|
1471
|
+
const tool = makeMockToolDef();
|
|
1472
|
+
const registry = makeMockRegistry({ loop_tool: tool });
|
|
1473
|
+
// Each turn yields 15 tokens (10 in + 5 out). With maxSessionTokens=30, the loop
|
|
1474
|
+
// should stop after the 2nd turn pushes cumulative usage past the cap.
|
|
1475
|
+
const provider = {
|
|
1476
|
+
model: 'test-model',
|
|
1477
|
+
provider: 'test',
|
|
1478
|
+
languageModel: {},
|
|
1479
|
+
streamText: vi.fn(() => makeMockStream([
|
|
1480
|
+
{ type: 'tool-call', toolCallId: `c-${Date.now()}`, toolName: 'loop_tool', args: {} },
|
|
1481
|
+
{ type: 'finish', usage: makeUsage({ inputTokens: 10, outputTokens: 5, totalTokens: 15 }) },
|
|
1482
|
+
], '')),
|
|
1483
|
+
generateText: vi.fn(),
|
|
1484
|
+
};
|
|
1485
|
+
const ctx = makeMockContext({
|
|
1486
|
+
provider,
|
|
1487
|
+
toolRegistry: registry,
|
|
1488
|
+
maxTurns: 100,
|
|
1489
|
+
maxSessionTokens: 30,
|
|
1490
|
+
});
|
|
1491
|
+
const events = [];
|
|
1492
|
+
for await (const event of runAgent({
|
|
1493
|
+
messages: [{ role: 'user', content: 'Burn tokens' }],
|
|
1494
|
+
context: ctx,
|
|
1495
|
+
})) {
|
|
1496
|
+
events.push(event);
|
|
1497
|
+
}
|
|
1498
|
+
// Should have terminated on budget, not max_turns
|
|
1499
|
+
const doneEvent = events[events.length - 1];
|
|
1500
|
+
expect(doneEvent.type).toBe(SSEEventType.Done);
|
|
1501
|
+
expect(ctx.turnCount).toBeLessThan(100);
|
|
1502
|
+
expect(ctx.usage.totalTokens).toBeGreaterThanOrEqual(30);
|
|
1503
|
+
});
|
|
1504
|
+
it('undefined maxSessionTokens means no budget cap', async () => {
|
|
1505
|
+
// Same infinite-tool-call provider as max_turns test, but with no
|
|
1506
|
+
// maxSessionTokens set and a small maxTurns to bound the test. Verifies that
|
|
1507
|
+
// undefined budget doesn't accidentally trip the check.
|
|
1508
|
+
const tool = makeMockToolDef();
|
|
1509
|
+
const registry = makeMockRegistry({ loop_tool: tool });
|
|
1510
|
+
const provider = {
|
|
1511
|
+
model: 'test-model',
|
|
1512
|
+
provider: 'test',
|
|
1513
|
+
languageModel: {},
|
|
1514
|
+
streamText: vi.fn(() => makeMockStream([
|
|
1515
|
+
{ type: 'tool-call', toolCallId: `c-${Date.now()}`, toolName: 'loop_tool', args: {} },
|
|
1516
|
+
{ type: 'finish', usage: makeUsage({ inputTokens: 100, outputTokens: 100, totalTokens: 200 }) },
|
|
1517
|
+
], '')),
|
|
1518
|
+
generateText: vi.fn(),
|
|
1519
|
+
};
|
|
1520
|
+
const ctx = makeMockContext({
|
|
1521
|
+
provider,
|
|
1522
|
+
toolRegistry: registry,
|
|
1523
|
+
maxTurns: 2,
|
|
1524
|
+
// maxSessionTokens intentionally omitted
|
|
1525
|
+
});
|
|
1526
|
+
const events = [];
|
|
1527
|
+
for await (const event of runAgent({
|
|
1528
|
+
messages: [{ role: 'user', content: 'Run' }],
|
|
1529
|
+
context: ctx,
|
|
1530
|
+
})) {
|
|
1531
|
+
events.push(event);
|
|
1532
|
+
}
|
|
1533
|
+
// Should stop on max_turns, budget check should not interfere
|
|
1534
|
+
const doneEvent = events[events.length - 1];
|
|
1535
|
+
expect(doneEvent.type).toBe(SSEEventType.Done);
|
|
1536
|
+
expect(ctx.usage.totalTokens).toBeGreaterThan(0);
|
|
1537
|
+
// No budget-exceeded log is expected here, but this test does not assert on the logger
|
|
1538
|
+
// (indirect check only: the run ends with a Done event and non-zero usage even though no cap is set)
|
|
1539
|
+
});
|
|
977
1540
|
it('done event always includes usage regardless of reason (G2)', async () => {
|
|
978
1541
|
// Abort immediately
|
|
979
1542
|
const abortController = new AbortController();
|
|
@@ -1019,7 +1582,6 @@ describe('runAgent', () => {
|
|
|
1019
1582
|
}
|
|
1020
1583
|
expect(ctx.logger.info).toHaveBeenCalledWith('agent_loop_start', expect.objectContaining({
|
|
1021
1584
|
session: 'test-session',
|
|
1022
|
-
tenant: 'test-tenant',
|
|
1023
1585
|
}));
|
|
1024
1586
|
expect(ctx.logger.info).toHaveBeenCalledWith('agent_loop_done', expect.objectContaining({
|
|
1025
1587
|
session: 'test-session',
|