@jackchen_me/open-multi-agent 0.2.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +1 -1
- package/CLAUDE.md +11 -3
- package/README.md +87 -20
- package/README_zh.md +85 -25
- package/dist/agent/agent.d.ts +15 -1
- package/dist/agent/agent.d.ts.map +1 -1
- package/dist/agent/agent.js +144 -10
- package/dist/agent/agent.js.map +1 -1
- package/dist/agent/loop-detector.d.ts +39 -0
- package/dist/agent/loop-detector.d.ts.map +1 -0
- package/dist/agent/loop-detector.js +122 -0
- package/dist/agent/loop-detector.js.map +1 -0
- package/dist/agent/pool.d.ts +2 -1
- package/dist/agent/pool.d.ts.map +1 -1
- package/dist/agent/pool.js +4 -2
- package/dist/agent/pool.js.map +1 -1
- package/dist/agent/runner.d.ts +23 -1
- package/dist/agent/runner.d.ts.map +1 -1
- package/dist/agent/runner.js +113 -12
- package/dist/agent/runner.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/llm/adapter.d.ts +4 -1
- package/dist/llm/adapter.d.ts.map +1 -1
- package/dist/llm/adapter.js +11 -0
- package/dist/llm/adapter.js.map +1 -1
- package/dist/llm/copilot.d.ts.map +1 -1
- package/dist/llm/copilot.js +2 -1
- package/dist/llm/copilot.js.map +1 -1
- package/dist/llm/gemini.d.ts +65 -0
- package/dist/llm/gemini.d.ts.map +1 -0
- package/dist/llm/gemini.js +317 -0
- package/dist/llm/gemini.js.map +1 -0
- package/dist/llm/grok.d.ts +21 -0
- package/dist/llm/grok.d.ts.map +1 -0
- package/dist/llm/grok.js +24 -0
- package/dist/llm/grok.js.map +1 -0
- package/dist/llm/openai-common.d.ts +8 -1
- package/dist/llm/openai-common.d.ts.map +1 -1
- package/dist/llm/openai-common.js +35 -2
- package/dist/llm/openai-common.js.map +1 -1
- package/dist/llm/openai.d.ts +1 -1
- package/dist/llm/openai.d.ts.map +1 -1
- package/dist/llm/openai.js +20 -2
- package/dist/llm/openai.js.map +1 -1
- package/dist/orchestrator/orchestrator.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator.js +89 -9
- package/dist/orchestrator/orchestrator.js.map +1 -1
- package/dist/task/queue.d.ts +31 -2
- package/dist/task/queue.d.ts.map +1 -1
- package/dist/task/queue.js +69 -2
- package/dist/task/queue.js.map +1 -1
- package/dist/tool/text-tool-extractor.d.ts +32 -0
- package/dist/tool/text-tool-extractor.d.ts.map +1 -0
- package/dist/tool/text-tool-extractor.js +187 -0
- package/dist/tool/text-tool-extractor.js.map +1 -0
- package/dist/types.d.ts +139 -7
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/trace.d.ts +12 -0
- package/dist/utils/trace.d.ts.map +1 -0
- package/dist/utils/trace.js +30 -0
- package/dist/utils/trace.js.map +1 -0
- package/examples/06-local-model.ts +1 -0
- package/examples/08-gemma4-local.ts +76 -87
- package/examples/09-structured-output.ts +73 -0
- package/examples/10-task-retry.ts +132 -0
- package/examples/11-trace-observability.ts +133 -0
- package/examples/12-grok.ts +154 -0
- package/examples/13-gemini.ts +48 -0
- package/package.json +11 -1
- package/src/agent/agent.ts +159 -10
- package/src/agent/loop-detector.ts +137 -0
- package/src/agent/pool.ts +9 -2
- package/src/agent/runner.ts +148 -19
- package/src/index.ts +15 -0
- package/src/llm/adapter.ts +12 -1
- package/src/llm/copilot.ts +2 -1
- package/src/llm/gemini.ts +378 -0
- package/src/llm/grok.ts +29 -0
- package/src/llm/openai-common.ts +41 -2
- package/src/llm/openai.ts +23 -3
- package/src/orchestrator/orchestrator.ts +105 -11
- package/src/task/queue.ts +73 -3
- package/src/tool/text-tool-extractor.ts +219 -0
- package/src/types.ts +157 -6
- package/src/utils/trace.ts +34 -0
- package/tests/agent-hooks.test.ts +473 -0
- package/tests/agent-pool.test.ts +212 -0
- package/tests/approval.test.ts +464 -0
- package/tests/built-in-tools.test.ts +393 -0
- package/tests/gemini-adapter.test.ts +97 -0
- package/tests/grok-adapter.test.ts +74 -0
- package/tests/llm-adapters.test.ts +357 -0
- package/tests/loop-detection.test.ts +456 -0
- package/tests/openai-fallback.test.ts +159 -0
- package/tests/orchestrator.test.ts +281 -0
- package/tests/scheduler.test.ts +221 -0
- package/tests/team-messaging.test.ts +329 -0
- package/tests/text-tool-extractor.test.ts +170 -0
- package/tests/trace.test.ts +453 -0
- package/vitest.config.ts +9 -0
- package/examples/09-gemma4-auto-orchestration.ts +0 -162
|
@@ -0,0 +1,456 @@
|
|
|
1
|
+
import { describe, it, expect, vi } from 'vitest'
|
|
2
|
+
import { z } from 'zod'
|
|
3
|
+
import { LoopDetector } from '../src/agent/loop-detector.js'
|
|
4
|
+
import { AgentRunner } from '../src/agent/runner.js'
|
|
5
|
+
import { ToolRegistry, defineTool } from '../src/tool/framework.js'
|
|
6
|
+
import { ToolExecutor } from '../src/tool/executor.js'
|
|
7
|
+
import type { LLMAdapter, LLMResponse, StreamEvent } from '../src/types.js'
|
|
8
|
+
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
// Mock helpers
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
|
|
13
|
+
/**
 * Builds a scripted `LLMAdapter` for tests.
 *
 * Every `chat()` call hands out the next entry of `responses`, in order.
 * `stream()` is a stub that yields nothing.
 *
 * @param responses - Scripted responses, consumed one per `chat()` call.
 * @returns An adapter named `'mock'`.
 * @throws Error - `chat()` rejects once the script is exhausted, so a test that
 *   runs more turns than it scripted fails with a clear message instead of
 *   handing `undefined` to the code under test.
 */
function mockAdapter(responses: LLMResponse[]): LLMAdapter {
  let callIndex = 0
  return {
    name: 'mock',
    async chat() {
      const next = responses[callIndex]
      if (next === undefined) {
        throw new Error(
          `mockAdapter: script exhausted - chat() call #${callIndex + 1} requested but only ${responses.length} response(s) were provided`,
        )
      }
      callIndex += 1
      return next
    },
    async *stream() {
      /* unused */
    },
  }
}
|
|
25
|
+
|
|
26
|
+
/**
 * Creates a mock final-turn response (`stop_reason: 'end_turn'`) that carries
 * a single text block.
 *
 * @param text - Text placed in the response's only content block.
 * @returns A response with a random `resp-…` id and fixed usage numbers.
 */
function textResponse(text: string): LLMResponse {
  const idSuffix = Math.random().toString(36).slice(2)
  const textBlock = { type: 'text' as const, text }
  return {
    id: `resp-${idSuffix}`,
    content: [textBlock],
    model: 'mock-model',
    stop_reason: 'end_turn',
    usage: { input_tokens: 10, output_tokens: 20 },
  }
}
|
|
35
|
+
|
|
36
|
+
/**
 * Creates a mock response (`stop_reason: 'tool_use'`) that requests a single
 * tool call.
 *
 * @param toolName - Name of the tool being requested.
 * @param input - Arguments passed through unchanged to the tool_use block.
 * @returns A response with a random `resp-…` id, one `tool_use` block with a
 *   random `tu-…` id, and fixed usage numbers.
 */
function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
  const responseId = `resp-${Math.random().toString(36).slice(2)}`
  const toolUseId = `tu-${Math.random().toString(36).slice(2)}`
  const toolUseBlock = {
    type: 'tool_use' as const,
    id: toolUseId,
    name: toolName,
    input,
  }
  return {
    id: responseId,
    content: [toolUseBlock],
    model: 'mock-model',
    stop_reason: 'tool_use',
    usage: { input_tokens: 15, output_tokens: 25 },
  }
}
|
|
52
|
+
|
|
53
|
+
/** Minimal tool used by the integration tests: returns its `message` argument as the result data. */
const echoTool = defineTool({
  name: 'echo',
  description: 'Echoes input',
  inputSchema: z.object({ message: z.string() }),
  execute: async (args) => ({ data: args.message }),
})
|
|
61
|
+
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
// Unit tests — LoopDetector class
|
|
64
|
+
// ---------------------------------------------------------------------------
|
|
65
|
+
|
|
66
|
+
/**
 * Unit tests for the LoopDetector class.
 *
 * Covers three areas: repetition of tool-call turns, repetition of text
 * output, and the interaction between the detection window and the
 * repetition threshold.
 */
describe('LoopDetector', () => {
  describe('tool call repetition', () => {
    it('returns null for non-repeating tool calls', () => {
      const detector = new LoopDetector()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])).toBeNull()
      expect(detector.recordToolCalls([{ name: 'b', input: { x: 2 } }])).toBeNull()
      expect(detector.recordToolCalls([{ name: 'c', input: { x: 3 } }])).toBeNull()
    })

    it('detects 3 identical tool calls (default threshold)', () => {
      const detector = new LoopDetector()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])).toBeNull()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])).toBeNull()
      const info = detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])
      expect(info).not.toBeNull()
      expect(info!.kind).toBe('tool_repetition')
      expect(info!.repetitions).toBe(3)
    })

    it('does not trigger when args differ', () => {
      const detector = new LoopDetector()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])).toBeNull()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 2 } }])).toBeNull()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 3 } }])).toBeNull()
    })

    it('resets count when a different call intervenes', () => {
      const detector = new LoopDetector()
      detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])
      detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])
      // Different call breaks the streak
      detector.recordToolCalls([{ name: 'b', input: { x: 1 } }])
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])).toBeNull()
    })

    it('handles multi-tool turns with order-independent signatures', () => {
      const detector = new LoopDetector()
      const toolsA = [
        { name: 'read', input: { file: 'a.ts' } },
        { name: 'read', input: { file: 'b.ts' } },
      ]
      // Same tools in different order
      const toolsB = [
        { name: 'read', input: { file: 'b.ts' } },
        { name: 'read', input: { file: 'a.ts' } },
      ]
      expect(detector.recordToolCalls(toolsA)).toBeNull()
      expect(detector.recordToolCalls(toolsB)).toBeNull()
      const info = detector.recordToolCalls(toolsA)
      expect(info).not.toBeNull()
      expect(info!.kind).toBe('tool_repetition')
    })

    it('respects custom threshold', () => {
      const detector = new LoopDetector({ maxRepetitions: 2 })
      expect(detector.recordToolCalls([{ name: 'a', input: {} }])).toBeNull()
      const info = detector.recordToolCalls([{ name: 'a', input: {} }])
      expect(info).not.toBeNull()
      expect(info!.repetitions).toBe(2)
    })

    it('returns null for empty blocks', () => {
      const detector = new LoopDetector()
      expect(detector.recordToolCalls([])).toBeNull()
    })

    it('produces deterministic signatures regardless of key order', () => {
      const detector = new LoopDetector()
      // Same keys and values, only the property order differs between calls.
      detector.recordToolCalls([{ name: 'a', input: { b: 2, a: 1 } }])
      detector.recordToolCalls([{ name: 'a', input: { a: 1, b: 2 } }])
      const info = detector.recordToolCalls([{ name: 'a', input: { b: 2, a: 1 } }])
      expect(info).not.toBeNull()
    })
  })

  describe('text repetition', () => {
    it('returns null for non-repeating text', () => {
      const detector = new LoopDetector()
      expect(detector.recordText('hello')).toBeNull()
      expect(detector.recordText('world')).toBeNull()
      expect(detector.recordText('foo')).toBeNull()
    })

    it('detects 3 identical texts (default threshold)', () => {
      const detector = new LoopDetector()
      expect(detector.recordText('stuck')).toBeNull()
      expect(detector.recordText('stuck')).toBeNull()
      const info = detector.recordText('stuck')
      expect(info).not.toBeNull()
      expect(info!.kind).toBe('text_repetition')
      expect(info!.repetitions).toBe(3)
    })

    it('ignores empty or whitespace-only text', () => {
      const detector = new LoopDetector()
      expect(detector.recordText('')).toBeNull()
      expect(detector.recordText(' ')).toBeNull()
      expect(detector.recordText('\n\t')).toBeNull()
    })

    it('normalises whitespace before comparison', () => {
      const detector = new LoopDetector()
      // The three inputs differ ONLY in the width of the interior whitespace
      // run; feeding byte-identical strings would not exercise normalisation.
      // NOTE(review): assumes the detector collapses runs of spaces — confirm
      // against the LoopDetector implementation.
      detector.recordText('hello world')
      detector.recordText('hello  world')
      const info = detector.recordText('hello   world')
      expect(info).not.toBeNull()
    })
  })

  describe('window size', () => {
    it('clamps windowSize to at least maxRepeats', () => {
      // Window of 2 with threshold 3 is auto-clamped to 3.
      const detector = new LoopDetector({ loopDetectionWindow: 2, maxRepetitions: 3 })
      detector.recordToolCalls([{ name: 'a', input: {} }])
      detector.recordToolCalls([{ name: 'a', input: {} }])
      // Third call triggers because window was clamped to 3
      const info = detector.recordToolCalls([{ name: 'a', input: {} }])
      expect(info).not.toBeNull()
      expect(info!.repetitions).toBe(3)
    })

    it('works correctly when window >= threshold', () => {
      const detector = new LoopDetector({ loopDetectionWindow: 4, maxRepetitions: 3 })
      detector.recordToolCalls([{ name: 'a', input: {} }])
      detector.recordToolCalls([{ name: 'a', input: {} }])
      const info = detector.recordToolCalls([{ name: 'a', input: {} }])
      expect(info).not.toBeNull()
    })
  })
})
|
|
196
|
+
|
|
197
|
+
// ---------------------------------------------------------------------------
|
|
198
|
+
// Integration tests — AgentRunner with loop detection
|
|
199
|
+
// ---------------------------------------------------------------------------
|
|
200
|
+
|
|
201
|
+
/**
 * Integration tests: AgentRunner driven by a scripted mock adapter, checking
 * how each `onLoopDetected` setting (terminate / warn / callback) affects the
 * run result, emitted stream events and warnings.
 */
describe('AgentRunner loop detection', () => {
  type LoopConfig = import('../src/types.js').LoopDetectionConfig
  type LoopInfo = import('../src/types.js').LoopDetectionInfo
  type TextBlock = import('../src/types.js').TextBlock

  /** `count` consecutive echo tool-use turns carrying the same `message`. */
  function echoTurns(message: string, count: number): LLMResponse[] {
    const turns: LLMResponse[] = []
    for (let i = 0; i < count; i += 1) {
      turns.push(toolUseResponse('echo', { message }))
    }
    return turns
  }

  /** `repeats` identical echo('hi') turns followed by a closing text turn. */
  function stuckScript(repeats: number): LLMResponse[] {
    return [...echoTurns('hi', repeats), textResponse('done')]
  }

  /** Wires a runner around a scripted adapter with only the echo tool registered. */
  function buildRunner(
    responses: LLMResponse[],
    loopDetection: LoopConfig,
  ) {
    const adapter = mockAdapter(responses)
    const registry = new ToolRegistry()
    registry.register(echoTool)
    return new AgentRunner(adapter, registry, new ToolExecutor(registry), {
      model: 'mock-model',
      maxTurns: 10,
      allowedTools: ['echo'],
      agentName: 'test-agent',
      loopDetection,
    })
  }

  /** Runs the agent with the single user prompt "go". */
  const runGo = (runner: AgentRunner, options?: Parameters<AgentRunner['run']>[1]) =>
    runner.run([{ role: 'user', content: [{ type: 'text', text: 'go' }] }], options)

  it('terminates early in terminate mode', async () => {
    // Script: 5 identical tool calls plus a text turn that must never be reached.
    const runner = buildRunner(stuckScript(5), {
      maxRepetitions: 3,
      onLoopDetected: 'terminate',
    })

    const outcome = await runGo(runner)

    expect(outcome.loopDetected).toBe(true)
    expect(outcome.turns).toBe(3)
  })

  it('emits loop_detected stream event in terminate mode', async () => {
    const runner = buildRunner(stuckScript(5), {
      maxRepetitions: 3,
      onLoopDetected: 'terminate',
    })

    const seen: StreamEvent[] = []
    for await (const event of runner.stream([{ role: 'user', content: [{ type: 'text', text: 'go' }] }])) {
      seen.push(event)
    }

    const loopEvents = seen.filter(event => event.type === 'loop_detected')
    expect(loopEvents).toHaveLength(1)
    const info = loopEvents[0]!.data as LoopInfo
    expect(info.kind).toBe('tool_repetition')
    expect(info.repetitions).toBe(3)
  })

  it('calls onWarning in terminate mode', async () => {
    const runner = buildRunner(stuckScript(5), {
      maxRepetitions: 3,
      onLoopDetected: 'terminate',
    })

    const warnings: string[] = []
    await runGo(runner, { onWarning: (msg) => warnings.push(msg) })

    expect(warnings).toHaveLength(1)
    expect(warnings[0]).toContain('loop')
  })

  it('injects warning message in warn mode and terminates on second detection', async () => {
    // 6 identical tool calls — warn fires at turn 3, then terminate at turn 4+
    const runner = buildRunner(stuckScript(6), {
      maxRepetitions: 3,
      onLoopDetected: 'warn',
    })

    const outcome = await runGo(runner)

    // Should have terminated after the second detection (turn 4), not run all 6
    expect(outcome.loopDetected).toBe(true)
    expect(outcome.turns).toBeLessThanOrEqual(5)
  })

  it('supports custom callback returning terminate', async () => {
    const onLoop = vi.fn().mockReturnValue('terminate')
    const runner = buildRunner(stuckScript(5), {
      maxRepetitions: 3,
      onLoopDetected: onLoop,
    })

    const outcome = await runGo(runner)

    expect(onLoop).toHaveBeenCalledOnce()
    expect(outcome.loopDetected).toBe(true)
    expect(outcome.turns).toBe(3)
  })

  it('supports custom callback returning inject', async () => {
    // 'inject' behaves like 'warn': injects warning, terminates on second detection
    const onLoop = vi.fn().mockReturnValue('inject')
    const runner = buildRunner(stuckScript(6), {
      maxRepetitions: 3,
      onLoopDetected: onLoop,
    })

    const outcome = await runGo(runner)

    expect(onLoop).toHaveBeenCalledTimes(2) // first triggers inject, second forces terminate
    expect(outcome.loopDetected).toBe(true)
    expect(outcome.turns).toBeLessThanOrEqual(5)
  })

  it('supports custom callback returning continue', async () => {
    const onLoop = vi.fn().mockReturnValue('continue')
    const runner = buildRunner(stuckScript(5), {
      maxRepetitions: 3,
      onLoopDetected: onLoop,
    })

    const outcome = await runGo(runner)

    // continue means no termination — runs until maxTurns or text response
    // callback fires at turn 3, 4, 5 (all repeating)
    expect(onLoop).toHaveBeenCalledTimes(3)
    expect(outcome.loopDetected).toBeUndefined()
  })

  it('supports async onLoopDetected callback', async () => {
    const onLoop = vi.fn().mockResolvedValue('terminate')
    const runner = buildRunner(stuckScript(5), {
      maxRepetitions: 3,
      onLoopDetected: onLoop,
    })

    const outcome = await runGo(runner)

    expect(onLoop).toHaveBeenCalledOnce()
    expect(outcome.loopDetected).toBe(true)
    expect(outcome.turns).toBe(3)
  })

  it('gives a fresh warning cycle after agent recovers from a loop', async () => {
    // Sequence: 3x same tool (loop #1 warned) → 1x different tool (recovery)
    // → 3x same tool again (loop #2 should warn, NOT immediate terminate)
    // → 1x more same tool (now terminates after 2nd warning)
    const script = [
      // Loop #1: 3 identical calls → triggers warn
      ...echoTurns('hi', 3),
      // Recovery: different call
      ...echoTurns('different', 1),
      // Loop #2: 3 identical calls → should warn again (not terminate);
      // the 4th identical call → second warning, force terminate
      ...echoTurns('stuck again', 4),
      textResponse('done'),
    ]
    const warnings: string[] = []
    const runner = buildRunner(script, {
      maxRepetitions: 3,
      onLoopDetected: 'warn',
    })

    const outcome = await runGo(runner, { onWarning: (msg) => warnings.push(msg) })

    // Three warnings: loop #1 warn, loop #2 warn, loop #2 force-terminate
    expect(warnings).toHaveLength(3)
    expect(outcome.loopDetected).toBe(true)
    // Should have run past loop #1 (3 turns) + recovery (1) + loop #2 warn (3) + terminate (1) = 8
    expect(outcome.turns).toBe(8)
  })

  it('injects warning TextBlock into tool-result user message in warn mode', async () => {
    // 4 identical tool calls: warn fires at turn 3, terminate at turn 4
    const runner = buildRunner(stuckScript(4), {
      maxRepetitions: 3,
      onLoopDetected: 'warn',
    })

    const outcome = await runGo(runner)

    // Collect text blocks in user messages that start with the WARNING marker
    const isWarningBlock = (b: { type: string }): b is TextBlock =>
      b.type === 'text' && 'text' in b && (b as TextBlock).text.startsWith('WARNING:')
    const warningBlocks = outcome.messages
      .filter(message => message.role === 'user')
      .flatMap(message => message.content.filter(isWarningBlock))

    expect(warningBlocks).toHaveLength(1)
    expect(warningBlocks[0]!.text).toContain('repeating the same tool calls')
  })

  it('does not interfere when loopDetection is not configured', async () => {
    const registry = new ToolRegistry()
    registry.register(echoTool)
    const runner = new AgentRunner(mockAdapter(stuckScript(5)), registry, new ToolExecutor(registry), {
      model: 'mock-model',
      maxTurns: 10,
      allowedTools: ['echo'],
      agentName: 'test-agent',
      // no loopDetection
    })

    const outcome = await runGo(runner)

    // All 5 tool turns + 1 text turn = 6
    expect(outcome.turns).toBe(6)
    expect(outcome.loopDetected).toBeUndefined()
  })
})
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { fromOpenAICompletion } from '../src/llm/openai-common.js'
|
|
3
|
+
import type { ChatCompletion } from 'openai/resources/chat/completions/index.js'
|
|
4
|
+
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
// Helpers
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
/**
 * Builds a minimal single-choice `ChatCompletion` fixture.
 *
 * @param overrides - Per-test values for the assistant message:
 *   - `content`: message text; defaults to `null` when omitted.
 *   - `tool_calls`: native tool calls, passed through unchanged.
 *   - `finish_reason`: defaults to `'stop'`.
 * @returns A completion with fixed id/model/usage and a `created` timestamp
 *   in whole seconds.
 */
function makeCompletion(overrides: {
  content?: string | null
  tool_calls?: ChatCompletion.Choice['message']['tool_calls']
  finish_reason?: string
}): ChatCompletion {
  return {
    id: 'chatcmpl-test',
    object: 'chat.completion',
    // The OpenAI `created` field is a Unix timestamp in seconds, not milliseconds.
    created: Math.floor(Date.now() / 1000),
    model: 'test-model',
    choices: [
      {
        index: 0,
        message: {
          role: 'assistant',
          content: overrides.content ?? null,
          tool_calls: overrides.tool_calls,
          refusal: null,
        },
        // Tests pass plain strings; narrow to the literals the SDK type accepts.
        finish_reason: (overrides.finish_reason ?? 'stop') as 'stop' | 'tool_calls',
        logprobs: null,
      },
    ],
    usage: {
      prompt_tokens: 10,
      completion_tokens: 20,
      total_tokens: 30,
    },
  }
}
|
|
39
|
+
|
|
40
|
+
/** Tool names passed to `fromOpenAICompletion` as the known tools for fallback extraction. */
const TOOL_NAMES = [
  'bash',
  'file_read',
  'file_write',
]
|
|
41
|
+
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
// Tests
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
/**
 * Tests for the text-based tool-call fallback in `fromOpenAICompletion`:
 * when a completion has no native `tool_calls`, tool calls written as JSON in
 * the message text are extracted, provided known tool names were supplied.
 */
describe('fromOpenAICompletion fallback extraction', () => {
  type Converted = ReturnType<typeof fromOpenAICompletion>

  /** Content blocks of `response` whose `type` equals `kind`. */
  const blocksOfType = (response: Converted, kind: 'text' | 'tool_use') =>
    response.content.filter(block => block.type === kind)

  /** Native function-style tool call fixture. */
  const nativeCall = (id: string, name: string, args: string) => ({
    id,
    type: 'function' as const,
    function: { name, arguments: args },
  })

  it('returns normal tool_calls when present (no fallback)', () => {
    const response = fromOpenAICompletion(
      makeCompletion({
        content: 'Let me run a command.',
        tool_calls: [nativeCall('call_123', 'bash', '{"command": "ls"}')],
        finish_reason: 'tool_calls',
      }),
      TOOL_NAMES,
    )

    const toolBlocks = blocksOfType(response, 'tool_use')
    expect(toolBlocks).toHaveLength(1)
    const first = toolBlocks[0]!
    expect(first.type === 'tool_use' && first.name).toBe('bash')
    expect(first.type === 'tool_use' && first.id).toBe('call_123')
    expect(response.stop_reason).toBe('tool_use')
  })

  it('extracts tool calls from text when tool_calls is absent', () => {
    const response = fromOpenAICompletion(
      makeCompletion({
        content: 'I will run this:\n{"name": "bash", "arguments": {"command": "pwd"}}',
        finish_reason: 'stop',
      }),
      TOOL_NAMES,
    )

    const toolBlocks = blocksOfType(response, 'tool_use')
    expect(toolBlocks).toHaveLength(1)
    const first = toolBlocks[0]!
    expect(first.type === 'tool_use' && first.name).toBe('bash')
    expect(first.type === 'tool_use' && first.input).toEqual({ command: 'pwd' })
    // stop_reason should be corrected to tool_use
    expect(response.stop_reason).toBe('tool_use')
  })

  it('does not fallback when knownToolNames is not provided', () => {
    const response = fromOpenAICompletion(
      makeCompletion({
        content: '{"name": "bash", "arguments": {"command": "ls"}}',
        finish_reason: 'stop',
      }),
    )

    expect(blocksOfType(response, 'tool_use')).toHaveLength(0)
    expect(response.stop_reason).toBe('end_turn')
  })

  it('does not fallback when knownToolNames is empty', () => {
    const response = fromOpenAICompletion(
      makeCompletion({
        content: '{"name": "bash", "arguments": {"command": "ls"}}',
        finish_reason: 'stop',
      }),
      [],
    )

    expect(blocksOfType(response, 'tool_use')).toHaveLength(0)
    expect(response.stop_reason).toBe('end_turn')
  })

  it('returns plain text when no tool calls found in text', () => {
    const response = fromOpenAICompletion(
      makeCompletion({
        content: 'Hello! How can I help you today?',
        finish_reason: 'stop',
      }),
      TOOL_NAMES,
    )

    expect(blocksOfType(response, 'tool_use')).toHaveLength(0)
    expect(response.stop_reason).toBe('end_turn')
  })

  it('preserves text block alongside extracted tool blocks', () => {
    const response = fromOpenAICompletion(
      makeCompletion({
        content: 'Let me check:\n{"name": "file_read", "arguments": {"path": "/tmp/x"}}',
        finish_reason: 'stop',
      }),
      TOOL_NAMES,
    )

    const textBlocks = blocksOfType(response, 'text')
    const toolBlocks = blocksOfType(response, 'tool_use')
    expect(textBlocks).toHaveLength(1)
    expect(toolBlocks).toHaveLength(1)
  })

  it('does not double-extract when native tool_calls already present', () => {
    // Text also contains a tool call JSON, but native tool_calls is populated.
    // The fallback should NOT run.
    const response = fromOpenAICompletion(
      makeCompletion({
        content: '{"name": "file_read", "arguments": {"path": "/tmp/y"}}',
        tool_calls: [nativeCall('call_native', 'bash', '{"command": "ls"}')],
        finish_reason: 'tool_calls',
      }),
      TOOL_NAMES,
    )

    const toolBlocks = blocksOfType(response, 'tool_use')
    // Should only have the native one, not the text-extracted one
    expect(toolBlocks).toHaveLength(1)
    const first = toolBlocks[0]!
    expect(first.type === 'tool_use' && first.id).toBe('call_native')
  })
})
|