@jackchen_me/open-multi-agent 0.2.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. package/.github/workflows/ci.yml +1 -1
  2. package/CLAUDE.md +11 -3
  3. package/README.md +87 -20
  4. package/README_zh.md +85 -25
  5. package/dist/agent/agent.d.ts +15 -1
  6. package/dist/agent/agent.d.ts.map +1 -1
  7. package/dist/agent/agent.js +144 -10
  8. package/dist/agent/agent.js.map +1 -1
  9. package/dist/agent/loop-detector.d.ts +39 -0
  10. package/dist/agent/loop-detector.d.ts.map +1 -0
  11. package/dist/agent/loop-detector.js +122 -0
  12. package/dist/agent/loop-detector.js.map +1 -0
  13. package/dist/agent/pool.d.ts +2 -1
  14. package/dist/agent/pool.d.ts.map +1 -1
  15. package/dist/agent/pool.js +4 -2
  16. package/dist/agent/pool.js.map +1 -1
  17. package/dist/agent/runner.d.ts +23 -1
  18. package/dist/agent/runner.d.ts.map +1 -1
  19. package/dist/agent/runner.js +113 -12
  20. package/dist/agent/runner.js.map +1 -1
  21. package/dist/index.d.ts +3 -1
  22. package/dist/index.d.ts.map +1 -1
  23. package/dist/index.js +2 -0
  24. package/dist/index.js.map +1 -1
  25. package/dist/llm/adapter.d.ts +4 -1
  26. package/dist/llm/adapter.d.ts.map +1 -1
  27. package/dist/llm/adapter.js +11 -0
  28. package/dist/llm/adapter.js.map +1 -1
  29. package/dist/llm/copilot.d.ts.map +1 -1
  30. package/dist/llm/copilot.js +2 -1
  31. package/dist/llm/copilot.js.map +1 -1
  32. package/dist/llm/gemini.d.ts +65 -0
  33. package/dist/llm/gemini.d.ts.map +1 -0
  34. package/dist/llm/gemini.js +317 -0
  35. package/dist/llm/gemini.js.map +1 -0
  36. package/dist/llm/grok.d.ts +21 -0
  37. package/dist/llm/grok.d.ts.map +1 -0
  38. package/dist/llm/grok.js +24 -0
  39. package/dist/llm/grok.js.map +1 -0
  40. package/dist/llm/openai-common.d.ts +8 -1
  41. package/dist/llm/openai-common.d.ts.map +1 -1
  42. package/dist/llm/openai-common.js +35 -2
  43. package/dist/llm/openai-common.js.map +1 -1
  44. package/dist/llm/openai.d.ts +1 -1
  45. package/dist/llm/openai.d.ts.map +1 -1
  46. package/dist/llm/openai.js +20 -2
  47. package/dist/llm/openai.js.map +1 -1
  48. package/dist/orchestrator/orchestrator.d.ts.map +1 -1
  49. package/dist/orchestrator/orchestrator.js +89 -9
  50. package/dist/orchestrator/orchestrator.js.map +1 -1
  51. package/dist/task/queue.d.ts +31 -2
  52. package/dist/task/queue.d.ts.map +1 -1
  53. package/dist/task/queue.js +69 -2
  54. package/dist/task/queue.js.map +1 -1
  55. package/dist/tool/text-tool-extractor.d.ts +32 -0
  56. package/dist/tool/text-tool-extractor.d.ts.map +1 -0
  57. package/dist/tool/text-tool-extractor.js +187 -0
  58. package/dist/tool/text-tool-extractor.js.map +1 -0
  59. package/dist/types.d.ts +139 -7
  60. package/dist/types.d.ts.map +1 -1
  61. package/dist/utils/trace.d.ts +12 -0
  62. package/dist/utils/trace.d.ts.map +1 -0
  63. package/dist/utils/trace.js +30 -0
  64. package/dist/utils/trace.js.map +1 -0
  65. package/examples/06-local-model.ts +1 -0
  66. package/examples/08-gemma4-local.ts +76 -87
  67. package/examples/09-structured-output.ts +73 -0
  68. package/examples/10-task-retry.ts +132 -0
  69. package/examples/11-trace-observability.ts +133 -0
  70. package/examples/12-grok.ts +154 -0
  71. package/examples/13-gemini.ts +48 -0
  72. package/package.json +11 -1
  73. package/src/agent/agent.ts +159 -10
  74. package/src/agent/loop-detector.ts +137 -0
  75. package/src/agent/pool.ts +9 -2
  76. package/src/agent/runner.ts +148 -19
  77. package/src/index.ts +15 -0
  78. package/src/llm/adapter.ts +12 -1
  79. package/src/llm/copilot.ts +2 -1
  80. package/src/llm/gemini.ts +378 -0
  81. package/src/llm/grok.ts +29 -0
  82. package/src/llm/openai-common.ts +41 -2
  83. package/src/llm/openai.ts +23 -3
  84. package/src/orchestrator/orchestrator.ts +105 -11
  85. package/src/task/queue.ts +73 -3
  86. package/src/tool/text-tool-extractor.ts +219 -0
  87. package/src/types.ts +157 -6
  88. package/src/utils/trace.ts +34 -0
  89. package/tests/agent-hooks.test.ts +473 -0
  90. package/tests/agent-pool.test.ts +212 -0
  91. package/tests/approval.test.ts +464 -0
  92. package/tests/built-in-tools.test.ts +393 -0
  93. package/tests/gemini-adapter.test.ts +97 -0
  94. package/tests/grok-adapter.test.ts +74 -0
  95. package/tests/llm-adapters.test.ts +357 -0
  96. package/tests/loop-detection.test.ts +456 -0
  97. package/tests/openai-fallback.test.ts +159 -0
  98. package/tests/orchestrator.test.ts +281 -0
  99. package/tests/scheduler.test.ts +221 -0
  100. package/tests/team-messaging.test.ts +329 -0
  101. package/tests/text-tool-extractor.test.ts +170 -0
  102. package/tests/trace.test.ts +453 -0
  103. package/vitest.config.ts +9 -0
  104. package/examples/09-gemma4-auto-orchestration.ts +0 -162
@@ -0,0 +1,456 @@
1
+ import { describe, it, expect, vi } from 'vitest'
2
+ import { z } from 'zod'
3
+ import { LoopDetector } from '../src/agent/loop-detector.js'
4
+ import { AgentRunner } from '../src/agent/runner.js'
5
+ import { ToolRegistry, defineTool } from '../src/tool/framework.js'
6
+ import { ToolExecutor } from '../src/tool/executor.js'
7
+ import type { LLMAdapter, LLMResponse, StreamEvent } from '../src/types.js'
8
+
9
+ // ---------------------------------------------------------------------------
10
+ // Mock helpers
11
+ // ---------------------------------------------------------------------------
12
+
13
/**
 * Builds a scripted {@link LLMAdapter} for tests.
 *
 * Every `chat()` call hands back the next entry of `responses`, in order.
 * `stream()` is an empty async generator because these tests never consume it.
 *
 * @param responses - Replies to return from successive `chat()` calls.
 * @returns An adapter named `'mock'` that replays `responses`.
 * @throws From `chat()`: an `Error` when it is called more times than there
 *   are scripted responses, so a mis-sized script fails loudly instead of
 *   leaking `undefined` to the caller.
 */
function mockAdapter(responses: LLMResponse[]): LLMAdapter {
  let callIndex = 0
  return {
    name: 'mock',
    async chat() {
      const next = responses[callIndex]
      if (next === undefined) {
        throw new Error(
          `mockAdapter: script exhausted - chat() call #${callIndex + 1} requested but only ${responses.length} response(s) were scripted`,
        )
      }
      callIndex += 1
      return next
    },
    async *stream() {
      /* unused */
    },
  }
}
25
+
26
/**
 * Creates a scripted assistant reply that carries a single text block and
 * ends the turn (`stop_reason: 'end_turn'`).
 *
 * @param text - Body of the text block.
 * @returns A response on `'mock-model'` with fixed token usage (10 in / 20 out)
 *   and an id of the form `resp-<random base-36 suffix>`.
 */
function textResponse(text: string): LLMResponse {
  const id = `resp-${Math.random().toString(36).slice(2)}`
  const textBlock = { type: 'text' as const, text }
  return {
    id,
    content: [textBlock],
    model: 'mock-model',
    stop_reason: 'end_turn',
    usage: { input_tokens: 10, output_tokens: 20 },
  }
}
35
+
36
/**
 * Creates a scripted assistant reply that requests exactly one tool call
 * (`stop_reason: 'tool_use'`).
 *
 * @param toolName - Name placed on the tool_use block.
 * @param input - Arguments placed on the tool_use block (passed through by reference).
 * @returns A response on `'mock-model'` with fixed token usage (15 in / 25 out);
 *   the response id is `resp-<random>` and the block id is `tu-<random>`.
 */
function toolUseResponse(toolName: string, input: Record<string, unknown>): LLMResponse {
  // Response id is generated before the block id, matching evaluation order of the literal.
  const id = `resp-${Math.random().toString(36).slice(2)}`
  const toolCall = {
    type: 'tool_use' as const,
    id: `tu-${Math.random().toString(36).slice(2)}`,
    name: toolName,
    input,
  }
  return {
    id,
    content: [toolCall],
    model: 'mock-model',
    stop_reason: 'tool_use',
    usage: { input_tokens: 15, output_tokens: 25 },
  }
}
52
+
53
/** Input accepted by the echo fixture: a single string field, `message`. */
const echoInputSchema = z.object({ message: z.string() })

/**
 * Minimal tool fixture registered with the runner in the integration tests.
 * Its result simply carries the received `message` back as `data`.
 */
const echoTool = defineTool({
  name: 'echo',
  description: 'Echoes input',
  inputSchema: echoInputSchema,
  execute: async ({ message }) => ({ data: message }),
})
61
+
62
+ // ---------------------------------------------------------------------------
63
+ // Unit tests — LoopDetector class
64
+ // ---------------------------------------------------------------------------
65
+
66
/**
 * Unit tests for `LoopDetector` in isolation.
 *
 * `recordToolCalls` / `recordText` are expected to return `null` while no loop
 * is detected, and an info object (`kind`, `repetitions`) once the same
 * signature has been recorded `maxRepetitions` times in a row (3 when the
 * detector is constructed without options, per the "default threshold" cases).
 */
describe('LoopDetector', () => {
  describe('tool call repetition', () => {
    it('returns null for non-repeating tool calls', () => {
      const detector = new LoopDetector()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])).toBeNull()
      expect(detector.recordToolCalls([{ name: 'b', input: { x: 2 } }])).toBeNull()
      expect(detector.recordToolCalls([{ name: 'c', input: { x: 3 } }])).toBeNull()
    })

    it('detects 3 identical tool calls (default threshold)', () => {
      const detector = new LoopDetector()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])).toBeNull()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])).toBeNull()
      const info = detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])
      expect(info).not.toBeNull()
      expect(info!.kind).toBe('tool_repetition')
      expect(info!.repetitions).toBe(3)
    })

    it('does not trigger when args differ', () => {
      const detector = new LoopDetector()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])).toBeNull()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 2 } }])).toBeNull()
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 3 } }])).toBeNull()
    })

    it('resets count when a different call intervenes', () => {
      const detector = new LoopDetector()
      detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])
      detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])
      // Different call breaks the streak
      detector.recordToolCalls([{ name: 'b', input: { x: 1 } }])
      expect(detector.recordToolCalls([{ name: 'a', input: { x: 1 } }])).toBeNull()
    })

    it('handles multi-tool turns with order-independent signatures', () => {
      const detector = new LoopDetector()
      const toolsA = [
        { name: 'read', input: { file: 'a.ts' } },
        { name: 'read', input: { file: 'b.ts' } },
      ]
      // Same tools in different order
      const toolsB = [
        { name: 'read', input: { file: 'b.ts' } },
        { name: 'read', input: { file: 'a.ts' } },
      ]
      expect(detector.recordToolCalls(toolsA)).toBeNull()
      expect(detector.recordToolCalls(toolsB)).toBeNull()
      const info = detector.recordToolCalls(toolsA)
      expect(info).not.toBeNull()
      expect(info!.kind).toBe('tool_repetition')
    })

    it('respects custom threshold', () => {
      const detector = new LoopDetector({ maxRepetitions: 2 })
      expect(detector.recordToolCalls([{ name: 'a', input: {} }])).toBeNull()
      const info = detector.recordToolCalls([{ name: 'a', input: {} }])
      expect(info).not.toBeNull()
      expect(info!.repetitions).toBe(2)
    })

    it('returns null for empty blocks', () => {
      const detector = new LoopDetector()
      expect(detector.recordToolCalls([])).toBeNull()
    })

    it('produces deterministic signatures regardless of key order', () => {
      const detector = new LoopDetector()
      // Same keys and values; only the property order of `input` varies.
      detector.recordToolCalls([{ name: 'a', input: { b: 2, a: 1 } }])
      detector.recordToolCalls([{ name: 'a', input: { a: 1, b: 2 } }])
      const info = detector.recordToolCalls([{ name: 'a', input: { b: 2, a: 1 } }])
      expect(info).not.toBeNull()
    })
  })

  describe('text repetition', () => {
    it('returns null for non-repeating text', () => {
      const detector = new LoopDetector()
      expect(detector.recordText('hello')).toBeNull()
      expect(detector.recordText('world')).toBeNull()
      expect(detector.recordText('foo')).toBeNull()
    })

    it('detects 3 identical texts (default threshold)', () => {
      const detector = new LoopDetector()
      expect(detector.recordText('stuck')).toBeNull()
      expect(detector.recordText('stuck')).toBeNull()
      const info = detector.recordText('stuck')
      expect(info).not.toBeNull()
      expect(info!.kind).toBe('text_repetition')
      expect(info!.repetitions).toBe(3)
    })

    it('ignores empty or whitespace-only text', () => {
      const detector = new LoopDetector()
      // Three blank inputs in a row would hit the default threshold if they were counted.
      expect(detector.recordText('')).toBeNull()
      expect(detector.recordText('   ')).toBeNull()
      expect(detector.recordText('\n\t')).toBeNull()
    })

    it('normalises whitespace before comparison', () => {
      const detector = new LoopDetector()
      // The inputs differ ONLY in the amount of whitespace between the words;
      // identical literals would pass even without any normalisation.
      // NOTE(review): assumes the detector collapses internal whitespace runs - confirm.
      detector.recordText('hello world')
      detector.recordText('hello  world')
      const info = detector.recordText('hello   world')
      expect(info).not.toBeNull()
    })
  })

  describe('window size', () => {
    it('clamps windowSize to at least maxRepeats', () => {
      // Window of 2 with threshold 3 is auto-clamped to 3.
      const detector = new LoopDetector({ loopDetectionWindow: 2, maxRepetitions: 3 })
      detector.recordToolCalls([{ name: 'a', input: {} }])
      detector.recordToolCalls([{ name: 'a', input: {} }])
      // Third call triggers because window was clamped to 3
      const info = detector.recordToolCalls([{ name: 'a', input: {} }])
      expect(info).not.toBeNull()
      expect(info!.repetitions).toBe(3)
    })

    it('works correctly when window >= threshold', () => {
      const detector = new LoopDetector({ loopDetectionWindow: 4, maxRepetitions: 3 })
      detector.recordToolCalls([{ name: 'a', input: {} }])
      detector.recordToolCalls([{ name: 'a', input: {} }])
      const info = detector.recordToolCalls([{ name: 'a', input: {} }])
      expect(info).not.toBeNull()
    })
  })
})
196
+
197
+ // ---------------------------------------------------------------------------
198
+ // Integration tests — AgentRunner with loop detection
199
+ // ---------------------------------------------------------------------------
200
+
201
/**
 * Integration tests: an `AgentRunner` wired to a scripted mock adapter and the
 * echo tool, exercised with the different `onLoopDetected` settings
 * ('terminate', 'warn', and sync/async callbacks).
 */
describe('AgentRunner loop detection', () => {
  type LoopConfig = import('../src/types.js').LoopDetectionConfig
  type LoopInfo = import('../src/types.js').LoopDetectionInfo
  type TextBlockT = import('../src/types.js').TextBlock

  /** Fresh single-message conversation: the user says "go". */
  const userGo = () => [
    { role: 'user' as const, content: [{ type: 'text' as const, text: 'go' }] },
  ]

  /** Script of `identicalCalls` identical echo tool calls followed by a final text reply. */
  const stuckScript = (identicalCalls: number): LLMResponse[] => [
    ...Array.from({ length: identicalCalls }, () => toolUseResponse('echo', { message: 'hi' })),
    textResponse('done'),
  ]

  /** Runner over the scripted responses with only the echo tool and the given loop-detection config. */
  function buildRunner(responses: LLMResponse[], loopDetection: LoopConfig) {
    const adapter = mockAdapter(responses)
    const registry = new ToolRegistry()
    registry.register(echoTool)
    return new AgentRunner(adapter, registry, new ToolExecutor(registry), {
      model: 'mock-model',
      maxTurns: 10,
      allowedTools: ['echo'],
      agentName: 'test-agent',
      loopDetection,
    })
  }

  it('terminates early in terminate mode', async () => {
    // 5 identical tool calls, then a text response (should never reach it)
    const runner = buildRunner(stuckScript(5), {
      maxRepetitions: 3,
      onLoopDetected: 'terminate',
    })

    const outcome = await runner.run(userGo())

    expect(outcome.loopDetected).toBe(true)
    expect(outcome.turns).toBe(3)
  })

  it('emits loop_detected stream event in terminate mode', async () => {
    const runner = buildRunner(stuckScript(5), {
      maxRepetitions: 3,
      onLoopDetected: 'terminate',
    })

    const seen: StreamEvent[] = []
    for await (const streamed of runner.stream(userGo())) {
      seen.push(streamed)
    }

    const loopEvents = seen.filter(e => e.type === 'loop_detected')
    expect(loopEvents).toHaveLength(1)
    const info = loopEvents[0]!.data as LoopInfo
    expect(info.kind).toBe('tool_repetition')
    expect(info.repetitions).toBe(3)
  })

  it('calls onWarning in terminate mode', async () => {
    const runner = buildRunner(stuckScript(5), {
      maxRepetitions: 3,
      onLoopDetected: 'terminate',
    })

    const warnings: string[] = []
    await runner.run(userGo(), { onWarning: (msg) => warnings.push(msg) })

    expect(warnings).toHaveLength(1)
    expect(warnings[0]).toContain('loop')
  })

  it('injects warning message in warn mode and terminates on second detection', async () => {
    // 6 identical tool calls — warn fires at turn 3, then terminate at turn 4+
    const runner = buildRunner(stuckScript(6), {
      maxRepetitions: 3,
      onLoopDetected: 'warn',
    })

    const outcome = await runner.run(userGo())

    // Should have terminated after the second detection (turn 4), not run all 6
    expect(outcome.loopDetected).toBe(true)
    expect(outcome.turns).toBeLessThanOrEqual(5)
  })

  it('supports custom callback returning terminate', async () => {
    const onLoop = vi.fn().mockReturnValue('terminate')
    const runner = buildRunner(stuckScript(5), {
      maxRepetitions: 3,
      onLoopDetected: onLoop,
    })

    const outcome = await runner.run(userGo())

    expect(onLoop).toHaveBeenCalledOnce()
    expect(outcome.loopDetected).toBe(true)
    expect(outcome.turns).toBe(3)
  })

  it('supports custom callback returning inject', async () => {
    // 'inject' behaves like 'warn': injects warning, terminates on second detection
    const onLoop = vi.fn().mockReturnValue('inject')
    const runner = buildRunner(stuckScript(6), {
      maxRepetitions: 3,
      onLoopDetected: onLoop,
    })

    const outcome = await runner.run(userGo())

    expect(onLoop).toHaveBeenCalledTimes(2) // first triggers inject, second forces terminate
    expect(outcome.loopDetected).toBe(true)
    expect(outcome.turns).toBeLessThanOrEqual(5)
  })

  it('supports custom callback returning continue', async () => {
    const onLoop = vi.fn().mockReturnValue('continue')
    const runner = buildRunner(stuckScript(5), {
      maxRepetitions: 3,
      onLoopDetected: onLoop,
    })

    const outcome = await runner.run(userGo())

    // continue means no termination — runs until maxTurns or text response
    // callback fires at turn 3, 4, 5 (all repeating)
    expect(onLoop).toHaveBeenCalledTimes(3)
    expect(outcome.loopDetected).toBeUndefined()
  })

  it('supports async onLoopDetected callback', async () => {
    const onLoop = vi.fn().mockResolvedValue('terminate')
    const runner = buildRunner(stuckScript(5), {
      maxRepetitions: 3,
      onLoopDetected: onLoop,
    })

    const outcome = await runner.run(userGo())

    expect(onLoop).toHaveBeenCalledOnce()
    expect(outcome.loopDetected).toBe(true)
    expect(outcome.turns).toBe(3)
  })

  it('gives a fresh warning cycle after agent recovers from a loop', async () => {
    // Sequence: 3x same tool (loop #1 warned) → 1x different tool (recovery)
    // → 3x same tool again (loop #2 should warn, NOT immediate terminate)
    // → 1x more same tool (now terminates after 2nd warning)
    const script = [
      // Loop #1: 3 identical calls → triggers warn
      toolUseResponse('echo', { message: 'hi' }),
      toolUseResponse('echo', { message: 'hi' }),
      toolUseResponse('echo', { message: 'hi' }),
      // Recovery: different call
      toolUseResponse('echo', { message: 'different' }),
      // Loop #2: 3 identical calls → should trigger warn again (not terminate)
      toolUseResponse('echo', { message: 'stuck again' }),
      toolUseResponse('echo', { message: 'stuck again' }),
      toolUseResponse('echo', { message: 'stuck again' }),
      // 4th identical → second warning, force terminate
      toolUseResponse('echo', { message: 'stuck again' }),
      textResponse('done'),
    ]
    const warnings: string[] = []
    const runner = buildRunner(script, {
      maxRepetitions: 3,
      onLoopDetected: 'warn',
    })

    const outcome = await runner.run(userGo(), { onWarning: (msg) => warnings.push(msg) })

    // Three warnings: loop #1 warn, loop #2 warn, loop #2 force-terminate
    expect(warnings).toHaveLength(3)
    expect(outcome.loopDetected).toBe(true)
    // Should have run past loop #1 (3 turns) + recovery (1) + loop #2 warn (3) + terminate (1) = 8
    expect(outcome.turns).toBe(8)
  })

  it('injects warning TextBlock into tool-result user message in warn mode', async () => {
    // 4 identical tool calls: warn fires at turn 3, terminate at turn 4
    const runner = buildRunner(stuckScript(4), {
      maxRepetitions: 3,
      onLoopDetected: 'warn',
    })

    const outcome = await runner.run(userGo())

    // Find user messages that contain a text block with the WARNING string
    const isWarningBlock = (b: { type: string }): b is TextBlockT =>
      b.type === 'text' && 'text' in b && (b as TextBlockT).text.startsWith('WARNING:')
    const warningBlocks = outcome.messages
      .filter(m => m.role === 'user')
      .flatMap(m => m.content.filter(isWarningBlock))

    expect(warningBlocks).toHaveLength(1)
    expect(warningBlocks[0]!.text).toContain('repeating the same tool calls')
  })

  it('does not interfere when loopDetection is not configured', async () => {
    const registry = new ToolRegistry()
    registry.register(echoTool)
    const runner = new AgentRunner(mockAdapter(stuckScript(5)), registry, new ToolExecutor(registry), {
      model: 'mock-model',
      maxTurns: 10,
      allowedTools: ['echo'],
      agentName: 'test-agent',
      // no loopDetection
    })

    const outcome = await runner.run(userGo())

    // All 5 tool turns + 1 text turn = 6
    expect(outcome.turns).toBe(6)
    expect(outcome.loopDetected).toBeUndefined()
  })
})
@@ -0,0 +1,159 @@
1
+ import { describe, it, expect } from 'vitest'
2
+ import { fromOpenAICompletion } from '../src/llm/openai-common.js'
3
+ import type { ChatCompletion } from 'openai/resources/chat/completions/index.js'
4
+
5
+ // ---------------------------------------------------------------------------
6
+ // Helpers
7
+ // ---------------------------------------------------------------------------
8
+
9
/**
 * Builds a minimal single-choice `ChatCompletion` fixture.
 *
 * @param overrides - Optional pieces of the assistant message:
 *   - `content`: message text; `null` when omitted.
 *   - `tool_calls`: native tool calls; left `undefined` when omitted.
 *   - `finish_reason`: defaults to `'stop'`. Accepted as a plain string and
 *     cast, so callers are responsible for passing a value the SDK type allows.
 * @returns A completion with id `'chatcmpl-test'`, model `'test-model'`,
 *   and fixed usage of 10 prompt / 20 completion / 30 total tokens.
 */
function makeCompletion(overrides: {
  content?: string | null
  tool_calls?: ChatCompletion.Choice['message']['tool_calls']
  finish_reason?: string
}): ChatCompletion {
  return {
    id: 'chatcmpl-test',
    object: 'chat.completion',
    // `created` is a Unix timestamp in SECONDS; Date.now() yields milliseconds.
    created: Math.floor(Date.now() / 1000),
    model: 'test-model',
    choices: [
      {
        index: 0,
        message: {
          role: 'assistant',
          content: overrides.content ?? null,
          tool_calls: overrides.tool_calls,
          refusal: null,
        },
        finish_reason: (overrides.finish_reason ?? 'stop') as 'stop' | 'tool_calls',
        logprobs: null,
      },
    ],
    usage: {
      prompt_tokens: 10,
      completion_tokens: 20,
      total_tokens: 30,
    },
  }
}
39
+
40
/** Tool names passed to `fromOpenAICompletion` as the known-tools list in these tests. */
const TOOL_NAMES = ['bash', 'file_read', 'file_write']

/** Converted response as produced by `fromOpenAICompletion`. */
type ConvertedResponse = ReturnType<typeof fromOpenAICompletion>

/** The `tool_use` member of the response content union. */
type ToolUseContent = Extract<ConvertedResponse['content'][number], { type: 'tool_use' }>

/**
 * Returns only the `tool_use` blocks of a converted response, narrowed so that
 * `id` / `name` / `input` can be asserted on directly.
 */
function toolUseBlocks(response: ConvertedResponse): ToolUseContent[] {
  return response.content.filter((block): block is ToolUseContent => block.type === 'tool_use')
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

/**
 * Covers the text-based tool-call fallback of `fromOpenAICompletion`: tool
 * calls written as JSON in the message text are extracted only when native
 * `tool_calls` are absent and a non-empty list of known tool names is given.
 */
describe('fromOpenAICompletion fallback extraction', () => {
  it('returns normal tool_calls when present (no fallback)', () => {
    const completion = makeCompletion({
      content: 'Let me run a command.',
      tool_calls: [
        {
          id: 'call_123',
          type: 'function',
          function: {
            name: 'bash',
            arguments: '{"command": "ls"}',
          },
        },
      ],
      finish_reason: 'tool_calls',
    })

    const response = fromOpenAICompletion(completion, TOOL_NAMES)
    const toolBlocks = toolUseBlocks(response)
    expect(toolBlocks).toHaveLength(1)
    expect(toolBlocks[0]?.name).toBe('bash')
    expect(toolBlocks[0]?.id).toBe('call_123')
    expect(response.stop_reason).toBe('tool_use')
  })

  it('extracts tool calls from text when tool_calls is absent', () => {
    const completion = makeCompletion({
      content: 'I will run this:\n{"name": "bash", "arguments": {"command": "pwd"}}',
      finish_reason: 'stop',
    })

    const response = fromOpenAICompletion(completion, TOOL_NAMES)
    const toolBlocks = toolUseBlocks(response)
    expect(toolBlocks).toHaveLength(1)
    expect(toolBlocks[0]?.name).toBe('bash')
    expect(toolBlocks[0]?.input).toEqual({ command: 'pwd' })
    // stop_reason should be corrected to tool_use
    expect(response.stop_reason).toBe('tool_use')
  })

  it('does not fallback when knownToolNames is not provided', () => {
    const completion = makeCompletion({
      content: '{"name": "bash", "arguments": {"command": "ls"}}',
      finish_reason: 'stop',
    })

    const response = fromOpenAICompletion(completion)
    expect(toolUseBlocks(response)).toHaveLength(0)
    expect(response.stop_reason).toBe('end_turn')
  })

  it('does not fallback when knownToolNames is empty', () => {
    const completion = makeCompletion({
      content: '{"name": "bash", "arguments": {"command": "ls"}}',
      finish_reason: 'stop',
    })

    const response = fromOpenAICompletion(completion, [])
    expect(toolUseBlocks(response)).toHaveLength(0)
    expect(response.stop_reason).toBe('end_turn')
  })

  it('returns plain text when no tool calls found in text', () => {
    const completion = makeCompletion({
      content: 'Hello! How can I help you today?',
      finish_reason: 'stop',
    })

    const response = fromOpenAICompletion(completion, TOOL_NAMES)
    expect(toolUseBlocks(response)).toHaveLength(0)
    expect(response.stop_reason).toBe('end_turn')
  })

  it('preserves text block alongside extracted tool blocks', () => {
    const completion = makeCompletion({
      content: 'Let me check:\n{"name": "file_read", "arguments": {"path": "/tmp/x"}}',
      finish_reason: 'stop',
    })

    const response = fromOpenAICompletion(completion, TOOL_NAMES)
    const textBlocks = response.content.filter(b => b.type === 'text')
    expect(textBlocks).toHaveLength(1)
    expect(toolUseBlocks(response)).toHaveLength(1)
  })

  it('does not double-extract when native tool_calls already present', () => {
    // Text also contains a tool call JSON, but native tool_calls is populated.
    // The fallback should NOT run.
    const completion = makeCompletion({
      content: '{"name": "file_read", "arguments": {"path": "/tmp/y"}}',
      tool_calls: [
        {
          id: 'call_native',
          type: 'function',
          function: {
            name: 'bash',
            arguments: '{"command": "ls"}',
          },
        },
      ],
      finish_reason: 'tool_calls',
    })

    const response = fromOpenAICompletion(completion, TOOL_NAMES)
    const toolBlocks = toolUseBlocks(response)
    // Should only have the native one, not the text-extracted one
    expect(toolBlocks).toHaveLength(1)
    expect(toolBlocks[0]?.id).toBe('call_native')
  })
})