@jackchen_me/open-multi-agent 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/.github/ISSUE_TEMPLATE/bug_report.md +40 -0
  2. package/.github/ISSUE_TEMPLATE/feature_request.md +23 -0
  3. package/.github/pull_request_template.md +14 -0
  4. package/.github/workflows/ci.yml +23 -0
  5. package/CLAUDE.md +80 -0
  6. package/CODE_OF_CONDUCT.md +48 -0
  7. package/CONTRIBUTING.md +72 -0
  8. package/DECISIONS.md +43 -0
  9. package/README.md +144 -144
  10. package/README_zh.md +277 -0
  11. package/SECURITY.md +17 -0
  12. package/dist/agent/agent.d.ts +20 -1
  13. package/dist/agent/agent.d.ts.map +1 -1
  14. package/dist/agent/agent.js +233 -12
  15. package/dist/agent/agent.js.map +1 -1
  16. package/dist/agent/loop-detector.d.ts +39 -0
  17. package/dist/agent/loop-detector.d.ts.map +1 -0
  18. package/dist/agent/loop-detector.js +122 -0
  19. package/dist/agent/loop-detector.js.map +1 -0
  20. package/dist/agent/pool.d.ts +2 -1
  21. package/dist/agent/pool.d.ts.map +1 -1
  22. package/dist/agent/pool.js +4 -2
  23. package/dist/agent/pool.js.map +1 -1
  24. package/dist/agent/runner.d.ts +23 -1
  25. package/dist/agent/runner.d.ts.map +1 -1
  26. package/dist/agent/runner.js +113 -12
  27. package/dist/agent/runner.js.map +1 -1
  28. package/dist/agent/structured-output.d.ts +33 -0
  29. package/dist/agent/structured-output.d.ts.map +1 -0
  30. package/dist/agent/structured-output.js +116 -0
  31. package/dist/agent/structured-output.js.map +1 -0
  32. package/dist/index.d.ts +5 -2
  33. package/dist/index.d.ts.map +1 -1
  34. package/dist/index.js +4 -1
  35. package/dist/index.js.map +1 -1
  36. package/dist/llm/adapter.d.ts +12 -4
  37. package/dist/llm/adapter.d.ts.map +1 -1
  38. package/dist/llm/adapter.js +28 -5
  39. package/dist/llm/adapter.js.map +1 -1
  40. package/dist/llm/anthropic.d.ts +1 -1
  41. package/dist/llm/anthropic.d.ts.map +1 -1
  42. package/dist/llm/anthropic.js +2 -1
  43. package/dist/llm/anthropic.js.map +1 -1
  44. package/dist/llm/copilot.d.ts +92 -0
  45. package/dist/llm/copilot.d.ts.map +1 -0
  46. package/dist/llm/copilot.js +427 -0
  47. package/dist/llm/copilot.js.map +1 -0
  48. package/dist/llm/gemini.d.ts +65 -0
  49. package/dist/llm/gemini.d.ts.map +1 -0
  50. package/dist/llm/gemini.js +317 -0
  51. package/dist/llm/gemini.js.map +1 -0
  52. package/dist/llm/grok.d.ts +21 -0
  53. package/dist/llm/grok.d.ts.map +1 -0
  54. package/dist/llm/grok.js +24 -0
  55. package/dist/llm/grok.js.map +1 -0
  56. package/dist/llm/openai-common.d.ts +54 -0
  57. package/dist/llm/openai-common.d.ts.map +1 -0
  58. package/dist/llm/openai-common.js +242 -0
  59. package/dist/llm/openai-common.js.map +1 -0
  60. package/dist/llm/openai.d.ts +2 -2
  61. package/dist/llm/openai.d.ts.map +1 -1
  62. package/dist/llm/openai.js +23 -226
  63. package/dist/llm/openai.js.map +1 -1
  64. package/dist/orchestrator/orchestrator.d.ts +25 -1
  65. package/dist/orchestrator/orchestrator.d.ts.map +1 -1
  66. package/dist/orchestrator/orchestrator.js +214 -41
  67. package/dist/orchestrator/orchestrator.js.map +1 -1
  68. package/dist/task/queue.d.ts +31 -2
  69. package/dist/task/queue.d.ts.map +1 -1
  70. package/dist/task/queue.js +70 -3
  71. package/dist/task/queue.js.map +1 -1
  72. package/dist/task/task.d.ts +3 -0
  73. package/dist/task/task.d.ts.map +1 -1
  74. package/dist/task/task.js +5 -1
  75. package/dist/task/task.js.map +1 -1
  76. package/dist/team/messaging.d.ts.map +1 -1
  77. package/dist/team/messaging.js +2 -1
  78. package/dist/team/messaging.js.map +1 -1
  79. package/dist/tool/text-tool-extractor.d.ts +32 -0
  80. package/dist/tool/text-tool-extractor.d.ts.map +1 -0
  81. package/dist/tool/text-tool-extractor.js +187 -0
  82. package/dist/tool/text-tool-extractor.js.map +1 -0
  83. package/dist/types.d.ts +167 -7
  84. package/dist/types.d.ts.map +1 -1
  85. package/dist/utils/trace.d.ts +12 -0
  86. package/dist/utils/trace.d.ts.map +1 -0
  87. package/dist/utils/trace.js +30 -0
  88. package/dist/utils/trace.js.map +1 -0
  89. package/examples/05-copilot-test.ts +49 -0
  90. package/examples/06-local-model.ts +200 -0
  91. package/examples/07-fan-out-aggregate.ts +209 -0
  92. package/examples/08-gemma4-local.ts +192 -0
  93. package/examples/09-structured-output.ts +73 -0
  94. package/examples/10-task-retry.ts +132 -0
  95. package/examples/11-trace-observability.ts +133 -0
  96. package/examples/12-grok.ts +154 -0
  97. package/examples/13-gemini.ts +48 -0
  98. package/package.json +14 -3
  99. package/src/agent/agent.ts +273 -15
  100. package/src/agent/loop-detector.ts +137 -0
  101. package/src/agent/pool.ts +9 -2
  102. package/src/agent/runner.ts +148 -19
  103. package/src/agent/structured-output.ts +126 -0
  104. package/src/index.ts +17 -1
  105. package/src/llm/adapter.ts +29 -5
  106. package/src/llm/anthropic.ts +2 -1
  107. package/src/llm/copilot.ts +552 -0
  108. package/src/llm/gemini.ts +378 -0
  109. package/src/llm/grok.ts +29 -0
  110. package/src/llm/openai-common.ts +294 -0
  111. package/src/llm/openai.ts +31 -261
  112. package/src/orchestrator/orchestrator.ts +260 -40
  113. package/src/task/queue.ts +74 -4
  114. package/src/task/task.ts +8 -1
  115. package/src/team/messaging.ts +3 -1
  116. package/src/tool/text-tool-extractor.ts +219 -0
  117. package/src/types.ts +186 -6
  118. package/src/utils/trace.ts +34 -0
  119. package/tests/agent-hooks.test.ts +473 -0
  120. package/tests/agent-pool.test.ts +212 -0
  121. package/tests/approval.test.ts +464 -0
  122. package/tests/built-in-tools.test.ts +393 -0
  123. package/tests/gemini-adapter.test.ts +97 -0
  124. package/tests/grok-adapter.test.ts +74 -0
  125. package/tests/llm-adapters.test.ts +357 -0
  126. package/tests/loop-detection.test.ts +456 -0
  127. package/tests/openai-fallback.test.ts +159 -0
  128. package/tests/orchestrator.test.ts +281 -0
  129. package/tests/scheduler.test.ts +221 -0
  130. package/tests/semaphore.test.ts +57 -0
  131. package/tests/shared-memory.test.ts +122 -0
  132. package/tests/structured-output.test.ts +331 -0
  133. package/tests/task-queue.test.ts +244 -0
  134. package/tests/task-retry.test.ts +368 -0
  135. package/tests/task-utils.test.ts +155 -0
  136. package/tests/team-messaging.test.ts +329 -0
  137. package/tests/text-tool-extractor.test.ts +170 -0
  138. package/tests/tool-executor.test.ts +193 -0
  139. package/tests/trace.test.ts +453 -0
  140. package/vitest.config.ts +9 -0
@@ -0,0 +1,378 @@
1
+ /**
2
+ * @fileoverview Google Gemini adapter implementing {@link LLMAdapter}.
3
+ *
4
+ * Built for `@google/genai` (the unified Google Gen AI SDK, v1.x), NOT the
5
+ * legacy `@google/generative-ai` package.
6
+ *
7
+ * Converts between the framework's internal {@link ContentBlock} types and the
8
+ * `@google/genai` SDK's wire format, handling tool definitions, system prompts,
9
+ * and both batch and streaming response paths.
10
+ *
11
+ * API key resolution order:
12
+ * 1. `apiKey` constructor argument
13
+ * 2. `GEMINI_API_KEY` environment variable
14
+ * 3. `GOOGLE_API_KEY` environment variable
15
+ *
16
+ * @example
17
+ * ```ts
18
+ * import { GeminiAdapter } from './gemini.js'
19
+ *
20
+ * const adapter = new GeminiAdapter()
21
+ * const response = await adapter.chat(messages, {
22
+ * model: 'gemini-2.5-flash',
23
+ * maxTokens: 1024,
24
+ * })
25
+ * ```
26
+ */
27
+
28
+ import {
29
+ GoogleGenAI,
30
+ FunctionCallingConfigMode,
31
+ type Content,
32
+ type FunctionDeclaration,
33
+ type GenerateContentConfig,
34
+ type GenerateContentResponse,
35
+ type Part,
36
+ type Tool as GeminiTool,
37
+ } from '@google/genai'
38
+
39
+ import type {
40
+ ContentBlock,
41
+ LLMAdapter,
42
+ LLMChatOptions,
43
+ LLMMessage,
44
+ LLMResponse,
45
+ LLMStreamOptions,
46
+ LLMToolDef,
47
+ StreamEvent,
48
+ ToolUseBlock,
49
+ } from '../types.js'
50
+
51
+ // ---------------------------------------------------------------------------
52
+ // Internal helpers
53
+ // ---------------------------------------------------------------------------
54
+
55
+ /**
56
+ * Map framework role names to Gemini role names.
57
+ *
58
+ * Gemini uses `"model"` instead of `"assistant"`.
59
+ */
60
+ function toGeminiRole(role: 'user' | 'assistant'): string {
61
+ return role === 'assistant' ? 'model' : 'user'
62
+ }
63
+
64
+ /**
65
+ * Convert framework messages into Gemini's {@link Content}[] format.
66
+ *
67
+ * Key differences from Anthropic:
68
+ * - Gemini uses `"model"` instead of `"assistant"`.
69
+ * - `functionResponse` parts (tool results) must appear in `"user"` turns.
70
+ * - `functionCall` parts appear in `"model"` turns.
71
+ * - We build a name lookup map from tool_use blocks so tool_result blocks
72
+ * can resolve the function name required by Gemini's `functionResponse`.
73
+ */
74
+ function toGeminiContents(messages: LLMMessage[]): Content[] {
75
+ // First pass: build id → name map for resolving tool results.
76
+ const toolNameById = new Map<string, string>()
77
+ for (const msg of messages) {
78
+ for (const block of msg.content) {
79
+ if (block.type === 'tool_use') {
80
+ toolNameById.set(block.id, block.name)
81
+ }
82
+ }
83
+ }
84
+
85
+ return messages.map((msg): Content => {
86
+ const parts: Part[] = msg.content.map((block): Part => {
87
+ switch (block.type) {
88
+ case 'text':
89
+ return { text: block.text }
90
+
91
+ case 'tool_use':
92
+ return {
93
+ functionCall: {
94
+ id: block.id,
95
+ name: block.name,
96
+ args: block.input,
97
+ },
98
+ }
99
+
100
+ case 'tool_result': {
101
+ const name = toolNameById.get(block.tool_use_id) ?? block.tool_use_id
102
+ return {
103
+ functionResponse: {
104
+ id: block.tool_use_id,
105
+ name,
106
+ response: {
107
+ content:
108
+ typeof block.content === 'string'
109
+ ? block.content
110
+ : JSON.stringify(block.content),
111
+ isError: block.is_error ?? false,
112
+ },
113
+ },
114
+ }
115
+ }
116
+
117
+ case 'image':
118
+ return {
119
+ inlineData: {
120
+ mimeType: block.source.media_type,
121
+ data: block.source.data,
122
+ },
123
+ }
124
+
125
+ default: {
126
+ const _exhaustive: never = block
127
+ throw new Error(`Unhandled content block type: ${JSON.stringify(_exhaustive)}`)
128
+ }
129
+ }
130
+ })
131
+
132
+ return { role: toGeminiRole(msg.role), parts }
133
+ })
134
+ }
135
+
136
+ /**
137
+ * Convert framework {@link LLMToolDef}s into a Gemini `tools` config array.
138
+ *
139
+ * In `@google/genai`, function declarations use `parametersJsonSchema` (not
140
+ * `parameters` or `input_schema`). All declarations are grouped under a single
141
+ * tool entry.
142
+ */
143
+ function toGeminiTools(tools: readonly LLMToolDef[]): GeminiTool[] {
144
+ const functionDeclarations: FunctionDeclaration[] = tools.map((t) => ({
145
+ name: t.name,
146
+ description: t.description,
147
+ parametersJsonSchema: t.inputSchema as Record<string, unknown>,
148
+ }))
149
+ return [{ functionDeclarations }]
150
+ }
151
+
152
+ /**
153
+ * Build the {@link GenerateContentConfig} shared by chat() and stream().
154
+ */
155
+ function buildConfig(
156
+ options: LLMChatOptions | LLMStreamOptions,
157
+ ): GenerateContentConfig {
158
+ return {
159
+ maxOutputTokens: options.maxTokens ?? 4096,
160
+ temperature: options.temperature,
161
+ systemInstruction: options.systemPrompt,
162
+ tools: options.tools ? toGeminiTools(options.tools) : undefined,
163
+ toolConfig: options.tools
164
+ ? { functionCallingConfig: { mode: FunctionCallingConfigMode.AUTO } }
165
+ : undefined,
166
+ }
167
+ }
168
+
169
+ /**
170
+ * Generate a stable pseudo-random ID string for tool use blocks.
171
+ *
172
+ * Gemini may not always return call IDs (especially in streaming), so we
173
+ * fabricate them when absent to satisfy the framework's {@link ToolUseBlock}
174
+ * contract.
175
+ */
176
+ function generateId(): string {
177
+ return `gemini-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`
178
+ }
179
+
180
+ /**
181
+ * Extract the function call ID from a Gemini part, or generate one.
182
+ *
183
+ * The `id` field exists in newer API versions but may be absent in older
184
+ * responses, so we cast conservatively and fall back to a generated ID.
185
+ */
186
+ function getFunctionCallId(part: Part): string {
187
+ return (part.functionCall as { id?: string } | undefined)?.id ?? generateId()
188
+ }
189
+
190
+ /**
191
+ * Convert a Gemini {@link GenerateContentResponse} into a framework
192
+ * {@link LLMResponse}.
193
+ */
194
+ function fromGeminiResponse(
195
+ response: GenerateContentResponse,
196
+ id: string,
197
+ model: string,
198
+ ): LLMResponse {
199
+ const candidate = response.candidates?.[0]
200
+ const content: ContentBlock[] = []
201
+
202
+ for (const part of candidate?.content?.parts ?? []) {
203
+ if (part.text !== undefined && part.text !== '') {
204
+ content.push({ type: 'text', text: part.text })
205
+ } else if (part.functionCall !== undefined) {
206
+ content.push({
207
+ type: 'tool_use',
208
+ id: getFunctionCallId(part),
209
+ name: part.functionCall.name ?? '',
210
+ input: (part.functionCall.args ?? {}) as Record<string, unknown>,
211
+ })
212
+ }
213
+ // inlineData echoes and other part types are silently ignored.
214
+ }
215
+
216
+ // Map Gemini finish reasons to framework stop_reason vocabulary.
217
+ const finishReason = candidate?.finishReason as string | undefined
218
+ let stop_reason: LLMResponse['stop_reason'] = 'end_turn'
219
+ if (finishReason === 'MAX_TOKENS') {
220
+ stop_reason = 'max_tokens'
221
+ } else if (content.some((b) => b.type === 'tool_use')) {
222
+ // Gemini may report STOP even when it returned function calls.
223
+ stop_reason = 'tool_use'
224
+ }
225
+
226
+ const usage = response.usageMetadata
227
+ return {
228
+ id,
229
+ content,
230
+ model,
231
+ stop_reason,
232
+ usage: {
233
+ input_tokens: usage?.promptTokenCount ?? 0,
234
+ output_tokens: usage?.candidatesTokenCount ?? 0,
235
+ },
236
+ }
237
+ }
238
+
239
+ // ---------------------------------------------------------------------------
240
+ // Adapter implementation
241
+ // ---------------------------------------------------------------------------
242
+
243
/**
 * LLM adapter backed by the Google Gemini API via `@google/genai`.
 *
 * Thread-safe — a single instance may be shared across concurrent agent runs.
 * The underlying SDK client is stateless across requests.
 */
export class GeminiAdapter implements LLMAdapter {
  readonly name = 'gemini'

  // SDK client; holds only the resolved API key, no per-request state.
  readonly #client: GoogleGenAI

  /**
   * @param apiKey - Explicit API key. When omitted, falls back to the
   *                 `GEMINI_API_KEY` and then `GOOGLE_API_KEY` environment
   *                 variables (in that order).
   */
  constructor(apiKey?: string) {
    this.#client = new GoogleGenAI({
      apiKey: apiKey ?? process.env['GEMINI_API_KEY'] ?? process.env['GOOGLE_API_KEY'],
    })
  }

  // -------------------------------------------------------------------------
  // chat()
  // -------------------------------------------------------------------------

  /**
   * Send a synchronous (non-streaming) chat request and return the complete
   * {@link LLMResponse}.
   *
   * Uses `ai.models.generateContent()` with the full conversation as `contents`,
   * which is the idiomatic pattern for `@google/genai`.
   */
  async chat(messages: LLMMessage[], options: LLMChatOptions): Promise<LLMResponse> {
    // The response id is fabricated locally; the SDK response has no id field.
    const id = generateId()
    const contents = toGeminiContents(messages)

    const response = await this.#client.models.generateContent({
      model: options.model,
      contents,
      config: buildConfig(options),
    })

    return fromGeminiResponse(response, id, options.model)
  }

  // -------------------------------------------------------------------------
  // stream()
  // -------------------------------------------------------------------------

  /**
   * Send a streaming chat request and yield {@link StreamEvent}s as they
   * arrive from the API.
   *
   * Uses `ai.models.generateContentStream()` which returns an
   * `AsyncGenerator<GenerateContentResponse>`. Each yielded chunk has the same
   * shape as a full response but contains only the delta for that chunk.
   *
   * Because `@google/genai` doesn't expose a `finalMessage()` helper like the
   * Anthropic SDK, we accumulate content and token counts as we stream so that
   * the terminal `done` event carries a complete and accurate {@link LLMResponse}.
   *
   * Sequence guarantees (matching the Anthropic adapter):
   * - Zero or more `text` events with incremental deltas
   * - Zero or more `tool_use` events (one per call; Gemini doesn't stream args)
   * - Exactly one terminal event: `done` or `error`
   */
  async *stream(
    messages: LLMMessage[],
    options: LLMStreamOptions,
  ): AsyncIterable<StreamEvent> {
    const id = generateId()
    const contents = toGeminiContents(messages)

    try {
      const streamResponse = await this.#client.models.generateContentStream({
        model: options.model,
        contents,
        config: buildConfig(options),
      })

      // Accumulators for building the done payload.
      const accumulatedContent: ContentBlock[] = []
      let inputTokens = 0
      let outputTokens = 0
      let lastFinishReason: string | undefined

      for await (const chunk of streamResponse) {
        const candidate = chunk.candidates?.[0]

        // Accumulate token counts — the API emits these on the final chunk.
        if (chunk.usageMetadata) {
          inputTokens = chunk.usageMetadata.promptTokenCount ?? inputTokens
          outputTokens = chunk.usageMetadata.candidatesTokenCount ?? outputTokens
        }
        // The finish reason also arrives on (at least) the last chunk.
        if (candidate?.finishReason) {
          lastFinishReason = candidate.finishReason as string
        }

        for (const part of candidate?.content?.parts ?? []) {
          if (part.text) {
            accumulatedContent.push({ type: 'text', text: part.text })
            yield { type: 'text', data: part.text } satisfies StreamEvent
          } else if (part.functionCall) {
            const toolId = getFunctionCallId(part)
            const toolUseBlock: ToolUseBlock = {
              type: 'tool_use',
              id: toolId,
              name: part.functionCall.name ?? '',
              input: (part.functionCall.args ?? {}) as Record<string, unknown>,
            }
            accumulatedContent.push(toolUseBlock)
            yield { type: 'tool_use', data: toolUseBlock } satisfies StreamEvent
          }
        }
      }

      // Determine stop_reason from the accumulated response. As in the batch
      // path, tool_use wins over a plain STOP finish reason.
      const hasToolUse = accumulatedContent.some((b) => b.type === 'tool_use')
      let stop_reason: LLMResponse['stop_reason'] = 'end_turn'
      if (lastFinishReason === 'MAX_TOKENS') {
        stop_reason = 'max_tokens'
      } else if (hasToolUse) {
        stop_reason = 'tool_use'
      }

      const finalResponse: LLMResponse = {
        id,
        content: accumulatedContent,
        model: options.model,
        stop_reason,
        usage: { input_tokens: inputTokens, output_tokens: outputTokens },
      }

      yield { type: 'done', data: finalResponse } satisfies StreamEvent
    } catch (err) {
      // Normalize non-Error throwables before surfacing the terminal event.
      const error = err instanceof Error ? err : new Error(String(err))
      yield { type: 'error', data: error } satisfies StreamEvent
    }
  }
}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * @fileoverview Grok (xAI) adapter.
3
+ *
4
+ * Thin wrapper around OpenAIAdapter that hard-codes the official xAI endpoint
5
+ * and XAI_API_KEY environment variable fallback.
6
+ */
7
+
8
+ import { OpenAIAdapter } from './openai.js'
9
+
10
+ /**
11
+ * LLM adapter for Grok models (grok-4 series and future models).
12
+ *
13
+ * Thread-safe. Can be shared across agents.
14
+ *
15
+ * Usage:
16
+ * provider: 'grok'
17
+ * model: 'grok-4' (or any current Grok model name)
18
+ */
19
+ export class GrokAdapter extends OpenAIAdapter {
20
+ readonly name = 'grok'
21
+
22
+ constructor(apiKey?: string, baseURL?: string) {
23
+ // Allow override of baseURL (for proxies or future changes) but default to official xAI endpoint.
24
+ super(
25
+ apiKey ?? process.env['XAI_API_KEY'],
26
+ baseURL ?? 'https://api.x.ai/v1'
27
+ )
28
+ }
29
+ }
@@ -0,0 +1,294 @@
1
+ /**
2
+ * @fileoverview Shared OpenAI wire-format conversion helpers.
3
+ *
4
+ * Both the OpenAI and Copilot adapters use the OpenAI Chat Completions API
5
+ * format. This module contains the common conversion logic so it isn't
6
+ * duplicated across adapters.
7
+ */
8
+
9
+ import OpenAI from 'openai'
10
+ import type {
11
+ ChatCompletion,
12
+ ChatCompletionAssistantMessageParam,
13
+ ChatCompletionMessageParam,
14
+ ChatCompletionMessageToolCall,
15
+ ChatCompletionTool,
16
+ ChatCompletionToolMessageParam,
17
+ ChatCompletionUserMessageParam,
18
+ } from 'openai/resources/chat/completions/index.js'
19
+
20
+ import type {
21
+ ContentBlock,
22
+ LLMMessage,
23
+ LLMResponse,
24
+ LLMToolDef,
25
+ TextBlock,
26
+ ToolUseBlock,
27
+ } from '../types.js'
28
+ import { extractToolCallsFromText } from '../tool/text-tool-extractor.js'
29
+
30
+ // ---------------------------------------------------------------------------
31
+ // Framework → OpenAI
32
+ // ---------------------------------------------------------------------------
33
+
34
+ /**
35
+ * Convert a framework {@link LLMToolDef} to an OpenAI {@link ChatCompletionTool}.
36
+ */
37
+ export function toOpenAITool(tool: LLMToolDef): ChatCompletionTool {
38
+ return {
39
+ type: 'function',
40
+ function: {
41
+ name: tool.name,
42
+ description: tool.description,
43
+ parameters: tool.inputSchema as Record<string, unknown>,
44
+ },
45
+ }
46
+ }
47
+
48
+ /**
49
+ * Determine whether a framework message contains any `tool_result` content
50
+ * blocks, which must be serialised as separate OpenAI `tool`-role messages.
51
+ */
52
+ function hasToolResults(msg: LLMMessage): boolean {
53
+ return msg.content.some((b) => b.type === 'tool_result')
54
+ }
55
+
56
+ /**
57
+ * Convert framework {@link LLMMessage}s into OpenAI
58
+ * {@link ChatCompletionMessageParam} entries.
59
+ *
60
+ * `tool_result` blocks are expanded into top-level `tool`-role messages
61
+ * because OpenAI uses a dedicated role for tool results rather than embedding
62
+ * them inside user-content arrays.
63
+ */
64
+ export function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageParam[] {
65
+ const result: ChatCompletionMessageParam[] = []
66
+
67
+ for (const msg of messages) {
68
+ if (msg.role === 'assistant') {
69
+ result.push(toOpenAIAssistantMessage(msg))
70
+ } else {
71
+ // user role
72
+ if (!hasToolResults(msg)) {
73
+ result.push(toOpenAIUserMessage(msg))
74
+ } else {
75
+ const nonToolBlocks = msg.content.filter((b) => b.type !== 'tool_result')
76
+ if (nonToolBlocks.length > 0) {
77
+ result.push(toOpenAIUserMessage({ role: 'user', content: nonToolBlocks }))
78
+ }
79
+
80
+ for (const block of msg.content) {
81
+ if (block.type === 'tool_result') {
82
+ const toolMsg: ChatCompletionToolMessageParam = {
83
+ role: 'tool',
84
+ tool_call_id: block.tool_use_id,
85
+ content: block.content,
86
+ }
87
+ result.push(toolMsg)
88
+ }
89
+ }
90
+ }
91
+ }
92
+ }
93
+
94
+ return result
95
+ }
96
+
97
+ /**
98
+ * Convert a `user`-role framework message into an OpenAI user message.
99
+ * Image blocks are converted to the OpenAI image_url content part format.
100
+ */
101
+ function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageParam {
102
+ if (msg.content.length === 1 && msg.content[0]?.type === 'text') {
103
+ return { role: 'user', content: msg.content[0].text }
104
+ }
105
+
106
+ type ContentPart = OpenAI.Chat.ChatCompletionContentPartText | OpenAI.Chat.ChatCompletionContentPartImage
107
+ const parts: ContentPart[] = []
108
+
109
+ for (const block of msg.content) {
110
+ if (block.type === 'text') {
111
+ parts.push({ type: 'text', text: block.text })
112
+ } else if (block.type === 'image') {
113
+ parts.push({
114
+ type: 'image_url',
115
+ image_url: {
116
+ url: `data:${block.source.media_type};base64,${block.source.data}`,
117
+ },
118
+ })
119
+ }
120
+ // tool_result blocks are handled by the caller (toOpenAIMessages); skip here.
121
+ }
122
+
123
+ return { role: 'user', content: parts }
124
+ }
125
+
126
+ /**
127
+ * Convert an `assistant`-role framework message into an OpenAI assistant message.
128
+ * `tool_use` blocks become `tool_calls`; `text` blocks become message content.
129
+ */
130
+ function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessageParam {
131
+ const toolCalls: ChatCompletionMessageToolCall[] = []
132
+ const textParts: string[] = []
133
+
134
+ for (const block of msg.content) {
135
+ if (block.type === 'tool_use') {
136
+ toolCalls.push({
137
+ id: block.id,
138
+ type: 'function',
139
+ function: {
140
+ name: block.name,
141
+ arguments: JSON.stringify(block.input),
142
+ },
143
+ })
144
+ } else if (block.type === 'text') {
145
+ textParts.push(block.text)
146
+ }
147
+ }
148
+
149
+ const assistantMsg: ChatCompletionAssistantMessageParam = {
150
+ role: 'assistant',
151
+ content: textParts.length > 0 ? textParts.join('') : null,
152
+ }
153
+
154
+ if (toolCalls.length > 0) {
155
+ assistantMsg.tool_calls = toolCalls
156
+ }
157
+
158
+ return assistantMsg
159
+ }
160
+
161
+ // ---------------------------------------------------------------------------
162
+ // OpenAI → Framework
163
+ // ---------------------------------------------------------------------------
164
+
165
+ /**
166
+ * Convert an OpenAI {@link ChatCompletion} into a framework {@link LLMResponse}.
167
+ *
168
+ * Takes only the first choice (index 0), consistent with how the framework
169
+ * is designed for single-output agents.
170
+ *
171
+ * @param completion - The raw OpenAI completion.
172
+ * @param knownToolNames - Optional whitelist of tool names. When the model
173
+ * returns no `tool_calls` but the text contains JSON
174
+ * that looks like a tool call, the fallback extractor
175
+ * uses this list to validate matches. Pass the names
176
+ * of tools sent in the request for best results.
177
+ */
178
+ export function fromOpenAICompletion(
179
+ completion: ChatCompletion,
180
+ knownToolNames?: string[],
181
+ ): LLMResponse {
182
+ const choice = completion.choices[0]
183
+ if (choice === undefined) {
184
+ throw new Error('OpenAI returned a completion with no choices')
185
+ }
186
+
187
+ const content: ContentBlock[] = []
188
+ const message = choice.message
189
+
190
+ if (message.content !== null && message.content !== undefined) {
191
+ const textBlock: TextBlock = { type: 'text', text: message.content }
192
+ content.push(textBlock)
193
+ }
194
+
195
+ for (const toolCall of message.tool_calls ?? []) {
196
+ let parsedInput: Record<string, unknown> = {}
197
+ try {
198
+ const parsed: unknown = JSON.parse(toolCall.function.arguments)
199
+ if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
200
+ parsedInput = parsed as Record<string, unknown>
201
+ }
202
+ } catch {
203
+ // Malformed arguments from the model — surface as empty object.
204
+ }
205
+
206
+ const toolUseBlock: ToolUseBlock = {
207
+ type: 'tool_use',
208
+ id: toolCall.id,
209
+ name: toolCall.function.name,
210
+ input: parsedInput,
211
+ }
212
+ content.push(toolUseBlock)
213
+ }
214
+
215
+ // ---------------------------------------------------------------------------
216
+ // Fallback: extract tool calls from text when native tool_calls is empty.
217
+ //
218
+ // Some local models (Ollama thinking models, misconfigured vLLM) return tool
219
+ // calls as plain text instead of using the tool_calls wire format. When we
220
+ // have text but no tool_calls, try to extract them from the text.
221
+ // ---------------------------------------------------------------------------
222
+ const hasNativeToolCalls = (message.tool_calls ?? []).length > 0
223
+ if (
224
+ !hasNativeToolCalls &&
225
+ knownToolNames !== undefined &&
226
+ knownToolNames.length > 0 &&
227
+ message.content !== null &&
228
+ message.content !== undefined &&
229
+ message.content.length > 0
230
+ ) {
231
+ const extracted = extractToolCallsFromText(message.content, knownToolNames)
232
+ if (extracted.length > 0) {
233
+ content.push(...extracted)
234
+ }
235
+ }
236
+
237
+ const hasToolUseBlocks = content.some(b => b.type === 'tool_use')
238
+ const rawStopReason = choice.finish_reason ?? 'stop'
239
+ // If we extracted tool calls from text but the finish_reason was 'stop',
240
+ // correct it to 'tool_use' so the agent runner continues the loop.
241
+ const stopReason = hasToolUseBlocks && rawStopReason === 'stop'
242
+ ? 'tool_use'
243
+ : normalizeFinishReason(rawStopReason)
244
+
245
+ return {
246
+ id: completion.id,
247
+ content,
248
+ model: completion.model,
249
+ stop_reason: stopReason,
250
+ usage: {
251
+ input_tokens: completion.usage?.prompt_tokens ?? 0,
252
+ output_tokens: completion.usage?.completion_tokens ?? 0,
253
+ },
254
+ }
255
+ }
256
+
257
+ /**
258
+ * Normalize an OpenAI `finish_reason` string to the framework's canonical
259
+ * stop-reason vocabulary.
260
+ *
261
+ * Mapping:
262
+ * - `'stop'` → `'end_turn'`
263
+ * - `'tool_calls'` → `'tool_use'`
264
+ * - `'length'` → `'max_tokens'`
265
+ * - `'content_filter'` → `'content_filter'`
266
+ * - anything else → passed through unchanged
267
+ */
268
+ export function normalizeFinishReason(reason: string): string {
269
+ switch (reason) {
270
+ case 'stop': return 'end_turn'
271
+ case 'tool_calls': return 'tool_use'
272
+ case 'length': return 'max_tokens'
273
+ case 'content_filter': return 'content_filter'
274
+ default: return reason
275
+ }
276
+ }
277
+
278
+ /**
279
+ * Prepend a system message when `systemPrompt` is provided, then append the
280
+ * converted conversation messages.
281
+ */
282
+ export function buildOpenAIMessageList(
283
+ messages: LLMMessage[],
284
+ systemPrompt: string | undefined,
285
+ ): ChatCompletionMessageParam[] {
286
+ const result: ChatCompletionMessageParam[] = []
287
+
288
+ if (systemPrompt !== undefined && systemPrompt.length > 0) {
289
+ result.push({ role: 'system', content: systemPrompt })
290
+ }
291
+
292
+ result.push(...toOpenAIMessages(messages))
293
+ return result
294
+ }