@jackchen_me/open-multi-agent 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/ISSUE_TEMPLATE/bug_report.md +40 -0
- package/.github/ISSUE_TEMPLATE/feature_request.md +23 -0
- package/.github/pull_request_template.md +14 -0
- package/.github/workflows/ci.yml +23 -0
- package/CLAUDE.md +80 -0
- package/CODE_OF_CONDUCT.md +48 -0
- package/CONTRIBUTING.md +72 -0
- package/DECISIONS.md +43 -0
- package/README.md +144 -144
- package/README_zh.md +277 -0
- package/SECURITY.md +17 -0
- package/dist/agent/agent.d.ts +20 -1
- package/dist/agent/agent.d.ts.map +1 -1
- package/dist/agent/agent.js +233 -12
- package/dist/agent/agent.js.map +1 -1
- package/dist/agent/loop-detector.d.ts +39 -0
- package/dist/agent/loop-detector.d.ts.map +1 -0
- package/dist/agent/loop-detector.js +122 -0
- package/dist/agent/loop-detector.js.map +1 -0
- package/dist/agent/pool.d.ts +2 -1
- package/dist/agent/pool.d.ts.map +1 -1
- package/dist/agent/pool.js +4 -2
- package/dist/agent/pool.js.map +1 -1
- package/dist/agent/runner.d.ts +23 -1
- package/dist/agent/runner.d.ts.map +1 -1
- package/dist/agent/runner.js +113 -12
- package/dist/agent/runner.js.map +1 -1
- package/dist/agent/structured-output.d.ts +33 -0
- package/dist/agent/structured-output.d.ts.map +1 -0
- package/dist/agent/structured-output.js +116 -0
- package/dist/agent/structured-output.js.map +1 -0
- package/dist/index.d.ts +5 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -1
- package/dist/index.js.map +1 -1
- package/dist/llm/adapter.d.ts +12 -4
- package/dist/llm/adapter.d.ts.map +1 -1
- package/dist/llm/adapter.js +28 -5
- package/dist/llm/adapter.js.map +1 -1
- package/dist/llm/anthropic.d.ts +1 -1
- package/dist/llm/anthropic.d.ts.map +1 -1
- package/dist/llm/anthropic.js +2 -1
- package/dist/llm/anthropic.js.map +1 -1
- package/dist/llm/copilot.d.ts +92 -0
- package/dist/llm/copilot.d.ts.map +1 -0
- package/dist/llm/copilot.js +427 -0
- package/dist/llm/copilot.js.map +1 -0
- package/dist/llm/gemini.d.ts +65 -0
- package/dist/llm/gemini.d.ts.map +1 -0
- package/dist/llm/gemini.js +317 -0
- package/dist/llm/gemini.js.map +1 -0
- package/dist/llm/grok.d.ts +21 -0
- package/dist/llm/grok.d.ts.map +1 -0
- package/dist/llm/grok.js +24 -0
- package/dist/llm/grok.js.map +1 -0
- package/dist/llm/openai-common.d.ts +54 -0
- package/dist/llm/openai-common.d.ts.map +1 -0
- package/dist/llm/openai-common.js +242 -0
- package/dist/llm/openai-common.js.map +1 -0
- package/dist/llm/openai.d.ts +2 -2
- package/dist/llm/openai.d.ts.map +1 -1
- package/dist/llm/openai.js +23 -226
- package/dist/llm/openai.js.map +1 -1
- package/dist/orchestrator/orchestrator.d.ts +25 -1
- package/dist/orchestrator/orchestrator.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator.js +214 -41
- package/dist/orchestrator/orchestrator.js.map +1 -1
- package/dist/task/queue.d.ts +31 -2
- package/dist/task/queue.d.ts.map +1 -1
- package/dist/task/queue.js +70 -3
- package/dist/task/queue.js.map +1 -1
- package/dist/task/task.d.ts +3 -0
- package/dist/task/task.d.ts.map +1 -1
- package/dist/task/task.js +5 -1
- package/dist/task/task.js.map +1 -1
- package/dist/team/messaging.d.ts.map +1 -1
- package/dist/team/messaging.js +2 -1
- package/dist/team/messaging.js.map +1 -1
- package/dist/tool/text-tool-extractor.d.ts +32 -0
- package/dist/tool/text-tool-extractor.d.ts.map +1 -0
- package/dist/tool/text-tool-extractor.js +187 -0
- package/dist/tool/text-tool-extractor.js.map +1 -0
- package/dist/types.d.ts +167 -7
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/trace.d.ts +12 -0
- package/dist/utils/trace.d.ts.map +1 -0
- package/dist/utils/trace.js +30 -0
- package/dist/utils/trace.js.map +1 -0
- package/examples/05-copilot-test.ts +49 -0
- package/examples/06-local-model.ts +200 -0
- package/examples/07-fan-out-aggregate.ts +209 -0
- package/examples/08-gemma4-local.ts +192 -0
- package/examples/09-structured-output.ts +73 -0
- package/examples/10-task-retry.ts +132 -0
- package/examples/11-trace-observability.ts +133 -0
- package/examples/12-grok.ts +154 -0
- package/examples/13-gemini.ts +48 -0
- package/package.json +14 -3
- package/src/agent/agent.ts +273 -15
- package/src/agent/loop-detector.ts +137 -0
- package/src/agent/pool.ts +9 -2
- package/src/agent/runner.ts +148 -19
- package/src/agent/structured-output.ts +126 -0
- package/src/index.ts +17 -1
- package/src/llm/adapter.ts +29 -5
- package/src/llm/anthropic.ts +2 -1
- package/src/llm/copilot.ts +552 -0
- package/src/llm/gemini.ts +378 -0
- package/src/llm/grok.ts +29 -0
- package/src/llm/openai-common.ts +294 -0
- package/src/llm/openai.ts +31 -261
- package/src/orchestrator/orchestrator.ts +260 -40
- package/src/task/queue.ts +74 -4
- package/src/task/task.ts +8 -1
- package/src/team/messaging.ts +3 -1
- package/src/tool/text-tool-extractor.ts +219 -0
- package/src/types.ts +186 -6
- package/src/utils/trace.ts +34 -0
- package/tests/agent-hooks.test.ts +473 -0
- package/tests/agent-pool.test.ts +212 -0
- package/tests/approval.test.ts +464 -0
- package/tests/built-in-tools.test.ts +393 -0
- package/tests/gemini-adapter.test.ts +97 -0
- package/tests/grok-adapter.test.ts +74 -0
- package/tests/llm-adapters.test.ts +357 -0
- package/tests/loop-detection.test.ts +456 -0
- package/tests/openai-fallback.test.ts +159 -0
- package/tests/orchestrator.test.ts +281 -0
- package/tests/scheduler.test.ts +221 -0
- package/tests/semaphore.test.ts +57 -0
- package/tests/shared-memory.test.ts +122 -0
- package/tests/structured-output.test.ts +331 -0
- package/tests/task-queue.test.ts +244 -0
- package/tests/task-retry.test.ts +368 -0
- package/tests/task-utils.test.ts +155 -0
- package/tests/team-messaging.test.ts +329 -0
- package/tests/text-tool-extractor.test.ts +170 -0
- package/tests/tool-executor.test.ts +193 -0
- package/tests/trace.test.ts +453 -0
- package/vitest.config.ts +9 -0
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Google Gemini adapter implementing {@link LLMAdapter}.
|
|
3
|
+
*
|
|
4
|
+
* Built for `@google/genai` (the unified Google Gen AI SDK, v1.x), NOT the
|
|
5
|
+
* legacy `@google/generative-ai` package.
|
|
6
|
+
*
|
|
7
|
+
* Converts between the framework's internal {@link ContentBlock} types and the
|
|
8
|
+
* `@google/genai` SDK's wire format, handling tool definitions, system prompts,
|
|
9
|
+
* and both batch and streaming response paths.
|
|
10
|
+
*
|
|
11
|
+
* API key resolution order:
|
|
12
|
+
* 1. `apiKey` constructor argument
|
|
13
|
+
* 2. `GEMINI_API_KEY` environment variable
|
|
14
|
+
* 3. `GOOGLE_API_KEY` environment variable
|
|
15
|
+
*
|
|
16
|
+
* @example
|
|
17
|
+
* ```ts
|
|
18
|
+
* import { GeminiAdapter } from './gemini.js'
|
|
19
|
+
*
|
|
20
|
+
* const adapter = new GeminiAdapter()
|
|
21
|
+
* const response = await adapter.chat(messages, {
|
|
22
|
+
* model: 'gemini-2.5-flash',
|
|
23
|
+
* maxTokens: 1024,
|
|
24
|
+
* })
|
|
25
|
+
* ```
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
import {
|
|
29
|
+
GoogleGenAI,
|
|
30
|
+
FunctionCallingConfigMode,
|
|
31
|
+
type Content,
|
|
32
|
+
type FunctionDeclaration,
|
|
33
|
+
type GenerateContentConfig,
|
|
34
|
+
type GenerateContentResponse,
|
|
35
|
+
type Part,
|
|
36
|
+
type Tool as GeminiTool,
|
|
37
|
+
} from '@google/genai'
|
|
38
|
+
|
|
39
|
+
import type {
|
|
40
|
+
ContentBlock,
|
|
41
|
+
LLMAdapter,
|
|
42
|
+
LLMChatOptions,
|
|
43
|
+
LLMMessage,
|
|
44
|
+
LLMResponse,
|
|
45
|
+
LLMStreamOptions,
|
|
46
|
+
LLMToolDef,
|
|
47
|
+
StreamEvent,
|
|
48
|
+
ToolUseBlock,
|
|
49
|
+
} from '../types.js'
|
|
50
|
+
|
|
51
|
+
// ---------------------------------------------------------------------------
|
|
52
|
+
// Internal helpers
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* Map framework role names to Gemini role names.
|
|
57
|
+
*
|
|
58
|
+
* Gemini uses `"model"` instead of `"assistant"`.
|
|
59
|
+
*/
|
|
60
|
+
function toGeminiRole(role: 'user' | 'assistant'): string {
|
|
61
|
+
return role === 'assistant' ? 'model' : 'user'
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* Convert framework messages into Gemini's {@link Content}[] format.
|
|
66
|
+
*
|
|
67
|
+
* Key differences from Anthropic:
|
|
68
|
+
* - Gemini uses `"model"` instead of `"assistant"`.
|
|
69
|
+
* - `functionResponse` parts (tool results) must appear in `"user"` turns.
|
|
70
|
+
* - `functionCall` parts appear in `"model"` turns.
|
|
71
|
+
* - We build a name lookup map from tool_use blocks so tool_result blocks
|
|
72
|
+
* can resolve the function name required by Gemini's `functionResponse`.
|
|
73
|
+
*/
|
|
74
|
+
function toGeminiContents(messages: LLMMessage[]): Content[] {
|
|
75
|
+
// First pass: build id → name map for resolving tool results.
|
|
76
|
+
const toolNameById = new Map<string, string>()
|
|
77
|
+
for (const msg of messages) {
|
|
78
|
+
for (const block of msg.content) {
|
|
79
|
+
if (block.type === 'tool_use') {
|
|
80
|
+
toolNameById.set(block.id, block.name)
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
return messages.map((msg): Content => {
|
|
86
|
+
const parts: Part[] = msg.content.map((block): Part => {
|
|
87
|
+
switch (block.type) {
|
|
88
|
+
case 'text':
|
|
89
|
+
return { text: block.text }
|
|
90
|
+
|
|
91
|
+
case 'tool_use':
|
|
92
|
+
return {
|
|
93
|
+
functionCall: {
|
|
94
|
+
id: block.id,
|
|
95
|
+
name: block.name,
|
|
96
|
+
args: block.input,
|
|
97
|
+
},
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
case 'tool_result': {
|
|
101
|
+
const name = toolNameById.get(block.tool_use_id) ?? block.tool_use_id
|
|
102
|
+
return {
|
|
103
|
+
functionResponse: {
|
|
104
|
+
id: block.tool_use_id,
|
|
105
|
+
name,
|
|
106
|
+
response: {
|
|
107
|
+
content:
|
|
108
|
+
typeof block.content === 'string'
|
|
109
|
+
? block.content
|
|
110
|
+
: JSON.stringify(block.content),
|
|
111
|
+
isError: block.is_error ?? false,
|
|
112
|
+
},
|
|
113
|
+
},
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
case 'image':
|
|
118
|
+
return {
|
|
119
|
+
inlineData: {
|
|
120
|
+
mimeType: block.source.media_type,
|
|
121
|
+
data: block.source.data,
|
|
122
|
+
},
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
default: {
|
|
126
|
+
const _exhaustive: never = block
|
|
127
|
+
throw new Error(`Unhandled content block type: ${JSON.stringify(_exhaustive)}`)
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
})
|
|
131
|
+
|
|
132
|
+
return { role: toGeminiRole(msg.role), parts }
|
|
133
|
+
})
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
/**
|
|
137
|
+
* Convert framework {@link LLMToolDef}s into a Gemini `tools` config array.
|
|
138
|
+
*
|
|
139
|
+
* In `@google/genai`, function declarations use `parametersJsonSchema` (not
|
|
140
|
+
* `parameters` or `input_schema`). All declarations are grouped under a single
|
|
141
|
+
* tool entry.
|
|
142
|
+
*/
|
|
143
|
+
function toGeminiTools(tools: readonly LLMToolDef[]): GeminiTool[] {
|
|
144
|
+
const functionDeclarations: FunctionDeclaration[] = tools.map((t) => ({
|
|
145
|
+
name: t.name,
|
|
146
|
+
description: t.description,
|
|
147
|
+
parametersJsonSchema: t.inputSchema as Record<string, unknown>,
|
|
148
|
+
}))
|
|
149
|
+
return [{ functionDeclarations }]
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
/**
|
|
153
|
+
* Build the {@link GenerateContentConfig} shared by chat() and stream().
|
|
154
|
+
*/
|
|
155
|
+
function buildConfig(
|
|
156
|
+
options: LLMChatOptions | LLMStreamOptions,
|
|
157
|
+
): GenerateContentConfig {
|
|
158
|
+
return {
|
|
159
|
+
maxOutputTokens: options.maxTokens ?? 4096,
|
|
160
|
+
temperature: options.temperature,
|
|
161
|
+
systemInstruction: options.systemPrompt,
|
|
162
|
+
tools: options.tools ? toGeminiTools(options.tools) : undefined,
|
|
163
|
+
toolConfig: options.tools
|
|
164
|
+
? { functionCallingConfig: { mode: FunctionCallingConfigMode.AUTO } }
|
|
165
|
+
: undefined,
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
/**
|
|
170
|
+
* Generate a stable pseudo-random ID string for tool use blocks.
|
|
171
|
+
*
|
|
172
|
+
* Gemini may not always return call IDs (especially in streaming), so we
|
|
173
|
+
* fabricate them when absent to satisfy the framework's {@link ToolUseBlock}
|
|
174
|
+
* contract.
|
|
175
|
+
*/
|
|
176
|
+
function generateId(): string {
|
|
177
|
+
return `gemini-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
/**
|
|
181
|
+
* Extract the function call ID from a Gemini part, or generate one.
|
|
182
|
+
*
|
|
183
|
+
* The `id` field exists in newer API versions but may be absent in older
|
|
184
|
+
* responses, so we cast conservatively and fall back to a generated ID.
|
|
185
|
+
*/
|
|
186
|
+
function getFunctionCallId(part: Part): string {
|
|
187
|
+
return (part.functionCall as { id?: string } | undefined)?.id ?? generateId()
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
/**
|
|
191
|
+
* Convert a Gemini {@link GenerateContentResponse} into a framework
|
|
192
|
+
* {@link LLMResponse}.
|
|
193
|
+
*/
|
|
194
|
+
function fromGeminiResponse(
|
|
195
|
+
response: GenerateContentResponse,
|
|
196
|
+
id: string,
|
|
197
|
+
model: string,
|
|
198
|
+
): LLMResponse {
|
|
199
|
+
const candidate = response.candidates?.[0]
|
|
200
|
+
const content: ContentBlock[] = []
|
|
201
|
+
|
|
202
|
+
for (const part of candidate?.content?.parts ?? []) {
|
|
203
|
+
if (part.text !== undefined && part.text !== '') {
|
|
204
|
+
content.push({ type: 'text', text: part.text })
|
|
205
|
+
} else if (part.functionCall !== undefined) {
|
|
206
|
+
content.push({
|
|
207
|
+
type: 'tool_use',
|
|
208
|
+
id: getFunctionCallId(part),
|
|
209
|
+
name: part.functionCall.name ?? '',
|
|
210
|
+
input: (part.functionCall.args ?? {}) as Record<string, unknown>,
|
|
211
|
+
})
|
|
212
|
+
}
|
|
213
|
+
// inlineData echoes and other part types are silently ignored.
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
// Map Gemini finish reasons to framework stop_reason vocabulary.
|
|
217
|
+
const finishReason = candidate?.finishReason as string | undefined
|
|
218
|
+
let stop_reason: LLMResponse['stop_reason'] = 'end_turn'
|
|
219
|
+
if (finishReason === 'MAX_TOKENS') {
|
|
220
|
+
stop_reason = 'max_tokens'
|
|
221
|
+
} else if (content.some((b) => b.type === 'tool_use')) {
|
|
222
|
+
// Gemini may report STOP even when it returned function calls.
|
|
223
|
+
stop_reason = 'tool_use'
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
const usage = response.usageMetadata
|
|
227
|
+
return {
|
|
228
|
+
id,
|
|
229
|
+
content,
|
|
230
|
+
model,
|
|
231
|
+
stop_reason,
|
|
232
|
+
usage: {
|
|
233
|
+
input_tokens: usage?.promptTokenCount ?? 0,
|
|
234
|
+
output_tokens: usage?.candidatesTokenCount ?? 0,
|
|
235
|
+
},
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
// ---------------------------------------------------------------------------
|
|
240
|
+
// Adapter implementation
|
|
241
|
+
// ---------------------------------------------------------------------------
|
|
242
|
+
|
|
243
|
+
/**
 * LLM adapter backed by the Google Gemini API via `@google/genai`.
 *
 * Thread-safe — a single instance may be shared across concurrent agent runs.
 * The underlying SDK client is stateless across requests.
 *
 * API key resolution order: constructor argument, then `GEMINI_API_KEY`,
 * then `GOOGLE_API_KEY`.
 */
export class GeminiAdapter implements LLMAdapter {
  readonly name = 'gemini'

  // Runtime-private (#) SDK client, created once in the constructor.
  readonly #client: GoogleGenAI

  constructor(apiKey?: string) {
    this.#client = new GoogleGenAI({
      apiKey: apiKey ?? process.env['GEMINI_API_KEY'] ?? process.env['GOOGLE_API_KEY'],
    })
  }

  // -------------------------------------------------------------------------
  // chat()
  // -------------------------------------------------------------------------

  /**
   * Send a synchronous (non-streaming) chat request and return the complete
   * {@link LLMResponse}.
   *
   * Uses `ai.models.generateContent()` with the full conversation as
   * `contents`, which is the idiomatic pattern for `@google/genai`.
   *
   * Note: the response `id` is generated locally (Gemini does not return one
   * in a form the framework uses), so it identifies the request, not a
   * server-side object.
   */
  async chat(messages: LLMMessage[], options: LLMChatOptions): Promise<LLMResponse> {
    const id = generateId()
    const contents = toGeminiContents(messages)

    const response = await this.#client.models.generateContent({
      model: options.model,
      contents,
      config: buildConfig(options),
    })

    return fromGeminiResponse(response, id, options.model)
  }

  // -------------------------------------------------------------------------
  // stream()
  // -------------------------------------------------------------------------

  /**
   * Send a streaming chat request and yield {@link StreamEvent}s as they
   * arrive from the API.
   *
   * Uses `ai.models.generateContentStream()` which returns an
   * `AsyncGenerator<GenerateContentResponse>`. Each yielded chunk has the same
   * shape as a full response but contains only the delta for that chunk.
   *
   * Because `@google/genai` doesn't expose a `finalMessage()` helper like the
   * Anthropic SDK, we accumulate content and token counts as we stream so that
   * the terminal `done` event carries a complete and accurate {@link LLMResponse}.
   *
   * Sequence guarantees (matching the Anthropic adapter):
   * - Zero or more `text` events with incremental deltas
   * - Zero or more `tool_use` events (one per call; Gemini doesn't stream args)
   * - Exactly one terminal event: `done` or `error`
   *
   * NOTE(review): the `done` payload keeps one text block per streamed chunk
   * rather than merging consecutive text deltas into a single block — confirm
   * downstream consumers coalesce adjacent text blocks if they need a single
   * string.
   */
  async *stream(
    messages: LLMMessage[],
    options: LLMStreamOptions,
  ): AsyncIterable<StreamEvent> {
    const id = generateId()
    const contents = toGeminiContents(messages)

    try {
      const streamResponse = await this.#client.models.generateContentStream({
        model: options.model,
        contents,
        config: buildConfig(options),
      })

      // Accumulators for building the done payload.
      const accumulatedContent: ContentBlock[] = []
      let inputTokens = 0
      let outputTokens = 0
      let lastFinishReason: string | undefined

      for await (const chunk of streamResponse) {
        const candidate = chunk.candidates?.[0]

        // Accumulate token counts — the API emits these on the final chunk.
        if (chunk.usageMetadata) {
          inputTokens = chunk.usageMetadata.promptTokenCount ?? inputTokens
          outputTokens = chunk.usageMetadata.candidatesTokenCount ?? outputTokens
        }
        // Remember the latest finish reason; only the final chunk's matters.
        if (candidate?.finishReason) {
          lastFinishReason = candidate.finishReason as string
        }

        for (const part of candidate?.content?.parts ?? []) {
          // Truthiness check deliberately skips empty-string text deltas.
          if (part.text) {
            accumulatedContent.push({ type: 'text', text: part.text })
            yield { type: 'text', data: part.text } satisfies StreamEvent
          } else if (part.functionCall) {
            // Gemini delivers each function call whole (no argument deltas).
            const toolId = getFunctionCallId(part)
            const toolUseBlock: ToolUseBlock = {
              type: 'tool_use',
              id: toolId,
              name: part.functionCall.name ?? '',
              input: (part.functionCall.args ?? {}) as Record<string, unknown>,
            }
            accumulatedContent.push(toolUseBlock)
            yield { type: 'tool_use', data: toolUseBlock } satisfies StreamEvent
          }
        }
      }

      // Determine stop_reason from the accumulated response: MAX_TOKENS wins,
      // otherwise any tool_use block forces 'tool_use' (Gemini may report
      // STOP even when it returned function calls).
      const hasToolUse = accumulatedContent.some((b) => b.type === 'tool_use')
      let stop_reason: LLMResponse['stop_reason'] = 'end_turn'
      if (lastFinishReason === 'MAX_TOKENS') {
        stop_reason = 'max_tokens'
      } else if (hasToolUse) {
        stop_reason = 'tool_use'
      }

      const finalResponse: LLMResponse = {
        id,
        content: accumulatedContent,
        model: options.model,
        stop_reason,
        usage: { input_tokens: inputTokens, output_tokens: outputTokens },
      }

      yield { type: 'done', data: finalResponse } satisfies StreamEvent
    } catch (err) {
      // Surface failures as a terminal error event instead of throwing out of
      // the generator, so consumers always see exactly one terminal event.
      const error = err instanceof Error ? err : new Error(String(err))
      yield { type: 'error', data: error } satisfies StreamEvent
    }
  }
}
|
package/src/llm/grok.ts
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Grok (xAI) adapter.
|
|
3
|
+
*
|
|
4
|
+
* Thin wrapper around OpenAIAdapter that hard-codes the official xAI endpoint
|
|
5
|
+
* and XAI_API_KEY environment variable fallback.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { OpenAIAdapter } from './openai.js'
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* LLM adapter for Grok models (grok-4 series and future models).
|
|
12
|
+
*
|
|
13
|
+
* Thread-safe. Can be shared across agents.
|
|
14
|
+
*
|
|
15
|
+
* Usage:
|
|
16
|
+
* provider: 'grok'
|
|
17
|
+
* model: 'grok-4' (or any current Grok model name)
|
|
18
|
+
*/
|
|
19
|
+
export class GrokAdapter extends OpenAIAdapter {
|
|
20
|
+
readonly name = 'grok'
|
|
21
|
+
|
|
22
|
+
constructor(apiKey?: string, baseURL?: string) {
|
|
23
|
+
// Allow override of baseURL (for proxies or future changes) but default to official xAI endpoint.
|
|
24
|
+
super(
|
|
25
|
+
apiKey ?? process.env['XAI_API_KEY'],
|
|
26
|
+
baseURL ?? 'https://api.x.ai/v1'
|
|
27
|
+
)
|
|
28
|
+
}
|
|
29
|
+
}
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Shared OpenAI wire-format conversion helpers.
|
|
3
|
+
*
|
|
4
|
+
* Both the OpenAI and Copilot adapters use the OpenAI Chat Completions API
|
|
5
|
+
* format. This module contains the common conversion logic so it isn't
|
|
6
|
+
* duplicated across adapters.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import OpenAI from 'openai'
|
|
10
|
+
import type {
|
|
11
|
+
ChatCompletion,
|
|
12
|
+
ChatCompletionAssistantMessageParam,
|
|
13
|
+
ChatCompletionMessageParam,
|
|
14
|
+
ChatCompletionMessageToolCall,
|
|
15
|
+
ChatCompletionTool,
|
|
16
|
+
ChatCompletionToolMessageParam,
|
|
17
|
+
ChatCompletionUserMessageParam,
|
|
18
|
+
} from 'openai/resources/chat/completions/index.js'
|
|
19
|
+
|
|
20
|
+
import type {
|
|
21
|
+
ContentBlock,
|
|
22
|
+
LLMMessage,
|
|
23
|
+
LLMResponse,
|
|
24
|
+
LLMToolDef,
|
|
25
|
+
TextBlock,
|
|
26
|
+
ToolUseBlock,
|
|
27
|
+
} from '../types.js'
|
|
28
|
+
import { extractToolCallsFromText } from '../tool/text-tool-extractor.js'
|
|
29
|
+
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
// Framework → OpenAI
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Convert a framework {@link LLMToolDef} to an OpenAI {@link ChatCompletionTool}.
|
|
36
|
+
*/
|
|
37
|
+
export function toOpenAITool(tool: LLMToolDef): ChatCompletionTool {
|
|
38
|
+
return {
|
|
39
|
+
type: 'function',
|
|
40
|
+
function: {
|
|
41
|
+
name: tool.name,
|
|
42
|
+
description: tool.description,
|
|
43
|
+
parameters: tool.inputSchema as Record<string, unknown>,
|
|
44
|
+
},
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Determine whether a framework message contains any `tool_result` content
|
|
50
|
+
* blocks, which must be serialised as separate OpenAI `tool`-role messages.
|
|
51
|
+
*/
|
|
52
|
+
function hasToolResults(msg: LLMMessage): boolean {
|
|
53
|
+
return msg.content.some((b) => b.type === 'tool_result')
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* Convert framework {@link LLMMessage}s into OpenAI
|
|
58
|
+
* {@link ChatCompletionMessageParam} entries.
|
|
59
|
+
*
|
|
60
|
+
* `tool_result` blocks are expanded into top-level `tool`-role messages
|
|
61
|
+
* because OpenAI uses a dedicated role for tool results rather than embedding
|
|
62
|
+
* them inside user-content arrays.
|
|
63
|
+
*/
|
|
64
|
+
export function toOpenAIMessages(messages: LLMMessage[]): ChatCompletionMessageParam[] {
|
|
65
|
+
const result: ChatCompletionMessageParam[] = []
|
|
66
|
+
|
|
67
|
+
for (const msg of messages) {
|
|
68
|
+
if (msg.role === 'assistant') {
|
|
69
|
+
result.push(toOpenAIAssistantMessage(msg))
|
|
70
|
+
} else {
|
|
71
|
+
// user role
|
|
72
|
+
if (!hasToolResults(msg)) {
|
|
73
|
+
result.push(toOpenAIUserMessage(msg))
|
|
74
|
+
} else {
|
|
75
|
+
const nonToolBlocks = msg.content.filter((b) => b.type !== 'tool_result')
|
|
76
|
+
if (nonToolBlocks.length > 0) {
|
|
77
|
+
result.push(toOpenAIUserMessage({ role: 'user', content: nonToolBlocks }))
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
for (const block of msg.content) {
|
|
81
|
+
if (block.type === 'tool_result') {
|
|
82
|
+
const toolMsg: ChatCompletionToolMessageParam = {
|
|
83
|
+
role: 'tool',
|
|
84
|
+
tool_call_id: block.tool_use_id,
|
|
85
|
+
content: block.content,
|
|
86
|
+
}
|
|
87
|
+
result.push(toolMsg)
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
return result
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* Convert a `user`-role framework message into an OpenAI user message.
|
|
99
|
+
* Image blocks are converted to the OpenAI image_url content part format.
|
|
100
|
+
*/
|
|
101
|
+
function toOpenAIUserMessage(msg: LLMMessage): ChatCompletionUserMessageParam {
|
|
102
|
+
if (msg.content.length === 1 && msg.content[0]?.type === 'text') {
|
|
103
|
+
return { role: 'user', content: msg.content[0].text }
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
type ContentPart = OpenAI.Chat.ChatCompletionContentPartText | OpenAI.Chat.ChatCompletionContentPartImage
|
|
107
|
+
const parts: ContentPart[] = []
|
|
108
|
+
|
|
109
|
+
for (const block of msg.content) {
|
|
110
|
+
if (block.type === 'text') {
|
|
111
|
+
parts.push({ type: 'text', text: block.text })
|
|
112
|
+
} else if (block.type === 'image') {
|
|
113
|
+
parts.push({
|
|
114
|
+
type: 'image_url',
|
|
115
|
+
image_url: {
|
|
116
|
+
url: `data:${block.source.media_type};base64,${block.source.data}`,
|
|
117
|
+
},
|
|
118
|
+
})
|
|
119
|
+
}
|
|
120
|
+
// tool_result blocks are handled by the caller (toOpenAIMessages); skip here.
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
return { role: 'user', content: parts }
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
/**
|
|
127
|
+
* Convert an `assistant`-role framework message into an OpenAI assistant message.
|
|
128
|
+
* `tool_use` blocks become `tool_calls`; `text` blocks become message content.
|
|
129
|
+
*/
|
|
130
|
+
function toOpenAIAssistantMessage(msg: LLMMessage): ChatCompletionAssistantMessageParam {
|
|
131
|
+
const toolCalls: ChatCompletionMessageToolCall[] = []
|
|
132
|
+
const textParts: string[] = []
|
|
133
|
+
|
|
134
|
+
for (const block of msg.content) {
|
|
135
|
+
if (block.type === 'tool_use') {
|
|
136
|
+
toolCalls.push({
|
|
137
|
+
id: block.id,
|
|
138
|
+
type: 'function',
|
|
139
|
+
function: {
|
|
140
|
+
name: block.name,
|
|
141
|
+
arguments: JSON.stringify(block.input),
|
|
142
|
+
},
|
|
143
|
+
})
|
|
144
|
+
} else if (block.type === 'text') {
|
|
145
|
+
textParts.push(block.text)
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
const assistantMsg: ChatCompletionAssistantMessageParam = {
|
|
150
|
+
role: 'assistant',
|
|
151
|
+
content: textParts.length > 0 ? textParts.join('') : null,
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
if (toolCalls.length > 0) {
|
|
155
|
+
assistantMsg.tool_calls = toolCalls
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
return assistantMsg
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
// ---------------------------------------------------------------------------
|
|
162
|
+
// OpenAI → Framework
|
|
163
|
+
// ---------------------------------------------------------------------------
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* Convert an OpenAI {@link ChatCompletion} into a framework {@link LLMResponse}.
|
|
167
|
+
*
|
|
168
|
+
* Takes only the first choice (index 0), consistent with how the framework
|
|
169
|
+
* is designed for single-output agents.
|
|
170
|
+
*
|
|
171
|
+
* @param completion - The raw OpenAI completion.
|
|
172
|
+
* @param knownToolNames - Optional whitelist of tool names. When the model
|
|
173
|
+
* returns no `tool_calls` but the text contains JSON
|
|
174
|
+
* that looks like a tool call, the fallback extractor
|
|
175
|
+
* uses this list to validate matches. Pass the names
|
|
176
|
+
* of tools sent in the request for best results.
|
|
177
|
+
*/
|
|
178
|
+
export function fromOpenAICompletion(
|
|
179
|
+
completion: ChatCompletion,
|
|
180
|
+
knownToolNames?: string[],
|
|
181
|
+
): LLMResponse {
|
|
182
|
+
const choice = completion.choices[0]
|
|
183
|
+
if (choice === undefined) {
|
|
184
|
+
throw new Error('OpenAI returned a completion with no choices')
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
const content: ContentBlock[] = []
|
|
188
|
+
const message = choice.message
|
|
189
|
+
|
|
190
|
+
if (message.content !== null && message.content !== undefined) {
|
|
191
|
+
const textBlock: TextBlock = { type: 'text', text: message.content }
|
|
192
|
+
content.push(textBlock)
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
for (const toolCall of message.tool_calls ?? []) {
|
|
196
|
+
let parsedInput: Record<string, unknown> = {}
|
|
197
|
+
try {
|
|
198
|
+
const parsed: unknown = JSON.parse(toolCall.function.arguments)
|
|
199
|
+
if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
|
|
200
|
+
parsedInput = parsed as Record<string, unknown>
|
|
201
|
+
}
|
|
202
|
+
} catch {
|
|
203
|
+
// Malformed arguments from the model — surface as empty object.
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
const toolUseBlock: ToolUseBlock = {
|
|
207
|
+
type: 'tool_use',
|
|
208
|
+
id: toolCall.id,
|
|
209
|
+
name: toolCall.function.name,
|
|
210
|
+
input: parsedInput,
|
|
211
|
+
}
|
|
212
|
+
content.push(toolUseBlock)
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
// ---------------------------------------------------------------------------
|
|
216
|
+
// Fallback: extract tool calls from text when native tool_calls is empty.
|
|
217
|
+
//
|
|
218
|
+
// Some local models (Ollama thinking models, misconfigured vLLM) return tool
|
|
219
|
+
// calls as plain text instead of using the tool_calls wire format. When we
|
|
220
|
+
// have text but no tool_calls, try to extract them from the text.
|
|
221
|
+
// ---------------------------------------------------------------------------
|
|
222
|
+
const hasNativeToolCalls = (message.tool_calls ?? []).length > 0
|
|
223
|
+
if (
|
|
224
|
+
!hasNativeToolCalls &&
|
|
225
|
+
knownToolNames !== undefined &&
|
|
226
|
+
knownToolNames.length > 0 &&
|
|
227
|
+
message.content !== null &&
|
|
228
|
+
message.content !== undefined &&
|
|
229
|
+
message.content.length > 0
|
|
230
|
+
) {
|
|
231
|
+
const extracted = extractToolCallsFromText(message.content, knownToolNames)
|
|
232
|
+
if (extracted.length > 0) {
|
|
233
|
+
content.push(...extracted)
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
const hasToolUseBlocks = content.some(b => b.type === 'tool_use')
|
|
238
|
+
const rawStopReason = choice.finish_reason ?? 'stop'
|
|
239
|
+
// If we extracted tool calls from text but the finish_reason was 'stop',
|
|
240
|
+
// correct it to 'tool_use' so the agent runner continues the loop.
|
|
241
|
+
const stopReason = hasToolUseBlocks && rawStopReason === 'stop'
|
|
242
|
+
? 'tool_use'
|
|
243
|
+
: normalizeFinishReason(rawStopReason)
|
|
244
|
+
|
|
245
|
+
return {
|
|
246
|
+
id: completion.id,
|
|
247
|
+
content,
|
|
248
|
+
model: completion.model,
|
|
249
|
+
stop_reason: stopReason,
|
|
250
|
+
usage: {
|
|
251
|
+
input_tokens: completion.usage?.prompt_tokens ?? 0,
|
|
252
|
+
output_tokens: completion.usage?.completion_tokens ?? 0,
|
|
253
|
+
},
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
/**
|
|
258
|
+
* Normalize an OpenAI `finish_reason` string to the framework's canonical
|
|
259
|
+
* stop-reason vocabulary.
|
|
260
|
+
*
|
|
261
|
+
* Mapping:
|
|
262
|
+
* - `'stop'` → `'end_turn'`
|
|
263
|
+
* - `'tool_calls'` → `'tool_use'`
|
|
264
|
+
* - `'length'` → `'max_tokens'`
|
|
265
|
+
* - `'content_filter'` → `'content_filter'`
|
|
266
|
+
* - anything else → passed through unchanged
|
|
267
|
+
*/
|
|
268
|
+
export function normalizeFinishReason(reason: string): string {
|
|
269
|
+
switch (reason) {
|
|
270
|
+
case 'stop': return 'end_turn'
|
|
271
|
+
case 'tool_calls': return 'tool_use'
|
|
272
|
+
case 'length': return 'max_tokens'
|
|
273
|
+
case 'content_filter': return 'content_filter'
|
|
274
|
+
default: return reason
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
/**
|
|
279
|
+
* Prepend a system message when `systemPrompt` is provided, then append the
|
|
280
|
+
* converted conversation messages.
|
|
281
|
+
*/
|
|
282
|
+
export function buildOpenAIMessageList(
|
|
283
|
+
messages: LLMMessage[],
|
|
284
|
+
systemPrompt: string | undefined,
|
|
285
|
+
): ChatCompletionMessageParam[] {
|
|
286
|
+
const result: ChatCompletionMessageParam[] = []
|
|
287
|
+
|
|
288
|
+
if (systemPrompt !== undefined && systemPrompt.length > 0) {
|
|
289
|
+
result.push({ role: 'system', content: systemPrompt })
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
result.push(...toOpenAIMessages(messages))
|
|
293
|
+
return result
|
|
294
|
+
}
|