@arcote.tech/arc-ai-gemini 0.5.2 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +2 -2
  2. package/src/index.ts +105 -84
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@arcote.tech/arc-ai-gemini",
3
3
  "type": "module",
4
- "version": "0.5.2",
4
+ "version": "0.5.5",
5
5
  "private": false,
6
6
  "description": "Gemini (Google) adapter for Arc AI framework",
7
7
  "main": "./src/index.ts",
@@ -10,7 +10,7 @@
10
10
  "type-check": "tsc --noEmit"
11
11
  },
12
12
  "peerDependencies": {
13
- "@arcote.tech/arc-ai": "^0.5.2",
13
+ "@arcote.tech/arc-ai": "^0.5.5",
14
14
  "typescript": "^5.0.0"
15
15
  },
16
16
  "devDependencies": {
package/src/index.ts CHANGED
@@ -2,6 +2,9 @@ import type {
2
2
  LLMProvider,
3
3
  CompletionRequest,
4
4
  CompletionResult,
5
+ Conversation,
6
+ ConversationTurn,
7
+ AssistantContentBlock,
5
8
  StreamChunk,
6
9
  ToolCall,
7
10
  TokenUsage,
@@ -38,51 +41,57 @@ export function gemini(config: GeminiConfig): LLMProvider {
38
41
  ];
39
42
  }
40
43
 
41
- function buildContents(messages: CompletionRequest["messages"]) {
42
- const systemMessages = messages.filter((m) => m.role === "system");
43
- const nonSystemMessages = messages.filter((m) => m.role !== "system");
44
-
45
- const systemInstruction = systemMessages.length
46
- ? { parts: [{ text: systemMessages.map((m) => m.content).join("\n\n") }] }
47
- : undefined;
48
-
49
- const contents = nonSystemMessages.map((m) => {
50
- if (m.role === "tool") {
51
- return {
52
- role: "user",
53
- parts: [
54
- {
55
- functionResponse: {
56
- name: m.name ?? "unknown",
57
- response: { result: m.content },
58
- },
59
- },
60
- ],
61
- };
62
- }
63
-
44
+ /**
45
+ * Translate a single ConversationTurn into a Gemini `contents[]` entry.
46
+ * Adapter is a pure translator — the caller already decided what to send via
47
+ * the Conversation discriminated union. Block ordering is preserved 1:1
48
+ * inside assistant turns by emitting parts in input order.
49
+ */
50
+ function turnToContent(turn: ConversationTurn): unknown {
51
+ if (turn.role === "user") {
52
+ return { role: "user", parts: [{ text: turn.content }] };
53
+ }
54
+ if (turn.role === "tool_result") {
64
55
  return {
65
- role: m.role === "assistant" ? "model" : "user",
66
- parts: [{ text: m.content }],
56
+ role: "user",
57
+ parts: [
58
+ {
59
+ functionResponse: {
60
+ name: turn.name,
61
+ response: { result: turn.content },
62
+ },
63
+ },
64
+ ],
67
65
  };
68
- });
69
-
70
- return { systemInstruction, contents };
66
+ }
67
+ // assistant — emit ordered parts
68
+ const parts: unknown[] = [];
69
+ for (const block of turn.blocks) {
70
+ if (block.type === "text") {
71
+ if (!block.text) continue;
72
+ parts.push({ text: block.text });
73
+ } else {
74
+ parts.push({
75
+ functionCall: { name: block.name, args: block.arguments },
76
+ });
77
+ }
78
+ }
79
+ return { role: "model", parts };
71
80
  }
72
81
 
73
- function parseUsage(raw: any): TokenUsage {
74
- const meta = raw.usageMetadata ?? {};
75
- return {
76
- inputTokens: meta.promptTokenCount ?? 0,
77
- outputTokens: meta.candidatesTokenCount ?? 0,
78
- totalTokens: meta.totalTokenCount ?? 0,
79
- cachedTokens: meta.cachedContentTokenCount ?? 0,
80
- reasoningTokens: 0,
81
- };
82
+ function buildContents(conversation: Conversation): unknown[] {
83
+ if (conversation.mode !== "full") {
84
+ throw new Error(
85
+ "Gemini provider does not support continuation mode — set " +
86
+ "`supportsContinuation: false` in the listener and pass " +
87
+ "`Conversation.mode = 'full'` with the full conversation history.",
88
+ );
89
+ }
90
+ return conversation.turns.map(turnToContent);
82
91
  }
83
92
 
84
- async function complete(request: CompletionRequest): Promise<CompletionResult> {
85
- const { systemInstruction, contents } = buildContents(request.messages);
93
+ function buildBody(request: CompletionRequest): Record<string, unknown> {
94
+ const contents = buildContents(request.conversation);
86
95
 
87
96
  const body: Record<string, unknown> = {
88
97
  contents,
@@ -92,18 +101,37 @@ export function gemini(config: GeminiConfig): LLMProvider {
92
101
  },
93
102
  };
94
103
 
95
- if (systemInstruction) body.systemInstruction = systemInstruction;
104
+ if (request.instructions) {
105
+ body.systemInstruction = { parts: [{ text: request.instructions }] };
106
+ }
96
107
 
97
108
  const tools = translateTools(request.tools);
98
109
  if (tools) body.tools = tools;
99
-
100
110
  if (request.webSearch) {
101
- body.tools = [
102
- ...(tools ?? []),
103
- { googleSearch: {} },
104
- ];
111
+ body.tools = [...(tools ?? []), { googleSearch: {} }];
105
112
  }
106
113
 
114
+ return body;
115
+ }
116
+
117
+ function parseUsage(raw: any): TokenUsage {
118
+ const meta = raw.usageMetadata ?? {};
119
+ return {
120
+ inputTokens: meta.promptTokenCount ?? 0,
121
+ outputTokens: meta.candidatesTokenCount ?? 0,
122
+ totalTokens: meta.totalTokenCount ?? 0,
123
+ cachedTokens: meta.cachedContentTokenCount ?? 0,
124
+ reasoningTokens: 0,
125
+ };
126
+ }
127
+
128
+ // ─── complete ─────────────────────────────────────────────────
129
+
130
+ async function complete(
131
+ request: CompletionRequest,
132
+ ): Promise<CompletionResult> {
133
+ const body = buildBody(request);
134
+
107
135
  const response = await fetch(
108
136
  `${baseUrl}/models/${request.model}:generateContent?key=${config.apiKey}`,
109
137
  {
@@ -122,14 +150,13 @@ export function gemini(config: GeminiConfig): LLMProvider {
122
150
  const candidate = data.candidates?.[0];
123
151
  const parts = candidate?.content?.parts ?? [];
124
152
 
125
- let content = "";
126
- const toolCalls: ToolCall[] = [];
127
-
153
+ const blocks: AssistantContentBlock[] = [];
128
154
  for (const part of parts) {
129
155
  if (part.text) {
130
- content += part.text;
156
+ blocks.push({ type: "text", text: part.text });
131
157
  } else if (part.functionCall) {
132
- toolCalls.push({
158
+ blocks.push({
159
+ type: "tool_call",
133
160
  id: generateToolCallId(),
134
161
  name: part.functionCall.name,
135
162
  arguments: part.functionCall.args ?? {},
@@ -137,42 +164,24 @@ export function gemini(config: GeminiConfig): LLMProvider {
137
164
  }
138
165
  }
139
166
 
140
- const finishReason: FinishReason =
141
- toolCalls.length > 0 ? "tool_call" : "stop";
167
+ const finishReason: FinishReason = blocks.some((b) => b.type === "tool_call")
168
+ ? "tool_call"
169
+ : "stop";
142
170
 
143
171
  return {
144
- content,
145
- toolCalls,
172
+ blocks,
146
173
  usage: parseUsage(data),
147
174
  finishReason,
148
175
  };
149
176
  }
150
177
 
178
+ // ─── streamComplete ───────────────────────────────────────────
179
+
151
180
  async function streamComplete(
152
181
  request: CompletionRequest,
153
182
  onChunk: (chunk: StreamChunk) => void,
154
183
  ): Promise<CompletionResult> {
155
- const { systemInstruction, contents } = buildContents(request.messages);
156
-
157
- const body: Record<string, unknown> = {
158
- contents,
159
- generationConfig: {
160
- temperature: request.temperature,
161
- maxOutputTokens: request.maxTokens,
162
- },
163
- };
164
-
165
- if (systemInstruction) body.systemInstruction = systemInstruction;
166
-
167
- const tools = translateTools(request.tools);
168
- if (tools) body.tools = tools;
169
-
170
- if (request.webSearch) {
171
- body.tools = [
172
- ...(tools ?? []),
173
- { googleSearch: {} },
174
- ];
175
- }
184
+ const body = buildBody(request);
176
185
 
177
186
  const response = await fetch(
178
187
  `${baseUrl}/models/${request.model}:streamGenerateContent?alt=sse&key=${config.apiKey}`,
@@ -188,7 +197,10 @@ export function gemini(config: GeminiConfig): LLMProvider {
188
197
  throw new Error(`Gemini API error ${response.status}: ${error}`);
189
198
  }
190
199
 
191
- let content = "";
200
+ // Gemini's streamGenerateContent emits parts in order across chunks. We
201
+ // append blocks as we see them; consecutive text parts merge into the
202
+ // current text block, function calls become their own blocks.
203
+ const blocks: AssistantContentBlock[] = [];
192
204
  let usage: TokenUsage = {
193
205
  inputTokens: 0,
194
206
  outputTokens: 0,
@@ -196,7 +208,6 @@ export function gemini(config: GeminiConfig): LLMProvider {
196
208
  cachedTokens: 0,
197
209
  reasoningTokens: 0,
198
210
  };
199
- const toolCalls: ToolCall[] = [];
200
211
 
201
212
  const reader = response.body!.getReader();
202
213
  const decoder = new TextDecoder();
@@ -220,7 +231,12 @@ export function gemini(config: GeminiConfig): LLMProvider {
220
231
 
221
232
  for (const part of parts) {
222
233
  if (part.text) {
223
- content += part.text;
234
+ const last = blocks[blocks.length - 1];
235
+ if (last?.type === "text") {
236
+ last.text += part.text;
237
+ } else {
238
+ blocks.push({ type: "text", text: part.text });
239
+ }
224
240
  onChunk({ type: "content_delta", content: part.text });
225
241
  } else if (part.functionCall) {
226
242
  const tc: ToolCall = {
@@ -228,12 +244,16 @@ export function gemini(config: GeminiConfig): LLMProvider {
228
244
  name: part.functionCall.name,
229
245
  arguments: part.functionCall.args ?? {},
230
246
  };
231
- toolCalls.push(tc);
247
+ blocks.push({
248
+ type: "tool_call",
249
+ id: tc.id,
250
+ name: tc.name,
251
+ arguments: tc.arguments,
252
+ });
232
253
  onChunk({ type: "tool_call_start", toolCall: tc });
233
254
  }
234
255
  }
235
256
 
236
- // Gemini sends usage in every chunk
237
257
  if (parsed.usageMetadata) {
238
258
  usage = parseUsage(parsed);
239
259
  }
@@ -243,12 +263,12 @@ export function gemini(config: GeminiConfig): LLMProvider {
243
263
  }
244
264
  }
245
265
 
246
- const finishReason: FinishReason =
247
- toolCalls.length > 0 ? "tool_call" : "stop";
266
+ const finishReason: FinishReason = blocks.some((b) => b.type === "tool_call")
267
+ ? "tool_call"
268
+ : "stop";
248
269
 
249
270
  return {
250
- content,
251
- toolCalls,
271
+ blocks,
252
272
  usage,
253
273
  finishReason,
254
274
  };
@@ -257,6 +277,7 @@ export function gemini(config: GeminiConfig): LLMProvider {
257
277
  return {
258
278
  name: "gemini",
259
279
  models: ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.0-flash"],
280
+ supportsContinuation: false,
260
281
  complete,
261
282
  streamComplete,
262
283
  };