@oh-my-pi/pi-ai 15.13.2 → 15.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,27 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.13.3] - 2026-06-15
6
+
7
+ ### Added
8
+
9
+ - Added the `gemini` in-band tool-call syntax with Python-style ```tool_code``` blocks and `default_api` invocations
10
+ - Added the `gemma` token-delimited in-band tool-call syntax using `<|tool_call>` and `<|tool_response>` blocks
11
+ - Added `gemini` and `gemma` to owned stream tool-result token detection so their tool responses are recognized
12
+ - Added end-of-stream handling that drops truncated Gemini and Gemma tool blocks instead of emitting them as plain text during streaming
13
+ - Added the Azure OpenAI provider definition (`azure`) to the registry; `AZURE_OPENAI_API_KEY` resolves as its env-var API key via the catalog provider table.
14
+
15
+ ### Changed
16
+
17
+ - Gemini tool-call examples now render without the `default_api.` namespace prefix, keeping `<example>` blocks concise. The live wire format still uses `default_api.` per the Gemini grammar.
18
+
19
+ ### Fixed
20
+
21
+ - Fixed duplicate tool call projections by deduplicating provider-native `toolCall` events against in-band `tool_code` calls and keeping only the first real channel
22
+ - Dropped nameless native `toolCall` events so they no longer appear as surfaced tool calls in owned-mode streams
23
+ - Fixed truncated Gemini and Gemma tool blocks being emitted as plain text during streaming; an incomplete block is now dropped
24
+ - Fixed Gemini/Gemma in-band tool-call parsing around Python comments, raw/unicode string literals, and Gemma close-token text inside string values.
25
+
5
26
  ## [15.13.2] - 2026-06-15
6
27
 
7
28
  ### Added
@@ -0,0 +1,16 @@
1
+ import type { Grammar, InbandScanEvent, InbandScanner } from "./types";
2
+ /**
3
+ * Scanner for the hosted-Gemini / Gemma 3 Pythonic tool-calling convention
4
+ * (see `docs/toolconv/gemini.md`). Tool calls arrive as a ```` ```tool_code ````
5
+ * fenced block whose body is one or more Python call expressions, e.g.
6
+ * `print(default_api.search(pattern="x", skip=40))`. Like the qwen3 scanner we
7
+ * buffer the whole block until its closing fence, then parse all calls at once
8
+ * (no incremental argument deltas — Python literals are not worth streaming).
9
+ */
10
+ export declare class GeminiInbandScanner implements InbandScanner {
11
+ #private;
12
+ feed(text: string): InbandScanEvent[];
13
+ flush(): InbandScanEvent[];
14
+ }
15
+ declare const grammar: Grammar;
16
+ export default grammar;
@@ -0,0 +1,14 @@
1
+ import type { Grammar, InbandScanEvent, InbandScanner } from "./types";
2
+ /**
3
+ * Scanner for the Gemma 4 token-delimited tool-calling convention (see
4
+ * `docs/toolconv/gemma.md`). Each call is one `<|tool_call>call:NAME{…}<tool_call|>`
5
+ * block whose argument list is `key:value` pairs; string values are wrapped in
6
+ * the `<|"|>` token rather than ASCII quotes, so splitting must skip those spans.
7
+ */
8
+ export declare class GemmaInbandScanner implements InbandScanner {
9
+ #private;
10
+ feed(text: string): InbandScanEvent[];
11
+ flush(): InbandScanEvent[];
12
+ }
13
+ declare const grammar: Grammar;
14
+ export default grammar;
@@ -22,3 +22,9 @@ export declare function renderXmlToolCalls(calls: readonly ToolCall[], options?:
22
22
  export declare function renderPiNativeInvocation(call: ToolCall, options?: GrammarRenderOptions): string;
23
23
  export declare function renderPiNativeToolCalls(calls: readonly ToolCall[], options?: GrammarRenderOptions): string;
24
24
  export declare function renderToolResponseResults(results: readonly GrammarToolResult[]): string;
25
+ export declare function renderGeminiInvocation(call: ToolCall, options?: GrammarRenderOptions): string;
26
+ export declare function renderGeminiToolCalls(calls: readonly ToolCall[], options?: GrammarRenderOptions): string;
27
+ export declare function renderGeminiToolResults(results: readonly GrammarToolResult[]): string;
28
+ export declare function renderGemmaInvocation(call: ToolCall, _options?: GrammarRenderOptions): string;
29
+ export declare function renderGemmaToolCalls(calls: readonly ToolCall[], options?: GrammarRenderOptions): string;
30
+ export declare function renderGemmaToolResults(results: readonly GrammarToolResult[]): string;
@@ -0,0 +1,4 @@
1
+ export declare const azureProvider: {
2
+ readonly id: "azure";
3
+ readonly name: "Azure OpenAI";
4
+ };
@@ -25,6 +25,9 @@ declare const ALL: ({
25
25
  readonly refreshToken: (credentials: import("./oauth").OAuthCredentials) => Promise<import("./oauth").OAuthCredentials>;
26
26
  readonly callbackPort: 54545;
27
27
  readonly pasteCodeFlow: true;
28
+ } | {
29
+ readonly id: "azure";
30
+ readonly name: "Azure OpenAI";
28
31
  } | {
29
32
  readonly id: "cerebras";
30
33
  readonly name: "Cerebras";
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "15.13.2",
4
+ "version": "15.13.3",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -38,8 +38,8 @@
38
38
  },
39
39
  "dependencies": {
40
40
  "@bufbuild/protobuf": "^2.12.0",
41
- "@oh-my-pi/pi-catalog": "15.13.2",
42
- "@oh-my-pi/pi-utils": "15.13.2",
41
+ "@oh-my-pi/pi-catalog": "15.13.3",
42
+ "@oh-my-pi/pi-utils": "15.13.3",
43
43
  "partial-json": "^0.1.7",
44
44
  "zod": "^4"
45
45
  },
@@ -1,5 +1,7 @@
1
1
  import anthropicGrammar from "./anthropic";
2
2
  import deepseekGrammar from "./deepseek";
3
+ import geminiGrammar from "./gemini";
4
+ import gemmaGrammar from "./gemma";
3
5
  import glmGrammar from "./glm";
4
6
  import harmonyGrammar from "./harmony";
5
7
  import hermesGrammar from "./hermes";
@@ -19,6 +21,8 @@ const GRAMMARS: Record<ToolCallSyntax, Grammar> = {
19
21
  harmony: harmonyGrammar,
20
22
  pi: piGrammar,
21
23
  qwen3: qwen3Grammar,
24
+ gemini: geminiGrammar,
25
+ gemma: gemmaGrammar,
22
26
  };
23
27
 
24
28
  export function getInbandGrammar(syntax: ToolCallSyntax): Grammar {
@@ -0,0 +1,35 @@
1
+ ## Format guide
2
+
3
+ Emit tool calls as Python inside a fenced ` ```tool_code ` block. Call each function as a method on `default_api`:
4
+
5
+ ````text
6
+ ```tool_code
7
+ default_api.function_name(arg="value", count=2)
8
+ ```
9
+ ````
10
+
11
+ Argument values are Python literals: `"strings"`, numbers, `True`/`False`, `None`, `[lists]`, `{"dicts": 1}`.
12
+
13
+ Call several functions in parallel as a Python list:
14
+
15
+ ````text
16
+ ```tool_code
17
+ [default_api.first(x="a"), default_api.second(y="b")]
18
+ ```
19
+ ````
20
+
21
+ Tool results arrive later in a ` ```tool_outputs ` block:
22
+
23
+ ````text
24
+ ```tool_outputs
25
+ verbatim tool result
26
+ ```
27
+ ````
28
+
29
+ ## Rules
30
+
31
+ - The function name MUST match a listed function; arguments are keyword form (`name=value`).
32
+ - Multiple calls = a single `[...]` list (or one `default_api...` call per line) inside one ` ```tool_code ` block.
33
+ - Put any reasoning as plain text before the ` ```tool_code ` block, never inside it.
34
+ - Read each ` ```tool_outputs ` block in call order. NEVER write a ` ```tool_outputs ` block yourself.
35
+ - After emitting the ` ```tool_code ` block, YOU MUST STOP AND HALT.
@@ -0,0 +1,440 @@
1
+ import { mintToolCallId, partialSuffixOverlapAny } from "./coercion";
2
+ import grammarPrompt from "./gemini.md" with { type: "text" };
3
+ import { renderGeminiInvocation, renderGeminiToolCalls, renderGeminiToolResults } from "./rendering";
4
+ import type { Grammar, InbandScanEvent, InbandScanner } from "./types";
5
+
6
+ const CODE_OPEN = "```tool_code";
7
+ const FENCE = "```";
8
+ const OPEN_TAGS = [CODE_OPEN] as const;
9
+
10
+ type State = "outside" | "tool";
11
+
12
+ interface ParsedCall {
13
+ name: string;
14
+ arguments: Record<string, unknown>;
15
+ }
16
+
17
/**
 * Scanner for the hosted-Gemini / Gemma 3 Pythonic tool-calling convention
 * (see `docs/toolconv/gemini.md`). Tool calls arrive as a ```` ```tool_code ````
 * fenced block whose body is one or more Python call expressions, e.g.
 * `print(default_api.search(pattern="x", skip=40))`. Like the qwen3 scanner we
 * buffer the whole block until its closing fence, then parse all calls at once
 * (no incremental argument deltas — Python literals are not worth streaming).
 *
 * The closing fence is located with a string- and comment-aware scan, so a
 * string argument that itself contains a markdown fence (for example file
 * content being written) does not terminate the block early.
 */
export class GeminiInbandScanner implements InbandScanner {
	/** Text received but not yet turned into events. */
	#buffer = "";
	/** `"tool"` while the buffer starts inside an open `tool_code` block. */
	#state: State = "outside";

	/** Appends a streamed chunk and returns every event that is now decidable. */
	feed(text: string): InbandScanEvent[] {
		if (text.length === 0) return [];
		this.#buffer += text;
		return this.#consume(false);
	}

	/** Drains the scanner at end of stream; an unterminated block is dropped. */
	flush(): InbandScanEvent[] {
		const events = this.#consume(true);
		// A stream that ends right after the opening fence leaves an empty
		// buffer in "tool" state, which `#consume` never visits. Reset so the
		// scanner is always back in its initial state after a flush.
		this.#buffer = "";
		this.#state = "outside";
		return events;
	}

	/** Alternates between the two states until one of them makes no progress. */
	#consume(final: boolean): InbandScanEvent[] {
		const events: InbandScanEvent[] = [];
		while (this.#buffer.length > 0) {
			if (this.#state === "outside") {
				this.#consumeOutside(final, events);
				if (this.#state === "outside") break;
				continue;
			}
			this.#consumeTool(final, events);
			if (this.#state === "tool") break;
		}
		return events;
	}

	/** Emits plain text up to the next opening fence, holding back a partial fence. */
	#consumeOutside(final: boolean, events: InbandScanEvent[]): void {
		const open = this.#buffer.indexOf(CODE_OPEN);
		if (open === -1) {
			// Keep a trailing prefix of the opening fence until more text
			// arrives, unless the stream is over.
			const hold = final ? 0 : partialSuffixOverlapAny(this.#buffer, OPEN_TAGS);
			const emit = this.#buffer.slice(0, this.#buffer.length - hold);
			if (emit.length > 0) events.push({ type: "text", text: emit });
			this.#buffer = this.#buffer.slice(this.#buffer.length - hold);
			return;
		}
		if (open > 0) events.push({ type: "text", text: this.#buffer.slice(0, open) });
		this.#buffer = this.#buffer.slice(open + CODE_OPEN.length);
		this.#state = "tool";
	}

	/** Parses a complete block into tool events, or waits for its closing fence. */
	#consumeTool(final: boolean, events: InbandScanEvent[]): void {
		const close = findFenceClose(this.#buffer);
		if (close === -1) {
			// Inside the fence we emit nothing until it closes; on a truncated
			// stream the incomplete block is dropped rather than leaked as text.
			if (final) {
				this.#buffer = "";
				this.#state = "outside";
			}
			return;
		}
		const body = this.#buffer.slice(0, close);
		const rawBlock = `${CODE_OPEN}${body}${FENCE}`;
		for (const call of parseGeminiCalls(body)) {
			const id = mintToolCallId();
			events.push({ type: "toolStart", id, name: call.name });
			events.push({ type: "toolEnd", id, name: call.name, arguments: call.arguments, rawBlock });
		}
		this.#buffer = this.#buffer.slice(close + FENCE.length);
		this.#state = "outside";
	}
}

/**
 * Index of the fence that closes a `tool_code` body, or -1 when it has not
 * arrived yet. Fence text inside Python string literals and `#` comments is
 * skipped; an unterminated string or comment runs to the end of the buffer,
 * so the scanner keeps waiting for more input.
 */
function findFenceClose(body: string): number {
	const n = body.length;
	let i = 0;
	while (i < n) {
		const ch = body[i]!;
		if (ch === '"' || ch === "'") {
			i = skipString(body, i);
			continue;
		}
		if (ch === "#") {
			i = skipComment(body, i);
			continue;
		}
		if (body.startsWith(FENCE, i)) return i;
		i++;
	}
	return -1;
}
89
+
90
/**
 * Collects every outermost call expression found in a `tool_code` body.
 * String literals and comments are stepped over, `print(...)` wrappers are
 * looked through, and a call whose parenthesis never closes is ignored.
 */
function parseGeminiCalls(body: string): ParsedCall[] {
	const found: ParsedCall[] = [];
	const length = body.length;
	let pos = 0;
	while (pos < length) {
		switch (body[pos]!) {
			case '"':
			case "'":
				pos = skipString(body, pos);
				continue;
			case "#":
				pos = skipComment(body, pos);
				continue;
			case "(": {
				const callee = identBefore(body, pos);
				// `print(` only wraps the real call, so keep scanning inside it.
				if (!callee || callee === "print") break;
				const closeAt = matchParen(body, pos);
				if (closeAt === -1) break;
				found.push({ name: callee, arguments: parsePyArgs(body.slice(pos + 1, closeAt)) });
				pos = closeAt + 1;
				continue;
			}
		}
		pos += 1;
	}
	return found;
}
120
+
121
/**
 * Returns the identifier that sits directly before the `(` at `parenIndex`
 * (whitespace in between is allowed), i.e. the last segment of the callee.
 * Yields `undefined` when the preceding word is not a valid identifier.
 */
function identBefore(body: string, parenIndex: number): string | undefined {
	let cursor = parenIndex - 1;
	// Step back over any whitespace between the callee and its parenthesis.
	while (cursor >= 0 && /\s/.test(body[cursor]!)) cursor -= 1;
	const stop = cursor + 1;
	// Then step back over the run of word characters.
	while (cursor >= 0 && /[A-Za-z0-9_]/.test(body[cursor]!)) cursor -= 1;
	const candidate = body.slice(cursor + 1, stop);
	if (/^[A-Za-z_]\w*$/.test(candidate)) return candidate;
	return undefined;
}
130
+
131
/**
 * Finds the `)` that balances the `(` at `openIndex`. Parentheses inside
 * string literals and comments do not count. Returns -1 when unbalanced.
 */
function matchParen(body: string, openIndex: number): number {
	let nesting = 0;
	for (let at = openIndex; at < body.length; ) {
		const ch = body[at]!;
		if (ch === "#") {
			at = skipComment(body, at);
			continue;
		}
		if (ch === "'" || ch === '"') {
			at = skipString(body, at);
			continue;
		}
		if (ch === "(") {
			nesting += 1;
		} else if (ch === ")") {
			nesting -= 1;
			if (nesting === 0) return at;
		}
		at += 1;
	}
	return -1;
}
152
+
153
/**
 * Index just past the Python string literal starting at `i` (a quote char).
 *
 * Handles both single-quoted and triple-quoted literals. A backslash always
 * consumes the following character, in triple-quoted strings too, so an
 * escaped quote such as `\"""` does not end the literal. An unterminated
 * literal runs to the end of `body`.
 */
function skipString(body: string, i: number): number {
	const quote = body[i]!;
	const triple = quote + quote + quote;
	const isTriple = body.startsWith(triple, i);
	const n = body.length;
	let j = i + (isTriple ? 3 : 1);
	while (j < n) {
		const ch = body[j]!;
		if (ch === "\\") {
			// Skip the escaped character, whatever it is.
			j += 2;
			continue;
		}
		if (isTriple) {
			if (body.startsWith(triple, j)) return j + 3;
		} else if (ch === quote) {
			return j + 1;
		}
		j++;
	}
	return n;
}
174
+
175
/**
 * Index of the first character after the `#` comment starting at `i`: just
 * past the terminating newline, or the end of `body` when there is none.
 */
function skipComment(body: string, i: number): number {
	const eol = body.indexOf("\n", i + 1);
	if (eol < 0) return body.length;
	return eol + 1;
}
179
+
180
/**
 * Returns `body` with every `#` comment replaced by a single newline.
 * String literals are copied verbatim, so a `#` inside a string survives.
 * A trailing comment with no newline is simply cut off.
 */
function stripComments(body: string): string {
	const pieces: string[] = [];
	let pos = 0;
	while (pos < body.length) {
		const ch = body[pos]!;
		if (ch === "#") {
			const eol = body.indexOf("\n", pos + 1);
			if (eol === -1) break;
			pieces.push("\n");
			pos = eol + 1;
		} else if (ch === '"' || ch === "'") {
			const after = skipString(body, pos);
			pieces.push(body.slice(pos, after));
			pos = after;
		} else {
			pieces.push(ch);
			pos += 1;
		}
	}
	return pieces.join("");
}
204
+
205
/**
 * Parses the text between a call's parentheses into a keyword-argument map.
 * Only `name=value` pairs with an identifier name are kept; positional
 * arguments are skipped because they are not part of the convention.
 */
function parsePyArgs(text: string): Record<string, unknown> {
	const args: Record<string, unknown> = {};
	const pieces = splitTopLevel(stripComments(text), ",");
	for (const piece of pieces) {
		const pair = piece.trim();
		if (pair === "") continue;
		const eqAt = topLevelIndexOf(pair, "=");
		if (eqAt < 0) continue;
		const name = pair.slice(0, eqAt).trim();
		if (/^[A-Za-z_]\w*$/.test(name)) {
			args[name] = parsePyValue(pair.slice(eqAt + 1).trim());
		}
	}
	return args;
}
218
+
219
/**
 * Converts one Python literal into its JS counterpart: booleans, `None`,
 * strings, lists, dicts and numbers. JSON spellings (`true`, `false`, `null`)
 * are accepted too. Anything unrecognised comes back as the trimmed text.
 */
function parsePyValue(raw: string): unknown {
	const t = raw.trim();
	switch (t) {
		case "":
			return "";
		case "True":
		case "true":
			return true;
		case "False":
		case "false":
			return false;
		case "None":
		case "null":
			return null;
	}
	if (stringPrefixLength(t) !== undefined) return decodeString(t);
	if (t.startsWith("[")) return parseList(t);
	if (t.startsWith("{")) return parseDict(t);
	// Only attempt a numeric parse when the text looks like it starts a number.
	if (/^[+-]?(\d|\.)/.test(t)) {
		const num = Number(t);
		if (!Number.isNaN(num)) return num;
	}
	return t;
}
236
+
237
/**
 * Parses a `[...]` literal into an array of parsed values. A missing closing
 * bracket is tolerated, and empty elements (e.g. a trailing comma) are skipped.
 */
function parseList(t: string): unknown[] {
	const stop = t.endsWith("]") ? t.length - 1 : t.length;
	const items: unknown[] = [];
	for (const piece of splitTopLevel(stripComments(t.slice(1, stop)), ",")) {
		const item = piece.trim();
		if (item.length > 0) items.push(parsePyValue(item));
	}
	return items;
}
244
+
245
/**
 * Parses a `{key: value, ...}` literal into a plain object. Quoted keys are
 * decoded as strings, other keys are used as written, and entries without a
 * top-level colon are skipped. A missing closing brace is tolerated.
 */
function parseDict(t: string): Record<string, unknown> {
	const stop = t.endsWith("}") ? t.length - 1 : t.length;
	const dict: Record<string, unknown> = {};
	for (const piece of splitTopLevel(stripComments(t.slice(1, stop)), ",")) {
		const entry = piece.trim();
		if (entry === "") continue;
		const colonAt = topLevelIndexOf(entry, ":");
		if (colonAt === -1) continue;
		const rawKey = entry.slice(0, colonAt).trim();
		const isQuoted = stringPrefixLength(rawKey) !== undefined;
		const key = isQuoted ? decodeString(rawKey) : rawKey;
		dict[key] = parsePyValue(entry.slice(colonAt + 1).trim());
	}
	return dict;
}
259
+
260
/**
 * Decodes a Python string literal (optionally prefixed, single- or
 * triple-quoted) to its text. Raw strings (an `r` in the prefix) are returned
 * verbatim; all others have their escape sequences resolved. A missing
 * closing quote is tolerated.
 */
function decodeString(t: string): string {
	const prefixLen = stringPrefixLength(t) ?? 0;
	const isRaw = t.slice(0, prefixLen).toLowerCase().includes("r");
	const q = t[prefixLen]!;
	const tripleQ = q + q + q;
	let content: string;
	if (t.startsWith(tripleQ, prefixLen) && t.length >= prefixLen + 6 && t.endsWith(tripleQ)) {
		content = t.slice(prefixLen + 3, t.length - 3);
	} else if (t.endsWith(q) && t.length >= prefixLen + 2) {
		content = t.slice(prefixLen + 1, t.length - 1);
	} else {
		// Unterminated literal: take everything after the opening quote.
		content = t.slice(prefixLen + 1);
	}
	return isRaw ? content : unescapePythonString(content);
}
272
+
273
/**
 * Length (0, 1 or 2) of the string prefix in front of the opening quote when
 * `t` starts a Python string literal, or `undefined` when it does not.
 * Recognised prefixes, case-insensitively: none, `r`, `u`, `b`, `br`, `rb`.
 */
function stringPrefixLength(t: string): number | undefined {
	const known = ["", "r", "u", "b", "br", "rb"];
	// Longest prefix first, so `rb"..."` is not mistaken for something shorter.
	for (let len = 2; len >= 0; len -= 1) {
		const q = t[len];
		if (q !== '"' && q !== "'") continue;
		if (known.includes(t.slice(0, len).toLowerCase())) return len;
	}
	return undefined;
}
285
+
286
/** Single-character Python escapes and the text each one stands for. */
const PY_SIMPLE_ESCAPES: ReadonlyMap<string, string> = new Map([
	["\\", "\\"],
	["'", "'"],
	['"', '"'],
	["a", "\x07"],
	["b", "\b"],
	["f", "\f"],
	["n", "\n"],
	["r", "\r"],
	["t", "\t"],
	["v", "\v"],
	// Backslash-newline is a line continuation and contributes nothing.
	["\n", ""],
]);

/** Number of hex digits required after `\x`, `\u` and `\U`. */
const PY_HEX_ESCAPE_WIDTH: ReadonlyMap<string, number> = new Map([
	["x", 2],
	["u", 4],
	["U", 8],
]);

/**
 * Resolves the escape sequences of a non-raw Python string body.
 *
 * Supported: the single-character escapes (`\n`, `\t`, `\\`, `\a`, `\b`, ...),
 * backslash-newline, octal `\ooo` (1-3 digits), `\xhh`, `\uXXXX` and
 * `\UXXXXXXXX`. As in Python, an unrecognised escape is left in the result
 * unchanged, backslash included, so a regex such as `\d+` passed in a normal
 * string keeps its meaning. Malformed or out-of-range hex escapes are also
 * kept verbatim rather than throwing.
 *
 * @param s String body without its surrounding quotes.
 * @returns The decoded text.
 */
function unescapePythonString(s: string): string {
	if (!s.includes("\\")) return s;
	let out = "";
	let i = 0;
	while (i < s.length) {
		const ch = s[i]!;
		if (ch !== "\\") {
			out += ch;
			i++;
			continue;
		}
		const next = s[i + 1];
		if (next === undefined) {
			// Lone trailing backslash: nothing to escape, keep it.
			out += "\\";
			i++;
			continue;
		}
		const simple = PY_SIMPLE_ESCAPES.get(next);
		if (simple !== undefined) {
			out += simple;
			i += 2;
			continue;
		}
		// Look at no more than the three characters an octal escape can span.
		const octal = /^[0-7]{1,3}/.exec(s.slice(i + 1, i + 4));
		if (octal) {
			out += String.fromCharCode(parseInt(octal[0], 8));
			i += 1 + octal[0].length;
			continue;
		}
		const width = PY_HEX_ESCAPE_WIDTH.get(next);
		if (width !== undefined) {
			const hex = s.slice(i + 2, i + 2 + width);
			if (hex.length === width && /^[0-9a-fA-F]+$/.test(hex)) {
				const code = parseInt(hex, 16);
				// `String.fromCodePoint` throws above U+10FFFF.
				if (code <= 0x10ffff) {
					out += String.fromCodePoint(code);
					i += 2 + width;
					continue;
				}
			}
		}
		// Unrecognised or malformed escape: keep backslash and character.
		out += `\\${next}`;
		i += 2;
	}
	return out;
}
378
+
379
/**
 * Cuts `text` at every `sep` that is not nested inside brackets, parentheses
 * or braces. String literals and comments are stepped over whole. Always
 * returns at least one piece.
 */
function splitTopLevel(text: string, sep: string): string[] {
	const pieces: string[] = [];
	let nesting = 0;
	let pieceStart = 0;
	for (let at = 0; at < text.length; ) {
		const ch = text[at]!;
		if (ch === "#") {
			at = skipComment(text, at);
			continue;
		}
		if (ch === "'" || ch === '"') {
			at = skipString(text, at);
			continue;
		}
		if ("([{".includes(ch)) {
			nesting += 1;
		} else if (")]}".includes(ch)) {
			nesting -= 1;
		} else if (nesting === 0 && ch === sep) {
			pieces.push(text.slice(pieceStart, at));
			pieceStart = at + 1;
		}
		at += 1;
	}
	pieces.push(text.slice(pieceStart));
	return pieces;
}
407
+
408
/**
 * Position of the first `ch` that is not nested inside brackets, parentheses
 * or braces, ignoring string literals and comments; -1 when there is none.
 */
function topLevelIndexOf(text: string, ch: string): number {
	let nesting = 0;
	for (let at = 0; at < text.length; ) {
		const c = text[at]!;
		if (c === "#") {
			at = skipComment(text, at);
			continue;
		}
		if (c === "'" || c === '"') {
			at = skipString(text, at);
			continue;
		}
		if ("([{".includes(c)) {
			nesting += 1;
		} else if (")]}".includes(c)) {
			nesting -= 1;
		} else if (nesting === 0 && c === ch) {
			return at;
		}
		at += 1;
	}
	return -1;
}
430
+
431
+ const grammar: Grammar = {
432
+ syntax: "gemini",
433
+ prompt: grammarPrompt,
434
+ createScanner: () => new GeminiInbandScanner(),
435
+ renderToolCall: renderGeminiInvocation,
436
+ renderAssistantToolCalls: renderGeminiToolCalls,
437
+ renderToolResults: renderGeminiToolResults,
438
+ };
439
+
440
+ export default grammar;
@@ -0,0 +1,23 @@
1
+ ## Format guide
2
+
3
+ Emit each tool call as one `<|tool_call>` block. The body is `call:NAME{key:value,...}`; wrap every string value in the `<|"|>` token:
4
+
5
+ ```text
6
+ <|tool_call>call:function_name{path:<|"|>src/a.ts<|"|>,count:2}<tool_call|>
7
+ ```
8
+
9
+ Non-string values are bare: numbers (`2`), `true`/`false`, `null`, lists `[<|"|>a<|"|>,<|"|>b<|"|>]`, and nested objects `{k:<|"|>v<|"|>}`.
10
+
11
+ Tool results arrive later in matching `<|tool_response>` blocks:
12
+
13
+ ```text
14
+ <|tool_response>response:function_name{output:<|"|>verbatim result<|"|>}<tool_response|>
15
+ ```
16
+
17
+ ## Rules
18
+
19
+ - `NAME` MUST match a listed function; arguments are `key:value` pairs separated by commas.
20
+ - Multiple calls = consecutive `<|tool_call>...<tool_call|>` blocks; keep prose outside them.
21
+ - The closer is `<tool_call|>` (pipe on the right), not `</tool_call>` or `<|tool_call>`.
22
+ - Read each `<|tool_response>` block in call order. NEVER write a `<|tool_response>` block yourself.
23
+ - After emitting your tool calls, YOU MUST STOP AND HALT.
@@ -0,0 +1,237 @@
1
+ import { mintToolCallId, partialSuffixOverlapAny } from "./coercion";
2
+ import grammarPrompt from "./gemma.md" with { type: "text" };
3
+ import { renderGemmaInvocation, renderGemmaToolCalls, renderGemmaToolResults } from "./rendering";
4
+ import type { Grammar, InbandScanEvent, InbandScanner } from "./types";
5
+
6
+ const CALL_OPEN = "<|tool_call>";
7
+ const CALL_CLOSE = "<tool_call|>";
8
+ const STRING = '<|"|>';
9
+ const OPEN_TAGS = [CALL_OPEN] as const;
10
+ const CALL_HEAD = /^call:\s*([A-Za-z_]\w*)\s*\{/;
11
+
12
+ type State = "outside" | "tool";
13
+
14
+ interface ParsedCall {
15
+ name: string;
16
+ arguments: Record<string, unknown>;
17
+ }
18
+
19
/**
 * Streaming scanner for the Gemma 4 token-delimited tool-calling convention
 * (see `docs/toolconv/gemma.md`). A call is a single
 * `<|tool_call>call:NAME{…}<tool_call|>` block whose arguments are `key:value`
 * pairs. String values are wrapped in the `<|"|>` token instead of ASCII
 * quotes, so every scan has to step over those spans.
 */
export class GemmaInbandScanner implements InbandScanner {
	/** Text received but not yet turned into events. */
	#buffer = "";
	/** `"tool"` while the buffer starts inside an open `<|tool_call>` block. */
	#state: State = "outside";

	/** Appends a streamed chunk and returns the events that are now decidable. */
	feed(text: string): InbandScanEvent[] {
		if (text.length === 0) return [];
		this.#buffer += text;
		return this.#consume(false);
	}

	/** Drains the scanner at end of stream. */
	flush(): InbandScanEvent[] {
		return this.#consume(true);
	}

	/** Runs the handler for the current state until a pass leaves the state unchanged. */
	#consume(final: boolean): InbandScanEvent[] {
		const events: InbandScanEvent[] = [];
		while (this.#buffer.length > 0) {
			const before = this.#state;
			if (before === "outside") this.#consumeOutside(final, events);
			else this.#consumeTool(final, events);
			if (this.#state === before) break;
		}
		return events;
	}

	/** Emits plain text up to the next opening token, holding back a partial token. */
	#consumeOutside(final: boolean, events: InbandScanEvent[]): void {
		const openAt = this.#buffer.indexOf(CALL_OPEN);
		if (openAt >= 0) {
			if (openAt > 0) events.push({ type: "text", text: this.#buffer.slice(0, openAt) });
			this.#buffer = this.#buffer.slice(openAt + CALL_OPEN.length);
			this.#state = "tool";
			return;
		}
		// No opener yet: release everything except a possible opener prefix.
		const held = final ? 0 : partialSuffixOverlapAny(this.#buffer, OPEN_TAGS);
		const cut = this.#buffer.length - held;
		const ready = this.#buffer.slice(0, cut);
		if (ready.length > 0) events.push({ type: "text", text: ready });
		this.#buffer = this.#buffer.slice(cut);
	}

	/** Parses a complete call block into tool events, or waits for its closer. */
	#consumeTool(final: boolean, events: InbandScanEvent[]): void {
		const closeAt = findCallClose(this.#buffer);
		if (closeAt === -1) {
			// An unterminated block is discarded once the stream is over.
			if (final) {
				this.#buffer = "";
				this.#state = "outside";
			}
			return;
		}
		const body = this.#buffer.slice(0, closeAt);
		const call = parseGemmaCall(body);
		if (call) {
			const id = mintToolCallId();
			const rawBlock = `${CALL_OPEN}${body}${CALL_CLOSE}`;
			events.push({ type: "toolStart", id, name: call.name });
			events.push({ type: "toolEnd", id, name: call.name, arguments: call.arguments, rawBlock });
		}
		this.#buffer = this.#buffer.slice(closeAt + CALL_CLOSE.length);
		this.#state = "outside";
	}
}
93
+
94
/**
 * Parses the inside of one `<|tool_call>` block (`call:NAME{...}`) into a
 * name and argument map. Returns `undefined` when the body does not start
 * with a well-formed `call:NAME{` head; a missing closing brace is tolerated.
 */
function parseGemmaCall(body: string): ParsedCall | undefined {
	const text = body.trim();
	const match = CALL_HEAD.exec(text);
	if (match === null) return undefined;
	// CALL_HEAD ends on the opening brace, so its last character is the `{`.
	const openBrace = match[0].length - 1;
	const closeBrace = matchDelim(text, openBrace, "{", "}");
	const argsText = text.slice(openBrace + 1, closeBrace === -1 ? undefined : closeBrace);
	return { name: match[1]!, arguments: parseGemmaArgs(argsText) };
}
103
+
104
/**
 * Parses a comma-separated `key:value` list into an object. Segments without
 * a top-level colon or with a non-identifier key are skipped.
 */
function parseGemmaArgs(text: string): Record<string, unknown> {
	const args: Record<string, unknown> = {};
	for (const piece of splitTopLevel(text, ",")) {
		const pair = piece.trim();
		if (pair === "") continue;
		const colonAt = topLevelIndexOf(pair, ":");
		if (colonAt < 0) continue;
		const name = pair.slice(0, colonAt).trim();
		if (/^[A-Za-z_]\w*$/.test(name)) {
			args[name] = parseGemmaValue(pair.slice(colonAt + 1).trim());
		}
	}
	return args;
}
117
+
118
/**
 * Converts one Gemma argument value to JS: `<|"|>`-wrapped strings, lists,
 * nested objects, booleans, null spellings and numbers. Unterminated strings,
 * lists and objects are tolerated; anything else is returned as trimmed text.
 */
function parseGemmaValue(raw: string): unknown {
	const t = raw.trim();
	if (t.startsWith(STRING)) {
		const endAt = t.indexOf(STRING, STRING.length);
		return t.slice(STRING.length, endAt === -1 ? undefined : endAt);
	}
	if (t.startsWith("[")) {
		const endAt = matchDelim(t, 0, "[", "]");
		const items: unknown[] = [];
		for (const piece of splitTopLevel(t.slice(1, endAt === -1 ? undefined : endAt), ",")) {
			const item = piece.trim();
			if (item.length > 0) items.push(parseGemmaValue(item));
		}
		return items;
	}
	if (t.startsWith("{")) {
		const endAt = matchDelim(t, 0, "{", "}");
		return parseGemmaArgs(t.slice(1, endAt === -1 ? undefined : endAt));
	}
	switch (t) {
		case "true":
			return true;
		case "false":
			return false;
		case "null":
		case "none":
		case "None":
			return null;
	}
	// Only attempt a numeric parse when the text looks like it starts a number.
	if (/^[+-]?(\d|\.)/.test(t)) {
		const num = Number(t);
		if (!Number.isNaN(num)) return num;
	}
	return t;
}
145
+
146
/**
 * Given the index `i` of an opening `<|"|>` token, returns the index right
 * after the matching closing token, or the end of `text` when the string is
 * not terminated.
 */
function skipGemmaString(text: string, i: number): number {
	const closeAt = text.indexOf(STRING, i + STRING.length);
	if (closeAt < 0) return text.length;
	return closeAt + STRING.length;
}
151
+
152
/**
 * Index of the first `<tool_call|>` closer that is not inside a
 * `<|"|>`-delimited string, or -1 when no such closer is present.
 */
function findCallClose(text: string): number {
	for (let at = 0; at < text.length; ) {
		if (text.startsWith(STRING, at)) {
			at = skipGemmaString(text, at);
		} else if (text.startsWith(CALL_CLOSE, at)) {
			return at;
		} else {
			at += 1;
		}
	}
	return -1;
}
165
+
166
/**
 * Finds the `close` character that balances the `open` character at
 * `openIndex`, stepping over `<|"|>` string spans. Returns -1 when unbalanced.
 */
function matchDelim(text: string, openIndex: number, open: string, close: string): number {
	let nesting = 0;
	let at = openIndex;
	while (at < text.length) {
		if (text.startsWith(STRING, at)) {
			at = skipGemmaString(text, at);
			continue;
		}
		const ch = text[at]!;
		if (ch === open) {
			nesting += 1;
		} else if (ch === close) {
			nesting -= 1;
			if (nesting === 0) return at;
		}
		at += 1;
	}
	return -1;
}
183
+
184
/**
 * Cuts `text` at every `sep` that is not nested inside braces, brackets or
 * parentheses, stepping over `<|"|>` string spans. Always returns at least
 * one piece.
 */
function splitTopLevel(text: string, sep: string): string[] {
	const pieces: string[] = [];
	let nesting = 0;
	let pieceStart = 0;
	for (let at = 0; at < text.length; ) {
		if (text.startsWith(STRING, at)) {
			at = skipGemmaString(text, at);
			continue;
		}
		const ch = text[at]!;
		if ("{[(".includes(ch)) {
			nesting += 1;
		} else if ("}])".includes(ch)) {
			nesting -= 1;
		} else if (nesting === 0 && ch === sep) {
			pieces.push(text.slice(pieceStart, at));
			pieceStart = at + 1;
		}
		at += 1;
	}
	pieces.push(text.slice(pieceStart));
	return pieces;
}
208
+
209
/**
 * Position of the first `ch` that is not nested inside braces, brackets or
 * parentheses, stepping over `<|"|>` string spans; -1 when there is none.
 */
function topLevelIndexOf(text: string, ch: string): number {
	let nesting = 0;
	for (let at = 0; at < text.length; ) {
		if (text.startsWith(STRING, at)) {
			at = skipGemmaString(text, at);
			continue;
		}
		const c = text[at]!;
		if ("{[(".includes(c)) {
			nesting += 1;
		} else if ("}])".includes(c)) {
			nesting -= 1;
		} else if (nesting === 0 && c === ch) {
			return at;
		}
		at += 1;
	}
	return -1;
}
227
+
228
+ const grammar: Grammar = {
229
+ syntax: "gemma",
230
+ prompt: grammarPrompt,
231
+ createScanner: () => new GemmaInbandScanner(),
232
+ renderToolCall: renderGemmaInvocation,
233
+ renderAssistantToolCalls: renderGemmaToolCalls,
234
+ renderToolResults: renderGemmaToolResults,
235
+ };
236
+
237
+ export default grammar;
@@ -20,6 +20,8 @@ const RESPONSE_OPEN_TOKENS: Record<ToolCallSyntax, readonly string[]> = {
20
20
  harmony: ["<|start|>functions."],
21
21
  pi: ["<tool_response>"],
22
22
  qwen3: ["<tool_response>"],
23
+ gemini: ["```tool_outputs"],
24
+ gemma: ["<|tool_response>"],
23
25
  };
24
26
 
25
27
  function firstTokenIndex(text: string, tokens: readonly string[]): number {
@@ -84,6 +86,24 @@ export function wrapInbandToolStream(
84
86
  return;
85
87
  }
86
88
  break;
89
+ case "toolcall_start": {
90
+ // Provider emitted a native structured tool call (e.g. Gemini via
91
+ // OpenRouter still returns `functionCall` parts even when owned mode
92
+ // sends no `tools`). Forward the native lifecycle live so the UI
93
+ // streams it; otherwise the turn loses its only actionable content
94
+ // and the loop retries forever on a reasoning-only message. The
95
+ // projector ignores nameless "ghost" parts and de-conflicts with the
96
+ // in-band channel.
97
+ const src = event.partial.content[event.contentIndex];
98
+ projector?.nativeToolStart(event.contentIndex, src?.type === "toolCall" ? src.name : "");
99
+ break;
100
+ }
101
+ case "toolcall_delta":
102
+ projector?.nativeToolDelta(event.contentIndex, event.delta);
103
+ break;
104
+ case "toolcall_end":
105
+ projector?.nativeToolEnd(event.contentIndex, event.toolCall);
106
+ break;
87
107
  case "done":
88
108
  projector ??= new InbandStreamProjector(out, tools, syntax, event.message, true);
89
109
  projector.finish(event.message, true);
@@ -113,6 +133,13 @@ class InbandStreamProjector {
113
133
  #fedLen = 0;
114
134
  #stopped = false;
115
135
  #responsePending = "";
136
+ // Provider-native tool calls forwarded live (e.g. Gemini still returns
137
+ // `functionCall` parts under owned mode), keyed by the inner stream's
138
+ // `contentIndex`. `#toolChannel` records which channel produced the turn's
139
+ // first real call so the other is dropped — no double-dispatch, and no
140
+ // guessing from emptiness. Nameless "ghost" parts never lock a channel.
141
+ #nativeBlocks = new Map<number, { index: number; block: ToolCall }>();
142
+ #toolChannel: "native" | "inband" | undefined;
116
143
 
117
144
  constructor(
118
145
  out: AssistantMessageEventStream,
@@ -140,6 +167,62 @@ class InbandStreamProjector {
140
167
  this.#partial.content.push(block);
141
168
  }
142
169
 
170
/**
 * Opens a provider-native tool call and forwards its start live.
 *
 * `name` is taken from the inner stream's partial. An empty `name` marks a
 * not-yet-identified or "ghost" call: nothing is opened here and the decision
 * is left to `nativeToolEnd`. Calls are also ignored once the projector is
 * stopped or the in-band channel already owns the turn (no double-dispatch).
 *
 * @param srcIndex `contentIndex` of the call in the inner stream; used as the
 *   key for later delta/end events.
 * @param name Tool name known at start time, or `""` when unknown.
 */
nativeToolStart(srcIndex: number, name: string): void {
	if (this.#stopped) return;
	if (!name) return;
	if (this.#toolChannel === "inband") return;

	// First real call of the turn came from the native channel: lock it.
	this.#toolChannel = "native";
	this.#closeText();
	this.#closeThinking();

	const opened: ToolCall = { type: "toolCall", id: "", name, arguments: {} };
	// `push` returns the new length, so the block sits at length - 1.
	const position = this.#partial.content.push(opened) - 1;
	this.#nativeBlocks.set(srcIndex, { index: position, block: opened });

	if (!this.#emitEvents) return;
	this.#out.push({ type: "toolcall_start", contentIndex: position, partial: this.#partial });
}
186
+
187
/**
 * Forwards an argument delta of a native tool call that was opened by
 * `nativeToolStart`. Deltas for unknown source indices, or arriving after the
 * projector stopped, are dropped.
 *
 * @param srcIndex `contentIndex` of the call in the inner stream.
 * @param delta Raw delta text, forwarded unchanged.
 */
nativeToolDelta(srcIndex: number, delta: string): void {
	const tracked = this.#stopped ? undefined : this.#nativeBlocks.get(srcIndex);
	if (tracked === undefined || !this.#emitEvents) return;
	// Re-address the delta to the block's position in the projected message.
	this.#out.push({ type: "toolcall_delta", contentIndex: tracked.index, delta, partial: this.#partial });
}
194
+
195
/**
 * Completes a provider-native tool call.
 *
 * - Call previously opened by `nativeToolStart`: the tracked block is updated
 *   in place with the final `toolCall`, a `toolcall_end` is emitted, and the
 *   tracking entry is removed.
 * - Call never opened (its name was empty at start): a call whose name only
 *   arrived now is salvaged by emitting start and end back to back. Nameless
 *   ghosts, and anything arriving after the in-band channel claimed the turn,
 *   are dropped.
 *
 * @param srcIndex `contentIndex` of the call in the inner stream.
 * @param toolCall Final tool call reported by the inner stream.
 */
nativeToolEnd(srcIndex: number, toolCall: ToolCall): void {
	if (this.#stopped) return;

	const tracked = this.#nativeBlocks.get(srcIndex);
	if (tracked === undefined) {
		if (!toolCall.name || this.#toolChannel === "inband") return;

		this.#toolChannel = "native";
		this.#closeText();
		this.#closeThinking();

		// Shallow copy so the projected message does not share the block
		// object with the inner stream.
		const salvaged: ToolCall = { ...toolCall };
		const position = this.#partial.content.push(salvaged) - 1;
		if (!this.#emitEvents) return;
		this.#out.push({ type: "toolcall_start", contentIndex: position, partial: this.#partial });
		this.#out.push({ type: "toolcall_end", contentIndex: position, toolCall: salvaged, partial: this.#partial });
		return;
	}

	// Finalize the block that was streamed live.
	Object.assign(tracked.block, toolCall);
	if (this.#emitEvents) {
		this.#out.push({
			type: "toolcall_end",
			contentIndex: tracked.index,
			toolCall: tracked.block,
			partial: this.#partial,
		});
	}
	this.#nativeBlocks.delete(srcIndex);
}
225
+
143
226
  text(delta: string): boolean {
144
227
  if (this.#stopped) return true;
145
228
  this.#fedLen += delta.length;
@@ -281,6 +364,9 @@ class InbandStreamProjector {
281
364
  }
282
365
 
283
366
  #beginTool(event: Extract<InbandScanEvent, { type: "toolStart" }>): void {
367
+ // Native owns the turn → drop the in-band call to avoid double-dispatch.
368
+ if (this.#toolChannel === "native") return;
369
+ this.#toolChannel = "inband";
284
370
  this.#closeText();
285
371
  this.#closeThinking();
286
372
  if (this.#toolBlocks.has(event.id)) return;
@@ -212,3 +212,102 @@ function escapeXmlAttr(value: string): string {
212
212
  function escapeXmlText(value: string): string {
213
213
  return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;");
214
214
  }
215
+
216
+ // --- Gemini: Pythonic `tool_code` / `default_api` convention ---
217
+
218
+ const GEMINI_CODE_OPEN = "```tool_code"; // opening fence of a rendered tool-call block
219
+ const GEMINI_OUTPUT_OPEN = "```tool_outputs"; // opening fence of a rendered tool-result block
220
+ const GEMINI_FENCE = "```"; // closing fence shared by both block kinds
221
+
222
/**
 * Renders one tool call as a Python-style invocation with keyword arguments,
 * e.g. `default_api.name(key=value, other=value)`.
 *
 * @param call Tool call whose name and arguments are rendered.
 * @param options When `options.example` is truthy the `default_api.` prefix
 *   is omitted.
 * @returns The invocation text, without any surrounding fence.
 */
export function renderGeminiInvocation(call: ToolCall, options: GrammarRenderOptions = {}): string {
	const renderedArgs: string[] = [];
	for (const [argName, argValue] of Object.entries(call.arguments)) {
		renderedArgs.push(`${argName}=${pyValue(argValue)}`);
	}
	const invocation = `${call.name}(${renderedArgs.join(", ")})`;
	if (options.example) return invocation;
	return `default_api.${invocation}`;
}
228
+
229
/**
 * Renders the tool calls of one assistant turn in the Gemini convention.
 *
 * A single call renders bare; parallel calls render as a Python list
 * `[a, b]`. In example mode the bare body is returned; otherwise the body is
 * wrapped in a `tool_code` fence.
 *
 * @param calls Tool calls to render, in order.
 * @param options Render options, forwarded to `renderGeminiInvocation`.
 * @returns The rendered text, or `""` when `calls` is empty.
 */
export function renderGeminiToolCalls(calls: readonly ToolCall[], options: GrammarRenderOptions = {}): string {
	// No calls: render nothing rather than a fenced, empty `[]` list.
	if (calls.length === 0) return "";
	const joined = calls.map(call => renderGeminiInvocation(call, options)).join(", ");
	// Joining a single element yields that element unchanged, so only the
	// parallel case needs the list brackets.
	const body = calls.length === 1 ? joined : `[${joined}]`;
	// Examples show the bare call; the live wire form fences it as `tool_code`.
	return options.example ? body : `${GEMINI_CODE_OPEN}\n${body}\n${GEMINI_FENCE}`;
}
238
+
239
/**
 * Renders tool results as `tool_outputs` fenced blocks, one block per result,
 * separated by a newline. Each result's `text` is inserted verbatim.
 *
 * @param results Tool results to render, in order.
 * @returns The concatenated blocks, or `""` when `results` is empty.
 */
export function renderGeminiToolResults(results: readonly GrammarToolResult[]): string {
	const blocks: string[] = [];
	for (const result of results) {
		blocks.push(`${GEMINI_OUTPUT_OPEN}\n${result.text}\n${GEMINI_FENCE}`);
	}
	return blocks.join("\n");
}
242
+
243
/**
 * Renders a JavaScript value as a Python literal.
 *
 * `null`/`undefined` become `None`, booleans `True`/`False`, finite numbers
 * their decimal text, strings a double-quoted literal, arrays a list and
 * other objects a dict with quoted keys. Non-finite numbers and any other
 * value are stringified and rendered as a string literal.
 */
function pyValue(value: unknown): string {
	if (value == null) return "None";
	switch (typeof value) {
		case "boolean":
			return value ? "True" : "False";
		case "number":
			return Number.isFinite(value) ? String(value) : pyString(String(value));
		case "string":
			return pyString(value);
		case "object": {
			if (Array.isArray(value)) return `[${value.map(pyValue).join(", ")}]`;
			const pairs = Object.entries(value as Record<string, unknown>).map(
				([name, item]) => `${pyString(name)}: ${pyValue(item)}`,
			);
			return `{${pairs.join(", ")}}`;
		}
		default:
			return pyString(String(value));
	}
}
255
+
256
/**
 * Renders a string as a double-quoted Python string literal, escaping
 * backslash, double quote, newline, carriage return and tab.
 */
function pyString(value: string): string {
	const escapes: Record<string, string> = {
		"\\": "\\\\",
		'"': '\\"',
		"\n": "\\n",
		"\r": "\\r",
		"\t": "\\t",
	};
	// Single pass: every special character maps to exactly one escape, so no
	// already-inserted backslash is ever escaped a second time.
	const body = value.replace(/[\\"\n\r\t]/g, ch => escapes[ch] ?? ch);
	return `"${body}"`;
}
265
+
266
+ // --- Gemma 4: token-delimited `call:NAME{…}` convention ---
267
+
268
+ const GEMMA_CALL_OPEN = "<|tool_call>"; // opens a rendered tool call
269
+ const GEMMA_CALL_CLOSE = "<tool_call|>"; // closes a rendered tool call
270
+ const GEMMA_RESPONSE_OPEN = "<|tool_response>"; // opens a rendered tool result
271
+ const GEMMA_RESPONSE_CLOSE = "<tool_response|>"; // closes a rendered tool result
272
+ const GEMMA_STRING = '<|"|>'; // delimiter placed on both sides of a string value
273
+
274
/**
 * Renders one tool call in the Gemma token-delimited form
 * `<|tool_call>call:NAME{key:value,…}<tool_call|>`.
 *
 * @param call Tool call whose name and arguments are rendered.
 * @param _options Unused; accepted for signature parity with other renderers.
 * @returns The complete, delimited tool-call text.
 */
export function renderGemmaInvocation(call: ToolCall, _options: GrammarRenderOptions = {}): string {
	const fields: string[] = [];
	for (const [field, fieldValue] of Object.entries(call.arguments)) {
		fields.push(`${field}:${gemmaValue(fieldValue)}`);
	}
	const payload = `call:${call.name}{${fields.join(",")}}`;
	return `${GEMMA_CALL_OPEN}${payload}${GEMMA_CALL_CLOSE}`;
}
280
+
281
/**
 * Renders the tool calls of one assistant turn by concatenating each call's
 * Gemma invocation with no separator.
 *
 * @param calls Tool calls to render, in order.
 * @param options Render options, forwarded to `renderGemmaInvocation`.
 * @returns The concatenated invocations, or `""` when `calls` is empty.
 */
export function renderGemmaToolCalls(calls: readonly ToolCall[], options: GrammarRenderOptions = {}): string {
	let rendered = "";
	for (const call of calls) {
		rendered += renderGemmaInvocation(call, options);
	}
	return rendered;
}
284
+
285
/**
 * Renders tool results in the Gemma form
 * `<|tool_response>response:NAME{output:VALUE}<tool_response|>`, concatenated
 * with no separator. A result text that parses as JSON is rendered as the
 * parsed value; any other text is rendered as a string.
 *
 * @param results Tool results to render, in order.
 * @returns The concatenated responses, or `""` when `results` is empty.
 */
export function renderGemmaToolResults(results: readonly GrammarToolResult[]): string {
	let rendered = "";
	for (const result of results) {
		const output = gemmaValue(parseMaybeJson(result.text));
		rendered += `${GEMMA_RESPONSE_OPEN}response:${result.name}{output:${output}}${GEMMA_RESPONSE_CLOSE}`;
	}
	return rendered;
}
293
+
294
/**
 * Renders a JavaScript value in the Gemma argument syntax.
 *
 * `null`/`undefined` become `null`, booleans `true`/`false`, numbers their
 * `String()` text, arrays `[a,b]` and other objects `{key:value}` with bare
 * keys. Strings, and any other value after stringification, are wrapped in
 * the Gemma string delimiter; their content is inserted verbatim.
 */
function gemmaValue(value: unknown): string {
	const quote = (text: string): string => `${GEMMA_STRING}${text}${GEMMA_STRING}`;
	if (value == null) return "null";
	switch (typeof value) {
		case "boolean":
			return value ? "true" : "false";
		case "number":
			return String(value);
		case "string":
			return quote(value);
		case "object": {
			if (Array.isArray(value)) return `[${value.map(gemmaValue).join(",")}]`;
			const fields = Object.entries(value as Record<string, unknown>).map(
				([name, item]) => `${name}:${gemmaValue(item)}`,
			);
			return `{${fields.join(",")}}`;
		}
		default:
			return quote(String(value));
	}
}
306
+
307
/**
 * Parses `text` as JSON when possible.
 *
 * @returns The parsed JSON value, or `text` itself when it is not valid JSON.
 */
function parseMaybeJson(text: string): unknown {
	let parsed: unknown = text;
	try {
		parsed = JSON.parse(text);
	} catch {
		// Not valid JSON: fall through with the raw text.
	}
	return parsed;
}
@@ -0,0 +1,6 @@
1
+ import type { ProviderDefinition } from "./types";
2
+
3
+ export const azureProvider = { // provider definition for Azure OpenAI
4
+ id: "azure",
5
+ name: "Azure OpenAI",
6
+ } as const satisfies ProviderDefinition; // `as const` keeps the literal types; `satisfies` checks the shape against ProviderDefinition
@@ -3,6 +3,7 @@ import { aimlApiProvider } from "./aimlapi";
3
3
  import { alibabaCodingPlanProvider } from "./alibaba-coding-plan";
4
4
  import { amazonBedrockProvider } from "./amazon-bedrock";
5
5
  import { anthropicProvider } from "./anthropic";
6
+ import { azureProvider } from "./azure";
6
7
  import { cerebrasProvider } from "./cerebras";
7
8
  import { cloudflareAiGatewayProvider } from "./cloudflare-ai-gateway";
8
9
  import { cursorProvider } from "./cursor";
@@ -68,6 +69,7 @@ import { zhipuCodingPlanProvider } from "./zhipu-coding-plan";
68
69
  * list for the loginable providers; non-login model providers are appended.
69
70
  */
70
71
  const ALL = [
72
+ azureProvider,
71
73
  openaiCodexProvider,
72
74
  anthropicProvider,
73
75
  zaiProvider,