@oh-my-pi/pi-ai 15.0.0 → 15.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,16 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.0.1] - 2026-05-14
6
+ ### Breaking Changes
7
+
8
+ - Increased the minimum Bun runtime version to `>=1.3.14` for the `@oh-my-pi/pi-ai` package
9
+
10
+ ### Added
11
+
12
+ - Added `installH2Fetch` to patch `globalThis.fetch` so HTTPS requests attempt HTTP/2 over ALPN with automatic HTTP/1.1 fallback when HTTP/2 is unsupported
13
+ - Added priority service-tier traffic to the `premiumRequests` accounting on OpenAI and OpenAI Codex providers. Sending `serviceTier: "priority"` now increments `usage.premiumRequests` by 1 per request, matching the existing GitHub Copilot premium-request budget semantics so downstream consumers (e.g. the `omp stats` "Premium Reqs" card and `/usage`) reflect priority traffic alongside Copilot premium calls.
14
+
5
15
  ## [15.0.0] - 2026-05-13
6
16
 
7
17
  ### Added
@@ -2344,4 +2354,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
2344
2354
 
2345
2355
  ## [0.9.4] - 2025-11-26
2346
2356
 
2347
- Initial release with multi-provider LLM support.
2357
+ Initial release with multi-provider LLM support.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "15.0.0",
4
+ "version": "15.0.1",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://github.com/can1357/oh-my-pi",
7
7
  "author": "Can Boluk",
@@ -46,8 +46,8 @@
46
46
  "@aws-sdk/credential-provider-node": "^3.972.39",
47
47
  "@bufbuild/protobuf": "^2.12.0",
48
48
  "@google/genai": "^1.52.0",
49
- "@oh-my-pi/pi-natives": "15.0.0",
50
- "@oh-my-pi/pi-utils": "15.0.0",
49
+ "@oh-my-pi/pi-natives": "15.0.1",
50
+ "@oh-my-pi/pi-utils": "15.0.1",
51
51
  "@sinclair/typebox": "^0.34.49",
52
52
  "@smithy/node-http-handler": "^4.6.1",
53
53
  "ajv": "^8.20.0",
@@ -58,10 +58,10 @@
58
58
  "zod": "4.4.3"
59
59
  },
60
60
  "devDependencies": {
61
- "@types/bun": "^1.3.13"
61
+ "@types/bun": "^1.3.14"
62
62
  },
63
63
  "engines": {
64
- "bun": ">=1.3.7"
64
+ "bun": ">=1.3.14"
65
65
  },
66
66
  "files": [
67
67
  "src",
package/src/index.ts CHANGED
@@ -37,6 +37,7 @@ export * from "./usage/zai";
37
37
  export * from "./utils/anthropic-auth";
38
38
  export * from "./utils/discovery";
39
39
  export * from "./utils/event-stream";
40
+ export * from "./utils/h2-fetch";
40
41
  export * from "./utils/overflow";
41
42
  export * from "./utils/retry";
42
43
  export * from "./utils/schema";
@@ -1,4 +1,4 @@
1
- import { abortableSleep } from "@oh-my-pi/pi-utils";
1
+ import { fetchWithRetry } from "@oh-my-pi/pi-utils";
2
2
  import type { ModelManagerOptions } from "../model-manager";
3
3
  import { Effort } from "../model-thinking";
4
4
  import type { ThinkingConfig } from "../types";
@@ -19,16 +19,7 @@ type OllamaShowResponse = {
19
19
  model_info?: Record<string, unknown>;
20
20
  };
21
21
 
22
- const MODEL_RETRY_DELAYS_MS = [2_000, 5_000, 10_000];
23
-
24
- async function fetchWithRetry(url: string, init: RequestInit): Promise<Response> {
25
- for (let attempt = 0; attempt < MODEL_RETRY_DELAYS_MS.length; attempt++) {
26
- const response = await fetch(url, init);
27
- if (response.ok || response.status < 500) return response;
28
- await abortableSleep(MODEL_RETRY_DELAYS_MS[attempt]!);
29
- }
30
- return fetch(url, init);
31
- }
22
+ const OLLAMA_RETRY_DELAYS_MS = [2_000, 5_000, 10_000];
32
23
 
33
24
  function trimTrailingSlash(value: string): string {
34
25
  return value.endsWith("/") ? value.slice(0, -1) : value;
@@ -109,6 +100,7 @@ export function ollamaCloudModelManagerOptions(
109
100
  const response = await fetchWithRetry(`${baseUrl}/api/tags`, {
110
101
  method: "GET",
111
102
  headers: createCloudHeaders(apiKey),
103
+ defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
112
104
  });
113
105
  if (!response.ok) {
114
106
  throw new Error(`HTTP ${response.status} from ${baseUrl}/api/tags`);
@@ -1,5 +1,6 @@
1
1
  import * as nodeCrypto from "node:crypto";
2
2
  import * as fs from "node:fs";
3
+ import { scheduler } from "node:timers/promises";
3
4
  import * as tls from "node:tls";
4
5
  import Anthropic, { type ClientOptions as AnthropicSdkClientOptions } from "@anthropic-ai/sdk";
5
6
  import type {
@@ -8,7 +9,14 @@ import type {
8
9
  MessageParam,
9
10
  RawMessageStreamEvent,
10
11
  } from "@anthropic-ai/sdk/resources/messages";
11
- import { $env, abortableSleep, isEnoent, readSseEvents } from "@oh-my-pi/pi-utils";
12
+ import {
13
+ $env,
14
+ extractHttpStatusFromError,
15
+ isEnoent,
16
+ isRetryableError,
17
+ isUnexpectedSocketCloseMessage,
18
+ readSseEvents,
19
+ } from "@oh-my-pi/pi-utils";
12
20
  import { hasOpus47ApiRestrictions, mapEffortToAnthropicAdaptiveEffort } from "../model-thinking";
13
21
  import { calculateCost } from "../models";
14
22
  import { getEnvApiKey, OUTPUT_FALLBACK_BUFFER } from "../stream";
@@ -48,12 +56,7 @@ import { getStreamFirstEventTimeoutMs, getStreamIdleTimeoutMs, iterateWithIdleTi
48
56
  import { parseJsonWithRepair, parseStreamingJson } from "../utils/json-parse";
49
57
  import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
50
58
  import { notifyProviderResponse } from "../utils/provider-response";
51
- import {
52
- extractHttpStatusFromError,
53
- isCopilotRetryableError,
54
- isRetryableError,
55
- isUnexpectedSocketCloseMessage,
56
- } from "../utils/retry";
59
+ import { isCopilotTransientModelError } from "../utils/retry";
57
60
  import { COMBINATOR_KEYS, NO_STRICT } from "../utils/schema";
58
61
  import { notifyRawSseEvent, wrapFetchForSseDebug } from "../utils/sse-debug";
59
62
  import {
@@ -844,7 +847,7 @@ function isProviderRetryableStreamEnvelopeError(error: unknown): boolean {
844
847
 
845
848
  export function isProviderRetryableError(error: unknown, provider?: string): boolean {
846
849
  if (!(error instanceof Error)) return false;
847
- if (provider === "github-copilot" && isCopilotRetryableError(error)) return true;
850
+ if (provider === "github-copilot" && isCopilotTransientModelError(error)) return true;
848
851
  const msg = error.message.toLowerCase();
849
852
  if (
850
853
  isUnexpectedSocketCloseMessage(msg) ||
@@ -1287,7 +1290,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1287
1290
  }
1288
1291
  providerRetryAttempt++;
1289
1292
  const delayMs = PROVIDER_BASE_DELAY_MS * 2 ** (providerRetryAttempt - 1);
1290
- await abortableSleep(delayMs, options?.signal);
1293
+ await scheduler.wait(delayMs, { signal: options?.signal });
1291
1294
  output.content.length = 0;
1292
1295
  output.responseId = undefined;
1293
1296
  output.errorMessage = strictFallbackErrorMessage;
@@ -6,17 +6,15 @@ import type {
6
6
  ResponseInput,
7
7
  } from "openai/resources/responses/responses";
8
8
  import { getEnvApiKey } from "../stream";
9
- import {
10
- type Api,
11
- type AssistantMessage,
12
- type Context,
13
- type Model,
14
- type ServiceTier,
15
- type StreamFunction,
16
- type StreamOptions,
17
- shouldSendServiceTier,
18
- type Tool,
19
- type ToolChoice,
9
+ import type {
10
+ AssistantMessage,
11
+ Context,
12
+ Model,
13
+ ServiceTier,
14
+ StreamFunction,
15
+ StreamOptions,
16
+ Tool,
17
+ ToolChoice,
20
18
  } from "../types";
21
19
  import { normalizeSystemPrompts } from "../utils";
22
20
  import { createAbortSourceTracker } from "../utils/abort";
@@ -33,8 +31,11 @@ import { mapToOpenAIResponsesToolChoice } from "../utils/tool-choice";
33
31
  import { normalizeOpenAIResponsesPromptCacheKey, supportsDeveloperRole } from "./openai-responses";
34
32
  import {
35
33
  appendResponsesToolResultMessages,
34
+ applyCommonResponsesSamplingParams,
35
+ applyResponsesReasoningParams,
36
36
  convertResponsesAssistantMessage,
37
37
  convertResponsesInputContent,
38
+ createInitialResponsesAssistantMessage,
38
39
  normalizeResponsesToolCallIdForTransform,
39
40
  processResponsesStream,
40
41
  } from "./openai-responses-shared";
@@ -101,23 +102,11 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
101
102
  let firstTokenTime: number | undefined;
102
103
  const deploymentName = resolveDeploymentName(model, options);
103
104
 
104
- const output: AssistantMessage = {
105
- role: "assistant",
106
- content: [],
107
- api: "azure-openai-responses" as Api,
108
- provider: model.provider,
109
- model: model.id,
110
- usage: {
111
- input: 0,
112
- output: 0,
113
- cacheRead: 0,
114
- cacheWrite: 0,
115
- totalTokens: 0,
116
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
117
- },
118
- stopReason: "stop",
119
- timestamp: Date.now(),
120
- };
105
+ const output: AssistantMessage = createInitialResponsesAssistantMessage(
106
+ "azure-openai-responses",
107
+ model.provider,
108
+ model.id,
109
+ );
121
110
  let rawRequestDump: RawHttpRequestDump | undefined;
122
111
  const abortTracker = createAbortSourceTracker(options?.signal);
123
112
  const firstEventTimeoutAbortError = new Error(AZURE_OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE);
@@ -279,31 +268,7 @@ function buildParams(
279
268
  prompt_cache_key: normalizeOpenAIResponsesPromptCacheKey(options?.sessionId),
280
269
  };
281
270
 
282
- if (options?.maxTokens) {
283
- params.max_output_tokens = options?.maxTokens;
284
- }
285
-
286
- if (options?.temperature !== undefined) {
287
- params.temperature = options?.temperature;
288
- }
289
- if (options?.topP !== undefined) {
290
- params.top_p = options.topP;
291
- }
292
- if (options?.topK !== undefined) {
293
- params.top_k = options.topK;
294
- }
295
- if (options?.minP !== undefined) {
296
- params.min_p = options.minP;
297
- }
298
- if (options?.presencePenalty !== undefined) {
299
- params.presence_penalty = options.presencePenalty;
300
- }
301
- if (options?.repetitionPenalty !== undefined) {
302
- params.repetition_penalty = options.repetitionPenalty;
303
- }
304
- if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
305
- params.service_tier = options.serviceTier;
306
- }
271
+ applyCommonResponsesSamplingParams(params, options, model.provider);
307
272
 
308
273
  if (context.tools) {
309
274
  params.tools = convertTools(context.tools);
@@ -312,36 +277,7 @@ function buildParams(
312
277
  }
313
278
  }
314
279
 
315
- if (model.reasoning) {
316
- // Always request encrypted reasoning content so reasoning items can be
317
- // replayed in multi-turn conversations when store is false (items aren't
318
- // persisted server-side, so we must include the full content).
319
- // See: https://github.com/can1357/oh-my-pi/issues/41
320
- params.include = ["reasoning.encrypted_content"];
321
-
322
- if (options?.reasoning || options?.reasoningSummary !== undefined) {
323
- const reasoningParams: NonNullable<typeof params.reasoning> = {
324
- effort: options?.reasoning || "medium",
325
- };
326
- if (options?.reasoningSummary !== null) {
327
- reasoningParams.summary = options?.reasoningSummary || "auto";
328
- }
329
- params.reasoning = reasoningParams;
330
- } else {
331
- if (model.name.toLowerCase().startsWith("gpt-5")) {
332
- // Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
333
- messages.push({
334
- role: "developer",
335
- content: [
336
- {
337
- type: "input_text",
338
- text: "# Juice: 0 !important",
339
- },
340
- ],
341
- });
342
- }
343
- }
344
- }
280
+ applyResponsesReasoningParams(params, model, options, messages);
345
281
 
346
282
  return params;
347
283
  }
@@ -4,8 +4,9 @@
4
4
  * Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
5
5
  */
6
6
  import { createHash, randomBytes, randomUUID } from "node:crypto";
7
+ import { scheduler } from "node:timers/promises";
7
8
  import type { Content, FunctionCallingConfigMode, ThinkingConfig } from "@google/genai";
8
- import { abortableSleep, readSseJson } from "@oh-my-pi/pi-utils";
9
+ import { fetchWithRetry, readSseJson } from "@oh-my-pi/pi-utils";
9
10
  import { calculateCost } from "../models";
10
11
  import type {
11
12
  Api,
@@ -23,28 +24,27 @@ import { AssistantMessageEventStream } from "../utils/event-stream";
23
24
  import { appendRawHttpRequestDumpFor400, type RawHttpRequestDump, withHttpStatus } from "../utils/http-inspector";
24
25
  import { refreshAntigravityToken } from "../utils/oauth/google-antigravity";
25
26
  import { refreshGoogleCloudToken } from "../utils/oauth/google-gemini-cli";
26
- import { extractHttpStatusFromError } from "../utils/retry";
27
27
  import { sanitizeSchemaForCCA } from "../utils/schema";
28
- import {
29
- ANTIGRAVITY_SYSTEM_INSTRUCTION,
30
- extractRetryDelay,
31
- getAntigravityUserAgent,
32
- getGeminiCliHeaders,
33
- } from "./google-gemini-headers";
28
+ import { ANTIGRAVITY_SYSTEM_INSTRUCTION, getAntigravityUserAgent, getGeminiCliHeaders } from "./google-gemini-headers";
34
29
  import {
35
30
  convertMessages,
36
31
  convertTools,
32
+ type GoogleThinkingLevel,
37
33
  isThinkingPart,
38
34
  mapStopReasonString,
39
35
  mapToolChoice,
36
+ nextToolCallId,
37
+ pushBlockEndEvent,
38
+ pushToolCallEvents,
40
39
  retainThoughtSignature,
40
+ startTextOrThinkingBlock,
41
41
  } from "./google-shared";
42
42
 
43
43
  /**
44
- * Thinking level for Gemini 3 models.
45
- * Mirrors Google's ThinkingLevel enum values.
44
+ * Thinking level for Gemini 3 models. Re-exported from `google-shared` so existing
45
+ * `import { GoogleThinkingLevel } from "./google-gemini-cli"` callers keep working.
46
46
  */
47
- export type GoogleThinkingLevel = "THINKING_LEVEL_UNSPECIFIED" | "MINIMAL" | "LOW" | "MEDIUM" | "HIGH";
47
+ export type { GoogleThinkingLevel };
48
48
 
49
49
  export interface GoogleGeminiCliOptions extends StreamOptions {
50
50
  toolChoice?: "auto" | "none" | "any";
@@ -72,15 +72,11 @@ const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, ANTIGRAVITY_
72
72
 
73
73
  export {
74
74
  ANTIGRAVITY_SYSTEM_INSTRUCTION,
75
- extractRetryDelay,
76
75
  getAntigravityUserAgent,
77
76
  getGeminiCliHeaders,
78
77
  getGeminiCliUserAgent,
79
78
  } from "./google-gemini-headers";
80
79
 
81
- // Counter for generating unique tool call IDs
82
- let toolCallCounter = 0;
83
-
84
80
  // Retry configuration
85
81
  const MAX_RETRIES = 3;
86
82
  const BASE_DELAY_MS = 1000;
@@ -104,16 +100,6 @@ function shouldInjectAntigravitySystemInstruction(modelId: string): boolean {
104
100
  return normalized.includes("claude") || normalized.includes("gemini-3-pro-high");
105
101
  }
106
102
 
107
- /**
108
- * Check if an error is retryable (rate limit, server error, network error, etc.)
109
- */
110
- function isRetryableError(status: number, errorText: string): boolean {
111
- if (status === 429 || status === 500 || status === 502 || status === 503 || status === 504) {
112
- return true;
113
- }
114
- return /resource.?exhausted|rate.?limit|overloaded|service.?unavailable|other.?side.?closed/i.test(errorText);
115
- }
116
-
117
103
  /**
118
104
  * Extract a clean, user-friendly error message from Google API error response.
119
105
  * Parses JSON error responses and returns just the message field.
@@ -366,109 +352,26 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
366
352
  headers: requestHeaders,
367
353
  };
368
354
 
369
- // Fetch with retry logic for rate limits and transient errors
370
- let response: Response | undefined;
371
- let lastError: Error | undefined;
372
- let requestUrl: string | undefined;
373
- let rateLimitTimeSpent = 0;
374
-
375
- for (let attempt = 0; ; attempt++) {
376
- if (options?.signal?.aborted) {
377
- throw new Error("Request was aborted");
378
- }
379
-
380
- try {
381
- const endpoint = endpoints[Math.min(attempt, endpoints.length - 1)];
382
- requestUrl = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
383
- response = await fetch(requestUrl, {
384
- method: "POST",
385
- headers: requestHeaders,
386
- body: requestBodyJson,
387
- signal: options?.signal,
388
- });
389
-
390
- if (response.ok) {
391
- break; // Success, exit retry loop
392
- }
393
-
394
- const errorText = await response.text();
395
-
396
- // Handle 429 rate limits with time budget
397
- if (response.status === 429) {
398
- if (/quota|exhausted/i.test(errorText)) {
399
- throw withHttpStatus(
400
- new Error(`Cloud Code Assist API error (429): ${extractErrorMessage(errorText)}`),
401
- 429,
402
- );
403
- }
404
- const serverDelay = extractRetryDelay(errorText, response);
405
- if (serverDelay && rateLimitTimeSpent + serverDelay <= RATE_LIMIT_BUDGET_MS) {
406
- rateLimitTimeSpent += serverDelay;
407
- await abortableSleep(serverDelay, options?.signal);
408
- continue;
409
- }
410
- // Fallback: use exponential backoff if no server delay, up to MAX_RETRIES
411
- if (!serverDelay && attempt < MAX_RETRIES) {
412
- await abortableSleep(BASE_DELAY_MS * 2 ** attempt, options?.signal);
413
- continue;
414
- }
415
- } else if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
416
- // Non-429 retryable errors use standard attempt cap
417
- const serverDelay = extractRetryDelay(errorText, response);
418
- const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
419
-
420
- // Check if server delay exceeds max allowed (default: 60s) for non-429 errors
421
- const maxDelayMs = options?.maxRetryDelayMs ?? 60000;
422
- if (maxDelayMs > 0 && serverDelay && serverDelay > maxDelayMs) {
423
- const delaySeconds = Math.ceil(serverDelay / 1000);
424
- throw withHttpStatus(
425
- new Error(
426
- `Server requested ${delaySeconds}s retry delay (max: ${Math.ceil(maxDelayMs / 1000)}s). ${extractErrorMessage(errorText)}`,
427
- ),
428
- response.status,
429
- );
430
- }
431
-
432
- await abortableSleep(delayMs, options?.signal);
433
- continue;
434
- }
435
-
436
- // Not retryable or budget exceeded
437
- throw withHttpStatus(
438
- new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`),
439
- response.status,
440
- );
441
- } catch (error) {
442
- // Check for abort - fetch throws AbortError, our code throws "Request was aborted"
443
- if (error instanceof Error) {
444
- if (error.name === "AbortError" || error.message === "Request was aborted") {
445
- throw new Error("Request was aborted");
446
- }
447
- }
448
-
449
- // HTTP responses are handled inside the try block.
450
- // If we intentionally throw with status metadata, don't convert it into a network retry.
451
- if (extractHttpStatusFromError(error) !== undefined) {
452
- throw error;
453
- }
454
- // Extract detailed error message from fetch errors (Node includes cause)
455
- lastError = error instanceof Error ? error : new Error(String(error));
456
- if (lastError.message === "fetch failed" && lastError.cause instanceof Error) {
457
- lastError = new Error(`Network error: ${lastError.cause.message}`);
458
- }
459
- // Network errors are retryable
460
- if (attempt < MAX_RETRIES) {
461
- const delayMs = BASE_DELAY_MS * 2 ** attempt;
462
- await abortableSleep(delayMs, options?.signal);
463
- continue;
464
- }
465
- throw lastError;
466
- }
467
- }
468
-
469
- if (!response?.ok) {
470
- throw lastError ?? new Error("Failed to get response after retries");
355
+ const response = await fetchWithRetry(
356
+ attempt => `${endpoints[Math.min(attempt, endpoints.length - 1)]}/v1internal:streamGenerateContent?alt=sse`,
357
+ {
358
+ method: "POST",
359
+ headers: requestHeaders,
360
+ body: requestBodyJson,
361
+ signal: options?.signal,
362
+ maxAttempts: MAX_RETRIES + 1,
363
+ defaultDelayMs: attempt => BASE_DELAY_MS * 2 ** attempt,
364
+ maxDelayMs: options?.maxRetryDelayMs ?? RATE_LIMIT_BUDGET_MS,
365
+ },
366
+ );
367
+ if (!response.ok) {
368
+ const errorText = await response.text();
369
+ throw withHttpStatus(
370
+ new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`),
371
+ response.status,
372
+ );
471
373
  }
374
+ const requestUrl = response.url;
472
375
 
473
376
  let started = false;
474
377
  const ensureStarted = () => {
@@ -525,37 +428,9 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
525
428
  (!isThinking && currentBlock.type !== "text")
526
429
  ) {
527
430
  if (currentBlock) {
528
- if (currentBlock.type === "text") {
529
- stream.push({
530
- type: "text_end",
531
- contentIndex: blocks.length - 1,
532
- content: currentBlock.text,
533
- partial: output,
534
- });
535
- } else {
536
- stream.push({
537
- type: "thinking_end",
538
- contentIndex: blockIndex(),
539
- content: currentBlock.thinking,
540
- partial: output,
541
- });
542
- }
543
- }
544
- if (isThinking) {
545
- currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
546
- output.content.push(currentBlock);
547
- ensureStarted();
548
- stream.push({
549
- type: "thinking_start",
550
- contentIndex: blockIndex(),
551
- partial: output,
552
- });
553
- } else {
554
- currentBlock = { type: "text", text: "" };
555
- output.content.push(currentBlock);
556
- ensureStarted();
557
- stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
431
+ pushBlockEndEvent(currentBlock, blockIndex(), output, stream);
558
432
  }
433
+ currentBlock = startTextOrThinkingBlock(isThinking, output, stream, ensureStarted);
559
434
  }
560
435
  if (currentBlock.type === "thinking") {
561
436
  currentBlock.thinking += part.text;
@@ -587,30 +462,14 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
587
462
  if (part.functionCall) {
588
463
  hasContent = true;
589
464
  if (currentBlock) {
590
- if (currentBlock.type === "text") {
591
- stream.push({
592
- type: "text_end",
593
- contentIndex: blockIndex(),
594
- content: currentBlock.text,
595
- partial: output,
596
- });
597
- } else {
598
- stream.push({
599
- type: "thinking_end",
600
- contentIndex: blockIndex(),
601
- content: currentBlock.thinking,
602
- partial: output,
603
- });
604
- }
465
+ pushBlockEndEvent(currentBlock, blockIndex(), output, stream);
605
466
  currentBlock = null;
606
467
  }
607
468
 
608
469
  const providedId = part.functionCall.id;
609
470
  const needsNewId =
610
471
  !providedId || output.content.some(b => b.type === "toolCall" && b.id === providedId);
611
- const toolCallId = needsNewId
612
- ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
613
- : providedId;
472
+ const toolCallId = needsNewId ? nextToolCallId(part.functionCall.name || "tool") : providedId;
614
473
 
615
474
  const toolCall: ToolCall = {
616
475
  type: "toolCall",
@@ -622,19 +481,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
622
481
 
623
482
  output.content.push(toolCall);
624
483
  ensureStarted();
625
- stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
626
- stream.push({
627
- type: "toolcall_delta",
628
- contentIndex: blockIndex(),
629
- delta: JSON.stringify(toolCall.arguments),
630
- partial: output,
631
- });
632
- stream.push({
633
- type: "toolcall_end",
634
- contentIndex: blockIndex(),
635
- toolCall,
636
- partial: output,
637
- });
484
+ pushToolCallEvents(toolCall, blockIndex(), output, stream);
638
485
  }
639
486
  }
640
487
  }
@@ -671,21 +518,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
671
518
  }
672
519
 
673
520
  if (currentBlock) {
674
- if (currentBlock.type === "text") {
675
- stream.push({
676
- type: "text_end",
677
- contentIndex: blockIndex(),
678
- content: currentBlock.text,
679
- partial: output,
680
- });
681
- } else {
682
- stream.push({
683
- type: "thinking_end",
684
- contentIndex: blockIndex(),
685
- content: currentBlock.thinking,
686
- partial: output,
687
- });
688
- }
521
+ pushBlockEndEvent(currentBlock, blockIndex(), output, stream);
689
522
  }
690
523
 
691
524
  return hasContent;
@@ -702,7 +535,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
702
535
  if (emptyAttempt > 0) {
703
536
  const backoffMs = EMPTY_STREAM_BASE_DELAY_MS * 2 ** (emptyAttempt - 1);
704
537
  try {
705
- await abortableSleep(backoffMs, options?.signal);
538
+ await scheduler.wait(backoffMs, { signal: options?.signal });
706
539
  } catch {
707
540
  // Normalize AbortError to expected message for consistent error handling
708
541
  throw new Error("Request was aborted");