npm - @oh-my-pi/pi-ai - Versions diffs - 15.0.0 → 15.0.1 - Mend

@oh-my-pi/pi-ai 15.0.0 → 15.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/CHANGELOG.md +11 -1
package/package.json +5 -5
package/src/index.ts +1 -0
package/src/provider-models/ollama.ts +3 -11
package/src/providers/anthropic.ts +12 -9
package/src/providers/azure-openai-responses.ts +19 -83
package/src/providers/google-gemini-cli.ts +37 -204
package/src/providers/google-gemini-headers.ts +0 -100
package/src/providers/google-shared.ts +446 -4
package/src/providers/google-vertex.ts +19 -371
package/src/providers/google.ts +16 -359
package/src/providers/kimi.ts +15 -96
package/src/providers/ollama.ts +3 -12
package/src/providers/openai-anthropic-shim.ts +136 -0
package/src/providers/openai-codex-responses.ts +38 -199
package/src/providers/openai-completions.ts +17 -24
package/src/providers/openai-responses-shared.ts +143 -24
package/src/providers/openai-responses.ts +20 -76
package/src/providers/synthetic.ts +15 -102
package/src/types.ts +13 -1
package/src/utils/h2-fetch.ts +47 -0
package/src/utils/http-inspector.ts +2 -2
package/src/utils/oauth/github-copilot.ts +6 -10
package/src/utils/oauth/kimi.ts +4 -3
package/src/utils/oauth/lm-studio.ts +0 -2
package/src/utils/retry.ts +8 -130

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,16 @@
 ## [Unreleased]
+## [15.0.1] - 2026-05-14
+### Breaking Changes
+- Increased the minimum Bun runtime version to `>=1.3.14` for the `@oh-my-pi/pi-ai` package
+### Added
+- Added `installH2Fetch` to patch `globalThis.fetch` so HTTPS requests attempt HTTP/2 over ALPN with automatic HTTP/1.1 fallback when HTTP/2 is unsupported
+- Added priority service-tier traffic to the `premiumRequests` accounting on OpenAI and OpenAI Codex providers. Sending `serviceTier: "priority"` now increments `usage.premiumRequests` by 1 per request, matching the existing GitHub Copilot premium-request budget semantics so downstream consumers (e.g. the `omp stats` "Premium Reqs" card and `/usage`) reflect priority traffic alongside Copilot premium calls.
 ## [15.0.0] - 2026-05-13
 ### Added
@@ -2344,4 +2354,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
 ## [0.9.4] - 2025-11-26
-Initial release with multi-provider LLM support.
+Initial release with multi-provider LLM support.

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-ai",
-	"version": "15.0.0",
+	"version": "15.0.1",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"homepage": "https://github.com/can1357/oh-my-pi",
 	"author": "Can Boluk",
@@ -46,8 +46,8 @@
 		"@aws-sdk/credential-provider-node": "^3.972.39",
 		"@bufbuild/protobuf": "^2.12.0",
 		"@google/genai": "^1.52.0",
-		"@oh-my-pi/pi-natives": "15.0.0",
-		"@oh-my-pi/pi-utils": "15.0.0",
+		"@oh-my-pi/pi-natives": "15.0.1",
+		"@oh-my-pi/pi-utils": "15.0.1",
 		"@sinclair/typebox": "^0.34.49",
 		"@smithy/node-http-handler": "^4.6.1",
 		"ajv": "^8.20.0",
@@ -58,10 +58,10 @@
 		"zod": "4.4.3"
 	},
 	"devDependencies": {
-		"@types/bun": "^1.3.13"
+		"@types/bun": "^1.3.14"
 	},
 	"engines": {
-		"bun": ">=1.3.7"
+		"bun": ">=1.3.14"
 	},
 	"files": [
 		"src",

package/src/index.ts CHANGED Viewed

@@ -37,6 +37,7 @@ export * from "./usage/zai";
 export * from "./utils/anthropic-auth";
 export * from "./utils/discovery";
 export * from "./utils/event-stream";
+export * from "./utils/h2-fetch";
 export * from "./utils/overflow";
 export * from "./utils/retry";
 export * from "./utils/schema";

package/src/provider-models/ollama.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { abortableSleep } from "@oh-my-pi/pi-utils";
+import { fetchWithRetry } from "@oh-my-pi/pi-utils";
 import type { ModelManagerOptions } from "../model-manager";
 import { Effort } from "../model-thinking";
 import type { ThinkingConfig } from "../types";
@@ -19,16 +19,7 @@ type OllamaShowResponse = {
 	model_info?: Record<string, unknown>;
 };
-const MODEL_RETRY_DELAYS_MS = [2_000, 5_000, 10_000];
-async function fetchWithRetry(url: string, init: RequestInit): Promise<Response> {
-	for (let attempt = 0; attempt < MODEL_RETRY_DELAYS_MS.length; attempt++) {
-		const response = await fetch(url, init);
-		if (response.ok || response.status < 500) return response;
-		await abortableSleep(MODEL_RETRY_DELAYS_MS[attempt]!);
-	}
-	return fetch(url, init);
-}
+const OLLAMA_RETRY_DELAYS_MS = [2_000, 5_000, 10_000];
 function trimTrailingSlash(value: string): string {
 	return value.endsWith("/") ? value.slice(0, -1) : value;
@@ -109,6 +100,7 @@ export function ollamaCloudModelManagerOptions(
 			const response = await fetchWithRetry(`${baseUrl}/api/tags`, {
 				method: "GET",
 				headers: createCloudHeaders(apiKey),
+				defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
 			});
 			if (!response.ok) {
 				throw new Error(`HTTP ${response.status} from ${baseUrl}/api/tags`);

package/src/providers/anthropic.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import * as nodeCrypto from "node:crypto";
 import * as fs from "node:fs";
+import { scheduler } from "node:timers/promises";
 import * as tls from "node:tls";
 import Anthropic, { type ClientOptions as AnthropicSdkClientOptions } from "@anthropic-ai/sdk";
 import type {
@@ -8,7 +9,14 @@ import type {
 	MessageParam,
 	RawMessageStreamEvent,
 } from "@anthropic-ai/sdk/resources/messages";
-import { $env, abortableSleep, isEnoent, readSseEvents } from "@oh-my-pi/pi-utils";
+import {
+	$env,
+	extractHttpStatusFromError,
+	isEnoent,
+	isRetryableError,
+	isUnexpectedSocketCloseMessage,
+	readSseEvents,
+} from "@oh-my-pi/pi-utils";
 import { hasOpus47ApiRestrictions, mapEffortToAnthropicAdaptiveEffort } from "../model-thinking";
 import { calculateCost } from "../models";
 import { getEnvApiKey, OUTPUT_FALLBACK_BUFFER } from "../stream";
@@ -48,12 +56,7 @@ import { getStreamFirstEventTimeoutMs, getStreamIdleTimeoutMs, iterateWithIdleTi
 import { parseJsonWithRepair, parseStreamingJson } from "../utils/json-parse";
 import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
 import { notifyProviderResponse } from "../utils/provider-response";
-import {
-	extractHttpStatusFromError,
-	isCopilotRetryableError,
-	isRetryableError,
-	isUnexpectedSocketCloseMessage,
-} from "../utils/retry";
+import { isCopilotTransientModelError } from "../utils/retry";
 import { COMBINATOR_KEYS, NO_STRICT } from "../utils/schema";
 import { notifyRawSseEvent, wrapFetchForSseDebug } from "../utils/sse-debug";
 import {
@@ -844,7 +847,7 @@ function isProviderRetryableStreamEnvelopeError(error: unknown): boolean {
 export function isProviderRetryableError(error: unknown, provider?: string): boolean {
 	if (!(error instanceof Error)) return false;
-	if (provider === "github-copilot" && isCopilotRetryableError(error)) return true;
+	if (provider === "github-copilot" && isCopilotTransientModelError(error)) return true;
 	const msg = error.message.toLowerCase();
 	if (
 		isUnexpectedSocketCloseMessage(msg) ||
@@ -1287,7 +1290,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 					}
 					providerRetryAttempt++;
 					const delayMs = PROVIDER_BASE_DELAY_MS * 2 ** (providerRetryAttempt - 1);
-					await abortableSleep(delayMs, options?.signal);
+					await scheduler.wait(delayMs, { signal: options?.signal });
 					output.content.length = 0;
 					output.responseId = undefined;
 					output.errorMessage = strictFallbackErrorMessage;

package/src/providers/azure-openai-responses.ts CHANGED Viewed

@@ -6,17 +6,15 @@ import type {
 	ResponseInput,
 } from "openai/resources/responses/responses";
 import { getEnvApiKey } from "../stream";
-import {
-	type Api,
-	type AssistantMessage,
-	type Context,
-	type Model,
-	type ServiceTier,
-	type StreamFunction,
-	type StreamOptions,
-	shouldSendServiceTier,
-	type Tool,
-	type ToolChoice,
+import type {
+	AssistantMessage,
+	Context,
+	Model,
+	ServiceTier,
+	StreamFunction,
+	StreamOptions,
+	Tool,
+	ToolChoice,
 } from "../types";
 import { normalizeSystemPrompts } from "../utils";
 import { createAbortSourceTracker } from "../utils/abort";
@@ -33,8 +31,11 @@ import { mapToOpenAIResponsesToolChoice } from "../utils/tool-choice";
 import { normalizeOpenAIResponsesPromptCacheKey, supportsDeveloperRole } from "./openai-responses";
 import {
 	appendResponsesToolResultMessages,
+	applyCommonResponsesSamplingParams,
+	applyResponsesReasoningParams,
 	convertResponsesAssistantMessage,
 	convertResponsesInputContent,
+	createInitialResponsesAssistantMessage,
 	normalizeResponsesToolCallIdForTransform,
 	processResponsesStream,
 } from "./openai-responses-shared";
@@ -101,23 +102,11 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
 		let firstTokenTime: number | undefined;
 		const deploymentName = resolveDeploymentName(model, options);
-		const output: AssistantMessage = {
-			role: "assistant",
-			content: [],
-			api: "azure-openai-responses" as Api,
-			provider: model.provider,
-			model: model.id,
-			usage: {
-				input: 0,
-				output: 0,
-				cacheRead: 0,
-				cacheWrite: 0,
-				totalTokens: 0,
-				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-			},
-			stopReason: "stop",
-			timestamp: Date.now(),
-		};
+		const output: AssistantMessage = createInitialResponsesAssistantMessage(
+			"azure-openai-responses",
+			model.provider,
+			model.id,
+		);
 		let rawRequestDump: RawHttpRequestDump | undefined;
 		const abortTracker = createAbortSourceTracker(options?.signal);
 		const firstEventTimeoutAbortError = new Error(AZURE_OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE);
@@ -279,31 +268,7 @@ function buildParams(
 		prompt_cache_key: normalizeOpenAIResponsesPromptCacheKey(options?.sessionId),
 	};
-	if (options?.maxTokens) {
-		params.max_output_tokens = options?.maxTokens;
-	}
-	if (options?.temperature !== undefined) {
-		params.temperature = options?.temperature;
-	}
-	if (options?.topP !== undefined) {
-		params.top_p = options.topP;
-	}
-	if (options?.topK !== undefined) {
-		params.top_k = options.topK;
-	}
-	if (options?.minP !== undefined) {
-		params.min_p = options.minP;
-	}
-	if (options?.presencePenalty !== undefined) {
-		params.presence_penalty = options.presencePenalty;
-	}
-	if (options?.repetitionPenalty !== undefined) {
-		params.repetition_penalty = options.repetitionPenalty;
-	}
-	if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
-		params.service_tier = options.serviceTier;
-	}
+	applyCommonResponsesSamplingParams(params, options, model.provider);
 	if (context.tools) {
 		params.tools = convertTools(context.tools);
@@ -312,36 +277,7 @@ function buildParams(
 		}
 	}
-	if (model.reasoning) {
-		// Always request encrypted reasoning content so reasoning items can be
-		// replayed in multi-turn conversations when store is false (items aren't
-		// persisted server-side, so we must include the full content).
-		// See: https://github.com/can1357/oh-my-pi/issues/41
-		params.include = ["reasoning.encrypted_content"];
-		if (options?.reasoning || options?.reasoningSummary !== undefined) {
-			const reasoningParams: NonNullable<typeof params.reasoning> = {
-				effort: options?.reasoning || "medium",
-			};
-			if (options?.reasoningSummary !== null) {
-				reasoningParams.summary = options?.reasoningSummary || "auto";
-			}
-			params.reasoning = reasoningParams;
-		} else {
-			if (model.name.toLowerCase().startsWith("gpt-5")) {
-				// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
-				messages.push({
-					role: "developer",
-					content: [
-						{
-							type: "input_text",
-							text: "# Juice: 0 !important",
-						},
-					],
-				});
-			}
-		}
-	}
+	applyResponsesReasoningParams(params, model, options, messages);
 	return params;
 }

package/src/providers/google-gemini-cli.ts CHANGED Viewed

@@ -4,8 +4,9 @@
  * Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
  */
 import { createHash, randomBytes, randomUUID } from "node:crypto";
+import { scheduler } from "node:timers/promises";
 import type { Content, FunctionCallingConfigMode, ThinkingConfig } from "@google/genai";
-import { abortableSleep, readSseJson } from "@oh-my-pi/pi-utils";
+import { fetchWithRetry, readSseJson } from "@oh-my-pi/pi-utils";
 import { calculateCost } from "../models";
 import type {
 	Api,
@@ -23,28 +24,27 @@ import { AssistantMessageEventStream } from "../utils/event-stream";
 import { appendRawHttpRequestDumpFor400, type RawHttpRequestDump, withHttpStatus } from "../utils/http-inspector";
 import { refreshAntigravityToken } from "../utils/oauth/google-antigravity";
 import { refreshGoogleCloudToken } from "../utils/oauth/google-gemini-cli";
-import { extractHttpStatusFromError } from "../utils/retry";
 import { sanitizeSchemaForCCA } from "../utils/schema";
-import {
-	ANTIGRAVITY_SYSTEM_INSTRUCTION,
-	extractRetryDelay,
-	getAntigravityUserAgent,
-	getGeminiCliHeaders,
-} from "./google-gemini-headers";
+import { ANTIGRAVITY_SYSTEM_INSTRUCTION, getAntigravityUserAgent, getGeminiCliHeaders } from "./google-gemini-headers";
 import {
 	convertMessages,
 	convertTools,
+	type GoogleThinkingLevel,
 	isThinkingPart,
 	mapStopReasonString,
 	mapToolChoice,
+	nextToolCallId,
+	pushBlockEndEvent,
+	pushToolCallEvents,
 	retainThoughtSignature,
+	startTextOrThinkingBlock,
 } from "./google-shared";
 /**
- * Thinking level for Gemini 3 models.
- * Mirrors Google's ThinkingLevel enum values.
+ * Thinking level for Gemini 3 models. Re-exported from `google-shared` so existing
+ * `import { GoogleThinkingLevel } from "./google-gemini-cli"` callers keep working.
  */
-export type GoogleThinkingLevel = "THINKING_LEVEL_UNSPECIFIED" | "MINIMAL" | "LOW" | "MEDIUM" | "HIGH";
+export type { GoogleThinkingLevel };
 export interface GoogleGeminiCliOptions extends StreamOptions {
 	toolChoice?: "auto" | "none" | "any";
@@ -72,15 +72,11 @@ const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, ANTIGRAVITY_
 export {
 	ANTIGRAVITY_SYSTEM_INSTRUCTION,
-	extractRetryDelay,
 	getAntigravityUserAgent,
 	getGeminiCliHeaders,
 	getGeminiCliUserAgent,
 } from "./google-gemini-headers";
-// Counter for generating unique tool call IDs
-let toolCallCounter = 0;
 // Retry configuration
 const MAX_RETRIES = 3;
 const BASE_DELAY_MS = 1000;
@@ -104,16 +100,6 @@ function shouldInjectAntigravitySystemInstruction(modelId: string): boolean {
 	return normalized.includes("claude") || normalized.includes("gemini-3-pro-high");
 }
-/**
- * Check if an error is retryable (rate limit, server error, network error, etc.)
- */
-function isRetryableError(status: number, errorText: string): boolean {
-	if (status === 429 || status === 500 || status === 502 || status === 503 || status === 504) {
-		return true;
-	}
-	return /resource.?exhausted|rate.?limit|overloaded|service.?unavailable|other.?side.?closed/i.test(errorText);
-}
 /**
  * Extract a clean, user-friendly error message from Google API error response.
  * Parses JSON error responses and returns just the message field.
@@ -366,109 +352,26 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
 				headers: requestHeaders,
 			};
-			// Fetch with retry logic for rate limits and transient errors
-			let response: Response | undefined;
-			let lastError: Error | undefined;
-			let requestUrl: string | undefined;
-			let rateLimitTimeSpent = 0;
-			for (let attempt = 0; ; attempt++) {
-				if (options?.signal?.aborted) {
-					throw new Error("Request was aborted");
-				}
-				try {
-					const endpoint = endpoints[Math.min(attempt, endpoints.length - 1)];
-					requestUrl = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
-					response = await fetch(requestUrl, {
-						method: "POST",
-						headers: requestHeaders,
-						body: requestBodyJson,
-						signal: options?.signal,
-					});
-					if (response.ok) {
-						break; // Success, exit retry loop
-					}
-					const errorText = await response.text();
-					// Handle 429 rate limits with time budget
-					if (response.status === 429) {
-						if (/quota|exhausted/i.test(errorText)) {
-							throw withHttpStatus(
-								new Error(`Cloud Code Assist API error (429): ${extractErrorMessage(errorText)}`),
-								429,
-							);
-						}
-						const serverDelay = extractRetryDelay(errorText, response);
-						if (serverDelay && rateLimitTimeSpent + serverDelay <= RATE_LIMIT_BUDGET_MS) {
-							rateLimitTimeSpent += serverDelay;
-							await abortableSleep(serverDelay, options?.signal);
-							continue;
-						}
-						// Fallback: use exponential backoff if no server delay, up to MAX_RETRIES
-						if (!serverDelay && attempt < MAX_RETRIES) {
-							await abortableSleep(BASE_DELAY_MS * 2 ** attempt, options?.signal);
-							continue;
-						}
-					} else if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
-						// Non-429 retryable errors use standard attempt cap
-						const serverDelay = extractRetryDelay(errorText, response);
-						const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
-						// Check if server delay exceeds max allowed (default: 60s) for non-429 errors
-						const maxDelayMs = options?.maxRetryDelayMs ?? 60000;
-						if (maxDelayMs > 0 && serverDelay && serverDelay > maxDelayMs) {
-							const delaySeconds = Math.ceil(serverDelay / 1000);
-							throw withHttpStatus(
-								new Error(
-									`Server requested ${delaySeconds}s retry delay (max: ${Math.ceil(maxDelayMs / 1000)}s). ${extractErrorMessage(errorText)}`,
-								),
-								response.status,
-							);
-						}
-						await abortableSleep(delayMs, options?.signal);
-						continue;
-					}
-					// Not retryable or budget exceeded
-					throw withHttpStatus(
-						new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`),
-						response.status,
-					);
-				} catch (error) {
-					// Check for abort - fetch throws AbortError, our code throws "Request was aborted"
-					if (error instanceof Error) {
-						if (error.name === "AbortError" || error.message === "Request was aborted") {
-							throw new Error("Request was aborted");
-						}
-					}
-					// HTTP responses are handled inside the try block.
-					// If we intentionally throw with status metadata, don't convert it into a network retry.
-					if (extractHttpStatusFromError(error) !== undefined) {
-						throw error;
-					}
-					// Extract detailed error message from fetch errors (Node includes cause)
-					lastError = error instanceof Error ? error : new Error(String(error));
-					if (lastError.message === "fetch failed" && lastError.cause instanceof Error) {
-						lastError = new Error(`Network error: ${lastError.cause.message}`);
-					}
-					// Network errors are retryable
-					if (attempt < MAX_RETRIES) {
-						const delayMs = BASE_DELAY_MS * 2 ** attempt;
-						await abortableSleep(delayMs, options?.signal);
-						continue;
-					}
-					throw lastError;
-				}
-			}
-			if (!response?.ok) {
-				throw lastError ?? new Error("Failed to get response after retries");
+			const response = await fetchWithRetry(
+				attempt => `${endpoints[Math.min(attempt, endpoints.length - 1)]}/v1internal:streamGenerateContent?alt=sse`,
+				{
+					method: "POST",
+					headers: requestHeaders,
+					body: requestBodyJson,
+					signal: options?.signal,
+					maxAttempts: MAX_RETRIES + 1,
+					defaultDelayMs: attempt => BASE_DELAY_MS * 2 ** attempt,
+					maxDelayMs: options?.maxRetryDelayMs ?? RATE_LIMIT_BUDGET_MS,
+				},
+			);
+			if (!response.ok) {
+				const errorText = await response.text();
+				throw withHttpStatus(
+					new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`),
+					response.status,
+				);
 			}
+			const requestUrl = response.url;
 			let started = false;
 			const ensureStarted = () => {
@@ -525,37 +428,9 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
 									(!isThinking && currentBlock.type !== "text")
 								) {
 									if (currentBlock) {
-										if (currentBlock.type === "text") {
-											stream.push({
-												type: "text_end",
-												contentIndex: blocks.length - 1,
-												content: currentBlock.text,
-												partial: output,
-											});
-										} else {
-											stream.push({
-												type: "thinking_end",
-												contentIndex: blockIndex(),
-												content: currentBlock.thinking,
-												partial: output,
-											});
-										}
-									}
-									if (isThinking) {
-										currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
-										output.content.push(currentBlock);
-										ensureStarted();
-										stream.push({
-											type: "thinking_start",
-											contentIndex: blockIndex(),
-											partial: output,
-										});
-									} else {
-										currentBlock = { type: "text", text: "" };
-										output.content.push(currentBlock);
-										ensureStarted();
-										stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
+										pushBlockEndEvent(currentBlock, blockIndex(), output, stream);
 									}
+									currentBlock = startTextOrThinkingBlock(isThinking, output, stream, ensureStarted);
 								}
 								if (currentBlock.type === "thinking") {
 									currentBlock.thinking += part.text;
@@ -587,30 +462,14 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
 							if (part.functionCall) {
 								hasContent = true;
 								if (currentBlock) {
-									if (currentBlock.type === "text") {
-										stream.push({
-											type: "text_end",
-											contentIndex: blockIndex(),
-											content: currentBlock.text,
-											partial: output,
-										});
-									} else {
-										stream.push({
-											type: "thinking_end",
-											contentIndex: blockIndex(),
-											content: currentBlock.thinking,
-											partial: output,
-										});
-									}
+									pushBlockEndEvent(currentBlock, blockIndex(), output, stream);
 									currentBlock = null;
 								}
 								const providedId = part.functionCall.id;
 								const needsNewId =
 									!providedId || output.content.some(b => b.type === "toolCall" && b.id === providedId);
-								const toolCallId = needsNewId
-									? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
-									: providedId;
+								const toolCallId = needsNewId ? nextToolCallId(part.functionCall.name || "tool") : providedId;
 								const toolCall: ToolCall = {
 									type: "toolCall",
@@ -622,19 +481,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
 								output.content.push(toolCall);
 								ensureStarted();
-								stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
-								stream.push({
-									type: "toolcall_delta",
-									contentIndex: blockIndex(),
-									delta: JSON.stringify(toolCall.arguments),
-									partial: output,
-								});
-								stream.push({
-									type: "toolcall_end",
-									contentIndex: blockIndex(),
-									toolCall,
-									partial: output,
-								});
+								pushToolCallEvents(toolCall, blockIndex(), output, stream);
 							}
 						}
 					}
@@ -671,21 +518,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
 				}
 				if (currentBlock) {
-					if (currentBlock.type === "text") {
-						stream.push({
-							type: "text_end",
-							contentIndex: blockIndex(),
-							content: currentBlock.text,
-							partial: output,
-						});
-					} else {
-						stream.push({
-							type: "thinking_end",
-							contentIndex: blockIndex(),
-							content: currentBlock.thinking,
-							partial: output,
-						});
-					}
+					pushBlockEndEvent(currentBlock, blockIndex(), output, stream);
 				}
 				return hasContent;
@@ -702,7 +535,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
 				if (emptyAttempt > 0) {
 					const backoffMs = EMPTY_STREAM_BASE_DELAY_MS * 2 ** (emptyAttempt - 1);
 					try {
-						await abortableSleep(backoffMs, options?.signal);
+						await scheduler.wait(backoffMs, { signal: options?.signal });
 					} catch {
 						// Normalize AbortError to expected message for consistent error handling
 						throw new Error("Request was aborted");