@oh-my-pi/pi-ai 15.0.0 → 15.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -1
- package/package.json +5 -5
- package/src/index.ts +1 -0
- package/src/provider-models/ollama.ts +3 -11
- package/src/providers/anthropic.ts +12 -9
- package/src/providers/azure-openai-responses.ts +19 -83
- package/src/providers/google-gemini-cli.ts +37 -204
- package/src/providers/google-gemini-headers.ts +0 -100
- package/src/providers/google-shared.ts +446 -4
- package/src/providers/google-vertex.ts +19 -371
- package/src/providers/google.ts +16 -359
- package/src/providers/kimi.ts +15 -96
- package/src/providers/ollama.ts +3 -12
- package/src/providers/openai-anthropic-shim.ts +136 -0
- package/src/providers/openai-codex-responses.ts +38 -199
- package/src/providers/openai-completions.ts +17 -24
- package/src/providers/openai-responses-shared.ts +143 -24
- package/src/providers/openai-responses.ts +20 -76
- package/src/providers/synthetic.ts +15 -102
- package/src/types.ts +13 -1
- package/src/utils/h2-fetch.ts +47 -0
- package/src/utils/http-inspector.ts +2 -2
- package/src/utils/oauth/github-copilot.ts +6 -10
- package/src/utils/oauth/kimi.ts +4 -3
- package/src/utils/oauth/lm-studio.ts +0 -2
- package/src/utils/retry.ts +8 -130
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.0.1] - 2026-05-14
|
|
6
|
+
### Breaking Changes
|
|
7
|
+
|
|
8
|
+
- Increased the minimum Bun runtime version to `>=1.3.14` for the `@oh-my-pi/pi-ai` package
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- Added `installH2Fetch` to patch `globalThis.fetch` so HTTPS requests attempt HTTP/2 over ALPN with automatic HTTP/1.1 fallback when HTTP/2 is unsupported
|
|
13
|
+
- Added priority service-tier traffic to the `premiumRequests` accounting on OpenAI and OpenAI Codex providers. Sending `serviceTier: "priority"` now increments `usage.premiumRequests` by 1 per request, matching the existing GitHub Copilot premium-request budget semantics so downstream consumers (e.g. the `omp stats` "Premium Reqs" card and `/usage`) reflect priority traffic alongside Copilot premium calls.
|
|
14
|
+
|
|
5
15
|
## [15.0.0] - 2026-05-13
|
|
6
16
|
|
|
7
17
|
### Added
|
|
@@ -2344,4 +2354,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
|
|
|
2344
2354
|
|
|
2345
2355
|
## [0.9.4] - 2025-11-26
|
|
2346
2356
|
|
|
2347
|
-
Initial release with multi-provider LLM support.
|
|
2357
|
+
Initial release with multi-provider LLM support.
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "15.0.
|
|
4
|
+
"version": "15.0.1",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://github.com/can1357/oh-my-pi",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -46,8 +46,8 @@
|
|
|
46
46
|
"@aws-sdk/credential-provider-node": "^3.972.39",
|
|
47
47
|
"@bufbuild/protobuf": "^2.12.0",
|
|
48
48
|
"@google/genai": "^1.52.0",
|
|
49
|
-
"@oh-my-pi/pi-natives": "15.0.
|
|
50
|
-
"@oh-my-pi/pi-utils": "15.0.
|
|
49
|
+
"@oh-my-pi/pi-natives": "15.0.1",
|
|
50
|
+
"@oh-my-pi/pi-utils": "15.0.1",
|
|
51
51
|
"@sinclair/typebox": "^0.34.49",
|
|
52
52
|
"@smithy/node-http-handler": "^4.6.1",
|
|
53
53
|
"ajv": "^8.20.0",
|
|
@@ -58,10 +58,10 @@
|
|
|
58
58
|
"zod": "4.4.3"
|
|
59
59
|
},
|
|
60
60
|
"devDependencies": {
|
|
61
|
-
"@types/bun": "^1.3.
|
|
61
|
+
"@types/bun": "^1.3.14"
|
|
62
62
|
},
|
|
63
63
|
"engines": {
|
|
64
|
-
"bun": ">=1.3.
|
|
64
|
+
"bun": ">=1.3.14"
|
|
65
65
|
},
|
|
66
66
|
"files": [
|
|
67
67
|
"src",
|
package/src/index.ts
CHANGED
|
@@ -37,6 +37,7 @@ export * from "./usage/zai";
|
|
|
37
37
|
export * from "./utils/anthropic-auth";
|
|
38
38
|
export * from "./utils/discovery";
|
|
39
39
|
export * from "./utils/event-stream";
|
|
40
|
+
export * from "./utils/h2-fetch";
|
|
40
41
|
export * from "./utils/overflow";
|
|
41
42
|
export * from "./utils/retry";
|
|
42
43
|
export * from "./utils/schema";
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { fetchWithRetry } from "@oh-my-pi/pi-utils";
|
|
2
2
|
import type { ModelManagerOptions } from "../model-manager";
|
|
3
3
|
import { Effort } from "../model-thinking";
|
|
4
4
|
import type { ThinkingConfig } from "../types";
|
|
@@ -19,16 +19,7 @@ type OllamaShowResponse = {
|
|
|
19
19
|
model_info?: Record<string, unknown>;
|
|
20
20
|
};
|
|
21
21
|
|
|
22
|
-
const
|
|
23
|
-
|
|
24
|
-
async function fetchWithRetry(url: string, init: RequestInit): Promise<Response> {
|
|
25
|
-
for (let attempt = 0; attempt < MODEL_RETRY_DELAYS_MS.length; attempt++) {
|
|
26
|
-
const response = await fetch(url, init);
|
|
27
|
-
if (response.ok || response.status < 500) return response;
|
|
28
|
-
await abortableSleep(MODEL_RETRY_DELAYS_MS[attempt]!);
|
|
29
|
-
}
|
|
30
|
-
return fetch(url, init);
|
|
31
|
-
}
|
|
22
|
+
const OLLAMA_RETRY_DELAYS_MS = [2_000, 5_000, 10_000];
|
|
32
23
|
|
|
33
24
|
function trimTrailingSlash(value: string): string {
|
|
34
25
|
return value.endsWith("/") ? value.slice(0, -1) : value;
|
|
@@ -109,6 +100,7 @@ export function ollamaCloudModelManagerOptions(
|
|
|
109
100
|
const response = await fetchWithRetry(`${baseUrl}/api/tags`, {
|
|
110
101
|
method: "GET",
|
|
111
102
|
headers: createCloudHeaders(apiKey),
|
|
103
|
+
defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
|
|
112
104
|
});
|
|
113
105
|
if (!response.ok) {
|
|
114
106
|
throw new Error(`HTTP ${response.status} from ${baseUrl}/api/tags`);
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as nodeCrypto from "node:crypto";
|
|
2
2
|
import * as fs from "node:fs";
|
|
3
|
+
import { scheduler } from "node:timers/promises";
|
|
3
4
|
import * as tls from "node:tls";
|
|
4
5
|
import Anthropic, { type ClientOptions as AnthropicSdkClientOptions } from "@anthropic-ai/sdk";
|
|
5
6
|
import type {
|
|
@@ -8,7 +9,14 @@ import type {
|
|
|
8
9
|
MessageParam,
|
|
9
10
|
RawMessageStreamEvent,
|
|
10
11
|
} from "@anthropic-ai/sdk/resources/messages";
|
|
11
|
-
import {
|
|
12
|
+
import {
|
|
13
|
+
$env,
|
|
14
|
+
extractHttpStatusFromError,
|
|
15
|
+
isEnoent,
|
|
16
|
+
isRetryableError,
|
|
17
|
+
isUnexpectedSocketCloseMessage,
|
|
18
|
+
readSseEvents,
|
|
19
|
+
} from "@oh-my-pi/pi-utils";
|
|
12
20
|
import { hasOpus47ApiRestrictions, mapEffortToAnthropicAdaptiveEffort } from "../model-thinking";
|
|
13
21
|
import { calculateCost } from "../models";
|
|
14
22
|
import { getEnvApiKey, OUTPUT_FALLBACK_BUFFER } from "../stream";
|
|
@@ -48,12 +56,7 @@ import { getStreamFirstEventTimeoutMs, getStreamIdleTimeoutMs, iterateWithIdleTi
|
|
|
48
56
|
import { parseJsonWithRepair, parseStreamingJson } from "../utils/json-parse";
|
|
49
57
|
import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
|
|
50
58
|
import { notifyProviderResponse } from "../utils/provider-response";
|
|
51
|
-
import {
|
|
52
|
-
extractHttpStatusFromError,
|
|
53
|
-
isCopilotRetryableError,
|
|
54
|
-
isRetryableError,
|
|
55
|
-
isUnexpectedSocketCloseMessage,
|
|
56
|
-
} from "../utils/retry";
|
|
59
|
+
import { isCopilotTransientModelError } from "../utils/retry";
|
|
57
60
|
import { COMBINATOR_KEYS, NO_STRICT } from "../utils/schema";
|
|
58
61
|
import { notifyRawSseEvent, wrapFetchForSseDebug } from "../utils/sse-debug";
|
|
59
62
|
import {
|
|
@@ -844,7 +847,7 @@ function isProviderRetryableStreamEnvelopeError(error: unknown): boolean {
|
|
|
844
847
|
|
|
845
848
|
export function isProviderRetryableError(error: unknown, provider?: string): boolean {
|
|
846
849
|
if (!(error instanceof Error)) return false;
|
|
847
|
-
if (provider === "github-copilot" &&
|
|
850
|
+
if (provider === "github-copilot" && isCopilotTransientModelError(error)) return true;
|
|
848
851
|
const msg = error.message.toLowerCase();
|
|
849
852
|
if (
|
|
850
853
|
isUnexpectedSocketCloseMessage(msg) ||
|
|
@@ -1287,7 +1290,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1287
1290
|
}
|
|
1288
1291
|
providerRetryAttempt++;
|
|
1289
1292
|
const delayMs = PROVIDER_BASE_DELAY_MS * 2 ** (providerRetryAttempt - 1);
|
|
1290
|
-
await
|
|
1293
|
+
await scheduler.wait(delayMs, { signal: options?.signal });
|
|
1291
1294
|
output.content.length = 0;
|
|
1292
1295
|
output.responseId = undefined;
|
|
1293
1296
|
output.errorMessage = strictFallbackErrorMessage;
|
|
@@ -6,17 +6,15 @@ import type {
|
|
|
6
6
|
ResponseInput,
|
|
7
7
|
} from "openai/resources/responses/responses";
|
|
8
8
|
import { getEnvApiKey } from "../stream";
|
|
9
|
-
import {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
type Tool,
|
|
19
|
-
type ToolChoice,
|
|
9
|
+
import type {
|
|
10
|
+
AssistantMessage,
|
|
11
|
+
Context,
|
|
12
|
+
Model,
|
|
13
|
+
ServiceTier,
|
|
14
|
+
StreamFunction,
|
|
15
|
+
StreamOptions,
|
|
16
|
+
Tool,
|
|
17
|
+
ToolChoice,
|
|
20
18
|
} from "../types";
|
|
21
19
|
import { normalizeSystemPrompts } from "../utils";
|
|
22
20
|
import { createAbortSourceTracker } from "../utils/abort";
|
|
@@ -33,8 +31,11 @@ import { mapToOpenAIResponsesToolChoice } from "../utils/tool-choice";
|
|
|
33
31
|
import { normalizeOpenAIResponsesPromptCacheKey, supportsDeveloperRole } from "./openai-responses";
|
|
34
32
|
import {
|
|
35
33
|
appendResponsesToolResultMessages,
|
|
34
|
+
applyCommonResponsesSamplingParams,
|
|
35
|
+
applyResponsesReasoningParams,
|
|
36
36
|
convertResponsesAssistantMessage,
|
|
37
37
|
convertResponsesInputContent,
|
|
38
|
+
createInitialResponsesAssistantMessage,
|
|
38
39
|
normalizeResponsesToolCallIdForTransform,
|
|
39
40
|
processResponsesStream,
|
|
40
41
|
} from "./openai-responses-shared";
|
|
@@ -101,23 +102,11 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
|
|
|
101
102
|
let firstTokenTime: number | undefined;
|
|
102
103
|
const deploymentName = resolveDeploymentName(model, options);
|
|
103
104
|
|
|
104
|
-
const output: AssistantMessage =
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
model: model.id,
|
|
110
|
-
usage: {
|
|
111
|
-
input: 0,
|
|
112
|
-
output: 0,
|
|
113
|
-
cacheRead: 0,
|
|
114
|
-
cacheWrite: 0,
|
|
115
|
-
totalTokens: 0,
|
|
116
|
-
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
|
117
|
-
},
|
|
118
|
-
stopReason: "stop",
|
|
119
|
-
timestamp: Date.now(),
|
|
120
|
-
};
|
|
105
|
+
const output: AssistantMessage = createInitialResponsesAssistantMessage(
|
|
106
|
+
"azure-openai-responses",
|
|
107
|
+
model.provider,
|
|
108
|
+
model.id,
|
|
109
|
+
);
|
|
121
110
|
let rawRequestDump: RawHttpRequestDump | undefined;
|
|
122
111
|
const abortTracker = createAbortSourceTracker(options?.signal);
|
|
123
112
|
const firstEventTimeoutAbortError = new Error(AZURE_OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE);
|
|
@@ -279,31 +268,7 @@ function buildParams(
|
|
|
279
268
|
prompt_cache_key: normalizeOpenAIResponsesPromptCacheKey(options?.sessionId),
|
|
280
269
|
};
|
|
281
270
|
|
|
282
|
-
|
|
283
|
-
params.max_output_tokens = options?.maxTokens;
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
if (options?.temperature !== undefined) {
|
|
287
|
-
params.temperature = options?.temperature;
|
|
288
|
-
}
|
|
289
|
-
if (options?.topP !== undefined) {
|
|
290
|
-
params.top_p = options.topP;
|
|
291
|
-
}
|
|
292
|
-
if (options?.topK !== undefined) {
|
|
293
|
-
params.top_k = options.topK;
|
|
294
|
-
}
|
|
295
|
-
if (options?.minP !== undefined) {
|
|
296
|
-
params.min_p = options.minP;
|
|
297
|
-
}
|
|
298
|
-
if (options?.presencePenalty !== undefined) {
|
|
299
|
-
params.presence_penalty = options.presencePenalty;
|
|
300
|
-
}
|
|
301
|
-
if (options?.repetitionPenalty !== undefined) {
|
|
302
|
-
params.repetition_penalty = options.repetitionPenalty;
|
|
303
|
-
}
|
|
304
|
-
if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
|
|
305
|
-
params.service_tier = options.serviceTier;
|
|
306
|
-
}
|
|
271
|
+
applyCommonResponsesSamplingParams(params, options, model.provider);
|
|
307
272
|
|
|
308
273
|
if (context.tools) {
|
|
309
274
|
params.tools = convertTools(context.tools);
|
|
@@ -312,36 +277,7 @@ function buildParams(
|
|
|
312
277
|
}
|
|
313
278
|
}
|
|
314
279
|
|
|
315
|
-
|
|
316
|
-
// Always request encrypted reasoning content so reasoning items can be
|
|
317
|
-
// replayed in multi-turn conversations when store is false (items aren't
|
|
318
|
-
// persisted server-side, so we must include the full content).
|
|
319
|
-
// See: https://github.com/can1357/oh-my-pi/issues/41
|
|
320
|
-
params.include = ["reasoning.encrypted_content"];
|
|
321
|
-
|
|
322
|
-
if (options?.reasoning || options?.reasoningSummary !== undefined) {
|
|
323
|
-
const reasoningParams: NonNullable<typeof params.reasoning> = {
|
|
324
|
-
effort: options?.reasoning || "medium",
|
|
325
|
-
};
|
|
326
|
-
if (options?.reasoningSummary !== null) {
|
|
327
|
-
reasoningParams.summary = options?.reasoningSummary || "auto";
|
|
328
|
-
}
|
|
329
|
-
params.reasoning = reasoningParams;
|
|
330
|
-
} else {
|
|
331
|
-
if (model.name.toLowerCase().startsWith("gpt-5")) {
|
|
332
|
-
// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
|
|
333
|
-
messages.push({
|
|
334
|
-
role: "developer",
|
|
335
|
-
content: [
|
|
336
|
-
{
|
|
337
|
-
type: "input_text",
|
|
338
|
-
text: "# Juice: 0 !important",
|
|
339
|
-
},
|
|
340
|
-
],
|
|
341
|
-
});
|
|
342
|
-
}
|
|
343
|
-
}
|
|
344
|
-
}
|
|
280
|
+
applyResponsesReasoningParams(params, model, options, messages);
|
|
345
281
|
|
|
346
282
|
return params;
|
|
347
283
|
}
|
|
@@ -4,8 +4,9 @@
|
|
|
4
4
|
* Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
|
|
5
5
|
*/
|
|
6
6
|
import { createHash, randomBytes, randomUUID } from "node:crypto";
|
|
7
|
+
import { scheduler } from "node:timers/promises";
|
|
7
8
|
import type { Content, FunctionCallingConfigMode, ThinkingConfig } from "@google/genai";
|
|
8
|
-
import {
|
|
9
|
+
import { fetchWithRetry, readSseJson } from "@oh-my-pi/pi-utils";
|
|
9
10
|
import { calculateCost } from "../models";
|
|
10
11
|
import type {
|
|
11
12
|
Api,
|
|
@@ -23,28 +24,27 @@ import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
|
23
24
|
import { appendRawHttpRequestDumpFor400, type RawHttpRequestDump, withHttpStatus } from "../utils/http-inspector";
|
|
24
25
|
import { refreshAntigravityToken } from "../utils/oauth/google-antigravity";
|
|
25
26
|
import { refreshGoogleCloudToken } from "../utils/oauth/google-gemini-cli";
|
|
26
|
-
import { extractHttpStatusFromError } from "../utils/retry";
|
|
27
27
|
import { sanitizeSchemaForCCA } from "../utils/schema";
|
|
28
|
-
import {
|
|
29
|
-
ANTIGRAVITY_SYSTEM_INSTRUCTION,
|
|
30
|
-
extractRetryDelay,
|
|
31
|
-
getAntigravityUserAgent,
|
|
32
|
-
getGeminiCliHeaders,
|
|
33
|
-
} from "./google-gemini-headers";
|
|
28
|
+
import { ANTIGRAVITY_SYSTEM_INSTRUCTION, getAntigravityUserAgent, getGeminiCliHeaders } from "./google-gemini-headers";
|
|
34
29
|
import {
|
|
35
30
|
convertMessages,
|
|
36
31
|
convertTools,
|
|
32
|
+
type GoogleThinkingLevel,
|
|
37
33
|
isThinkingPart,
|
|
38
34
|
mapStopReasonString,
|
|
39
35
|
mapToolChoice,
|
|
36
|
+
nextToolCallId,
|
|
37
|
+
pushBlockEndEvent,
|
|
38
|
+
pushToolCallEvents,
|
|
40
39
|
retainThoughtSignature,
|
|
40
|
+
startTextOrThinkingBlock,
|
|
41
41
|
} from "./google-shared";
|
|
42
42
|
|
|
43
43
|
/**
|
|
44
|
-
* Thinking level for Gemini 3 models.
|
|
45
|
-
*
|
|
44
|
+
* Thinking level for Gemini 3 models. Re-exported from `google-shared` so existing
|
|
45
|
+
* `import { GoogleThinkingLevel } from "./google-gemini-cli"` callers keep working.
|
|
46
46
|
*/
|
|
47
|
-
export type GoogleThinkingLevel
|
|
47
|
+
export type { GoogleThinkingLevel };
|
|
48
48
|
|
|
49
49
|
export interface GoogleGeminiCliOptions extends StreamOptions {
|
|
50
50
|
toolChoice?: "auto" | "none" | "any";
|
|
@@ -72,15 +72,11 @@ const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, ANTIGRAVITY_
|
|
|
72
72
|
|
|
73
73
|
export {
|
|
74
74
|
ANTIGRAVITY_SYSTEM_INSTRUCTION,
|
|
75
|
-
extractRetryDelay,
|
|
76
75
|
getAntigravityUserAgent,
|
|
77
76
|
getGeminiCliHeaders,
|
|
78
77
|
getGeminiCliUserAgent,
|
|
79
78
|
} from "./google-gemini-headers";
|
|
80
79
|
|
|
81
|
-
// Counter for generating unique tool call IDs
|
|
82
|
-
let toolCallCounter = 0;
|
|
83
|
-
|
|
84
80
|
// Retry configuration
|
|
85
81
|
const MAX_RETRIES = 3;
|
|
86
82
|
const BASE_DELAY_MS = 1000;
|
|
@@ -104,16 +100,6 @@ function shouldInjectAntigravitySystemInstruction(modelId: string): boolean {
|
|
|
104
100
|
return normalized.includes("claude") || normalized.includes("gemini-3-pro-high");
|
|
105
101
|
}
|
|
106
102
|
|
|
107
|
-
/**
|
|
108
|
-
* Check if an error is retryable (rate limit, server error, network error, etc.)
|
|
109
|
-
*/
|
|
110
|
-
function isRetryableError(status: number, errorText: string): boolean {
|
|
111
|
-
if (status === 429 || status === 500 || status === 502 || status === 503 || status === 504) {
|
|
112
|
-
return true;
|
|
113
|
-
}
|
|
114
|
-
return /resource.?exhausted|rate.?limit|overloaded|service.?unavailable|other.?side.?closed/i.test(errorText);
|
|
115
|
-
}
|
|
116
|
-
|
|
117
103
|
/**
|
|
118
104
|
* Extract a clean, user-friendly error message from Google API error response.
|
|
119
105
|
* Parses JSON error responses and returns just the message field.
|
|
@@ -366,109 +352,26 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
366
352
|
headers: requestHeaders,
|
|
367
353
|
};
|
|
368
354
|
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
signal: options?.signal,
|
|
388
|
-
});
|
|
389
|
-
|
|
390
|
-
if (response.ok) {
|
|
391
|
-
break; // Success, exit retry loop
|
|
392
|
-
}
|
|
393
|
-
|
|
394
|
-
const errorText = await response.text();
|
|
395
|
-
|
|
396
|
-
// Handle 429 rate limits with time budget
|
|
397
|
-
if (response.status === 429) {
|
|
398
|
-
if (/quota|exhausted/i.test(errorText)) {
|
|
399
|
-
throw withHttpStatus(
|
|
400
|
-
new Error(`Cloud Code Assist API error (429): ${extractErrorMessage(errorText)}`),
|
|
401
|
-
429,
|
|
402
|
-
);
|
|
403
|
-
}
|
|
404
|
-
const serverDelay = extractRetryDelay(errorText, response);
|
|
405
|
-
if (serverDelay && rateLimitTimeSpent + serverDelay <= RATE_LIMIT_BUDGET_MS) {
|
|
406
|
-
rateLimitTimeSpent += serverDelay;
|
|
407
|
-
await abortableSleep(serverDelay, options?.signal);
|
|
408
|
-
continue;
|
|
409
|
-
}
|
|
410
|
-
// Fallback: use exponential backoff if no server delay, up to MAX_RETRIES
|
|
411
|
-
if (!serverDelay && attempt < MAX_RETRIES) {
|
|
412
|
-
await abortableSleep(BASE_DELAY_MS * 2 ** attempt, options?.signal);
|
|
413
|
-
continue;
|
|
414
|
-
}
|
|
415
|
-
} else if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
|
|
416
|
-
// Non-429 retryable errors use standard attempt cap
|
|
417
|
-
const serverDelay = extractRetryDelay(errorText, response);
|
|
418
|
-
const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
|
|
419
|
-
|
|
420
|
-
// Check if server delay exceeds max allowed (default: 60s) for non-429 errors
|
|
421
|
-
const maxDelayMs = options?.maxRetryDelayMs ?? 60000;
|
|
422
|
-
if (maxDelayMs > 0 && serverDelay && serverDelay > maxDelayMs) {
|
|
423
|
-
const delaySeconds = Math.ceil(serverDelay / 1000);
|
|
424
|
-
throw withHttpStatus(
|
|
425
|
-
new Error(
|
|
426
|
-
`Server requested ${delaySeconds}s retry delay (max: ${Math.ceil(maxDelayMs / 1000)}s). ${extractErrorMessage(errorText)}`,
|
|
427
|
-
),
|
|
428
|
-
response.status,
|
|
429
|
-
);
|
|
430
|
-
}
|
|
431
|
-
|
|
432
|
-
await abortableSleep(delayMs, options?.signal);
|
|
433
|
-
continue;
|
|
434
|
-
}
|
|
435
|
-
|
|
436
|
-
// Not retryable or budget exceeded
|
|
437
|
-
throw withHttpStatus(
|
|
438
|
-
new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`),
|
|
439
|
-
response.status,
|
|
440
|
-
);
|
|
441
|
-
} catch (error) {
|
|
442
|
-
// Check for abort - fetch throws AbortError, our code throws "Request was aborted"
|
|
443
|
-
if (error instanceof Error) {
|
|
444
|
-
if (error.name === "AbortError" || error.message === "Request was aborted") {
|
|
445
|
-
throw new Error("Request was aborted");
|
|
446
|
-
}
|
|
447
|
-
}
|
|
448
|
-
|
|
449
|
-
// HTTP responses are handled inside the try block.
|
|
450
|
-
// If we intentionally throw with status metadata, don't convert it into a network retry.
|
|
451
|
-
if (extractHttpStatusFromError(error) !== undefined) {
|
|
452
|
-
throw error;
|
|
453
|
-
}
|
|
454
|
-
// Extract detailed error message from fetch errors (Node includes cause)
|
|
455
|
-
lastError = error instanceof Error ? error : new Error(String(error));
|
|
456
|
-
if (lastError.message === "fetch failed" && lastError.cause instanceof Error) {
|
|
457
|
-
lastError = new Error(`Network error: ${lastError.cause.message}`);
|
|
458
|
-
}
|
|
459
|
-
// Network errors are retryable
|
|
460
|
-
if (attempt < MAX_RETRIES) {
|
|
461
|
-
const delayMs = BASE_DELAY_MS * 2 ** attempt;
|
|
462
|
-
await abortableSleep(delayMs, options?.signal);
|
|
463
|
-
continue;
|
|
464
|
-
}
|
|
465
|
-
throw lastError;
|
|
466
|
-
}
|
|
467
|
-
}
|
|
468
|
-
|
|
469
|
-
if (!response?.ok) {
|
|
470
|
-
throw lastError ?? new Error("Failed to get response after retries");
|
|
355
|
+
const response = await fetchWithRetry(
|
|
356
|
+
attempt => `${endpoints[Math.min(attempt, endpoints.length - 1)]}/v1internal:streamGenerateContent?alt=sse`,
|
|
357
|
+
{
|
|
358
|
+
method: "POST",
|
|
359
|
+
headers: requestHeaders,
|
|
360
|
+
body: requestBodyJson,
|
|
361
|
+
signal: options?.signal,
|
|
362
|
+
maxAttempts: MAX_RETRIES + 1,
|
|
363
|
+
defaultDelayMs: attempt => BASE_DELAY_MS * 2 ** attempt,
|
|
364
|
+
maxDelayMs: options?.maxRetryDelayMs ?? RATE_LIMIT_BUDGET_MS,
|
|
365
|
+
},
|
|
366
|
+
);
|
|
367
|
+
if (!response.ok) {
|
|
368
|
+
const errorText = await response.text();
|
|
369
|
+
throw withHttpStatus(
|
|
370
|
+
new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`),
|
|
371
|
+
response.status,
|
|
372
|
+
);
|
|
471
373
|
}
|
|
374
|
+
const requestUrl = response.url;
|
|
472
375
|
|
|
473
376
|
let started = false;
|
|
474
377
|
const ensureStarted = () => {
|
|
@@ -525,37 +428,9 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
525
428
|
(!isThinking && currentBlock.type !== "text")
|
|
526
429
|
) {
|
|
527
430
|
if (currentBlock) {
|
|
528
|
-
|
|
529
|
-
stream.push({
|
|
530
|
-
type: "text_end",
|
|
531
|
-
contentIndex: blocks.length - 1,
|
|
532
|
-
content: currentBlock.text,
|
|
533
|
-
partial: output,
|
|
534
|
-
});
|
|
535
|
-
} else {
|
|
536
|
-
stream.push({
|
|
537
|
-
type: "thinking_end",
|
|
538
|
-
contentIndex: blockIndex(),
|
|
539
|
-
content: currentBlock.thinking,
|
|
540
|
-
partial: output,
|
|
541
|
-
});
|
|
542
|
-
}
|
|
543
|
-
}
|
|
544
|
-
if (isThinking) {
|
|
545
|
-
currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
|
|
546
|
-
output.content.push(currentBlock);
|
|
547
|
-
ensureStarted();
|
|
548
|
-
stream.push({
|
|
549
|
-
type: "thinking_start",
|
|
550
|
-
contentIndex: blockIndex(),
|
|
551
|
-
partial: output,
|
|
552
|
-
});
|
|
553
|
-
} else {
|
|
554
|
-
currentBlock = { type: "text", text: "" };
|
|
555
|
-
output.content.push(currentBlock);
|
|
556
|
-
ensureStarted();
|
|
557
|
-
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
|
|
431
|
+
pushBlockEndEvent(currentBlock, blockIndex(), output, stream);
|
|
558
432
|
}
|
|
433
|
+
currentBlock = startTextOrThinkingBlock(isThinking, output, stream, ensureStarted);
|
|
559
434
|
}
|
|
560
435
|
if (currentBlock.type === "thinking") {
|
|
561
436
|
currentBlock.thinking += part.text;
|
|
@@ -587,30 +462,14 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
587
462
|
if (part.functionCall) {
|
|
588
463
|
hasContent = true;
|
|
589
464
|
if (currentBlock) {
|
|
590
|
-
|
|
591
|
-
stream.push({
|
|
592
|
-
type: "text_end",
|
|
593
|
-
contentIndex: blockIndex(),
|
|
594
|
-
content: currentBlock.text,
|
|
595
|
-
partial: output,
|
|
596
|
-
});
|
|
597
|
-
} else {
|
|
598
|
-
stream.push({
|
|
599
|
-
type: "thinking_end",
|
|
600
|
-
contentIndex: blockIndex(),
|
|
601
|
-
content: currentBlock.thinking,
|
|
602
|
-
partial: output,
|
|
603
|
-
});
|
|
604
|
-
}
|
|
465
|
+
pushBlockEndEvent(currentBlock, blockIndex(), output, stream);
|
|
605
466
|
currentBlock = null;
|
|
606
467
|
}
|
|
607
468
|
|
|
608
469
|
const providedId = part.functionCall.id;
|
|
609
470
|
const needsNewId =
|
|
610
471
|
!providedId || output.content.some(b => b.type === "toolCall" && b.id === providedId);
|
|
611
|
-
const toolCallId = needsNewId
|
|
612
|
-
? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
|
|
613
|
-
: providedId;
|
|
472
|
+
const toolCallId = needsNewId ? nextToolCallId(part.functionCall.name || "tool") : providedId;
|
|
614
473
|
|
|
615
474
|
const toolCall: ToolCall = {
|
|
616
475
|
type: "toolCall",
|
|
@@ -622,19 +481,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
622
481
|
|
|
623
482
|
output.content.push(toolCall);
|
|
624
483
|
ensureStarted();
|
|
625
|
-
|
|
626
|
-
stream.push({
|
|
627
|
-
type: "toolcall_delta",
|
|
628
|
-
contentIndex: blockIndex(),
|
|
629
|
-
delta: JSON.stringify(toolCall.arguments),
|
|
630
|
-
partial: output,
|
|
631
|
-
});
|
|
632
|
-
stream.push({
|
|
633
|
-
type: "toolcall_end",
|
|
634
|
-
contentIndex: blockIndex(),
|
|
635
|
-
toolCall,
|
|
636
|
-
partial: output,
|
|
637
|
-
});
|
|
484
|
+
pushToolCallEvents(toolCall, blockIndex(), output, stream);
|
|
638
485
|
}
|
|
639
486
|
}
|
|
640
487
|
}
|
|
@@ -671,21 +518,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
671
518
|
}
|
|
672
519
|
|
|
673
520
|
if (currentBlock) {
|
|
674
|
-
|
|
675
|
-
stream.push({
|
|
676
|
-
type: "text_end",
|
|
677
|
-
contentIndex: blockIndex(),
|
|
678
|
-
content: currentBlock.text,
|
|
679
|
-
partial: output,
|
|
680
|
-
});
|
|
681
|
-
} else {
|
|
682
|
-
stream.push({
|
|
683
|
-
type: "thinking_end",
|
|
684
|
-
contentIndex: blockIndex(),
|
|
685
|
-
content: currentBlock.thinking,
|
|
686
|
-
partial: output,
|
|
687
|
-
});
|
|
688
|
-
}
|
|
521
|
+
pushBlockEndEvent(currentBlock, blockIndex(), output, stream);
|
|
689
522
|
}
|
|
690
523
|
|
|
691
524
|
return hasContent;
|
|
@@ -702,7 +535,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
702
535
|
if (emptyAttempt > 0) {
|
|
703
536
|
const backoffMs = EMPTY_STREAM_BASE_DELAY_MS * 2 ** (emptyAttempt - 1);
|
|
704
537
|
try {
|
|
705
|
-
await
|
|
538
|
+
await scheduler.wait(backoffMs, { signal: options?.signal });
|
|
706
539
|
} catch {
|
|
707
540
|
// Normalize AbortError to expected message for consistent error handling
|
|
708
541
|
throw new Error("Request was aborted");
|