@oh-my-pi/pi-ai 15.0.0 → 15.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -1
- package/package.json +6 -6
- package/src/index.ts +1 -0
- package/src/provider-models/ollama.ts +3 -11
- package/src/providers/anthropic.ts +21 -10
- package/src/providers/azure-openai-responses.ts +23 -84
- package/src/providers/gitlab-duo.ts +10 -4
- package/src/providers/google-gemini-cli.ts +39 -205
- package/src/providers/google-gemini-headers.ts +0 -100
- package/src/providers/google-shared.ts +446 -4
- package/src/providers/google-vertex.ts +46 -380
- package/src/providers/google.ts +27 -362
- package/src/providers/kimi.ts +15 -96
- package/src/providers/ollama.ts +4 -12
- package/src/providers/openai-anthropic-shim.ts +138 -0
- package/src/providers/openai-codex-responses.ts +42 -199
- package/src/providers/openai-completions-compat.ts +19 -9
- package/src/providers/openai-completions.ts +32 -31
- package/src/providers/openai-responses-shared.ts +143 -24
- package/src/providers/openai-responses.ts +25 -77
- package/src/providers/register-builtins.ts +35 -8
- package/src/providers/synthetic.ts +15 -102
- package/src/types.ts +31 -2
- package/src/utils/h2-fetch.ts +60 -0
- package/src/utils/http-inspector.ts +2 -2
- package/src/utils/idle-iterator.ts +1 -1
- package/src/utils/oauth/github-copilot.ts +6 -10
- package/src/utils/oauth/kimi.ts +4 -3
- package/src/utils/oauth/lm-studio.ts +0 -2
- package/src/utils/retry.ts +8 -130
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,22 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.0.2] - 2026-05-15
|
|
6
|
+
### Fixed
|
|
7
|
+
|
|
8
|
+
- Fixed `StreamOptions.fetch` typing to accept fetch-compatible override functions that do not expose `preconnect`, allowing custom fetch implementations to be used without type errors across runtimes
|
|
9
|
+
- Fixed Moonshot Kimi K2.6 forced tool calls to send `thinking: { type: "disabled" }`, avoiding `tool_choice 'specified' is incompatible with thinking enabled` 400s while preserving the requested named tool ([#1077](https://github.com/can1357/oh-my-pi/issues/1077)).
|
|
10
|
+
|
|
11
|
+
## [15.0.1] - 2026-05-14
|
|
12
|
+
### Breaking Changes
|
|
13
|
+
|
|
14
|
+
- Increased the minimum Bun runtime version to `>=1.3.14` for the `@oh-my-pi/pi-ai` package
|
|
15
|
+
|
|
16
|
+
### Added
|
|
17
|
+
|
|
18
|
+
- Added `installH2Fetch` to patch `globalThis.fetch` so HTTPS requests attempt HTTP/2 over ALPN with automatic HTTP/1.1 fallback when HTTP/2 is unsupported
|
|
19
|
+
- Added priority service-tier traffic to the `premiumRequests` accounting on OpenAI and OpenAI Codex providers. Sending `serviceTier: "priority"` now increments `usage.premiumRequests` by 1 per request, matching the existing GitHub Copilot premium-request budget semantics so downstream consumers (e.g. the `omp stats` "Premium Reqs" card and `/usage`) reflect priority traffic alongside Copilot premium calls.
|
|
20
|
+
|
|
5
21
|
## [15.0.0] - 2026-05-13
|
|
6
22
|
|
|
7
23
|
### Added
|
|
@@ -12,6 +28,11 @@
|
|
|
12
28
|
|
|
13
29
|
- Fixed OAuth credentials being silently disabled when two omp processes (or any two `AuthStorage` instances sharing an `agent.db`) race on token refresh. Anthropic rotates refresh tokens on every use, so the loser's `invalid_grant` response previously soft-deleted the row that the winner just rotated, forcing the user to `/login` again. `#tryOAuthCredential` now re-reads the row from disk before declaring a definitive failure: if the persisted `refresh` differs from the snapshot it tried, the peer-rotated credential is reloaded and the request retries against the fresh token instead of disabling the live row.
|
|
14
30
|
- Closed a remaining race window in OAuth refresh-failure handling: between re-reading the credential row to check for peer rotation and the subsequent soft-delete, another process could still complete a refresh and rotate the row, leaving us to disable the freshly-rotated credential by `id`. The disable now runs as a single CAS update conditioned on the row's `data` still matching the snapshot we tried to refresh, and on `disabled_cause IS NULL`. If the CAS reports 0 rows changed (peer rotation, or row already disabled by a concurrent failure on the same snapshot), we reload from disk and retry instead of mutating the wrong row or emitting a spurious `credential_disabled` event.
|
|
31
|
+
### Changed
|
|
32
|
+
- Lowered the default steady-state stream idle timeout from 120s to 30s while preserving the existing environment overrides.
|
|
33
|
+
|
|
34
|
+
### Fixed
|
|
35
|
+
- Lazy built-in provider streams now enforce the shared idle watchdog and abort stalled provider requests, so session auto-retry can continue after transient network drops instead of remaining stuck. Caller aborts still terminate as aborted.
|
|
15
36
|
|
|
16
37
|
## [14.9.3] - 2026-05-10
|
|
17
38
|
|
|
@@ -2344,4 +2365,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
|
|
|
2344
2365
|
|
|
2345
2366
|
## [0.9.4] - 2025-11-26
|
|
2346
2367
|
|
|
2347
|
-
Initial release with multi-provider LLM support.
|
|
2368
|
+
Initial release with multi-provider LLM support.
|
package/package.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "15.0.0",
|
|
4
|
+
"version": "15.0.2",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
|
-
"homepage": "https://
|
|
6
|
+
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
8
8
|
"contributors": [
|
|
9
9
|
"Mario Zechner"
|
|
@@ -46,8 +46,8 @@
|
|
|
46
46
|
"@aws-sdk/credential-provider-node": "^3.972.39",
|
|
47
47
|
"@bufbuild/protobuf": "^2.12.0",
|
|
48
48
|
"@google/genai": "^1.52.0",
|
|
49
|
-
"@oh-my-pi/pi-natives": "15.0.
|
|
50
|
-
"@oh-my-pi/pi-utils": "15.0.
|
|
49
|
+
"@oh-my-pi/pi-natives": "15.0.2",
|
|
50
|
+
"@oh-my-pi/pi-utils": "15.0.2",
|
|
51
51
|
"@sinclair/typebox": "^0.34.49",
|
|
52
52
|
"@smithy/node-http-handler": "^4.6.1",
|
|
53
53
|
"ajv": "^8.20.0",
|
|
@@ -58,10 +58,10 @@
|
|
|
58
58
|
"zod": "4.4.3"
|
|
59
59
|
},
|
|
60
60
|
"devDependencies": {
|
|
61
|
-
"@types/bun": "^1.3.
|
|
61
|
+
"@types/bun": "^1.3.14"
|
|
62
62
|
},
|
|
63
63
|
"engines": {
|
|
64
|
-
"bun": ">=1.3.
|
|
64
|
+
"bun": ">=1.3.14"
|
|
65
65
|
},
|
|
66
66
|
"files": [
|
|
67
67
|
"src",
|
package/src/index.ts
CHANGED
|
@@ -37,6 +37,7 @@ export * from "./usage/zai";
|
|
|
37
37
|
export * from "./utils/anthropic-auth";
|
|
38
38
|
export * from "./utils/discovery";
|
|
39
39
|
export * from "./utils/event-stream";
|
|
40
|
+
export * from "./utils/h2-fetch";
|
|
40
41
|
export * from "./utils/overflow";
|
|
41
42
|
export * from "./utils/retry";
|
|
42
43
|
export * from "./utils/schema";
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { fetchWithRetry } from "@oh-my-pi/pi-utils";
|
|
2
2
|
import type { ModelManagerOptions } from "../model-manager";
|
|
3
3
|
import { Effort } from "../model-thinking";
|
|
4
4
|
import type { ThinkingConfig } from "../types";
|
|
@@ -19,16 +19,7 @@ type OllamaShowResponse = {
|
|
|
19
19
|
model_info?: Record<string, unknown>;
|
|
20
20
|
};
|
|
21
21
|
|
|
22
|
-
const
|
|
23
|
-
|
|
24
|
-
async function fetchWithRetry(url: string, init: RequestInit): Promise<Response> {
|
|
25
|
-
for (let attempt = 0; attempt < MODEL_RETRY_DELAYS_MS.length; attempt++) {
|
|
26
|
-
const response = await fetch(url, init);
|
|
27
|
-
if (response.ok || response.status < 500) return response;
|
|
28
|
-
await abortableSleep(MODEL_RETRY_DELAYS_MS[attempt]!);
|
|
29
|
-
}
|
|
30
|
-
return fetch(url, init);
|
|
31
|
-
}
|
|
22
|
+
const OLLAMA_RETRY_DELAYS_MS = [2_000, 5_000, 10_000];
|
|
32
23
|
|
|
33
24
|
function trimTrailingSlash(value: string): string {
|
|
34
25
|
return value.endsWith("/") ? value.slice(0, -1) : value;
|
|
@@ -109,6 +100,7 @@ export function ollamaCloudModelManagerOptions(
|
|
|
109
100
|
const response = await fetchWithRetry(`${baseUrl}/api/tags`, {
|
|
110
101
|
method: "GET",
|
|
111
102
|
headers: createCloudHeaders(apiKey),
|
|
103
|
+
defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
|
|
112
104
|
});
|
|
113
105
|
if (!response.ok) {
|
|
114
106
|
throw new Error(`HTTP ${response.status} from ${baseUrl}/api/tags`);
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as nodeCrypto from "node:crypto";
|
|
2
2
|
import * as fs from "node:fs";
|
|
3
|
+
import { scheduler } from "node:timers/promises";
|
|
3
4
|
import * as tls from "node:tls";
|
|
4
5
|
import Anthropic, { type ClientOptions as AnthropicSdkClientOptions } from "@anthropic-ai/sdk";
|
|
5
6
|
import type {
|
|
@@ -8,7 +9,14 @@ import type {
|
|
|
8
9
|
MessageParam,
|
|
9
10
|
RawMessageStreamEvent,
|
|
10
11
|
} from "@anthropic-ai/sdk/resources/messages";
|
|
11
|
-
import {
|
|
12
|
+
import {
|
|
13
|
+
$env,
|
|
14
|
+
extractHttpStatusFromError,
|
|
15
|
+
isEnoent,
|
|
16
|
+
isRetryableError,
|
|
17
|
+
isUnexpectedSocketCloseMessage,
|
|
18
|
+
readSseEvents,
|
|
19
|
+
} from "@oh-my-pi/pi-utils";
|
|
12
20
|
import { hasOpus47ApiRestrictions, mapEffortToAnthropicAdaptiveEffort } from "../model-thinking";
|
|
13
21
|
import { calculateCost } from "../models";
|
|
14
22
|
import { getEnvApiKey, OUTPUT_FALLBACK_BUFFER } from "../stream";
|
|
@@ -17,6 +25,7 @@ import type {
|
|
|
17
25
|
AssistantMessage,
|
|
18
26
|
CacheRetention,
|
|
19
27
|
Context,
|
|
28
|
+
FetchImpl,
|
|
20
29
|
ImageContent,
|
|
21
30
|
Message,
|
|
22
31
|
Model,
|
|
@@ -48,12 +57,7 @@ import { getStreamFirstEventTimeoutMs, getStreamIdleTimeoutMs, iterateWithIdleTi
|
|
|
48
57
|
import { parseJsonWithRepair, parseStreamingJson } from "../utils/json-parse";
|
|
49
58
|
import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
|
|
50
59
|
import { notifyProviderResponse } from "../utils/provider-response";
|
|
51
|
-
import {
|
|
52
|
-
extractHttpStatusFromError,
|
|
53
|
-
isCopilotRetryableError,
|
|
54
|
-
isRetryableError,
|
|
55
|
-
isUnexpectedSocketCloseMessage,
|
|
56
|
-
} from "../utils/retry";
|
|
60
|
+
import { isCopilotTransientModelError } from "../utils/retry";
|
|
57
61
|
import { COMBINATOR_KEYS, NO_STRICT } from "../utils/schema";
|
|
58
62
|
import { notifyRawSseEvent, wrapFetchForSseDebug } from "../utils/sse-debug";
|
|
59
63
|
import {
|
|
@@ -538,6 +542,7 @@ export type AnthropicClientOptionsArgs = {
|
|
|
538
542
|
isOAuth?: boolean;
|
|
539
543
|
hasTools?: boolean;
|
|
540
544
|
onSseEvent?: AnthropicOptions["onSseEvent"];
|
|
545
|
+
fetch?: FetchImpl;
|
|
541
546
|
};
|
|
542
547
|
|
|
543
548
|
export type AnthropicClientOptionsResult = {
|
|
@@ -844,7 +849,7 @@ function isProviderRetryableStreamEnvelopeError(error: unknown): boolean {
|
|
|
844
849
|
|
|
845
850
|
export function isProviderRetryableError(error: unknown, provider?: string): boolean {
|
|
846
851
|
if (!(error instanceof Error)) return false;
|
|
847
|
-
if (provider === "github-copilot" &&
|
|
852
|
+
if (provider === "github-copilot" && isCopilotTransientModelError(error)) return true;
|
|
848
853
|
const msg = error.message.toLowerCase();
|
|
849
854
|
if (
|
|
850
855
|
isUnexpectedSocketCloseMessage(msg) ||
|
|
@@ -962,6 +967,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
962
967
|
isOAuth: options?.isOAuth,
|
|
963
968
|
hasTools: !!context.tools?.length,
|
|
964
969
|
onSseEvent: options?.onSseEvent,
|
|
970
|
+
fetch: options?.fetch,
|
|
965
971
|
});
|
|
966
972
|
client = created.client;
|
|
967
973
|
isOAuthToken = created.isOAuthToken;
|
|
@@ -1287,7 +1293,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1287
1293
|
}
|
|
1288
1294
|
providerRetryAttempt++;
|
|
1289
1295
|
const delayMs = PROVIDER_BASE_DELAY_MS * 2 ** (providerRetryAttempt - 1);
|
|
1290
|
-
await
|
|
1296
|
+
await scheduler.wait(delayMs, { signal: options?.signal });
|
|
1291
1297
|
output.content.length = 0;
|
|
1292
1298
|
output.responseId = undefined;
|
|
1293
1299
|
output.errorMessage = strictFallbackErrorMessage;
|
|
@@ -1402,7 +1408,12 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
1402
1408
|
const baseUrl = resolveAnthropicBaseUrl(model, apiKey);
|
|
1403
1409
|
const foundryCustomHeaders = resolveAnthropicCustomHeaders(model);
|
|
1404
1410
|
const tlsFetchOptions = buildClaudeCodeTlsFetchOptions(model, baseUrl);
|
|
1405
|
-
const
|
|
1411
|
+
const baseFetch = args.fetch ?? fetch;
|
|
1412
|
+
const debugFetch = onSseEvent
|
|
1413
|
+
? wrapFetchForSseDebug(baseFetch, event => onSseEvent(event, model))
|
|
1414
|
+
: args.fetch
|
|
1415
|
+
? baseFetch
|
|
1416
|
+
: undefined;
|
|
1406
1417
|
if (model.provider === "github-copilot") {
|
|
1407
1418
|
const copilotApiKey = parseGitHubCopilotApiKey(apiKey).accessToken;
|
|
1408
1419
|
const betaFeatures = [...extraBetas];
|
|
@@ -6,17 +6,15 @@ import type {
|
|
|
6
6
|
ResponseInput,
|
|
7
7
|
} from "openai/resources/responses/responses";
|
|
8
8
|
import { getEnvApiKey } from "../stream";
|
|
9
|
-
import {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
type Tool,
|
|
19
|
-
type ToolChoice,
|
|
9
|
+
import type {
|
|
10
|
+
AssistantMessage,
|
|
11
|
+
Context,
|
|
12
|
+
Model,
|
|
13
|
+
ServiceTier,
|
|
14
|
+
StreamFunction,
|
|
15
|
+
StreamOptions,
|
|
16
|
+
Tool,
|
|
17
|
+
ToolChoice,
|
|
20
18
|
} from "../types";
|
|
21
19
|
import { normalizeSystemPrompts } from "../utils";
|
|
22
20
|
import { createAbortSourceTracker } from "../utils/abort";
|
|
@@ -33,8 +31,11 @@ import { mapToOpenAIResponsesToolChoice } from "../utils/tool-choice";
|
|
|
33
31
|
import { normalizeOpenAIResponsesPromptCacheKey, supportsDeveloperRole } from "./openai-responses";
|
|
34
32
|
import {
|
|
35
33
|
appendResponsesToolResultMessages,
|
|
34
|
+
applyCommonResponsesSamplingParams,
|
|
35
|
+
applyResponsesReasoningParams,
|
|
36
36
|
convertResponsesAssistantMessage,
|
|
37
37
|
convertResponsesInputContent,
|
|
38
|
+
createInitialResponsesAssistantMessage,
|
|
38
39
|
normalizeResponsesToolCallIdForTransform,
|
|
39
40
|
processResponsesStream,
|
|
40
41
|
} from "./openai-responses-shared";
|
|
@@ -101,23 +102,11 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
|
|
|
101
102
|
let firstTokenTime: number | undefined;
|
|
102
103
|
const deploymentName = resolveDeploymentName(model, options);
|
|
103
104
|
|
|
104
|
-
const output: AssistantMessage =
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
model: model.id,
|
|
110
|
-
usage: {
|
|
111
|
-
input: 0,
|
|
112
|
-
output: 0,
|
|
113
|
-
cacheRead: 0,
|
|
114
|
-
cacheWrite: 0,
|
|
115
|
-
totalTokens: 0,
|
|
116
|
-
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
|
117
|
-
},
|
|
118
|
-
stopReason: "stop",
|
|
119
|
-
timestamp: Date.now(),
|
|
120
|
-
};
|
|
105
|
+
const output: AssistantMessage = createInitialResponsesAssistantMessage(
|
|
106
|
+
"azure-openai-responses",
|
|
107
|
+
model.provider,
|
|
108
|
+
model.id,
|
|
109
|
+
);
|
|
121
110
|
let rawRequestDump: RawHttpRequestDump | undefined;
|
|
122
111
|
const abortTracker = createAbortSourceTracker(options?.signal);
|
|
123
112
|
const firstEventTimeoutAbortError = new Error(AZURE_OPENAI_RESPONSES_FIRST_EVENT_TIMEOUT_MESSAGE);
|
|
@@ -252,6 +241,7 @@ function createClient(model: Model<"azure-openai-responses">, apiKey: string, op
|
|
|
252
241
|
|
|
253
242
|
const { baseUrl, apiVersion } = resolveAzureConfig(model, options);
|
|
254
243
|
|
|
244
|
+
const baseFetch = options?.fetch ?? fetch;
|
|
255
245
|
return new AzureOpenAI({
|
|
256
246
|
apiKey,
|
|
257
247
|
apiVersion,
|
|
@@ -259,7 +249,9 @@ function createClient(model: Model<"azure-openai-responses">, apiKey: string, op
|
|
|
259
249
|
maxRetries: 5,
|
|
260
250
|
defaultHeaders: headers,
|
|
261
251
|
baseURL: baseUrl,
|
|
262
|
-
fetch: options?.onSseEvent
|
|
252
|
+
fetch: options?.onSseEvent
|
|
253
|
+
? wrapFetchForSseDebug(baseFetch, event => options.onSseEvent?.(event, model))
|
|
254
|
+
: baseFetch,
|
|
263
255
|
});
|
|
264
256
|
}
|
|
265
257
|
|
|
@@ -279,31 +271,7 @@ function buildParams(
|
|
|
279
271
|
prompt_cache_key: normalizeOpenAIResponsesPromptCacheKey(options?.sessionId),
|
|
280
272
|
};
|
|
281
273
|
|
|
282
|
-
|
|
283
|
-
params.max_output_tokens = options?.maxTokens;
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
if (options?.temperature !== undefined) {
|
|
287
|
-
params.temperature = options?.temperature;
|
|
288
|
-
}
|
|
289
|
-
if (options?.topP !== undefined) {
|
|
290
|
-
params.top_p = options.topP;
|
|
291
|
-
}
|
|
292
|
-
if (options?.topK !== undefined) {
|
|
293
|
-
params.top_k = options.topK;
|
|
294
|
-
}
|
|
295
|
-
if (options?.minP !== undefined) {
|
|
296
|
-
params.min_p = options.minP;
|
|
297
|
-
}
|
|
298
|
-
if (options?.presencePenalty !== undefined) {
|
|
299
|
-
params.presence_penalty = options.presencePenalty;
|
|
300
|
-
}
|
|
301
|
-
if (options?.repetitionPenalty !== undefined) {
|
|
302
|
-
params.repetition_penalty = options.repetitionPenalty;
|
|
303
|
-
}
|
|
304
|
-
if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
|
|
305
|
-
params.service_tier = options.serviceTier;
|
|
306
|
-
}
|
|
274
|
+
applyCommonResponsesSamplingParams(params, options, model.provider);
|
|
307
275
|
|
|
308
276
|
if (context.tools) {
|
|
309
277
|
params.tools = convertTools(context.tools);
|
|
@@ -312,36 +280,7 @@ function buildParams(
|
|
|
312
280
|
}
|
|
313
281
|
}
|
|
314
282
|
|
|
315
|
-
|
|
316
|
-
// Always request encrypted reasoning content so reasoning items can be
|
|
317
|
-
// replayed in multi-turn conversations when store is false (items aren't
|
|
318
|
-
// persisted server-side, so we must include the full content).
|
|
319
|
-
// See: https://github.com/can1357/oh-my-pi/issues/41
|
|
320
|
-
params.include = ["reasoning.encrypted_content"];
|
|
321
|
-
|
|
322
|
-
if (options?.reasoning || options?.reasoningSummary !== undefined) {
|
|
323
|
-
const reasoningParams: NonNullable<typeof params.reasoning> = {
|
|
324
|
-
effort: options?.reasoning || "medium",
|
|
325
|
-
};
|
|
326
|
-
if (options?.reasoningSummary !== null) {
|
|
327
|
-
reasoningParams.summary = options?.reasoningSummary || "auto";
|
|
328
|
-
}
|
|
329
|
-
params.reasoning = reasoningParams;
|
|
330
|
-
} else {
|
|
331
|
-
if (model.name.toLowerCase().startsWith("gpt-5")) {
|
|
332
|
-
// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
|
|
333
|
-
messages.push({
|
|
334
|
-
role: "developer",
|
|
335
|
-
content: [
|
|
336
|
-
{
|
|
337
|
-
type: "input_text",
|
|
338
|
-
text: "# Juice: 0 !important",
|
|
339
|
-
},
|
|
340
|
-
],
|
|
341
|
-
});
|
|
342
|
-
}
|
|
343
|
-
}
|
|
344
|
-
}
|
|
283
|
+
applyResponsesReasoningParams(params, model, options, messages);
|
|
345
284
|
|
|
346
285
|
return params;
|
|
347
286
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { ANTHROPIC_THINKING, mapAnthropicToolChoice } from "../stream";
|
|
2
|
-
import type { Api, Context, Model, SimpleStreamOptions } from "../types";
|
|
2
|
+
import type { Api, Context, FetchImpl, Model, SimpleStreamOptions } from "../types";
|
|
3
3
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
4
4
|
import type { OpenAICompletionsOptions } from "./openai-completions";
|
|
5
5
|
import type { OpenAIResponsesOptions } from "./openai-responses";
|
|
@@ -172,13 +172,16 @@ interface DirectAccessToken {
|
|
|
172
172
|
|
|
173
173
|
const directAccessCache = new Map<string, DirectAccessToken>();
|
|
174
174
|
|
|
175
|
-
async function getDirectAccessToken(
|
|
175
|
+
async function getDirectAccessToken(
|
|
176
|
+
gitlabAccessToken: string,
|
|
177
|
+
fetchImpl: FetchImpl = fetch,
|
|
178
|
+
): Promise<DirectAccessToken> {
|
|
176
179
|
const cached = directAccessCache.get(gitlabAccessToken);
|
|
177
180
|
if (cached && cached.expiresAt > Date.now()) {
|
|
178
181
|
return cached;
|
|
179
182
|
}
|
|
180
183
|
|
|
181
|
-
const response = await
|
|
184
|
+
const response = await fetchImpl(`${GITLAB_COM_URL}/api/v4/ai/third_party_agents/direct_access`, {
|
|
182
185
|
method: "POST",
|
|
183
186
|
headers: {
|
|
184
187
|
Authorization: `Bearer ${gitlabAccessToken}`,
|
|
@@ -240,7 +243,7 @@ export function streamGitLabDuo(
|
|
|
240
243
|
throw new Error(`Unsupported GitLab Duo model: ${model.id}`);
|
|
241
244
|
}
|
|
242
245
|
|
|
243
|
-
const directAccess = await getDirectAccessToken(options.apiKey);
|
|
246
|
+
const directAccess = await getDirectAccessToken(options.apiKey, options.fetch);
|
|
244
247
|
const headers = {
|
|
245
248
|
...directAccess.headers,
|
|
246
249
|
...options.headers,
|
|
@@ -278,6 +281,7 @@ export function streamGitLabDuo(
|
|
|
278
281
|
onPayload: options.onPayload,
|
|
279
282
|
onResponse: options.onResponse,
|
|
280
283
|
onSseEvent: options.onSseEvent,
|
|
284
|
+
fetch: options.fetch,
|
|
281
285
|
thinkingEnabled: Boolean(reasoningEffort) && model.reasoning,
|
|
282
286
|
thinkingBudgetTokens: reasoningEffort
|
|
283
287
|
? (options.thinkingBudgets?.[reasoningEffort] ?? ANTHROPIC_THINKING[reasoningEffort])
|
|
@@ -314,6 +318,7 @@ export function streamGitLabDuo(
|
|
|
314
318
|
onPayload: options.onPayload,
|
|
315
319
|
onResponse: options.onResponse,
|
|
316
320
|
onSseEvent: options.onSseEvent,
|
|
321
|
+
fetch: options.fetch,
|
|
317
322
|
reasoning: reasoningEffort,
|
|
318
323
|
toolChoice: options.toolChoice,
|
|
319
324
|
} satisfies OpenAIResponsesOptions,
|
|
@@ -345,6 +350,7 @@ export function streamGitLabDuo(
|
|
|
345
350
|
onPayload: options.onPayload,
|
|
346
351
|
onResponse: options.onResponse,
|
|
347
352
|
onSseEvent: options.onSseEvent,
|
|
353
|
+
fetch: options.fetch,
|
|
348
354
|
reasoning: reasoningEffort,
|
|
349
355
|
toolChoice: options.toolChoice,
|
|
350
356
|
} satisfies OpenAICompletionsOptions,
|