@oh-my-pi/pi-ai 15.1.6 → 15.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/dist/types/providers/anthropic.d.ts +20 -1
- package/dist/types/types.d.ts +46 -9
- package/package.json +2 -2
- package/src/providers/anthropic.ts +109 -1
- package/src/providers/openai-codex-responses.ts +4 -3
- package/src/providers/openai-completions.ts +4 -3
- package/src/providers/openai-responses-shared.ts +4 -3
- package/src/stream.ts +6 -0
- package/src/types.ts +73 -16
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.1.7] - 2026-05-19
|
|
6
|
+
### Added
|
|
7
|
+
|
|
8
|
+
- Added Anthropic realization of `serviceTier: "priority"`. The anthropic-messages provider now sets `speed: "fast"` on the request and appends the `fast-mode-2026-02-01` beta to `Anthropic-Beta` whenever the caller passes `serviceTier: "priority"`. When the server rejects an unsupported model with `invalid_request_error`, the provider transparently retries the same turn without the fast-mode signal (mirroring the strict-tools fallback pattern), persists the disable via a new `providerSessionState.fastModeDisabled` flag so subsequent requests in the session skip the field, and surfaces the action via the new `AssistantMessage.disabledFeatures` array (id `"priority"`) so callers can sync user-facing toggles. A new `clearAnthropicFastModeFallback(providerSessionState)` helper lets callers re-arm priority after the auto-fallback fired.
|
|
9
|
+
- Added scoped `ServiceTier` values: `"openai-only"` (priority on `openai`/`openai-codex`, ignored elsewhere) and `"claude-only"` (priority on direct `anthropic`, ignored on Bedrock/Vertex Claude and elsewhere). A new `resolveServiceTier(serviceTier, provider)` helper computes the effective tier for the provider; existing OpenAI/Anthropic provider code routes through it, so `service_tier` and Anthropic fast-mode emission both respect scope. `getPriorityPremiumRequests` now counts Anthropic+priority as one premium request (previously zero) and continues to ignore providers that drop the field on the wire.
|
|
10
|
+
|
|
11
|
+
### Fixed
|
|
12
|
+
|
|
13
|
+
- Fixed Anthropic fast mode (`serviceTier: "priority"`) looping on 429 `rate_limit_error: "Extra usage is required for fast mode."` for accounts without the extra-usage entitlement. `isAnthropicFastModeUnsupportedError` now matches the 429 phrasing in addition to the 400 `invalid_request_error` "does not support the `speed` parameter" case, so the provider drops `speed: "fast"` on the in-turn retry, sets `providerSessionState.fastModeDisabled` for the remainder of the session, and surfaces `disabledFeatures: ["priority"]` to the caller instead of retrying with the same payload until `PROVIDER_MAX_RETRIES` is exhausted.
|
|
14
|
+
|
|
5
15
|
## [15.1.6] - 2026-05-19
|
|
6
16
|
|
|
7
17
|
### Fixed
|
|
package/dist/types/providers/anthropic.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import Anthropic, { type ClientOptions as AnthropicSdkClientOptions } from "@anthropic-ai/sdk";
|
|
2
2
|
import type { MessageParam } from "@anthropic-ai/sdk/resources/messages";
|
|
3
|
-
import type { FetchImpl, Message, Model, SimpleStreamOptions, StreamFunction, StreamOptions, Usage } from "../types";
|
|
3
|
+
import type { FetchImpl, Message, Model, ProviderSessionState, ServiceTier, SimpleStreamOptions, StreamFunction, StreamOptions, Usage } from "../types";
|
|
4
4
|
export type AnthropicHeaderOptions = {
|
|
5
5
|
apiKey: string;
|
|
6
6
|
baseUrl?: string;
|
|
@@ -17,6 +17,15 @@ type AnthropicCacheControl = {
|
|
|
17
17
|
type: "ephemeral";
|
|
18
18
|
ttl?: "1h" | "5m";
|
|
19
19
|
};
|
|
20
|
+
/**
|
|
21
|
+
* Clears the in-session "server rejected fast mode" sticky flag. Call when the
|
|
22
|
+
* caller is explicitly re-arming `serviceTier: "priority"` (e.g. user toggled
|
|
23
|
+
* `/fast on` after a previous turn auto-disabled it) so the next request
|
|
24
|
+
* actually carries `speed: "fast"` again. No-op when the map or state entry
|
|
25
|
+
* hasn't been materialized yet.
|
|
26
|
+
*/
|
|
27
|
+
export declare function clearAnthropicFastModeFallback(providerSessionState: Map<string, ProviderSessionState> | undefined): void;
|
|
28
|
+
export declare function isAnthropicFastModeUnsupportedError(error: unknown): boolean;
|
|
20
29
|
export declare const claudeCodeVersion = "2.1.63";
|
|
21
30
|
export declare const claudeToolPrefix: string;
|
|
22
31
|
export declare const claudeCodeSystemInstruction = "You are a Claude agent, built on Anthropic's Claude Agent SDK.";
|
|
@@ -77,6 +86,16 @@ export interface AnthropicOptions extends StreamOptions {
|
|
|
77
86
|
name: string;
|
|
78
87
|
};
|
|
79
88
|
betas?: string[] | string;
|
|
89
|
+
/**
|
|
90
|
+
* Realization of `serviceTier: "priority"` on Anthropic models. When
|
|
91
|
+
* `"priority"`, sets `speed: "fast"` on the request and appends the
|
|
92
|
+
* `fast-mode-2026-02-01` beta header. Anthropic rejects unsupported models
|
|
93
|
+
* with `invalid_request_error`, which triggers an in-provider one-shot
|
|
94
|
+
* fallback (see `fastModeDisabled` provider state).
|
|
95
|
+
*
|
|
96
|
+
* Other `ServiceTier` values are currently ignored on this provider.
|
|
97
|
+
*/
|
|
98
|
+
serviceTier?: ServiceTier;
|
|
80
99
|
/** Force OAuth bearer auth mode for proxy tokens that don't match Anthropic token prefixes. */
|
|
81
100
|
isOAuth?: boolean;
|
|
82
101
|
/**
|
package/dist/types/types.d.ts
CHANGED
|
@@ -69,18 +69,47 @@ export type ToolChoice = "auto" | "none" | "any" | "required" | {
|
|
|
69
69
|
name: string;
|
|
70
70
|
};
|
|
71
71
|
export type CacheRetention = "none" | "short" | "long";
|
|
72
|
-
/** OpenAI service tier for processing priority. Only applies to OpenAI-compatible APIs. */
|
|
73
|
-
export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority";
|
|
74
|
-
export declare function shouldSendServiceTier(serviceTier?: ServiceTier | null, provider?: Provider): serviceTier is "flex" | "scale" | "priority";
|
|
75
72
|
/**
|
|
76
|
-
*
|
|
77
|
-
* a provider that supports it. Mirrors GitHub Copilot's `premiumRequests`
|
|
78
|
-
* accounting so the "premium requests" stat aggregates priority traffic too.
|
|
73
|
+
* Service tier hint for processing priority / cost control.
|
|
79
74
|
*
|
|
80
|
-
*
|
|
81
|
-
* `
|
|
75
|
+
* The unscoped values (`"auto"`, `"default"`, `"flex"`, `"scale"`,
|
|
76
|
+
* `"priority"`) are passed through to providers that understand them
|
|
77
|
+
* (OpenAI's `service_tier` field directly; Anthropic translates
|
|
78
|
+
* `"priority"` into `speed: "fast"` on supported Opus models).
|
|
79
|
+
*
|
|
80
|
+
* The scoped values target a specific provider family and behave as the
|
|
81
|
+
* unscoped value on the matching provider, or `undefined` everywhere else.
|
|
82
|
+
* They let users opt into priority on one family without paying premium
|
|
83
|
+
* costs on the other when switching models mid-session.
|
|
84
|
+
*
|
|
85
|
+
* - `"openai-only"` → `"priority"` on `openai` and `openai-codex`; ignored elsewhere.
|
|
86
|
+
* - `"claude-only"` → `"priority"` on direct `anthropic` (not Bedrock/Vertex Claude).
|
|
87
|
+
*/
|
|
88
|
+
export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority" | "openai-only" | "claude-only";
|
|
89
|
+
/** Resolved tier — one of the values that providers actually consume on the wire. */
|
|
90
|
+
export type ResolvedServiceTier = Exclude<ServiceTier, "openai-only" | "claude-only">;
|
|
91
|
+
/**
|
|
92
|
+
* Resolves a possibly scoped `ServiceTier` to the effective tier for the
|
|
93
|
+
* given provider. Scoped values match their target family and otherwise
|
|
94
|
+
* collapse to `undefined`; unscoped values pass through unchanged.
|
|
95
|
+
*/
|
|
96
|
+
export declare function resolveServiceTier(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): ResolvedServiceTier | undefined;
|
|
97
|
+
/**
|
|
98
|
+
* True when the (possibly scoped) tier should be sent as OpenAI's
|
|
99
|
+
* `service_tier` request field for the given provider. Non-OpenAI
|
|
100
|
+
* providers, unsupported tiers (`"auto"`, `"default"`), and scope
|
|
101
|
+
* mismatches all return false.
|
|
82
102
|
*/
|
|
83
|
-
export declare function
|
|
103
|
+
export declare function shouldSendServiceTier(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): boolean;
|
|
104
|
+
/**
|
|
105
|
+
* Premium-request weight contributed by sending priority to a provider
|
|
106
|
+
* that supports it. Mirrors GitHub Copilot's `premiumRequests` accounting
|
|
107
|
+
* so the "premium requests" stat aggregates priority traffic across the
|
|
108
|
+
* OpenAI family and Anthropic fast-mode realizations.
|
|
109
|
+
*
|
|
110
|
+
* Returns 1 per resolved priority request, 0 otherwise.
|
|
111
|
+
*/
|
|
112
|
+
export declare function getPriorityPremiumRequests(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): number;
|
|
84
113
|
export interface ProviderSessionState {
|
|
85
114
|
close(): void;
|
|
86
115
|
}
|
|
@@ -371,6 +400,14 @@ export interface AssistantMessage {
|
|
|
371
400
|
errorMessage?: string;
|
|
372
401
|
/** HTTP status surfaced by the provider when the request failed. Populated by every provider's catch block alongside `errorMessage` so consumers (auth retry, telemetry, UI) can branch without regex-scraping the message. */
|
|
373
402
|
errorStatus?: number;
|
|
403
|
+
/**
|
|
404
|
+
* Stable identifiers for request features the provider silently dropped
|
|
405
|
+
* during this turn (e.g. `"priority"`). Set when a server-side rejection
|
|
406
|
+
* triggered an in-provider fallback retry that succeeded without the
|
|
407
|
+
* feature. Callers can use this to sync user-facing toggles back to the
|
|
408
|
+
* server's actual state.
|
|
409
|
+
*/
|
|
410
|
+
disabledFeatures?: string[];
|
|
374
411
|
/** Provider-specific opaque payload used to reconstruct transport-native history. */
|
|
375
412
|
providerPayload?: ProviderPayload;
|
|
376
413
|
timestamp: number;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "15.1.6",
|
|
4
|
+
"version": "15.1.7",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -43,7 +43,7 @@
|
|
|
43
43
|
"dependencies": {
|
|
44
44
|
"@anthropic-ai/sdk": "^0.94.0",
|
|
45
45
|
"@bufbuild/protobuf": "^2.12.0",
|
|
46
|
-
"@oh-my-pi/pi-utils": "15.1.6",
|
|
46
|
+
"@oh-my-pi/pi-utils": "15.1.7",
|
|
47
47
|
"openai": "^6.36.0",
|
|
48
48
|
"partial-json": "^0.1.7",
|
|
49
49
|
"zod": "4.4.3"
|
|
package/src/providers/anthropic.ts
CHANGED
|
@@ -32,6 +32,7 @@ import type {
|
|
|
32
32
|
Model,
|
|
33
33
|
ProviderSessionState,
|
|
34
34
|
RedactedThinkingContent,
|
|
35
|
+
ServiceTier,
|
|
35
36
|
SimpleStreamOptions,
|
|
36
37
|
StopReason,
|
|
37
38
|
StreamFunction,
|
|
@@ -43,6 +44,7 @@ import type {
|
|
|
43
44
|
ToolResultMessage,
|
|
44
45
|
Usage,
|
|
45
46
|
} from "../types";
|
|
47
|
+
import { resolveServiceTier } from "../types";
|
|
46
48
|
import {
|
|
47
49
|
isAnthropicOAuthToken,
|
|
48
50
|
isRecord,
|
|
@@ -111,6 +113,7 @@ const claudeCodeBetaDefaults = [
|
|
|
111
113
|
];
|
|
112
114
|
const fineGrainedToolStreamingBeta = "fine-grained-tool-streaming-2025-05-14";
|
|
113
115
|
const interleavedThinkingBeta = "interleaved-thinking-2025-05-14";
|
|
116
|
+
const fastModeBeta = "fast-mode-2026-02-01";
|
|
114
117
|
|
|
115
118
|
function getHeaderCaseInsensitive(headers: Record<string, string> | undefined, headerName: string): string | undefined {
|
|
116
119
|
if (!headers) return undefined;
|
|
@@ -224,13 +227,16 @@ const ANTHROPIC_PROVIDER_SESSION_STATE_KEY = "anthropic-messages";
|
|
|
224
227
|
|
|
225
228
|
type AnthropicProviderSessionState = ProviderSessionState & {
|
|
226
229
|
strictToolsDisabled: boolean;
|
|
230
|
+
fastModeDisabled: boolean;
|
|
227
231
|
};
|
|
228
232
|
|
|
229
233
|
function createAnthropicProviderSessionState(): AnthropicProviderSessionState {
|
|
230
234
|
const state: AnthropicProviderSessionState = {
|
|
231
235
|
strictToolsDisabled: false,
|
|
236
|
+
fastModeDisabled: false,
|
|
232
237
|
close: () => {
|
|
233
238
|
state.strictToolsDisabled = false;
|
|
239
|
+
state.fastModeDisabled = false;
|
|
234
240
|
},
|
|
235
241
|
};
|
|
236
242
|
return state;
|
|
@@ -249,6 +255,23 @@ function getAnthropicProviderSessionState(
|
|
|
249
255
|
return created;
|
|
250
256
|
}
|
|
251
257
|
|
|
258
|
+
/**
|
|
259
|
+
* Clears the in-session "server rejected fast mode" sticky flag. Call when the
|
|
260
|
+
* caller is explicitly re-arming `serviceTier: "priority"` (e.g. user toggled
|
|
261
|
+
* `/fast on` after a previous turn auto-disabled it) so the next request
|
|
262
|
+
* actually carries `speed: "fast"` again. No-op when the map or state entry
|
|
263
|
+
* hasn't been materialized yet.
|
|
264
|
+
*/
|
|
265
|
+
export function clearAnthropicFastModeFallback(
|
|
266
|
+
providerSessionState: Map<string, ProviderSessionState> | undefined,
|
|
267
|
+
): void {
|
|
268
|
+
if (!providerSessionState) return;
|
|
269
|
+
const state = providerSessionState.get(ANTHROPIC_PROVIDER_SESSION_STATE_KEY) as
|
|
270
|
+
| AnthropicProviderSessionState
|
|
271
|
+
| undefined;
|
|
272
|
+
if (state) state.fastModeDisabled = false;
|
|
273
|
+
}
|
|
274
|
+
|
|
252
275
|
function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
|
|
253
276
|
if (extractHttpStatusFromError(error) !== 400) return false;
|
|
254
277
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -258,11 +281,45 @@ function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
|
|
|
258
281
|
return /invalid_request_error/i.test(message) && (isStrictGrammarTooLarge || isSchemaCompilationTooComplex);
|
|
259
282
|
}
|
|
260
283
|
|
|
284
|
+
export function isAnthropicFastModeUnsupportedError(error: unknown): boolean {
|
|
285
|
+
const status = extractHttpStatusFromError(error);
|
|
286
|
+
if (status !== 400 && status !== 429) return false;
|
|
287
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
288
|
+
// 400 invalid_request_error — model doesn't accept `speed` at all.
|
|
289
|
+
// Observed: "'claude-opus-4-5-20251101' does not support the `speed` parameter."
|
|
290
|
+
// Stay tolerant of phrasing drift ("is not supported", quoted vs backticked field).
|
|
291
|
+
if (
|
|
292
|
+
status === 400 &&
|
|
293
|
+
/invalid_request_error/i.test(message) &&
|
|
294
|
+
/\bspeed\b/i.test(message) &&
|
|
295
|
+
/not support/i.test(message)
|
|
296
|
+
) {
|
|
297
|
+
return true;
|
|
298
|
+
}
|
|
299
|
+
// 429 rate_limit_error — account lacks the extra-usage entitlement fast mode requires.
|
|
300
|
+
// Observed: "Extra usage is required for fast mode."
|
|
301
|
+
if (status === 429 && /rate_limit_error/i.test(message) && /fast mode/i.test(message)) {
|
|
302
|
+
return true;
|
|
303
|
+
}
|
|
304
|
+
return false;
|
|
305
|
+
}
|
|
306
|
+
|
|
261
307
|
function hasStrictAnthropicTools(params: MessageCreateParamsStreaming): boolean {
|
|
262
308
|
const tools = params.tools as Array<{ strict?: unknown }> | undefined;
|
|
263
309
|
return tools?.some(tool => tool.strict === true) ?? false;
|
|
264
310
|
}
|
|
265
311
|
|
|
312
|
+
/**
|
|
313
|
+
* `speed` lives on `BetaMessageCreateParams` (client.beta.messages) but this
|
|
314
|
+
* provider posts via `client.messages.create`, whose param type doesn't
|
|
315
|
+
* include it. This alias narrows the cast to one place.
|
|
316
|
+
*/
|
|
317
|
+
type ParamsWithSpeed = MessageCreateParamsStreaming & { speed?: "fast" };
|
|
318
|
+
|
|
319
|
+
function dropAnthropicFastMode(params: MessageCreateParamsStreaming): void {
|
|
320
|
+
delete (params as ParamsWithSpeed).speed;
|
|
321
|
+
}
|
|
322
|
+
|
|
266
323
|
function dropAnthropicStrictTools(params: MessageCreateParamsStreaming): void {
|
|
267
324
|
const tools = params.tools as Array<{ strict?: unknown }> | undefined;
|
|
268
325
|
if (!tools) return;
|
|
@@ -526,6 +583,16 @@ export interface AnthropicOptions extends StreamOptions {
|
|
|
526
583
|
interleavedThinking?: boolean;
|
|
527
584
|
toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
|
|
528
585
|
betas?: string[] | string;
|
|
586
|
+
/**
|
|
587
|
+
* Realization of `serviceTier: "priority"` on Anthropic models. When
|
|
588
|
+
* `"priority"`, sets `speed: "fast"` on the request and appends the
|
|
589
|
+
* `fast-mode-2026-02-01` beta header. Anthropic rejects unsupported models
|
|
590
|
+
* with `invalid_request_error`, which triggers an in-provider one-shot
|
|
591
|
+
* fallback (see `fastModeDisabled` provider state).
|
|
592
|
+
*
|
|
593
|
+
* Other `ServiceTier` values are currently ignored on this provider.
|
|
594
|
+
*/
|
|
595
|
+
serviceTier?: ServiceTier;
|
|
529
596
|
/** Force OAuth bearer auth mode for proxy tokens that don't match Anthropic token prefixes. */
|
|
530
597
|
isOAuth?: boolean;
|
|
531
598
|
/**
|
|
@@ -961,10 +1028,16 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
961
1028
|
} else {
|
|
962
1029
|
const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
|
|
963
1030
|
|
|
1031
|
+
const extraBetas = normalizeExtraBetas(options?.betas);
|
|
1032
|
+
const wantsAnthropicPriority = resolveServiceTier(options?.serviceTier, model.provider) === "priority";
|
|
1033
|
+
if (wantsAnthropicPriority && !extraBetas.includes(fastModeBeta)) {
|
|
1034
|
+
extraBetas.push(fastModeBeta);
|
|
1035
|
+
}
|
|
1036
|
+
|
|
964
1037
|
const created = createClient(model, {
|
|
965
1038
|
model,
|
|
966
1039
|
apiKey,
|
|
967
|
-
extraBetas
|
|
1040
|
+
extraBetas,
|
|
968
1041
|
stream: true,
|
|
969
1042
|
interleavedThinking: options?.interleavedThinking ?? true,
|
|
970
1043
|
headers: options?.headers,
|
|
@@ -984,6 +1057,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
984
1057
|
let disableStrictTools =
|
|
985
1058
|
(providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
|
|
986
1059
|
let strictFallbackErrorMessage: string | undefined;
|
|
1060
|
+
let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
|
|
987
1061
|
const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
|
|
988
1062
|
let nextParams = buildParams(model, baseUrl, context, isOAuthToken, options, disableStrictTools);
|
|
989
1063
|
const replacementPayload = await options?.onPayload?.(nextParams, model);
|
|
@@ -993,6 +1067,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
993
1067
|
if (disableStrictTools) {
|
|
994
1068
|
dropAnthropicStrictTools(nextParams);
|
|
995
1069
|
}
|
|
1070
|
+
if (dropFastMode) {
|
|
1071
|
+
dropAnthropicFastMode(nextParams);
|
|
1072
|
+
}
|
|
996
1073
|
rawRequestDump = {
|
|
997
1074
|
provider: model.provider,
|
|
998
1075
|
api: output.api,
|
|
@@ -1284,6 +1361,30 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1284
1361
|
firstTokenTime = undefined;
|
|
1285
1362
|
continue;
|
|
1286
1363
|
}
|
|
1364
|
+
if (
|
|
1365
|
+
!dropFastMode &&
|
|
1366
|
+
resolveServiceTier(options?.serviceTier, model.provider) === "priority" &&
|
|
1367
|
+
firstTokenTime === undefined &&
|
|
1368
|
+
isAnthropicFastModeUnsupportedError(streamFailure)
|
|
1369
|
+
) {
|
|
1370
|
+
logger.debug("anthropic: fast mode unsupported, retrying without speed", {
|
|
1371
|
+
model: model.id,
|
|
1372
|
+
error: streamFailure instanceof Error ? streamFailure.message : String(streamFailure),
|
|
1373
|
+
});
|
|
1374
|
+
if (providerSessionState) {
|
|
1375
|
+
providerSessionState.fastModeDisabled = true;
|
|
1376
|
+
}
|
|
1377
|
+
dropFastMode = true;
|
|
1378
|
+
params = await prepareParams();
|
|
1379
|
+
providerRetryAttempt = 0;
|
|
1380
|
+
output.content.length = 0;
|
|
1381
|
+
output.responseId = undefined;
|
|
1382
|
+
output.providerPayload = undefined;
|
|
1383
|
+
output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
|
|
1384
|
+
output.stopReason = "stop";
|
|
1385
|
+
firstTokenTime = undefined;
|
|
1386
|
+
continue;
|
|
1387
|
+
}
|
|
1287
1388
|
const isTransientEnvelopeFailure =
|
|
1288
1389
|
isTransientStreamParseError(streamFailure) || isTransientStreamEnvelopeError(streamFailure);
|
|
1289
1390
|
const canRetryTransientEnvelopeFailure = isTransientEnvelopeFailure && !streamedReplayUnsafeContent;
|
|
@@ -1315,6 +1416,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1315
1416
|
|
|
1316
1417
|
output.duration = Date.now() - startTime;
|
|
1317
1418
|
if (firstTokenTime) output.ttft = firstTokenTime - startTime;
|
|
1419
|
+
if (dropFastMode && resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
|
|
1420
|
+
output.disabledFeatures = [...(output.disabledFeatures ?? []), "priority"];
|
|
1421
|
+
}
|
|
1318
1422
|
stream.push({ type: "done", reason: output.stopReason, message: output });
|
|
1319
1423
|
stream.end();
|
|
1320
1424
|
} catch (error) {
|
|
@@ -1862,6 +1966,10 @@ function buildParams(
|
|
|
1862
1966
|
params.metadata = { user_id: metadataUserId };
|
|
1863
1967
|
}
|
|
1864
1968
|
|
|
1969
|
+
if (resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
|
|
1970
|
+
(params as ParamsWithSpeed).speed = "fast";
|
|
1971
|
+
}
|
|
1972
|
+
|
|
1865
1973
|
if (options?.toolChoice) {
|
|
1866
1974
|
if (typeof options.toolChoice === "string") {
|
|
1867
1975
|
params.tool_choice = { type: options.toolChoice };
|
|
package/src/providers/openai-codex-responses.ts
CHANGED
|
@@ -29,10 +29,10 @@ import {
|
|
|
29
29
|
type FetchImpl,
|
|
30
30
|
type Model,
|
|
31
31
|
type ProviderSessionState,
|
|
32
|
+
resolveServiceTier,
|
|
32
33
|
type ServiceTier,
|
|
33
34
|
type StreamFunction,
|
|
34
35
|
type StreamOptions,
|
|
35
|
-
shouldSendServiceTier,
|
|
36
36
|
type TextContent,
|
|
37
37
|
type ThinkingContent,
|
|
38
38
|
type Tool,
|
|
@@ -590,8 +590,9 @@ async function buildTransformedCodexRequestBody(
|
|
|
590
590
|
if (options?.repetitionPenalty !== undefined) {
|
|
591
591
|
params.repetition_penalty = options.repetitionPenalty;
|
|
592
592
|
}
|
|
593
|
-
|
|
594
|
-
|
|
593
|
+
const resolvedServiceTier = resolveServiceTier(options?.serviceTier, model.provider);
|
|
594
|
+
if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
|
|
595
|
+
params.service_tier = resolvedServiceTier;
|
|
595
596
|
}
|
|
596
597
|
if (context.tools && context.tools.length > 0) {
|
|
597
598
|
params.tools = convertOpenAICodexResponsesTools(context.tools, model);
|
|
package/src/providers/openai-completions.ts
CHANGED
|
@@ -22,11 +22,11 @@ import {
|
|
|
22
22
|
type Model,
|
|
23
23
|
type OpenAICompat,
|
|
24
24
|
type ProviderSessionState,
|
|
25
|
+
resolveServiceTier,
|
|
25
26
|
type ServiceTier,
|
|
26
27
|
type StopReason,
|
|
27
28
|
type StreamFunction,
|
|
28
29
|
type StreamOptions,
|
|
29
|
-
shouldSendServiceTier,
|
|
30
30
|
type TextContent,
|
|
31
31
|
type ThinkingContent,
|
|
32
32
|
type Tool,
|
|
@@ -1092,8 +1092,9 @@ function buildParams(
|
|
|
1092
1092
|
if (options?.frequencyPenalty !== undefined) {
|
|
1093
1093
|
params.frequency_penalty = options.frequencyPenalty;
|
|
1094
1094
|
}
|
|
1095
|
-
|
|
1096
|
-
|
|
1095
|
+
const resolvedServiceTier = resolveServiceTier(options?.serviceTier, model.provider);
|
|
1096
|
+
if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
|
|
1097
|
+
params.service_tier = resolvedServiceTier;
|
|
1097
1098
|
}
|
|
1098
1099
|
|
|
1099
1100
|
if (context.tools) {
|
|
package/src/providers/openai-responses-shared.ts
CHANGED
|
@@ -17,10 +17,10 @@ import {
|
|
|
17
17
|
type AssistantMessage,
|
|
18
18
|
type ImageContent,
|
|
19
19
|
type Model,
|
|
20
|
+
resolveServiceTier,
|
|
20
21
|
type ServiceTier,
|
|
21
22
|
type StopReason,
|
|
22
23
|
type StreamOptions,
|
|
23
|
-
shouldSendServiceTier,
|
|
24
24
|
type TextContent,
|
|
25
25
|
type TextSignatureV1,
|
|
26
26
|
type ThinkingContent,
|
|
@@ -650,8 +650,9 @@ export function applyCommonResponsesSamplingParams<P extends CommonResponsesPara
|
|
|
650
650
|
if (options?.minP !== undefined) params.min_p = options.minP;
|
|
651
651
|
if (options?.presencePenalty !== undefined) params.presence_penalty = options.presencePenalty;
|
|
652
652
|
if (options?.repetitionPenalty !== undefined) params.repetition_penalty = options.repetitionPenalty;
|
|
653
|
-
|
|
654
|
-
|
|
653
|
+
const resolvedServiceTier = resolveServiceTier(options?.serviceTier, provider);
|
|
654
|
+
if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
|
|
655
|
+
params.service_tier = resolvedServiceTier;
|
|
655
656
|
}
|
|
656
657
|
}
|
|
657
658
|
|
package/src/stream.ts
CHANGED
|
@@ -580,6 +580,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
580
580
|
thinkingEnabled: false,
|
|
581
581
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
582
582
|
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
583
|
+
serviceTier: options?.serviceTier,
|
|
583
584
|
});
|
|
584
585
|
}
|
|
585
586
|
|
|
@@ -590,6 +591,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
590
591
|
thinkingEnabled: false,
|
|
591
592
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
592
593
|
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
594
|
+
serviceTier: options?.serviceTier,
|
|
593
595
|
});
|
|
594
596
|
}
|
|
595
597
|
|
|
@@ -603,6 +605,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
603
605
|
effort,
|
|
604
606
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
605
607
|
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
608
|
+
serviceTier: options?.serviceTier,
|
|
606
609
|
});
|
|
607
610
|
}
|
|
608
611
|
|
|
@@ -613,6 +616,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
613
616
|
thinkingBudgetTokens: thinkingBudget,
|
|
614
617
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
615
618
|
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
619
|
+
serviceTier: options?.serviceTier,
|
|
616
620
|
});
|
|
617
621
|
}
|
|
618
622
|
|
|
@@ -631,6 +635,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
631
635
|
thinkingEnabled: false,
|
|
632
636
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
633
637
|
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
638
|
+
serviceTier: options?.serviceTier,
|
|
634
639
|
});
|
|
635
640
|
} else {
|
|
636
641
|
return castApi<"anthropic-messages">({
|
|
@@ -640,6 +645,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
640
645
|
thinkingBudgetTokens: thinkingBudget,
|
|
641
646
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
642
647
|
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
648
|
+
serviceTier: options?.serviceTier,
|
|
643
649
|
});
|
|
644
650
|
}
|
|
645
651
|
}
|
package/src/types.ts
CHANGED
|
@@ -162,29 +162,78 @@ export type ToolChoice =
|
|
|
162
162
|
// Base options all providers share
|
|
163
163
|
export type CacheRetention = "none" | "short" | "long";
|
|
164
164
|
|
|
165
|
-
/**
|
|
166
|
-
|
|
165
|
+
/**
|
|
166
|
+
* Service tier hint for processing priority / cost control.
|
|
167
|
+
*
|
|
168
|
+
* The unscoped values (`"auto"`, `"default"`, `"flex"`, `"scale"`,
|
|
169
|
+
* `"priority"`) are passed through to providers that understand them
|
|
170
|
+
* (OpenAI's `service_tier` field directly; Anthropic translates
|
|
171
|
+
* `"priority"` into `speed: "fast"` on supported Opus models).
|
|
172
|
+
*
|
|
173
|
+
* The scoped values target a specific provider family and behave as the
|
|
174
|
+
* unscoped value on the matching provider, or `undefined` everywhere else.
|
|
175
|
+
* They let users opt into priority on one family without paying premium
|
|
176
|
+
* costs on the other when switching models mid-session.
|
|
177
|
+
*
|
|
178
|
+
* - `"openai-only"` → `"priority"` on `openai` and `openai-codex`; ignored elsewhere.
|
|
179
|
+
* - `"claude-only"` → `"priority"` on direct `anthropic` (not Bedrock/Vertex Claude).
|
|
180
|
+
*/
|
|
181
|
+
export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority" | "openai-only" | "claude-only";
|
|
167
182
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
183
|
+
/** Resolved tier — one of the values that providers actually consume on the wire. */
|
|
184
|
+
export type ResolvedServiceTier = Exclude<ServiceTier, "openai-only" | "claude-only">;
|
|
185
|
+
|
|
186
|
+
/**
|
|
187
|
+
* Resolves a possibly scoped `ServiceTier` to the effective tier for the
|
|
188
|
+
* given provider. Scoped values match their target family and otherwise
|
|
189
|
+
* collapse to `undefined`; unscoped values pass through unchanged.
|
|
190
|
+
*/
|
|
191
|
+
export function resolveServiceTier(
|
|
192
|
+
serviceTier: ServiceTier | null | undefined,
|
|
193
|
+
provider: Provider | undefined,
|
|
194
|
+
): ResolvedServiceTier | undefined {
|
|
195
|
+
if (!serviceTier) return undefined;
|
|
196
|
+
switch (serviceTier) {
|
|
197
|
+
case "openai-only":
|
|
198
|
+
return provider === "openai" || provider === "openai-codex" ? "priority" : undefined;
|
|
199
|
+
case "claude-only":
|
|
200
|
+
return provider === "anthropic" ? "priority" : undefined;
|
|
201
|
+
default:
|
|
202
|
+
return serviceTier;
|
|
174
203
|
}
|
|
175
|
-
return serviceTier === "flex" || serviceTier === "scale" || serviceTier === "priority";
|
|
176
204
|
}
|
|
177
205
|
|
|
178
206
|
/**
|
|
179
|
-
*
|
|
180
|
-
*
|
|
181
|
-
*
|
|
207
|
+
* True when the (possibly scoped) tier should be sent as OpenAI's
|
|
208
|
+
* `service_tier` request field for the given provider. Non-OpenAI
|
|
209
|
+
* providers, unsupported tiers (`"auto"`, `"default"`), and scope
|
|
210
|
+
* mismatches all return false.
|
|
211
|
+
*/
|
|
212
|
+
export function shouldSendServiceTier(
|
|
213
|
+
serviceTier: ServiceTier | null | undefined,
|
|
214
|
+
provider: Provider | undefined,
|
|
215
|
+
): boolean {
|
|
216
|
+
if (provider !== "openai" && provider !== "openai-codex") return false;
|
|
217
|
+
const resolved = resolveServiceTier(serviceTier, provider);
|
|
218
|
+
return resolved === "flex" || resolved === "scale" || resolved === "priority";
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
/**
|
|
222
|
+
* Premium-request weight contributed by sending priority to a provider
|
|
223
|
+
* that supports it. Mirrors GitHub Copilot's `premiumRequests` accounting
|
|
224
|
+
* so the "premium requests" stat aggregates priority traffic across the
|
|
225
|
+
* OpenAI family and Anthropic fast-mode realizations.
|
|
182
226
|
*
|
|
183
|
-
* Returns 1 per priority request, 0 otherwise.
|
|
184
|
-
* `scale`) and providers that ignore `service_tier` always return 0.
|
|
227
|
+
* Returns 1 per resolved priority request, 0 otherwise.
|
|
185
228
|
*/
|
|
186
|
-
export function getPriorityPremiumRequests(
|
|
187
|
-
|
|
229
|
+
export function getPriorityPremiumRequests(
|
|
230
|
+
serviceTier: ServiceTier | null | undefined,
|
|
231
|
+
provider: Provider | undefined,
|
|
232
|
+
): number {
|
|
233
|
+
if (resolveServiceTier(serviceTier, provider) !== "priority") return 0;
|
|
234
|
+
// Only providers that realize `priority` on the wire bill the user.
|
|
235
|
+
// Everywhere else, the field is silently dropped and nothing is charged.
|
|
236
|
+
return provider === "openai" || provider === "openai-codex" || provider === "anthropic" ? 1 : 0;
|
|
188
237
|
}
|
|
189
238
|
|
|
190
239
|
export interface ProviderSessionState {
|
|
@@ -502,6 +551,14 @@ export interface AssistantMessage {
|
|
|
502
551
|
errorMessage?: string;
|
|
503
552
|
/** HTTP status surfaced by the provider when the request failed. Populated by every provider's catch block alongside `errorMessage` so consumers (auth retry, telemetry, UI) can branch without regex-scraping the message. */
|
|
504
553
|
errorStatus?: number;
|
|
554
|
+
/**
|
|
555
|
+
* Stable identifiers for request features the provider silently dropped
|
|
556
|
+
* during this turn (e.g. `"priority"`). Set when a server-side rejection
|
|
557
|
+
* triggered an in-provider fallback retry that succeeded without the
|
|
558
|
+
* feature. Callers can use this to sync user-facing toggles back to the
|
|
559
|
+
* server's actual state.
|
|
560
|
+
*/
|
|
561
|
+
disabledFeatures?: string[];
|
|
505
562
|
/** Provider-specific opaque payload used to reconstruct transport-native history. */
|
|
506
563
|
providerPayload?: ProviderPayload;
|
|
507
564
|
timestamp: number; // Unix timestamp in milliseconds
|