@oh-my-pi/pi-ai 15.1.6 → 15.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,16 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.1.7] - 2026-05-19
6
+ ### Added
7
+
8
+ - Added Anthropic realization of `serviceTier: "priority"`. The anthropic-messages provider now sets `speed: "fast"` on the request and appends the `fast-mode-2026-02-01` beta to `Anthropic-Beta` whenever the caller passes `serviceTier: "priority"`. When the server rejects an unsupported model with `invalid_request_error`, the provider transparently retries the same turn without the fast-mode signal (mirroring the strict-tools fallback pattern), persists the disable via a new `providerSessionState.fastModeDisabled` flag so subsequent requests in the session skip the field, and surfaces the action via the new `AssistantMessage.disabledFeatures` array (id `"priority"`) so callers can sync user-facing toggles. A new `clearAnthropicFastModeFallback(providerSessionState)` helper lets callers re-arm priority after the auto-fallback fired.
9
+ - Added scoped `ServiceTier` values: `"openai-only"` (priority on `openai`/`openai-codex`, ignored elsewhere) and `"claude-only"` (priority on direct `anthropic`, ignored on Bedrock/Vertex Claude and elsewhere). A new `resolveServiceTier(serviceTier, provider)` helper computes the effective tier for the provider; existing OpenAI/Anthropic provider code routes through it, so `service_tier` and Anthropic fast-mode emission both respect scope. `getPriorityPremiumRequests` now counts Anthropic+priority as one premium request (previously zero) and continues to ignore providers that drop the field on the wire.
10
+
11
+ ### Fixed
12
+
13
+ - Fixed Anthropic fast mode (`serviceTier: "priority"`) looping on 429 `rate_limit_error: "Extra usage is required for fast mode."` for accounts without the extra-usage entitlement. `isAnthropicFastModeUnsupportedError` now matches the 429 phrasing in addition to the 400 `invalid_request_error` "does not support the `speed` parameter" case, so the provider drops `speed: "fast"` on the in-turn retry, sets `providerSessionState.fastModeDisabled` for the remainder of the session, and surfaces `disabledFeatures: ["priority"]` to the caller instead of retrying with the same payload until `PROVIDER_MAX_RETRIES` is exhausted.
14
+
5
15
  ## [15.1.6] - 2026-05-19
6
16
 
7
17
  ### Fixed
@@ -1,6 +1,6 @@
1
1
  import Anthropic, { type ClientOptions as AnthropicSdkClientOptions } from "@anthropic-ai/sdk";
2
2
  import type { MessageParam } from "@anthropic-ai/sdk/resources/messages";
3
- import type { FetchImpl, Message, Model, SimpleStreamOptions, StreamFunction, StreamOptions, Usage } from "../types";
3
+ import type { FetchImpl, Message, Model, ProviderSessionState, ServiceTier, SimpleStreamOptions, StreamFunction, StreamOptions, Usage } from "../types";
4
4
  export type AnthropicHeaderOptions = {
5
5
  apiKey: string;
6
6
  baseUrl?: string;
@@ -17,6 +17,15 @@ type AnthropicCacheControl = {
17
17
  type: "ephemeral";
18
18
  ttl?: "1h" | "5m";
19
19
  };
20
+ /**
21
+ * Clears the in-session "server rejected fast mode" sticky flag. Call when the
22
+ * caller is explicitly re-arming `serviceTier: "priority"` (e.g. user toggled
23
+ * `/fast on` after a previous turn auto-disabled it) so the next request
24
+ * actually carries `speed: "fast"` again. No-op when the map or state entry
25
+ * hasn't been materialized yet.
26
+ */
27
+ export declare function clearAnthropicFastModeFallback(providerSessionState: Map<string, ProviderSessionState> | undefined): void;
28
+ export declare function isAnthropicFastModeUnsupportedError(error: unknown): boolean;
20
29
  export declare const claudeCodeVersion = "2.1.63";
21
30
  export declare const claudeToolPrefix: string;
22
31
  export declare const claudeCodeSystemInstruction = "You are a Claude agent, built on Anthropic's Claude Agent SDK.";
@@ -77,6 +86,16 @@ export interface AnthropicOptions extends StreamOptions {
77
86
  name: string;
78
87
  };
79
88
  betas?: string[] | string;
89
+ /**
90
+ * Realization of `serviceTier: "priority"` on Anthropic models. When
91
+ * `"priority"`, sets `speed: "fast"` on the request and appends the
92
+ * `fast-mode-2026-02-01` beta header. Anthropic rejects unsupported models
93
+ * with a 400 `invalid_request_error` (and accounts lacking the extra-usage entitlement with a 429 `rate_limit_error`), either of which triggers an in-provider one-shot
94
+ * fallback (see `fastModeDisabled` provider state).
95
+ *
96
+ * `"claude-only"` is treated as `"priority"` when the model's provider is direct `anthropic`; all other `ServiceTier` values are currently ignored on this provider.
97
+ */
98
+ serviceTier?: ServiceTier;
80
99
  /** Force OAuth bearer auth mode for proxy tokens that don't match Anthropic token prefixes. */
81
100
  isOAuth?: boolean;
82
101
  /**
@@ -69,18 +69,47 @@ export type ToolChoice = "auto" | "none" | "any" | "required" | {
69
69
  name: string;
70
70
  };
71
71
  export type CacheRetention = "none" | "short" | "long";
72
- /** OpenAI service tier for processing priority. Only applies to OpenAI-compatible APIs. */
73
- export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority";
74
- export declare function shouldSendServiceTier(serviceTier?: ServiceTier | null, provider?: Provider): serviceTier is "flex" | "scale" | "priority";
75
72
  /**
76
- * Premium-request weight contributed by sending a `priority` service tier to
77
- * a provider that supports it. Mirrors GitHub Copilot's `premiumRequests`
78
- * accounting so the "premium requests" stat aggregates priority traffic too.
73
+ * Service tier hint for processing priority / cost control.
79
74
  *
80
- * Returns 1 per priority request, 0 otherwise. Non-priority tiers (`flex`,
81
- * `scale`) and providers that ignore `service_tier` always return 0.
75
+ * The unscoped values (`"auto"`, `"default"`, `"flex"`, `"scale"`,
76
+ * `"priority"`) are passed through to providers that understand them
77
+ * (OpenAI's `service_tier` field directly; Anthropic translates
78
+ * `"priority"` into `speed: "fast"` on supported Opus models).
79
+ *
80
+ * The scoped values target a specific provider family and behave as the
81
+ * unscoped `"priority"` value on the matching provider, or `undefined` everywhere else.
82
+ * They let users opt into priority on one family without paying premium
83
+ * costs on the other when switching models mid-session.
84
+ *
85
+ * - `"openai-only"` → `"priority"` on `openai` and `openai-codex`; ignored elsewhere.
86
+ * - `"claude-only"` → `"priority"` on direct `anthropic` (not Bedrock/Vertex Claude).
87
+ */
88
+ export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority" | "openai-only" | "claude-only";
89
+ /** Resolved tier — one of the values that providers actually consume on the wire. */
90
+ export type ResolvedServiceTier = Exclude<ServiceTier, "openai-only" | "claude-only">;
91
+ /**
92
+ * Resolves a possibly scoped `ServiceTier` to the effective tier for the
93
+ * given provider. Scoped values match their target family and otherwise
94
+ * collapse to `undefined`; unscoped values pass through unchanged.
95
+ */
96
+ export declare function resolveServiceTier(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): ResolvedServiceTier | undefined;
97
+ /**
98
+ * True when the (possibly scoped) tier should be sent as OpenAI's
99
+ * `service_tier` request field for the given provider. Non-OpenAI
100
+ * providers, unsupported tiers (`"auto"`, `"default"`), and scope
101
+ * mismatches all return false.
82
102
  */
83
- export declare function getPriorityPremiumRequests(serviceTier?: ServiceTier | null, provider?: Provider): number;
103
+ export declare function shouldSendServiceTier(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): boolean;
104
+ /**
105
+ * Premium-request weight contributed by sending priority to a provider
106
+ * that supports it. Mirrors GitHub Copilot's `premiumRequests` accounting
107
+ * so the "premium requests" stat aggregates priority traffic across the
108
+ * OpenAI family and Anthropic fast-mode realizations.
109
+ *
110
+ * Returns 1 per resolved priority request, 0 otherwise.
111
+ */
112
+ export declare function getPriorityPremiumRequests(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): number;
84
113
  export interface ProviderSessionState {
85
114
  close(): void;
86
115
  }
@@ -371,6 +400,14 @@ export interface AssistantMessage {
371
400
  errorMessage?: string;
372
401
  /** HTTP status surfaced by the provider when the request failed. Populated by every provider's catch block alongside `errorMessage` so consumers (auth retry, telemetry, UI) can branch without regex-scraping the message. */
373
402
  errorStatus?: number;
403
+ /**
404
+ * Stable identifiers for request features the provider silently dropped
405
+ * during this turn (e.g. `"priority"`). Set when a server-side rejection
406
+ * triggered an in-provider fallback retry that succeeded without the
407
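+ * feature, and (for Anthropic fast mode) on later turns while the session-level `fastModeDisabled` flag remains set. Callers can use this to sync user-facing toggles back to the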
408
+ * server's actual state.
409
+ */
410
+ disabledFeatures?: string[];
374
411
  /** Provider-specific opaque payload used to reconstruct transport-native history. */
375
412
  providerPayload?: ProviderPayload;
376
413
  timestamp: number;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "15.1.6",
4
+ "version": "15.1.7",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -43,7 +43,7 @@
43
43
  "dependencies": {
44
44
  "@anthropic-ai/sdk": "^0.94.0",
45
45
  "@bufbuild/protobuf": "^2.12.0",
46
- "@oh-my-pi/pi-utils": "15.1.6",
46
+ "@oh-my-pi/pi-utils": "15.1.7",
47
47
  "openai": "^6.36.0",
48
48
  "partial-json": "^0.1.7",
49
49
  "zod": "4.4.3"
@@ -32,6 +32,7 @@ import type {
32
32
  Model,
33
33
  ProviderSessionState,
34
34
  RedactedThinkingContent,
35
+ ServiceTier,
35
36
  SimpleStreamOptions,
36
37
  StopReason,
37
38
  StreamFunction,
@@ -43,6 +44,7 @@ import type {
43
44
  ToolResultMessage,
44
45
  Usage,
45
46
  } from "../types";
47
+ import { resolveServiceTier } from "../types";
46
48
  import {
47
49
  isAnthropicOAuthToken,
48
50
  isRecord,
@@ -111,6 +113,7 @@ const claudeCodeBetaDefaults = [
111
113
  ];
112
114
  const fineGrainedToolStreamingBeta = "fine-grained-tool-streaming-2025-05-14";
113
115
  const interleavedThinkingBeta = "interleaved-thinking-2025-05-14";
116
+ const fastModeBeta = "fast-mode-2026-02-01";
114
117
 
115
118
  function getHeaderCaseInsensitive(headers: Record<string, string> | undefined, headerName: string): string | undefined {
116
119
  if (!headers) return undefined;
@@ -224,13 +227,16 @@ const ANTHROPIC_PROVIDER_SESSION_STATE_KEY = "anthropic-messages";
224
227
 
225
228
  type AnthropicProviderSessionState = ProviderSessionState & {
226
229
  strictToolsDisabled: boolean;
230
+ fastModeDisabled: boolean;
227
231
  };
228
232
 
229
233
  function createAnthropicProviderSessionState(): AnthropicProviderSessionState {
230
234
  const state: AnthropicProviderSessionState = {
231
235
  strictToolsDisabled: false,
236
+ fastModeDisabled: false,
232
237
  close: () => {
233
238
  state.strictToolsDisabled = false;
239
+ state.fastModeDisabled = false;
234
240
  },
235
241
  };
236
242
  return state;
@@ -249,6 +255,23 @@ function getAnthropicProviderSessionState(
249
255
  return created;
250
256
  }
251
257
 
258
+ /**
259
+ * Clears the in-session "server rejected fast mode" sticky flag. Call when the
260
+ * caller is explicitly re-arming `serviceTier: "priority"` (e.g. user toggled
261
+ * `/fast on` after a previous turn auto-disabled it) so the next request
262
+ * actually carries `speed: "fast"` again. No-op when the map or state entry
263
+ * hasn't been materialized yet.
264
+ */
265
+ export function clearAnthropicFastModeFallback(
266
+ providerSessionState: Map<string, ProviderSessionState> | undefined,
267
+ ): void {
268
+ if (!providerSessionState) return;
269
+ const state = providerSessionState.get(ANTHROPIC_PROVIDER_SESSION_STATE_KEY) as
270
+ | AnthropicProviderSessionState
271
+ | undefined;
272
+ if (state) state.fastModeDisabled = false;
273
+ }
274
+
252
275
  function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
253
276
  if (extractHttpStatusFromError(error) !== 400) return false;
254
277
  const message = error instanceof Error ? error.message : String(error);
@@ -258,11 +281,45 @@ function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
258
281
  return /invalid_request_error/i.test(message) && (isStrictGrammarTooLarge || isSchemaCompilationTooComplex);
259
282
  }
260
283
 
284
+ export function isAnthropicFastModeUnsupportedError(error: unknown): boolean {
285
+ const status = extractHttpStatusFromError(error);
286
+ if (status !== 400 && status !== 429) return false;
287
+ const message = error instanceof Error ? error.message : String(error);
288
+ // 400 invalid_request_error — model doesn't accept `speed` at all.
289
+ // Observed: "'claude-opus-4-5-20251101' does not support the `speed` parameter."
290
+ // Stay tolerant of phrasing drift ("is not supported", quoted vs backticked field).
291
+ if (
292
+ status === 400 &&
293
+ /invalid_request_error/i.test(message) &&
294
+ /\bspeed\b/i.test(message) &&
295
+ /not support/i.test(message)
296
+ ) {
297
+ return true;
298
+ }
299
+ // 429 rate_limit_error — account lacks the extra-usage entitlement fast mode requires.
300
+ // Observed: "Extra usage is required for fast mode."
301
+ if (status === 429 && /rate_limit_error/i.test(message) && /fast mode/i.test(message)) {
302
+ return true;
303
+ }
304
+ return false;
305
+ }
306
+
261
307
  function hasStrictAnthropicTools(params: MessageCreateParamsStreaming): boolean {
262
308
  const tools = params.tools as Array<{ strict?: unknown }> | undefined;
263
309
  return tools?.some(tool => tool.strict === true) ?? false;
264
310
  }
265
311
 
312
+ /**
313
+ * `speed` lives on `BetaMessageCreateParams` (client.beta.messages) but this
314
+ * provider posts via `client.messages.create`, whose param type doesn't
315
+ * include it. This alias narrows the cast to one place.
316
+ */
317
+ type ParamsWithSpeed = MessageCreateParamsStreaming & { speed?: "fast" };
318
+
319
+ function dropAnthropicFastMode(params: MessageCreateParamsStreaming): void {
320
+ delete (params as ParamsWithSpeed).speed;
321
+ }
322
+
266
323
  function dropAnthropicStrictTools(params: MessageCreateParamsStreaming): void {
267
324
  const tools = params.tools as Array<{ strict?: unknown }> | undefined;
268
325
  if (!tools) return;
@@ -526,6 +583,16 @@ export interface AnthropicOptions extends StreamOptions {
526
583
  interleavedThinking?: boolean;
527
584
  toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
528
585
  betas?: string[] | string;
586
+ /**
587
+ * Realization of `serviceTier: "priority"` on Anthropic models. When
588
+ * `"priority"`, sets `speed: "fast"` on the request and appends the
589
+ * `fast-mode-2026-02-01` beta header. Anthropic rejects unsupported models
590
+ * with a 400 `invalid_request_error` (and accounts lacking the extra-usage entitlement with a 429 `rate_limit_error`), either of which triggers an in-provider one-shot
591
+ * fallback (see `fastModeDisabled` provider state).
592
+ *
593
+ * `"claude-only"` is treated as `"priority"` when the model's provider is direct `anthropic`; all other `ServiceTier` values are currently ignored on this provider.
594
+ */
595
+ serviceTier?: ServiceTier;
529
596
  /** Force OAuth bearer auth mode for proxy tokens that don't match Anthropic token prefixes. */
530
597
  isOAuth?: boolean;
531
598
  /**
@@ -961,10 +1028,16 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
961
1028
  } else {
962
1029
  const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
963
1030
 
1031
+ const extraBetas = normalizeExtraBetas(options?.betas);
1032
+ const wantsAnthropicPriority = resolveServiceTier(options?.serviceTier, model.provider) === "priority";
1033
+ if (wantsAnthropicPriority && !extraBetas.includes(fastModeBeta)) {
1034
+ extraBetas.push(fastModeBeta);
1035
+ }
1036
+
964
1037
  const created = createClient(model, {
965
1038
  model,
966
1039
  apiKey,
967
- extraBetas: normalizeExtraBetas(options?.betas),
1040
+ extraBetas,
968
1041
  stream: true,
969
1042
  interleavedThinking: options?.interleavedThinking ?? true,
970
1043
  headers: options?.headers,
@@ -984,6 +1057,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
984
1057
  let disableStrictTools =
985
1058
  (providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
986
1059
  let strictFallbackErrorMessage: string | undefined;
1060
+ let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
987
1061
  const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
988
1062
  let nextParams = buildParams(model, baseUrl, context, isOAuthToken, options, disableStrictTools);
989
1063
  const replacementPayload = await options?.onPayload?.(nextParams, model);
@@ -993,6 +1067,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
993
1067
  if (disableStrictTools) {
994
1068
  dropAnthropicStrictTools(nextParams);
995
1069
  }
1070
+ if (dropFastMode) {
1071
+ dropAnthropicFastMode(nextParams);
1072
+ }
996
1073
  rawRequestDump = {
997
1074
  provider: model.provider,
998
1075
  api: output.api,
@@ -1284,6 +1361,30 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1284
1361
  firstTokenTime = undefined;
1285
1362
  continue;
1286
1363
  }
1364
+ if (
1365
+ !dropFastMode &&
1366
+ resolveServiceTier(options?.serviceTier, model.provider) === "priority" &&
1367
+ firstTokenTime === undefined &&
1368
+ isAnthropicFastModeUnsupportedError(streamFailure)
1369
+ ) {
1370
+ logger.debug("anthropic: fast mode unsupported, retrying without speed", {
1371
+ model: model.id,
1372
+ error: streamFailure instanceof Error ? streamFailure.message : String(streamFailure),
1373
+ });
1374
+ if (providerSessionState) {
1375
+ providerSessionState.fastModeDisabled = true;
1376
+ }
1377
+ dropFastMode = true;
1378
+ params = await prepareParams();
1379
+ providerRetryAttempt = 0;
1380
+ output.content.length = 0;
1381
+ output.responseId = undefined;
1382
+ output.providerPayload = undefined;
1383
+ output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
1384
+ output.stopReason = "stop";
1385
+ firstTokenTime = undefined;
1386
+ continue;
1387
+ }
1287
1388
  const isTransientEnvelopeFailure =
1288
1389
  isTransientStreamParseError(streamFailure) || isTransientStreamEnvelopeError(streamFailure);
1289
1390
  const canRetryTransientEnvelopeFailure = isTransientEnvelopeFailure && !streamedReplayUnsafeContent;
@@ -1315,6 +1416,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1315
1416
 
1316
1417
  output.duration = Date.now() - startTime;
1317
1418
  if (firstTokenTime) output.ttft = firstTokenTime - startTime;
1419
+ if (dropFastMode && resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
1420
+ output.disabledFeatures = [...(output.disabledFeatures ?? []), "priority"];
1421
+ }
1318
1422
  stream.push({ type: "done", reason: output.stopReason, message: output });
1319
1423
  stream.end();
1320
1424
  } catch (error) {
@@ -1862,6 +1966,10 @@ function buildParams(
1862
1966
  params.metadata = { user_id: metadataUserId };
1863
1967
  }
1864
1968
 
1969
+ if (resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
1970
+ (params as ParamsWithSpeed).speed = "fast";
1971
+ }
1972
+
1865
1973
  if (options?.toolChoice) {
1866
1974
  if (typeof options.toolChoice === "string") {
1867
1975
  params.tool_choice = { type: options.toolChoice };
@@ -29,10 +29,10 @@ import {
29
29
  type FetchImpl,
30
30
  type Model,
31
31
  type ProviderSessionState,
32
+ resolveServiceTier,
32
33
  type ServiceTier,
33
34
  type StreamFunction,
34
35
  type StreamOptions,
35
- shouldSendServiceTier,
36
36
  type TextContent,
37
37
  type ThinkingContent,
38
38
  type Tool,
@@ -590,8 +590,9 @@ async function buildTransformedCodexRequestBody(
590
590
  if (options?.repetitionPenalty !== undefined) {
591
591
  params.repetition_penalty = options.repetitionPenalty;
592
592
  }
593
- if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
594
- params.service_tier = options.serviceTier;
593
+ const resolvedServiceTier = resolveServiceTier(options?.serviceTier, model.provider);
594
+ if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
595
+ params.service_tier = resolvedServiceTier;
595
596
  }
596
597
  if (context.tools && context.tools.length > 0) {
597
598
  params.tools = convertOpenAICodexResponsesTools(context.tools, model);
@@ -22,11 +22,11 @@ import {
22
22
  type Model,
23
23
  type OpenAICompat,
24
24
  type ProviderSessionState,
25
+ resolveServiceTier,
25
26
  type ServiceTier,
26
27
  type StopReason,
27
28
  type StreamFunction,
28
29
  type StreamOptions,
29
- shouldSendServiceTier,
30
30
  type TextContent,
31
31
  type ThinkingContent,
32
32
  type Tool,
@@ -1092,8 +1092,9 @@ function buildParams(
1092
1092
  if (options?.frequencyPenalty !== undefined) {
1093
1093
  params.frequency_penalty = options.frequencyPenalty;
1094
1094
  }
1095
- if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
1096
- params.service_tier = options.serviceTier;
1095
+ const resolvedServiceTier = resolveServiceTier(options?.serviceTier, model.provider);
1096
+ if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
1097
+ params.service_tier = resolvedServiceTier;
1097
1098
  }
1098
1099
 
1099
1100
  if (context.tools) {
@@ -17,10 +17,10 @@ import {
17
17
  type AssistantMessage,
18
18
  type ImageContent,
19
19
  type Model,
20
+ resolveServiceTier,
20
21
  type ServiceTier,
21
22
  type StopReason,
22
23
  type StreamOptions,
23
- shouldSendServiceTier,
24
24
  type TextContent,
25
25
  type TextSignatureV1,
26
26
  type ThinkingContent,
@@ -650,8 +650,9 @@ export function applyCommonResponsesSamplingParams<P extends CommonResponsesPara
650
650
  if (options?.minP !== undefined) params.min_p = options.minP;
651
651
  if (options?.presencePenalty !== undefined) params.presence_penalty = options.presencePenalty;
652
652
  if (options?.repetitionPenalty !== undefined) params.repetition_penalty = options.repetitionPenalty;
653
- if (shouldSendServiceTier(options?.serviceTier, provider)) {
654
- params.service_tier = options.serviceTier;
653
+ const resolvedServiceTier = resolveServiceTier(options?.serviceTier, provider);
654
+ if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
655
+ params.service_tier = resolvedServiceTier;
655
656
  }
656
657
  }
657
658
 
package/src/stream.ts CHANGED
@@ -580,6 +580,7 @@ function mapOptionsForApi<TApi extends Api>(
580
580
  thinkingEnabled: false,
581
581
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
582
582
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
583
+ serviceTier: options?.serviceTier,
583
584
  });
584
585
  }
585
586
 
@@ -590,6 +591,7 @@ function mapOptionsForApi<TApi extends Api>(
590
591
  thinkingEnabled: false,
591
592
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
592
593
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
594
+ serviceTier: options?.serviceTier,
593
595
  });
594
596
  }
595
597
 
@@ -603,6 +605,7 @@ function mapOptionsForApi<TApi extends Api>(
603
605
  effort,
604
606
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
605
607
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
608
+ serviceTier: options?.serviceTier,
606
609
  });
607
610
  }
608
611
 
@@ -613,6 +616,7 @@ function mapOptionsForApi<TApi extends Api>(
613
616
  thinkingBudgetTokens: thinkingBudget,
614
617
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
615
618
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
619
+ serviceTier: options?.serviceTier,
616
620
  });
617
621
  }
618
622
 
@@ -631,6 +635,7 @@ function mapOptionsForApi<TApi extends Api>(
631
635
  thinkingEnabled: false,
632
636
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
633
637
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
638
+ serviceTier: options?.serviceTier,
634
639
  });
635
640
  } else {
636
641
  return castApi<"anthropic-messages">({
@@ -640,6 +645,7 @@ function mapOptionsForApi<TApi extends Api>(
640
645
  thinkingBudgetTokens: thinkingBudget,
641
646
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
642
647
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
648
+ serviceTier: options?.serviceTier,
643
649
  });
644
650
  }
645
651
  }
package/src/types.ts CHANGED
@@ -162,29 +162,78 @@ export type ToolChoice =
162
162
  // Base options all providers share
163
163
  export type CacheRetention = "none" | "short" | "long";
164
164
 
165
- /** OpenAI service tier for processing priority. Only applies to OpenAI-compatible APIs. */
166
- export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority";
165
+ /**
166
+ * Service tier hint for processing priority / cost control.
167
+ *
168
+ * The unscoped values (`"auto"`, `"default"`, `"flex"`, `"scale"`,
169
+ * `"priority"`) are passed through to providers that understand them
170
+ * (OpenAI's `service_tier` field directly; Anthropic translates
171
+ * `"priority"` into `speed: "fast"` on supported Opus models).
172
+ *
173
+ * The scoped values target a specific provider family and behave as the
174
+ * unscoped `"priority"` value on the matching provider, or `undefined` everywhere else.
175
+ * They let users opt into priority on one family without paying premium
176
+ * costs on the other when switching models mid-session.
177
+ *
178
+ * - `"openai-only"` → `"priority"` on `openai` and `openai-codex`; ignored elsewhere.
179
+ * - `"claude-only"` → `"priority"` on direct `anthropic` (not Bedrock/Vertex Claude).
180
+ */
181
+ export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority" | "openai-only" | "claude-only";
167
182
 
168
- export function shouldSendServiceTier(
169
- serviceTier?: ServiceTier | null,
170
- provider?: Provider,
171
- ): serviceTier is "flex" | "scale" | "priority" {
172
- if (provider !== "openai" && provider !== "openai-codex") {
173
- return false;
183
+ /** Resolved tier — one of the values that providers actually consume on the wire. */
184
+ export type ResolvedServiceTier = Exclude<ServiceTier, "openai-only" | "claude-only">;
185
+
186
+ /**
187
+ * Resolves a possibly scoped `ServiceTier` to the effective tier for the
188
+ * given provider. Scoped values match their target family and otherwise
189
+ * collapse to `undefined`; unscoped values pass through unchanged.
190
+ */
191
+ export function resolveServiceTier(
192
+ serviceTier: ServiceTier | null | undefined,
193
+ provider: Provider | undefined,
194
+ ): ResolvedServiceTier | undefined {
195
+ if (!serviceTier) return undefined;
196
+ switch (serviceTier) {
197
+ case "openai-only":
198
+ return provider === "openai" || provider === "openai-codex" ? "priority" : undefined;
199
+ case "claude-only":
200
+ return provider === "anthropic" ? "priority" : undefined;
201
+ default:
202
+ return serviceTier;
174
203
  }
175
- return serviceTier === "flex" || serviceTier === "scale" || serviceTier === "priority";
176
204
  }
177
205
 
178
206
  /**
179
- * Premium-request weight contributed by sending a `priority` service tier to
180
- * a provider that supports it. Mirrors GitHub Copilot's `premiumRequests`
181
- * accounting so the "premium requests" stat aggregates priority traffic too.
207
+ * True when the (possibly scoped) tier should be sent as OpenAI's
208
+ * `service_tier` request field for the given provider. Non-OpenAI
209
+ * providers, unsupported tiers (`"auto"`, `"default"`), and scope
210
+ * mismatches all return false.
211
+ */
212
+ export function shouldSendServiceTier(
213
+ serviceTier: ServiceTier | null | undefined,
214
+ provider: Provider | undefined,
215
+ ): boolean {
216
+ if (provider !== "openai" && provider !== "openai-codex") return false;
217
+ const resolved = resolveServiceTier(serviceTier, provider);
218
+ return resolved === "flex" || resolved === "scale" || resolved === "priority";
219
+ }
220
+
221
+ /**
222
+ * Premium-request weight contributed by sending priority to a provider
223
+ * that supports it. Mirrors GitHub Copilot's `premiumRequests` accounting
224
+ * so the "premium requests" stat aggregates priority traffic across the
225
+ * OpenAI family and Anthropic fast-mode realizations.
182
226
  *
183
- * Returns 1 per priority request, 0 otherwise. Non-priority tiers (`flex`,
184
- * `scale`) and providers that ignore `service_tier` always return 0.
227
+ * Returns 1 per resolved priority request, 0 otherwise.
185
228
  */
186
- export function getPriorityPremiumRequests(serviceTier?: ServiceTier | null, provider?: Provider): number {
187
- return shouldSendServiceTier(serviceTier, provider) && serviceTier === "priority" ? 1 : 0;
229
+ export function getPriorityPremiumRequests(
230
+ serviceTier: ServiceTier | null | undefined,
231
+ provider: Provider | undefined,
232
+ ): number {
233
+ if (resolveServiceTier(serviceTier, provider) !== "priority") return 0;
234
+ // Only providers that realize `priority` on the wire bill the user.
235
+ // Everywhere else, the field is silently dropped and nothing is charged.
236
+ return provider === "openai" || provider === "openai-codex" || provider === "anthropic" ? 1 : 0;
188
237
  }
189
238
 
190
239
  export interface ProviderSessionState {
@@ -502,6 +551,14 @@ export interface AssistantMessage {
502
551
  errorMessage?: string;
503
552
  /** HTTP status surfaced by the provider when the request failed. Populated by every provider's catch block alongside `errorMessage` so consumers (auth retry, telemetry, UI) can branch without regex-scraping the message. */
504
553
  errorStatus?: number;
554
+ /**
555
+ * Stable identifiers for request features the provider silently dropped
556
+ * during this turn (e.g. `"priority"`). Set when a server-side rejection
557
+ * triggered an in-provider fallback retry that succeeded without the
558
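+ * feature, and (for Anthropic fast mode) on later turns while the session-level `fastModeDisabled` flag remains set. Callers can use this to sync user-facing toggles back to the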
559
+ * server's actual state.
560
+ */
561
+ disabledFeatures?: string[];
505
562
  /** Provider-specific opaque payload used to reconstruct transport-native history. */
506
563
  providerPayload?: ProviderPayload;
507
564
  timestamp: number; // Unix timestamp in milliseconds