npm - @oh-my-pi/pi-ai - Versions diffs - 15.1.6 → 15.1.7 - Mend

@oh-my-pi/pi-ai 15.1.6 → 15.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/CHANGELOG.md +10 -0
package/dist/types/providers/anthropic.d.ts +20 -1
package/dist/types/types.d.ts +46 -9
package/package.json +2 -2
package/src/providers/anthropic.ts +109 -1
package/src/providers/openai-codex-responses.ts +4 -3
package/src/providers/openai-completions.ts +4 -3
package/src/providers/openai-responses-shared.ts +4 -3
package/src/stream.ts +6 -0
package/src/types.ts +73 -16

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,16 @@
 ## [Unreleased]
+## [15.1.7] - 2026-05-19
+### Added
+- Added Anthropic realization of `serviceTier: "priority"`. The anthropic-messages provider now sets `speed: "fast"` on the request and appends the `fast-mode-2026-02-01` beta to `Anthropic-Beta` whenever the caller passes `serviceTier: "priority"`. When the server rejects an unsupported model with `invalid_request_error`, the provider transparently retries the same turn without the fast-mode signal (mirroring the strict-tools fallback pattern), persists the disable via a new `providerSessionState.fastModeDisabled` flag so subsequent requests in the session skip the field, and surfaces the action via the new `AssistantMessage.disabledFeatures` array (id `"priority"`) so callers can sync user-facing toggles. A new `clearAnthropicFastModeFallback(providerSessionState)` helper lets callers re-arm priority after the auto-fallback fired.
+- Added scoped `ServiceTier` values: `"openai-only"` (priority on `openai`/`openai-codex`, ignored elsewhere) and `"claude-only"` (priority on direct `anthropic`, ignored on Bedrock/Vertex Claude and elsewhere). A new `resolveServiceTier(serviceTier, provider)` helper computes the effective tier for the provider; existing OpenAI/Anthropic provider code routes through it, so `service_tier` and Anthropic fast-mode emission both respect scope. `getPriorityPremiumRequests` now counts Anthropic+priority as one premium request (previously zero) and continues to ignore providers that drop the field on the wire.
+### Fixed
+- Fixed Anthropic fast mode (`serviceTier: "priority"`) looping on 429 `rate_limit_error: "Extra usage is required for fast mode."` for accounts without the extra-usage entitlement. `isAnthropicFastModeUnsupportedError` now matches the 429 phrasing in addition to the 400 `invalid_request_error` "does not support the `speed` parameter" case, so the provider drops `speed: "fast"` on the in-turn retry, sets `providerSessionState.fastModeDisabled` for the remainder of the session, and surfaces `disabledFeatures: ["priority"]` to the caller instead of retrying with the same payload until `PROVIDER_MAX_RETRIES` is exhausted.
 ## [15.1.6] - 2026-05-19
 ### Fixed

package/dist/types/providers/anthropic.d.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import Anthropic, { type ClientOptions as AnthropicSdkClientOptions } from "@anthropic-ai/sdk";
 import type { MessageParam } from "@anthropic-ai/sdk/resources/messages";
-import type { FetchImpl, Message, Model, SimpleStreamOptions, StreamFunction, StreamOptions, Usage } from "../types";
+import type { FetchImpl, Message, Model, ProviderSessionState, ServiceTier, SimpleStreamOptions, StreamFunction, StreamOptions, Usage } from "../types";
 export type AnthropicHeaderOptions = {
     apiKey: string;
     baseUrl?: string;
@@ -17,6 +17,15 @@ type AnthropicCacheControl = {
     type: "ephemeral";
     ttl?: "1h" | "5m";
 };
+/**
+ * Clears the in-session "server rejected fast mode" sticky flag. Call when the
+ * caller is explicitly re-arming `serviceTier: "priority"` (e.g. user toggled
+ * `/fast on` after a previous turn auto-disabled it) so the next request
+ * actually carries `speed: "fast"` again. No-op when the map or state entry
+ * hasn't been materialized yet.
+ */
+export declare function clearAnthropicFastModeFallback(providerSessionState: Map<string, ProviderSessionState> | undefined): void;
+export declare function isAnthropicFastModeUnsupportedError(error: unknown): boolean;
 export declare const claudeCodeVersion = "2.1.63";
 export declare const claudeToolPrefix: string;
 export declare const claudeCodeSystemInstruction = "You are a Claude agent, built on Anthropic's Claude Agent SDK.";
@@ -77,6 +86,16 @@ export interface AnthropicOptions extends StreamOptions {
         name: string;
     };
     betas?: string[] | string;
+    /**
+     * Realization of `serviceTier: "priority"` on Anthropic models. When
+     * `"priority"`, sets `speed: "fast"` on the request and appends the
+     * `fast-mode-2026-02-01` beta header. Anthropic rejects unsupported models
+     * with `invalid_request_error`, which triggers an in-provider one-shot
+     * fallback (see `fastModeDisabled` provider state).
+     *
+     * Other `ServiceTier` values are currently ignored on this provider.
+     */
+    serviceTier?: ServiceTier;
     /** Force OAuth bearer auth mode for proxy tokens that don't match Anthropic token prefixes. */
     isOAuth?: boolean;
     /**

package/dist/types/types.d.ts CHANGED Viewed

@@ -69,18 +69,47 @@ export type ToolChoice = "auto" | "none" | "any" | "required" | {
     name: string;
 };
 export type CacheRetention = "none" | "short" | "long";
-/** OpenAI service tier for processing priority. Only applies to OpenAI-compatible APIs. */
-export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority";
-export declare function shouldSendServiceTier(serviceTier?: ServiceTier | null, provider?: Provider): serviceTier is "flex" | "scale" | "priority";
 /**
- * Premium-request weight contributed by sending a `priority` service tier to
- * a provider that supports it. Mirrors GitHub Copilot's `premiumRequests`
- * accounting so the "premium requests" stat aggregates priority traffic too.
+ * Service tier hint for processing priority / cost control.
  *
- * Returns 1 per priority request, 0 otherwise. Non-priority tiers (`flex`,
- * `scale`) and providers that ignore `service_tier` always return 0.
+ * The unscoped values (`"auto"`, `"default"`, `"flex"`, `"scale"`,
+ * `"priority"`) are passed through to providers that understand them
+ * (OpenAI's `service_tier` field directly; Anthropic translates
+ * `"priority"` into `speed: "fast"` on supported Opus models).
+ *
+ * The scoped values target a specific provider family and behave as the
+ * unscoped value on the matching provider, or `undefined` everywhere else.
+ * They let users opt into priority on one family without paying premium
+ * costs on the other when switching models mid-session.
+ *
+ * - `"openai-only"` → `"priority"` on `openai` and `openai-codex`; ignored elsewhere.
+ * - `"claude-only"` → `"priority"` on direct `anthropic` (not Bedrock/Vertex Claude).
+ */
+export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority" | "openai-only" | "claude-only";
+/** Resolved tier — one of the values that providers actually consume on the wire. */
+export type ResolvedServiceTier = Exclude<ServiceTier, "openai-only" | "claude-only">;
+/**
+ * Resolves a possibly scoped `ServiceTier` to the effective tier for the
+ * given provider. Scoped values match their target family and otherwise
+ * collapse to `undefined`; unscoped values pass through unchanged.
+ */
+export declare function resolveServiceTier(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): ResolvedServiceTier | undefined;
+/**
+ * True when the (possibly scoped) tier should be sent as OpenAI's
+ * `service_tier` request field for the given provider. Non-OpenAI
+ * providers, unsupported tiers (`"auto"`, `"default"`), and scope
+ * mismatches all return false.
  */
-export declare function getPriorityPremiumRequests(serviceTier?: ServiceTier | null, provider?: Provider): number;
+export declare function shouldSendServiceTier(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): boolean;
+/**
+ * Premium-request weight contributed by sending priority to a provider
+ * that supports it. Mirrors GitHub Copilot's `premiumRequests` accounting
+ * so the "premium requests" stat aggregates priority traffic across the
+ * OpenAI family and Anthropic fast-mode realizations.
+ *
+ * Returns 1 per resolved priority request, 0 otherwise.
+ */
+export declare function getPriorityPremiumRequests(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): number;
 export interface ProviderSessionState {
     close(): void;
 }
@@ -371,6 +400,14 @@ export interface AssistantMessage {
     errorMessage?: string;
     /** HTTP status surfaced by the provider when the request failed. Populated by every provider's catch block alongside `errorMessage` so consumers (auth retry, telemetry, UI) can branch without regex-scraping the message. */
     errorStatus?: number;
+    /**
+     * Stable identifiers for request features the provider silently dropped
+     * during this turn (e.g. `"priority"`). Set when a server-side rejection
+     * triggered an in-provider fallback retry that succeeded without the
+     * feature. Callers can use this to sync user-facing toggles back to the
+     * server's actual state.
+     */
+    disabledFeatures?: string[];
     /** Provider-specific opaque payload used to reconstruct transport-native history. */
     providerPayload?: ProviderPayload;
     timestamp: number;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-ai",
-	"version": "15.1.6",
+	"version": "15.1.7",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -43,7 +43,7 @@
 	"dependencies": {
 		"@anthropic-ai/sdk": "^0.94.0",
 		"@bufbuild/protobuf": "^2.12.0",
-		"@oh-my-pi/pi-utils": "15.1.6",
+		"@oh-my-pi/pi-utils": "15.1.7",
 		"openai": "^6.36.0",
 		"partial-json": "^0.1.7",
 		"zod": "4.4.3"

package/src/providers/anthropic.ts CHANGED Viewed

@@ -32,6 +32,7 @@ import type {
 	Model,
 	ProviderSessionState,
 	RedactedThinkingContent,
+	ServiceTier,
 	SimpleStreamOptions,
 	StopReason,
 	StreamFunction,
@@ -43,6 +44,7 @@ import type {
 	ToolResultMessage,
 	Usage,
 } from "../types";
+import { resolveServiceTier } from "../types";
 import {
 	isAnthropicOAuthToken,
 	isRecord,
@@ -111,6 +113,7 @@ const claudeCodeBetaDefaults = [
 ];
 const fineGrainedToolStreamingBeta = "fine-grained-tool-streaming-2025-05-14";
 const interleavedThinkingBeta = "interleaved-thinking-2025-05-14";
+const fastModeBeta = "fast-mode-2026-02-01";
 function getHeaderCaseInsensitive(headers: Record<string, string> | undefined, headerName: string): string | undefined {
 	if (!headers) return undefined;
@@ -224,13 +227,16 @@ const ANTHROPIC_PROVIDER_SESSION_STATE_KEY = "anthropic-messages";
 type AnthropicProviderSessionState = ProviderSessionState & {
 	strictToolsDisabled: boolean;
+	fastModeDisabled: boolean;
 };
 function createAnthropicProviderSessionState(): AnthropicProviderSessionState {
 	const state: AnthropicProviderSessionState = {
 		strictToolsDisabled: false,
+		fastModeDisabled: false,
 		close: () => {
 			state.strictToolsDisabled = false;
+			state.fastModeDisabled = false;
 		},
 	};
 	return state;
@@ -249,6 +255,23 @@ function getAnthropicProviderSessionState(
 	return created;
 }
+/**
+ * Clears the in-session "server rejected fast mode" sticky flag. Call when the
+ * caller is explicitly re-arming `serviceTier: "priority"` (e.g. user toggled
+ * `/fast on` after a previous turn auto-disabled it) so the next request
+ * actually carries `speed: "fast"` again. No-op when the map or state entry
+ * hasn't been materialized yet.
+ */
+export function clearAnthropicFastModeFallback(
+	providerSessionState: Map<string, ProviderSessionState> | undefined,
+): void {
+	if (!providerSessionState) return;
+	const state = providerSessionState.get(ANTHROPIC_PROVIDER_SESSION_STATE_KEY) as
+		| AnthropicProviderSessionState
+		| undefined;
+	if (state) state.fastModeDisabled = false;
+}
 function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
 	if (extractHttpStatusFromError(error) !== 400) return false;
 	const message = error instanceof Error ? error.message : String(error);
@@ -258,11 +281,45 @@ function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
 	return /invalid_request_error/i.test(message) && (isStrictGrammarTooLarge || isSchemaCompilationTooComplex);
 }
+export function isAnthropicFastModeUnsupportedError(error: unknown): boolean {
+	const status = extractHttpStatusFromError(error);
+	if (status !== 400 && status !== 429) return false;
+	const message = error instanceof Error ? error.message : String(error);
+	// 400 invalid_request_error — model doesn't accept `speed` at all.
+	// Observed: "'claude-opus-4-5-20251101' does not support the `speed` parameter."
+	// Stay tolerant of phrasing drift ("is not supported", quoted vs backticked field).
+	if (
+		status === 400 &&
+		/invalid_request_error/i.test(message) &&
+		/\bspeed\b/i.test(message) &&
+		/not support/i.test(message)
+	) {
+		return true;
+	}
+	// 429 rate_limit_error — account lacks the extra-usage entitlement fast mode requires.
+	// Observed: "Extra usage is required for fast mode."
+	if (status === 429 && /rate_limit_error/i.test(message) && /fast mode/i.test(message)) {
+		return true;
+	}
+	return false;
+}
 function hasStrictAnthropicTools(params: MessageCreateParamsStreaming): boolean {
 	const tools = params.tools as Array<{ strict?: unknown }> | undefined;
 	return tools?.some(tool => tool.strict === true) ?? false;
 }
+/**
+ * `speed` lives on `BetaMessageCreateParams` (client.beta.messages) but this
+ * provider posts via `client.messages.create`, whose param type doesn't
+ * include it. This alias narrows the cast to one place.
+ */
+type ParamsWithSpeed = MessageCreateParamsStreaming & { speed?: "fast" };
+function dropAnthropicFastMode(params: MessageCreateParamsStreaming): void {
+	delete (params as ParamsWithSpeed).speed;
+}
 function dropAnthropicStrictTools(params: MessageCreateParamsStreaming): void {
 	const tools = params.tools as Array<{ strict?: unknown }> | undefined;
 	if (!tools) return;
@@ -526,6 +583,16 @@ export interface AnthropicOptions extends StreamOptions {
 	interleavedThinking?: boolean;
 	toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
 	betas?: string[] | string;
+	/**
+	 * Realization of `serviceTier: "priority"` on Anthropic models. When
+	 * `"priority"`, sets `speed: "fast"` on the request and appends the
+	 * `fast-mode-2026-02-01` beta header. Anthropic rejects unsupported models
+	 * with `invalid_request_error`, which triggers an in-provider one-shot
+	 * fallback (see `fastModeDisabled` provider state).
+	 *
+	 * Other `ServiceTier` values are currently ignored on this provider.
+	 */
+	serviceTier?: ServiceTier;
 	/** Force OAuth bearer auth mode for proxy tokens that don't match Anthropic token prefixes. */
 	isOAuth?: boolean;
 	/**
@@ -961,10 +1028,16 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 			} else {
 				const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
+				const extraBetas = normalizeExtraBetas(options?.betas);
+				const wantsAnthropicPriority = resolveServiceTier(options?.serviceTier, model.provider) === "priority";
+				if (wantsAnthropicPriority && !extraBetas.includes(fastModeBeta)) {
+					extraBetas.push(fastModeBeta);
+				}
 				const created = createClient(model, {
 					model,
 					apiKey,
-					extraBetas: normalizeExtraBetas(options?.betas),
+					extraBetas,
 					stream: true,
 					interleavedThinking: options?.interleavedThinking ?? true,
 					headers: options?.headers,
@@ -984,6 +1057,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 			let disableStrictTools =
 				(providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
 			let strictFallbackErrorMessage: string | undefined;
+			let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
 			const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
 				let nextParams = buildParams(model, baseUrl, context, isOAuthToken, options, disableStrictTools);
 				const replacementPayload = await options?.onPayload?.(nextParams, model);
@@ -993,6 +1067,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 				if (disableStrictTools) {
 					dropAnthropicStrictTools(nextParams);
 				}
+				if (dropFastMode) {
+					dropAnthropicFastMode(nextParams);
+				}
 				rawRequestDump = {
 					provider: model.provider,
 					api: output.api,
@@ -1284,6 +1361,30 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 						firstTokenTime = undefined;
 						continue;
 					}
+					if (
+						!dropFastMode &&
+						resolveServiceTier(options?.serviceTier, model.provider) === "priority" &&
+						firstTokenTime === undefined &&
+						isAnthropicFastModeUnsupportedError(streamFailure)
+					) {
+						logger.debug("anthropic: fast mode unsupported, retrying without speed", {
+							model: model.id,
+							error: streamFailure instanceof Error ? streamFailure.message : String(streamFailure),
+						});
+						if (providerSessionState) {
+							providerSessionState.fastModeDisabled = true;
+						}
+						dropFastMode = true;
+						params = await prepareParams();
+						providerRetryAttempt = 0;
+						output.content.length = 0;
+						output.responseId = undefined;
+						output.providerPayload = undefined;
+						output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
+						output.stopReason = "stop";
+						firstTokenTime = undefined;
+						continue;
+					}
 					const isTransientEnvelopeFailure =
 						isTransientStreamParseError(streamFailure) || isTransientStreamEnvelopeError(streamFailure);
 					const canRetryTransientEnvelopeFailure = isTransientEnvelopeFailure && !streamedReplayUnsafeContent;
@@ -1315,6 +1416,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 			output.duration = Date.now() - startTime;
 			if (firstTokenTime) output.ttft = firstTokenTime - startTime;
+			if (dropFastMode && resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
+				output.disabledFeatures = [...(output.disabledFeatures ?? []), "priority"];
+			}
 			stream.push({ type: "done", reason: output.stopReason, message: output });
 			stream.end();
 		} catch (error) {
@@ -1862,6 +1966,10 @@ function buildParams(
 		params.metadata = { user_id: metadataUserId };
 	}
+	if (resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
+		(params as ParamsWithSpeed).speed = "fast";
+	}
 	if (options?.toolChoice) {
 		if (typeof options.toolChoice === "string") {
 			params.tool_choice = { type: options.toolChoice };

package/src/providers/openai-codex-responses.ts CHANGED Viewed

@@ -29,10 +29,10 @@ import {
 	type FetchImpl,
 	type Model,
 	type ProviderSessionState,
+	resolveServiceTier,
 	type ServiceTier,
 	type StreamFunction,
 	type StreamOptions,
-	shouldSendServiceTier,
 	type TextContent,
 	type ThinkingContent,
 	type Tool,
@@ -590,8 +590,9 @@ async function buildTransformedCodexRequestBody(
 	if (options?.repetitionPenalty !== undefined) {
 		params.repetition_penalty = options.repetitionPenalty;
 	}
-	if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
-		params.service_tier = options.serviceTier;
+	const resolvedServiceTier = resolveServiceTier(options?.serviceTier, model.provider);
+	if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
+		params.service_tier = resolvedServiceTier;
 	}
 	if (context.tools && context.tools.length > 0) {
 		params.tools = convertOpenAICodexResponsesTools(context.tools, model);

package/src/providers/openai-completions.ts CHANGED Viewed

@@ -22,11 +22,11 @@ import {
 	type Model,
 	type OpenAICompat,
 	type ProviderSessionState,
+	resolveServiceTier,
 	type ServiceTier,
 	type StopReason,
 	type StreamFunction,
 	type StreamOptions,
-	shouldSendServiceTier,
 	type TextContent,
 	type ThinkingContent,
 	type Tool,
@@ -1092,8 +1092,9 @@ function buildParams(
 	if (options?.frequencyPenalty !== undefined) {
 		params.frequency_penalty = options.frequencyPenalty;
 	}
-	if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
-		params.service_tier = options.serviceTier;
+	const resolvedServiceTier = resolveServiceTier(options?.serviceTier, model.provider);
+	if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
+		params.service_tier = resolvedServiceTier;
 	}
 	if (context.tools) {

package/src/providers/openai-responses-shared.ts CHANGED Viewed

@@ -17,10 +17,10 @@ import {
 	type AssistantMessage,
 	type ImageContent,
 	type Model,
+	resolveServiceTier,
 	type ServiceTier,
 	type StopReason,
 	type StreamOptions,
-	shouldSendServiceTier,
 	type TextContent,
 	type TextSignatureV1,
 	type ThinkingContent,
@@ -650,8 +650,9 @@ export function applyCommonResponsesSamplingParams<P extends CommonResponsesPara
 	if (options?.minP !== undefined) params.min_p = options.minP;
 	if (options?.presencePenalty !== undefined) params.presence_penalty = options.presencePenalty;
 	if (options?.repetitionPenalty !== undefined) params.repetition_penalty = options.repetitionPenalty;
-	if (shouldSendServiceTier(options?.serviceTier, provider)) {
-		params.service_tier = options.serviceTier;
+	const resolvedServiceTier = resolveServiceTier(options?.serviceTier, provider);
+	if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
+		params.service_tier = resolvedServiceTier;
 	}
 }

package/src/stream.ts CHANGED Viewed

@@ -580,6 +580,7 @@ function mapOptionsForApi<TApi extends Api>(
 					thinkingEnabled: false,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
 					thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
+					serviceTier: options?.serviceTier,
 				});
 			}
@@ -590,6 +591,7 @@ function mapOptionsForApi<TApi extends Api>(
 					thinkingEnabled: false,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
 					thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
+					serviceTier: options?.serviceTier,
 				});
 			}
@@ -603,6 +605,7 @@ function mapOptionsForApi<TApi extends Api>(
 					effort,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
 					thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
+					serviceTier: options?.serviceTier,
 				});
 			}
@@ -613,6 +616,7 @@ function mapOptionsForApi<TApi extends Api>(
 					thinkingBudgetTokens: thinkingBudget,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
 					thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
+					serviceTier: options?.serviceTier,
 				});
 			}
@@ -631,6 +635,7 @@ function mapOptionsForApi<TApi extends Api>(
 					thinkingEnabled: false,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
 					thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
+					serviceTier: options?.serviceTier,
 				});
 			} else {
 				return castApi<"anthropic-messages">({
@@ -640,6 +645,7 @@ function mapOptionsForApi<TApi extends Api>(
 					thinkingBudgetTokens: thinkingBudget,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
 					thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
+					serviceTier: options?.serviceTier,
 				});
 			}
 		}

package/src/types.ts CHANGED Viewed

@@ -162,29 +162,78 @@ export type ToolChoice =
 // Base options all providers share
 export type CacheRetention = "none" | "short" | "long";
-/** OpenAI service tier for processing priority. Only applies to OpenAI-compatible APIs. */
-export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority";
+/**
+ * Service tier hint for processing priority / cost control.
+ *
+ * The unscoped values (`"auto"`, `"default"`, `"flex"`, `"scale"`,
+ * `"priority"`) are passed through to providers that understand them
+ * (OpenAI's `service_tier` field directly; Anthropic translates
+ * `"priority"` into `speed: "fast"` on supported Opus models).
+ *
+ * The scoped values target a specific provider family and behave as the
+ * unscoped value on the matching provider, or `undefined` everywhere else.
+ * They let users opt into priority on one family without paying premium
+ * costs on the other when switching models mid-session.
+ *
+ * - `"openai-only"` → `"priority"` on `openai` and `openai-codex`; ignored elsewhere.
+ * - `"claude-only"` → `"priority"` on direct `anthropic` (not Bedrock/Vertex Claude).
+ */
+export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority" | "openai-only" | "claude-only";
-export function shouldSendServiceTier(
-	serviceTier?: ServiceTier | null,
-	provider?: Provider,
-): serviceTier is "flex" | "scale" | "priority" {
-	if (provider !== "openai" && provider !== "openai-codex") {
-		return false;
+/** Resolved tier — one of the values that providers actually consume on the wire. */
+export type ResolvedServiceTier = Exclude<ServiceTier, "openai-only" | "claude-only">;
+/**
+ * Resolves a possibly scoped `ServiceTier` to the effective tier for the
+ * given provider. Scoped values match their target family and otherwise
+ * collapse to `undefined`; unscoped values pass through unchanged.
+ */
+export function resolveServiceTier(
+	serviceTier: ServiceTier | null | undefined,
+	provider: Provider | undefined,
+): ResolvedServiceTier | undefined {
+	if (!serviceTier) return undefined;
+	switch (serviceTier) {
+		case "openai-only":
+			return provider === "openai" || provider === "openai-codex" ? "priority" : undefined;
+		case "claude-only":
+			return provider === "anthropic" ? "priority" : undefined;
+		default:
+			return serviceTier;
 	}
-	return serviceTier === "flex" || serviceTier === "scale" || serviceTier === "priority";
 }
 /**
- * Premium-request weight contributed by sending a `priority` service tier to
- * a provider that supports it. Mirrors GitHub Copilot's `premiumRequests`
- * accounting so the "premium requests" stat aggregates priority traffic too.
+ * True when the (possibly scoped) tier should be sent as OpenAI's
+ * `service_tier` request field for the given provider. Non-OpenAI
+ * providers, unsupported tiers (`"auto"`, `"default"`), and scope
+ * mismatches all return false.
+ */
+export function shouldSendServiceTier(
+	serviceTier: ServiceTier | null | undefined,
+	provider: Provider | undefined,
+): boolean {
+	if (provider !== "openai" && provider !== "openai-codex") return false;
+	const resolved = resolveServiceTier(serviceTier, provider);
+	return resolved === "flex" || resolved === "scale" || resolved === "priority";
+}
+/**
+ * Premium-request weight contributed by sending priority to a provider
+ * that supports it. Mirrors GitHub Copilot's `premiumRequests` accounting
+ * so the "premium requests" stat aggregates priority traffic across the
+ * OpenAI family and Anthropic fast-mode realizations.
  *
- * Returns 1 per priority request, 0 otherwise. Non-priority tiers (`flex`,
- * `scale`) and providers that ignore `service_tier` always return 0.
+ * Returns 1 per resolved priority request, 0 otherwise.
  */
-export function getPriorityPremiumRequests(serviceTier?: ServiceTier | null, provider?: Provider): number {
-	return shouldSendServiceTier(serviceTier, provider) && serviceTier === "priority" ? 1 : 0;
+export function getPriorityPremiumRequests(
+	serviceTier: ServiceTier | null | undefined,
+	provider: Provider | undefined,
+): number {
+	if (resolveServiceTier(serviceTier, provider) !== "priority") return 0;
+	// Only providers that realize `priority` on the wire bill the user.
+	// Everywhere else, the field is silently dropped and nothing is charged.
+	return provider === "openai" || provider === "openai-codex" || provider === "anthropic" ? 1 : 0;
 }
 export interface ProviderSessionState {
@@ -502,6 +551,14 @@ export interface AssistantMessage {
 	errorMessage?: string;
 	/** HTTP status surfaced by the provider when the request failed. Populated by every provider's catch block alongside `errorMessage` so consumers (auth retry, telemetry, UI) can branch without regex-scraping the message. */
 	errorStatus?: number;
+	/**
+	 * Stable identifiers for request features the provider silently dropped
+	 * during this turn (e.g. `"priority"`). Set when a server-side rejection
+	 * triggered an in-provider fallback retry that succeeded without the
+	 * feature. Callers can use this to sync user-facing toggles back to the
+	 * server's actual state.
+	 */
+	disabledFeatures?: string[];
 	/** Provider-specific opaque payload used to reconstruct transport-native history. */
 	providerPayload?: ProviderPayload;
 	timestamp: number; // Unix timestamp in milliseconds