npm - @mariozechner/pi-ai - Versions diffs - 0.71.0 → 0.72.0 - Mend

@mariozechner/pi-ai 0.71.0 → 0.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/README.md +12 -1
package/dist/env-api-keys.d.ts.map +1 -1
package/dist/env-api-keys.js +1 -0
package/dist/env-api-keys.js.map +1 -1
package/dist/index.d.ts +2 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1 -0
package/dist/index.js.map +1 -1
package/dist/models.d.ts +3 -10
package/dist/models.d.ts.map +1 -1
package/dist/models.generated.d.ts +730 -59
package/dist/models.generated.d.ts.map +1 -1
package/dist/models.generated.js +306 -28
package/dist/models.generated.js.map +1 -1
package/dist/models.js +28 -21
package/dist/models.js.map +1 -1
package/dist/providers/amazon-bedrock.d.ts.map +1 -1
package/dist/providers/amazon-bedrock.js +5 -11
package/dist/providers/amazon-bedrock.js.map +1 -1
package/dist/providers/anthropic.d.ts.map +1 -1
package/dist/providers/anthropic.js +5 -11
package/dist/providers/anthropic.js.map +1 -1
package/dist/providers/azure-openai-responses.d.ts.map +1 -1
package/dist/providers/azure-openai-responses.js +12 -6
package/dist/providers/azure-openai-responses.js.map +1 -1
package/dist/providers/google-vertex.d.ts.map +1 -1
package/dist/providers/google-vertex.js +4 -3
package/dist/providers/google-vertex.js.map +1 -1
package/dist/providers/google.d.ts.map +1 -1
package/dist/providers/google.js +4 -3
package/dist/providers/google.js.map +1 -1
package/dist/providers/mistral.d.ts.map +1 -1
package/dist/providers/mistral.js +7 -6
package/dist/providers/mistral.js.map +1 -1
package/dist/providers/openai-codex-responses.d.ts +15 -0
package/dist/providers/openai-codex-responses.d.ts.map +1 -1
package/dist/providers/openai-codex-responses.js +151 -21
package/dist/providers/openai-codex-responses.js.map +1 -1
package/dist/providers/openai-completions.d.ts.map +1 -1
package/dist/providers/openai-completions.js +16 -31
package/dist/providers/openai-completions.js.map +1 -1
package/dist/providers/openai-responses.d.ts.map +1 -1
package/dist/providers/openai-responses.js +12 -6
package/dist/providers/openai-responses.js.map +1 -1
package/dist/types.d.ts +9 -4
package/dist/types.d.ts.map +1 -1
package/dist/types.js.map +1 -1
package/dist/utils/overflow.d.ts +2 -0
package/dist/utils/overflow.d.ts.map +1 -1
package/dist/utils/overflow.js +14 -0
package/dist/utils/overflow.js.map +1 -1
package/package.json +1 -1

package/dist/types.d.ts CHANGED Viewed

@@ -2,9 +2,11 @@ import type { AssistantMessageEventStream } from "./utils/event-stream.js";
 export type { AssistantMessageEventStream } from "./utils/event-stream.js";
 export type KnownApi = "openai-completions" | "mistral-conversations" | "openai-responses" | "azure-openai-responses" | "openai-codex-responses" | "anthropic-messages" | "bedrock-converse-stream" | "google-generative-ai" | "google-vertex";
 export type Api = KnownApi | (string & {});
-export type KnownProvider = "amazon-bedrock" | "anthropic" | "google" | "google-vertex" | "openai" | "azure-openai-responses" | "openai-codex" | "deepseek" | "github-copilot" | "xai" | "groq" | "cerebras" | "openrouter" | "vercel-ai-gateway" | "zai" | "mistral" | "minimax" | "minimax-cn" | "moonshotai" | "moonshotai-cn" | "huggingface" | "fireworks" | "opencode" | "opencode-go" | "kimi-coding" | "cloudflare-workers-ai" | "cloudflare-ai-gateway";
+export type KnownProvider = "amazon-bedrock" | "anthropic" | "google" | "google-vertex" | "openai" | "azure-openai-responses" | "openai-codex" | "deepseek" | "github-copilot" | "xai" | "groq" | "cerebras" | "openrouter" | "vercel-ai-gateway" | "zai" | "mistral" | "minimax" | "minimax-cn" | "moonshotai" | "moonshotai-cn" | "huggingface" | "fireworks" | "opencode" | "opencode-go" | "kimi-coding" | "cloudflare-workers-ai" | "cloudflare-ai-gateway" | "xiaomi";
 export type Provider = KnownProvider | string;
 export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
+export type ModelThinkingLevel = "off" | ThinkingLevel;
+export type ThinkingLevelMap = Partial<Record<ModelThinkingLevel, string | null>>;
 /** Token budgets for each thinking level (token-based providers only) */
 export interface ThinkingBudgets {
     minimal?: number;
@@ -13,7 +15,7 @@ export interface ThinkingBudgets {
     high?: number;
 }
 export type CacheRetention = "none" | "short" | "long";
-export type Transport = "sse" | "websocket" | "auto";
+export type Transport = "sse" | "websocket" | "websocket-cached" | "auto";
 export interface ProviderResponse {
     status: number;
     headers: Record<string, string>;
@@ -245,8 +247,6 @@ export interface OpenAICompletionsCompat {
     supportsDeveloperRole?: boolean;
     /** Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. */
     supportsReasoningEffort?: boolean;
-    /** Optional mapping from pi-ai reasoning levels to provider/model-specific `reasoning_effort` values. */
-    reasoningEffortMap?: Partial<Record<ThinkingLevel, string>>;
     /** Whether the provider supports `stream_options: { include_usage: true }` for token usage in streaming responses. Default: true. */
     supportsUsageInStreaming?: boolean;
     /** Which field to use for max tokens. Default: auto-detected from URL. */
@@ -382,6 +382,11 @@ export interface Model<TApi extends Api> {
     provider: Provider;
     baseUrl: string;
     reasoning: boolean;
+    /**
+     * Maps pi thinking levels to provider/model-specific values.
+     * Missing keys use provider defaults. null marks a level as unsupported.
+     */
+    thinkingLevelMap?: ThinkingLevelMap;
     input: ("text" | "image")[];
     cost: {
         input: number;

package/dist/types.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,2BAA2B,EAAE,MAAM,yBAAyB,CAAC;AAE3E,YAAY,EAAE,2BAA2B,EAAE,MAAM,yBAAyB,CAAC;AAE3E,MAAM,MAAM,QAAQ,GACjB,oBAAoB,GACpB,uBAAuB,GACvB,kBAAkB,GAClB,wBAAwB,GACxB,wBAAwB,GACxB,oBAAoB,GACpB,yBAAyB,GACzB,sBAAsB,GACtB,eAAe,CAAC;AAEnB,MAAM,MAAM,GAAG,GAAG,QAAQ,GAAG,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;AAE3C,MAAM,MAAM,aAAa,GACtB,gBAAgB,GAChB,WAAW,GACX,QAAQ,GACR,eAAe,GACf,QAAQ,GACR,wBAAwB,GACxB,cAAc,GACd,UAAU,GACV,gBAAgB,GAChB,KAAK,GACL,MAAM,GACN,UAAU,GACV,YAAY,GACZ,mBAAmB,GACnB,KAAK,GACL,SAAS,GACT,SAAS,GACT,YAAY,GACZ,YAAY,GACZ,eAAe,GACf,aAAa,GACb,WAAW,GACX,UAAU,GACV,aAAa,GACb,aAAa,GACb,uBAAuB,GACvB,uBAAuB,CAAC;AAC3B,MAAM,MAAM,QAAQ,GAAG,aAAa,GAAG,MAAM,CAAC;AAE9C,MAAM,MAAM,aAAa,GAAG,SAAS,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,OAAO,CAAC;AAE5E,yEAAyE;AACzE,MAAM,WAAW,eAAe;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;CACd;AAGD,MAAM,MAAM,cAAc,GAAG,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC;AAEvD,MAAM,MAAM,SAAS,GAAG,KAAK,GAAG,WAAW,GAAG,MAAM,CAAC;AAErD,MAAM,WAAW,gBAAgB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,aAAa;IAC7B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;;OAGG;IACH,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB;;;OAGG;IACH,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,SAAS,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,KAAK,OAAO,GAAG,SAAS,GAAG,OAAO,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;IACxG;;;OAGG;IACH,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,gBAAgB,EAAE,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACrF;;;;OAIG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB;;;;;;OAMG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;;OAIG;IACH,QAAQ,CA
AC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACnC;AAED,MAAM,MAAM,qBAAqB,GAAG,aAAa,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAG5E,MAAM,WAAW,mBAAoB,SAAQ,aAAa;IACzD,SAAS,CAAC,EAAE,aAAa,CAAC;IAC1B,4EAA4E;IAC5E,eAAe,CAAC,EAAE,eAAe,CAAC;CAClC;AAUD,MAAM,MAAM,cAAc,CAAC,IAAI,SAAS,GAAG,GAAG,GAAG,EAAE,QAAQ,SAAS,aAAa,GAAG,aAAa,IAAI,CACpG,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,EAClB,OAAO,EAAE,OAAO,EAChB,OAAO,CAAC,EAAE,QAAQ,KACd,2BAA2B,CAAC;AAEjC,MAAM,WAAW,eAAe;IAC/B,CAAC,EAAE,CAAC,CAAC;IACL,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,CAAC,EAAE,YAAY,GAAG,cAAc,CAAC;CACtC;AAED,MAAM,WAAW,WAAW;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,aAAa,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,eAAe;IAC/B,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B;;gDAE4C;IAC5C,QAAQ,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,YAAY;IAC5B,IAAI,EAAE,OAAO,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,QAAQ;IACxB,IAAI,EAAE,UAAU,CAAC;IACjB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC/B,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,KAAK;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE;QACL,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;QACf,SAAS,EAAE,MAAM,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;KACd,CAAC;CACF;AAED,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,SAAS,GAAG,OAAO,GAAG,SAAS,CAAC;AAE7E,MAAM,WAAW,WAAW;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,GAAG,CAAC,WAAW,GAAG,YAAY,CAAC,EAAE,CAAC;IACjD,SAAS,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,gBAAgB;IAChC,IAAI,EAAE,WAAW,CAAC;IAClB,OAAO,EAAE,CAAC,WAAW,GAAG,eAAe,GAAG,QAAQ,CAAC,EAAE,CAAC;IACtD,GAAG,EAAE,GAAG,CAAC;IACT,QAAQ,EAAE,QAAQ,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,KAAK,CAAC;IACb,UAAU,EAAE,UAAU,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,iBAAiB,CAAC,QAA
Q,GAAG,GAAG;IAChD,IAAI,EAAE,YAAY,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,CAAC,WAAW,GAAG,YAAY,CAAC,EAAE,CAAC;IACxC,OAAO,CAAC,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,MAAM,OAAO,GAAG,WAAW,GAAG,gBAAgB,GAAG,iBAAiB,CAAC;AAEzE,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAEvC,MAAM,WAAW,IAAI,CAAC,WAAW,SAAS,OAAO,GAAG,OAAO;IAC1D,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,WAAW,CAAC;CACxB;AAED,MAAM,WAAW,OAAO;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,IAAI,EAAE,CAAC;CACf;AAED;;;;;;;GAOG;AACH,MAAM,MAAM,qBAAqB,GAC9B;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GAC5C;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GACvE;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GACtF;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GACtF;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GAC3E;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GAC1F;IAAE,IAAI,EAAE,cAAc,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GAC1F;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GAC3E;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GAC1F;IAAE,IAAI,EAAE,cAAc,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,QAAQ,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GAC7F;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,OAAO,CAAC,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,SAAS,CAAC,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GACvG;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,MAAM,EAAE,OAAO,CAAC,UAAU,EAAE,SAAS,GAAG,OAAO,CAAC,CAAC;IAAC,KAAK,EAAE,gBAAgB,CAAA;CAAE,CAAC;AAEhG;;;GAGG;AACH,MAAM,WAAW,uBAAuB;IACvC,wFAAwF;IACxF,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,yGAAyG;IACz
G,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,yFAAyF;IACzF,uBAAuB,CAAC,EAAE,OAAO,CAAC;IAClC,yGAAyG;IACzG,kBAAkB,CAAC,EAAE,OAAO,CAAC,MAAM,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC,CAAC;IAC5D,qIAAqI;IACrI,wBAAwB,CAAC,EAAE,OAAO,CAAC;IACnC,0EAA0E;IAC1E,cAAc,CAAC,EAAE,uBAAuB,GAAG,YAAY,CAAC;IACxD,sFAAsF;IACtF,sBAAsB,CAAC,EAAE,OAAO,CAAC;IACjC,2HAA2H;IAC3H,gCAAgC,CAAC,EAAE,OAAO,CAAC;IAC3C,4HAA4H;IAC5H,sBAAsB,CAAC,EAAE,OAAO,CAAC;IACjC,wJAAwJ;IACxJ,2CAA2C,CAAC,EAAE,OAAO,CAAC;IACtD,wWAAwW;IACxW,cAAc,CAAC,EAAE,QAAQ,GAAG,YAAY,GAAG,UAAU,GAAG,KAAK,GAAG,MAAM,GAAG,oBAAoB,CAAC;IAC9F,4FAA4F;IAC5F,iBAAiB,CAAC,EAAE,iBAAiB,CAAC;IACtC,iGAAiG;IACjG,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,0GAA0G;IAC1G,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,2FAA2F;IAC3F,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,iMAAiM;IACjM,kBAAkB,CAAC,EAAE,WAAW,CAAC;IACjC,mLAAmL;IACnL,0BAA0B,CAAC,EAAE,OAAO,CAAC;IACrC,oLAAoL;IACpL,0BAA0B,CAAC,EAAE,OAAO,CAAC;CACrC;AAED,wDAAwD;AACxD,MAAM,WAAW,qBAAqB;IACrC,qIAAqI;IACrI,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,oFAAoF;IACpF,0BAA0B,CAAC,EAAE,OAAO,CAAC;CACrC;AAED,qEAAqE;AACrE,MAAM,WAAW,uBAAuB;IACvC;;;;;;OAMG;IACH,+BAA+B,CAAC,EAAE,OAAO,CAAC;IAC1C,+GAA+G;IAC/G,0BAA0B,CAAC,EAAE,OAAO,CAAC;CACrC;AAED;;;;;GAKG;AACH,MAAM,WAAW,iBAAiB;IACjC,0EAA0E;IAC1E,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,4GAA4G;IAC5G,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,yJAAyJ;IACzJ,eAAe,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;IACnC,+EAA+E;IAC/E,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,+EAA+E;IAC/E,wBAAwB,CAAC,EAAE,OAAO,CAAC;IACnC,2GAA2G;IAC3G,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IACjB,0EAA0E;IAC1E,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,6DAA6D;IAC7D,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,kIAAkI;IAClI,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,yHAAyH;IACzH,IAAI,CAAC,EACF,MAAM,GACN;QACA,4DAA4D;QAC5D,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,0DAA0D;QAC1D,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;KACzB,CAAC;IACL,8CAA8C;IAC9C,SAAS,CAAC,EAAE;QACX,uCAAuC;QACvC,MAAM,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;QACzB,2CAA2C;QAC3C,UAAU,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;QAC7B,uBAAuB;QACvB,KAAK,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;QACxB,4BAA4B;QAC5B,KAAK,CAAC,EAAE,MAAM,GAAG,MAAM,
CAAC;QACxB,yBAAyB;QACzB,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;KAC1B,CAAC;IACF,oIAAoI;IACpI,wBAAwB,CAAC,EACtB,MAAM,GACN;QACA,oDAAoD;QACpD,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,oDAAoD;QACpD,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,oDAAoD;QACpD,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,oDAAoD;QACpD,GAAG,CAAC,EAAE,MAAM,CAAC;KACZ,CAAC;IACL,2HAA2H;IAC3H,qBAAqB,CAAC,EACnB,MAAM,GACN;QACA,yDAAyD;QACzD,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,yDAAyD;QACzD,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,yDAAyD;QACzD,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,yDAAyD;QACzD,GAAG,CAAC,EAAE,MAAM,CAAC;KACZ,CAAC;CACL;AAED;;;;GAIG;AACH,MAAM,WAAW,oBAAoB;IACpC,mGAAmG;IACnG,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,8EAA8E;IAC9E,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAGD,MAAM,WAAW,KAAK,CAAC,IAAI,SAAS,GAAG;IACtC,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,IAAI,CAAC;IACV,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,CAAC,MAAM,GAAG,OAAO,CAAC,EAAE,CAAC;IAC5B,IAAI,EAAE;QACL,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;QACf,SAAS,EAAE,MAAM,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,kGAAkG;IAClG,MAAM,CAAC,EAAE,IAAI,SAAS,oBAAoB,GACvC,uBAAuB,GACvB,IAAI,SAAS,kBAAkB,GAC9B,qBAAqB,GACrB,IAAI,SAAS,oBAAoB,GAChC,uBAAuB,GACvB,KAAK,CAAC;CACX","sourcesContent":["import type { AssistantMessageEventStream } from \"./utils/event-stream.js\";\n\nexport type { AssistantMessageEventStream } from \"./utils/event-stream.js\";\n\nexport type KnownApi =\n\t\| \"openai-completions\"\n\t\| \"mistral-conversations\"\n\t\| \"openai-responses\"\n\t\| \"azure-openai-responses\"\n\t\| \"openai-codex-responses\"\n\t\| \"anthropic-messages\"\n\t\| \"bedrock-converse-stream\"\n\t\| \"google-generative-ai\"\n\t\| \"google-vertex\";\n\nexport type Api = KnownApi \| (string & {});\n\nexport type KnownProvider =\n\t\| \"amazon-bedrock\"\n\t\| \"anthropic\"\n\t\| \"google\"\n\t\| \"google-vertex\"\n\t\| \"openai\"\n\t\| 
\"azure-openai-responses\"\n\t\| \"openai-codex\"\n\t\| \"deepseek\"\n\t\| \"github-copilot\"\n\t\| \"xai\"\n\t\| \"groq\"\n\t\| \"cerebras\"\n\t\| \"openrouter\"\n\t\| \"vercel-ai-gateway\"\n\t\| \"zai\"\n\t\| \"mistral\"\n\t\| \"minimax\"\n\t\| \"minimax-cn\"\n\t\| \"moonshotai\"\n\t\| \"moonshotai-cn\"\n\t\| \"huggingface\"\n\t\| \"fireworks\"\n\t\| \"opencode\"\n\t\| \"opencode-go\"\n\t\| \"kimi-coding\"\n\t\| \"cloudflare-workers-ai\"\n\t\| \"cloudflare-ai-gateway\";\nexport type Provider = KnownProvider \| string;\n\nexport type ThinkingLevel = \"minimal\" \| \"low\" \| \"medium\" \| \"high\" \| \"xhigh\";\n\n/** Token budgets for each thinking level (token-based providers only) /\nexport interface ThinkingBudgets {\n\tminimal?: number;\n\tlow?: number;\n\tmedium?: number;\n\thigh?: number;\n}\n\n// Base options all providers share\nexport type CacheRetention = \"none\" \| \"short\" \| \"long\";\n\nexport type Transport = \"sse\" \| \"websocket\" \| \"auto\";\n\nexport interface ProviderResponse {\n\tstatus: number;\n\theaders: Record<string, string>;\n}\n\nexport interface StreamOptions {\n\ttemperature?: number;\n\tmaxTokens?: number;\n\tsignal?: AbortSignal;\n\tapiKey?: string;\n\t/\n\t Preferred transport for providers that support multiple transports.\n\t * Providers that do not support this option ignore it.\n\t /\n\ttransport?: Transport;\n\t/\n\t Prompt cache retention preference. Providers map this to their supported values.\n\t * Default: \"short\".\n\t /\n\tcacheRetention?: CacheRetention;\n\t/\n\t Optional session identifier for providers that support session-based caching.\n\t * Providers can use this to enable prompt caching, request routing, or other\n\t * session-aware features. 
Ignored by providers that don't support it.\n\t /\n\tsessionId?: string;\n\t/\n\t Optional callback for inspecting or replacing provider payloads before sending.\n\t * Return undefined to keep the payload unchanged.\n\t /\n\tonPayload?: (payload: unknown, model: Model<Api>) => unknown \| undefined \| Promise<unknown \| undefined>;\n\t/\n\t Optional callback invoked after an HTTP response is received and before\n\t * its body stream is consumed.\n\t /\n\tonResponse?: (response: ProviderResponse, model: Model<Api>) => void \| Promise<void>;\n\t/\n\t Optional custom HTTP headers to include in API requests.\n\t * Merged with provider defaults; can override default headers.\n\t * Not supported by all providers (e.g., AWS Bedrock uses SDK auth).\n\t /\n\theaders?: Record<string, string>;\n\t/\n\t HTTP request timeout in milliseconds for providers/SDKs that support it.\n\t * For example, OpenAI and Anthropic SDK clients default to 10 minutes.\n\t /\n\ttimeoutMs?: number;\n\t/\n\t Maximum retry attempts for providers/SDKs that support client-side retries.\n\t * For example, OpenAI and Anthropic SDK clients default to 2.\n\t /\n\tmaxRetries?: number;\n\t/\n\t Maximum delay in milliseconds to wait for a retry when the server requests a long wait.\n\t * If the server's requested delay exceeds this value, the request fails immediately\n\t * with an error containing the requested delay, allowing higher-level retry logic\n\t * to handle it with user visibility.\n\t * Default: 60000 (60 seconds). 
Set to 0 to disable the cap.\n\t /\n\tmaxRetryDelayMs?: number;\n\t/\n\t Optional metadata to include in API requests.\n\t * Providers extract the fields they understand and ignore the rest.\n\t * For example, Anthropic uses `user_id` for abuse tracking and rate limiting.\n\t /\n\tmetadata?: Record<string, unknown>;\n}\n\nexport type ProviderStreamOptions = StreamOptions & Record<string, unknown>;\n\n// Unified options with reasoning passed to streamSimple() and completeSimple()\nexport interface SimpleStreamOptions extends StreamOptions {\n\treasoning?: ThinkingLevel;\n\t/* Custom token budgets for thinking levels (token-based providers only) /\n\tthinkingBudgets?: ThinkingBudgets;\n}\n\n// Generic StreamFunction with typed options.\n//\n// Contract:\n// - Must return an AssistantMessageEventStream.\n// - Once invoked, request/model/runtime failures should be encoded in the\n// returned stream, not thrown.\n// - Error termination must produce an AssistantMessage with stopReason\n// \"error\" or \"aborted\" and errorMessage, emitted via the stream protocol.\nexport type StreamFunction<TApi extends Api = Api, TOptions extends StreamOptions = StreamOptions> = (\n\tmodel: Model<TApi>,\n\tcontext: Context,\n\toptions?: TOptions,\n) => AssistantMessageEventStream;\n\nexport interface TextSignatureV1 {\n\tv: 1;\n\tid: string;\n\tphase?: \"commentary\" \| \"final_answer\";\n}\n\nexport interface TextContent {\n\ttype: \"text\";\n\ttext: string;\n\ttextSignature?: string; // e.g., for OpenAI responses, message metadata (legacy id string or TextSignatureV1 JSON)\n}\n\nexport interface ThinkingContent {\n\ttype: \"thinking\";\n\tthinking: string;\n\tthinkingSignature?: string; // e.g., for OpenAI responses, the reasoning item ID\n\t/* When true, the thinking content was redacted by safety filters. The opaque\n\t * encrypted payload is stored in `thinkingSignature` so it can be passed back\n\t * to the API for multi-turn continuity. 
/\n\tredacted?: boolean;\n}\n\nexport interface ImageContent {\n\ttype: \"image\";\n\tdata: string; // base64 encoded image data\n\tmimeType: string; // e.g., \"image/jpeg\", \"image/png\"\n}\n\nexport interface ToolCall {\n\ttype: \"toolCall\";\n\tid: string;\n\tname: string;\n\targuments: Record<string, any>;\n\tthoughtSignature?: string; // Google-specific: opaque signature for reusing thought context\n}\n\nexport interface Usage {\n\tinput: number;\n\toutput: number;\n\tcacheRead: number;\n\tcacheWrite: number;\n\ttotalTokens: number;\n\tcost: {\n\t\tinput: number;\n\t\toutput: number;\n\t\tcacheRead: number;\n\t\tcacheWrite: number;\n\t\ttotal: number;\n\t};\n}\n\nexport type StopReason = \"stop\" \| \"length\" \| \"toolUse\" \| \"error\" \| \"aborted\";\n\nexport interface UserMessage {\n\trole: \"user\";\n\tcontent: string \| (TextContent \| ImageContent)[];\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport interface AssistantMessage {\n\trole: \"assistant\";\n\tcontent: (TextContent \| ThinkingContent \| ToolCall)[];\n\tapi: Api;\n\tprovider: Provider;\n\tmodel: string;\n\tresponseModel?: string; // Concrete `chunk.model` when different from the requested `model` (e.g. 
OpenRouter `auto` -> `anthropic/...`)\n\tresponseId?: string; // Provider-specific response/message identifier when the upstream API exposes one\n\tusage: Usage;\n\tstopReason: StopReason;\n\terrorMessage?: string;\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport interface ToolResultMessage<TDetails = any> {\n\trole: \"toolResult\";\n\ttoolCallId: string;\n\ttoolName: string;\n\tcontent: (TextContent \| ImageContent)[]; // Supports text and images\n\tdetails?: TDetails;\n\tisError: boolean;\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport type Message = UserMessage \| AssistantMessage \| ToolResultMessage;\n\nimport type { TSchema } from \"typebox\";\n\nexport interface Tool<TParameters extends TSchema = TSchema> {\n\tname: string;\n\tdescription: string;\n\tparameters: TParameters;\n}\n\nexport interface Context {\n\tsystemPrompt?: string;\n\tmessages: Message[];\n\ttools?: Tool[];\n}\n\n/\n Event protocol for AssistantMessageEventStream.\n \n Streams should emit `start` before partial updates, then terminate with either:\n * - `done` carrying the final successful AssistantMessage, or\n * - `error` carrying the final AssistantMessage with stopReason \"error\" or \"aborted\"\n * and errorMessage.\n /\nexport type AssistantMessageEvent =\n\t\| { type: \"start\"; partial: AssistantMessage }\n\t\| { type: \"text_start\"; contentIndex: number; partial: AssistantMessage }\n\t\| { type: \"text_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t\| { type: \"text_end\"; contentIndex: number; content: string; partial: AssistantMessage }\n\t\| { type: \"thinking_start\"; contentIndex: number; partial: AssistantMessage }\n\t\| { type: \"thinking_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t\| { type: \"thinking_end\"; contentIndex: number; content: string; partial: AssistantMessage }\n\t\| { type: \"toolcall_start\"; contentIndex: number; partial: AssistantMessage }\n\t\| { type: 
\"toolcall_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t\| { type: \"toolcall_end\"; contentIndex: number; toolCall: ToolCall; partial: AssistantMessage }\n\t\| { type: \"done\"; reason: Extract<StopReason, \"stop\" \| \"length\" \| \"toolUse\">; message: AssistantMessage }\n\t\| { type: \"error\"; reason: Extract<StopReason, \"aborted\" \| \"error\">; error: AssistantMessage };\n\n/\n Compatibility settings for OpenAI-compatible completions APIs.\n * Use this to override URL-based auto-detection for custom providers.\n /\nexport interface OpenAICompletionsCompat {\n\t/* Whether the provider supports the `store` field. Default: auto-detected from URL. /\n\tsupportsStore?: boolean;\n\t/* Whether the provider supports the `developer` role (vs `system`). Default: auto-detected from URL. /\n\tsupportsDeveloperRole?: boolean;\n\t/* Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. /\n\tsupportsReasoningEffort?: boolean;\n\t/* Optional mapping from pi-ai reasoning levels to provider/model-specific `reasoning_effort` values. /\n\treasoningEffortMap?: Partial<Record<ThinkingLevel, string>>;\n\t/* Whether the provider supports `stream_options: { include_usage: true }` for token usage in streaming responses. Default: true. /\n\tsupportsUsageInStreaming?: boolean;\n\t/* Which field to use for max tokens. Default: auto-detected from URL. /\n\tmaxTokensField?: \"max_completion_tokens\" \| \"max_tokens\";\n\t/* Whether tool results require the `name` field. Default: auto-detected from URL. /\n\trequiresToolResultName?: boolean;\n\t/* Whether a user message after tool results requires an assistant message in between. Default: auto-detected from URL. /\n\trequiresAssistantAfterToolResult?: boolean;\n\t/* Whether thinking blocks must be converted to text blocks with <thinking> delimiters. Default: auto-detected from URL. 
/\n\trequiresThinkingAsText?: boolean;\n\t/* Whether all replayed assistant messages must include an empty reasoning_content field when reasoning is enabled. Default: auto-detected from URL. /\n\trequiresReasoningContentOnAssistantMessages?: boolean;\n\t/* Format for reasoning/thinking parameter. \"openai\" uses reasoning_effort, \"openrouter\" uses reasoning: { effort }, \"deepseek\" uses thinking: { type } plus reasoning_effort, \"zai\" uses top-level enable_thinking: boolean, \"qwen\" uses top-level enable_thinking: boolean, and \"qwen-chat-template\" uses chat_template_kwargs.enable_thinking. Default: \"openai\". /\n\tthinkingFormat?: \"openai\" \| \"openrouter\" \| \"deepseek\" \| \"zai\" \| \"qwen\" \| \"qwen-chat-template\";\n\t/* OpenRouter-specific routing preferences. Only used when baseUrl points to OpenRouter. /\n\topenRouterRouting?: OpenRouterRouting;\n\t/* Vercel AI Gateway routing preferences. Only used when baseUrl points to Vercel AI Gateway. /\n\tvercelGatewayRouting?: VercelGatewayRouting;\n\t/* Whether z.ai supports top-level `tool_stream: true` for streaming tool call deltas. Default: false. /\n\tzaiToolStream?: boolean;\n\t/* Whether the provider supports the `strict` field in tool definitions. Default: true. /\n\tsupportsStrictMode?: boolean;\n\t/* Cache control convention for prompt caching. \"anthropic\" applies Anthropic-style `cache_control` markers to the system prompt, last tool definition, and last user/assistant text content. /\n\tcacheControlFormat?: \"anthropic\";\n\t/* Whether to send known session-affinity headers (`session_id`, `x-client-request-id`, `x-session-affinity`) from `options.sessionId` when caching is enabled. Default: false. /\n\tsendSessionAffinityHeaders?: boolean;\n\t/* Whether the provider supports long prompt cache retention (`prompt_cache_retention: \"24h\"` or Anthropic-style `cache_control.ttl: \"1h\"`, depending on format). Default: true. 
/\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/* Compatibility settings for OpenAI Responses APIs. /\nexport interface OpenAIResponsesCompat {\n\t/* Whether to send the OpenAI `session_id` cache-affinity header from `options.sessionId` when caching is enabled. Default: true. /\n\tsendSessionIdHeader?: boolean;\n\t/* Whether the provider supports `prompt_cache_retention: \"24h\"`. Default: true. /\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/* Compatibility settings for Anthropic Messages-compatible APIs. /\nexport interface AnthropicMessagesCompat {\n\t/\n\t Whether the provider accepts per-tool `eager_input_streaming`.\n\t * When false, the Anthropic provider omits `tools[].eager_input_streaming`\n\t * and sends the legacy `fine-grained-tool-streaming-2025-05-14` beta header\n\t * for tool-enabled requests.\n\t * Default: true.\n\t /\n\tsupportsEagerToolInputStreaming?: boolean;\n\t/* Whether the provider supports Anthropic long cache retention (`cache_control.ttl: \"1h\"`). Default: true. /\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/\n OpenRouter provider routing preferences.\n * Controls which upstream providers OpenRouter routes requests to.\n * Sent as the `provider` field in the OpenRouter API request body.\n * @see https://openrouter.ai/docs/guides/routing/provider-selection\n /\nexport interface OpenRouterRouting {\n\t/* Whether to allow backup providers to serve requests. Default: true. /\n\tallow_fallbacks?: boolean;\n\t/* Whether to filter providers to only those that support all parameters in the request. Default: false. /\n\trequire_parameters?: boolean;\n\t/* Data collection setting. \"allow\" (default): allow providers that may store/train on data. \"deny\": only use providers that don't collect user data. /\n\tdata_collection?: \"deny\" \| \"allow\";\n\t/* Whether to restrict routing to only ZDR (Zero Data Retention) endpoints. /\n\tzdr?: boolean;\n\t/* Whether to restrict routing to only models that allow text distillation. 
/\n\tenforce_distillable_text?: boolean;\n\t/* An ordered list of provider names/slugs to try in sequence, falling back to the next if unavailable. /\n\torder?: string[];\n\t/* List of provider names/slugs to exclusively allow for this request. /\n\tonly?: string[];\n\t/* List of provider names/slugs to skip for this request. /\n\tignore?: string[];\n\t/* A list of quantization levels to filter providers by (e.g., [\"fp16\", \"bf16\", \"fp8\", \"fp6\", \"int8\", \"int4\", \"fp4\", \"fp32\"]). /\n\tquantizations?: string[];\n\t/* Sorting strategy. Can be a string (e.g., \"price\", \"throughput\", \"latency\") or an object with `by` and `partition`. /\n\tsort?:\n\t\t\| string\n\t\t\| {\n\t\t\t\t/* The sorting metric: \"price\", \"throughput\", \"latency\". /\n\t\t\t\tby?: string;\n\t\t\t\t/* Partitioning strategy: \"model\" (default) or \"none\". /\n\t\t\t\tpartition?: string \| null;\n\t\t };\n\t/* Maximum price per million tokens (USD). /\n\tmax_price?: {\n\t\t/* Price per million prompt tokens. /\n\t\tprompt?: number \| string;\n\t\t/* Price per million completion tokens. /\n\t\tcompletion?: number \| string;\n\t\t/* Price per image. /\n\t\timage?: number \| string;\n\t\t/* Price per audio unit. /\n\t\taudio?: number \| string;\n\t\t/* Price per request. /\n\t\trequest?: number \| string;\n\t};\n\t/* Preferred minimum throughput (tokens/second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. /\n\tpreferred_min_throughput?:\n\t\t\| number\n\t\t\| {\n\t\t\t\t/* Minimum tokens/second at the 50th percentile. /\n\t\t\t\tp50?: number;\n\t\t\t\t/* Minimum tokens/second at the 75th percentile. /\n\t\t\t\tp75?: number;\n\t\t\t\t/* Minimum tokens/second at the 90th percentile. /\n\t\t\t\tp90?: number;\n\t\t\t\t/* Minimum tokens/second at the 99th percentile. /\n\t\t\t\tp99?: number;\n\t\t };\n\t/* Preferred maximum latency (seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. 
/\n\tpreferred_max_latency?:\n\t\t\| number\n\t\t\| {\n\t\t\t\t/* Maximum latency in seconds at the 50th percentile. /\n\t\t\t\tp50?: number;\n\t\t\t\t/* Maximum latency in seconds at the 75th percentile. /\n\t\t\t\tp75?: number;\n\t\t\t\t/* Maximum latency in seconds at the 90th percentile. /\n\t\t\t\tp90?: number;\n\t\t\t\t/* Maximum latency in seconds at the 99th percentile. /\n\t\t\t\tp99?: number;\n\t\t };\n}\n\n/\n Vercel AI Gateway routing preferences.\n * Controls which upstream providers the gateway routes requests to.\n * @see https://vercel.com/docs/ai-gateway/models-and-providers/provider-options\n /\nexport interface VercelGatewayRouting {\n\t/* List of provider slugs to exclusively use for this request (e.g., [\"bedrock\", \"anthropic\"]). /\n\tonly?: string[];\n\t/* List of provider slugs to try in order (e.g., [\"anthropic\", \"openai\"]). /\n\torder?: string[];\n}\n\n// Model interface for the unified model system\nexport interface Model<TApi extends Api> {\n\tid: string;\n\tname: string;\n\tapi: TApi;\n\tprovider: Provider;\n\tbaseUrl: string;\n\treasoning: boolean;\n\tinput: (\"text\" \| \"image\")[];\n\tcost: {\n\t\tinput: number; // $/million tokens\n\t\toutput: number; // $/million tokens\n\t\tcacheRead: number; // $/million tokens\n\t\tcacheWrite: number; // $/million tokens\n\t};\n\tcontextWindow: number;\n\tmaxTokens: number;\n\theaders?: Record<string, string>;\n\t/* Compatibility overrides for OpenAI-compatible APIs. If not set, auto-detected from baseUrl. */\n\tcompat?: TApi extends \"openai-completions\"\n\t\t? OpenAICompletionsCompat\n\t\t: TApi extends \"openai-responses\"\n\t\t\t? OpenAIResponsesCompat\n\t\t\t: TApi extends \"anthropic-messages\"\n\t\t\t\t? AnthropicMessagesCompat\n\t\t\t\t: never;\n}\n"]}
1	+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,2BAA2B,EAAE,MAAM,yBAAyB,CAAC;AAE3E,YAAY,EAAE,2BAA2B,EAAE,MAAM,yBAAyB,CAAC;AAE3E,MAAM,MAAM,QAAQ,GACjB,oBAAoB,GACpB,uBAAuB,GACvB,kBAAkB,GAClB,wBAAwB,GACxB,wBAAwB,GACxB,oBAAoB,GACpB,yBAAyB,GACzB,sBAAsB,GACtB,eAAe,CAAC;AAEnB,MAAM,MAAM,GAAG,GAAG,QAAQ,GAAG,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;AAE3C,MAAM,MAAM,aAAa,GACtB,gBAAgB,GAChB,WAAW,GACX,QAAQ,GACR,eAAe,GACf,QAAQ,GACR,wBAAwB,GACxB,cAAc,GACd,UAAU,GACV,gBAAgB,GAChB,KAAK,GACL,MAAM,GACN,UAAU,GACV,YAAY,GACZ,mBAAmB,GACnB,KAAK,GACL,SAAS,GACT,SAAS,GACT,YAAY,GACZ,YAAY,GACZ,eAAe,GACf,aAAa,GACb,WAAW,GACX,UAAU,GACV,aAAa,GACb,aAAa,GACb,uBAAuB,GACvB,uBAAuB,GACvB,QAAQ,CAAC;AACZ,MAAM,MAAM,QAAQ,GAAG,aAAa,GAAG,MAAM,CAAC;AAE9C,MAAM,MAAM,aAAa,GAAG,SAAS,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,OAAO,CAAC;AAC5E,MAAM,MAAM,kBAAkB,GAAG,KAAK,GAAG,aAAa,CAAC;AACvD,MAAM,MAAM,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC,kBAAkB,EAAE,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC;AAElF,yEAAyE;AACzE,MAAM,WAAW,eAAe;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;CACd;AAGD,MAAM,MAAM,cAAc,GAAG,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC;AAEvD,MAAM,MAAM,SAAS,GAAG,KAAK,GAAG,WAAW,GAAG,kBAAkB,GAAG,MAAM,CAAC;AAE1E,MAAM,WAAW,gBAAgB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,aAAa;IAC7B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;;OAGG;IACH,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB;;;OAGG;IACH,cAAc,CAAC,EAAE,cAAc,CAAC;IAChC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,SAAS,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,KAAK,OAAO,GAAG,SAAS,GAAG,OAAO,CAAC,OAAO,GAAG,SAAS,CAAC,CAAC;IACxG;;;OAGG;IACH,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE,gBAAgB,EAAE,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACrF;;;;OAIG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAA
C;IACjC;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB;;;;;;OAMG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACnC;AAED,MAAM,MAAM,qBAAqB,GAAG,aAAa,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAG5E,MAAM,WAAW,mBAAoB,SAAQ,aAAa;IACzD,SAAS,CAAC,EAAE,aAAa,CAAC;IAC1B,4EAA4E;IAC5E,eAAe,CAAC,EAAE,eAAe,CAAC;CAClC;AAUD,MAAM,MAAM,cAAc,CAAC,IAAI,SAAS,GAAG,GAAG,GAAG,EAAE,QAAQ,SAAS,aAAa,GAAG,aAAa,IAAI,CACpG,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,EAClB,OAAO,EAAE,OAAO,EAChB,OAAO,CAAC,EAAE,QAAQ,KACd,2BAA2B,CAAC;AAEjC,MAAM,WAAW,eAAe;IAC/B,CAAC,EAAE,CAAC,CAAC;IACL,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,CAAC,EAAE,YAAY,GAAG,cAAc,CAAC;CACtC;AAED,MAAM,WAAW,WAAW;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,aAAa,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,eAAe;IAC/B,IAAI,EAAE,UAAU,CAAC;IACjB,QAAQ,EAAE,MAAM,CAAC;IACjB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B;;gDAE4C;IAC5C,QAAQ,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,YAAY;IAC5B,IAAI,EAAE,OAAO,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,QAAQ;IACxB,IAAI,EAAE,UAAU,CAAC;IACjB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC/B,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,KAAK;IACrB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE;QACL,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;QACf,SAAS,EAAE,MAAM,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;QACnB,KAAK,EAAE,MAAM,CAAC;KACd,CAAC;CACF;AAED,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,QAAQ,GAAG,SAAS,GAAG,OAAO,GAAG,SAAS,CAAC;AAE7E,MAAM,WAAW,WAAW;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,GAAG,CAAC,WAAW,GAAG,YAAY,CAAC,EAAE,CAAC;IACjD,SAAS,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,gBAAgB;IAChC,IAAI,EAAE,WAAW,CAAC;IAClB,OAAO,EAAE,CAAC,WAAW,GAAG,eAAe,GAAG,QAAQ,CAAC,EAAE,CAAC;IACtD,GAAG,EAAE,GAAG,CAAC;IACT,QAAQ,EAAE,QAAQ,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,UAAU,CAAC
,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,KAAK,CAAC;IACb,UAAU,EAAE,UAAU,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,iBAAiB,CAAC,QAAQ,GAAG,GAAG;IAChD,IAAI,EAAE,YAAY,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,CAAC,WAAW,GAAG,YAAY,CAAC,EAAE,CAAC;IACxC,OAAO,CAAC,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,MAAM,OAAO,GAAG,WAAW,GAAG,gBAAgB,GAAG,iBAAiB,CAAC;AAEzE,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAEvC,MAAM,WAAW,IAAI,CAAC,WAAW,SAAS,OAAO,GAAG,OAAO;IAC1D,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,WAAW,CAAC;CACxB;AAED,MAAM,WAAW,OAAO;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,OAAO,EAAE,CAAC;IACpB,KAAK,CAAC,EAAE,IAAI,EAAE,CAAC;CACf;AAED;;;;;;;GAOG;AACH,MAAM,MAAM,qBAAqB,GAC9B;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GAC5C;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GACvE;IAAE,IAAI,EAAE,YAAY,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GACtF;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GACtF;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GAC3E;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GAC1F;IAAE,IAAI,EAAE,cAAc,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GAC1F;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GAC3E;IAAE,IAAI,EAAE,gBAAgB,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GAC1F;IAAE,IAAI,EAAE,cAAc,CAAC;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,QAAQ,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GAC7F;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,OAAO,CAAC,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,SAAS,CAAC,CAAC;IAAC,OAAO,EAAE,gBAAgB,CAAA;CAAE,GACvG;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,MAAM,EAAE,OAAO,CAAC,UAAU,EAA
E,SAAS,GAAG,OAAO,CAAC,CAAC;IAAC,KAAK,EAAE,gBAAgB,CAAA;CAAE,CAAC;AAEhG;;;GAGG;AACH,MAAM,WAAW,uBAAuB;IACvC,wFAAwF;IACxF,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,yGAAyG;IACzG,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,yFAAyF;IACzF,uBAAuB,CAAC,EAAE,OAAO,CAAC;IAClC,qIAAqI;IACrI,wBAAwB,CAAC,EAAE,OAAO,CAAC;IACnC,0EAA0E;IAC1E,cAAc,CAAC,EAAE,uBAAuB,GAAG,YAAY,CAAC;IACxD,sFAAsF;IACtF,sBAAsB,CAAC,EAAE,OAAO,CAAC;IACjC,2HAA2H;IAC3H,gCAAgC,CAAC,EAAE,OAAO,CAAC;IAC3C,4HAA4H;IAC5H,sBAAsB,CAAC,EAAE,OAAO,CAAC;IACjC,wJAAwJ;IACxJ,2CAA2C,CAAC,EAAE,OAAO,CAAC;IACtD,wWAAwW;IACxW,cAAc,CAAC,EAAE,QAAQ,GAAG,YAAY,GAAG,UAAU,GAAG,KAAK,GAAG,MAAM,GAAG,oBAAoB,CAAC;IAC9F,4FAA4F;IAC5F,iBAAiB,CAAC,EAAE,iBAAiB,CAAC;IACtC,iGAAiG;IACjG,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,0GAA0G;IAC1G,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,2FAA2F;IAC3F,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,iMAAiM;IACjM,kBAAkB,CAAC,EAAE,WAAW,CAAC;IACjC,mLAAmL;IACnL,0BAA0B,CAAC,EAAE,OAAO,CAAC;IACrC,oLAAoL;IACpL,0BAA0B,CAAC,EAAE,OAAO,CAAC;CACrC;AAED,wDAAwD;AACxD,MAAM,WAAW,qBAAqB;IACrC,qIAAqI;IACrI,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,oFAAoF;IACpF,0BAA0B,CAAC,EAAE,OAAO,CAAC;CACrC;AAED,qEAAqE;AACrE,MAAM,WAAW,uBAAuB;IACvC;;;;;;OAMG;IACH,+BAA+B,CAAC,EAAE,OAAO,CAAC;IAC1C,+GAA+G;IAC/G,0BAA0B,CAAC,EAAE,OAAO,CAAC;CACrC;AAED;;;;;GAKG;AACH,MAAM,WAAW,iBAAiB;IACjC,0EAA0E;IAC1E,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,4GAA4G;IAC5G,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,yJAAyJ;IACzJ,eAAe,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC;IACnC,+EAA+E;IAC/E,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,+EAA+E;IAC/E,wBAAwB,CAAC,EAAE,OAAO,CAAC;IACnC,2GAA2G;IAC3G,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IACjB,0EAA0E;IAC1E,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,6DAA6D;IAC7D,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;IAClB,kIAAkI;IAClI,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,yHAAyH;IACzH,IAAI,CAAC,EACF,MAAM,GACN;QACA,4DAA4D;QAC5D,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,0DAA0D;QAC1D,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;KACzB,CAAC;IACL,8CAA8C;IAC9C,SAAS,CAAC,EAAE;QACX,uCAAuC;QACvC,MAAM,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;QACzB,2CAA2C;QAC3C,UAAU,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;QAC7B,uBAAuB;QACvB,KAAK,CAAC,
EAAE,MAAM,GAAG,MAAM,CAAC;QACxB,4BAA4B;QAC5B,KAAK,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;QACxB,yBAAyB;QACzB,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;KAC1B,CAAC;IACF,oIAAoI;IACpI,wBAAwB,CAAC,EACtB,MAAM,GACN;QACA,oDAAoD;QACpD,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,oDAAoD;QACpD,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,oDAAoD;QACpD,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,oDAAoD;QACpD,GAAG,CAAC,EAAE,MAAM,CAAC;KACZ,CAAC;IACL,2HAA2H;IAC3H,qBAAqB,CAAC,EACnB,MAAM,GACN;QACA,yDAAyD;QACzD,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,yDAAyD;QACzD,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,yDAAyD;QACzD,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,yDAAyD;QACzD,GAAG,CAAC,EAAE,MAAM,CAAC;KACZ,CAAC;CACL;AAED;;;;GAIG;AACH,MAAM,WAAW,oBAAoB;IACpC,mGAAmG;IACnG,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,8EAA8E;IAC9E,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAGD,MAAM,WAAW,KAAK,CAAC,IAAI,SAAS,GAAG;IACtC,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,IAAI,CAAC;IACV,QAAQ,EAAE,QAAQ,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,OAAO,CAAC;IACnB;;;OAGG;IACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;IACpC,KAAK,EAAE,CAAC,MAAM,GAAG,OAAO,CAAC,EAAE,CAAC;IAC5B,IAAI,EAAE;QACL,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;QACf,SAAS,EAAE,MAAM,CAAC;QAClB,UAAU,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,kGAAkG;IAClG,MAAM,CAAC,EAAE,IAAI,SAAS,oBAAoB,GACvC,uBAAuB,GACvB,IAAI,SAAS,kBAAkB,GAC9B,qBAAqB,GACrB,IAAI,SAAS,oBAAoB,GAChC,uBAAuB,GACvB,KAAK,CAAC;CACX","sourcesContent":["import type { AssistantMessageEventStream } from \"./utils/event-stream.js\";\n\nexport type { AssistantMessageEventStream } from \"./utils/event-stream.js\";\n\nexport type KnownApi =\n\t\| \"openai-completions\"\n\t\| \"mistral-conversations\"\n\t\| \"openai-responses\"\n\t\| \"azure-openai-responses\"\n\t\| \"openai-codex-responses\"\n\t\| \"anthropic-messages\"\n\t\| \"bedrock-converse-stream\"\n\t\| \"google-generative-ai\"\n\t\| \"google-vertex\";\n\nexport type Api = KnownApi \| (string & {});\n\nexport type KnownProvider =\n\t\| 
\"amazon-bedrock\"\n\t\| \"anthropic\"\n\t\| \"google\"\n\t\| \"google-vertex\"\n\t\| \"openai\"\n\t\| \"azure-openai-responses\"\n\t\| \"openai-codex\"\n\t\| \"deepseek\"\n\t\| \"github-copilot\"\n\t\| \"xai\"\n\t\| \"groq\"\n\t\| \"cerebras\"\n\t\| \"openrouter\"\n\t\| \"vercel-ai-gateway\"\n\t\| \"zai\"\n\t\| \"mistral\"\n\t\| \"minimax\"\n\t\| \"minimax-cn\"\n\t\| \"moonshotai\"\n\t\| \"moonshotai-cn\"\n\t\| \"huggingface\"\n\t\| \"fireworks\"\n\t\| \"opencode\"\n\t\| \"opencode-go\"\n\t\| \"kimi-coding\"\n\t\| \"cloudflare-workers-ai\"\n\t\| \"cloudflare-ai-gateway\"\n\t\| \"xiaomi\";\nexport type Provider = KnownProvider \| string;\n\nexport type ThinkingLevel = \"minimal\" \| \"low\" \| \"medium\" \| \"high\" \| \"xhigh\";\nexport type ModelThinkingLevel = \"off\" \| ThinkingLevel;\nexport type ThinkingLevelMap = Partial<Record<ModelThinkingLevel, string \| null>>;\n\n/** Token budgets for each thinking level (token-based providers only) /\nexport interface ThinkingBudgets {\n\tminimal?: number;\n\tlow?: number;\n\tmedium?: number;\n\thigh?: number;\n}\n\n// Base options all providers share\nexport type CacheRetention = \"none\" \| \"short\" \| \"long\";\n\nexport type Transport = \"sse\" \| \"websocket\" \| \"websocket-cached\" \| \"auto\";\n\nexport interface ProviderResponse {\n\tstatus: number;\n\theaders: Record<string, string>;\n}\n\nexport interface StreamOptions {\n\ttemperature?: number;\n\tmaxTokens?: number;\n\tsignal?: AbortSignal;\n\tapiKey?: string;\n\t/\n\t Preferred transport for providers that support multiple transports.\n\t * Providers that do not support this option ignore it.\n\t /\n\ttransport?: Transport;\n\t/\n\t Prompt cache retention preference. 
Providers map this to their supported values.\n\t * Default: \"short\".\n\t /\n\tcacheRetention?: CacheRetention;\n\t/\n\t Optional session identifier for providers that support session-based caching.\n\t * Providers can use this to enable prompt caching, request routing, or other\n\t * session-aware features. Ignored by providers that don't support it.\n\t /\n\tsessionId?: string;\n\t/\n\t Optional callback for inspecting or replacing provider payloads before sending.\n\t * Return undefined to keep the payload unchanged.\n\t /\n\tonPayload?: (payload: unknown, model: Model<Api>) => unknown \| undefined \| Promise<unknown \| undefined>;\n\t/\n\t Optional callback invoked after an HTTP response is received and before\n\t * its body stream is consumed.\n\t /\n\tonResponse?: (response: ProviderResponse, model: Model<Api>) => void \| Promise<void>;\n\t/\n\t Optional custom HTTP headers to include in API requests.\n\t * Merged with provider defaults; can override default headers.\n\t * Not supported by all providers (e.g., AWS Bedrock uses SDK auth).\n\t /\n\theaders?: Record<string, string>;\n\t/\n\t HTTP request timeout in milliseconds for providers/SDKs that support it.\n\t * For example, OpenAI and Anthropic SDK clients default to 10 minutes.\n\t /\n\ttimeoutMs?: number;\n\t/\n\t Maximum retry attempts for providers/SDKs that support client-side retries.\n\t * For example, OpenAI and Anthropic SDK clients default to 2.\n\t /\n\tmaxRetries?: number;\n\t/\n\t Maximum delay in milliseconds to wait for a retry when the server requests a long wait.\n\t * If the server's requested delay exceeds this value, the request fails immediately\n\t * with an error containing the requested delay, allowing higher-level retry logic\n\t * to handle it with user visibility.\n\t * Default: 60000 (60 seconds). 
Set to 0 to disable the cap.\n\t /\n\tmaxRetryDelayMs?: number;\n\t/\n\t Optional metadata to include in API requests.\n\t * Providers extract the fields they understand and ignore the rest.\n\t * For example, Anthropic uses `user_id` for abuse tracking and rate limiting.\n\t /\n\tmetadata?: Record<string, unknown>;\n}\n\nexport type ProviderStreamOptions = StreamOptions & Record<string, unknown>;\n\n// Unified options with reasoning passed to streamSimple() and completeSimple()\nexport interface SimpleStreamOptions extends StreamOptions {\n\treasoning?: ThinkingLevel;\n\t/* Custom token budgets for thinking levels (token-based providers only) /\n\tthinkingBudgets?: ThinkingBudgets;\n}\n\n// Generic StreamFunction with typed options.\n//\n// Contract:\n// - Must return an AssistantMessageEventStream.\n// - Once invoked, request/model/runtime failures should be encoded in the\n// returned stream, not thrown.\n// - Error termination must produce an AssistantMessage with stopReason\n// \"error\" or \"aborted\" and errorMessage, emitted via the stream protocol.\nexport type StreamFunction<TApi extends Api = Api, TOptions extends StreamOptions = StreamOptions> = (\n\tmodel: Model<TApi>,\n\tcontext: Context,\n\toptions?: TOptions,\n) => AssistantMessageEventStream;\n\nexport interface TextSignatureV1 {\n\tv: 1;\n\tid: string;\n\tphase?: \"commentary\" \| \"final_answer\";\n}\n\nexport interface TextContent {\n\ttype: \"text\";\n\ttext: string;\n\ttextSignature?: string; // e.g., for OpenAI responses, message metadata (legacy id string or TextSignatureV1 JSON)\n}\n\nexport interface ThinkingContent {\n\ttype: \"thinking\";\n\tthinking: string;\n\tthinkingSignature?: string; // e.g., for OpenAI responses, the reasoning item ID\n\t/* When true, the thinking content was redacted by safety filters. The opaque\n\t * encrypted payload is stored in `thinkingSignature` so it can be passed back\n\t * to the API for multi-turn continuity. 
/\n\tredacted?: boolean;\n}\n\nexport interface ImageContent {\n\ttype: \"image\";\n\tdata: string; // base64 encoded image data\n\tmimeType: string; // e.g., \"image/jpeg\", \"image/png\"\n}\n\nexport interface ToolCall {\n\ttype: \"toolCall\";\n\tid: string;\n\tname: string;\n\targuments: Record<string, any>;\n\tthoughtSignature?: string; // Google-specific: opaque signature for reusing thought context\n}\n\nexport interface Usage {\n\tinput: number;\n\toutput: number;\n\tcacheRead: number;\n\tcacheWrite: number;\n\ttotalTokens: number;\n\tcost: {\n\t\tinput: number;\n\t\toutput: number;\n\t\tcacheRead: number;\n\t\tcacheWrite: number;\n\t\ttotal: number;\n\t};\n}\n\nexport type StopReason = \"stop\" \| \"length\" \| \"toolUse\" \| \"error\" \| \"aborted\";\n\nexport interface UserMessage {\n\trole: \"user\";\n\tcontent: string \| (TextContent \| ImageContent)[];\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport interface AssistantMessage {\n\trole: \"assistant\";\n\tcontent: (TextContent \| ThinkingContent \| ToolCall)[];\n\tapi: Api;\n\tprovider: Provider;\n\tmodel: string;\n\tresponseModel?: string; // Concrete `chunk.model` when different from the requested `model` (e.g. 
OpenRouter `auto` -> `anthropic/...`)\n\tresponseId?: string; // Provider-specific response/message identifier when the upstream API exposes one\n\tusage: Usage;\n\tstopReason: StopReason;\n\terrorMessage?: string;\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport interface ToolResultMessage<TDetails = any> {\n\trole: \"toolResult\";\n\ttoolCallId: string;\n\ttoolName: string;\n\tcontent: (TextContent \| ImageContent)[]; // Supports text and images\n\tdetails?: TDetails;\n\tisError: boolean;\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport type Message = UserMessage \| AssistantMessage \| ToolResultMessage;\n\nimport type { TSchema } from \"typebox\";\n\nexport interface Tool<TParameters extends TSchema = TSchema> {\n\tname: string;\n\tdescription: string;\n\tparameters: TParameters;\n}\n\nexport interface Context {\n\tsystemPrompt?: string;\n\tmessages: Message[];\n\ttools?: Tool[];\n}\n\n/\n Event protocol for AssistantMessageEventStream.\n \n Streams should emit `start` before partial updates, then terminate with either:\n * - `done` carrying the final successful AssistantMessage, or\n * - `error` carrying the final AssistantMessage with stopReason \"error\" or \"aborted\"\n * and errorMessage.\n /\nexport type AssistantMessageEvent =\n\t\| { type: \"start\"; partial: AssistantMessage }\n\t\| { type: \"text_start\"; contentIndex: number; partial: AssistantMessage }\n\t\| { type: \"text_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t\| { type: \"text_end\"; contentIndex: number; content: string; partial: AssistantMessage }\n\t\| { type: \"thinking_start\"; contentIndex: number; partial: AssistantMessage }\n\t\| { type: \"thinking_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t\| { type: \"thinking_end\"; contentIndex: number; content: string; partial: AssistantMessage }\n\t\| { type: \"toolcall_start\"; contentIndex: number; partial: AssistantMessage }\n\t\| { type: 
\"toolcall_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t\| { type: \"toolcall_end\"; contentIndex: number; toolCall: ToolCall; partial: AssistantMessage }\n\t\| { type: \"done\"; reason: Extract<StopReason, \"stop\" \| \"length\" \| \"toolUse\">; message: AssistantMessage }\n\t\| { type: \"error\"; reason: Extract<StopReason, \"aborted\" \| \"error\">; error: AssistantMessage };\n\n/\n Compatibility settings for OpenAI-compatible completions APIs.\n * Use this to override URL-based auto-detection for custom providers.\n /\nexport interface OpenAICompletionsCompat {\n\t/* Whether the provider supports the `store` field. Default: auto-detected from URL. /\n\tsupportsStore?: boolean;\n\t/* Whether the provider supports the `developer` role (vs `system`). Default: auto-detected from URL. /\n\tsupportsDeveloperRole?: boolean;\n\t/* Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. /\n\tsupportsReasoningEffort?: boolean;\n\t/* Whether the provider supports `stream_options: { include_usage: true }` for token usage in streaming responses. Default: true. /\n\tsupportsUsageInStreaming?: boolean;\n\t/* Which field to use for max tokens. Default: auto-detected from URL. /\n\tmaxTokensField?: \"max_completion_tokens\" \| \"max_tokens\";\n\t/* Whether tool results require the `name` field. Default: auto-detected from URL. /\n\trequiresToolResultName?: boolean;\n\t/* Whether a user message after tool results requires an assistant message in between. Default: auto-detected from URL. /\n\trequiresAssistantAfterToolResult?: boolean;\n\t/* Whether thinking blocks must be converted to text blocks with <thinking> delimiters. Default: auto-detected from URL. /\n\trequiresThinkingAsText?: boolean;\n\t/* Whether all replayed assistant messages must include an empty reasoning_content field when reasoning is enabled. Default: auto-detected from URL. 
/\n\trequiresReasoningContentOnAssistantMessages?: boolean;\n\t/* Format for reasoning/thinking parameter. \"openai\" uses reasoning_effort, \"openrouter\" uses reasoning: { effort }, \"deepseek\" uses thinking: { type } plus reasoning_effort, \"zai\" uses top-level enable_thinking: boolean, \"qwen\" uses top-level enable_thinking: boolean, and \"qwen-chat-template\" uses chat_template_kwargs.enable_thinking. Default: \"openai\". /\n\tthinkingFormat?: \"openai\" \| \"openrouter\" \| \"deepseek\" \| \"zai\" \| \"qwen\" \| \"qwen-chat-template\";\n\t/* OpenRouter-specific routing preferences. Only used when baseUrl points to OpenRouter. /\n\topenRouterRouting?: OpenRouterRouting;\n\t/* Vercel AI Gateway routing preferences. Only used when baseUrl points to Vercel AI Gateway. /\n\tvercelGatewayRouting?: VercelGatewayRouting;\n\t/* Whether z.ai supports top-level `tool_stream: true` for streaming tool call deltas. Default: false. /\n\tzaiToolStream?: boolean;\n\t/* Whether the provider supports the `strict` field in tool definitions. Default: true. /\n\tsupportsStrictMode?: boolean;\n\t/* Cache control convention for prompt caching. \"anthropic\" applies Anthropic-style `cache_control` markers to the system prompt, last tool definition, and last user/assistant text content. /\n\tcacheControlFormat?: \"anthropic\";\n\t/* Whether to send known session-affinity headers (`session_id`, `x-client-request-id`, `x-session-affinity`) from `options.sessionId` when caching is enabled. Default: false. /\n\tsendSessionAffinityHeaders?: boolean;\n\t/* Whether the provider supports long prompt cache retention (`prompt_cache_retention: \"24h\"` or Anthropic-style `cache_control.ttl: \"1h\"`, depending on format). Default: true. /\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/* Compatibility settings for OpenAI Responses APIs. 
/\nexport interface OpenAIResponsesCompat {\n\t/* Whether to send the OpenAI `session_id` cache-affinity header from `options.sessionId` when caching is enabled. Default: true. /\n\tsendSessionIdHeader?: boolean;\n\t/* Whether the provider supports `prompt_cache_retention: \"24h\"`. Default: true. /\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/* Compatibility settings for Anthropic Messages-compatible APIs. /\nexport interface AnthropicMessagesCompat {\n\t/\n\t Whether the provider accepts per-tool `eager_input_streaming`.\n\t * When false, the Anthropic provider omits `tools[].eager_input_streaming`\n\t * and sends the legacy `fine-grained-tool-streaming-2025-05-14` beta header\n\t * for tool-enabled requests.\n\t * Default: true.\n\t /\n\tsupportsEagerToolInputStreaming?: boolean;\n\t/* Whether the provider supports Anthropic long cache retention (`cache_control.ttl: \"1h\"`). Default: true. /\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/\n OpenRouter provider routing preferences.\n * Controls which upstream providers OpenRouter routes requests to.\n * Sent as the `provider` field in the OpenRouter API request body.\n * @see https://openrouter.ai/docs/guides/routing/provider-selection\n /\nexport interface OpenRouterRouting {\n\t/* Whether to allow backup providers to serve requests. Default: true. /\n\tallow_fallbacks?: boolean;\n\t/* Whether to filter providers to only those that support all parameters in the request. Default: false. /\n\trequire_parameters?: boolean;\n\t/* Data collection setting. \"allow\" (default): allow providers that may store/train on data. \"deny\": only use providers that don't collect user data. /\n\tdata_collection?: \"deny\" \| \"allow\";\n\t/* Whether to restrict routing to only ZDR (Zero Data Retention) endpoints. /\n\tzdr?: boolean;\n\t/* Whether to restrict routing to only models that allow text distillation. 
/\n\tenforce_distillable_text?: boolean;\n\t/* An ordered list of provider names/slugs to try in sequence, falling back to the next if unavailable. /\n\torder?: string[];\n\t/* List of provider names/slugs to exclusively allow for this request. /\n\tonly?: string[];\n\t/* List of provider names/slugs to skip for this request. /\n\tignore?: string[];\n\t/* A list of quantization levels to filter providers by (e.g., [\"fp16\", \"bf16\", \"fp8\", \"fp6\", \"int8\", \"int4\", \"fp4\", \"fp32\"]). /\n\tquantizations?: string[];\n\t/* Sorting strategy. Can be a string (e.g., \"price\", \"throughput\", \"latency\") or an object with `by` and `partition`. /\n\tsort?:\n\t\t\| string\n\t\t\| {\n\t\t\t\t/* The sorting metric: \"price\", \"throughput\", \"latency\". /\n\t\t\t\tby?: string;\n\t\t\t\t/* Partitioning strategy: \"model\" (default) or \"none\". /\n\t\t\t\tpartition?: string \| null;\n\t\t };\n\t/* Maximum price per million tokens (USD). /\n\tmax_price?: {\n\t\t/* Price per million prompt tokens. /\n\t\tprompt?: number \| string;\n\t\t/* Price per million completion tokens. /\n\t\tcompletion?: number \| string;\n\t\t/* Price per image. /\n\t\timage?: number \| string;\n\t\t/* Price per audio unit. /\n\t\taudio?: number \| string;\n\t\t/* Price per request. /\n\t\trequest?: number \| string;\n\t};\n\t/* Preferred minimum throughput (tokens/second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. /\n\tpreferred_min_throughput?:\n\t\t\| number\n\t\t\| {\n\t\t\t\t/* Minimum tokens/second at the 50th percentile. /\n\t\t\t\tp50?: number;\n\t\t\t\t/* Minimum tokens/second at the 75th percentile. /\n\t\t\t\tp75?: number;\n\t\t\t\t/* Minimum tokens/second at the 90th percentile. /\n\t\t\t\tp90?: number;\n\t\t\t\t/* Minimum tokens/second at the 99th percentile. /\n\t\t\t\tp99?: number;\n\t\t };\n\t/* Preferred maximum latency (seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. 
/\n\tpreferred_max_latency?:\n\t\t\| number\n\t\t\| {\n\t\t\t\t/* Maximum latency in seconds at the 50th percentile. /\n\t\t\t\tp50?: number;\n\t\t\t\t/* Maximum latency in seconds at the 75th percentile. /\n\t\t\t\tp75?: number;\n\t\t\t\t/* Maximum latency in seconds at the 90th percentile. /\n\t\t\t\tp90?: number;\n\t\t\t\t/* Maximum latency in seconds at the 99th percentile. /\n\t\t\t\tp99?: number;\n\t\t };\n}\n\n/\n Vercel AI Gateway routing preferences.\n * Controls which upstream providers the gateway routes requests to.\n * @see https://vercel.com/docs/ai-gateway/models-and-providers/provider-options\n /\nexport interface VercelGatewayRouting {\n\t/* List of provider slugs to exclusively use for this request (e.g., [\"bedrock\", \"anthropic\"]). /\n\tonly?: string[];\n\t/* List of provider slugs to try in order (e.g., [\"anthropic\", \"openai\"]). /\n\torder?: string[];\n}\n\n// Model interface for the unified model system\nexport interface Model<TApi extends Api> {\n\tid: string;\n\tname: string;\n\tapi: TApi;\n\tprovider: Provider;\n\tbaseUrl: string;\n\treasoning: boolean;\n\t/\n\t Maps pi thinking levels to provider/model-specific values.\n\t * Missing keys use provider defaults. null marks a level as unsupported.\n\t /\n\tthinkingLevelMap?: ThinkingLevelMap;\n\tinput: (\"text\" \| \"image\")[];\n\tcost: {\n\t\tinput: number; // $/million tokens\n\t\toutput: number; // $/million tokens\n\t\tcacheRead: number; // $/million tokens\n\t\tcacheWrite: number; // $/million tokens\n\t};\n\tcontextWindow: number;\n\tmaxTokens: number;\n\theaders?: Record<string, string>;\n\t/* Compatibility overrides for OpenAI-compatible APIs. If not set, auto-detected from baseUrl. */\n\tcompat?: TApi extends \"openai-completions\"\n\t\t? OpenAICompletionsCompat\n\t\t: TApi extends \"openai-responses\"\n\t\t\t? OpenAIResponsesCompat\n\t\t\t: TApi extends \"anthropic-messages\"\n\t\t\t\t? AnthropicMessagesCompat\n\t\t\t\t: never;\n}\n"]}

package/dist/types.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"","sourcesContent":["import type { AssistantMessageEventStream } from \"./utils/event-stream.js\";\n\nexport type { AssistantMessageEventStream } from \"./utils/event-stream.js\";\n\nexport type KnownApi =\n\t\| \"openai-completions\"\n\t\| \"mistral-conversations\"\n\t\| \"openai-responses\"\n\t\| \"azure-openai-responses\"\n\t\| \"openai-codex-responses\"\n\t\| \"anthropic-messages\"\n\t\| \"bedrock-converse-stream\"\n\t\| \"google-generative-ai\"\n\t\| \"google-vertex\";\n\nexport type Api = KnownApi \| (string & {});\n\nexport type KnownProvider =\n\t\| \"amazon-bedrock\"\n\t\| \"anthropic\"\n\t\| \"google\"\n\t\| \"google-vertex\"\n\t\| \"openai\"\n\t\| \"azure-openai-responses\"\n\t\| \"openai-codex\"\n\t\| \"deepseek\"\n\t\| \"github-copilot\"\n\t\| \"xai\"\n\t\| \"groq\"\n\t\| \"cerebras\"\n\t\| \"openrouter\"\n\t\| \"vercel-ai-gateway\"\n\t\| \"zai\"\n\t\| \"mistral\"\n\t\| \"minimax\"\n\t\| \"minimax-cn\"\n\t\| \"moonshotai\"\n\t\| \"moonshotai-cn\"\n\t\| \"huggingface\"\n\t\| \"fireworks\"\n\t\| \"opencode\"\n\t\| \"opencode-go\"\n\t\| \"kimi-coding\"\n\t\| \"cloudflare-workers-ai\"\n\t\| \"cloudflare-ai-gateway\";\nexport type Provider = KnownProvider \| string;\n\nexport type ThinkingLevel = \"minimal\" \| \"low\" \| \"medium\" \| \"high\" \| \"xhigh\";\n\n/** Token budgets for each thinking level (token-based providers only) /\nexport interface ThinkingBudgets {\n\tminimal?: number;\n\tlow?: number;\n\tmedium?: number;\n\thigh?: number;\n}\n\n// Base options all providers share\nexport type CacheRetention = \"none\" \| \"short\" \| \"long\";\n\nexport type Transport = \"sse\" \| \"websocket\" \| \"auto\";\n\nexport interface ProviderResponse {\n\tstatus: number;\n\theaders: Record<string, string>;\n}\n\nexport interface StreamOptions {\n\ttemperature?: number;\n\tmaxTokens?: number;\n\tsignal?: AbortSignal;\n\tapiKey?: string;\n\t/\n\t Preferred 
transport for providers that support multiple transports.\n\t * Providers that do not support this option ignore it.\n\t /\n\ttransport?: Transport;\n\t/\n\t Prompt cache retention preference. Providers map this to their supported values.\n\t * Default: \"short\".\n\t /\n\tcacheRetention?: CacheRetention;\n\t/\n\t Optional session identifier for providers that support session-based caching.\n\t * Providers can use this to enable prompt caching, request routing, or other\n\t * session-aware features. Ignored by providers that don't support it.\n\t /\n\tsessionId?: string;\n\t/\n\t Optional callback for inspecting or replacing provider payloads before sending.\n\t * Return undefined to keep the payload unchanged.\n\t /\n\tonPayload?: (payload: unknown, model: Model<Api>) => unknown \| undefined \| Promise<unknown \| undefined>;\n\t/\n\t Optional callback invoked after an HTTP response is received and before\n\t * its body stream is consumed.\n\t /\n\tonResponse?: (response: ProviderResponse, model: Model<Api>) => void \| Promise<void>;\n\t/\n\t Optional custom HTTP headers to include in API requests.\n\t * Merged with provider defaults; can override default headers.\n\t * Not supported by all providers (e.g., AWS Bedrock uses SDK auth).\n\t /\n\theaders?: Record<string, string>;\n\t/\n\t HTTP request timeout in milliseconds for providers/SDKs that support it.\n\t * For example, OpenAI and Anthropic SDK clients default to 10 minutes.\n\t /\n\ttimeoutMs?: number;\n\t/\n\t Maximum retry attempts for providers/SDKs that support client-side retries.\n\t * For example, OpenAI and Anthropic SDK clients default to 2.\n\t /\n\tmaxRetries?: number;\n\t/\n\t Maximum delay in milliseconds to wait for a retry when the server requests a long wait.\n\t * If the server's requested delay exceeds this value, the request fails immediately\n\t * with an error containing the requested delay, allowing higher-level retry logic\n\t * to handle it with user visibility.\n\t * Default: 60000 
(60 seconds). Set to 0 to disable the cap.\n\t /\n\tmaxRetryDelayMs?: number;\n\t/\n\t Optional metadata to include in API requests.\n\t * Providers extract the fields they understand and ignore the rest.\n\t * For example, Anthropic uses `user_id` for abuse tracking and rate limiting.\n\t /\n\tmetadata?: Record<string, unknown>;\n}\n\nexport type ProviderStreamOptions = StreamOptions & Record<string, unknown>;\n\n// Unified options with reasoning passed to streamSimple() and completeSimple()\nexport interface SimpleStreamOptions extends StreamOptions {\n\treasoning?: ThinkingLevel;\n\t/* Custom token budgets for thinking levels (token-based providers only) /\n\tthinkingBudgets?: ThinkingBudgets;\n}\n\n// Generic StreamFunction with typed options.\n//\n// Contract:\n// - Must return an AssistantMessageEventStream.\n// - Once invoked, request/model/runtime failures should be encoded in the\n// returned stream, not thrown.\n// - Error termination must produce an AssistantMessage with stopReason\n// \"error\" or \"aborted\" and errorMessage, emitted via the stream protocol.\nexport type StreamFunction<TApi extends Api = Api, TOptions extends StreamOptions = StreamOptions> = (\n\tmodel: Model<TApi>,\n\tcontext: Context,\n\toptions?: TOptions,\n) => AssistantMessageEventStream;\n\nexport interface TextSignatureV1 {\n\tv: 1;\n\tid: string;\n\tphase?: \"commentary\" \| \"final_answer\";\n}\n\nexport interface TextContent {\n\ttype: \"text\";\n\ttext: string;\n\ttextSignature?: string; // e.g., for OpenAI responses, message metadata (legacy id string or TextSignatureV1 JSON)\n}\n\nexport interface ThinkingContent {\n\ttype: \"thinking\";\n\tthinking: string;\n\tthinkingSignature?: string; // e.g., for OpenAI responses, the reasoning item ID\n\t/* When true, the thinking content was redacted by safety filters. The opaque\n\t * encrypted payload is stored in `thinkingSignature` so it can be passed back\n\t * to the API for multi-turn continuity. 
/\n\tredacted?: boolean;\n}\n\nexport interface ImageContent {\n\ttype: \"image\";\n\tdata: string; // base64 encoded image data\n\tmimeType: string; // e.g., \"image/jpeg\", \"image/png\"\n}\n\nexport interface ToolCall {\n\ttype: \"toolCall\";\n\tid: string;\n\tname: string;\n\targuments: Record<string, any>;\n\tthoughtSignature?: string; // Google-specific: opaque signature for reusing thought context\n}\n\nexport interface Usage {\n\tinput: number;\n\toutput: number;\n\tcacheRead: number;\n\tcacheWrite: number;\n\ttotalTokens: number;\n\tcost: {\n\t\tinput: number;\n\t\toutput: number;\n\t\tcacheRead: number;\n\t\tcacheWrite: number;\n\t\ttotal: number;\n\t};\n}\n\nexport type StopReason = \"stop\" \| \"length\" \| \"toolUse\" \| \"error\" \| \"aborted\";\n\nexport interface UserMessage {\n\trole: \"user\";\n\tcontent: string \| (TextContent \| ImageContent)[];\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport interface AssistantMessage {\n\trole: \"assistant\";\n\tcontent: (TextContent \| ThinkingContent \| ToolCall)[];\n\tapi: Api;\n\tprovider: Provider;\n\tmodel: string;\n\tresponseModel?: string; // Concrete `chunk.model` when different from the requested `model` (e.g. 
OpenRouter `auto` -> `anthropic/...`)\n\tresponseId?: string; // Provider-specific response/message identifier when the upstream API exposes one\n\tusage: Usage;\n\tstopReason: StopReason;\n\terrorMessage?: string;\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport interface ToolResultMessage<TDetails = any> {\n\trole: \"toolResult\";\n\ttoolCallId: string;\n\ttoolName: string;\n\tcontent: (TextContent \| ImageContent)[]; // Supports text and images\n\tdetails?: TDetails;\n\tisError: boolean;\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport type Message = UserMessage \| AssistantMessage \| ToolResultMessage;\n\nimport type { TSchema } from \"typebox\";\n\nexport interface Tool<TParameters extends TSchema = TSchema> {\n\tname: string;\n\tdescription: string;\n\tparameters: TParameters;\n}\n\nexport interface Context {\n\tsystemPrompt?: string;\n\tmessages: Message[];\n\ttools?: Tool[];\n}\n\n/\n Event protocol for AssistantMessageEventStream.\n \n Streams should emit `start` before partial updates, then terminate with either:\n * - `done` carrying the final successful AssistantMessage, or\n * - `error` carrying the final AssistantMessage with stopReason \"error\" or \"aborted\"\n * and errorMessage.\n /\nexport type AssistantMessageEvent =\n\t\| { type: \"start\"; partial: AssistantMessage }\n\t\| { type: \"text_start\"; contentIndex: number; partial: AssistantMessage }\n\t\| { type: \"text_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t\| { type: \"text_end\"; contentIndex: number; content: string; partial: AssistantMessage }\n\t\| { type: \"thinking_start\"; contentIndex: number; partial: AssistantMessage }\n\t\| { type: \"thinking_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t\| { type: \"thinking_end\"; contentIndex: number; content: string; partial: AssistantMessage }\n\t\| { type: \"toolcall_start\"; contentIndex: number; partial: AssistantMessage }\n\t\| { type: 
\"toolcall_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t\| { type: \"toolcall_end\"; contentIndex: number; toolCall: ToolCall; partial: AssistantMessage }\n\t\| { type: \"done\"; reason: Extract<StopReason, \"stop\" \| \"length\" \| \"toolUse\">; message: AssistantMessage }\n\t\| { type: \"error\"; reason: Extract<StopReason, \"aborted\" \| \"error\">; error: AssistantMessage };\n\n/\n Compatibility settings for OpenAI-compatible completions APIs.\n * Use this to override URL-based auto-detection for custom providers.\n /\nexport interface OpenAICompletionsCompat {\n\t/* Whether the provider supports the `store` field. Default: auto-detected from URL. /\n\tsupportsStore?: boolean;\n\t/* Whether the provider supports the `developer` role (vs `system`). Default: auto-detected from URL. /\n\tsupportsDeveloperRole?: boolean;\n\t/* Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. /\n\tsupportsReasoningEffort?: boolean;\n\t/* Optional mapping from pi-ai reasoning levels to provider/model-specific `reasoning_effort` values. /\n\treasoningEffortMap?: Partial<Record<ThinkingLevel, string>>;\n\t/* Whether the provider supports `stream_options: { include_usage: true }` for token usage in streaming responses. Default: true. /\n\tsupportsUsageInStreaming?: boolean;\n\t/* Which field to use for max tokens. Default: auto-detected from URL. /\n\tmaxTokensField?: \"max_completion_tokens\" \| \"max_tokens\";\n\t/* Whether tool results require the `name` field. Default: auto-detected from URL. /\n\trequiresToolResultName?: boolean;\n\t/* Whether a user message after tool results requires an assistant message in between. Default: auto-detected from URL. /\n\trequiresAssistantAfterToolResult?: boolean;\n\t/* Whether thinking blocks must be converted to text blocks with <thinking> delimiters. Default: auto-detected from URL. 
/\n\trequiresThinkingAsText?: boolean;\n\t/* Whether all replayed assistant messages must include an empty reasoning_content field when reasoning is enabled. Default: auto-detected from URL. /\n\trequiresReasoningContentOnAssistantMessages?: boolean;\n\t/* Format for reasoning/thinking parameter. \"openai\" uses reasoning_effort, \"openrouter\" uses reasoning: { effort }, \"deepseek\" uses thinking: { type } plus reasoning_effort, \"zai\" uses top-level enable_thinking: boolean, \"qwen\" uses top-level enable_thinking: boolean, and \"qwen-chat-template\" uses chat_template_kwargs.enable_thinking. Default: \"openai\". /\n\tthinkingFormat?: \"openai\" \| \"openrouter\" \| \"deepseek\" \| \"zai\" \| \"qwen\" \| \"qwen-chat-template\";\n\t/* OpenRouter-specific routing preferences. Only used when baseUrl points to OpenRouter. /\n\topenRouterRouting?: OpenRouterRouting;\n\t/* Vercel AI Gateway routing preferences. Only used when baseUrl points to Vercel AI Gateway. /\n\tvercelGatewayRouting?: VercelGatewayRouting;\n\t/* Whether z.ai supports top-level `tool_stream: true` for streaming tool call deltas. Default: false. /\n\tzaiToolStream?: boolean;\n\t/* Whether the provider supports the `strict` field in tool definitions. Default: true. /\n\tsupportsStrictMode?: boolean;\n\t/* Cache control convention for prompt caching. \"anthropic\" applies Anthropic-style `cache_control` markers to the system prompt, last tool definition, and last user/assistant text content. /\n\tcacheControlFormat?: \"anthropic\";\n\t/* Whether to send known session-affinity headers (`session_id`, `x-client-request-id`, `x-session-affinity`) from `options.sessionId` when caching is enabled. Default: false. /\n\tsendSessionAffinityHeaders?: boolean;\n\t/* Whether the provider supports long prompt cache retention (`prompt_cache_retention: \"24h\"` or Anthropic-style `cache_control.ttl: \"1h\"`, depending on format). Default: true. 
/\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/* Compatibility settings for OpenAI Responses APIs. /\nexport interface OpenAIResponsesCompat {\n\t/* Whether to send the OpenAI `session_id` cache-affinity header from `options.sessionId` when caching is enabled. Default: true. /\n\tsendSessionIdHeader?: boolean;\n\t/* Whether the provider supports `prompt_cache_retention: \"24h\"`. Default: true. /\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/* Compatibility settings for Anthropic Messages-compatible APIs. /\nexport interface AnthropicMessagesCompat {\n\t/\n\t Whether the provider accepts per-tool `eager_input_streaming`.\n\t * When false, the Anthropic provider omits `tools[].eager_input_streaming`\n\t * and sends the legacy `fine-grained-tool-streaming-2025-05-14` beta header\n\t * for tool-enabled requests.\n\t * Default: true.\n\t /\n\tsupportsEagerToolInputStreaming?: boolean;\n\t/* Whether the provider supports Anthropic long cache retention (`cache_control.ttl: \"1h\"`). Default: true. /\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/\n OpenRouter provider routing preferences.\n * Controls which upstream providers OpenRouter routes requests to.\n * Sent as the `provider` field in the OpenRouter API request body.\n * @see https://openrouter.ai/docs/guides/routing/provider-selection\n /\nexport interface OpenRouterRouting {\n\t/* Whether to allow backup providers to serve requests. Default: true. /\n\tallow_fallbacks?: boolean;\n\t/* Whether to filter providers to only those that support all parameters in the request. Default: false. /\n\trequire_parameters?: boolean;\n\t/* Data collection setting. \"allow\" (default): allow providers that may store/train on data. \"deny\": only use providers that don't collect user data. /\n\tdata_collection?: \"deny\" \| \"allow\";\n\t/* Whether to restrict routing to only ZDR (Zero Data Retention) endpoints. /\n\tzdr?: boolean;\n\t/* Whether to restrict routing to only models that allow text distillation. 
/\n\tenforce_distillable_text?: boolean;\n\t/* An ordered list of provider names/slugs to try in sequence, falling back to the next if unavailable. /\n\torder?: string[];\n\t/* List of provider names/slugs to exclusively allow for this request. /\n\tonly?: string[];\n\t/* List of provider names/slugs to skip for this request. /\n\tignore?: string[];\n\t/* A list of quantization levels to filter providers by (e.g., [\"fp16\", \"bf16\", \"fp8\", \"fp6\", \"int8\", \"int4\", \"fp4\", \"fp32\"]). /\n\tquantizations?: string[];\n\t/* Sorting strategy. Can be a string (e.g., \"price\", \"throughput\", \"latency\") or an object with `by` and `partition`. /\n\tsort?:\n\t\t\| string\n\t\t\| {\n\t\t\t\t/* The sorting metric: \"price\", \"throughput\", \"latency\". /\n\t\t\t\tby?: string;\n\t\t\t\t/* Partitioning strategy: \"model\" (default) or \"none\". /\n\t\t\t\tpartition?: string \| null;\n\t\t };\n\t/* Maximum price per million tokens (USD). /\n\tmax_price?: {\n\t\t/* Price per million prompt tokens. /\n\t\tprompt?: number \| string;\n\t\t/* Price per million completion tokens. /\n\t\tcompletion?: number \| string;\n\t\t/* Price per image. /\n\t\timage?: number \| string;\n\t\t/* Price per audio unit. /\n\t\taudio?: number \| string;\n\t\t/* Price per request. /\n\t\trequest?: number \| string;\n\t};\n\t/* Preferred minimum throughput (tokens/second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. /\n\tpreferred_min_throughput?:\n\t\t\| number\n\t\t\| {\n\t\t\t\t/* Minimum tokens/second at the 50th percentile. /\n\t\t\t\tp50?: number;\n\t\t\t\t/* Minimum tokens/second at the 75th percentile. /\n\t\t\t\tp75?: number;\n\t\t\t\t/* Minimum tokens/second at the 90th percentile. /\n\t\t\t\tp90?: number;\n\t\t\t\t/* Minimum tokens/second at the 99th percentile. /\n\t\t\t\tp99?: number;\n\t\t };\n\t/* Preferred maximum latency (seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. 
/\n\tpreferred_max_latency?:\n\t\t\| number\n\t\t\| {\n\t\t\t\t/* Maximum latency in seconds at the 50th percentile. /\n\t\t\t\tp50?: number;\n\t\t\t\t/* Maximum latency in seconds at the 75th percentile. /\n\t\t\t\tp75?: number;\n\t\t\t\t/* Maximum latency in seconds at the 90th percentile. /\n\t\t\t\tp90?: number;\n\t\t\t\t/* Maximum latency in seconds at the 99th percentile. /\n\t\t\t\tp99?: number;\n\t\t };\n}\n\n/\n Vercel AI Gateway routing preferences.\n * Controls which upstream providers the gateway routes requests to.\n * @see https://vercel.com/docs/ai-gateway/models-and-providers/provider-options\n /\nexport interface VercelGatewayRouting {\n\t/* List of provider slugs to exclusively use for this request (e.g., [\"bedrock\", \"anthropic\"]). /\n\tonly?: string[];\n\t/* List of provider slugs to try in order (e.g., [\"anthropic\", \"openai\"]). /\n\torder?: string[];\n}\n\n// Model interface for the unified model system\nexport interface Model<TApi extends Api> {\n\tid: string;\n\tname: string;\n\tapi: TApi;\n\tprovider: Provider;\n\tbaseUrl: string;\n\treasoning: boolean;\n\tinput: (\"text\" \| \"image\")[];\n\tcost: {\n\t\tinput: number; // $/million tokens\n\t\toutput: number; // $/million tokens\n\t\tcacheRead: number; // $/million tokens\n\t\tcacheWrite: number; // $/million tokens\n\t};\n\tcontextWindow: number;\n\tmaxTokens: number;\n\theaders?: Record<string, string>;\n\t/* Compatibility overrides for OpenAI-compatible APIs. If not set, auto-detected from baseUrl. */\n\tcompat?: TApi extends \"openai-completions\"\n\t\t? OpenAICompletionsCompat\n\t\t: TApi extends \"openai-responses\"\n\t\t\t? OpenAIResponsesCompat\n\t\t\t: TApi extends \"anthropic-messages\"\n\t\t\t\t? AnthropicMessagesCompat\n\t\t\t\t: never;\n}\n"]}
1	+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"","sourcesContent":["import type { AssistantMessageEventStream } from \"./utils/event-stream.js\";\n\nexport type { AssistantMessageEventStream } from \"./utils/event-stream.js\";\n\nexport type KnownApi =\n\t\| \"openai-completions\"\n\t\| \"mistral-conversations\"\n\t\| \"openai-responses\"\n\t\| \"azure-openai-responses\"\n\t\| \"openai-codex-responses\"\n\t\| \"anthropic-messages\"\n\t\| \"bedrock-converse-stream\"\n\t\| \"google-generative-ai\"\n\t\| \"google-vertex\";\n\nexport type Api = KnownApi \| (string & {});\n\nexport type KnownProvider =\n\t\| \"amazon-bedrock\"\n\t\| \"anthropic\"\n\t\| \"google\"\n\t\| \"google-vertex\"\n\t\| \"openai\"\n\t\| \"azure-openai-responses\"\n\t\| \"openai-codex\"\n\t\| \"deepseek\"\n\t\| \"github-copilot\"\n\t\| \"xai\"\n\t\| \"groq\"\n\t\| \"cerebras\"\n\t\| \"openrouter\"\n\t\| \"vercel-ai-gateway\"\n\t\| \"zai\"\n\t\| \"mistral\"\n\t\| \"minimax\"\n\t\| \"minimax-cn\"\n\t\| \"moonshotai\"\n\t\| \"moonshotai-cn\"\n\t\| \"huggingface\"\n\t\| \"fireworks\"\n\t\| \"opencode\"\n\t\| \"opencode-go\"\n\t\| \"kimi-coding\"\n\t\| \"cloudflare-workers-ai\"\n\t\| \"cloudflare-ai-gateway\"\n\t\| \"xiaomi\";\nexport type Provider = KnownProvider \| string;\n\nexport type ThinkingLevel = \"minimal\" \| \"low\" \| \"medium\" \| \"high\" \| \"xhigh\";\nexport type ModelThinkingLevel = \"off\" \| ThinkingLevel;\nexport type ThinkingLevelMap = Partial<Record<ModelThinkingLevel, string \| null>>;\n\n/** Token budgets for each thinking level (token-based providers only) /\nexport interface ThinkingBudgets {\n\tminimal?: number;\n\tlow?: number;\n\tmedium?: number;\n\thigh?: number;\n}\n\n// Base options all providers share\nexport type CacheRetention = \"none\" \| \"short\" \| \"long\";\n\nexport type Transport = \"sse\" \| \"websocket\" \| \"websocket-cached\" \| \"auto\";\n\nexport interface ProviderResponse {\n\tstatus: 
number;\n\theaders: Record<string, string>;\n}\n\nexport interface StreamOptions {\n\ttemperature?: number;\n\tmaxTokens?: number;\n\tsignal?: AbortSignal;\n\tapiKey?: string;\n\t/\n\t Preferred transport for providers that support multiple transports.\n\t * Providers that do not support this option ignore it.\n\t /\n\ttransport?: Transport;\n\t/\n\t Prompt cache retention preference. Providers map this to their supported values.\n\t * Default: \"short\".\n\t /\n\tcacheRetention?: CacheRetention;\n\t/\n\t Optional session identifier for providers that support session-based caching.\n\t * Providers can use this to enable prompt caching, request routing, or other\n\t * session-aware features. Ignored by providers that don't support it.\n\t /\n\tsessionId?: string;\n\t/\n\t Optional callback for inspecting or replacing provider payloads before sending.\n\t * Return undefined to keep the payload unchanged.\n\t /\n\tonPayload?: (payload: unknown, model: Model<Api>) => unknown \| undefined \| Promise<unknown \| undefined>;\n\t/\n\t Optional callback invoked after an HTTP response is received and before\n\t * its body stream is consumed.\n\t /\n\tonResponse?: (response: ProviderResponse, model: Model<Api>) => void \| Promise<void>;\n\t/\n\t Optional custom HTTP headers to include in API requests.\n\t * Merged with provider defaults; can override default headers.\n\t * Not supported by all providers (e.g., AWS Bedrock uses SDK auth).\n\t /\n\theaders?: Record<string, string>;\n\t/\n\t HTTP request timeout in milliseconds for providers/SDKs that support it.\n\t * For example, OpenAI and Anthropic SDK clients default to 10 minutes.\n\t /\n\ttimeoutMs?: number;\n\t/\n\t Maximum retry attempts for providers/SDKs that support client-side retries.\n\t * For example, OpenAI and Anthropic SDK clients default to 2.\n\t /\n\tmaxRetries?: number;\n\t/\n\t Maximum delay in milliseconds to wait for a retry when the server requests a long wait.\n\t * If the server's requested delay 
exceeds this value, the request fails immediately\n\t * with an error containing the requested delay, allowing higher-level retry logic\n\t * to handle it with user visibility.\n\t * Default: 60000 (60 seconds). Set to 0 to disable the cap.\n\t /\n\tmaxRetryDelayMs?: number;\n\t/\n\t Optional metadata to include in API requests.\n\t * Providers extract the fields they understand and ignore the rest.\n\t * For example, Anthropic uses `user_id` for abuse tracking and rate limiting.\n\t /\n\tmetadata?: Record<string, unknown>;\n}\n\nexport type ProviderStreamOptions = StreamOptions & Record<string, unknown>;\n\n// Unified options with reasoning passed to streamSimple() and completeSimple()\nexport interface SimpleStreamOptions extends StreamOptions {\n\treasoning?: ThinkingLevel;\n\t/* Custom token budgets for thinking levels (token-based providers only) /\n\tthinkingBudgets?: ThinkingBudgets;\n}\n\n// Generic StreamFunction with typed options.\n//\n// Contract:\n// - Must return an AssistantMessageEventStream.\n// - Once invoked, request/model/runtime failures should be encoded in the\n// returned stream, not thrown.\n// - Error termination must produce an AssistantMessage with stopReason\n// \"error\" or \"aborted\" and errorMessage, emitted via the stream protocol.\nexport type StreamFunction<TApi extends Api = Api, TOptions extends StreamOptions = StreamOptions> = (\n\tmodel: Model<TApi>,\n\tcontext: Context,\n\toptions?: TOptions,\n) => AssistantMessageEventStream;\n\nexport interface TextSignatureV1 {\n\tv: 1;\n\tid: string;\n\tphase?: \"commentary\" \| \"final_answer\";\n}\n\nexport interface TextContent {\n\ttype: \"text\";\n\ttext: string;\n\ttextSignature?: string; // e.g., for OpenAI responses, message metadata (legacy id string or TextSignatureV1 JSON)\n}\n\nexport interface ThinkingContent {\n\ttype: \"thinking\";\n\tthinking: string;\n\tthinkingSignature?: string; // e.g., for OpenAI responses, the reasoning item ID\n\t/* When true, the thinking content 
was redacted by safety filters. The opaque\n\t * encrypted payload is stored in `thinkingSignature` so it can be passed back\n\t * to the API for multi-turn continuity. /\n\tredacted?: boolean;\n}\n\nexport interface ImageContent {\n\ttype: \"image\";\n\tdata: string; // base64 encoded image data\n\tmimeType: string; // e.g., \"image/jpeg\", \"image/png\"\n}\n\nexport interface ToolCall {\n\ttype: \"toolCall\";\n\tid: string;\n\tname: string;\n\targuments: Record<string, any>;\n\tthoughtSignature?: string; // Google-specific: opaque signature for reusing thought context\n}\n\nexport interface Usage {\n\tinput: number;\n\toutput: number;\n\tcacheRead: number;\n\tcacheWrite: number;\n\ttotalTokens: number;\n\tcost: {\n\t\tinput: number;\n\t\toutput: number;\n\t\tcacheRead: number;\n\t\tcacheWrite: number;\n\t\ttotal: number;\n\t};\n}\n\nexport type StopReason = \"stop\" \| \"length\" \| \"toolUse\" \| \"error\" \| \"aborted\";\n\nexport interface UserMessage {\n\trole: \"user\";\n\tcontent: string \| (TextContent \| ImageContent)[];\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport interface AssistantMessage {\n\trole: \"assistant\";\n\tcontent: (TextContent \| ThinkingContent \| ToolCall)[];\n\tapi: Api;\n\tprovider: Provider;\n\tmodel: string;\n\tresponseModel?: string; // Concrete `chunk.model` when different from the requested `model` (e.g. 
OpenRouter `auto` -> `anthropic/...`)\n\tresponseId?: string; // Provider-specific response/message identifier when the upstream API exposes one\n\tusage: Usage;\n\tstopReason: StopReason;\n\terrorMessage?: string;\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport interface ToolResultMessage<TDetails = any> {\n\trole: \"toolResult\";\n\ttoolCallId: string;\n\ttoolName: string;\n\tcontent: (TextContent \| ImageContent)[]; // Supports text and images\n\tdetails?: TDetails;\n\tisError: boolean;\n\ttimestamp: number; // Unix timestamp in milliseconds\n}\n\nexport type Message = UserMessage \| AssistantMessage \| ToolResultMessage;\n\nimport type { TSchema } from \"typebox\";\n\nexport interface Tool<TParameters extends TSchema = TSchema> {\n\tname: string;\n\tdescription: string;\n\tparameters: TParameters;\n}\n\nexport interface Context {\n\tsystemPrompt?: string;\n\tmessages: Message[];\n\ttools?: Tool[];\n}\n\n/\n Event protocol for AssistantMessageEventStream.\n \n Streams should emit `start` before partial updates, then terminate with either:\n * - `done` carrying the final successful AssistantMessage, or\n * - `error` carrying the final AssistantMessage with stopReason \"error\" or \"aborted\"\n * and errorMessage.\n /\nexport type AssistantMessageEvent =\n\t\| { type: \"start\"; partial: AssistantMessage }\n\t\| { type: \"text_start\"; contentIndex: number; partial: AssistantMessage }\n\t\| { type: \"text_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t\| { type: \"text_end\"; contentIndex: number; content: string; partial: AssistantMessage }\n\t\| { type: \"thinking_start\"; contentIndex: number; partial: AssistantMessage }\n\t\| { type: \"thinking_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t\| { type: \"thinking_end\"; contentIndex: number; content: string; partial: AssistantMessage }\n\t\| { type: \"toolcall_start\"; contentIndex: number; partial: AssistantMessage }\n\t\| { type: 
\"toolcall_delta\"; contentIndex: number; delta: string; partial: AssistantMessage }\n\t\| { type: \"toolcall_end\"; contentIndex: number; toolCall: ToolCall; partial: AssistantMessage }\n\t\| { type: \"done\"; reason: Extract<StopReason, \"stop\" \| \"length\" \| \"toolUse\">; message: AssistantMessage }\n\t\| { type: \"error\"; reason: Extract<StopReason, \"aborted\" \| \"error\">; error: AssistantMessage };\n\n/\n Compatibility settings for OpenAI-compatible completions APIs.\n * Use this to override URL-based auto-detection for custom providers.\n /\nexport interface OpenAICompletionsCompat {\n\t/* Whether the provider supports the `store` field. Default: auto-detected from URL. /\n\tsupportsStore?: boolean;\n\t/* Whether the provider supports the `developer` role (vs `system`). Default: auto-detected from URL. /\n\tsupportsDeveloperRole?: boolean;\n\t/* Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. /\n\tsupportsReasoningEffort?: boolean;\n\t/* Whether the provider supports `stream_options: { include_usage: true }` for token usage in streaming responses. Default: true. /\n\tsupportsUsageInStreaming?: boolean;\n\t/* Which field to use for max tokens. Default: auto-detected from URL. /\n\tmaxTokensField?: \"max_completion_tokens\" \| \"max_tokens\";\n\t/* Whether tool results require the `name` field. Default: auto-detected from URL. /\n\trequiresToolResultName?: boolean;\n\t/* Whether a user message after tool results requires an assistant message in between. Default: auto-detected from URL. /\n\trequiresAssistantAfterToolResult?: boolean;\n\t/* Whether thinking blocks must be converted to text blocks with <thinking> delimiters. Default: auto-detected from URL. /\n\trequiresThinkingAsText?: boolean;\n\t/* Whether all replayed assistant messages must include an empty reasoning_content field when reasoning is enabled. Default: auto-detected from URL. 
/\n\trequiresReasoningContentOnAssistantMessages?: boolean;\n\t/* Format for reasoning/thinking parameter. \"openai\" uses reasoning_effort, \"openrouter\" uses reasoning: { effort }, \"deepseek\" uses thinking: { type } plus reasoning_effort, \"zai\" uses top-level enable_thinking: boolean, \"qwen\" uses top-level enable_thinking: boolean, and \"qwen-chat-template\" uses chat_template_kwargs.enable_thinking. Default: \"openai\". /\n\tthinkingFormat?: \"openai\" \| \"openrouter\" \| \"deepseek\" \| \"zai\" \| \"qwen\" \| \"qwen-chat-template\";\n\t/* OpenRouter-specific routing preferences. Only used when baseUrl points to OpenRouter. /\n\topenRouterRouting?: OpenRouterRouting;\n\t/* Vercel AI Gateway routing preferences. Only used when baseUrl points to Vercel AI Gateway. /\n\tvercelGatewayRouting?: VercelGatewayRouting;\n\t/* Whether z.ai supports top-level `tool_stream: true` for streaming tool call deltas. Default: false. /\n\tzaiToolStream?: boolean;\n\t/* Whether the provider supports the `strict` field in tool definitions. Default: true. /\n\tsupportsStrictMode?: boolean;\n\t/* Cache control convention for prompt caching. \"anthropic\" applies Anthropic-style `cache_control` markers to the system prompt, last tool definition, and last user/assistant text content. /\n\tcacheControlFormat?: \"anthropic\";\n\t/* Whether to send known session-affinity headers (`session_id`, `x-client-request-id`, `x-session-affinity`) from `options.sessionId` when caching is enabled. Default: false. /\n\tsendSessionAffinityHeaders?: boolean;\n\t/* Whether the provider supports long prompt cache retention (`prompt_cache_retention: \"24h\"` or Anthropic-style `cache_control.ttl: \"1h\"`, depending on format). Default: true. /\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/* Compatibility settings for OpenAI Responses APIs. 
/\nexport interface OpenAIResponsesCompat {\n\t/* Whether to send the OpenAI `session_id` cache-affinity header from `options.sessionId` when caching is enabled. Default: true. /\n\tsendSessionIdHeader?: boolean;\n\t/* Whether the provider supports `prompt_cache_retention: \"24h\"`. Default: true. /\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/* Compatibility settings for Anthropic Messages-compatible APIs. /\nexport interface AnthropicMessagesCompat {\n\t/\n\t Whether the provider accepts per-tool `eager_input_streaming`.\n\t * When false, the Anthropic provider omits `tools[].eager_input_streaming`\n\t * and sends the legacy `fine-grained-tool-streaming-2025-05-14` beta header\n\t * for tool-enabled requests.\n\t * Default: true.\n\t /\n\tsupportsEagerToolInputStreaming?: boolean;\n\t/* Whether the provider supports Anthropic long cache retention (`cache_control.ttl: \"1h\"`). Default: true. /\n\tsupportsLongCacheRetention?: boolean;\n}\n\n/\n OpenRouter provider routing preferences.\n * Controls which upstream providers OpenRouter routes requests to.\n * Sent as the `provider` field in the OpenRouter API request body.\n * @see https://openrouter.ai/docs/guides/routing/provider-selection\n /\nexport interface OpenRouterRouting {\n\t/* Whether to allow backup providers to serve requests. Default: true. /\n\tallow_fallbacks?: boolean;\n\t/* Whether to filter providers to only those that support all parameters in the request. Default: false. /\n\trequire_parameters?: boolean;\n\t/* Data collection setting. \"allow\" (default): allow providers that may store/train on data. \"deny\": only use providers that don't collect user data. /\n\tdata_collection?: \"deny\" \| \"allow\";\n\t/* Whether to restrict routing to only ZDR (Zero Data Retention) endpoints. /\n\tzdr?: boolean;\n\t/* Whether to restrict routing to only models that allow text distillation. 
/\n\tenforce_distillable_text?: boolean;\n\t/* An ordered list of provider names/slugs to try in sequence, falling back to the next if unavailable. /\n\torder?: string[];\n\t/* List of provider names/slugs to exclusively allow for this request. /\n\tonly?: string[];\n\t/* List of provider names/slugs to skip for this request. /\n\tignore?: string[];\n\t/* A list of quantization levels to filter providers by (e.g., [\"fp16\", \"bf16\", \"fp8\", \"fp6\", \"int8\", \"int4\", \"fp4\", \"fp32\"]). /\n\tquantizations?: string[];\n\t/* Sorting strategy. Can be a string (e.g., \"price\", \"throughput\", \"latency\") or an object with `by` and `partition`. /\n\tsort?:\n\t\t\| string\n\t\t\| {\n\t\t\t\t/* The sorting metric: \"price\", \"throughput\", \"latency\". /\n\t\t\t\tby?: string;\n\t\t\t\t/* Partitioning strategy: \"model\" (default) or \"none\". /\n\t\t\t\tpartition?: string \| null;\n\t\t };\n\t/* Maximum price per million tokens (USD). /\n\tmax_price?: {\n\t\t/* Price per million prompt tokens. /\n\t\tprompt?: number \| string;\n\t\t/* Price per million completion tokens. /\n\t\tcompletion?: number \| string;\n\t\t/* Price per image. /\n\t\timage?: number \| string;\n\t\t/* Price per audio unit. /\n\t\taudio?: number \| string;\n\t\t/* Price per request. /\n\t\trequest?: number \| string;\n\t};\n\t/* Preferred minimum throughput (tokens/second). Can be a number (applies to p50) or an object with percentile-specific cutoffs. /\n\tpreferred_min_throughput?:\n\t\t\| number\n\t\t\| {\n\t\t\t\t/* Minimum tokens/second at the 50th percentile. /\n\t\t\t\tp50?: number;\n\t\t\t\t/* Minimum tokens/second at the 75th percentile. /\n\t\t\t\tp75?: number;\n\t\t\t\t/* Minimum tokens/second at the 90th percentile. /\n\t\t\t\tp90?: number;\n\t\t\t\t/* Minimum tokens/second at the 99th percentile. /\n\t\t\t\tp99?: number;\n\t\t };\n\t/* Preferred maximum latency (seconds). Can be a number (applies to p50) or an object with percentile-specific cutoffs. 
/\n\tpreferred_max_latency?:\n\t\t\| number\n\t\t\| {\n\t\t\t\t/* Maximum latency in seconds at the 50th percentile. /\n\t\t\t\tp50?: number;\n\t\t\t\t/* Maximum latency in seconds at the 75th percentile. /\n\t\t\t\tp75?: number;\n\t\t\t\t/* Maximum latency in seconds at the 90th percentile. /\n\t\t\t\tp90?: number;\n\t\t\t\t/* Maximum latency in seconds at the 99th percentile. /\n\t\t\t\tp99?: number;\n\t\t };\n}\n\n/\n Vercel AI Gateway routing preferences.\n * Controls which upstream providers the gateway routes requests to.\n * @see https://vercel.com/docs/ai-gateway/models-and-providers/provider-options\n /\nexport interface VercelGatewayRouting {\n\t/* List of provider slugs to exclusively use for this request (e.g., [\"bedrock\", \"anthropic\"]). /\n\tonly?: string[];\n\t/* List of provider slugs to try in order (e.g., [\"anthropic\", \"openai\"]). /\n\torder?: string[];\n}\n\n// Model interface for the unified model system\nexport interface Model<TApi extends Api> {\n\tid: string;\n\tname: string;\n\tapi: TApi;\n\tprovider: Provider;\n\tbaseUrl: string;\n\treasoning: boolean;\n\t/\n\t Maps pi thinking levels to provider/model-specific values.\n\t * Missing keys use provider defaults. null marks a level as unsupported.\n\t /\n\tthinkingLevelMap?: ThinkingLevelMap;\n\tinput: (\"text\" \| \"image\")[];\n\tcost: {\n\t\tinput: number; // $/million tokens\n\t\toutput: number; // $/million tokens\n\t\tcacheRead: number; // $/million tokens\n\t\tcacheWrite: number; // $/million tokens\n\t};\n\tcontextWindow: number;\n\tmaxTokens: number;\n\theaders?: Record<string, string>;\n\t/* Compatibility overrides for OpenAI-compatible APIs. If not set, auto-detected from baseUrl. */\n\tcompat?: TApi extends \"openai-completions\"\n\t\t? OpenAICompletionsCompat\n\t\t: TApi extends \"openai-responses\"\n\t\t\t? OpenAIResponsesCompat\n\t\t\t: TApi extends \"anthropic-messages\"\n\t\t\t\t? AnthropicMessagesCompat\n\t\t\t\t: never;\n}\n"]}

package/dist/utils/overflow.d.ts CHANGED Viewed

@@ -26,6 +26,8 @@ import type { AssistantMessage } from "../types.js";
  * **Unreliable detection:**
  * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),
  *   sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.
+ * - Xiaomi MiMo: Truncates input to fit contextWindow then returns stopReason "length" with
+ *   output=0. Pass contextWindow param to detect via the "filled context + zero output" signal.
  * - Ollama: May truncate input silently for some setups, but may also return explicit
  *   overflow errors that match the patterns above. Silent truncation still cannot be
  *   detected here because we do not know the expected token count.

package/dist/utils/overflow.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"overflow.d.ts","sourceRoot":"","sources":["../../src/utils/overflow.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;~~AAiEpD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6CG~~;AACH,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,gBAAgB,EAAE,aAAa,CAAC,EAAE,MAAM,GAAG,OAAO,~~CAmB5F~~;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,MAAM,EAAE,CAE9C","sourcesContent":["import type { AssistantMessage } from \"../types.js\";\n\n/*\n Regex patterns to detect context overflow errors from different providers.\n \n These patterns match error messages returned when the input exceeds\n * the model's context window.\n \n Provider-specific patterns (with example error messages):\n \n - Anthropic: \"prompt is too long: 213462 tokens > 200000 maximum\"\n * - Anthropic: \"413 {\\\"error\\\":{\\\"type\\\":\\\"request_too_large\\\",\\\"message\\\":\\\"Request exceeds the maximum size\\\"}}\"\n * - OpenAI: \"Your input exceeds the context window of this model\"\n * - Google: \"The input token count (1196265) exceeds the maximum number of tokens allowed (1048575)\"\n * - xAI: \"This model's maximum prompt length is 131072 but the request contains 537812 tokens\"\n * - Groq: \"Please reduce the length of the messages or completion\"\n * - OpenRouter: \"This endpoint's maximum context length is X tokens. However, you requested about Y tokens\"\n * - llama.cpp: \"the request exceeds the available context size, try increasing it\"\n * - LM Studio: \"tokens to keep from the initial prompt is greater than the context length\"\n * - GitHub Copilot: \"prompt token count of X exceeds the limit of Y\"\n * - MiniMax: \"invalid params, context window exceeds limit\"\n * - Kimi For Coding: \"Your request exceeded model token limit: X (requested: Y)\"\n * - Cerebras: \"400/413 status code (no body)\"\n * - Mistral: \"Prompt contains X tokens ... 
too large for model with Y maximum context length\"\n * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow\n * - Ollama: Some deployments truncate silently, others return errors like \"prompt too long; exceeded max context length by X tokens\"\n /\nconst OVERFLOW_PATTERNS = [\n\t/prompt is too long/i, // Anthropic token overflow\n\t/request_too_large/i, // Anthropic request byte-size overflow (HTTP 413)\n\t/input is too long for requested model/i, // Amazon Bedrock\n\t/exceeds the context window/i, // OpenAI (Completions & Responses API)\n\t/input token count.exceeds the maximum/i, // Google (Gemini)\n\t/maximum prompt length is \\d+/i, // xAI (Grok)\n\t/reduce the length of the messages/i, // Groq\n\t/maximum context length is \\d+ tokens/i, // OpenRouter (all backends)\n\t/exceeds the limit of \\d+/i, // GitHub Copilot\n\t/exceeds the available context size/i, // llama.cpp server\n\t/greater than the context length/i, // LM Studio\n\t/context window exceeds limit/i, // MiniMax\n\t/exceeded model token limit/i, // Kimi For Coding\n\t/too large for model with \\d+ maximum context length/i, // Mistral\n\t/model_context_window_exceeded/i, // z.ai non-standard finish_reason surfaced as error text\n\t/prompt too long; exceeded (?:max )?context length/i, // Ollama explicit overflow error\n\t/context[_ ]length[_ ]exceeded/i, // Generic fallback\n\t/too many tokens/i, // Generic fallback\n\t/token limit exceeded/i, // Generic fallback\n\t/^4(?:00\|13)\\s(?:status code)?\\s\$no body\$/i, // Cerebras: 400/413 with no body\n];\n\n/*\n Patterns that indicate non-overflow errors (e.g. 
rate limiting, server errors).\n * Error messages matching any of these are excluded from overflow detection\n * even if they also match an OVERFLOW_PATTERN.\n \n Example: Bedrock formats throttling errors as \"ThrottlingException: Too many tokens,\n * please wait before trying again.\" which would match the /too many tokens/i overflow\n * pattern without this exclusion.\n /\nconst NON_OVERFLOW_PATTERNS = [\n\t/^(Throttling error\|Service unavailable):/i, // AWS Bedrock non-overflow errors (human-readable prefixes from formatBedrockError)\n\t/rate limit/i, // Generic rate limiting\n\t/too many requests/i, // Generic HTTP 429 style\n];\n\n/\n Check if an assistant message represents a context overflow error.\n \n This handles two cases:\n * 1. Error-based overflow: Most providers return stopReason \"error\" with a\n * specific error message pattern.\n * 2. Silent overflow: Some providers accept overflow requests and return\n * successfully. For these, we check if usage.input exceeds the context window.\n \n ## Reliability by Provider\n \n Reliable detection (returns error with detectable message):\n * - Anthropic: \"prompt is too long: X tokens > Y maximum\" or \"request_too_large\"\n * - OpenAI (Completions & Responses): \"exceeds the context window\"\n * - Google Gemini: \"input token count exceeds the maximum\"\n * - xAI (Grok): \"maximum prompt length is X but request contains Y\"\n * - Groq: \"reduce the length of the messages\"\n * - Cerebras: 400/413 status code (no body)\n * - Mistral: \"Prompt contains X tokens ... 
too large for model with Y maximum context length\"\n * - OpenRouter (all backends): \"maximum context length is X tokens\"\n * - llama.cpp: \"exceeds the available context size\"\n * - LM Studio: \"greater than the context length\"\n * - Kimi For Coding: \"exceeded model token limit: X (requested: Y)\"\n \n Unreliable detection:\n * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),\n * sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.\n * - Ollama: May truncate input silently for some setups, but may also return explicit\n * overflow errors that match the patterns above. Silent truncation still cannot be\n * detected here because we do not know the expected token count.\n \n ## Custom Providers\n \n If you've added custom models via settings.json, this function may not detect\n * overflow errors from those providers. To add support:\n \n 1. Send a request that exceeds the model's context window\n * 2. Check the errorMessage in the response\n * 3. Create a regex pattern that matches the error\n * 4. The pattern should be added to OVERFLOW_PATTERNS in this file, or\n * check the errorMessage yourself before calling this function\n \n @param message - The assistant message to check\n * @param contextWindow - Optional context window size for detecting silent overflow (z.ai)\n * @returns true if the message indicates a context overflow\n /\nexport function isContextOverflow(message: AssistantMessage, contextWindow?: number): boolean {\n\t// Case 1: Check error message patterns\n\tif (message.stopReason === \"error\" && message.errorMessage) {\n\t\t// Skip messages matching known non-overflow patterns (e.g. 
throttling / rate-limit)\n\t\tconst isNonOverflow = NON_OVERFLOW_PATTERNS.some((p) => p.test(message.errorMessage!));\n\t\tif (!isNonOverflow && OVERFLOW_PATTERNS.some((p) => p.test(message.errorMessage!))) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\t// Case 2: Silent overflow (z.ai style) - successful but usage exceeds context\n\tif (contextWindow && message.stopReason === \"stop\") {\n\t\tconst inputTokens = message.usage.input + message.usage.cacheRead;\n\t\tif (inputTokens > contextWindow) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\n/\n Get the overflow patterns for testing purposes.\n */\nexport function getOverflowPatterns(): RegExp[] {\n\treturn [...OVERFLOW_PATTERNS];\n}\n"]}
1	+ {"version":3,"file":"overflow.d.ts","sourceRoot":"","sources":["../../src/utils/overflow.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAoEpD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+CG;AACH,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,gBAAgB,EAAE,aAAa,CAAC,EAAE,MAAM,GAAG,OAAO,CA6B5F;AAED;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,MAAM,EAAE,CAE9C","sourcesContent":["import type { AssistantMessage } from \"../types.js\";\n\n/*\n Regex patterns to detect context overflow errors from different providers.\n \n These patterns match error messages returned when the input exceeds\n * the model's context window.\n \n Provider-specific patterns (with example error messages):\n \n - Anthropic: \"prompt is too long: 213462 tokens > 200000 maximum\"\n * - Anthropic: \"413 {\\\"error\\\":{\\\"type\\\":\\\"request_too_large\\\",\\\"message\\\":\\\"Request exceeds the maximum size\\\"}}\"\n * - OpenAI: \"Your input exceeds the context window of this model\"\n * - Google: \"The input token count (1196265) exceeds the maximum number of tokens allowed (1048575)\"\n * - xAI: \"This model's maximum prompt length is 131072 but the request contains 537812 tokens\"\n * - Groq: \"Please reduce the length of the messages or completion\"\n * - OpenRouter: \"This endpoint's maximum context length is X tokens. However, you requested about Y tokens\"\n * - llama.cpp: \"the request exceeds the available context size, try increasing it\"\n * - LM Studio: \"tokens to keep from the initial prompt is greater than the context length\"\n * - GitHub Copilot: \"prompt token count of X exceeds the limit of Y\"\n * - MiniMax: \"invalid params, context window exceeds limit\"\n * - Kimi For Coding: \"Your request exceeded model token limit: X (requested: Y)\"\n * - Cerebras: \"400/413 status code (no body)\"\n * - Mistral: \"Prompt contains X tokens ... 
too large for model with Y maximum context length\"\n * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow\n * - Xiaomi MiMo: Truncates input to fill contextWindow exactly, then returns finish_reason \"length\"\n * with output=0 (no room left to generate). Detected via stopReason \"length\" + zero output +\n * input filling the context window.\n * - Ollama: Some deployments truncate silently, others return errors like \"prompt too long; exceeded max context length by X tokens\"\n /\nconst OVERFLOW_PATTERNS = [\n\t/prompt is too long/i, // Anthropic token overflow\n\t/request_too_large/i, // Anthropic request byte-size overflow (HTTP 413)\n\t/input is too long for requested model/i, // Amazon Bedrock\n\t/exceeds the context window/i, // OpenAI (Completions & Responses API)\n\t/input token count.exceeds the maximum/i, // Google (Gemini)\n\t/maximum prompt length is \\d+/i, // xAI (Grok)\n\t/reduce the length of the messages/i, // Groq\n\t/maximum context length is \\d+ tokens/i, // OpenRouter (all backends)\n\t/exceeds the limit of \\d+/i, // GitHub Copilot\n\t/exceeds the available context size/i, // llama.cpp server\n\t/greater than the context length/i, // LM Studio\n\t/context window exceeds limit/i, // MiniMax\n\t/exceeded model token limit/i, // Kimi For Coding\n\t/too large for model with \\d+ maximum context length/i, // Mistral\n\t/model_context_window_exceeded/i, // z.ai non-standard finish_reason surfaced as error text\n\t/prompt too long; exceeded (?:max )?context length/i, // Ollama explicit overflow error\n\t/context[_ ]length[_ ]exceeded/i, // Generic fallback\n\t/too many tokens/i, // Generic fallback\n\t/token limit exceeded/i, // Generic fallback\n\t/^4(?:00\|13)\\s(?:status code)?\\s\$no body\$/i, // Cerebras: 400/413 with no body\n];\n\n/*\n Patterns that indicate non-overflow errors (e.g. 
rate limiting, server errors).\n * Error messages matching any of these are excluded from overflow detection\n * even if they also match an OVERFLOW_PATTERN.\n \n Example: Bedrock formats throttling errors as \"ThrottlingException: Too many tokens,\n * please wait before trying again.\" which would match the /too many tokens/i overflow\n * pattern without this exclusion.\n /\nconst NON_OVERFLOW_PATTERNS = [\n\t/^(Throttling error\|Service unavailable):/i, // AWS Bedrock non-overflow errors (human-readable prefixes from formatBedrockError)\n\t/rate limit/i, // Generic rate limiting\n\t/too many requests/i, // Generic HTTP 429 style\n];\n\n/\n Check if an assistant message represents a context overflow error.\n \n This handles two cases:\n * 1. Error-based overflow: Most providers return stopReason \"error\" with a\n * specific error message pattern.\n * 2. Silent overflow: Some providers accept overflow requests and return\n * successfully. For these, we check if usage.input exceeds the context window.\n \n ## Reliability by Provider\n \n Reliable detection (returns error with detectable message):\n * - Anthropic: \"prompt is too long: X tokens > Y maximum\" or \"request_too_large\"\n * - OpenAI (Completions & Responses): \"exceeds the context window\"\n * - Google Gemini: \"input token count exceeds the maximum\"\n * - xAI (Grok): \"maximum prompt length is X but request contains Y\"\n * - Groq: \"reduce the length of the messages\"\n * - Cerebras: 400/413 status code (no body)\n * - Mistral: \"Prompt contains X tokens ... 
too large for model with Y maximum context length\"\n * - OpenRouter (all backends): \"maximum context length is X tokens\"\n * - llama.cpp: \"exceeds the available context size\"\n * - LM Studio: \"greater than the context length\"\n * - Kimi For Coding: \"exceeded model token limit: X (requested: Y)\"\n \n Unreliable detection:\n * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),\n * sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.\n * - Xiaomi MiMo: Truncates input to fit contextWindow then returns stopReason \"length\" with\n * output=0. Pass contextWindow param to detect via the \"filled context + zero output\" signal.\n * - Ollama: May truncate input silently for some setups, but may also return explicit\n * overflow errors that match the patterns above. Silent truncation still cannot be\n * detected here because we do not know the expected token count.\n \n ## Custom Providers\n \n If you've added custom models via settings.json, this function may not detect\n * overflow errors from those providers. To add support:\n \n 1. Send a request that exceeds the model's context window\n * 2. Check the errorMessage in the response\n * 3. Create a regex pattern that matches the error\n * 4. The pattern should be added to OVERFLOW_PATTERNS in this file, or\n * check the errorMessage yourself before calling this function\n \n @param message - The assistant message to check\n * @param contextWindow - Optional context window size for detecting silent overflow (z.ai)\n * @returns true if the message indicates a context overflow\n /\nexport function isContextOverflow(message: AssistantMessage, contextWindow?: number): boolean {\n\t// Case 1: Check error message patterns\n\tif (message.stopReason === \"error\" && message.errorMessage) {\n\t\t// Skip messages matching known non-overflow patterns (e.g. 
throttling / rate-limit)\n\t\tconst isNonOverflow = NON_OVERFLOW_PATTERNS.some((p) => p.test(message.errorMessage!));\n\t\tif (!isNonOverflow && OVERFLOW_PATTERNS.some((p) => p.test(message.errorMessage!))) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\t// Case 2: Silent overflow (z.ai style) - successful but usage exceeds context\n\tif (contextWindow && message.stopReason === \"stop\") {\n\t\tconst inputTokens = message.usage.input + message.usage.cacheRead;\n\t\tif (inputTokens > contextWindow) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\t// Case 3: Length-stop overflow (Xiaomi MiMo style) - server truncates oversized input\n\t// to fit the context window, leaving no room for output. Returns stopReason \"length\"\n\t// with output=0 and input+cacheRead filling the context window.\n\tif (contextWindow && message.stopReason === \"length\" && message.usage.output === 0) {\n\t\tconst inputTokens = message.usage.input + message.usage.cacheRead;\n\t\tif (inputTokens >= contextWindow * 0.99) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\n/*\n Get the overflow patterns for testing purposes.\n */\nexport function getOverflowPatterns(): RegExp[] {\n\treturn [...OVERFLOW_PATTERNS];\n}\n"]}

package/dist/utils/overflow.js CHANGED Viewed

@@ -21,6 +21,9 @@
  * - Cerebras: "400/413 status code (no body)"
  * - Mistral: "Prompt contains X tokens ... too large for model with Y maximum context length"
  * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow
+ * - Xiaomi MiMo: Truncates input to fill contextWindow exactly, then returns finish_reason "length"
+ *   with output=0 (no room left to generate). Detected via stopReason "length" + zero output +
+ *   input filling the context window.
  * - Ollama: Some deployments truncate silently, others return errors like "prompt too long; exceeded max context length by X tokens"
  */
 const OVERFLOW_PATTERNS = [
@@ -86,6 +89,8 @@ const NON_OVERFLOW_PATTERNS = [
  * **Unreliable detection:**
  * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),
  *   sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.
+ * - Xiaomi MiMo: Truncates input to fit contextWindow then returns stopReason "length" with
+ *   output=0. Pass contextWindow param to detect via the "filled context + zero output" signal.
  * - Ollama: May truncate input silently for some setups, but may also return explicit
  *   overflow errors that match the patterns above. Silent truncation still cannot be
  *   detected here because we do not know the expected token count.
@@ -121,6 +126,15 @@ export function isContextOverflow(message, contextWindow) {
             return true;
         }
     }
+    // Case 3: Length-stop overflow (Xiaomi MiMo style) - server truncates oversized input
+    // to fit the context window, leaving no room for output. Returns stopReason "length"
+    // with output=0 and input+cacheRead filling the context window.
+    if (contextWindow && message.stopReason === "length" && message.usage.output === 0) {
+        const inputTokens = message.usage.input + message.usage.cacheRead;
+        if (inputTokens >= contextWindow * 0.99) {
+            return true;
+        }
+    }
     return false;
 }
 /**

package/dist/utils/overflow.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"overflow.js","sourceRoot":"","sources":["../../src/utils/overflow.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,MAAM,iBAAiB,GAAG;IACzB,qBAAqB,EAAE,2BAA2B;IAClD,oBAAoB,EAAE,kDAAkD;IACxE,wCAAwC,EAAE,iBAAiB;IAC3D,6BAA6B,EAAE,uCAAuC;IACtE,yCAAyC,EAAE,kBAAkB;IAC7D,+BAA+B,EAAE,aAAa;IAC9C,oCAAoC,EAAE,OAAO;IAC7C,uCAAuC,EAAE,4BAA4B;IACrE,2BAA2B,EAAE,iBAAiB;IAC9C,qCAAqC,EAAE,mBAAmB;IAC1D,kCAAkC,EAAE,YAAY;IAChD,+BAA+B,EAAE,UAAU;IAC3C,6BAA6B,EAAE,kBAAkB;IACjD,sDAAsD,EAAE,UAAU;IAClE,gCAAgC,EAAE,yDAAyD;IAC3F,oDAAoD,EAAE,iCAAiC;IACvF,gCAAgC,EAAE,mBAAmB;IACrD,kBAAkB,EAAE,mBAAmB;IACvC,uBAAuB,EAAE,mBAAmB;IAC5C,+CAA+C,EAAE,iCAAiC;CAClF,CAAC;AAEF;;;;;;;;GAQG;AACH,MAAM,qBAAqB,GAAG;IAC7B,2CAA2C,EAAE,oFAAoF;IACjI,aAAa,EAAE,wBAAwB;IACvC,oBAAoB,EAAE,yBAAyB;CAC/C,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6CG;AACH,MAAM,UAAU,iBAAiB,CAAC,OAAyB,EAAE,aAAsB,EAAW;IAC7F,uCAAuC;IACvC,IAAI,OAAO,CAAC,UAAU,KAAK,OAAO,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;QAC5D,oFAAoF;QACpF,MAAM,aAAa,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,YAAa,CAAC,CAAC,CAAC;QACvF,IAAI,CAAC,aAAa,IAAI,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,YAAa,CAAC,CAAC,EAAE,CAAC;YACpF,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,8EAA8E;IAC9E,IAAI,aAAa,IAAI,OAAO,CAAC,UAAU,KAAK,MAAM,EAAE,CAAC;QACpD,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC;QAClE,IAAI,WAAW,GAAG,aAAa,EAAE,CAAC;YACjC,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC;AAAA,CACb;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,GAAa;IAC/C,OAAO,CAAC,GAAG,iBAAiB,CAAC,CAAC;AAAA,CAC9B","sourcesContent":["import type { AssistantMessage } from \"../types.js\";\n\n/*\n Regex patterns to detect context overflow errors from different providers.\n \n These patterns match error messages returned when the input exceeds\n * the model's context window.\n \n Provider-specific patterns (with example error messages):\n \n - Anthropic: \"prompt is too long: 213462 
tokens > 200000 maximum\"\n * - Anthropic: \"413 {\\\"error\\\":{\\\"type\\\":\\\"request_too_large\\\",\\\"message\\\":\\\"Request exceeds the maximum size\\\"}}\"\n * - OpenAI: \"Your input exceeds the context window of this model\"\n * - Google: \"The input token count (1196265) exceeds the maximum number of tokens allowed (1048575)\"\n * - xAI: \"This model's maximum prompt length is 131072 but the request contains 537812 tokens\"\n * - Groq: \"Please reduce the length of the messages or completion\"\n * - OpenRouter: \"This endpoint's maximum context length is X tokens. However, you requested about Y tokens\"\n * - llama.cpp: \"the request exceeds the available context size, try increasing it\"\n * - LM Studio: \"tokens to keep from the initial prompt is greater than the context length\"\n * - GitHub Copilot: \"prompt token count of X exceeds the limit of Y\"\n * - MiniMax: \"invalid params, context window exceeds limit\"\n * - Kimi For Coding: \"Your request exceeded model token limit: X (requested: Y)\"\n * - Cerebras: \"400/413 status code (no body)\"\n * - Mistral: \"Prompt contains X tokens ... 
too large for model with Y maximum context length\"\n * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow\n * - Ollama: Some deployments truncate silently, others return errors like \"prompt too long; exceeded max context length by X tokens\"\n /\nconst OVERFLOW_PATTERNS = [\n\t/prompt is too long/i, // Anthropic token overflow\n\t/request_too_large/i, // Anthropic request byte-size overflow (HTTP 413)\n\t/input is too long for requested model/i, // Amazon Bedrock\n\t/exceeds the context window/i, // OpenAI (Completions & Responses API)\n\t/input token count.exceeds the maximum/i, // Google (Gemini)\n\t/maximum prompt length is \\d+/i, // xAI (Grok)\n\t/reduce the length of the messages/i, // Groq\n\t/maximum context length is \\d+ tokens/i, // OpenRouter (all backends)\n\t/exceeds the limit of \\d+/i, // GitHub Copilot\n\t/exceeds the available context size/i, // llama.cpp server\n\t/greater than the context length/i, // LM Studio\n\t/context window exceeds limit/i, // MiniMax\n\t/exceeded model token limit/i, // Kimi For Coding\n\t/too large for model with \\d+ maximum context length/i, // Mistral\n\t/model_context_window_exceeded/i, // z.ai non-standard finish_reason surfaced as error text\n\t/prompt too long; exceeded (?:max )?context length/i, // Ollama explicit overflow error\n\t/context[_ ]length[_ ]exceeded/i, // Generic fallback\n\t/too many tokens/i, // Generic fallback\n\t/token limit exceeded/i, // Generic fallback\n\t/^4(?:00\|13)\\s(?:status code)?\\s\$no body\$/i, // Cerebras: 400/413 with no body\n];\n\n/*\n Patterns that indicate non-overflow errors (e.g. 
rate limiting, server errors).\n * Error messages matching any of these are excluded from overflow detection\n * even if they also match an OVERFLOW_PATTERN.\n \n Example: Bedrock formats throttling errors as \"ThrottlingException: Too many tokens,\n * please wait before trying again.\" which would match the /too many tokens/i overflow\n * pattern without this exclusion.\n /\nconst NON_OVERFLOW_PATTERNS = [\n\t/^(Throttling error\|Service unavailable):/i, // AWS Bedrock non-overflow errors (human-readable prefixes from formatBedrockError)\n\t/rate limit/i, // Generic rate limiting\n\t/too many requests/i, // Generic HTTP 429 style\n];\n\n/\n Check if an assistant message represents a context overflow error.\n \n This handles two cases:\n * 1. Error-based overflow: Most providers return stopReason \"error\" with a\n * specific error message pattern.\n * 2. Silent overflow: Some providers accept overflow requests and return\n * successfully. For these, we check if usage.input exceeds the context window.\n \n ## Reliability by Provider\n \n Reliable detection (returns error with detectable message):\n * - Anthropic: \"prompt is too long: X tokens > Y maximum\" or \"request_too_large\"\n * - OpenAI (Completions & Responses): \"exceeds the context window\"\n * - Google Gemini: \"input token count exceeds the maximum\"\n * - xAI (Grok): \"maximum prompt length is X but request contains Y\"\n * - Groq: \"reduce the length of the messages\"\n * - Cerebras: 400/413 status code (no body)\n * - Mistral: \"Prompt contains X tokens ... 
too large for model with Y maximum context length\"\n * - OpenRouter (all backends): \"maximum context length is X tokens\"\n * - llama.cpp: \"exceeds the available context size\"\n * - LM Studio: \"greater than the context length\"\n * - Kimi For Coding: \"exceeded model token limit: X (requested: Y)\"\n \n Unreliable detection:\n * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),\n * sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.\n * - Ollama: May truncate input silently for some setups, but may also return explicit\n * overflow errors that match the patterns above. Silent truncation still cannot be\n * detected here because we do not know the expected token count.\n \n ## Custom Providers\n \n If you've added custom models via settings.json, this function may not detect\n * overflow errors from those providers. To add support:\n \n 1. Send a request that exceeds the model's context window\n * 2. Check the errorMessage in the response\n * 3. Create a regex pattern that matches the error\n * 4. The pattern should be added to OVERFLOW_PATTERNS in this file, or\n * check the errorMessage yourself before calling this function\n \n @param message - The assistant message to check\n * @param contextWindow - Optional context window size for detecting silent overflow (z.ai)\n * @returns true if the message indicates a context overflow\n /\nexport function isContextOverflow(message: AssistantMessage, contextWindow?: number): boolean {\n\t// Case 1: Check error message patterns\n\tif (message.stopReason === \"error\" && message.errorMessage) {\n\t\t// Skip messages matching known non-overflow patterns (e.g. 
throttling / rate-limit)\n\t\tconst isNonOverflow = NON_OVERFLOW_PATTERNS.some((p) => p.test(message.errorMessage!));\n\t\tif (!isNonOverflow && OVERFLOW_PATTERNS.some((p) => p.test(message.errorMessage!))) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\t// Case 2: Silent overflow (z.ai style) - successful but usage exceeds context\n\tif (contextWindow && message.stopReason === \"stop\") {\n\t\tconst inputTokens = message.usage.input + message.usage.cacheRead;\n\t\tif (inputTokens > contextWindow) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\n/\n Get the overflow patterns for testing purposes.\n */\nexport function getOverflowPatterns(): RegExp[] {\n\treturn [...OVERFLOW_PATTERNS];\n}\n"]}
1	+ {"version":3,"file":"overflow.js","sourceRoot":"","sources":["../../src/utils/overflow.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,MAAM,iBAAiB,GAAG;IACzB,qBAAqB,EAAE,2BAA2B;IAClD,oBAAoB,EAAE,kDAAkD;IACxE,wCAAwC,EAAE,iBAAiB;IAC3D,6BAA6B,EAAE,uCAAuC;IACtE,yCAAyC,EAAE,kBAAkB;IAC7D,+BAA+B,EAAE,aAAa;IAC9C,oCAAoC,EAAE,OAAO;IAC7C,uCAAuC,EAAE,4BAA4B;IACrE,2BAA2B,EAAE,iBAAiB;IAC9C,qCAAqC,EAAE,mBAAmB;IAC1D,kCAAkC,EAAE,YAAY;IAChD,+BAA+B,EAAE,UAAU;IAC3C,6BAA6B,EAAE,kBAAkB;IACjD,sDAAsD,EAAE,UAAU;IAClE,gCAAgC,EAAE,yDAAyD;IAC3F,oDAAoD,EAAE,iCAAiC;IACvF,gCAAgC,EAAE,mBAAmB;IACrD,kBAAkB,EAAE,mBAAmB;IACvC,uBAAuB,EAAE,mBAAmB;IAC5C,+CAA+C,EAAE,iCAAiC;CAClF,CAAC;AAEF;;;;;;;;GAQG;AACH,MAAM,qBAAqB,GAAG;IAC7B,2CAA2C,EAAE,oFAAoF;IACjI,aAAa,EAAE,wBAAwB;IACvC,oBAAoB,EAAE,yBAAyB;CAC/C,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+CG;AACH,MAAM,UAAU,iBAAiB,CAAC,OAAyB,EAAE,aAAsB,EAAW;IAC7F,uCAAuC;IACvC,IAAI,OAAO,CAAC,UAAU,KAAK,OAAO,IAAI,OAAO,CAAC,YAAY,EAAE,CAAC;QAC5D,oFAAoF;QACpF,MAAM,aAAa,GAAG,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,YAAa,CAAC,CAAC,CAAC;QACvF,IAAI,CAAC,aAAa,IAAI,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,YAAa,CAAC,CAAC,EAAE,CAAC;YACpF,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,8EAA8E;IAC9E,IAAI,aAAa,IAAI,OAAO,CAAC,UAAU,KAAK,MAAM,EAAE,CAAC;QACpD,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC;QAClE,IAAI,WAAW,GAAG,aAAa,EAAE,CAAC;YACjC,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,sFAAsF;IACtF,qFAAqF;IACrF,gEAAgE;IAChE,IAAI,aAAa,IAAI,OAAO,CAAC,UAAU,KAAK,QAAQ,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACpF,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC;QAClE,IAAI,WAAW,IAAI,aAAa,GAAG,IAAI,EAAE,CAAC;YACzC,OAAO,IAAI,CAAC;QACb,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC;AAAA,CACb;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,GAAa;IAC/C,OAAO,CAAC,GAAG,iBAAiB,CAAC,CAAC;AAAA,CAC9B","sourcesContent":["import type { AssistantMessage } from 
\"../types.js\";\n\n/*\n Regex patterns to detect context overflow errors from different providers.\n \n These patterns match error messages returned when the input exceeds\n * the model's context window.\n \n Provider-specific patterns (with example error messages):\n \n - Anthropic: \"prompt is too long: 213462 tokens > 200000 maximum\"\n * - Anthropic: \"413 {\\\"error\\\":{\\\"type\\\":\\\"request_too_large\\\",\\\"message\\\":\\\"Request exceeds the maximum size\\\"}}\"\n * - OpenAI: \"Your input exceeds the context window of this model\"\n * - Google: \"The input token count (1196265) exceeds the maximum number of tokens allowed (1048575)\"\n * - xAI: \"This model's maximum prompt length is 131072 but the request contains 537812 tokens\"\n * - Groq: \"Please reduce the length of the messages or completion\"\n * - OpenRouter: \"This endpoint's maximum context length is X tokens. However, you requested about Y tokens\"\n * - llama.cpp: \"the request exceeds the available context size, try increasing it\"\n * - LM Studio: \"tokens to keep from the initial prompt is greater than the context length\"\n * - GitHub Copilot: \"prompt token count of X exceeds the limit of Y\"\n * - MiniMax: \"invalid params, context window exceeds limit\"\n * - Kimi For Coding: \"Your request exceeded model token limit: X (requested: Y)\"\n * - Cerebras: \"400/413 status code (no body)\"\n * - Mistral: \"Prompt contains X tokens ... too large for model with Y maximum context length\"\n * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow\n * - Xiaomi MiMo: Truncates input to fill contextWindow exactly, then returns finish_reason \"length\"\n * with output=0 (no room left to generate). 
Detected via stopReason \"length\" + zero output +\n * input filling the context window.\n * - Ollama: Some deployments truncate silently, others return errors like \"prompt too long; exceeded max context length by X tokens\"\n /\nconst OVERFLOW_PATTERNS = [\n\t/prompt is too long/i, // Anthropic token overflow\n\t/request_too_large/i, // Anthropic request byte-size overflow (HTTP 413)\n\t/input is too long for requested model/i, // Amazon Bedrock\n\t/exceeds the context window/i, // OpenAI (Completions & Responses API)\n\t/input token count.exceeds the maximum/i, // Google (Gemini)\n\t/maximum prompt length is \\d+/i, // xAI (Grok)\n\t/reduce the length of the messages/i, // Groq\n\t/maximum context length is \\d+ tokens/i, // OpenRouter (all backends)\n\t/exceeds the limit of \\d+/i, // GitHub Copilot\n\t/exceeds the available context size/i, // llama.cpp server\n\t/greater than the context length/i, // LM Studio\n\t/context window exceeds limit/i, // MiniMax\n\t/exceeded model token limit/i, // Kimi For Coding\n\t/too large for model with \\d+ maximum context length/i, // Mistral\n\t/model_context_window_exceeded/i, // z.ai non-standard finish_reason surfaced as error text\n\t/prompt too long; exceeded (?:max )?context length/i, // Ollama explicit overflow error\n\t/context[_ ]length[_ ]exceeded/i, // Generic fallback\n\t/too many tokens/i, // Generic fallback\n\t/token limit exceeded/i, // Generic fallback\n\t/^4(?:00\|13)\\s(?:status code)?\\s\$no body\$/i, // Cerebras: 400/413 with no body\n];\n\n/*\n Patterns that indicate non-overflow errors (e.g. 
rate limiting, server errors).\n * Error messages matching any of these are excluded from overflow detection\n * even if they also match an OVERFLOW_PATTERN.\n \n Example: Bedrock formats throttling errors as \"ThrottlingException: Too many tokens,\n * please wait before trying again.\" which would match the /too many tokens/i overflow\n * pattern without this exclusion.\n /\nconst NON_OVERFLOW_PATTERNS = [\n\t/^(Throttling error\|Service unavailable):/i, // AWS Bedrock non-overflow errors (human-readable prefixes from formatBedrockError)\n\t/rate limit/i, // Generic rate limiting\n\t/too many requests/i, // Generic HTTP 429 style\n];\n\n/\n Check if an assistant message represents a context overflow error.\n \n This handles two cases:\n * 1. Error-based overflow: Most providers return stopReason \"error\" with a\n * specific error message pattern.\n * 2. Silent overflow: Some providers accept overflow requests and return\n * successfully. For these, we check if usage.input exceeds the context window.\n \n ## Reliability by Provider\n \n Reliable detection (returns error with detectable message):\n * - Anthropic: \"prompt is too long: X tokens > Y maximum\" or \"request_too_large\"\n * - OpenAI (Completions & Responses): \"exceeds the context window\"\n * - Google Gemini: \"input token count exceeds the maximum\"\n * - xAI (Grok): \"maximum prompt length is X but request contains Y\"\n * - Groq: \"reduce the length of the messages\"\n * - Cerebras: 400/413 status code (no body)\n * - Mistral: \"Prompt contains X tokens ... 
too large for model with Y maximum context length\"\n * - OpenRouter (all backends): \"maximum context length is X tokens\"\n * - llama.cpp: \"exceeds the available context size\"\n * - LM Studio: \"greater than the context length\"\n * - Kimi For Coding: \"exceeded model token limit: X (requested: Y)\"\n \n Unreliable detection:\n * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),\n * sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.\n * - Xiaomi MiMo: Truncates input to fit contextWindow then returns stopReason \"length\" with\n * output=0. Pass contextWindow param to detect via the \"filled context + zero output\" signal.\n * - Ollama: May truncate input silently for some setups, but may also return explicit\n * overflow errors that match the patterns above. Silent truncation still cannot be\n * detected here because we do not know the expected token count.\n \n ## Custom Providers\n \n If you've added custom models via settings.json, this function may not detect\n * overflow errors from those providers. To add support:\n \n 1. Send a request that exceeds the model's context window\n * 2. Check the errorMessage in the response\n * 3. Create a regex pattern that matches the error\n * 4. The pattern should be added to OVERFLOW_PATTERNS in this file, or\n * check the errorMessage yourself before calling this function\n \n @param message - The assistant message to check\n * @param contextWindow - Optional context window size for detecting silent overflow (z.ai)\n * @returns true if the message indicates a context overflow\n /\nexport function isContextOverflow(message: AssistantMessage, contextWindow?: number): boolean {\n\t// Case 1: Check error message patterns\n\tif (message.stopReason === \"error\" && message.errorMessage) {\n\t\t// Skip messages matching known non-overflow patterns (e.g. 
throttling / rate-limit)\n\t\tconst isNonOverflow = NON_OVERFLOW_PATTERNS.some((p) => p.test(message.errorMessage!));\n\t\tif (!isNonOverflow && OVERFLOW_PATTERNS.some((p) => p.test(message.errorMessage!))) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\t// Case 2: Silent overflow (z.ai style) - successful but usage exceeds context\n\tif (contextWindow && message.stopReason === \"stop\") {\n\t\tconst inputTokens = message.usage.input + message.usage.cacheRead;\n\t\tif (inputTokens > contextWindow) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\t// Case 3: Length-stop overflow (Xiaomi MiMo style) - server truncates oversized input\n\t// to fit the context window, leaving no room for output. Returns stopReason \"length\"\n\t// with output=0 and input+cacheRead filling the context window.\n\tif (contextWindow && message.stopReason === \"length\" && message.usage.output === 0) {\n\t\tconst inputTokens = message.usage.input + message.usage.cacheRead;\n\t\tif (inputTokens >= contextWindow 0.99) {\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\n/*\n Get the overflow patterns for testing purposes.\n */\nexport function getOverflowPatterns(): RegExp[] {\n\treturn [...OVERFLOW_PATTERNS];\n}\n"]}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@mariozechner/pi-ai",
-	"version": "0.71.0",
+	"version": "0.72.0",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"type": "module",
 	"main": "./dist/index.js",