smoltalk 0.0.43 → 0.0.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/clients/google.d.ts +8 -1
- package/dist/clients/google.js +71 -5
- package/dist/model.js +4 -4
- package/dist/models.d.ts +478 -10
- package/dist/models.js +241 -5
- package/dist/strategies/raceStrategy.js +5 -1
- package/dist/types.d.ts +2 -0
- package/dist/types.js +40 -0
- package/package.json +1 -1
package/dist/clients/google.d.ts
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
|
-
import { GoogleGenAI } from "@google/genai";
|
|
1
|
+
import { Content, GenerateContentConfig, GoogleGenAI } from "@google/genai";
|
|
2
2
|
import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
|
|
3
3
|
import { BaseClient } from "./baseClient.js";
|
|
4
4
|
import { ModelName } from "../models.js";
|
|
5
5
|
export type SmolGoogleConfig = BaseClientConfig;
|
|
6
|
+
type GeneratedRequest = {
|
|
7
|
+
contents: Content[];
|
|
8
|
+
model: ModelName;
|
|
9
|
+
config: GenerateContentConfig;
|
|
10
|
+
};
|
|
6
11
|
export declare class SmolGoogle extends BaseClient implements SmolClient {
|
|
7
12
|
private client;
|
|
8
13
|
private logger;
|
|
@@ -13,5 +18,7 @@ export declare class SmolGoogle extends BaseClient implements SmolClient {
|
|
|
13
18
|
private calculateUsageAndCost;
|
|
14
19
|
private buildRequest;
|
|
15
20
|
_textSync(config: PromptConfig): Promise<Result<PromptResult>>;
|
|
21
|
+
__textSync(request: GeneratedRequest): Promise<Result<PromptResult>>;
|
|
16
22
|
_textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
|
|
17
23
|
}
|
|
24
|
+
export {};
|
package/dist/clients/google.js
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import { GoogleGenAI } from "@google/genai";
|
|
2
2
|
import { ToolCall } from "../classes/ToolCall.js";
|
|
3
3
|
import { getLogger } from "../logger.js";
|
|
4
|
-
import { success, } from "../types.js";
|
|
4
|
+
import { addCosts, addTokenUsage, success, } from "../types.js";
|
|
5
5
|
import { zodToGoogleTool } from "../util/tool.js";
|
|
6
6
|
import { BaseClient } from "./baseClient.js";
|
|
7
7
|
import { Model } from "../model.js";
|
|
8
|
+
import { userMessage } from "../classes/message/index.js";
|
|
8
9
|
export class SmolGoogle extends BaseClient {
|
|
9
10
|
client;
|
|
10
11
|
logger;
|
|
@@ -65,10 +66,7 @@ export class SmolGoogle extends BaseClient {
|
|
|
65
66
|
if (tools.length > 0) {
|
|
66
67
|
genConfig.tools = [{ functionDeclarations: tools }];
|
|
67
68
|
}
|
|
68
|
-
if (config.responseFormat && tools.length > 0) {
|
|
69
|
-
console.error("Warning: Both responseFormat and tools are specified in the prompt config. Google Gemini does not support enforcing a response format when tools are included, so the responseFormat will be ignored.");
|
|
70
|
-
}
|
|
71
|
-
else if (config.responseFormat && tools.length === 0) {
|
|
69
|
+
if (config.responseFormat) {
|
|
72
70
|
genConfig.responseMimeType = "application/json";
|
|
73
71
|
genConfig.responseJsonSchema = config.responseFormat.toJSONSchema();
|
|
74
72
|
}
|
|
@@ -94,6 +92,67 @@ export class SmolGoogle extends BaseClient {
|
|
|
94
92
|
if (signal) {
|
|
95
93
|
request.config = { ...request.config, abortSignal: signal };
|
|
96
94
|
}
|
|
95
|
+
const hasTools = config.tools && config.tools.length > 0;
|
|
96
|
+
const hasStructuredResponse = !!config.responseFormat;
|
|
97
|
+
if (!hasTools && !hasStructuredResponse) {
|
|
98
|
+
// If there are no tools or structured response, we can make a single request and return immediately
|
|
99
|
+
return this.__textSync(request);
|
|
100
|
+
}
|
|
101
|
+
// Google Gemini does not support combining function calling with
|
|
102
|
+
// responseMimeType 'application/json'. When tools are present, we
|
|
103
|
+
// make two requests instead
|
|
104
|
+
/*********** TOOL CALL REQUEST ************/
|
|
105
|
+
this.logger.debug("Detected both tool calls and structured response in call to Google Gemini. Making separate request to Google Gemini for tool calls.");
|
|
106
|
+
const toolRequest = {
|
|
107
|
+
...request,
|
|
108
|
+
config: {
|
|
109
|
+
...request.config,
|
|
110
|
+
responseMimeType: undefined,
|
|
111
|
+
responseJsonSchema: undefined,
|
|
112
|
+
},
|
|
113
|
+
};
|
|
114
|
+
const toolResult = await this.__textSync(toolRequest);
|
|
115
|
+
if (!toolResult.success) {
|
|
116
|
+
return toolResult;
|
|
117
|
+
}
|
|
118
|
+
if (toolResult.value.toolCalls.length > 0) {
|
|
119
|
+
this.logger.debug("Tool calls detected. Returning tool calls without making second request for structured response.");
|
|
120
|
+
return toolResult;
|
|
121
|
+
}
|
|
122
|
+
if (!toolResult.value.output) {
|
|
123
|
+
throw new Error("No output or tool calls detected in Google Gemini response. This should not happen.");
|
|
124
|
+
}
|
|
125
|
+
this.logger.debug("No tool calls detected. Making second request to Google Gemini for structured response.");
|
|
126
|
+
/*********** STRUCTURED OUTPUT REQUEST ************/
|
|
127
|
+
const message = userMessage(`Please return this output in the specified structured format. Output: ${toolResult.value.output}`);
|
|
128
|
+
const messages = [message.toGoogleMessage()];
|
|
129
|
+
const responseRequest = {
|
|
130
|
+
...request,
|
|
131
|
+
config: {
|
|
132
|
+
...request.config,
|
|
133
|
+
tools: undefined,
|
|
134
|
+
},
|
|
135
|
+
messages,
|
|
136
|
+
};
|
|
137
|
+
const responseResult = await this.__textSync(responseRequest);
|
|
138
|
+
if (!responseResult.success) {
|
|
139
|
+
return responseResult;
|
|
140
|
+
}
|
|
141
|
+
const thinkingBlocks = [
|
|
142
|
+
...(toolResult.value.thinkingBlocks || []),
|
|
143
|
+
...(responseResult.value.thinkingBlocks || []),
|
|
144
|
+
];
|
|
145
|
+
return success({
|
|
146
|
+
output: responseResult.value.output,
|
|
147
|
+
// if there were tool calls, we would have returned already, so we know these are empty
|
|
148
|
+
toolCalls: [],
|
|
149
|
+
...(thinkingBlocks.length > 0 && { thinkingBlocks }),
|
|
150
|
+
usage: addTokenUsage(toolResult.value.usage, responseResult.value.usage),
|
|
151
|
+
cost: addCosts(toolResult.value.cost, responseResult.value.cost),
|
|
152
|
+
model: request.model,
|
|
153
|
+
});
|
|
154
|
+
}
|
|
155
|
+
async __textSync(request) {
|
|
97
156
|
this.logger.debug("Sending request to Google Gemini:", JSON.stringify(request, null, 2));
|
|
98
157
|
// Send the prompt as the latest message
|
|
99
158
|
const result = await this.client.models.generateContent(request);
|
|
@@ -136,6 +195,13 @@ export class SmolGoogle extends BaseClient {
|
|
|
136
195
|
if (signal) {
|
|
137
196
|
request.config = { ...request.config, abortSignal: signal };
|
|
138
197
|
}
|
|
198
|
+
const hasTools = config.tools && config.tools.length > 0;
|
|
199
|
+
const hasStructuredResponse = !!config.responseFormat;
|
|
200
|
+
if (hasTools && hasStructuredResponse) {
|
|
201
|
+
this.logger.debug("Gemini does not support streaming responses with both tool calls and structured response formats. Response format will be ignored.");
|
|
202
|
+
request.config.responseMimeType = undefined;
|
|
203
|
+
request.config.responseJsonSchema = undefined;
|
|
204
|
+
}
|
|
139
205
|
this.logger.debug("Sending streaming request to Google Gemini:", JSON.stringify(request, null, 2));
|
|
140
206
|
const stream = await this.client.models.generateContentStream(request);
|
|
141
207
|
let content = "";
|
package/dist/model.js
CHANGED
|
@@ -109,12 +109,12 @@ export class Model {
|
|
|
109
109
|
if (!model || !isTextModel(model)) {
|
|
110
110
|
return null;
|
|
111
111
|
}
|
|
112
|
-
const inputCost = round((usage.inputTokens * (model.inputTokenCost || 0)) / 1_000_000,
|
|
113
|
-
const outputCost = round((usage.outputTokens * (model.outputTokenCost || 0)) / 1_000_000,
|
|
112
|
+
const inputCost = round((usage.inputTokens * (model.inputTokenCost || 0)) / 1_000_000, 6);
|
|
113
|
+
const outputCost = round((usage.outputTokens * (model.outputTokenCost || 0)) / 1_000_000, 6);
|
|
114
114
|
const cachedInputCost = usage.cachedInputTokens && model.cachedInputTokenCost
|
|
115
|
-
? round((usage.cachedInputTokens * model.cachedInputTokenCost) / 1_000_000,
|
|
115
|
+
? round((usage.cachedInputTokens * model.cachedInputTokenCost) / 1_000_000, 6)
|
|
116
116
|
: undefined;
|
|
117
|
-
const totalCost = round(inputCost + outputCost + (cachedInputCost || 0),
|
|
117
|
+
const totalCost = round(inputCost + outputCost + (cachedInputCost || 0), 6);
|
|
118
118
|
return {
|
|
119
119
|
inputCost,
|
|
120
120
|
outputCost,
|
package/dist/models.d.ts
CHANGED
|
@@ -24,6 +24,18 @@ export type TextModel = BaseModel & {
|
|
|
24
24
|
maxInputTokens: number;
|
|
25
25
|
maxOutputTokens: number;
|
|
26
26
|
outputTokensPerSecond?: number;
|
|
27
|
+
reasoning?: {
|
|
28
|
+
/** Available effort/thinking levels (provider-specific). Omit for budget-based thinking (Anthropic, Gemini 2.5). */
|
|
29
|
+
levels?: readonly string[];
|
|
30
|
+
/** Default reasoning level */
|
|
31
|
+
defaultLevel?: string;
|
|
32
|
+
/** Whether reasoning/thinking can be fully disabled */
|
|
33
|
+
canDisable?: boolean;
|
|
34
|
+
/** Whether the response includes visible thinking content (thinking blocks/parts) */
|
|
35
|
+
outputsThinking?: boolean;
|
|
36
|
+
/** Whether cryptographic thinking signatures are returned for round-tripping */
|
|
37
|
+
outputsSignatures?: boolean;
|
|
38
|
+
};
|
|
27
39
|
};
|
|
28
40
|
export type EmbeddingsModel = {
|
|
29
41
|
type: "embeddings";
|
|
@@ -73,6 +85,13 @@ export declare const textModels: readonly [{
|
|
|
73
85
|
readonly cachedInputTokenCost: 0.5;
|
|
74
86
|
readonly outputTokenCost: 8;
|
|
75
87
|
readonly outputTokensPerSecond: 94;
|
|
88
|
+
readonly reasoning: {
|
|
89
|
+
readonly levels: readonly ["low", "medium", "high"];
|
|
90
|
+
readonly defaultLevel: "medium";
|
|
91
|
+
readonly canDisable: false;
|
|
92
|
+
readonly outputsThinking: false;
|
|
93
|
+
readonly outputsSignatures: false;
|
|
94
|
+
};
|
|
76
95
|
readonly provider: "openai";
|
|
77
96
|
}, {
|
|
78
97
|
readonly type: "text";
|
|
@@ -84,6 +103,13 @@ export declare const textModels: readonly [{
|
|
|
84
103
|
readonly cachedInputTokenCost: 0.55;
|
|
85
104
|
readonly outputTokenCost: 4.4;
|
|
86
105
|
readonly outputTokensPerSecond: 214;
|
|
106
|
+
readonly reasoning: {
|
|
107
|
+
readonly levels: readonly ["low", "medium", "high"];
|
|
108
|
+
readonly defaultLevel: "medium";
|
|
109
|
+
readonly canDisable: false;
|
|
110
|
+
readonly outputsThinking: false;
|
|
111
|
+
readonly outputsSignatures: false;
|
|
112
|
+
};
|
|
87
113
|
readonly provider: "openai";
|
|
88
114
|
}, {
|
|
89
115
|
readonly type: "text";
|
|
@@ -95,6 +121,13 @@ export declare const textModels: readonly [{
|
|
|
95
121
|
readonly cachedInputTokenCost: 0.275;
|
|
96
122
|
readonly outputTokenCost: 4.4;
|
|
97
123
|
readonly outputTokensPerSecond: 135;
|
|
124
|
+
readonly reasoning: {
|
|
125
|
+
readonly levels: readonly ["low", "medium", "high"];
|
|
126
|
+
readonly defaultLevel: "medium";
|
|
127
|
+
readonly canDisable: false;
|
|
128
|
+
readonly outputsThinking: false;
|
|
129
|
+
readonly outputsSignatures: false;
|
|
130
|
+
};
|
|
98
131
|
readonly provider: "openai";
|
|
99
132
|
}, {
|
|
100
133
|
readonly type: "text";
|
|
@@ -104,6 +137,11 @@ export declare const textModels: readonly [{
|
|
|
104
137
|
readonly maxOutputTokens: 100000;
|
|
105
138
|
readonly inputTokenCost: 20;
|
|
106
139
|
readonly outputTokenCost: 80;
|
|
140
|
+
readonly reasoning: {
|
|
141
|
+
readonly canDisable: false;
|
|
142
|
+
readonly outputsThinking: false;
|
|
143
|
+
readonly outputsSignatures: false;
|
|
144
|
+
};
|
|
107
145
|
readonly provider: "openai";
|
|
108
146
|
}, {
|
|
109
147
|
readonly type: "text";
|
|
@@ -115,6 +153,13 @@ export declare const textModels: readonly [{
|
|
|
115
153
|
readonly cachedInputTokenCost: 7.5;
|
|
116
154
|
readonly outputTokenCost: 60;
|
|
117
155
|
readonly outputTokensPerSecond: 100;
|
|
156
|
+
readonly reasoning: {
|
|
157
|
+
readonly levels: readonly ["low", "medium", "high"];
|
|
158
|
+
readonly defaultLevel: "medium";
|
|
159
|
+
readonly canDisable: false;
|
|
160
|
+
readonly outputsThinking: false;
|
|
161
|
+
readonly outputsSignatures: false;
|
|
162
|
+
};
|
|
118
163
|
readonly provider: "openai";
|
|
119
164
|
}, {
|
|
120
165
|
readonly type: "text";
|
|
@@ -179,6 +224,128 @@ export declare const textModels: readonly [{
|
|
|
179
224
|
readonly outputTokenCost: 0.4;
|
|
180
225
|
readonly outputTokensPerSecond: 142;
|
|
181
226
|
readonly provider: "openai";
|
|
227
|
+
}, {
|
|
228
|
+
readonly type: "text";
|
|
229
|
+
readonly modelName: "gpt-5";
|
|
230
|
+
readonly description: "GPT-5 is a frontier reasoning model with 400K context window. Supports reasoning tokens. Knowledge cutoff: September 2024.";
|
|
231
|
+
readonly maxInputTokens: 400000;
|
|
232
|
+
readonly maxOutputTokens: 128000;
|
|
233
|
+
readonly inputTokenCost: 1.25;
|
|
234
|
+
readonly cachedInputTokenCost: 0.125;
|
|
235
|
+
readonly outputTokenCost: 10;
|
|
236
|
+
readonly outputTokensPerSecond: 72;
|
|
237
|
+
readonly reasoning: {
|
|
238
|
+
readonly levels: readonly ["minimal", "low", "medium", "high"];
|
|
239
|
+
readonly defaultLevel: "medium";
|
|
240
|
+
readonly canDisable: false;
|
|
241
|
+
readonly outputsThinking: false;
|
|
242
|
+
readonly outputsSignatures: false;
|
|
243
|
+
};
|
|
244
|
+
readonly provider: "openai";
|
|
245
|
+
}, {
|
|
246
|
+
readonly type: "text";
|
|
247
|
+
readonly modelName: "gpt-5-mini";
|
|
248
|
+
readonly description: "GPT-5 mini is a faster, more cost-efficient version of GPT-5 with 400K context window. Knowledge cutoff: May 2024.";
|
|
249
|
+
readonly maxInputTokens: 400000;
|
|
250
|
+
readonly maxOutputTokens: 128000;
|
|
251
|
+
readonly inputTokenCost: 0.25;
|
|
252
|
+
readonly cachedInputTokenCost: 0.025;
|
|
253
|
+
readonly outputTokenCost: 2;
|
|
254
|
+
readonly outputTokensPerSecond: 69;
|
|
255
|
+
readonly reasoning: {
|
|
256
|
+
readonly levels: readonly ["minimal", "low", "medium", "high"];
|
|
257
|
+
readonly defaultLevel: "medium";
|
|
258
|
+
readonly canDisable: false;
|
|
259
|
+
readonly outputsThinking: false;
|
|
260
|
+
readonly outputsSignatures: false;
|
|
261
|
+
};
|
|
262
|
+
readonly provider: "openai";
|
|
263
|
+
}, {
|
|
264
|
+
readonly type: "text";
|
|
265
|
+
readonly modelName: "gpt-5-nano";
|
|
266
|
+
readonly description: "GPT-5 nano is the fastest and most affordable GPT-5 variant with 400K context window. Knowledge cutoff: May 2024.";
|
|
267
|
+
readonly maxInputTokens: 400000;
|
|
268
|
+
readonly maxOutputTokens: 128000;
|
|
269
|
+
readonly inputTokenCost: 0.05;
|
|
270
|
+
readonly cachedInputTokenCost: 0.005;
|
|
271
|
+
readonly outputTokenCost: 0.4;
|
|
272
|
+
readonly outputTokensPerSecond: 140;
|
|
273
|
+
readonly reasoning: {
|
|
274
|
+
readonly levels: readonly ["minimal", "low", "medium", "high"];
|
|
275
|
+
readonly defaultLevel: "medium";
|
|
276
|
+
readonly canDisable: false;
|
|
277
|
+
readonly outputsThinking: false;
|
|
278
|
+
readonly outputsSignatures: false;
|
|
279
|
+
};
|
|
280
|
+
readonly provider: "openai";
|
|
281
|
+
}, {
|
|
282
|
+
readonly type: "text";
|
|
283
|
+
readonly modelName: "gpt-5.1";
|
|
284
|
+
readonly description: "GPT-5.1 is the flagship model for coding and agentic tasks with configurable reasoning effort. 400K context window. Knowledge cutoff: September 2024.";
|
|
285
|
+
readonly maxInputTokens: 400000;
|
|
286
|
+
readonly maxOutputTokens: 128000;
|
|
287
|
+
readonly inputTokenCost: 1.25;
|
|
288
|
+
readonly cachedInputTokenCost: 0.125;
|
|
289
|
+
readonly outputTokenCost: 10;
|
|
290
|
+
readonly reasoning: {
|
|
291
|
+
readonly levels: readonly ["none", "low", "medium", "high"];
|
|
292
|
+
readonly defaultLevel: "none";
|
|
293
|
+
readonly canDisable: true;
|
|
294
|
+
readonly outputsThinking: false;
|
|
295
|
+
readonly outputsSignatures: false;
|
|
296
|
+
};
|
|
297
|
+
readonly provider: "openai";
|
|
298
|
+
}, {
|
|
299
|
+
readonly type: "text";
|
|
300
|
+
readonly modelName: "gpt-5.2";
|
|
301
|
+
readonly description: "GPT-5.2 is the flagship model for coding and agentic tasks across industries. 400K context window. Knowledge cutoff: August 2025.";
|
|
302
|
+
readonly maxInputTokens: 400000;
|
|
303
|
+
readonly maxOutputTokens: 128000;
|
|
304
|
+
readonly inputTokenCost: 1.75;
|
|
305
|
+
readonly cachedInputTokenCost: 0.175;
|
|
306
|
+
readonly outputTokenCost: 14;
|
|
307
|
+
readonly outputTokensPerSecond: 61;
|
|
308
|
+
readonly reasoning: {
|
|
309
|
+
readonly levels: readonly ["none", "low", "medium", "high"];
|
|
310
|
+
readonly defaultLevel: "none";
|
|
311
|
+
readonly canDisable: true;
|
|
312
|
+
readonly outputsThinking: false;
|
|
313
|
+
readonly outputsSignatures: false;
|
|
314
|
+
};
|
|
315
|
+
readonly provider: "openai";
|
|
316
|
+
}, {
|
|
317
|
+
readonly type: "text";
|
|
318
|
+
readonly modelName: "gpt-5.4";
|
|
319
|
+
readonly description: "GPT-5.4 is the most capable and efficient frontier model for complex professional work. 1M context window, state-of-the-art coding and tool use. Standard pricing for ≤272K tokens, 2x input/1.5x output for >272K. Knowledge cutoff: August 2025.";
|
|
320
|
+
readonly maxInputTokens: 1050000;
|
|
321
|
+
readonly maxOutputTokens: 128000;
|
|
322
|
+
readonly inputTokenCost: 2.5;
|
|
323
|
+
readonly cachedInputTokenCost: 0.25;
|
|
324
|
+
readonly outputTokenCost: 15;
|
|
325
|
+
readonly reasoning: {
|
|
326
|
+
readonly levels: readonly ["none", "low", "medium", "high", "xhigh"];
|
|
327
|
+
readonly defaultLevel: "none";
|
|
328
|
+
readonly canDisable: true;
|
|
329
|
+
readonly outputsThinking: false;
|
|
330
|
+
readonly outputsSignatures: false;
|
|
331
|
+
};
|
|
332
|
+
readonly provider: "openai";
|
|
333
|
+
}, {
|
|
334
|
+
readonly type: "text";
|
|
335
|
+
readonly modelName: "gpt-5.4-pro";
|
|
336
|
+
readonly description: "GPT-5.4 Pro uses more compute for complex reasoning tasks. 1M context window. Standard pricing for ≤272K tokens. Knowledge cutoff: August 2025.";
|
|
337
|
+
readonly maxInputTokens: 1050000;
|
|
338
|
+
readonly maxOutputTokens: 128000;
|
|
339
|
+
readonly inputTokenCost: 30;
|
|
340
|
+
readonly outputTokenCost: 180;
|
|
341
|
+
readonly reasoning: {
|
|
342
|
+
readonly levels: readonly ["medium", "high", "xhigh"];
|
|
343
|
+
readonly defaultLevel: "medium";
|
|
344
|
+
readonly canDisable: false;
|
|
345
|
+
readonly outputsThinking: false;
|
|
346
|
+
readonly outputsSignatures: false;
|
|
347
|
+
};
|
|
348
|
+
readonly provider: "openai";
|
|
182
349
|
}, {
|
|
183
350
|
readonly type: "text";
|
|
184
351
|
readonly modelName: "gemini-3.1-pro-preview";
|
|
@@ -188,15 +355,23 @@ export declare const textModels: readonly [{
|
|
|
188
355
|
readonly inputTokenCost: 2;
|
|
189
356
|
readonly outputTokenCost: 12;
|
|
190
357
|
readonly outputTokensPerSecond: 112;
|
|
358
|
+
readonly reasoning: {
|
|
359
|
+
readonly levels: readonly ["low", "medium", "high"];
|
|
360
|
+
readonly defaultLevel: "high";
|
|
361
|
+
readonly canDisable: false;
|
|
362
|
+
readonly outputsThinking: true;
|
|
363
|
+
readonly outputsSignatures: true;
|
|
364
|
+
};
|
|
191
365
|
readonly provider: "google";
|
|
192
366
|
}, {
|
|
193
367
|
readonly type: "text";
|
|
194
368
|
readonly modelName: "gemini-3-pro-preview";
|
|
195
|
-
readonly description: "
|
|
369
|
+
readonly description: "DEPRECATED: Shut down March 9, 2026. Use gemini-3.1-pro-preview instead.";
|
|
196
370
|
readonly maxInputTokens: 1048576;
|
|
197
371
|
readonly maxOutputTokens: 65536;
|
|
198
372
|
readonly inputTokenCost: 2;
|
|
199
373
|
readonly outputTokenCost: 12;
|
|
374
|
+
readonly disabled: true;
|
|
200
375
|
readonly provider: "google";
|
|
201
376
|
}, {
|
|
202
377
|
readonly type: "text";
|
|
@@ -206,6 +381,31 @@ export declare const textModels: readonly [{
|
|
|
206
381
|
readonly maxOutputTokens: 65536;
|
|
207
382
|
readonly inputTokenCost: 0.5;
|
|
208
383
|
readonly outputTokenCost: 3;
|
|
384
|
+
readonly outputTokensPerSecond: 146;
|
|
385
|
+
readonly reasoning: {
|
|
386
|
+
readonly levels: readonly ["minimal", "low", "medium", "high"];
|
|
387
|
+
readonly defaultLevel: "high";
|
|
388
|
+
readonly canDisable: false;
|
|
389
|
+
readonly outputsThinking: true;
|
|
390
|
+
readonly outputsSignatures: true;
|
|
391
|
+
};
|
|
392
|
+
readonly provider: "google";
|
|
393
|
+
}, {
|
|
394
|
+
readonly type: "text";
|
|
395
|
+
readonly modelName: "gemini-3.1-flash-lite-preview";
|
|
396
|
+
readonly description: "Most cost-effective Gemini 3.1 model with thinking support and 1M context window. 2.5x faster TTFA and 45% faster output than 2.5 Flash. Released March 2026.";
|
|
397
|
+
readonly maxInputTokens: 1048576;
|
|
398
|
+
readonly maxOutputTokens: 65536;
|
|
399
|
+
readonly inputTokenCost: 0.25;
|
|
400
|
+
readonly outputTokenCost: 1.5;
|
|
401
|
+
readonly outputTokensPerSecond: 379;
|
|
402
|
+
readonly reasoning: {
|
|
403
|
+
readonly levels: readonly ["minimal", "low", "medium", "high"];
|
|
404
|
+
readonly defaultLevel: "minimal";
|
|
405
|
+
readonly canDisable: false;
|
|
406
|
+
readonly outputsThinking: true;
|
|
407
|
+
readonly outputsSignatures: true;
|
|
408
|
+
};
|
|
209
409
|
readonly provider: "google";
|
|
210
410
|
}, {
|
|
211
411
|
readonly type: "text";
|
|
@@ -215,7 +415,12 @@ export declare const textModels: readonly [{
|
|
|
215
415
|
readonly maxOutputTokens: 65536;
|
|
216
416
|
readonly inputTokenCost: 1.25;
|
|
217
417
|
readonly outputTokenCost: 10;
|
|
218
|
-
readonly outputTokensPerSecond:
|
|
418
|
+
readonly outputTokensPerSecond: 134;
|
|
419
|
+
readonly reasoning: {
|
|
420
|
+
readonly canDisable: false;
|
|
421
|
+
readonly outputsThinking: true;
|
|
422
|
+
readonly outputsSignatures: true;
|
|
423
|
+
};
|
|
219
424
|
readonly provider: "google";
|
|
220
425
|
}, {
|
|
221
426
|
readonly type: "text";
|
|
@@ -225,7 +430,12 @@ export declare const textModels: readonly [{
|
|
|
225
430
|
readonly maxOutputTokens: 65536;
|
|
226
431
|
readonly inputTokenCost: 0.3;
|
|
227
432
|
readonly outputTokenCost: 2.5;
|
|
228
|
-
readonly outputTokensPerSecond:
|
|
433
|
+
readonly outputTokensPerSecond: 245;
|
|
434
|
+
readonly reasoning: {
|
|
435
|
+
readonly canDisable: true;
|
|
436
|
+
readonly outputsThinking: true;
|
|
437
|
+
readonly outputsSignatures: true;
|
|
438
|
+
};
|
|
229
439
|
readonly provider: "google";
|
|
230
440
|
}, {
|
|
231
441
|
readonly type: "text";
|
|
@@ -236,6 +446,11 @@ export declare const textModels: readonly [{
|
|
|
236
446
|
readonly inputTokenCost: 0.1;
|
|
237
447
|
readonly outputTokenCost: 0.4;
|
|
238
448
|
readonly outputTokensPerSecond: 400;
|
|
449
|
+
readonly reasoning: {
|
|
450
|
+
readonly canDisable: true;
|
|
451
|
+
readonly outputsThinking: true;
|
|
452
|
+
readonly outputsSignatures: false;
|
|
453
|
+
};
|
|
239
454
|
readonly provider: "google";
|
|
240
455
|
}, {
|
|
241
456
|
readonly type: "text";
|
|
@@ -306,20 +521,33 @@ export declare const textModels: readonly [{
|
|
|
306
521
|
}, {
|
|
307
522
|
readonly type: "text";
|
|
308
523
|
readonly modelName: "claude-opus-4-6";
|
|
309
|
-
readonly description: "The most intelligent Claude model for building agents and coding. 200K context window, 128K max output.";
|
|
524
|
+
readonly description: "The most intelligent Claude model for building agents and coding. 200K context window (1M in beta), 128K max output.";
|
|
310
525
|
readonly maxInputTokens: 200000;
|
|
311
526
|
readonly maxOutputTokens: 131072;
|
|
312
527
|
readonly inputTokenCost: 5;
|
|
528
|
+
readonly cachedInputTokenCost: 0.5;
|
|
313
529
|
readonly outputTokenCost: 25;
|
|
530
|
+
readonly outputTokensPerSecond: 53;
|
|
531
|
+
readonly reasoning: {
|
|
532
|
+
readonly canDisable: true;
|
|
533
|
+
readonly outputsThinking: true;
|
|
534
|
+
readonly outputsSignatures: true;
|
|
535
|
+
};
|
|
314
536
|
readonly provider: "anthropic";
|
|
315
537
|
}, {
|
|
316
538
|
readonly type: "text";
|
|
317
539
|
readonly modelName: "claude-sonnet-4-6";
|
|
318
|
-
readonly description: "The best combination of speed and intelligence. 200K context window, 64K max output.";
|
|
540
|
+
readonly description: "The best combination of speed and intelligence. 200K context window (1M in beta), 64K max output.";
|
|
319
541
|
readonly maxInputTokens: 200000;
|
|
320
542
|
readonly maxOutputTokens: 64000;
|
|
321
543
|
readonly inputTokenCost: 3;
|
|
544
|
+
readonly cachedInputTokenCost: 0.3;
|
|
322
545
|
readonly outputTokenCost: 15;
|
|
546
|
+
readonly reasoning: {
|
|
547
|
+
readonly canDisable: true;
|
|
548
|
+
readonly outputsThinking: true;
|
|
549
|
+
readonly outputsSignatures: true;
|
|
550
|
+
};
|
|
323
551
|
readonly provider: "anthropic";
|
|
324
552
|
}, {
|
|
325
553
|
readonly type: "text";
|
|
@@ -328,7 +556,14 @@ export declare const textModels: readonly [{
|
|
|
328
556
|
readonly maxInputTokens: 200000;
|
|
329
557
|
readonly maxOutputTokens: 64000;
|
|
330
558
|
readonly inputTokenCost: 1;
|
|
559
|
+
readonly cachedInputTokenCost: 0.1;
|
|
331
560
|
readonly outputTokenCost: 5;
|
|
561
|
+
readonly outputTokensPerSecond: 97;
|
|
562
|
+
readonly reasoning: {
|
|
563
|
+
readonly canDisable: true;
|
|
564
|
+
readonly outputsThinking: true;
|
|
565
|
+
readonly outputsSignatures: true;
|
|
566
|
+
};
|
|
332
567
|
readonly provider: "anthropic";
|
|
333
568
|
}, {
|
|
334
569
|
readonly type: "text";
|
|
@@ -339,6 +574,11 @@ export declare const textModels: readonly [{
|
|
|
339
574
|
readonly inputTokenCost: 3;
|
|
340
575
|
readonly outputTokenCost: 15;
|
|
341
576
|
readonly outputTokensPerSecond: 78;
|
|
577
|
+
readonly reasoning: {
|
|
578
|
+
readonly canDisable: true;
|
|
579
|
+
readonly outputsThinking: true;
|
|
580
|
+
readonly outputsSignatures: true;
|
|
581
|
+
};
|
|
342
582
|
readonly disabled: true;
|
|
343
583
|
readonly provider: "anthropic";
|
|
344
584
|
}, {
|
|
@@ -465,6 +705,13 @@ export declare function getModel(modelName: ModelName): {
|
|
|
465
705
|
readonly cachedInputTokenCost: 0.5;
|
|
466
706
|
readonly outputTokenCost: 8;
|
|
467
707
|
readonly outputTokensPerSecond: 94;
|
|
708
|
+
readonly reasoning: {
|
|
709
|
+
readonly levels: readonly ["low", "medium", "high"];
|
|
710
|
+
readonly defaultLevel: "medium";
|
|
711
|
+
readonly canDisable: false;
|
|
712
|
+
readonly outputsThinking: false;
|
|
713
|
+
readonly outputsSignatures: false;
|
|
714
|
+
};
|
|
468
715
|
readonly provider: "openai";
|
|
469
716
|
} | {
|
|
470
717
|
readonly type: "text";
|
|
@@ -476,6 +723,13 @@ export declare function getModel(modelName: ModelName): {
|
|
|
476
723
|
readonly cachedInputTokenCost: 0.55;
|
|
477
724
|
readonly outputTokenCost: 4.4;
|
|
478
725
|
readonly outputTokensPerSecond: 214;
|
|
726
|
+
readonly reasoning: {
|
|
727
|
+
readonly levels: readonly ["low", "medium", "high"];
|
|
728
|
+
readonly defaultLevel: "medium";
|
|
729
|
+
readonly canDisable: false;
|
|
730
|
+
readonly outputsThinking: false;
|
|
731
|
+
readonly outputsSignatures: false;
|
|
732
|
+
};
|
|
479
733
|
readonly provider: "openai";
|
|
480
734
|
} | {
|
|
481
735
|
readonly type: "text";
|
|
@@ -487,6 +741,13 @@ export declare function getModel(modelName: ModelName): {
|
|
|
487
741
|
readonly cachedInputTokenCost: 0.275;
|
|
488
742
|
readonly outputTokenCost: 4.4;
|
|
489
743
|
readonly outputTokensPerSecond: 135;
|
|
744
|
+
readonly reasoning: {
|
|
745
|
+
readonly levels: readonly ["low", "medium", "high"];
|
|
746
|
+
readonly defaultLevel: "medium";
|
|
747
|
+
readonly canDisable: false;
|
|
748
|
+
readonly outputsThinking: false;
|
|
749
|
+
readonly outputsSignatures: false;
|
|
750
|
+
};
|
|
490
751
|
readonly provider: "openai";
|
|
491
752
|
} | {
|
|
492
753
|
readonly type: "text";
|
|
@@ -496,6 +757,11 @@ export declare function getModel(modelName: ModelName): {
|
|
|
496
757
|
readonly maxOutputTokens: 100000;
|
|
497
758
|
readonly inputTokenCost: 20;
|
|
498
759
|
readonly outputTokenCost: 80;
|
|
760
|
+
readonly reasoning: {
|
|
761
|
+
readonly canDisable: false;
|
|
762
|
+
readonly outputsThinking: false;
|
|
763
|
+
readonly outputsSignatures: false;
|
|
764
|
+
};
|
|
499
765
|
readonly provider: "openai";
|
|
500
766
|
} | {
|
|
501
767
|
readonly type: "text";
|
|
@@ -507,6 +773,13 @@ export declare function getModel(modelName: ModelName): {
|
|
|
507
773
|
readonly cachedInputTokenCost: 7.5;
|
|
508
774
|
readonly outputTokenCost: 60;
|
|
509
775
|
readonly outputTokensPerSecond: 100;
|
|
776
|
+
readonly reasoning: {
|
|
777
|
+
readonly levels: readonly ["low", "medium", "high"];
|
|
778
|
+
readonly defaultLevel: "medium";
|
|
779
|
+
readonly canDisable: false;
|
|
780
|
+
readonly outputsThinking: false;
|
|
781
|
+
readonly outputsSignatures: false;
|
|
782
|
+
};
|
|
510
783
|
readonly provider: "openai";
|
|
511
784
|
} | {
|
|
512
785
|
readonly type: "text";
|
|
@@ -571,6 +844,128 @@ export declare function getModel(modelName: ModelName): {
|
|
|
571
844
|
readonly outputTokenCost: 0.4;
|
|
572
845
|
readonly outputTokensPerSecond: 142;
|
|
573
846
|
readonly provider: "openai";
|
|
847
|
+
} | {
|
|
848
|
+
readonly type: "text";
|
|
849
|
+
readonly modelName: "gpt-5";
|
|
850
|
+
readonly description: "GPT-5 is a frontier reasoning model with 400K context window. Supports reasoning tokens. Knowledge cutoff: September 2024.";
|
|
851
|
+
readonly maxInputTokens: 400000;
|
|
852
|
+
readonly maxOutputTokens: 128000;
|
|
853
|
+
readonly inputTokenCost: 1.25;
|
|
854
|
+
readonly cachedInputTokenCost: 0.125;
|
|
855
|
+
readonly outputTokenCost: 10;
|
|
856
|
+
readonly outputTokensPerSecond: 72;
|
|
857
|
+
readonly reasoning: {
|
|
858
|
+
readonly levels: readonly ["minimal", "low", "medium", "high"];
|
|
859
|
+
readonly defaultLevel: "medium";
|
|
860
|
+
readonly canDisable: false;
|
|
861
|
+
readonly outputsThinking: false;
|
|
862
|
+
readonly outputsSignatures: false;
|
|
863
|
+
};
|
|
864
|
+
readonly provider: "openai";
|
|
865
|
+
} | {
|
|
866
|
+
readonly type: "text";
|
|
867
|
+
readonly modelName: "gpt-5-mini";
|
|
868
|
+
readonly description: "GPT-5 mini is a faster, more cost-efficient version of GPT-5 with 400K context window. Knowledge cutoff: May 2024.";
|
|
869
|
+
readonly maxInputTokens: 400000;
|
|
870
|
+
readonly maxOutputTokens: 128000;
|
|
871
|
+
readonly inputTokenCost: 0.25;
|
|
872
|
+
readonly cachedInputTokenCost: 0.025;
|
|
873
|
+
readonly outputTokenCost: 2;
|
|
874
|
+
readonly outputTokensPerSecond: 69;
|
|
875
|
+
readonly reasoning: {
|
|
876
|
+
readonly levels: readonly ["minimal", "low", "medium", "high"];
|
|
877
|
+
readonly defaultLevel: "medium";
|
|
878
|
+
readonly canDisable: false;
|
|
879
|
+
readonly outputsThinking: false;
|
|
880
|
+
readonly outputsSignatures: false;
|
|
881
|
+
};
|
|
882
|
+
readonly provider: "openai";
|
|
883
|
+
} | {
|
|
884
|
+
readonly type: "text";
|
|
885
|
+
readonly modelName: "gpt-5-nano";
|
|
886
|
+
readonly description: "GPT-5 nano is the fastest and most affordable GPT-5 variant with 400K context window. Knowledge cutoff: May 2024.";
|
|
887
|
+
readonly maxInputTokens: 400000;
|
|
888
|
+
readonly maxOutputTokens: 128000;
|
|
889
|
+
readonly inputTokenCost: 0.05;
|
|
890
|
+
readonly cachedInputTokenCost: 0.005;
|
|
891
|
+
readonly outputTokenCost: 0.4;
|
|
892
|
+
readonly outputTokensPerSecond: 140;
|
|
893
|
+
readonly reasoning: {
|
|
894
|
+
readonly levels: readonly ["minimal", "low", "medium", "high"];
|
|
895
|
+
readonly defaultLevel: "medium";
|
|
896
|
+
readonly canDisable: false;
|
|
897
|
+
readonly outputsThinking: false;
|
|
898
|
+
readonly outputsSignatures: false;
|
|
899
|
+
};
|
|
900
|
+
readonly provider: "openai";
|
|
901
|
+
} | {
|
|
902
|
+
readonly type: "text";
|
|
903
|
+
readonly modelName: "gpt-5.1";
|
|
904
|
+
readonly description: "GPT-5.1 is the flagship model for coding and agentic tasks with configurable reasoning effort. 400K context window. Knowledge cutoff: September 2024.";
|
|
905
|
+
readonly maxInputTokens: 400000;
|
|
906
|
+
readonly maxOutputTokens: 128000;
|
|
907
|
+
readonly inputTokenCost: 1.25;
|
|
908
|
+
readonly cachedInputTokenCost: 0.125;
|
|
909
|
+
readonly outputTokenCost: 10;
|
|
910
|
+
readonly reasoning: {
|
|
911
|
+
readonly levels: readonly ["none", "low", "medium", "high"];
|
|
912
|
+
readonly defaultLevel: "none";
|
|
913
|
+
readonly canDisable: true;
|
|
914
|
+
readonly outputsThinking: false;
|
|
915
|
+
readonly outputsSignatures: false;
|
|
916
|
+
};
|
|
917
|
+
readonly provider: "openai";
|
|
918
|
+
} | {
|
|
919
|
+
readonly type: "text";
|
|
920
|
+
readonly modelName: "gpt-5.2";
|
|
921
|
+
readonly description: "GPT-5.2 is the flagship model for coding and agentic tasks across industries. 400K context window. Knowledge cutoff: August 2025.";
|
|
922
|
+
readonly maxInputTokens: 400000;
|
|
923
|
+
readonly maxOutputTokens: 128000;
|
|
924
|
+
readonly inputTokenCost: 1.75;
|
|
925
|
+
readonly cachedInputTokenCost: 0.175;
|
|
926
|
+
readonly outputTokenCost: 14;
|
|
927
|
+
readonly outputTokensPerSecond: 61;
|
|
928
|
+
readonly reasoning: {
|
|
929
|
+
readonly levels: readonly ["none", "low", "medium", "high"];
|
|
930
|
+
readonly defaultLevel: "none";
|
|
931
|
+
readonly canDisable: true;
|
|
932
|
+
readonly outputsThinking: false;
|
|
933
|
+
readonly outputsSignatures: false;
|
|
934
|
+
};
|
|
935
|
+
readonly provider: "openai";
|
|
936
|
+
} | {
|
|
937
|
+
readonly type: "text";
|
|
938
|
+
readonly modelName: "gpt-5.4";
|
|
939
|
+
readonly description: "GPT-5.4 is the most capable and efficient frontier model for complex professional work. 1M context window, state-of-the-art coding and tool use. Standard pricing for ≤272K tokens, 2x input/1.5x output for >272K. Knowledge cutoff: August 2025.";
|
|
940
|
+
readonly maxInputTokens: 1050000;
|
|
941
|
+
readonly maxOutputTokens: 128000;
|
|
942
|
+
readonly inputTokenCost: 2.5;
|
|
943
|
+
readonly cachedInputTokenCost: 0.25;
|
|
944
|
+
readonly outputTokenCost: 15;
|
|
945
|
+
readonly reasoning: {
|
|
946
|
+
readonly levels: readonly ["none", "low", "medium", "high", "xhigh"];
|
|
947
|
+
readonly defaultLevel: "none";
|
|
948
|
+
readonly canDisable: true;
|
|
949
|
+
readonly outputsThinking: false;
|
|
950
|
+
readonly outputsSignatures: false;
|
|
951
|
+
};
|
|
952
|
+
readonly provider: "openai";
|
|
953
|
+
} | {
|
|
954
|
+
readonly type: "text";
|
|
955
|
+
readonly modelName: "gpt-5.4-pro";
|
|
956
|
+
readonly description: "GPT-5.4 Pro uses more compute for complex reasoning tasks. 1M context window. Standard pricing for ≤272K tokens. Knowledge cutoff: August 2025.";
|
|
957
|
+
readonly maxInputTokens: 1050000;
|
|
958
|
+
readonly maxOutputTokens: 128000;
|
|
959
|
+
readonly inputTokenCost: 30;
|
|
960
|
+
readonly outputTokenCost: 180;
|
|
961
|
+
readonly reasoning: {
|
|
962
|
+
readonly levels: readonly ["medium", "high", "xhigh"];
|
|
963
|
+
readonly defaultLevel: "medium";
|
|
964
|
+
readonly canDisable: false;
|
|
965
|
+
readonly outputsThinking: false;
|
|
966
|
+
readonly outputsSignatures: false;
|
|
967
|
+
};
|
|
968
|
+
readonly provider: "openai";
|
|
574
969
|
} | {
|
|
575
970
|
readonly type: "text";
|
|
576
971
|
readonly modelName: "gemini-3.1-pro-preview";
|
|
@@ -580,15 +975,23 @@ export declare function getModel(modelName: ModelName): {
|
|
|
580
975
|
readonly inputTokenCost: 2;
|
|
581
976
|
readonly outputTokenCost: 12;
|
|
582
977
|
readonly outputTokensPerSecond: 112;
|
|
978
|
+
readonly reasoning: {
|
|
979
|
+
readonly levels: readonly ["low", "medium", "high"];
|
|
980
|
+
readonly defaultLevel: "high";
|
|
981
|
+
readonly canDisable: false;
|
|
982
|
+
readonly outputsThinking: true;
|
|
983
|
+
readonly outputsSignatures: true;
|
|
984
|
+
};
|
|
583
985
|
readonly provider: "google";
|
|
584
986
|
} | {
|
|
585
987
|
readonly type: "text";
|
|
586
988
|
readonly modelName: "gemini-3-pro-preview";
|
|
587
|
-
readonly description: "
|
|
989
|
+
readonly description: "DEPRECATED: Shut down March 9, 2026. Use gemini-3.1-pro-preview instead.";
|
|
588
990
|
readonly maxInputTokens: 1048576;
|
|
589
991
|
readonly maxOutputTokens: 65536;
|
|
590
992
|
readonly inputTokenCost: 2;
|
|
591
993
|
readonly outputTokenCost: 12;
|
|
994
|
+
readonly disabled: true;
|
|
592
995
|
readonly provider: "google";
|
|
593
996
|
} | {
|
|
594
997
|
readonly type: "text";
|
|
@@ -598,6 +1001,31 @@ export declare function getModel(modelName: ModelName): {
|
|
|
598
1001
|
readonly maxOutputTokens: 65536;
|
|
599
1002
|
readonly inputTokenCost: 0.5;
|
|
600
1003
|
readonly outputTokenCost: 3;
|
|
1004
|
+
readonly outputTokensPerSecond: 146;
|
|
1005
|
+
readonly reasoning: {
|
|
1006
|
+
readonly levels: readonly ["minimal", "low", "medium", "high"];
|
|
1007
|
+
readonly defaultLevel: "high";
|
|
1008
|
+
readonly canDisable: false;
|
|
1009
|
+
readonly outputsThinking: true;
|
|
1010
|
+
readonly outputsSignatures: true;
|
|
1011
|
+
};
|
|
1012
|
+
readonly provider: "google";
|
|
1013
|
+
} | {
|
|
1014
|
+
readonly type: "text";
|
|
1015
|
+
readonly modelName: "gemini-3.1-flash-lite-preview";
|
|
1016
|
+
readonly description: "Most cost-effective Gemini 3.1 model with thinking support and 1M context window. 2.5x faster TTFA and 45% faster output than 2.5 Flash. Released March 2026.";
|
|
1017
|
+
readonly maxInputTokens: 1048576;
|
|
1018
|
+
readonly maxOutputTokens: 65536;
|
|
1019
|
+
readonly inputTokenCost: 0.25;
|
|
1020
|
+
readonly outputTokenCost: 1.5;
|
|
1021
|
+
readonly outputTokensPerSecond: 379;
|
|
1022
|
+
readonly reasoning: {
|
|
1023
|
+
readonly levels: readonly ["minimal", "low", "medium", "high"];
|
|
1024
|
+
readonly defaultLevel: "minimal";
|
|
1025
|
+
readonly canDisable: false;
|
|
1026
|
+
readonly outputsThinking: true;
|
|
1027
|
+
readonly outputsSignatures: true;
|
|
1028
|
+
};
|
|
601
1029
|
readonly provider: "google";
|
|
602
1030
|
} | {
|
|
603
1031
|
readonly type: "text";
|
|
@@ -607,7 +1035,12 @@ export declare function getModel(modelName: ModelName): {
|
|
|
607
1035
|
readonly maxOutputTokens: 65536;
|
|
608
1036
|
readonly inputTokenCost: 1.25;
|
|
609
1037
|
readonly outputTokenCost: 10;
|
|
610
|
-
readonly outputTokensPerSecond:
|
|
1038
|
+
readonly outputTokensPerSecond: 134;
|
|
1039
|
+
readonly reasoning: {
|
|
1040
|
+
readonly canDisable: false;
|
|
1041
|
+
readonly outputsThinking: true;
|
|
1042
|
+
readonly outputsSignatures: true;
|
|
1043
|
+
};
|
|
611
1044
|
readonly provider: "google";
|
|
612
1045
|
} | {
|
|
613
1046
|
readonly type: "text";
|
|
@@ -617,7 +1050,12 @@ export declare function getModel(modelName: ModelName): {
|
|
|
617
1050
|
readonly maxOutputTokens: 65536;
|
|
618
1051
|
readonly inputTokenCost: 0.3;
|
|
619
1052
|
readonly outputTokenCost: 2.5;
|
|
620
|
-
readonly outputTokensPerSecond:
|
|
1053
|
+
readonly outputTokensPerSecond: 245;
|
|
1054
|
+
readonly reasoning: {
|
|
1055
|
+
readonly canDisable: true;
|
|
1056
|
+
readonly outputsThinking: true;
|
|
1057
|
+
readonly outputsSignatures: true;
|
|
1058
|
+
};
|
|
621
1059
|
readonly provider: "google";
|
|
622
1060
|
} | {
|
|
623
1061
|
readonly type: "text";
|
|
@@ -628,6 +1066,11 @@ export declare function getModel(modelName: ModelName): {
|
|
|
628
1066
|
readonly inputTokenCost: 0.1;
|
|
629
1067
|
readonly outputTokenCost: 0.4;
|
|
630
1068
|
readonly outputTokensPerSecond: 400;
|
|
1069
|
+
readonly reasoning: {
|
|
1070
|
+
readonly canDisable: true;
|
|
1071
|
+
readonly outputsThinking: true;
|
|
1072
|
+
readonly outputsSignatures: false;
|
|
1073
|
+
};
|
|
631
1074
|
readonly provider: "google";
|
|
632
1075
|
} | {
|
|
633
1076
|
readonly type: "text";
|
|
@@ -698,20 +1141,33 @@ export declare function getModel(modelName: ModelName): {
|
|
|
698
1141
|
} | {
|
|
699
1142
|
readonly type: "text";
|
|
700
1143
|
readonly modelName: "claude-opus-4-6";
|
|
701
|
-
readonly description: "The most intelligent Claude model for building agents and coding. 200K context window, 128K max output.";
|
|
1144
|
+
readonly description: "The most intelligent Claude model for building agents and coding. 200K context window (1M in beta), 128K max output.";
|
|
702
1145
|
readonly maxInputTokens: 200000;
|
|
703
1146
|
readonly maxOutputTokens: 131072;
|
|
704
1147
|
readonly inputTokenCost: 5;
|
|
1148
|
+
readonly cachedInputTokenCost: 0.5;
|
|
705
1149
|
readonly outputTokenCost: 25;
|
|
1150
|
+
readonly outputTokensPerSecond: 53;
|
|
1151
|
+
readonly reasoning: {
|
|
1152
|
+
readonly canDisable: true;
|
|
1153
|
+
readonly outputsThinking: true;
|
|
1154
|
+
readonly outputsSignatures: true;
|
|
1155
|
+
};
|
|
706
1156
|
readonly provider: "anthropic";
|
|
707
1157
|
} | {
|
|
708
1158
|
readonly type: "text";
|
|
709
1159
|
readonly modelName: "claude-sonnet-4-6";
|
|
710
|
-
readonly description: "The best combination of speed and intelligence. 200K context window, 64K max output.";
|
|
1160
|
+
readonly description: "The best combination of speed and intelligence. 200K context window (1M in beta), 64K max output.";
|
|
711
1161
|
readonly maxInputTokens: 200000;
|
|
712
1162
|
readonly maxOutputTokens: 64000;
|
|
713
1163
|
readonly inputTokenCost: 3;
|
|
1164
|
+
readonly cachedInputTokenCost: 0.3;
|
|
714
1165
|
readonly outputTokenCost: 15;
|
|
1166
|
+
readonly reasoning: {
|
|
1167
|
+
readonly canDisable: true;
|
|
1168
|
+
readonly outputsThinking: true;
|
|
1169
|
+
readonly outputsSignatures: true;
|
|
1170
|
+
};
|
|
715
1171
|
readonly provider: "anthropic";
|
|
716
1172
|
} | {
|
|
717
1173
|
readonly type: "text";
|
|
@@ -720,7 +1176,14 @@ export declare function getModel(modelName: ModelName): {
|
|
|
720
1176
|
readonly maxInputTokens: 200000;
|
|
721
1177
|
readonly maxOutputTokens: 64000;
|
|
722
1178
|
readonly inputTokenCost: 1;
|
|
1179
|
+
readonly cachedInputTokenCost: 0.1;
|
|
723
1180
|
readonly outputTokenCost: 5;
|
|
1181
|
+
readonly outputTokensPerSecond: 97;
|
|
1182
|
+
readonly reasoning: {
|
|
1183
|
+
readonly canDisable: true;
|
|
1184
|
+
readonly outputsThinking: true;
|
|
1185
|
+
readonly outputsSignatures: true;
|
|
1186
|
+
};
|
|
724
1187
|
readonly provider: "anthropic";
|
|
725
1188
|
} | {
|
|
726
1189
|
readonly type: "text";
|
|
@@ -731,6 +1194,11 @@ export declare function getModel(modelName: ModelName): {
|
|
|
731
1194
|
readonly inputTokenCost: 3;
|
|
732
1195
|
readonly outputTokenCost: 15;
|
|
733
1196
|
readonly outputTokensPerSecond: 78;
|
|
1197
|
+
readonly reasoning: {
|
|
1198
|
+
readonly canDisable: true;
|
|
1199
|
+
readonly outputsThinking: true;
|
|
1200
|
+
readonly outputsSignatures: true;
|
|
1201
|
+
};
|
|
734
1202
|
readonly disabled: true;
|
|
735
1203
|
readonly provider: "anthropic";
|
|
736
1204
|
} | {
|
package/dist/models.js
CHANGED
|
@@ -52,6 +52,13 @@ export const textModels = [
|
|
|
52
52
|
cachedInputTokenCost: 0.5,
|
|
53
53
|
outputTokenCost: 8,
|
|
54
54
|
outputTokensPerSecond: 94,
|
|
55
|
+
reasoning: {
|
|
56
|
+
levels: ["low", "medium", "high"],
|
|
57
|
+
defaultLevel: "medium",
|
|
58
|
+
canDisable: false,
|
|
59
|
+
outputsThinking: false,
|
|
60
|
+
outputsSignatures: false,
|
|
61
|
+
},
|
|
55
62
|
provider: "openai",
|
|
56
63
|
},
|
|
57
64
|
{
|
|
@@ -64,6 +71,13 @@ export const textModels = [
|
|
|
64
71
|
cachedInputTokenCost: 0.55,
|
|
65
72
|
outputTokenCost: 4.4,
|
|
66
73
|
outputTokensPerSecond: 214,
|
|
74
|
+
reasoning: {
|
|
75
|
+
levels: ["low", "medium", "high"],
|
|
76
|
+
defaultLevel: "medium",
|
|
77
|
+
canDisable: false,
|
|
78
|
+
outputsThinking: false,
|
|
79
|
+
outputsSignatures: false,
|
|
80
|
+
},
|
|
67
81
|
provider: "openai",
|
|
68
82
|
},
|
|
69
83
|
{
|
|
@@ -76,6 +90,13 @@ export const textModels = [
|
|
|
76
90
|
cachedInputTokenCost: 0.275,
|
|
77
91
|
outputTokenCost: 4.4,
|
|
78
92
|
outputTokensPerSecond: 135,
|
|
93
|
+
reasoning: {
|
|
94
|
+
levels: ["low", "medium", "high"],
|
|
95
|
+
defaultLevel: "medium",
|
|
96
|
+
canDisable: false,
|
|
97
|
+
outputsThinking: false,
|
|
98
|
+
outputsSignatures: false,
|
|
99
|
+
},
|
|
79
100
|
provider: "openai",
|
|
80
101
|
},
|
|
81
102
|
{
|
|
@@ -86,6 +107,11 @@ export const textModels = [
|
|
|
86
107
|
maxOutputTokens: 100000,
|
|
87
108
|
inputTokenCost: 20,
|
|
88
109
|
outputTokenCost: 80,
|
|
110
|
+
reasoning: {
|
|
111
|
+
canDisable: false,
|
|
112
|
+
outputsThinking: false,
|
|
113
|
+
outputsSignatures: false,
|
|
114
|
+
},
|
|
89
115
|
provider: "openai",
|
|
90
116
|
},
|
|
91
117
|
{
|
|
@@ -98,6 +124,13 @@ export const textModels = [
|
|
|
98
124
|
cachedInputTokenCost: 7.5,
|
|
99
125
|
outputTokenCost: 60,
|
|
100
126
|
outputTokensPerSecond: 100,
|
|
127
|
+
reasoning: {
|
|
128
|
+
levels: ["low", "medium", "high"],
|
|
129
|
+
defaultLevel: "medium",
|
|
130
|
+
canDisable: false,
|
|
131
|
+
outputsThinking: false,
|
|
132
|
+
outputsSignatures: false,
|
|
133
|
+
},
|
|
101
134
|
provider: "openai",
|
|
102
135
|
},
|
|
103
136
|
{
|
|
@@ -169,6 +202,135 @@ export const textModels = [
|
|
|
169
202
|
outputTokensPerSecond: 142,
|
|
170
203
|
provider: "openai",
|
|
171
204
|
},
|
|
205
|
+
{
|
|
206
|
+
type: "text",
|
|
207
|
+
modelName: "gpt-5",
|
|
208
|
+
description: "GPT-5 is a frontier reasoning model with 400K context window. Supports reasoning tokens. Knowledge cutoff: September 2024.",
|
|
209
|
+
maxInputTokens: 400000,
|
|
210
|
+
maxOutputTokens: 128000,
|
|
211
|
+
inputTokenCost: 1.25,
|
|
212
|
+
cachedInputTokenCost: 0.125,
|
|
213
|
+
outputTokenCost: 10,
|
|
214
|
+
outputTokensPerSecond: 72,
|
|
215
|
+
reasoning: {
|
|
216
|
+
levels: ["minimal", "low", "medium", "high"],
|
|
217
|
+
defaultLevel: "medium",
|
|
218
|
+
canDisable: false,
|
|
219
|
+
outputsThinking: false,
|
|
220
|
+
outputsSignatures: false,
|
|
221
|
+
},
|
|
222
|
+
provider: "openai",
|
|
223
|
+
},
|
|
224
|
+
{
|
|
225
|
+
type: "text",
|
|
226
|
+
modelName: "gpt-5-mini",
|
|
227
|
+
description: "GPT-5 mini is a faster, more cost-efficient version of GPT-5 with 400K context window. Knowledge cutoff: May 2024.",
|
|
228
|
+
maxInputTokens: 400000,
|
|
229
|
+
maxOutputTokens: 128000,
|
|
230
|
+
inputTokenCost: 0.25,
|
|
231
|
+
cachedInputTokenCost: 0.025,
|
|
232
|
+
outputTokenCost: 2,
|
|
233
|
+
outputTokensPerSecond: 69,
|
|
234
|
+
reasoning: {
|
|
235
|
+
levels: ["minimal", "low", "medium", "high"],
|
|
236
|
+
defaultLevel: "medium",
|
|
237
|
+
canDisable: false,
|
|
238
|
+
outputsThinking: false,
|
|
239
|
+
outputsSignatures: false,
|
|
240
|
+
},
|
|
241
|
+
provider: "openai",
|
|
242
|
+
},
|
|
243
|
+
{
|
|
244
|
+
type: "text",
|
|
245
|
+
modelName: "gpt-5-nano",
|
|
246
|
+
description: "GPT-5 nano is the fastest and most affordable GPT-5 variant with 400K context window. Knowledge cutoff: May 2024.",
|
|
247
|
+
maxInputTokens: 400000,
|
|
248
|
+
maxOutputTokens: 128000,
|
|
249
|
+
inputTokenCost: 0.05,
|
|
250
|
+
cachedInputTokenCost: 0.005,
|
|
251
|
+
outputTokenCost: 0.4,
|
|
252
|
+
outputTokensPerSecond: 140,
|
|
253
|
+
reasoning: {
|
|
254
|
+
levels: ["minimal", "low", "medium", "high"],
|
|
255
|
+
defaultLevel: "medium",
|
|
256
|
+
canDisable: false,
|
|
257
|
+
outputsThinking: false,
|
|
258
|
+
outputsSignatures: false,
|
|
259
|
+
},
|
|
260
|
+
provider: "openai",
|
|
261
|
+
},
|
|
262
|
+
{
|
|
263
|
+
type: "text",
|
|
264
|
+
modelName: "gpt-5.1",
|
|
265
|
+
description: "GPT-5.1 is the flagship model for coding and agentic tasks with configurable reasoning effort. 400K context window. Knowledge cutoff: September 2024.",
|
|
266
|
+
maxInputTokens: 400000,
|
|
267
|
+
maxOutputTokens: 128000,
|
|
268
|
+
inputTokenCost: 1.25,
|
|
269
|
+
cachedInputTokenCost: 0.125,
|
|
270
|
+
outputTokenCost: 10,
|
|
271
|
+
reasoning: {
|
|
272
|
+
levels: ["none", "low", "medium", "high"],
|
|
273
|
+
defaultLevel: "none",
|
|
274
|
+
canDisable: true,
|
|
275
|
+
outputsThinking: false,
|
|
276
|
+
outputsSignatures: false,
|
|
277
|
+
},
|
|
278
|
+
provider: "openai",
|
|
279
|
+
},
|
|
280
|
+
{
|
|
281
|
+
type: "text",
|
|
282
|
+
modelName: "gpt-5.2",
|
|
283
|
+
description: "GPT-5.2 is the flagship model for coding and agentic tasks across industries. 400K context window. Knowledge cutoff: August 2025.",
|
|
284
|
+
maxInputTokens: 400000,
|
|
285
|
+
maxOutputTokens: 128000,
|
|
286
|
+
inputTokenCost: 1.75,
|
|
287
|
+
cachedInputTokenCost: 0.175,
|
|
288
|
+
outputTokenCost: 14,
|
|
289
|
+
outputTokensPerSecond: 61,
|
|
290
|
+
reasoning: {
|
|
291
|
+
levels: ["none", "low", "medium", "high"],
|
|
292
|
+
defaultLevel: "none",
|
|
293
|
+
canDisable: true,
|
|
294
|
+
outputsThinking: false,
|
|
295
|
+
outputsSignatures: false,
|
|
296
|
+
},
|
|
297
|
+
provider: "openai",
|
|
298
|
+
},
|
|
299
|
+
{
|
|
300
|
+
type: "text",
|
|
301
|
+
modelName: "gpt-5.4",
|
|
302
|
+
description: "GPT-5.4 is the most capable and efficient frontier model for complex professional work. 1M context window, state-of-the-art coding and tool use. Standard pricing for ≤272K tokens, 2x input/1.5x output for >272K. Knowledge cutoff: August 2025.",
|
|
303
|
+
maxInputTokens: 1_050_000,
|
|
304
|
+
maxOutputTokens: 128000,
|
|
305
|
+
inputTokenCost: 2.5,
|
|
306
|
+
cachedInputTokenCost: 0.25,
|
|
307
|
+
outputTokenCost: 15,
|
|
308
|
+
reasoning: {
|
|
309
|
+
levels: ["none", "low", "medium", "high", "xhigh"],
|
|
310
|
+
defaultLevel: "none",
|
|
311
|
+
canDisable: true,
|
|
312
|
+
outputsThinking: false,
|
|
313
|
+
outputsSignatures: false,
|
|
314
|
+
},
|
|
315
|
+
provider: "openai",
|
|
316
|
+
},
|
|
317
|
+
{
|
|
318
|
+
type: "text",
|
|
319
|
+
modelName: "gpt-5.4-pro",
|
|
320
|
+
description: "GPT-5.4 Pro uses more compute for complex reasoning tasks. 1M context window. Standard pricing for ≤272K tokens. Knowledge cutoff: August 2025.",
|
|
321
|
+
maxInputTokens: 1_050_000,
|
|
322
|
+
maxOutputTokens: 128000,
|
|
323
|
+
inputTokenCost: 30,
|
|
324
|
+
outputTokenCost: 180,
|
|
325
|
+
reasoning: {
|
|
326
|
+
levels: ["medium", "high", "xhigh"],
|
|
327
|
+
defaultLevel: "medium",
|
|
328
|
+
canDisable: false,
|
|
329
|
+
outputsThinking: false,
|
|
330
|
+
outputsSignatures: false,
|
|
331
|
+
},
|
|
332
|
+
provider: "openai",
|
|
333
|
+
},
|
|
172
334
|
{
|
|
173
335
|
type: "text",
|
|
174
336
|
modelName: "gemini-3.1-pro-preview",
|
|
@@ -178,16 +340,24 @@ export const textModels = [
|
|
|
178
340
|
inputTokenCost: 2.0,
|
|
179
341
|
outputTokenCost: 12.0,
|
|
180
342
|
outputTokensPerSecond: 112,
|
|
343
|
+
reasoning: {
|
|
344
|
+
levels: ["low", "medium", "high"],
|
|
345
|
+
defaultLevel: "high",
|
|
346
|
+
canDisable: false,
|
|
347
|
+
outputsThinking: true,
|
|
348
|
+
outputsSignatures: true,
|
|
349
|
+
},
|
|
181
350
|
provider: "google",
|
|
182
351
|
},
|
|
183
352
|
{
|
|
184
353
|
type: "text",
|
|
185
354
|
modelName: "gemini-3-pro-preview",
|
|
186
|
-
description: "
|
|
355
|
+
description: "DEPRECATED: Shut down March 9, 2026. Use gemini-3.1-pro-preview instead.",
|
|
187
356
|
maxInputTokens: 1_048_576,
|
|
188
357
|
maxOutputTokens: 65536,
|
|
189
358
|
inputTokenCost: 2.0,
|
|
190
359
|
outputTokenCost: 12.0,
|
|
360
|
+
disabled: true,
|
|
191
361
|
provider: "google",
|
|
192
362
|
},
|
|
193
363
|
{
|
|
@@ -198,6 +368,32 @@ export const textModels = [
|
|
|
198
368
|
maxOutputTokens: 65536,
|
|
199
369
|
inputTokenCost: 0.5,
|
|
200
370
|
outputTokenCost: 3.0,
|
|
371
|
+
outputTokensPerSecond: 146,
|
|
372
|
+
reasoning: {
|
|
373
|
+
levels: ["minimal", "low", "medium", "high"],
|
|
374
|
+
defaultLevel: "high",
|
|
375
|
+
canDisable: false,
|
|
376
|
+
outputsThinking: true,
|
|
377
|
+
outputsSignatures: true,
|
|
378
|
+
},
|
|
379
|
+
provider: "google",
|
|
380
|
+
},
|
|
381
|
+
{
|
|
382
|
+
type: "text",
|
|
383
|
+
modelName: "gemini-3.1-flash-lite-preview",
|
|
384
|
+
description: "Most cost-effective Gemini 3.1 model with thinking support and 1M context window. 2.5x faster TTFA and 45% faster output than 2.5 Flash. Released March 2026.",
|
|
385
|
+
maxInputTokens: 1_048_576,
|
|
386
|
+
maxOutputTokens: 65536,
|
|
387
|
+
inputTokenCost: 0.25,
|
|
388
|
+
outputTokenCost: 1.5,
|
|
389
|
+
outputTokensPerSecond: 379,
|
|
390
|
+
reasoning: {
|
|
391
|
+
levels: ["minimal", "low", "medium", "high"],
|
|
392
|
+
defaultLevel: "minimal",
|
|
393
|
+
canDisable: false,
|
|
394
|
+
outputsThinking: true,
|
|
395
|
+
outputsSignatures: true,
|
|
396
|
+
},
|
|
201
397
|
provider: "google",
|
|
202
398
|
},
|
|
203
399
|
{
|
|
@@ -208,7 +404,12 @@ export const textModels = [
|
|
|
208
404
|
maxOutputTokens: 65536,
|
|
209
405
|
inputTokenCost: 1.25,
|
|
210
406
|
outputTokenCost: 10.0,
|
|
211
|
-
outputTokensPerSecond:
|
|
407
|
+
outputTokensPerSecond: 134,
|
|
408
|
+
reasoning: {
|
|
409
|
+
canDisable: false,
|
|
410
|
+
outputsThinking: true,
|
|
411
|
+
outputsSignatures: true,
|
|
412
|
+
},
|
|
212
413
|
provider: "google",
|
|
213
414
|
},
|
|
214
415
|
{
|
|
@@ -219,7 +420,12 @@ export const textModels = [
|
|
|
219
420
|
maxOutputTokens: 65536,
|
|
220
421
|
inputTokenCost: 0.3,
|
|
221
422
|
outputTokenCost: 2.5,
|
|
222
|
-
outputTokensPerSecond:
|
|
423
|
+
outputTokensPerSecond: 245,
|
|
424
|
+
reasoning: {
|
|
425
|
+
canDisable: true,
|
|
426
|
+
outputsThinking: true,
|
|
427
|
+
outputsSignatures: true,
|
|
428
|
+
},
|
|
223
429
|
provider: "google",
|
|
224
430
|
},
|
|
225
431
|
{
|
|
@@ -231,6 +437,11 @@ export const textModels = [
|
|
|
231
437
|
inputTokenCost: 0.1,
|
|
232
438
|
outputTokenCost: 0.4,
|
|
233
439
|
outputTokensPerSecond: 400,
|
|
440
|
+
reasoning: {
|
|
441
|
+
canDisable: true,
|
|
442
|
+
outputsThinking: true,
|
|
443
|
+
outputsSignatures: false,
|
|
444
|
+
},
|
|
234
445
|
provider: "google",
|
|
235
446
|
},
|
|
236
447
|
{
|
|
@@ -308,21 +519,34 @@ export const textModels = [
|
|
|
308
519
|
{
|
|
309
520
|
type: "text",
|
|
310
521
|
modelName: "claude-opus-4-6",
|
|
311
|
-
description: "The most intelligent Claude model for building agents and coding. 200K context window, 128K max output.",
|
|
522
|
+
description: "The most intelligent Claude model for building agents and coding. 200K context window (1M in beta), 128K max output.",
|
|
312
523
|
maxInputTokens: 200_000,
|
|
313
524
|
maxOutputTokens: 131_072,
|
|
314
525
|
inputTokenCost: 5,
|
|
526
|
+
cachedInputTokenCost: 0.5,
|
|
315
527
|
outputTokenCost: 25,
|
|
528
|
+
outputTokensPerSecond: 53,
|
|
529
|
+
reasoning: {
|
|
530
|
+
canDisable: true,
|
|
531
|
+
outputsThinking: true,
|
|
532
|
+
outputsSignatures: true,
|
|
533
|
+
},
|
|
316
534
|
provider: "anthropic",
|
|
317
535
|
},
|
|
318
536
|
{
|
|
319
537
|
type: "text",
|
|
320
538
|
modelName: "claude-sonnet-4-6",
|
|
321
|
-
description: "The best combination of speed and intelligence. 200K context window, 64K max output.",
|
|
539
|
+
description: "The best combination of speed and intelligence. 200K context window (1M in beta), 64K max output.",
|
|
322
540
|
maxInputTokens: 200_000,
|
|
323
541
|
maxOutputTokens: 64_000,
|
|
324
542
|
inputTokenCost: 3,
|
|
543
|
+
cachedInputTokenCost: 0.3,
|
|
325
544
|
outputTokenCost: 15,
|
|
545
|
+
reasoning: {
|
|
546
|
+
canDisable: true,
|
|
547
|
+
outputsThinking: true,
|
|
548
|
+
outputsSignatures: true,
|
|
549
|
+
},
|
|
326
550
|
provider: "anthropic",
|
|
327
551
|
},
|
|
328
552
|
{
|
|
@@ -332,7 +556,14 @@ export const textModels = [
|
|
|
332
556
|
maxInputTokens: 200_000,
|
|
333
557
|
maxOutputTokens: 64_000,
|
|
334
558
|
inputTokenCost: 1,
|
|
559
|
+
cachedInputTokenCost: 0.1,
|
|
335
560
|
outputTokenCost: 5,
|
|
561
|
+
outputTokensPerSecond: 97,
|
|
562
|
+
reasoning: {
|
|
563
|
+
canDisable: true,
|
|
564
|
+
outputsThinking: true,
|
|
565
|
+
outputsSignatures: true,
|
|
566
|
+
},
|
|
336
567
|
provider: "anthropic",
|
|
337
568
|
},
|
|
338
569
|
{
|
|
@@ -344,6 +575,11 @@ export const textModels = [
|
|
|
344
575
|
inputTokenCost: 3,
|
|
345
576
|
outputTokenCost: 15,
|
|
346
577
|
outputTokensPerSecond: 78,
|
|
578
|
+
reasoning: {
|
|
579
|
+
canDisable: true,
|
|
580
|
+
outputsThinking: true,
|
|
581
|
+
outputsSignatures: true,
|
|
582
|
+
},
|
|
347
583
|
disabled: true,
|
|
348
584
|
provider: "anthropic",
|
|
349
585
|
},
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { getLogger } from "../logger.js";
|
|
1
2
|
import { BaseStrategy } from "./baseStrategy.js";
|
|
2
3
|
export class RaceStrategy extends BaseStrategy {
|
|
3
4
|
strategies;
|
|
@@ -28,8 +29,11 @@ export class RaceStrategy extends BaseStrategy {
|
|
|
28
29
|
});
|
|
29
30
|
return Promise.race(promises.map((p, i) => p.then((result) => {
|
|
30
31
|
for (let j = 0; j < controllers.length; j++) {
|
|
31
|
-
if (j !== i)
|
|
32
|
+
if (j !== i) {
|
|
33
|
+
const logger = getLogger();
|
|
34
|
+
logger.debug(`RaceStrategy: aborting strategy ${this.strategies[j]} because strategy ${this.strategies[i]} won the race.`);
|
|
32
35
|
controllers[j].abort();
|
|
36
|
+
}
|
|
33
37
|
}
|
|
34
38
|
return result;
|
|
35
39
|
})));
|
package/dist/types.d.ts
CHANGED
|
@@ -98,6 +98,8 @@ export type CostEstimate = {
|
|
|
98
98
|
totalCost: number;
|
|
99
99
|
currency: string;
|
|
100
100
|
};
|
|
101
|
+
export declare function addTokenUsage(_a?: TokenUsage, _b?: TokenUsage): TokenUsage;
|
|
102
|
+
export declare function addCosts(_a?: CostEstimate, _b?: CostEstimate): CostEstimate;
|
|
101
103
|
export type PromptResult = {
|
|
102
104
|
output: string | null;
|
|
103
105
|
toolCalls: ToolCall[];
|
package/dist/types.js
CHANGED
|
@@ -1 +1,41 @@
|
|
|
1
1
|
export * from "./types/result.js";
|
|
2
|
+
export function addTokenUsage(_a, _b) {
|
|
3
|
+
let a = _a;
|
|
4
|
+
let b = _b;
|
|
5
|
+
if (a && !b)
|
|
6
|
+
return a;
|
|
7
|
+
if (b && !a)
|
|
8
|
+
return b;
|
|
9
|
+
if (!a && !b)
|
|
10
|
+
return { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
|
|
11
|
+
a = _a;
|
|
12
|
+
b = _b;
|
|
13
|
+
return {
|
|
14
|
+
inputTokens: a.inputTokens + b.inputTokens,
|
|
15
|
+
outputTokens: a.outputTokens + b.outputTokens,
|
|
16
|
+
cachedInputTokens: (a.cachedInputTokens || 0) + (b.cachedInputTokens || 0),
|
|
17
|
+
totalTokens: (a.totalTokens || 0) + (b.totalTokens || 0),
|
|
18
|
+
};
|
|
19
|
+
}
|
|
20
|
+
export function addCosts(_a, _b) {
|
|
21
|
+
let a = _a;
|
|
22
|
+
let b = _b;
|
|
23
|
+
if (a && !b)
|
|
24
|
+
return a;
|
|
25
|
+
if (b && !a)
|
|
26
|
+
return b;
|
|
27
|
+
if (!a && !b)
|
|
28
|
+
return { inputCost: 0, outputCost: 0, totalCost: 0, currency: "USD" };
|
|
29
|
+
a = _a;
|
|
30
|
+
b = _b;
|
|
31
|
+
if (a.currency !== b.currency) {
|
|
32
|
+
throw new Error(`Cannot add costs with different currencies: ${a.currency} and ${b.currency}`);
|
|
33
|
+
}
|
|
34
|
+
return {
|
|
35
|
+
inputCost: a.inputCost + b.inputCost,
|
|
36
|
+
outputCost: a.outputCost + b.outputCost,
|
|
37
|
+
cachedInputCost: (a.cachedInputCost || 0) + (b.cachedInputCost || 0),
|
|
38
|
+
totalCost: a.totalCost + b.totalCost,
|
|
39
|
+
currency: a.currency,
|
|
40
|
+
};
|
|
41
|
+
}
|