smoltalk 0.0.16 → 0.0.18
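Highlights of this release, as reflected in the diff below: content setters on all message classes; a streaming generator (_textStream) for the Google and Ollama clients; token-usage and cost reporting across the OpenAI, Google, and Ollama clients via a new calculateCost helper and TokenUsage/CostEstimate types; refreshed Gemini model metadata and pricing (gemini-2.0-flash is now flagged disabled ahead of its March 31, 2026 shutdown); and a new gemini-3-pro-image-preview image model.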

@@ -20,6 +20,7 @@ export declare class AssistantMessage extends BaseMessage implements MessageClas
  rawData?: any;
  });
  get content(): string;
+ set content(value: string);
  get role(): "assistant";
  get name(): string | undefined;
  get audio(): any | null | undefined;
@@ -25,6 +25,9 @@ export class AssistantMessage extends BaseMessage {
  ? this._content
  : JSON.stringify(this._content);
  }
+ set content(value) {
+ this._content = value;
+ }
  get role() {
  return this._role;
  }
@@ -13,6 +13,7 @@ export declare class DeveloperMessage extends BaseMessage implements MessageClas
  rawData?: any;
  });
  get content(): string;
+ set content(value: string);
  get role(): "developer";
  get name(): string | undefined;
  get rawData(): any;
@@ -15,6 +15,9 @@ export class DeveloperMessage extends BaseMessage {
  ? this._content
  : JSON.stringify(this._content);
  }
+ set content(value) {
+ this._content = value;
+ }
  get role() {
  return this._role;
  }
@@ -13,6 +13,7 @@ export declare class SystemMessage extends BaseMessage implements MessageClass {
  rawData?: any;
  });
  get content(): string;
+ set content(value: string);
  get role(): "system";
  get name(): string | undefined;
  get rawData(): any;
@@ -15,6 +15,9 @@ export class SystemMessage extends BaseMessage {
  ? this._content
  : JSON.stringify(this._content);
  }
+ set content(value) {
+ this._content = value;
+ }
  get role() {
  return this._role;
  }
@@ -15,6 +15,7 @@ export declare class ToolMessage extends BaseMessage implements MessageClass {
  name: string;
  });
  get content(): string;
+ set content(value: string);
  get role(): "tool";
  get name(): string;
  get tool_call_id(): string;
@@ -17,6 +17,9 @@ export class ToolMessage extends BaseMessage {
  ? this._content
  : JSON.stringify(this._content);
  }
+ set content(value) {
+ this._content = value;
+ }
  get role() {
  return this._role;
  }
@@ -12,6 +12,7 @@ export declare class UserMessage extends BaseMessage implements MessageClass {
  rawData?: any;
  });
  get content(): string;
+ set content(value: string);
  get role(): "user";
  get name(): string | undefined;
  get rawData(): any;
@@ -13,6 +13,9 @@ export class UserMessage extends BaseMessage {
  get content() {
  return this._content;
  }
+ set content(value) {
+ this._content = value;
+ }
  get role() {
  return this._role;
  }
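
For context, every message class now exposes a content setter alongside the existing getter, so callers can rewrite a message in place (for example, to redact or truncate history before resending it). A minimal usage sketch; the constructor options shape and the "smoltalk" import path are assumptions, not confirmed by this diff:

import { UserMessage } from "smoltalk"; // import path assumed

// Constructor options shape is assumed from the declarations above.
const msg = new UserMessage({ content: "card number 4111-1111-1111-1111" });
// New in this release: assign through the setter instead of rebuilding the message.
msg.content = msg.content.replace(/\d{4}(-\d{4}){3}/, "[redacted]");
console.log(msg.content); // "card number [redacted]"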
@@ -1,5 +1,5 @@
  import { GoogleGenAI } from "@google/genai";
- import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient } from "../types.js";
+ import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
  import { BaseClient } from "./baseClient.js";
  export type SmolGoogleConfig = BaseClientConfig;
  export declare class SmolGoogle extends BaseClient implements SmolClient {
@@ -9,5 +9,8 @@ export declare class SmolGoogle extends BaseClient implements SmolClient {
  constructor(config: SmolGoogleConfig);
  getClient(): GoogleGenAI;
  getModel(): string;
+ private calculateUsageAndCost;
+ private buildRequest;
  _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
+ _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
  }
@@ -4,6 +4,7 @@ import { getLogger } from "../logger.js";
  import { success, } from "../types.js";
  import { zodToGoogleTool } from "../util/tool.js";
  import { BaseClient } from "./baseClient.js";
+ import { calculateCost } from "../models.js";
  export class SmolGoogle extends BaseClient {
  client;
  logger;
@@ -23,7 +24,24 @@ export class SmolGoogle extends BaseClient {
  getModel() {
  return this.model;
  }
- async _textSync(config) {
+ calculateUsageAndCost(usageMetadata) {
+ let usage;
+ let cost;
+ if (usageMetadata) {
+ usage = {
+ inputTokens: usageMetadata.promptTokenCount || 0,
+ outputTokens: usageMetadata.candidatesTokenCount || 0,
+ cachedInputTokens: usageMetadata.cachedContentTokenCount,
+ totalTokens: usageMetadata.totalTokenCount,
+ };
+ const calculatedCost = calculateCost(this.model, usage);
+ if (calculatedCost) {
+ cost = calculatedCost;
+ }
+ }
+ return { usage, cost };
+ }
+ buildRequest(config) {
  const messages = config.messages.map((msg) => msg.toGoogleMessage());
  const tools = (config.tools || []).map((tool) => {
  return zodToGoogleTool(tool.name, tool.schema, {
@@ -38,13 +56,18 @@ export class SmolGoogle extends BaseClient {
  genConfig.responseMimeType = "application/json";
  genConfig.responseJsonSchema = config.responseFormat.toJSONSchema();
  }
- const request = {
+ return {
  contents: messages,
  model: this.model,
  config: genConfig,
- stream: config.stream || false,
  ...(config.rawAttributes || {}),
  };
+ }
+ async _textSync(config) {
+ const request = {
+ ...this.buildRequest(config),
+ stream: config.stream || false,
+ };
  this.logger.debug("Sending request to Google Gemini:", JSON.stringify(request, null, 2));
  // Send the prompt as the latest message
  const result = await this.client.models.generateContent(request);
@@ -61,7 +84,57 @@ export class SmolGoogle extends BaseClient {
  });
  }
  });
+ // Extract usage and calculate cost
+ const { usage, cost } = this.calculateUsageAndCost(result.usageMetadata);
  // Return the response, updating the chat history
- return success({ output, toolCalls });
+ return success({ output, toolCalls, usage, cost });
+ }
+ async *_textStream(config) {
+ const request = this.buildRequest(config);
+ this.logger.debug("Sending streaming request to Google Gemini:", JSON.stringify(request, null, 2));
+ const stream = await this.client.models.generateContentStream(request);
+ let content = "";
+ const toolCallsMap = new Map();
+ let usage;
+ let cost;
+ for await (const chunk of stream) {
+ // Extract usage metadata from chunks
+ if (chunk.usageMetadata) {
+ const usageAndCost = this.calculateUsageAndCost(chunk.usageMetadata);
+ usage = usageAndCost.usage;
+ cost = usageAndCost.cost;
+ }
+ // Handle text content
+ if (chunk.text) {
+ content += chunk.text;
+ yield { type: "text", text: chunk.text };
+ }
+ // Handle function calls
+ if (chunk.functionCalls) {
+ for (const functionCall of chunk.functionCalls) {
+ const id = functionCall.id || functionCall.name || "";
+ const name = functionCall.name || "";
+ if (!toolCallsMap.has(id)) {
+ toolCallsMap.set(id, {
+ id: id,
+ name: name,
+ arguments: functionCall.args,
+ });
+ }
+ }
+ }
+ }
+ this.logger.debug("Streaming response completed from Google Gemini");
+ // Yield tool calls
+ const toolCalls = [];
+ for (const tc of toolCallsMap.values()) {
+ const toolCall = new ToolCall(tc.id, tc.name, tc.arguments);
+ toolCalls.push(toolCall);
+ yield { type: "tool_call", toolCall };
+ }
+ yield {
+ type: "done",
+ result: { output: content || null, toolCalls, usage, cost },
+ };
  }
  }
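
To show what the new streaming surface produces, here is a sketch that consumes _textStream directly and handles the three chunk shapes defined in this diff (text, tool_call, done). The client construction, message setup, and import paths are assumptions, and a higher-level public method may be the intended entry point rather than the underscore-prefixed generator:

import { SmolGoogle, UserMessage } from "smoltalk"; // import paths assumed

// Config shape assumed; the diff only shows SmolGoogleConfig = BaseClientConfig.
const client = new SmolGoogle({ model: "gemini-2.5-flash", apiKey: process.env.GEMINI_API_KEY });

const messages = [new UserMessage({ content: "Summarize this repo." })];
for await (const chunk of client._textStream({ messages })) {
  if (chunk.type === "text") {
    process.stdout.write(chunk.text); // incremental text deltas
  } else if (chunk.type === "tool_call") {
    console.log("\ntool call:", chunk.toolCall);
  } else if (chunk.type === "done") {
    // The final chunk's result now carries token usage and an estimated cost.
    console.log("\nusage:", chunk.result.usage, "cost:", chunk.result.cost);
  }
}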
@@ -1,5 +1,5 @@
  import { Ollama } from "ollama";
- import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient } from "../types.js";
+ import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
  import { BaseClient } from "./baseClient.js";
  export declare const DEFAULT_OLLAMA_HOST = "http://localhost:11434";
  export type SmolOllamaConfig = BaseClientConfig;
@@ -10,5 +10,7 @@ export declare class SmolOllama extends BaseClient implements SmolClient {
  constructor(config: SmolOllamaConfig);
  getClient(): Ollama;
  getModel(): string;
+ private calculateUsageAndCost;
  _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
+ _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
  }
@@ -4,6 +4,7 @@ import { getLogger } from "../logger.js";
  import { success, } from "../types.js";
  import { zodToGoogleTool } from "../util/tool.js";
  import { BaseClient } from "./baseClient.js";
+ import { calculateCost } from "../models.js";
  export const DEFAULT_OLLAMA_HOST = "http://localhost:11434";
  export class SmolOllama extends BaseClient {
  logger;
@@ -30,6 +31,24 @@ export class SmolOllama extends BaseClient {
  getModel() {
  return this.model;
  }
+ calculateUsageAndCost(responseData) {
+ let usage;
+ let cost;
+ if (responseData) {
+ const inputTokens = responseData.prompt_eval_count || 0;
+ const outputTokens = responseData.eval_count || 0;
+ usage = {
+ inputTokens,
+ outputTokens,
+ totalTokens: inputTokens + outputTokens,
+ };
+ const calculatedCost = calculateCost(this.model, usage);
+ if (calculatedCost) {
+ cost = calculatedCost;
+ }
+ }
+ return { usage, cost };
+ }
  async _textSync(config) {
  const messages = config.messages.map((msg) => msg.toOpenAIMessage());
  const tools = (config.tools || []).map((tool) => {
@@ -62,7 +81,90 @@ export class SmolOllama extends BaseClient {
  toolCalls.push(new ToolCall(tool_call.id, tool_call.function.name, tool_call.function.arguments || {}));
  }
  }
+ // Extract usage and calculate cost
+ const { usage, cost } = this.calculateUsageAndCost(result);
  // Return the response, updating the chat history
- return success({ output, toolCalls });
+ return success({ output, toolCalls, usage, cost });
+ }
+ async *_textStream(config) {
+ const messages = config.messages.map((msg) => msg.toOpenAIMessage());
+ const tools = (config.tools || []).map((tool) => {
+ return zodToGoogleTool(tool.name, tool.schema, {
+ description: tool.description,
+ });
+ });
+ const request = {
+ messages: messages,
+ model: this.model,
+ stream: true,
+ };
+ if (tools.length > 0) {
+ request.tools = tools.map((t) => ({ type: "function", function: t }));
+ }
+ if (config.responseFormat) {
+ request.format = config.responseFormat.toJSONSchema();
+ }
+ if (config.rawAttributes) {
+ Object.assign(request, config.rawAttributes);
+ }
+ this.logger.debug("Sending streaming request to Ollama:", JSON.stringify(request, null, 2));
+ // @ts-ignore
+ const stream = await this.client.chat(request);
+ let content = "";
+ const toolCallsMap = new Map();
+ let usage;
+ let cost;
+ let lastChunk;
+ for await (const chunk of stream) {
+ lastChunk = chunk;
+ // Handle text content
+ if (chunk.message?.content) {
+ content += chunk.message.content;
+ yield { type: "text", text: chunk.message.content };
+ }
+ // Handle tool calls
+ if (chunk.message?.tool_calls) {
+ for (const tc of chunk.message.tool_calls) {
+ const tool_call = tc;
+ const id = tool_call.id || tool_call.function.name || "";
+ const name = tool_call.function.name || "";
+ if (!toolCallsMap.has(id)) {
+ toolCallsMap.set(id, {
+ id: id,
+ name: name,
+ arguments: tool_call.function.arguments || {},
+ });
+ }
+ else {
+ // Merge arguments if tool call is split across chunks
+ const existing = toolCallsMap.get(id);
+ if (tool_call.function.arguments) {
+ existing.arguments = {
+ ...existing.arguments,
+ ...tool_call.function.arguments,
+ };
+ }
+ }
+ }
+ }
+ }
+ this.logger.debug("Streaming response completed from Ollama");
+ // Extract usage from the last chunk
+ if (lastChunk) {
+ const usageAndCost = this.calculateUsageAndCost(lastChunk);
+ usage = usageAndCost.usage;
+ cost = usageAndCost.cost;
+ }
+ // Yield tool calls
+ const toolCalls = [];
+ for (const tc of toolCallsMap.values()) {
+ const toolCall = new ToolCall(tc.id, tc.name, tc.arguments);
+ toolCalls.push(toolCall);
+ yield { type: "tool_call", toolCall };
+ }
+ yield {
+ type: "done",
+ result: { output: content || null, toolCalls, usage, cost },
+ };
  }
  }
@@ -9,6 +9,7 @@ export declare class SmolOpenAi extends BaseClient implements SmolClient {
  constructor(config: SmolOpenAiConfig);
  getClient(): OpenAI;
  getModel(): string;
+ private calculateUsageAndCost;
  private buildRequest;
  _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
  _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
@@ -5,6 +5,7 @@ import { isFunctionToolCall } from "../util.js";
  import { getLogger } from "../logger.js";
  import { BaseClient } from "./baseClient.js";
  import { zodToOpenAITool } from "../util/tool.js";
+ import { calculateCost } from "../models.js";
  export class SmolOpenAi extends BaseClient {
  client;
  logger;
@@ -24,6 +25,23 @@ export class SmolOpenAi extends BaseClient {
  getModel() {
  return this.model;
  }
+ calculateUsageAndCost(usageData) {
+ let usage;
+ let cost;
+ if (usageData) {
+ usage = {
+ inputTokens: usageData.prompt_tokens || 0,
+ outputTokens: usageData.completion_tokens || 0,
+ cachedInputTokens: usageData.prompt_tokens_details?.cached_tokens,
+ totalTokens: usageData.total_tokens,
+ };
+ const calculatedCost = calculateCost(this.model, usage);
+ if (calculatedCost) {
+ cost = calculatedCost;
+ }
+ }
+ return { usage, cost };
+ }
  buildRequest(config) {
  const messages = config.messages.map((msg) => msg.toOpenAIMessage());
  const request = {
@@ -69,7 +87,9 @@ export class SmolOpenAi extends BaseClient {
  }
  }
  }
- return success({ output, toolCalls });
+ // Extract usage and calculate cost
+ const { usage, cost } = this.calculateUsageAndCost(completion.usage);
+ return success({ output, toolCalls, usage, cost });
  }
  async *_textStream(config) {
  const request = this.buildRequest(config);
@@ -77,11 +97,20 @@ export class SmolOpenAi extends BaseClient {
  const completion = await this.client.chat.completions.create({
  ...request,
  stream: true,
+ stream_options: { include_usage: true },
  });
  let content = "";
  const toolCallsMap = new Map();
+ let usage;
+ let cost;
  for await (const chunk of completion) {
  const delta = chunk.choices[0]?.delta;
+ // Extract usage from the final chunk
+ if (chunk.usage) {
+ const usageAndCost = this.calculateUsageAndCost(chunk.usage);
+ usage = usageAndCost.usage;
+ cost = usageAndCost.cost;
+ }
  if (!delta)
  continue;
  if (delta.content) {
@@ -117,6 +146,9 @@ export class SmolOpenAi extends BaseClient {
  toolCalls.push(toolCall);
  yield { type: "tool_call", toolCall };
  }
- yield { type: "done", result: { output: content || null, toolCalls } };
+ yield {
+ type: "done",
+ result: { output: content || null, toolCalls, usage, cost },
+ };
  }
  }
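
For reference, the usage object that calculateUsageAndCost maps here follows OpenAI's standard shape; with stream_options: { include_usage: true }, the same object arrives on one final stream chunk whose choices array is empty, which is why the loop checks chunk.usage before the delta guard. An illustrative payload (values made up):

// Shape of completion.usage / chunk.usage as mapped above:
const exampleUsage = {
  prompt_tokens: 1200,      // mapped to inputTokens
  completion_tokens: 350,   // mapped to outputTokens
  total_tokens: 1550,       // mapped to totalTokens
  prompt_tokens_details: {
    cached_tokens: 800,     // mapped to cachedInputTokens
  },
};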
package/dist/models.d.ts CHANGED
@@ -63,11 +63,11 @@ export declare const textModels: readonly [{
  }, {
  readonly type: "text";
  readonly modelName: "o3";
- readonly description: "o3 is a reasoning model that sets a new standard for math, science, and coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. The knowledge cutoff for o3 models is October, 2023.";
+ readonly description: "o3 is a reasoning model that sets a new standard for math, science, coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. The knowledge cutoff for o3 models is October, 2023.";
  readonly maxInputTokens: 200000;
  readonly maxOutputTokens: 100000;
  readonly inputTokenCost: 2;
- readonly cachedInputTokenCost: 1;
+ readonly cachedInputTokenCost: 0.5;
  readonly outputTokenCost: 8;
  readonly provider: "openai";
  }, {
@@ -83,7 +83,7 @@ export declare const textModels: readonly [{
  }, {
  readonly type: "text";
  readonly modelName: "o4-mini";
- readonly description: "o4-mini is a new o-series reasoning model that replaced o3-mini, providing excellent performance for math, science, and coding tasks. Available in ChatGPT Plus, Pro, and Team.";
+ readonly description: "Latest small o-series model optimized for fast, effective reasoning with exceptional performance in coding and visual tasks. Knowledge cutoff: June 2024.";
  readonly maxInputTokens: 200000;
  readonly maxOutputTokens: 100000;
  readonly inputTokenCost: 1.1;
@@ -143,25 +143,25 @@ export declare const textModels: readonly [{
  }, {
  readonly type: "text";
  readonly modelName: "gemini-3-pro-preview";
- readonly description: "Strongest Gemini 3 model quality with 2M context window. Standard pricing for ≤200k tokens, higher rates for >200k tokens.";
- readonly maxInputTokens: 2097152;
- readonly maxOutputTokens: 8192;
+ readonly description: "Strongest Gemini 3 model quality with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Nov 2025, currently in preview.";
+ readonly maxInputTokens: 1048576;
+ readonly maxOutputTokens: 65536;
  readonly inputTokenCost: 2;
  readonly outputTokenCost: 12;
  readonly provider: "google";
  }, {
  readonly type: "text";
  readonly modelName: "gemini-3-flash-preview";
- readonly description: "Latest Gemini 3 flash model with 1M context window. Excellent performance for high-volume tasks.";
+ readonly description: "Latest Gemini 3 flash model with 1M context window and 64K output. Outperforms 2.5 Pro while being 3x faster. Optimized for agentic workflows and coding. Includes context caching for 90% cost reductions.";
  readonly maxInputTokens: 1048576;
- readonly maxOutputTokens: 8192;
+ readonly maxOutputTokens: 65536;
  readonly inputTokenCost: 0.5;
  readonly outputTokenCost: 3;
  readonly provider: "google";
  }, {
  readonly type: "text";
  readonly modelName: "gemini-2.5-pro";
- readonly description: "High-performance Gemini 2.5 model with 2M context window. Standard pricing for ≤200k tokens, higher rates for >200k tokens.";
+ readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/higher output). Batch API: 50% discount.";
  readonly maxInputTokens: 2097152;
  readonly maxOutputTokens: 8192;
  readonly inputTokenCost: 1.25;
@@ -170,7 +170,7 @@ export declare const textModels: readonly [{
  }, {
  readonly type: "text";
  readonly modelName: "gemini-2.5-flash";
- readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. 1M context window with free tier available.";
+ readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.";
  readonly maxInputTokens: 1048576;
  readonly maxOutputTokens: 8192;
  readonly inputTokenCost: 0.3;
@@ -188,11 +188,12 @@ export declare const textModels: readonly [{
  }, {
  readonly type: "text";
  readonly modelName: "gemini-2.0-flash";
- readonly description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window.";
+ readonly description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window. DEPRECATED: Will be shut down on March 31, 2026.";
  readonly maxInputTokens: 1048576;
  readonly maxOutputTokens: 8192;
  readonly inputTokenCost: 0.1;
  readonly outputTokenCost: 0.4;
+ readonly disabled: true;
  readonly provider: "google";
  }, {
  readonly type: "text";
@@ -316,6 +317,12 @@ export declare const imageModels: readonly [{
  readonly provider: "google";
  readonly description: "aka nano-banana";
  readonly costPerImage: 0.04;
+ }, {
+ readonly type: "image";
+ readonly modelName: "gemini-3-pro-image-preview";
+ readonly provider: "google";
+ readonly description: "High-fidelity image generation with reasoning-enhanced composition. Supports legible text rendering, complex multi-turn editing, and character consistency using up to 14 reference inputs.";
+ readonly costPerImage: 0.05;
  }];
  export declare const embeddingsModels: {
  type: string;
@@ -359,11 +366,11 @@ export declare function getModel(modelName: ModelName): {
  } | {
  readonly type: "text";
  readonly modelName: "o3";
- readonly description: "o3 is a reasoning model that sets a new standard for math, science, and coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. The knowledge cutoff for o3 models is October, 2023.";
+ readonly description: "o3 is a reasoning model that sets a new standard for math, science, coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. The knowledge cutoff for o3 models is October, 2023.";
  readonly maxInputTokens: 200000;
  readonly maxOutputTokens: 100000;
  readonly inputTokenCost: 2;
- readonly cachedInputTokenCost: 1;
+ readonly cachedInputTokenCost: 0.5;
  readonly outputTokenCost: 8;
  readonly provider: "openai";
  } | {
@@ -379,7 +386,7 @@ export declare function getModel(modelName: ModelName): {
  } | {
  readonly type: "text";
  readonly modelName: "o4-mini";
- readonly description: "o4-mini is a new o-series reasoning model that replaced o3-mini, providing excellent performance for math, science, and coding tasks. Available in ChatGPT Plus, Pro, and Team.";
+ readonly description: "Latest small o-series model optimized for fast, effective reasoning with exceptional performance in coding and visual tasks. Knowledge cutoff: June 2024.";
  readonly maxInputTokens: 200000;
  readonly maxOutputTokens: 100000;
  readonly inputTokenCost: 1.1;
@@ -439,25 +446,25 @@ export declare function getModel(modelName: ModelName): {
  } | {
  readonly type: "text";
  readonly modelName: "gemini-3-pro-preview";
- readonly description: "Strongest Gemini 3 model quality with 2M context window. Standard pricing for ≤200k tokens, higher rates for >200k tokens.";
- readonly maxInputTokens: 2097152;
- readonly maxOutputTokens: 8192;
+ readonly description: "Strongest Gemini 3 model quality with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Nov 2025, currently in preview.";
+ readonly maxInputTokens: 1048576;
+ readonly maxOutputTokens: 65536;
  readonly inputTokenCost: 2;
  readonly outputTokenCost: 12;
  readonly provider: "google";
  } | {
  readonly type: "text";
  readonly modelName: "gemini-3-flash-preview";
- readonly description: "Latest Gemini 3 flash model with 1M context window. Excellent performance for high-volume tasks.";
+ readonly description: "Latest Gemini 3 flash model with 1M context window and 64K output. Outperforms 2.5 Pro while being 3x faster. Optimized for agentic workflows and coding. Includes context caching for 90% cost reductions.";
  readonly maxInputTokens: 1048576;
- readonly maxOutputTokens: 8192;
+ readonly maxOutputTokens: 65536;
  readonly inputTokenCost: 0.5;
  readonly outputTokenCost: 3;
  readonly provider: "google";
  } | {
  readonly type: "text";
  readonly modelName: "gemini-2.5-pro";
- readonly description: "High-performance Gemini 2.5 model with 2M context window. Standard pricing for ≤200k tokens, higher rates for >200k tokens.";
+ readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/higher output). Batch API: 50% discount.";
  readonly maxInputTokens: 2097152;
  readonly maxOutputTokens: 8192;
  readonly inputTokenCost: 1.25;
@@ -466,7 +473,7 @@ export declare function getModel(modelName: ModelName): {
  } | {
  readonly type: "text";
  readonly modelName: "gemini-2.5-flash";
- readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. 1M context window with free tier available.";
+ readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.";
  readonly maxInputTokens: 1048576;
  readonly maxOutputTokens: 8192;
  readonly inputTokenCost: 0.3;
@@ -484,11 +491,12 @@ export declare function getModel(modelName: ModelName): {
  } | {
  readonly type: "text";
  readonly modelName: "gemini-2.0-flash";
- readonly description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window.";
+ readonly description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window. DEPRECATED: Will be shut down on March 31, 2026.";
  readonly maxInputTokens: 1048576;
  readonly maxOutputTokens: 8192;
  readonly inputTokenCost: 0.1;
  readonly outputTokenCost: 0.4;
+ readonly disabled: true;
  readonly provider: "google";
  } | {
  readonly type: "text";
@@ -611,8 +619,25 @@ export declare function getModel(modelName: ModelName): {
  readonly provider: "google";
  readonly description: "aka nano-banana";
  readonly costPerImage: 0.04;
+ } | {
+ readonly type: "image";
+ readonly modelName: "gemini-3-pro-image-preview";
+ readonly provider: "google";
+ readonly description: "High-fidelity image generation with reasoning-enhanced composition. Supports legible text rendering, complex multi-turn editing, and character consistency using up to 14 reference inputs.";
+ readonly costPerImage: 0.05;
  } | undefined;
  export declare function isImageModel(model: Model): model is ImageModel;
  export declare function isTextModel(model: Model): model is TextModel;
  export declare function isSpeechToTextModel(model: Model): model is SpeechToTextModel;
  export declare function isEmbeddingsModel(model: Model): model is EmbeddingsModel;
+ export declare function calculateCost(modelName: ModelName, usage: {
+ inputTokens: number;
+ outputTokens: number;
+ cachedInputTokens?: number;
+ }): {
+ inputCost: number;
+ outputCost: number;
+ cachedInputCost?: number;
+ totalCost: number;
+ currency: string;
+ } | null;
package/dist/models.js CHANGED
@@ -1,3 +1,4 @@
+ import { round } from "./util.js";
  export const speechToTextModels = [
  { type: "speech-to-text", modelName: "whisper-local", provider: "local" },
  {
@@ -43,11 +44,11 @@ export const textModels = [
  {
  type: "text",
  modelName: "o3",
- description: "o3 is a reasoning model that sets a new standard for math, science, and coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. The knowledge cutoff for o3 models is October, 2023.",
+ description: "o3 is a reasoning model that sets a new standard for math, science, coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. The knowledge cutoff for o3 models is October, 2023.",
  maxInputTokens: 200000,
  maxOutputTokens: 100000,
  inputTokenCost: 2,
- cachedInputTokenCost: 1,
+ cachedInputTokenCost: 0.5,
  outputTokenCost: 8,
  provider: "openai",
  },
@@ -65,7 +66,7 @@ export const textModels = [
  {
  type: "text",
  modelName: "o4-mini",
- description: "o4-mini is a new o-series reasoning model that replaced o3-mini, providing excellent performance for math, science, and coding tasks. Available in ChatGPT Plus, Pro, and Team.",
+ description: "Latest small o-series model optimized for fast, effective reasoning with exceptional performance in coding and visual tasks. Knowledge cutoff: June 2024.",
  maxInputTokens: 200000,
  maxOutputTokens: 100000,
  inputTokenCost: 1.1,
@@ -131,9 +132,9 @@ export const textModels = [
  {
  type: "text",
  modelName: "gemini-3-pro-preview",
- description: "Strongest Gemini 3 model quality with 2M context window. Standard pricing for ≤200k tokens, higher rates for >200k tokens.",
- maxInputTokens: 2_097_152,
- maxOutputTokens: 8192,
+ description: "Strongest Gemini 3 model quality with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Nov 2025, currently in preview.",
+ maxInputTokens: 1_048_576,
+ maxOutputTokens: 65536,
  inputTokenCost: 2.0,
  outputTokenCost: 12.0,
  provider: "google",
@@ -141,9 +142,9 @@ export const textModels = [
  {
  type: "text",
  modelName: "gemini-3-flash-preview",
- description: "Latest Gemini 3 flash model with 1M context window. Excellent performance for high-volume tasks.",
+ description: "Latest Gemini 3 flash model with 1M context window and 64K output. Outperforms 2.5 Pro while being 3x faster. Optimized for agentic workflows and coding. Includes context caching for 90% cost reductions.",
  maxInputTokens: 1_048_576,
- maxOutputTokens: 8192,
+ maxOutputTokens: 65536,
  inputTokenCost: 0.5,
  outputTokenCost: 3.0,
  provider: "google",
@@ -151,7 +152,7 @@ export const textModels = [
  {
  type: "text",
  modelName: "gemini-2.5-pro",
- description: "High-performance Gemini 2.5 model with 2M context window. Standard pricing for ≤200k tokens, higher rates for >200k tokens.",
+ description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/higher output). Batch API: 50% discount.",
  maxInputTokens: 2_097_152,
  maxOutputTokens: 8192,
  inputTokenCost: 1.25,
@@ -161,7 +162,7 @@ export const textModels = [
  {
  type: "text",
  modelName: "gemini-2.5-flash",
- description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. 1M context window with free tier available.",
+ description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.",
  maxInputTokens: 1_048_576,
  maxOutputTokens: 8192,
  inputTokenCost: 0.3,
@@ -181,11 +182,12 @@ export const textModels = [
  {
  type: "text",
  modelName: "gemini-2.0-flash",
- description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window.",
+ description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window. DEPRECATED: Will be shut down on March 31, 2026.",
  maxInputTokens: 1_048_576,
  maxOutputTokens: 8192,
  inputTokenCost: 0.1,
  outputTokenCost: 0.4,
+ disabled: true,
  provider: "google",
  },
  {
@@ -359,6 +361,13 @@ export const imageModels = [
  description: "aka nano-banana",
  costPerImage: 0.04,
  },
+ {
+ type: "image",
+ modelName: "gemini-3-pro-image-preview",
+ provider: "google",
+ description: "High-fidelity image generation with reasoning-enhanced composition. Supports legible text rendering, complex multi-turn editing, and character consistency using up to 14 reference inputs.",
+ costPerImage: 0.05,
+ },
  ];
  export const embeddingsModels = [
  { type: "embeddings", modelName: "text-embedding-3-small", tokenCost: 0.02 },
@@ -379,3 +388,22 @@ export function isSpeechToTextModel(model) {
  export function isEmbeddingsModel(model) {
  return model.type === "embeddings";
  }
+ export function calculateCost(modelName, usage) {
+ const model = getModel(modelName);
+ if (!model || !isTextModel(model)) {
+ return null;
+ }
+ const inputCost = round((usage.inputTokens * (model.inputTokenCost || 0)) / 1_000_000, 2);
+ const outputCost = round((usage.outputTokens * (model.outputTokenCost || 0)) / 1_000_000, 2);
+ const cachedInputCost = usage.cachedInputTokens && model.cachedInputTokenCost
+ ? round((usage.cachedInputTokens * model.cachedInputTokenCost) / 1_000_000, 2)
+ : undefined;
+ const totalCost = round(inputCost + outputCost + (cachedInputCost || 0), 2);
+ return {
+ inputCost,
+ outputCost,
+ cachedInputCost,
+ totalCost,
+ currency: "USD",
+ };
+ }
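
A worked example of the new cost math, using the o3 pricing above (token costs are per million tokens, and round clamps each figure to two decimal places, so very small per-request costs can round to 0.00). Note that cachedInputCost is added on top of inputCost rather than replacing part of it. The import path is an assumption:

import { calculateCost } from "smoltalk"; // export path assumed

// o3: $2/M input, $8/M output, $0.50/M cached input (per the table above).
const cost = calculateCost("o3", {
  inputTokens: 1_000_000,     // 1,000,000 * 2.0 / 1,000,000 = $2.00
  outputTokens: 100_000,      //   100,000 * 8.0 / 1,000,000 = $0.80
  cachedInputTokens: 500_000, //   500,000 * 0.5 / 1,000,000 = $0.25
});
// => { inputCost: 2, outputCost: 0.8, cachedInputCost: 0.25, totalCost: 3.05, currency: "USD" }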
package/dist/types.d.ts CHANGED
@@ -43,9 +43,24 @@ export type ToolLoopDetection = {
  excludeTools?: string[];
  };
  export type BaseClientConfig = SmolConfig & {};
+ export type TokenUsage = {
+ inputTokens: number;
+ outputTokens: number;
+ cachedInputTokens?: number;
+ totalTokens?: number;
+ };
+ export type CostEstimate = {
+ inputCost: number;
+ outputCost: number;
+ cachedInputCost?: number;
+ totalCost: number;
+ currency: string;
+ };
  export type PromptResult = {
  output: string | null;
  toolCalls: ToolCall[];
+ usage?: TokenUsage;
+ cost?: CostEstimate;
  };
  export type StreamChunk = {
  type: "text";
package/dist/util.d.ts CHANGED
@@ -1 +1,2 @@
  export * from "./util/openai.js";
+ export declare function round(num: number, places: number): number;
package/dist/util.js CHANGED
@@ -1 +1,5 @@
  export * from "./util/openai.js";
+ export function round(num, places) {
+ const factor = Math.pow(10, places);
+ return Math.round(num * factor) / factor;
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "smoltalk",
- "version": "0.0.16",
+ "version": "0.0.18",
  "description": "A common interface for LLM APIs",
  "homepage": "https://github.com/egonSchiele/smoltalk",
  "scripts": {