npm - smoltalk - Versions diffs - 0.0.21 → 0.0.23 - Mend

smoltalk 0.0.21 → 0.0.23

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (6)

package/dist/clients/baseClient.d.ts +1 -0
package/dist/clients/baseClient.js +23 -0
package/dist/models.d.ts +124 -29
package/dist/models.js +71 -15
package/dist/types.d.ts +1 -0
package/package.json +1 -1

package/dist/clients/baseClient.d.ts CHANGED Viewed

@@ -10,6 +10,7 @@ export declare class BaseClient implements SmolClient {
         stream: true;
     }): AsyncGenerator<StreamChunk>;
     text(promptConfig: PromptConfig): Promise<Result<PromptResult>> | AsyncGenerator<StreamChunk>;
+    checkMessageLimit(promptConfig: PromptConfig): Result<PromptResult> | null;
     textSync(promptConfig: PromptConfig): Promise<Result<PromptResult>>;
     checkForToolLoops(promptConfig: PromptConfig): {
         continue: boolean;

package/dist/clients/baseClient.js CHANGED Viewed

@@ -14,7 +14,22 @@ export class BaseClient {
             return this.textSync(promptConfig);
         }
     }
+    checkMessageLimit(promptConfig) {
+        if (promptConfig.maxMessages !== undefined &&
+            promptConfig.messages.length > promptConfig.maxMessages) {
+            const logger = getLogger();
+            logger.warn(`Message limit exceeded: ${promptConfig.messages.length} messages sent, but maxMessages is set to ${promptConfig.maxMessages}. Aborting request.`);
+            return {
+                success: false,
+                error: `Message limit exceeded: ${promptConfig.messages.length} messages exceeds the maxMessages limit of ${promptConfig.maxMessages}`,
+            };
+        }
+        return null;
+    }
     async textSync(promptConfig) {
+        const messageLimitResult = this.checkMessageLimit(promptConfig);
+        if (messageLimitResult)
+            return messageLimitResult;
         const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(promptConfig);
         if (!shouldContinue) {
             return {
@@ -97,6 +112,14 @@ export class BaseClient {
         return this.text(newPromptConfig);
     }
     async *textStream(config) {
+        const messageLimitResult = this.checkMessageLimit(config);
+        if (messageLimitResult) {
+            yield {
+                type: "error",
+                error: messageLimitResult.success === false ? messageLimitResult.error : "Message limit exceeded",
+            };
+            return;
+        }
         const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(config);
         if (!shouldContinue) {
             yield {

package/dist/models.d.ts CHANGED Viewed

@@ -92,10 +92,19 @@ export declare const textModels: readonly [{
     readonly maxInputTokens: 200000;
     readonly maxOutputTokens: 100000;
     readonly inputTokenCost: 1.1;
-    readonly cachedInputTokenCost: 0.55;
+    readonly cachedInputTokenCost: 0.275;
     readonly outputTokenCost: 4.4;
     readonly outputTokensPerSecond: 135;
     readonly provider: "openai";
+}, {
+    readonly type: "text";
+    readonly modelName: "o3-pro";
+    readonly description: "o3-pro uses more compute for complex reasoning tasks. Available via Responses API only. Requests may take several minutes. Knowledge cutoff: June 2024.";
+    readonly maxInputTokens: 200000;
+    readonly maxOutputTokens: 100000;
+    readonly inputTokenCost: 20;
+    readonly outputTokenCost: 80;
+    readonly provider: "openai";
 }, {
     readonly type: "text";
     readonly modelName: "o1";
@@ -140,17 +149,50 @@ export declare const textModels: readonly [{
 }, {
     readonly type: "text";
     readonly modelName: "gpt-4.1";
-    readonly description: "GPT-4.1 supports up to 1 million tokens of context, representing a significant increase in context window capacity. Ideal for processing large documents and extended conversations.";
+    readonly description: "GPT-4.1 excels at instruction following and tool calling with 1M token context window. Knowledge cutoff: June 2024.";
     readonly maxInputTokens: 1047576;
     readonly maxOutputTokens: 32768;
-    readonly inputTokenCost: 2.5;
-    readonly cachedInputTokenCost: 1.25;
-    readonly outputTokenCost: 10;
+    readonly inputTokenCost: 2;
+    readonly cachedInputTokenCost: 0.5;
+    readonly outputTokenCost: 8;
+    readonly outputTokensPerSecond: 105;
+    readonly provider: "openai";
+}, {
+    readonly type: "text";
+    readonly modelName: "gpt-4.1-mini";
+    readonly description: "GPT-4.1 mini excels at instruction following and tool calling with 1M token context window and low latency. Knowledge cutoff: June 2024.";
+    readonly maxInputTokens: 1047576;
+    readonly maxOutputTokens: 32768;
+    readonly inputTokenCost: 0.4;
+    readonly cachedInputTokenCost: 0.1;
+    readonly outputTokenCost: 1.6;
+    readonly outputTokensPerSecond: 78;
+    readonly provider: "openai";
+}, {
+    readonly type: "text";
+    readonly modelName: "gpt-4.1-nano";
+    readonly description: "GPT-4.1 nano is the fastest and most affordable GPT-4.1 variant with 1M token context window. Knowledge cutoff: June 2024.";
+    readonly maxInputTokens: 1047576;
+    readonly maxOutputTokens: 32768;
+    readonly inputTokenCost: 0.1;
+    readonly cachedInputTokenCost: 0.025;
+    readonly outputTokenCost: 0.4;
+    readonly outputTokensPerSecond: 142;
     readonly provider: "openai";
+}, {
+    readonly type: "text";
+    readonly modelName: "gemini-3.1-pro-preview";
+    readonly description: "Latest Gemini 3.1 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Feb 2026.";
+    readonly maxInputTokens: 1048576;
+    readonly maxOutputTokens: 65536;
+    readonly inputTokenCost: 2;
+    readonly outputTokenCost: 12;
+    readonly outputTokensPerSecond: 112;
+    readonly provider: "google";
 }, {
     readonly type: "text";
     readonly modelName: "gemini-3-pro-preview";
-    readonly description: "Strongest Gemini 3 model quality with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Nov 2025, currently in preview.";
+    readonly description: "Gemini 3 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Superseded by 3.1 Pro.";
     readonly maxInputTokens: 1048576;
     readonly maxOutputTokens: 65536;
     readonly inputTokenCost: 2;
@@ -168,9 +210,9 @@ export declare const textModels: readonly [{
 }, {
     readonly type: "text";
     readonly modelName: "gemini-2.5-pro";
-    readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/higher output). Batch API: 50% discount.";
+    readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/$15.00 output). Batch API: 50% discount.";
     readonly maxInputTokens: 2097152;
-    readonly maxOutputTokens: 8192;
+    readonly maxOutputTokens: 65536;
     readonly inputTokenCost: 1.25;
     readonly outputTokenCost: 10;
     readonly outputTokensPerSecond: 175;
@@ -180,7 +222,7 @@ export declare const textModels: readonly [{
     readonly modelName: "gemini-2.5-flash";
     readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.";
     readonly maxInputTokens: 1048576;
-    readonly maxOutputTokens: 8192;
+    readonly maxOutputTokens: 65536;
     readonly inputTokenCost: 0.3;
     readonly outputTokenCost: 2.5;
     readonly outputTokensPerSecond: 225;
@@ -190,7 +232,7 @@ export declare const textModels: readonly [{
     readonly modelName: "gemini-2.5-flash-lite";
     readonly description: "Most cost-effective Gemini 2.5 option for high-throughput applications. 1M context window.";
     readonly maxInputTokens: 1048576;
-    readonly maxOutputTokens: 8192;
+    readonly maxOutputTokens: 65536;
     readonly inputTokenCost: 0.1;
     readonly outputTokenCost: 0.4;
     readonly outputTokensPerSecond: 400;
@@ -219,43 +261,47 @@ export declare const textModels: readonly [{
 }, {
     readonly type: "text";
     readonly modelName: "gemini-2.0-flash-lite";
-    readonly description: "Cost effective offering to support high throughput. Note: May be deprecated in favor of 2.5-flash-lite.";
+    readonly description: "Cost effective offering to support high throughput. DEPRECATED: Will be shut down on March 31, 2026. Use gemini-2.5-flash-lite instead.";
     readonly maxInputTokens: 1048576;
     readonly maxOutputTokens: 8192;
     readonly inputTokenCost: 0.075;
     readonly outputTokenCost: 0.3;
+    readonly disabled: true;
     readonly provider: "google";
 }, {
     readonly type: "text";
     readonly modelName: "gemini-1.5-flash";
-    readonly description: "Provides speed and efficiency for high-volume, quality, cost-effective apps. Note: prices ~double after the first 128k tokens.";
+    readonly description: "RETIRED: No longer available. Use gemini-2.5-flash instead.";
     readonly maxInputTokens: 1048576;
     readonly maxOutputTokens: 8192;
     readonly inputTokenCost: 0.01875;
     readonly outputTokenCost: 0.075;
     readonly outputTokensPerSecond: 178;
     readonly costUnit: "characters";
+    readonly disabled: true;
     readonly provider: "google";
 }, {
     readonly type: "text";
     readonly modelName: "gemini-1.5-pro";
-    readonly description: "Supports text or chat prompts for a text or code response. Supports long-context understanding up to the maximum input token limit. Also does video?";
+    readonly description: "RETIRED: No longer available. Use gemini-2.5-pro instead.";
     readonly maxInputTokens: 2097152;
     readonly maxOutputTokens: 8192;
     readonly inputTokenCost: 0.3125;
     readonly outputTokenCost: 1.25;
     readonly outputTokensPerSecond: 59;
     readonly costUnit: "characters";
+    readonly disabled: true;
     readonly provider: "google";
 }, {
     readonly type: "text";
     readonly modelName: "gemini-1.0-pro";
-    readonly description: "The best performing model for a wide range of text-only tasks.";
+    readonly description: "RETIRED: No longer available. Use gemini-2.5-flash instead.";
     readonly maxInputTokens: 32760;
     readonly maxOutputTokens: 8192;
     readonly inputTokenCost: 0.125;
     readonly outputTokenCost: 0.375;
     readonly costUnit: "characters";
+    readonly disabled: true;
     readonly provider: "google";
 }, {
     readonly type: "text";
@@ -409,10 +455,19 @@ export declare function getModel(modelName: ModelName): {
     readonly maxInputTokens: 200000;
     readonly maxOutputTokens: 100000;
     readonly inputTokenCost: 1.1;
-    readonly cachedInputTokenCost: 0.55;
+    readonly cachedInputTokenCost: 0.275;
     readonly outputTokenCost: 4.4;
     readonly outputTokensPerSecond: 135;
     readonly provider: "openai";
+} | {
+    readonly type: "text";
+    readonly modelName: "o3-pro";
+    readonly description: "o3-pro uses more compute for complex reasoning tasks. Available via Responses API only. Requests may take several minutes. Knowledge cutoff: June 2024.";
+    readonly maxInputTokens: 200000;
+    readonly maxOutputTokens: 100000;
+    readonly inputTokenCost: 20;
+    readonly outputTokenCost: 80;
+    readonly provider: "openai";
 } | {
     readonly type: "text";
     readonly modelName: "o1";
@@ -457,17 +512,50 @@ export declare function getModel(modelName: ModelName): {
 } | {
     readonly type: "text";
     readonly modelName: "gpt-4.1";
-    readonly description: "GPT-4.1 supports up to 1 million tokens of context, representing a significant increase in context window capacity. Ideal for processing large documents and extended conversations.";
+    readonly description: "GPT-4.1 excels at instruction following and tool calling with 1M token context window. Knowledge cutoff: June 2024.";
     readonly maxInputTokens: 1047576;
     readonly maxOutputTokens: 32768;
-    readonly inputTokenCost: 2.5;
-    readonly cachedInputTokenCost: 1.25;
-    readonly outputTokenCost: 10;
+    readonly inputTokenCost: 2;
+    readonly cachedInputTokenCost: 0.5;
+    readonly outputTokenCost: 8;
+    readonly outputTokensPerSecond: 105;
+    readonly provider: "openai";
+} | {
+    readonly type: "text";
+    readonly modelName: "gpt-4.1-mini";
+    readonly description: "GPT-4.1 mini excels at instruction following and tool calling with 1M token context window and low latency. Knowledge cutoff: June 2024.";
+    readonly maxInputTokens: 1047576;
+    readonly maxOutputTokens: 32768;
+    readonly inputTokenCost: 0.4;
+    readonly cachedInputTokenCost: 0.1;
+    readonly outputTokenCost: 1.6;
+    readonly outputTokensPerSecond: 78;
+    readonly provider: "openai";
+} | {
+    readonly type: "text";
+    readonly modelName: "gpt-4.1-nano";
+    readonly description: "GPT-4.1 nano is the fastest and most affordable GPT-4.1 variant with 1M token context window. Knowledge cutoff: June 2024.";
+    readonly maxInputTokens: 1047576;
+    readonly maxOutputTokens: 32768;
+    readonly inputTokenCost: 0.1;
+    readonly cachedInputTokenCost: 0.025;
+    readonly outputTokenCost: 0.4;
+    readonly outputTokensPerSecond: 142;
     readonly provider: "openai";
+} | {
+    readonly type: "text";
+    readonly modelName: "gemini-3.1-pro-preview";
+    readonly description: "Latest Gemini 3.1 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Feb 2026.";
+    readonly maxInputTokens: 1048576;
+    readonly maxOutputTokens: 65536;
+    readonly inputTokenCost: 2;
+    readonly outputTokenCost: 12;
+    readonly outputTokensPerSecond: 112;
+    readonly provider: "google";
 } | {
     readonly type: "text";
     readonly modelName: "gemini-3-pro-preview";
-    readonly description: "Strongest Gemini 3 model quality with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Nov 2025, currently in preview.";
+    readonly description: "Gemini 3 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Superseded by 3.1 Pro.";
     readonly maxInputTokens: 1048576;
     readonly maxOutputTokens: 65536;
     readonly inputTokenCost: 2;
@@ -485,9 +573,9 @@ export declare function getModel(modelName: ModelName): {
 } | {
     readonly type: "text";
     readonly modelName: "gemini-2.5-pro";
-    readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/higher output). Batch API: 50% discount.";
+    readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/$15.00 output). Batch API: 50% discount.";
     readonly maxInputTokens: 2097152;
-    readonly maxOutputTokens: 8192;
+    readonly maxOutputTokens: 65536;
     readonly inputTokenCost: 1.25;
     readonly outputTokenCost: 10;
     readonly outputTokensPerSecond: 175;
@@ -497,7 +585,7 @@ export declare function getModel(modelName: ModelName): {
     readonly modelName: "gemini-2.5-flash";
     readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.";
     readonly maxInputTokens: 1048576;
-    readonly maxOutputTokens: 8192;
+    readonly maxOutputTokens: 65536;
     readonly inputTokenCost: 0.3;
     readonly outputTokenCost: 2.5;
     readonly outputTokensPerSecond: 225;
@@ -507,7 +595,7 @@ export declare function getModel(modelName: ModelName): {
     readonly modelName: "gemini-2.5-flash-lite";
     readonly description: "Most cost-effective Gemini 2.5 option for high-throughput applications. 1M context window.";
     readonly maxInputTokens: 1048576;
-    readonly maxOutputTokens: 8192;
+    readonly maxOutputTokens: 65536;
     readonly inputTokenCost: 0.1;
     readonly outputTokenCost: 0.4;
     readonly outputTokensPerSecond: 400;
@@ -536,43 +624,47 @@ export declare function getModel(modelName: ModelName): {
 } | {
     readonly type: "text";
     readonly modelName: "gemini-2.0-flash-lite";
-    readonly description: "Cost effective offering to support high throughput. Note: May be deprecated in favor of 2.5-flash-lite.";
+    readonly description: "Cost effective offering to support high throughput. DEPRECATED: Will be shut down on March 31, 2026. Use gemini-2.5-flash-lite instead.";
     readonly maxInputTokens: 1048576;
     readonly maxOutputTokens: 8192;
     readonly inputTokenCost: 0.075;
     readonly outputTokenCost: 0.3;
+    readonly disabled: true;
     readonly provider: "google";
 } | {
     readonly type: "text";
     readonly modelName: "gemini-1.5-flash";
-    readonly description: "Provides speed and efficiency for high-volume, quality, cost-effective apps. Note: prices ~double after the first 128k tokens.";
+    readonly description: "RETIRED: No longer available. Use gemini-2.5-flash instead.";
     readonly maxInputTokens: 1048576;
     readonly maxOutputTokens: 8192;
     readonly inputTokenCost: 0.01875;
     readonly outputTokenCost: 0.075;
     readonly outputTokensPerSecond: 178;
     readonly costUnit: "characters";
+    readonly disabled: true;
     readonly provider: "google";
 } | {
     readonly type: "text";
     readonly modelName: "gemini-1.5-pro";
-    readonly description: "Supports text or chat prompts for a text or code response. Supports long-context understanding up to the maximum input token limit. Also does video?";
+    readonly description: "RETIRED: No longer available. Use gemini-2.5-pro instead.";
     readonly maxInputTokens: 2097152;
     readonly maxOutputTokens: 8192;
     readonly inputTokenCost: 0.3125;
     readonly outputTokenCost: 1.25;
     readonly outputTokensPerSecond: 59;
     readonly costUnit: "characters";
+    readonly disabled: true;
     readonly provider: "google";
 } | {
     readonly type: "text";
     readonly modelName: "gemini-1.0-pro";
-    readonly description: "The best performing model for a wide range of text-only tasks.";
+    readonly description: "RETIRED: No longer available. Use gemini-2.5-flash instead.";
     readonly maxInputTokens: 32760;
     readonly maxOutputTokens: 8192;
     readonly inputTokenCost: 0.125;
     readonly outputTokenCost: 0.375;
     readonly costUnit: "characters";
+    readonly disabled: true;
     readonly provider: "google";
 } | {
     readonly type: "text";
@@ -663,9 +755,12 @@ export type Optimization = "speed" | "accuracy" | "cost" | "large-context";
 export type ModelConfig = {
     optimizeFor: Optimization[];
     providers: Provider[];
+    limit?: {
+        cost?: number;
+    };
 };
 export declare function isModelConfig(model: ModelName | ModelConfig): model is ModelConfig;
-export declare function pickModel(config: ModelConfig, models?: readonly (typeof textModels)[number][]): TextModelName;
+export declare function pickModel(config: ModelConfig, models?: readonly TextModel[]): TextModelName;
 export declare function calculateCost(modelName: ModelName, usage: {
     inputTokens: number;
     outputTokens: number;

package/dist/models.js CHANGED Viewed

@@ -75,11 +75,21 @@ export const textModels = [
         maxInputTokens: 200000,
         maxOutputTokens: 100000,
         inputTokenCost: 1.1,
-        cachedInputTokenCost: 0.55,
+        cachedInputTokenCost: 0.275,
         outputTokenCost: 4.4,
         outputTokensPerSecond: 135,
         provider: "openai",
     },
+    {
+        type: "text",
+        modelName: "o3-pro",
+        description: "o3-pro uses more compute for complex reasoning tasks. Available via Responses API only. Requests may take several minutes. Knowledge cutoff: June 2024.",
+        maxInputTokens: 200000,
+        maxOutputTokens: 100000,
+        inputTokenCost: 20,
+        outputTokenCost: 80,
+        provider: "openai",
+    },
     {
         type: "text",
         modelName: "o1",
@@ -128,18 +138,54 @@ export const textModels = [
     {
         type: "text",
         modelName: "gpt-4.1",
-        description: "GPT-4.1 supports up to 1 million tokens of context, representing a significant increase in context window capacity. Ideal for processing large documents and extended conversations.",
+        description: "GPT-4.1 excels at instruction following and tool calling with 1M token context window. Knowledge cutoff: June 2024.",
         maxInputTokens: 1047576,
         maxOutputTokens: 32768,
-        inputTokenCost: 2.5,
-        cachedInputTokenCost: 1.25,
-        outputTokenCost: 10,
+        inputTokenCost: 2.0,
+        cachedInputTokenCost: 0.5,
+        outputTokenCost: 8,
+        outputTokensPerSecond: 105,
+        provider: "openai",
+    },
+    {
+        type: "text",
+        modelName: "gpt-4.1-mini",
+        description: "GPT-4.1 mini excels at instruction following and tool calling with 1M token context window and low latency. Knowledge cutoff: June 2024.",
+        maxInputTokens: 1047576,
+        maxOutputTokens: 32768,
+        inputTokenCost: 0.4,
+        cachedInputTokenCost: 0.1,
+        outputTokenCost: 1.6,
+        outputTokensPerSecond: 78,
         provider: "openai",
     },
+    {
+        type: "text",
+        modelName: "gpt-4.1-nano",
+        description: "GPT-4.1 nano is the fastest and most affordable GPT-4.1 variant with 1M token context window. Knowledge cutoff: June 2024.",
+        maxInputTokens: 1047576,
+        maxOutputTokens: 32768,
+        inputTokenCost: 0.1,
+        cachedInputTokenCost: 0.025,
+        outputTokenCost: 0.4,
+        outputTokensPerSecond: 142,
+        provider: "openai",
+    },
+    {
+        type: "text",
+        modelName: "gemini-3.1-pro-preview",
+        description: "Latest Gemini 3.1 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Feb 2026.",
+        maxInputTokens: 1_048_576,
+        maxOutputTokens: 65536,
+        inputTokenCost: 2.0,
+        outputTokenCost: 12.0,
+        outputTokensPerSecond: 112,
+        provider: "google",
+    },
     {
         type: "text",
         modelName: "gemini-3-pro-preview",
-        description: "Strongest Gemini 3 model quality with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Nov 2025, currently in preview.",
+        description: "Gemini 3 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Superseded by 3.1 Pro.",
         maxInputTokens: 1_048_576,
         maxOutputTokens: 65536,
         inputTokenCost: 2.0,
@@ -159,9 +205,9 @@ export const textModels = [
     {
         type: "text",
         modelName: "gemini-2.5-pro",
-        description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/higher output). Batch API: 50% discount.",
+        description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/$15.00 output). Batch API: 50% discount.",
         maxInputTokens: 2_097_152,
-        maxOutputTokens: 8192,
+        maxOutputTokens: 65536,
         inputTokenCost: 1.25,
         outputTokenCost: 10.0,
         outputTokensPerSecond: 175,
@@ -172,7 +218,7 @@ export const textModels = [
         modelName: "gemini-2.5-flash",
         description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.",
         maxInputTokens: 1_048_576,
-        maxOutputTokens: 8192,
+        maxOutputTokens: 65536,
         inputTokenCost: 0.3,
         outputTokenCost: 2.5,
         outputTokensPerSecond: 225,
@@ -183,7 +229,7 @@ export const textModels = [
         modelName: "gemini-2.5-flash-lite",
         description: "Most cost-effective Gemini 2.5 option for high-throughput applications. 1M context window.",
         maxInputTokens: 1_048_576,
-        maxOutputTokens: 8192,
+        maxOutputTokens: 65536,
         inputTokenCost: 0.1,
         outputTokenCost: 0.4,
         outputTokensPerSecond: 400,
@@ -215,46 +261,50 @@ export const textModels = [
     {
         type: "text",
         modelName: "gemini-2.0-flash-lite",
-        description: "Cost effective offering to support high throughput. Note: May be deprecated in favor of 2.5-flash-lite.",
+        description: "Cost effective offering to support high throughput. DEPRECATED: Will be shut down on March 31, 2026. Use gemini-2.5-flash-lite instead.",
         maxInputTokens: 1_048_576,
         maxOutputTokens: 8192,
         inputTokenCost: 0.075,
         outputTokenCost: 0.3,
+        disabled: true,
         provider: "google",
     },
     {
         type: "text",
         modelName: "gemini-1.5-flash",
-        description: "Provides speed and efficiency for high-volume, quality, cost-effective apps. Note: prices ~double after the first 128k tokens.",
+        description: "RETIRED: No longer available. Use gemini-2.5-flash instead.",
         maxInputTokens: 1_048_576,
         maxOutputTokens: 8192,
         inputTokenCost: 0.01875,
         outputTokenCost: 0.075,
         outputTokensPerSecond: 178,
         costUnit: "characters",
+        disabled: true,
         provider: "google",
     },
     {
         type: "text",
         modelName: "gemini-1.5-pro",
-        description: "Supports text or chat prompts for a text or code response. Supports long-context understanding up to the maximum input token limit. Also does video?",
+        description: "RETIRED: No longer available. Use gemini-2.5-pro instead.",
         maxInputTokens: 2_097_152,
         maxOutputTokens: 8192,
         inputTokenCost: 0.3125,
         outputTokenCost: 1.25,
         outputTokensPerSecond: 59,
         costUnit: "characters",
+        disabled: true,
         provider: "google",
     },
     {
         type: "text",
         modelName: "gemini-1.0-pro",
-        description: "The best performing model for a wide range of text-only tasks.",
+        description: "RETIRED: No longer available. Use gemini-2.5-flash instead.",
         maxInputTokens: 32_760,
         maxOutputTokens: 8192,
         inputTokenCost: 0.125,
         outputTokenCost: 0.375,
         costUnit: "characters",
+        disabled: true,
         provider: "google",
     },
     {
@@ -429,8 +479,14 @@ function isLowerBetter(optimization) {
     return optimization === "cost";
 }
 export function pickModel(config, models = textModels) {
-    const candidates = models.filter((m) => config.providers.includes(m.provider) &&
+    let candidates = models.filter((m) => config.providers.includes(m.provider) &&
         !("disabled" in m && m.disabled));
+    if (config.limit?.cost !== undefined) {
+        candidates = candidates.filter((m) => {
+            const cost = (m.inputTokenCost ?? 0) + (m.outputTokenCost ?? 0);
+            return cost <= config.limit.cost;
+        });
+    }
     if (candidates.length === 0) {
         throw new SmolError("No models available for providers: " +
             config.providers.join(", ") +

package/dist/types.d.ts CHANGED Viewed

@@ -25,6 +25,7 @@ export type PromptConfig = {
         numRetries: number;
     }>;
     rawAttributes?: Record<string, any>;
+    maxMessages?: number;
 };
 export type SmolConfig = {
     openAiApiKey?: string;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "smoltalk",
-  "version": "0.0.21",
+  "version": "0.0.23",
   "description": "A common interface for LLM APIs",
   "homepage": "https://github.com/egonSchiele/smoltalk",
   "scripts": {