smoltalk 0.0.21 → 0.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,7 @@ export declare class BaseClient implements SmolClient {
10
10
  stream: true;
11
11
  }): AsyncGenerator<StreamChunk>;
12
12
  text(promptConfig: PromptConfig): Promise<Result<PromptResult>> | AsyncGenerator<StreamChunk>;
13
+ checkMessageLimit(promptConfig: PromptConfig): Result<PromptResult> | null;
13
14
  textSync(promptConfig: PromptConfig): Promise<Result<PromptResult>>;
14
15
  checkForToolLoops(promptConfig: PromptConfig): {
15
16
  continue: boolean;
@@ -14,7 +14,22 @@ export class BaseClient {
14
14
  return this.textSync(promptConfig);
15
15
  }
16
16
  }
17
+ checkMessageLimit(promptConfig) {
18
+ if (promptConfig.maxMessages !== undefined &&
19
+ promptConfig.messages.length > promptConfig.maxMessages) {
20
+ const logger = getLogger();
21
+ logger.warn(`Message limit exceeded: ${promptConfig.messages.length} messages sent, but maxMessages is set to ${promptConfig.maxMessages}. Aborting request.`);
22
+ return {
23
+ success: false,
24
+ error: `Message limit exceeded: ${promptConfig.messages.length} messages exceeds the maxMessages limit of ${promptConfig.maxMessages}`,
25
+ };
26
+ }
27
+ return null;
28
+ }
17
29
  async textSync(promptConfig) {
30
+ const messageLimitResult = this.checkMessageLimit(promptConfig);
31
+ if (messageLimitResult)
32
+ return messageLimitResult;
18
33
  const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(promptConfig);
19
34
  if (!shouldContinue) {
20
35
  return {
@@ -97,6 +112,14 @@ export class BaseClient {
97
112
  return this.text(newPromptConfig);
98
113
  }
99
114
  async *textStream(config) {
115
+ const messageLimitResult = this.checkMessageLimit(config);
116
+ if (messageLimitResult) {
117
+ yield {
118
+ type: "error",
119
+ error: messageLimitResult.success === false ? messageLimitResult.error : "Message limit exceeded",
120
+ };
121
+ return;
122
+ }
100
123
  const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(config);
101
124
  if (!shouldContinue) {
102
125
  yield {
package/dist/models.d.ts CHANGED
@@ -92,10 +92,19 @@ export declare const textModels: readonly [{
92
92
  readonly maxInputTokens: 200000;
93
93
  readonly maxOutputTokens: 100000;
94
94
  readonly inputTokenCost: 1.1;
95
- readonly cachedInputTokenCost: 0.55;
95
+ readonly cachedInputTokenCost: 0.275;
96
96
  readonly outputTokenCost: 4.4;
97
97
  readonly outputTokensPerSecond: 135;
98
98
  readonly provider: "openai";
99
+ }, {
100
+ readonly type: "text";
101
+ readonly modelName: "o3-pro";
102
+ readonly description: "o3-pro uses more compute for complex reasoning tasks. Available via Responses API only. Requests may take several minutes. Knowledge cutoff: June 2024.";
103
+ readonly maxInputTokens: 200000;
104
+ readonly maxOutputTokens: 100000;
105
+ readonly inputTokenCost: 20;
106
+ readonly outputTokenCost: 80;
107
+ readonly provider: "openai";
99
108
  }, {
100
109
  readonly type: "text";
101
110
  readonly modelName: "o1";
@@ -140,17 +149,50 @@ export declare const textModels: readonly [{
140
149
  }, {
141
150
  readonly type: "text";
142
151
  readonly modelName: "gpt-4.1";
143
- readonly description: "GPT-4.1 supports up to 1 million tokens of context, representing a significant increase in context window capacity. Ideal for processing large documents and extended conversations.";
152
+ readonly description: "GPT-4.1 excels at instruction following and tool calling with 1M token context window. Knowledge cutoff: June 2024.";
144
153
  readonly maxInputTokens: 1047576;
145
154
  readonly maxOutputTokens: 32768;
146
- readonly inputTokenCost: 2.5;
147
- readonly cachedInputTokenCost: 1.25;
148
- readonly outputTokenCost: 10;
155
+ readonly inputTokenCost: 2;
156
+ readonly cachedInputTokenCost: 0.5;
157
+ readonly outputTokenCost: 8;
158
+ readonly outputTokensPerSecond: 105;
159
+ readonly provider: "openai";
160
+ }, {
161
+ readonly type: "text";
162
+ readonly modelName: "gpt-4.1-mini";
163
+ readonly description: "GPT-4.1 mini excels at instruction following and tool calling with 1M token context window and low latency. Knowledge cutoff: June 2024.";
164
+ readonly maxInputTokens: 1047576;
165
+ readonly maxOutputTokens: 32768;
166
+ readonly inputTokenCost: 0.4;
167
+ readonly cachedInputTokenCost: 0.1;
168
+ readonly outputTokenCost: 1.6;
169
+ readonly outputTokensPerSecond: 78;
170
+ readonly provider: "openai";
171
+ }, {
172
+ readonly type: "text";
173
+ readonly modelName: "gpt-4.1-nano";
174
+ readonly description: "GPT-4.1 nano is the fastest and most affordable GPT-4.1 variant with 1M token context window. Knowledge cutoff: June 2024.";
175
+ readonly maxInputTokens: 1047576;
176
+ readonly maxOutputTokens: 32768;
177
+ readonly inputTokenCost: 0.1;
178
+ readonly cachedInputTokenCost: 0.025;
179
+ readonly outputTokenCost: 0.4;
180
+ readonly outputTokensPerSecond: 142;
149
181
  readonly provider: "openai";
182
+ }, {
183
+ readonly type: "text";
184
+ readonly modelName: "gemini-3.1-pro-preview";
185
+ readonly description: "Latest Gemini 3.1 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Feb 2026.";
186
+ readonly maxInputTokens: 1048576;
187
+ readonly maxOutputTokens: 65536;
188
+ readonly inputTokenCost: 2;
189
+ readonly outputTokenCost: 12;
190
+ readonly outputTokensPerSecond: 112;
191
+ readonly provider: "google";
150
192
  }, {
151
193
  readonly type: "text";
152
194
  readonly modelName: "gemini-3-pro-preview";
153
- readonly description: "Strongest Gemini 3 model quality with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Nov 2025, currently in preview.";
195
+ readonly description: "Gemini 3 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Superseded by 3.1 Pro.";
154
196
  readonly maxInputTokens: 1048576;
155
197
  readonly maxOutputTokens: 65536;
156
198
  readonly inputTokenCost: 2;
@@ -168,9 +210,9 @@ export declare const textModels: readonly [{
168
210
  }, {
169
211
  readonly type: "text";
170
212
  readonly modelName: "gemini-2.5-pro";
171
- readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/higher output). Batch API: 50% discount.";
213
+ readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/$15.00 output). Batch API: 50% discount.";
172
214
  readonly maxInputTokens: 2097152;
173
- readonly maxOutputTokens: 8192;
215
+ readonly maxOutputTokens: 65536;
174
216
  readonly inputTokenCost: 1.25;
175
217
  readonly outputTokenCost: 10;
176
218
  readonly outputTokensPerSecond: 175;
@@ -180,7 +222,7 @@ export declare const textModels: readonly [{
180
222
  readonly modelName: "gemini-2.5-flash";
181
223
  readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.";
182
224
  readonly maxInputTokens: 1048576;
183
- readonly maxOutputTokens: 8192;
225
+ readonly maxOutputTokens: 65536;
184
226
  readonly inputTokenCost: 0.3;
185
227
  readonly outputTokenCost: 2.5;
186
228
  readonly outputTokensPerSecond: 225;
@@ -190,7 +232,7 @@ export declare const textModels: readonly [{
190
232
  readonly modelName: "gemini-2.5-flash-lite";
191
233
  readonly description: "Most cost-effective Gemini 2.5 option for high-throughput applications. 1M context window.";
192
234
  readonly maxInputTokens: 1048576;
193
- readonly maxOutputTokens: 8192;
235
+ readonly maxOutputTokens: 65536;
194
236
  readonly inputTokenCost: 0.1;
195
237
  readonly outputTokenCost: 0.4;
196
238
  readonly outputTokensPerSecond: 400;
@@ -219,43 +261,47 @@ export declare const textModels: readonly [{
219
261
  }, {
220
262
  readonly type: "text";
221
263
  readonly modelName: "gemini-2.0-flash-lite";
222
- readonly description: "Cost effective offering to support high throughput. Note: May be deprecated in favor of 2.5-flash-lite.";
264
+ readonly description: "Cost effective offering to support high throughput. DEPRECATED: Will be shut down on March 31, 2026. Use gemini-2.5-flash-lite instead.";
223
265
  readonly maxInputTokens: 1048576;
224
266
  readonly maxOutputTokens: 8192;
225
267
  readonly inputTokenCost: 0.075;
226
268
  readonly outputTokenCost: 0.3;
269
+ readonly disabled: true;
227
270
  readonly provider: "google";
228
271
  }, {
229
272
  readonly type: "text";
230
273
  readonly modelName: "gemini-1.5-flash";
231
- readonly description: "Provides speed and efficiency for high-volume, quality, cost-effective apps. Note: prices ~double after the first 128k tokens.";
274
+ readonly description: "RETIRED: No longer available. Use gemini-2.5-flash instead.";
232
275
  readonly maxInputTokens: 1048576;
233
276
  readonly maxOutputTokens: 8192;
234
277
  readonly inputTokenCost: 0.01875;
235
278
  readonly outputTokenCost: 0.075;
236
279
  readonly outputTokensPerSecond: 178;
237
280
  readonly costUnit: "characters";
281
+ readonly disabled: true;
238
282
  readonly provider: "google";
239
283
  }, {
240
284
  readonly type: "text";
241
285
  readonly modelName: "gemini-1.5-pro";
242
- readonly description: "Supports text or chat prompts for a text or code response. Supports long-context understanding up to the maximum input token limit. Also does video?";
286
+ readonly description: "RETIRED: No longer available. Use gemini-2.5-pro instead.";
243
287
  readonly maxInputTokens: 2097152;
244
288
  readonly maxOutputTokens: 8192;
245
289
  readonly inputTokenCost: 0.3125;
246
290
  readonly outputTokenCost: 1.25;
247
291
  readonly outputTokensPerSecond: 59;
248
292
  readonly costUnit: "characters";
293
+ readonly disabled: true;
249
294
  readonly provider: "google";
250
295
  }, {
251
296
  readonly type: "text";
252
297
  readonly modelName: "gemini-1.0-pro";
253
- readonly description: "The best performing model for a wide range of text-only tasks.";
298
+ readonly description: "RETIRED: No longer available. Use gemini-2.5-flash instead.";
254
299
  readonly maxInputTokens: 32760;
255
300
  readonly maxOutputTokens: 8192;
256
301
  readonly inputTokenCost: 0.125;
257
302
  readonly outputTokenCost: 0.375;
258
303
  readonly costUnit: "characters";
304
+ readonly disabled: true;
259
305
  readonly provider: "google";
260
306
  }, {
261
307
  readonly type: "text";
@@ -409,10 +455,19 @@ export declare function getModel(modelName: ModelName): {
409
455
  readonly maxInputTokens: 200000;
410
456
  readonly maxOutputTokens: 100000;
411
457
  readonly inputTokenCost: 1.1;
412
- readonly cachedInputTokenCost: 0.55;
458
+ readonly cachedInputTokenCost: 0.275;
413
459
  readonly outputTokenCost: 4.4;
414
460
  readonly outputTokensPerSecond: 135;
415
461
  readonly provider: "openai";
462
+ } | {
463
+ readonly type: "text";
464
+ readonly modelName: "o3-pro";
465
+ readonly description: "o3-pro uses more compute for complex reasoning tasks. Available via Responses API only. Requests may take several minutes. Knowledge cutoff: June 2024.";
466
+ readonly maxInputTokens: 200000;
467
+ readonly maxOutputTokens: 100000;
468
+ readonly inputTokenCost: 20;
469
+ readonly outputTokenCost: 80;
470
+ readonly provider: "openai";
416
471
  } | {
417
472
  readonly type: "text";
418
473
  readonly modelName: "o1";
@@ -457,17 +512,50 @@ export declare function getModel(modelName: ModelName): {
457
512
  } | {
458
513
  readonly type: "text";
459
514
  readonly modelName: "gpt-4.1";
460
- readonly description: "GPT-4.1 supports up to 1 million tokens of context, representing a significant increase in context window capacity. Ideal for processing large documents and extended conversations.";
515
+ readonly description: "GPT-4.1 excels at instruction following and tool calling with 1M token context window. Knowledge cutoff: June 2024.";
461
516
  readonly maxInputTokens: 1047576;
462
517
  readonly maxOutputTokens: 32768;
463
- readonly inputTokenCost: 2.5;
464
- readonly cachedInputTokenCost: 1.25;
465
- readonly outputTokenCost: 10;
518
+ readonly inputTokenCost: 2;
519
+ readonly cachedInputTokenCost: 0.5;
520
+ readonly outputTokenCost: 8;
521
+ readonly outputTokensPerSecond: 105;
522
+ readonly provider: "openai";
523
+ } | {
524
+ readonly type: "text";
525
+ readonly modelName: "gpt-4.1-mini";
526
+ readonly description: "GPT-4.1 mini excels at instruction following and tool calling with 1M token context window and low latency. Knowledge cutoff: June 2024.";
527
+ readonly maxInputTokens: 1047576;
528
+ readonly maxOutputTokens: 32768;
529
+ readonly inputTokenCost: 0.4;
530
+ readonly cachedInputTokenCost: 0.1;
531
+ readonly outputTokenCost: 1.6;
532
+ readonly outputTokensPerSecond: 78;
533
+ readonly provider: "openai";
534
+ } | {
535
+ readonly type: "text";
536
+ readonly modelName: "gpt-4.1-nano";
537
+ readonly description: "GPT-4.1 nano is the fastest and most affordable GPT-4.1 variant with 1M token context window. Knowledge cutoff: June 2024.";
538
+ readonly maxInputTokens: 1047576;
539
+ readonly maxOutputTokens: 32768;
540
+ readonly inputTokenCost: 0.1;
541
+ readonly cachedInputTokenCost: 0.025;
542
+ readonly outputTokenCost: 0.4;
543
+ readonly outputTokensPerSecond: 142;
466
544
  readonly provider: "openai";
545
+ } | {
546
+ readonly type: "text";
547
+ readonly modelName: "gemini-3.1-pro-preview";
548
+ readonly description: "Latest Gemini 3.1 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Feb 2026.";
549
+ readonly maxInputTokens: 1048576;
550
+ readonly maxOutputTokens: 65536;
551
+ readonly inputTokenCost: 2;
552
+ readonly outputTokenCost: 12;
553
+ readonly outputTokensPerSecond: 112;
554
+ readonly provider: "google";
467
555
  } | {
468
556
  readonly type: "text";
469
557
  readonly modelName: "gemini-3-pro-preview";
470
- readonly description: "Strongest Gemini 3 model quality with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Nov 2025, currently in preview.";
558
+ readonly description: "Gemini 3 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Superseded by 3.1 Pro.";
471
559
  readonly maxInputTokens: 1048576;
472
560
  readonly maxOutputTokens: 65536;
473
561
  readonly inputTokenCost: 2;
@@ -485,9 +573,9 @@ export declare function getModel(modelName: ModelName): {
485
573
  } | {
486
574
  readonly type: "text";
487
575
  readonly modelName: "gemini-2.5-pro";
488
- readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/higher output). Batch API: 50% discount.";
576
+ readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/$15.00 output). Batch API: 50% discount.";
489
577
  readonly maxInputTokens: 2097152;
490
- readonly maxOutputTokens: 8192;
578
+ readonly maxOutputTokens: 65536;
491
579
  readonly inputTokenCost: 1.25;
492
580
  readonly outputTokenCost: 10;
493
581
  readonly outputTokensPerSecond: 175;
@@ -497,7 +585,7 @@ export declare function getModel(modelName: ModelName): {
497
585
  readonly modelName: "gemini-2.5-flash";
498
586
  readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.";
499
587
  readonly maxInputTokens: 1048576;
500
- readonly maxOutputTokens: 8192;
588
+ readonly maxOutputTokens: 65536;
501
589
  readonly inputTokenCost: 0.3;
502
590
  readonly outputTokenCost: 2.5;
503
591
  readonly outputTokensPerSecond: 225;
@@ -507,7 +595,7 @@ export declare function getModel(modelName: ModelName): {
507
595
  readonly modelName: "gemini-2.5-flash-lite";
508
596
  readonly description: "Most cost-effective Gemini 2.5 option for high-throughput applications. 1M context window.";
509
597
  readonly maxInputTokens: 1048576;
510
- readonly maxOutputTokens: 8192;
598
+ readonly maxOutputTokens: 65536;
511
599
  readonly inputTokenCost: 0.1;
512
600
  readonly outputTokenCost: 0.4;
513
601
  readonly outputTokensPerSecond: 400;
@@ -536,43 +624,47 @@ export declare function getModel(modelName: ModelName): {
536
624
  } | {
537
625
  readonly type: "text";
538
626
  readonly modelName: "gemini-2.0-flash-lite";
539
- readonly description: "Cost effective offering to support high throughput. Note: May be deprecated in favor of 2.5-flash-lite.";
627
+ readonly description: "Cost effective offering to support high throughput. DEPRECATED: Will be shut down on March 31, 2026. Use gemini-2.5-flash-lite instead.";
540
628
  readonly maxInputTokens: 1048576;
541
629
  readonly maxOutputTokens: 8192;
542
630
  readonly inputTokenCost: 0.075;
543
631
  readonly outputTokenCost: 0.3;
632
+ readonly disabled: true;
544
633
  readonly provider: "google";
545
634
  } | {
546
635
  readonly type: "text";
547
636
  readonly modelName: "gemini-1.5-flash";
548
- readonly description: "Provides speed and efficiency for high-volume, quality, cost-effective apps. Note: prices ~double after the first 128k tokens.";
637
+ readonly description: "RETIRED: No longer available. Use gemini-2.5-flash instead.";
549
638
  readonly maxInputTokens: 1048576;
550
639
  readonly maxOutputTokens: 8192;
551
640
  readonly inputTokenCost: 0.01875;
552
641
  readonly outputTokenCost: 0.075;
553
642
  readonly outputTokensPerSecond: 178;
554
643
  readonly costUnit: "characters";
644
+ readonly disabled: true;
555
645
  readonly provider: "google";
556
646
  } | {
557
647
  readonly type: "text";
558
648
  readonly modelName: "gemini-1.5-pro";
559
- readonly description: "Supports text or chat prompts for a text or code response. Supports long-context understanding up to the maximum input token limit. Also does video?";
649
+ readonly description: "RETIRED: No longer available. Use gemini-2.5-pro instead.";
560
650
  readonly maxInputTokens: 2097152;
561
651
  readonly maxOutputTokens: 8192;
562
652
  readonly inputTokenCost: 0.3125;
563
653
  readonly outputTokenCost: 1.25;
564
654
  readonly outputTokensPerSecond: 59;
565
655
  readonly costUnit: "characters";
656
+ readonly disabled: true;
566
657
  readonly provider: "google";
567
658
  } | {
568
659
  readonly type: "text";
569
660
  readonly modelName: "gemini-1.0-pro";
570
- readonly description: "The best performing model for a wide range of text-only tasks.";
661
+ readonly description: "RETIRED: No longer available. Use gemini-2.5-flash instead.";
571
662
  readonly maxInputTokens: 32760;
572
663
  readonly maxOutputTokens: 8192;
573
664
  readonly inputTokenCost: 0.125;
574
665
  readonly outputTokenCost: 0.375;
575
666
  readonly costUnit: "characters";
667
+ readonly disabled: true;
576
668
  readonly provider: "google";
577
669
  } | {
578
670
  readonly type: "text";
@@ -663,9 +755,12 @@ export type Optimization = "speed" | "accuracy" | "cost" | "large-context";
663
755
  export type ModelConfig = {
664
756
  optimizeFor: Optimization[];
665
757
  providers: Provider[];
758
+ limit?: {
759
+ cost?: number;
760
+ };
666
761
  };
667
762
  export declare function isModelConfig(model: ModelName | ModelConfig): model is ModelConfig;
668
- export declare function pickModel(config: ModelConfig, models?: readonly (typeof textModels)[number][]): TextModelName;
763
+ export declare function pickModel(config: ModelConfig, models?: readonly TextModel[]): TextModelName;
669
764
  export declare function calculateCost(modelName: ModelName, usage: {
670
765
  inputTokens: number;
671
766
  outputTokens: number;
package/dist/models.js CHANGED
@@ -75,11 +75,21 @@ export const textModels = [
75
75
  maxInputTokens: 200000,
76
76
  maxOutputTokens: 100000,
77
77
  inputTokenCost: 1.1,
78
- cachedInputTokenCost: 0.55,
78
+ cachedInputTokenCost: 0.275,
79
79
  outputTokenCost: 4.4,
80
80
  outputTokensPerSecond: 135,
81
81
  provider: "openai",
82
82
  },
83
+ {
84
+ type: "text",
85
+ modelName: "o3-pro",
86
+ description: "o3-pro uses more compute for complex reasoning tasks. Available via Responses API only. Requests may take several minutes. Knowledge cutoff: June 2024.",
87
+ maxInputTokens: 200000,
88
+ maxOutputTokens: 100000,
89
+ inputTokenCost: 20,
90
+ outputTokenCost: 80,
91
+ provider: "openai",
92
+ },
83
93
  {
84
94
  type: "text",
85
95
  modelName: "o1",
@@ -128,18 +138,54 @@ export const textModels = [
128
138
  {
129
139
  type: "text",
130
140
  modelName: "gpt-4.1",
131
- description: "GPT-4.1 supports up to 1 million tokens of context, representing a significant increase in context window capacity. Ideal for processing large documents and extended conversations.",
141
+ description: "GPT-4.1 excels at instruction following and tool calling with 1M token context window. Knowledge cutoff: June 2024.",
132
142
  maxInputTokens: 1047576,
133
143
  maxOutputTokens: 32768,
134
- inputTokenCost: 2.5,
135
- cachedInputTokenCost: 1.25,
136
- outputTokenCost: 10,
144
+ inputTokenCost: 2.0,
145
+ cachedInputTokenCost: 0.5,
146
+ outputTokenCost: 8,
147
+ outputTokensPerSecond: 105,
148
+ provider: "openai",
149
+ },
150
+ {
151
+ type: "text",
152
+ modelName: "gpt-4.1-mini",
153
+ description: "GPT-4.1 mini excels at instruction following and tool calling with 1M token context window and low latency. Knowledge cutoff: June 2024.",
154
+ maxInputTokens: 1047576,
155
+ maxOutputTokens: 32768,
156
+ inputTokenCost: 0.4,
157
+ cachedInputTokenCost: 0.1,
158
+ outputTokenCost: 1.6,
159
+ outputTokensPerSecond: 78,
137
160
  provider: "openai",
138
161
  },
162
+ {
163
+ type: "text",
164
+ modelName: "gpt-4.1-nano",
165
+ description: "GPT-4.1 nano is the fastest and most affordable GPT-4.1 variant with 1M token context window. Knowledge cutoff: June 2024.",
166
+ maxInputTokens: 1047576,
167
+ maxOutputTokens: 32768,
168
+ inputTokenCost: 0.1,
169
+ cachedInputTokenCost: 0.025,
170
+ outputTokenCost: 0.4,
171
+ outputTokensPerSecond: 142,
172
+ provider: "openai",
173
+ },
174
+ {
175
+ type: "text",
176
+ modelName: "gemini-3.1-pro-preview",
177
+ description: "Latest Gemini 3.1 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Feb 2026.",
178
+ maxInputTokens: 1_048_576,
179
+ maxOutputTokens: 65536,
180
+ inputTokenCost: 2.0,
181
+ outputTokenCost: 12.0,
182
+ outputTokensPerSecond: 112,
183
+ provider: "google",
184
+ },
139
185
  {
140
186
  type: "text",
141
187
  modelName: "gemini-3-pro-preview",
142
- description: "Strongest Gemini 3 model quality with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Nov 2025, currently in preview.",
188
+ description: "Gemini 3 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Superseded by 3.1 Pro.",
143
189
  maxInputTokens: 1_048_576,
144
190
  maxOutputTokens: 65536,
145
191
  inputTokenCost: 2.0,
@@ -159,9 +205,9 @@ export const textModels = [
159
205
  {
160
206
  type: "text",
161
207
  modelName: "gemini-2.5-pro",
162
- description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/higher output). Batch API: 50% discount.",
208
+ description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/$15.00 output). Batch API: 50% discount.",
163
209
  maxInputTokens: 2_097_152,
164
- maxOutputTokens: 8192,
210
+ maxOutputTokens: 65536,
165
211
  inputTokenCost: 1.25,
166
212
  outputTokenCost: 10.0,
167
213
  outputTokensPerSecond: 175,
@@ -172,7 +218,7 @@ export const textModels = [
172
218
  modelName: "gemini-2.5-flash",
173
219
  description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.",
174
220
  maxInputTokens: 1_048_576,
175
- maxOutputTokens: 8192,
221
+ maxOutputTokens: 65536,
176
222
  inputTokenCost: 0.3,
177
223
  outputTokenCost: 2.5,
178
224
  outputTokensPerSecond: 225,
@@ -183,7 +229,7 @@ export const textModels = [
183
229
  modelName: "gemini-2.5-flash-lite",
184
230
  description: "Most cost-effective Gemini 2.5 option for high-throughput applications. 1M context window.",
185
231
  maxInputTokens: 1_048_576,
186
- maxOutputTokens: 8192,
232
+ maxOutputTokens: 65536,
187
233
  inputTokenCost: 0.1,
188
234
  outputTokenCost: 0.4,
189
235
  outputTokensPerSecond: 400,
@@ -215,46 +261,50 @@ export const textModels = [
215
261
  {
216
262
  type: "text",
217
263
  modelName: "gemini-2.0-flash-lite",
218
- description: "Cost effective offering to support high throughput. Note: May be deprecated in favor of 2.5-flash-lite.",
264
+ description: "Cost effective offering to support high throughput. DEPRECATED: Will be shut down on March 31, 2026. Use gemini-2.5-flash-lite instead.",
219
265
  maxInputTokens: 1_048_576,
220
266
  maxOutputTokens: 8192,
221
267
  inputTokenCost: 0.075,
222
268
  outputTokenCost: 0.3,
269
+ disabled: true,
223
270
  provider: "google",
224
271
  },
225
272
  {
226
273
  type: "text",
227
274
  modelName: "gemini-1.5-flash",
228
- description: "Provides speed and efficiency for high-volume, quality, cost-effective apps. Note: prices ~double after the first 128k tokens.",
275
+ description: "RETIRED: No longer available. Use gemini-2.5-flash instead.",
229
276
  maxInputTokens: 1_048_576,
230
277
  maxOutputTokens: 8192,
231
278
  inputTokenCost: 0.01875,
232
279
  outputTokenCost: 0.075,
233
280
  outputTokensPerSecond: 178,
234
281
  costUnit: "characters",
282
+ disabled: true,
235
283
  provider: "google",
236
284
  },
237
285
  {
238
286
  type: "text",
239
287
  modelName: "gemini-1.5-pro",
240
- description: "Supports text or chat prompts for a text or code response. Supports long-context understanding up to the maximum input token limit. Also does video?",
288
+ description: "RETIRED: No longer available. Use gemini-2.5-pro instead.",
241
289
  maxInputTokens: 2_097_152,
242
290
  maxOutputTokens: 8192,
243
291
  inputTokenCost: 0.3125,
244
292
  outputTokenCost: 1.25,
245
293
  outputTokensPerSecond: 59,
246
294
  costUnit: "characters",
295
+ disabled: true,
247
296
  provider: "google",
248
297
  },
249
298
  {
250
299
  type: "text",
251
300
  modelName: "gemini-1.0-pro",
252
- description: "The best performing model for a wide range of text-only tasks.",
301
+ description: "RETIRED: No longer available. Use gemini-2.5-flash instead.",
253
302
  maxInputTokens: 32_760,
254
303
  maxOutputTokens: 8192,
255
304
  inputTokenCost: 0.125,
256
305
  outputTokenCost: 0.375,
257
306
  costUnit: "characters",
307
+ disabled: true,
258
308
  provider: "google",
259
309
  },
260
310
  {
@@ -429,8 +479,14 @@ function isLowerBetter(optimization) {
429
479
  return optimization === "cost";
430
480
  }
431
481
  export function pickModel(config, models = textModels) {
432
- const candidates = models.filter((m) => config.providers.includes(m.provider) &&
482
+ let candidates = models.filter((m) => config.providers.includes(m.provider) &&
433
483
  !("disabled" in m && m.disabled));
484
+ if (config.limit?.cost !== undefined) {
485
+ candidates = candidates.filter((m) => {
486
+ const cost = (m.inputTokenCost ?? 0) + (m.outputTokenCost ?? 0);
487
+ return cost <= config.limit.cost;
488
+ });
489
+ }
434
490
  if (candidates.length === 0) {
435
491
  throw new SmolError("No models available for providers: " +
436
492
  config.providers.join(", ") +
package/dist/types.d.ts CHANGED
@@ -25,6 +25,7 @@ export type PromptConfig = {
25
25
  numRetries: number;
26
26
  }>;
27
27
  rawAttributes?: Record<string, any>;
28
+ maxMessages?: number;
28
29
  };
29
30
  export type SmolConfig = {
30
31
  openAiApiKey?: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "smoltalk",
3
- "version": "0.0.21",
3
+ "version": "0.0.23",
4
4
  "description": "A common interface for LLM APIs",
5
5
  "homepage": "https://github.com/egonSchiele/smoltalk",
6
6
  "scripts": {