smoltalk 0.0.21 → 0.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/clients/baseClient.d.ts +1 -0
- package/dist/clients/baseClient.js +23 -0
- package/dist/models.d.ts +124 -29
- package/dist/models.js +71 -15
- package/dist/types.d.ts +1 -0
- package/package.json +1 -1
|
@@ -10,6 +10,7 @@ export declare class BaseClient implements SmolClient {
|
|
|
10
10
|
stream: true;
|
|
11
11
|
}): AsyncGenerator<StreamChunk>;
|
|
12
12
|
text(promptConfig: PromptConfig): Promise<Result<PromptResult>> | AsyncGenerator<StreamChunk>;
|
|
13
|
+
checkMessageLimit(promptConfig: PromptConfig): Result<PromptResult> | null;
|
|
13
14
|
textSync(promptConfig: PromptConfig): Promise<Result<PromptResult>>;
|
|
14
15
|
checkForToolLoops(promptConfig: PromptConfig): {
|
|
15
16
|
continue: boolean;
|
|
@@ -14,7 +14,22 @@ export class BaseClient {
|
|
|
14
14
|
return this.textSync(promptConfig);
|
|
15
15
|
}
|
|
16
16
|
}
|
|
17
|
+
checkMessageLimit(promptConfig) {
|
|
18
|
+
if (promptConfig.maxMessages !== undefined &&
|
|
19
|
+
promptConfig.messages.length > promptConfig.maxMessages) {
|
|
20
|
+
const logger = getLogger();
|
|
21
|
+
logger.warn(`Message limit exceeded: ${promptConfig.messages.length} messages sent, but maxMessages is set to ${promptConfig.maxMessages}. Aborting request.`);
|
|
22
|
+
return {
|
|
23
|
+
success: false,
|
|
24
|
+
error: `Message limit exceeded: ${promptConfig.messages.length} messages exceeds the maxMessages limit of ${promptConfig.maxMessages}`,
|
|
25
|
+
};
|
|
26
|
+
}
|
|
27
|
+
return null;
|
|
28
|
+
}
|
|
17
29
|
async textSync(promptConfig) {
|
|
30
|
+
const messageLimitResult = this.checkMessageLimit(promptConfig);
|
|
31
|
+
if (messageLimitResult)
|
|
32
|
+
return messageLimitResult;
|
|
18
33
|
const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(promptConfig);
|
|
19
34
|
if (!shouldContinue) {
|
|
20
35
|
return {
|
|
@@ -97,6 +112,14 @@ export class BaseClient {
|
|
|
97
112
|
return this.text(newPromptConfig);
|
|
98
113
|
}
|
|
99
114
|
async *textStream(config) {
|
|
115
|
+
const messageLimitResult = this.checkMessageLimit(config);
|
|
116
|
+
if (messageLimitResult) {
|
|
117
|
+
yield {
|
|
118
|
+
type: "error",
|
|
119
|
+
error: messageLimitResult.success === false ? messageLimitResult.error : "Message limit exceeded",
|
|
120
|
+
};
|
|
121
|
+
return;
|
|
122
|
+
}
|
|
100
123
|
const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(config);
|
|
101
124
|
if (!shouldContinue) {
|
|
102
125
|
yield {
|
package/dist/models.d.ts
CHANGED
|
@@ -92,10 +92,19 @@ export declare const textModels: readonly [{
|
|
|
92
92
|
readonly maxInputTokens: 200000;
|
|
93
93
|
readonly maxOutputTokens: 100000;
|
|
94
94
|
readonly inputTokenCost: 1.1;
|
|
95
|
-
readonly cachedInputTokenCost: 0.
|
|
95
|
+
readonly cachedInputTokenCost: 0.275;
|
|
96
96
|
readonly outputTokenCost: 4.4;
|
|
97
97
|
readonly outputTokensPerSecond: 135;
|
|
98
98
|
readonly provider: "openai";
|
|
99
|
+
}, {
|
|
100
|
+
readonly type: "text";
|
|
101
|
+
readonly modelName: "o3-pro";
|
|
102
|
+
readonly description: "o3-pro uses more compute for complex reasoning tasks. Available via Responses API only. Requests may take several minutes. Knowledge cutoff: June 2024.";
|
|
103
|
+
readonly maxInputTokens: 200000;
|
|
104
|
+
readonly maxOutputTokens: 100000;
|
|
105
|
+
readonly inputTokenCost: 20;
|
|
106
|
+
readonly outputTokenCost: 80;
|
|
107
|
+
readonly provider: "openai";
|
|
99
108
|
}, {
|
|
100
109
|
readonly type: "text";
|
|
101
110
|
readonly modelName: "o1";
|
|
@@ -140,17 +149,50 @@ export declare const textModels: readonly [{
|
|
|
140
149
|
}, {
|
|
141
150
|
readonly type: "text";
|
|
142
151
|
readonly modelName: "gpt-4.1";
|
|
143
|
-
readonly description: "GPT-4.1
|
|
152
|
+
readonly description: "GPT-4.1 excels at instruction following and tool calling with 1M token context window. Knowledge cutoff: June 2024.";
|
|
144
153
|
readonly maxInputTokens: 1047576;
|
|
145
154
|
readonly maxOutputTokens: 32768;
|
|
146
|
-
readonly inputTokenCost: 2
|
|
147
|
-
readonly cachedInputTokenCost:
|
|
148
|
-
readonly outputTokenCost:
|
|
155
|
+
readonly inputTokenCost: 2;
|
|
156
|
+
readonly cachedInputTokenCost: 0.5;
|
|
157
|
+
readonly outputTokenCost: 8;
|
|
158
|
+
readonly outputTokensPerSecond: 105;
|
|
159
|
+
readonly provider: "openai";
|
|
160
|
+
}, {
|
|
161
|
+
readonly type: "text";
|
|
162
|
+
readonly modelName: "gpt-4.1-mini";
|
|
163
|
+
readonly description: "GPT-4.1 mini excels at instruction following and tool calling with 1M token context window and low latency. Knowledge cutoff: June 2024.";
|
|
164
|
+
readonly maxInputTokens: 1047576;
|
|
165
|
+
readonly maxOutputTokens: 32768;
|
|
166
|
+
readonly inputTokenCost: 0.4;
|
|
167
|
+
readonly cachedInputTokenCost: 0.1;
|
|
168
|
+
readonly outputTokenCost: 1.6;
|
|
169
|
+
readonly outputTokensPerSecond: 78;
|
|
170
|
+
readonly provider: "openai";
|
|
171
|
+
}, {
|
|
172
|
+
readonly type: "text";
|
|
173
|
+
readonly modelName: "gpt-4.1-nano";
|
|
174
|
+
readonly description: "GPT-4.1 nano is the fastest and most affordable GPT-4.1 variant with 1M token context window. Knowledge cutoff: June 2024.";
|
|
175
|
+
readonly maxInputTokens: 1047576;
|
|
176
|
+
readonly maxOutputTokens: 32768;
|
|
177
|
+
readonly inputTokenCost: 0.1;
|
|
178
|
+
readonly cachedInputTokenCost: 0.025;
|
|
179
|
+
readonly outputTokenCost: 0.4;
|
|
180
|
+
readonly outputTokensPerSecond: 142;
|
|
149
181
|
readonly provider: "openai";
|
|
182
|
+
}, {
|
|
183
|
+
readonly type: "text";
|
|
184
|
+
readonly modelName: "gemini-3.1-pro-preview";
|
|
185
|
+
readonly description: "Latest Gemini 3.1 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Feb 2026.";
|
|
186
|
+
readonly maxInputTokens: 1048576;
|
|
187
|
+
readonly maxOutputTokens: 65536;
|
|
188
|
+
readonly inputTokenCost: 2;
|
|
189
|
+
readonly outputTokenCost: 12;
|
|
190
|
+
readonly outputTokensPerSecond: 112;
|
|
191
|
+
readonly provider: "google";
|
|
150
192
|
}, {
|
|
151
193
|
readonly type: "text";
|
|
152
194
|
readonly modelName: "gemini-3-pro-preview";
|
|
153
|
-
readonly description: "
|
|
195
|
+
readonly description: "Gemini 3 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Superseded by 3.1 Pro.";
|
|
154
196
|
readonly maxInputTokens: 1048576;
|
|
155
197
|
readonly maxOutputTokens: 65536;
|
|
156
198
|
readonly inputTokenCost: 2;
|
|
@@ -168,9 +210,9 @@ export declare const textModels: readonly [{
|
|
|
168
210
|
}, {
|
|
169
211
|
readonly type: "text";
|
|
170
212
|
readonly modelName: "gemini-2.5-pro";
|
|
171
|
-
readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input
|
|
213
|
+
readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/$15.00 output). Batch API: 50% discount.";
|
|
172
214
|
readonly maxInputTokens: 2097152;
|
|
173
|
-
readonly maxOutputTokens:
|
|
215
|
+
readonly maxOutputTokens: 65536;
|
|
174
216
|
readonly inputTokenCost: 1.25;
|
|
175
217
|
readonly outputTokenCost: 10;
|
|
176
218
|
readonly outputTokensPerSecond: 175;
|
|
@@ -180,7 +222,7 @@ export declare const textModels: readonly [{
|
|
|
180
222
|
readonly modelName: "gemini-2.5-flash";
|
|
181
223
|
readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.";
|
|
182
224
|
readonly maxInputTokens: 1048576;
|
|
183
|
-
readonly maxOutputTokens:
|
|
225
|
+
readonly maxOutputTokens: 65536;
|
|
184
226
|
readonly inputTokenCost: 0.3;
|
|
185
227
|
readonly outputTokenCost: 2.5;
|
|
186
228
|
readonly outputTokensPerSecond: 225;
|
|
@@ -190,7 +232,7 @@ export declare const textModels: readonly [{
|
|
|
190
232
|
readonly modelName: "gemini-2.5-flash-lite";
|
|
191
233
|
readonly description: "Most cost-effective Gemini 2.5 option for high-throughput applications. 1M context window.";
|
|
192
234
|
readonly maxInputTokens: 1048576;
|
|
193
|
-
readonly maxOutputTokens:
|
|
235
|
+
readonly maxOutputTokens: 65536;
|
|
194
236
|
readonly inputTokenCost: 0.1;
|
|
195
237
|
readonly outputTokenCost: 0.4;
|
|
196
238
|
readonly outputTokensPerSecond: 400;
|
|
@@ -219,43 +261,47 @@ export declare const textModels: readonly [{
|
|
|
219
261
|
}, {
|
|
220
262
|
readonly type: "text";
|
|
221
263
|
readonly modelName: "gemini-2.0-flash-lite";
|
|
222
|
-
readonly description: "Cost effective offering to support high throughput.
|
|
264
|
+
readonly description: "Cost effective offering to support high throughput. DEPRECATED: Will be shut down on March 31, 2026. Use gemini-2.5-flash-lite instead.";
|
|
223
265
|
readonly maxInputTokens: 1048576;
|
|
224
266
|
readonly maxOutputTokens: 8192;
|
|
225
267
|
readonly inputTokenCost: 0.075;
|
|
226
268
|
readonly outputTokenCost: 0.3;
|
|
269
|
+
readonly disabled: true;
|
|
227
270
|
readonly provider: "google";
|
|
228
271
|
}, {
|
|
229
272
|
readonly type: "text";
|
|
230
273
|
readonly modelName: "gemini-1.5-flash";
|
|
231
|
-
readonly description: "
|
|
274
|
+
readonly description: "RETIRED: No longer available. Use gemini-2.5-flash instead.";
|
|
232
275
|
readonly maxInputTokens: 1048576;
|
|
233
276
|
readonly maxOutputTokens: 8192;
|
|
234
277
|
readonly inputTokenCost: 0.01875;
|
|
235
278
|
readonly outputTokenCost: 0.075;
|
|
236
279
|
readonly outputTokensPerSecond: 178;
|
|
237
280
|
readonly costUnit: "characters";
|
|
281
|
+
readonly disabled: true;
|
|
238
282
|
readonly provider: "google";
|
|
239
283
|
}, {
|
|
240
284
|
readonly type: "text";
|
|
241
285
|
readonly modelName: "gemini-1.5-pro";
|
|
242
|
-
readonly description: "
|
|
286
|
+
readonly description: "RETIRED: No longer available. Use gemini-2.5-pro instead.";
|
|
243
287
|
readonly maxInputTokens: 2097152;
|
|
244
288
|
readonly maxOutputTokens: 8192;
|
|
245
289
|
readonly inputTokenCost: 0.3125;
|
|
246
290
|
readonly outputTokenCost: 1.25;
|
|
247
291
|
readonly outputTokensPerSecond: 59;
|
|
248
292
|
readonly costUnit: "characters";
|
|
293
|
+
readonly disabled: true;
|
|
249
294
|
readonly provider: "google";
|
|
250
295
|
}, {
|
|
251
296
|
readonly type: "text";
|
|
252
297
|
readonly modelName: "gemini-1.0-pro";
|
|
253
|
-
readonly description: "
|
|
298
|
+
readonly description: "RETIRED: No longer available. Use gemini-2.5-flash instead.";
|
|
254
299
|
readonly maxInputTokens: 32760;
|
|
255
300
|
readonly maxOutputTokens: 8192;
|
|
256
301
|
readonly inputTokenCost: 0.125;
|
|
257
302
|
readonly outputTokenCost: 0.375;
|
|
258
303
|
readonly costUnit: "characters";
|
|
304
|
+
readonly disabled: true;
|
|
259
305
|
readonly provider: "google";
|
|
260
306
|
}, {
|
|
261
307
|
readonly type: "text";
|
|
@@ -409,10 +455,19 @@ export declare function getModel(modelName: ModelName): {
|
|
|
409
455
|
readonly maxInputTokens: 200000;
|
|
410
456
|
readonly maxOutputTokens: 100000;
|
|
411
457
|
readonly inputTokenCost: 1.1;
|
|
412
|
-
readonly cachedInputTokenCost: 0.
|
|
458
|
+
readonly cachedInputTokenCost: 0.275;
|
|
413
459
|
readonly outputTokenCost: 4.4;
|
|
414
460
|
readonly outputTokensPerSecond: 135;
|
|
415
461
|
readonly provider: "openai";
|
|
462
|
+
} | {
|
|
463
|
+
readonly type: "text";
|
|
464
|
+
readonly modelName: "o3-pro";
|
|
465
|
+
readonly description: "o3-pro uses more compute for complex reasoning tasks. Available via Responses API only. Requests may take several minutes. Knowledge cutoff: June 2024.";
|
|
466
|
+
readonly maxInputTokens: 200000;
|
|
467
|
+
readonly maxOutputTokens: 100000;
|
|
468
|
+
readonly inputTokenCost: 20;
|
|
469
|
+
readonly outputTokenCost: 80;
|
|
470
|
+
readonly provider: "openai";
|
|
416
471
|
} | {
|
|
417
472
|
readonly type: "text";
|
|
418
473
|
readonly modelName: "o1";
|
|
@@ -457,17 +512,50 @@ export declare function getModel(modelName: ModelName): {
|
|
|
457
512
|
} | {
|
|
458
513
|
readonly type: "text";
|
|
459
514
|
readonly modelName: "gpt-4.1";
|
|
460
|
-
readonly description: "GPT-4.1
|
|
515
|
+
readonly description: "GPT-4.1 excels at instruction following and tool calling with 1M token context window. Knowledge cutoff: June 2024.";
|
|
461
516
|
readonly maxInputTokens: 1047576;
|
|
462
517
|
readonly maxOutputTokens: 32768;
|
|
463
|
-
readonly inputTokenCost: 2
|
|
464
|
-
readonly cachedInputTokenCost:
|
|
465
|
-
readonly outputTokenCost:
|
|
518
|
+
readonly inputTokenCost: 2;
|
|
519
|
+
readonly cachedInputTokenCost: 0.5;
|
|
520
|
+
readonly outputTokenCost: 8;
|
|
521
|
+
readonly outputTokensPerSecond: 105;
|
|
522
|
+
readonly provider: "openai";
|
|
523
|
+
} | {
|
|
524
|
+
readonly type: "text";
|
|
525
|
+
readonly modelName: "gpt-4.1-mini";
|
|
526
|
+
readonly description: "GPT-4.1 mini excels at instruction following and tool calling with 1M token context window and low latency. Knowledge cutoff: June 2024.";
|
|
527
|
+
readonly maxInputTokens: 1047576;
|
|
528
|
+
readonly maxOutputTokens: 32768;
|
|
529
|
+
readonly inputTokenCost: 0.4;
|
|
530
|
+
readonly cachedInputTokenCost: 0.1;
|
|
531
|
+
readonly outputTokenCost: 1.6;
|
|
532
|
+
readonly outputTokensPerSecond: 78;
|
|
533
|
+
readonly provider: "openai";
|
|
534
|
+
} | {
|
|
535
|
+
readonly type: "text";
|
|
536
|
+
readonly modelName: "gpt-4.1-nano";
|
|
537
|
+
readonly description: "GPT-4.1 nano is the fastest and most affordable GPT-4.1 variant with 1M token context window. Knowledge cutoff: June 2024.";
|
|
538
|
+
readonly maxInputTokens: 1047576;
|
|
539
|
+
readonly maxOutputTokens: 32768;
|
|
540
|
+
readonly inputTokenCost: 0.1;
|
|
541
|
+
readonly cachedInputTokenCost: 0.025;
|
|
542
|
+
readonly outputTokenCost: 0.4;
|
|
543
|
+
readonly outputTokensPerSecond: 142;
|
|
466
544
|
readonly provider: "openai";
|
|
545
|
+
} | {
|
|
546
|
+
readonly type: "text";
|
|
547
|
+
readonly modelName: "gemini-3.1-pro-preview";
|
|
548
|
+
readonly description: "Latest Gemini 3.1 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Feb 2026.";
|
|
549
|
+
readonly maxInputTokens: 1048576;
|
|
550
|
+
readonly maxOutputTokens: 65536;
|
|
551
|
+
readonly inputTokenCost: 2;
|
|
552
|
+
readonly outputTokenCost: 12;
|
|
553
|
+
readonly outputTokensPerSecond: 112;
|
|
554
|
+
readonly provider: "google";
|
|
467
555
|
} | {
|
|
468
556
|
readonly type: "text";
|
|
469
557
|
readonly modelName: "gemini-3-pro-preview";
|
|
470
|
-
readonly description: "
|
|
558
|
+
readonly description: "Gemini 3 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Superseded by 3.1 Pro.";
|
|
471
559
|
readonly maxInputTokens: 1048576;
|
|
472
560
|
readonly maxOutputTokens: 65536;
|
|
473
561
|
readonly inputTokenCost: 2;
|
|
@@ -485,9 +573,9 @@ export declare function getModel(modelName: ModelName): {
|
|
|
485
573
|
} | {
|
|
486
574
|
readonly type: "text";
|
|
487
575
|
readonly modelName: "gemini-2.5-pro";
|
|
488
|
-
readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input
|
|
576
|
+
readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/$15.00 output). Batch API: 50% discount.";
|
|
489
577
|
readonly maxInputTokens: 2097152;
|
|
490
|
-
readonly maxOutputTokens:
|
|
578
|
+
readonly maxOutputTokens: 65536;
|
|
491
579
|
readonly inputTokenCost: 1.25;
|
|
492
580
|
readonly outputTokenCost: 10;
|
|
493
581
|
readonly outputTokensPerSecond: 175;
|
|
@@ -497,7 +585,7 @@ export declare function getModel(modelName: ModelName): {
|
|
|
497
585
|
readonly modelName: "gemini-2.5-flash";
|
|
498
586
|
readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.";
|
|
499
587
|
readonly maxInputTokens: 1048576;
|
|
500
|
-
readonly maxOutputTokens:
|
|
588
|
+
readonly maxOutputTokens: 65536;
|
|
501
589
|
readonly inputTokenCost: 0.3;
|
|
502
590
|
readonly outputTokenCost: 2.5;
|
|
503
591
|
readonly outputTokensPerSecond: 225;
|
|
@@ -507,7 +595,7 @@ export declare function getModel(modelName: ModelName): {
|
|
|
507
595
|
readonly modelName: "gemini-2.5-flash-lite";
|
|
508
596
|
readonly description: "Most cost-effective Gemini 2.5 option for high-throughput applications. 1M context window.";
|
|
509
597
|
readonly maxInputTokens: 1048576;
|
|
510
|
-
readonly maxOutputTokens:
|
|
598
|
+
readonly maxOutputTokens: 65536;
|
|
511
599
|
readonly inputTokenCost: 0.1;
|
|
512
600
|
readonly outputTokenCost: 0.4;
|
|
513
601
|
readonly outputTokensPerSecond: 400;
|
|
@@ -536,43 +624,47 @@ export declare function getModel(modelName: ModelName): {
|
|
|
536
624
|
} | {
|
|
537
625
|
readonly type: "text";
|
|
538
626
|
readonly modelName: "gemini-2.0-flash-lite";
|
|
539
|
-
readonly description: "Cost effective offering to support high throughput.
|
|
627
|
+
readonly description: "Cost effective offering to support high throughput. DEPRECATED: Will be shut down on March 31, 2026. Use gemini-2.5-flash-lite instead.";
|
|
540
628
|
readonly maxInputTokens: 1048576;
|
|
541
629
|
readonly maxOutputTokens: 8192;
|
|
542
630
|
readonly inputTokenCost: 0.075;
|
|
543
631
|
readonly outputTokenCost: 0.3;
|
|
632
|
+
readonly disabled: true;
|
|
544
633
|
readonly provider: "google";
|
|
545
634
|
} | {
|
|
546
635
|
readonly type: "text";
|
|
547
636
|
readonly modelName: "gemini-1.5-flash";
|
|
548
|
-
readonly description: "
|
|
637
|
+
readonly description: "RETIRED: No longer available. Use gemini-2.5-flash instead.";
|
|
549
638
|
readonly maxInputTokens: 1048576;
|
|
550
639
|
readonly maxOutputTokens: 8192;
|
|
551
640
|
readonly inputTokenCost: 0.01875;
|
|
552
641
|
readonly outputTokenCost: 0.075;
|
|
553
642
|
readonly outputTokensPerSecond: 178;
|
|
554
643
|
readonly costUnit: "characters";
|
|
644
|
+
readonly disabled: true;
|
|
555
645
|
readonly provider: "google";
|
|
556
646
|
} | {
|
|
557
647
|
readonly type: "text";
|
|
558
648
|
readonly modelName: "gemini-1.5-pro";
|
|
559
|
-
readonly description: "
|
|
649
|
+
readonly description: "RETIRED: No longer available. Use gemini-2.5-pro instead.";
|
|
560
650
|
readonly maxInputTokens: 2097152;
|
|
561
651
|
readonly maxOutputTokens: 8192;
|
|
562
652
|
readonly inputTokenCost: 0.3125;
|
|
563
653
|
readonly outputTokenCost: 1.25;
|
|
564
654
|
readonly outputTokensPerSecond: 59;
|
|
565
655
|
readonly costUnit: "characters";
|
|
656
|
+
readonly disabled: true;
|
|
566
657
|
readonly provider: "google";
|
|
567
658
|
} | {
|
|
568
659
|
readonly type: "text";
|
|
569
660
|
readonly modelName: "gemini-1.0-pro";
|
|
570
|
-
readonly description: "
|
|
661
|
+
readonly description: "RETIRED: No longer available. Use gemini-2.5-flash instead.";
|
|
571
662
|
readonly maxInputTokens: 32760;
|
|
572
663
|
readonly maxOutputTokens: 8192;
|
|
573
664
|
readonly inputTokenCost: 0.125;
|
|
574
665
|
readonly outputTokenCost: 0.375;
|
|
575
666
|
readonly costUnit: "characters";
|
|
667
|
+
readonly disabled: true;
|
|
576
668
|
readonly provider: "google";
|
|
577
669
|
} | {
|
|
578
670
|
readonly type: "text";
|
|
@@ -663,9 +755,12 @@ export type Optimization = "speed" | "accuracy" | "cost" | "large-context";
|
|
|
663
755
|
export type ModelConfig = {
|
|
664
756
|
optimizeFor: Optimization[];
|
|
665
757
|
providers: Provider[];
|
|
758
|
+
limit?: {
|
|
759
|
+
cost?: number;
|
|
760
|
+
};
|
|
666
761
|
};
|
|
667
762
|
export declare function isModelConfig(model: ModelName | ModelConfig): model is ModelConfig;
|
|
668
|
-
export declare function pickModel(config: ModelConfig, models?: readonly
|
|
763
|
+
export declare function pickModel(config: ModelConfig, models?: readonly TextModel[]): TextModelName;
|
|
669
764
|
export declare function calculateCost(modelName: ModelName, usage: {
|
|
670
765
|
inputTokens: number;
|
|
671
766
|
outputTokens: number;
|
package/dist/models.js
CHANGED
|
@@ -75,11 +75,21 @@ export const textModels = [
|
|
|
75
75
|
maxInputTokens: 200000,
|
|
76
76
|
maxOutputTokens: 100000,
|
|
77
77
|
inputTokenCost: 1.1,
|
|
78
|
-
cachedInputTokenCost: 0.
|
|
78
|
+
cachedInputTokenCost: 0.275,
|
|
79
79
|
outputTokenCost: 4.4,
|
|
80
80
|
outputTokensPerSecond: 135,
|
|
81
81
|
provider: "openai",
|
|
82
82
|
},
|
|
83
|
+
{
|
|
84
|
+
type: "text",
|
|
85
|
+
modelName: "o3-pro",
|
|
86
|
+
description: "o3-pro uses more compute for complex reasoning tasks. Available via Responses API only. Requests may take several minutes. Knowledge cutoff: June 2024.",
|
|
87
|
+
maxInputTokens: 200000,
|
|
88
|
+
maxOutputTokens: 100000,
|
|
89
|
+
inputTokenCost: 20,
|
|
90
|
+
outputTokenCost: 80,
|
|
91
|
+
provider: "openai",
|
|
92
|
+
},
|
|
83
93
|
{
|
|
84
94
|
type: "text",
|
|
85
95
|
modelName: "o1",
|
|
@@ -128,18 +138,54 @@ export const textModels = [
|
|
|
128
138
|
{
|
|
129
139
|
type: "text",
|
|
130
140
|
modelName: "gpt-4.1",
|
|
131
|
-
description: "GPT-4.1
|
|
141
|
+
description: "GPT-4.1 excels at instruction following and tool calling with 1M token context window. Knowledge cutoff: June 2024.",
|
|
132
142
|
maxInputTokens: 1047576,
|
|
133
143
|
maxOutputTokens: 32768,
|
|
134
|
-
inputTokenCost: 2.
|
|
135
|
-
cachedInputTokenCost:
|
|
136
|
-
outputTokenCost:
|
|
144
|
+
inputTokenCost: 2.0,
|
|
145
|
+
cachedInputTokenCost: 0.5,
|
|
146
|
+
outputTokenCost: 8,
|
|
147
|
+
outputTokensPerSecond: 105,
|
|
148
|
+
provider: "openai",
|
|
149
|
+
},
|
|
150
|
+
{
|
|
151
|
+
type: "text",
|
|
152
|
+
modelName: "gpt-4.1-mini",
|
|
153
|
+
description: "GPT-4.1 mini excels at instruction following and tool calling with 1M token context window and low latency. Knowledge cutoff: June 2024.",
|
|
154
|
+
maxInputTokens: 1047576,
|
|
155
|
+
maxOutputTokens: 32768,
|
|
156
|
+
inputTokenCost: 0.4,
|
|
157
|
+
cachedInputTokenCost: 0.1,
|
|
158
|
+
outputTokenCost: 1.6,
|
|
159
|
+
outputTokensPerSecond: 78,
|
|
137
160
|
provider: "openai",
|
|
138
161
|
},
|
|
162
|
+
{
|
|
163
|
+
type: "text",
|
|
164
|
+
modelName: "gpt-4.1-nano",
|
|
165
|
+
description: "GPT-4.1 nano is the fastest and most affordable GPT-4.1 variant with 1M token context window. Knowledge cutoff: June 2024.",
|
|
166
|
+
maxInputTokens: 1047576,
|
|
167
|
+
maxOutputTokens: 32768,
|
|
168
|
+
inputTokenCost: 0.1,
|
|
169
|
+
cachedInputTokenCost: 0.025,
|
|
170
|
+
outputTokenCost: 0.4,
|
|
171
|
+
outputTokensPerSecond: 142,
|
|
172
|
+
provider: "openai",
|
|
173
|
+
},
|
|
174
|
+
{
|
|
175
|
+
type: "text",
|
|
176
|
+
modelName: "gemini-3.1-pro-preview",
|
|
177
|
+
description: "Latest Gemini 3.1 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Feb 2026.",
|
|
178
|
+
maxInputTokens: 1_048_576,
|
|
179
|
+
maxOutputTokens: 65536,
|
|
180
|
+
inputTokenCost: 2.0,
|
|
181
|
+
outputTokenCost: 12.0,
|
|
182
|
+
outputTokensPerSecond: 112,
|
|
183
|
+
provider: "google",
|
|
184
|
+
},
|
|
139
185
|
{
|
|
140
186
|
type: "text",
|
|
141
187
|
modelName: "gemini-3-pro-preview",
|
|
142
|
-
description: "
|
|
188
|
+
description: "Gemini 3 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Superseded by 3.1 Pro.",
|
|
143
189
|
maxInputTokens: 1_048_576,
|
|
144
190
|
maxOutputTokens: 65536,
|
|
145
191
|
inputTokenCost: 2.0,
|
|
@@ -159,9 +205,9 @@ export const textModels = [
|
|
|
159
205
|
{
|
|
160
206
|
type: "text",
|
|
161
207
|
modelName: "gemini-2.5-pro",
|
|
162
|
-
description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input
|
|
208
|
+
description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/$15.00 output). Batch API: 50% discount.",
|
|
163
209
|
maxInputTokens: 2_097_152,
|
|
164
|
-
maxOutputTokens:
|
|
210
|
+
maxOutputTokens: 65536,
|
|
165
211
|
inputTokenCost: 1.25,
|
|
166
212
|
outputTokenCost: 10.0,
|
|
167
213
|
outputTokensPerSecond: 175,
|
|
@@ -172,7 +218,7 @@ export const textModels = [
|
|
|
172
218
|
modelName: "gemini-2.5-flash",
|
|
173
219
|
description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.",
|
|
174
220
|
maxInputTokens: 1_048_576,
|
|
175
|
-
maxOutputTokens:
|
|
221
|
+
maxOutputTokens: 65536,
|
|
176
222
|
inputTokenCost: 0.3,
|
|
177
223
|
outputTokenCost: 2.5,
|
|
178
224
|
outputTokensPerSecond: 225,
|
|
@@ -183,7 +229,7 @@ export const textModels = [
|
|
|
183
229
|
modelName: "gemini-2.5-flash-lite",
|
|
184
230
|
description: "Most cost-effective Gemini 2.5 option for high-throughput applications. 1M context window.",
|
|
185
231
|
maxInputTokens: 1_048_576,
|
|
186
|
-
maxOutputTokens:
|
|
232
|
+
maxOutputTokens: 65536,
|
|
187
233
|
inputTokenCost: 0.1,
|
|
188
234
|
outputTokenCost: 0.4,
|
|
189
235
|
outputTokensPerSecond: 400,
|
|
@@ -215,46 +261,50 @@ export const textModels = [
|
|
|
215
261
|
{
|
|
216
262
|
type: "text",
|
|
217
263
|
modelName: "gemini-2.0-flash-lite",
|
|
218
|
-
description: "Cost effective offering to support high throughput.
|
|
264
|
+
description: "Cost effective offering to support high throughput. DEPRECATED: Will be shut down on March 31, 2026. Use gemini-2.5-flash-lite instead.",
|
|
219
265
|
maxInputTokens: 1_048_576,
|
|
220
266
|
maxOutputTokens: 8192,
|
|
221
267
|
inputTokenCost: 0.075,
|
|
222
268
|
outputTokenCost: 0.3,
|
|
269
|
+
disabled: true,
|
|
223
270
|
provider: "google",
|
|
224
271
|
},
|
|
225
272
|
{
|
|
226
273
|
type: "text",
|
|
227
274
|
modelName: "gemini-1.5-flash",
|
|
228
|
-
description: "
|
|
275
|
+
description: "RETIRED: No longer available. Use gemini-2.5-flash instead.",
|
|
229
276
|
maxInputTokens: 1_048_576,
|
|
230
277
|
maxOutputTokens: 8192,
|
|
231
278
|
inputTokenCost: 0.01875,
|
|
232
279
|
outputTokenCost: 0.075,
|
|
233
280
|
outputTokensPerSecond: 178,
|
|
234
281
|
costUnit: "characters",
|
|
282
|
+
disabled: true,
|
|
235
283
|
provider: "google",
|
|
236
284
|
},
|
|
237
285
|
{
|
|
238
286
|
type: "text",
|
|
239
287
|
modelName: "gemini-1.5-pro",
|
|
240
|
-
description: "
|
|
288
|
+
description: "RETIRED: No longer available. Use gemini-2.5-pro instead.",
|
|
241
289
|
maxInputTokens: 2_097_152,
|
|
242
290
|
maxOutputTokens: 8192,
|
|
243
291
|
inputTokenCost: 0.3125,
|
|
244
292
|
outputTokenCost: 1.25,
|
|
245
293
|
outputTokensPerSecond: 59,
|
|
246
294
|
costUnit: "characters",
|
|
295
|
+
disabled: true,
|
|
247
296
|
provider: "google",
|
|
248
297
|
},
|
|
249
298
|
{
|
|
250
299
|
type: "text",
|
|
251
300
|
modelName: "gemini-1.0-pro",
|
|
252
|
-
description: "
|
|
301
|
+
description: "RETIRED: No longer available. Use gemini-2.5-flash instead.",
|
|
253
302
|
maxInputTokens: 32_760,
|
|
254
303
|
maxOutputTokens: 8192,
|
|
255
304
|
inputTokenCost: 0.125,
|
|
256
305
|
outputTokenCost: 0.375,
|
|
257
306
|
costUnit: "characters",
|
|
307
|
+
disabled: true,
|
|
258
308
|
provider: "google",
|
|
259
309
|
},
|
|
260
310
|
{
|
|
@@ -429,8 +479,14 @@ function isLowerBetter(optimization) {
|
|
|
429
479
|
return optimization === "cost";
|
|
430
480
|
}
|
|
431
481
|
export function pickModel(config, models = textModels) {
|
|
432
|
-
|
|
482
|
+
let candidates = models.filter((m) => config.providers.includes(m.provider) &&
|
|
433
483
|
!("disabled" in m && m.disabled));
|
|
484
|
+
if (config.limit?.cost !== undefined) {
|
|
485
|
+
candidates = candidates.filter((m) => {
|
|
486
|
+
const cost = (m.inputTokenCost ?? 0) + (m.outputTokenCost ?? 0);
|
|
487
|
+
return cost <= config.limit.cost;
|
|
488
|
+
});
|
|
489
|
+
}
|
|
434
490
|
if (candidates.length === 0) {
|
|
435
491
|
throw new SmolError("No models available for providers: " +
|
|
436
492
|
config.providers.join(", ") +
|
package/dist/types.d.ts
CHANGED