smoltalk 0.0.43 → 0.0.44

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,8 +1,13 @@
1
- import { GoogleGenAI } from "@google/genai";
1
+ import { Content, GenerateContentConfig, GoogleGenAI } from "@google/genai";
2
2
  import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
3
3
  import { BaseClient } from "./baseClient.js";
4
4
  import { ModelName } from "../models.js";
5
5
  export type SmolGoogleConfig = BaseClientConfig;
6
+ type GeneratedRequest = {
7
+ contents: Content[];
8
+ model: ModelName;
9
+ config: GenerateContentConfig;
10
+ };
6
11
  export declare class SmolGoogle extends BaseClient implements SmolClient {
7
12
  private client;
8
13
  private logger;
@@ -13,5 +18,7 @@ export declare class SmolGoogle extends BaseClient implements SmolClient {
13
18
  private calculateUsageAndCost;
14
19
  private buildRequest;
15
20
  _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
21
+ __textSync(request: GeneratedRequest): Promise<Result<PromptResult>>;
16
22
  _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
17
23
  }
24
+ export {};
@@ -1,10 +1,11 @@
1
1
  import { GoogleGenAI } from "@google/genai";
2
2
  import { ToolCall } from "../classes/ToolCall.js";
3
3
  import { getLogger } from "../logger.js";
4
- import { success, } from "../types.js";
4
+ import { addCosts, addTokenUsage, success, } from "../types.js";
5
5
  import { zodToGoogleTool } from "../util/tool.js";
6
6
  import { BaseClient } from "./baseClient.js";
7
7
  import { Model } from "../model.js";
8
+ import { userMessage } from "../classes/message/index.js";
8
9
  export class SmolGoogle extends BaseClient {
9
10
  client;
10
11
  logger;
@@ -65,10 +66,7 @@ export class SmolGoogle extends BaseClient {
65
66
  if (tools.length > 0) {
66
67
  genConfig.tools = [{ functionDeclarations: tools }];
67
68
  }
68
- if (config.responseFormat && tools.length > 0) {
69
- console.error("Warning: Both responseFormat and tools are specified in the prompt config. Google Gemini does not support enforcing a response format when tools are included, so the responseFormat will be ignored.");
70
- }
71
- else if (config.responseFormat && tools.length === 0) {
69
+ if (config.responseFormat) {
72
70
  genConfig.responseMimeType = "application/json";
73
71
  genConfig.responseJsonSchema = config.responseFormat.toJSONSchema();
74
72
  }
@@ -94,6 +92,67 @@ export class SmolGoogle extends BaseClient {
94
92
  if (signal) {
95
93
  request.config = { ...request.config, abortSignal: signal };
96
94
  }
95
+ const hasTools = config.tools && config.tools.length > 0;
96
+ const hasStructuredResponse = !!config.responseFormat;
97
+ if (!hasTools && !hasStructuredResponse) {
98
+ // If there are no tools or structured response, we can make a single request and return immediately
99
+ return this.__textSync(request);
100
+ }
101
+ // Google Gemini does not support combining function calling with
102
+ // responseMimeType 'application/json'. When tools are present, we
103
+ // make two requests instead
104
+ /*********** TOOL CALL REQUEST ************/
105
+ this.logger.debug("Detected both tool calls and structured response in call to Google Gemini. Making separate request to Google Gemini for tool calls.");
106
+ const toolRequest = {
107
+ ...request,
108
+ config: {
109
+ ...request.config,
110
+ responseMimeType: undefined,
111
+ responseJsonSchema: undefined,
112
+ },
113
+ };
114
+ const toolResult = await this.__textSync(toolRequest);
115
+ if (!toolResult.success) {
116
+ return toolResult;
117
+ }
118
+ if (toolResult.value.toolCalls.length > 0) {
119
+ this.logger.debug("Tool calls detected. Returning tool calls without making second request for structured response.");
120
+ return toolResult;
121
+ }
122
+ if (!toolResult.value.output) {
123
+ throw new Error("No output or tool calls detected in Google Gemini response. This should not happen.");
124
+ }
125
+ this.logger.debug("No tool calls detected. Making second request to Google Gemini for structured response.");
126
+ /*********** STRUCTURED OUTPUT REQUEST ************/
127
+ const message = userMessage(`Please return this output in the specified structured format. Output: ${toolResult.value.output}`);
128
+ const messages = [message.toGoogleMessage()];
129
+ const responseRequest = {
130
+ ...request,
131
+ config: {
132
+ ...request.config,
133
+ tools: undefined,
134
+ },
135
+ messages,
136
+ };
137
+ const responseResult = await this.__textSync(responseRequest);
138
+ if (!responseResult.success) {
139
+ return responseResult;
140
+ }
141
+ const thinkingBlocks = [
142
+ ...(toolResult.value.thinkingBlocks || []),
143
+ ...(responseResult.value.thinkingBlocks || []),
144
+ ];
145
+ return success({
146
+ output: responseResult.value.output,
147
+ // if there were tool calls, we would have returned already, so we know these are empty
148
+ toolCalls: [],
149
+ ...(thinkingBlocks.length > 0 && { thinkingBlocks }),
150
+ usage: addTokenUsage(toolResult.value.usage, responseResult.value.usage),
151
+ cost: addCosts(toolResult.value.cost, responseResult.value.cost),
152
+ model: request.model,
153
+ });
154
+ }
155
+ async __textSync(request) {
97
156
  this.logger.debug("Sending request to Google Gemini:", JSON.stringify(request, null, 2));
98
157
  // Send the prompt as the latest message
99
158
  const result = await this.client.models.generateContent(request);
@@ -136,6 +195,13 @@ export class SmolGoogle extends BaseClient {
136
195
  if (signal) {
137
196
  request.config = { ...request.config, abortSignal: signal };
138
197
  }
198
+ const hasTools = config.tools && config.tools.length > 0;
199
+ const hasStructuredResponse = !!config.responseFormat;
200
+ if (hasTools && hasStructuredResponse) {
201
+ this.logger.debug("Gemini does not support streaming responses with both tool calls and structured response formats. Response format will be ignored.");
202
+ request.config.responseMimeType = undefined;
203
+ request.config.responseJsonSchema = undefined;
204
+ }
139
205
  this.logger.debug("Sending streaming request to Google Gemini:", JSON.stringify(request, null, 2));
140
206
  const stream = await this.client.models.generateContentStream(request);
141
207
  let content = "";
package/dist/model.js CHANGED
@@ -109,12 +109,12 @@ export class Model {
109
109
  if (!model || !isTextModel(model)) {
110
110
  return null;
111
111
  }
112
- const inputCost = round((usage.inputTokens * (model.inputTokenCost || 0)) / 1_000_000, 2);
113
- const outputCost = round((usage.outputTokens * (model.outputTokenCost || 0)) / 1_000_000, 2);
112
+ const inputCost = round((usage.inputTokens * (model.inputTokenCost || 0)) / 1_000_000, 6);
113
+ const outputCost = round((usage.outputTokens * (model.outputTokenCost || 0)) / 1_000_000, 6);
114
114
  const cachedInputCost = usage.cachedInputTokens && model.cachedInputTokenCost
115
- ? round((usage.cachedInputTokens * model.cachedInputTokenCost) / 1_000_000, 2)
115
+ ? round((usage.cachedInputTokens * model.cachedInputTokenCost) / 1_000_000, 6)
116
116
  : undefined;
117
- const totalCost = round(inputCost + outputCost + (cachedInputCost || 0), 2);
117
+ const totalCost = round(inputCost + outputCost + (cachedInputCost || 0), 6);
118
118
  return {
119
119
  inputCost,
120
120
  outputCost,
package/dist/models.d.ts CHANGED
@@ -24,6 +24,18 @@ export type TextModel = BaseModel & {
24
24
  maxInputTokens: number;
25
25
  maxOutputTokens: number;
26
26
  outputTokensPerSecond?: number;
27
+ reasoning?: {
28
+ /** Available effort/thinking levels (provider-specific). Omit for budget-based thinking (Anthropic, Gemini 2.5). */
29
+ levels?: readonly string[];
30
+ /** Default reasoning level */
31
+ defaultLevel?: string;
32
+ /** Whether reasoning/thinking can be fully disabled */
33
+ canDisable?: boolean;
34
+ /** Whether the response includes visible thinking content (thinking blocks/parts) */
35
+ outputsThinking?: boolean;
36
+ /** Whether cryptographic thinking signatures are returned for round-tripping */
37
+ outputsSignatures?: boolean;
38
+ };
27
39
  };
28
40
  export type EmbeddingsModel = {
29
41
  type: "embeddings";
@@ -73,6 +85,13 @@ export declare const textModels: readonly [{
73
85
  readonly cachedInputTokenCost: 0.5;
74
86
  readonly outputTokenCost: 8;
75
87
  readonly outputTokensPerSecond: 94;
88
+ readonly reasoning: {
89
+ readonly levels: readonly ["low", "medium", "high"];
90
+ readonly defaultLevel: "medium";
91
+ readonly canDisable: false;
92
+ readonly outputsThinking: false;
93
+ readonly outputsSignatures: false;
94
+ };
76
95
  readonly provider: "openai";
77
96
  }, {
78
97
  readonly type: "text";
@@ -84,6 +103,13 @@ export declare const textModels: readonly [{
84
103
  readonly cachedInputTokenCost: 0.55;
85
104
  readonly outputTokenCost: 4.4;
86
105
  readonly outputTokensPerSecond: 214;
106
+ readonly reasoning: {
107
+ readonly levels: readonly ["low", "medium", "high"];
108
+ readonly defaultLevel: "medium";
109
+ readonly canDisable: false;
110
+ readonly outputsThinking: false;
111
+ readonly outputsSignatures: false;
112
+ };
87
113
  readonly provider: "openai";
88
114
  }, {
89
115
  readonly type: "text";
@@ -95,6 +121,13 @@ export declare const textModels: readonly [{
95
121
  readonly cachedInputTokenCost: 0.275;
96
122
  readonly outputTokenCost: 4.4;
97
123
  readonly outputTokensPerSecond: 135;
124
+ readonly reasoning: {
125
+ readonly levels: readonly ["low", "medium", "high"];
126
+ readonly defaultLevel: "medium";
127
+ readonly canDisable: false;
128
+ readonly outputsThinking: false;
129
+ readonly outputsSignatures: false;
130
+ };
98
131
  readonly provider: "openai";
99
132
  }, {
100
133
  readonly type: "text";
@@ -104,6 +137,11 @@ export declare const textModels: readonly [{
104
137
  readonly maxOutputTokens: 100000;
105
138
  readonly inputTokenCost: 20;
106
139
  readonly outputTokenCost: 80;
140
+ readonly reasoning: {
141
+ readonly canDisable: false;
142
+ readonly outputsThinking: false;
143
+ readonly outputsSignatures: false;
144
+ };
107
145
  readonly provider: "openai";
108
146
  }, {
109
147
  readonly type: "text";
@@ -115,6 +153,13 @@ export declare const textModels: readonly [{
115
153
  readonly cachedInputTokenCost: 7.5;
116
154
  readonly outputTokenCost: 60;
117
155
  readonly outputTokensPerSecond: 100;
156
+ readonly reasoning: {
157
+ readonly levels: readonly ["low", "medium", "high"];
158
+ readonly defaultLevel: "medium";
159
+ readonly canDisable: false;
160
+ readonly outputsThinking: false;
161
+ readonly outputsSignatures: false;
162
+ };
118
163
  readonly provider: "openai";
119
164
  }, {
120
165
  readonly type: "text";
@@ -179,6 +224,128 @@ export declare const textModels: readonly [{
179
224
  readonly outputTokenCost: 0.4;
180
225
  readonly outputTokensPerSecond: 142;
181
226
  readonly provider: "openai";
227
+ }, {
228
+ readonly type: "text";
229
+ readonly modelName: "gpt-5";
230
+ readonly description: "GPT-5 is a frontier reasoning model with 400K context window. Supports reasoning tokens. Knowledge cutoff: September 2024.";
231
+ readonly maxInputTokens: 400000;
232
+ readonly maxOutputTokens: 128000;
233
+ readonly inputTokenCost: 1.25;
234
+ readonly cachedInputTokenCost: 0.125;
235
+ readonly outputTokenCost: 10;
236
+ readonly outputTokensPerSecond: 72;
237
+ readonly reasoning: {
238
+ readonly levels: readonly ["minimal", "low", "medium", "high"];
239
+ readonly defaultLevel: "medium";
240
+ readonly canDisable: false;
241
+ readonly outputsThinking: false;
242
+ readonly outputsSignatures: false;
243
+ };
244
+ readonly provider: "openai";
245
+ }, {
246
+ readonly type: "text";
247
+ readonly modelName: "gpt-5-mini";
248
+ readonly description: "GPT-5 mini is a faster, more cost-efficient version of GPT-5 with 400K context window. Knowledge cutoff: May 2024.";
249
+ readonly maxInputTokens: 400000;
250
+ readonly maxOutputTokens: 128000;
251
+ readonly inputTokenCost: 0.25;
252
+ readonly cachedInputTokenCost: 0.025;
253
+ readonly outputTokenCost: 2;
254
+ readonly outputTokensPerSecond: 69;
255
+ readonly reasoning: {
256
+ readonly levels: readonly ["minimal", "low", "medium", "high"];
257
+ readonly defaultLevel: "medium";
258
+ readonly canDisable: false;
259
+ readonly outputsThinking: false;
260
+ readonly outputsSignatures: false;
261
+ };
262
+ readonly provider: "openai";
263
+ }, {
264
+ readonly type: "text";
265
+ readonly modelName: "gpt-5-nano";
266
+ readonly description: "GPT-5 nano is the fastest and most affordable GPT-5 variant with 400K context window. Knowledge cutoff: May 2024.";
267
+ readonly maxInputTokens: 400000;
268
+ readonly maxOutputTokens: 128000;
269
+ readonly inputTokenCost: 0.05;
270
+ readonly cachedInputTokenCost: 0.005;
271
+ readonly outputTokenCost: 0.4;
272
+ readonly outputTokensPerSecond: 140;
273
+ readonly reasoning: {
274
+ readonly levels: readonly ["minimal", "low", "medium", "high"];
275
+ readonly defaultLevel: "medium";
276
+ readonly canDisable: false;
277
+ readonly outputsThinking: false;
278
+ readonly outputsSignatures: false;
279
+ };
280
+ readonly provider: "openai";
281
+ }, {
282
+ readonly type: "text";
283
+ readonly modelName: "gpt-5.1";
284
+ readonly description: "GPT-5.1 is the flagship model for coding and agentic tasks with configurable reasoning effort. 400K context window. Knowledge cutoff: September 2024.";
285
+ readonly maxInputTokens: 400000;
286
+ readonly maxOutputTokens: 128000;
287
+ readonly inputTokenCost: 1.25;
288
+ readonly cachedInputTokenCost: 0.125;
289
+ readonly outputTokenCost: 10;
290
+ readonly reasoning: {
291
+ readonly levels: readonly ["none", "low", "medium", "high"];
292
+ readonly defaultLevel: "none";
293
+ readonly canDisable: true;
294
+ readonly outputsThinking: false;
295
+ readonly outputsSignatures: false;
296
+ };
297
+ readonly provider: "openai";
298
+ }, {
299
+ readonly type: "text";
300
+ readonly modelName: "gpt-5.2";
301
+ readonly description: "GPT-5.2 is the flagship model for coding and agentic tasks across industries. 400K context window. Knowledge cutoff: August 2025.";
302
+ readonly maxInputTokens: 400000;
303
+ readonly maxOutputTokens: 128000;
304
+ readonly inputTokenCost: 1.75;
305
+ readonly cachedInputTokenCost: 0.175;
306
+ readonly outputTokenCost: 14;
307
+ readonly outputTokensPerSecond: 61;
308
+ readonly reasoning: {
309
+ readonly levels: readonly ["none", "low", "medium", "high"];
310
+ readonly defaultLevel: "none";
311
+ readonly canDisable: true;
312
+ readonly outputsThinking: false;
313
+ readonly outputsSignatures: false;
314
+ };
315
+ readonly provider: "openai";
316
+ }, {
317
+ readonly type: "text";
318
+ readonly modelName: "gpt-5.4";
319
+ readonly description: "GPT-5.4 is the most capable and efficient frontier model for complex professional work. 1M context window, state-of-the-art coding and tool use. Standard pricing for ≤272K tokens, 2x input/1.5x output for >272K. Knowledge cutoff: August 2025.";
320
+ readonly maxInputTokens: 1050000;
321
+ readonly maxOutputTokens: 128000;
322
+ readonly inputTokenCost: 2.5;
323
+ readonly cachedInputTokenCost: 0.25;
324
+ readonly outputTokenCost: 15;
325
+ readonly reasoning: {
326
+ readonly levels: readonly ["none", "low", "medium", "high", "xhigh"];
327
+ readonly defaultLevel: "none";
328
+ readonly canDisable: true;
329
+ readonly outputsThinking: false;
330
+ readonly outputsSignatures: false;
331
+ };
332
+ readonly provider: "openai";
333
+ }, {
334
+ readonly type: "text";
335
+ readonly modelName: "gpt-5.4-pro";
336
+ readonly description: "GPT-5.4 Pro uses more compute for complex reasoning tasks. 1M context window. Standard pricing for ≤272K tokens. Knowledge cutoff: August 2025.";
337
+ readonly maxInputTokens: 1050000;
338
+ readonly maxOutputTokens: 128000;
339
+ readonly inputTokenCost: 30;
340
+ readonly outputTokenCost: 180;
341
+ readonly reasoning: {
342
+ readonly levels: readonly ["medium", "high", "xhigh"];
343
+ readonly defaultLevel: "medium";
344
+ readonly canDisable: false;
345
+ readonly outputsThinking: false;
346
+ readonly outputsSignatures: false;
347
+ };
348
+ readonly provider: "openai";
182
349
  }, {
183
350
  readonly type: "text";
184
351
  readonly modelName: "gemini-3.1-pro-preview";
@@ -188,15 +355,23 @@ export declare const textModels: readonly [{
188
355
  readonly inputTokenCost: 2;
189
356
  readonly outputTokenCost: 12;
190
357
  readonly outputTokensPerSecond: 112;
358
+ readonly reasoning: {
359
+ readonly levels: readonly ["low", "medium", "high"];
360
+ readonly defaultLevel: "high";
361
+ readonly canDisable: false;
362
+ readonly outputsThinking: true;
363
+ readonly outputsSignatures: true;
364
+ };
191
365
  readonly provider: "google";
192
366
  }, {
193
367
  readonly type: "text";
194
368
  readonly modelName: "gemini-3-pro-preview";
195
- readonly description: "Gemini 3 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Superseded by 3.1 Pro.";
369
+ readonly description: "DEPRECATED: Shut down March 9, 2026. Use gemini-3.1-pro-preview instead.";
196
370
  readonly maxInputTokens: 1048576;
197
371
  readonly maxOutputTokens: 65536;
198
372
  readonly inputTokenCost: 2;
199
373
  readonly outputTokenCost: 12;
374
+ readonly disabled: true;
200
375
  readonly provider: "google";
201
376
  }, {
202
377
  readonly type: "text";
@@ -206,6 +381,31 @@ export declare const textModels: readonly [{
206
381
  readonly maxOutputTokens: 65536;
207
382
  readonly inputTokenCost: 0.5;
208
383
  readonly outputTokenCost: 3;
384
+ readonly outputTokensPerSecond: 146;
385
+ readonly reasoning: {
386
+ readonly levels: readonly ["minimal", "low", "medium", "high"];
387
+ readonly defaultLevel: "high";
388
+ readonly canDisable: false;
389
+ readonly outputsThinking: true;
390
+ readonly outputsSignatures: true;
391
+ };
392
+ readonly provider: "google";
393
+ }, {
394
+ readonly type: "text";
395
+ readonly modelName: "gemini-3.1-flash-lite-preview";
396
+ readonly description: "Most cost-effective Gemini 3.1 model with thinking support and 1M context window. 2.5x faster TTFA and 45% faster output than 2.5 Flash. Released March 2026.";
397
+ readonly maxInputTokens: 1048576;
398
+ readonly maxOutputTokens: 65536;
399
+ readonly inputTokenCost: 0.25;
400
+ readonly outputTokenCost: 1.5;
401
+ readonly outputTokensPerSecond: 379;
402
+ readonly reasoning: {
403
+ readonly levels: readonly ["minimal", "low", "medium", "high"];
404
+ readonly defaultLevel: "minimal";
405
+ readonly canDisable: false;
406
+ readonly outputsThinking: true;
407
+ readonly outputsSignatures: true;
408
+ };
209
409
  readonly provider: "google";
210
410
  }, {
211
411
  readonly type: "text";
@@ -215,7 +415,12 @@ export declare const textModels: readonly [{
215
415
  readonly maxOutputTokens: 65536;
216
416
  readonly inputTokenCost: 1.25;
217
417
  readonly outputTokenCost: 10;
218
- readonly outputTokensPerSecond: 175;
418
+ readonly outputTokensPerSecond: 134;
419
+ readonly reasoning: {
420
+ readonly canDisable: false;
421
+ readonly outputsThinking: true;
422
+ readonly outputsSignatures: true;
423
+ };
219
424
  readonly provider: "google";
220
425
  }, {
221
426
  readonly type: "text";
@@ -225,7 +430,12 @@ export declare const textModels: readonly [{
225
430
  readonly maxOutputTokens: 65536;
226
431
  readonly inputTokenCost: 0.3;
227
432
  readonly outputTokenCost: 2.5;
228
- readonly outputTokensPerSecond: 225;
433
+ readonly outputTokensPerSecond: 245;
434
+ readonly reasoning: {
435
+ readonly canDisable: true;
436
+ readonly outputsThinking: true;
437
+ readonly outputsSignatures: true;
438
+ };
229
439
  readonly provider: "google";
230
440
  }, {
231
441
  readonly type: "text";
@@ -236,6 +446,11 @@ export declare const textModels: readonly [{
236
446
  readonly inputTokenCost: 0.1;
237
447
  readonly outputTokenCost: 0.4;
238
448
  readonly outputTokensPerSecond: 400;
449
+ readonly reasoning: {
450
+ readonly canDisable: true;
451
+ readonly outputsThinking: true;
452
+ readonly outputsSignatures: false;
453
+ };
239
454
  readonly provider: "google";
240
455
  }, {
241
456
  readonly type: "text";
@@ -306,20 +521,33 @@ export declare const textModels: readonly [{
306
521
  }, {
307
522
  readonly type: "text";
308
523
  readonly modelName: "claude-opus-4-6";
309
- readonly description: "The most intelligent Claude model for building agents and coding. 200K context window, 128K max output.";
524
+ readonly description: "The most intelligent Claude model for building agents and coding. 200K context window (1M in beta), 128K max output.";
310
525
  readonly maxInputTokens: 200000;
311
526
  readonly maxOutputTokens: 131072;
312
527
  readonly inputTokenCost: 5;
528
+ readonly cachedInputTokenCost: 0.5;
313
529
  readonly outputTokenCost: 25;
530
+ readonly outputTokensPerSecond: 53;
531
+ readonly reasoning: {
532
+ readonly canDisable: true;
533
+ readonly outputsThinking: true;
534
+ readonly outputsSignatures: true;
535
+ };
314
536
  readonly provider: "anthropic";
315
537
  }, {
316
538
  readonly type: "text";
317
539
  readonly modelName: "claude-sonnet-4-6";
318
- readonly description: "The best combination of speed and intelligence. 200K context window, 64K max output.";
540
+ readonly description: "The best combination of speed and intelligence. 200K context window (1M in beta), 64K max output.";
319
541
  readonly maxInputTokens: 200000;
320
542
  readonly maxOutputTokens: 64000;
321
543
  readonly inputTokenCost: 3;
544
+ readonly cachedInputTokenCost: 0.3;
322
545
  readonly outputTokenCost: 15;
546
+ readonly reasoning: {
547
+ readonly canDisable: true;
548
+ readonly outputsThinking: true;
549
+ readonly outputsSignatures: true;
550
+ };
323
551
  readonly provider: "anthropic";
324
552
  }, {
325
553
  readonly type: "text";
@@ -328,7 +556,14 @@ export declare const textModels: readonly [{
328
556
  readonly maxInputTokens: 200000;
329
557
  readonly maxOutputTokens: 64000;
330
558
  readonly inputTokenCost: 1;
559
+ readonly cachedInputTokenCost: 0.1;
331
560
  readonly outputTokenCost: 5;
561
+ readonly outputTokensPerSecond: 97;
562
+ readonly reasoning: {
563
+ readonly canDisable: true;
564
+ readonly outputsThinking: true;
565
+ readonly outputsSignatures: true;
566
+ };
332
567
  readonly provider: "anthropic";
333
568
  }, {
334
569
  readonly type: "text";
@@ -339,6 +574,11 @@ export declare const textModels: readonly [{
339
574
  readonly inputTokenCost: 3;
340
575
  readonly outputTokenCost: 15;
341
576
  readonly outputTokensPerSecond: 78;
577
+ readonly reasoning: {
578
+ readonly canDisable: true;
579
+ readonly outputsThinking: true;
580
+ readonly outputsSignatures: true;
581
+ };
342
582
  readonly disabled: true;
343
583
  readonly provider: "anthropic";
344
584
  }, {
@@ -465,6 +705,13 @@ export declare function getModel(modelName: ModelName): {
465
705
  readonly cachedInputTokenCost: 0.5;
466
706
  readonly outputTokenCost: 8;
467
707
  readonly outputTokensPerSecond: 94;
708
+ readonly reasoning: {
709
+ readonly levels: readonly ["low", "medium", "high"];
710
+ readonly defaultLevel: "medium";
711
+ readonly canDisable: false;
712
+ readonly outputsThinking: false;
713
+ readonly outputsSignatures: false;
714
+ };
468
715
  readonly provider: "openai";
469
716
  } | {
470
717
  readonly type: "text";
@@ -476,6 +723,13 @@ export declare function getModel(modelName: ModelName): {
476
723
  readonly cachedInputTokenCost: 0.55;
477
724
  readonly outputTokenCost: 4.4;
478
725
  readonly outputTokensPerSecond: 214;
726
+ readonly reasoning: {
727
+ readonly levels: readonly ["low", "medium", "high"];
728
+ readonly defaultLevel: "medium";
729
+ readonly canDisable: false;
730
+ readonly outputsThinking: false;
731
+ readonly outputsSignatures: false;
732
+ };
479
733
  readonly provider: "openai";
480
734
  } | {
481
735
  readonly type: "text";
@@ -487,6 +741,13 @@ export declare function getModel(modelName: ModelName): {
487
741
  readonly cachedInputTokenCost: 0.275;
488
742
  readonly outputTokenCost: 4.4;
489
743
  readonly outputTokensPerSecond: 135;
744
+ readonly reasoning: {
745
+ readonly levels: readonly ["low", "medium", "high"];
746
+ readonly defaultLevel: "medium";
747
+ readonly canDisable: false;
748
+ readonly outputsThinking: false;
749
+ readonly outputsSignatures: false;
750
+ };
490
751
  readonly provider: "openai";
491
752
  } | {
492
753
  readonly type: "text";
@@ -496,6 +757,11 @@ export declare function getModel(modelName: ModelName): {
496
757
  readonly maxOutputTokens: 100000;
497
758
  readonly inputTokenCost: 20;
498
759
  readonly outputTokenCost: 80;
760
+ readonly reasoning: {
761
+ readonly canDisable: false;
762
+ readonly outputsThinking: false;
763
+ readonly outputsSignatures: false;
764
+ };
499
765
  readonly provider: "openai";
500
766
  } | {
501
767
  readonly type: "text";
@@ -507,6 +773,13 @@ export declare function getModel(modelName: ModelName): {
507
773
  readonly cachedInputTokenCost: 7.5;
508
774
  readonly outputTokenCost: 60;
509
775
  readonly outputTokensPerSecond: 100;
776
+ readonly reasoning: {
777
+ readonly levels: readonly ["low", "medium", "high"];
778
+ readonly defaultLevel: "medium";
779
+ readonly canDisable: false;
780
+ readonly outputsThinking: false;
781
+ readonly outputsSignatures: false;
782
+ };
510
783
  readonly provider: "openai";
511
784
  } | {
512
785
  readonly type: "text";
@@ -571,6 +844,128 @@ export declare function getModel(modelName: ModelName): {
571
844
  readonly outputTokenCost: 0.4;
572
845
  readonly outputTokensPerSecond: 142;
573
846
  readonly provider: "openai";
847
+ } | {
848
+ readonly type: "text";
849
+ readonly modelName: "gpt-5";
850
+ readonly description: "GPT-5 is a frontier reasoning model with 400K context window. Supports reasoning tokens. Knowledge cutoff: September 2024.";
851
+ readonly maxInputTokens: 400000;
852
+ readonly maxOutputTokens: 128000;
853
+ readonly inputTokenCost: 1.25;
854
+ readonly cachedInputTokenCost: 0.125;
855
+ readonly outputTokenCost: 10;
856
+ readonly outputTokensPerSecond: 72;
857
+ readonly reasoning: {
858
+ readonly levels: readonly ["minimal", "low", "medium", "high"];
859
+ readonly defaultLevel: "medium";
860
+ readonly canDisable: false;
861
+ readonly outputsThinking: false;
862
+ readonly outputsSignatures: false;
863
+ };
864
+ readonly provider: "openai";
865
+ } | {
866
+ readonly type: "text";
867
+ readonly modelName: "gpt-5-mini";
868
+ readonly description: "GPT-5 mini is a faster, more cost-efficient version of GPT-5 with 400K context window. Knowledge cutoff: May 2024.";
869
+ readonly maxInputTokens: 400000;
870
+ readonly maxOutputTokens: 128000;
871
+ readonly inputTokenCost: 0.25;
872
+ readonly cachedInputTokenCost: 0.025;
873
+ readonly outputTokenCost: 2;
874
+ readonly outputTokensPerSecond: 69;
875
+ readonly reasoning: {
876
+ readonly levels: readonly ["minimal", "low", "medium", "high"];
877
+ readonly defaultLevel: "medium";
878
+ readonly canDisable: false;
879
+ readonly outputsThinking: false;
880
+ readonly outputsSignatures: false;
881
+ };
882
+ readonly provider: "openai";
883
+ } | {
884
+ readonly type: "text";
885
+ readonly modelName: "gpt-5-nano";
886
+ readonly description: "GPT-5 nano is the fastest and most affordable GPT-5 variant with 400K context window. Knowledge cutoff: May 2024.";
887
+ readonly maxInputTokens: 400000;
888
+ readonly maxOutputTokens: 128000;
889
+ readonly inputTokenCost: 0.05;
890
+ readonly cachedInputTokenCost: 0.005;
891
+ readonly outputTokenCost: 0.4;
892
+ readonly outputTokensPerSecond: 140;
893
+ readonly reasoning: {
894
+ readonly levels: readonly ["minimal", "low", "medium", "high"];
895
+ readonly defaultLevel: "medium";
896
+ readonly canDisable: false;
897
+ readonly outputsThinking: false;
898
+ readonly outputsSignatures: false;
899
+ };
900
+ readonly provider: "openai";
901
+ } | {
902
+ readonly type: "text";
903
+ readonly modelName: "gpt-5.1";
904
+ readonly description: "GPT-5.1 is the flagship model for coding and agentic tasks with configurable reasoning effort. 400K context window. Knowledge cutoff: September 2024.";
905
+ readonly maxInputTokens: 400000;
906
+ readonly maxOutputTokens: 128000;
907
+ readonly inputTokenCost: 1.25;
908
+ readonly cachedInputTokenCost: 0.125;
909
+ readonly outputTokenCost: 10;
910
+ readonly reasoning: {
911
+ readonly levels: readonly ["none", "low", "medium", "high"];
912
+ readonly defaultLevel: "none";
913
+ readonly canDisable: true;
914
+ readonly outputsThinking: false;
915
+ readonly outputsSignatures: false;
916
+ };
917
+ readonly provider: "openai";
918
+ } | {
919
+ readonly type: "text";
920
+ readonly modelName: "gpt-5.2";
921
+ readonly description: "GPT-5.2 is the flagship model for coding and agentic tasks across industries. 400K context window. Knowledge cutoff: August 2025.";
922
+ readonly maxInputTokens: 400000;
923
+ readonly maxOutputTokens: 128000;
924
+ readonly inputTokenCost: 1.75;
925
+ readonly cachedInputTokenCost: 0.175;
926
+ readonly outputTokenCost: 14;
927
+ readonly outputTokensPerSecond: 61;
928
+ readonly reasoning: {
929
+ readonly levels: readonly ["none", "low", "medium", "high"];
930
+ readonly defaultLevel: "none";
931
+ readonly canDisable: true;
932
+ readonly outputsThinking: false;
933
+ readonly outputsSignatures: false;
934
+ };
935
+ readonly provider: "openai";
936
+ } | {
937
+ readonly type: "text";
938
+ readonly modelName: "gpt-5.4";
939
+ readonly description: "GPT-5.4 is the most capable and efficient frontier model for complex professional work. 1M context window, state-of-the-art coding and tool use. Standard pricing for ≤272K tokens, 2x input/1.5x output for >272K. Knowledge cutoff: August 2025.";
940
+ readonly maxInputTokens: 1050000;
941
+ readonly maxOutputTokens: 128000;
942
+ readonly inputTokenCost: 2.5;
943
+ readonly cachedInputTokenCost: 0.25;
944
+ readonly outputTokenCost: 15;
945
+ readonly reasoning: {
946
+ readonly levels: readonly ["none", "low", "medium", "high", "xhigh"];
947
+ readonly defaultLevel: "none";
948
+ readonly canDisable: true;
949
+ readonly outputsThinking: false;
950
+ readonly outputsSignatures: false;
951
+ };
952
+ readonly provider: "openai";
953
+ } | {
954
+ readonly type: "text";
955
+ readonly modelName: "gpt-5.4-pro";
956
+ readonly description: "GPT-5.4 Pro uses more compute for complex reasoning tasks. 1M context window. Standard pricing for ≤272K tokens. Knowledge cutoff: August 2025.";
957
+ readonly maxInputTokens: 1050000;
958
+ readonly maxOutputTokens: 128000;
959
+ readonly inputTokenCost: 30;
960
+ readonly outputTokenCost: 180;
961
+ readonly reasoning: {
962
+ readonly levels: readonly ["medium", "high", "xhigh"];
963
+ readonly defaultLevel: "medium";
964
+ readonly canDisable: false;
965
+ readonly outputsThinking: false;
966
+ readonly outputsSignatures: false;
967
+ };
968
+ readonly provider: "openai";
574
969
  } | {
575
970
  readonly type: "text";
576
971
  readonly modelName: "gemini-3.1-pro-preview";
@@ -580,15 +975,23 @@ export declare function getModel(modelName: ModelName): {
580
975
  readonly inputTokenCost: 2;
581
976
  readonly outputTokenCost: 12;
582
977
  readonly outputTokensPerSecond: 112;
978
+ readonly reasoning: {
979
+ readonly levels: readonly ["low", "medium", "high"];
980
+ readonly defaultLevel: "high";
981
+ readonly canDisable: false;
982
+ readonly outputsThinking: true;
983
+ readonly outputsSignatures: true;
984
+ };
583
985
  readonly provider: "google";
584
986
  } | {
585
987
  readonly type: "text";
586
988
  readonly modelName: "gemini-3-pro-preview";
587
- readonly description: "Gemini 3 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Superseded by 3.1 Pro.";
989
+ readonly description: "DEPRECATED: Shut down March 9, 2026. Use gemini-3.1-pro-preview instead.";
588
990
  readonly maxInputTokens: 1048576;
589
991
  readonly maxOutputTokens: 65536;
590
992
  readonly inputTokenCost: 2;
591
993
  readonly outputTokenCost: 12;
994
+ readonly disabled: true;
592
995
  readonly provider: "google";
593
996
  } | {
594
997
  readonly type: "text";
@@ -598,6 +1001,31 @@ export declare function getModel(modelName: ModelName): {
598
1001
  readonly maxOutputTokens: 65536;
599
1002
  readonly inputTokenCost: 0.5;
600
1003
  readonly outputTokenCost: 3;
1004
+ readonly outputTokensPerSecond: 146;
1005
+ readonly reasoning: {
1006
+ readonly levels: readonly ["minimal", "low", "medium", "high"];
1007
+ readonly defaultLevel: "high";
1008
+ readonly canDisable: false;
1009
+ readonly outputsThinking: true;
1010
+ readonly outputsSignatures: true;
1011
+ };
1012
+ readonly provider: "google";
1013
+ } | {
1014
+ readonly type: "text";
1015
+ readonly modelName: "gemini-3.1-flash-lite-preview";
1016
+ readonly description: "Most cost-effective Gemini 3.1 model with thinking support and 1M context window. 2.5x faster TTFA and 45% faster output than 2.5 Flash. Released March 2026.";
1017
+ readonly maxInputTokens: 1048576;
1018
+ readonly maxOutputTokens: 65536;
1019
+ readonly inputTokenCost: 0.25;
1020
+ readonly outputTokenCost: 1.5;
1021
+ readonly outputTokensPerSecond: 379;
1022
+ readonly reasoning: {
1023
+ readonly levels: readonly ["minimal", "low", "medium", "high"];
1024
+ readonly defaultLevel: "minimal";
1025
+ readonly canDisable: false;
1026
+ readonly outputsThinking: true;
1027
+ readonly outputsSignatures: true;
1028
+ };
601
1029
  readonly provider: "google";
602
1030
  } | {
603
1031
  readonly type: "text";
@@ -607,7 +1035,12 @@ export declare function getModel(modelName: ModelName): {
607
1035
  readonly maxOutputTokens: 65536;
608
1036
  readonly inputTokenCost: 1.25;
609
1037
  readonly outputTokenCost: 10;
610
- readonly outputTokensPerSecond: 175;
1038
+ readonly outputTokensPerSecond: 134;
1039
+ readonly reasoning: {
1040
+ readonly canDisable: false;
1041
+ readonly outputsThinking: true;
1042
+ readonly outputsSignatures: true;
1043
+ };
611
1044
  readonly provider: "google";
612
1045
  } | {
613
1046
  readonly type: "text";
@@ -617,7 +1050,12 @@ export declare function getModel(modelName: ModelName): {
617
1050
  readonly maxOutputTokens: 65536;
618
1051
  readonly inputTokenCost: 0.3;
619
1052
  readonly outputTokenCost: 2.5;
620
- readonly outputTokensPerSecond: 225;
1053
+ readonly outputTokensPerSecond: 245;
1054
+ readonly reasoning: {
1055
+ readonly canDisable: true;
1056
+ readonly outputsThinking: true;
1057
+ readonly outputsSignatures: true;
1058
+ };
621
1059
  readonly provider: "google";
622
1060
  } | {
623
1061
  readonly type: "text";
@@ -628,6 +1066,11 @@ export declare function getModel(modelName: ModelName): {
628
1066
  readonly inputTokenCost: 0.1;
629
1067
  readonly outputTokenCost: 0.4;
630
1068
  readonly outputTokensPerSecond: 400;
1069
+ readonly reasoning: {
1070
+ readonly canDisable: true;
1071
+ readonly outputsThinking: true;
1072
+ readonly outputsSignatures: false;
1073
+ };
631
1074
  readonly provider: "google";
632
1075
  } | {
633
1076
  readonly type: "text";
@@ -698,20 +1141,33 @@ export declare function getModel(modelName: ModelName): {
698
1141
  } | {
699
1142
  readonly type: "text";
700
1143
  readonly modelName: "claude-opus-4-6";
701
- readonly description: "The most intelligent Claude model for building agents and coding. 200K context window, 128K max output.";
1144
+ readonly description: "The most intelligent Claude model for building agents and coding. 200K context window (1M in beta), 128K max output.";
702
1145
  readonly maxInputTokens: 200000;
703
1146
  readonly maxOutputTokens: 131072;
704
1147
  readonly inputTokenCost: 5;
1148
+ readonly cachedInputTokenCost: 0.5;
705
1149
  readonly outputTokenCost: 25;
1150
+ readonly outputTokensPerSecond: 53;
1151
+ readonly reasoning: {
1152
+ readonly canDisable: true;
1153
+ readonly outputsThinking: true;
1154
+ readonly outputsSignatures: true;
1155
+ };
706
1156
  readonly provider: "anthropic";
707
1157
  } | {
708
1158
  readonly type: "text";
709
1159
  readonly modelName: "claude-sonnet-4-6";
710
- readonly description: "The best combination of speed and intelligence. 200K context window, 64K max output.";
1160
+ readonly description: "The best combination of speed and intelligence. 200K context window (1M in beta), 64K max output.";
711
1161
  readonly maxInputTokens: 200000;
712
1162
  readonly maxOutputTokens: 64000;
713
1163
  readonly inputTokenCost: 3;
1164
+ readonly cachedInputTokenCost: 0.3;
714
1165
  readonly outputTokenCost: 15;
1166
+ readonly reasoning: {
1167
+ readonly canDisable: true;
1168
+ readonly outputsThinking: true;
1169
+ readonly outputsSignatures: true;
1170
+ };
715
1171
  readonly provider: "anthropic";
716
1172
  } | {
717
1173
  readonly type: "text";
@@ -720,7 +1176,14 @@ export declare function getModel(modelName: ModelName): {
720
1176
  readonly maxInputTokens: 200000;
721
1177
  readonly maxOutputTokens: 64000;
722
1178
  readonly inputTokenCost: 1;
1179
+ readonly cachedInputTokenCost: 0.1;
723
1180
  readonly outputTokenCost: 5;
1181
+ readonly outputTokensPerSecond: 97;
1182
+ readonly reasoning: {
1183
+ readonly canDisable: true;
1184
+ readonly outputsThinking: true;
1185
+ readonly outputsSignatures: true;
1186
+ };
724
1187
  readonly provider: "anthropic";
725
1188
  } | {
726
1189
  readonly type: "text";
@@ -731,6 +1194,11 @@ export declare function getModel(modelName: ModelName): {
731
1194
  readonly inputTokenCost: 3;
732
1195
  readonly outputTokenCost: 15;
733
1196
  readonly outputTokensPerSecond: 78;
1197
+ readonly reasoning: {
1198
+ readonly canDisable: true;
1199
+ readonly outputsThinking: true;
1200
+ readonly outputsSignatures: true;
1201
+ };
734
1202
  readonly disabled: true;
735
1203
  readonly provider: "anthropic";
736
1204
  } | {
package/dist/models.js CHANGED
@@ -52,6 +52,13 @@ export const textModels = [
52
52
  cachedInputTokenCost: 0.5,
53
53
  outputTokenCost: 8,
54
54
  outputTokensPerSecond: 94,
55
+ reasoning: {
56
+ levels: ["low", "medium", "high"],
57
+ defaultLevel: "medium",
58
+ canDisable: false,
59
+ outputsThinking: false,
60
+ outputsSignatures: false,
61
+ },
55
62
  provider: "openai",
56
63
  },
57
64
  {
@@ -64,6 +71,13 @@ export const textModels = [
64
71
  cachedInputTokenCost: 0.55,
65
72
  outputTokenCost: 4.4,
66
73
  outputTokensPerSecond: 214,
74
+ reasoning: {
75
+ levels: ["low", "medium", "high"],
76
+ defaultLevel: "medium",
77
+ canDisable: false,
78
+ outputsThinking: false,
79
+ outputsSignatures: false,
80
+ },
67
81
  provider: "openai",
68
82
  },
69
83
  {
@@ -76,6 +90,13 @@ export const textModels = [
76
90
  cachedInputTokenCost: 0.275,
77
91
  outputTokenCost: 4.4,
78
92
  outputTokensPerSecond: 135,
93
+ reasoning: {
94
+ levels: ["low", "medium", "high"],
95
+ defaultLevel: "medium",
96
+ canDisable: false,
97
+ outputsThinking: false,
98
+ outputsSignatures: false,
99
+ },
79
100
  provider: "openai",
80
101
  },
81
102
  {
@@ -86,6 +107,11 @@ export const textModels = [
86
107
  maxOutputTokens: 100000,
87
108
  inputTokenCost: 20,
88
109
  outputTokenCost: 80,
110
+ reasoning: {
111
+ canDisable: false,
112
+ outputsThinking: false,
113
+ outputsSignatures: false,
114
+ },
89
115
  provider: "openai",
90
116
  },
91
117
  {
@@ -98,6 +124,13 @@ export const textModels = [
98
124
  cachedInputTokenCost: 7.5,
99
125
  outputTokenCost: 60,
100
126
  outputTokensPerSecond: 100,
127
+ reasoning: {
128
+ levels: ["low", "medium", "high"],
129
+ defaultLevel: "medium",
130
+ canDisable: false,
131
+ outputsThinking: false,
132
+ outputsSignatures: false,
133
+ },
101
134
  provider: "openai",
102
135
  },
103
136
  {
@@ -169,6 +202,135 @@ export const textModels = [
169
202
  outputTokensPerSecond: 142,
170
203
  provider: "openai",
171
204
  },
205
+ {
206
+ type: "text",
207
+ modelName: "gpt-5",
208
+ description: "GPT-5 is a frontier reasoning model with 400K context window. Supports reasoning tokens. Knowledge cutoff: September 2024.",
209
+ maxInputTokens: 400000,
210
+ maxOutputTokens: 128000,
211
+ inputTokenCost: 1.25,
212
+ cachedInputTokenCost: 0.125,
213
+ outputTokenCost: 10,
214
+ outputTokensPerSecond: 72,
215
+ reasoning: {
216
+ levels: ["minimal", "low", "medium", "high"],
217
+ defaultLevel: "medium",
218
+ canDisable: false,
219
+ outputsThinking: false,
220
+ outputsSignatures: false,
221
+ },
222
+ provider: "openai",
223
+ },
224
+ {
225
+ type: "text",
226
+ modelName: "gpt-5-mini",
227
+ description: "GPT-5 mini is a faster, more cost-efficient version of GPT-5 with 400K context window. Knowledge cutoff: May 2024.",
228
+ maxInputTokens: 400000,
229
+ maxOutputTokens: 128000,
230
+ inputTokenCost: 0.25,
231
+ cachedInputTokenCost: 0.025,
232
+ outputTokenCost: 2,
233
+ outputTokensPerSecond: 69,
234
+ reasoning: {
235
+ levels: ["minimal", "low", "medium", "high"],
236
+ defaultLevel: "medium",
237
+ canDisable: false,
238
+ outputsThinking: false,
239
+ outputsSignatures: false,
240
+ },
241
+ provider: "openai",
242
+ },
243
+ {
244
+ type: "text",
245
+ modelName: "gpt-5-nano",
246
+ description: "GPT-5 nano is the fastest and most affordable GPT-5 variant with 400K context window. Knowledge cutoff: May 2024.",
247
+ maxInputTokens: 400000,
248
+ maxOutputTokens: 128000,
249
+ inputTokenCost: 0.05,
250
+ cachedInputTokenCost: 0.005,
251
+ outputTokenCost: 0.4,
252
+ outputTokensPerSecond: 140,
253
+ reasoning: {
254
+ levels: ["minimal", "low", "medium", "high"],
255
+ defaultLevel: "medium",
256
+ canDisable: false,
257
+ outputsThinking: false,
258
+ outputsSignatures: false,
259
+ },
260
+ provider: "openai",
261
+ },
262
+ {
263
+ type: "text",
264
+ modelName: "gpt-5.1",
265
+ description: "GPT-5.1 is the flagship model for coding and agentic tasks with configurable reasoning effort. 400K context window. Knowledge cutoff: September 2024.",
266
+ maxInputTokens: 400000,
267
+ maxOutputTokens: 128000,
268
+ inputTokenCost: 1.25,
269
+ cachedInputTokenCost: 0.125,
270
+ outputTokenCost: 10,
271
+ reasoning: {
272
+ levels: ["none", "low", "medium", "high"],
273
+ defaultLevel: "none",
274
+ canDisable: true,
275
+ outputsThinking: false,
276
+ outputsSignatures: false,
277
+ },
278
+ provider: "openai",
279
+ },
280
+ {
281
+ type: "text",
282
+ modelName: "gpt-5.2",
283
+ description: "GPT-5.2 is the flagship model for coding and agentic tasks across industries. 400K context window. Knowledge cutoff: August 2025.",
284
+ maxInputTokens: 400000,
285
+ maxOutputTokens: 128000,
286
+ inputTokenCost: 1.75,
287
+ cachedInputTokenCost: 0.175,
288
+ outputTokenCost: 14,
289
+ outputTokensPerSecond: 61,
290
+ reasoning: {
291
+ levels: ["none", "low", "medium", "high"],
292
+ defaultLevel: "none",
293
+ canDisable: true,
294
+ outputsThinking: false,
295
+ outputsSignatures: false,
296
+ },
297
+ provider: "openai",
298
+ },
299
+ {
300
+ type: "text",
301
+ modelName: "gpt-5.4",
302
+ description: "GPT-5.4 is the most capable and efficient frontier model for complex professional work. 1M context window, state-of-the-art coding and tool use. Standard pricing for ≤272K tokens, 2x input/1.5x output for >272K. Knowledge cutoff: August 2025.",
303
+ maxInputTokens: 1_050_000,
304
+ maxOutputTokens: 128000,
305
+ inputTokenCost: 2.5,
306
+ cachedInputTokenCost: 0.25,
307
+ outputTokenCost: 15,
308
+ reasoning: {
309
+ levels: ["none", "low", "medium", "high", "xhigh"],
310
+ defaultLevel: "none",
311
+ canDisable: true,
312
+ outputsThinking: false,
313
+ outputsSignatures: false,
314
+ },
315
+ provider: "openai",
316
+ },
317
+ {
318
+ type: "text",
319
+ modelName: "gpt-5.4-pro",
320
+ description: "GPT-5.4 Pro uses more compute for complex reasoning tasks. 1M context window. Standard pricing for ≤272K tokens. Knowledge cutoff: August 2025.",
321
+ maxInputTokens: 1_050_000,
322
+ maxOutputTokens: 128000,
323
+ inputTokenCost: 30,
324
+ outputTokenCost: 180,
325
+ reasoning: {
326
+ levels: ["medium", "high", "xhigh"],
327
+ defaultLevel: "medium",
328
+ canDisable: false,
329
+ outputsThinking: false,
330
+ outputsSignatures: false,
331
+ },
332
+ provider: "openai",
333
+ },
172
334
  {
173
335
  type: "text",
174
336
  modelName: "gemini-3.1-pro-preview",
@@ -178,16 +340,24 @@ export const textModels = [
178
340
  inputTokenCost: 2.0,
179
341
  outputTokenCost: 12.0,
180
342
  outputTokensPerSecond: 112,
343
+ reasoning: {
344
+ levels: ["low", "medium", "high"],
345
+ defaultLevel: "high",
346
+ canDisable: false,
347
+ outputsThinking: true,
348
+ outputsSignatures: true,
349
+ },
181
350
  provider: "google",
182
351
  },
183
352
  {
184
353
  type: "text",
185
354
  modelName: "gemini-3-pro-preview",
186
- description: "Gemini 3 Pro with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Superseded by 3.1 Pro.",
355
+ description: "DEPRECATED: Shut down March 9, 2026. Use gemini-3.1-pro-preview instead.",
187
356
  maxInputTokens: 1_048_576,
188
357
  maxOutputTokens: 65536,
189
358
  inputTokenCost: 2.0,
190
359
  outputTokenCost: 12.0,
360
+ disabled: true,
191
361
  provider: "google",
192
362
  },
193
363
  {
@@ -198,6 +368,32 @@ export const textModels = [
198
368
  maxOutputTokens: 65536,
199
369
  inputTokenCost: 0.5,
200
370
  outputTokenCost: 3.0,
371
+ outputTokensPerSecond: 146,
372
+ reasoning: {
373
+ levels: ["minimal", "low", "medium", "high"],
374
+ defaultLevel: "high",
375
+ canDisable: false,
376
+ outputsThinking: true,
377
+ outputsSignatures: true,
378
+ },
379
+ provider: "google",
380
+ },
381
+ {
382
+ type: "text",
383
+ modelName: "gemini-3.1-flash-lite-preview",
384
+ description: "Most cost-effective Gemini 3.1 model with thinking support and 1M context window. 2.5x faster TTFA and 45% faster output than 2.5 Flash. Released March 2026.",
385
+ maxInputTokens: 1_048_576,
386
+ maxOutputTokens: 65536,
387
+ inputTokenCost: 0.25,
388
+ outputTokenCost: 1.5,
389
+ outputTokensPerSecond: 379,
390
+ reasoning: {
391
+ levels: ["minimal", "low", "medium", "high"],
392
+ defaultLevel: "minimal",
393
+ canDisable: false,
394
+ outputsThinking: true,
395
+ outputsSignatures: true,
396
+ },
201
397
  provider: "google",
202
398
  },
203
399
  {
@@ -208,7 +404,12 @@ export const textModels = [
208
404
  maxOutputTokens: 65536,
209
405
  inputTokenCost: 1.25,
210
406
  outputTokenCost: 10.0,
211
- outputTokensPerSecond: 175,
407
+ outputTokensPerSecond: 134,
408
+ reasoning: {
409
+ canDisable: false,
410
+ outputsThinking: true,
411
+ outputsSignatures: true,
412
+ },
212
413
  provider: "google",
213
414
  },
214
415
  {
@@ -219,7 +420,12 @@ export const textModels = [
219
420
  maxOutputTokens: 65536,
220
421
  inputTokenCost: 0.3,
221
422
  outputTokenCost: 2.5,
222
- outputTokensPerSecond: 225,
423
+ outputTokensPerSecond: 245,
424
+ reasoning: {
425
+ canDisable: true,
426
+ outputsThinking: true,
427
+ outputsSignatures: true,
428
+ },
223
429
  provider: "google",
224
430
  },
225
431
  {
@@ -231,6 +437,11 @@ export const textModels = [
231
437
  inputTokenCost: 0.1,
232
438
  outputTokenCost: 0.4,
233
439
  outputTokensPerSecond: 400,
440
+ reasoning: {
441
+ canDisable: true,
442
+ outputsThinking: true,
443
+ outputsSignatures: false,
444
+ },
234
445
  provider: "google",
235
446
  },
236
447
  {
@@ -308,21 +519,34 @@ export const textModels = [
308
519
  {
309
520
  type: "text",
310
521
  modelName: "claude-opus-4-6",
311
- description: "The most intelligent Claude model for building agents and coding. 200K context window, 128K max output.",
522
+ description: "The most intelligent Claude model for building agents and coding. 200K context window (1M in beta), 128K max output.",
312
523
  maxInputTokens: 200_000,
313
524
  maxOutputTokens: 131_072,
314
525
  inputTokenCost: 5,
526
+ cachedInputTokenCost: 0.5,
315
527
  outputTokenCost: 25,
528
+ outputTokensPerSecond: 53,
529
+ reasoning: {
530
+ canDisable: true,
531
+ outputsThinking: true,
532
+ outputsSignatures: true,
533
+ },
316
534
  provider: "anthropic",
317
535
  },
318
536
  {
319
537
  type: "text",
320
538
  modelName: "claude-sonnet-4-6",
321
- description: "The best combination of speed and intelligence. 200K context window, 64K max output.",
539
+ description: "The best combination of speed and intelligence. 200K context window (1M in beta), 64K max output.",
322
540
  maxInputTokens: 200_000,
323
541
  maxOutputTokens: 64_000,
324
542
  inputTokenCost: 3,
543
+ cachedInputTokenCost: 0.3,
325
544
  outputTokenCost: 15,
545
+ reasoning: {
546
+ canDisable: true,
547
+ outputsThinking: true,
548
+ outputsSignatures: true,
549
+ },
326
550
  provider: "anthropic",
327
551
  },
328
552
  {
@@ -332,7 +556,14 @@ export const textModels = [
332
556
  maxInputTokens: 200_000,
333
557
  maxOutputTokens: 64_000,
334
558
  inputTokenCost: 1,
559
+ cachedInputTokenCost: 0.1,
335
560
  outputTokenCost: 5,
561
+ outputTokensPerSecond: 97,
562
+ reasoning: {
563
+ canDisable: true,
564
+ outputsThinking: true,
565
+ outputsSignatures: true,
566
+ },
336
567
  provider: "anthropic",
337
568
  },
338
569
  {
@@ -344,6 +575,11 @@ export const textModels = [
344
575
  inputTokenCost: 3,
345
576
  outputTokenCost: 15,
346
577
  outputTokensPerSecond: 78,
578
+ reasoning: {
579
+ canDisable: true,
580
+ outputsThinking: true,
581
+ outputsSignatures: true,
582
+ },
347
583
  disabled: true,
348
584
  provider: "anthropic",
349
585
  },
package/dist/types.d.ts CHANGED
@@ -98,6 +98,8 @@ export type CostEstimate = {
98
98
  totalCost: number;
99
99
  currency: string;
100
100
  };
101
+ export declare function addTokenUsage(_a?: TokenUsage, _b?: TokenUsage): TokenUsage;
102
+ export declare function addCosts(_a?: CostEstimate, _b?: CostEstimate): CostEstimate;
101
103
  export type PromptResult = {
102
104
  output: string | null;
103
105
  toolCalls: ToolCall[];
package/dist/types.js CHANGED
@@ -1 +1,41 @@
1
1
  export * from "./types/result.js";
2
/**
 * Adds two token-usage records together, field by field.
 *
 * When only one operand is truthy, that very object is returned (not a copy).
 * When neither is, a zeroed record without `cachedInputTokens` is returned.
 *
 * @param _a First usage record; may be omitted.
 * @param _b Second usage record; may be omitted.
 * @returns The combined usage. When both operands are present the result
 *          always carries `cachedInputTokens` and `totalTokens`, with a
 *          missing or falsy value on either side counted as 0.
 */
export function addTokenUsage(_a, _b) {
    // Guard: with fewer than two operands there is nothing to sum.
    if (!_a || !_b) {
        return _a || _b || { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
    }
    // Optional counters fall back to 0 so the sums below never see undefined.
    const orZero = (count) => count || 0;
    const inputTokens = _a.inputTokens + _b.inputTokens;
    const outputTokens = _a.outputTokens + _b.outputTokens;
    const cachedInputTokens = orZero(_a.cachedInputTokens) + orZero(_b.cachedInputTokens);
    const totalTokens = orZero(_a.totalTokens) + orZero(_b.totalTokens);
    return { inputTokens, outputTokens, cachedInputTokens, totalTokens };
}
20
/**
 * Adds two cost estimates together, field by field.
 *
 * When only one operand is truthy, that very object is returned (not a copy).
 * When neither is, a zero-cost record in "USD" is returned.
 *
 * @param _a First cost estimate; may be omitted.
 * @param _b Second cost estimate; may be omitted.
 * @returns The combined estimate, in the operands' shared currency. A missing
 *          or falsy `cachedInputCost` on either side is counted as 0.
 * @throws {Error} If both estimates are present and their currencies differ.
 */
export function addCosts(_a, _b) {
    const left = _a;
    const right = _b;
    // Guard: with fewer than two operands there is nothing to sum.
    if (!left || !right) {
        return left || right || { inputCost: 0, outputCost: 0, totalCost: 0, currency: "USD" };
    }
    const currency = left.currency;
    // Amounts in different currencies cannot be summed meaningfully.
    if (currency !== right.currency) {
        throw new Error(`Cannot add costs with different currencies: ${left.currency} and ${right.currency}`);
    }
    const combined = {
        inputCost: left.inputCost + right.inputCost,
        outputCost: left.outputCost + right.outputCost,
        cachedInputCost: (left.cachedInputCost || 0) + (right.cachedInputCost || 0),
        totalCost: left.totalCost + right.totalCost,
        currency,
    };
    return combined;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "smoltalk",
3
- "version": "0.0.43",
3
+ "version": "0.0.44",
4
4
  "description": "A common interface for LLM APIs",
5
5
  "homepage": "https://github.com/egonSchiele/smoltalk",
6
6
  "scripts": {