smoltalk 0.0.16 → 0.0.18
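Highlights of this release, as reflected in the diff below: content setters on all message classes; a streaming generator (_textStream) for the Google and Ollama clients; token-usage and cost reporting across the OpenAI, Google, and Ollama clients via a new calculateCost helper and TokenUsage/CostEstimate types; refreshed Gemini model metadata and pricing (gemini-2.0-flash is now flagged disabled ahead of its March 31, 2026 shutdown); and a new gemini-3-pro-image-preview image model.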

@@ -20,6 +20,7 @@ export declare class AssistantMessage extends BaseMessage implements MessageClas
  rawData?: any;
  });
  get content(): string;
+ set content(value: string);
  get role(): "assistant";
  get name(): string | undefined;
  get audio(): any | null | undefined;
@@ -25,6 +25,9 @@ export class AssistantMessage extends BaseMessage {
  ? this._content
  : JSON.stringify(this._content);
  }
+ set content(value) {
+ this._content = value;
+ }
  get role() {
  return this._role;
  }
@@ -13,6 +13,7 @@ export declare class DeveloperMessage extends BaseMessage implements MessageClas
  rawData?: any;
  });
  get content(): string;
+ set content(value: string);
  get role(): "developer";
  get name(): string | undefined;
  get rawData(): any;
@@ -15,6 +15,9 @@ export class DeveloperMessage extends BaseMessage {
  ? this._content
  : JSON.stringify(this._content);
  }
+ set content(value) {
+ this._content = value;
+ }
  get role() {
  return this._role;
  }
@@ -13,6 +13,7 @@ export declare class SystemMessage extends BaseMessage implements MessageClass {
  rawData?: any;
  });
  get content(): string;
+ set content(value: string);
  get role(): "system";
  get name(): string | undefined;
  get rawData(): any;
@@ -15,6 +15,9 @@ export class SystemMessage extends BaseMessage {
  ? this._content
  : JSON.stringify(this._content);
  }
+ set content(value) {
+ this._content = value;
+ }
  get role() {
  return this._role;
  }
@@ -15,6 +15,7 @@ export declare class ToolMessage extends BaseMessage implements MessageClass {
  name: string;
  });
  get content(): string;
+ set content(value: string);
  get role(): "tool";
  get name(): string;
  get tool_call_id(): string;
@@ -17,6 +17,9 @@ export class ToolMessage extends BaseMessage {
  ? this._content
  : JSON.stringify(this._content);
  }
+ set content(value) {
+ this._content = value;
+ }
  get role() {
  return this._role;
  }
@@ -12,6 +12,7 @@ export declare class UserMessage extends BaseMessage implements MessageClass {
  rawData?: any;
  });
  get content(): string;
+ set content(value: string);
  get role(): "user";
  get name(): string | undefined;
  get rawData(): any;
@@ -13,6 +13,9 @@ export class UserMessage extends BaseMessage {
  get content() {
  return this._content;
  }
+ set content(value) {
+ this._content = value;
+ }
  get role() {
  return this._role;
  }
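
For context, every message class now exposes a content setter alongside the existing getter, so callers can rewrite a message in place (for example, to redact or truncate history before resending it). A minimal usage sketch; the constructor options shape and the "smoltalk" import path are assumptions, not confirmed by this diff:

import { UserMessage } from "smoltalk"; // import path assumed

// Constructor options shape is assumed from the declarations above.
const msg = new UserMessage({ content: "card number 4111-1111-1111-1111" });
// New in this release: assign through the setter instead of rebuilding the message.
msg.content = msg.content.replace(/\d{4}(-\d{4}){3}/, "[redacted]");
console.log(msg.content); // "card number [redacted]"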
@@ -1,5 +1,5 @@
  import { GoogleGenAI } from "@google/genai";
- import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient } from "../types.js";
+ import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
  import { BaseClient } from "./baseClient.js";
  export type SmolGoogleConfig = BaseClientConfig;
  export declare class SmolGoogle extends BaseClient implements SmolClient {
@@ -9,5 +9,8 @@ export declare class SmolGoogle extends BaseClient implements SmolClient {
  constructor(config: SmolGoogleConfig);
  getClient(): GoogleGenAI;
  getModel(): string;
+ private calculateUsageAndCost;
+ private buildRequest;
  _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
+ _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
  }
@@ -4,6 +4,7 @@ import { getLogger } from "../logger.js";
  import { success, } from "../types.js";
  import { zodToGoogleTool } from "../util/tool.js";
  import { BaseClient } from "./baseClient.js";
+ import { calculateCost } from "../models.js";
  export class SmolGoogle extends BaseClient {
  client;
  logger;
@@ -23,7 +24,24 @@ export class SmolGoogle extends BaseClient {
  getModel() {
  return this.model;
  }
- async _textSync(config) {
+ calculateUsageAndCost(usageMetadata) {
+ let usage;
+ let cost;
+ if (usageMetadata) {
+ usage = {
+ inputTokens: usageMetadata.promptTokenCount || 0,
+ outputTokens: usageMetadata.candidatesTokenCount || 0,
+ cachedInputTokens: usageMetadata.cachedContentTokenCount,
+ totalTokens: usageMetadata.totalTokenCount,
+ };
+ const calculatedCost = calculateCost(this.model, usage);
+ if (calculatedCost) {
+ cost = calculatedCost;
+ }
+ }
+ return { usage, cost };
+ }
+ buildRequest(config) {
  const messages = config.messages.map((msg) => msg.toGoogleMessage());
  const tools = (config.tools || []).map((tool) => {
  return zodToGoogleTool(tool.name, tool.schema, {
@@ -38,13 +56,18 @@ export class SmolGoogle extends BaseClient {
  genConfig.responseMimeType = "application/json";
  genConfig.responseJsonSchema = config.responseFormat.toJSONSchema();
  }
- const request = {
+ return {
  contents: messages,
  model: this.model,
  config: genConfig,
- stream: config.stream || false,
  ...(config.rawAttributes || {}),
  };
+ }
+ async _textSync(config) {
+ const request = {
+ ...this.buildRequest(config),
+ stream: config.stream || false,
+ };
  this.logger.debug("Sending request to Google Gemini:", JSON.stringify(request, null, 2));
  // Send the prompt as the latest message
  const result = await this.client.models.generateContent(request);
@@ -61,7 +84,57 @@ export class SmolGoogle extends BaseClient {
  });
  }
  });
+ // Extract usage and calculate cost
+ const { usage, cost } = this.calculateUsageAndCost(result.usageMetadata);
  // Return the response, updating the chat history
- return success({ output, toolCalls });
+ return success({ output, toolCalls, usage, cost });
+ }
+ async *_textStream(config) {
+ const request = this.buildRequest(config);
+ this.logger.debug("Sending streaming request to Google Gemini:", JSON.stringify(request, null, 2));
+ const stream = await this.client.models.generateContentStream(request);
+ let content = "";
+ const toolCallsMap = new Map();
+ let usage;
+ let cost;
+ for await (const chunk of stream) {
+ // Extract usage metadata from chunks
+ if (chunk.usageMetadata) {
+ const usageAndCost = this.calculateUsageAndCost(chunk.usageMetadata);
+ usage = usageAndCost.usage;
+ cost = usageAndCost.cost;
+ }
+ // Handle text content
+ if (chunk.text) {
+ content += chunk.text;
+ yield { type: "text", text: chunk.text };
+ }
+ // Handle function calls
+ if (chunk.functionCalls) {
+ for (const functionCall of chunk.functionCalls) {
+ const id = functionCall.id || functionCall.name || "";
+ const name = functionCall.name || "";
+ if (!toolCallsMap.has(id)) {
+ toolCallsMap.set(id, {
+ id: id,
+ name: name,
+ arguments: functionCall.args,
+ });
+ }
+ }
+ }
+ }
+ this.logger.debug("Streaming response completed from Google Gemini");
+ // Yield tool calls
+ const toolCalls = [];
+ for (const tc of toolCallsMap.values()) {
+ const toolCall = new ToolCall(tc.id, tc.name, tc.arguments);
+ toolCalls.push(toolCall);
+ yield { type: "tool_call", toolCall };
+ }
+ yield {
+ type: "done",
+ result: { output: content || null, toolCalls, usage, cost },
+ };
  }
  }
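
To show what the new streaming surface produces, here is a sketch that consumes _textStream directly and handles the three chunk shapes defined in this diff (text, tool_call, done). The client construction, message setup, and import paths are assumptions, and a higher-level public method may be the intended entry point rather than the underscore-prefixed generator:

import { SmolGoogle, UserMessage } from "smoltalk"; // import paths assumed

// Config shape assumed; the diff only shows SmolGoogleConfig = BaseClientConfig.
const client = new SmolGoogle({ model: "gemini-2.5-flash", apiKey: process.env.GEMINI_API_KEY });

const messages = [new UserMessage({ content: "Summarize this repo." })];
for await (const chunk of client._textStream({ messages })) {
  if (chunk.type === "text") {
    process.stdout.write(chunk.text); // incremental text deltas
  } else if (chunk.type === "tool_call") {
    console.log("\ntool call:", chunk.toolCall);
  } else if (chunk.type === "done") {
    // The final chunk's result now carries token usage and an estimated cost.
    console.log("\nusage:", chunk.result.usage, "cost:", chunk.result.cost);
  }
}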
@@ -1,5 +1,5 @@
  import { Ollama } from "ollama";
- import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient } from "../types.js";
+ import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
  import { BaseClient } from "./baseClient.js";
  export declare const DEFAULT_OLLAMA_HOST = "http://localhost:11434";
  export type SmolOllamaConfig = BaseClientConfig;
@@ -10,5 +10,7 @@ export declare class SmolOllama extends BaseClient implements SmolClient {
  constructor(config: SmolOllamaConfig);
  getClient(): Ollama;
  getModel(): string;
+ private calculateUsageAndCost;
  _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
+ _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
  }
@@ -4,6 +4,7 @@ import { getLogger } from "../logger.js";
  import { success, } from "../types.js";
  import { zodToGoogleTool } from "../util/tool.js";
  import { BaseClient } from "./baseClient.js";
+ import { calculateCost } from "../models.js";
  export const DEFAULT_OLLAMA_HOST = "http://localhost:11434";
  export class SmolOllama extends BaseClient {
  logger;
@@ -30,6 +31,24 @@ export class SmolOllama extends BaseClient {
  getModel() {
  return this.model;
  }
+ calculateUsageAndCost(responseData) {
+ let usage;
+ let cost;
+ if (responseData) {
+ const inputTokens = responseData.prompt_eval_count || 0;
+ const outputTokens = responseData.eval_count || 0;
+ usage = {
+ inputTokens,
+ outputTokens,
+ totalTokens: inputTokens + outputTokens,
+ };
+ const calculatedCost = calculateCost(this.model, usage);
+ if (calculatedCost) {
+ cost = calculatedCost;
+ }
+ }
+ return { usage, cost };
+ }
  async _textSync(config) {
  const messages = config.messages.map((msg) => msg.toOpenAIMessage());
  const tools = (config.tools || []).map((tool) => {
@@ -62,7 +81,90 @@ export class SmolOllama extends BaseClient {
  toolCalls.push(new ToolCall(tool_call.id, tool_call.function.name, tool_call.function.arguments || {}));
  }
  }
+ // Extract usage and calculate cost
+ const { usage, cost } = this.calculateUsageAndCost(result);
  // Return the response, updating the chat history
- return success({ output, toolCalls });
+ return success({ output, toolCalls, usage, cost });
+ }
+ async *_textStream(config) {
+ const messages = config.messages.map((msg) => msg.toOpenAIMessage());
+ const tools = (config.tools || []).map((tool) => {
+ return zodToGoogleTool(tool.name, tool.schema, {
+ description: tool.description,
+ });
+ });
+ const request = {
+ messages: messages,
+ model: this.model,
+ stream: true,
+ };
+ if (tools.length > 0) {
+ request.tools = tools.map((t) => ({ type: "function", function: t }));
+ }
+ if (config.responseFormat) {
+ request.format = config.responseFormat.toJSONSchema();
+ }
+ if (config.rawAttributes) {
+ Object.assign(request, config.rawAttributes);
+ }
+ this.logger.debug("Sending streaming request to Ollama:", JSON.stringify(request, null, 2));
+ // @ts-ignore
+ const stream = await this.client.chat(request);
+ let content = "";
+ const toolCallsMap = new Map();
+ let usage;
+ let cost;
+ let lastChunk;
+ for await (const chunk of stream) {
+ lastChunk = chunk;
+ // Handle text content
+ if (chunk.message?.content) {
+ content += chunk.message.content;
+ yield { type: "text", text: chunk.message.content };
+ }
+ // Handle tool calls
+ if (chunk.message?.tool_calls) {
+ for (const tc of chunk.message.tool_calls) {
+ const tool_call = tc;
+ const id = tool_call.id || tool_call.function.name || "";
+ const name = tool_call.function.name || "";
+ if (!toolCallsMap.has(id)) {
+ toolCallsMap.set(id, {
+ id: id,
+ name: name,
+ arguments: tool_call.function.arguments || {},
+ });
+ }
+ else {
+ // Merge arguments if tool call is split across chunks
+ const existing = toolCallsMap.get(id);
+ if (tool_call.function.arguments) {
+ existing.arguments = {
+ ...existing.arguments,
+ ...tool_call.function.arguments,
+ };
+ }
+ }
+ }
+ }
+ }
+ this.logger.debug("Streaming response completed from Ollama");
+ // Extract usage from the last chunk
+ if (lastChunk) {
+ const usageAndCost = this.calculateUsageAndCost(lastChunk);
+ usage = usageAndCost.usage;
+ cost = usageAndCost.cost;
+ }
+ // Yield tool calls
+ const toolCalls = [];
+ for (const tc of toolCallsMap.values()) {
+ const toolCall = new ToolCall(tc.id, tc.name, tc.arguments);
+ toolCalls.push(toolCall);
+ yield { type: "tool_call", toolCall };
+ }
+ yield {
+ type: "done",
+ result: { output: content || null, toolCalls, usage, cost },
+ };
  }
  }
@@ -9,6 +9,7 @@ export declare class SmolOpenAi extends BaseClient implements SmolClient {
  constructor(config: SmolOpenAiConfig);
  getClient(): OpenAI;
  getModel(): string;
+ private calculateUsageAndCost;
  private buildRequest;
  _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
  _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
@@ -5,6 +5,7 @@ import { isFunctionToolCall } from "../util.js";
  import { getLogger } from "../logger.js";
  import { BaseClient } from "./baseClient.js";
  import { zodToOpenAITool } from "../util/tool.js";
+ import { calculateCost } from "../models.js";
  export class SmolOpenAi extends BaseClient {
  client;
  logger;
@@ -24,6 +25,23 @@ export class SmolOpenAi extends BaseClient {
  getModel() {
  return this.model;
  }
+ calculateUsageAndCost(usageData) {
+ let usage;
+ let cost;
+ if (usageData) {
+ usage = {
+ inputTokens: usageData.prompt_tokens || 0,
+ outputTokens: usageData.completion_tokens || 0,
+ cachedInputTokens: usageData.prompt_tokens_details?.cached_tokens,
+ totalTokens: usageData.total_tokens,
+ };
+ const calculatedCost = calculateCost(this.model, usage);
+ if (calculatedCost) {
+ cost = calculatedCost;
+ }
+ }
+ return { usage, cost };
+ }
  buildRequest(config) {
  const messages = config.messages.map((msg) => msg.toOpenAIMessage());
  const request = {
@@ -69,7 +87,9 @@ export class SmolOpenAi extends BaseClient {
  }
  }
  }
- return success({ output, toolCalls });
+ // Extract usage and calculate cost
+ const { usage, cost } = this.calculateUsageAndCost(completion.usage);
+ return success({ output, toolCalls, usage, cost });
  }
  async *_textStream(config) {
  const request = this.buildRequest(config);
@@ -77,11 +97,20 @@ export class SmolOpenAi extends BaseClient {
  const completion = await this.client.chat.completions.create({
  ...request,
  stream: true,
+ stream_options: { include_usage: true },
  });
  let content = "";
  const toolCallsMap = new Map();
+ let usage;
+ let cost;
  for await (const chunk of completion) {
  const delta = chunk.choices[0]?.delta;
+ // Extract usage from the final chunk
+ if (chunk.usage) {
+ const usageAndCost = this.calculateUsageAndCost(chunk.usage);
+ usage = usageAndCost.usage;
+ cost = usageAndCost.cost;
+ }
  if (!delta)
  continue;
  if (delta.content) {
@@ -117,6 +146,9 @@ export class SmolOpenAi extends BaseClient {
  toolCalls.push(toolCall);
  yield { type: "tool_call", toolCall };
  }
- yield { type: "done", result: { output: content || null, toolCalls } };
+ yield {
+ type: "done",
+ result: { output: content || null, toolCalls, usage, cost },
+ };
  }
  }
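
For reference, the usage object that calculateUsageAndCost maps here follows OpenAI's standard shape; with stream_options: { include_usage: true }, the same object arrives on one final stream chunk whose choices array is empty, which is why the loop checks chunk.usage before the delta guard. An illustrative payload (values made up):

// Shape of completion.usage / chunk.usage as mapped above:
const exampleUsage = {
  prompt_tokens: 1200,      // mapped to inputTokens
  completion_tokens: 350,   // mapped to outputTokens
  total_tokens: 1550,       // mapped to totalTokens
  prompt_tokens_details: {
    cached_tokens: 800,     // mapped to cachedInputTokens
  },
};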
package/dist/models.d.ts CHANGED
@@ -63,11 +63,11 @@ export declare const textModels: readonly [{
  }, {
  readonly type: "text";
  readonly modelName: "o3";
- readonly description: "o3 is a reasoning model that sets a new standard for math, science, and coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. The knowledge cutoff for o3 models is October, 2023.";
+ readonly description: "o3 is a reasoning model that sets a new standard for math, science, coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. The knowledge cutoff for o3 models is October, 2023.";
  readonly maxInputTokens: 200000;
  readonly maxOutputTokens: 100000;
  readonly inputTokenCost: 2;
- readonly cachedInputTokenCost: 1;
+ readonly cachedInputTokenCost: 0.5;
  readonly outputTokenCost: 8;
  readonly provider: "openai";
  }, {
@@ -83,7 +83,7 @@ export declare const textModels: readonly [{
  }, {
  readonly type: "text";
  readonly modelName: "o4-mini";
- readonly description: "o4-mini is a new o-series reasoning model that replaced o3-mini, providing excellent performance for math, science, and coding tasks. Available in ChatGPT Plus, Pro, and Team.";
+ readonly description: "Latest small o-series model optimized for fast, effective reasoning with exceptional performance in coding and visual tasks. Knowledge cutoff: June 2024.";
  readonly maxInputTokens: 200000;
  readonly maxOutputTokens: 100000;
  readonly inputTokenCost: 1.1;
@@ -143,25 +143,25 @@ export declare const textModels: readonly [{
  }, {
  readonly type: "text";
  readonly modelName: "gemini-3-pro-preview";
- readonly description: "Strongest Gemini 3 model quality with 2M context window. Standard pricing for ≤200k tokens, higher rates for >200k tokens.";
- readonly maxInputTokens: 2097152;
- readonly maxOutputTokens: 8192;
+ readonly description: "Strongest Gemini 3 model quality with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Nov 2025, currently in preview.";
+ readonly maxInputTokens: 1048576;
+ readonly maxOutputTokens: 65536;
  readonly inputTokenCost: 2;
  readonly outputTokenCost: 12;
  readonly provider: "google";
  }, {
  readonly type: "text";
  readonly modelName: "gemini-3-flash-preview";
- readonly description: "Latest Gemini 3 flash model with 1M context window. Excellent performance for high-volume tasks.";
+ readonly description: "Latest Gemini 3 flash model with 1M context window and 64K output. Outperforms 2.5 Pro while being 3x faster. Optimized for agentic workflows and coding. Includes context caching for 90% cost reductions.";
  readonly maxInputTokens: 1048576;
- readonly maxOutputTokens: 8192;
+ readonly maxOutputTokens: 65536;
  readonly inputTokenCost: 0.5;
  readonly outputTokenCost: 3;
  readonly provider: "google";
  }, {
  readonly type: "text";
  readonly modelName: "gemini-2.5-pro";
- readonly description: "High-performance Gemini 2.5 model with 2M context window. Standard pricing for ≤200k tokens, higher rates for >200k tokens.";
+ readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/higher output). Batch API: 50% discount.";
  readonly maxInputTokens: 2097152;
  readonly maxOutputTokens: 8192;
  readonly inputTokenCost: 1.25;
@@ -170,7 +170,7 @@ export declare const textModels: readonly [{
  }, {
  readonly type: "text";
  readonly modelName: "gemini-2.5-flash";
- readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. 1M context window with free tier available.";
+ readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.";
  readonly maxInputTokens: 1048576;
  readonly maxOutputTokens: 8192;
  readonly inputTokenCost: 0.3;
@@ -188,11 +188,12 @@ export declare const textModels: readonly [{
  }, {
  readonly type: "text";
  readonly modelName: "gemini-2.0-flash";
- readonly description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window.";
+ readonly description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window. DEPRECATED: Will be shut down on March 31, 2026.";
  readonly maxInputTokens: 1048576;
  readonly maxOutputTokens: 8192;
  readonly inputTokenCost: 0.1;
  readonly outputTokenCost: 0.4;
+ readonly disabled: true;
  readonly provider: "google";
  }, {
  readonly type: "text";
@@ -316,6 +317,12 @@ export declare const imageModels: readonly [{
  readonly provider: "google";
  readonly description: "aka nano-banana";
  readonly costPerImage: 0.04;
+ }, {
+ readonly type: "image";
+ readonly modelName: "gemini-3-pro-image-preview";
+ readonly provider: "google";
+ readonly description: "High-fidelity image generation with reasoning-enhanced composition. Supports legible text rendering, complex multi-turn editing, and character consistency using up to 14 reference inputs.";
+ readonly costPerImage: 0.05;
  }];
  export declare const embeddingsModels: {
  type: string;
@@ -359,11 +366,11 @@ export declare function getModel(modelName: ModelName): {
  } | {
  readonly type: "text";
  readonly modelName: "o3";
- readonly description: "o3 is a reasoning model that sets a new standard for math, science, and coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. The knowledge cutoff for o3 models is October, 2023.";
+ readonly description: "o3 is a reasoning model that sets a new standard for math, science, coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. The knowledge cutoff for o3 models is October, 2023.";
  readonly maxInputTokens: 200000;
  readonly maxOutputTokens: 100000;
  readonly inputTokenCost: 2;
- readonly cachedInputTokenCost: 1;
+ readonly cachedInputTokenCost: 0.5;
  readonly outputTokenCost: 8;
  readonly provider: "openai";
  } | {
@@ -379,7 +386,7 @@ export declare function getModel(modelName: ModelName): {
  } | {
  readonly type: "text";
  readonly modelName: "o4-mini";
- readonly description: "o4-mini is a new o-series reasoning model that replaced o3-mini, providing excellent performance for math, science, and coding tasks. Available in ChatGPT Plus, Pro, and Team.";
+ readonly description: "Latest small o-series model optimized for fast, effective reasoning with exceptional performance in coding and visual tasks. Knowledge cutoff: June 2024.";
  readonly maxInputTokens: 200000;
  readonly maxOutputTokens: 100000;
  readonly inputTokenCost: 1.1;
@@ -439,25 +446,25 @@ export declare function getModel(modelName: ModelName): {
  } | {
  readonly type: "text";
  readonly modelName: "gemini-3-pro-preview";
- readonly description: "Strongest Gemini 3 model quality with 2M context window. Standard pricing for ≤200k tokens, higher rates for >200k tokens.";
- readonly maxInputTokens: 2097152;
- readonly maxOutputTokens: 8192;
+ readonly description: "Strongest Gemini 3 model quality with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Nov 2025, currently in preview.";
+ readonly maxInputTokens: 1048576;
+ readonly maxOutputTokens: 65536;
  readonly inputTokenCost: 2;
  readonly outputTokenCost: 12;
  readonly provider: "google";
  } | {
  readonly type: "text";
  readonly modelName: "gemini-3-flash-preview";
- readonly description: "Latest Gemini 3 flash model with 1M context window. Excellent performance for high-volume tasks.";
+ readonly description: "Latest Gemini 3 flash model with 1M context window and 64K output. Outperforms 2.5 Pro while being 3x faster. Optimized for agentic workflows and coding. Includes context caching for 90% cost reductions.";
  readonly maxInputTokens: 1048576;
- readonly maxOutputTokens: 8192;
+ readonly maxOutputTokens: 65536;
  readonly inputTokenCost: 0.5;
  readonly outputTokenCost: 3;
  readonly provider: "google";
  } | {
  readonly type: "text";
  readonly modelName: "gemini-2.5-pro";
- readonly description: "High-performance Gemini 2.5 model with 2M context window. Standard pricing for ≤200k tokens, higher rates for >200k tokens.";
+ readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/higher output). Batch API: 50% discount.";
  readonly maxInputTokens: 2097152;
  readonly maxOutputTokens: 8192;
  readonly inputTokenCost: 1.25;
@@ -466,7 +473,7 @@ export declare function getModel(modelName: ModelName): {
  } | {
  readonly type: "text";
  readonly modelName: "gemini-2.5-flash";
- readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. 1M context window with free tier available.";
+ readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.";
  readonly maxInputTokens: 1048576;
  readonly maxOutputTokens: 8192;
  readonly inputTokenCost: 0.3;
@@ -484,11 +491,12 @@ export declare function getModel(modelName: ModelName): {
  } | {
  readonly type: "text";
  readonly modelName: "gemini-2.0-flash";
- readonly description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window.";
+ readonly description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window. DEPRECATED: Will be shut down on March 31, 2026.";
  readonly maxInputTokens: 1048576;
  readonly maxOutputTokens: 8192;
  readonly inputTokenCost: 0.1;
  readonly outputTokenCost: 0.4;
+ readonly disabled: true;
  readonly provider: "google";
  } | {
  readonly type: "text";
@@ -611,8 +619,25 @@ export declare function getModel(modelName: ModelName): {
  readonly provider: "google";
  readonly description: "aka nano-banana";
  readonly costPerImage: 0.04;
+ } | {
+ readonly type: "image";
+ readonly modelName: "gemini-3-pro-image-preview";
+ readonly provider: "google";
+ readonly description: "High-fidelity image generation with reasoning-enhanced composition. Supports legible text rendering, complex multi-turn editing, and character consistency using up to 14 reference inputs.";
+ readonly costPerImage: 0.05;
  } | undefined;
  export declare function isImageModel(model: Model): model is ImageModel;
  export declare function isTextModel(model: Model): model is TextModel;
  export declare function isSpeechToTextModel(model: Model): model is SpeechToTextModel;
  export declare function isEmbeddingsModel(model: Model): model is EmbeddingsModel;
+ export declare function calculateCost(modelName: ModelName, usage: {
+ inputTokens: number;
+ outputTokens: number;
+ cachedInputTokens?: number;
+ }): {
+ inputCost: number;
+ outputCost: number;
+ cachedInputCost?: number;
+ totalCost: number;
+ currency: string;
+ } | null;
package/dist/models.js CHANGED
@@ -1,3 +1,4 @@
+ import { round } from "./util.js";
  export const speechToTextModels = [
  { type: "speech-to-text", modelName: "whisper-local", provider: "local" },
  {
@@ -43,11 +44,11 @@ export const textModels = [
  {
  type: "text",
  modelName: "o3",
- description: "o3 is a reasoning model that sets a new standard for math, science, and coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. The knowledge cutoff for o3 models is October, 2023.",
+ description: "o3 is a reasoning model that sets a new standard for math, science, coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. The knowledge cutoff for o3 models is October, 2023.",
  maxInputTokens: 200000,
  maxOutputTokens: 100000,
  inputTokenCost: 2,
- cachedInputTokenCost: 1,
+ cachedInputTokenCost: 0.5,
  outputTokenCost: 8,
  provider: "openai",
  },
@@ -65,7 +66,7 @@ export const textModels = [
  {
  type: "text",
  modelName: "o4-mini",
- description: "o4-mini is a new o-series reasoning model that replaced o3-mini, providing excellent performance for math, science, and coding tasks. Available in ChatGPT Plus, Pro, and Team.",
+ description: "Latest small o-series model optimized for fast, effective reasoning with exceptional performance in coding and visual tasks. Knowledge cutoff: June 2024.",
  maxInputTokens: 200000,
  maxOutputTokens: 100000,
  inputTokenCost: 1.1,
@@ -131,9 +132,9 @@ export const textModels = [
  {
  type: "text",
  modelName: "gemini-3-pro-preview",
- description: "Strongest Gemini 3 model quality with 2M context window. Standard pricing for ≤200k tokens, higher rates for >200k tokens.",
- maxInputTokens: 2_097_152,
- maxOutputTokens: 8192,
+ description: "Strongest Gemini 3 model quality with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Nov 2025, currently in preview.",
+ maxInputTokens: 1_048_576,
+ maxOutputTokens: 65536,
  inputTokenCost: 2.0,
  outputTokenCost: 12.0,
  provider: "google",
@@ -141,9 +142,9 @@ export const textModels = [
  {
  type: "text",
  modelName: "gemini-3-flash-preview",
- description: "Latest Gemini 3 flash model with 1M context window. Excellent performance for high-volume tasks.",
+ description: "Latest Gemini 3 flash model with 1M context window and 64K output. Outperforms 2.5 Pro while being 3x faster. Optimized for agentic workflows and coding. Includes context caching for 90% cost reductions.",
  maxInputTokens: 1_048_576,
- maxOutputTokens: 8192,
+ maxOutputTokens: 65536,
  inputTokenCost: 0.5,
  outputTokenCost: 3.0,
  provider: "google",
@@ -151,7 +152,7 @@ export const textModels = [
  {
  type: "text",
  modelName: "gemini-2.5-pro",
- description: "High-performance Gemini 2.5 model with 2M context window. Standard pricing for ≤200k tokens, higher rates for >200k tokens.",
+ description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/higher output). Batch API: 50% discount.",
  maxInputTokens: 2_097_152,
  maxOutputTokens: 8192,
  inputTokenCost: 1.25,
@@ -161,7 +162,7 @@ export const textModels = [
  {
  type: "text",
  modelName: "gemini-2.5-flash",
- description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. 1M context window with free tier available.",
+ description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.",
  maxInputTokens: 1_048_576,
  maxOutputTokens: 8192,
  inputTokenCost: 0.3,
@@ -181,11 +182,12 @@ export const textModels = [
  {
  type: "text",
  modelName: "gemini-2.0-flash",
- description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window.",
+ description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window. DEPRECATED: Will be shut down on March 31, 2026.",
  maxInputTokens: 1_048_576,
  maxOutputTokens: 8192,
  inputTokenCost: 0.1,
  outputTokenCost: 0.4,
+ disabled: true,
  provider: "google",
  },
  {
@@ -359,6 +361,13 @@ export const imageModels = [
  description: "aka nano-banana",
  costPerImage: 0.04,
  },
+ {
+ type: "image",
+ modelName: "gemini-3-pro-image-preview",
+ provider: "google",
+ description: "High-fidelity image generation with reasoning-enhanced composition. Supports legible text rendering, complex multi-turn editing, and character consistency using up to 14 reference inputs.",
+ costPerImage: 0.05,
+ },
  ];
  export const embeddingsModels = [
  { type: "embeddings", modelName: "text-embedding-3-small", tokenCost: 0.02 },
@@ -379,3 +388,22 @@ export function isSpeechToTextModel(model) {
  export function isEmbeddingsModel(model) {
  return model.type === "embeddings";
  }
+ export function calculateCost(modelName, usage) {
+ const model = getModel(modelName);
+ if (!model || !isTextModel(model)) {
+ return null;
+ }
+ const inputCost = round((usage.inputTokens * (model.inputTokenCost || 0)) / 1_000_000, 2);
+ const outputCost = round((usage.outputTokens * (model.outputTokenCost || 0)) / 1_000_000, 2);
+ const cachedInputCost = usage.cachedInputTokens && model.cachedInputTokenCost
+ ? round((usage.cachedInputTokens * model.cachedInputTokenCost) / 1_000_000, 2)
+ : undefined;
+ const totalCost = round(inputCost + outputCost + (cachedInputCost || 0), 2);
+ return {
+ inputCost,
+ outputCost,
+ cachedInputCost,
+ totalCost,
+ currency: "USD",
+ };
+ }
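
A worked example of the new cost math, using the o3 pricing above (token costs are per million tokens, and round clamps each figure to two decimal places, so very small per-request costs can round to 0.00). Note that cachedInputCost is added on top of inputCost rather than replacing part of it. The import path is an assumption:

import { calculateCost } from "smoltalk"; // export path assumed

// o3: $2/M input, $8/M output, $0.50/M cached input (per the table above).
const cost = calculateCost("o3", {
  inputTokens: 1_000_000,     // 1,000,000 * 2.0 / 1,000,000 = $2.00
  outputTokens: 100_000,      //   100,000 * 8.0 / 1,000,000 = $0.80
  cachedInputTokens: 500_000, //   500,000 * 0.5 / 1,000,000 = $0.25
});
// => { inputCost: 2, outputCost: 0.8, cachedInputCost: 0.25, totalCost: 3.05, currency: "USD" }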
package/dist/types.d.ts CHANGED
@@ -43,9 +43,24 @@ export type ToolLoopDetection = {
  excludeTools?: string[];
  };
  export type BaseClientConfig = SmolConfig & {};
+ export type TokenUsage = {
+ inputTokens: number;
+ outputTokens: number;
+ cachedInputTokens?: number;
+ totalTokens?: number;
+ };
+ export type CostEstimate = {
+ inputCost: number;
+ outputCost: number;
+ cachedInputCost?: number;
+ totalCost: number;
+ currency: string;
+ };
  export type PromptResult = {
  output: string | null;
  toolCalls: ToolCall[];
+ usage?: TokenUsage;
+ cost?: CostEstimate;
  };
  export type StreamChunk = {
  type: "text";
package/dist/util.d.ts CHANGED
@@ -1 +1,2 @@
  export * from "./util/openai.js";
+ export declare function round(num: number, places: number): number;
package/dist/util.js CHANGED
@@ -1 +1,5 @@
  export * from "./util/openai.js";
+ export function round(num, places) {
+ const factor = Math.pow(10, places);
+ return Math.round(num * factor) / factor;
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "smoltalk",
- "version": "0.0.16",
+ "version": "0.0.18",
  "description": "A common interface for LLM APIs",
  "homepage": "https://github.com/egonSchiele/smoltalk",
  "scripts": {