smoltalk 0.0.16 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/classes/message/AssistantMessage.d.ts +1 -0
- package/dist/classes/message/AssistantMessage.js +3 -0
- package/dist/classes/message/DeveloperMessage.d.ts +1 -0
- package/dist/classes/message/DeveloperMessage.js +3 -0
- package/dist/classes/message/SystemMessage.d.ts +1 -0
- package/dist/classes/message/SystemMessage.js +3 -0
- package/dist/classes/message/ToolMessage.d.ts +1 -0
- package/dist/classes/message/ToolMessage.js +3 -0
- package/dist/classes/message/UserMessage.d.ts +1 -0
- package/dist/classes/message/UserMessage.js +3 -0
- package/dist/clients/google.d.ts +4 -1
- package/dist/clients/google.js +77 -4
- package/dist/clients/ollama.d.ts +3 -1
- package/dist/clients/ollama.js +103 -1
- package/dist/clients/openai.d.ts +1 -0
- package/dist/clients/openai.js +34 -2
- package/dist/models.d.ts +47 -22
- package/dist/models.js +39 -11
- package/dist/types.d.ts +15 -0
- package/dist/util.d.ts +1 -0
- package/dist/util.js +4 -0
- package/package.json +1 -1
package/dist/classes/message/AssistantMessage.d.ts
CHANGED
@@ -20,6 +20,7 @@ export declare class AssistantMessage extends BaseMessage implements MessageClas
         rawData?: any;
     });
     get content(): string;
+    set content(value: string);
     get role(): "assistant";
     get name(): string | undefined;
     get audio(): any | null | undefined;
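
The same one-line setter lands in all five message classes listed at the top (Assistant, Developer, System, Tool, User). A sketch of what it enables; the package-root import and constructor shape are assumptions, not taken from this diff:

    // Hypothetical usage of the new setter added in 0.0.18.
    import { AssistantMessage } from "smoltalk"; // import path assumed

    const msg = new AssistantMessage({ content: "draft answer" }); // constructor shape assumed
    msg.content = "final answer"; // writable in 0.0.18; 0.0.16 exposed only the getter
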
package/dist/clients/google.d.ts
CHANGED
@@ -1,5 +1,5 @@
 import { GoogleGenAI } from "@google/genai";
-import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient } from "../types.js";
+import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
 import { BaseClient } from "./baseClient.js";
 export type SmolGoogleConfig = BaseClientConfig;
 export declare class SmolGoogle extends BaseClient implements SmolClient {
@@ -9,5 +9,8 @@ export declare class SmolGoogle extends BaseClient implements SmolClient {
     constructor(config: SmolGoogleConfig);
     getClient(): GoogleGenAI;
     getModel(): string;
+    private calculateUsageAndCost;
+    private buildRequest;
     _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
+    _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
 }
package/dist/clients/google.js
CHANGED
@@ -4,6 +4,7 @@ import { getLogger } from "../logger.js";
 import { success, } from "../types.js";
 import { zodToGoogleTool } from "../util/tool.js";
 import { BaseClient } from "./baseClient.js";
+import { calculateCost } from "../models.js";
 export class SmolGoogle extends BaseClient {
     client;
     logger;
@@ -23,7 +24,24 @@ export class SmolGoogle extends BaseClient {
     getModel() {
         return this.model;
     }
-    async _textSync(config) {
+    calculateUsageAndCost(usageMetadata) {
+        let usage;
+        let cost;
+        if (usageMetadata) {
+            usage = {
+                inputTokens: usageMetadata.promptTokenCount || 0,
+                outputTokens: usageMetadata.candidatesTokenCount || 0,
+                cachedInputTokens: usageMetadata.cachedContentTokenCount,
+                totalTokens: usageMetadata.totalTokenCount,
+            };
+            const calculatedCost = calculateCost(this.model, usage);
+            if (calculatedCost) {
+                cost = calculatedCost;
+            }
+        }
+        return { usage, cost };
+    }
+    buildRequest(config) {
         const messages = config.messages.map((msg) => msg.toGoogleMessage());
         const tools = (config.tools || []).map((tool) => {
             return zodToGoogleTool(tool.name, tool.schema, {
@@ -38,13 +56,18 @@ export class SmolGoogle extends BaseClient {
             genConfig.responseMimeType = "application/json";
             genConfig.responseJsonSchema = config.responseFormat.toJSONSchema();
         }
-        const request = {
+        return {
             contents: messages,
             model: this.model,
             config: genConfig,
-            stream: config.stream || false,
             ...(config.rawAttributes || {}),
         };
+    }
+    async _textSync(config) {
+        const request = {
+            ...this.buildRequest(config),
+            stream: config.stream || false,
+        };
         this.logger.debug("Sending request to Google Gemini:", JSON.stringify(request, null, 2));
         // Send the prompt as the latest message
         const result = await this.client.models.generateContent(request);
@@ -61,7 +84,57 @@ export class SmolGoogle extends BaseClient {
                 });
             }
         });
+        // Extract usage and calculate cost
+        const { usage, cost } = this.calculateUsageAndCost(result.usageMetadata);
         // Return the response, updating the chat history
-        return success({ output, toolCalls });
+        return success({ output, toolCalls, usage, cost });
+    }
+    async *_textStream(config) {
+        const request = this.buildRequest(config);
+        this.logger.debug("Sending streaming request to Google Gemini:", JSON.stringify(request, null, 2));
+        const stream = await this.client.models.generateContentStream(request);
+        let content = "";
+        const toolCallsMap = new Map();
+        let usage;
+        let cost;
+        for await (const chunk of stream) {
+            // Extract usage metadata from chunks
+            if (chunk.usageMetadata) {
+                const usageAndCost = this.calculateUsageAndCost(chunk.usageMetadata);
+                usage = usageAndCost.usage;
+                cost = usageAndCost.cost;
+            }
+            // Handle text content
+            if (chunk.text) {
+                content += chunk.text;
+                yield { type: "text", text: chunk.text };
+            }
+            // Handle function calls
+            if (chunk.functionCalls) {
+                for (const functionCall of chunk.functionCalls) {
+                    const id = functionCall.id || functionCall.name || "";
+                    const name = functionCall.name || "";
+                    if (!toolCallsMap.has(id)) {
+                        toolCallsMap.set(id, {
+                            id: id,
+                            name: name,
+                            arguments: functionCall.args,
+                        });
+                    }
+                }
+            }
+        }
+        this.logger.debug("Streaming response completed from Google Gemini");
+        // Yield tool calls
+        const toolCalls = [];
+        for (const tc of toolCallsMap.values()) {
+            const toolCall = new ToolCall(tc.id, tc.name, tc.arguments);
+            toolCalls.push(toolCall);
+            yield { type: "tool_call", toolCall };
+        }
+        yield {
+            type: "done",
+            result: { output: content || null, toolCalls, usage, cost },
+        };
     }
 }
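
Taken together, the new methods split request construction out of _textSync (buildRequest), centralize token accounting (calculateUsageAndCost), and add real streaming. The generator yields "text" chunks as tokens arrive, buffered "tool_call" chunks once the stream ends, and finally one "done" chunk whose result matches the _textSync payload. A consumption sketch; the direct call to the underscore-prefixed method and the config shape are assumptions for illustration:

    // Hypothetical consumer of SmolGoogle._textStream.
    import { SmolGoogle } from "smoltalk"; // import path assumed

    async function streamDemo(messages: any[]) {
        const client = new SmolGoogle({ model: "gemini-2.5-flash" }); // config shape assumed
        for await (const chunk of client._textStream({ messages })) {
            if (chunk.type === "text") process.stdout.write(chunk.text);      // incremental text
            else if (chunk.type === "tool_call") console.log(chunk.toolCall); // after stream end
            else if (chunk.type === "done") console.log(chunk.result.usage, chunk.result.cost);
        }
    }
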
package/dist/clients/ollama.d.ts
CHANGED
@@ -1,5 +1,5 @@
 import { Ollama } from "ollama";
-import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient } from "../types.js";
+import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
 import { BaseClient } from "./baseClient.js";
 export declare const DEFAULT_OLLAMA_HOST = "http://localhost:11434";
 export type SmolOllamaConfig = BaseClientConfig;
@@ -10,5 +10,7 @@ export declare class SmolOllama extends BaseClient implements SmolClient {
     constructor(config: SmolOllamaConfig);
     getClient(): Ollama;
     getModel(): string;
+    private calculateUsageAndCost;
     _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
+    _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
 }
package/dist/clients/ollama.js
CHANGED
@@ -4,6 +4,7 @@ import { getLogger } from "../logger.js";
 import { success, } from "../types.js";
 import { zodToGoogleTool } from "../util/tool.js";
 import { BaseClient } from "./baseClient.js";
+import { calculateCost } from "../models.js";
 export const DEFAULT_OLLAMA_HOST = "http://localhost:11434";
 export class SmolOllama extends BaseClient {
     logger;
@@ -30,6 +31,24 @@ export class SmolOllama extends BaseClient {
     getModel() {
         return this.model;
     }
+    calculateUsageAndCost(responseData) {
+        let usage;
+        let cost;
+        if (responseData) {
+            const inputTokens = responseData.prompt_eval_count || 0;
+            const outputTokens = responseData.eval_count || 0;
+            usage = {
+                inputTokens,
+                outputTokens,
+                totalTokens: inputTokens + outputTokens,
+            };
+            const calculatedCost = calculateCost(this.model, usage);
+            if (calculatedCost) {
+                cost = calculatedCost;
+            }
+        }
+        return { usage, cost };
+    }
     async _textSync(config) {
         const messages = config.messages.map((msg) => msg.toOpenAIMessage());
         const tools = (config.tools || []).map((tool) => {
@@ -62,7 +81,90 @@ export class SmolOllama extends BaseClient {
                 toolCalls.push(new ToolCall(tool_call.id, tool_call.function.name, tool_call.function.arguments || {}));
             }
         }
+        // Extract usage and calculate cost
+        const { usage, cost } = this.calculateUsageAndCost(result);
         // Return the response, updating the chat history
-        return success({ output, toolCalls });
+        return success({ output, toolCalls, usage, cost });
+    }
+    async *_textStream(config) {
+        const messages = config.messages.map((msg) => msg.toOpenAIMessage());
+        const tools = (config.tools || []).map((tool) => {
+            return zodToGoogleTool(tool.name, tool.schema, {
+                description: tool.description,
+            });
+        });
+        const request = {
+            messages: messages,
+            model: this.model,
+            stream: true,
+        };
+        if (tools.length > 0) {
+            request.tools = tools.map((t) => ({ type: "function", function: t }));
+        }
+        if (config.responseFormat) {
+            request.format = config.responseFormat.toJSONSchema();
+        }
+        if (config.rawAttributes) {
+            Object.assign(request, config.rawAttributes);
+        }
+        this.logger.debug("Sending streaming request to Ollama:", JSON.stringify(request, null, 2));
+        // @ts-ignore
+        const stream = await this.client.chat(request);
+        let content = "";
+        const toolCallsMap = new Map();
+        let usage;
+        let cost;
+        let lastChunk;
+        for await (const chunk of stream) {
+            lastChunk = chunk;
+            // Handle text content
+            if (chunk.message?.content) {
+                content += chunk.message.content;
+                yield { type: "text", text: chunk.message.content };
+            }
+            // Handle tool calls
+            if (chunk.message?.tool_calls) {
+                for (const tc of chunk.message.tool_calls) {
+                    const tool_call = tc;
+                    const id = tool_call.id || tool_call.function.name || "";
+                    const name = tool_call.function.name || "";
+                    if (!toolCallsMap.has(id)) {
+                        toolCallsMap.set(id, {
+                            id: id,
+                            name: name,
+                            arguments: tool_call.function.arguments || {},
+                        });
+                    }
+                    else {
+                        // Merge arguments if tool call is split across chunks
+                        const existing = toolCallsMap.get(id);
+                        if (tool_call.function.arguments) {
+                            existing.arguments = {
+                                ...existing.arguments,
+                                ...tool_call.function.arguments,
+                            };
+                        }
+                    }
+                }
+            }
+        }
+        this.logger.debug("Streaming response completed from Ollama");
+        // Extract usage from the last chunk
+        if (lastChunk) {
+            const usageAndCost = this.calculateUsageAndCost(lastChunk);
+            usage = usageAndCost.usage;
+            cost = usageAndCost.cost;
+        }
+        // Yield tool calls
+        const toolCalls = [];
+        for (const tc of toolCallsMap.values()) {
+            const toolCall = new ToolCall(tc.id, tc.name, tc.arguments);
+            toolCalls.push(toolCall);
+            yield { type: "tool_call", toolCall };
+        }
+        yield {
+            type: "done",
+            result: { output: content || null, toolCalls, usage, cost },
+        };
     }
 }
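
Two Ollama-specific details here: token counts (prompt_eval_count / eval_count) arrive on the final streamed chunk rather than in a dedicated usage object, hence the lastChunk bookkeeping; and tool-call arguments come back as already-parsed objects, so split tool calls are merged with an object spread instead of the string concatenation OpenAI streaming requires. Since locally served models generally have no entry in the pricing table, calculateCost will typically return null here and cost stays undefined.
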
package/dist/clients/openai.d.ts
CHANGED
@@ -9,6 +9,7 @@ export declare class SmolOpenAi extends BaseClient implements SmolClient {
     constructor(config: SmolOpenAiConfig);
     getClient(): OpenAI;
     getModel(): string;
+    private calculateUsageAndCost;
     private buildRequest;
     _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
     _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
package/dist/clients/openai.js
CHANGED
@@ -5,6 +5,7 @@ import { isFunctionToolCall } from "../util.js";
 import { getLogger } from "../logger.js";
 import { BaseClient } from "./baseClient.js";
 import { zodToOpenAITool } from "../util/tool.js";
+import { calculateCost } from "../models.js";
 export class SmolOpenAi extends BaseClient {
     client;
     logger;
@@ -24,6 +25,23 @@ export class SmolOpenAi extends BaseClient {
     getModel() {
         return this.model;
     }
+    calculateUsageAndCost(usageData) {
+        let usage;
+        let cost;
+        if (usageData) {
+            usage = {
+                inputTokens: usageData.prompt_tokens || 0,
+                outputTokens: usageData.completion_tokens || 0,
+                cachedInputTokens: usageData.prompt_tokens_details?.cached_tokens,
+                totalTokens: usageData.total_tokens,
+            };
+            const calculatedCost = calculateCost(this.model, usage);
+            if (calculatedCost) {
+                cost = calculatedCost;
+            }
+        }
+        return { usage, cost };
+    }
     buildRequest(config) {
         const messages = config.messages.map((msg) => msg.toOpenAIMessage());
         const request = {
@@ -69,7 +87,9 @@ export class SmolOpenAi extends BaseClient {
                 }
             }
         }
-        return success({ output, toolCalls });
+        // Extract usage and calculate cost
+        const { usage, cost } = this.calculateUsageAndCost(completion.usage);
+        return success({ output, toolCalls, usage, cost });
     }
     async *_textStream(config) {
         const request = this.buildRequest(config);
@@ -77,11 +97,20 @@ export class SmolOpenAi extends BaseClient {
         const completion = await this.client.chat.completions.create({
             ...request,
             stream: true,
+            stream_options: { include_usage: true },
         });
         let content = "";
         const toolCallsMap = new Map();
+        let usage;
+        let cost;
         for await (const chunk of completion) {
             const delta = chunk.choices[0]?.delta;
+            // Extract usage from the final chunk
+            if (chunk.usage) {
+                const usageAndCost = this.calculateUsageAndCost(chunk.usage);
+                usage = usageAndCost.usage;
+                cost = usageAndCost.cost;
+            }
             if (!delta)
                 continue;
             if (delta.content) {
@@ -117,6 +146,9 @@ export class SmolOpenAi extends BaseClient {
             toolCalls.push(toolCall);
             yield { type: "tool_call", toolCall };
         }
-        yield { type: "done", result: { output: content || null, toolCalls } };
+        yield {
+            type: "done",
+            result: { output: content || null, toolCalls, usage, cost },
+        };
     }
 }
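
With stream_options: { include_usage: true }, the OpenAI API appends one extra chunk after the last content delta; its choices array is empty and only its usage field is populated, which is why the chunk.usage check sits before the if (!delta) continue guard. Approximate shape of that final chunk, with illustrative token counts:

    // Final chunk when include_usage is enabled (values illustrative):
    {
        choices: [],  // empty, so delta is undefined and the loop continues
        usage: {
            prompt_tokens: 812,
            completion_tokens: 143,
            total_tokens: 955,
            prompt_tokens_details: { cached_tokens: 0 },
        },
    }
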
package/dist/models.d.ts
CHANGED
@@ -63,11 +63,11 @@ export declare const textModels: readonly [{
 }, {
     readonly type: "text";
     readonly modelName: "o3";
-    readonly description: "o3 is a reasoning model that sets a new standard for math, science,
+    readonly description: "o3 is a reasoning model that sets a new standard for math, science, coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. The knowledge cutoff for o3 models is October, 2023.";
     readonly maxInputTokens: 200000;
     readonly maxOutputTokens: 100000;
     readonly inputTokenCost: 2;
-    readonly cachedInputTokenCost:
+    readonly cachedInputTokenCost: 0.5;
     readonly outputTokenCost: 8;
     readonly provider: "openai";
 }, {
@@ -83,7 +83,7 @@ export declare const textModels: readonly [{
 }, {
     readonly type: "text";
     readonly modelName: "o4-mini";
-    readonly description: "
+    readonly description: "Latest small o-series model optimized for fast, effective reasoning with exceptional performance in coding and visual tasks. Knowledge cutoff: June 2024.";
     readonly maxInputTokens: 200000;
     readonly maxOutputTokens: 100000;
     readonly inputTokenCost: 1.1;
@@ -143,25 +143,25 @@ export declare const textModels: readonly [{
 }, {
     readonly type: "text";
     readonly modelName: "gemini-3-pro-preview";
-    readonly description: "Strongest Gemini 3 model quality with
-    readonly maxInputTokens:
-    readonly maxOutputTokens:
+    readonly description: "Strongest Gemini 3 model quality with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Nov 2025, currently in preview.";
+    readonly maxInputTokens: 1048576;
+    readonly maxOutputTokens: 65536;
     readonly inputTokenCost: 2;
     readonly outputTokenCost: 12;
     readonly provider: "google";
 }, {
     readonly type: "text";
     readonly modelName: "gemini-3-flash-preview";
-    readonly description: "Latest Gemini 3 flash model with 1M context window.
+    readonly description: "Latest Gemini 3 flash model with 1M context window and 64K output. Outperforms 2.5 Pro while being 3x faster. Optimized for agentic workflows and coding. Includes context caching for 90% cost reductions.";
     readonly maxInputTokens: 1048576;
-    readonly maxOutputTokens:
+    readonly maxOutputTokens: 65536;
     readonly inputTokenCost: 0.5;
     readonly outputTokenCost: 3;
     readonly provider: "google";
 }, {
     readonly type: "text";
     readonly modelName: "gemini-2.5-pro";
-    readonly description: "High-performance Gemini 2.5 model with 2M context window. Standard pricing for ≤200k tokens, higher rates for >200k tokens.";
+    readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/higher output). Batch API: 50% discount.";
     readonly maxInputTokens: 2097152;
     readonly maxOutputTokens: 8192;
     readonly inputTokenCost: 1.25;
@@ -170,7 +170,7 @@ export declare const textModels: readonly [{
 }, {
     readonly type: "text";
     readonly modelName: "gemini-2.5-flash";
-    readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. 1M context window
+    readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.";
     readonly maxInputTokens: 1048576;
     readonly maxOutputTokens: 8192;
     readonly inputTokenCost: 0.3;
@@ -188,11 +188,12 @@ export declare const textModels: readonly [{
 }, {
     readonly type: "text";
     readonly modelName: "gemini-2.0-flash";
-    readonly description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window.";
+    readonly description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window. DEPRECATED: Will be shut down on March 31, 2026.";
     readonly maxInputTokens: 1048576;
     readonly maxOutputTokens: 8192;
     readonly inputTokenCost: 0.1;
     readonly outputTokenCost: 0.4;
+    readonly disabled: true;
     readonly provider: "google";
 }, {
     readonly type: "text";
@@ -316,6 +317,12 @@ export declare const imageModels: readonly [{
     readonly provider: "google";
     readonly description: "aka nano-banana";
     readonly costPerImage: 0.04;
+}, {
+    readonly type: "image";
+    readonly modelName: "gemini-3-pro-image-preview";
+    readonly provider: "google";
+    readonly description: "High-fidelity image generation with reasoning-enhanced composition. Supports legible text rendering, complex multi-turn editing, and character consistency using up to 14 reference inputs.";
+    readonly costPerImage: 0.05;
 }];
 export declare const embeddingsModels: {
     type: string;
@@ -359,11 +366,11 @@ export declare function getModel(modelName: ModelName): {
 } | {
     readonly type: "text";
     readonly modelName: "o3";
-    readonly description: "o3 is a reasoning model that sets a new standard for math, science,
+    readonly description: "o3 is a reasoning model that sets a new standard for math, science, coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. The knowledge cutoff for o3 models is October, 2023.";
     readonly maxInputTokens: 200000;
     readonly maxOutputTokens: 100000;
     readonly inputTokenCost: 2;
-    readonly cachedInputTokenCost:
+    readonly cachedInputTokenCost: 0.5;
     readonly outputTokenCost: 8;
     readonly provider: "openai";
 } | {
@@ -379,7 +386,7 @@ export declare function getModel(modelName: ModelName): {
 } | {
     readonly type: "text";
     readonly modelName: "o4-mini";
-    readonly description: "
+    readonly description: "Latest small o-series model optimized for fast, effective reasoning with exceptional performance in coding and visual tasks. Knowledge cutoff: June 2024.";
     readonly maxInputTokens: 200000;
     readonly maxOutputTokens: 100000;
     readonly inputTokenCost: 1.1;
@@ -439,25 +446,25 @@ export declare function getModel(modelName: ModelName): {
 } | {
     readonly type: "text";
     readonly modelName: "gemini-3-pro-preview";
-    readonly description: "Strongest Gemini 3 model quality with
-    readonly maxInputTokens:
-    readonly maxOutputTokens:
+    readonly description: "Strongest Gemini 3 model quality with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Nov 2025, currently in preview.";
+    readonly maxInputTokens: 1048576;
+    readonly maxOutputTokens: 65536;
     readonly inputTokenCost: 2;
     readonly outputTokenCost: 12;
     readonly provider: "google";
 } | {
     readonly type: "text";
     readonly modelName: "gemini-3-flash-preview";
-    readonly description: "Latest Gemini 3 flash model with 1M context window.
+    readonly description: "Latest Gemini 3 flash model with 1M context window and 64K output. Outperforms 2.5 Pro while being 3x faster. Optimized for agentic workflows and coding. Includes context caching for 90% cost reductions.";
     readonly maxInputTokens: 1048576;
-    readonly maxOutputTokens:
+    readonly maxOutputTokens: 65536;
     readonly inputTokenCost: 0.5;
     readonly outputTokenCost: 3;
     readonly provider: "google";
 } | {
     readonly type: "text";
     readonly modelName: "gemini-2.5-pro";
-    readonly description: "High-performance Gemini 2.5 model with 2M context window. Standard pricing for ≤200k tokens, higher rates for >200k tokens.";
+    readonly description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/higher output). Batch API: 50% discount.";
     readonly maxInputTokens: 2097152;
     readonly maxOutputTokens: 8192;
     readonly inputTokenCost: 1.25;
@@ -466,7 +473,7 @@ export declare function getModel(modelName: ModelName): {
 } | {
     readonly type: "text";
     readonly modelName: "gemini-2.5-flash";
-    readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. 1M context window
+    readonly description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.";
     readonly maxInputTokens: 1048576;
     readonly maxOutputTokens: 8192;
     readonly inputTokenCost: 0.3;
@@ -484,11 +491,12 @@ export declare function getModel(modelName: ModelName): {
 } | {
     readonly type: "text";
     readonly modelName: "gemini-2.0-flash";
-    readonly description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window.";
+    readonly description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window. DEPRECATED: Will be shut down on March 31, 2026.";
     readonly maxInputTokens: 1048576;
     readonly maxOutputTokens: 8192;
     readonly inputTokenCost: 0.1;
     readonly outputTokenCost: 0.4;
+    readonly disabled: true;
     readonly provider: "google";
 } | {
     readonly type: "text";
@@ -611,8 +619,25 @@ export declare function getModel(modelName: ModelName): {
     readonly provider: "google";
     readonly description: "aka nano-banana";
     readonly costPerImage: 0.04;
+} | {
+    readonly type: "image";
+    readonly modelName: "gemini-3-pro-image-preview";
+    readonly provider: "google";
+    readonly description: "High-fidelity image generation with reasoning-enhanced composition. Supports legible text rendering, complex multi-turn editing, and character consistency using up to 14 reference inputs.";
+    readonly costPerImage: 0.05;
 } | undefined;
 export declare function isImageModel(model: Model): model is ImageModel;
 export declare function isTextModel(model: Model): model is TextModel;
 export declare function isSpeechToTextModel(model: Model): model is SpeechToTextModel;
 export declare function isEmbeddingsModel(model: Model): model is EmbeddingsModel;
+export declare function calculateCost(modelName: ModelName, usage: {
+    inputTokens: number;
+    outputTokens: number;
+    cachedInputTokens?: number;
+}): {
+    inputCost: number;
+    outputCost: number;
+    cachedInputCost?: number;
+    totalCost: number;
+    currency: string;
+} | null;
package/dist/models.js
CHANGED
@@ -1,3 +1,4 @@
+import { round } from "./util.js";
 export const speechToTextModels = [
     { type: "speech-to-text", modelName: "whisper-local", provider: "local" },
     {
@@ -43,11 +44,11 @@ export const textModels = [
     {
         type: "text",
         modelName: "o3",
-        description: "o3 is a reasoning model that sets a new standard for math, science,
+        description: "o3 is a reasoning model that sets a new standard for math, science, coding, visual reasoning tasks, and technical writing. Part of the o-series of reasoning models. The knowledge cutoff for o3 models is October, 2023.",
         maxInputTokens: 200000,
         maxOutputTokens: 100000,
         inputTokenCost: 2,
-        cachedInputTokenCost:
+        cachedInputTokenCost: 0.5,
         outputTokenCost: 8,
         provider: "openai",
     },
@@ -65,7 +66,7 @@ export const textModels = [
     {
         type: "text",
         modelName: "o4-mini",
-        description: "
+        description: "Latest small o-series model optimized for fast, effective reasoning with exceptional performance in coding and visual tasks. Knowledge cutoff: June 2024.",
         maxInputTokens: 200000,
         maxOutputTokens: 100000,
         inputTokenCost: 1.1,
@@ -131,9 +132,9 @@ export const textModels = [
     {
         type: "text",
        modelName: "gemini-3-pro-preview",
-        description: "Strongest Gemini 3 model quality with
-        maxInputTokens:
-        maxOutputTokens:
+        description: "Strongest Gemini 3 model quality with 1M context window and 64K output. Standard pricing for ≤200k tokens ($2.00 input/$12.00 output), higher rates for >200k tokens ($4.00 input/$18.00 output). Released Nov 2025, currently in preview.",
+        maxInputTokens: 1_048_576,
+        maxOutputTokens: 65536,
         inputTokenCost: 2.0,
         outputTokenCost: 12.0,
         provider: "google",
@@ -141,9 +142,9 @@ export const textModels = [
     {
         type: "text",
         modelName: "gemini-3-flash-preview",
-        description: "Latest Gemini 3 flash model with 1M context window.
+        description: "Latest Gemini 3 flash model with 1M context window and 64K output. Outperforms 2.5 Pro while being 3x faster. Optimized for agentic workflows and coding. Includes context caching for 90% cost reductions.",
         maxInputTokens: 1_048_576,
-        maxOutputTokens:
+        maxOutputTokens: 65536,
         inputTokenCost: 0.5,
         outputTokenCost: 3.0,
         provider: "google",
@@ -151,7 +152,7 @@ export const textModels = [
     {
         type: "text",
         modelName: "gemini-2.5-pro",
-        description: "High-performance Gemini 2.5 model with 2M context window. Standard pricing for ≤200k tokens, higher rates for >200k tokens.",
+        description: "High-performance Gemini 2.5 model with 2M context window. Adaptive thinking for complex reasoning and coding. Standard pricing for ≤200k tokens ($1.25 input/$10.00 output), higher rates for >200k tokens ($2.50 input/higher output). Batch API: 50% discount.",
         maxInputTokens: 2_097_152,
         maxOutputTokens: 8192,
         inputTokenCost: 1.25,
@@ -161,7 +162,7 @@ export const textModels = [
     {
         type: "text",
         modelName: "gemini-2.5-flash",
-        description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. 1M context window
+        description: "Balanced Gemini 2.5 model with excellent performance-to-cost ratio. Lightning-fast with controllable thinking budgets. 1M context window. Context caching available for up to 75% cost reduction.",
         maxInputTokens: 1_048_576,
         maxOutputTokens: 8192,
         inputTokenCost: 0.3,
@@ -181,11 +182,12 @@ export const textModels = [
     {
         type: "text",
         modelName: "gemini-2.0-flash",
-        description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window.",
+        description: "Workhorse model for all daily tasks. Strong overall performance and supports real-time streaming Live API. 1M context window. DEPRECATED: Will be shut down on March 31, 2026.",
         maxInputTokens: 1_048_576,
         maxOutputTokens: 8192,
         inputTokenCost: 0.1,
         outputTokenCost: 0.4,
+        disabled: true,
         provider: "google",
     },
     {
@@ -359,6 +361,13 @@ export const imageModels = [
         description: "aka nano-banana",
         costPerImage: 0.04,
     },
+    {
+        type: "image",
+        modelName: "gemini-3-pro-image-preview",
+        provider: "google",
+        description: "High-fidelity image generation with reasoning-enhanced composition. Supports legible text rendering, complex multi-turn editing, and character consistency using up to 14 reference inputs.",
+        costPerImage: 0.05,
+    },
 ];
 export const embeddingsModels = [
     { type: "embeddings", modelName: "text-embedding-3-small", tokenCost: 0.02 },
@@ -379,3 +388,22 @@ export function isSpeechToTextModel(model) {
 export function isEmbeddingsModel(model) {
     return model.type === "embeddings";
 }
+export function calculateCost(modelName, usage) {
+    const model = getModel(modelName);
+    if (!model || !isTextModel(model)) {
+        return null;
+    }
+    const inputCost = round((usage.inputTokens * (model.inputTokenCost || 0)) / 1_000_000, 2);
+    const outputCost = round((usage.outputTokens * (model.outputTokenCost || 0)) / 1_000_000, 2);
+    const cachedInputCost = usage.cachedInputTokens && model.cachedInputTokenCost
+        ? round((usage.cachedInputTokens * model.cachedInputTokenCost) / 1_000_000, 2)
+        : undefined;
+    const totalCost = round(inputCost + outputCost + (cachedInputCost || 0), 2);
+    return {
+        inputCost,
+        outputCost,
+        cachedInputCost,
+        totalCost,
+        currency: "USD",
+    };
+}
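
A worked example against the o3 entry above ($2/M input, $8/M output, $0.50/M cached input), with round(x, 2) rounding to two decimals; the deep import path is an assumption:

    import { calculateCost } from "smoltalk/dist/models.js"; // path assumed

    const cost = calculateCost("o3", {
        inputTokens: 100_000,      // 100k * $2.00 / 1M = $0.20
        outputTokens: 20_000,      // 20k  * $8.00 / 1M = $0.16
        cachedInputTokens: 40_000, // 40k  * $0.50 / 1M = $0.02
    });
    // => { inputCost: 0.2, outputCost: 0.16, cachedInputCost: 0.02,
    //      totalCost: 0.38, currency: "USD" }

Note that the cached cost is added on top of the full inputTokens charge; if a provider reports cached tokens as a subset of prompt tokens, they are counted twice, so treat the result as an estimate.
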
package/dist/types.d.ts
CHANGED
@@ -43,9 +43,24 @@ export type ToolLoopDetection = {
     excludeTools?: string[];
 };
 export type BaseClientConfig = SmolConfig & {};
+export type TokenUsage = {
+    inputTokens: number;
+    outputTokens: number;
+    cachedInputTokens?: number;
+    totalTokens?: number;
+};
+export type CostEstimate = {
+    inputCost: number;
+    outputCost: number;
+    cachedInputCost?: number;
+    totalCost: number;
+    currency: string;
+};
 export type PromptResult = {
     output: string | null;
     toolCalls: ToolCall[];
+    usage?: TokenUsage;
+    cost?: CostEstimate;
 };
 export type StreamChunk = {
     type: "text";
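
Both new PromptResult fields are optional: a provider that returns no usage metadata leaves them undefined, as does any model without a pricing entry in models.js. A defensive consumer sketch (the deep import path is an assumption):

    import type { PromptResult } from "smoltalk/dist/types.js"; // path assumed

    function logUsage(result: PromptResult): void {
        if (result.usage) {
            console.log(`tokens: ${result.usage.inputTokens} in / ${result.usage.outputTokens} out`);
        }
        if (result.cost) {
            console.log(`cost: $${result.cost.totalCost} ${result.cost.currency}`);
        }
    }
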
package/dist/util.d.ts
CHANGED
package/dist/util.js
CHANGED