smoltalk 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/classes/message/AssistantMessage.d.ts +2 -0
- package/dist/clients/anthropic.js +116 -4
- package/dist/clients/google.js +5 -2
- package/dist/clients/openai.js +5 -2
- package/dist/clients/openaiResponses.js +5 -2
- package/dist/model.d.ts +2 -0
- package/dist/model.js +38 -5
- package/dist/models.d.ts +123 -10
- package/dist/models.js +65 -5
- package/dist/types/costEstimate.d.ts +2 -0
- package/dist/types/costEstimate.js +2 -0
- package/dist/types/tokenUsage.d.ts +2 -0
- package/dist/types/tokenUsage.js +2 -0
- package/dist/types.d.ts +4 -0
- package/package.json +1 -1
package/dist/classes/message/AssistantMessage.d.ts
CHANGED
|
@@ -29,12 +29,14 @@ export declare const AssistantMessageJSONSchema: z.ZodObject<{
|
|
|
29
29
|
inputTokens: z.ZodNumber;
|
|
30
30
|
outputTokens: z.ZodNumber;
|
|
31
31
|
cachedInputTokens: z.ZodOptional<z.ZodNumber>;
|
|
32
|
+
cacheCreationInputTokens: z.ZodOptional<z.ZodNumber>;
|
|
32
33
|
totalTokens: z.ZodOptional<z.ZodNumber>;
|
|
33
34
|
}, z.core.$strip>>;
|
|
34
35
|
cost: z.ZodOptional<z.ZodObject<{
|
|
35
36
|
inputCost: z.ZodNumber;
|
|
36
37
|
outputCost: z.ZodNumber;
|
|
37
38
|
cachedInputCost: z.ZodOptional<z.ZodNumber>;
|
|
39
|
+
cacheCreationInputCost: z.ZodOptional<z.ZodNumber>;
|
|
38
40
|
totalCost: z.ZodNumber;
|
|
39
41
|
currency: z.ZodString;
|
|
40
42
|
}, z.core.$strip>>;
|
package/dist/clients/anthropic.js
CHANGED
|
@@ -8,6 +8,82 @@ import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smol
|
|
|
8
8
|
import { BaseClient } from "./baseClient.js";
|
|
9
9
|
import { Model } from "../model.js";
|
|
10
10
|
const DEFAULT_MAX_TOKENS = 4096;
|
|
11
|
+
/**
 * Return a shallow copy of `items` in which only the last element is replaced
 * by a copy carrying `cache_control`. The input array and its elements are
 * never mutated. Callers must pass a non-empty array.
 */
function markLastWithCacheControl(items, cacheControl) {
    const lastIdx = items.length - 1;
    return items.map((item, i) => i === lastIdx ? { ...item, cache_control: cacheControl } : item);
}
/**
 * Attach ephemeral cache_control breakpoints to (up to) three places:
 *   1. the last tool definition
 *   2. the last system block (promoting system from string to array form)
 *   3. the last block of the last user message (promoting string content to
 *      a single text block)
 *
 * Anthropic enforces minimum prefix sizes; smaller prefixes silently no-op.
 *
 * The request object is not mutated: every part that receives a breakpoint is
 * rebuilt as a new array/object, and parts that receive none are returned
 * as-is (same reference).
 *
 * @param req - object with optional `system` (string or block array),
 *   `messages` (array of `{ role, content }`) and optional `tools` (array).
 * @returns `{ system, messages, tools }` with breakpoints applied.
 */
function applyCacheBreakpoints(req) {
    const cc = { type: "ephemeral" };
    // Tools: mark the last tool. Missing or empty tool lists pass through.
    let tools = req.tools;
    if (Array.isArray(tools) && tools.length > 0) {
        tools = markLastWithCacheControl(tools, cc);
    }
    // System: promote a non-empty string to array form so it can be marked;
    // for an existing block array, mark its last block.
    let system = req.system;
    if (typeof system === "string" && system.length > 0) {
        system = [{ type: "text", text: system, cache_control: cc }];
    }
    else if (Array.isArray(system) && system.length > 0) {
        system = markLastWithCacheControl(system, cc);
    }
    // Messages: mark the last block of the most recent user message.
    let messages = req.messages;
    if (Array.isArray(messages)) {
        let userIdx = -1;
        for (let i = messages.length - 1; i >= 0; i--) {
            if (messages[i]?.role === "user") {
                userIdx = i;
                break;
            }
        }
        if (userIdx !== -1) {
            const target = messages[userIdx];
            const content = target.content;
            let blocks = [];
            if (typeof content === "string") {
                // Mirror the system branch: never create an empty text block
                // just to hang a breakpoint on it.
                if (content.length > 0) {
                    blocks = [{ type: "text", text: content }];
                }
            }
            else if (Array.isArray(content)) {
                blocks = content;
            }
            // Empty (or missing) content: leave the messages untouched rather
            // than falling back to an earlier user message.
            if (blocks.length > 0) {
                const markedBlocks = markLastWithCacheControl(blocks, cc);
                messages = messages.map((m, i) => i === userIdx ? { ...target, content: markedBlocks } : m);
            }
        }
    }
    return { system, messages, tools };
}
|
|
11
87
|
export class SmolAnthropic extends BaseClient {
|
|
12
88
|
client;
|
|
13
89
|
logger;
|
|
@@ -22,11 +98,22 @@ export class SmolAnthropic extends BaseClient {
|
|
|
22
98
|
return this.model.getModel();
|
|
23
99
|
}
|
|
24
100
|
calculateUsageAndCost(usageData) {
|
|
101
|
+
const cacheRead = usageData.cache_read_input_tokens ?? 0;
|
|
102
|
+
const cacheCreation = usageData.cache_creation_input_tokens ?? 0;
|
|
25
103
|
const usage = {
|
|
26
104
|
inputTokens: usageData.input_tokens,
|
|
27
105
|
outputTokens: usageData.output_tokens,
|
|
28
|
-
totalTokens: usageData.input_tokens +
|
|
106
|
+
totalTokens: usageData.input_tokens +
|
|
107
|
+
cacheRead +
|
|
108
|
+
cacheCreation +
|
|
109
|
+
usageData.output_tokens,
|
|
29
110
|
};
|
|
111
|
+
if (cacheRead > 0) {
|
|
112
|
+
usage.cachedInputTokens = cacheRead;
|
|
113
|
+
}
|
|
114
|
+
if (cacheCreation > 0) {
|
|
115
|
+
usage.cacheCreationInputTokens = cacheCreation;
|
|
116
|
+
}
|
|
30
117
|
const cost = this.model.calculateCost(usage) ?? undefined;
|
|
31
118
|
return { usage, cost };
|
|
32
119
|
}
|
|
@@ -81,7 +168,12 @@ export class SmolAnthropic extends BaseClient {
|
|
|
81
168
|
budget_tokens: reasoningBudgetMap[config.reasoningEffort],
|
|
82
169
|
}
|
|
83
170
|
: undefined;
|
|
84
|
-
|
|
171
|
+
const cachingEnabled = config.caching?.enabled !== false;
|
|
172
|
+
const baseRequest = { system, messages: anthropicMessages, tools };
|
|
173
|
+
const finalRequest = cachingEnabled
|
|
174
|
+
? applyCacheBreakpoints(baseRequest)
|
|
175
|
+
: baseRequest;
|
|
176
|
+
return { ...finalRequest, thinking };
|
|
85
177
|
}
|
|
86
178
|
rethrowAsSmolError(error) {
|
|
87
179
|
if (error instanceof Anthropic.APIError) {
|
|
@@ -198,10 +290,15 @@ export class SmolAnthropic extends BaseClient {
|
|
|
198
290
|
// Track thinking blocks by index: index -> { text, signature }
|
|
199
291
|
const thinkingBlockMap = new Map();
|
|
200
292
|
let inputTokens = 0;
|
|
293
|
+
let cacheReadTokens = 0;
|
|
294
|
+
let cacheCreationTokens = 0;
|
|
201
295
|
let outputTokens = 0;
|
|
202
296
|
for await (const event of stream) {
|
|
203
297
|
if (event.type === "message_start") {
|
|
204
|
-
|
|
298
|
+
const u = event.message.usage;
|
|
299
|
+
inputTokens = u.input_tokens;
|
|
300
|
+
cacheReadTokens = u.cache_read_input_tokens ?? 0;
|
|
301
|
+
cacheCreationTokens = u.cache_creation_input_tokens ?? 0;
|
|
205
302
|
}
|
|
206
303
|
else if (event.type === "content_block_start") {
|
|
207
304
|
if (event.content_block.type === "tool_use") {
|
|
@@ -252,6 +349,15 @@ export class SmolAnthropic extends BaseClient {
|
|
|
252
349
|
}
|
|
253
350
|
else if (event.type === "message_delta") {
|
|
254
351
|
outputTokens = event.usage.output_tokens;
|
|
352
|
+
// Defensive: in practice Anthropic only sends cache fields on
|
|
353
|
+
// message_start, but read them here too so we don't miss an
|
|
354
|
+
// update if the SDK changes.
|
|
355
|
+
if (event.usage.cache_read_input_tokens != null) {
|
|
356
|
+
cacheReadTokens = event.usage.cache_read_input_tokens;
|
|
357
|
+
}
|
|
358
|
+
if (event.usage.cache_creation_input_tokens != null) {
|
|
359
|
+
cacheCreationTokens = event.usage.cache_creation_input_tokens;
|
|
360
|
+
}
|
|
255
361
|
}
|
|
256
362
|
}
|
|
257
363
|
this.logger.debug("Streaming response completed from Anthropic");
|
|
@@ -269,8 +375,14 @@ export class SmolAnthropic extends BaseClient {
|
|
|
269
375
|
const usage = {
|
|
270
376
|
inputTokens,
|
|
271
377
|
outputTokens,
|
|
272
|
-
totalTokens: inputTokens + outputTokens,
|
|
378
|
+
totalTokens: inputTokens + cacheReadTokens + cacheCreationTokens + outputTokens,
|
|
273
379
|
};
|
|
380
|
+
if (cacheReadTokens > 0) {
|
|
381
|
+
usage.cachedInputTokens = cacheReadTokens;
|
|
382
|
+
}
|
|
383
|
+
if (cacheCreationTokens > 0) {
|
|
384
|
+
usage.cacheCreationInputTokens = cacheCreationTokens;
|
|
385
|
+
}
|
|
274
386
|
const cost = this.model.calculateCost(usage) ?? undefined;
|
|
275
387
|
yield {
|
|
276
388
|
type: "done",
|
package/dist/clients/google.js
CHANGED
|
@@ -31,12 +31,15 @@ export class SmolGoogle extends BaseClient {
|
|
|
31
31
|
let usage;
|
|
32
32
|
let cost;
|
|
33
33
|
if (usageMetadata) {
|
|
34
|
+
const cached = usageMetadata.cachedContentTokenCount ?? 0;
|
|
34
35
|
usage = {
|
|
35
|
-
inputTokens: usageMetadata.promptTokenCount || 0,
|
|
36
|
+
inputTokens: Math.max(0, (usageMetadata.promptTokenCount || 0) - cached),
|
|
36
37
|
outputTokens: usageMetadata.candidatesTokenCount || 0,
|
|
37
|
-
cachedInputTokens: usageMetadata.cachedContentTokenCount,
|
|
38
38
|
totalTokens: usageMetadata.totalTokenCount,
|
|
39
39
|
};
|
|
40
|
+
if (cached > 0) {
|
|
41
|
+
usage.cachedInputTokens = cached;
|
|
42
|
+
}
|
|
40
43
|
const calculatedCost = this.model.calculateCost(usage);
|
|
41
44
|
if (calculatedCost) {
|
|
42
45
|
cost = calculatedCost;
|
package/dist/clients/openai.js
CHANGED
|
@@ -30,12 +30,15 @@ export class SmolOpenAi extends BaseClient {
|
|
|
30
30
|
let usage;
|
|
31
31
|
let cost;
|
|
32
32
|
if (usageData) {
|
|
33
|
+
const cached = usageData.prompt_tokens_details?.cached_tokens ?? 0;
|
|
33
34
|
usage = {
|
|
34
|
-
inputTokens: usageData.prompt_tokens || 0,
|
|
35
|
+
inputTokens: Math.max(0, (usageData.prompt_tokens || 0) - cached),
|
|
35
36
|
outputTokens: usageData.completion_tokens || 0,
|
|
36
|
-
cachedInputTokens: usageData.prompt_tokens_details?.cached_tokens,
|
|
37
37
|
totalTokens: usageData.total_tokens,
|
|
38
38
|
};
|
|
39
|
+
if (cached > 0) {
|
|
40
|
+
usage.cachedInputTokens = cached;
|
|
41
|
+
}
|
|
39
42
|
const calculatedCost = this.model.calculateCost(usage);
|
|
40
43
|
if (calculatedCost) {
|
|
41
44
|
cost = calculatedCost;
|
package/dist/clients/openaiResponses.js
CHANGED
|
@@ -89,12 +89,15 @@ export class SmolOpenAiResponses extends BaseClient {
|
|
|
89
89
|
let usage;
|
|
90
90
|
let cost;
|
|
91
91
|
if (usageData) {
|
|
92
|
+
const cached = usageData.input_tokens_details?.cached_tokens ?? 0;
|
|
92
93
|
usage = {
|
|
93
|
-
inputTokens: usageData.input_tokens || 0,
|
|
94
|
+
inputTokens: Math.max(0, (usageData.input_tokens || 0) - cached),
|
|
94
95
|
outputTokens: usageData.output_tokens || 0,
|
|
95
|
-
cachedInputTokens: usageData.input_tokens_details?.cached_tokens,
|
|
96
96
|
totalTokens: usageData.total_tokens,
|
|
97
97
|
};
|
|
98
|
+
if (cached > 0) {
|
|
99
|
+
usage.cachedInputTokens = cached;
|
|
100
|
+
}
|
|
98
101
|
const calculatedCost = this.model.calculateCost(usage);
|
|
99
102
|
if (calculatedCost) {
|
|
100
103
|
cost = calculatedCost;
|
package/dist/model.d.ts
CHANGED
|
@@ -11,10 +11,12 @@ export declare class Model {
|
|
|
11
11
|
inputTokens: number;
|
|
12
12
|
outputTokens: number;
|
|
13
13
|
cachedInputTokens?: number;
|
|
14
|
+
cacheCreationInputTokens?: number;
|
|
14
15
|
}): {
|
|
15
16
|
inputCost: number;
|
|
16
17
|
outputCost: number;
|
|
17
18
|
cachedInputCost?: number;
|
|
19
|
+
cacheCreationInputCost?: number;
|
|
18
20
|
totalCost: number;
|
|
19
21
|
currency: string;
|
|
20
22
|
} | null;
|
package/dist/model.js
CHANGED
|
@@ -26,16 +26,49 @@ export class Model {
|
|
|
26
26
|
if (!model || !isTextModel(model)) {
|
|
27
27
|
return null;
|
|
28
28
|
}
|
|
29
|
+
const cachedTokens = usage.cachedInputTokens ?? 0;
|
|
30
|
+
const cacheCreationTokens = usage.cacheCreationInputTokens ?? 0;
|
|
31
|
+
// Disjoint buckets. If a discount price isn't defined for this model,
|
|
32
|
+
// the tokens were still billed by the provider — charge them at the
|
|
33
|
+
// full input rate so totalCost stays honest.
|
|
34
|
+
const cachedRate = model.cachedInputTokenCost ?? model.inputTokenCost ?? 0;
|
|
35
|
+
const cacheCreationRate = model.cacheCreationInputTokenCost ?? model.inputTokenCost ?? 0;
|
|
29
36
|
const inputCost = round((usage.inputTokens * (model.inputTokenCost || 0)) / 1_000_000, 6);
|
|
30
37
|
const outputCost = round((usage.outputTokens * (model.outputTokenCost || 0)) / 1_000_000, 6);
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
38
|
+
// Only expose cachedInputCost / cacheCreationInputCost when the model
|
|
39
|
+
// actually has a distinct discount price. Otherwise, fold those dollars
|
|
40
|
+
// into inputCost so the user isn't misled by a $0 cached field.
|
|
41
|
+
let cachedInputCost;
|
|
42
|
+
let cacheCreationInputCost;
|
|
43
|
+
let foldedInputDollars = 0;
|
|
44
|
+
if (cachedTokens > 0) {
|
|
45
|
+
const dollars = (cachedTokens * cachedRate) / 1_000_000;
|
|
46
|
+
if (model.cachedInputTokenCost != null) {
|
|
47
|
+
cachedInputCost = round(dollars, 6);
|
|
48
|
+
}
|
|
49
|
+
else {
|
|
50
|
+
foldedInputDollars += dollars;
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
if (cacheCreationTokens > 0) {
|
|
54
|
+
const dollars = (cacheCreationTokens * cacheCreationRate) / 1_000_000;
|
|
55
|
+
if (model.cacheCreationInputTokenCost != null) {
|
|
56
|
+
cacheCreationInputCost = round(dollars, 6);
|
|
57
|
+
}
|
|
58
|
+
else {
|
|
59
|
+
foldedInputDollars += dollars;
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
const finalInputCost = round(inputCost + foldedInputDollars, 6);
|
|
63
|
+
const totalCost = round(finalInputCost +
|
|
64
|
+
outputCost +
|
|
65
|
+
(cachedInputCost || 0) +
|
|
66
|
+
(cacheCreationInputCost || 0), 6);
|
|
35
67
|
return {
|
|
36
|
-
inputCost,
|
|
68
|
+
inputCost: finalInputCost,
|
|
37
69
|
outputCost,
|
|
38
70
|
cachedInputCost,
|
|
71
|
+
cacheCreationInputCost,
|
|
39
72
|
totalCost,
|
|
40
73
|
currency: "USD",
|
|
41
74
|
};
|
package/dist/models.d.ts
CHANGED
|
@@ -16,6 +16,7 @@ export type BaseModel = {
|
|
|
16
16
|
description?: string;
|
|
17
17
|
inputTokenCost?: number;
|
|
18
18
|
cachedInputTokenCost?: number;
|
|
19
|
+
cacheCreationInputTokenCost?: number;
|
|
19
20
|
outputTokenCost?: number;
|
|
20
21
|
disabled?: boolean;
|
|
21
22
|
costUnit?: "tokens" | "characters" | "minutes";
|
|
@@ -466,10 +467,27 @@ export declare const textModels: readonly [{
|
|
|
466
467
|
readonly outputTokenCost: 12;
|
|
467
468
|
readonly disabled: true;
|
|
468
469
|
readonly provider: "google";
|
|
470
|
+
}, {
|
|
471
|
+
readonly type: "text";
|
|
472
|
+
readonly modelName: "gemini-3.5-flash";
|
|
473
|
+
readonly description: "Latest Gemini 3.5 Flash model (GA May 2026). Outperforms Gemini 3.1 Pro on coding and agentic suites at 4x the speed. 1M context window, 64K output. Context caching: $0.15/1M read.";
|
|
474
|
+
readonly maxInputTokens: 1048576;
|
|
475
|
+
readonly maxOutputTokens: 65536;
|
|
476
|
+
readonly inputTokenCost: 1.5;
|
|
477
|
+
readonly cachedInputTokenCost: 0.15;
|
|
478
|
+
readonly outputTokenCost: 9;
|
|
479
|
+
readonly reasoning: {
|
|
480
|
+
readonly levels: readonly ["minimal", "low", "medium", "high"];
|
|
481
|
+
readonly defaultLevel: "high";
|
|
482
|
+
readonly canDisable: false;
|
|
483
|
+
readonly outputsThinking: true;
|
|
484
|
+
readonly outputsSignatures: true;
|
|
485
|
+
};
|
|
486
|
+
readonly provider: "google";
|
|
469
487
|
}, {
|
|
470
488
|
readonly type: "text";
|
|
471
489
|
readonly modelName: "gemini-3-flash-preview";
|
|
472
|
-
readonly description: "
|
|
490
|
+
readonly description: "Gemini 3 Flash preview. Superseded by gemini-3.5-flash. 1M context window and 64K output. Optimized for agentic workflows and coding.";
|
|
473
491
|
readonly maxInputTokens: 1048576;
|
|
474
492
|
readonly maxOutputTokens: 65536;
|
|
475
493
|
readonly inputTokenCost: 0.5;
|
|
@@ -485,8 +503,8 @@ export declare const textModels: readonly [{
|
|
|
485
503
|
readonly provider: "google";
|
|
486
504
|
}, {
|
|
487
505
|
readonly type: "text";
|
|
488
|
-
readonly modelName: "gemini-3.1-flash-lite
|
|
489
|
-
readonly description: "Most cost-effective Gemini 3.1 model
|
|
506
|
+
readonly modelName: "gemini-3.1-flash-lite";
|
|
507
|
+
readonly description: "Most cost-effective Gemini 3.1 model (GA). Thinking support, 1M context window, 64K output. 2.5x faster TTFA and 45% faster output than 2.5 Flash.";
|
|
490
508
|
readonly maxInputTokens: 1048576;
|
|
491
509
|
readonly maxOutputTokens: 65536;
|
|
492
510
|
readonly inputTokenCost: 0.25;
|
|
@@ -500,6 +518,16 @@ export declare const textModels: readonly [{
|
|
|
500
518
|
readonly outputsSignatures: true;
|
|
501
519
|
};
|
|
502
520
|
readonly provider: "google";
|
|
521
|
+
}, {
|
|
522
|
+
readonly type: "text";
|
|
523
|
+
readonly modelName: "gemini-3.1-flash-lite-preview";
|
|
524
|
+
readonly description: "DEPRECATED: Preview version, discontinued July 9, 2026. Use gemini-3.1-flash-lite instead.";
|
|
525
|
+
readonly maxInputTokens: 1048576;
|
|
526
|
+
readonly maxOutputTokens: 65536;
|
|
527
|
+
readonly inputTokenCost: 0.25;
|
|
528
|
+
readonly outputTokenCost: 1.5;
|
|
529
|
+
readonly disabled: true;
|
|
530
|
+
readonly provider: "google";
|
|
503
531
|
}, {
|
|
504
532
|
readonly type: "text";
|
|
505
533
|
readonly modelName: "gemini-2.5-pro";
|
|
@@ -507,6 +535,7 @@ export declare const textModels: readonly [{
|
|
|
507
535
|
readonly maxInputTokens: 2097152;
|
|
508
536
|
readonly maxOutputTokens: 65536;
|
|
509
537
|
readonly inputTokenCost: 1.25;
|
|
538
|
+
readonly cachedInputTokenCost: 0.31;
|
|
510
539
|
readonly outputTokenCost: 10;
|
|
511
540
|
readonly outputTokensPerSecond: 145;
|
|
512
541
|
readonly reasoning: {
|
|
@@ -522,6 +551,7 @@ export declare const textModels: readonly [{
|
|
|
522
551
|
readonly maxInputTokens: 1048576;
|
|
523
552
|
readonly maxOutputTokens: 65536;
|
|
524
553
|
readonly inputTokenCost: 0.3;
|
|
554
|
+
readonly cachedInputTokenCost: 0.075;
|
|
525
555
|
readonly outputTokenCost: 2.5;
|
|
526
556
|
readonly outputTokensPerSecond: 245;
|
|
527
557
|
readonly reasoning: {
|
|
@@ -537,6 +567,7 @@ export declare const textModels: readonly [{
|
|
|
537
567
|
readonly maxInputTokens: 1048576;
|
|
538
568
|
readonly maxOutputTokens: 65536;
|
|
539
569
|
readonly inputTokenCost: 0.1;
|
|
570
|
+
readonly cachedInputTokenCost: 0.025;
|
|
540
571
|
readonly outputTokenCost: 0.4;
|
|
541
572
|
readonly outputTokensPerSecond: 400;
|
|
542
573
|
readonly reasoning: {
|
|
@@ -611,14 +642,30 @@ export declare const textModels: readonly [{
|
|
|
611
642
|
readonly costUnit: "characters";
|
|
612
643
|
readonly disabled: true;
|
|
613
644
|
readonly provider: "google";
|
|
645
|
+
}, {
|
|
646
|
+
readonly type: "text";
|
|
647
|
+
readonly modelName: "claude-opus-4-8";
|
|
648
|
+
readonly description: "The most capable Claude model for complex reasoning and agentic coding. Same per-token pricing as Opus 4.7 with improved tool-use efficiency (~290 tokens for tool-use system prompt vs 675 on 4.7). 1M context window, 128K max output.";
|
|
649
|
+
readonly maxInputTokens: 1000000;
|
|
650
|
+
readonly maxOutputTokens: 128000;
|
|
651
|
+
readonly inputTokenCost: 5;
|
|
652
|
+
readonly cachedInputTokenCost: 0.5;
|
|
653
|
+
readonly outputTokenCost: 25;
|
|
654
|
+
readonly reasoning: {
|
|
655
|
+
readonly canDisable: false;
|
|
656
|
+
readonly outputsThinking: true;
|
|
657
|
+
readonly outputsSignatures: true;
|
|
658
|
+
};
|
|
659
|
+
readonly provider: "anthropic";
|
|
614
660
|
}, {
|
|
615
661
|
readonly type: "text";
|
|
616
662
|
readonly modelName: "claude-opus-4-7";
|
|
617
|
-
readonly description: "
|
|
663
|
+
readonly description: "Claude Opus 4.7 for complex reasoning and agentic coding. Features Adaptive Thinking that auto-tunes reasoning compute per request. 1M context window, 128K max output. Knowledge cutoff: January 2026.";
|
|
618
664
|
readonly maxInputTokens: 1000000;
|
|
619
665
|
readonly maxOutputTokens: 128000;
|
|
620
666
|
readonly inputTokenCost: 5;
|
|
621
667
|
readonly cachedInputTokenCost: 0.5;
|
|
668
|
+
readonly cacheCreationInputTokenCost: 6.25;
|
|
622
669
|
readonly outputTokenCost: 25;
|
|
623
670
|
readonly outputTokensPerSecond: 72;
|
|
624
671
|
readonly reasoning: {
|
|
@@ -635,6 +682,7 @@ export declare const textModels: readonly [{
|
|
|
635
682
|
readonly maxOutputTokens: 128000;
|
|
636
683
|
readonly inputTokenCost: 5;
|
|
637
684
|
readonly cachedInputTokenCost: 0.5;
|
|
685
|
+
readonly cacheCreationInputTokenCost: 6.25;
|
|
638
686
|
readonly outputTokenCost: 25;
|
|
639
687
|
readonly outputTokensPerSecond: 53;
|
|
640
688
|
readonly reasoning: {
|
|
@@ -651,6 +699,7 @@ export declare const textModels: readonly [{
|
|
|
651
699
|
readonly maxOutputTokens: 64000;
|
|
652
700
|
readonly inputTokenCost: 3;
|
|
653
701
|
readonly cachedInputTokenCost: 0.3;
|
|
702
|
+
readonly cacheCreationInputTokenCost: 3.75;
|
|
654
703
|
readonly outputTokenCost: 15;
|
|
655
704
|
readonly outputTokensPerSecond: 52;
|
|
656
705
|
readonly reasoning: {
|
|
@@ -667,6 +716,7 @@ export declare const textModels: readonly [{
|
|
|
667
716
|
readonly maxOutputTokens: 64000;
|
|
668
717
|
readonly inputTokenCost: 1;
|
|
669
718
|
readonly cachedInputTokenCost: 0.1;
|
|
719
|
+
readonly cacheCreationInputTokenCost: 1.25;
|
|
670
720
|
readonly outputTokenCost: 5;
|
|
671
721
|
readonly outputTokensPerSecond: 97;
|
|
672
722
|
readonly reasoning: {
|
|
@@ -734,7 +784,14 @@ export declare const imageModels: readonly [{
|
|
|
734
784
|
readonly type: "image";
|
|
735
785
|
readonly modelName: "gemini-3.1-flash-image-preview";
|
|
736
786
|
readonly provider: "google";
|
|
737
|
-
readonly description: "
|
|
787
|
+
readonly description: "DEPRECATED: Preview version. Use gemini-3.1-flash-image instead.";
|
|
788
|
+
readonly costPerImage: 0.067;
|
|
789
|
+
readonly disabled: true;
|
|
790
|
+
}, {
|
|
791
|
+
readonly type: "image";
|
|
792
|
+
readonly modelName: "gemini-3.1-flash-image";
|
|
793
|
+
readonly provider: "google";
|
|
794
|
+
readonly description: "Fast image generation with Gemini 3.1 Flash (GA). Supports resolutions from 512px to 4096px. ~$0.045/image at 512px, $0.067 at 1K, $0.101 at 2K, $0.151 at 4K.";
|
|
738
795
|
readonly costPerImage: 0.067;
|
|
739
796
|
}];
|
|
740
797
|
export declare const embeddingsModels: EmbeddingsModel[];
|
|
@@ -1156,10 +1213,27 @@ export declare function getModel(modelName: ModelName): TextModel | EmbeddingsMo
|
|
|
1156
1213
|
readonly outputTokenCost: 12;
|
|
1157
1214
|
readonly disabled: true;
|
|
1158
1215
|
readonly provider: "google";
|
|
1216
|
+
} | {
|
|
1217
|
+
readonly type: "text";
|
|
1218
|
+
readonly modelName: "gemini-3.5-flash";
|
|
1219
|
+
readonly description: "Latest Gemini 3.5 Flash model (GA May 2026). Outperforms Gemini 3.1 Pro on coding and agentic suites at 4x the speed. 1M context window, 64K output. Context caching: $0.15/1M read.";
|
|
1220
|
+
readonly maxInputTokens: 1048576;
|
|
1221
|
+
readonly maxOutputTokens: 65536;
|
|
1222
|
+
readonly inputTokenCost: 1.5;
|
|
1223
|
+
readonly cachedInputTokenCost: 0.15;
|
|
1224
|
+
readonly outputTokenCost: 9;
|
|
1225
|
+
readonly reasoning: {
|
|
1226
|
+
readonly levels: readonly ["minimal", "low", "medium", "high"];
|
|
1227
|
+
readonly defaultLevel: "high";
|
|
1228
|
+
readonly canDisable: false;
|
|
1229
|
+
readonly outputsThinking: true;
|
|
1230
|
+
readonly outputsSignatures: true;
|
|
1231
|
+
};
|
|
1232
|
+
readonly provider: "google";
|
|
1159
1233
|
} | {
|
|
1160
1234
|
readonly type: "text";
|
|
1161
1235
|
readonly modelName: "gemini-3-flash-preview";
|
|
1162
|
-
readonly description: "
|
|
1236
|
+
readonly description: "Gemini 3 Flash preview. Superseded by gemini-3.5-flash. 1M context window and 64K output. Optimized for agentic workflows and coding.";
|
|
1163
1237
|
readonly maxInputTokens: 1048576;
|
|
1164
1238
|
readonly maxOutputTokens: 65536;
|
|
1165
1239
|
readonly inputTokenCost: 0.5;
|
|
@@ -1175,8 +1249,8 @@ export declare function getModel(modelName: ModelName): TextModel | EmbeddingsMo
|
|
|
1175
1249
|
readonly provider: "google";
|
|
1176
1250
|
} | {
|
|
1177
1251
|
readonly type: "text";
|
|
1178
|
-
readonly modelName: "gemini-3.1-flash-lite
|
|
1179
|
-
readonly description: "Most cost-effective Gemini 3.1 model
|
|
1252
|
+
readonly modelName: "gemini-3.1-flash-lite";
|
|
1253
|
+
readonly description: "Most cost-effective Gemini 3.1 model (GA). Thinking support, 1M context window, 64K output. 2.5x faster TTFA and 45% faster output than 2.5 Flash.";
|
|
1180
1254
|
readonly maxInputTokens: 1048576;
|
|
1181
1255
|
readonly maxOutputTokens: 65536;
|
|
1182
1256
|
readonly inputTokenCost: 0.25;
|
|
@@ -1190,6 +1264,16 @@ export declare function getModel(modelName: ModelName): TextModel | EmbeddingsMo
|
|
|
1190
1264
|
readonly outputsSignatures: true;
|
|
1191
1265
|
};
|
|
1192
1266
|
readonly provider: "google";
|
|
1267
|
+
} | {
|
|
1268
|
+
readonly type: "text";
|
|
1269
|
+
readonly modelName: "gemini-3.1-flash-lite-preview";
|
|
1270
|
+
readonly description: "DEPRECATED: Preview version, discontinued July 9, 2026. Use gemini-3.1-flash-lite instead.";
|
|
1271
|
+
readonly maxInputTokens: 1048576;
|
|
1272
|
+
readonly maxOutputTokens: 65536;
|
|
1273
|
+
readonly inputTokenCost: 0.25;
|
|
1274
|
+
readonly outputTokenCost: 1.5;
|
|
1275
|
+
readonly disabled: true;
|
|
1276
|
+
readonly provider: "google";
|
|
1193
1277
|
} | {
|
|
1194
1278
|
readonly type: "text";
|
|
1195
1279
|
readonly modelName: "gemini-2.5-pro";
|
|
@@ -1197,6 +1281,7 @@ export declare function getModel(modelName: ModelName): TextModel | EmbeddingsMo
|
|
|
1197
1281
|
readonly maxInputTokens: 2097152;
|
|
1198
1282
|
readonly maxOutputTokens: 65536;
|
|
1199
1283
|
readonly inputTokenCost: 1.25;
|
|
1284
|
+
readonly cachedInputTokenCost: 0.31;
|
|
1200
1285
|
readonly outputTokenCost: 10;
|
|
1201
1286
|
readonly outputTokensPerSecond: 145;
|
|
1202
1287
|
readonly reasoning: {
|
|
@@ -1212,6 +1297,7 @@ export declare function getModel(modelName: ModelName): TextModel | EmbeddingsMo
|
|
|
1212
1297
|
readonly maxInputTokens: 1048576;
|
|
1213
1298
|
readonly maxOutputTokens: 65536;
|
|
1214
1299
|
readonly inputTokenCost: 0.3;
|
|
1300
|
+
readonly cachedInputTokenCost: 0.075;
|
|
1215
1301
|
readonly outputTokenCost: 2.5;
|
|
1216
1302
|
readonly outputTokensPerSecond: 245;
|
|
1217
1303
|
readonly reasoning: {
|
|
@@ -1227,6 +1313,7 @@ export declare function getModel(modelName: ModelName): TextModel | EmbeddingsMo
|
|
|
1227
1313
|
readonly maxInputTokens: 1048576;
|
|
1228
1314
|
readonly maxOutputTokens: 65536;
|
|
1229
1315
|
readonly inputTokenCost: 0.1;
|
|
1316
|
+
readonly cachedInputTokenCost: 0.025;
|
|
1230
1317
|
readonly outputTokenCost: 0.4;
|
|
1231
1318
|
readonly outputTokensPerSecond: 400;
|
|
1232
1319
|
readonly reasoning: {
|
|
@@ -1301,14 +1388,30 @@ export declare function getModel(modelName: ModelName): TextModel | EmbeddingsMo
|
|
|
1301
1388
|
readonly costUnit: "characters";
|
|
1302
1389
|
readonly disabled: true;
|
|
1303
1390
|
readonly provider: "google";
|
|
1391
|
+
} | {
|
|
1392
|
+
readonly type: "text";
|
|
1393
|
+
readonly modelName: "claude-opus-4-8";
|
|
1394
|
+
readonly description: "The most capable Claude model for complex reasoning and agentic coding. Same per-token pricing as Opus 4.7 with improved tool-use efficiency (~290 tokens for tool-use system prompt vs 675 on 4.7). 1M context window, 128K max output.";
|
|
1395
|
+
readonly maxInputTokens: 1000000;
|
|
1396
|
+
readonly maxOutputTokens: 128000;
|
|
1397
|
+
readonly inputTokenCost: 5;
|
|
1398
|
+
readonly cachedInputTokenCost: 0.5;
|
|
1399
|
+
readonly outputTokenCost: 25;
|
|
1400
|
+
readonly reasoning: {
|
|
1401
|
+
readonly canDisable: false;
|
|
1402
|
+
readonly outputsThinking: true;
|
|
1403
|
+
readonly outputsSignatures: true;
|
|
1404
|
+
};
|
|
1405
|
+
readonly provider: "anthropic";
|
|
1304
1406
|
} | {
|
|
1305
1407
|
readonly type: "text";
|
|
1306
1408
|
readonly modelName: "claude-opus-4-7";
|
|
1307
|
-
readonly description: "
|
|
1409
|
+
readonly description: "Claude Opus 4.7 for complex reasoning and agentic coding. Features Adaptive Thinking that auto-tunes reasoning compute per request. 1M context window, 128K max output. Knowledge cutoff: January 2026.";
|
|
1308
1410
|
readonly maxInputTokens: 1000000;
|
|
1309
1411
|
readonly maxOutputTokens: 128000;
|
|
1310
1412
|
readonly inputTokenCost: 5;
|
|
1311
1413
|
readonly cachedInputTokenCost: 0.5;
|
|
1414
|
+
readonly cacheCreationInputTokenCost: 6.25;
|
|
1312
1415
|
readonly outputTokenCost: 25;
|
|
1313
1416
|
readonly outputTokensPerSecond: 72;
|
|
1314
1417
|
readonly reasoning: {
|
|
@@ -1325,6 +1428,7 @@ export declare function getModel(modelName: ModelName): TextModel | EmbeddingsMo
|
|
|
1325
1428
|
readonly maxOutputTokens: 128000;
|
|
1326
1429
|
readonly inputTokenCost: 5;
|
|
1327
1430
|
readonly cachedInputTokenCost: 0.5;
|
|
1431
|
+
readonly cacheCreationInputTokenCost: 6.25;
|
|
1328
1432
|
readonly outputTokenCost: 25;
|
|
1329
1433
|
readonly outputTokensPerSecond: 53;
|
|
1330
1434
|
readonly reasoning: {
|
|
@@ -1341,6 +1445,7 @@ export declare function getModel(modelName: ModelName): TextModel | EmbeddingsMo
|
|
|
1341
1445
|
readonly maxOutputTokens: 64000;
|
|
1342
1446
|
readonly inputTokenCost: 3;
|
|
1343
1447
|
readonly cachedInputTokenCost: 0.3;
|
|
1448
|
+
readonly cacheCreationInputTokenCost: 3.75;
|
|
1344
1449
|
readonly outputTokenCost: 15;
|
|
1345
1450
|
readonly outputTokensPerSecond: 52;
|
|
1346
1451
|
readonly reasoning: {
|
|
@@ -1357,6 +1462,7 @@ export declare function getModel(modelName: ModelName): TextModel | EmbeddingsMo
|
|
|
1357
1462
|
readonly maxOutputTokens: 64000;
|
|
1358
1463
|
readonly inputTokenCost: 1;
|
|
1359
1464
|
readonly cachedInputTokenCost: 0.1;
|
|
1465
|
+
readonly cacheCreationInputTokenCost: 1.25;
|
|
1360
1466
|
readonly outputTokenCost: 5;
|
|
1361
1467
|
readonly outputTokensPerSecond: 97;
|
|
1362
1468
|
readonly reasoning: {
|
|
@@ -1423,7 +1529,14 @@ export declare function getModel(modelName: ModelName): TextModel | EmbeddingsMo
|
|
|
1423
1529
|
readonly type: "image";
|
|
1424
1530
|
readonly modelName: "gemini-3.1-flash-image-preview";
|
|
1425
1531
|
readonly provider: "google";
|
|
1426
|
-
readonly description: "
|
|
1532
|
+
readonly description: "DEPRECATED: Preview version. Use gemini-3.1-flash-image instead.";
|
|
1533
|
+
readonly costPerImage: 0.067;
|
|
1534
|
+
readonly disabled: true;
|
|
1535
|
+
} | {
|
|
1536
|
+
readonly type: "image";
|
|
1537
|
+
readonly modelName: "gemini-3.1-flash-image";
|
|
1538
|
+
readonly provider: "google";
|
|
1539
|
+
readonly description: "Fast image generation with Gemini 3.1 Flash (GA). Supports resolutions from 512px to 4096px. ~$0.045/image at 512px, $0.067 at 1K, $0.101 at 2K, $0.151 at 4K.";
|
|
1427
1540
|
readonly costPerImage: 0.067;
|
|
1428
1541
|
} | undefined;
|
|
1429
1542
|
export declare function isImageModel(model: ModelType): model is ImageModel;
|
package/dist/models.js
CHANGED
|
@@ -459,10 +459,28 @@ export const textModels = [
|
|
|
459
459
|
disabled: true,
|
|
460
460
|
provider: "google",
|
|
461
461
|
},
|
|
462
|
+
{
|
|
463
|
+
type: "text",
|
|
464
|
+
modelName: "gemini-3.5-flash",
|
|
465
|
+
description: "Latest Gemini 3.5 Flash model (GA May 2026). Outperforms Gemini 3.1 Pro on coding and agentic suites at 4x the speed. 1M context window, 64K output. Context caching: $0.15/1M read.",
|
|
466
|
+
maxInputTokens: 1_048_576,
|
|
467
|
+
maxOutputTokens: 65536,
|
|
468
|
+
inputTokenCost: 1.5,
|
|
469
|
+
cachedInputTokenCost: 0.15,
|
|
470
|
+
outputTokenCost: 9.0,
|
|
471
|
+
reasoning: {
|
|
472
|
+
levels: ["minimal", "low", "medium", "high"],
|
|
473
|
+
defaultLevel: "high",
|
|
474
|
+
canDisable: false,
|
|
475
|
+
outputsThinking: true,
|
|
476
|
+
outputsSignatures: true,
|
|
477
|
+
},
|
|
478
|
+
provider: "google",
|
|
479
|
+
},
|
|
462
480
|
{
|
|
463
481
|
type: "text",
|
|
464
482
|
modelName: "gemini-3-flash-preview",
|
|
465
|
-
description: "
|
|
483
|
+
description: "Gemini 3 Flash preview. Superseded by gemini-3.5-flash. 1M context window and 64K output. Optimized for agentic workflows and coding.",
|
|
466
484
|
maxInputTokens: 1_048_576,
|
|
467
485
|
maxOutputTokens: 65536,
|
|
468
486
|
inputTokenCost: 0.5,
|
|
@@ -479,8 +497,8 @@ export const textModels = [
|
|
|
479
497
|
},
|
|
480
498
|
{
|
|
481
499
|
type: "text",
|
|
482
|
-
modelName: "gemini-3.1-flash-lite
|
|
483
|
-
description: "Most cost-effective Gemini 3.1 model
|
|
500
|
+
modelName: "gemini-3.1-flash-lite",
|
|
501
|
+
description: "Most cost-effective Gemini 3.1 model (GA). Thinking support, 1M context window, 64K output. 2.5x faster TTFA and 45% faster output than 2.5 Flash.",
|
|
484
502
|
maxInputTokens: 1_048_576,
|
|
485
503
|
maxOutputTokens: 65536,
|
|
486
504
|
inputTokenCost: 0.25,
|
|
@@ -495,6 +513,17 @@ export const textModels = [
|
|
|
495
513
|
},
|
|
496
514
|
provider: "google",
|
|
497
515
|
},
|
|
516
|
+
{
|
|
517
|
+
type: "text",
|
|
518
|
+
modelName: "gemini-3.1-flash-lite-preview",
|
|
519
|
+
description: "DEPRECATED: Preview version, discontinued July 9, 2026. Use gemini-3.1-flash-lite instead.",
|
|
520
|
+
maxInputTokens: 1_048_576,
|
|
521
|
+
maxOutputTokens: 65536,
|
|
522
|
+
inputTokenCost: 0.25,
|
|
523
|
+
outputTokenCost: 1.5,
|
|
524
|
+
disabled: true,
|
|
525
|
+
provider: "google",
|
|
526
|
+
},
|
|
498
527
|
{
|
|
499
528
|
type: "text",
|
|
500
529
|
modelName: "gemini-2.5-pro",
|
|
@@ -502,6 +531,7 @@ export const textModels = [
|
|
|
502
531
|
maxInputTokens: 2_097_152,
|
|
503
532
|
maxOutputTokens: 65536,
|
|
504
533
|
inputTokenCost: 1.25,
|
|
534
|
+
cachedInputTokenCost: 0.31,
|
|
505
535
|
outputTokenCost: 10.0,
|
|
506
536
|
outputTokensPerSecond: 145,
|
|
507
537
|
reasoning: {
|
|
@@ -518,6 +548,7 @@ export const textModels = [
|
|
|
518
548
|
maxInputTokens: 1_048_576,
|
|
519
549
|
maxOutputTokens: 65536,
|
|
520
550
|
inputTokenCost: 0.3,
|
|
551
|
+
cachedInputTokenCost: 0.075,
|
|
521
552
|
outputTokenCost: 2.5,
|
|
522
553
|
outputTokensPerSecond: 245,
|
|
523
554
|
reasoning: {
|
|
@@ -534,6 +565,7 @@ export const textModels = [
|
|
|
534
565
|
maxInputTokens: 1_048_576,
|
|
535
566
|
maxOutputTokens: 65536,
|
|
536
567
|
inputTokenCost: 0.1,
|
|
568
|
+
cachedInputTokenCost: 0.025,
|
|
537
569
|
outputTokenCost: 0.4,
|
|
538
570
|
outputTokensPerSecond: 400,
|
|
539
571
|
reasoning: {
|
|
@@ -615,14 +647,31 @@ export const textModels = [
|
|
|
615
647
|
disabled: true,
|
|
616
648
|
provider: "google",
|
|
617
649
|
},
|
|
650
|
+
{
|
|
651
|
+
type: "text",
|
|
652
|
+
modelName: "claude-opus-4-8",
|
|
653
|
+
description: "The most capable Claude model for complex reasoning and agentic coding. Same per-token pricing as Opus 4.7 with improved tool-use efficiency (~290 tokens for tool-use system prompt vs 675 on 4.7). 1M context window, 128K max output.",
|
|
654
|
+
maxInputTokens: 1_000_000,
|
|
655
|
+
maxOutputTokens: 128_000,
|
|
656
|
+
inputTokenCost: 5,
|
|
657
|
+
cachedInputTokenCost: 0.5,
|
|
658
|
+
outputTokenCost: 25,
|
|
659
|
+
reasoning: {
|
|
660
|
+
canDisable: false,
|
|
661
|
+
outputsThinking: true,
|
|
662
|
+
outputsSignatures: true,
|
|
663
|
+
},
|
|
664
|
+
provider: "anthropic",
|
|
665
|
+
},
|
|
618
666
|
{
|
|
619
667
|
type: "text",
|
|
620
668
|
modelName: "claude-opus-4-7",
|
|
621
|
-
description: "
|
|
669
|
+
description: "Claude Opus 4.7 for complex reasoning and agentic coding. Features Adaptive Thinking that auto-tunes reasoning compute per request. 1M context window, 128K max output. Knowledge cutoff: January 2026.",
|
|
622
670
|
maxInputTokens: 1_000_000,
|
|
623
671
|
maxOutputTokens: 128_000,
|
|
624
672
|
inputTokenCost: 5,
|
|
625
673
|
cachedInputTokenCost: 0.5,
|
|
674
|
+
cacheCreationInputTokenCost: 6.25,
|
|
626
675
|
outputTokenCost: 25,
|
|
627
676
|
outputTokensPerSecond: 72,
|
|
628
677
|
reasoning: {
|
|
@@ -640,6 +689,7 @@ export const textModels = [
|
|
|
640
689
|
maxOutputTokens: 128_000,
|
|
641
690
|
inputTokenCost: 5,
|
|
642
691
|
cachedInputTokenCost: 0.5,
|
|
692
|
+
cacheCreationInputTokenCost: 6.25,
|
|
643
693
|
outputTokenCost: 25,
|
|
644
694
|
outputTokensPerSecond: 53,
|
|
645
695
|
reasoning: {
|
|
@@ -657,6 +707,7 @@ export const textModels = [
|
|
|
657
707
|
maxOutputTokens: 64_000,
|
|
658
708
|
inputTokenCost: 3,
|
|
659
709
|
cachedInputTokenCost: 0.3,
|
|
710
|
+
cacheCreationInputTokenCost: 3.75,
|
|
660
711
|
outputTokenCost: 15,
|
|
661
712
|
outputTokensPerSecond: 52,
|
|
662
713
|
reasoning: {
|
|
@@ -674,6 +725,7 @@ export const textModels = [
|
|
|
674
725
|
maxOutputTokens: 64_000,
|
|
675
726
|
inputTokenCost: 1,
|
|
676
727
|
cachedInputTokenCost: 0.1,
|
|
728
|
+
cacheCreationInputTokenCost: 1.25,
|
|
677
729
|
outputTokenCost: 5,
|
|
678
730
|
outputTokensPerSecond: 97,
|
|
679
731
|
reasoning: {
|
|
@@ -751,7 +803,15 @@ export const imageModels = [
|
|
|
751
803
|
type: "image",
|
|
752
804
|
modelName: "gemini-3.1-flash-image-preview",
|
|
753
805
|
provider: "google",
|
|
754
|
-
description: "
|
|
806
|
+
description: "DEPRECATED: Preview version. Use gemini-3.1-flash-image instead.",
|
|
807
|
+
costPerImage: 0.067,
|
|
808
|
+
disabled: true,
|
|
809
|
+
},
|
|
810
|
+
{
|
|
811
|
+
type: "image",
|
|
812
|
+
modelName: "gemini-3.1-flash-image",
|
|
813
|
+
provider: "google",
|
|
814
|
+
description: "Fast image generation with Gemini 3.1 Flash (GA). Supports resolutions from 512px to 4096px. ~$0.045/image at 512px, $0.067 at 1K, $0.101 at 2K, $0.151 at 4K.",
|
|
755
815
|
costPerImage: 0.067,
|
|
756
816
|
},
|
|
757
817
|
];
|
|
@@ -3,6 +3,7 @@ export type CostEstimate = {
|
|
|
3
3
|
inputCost: number;
|
|
4
4
|
outputCost: number;
|
|
5
5
|
cachedInputCost?: number;
|
|
6
|
+
cacheCreationInputCost?: number;
|
|
6
7
|
totalCost: number;
|
|
7
8
|
currency: string;
|
|
8
9
|
};
|
|
@@ -10,6 +11,7 @@ export declare const CostEstimateSchema: z.ZodObject<{
|
|
|
10
11
|
inputCost: z.ZodNumber;
|
|
11
12
|
outputCost: z.ZodNumber;
|
|
12
13
|
cachedInputCost: z.ZodOptional<z.ZodNumber>;
|
|
14
|
+
cacheCreationInputCost: z.ZodOptional<z.ZodNumber>;
|
|
13
15
|
totalCost: z.ZodNumber;
|
|
14
16
|
currency: z.ZodString;
|
|
15
17
|
}, z.core.$strip>;
|
|
@@ -3,6 +3,7 @@ export const CostEstimateSchema = z.object({
|
|
|
3
3
|
inputCost: z.number(),
|
|
4
4
|
outputCost: z.number(),
|
|
5
5
|
cachedInputCost: z.number().optional(),
|
|
6
|
+
cacheCreationInputCost: z.number().optional(),
|
|
6
7
|
totalCost: z.number(),
|
|
7
8
|
currency: z.string(),
|
|
8
9
|
});
|
|
@@ -24,6 +25,7 @@ export function addCosts(_a, _b) {
|
|
|
24
25
|
inputCost: a.inputCost + b.inputCost,
|
|
25
26
|
outputCost: a.outputCost + b.outputCost,
|
|
26
27
|
cachedInputCost: (a.cachedInputCost || 0) + (b.cachedInputCost || 0),
|
|
28
|
+
cacheCreationInputCost: (a.cacheCreationInputCost || 0) + (b.cacheCreationInputCost || 0),
|
|
27
29
|
totalCost: a.totalCost + b.totalCost,
|
|
28
30
|
currency: a.currency,
|
|
29
31
|
};
|
|
@@ -3,12 +3,14 @@ export type TokenUsage = {
|
|
|
3
3
|
inputTokens: number;
|
|
4
4
|
outputTokens: number;
|
|
5
5
|
cachedInputTokens?: number;
|
|
6
|
+
cacheCreationInputTokens?: number;
|
|
6
7
|
totalTokens?: number;
|
|
7
8
|
};
|
|
8
9
|
export declare const TokenUsageSchema: z.ZodObject<{
|
|
9
10
|
inputTokens: z.ZodNumber;
|
|
10
11
|
outputTokens: z.ZodNumber;
|
|
11
12
|
cachedInputTokens: z.ZodOptional<z.ZodNumber>;
|
|
13
|
+
cacheCreationInputTokens: z.ZodOptional<z.ZodNumber>;
|
|
12
14
|
totalTokens: z.ZodOptional<z.ZodNumber>;
|
|
13
15
|
}, z.core.$strip>;
|
|
14
16
|
export declare function addTokenUsage(_a?: TokenUsage, _b?: TokenUsage): TokenUsage;
|
package/dist/types/tokenUsage.js
CHANGED
|
@@ -3,6 +3,7 @@ export const TokenUsageSchema = z.object({
|
|
|
3
3
|
inputTokens: z.number(),
|
|
4
4
|
outputTokens: z.number(),
|
|
5
5
|
cachedInputTokens: z.number().optional(),
|
|
6
|
+
cacheCreationInputTokens: z.number().optional(),
|
|
6
7
|
totalTokens: z.number().optional(),
|
|
7
8
|
});
|
|
8
9
|
export function addTokenUsage(_a, _b) {
|
|
@@ -20,6 +21,7 @@ export function addTokenUsage(_a, _b) {
|
|
|
20
21
|
inputTokens: a.inputTokens + b.inputTokens,
|
|
21
22
|
outputTokens: a.outputTokens + b.outputTokens,
|
|
22
23
|
cachedInputTokens: (a.cachedInputTokens || 0) + (b.cachedInputTokens || 0),
|
|
24
|
+
cacheCreationInputTokens: (a.cacheCreationInputTokens || 0) + (b.cacheCreationInputTokens || 0),
|
|
23
25
|
totalTokens: (a.totalTokens || 0) + (b.totalTokens || 0),
|
|
24
26
|
};
|
|
25
27
|
}
|
package/dist/types.d.ts
CHANGED
|
@@ -69,6 +69,10 @@ export type SmolConfig = {
|
|
|
69
69
|
enabled: boolean;
|
|
70
70
|
budgetTokens?: number;
|
|
71
71
|
};
|
|
72
|
+
/** Prompt caching. Currently used by Anthropic; OpenAI/Google cache automatically. Defaults to enabled. */
|
|
73
|
+
caching?: {
|
|
74
|
+
enabled?: boolean;
|
|
75
|
+
};
|
|
72
76
|
/** Provider-agnostic reasoning effort level. */
|
|
73
77
|
reasoningEffort?: "low" | "medium" | "high";
|
|
74
78
|
responseFormatOptions?: Partial<{
|