smoltalk 0.0.37 → 0.0.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/classes/message/AssistantMessage.d.ts +9 -1
- package/dist/classes/message/AssistantMessage.js +14 -0
- package/dist/classes/message/index.d.ts +3 -1
- package/dist/client.js +3 -4
- package/dist/clients/anthropic.d.ts +2 -1
- package/dist/clients/anthropic.js +34 -15
- package/dist/clients/baseClient.d.ts +6 -0
- package/dist/clients/baseClient.js +131 -7
- package/dist/clients/google.d.ts +2 -1
- package/dist/clients/google.js +29 -7
- package/dist/clients/ollama.d.ts +2 -1
- package/dist/clients/ollama.js +30 -8
- package/dist/clients/openai.d.ts +2 -1
- package/dist/clients/openai.js +14 -9
- package/dist/clients/openaiResponses.d.ts +2 -1
- package/dist/clients/openaiResponses.js +16 -9
- package/dist/functions.js +24 -3
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/model.d.ts +33 -0
- package/dist/model.js +132 -0
- package/dist/models.d.ts +5 -26
- package/dist/models.js +0 -102
- package/dist/smolError.d.ts +6 -0
- package/dist/smolError.js +12 -0
- package/dist/statelogClient.d.ts +2 -1
- package/dist/strategies/baseStrategy.d.ts +10 -0
- package/dist/strategies/baseStrategy.js +20 -0
- package/dist/strategies/fallbackStrategy.d.ts +10 -0
- package/dist/strategies/fallbackStrategy.js +48 -0
- package/dist/strategies/idStrategy.d.ts +10 -0
- package/dist/strategies/idStrategy.js +22 -0
- package/dist/strategies/index.d.ts +11 -0
- package/dist/strategies/index.js +40 -0
- package/dist/strategies/raceStrategy.d.ts +9 -0
- package/dist/strategies/raceStrategy.js +37 -0
- package/dist/strategies/types.d.ts +31 -0
- package/dist/strategies/types.js +1 -0
- package/dist/types.d.ts +27 -1
- package/package.json +1 -1
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { BaseMessage, MessageClass } from "./BaseMessage.js";
|
|
2
|
-
import { TextPart, ThinkingBlock } from "../../types.js";
|
|
2
|
+
import { CostEstimate, TextPart, ThinkingBlock, TokenUsage } from "../../types.js";
|
|
3
3
|
import { ChatCompletionMessageParam } from "openai/resources";
|
|
4
4
|
import { Content } from "@google/genai";
|
|
5
5
|
import { ToolCall, ToolCallJSON } from "../ToolCall.js";
|
|
@@ -13,6 +13,8 @@ export type AssistantMessageJSON = {
|
|
|
13
13
|
refusal: string | null | undefined;
|
|
14
14
|
toolCalls: ToolCallJSON[] | undefined;
|
|
15
15
|
thinkingBlocks: ThinkingBlock[] | undefined;
|
|
16
|
+
usage: TokenUsage | undefined;
|
|
17
|
+
cost: CostEstimate | undefined;
|
|
16
18
|
};
|
|
17
19
|
export declare class AssistantMessage extends BaseMessage implements MessageClass {
|
|
18
20
|
_role: "assistant";
|
|
@@ -23,6 +25,8 @@ export declare class AssistantMessage extends BaseMessage implements MessageClas
|
|
|
23
25
|
_toolCalls?: ToolCall[];
|
|
24
26
|
_thinkingBlocks?: ThinkingBlock[];
|
|
25
27
|
_rawData?: any;
|
|
28
|
+
_usage?: TokenUsage;
|
|
29
|
+
_cost?: CostEstimate;
|
|
26
30
|
constructor(content: string | Array<TextPart> | null, options?: {
|
|
27
31
|
name?: string;
|
|
28
32
|
audio?: any | null;
|
|
@@ -30,6 +34,8 @@ export declare class AssistantMessage extends BaseMessage implements MessageClas
|
|
|
30
34
|
toolCalls?: ToolCall[];
|
|
31
35
|
thinkingBlocks?: ThinkingBlock[];
|
|
32
36
|
rawData?: any;
|
|
37
|
+
usage?: TokenUsage;
|
|
38
|
+
cost?: CostEstimate;
|
|
33
39
|
});
|
|
34
40
|
get content(): string;
|
|
35
41
|
set content(value: string);
|
|
@@ -40,6 +46,8 @@ export declare class AssistantMessage extends BaseMessage implements MessageClas
|
|
|
40
46
|
get toolCalls(): ToolCall[] | undefined;
|
|
41
47
|
get rawData(): any;
|
|
42
48
|
get thinkingBlocks(): ThinkingBlock[] | undefined;
|
|
49
|
+
get usage(): TokenUsage | undefined;
|
|
50
|
+
get cost(): CostEstimate | undefined;
|
|
43
51
|
toJSON(): AssistantMessageJSON;
|
|
44
52
|
static fromJSON(json: any): AssistantMessage;
|
|
45
53
|
toOpenAIMessage(): ChatCompletionMessageParam;
|
|
@@ -9,6 +9,8 @@ export class AssistantMessage extends BaseMessage {
|
|
|
9
9
|
_toolCalls;
|
|
10
10
|
_thinkingBlocks;
|
|
11
11
|
_rawData;
|
|
12
|
+
_usage;
|
|
13
|
+
_cost;
|
|
12
14
|
constructor(content, options = {}) {
|
|
13
15
|
super();
|
|
14
16
|
this._content = content;
|
|
@@ -18,6 +20,8 @@ export class AssistantMessage extends BaseMessage {
|
|
|
18
20
|
this._toolCalls = options.toolCalls;
|
|
19
21
|
this._thinkingBlocks = options.thinkingBlocks;
|
|
20
22
|
this._rawData = options.rawData;
|
|
23
|
+
this._usage = options.usage;
|
|
24
|
+
this._cost = options.cost;
|
|
21
25
|
}
|
|
22
26
|
get content() {
|
|
23
27
|
if (this._content === null || this._content === undefined) {
|
|
@@ -51,6 +55,12 @@ export class AssistantMessage extends BaseMessage {
|
|
|
51
55
|
get thinkingBlocks() {
|
|
52
56
|
return this._thinkingBlocks;
|
|
53
57
|
}
|
|
58
|
+
/** Returns the token usage stored on this message (`_usage`, set from `options.usage` in the constructor); undefined when none was supplied. */
get usage() {
|
|
59
|
+
return this._usage;
|
|
60
|
+
}
|
|
61
|
+
/** Returns the cost estimate stored on this message (`_cost`, set from `options.cost` in the constructor); undefined when none was supplied. */
get cost() {
|
|
62
|
+
return this._cost;
|
|
63
|
+
}
|
|
54
64
|
toJSON() {
|
|
55
65
|
return {
|
|
56
66
|
role: this.role,
|
|
@@ -60,6 +70,8 @@ export class AssistantMessage extends BaseMessage {
|
|
|
60
70
|
refusal: this.refusal,
|
|
61
71
|
toolCalls: this.toolCalls?.map((tc) => tc.toJSON()),
|
|
62
72
|
thinkingBlocks: this._thinkingBlocks,
|
|
73
|
+
usage: this._usage,
|
|
74
|
+
cost: this._cost,
|
|
63
75
|
};
|
|
64
76
|
}
|
|
65
77
|
static fromJSON(json) {
|
|
@@ -72,6 +84,8 @@ export class AssistantMessage extends BaseMessage {
|
|
|
72
84
|
: undefined,
|
|
73
85
|
thinkingBlocks: json.thinkingBlocks,
|
|
74
86
|
rawData: json.rawData,
|
|
87
|
+
usage: json.usage,
|
|
88
|
+
cost: json.cost,
|
|
75
89
|
});
|
|
76
90
|
}
|
|
77
91
|
toOpenAIMessage() {
|
|
@@ -8,7 +8,7 @@ import type { AssistantMessageJSON } from "./AssistantMessage.js";
|
|
|
8
8
|
import type { DeveloperMessageJSON } from "./DeveloperMessage.js";
|
|
9
9
|
import type { SystemMessageJSON } from "./SystemMessage.js";
|
|
10
10
|
import type { ToolMessageJSON } from "./ToolMessage.js";
|
|
11
|
-
import { TextPart } from "../../types.js";
|
|
11
|
+
import { CostEstimate, TextPart, TokenUsage } from "../../types.js";
|
|
12
12
|
export * from "./AssistantMessage.js";
|
|
13
13
|
export * from "./BaseMessage.js";
|
|
14
14
|
export * from "./DeveloperMessage.js";
|
|
@@ -30,6 +30,8 @@ export declare function assistantMessage(content: string | Array<TextPart> | nul
|
|
|
30
30
|
signature: string;
|
|
31
31
|
}>;
|
|
32
32
|
rawData?: any;
|
|
33
|
+
usage?: TokenUsage;
|
|
34
|
+
cost?: CostEstimate;
|
|
33
35
|
}): AssistantMessage;
|
|
34
36
|
export declare function developerMessage(content: string | Array<TextPart>, options?: {
|
|
35
37
|
name?: string;
|
package/dist/client.js
CHANGED
|
@@ -6,17 +6,16 @@ import { SmolAnthropic } from "./clients/anthropic.js";
|
|
|
6
6
|
import { SmolGoogle } from "./clients/google.js";
|
|
7
7
|
import { SmolOpenAi } from "./clients/openai.js";
|
|
8
8
|
import { SmolOpenAiResponses } from "./clients/openaiResponses.js";
|
|
9
|
-
import { getModel,
|
|
9
|
+
import { getModel, isTextModel } from "./models.js";
|
|
10
10
|
import { SmolError } from "./smolError.js";
|
|
11
11
|
import { getLogger } from "./logger.js";
|
|
12
12
|
import { SmolOllama } from "./clients/ollama.js";
|
|
13
|
+
import { Model } from "./model.js";
|
|
13
14
|
export function getClient(config) {
|
|
14
15
|
// Initialize logger singleton with desired log level
|
|
15
16
|
const logger = getLogger(config.logLevel);
|
|
16
17
|
// Resolve ModelConfig to a concrete model name
|
|
17
|
-
const modelName =
|
|
18
|
-
? pickModel(config.model)
|
|
19
|
-
: config.model;
|
|
18
|
+
const modelName = new Model(config.model).getResolvedModel();
|
|
20
19
|
let provider = config.provider;
|
|
21
20
|
if (!provider) {
|
|
22
21
|
const model = getModel(modelName);
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
|
|
2
2
|
import { BaseClient } from "./baseClient.js";
|
|
3
|
+
import { ModelName } from "../models.js";
|
|
3
4
|
export type SmolAnthropicConfig = BaseClientConfig & {
|
|
4
5
|
anthropicApiKey: string;
|
|
5
6
|
};
|
|
@@ -8,7 +9,7 @@ export declare class SmolAnthropic extends BaseClient implements SmolClient {
|
|
|
8
9
|
private logger;
|
|
9
10
|
private model;
|
|
10
11
|
constructor(config: SmolAnthropicConfig);
|
|
11
|
-
getModel():
|
|
12
|
+
getModel(): ModelName;
|
|
12
13
|
private calculateUsageAndCost;
|
|
13
14
|
private buildRequest;
|
|
14
15
|
_textSync(config: PromptConfig): Promise<Result<PromptResult>>;
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import Anthropic from "@anthropic-ai/sdk";
|
|
2
2
|
import { ToolCall } from "../classes/ToolCall.js";
|
|
3
|
-
import { SystemMessage, DeveloperMessage
|
|
3
|
+
import { SystemMessage, DeveloperMessage } from "../classes/message/index.js";
|
|
4
4
|
import { getLogger } from "../logger.js";
|
|
5
5
|
import { success, } from "../types.js";
|
|
6
6
|
import { zodToAnthropicTool } from "../util/tool.js";
|
|
7
7
|
import { BaseClient } from "./baseClient.js";
|
|
8
|
-
import {
|
|
8
|
+
import { Model } from "../model.js";
|
|
9
9
|
const DEFAULT_MAX_TOKENS = 4096;
|
|
10
10
|
export class SmolAnthropic extends BaseClient {
|
|
11
11
|
client;
|
|
@@ -15,10 +15,10 @@ export class SmolAnthropic extends BaseClient {
|
|
|
15
15
|
super(config);
|
|
16
16
|
this.client = new Anthropic({ apiKey: config.anthropicApiKey });
|
|
17
17
|
this.logger = getLogger();
|
|
18
|
-
this.model = config.model;
|
|
18
|
+
this.model = new Model(config.model);
|
|
19
19
|
}
|
|
20
20
|
getModel() {
|
|
21
|
-
return this.model;
|
|
21
|
+
return this.model.getResolvedModel();
|
|
22
22
|
}
|
|
23
23
|
calculateUsageAndCost(usageData) {
|
|
24
24
|
const usage = {
|
|
@@ -26,7 +26,7 @@ export class SmolAnthropic extends BaseClient {
|
|
|
26
26
|
outputTokens: usageData.output_tokens,
|
|
27
27
|
totalTokens: usageData.input_tokens + usageData.output_tokens,
|
|
28
28
|
};
|
|
29
|
-
const cost =
|
|
29
|
+
const cost = this.model.calculateCost(usage) ?? undefined;
|
|
30
30
|
return { usage, cost };
|
|
31
31
|
}
|
|
32
32
|
buildRequest(config) {
|
|
@@ -64,23 +64,37 @@ export class SmolAnthropic extends BaseClient {
|
|
|
64
64
|
description: tool.description,
|
|
65
65
|
}))
|
|
66
66
|
: undefined;
|
|
67
|
+
const reasoningBudgetMap = {
|
|
68
|
+
low: 2048,
|
|
69
|
+
medium: 5000,
|
|
70
|
+
high: 10000,
|
|
71
|
+
};
|
|
67
72
|
const thinking = config.thinking?.enabled
|
|
68
|
-
? {
|
|
69
|
-
|
|
73
|
+
? {
|
|
74
|
+
type: "enabled",
|
|
75
|
+
budget_tokens: config.thinking.budgetTokens ?? 5000,
|
|
76
|
+
}
|
|
77
|
+
: config.reasoningEffort
|
|
78
|
+
? {
|
|
79
|
+
type: "enabled",
|
|
80
|
+
budget_tokens: reasoningBudgetMap[config.reasoningEffort],
|
|
81
|
+
}
|
|
82
|
+
: undefined;
|
|
70
83
|
return { system, messages: anthropicMessages, tools, thinking };
|
|
71
84
|
}
|
|
72
85
|
async _textSync(config) {
|
|
73
86
|
const { system, messages, tools, thinking } = this.buildRequest(config);
|
|
74
87
|
this.logger.debug("Sending request to Anthropic:", {
|
|
75
|
-
model: this.
|
|
88
|
+
model: this.getModel(),
|
|
76
89
|
max_tokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
|
|
77
90
|
messages,
|
|
78
91
|
system,
|
|
79
92
|
tools,
|
|
80
93
|
thinking,
|
|
81
94
|
});
|
|
95
|
+
const signal = this.getAbortSignal(config);
|
|
82
96
|
const response = await this.client.messages.create({
|
|
83
|
-
model: this.
|
|
97
|
+
model: this.getModel(),
|
|
84
98
|
max_tokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
|
|
85
99
|
messages,
|
|
86
100
|
...(system && { system }),
|
|
@@ -91,7 +105,7 @@ export class SmolAnthropic extends BaseClient {
|
|
|
91
105
|
}),
|
|
92
106
|
...(config.rawAttributes || {}),
|
|
93
107
|
stream: false,
|
|
94
|
-
});
|
|
108
|
+
}, { ...(signal && { signal }) });
|
|
95
109
|
this.logger.debug("Response from Anthropic:", response);
|
|
96
110
|
let output = null;
|
|
97
111
|
const toolCalls = [];
|
|
@@ -115,7 +129,7 @@ export class SmolAnthropic extends BaseClient {
|
|
|
115
129
|
...(thinkingBlocks.length > 0 && { thinkingBlocks }),
|
|
116
130
|
usage,
|
|
117
131
|
cost,
|
|
118
|
-
model: this.
|
|
132
|
+
model: this.getModel(),
|
|
119
133
|
});
|
|
120
134
|
}
|
|
121
135
|
async *_textStream(config) {
|
|
@@ -128,6 +142,7 @@ export class SmolAnthropic extends BaseClient {
|
|
|
128
142
|
tools,
|
|
129
143
|
thinking,
|
|
130
144
|
});
|
|
145
|
+
const signal = this.getAbortSignal(config);
|
|
131
146
|
const stream = await this.client.messages.create({
|
|
132
147
|
model: this.model,
|
|
133
148
|
max_tokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
|
|
@@ -140,7 +155,7 @@ export class SmolAnthropic extends BaseClient {
|
|
|
140
155
|
}),
|
|
141
156
|
...(config.rawAttributes || {}),
|
|
142
157
|
stream: true,
|
|
143
|
-
});
|
|
158
|
+
}, { ...(signal && { signal }) });
|
|
144
159
|
let content = "";
|
|
145
160
|
// Track tool blocks by index: index -> { id, name, arguments (partial JSON) }
|
|
146
161
|
const toolBlocks = new Map();
|
|
@@ -192,7 +207,11 @@ export class SmolAnthropic extends BaseClient {
|
|
|
192
207
|
// Emit thinking chunk once the block is fully assembled
|
|
193
208
|
const thinkingBlock = thinkingBlockMap.get(event.index);
|
|
194
209
|
if (thinkingBlock) {
|
|
195
|
-
yield {
|
|
210
|
+
yield {
|
|
211
|
+
type: "thinking",
|
|
212
|
+
text: thinkingBlock.text,
|
|
213
|
+
signature: thinkingBlock.signature,
|
|
214
|
+
};
|
|
196
215
|
}
|
|
197
216
|
}
|
|
198
217
|
else if (event.type === "message_delta") {
|
|
@@ -212,7 +231,7 @@ export class SmolAnthropic extends BaseClient {
|
|
|
212
231
|
outputTokens,
|
|
213
232
|
totalTokens: inputTokens + outputTokens,
|
|
214
233
|
};
|
|
215
|
-
const cost =
|
|
234
|
+
const cost = this.model.calculateCost(usage) ?? undefined;
|
|
216
235
|
yield {
|
|
217
236
|
type: "done",
|
|
218
237
|
result: {
|
|
@@ -221,7 +240,7 @@ export class SmolAnthropic extends BaseClient {
|
|
|
221
240
|
...(thinkingBlocks.length > 0 && { thinkingBlocks }),
|
|
222
241
|
usage,
|
|
223
242
|
cost,
|
|
224
|
-
model: this.
|
|
243
|
+
model: this.getModel(),
|
|
225
244
|
},
|
|
226
245
|
};
|
|
227
246
|
}
|
|
@@ -4,6 +4,8 @@ export declare class BaseClient implements SmolClient {
|
|
|
4
4
|
protected config: SmolConfig;
|
|
5
5
|
protected statelogClient?: StatelogClient;
|
|
6
6
|
constructor(config: SmolConfig);
|
|
7
|
+
protected getAbortSignal(promptConfig: PromptConfig): AbortSignal | undefined;
|
|
8
|
+
protected isAbortError(err: unknown): boolean;
|
|
7
9
|
text(promptConfig: Omit<PromptConfig, "stream">): Promise<Result<PromptResult>>;
|
|
8
10
|
text(promptConfig: Omit<PromptConfig, "stream"> & {
|
|
9
11
|
stream: false;
|
|
@@ -13,6 +15,10 @@ export declare class BaseClient implements SmolClient {
|
|
|
13
15
|
}): AsyncGenerator<StreamChunk>;
|
|
14
16
|
text(promptConfig: PromptConfig): Promise<Result<PromptResult>> | AsyncGenerator<StreamChunk>;
|
|
15
17
|
checkMessageLimit(promptConfig: PromptConfig): Result<PromptResult> | null;
|
|
18
|
+
applyBudget(promptConfig: PromptConfig): {
|
|
19
|
+
config: PromptConfig;
|
|
20
|
+
failure?: Result<PromptResult>;
|
|
21
|
+
};
|
|
16
22
|
textSync(promptConfig: PromptConfig): Promise<Result<PromptResult>>;
|
|
17
23
|
checkForToolLoops(promptConfig: PromptConfig): {
|
|
18
24
|
continue: boolean;
|
|
@@ -1,5 +1,7 @@
|
|
|
1
|
-
import { userMessage, assistantMessage } from "../classes/message/index.js";
|
|
1
|
+
import { AssistantMessage, userMessage, assistantMessage, } from "../classes/message/index.js";
|
|
2
2
|
import { getLogger } from "../logger.js";
|
|
3
|
+
import { getModel, isTextModel } from "../models.js";
|
|
4
|
+
import { SmolStructuredOutputError } from "../smolError.js";
|
|
3
5
|
import { getStatelogClient } from "../statelogClient.js";
|
|
4
6
|
import { success, } from "../types.js";
|
|
5
7
|
import { z } from "zod";
|
|
@@ -13,6 +15,27 @@ export class BaseClient {
|
|
|
13
15
|
this.statelogClient = getStatelogClient(this.config.statelog);
|
|
14
16
|
}
|
|
15
17
|
}
|
|
18
|
+
getAbortSignal(promptConfig) {
|
|
19
|
+
const signals = [];
|
|
20
|
+
if (promptConfig.abortSignal) {
|
|
21
|
+
signals.push(promptConfig.abortSignal);
|
|
22
|
+
}
|
|
23
|
+
const timeBudgetMs = promptConfig.budget?.timeBudgetMs;
|
|
24
|
+
if (timeBudgetMs !== undefined) {
|
|
25
|
+
signals.push(AbortSignal.timeout(timeBudgetMs));
|
|
26
|
+
}
|
|
27
|
+
if (signals.length === 0)
|
|
28
|
+
return undefined;
|
|
29
|
+
if (signals.length === 1)
|
|
30
|
+
return signals[0];
|
|
31
|
+
return AbortSignal.any(signals);
|
|
32
|
+
}
|
|
33
|
+
isAbortError(err) {
|
|
34
|
+
return ((err instanceof DOMException && err.name === "AbortError") ||
|
|
35
|
+
(err instanceof DOMException && err.name === "TimeoutError") ||
|
|
36
|
+
(err instanceof Error && err.name === "AbortError") ||
|
|
37
|
+
(err instanceof Error && err.name === "TimeoutError"));
|
|
38
|
+
}
|
|
16
39
|
text(promptConfig) {
|
|
17
40
|
if (promptConfig.stream) {
|
|
18
41
|
return this.textStream(promptConfig);
|
|
@@ -33,19 +56,92 @@ export class BaseClient {
|
|
|
33
56
|
}
|
|
34
57
|
return null;
|
|
35
58
|
}
|
|
59
|
+
applyBudget(promptConfig) {
|
|
60
|
+
const budget = promptConfig.budget;
|
|
61
|
+
if (!budget)
|
|
62
|
+
return { config: promptConfig };
|
|
63
|
+
let config = { ...promptConfig };
|
|
64
|
+
// Auto-compute used values from message history when not explicitly provided
|
|
65
|
+
const assistantMessages = config.messages.filter((m) => m instanceof AssistantMessage);
|
|
66
|
+
const tokensUsed = budget.tokensUsed ??
|
|
67
|
+
assistantMessages.reduce((sum, m) => sum + (m.usage?.outputTokens ?? 0), 0);
|
|
68
|
+
const costUsed = budget.costUsed ??
|
|
69
|
+
assistantMessages.reduce((sum, m) => sum + (m.cost?.totalCost ?? 0), 0);
|
|
70
|
+
const requestsUsed = budget.requestsUsed ?? assistantMessages.length;
|
|
71
|
+
// Request budget check
|
|
72
|
+
if (budget.requestBudget !== undefined &&
|
|
73
|
+
requestsUsed >= budget.requestBudget) {
|
|
74
|
+
return {
|
|
75
|
+
config,
|
|
76
|
+
failure: {
|
|
77
|
+
success: false,
|
|
78
|
+
error: `Request budget exhausted: ${requestsUsed} requests used, budget is ${budget.requestBudget}`,
|
|
79
|
+
},
|
|
80
|
+
};
|
|
81
|
+
}
|
|
82
|
+
// Token budget check
|
|
83
|
+
if (budget.tokenBudget !== undefined) {
|
|
84
|
+
const remaining = budget.tokenBudget - tokensUsed;
|
|
85
|
+
if (remaining <= 0) {
|
|
86
|
+
return {
|
|
87
|
+
config,
|
|
88
|
+
failure: {
|
|
89
|
+
success: false,
|
|
90
|
+
error: `Token budget exhausted: ${tokensUsed} output tokens used, budget is ${budget.tokenBudget}`,
|
|
91
|
+
},
|
|
92
|
+
};
|
|
93
|
+
}
|
|
94
|
+
config.maxTokens = Math.min(config.maxTokens ?? Infinity, remaining);
|
|
95
|
+
}
|
|
96
|
+
// Cost budget check
|
|
97
|
+
if (budget.costBudget !== undefined) {
|
|
98
|
+
const remainingUSD = budget.costBudget - costUsed;
|
|
99
|
+
if (remainingUSD <= 0) {
|
|
100
|
+
return {
|
|
101
|
+
config,
|
|
102
|
+
failure: {
|
|
103
|
+
success: false,
|
|
104
|
+
error: `Cost budget exhausted: $${costUsed.toFixed(4)} spent, budget is $${budget.costBudget.toFixed(4)}`,
|
|
105
|
+
},
|
|
106
|
+
};
|
|
107
|
+
}
|
|
108
|
+
const model = getModel(this.config.model);
|
|
109
|
+
if (model && isTextModel(model) && model.outputTokenCost) {
|
|
110
|
+
const remainingTokens = Math.floor((remainingUSD / model.outputTokenCost) * 1_000_000);
|
|
111
|
+
config.maxTokens = Math.min(config.maxTokens ?? Infinity, remainingTokens);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
return { config };
|
|
115
|
+
}
|
|
36
116
|
async textSync(promptConfig) {
|
|
37
117
|
const messageLimitResult = this.checkMessageLimit(promptConfig);
|
|
38
118
|
if (messageLimitResult)
|
|
39
119
|
return messageLimitResult;
|
|
40
|
-
const {
|
|
120
|
+
const { config: budgetedConfig, failure: budgetFailure } = this.applyBudget(promptConfig);
|
|
121
|
+
if (budgetFailure)
|
|
122
|
+
return budgetFailure;
|
|
123
|
+
const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(budgetedConfig);
|
|
41
124
|
if (!shouldContinue) {
|
|
42
125
|
return {
|
|
43
126
|
success: true,
|
|
44
127
|
value: { output: null, toolCalls: [], model: this.config.model },
|
|
45
128
|
};
|
|
46
129
|
}
|
|
47
|
-
|
|
48
|
-
|
|
130
|
+
try {
|
|
131
|
+
const result = await this.textWithRetry(newPromptConfig, newPromptConfig.responseFormatOptions?.numRetries ||
|
|
132
|
+
DEFAULT_NUM_RETRIES);
|
|
133
|
+
return result;
|
|
134
|
+
}
|
|
135
|
+
catch (err) {
|
|
136
|
+
if (this.isAbortError(err)) {
|
|
137
|
+
const timeBudgetMs = promptConfig.budget?.timeBudgetMs;
|
|
138
|
+
const message = timeBudgetMs
|
|
139
|
+
? `Request timed out after ${timeBudgetMs}ms`
|
|
140
|
+
: "Request was aborted";
|
|
141
|
+
return { success: false, error: message };
|
|
142
|
+
}
|
|
143
|
+
throw err;
|
|
144
|
+
}
|
|
49
145
|
}
|
|
50
146
|
checkForToolLoops(promptConfig) {
|
|
51
147
|
if (!this.config.toolLoopDetection?.enabled) {
|
|
@@ -154,6 +250,10 @@ export class BaseClient {
|
|
|
154
250
|
async textWithRetry(promptConfig, retries) {
|
|
155
251
|
const result = await this._textSync(promptConfig);
|
|
156
252
|
if (result.success) {
|
|
253
|
+
if (!promptConfig.responseFormat ||
|
|
254
|
+
!promptConfig.responseFormatOptions?.strict) {
|
|
255
|
+
return result;
|
|
256
|
+
}
|
|
157
257
|
if (!("output" in result.value)) {
|
|
158
258
|
const retryMessages = [
|
|
159
259
|
...promptConfig.messages,
|
|
@@ -196,7 +296,7 @@ export class BaseClient {
|
|
|
196
296
|
}
|
|
197
297
|
}
|
|
198
298
|
}
|
|
199
|
-
|
|
299
|
+
throw new SmolStructuredOutputError(`Failed to get valid response after ${DEFAULT_NUM_RETRIES} attempts: ${result.success ? "Output did not match expected format" : result.error}`);
|
|
200
300
|
}
|
|
201
301
|
async _textSync(promptConfig) {
|
|
202
302
|
throw new Error("Method not implemented.");
|
|
@@ -222,7 +322,17 @@ export class BaseClient {
|
|
|
222
322
|
};
|
|
223
323
|
return;
|
|
224
324
|
}
|
|
225
|
-
const {
|
|
325
|
+
const { config: budgetedConfig, failure: budgetFailure } = this.applyBudget(config);
|
|
326
|
+
if (budgetFailure) {
|
|
327
|
+
yield {
|
|
328
|
+
type: "error",
|
|
329
|
+
error: budgetFailure.success === false
|
|
330
|
+
? budgetFailure.error
|
|
331
|
+
: "Budget exceeded",
|
|
332
|
+
};
|
|
333
|
+
return;
|
|
334
|
+
}
|
|
335
|
+
const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(budgetedConfig);
|
|
226
336
|
if (!shouldContinue) {
|
|
227
337
|
yield {
|
|
228
338
|
type: "done",
|
|
@@ -234,7 +344,21 @@ export class BaseClient {
|
|
|
234
344
|
};
|
|
235
345
|
return;
|
|
236
346
|
}
|
|
237
|
-
|
|
347
|
+
try {
|
|
348
|
+
yield* this._textStream(newPromptConfig);
|
|
349
|
+
}
|
|
350
|
+
catch (err) {
|
|
351
|
+
if (this.isAbortError(err)) {
|
|
352
|
+
const timeBudgetMs = config.budget?.timeBudgetMs;
|
|
353
|
+
const message = timeBudgetMs
|
|
354
|
+
? `Request timed out after ${timeBudgetMs}ms`
|
|
355
|
+
: "Request was aborted";
|
|
356
|
+
yield { type: "timeout", error: message };
|
|
357
|
+
}
|
|
358
|
+
else {
|
|
359
|
+
throw err;
|
|
360
|
+
}
|
|
361
|
+
}
|
|
238
362
|
}
|
|
239
363
|
// default implementation of text stream just calls the non-streaming version and yields the result
|
|
240
364
|
// clients that support streaming can override this to provide a streaming implementation
|
package/dist/clients/google.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { GoogleGenAI } from "@google/genai";
|
|
2
2
|
import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
|
|
3
3
|
import { BaseClient } from "./baseClient.js";
|
|
4
|
+
import { ModelName } from "../models.js";
|
|
4
5
|
export type SmolGoogleConfig = BaseClientConfig;
|
|
5
6
|
export declare class SmolGoogle extends BaseClient implements SmolClient {
|
|
6
7
|
private client;
|
|
@@ -8,7 +9,7 @@ export declare class SmolGoogle extends BaseClient implements SmolClient {
|
|
|
8
9
|
private model;
|
|
9
10
|
constructor(config: SmolGoogleConfig);
|
|
10
11
|
getClient(): GoogleGenAI;
|
|
11
|
-
getModel():
|
|
12
|
+
getModel(): ModelName;
|
|
12
13
|
private calculateUsageAndCost;
|
|
13
14
|
private buildRequest;
|
|
14
15
|
_textSync(config: PromptConfig): Promise<Result<PromptResult>>;
|
package/dist/clients/google.js
CHANGED
|
@@ -4,7 +4,7 @@ import { getLogger } from "../logger.js";
|
|
|
4
4
|
import { success, } from "../types.js";
|
|
5
5
|
import { zodToGoogleTool } from "../util/tool.js";
|
|
6
6
|
import { BaseClient } from "./baseClient.js";
|
|
7
|
-
import {
|
|
7
|
+
import { Model } from "../model.js";
|
|
8
8
|
export class SmolGoogle extends BaseClient {
|
|
9
9
|
client;
|
|
10
10
|
logger;
|
|
@@ -16,13 +16,13 @@ export class SmolGoogle extends BaseClient {
|
|
|
16
16
|
}
|
|
17
17
|
this.client = new GoogleGenAI({ apiKey: config.googleApiKey });
|
|
18
18
|
this.logger = getLogger();
|
|
19
|
-
this.model = config.model;
|
|
19
|
+
this.model = new Model(config.model);
|
|
20
20
|
}
|
|
21
21
|
getClient() {
|
|
22
22
|
return this.client;
|
|
23
23
|
}
|
|
24
24
|
getModel() {
|
|
25
|
-
return this.model;
|
|
25
|
+
return this.model.getResolvedModel();
|
|
26
26
|
}
|
|
27
27
|
calculateUsageAndCost(usageMetadata) {
|
|
28
28
|
let usage;
|
|
@@ -34,7 +34,7 @@ export class SmolGoogle extends BaseClient {
|
|
|
34
34
|
cachedInputTokens: usageMetadata.cachedContentTokenCount,
|
|
35
35
|
totalTokens: usageMetadata.totalTokenCount,
|
|
36
36
|
};
|
|
37
|
-
const calculatedCost =
|
|
37
|
+
const calculatedCost = this.model.calculateCost(usage);
|
|
38
38
|
if (calculatedCost) {
|
|
39
39
|
cost = calculatedCost;
|
|
40
40
|
}
|
|
@@ -73,18 +73,28 @@ export class SmolGoogle extends BaseClient {
|
|
|
73
73
|
genConfig.responseMimeType = "application/json";
|
|
74
74
|
genConfig.responseJsonSchema = config.responseFormat.toJSONSchema();
|
|
75
75
|
}
|
|
76
|
+
if (!config.thinking?.enabled && config.reasoningEffort) {
|
|
77
|
+
const budgetMap = { low: 2048, medium: 8192, high: 16384 };
|
|
78
|
+
genConfig.thinkingConfig = {
|
|
79
|
+
thinkingBudget: budgetMap[config.reasoningEffort],
|
|
80
|
+
};
|
|
81
|
+
}
|
|
76
82
|
return {
|
|
77
83
|
contents: messages,
|
|
78
|
-
model: this.
|
|
84
|
+
model: this.getModel(),
|
|
79
85
|
config: genConfig,
|
|
80
86
|
...(config.rawAttributes || {}),
|
|
81
87
|
};
|
|
82
88
|
}
|
|
83
89
|
async _textSync(config) {
|
|
90
|
+
const signal = this.getAbortSignal(config);
|
|
84
91
|
const request = {
|
|
85
92
|
...this.buildRequest(config),
|
|
86
93
|
stream: config.stream || false,
|
|
87
94
|
};
|
|
95
|
+
if (signal) {
|
|
96
|
+
request.config = { ...request.config, abortSignal: signal };
|
|
97
|
+
}
|
|
88
98
|
this.logger.debug("Sending request to Google Gemini:", JSON.stringify(request, null, 2));
|
|
89
99
|
// Send the prompt as the latest message
|
|
90
100
|
const result = await this.client.models.generateContent(request);
|
|
@@ -122,7 +132,11 @@ export class SmolGoogle extends BaseClient {
|
|
|
122
132
|
});
|
|
123
133
|
}
|
|
124
134
|
async *_textStream(config) {
|
|
135
|
+
const signal = this.getAbortSignal(config);
|
|
125
136
|
const request = this.buildRequest(config);
|
|
137
|
+
if (signal) {
|
|
138
|
+
request.config = { ...request.config, abortSignal: signal };
|
|
139
|
+
}
|
|
126
140
|
this.logger.debug("Sending streaming request to Google Gemini:", JSON.stringify(request, null, 2));
|
|
127
141
|
const stream = await this.client.models.generateContentStream(request);
|
|
128
142
|
let content = "";
|
|
@@ -147,7 +161,11 @@ export class SmolGoogle extends BaseClient {
|
|
|
147
161
|
signature: p.thoughtSignature,
|
|
148
162
|
};
|
|
149
163
|
thinkingBlocks.push(block);
|
|
150
|
-
yield {
|
|
164
|
+
yield {
|
|
165
|
+
type: "thinking",
|
|
166
|
+
text: block.text,
|
|
167
|
+
signature: block.signature,
|
|
168
|
+
};
|
|
151
169
|
}
|
|
152
170
|
else if (p.text) {
|
|
153
171
|
content += p.text;
|
|
@@ -157,7 +175,11 @@ export class SmolGoogle extends BaseClient {
|
|
|
157
175
|
const id = p.functionCall.id || p.functionCall.name || "";
|
|
158
176
|
const name = p.functionCall.name || "";
|
|
159
177
|
if (!toolCallsMap.has(id)) {
|
|
160
|
-
toolCallsMap.set(id, {
|
|
178
|
+
toolCallsMap.set(id, {
|
|
179
|
+
id,
|
|
180
|
+
name,
|
|
181
|
+
arguments: p.functionCall.args,
|
|
182
|
+
});
|
|
161
183
|
}
|
|
162
184
|
}
|
|
163
185
|
}
|
package/dist/clients/ollama.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { Ollama } from "ollama";
|
|
2
2
|
import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
|
|
3
3
|
import { BaseClient } from "./baseClient.js";
|
|
4
|
+
import { ModelName } from "../models.js";
|
|
4
5
|
export declare const DEFAULT_OLLAMA_HOST = "http://localhost:11434";
|
|
5
6
|
export type SmolOllamaConfig = BaseClientConfig;
|
|
6
7
|
export declare class SmolOllama extends BaseClient implements SmolClient {
|
|
@@ -9,7 +10,7 @@ export declare class SmolOllama extends BaseClient implements SmolClient {
|
|
|
9
10
|
private client;
|
|
10
11
|
constructor(config: SmolOllamaConfig);
|
|
11
12
|
getClient(): Ollama;
|
|
12
|
-
getModel():
|
|
13
|
+
getModel(): ModelName;
|
|
13
14
|
private calculateUsageAndCost;
|
|
14
15
|
_textSync(config: PromptConfig): Promise<Result<PromptResult>>;
|
|
15
16
|
_textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
|