smoltalk 0.0.58 → 0.0.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/classes/ToolCall.js +1 -1
- package/dist/client.d.ts +2 -0
- package/dist/client.js +5 -0
- package/dist/clients/anthropic.js +5 -2
- package/dist/clients/baseClient.d.ts +0 -4
- package/dist/clients/baseClient.js +9 -114
- package/dist/clients/google.js +2 -2
- package/dist/clients/llamaCpp.d.ts +28 -0
- package/dist/clients/llamaCpp.js +316 -0
- package/dist/clients/ollama.js +2 -2
- package/dist/clients/openai.js +5 -3
- package/dist/clients/openaiResponses.js +9 -9
- package/dist/functions.js +4 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/model.d.ts +5 -14
- package/dist/model.js +5 -102
- package/dist/models.d.ts +2 -96
- package/dist/models.js +1 -83
- package/dist/strategies/fastestStrategy.d.ts +3 -3
- package/dist/strategies/fastestStrategy.js +1 -1
- package/dist/strategies/idStrategy.js +1 -6
- package/dist/strategies/index.d.ts +4 -2
- package/dist/strategies/index.js +9 -4
- package/dist/strategies/raceStrategy.js +1 -1
- package/dist/strategies/timeoutStrategy.d.ts +13 -0
- package/dist/strategies/timeoutStrategy.js +58 -0
- package/dist/strategies/types.d.ts +9 -38
- package/dist/strategies/types.js +9 -22
- package/dist/types/costEstimate.d.ts +16 -0
- package/dist/types/costEstimate.js +30 -0
- package/dist/types/tokenUsage.d.ts +14 -0
- package/dist/types/tokenUsage.js +25 -0
- package/dist/types.d.ts +88 -87
- package/dist/types.js +10 -61
- package/dist/util/tool.js +1 -1
- package/dist/{util.d.ts → util/util.d.ts} +1 -1
- package/dist/{util.js → util/util.js} +1 -1
- package/package.json +4 -2
- package/dist/{logger.d.ts → util/logger.d.ts} +0 -0
- package/dist/{logger.js → util/logger.js} +0 -0
package/dist/classes/ToolCall.js
CHANGED
package/dist/client.d.ts
CHANGED
|
@@ -3,6 +3,8 @@ export * from "./clients/google.js";
|
|
|
3
3
|
export * from "./clients/openai.js";
|
|
4
4
|
export * from "./clients/openaiResponses.js";
|
|
5
5
|
export * from "./clients/baseClient.js";
|
|
6
|
+
export * from "./clients/ollama.js";
|
|
7
|
+
export * from "./clients/llamaCpp.js";
|
|
6
8
|
import { BaseClient } from "./clients/baseClient.js";
|
|
7
9
|
import { ResolvedSmolConfig } from "./types.js";
|
|
8
10
|
export declare function registerProvider(providerName: string, clientClass: typeof BaseClient): void;
|
package/dist/client.js
CHANGED
|
@@ -3,8 +3,11 @@ export * from "./clients/google.js";
|
|
|
3
3
|
export * from "./clients/openai.js";
|
|
4
4
|
export * from "./clients/openaiResponses.js";
|
|
5
5
|
export * from "./clients/baseClient.js";
|
|
6
|
+
export * from "./clients/ollama.js";
|
|
7
|
+
export * from "./clients/llamaCpp.js";
|
|
6
8
|
import { SmolAnthropic } from "./clients/anthropic.js";
|
|
7
9
|
import { SmolGoogle } from "./clients/google.js";
|
|
10
|
+
import { LlamaCPP } from "./clients/llamaCpp.js";
|
|
8
11
|
import { SmolOllama } from "./clients/ollama.js";
|
|
9
12
|
import { SmolOpenAi } from "./clients/openai.js";
|
|
10
13
|
import { SmolOpenAiResponses } from "./clients/openaiResponses.js";
|
|
@@ -63,6 +66,8 @@ export function getClient(config) {
|
|
|
63
66
|
return new SmolGoogle(clientConfig);
|
|
64
67
|
case "ollama":
|
|
65
68
|
return new SmolOllama(clientConfig);
|
|
69
|
+
case "llama-cpp":
|
|
70
|
+
return new LlamaCPP(clientConfig);
|
|
66
71
|
default:
|
|
67
72
|
if (provider in registeredProviders) {
|
|
68
73
|
const ClientClass = registeredProviders[provider];
|
package/dist/clients/anthropic.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import Anthropic from "@anthropic-ai/sdk";
|
|
2
2
|
import { ToolCall } from "../classes/ToolCall.js";
|
|
3
3
|
import { SystemMessage, DeveloperMessage } from "../classes/message/index.js";
|
|
4
|
-
import { getLogger } from "../logger.js";
|
|
4
|
+
import { getLogger } from "../util/logger.js";
|
|
5
5
|
import { success, } from "../types.js";
|
|
6
6
|
import { zodToAnthropicTool } from "../util/tool.js";
|
|
7
7
|
import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
|
|
@@ -255,7 +255,10 @@ export class SmolAnthropic extends BaseClient {
|
|
|
255
255
|
}
|
|
256
256
|
}
|
|
257
257
|
this.logger.debug("Streaming response completed from Anthropic");
|
|
258
|
-
this.statelogClient?.promptResponse({
|
|
258
|
+
this.statelogClient?.promptResponse({
|
|
259
|
+
content,
|
|
260
|
+
usage: { inputTokens, outputTokens },
|
|
261
|
+
});
|
|
259
262
|
const toolCalls = [];
|
|
260
263
|
for (const block of toolBlocks.values()) {
|
|
261
264
|
const toolCall = new ToolCall(block.id, block.name, block.arguments);
|
package/dist/clients/baseClient.d.ts
CHANGED
|
@@ -15,10 +15,6 @@ export declare class BaseClient implements SmolClient {
|
|
|
15
15
|
}): AsyncGenerator<StreamChunk>;
|
|
16
16
|
text(promptConfig: PromptConfig): Promise<Result<PromptResult>> | AsyncGenerator<StreamChunk>;
|
|
17
17
|
checkMessageLimit(promptConfig: PromptConfig): Result<PromptResult> | null;
|
|
18
|
-
applyBudget(promptConfig: PromptConfig): {
|
|
19
|
-
config: PromptConfig;
|
|
20
|
-
failure?: Result<PromptResult>;
|
|
21
|
-
};
|
|
22
18
|
textSync(promptConfig: PromptConfig): Promise<Result<PromptResult>>;
|
|
23
19
|
checkForToolLoops(promptConfig: PromptConfig): {
|
|
24
20
|
continue: boolean;
|
package/dist/clients/baseClient.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { userMessage, assistantMessage, } from "../classes/message/index.js";
|
|
2
2
|
import { latencyTracker } from "../latencyTracker.js";
|
|
3
|
-
import { getLogger } from "../logger.js";
|
|
4
|
-
import { getModel, isTextModel } from "../models.js";
|
|
3
|
+
import { getLogger } from "../util/logger.js";
|
|
5
4
|
import { SmolStructuredOutputError } from "../smolError.js";
|
|
6
5
|
import { getStatelogClient } from "../statelogClient.js";
|
|
7
6
|
import { success, } from "../types.js";
|
|
@@ -20,19 +19,7 @@ export class BaseClient {
|
|
|
20
19
|
}
|
|
21
20
|
}
|
|
22
21
|
getAbortSignal(promptConfig) {
|
|
23
|
-
const signals = [];
|
|
24
|
-
if (promptConfig.abortSignal) {
|
|
25
|
-
signals.push(promptConfig.abortSignal);
|
|
26
|
-
}
|
|
27
|
-
const timeBudgetMs = promptConfig.budget?.timeBudgetMs;
|
|
28
|
-
if (timeBudgetMs !== undefined) {
|
|
29
|
-
signals.push(AbortSignal.timeout(timeBudgetMs));
|
|
30
|
-
}
|
|
31
|
-
if (signals.length === 0)
|
|
32
|
-
return undefined;
|
|
33
|
-
if (signals.length === 1)
|
|
34
|
-
return signals[0];
|
|
35
|
-
return AbortSignal.any(signals);
|
|
22
|
+
return promptConfig.abortSignal;
|
|
36
23
|
}
|
|
37
24
|
isAbortError(err) {
|
|
38
25
|
return ((err instanceof DOMException && err.name === "AbortError") ||
|
|
@@ -64,83 +51,11 @@ export class BaseClient {
|
|
|
64
51
|
}
|
|
65
52
|
return null;
|
|
66
53
|
}
|
|
67
|
-
applyBudget(promptConfig) {
|
|
68
|
-
const budget = promptConfig.budget;
|
|
69
|
-
if (!budget)
|
|
70
|
-
return { config: promptConfig };
|
|
71
|
-
let config = { ...promptConfig };
|
|
72
|
-
// Auto-compute used values from message history when not explicitly provided
|
|
73
|
-
const assistantMessages = config.messages.filter((m) => m instanceof AssistantMessage);
|
|
74
|
-
const tokensUsed = budget.tokensUsed ??
|
|
75
|
-
assistantMessages.reduce((sum, m) => sum + (m.usage?.outputTokens ?? 0), 0);
|
|
76
|
-
const costUsed = budget.costUsed ??
|
|
77
|
-
assistantMessages.reduce((sum, m) => sum + (m.cost?.totalCost ?? 0), 0);
|
|
78
|
-
const requestsUsed = budget.requestsUsed ?? assistantMessages.length;
|
|
79
|
-
// Request budget check
|
|
80
|
-
if (budget.requestBudget !== undefined &&
|
|
81
|
-
requestsUsed >= budget.requestBudget) {
|
|
82
|
-
this.statelogClient?.debug("Request budget exhausted", {
|
|
83
|
-
requestsUsed,
|
|
84
|
-
requestBudget: budget.requestBudget,
|
|
85
|
-
});
|
|
86
|
-
return {
|
|
87
|
-
config,
|
|
88
|
-
failure: {
|
|
89
|
-
success: false,
|
|
90
|
-
error: `Request budget exhausted: ${requestsUsed} requests used, budget is ${budget.requestBudget}`,
|
|
91
|
-
},
|
|
92
|
-
};
|
|
93
|
-
}
|
|
94
|
-
// Token budget check
|
|
95
|
-
if (budget.tokenBudget !== undefined) {
|
|
96
|
-
const remaining = budget.tokenBudget - tokensUsed;
|
|
97
|
-
if (remaining <= 0) {
|
|
98
|
-
this.statelogClient?.debug("Token budget exhausted", {
|
|
99
|
-
tokensUsed,
|
|
100
|
-
tokenBudget: budget.tokenBudget,
|
|
101
|
-
});
|
|
102
|
-
return {
|
|
103
|
-
config,
|
|
104
|
-
failure: {
|
|
105
|
-
success: false,
|
|
106
|
-
error: `Token budget exhausted: ${tokensUsed} output tokens used, budget is ${budget.tokenBudget}`,
|
|
107
|
-
},
|
|
108
|
-
};
|
|
109
|
-
}
|
|
110
|
-
config.maxTokens = Math.min(config.maxTokens ?? Infinity, remaining);
|
|
111
|
-
}
|
|
112
|
-
// Cost budget check
|
|
113
|
-
if (budget.costBudget !== undefined) {
|
|
114
|
-
const remainingUSD = budget.costBudget - costUsed;
|
|
115
|
-
if (remainingUSD <= 0) {
|
|
116
|
-
this.statelogClient?.debug("Cost budget exhausted", {
|
|
117
|
-
costUsed,
|
|
118
|
-
costBudget: budget.costBudget,
|
|
119
|
-
});
|
|
120
|
-
return {
|
|
121
|
-
config,
|
|
122
|
-
failure: {
|
|
123
|
-
success: false,
|
|
124
|
-
error: `Cost budget exhausted: $${costUsed.toFixed(4)} spent, budget is $${budget.costBudget.toFixed(4)}`,
|
|
125
|
-
},
|
|
126
|
-
};
|
|
127
|
-
}
|
|
128
|
-
const model = getModel(this.config.model);
|
|
129
|
-
if (model && isTextModel(model) && model.outputTokenCost) {
|
|
130
|
-
const remainingTokens = Math.floor((remainingUSD / model.outputTokenCost) * 1_000_000);
|
|
131
|
-
config.maxTokens = Math.min(config.maxTokens ?? Infinity, remainingTokens);
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
return { config };
|
|
135
|
-
}
|
|
136
54
|
async textSync(promptConfig) {
|
|
137
55
|
const messageLimitResult = this.checkMessageLimit(promptConfig);
|
|
138
56
|
if (messageLimitResult)
|
|
139
57
|
return messageLimitResult;
|
|
140
|
-
const {
|
|
141
|
-
if (budgetFailure)
|
|
142
|
-
return budgetFailure;
|
|
143
|
-
const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(budgetedConfig);
|
|
58
|
+
const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(promptConfig);
|
|
144
59
|
if (!shouldContinue) {
|
|
145
60
|
return {
|
|
146
61
|
success: true,
|
|
@@ -156,16 +71,11 @@ export class BaseClient {
|
|
|
156
71
|
}
|
|
157
72
|
catch (err) {
|
|
158
73
|
if (this.isAbortError(err)) {
|
|
159
|
-
const timeBudgetMs = promptConfig.budget?.timeBudgetMs;
|
|
160
|
-
const message = timeBudgetMs
|
|
161
|
-
? `Request timed out after ${timeBudgetMs}ms`
|
|
162
|
-
: "Request was aborted";
|
|
163
74
|
this.statelogClient?.debug("Request aborted or timed out", {
|
|
164
|
-
reason: message,
|
|
165
|
-
timeBudgetMs,
|
|
75
|
+
reason: "Request was aborted",
|
|
166
76
|
promptConfig,
|
|
167
77
|
});
|
|
168
|
-
return { success: false, error: message };
|
|
78
|
+
return { success: false, error: "Request was aborted" };
|
|
169
79
|
}
|
|
170
80
|
throw err;
|
|
171
81
|
}
|
|
@@ -364,17 +274,7 @@ export class BaseClient {
|
|
|
364
274
|
};
|
|
365
275
|
return;
|
|
366
276
|
}
|
|
367
|
-
const {
|
|
368
|
-
if (budgetFailure) {
|
|
369
|
-
yield {
|
|
370
|
-
type: "error",
|
|
371
|
-
error: budgetFailure.success === false
|
|
372
|
-
? budgetFailure.error
|
|
373
|
-
: "Budget exceeded",
|
|
374
|
-
};
|
|
375
|
-
return;
|
|
376
|
-
}
|
|
377
|
-
const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(budgetedConfig);
|
|
277
|
+
const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(config);
|
|
378
278
|
if (!shouldContinue) {
|
|
379
279
|
yield {
|
|
380
280
|
type: "done",
|
|
@@ -401,16 +301,11 @@ export class BaseClient {
|
|
|
401
301
|
}
|
|
402
302
|
catch (err) {
|
|
403
303
|
if (this.isAbortError(err)) {
|
|
404
|
-
const timeBudgetMs = config.budget?.timeBudgetMs;
|
|
405
|
-
const message = timeBudgetMs
|
|
406
|
-
? `Request timed out after ${timeBudgetMs}ms`
|
|
407
|
-
: "Request was aborted";
|
|
408
304
|
this.statelogClient?.debug("Streaming request aborted or timed out", {
|
|
409
|
-
reason: message,
|
|
410
|
-
timeBudgetMs,
|
|
305
|
+
reason: "Request was aborted",
|
|
411
306
|
newPromptConfig,
|
|
412
307
|
});
|
|
413
|
-
yield { type: "timeout", error: message };
|
|
308
|
+
yield { type: "timeout", error: "Request was aborted" };
|
|
414
309
|
}
|
|
415
310
|
else {
|
|
416
311
|
throw err;
|
package/dist/clients/google.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { GoogleGenAI } from "@google/genai";
|
|
2
2
|
import { ToolCall } from "../classes/ToolCall.js";
|
|
3
|
-
import { getLogger } from "../logger.js";
|
|
3
|
+
import { getLogger } from "../util/logger.js";
|
|
4
4
|
import { addCosts, addTokenUsage, success, } from "../types.js";
|
|
5
5
|
import { zodToGoogleTool } from "../util/tool.js";
|
|
6
6
|
import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
|
|
7
|
-
import { sanitizeAttributes } from "../util.js";
|
|
7
|
+
import { sanitizeAttributes } from "../util/util.js";
|
|
8
8
|
import { BaseClient } from "./baseClient.js";
|
|
9
9
|
import { Model } from "../model.js";
|
|
10
10
|
import { userMessage } from "../classes/message/index.js";
|
package/dist/clients/llamaCpp.d.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { BaseClient } from "./baseClient.js";
|
|
2
|
+
import { BaseClientConfig, PromptConfig, PromptResult, Result, StreamChunk } from "../types.js";
|
|
3
|
+
export declare class LlamaCPP extends BaseClient {
|
|
4
|
+
private llama;
|
|
5
|
+
private llamaModel;
|
|
6
|
+
private modelDir;
|
|
7
|
+
private model;
|
|
8
|
+
private logger;
|
|
9
|
+
constructor(config: BaseClientConfig);
|
|
10
|
+
setup(): Promise<void>;
|
|
11
|
+
private getModelName;
|
|
12
|
+
/**
|
|
13
|
+
* Converts smoltalk messages to node-llama-cpp's ChatHistoryItem format.
|
|
14
|
+
* Builds the full history including the last user message (LlamaChat.generateResponse
|
|
15
|
+
* expects the complete history, unlike LlamaChatSession which takes the last message separately).
|
|
16
|
+
*/
|
|
17
|
+
private convertMessages;
|
|
18
|
+
/**
|
|
19
|
+
* Builds node-llama-cpp function definitions from smoltalk tool configs.
|
|
20
|
+
* Uses ChatModelFunctions (no handler) — LlamaChat.generateResponse() returns
|
|
21
|
+
* function calls without executing them, which matches smoltalk's tool loop model.
|
|
22
|
+
*/
|
|
23
|
+
private buildFunctions;
|
|
24
|
+
private calculateUsageAndCost;
|
|
25
|
+
private extractToolCalls;
|
|
26
|
+
_textSync(config: PromptConfig): Promise<Result<PromptResult>>;
|
|
27
|
+
_textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
|
|
28
|
+
}
|
package/dist/clients/llamaCpp.js
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
import { getLlama, LlamaChat, LlamaLogLevel } from "node-llama-cpp";
|
|
2
|
+
import { BaseClient } from "./baseClient.js";
|
|
3
|
+
import { ToolCall } from "../classes/ToolCall.js";
|
|
4
|
+
import { getLogger } from "../util/logger.js";
|
|
5
|
+
import { Model } from "../model.js";
|
|
6
|
+
import { sanitizeAttributes } from "../util/util.js";
|
|
7
|
+
import { success, } from "../types.js";
|
|
8
|
+
import path from "path";
|
|
9
|
+
export class LlamaCPP extends BaseClient {
|
|
10
|
+
llama = null;
|
|
11
|
+
llamaModel = null;
|
|
12
|
+
modelDir;
|
|
13
|
+
model;
|
|
14
|
+
logger;
|
|
15
|
+
constructor(config) {
|
|
16
|
+
super(config);
|
|
17
|
+
if (!config.llamaCppModelDir) {
|
|
18
|
+
throw new Error("llamaCppModelDir is required in the config when using the LlamaCPP client.");
|
|
19
|
+
}
|
|
20
|
+
this.model = new Model(config.model);
|
|
21
|
+
this.modelDir = config.llamaCppModelDir;
|
|
22
|
+
this.logger = getLogger();
|
|
23
|
+
}
|
|
24
|
+
async setup() {
|
|
25
|
+
this.llama = await getLlama({ logLevel: LlamaLogLevel.error });
|
|
26
|
+
this.llamaModel = await this.llama.loadModel({
|
|
27
|
+
modelPath: path.join(this.modelDir, this.config.model),
|
|
28
|
+
});
|
|
29
|
+
}
|
|
30
|
+
getModelName() {
|
|
31
|
+
return this.model.getResolvedModel();
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Converts smoltalk messages to node-llama-cpp's ChatHistoryItem format.
|
|
35
|
+
* Builds the full history including the last user message (LlamaChat.generateResponse
|
|
36
|
+
* expects the complete history, unlike LlamaChatSession which takes the last message separately).
|
|
37
|
+
*/
|
|
38
|
+
convertMessages(messages) {
|
|
39
|
+
let systemPrompt;
|
|
40
|
+
const chatHistory = [];
|
|
41
|
+
for (let i = 0; i < messages.length; i++) {
|
|
42
|
+
const msg = messages[i];
|
|
43
|
+
if (msg.role === "system" || msg.role === "developer") {
|
|
44
|
+
if (!systemPrompt) {
|
|
45
|
+
systemPrompt = msg.content;
|
|
46
|
+
}
|
|
47
|
+
else {
|
|
48
|
+
systemPrompt += "\n" + msg.content;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
else if (msg.role === "user") {
|
|
52
|
+
chatHistory.push({ type: "user", text: msg.content });
|
|
53
|
+
}
|
|
54
|
+
else if (msg.role === "assistant") {
|
|
55
|
+
const assistantMsg = msg;
|
|
56
|
+
const response = [];
|
|
57
|
+
if (assistantMsg.content) {
|
|
58
|
+
response.push(assistantMsg.content);
|
|
59
|
+
}
|
|
60
|
+
// Handle tool calls: pair them with their results from subsequent tool messages
|
|
61
|
+
if (assistantMsg.toolCalls?.length) {
|
|
62
|
+
for (const tc of assistantMsg.toolCalls) {
|
|
63
|
+
// Find the corresponding tool result message
|
|
64
|
+
const toolResultMsg = messages
|
|
65
|
+
.slice(i + 1)
|
|
66
|
+
.find((m) => m.role === "tool" &&
|
|
67
|
+
m.tool_call_id === tc.id);
|
|
68
|
+
response.push({
|
|
69
|
+
type: "functionCall",
|
|
70
|
+
name: tc.name,
|
|
71
|
+
params: tc.arguments,
|
|
72
|
+
result: toolResultMsg ? toolResultMsg.content : undefined,
|
|
73
|
+
});
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
chatHistory.push({ type: "model", response });
|
|
77
|
+
}
|
|
78
|
+
// Tool messages are handled as part of assistant messages above
|
|
79
|
+
}
|
|
80
|
+
// Prepend system message if present
|
|
81
|
+
if (systemPrompt) {
|
|
82
|
+
chatHistory.unshift({ type: "system", text: systemPrompt });
|
|
83
|
+
}
|
|
84
|
+
return { systemPrompt, chatHistory };
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* Builds node-llama-cpp function definitions from smoltalk tool configs.
|
|
88
|
+
* Uses ChatModelFunctions (no handler) — LlamaChat.generateResponse() returns
|
|
89
|
+
* function calls without executing them, which matches smoltalk's tool loop model.
|
|
90
|
+
*/
|
|
91
|
+
buildFunctions(tools) {
|
|
92
|
+
if (!tools)
|
|
93
|
+
return undefined;
|
|
94
|
+
const functions = {};
|
|
95
|
+
for (const tool of tools) {
|
|
96
|
+
const jsonSchema = tool.schema.toJSONSchema();
|
|
97
|
+
functions[tool.name] = {
|
|
98
|
+
description: tool.description,
|
|
99
|
+
params: jsonSchema,
|
|
100
|
+
};
|
|
101
|
+
}
|
|
102
|
+
return functions;
|
|
103
|
+
}
|
|
104
|
+
calculateUsageAndCost(meterBefore, meterAfter) {
|
|
105
|
+
const inputTokens = meterAfter.usedInputTokens - meterBefore.usedInputTokens;
|
|
106
|
+
const outputTokens = meterAfter.usedOutputTokens - meterBefore.usedOutputTokens;
|
|
107
|
+
const usage = {
|
|
108
|
+
inputTokens,
|
|
109
|
+
outputTokens,
|
|
110
|
+
totalTokens: inputTokens + outputTokens,
|
|
111
|
+
};
|
|
112
|
+
const cost = this.model.calculateCost(usage) ?? undefined;
|
|
113
|
+
return { usage, cost };
|
|
114
|
+
}
|
|
115
|
+
extractToolCalls(functionCalls) {
|
|
116
|
+
if (!functionCalls?.length)
|
|
117
|
+
return [];
|
|
118
|
+
return functionCalls.map((fc) => new ToolCall(fc.functionName, fc.functionName, (fc.params ?? {})));
|
|
119
|
+
}
|
|
120
|
+
async _textSync(config) {
|
|
121
|
+
if (!this.llama || !this.llamaModel) {
|
|
122
|
+
await this.setup();
|
|
123
|
+
}
|
|
124
|
+
const setupLlama = this.llama;
|
|
125
|
+
const setupModel = this.llamaModel;
|
|
126
|
+
const { chatHistory } = this.convertMessages(config.messages);
|
|
127
|
+
if (chatHistory.length === 0) {
|
|
128
|
+
return success({
|
|
129
|
+
output: "",
|
|
130
|
+
toolCalls: [],
|
|
131
|
+
model: this.getModelName(),
|
|
132
|
+
});
|
|
133
|
+
}
|
|
134
|
+
// Create grammar for response format
|
|
135
|
+
let grammar;
|
|
136
|
+
if (config.responseFormat) {
|
|
137
|
+
grammar = await setupLlama.createGrammarForJsonSchema(config.responseFormat.toJSONSchema());
|
|
138
|
+
}
|
|
139
|
+
// Create context and LlamaChat
|
|
140
|
+
const context = await setupModel.createContext();
|
|
141
|
+
const sequence = context.getSequence();
|
|
142
|
+
const chat = new LlamaChat({
|
|
143
|
+
contextSequence: sequence,
|
|
144
|
+
});
|
|
145
|
+
// Build tools if provided
|
|
146
|
+
const functions = this.buildFunctions(config.tools);
|
|
147
|
+
// Track token usage
|
|
148
|
+
const meterBefore = sequence.tokenMeter.getState();
|
|
149
|
+
// Build options
|
|
150
|
+
const options = {};
|
|
151
|
+
if (config.maxTokens !== undefined) {
|
|
152
|
+
options.maxTokens = config.maxTokens;
|
|
153
|
+
}
|
|
154
|
+
if (config.temperature !== undefined) {
|
|
155
|
+
options.temperature = config.temperature;
|
|
156
|
+
}
|
|
157
|
+
if (config.abortSignal) {
|
|
158
|
+
options.signal = config.abortSignal;
|
|
159
|
+
options.stopOnAbortSignal = true;
|
|
160
|
+
}
|
|
161
|
+
if (grammar && !functions) {
|
|
162
|
+
options.grammar = grammar;
|
|
163
|
+
}
|
|
164
|
+
if (functions) {
|
|
165
|
+
options.functions = functions;
|
|
166
|
+
}
|
|
167
|
+
// Apply raw attributes
|
|
168
|
+
Object.assign(options, sanitizeAttributes(config.rawAttributes));
|
|
169
|
+
this.logger.debug("Sending request to llama.cpp");
|
|
170
|
+
this.statelogClient?.promptRequest({
|
|
171
|
+
model: this.getModelName(),
|
|
172
|
+
messageCount: config.messages.length,
|
|
173
|
+
});
|
|
174
|
+
let result;
|
|
175
|
+
let meterAfter;
|
|
176
|
+
try {
|
|
177
|
+
result = await chat.generateResponse(chatHistory, options);
|
|
178
|
+
meterAfter = sequence.tokenMeter.getState();
|
|
179
|
+
}
|
|
180
|
+
finally {
|
|
181
|
+
chat.dispose();
|
|
182
|
+
await context.dispose();
|
|
183
|
+
}
|
|
184
|
+
// Extract text output
|
|
185
|
+
const output = result.response || null;
|
|
186
|
+
// Extract tool calls — generateResponse returns them without executing handlers
|
|
187
|
+
const toolCalls = this.extractToolCalls(result.functionCalls);
|
|
188
|
+
// Calculate usage and cost
|
|
189
|
+
const { usage, cost } = this.calculateUsageAndCost(meterBefore, meterAfter);
|
|
190
|
+
this.logger.debug("Response from llama.cpp:", output);
|
|
191
|
+
this.statelogClient?.promptResponse({ output, usage, cost });
|
|
192
|
+
return success({
|
|
193
|
+
output,
|
|
194
|
+
toolCalls,
|
|
195
|
+
usage,
|
|
196
|
+
cost,
|
|
197
|
+
model: this.getModelName(),
|
|
198
|
+
});
|
|
199
|
+
}
|
|
200
|
+
async *_textStream(config) {
|
|
201
|
+
if (!this.llama || !this.llamaModel) {
|
|
202
|
+
await this.setup();
|
|
203
|
+
}
|
|
204
|
+
const setupLlama = this.llama;
|
|
205
|
+
const setupModel = this.llamaModel;
|
|
206
|
+
const { chatHistory } = this.convertMessages(config.messages);
|
|
207
|
+
if (chatHistory.length === 0) {
|
|
208
|
+
yield {
|
|
209
|
+
type: "done",
|
|
210
|
+
result: { output: null, toolCalls: [], model: this.getModelName() },
|
|
211
|
+
};
|
|
212
|
+
return;
|
|
213
|
+
}
|
|
214
|
+
// Create grammar for response format
|
|
215
|
+
let grammar;
|
|
216
|
+
if (config.responseFormat) {
|
|
217
|
+
grammar = await setupLlama.createGrammarForJsonSchema(config.responseFormat.toJSONSchema());
|
|
218
|
+
}
|
|
219
|
+
// Create context and LlamaChat
|
|
220
|
+
const context = await setupModel.createContext();
|
|
221
|
+
const sequence = context.getSequence();
|
|
222
|
+
const chat = new LlamaChat({
|
|
223
|
+
contextSequence: sequence,
|
|
224
|
+
});
|
|
225
|
+
const functions = this.buildFunctions(config.tools);
|
|
226
|
+
const meterBefore = sequence.tokenMeter.getState();
|
|
227
|
+
// Bridge callback-based streaming to async generator using a queue
|
|
228
|
+
const chunks = [];
|
|
229
|
+
let resolveWaiter = null;
|
|
230
|
+
let done = false;
|
|
231
|
+
const pushChunk = (chunk) => {
|
|
232
|
+
chunks.push(chunk);
|
|
233
|
+
if (resolveWaiter) {
|
|
234
|
+
resolveWaiter();
|
|
235
|
+
resolveWaiter = null;
|
|
236
|
+
}
|
|
237
|
+
};
|
|
238
|
+
// Build options
|
|
239
|
+
const options = {
|
|
240
|
+
onTextChunk: (text) => {
|
|
241
|
+
pushChunk({ type: "text", text });
|
|
242
|
+
},
|
|
243
|
+
};
|
|
244
|
+
if (config.maxTokens !== undefined) {
|
|
245
|
+
options.maxTokens = config.maxTokens;
|
|
246
|
+
}
|
|
247
|
+
if (config.temperature !== undefined) {
|
|
248
|
+
options.temperature = config.temperature;
|
|
249
|
+
}
|
|
250
|
+
if (config.abortSignal) {
|
|
251
|
+
options.signal = config.abortSignal;
|
|
252
|
+
options.stopOnAbortSignal = true;
|
|
253
|
+
}
|
|
254
|
+
if (grammar && !functions) {
|
|
255
|
+
options.grammar = grammar;
|
|
256
|
+
}
|
|
257
|
+
if (functions) {
|
|
258
|
+
options.functions = functions;
|
|
259
|
+
}
|
|
260
|
+
Object.assign(options, sanitizeAttributes(config.rawAttributes));
|
|
261
|
+
this.logger.debug("Sending streaming request to llama.cpp");
|
|
262
|
+
this.statelogClient?.promptRequest({
|
|
263
|
+
model: this.getModelName(),
|
|
264
|
+
messageCount: config.messages.length,
|
|
265
|
+
});
|
|
266
|
+
// Run generateResponse in background, push chunks as they arrive
|
|
267
|
+
const promptPromise = chat
|
|
268
|
+
.generateResponse(chatHistory, options)
|
|
269
|
+
.then((result) => {
|
|
270
|
+
const meterAfter = sequence.tokenMeter.getState();
|
|
271
|
+
const toolCalls = this.extractToolCalls(result.functionCalls);
|
|
272
|
+
for (const tc of toolCalls) {
|
|
273
|
+
pushChunk({ type: "tool_call", toolCall: tc });
|
|
274
|
+
}
|
|
275
|
+
const { usage, cost } = this.calculateUsageAndCost(meterBefore, meterAfter);
|
|
276
|
+
const output = result.response || null;
|
|
277
|
+
this.logger.debug("Streaming response completed from llama.cpp");
|
|
278
|
+
this.statelogClient?.promptResponse({ output, usage, cost });
|
|
279
|
+
pushChunk({
|
|
280
|
+
type: "done",
|
|
281
|
+
result: {
|
|
282
|
+
output,
|
|
283
|
+
toolCalls,
|
|
284
|
+
usage,
|
|
285
|
+
cost,
|
|
286
|
+
model: this.getModelName(),
|
|
287
|
+
},
|
|
288
|
+
});
|
|
289
|
+
})
|
|
290
|
+
.catch((error) => {
|
|
291
|
+
pushChunk({ type: "error", error: error.message });
|
|
292
|
+
})
|
|
293
|
+
.finally(() => {
|
|
294
|
+
done = true;
|
|
295
|
+
chat.dispose();
|
|
296
|
+
context.dispose();
|
|
297
|
+
// Wake up the generator if it's waiting
|
|
298
|
+
if (resolveWaiter) {
|
|
299
|
+
resolveWaiter();
|
|
300
|
+
resolveWaiter = null;
|
|
301
|
+
}
|
|
302
|
+
});
|
|
303
|
+
// Yield chunks as they arrive
|
|
304
|
+
while (!done || chunks.length > 0) {
|
|
305
|
+
if (chunks.length > 0) {
|
|
306
|
+
yield chunks.shift();
|
|
307
|
+
}
|
|
308
|
+
else if (!done) {
|
|
309
|
+
await new Promise((resolve) => {
|
|
310
|
+
resolveWaiter = resolve;
|
|
311
|
+
});
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
await promptPromise;
|
|
315
|
+
}
|
|
316
|
+
}
|
package/dist/clients/ollama.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { Ollama } from "ollama";
|
|
2
2
|
import { ToolCall } from "../classes/ToolCall.js";
|
|
3
|
-
import { getLogger } from "../logger.js";
|
|
3
|
+
import { getLogger } from "../util/logger.js";
|
|
4
4
|
import { success, } from "../types.js";
|
|
5
5
|
import { zodToGoogleTool } from "../util/tool.js";
|
|
6
|
-
import { sanitizeAttributes } from "../util.js";
|
|
6
|
+
import { sanitizeAttributes } from "../util/util.js";
|
|
7
7
|
import { BaseClient } from "./baseClient.js";
|
|
8
8
|
import { SmolContextWindowExceededError } from "../smolError.js";
|
|
9
9
|
import { Model } from "../model.js";
|
package/dist/clients/openai.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import OpenAI from "openai";
|
|
2
2
|
import { success, } from "../types.js";
|
|
3
3
|
import { ToolCall } from "../classes/ToolCall.js";
|
|
4
|
-
import { isFunctionToolCall, sanitizeAttributes } from "../util.js";
|
|
5
|
-
import { getLogger } from "../logger.js";
|
|
4
|
+
import { isFunctionToolCall, sanitizeAttributes } from "../util/util.js";
|
|
5
|
+
import { getLogger } from "../util/logger.js";
|
|
6
6
|
import { BaseClient } from "./baseClient.js";
|
|
7
7
|
import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
|
|
8
8
|
import { zodToOpenAITool } from "../util/tool.js";
|
|
@@ -111,7 +111,9 @@ export class SmolOpenAi extends BaseClient {
|
|
|
111
111
|
}
|
|
112
112
|
else {
|
|
113
113
|
this.logger.warn(`Unsupported tool call type: ${tc.type} for tool call ID: ${tc.id}`);
|
|
114
|
-
this.statelogClient?.debug(`Unsupported tool call type: ${tc.type}`, {
|
|
114
|
+
this.statelogClient?.debug(`Unsupported tool call type: ${tc.type}`, {
|
|
115
|
+
toolCallId: tc.id,
|
|
116
|
+
});
|
|
115
117
|
}
|
|
116
118
|
}
|
|
117
119
|
}
|