smoltalk 0.0.59 → 0.0.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/dist/classes/ToolCall.js +1 -1
  2. package/dist/client.d.ts +2 -0
  3. package/dist/client.js +5 -0
  4. package/dist/clients/anthropic.js +5 -2
  5. package/dist/clients/baseClient.d.ts +0 -4
  6. package/dist/clients/baseClient.js +9 -114
  7. package/dist/clients/google.js +2 -2
  8. package/dist/clients/llamaCpp.d.ts +28 -0
  9. package/dist/clients/llamaCpp.js +316 -0
  10. package/dist/clients/ollama.js +2 -2
  11. package/dist/clients/openai.js +5 -3
  12. package/dist/clients/openaiResponses.js +9 -9
  13. package/dist/functions.js +5 -5
  14. package/dist/index.d.ts +1 -1
  15. package/dist/index.js +1 -1
  16. package/dist/model.d.ts +5 -14
  17. package/dist/model.js +5 -102
  18. package/dist/models.d.ts +2 -96
  19. package/dist/models.js +1 -83
  20. package/dist/strategies/baseStrategy.d.ts +11 -3
  21. package/dist/strategies/baseStrategy.js +25 -3
  22. package/dist/strategies/fastestStrategy.d.ts +6 -4
  23. package/dist/strategies/fastestStrategy.js +33 -27
  24. package/dist/strategies/idStrategy.d.ts +3 -1
  25. package/dist/strategies/idStrategy.js +13 -10
  26. package/dist/strategies/index.d.ts +4 -2
  27. package/dist/strategies/index.js +9 -4
  28. package/dist/strategies/raceStrategy.d.ts +2 -2
  29. package/dist/strategies/raceStrategy.js +1 -1
  30. package/dist/strategies/randomStrategy.d.ts +2 -1
  31. package/dist/strategies/randomStrategy.js +8 -0
  32. package/dist/strategies/timeoutStrategy.d.ts +13 -0
  33. package/dist/strategies/timeoutStrategy.js +58 -0
  34. package/dist/strategies/types.d.ts +13 -41
  35. package/dist/strategies/types.js +9 -22
  36. package/dist/types/costEstimate.d.ts +16 -0
  37. package/dist/types/costEstimate.js +30 -0
  38. package/dist/types/tokenUsage.d.ts +14 -0
  39. package/dist/types/tokenUsage.js +25 -0
  40. package/dist/types.d.ts +88 -87
  41. package/dist/types.js +10 -61
  42. package/dist/util/tool.js +1 -1
  43. package/dist/{util.d.ts → util/util.d.ts} +1 -1
  44. package/dist/{util.js → util/util.js} +1 -1
  45. package/package.json +4 -2
  46. /package/dist/{logger.d.ts → util/logger.d.ts} +0 -0
  47. /package/dist/{logger.js → util/logger.js} +0 -0
package/dist/classes/ToolCall.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { z } from "zod";
2
- import { getLogger } from "../logger.js";
2
+ import { getLogger } from "../util/logger.js";
3
3
  export const ToolCallJSONSchema = z.object({
4
4
  id: z.string().default(""),
5
5
  name: z.string(),
package/dist/client.d.ts CHANGED
@@ -3,6 +3,8 @@ export * from "./clients/google.js";
3
3
  export * from "./clients/openai.js";
4
4
  export * from "./clients/openaiResponses.js";
5
5
  export * from "./clients/baseClient.js";
6
+ export * from "./clients/ollama.js";
7
+ export * from "./clients/llamaCpp.js";
6
8
  import { BaseClient } from "./clients/baseClient.js";
7
9
  import { ResolvedSmolConfig } from "./types.js";
8
10
  export declare function registerProvider(providerName: string, clientClass: typeof BaseClient): void;
package/dist/client.js CHANGED
@@ -3,8 +3,11 @@ export * from "./clients/google.js";
3
3
  export * from "./clients/openai.js";
4
4
  export * from "./clients/openaiResponses.js";
5
5
  export * from "./clients/baseClient.js";
6
+ export * from "./clients/ollama.js";
7
+ export * from "./clients/llamaCpp.js";
6
8
  import { SmolAnthropic } from "./clients/anthropic.js";
7
9
  import { SmolGoogle } from "./clients/google.js";
10
+ import { LlamaCPP } from "./clients/llamaCpp.js";
8
11
  import { SmolOllama } from "./clients/ollama.js";
9
12
  import { SmolOpenAi } from "./clients/openai.js";
10
13
  import { SmolOpenAiResponses } from "./clients/openaiResponses.js";
@@ -63,6 +66,8 @@ export function getClient(config) {
63
66
  return new SmolGoogle(clientConfig);
64
67
  case "ollama":
65
68
  return new SmolOllama(clientConfig);
69
+ case "llama-cpp":
70
+ return new LlamaCPP(clientConfig);
66
71
  default:
67
72
  if (provider in registeredProviders) {
68
73
  const ClientClass = registeredProviders[provider];
package/dist/clients/anthropic.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import Anthropic from "@anthropic-ai/sdk";
2
2
  import { ToolCall } from "../classes/ToolCall.js";
3
3
  import { SystemMessage, DeveloperMessage } from "../classes/message/index.js";
4
- import { getLogger } from "../logger.js";
4
+ import { getLogger } from "../util/logger.js";
5
5
  import { success, } from "../types.js";
6
6
  import { zodToAnthropicTool } from "../util/tool.js";
7
7
  import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
@@ -255,7 +255,10 @@ export class SmolAnthropic extends BaseClient {
255
255
  }
256
256
  }
257
257
  this.logger.debug("Streaming response completed from Anthropic");
258
- this.statelogClient?.promptResponse({ content, usage: { inputTokens, outputTokens } });
258
+ this.statelogClient?.promptResponse({
259
+ content,
260
+ usage: { inputTokens, outputTokens },
261
+ });
259
262
  const toolCalls = [];
260
263
  for (const block of toolBlocks.values()) {
261
264
  const toolCall = new ToolCall(block.id, block.name, block.arguments);
package/dist/clients/baseClient.d.ts CHANGED
@@ -15,10 +15,6 @@ export declare class BaseClient implements SmolClient {
15
15
  }): AsyncGenerator<StreamChunk>;
16
16
  text(promptConfig: PromptConfig): Promise<Result<PromptResult>> | AsyncGenerator<StreamChunk>;
17
17
  checkMessageLimit(promptConfig: PromptConfig): Result<PromptResult> | null;
18
- applyBudget(promptConfig: PromptConfig): {
19
- config: PromptConfig;
20
- failure?: Result<PromptResult>;
21
- };
22
18
  textSync(promptConfig: PromptConfig): Promise<Result<PromptResult>>;
23
19
  checkForToolLoops(promptConfig: PromptConfig): {
24
20
  continue: boolean;
package/dist/clients/baseClient.js CHANGED
@@ -1,7 +1,6 @@
1
- import { AssistantMessage, userMessage, assistantMessage, } from "../classes/message/index.js";
1
+ import { userMessage, assistantMessage, } from "../classes/message/index.js";
2
2
  import { latencyTracker } from "../latencyTracker.js";
3
- import { getLogger } from "../logger.js";
4
- import { getModel, isTextModel } from "../models.js";
3
+ import { getLogger } from "../util/logger.js";
5
4
  import { SmolStructuredOutputError } from "../smolError.js";
6
5
  import { getStatelogClient } from "../statelogClient.js";
7
6
  import { success, } from "../types.js";
@@ -20,19 +19,7 @@ export class BaseClient {
20
19
  }
21
20
  }
22
21
  getAbortSignal(promptConfig) {
23
- const signals = [];
24
- if (promptConfig.abortSignal) {
25
- signals.push(promptConfig.abortSignal);
26
- }
27
- const timeBudgetMs = promptConfig.budget?.timeBudgetMs;
28
- if (timeBudgetMs !== undefined) {
29
- signals.push(AbortSignal.timeout(timeBudgetMs));
30
- }
31
- if (signals.length === 0)
32
- return undefined;
33
- if (signals.length === 1)
34
- return signals[0];
35
- return AbortSignal.any(signals);
22
+ return promptConfig.abortSignal;
36
23
  }
37
24
  isAbortError(err) {
38
25
  return ((err instanceof DOMException && err.name === "AbortError") ||
@@ -64,83 +51,11 @@ export class BaseClient {
64
51
  }
65
52
  return null;
66
53
  }
67
- applyBudget(promptConfig) {
68
- const budget = promptConfig.budget;
69
- if (!budget)
70
- return { config: promptConfig };
71
- let config = { ...promptConfig };
72
- // Auto-compute used values from message history when not explicitly provided
73
- const assistantMessages = config.messages.filter((m) => m instanceof AssistantMessage);
74
- const tokensUsed = budget.tokensUsed ??
75
- assistantMessages.reduce((sum, m) => sum + (m.usage?.outputTokens ?? 0), 0);
76
- const costUsed = budget.costUsed ??
77
- assistantMessages.reduce((sum, m) => sum + (m.cost?.totalCost ?? 0), 0);
78
- const requestsUsed = budget.requestsUsed ?? assistantMessages.length;
79
- // Request budget check
80
- if (budget.requestBudget !== undefined &&
81
- requestsUsed >= budget.requestBudget) {
82
- this.statelogClient?.debug("Request budget exhausted", {
83
- requestsUsed,
84
- requestBudget: budget.requestBudget,
85
- });
86
- return {
87
- config,
88
- failure: {
89
- success: false,
90
- error: `Request budget exhausted: ${requestsUsed} requests used, budget is ${budget.requestBudget}`,
91
- },
92
- };
93
- }
94
- // Token budget check
95
- if (budget.tokenBudget !== undefined) {
96
- const remaining = budget.tokenBudget - tokensUsed;
97
- if (remaining <= 0) {
98
- this.statelogClient?.debug("Token budget exhausted", {
99
- tokensUsed,
100
- tokenBudget: budget.tokenBudget,
101
- });
102
- return {
103
- config,
104
- failure: {
105
- success: false,
106
- error: `Token budget exhausted: ${tokensUsed} output tokens used, budget is ${budget.tokenBudget}`,
107
- },
108
- };
109
- }
110
- config.maxTokens = Math.min(config.maxTokens ?? Infinity, remaining);
111
- }
112
- // Cost budget check
113
- if (budget.costBudget !== undefined) {
114
- const remainingUSD = budget.costBudget - costUsed;
115
- if (remainingUSD <= 0) {
116
- this.statelogClient?.debug("Cost budget exhausted", {
117
- costUsed,
118
- costBudget: budget.costBudget,
119
- });
120
- return {
121
- config,
122
- failure: {
123
- success: false,
124
- error: `Cost budget exhausted: $${costUsed.toFixed(4)} spent, budget is $${budget.costBudget.toFixed(4)}`,
125
- },
126
- };
127
- }
128
- const model = getModel(this.config.model);
129
- if (model && isTextModel(model) && model.outputTokenCost) {
130
- const remainingTokens = Math.floor((remainingUSD / model.outputTokenCost) * 1_000_000);
131
- config.maxTokens = Math.min(config.maxTokens ?? Infinity, remainingTokens);
132
- }
133
- }
134
- return { config };
135
- }
136
54
  async textSync(promptConfig) {
137
55
  const messageLimitResult = this.checkMessageLimit(promptConfig);
138
56
  if (messageLimitResult)
139
57
  return messageLimitResult;
140
- const { config: budgetedConfig, failure: budgetFailure } = this.applyBudget(promptConfig);
141
- if (budgetFailure)
142
- return budgetFailure;
143
- const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(budgetedConfig);
58
+ const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(promptConfig);
144
59
  if (!shouldContinue) {
145
60
  return {
146
61
  success: true,
@@ -156,16 +71,11 @@ export class BaseClient {
156
71
  }
157
72
  catch (err) {
158
73
  if (this.isAbortError(err)) {
159
- const timeBudgetMs = promptConfig.budget?.timeBudgetMs;
160
- const message = timeBudgetMs
161
- ? `Request timed out after ${timeBudgetMs}ms`
162
- : "Request was aborted";
163
74
  this.statelogClient?.debug("Request aborted or timed out", {
164
- reason: message,
165
- timeBudgetMs,
75
+ reason: "Request was aborted",
166
76
  promptConfig,
167
77
  });
168
- return { success: false, error: message };
78
+ return { success: false, error: "Request was aborted" };
169
79
  }
170
80
  throw err;
171
81
  }
@@ -364,17 +274,7 @@ export class BaseClient {
364
274
  };
365
275
  return;
366
276
  }
367
- const { config: budgetedConfig, failure: budgetFailure } = this.applyBudget(config);
368
- if (budgetFailure) {
369
- yield {
370
- type: "error",
371
- error: budgetFailure.success === false
372
- ? budgetFailure.error
373
- : "Budget exceeded",
374
- };
375
- return;
376
- }
377
- const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(budgetedConfig);
277
+ const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(config);
378
278
  if (!shouldContinue) {
379
279
  yield {
380
280
  type: "done",
@@ -401,16 +301,11 @@ export class BaseClient {
401
301
  }
402
302
  catch (err) {
403
303
  if (this.isAbortError(err)) {
404
- const timeBudgetMs = config.budget?.timeBudgetMs;
405
- const message = timeBudgetMs
406
- ? `Request timed out after ${timeBudgetMs}ms`
407
- : "Request was aborted";
408
304
  this.statelogClient?.debug("Streaming request aborted or timed out", {
409
- reason: message,
410
- timeBudgetMs,
305
+ reason: "Request was aborted",
411
306
  newPromptConfig,
412
307
  });
413
- yield { type: "timeout", error: message };
308
+ yield { type: "timeout", error: "Request was aborted" };
414
309
  }
415
310
  else {
416
311
  throw err;
package/dist/clients/google.js CHANGED
@@ -1,10 +1,10 @@
1
1
  import { GoogleGenAI } from "@google/genai";
2
2
  import { ToolCall } from "../classes/ToolCall.js";
3
- import { getLogger } from "../logger.js";
3
+ import { getLogger } from "../util/logger.js";
4
4
  import { addCosts, addTokenUsage, success, } from "../types.js";
5
5
  import { zodToGoogleTool } from "../util/tool.js";
6
6
  import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
7
- import { sanitizeAttributes } from "../util.js";
7
+ import { sanitizeAttributes } from "../util/util.js";
8
8
  import { BaseClient } from "./baseClient.js";
9
9
  import { Model } from "../model.js";
10
10
  import { userMessage } from "../classes/message/index.js";
package/dist/clients/llamaCpp.d.ts ADDED
@@ -0,0 +1,28 @@
1
+ import { BaseClient } from "./baseClient.js";
2
+ import { BaseClientConfig, PromptConfig, PromptResult, Result, StreamChunk } from "../types.js";
3
+ export declare class LlamaCPP extends BaseClient {
4
+ private llama;
5
+ private llamaModel;
6
+ private modelDir;
7
+ private model;
8
+ private logger;
9
+ constructor(config: BaseClientConfig);
10
+ setup(): Promise<void>;
11
+ private getModelName;
12
+ /**
13
+ * Converts smoltalk messages to node-llama-cpp's ChatHistoryItem format.
14
+ * Builds the full history including the last user message (LlamaChat.generateResponse
15
+ * expects the complete history, unlike LlamaChatSession which takes the last message separately).
16
+ */
17
+ private convertMessages;
18
+ /**
19
+ * Builds node-llama-cpp function definitions from smoltalk tool configs.
20
+ * Uses ChatModelFunctions (no handler) — LlamaChat.generateResponse() returns
21
+ * function calls without executing them, which matches smoltalk's tool loop model.
22
+ */
23
+ private buildFunctions;
24
+ private calculateUsageAndCost;
25
+ private extractToolCalls;
26
+ _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
27
+ _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
28
+ }
package/dist/clients/llamaCpp.js ADDED
@@ -0,0 +1,316 @@
1
+ import { getLlama, LlamaChat, LlamaLogLevel } from "node-llama-cpp";
2
+ import { BaseClient } from "./baseClient.js";
3
+ import { ToolCall } from "../classes/ToolCall.js";
4
+ import { getLogger } from "../util/logger.js";
5
+ import { Model } from "../model.js";
6
+ import { sanitizeAttributes } from "../util/util.js";
7
+ import { success, } from "../types.js";
8
+ import path from "path";
9
+ export class LlamaCPP extends BaseClient {
10
+ llama = null;
11
+ llamaModel = null;
12
+ modelDir;
13
+ model;
14
+ logger;
15
+ constructor(config) {
16
+ super(config);
17
+ if (!config.llamaCppModelDir) {
18
+ throw new Error("llamaCppModelDir is required in the config when using the LlamaCPP client.");
19
+ }
20
+ this.model = new Model(config.model);
21
+ this.modelDir = config.llamaCppModelDir;
22
+ this.logger = getLogger();
23
+ }
24
+ async setup() {
25
+ this.llama = await getLlama({ logLevel: LlamaLogLevel.error });
26
+ this.llamaModel = await this.llama.loadModel({
27
+ modelPath: path.join(this.modelDir, this.config.model),
28
+ });
29
+ }
30
+ getModelName() {
31
+ return this.model.getResolvedModel();
32
+ }
33
+ /**
34
+ * Converts smoltalk messages to node-llama-cpp's ChatHistoryItem format.
35
+ * Builds the full history including the last user message (LlamaChat.generateResponse
36
+ * expects the complete history, unlike LlamaChatSession which takes the last message separately).
37
+ */
38
+ convertMessages(messages) {
39
+ let systemPrompt;
40
+ const chatHistory = [];
41
+ for (let i = 0; i < messages.length; i++) {
42
+ const msg = messages[i];
43
+ if (msg.role === "system" || msg.role === "developer") {
44
+ if (!systemPrompt) {
45
+ systemPrompt = msg.content;
46
+ }
47
+ else {
48
+ systemPrompt += "\n" + msg.content;
49
+ }
50
+ }
51
+ else if (msg.role === "user") {
52
+ chatHistory.push({ type: "user", text: msg.content });
53
+ }
54
+ else if (msg.role === "assistant") {
55
+ const assistantMsg = msg;
56
+ const response = [];
57
+ if (assistantMsg.content) {
58
+ response.push(assistantMsg.content);
59
+ }
60
+ // Handle tool calls: pair them with their results from subsequent tool messages
61
+ if (assistantMsg.toolCalls?.length) {
62
+ for (const tc of assistantMsg.toolCalls) {
63
+ // Find the corresponding tool result message
64
+ const toolResultMsg = messages
65
+ .slice(i + 1)
66
+ .find((m) => m.role === "tool" &&
67
+ m.tool_call_id === tc.id);
68
+ response.push({
69
+ type: "functionCall",
70
+ name: tc.name,
71
+ params: tc.arguments,
72
+ result: toolResultMsg ? toolResultMsg.content : undefined,
73
+ });
74
+ }
75
+ }
76
+ chatHistory.push({ type: "model", response });
77
+ }
78
+ // Tool messages are handled as part of assistant messages above
79
+ }
80
+ // Prepend system message if present
81
+ if (systemPrompt) {
82
+ chatHistory.unshift({ type: "system", text: systemPrompt });
83
+ }
84
+ return { systemPrompt, chatHistory };
85
+ }
86
+ /**
87
+ * Builds node-llama-cpp function definitions from smoltalk tool configs.
88
+ * Uses ChatModelFunctions (no handler) — LlamaChat.generateResponse() returns
89
+ * function calls without executing them, which matches smoltalk's tool loop model.
90
+ */
91
+ buildFunctions(tools) {
92
+ if (!tools)
93
+ return undefined;
94
+ const functions = {};
95
+ for (const tool of tools) {
96
+ const jsonSchema = tool.schema.toJSONSchema();
97
+ functions[tool.name] = {
98
+ description: tool.description,
99
+ params: jsonSchema,
100
+ };
101
+ }
102
+ return functions;
103
+ }
104
+ calculateUsageAndCost(meterBefore, meterAfter) {
105
+ const inputTokens = meterAfter.usedInputTokens - meterBefore.usedInputTokens;
106
+ const outputTokens = meterAfter.usedOutputTokens - meterBefore.usedOutputTokens;
107
+ const usage = {
108
+ inputTokens,
109
+ outputTokens,
110
+ totalTokens: inputTokens + outputTokens,
111
+ };
112
+ const cost = this.model.calculateCost(usage) ?? undefined;
113
+ return { usage, cost };
114
+ }
115
+ extractToolCalls(functionCalls) {
116
+ if (!functionCalls?.length)
117
+ return [];
118
+ return functionCalls.map((fc) => new ToolCall(fc.functionName, fc.functionName, (fc.params ?? {})));
119
+ }
120
+ async _textSync(config) {
121
+ if (!this.llama || !this.llamaModel) {
122
+ await this.setup();
123
+ }
124
+ const setupLlama = this.llama;
125
+ const setupModel = this.llamaModel;
126
+ const { chatHistory } = this.convertMessages(config.messages);
127
+ if (chatHistory.length === 0) {
128
+ return success({
129
+ output: "",
130
+ toolCalls: [],
131
+ model: this.getModelName(),
132
+ });
133
+ }
134
+ // Create grammar for response format
135
+ let grammar;
136
+ if (config.responseFormat) {
137
+ grammar = await setupLlama.createGrammarForJsonSchema(config.responseFormat.toJSONSchema());
138
+ }
139
+ // Create context and LlamaChat
140
+ const context = await setupModel.createContext();
141
+ const sequence = context.getSequence();
142
+ const chat = new LlamaChat({
143
+ contextSequence: sequence,
144
+ });
145
+ // Build tools if provided
146
+ const functions = this.buildFunctions(config.tools);
147
+ // Track token usage
148
+ const meterBefore = sequence.tokenMeter.getState();
149
+ // Build options
150
+ const options = {};
151
+ if (config.maxTokens !== undefined) {
152
+ options.maxTokens = config.maxTokens;
153
+ }
154
+ if (config.temperature !== undefined) {
155
+ options.temperature = config.temperature;
156
+ }
157
+ if (config.abortSignal) {
158
+ options.signal = config.abortSignal;
159
+ options.stopOnAbortSignal = true;
160
+ }
161
+ if (grammar && !functions) {
162
+ options.grammar = grammar;
163
+ }
164
+ if (functions) {
165
+ options.functions = functions;
166
+ }
167
+ // Apply raw attributes
168
+ Object.assign(options, sanitizeAttributes(config.rawAttributes));
169
+ this.logger.debug("Sending request to llama.cpp");
170
+ this.statelogClient?.promptRequest({
171
+ model: this.getModelName(),
172
+ messageCount: config.messages.length,
173
+ });
174
+ let result;
175
+ let meterAfter;
176
+ try {
177
+ result = await chat.generateResponse(chatHistory, options);
178
+ meterAfter = sequence.tokenMeter.getState();
179
+ }
180
+ finally {
181
+ chat.dispose();
182
+ await context.dispose();
183
+ }
184
+ // Extract text output
185
+ const output = result.response || null;
186
+ // Extract tool calls — generateResponse returns them without executing handlers
187
+ const toolCalls = this.extractToolCalls(result.functionCalls);
188
+ // Calculate usage and cost
189
+ const { usage, cost } = this.calculateUsageAndCost(meterBefore, meterAfter);
190
+ this.logger.debug("Response from llama.cpp:", output);
191
+ this.statelogClient?.promptResponse({ output, usage, cost });
192
+ return success({
193
+ output,
194
+ toolCalls,
195
+ usage,
196
+ cost,
197
+ model: this.getModelName(),
198
+ });
199
+ }
200
+ async *_textStream(config) {
201
+ if (!this.llama || !this.llamaModel) {
202
+ await this.setup();
203
+ }
204
+ const setupLlama = this.llama;
205
+ const setupModel = this.llamaModel;
206
+ const { chatHistory } = this.convertMessages(config.messages);
207
+ if (chatHistory.length === 0) {
208
+ yield {
209
+ type: "done",
210
+ result: { output: null, toolCalls: [], model: this.getModelName() },
211
+ };
212
+ return;
213
+ }
214
+ // Create grammar for response format
215
+ let grammar;
216
+ if (config.responseFormat) {
217
+ grammar = await setupLlama.createGrammarForJsonSchema(config.responseFormat.toJSONSchema());
218
+ }
219
+ // Create context and LlamaChat
220
+ const context = await setupModel.createContext();
221
+ const sequence = context.getSequence();
222
+ const chat = new LlamaChat({
223
+ contextSequence: sequence,
224
+ });
225
+ const functions = this.buildFunctions(config.tools);
226
+ const meterBefore = sequence.tokenMeter.getState();
227
+ // Bridge callback-based streaming to async generator using a queue
228
+ const chunks = [];
229
+ let resolveWaiter = null;
230
+ let done = false;
231
+ const pushChunk = (chunk) => {
232
+ chunks.push(chunk);
233
+ if (resolveWaiter) {
234
+ resolveWaiter();
235
+ resolveWaiter = null;
236
+ }
237
+ };
238
+ // Build options
239
+ const options = {
240
+ onTextChunk: (text) => {
241
+ pushChunk({ type: "text", text });
242
+ },
243
+ };
244
+ if (config.maxTokens !== undefined) {
245
+ options.maxTokens = config.maxTokens;
246
+ }
247
+ if (config.temperature !== undefined) {
248
+ options.temperature = config.temperature;
249
+ }
250
+ if (config.abortSignal) {
251
+ options.signal = config.abortSignal;
252
+ options.stopOnAbortSignal = true;
253
+ }
254
+ if (grammar && !functions) {
255
+ options.grammar = grammar;
256
+ }
257
+ if (functions) {
258
+ options.functions = functions;
259
+ }
260
+ Object.assign(options, sanitizeAttributes(config.rawAttributes));
261
+ this.logger.debug("Sending streaming request to llama.cpp");
262
+ this.statelogClient?.promptRequest({
263
+ model: this.getModelName(),
264
+ messageCount: config.messages.length,
265
+ });
266
+ // Run generateResponse in background, push chunks as they arrive
267
+ const promptPromise = chat
268
+ .generateResponse(chatHistory, options)
269
+ .then((result) => {
270
+ const meterAfter = sequence.tokenMeter.getState();
271
+ const toolCalls = this.extractToolCalls(result.functionCalls);
272
+ for (const tc of toolCalls) {
273
+ pushChunk({ type: "tool_call", toolCall: tc });
274
+ }
275
+ const { usage, cost } = this.calculateUsageAndCost(meterBefore, meterAfter);
276
+ const output = result.response || null;
277
+ this.logger.debug("Streaming response completed from llama.cpp");
278
+ this.statelogClient?.promptResponse({ output, usage, cost });
279
+ pushChunk({
280
+ type: "done",
281
+ result: {
282
+ output,
283
+ toolCalls,
284
+ usage,
285
+ cost,
286
+ model: this.getModelName(),
287
+ },
288
+ });
289
+ })
290
+ .catch((error) => {
291
+ pushChunk({ type: "error", error: error.message });
292
+ })
293
+ .finally(() => {
294
+ done = true;
295
+ chat.dispose();
296
+ context.dispose();
297
+ // Wake up the generator if it's waiting
298
+ if (resolveWaiter) {
299
+ resolveWaiter();
300
+ resolveWaiter = null;
301
+ }
302
+ });
303
+ // Yield chunks as they arrive
304
+ while (!done || chunks.length > 0) {
305
+ if (chunks.length > 0) {
306
+ yield chunks.shift();
307
+ }
308
+ else if (!done) {
309
+ await new Promise((resolve) => {
310
+ resolveWaiter = resolve;
311
+ });
312
+ }
313
+ }
314
+ await promptPromise;
315
+ }
316
+ }
package/dist/clients/ollama.js CHANGED
@@ -1,9 +1,9 @@
1
1
  import { Ollama } from "ollama";
2
2
  import { ToolCall } from "../classes/ToolCall.js";
3
- import { getLogger } from "../logger.js";
3
+ import { getLogger } from "../util/logger.js";
4
4
  import { success, } from "../types.js";
5
5
  import { zodToGoogleTool } from "../util/tool.js";
6
- import { sanitizeAttributes } from "../util.js";
6
+ import { sanitizeAttributes } from "../util/util.js";
7
7
  import { BaseClient } from "./baseClient.js";
8
8
  import { SmolContextWindowExceededError } from "../smolError.js";
9
9
  import { Model } from "../model.js";
package/dist/clients/openai.js CHANGED
@@ -1,8 +1,8 @@
1
1
  import OpenAI from "openai";
2
2
  import { success, } from "../types.js";
3
3
  import { ToolCall } from "../classes/ToolCall.js";
4
- import { isFunctionToolCall, sanitizeAttributes } from "../util.js";
5
- import { getLogger } from "../logger.js";
4
+ import { isFunctionToolCall, sanitizeAttributes } from "../util/util.js";
5
+ import { getLogger } from "../util/logger.js";
6
6
  import { BaseClient } from "./baseClient.js";
7
7
  import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
8
8
  import { zodToOpenAITool } from "../util/tool.js";
@@ -111,7 +111,9 @@ export class SmolOpenAi extends BaseClient {
111
111
  }
112
112
  else {
113
113
  this.logger.warn(`Unsupported tool call type: ${tc.type} for tool call ID: ${tc.id}`);
114
- this.statelogClient?.debug(`Unsupported tool call type: ${tc.type}`, { toolCallId: tc.id });
114
+ this.statelogClient?.debug(`Unsupported tool call type: ${tc.type}`, {
115
+ toolCallId: tc.id,
116
+ });
115
117
  }
116
118
  }
117
119
  }