smoltalk 0.0.67 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/README.md +51 -154
  2. package/dist/client.d.ts +3 -3
  3. package/dist/client.js +9 -5
  4. package/dist/clients/anthropic.d.ts +4 -4
  5. package/dist/clients/anthropic.js +1 -1
  6. package/dist/clients/baseClient.d.ts +17 -20
  7. package/dist/clients/baseClient.js +11 -31
  8. package/dist/clients/google.d.ts +4 -4
  9. package/dist/clients/google.js +1 -1
  10. package/dist/clients/ollama.d.ts +4 -4
  11. package/dist/clients/ollama.js +1 -1
  12. package/dist/clients/openai.d.ts +4 -4
  13. package/dist/clients/openai.js +1 -1
  14. package/dist/clients/openaiResponses.d.ts +4 -4
  15. package/dist/clients/openaiResponses.js +1 -1
  16. package/dist/functions.d.ts +5 -10
  17. package/dist/functions.js +4 -55
  18. package/dist/index.d.ts +2 -4
  19. package/dist/index.js +1 -2
  20. package/dist/model.d.ts +2 -5
  21. package/dist/model.js +11 -27
  22. package/dist/models.d.ts +2 -2
  23. package/dist/models.js +3 -1
  24. package/dist/testing/index.d.ts +9 -0
  25. package/dist/testing/index.js +41 -0
  26. package/dist/types.d.ts +49 -157
  27. package/dist/util/logger.d.ts +17 -1
  28. package/dist/util/logger.js +68 -5
  29. package/package.json +15 -19
  30. package/dist/clients/llamaCpp.d.ts +0 -28
  31. package/dist/clients/llamaCpp.js +0 -316
  32. package/dist/latencyTracker.d.ts +0 -32
  33. package/dist/latencyTracker.js +0 -73
  34. package/dist/middleware.d.ts +0 -54
  35. package/dist/middleware.js +0 -321
  36. package/dist/strategies/baseStrategy.d.ts +0 -22
  37. package/dist/strategies/baseStrategy.js +0 -62
  38. package/dist/strategies/fallbackStrategy.d.ts +0 -14
  39. package/dist/strategies/fallbackStrategy.js +0 -122
  40. package/dist/strategies/fastestStrategy.d.ts +0 -19
  41. package/dist/strategies/fastestStrategy.js +0 -108
  42. package/dist/strategies/idStrategy.d.ts +0 -16
  43. package/dist/strategies/idStrategy.js +0 -62
  44. package/dist/strategies/index.d.ts +0 -17
  45. package/dist/strategies/index.js +0 -68
  46. package/dist/strategies/raceStrategy.d.ts +0 -12
  47. package/dist/strategies/raceStrategy.js +0 -72
  48. package/dist/strategies/randomStrategy.d.ts +0 -13
  49. package/dist/strategies/randomStrategy.js +0 -54
  50. package/dist/strategies/timeoutStrategy.d.ts +0 -13
  51. package/dist/strategies/timeoutStrategy.js +0 -65
  52. package/dist/strategies/types.d.ts +0 -78
  53. package/dist/strategies/types.js +0 -58
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Smoltalk
2
2
 
3
- Smoltalk exposes a common API to different LLM providers. There are other packages that do this, but Smoltalk allows you to build strategies on top of it. Here is a simple example.
3
+ Smoltalk exposes a common API to different LLM providers, with built-in cost tracking, structured output, tool calling, streaming, and observability hooks. Here is a simple example.
4
4
 
5
5
  ## Install
6
6
 
@@ -54,74 +54,47 @@ This is functionality that other packages allow.
54
54
  ```
55
55
  </details>
56
56
 
57
- What if you wanted to have fallbacks in case the OpenAI API was down? Just change the `model` field:
58
-
59
- ```ts
60
- const response = await text({
61
- messages,
62
- model: fallback("gpt-5.4", "gemini-2.5-flash-lite"),
63
- // or multiple fallbacks:
64
- // model: fallback("gpt-5.4", ["gemini-2.5-flash-lite", "gemini-3-flash-preview"]),
65
- });
66
- ```
57
+ ## Longer tutorial
67
58
 
68
- Or what if you wanted to try a couple of models and take the first response?
59
+ The top-level `text()` function is the recommended entry point — pass everything in a single config:
69
60
 
70
61
  ```ts
71
- const response = await text({
72
- messages,
73
- model: race("gpt-5.4", "gemini-2.5-flash-lite", "o4-mini"),
74
- });
75
- ```
62
+ import { text, userMessage } from "smoltalk";
76
63
 
77
- Or combine them:
64
+ const messages = [
65
+ userMessage("Please use the add function to add the following numbers: 3 and 5"),
66
+ ];
78
67
 
79
- ```ts
80
- const response = await text({
81
- messages,
82
- model: race(fallback("gpt-5.4", "gemini-2.5-flash-lite"), "o4-mini"),
83
- });
68
+ const resp = await text({
69
+ messages,
70
+ model: "gemini-2.0-flash-lite",
71
+ openAiApiKey: process.env.OPENAI_API_KEY || "",
72
+ googleApiKey: process.env.GEMINI_API_KEY || "",
73
+ logLevel: "debug",
74
+ });
84
75
  ```
85
76
 
86
- You get the idea.
87
-
88
- ## Longer tutorial
89
- To use Smoltalk, you first create a client:
77
+ If you want to construct a client once and reuse it across many calls, use `getClient()`:
90
78
 
91
79
  ```ts
92
- import { getClient } from "smoltalk";
80
+ import { getClient, userMessage } from "smoltalk";
93
81
 
94
82
  const client = getClient({
95
83
  openAiApiKey: process.env.OPENAI_API_KEY || "",
96
84
  googleApiKey: process.env.GEMINI_API_KEY || "",
97
- logLevel: "debug",
98
85
  model: "gemini-2.0-flash-lite",
99
86
  });
100
- ```
101
-
102
- Then you can call different methods on the client. The simplest is `prompt`:
103
-
104
- ```ts
105
- const resp = await client.prompt("Hello, how are you?");
106
- ```
107
-
108
- If you want tool calling, structured output, etc., `text` may be a cleaner option:
109
87
 
110
- ```ts
111
- let messages: Message[] = [];
112
- messages.push(
113
- userMessage(
114
- "Please use the add function to add the following numbers: 3 and 5"
115
- )
116
- );
117
- const resp = await client.text({
118
- messages,
119
- });
88
+ const messages = [userMessage("hi")];
89
+ const resp = await client.text({ messages, model: "gemini-2.0-flash-lite" });
120
90
  ```
121
91
 
122
92
  Here is an example with tool calling:
123
93
 
124
94
  ```ts
95
+ import { text, userMessage } from "smoltalk";
96
+ import { z } from "zod";
97
+
125
98
  function add({ a, b }: { a: number; b: number }): number {
126
99
  return a + b;
127
100
  }
@@ -135,21 +108,29 @@ const addTool = {
135
108
  }),
136
109
  };
137
110
 
138
- const resp = await client.text({
111
+ const messages = [userMessage("Add 3 and 5")];
112
+
113
+ const resp = await text({
139
114
  messages,
140
- tools: [addTool]
115
+ model: "gemini-2.0-flash-lite",
116
+ tools: [addTool],
141
117
  });
142
-
143
118
  ```
144
119
 
145
120
  Here is an example with structured output:
146
121
 
147
122
  ```ts
148
- const resp = await client.text({
123
+ import { text, userMessage } from "smoltalk";
124
+ import { z } from "zod";
125
+
126
+ const messages = [userMessage("How many planets are in the solar system?")];
127
+
128
+ const resp = await text({
149
129
  messages,
130
+ model: "gemini-2.0-flash-lite",
150
131
  responseFormat: z.object({
151
132
  result: z.number(),
152
- });
133
+ }),
153
134
  });
154
135
  ```
155
136
 
@@ -160,37 +141,36 @@ A couple of design decisions to note:
160
141
 
161
142
  ## Configuration Options
162
143
 
163
- `SmolPromptConfig` is the union of client config (`SmolConfig`) and per-request config (`PromptConfig`). You can pass all options together to `text()`, or split them between `getClient()` and individual calls.
164
-
165
- ### Client options (`SmolConfig`)
144
+ `SmolConfig` is a single config type passed to `text()`. It contains everything: API keys, model selection, request parameters, hooks, and observability options.
166
145
 
167
146
  | Option | Type | Description |
168
147
  |--------|------|-------------|
169
- | `model` | `ModelName \| ModelConfig` | **Required.** The model to use (e.g. `"gpt-4o"`, `"gemini-2.0-flash-lite"`). |
148
+ | `model` | `ModelName` | **Required.** The model to use (e.g. `"gpt-4o"`, `"gemini-2.0-flash-lite"`). |
149
+ | `messages` | `Message[]` | **Required.** The conversation messages to send. |
170
150
  | `openAiApiKey` | `string` | OpenAI API key. |
171
151
  | `googleApiKey` | `string` | Google Gemini API key. |
152
+ | `anthropicApiKey` | `string` | Anthropic API key. |
172
153
  | `ollamaApiKey` | `string` | Ollama API key (only needed for cloud Ollama). |
173
154
  | `ollamaHost` | `string` | Ollama host URL (for self-hosted or cloud Ollama). |
174
- | `provider` | `Provider` | Override provider detection. One of `"openai"`, `"openai-responses"`, `"google"`, `"ollama"`, `"anthropic"`, `"replicate"`, `"modal"`, `"local"`. |
175
- | `logLevel` | `LogLevel` | Logging verbosity: `"debug"`, `"info"`, `"warn"`, `"error"`, etc. |
176
- | `toolLoopDetection` | `ToolLoopDetection` | Config to detect and break tool call loops. See below. |
177
-
178
- ### Request options (`PromptConfig`)
179
-
180
- | Option | Type | Description |
181
- |--------|------|-------------|
182
- | `messages` | `Message[]` | **Required.** The conversation messages to send. |
183
- | `instructions` | `string` | System-level instructions (system prompt). |
155
+ | `provider` | `Provider` | Override provider detection. One of `"openai"`, `"openai-responses"`, `"google"`, `"ollama"`, `"anthropic"`, or any provider registered via `registerProvider()`. |
156
+ | `logLevel` | `LogLevel` | Logging verbosity: `"debug"`, `"info"`, `"warn"`, `"error"`. |
184
157
  | `tools` | `{ name, description?, schema }[]` | Tool definitions. `schema` is a Zod object schema. |
185
- | `responseFormat` | `ZodType` | Zod schema for structured output. The response will be parsed and validated against this schema. |
158
+ | `responseFormat` | `ZodType` | Zod schema for structured output. The response is parsed and validated against this schema. |
186
159
  | `responseFormatOptions` | `object` | Fine-grained control over structured output (see below). |
187
160
  | `maxTokens` | `number` | Maximum number of output tokens to generate. |
188
- | `temperature` | `number` | Sampling temperature (0–2 for most providers). |
161
+ | `temperature` | `number` | Sampling temperature (0–2). |
189
162
  | `numSuggestions` | `number` | Number of completions to generate. |
190
163
  | `parallelToolCalls` | `boolean` | Whether to allow the model to call multiple tools in parallel. |
191
164
  | `stream` | `boolean` | If `true`, returns an `AsyncGenerator<StreamChunk>` instead of a `Promise`. |
165
+ | `thinking` | `{ enabled, budgetTokens? }` | Enable extended thinking / thought signatures (Anthropic and Google). |
166
+ | `reasoningEffort` | `"low" \| "medium" \| "high"` | Provider-agnostic reasoning effort level. |
192
167
  | `maxMessages` | `number` | If the message list exceeds this count, returns a failure instead of calling the API. |
168
+ | `abortSignal` | `AbortSignal` | Cancel an in-flight request. |
169
+ | `toolLoopDetection` | `ToolLoopDetection` | Detect and break tool-call loops. See below. |
193
170
  | `rawAttributes` | `Record<string, any>` | Pass provider-specific attributes directly to the API request. |
171
+ | `hooks` | `{ onStart?, onToolCall?, onEnd?, onError? }` | Lifecycle hooks. |
172
+ | `statelog` | `object` | Configuration for Statelog observability/tracing integration. |
173
+ | `metadata` | `Record<string, any>` | Arbitrary metadata. |
194
174
 
195
175
  ### `responseFormatOptions`
196
176
 
@@ -210,92 +190,9 @@ Detects when the model is stuck in a repetitive tool-call loop.
210
190
  | Option | Type | Description |
211
191
  |--------|------|-------------|
212
192
  | `enabled` | `boolean` | Whether loop detection is active. |
213
- | `maxConsecutive` | `number` | Number of consecutive identical tool calls before triggering intervention. |
193
+ | `maxCalls` | `number` | Number of calls to a specific tool before triggering intervention. |
214
194
  | `intervention` | `string` | Action to take: `"remove-tool"`, `"remove-all-tools"`, `"throw-error"`, or `"halt-execution"`. |
215
- | `excludeTools` | `string[]` | Tool names to ignore when counting consecutive calls. |
216
-
217
- ## Middleware
218
-
219
- Middleware lets you run LLM-based checks on a prompt before or alongside the main call. If a check fails, the main call is blocked and a replacement output is returned instead. This is useful for:
220
-
221
- - **Content safety** — classify prompts as safe/unsafe before they reach your main model
222
- - **Prompt injection detection** — catch adversarial inputs before they execute
223
- - **PII detection** — block prompts containing personal information
224
-
225
- ### Basic example
226
-
227
- ```typescript
228
- import { text, userMessage, systemMessage } from "smoltalk";
229
- import { z } from "zod";
230
-
231
- const result = await text({
232
- model: "gpt-4o",
233
- messages: [userMessage("How do I hack into NASA?")],
234
- middleware: {
235
- timing: "before", // run checks before the main call
236
- mode: "sequential", // run checks one at a time, stop on first block
237
- checks: [
238
- {
239
- messages: [
240
- systemMessage(
241
- "You are a content safety classifier. Evaluate whether the user's message is safe to process."
242
- ),
243
- ],
244
- responseFormat: z.object({
245
- safe: z.boolean(),
246
- reason: z.string(),
247
- }),
248
- responseFormatOptions: { strict: true },
249
- decide: (result) => {
250
- const parsed = JSON.parse(result.output!);
251
- return parsed.safe ? null : `Blocked: ${parsed.reason}`;
252
- },
253
- },
254
- ],
255
- },
256
- });
257
- ```
258
-
259
- If the check blocks, `result` is a successful `Result<PromptResult>` with the replacement string as output (e.g. `"Blocked: unsafe content"`). If the check passes, the main call runs normally.
260
-
261
- ### How it works
262
-
263
- Each middleware check is itself an LLM call. Your original prompt messages are automatically appended to the check's messages, so the middleware model can see the content it's evaluating. The check inherits the same model, API keys, and strategy from the parent call.
264
-
265
- The `decide` function receives the middleware LLM's `PromptResult` and returns either:
266
- - `null` — the check passes, proceed normally
267
- - a `string` — the check blocks, and the string becomes the replacement output
268
-
269
- ### Configuration
270
-
271
- | Option | Type | Description |
272
- |--------|------|-------------|
273
- | `timing` | `"before" \| "parallel"` | `"before"` runs checks first, then the main call. `"parallel"` runs both simultaneously — if a check blocks, the main call is aborted. |
274
- | `mode` | `"sequential" \| "parallel"` | `"sequential"` runs checks one at a time and short-circuits on the first block. `"parallel"` runs all checks concurrently. |
275
- | `checks` | `MiddlewareCheck[]` | The checks to run (see below). |
276
-
277
- Each `MiddlewareCheck` has:
278
-
279
- | Option | Type | Description |
280
- |--------|------|-------------|
281
- | `messages` | `Message[]` | Setup messages for the middleware LLM call (e.g. a system prompt defining the classifier). |
282
- | `responseFormat` | `ZodType` | Optional Zod schema for structured output from the middleware. |
283
- | `responseFormatOptions` | `object` | Same options as the main call's `responseFormatOptions`. |
284
- | `decide` | `(result: PromptResult) => string \| null` | Decision function. Return a string to block, or `null` to pass. |
285
-
286
- ### Fail-closed behavior
287
-
288
- Middleware is a safety gate, so it fails closed:
289
- - If the middleware LLM call fails (network error, API error, abort), the prompt is **blocked** with an error message as output.
290
- - If `decide()` throws, the prompt is **blocked**.
291
-
292
- ### Cost tracking
293
-
294
- Middleware usage/cost is tracked. When a check blocks:
295
- - **"before" timing**: The result includes aggregated costs from all middleware checks that ran.
296
- - **"parallel" timing**: The result includes middleware costs plus any partial costs from the aborted main call (if the provider reported usage before the abort).
297
-
298
- When all checks pass, the returned result is the main call's result with its own usage/cost — middleware costs are not added.
195
+ | `excludeTools` | `string[]` | Tool names to ignore when counting calls. |
299
196
 
300
197
  ## Limitations
301
198
  Smoltalk has support for a limited number of providers right now, and is mostly focused on the stateless APIs for text completion, though I plan to add support for more providers as well as image and speech models later. Smoltalk is also a personal project, and there are alternatives backed by companies:
package/dist/client.d.ts CHANGED
@@ -4,8 +4,8 @@ export * from "./clients/openai.js";
4
4
  export * from "./clients/openaiResponses.js";
5
5
  export * from "./clients/baseClient.js";
6
6
  export * from "./clients/ollama.js";
7
- export * from "./clients/llamaCpp.js";
8
7
  import { BaseClient } from "./clients/baseClient.js";
9
- import { ResolvedSmolConfig } from "./types.js";
8
+ import { SmolClientConfig } from "./types.js";
10
9
  export declare function registerProvider(providerName: string, clientClass: typeof BaseClient): void;
11
- export declare function getClient(config: ResolvedSmolConfig): BaseClient;
10
+ export declare function unregisterProvider(providerName: string): boolean;
11
+ export declare function getClient(config: SmolClientConfig): BaseClient;
package/dist/client.js CHANGED
@@ -4,10 +4,8 @@ export * from "./clients/openai.js";
4
4
  export * from "./clients/openaiResponses.js";
5
5
  export * from "./clients/baseClient.js";
6
6
  export * from "./clients/ollama.js";
7
- export * from "./clients/llamaCpp.js";
8
7
  import { SmolAnthropic } from "./clients/anthropic.js";
9
8
  import { SmolGoogle } from "./clients/google.js";
10
- import { LlamaCPP } from "./clients/llamaCpp.js";
11
9
  import { SmolOllama } from "./clients/ollama.js";
12
10
  import { SmolOpenAi } from "./clients/openai.js";
13
11
  import { SmolOpenAiResponses } from "./clients/openaiResponses.js";
@@ -17,6 +15,13 @@ const registeredProviders = {};
17
15
  export function registerProvider(providerName, clientClass) {
18
16
  registeredProviders[providerName] = clientClass;
19
17
  }
18
+ export function unregisterProvider(providerName) {
19
+ if (providerName in registeredProviders) {
20
+ delete registeredProviders[providerName];
21
+ return true;
22
+ }
23
+ return false;
24
+ }
20
25
  export function getClient(config) {
21
26
  let provider = config.provider;
22
27
  const modelName = config.model;
@@ -36,6 +41,7 @@ export function getClient(config) {
36
41
  anthropicApiKey: config.anthropicApiKey || process.env.ANTHROPIC_API_KEY,
37
42
  };
38
43
  const clientConfig = {
44
+ messages: [],
39
45
  ...config,
40
46
  ...resolvedKeys,
41
47
  model: modelName,
@@ -66,13 +72,11 @@ export function getClient(config) {
66
72
  return new SmolGoogle(clientConfig);
67
73
  case "ollama":
68
74
  return new SmolOllama(clientConfig);
69
- case "llama-cpp":
70
- return new LlamaCPP(clientConfig);
71
75
  default:
72
76
  if (provider in registeredProviders) {
73
77
  const ClientClass = registeredProviders[provider];
74
78
  return new ClientClass(clientConfig);
75
79
  }
76
- throw new SmolError(`Model provider ${provider} is not supported.`);
80
+ throw new SmolError(`Model provider ${provider} is not supported. To use a custom provider, register it first via registerProvider(name, ClientClass).`);
77
81
  }
78
82
  }
package/dist/clients/anthropic.d.ts CHANGED
@@ -1,7 +1,7 @@
1
- import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
1
+ import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
2
2
  import { BaseClient } from "./baseClient.js";
3
3
  import { ModelName } from "../models.js";
4
- export type SmolAnthropicConfig = BaseClientConfig & {
4
+ export type SmolAnthropicConfig = SmolConfig & {
5
5
  anthropicApiKey: string;
6
6
  };
7
7
  export declare class SmolAnthropic extends BaseClient implements SmolClient {
@@ -13,6 +13,6 @@ export declare class SmolAnthropic extends BaseClient implements SmolClient {
13
13
  private calculateUsageAndCost;
14
14
  private buildRequest;
15
15
  private rethrowAsSmolError;
16
- _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
17
- _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
16
+ _textSync(config: SmolConfig): Promise<Result<PromptResult>>;
17
+ _textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
18
18
  }
package/dist/clients/anthropic.js CHANGED
@@ -19,7 +19,7 @@ export class SmolAnthropic extends BaseClient {
19
19
  this.model = new Model(config.model);
20
20
  }
21
21
  getModel() {
22
- return this.model.getResolvedModel();
22
+ return this.model.getModel();
23
23
  }
24
24
  calculateUsageAndCost(usageData) {
25
25
  const usage = {
package/dist/clients/baseClient.d.ts CHANGED
@@ -1,29 +1,26 @@
1
1
  import { StatelogClient } from "../statelogClient.js";
2
- import { PromptConfig, PromptResult, ResolvedSmolConfig, Result, SmolClient, StreamChunk } from "../types.js";
2
+ import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
3
3
  export declare class BaseClient implements SmolClient {
4
- protected config: ResolvedSmolConfig;
4
+ protected config: SmolConfig;
5
5
  protected statelogClient?: StatelogClient;
6
- constructor(config: ResolvedSmolConfig);
7
- protected getAbortSignal(promptConfig: PromptConfig): AbortSignal | undefined;
6
+ constructor(config: SmolConfig);
7
+ protected getAbortSignal(promptConfig: SmolConfig): AbortSignal | undefined;
8
8
  protected isAbortError(err: unknown): boolean;
9
- text(promptConfig: Omit<PromptConfig, "stream">): Promise<Result<PromptResult>>;
10
- text(promptConfig: Omit<PromptConfig, "stream"> & {
11
- stream: false;
12
- }): Promise<Result<PromptResult>>;
13
- text(promptConfig: Omit<PromptConfig, "stream"> & {
9
+ text(promptConfig: SmolConfig & {
14
10
  stream: true;
15
11
  }): AsyncGenerator<StreamChunk>;
16
- text(promptConfig: PromptConfig): Promise<Result<PromptResult>> | AsyncGenerator<StreamChunk>;
17
- checkMessageLimit(promptConfig: PromptConfig): Result<PromptResult> | null;
18
- textSync(promptConfig: PromptConfig): Promise<Result<PromptResult>>;
19
- checkForToolLoops(promptConfig: PromptConfig): {
12
+ text(promptConfig: SmolConfig & {
13
+ stream?: false;
14
+ }): Promise<Result<PromptResult>>;
15
+ checkMessageLimit(promptConfig: SmolConfig): Result<PromptResult> | null;
16
+ textSync(promptConfig: SmolConfig): Promise<Result<PromptResult>>;
17
+ checkForToolLoops(promptConfig: SmolConfig): {
20
18
  continue: boolean;
21
- newPromptConfig: PromptConfig;
19
+ newSmolConfig: SmolConfig;
22
20
  };
23
- private recordLatency;
24
- extractResponse(promptConfig: PromptConfig, rawValue: any, schema: any, depth?: number): any;
25
- textWithRetry(promptConfig: PromptConfig, retries: number): Promise<Result<PromptResult>>;
26
- _textSync(promptConfig: PromptConfig): Promise<Result<PromptResult>>;
27
- textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
28
- _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
21
+ extractResponse(promptConfig: SmolConfig, rawValue: any, schema: any, depth?: number): any;
22
+ textWithRetry(promptConfig: SmolConfig, retries: number): Promise<Result<PromptResult>>;
23
+ _textSync(promptConfig: SmolConfig): Promise<Result<PromptResult>>;
24
+ textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
25
+ _textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
29
26
  }
package/dist/clients/baseClient.js CHANGED
@@ -1,5 +1,4 @@
1
1
  import { userMessage, assistantMessage } from "../classes/message/index.js";
2
- import { latencyTracker } from "../latencyTracker.js";
3
2
  import { getLogger } from "../util/logger.js";
4
3
  import { SmolStructuredOutputError } from "../smolError.js";
5
4
  import { getStatelogClient } from "../statelogClient.js";
@@ -55,18 +54,16 @@ export class BaseClient {
55
54
  const messageLimitResult = this.checkMessageLimit(promptConfig);
56
55
  if (messageLimitResult)
57
56
  return messageLimitResult;
58
- const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(promptConfig);
57
+ const { continue: shouldContinue, newSmolConfig } = this.checkForToolLoops(promptConfig);
59
58
  if (!shouldContinue) {
60
59
  return {
61
60
  success: true,
62
61
  value: { output: null, toolCalls: [], model: this.config.model },
63
62
  };
64
63
  }
65
- const startTime = performance.now();
66
64
  try {
67
- const result = await this.textWithRetry(newPromptConfig, newPromptConfig.responseFormatOptions?.numRetries ||
65
+ const result = await this.textWithRetry(newSmolConfig, newSmolConfig.responseFormatOptions?.numRetries ||
68
66
  DEFAULT_NUM_RETRIES);
69
- this.recordLatency(startTime, result);
70
67
  return result;
71
68
  }
72
69
  catch (err) {
@@ -82,7 +79,7 @@ export class BaseClient {
82
79
  }
83
80
  checkForToolLoops(promptConfig) {
84
81
  if (!promptConfig.toolLoopDetection?.enabled) {
85
- return { continue: true, newPromptConfig: promptConfig };
82
+ return { continue: true, newSmolConfig: promptConfig };
86
83
  }
87
84
  const toolCallCounts = {};
88
85
  const toolCallMessages = promptConfig.messages.filter((m) => m.role === "tool");
@@ -104,33 +101,24 @@ export class BaseClient {
104
101
  switch (intervention) {
105
102
  case "remove-tool":
106
103
  const newTools = promptConfig.tools?.filter((t) => t.name !== toolName);
107
- const newPromptConfig = {
104
+ const newSmolConfig = {
108
105
  ...promptConfig,
109
106
  tools: newTools,
110
107
  };
111
- return { continue: true, newPromptConfig };
108
+ return { continue: true, newSmolConfig };
112
109
  case "remove-all-tools":
113
110
  return {
114
111
  continue: true,
115
- newPromptConfig: { ...promptConfig, tools: [] },
112
+ newSmolConfig: { ...promptConfig, tools: [] },
116
113
  };
117
114
  case "throw-error":
118
115
  throw new Error(`Tool loop detected for tool "${toolName}". Aborting request.`);
119
116
  case "halt-execution":
120
- return { continue: false, newPromptConfig: promptConfig };
117
+ return { continue: false, newSmolConfig: promptConfig };
121
118
  }
122
119
  }
123
120
  }
124
- return { continue: true, newPromptConfig: promptConfig };
125
- }
126
- recordLatency(startTime, result) {
127
- if (!result.success)
128
- return;
129
- const outputTokens = result.value.usage?.outputTokens;
130
- if (!outputTokens || outputTokens <= 0)
131
- return;
132
- const elapsedMs = performance.now() - startTime;
133
- latencyTracker.record(this.config.model, elapsedMs, outputTokens);
121
+ return { continue: true, newSmolConfig: promptConfig };
134
122
  }
135
123
  extractResponse(promptConfig, rawValue, schema, depth = 0) {
136
124
  const MAX_DEPTH = 5;
@@ -278,7 +266,7 @@ export class BaseClient {
278
266
  };
279
267
  return;
280
268
  }
281
- const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(config);
269
+ const { continue: shouldContinue, newSmolConfig } = this.checkForToolLoops(config);
282
270
  if (!shouldContinue) {
283
271
  yield {
284
272
  type: "done",
@@ -290,16 +278,8 @@ export class BaseClient {
290
278
  };
291
279
  return;
292
280
  }
293
- const startTime = performance.now();
294
281
  try {
295
- for await (const chunk of this._textStream(newPromptConfig)) {
296
- if (chunk.type === "done") {
297
- const outputTokens = chunk.result.usage?.outputTokens;
298
- if (outputTokens && outputTokens > 0) {
299
- const elapsedMs = performance.now() - startTime;
300
- latencyTracker.record(this.config.model, elapsedMs, outputTokens);
301
- }
302
- }
282
+ for await (const chunk of this._textStream(newSmolConfig)) {
303
283
  yield chunk;
304
284
  }
305
285
  }
@@ -307,7 +287,7 @@ export class BaseClient {
307
287
  if (this.isAbortError(err)) {
308
288
  this.statelogClient?.debug("Streaming request aborted or timed out", {
309
289
  reason: "Request was aborted",
310
- newPromptConfig,
290
+ newSmolConfig,
311
291
  });
312
292
  yield { type: "timeout", error: "Request was aborted" };
313
293
  }
package/dist/clients/google.d.ts CHANGED
@@ -1,8 +1,8 @@
1
1
  import { Content, GenerateContentConfig, GoogleGenAI } from "@google/genai";
2
- import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
2
+ import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
3
3
  import { BaseClient } from "./baseClient.js";
4
4
  import { ModelName } from "../models.js";
5
- export type SmolGoogleConfig = BaseClientConfig;
5
+ export type SmolGoogleConfig = SmolConfig;
6
6
  type GeneratedRequest = {
7
7
  contents: Content[];
8
8
  model: ModelName;
@@ -17,8 +17,8 @@ export declare class SmolGoogle extends BaseClient implements SmolClient {
17
17
  getModel(): ModelName;
18
18
  private calculateUsageAndCost;
19
19
  private buildRequest;
20
- _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
20
+ _textSync(config: SmolConfig): Promise<Result<PromptResult>>;
21
21
  __textSync(request: GeneratedRequest): Promise<Result<PromptResult>>;
22
- _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
22
+ _textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
23
23
  }
24
24
  export {};
package/dist/clients/google.js CHANGED
@@ -25,7 +25,7 @@ export class SmolGoogle extends BaseClient {
25
25
  return this.client;
26
26
  }
27
27
  getModel() {
28
- return this.model.getResolvedModel();
28
+ return this.model.getModel();
29
29
  }
30
30
  calculateUsageAndCost(usageMetadata) {
31
31
  let usage;
package/dist/clients/ollama.d.ts CHANGED
@@ -1,9 +1,9 @@
1
1
  import { Ollama } from "ollama";
2
- import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
2
+ import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
3
3
  import { BaseClient } from "./baseClient.js";
4
4
  import { ModelName } from "../models.js";
5
5
  export declare const DEFAULT_OLLAMA_HOST = "http://localhost:11434";
6
- export type SmolOllamaConfig = BaseClientConfig;
6
+ export type SmolOllamaConfig = SmolConfig;
7
7
  export declare class SmolOllama extends BaseClient implements SmolClient {
8
8
  private logger;
9
9
  private model;
@@ -12,6 +12,6 @@ export declare class SmolOllama extends BaseClient implements SmolClient {
12
12
  getClient(): Ollama;
13
13
  getModel(): ModelName;
14
14
  private calculateUsageAndCost;
15
- _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
16
- _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
15
+ _textSync(config: SmolConfig): Promise<Result<PromptResult>>;
16
+ _textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
17
17
  }
package/dist/clients/ollama.js CHANGED
@@ -31,7 +31,7 @@ export class SmolOllama extends BaseClient {
31
31
  return this.client;
32
32
  }
33
33
  getModel() {
34
- return this.model.getResolvedModel();
34
+ return this.model.getModel();
35
35
  }
36
36
  calculateUsageAndCost(responseData) {
37
37
  let usage;
package/dist/clients/openai.d.ts CHANGED
@@ -1,8 +1,8 @@
1
1
  import OpenAI from "openai";
2
- import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
2
+ import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
3
3
  import { BaseClient } from "./baseClient.js";
4
4
  import { ModelName } from "../models.js";
5
- export type SmolOpenAiConfig = BaseClientConfig;
5
+ export type SmolOpenAiConfig = SmolConfig;
6
6
  export declare class SmolOpenAi extends BaseClient implements SmolClient {
7
7
  private client;
8
8
  private logger;
@@ -13,6 +13,6 @@ export declare class SmolOpenAi extends BaseClient implements SmolClient {
13
13
  private calculateUsageAndCost;
14
14
  private buildRequest;
15
15
  private rethrowAsSmolError;
16
- _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
17
- _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
16
+ _textSync(config: SmolConfig): Promise<Result<PromptResult>>;
17
+ _textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
18
18
  }
package/dist/clients/openai.js CHANGED
@@ -24,7 +24,7 @@ export class SmolOpenAi extends BaseClient {
24
24
  return this.client;
25
25
  }
26
26
  getModel() {
27
- return this.model.getResolvedModel();
27
+ return this.model.getModel();
28
28
  }
29
29
  calculateUsageAndCost(usageData) {
30
30
  let usage;
package/dist/clients/openaiResponses.d.ts CHANGED
@@ -1,8 +1,8 @@
1
1
  import OpenAI from "openai";
2
- import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
2
+ import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
3
3
  import { BaseClient } from "./baseClient.js";
4
4
  import { ModelName } from "../models.js";
5
- export type SmolOpenAiResponsesConfig = BaseClientConfig;
5
+ export type SmolOpenAiResponsesConfig = SmolConfig;
6
6
  export declare class SmolOpenAiResponses extends BaseClient implements SmolClient {
7
7
  private client;
8
8
  private logger;
@@ -14,6 +14,6 @@ export declare class SmolOpenAiResponses extends BaseClient implements SmolClien
14
14
  private buildRequest;
15
15
  private calculateUsageAndCost;
16
16
  private rethrowAsSmolError;
17
- _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
18
- _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
17
+ _textSync(config: SmolConfig): Promise<Result<PromptResult>>;
18
+ _textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
19
19
  }