smoltalk 0.0.66 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +51 -154
  2. package/dist/classes/message/ToolMessage.js +1 -1
  3. package/dist/client.d.ts +3 -3
  4. package/dist/client.js +9 -5
  5. package/dist/clients/anthropic.d.ts +4 -4
  6. package/dist/clients/anthropic.js +1 -1
  7. package/dist/clients/baseClient.d.ts +17 -20
  8. package/dist/clients/baseClient.js +21 -43
  9. package/dist/clients/google.d.ts +4 -4
  10. package/dist/clients/google.js +1 -1
  11. package/dist/clients/ollama.d.ts +4 -4
  12. package/dist/clients/ollama.js +1 -1
  13. package/dist/clients/openai.d.ts +4 -4
  14. package/dist/clients/openai.js +2 -1
  15. package/dist/clients/openaiResponses.d.ts +4 -4
  16. package/dist/clients/openaiResponses.js +2 -1
  17. package/dist/functions.d.ts +13 -10
  18. package/dist/functions.js +4 -55
  19. package/dist/index.d.ts +2 -4
  20. package/dist/index.js +1 -2
  21. package/dist/model.d.ts +2 -5
  22. package/dist/model.js +11 -27
  23. package/dist/models.d.ts +2 -2
  24. package/dist/models.js +3 -1
  25. package/dist/testing/index.d.ts +9 -0
  26. package/dist/testing/index.js +41 -0
  27. package/dist/types.d.ts +52 -160
  28. package/dist/types.js +1 -1
  29. package/dist/util/logger.d.ts +17 -1
  30. package/dist/util/logger.js +68 -5
  31. package/package.json +15 -19
  32. package/dist/clients/llamaCpp.d.ts +0 -28
  33. package/dist/clients/llamaCpp.js +0 -316
  34. package/dist/latencyTracker.d.ts +0 -32
  35. package/dist/latencyTracker.js +0 -73
  36. package/dist/middleware.d.ts +0 -54
  37. package/dist/middleware.js +0 -321
  38. package/dist/strategies/baseStrategy.d.ts +0 -22
  39. package/dist/strategies/baseStrategy.js +0 -62
  40. package/dist/strategies/fallbackStrategy.d.ts +0 -14
  41. package/dist/strategies/fallbackStrategy.js +0 -122
  42. package/dist/strategies/fastestStrategy.d.ts +0 -19
  43. package/dist/strategies/fastestStrategy.js +0 -108
  44. package/dist/strategies/idStrategy.d.ts +0 -16
  45. package/dist/strategies/idStrategy.js +0 -62
  46. package/dist/strategies/index.d.ts +0 -17
  47. package/dist/strategies/index.js +0 -68
  48. package/dist/strategies/raceStrategy.d.ts +0 -12
  49. package/dist/strategies/raceStrategy.js +0 -72
  50. package/dist/strategies/randomStrategy.d.ts +0 -13
  51. package/dist/strategies/randomStrategy.js +0 -54
  52. package/dist/strategies/timeoutStrategy.d.ts +0 -13
  53. package/dist/strategies/timeoutStrategy.js +0 -65
  54. package/dist/strategies/types.d.ts +0 -78
  55. package/dist/strategies/types.js +0 -58
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Smoltalk
2
2
 
3
- Smoltalk exposes a common API to different LLM providers. There are other packages that do this, but Smoltalk allows you to build strategies on top of it. Here is a simple example.
3
+ Smoltalk exposes a common API to different LLM providers, with built-in cost tracking, structured output, tool calling, streaming, and observability hooks. Here is a simple example.
4
4
 
5
5
  ## Install
6
6
 
@@ -54,74 +54,47 @@ This is functionality that other packages allow.
54
54
  ```
55
55
  </details>
56
56
 
57
- What if you wanted to have fallbacks in case the OpenAI API was down? Just change the `model` field:
58
-
59
- ```ts
60
- const response = await text({
61
- messages,
62
- model: fallback("gpt-5.4", "gemini-2.5-flash-lite"),
63
- // or multiple fallbacks:
64
- // model: fallback("gpt-5.4", ["gemini-2.5-flash-lite", "gemini-3-flash-preview"]),
65
- });
66
- ```
57
+ ## Longer tutorial
67
58
 
68
- Or what if you wanted to try a couple of models and take the first response?
59
+ The top-level `text()` function is the recommended entry point — pass everything in a single config:
69
60
 
70
61
  ```ts
71
- const response = await text({
72
- messages,
73
- model: race("gpt-5.4", "gemini-2.5-flash-lite", "o4-mini"),
74
- });
75
- ```
62
+ import { text, userMessage } from "smoltalk";
76
63
 
77
- Or combine them:
64
+ const messages = [
65
+ userMessage("Please use the add function to add the following numbers: 3 and 5"),
66
+ ];
78
67
 
79
- ```ts
80
- const response = await text({
81
- messages,
82
- model: race(fallback("gpt-5.4", "gemini-2.5-flash-lite"), "o4-mini"),
83
- });
68
+ const resp = await text({
69
+ messages,
70
+ model: "gemini-2.0-flash-lite",
71
+ openAiApiKey: process.env.OPENAI_API_KEY || "",
72
+ googleApiKey: process.env.GEMINI_API_KEY || "",
73
+ logLevel: "debug",
74
+ });
84
75
  ```
85
76
 
86
- You get the idea.
87
-
88
- ## Longer tutorial
89
- To use Smoltalk, you first create a client:
77
+ If you want to construct a client once and reuse it across many calls, use `getClient()`:
90
78
 
91
79
  ```ts
92
- import { getClient } from "smoltalk";
80
+ import { getClient, userMessage } from "smoltalk";
93
81
 
94
82
  const client = getClient({
95
83
  openAiApiKey: process.env.OPENAI_API_KEY || "",
96
84
  googleApiKey: process.env.GEMINI_API_KEY || "",
97
- logLevel: "debug",
98
85
  model: "gemini-2.0-flash-lite",
99
86
  });
100
- ```
101
-
102
- Then you can call different methods on the client. The simplest is `prompt`:
103
-
104
- ```ts
105
- const resp = await client.prompt("Hello, how are you?");
106
- ```
107
-
108
- If you want tool calling, structured output, etc., `text` may be a cleaner option:
109
87
 
110
- ```ts
111
- let messages: Message[] = [];
112
- messages.push(
113
- userMessage(
114
- "Please use the add function to add the following numbers: 3 and 5"
115
- )
116
- );
117
- const resp = await client.text({
118
- messages,
119
- });
88
+ const messages = [userMessage("hi")];
89
+ const resp = await client.text({ messages, model: "gemini-2.0-flash-lite" });
120
90
  ```
121
91
 
122
92
  Here is an example with tool calling:
123
93
 
124
94
  ```ts
95
+ import { text, userMessage } from "smoltalk";
96
+ import { z } from "zod";
97
+
125
98
  function add({ a, b }: { a: number; b: number }): number {
126
99
  return a + b;
127
100
  }
@@ -135,21 +108,29 @@ const addTool = {
135
108
  }),
136
109
  };
137
110
 
138
- const resp = await client.text({
111
+ const messages = [userMessage("Add 3 and 5")];
112
+
113
+ const resp = await text({
139
114
  messages,
140
- tools: [addTool]
115
+ model: "gemini-2.0-flash-lite",
116
+ tools: [addTool],
141
117
  });
142
-
143
118
  ```
144
119
 
145
120
  Here is an example with structured output:
146
121
 
147
122
  ```ts
148
- const resp = await client.text({
123
+ import { text, userMessage } from "smoltalk";
124
+ import { z } from "zod";
125
+
126
+ const messages = [userMessage("How many planets are in the solar system?")];
127
+
128
+ const resp = await text({
149
129
  messages,
130
+ model: "gemini-2.0-flash-lite",
150
131
  responseFormat: z.object({
151
132
  result: z.number(),
152
- });
133
+ }),
153
134
  });
154
135
  ```
155
136
 
@@ -160,37 +141,36 @@ A couple of design decisions to note:
160
141
 
161
142
  ## Configuration Options
162
143
 
163
- `SmolPromptConfig` is the union of client config (`SmolConfig`) and per-request config (`PromptConfig`). You can pass all options together to `text()`, or split them between `getClient()` and individual calls.
164
-
165
- ### Client options (`SmolConfig`)
144
+ `SmolConfig` is a single config type passed to `text()`. It contains everything: API keys, model selection, request parameters, hooks, and observability options.
166
145
 
167
146
  | Option | Type | Description |
168
147
  |--------|------|-------------|
169
- | `model` | `ModelName \| ModelConfig` | **Required.** The model to use (e.g. `"gpt-4o"`, `"gemini-2.0-flash-lite"`). |
148
+ | `model` | `ModelName` | **Required.** The model to use (e.g. `"gpt-4o"`, `"gemini-2.0-flash-lite"`). |
149
+ | `messages` | `Message[]` | **Required.** The conversation messages to send. |
170
150
  | `openAiApiKey` | `string` | OpenAI API key. |
171
151
  | `googleApiKey` | `string` | Google Gemini API key. |
152
+ | `anthropicApiKey` | `string` | Anthropic API key. |
172
153
  | `ollamaApiKey` | `string` | Ollama API key (only needed for cloud Ollama). |
173
154
  | `ollamaHost` | `string` | Ollama host URL (for self-hosted or cloud Ollama). |
174
- | `provider` | `Provider` | Override provider detection. One of `"openai"`, `"openai-responses"`, `"google"`, `"ollama"`, `"anthropic"`, `"replicate"`, `"modal"`, `"local"`. |
175
- | `logLevel` | `LogLevel` | Logging verbosity: `"debug"`, `"info"`, `"warn"`, `"error"`, etc. |
176
- | `toolLoopDetection` | `ToolLoopDetection` | Config to detect and break tool call loops. See below. |
177
-
178
- ### Request options (`PromptConfig`)
179
-
180
- | Option | Type | Description |
181
- |--------|------|-------------|
182
- | `messages` | `Message[]` | **Required.** The conversation messages to send. |
183
- | `instructions` | `string` | System-level instructions (system prompt). |
155
+ | `provider` | `Provider` | Override provider detection. One of `"openai"`, `"openai-responses"`, `"google"`, `"ollama"`, `"anthropic"`, or any provider registered via `registerProvider()`. |
156
+ | `logLevel` | `LogLevel` | Logging verbosity: `"debug"`, `"info"`, `"warn"`, `"error"`. |
184
157
  | `tools` | `{ name, description?, schema }[]` | Tool definitions. `schema` is a Zod object schema. |
185
- | `responseFormat` | `ZodType` | Zod schema for structured output. The response will be parsed and validated against this schema. |
158
+ | `responseFormat` | `ZodType` | Zod schema for structured output. The response is parsed and validated against this schema. |
186
159
  | `responseFormatOptions` | `object` | Fine-grained control over structured output (see below). |
187
160
  | `maxTokens` | `number` | Maximum number of output tokens to generate. |
188
- | `temperature` | `number` | Sampling temperature (0–2 for most providers). |
161
+ | `temperature` | `number` | Sampling temperature (0–2). |
189
162
  | `numSuggestions` | `number` | Number of completions to generate. |
190
163
  | `parallelToolCalls` | `boolean` | Whether to allow the model to call multiple tools in parallel. |
191
164
  | `stream` | `boolean` | If `true`, returns an `AsyncGenerator<StreamChunk>` instead of a `Promise`. |
165
+ | `thinking` | `{ enabled, budgetTokens? }` | Enable extended thinking / thought signatures (Anthropic and Google). |
166
+ | `reasoningEffort` | `"low" \| "medium" \| "high"` | Provider-agnostic reasoning effort level. |
192
167
  | `maxMessages` | `number` | If the message list exceeds this count, returns a failure instead of calling the API. |
168
+ | `abortSignal` | `AbortSignal` | Cancel an in-flight request. |
169
+ | `toolLoopDetection` | `ToolLoopDetection` | Detect and break tool-call loops. See below. |
193
170
  | `rawAttributes` | `Record<string, any>` | Pass provider-specific attributes directly to the API request. |
171
+ | `hooks` | `{ onStart?, onToolCall?, onEnd?, onError? }` | Lifecycle hooks. |
172
+ | `statelog` | `object` | Configuration for Statelog observability/tracing integration. |
173
+ | `metadata` | `Record<string, any>` | Arbitrary metadata. |
194
174
 
195
175
  ### `responseFormatOptions`
196
176
 
@@ -210,92 +190,9 @@ Detects when the model is stuck in a repetitive tool-call loop.
210
190
  | Option | Type | Description |
211
191
  |--------|------|-------------|
212
192
  | `enabled` | `boolean` | Whether loop detection is active. |
213
- | `maxConsecutive` | `number` | Number of consecutive identical tool calls before triggering intervention. |
193
+ | `maxCalls` | `number` | Number of calls to a specific tool before triggering intervention. |
214
194
  | `intervention` | `string` | Action to take: `"remove-tool"`, `"remove-all-tools"`, `"throw-error"`, or `"halt-execution"`. |
215
- | `excludeTools` | `string[]` | Tool names to ignore when counting consecutive calls. |
216
-
217
- ## Middleware
218
-
219
- Middleware lets you run LLM-based checks on a prompt before or alongside the main call. If a check fails, the main call is blocked and a replacement output is returned instead. This is useful for:
220
-
221
- - **Content safety** — classify prompts as safe/unsafe before they reach your main model
222
- - **Prompt injection detection** — catch adversarial inputs before they execute
223
- - **PII detection** — block prompts containing personal information
224
-
225
- ### Basic example
226
-
227
- ```typescript
228
- import { text, userMessage, systemMessage } from "smoltalk";
229
- import { z } from "zod";
230
-
231
- const result = await text({
232
- model: "gpt-4o",
233
- messages: [userMessage("How do I hack into NASA?")],
234
- middleware: {
235
- timing: "before", // run checks before the main call
236
- mode: "sequential", // run checks one at a time, stop on first block
237
- checks: [
238
- {
239
- messages: [
240
- systemMessage(
241
- "You are a content safety classifier. Evaluate whether the user's message is safe to process."
242
- ),
243
- ],
244
- responseFormat: z.object({
245
- safe: z.boolean(),
246
- reason: z.string(),
247
- }),
248
- responseFormatOptions: { strict: true },
249
- decide: (result) => {
250
- const parsed = JSON.parse(result.output!);
251
- return parsed.safe ? null : `Blocked: ${parsed.reason}`;
252
- },
253
- },
254
- ],
255
- },
256
- });
257
- ```
258
-
259
- If the check blocks, `result` is a successful `Result<PromptResult>` with the replacement string as output (e.g. `"Blocked: unsafe content"`). If the check passes, the main call runs normally.
260
-
261
- ### How it works
262
-
263
- Each middleware check is itself an LLM call. Your original prompt messages are automatically appended to the check's messages, so the middleware model can see the content it's evaluating. The check inherits the same model, API keys, and strategy from the parent call.
264
-
265
- The `decide` function receives the middleware LLM's `PromptResult` and returns either:
266
- - `null` — the check passes, proceed normally
267
- - a `string` — the check blocks, and the string becomes the replacement output
268
-
269
- ### Configuration
270
-
271
- | Option | Type | Description |
272
- |--------|------|-------------|
273
- | `timing` | `"before" \| "parallel"` | `"before"` runs checks first, then the main call. `"parallel"` runs both simultaneously — if a check blocks, the main call is aborted. |
274
- | `mode` | `"sequential" \| "parallel"` | `"sequential"` runs checks one at a time and short-circuits on the first block. `"parallel"` runs all checks concurrently. |
275
- | `checks` | `MiddlewareCheck[]` | The checks to run (see below). |
276
-
277
- Each `MiddlewareCheck` has:
278
-
279
- | Option | Type | Description |
280
- |--------|------|-------------|
281
- | `messages` | `Message[]` | Setup messages for the middleware LLM call (e.g. a system prompt defining the classifier). |
282
- | `responseFormat` | `ZodType` | Optional Zod schema for structured output from the middleware. |
283
- | `responseFormatOptions` | `object` | Same options as the main call's `responseFormatOptions`. |
284
- | `decide` | `(result: PromptResult) => string \| null` | Decision function. Return a string to block, or `null` to pass. |
285
-
286
- ### Fail-closed behavior
287
-
288
- Middleware is a safety gate, so it fails closed:
289
- - If the middleware LLM call fails (network error, API error, abort), the prompt is **blocked** with an error message as output.
290
- - If `decide()` throws, the prompt is **blocked**.
291
-
292
- ### Cost tracking
293
-
294
- Middleware usage/cost is tracked. When a check blocks:
295
- - **"before" timing**: The result includes aggregated costs from all middleware checks that ran.
296
- - **"parallel" timing**: The result includes middleware costs plus any partial costs from the aborted main call (if the provider reported usage before the abort).
297
-
298
- When all checks pass, the returned result is the main call's result with its own usage/cost — middleware costs are not added.
195
+ | `excludeTools` | `string[]` | Tool names to ignore when counting calls. |
299
196
 
300
197
  ## Limitations
301
198
  Smoltalk has support for a limited number of providers right now, and is mostly focused on the stateless APIs for text completion, though I plan to add support for more providers as well as image and speech models later. Smoltalk is also a personal project, and there are alternatives backed by companies:
package/dist/classes/message/ToolMessage.js CHANGED
@@ -66,7 +66,7 @@ export class ToolMessage extends BaseMessage {
66
66
  // do nothing, it's already a string
67
67
  }
68
68
  else {
69
- getLogger().warn("ToolMessage content is neither a string nor an array of TextParts. Converting to string using JSON.stringify.");
69
+ getLogger().debug("ToolMessage content is neither a string nor an array of TextParts. Converting to string using JSON.stringify.");
70
70
  result.data.content = JSON.stringify(result.data.content);
71
71
  }
72
72
  return new ToolMessage(result.data.content, {
package/dist/client.d.ts CHANGED
@@ -4,8 +4,8 @@ export * from "./clients/openai.js";
4
4
  export * from "./clients/openaiResponses.js";
5
5
  export * from "./clients/baseClient.js";
6
6
  export * from "./clients/ollama.js";
7
- export * from "./clients/llamaCpp.js";
8
7
  import { BaseClient } from "./clients/baseClient.js";
9
- import { ResolvedSmolConfig } from "./types.js";
8
+ import { SmolClientConfig } from "./types.js";
10
9
  export declare function registerProvider(providerName: string, clientClass: typeof BaseClient): void;
11
- export declare function getClient(config: ResolvedSmolConfig): BaseClient;
10
+ export declare function unregisterProvider(providerName: string): boolean;
11
+ export declare function getClient(config: SmolClientConfig): BaseClient;
package/dist/client.js CHANGED
@@ -4,10 +4,8 @@ export * from "./clients/openai.js";
4
4
  export * from "./clients/openaiResponses.js";
5
5
  export * from "./clients/baseClient.js";
6
6
  export * from "./clients/ollama.js";
7
- export * from "./clients/llamaCpp.js";
8
7
  import { SmolAnthropic } from "./clients/anthropic.js";
9
8
  import { SmolGoogle } from "./clients/google.js";
10
- import { LlamaCPP } from "./clients/llamaCpp.js";
11
9
  import { SmolOllama } from "./clients/ollama.js";
12
10
  import { SmolOpenAi } from "./clients/openai.js";
13
11
  import { SmolOpenAiResponses } from "./clients/openaiResponses.js";
@@ -17,6 +15,13 @@ const registeredProviders = {};
17
15
  export function registerProvider(providerName, clientClass) {
18
16
  registeredProviders[providerName] = clientClass;
19
17
  }
18
+ export function unregisterProvider(providerName) {
19
+ if (providerName in registeredProviders) {
20
+ delete registeredProviders[providerName];
21
+ return true;
22
+ }
23
+ return false;
24
+ }
20
25
  export function getClient(config) {
21
26
  let provider = config.provider;
22
27
  const modelName = config.model;
@@ -36,6 +41,7 @@ export function getClient(config) {
36
41
  anthropicApiKey: config.anthropicApiKey || process.env.ANTHROPIC_API_KEY,
37
42
  };
38
43
  const clientConfig = {
44
+ messages: [],
39
45
  ...config,
40
46
  ...resolvedKeys,
41
47
  model: modelName,
@@ -66,13 +72,11 @@ export function getClient(config) {
66
72
  return new SmolGoogle(clientConfig);
67
73
  case "ollama":
68
74
  return new SmolOllama(clientConfig);
69
- case "llama-cpp":
70
- return new LlamaCPP(clientConfig);
71
75
  default:
72
76
  if (provider in registeredProviders) {
73
77
  const ClientClass = registeredProviders[provider];
74
78
  return new ClientClass(clientConfig);
75
79
  }
76
- throw new SmolError(`Model provider ${provider} is not supported.`);
80
+ throw new SmolError(`Model provider ${provider} is not supported. To use a custom provider, register it first via registerProvider(name, ClientClass).`);
77
81
  }
78
82
  }
package/dist/clients/anthropic.d.ts CHANGED
@@ -1,7 +1,7 @@
1
- import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
1
+ import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
2
2
  import { BaseClient } from "./baseClient.js";
3
3
  import { ModelName } from "../models.js";
4
- export type SmolAnthropicConfig = BaseClientConfig & {
4
+ export type SmolAnthropicConfig = SmolConfig & {
5
5
  anthropicApiKey: string;
6
6
  };
7
7
  export declare class SmolAnthropic extends BaseClient implements SmolClient {
@@ -13,6 +13,6 @@ export declare class SmolAnthropic extends BaseClient implements SmolClient {
13
13
  private calculateUsageAndCost;
14
14
  private buildRequest;
15
15
  private rethrowAsSmolError;
16
- _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
17
- _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
16
+ _textSync(config: SmolConfig): Promise<Result<PromptResult>>;
17
+ _textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
18
18
  }
package/dist/clients/anthropic.js CHANGED
@@ -19,7 +19,7 @@ export class SmolAnthropic extends BaseClient {
19
19
  this.model = new Model(config.model);
20
20
  }
21
21
  getModel() {
22
- return this.model.getResolvedModel();
22
+ return this.model.getModel();
23
23
  }
24
24
  calculateUsageAndCost(usageData) {
25
25
  const usage = {
package/dist/clients/baseClient.d.ts CHANGED
@@ -1,29 +1,26 @@
1
1
  import { StatelogClient } from "../statelogClient.js";
2
- import { PromptConfig, PromptResult, ResolvedSmolConfig, Result, SmolClient, StreamChunk } from "../types.js";
2
+ import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
3
3
  export declare class BaseClient implements SmolClient {
4
- protected config: ResolvedSmolConfig;
4
+ protected config: SmolConfig;
5
5
  protected statelogClient?: StatelogClient;
6
- constructor(config: ResolvedSmolConfig);
7
- protected getAbortSignal(promptConfig: PromptConfig): AbortSignal | undefined;
6
+ constructor(config: SmolConfig);
7
+ protected getAbortSignal(promptConfig: SmolConfig): AbortSignal | undefined;
8
8
  protected isAbortError(err: unknown): boolean;
9
- text(promptConfig: Omit<PromptConfig, "stream">): Promise<Result<PromptResult>>;
10
- text(promptConfig: Omit<PromptConfig, "stream"> & {
11
- stream: false;
12
- }): Promise<Result<PromptResult>>;
13
- text(promptConfig: Omit<PromptConfig, "stream"> & {
9
+ text(promptConfig: SmolConfig & {
14
10
  stream: true;
15
11
  }): AsyncGenerator<StreamChunk>;
16
- text(promptConfig: PromptConfig): Promise<Result<PromptResult>> | AsyncGenerator<StreamChunk>;
17
- checkMessageLimit(promptConfig: PromptConfig): Result<PromptResult> | null;
18
- textSync(promptConfig: PromptConfig): Promise<Result<PromptResult>>;
19
- checkForToolLoops(promptConfig: PromptConfig): {
12
+ text(promptConfig: SmolConfig & {
13
+ stream?: false;
14
+ }): Promise<Result<PromptResult>>;
15
+ checkMessageLimit(promptConfig: SmolConfig): Result<PromptResult> | null;
16
+ textSync(promptConfig: SmolConfig): Promise<Result<PromptResult>>;
17
+ checkForToolLoops(promptConfig: SmolConfig): {
20
18
  continue: boolean;
21
- newPromptConfig: PromptConfig;
19
+ newSmolConfig: SmolConfig;
22
20
  };
23
- private recordLatency;
24
- extractResponse(promptConfig: PromptConfig, rawValue: any, schema: any, depth?: number): any;
25
- textWithRetry(promptConfig: PromptConfig, retries: number): Promise<Result<PromptResult>>;
26
- _textSync(promptConfig: PromptConfig): Promise<Result<PromptResult>>;
27
- textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
28
- _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
21
+ extractResponse(promptConfig: SmolConfig, rawValue: any, schema: any, depth?: number): any;
22
+ textWithRetry(promptConfig: SmolConfig, retries: number): Promise<Result<PromptResult>>;
23
+ _textSync(promptConfig: SmolConfig): Promise<Result<PromptResult>>;
24
+ textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
25
+ _textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
29
26
  }
package/dist/clients/baseClient.js CHANGED
@@ -1,5 +1,4 @@
1
1
  import { userMessage, assistantMessage } from "../classes/message/index.js";
2
- import { latencyTracker } from "../latencyTracker.js";
3
2
  import { getLogger } from "../util/logger.js";
4
3
  import { SmolStructuredOutputError } from "../smolError.js";
5
4
  import { getStatelogClient } from "../statelogClient.js";
@@ -55,18 +54,16 @@ export class BaseClient {
55
54
  const messageLimitResult = this.checkMessageLimit(promptConfig);
56
55
  if (messageLimitResult)
57
56
  return messageLimitResult;
58
- const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(promptConfig);
57
+ const { continue: shouldContinue, newSmolConfig } = this.checkForToolLoops(promptConfig);
59
58
  if (!shouldContinue) {
60
59
  return {
61
60
  success: true,
62
61
  value: { output: null, toolCalls: [], model: this.config.model },
63
62
  };
64
63
  }
65
- const startTime = performance.now();
66
64
  try {
67
- const result = await this.textWithRetry(newPromptConfig, newPromptConfig.responseFormatOptions?.numRetries ||
65
+ const result = await this.textWithRetry(newSmolConfig, newSmolConfig.responseFormatOptions?.numRetries ||
68
66
  DEFAULT_NUM_RETRIES);
69
- this.recordLatency(startTime, result);
70
67
  return result;
71
68
  }
72
69
  catch (err) {
@@ -82,7 +79,7 @@ export class BaseClient {
82
79
  }
83
80
  checkForToolLoops(promptConfig) {
84
81
  if (!promptConfig.toolLoopDetection?.enabled) {
85
- return { continue: true, newPromptConfig: promptConfig };
82
+ return { continue: true, newSmolConfig: promptConfig };
86
83
  }
87
84
  const toolCallCounts = {};
88
85
  const toolCallMessages = promptConfig.messages.filter((m) => m.role === "tool");
@@ -104,33 +101,24 @@ export class BaseClient {
104
101
  switch (intervention) {
105
102
  case "remove-tool":
106
103
  const newTools = promptConfig.tools?.filter((t) => t.name !== toolName);
107
- const newPromptConfig = {
104
+ const newSmolConfig = {
108
105
  ...promptConfig,
109
106
  tools: newTools,
110
107
  };
111
- return { continue: true, newPromptConfig };
108
+ return { continue: true, newSmolConfig };
112
109
  case "remove-all-tools":
113
110
  return {
114
111
  continue: true,
115
- newPromptConfig: { ...promptConfig, tools: [] },
112
+ newSmolConfig: { ...promptConfig, tools: [] },
116
113
  };
117
114
  case "throw-error":
118
115
  throw new Error(`Tool loop detected for tool "${toolName}". Aborting request.`);
119
116
  case "halt-execution":
120
- return { continue: false, newPromptConfig: promptConfig };
117
+ return { continue: false, newSmolConfig: promptConfig };
121
118
  }
122
119
  }
123
120
  }
124
- return { continue: true, newPromptConfig: promptConfig };
125
- }
126
- recordLatency(startTime, result) {
127
- if (!result.success)
128
- return;
129
- const outputTokens = result.value.usage?.outputTokens;
130
- if (!outputTokens || outputTokens <= 0)
131
- return;
132
- const elapsedMs = performance.now() - startTime;
133
- latencyTracker.record(this.config.model, elapsedMs, outputTokens);
121
+ return { continue: true, newSmolConfig: promptConfig };
134
122
  }
135
123
  extractResponse(promptConfig, rawValue, schema, depth = 0) {
136
124
  const MAX_DEPTH = 5;
@@ -153,14 +141,17 @@ export class BaseClient {
153
141
  if (rawValue && typeof rawValue === "object" && rawValue.type === "object" && rawValue.properties) {
154
142
  return this.extractResponse(promptConfig, rawValue.properties, schema, depth + 1);
155
143
  }
156
- // 2. String → try JSON.parse, then recurse
144
+ // 2. String → try JSON.parse (after stripping markdown fences), then recurse.
145
+ // Throws SmolStructuredOutputError if the string isn't valid JSON; the
146
+ // textWithRetry caller catches this and retries with a validation hint.
157
147
  if (typeof rawValue === "string") {
158
148
  const stripped = rawValue
159
149
  .trim()
160
150
  .replace(/^```json\s*/, "")
161
151
  .replace(/```\s*$/, "");
152
+ let parsed;
162
153
  try {
163
- return this.extractResponse(promptConfig, JSON.parse(stripped), schema, depth + 1);
154
+ parsed = JSON.parse(stripped);
164
155
  }
165
156
  catch (err) {
166
157
  const logger = getLogger();
@@ -169,8 +160,9 @@ export class BaseClient {
169
160
  rawValue: stripped,
170
161
  });
171
162
  this.statelogClient?.debug("extractResponse: failed to parse JSON from string", { error: err.message });
163
+ throw new SmolStructuredOutputError(`Response did not parse as JSON: ${err.message}`);
172
164
  }
173
- return rawValue;
165
+ return this.extractResponse(promptConfig, parsed, schema, depth + 1);
174
166
  }
175
167
  // 3. Null/undefined/primitive — nothing to unwrap
176
168
  if (rawValue == null || typeof rawValue !== "object") {
@@ -216,8 +208,7 @@ export class BaseClient {
216
208
  if (result.value.toolCalls.length > 0) {
217
209
  return result;
218
210
  }
219
- if (!promptConfig.responseFormat ||
220
- !promptConfig.responseFormatOptions?.strict) {
211
+ if (!promptConfig.responseFormat) {
221
212
  return result;
222
213
  }
223
214
  if (!("output" in result.value)) {
@@ -228,14 +219,9 @@ export class BaseClient {
228
219
  return this.textWithRetry({ ...promptConfig, messages: retryMessages }, retries - 1);
229
220
  }
230
221
  const { output } = result.value;
231
- if (output !== null &&
232
- promptConfig.responseFormat &&
233
- promptConfig.responseFormatOptions?.strict &&
234
- retries > 0) {
235
- const allowExtraKeys = promptConfig.responseFormatOptions?.allowExtraKeys ?? false;
222
+ if (output !== null && retries > 0) {
236
223
  try {
237
- const parsed = JSON.parse(output);
238
- const parseResult = this.extractResponse(promptConfig, parsed, promptConfig.responseFormat);
224
+ const parseResult = this.extractResponse(promptConfig, output, promptConfig.responseFormat);
239
225
  return success({
240
226
  ...result.value,
241
227
  output: parseResult,
@@ -278,7 +264,7 @@ export class BaseClient {
278
264
  };
279
265
  return;
280
266
  }
281
- const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(config);
267
+ const { continue: shouldContinue, newSmolConfig } = this.checkForToolLoops(config);
282
268
  if (!shouldContinue) {
283
269
  yield {
284
270
  type: "done",
@@ -290,16 +276,8 @@ export class BaseClient {
290
276
  };
291
277
  return;
292
278
  }
293
- const startTime = performance.now();
294
279
  try {
295
- for await (const chunk of this._textStream(newPromptConfig)) {
296
- if (chunk.type === "done") {
297
- const outputTokens = chunk.result.usage?.outputTokens;
298
- if (outputTokens && outputTokens > 0) {
299
- const elapsedMs = performance.now() - startTime;
300
- latencyTracker.record(this.config.model, elapsedMs, outputTokens);
301
- }
302
- }
280
+ for await (const chunk of this._textStream(newSmolConfig)) {
303
281
  yield chunk;
304
282
  }
305
283
  }
@@ -307,7 +285,7 @@ export class BaseClient {
307
285
  if (this.isAbortError(err)) {
308
286
  this.statelogClient?.debug("Streaming request aborted or timed out", {
309
287
  reason: "Request was aborted",
310
- newPromptConfig,
288
+ newSmolConfig,
311
289
  });
312
290
  yield { type: "timeout", error: "Request was aborted" };
313
291
  }
package/dist/clients/google.d.ts CHANGED
@@ -1,8 +1,8 @@
1
1
  import { Content, GenerateContentConfig, GoogleGenAI } from "@google/genai";
2
- import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
2
+ import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
3
3
  import { BaseClient } from "./baseClient.js";
4
4
  import { ModelName } from "../models.js";
5
- export type SmolGoogleConfig = BaseClientConfig;
5
+ export type SmolGoogleConfig = SmolConfig;
6
6
  type GeneratedRequest = {
7
7
  contents: Content[];
8
8
  model: ModelName;
@@ -17,8 +17,8 @@ export declare class SmolGoogle extends BaseClient implements SmolClient {
17
17
  getModel(): ModelName;
18
18
  private calculateUsageAndCost;
19
19
  private buildRequest;
20
- _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
20
+ _textSync(config: SmolConfig): Promise<Result<PromptResult>>;
21
21
  __textSync(request: GeneratedRequest): Promise<Result<PromptResult>>;
22
- _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
22
+ _textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
23
23
  }
24
24
  export {};
package/dist/clients/google.js CHANGED
@@ -25,7 +25,7 @@ export class SmolGoogle extends BaseClient {
25
25
  return this.client;
26
26
  }
27
27
  getModel() {
28
- return this.model.getResolvedModel();
28
+ return this.model.getModel();
29
29
  }
30
30
  calculateUsageAndCost(usageMetadata) {
31
31
  let usage;
package/dist/clients/ollama.d.ts CHANGED
@@ -1,9 +1,9 @@
1
1
  import { Ollama } from "ollama";
2
- import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
2
+ import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
3
3
  import { BaseClient } from "./baseClient.js";
4
4
  import { ModelName } from "../models.js";
5
5
  export declare const DEFAULT_OLLAMA_HOST = "http://localhost:11434";
6
- export type SmolOllamaConfig = BaseClientConfig;
6
+ export type SmolOllamaConfig = SmolConfig;
7
7
  export declare class SmolOllama extends BaseClient implements SmolClient {
8
8
  private logger;
9
9
  private model;
@@ -12,6 +12,6 @@ export declare class SmolOllama extends BaseClient implements SmolClient {
12
12
  getClient(): Ollama;
13
13
  getModel(): ModelName;
14
14
  private calculateUsageAndCost;
15
- _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
16
- _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
15
+ _textSync(config: SmolConfig): Promise<Result<PromptResult>>;
16
+ _textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
17
17
  }