smoltalk 0.0.67 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -154
- package/dist/client.d.ts +3 -3
- package/dist/client.js +9 -5
- package/dist/clients/anthropic.d.ts +4 -4
- package/dist/clients/anthropic.js +1 -1
- package/dist/clients/baseClient.d.ts +17 -20
- package/dist/clients/baseClient.js +21 -43
- package/dist/clients/google.d.ts +4 -4
- package/dist/clients/google.js +1 -1
- package/dist/clients/ollama.d.ts +4 -4
- package/dist/clients/ollama.js +1 -1
- package/dist/clients/openai.d.ts +4 -4
- package/dist/clients/openai.js +2 -1
- package/dist/clients/openaiResponses.d.ts +4 -4
- package/dist/clients/openaiResponses.js +2 -1
- package/dist/functions.d.ts +13 -10
- package/dist/functions.js +4 -55
- package/dist/index.d.ts +2 -4
- package/dist/index.js +1 -2
- package/dist/model.d.ts +2 -5
- package/dist/model.js +11 -27
- package/dist/models.d.ts +2 -2
- package/dist/models.js +3 -1
- package/dist/testing/index.d.ts +9 -0
- package/dist/testing/index.js +41 -0
- package/dist/types.d.ts +52 -160
- package/dist/types.js +1 -1
- package/dist/util/logger.d.ts +17 -1
- package/dist/util/logger.js +68 -5
- package/package.json +15 -19
- package/dist/clients/llamaCpp.d.ts +0 -28
- package/dist/clients/llamaCpp.js +0 -316
- package/dist/latencyTracker.d.ts +0 -32
- package/dist/latencyTracker.js +0 -73
- package/dist/middleware.d.ts +0 -54
- package/dist/middleware.js +0 -321
- package/dist/strategies/baseStrategy.d.ts +0 -22
- package/dist/strategies/baseStrategy.js +0 -62
- package/dist/strategies/fallbackStrategy.d.ts +0 -14
- package/dist/strategies/fallbackStrategy.js +0 -122
- package/dist/strategies/fastestStrategy.d.ts +0 -19
- package/dist/strategies/fastestStrategy.js +0 -108
- package/dist/strategies/idStrategy.d.ts +0 -16
- package/dist/strategies/idStrategy.js +0 -62
- package/dist/strategies/index.d.ts +0 -17
- package/dist/strategies/index.js +0 -68
- package/dist/strategies/raceStrategy.d.ts +0 -12
- package/dist/strategies/raceStrategy.js +0 -72
- package/dist/strategies/randomStrategy.d.ts +0 -13
- package/dist/strategies/randomStrategy.js +0 -54
- package/dist/strategies/timeoutStrategy.d.ts +0 -13
- package/dist/strategies/timeoutStrategy.js +0 -65
- package/dist/strategies/types.d.ts +0 -78
- package/dist/strategies/types.js +0 -58
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Smoltalk
|
|
2
2
|
|
|
3
|
-
Smoltalk exposes a common API to different LLM providers
|
|
3
|
+
Smoltalk exposes a common API to different LLM providers, with built-in cost tracking, structured output, tool calling, streaming, and observability hooks. Here is a simple example.
|
|
4
4
|
|
|
5
5
|
## Install
|
|
6
6
|
|
|
@@ -54,74 +54,47 @@ This is functionality that other packages allow.
|
|
|
54
54
|
```
|
|
55
55
|
</details>
|
|
56
56
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
```ts
|
|
60
|
-
const response = await text({
|
|
61
|
-
messages,
|
|
62
|
-
model: fallback("gpt-5.4", "gemini-2.5-flash-lite"),
|
|
63
|
-
// or multiple fallbacks:
|
|
64
|
-
// model: fallback("gpt-5.4", ["gemini-2.5-flash-lite", "gemini-3-flash-preview"]),
|
|
65
|
-
});
|
|
66
|
-
```
|
|
57
|
+
## Longer tutorial
|
|
67
58
|
|
|
68
|
-
|
|
59
|
+
The top-level `text()` function is the recommended entry point — pass everything in a single config:
|
|
69
60
|
|
|
70
61
|
```ts
|
|
71
|
-
|
|
72
|
-
messages,
|
|
73
|
-
model: race("gpt-5.4", "gemini-2.5-flash-lite", "o4-mini"),
|
|
74
|
-
});
|
|
75
|
-
```
|
|
62
|
+
import { text, userMessage } from "smoltalk";
|
|
76
63
|
|
|
77
|
-
|
|
64
|
+
const messages = [
|
|
65
|
+
userMessage("Please use the add function to add the following numbers: 3 and 5"),
|
|
66
|
+
];
|
|
78
67
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
68
|
+
const resp = await text({
|
|
69
|
+
messages,
|
|
70
|
+
model: "gemini-2.0-flash-lite",
|
|
71
|
+
openAiApiKey: process.env.OPENAI_API_KEY || "",
|
|
72
|
+
googleApiKey: process.env.GEMINI_API_KEY || "",
|
|
73
|
+
logLevel: "debug",
|
|
74
|
+
});
|
|
84
75
|
```
|
|
85
76
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
## Longer tutorial
|
|
89
|
-
To use Smoltak, you first create a client:
|
|
77
|
+
If you want to construct a client once and reuse it across many calls, use `getClient()`:
|
|
90
78
|
|
|
91
79
|
```ts
|
|
92
|
-
import { getClient } from "smoltalk";
|
|
80
|
+
import { getClient, userMessage } from "smoltalk";
|
|
93
81
|
|
|
94
82
|
const client = getClient({
|
|
95
83
|
openAiApiKey: process.env.OPENAI_API_KEY || "",
|
|
96
84
|
googleApiKey: process.env.GEMINI_API_KEY || "",
|
|
97
|
-
logLevel: "debug",
|
|
98
85
|
model: "gemini-2.0-flash-lite",
|
|
99
86
|
});
|
|
100
|
-
```
|
|
101
|
-
|
|
102
|
-
Then you can call different methods on the client. The simplest is `prompt`:
|
|
103
|
-
|
|
104
|
-
```ts
|
|
105
|
-
const resp = await client.prompt("Hello, how are you?");
|
|
106
|
-
```
|
|
107
|
-
|
|
108
|
-
If you want tool calling, structured output, etc., `text` may be a cleaner option:
|
|
109
87
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
messages.push(
|
|
113
|
-
userMessage(
|
|
114
|
-
"Please use the add function to add the following numbers: 3 and 5"
|
|
115
|
-
)
|
|
116
|
-
);
|
|
117
|
-
const resp = await client.text({
|
|
118
|
-
messages,
|
|
119
|
-
});
|
|
88
|
+
const messages = [userMessage("hi")];
|
|
89
|
+
const resp = await client.text({ messages, model: "gemini-2.0-flash-lite" });
|
|
120
90
|
```
|
|
121
91
|
|
|
122
92
|
Here is an example with tool calling:
|
|
123
93
|
|
|
124
94
|
```ts
|
|
95
|
+
import { text, userMessage } from "smoltalk";
|
|
96
|
+
import { z } from "zod";
|
|
97
|
+
|
|
125
98
|
function add({ a, b }: { a: number; b: number }): number {
|
|
126
99
|
return a + b;
|
|
127
100
|
}
|
|
@@ -135,21 +108,29 @@ const addTool = {
|
|
|
135
108
|
}),
|
|
136
109
|
};
|
|
137
110
|
|
|
138
|
-
const
|
|
111
|
+
const messages = [userMessage("Add 3 and 5")];
|
|
112
|
+
|
|
113
|
+
const resp = await text({
|
|
139
114
|
messages,
|
|
140
|
-
|
|
115
|
+
model: "gemini-2.0-flash-lite",
|
|
116
|
+
tools: [addTool],
|
|
141
117
|
});
|
|
142
|
-
|
|
143
118
|
```
|
|
144
119
|
|
|
145
120
|
Here is an example with structured output:
|
|
146
121
|
|
|
147
122
|
```ts
|
|
148
|
-
|
|
123
|
+
import { text, userMessage } from "smoltalk";
|
|
124
|
+
import { z } from "zod";
|
|
125
|
+
|
|
126
|
+
const messages = [userMessage("How many planets are in the solar system?")];
|
|
127
|
+
|
|
128
|
+
const resp = await text({
|
|
149
129
|
messages,
|
|
130
|
+
model: "gemini-2.0-flash-lite",
|
|
150
131
|
responseFormat: z.object({
|
|
151
132
|
result: z.number(),
|
|
152
|
-
})
|
|
133
|
+
}),
|
|
153
134
|
});
|
|
154
135
|
```
|
|
155
136
|
|
|
@@ -160,37 +141,36 @@ A couple of design decisions to note:
|
|
|
160
141
|
|
|
161
142
|
## Configuration Options
|
|
162
143
|
|
|
163
|
-
`
|
|
164
|
-
|
|
165
|
-
### Client options (`SmolConfig`)
|
|
144
|
+
`SmolConfig` is a single config type passed to `text()`. It contains everything: API keys, model selection, request parameters, hooks, and observability options.
|
|
166
145
|
|
|
167
146
|
| Option | Type | Description |
|
|
168
147
|
|--------|------|-------------|
|
|
169
|
-
| `model` | `ModelName
|
|
148
|
+
| `model` | `ModelName` | **Required.** The model to use (e.g. `"gpt-4o"`, `"gemini-2.0-flash-lite"`). |
|
|
149
|
+
| `messages` | `Message[]` | **Required.** The conversation messages to send. |
|
|
170
150
|
| `openAiApiKey` | `string` | OpenAI API key. |
|
|
171
151
|
| `googleApiKey` | `string` | Google Gemini API key. |
|
|
152
|
+
| `anthropicApiKey` | `string` | Anthropic API key. |
|
|
172
153
|
| `ollamaApiKey` | `string` | Ollama API key (only needed for cloud Ollama). |
|
|
173
154
|
| `ollamaHost` | `string` | Ollama host URL (for self-hosted or cloud Ollama). |
|
|
174
|
-
| `provider` | `Provider` | Override provider detection. One of `"openai"`, `"openai-responses"`, `"google"`, `"ollama"`, `"anthropic"`,
|
|
175
|
-
| `logLevel` | `LogLevel` | Logging verbosity: `"debug"`, `"info"`, `"warn"`, `"error"
|
|
176
|
-
| `toolLoopDetection` | `ToolLoopDetection` | Config to detect and break tool call loops. See below. |
|
|
177
|
-
|
|
178
|
-
### Request options (`PromptConfig`)
|
|
179
|
-
|
|
180
|
-
| Option | Type | Description |
|
|
181
|
-
|--------|------|-------------|
|
|
182
|
-
| `messages` | `Message[]` | **Required.** The conversation messages to send. |
|
|
183
|
-
| `instructions` | `string` | System-level instructions (system prompt). |
|
|
155
|
+
| `provider` | `Provider` | Override provider detection. One of `"openai"`, `"openai-responses"`, `"google"`, `"ollama"`, `"anthropic"`, or any provider registered via `registerProvider()`. |
|
|
156
|
+
| `logLevel` | `LogLevel` | Logging verbosity: `"debug"`, `"info"`, `"warn"`, `"error"`. |
|
|
184
157
|
| `tools` | `{ name, description?, schema }[]` | Tool definitions. `schema` is a Zod object schema. |
|
|
185
|
-
| `responseFormat` | `ZodType` | Zod schema for structured output. The response
|
|
158
|
+
| `responseFormat` | `ZodType` | Zod schema for structured output. The response is parsed and validated against this schema. |
|
|
186
159
|
| `responseFormatOptions` | `object` | Fine-grained control over structured output (see below). |
|
|
187
160
|
| `maxTokens` | `number` | Maximum number of output tokens to generate. |
|
|
188
|
-
| `temperature` | `number` | Sampling temperature (0–2
|
|
161
|
+
| `temperature` | `number` | Sampling temperature (0–2). |
|
|
189
162
|
| `numSuggestions` | `number` | Number of completions to generate. |
|
|
190
163
|
| `parallelToolCalls` | `boolean` | Whether to allow the model to call multiple tools in parallel. |
|
|
191
164
|
| `stream` | `boolean` | If `true`, returns an `AsyncGenerator<StreamChunk>` instead of a `Promise`. |
|
|
165
|
+
| `thinking` | `{ enabled, budgetTokens? }` | Enable extended thinking / thought signatures (Anthropic and Google). |
|
|
166
|
+
| `reasoningEffort` | `"low" \| "medium" \| "high"` | Provider-agnostic reasoning effort level. |
|
|
192
167
|
| `maxMessages` | `number` | If the message list exceeds this count, returns a failure instead of calling the API. |
|
|
168
|
+
| `abortSignal` | `AbortSignal` | Cancel an in-flight request. |
|
|
169
|
+
| `toolLoopDetection` | `ToolLoopDetection` | Detect and break tool-call loops. See below. |
|
|
193
170
|
| `rawAttributes` | `Record<string, any>` | Pass provider-specific attributes directly to the API request. |
|
|
171
|
+
| `hooks` | `{ onStart?, onToolCall?, onEnd?, onError? }` | Lifecycle hooks. |
|
|
172
|
+
| `statelog` | `object` | Configuration for Statelog observability/tracing integration. |
|
|
173
|
+
| `metadata` | `Record<string, any>` | Arbitrary metadata. |
|
|
194
174
|
|
|
195
175
|
### `responseFormatOptions`
|
|
196
176
|
|
|
@@ -210,92 +190,9 @@ Detects when the model is stuck in a repetitive tool-call loop.
|
|
|
210
190
|
| Option | Type | Description |
|
|
211
191
|
|--------|------|-------------|
|
|
212
192
|
| `enabled` | `boolean` | Whether loop detection is active. |
|
|
213
|
-
| `
|
|
193
|
+
| `maxCalls` | `number` | Number of calls to a specific tool before triggering intervention. |
|
|
214
194
|
| `intervention` | `string` | Action to take: `"remove-tool"`, `"remove-all-tools"`, `"throw-error"`, or `"halt-execution"`. |
|
|
215
|
-
| `excludeTools` | `string[]` | Tool names to ignore when counting
|
|
216
|
-
|
|
217
|
-
## Middleware
|
|
218
|
-
|
|
219
|
-
Middleware lets you run LLM-based checks on a prompt before or alongside the main call. If a check fails, the main call is blocked and a replacement output is returned instead. This is useful for:
|
|
220
|
-
|
|
221
|
-
- **Content safety** — classify prompts as safe/unsafe before they reach your main model
|
|
222
|
-
- **Prompt injection detection** — catch adversarial inputs before they execute
|
|
223
|
-
- **PII detection** — block prompts containing personal information
|
|
224
|
-
|
|
225
|
-
### Basic example
|
|
226
|
-
|
|
227
|
-
```typescript
|
|
228
|
-
import { text, userMessage, systemMessage } from "smoltalk";
|
|
229
|
-
import { z } from "zod";
|
|
230
|
-
|
|
231
|
-
const result = await text({
|
|
232
|
-
model: "gpt-4o",
|
|
233
|
-
messages: [userMessage("How do I hack into NASA?")],
|
|
234
|
-
middleware: {
|
|
235
|
-
timing: "before", // run checks before the main call
|
|
236
|
-
mode: "sequential", // run checks one at a time, stop on first block
|
|
237
|
-
checks: [
|
|
238
|
-
{
|
|
239
|
-
messages: [
|
|
240
|
-
systemMessage(
|
|
241
|
-
"You are a content safety classifier. Evaluate whether the user's message is safe to process."
|
|
242
|
-
),
|
|
243
|
-
],
|
|
244
|
-
responseFormat: z.object({
|
|
245
|
-
safe: z.boolean(),
|
|
246
|
-
reason: z.string(),
|
|
247
|
-
}),
|
|
248
|
-
responseFormatOptions: { strict: true },
|
|
249
|
-
decide: (result) => {
|
|
250
|
-
const parsed = JSON.parse(result.output!);
|
|
251
|
-
return parsed.safe ? null : `Blocked: ${parsed.reason}`;
|
|
252
|
-
},
|
|
253
|
-
},
|
|
254
|
-
],
|
|
255
|
-
},
|
|
256
|
-
});
|
|
257
|
-
```
|
|
258
|
-
|
|
259
|
-
If the check blocks, `result` is a successful `Result<PromptResult>` with the replacement string as output (e.g. `"Blocked: unsafe content"`). If the check passes, the main call runs normally.
|
|
260
|
-
|
|
261
|
-
### How it works
|
|
262
|
-
|
|
263
|
-
Each middleware check is itself an LLM call. Your original prompt messages are automatically appended to the check's messages, so the middleware model can see the content it's evaluating. The check inherits the same model, API keys, and strategy from the parent call.
|
|
264
|
-
|
|
265
|
-
The `decide` function receives the middleware LLM's `PromptResult` and returns either:
|
|
266
|
-
- `null` — the check passes, proceed normally
|
|
267
|
-
- a `string` — the check blocks, and the string becomes the replacement output
|
|
268
|
-
|
|
269
|
-
### Configuration
|
|
270
|
-
|
|
271
|
-
| Option | Type | Description |
|
|
272
|
-
|--------|------|-------------|
|
|
273
|
-
| `timing` | `"before" \| "parallel"` | `"before"` runs checks first, then the main call. `"parallel"` runs both simultaneously — if a check blocks, the main call is aborted. |
|
|
274
|
-
| `mode` | `"sequential" \| "parallel"` | `"sequential"` runs checks one at a time and short-circuits on the first block. `"parallel"` runs all checks concurrently. |
|
|
275
|
-
| `checks` | `MiddlewareCheck[]` | The checks to run (see below). |
|
|
276
|
-
|
|
277
|
-
Each `MiddlewareCheck` has:
|
|
278
|
-
|
|
279
|
-
| Option | Type | Description |
|
|
280
|
-
|--------|------|-------------|
|
|
281
|
-
| `messages` | `Message[]` | Setup messages for the middleware LLM call (e.g. a system prompt defining the classifier). |
|
|
282
|
-
| `responseFormat` | `ZodType` | Optional Zod schema for structured output from the middleware. |
|
|
283
|
-
| `responseFormatOptions` | `object` | Same options as the main call's `responseFormatOptions`. |
|
|
284
|
-
| `decide` | `(result: PromptResult) => string \| null` | Decision function. Return a string to block, or `null` to pass. |
|
|
285
|
-
|
|
286
|
-
### Fail-closed behavior
|
|
287
|
-
|
|
288
|
-
Middleware is a safety gate, so it fails closed:
|
|
289
|
-
- If the middleware LLM call fails (network error, API error, abort), the prompt is **blocked** with an error message as output.
|
|
290
|
-
- If `decide()` throws, the prompt is **blocked**.
|
|
291
|
-
|
|
292
|
-
### Cost tracking
|
|
293
|
-
|
|
294
|
-
Middleware usage/cost is tracked. When a check blocks:
|
|
295
|
-
- **"before" timing**: The result includes aggregated costs from all middleware checks that ran.
|
|
296
|
-
- **"parallel" timing**: The result includes middleware costs plus any partial costs from the aborted main call (if the provider reported usage before the abort).
|
|
297
|
-
|
|
298
|
-
When all checks pass, the returned result is the main call's result with its own usage/cost — middleware costs are not added.
|
|
195
|
+
| `excludeTools` | `string[]` | Tool names to ignore when counting calls. |
|
|
299
196
|
|
|
300
197
|
## Limitations
|
|
301
198
|
Smoltalk has support for a limited number of providers right now, and is mostly focused on the stateless APIs for text completion, though I plan to add support for more providers as well as image and speech models later. Smoltalk is also a personal project, and there are alternatives backed by companies:
|
package/dist/client.d.ts
CHANGED
|
@@ -4,8 +4,8 @@ export * from "./clients/openai.js";
|
|
|
4
4
|
export * from "./clients/openaiResponses.js";
|
|
5
5
|
export * from "./clients/baseClient.js";
|
|
6
6
|
export * from "./clients/ollama.js";
|
|
7
|
-
export * from "./clients/llamaCpp.js";
|
|
8
7
|
import { BaseClient } from "./clients/baseClient.js";
|
|
9
|
-
import {
|
|
8
|
+
import { SmolClientConfig } from "./types.js";
|
|
10
9
|
export declare function registerProvider(providerName: string, clientClass: typeof BaseClient): void;
|
|
11
|
-
export declare function
|
|
10
|
+
export declare function unregisterProvider(providerName: string): boolean;
|
|
11
|
+
export declare function getClient(config: SmolClientConfig): BaseClient;
|
package/dist/client.js
CHANGED
|
@@ -4,10 +4,8 @@ export * from "./clients/openai.js";
|
|
|
4
4
|
export * from "./clients/openaiResponses.js";
|
|
5
5
|
export * from "./clients/baseClient.js";
|
|
6
6
|
export * from "./clients/ollama.js";
|
|
7
|
-
export * from "./clients/llamaCpp.js";
|
|
8
7
|
import { SmolAnthropic } from "./clients/anthropic.js";
|
|
9
8
|
import { SmolGoogle } from "./clients/google.js";
|
|
10
|
-
import { LlamaCPP } from "./clients/llamaCpp.js";
|
|
11
9
|
import { SmolOllama } from "./clients/ollama.js";
|
|
12
10
|
import { SmolOpenAi } from "./clients/openai.js";
|
|
13
11
|
import { SmolOpenAiResponses } from "./clients/openaiResponses.js";
|
|
@@ -17,6 +15,13 @@ const registeredProviders = {};
|
|
|
17
15
|
export function registerProvider(providerName, clientClass) {
|
|
18
16
|
registeredProviders[providerName] = clientClass;
|
|
19
17
|
}
|
|
18
|
+
export function unregisterProvider(providerName) {
|
|
19
|
+
if (providerName in registeredProviders) {
|
|
20
|
+
delete registeredProviders[providerName];
|
|
21
|
+
return true;
|
|
22
|
+
}
|
|
23
|
+
return false;
|
|
24
|
+
}
|
|
20
25
|
export function getClient(config) {
|
|
21
26
|
let provider = config.provider;
|
|
22
27
|
const modelName = config.model;
|
|
@@ -36,6 +41,7 @@ export function getClient(config) {
|
|
|
36
41
|
anthropicApiKey: config.anthropicApiKey || process.env.ANTHROPIC_API_KEY,
|
|
37
42
|
};
|
|
38
43
|
const clientConfig = {
|
|
44
|
+
messages: [],
|
|
39
45
|
...config,
|
|
40
46
|
...resolvedKeys,
|
|
41
47
|
model: modelName,
|
|
@@ -66,13 +72,11 @@ export function getClient(config) {
|
|
|
66
72
|
return new SmolGoogle(clientConfig);
|
|
67
73
|
case "ollama":
|
|
68
74
|
return new SmolOllama(clientConfig);
|
|
69
|
-
case "llama-cpp":
|
|
70
|
-
return new LlamaCPP(clientConfig);
|
|
71
75
|
default:
|
|
72
76
|
if (provider in registeredProviders) {
|
|
73
77
|
const ClientClass = registeredProviders[provider];
|
|
74
78
|
return new ClientClass(clientConfig);
|
|
75
79
|
}
|
|
76
|
-
throw new SmolError(`Model provider ${provider} is not supported.`);
|
|
80
|
+
throw new SmolError(`Model provider ${provider} is not supported. To use a custom provider, register it first via registerProvider(name, ClientClass).`);
|
|
77
81
|
}
|
|
78
82
|
}
|
package/dist/clients/anthropic.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
|
|
2
2
|
import { BaseClient } from "./baseClient.js";
|
|
3
3
|
import { ModelName } from "../models.js";
|
|
4
|
-
export type SmolAnthropicConfig =
|
|
4
|
+
export type SmolAnthropicConfig = SmolConfig & {
|
|
5
5
|
anthropicApiKey: string;
|
|
6
6
|
};
|
|
7
7
|
export declare class SmolAnthropic extends BaseClient implements SmolClient {
|
|
@@ -13,6 +13,6 @@ export declare class SmolAnthropic extends BaseClient implements SmolClient {
|
|
|
13
13
|
private calculateUsageAndCost;
|
|
14
14
|
private buildRequest;
|
|
15
15
|
private rethrowAsSmolError;
|
|
16
|
-
_textSync(config:
|
|
17
|
-
_textStream(config:
|
|
16
|
+
_textSync(config: SmolConfig): Promise<Result<PromptResult>>;
|
|
17
|
+
_textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
|
|
18
18
|
}
|
package/dist/clients/baseClient.d.ts
CHANGED
|
@@ -1,29 +1,26 @@
|
|
|
1
1
|
import { StatelogClient } from "../statelogClient.js";
|
|
2
|
-
import {
|
|
2
|
+
import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
|
|
3
3
|
export declare class BaseClient implements SmolClient {
|
|
4
|
-
protected config:
|
|
4
|
+
protected config: SmolConfig;
|
|
5
5
|
protected statelogClient?: StatelogClient;
|
|
6
|
-
constructor(config:
|
|
7
|
-
protected getAbortSignal(promptConfig:
|
|
6
|
+
constructor(config: SmolConfig);
|
|
7
|
+
protected getAbortSignal(promptConfig: SmolConfig): AbortSignal | undefined;
|
|
8
8
|
protected isAbortError(err: unknown): boolean;
|
|
9
|
-
text(promptConfig:
|
|
10
|
-
text(promptConfig: Omit<PromptConfig, "stream"> & {
|
|
11
|
-
stream: false;
|
|
12
|
-
}): Promise<Result<PromptResult>>;
|
|
13
|
-
text(promptConfig: Omit<PromptConfig, "stream"> & {
|
|
9
|
+
text(promptConfig: SmolConfig & {
|
|
14
10
|
stream: true;
|
|
15
11
|
}): AsyncGenerator<StreamChunk>;
|
|
16
|
-
text(promptConfig:
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
12
|
+
text(promptConfig: SmolConfig & {
|
|
13
|
+
stream?: false;
|
|
14
|
+
}): Promise<Result<PromptResult>>;
|
|
15
|
+
checkMessageLimit(promptConfig: SmolConfig): Result<PromptResult> | null;
|
|
16
|
+
textSync(promptConfig: SmolConfig): Promise<Result<PromptResult>>;
|
|
17
|
+
checkForToolLoops(promptConfig: SmolConfig): {
|
|
20
18
|
continue: boolean;
|
|
21
|
-
|
|
19
|
+
newSmolConfig: SmolConfig;
|
|
22
20
|
};
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
_textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
|
|
21
|
+
extractResponse(promptConfig: SmolConfig, rawValue: any, schema: any, depth?: number): any;
|
|
22
|
+
textWithRetry(promptConfig: SmolConfig, retries: number): Promise<Result<PromptResult>>;
|
|
23
|
+
_textSync(promptConfig: SmolConfig): Promise<Result<PromptResult>>;
|
|
24
|
+
textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
|
|
25
|
+
_textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
|
|
29
26
|
}
|
package/dist/clients/baseClient.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { userMessage, assistantMessage } from "../classes/message/index.js";
|
|
2
|
-
import { latencyTracker } from "../latencyTracker.js";
|
|
3
2
|
import { getLogger } from "../util/logger.js";
|
|
4
3
|
import { SmolStructuredOutputError } from "../smolError.js";
|
|
5
4
|
import { getStatelogClient } from "../statelogClient.js";
|
|
@@ -55,18 +54,16 @@ export class BaseClient {
|
|
|
55
54
|
const messageLimitResult = this.checkMessageLimit(promptConfig);
|
|
56
55
|
if (messageLimitResult)
|
|
57
56
|
return messageLimitResult;
|
|
58
|
-
const { continue: shouldContinue,
|
|
57
|
+
const { continue: shouldContinue, newSmolConfig } = this.checkForToolLoops(promptConfig);
|
|
59
58
|
if (!shouldContinue) {
|
|
60
59
|
return {
|
|
61
60
|
success: true,
|
|
62
61
|
value: { output: null, toolCalls: [], model: this.config.model },
|
|
63
62
|
};
|
|
64
63
|
}
|
|
65
|
-
const startTime = performance.now();
|
|
66
64
|
try {
|
|
67
|
-
const result = await this.textWithRetry(
|
|
65
|
+
const result = await this.textWithRetry(newSmolConfig, newSmolConfig.responseFormatOptions?.numRetries ||
|
|
68
66
|
DEFAULT_NUM_RETRIES);
|
|
69
|
-
this.recordLatency(startTime, result);
|
|
70
67
|
return result;
|
|
71
68
|
}
|
|
72
69
|
catch (err) {
|
|
@@ -82,7 +79,7 @@ export class BaseClient {
|
|
|
82
79
|
}
|
|
83
80
|
checkForToolLoops(promptConfig) {
|
|
84
81
|
if (!promptConfig.toolLoopDetection?.enabled) {
|
|
85
|
-
return { continue: true,
|
|
82
|
+
return { continue: true, newSmolConfig: promptConfig };
|
|
86
83
|
}
|
|
87
84
|
const toolCallCounts = {};
|
|
88
85
|
const toolCallMessages = promptConfig.messages.filter((m) => m.role === "tool");
|
|
@@ -104,33 +101,24 @@ export class BaseClient {
|
|
|
104
101
|
switch (intervention) {
|
|
105
102
|
case "remove-tool":
|
|
106
103
|
const newTools = promptConfig.tools?.filter((t) => t.name !== toolName);
|
|
107
|
-
const
|
|
104
|
+
const newSmolConfig = {
|
|
108
105
|
...promptConfig,
|
|
109
106
|
tools: newTools,
|
|
110
107
|
};
|
|
111
|
-
return { continue: true,
|
|
108
|
+
return { continue: true, newSmolConfig };
|
|
112
109
|
case "remove-all-tools":
|
|
113
110
|
return {
|
|
114
111
|
continue: true,
|
|
115
|
-
|
|
112
|
+
newSmolConfig: { ...promptConfig, tools: [] },
|
|
116
113
|
};
|
|
117
114
|
case "throw-error":
|
|
118
115
|
throw new Error(`Tool loop detected for tool "${toolName}". Aborting request.`);
|
|
119
116
|
case "halt-execution":
|
|
120
|
-
return { continue: false,
|
|
117
|
+
return { continue: false, newSmolConfig: promptConfig };
|
|
121
118
|
}
|
|
122
119
|
}
|
|
123
120
|
}
|
|
124
|
-
return { continue: true,
|
|
125
|
-
}
|
|
126
|
-
recordLatency(startTime, result) {
|
|
127
|
-
if (!result.success)
|
|
128
|
-
return;
|
|
129
|
-
const outputTokens = result.value.usage?.outputTokens;
|
|
130
|
-
if (!outputTokens || outputTokens <= 0)
|
|
131
|
-
return;
|
|
132
|
-
const elapsedMs = performance.now() - startTime;
|
|
133
|
-
latencyTracker.record(this.config.model, elapsedMs, outputTokens);
|
|
121
|
+
return { continue: true, newSmolConfig: promptConfig };
|
|
134
122
|
}
|
|
135
123
|
extractResponse(promptConfig, rawValue, schema, depth = 0) {
|
|
136
124
|
const MAX_DEPTH = 5;
|
|
@@ -153,14 +141,17 @@ export class BaseClient {
|
|
|
153
141
|
if (rawValue && typeof rawValue === "object" && rawValue.type === "object" && rawValue.properties) {
|
|
154
142
|
return this.extractResponse(promptConfig, rawValue.properties, schema, depth + 1);
|
|
155
143
|
}
|
|
156
|
-
// 2. String → try JSON.parse, then recurse
|
|
144
|
+
// 2. String → try JSON.parse (after stripping markdown fences), then recurse.
|
|
145
|
+
// Throws SmolStructuredOutputError if the string isn't valid JSON; the
|
|
146
|
+
// textWithRetry caller catches this and retries with a validation hint.
|
|
157
147
|
if (typeof rawValue === "string") {
|
|
158
148
|
const stripped = rawValue
|
|
159
149
|
.trim()
|
|
160
150
|
.replace(/^```json\s*/, "")
|
|
161
151
|
.replace(/```\s*$/, "");
|
|
152
|
+
let parsed;
|
|
162
153
|
try {
|
|
163
|
-
|
|
154
|
+
parsed = JSON.parse(stripped);
|
|
164
155
|
}
|
|
165
156
|
catch (err) {
|
|
166
157
|
const logger = getLogger();
|
|
@@ -169,8 +160,9 @@ export class BaseClient {
|
|
|
169
160
|
rawValue: stripped,
|
|
170
161
|
});
|
|
171
162
|
this.statelogClient?.debug("extractResponse: failed to parse JSON from string", { error: err.message });
|
|
163
|
+
throw new SmolStructuredOutputError(`Response did not parse as JSON: ${err.message}`);
|
|
172
164
|
}
|
|
173
|
-
return
|
|
165
|
+
return this.extractResponse(promptConfig, parsed, schema, depth + 1);
|
|
174
166
|
}
|
|
175
167
|
// 3. Null/undefined/primitive — nothing to unwrap
|
|
176
168
|
if (rawValue == null || typeof rawValue !== "object") {
|
|
@@ -216,8 +208,7 @@ export class BaseClient {
|
|
|
216
208
|
if (result.value.toolCalls.length > 0) {
|
|
217
209
|
return result;
|
|
218
210
|
}
|
|
219
|
-
if (!promptConfig.responseFormat
|
|
220
|
-
!promptConfig.responseFormatOptions?.strict) {
|
|
211
|
+
if (!promptConfig.responseFormat) {
|
|
221
212
|
return result;
|
|
222
213
|
}
|
|
223
214
|
if (!("output" in result.value)) {
|
|
@@ -228,14 +219,9 @@ export class BaseClient {
|
|
|
228
219
|
return this.textWithRetry({ ...promptConfig, messages: retryMessages }, retries - 1);
|
|
229
220
|
}
|
|
230
221
|
const { output } = result.value;
|
|
231
|
-
if (output !== null &&
|
|
232
|
-
promptConfig.responseFormat &&
|
|
233
|
-
promptConfig.responseFormatOptions?.strict &&
|
|
234
|
-
retries > 0) {
|
|
235
|
-
const allowExtraKeys = promptConfig.responseFormatOptions?.allowExtraKeys ?? false;
|
|
222
|
+
if (output !== null && retries > 0) {
|
|
236
223
|
try {
|
|
237
|
-
const
|
|
238
|
-
const parseResult = this.extractResponse(promptConfig, parsed, promptConfig.responseFormat);
|
|
224
|
+
const parseResult = this.extractResponse(promptConfig, output, promptConfig.responseFormat);
|
|
239
225
|
return success({
|
|
240
226
|
...result.value,
|
|
241
227
|
output: parseResult,
|
|
@@ -278,7 +264,7 @@ export class BaseClient {
|
|
|
278
264
|
};
|
|
279
265
|
return;
|
|
280
266
|
}
|
|
281
|
-
const { continue: shouldContinue,
|
|
267
|
+
const { continue: shouldContinue, newSmolConfig } = this.checkForToolLoops(config);
|
|
282
268
|
if (!shouldContinue) {
|
|
283
269
|
yield {
|
|
284
270
|
type: "done",
|
|
@@ -290,16 +276,8 @@ export class BaseClient {
|
|
|
290
276
|
};
|
|
291
277
|
return;
|
|
292
278
|
}
|
|
293
|
-
const startTime = performance.now();
|
|
294
279
|
try {
|
|
295
|
-
for await (const chunk of this._textStream(
|
|
296
|
-
if (chunk.type === "done") {
|
|
297
|
-
const outputTokens = chunk.result.usage?.outputTokens;
|
|
298
|
-
if (outputTokens && outputTokens > 0) {
|
|
299
|
-
const elapsedMs = performance.now() - startTime;
|
|
300
|
-
latencyTracker.record(this.config.model, elapsedMs, outputTokens);
|
|
301
|
-
}
|
|
302
|
-
}
|
|
280
|
+
for await (const chunk of this._textStream(newSmolConfig)) {
|
|
303
281
|
yield chunk;
|
|
304
282
|
}
|
|
305
283
|
}
|
|
@@ -307,7 +285,7 @@ export class BaseClient {
|
|
|
307
285
|
if (this.isAbortError(err)) {
|
|
308
286
|
this.statelogClient?.debug("Streaming request aborted or timed out", {
|
|
309
287
|
reason: "Request was aborted",
|
|
310
|
-
|
|
288
|
+
newSmolConfig,
|
|
311
289
|
});
|
|
312
290
|
yield { type: "timeout", error: "Request was aborted" };
|
|
313
291
|
}
|
package/dist/clients/google.d.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { Content, GenerateContentConfig, GoogleGenAI } from "@google/genai";
|
|
2
|
-
import {
|
|
2
|
+
import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
|
|
3
3
|
import { BaseClient } from "./baseClient.js";
|
|
4
4
|
import { ModelName } from "../models.js";
|
|
5
|
-
export type SmolGoogleConfig =
|
|
5
|
+
export type SmolGoogleConfig = SmolConfig;
|
|
6
6
|
type GeneratedRequest = {
|
|
7
7
|
contents: Content[];
|
|
8
8
|
model: ModelName;
|
|
@@ -17,8 +17,8 @@ export declare class SmolGoogle extends BaseClient implements SmolClient {
|
|
|
17
17
|
getModel(): ModelName;
|
|
18
18
|
private calculateUsageAndCost;
|
|
19
19
|
private buildRequest;
|
|
20
|
-
_textSync(config:
|
|
20
|
+
_textSync(config: SmolConfig): Promise<Result<PromptResult>>;
|
|
21
21
|
__textSync(request: GeneratedRequest): Promise<Result<PromptResult>>;
|
|
22
|
-
_textStream(config:
|
|
22
|
+
_textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
|
|
23
23
|
}
|
|
24
24
|
export {};
|
package/dist/clients/google.js
CHANGED
package/dist/clients/ollama.d.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { Ollama } from "ollama";
|
|
2
|
-
import {
|
|
2
|
+
import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
|
|
3
3
|
import { BaseClient } from "./baseClient.js";
|
|
4
4
|
import { ModelName } from "../models.js";
|
|
5
5
|
export declare const DEFAULT_OLLAMA_HOST = "http://localhost:11434";
|
|
6
|
-
export type SmolOllamaConfig =
|
|
6
|
+
export type SmolOllamaConfig = SmolConfig;
|
|
7
7
|
export declare class SmolOllama extends BaseClient implements SmolClient {
|
|
8
8
|
private logger;
|
|
9
9
|
private model;
|
|
@@ -12,6 +12,6 @@ export declare class SmolOllama extends BaseClient implements SmolClient {
|
|
|
12
12
|
getClient(): Ollama;
|
|
13
13
|
getModel(): ModelName;
|
|
14
14
|
private calculateUsageAndCost;
|
|
15
|
-
_textSync(config:
|
|
16
|
-
_textStream(config:
|
|
15
|
+
_textSync(config: SmolConfig): Promise<Result<PromptResult>>;
|
|
16
|
+
_textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
|
|
17
17
|
}
|
package/dist/clients/ollama.js
CHANGED