smoltalk 0.0.54 → 0.0.56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -22
- package/dist/clients/anthropic.d.ts +1 -0
- package/dist/clients/anthropic.js +57 -26
- package/dist/clients/baseClient.d.ts +1 -0
- package/dist/clients/baseClient.js +23 -1
- package/dist/clients/google.js +35 -3
- package/dist/clients/ollama.js +8 -0
- package/dist/clients/openai.d.ts +1 -0
- package/dist/clients/openai.js +39 -9
- package/dist/clients/openaiResponses.d.ts +1 -0
- package/dist/clients/openaiResponses.js +31 -7
- package/dist/index.d.ts +2 -0
- package/dist/index.js +1 -0
- package/dist/latencyTracker.d.ts +32 -0
- package/dist/latencyTracker.js +73 -0
- package/dist/model.d.ts +3 -2
- package/dist/model.js +11 -1
- package/dist/models.d.ts +45 -17
- package/dist/models.js +23 -8
- package/dist/smolError.d.ts +6 -0
- package/dist/smolError.js +12 -0
- package/dist/strategies/fallbackStrategy.js +23 -1
- package/dist/strategies/fastestStrategy.d.ts +17 -0
- package/dist/strategies/fastestStrategy.js +95 -0
- package/dist/strategies/index.d.ts +6 -2
- package/dist/strategies/index.js +27 -2
- package/dist/strategies/randomStrategy.d.ts +12 -0
- package/dist/strategies/randomStrategy.js +39 -0
- package/dist/strategies/types.d.ts +19 -1
- package/dist/strategies/types.js +14 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Smoltalk
|
|
2
2
|
|
|
3
|
-
Smoltalk exposes a common API to different LLM providers. There are other packages that do this, but Smoltalk allows you to build strategies on top of it. Here is a simple example.
|
|
3
|
+
Smoltalk exposes a common API to different LLM providers. There are other packages that do this, but Smoltalk allows you to build strategies on top of it. Here is a simple example.
|
|
4
4
|
|
|
5
5
|
## Install
|
|
6
6
|
|
|
@@ -11,23 +11,80 @@ pnpm install smoltalk
|
|
|
11
11
|
## Hello world example
|
|
12
12
|
|
|
13
13
|
```typescript
|
|
14
|
-
import {
|
|
15
|
-
|
|
16
|
-
const client = getClient({
|
|
17
|
-
openAiApiKey: process.env.OPENAI_API_KEY || "",
|
|
18
|
-
googleApiKey: process.env.GEMINI_API_KEY || "",
|
|
19
|
-
logLevel: "debug",
|
|
20
|
-
model: "gemini-2.0-flash-lite",
|
|
21
|
-
});
|
|
14
|
+
import { text, userMessage } from "smoltalk";
|
|
22
15
|
|
|
23
16
|
async function main() {
|
|
24
|
-
const
|
|
25
|
-
|
|
17
|
+
const messages = [userMessage("Write me a 10 word story.")];
|
|
18
|
+
const response = await text({
|
|
19
|
+
messages,
|
|
20
|
+
model: "gpt-5.4",
|
|
21
|
+
});
|
|
22
|
+
console.log(response);
|
|
26
23
|
}
|
|
27
24
|
|
|
28
25
|
main();
|
|
29
26
|
```
|
|
30
27
|
|
|
28
|
+
Other packages offer this functionality too.
|
|
29
|
+
<details>
|
|
30
|
+
<summary>Response</summary>
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
{
|
|
34
|
+
success: true,
|
|
35
|
+
value: {
|
|
36
|
+
output: 'Clock stopped; everyone smiled as tomorrow finally arrived before yesterday.',
|
|
37
|
+
toolCalls: [],
|
|
38
|
+
usage: {
|
|
39
|
+
inputTokens: 14,
|
|
40
|
+
outputTokens: 15,
|
|
41
|
+
cachedInputTokens: 0,
|
|
42
|
+
totalTokens: 29
|
|
43
|
+
},
|
|
44
|
+
cost: {
|
|
45
|
+
inputCost: 0.000035,
|
|
46
|
+
outputCost: 0.000225,
|
|
47
|
+
cachedInputCost: undefined,
|
|
48
|
+
totalCost: 0.00026,
|
|
49
|
+
currency: 'USD'
|
|
50
|
+
},
|
|
51
|
+
model: 'gpt-5.4'
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
```
|
|
55
|
+
</details>
|
|
56
|
+
|
|
57
|
+
What if you wanted to have fallbacks in case the OpenAI API was down? Just change the `model` field:
|
|
58
|
+
|
|
59
|
+
```ts
|
|
60
|
+
const response = await text({
|
|
61
|
+
messages,
|
|
62
|
+
model: fallback("gpt-5.4", "gemini-2.5-flash-lite"),
|
|
63
|
+
// or multiple fallbacks:
|
|
64
|
+
// model: fallback("gpt-5.4", ["gemini-2.5-flash-lite", "gemini-3-flash-preview"]),
|
|
65
|
+
});
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Or what if you wanted to try a couple of models and take the first response?
|
|
69
|
+
|
|
70
|
+
```ts
|
|
71
|
+
const response = await text({
|
|
72
|
+
messages,
|
|
73
|
+
model: race("gpt-5.4", "gemini-2.5-flash-lite", "o4-mini"),
|
|
74
|
+
});
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Or combine them:
|
|
78
|
+
|
|
79
|
+
```ts
|
|
80
|
+
const response = await text({
|
|
81
|
+
messages,
|
|
82
|
+
model: race(fallback("gpt-5.4", "gemini-2.5-flash-lite"), "o4-mini"),
|
|
83
|
+
});
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
You get the idea.
|
|
87
|
+
|
|
31
88
|
## Longer tutorial
|
|
32
89
|
To use Smoltalk, you first create a client:
|
|
33
90
|
|
|
@@ -157,20 +214,15 @@ Detects when the model is stuck in a repetitive tool-call loop.
|
|
|
157
214
|
| `intervention` | `string` | Action to take: `"remove-tool"`, `"remove-all-tools"`, `"throw-error"`, or `"halt-execution"`. |
|
|
158
215
|
| `excludeTools` | `string[]` | Tool names to ignore when counting consecutive calls. |
|
|
159
216
|
|
|
160
|
-
##
|
|
217
|
+
## Limitations
|
|
218
|
+
Smoltalk has support for a limited number of providers right now, and is mostly focused on the stateless APIs for text completion, though I plan to add support for more providers as well as image and speech models later. Smoltalk is also a personal project, and there are alternatives backed by companies:
|
|
219
|
+
|
|
161
220
|
- Langchain
|
|
162
|
-
OpenRouter
|
|
221
|
+
- OpenRouter
|
|
163
222
|
- Vercel AI
|
|
164
223
|
|
|
165
|
-
These are all good options, but they are quite heavy, and I wanted a lighter option. That said, you may be better off with one of the above alternatives:
|
|
166
|
-
- They are backed by a business and are more likely to be responsive.
|
|
167
|
-
- They support way more functionality and providers. Smoltalk currently supports just a subset of functionality for OpenAI and Google.
|
|
168
|
-
|
|
169
|
-
## Functionality
|
|
170
|
-
Smoltalk pretty much lets you generate text using an OpenAI or Google model, with support for function calling and structured output, and that's it. I will add functionality and providers sporadically when I have time and need.
|
|
171
|
-
|
|
172
224
|
## Contributing
|
|
173
|
-
|
|
225
|
+
Contributions are welcome. Any of the following contributions would be helpful:
|
|
174
226
|
- Adding support for API parameters or endpoints
|
|
175
227
|
- Adding support for different providers
|
|
176
|
-
- Updating the list of models
|
|
228
|
+
- Updating the list of models
|
|
@@ -12,6 +12,7 @@ export declare class SmolAnthropic extends BaseClient implements SmolClient {
|
|
|
12
12
|
getModel(): ModelName;
|
|
13
13
|
private calculateUsageAndCost;
|
|
14
14
|
private buildRequest;
|
|
15
|
+
private rethrowAsSmolError;
|
|
15
16
|
_textSync(config: PromptConfig): Promise<Result<PromptResult>>;
|
|
16
17
|
_textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
|
|
17
18
|
}
|
|
@@ -4,6 +4,7 @@ import { SystemMessage, DeveloperMessage } from "../classes/message/index.js";
|
|
|
4
4
|
import { getLogger } from "../logger.js";
|
|
5
5
|
import { success, } from "../types.js";
|
|
6
6
|
import { zodToAnthropicTool } from "../util/tool.js";
|
|
7
|
+
import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
|
|
7
8
|
import { BaseClient } from "./baseClient.js";
|
|
8
9
|
import { Model } from "../model.js";
|
|
9
10
|
const DEFAULT_MAX_TOKENS = 4096;
|
|
@@ -82,6 +83,24 @@ export class SmolAnthropic extends BaseClient {
|
|
|
82
83
|
: undefined;
|
|
83
84
|
return { system, messages: anthropicMessages, tools, thinking };
|
|
84
85
|
}
|
|
86
|
+
rethrowAsSmolError(error) {
|
|
87
|
+
if (error instanceof Anthropic.APIError) {
|
|
88
|
+
const msg = error.message.toLowerCase();
|
|
89
|
+
if (msg.includes("prompt is too long") ||
|
|
90
|
+
msg.includes("context length") ||
|
|
91
|
+
msg.includes("context window") ||
|
|
92
|
+
msg.includes("too many tokens")) {
|
|
93
|
+
throw new SmolContextWindowExceededError(error.message);
|
|
94
|
+
}
|
|
95
|
+
if (msg.includes("content policy") ||
|
|
96
|
+
msg.includes("usage policies") ||
|
|
97
|
+
msg.includes("content filtering") ||
|
|
98
|
+
msg.includes("violates our")) {
|
|
99
|
+
throw new SmolContentPolicyError(error.message);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
throw error;
|
|
103
|
+
}
|
|
85
104
|
async _textSync(config) {
|
|
86
105
|
const { system, messages, tools, thinking } = this.buildRequest(config);
|
|
87
106
|
let debugData = {
|
|
@@ -95,19 +114,25 @@ export class SmolAnthropic extends BaseClient {
|
|
|
95
114
|
this.logger.debug("Sending request to Anthropic:", debugData);
|
|
96
115
|
this.statelogClient?.promptRequest(debugData);
|
|
97
116
|
const signal = this.getAbortSignal(config);
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
117
|
+
let response;
|
|
118
|
+
try {
|
|
119
|
+
response = await this.client.messages.create({
|
|
120
|
+
model: this.getModel(),
|
|
121
|
+
max_tokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
|
|
122
|
+
messages,
|
|
123
|
+
...(system && { system }),
|
|
124
|
+
...(tools && { tools }),
|
|
125
|
+
...(thinking && { thinking }),
|
|
126
|
+
...(config.temperature !== undefined && {
|
|
127
|
+
temperature: config.temperature,
|
|
128
|
+
}),
|
|
129
|
+
...(config.rawAttributes || {}),
|
|
130
|
+
stream: false,
|
|
131
|
+
}, { ...(signal && { signal }) });
|
|
132
|
+
}
|
|
133
|
+
catch (error) {
|
|
134
|
+
this.rethrowAsSmolError(error);
|
|
135
|
+
}
|
|
111
136
|
this.logger.debug("Response from Anthropic:", response);
|
|
112
137
|
this.statelogClient?.promptResponse(response);
|
|
113
138
|
let output = null;
|
|
@@ -148,19 +173,25 @@ export class SmolAnthropic extends BaseClient {
|
|
|
148
173
|
this.logger.debug("Sending streaming request to Anthropic:", streamDebugData);
|
|
149
174
|
this.statelogClient?.promptRequest(streamDebugData);
|
|
150
175
|
const signal = this.getAbortSignal(config);
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
176
|
+
let stream;
|
|
177
|
+
try {
|
|
178
|
+
stream = await this.client.messages.create({
|
|
179
|
+
model: this.model,
|
|
180
|
+
max_tokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
|
|
181
|
+
messages,
|
|
182
|
+
...(system && { system }),
|
|
183
|
+
...(tools && { tools }),
|
|
184
|
+
...(thinking && { thinking }),
|
|
185
|
+
...(config.temperature !== undefined && {
|
|
186
|
+
temperature: config.temperature,
|
|
187
|
+
}),
|
|
188
|
+
...(config.rawAttributes || {}),
|
|
189
|
+
stream: true,
|
|
190
|
+
}, { ...(signal && { signal }) });
|
|
191
|
+
}
|
|
192
|
+
catch (error) {
|
|
193
|
+
this.rethrowAsSmolError(error);
|
|
194
|
+
}
|
|
164
195
|
let content = "";
|
|
165
196
|
// Track tool blocks by index: index -> { id, name, arguments (partial JSON) }
|
|
166
197
|
const toolBlocks = new Map();
|
|
@@ -24,6 +24,7 @@ export declare class BaseClient implements SmolClient {
|
|
|
24
24
|
continue: boolean;
|
|
25
25
|
newPromptConfig: PromptConfig;
|
|
26
26
|
};
|
|
27
|
+
private recordLatency;
|
|
27
28
|
extractResponse(promptConfig: PromptConfig, rawValue: any, schema: any, depth?: number): any;
|
|
28
29
|
textWithRetry(promptConfig: PromptConfig, retries: number): Promise<Result<PromptResult>>;
|
|
29
30
|
_textSync(promptConfig: PromptConfig): Promise<Result<PromptResult>>;
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { AssistantMessage, userMessage, assistantMessage, } from "../classes/message/index.js";
|
|
2
|
+
import { latencyTracker } from "../latencyTracker.js";
|
|
2
3
|
import { getLogger } from "../logger.js";
|
|
3
4
|
import { getModel, isTextModel } from "../models.js";
|
|
4
5
|
import { SmolStructuredOutputError } from "../smolError.js";
|
|
@@ -146,9 +147,11 @@ export class BaseClient {
|
|
|
146
147
|
value: { output: null, toolCalls: [], model: this.config.model },
|
|
147
148
|
};
|
|
148
149
|
}
|
|
150
|
+
const startTime = performance.now();
|
|
149
151
|
try {
|
|
150
152
|
const result = await this.textWithRetry(newPromptConfig, newPromptConfig.responseFormatOptions?.numRetries ||
|
|
151
153
|
DEFAULT_NUM_RETRIES);
|
|
154
|
+
this.recordLatency(startTime, result);
|
|
152
155
|
return result;
|
|
153
156
|
}
|
|
154
157
|
catch (err) {
|
|
@@ -210,6 +213,15 @@ export class BaseClient {
|
|
|
210
213
|
}
|
|
211
214
|
return { continue: true, newPromptConfig: promptConfig };
|
|
212
215
|
}
|
|
216
|
+
recordLatency(startTime, result) {
|
|
217
|
+
if (!result.success)
|
|
218
|
+
return;
|
|
219
|
+
const outputTokens = result.value.usage?.outputTokens;
|
|
220
|
+
if (!outputTokens || outputTokens <= 0)
|
|
221
|
+
return;
|
|
222
|
+
const elapsedMs = performance.now() - startTime;
|
|
223
|
+
latencyTracker.record(this.config.model, elapsedMs, outputTokens);
|
|
224
|
+
}
|
|
213
225
|
extractResponse(promptConfig, rawValue, schema, depth = 0) {
|
|
214
226
|
const MAX_DEPTH = 5;
|
|
215
227
|
if (depth > MAX_DEPTH) {
|
|
@@ -374,8 +386,18 @@ export class BaseClient {
|
|
|
374
386
|
};
|
|
375
387
|
return;
|
|
376
388
|
}
|
|
389
|
+
const startTime = performance.now();
|
|
377
390
|
try {
|
|
378
|
-
|
|
391
|
+
for await (const chunk of this._textStream(newPromptConfig)) {
|
|
392
|
+
if (chunk.type === "done") {
|
|
393
|
+
const outputTokens = chunk.result.usage?.outputTokens;
|
|
394
|
+
if (outputTokens && outputTokens > 0) {
|
|
395
|
+
const elapsedMs = performance.now() - startTime;
|
|
396
|
+
latencyTracker.record(this.config.model, elapsedMs, outputTokens);
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
yield chunk;
|
|
400
|
+
}
|
|
379
401
|
}
|
|
380
402
|
catch (err) {
|
|
381
403
|
if (this.isAbortError(err)) {
|
package/dist/clients/google.js
CHANGED
|
@@ -3,6 +3,7 @@ import { ToolCall } from "../classes/ToolCall.js";
|
|
|
3
3
|
import { getLogger } from "../logger.js";
|
|
4
4
|
import { addCosts, addTokenUsage, success, } from "../types.js";
|
|
5
5
|
import { zodToGoogleTool } from "../util/tool.js";
|
|
6
|
+
import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
|
|
6
7
|
import { sanitizeAttributes } from "../util.js";
|
|
7
8
|
import { BaseClient } from "./baseClient.js";
|
|
8
9
|
import { Model } from "../model.js";
|
|
@@ -171,10 +172,28 @@ export class SmolGoogle extends BaseClient {
|
|
|
171
172
|
async __textSync(request) {
|
|
172
173
|
this.logger.debug("Sending request to Google Gemini:", JSON.stringify(request, null, 2));
|
|
173
174
|
this.statelogClient?.promptRequest(request);
|
|
174
|
-
|
|
175
|
-
|
|
175
|
+
let result;
|
|
176
|
+
try {
|
|
177
|
+
result = await this.client.models.generateContent(request);
|
|
178
|
+
}
|
|
179
|
+
catch (error) {
|
|
180
|
+
const msg = (error.message || "").toLowerCase();
|
|
181
|
+
if (msg.includes("token") &&
|
|
182
|
+
(msg.includes("exceed") ||
|
|
183
|
+
msg.includes("too long") ||
|
|
184
|
+
msg.includes("limit"))) {
|
|
185
|
+
throw new SmolContextWindowExceededError(error.message);
|
|
186
|
+
}
|
|
187
|
+
throw error;
|
|
188
|
+
}
|
|
176
189
|
this.logger.debug("Response from Google Gemini:", JSON.stringify(result, null, 2));
|
|
177
190
|
this.statelogClient?.promptResponse(result);
|
|
191
|
+
for (const candidate of result.candidates || []) {
|
|
192
|
+
const finishReason = candidate.finishReason;
|
|
193
|
+
if (finishReason === "SAFETY" || finishReason === "PROHIBITED_CONTENT") {
|
|
194
|
+
throw new SmolContentPolicyError(`Content blocked by Google safety filter: ${finishReason}`);
|
|
195
|
+
}
|
|
196
|
+
}
|
|
178
197
|
const toolCalls = [];
|
|
179
198
|
const thinkingBlocks = [];
|
|
180
199
|
let textContent = "";
|
|
@@ -230,7 +249,20 @@ export class SmolGoogle extends BaseClient {
|
|
|
230
249
|
}
|
|
231
250
|
this.logger.debug("Sending streaming request to Google Gemini:", JSON.stringify(request, null, 2));
|
|
232
251
|
this.statelogClient?.promptRequest(request);
|
|
233
|
-
|
|
252
|
+
let stream;
|
|
253
|
+
try {
|
|
254
|
+
stream = await this.client.models.generateContentStream(request);
|
|
255
|
+
}
|
|
256
|
+
catch (error) {
|
|
257
|
+
const msg = (error.message || "").toLowerCase();
|
|
258
|
+
if (msg.includes("token") &&
|
|
259
|
+
(msg.includes("exceed") ||
|
|
260
|
+
msg.includes("too long") ||
|
|
261
|
+
msg.includes("limit"))) {
|
|
262
|
+
throw new SmolContextWindowExceededError(error.message);
|
|
263
|
+
}
|
|
264
|
+
throw error;
|
|
265
|
+
}
|
|
234
266
|
let content = "";
|
|
235
267
|
const toolCallsMap = new Map();
|
|
236
268
|
const thinkingBlocks = [];
|
package/dist/clients/ollama.js
CHANGED
|
@@ -5,6 +5,7 @@ import { success, } from "../types.js";
|
|
|
5
5
|
import { zodToGoogleTool } from "../util/tool.js";
|
|
6
6
|
import { sanitizeAttributes } from "../util.js";
|
|
7
7
|
import { BaseClient } from "./baseClient.js";
|
|
8
|
+
import { SmolContextWindowExceededError } from "../smolError.js";
|
|
8
9
|
import { Model } from "../model.js";
|
|
9
10
|
export const DEFAULT_OLLAMA_HOST = "http://localhost:11434";
|
|
10
11
|
export class SmolOllama extends BaseClient {
|
|
@@ -80,6 +81,13 @@ export class SmolOllama extends BaseClient {
|
|
|
80
81
|
// @ts-ignore
|
|
81
82
|
result = await this.client.chat(request);
|
|
82
83
|
}
|
|
84
|
+
catch (error) {
|
|
85
|
+
const msg = (error.message || "").toLowerCase();
|
|
86
|
+
if (msg.includes("context length") || msg.includes("context window")) {
|
|
87
|
+
throw new SmolContextWindowExceededError(error.message);
|
|
88
|
+
}
|
|
89
|
+
throw error;
|
|
90
|
+
}
|
|
83
91
|
finally {
|
|
84
92
|
if (signal && abortHandler) {
|
|
85
93
|
signal.removeEventListener("abort", abortHandler);
|
package/dist/clients/openai.d.ts
CHANGED
|
@@ -12,6 +12,7 @@ export declare class SmolOpenAi extends BaseClient implements SmolClient {
|
|
|
12
12
|
getModel(): ModelName;
|
|
13
13
|
private calculateUsageAndCost;
|
|
14
14
|
private buildRequest;
|
|
15
|
+
private rethrowAsSmolError;
|
|
15
16
|
_textSync(config: PromptConfig): Promise<Result<PromptResult>>;
|
|
16
17
|
_textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
|
|
17
18
|
}
|
package/dist/clients/openai.js
CHANGED
|
@@ -4,6 +4,7 @@ import { ToolCall } from "../classes/ToolCall.js";
|
|
|
4
4
|
import { isFunctionToolCall, sanitizeAttributes } from "../util.js";
|
|
5
5
|
import { getLogger } from "../logger.js";
|
|
6
6
|
import { BaseClient } from "./baseClient.js";
|
|
7
|
+
import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
|
|
7
8
|
import { zodToOpenAITool } from "../util/tool.js";
|
|
8
9
|
import { Model } from "../model.js";
|
|
9
10
|
export class SmolOpenAi extends BaseClient {
|
|
@@ -68,17 +69,37 @@ export class SmolOpenAi extends BaseClient {
|
|
|
68
69
|
}
|
|
69
70
|
return request;
|
|
70
71
|
}
|
|
72
|
+
rethrowAsSmolError(error) {
|
|
73
|
+
if (error instanceof OpenAI.APIError) {
|
|
74
|
+
if (error.code === "context_length_exceeded") {
|
|
75
|
+
throw new SmolContextWindowExceededError(error.message);
|
|
76
|
+
}
|
|
77
|
+
if (error.code === "content_policy_violation") {
|
|
78
|
+
throw new SmolContentPolicyError(error.message);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
throw error;
|
|
82
|
+
}
|
|
71
83
|
async _textSync(config) {
|
|
72
84
|
const request = this.buildRequest(config);
|
|
73
85
|
this.logger.debug("Sending request to OpenAI:", JSON.stringify(request, null, 2));
|
|
74
86
|
this.statelogClient?.promptRequest(request);
|
|
75
87
|
const signal = this.getAbortSignal(config);
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
88
|
+
let completion;
|
|
89
|
+
try {
|
|
90
|
+
completion = await this.client.chat.completions.create({
|
|
91
|
+
...request,
|
|
92
|
+
stream: false,
|
|
93
|
+
}, { ...(signal && { signal }) });
|
|
94
|
+
}
|
|
95
|
+
catch (error) {
|
|
96
|
+
this.rethrowAsSmolError(error);
|
|
97
|
+
}
|
|
80
98
|
this.logger.debug("Response from OpenAI:", JSON.stringify(completion, null, 2));
|
|
81
99
|
this.statelogClient?.promptResponse(completion);
|
|
100
|
+
if (completion.choices[0]?.finish_reason === "content_filter") {
|
|
101
|
+
throw new SmolContentPolicyError("Content blocked by OpenAI content filter");
|
|
102
|
+
}
|
|
82
103
|
const message = completion.choices[0].message;
|
|
83
104
|
const output = message.content;
|
|
84
105
|
const _toolCalls = message.tool_calls;
|
|
@@ -109,11 +130,17 @@ export class SmolOpenAi extends BaseClient {
|
|
|
109
130
|
this.logger.debug("Sending streaming request to OpenAI:", JSON.stringify(request, null, 2));
|
|
110
131
|
this.statelogClient?.promptRequest(request);
|
|
111
132
|
const signal = this.getAbortSignal(config);
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
133
|
+
let completion;
|
|
134
|
+
try {
|
|
135
|
+
completion = await this.client.chat.completions.create({
|
|
136
|
+
...request,
|
|
137
|
+
stream: true,
|
|
138
|
+
stream_options: { include_usage: true },
|
|
139
|
+
}, { ...(signal && { signal }) });
|
|
140
|
+
}
|
|
141
|
+
catch (error) {
|
|
142
|
+
this.rethrowAsSmolError(error);
|
|
143
|
+
}
|
|
117
144
|
let content = "";
|
|
118
145
|
const toolCallsMap = new Map();
|
|
119
146
|
let usage;
|
|
@@ -127,6 +154,9 @@ export class SmolOpenAi extends BaseClient {
|
|
|
127
154
|
}
|
|
128
155
|
if (!chunk.choices || chunk.choices.length === 0)
|
|
129
156
|
continue;
|
|
157
|
+
if (chunk.choices[0]?.finish_reason === "content_filter") {
|
|
158
|
+
throw new SmolContentPolicyError("Content blocked by OpenAI content filter");
|
|
159
|
+
}
|
|
130
160
|
const delta = chunk.choices[0]?.delta;
|
|
131
161
|
if (!delta)
|
|
132
162
|
continue;
|
|
@@ -13,6 +13,7 @@ export declare class SmolOpenAiResponses extends BaseClient implements SmolClien
|
|
|
13
13
|
private convertMessages;
|
|
14
14
|
private buildRequest;
|
|
15
15
|
private calculateUsageAndCost;
|
|
16
|
+
private rethrowAsSmolError;
|
|
16
17
|
_textSync(config: PromptConfig): Promise<Result<PromptResult>>;
|
|
17
18
|
_textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
|
|
18
19
|
}
|
|
@@ -6,6 +6,7 @@ import { BaseClient } from "./baseClient.js";
|
|
|
6
6
|
import { zodToOpenAIResponsesTool } from "../util/tool.js";
|
|
7
7
|
import { sanitizeAttributes } from "../util.js";
|
|
8
8
|
import { Model } from "../model.js";
|
|
9
|
+
import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
|
|
9
10
|
export class SmolOpenAiResponses extends BaseClient {
|
|
10
11
|
client;
|
|
11
12
|
logger;
|
|
@@ -101,15 +102,32 @@ export class SmolOpenAiResponses extends BaseClient {
|
|
|
101
102
|
}
|
|
102
103
|
return { usage, cost };
|
|
103
104
|
}
|
|
105
|
+
rethrowAsSmolError(error) {
|
|
106
|
+
if (error instanceof OpenAI.APIError) {
|
|
107
|
+
if (error.code === "context_length_exceeded") {
|
|
108
|
+
throw new SmolContextWindowExceededError(error.message);
|
|
109
|
+
}
|
|
110
|
+
if (error.code === "content_policy_violation") {
|
|
111
|
+
throw new SmolContentPolicyError(error.message);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
throw error;
|
|
115
|
+
}
|
|
104
116
|
async _textSync(config) {
|
|
105
117
|
const request = this.buildRequest(config);
|
|
106
118
|
this.logger.debug("Sending request to OpenAI Responses API:", JSON.stringify(request, null, 2));
|
|
107
119
|
this.statelogClient?.promptRequest(request);
|
|
108
120
|
const signal = this.getAbortSignal(config);
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
121
|
+
let response;
|
|
122
|
+
try {
|
|
123
|
+
response = await this.client.responses.create({
|
|
124
|
+
...request,
|
|
125
|
+
stream: false,
|
|
126
|
+
}, { ...(signal && { signal }) });
|
|
127
|
+
}
|
|
128
|
+
catch (error) {
|
|
129
|
+
this.rethrowAsSmolError(error);
|
|
130
|
+
}
|
|
113
131
|
this.logger.debug("Response from OpenAI Responses API:", JSON.stringify(response, null, 2));
|
|
114
132
|
this.statelogClient?.promptResponse(response);
|
|
115
133
|
const output = response.output_text || null;
|
|
@@ -133,9 +151,15 @@ export class SmolOpenAiResponses extends BaseClient {
|
|
|
133
151
|
this.logger.debug("Sending streaming request to OpenAI Responses API:", JSON.stringify(request, null, 2));
|
|
134
152
|
this.statelogClient?.promptRequest(request);
|
|
135
153
|
const signal = this.getAbortSignal(config);
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
154
|
+
let stream;
|
|
155
|
+
try {
|
|
156
|
+
stream = this.client.responses.stream(request, {
|
|
157
|
+
...(signal && { signal }),
|
|
158
|
+
});
|
|
159
|
+
}
|
|
160
|
+
catch (error) {
|
|
161
|
+
this.rethrowAsSmolError(error);
|
|
162
|
+
}
|
|
139
163
|
let content = "";
|
|
140
164
|
const functionCalls = new Map();
|
|
141
165
|
let usage;
|
package/dist/index.d.ts
CHANGED
|
@@ -8,3 +8,5 @@ export * from "./classes/message/index.js";
|
|
|
8
8
|
export * from "./functions.js";
|
|
9
9
|
export * from "./classes/ToolCall.js";
|
|
10
10
|
export * from "./strategies/index.js";
|
|
11
|
+
export { latencyTracker } from "./latencyTracker.js";
|
|
12
|
+
export type { LatencySample } from "./latencyTracker.js";
|
package/dist/index.js
CHANGED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
export type LatencySample = {
|
|
2
|
+
/** Milliseconds per output token */
|
|
3
|
+
msPerToken: number;
|
|
4
|
+
/** Timestamp when sample was recorded */
|
|
5
|
+
timestamp: number;
|
|
6
|
+
};
|
|
7
|
+
declare class LatencyTracker {
|
|
8
|
+
private samples;
|
|
9
|
+
private windowSize;
|
|
10
|
+
constructor(windowSize?: number);
|
|
11
|
+
/** Record a latency sample for a model. */
|
|
12
|
+
record(model: string, elapsedMs: number, outputTokens: number): void;
|
|
13
|
+
/** Get the windowed mean ms-per-token for a model, or null if no samples. */
|
|
14
|
+
getMeanMsPerToken(model: string): number | null;
|
|
15
|
+
/**
|
|
16
|
+
* Get estimated output tokens per second for a model based on tracked latency.
|
|
17
|
+
* Returns null if no samples exist or if the number of samples is below the minimum required.
|
|
18
|
+
*/
|
|
19
|
+
getTokensPerSecond(model: string, minSamples?: number): number | null;
|
|
20
|
+
/** Get the number of samples recorded for a model. */
|
|
21
|
+
getSampleCount(model: string): number;
|
|
22
|
+
/** Get all samples for a model (defensive copy). */
|
|
23
|
+
getSamples(model: string): LatencySample[];
|
|
24
|
+
/** Clear all samples for a model. */
|
|
25
|
+
clear(model?: string): void;
|
|
26
|
+
/** Update the window size. Existing samples beyond the new size are trimmed. */
|
|
27
|
+
setWindowSize(size: number): void;
|
|
28
|
+
getWindowSize(): number;
|
|
29
|
+
}
|
|
30
|
+
/** Global singleton latency tracker. */
|
|
31
|
+
export declare const latencyTracker: LatencyTracker;
|
|
32
|
+
export {};
|