smoltalk 0.0.67 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -154
- package/dist/client.d.ts +3 -3
- package/dist/client.js +9 -5
- package/dist/clients/anthropic.d.ts +4 -4
- package/dist/clients/anthropic.js +1 -1
- package/dist/clients/baseClient.d.ts +17 -20
- package/dist/clients/baseClient.js +21 -43
- package/dist/clients/google.d.ts +4 -4
- package/dist/clients/google.js +1 -1
- package/dist/clients/ollama.d.ts +4 -4
- package/dist/clients/ollama.js +1 -1
- package/dist/clients/openai.d.ts +4 -4
- package/dist/clients/openai.js +2 -1
- package/dist/clients/openaiResponses.d.ts +4 -4
- package/dist/clients/openaiResponses.js +2 -1
- package/dist/functions.d.ts +13 -10
- package/dist/functions.js +4 -55
- package/dist/index.d.ts +2 -4
- package/dist/index.js +1 -2
- package/dist/model.d.ts +2 -5
- package/dist/model.js +11 -27
- package/dist/models.d.ts +2 -2
- package/dist/models.js +3 -1
- package/dist/testing/index.d.ts +9 -0
- package/dist/testing/index.js +41 -0
- package/dist/types.d.ts +52 -160
- package/dist/types.js +1 -1
- package/dist/util/logger.d.ts +17 -1
- package/dist/util/logger.js +68 -5
- package/package.json +15 -19
- package/dist/clients/llamaCpp.d.ts +0 -28
- package/dist/clients/llamaCpp.js +0 -316
- package/dist/latencyTracker.d.ts +0 -32
- package/dist/latencyTracker.js +0 -73
- package/dist/middleware.d.ts +0 -54
- package/dist/middleware.js +0 -321
- package/dist/strategies/baseStrategy.d.ts +0 -22
- package/dist/strategies/baseStrategy.js +0 -62
- package/dist/strategies/fallbackStrategy.d.ts +0 -14
- package/dist/strategies/fallbackStrategy.js +0 -122
- package/dist/strategies/fastestStrategy.d.ts +0 -19
- package/dist/strategies/fastestStrategy.js +0 -108
- package/dist/strategies/idStrategy.d.ts +0 -16
- package/dist/strategies/idStrategy.js +0 -62
- package/dist/strategies/index.d.ts +0 -17
- package/dist/strategies/index.js +0 -68
- package/dist/strategies/raceStrategy.d.ts +0 -12
- package/dist/strategies/raceStrategy.js +0 -72
- package/dist/strategies/randomStrategy.d.ts +0 -13
- package/dist/strategies/randomStrategy.js +0 -54
- package/dist/strategies/timeoutStrategy.d.ts +0 -13
- package/dist/strategies/timeoutStrategy.js +0 -65
- package/dist/strategies/types.d.ts +0 -78
- package/dist/strategies/types.js +0 -58
package/dist/middleware.js
DELETED
|
@@ -1,321 +0,0 @@
|
|
|
1
|
-
import { success } from "./types.js";
|
|
2
|
-
import { addTokenUsage } from "./types/tokenUsage.js";
|
|
3
|
-
import { addCosts } from "./types/costEstimate.js";
|
|
4
|
-
/**
 * Execute one middleware check and report whether it blocks the request.
 *
 * The check's own messages are placed in front of the parent conversation and
 * the check's response format replaces the parent's; nested middleware and
 * streaming are switched off for the check call.
 *
 * @param check Middleware check providing `messages`, `responseFormat`,
 *   `responseFormatOptions` and a `decide(value)` callback.
 * @param parentConfig Prompt config of the request being guarded.
 * @param textSyncFn Function that runs a non-streaming prompt and resolves to a result object.
 * @returns `{ blocked, result, usage?, cost? }`. Any failure of the check
 *   (thrown error, unsuccessful result, throwing `decide`) is reported as
 *   `blocked: true` with an explanatory output.
 */
export async function runMiddlewareCheck(check, parentConfig, textSyncFn) {
    const messageOf = (thrown) => (thrown instanceof Error ? thrown.message : String(thrown));
    const checkConfig = {
        ...parentConfig,
        messages: [...check.messages, ...parentConfig.messages],
        responseFormat: check.responseFormat,
        responseFormatOptions: check.responseFormatOptions,
        middleware: undefined,
        stream: undefined,
    };

    let response;
    try {
        response = await textSyncFn(checkConfig);
    }
    catch (thrown) {
        // The call itself blew up: block, with no usage/cost to report.
        return {
            blocked: true,
            result: success({
                output: `Middleware check failed: ${messageOf(thrown)}`,
                toolCalls: [],
            }),
        };
    }

    if (!response.success) {
        return {
            blocked: true,
            result: success({
                output: `Middleware check failed: ${response.error}`,
                toolCalls: [],
            }),
            usage: undefined,
            cost: undefined,
        };
    }

    const { usage, cost } = response.value;

    let verdict;
    try {
        verdict = check.decide(response.value);
    }
    catch (thrown) {
        return {
            blocked: true,
            result: success({
                output: `Middleware decide() failed: ${messageOf(thrown)}`,
                toolCalls: [],
                usage,
                cost,
            }),
            usage,
            cost,
        };
    }

    // A null/undefined verdict means "let the request through".
    if (verdict == null) {
        return { blocked: false, result: response, usage, cost };
    }

    // Any other verdict becomes the output returned in place of the main prompt.
    return {
        blocked: true,
        result: success({ output: verdict, toolCalls: [], usage, cost }),
        usage,
        cost,
    };
}
|
|
82
|
-
/**
 * Run a list of middleware checks and combine them into one MiddlewareResult.
 *
 * @param checks Checks to run.
 * @param mode `"sequential"` runs the checks one after another; any other
 *   value runs them all concurrently.
 * @param parentConfig Prompt config of the request being guarded.
 * @param textSyncFn Function that runs a non-streaming prompt.
 */
export async function runMiddlewareChecks(checks, mode, parentConfig, textSyncFn) {
    const runner = mode === "sequential" ? runSequential : runParallel;
    return runner(checks, parentConfig, textSyncFn);
}
|
|
94
|
-
/**
 * Run checks one at a time, stopping at the first one that blocks.
 * Usage and cost are accumulated across every check that actually ran and
 * are written onto the blocking result before it is returned.
 */
async function runSequential(checks, parentConfig, textSyncFn) {
    let usageTotal;
    let costTotal;
    for (const check of checks) {
        const outcome = await runMiddlewareCheck(check, parentConfig, textSyncFn);
        usageTotal = addTokenUsage(usageTotal, outcome.usage);
        costTotal = safeAddCosts(costTotal, outcome.cost);
        if (!outcome.blocked) {
            continue;
        }
        if (outcome.result.success) {
            // Report the running totals, not just this check's share.
            Object.assign(outcome.result.value, { usage: usageTotal, cost: costTotal });
        }
        return { ...outcome, usage: usageTotal, cost: costTotal };
    }
    // Nothing blocked: `result` is only a placeholder, callers look at `blocked` first.
    return {
        blocked: false,
        result: success({ output: null, toolCalls: [] }),
        usage: usageTotal,
        cost: costTotal,
    };
}
|
|
117
|
-
/**
 * Run every check concurrently and wait for all of them.
 * Usage and cost are summed over all checks; if any check blocked, the first
 * blocking one (in `checks` order) is returned carrying those totals.
 */
async function runParallel(checks, parentConfig, textSyncFn) {
    const outcomes = await Promise.all(
        checks.map((check) => runMiddlewareCheck(check, parentConfig, textSyncFn)),
    );

    let usageTotal;
    let costTotal;
    outcomes.forEach((outcome) => {
        usageTotal = addTokenUsage(usageTotal, outcome.usage);
        costTotal = safeAddCosts(costTotal, outcome.cost);
    });

    const blocker = outcomes.find((outcome) => outcome.blocked);
    if (!blocker) {
        // Nothing blocked: `result` is only a placeholder, callers look at `blocked` first.
        return {
            blocked: false,
            result: success({ output: null, toolCalls: [] }),
            usage: usageTotal,
            cost: costTotal,
        };
    }

    if (blocker.result.success) {
        Object.assign(blocker.result.value, { usage: usageTotal, cost: costTotal });
    }
    return { ...blocker, usage: usageTotal, cost: costTotal };
}
|
|
141
|
-
/**
 * Best-effort cost addition.
 * Delegates to addCosts; if that throws (the original comment attributes this
 * to a currency mismatch), the first defined operand is returned instead.
 */
function safeAddCosts(a, b) {
    let total;
    try {
        total = addCosts(a, b);
    }
    catch {
        total = a ?? b;
    }
    return total;
}
|
|
154
|
-
/** Return a shallow copy of `config` without its `middleware` property. */
function stripMiddleware(config) {
    const stripped = { ...config };
    delete stripped.middleware;
    return stripped;
}
|
|
158
|
-
/**
 * High-level middleware orchestration for sync (non-streaming) calls.
 *
 * - No middleware / no checks: returns null ("proceed normally").
 * - timing "before": runs the checks first; returns the blocking result, or
 *   null when every check passed.
 * - timing "parallel": starts the main prompt and the checks together. If a
 *   check blocks, the main prompt is aborted and the blocking result is
 *   returned (with any usage/cost the main prompt still reported added in);
 *   otherwise the main prompt's result is returned.
 * - Any other timing: returns null.
 *
 * @param config Prompt config, including `middleware` and an optional `abortSignal`.
 * @param runMainPrompt Function that runs the main prompt with a given config.
 * @param textSyncFn Function used to run each middleware check.
 */
export async function executeMiddlewareSync(config, runMainPrompt, textSyncFn) {
    const middleware = config.middleware;
    if (!middleware || middleware.checks.length === 0) {
        return null;
    }
    const configWithoutMiddleware = stripMiddleware(config);

    if (middleware.timing === "before") {
        const middlewareResult = await runMiddlewareChecks(middleware.checks, middleware.mode, configWithoutMiddleware, textSyncFn);
        return middlewareResult.blocked ? middlewareResult.result : null;
    }
    if (middleware.timing !== "parallel") {
        return null;
    }

    // Main prompt and middleware get separate controllers so a blocking check
    // can cancel the main prompt without cancelling itself.
    const mainAbort = new AbortController();
    const middlewareAbort = new AbortController();
    const parentAbortSignal = configWithoutMiddleware.abortSignal;
    const abortBoth = () => {
        mainAbort.abort();
        middlewareAbort.abort();
    };
    if (parentAbortSignal) {
        if (parentAbortSignal.aborted) {
            // An already-aborted signal never fires "abort" again, so a
            // listener alone would silently ignore the caller's cancellation.
            abortBoth();
        }
        else {
            parentAbortSignal.addEventListener("abort", abortBoth, { once: true });
        }
    }

    try {
        const mainPromise = runMainPrompt({
            ...configWithoutMiddleware,
            abortSignal: mainAbort.signal,
        });
        const middlewareResult = await runMiddlewareChecks(middleware.checks, middleware.mode, { ...configWithoutMiddleware, abortSignal: middlewareAbort.signal }, textSyncFn);
        if (!middlewareResult.blocked) {
            return await mainPromise;
        }

        mainAbort.abort();
        // Await the aborted main promise to capture any partial usage/cost;
        // a rejection here is expected and deliberately ignored.
        const mainPartialResult = await mainPromise.catch(() => undefined);
        if (mainPartialResult?.success && middlewareResult.result.success) {
            const blockedValue = middlewareResult.result.value;
            blockedValue.usage = addTokenUsage(blockedValue.usage, mainPartialResult.value.usage);
            blockedValue.cost = safeAddCosts(blockedValue.cost, mainPartialResult.value.cost);
        }
        return middlewareResult.result;
    }
    finally {
        // Removing a listener that was never added is a harmless no-op.
        parentAbortSignal?.removeEventListener("abort", abortBoth);
    }
}
|
|
210
|
-
/**
 * High-level middleware orchestration for streaming calls.
 * Only call this when middleware is configured - `config.middleware` is
 * dereferenced unconditionally.
 *
 * - timing "before": runs the checks first. If one blocks, a single "done"
 *   (or "error") chunk built from the blocking result is yielded; otherwise
 *   the main stream is passed through.
 * - timing "parallel": starts the main stream and the checks together. Chunks
 *   are buffered until the checks settle. If one blocks, the main stream is
 *   aborted and only the blocking result is yielded; otherwise the buffered
 *   chunks are flushed and the rest of the stream is passed through.
 *
 * NOTE(review): any other timing value yields nothing at all - confirm that
 * callers never pass one.
 *
 * @param config Prompt config, including `middleware` and an optional `abortSignal`.
 * @param getStream Function returning the main stream (async iterable of chunks) for a config.
 * @param textSyncFn Function used to run each middleware check.
 */
export async function* executeMiddlewareStream(config, getStream, textSyncFn) {
    const middleware = config.middleware;
    const configWithoutMiddleware = stripMiddleware(config);

    if (middleware.timing === "before") {
        const middlewareResult = await runMiddlewareChecks(middleware.checks, middleware.mode, configWithoutMiddleware, textSyncFn);
        if (middlewareResult.blocked) {
            if (middlewareResult.result.success) {
                yield { type: "done", result: middlewareResult.result.value };
            }
            else {
                yield { type: "error", error: middlewareResult.result.error };
            }
            return;
        }
        yield* getStream(configWithoutMiddleware);
        return;
    }
    if (middleware.timing !== "parallel") {
        return;
    }

    const mainAbort = new AbortController();
    const middlewareAbort = new AbortController();
    const parentAbortSignal = configWithoutMiddleware.abortSignal;
    const abortBoth = () => {
        mainAbort.abort();
        middlewareAbort.abort();
    };
    if (parentAbortSignal) {
        if (parentAbortSignal.aborted) {
            // An already-aborted signal never fires "abort" again, so a
            // listener alone would silently ignore the caller's cancellation.
            abortBoth();
        }
        else {
            parentAbortSignal.addEventListener("abort", abortBoth, { once: true });
        }
    }

    try {
        const stream = getStream({
            ...configWithoutMiddleware,
            abortSignal: mainAbort.signal,
        });
        const middlewarePromise = runMiddlewareChecks(middleware.checks, middleware.mode, { ...configWithoutMiddleware, abortSignal: middlewareAbort.signal }, textSyncFn);
        const buffer = [];
        let streamDone = false;
        let middlewareSettled = false;
        let middlewareResult;
        const middlewareFinished = middlewarePromise.then((r) => {
            middlewareSettled = true;
            middlewareResult = r;
            return r;
        });
        const iterator = stream[Symbol.asyncIterator]();
        // The iterator.next() call that was still in flight when middleware
        // won the race. Its result belongs to the stream and must be consumed
        // before asking the iterator for anything else.
        let pendingNext;

        while (true) {
            // Race the next chunk against middleware completion so we can
            // abort the main stream promptly when middleware blocks.
            const next = iterator.next();
            const raceResult = await Promise.race([
                next.then((v) => ({ source: "stream", ...v })),
                middlewareFinished.then(() => ({ source: "middleware", done: false, value: undefined })),
            ]);
            if (raceResult.source === "middleware") {
                pendingNext = next;
                break;
            }
            if (raceResult.done) {
                streamDone = true;
                break;
            }
            const chunk = raceResult.value;
            buffer.push(chunk);
            if (chunk.type === "done" || chunk.type === "error") {
                streamDone = true;
            }
            if (middlewareSettled) {
                break;
            }
        }

        if (!middlewareSettled) {
            middlewareResult = await middlewareFinished;
        }

        if (middlewareResult.blocked) {
            mainAbort.abort();
            // Check buffer for a done chunk that may contain partial usage/cost
            const doneChunk = buffer.find((c) => c.type === "done");
            if (doneChunk && middlewareResult.result.success) {
                const blockedValue = middlewareResult.result.value;
                blockedValue.usage = addTokenUsage(blockedValue.usage, doneChunk.result.usage);
                blockedValue.cost = safeAddCosts(blockedValue.cost, doneChunk.result.cost);
            }
            if (middlewareResult.result.success) {
                yield { type: "done", result: middlewareResult.result.value };
            }
            else {
                yield { type: "error", error: middlewareResult.result.error };
            }
            return;
        }

        // Checks passed: replay what was held back, then pass the rest through.
        for (const chunk of buffer) {
            yield chunk;
        }
        if (!streamDone) {
            // Resume with the in-flight request if there is one; issuing a
            // fresh next() instead would drop the chunk it resolves to.
            let step = pendingNext ? await pendingNext : await iterator.next();
            while (!step.done) {
                yield step.value;
                step = await iterator.next();
            }
        }
    }
    finally {
        // Removing a listener that was never added is a harmless no-op.
        parentAbortSignal?.removeEventListener("abort", abortBoth);
    }
}
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
import { StatelogClient } from "../statelogClient.js";
|
|
2
|
-
import { PromptResult, Result, SmolPromptConfig, StreamChunk } from "../types.js";
|
|
3
|
-
import { Strategy, StrategyJSON } from "./types.js";
|
|
4
|
-
/**
 * Base class for model-selection strategies.
 * Public entry points (`text`, `textSync`, `textStream`) delegate to the
 * underscore-prefixed hooks.
 */
export declare class BaseStrategy implements Strategy {
    /** Statelog client resolved from the most recent config; undefined when the config has no `statelog`. */
    statelogClient?: StatelogClient;

    /** Run the strategy: a promise of the result, or a chunk generator when `stream` is true. */
    text(config: Omit<SmolPromptConfig, "stream">): Promise<Result<PromptResult>>;
    text(config: Omit<SmolPromptConfig, "stream"> & {
        stream: false;
    }): Promise<Result<PromptResult>>;
    text(config: Omit<SmolPromptConfig, "stream"> & {
        stream: true;
    }): AsyncGenerator<StreamChunk>;
    text(config: SmolPromptConfig): Promise<Result<PromptResult>> | AsyncGenerator<StreamChunk>;

    /** Non-streaming entry point; delegates to `_textSync`. */
    textSync(config: SmolPromptConfig): Promise<Result<PromptResult>>;
    /** Streaming entry point; delegates to `_textStream`. */
    textStream(config: SmolPromptConfig): AsyncGenerator<StreamChunk>;

    /** Hook behind `text` for non-streaming configs. */
    _text(config: SmolPromptConfig): Promise<Result<PromptResult>>;
    /** Hook behind `textSync`. */
    _textSync(config: SmolPromptConfig): Promise<Result<PromptResult>>;
    /** Hook behind `textStream`. */
    _textStream(config: SmolPromptConfig): AsyncGenerator<StreamChunk>;

    /** Serializable description of the strategy. */
    toJSON(): StrategyJSON;
    /** Full human-readable description. */
    toString(): string;
    /** Abbreviated human-readable description. */
    toShortString(): string;
}
|
|
@@ -1,62 +0,0 @@
|
|
|
1
|
-
import { getStatelogClient } from "../statelogClient.js";
|
|
2
|
-
/** Resolve the statelog client for a prompt config, or undefined when none is configured. */
function statelogClientFor(config) {
    if (!config.statelog) {
        return undefined;
    }
    return getStatelogClient(config.statelog);
}

/**
 * Base class for strategies. The public entry points set up logging and then
 * hand off to `_text` / `_textSync` / `_textStream`, which subclasses override.
 */
export class BaseStrategy {
    statelogClient;

    /**
     * Main entry point. Fires the optional `onStrategyStart` hook, then
     * returns a chunk generator when `config.stream` is truthy and the
     * `_text` promise otherwise.
     */
    text(config) {
        this.statelogClient = statelogClientFor(config);
        this.statelogClient?.debug(`Starting strategy ${this.toString()}`);
        const onStart = config.hooks?.onStrategyStart;
        if (onStart) {
            this.statelogClient?.debug(`Calling onStrategyStart hook for strategy ${this.toString()}`);
            // Invoke through `config.hooks` so the hook keeps its receiver.
            config.hooks.onStrategyStart(this, config);
        }
        return config.stream ? this.textStream(config) : this._text(config);
    }

    /** Non-streaming entry point; does not fire `onStrategyStart`. */
    async textSync(config) {
        this.statelogClient = statelogClientFor(config);
        this.statelogClient?.debug(`Starting strategy (sync) ${this.toString()}`);
        return this._textSync(config);
    }

    /** Streaming entry point; does not fire `onStrategyStart`. */
    textStream(config) {
        this.statelogClient = statelogClientFor(config);
        this.statelogClient?.debug(`Starting strategy (stream) ${this.toString()}`);
        return this._textStream(config);
    }

    /** Subclass hook; the base implementation always rejects. */
    async _text(config) {
        throw new Error("_text method not implemented.");
    }

    /** Subclass hook; the base implementation always rejects. */
    async _textSync(config) {
        throw new Error("_textSync method not implemented.");
    }

    /**
     * Default streaming implementation: run `_textSync` and replay its result
     * as chunks - optional text, one chunk per tool call, then "done" - or a
     * single "error" chunk on failure.
     */
    async *_textStream(config) {
        const outcome = await this._textSync(config);
        if (!outcome.success) {
            yield { type: "error", error: outcome.error };
            return;
        }
        if (outcome.value.output) {
            yield { type: "text", text: outcome.value.output };
        }
        for (const toolCall of outcome.value.toolCalls) {
            yield { type: "tool_call", toolCall };
        }
        yield { type: "done", result: outcome.value };
    }

    /** Subclass hook; the base implementation always throws. */
    toJSON() {
        throw new Error("toJSON method not implemented.");
    }

    toString() {
        return "BaseStrategy";
    }

    /** Defaults to the full description. */
    toShortString() {
        return this.toString();
    }
}
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
import { ModelParam, PromptResult, Result, SmolPromptConfig } from "../types.js";
|
|
2
|
-
import { BaseStrategy } from "./baseStrategy.js";
|
|
3
|
-
import { FallbackStrategyConfig, Strategy, StrategyJSON } from "./types.js";
|
|
4
|
-
/**
 * Strategy that runs a primary strategy and, when it throws, retries with
 * fallback strategies chosen by the kind of failure.
 */
export declare class FallbackStrategy extends BaseStrategy {
    /** Strategy tried first. */
    primaryStrategy: Strategy;
    /** Fallback strategy lists, keyed by failure reason. */
    config: FallbackStrategyConfig;

    constructor(primaryStrategy: ModelParam, config: FallbackStrategyConfig);

    toString(): string;
    toShortString(): string;

    /** Runs the primary strategy with the configured fallbacks. */
    _text(config: SmolPromptConfig): Promise<Result<PromptResult>>;
    /** Runs `strategy`, falling back through `fallbackStrategies` on failure. */
    _textWithFallbacks(config: SmolPromptConfig, strategy: Strategy, fallbackStrategies: FallbackStrategyConfig): Promise<Result<PromptResult>>;

    toJSON(): StrategyJSON;
    /** Rebuilds a FallbackStrategy from its JSON form. */
    static fromJSON(json: unknown): FallbackStrategy;
}
|
|
@@ -1,122 +0,0 @@
|
|
|
1
|
-
import { z } from "zod";
|
|
2
|
-
import { SmolContentPolicyError, SmolContextWindowExceededError, SmolStructuredOutputError, SmolTimeoutError, } from "../smolError.js";
|
|
3
|
-
import { success, } from "../types.js";
|
|
4
|
-
import { BaseStrategy } from "./baseStrategy.js";
|
|
5
|
-
import { IDStrategy } from "./idStrategy.js";
|
|
6
|
-
import { fromJSON } from "./index.js";
|
|
7
|
-
import { FallbackStrategyJSONSchema, } from "./types.js";
|
|
8
|
-
/**
 * Map a thrown value to its reason-specific fallback list, if it has one.
 * The table is built at call time and checked in a fixed order.
 * @returns `{ key, label }`, or undefined for an untyped failure.
 */
function classifyFallbackError(error) {
    const reasons = [
        { type: SmolTimeoutError, key: "timeout", label: "timeout" },
        { type: SmolStructuredOutputError, key: "structuredOutputFailure", label: "structured output failure" },
        { type: SmolContentPolicyError, key: "contentPolicyViolation", label: "content policy violation" },
        { type: SmolContextWindowExceededError, key: "contextWindowExceeded", label: "context window exceeded" },
    ];
    return reasons.find((reason) => error instanceof reason.type);
}

/** Loggable message for any thrown value, including null/undefined and non-Error values. */
function describeThrown(error) {
    return error?.message ?? String(error);
}

/**
 * Strategy that runs a primary strategy and, when it throws, retries with a
 * fallback strategy selected by the kind of error.
 */
export class FallbackStrategy extends BaseStrategy {
    primaryStrategy;
    config;

    /**
     * @param primaryStrategy A strategy instance, or a model parameter that is wrapped in an IDStrategy.
     * @param config Fallback lists keyed by reason: `timeout`,
     *   `structuredOutputFailure`, `contentPolicyViolation`,
     *   `contextWindowExceeded`, and the generic `error`.
     */
    constructor(primaryStrategy, config) {
        super();
        this.primaryStrategy =
            primaryStrategy instanceof BaseStrategy
                ? primaryStrategy
                : new IDStrategy(primaryStrategy);
        this.config = config;
    }

    toString() {
        return `FallbackStrategy([${this.primaryStrategy.toString()}], config: ${JSON.stringify(this.config)})`;
    }

    toShortString() {
        return `fallback([${this.primaryStrategy.toString()}])`;
    }

    async _text(config) {
        return this._textWithFallbacks(config, this.primaryStrategy, this.config);
    }

    /**
     * Run `strategy`; if it throws, retry with the first applicable fallback.
     *
     * The reason-specific list for the error type is tried first, then the
     * generic `error` list. Once a list has been chosen, the recursion is
     * restricted to the remainder of that one list.
     *
     * @returns The first successful strategy's result, or a null-output
     *   success when the abort signal has fired.
     * @throws The last error once no fallback applies.
     */
    async _textWithFallbacks(config, strategy, fallbackStrategies) {
        try {
            return await strategy.text(config);
        }
        catch (error) {
            // If the abort signal was triggered (e.g. by a race strategy winner
            // or external cancellation), stop without trying further fallbacks.
            if (config.abortSignal?.aborted) {
                return success({ output: null, toolCalls: [] });
            }

            const generic = { key: "error", label: "error" };
            const specific = classifyFallbackError(error);
            const candidates = specific ? [specific, generic] : [generic];
            for (const reason of candidates) {
                const queue = fallbackStrategies[reason.key];
                if (!queue || queue.length === 0) {
                    continue;
                }
                const details = { failedStrategy: strategy.toString() };
                if (reason === generic) {
                    details.error = describeThrown(error);
                }
                this.statelogClient?.debug(`FallbackStrategy: falling back due to ${reason.label}`, details);
                // from here on, only consider the remaining fallbacks for this specific reason
                return this._textWithFallbacks(config, fromJSON(queue[0]), { [reason.key]: queue.slice(1) });
            }

            this.statelogClient?.debug("All strategies in FallbackStrategy failed", {
                fallbackStrategy: this.toJSON(),
                strategy,
                fallbackStrategies,
            });
            throw error;
        }
    }

    toJSON() {
        return {
            type: "fallback",
            params: {
                primaryStrategy: this.primaryStrategy.toJSON(),
                config: this.config,
            },
        };
    }

    /**
     * Rebuild a FallbackStrategy from JSON.
     * @throws Error when `json` does not match FallbackStrategyJSONSchema (details are written to stderr).
     */
    static fromJSON(json) {
        const result = FallbackStrategyJSONSchema.safeParse(json);
        if (!result.success) {
            console.error("Failed to parse FallbackStrategy");
            console.error(JSON.stringify(json, null, 2));
            console.error(z.prettifyError(result.error));
            throw new Error("Failed to parse FallbackStrategy");
        }
        const primaryStrategy = fromJSON(result.data.params.primaryStrategy);
        return new FallbackStrategy(primaryStrategy, result.data.params.config);
    }
}
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
import { Model } from "../model.js";
|
|
2
|
-
import { PromptResult, Result, SmolPromptConfig, StreamChunk } from "../types.js";
|
|
3
|
-
import { BaseStrategy } from "./baseStrategy.js";
|
|
4
|
-
import { StrategyJSON } from "./types.js";
|
|
5
|
-
/**
 * Strategy that picks one model from a list per call, preferring the one
 * with the highest tracked speed and occasionally exploring at random.
 */
export declare class FastestStrategy extends BaseStrategy {
    /** Candidate models (ids or Model instances). */
    models: (string | Model)[];
    /** Probability of picking a random model instead of the fastest one. */
    epsilon: number;

    constructor(models: (string | Model)[], epsilon?: number);

    toString(): string;
    toShortString(): string;

    private chooseModel;

    _text(config: SmolPromptConfig): Promise<Result<PromptResult>>;
    _textStream(config: SmolPromptConfig): AsyncGenerator<StreamChunk>;

    private pickFastest;
    /** Get tokens/sec for a model: tracked latency first, then static estimate, then 0. */
    private getSpeed;

    toJSON(): StrategyJSON;
    /** Rebuilds a FastestStrategy from its JSON form. */
    static fromJSON(json: unknown): FastestStrategy;
}
|
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
import { z } from "zod";
|
|
2
|
-
import { latencyTracker } from "../latencyTracker.js";
|
|
3
|
-
import { getLogger } from "../util/logger.js";
|
|
4
|
-
import { Model } from "../model.js";
|
|
5
|
-
import { BaseStrategy } from "./baseStrategy.js";
|
|
6
|
-
import { IDStrategy } from "./idStrategy.js";
|
|
7
|
-
import { FastestStrategyJSONSchema, } from "./types.js";
|
|
8
|
-
// what percentage of the time to explore (pick a random model instead of the fastest) - this prevents us from getting stuck on a model that was fast in the past but has since become slow
const DEFAULT_EPSILON = 0.1;

/**
 * Epsilon-greedy model selection: with probability `epsilon` a random model
 * is used, otherwise the model with the highest tracked tokens/sec. When no
 * model has tracked speed data a random model is used.
 */
export class FastestStrategy extends BaseStrategy {
    models;
    epsilon;

    /**
     * @param models Candidate models (ids or Model instances).
     * @param epsilon Exploration probability; defaults to DEFAULT_EPSILON.
     */
    constructor(models, epsilon = DEFAULT_EPSILON) {
        super();
        this.models = models;
        this.epsilon = epsilon;
    }

    toString() {
        return `FastestStrategy([${this.models.map((s) => s.toString()).join(", ")}])`;
    }

    toShortString() {
        return `fastest([${this.models.map((s) => s.toString()).join(", ")}])`;
    }

    /**
     * Pick the model to use for one call.
     * @throws Error when the strategy was built with no models.
     */
    chooseModel(config) {
        if (this.models.length === 0) {
            // Fail with a clear message instead of crashing on `undefined.getResolvedModel()`.
            throw new Error("FastestStrategy requires at least one model");
        }
        const resolved = this.models.map((model) => Model.create(model));
        const logger = getLogger(config.logLevel);
        const pickRandom = () => resolved[Math.floor(Math.random() * resolved.length)];

        if (Math.random() < this.epsilon) {
            // Explore: pick a random model
            const chosen = pickRandom();
            logger.debug("fastest strategy - exploring random model", {
                model: chosen.getResolvedModel(),
            });
            this.statelogClient?.debug("fastest strategy - picking random model", {
                model: chosen.getResolvedModel(),
            });
            return chosen;
        }

        // Exploit: pick the fastest model by tracked latency
        const fastest = this.pickFastest(resolved);
        if (fastest) {
            logger.debug("fastest strategy - exploiting fastest model", {
                model: fastest.getResolvedModel(),
            });
            this.statelogClient?.debug("fastest strategy - using fastest model", {
                model: fastest.getResolvedModel(),
            });
            return fastest;
        }

        // we don't have latency data for any model, so just pick randomly
        const chosen = pickRandom();
        const details = {
            models: resolved.map((m) => m.getResolvedModel()),
            chosen: chosen.getResolvedModel(),
        };
        logger.debug("fastest strategy - no latency data, picking random model", details);
        this.statelogClient?.debug("fastest strategy - no latency data, picking random model", details);
        return chosen;
    }

    async _text(config) {
        const chosen = this.chooseModel(config);
        const strategy = new IDStrategy(chosen);
        return strategy.text(config);
    }

    async *_textStream(config) {
        const chosen = this.chooseModel(config);
        const strategy = new IDStrategy(chosen);
        yield* strategy.textStream(config);
    }

    /** Return the model with the highest positive tracked speed, or null when none has one. */
    pickFastest(models) {
        let best = null;
        let bestSpeed = 0;
        for (const model of models) {
            const speed = this.getSpeed(model);
            if (speed && speed > bestSpeed) {
                bestSpeed = speed;
                best = model;
            }
        }
        return best;
    }

    /** Tracked tokens/sec for a model, as reported by latencyTracker with a minimum of 3 samples. */
    getSpeed(model) {
        const MIN_SAMPLES = 3;
        return latencyTracker.getTokensPerSecond(model.getResolvedModel(), MIN_SAMPLES);
    }

    // NOTE(review): `epsilon` is not serialized, so a custom value is lost on a
    // toJSON/fromJSON round trip - confirm whether the JSON schema should carry it.
    toJSON() {
        return {
            type: "fastest",
            params: {
                models: this.models.map((s) => (s instanceof Model ? s.toJSON() : s)),
            },
        };
    }

    /**
     * Rebuild a FastestStrategy from JSON (always with the default epsilon).
     * @throws Error when `json` does not match FastestStrategyJSONSchema (details are written to stderr).
     */
    static fromJSON(json) {
        const result = FastestStrategyJSONSchema.safeParse(json);
        if (!result.success) {
            console.error("Failed to parse FastestStrategy");
            console.error(JSON.stringify(json, null, 2));
            console.error(z.prettifyError(result.error));
            throw new Error("Failed to parse FastestStrategy");
        }
        const models = result.data.params.models;
        return new FastestStrategy(models);
    }
}
|