smoltalk 0.0.66 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +51 -154
  2. package/dist/classes/message/ToolMessage.js +1 -1
  3. package/dist/client.d.ts +3 -3
  4. package/dist/client.js +9 -5
  5. package/dist/clients/anthropic.d.ts +4 -4
  6. package/dist/clients/anthropic.js +1 -1
  7. package/dist/clients/baseClient.d.ts +17 -20
  8. package/dist/clients/baseClient.js +21 -43
  9. package/dist/clients/google.d.ts +4 -4
  10. package/dist/clients/google.js +1 -1
  11. package/dist/clients/ollama.d.ts +4 -4
  12. package/dist/clients/ollama.js +1 -1
  13. package/dist/clients/openai.d.ts +4 -4
  14. package/dist/clients/openai.js +2 -1
  15. package/dist/clients/openaiResponses.d.ts +4 -4
  16. package/dist/clients/openaiResponses.js +2 -1
  17. package/dist/functions.d.ts +13 -10
  18. package/dist/functions.js +4 -55
  19. package/dist/index.d.ts +2 -4
  20. package/dist/index.js +1 -2
  21. package/dist/model.d.ts +2 -5
  22. package/dist/model.js +11 -27
  23. package/dist/models.d.ts +2 -2
  24. package/dist/models.js +3 -1
  25. package/dist/testing/index.d.ts +9 -0
  26. package/dist/testing/index.js +41 -0
  27. package/dist/types.d.ts +52 -160
  28. package/dist/types.js +1 -1
  29. package/dist/util/logger.d.ts +17 -1
  30. package/dist/util/logger.js +68 -5
  31. package/package.json +15 -19
  32. package/dist/clients/llamaCpp.d.ts +0 -28
  33. package/dist/clients/llamaCpp.js +0 -316
  34. package/dist/latencyTracker.d.ts +0 -32
  35. package/dist/latencyTracker.js +0 -73
  36. package/dist/middleware.d.ts +0 -54
  37. package/dist/middleware.js +0 -321
  38. package/dist/strategies/baseStrategy.d.ts +0 -22
  39. package/dist/strategies/baseStrategy.js +0 -62
  40. package/dist/strategies/fallbackStrategy.d.ts +0 -14
  41. package/dist/strategies/fallbackStrategy.js +0 -122
  42. package/dist/strategies/fastestStrategy.d.ts +0 -19
  43. package/dist/strategies/fastestStrategy.js +0 -108
  44. package/dist/strategies/idStrategy.d.ts +0 -16
  45. package/dist/strategies/idStrategy.js +0 -62
  46. package/dist/strategies/index.d.ts +0 -17
  47. package/dist/strategies/index.js +0 -68
  48. package/dist/strategies/raceStrategy.d.ts +0 -12
  49. package/dist/strategies/raceStrategy.js +0 -72
  50. package/dist/strategies/randomStrategy.d.ts +0 -13
  51. package/dist/strategies/randomStrategy.js +0 -54
  52. package/dist/strategies/timeoutStrategy.d.ts +0 -13
  53. package/dist/strategies/timeoutStrategy.js +0 -65
  54. package/dist/strategies/types.d.ts +0 -78
  55. package/dist/strategies/types.js +0 -58
package/dist/types.js CHANGED
@@ -4,7 +4,7 @@ export * from "./types/costEstimate.js";
4
4
  export * from "./types/tokenUsage.js";
5
5
  export function promptResult({ output, toolCalls, thinkingBlocks, usage, cost, model, }) {
6
6
  return {
7
- output: output || null,
7
+ output: (output ?? null),
8
8
  toolCalls: toolCalls || [],
9
9
  thinkingBlocks: thinkingBlocks,
10
10
  usage,
package/dist/util/logger.d.ts CHANGED
@@ -1,2 +1,18 @@
1
- import { EgonLog, LogLevel } from "egonlog";
1
+ export type LogLevel = "error" | "warn" | "info" | "debug";
2
+ export type EgonLogConfig = {
3
+ logLevel: LogLevel;
4
+ };
5
+ export declare class EgonLog {
6
+ private logLevel;
7
+ constructor(config: EgonLogConfig);
8
+ private shouldLog;
9
+ private log;
10
+ error(...args: unknown[]): void;
11
+ warn(...args: unknown[]): void;
12
+ info(...args: unknown[]): void;
13
+ debug(...args: unknown[]): void;
14
+ table(...args: unknown[]): void;
15
+ setLogLevel(level: LogLevel): void;
16
+ getLogLevel(): LogLevel;
17
+ }
2
18
  export declare function getLogger(level?: LogLevel): EgonLog;
package/dist/util/logger.js CHANGED
@@ -1,9 +1,72 @@
1
- import { EgonLog } from "egonlog";
1
+ const LOG_LEVELS = {
2
+ error: 0,
3
+ warn: 1,
4
+ info: 2,
5
+ debug: 3,
6
+ };
7
+ const RED = "\x1b[31m";
8
+ const YELLOW = "\x1b[33m";
9
+ const GREEN = "\x1b[32m";
10
+ const RESET = "\x1b[0m";
11
+ export class EgonLog {
12
+ logLevel;
13
+ constructor(config) {
14
+ this.logLevel = config.logLevel;
15
+ }
16
+ shouldLog(messageLevel) {
17
+ return LOG_LEVELS[messageLevel] <= LOG_LEVELS[this.logLevel];
18
+ }
19
+ log(level, ...args) {
20
+ if (!this.shouldLog(level))
21
+ return;
22
+ const timestamp = new Date().toISOString();
23
+ const prefix = `[${timestamp}] [${level.toUpperCase()}]`;
24
+ switch (level) {
25
+ case "error":
26
+ console.error(RED + prefix, ...args, RESET);
27
+ break;
28
+ case "warn":
29
+ console.warn(YELLOW + prefix, ...args, RESET);
30
+ break;
31
+ case "info":
32
+ console.info(GREEN + prefix, ...args, RESET);
33
+ break;
34
+ case "debug":
35
+ console.debug(prefix, ...args);
36
+ break;
37
+ }
38
+ }
39
+ error(...args) {
40
+ this.log("error", ...args);
41
+ }
42
+ warn(...args) {
43
+ this.log("warn", ...args);
44
+ }
45
+ info(...args) {
46
+ this.log("info", ...args);
47
+ }
48
+ debug(...args) {
49
+ this.log("debug", ...args);
50
+ }
51
+ table(...args) {
52
+ if (!this.shouldLog("debug"))
53
+ return;
54
+ console.table(...args);
55
+ }
56
+ setLogLevel(level) {
57
+ this.logLevel = level;
58
+ }
59
+ getLogLevel() {
60
+ return this.logLevel;
61
+ }
62
+ }
2
63
  let loggerInstance = null;
3
- export function getLogger(level = "error") {
4
- if (loggerInstance) {
5
- return loggerInstance;
64
+ export function getLogger(level) {
65
+ if (!loggerInstance) {
66
+ loggerInstance = new EgonLog({ logLevel: level ?? "error" });
67
+ }
68
+ else if (level !== undefined) {
69
+ loggerInstance.setLogLevel(level);
6
70
  }
7
- loggerInstance = new EgonLog({ level });
8
71
  return loggerInstance;
9
72
  }
package/package.json CHANGED
@@ -1,17 +1,8 @@
1
1
  {
2
2
  "name": "smoltalk",
3
- "version": "0.0.66",
3
+ "version": "0.2.1",
4
4
  "description": "A common interface for LLM APIs",
5
5
  "homepage": "https://github.com/egonSchiele/smoltalk",
6
- "scripts": {
7
- "test": "vitest",
8
- "test:tsc": "tsc -p tests/tsconfig.json",
9
- "build": "rm -rf dist && tsc",
10
- "start": "cd dist && node index.js",
11
- "doc": "typedoc --disableSources --out docs lib && prettier docs/ --write",
12
- "typecheck": "tsc --noEmit",
13
- "pull": "node-llama-cpp pull --dir ./models"
14
- },
15
6
  "files": [
16
7
  "./dist"
17
8
  ],
@@ -20,6 +11,11 @@
20
11
  "types": "./dist/index.d.ts",
21
12
  "import": "./dist/index.js",
22
13
  "require": "./dist/index.js"
14
+ },
15
+ "./testing": {
16
+ "types": "./dist/testing/index.d.ts",
17
+ "import": "./dist/testing/index.js",
18
+ "require": "./dist/testing/index.js"
23
19
  }
24
20
  },
25
21
  "type": "module",
@@ -31,21 +27,21 @@
31
27
  ],
32
28
  "author": "Aditya Bhargava",
33
29
  "license": "ISC",
34
- "devDependencies": {
35
- "@types/node": "^25.0.3",
36
- "prettier": "^3.7.4",
37
- "typedoc": "^0.28.15",
38
- "typescript": "^5.9.3",
39
- "vitest": "^4.0.16"
40
- },
41
30
  "dependencies": {
42
31
  "@anthropic-ai/sdk": "^0.78.0",
43
32
  "@google/genai": "^1.34.0",
44
- "egonlog": "^0.0.2",
45
33
  "nanoid": "^5.1.6",
46
- "node-llama-cpp": "^3.17.1",
47
34
  "ollama": "^0.6.3",
48
35
  "openai": "^6.15.0",
49
36
  "zod": "^4.3.6"
37
+ },
38
+ "scripts": {
39
+ "test": "vitest --exclude=**/*.live.test.ts",
40
+ "test:live": "vitest run lib/clients/*.live.test.ts",
41
+ "test:tsc": "tsc -p tests/tsconfig.json",
42
+ "build": "rm -rf dist && tsc",
43
+ "start": "cd dist && node index.js",
44
+ "doc": "typedoc --disableSources --out docs lib && prettier docs/ --write",
45
+ "typecheck": "tsc --noEmit"
50
46
  }
51
47
  }
package/dist/clients/llamaCpp.d.ts DELETED
@@ -1,28 +0,0 @@
1
- import { BaseClient } from "./baseClient.js";
2
- import { BaseClientConfig, PromptConfig, PromptResult, Result, StreamChunk } from "../types.js";
3
- export declare class LlamaCPP extends BaseClient {
4
- private llama;
5
- private llamaModel;
6
- private modelDir;
7
- private model;
8
- private logger;
9
- constructor(config: BaseClientConfig);
10
- setup(): Promise<void>;
11
- private getModelName;
12
- /**
13
- * Converts smoltalk messages to node-llama-cpp's ChatHistoryItem format.
14
- * Builds the full history including the last user message (LlamaChat.generateResponse
15
- * expects the complete history, unlike LlamaChatSession which takes the last message separately).
16
- */
17
- private convertMessages;
18
- /**
19
- * Builds node-llama-cpp function definitions from smoltalk tool configs.
20
- * Uses ChatModelFunctions (no handler) — LlamaChat.generateResponse() returns
21
- * function calls without executing them, which matches smoltalk's tool loop model.
22
- */
23
- private buildFunctions;
24
- private calculateUsageAndCost;
25
- private extractToolCalls;
26
- _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
27
- _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
28
- }
package/dist/clients/llamaCpp.js DELETED
@@ -1,316 +0,0 @@
1
- import { getLlama, LlamaChat, LlamaLogLevel } from "node-llama-cpp";
2
- import { BaseClient } from "./baseClient.js";
3
- import { ToolCall } from "../classes/ToolCall.js";
4
- import { getLogger } from "../util/logger.js";
5
- import { Model } from "../model.js";
6
- import { sanitizeAttributes } from "../util/util.js";
7
- import { success, } from "../types.js";
8
- import path from "path";
9
- export class LlamaCPP extends BaseClient {
10
- llama = null;
11
- llamaModel = null;
12
- modelDir;
13
- model;
14
- logger;
15
- constructor(config) {
16
- super(config);
17
- if (!config.llamaCppModelDir) {
18
- throw new Error("llamaCppModelDir is required in the config when using the LlamaCPP client.");
19
- }
20
- this.model = new Model(config.model);
21
- this.modelDir = config.llamaCppModelDir;
22
- this.logger = getLogger();
23
- }
24
- async setup() {
25
- this.llama = await getLlama({ logLevel: LlamaLogLevel.error });
26
- this.llamaModel = await this.llama.loadModel({
27
- modelPath: path.join(this.modelDir, this.config.model),
28
- });
29
- }
30
- getModelName() {
31
- return this.model.getResolvedModel();
32
- }
33
- /**
34
- * Converts smoltalk messages to node-llama-cpp's ChatHistoryItem format.
35
- * Builds the full history including the last user message (LlamaChat.generateResponse
36
- * expects the complete history, unlike LlamaChatSession which takes the last message separately).
37
- */
38
- convertMessages(messages) {
39
- let systemPrompt;
40
- const chatHistory = [];
41
- for (let i = 0; i < messages.length; i++) {
42
- const msg = messages[i];
43
- if (msg.role === "system" || msg.role === "developer") {
44
- if (!systemPrompt) {
45
- systemPrompt = msg.content;
46
- }
47
- else {
48
- systemPrompt += "\n" + msg.content;
49
- }
50
- }
51
- else if (msg.role === "user") {
52
- chatHistory.push({ type: "user", text: msg.content });
53
- }
54
- else if (msg.role === "assistant") {
55
- const assistantMsg = msg;
56
- const response = [];
57
- if (assistantMsg.content) {
58
- response.push(assistantMsg.content);
59
- }
60
- // Handle tool calls: pair them with their results from subsequent tool messages
61
- if (assistantMsg.toolCalls?.length) {
62
- for (const tc of assistantMsg.toolCalls) {
63
- // Find the corresponding tool result message
64
- const toolResultMsg = messages
65
- .slice(i + 1)
66
- .find((m) => m.role === "tool" &&
67
- m.tool_call_id === tc.id);
68
- response.push({
69
- type: "functionCall",
70
- name: tc.name,
71
- params: tc.arguments,
72
- result: toolResultMsg ? toolResultMsg.content : undefined,
73
- });
74
- }
75
- }
76
- chatHistory.push({ type: "model", response });
77
- }
78
- // Tool messages are handled as part of assistant messages above
79
- }
80
- // Prepend system message if present
81
- if (systemPrompt) {
82
- chatHistory.unshift({ type: "system", text: systemPrompt });
83
- }
84
- return { systemPrompt, chatHistory };
85
- }
86
- /**
87
- * Builds node-llama-cpp function definitions from smoltalk tool configs.
88
- * Uses ChatModelFunctions (no handler) — LlamaChat.generateResponse() returns
89
- * function calls without executing them, which matches smoltalk's tool loop model.
90
- */
91
- buildFunctions(tools) {
92
- if (!tools)
93
- return undefined;
94
- const functions = {};
95
- for (const tool of tools) {
96
- const jsonSchema = tool.schema.toJSONSchema();
97
- functions[tool.name] = {
98
- description: tool.description,
99
- params: jsonSchema,
100
- };
101
- }
102
- return functions;
103
- }
104
- calculateUsageAndCost(meterBefore, meterAfter) {
105
- const inputTokens = meterAfter.usedInputTokens - meterBefore.usedInputTokens;
106
- const outputTokens = meterAfter.usedOutputTokens - meterBefore.usedOutputTokens;
107
- const usage = {
108
- inputTokens,
109
- outputTokens,
110
- totalTokens: inputTokens + outputTokens,
111
- };
112
- const cost = this.model.calculateCost(usage) ?? undefined;
113
- return { usage, cost };
114
- }
115
- extractToolCalls(functionCalls) {
116
- if (!functionCalls?.length)
117
- return [];
118
- return functionCalls.map((fc) => new ToolCall(fc.functionName, fc.functionName, (fc.params ?? {})));
119
- }
120
- async _textSync(config) {
121
- if (!this.llama || !this.llamaModel) {
122
- await this.setup();
123
- }
124
- const setupLlama = this.llama;
125
- const setupModel = this.llamaModel;
126
- const { chatHistory } = this.convertMessages(config.messages);
127
- if (chatHistory.length === 0) {
128
- return success({
129
- output: "",
130
- toolCalls: [],
131
- model: this.getModelName(),
132
- });
133
- }
134
- // Create grammar for response format
135
- let grammar;
136
- if (config.responseFormat) {
137
- grammar = await setupLlama.createGrammarForJsonSchema(config.responseFormat.toJSONSchema());
138
- }
139
- // Create context and LlamaChat
140
- const context = await setupModel.createContext();
141
- const sequence = context.getSequence();
142
- const chat = new LlamaChat({
143
- contextSequence: sequence,
144
- });
145
- // Build tools if provided
146
- const functions = this.buildFunctions(config.tools);
147
- // Track token usage
148
- const meterBefore = sequence.tokenMeter.getState();
149
- // Build options
150
- const options = {};
151
- if (config.maxTokens !== undefined) {
152
- options.maxTokens = config.maxTokens;
153
- }
154
- if (config.temperature !== undefined) {
155
- options.temperature = config.temperature;
156
- }
157
- if (config.abortSignal) {
158
- options.signal = config.abortSignal;
159
- options.stopOnAbortSignal = true;
160
- }
161
- if (grammar && !functions) {
162
- options.grammar = grammar;
163
- }
164
- if (functions) {
165
- options.functions = functions;
166
- }
167
- // Apply raw attributes
168
- Object.assign(options, sanitizeAttributes(config.rawAttributes));
169
- this.logger.debug("Sending request to llama.cpp");
170
- this.statelogClient?.promptRequest({
171
- model: this.getModelName(),
172
- messageCount: config.messages.length,
173
- });
174
- let result;
175
- let meterAfter;
176
- try {
177
- result = await chat.generateResponse(chatHistory, options);
178
- meterAfter = sequence.tokenMeter.getState();
179
- }
180
- finally {
181
- chat.dispose();
182
- await context.dispose();
183
- }
184
- // Extract text output
185
- const output = result.response || null;
186
- // Extract tool calls — generateResponse returns them without executing handlers
187
- const toolCalls = this.extractToolCalls(result.functionCalls);
188
- // Calculate usage and cost
189
- const { usage, cost } = this.calculateUsageAndCost(meterBefore, meterAfter);
190
- this.logger.debug("Response from llama.cpp:", output);
191
- this.statelogClient?.promptResponse({ output, usage, cost });
192
- return success({
193
- output,
194
- toolCalls,
195
- usage,
196
- cost,
197
- model: this.getModelName(),
198
- });
199
- }
200
- async *_textStream(config) {
201
- if (!this.llama || !this.llamaModel) {
202
- await this.setup();
203
- }
204
- const setupLlama = this.llama;
205
- const setupModel = this.llamaModel;
206
- const { chatHistory } = this.convertMessages(config.messages);
207
- if (chatHistory.length === 0) {
208
- yield {
209
- type: "done",
210
- result: { output: null, toolCalls: [], model: this.getModelName() },
211
- };
212
- return;
213
- }
214
- // Create grammar for response format
215
- let grammar;
216
- if (config.responseFormat) {
217
- grammar = await setupLlama.createGrammarForJsonSchema(config.responseFormat.toJSONSchema());
218
- }
219
- // Create context and LlamaChat
220
- const context = await setupModel.createContext();
221
- const sequence = context.getSequence();
222
- const chat = new LlamaChat({
223
- contextSequence: sequence,
224
- });
225
- const functions = this.buildFunctions(config.tools);
226
- const meterBefore = sequence.tokenMeter.getState();
227
- // Bridge callback-based streaming to async generator using a queue
228
- const chunks = [];
229
- let resolveWaiter = null;
230
- let done = false;
231
- const pushChunk = (chunk) => {
232
- chunks.push(chunk);
233
- if (resolveWaiter) {
234
- resolveWaiter();
235
- resolveWaiter = null;
236
- }
237
- };
238
- // Build options
239
- const options = {
240
- onTextChunk: (text) => {
241
- pushChunk({ type: "text", text });
242
- },
243
- };
244
- if (config.maxTokens !== undefined) {
245
- options.maxTokens = config.maxTokens;
246
- }
247
- if (config.temperature !== undefined) {
248
- options.temperature = config.temperature;
249
- }
250
- if (config.abortSignal) {
251
- options.signal = config.abortSignal;
252
- options.stopOnAbortSignal = true;
253
- }
254
- if (grammar && !functions) {
255
- options.grammar = grammar;
256
- }
257
- if (functions) {
258
- options.functions = functions;
259
- }
260
- Object.assign(options, sanitizeAttributes(config.rawAttributes));
261
- this.logger.debug("Sending streaming request to llama.cpp");
262
- this.statelogClient?.promptRequest({
263
- model: this.getModelName(),
264
- messageCount: config.messages.length,
265
- });
266
- // Run generateResponse in background, push chunks as they arrive
267
- const promptPromise = chat
268
- .generateResponse(chatHistory, options)
269
- .then((result) => {
270
- const meterAfter = sequence.tokenMeter.getState();
271
- const toolCalls = this.extractToolCalls(result.functionCalls);
272
- for (const tc of toolCalls) {
273
- pushChunk({ type: "tool_call", toolCall: tc });
274
- }
275
- const { usage, cost } = this.calculateUsageAndCost(meterBefore, meterAfter);
276
- const output = result.response || null;
277
- this.logger.debug("Streaming response completed from llama.cpp");
278
- this.statelogClient?.promptResponse({ output, usage, cost });
279
- pushChunk({
280
- type: "done",
281
- result: {
282
- output,
283
- toolCalls,
284
- usage,
285
- cost,
286
- model: this.getModelName(),
287
- },
288
- });
289
- })
290
- .catch((error) => {
291
- pushChunk({ type: "error", error: error.message });
292
- })
293
- .finally(() => {
294
- done = true;
295
- chat.dispose();
296
- context.dispose();
297
- // Wake up the generator if it's waiting
298
- if (resolveWaiter) {
299
- resolveWaiter();
300
- resolveWaiter = null;
301
- }
302
- });
303
- // Yield chunks as they arrive
304
- while (!done || chunks.length > 0) {
305
- if (chunks.length > 0) {
306
- yield chunks.shift();
307
- }
308
- else if (!done) {
309
- await new Promise((resolve) => {
310
- resolveWaiter = resolve;
311
- });
312
- }
313
- }
314
- await promptPromise;
315
- }
316
- }
package/dist/latencyTracker.d.ts DELETED
@@ -1,32 +0,0 @@
1
- export type LatencySample = {
2
- /** Milliseconds per output token */
3
- msPerToken: number;
4
- /** Timestamp when sample was recorded */
5
- timestamp: number;
6
- };
7
- declare class LatencyTracker {
8
- private samples;
9
- private windowSize;
10
- constructor(windowSize?: number);
11
- /** Record a latency sample for a model. */
12
- record(model: string, elapsedMs: number, outputTokens: number): void;
13
- /** Get the windowed mean ms-per-token for a model, or null if no samples. */
14
- getMeanMsPerToken(model: string): number | null;
15
- /**
16
- * Get estimated output tokens per second for a model based on tracked latency.
17
- * Returns null if no samples exist or if the number of samples is below the minimum required.
18
- */
19
- getTokensPerSecond(model: string, minSamples?: number): number | null;
20
- /** Get the number of samples recorded for a model. */
21
- getSampleCount(model: string): number;
22
- /** Get all samples for a model (defensive copy). */
23
- getSamples(model: string): LatencySample[];
24
- /** Clear all samples for a model. */
25
- clear(model?: string): void;
26
- /** Update the window size. Existing samples beyond the new size are trimmed. */
27
- setWindowSize(size: number): void;
28
- getWindowSize(): number;
29
- }
30
- /** Global singleton latency tracker. */
31
- export declare const latencyTracker: LatencyTracker;
32
- export {};
package/dist/latencyTracker.js DELETED
@@ -1,73 +0,0 @@
1
- const DEFAULT_WINDOW_SIZE = 10;
2
- class LatencyTracker {
3
- samples = new Map();
4
- windowSize;
5
- constructor(windowSize = DEFAULT_WINDOW_SIZE) {
6
- this.windowSize = windowSize;
7
- }
8
- /** Record a latency sample for a model. */
9
- record(model, elapsedMs, outputTokens) {
10
- if (outputTokens <= 0 || elapsedMs <= 0)
11
- return;
12
- const msPerToken = elapsedMs / outputTokens;
13
- const samples = this.samples.get(model) ?? [];
14
- samples.push({ msPerToken, timestamp: Date.now() });
15
- // Keep only the last windowSize samples
16
- if (samples.length > this.windowSize) {
17
- samples.splice(0, samples.length - this.windowSize);
18
- }
19
- this.samples.set(model, samples);
20
- }
21
- /** Get the windowed mean ms-per-token for a model, or null if no samples. */
22
- getMeanMsPerToken(model) {
23
- const samples = this.samples.get(model);
24
- if (!samples || samples.length === 0)
25
- return null;
26
- const sum = samples.reduce((acc, s) => acc + s.msPerToken, 0);
27
- return sum / samples.length;
28
- }
29
- /**
30
- * Get estimated output tokens per second for a model based on tracked latency.
31
- * Returns null if no samples exist or if the number of samples is below the minimum required.
32
- */
33
- getTokensPerSecond(model, minSamples = 1) {
34
- const sampleCount = this.getSampleCount(model);
35
- if (sampleCount < minSamples)
36
- return null;
37
- const msPerToken = this.getMeanMsPerToken(model);
38
- if (msPerToken === null || msPerToken === 0)
39
- return null;
40
- return 1000 / msPerToken;
41
- }
42
- /** Get the number of samples recorded for a model. */
43
- getSampleCount(model) {
44
- return this.samples.get(model)?.length ?? 0;
45
- }
46
- /** Get all samples for a model (defensive copy). */
47
- getSamples(model) {
48
- return [...(this.samples.get(model) ?? [])];
49
- }
50
- /** Clear all samples for a model. */
51
- clear(model) {
52
- if (model) {
53
- this.samples.delete(model);
54
- }
55
- else {
56
- this.samples.clear();
57
- }
58
- }
59
- /** Update the window size. Existing samples beyond the new size are trimmed. */
60
- setWindowSize(size) {
61
- this.windowSize = size;
62
- for (const [model, samples] of this.samples) {
63
- if (samples.length > size) {
64
- samples.splice(0, samples.length - size);
65
- }
66
- }
67
- }
68
- getWindowSize() {
69
- return this.windowSize;
70
- }
71
- }
72
- /** Global singleton latency tracker. */
73
- export const latencyTracker = new LatencyTracker();
package/dist/middleware.d.ts DELETED
@@ -1,54 +0,0 @@
1
- import { ZodType } from "zod";
2
- import { Message } from "./classes/message/index.js";
3
- import { PromptConfig, PromptResult, SmolPromptConfig, StreamChunk } from "./types.js";
4
- import { Result } from "./types/result.js";
5
- import { TokenUsage } from "./types/tokenUsage.js";
6
- import { CostEstimate } from "./types/costEstimate.js";
7
- export type MiddlewareCheck = {
8
- /** Messages for the middleware LLM call (original prompt messages are appended automatically). */
9
- messages: Message[];
10
- /** Optional Zod schema for structured output from the middleware. */
11
- responseFormat?: ZodType;
12
- responseFormatOptions?: PromptConfig["responseFormatOptions"];
13
- /**
14
- * Given the middleware's result, decide whether to block.
15
- * Return a replacement output string to block, or null/undefined to pass.
16
- */
17
- decide: (result: PromptResult) => string | null;
18
- };
19
- export type MiddlewareConfig = {
20
- /** Run all checks before the main prompt, or in parallel with it. */
21
- timing: "before" | "parallel";
22
- /** Run checks in parallel or sequentially (short-circuit on first block). */
23
- mode: "parallel" | "sequential";
24
- /** The middleware checks to run. */
25
- checks: MiddlewareCheck[];
26
- };
27
- export type MiddlewareResult = {
28
- blocked: boolean;
29
- result: Result<PromptResult>;
30
- usage?: TokenUsage;
31
- cost?: CostEstimate;
32
- };
33
- /**
34
- * Run a single middleware check. Returns a MiddlewareResult indicating
35
- * whether the check blocked and what output to use.
36
- */
37
- export declare function runMiddlewareCheck(check: MiddlewareCheck, parentConfig: SmolPromptConfig, textSyncFn: (config: SmolPromptConfig) => Promise<Result<PromptResult>>): Promise<MiddlewareResult>;
38
- /**
39
- * Run multiple middleware checks in sequential or parallel mode.
40
- * Returns a combined MiddlewareResult.
41
- */
42
- export declare function runMiddlewareChecks(checks: MiddlewareCheck[], mode: "sequential" | "parallel", parentConfig: SmolPromptConfig, textSyncFn: (config: SmolPromptConfig) => Promise<Result<PromptResult>>): Promise<MiddlewareResult>;
43
- /**
44
- * High-level middleware orchestration for sync calls.
45
- * Returns the blocked result if middleware blocks, the main prompt result for parallel timing,
46
- * or null to indicate "proceed normally" (no middleware or middleware passed with "before" timing).
47
- */
48
- export declare function executeMiddlewareSync(config: SmolPromptConfig, runMainPrompt: (config: SmolPromptConfig) => Promise<Result<PromptResult>>, textSyncFn: (config: SmolPromptConfig) => Promise<Result<PromptResult>>): Promise<Result<PromptResult> | null>;
49
- /**
50
- * High-level middleware orchestration for streaming calls.
51
- * Yields stream chunks, handling middleware checks according to timing config.
52
- * Only call this when middleware is configured — the caller should check first.
53
- */
54
- export declare function executeMiddlewareStream(config: SmolPromptConfig, getStream: (config: SmolPromptConfig) => AsyncGenerator<StreamChunk>, textSyncFn: (config: SmolPromptConfig) => Promise<Result<PromptResult>>): AsyncGenerator<StreamChunk>;