smoltalk 0.0.54 → 0.0.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Smoltalk
2
2
 
3
- Smoltalk exposes a common API to different LLM providers. There are other packages that do this, but Smoltalk allows you to build strategies on top of it. Here is a simple example. Hello world, this is functionality that other packages allow.
3
+ Smoltalk exposes a common API to different LLM providers. There are other packages that do this, but Smoltalk allows you to build strategies on top of it. Here is a simple example.
4
4
 
5
5
  ## Install
6
6
 
@@ -11,23 +11,80 @@ pnpm install smoltalk
11
11
  ## Hello world example
12
12
 
13
13
  ```typescript
14
- import { getClient } from "smoltalk";
15
-
16
- const client = getClient({
17
- openAiApiKey: process.env.OPENAI_API_KEY || "",
18
- googleApiKey: process.env.GEMINI_API_KEY || "",
19
- logLevel: "debug",
20
- model: "gemini-2.0-flash-lite",
21
- });
14
+ import { text, userMessage } from "smoltalk";
22
15
 
23
16
  async function main() {
24
- const resp = await client.prompt("Hello, how are you?");
25
- console.log(resp);
17
+ const messages = [userMessage("Write me a 10 word story.")];
18
+ const response = await text({
19
+ messages,
20
+ model: "gpt-5.4",
21
+ });
22
+ console.log(response);
26
23
  }
27
24
 
28
25
  main();
29
26
  ```
30
27
 
28
+ This is functionality that other packages allow.
29
+ <details>
30
+ <summary>Response</summary>
31
+
32
+ ```
33
+ {
34
+ success: true,
35
+ value: {
36
+ output: 'Clock stopped; everyone smiled as tomorrow finally arrived before yesterday.',
37
+ toolCalls: [],
38
+ usage: {
39
+ inputTokens: 14,
40
+ outputTokens: 15,
41
+ cachedInputTokens: 0,
42
+ totalTokens: 29
43
+ },
44
+ cost: {
45
+ inputCost: 0.000035,
46
+ outputCost: 0.000225,
47
+ cachedInputCost: undefined,
48
+ totalCost: 0.00026,
49
+ currency: 'USD'
50
+ },
51
+ model: 'gpt-5.4'
52
+ }
53
+ }
54
+ ```
55
+ </details>
56
+
57
+ What if you wanted to have fallbacks in case the OpenAI API was down? Just change the `model` field:
58
+
59
+ ```ts
60
+ const response = await text({
61
+ messages,
62
+ model: fallback("gpt-5.4", "gemini-2.5-flash-lite"),
63
+ // or multiple fallbacks:
64
+ // model: fallback("gpt-5.4", ["gemini-2.5-flash-lite", "gemini-3-flash-preview"]),
65
+ });
66
+ ```
67
+
68
+ Or what if you wanted to try a couple of models and take the first response?
69
+
70
+ ```ts
71
+ const response = await text({
72
+ messages,
73
+ model: race("gpt-5.4", "gemini-2.5-flash-lite", "o4-mini"),
74
+ });
75
+ ```
76
+
77
+ Or combine them:
78
+
79
+ ```ts
80
+ const response = await text({
81
+ messages,
82
+ model: race(fallback("gpt-5.4", "gemini-2.5-flash-lite"), "o4-mini"),
83
+ });
84
+ ```
85
+
86
+ You get the idea.
87
+
31
88
  ## Longer tutorial
32
89
 To use Smoltalk, you first create a client:
33
90
 
@@ -157,20 +214,15 @@ Detects when the model is stuck in a repetitive tool-call loop.
157
214
  | `intervention` | `string` | Action to take: `"remove-tool"`, `"remove-all-tools"`, `"throw-error"`, or `"halt-execution"`. |
158
215
  | `excludeTools` | `string[]` | Tool names to ignore when counting consecutive calls. |
159
216
 
160
- ## Prior art
217
+ ## Limitations
218
+ Smoltalk has support for a limited number of providers right now, and is mostly focused on the stateless APIs for text completion, though I plan to add support for more providers as well as image and speech models later. Smoltalk is also a personal project, and there are alternatives backed by companies:
219
+
161
220
  - Langchain
162
- OpenRouter
221
+ - OpenRouter
163
222
  - Vercel AI
164
223
 
165
- These are all good options, but they are quite heavy, and I wanted a lighter option. That said, you may be better off with one of the above alternatives:
166
- - They are backed by a business and are more likely to be responsive.
167
- - They support way more functionality and providers. Smoltalk currently supports just a subset of functionality for OpenAI and Google.
168
-
169
- ## Functionality
170
- Smoltalk pretty much lets you generate text using an OpenAI or Google model, with support for function calling and structured output, and that's it. I will add functionality and providers sporadically when I have time and need.
171
-
172
224
  ## Contributing
173
- This repo could use some help! Any of the following contributions would be helpful:
225
+ Contributions are welcome. Any of the following would be helpful:
174
226
  - Adding support for API parameters or endpoints
175
227
  - Adding support for different providers
176
- - Updating the list of models
228
+ - Updating the list of models
@@ -12,6 +12,7 @@ export declare class SmolAnthropic extends BaseClient implements SmolClient {
12
12
  getModel(): ModelName;
13
13
  private calculateUsageAndCost;
14
14
  private buildRequest;
15
+ private rethrowAsSmolError;
15
16
  _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
16
17
  _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
17
18
  }
@@ -4,6 +4,7 @@ import { SystemMessage, DeveloperMessage } from "../classes/message/index.js";
4
4
  import { getLogger } from "../logger.js";
5
5
  import { success, } from "../types.js";
6
6
  import { zodToAnthropicTool } from "../util/tool.js";
7
+ import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
7
8
  import { BaseClient } from "./baseClient.js";
8
9
  import { Model } from "../model.js";
9
10
  const DEFAULT_MAX_TOKENS = 4096;
@@ -82,6 +83,24 @@ export class SmolAnthropic extends BaseClient {
82
83
  : undefined;
83
84
  return { system, messages: anthropicMessages, tools, thinking };
84
85
  }
86
+ rethrowAsSmolError(error) {
87
+ if (error instanceof Anthropic.APIError) {
88
+ const msg = error.message.toLowerCase();
89
+ if (msg.includes("prompt is too long") ||
90
+ msg.includes("context length") ||
91
+ msg.includes("context window") ||
92
+ msg.includes("too many tokens")) {
93
+ throw new SmolContextWindowExceededError(error.message);
94
+ }
95
+ if (msg.includes("content policy") ||
96
+ msg.includes("usage policies") ||
97
+ msg.includes("content filtering") ||
98
+ msg.includes("violates our")) {
99
+ throw new SmolContentPolicyError(error.message);
100
+ }
101
+ }
102
+ throw error;
103
+ }
85
104
  async _textSync(config) {
86
105
  const { system, messages, tools, thinking } = this.buildRequest(config);
87
106
  let debugData = {
@@ -95,19 +114,25 @@ export class SmolAnthropic extends BaseClient {
95
114
  this.logger.debug("Sending request to Anthropic:", debugData);
96
115
  this.statelogClient?.promptRequest(debugData);
97
116
  const signal = this.getAbortSignal(config);
98
- const response = await this.client.messages.create({
99
- model: this.getModel(),
100
- max_tokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
101
- messages,
102
- ...(system && { system }),
103
- ...(tools && { tools }),
104
- ...(thinking && { thinking }),
105
- ...(config.temperature !== undefined && {
106
- temperature: config.temperature,
107
- }),
108
- ...(config.rawAttributes || {}),
109
- stream: false,
110
- }, { ...(signal && { signal }) });
117
+ let response;
118
+ try {
119
+ response = await this.client.messages.create({
120
+ model: this.getModel(),
121
+ max_tokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
122
+ messages,
123
+ ...(system && { system }),
124
+ ...(tools && { tools }),
125
+ ...(thinking && { thinking }),
126
+ ...(config.temperature !== undefined && {
127
+ temperature: config.temperature,
128
+ }),
129
+ ...(config.rawAttributes || {}),
130
+ stream: false,
131
+ }, { ...(signal && { signal }) });
132
+ }
133
+ catch (error) {
134
+ this.rethrowAsSmolError(error);
135
+ }
111
136
  this.logger.debug("Response from Anthropic:", response);
112
137
  this.statelogClient?.promptResponse(response);
113
138
  let output = null;
@@ -148,19 +173,25 @@ export class SmolAnthropic extends BaseClient {
148
173
  this.logger.debug("Sending streaming request to Anthropic:", streamDebugData);
149
174
  this.statelogClient?.promptRequest(streamDebugData);
150
175
  const signal = this.getAbortSignal(config);
151
- const stream = await this.client.messages.create({
152
- model: this.model,
153
- max_tokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
154
- messages,
155
- ...(system && { system }),
156
- ...(tools && { tools }),
157
- ...(thinking && { thinking }),
158
- ...(config.temperature !== undefined && {
159
- temperature: config.temperature,
160
- }),
161
- ...(config.rawAttributes || {}),
162
- stream: true,
163
- }, { ...(signal && { signal }) });
176
+ let stream;
177
+ try {
178
+ stream = await this.client.messages.create({
179
+ model: this.model,
180
+ max_tokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
181
+ messages,
182
+ ...(system && { system }),
183
+ ...(tools && { tools }),
184
+ ...(thinking && { thinking }),
185
+ ...(config.temperature !== undefined && {
186
+ temperature: config.temperature,
187
+ }),
188
+ ...(config.rawAttributes || {}),
189
+ stream: true,
190
+ }, { ...(signal && { signal }) });
191
+ }
192
+ catch (error) {
193
+ this.rethrowAsSmolError(error);
194
+ }
164
195
  let content = "";
165
196
  // Track tool blocks by index: index -> { id, name, arguments (partial JSON) }
166
197
  const toolBlocks = new Map();
@@ -24,6 +24,7 @@ export declare class BaseClient implements SmolClient {
24
24
  continue: boolean;
25
25
  newPromptConfig: PromptConfig;
26
26
  };
27
+ private recordLatency;
27
28
  extractResponse(promptConfig: PromptConfig, rawValue: any, schema: any, depth?: number): any;
28
29
  textWithRetry(promptConfig: PromptConfig, retries: number): Promise<Result<PromptResult>>;
29
30
  _textSync(promptConfig: PromptConfig): Promise<Result<PromptResult>>;
@@ -1,4 +1,5 @@
1
1
  import { AssistantMessage, userMessage, assistantMessage, } from "../classes/message/index.js";
2
+ import { latencyTracker } from "../latencyTracker.js";
2
3
  import { getLogger } from "../logger.js";
3
4
  import { getModel, isTextModel } from "../models.js";
4
5
  import { SmolStructuredOutputError } from "../smolError.js";
@@ -146,9 +147,11 @@ export class BaseClient {
146
147
  value: { output: null, toolCalls: [], model: this.config.model },
147
148
  };
148
149
  }
150
+ const startTime = performance.now();
149
151
  try {
150
152
  const result = await this.textWithRetry(newPromptConfig, newPromptConfig.responseFormatOptions?.numRetries ||
151
153
  DEFAULT_NUM_RETRIES);
154
+ this.recordLatency(startTime, result);
152
155
  return result;
153
156
  }
154
157
  catch (err) {
@@ -210,6 +213,15 @@ export class BaseClient {
210
213
  }
211
214
  return { continue: true, newPromptConfig: promptConfig };
212
215
  }
216
+ recordLatency(startTime, result) {
217
+ if (!result.success)
218
+ return;
219
+ const outputTokens = result.value.usage?.outputTokens;
220
+ if (!outputTokens || outputTokens <= 0)
221
+ return;
222
+ const elapsedMs = performance.now() - startTime;
223
+ latencyTracker.record(this.config.model, elapsedMs, outputTokens);
224
+ }
213
225
  extractResponse(promptConfig, rawValue, schema, depth = 0) {
214
226
  const MAX_DEPTH = 5;
215
227
  if (depth > MAX_DEPTH) {
@@ -374,8 +386,18 @@ export class BaseClient {
374
386
  };
375
387
  return;
376
388
  }
389
+ const startTime = performance.now();
377
390
  try {
378
- yield* this._textStream(newPromptConfig);
391
+ for await (const chunk of this._textStream(newPromptConfig)) {
392
+ if (chunk.type === "done") {
393
+ const outputTokens = chunk.result.usage?.outputTokens;
394
+ if (outputTokens && outputTokens > 0) {
395
+ const elapsedMs = performance.now() - startTime;
396
+ latencyTracker.record(this.config.model, elapsedMs, outputTokens);
397
+ }
398
+ }
399
+ yield chunk;
400
+ }
379
401
  }
380
402
  catch (err) {
381
403
  if (this.isAbortError(err)) {
@@ -3,6 +3,7 @@ import { ToolCall } from "../classes/ToolCall.js";
3
3
  import { getLogger } from "../logger.js";
4
4
  import { addCosts, addTokenUsage, success, } from "../types.js";
5
5
  import { zodToGoogleTool } from "../util/tool.js";
6
+ import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
6
7
  import { sanitizeAttributes } from "../util.js";
7
8
  import { BaseClient } from "./baseClient.js";
8
9
  import { Model } from "../model.js";
@@ -171,10 +172,28 @@ export class SmolGoogle extends BaseClient {
171
172
  async __textSync(request) {
172
173
  this.logger.debug("Sending request to Google Gemini:", JSON.stringify(request, null, 2));
173
174
  this.statelogClient?.promptRequest(request);
174
- // Send the prompt as the latest message
175
- const result = await this.client.models.generateContent(request);
175
+ let result;
176
+ try {
177
+ result = await this.client.models.generateContent(request);
178
+ }
179
+ catch (error) {
180
+ const msg = (error.message || "").toLowerCase();
181
+ if (msg.includes("token") &&
182
+ (msg.includes("exceed") ||
183
+ msg.includes("too long") ||
184
+ msg.includes("limit"))) {
185
+ throw new SmolContextWindowExceededError(error.message);
186
+ }
187
+ throw error;
188
+ }
176
189
  this.logger.debug("Response from Google Gemini:", JSON.stringify(result, null, 2));
177
190
  this.statelogClient?.promptResponse(result);
191
+ for (const candidate of result.candidates || []) {
192
+ const finishReason = candidate.finishReason;
193
+ if (finishReason === "SAFETY" || finishReason === "PROHIBITED_CONTENT") {
194
+ throw new SmolContentPolicyError(`Content blocked by Google safety filter: ${finishReason}`);
195
+ }
196
+ }
178
197
  const toolCalls = [];
179
198
  const thinkingBlocks = [];
180
199
  let textContent = "";
@@ -230,7 +249,20 @@ export class SmolGoogle extends BaseClient {
230
249
  }
231
250
  this.logger.debug("Sending streaming request to Google Gemini:", JSON.stringify(request, null, 2));
232
251
  this.statelogClient?.promptRequest(request);
233
- const stream = await this.client.models.generateContentStream(request);
252
+ let stream;
253
+ try {
254
+ stream = await this.client.models.generateContentStream(request);
255
+ }
256
+ catch (error) {
257
+ const msg = (error.message || "").toLowerCase();
258
+ if (msg.includes("token") &&
259
+ (msg.includes("exceed") ||
260
+ msg.includes("too long") ||
261
+ msg.includes("limit"))) {
262
+ throw new SmolContextWindowExceededError(error.message);
263
+ }
264
+ throw error;
265
+ }
234
266
  let content = "";
235
267
  const toolCallsMap = new Map();
236
268
  const thinkingBlocks = [];
@@ -5,6 +5,7 @@ import { success, } from "../types.js";
5
5
  import { zodToGoogleTool } from "../util/tool.js";
6
6
  import { sanitizeAttributes } from "../util.js";
7
7
  import { BaseClient } from "./baseClient.js";
8
+ import { SmolContextWindowExceededError } from "../smolError.js";
8
9
  import { Model } from "../model.js";
9
10
  export const DEFAULT_OLLAMA_HOST = "http://localhost:11434";
10
11
  export class SmolOllama extends BaseClient {
@@ -80,6 +81,13 @@ export class SmolOllama extends BaseClient {
80
81
  // @ts-ignore
81
82
  result = await this.client.chat(request);
82
83
  }
84
+ catch (error) {
85
+ const msg = (error.message || "").toLowerCase();
86
+ if (msg.includes("context length") || msg.includes("context window")) {
87
+ throw new SmolContextWindowExceededError(error.message);
88
+ }
89
+ throw error;
90
+ }
83
91
  finally {
84
92
  if (signal && abortHandler) {
85
93
  signal.removeEventListener("abort", abortHandler);
@@ -12,6 +12,7 @@ export declare class SmolOpenAi extends BaseClient implements SmolClient {
12
12
  getModel(): ModelName;
13
13
  private calculateUsageAndCost;
14
14
  private buildRequest;
15
+ private rethrowAsSmolError;
15
16
  _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
16
17
  _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
17
18
  }
@@ -4,6 +4,7 @@ import { ToolCall } from "../classes/ToolCall.js";
4
4
  import { isFunctionToolCall, sanitizeAttributes } from "../util.js";
5
5
  import { getLogger } from "../logger.js";
6
6
  import { BaseClient } from "./baseClient.js";
7
+ import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
7
8
  import { zodToOpenAITool } from "../util/tool.js";
8
9
  import { Model } from "../model.js";
9
10
  export class SmolOpenAi extends BaseClient {
@@ -68,17 +69,37 @@ export class SmolOpenAi extends BaseClient {
68
69
  }
69
70
  return request;
70
71
  }
72
+ rethrowAsSmolError(error) {
73
+ if (error instanceof OpenAI.APIError) {
74
+ if (error.code === "context_length_exceeded") {
75
+ throw new SmolContextWindowExceededError(error.message);
76
+ }
77
+ if (error.code === "content_policy_violation") {
78
+ throw new SmolContentPolicyError(error.message);
79
+ }
80
+ }
81
+ throw error;
82
+ }
71
83
  async _textSync(config) {
72
84
  const request = this.buildRequest(config);
73
85
  this.logger.debug("Sending request to OpenAI:", JSON.stringify(request, null, 2));
74
86
  this.statelogClient?.promptRequest(request);
75
87
  const signal = this.getAbortSignal(config);
76
- const completion = await this.client.chat.completions.create({
77
- ...request,
78
- stream: false,
79
- }, { ...(signal && { signal }) });
88
+ let completion;
89
+ try {
90
+ completion = await this.client.chat.completions.create({
91
+ ...request,
92
+ stream: false,
93
+ }, { ...(signal && { signal }) });
94
+ }
95
+ catch (error) {
96
+ this.rethrowAsSmolError(error);
97
+ }
80
98
  this.logger.debug("Response from OpenAI:", JSON.stringify(completion, null, 2));
81
99
  this.statelogClient?.promptResponse(completion);
100
+ if (completion.choices[0]?.finish_reason === "content_filter") {
101
+ throw new SmolContentPolicyError("Content blocked by OpenAI content filter");
102
+ }
82
103
  const message = completion.choices[0].message;
83
104
  const output = message.content;
84
105
  const _toolCalls = message.tool_calls;
@@ -109,11 +130,17 @@ export class SmolOpenAi extends BaseClient {
109
130
  this.logger.debug("Sending streaming request to OpenAI:", JSON.stringify(request, null, 2));
110
131
  this.statelogClient?.promptRequest(request);
111
132
  const signal = this.getAbortSignal(config);
112
- const completion = await this.client.chat.completions.create({
113
- ...request,
114
- stream: true,
115
- stream_options: { include_usage: true },
116
- }, { ...(signal && { signal }) });
133
+ let completion;
134
+ try {
135
+ completion = await this.client.chat.completions.create({
136
+ ...request,
137
+ stream: true,
138
+ stream_options: { include_usage: true },
139
+ }, { ...(signal && { signal }) });
140
+ }
141
+ catch (error) {
142
+ this.rethrowAsSmolError(error);
143
+ }
117
144
  let content = "";
118
145
  const toolCallsMap = new Map();
119
146
  let usage;
@@ -127,6 +154,9 @@ export class SmolOpenAi extends BaseClient {
127
154
  }
128
155
  if (!chunk.choices || chunk.choices.length === 0)
129
156
  continue;
157
+ if (chunk.choices[0]?.finish_reason === "content_filter") {
158
+ throw new SmolContentPolicyError("Content blocked by OpenAI content filter");
159
+ }
130
160
  const delta = chunk.choices[0]?.delta;
131
161
  if (!delta)
132
162
  continue;
@@ -13,6 +13,7 @@ export declare class SmolOpenAiResponses extends BaseClient implements SmolClien
13
13
  private convertMessages;
14
14
  private buildRequest;
15
15
  private calculateUsageAndCost;
16
+ private rethrowAsSmolError;
16
17
  _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
17
18
  _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
18
19
  }
@@ -6,6 +6,7 @@ import { BaseClient } from "./baseClient.js";
6
6
  import { zodToOpenAIResponsesTool } from "../util/tool.js";
7
7
  import { sanitizeAttributes } from "../util.js";
8
8
  import { Model } from "../model.js";
9
+ import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
9
10
  export class SmolOpenAiResponses extends BaseClient {
10
11
  client;
11
12
  logger;
@@ -101,15 +102,32 @@ export class SmolOpenAiResponses extends BaseClient {
101
102
  }
102
103
  return { usage, cost };
103
104
  }
105
+ rethrowAsSmolError(error) {
106
+ if (error instanceof OpenAI.APIError) {
107
+ if (error.code === "context_length_exceeded") {
108
+ throw new SmolContextWindowExceededError(error.message);
109
+ }
110
+ if (error.code === "content_policy_violation") {
111
+ throw new SmolContentPolicyError(error.message);
112
+ }
113
+ }
114
+ throw error;
115
+ }
104
116
  async _textSync(config) {
105
117
  const request = this.buildRequest(config);
106
118
  this.logger.debug("Sending request to OpenAI Responses API:", JSON.stringify(request, null, 2));
107
119
  this.statelogClient?.promptRequest(request);
108
120
  const signal = this.getAbortSignal(config);
109
- const response = await this.client.responses.create({
110
- ...request,
111
- stream: false,
112
- }, { ...(signal && { signal }) });
121
+ let response;
122
+ try {
123
+ response = await this.client.responses.create({
124
+ ...request,
125
+ stream: false,
126
+ }, { ...(signal && { signal }) });
127
+ }
128
+ catch (error) {
129
+ this.rethrowAsSmolError(error);
130
+ }
113
131
  this.logger.debug("Response from OpenAI Responses API:", JSON.stringify(response, null, 2));
114
132
  this.statelogClient?.promptResponse(response);
115
133
  const output = response.output_text || null;
@@ -133,9 +151,15 @@ export class SmolOpenAiResponses extends BaseClient {
133
151
  this.logger.debug("Sending streaming request to OpenAI Responses API:", JSON.stringify(request, null, 2));
134
152
  this.statelogClient?.promptRequest(request);
135
153
  const signal = this.getAbortSignal(config);
136
- const stream = this.client.responses.stream(request, {
137
- ...(signal && { signal }),
138
- });
154
+ let stream;
155
+ try {
156
+ stream = this.client.responses.stream(request, {
157
+ ...(signal && { signal }),
158
+ });
159
+ }
160
+ catch (error) {
161
+ this.rethrowAsSmolError(error);
162
+ }
139
163
  let content = "";
140
164
  const functionCalls = new Map();
141
165
  let usage;
package/dist/index.d.ts CHANGED
@@ -8,3 +8,5 @@ export * from "./classes/message/index.js";
8
8
  export * from "./functions.js";
9
9
  export * from "./classes/ToolCall.js";
10
10
  export * from "./strategies/index.js";
11
+ export { latencyTracker } from "./latencyTracker.js";
12
+ export type { LatencySample } from "./latencyTracker.js";
package/dist/index.js CHANGED
@@ -8,3 +8,4 @@ export * from "./classes/message/index.js";
8
8
  export * from "./functions.js";
9
9
  export * from "./classes/ToolCall.js";
10
10
  export * from "./strategies/index.js";
11
+ export { latencyTracker } from "./latencyTracker.js";
@@ -0,0 +1,32 @@
1
+ export type LatencySample = {
2
+ /** Milliseconds per output token */
3
+ msPerToken: number;
4
+ /** Timestamp when sample was recorded */
5
+ timestamp: number;
6
+ };
7
+ declare class LatencyTracker {
8
+ private samples;
9
+ private windowSize;
10
+ constructor(windowSize?: number);
11
+ /** Record a latency sample for a model. */
12
+ record(model: string, elapsedMs: number, outputTokens: number): void;
13
+ /** Get the windowed mean ms-per-token for a model, or null if no samples. */
14
+ getMeanMsPerToken(model: string): number | null;
15
+ /**
16
+ * Get estimated output tokens per second for a model based on tracked latency.
17
+ * Returns null if no samples exist or if the number of samples is below the minimum required.
18
+ */
19
+ getTokensPerSecond(model: string, minSamples?: number): number | null;
20
+ /** Get the number of samples recorded for a model. */
21
+ getSampleCount(model: string): number;
22
+ /** Get all samples for a model (defensive copy). */
23
+ getSamples(model: string): LatencySample[];
24
+ /** Clear all samples for a model. */
25
+ clear(model?: string): void;
26
+ /** Update the window size. Existing samples beyond the new size are trimmed. */
27
+ setWindowSize(size: number): void;
28
+ getWindowSize(): number;
29
+ }
30
+ /** Global singleton latency tracker. */
31
+ export declare const latencyTracker: LatencyTracker;
32
+ export {};