npm - smoltalk - Versions diffs - 0.0.55 → 0.0.56 - Mend

smoltalk 0.0.55 → 0.0.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/dist/clients/anthropic.d.ts +1 -0
package/dist/clients/anthropic.js +57 -26
package/dist/clients/baseClient.d.ts +1 -0
package/dist/clients/baseClient.js +23 -1
package/dist/clients/google.js +35 -3
package/dist/clients/ollama.js +8 -0
package/dist/clients/openai.d.ts +1 -0
package/dist/clients/openai.js +39 -9
package/dist/clients/openaiResponses.d.ts +1 -0
package/dist/clients/openaiResponses.js +31 -7
package/dist/index.d.ts +2 -0
package/dist/index.js +1 -0
package/dist/latencyTracker.d.ts +32 -0
package/dist/latencyTracker.js +73 -0
package/dist/model.d.ts +3 -2
package/dist/model.js +11 -1
package/dist/models.d.ts +1 -1
package/dist/smolError.d.ts +6 -0
package/dist/smolError.js +12 -0
package/dist/strategies/fallbackStrategy.js +23 -1
package/dist/strategies/fastestStrategy.d.ts +17 -0
package/dist/strategies/fastestStrategy.js +95 -0
package/dist/strategies/index.d.ts +5 -1
package/dist/strategies/index.js +16 -1
package/dist/strategies/randomStrategy.d.ts +12 -0
package/dist/strategies/randomStrategy.js +39 -0
package/dist/strategies/types.d.ts +19 -1
package/dist/strategies/types.js +14 -0
package/package.json +1 -1

package/dist/clients/anthropic.d.ts CHANGED Viewed

@@ -12,6 +12,7 @@ export declare class SmolAnthropic extends BaseClient implements SmolClient {
     getModel(): ModelName;
     private calculateUsageAndCost;
     private buildRequest;
+    private rethrowAsSmolError;
     _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
     _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
 }

package/dist/clients/anthropic.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { SystemMessage, DeveloperMessage } from "../classes/message/index.js";
 import { getLogger } from "../logger.js";
 import { success, } from "../types.js";
 import { zodToAnthropicTool } from "../util/tool.js";
+import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
 import { BaseClient } from "./baseClient.js";
 import { Model } from "../model.js";
 const DEFAULT_MAX_TOKENS = 4096;
@@ -82,6 +83,24 @@ export class SmolAnthropic extends BaseClient {
                 : undefined;
         return { system, messages: anthropicMessages, tools, thinking };
     }
+    rethrowAsSmolError(error) { // Always throws: maps recognised Anthropic API failures to Smol error types, otherwise rethrows `error` unchanged.
+        if (error instanceof Anthropic.APIError) {
+            const msg = error.message.toLowerCase(); // lower-cased so the substring checks below are case-insensitive
+            if (msg.includes("prompt is too long") ||
+                msg.includes("context length") ||
+                msg.includes("context window") ||
+                msg.includes("too many tokens")) {
+                throw new SmolContextWindowExceededError(error.message); // carries the original (not lower-cased) message
+            }
+            if (msg.includes("content policy") ||
+                msg.includes("usage policies") ||
+                msg.includes("content filtering") ||
+                msg.includes("violates our")) {
+                throw new SmolContentPolicyError(error.message);
+            }
+        }
+        throw error; // not an Anthropic.APIError, or none of the phrases matched
+    }
     async _textSync(config) {
         const { system, messages, tools, thinking } = this.buildRequest(config);
         let debugData = {
@@ -95,19 +114,25 @@ export class SmolAnthropic extends BaseClient {
         this.logger.debug("Sending request to Anthropic:", debugData);
         this.statelogClient?.promptRequest(debugData);
         const signal = this.getAbortSignal(config);
-        const response = await this.client.messages.create({
-            model: this.getModel(),
-            max_tokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
-            messages,
-            ...(system && { system }),
-            ...(tools && { tools }),
-            ...(thinking && { thinking }),
-            ...(config.temperature !== undefined && {
-                temperature: config.temperature,
-            }),
-            ...(config.rawAttributes || {}),
-            stream: false,
-        }, { ...(signal && { signal }) });
+        let response;
+        try {
+            response = await this.client.messages.create({
+                model: this.getModel(),
+                max_tokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
+                messages,
+                ...(system && { system }),
+                ...(tools && { tools }),
+                ...(thinking && { thinking }),
+                ...(config.temperature !== undefined && {
+                    temperature: config.temperature,
+                }),
+                ...(config.rawAttributes || {}),
+                stream: false,
+            }, { ...(signal && { signal }) });
+        }
+        catch (error) {
+            this.rethrowAsSmolError(error);
+        }
         this.logger.debug("Response from Anthropic:", response);
         this.statelogClient?.promptResponse(response);
         let output = null;
@@ -148,19 +173,25 @@ export class SmolAnthropic extends BaseClient {
         this.logger.debug("Sending streaming request to Anthropic:", streamDebugData);
         this.statelogClient?.promptRequest(streamDebugData);
         const signal = this.getAbortSignal(config);
-        const stream = await this.client.messages.create({
-            model: this.model,
-            max_tokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
-            messages,
-            ...(system && { system }),
-            ...(tools && { tools }),
-            ...(thinking && { thinking }),
-            ...(config.temperature !== undefined && {
-                temperature: config.temperature,
-            }),
-            ...(config.rawAttributes || {}),
-            stream: true,
-        }, { ...(signal && { signal }) });
+        let stream;
+        try {
+            stream = await this.client.messages.create({
+                model: this.model,
+                max_tokens: config.maxTokens ?? DEFAULT_MAX_TOKENS,
+                messages,
+                ...(system && { system }),
+                ...(tools && { tools }),
+                ...(thinking && { thinking }),
+                ...(config.temperature !== undefined && {
+                    temperature: config.temperature,
+                }),
+                ...(config.rawAttributes || {}),
+                stream: true,
+            }, { ...(signal && { signal }) });
+        }
+        catch (error) {
+            this.rethrowAsSmolError(error);
+        }
         let content = "";
         // Track tool blocks by index: index -> { id, name, arguments (partial JSON) }
         const toolBlocks = new Map();

package/dist/clients/baseClient.d.ts CHANGED Viewed

@@ -24,6 +24,7 @@ export declare class BaseClient implements SmolClient {
         continue: boolean;
         newPromptConfig: PromptConfig;
     };
+    private recordLatency;
     extractResponse(promptConfig: PromptConfig, rawValue: any, schema: any, depth?: number): any;
     textWithRetry(promptConfig: PromptConfig, retries: number): Promise<Result<PromptResult>>;
     _textSync(promptConfig: PromptConfig): Promise<Result<PromptResult>>;

package/dist/clients/baseClient.js CHANGED Viewed

@@ -1,4 +1,5 @@
 import { AssistantMessage, userMessage, assistantMessage, } from "../classes/message/index.js";
+import { latencyTracker } from "../latencyTracker.js";
 import { getLogger } from "../logger.js";
 import { getModel, isTextModel } from "../models.js";
 import { SmolStructuredOutputError } from "../smolError.js";
@@ -146,9 +147,11 @@ export class BaseClient {
                 value: { output: null, toolCalls: [], model: this.config.model },
             };
         }
+        const startTime = performance.now();
         try {
             const result = await this.textWithRetry(newPromptConfig, newPromptConfig.responseFormatOptions?.numRetries ||
                 DEFAULT_NUM_RETRIES);
+            this.recordLatency(startTime, result);
             return result;
         }
         catch (err) {
@@ -210,6 +213,15 @@ export class BaseClient {
         }
         return { continue: true, newPromptConfig: promptConfig };
     }
+    recordLatency(startTime, result) { // Records one latency sample for this.config.model; does nothing for failed results or a missing/non-positive output-token count.
+        if (!result.success)
+            return;
+        const outputTokens = result.value.usage?.outputTokens;
+        if (!outputTokens || outputTokens <= 0)
+            return;
+        const elapsedMs = performance.now() - startTime; // startTime must therefore be a performance.now() reading
+        latencyTracker.record(this.config.model, elapsedMs, outputTokens);
+    }
     extractResponse(promptConfig, rawValue, schema, depth = 0) {
         const MAX_DEPTH = 5;
         if (depth > MAX_DEPTH) {
@@ -374,8 +386,18 @@ export class BaseClient {
             };
             return;
         }
+        const startTime = performance.now();
         try {
-            yield* this._textStream(newPromptConfig);
+            for await (const chunk of this._textStream(newPromptConfig)) {
+                if (chunk.type === "done") {
+                    const outputTokens = chunk.result.usage?.outputTokens;
+                    if (outputTokens && outputTokens > 0) {
+                        const elapsedMs = performance.now() - startTime;
+                        latencyTracker.record(this.config.model, elapsedMs, outputTokens);
+                    }
+                }
+                yield chunk;
+            }
         }
         catch (err) {
             if (this.isAbortError(err)) {

package/dist/clients/google.js CHANGED Viewed

@@ -3,6 +3,7 @@ import { ToolCall } from "../classes/ToolCall.js";
 import { getLogger } from "../logger.js";
 import { addCosts, addTokenUsage, success, } from "../types.js";
 import { zodToGoogleTool } from "../util/tool.js";
+import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
 import { sanitizeAttributes } from "../util.js";
 import { BaseClient } from "./baseClient.js";
 import { Model } from "../model.js";
@@ -171,10 +172,28 @@ export class SmolGoogle extends BaseClient {
     async __textSync(request) {
         this.logger.debug("Sending request to Google Gemini:", JSON.stringify(request, null, 2));
         this.statelogClient?.promptRequest(request);
-        // Send the prompt as the latest message
-        const result = await this.client.models.generateContent(request);
+        let result;
+        try {
+            result = await this.client.models.generateContent(request);
+        }
+        catch (error) {
+            const msg = (error.message || "").toLowerCase();
+            if (msg.includes("token") &&
+                (msg.includes("exceed") ||
+                    msg.includes("too long") ||
+                    msg.includes("limit"))) {
+                throw new SmolContextWindowExceededError(error.message);
+            }
+            throw error;
+        }
         this.logger.debug("Response from Google Gemini:", JSON.stringify(result, null, 2));
         this.statelogClient?.promptResponse(result);
+        for (const candidate of result.candidates || []) {
+            const finishReason = candidate.finishReason;
+            if (finishReason === "SAFETY" || finishReason === "PROHIBITED_CONTENT") {
+                throw new SmolContentPolicyError(`Content blocked by Google safety filter: ${finishReason}`);
+            }
+        }
         const toolCalls = [];
         const thinkingBlocks = [];
         let textContent = "";
@@ -230,7 +249,20 @@ export class SmolGoogle extends BaseClient {
         }
         this.logger.debug("Sending streaming request to Google Gemini:", JSON.stringify(request, null, 2));
         this.statelogClient?.promptRequest(request);
-        const stream = await this.client.models.generateContentStream(request);
+        let stream;
+        try {
+            stream = await this.client.models.generateContentStream(request);
+        }
+        catch (error) {
+            const msg = (error.message || "").toLowerCase();
+            if (msg.includes("token") &&
+                (msg.includes("exceed") ||
+                    msg.includes("too long") ||
+                    msg.includes("limit"))) {
+                throw new SmolContextWindowExceededError(error.message);
+            }
+            throw error;
+        }
         let content = "";
         const toolCallsMap = new Map();
         const thinkingBlocks = [];

package/dist/clients/ollama.js CHANGED Viewed

@@ -5,6 +5,7 @@ import { success, } from "../types.js";
 import { zodToGoogleTool } from "../util/tool.js";
 import { sanitizeAttributes } from "../util.js";
 import { BaseClient } from "./baseClient.js";
+import { SmolContextWindowExceededError } from "../smolError.js";
 import { Model } from "../model.js";
 export const DEFAULT_OLLAMA_HOST = "http://localhost:11434";
 export class SmolOllama extends BaseClient {
@@ -80,6 +81,13 @@ export class SmolOllama extends BaseClient {
             // @ts-ignore
             result = await this.client.chat(request);
         }
+        catch (error) {
+            const msg = (error.message || "").toLowerCase();
+            if (msg.includes("context length") || msg.includes("context window")) {
+                throw new SmolContextWindowExceededError(error.message);
+            }
+            throw error;
+        }
         finally {
             if (signal && abortHandler) {
                 signal.removeEventListener("abort", abortHandler);

package/dist/clients/openai.d.ts CHANGED Viewed

@@ -12,6 +12,7 @@ export declare class SmolOpenAi extends BaseClient implements SmolClient {
     getModel(): ModelName;
     private calculateUsageAndCost;
     private buildRequest;
+    private rethrowAsSmolError;
     _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
     _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
 }

package/dist/clients/openai.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { ToolCall } from "../classes/ToolCall.js";
 import { isFunctionToolCall, sanitizeAttributes } from "../util.js";
 import { getLogger } from "../logger.js";
 import { BaseClient } from "./baseClient.js";
+import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
 import { zodToOpenAITool } from "../util/tool.js";
 import { Model } from "../model.js";
 export class SmolOpenAi extends BaseClient {
@@ -68,17 +69,37 @@ export class SmolOpenAi extends BaseClient {
         }
         return request;
     }
+    rethrowAsSmolError(error) { // Always throws: maps recognised OpenAI API error codes to Smol error types, otherwise rethrows `error` unchanged.
+        if (error instanceof OpenAI.APIError) {
+            if (error.code === "context_length_exceeded") {
+                throw new SmolContextWindowExceededError(error.message);
+            }
+            if (error.code === "content_policy_violation") {
+                throw new SmolContentPolicyError(error.message);
+            }
+        }
+        throw error; // not an OpenAI.APIError, or a code other than the two handled above
+    }
     async _textSync(config) {
         const request = this.buildRequest(config);
         this.logger.debug("Sending request to OpenAI:", JSON.stringify(request, null, 2));
         this.statelogClient?.promptRequest(request);
         const signal = this.getAbortSignal(config);
-        const completion = await this.client.chat.completions.create({
-            ...request,
-            stream: false,
-        }, { ...(signal && { signal }) });
+        let completion;
+        try {
+            completion = await this.client.chat.completions.create({
+                ...request,
+                stream: false,
+            }, { ...(signal && { signal }) });
+        }
+        catch (error) {
+            this.rethrowAsSmolError(error);
+        }
         this.logger.debug("Response from OpenAI:", JSON.stringify(completion, null, 2));
         this.statelogClient?.promptResponse(completion);
+        if (completion.choices[0]?.finish_reason === "content_filter") {
+            throw new SmolContentPolicyError("Content blocked by OpenAI content filter");
+        }
         const message = completion.choices[0].message;
         const output = message.content;
         const _toolCalls = message.tool_calls;
@@ -109,11 +130,17 @@ export class SmolOpenAi extends BaseClient {
         this.logger.debug("Sending streaming request to OpenAI:", JSON.stringify(request, null, 2));
         this.statelogClient?.promptRequest(request);
         const signal = this.getAbortSignal(config);
-        const completion = await this.client.chat.completions.create({
-            ...request,
-            stream: true,
-            stream_options: { include_usage: true },
-        }, { ...(signal && { signal }) });
+        let completion;
+        try {
+            completion = await this.client.chat.completions.create({
+                ...request,
+                stream: true,
+                stream_options: { include_usage: true },
+            }, { ...(signal && { signal }) });
+        }
+        catch (error) {
+            this.rethrowAsSmolError(error);
+        }
         let content = "";
         const toolCallsMap = new Map();
         let usage;
@@ -127,6 +154,9 @@ export class SmolOpenAi extends BaseClient {
             }
             if (!chunk.choices || chunk.choices.length === 0)
                 continue;
+            if (chunk.choices[0]?.finish_reason === "content_filter") {
+                throw new SmolContentPolicyError("Content blocked by OpenAI content filter");
+            }
             const delta = chunk.choices[0]?.delta;
             if (!delta)
                 continue;

package/dist/clients/openaiResponses.d.ts CHANGED Viewed

@@ -13,6 +13,7 @@ export declare class SmolOpenAiResponses extends BaseClient implements SmolClien
     private convertMessages;
     private buildRequest;
     private calculateUsageAndCost;
+    private rethrowAsSmolError;
     _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
     _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
 }

package/dist/clients/openaiResponses.js CHANGED Viewed

@@ -6,6 +6,7 @@ import { BaseClient } from "./baseClient.js";
 import { zodToOpenAIResponsesTool } from "../util/tool.js";
 import { sanitizeAttributes } from "../util.js";
 import { Model } from "../model.js";
+import { SmolContentPolicyError, SmolContextWindowExceededError, } from "../smolError.js";
 export class SmolOpenAiResponses extends BaseClient {
     client;
     logger;
@@ -101,15 +102,32 @@ export class SmolOpenAiResponses extends BaseClient {
         }
         return { usage, cost };
     }
+    rethrowAsSmolError(error) { // Always throws: maps recognised OpenAI API error codes to Smol error types, otherwise rethrows `error` unchanged.
+        if (error instanceof OpenAI.APIError) {
+            if (error.code === "context_length_exceeded") {
+                throw new SmolContextWindowExceededError(error.message);
+            }
+            if (error.code === "content_policy_violation") {
+                throw new SmolContentPolicyError(error.message);
+            }
+        }
+        throw error; // not an OpenAI.APIError, or a code other than the two handled above
+    }
     async _textSync(config) {
         const request = this.buildRequest(config);
         this.logger.debug("Sending request to OpenAI Responses API:", JSON.stringify(request, null, 2));
         this.statelogClient?.promptRequest(request);
         const signal = this.getAbortSignal(config);
-        const response = await this.client.responses.create({
-            ...request,
-            stream: false,
-        }, { ...(signal && { signal }) });
+        let response;
+        try {
+            response = await this.client.responses.create({
+                ...request,
+                stream: false,
+            }, { ...(signal && { signal }) });
+        }
+        catch (error) {
+            this.rethrowAsSmolError(error);
+        }
         this.logger.debug("Response from OpenAI Responses API:", JSON.stringify(response, null, 2));
         this.statelogClient?.promptResponse(response);
         const output = response.output_text || null;
@@ -133,9 +151,15 @@ export class SmolOpenAiResponses extends BaseClient {
         this.logger.debug("Sending streaming request to OpenAI Responses API:", JSON.stringify(request, null, 2));
         this.statelogClient?.promptRequest(request);
         const signal = this.getAbortSignal(config);
-        const stream = this.client.responses.stream(request, {
-            ...(signal && { signal }),
-        });
+        let stream;
+        try {
+            stream = this.client.responses.stream(request, {
+                ...(signal && { signal }),
+            });
+        }
+        catch (error) {
+            this.rethrowAsSmolError(error);
+        }
         let content = "";
         const functionCalls = new Map();
         let usage;

package/dist/index.d.ts CHANGED Viewed

@@ -8,3 +8,5 @@ export * from "./classes/message/index.js";
 export * from "./functions.js";
 export * from "./classes/ToolCall.js";
 export * from "./strategies/index.js";
+export { latencyTracker } from "./latencyTracker.js";
+export type { LatencySample } from "./latencyTracker.js";

package/dist/index.js CHANGED Viewed

@@ -8,3 +8,4 @@ export * from "./classes/message/index.js";
 export * from "./functions.js";
 export * from "./classes/ToolCall.js";
 export * from "./strategies/index.js";
+export { latencyTracker } from "./latencyTracker.js";

package/dist/latencyTracker.d.ts ADDED Viewed

@@ -0,0 +1,32 @@
+export type LatencySample = {
+    /** Milliseconds per output token (elapsed milliseconds divided by the output-token count). */
+    msPerToken: number;
+    /** `Date.now()` value (epoch milliseconds) taken when the sample was recorded. */
+    timestamp: number;
+};
+declare class LatencyTracker {
+    private samples;
+    private windowSize;
+    constructor(windowSize?: number); // windowSize defaults to 10 when omitted
+    /** Record a latency sample for a model. Ignored when elapsedMs or outputTokens is <= 0. */
+    record(model: string, elapsedMs: number, outputTokens: number): void;
+    /** Get the mean ms-per-token over the retained samples for a model, or null if there are none. */
+    getMeanMsPerToken(model: string): number | null;
+    /**
+     * Get estimated output tokens per second for a model (1000 / mean ms-per-token).
+     * Returns null if fewer than minSamples (default 1) samples are retained, or if no usable mean exists (no samples, or a mean of 0).
+     */
+    getTokensPerSecond(model: string, minSamples?: number): number | null;
+    /** Get the number of samples currently retained for a model (0 if none; never more than the window size after a record). */
+    getSampleCount(model: string): number;
+    /** Get the retained samples for a model as a new array (shallow copy; empty if none). */
+    getSamples(model: string): LatencySample[];
+    /** Clear the samples for the given model, or for every model when no model is given. */
+    clear(model?: string): void;
+    /** Update the window size. Existing samples beyond the new size are trimmed, oldest first. */
+    setWindowSize(size: number): void;
+    getWindowSize(): number; // current per-model sample cap
+}
+/** Global singleton latency tracker. */
+export declare const latencyTracker: LatencyTracker;
+export {};

package/dist/latencyTracker.js ADDED Viewed

@@ -0,0 +1,73 @@
+const DEFAULT_WINDOW_SIZE = 10; // samples kept per model when the constructor is given no window size
+class LatencyTracker {
+    samples = new Map(); // model -> array of { msPerToken, timestamp }, in recording order
+    windowSize;
+    constructor(windowSize = DEFAULT_WINDOW_SIZE) {
+        this.windowSize = windowSize;
+    }
+    /** Record a latency sample for a model. Ignored when elapsedMs or outputTokens is <= 0. */
+    record(model, elapsedMs, outputTokens) {
+        if (outputTokens <= 0 || elapsedMs <= 0)
+            return;
+        const msPerToken = elapsedMs / outputTokens;
+        const samples = this.samples.get(model) ?? [];
+        samples.push({ msPerToken, timestamp: Date.now() });
+        // Keep only the last windowSize samples (older entries are removed from the front)
+        if (samples.length > this.windowSize) {
+            samples.splice(0, samples.length - this.windowSize);
+        }
+        this.samples.set(model, samples);
+    }
+    /** Get the mean ms-per-token over the retained samples for a model, or null if there are none. */
+    getMeanMsPerToken(model) {
+        const samples = this.samples.get(model);
+        if (!samples || samples.length === 0)
+            return null;
+        const sum = samples.reduce((acc, s) => acc + s.msPerToken, 0);
+        return sum / samples.length;
+    }
+    /**
+     * Get estimated output tokens per second for a model (1000 / mean ms-per-token).
+     * Returns null if fewer than minSamples samples are retained, or if no usable mean exists (no samples, or a mean of 0).
+     */
+    getTokensPerSecond(model, minSamples = 1) {
+        const sampleCount = this.getSampleCount(model);
+        if (sampleCount < minSamples)
+            return null;
+        const msPerToken = this.getMeanMsPerToken(model);
+        if (msPerToken === null || msPerToken === 0)
+            return null;
+        return 1000 / msPerToken;
+    }
+    /** Get the number of samples currently retained for a model (0 if none). */
+    getSampleCount(model) {
+        return this.samples.get(model)?.length ?? 0;
+    }
+    /** Get the retained samples for a model as a new array (shallow copy; empty if none). */
+    getSamples(model) {
+        return [...(this.samples.get(model) ?? [])];
+    }
+    /** Clear the samples for the given model, or for every model when no model is given. */
+    clear(model) {
+        if (model) {
+            this.samples.delete(model);
+        }
+        else {
+            this.samples.clear();
+        }
+    }
+    /** Update the window size. Existing samples beyond the new size are trimmed, oldest first. */
+    setWindowSize(size) {
+        this.windowSize = size; // NOTE(review): size is stored without validation
+        for (const [model, samples] of this.samples) { // NOTE(review): `model` is not used inside this loop
+            if (samples.length > size) {
+                samples.splice(0, samples.length - size);
+            }
+        }
+    }
+    getWindowSize() { // current per-model sample cap
+        return this.windowSize;
+    }
+}
+/** Global singleton latency tracker. */
+export const latencyTracker = new LatencyTracker();

package/dist/model.d.ts CHANGED Viewed

@@ -6,14 +6,14 @@ export declare class Model {
     private resolvedModel;
     private provider?;
     constructor(model: ModelName | ModelConfig | ModelNameAndProvider, provider?: Provider);
-    getModel(): ModelName | ModelNameAndProvider | {
+    getModel(): string | ModelNameAndProvider | {
         optimizeFor: ("reasoning" | "speed" | "cost" | "large-context")[];
         providers: ("local" | "ollama" | "openai" | "openai-responses" | "anthropic" | "google" | "replicate" | "modal")[];
         limit?: {
             cost?: number | undefined;
         } | undefined;
     };
-    getResolvedModel(): ModelName;
+    getResolvedModel(): string;
     getProvider(): Provider | undefined;
     setProvider(): Provider | undefined;
     resolveModel(models?: readonly TextModel[]): ModelName;
@@ -31,5 +31,6 @@ export declare class Model {
         currency: string;
     } | null;
     toString(): string;
+    toJSON(): ModelName | ModelNameAndProvider;
     static create(model: ModelLike, provider?: Provider): Model;
 }

package/dist/model.js CHANGED Viewed

@@ -1,3 +1,4 @@
+import { latencyTracker } from "./latencyTracker.js";
 import { getModel, isTextModel, textModels, registeredTextModels, } from "./models.js";
 import { SmolError } from "./smolError.js";
 import { ModelConfigSchema, ModelNameAndProviderSchema, ModelNameSchema, } from "./strategies/types.js";
@@ -115,7 +116,10 @@ export class Model {
             case "cost":
                 return (m.inputTokenCost ?? 0) + (m.outputTokenCost ?? 0);
             case "speed":
-                return m.outputTokensPerSecond ?? 0;
+                // Prefer tracked latency over static estimates
+                return (latencyTracker.getTokensPerSecond(m.modelName) ??
+                    m.outputTokensPerSecond ??
+                    0);
             case "reasoning":
                 return (m.inputTokenCost ?? 0) + (m.outputTokenCost ?? 0);
             case "large-context":
@@ -149,6 +153,12 @@ export class Model {
     toString() {
         return `Model(${JSON.stringify(this.model)})`;
     }
+    toJSON() { // Returns this.model as-is when it parses against ModelNameAndProviderSchema, otherwise the value of getResolvedModel().
+        if (ModelNameAndProviderSchema.safeParse(this.model).success) {
+            return this.model;
+        }
+        return this.getResolvedModel();
+    }
     static create(model, provider) {
         if (model instanceof Model) {
             return model;

package/dist/models.d.ts CHANGED Viewed

@@ -688,7 +688,7 @@ export type TextModelName = (typeof textModels)[number]["modelName"];
 export type ImageModelName = (typeof imageModels)[number]["modelName"];
 export type SpeechToTextModelName = (typeof speechToTextModels)[number]["modelName"];
 export type EmbeddingsModelName = (typeof embeddingsModels)[number]["modelName"];
-export type ModelName = TextModelName | ImageModelName | SpeechToTextModelName;
+export type ModelName = string;
 export declare const registeredTextModels: TextModel[];
 export declare function registerTextModel(model: Omit<TextModel, "type"> & {
     type?: "text";

package/dist/smolError.d.ts CHANGED Viewed

@@ -7,3 +7,9 @@ export declare class SmolStructuredOutputError extends SmolError {
 export declare class SmolTimeoutError extends SmolError {
     constructor(message: string);
 }
+export declare class SmolContentPolicyError extends SmolError { // signals that a provider rejected or blocked content on policy/safety grounds
+    constructor(message: string);
+}
+export declare class SmolContextWindowExceededError extends SmolError { // signals that a provider reported the request as too long for the model's context window
+    constructor(message: string);
+}

package/dist/smolError.js CHANGED Viewed

@@ -16,3 +16,15 @@ export class SmolTimeoutError extends SmolError {
         this.name = "SmolTimeoutError";
     }
 }
+export class SmolContentPolicyError extends SmolError { // signals that a provider rejected or blocked content on policy/safety grounds
+    constructor(message) {
+        super(message);
+        this.name = "SmolContentPolicyError"; // name mirrors the class name
+    }
+}
+export class SmolContextWindowExceededError extends SmolError { // signals that a provider reported the request as too long for the model's context window
+    constructor(message) {
+        super(message);
+        this.name = "SmolContextWindowExceededError"; // name mirrors the class name
+    }
+}

package/dist/strategies/fallbackStrategy.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { SmolStructuredOutputError, SmolTimeoutError } from "../smolError.js";
+import { SmolContentPolicyError, SmolContextWindowExceededError, SmolStructuredOutputError, SmolTimeoutError, } from "../smolError.js";
 import { success, } from "../types.js";
 import { BaseStrategy } from "./baseStrategy.js";
 import { IDStrategy } from "./idStrategy.js";
@@ -59,6 +59,28 @@ export class FallbackStrategy extends BaseStrategy {
                     });
                 }
             }
+            else if (error instanceof SmolContentPolicyError) {
+                if (fallbackStrategies.contentPolicyViolation &&
+                    fallbackStrategies.contentPolicyViolation.length > 0) {
+                    this.statelogClient?.debug("FallbackStrategy: falling back due to content policy violation", {
+                        failedStrategy: strategy.toString(),
+                    });
+                    return this._textWithFallbacks(config, fromJSON(fallbackStrategies.contentPolicyViolation[0]), {
+                        contentPolicyViolation: fallbackStrategies.contentPolicyViolation.slice(1),
+                    });
+                }
+            }
+            else if (error instanceof SmolContextWindowExceededError) {
+                if (fallbackStrategies.contextWindowExceeded &&
+                    fallbackStrategies.contextWindowExceeded.length > 0) {
+                    this.statelogClient?.debug("FallbackStrategy: falling back due to context window exceeded", {
+                        failedStrategy: strategy.toString(),
+                    });
+                    return this._textWithFallbacks(config, fromJSON(fallbackStrategies.contextWindowExceeded[0]), {
+                        contextWindowExceeded: fallbackStrategies.contextWindowExceeded.slice(1),
+                    });
+                }
+            }
             if (fallbackStrategies.error && fallbackStrategies.error.length > 0) {
                 this.statelogClient?.debug("FallbackStrategy: falling back due to error", {
                     failedStrategy: strategy.toString(),

package/dist/strategies/fastestStrategy.d.ts ADDED Viewed

@@ -0,0 +1,17 @@
+import { Model } from "../model.js";
+import { PromptResult, Result, SmolPromptConfig } from "../types.js";
+import { BaseStrategy } from "./baseStrategy.js";
+import { ModelNameAndProvider, StrategyJSON } from "./types.js";
+export declare class FastestStrategy extends BaseStrategy { // Strategy that routes a prompt to one model from `models`, chosen by speed or at random.
+    models: (string | ModelNameAndProvider | Model)[]; // candidate models, stored exactly as passed to the constructor
+    epsilon: number; // probability, per call, of picking a random candidate instead of the fastest one
+    constructor(models: (string | ModelNameAndProvider | Model)[], epsilon?: number);
+    toString(): string;
+    toShortString(): string;
+    _text(config: SmolPromptConfig): Promise<Result<PromptResult>>;
+    private pickFastest;
+    /** Get tracked tokens/sec for a model. NOTE(review): the shipped implementation only consults the latency tracker; no static-estimate or 0 fallback is applied. */
+    private getSpeed;
+    toJSON(): StrategyJSON; // serializes the model list only; epsilon is not part of FastestStrategyJSON
+    static fromJSON(json: unknown): FastestStrategy;
+}

package/dist/strategies/fastestStrategy.js ADDED Viewed

@@ -0,0 +1,95 @@
+import { latencyTracker } from "../latencyTracker.js";
+import { getLogger } from "../logger.js";
+import { Model } from "../model.js";
+import { BaseStrategy } from "./baseStrategy.js";
+import { IDStrategy } from "./idStrategy.js";
+import { FastestStrategyJSONSchema, } from "./types.js";
+// what percentage of the time to explore (pick a random model instead of the fastest) - this prevents us from getting stuck on a model that was fast in the past but has since become slow
+const DEFAULT_EPSILON = 0.1;
+export class FastestStrategy extends BaseStrategy { // Routes each prompt to one candidate: random with probability epsilon, otherwise the highest tracked tokens/sec.
+    models; // candidates as given: model-name strings, {model, provider} objects, or Model instances
+    epsilon; // exploration probability compared against Math.random() in _text
+    constructor(models, epsilon = DEFAULT_EPSILON) {
+        super();
+        this.models = models;
+        this.epsilon = epsilon;
+    }
+    toString() {
+        return `FastestStrategy([${this.models.map((s) => s.toString()).join(", ")}])`; // NOTE(review): a plain {model, provider} entry has no custom toString and renders as "[object Object]"
+    }
+    toShortString() {
+        return `fastest([${this.models.map((s) => s.toString()).join(", ")}])`;
+    }
+    async _text(config) { // Picks one candidate and delegates the prompt to an IDStrategy wrapping it.
+        const resolved = this.models.map((model) => Model.create(model)); // every entry goes through Model.create on each call
+        let chosen = null;
+        const logger = getLogger(config.logLevel);
+        if (Math.random() < this.epsilon) {
+            // Explore: pick a uniformly random model, ignoring latency data
+            chosen = resolved[Math.floor(Math.random() * resolved.length)]; // NOTE(review): undefined when `models` is empty, so the calls below throw a TypeError
+            logger.debug("fastest strategy - exploring random model", {
+                model: chosen.getResolvedModel(),
+            });
+            this.statelogClient?.debug("fastest strategy - picking random model", {
+                model: chosen.getResolvedModel(),
+            });
+        }
+        else {
+            // Exploit: pick the model with the highest tracked tokens/sec (null when none has usable data)
+            chosen = this.pickFastest(resolved);
+            if (chosen) {
+                logger.debug("fastest strategy - exploiting fastest model", {
+                    model: chosen.getResolvedModel(),
+                });
+                this.statelogClient?.debug("fastest strategy - using fastest model", {
+                    model: chosen.getResolvedModel(),
+                });
+            }
+            else {
+                // pickFastest returned null: no model has usable latency data, so fall back to a uniformly random pick
+                chosen = resolved[Math.floor(Math.random() * resolved.length)];
+                logger.debug("fastest strategy - no latency data, picking random model", {
+                    models: resolved.map((m) => m.getResolvedModel()),
+                    chosen: chosen.getResolvedModel(),
+                });
+                this.statelogClient?.debug("fastest strategy - no latency data, picking random model", {
+                    models: resolved.map((m) => m.getResolvedModel()),
+                    chosen, // NOTE(review): logs the Model object itself, while the logger call above logs chosen.getResolvedModel() - confirm intended
+                });
+            }
+        }
+        const strategy = new IDStrategy(chosen);
+        return strategy.text(config);
+    }
+    pickFastest(models) { // Returns the model with the largest truthy speed above 0, or null when there is none; ties keep the earlier model.
+        let best = null;
+        let bestSpeed = 0;
+        for (let model of models) {
+            const speed = this.getSpeed(model);
+            if (speed && speed > bestSpeed) { // a falsy speed (no usable data) never wins
+                bestSpeed = speed;
+                best = model;
+            }
+        }
+        return best;
+    }
+    /** Get tracked tokens/sec for a model from latencyTracker. NOTE(review): no static-estimate or 0 fallback is applied here; the tracker's return value is passed through unchanged. */
+    getSpeed(model) {
+        const MIN_SAMPLES = 3; // passed to the tracker; presumably the minimum number of samples required - confirm in latencyTracker
+        const tracked = latencyTracker.getTokensPerSecond(model.getResolvedModel(), MIN_SAMPLES);
+        return tracked;
+    }
+    toJSON() { // NOTE(review): epsilon is not serialized, so a custom value does not survive toJSON/fromJSON
+        return {
+            type: "fastest",
+            params: {
+                models: this.models.map((s) => (s instanceof Model ? s.toJSON() : s)), // Model instances are serialized; strings and plain objects pass through
+            },
+        };
+    }
+    static fromJSON(json) { // Validates with FastestStrategyJSONSchema (parse throws on mismatch) and rebuilds with the default epsilon.
+        const parsed = FastestStrategyJSONSchema.parse(json);
+        const models = parsed.params.models;
+        return new FastestStrategy(models);
+    }
+}

package/dist/strategies/index.d.ts CHANGED Viewed

@@ -1,11 +1,15 @@
+import { Model } from "../model.js";
 import { ModelLike, ModelParam } from "../types.js";
-import { FallbackStrategyConfig, Strategy, StrategyJSON } from "./types.js";
+import { FallbackStrategyConfig, ModelNameAndProvider, Strategy, StrategyJSON } from "./types.js";
 export * from "./baseStrategy.js";
 export * from "./fallbackStrategy.js";
 export * from "./idStrategy.js";
 export * from "./raceStrategy.js";
+export * from "./randomStrategy.js";
 export * from "./types.js";
 export declare function race(...strategies: ModelParam[]): Strategy;
+export declare function random(...strategies: ModelParam[]): Strategy;
+export declare function fastest(models: (string | ModelNameAndProvider | Model)[], epsilon?: number): Strategy;
 export declare function id(model: ModelLike): Strategy;
 export declare function fallback(primaryStrategy: ModelParam, config: FallbackStrategyConfig | string | string[]): Strategy;
 export declare function fromJSON(json: StrategyJSON): Strategy;

package/dist/strategies/index.js CHANGED Viewed

@@ -1,15 +1,24 @@
 import { FallbackStrategy } from "./fallbackStrategy.js";
+import { FastestStrategy } from "./fastestStrategy.js";
 import { IDStrategy } from "./idStrategy.js";
 import { RaceStrategy } from "./raceStrategy.js";
-import { FallbackStrategyJSONSchema, IDStrategyJSONSchema, ModelNameAndProviderSchema, RaceStrategyJSONSchema, } from "./types.js";
+import { RandomStrategy } from "./randomStrategy.js";
+import { FallbackStrategyJSONSchema, FastestStrategyJSONSchema, IDStrategyJSONSchema, ModelNameAndProviderSchema, RaceStrategyJSONSchema, RandomStrategyJSONSchema, } from "./types.js";
 export * from "./baseStrategy.js";
 export * from "./fallbackStrategy.js";
 export * from "./idStrategy.js";
 export * from "./raceStrategy.js";
+export * from "./randomStrategy.js";
 export * from "./types.js";
 export function race(...strategies) {
     return new RaceStrategy(strategies);
 }
+export function random(...strategies) { // Convenience factory: spreads its arguments into a new RandomStrategy.
+    return new RandomStrategy(...strategies);
+}
+export function fastest(models, epsilon) { // Convenience factory: an undefined epsilon triggers the FastestStrategy constructor default.
+    return new FastestStrategy(models, epsilon);
+}
 export function id(model) {
     return new IDStrategy(model);
 }
@@ -39,6 +48,12 @@ export function fromJSON(json) {
     else if (FallbackStrategyJSONSchema.safeParse(json).success) {
         return FallbackStrategy.fromJSON(json);
     }
+    else if (RandomStrategyJSONSchema.safeParse(json).success) {
+        return RandomStrategy.fromJSON(json);
+    }
+    else if (FastestStrategyJSONSchema.safeParse(json).success) {
+        return FastestStrategy.fromJSON(json);
+    }
     else if (typeof json === "string") {
         return id(json);
     }

package/dist/strategies/randomStrategy.d.ts ADDED Viewed

@@ -0,0 +1,12 @@
+import { ModelParam, PromptResult, Result, SmolPromptConfig } from "../types.js";
+import { BaseStrategy } from "./baseStrategy.js";
+import { Strategy, StrategyJSON } from "./types.js";
+export declare class RandomStrategy extends BaseStrategy { // Strategy that delegates each prompt to one randomly chosen child strategy.
+    strategies: Strategy[]; // child strategies to choose from
+    constructor(...strategies: (Strategy | ModelParam)[]); // variadic: accepts strategies and/or model params
+    toString(): string;
+    toShortString(): string;
+    _text(config: SmolPromptConfig): Promise<Result<PromptResult>>;
+    toJSON(): StrategyJSON;
+    static fromJSON(json: unknown): RandomStrategy;
+}

package/dist/strategies/randomStrategy.js ADDED Viewed

@@ -0,0 +1,39 @@
+import { BaseStrategy } from "./baseStrategy.js";
+import { IDStrategy } from "./idStrategy.js";
+import { fromJSON } from "./index.js";
+import { RandomStrategyJSONSchema } from "./types.js";
+export class RandomStrategy extends BaseStrategy { // Delegates each prompt to one child strategy picked uniformly at random.
+    strategies; // child strategies; non-strategy constructor arguments are wrapped in IDStrategy
+    constructor(...strategies) {
+        super();
+        this.strategies = strategies.map((s) => s instanceof BaseStrategy ? s : new IDStrategy(s)); // anything that is not a BaseStrategy instance is treated as a model and wrapped
+    }
+    toString() {
+        return `RandomStrategy([${this.strategies.map((s) => s.toString()).join(", ")}])`;
+    }
+    toShortString() {
+        return `random([${this.strategies.map((s) => s.toString()).join(", ")}])`; // NOTE(review): children are rendered with toString(), not toShortString() - confirm intended
+    }
+    async _text(config) { // Picks a random child and returns the result of its text(config) call.
+        const randomIndex = Math.floor(Math.random() * this.strategies.length); // uniform index in [0, length)
+        const strategy = this.strategies[randomIndex]; // NOTE(review): undefined when no strategies were supplied, so strategy.text below throws a TypeError
+        this.statelogClient?.debug("random strategy chosen", {
+            strategy,
+        });
+        const result = await strategy.text(config);
+        return result;
+    }
+    toJSON() { // Serializes every child via its own toJSON().
+        return {
+            type: "random",
+            params: {
+                strategies: this.strategies.map((s) => s.toJSON()),
+            },
+        };
+    }
+    static fromJSON(json) { // Validates with RandomStrategyJSONSchema (parse throws on mismatch) and rebuilds each child via the module-level fromJSON.
+        const parsed = RandomStrategyJSONSchema.parse(json);
+        const strategies = parsed.params.strategies.map((s) => fromJSON(s));
+        return new RandomStrategy(...strategies);
+    }
+}

package/dist/strategies/types.d.ts CHANGED Viewed

@@ -14,15 +14,19 @@ export declare const FallbackReasonSchema: z.ZodEnum<{
     error: "error";
     timeout: "timeout";
     structuredOutputFailure: "structuredOutputFailure";
+    contentPolicyViolation: "contentPolicyViolation";
+    contextWindowExceeded: "contextWindowExceeded";
 }>;
 export declare const FallbackStrategyConfigSchema: z.ZodLazy<z.ZodRecord<z.ZodEnum<{
     error: "error";
     timeout: "timeout";
     structuredOutputFailure: "structuredOutputFailure";
+    contentPolicyViolation: "contentPolicyViolation";
+    contextWindowExceeded: "contextWindowExceeded";
 }> & z.core.$partial, z.ZodArray<z.ZodType<StrategyJSON, unknown, z.core.$ZodTypeInternals<StrategyJSON, unknown>>>>>;
 export type FallbackReason = z.infer<typeof FallbackReasonSchema>;
 export type FallbackStrategyConfig = z.infer<typeof FallbackStrategyConfigSchema>;
-export type StrategyJSON = string | ModelNameAndProvider | IDStrategyJSON | RaceStrategyJSON | FallbackStrategyJSON;
+export type StrategyJSON = string | ModelNameAndProvider | IDStrategyJSON | RaceStrategyJSON | FallbackStrategyJSON | RandomStrategyJSON | FastestStrategyJSON;
 export declare const IDStrategyJSONSchema: z.ZodObject<{
     type: z.ZodLiteral<"id">;
     params: z.ZodObject<{
@@ -46,6 +50,20 @@ export type FallbackStrategyJSON = {
         config: FallbackStrategyConfig;
     };
 };
+export declare const RandomStrategyJSONSchema: z.ZodType<RandomStrategyJSON>;
+export type RandomStrategyJSON = {
+    type: "random";
+    params: {
+        strategies: StrategyJSON[];
+    };
+};
+export declare const FastestStrategyJSONSchema: z.ZodType<FastestStrategyJSON>;
+export type FastestStrategyJSON = {
+    type: "fastest";
+    params: {
+        models: (ModelNameAndProvider | string)[];
+    };
+};
 export type ModelNameAndProvider = {
     model: string;
     provider: string;

package/dist/strategies/types.js CHANGED Viewed

@@ -4,6 +4,8 @@ export const FallbackReasonSchema = z.enum([
     "error",
     "timeout",
     "structuredOutputFailure",
+    "contentPolicyViolation",
+    "contextWindowExceeded",
 ]);
 export const FallbackStrategyConfigSchema = z.lazy(() => z.partialRecord(FallbackReasonSchema, z.array(StrategyJSONSchema)));
 export const IDStrategyJSONSchema = z.object({
@@ -21,6 +23,16 @@ export const FallbackStrategyJSONSchema = z.lazy(() => z.object({
         config: FallbackStrategyConfigSchema,
     }),
 }));
+export const RandomStrategyJSONSchema = z.lazy(() => z.object({ // wrapped in z.lazy, presumably because StrategyJSONSchema is declared later in this module and is recursive
+    type: z.literal("random"),
+    params: z.object({ strategies: z.array(StrategyJSONSchema) }), // children may be any strategy JSON, including nested ones
+}));
+export const FastestStrategyJSONSchema = z.lazy(() => z.object({ // wrapped in z.lazy; ModelNameAndProviderSchema is declared just below
+    type: z.literal("fastest"),
+    params: z.object({
+        models: z.array(z.union([ModelNameAndProviderSchema, z.string()])), // only {model, provider} objects or name strings; no epsilon field is defined
+    }),
+}));
 export const ModelNameAndProviderSchema = z.object({
     model: z.string(),
     provider: z.string(),
@@ -49,6 +61,8 @@ export const StrategyJSONSchema = z.lazy(() => z.union([
     IDStrategyJSONSchema,
     RaceStrategyJSONSchema,
     FallbackStrategyJSONSchema,
+    RandomStrategyJSONSchema,
+    FastestStrategyJSONSchema,
 ]));
 // Helper to detect if a value is a StrategyJSON object (not a plain string)
 export function isStrategy(value) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "smoltalk",
-  "version": "0.0.55",
+  "version": "0.0.56",
   "description": "A common interface for LLM APIs",
   "homepage": "https://github.com/egonSchiele/smoltalk",
   "scripts": {