npm - smoltalk - Versions diffs - 0.0.67 → 0.2.1 - Mend

smoltalk 0.0.67 → 0.2.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (54)

package/README.md +51 -154
package/dist/client.d.ts +3 -3
package/dist/client.js +9 -5
package/dist/clients/anthropic.d.ts +4 -4
package/dist/clients/anthropic.js +1 -1
package/dist/clients/baseClient.d.ts +17 -20
package/dist/clients/baseClient.js +21 -43
package/dist/clients/google.d.ts +4 -4
package/dist/clients/google.js +1 -1
package/dist/clients/ollama.d.ts +4 -4
package/dist/clients/ollama.js +1 -1
package/dist/clients/openai.d.ts +4 -4
package/dist/clients/openai.js +2 -1
package/dist/clients/openaiResponses.d.ts +4 -4
package/dist/clients/openaiResponses.js +2 -1
package/dist/functions.d.ts +13 -10
package/dist/functions.js +4 -55
package/dist/index.d.ts +2 -4
package/dist/index.js +1 -2
package/dist/model.d.ts +2 -5
package/dist/model.js +11 -27
package/dist/models.d.ts +2 -2
package/dist/models.js +3 -1
package/dist/testing/index.d.ts +9 -0
package/dist/testing/index.js +41 -0
package/dist/types.d.ts +52 -160
package/dist/types.js +1 -1
package/dist/util/logger.d.ts +17 -1
package/dist/util/logger.js +68 -5
package/package.json +15 -19
package/dist/clients/llamaCpp.d.ts +0 -28
package/dist/clients/llamaCpp.js +0 -316
package/dist/latencyTracker.d.ts +0 -32
package/dist/latencyTracker.js +0 -73
package/dist/middleware.d.ts +0 -54
package/dist/middleware.js +0 -321
package/dist/strategies/baseStrategy.d.ts +0 -22
package/dist/strategies/baseStrategy.js +0 -62
package/dist/strategies/fallbackStrategy.d.ts +0 -14
package/dist/strategies/fallbackStrategy.js +0 -122
package/dist/strategies/fastestStrategy.d.ts +0 -19
package/dist/strategies/fastestStrategy.js +0 -108
package/dist/strategies/idStrategy.d.ts +0 -16
package/dist/strategies/idStrategy.js +0 -62
package/dist/strategies/index.d.ts +0 -17
package/dist/strategies/index.js +0 -68
package/dist/strategies/raceStrategy.d.ts +0 -12
package/dist/strategies/raceStrategy.js +0 -72
package/dist/strategies/randomStrategy.d.ts +0 -13
package/dist/strategies/randomStrategy.js +0 -54
package/dist/strategies/timeoutStrategy.d.ts +0 -13
package/dist/strategies/timeoutStrategy.js +0 -65
package/dist/strategies/types.d.ts +0 -78
package/dist/strategies/types.js +0 -58

package/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # Smoltalk
-Smoltalk exposes a common API to different LLM providers. There are other packages that do this, but Smoltalk allows you to build strategies on top of it. Here is a simple example.
+Smoltalk exposes a common API to different LLM providers, with built-in cost tracking, structured output, tool calling, streaming, and observability hooks. Here is a simple example.
 ## Install
@@ -54,74 +54,47 @@ This is functionality that other packages allow.
 ```
 </details>
-What if you wanted to have fallbacks in case the OpenAI API was down? Just change the `model` field:
-```ts
-  const response = await text({
-    messages,
-    model: fallback("gpt-5.4", "gemini-2.5-flash-lite"),
-    // or multiple fallbacks:
-    // model: fallback("gpt-5.4", ["gemini-2.5-flash-lite", "gemini-3-flash-preview"]),
-  });
-```
+## Longer tutorial
-Or what if you wanted to try a couple of models and take the first response?
+The top-level `text()` function is the recommended entry point — pass everything in a single config:
 ```ts
-  const response = await text({
-    messages,
-    model: race("gpt-5.4", "gemini-2.5-flash-lite", "o4-mini"),
-  });
-```
+import { text, userMessage } from "smoltalk";
-Or combine them:
+const messages = [
+  userMessage("Please use the add function to add the following numbers: 3 and 5"),
+];
-```ts
-  const response = await text({
-    messages,
-    model: race(fallback("gpt-5.4", "gemini-2.5-flash-lite"), "o4-mini"),
-  });
+const resp = await text({
+  messages,
+  model: "gemini-2.0-flash-lite",
+  openAiApiKey: process.env.OPENAI_API_KEY || "",
+  googleApiKey: process.env.GEMINI_API_KEY || "",
+  logLevel: "debug",
+});
 ```
-You get the idea.
-## Longer tutorial
-To use Smoltak, you first create a client:
+If you want to construct a client once and reuse it across many calls, use `getClient()`:
 ```ts
-import { getClient } from "smoltalk";
+import { getClient, userMessage } from "smoltalk";
 const client = getClient({
   openAiApiKey: process.env.OPENAI_API_KEY || "",
   googleApiKey: process.env.GEMINI_API_KEY || "",
-  logLevel: "debug",
   model: "gemini-2.0-flash-lite",
 });
-```
-Then you can call different methods on the client. The simplest is `prompt`:
-```ts
-const resp = await client.prompt("Hello, how are you?");
-```
-If you want tool calling, structured output, etc., `text` may be a cleaner option:
-```ts
-let messages: Message[] = [];
-  messages.push(
-    userMessage(
-      "Please use the add function to add the following numbers: 3 and 5"
-    )
-  );
-  const resp = await client.text({
-    messages,
-  });
+const messages = [userMessage("hi")];
+const resp = await client.text({ messages, model: "gemini-2.0-flash-lite" });
 ```
 Here is an example with tool calling:
 ```ts
+import { text, userMessage } from "smoltalk";
+import { z } from "zod";
 function add({ a, b }: { a: number; b: number }): number {
   return a + b;
 }
@@ -135,21 +108,29 @@ const addTool = {
   }),
 };
-const resp = await client.text({
+const messages = [userMessage("Add 3 and 5")];
+const resp = await text({
   messages,
-  tools: [addTool]
+  model: "gemini-2.0-flash-lite",
+  tools: [addTool],
 });
 ```
 Here is an example with structured output:
 ```ts
-const resp = await client.text({
+import { text, userMessage } from "smoltalk";
+import { z } from "zod";
+const messages = [userMessage("How many planets are in the solar system?")];
+const resp = await text({
   messages,
+  model: "gemini-2.0-flash-lite",
   responseFormat: z.object({
     result: z.number(),
-  });
+  }),
 });
 ```
@@ -160,37 +141,36 @@ A couple of design decisions to note:
 ## Configuration Options
-`SmolPromptConfig` is the union of client config (`SmolConfig`) and per-request config (`PromptConfig`). You can pass all options together to `text()`, or split them between `getClient()` and individual calls.
-### Client options (`SmolConfig`)
+`SmolConfig` is a single config type passed to `text()`. It contains everything: API keys, model selection, request parameters, hooks, and observability options.
 | Option | Type | Description |
 |--------|------|-------------|
-| `model` | `ModelName \| ModelConfig` | **Required.** The model to use (e.g. `"gpt-4o"`, `"gemini-2.0-flash-lite"`). |
+| `model` | `ModelName` | **Required.** The model to use (e.g. `"gpt-4o"`, `"gemini-2.0-flash-lite"`). |
+| `messages` | `Message[]` | **Required.** The conversation messages to send. |
 | `openAiApiKey` | `string` | OpenAI API key. |
 | `googleApiKey` | `string` | Google Gemini API key. |
+| `anthropicApiKey` | `string` | Anthropic API key. |
 | `ollamaApiKey` | `string` | Ollama API key (only needed for cloud Ollama). |
 | `ollamaHost` | `string` | Ollama host URL (for self-hosted or cloud Ollama). |
-| `provider` | `Provider` | Override provider detection. One of `"openai"`, `"openai-responses"`, `"google"`, `"ollama"`, `"anthropic"`, `"replicate"`, `"modal"`, `"local"`. |
-| `logLevel` | `LogLevel` | Logging verbosity: `"debug"`, `"info"`, `"warn"`, `"error"`, etc. |
-| `toolLoopDetection` | `ToolLoopDetection` | Config to detect and break tool call loops. See below. |
-### Request options (`PromptConfig`)
-| Option | Type | Description |
-|--------|------|-------------|
-| `messages` | `Message[]` | **Required.** The conversation messages to send. |
-| `instructions` | `string` | System-level instructions (system prompt). |
+| `provider` | `Provider` | Override provider detection. One of `"openai"`, `"openai-responses"`, `"google"`, `"ollama"`, `"anthropic"`, or any provider registered via `registerProvider()`. |
+| `logLevel` | `LogLevel` | Logging verbosity: `"debug"`, `"info"`, `"warn"`, `"error"`. |
 | `tools` | `{ name, description?, schema }[]` | Tool definitions. `schema` is a Zod object schema. |
-| `responseFormat` | `ZodType` | Zod schema for structured output. The response will be parsed and validated against this schema. |
+| `responseFormat` | `ZodType` | Zod schema for structured output. The response is parsed and validated against this schema. |
 | `responseFormatOptions` | `object` | Fine-grained control over structured output (see below). |
 | `maxTokens` | `number` | Maximum number of output tokens to generate. |
-| `temperature` | `number` | Sampling temperature (0–2 for most providers). |
+| `temperature` | `number` | Sampling temperature (0–2). |
 | `numSuggestions` | `number` | Number of completions to generate. |
 | `parallelToolCalls` | `boolean` | Whether to allow the model to call multiple tools in parallel. |
 | `stream` | `boolean` | If `true`, returns an `AsyncGenerator<StreamChunk>` instead of a `Promise`. |
+| `thinking` | `{ enabled, budgetTokens? }` | Enable extended thinking / thought signatures (Anthropic and Google). |
+| `reasoningEffort` | `"low" \| "medium" \| "high"` | Provider-agnostic reasoning effort level. |
 | `maxMessages` | `number` | If the message list exceeds this count, returns a failure instead of calling the API. |
+| `abortSignal` | `AbortSignal` | Cancel an in-flight request. |
+| `toolLoopDetection` | `ToolLoopDetection` | Detect and break tool-call loops. See below. |
 | `rawAttributes` | `Record<string, any>` | Pass provider-specific attributes directly to the API request. |
+| `hooks` | `{ onStart?, onToolCall?, onEnd?, onError? }` | Lifecycle hooks. |
+| `statelog` | `object` | Configuration for Statelog observability/tracing integration. |
+| `metadata` | `Record<string, any>` | Arbitrary metadata. |
 ### `responseFormatOptions`
@@ -210,92 +190,9 @@ Detects when the model is stuck in a repetitive tool-call loop.
 | Option | Type | Description |
 |--------|------|-------------|
 | `enabled` | `boolean` | Whether loop detection is active. |
-| `maxConsecutive` | `number` | Number of consecutive identical tool calls before triggering intervention. |
+| `maxCalls` | `number` | Number of calls to a specific tool before triggering intervention. |
 | `intervention` | `string` | Action to take: `"remove-tool"`, `"remove-all-tools"`, `"throw-error"`, or `"halt-execution"`. |
-| `excludeTools` | `string[]` | Tool names to ignore when counting consecutive calls. |
-## Middleware
-Middleware lets you run LLM-based checks on a prompt before or alongside the main call. If a check fails, the main call is blocked and a replacement output is returned instead. This is useful for:
-- **Content safety** — classify prompts as safe/unsafe before they reach your main model
-- **Prompt injection detection** — catch adversarial inputs before they execute
-- **PII detection** — block prompts containing personal information
-### Basic example
-```typescript
-import { text, userMessage, systemMessage } from "smoltalk";
-import { z } from "zod";
-const result = await text({
-  model: "gpt-4o",
-  messages: [userMessage("How do I hack into NASA?")],
-  middleware: {
-    timing: "before",       // run checks before the main call
-    mode: "sequential",     // run checks one at a time, stop on first block
-    checks: [
-      {
-        messages: [
-          systemMessage(
-            "You are a content safety classifier. Evaluate whether the user's message is safe to process."
-          ),
-        ],
-        responseFormat: z.object({
-          safe: z.boolean(),
-          reason: z.string(),
-        }),
-        responseFormatOptions: { strict: true },
-        decide: (result) => {
-          const parsed = JSON.parse(result.output!);
-          return parsed.safe ? null : `Blocked: ${parsed.reason}`;
-        },
-      },
-    ],
-  },
-});
-```
-If the check blocks, `result` is a successful `Result<PromptResult>` with the replacement string as output (e.g. `"Blocked: unsafe content"`). If the check passes, the main call runs normally.
-### How it works
-Each middleware check is itself an LLM call. Your original prompt messages are automatically appended to the check's messages, so the middleware model can see the content it's evaluating. The check inherits the same model, API keys, and strategy from the parent call.
-The `decide` function receives the middleware LLM's `PromptResult` and returns either:
-- `null` — the check passes, proceed normally
-- a `string` — the check blocks, and the string becomes the replacement output
-### Configuration
-| Option | Type | Description |
-|--------|------|-------------|
-| `timing` | `"before" \| "parallel"` | `"before"` runs checks first, then the main call. `"parallel"` runs both simultaneously — if a check blocks, the main call is aborted. |
-| `mode` | `"sequential" \| "parallel"` | `"sequential"` runs checks one at a time and short-circuits on the first block. `"parallel"` runs all checks concurrently. |
-| `checks` | `MiddlewareCheck[]` | The checks to run (see below). |
-Each `MiddlewareCheck` has:
-| Option | Type | Description |
-|--------|------|-------------|
-| `messages` | `Message[]` | Setup messages for the middleware LLM call (e.g. a system prompt defining the classifier). |
-| `responseFormat` | `ZodType` | Optional Zod schema for structured output from the middleware. |
-| `responseFormatOptions` | `object` | Same options as the main call's `responseFormatOptions`. |
-| `decide` | `(result: PromptResult) => string \| null` | Decision function. Return a string to block, or `null` to pass. |
-### Fail-closed behavior
-Middleware is a safety gate, so it fails closed:
-- If the middleware LLM call fails (network error, API error, abort), the prompt is **blocked** with an error message as output.
-- If `decide()` throws, the prompt is **blocked**.
-### Cost tracking
-Middleware usage/cost is tracked. When a check blocks:
-- **"before" timing**: The result includes aggregated costs from all middleware checks that ran.
-- **"parallel" timing**: The result includes middleware costs plus any partial costs from the aborted main call (if the provider reported usage before the abort).
-When all checks pass, the returned result is the main call's result with its own usage/cost — middleware costs are not added.
+| `excludeTools` | `string[]` | Tool names to ignore when counting calls. |
 ## Limitations
 Smoltalk has support for a limited number of providers right now, and is mostly focused on the stateless APIs for text completion, though I plan to add support for more providers as well as image and speech models later. Smoltalk is also a personal project, and there are alternatives backed by companies:

package/dist/client.d.ts CHANGED Viewed

@@ -4,8 +4,8 @@ export * from "./clients/openai.js";
 export * from "./clients/openaiResponses.js";
 export * from "./clients/baseClient.js";
 export * from "./clients/ollama.js";
-export * from "./clients/llamaCpp.js";
 import { BaseClient } from "./clients/baseClient.js";
-import { ResolvedSmolConfig } from "./types.js";
+import { SmolClientConfig } from "./types.js";
 export declare function registerProvider(providerName: string, clientClass: typeof BaseClient): void;
-export declare function getClient(config: ResolvedSmolConfig): BaseClient;
+export declare function unregisterProvider(providerName: string): boolean;
+export declare function getClient(config: SmolClientConfig): BaseClient;

package/dist/client.js CHANGED Viewed

@@ -4,10 +4,8 @@ export * from "./clients/openai.js";
 export * from "./clients/openaiResponses.js";
 export * from "./clients/baseClient.js";
 export * from "./clients/ollama.js";
-export * from "./clients/llamaCpp.js";
 import { SmolAnthropic } from "./clients/anthropic.js";
 import { SmolGoogle } from "./clients/google.js";
-import { LlamaCPP } from "./clients/llamaCpp.js";
 import { SmolOllama } from "./clients/ollama.js";
 import { SmolOpenAi } from "./clients/openai.js";
 import { SmolOpenAiResponses } from "./clients/openaiResponses.js";
@@ -17,6 +15,13 @@ const registeredProviders = {};
 export function registerProvider(providerName, clientClass) {
     registeredProviders[providerName] = clientClass;
 }
+export function unregisterProvider(providerName) {
+    if (providerName in registeredProviders) {
+        delete registeredProviders[providerName];
+        return true;
+    }
+    return false;
+}
 export function getClient(config) {
     let provider = config.provider;
     const modelName = config.model;
@@ -36,6 +41,7 @@ export function getClient(config) {
         anthropicApiKey: config.anthropicApiKey || process.env.ANTHROPIC_API_KEY,
     };
     const clientConfig = {
+        messages: [],
         ...config,
         ...resolvedKeys,
         model: modelName,
@@ -66,13 +72,11 @@ export function getClient(config) {
             return new SmolGoogle(clientConfig);
         case "ollama":
             return new SmolOllama(clientConfig);
-        case "llama-cpp":
-            return new LlamaCPP(clientConfig);
         default:
             if (provider in registeredProviders) {
                 const ClientClass = registeredProviders[provider];
                 return new ClientClass(clientConfig);
             }
-            throw new SmolError(`Model provider ${provider} is not supported.`);
+            throw new SmolError(`Model provider ${provider} is not supported. To use a custom provider, register it first via registerProvider(name, ClientClass).`);
     }
 }

package/dist/clients/anthropic.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
-import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
+import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
 import { BaseClient } from "./baseClient.js";
 import { ModelName } from "../models.js";
-export type SmolAnthropicConfig = BaseClientConfig & {
+export type SmolAnthropicConfig = SmolConfig & {
     anthropicApiKey: string;
 };
 export declare class SmolAnthropic extends BaseClient implements SmolClient {
@@ -13,6 +13,6 @@ export declare class SmolAnthropic extends BaseClient implements SmolClient {
     private calculateUsageAndCost;
     private buildRequest;
     private rethrowAsSmolError;
-    _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
-    _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
+    _textSync(config: SmolConfig): Promise<Result<PromptResult>>;
+    _textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
 }

package/dist/clients/anthropic.js CHANGED Viewed

@@ -19,7 +19,7 @@ export class SmolAnthropic extends BaseClient {
         this.model = new Model(config.model);
     }
     getModel() {
-        return this.model.getResolvedModel();
+        return this.model.getModel();
     }
     calculateUsageAndCost(usageData) {
         const usage = {

package/dist/clients/baseClient.d.ts CHANGED Viewed

@@ -1,29 +1,26 @@
 import { StatelogClient } from "../statelogClient.js";
-import { PromptConfig, PromptResult, ResolvedSmolConfig, Result, SmolClient, StreamChunk } from "../types.js";
+import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
 export declare class BaseClient implements SmolClient {
-    protected config: ResolvedSmolConfig;
+    protected config: SmolConfig;
     protected statelogClient?: StatelogClient;
-    constructor(config: ResolvedSmolConfig);
-    protected getAbortSignal(promptConfig: PromptConfig): AbortSignal | undefined;
+    constructor(config: SmolConfig);
+    protected getAbortSignal(promptConfig: SmolConfig): AbortSignal | undefined;
     protected isAbortError(err: unknown): boolean;
-    text(promptConfig: Omit<PromptConfig, "stream">): Promise<Result<PromptResult>>;
-    text(promptConfig: Omit<PromptConfig, "stream"> & {
-        stream: false;
-    }): Promise<Result<PromptResult>>;
-    text(promptConfig: Omit<PromptConfig, "stream"> & {
+    text(promptConfig: SmolConfig & {
         stream: true;
     }): AsyncGenerator<StreamChunk>;
-    text(promptConfig: PromptConfig): Promise<Result<PromptResult>> | AsyncGenerator<StreamChunk>;
-    checkMessageLimit(promptConfig: PromptConfig): Result<PromptResult> | null;
-    textSync(promptConfig: PromptConfig): Promise<Result<PromptResult>>;
-    checkForToolLoops(promptConfig: PromptConfig): {
+    text(promptConfig: SmolConfig & {
+        stream?: false;
+    }): Promise<Result<PromptResult>>;
+    checkMessageLimit(promptConfig: SmolConfig): Result<PromptResult> | null;
+    textSync(promptConfig: SmolConfig): Promise<Result<PromptResult>>;
+    checkForToolLoops(promptConfig: SmolConfig): {
         continue: boolean;
-        newPromptConfig: PromptConfig;
+        newSmolConfig: SmolConfig;
     };
-    private recordLatency;
-    extractResponse(promptConfig: PromptConfig, rawValue: any, schema: any, depth?: number): any;
-    textWithRetry(promptConfig: PromptConfig, retries: number): Promise<Result<PromptResult>>;
-    _textSync(promptConfig: PromptConfig): Promise<Result<PromptResult>>;
-    textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
-    _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
+    extractResponse(promptConfig: SmolConfig, rawValue: any, schema: any, depth?: number): any;
+    textWithRetry(promptConfig: SmolConfig, retries: number): Promise<Result<PromptResult>>;
+    _textSync(promptConfig: SmolConfig): Promise<Result<PromptResult>>;
+    textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
+    _textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
 }

package/dist/clients/baseClient.js CHANGED Viewed

@@ -1,5 +1,4 @@
 import { userMessage, assistantMessage } from "../classes/message/index.js";
-import { latencyTracker } from "../latencyTracker.js";
 import { getLogger } from "../util/logger.js";
 import { SmolStructuredOutputError } from "../smolError.js";
 import { getStatelogClient } from "../statelogClient.js";
@@ -55,18 +54,16 @@ export class BaseClient {
         const messageLimitResult = this.checkMessageLimit(promptConfig);
         if (messageLimitResult)
             return messageLimitResult;
-        const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(promptConfig);
+        const { continue: shouldContinue, newSmolConfig } = this.checkForToolLoops(promptConfig);
         if (!shouldContinue) {
             return {
                 success: true,
                 value: { output: null, toolCalls: [], model: this.config.model },
             };
         }
-        const startTime = performance.now();
         try {
-            const result = await this.textWithRetry(newPromptConfig, newPromptConfig.responseFormatOptions?.numRetries ||
+            const result = await this.textWithRetry(newSmolConfig, newSmolConfig.responseFormatOptions?.numRetries ||
                 DEFAULT_NUM_RETRIES);
-            this.recordLatency(startTime, result);
             return result;
         }
         catch (err) {
@@ -82,7 +79,7 @@ export class BaseClient {
     }
     checkForToolLoops(promptConfig) {
         if (!promptConfig.toolLoopDetection?.enabled) {
-            return { continue: true, newPromptConfig: promptConfig };
+            return { continue: true, newSmolConfig: promptConfig };
         }
         const toolCallCounts = {};
         const toolCallMessages = promptConfig.messages.filter((m) => m.role === "tool");
@@ -104,33 +101,24 @@ export class BaseClient {
                 switch (intervention) {
                     case "remove-tool":
                         const newTools = promptConfig.tools?.filter((t) => t.name !== toolName);
-                        const newPromptConfig = {
+                        const newSmolConfig = {
                             ...promptConfig,
                             tools: newTools,
                         };
-                        return { continue: true, newPromptConfig };
+                        return { continue: true, newSmolConfig };
                     case "remove-all-tools":
                         return {
                             continue: true,
-                            newPromptConfig: { ...promptConfig, tools: [] },
+                            newSmolConfig: { ...promptConfig, tools: [] },
                         };
                     case "throw-error":
                         throw new Error(`Tool loop detected for tool "${toolName}". Aborting request.`);
                     case "halt-execution":
-                        return { continue: false, newPromptConfig: promptConfig };
+                        return { continue: false, newSmolConfig: promptConfig };
                 }
             }
         }
-        return { continue: true, newPromptConfig: promptConfig };
-    }
-    recordLatency(startTime, result) {
-        if (!result.success)
-            return;
-        const outputTokens = result.value.usage?.outputTokens;
-        if (!outputTokens || outputTokens <= 0)
-            return;
-        const elapsedMs = performance.now() - startTime;
-        latencyTracker.record(this.config.model, elapsedMs, outputTokens);
+        return { continue: true, newSmolConfig: promptConfig };
     }
     extractResponse(promptConfig, rawValue, schema, depth = 0) {
         const MAX_DEPTH = 5;
@@ -153,14 +141,17 @@ export class BaseClient {
         if (rawValue && typeof rawValue === "object" && rawValue.type === "object" && rawValue.properties) {
             return this.extractResponse(promptConfig, rawValue.properties, schema, depth + 1);
         }
-        // 2. String → try JSON.parse, then recurse
+        // 2. String → try JSON.parse (after stripping markdown fences), then recurse.
+        // Throws SmolStructuredOutputError if the string isn't valid JSON; the
+        // textWithRetry caller catches this and retries with a validation hint.
         if (typeof rawValue === "string") {
             const stripped = rawValue
                 .trim()
                 .replace(/^```json\s*/, "")
                 .replace(/```\s*$/, "");
+            let parsed;
             try {
-                return this.extractResponse(promptConfig, JSON.parse(stripped), schema, depth + 1);
+                parsed = JSON.parse(stripped);
             }
             catch (err) {
                 const logger = getLogger();
@@ -169,8 +160,9 @@ export class BaseClient {
                     rawValue: stripped,
                 });
                 this.statelogClient?.debug("extractResponse: failed to parse JSON from string", { error: err.message });
+                throw new SmolStructuredOutputError(`Response did not parse as JSON: ${err.message}`);
             }
-            return rawValue;
+            return this.extractResponse(promptConfig, parsed, schema, depth + 1);
         }
         // 3. Null/undefined/primitive — nothing to unwrap
         if (rawValue == null || typeof rawValue !== "object") {
@@ -216,8 +208,7 @@ export class BaseClient {
             if (result.value.toolCalls.length > 0) {
                 return result;
             }
-            if (!promptConfig.responseFormat ||
-                !promptConfig.responseFormatOptions?.strict) {
+            if (!promptConfig.responseFormat) {
                 return result;
             }
             if (!("output" in result.value)) {
@@ -228,14 +219,9 @@ export class BaseClient {
                 return this.textWithRetry({ ...promptConfig, messages: retryMessages }, retries - 1);
             }
             const { output } = result.value;
-            if (output !== null &&
-                promptConfig.responseFormat &&
-                promptConfig.responseFormatOptions?.strict &&
-                retries > 0) {
-                const allowExtraKeys = promptConfig.responseFormatOptions?.allowExtraKeys ?? false;
+            if (output !== null && retries > 0) {
                 try {
-                    const parsed = JSON.parse(output);
-                    const parseResult = this.extractResponse(promptConfig, parsed, promptConfig.responseFormat);
+                    const parseResult = this.extractResponse(promptConfig, output, promptConfig.responseFormat);
                     return success({
                         ...result.value,
                         output: parseResult,
@@ -278,7 +264,7 @@ export class BaseClient {
             };
             return;
         }
-        const { continue: shouldContinue, newPromptConfig } = this.checkForToolLoops(config);
+        const { continue: shouldContinue, newSmolConfig } = this.checkForToolLoops(config);
         if (!shouldContinue) {
             yield {
                 type: "done",
@@ -290,16 +276,8 @@ export class BaseClient {
             };
             return;
         }
-        const startTime = performance.now();
         try {
-            for await (const chunk of this._textStream(newPromptConfig)) {
-                if (chunk.type === "done") {
-                    const outputTokens = chunk.result.usage?.outputTokens;
-                    if (outputTokens && outputTokens > 0) {
-                        const elapsedMs = performance.now() - startTime;
-                        latencyTracker.record(this.config.model, elapsedMs, outputTokens);
-                    }
-                }
+            for await (const chunk of this._textStream(newSmolConfig)) {
                 yield chunk;
             }
         }
@@ -307,7 +285,7 @@ export class BaseClient {
             if (this.isAbortError(err)) {
                 this.statelogClient?.debug("Streaming request aborted or timed out", {
                     reason: "Request was aborted",
-                    newPromptConfig,
+                    newSmolConfig,
                 });
                 yield { type: "timeout", error: "Request was aborted" };
             }

package/dist/clients/google.d.ts CHANGED Viewed

@@ -1,8 +1,8 @@
 import { Content, GenerateContentConfig, GoogleGenAI } from "@google/genai";
-import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
+import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
 import { BaseClient } from "./baseClient.js";
 import { ModelName } from "../models.js";
-export type SmolGoogleConfig = BaseClientConfig;
+export type SmolGoogleConfig = SmolConfig;
 type GeneratedRequest = {
     contents: Content[];
     model: ModelName;
@@ -17,8 +17,8 @@ export declare class SmolGoogle extends BaseClient implements SmolClient {
     getModel(): ModelName;
     private calculateUsageAndCost;
     private buildRequest;
-    _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
+    _textSync(config: SmolConfig): Promise<Result<PromptResult>>;
     __textSync(request: GeneratedRequest): Promise<Result<PromptResult>>;
-    _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
+    _textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
 }
 export {};

package/dist/clients/google.js CHANGED Viewed

@@ -25,7 +25,7 @@ export class SmolGoogle extends BaseClient {
         return this.client;
     }
     getModel() {
-        return this.model.getResolvedModel();
+        return this.model.getModel();
     }
     calculateUsageAndCost(usageMetadata) {
         let usage;

package/dist/clients/ollama.d.ts CHANGED Viewed

@@ -1,9 +1,9 @@
 import { Ollama } from "ollama";
-import { BaseClientConfig, PromptConfig, PromptResult, Result, SmolClient, StreamChunk } from "../types.js";
+import { PromptResult, Result, SmolClient, SmolConfig, StreamChunk } from "../types.js";
 import { BaseClient } from "./baseClient.js";
 import { ModelName } from "../models.js";
 export declare const DEFAULT_OLLAMA_HOST = "http://localhost:11434";
-export type SmolOllamaConfig = BaseClientConfig;
+export type SmolOllamaConfig = SmolConfig;
 export declare class SmolOllama extends BaseClient implements SmolClient {
     private logger;
     private model;
@@ -12,6 +12,6 @@ export declare class SmolOllama extends BaseClient implements SmolClient {
     getClient(): Ollama;
     getModel(): ModelName;
     private calculateUsageAndCost;
-    _textSync(config: PromptConfig): Promise<Result<PromptResult>>;
-    _textStream(config: PromptConfig): AsyncGenerator<StreamChunk>;
+    _textSync(config: SmolConfig): Promise<Result<PromptResult>>;
+    _textStream(config: SmolConfig): AsyncGenerator<StreamChunk>;
 }

package/dist/clients/ollama.js CHANGED Viewed

@@ -31,7 +31,7 @@ export class SmolOllama extends BaseClient {
         return this.client;
     }
     getModel() {
-        return this.model.getResolvedModel();
+        return this.model.getModel();
     }
     calculateUsageAndCost(responseData) {
         let usage;