@almadar/llm 2.5.1 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,6 +1,6 @@
  import { LLMFinishReason, LLMClient } from './client.js';
  export { ANTHROPIC_MODELS, CacheAwareLLMCallOptions, CacheableBlock, DEEPSEEK_MODELS, KIMI_MODELS, LLMCallOptions, LLMClientOptions, LLMProvider, LLMResponse, LLMStreamChunk, LLMStreamOptions, LLMUsage, OPENAI_MODELS, OPENROUTER_MODELS, ProviderConfig, createAnthropicClient, createCreativeClient, createDeepSeekClient, createFixClient, createKimiClient, createOpenAIClient, createOpenRouterClient, createRequirementsClient, createZhipuClient, getAvailableProvider, getSharedLLMClient, isProviderAvailable, resetSharedLLMClient } from './client.js';
- export { a as RateLimiter, R as RateLimiterOptions, b as TokenTracker, T as TokenUsage, g as getGlobalRateLimiter, c as getGlobalTokenTracker, r as resetGlobalRateLimiter, d as resetGlobalTokenTracker } from './rate-limiter-DDH7JH5p.js';
+ export { a as RateLimiter, R as RateLimiterOptions, b as TokenTracker, T as TokenUsage, g as getGlobalRateLimiter, c as getGlobalTokenTracker, r as resetGlobalRateLimiter, d as resetGlobalTokenTracker } from './rate-limiter-B9tDNSMl.js';
  export { autoCloseJson, extractJsonFromText, isValidJson, parseJsonResponse, safeParseJson } from './json-parser.js';
  import { z } from 'zod';
  export { JsonSchema, STRUCTURED_OUTPUT_MODELS, StructuredGenerationOptions, StructuredGenerationResult, StructuredOutputClient, StructuredOutputOptions, getStructuredOutputClient, isStructuredOutputAvailable, resetStructuredOutputClient } from './structured-output.js';
@@ -118,6 +118,22 @@ declare function buildGenericContinuationPrompt(context: string, partialResponse
   * @packageDocumentation
   */

+ /** JSON Schema definition for structured extraction. */
+ interface JsonSchemaDefinition {
+   type?: string | string[];
+   properties?: Record<string, JsonSchemaDefinition>;
+   required?: string[];
+   items?: JsonSchemaDefinition;
+   additionalProperties?: boolean | JsonSchemaDefinition;
+   description?: string;
+   enum?: (string | number | boolean | null)[];
+   [key: string]: string | string[] | boolean | number | null | undefined | JsonSchemaDefinition | Record<string, JsonSchemaDefinition> | (string | number | boolean | null)[];
+ }
+ /** Data extracted by the LLM from unstructured text. Values are JSON-safe primitives or nested structures. */
+ type ExtractedValue = string | number | boolean | null | ExtractedValue[] | {
+   [key: string]: ExtractedValue;
+ };
+ type ExtractedData = Record<string, ExtractedValue>;
  /**
   * All call-service actions exposed by the LLM service.
   */
@@ -157,11 +173,11 @@ type LLMServiceActions = {
    extract: {
      params: {
        text: string;
-       schema: Record<string, unknown>;
+       schema: JsonSchemaDefinition;
        model?: string;
      };
      result: {
-       data: Record<string, unknown>;
+       data: ExtractedData;
        confidence: number;
      };
    };
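
To make the tightened `extract` typing concrete, here is a minimal sketch of values that satisfy the new declarations. The invoice fields are invented for illustration, and since `JsonSchemaDefinition` and `ExtractedData` are not exported, the sketch assumes file-local access to them; only the type shapes themselves come from this diff.

```ts
// Hedged sketch: values conforming to the new `extract` action types.
// Field names and contents are hypothetical; only the shapes are from the diff.
const params: { text: string; schema: JsonSchemaDefinition; model?: string } = {
  text: 'Invoice #42, total $19.99, paid on 2024-06-01.',
  schema: {
    type: 'object',
    properties: {
      invoiceNumber: { type: 'number', description: 'Invoice identifier' },
      total: { type: 'number' },
      paid: { type: 'boolean' },
    },
    required: ['invoiceNumber', 'total'],
    additionalProperties: false,
  },
};

// A conforming result: `data` is JSON-safe, possibly nested, never `unknown`.
const result: { data: ExtractedData; confidence: number } = {
  data: { invoiceNumber: 42, total: 19.99, paid: true },
  confidence: 0.93,
};
```

The practical gain over `Record<string, unknown>` is on the value side: schema entries must themselves look like JSON Schema nodes, and consumers can traverse `data` without casting.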
package/dist/index.js CHANGED
@@ -18,7 +18,7 @@ import {
    getSharedLLMClient,
    isProviderAvailable,
    resetSharedLLMClient
- } from "./chunk-F2DMHMRH.js";
+ } from "./chunk-E4NSQM6D.js";
  import {
    autoCloseJson,
    extractJsonFromText,
@@ -32,7 +32,7 @@ import {
    getStructuredOutputClient,
    isStructuredOutputAvailable,
    resetStructuredOutputClient
- } from "./chunk-3OVQNNPN.js";
+ } from "./chunk-FEN4PB7O.js";
  import {
    RateLimiter,
    TokenTracker,
@@ -40,13 +40,13 @@ import {
    getGlobalTokenTracker,
    resetGlobalRateLimiter,
    resetGlobalTokenTracker
- } from "./chunk-MJS33AAS.js";
+ } from "./chunk-ULT7T7O6.js";
  import {
    MasarError,
    MasarProvider,
    getMasarProvider,
    resetMasarProvider
- } from "./chunk-QHJ3T46X.js";
+ } from "./chunk-MUTXGY6D.js";

  // src/truncation-detector.ts
  function detectTruncation(response, finishReason) {
@@ -25,6 +25,10 @@ interface MasarGenerateResult {
      totalTokens: number;
    };
  }
+ /** GFlowNet sampling constraint value: primitives, arrays, or nested constraint maps. */
+ type ConstraintValue = string | number | boolean | null | ConstraintValue[] | {
+   [key: string]: ConstraintValue;
+ };
  interface GoalSpec {
    /** Natural-language description of the desired application. */
    description: string;
@@ -33,7 +37,7 @@ interface GoalSpec {
    /** Domain hint (e.g. "e-commerce", "healthcare"). */
    domain?: string;
    /** Additional constraints passed to the GFlowNet sampler. */
-   constraints?: Record<string, unknown>;
+   constraints?: Record<string, ConstraintValue>;
  }
  interface GFlowNetResult {
    /** Generated .orb schema text. */
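
As a quick illustration of the narrowed `constraints` field, the hedged sketch below builds a `GoalSpec` whose constraint values exercise all three `ConstraintValue` forms. The constraint keys are invented, and the sketch assumes no required `GoalSpec` fields beyond those visible in these hunks.

```ts
// Hedged sketch: GoalSpec with typed GFlowNet constraints.
// Keys inside `constraints` are hypothetical; only the
// GoalSpec/ConstraintValue shapes come from this diff.
const goal: GoalSpec = {
  description: 'Inventory tracker for a small pharmacy',
  domain: 'healthcare',
  constraints: {
    maxEntities: 12,                      // primitive
    requiredModules: ['auth', 'reports'], // array
    style: { naming: 'snake_case' },      // nested constraint map
  },
};
```

Anything non-JSON-safe, such as a function or a `Date`, is now rejected at compile time instead of slipping through as `unknown`.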
@@ -3,7 +3,7 @@ import {
    MasarProvider,
    getMasarProvider,
    resetMasarProvider
- } from "../chunk-QHJ3T46X.js";
+ } from "../chunk-MUTXGY6D.js";
  export {
    MasarError,
    MasarProvider,
@@ -2,9 +2,10 @@
   * Token Tracker for LLM Usage
   *
   * Tracks token usage across multiple LLM calls for:
-  * - Cost estimation
+  * - Cost estimation (pricing fetched from OpenRouter models API)
   * - Usage monitoring
   * - Quota management
+  * - Per-call JSONL logging
   *
   * @packageDocumentation
   */
@@ -18,7 +19,10 @@ declare class TokenTracker {
    private model;
    private usage;
    constructor(model?: string);
-   addUsage(promptTokens: number, completionTokens: number): void;
+   addUsage(promptTokens: number, completionTokens: number, options?: {
+     provider?: string;
+     durationMs?: number;
+   }): void;
    getSummary(): TokenUsage;
    getEstimatedCost(): number;
    getFormattedCost(): string;
@@ -1,4 +1,4 @@
- import { R as RateLimiterOptions, T as TokenUsage } from './rate-limiter-DDH7JH5p.js';
+ import { R as RateLimiterOptions, T as TokenUsage } from './rate-limiter-B9tDNSMl.js';
  import { z } from 'zod';

  /**
@@ -4,8 +4,8 @@ import {
    getStructuredOutputClient,
    isStructuredOutputAvailable,
    resetStructuredOutputClient
- } from "./chunk-3OVQNNPN.js";
- import "./chunk-MJS33AAS.js";
+ } from "./chunk-FEN4PB7O.js";
+ import "./chunk-ULT7T7O6.js";
  export {
    STRUCTURED_OUTPUT_MODELS,
    StructuredOutputClient,
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@almadar/llm",
-   "version": "2.5.1",
+   "version": "2.9.0",
    "description": "Multi-provider LLM client with rate limiting, token tracking, structured outputs, and continuation handling",
    "type": "module",
    "main": "./dist/index.js",
package/src/client.ts CHANGED
@@ -24,6 +24,47 @@ import {
  import { TokenTracker, getGlobalTokenTracker } from './token-tracker.js';
  import { parseJsonResponse } from './json-parser.js';

+ // ============================================================================
+ // Local type helpers (avoid Record<string, unknown> and unsafe casts)
+ // ============================================================================
+
+ /** Anthropic generation output with usage metadata (not in Langchain's base types). */
+ interface AnthropicGenerationWithUsage {
+   message?: {
+     usage_metadata?: {
+       cache_creation_input_tokens?: number;
+       cache_read_input_tokens?: number;
+       input_tokens?: number;
+       output_tokens?: number;
+     };
+   };
+ }
+
+ /** Response metadata from OpenAI-compatible providers. */
+ interface OpenAIResponseMetadata {
+   finish_reason?: string;
+ }
+
+ /** Model-specific kwargs passed to ChatOpenAI constructor. */
+ interface ModelKwargs {
+   max_completion_tokens?: number;
+   thinking?: { type: string };
+   tool_choice?: string;
+ }
+
+ /**
+  * Identity cast for generic return types.
+  * Used when a string value must satisfy a generic T parameter
+  * (e.g., rawText mode where caller declares T = string).
+  *
+  * Safety: callers only reach this path when rawText=true, which
+  * constrains T to string by convention. TypeScript cannot verify
+  * this constraint statically because T is caller-supplied.
+  */
+ function asGeneric<T>(value: string): T {
+   return value as T;
+ }
+
  // ============================================================================
  // Anthropic Cache Control Helper
  // ============================================================================
@@ -350,18 +391,10 @@ export class LLMClient {
        {
          handleLLMEnd: (output) => {
            const generation = output.generations?.[0]?.[0];
-           const usage = (
-             generation as unknown as {
-               message?: {
-                 usage_metadata?: {
-                   cache_creation_input_tokens?: number;
-                   cache_read_input_tokens?: number;
-                   input_tokens?: number;
-                   output_tokens?: number;
-                 };
-               };
-             }
-           )?.message?.usage_metadata;
+           const generationWithUsage = generation as
+             | (typeof generation & AnthropicGenerationWithUsage)
+             | undefined;
+           const usage = generationWithUsage?.message?.usage_metadata;

            if (usage) {
              const cacheCreated = usage.cache_creation_input_tokens ?? 0;
@@ -416,7 +449,7 @@ export class LLMClient {
      const effectiveTemp = isKimi ? 0.6 : temperature;

      // Build modelKwargs incrementally to avoid spread conflicts
-     const modelKwargs: Record<string, unknown> = {};
+     const modelKwargs: ModelKwargs = {};
      if (useCompletionTokens && maxTokens) {
        modelKwargs.max_completion_tokens = maxTokens;
      }
@@ -571,6 +604,7 @@ export class LLMClient {
        this.tokenTracker.addUsage(
          usage.promptTokens,
          usage.completionTokens,
+         { provider: this.provider },
        );
      }
    }
@@ -641,7 +675,7 @@ export class LLMClient {
    response: Awaited<ReturnType<ChatOpenAI['invoke']>>,
  ): LLMFinishReason {
    const metadata = response.response_metadata as
-     | Record<string, unknown>
+     | OpenAIResponseMetadata
      | undefined;
    if (metadata?.finish_reason) {
      const reason = metadata.finish_reason as string;
@@ -661,6 +695,7 @@ export class LLMClient {
      systemPrompt: string;
      userPrompt: string;
      maxTokens?: number;
+     signal?: AbortSignal;
    }): Promise<string> {
      const response = await this.callRawWithMetadata(options);
      return response.raw;
@@ -670,8 +705,9 @@ export class LLMClient {
      systemPrompt: string;
      userPrompt: string;
      maxTokens?: number;
+     signal?: AbortSignal;
    }): Promise<Omit<LLMResponse<string>, 'data'> & { raw: string }> {
-     const { systemPrompt, userPrompt, maxTokens } = options;
+     const { systemPrompt, userPrompt, maxTokens, signal } = options;

      return this.rateLimiter.execute(async () => {
        const modelToUse = maxTokens
@@ -686,6 +722,74 @@ export class LLMClient {
          this.provider === 'anthropic'
            ? addCacheControlToSystemMessages(messages)
            : messages,
+         signal ? { signal } : undefined,
+       );
+
+       let usage: LLMUsage | null = null;
+       if (response.usage_metadata) {
+         const usageMeta = response.usage_metadata as {
+           input_tokens?: number;
+           output_tokens?: number;
+         };
+         usage = {
+           promptTokens: usageMeta.input_tokens || 0,
+           completionTokens: usageMeta.output_tokens || 0,
+           totalTokens:
+             (usageMeta.input_tokens || 0) + (usageMeta.output_tokens || 0),
+         };
+
+         if (this.tokenTracker) {
+           this.tokenTracker.addUsage(
+             usage.promptTokens,
+             usage.completionTokens,
+           );
+         }
+       }
+
+       const finishReason = this.extractFinishReason(response);
+       const content =
+         typeof response.content === 'string'
+           ? response.content
+           : JSON.stringify(response.content);
+
+       return { raw: content, finishReason, usage };
+     });
+   }
+
+   /**
+    * Call the LLM with a structured messages array.
+    *
+    * Unlike callRawWithMetadata (which takes systemPrompt + userPrompt strings),
+    * this accepts a full conversation history with proper role separation.
+    * This enables:
+    * - Anthropic prompt caching on message boundaries (not just system prompt)
+    * - Proper tool_use/tool_result role handling across providers
+    * - Reduced token waste from string concatenation
+    *
+    * All providers support the messages format:
+    * - Anthropic: native messages API with cache_control
+    * - DeepSeek: OpenAI-compatible messages via ChatOpenAI
+    * - OpenRouter: OpenAI-compatible messages via ChatOpenAI
+    */
+   async callWithMessages(options: {
+     messages: Array<{ role: string; content: string }>;
+     maxTokens?: number;
+     signal?: AbortSignal;
+   }): Promise<Omit<LLMResponse<string>, 'data'> & { raw: string }> {
+     const { messages, maxTokens, signal } = options;
+
+     return this.rateLimiter.execute(async () => {
+       const modelToUse = maxTokens
+         ? this.getModelWithOptions({ maxTokens })
+         : this.model;
+
+       const langchainMessages = this.provider === 'anthropic'
+         ? addCacheControlToSystemMessages(messages)
+         : (messages as BaseMessageLike[]);
+
+       const response = await modelToUse.invoke(
+         langchainMessages,
+         signal ? { signal } : undefined,
        );

        let usage: LLMUsage | null = null;
@@ -905,7 +1009,8 @@ export class LLMClient {

    let parsed: T;
    if (rawText) {
-     parsed = result.content as unknown as T;
+     // rawText mode: caller expects T = string; content is already a string
+     parsed = asGeneric<T>(result.content);
    } else if (skipSchemaValidation) {
      parsed = parseJsonResponse(result.content, undefined) as T;
    } else {
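
Assuming a client from one of the exported factories, a minimal usage sketch of the new `callWithMessages` and `signal` support might look like this; the prompts and the 30-second timeout are illustrative, and only the method signature comes from this diff.

```ts
// Hedged usage sketch for the new callWithMessages API.
// getSharedLLMClient is exported by this package; the prompts and
// AbortSignal.timeout value below are illustrative assumptions.
import { getSharedLLMClient } from '@almadar/llm';

const client = getSharedLLMClient();

const { raw, finishReason, usage } = await client.callWithMessages({
  messages: [
    { role: 'system', content: 'You are a terse release-notes assistant.' },
    { role: 'user', content: 'Summarize the release in one line.' },
    { role: 'assistant', content: 'Which release?' },
    { role: 'user', content: '@almadar/llm 2.9.0' },
  ],
  maxTokens: 256,
  signal: AbortSignal.timeout(30_000), // abort if the provider stalls
});

console.log(finishReason, usage?.totalTokens, raw);
```

On Anthropic, the message array also picks up `cache_control` via the `addCacheControlToSystemMessages` branch shown above.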
package/src/contracts.ts CHANGED
@@ -10,6 +10,24 @@

  import type { ServiceContract } from "@almadar/core";

+ /** JSON Schema definition for structured extraction. */
+ interface JsonSchemaDefinition {
+   type?: string | string[];
+   properties?: Record<string, JsonSchemaDefinition>;
+   required?: string[];
+   items?: JsonSchemaDefinition;
+   additionalProperties?: boolean | JsonSchemaDefinition;
+   description?: string;
+   enum?: (string | number | boolean | null)[];
+   [key: string]: string | string[] | boolean | number | null | undefined
+     | JsonSchemaDefinition | Record<string, JsonSchemaDefinition>
+     | (string | number | boolean | null)[];
+ }
+
+ /** Data extracted by the LLM from unstructured text. Values are JSON-safe primitives or nested structures. */
+ type ExtractedValue = string | number | boolean | null | ExtractedValue[] | { [key: string]: ExtractedValue };
+ type ExtractedData = Record<string, ExtractedValue>;
+
  /**
   * All call-service actions exposed by the LLM service.
   */
@@ -51,11 +69,11 @@ export type LLMServiceActions = {
    extract: {
      params: {
        text: string;
-       schema: Record<string, unknown>;
+       schema: JsonSchemaDefinition;
        model?: string;
      };
      result: {
-       data: Record<string, unknown>;
+       data: ExtractedData;
        confidence: number;
      };
    };
@@ -32,6 +32,9 @@ export interface MasarGenerateResult {
    };
  }

+ /** GFlowNet sampling constraint value: primitives, arrays, or nested constraint maps. */
+ type ConstraintValue = string | number | boolean | null | ConstraintValue[] | { [key: string]: ConstraintValue };
+
  export interface GoalSpec {
    /** Natural-language description of the desired application. */
    description: string;
@@ -40,7 +43,7 @@ export interface GoalSpec {
    /** Domain hint (e.g. "e-commerce", "healthcare"). */
    domain?: string;
    /** Additional constraints passed to the GFlowNet sampler. */
-   constraints?: Record<string, unknown>;
+   constraints?: Record<string, ConstraintValue>;
  }

  export interface GFlowNetResult {
@@ -12,6 +12,7 @@

  import OpenAI from 'openai';
  import type { ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions';
+ import type { ResponseFormatJSONSchema } from 'openai/resources/shared';
  import { z } from 'zod';
  import {
    RateLimiter,
@@ -236,7 +237,7 @@ export class StructuredOutputClient {
        json_schema: {
          name: schemaName,
          strict: true,
-         schema: jsonSchema as Record<string, unknown>,
+         schema: jsonSchema as ResponseFormatJSONSchema.JSONSchema['schema'],
        },
      },
      ...tempParam,
@@ -267,7 +268,7 @@ export class StructuredOutputClient {
    };

    if (this.tokenTracker) {
-     this.tokenTracker.addUsage(usage.promptTokens, usage.completionTokens);
+     this.tokenTracker.addUsage(usage.promptTokens, usage.completionTokens, { provider: 'structured-output' });
    }

    console.log(
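
For context on the cast above: `ResponseFormatJSONSchema` is the OpenAI SDK's own type for the `json_schema` response format, so the schema slot is now typed against the SDK rather than loosely as `Record<string, unknown>`. A hedged sketch of the kind of request it feeds, with a placeholder model and schema:

```ts
// Hedged sketch of an OpenAI structured-output request using the
// typed json_schema slot shown above. The model and schema are
// placeholders; the cast target is from 'openai/resources/shared'.
import OpenAI from 'openai';
import type { ResponseFormatJSONSchema } from 'openai/resources/shared';

const openai = new OpenAI();
const jsonSchema = {
  type: 'object',
  properties: { answer: { type: 'string' } },
  required: ['answer'],
  additionalProperties: false,
};

const completion = await openai.chat.completions.create({
  model: 'gpt-4o-2024-08-06', // placeholder; any strict-schema-capable model
  messages: [{ role: 'user', content: 'Reply as {"answer": "..."}' }],
  response_format: {
    type: 'json_schema',
    json_schema: {
      name: 'reply',
      strict: true,
      schema: jsonSchema as ResponseFormatJSONSchema.JSONSchema['schema'],
    },
  },
});
```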
@@ -2,13 +2,17 @@
   * Token Tracker for LLM Usage
   *
   * Tracks token usage across multiple LLM calls for:
-  * - Cost estimation
+  * - Cost estimation (pricing fetched from OpenRouter models API)
   * - Usage monitoring
   * - Quota management
+  * - Per-call JSONL logging
   *
   * @packageDocumentation
   */

+ import { appendFileSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
+ import { dirname, join } from 'node:path';
+
  export interface TokenUsage {
    promptTokens: number;
    completionTokens: number;
@@ -21,18 +25,136 @@ export interface TokenCost {
    completionCostPer1K: number;
  }

- // Pricing as of 2024 (update as needed)
- const MODEL_COSTS: Record<string, TokenCost> = {
-   'gpt-4o': { promptCostPer1K: 0.005, completionCostPer1K: 0.015 },
-   'gpt-4o-mini': { promptCostPer1K: 0.00015, completionCostPer1K: 0.0006 },
-   'gpt-4-turbo': { promptCostPer1K: 0.01, completionCostPer1K: 0.03 },
-   'gpt-4': { promptCostPer1K: 0.03, completionCostPer1K: 0.06 },
-   'gpt-3.5-turbo': {
-     promptCostPer1K: 0.0005,
-     completionCostPer1K: 0.0015,
-   },
+ export interface CallLogEntry {
+   timestamp: string;
+   provider: string;
+   model: string;
+   promptTokens: number;
+   completionTokens: number;
+   totalTokens: number;
+   estimatedCost: number;
+   durationMs?: number;
+   source: 'local-log';
+ }
+
+ // ---------------------------------------------------------------------------
+ // Pricing: fetched from OpenRouter /api/v1/models, cached to disk for 24h
+ // ---------------------------------------------------------------------------
+
+ const ALMADAR_ROOT = process.env['ALMADAR_ROOT'] ?? process.cwd();
+ const PRICING_CACHE_PATH = join(ALMADAR_ROOT, '.llm-pricing-cache.json');
+ const CALL_LOG_PATH = join(ALMADAR_ROOT, '.llm-call-log.jsonl');
+ const CACHE_TTL_MS = 24 * 60 * 60 * 1000; // 24 hours
+
+ /** Map from our local model name to OpenRouter model ID */
+ const MODEL_ID_MAP: Record<string, string> = {
+   // Anthropic
+   'claude-opus-4-5-20250929': 'anthropic/claude-opus-4.5',
+   'claude-sonnet-4-5-20250929': 'anthropic/claude-sonnet-4.5',
+   'claude-sonnet-4-20250514': 'anthropic/claude-sonnet-4',
+   'claude-3-5-haiku-20241022': 'anthropic/claude-3.5-haiku',
+   // DeepSeek — map to current versions on OpenRouter
+   'deepseek-chat': 'deepseek/deepseek-v3.2',
+   'deepseek-coder': 'deepseek/deepseek-v3.2',
+   'deepseek-reasoner': 'deepseek/deepseek-r1-0528',
+   // Kimi
+   'kimi-k2.5': 'moonshotai/kimi-k2.5',
  };

+ // Fallback: zero cost — forces OpenRouter fetch for real pricing
+ const FALLBACK_COSTS: Record<string, TokenCost> = {};
+
+ interface PricingCache {
+   fetchedAt: number;
+   models: Record<string, TokenCost>;
+ }
+
+ let pricingCache: PricingCache | null = null;
+
+ function loadCachedPricing(): PricingCache | null {
+   try {
+     const raw = readFileSync(PRICING_CACHE_PATH, 'utf-8');
+     const parsed = JSON.parse(raw) as PricingCache;
+     if (Date.now() - parsed.fetchedAt < CACHE_TTL_MS) {
+       return parsed;
+     }
+   } catch {
+     // No cache or expired
+   }
+   return null;
+ }
+
+ async function fetchPricingFromOpenRouter(): Promise<Record<string, TokenCost>> {
+   const res = await fetch('https://openrouter.ai/api/v1/models');
+   if (!res.ok) throw new Error(`OpenRouter models API: HTTP ${res.status}`);
+   const json = await res.json() as { data?: Array<{ id: string; pricing?: { prompt?: string; completion?: string } }> };
+   const models: Record<string, TokenCost> = {};
+   for (const m of json.data ?? []) {
+     const promptPerToken = parseFloat(m.pricing?.prompt ?? '0');
+     const completionPerToken = parseFloat(m.pricing?.completion ?? '0');
+     if (promptPerToken > 0 || completionPerToken > 0) {
+       models[m.id] = {
+         promptCostPer1K: promptPerToken * 1000,
+         completionCostPer1K: completionPerToken * 1000,
+       };
+     }
+   }
+   return models;
+ }
+
+ /**
+  * Get pricing for all models. Uses 24h disk cache, fetches from OpenRouter on miss.
+  * Non-blocking: returns cached/fallback immediately, refreshes in background if stale.
+  */
+ function getPricing(): Record<string, TokenCost> {
+   if (pricingCache) return pricingCache.models;
+
+   const diskCache = loadCachedPricing();
+   if (diskCache) {
+     pricingCache = diskCache;
+     return diskCache.models;
+   }
+
+   // Trigger background fetch, return fallback for now
+   refreshPricingCache();
+   return FALLBACK_COSTS;
+ }
+
+ function refreshPricingCache(): void {
+   fetchPricingFromOpenRouter()
+     .then((models) => {
+       pricingCache = { fetchedAt: Date.now(), models };
+       try {
+         mkdirSync(dirname(PRICING_CACHE_PATH), { recursive: true });
+         writeFileSync(PRICING_CACHE_PATH, JSON.stringify(pricingCache));
+       } catch {
+         // Non-critical
+       }
+     })
+     .catch(() => {
+       // Silently fail, use fallback
+     });
+ }
+
+ function getCostForModel(model: string): TokenCost {
+   const pricing = getPricing();
+   // Try direct match on OpenRouter ID
+   const orId = MODEL_ID_MAP[model];
+   if (orId && pricing[orId]) return pricing[orId];
+   // Try direct key match (e.g., user passed "openai/gpt-4o")
+   if (pricing[model]) return pricing[model];
+   // Fuzzy: find first key containing the model name
+   for (const [key, cost] of Object.entries(pricing)) {
+     if (key.includes(model) || model.includes(key.split('/')[1] ?? '')) return cost;
+   }
+   // No pricing available — return zero (OpenRouter fetch pending or model not listed)
+   return { promptCostPer1K: 0, completionCostPer1K: 0 };
+ }
+
+ // ---------------------------------------------------------------------------
+ // TokenTracker
+ // ---------------------------------------------------------------------------
+
  export class TokenTracker {
    private model: string;
    private usage: TokenUsage = {
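
One detail worth pinning down from `fetchPricingFromOpenRouter` above: OpenRouter reports prices as per-token strings, so the `* 1000` converts them into the per-1K `TokenCost` units the rest of this file uses. A worked check with an assumed price:

```ts
// Worked check of the per-token to per-1K conversion above.
// The '0.000003' prompt price is an assumed example, not a real quote.
const promptPerToken = parseFloat('0.000003'); // $/token from OpenRouter
const promptCostPer1K = promptPerToken * 1000; // 0.003 $/1K tokens
// 1,200 prompt tokens then cost (1200 / 1000) * 0.003 = $0.0036.
```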
@@ -42,15 +164,39 @@ export class TokenTracker {
      callCount: 0,
    };

-   constructor(model: string = 'gpt-4o') {
+   constructor(model: string = 'claude-sonnet-4-5-20250929') {
      this.model = model;
    }

-   addUsage(promptTokens: number, completionTokens: number): void {
+   addUsage(promptTokens: number, completionTokens: number, options?: { provider?: string; durationMs?: number }): void {
      this.usage.promptTokens += promptTokens;
      this.usage.completionTokens += completionTokens;
      this.usage.totalTokens += promptTokens + completionTokens;
      this.usage.callCount++;
+
+     const costs = getCostForModel(this.model);
+     const estimatedCost =
+       (promptTokens / 1000) * costs.promptCostPer1K +
+       (completionTokens / 1000) * costs.completionCostPer1K;
+
+     const entry: CallLogEntry = {
+       timestamp: new Date().toISOString(),
+       provider: options?.provider ?? 'unknown',
+       model: this.model,
+       promptTokens,
+       completionTokens,
+       totalTokens: promptTokens + completionTokens,
+       estimatedCost,
+       durationMs: options?.durationMs,
+       source: 'local-log',
+     };
+
+     try {
+       mkdirSync(dirname(CALL_LOG_PATH), { recursive: true });
+       appendFileSync(CALL_LOG_PATH, JSON.stringify(entry) + '\n');
+     } catch {
+       // Non-critical: don't break LLM calls if logging fails
+     }
    }

    getSummary(): TokenUsage {
@@ -58,7 +204,7 @@ export class TokenTracker {
    }

    getEstimatedCost(): number {
-     const costs = MODEL_COSTS[this.model] || MODEL_COSTS['gpt-4o'];
+     const costs = getCostForModel(this.model);
      const promptCost =
        (this.usage.promptTokens / 1000) * costs.promptCostPer1K;
      const completionCost =
@@ -114,3 +260,15 @@ export function getGlobalTokenTracker(model?: string): TokenTracker {
  export function resetGlobalTokenTracker(): void {
    globalTracker?.reset();
  }
+
+ export function getCallLogPath(): string {
+   return CALL_LOG_PATH;
+ }
+
+ /** Force-refresh the pricing cache from OpenRouter. */
+ export async function refreshPricing(): Promise<void> {
+   const models = await fetchPricingFromOpenRouter();
+   pricingCache = { fetchedAt: Date.now(), models };
+   mkdirSync(dirname(PRICING_CACHE_PATH), { recursive: true });
+   writeFileSync(PRICING_CACHE_PATH, JSON.stringify(pricingCache));
+ }
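
Putting the new tracking pieces together, here is a hedged end-to-end sketch; the token counts and durations are made up, while `getGlobalTokenTracker`, the `addUsage` options, `CallLogEntry`, and `getCallLogPath` are the APIs added or changed above.

```ts
// Hedged sketch, written as if inside this package's source tree so it can
// use getCallLogPath/CallLogEntry directly (their root re-export is not
// shown in this diff). Token counts are invented for illustration.
import { readFileSync } from 'node:fs';
import { CallLogEntry, getCallLogPath, getGlobalTokenTracker } from './token-tracker.js';

const tracker = getGlobalTokenTracker('claude-sonnet-4-5-20250929');
tracker.addUsage(1200, 340, { provider: 'anthropic', durationMs: 2150 });

// Each addUsage call appends one JSON line to .llm-call-log.jsonl.
const entries = readFileSync(getCallLogPath(), 'utf-8')
  .split('\n')
  .filter(Boolean)
  .map((line) => JSON.parse(line) as CallLogEntry);

const spend = entries.reduce((sum, e) => sum + e.estimatedCost, 0);
console.log(`${entries.length} calls, ~$${spend.toFixed(4)} estimated`);
```

Note that until the OpenRouter pricing fetch completes, `estimatedCost` entries may be zero, since the fallback pricing table is intentionally empty.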