@almadar/llm 2.5.1 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,6 +1,6 @@
  import { LLMFinishReason, LLMClient } from './client.js';
  export { ANTHROPIC_MODELS, CacheAwareLLMCallOptions, CacheableBlock, DEEPSEEK_MODELS, KIMI_MODELS, LLMCallOptions, LLMClientOptions, LLMProvider, LLMResponse, LLMStreamChunk, LLMStreamOptions, LLMUsage, OPENAI_MODELS, OPENROUTER_MODELS, ProviderConfig, createAnthropicClient, createCreativeClient, createDeepSeekClient, createFixClient, createKimiClient, createOpenAIClient, createOpenRouterClient, createRequirementsClient, createZhipuClient, getAvailableProvider, getSharedLLMClient, isProviderAvailable, resetSharedLLMClient } from './client.js';
- export { a as RateLimiter, R as RateLimiterOptions, b as TokenTracker, T as TokenUsage, g as getGlobalRateLimiter, c as getGlobalTokenTracker, r as resetGlobalRateLimiter, d as resetGlobalTokenTracker } from './rate-limiter-DDH7JH5p.js';
+ export { a as RateLimiter, R as RateLimiterOptions, b as TokenTracker, T as TokenUsage, g as getGlobalRateLimiter, c as getGlobalTokenTracker, r as resetGlobalRateLimiter, d as resetGlobalTokenTracker } from './rate-limiter-B9tDNSMl.js';
  export { autoCloseJson, extractJsonFromText, isValidJson, parseJsonResponse, safeParseJson } from './json-parser.js';
  import { z } from 'zod';
  export { JsonSchema, STRUCTURED_OUTPUT_MODELS, StructuredGenerationOptions, StructuredGenerationResult, StructuredOutputClient, StructuredOutputOptions, getStructuredOutputClient, isStructuredOutputAvailable, resetStructuredOutputClient } from './structured-output.js';
@@ -118,6 +118,22 @@ declare function buildGenericContinuationPrompt(context: string, partialResponse
   * @packageDocumentation
   */

+ /** JSON Schema definition for structured extraction. */
+ interface JsonSchemaDefinition {
+   type?: string | string[];
+   properties?: Record<string, JsonSchemaDefinition>;
+   required?: string[];
+   items?: JsonSchemaDefinition;
+   additionalProperties?: boolean | JsonSchemaDefinition;
+   description?: string;
+   enum?: (string | number | boolean | null)[];
+   [key: string]: string | string[] | boolean | number | null | undefined | JsonSchemaDefinition | Record<string, JsonSchemaDefinition> | (string | number | boolean | null)[];
+ }
+ /** Data extracted by the LLM from unstructured text. Values are JSON-safe primitives or nested structures. */
+ type ExtractedValue = string | number | boolean | null | ExtractedValue[] | {
+   [key: string]: ExtractedValue;
+ };
+ type ExtractedData = Record<string, ExtractedValue>;
  /**
   * All call-service actions exposed by the LLM service.
   */
@@ -157,11 +173,11 @@ type LLMServiceActions = {
    extract: {
      params: {
        text: string;
-       schema: Record<string, unknown>;
+       schema: JsonSchemaDefinition;
        model?: string;
      };
      result: {
-       data: Record<string, unknown>;
+       data: ExtractedData;
        confidence: number;
      };
    };
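
To make the tightened `extract` typing concrete, here is a minimal sketch of values that satisfy the new declarations. The invoice fields are invented for illustration, and since `JsonSchemaDefinition` and `ExtractedData` are not exported, the sketch assumes file-local access to them; only the type shapes themselves come from this diff.

```ts
// Hedged sketch: values conforming to the new `extract` action types.
// Field names and contents are hypothetical; only the shapes are from the diff.
const params: { text: string; schema: JsonSchemaDefinition; model?: string } = {
  text: 'Invoice #42, total $19.99, paid on 2024-06-01.',
  schema: {
    type: 'object',
    properties: {
      invoiceNumber: { type: 'number', description: 'Invoice identifier' },
      total: { type: 'number' },
      paid: { type: 'boolean' },
    },
    required: ['invoiceNumber', 'total'],
    additionalProperties: false,
  },
};

// A conforming result: `data` is JSON-safe, possibly nested, never `unknown`.
const result: { data: ExtractedData; confidence: number } = {
  data: { invoiceNumber: 42, total: 19.99, paid: true },
  confidence: 0.93,
};
```

The practical gain over `Record<string, unknown>` is on the value side: schema entries must themselves look like JSON Schema nodes, and consumers can traverse `data` without casting.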
package/dist/index.js CHANGED
@@ -18,7 +18,7 @@ import {
    getSharedLLMClient,
    isProviderAvailable,
    resetSharedLLMClient
- } from "./chunk-F2DMHMRH.js";
+ } from "./chunk-E4NSQM6D.js";
  import {
    autoCloseJson,
    extractJsonFromText,
@@ -32,7 +32,7 @@ import {
    getStructuredOutputClient,
    isStructuredOutputAvailable,
    resetStructuredOutputClient
- } from "./chunk-3OVQNNPN.js";
+ } from "./chunk-FEN4PB7O.js";
  import {
    RateLimiter,
    TokenTracker,
@@ -40,13 +40,13 @@ import {
    getGlobalTokenTracker,
    resetGlobalRateLimiter,
    resetGlobalTokenTracker
- } from "./chunk-MJS33AAS.js";
+ } from "./chunk-ULT7T7O6.js";
  import {
    MasarError,
    MasarProvider,
    getMasarProvider,
    resetMasarProvider
- } from "./chunk-QHJ3T46X.js";
+ } from "./chunk-MUTXGY6D.js";

  // src/truncation-detector.ts
  function detectTruncation(response, finishReason) {
@@ -25,6 +25,10 @@ interface MasarGenerateResult {
      totalTokens: number;
    };
  }
+ /** GFlowNet sampling constraint value: primitives, arrays, or nested constraint maps. */
+ type ConstraintValue = string | number | boolean | null | ConstraintValue[] | {
+   [key: string]: ConstraintValue;
+ };
  interface GoalSpec {
    /** Natural-language description of the desired application. */
    description: string;
@@ -33,7 +37,7 @@ interface GoalSpec {
    /** Domain hint (e.g. "e-commerce", "healthcare"). */
    domain?: string;
    /** Additional constraints passed to the GFlowNet sampler. */
-   constraints?: Record<string, unknown>;
+   constraints?: Record<string, ConstraintValue>;
  }
  interface GFlowNetResult {
    /** Generated .orb schema text. */
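
As a quick illustration of the narrowed `constraints` field, the hedged sketch below builds a `GoalSpec` whose constraint values exercise all three `ConstraintValue` forms. The constraint keys are invented, and the sketch assumes no required `GoalSpec` fields beyond those visible in these hunks.

```ts
// Hedged sketch: GoalSpec with typed GFlowNet constraints.
// Keys inside `constraints` are hypothetical; only the
// GoalSpec/ConstraintValue shapes come from this diff.
const goal: GoalSpec = {
  description: 'Inventory tracker for a small pharmacy',
  domain: 'healthcare',
  constraints: {
    maxEntities: 12,                      // primitive
    requiredModules: ['auth', 'reports'], // array
    style: { naming: 'snake_case' },      // nested constraint map
  },
};
```

Anything non-JSON-safe, such as a function or a `Date`, is now rejected at compile time instead of slipping through as `unknown`.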
@@ -3,7 +3,7 @@ import {
    MasarProvider,
    getMasarProvider,
    resetMasarProvider
- } from "../chunk-QHJ3T46X.js";
+ } from "../chunk-MUTXGY6D.js";
  export {
    MasarError,
    MasarProvider,
@@ -2,9 +2,10 @@
   * Token Tracker for LLM Usage
   *
   * Tracks token usage across multiple LLM calls for:
-  * - Cost estimation
+  * - Cost estimation (pricing fetched from OpenRouter models API)
   * - Usage monitoring
   * - Quota management
+  * - Per-call JSONL logging
   *
   * @packageDocumentation
   */
@@ -18,7 +19,10 @@ declare class TokenTracker {
    private model;
    private usage;
    constructor(model?: string);
-   addUsage(promptTokens: number, completionTokens: number): void;
+   addUsage(promptTokens: number, completionTokens: number, options?: {
+     provider?: string;
+     durationMs?: number;
+   }): void;
    getSummary(): TokenUsage;
    getEstimatedCost(): number;
    getFormattedCost(): string;
@@ -1,4 +1,4 @@
- import { R as RateLimiterOptions, T as TokenUsage } from './rate-limiter-DDH7JH5p.js';
+ import { R as RateLimiterOptions, T as TokenUsage } from './rate-limiter-B9tDNSMl.js';
  import { z } from 'zod';

  /**
@@ -4,8 +4,8 @@ import {
    getStructuredOutputClient,
    isStructuredOutputAvailable,
    resetStructuredOutputClient
- } from "./chunk-3OVQNNPN.js";
- import "./chunk-MJS33AAS.js";
+ } from "./chunk-FEN4PB7O.js";
+ import "./chunk-ULT7T7O6.js";
  export {
    STRUCTURED_OUTPUT_MODELS,
    StructuredOutputClient,
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@almadar/llm",
-   "version": "2.5.1",
+   "version": "2.9.0",
    "description": "Multi-provider LLM client with rate limiting, token tracking, structured outputs, and continuation handling",
    "type": "module",
    "main": "./dist/index.js",
package/src/client.ts CHANGED
@@ -24,6 +24,47 @@ import {
  import { TokenTracker, getGlobalTokenTracker } from './token-tracker.js';
  import { parseJsonResponse } from './json-parser.js';

+ // ============================================================================
+ // Local type helpers (avoid Record<string, unknown> and unsafe casts)
+ // ============================================================================
+
+ /** Anthropic generation output with usage metadata (not in Langchain's base types). */
+ interface AnthropicGenerationWithUsage {
+   message?: {
+     usage_metadata?: {
+       cache_creation_input_tokens?: number;
+       cache_read_input_tokens?: number;
+       input_tokens?: number;
+       output_tokens?: number;
+     };
+   };
+ }
+
+ /** Response metadata from OpenAI-compatible providers. */
+ interface OpenAIResponseMetadata {
+   finish_reason?: string;
+ }
+
+ /** Model-specific kwargs passed to ChatOpenAI constructor. */
+ interface ModelKwargs {
+   max_completion_tokens?: number;
+   thinking?: { type: string };
+   tool_choice?: string;
+ }
+
+ /**
+  * Identity cast for generic return types.
+  * Used when a string value must satisfy a generic T parameter
+  * (e.g., rawText mode where caller declares T = string).
+  *
+  * Safety: callers only reach this path when rawText=true, which
+  * constrains T to string by convention. TypeScript cannot verify
+  * this constraint statically because T is caller-supplied.
+  */
+ function asGeneric<T>(value: string): T {
+   return value as T;
+ }
+
  // ============================================================================
  // Anthropic Cache Control Helper
  // ============================================================================
@@ -350,18 +391,10 @@ export class LLMClient {
        {
          handleLLMEnd: (output) => {
            const generation = output.generations?.[0]?.[0];
-           const usage = (
-             generation as unknown as {
-               message?: {
-                 usage_metadata?: {
-                   cache_creation_input_tokens?: number;
-                   cache_read_input_tokens?: number;
-                   input_tokens?: number;
-                   output_tokens?: number;
-                 };
-               };
-             }
-           )?.message?.usage_metadata;
+           const generationWithUsage = generation as
+             | (typeof generation & AnthropicGenerationWithUsage)
+             | undefined;
+           const usage = generationWithUsage?.message?.usage_metadata;

            if (usage) {
              const cacheCreated = usage.cache_creation_input_tokens ?? 0;
@@ -416,7 +449,7 @@ export class LLMClient {
      const effectiveTemp = isKimi ? 0.6 : temperature;

      // Build modelKwargs incrementally to avoid spread conflicts
-     const modelKwargs: Record<string, unknown> = {};
+     const modelKwargs: ModelKwargs = {};
      if (useCompletionTokens && maxTokens) {
        modelKwargs.max_completion_tokens = maxTokens;
      }
@@ -571,6 +604,7 @@ export class LLMClient {
        this.tokenTracker.addUsage(
          usage.promptTokens,
          usage.completionTokens,
+         { provider: this.provider },
        );
      }
    }
@@ -641,7 +675,7 @@ export class LLMClient {
    response: Awaited<ReturnType<ChatOpenAI['invoke']>>,
  ): LLMFinishReason {
    const metadata = response.response_metadata as
-     | Record<string, unknown>
+     | OpenAIResponseMetadata
      | undefined;
    if (metadata?.finish_reason) {
      const reason = metadata.finish_reason as string;
@@ -661,6 +695,7 @@ export class LLMClient {
      systemPrompt: string;
      userPrompt: string;
      maxTokens?: number;
+     signal?: AbortSignal;
    }): Promise<string> {
      const response = await this.callRawWithMetadata(options);
      return response.raw;
@@ -670,8 +705,9 @@ export class LLMClient {
      systemPrompt: string;
      userPrompt: string;
      maxTokens?: number;
+     signal?: AbortSignal;
    }): Promise<Omit<LLMResponse<string>, 'data'> & { raw: string }> {
-     const { systemPrompt, userPrompt, maxTokens } = options;
+     const { systemPrompt, userPrompt, maxTokens, signal } = options;

      return this.rateLimiter.execute(async () => {
        const modelToUse = maxTokens
@@ -686,6 +722,74 @@ export class LLMClient {
          this.provider === 'anthropic'
            ? addCacheControlToSystemMessages(messages)
            : messages,
+         signal ? { signal } : undefined,
+       );
+
+       let usage: LLMUsage | null = null;
+       if (response.usage_metadata) {
+         const usageMeta = response.usage_metadata as {
+           input_tokens?: number;
+           output_tokens?: number;
+         };
+         usage = {
+           promptTokens: usageMeta.input_tokens || 0,
+           completionTokens: usageMeta.output_tokens || 0,
+           totalTokens:
+             (usageMeta.input_tokens || 0) + (usageMeta.output_tokens || 0),
+         };
+
+         if (this.tokenTracker) {
+           this.tokenTracker.addUsage(
+             usage.promptTokens,
+             usage.completionTokens,
+           );
+         }
+       }
+
+       const finishReason = this.extractFinishReason(response);
+       const content =
+         typeof response.content === 'string'
+           ? response.content
+           : JSON.stringify(response.content);
+
+       return { raw: content, finishReason, usage };
+     });
+   }
+
+   /**
+    * Call the LLM with a structured messages array.
+    *
+    * Unlike callRawWithMetadata (which takes systemPrompt + userPrompt strings),
+    * this accepts a full conversation history with proper role separation.
+    * This enables:
+    * - Anthropic prompt caching on message boundaries (not just system prompt)
+    * - Proper tool_use/tool_result role handling across providers
+    * - Reduced token waste from string concatenation
+    *
+    * All providers support the messages format:
+    * - Anthropic: native messages API with cache_control
+    * - DeepSeek: OpenAI-compatible messages via ChatOpenAI
+    * - OpenRouter: OpenAI-compatible messages via ChatOpenAI
+    */
+   async callWithMessages(options: {
+     messages: Array<{ role: string; content: string }>;
+     maxTokens?: number;
+     signal?: AbortSignal;
+   }): Promise<Omit<LLMResponse<string>, 'data'> & { raw: string }> {
+     const { messages, maxTokens, signal } = options;
+
+     return this.rateLimiter.execute(async () => {
+       const modelToUse = maxTokens
+         ? this.getModelWithOptions({ maxTokens })
+         : this.model;
+
+       const langchainMessages = this.provider === 'anthropic'
+         ? addCacheControlToSystemMessages(messages)
+         : (messages as BaseMessageLike[]);
+
+       const response = await modelToUse.invoke(
+         langchainMessages,
+         signal ? { signal } : undefined,
        );

        let usage: LLMUsage | null = null;
@@ -905,7 +1009,8 @@ export class LLMClient {

    let parsed: T;
    if (rawText) {
-     parsed = result.content as unknown as T;
+     // rawText mode: caller expects T = string; content is already a string
+     parsed = asGeneric<T>(result.content);
    } else if (skipSchemaValidation) {
      parsed = parseJsonResponse(result.content, undefined) as T;
    } else {
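
Assuming a client from one of the exported factories, a minimal usage sketch of the new `callWithMessages` and `signal` support might look like this; the prompts and the 30-second timeout are illustrative, and only the method signature comes from this diff.

```ts
// Hedged usage sketch for the new callWithMessages API.
// getSharedLLMClient is exported by this package; the prompts and
// AbortSignal.timeout value below are illustrative assumptions.
import { getSharedLLMClient } from '@almadar/llm';

const client = getSharedLLMClient();

const { raw, finishReason, usage } = await client.callWithMessages({
  messages: [
    { role: 'system', content: 'You are a terse release-notes assistant.' },
    { role: 'user', content: 'Summarize the release in one line.' },
    { role: 'assistant', content: 'Which release?' },
    { role: 'user', content: '@almadar/llm 2.9.0' },
  ],
  maxTokens: 256,
  signal: AbortSignal.timeout(30_000), // abort if the provider stalls
});

console.log(finishReason, usage?.totalTokens, raw);
```

On Anthropic, the message array also picks up `cache_control` via the `addCacheControlToSystemMessages` branch shown above.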
package/src/contracts.ts CHANGED
@@ -10,6 +10,24 @@

  import type { ServiceContract } from "@almadar/core";

+ /** JSON Schema definition for structured extraction. */
+ interface JsonSchemaDefinition {
+   type?: string | string[];
+   properties?: Record<string, JsonSchemaDefinition>;
+   required?: string[];
+   items?: JsonSchemaDefinition;
+   additionalProperties?: boolean | JsonSchemaDefinition;
+   description?: string;
+   enum?: (string | number | boolean | null)[];
+   [key: string]: string | string[] | boolean | number | null | undefined
+     | JsonSchemaDefinition | Record<string, JsonSchemaDefinition>
+     | (string | number | boolean | null)[];
+ }
+
+ /** Data extracted by the LLM from unstructured text. Values are JSON-safe primitives or nested structures. */
+ type ExtractedValue = string | number | boolean | null | ExtractedValue[] | { [key: string]: ExtractedValue };
+ type ExtractedData = Record<string, ExtractedValue>;
+
  /**
   * All call-service actions exposed by the LLM service.
   */
@@ -51,11 +69,11 @@ export type LLMServiceActions = {
    extract: {
      params: {
        text: string;
-       schema: Record<string, unknown>;
+       schema: JsonSchemaDefinition;
        model?: string;
      };
      result: {
-       data: Record<string, unknown>;
+       data: ExtractedData;
        confidence: number;
      };
    };
@@ -32,6 +32,9 @@ export interface MasarGenerateResult {
    };
  }

+ /** GFlowNet sampling constraint value: primitives, arrays, or nested constraint maps. */
+ type ConstraintValue = string | number | boolean | null | ConstraintValue[] | { [key: string]: ConstraintValue };
+
  export interface GoalSpec {
    /** Natural-language description of the desired application. */
    description: string;
@@ -40,7 +43,7 @@ export interface GoalSpec {
    /** Domain hint (e.g. "e-commerce", "healthcare"). */
    domain?: string;
    /** Additional constraints passed to the GFlowNet sampler. */
-   constraints?: Record<string, unknown>;
+   constraints?: Record<string, ConstraintValue>;
  }

  export interface GFlowNetResult {
@@ -12,6 +12,7 @@

  import OpenAI from 'openai';
  import type { ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions';
+ import type { ResponseFormatJSONSchema } from 'openai/resources/shared';
  import { z } from 'zod';
  import {
    RateLimiter,
@@ -236,7 +237,7 @@ export class StructuredOutputClient {
        json_schema: {
          name: schemaName,
          strict: true,
-         schema: jsonSchema as Record<string, unknown>,
+         schema: jsonSchema as ResponseFormatJSONSchema.JSONSchema['schema'],
        },
      },
      ...tempParam,
@@ -267,7 +268,7 @@ export class StructuredOutputClient {
    };

    if (this.tokenTracker) {
-     this.tokenTracker.addUsage(usage.promptTokens, usage.completionTokens);
+     this.tokenTracker.addUsage(usage.promptTokens, usage.completionTokens, { provider: 'structured-output' });
    }

    console.log(
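
For context on the cast above: `ResponseFormatJSONSchema` is the OpenAI SDK's own type for the `json_schema` response format, so the schema slot is now typed against the SDK rather than loosely as `Record<string, unknown>`. A hedged sketch of the kind of request it feeds, with a placeholder model and schema:

```ts
// Hedged sketch of an OpenAI structured-output request using the
// typed json_schema slot shown above. The model and schema are
// placeholders; the cast target is from 'openai/resources/shared'.
import OpenAI from 'openai';
import type { ResponseFormatJSONSchema } from 'openai/resources/shared';

const openai = new OpenAI();
const jsonSchema = {
  type: 'object',
  properties: { answer: { type: 'string' } },
  required: ['answer'],
  additionalProperties: false,
};

const completion = await openai.chat.completions.create({
  model: 'gpt-4o-2024-08-06', // placeholder; any strict-schema-capable model
  messages: [{ role: 'user', content: 'Reply as {"answer": "..."}' }],
  response_format: {
    type: 'json_schema',
    json_schema: {
      name: 'reply',
      strict: true,
      schema: jsonSchema as ResponseFormatJSONSchema.JSONSchema['schema'],
    },
  },
});
```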
@@ -2,13 +2,17 @@
   * Token Tracker for LLM Usage
   *
   * Tracks token usage across multiple LLM calls for:
-  * - Cost estimation
+  * - Cost estimation (pricing fetched from OpenRouter models API)
   * - Usage monitoring
   * - Quota management
+  * - Per-call JSONL logging
   *
   * @packageDocumentation
   */

+ import { appendFileSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
+ import { dirname, join } from 'node:path';
+
  export interface TokenUsage {
    promptTokens: number;
    completionTokens: number;
@@ -21,18 +25,136 @@ export interface TokenCost {
    completionCostPer1K: number;
  }

- // Pricing as of 2024 (update as needed)
- const MODEL_COSTS: Record<string, TokenCost> = {
-   'gpt-4o': { promptCostPer1K: 0.005, completionCostPer1K: 0.015 },
-   'gpt-4o-mini': { promptCostPer1K: 0.00015, completionCostPer1K: 0.0006 },
-   'gpt-4-turbo': { promptCostPer1K: 0.01, completionCostPer1K: 0.03 },
-   'gpt-4': { promptCostPer1K: 0.03, completionCostPer1K: 0.06 },
-   'gpt-3.5-turbo': {
-     promptCostPer1K: 0.0005,
-     completionCostPer1K: 0.0015,
-   },
+ export interface CallLogEntry {
+   timestamp: string;
+   provider: string;
+   model: string;
+   promptTokens: number;
+   completionTokens: number;
+   totalTokens: number;
+   estimatedCost: number;
+   durationMs?: number;
+   source: 'local-log';
+ }
+
+ // ---------------------------------------------------------------------------
+ // Pricing: fetched from OpenRouter /api/v1/models, cached to disk for 24h
+ // ---------------------------------------------------------------------------
+
+ const ALMADAR_ROOT = process.env['ALMADAR_ROOT'] ?? process.cwd();
+ const PRICING_CACHE_PATH = join(ALMADAR_ROOT, '.llm-pricing-cache.json');
+ const CALL_LOG_PATH = join(ALMADAR_ROOT, '.llm-call-log.jsonl');
+ const CACHE_TTL_MS = 24 * 60 * 60 * 1000; // 24 hours
+
+ /** Map from our local model name to OpenRouter model ID */
+ const MODEL_ID_MAP: Record<string, string> = {
+   // Anthropic
+   'claude-opus-4-5-20250929': 'anthropic/claude-opus-4.5',
+   'claude-sonnet-4-5-20250929': 'anthropic/claude-sonnet-4.5',
+   'claude-sonnet-4-20250514': 'anthropic/claude-sonnet-4',
+   'claude-3-5-haiku-20241022': 'anthropic/claude-3.5-haiku',
+   // DeepSeek — map to current versions on OpenRouter
+   'deepseek-chat': 'deepseek/deepseek-v3.2',
+   'deepseek-coder': 'deepseek/deepseek-v3.2',
+   'deepseek-reasoner': 'deepseek/deepseek-r1-0528',
+   // Kimi
+   'kimi-k2.5': 'moonshotai/kimi-k2.5',
  };

+ // Fallback: zero cost — forces OpenRouter fetch for real pricing
+ const FALLBACK_COSTS: Record<string, TokenCost> = {};
+
+ interface PricingCache {
+   fetchedAt: number;
+   models: Record<string, TokenCost>;
+ }
+
+ let pricingCache: PricingCache | null = null;
+
+ function loadCachedPricing(): PricingCache | null {
+   try {
+     const raw = readFileSync(PRICING_CACHE_PATH, 'utf-8');
+     const parsed = JSON.parse(raw) as PricingCache;
+     if (Date.now() - parsed.fetchedAt < CACHE_TTL_MS) {
+       return parsed;
+     }
+   } catch {
+     // No cache or expired
+   }
+   return null;
+ }
+
+ async function fetchPricingFromOpenRouter(): Promise<Record<string, TokenCost>> {
+   const res = await fetch('https://openrouter.ai/api/v1/models');
+   if (!res.ok) throw new Error(`OpenRouter models API: HTTP ${res.status}`);
+   const json = await res.json() as { data?: Array<{ id: string; pricing?: { prompt?: string; completion?: string } }> };
+   const models: Record<string, TokenCost> = {};
+   for (const m of json.data ?? []) {
+     const promptPerToken = parseFloat(m.pricing?.prompt ?? '0');
+     const completionPerToken = parseFloat(m.pricing?.completion ?? '0');
+     if (promptPerToken > 0 || completionPerToken > 0) {
+       models[m.id] = {
+         promptCostPer1K: promptPerToken * 1000,
+         completionCostPer1K: completionPerToken * 1000,
+       };
+     }
+   }
+   return models;
+ }
+
+ /**
+  * Get pricing for all models. Uses 24h disk cache, fetches from OpenRouter on miss.
+  * Non-blocking: returns cached/fallback immediately, refreshes in background if stale.
+  */
+ function getPricing(): Record<string, TokenCost> {
+   if (pricingCache) return pricingCache.models;
+
+   const diskCache = loadCachedPricing();
+   if (diskCache) {
+     pricingCache = diskCache;
+     return diskCache.models;
+   }
+
+   // Trigger background fetch, return fallback for now
+   refreshPricingCache();
+   return FALLBACK_COSTS;
+ }
+
+ function refreshPricingCache(): void {
+   fetchPricingFromOpenRouter()
+     .then((models) => {
+       pricingCache = { fetchedAt: Date.now(), models };
+       try {
+         mkdirSync(dirname(PRICING_CACHE_PATH), { recursive: true });
+         writeFileSync(PRICING_CACHE_PATH, JSON.stringify(pricingCache));
+       } catch {
+         // Non-critical
+       }
+     })
+     .catch(() => {
+       // Silently fail, use fallback
+     });
+ }
+
+ function getCostForModel(model: string): TokenCost {
+   const pricing = getPricing();
+   // Try direct match on OpenRouter ID
+   const orId = MODEL_ID_MAP[model];
+   if (orId && pricing[orId]) return pricing[orId];
+   // Try direct key match (e.g., user passed "openai/gpt-4o")
+   if (pricing[model]) return pricing[model];
+   // Fuzzy: find first key containing the model name
+   for (const [key, cost] of Object.entries(pricing)) {
+     if (key.includes(model) || model.includes(key.split('/')[1] ?? '')) return cost;
+   }
+   // No pricing available — return zero (OpenRouter fetch pending or model not listed)
+   return { promptCostPer1K: 0, completionCostPer1K: 0 };
+ }
+
+ // ---------------------------------------------------------------------------
+ // TokenTracker
+ // ---------------------------------------------------------------------------
+
  export class TokenTracker {
    private model: string;
    private usage: TokenUsage = {
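
One detail worth pinning down from `fetchPricingFromOpenRouter` above: OpenRouter reports prices as per-token strings, so the `* 1000` converts them into the per-1K `TokenCost` units the rest of this file uses. A worked check with an assumed price:

```ts
// Worked check of the per-token to per-1K conversion above.
// The '0.000003' prompt price is an assumed example, not a real quote.
const promptPerToken = parseFloat('0.000003'); // $/token from OpenRouter
const promptCostPer1K = promptPerToken * 1000; // 0.003 $/1K tokens
// 1,200 prompt tokens then cost (1200 / 1000) * 0.003 = $0.0036.
```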
@@ -42,15 +164,39 @@ export class TokenTracker {
      callCount: 0,
    };

-   constructor(model: string = 'gpt-4o') {
+   constructor(model: string = 'claude-sonnet-4-5-20250929') {
      this.model = model;
    }

-   addUsage(promptTokens: number, completionTokens: number): void {
+   addUsage(promptTokens: number, completionTokens: number, options?: { provider?: string; durationMs?: number }): void {
      this.usage.promptTokens += promptTokens;
      this.usage.completionTokens += completionTokens;
      this.usage.totalTokens += promptTokens + completionTokens;
      this.usage.callCount++;
+
+     const costs = getCostForModel(this.model);
+     const estimatedCost =
+       (promptTokens / 1000) * costs.promptCostPer1K +
+       (completionTokens / 1000) * costs.completionCostPer1K;
+
+     const entry: CallLogEntry = {
+       timestamp: new Date().toISOString(),
+       provider: options?.provider ?? 'unknown',
+       model: this.model,
+       promptTokens,
+       completionTokens,
+       totalTokens: promptTokens + completionTokens,
+       estimatedCost,
+       durationMs: options?.durationMs,
+       source: 'local-log',
+     };
+
+     try {
+       mkdirSync(dirname(CALL_LOG_PATH), { recursive: true });
+       appendFileSync(CALL_LOG_PATH, JSON.stringify(entry) + '\n');
+     } catch {
+       // Non-critical: don't break LLM calls if logging fails
+     }
    }

    getSummary(): TokenUsage {
@@ -58,7 +204,7 @@ export class TokenTracker {
    }

    getEstimatedCost(): number {
-     const costs = MODEL_COSTS[this.model] || MODEL_COSTS['gpt-4o'];
+     const costs = getCostForModel(this.model);
      const promptCost =
        (this.usage.promptTokens / 1000) * costs.promptCostPer1K;
      const completionCost =
@@ -114,3 +260,15 @@ export function getGlobalTokenTracker(model?: string): TokenTracker {
  export function resetGlobalTokenTracker(): void {
    globalTracker?.reset();
  }
+
+ export function getCallLogPath(): string {
+   return CALL_LOG_PATH;
+ }
+
+ /** Force-refresh the pricing cache from OpenRouter. */
+ export async function refreshPricing(): Promise<void> {
+   const models = await fetchPricingFromOpenRouter();
+   pricingCache = { fetchedAt: Date.now(), models };
+   mkdirSync(dirname(PRICING_CACHE_PATH), { recursive: true });
+   writeFileSync(PRICING_CACHE_PATH, JSON.stringify(pricingCache));
+ }
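
Putting the new tracking pieces together, here is a hedged end-to-end sketch; the token counts and durations are made up, while `getGlobalTokenTracker`, the `addUsage` options, `CallLogEntry`, and `getCallLogPath` are the APIs added or changed above.

```ts
// Hedged sketch, written as if inside this package's source tree so it can
// use getCallLogPath/CallLogEntry directly (their root re-export is not
// shown in this diff). Token counts are invented for illustration.
import { readFileSync } from 'node:fs';
import { CallLogEntry, getCallLogPath, getGlobalTokenTracker } from './token-tracker.js';

const tracker = getGlobalTokenTracker('claude-sonnet-4-5-20250929');
tracker.addUsage(1200, 340, { provider: 'anthropic', durationMs: 2150 });

// Each addUsage call appends one JSON line to .llm-call-log.jsonl.
const entries = readFileSync(getCallLogPath(), 'utf-8')
  .split('\n')
  .filter(Boolean)
  .map((line) => JSON.parse(line) as CallLogEntry);

const spend = entries.reduce((sum, e) => sum + e.estimatedCost, 0);
console.log(`${entries.length} calls, ~$${spend.toFixed(4)} estimated`);
```

Note that until the OpenRouter pricing fetch completes, `estimatedCost` entries may be zero, since the fallback pricing table is intentionally empty.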