lynkr 9.4.6 → 9.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,9 +54,41 @@ const DATABRICKS_FALLBACK = {
54
54
  'databricks-bge-large-en': { input: 0.02, output: 0, context: 512 },
55
55
  };
56
56
 
57
- // Default cost for unknown models
57
+ // Default cost for unknown models. Returned with `unknown: true` so callers can
58
+ // distinguish a real price from a fabricated guess.
58
59
  const DEFAULT_COST = { input: 1.0, output: 3.0, context: 128000 };
59
60
 
61
+ // Curated name aliases (exact, one-directional). Maps a name a caller might use
62
+ // to the canonical key likely present in the pricing data. Misses are harmless
63
+ // (resolution simply continues down the ladder).
64
+ const MODEL_ALIASES = {
65
+ 'claude-sonnet-4-5': 'claude-sonnet-4-5-20250929',
66
+ 'claude-opus-4-1': 'claude-opus-4-1-20250805',
67
+ 'claude-3-5-sonnet': 'claude-3-5-sonnet-20241022',
68
+ };
69
+
70
/**
 * Parse the MODEL_PRICE_OVERRIDES environment variable into a lookup table.
 *
 * Expected value: a JSON object of
 *   { "<model>": { "input": <usd/1M>, "output": <usd/1M>, "context"?: N } }
 * Lets operators pin correct prices for models the registry doesn't know.
 *
 * Keys are lowercased and trimmed so they compare equal to the names used for
 * lookups. An entry is kept only when `input` and `output` are finite,
 * non-negative numbers; a missing or unusable `context` falls back to 128000.
 * Any extra properties on an entry are carried through unchanged.
 *
 * Never throws: malformed JSON, a non-object document, and invalid entries are
 * logged as warnings and skipped.
 *
 * @returns {Map<string, Object>} normalized model name → cost info
 */
function _loadOverrides() {
  const out = new Map();
  const raw = process.env.MODEL_PRICE_OVERRIDES;
  if (!raw) return out;

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    logger.warn({ err: err.message }, '[ModelRegistry] Failed to parse MODEL_PRICE_OVERRIDES');
    return out;
  }

  // Valid JSON is not necessarily an object: `null`, arrays and primitives
  // carry no model names, so reject them explicitly.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    logger.warn(
      { err: 'expected a JSON object keyed by model name' },
      '[ModelRegistry] Failed to parse MODEL_PRICE_OVERRIDES'
    );
    return out;
  }

  // JSON.parse turns out-of-range literals such as 1e999 into Infinity, so a
  // plain typeof check is not enough to guarantee a usable price.
  const isPrice = (v) => typeof v === 'number' && Number.isFinite(v) && v >= 0;
  const skipped = [];

  for (const [rawName, info] of Object.entries(parsed)) {
    const name = rawName.toLowerCase().trim();
    if (!name || !info || !isPrice(info.input) || !isPrice(info.output)) {
      skipped.push(rawName);
      continue;
    }
    const { context, ...rest } = info;
    const hasContext = typeof context === 'number' && Number.isFinite(context) && context > 0;
    out.set(name, { context: hasContext ? context : 128000, ...rest });
  }

  if (skipped.length > 0) {
    logger.warn({ skipped }, '[ModelRegistry] Ignored invalid MODEL_PRICE_OVERRIDES entries');
  }
  return out;
}
91
+
60
92
  class ModelRegistry {
61
93
  constructor() {
62
94
  this.litellmPrices = {};
@@ -64,6 +96,7 @@ class ModelRegistry {
64
96
  this.loaded = false;
65
97
  this.lastFetch = 0;
66
98
  this.modelIndex = new Map();
99
+ this.overrides = _loadOverrides();
67
100
  }
68
101
 
69
102
  /**
@@ -255,40 +288,70 @@ class ModelRegistry {
255
288
  * @returns {Object} Cost info { input, output, context, ... }
256
289
  */
257
290
  getCost(modelName) {
258
- if (!modelName) return { ...DEFAULT_COST, source: 'default' };
291
+ if (!modelName) return { ...DEFAULT_COST, source: 'default', unknown: true };
259
292
 
260
- const normalizedName = modelName.toLowerCase();
293
+ const name = String(modelName).toLowerCase().trim();
294
+ const hit = this._resolveCost(name);
295
+ if (hit) return hit;
261
296
 
262
- // Direct lookup
263
- if (this.modelIndex.has(normalizedName)) {
264
- return this.modelIndex.get(normalizedName);
265
- }
297
+ // Nothing matched — report unknown rather than silently fabricating a price.
298
+ logger.debug({ model: modelName }, '[ModelRegistry] Model not found — cost unknown');
299
+ return { ...DEFAULT_COST, source: 'default', unknown: true };
300
+ }
266
301
 
267
- // Try common variations
268
- const variations = [
269
- normalizedName,
270
- normalizedName.replace('databricks-', ''),
271
- normalizedName.replace('azure/', ''),
272
- normalizedName.replace('bedrock/', ''),
273
- normalizedName.replace('anthropic.', ''),
274
- normalizedName.split('/').pop(),
275
- ];
276
-
277
- for (const variant of variations) {
278
- if (this.modelIndex.has(variant)) {
279
- return this.modelIndex.get(variant);
280
- }
302
+ /**
303
+ * Deterministic price resolution. Each step is exact (no bidirectional
304
+ * substring matching), and the only loose step (longest-prefix) is
305
+ * one-directional and length-bounded, so unrelated names can't false-match.
306
+ * Returns a cost object with a `resolution` tag, or null if nothing matched.
307
+ * @param {string} name - already lowercased/trimmed
308
+ */
309
+ _resolveCost(name) {
310
+ const tag = (value, resolution, matchedAs) => ({
311
+ ...value,
312
+ resolution,
313
+ ...(matchedAs && matchedAs !== name ? { matchedAs } : {}),
314
+ });
315
+
316
+ // 1. Operator overrides (exact) — ground truth.
317
+ if (this.overrides.has(name)) return tag({ ...this.overrides.get(name), source: 'override' }, 'override');
318
+
319
+ // 2. Exact registry hit.
320
+ if (this.modelIndex.has(name)) return tag(this.modelIndex.get(name), 'exact');
321
+
322
+ // 3. Provider-prefix strip (exact).
323
+ const stripped = [
324
+ name.replace(/^databricks-/, ''),
325
+ name.replace(/^azure\//, ''),
326
+ name.replace(/^bedrock\//, ''),
327
+ name.replace(/^anthropic\./, ''),
328
+ name.replace(/^openai\//, ''),
329
+ name.includes('/') ? name.split('/').pop() : null,
330
+ ].filter((v) => v && v !== name);
331
+ for (const v of stripped) {
332
+ if (this.overrides.has(v)) return tag({ ...this.overrides.get(v), source: 'override' }, 'prefix-strip', v);
333
+ if (this.modelIndex.has(v)) return tag(this.modelIndex.get(v), 'prefix-strip', v);
281
334
  }
282
335
 
283
- // Fuzzy match for partial names
336
+ // 4. Curated alias (exact).
337
+ const alias = MODEL_ALIASES[name];
338
+ if (alias && this.modelIndex.has(alias)) return tag(this.modelIndex.get(alias), 'alias', alias);
339
+
340
+ // 5. Date/version-suffix normalization (e.g. -20250929, -2025-09-29, -v2).
341
+ const dateless = name.replace(/[-@](\d{8}|\d{4}-\d{2}-\d{2}|v\d+)$/, '');
342
+ if (dateless !== name && this.modelIndex.has(dateless)) return tag(this.modelIndex.get(dateless), 'date-normalize', dateless);
343
+
344
+ // 6. Longest registry key that is a prefix of the requested name. Bounded so
345
+ // short keys can't grab unrelated names (e.g. "gpt-5.2-chat-2026" → "gpt-5.2-chat").
346
+ let best = null;
284
347
  for (const [key, value] of this.modelIndex.entries()) {
285
- if (key.includes(normalizedName) || normalizedName.includes(key)) {
286
- return value;
348
+ if (key.length >= 6 && name.startsWith(key) && (!best || key.length > best.key.length)) {
349
+ best = { key, value };
287
350
  }
288
351
  }
352
+ if (best) return tag(best.value, 'longest-prefix', best.key);
289
353
 
290
- logger.debug({ model: modelName }, '[ModelRegistry] Model not found, using default');
291
- return { ...DEFAULT_COST, source: 'default' };
354
+ return null;
292
355
  }
293
356
 
294
357
  /**
@@ -13,13 +13,18 @@ const { extractContent } = require('./complexity-analyzer');
13
13
  // Substring keywords found in file paths or instruction text.
14
14
  // Matched case-insensitively as raw substrings, so "auth" hits
15
15
  // "src/auth/login.ts" and "authentication".
16
+ // NOTE: keywords are matched as case-insensitive *substrings* against file
17
+ // paths, so overly generic terms cause false positives. 'session' and 'token'
18
+ // were removed because they match benign paths (src/sessions/*, tokenizer.js,
19
+ // token-budget.js) and were force-escalating ordinary requests to COMPLEX —
20
+ // real secrets/credentials are still covered by the keywords below.
16
21
  const PROTECTED_PATH_KEYWORDS = [
17
- 'auth', 'oauth', 'jwt', 'session', 'security', 'permission', 'rbac',
22
+ 'auth', 'oauth', 'jwt', 'security', 'permission', 'rbac',
18
23
  'payment', 'payments', 'billing', 'invoice', 'subscription',
19
24
  'migration', 'migrations', 'schema',
20
25
  'infra', 'terraform', 'kustomize', 'helm', 'kubernetes',
21
26
  '.github/workflows', '.env', 'secret', 'credential',
22
- 'api-key', 'api_key', 'apikey', 'token',
27
+ 'api-key', 'api_key', 'apikey',
23
28
  'webhook', 'admin',
24
29
  ];
25
30
 
@@ -0,0 +1,96 @@
1
+ /**
2
+ * Session → Provider Affinity
3
+ *
4
+ * A multi-turn agentic conversation builds up tool_use / tool_result history
5
+ * whose tool-call IDs are formatted for the provider that produced them. If a
6
+ * later turn re-routes to a *different* provider (because per-turn complexity
7
+ * or risk changed), that provider rejects the orphaned tool linkage:
8
+ *
9
+ * Azure: 400 "No tool call found for function call output with call_id …"
10
+ * Moonshot: 400 "Invalid request: tool_call_id is not found"
11
+ *
12
+ * To prevent that, once a session has chosen a provider we keep subsequent
13
+ * turns on it *while the payload carries tool history*. Fresh turns (no tool
14
+ * state) still route normally, so per-turn tier routing is preserved.
15
+ *
16
+ * @module routing/session-affinity
17
+ */
18
+
19
/** Upper bound on the number of sessions tracked at once. */
const MAX_ENTRIES = 2000;
/** How long a pin stays valid after it was last written: 1 hour, in milliseconds. */
const TTL_MS = 60 * 60 * 1000;

/**
 * Session id → last routing decision, kept in insertion order.
 * @type {Map<string, {provider:string, model:string|null, tier:string|null, ts:number}>}
 */
const pins = new Map();

/**
 * Trim `pins` back to at most MAX_ENTRIES, dropping the oldest entries first.
 *
 * Loops until the cap is met rather than removing a single entry, so the bound
 * holds even if more than one entry was added since the last call.
 */
function _evictIfNeeded() {
  // Map iterates in insertion order, so the iterator yields oldest keys first.
  // Deleting an already-visited key while iterating a Map is well-defined.
  const oldestFirst = pins.keys();
  while (pins.size > MAX_ENTRIES) {
    const { value, done } = oldestFirst.next();
    if (done) break;
    pins.delete(value);
  }
}
31
+
32
/**
 * True when the payload contains an in-flight tool exchange — i.e. a prior
 * assistant tool call or a tool result. These are the turns whose tool-call
 * IDs break if the provider changes.
 *
 * Recognised shapes:
 *  - content-block messages: `content` is an array holding a block whose
 *    `type` is "tool_use" or "tool_result";
 *  - chat-completions-style messages: `role` is "tool", or the message has a
 *    non-empty `tool_calls` array.
 *    NOTE(review): the second shape is accepted defensively; confirm whether
 *    callers ever pass payloads in that format.
 *
 * Anything that is not an array of messages yields false.
 *
 * @param {object} payload
 * @returns {boolean}
 */
function payloadHasToolHistory(payload) {
  const messages = payload?.messages;
  if (!Array.isArray(messages)) return false;

  const isToolBlock = (block) => block?.type === "tool_use" || block?.type === "tool_result";

  return messages.some((msg) => {
    if (msg?.role === "tool") return true;
    if (Array.isArray(msg?.tool_calls) && msg.tool_calls.length > 0) return true;
    const content = msg?.content;
    return Array.isArray(content) && content.some(isToolBlock);
  });
}
52
+
53
/**
 * Look up the routing decision pinned to a session.
 *
 * A pin older than TTL_MS is removed from the store as a side effect and
 * treated as absent.
 *
 * @param {string} sessionId
 * @returns {{provider:string, model:string|null, tier:string|null, ts:number}|null}
 *   the stored entry, or null when there is no session id, no pin, or the pin expired
 */
function getPinned(sessionId) {
  const entry = sessionId ? pins.get(sessionId) : undefined;
  if (!entry) return null;

  const expired = Date.now() - entry.ts > TTL_MS;
  if (expired) pins.delete(sessionId);
  return expired ? null : entry;
}
67
+
68
/**
 * Remember which provider a session routed to, so later tool-bearing turns can
 * reuse it. Calls without a session id or without `decision.provider` are ignored.
 *
 * @param {string} sessionId
 * @param {{provider:string, model?:string|null, tier?:string|null}} decision
 */
function setPinned(sessionId, decision) {
  const usable = Boolean(sessionId) && Boolean(decision?.provider);
  if (!usable) return;

  // Removing the key before re-adding it moves the session to the end of the
  // Map's insertion order, so active sessions aren't the first to be evicted.
  pins.delete(sessionId);

  const entry = {
    provider: decision.provider,
    model: decision.model ?? null,
    tier: decision.tier ?? null,
    ts: Date.now(),
  };
  pins.set(sessionId, entry);

  _evictIfNeeded();
}
85
+
86
/**
 * Forget every pinned session. Intended for tests and maintenance tasks.
 */
function _clear() {
  pins.clear();
}
90
+
91
+ module.exports = {
92
+ payloadHasToolHistory,
93
+ getPinned,
94
+ setPinned,
95
+ _clear,
96
+ };
@@ -94,7 +94,9 @@ function init() {
94
94
  circuit_breaker_state TEXT,
95
95
  quality_score REAL,
96
96
  tokens_per_second REAL,
97
- cost_efficiency REAL
97
+ cost_efficiency REAL,
98
+ request_text TEXT,
99
+ response_text TEXT
98
100
  );
99
101
 
100
102
  CREATE INDEX IF NOT EXISTS idx_telemetry_provider
@@ -110,6 +112,15 @@ function init() {
110
112
  ON routing_telemetry(session_id, timestamp);
111
113
  `);
112
114
 
115
+ // Migration: add columns to pre-existing tables (CREATE TABLE IF NOT EXISTS
116
+ // won't add them to a DB created before these columns existed).
117
+ const existingCols = new Set(db.prepare("PRAGMA table_info(routing_telemetry)").all().map((c) => c.name));
118
+ for (const col of ["request_text", "response_text"]) {
119
+ if (!existingCols.has(col)) {
120
+ db.exec(`ALTER TABLE routing_telemetry ADD COLUMN ${col} TEXT`);
121
+ }
122
+ }
123
+
113
124
  logger.info({ dbPath }, "Routing telemetry database initialised");
114
125
  return true;
115
126
  } catch (err) {
@@ -163,14 +174,14 @@ function record(data) {
163
174
  provider, model, routing_method, was_fallback, output_tokens,
164
175
  latency_ms, status_code, error_type, cost_usd, tool_calls_made,
165
176
  retry_count, circuit_breaker_state, quality_score, tokens_per_second,
166
- cost_efficiency
177
+ cost_efficiency, request_text, response_text
167
178
  ) VALUES (
168
179
  @request_id, @session_id, @timestamp, @complexity_score, @tier,
169
180
  @agentic_type, @tool_count, @input_tokens, @message_count, @request_type,
170
181
  @provider, @model, @routing_method, @was_fallback, @output_tokens,
171
182
  @latency_ms, @status_code, @error_type, @cost_usd, @tool_calls_made,
172
183
  @retry_count, @circuit_breaker_state, @quality_score, @tokens_per_second,
173
- @cost_efficiency
184
+ @cost_efficiency, @request_text, @response_text
174
185
  )`
175
186
  );
176
187
  if (!insert) return;
@@ -201,6 +212,8 @@ function record(data) {
201
212
  quality_score: data.quality_score ?? null,
202
213
  tokens_per_second: data.tokens_per_second ?? null,
203
214
  cost_efficiency: data.cost_efficiency ?? null,
215
+ request_text: data.request_text ?? null,
216
+ response_text: data.response_text ?? null,
204
217
  });
205
218
  } catch (err) {
206
219
  logger.debug({ err: err.message }, "Telemetry record failed");
@@ -1,8 +0,0 @@
1
- {
2
- "files": ["docs/**/*.html"],
3
- "insertBefore": "</body>",
4
- "commentSyntax": "html",
5
- "cspChecked": true,
6
- "framework": "multi-page-static",
7
- "devServer": null
8
- }