lynkr 9.1.2 → 9.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/README.md +21 -10
  2. package/package.json +3 -1
  3. package/scripts/build-knn-index.js +130 -0
  4. package/scripts/calibrate-thresholds.js +197 -0
  5. package/scripts/compare-policies.js +67 -0
  6. package/scripts/learn-output-ratios.js +162 -0
  7. package/scripts/refresh-pricing.js +122 -0
  8. package/scripts/run-routerarena.js +26 -0
  9. package/scripts/sample-regret.js +84 -0
  10. package/scripts/train-risk-classifier.js +191 -0
  11. package/src/api/middleware/budget-enforcer.js +60 -0
  12. package/src/api/middleware/load-shedding.js +11 -1
  13. package/src/api/middleware/tenant.js +21 -0
  14. package/src/api/router.js +19 -40
  15. package/src/budget/hierarchical-budget.js +159 -0
  16. package/src/cache/semantic.js +28 -2
  17. package/src/clients/databricks.js +59 -5
  18. package/src/config/index.js +239 -43
  19. package/src/context/toon.js +5 -4
  20. package/src/orchestrator/index.js +44 -6
  21. package/src/prompts/system.js +34 -6
  22. package/src/routing/bandit.js +246 -0
  23. package/src/routing/cascade.js +106 -0
  24. package/src/routing/complexity-analyzer.js +7 -15
  25. package/src/routing/confidence-scorer.js +121 -0
  26. package/src/routing/context-validator.js +71 -0
  27. package/src/routing/cost-optimizer.js +5 -2
  28. package/src/routing/deadline.js +52 -0
  29. package/src/routing/drift-monitor.js +113 -0
  30. package/src/routing/embedding-cache.js +77 -0
  31. package/src/routing/index.js +314 -5
  32. package/src/routing/knn-router.js +206 -0
  33. package/src/routing/latency-tracker.js +113 -71
  34. package/src/routing/model-tiers.js +156 -6
  35. package/src/routing/output-ratios.js +57 -0
  36. package/src/routing/regret-estimator.js +91 -0
  37. package/src/routing/reward-pipeline.js +62 -0
  38. package/src/routing/risk-classifier.js +130 -0
  39. package/src/routing/shadow-mode.js +77 -0
  40. package/src/routing/tenant-policy.js +96 -0
  41. package/src/routing/tokenizer.js +162 -0
  42. package/src/server.js +9 -0
@@ -22,16 +22,59 @@ const {
22
22
  const { getAgenticDetector, AGENT_TYPES } = require('./agentic-detector');
23
23
  const { getModelTierSelector, TIER_DEFINITIONS } = require('./model-tiers');
24
24
  const { getCostOptimizer } = require('./cost-optimizer');
25
- const { analyzeRisk } = require('./risk-analyzer');
25
+ const { analyzeRisk } = require('./risk-classifier');
26
+
27
+ // Phase 3-6 routing modules
28
+ const { getKnnRouter } = require('./knn-router');
29
+ const { getBandit } = require('./bandit');
30
+ const { getShadowPolicy, compareAndLog: shadowCompareAndLog } = require('./shadow-mode');
31
+ const { chooseFastest } = require('./deadline');
32
+ const { applyTenantOverrides } = require('./tenant-policy');
26
33
 
27
34
  // Telemetry modules
28
35
  const telemetry = require('./telemetry');
29
36
  const { scoreResponseQuality } = require('./quality-scorer');
30
37
  const { getLatencyTracker } = require('./latency-tracker');
31
38
 
39
+ // Phase 1 modules
40
+ const contextValidator = require('./context-validator');
41
+ const { countPayloadTokens } = require('./tokenizer');
42
+
32
43
  // Local providers
33
44
  const LOCAL_PROVIDERS = ['ollama', 'llamacpp', 'lmstudio'];
34
45
 
46
/**
 * Reports whether the payload carries at least one image content block.
 *
 * Only messages whose `content` is an array are inspected; plain-string
 * content (or a missing/non-array `messages` list) can never hold an image
 * block and therefore yields `false`.
 *
 * @param {object} payload - Request payload; `payload.messages` is scanned.
 * @returns {boolean} `true` when any block has type `image` or `image_url`.
 */
function _payloadHasImages(payload) {
  const turns = payload?.messages;
  if (!Array.isArray(turns)) return false;

  for (const turn of turns) {
    const parts = turn?.content;
    // String (or absent) content has no structured blocks to inspect.
    if (!Array.isArray(parts)) continue;

    for (const part of parts) {
      const kind = part?.type;
      if (kind === 'image' || kind === 'image_url') return true;
    }
  }
  return false;
}
59
+
60
/**
 * Lists the providers whose required settings are present in `config`.
 * Used by the Phase 1.2 cost-optimizer override to scope candidates.
 *
 * A provider is included only when every setting listed for it is truthy.
 * The result keeps the fixed declaration order of the table below.
 *
 * @returns {string[]} Provider identifiers, e.g. `['openai', 'ollama']`.
 */
function _enabledProviders() {
  // [provider id, config section, settings that must all be truthy]
  const requirements = [
    ['databricks', config.databricks, ['url', 'apiKey']],
    ['azure-anthropic', config.azureAnthropic, ['endpoint', 'apiKey']],
    ['bedrock', config.bedrock, ['apiKey']],
    ['openrouter', config.openrouter, ['apiKey']],
    ['openai', config.openai, ['apiKey']],
    ['azure-openai', config.azureOpenAI, ['endpoint', 'apiKey']],
    ['ollama', config.ollama, ['endpoint']],
    ['llamacpp', config.llamacpp, ['endpoint']],
    ['lmstudio', config.lmstudio, ['endpoint']],
  ];

  return requirements
    .filter(([, section, needed]) => needed.every((key) => section?.[key]))
    .map(([id]) => id);
}
77
+
35
78
  /**
36
79
  * Check if a provider is local
37
80
  */
@@ -41,15 +84,28 @@ function isLocalProvider(provider) {
41
84
 
42
85
  /**
43
86
  * Check if fallback is enabled
87
+ * In tier routing mode, fallback is always enabled
44
88
  */
45
89
  function isFallbackEnabled() {
90
+ if (config.modelTiers?.enabled) {
91
+ // Tier routing mode: fallback always enabled
92
+ return true;
93
+ }
94
+ // Static provider mode: use FALLBACK_ENABLED
46
95
  return config.modelProvider?.fallbackEnabled !== false;
47
96
  }
48
97
 
49
98
  /**
50
99
  * Get the configured fallback provider
100
+ * In tier routing mode, fallback = TIER_REASONING provider
51
101
  */
52
102
  function getFallbackProvider() {
103
+ if (config.modelTiers?.enabled && config.modelTiers?.REASONING) {
104
+ // Tier routing mode: extract provider from TIER_REASONING
105
+ const match = config.modelTiers.REASONING.match(/^([a-z-]+):/);
106
+ if (match) return match[1];
107
+ }
108
+ // Static provider mode: use FALLBACK_PROVIDER
53
109
  return config.modelProvider?.fallbackProvider ?? 'databricks';
54
110
  }
55
111
 
@@ -283,9 +339,11 @@ async function determineProviderSmart(payload, options = {}) {
283
339
  }
284
340
  }
285
341
 
286
- // Apply routing decision based on tier config (TIER_* env vars are mandatory)
342
+ // Apply routing decision based on tier config (TIER_* env vars take precedence
343
+ // but Phase 1.2 lets the cost-optimizer pick a cheaper qualifying model when safe).
287
344
  let provider;
288
345
  let method = 'tier_config';
346
+ let costOptimized = false;
289
347
 
290
348
  const selector = getModelTierSelector();
291
349
  const modelSelection = selector.selectModel(tier, null);
@@ -294,8 +352,242 @@ async function determineProviderSmart(payload, options = {}) {
294
352
  selectedModel = modelSelection.model;
295
353
  logger.debug({ tier, provider, model: selectedModel }, '[Routing] Using tier config');
296
354
 
297
- // TIER_* env vars are the final word — no cost optimization override.
298
- // The user explicitly configured provider:model per tier; respect that.
355
+ // Phase 1.2 — cost-optimizer override.
356
+ // Only kick in when:
357
+ // - feature flag enabled (default true, disable with LYNKR_COST_OPTIMIZE=false)
358
+ // - risk level is not high (high-risk keeps the explicitly-configured model)
359
+ // - the optimizer finds a meaningfully cheaper qualifying model
360
+ const costOptimizeEnabled = process.env.LYNKR_COST_OPTIMIZE !== 'false'
361
+ && config.routing?.costOptimize !== false;
362
+ if (costOptimizeEnabled && risk?.level !== 'high') {
363
+ try {
364
+ const optimizer = getCostOptimizer();
365
+ const availableProviders = _enabledProviders();
366
+ const cheapest = optimizer.findCheapestForTier(tier, availableProviders);
367
+ if (cheapest && cheapest.model && cheapest.model !== selectedModel) {
368
+ const current = optimizer.estimateCost(selectedModel, 1000);
369
+ const candidate = optimizer.estimateCost(cheapest.model, 1000);
370
+ if (candidate.totalEstimate > 0 && candidate.totalEstimate < current.totalEstimate * 0.75) {
371
+ logger.debug({
372
+ tier,
373
+ from: `${provider}:${selectedModel}`,
374
+ to: `${cheapest.provider}:${cheapest.model}`,
375
+ savedPerK: (current.totalEstimate - candidate.totalEstimate).toFixed(6),
376
+ }, '[Routing] Cost-optimizer override');
377
+ provider = cheapest.provider;
378
+ selectedModel = cheapest.model;
379
+ method = 'tier_config+cost_optimized';
380
+ costOptimized = true;
381
+ }
382
+ }
383
+ } catch (err) {
384
+ logger.debug({ err: err.message }, '[Routing] Cost-optimize failed, keeping tier_config selection');
385
+ }
386
+ }
387
+
388
+ // Phase 1.3 — context window validation. If estimated tokens exceed the
389
+ // selected model's context (with response headroom), escalate to a
390
+ // context-capable model regardless of tier.
391
+ try {
392
+ const estimatedTokens = countPayloadTokens(payload, selectedModel);
393
+ const ctxResult = contextValidator.validate(selectedModel, estimatedTokens);
394
+ if (!ctxResult.ok) {
395
+ const capable = selector.findContextCapable(estimatedTokens, tier);
396
+ if (capable) {
397
+ logger.info({
398
+ from: `${provider}:${selectedModel}`,
399
+ to: `${capable.provider}:${capable.model}`,
400
+ required: estimatedTokens,
401
+ oldContext: ctxResult.context,
402
+ newContext: capable.context,
403
+ }, '[Routing] Context window escalation');
404
+ provider = capable.provider;
405
+ selectedModel = capable.model;
406
+ if (capable.tier) tier = capable.tier;
407
+ method = method + '+context_escalated';
408
+ } else {
409
+ logger.warn({
410
+ model: selectedModel,
411
+ required: estimatedTokens,
412
+ available: ctxResult.context,
413
+ }, '[Routing] No context-capable fallback — request may fail upstream');
414
+ }
415
+ }
416
+ } catch (err) {
417
+ logger.debug({ err: err.message }, '[Routing] Context validation failed, proceeding without check');
418
+ }
419
+
420
+ // Phase 1.4 — vision capability guard.
421
+ // If the payload contains image content blocks but the selected model lacks
422
+ // vision support, silently swap to the cheapest vision-capable model at or
423
+ // above the current tier. Prevents silent upstream failures.
424
+ if (_payloadHasImages(payload)) {
425
+ try {
426
+ const { getModelRegistrySync } = require('./model-registry');
427
+ const registry = getModelRegistrySync();
428
+ const modelInfo = registry.getCost(selectedModel);
429
+ if (!modelInfo?.vision) {
430
+ const visionModel = selector.findVisionCapable(tier);
431
+ if (visionModel) {
432
+ logger.info({
433
+ from: `${provider}:${selectedModel}`,
434
+ to: `${visionModel.provider}:${visionModel.model}`,
435
+ tier: visionModel.tier,
436
+ }, '[Routing] Vision guard — upgrading to vision-capable model');
437
+ provider = visionModel.provider;
438
+ selectedModel = visionModel.model;
439
+ if (visionModel.tier !== tier) tier = visionModel.tier;
440
+ method = method + '+vision_guard';
441
+ } else {
442
+ logger.warn({ model: selectedModel }, '[Routing] Vision guard — no vision-capable model found, request may fail');
443
+ }
444
+ }
445
+ } catch (err) {
446
+ logger.debug({ err: err.message }, '[Routing] Vision guard check failed, proceeding');
447
+ }
448
+ }
449
+
450
+ // Phase 3.1 — kNN routing hint.
451
+ // If the index has enough entries, query it with the last user message.
452
+ // A high-confidence kNN suggestion overrides the heuristic selection.
453
+ let knnResult = null;
454
+ if (config.routing?.knnEnabled !== false) {
455
+ try {
456
+ const msgs = payload?.messages;
457
+ const lastMsg = Array.isArray(msgs) ? msgs[msgs.length - 1]?.content : null;
458
+ const queryText = typeof lastMsg === 'string' ? lastMsg
459
+ : Array.isArray(lastMsg) ? lastMsg.filter(b => b?.type === 'text').map(b => b.text || '').join(' ')
460
+ : null;
461
+ if (queryText) {
462
+ knnResult = await getKnnRouter().query(queryText);
463
+ if (knnResult && knnResult.confidence > 0.7 && knnResult.model && knnResult.model !== selectedModel) {
464
+ // High confidence — trust kNN's model recommendation directly.
465
+ logger.debug({
466
+ from: `${provider}:${selectedModel}`,
467
+ to: `${knnResult.provider}:${knnResult.model}`,
468
+ confidence: knnResult.confidence.toFixed(3),
469
+ }, '[Routing] kNN override');
470
+ provider = knnResult.provider;
471
+ selectedModel = knnResult.model;
472
+ method = method + '+knn';
473
+ } else if (knnResult && knnResult.confidence > 0.4 && knnResult.confidence <= 0.7) {
474
+ // Ambiguous signal — neighbors are split, we can't trust any single model
475
+ // recommendation. Err on quality: bump the current tier one step up so the
476
+ // request gets a more capable model rather than risking a bad answer from
477
+ // a model that was borderline for similar past requests.
478
+ const TIER_ORDER = ['SIMPLE', 'MEDIUM', 'COMPLEX', 'REASONING'];
479
+ const currentIdx = TIER_ORDER.indexOf(tier);
480
+ if (currentIdx >= 0 && currentIdx < TIER_ORDER.length - 1) {
481
+ const upgradedTier = TIER_ORDER[currentIdx + 1];
482
+ try {
483
+ const upgraded = selector.selectModel(upgradedTier, null);
484
+ logger.debug({
485
+ from: `${tier}:${provider}:${selectedModel}`,
486
+ to: `${upgradedTier}:${upgraded.provider}:${upgraded.model}`,
487
+ confidence: knnResult.confidence.toFixed(3),
488
+ }, '[Routing] kNN ambiguous — escalating tier for safety');
489
+ provider = upgraded.provider;
490
+ selectedModel = upgraded.model;
491
+ tier = upgradedTier;
492
+ method = method + '+knn_ambiguous_escalate';
493
+ } catch (err) {
494
+ logger.debug({ err: err.message }, '[Routing] kNN ambiguous escalation failed, keeping current tier');
495
+ }
496
+ }
497
+ }
498
+ }
499
+ } catch (err) {
500
+ logger.debug({ err: err.message }, '[Routing] kNN query failed, ignoring');
501
+ }
502
+ }
503
+
504
+ // Phase 4.1 — LinUCB bandit intra-tier selection.
505
+ // When there are two candidates (heuristic vs kNN), the bandit picks the
506
+ // one with the highest estimated UCB score for the current context.
507
+ if (config.routing?.banditEnabled !== false && knnResult && knnResult.model) {
508
+ try {
509
+ // Build candidates: current selection and kNN alternative if different
510
+ const allCandidates = [{ provider, model: selectedModel }];
511
+ if (knnResult.model !== selectedModel) {
512
+ allCandidates.push({ provider: knnResult.provider, model: knnResult.model });
513
+ }
514
+
515
+ if (allCandidates.length > 1) {
516
+ const bandit = getBandit();
517
+ const TASK_TYPES = ['code_gen', 'summarization', 'reasoning', 'factoid', 'chat', 'other'];
518
+ const inferredTask = (analysis.breakdown?.taskType?.reason || 'other').toLowerCase();
519
+ const taskIdx = Math.max(0, TASK_TYPES.findIndex(t => inferredTask.includes(t)));
520
+ const ctx = [
521
+ (analysis.score || 0) / 100,
522
+ Math.log(Math.max(1, analysis.breakdown?.tokenCount || 0) + 1) / 15,
523
+ ((payload?.tools?.length ?? 0) > 0) ? 1 : 0,
524
+ options.streaming ? 1 : 0,
525
+ risk?.level === 'high' ? 1 : risk?.level === 'medium' ? 0.5 : 0,
526
+ agenticResult?.isAgentic ? 1 : 0,
527
+ ...TASK_TYPES.map((_, i) => i === taskIdx ? 1 : 0),
528
+ ];
529
+ const picked = bandit.pick(tier, allCandidates, ctx);
530
+ if (picked && picked.model !== selectedModel) {
531
+ logger.debug({
532
+ from: `${provider}:${selectedModel}`,
533
+ to: `${picked.provider}:${picked.model}`,
534
+ ucb: picked.ucb?.toFixed(4),
535
+ explored: picked.explored,
536
+ }, '[Routing] Bandit override');
537
+ provider = picked.provider;
538
+ selectedModel = picked.model;
539
+ method = method + (picked.explored ? '+bandit_explore' : '+bandit');
540
+ }
541
+ }
542
+ } catch (err) {
543
+ logger.debug({ err: err.message }, '[Routing] Bandit pick failed, ignoring');
544
+ }
545
+ }
546
+
547
+ // Phase 6.3 — deadline-aware fastest-model selection.
548
+ // Payload carries _deadlineMs injected by the orchestrator from the
549
+ // LYNKR-Deadline-Ms request header.
550
+ const deadlineMs = payload?._deadlineMs ?? null;
551
+ if (deadlineMs) {
552
+ try {
553
+ const fastest = chooseFastest([{ provider, model: selectedModel }], deadlineMs);
554
+ if (fastest && fastest.model !== selectedModel) {
555
+ logger.debug({
556
+ from: `${provider}:${selectedModel}`,
557
+ to: `${fastest.provider}:${fastest.model}`,
558
+ deadlineMs,
559
+ }, '[Routing] Deadline override');
560
+ provider = fastest.provider;
561
+ selectedModel = fastest.model;
562
+ method = method + '+deadline';
563
+ }
564
+ } catch (err) {
565
+ logger.debug({ err: err.message }, '[Routing] Deadline check failed, ignoring');
566
+ }
567
+ }
568
+
569
+ // Phase 6.1 — per-tenant policy overrides.
570
+ // tenantPolicy comes from options (threaded from Express res.locals via
571
+ // orchestrator → databricks → here).
572
+ if (options.tenantPolicy) {
573
+ try {
574
+ const overridden = applyTenantOverrides(
575
+ { provider, model: selectedModel, tier, method },
576
+ options.tenantPolicy,
577
+ );
578
+ if (overridden && overridden.model !== selectedModel) {
579
+ logger.debug({
580
+ from: `${provider}:${selectedModel}`,
581
+ to: `${overridden.provider}:${overridden.model}`,
582
+ }, '[Routing] Tenant override');
583
+ provider = overridden.provider;
584
+ selectedModel = overridden.model;
585
+ method = overridden.method;
586
+ }
587
+ } catch (err) {
588
+ logger.debug({ err: err.message }, '[Routing] Tenant override failed, ignoring');
589
+ }
590
+ }
299
591
 
300
592
  const decision = {
301
593
  provider,
@@ -309,10 +601,19 @@ async function determineProviderSmart(payload, options = {}) {
309
601
  analysis,
310
602
  embeddingsResult,
311
603
  agenticResult,
312
- costOptimized: false,
604
+ costOptimized,
313
605
  risk,
606
+ knnResult,
314
607
  };
315
608
 
609
+ // Phase 4.4 — shadow-mode policy comparison (fire-and-forget).
610
+ const shadowFn = getShadowPolicy();
611
+ if (shadowFn) {
612
+ setImmediate(() =>
613
+ shadowCompareAndLog({ payload, activeDecision: decision, shadowFn }).catch(() => {})
614
+ );
615
+ }
616
+
316
617
  // Phase 3: Record metrics
317
618
  routingMetrics.record(decision);
318
619
 
@@ -419,6 +720,14 @@ module.exports = {
419
720
  AGENT_TYPES,
420
721
  TIER_DEFINITIONS,
421
722
 
723
+ // Phase 3-6 modules
724
+ getKnnRouter,
725
+ getBandit,
726
+ getShadowPolicy,
727
+ shadowCompareAndLog,
728
+ chooseFastest,
729
+ applyTenantOverrides,
730
+
422
731
  // Telemetry
423
732
  telemetry,
424
733
  scoreResponseQuality,
@@ -0,0 +1,206 @@
1
+ /**
2
+ * kNN-based routing decision (Phase 3.1).
3
+ *
4
+ * Embeds the incoming query, finds the K nearest historical queries from the
5
+ * hnswlib-node index, and returns a confidence-weighted recommendation
6
+ * (model, expected quality, expected cost) based on those neighbors' actual
7
+ * outcomes from telemetry.
8
+ *
9
+ * Behavior:
10
+ * - Empty index → returns null. Caller falls back to heuristic router.
11
+ * - Sparse index (N < MIN_INDEX_SIZE) → returns null. Heuristic wins until
12
+ * we have enough data to be confident.
13
+ * - Embedder unavailable → returns null. Same fallback path.
14
+ *
15
+ * Bootstrap: scripts/build-knn-index.js (also accepts optional RouterBench
16
+ * corpus path to seed the index).
17
+ */
18
+
19
+ const fs = require('fs');
20
+ const path = require('path');
21
+ const logger = require('../logger');
22
+ const { generateEmbedding } = require('../cache/embeddings');
23
+ const { getEmbeddingCache } = require('./embedding-cache');
24
+
25
+ const INDEX_DIR = path.join(__dirname, '../../data/knn');
26
+ const INDEX_FILE = path.join(INDEX_DIR, 'index.hnsw');
27
+ const META_FILE = path.join(INDEX_DIR, 'meta.json');
28
+
29
+ const MAX_ELEMENTS = 50000; // capacity passed to initIndex(); add() stops inserting once size reaches it
30
+ const DIM = 768; // nomic-embed-text default
31
+ const K = 10; // neighbours requested from searchKnn(); also the divisor used for the confidence value
32
+ const MIN_INDEX_SIZE = 1000; // query() returns null while the index holds fewer entries than this
33
+
34
// Memoised result of the optional `hnswlib-node` require. `_hnswLoaded`
// records that the require was attempted, so a failure is not retried.
let _hnsw = null;
let _hnswLoaded = false;

/**
 * Returns the `hnswlib-node` module, or `null` when it cannot be required.
 * The require is attempted at most once; later calls return the cached result.
 *
 * @returns {object|null} The loaded module, or `null` if unavailable.
 */
function _loadHnsw() {
  if (!_hnswLoaded) {
    _hnswLoaded = true;
    try {
      _hnsw = require('hnswlib-node');
    } catch (err) {
      logger.debug({ err: err.message }, '[KnnRouter] hnswlib-node not available');
      _hnsw = null;
    }
  }
  return _hnsw;
}
47
+
48
+ class KnnRouter {
49
+ constructor() {
50
+ this.index = null;
51
+ this.meta = []; // parallel to index: per-id outcome { query, model, quality, cost, latency, tier }
52
+ this.size = 0;
53
+ this.dim = DIM;
54
+ this.ready = false;
55
+ }
56
+
57
+ load() {
58
+ const hnsw = _loadHnsw();
59
+ if (!hnsw) return false;
60
+ try {
61
+ if (!fs.existsSync(INDEX_FILE) || !fs.existsSync(META_FILE)) {
62
+ // Initialize empty index (caller can add() later)
63
+ this.index = new hnsw.HierarchicalNSW('cosine', this.dim);
64
+ this.index.initIndex(MAX_ELEMENTS);
65
+ this.meta = [];
66
+ this.size = 0;
67
+ this.ready = true;
68
+ return true;
69
+ }
70
+ const metaData = JSON.parse(fs.readFileSync(META_FILE, 'utf8'));
71
+ this.dim = metaData.dim || DIM;
72
+ this.meta = metaData.entries || [];
73
+ this.size = this.meta.length;
74
+ this.index = new hnsw.HierarchicalNSW('cosine', this.dim);
75
+ this.index.readIndexSync(INDEX_FILE, MAX_ELEMENTS);
76
+ this.ready = true;
77
+ logger.info({ size: this.size, dim: this.dim }, '[KnnRouter] Index loaded');
78
+ return true;
79
+ } catch (err) {
80
+ logger.warn({ err: err.message }, '[KnnRouter] Index load failed');
81
+ return false;
82
+ }
83
+ }
84
+
85
+ save() {
86
+ if (!this.ready || !this.index) return;
87
+ try {
88
+ fs.mkdirSync(INDEX_DIR, { recursive: true });
89
+ this.index.writeIndexSync(INDEX_FILE);
90
+ fs.writeFileSync(META_FILE, JSON.stringify({ dim: this.dim, entries: this.meta }, null, 0));
91
+ } catch (err) {
92
+ logger.warn({ err: err.message }, '[KnnRouter] Index save failed');
93
+ }
94
+ }
95
+
96
+ add(embedding, outcome) {
97
+ if (!this.ready || !this.index || !Array.isArray(embedding)) return;
98
+ if (this.size >= MAX_ELEMENTS) {
99
+ // Simple FIFO eviction: drop the oldest meta and reuse its id
100
+ // hnswlib doesn't support deletion in place; we just stop adding past max
101
+ return;
102
+ }
103
+ this.index.addPoint(embedding, this.size);
104
+ this.meta.push(outcome);
105
+ this.size++;
106
+ }
107
+
108
+ async query(text) {
109
+ if (!this.ready) this.load();
110
+ if (!this.ready || !this.index || this.size < MIN_INDEX_SIZE) return null;
111
+ if (!text || typeof text !== 'string') return null;
112
+
113
+ const cache = getEmbeddingCache();
114
+ let embedding = cache.get(text);
115
+ if (!embedding) {
116
+ try {
117
+ embedding = await generateEmbedding(text);
118
+ if (!embedding || embedding.length !== this.dim) {
119
+ // Skip if dim mismatch (embedder produced different dimensions)
120
+ return null;
121
+ }
122
+ cache.set(text, embedding);
123
+ } catch (err) {
124
+ logger.debug({ err: err.message }, '[KnnRouter] Embedding failed, skipping');
125
+ return null;
126
+ }
127
+ }
128
+
129
+ let result;
130
+ try {
131
+ result = this.index.searchKnn(embedding, K);
132
+ } catch (err) {
133
+ logger.debug({ err: err.message }, '[KnnRouter] Search failed');
134
+ return null;
135
+ }
136
+
137
+ const neighbors = (result.neighbors || []).map((id, i) => ({
138
+ id,
139
+ distance: result.distances?.[i] ?? 1,
140
+ outcome: this.meta[id],
141
+ })).filter(n => n.outcome);
142
+
143
+ if (neighbors.length === 0) return null;
144
+
145
+ // Confidence-weighted aggregation per candidate model.
146
+ // weight = 1 - distance (cosine distance → similarity)
147
+ const byModel = new Map();
148
+ for (const n of neighbors) {
149
+ const w = Math.max(0, 1 - n.distance);
150
+ const m = `${n.outcome.provider}:${n.outcome.model}`;
151
+ if (!byModel.has(m)) {
152
+ byModel.set(m, { weight: 0, quality: 0, cost: 0, latency: 0, count: 0, sample: n.outcome });
153
+ }
154
+ const agg = byModel.get(m);
155
+ agg.weight += w;
156
+ agg.quality += w * (n.outcome.quality || 50);
157
+ agg.cost += w * (n.outcome.cost || 0);
158
+ agg.latency += w * (n.outcome.latency || 0);
159
+ agg.count++;
160
+ }
161
+
162
+ let best = null;
163
+ let bestScore = -Infinity;
164
+ for (const [model, agg] of byModel) {
165
+ const avgQ = agg.quality / agg.weight;
166
+ const avgC = agg.cost / agg.weight;
167
+ // Score = quality / log(cost+1) — reward quality, penalise cost gently
168
+ const score = avgQ / Math.log(avgC * 1000 + 2);
169
+ if (score > bestScore) {
170
+ bestScore = score;
171
+ best = {
172
+ provider: agg.sample.provider,
173
+ model: agg.sample.model,
174
+ tier: agg.sample.tier,
175
+ expectedQuality: avgQ,
176
+ expectedCost: avgC,
177
+ expectedLatency: agg.latency / agg.weight,
178
+ confidence: Math.min(1, agg.weight / K),
179
+ neighborCount: agg.count,
180
+ };
181
+ }
182
+ }
183
+
184
+ return best;
185
+ }
186
+
187
+ getStats() {
188
+ return {
189
+ size: this.size,
190
+ maxElements: MAX_ELEMENTS,
191
+ ready: this.ready,
192
+ dim: this.dim,
193
+ };
194
+ }
195
+ }
196
+
197
// Process-wide router, created on first use.
let _instance = null;

/**
 * Returns the shared KnnRouter, creating it and calling `load()` once on
 * the first call.
 *
 * @returns {KnnRouter} The singleton router instance.
 */
function getKnnRouter() {
  if (_instance) return _instance;
  _instance = new KnnRouter();
  _instance.load();
  return _instance;
}
205
+
206
+ module.exports = { KnnRouter, getKnnRouter };