lynkr 9.0.2 → 9.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/README.md +21 -10
  2. package/bin/cli.js +18 -1
  3. package/bin/lynkr-trajectory.js +136 -0
  4. package/bin/lynkr-usage.js +219 -0
  5. package/funding.json +110 -0
  6. package/package.json +4 -2
  7. package/public/dashboard.html +665 -0
  8. package/scripts/build-knn-index.js +130 -0
  9. package/scripts/calibrate-thresholds.js +197 -0
  10. package/scripts/compare-policies.js +67 -0
  11. package/scripts/learn-output-ratios.js +162 -0
  12. package/scripts/refresh-pricing.js +122 -0
  13. package/scripts/run-routerarena.js +26 -0
  14. package/scripts/sample-regret.js +84 -0
  15. package/scripts/train-risk-classifier.js +191 -0
  16. package/src/api/files-router.js +6 -6
  17. package/src/api/middleware/budget-enforcer.js +60 -0
  18. package/src/api/middleware/budget.js +19 -1
  19. package/src/api/middleware/load-shedding.js +17 -0
  20. package/src/api/middleware/tenant.js +21 -0
  21. package/src/api/openai-router.js +1 -1
  22. package/src/api/router.js +204 -87
  23. package/src/budget/hierarchical-budget.js +159 -0
  24. package/src/cache/semantic.js +28 -2
  25. package/src/clients/databricks.js +68 -10
  26. package/src/clients/openai-format.js +31 -5
  27. package/src/config/index.js +246 -43
  28. package/src/context/toon.js +5 -4
  29. package/src/dashboard/api.js +170 -0
  30. package/src/dashboard/router.js +13 -0
  31. package/src/headroom/client.js +3 -109
  32. package/src/headroom/index.js +0 -14
  33. package/src/memory/search.js +0 -50
  34. package/src/orchestrator/index.js +106 -11
  35. package/src/orchestrator/preflight.js +188 -0
  36. package/src/prompts/system.js +34 -6
  37. package/src/routing/bandit.js +246 -0
  38. package/src/routing/cascade.js +106 -0
  39. package/src/routing/complexity-analyzer.js +7 -15
  40. package/src/routing/confidence-scorer.js +121 -0
  41. package/src/routing/context-validator.js +71 -0
  42. package/src/routing/cost-optimizer.js +5 -2
  43. package/src/routing/deadline.js +52 -0
  44. package/src/routing/drift-monitor.js +113 -0
  45. package/src/routing/embedding-cache.js +77 -0
  46. package/src/routing/index.js +374 -4
  47. package/src/routing/interaction.js +183 -0
  48. package/src/routing/knn-router.js +206 -0
  49. package/src/routing/latency-tracker.js +113 -71
  50. package/src/routing/model-tiers.js +156 -6
  51. package/src/routing/output-ratios.js +57 -0
  52. package/src/routing/regret-estimator.js +91 -0
  53. package/src/routing/reward-pipeline.js +62 -0
  54. package/src/routing/risk-analyzer.js +194 -0
  55. package/src/routing/risk-classifier.js +130 -0
  56. package/src/routing/shadow-mode.js +77 -0
  57. package/src/routing/telemetry.js +7 -0
  58. package/src/routing/tenant-policy.js +96 -0
  59. package/src/routing/tokenizer.js +162 -0
  60. package/src/server.js +12 -0
  61. package/src/stores/file-store.js +42 -7
  62. package/src/tools/smart-selection.js +11 -2
  63. package/src/training/trajectory-compressor.js +266 -0
  64. package/src/usage/aggregator.js +206 -0
  65. package/src/utils/markdown-ansi.js +146 -0
@@ -0,0 +1,206 @@
1
+ /**
2
+ * kNN-based routing decision (Phase 3.1).
3
+ *
4
+ * Embeds the incoming query, finds the K nearest historical queries from the
5
+ * hnswlib-node index, and returns a confidence-weighted recommendation
6
+ * (model, expected quality, expected cost) based on those neighbors' actual
7
+ * outcomes from telemetry.
8
+ *
9
+ * Behavior:
10
+ * - Empty index → returns null. Caller falls back to heuristic router.
11
+ * - Sparse index (N < MIN_INDEX_SIZE) → returns null. Heuristic wins until
12
+ * we have enough data to be confident.
13
+ * - Embedder unavailable → returns null. Same fallback path.
14
+ *
15
+ * Bootstrap: scripts/build-knn-index.js (also accepts optional RouterBench
16
+ * corpus path to seed the index).
17
+ */
18
+
19
+ const fs = require('fs');
20
+ const path = require('path');
21
+ const logger = require('../logger');
22
+ const { generateEmbedding } = require('../cache/embeddings');
23
+ const { getEmbeddingCache } = require('./embedding-cache');
24
+
25
+ const INDEX_DIR = path.join(__dirname, '../../data/knn');
26
+ const INDEX_FILE = path.join(INDEX_DIR, 'index.hnsw');
27
+ const META_FILE = path.join(INDEX_DIR, 'meta.json');
28
+
29
+ const MAX_ELEMENTS = 50000;
30
+ const DIM = 768; // nomic-embed-text default
31
+ const K = 10;
32
+ const MIN_INDEX_SIZE = 1000;
33
+
34
let _hnsw = null;
let _hnswLoaded = false;

/**
 * Resolve the optional `hnswlib-node` dependency at most once.
 *
 * The first call attempts the require and remembers the outcome; later calls
 * return the remembered value without retrying.
 *
 * @returns {object|null} The hnswlib-node module, or null when it cannot be loaded.
 */
function _loadHnsw() {
  if (!_hnswLoaded) {
    _hnswLoaded = true;
    try {
      _hnsw = require('hnswlib-node');
    } catch (loadError) {
      _hnsw = null;
      logger.debug({ err: loadError.message }, '[KnnRouter] hnswlib-node not available');
    }
  }
  return _hnsw;
}
47
+
48
/**
 * Nearest-neighbour router backed by an hnswlib-node cosine index.
 *
 * Each indexed point has a parallel entry in `meta` holding the outcome that
 * was observed for that query. `query()` embeds the incoming text, looks up the
 * K nearest points and aggregates their outcomes per `provider:model`.
 */
class KnnRouter {
  constructor() {
    this.index = null;
    // Parallel to the index: meta[id] is the outcome stored for point `id`.
    // query() reads provider, model, tier, quality, cost and latency from it.
    this.meta = [];
    this.size = 0;
    this.dim = DIM;
    this.ready = false;
  }

  /**
   * Load the persisted index and metadata, or start an empty index when either
   * file is missing.
   *
   * State is only replaced once everything has been read successfully, so a
   * failed (re)load never leaves a previously working router half-overwritten.
   *
   * @returns {boolean} true when an index is ready for use, false otherwise.
   */
  load() {
    const hnsw = _loadHnsw();
    if (!hnsw) return false;
    try {
      if (!fs.existsSync(INDEX_FILE) || !fs.existsSync(META_FILE)) {
        // Nothing persisted yet: start empty (callers can add() later).
        const emptyIndex = new hnsw.HierarchicalNSW('cosine', this.dim);
        emptyIndex.initIndex(MAX_ELEMENTS);
        this.index = emptyIndex;
        this.meta = [];
        this.size = 0;
        this.ready = true;
        return true;
      }

      const metaData = JSON.parse(fs.readFileSync(META_FILE, 'utf8'));
      const dim = metaData.dim || DIM;
      const entries = Array.isArray(metaData.entries) ? metaData.entries : [];
      const index = new hnsw.HierarchicalNSW('cosine', dim);
      // NOTE(review): hnswlib-node documents the optional second argument of
      // readIndexSync as a boolean (allowReplaceDeleted), not a capacity, so
      // only the filename is passed here — confirm against the installed version.
      index.readIndexSync(INDEX_FILE);

      // Commit only after every step above succeeded.
      this.dim = dim;
      this.meta = entries;
      this.size = entries.length;
      this.index = index;
      this.ready = true;
      logger.info({ size: this.size, dim: this.dim }, '[KnnRouter] Index loaded');
      return true;
    } catch (err) {
      logger.warn({ err: err.message }, '[KnnRouter] Index load failed');
      return false;
    }
  }

  /**
   * Persist the index and its metadata. Best-effort: failures are logged, not thrown.
   */
  save() {
    if (!this.ready || !this.index) return;
    try {
      fs.mkdirSync(INDEX_DIR, { recursive: true });
      this.index.writeIndexSync(INDEX_FILE);
      fs.writeFileSync(META_FILE, JSON.stringify({ dim: this.dim, entries: this.meta }, null, 0));
    } catch (err) {
      logger.warn({ err: err.message }, '[KnnRouter] Index save failed');
    }
  }

  /**
   * Add one embedded query and its observed outcome.
   *
   * @param {number[]} embedding - Embedding vector; must be a plain array.
   * @param {object} outcome - Outcome record stored at the new point's id.
   */
  add(embedding, outcome) {
    if (!this.ready || !this.index || !Array.isArray(embedding)) return;
    if (this.size >= MAX_ELEMENTS) {
      // No eviction is implemented: once the index is full, new points are
      // silently dropped.
      return;
    }
    // The point id is the current size, which keeps ids aligned with `meta`.
    this.index.addPoint(embedding, this.size);
    this.meta.push(outcome);
    this.size++;
  }

  /**
   * Recommend a model for `text` from the outcomes of its nearest neighbours.
   *
   * @param {string} text - Incoming query text.
   * @returns {Promise<object|null>} Recommendation
   *   ({ provider, model, tier, expectedQuality, expectedCost, expectedLatency,
   *   confidence, neighborCount }), or null when the index is not ready, holds
   *   fewer than MIN_INDEX_SIZE points, the text is empty, embedding or search
   *   fails, or no neighbour carries usable weight.
   */
  async query(text) {
    if (!this.ready) this.load();
    if (!this.ready || !this.index || this.size < MIN_INDEX_SIZE) return null;
    if (!text || typeof text !== 'string') return null;

    const cache = getEmbeddingCache();
    let embedding = cache.get(text);
    if (!embedding) {
      try {
        embedding = await generateEmbedding(text);
        if (!embedding || embedding.length !== this.dim) {
          // Embedder produced a different dimensionality than the index.
          return null;
        }
        cache.set(text, embedding);
      } catch (err) {
        logger.debug({ err: err.message }, '[KnnRouter] Embedding failed, skipping');
        return null;
      }
    } else if (embedding.length !== this.dim) {
      // A cached vector can also disagree with the index dimensionality.
      return null;
    }

    let result;
    try {
      result = this.index.searchKnn(embedding, K);
    } catch (err) {
      logger.debug({ err: err.message }, '[KnnRouter] Search failed');
      return null;
    }

    const neighbors = (result.neighbors || [])
      .map((id, i) => ({
        id,
        distance: result.distances?.[i] ?? 1,
        outcome: this.meta[id],
      }))
      .filter((n) => n.outcome);

    if (neighbors.length === 0) return null;

    // Similarity-weighted aggregation per candidate model.
    // weight = 1 - cosine distance, clamped at 0.
    const byModel = new Map();
    for (const n of neighbors) {
      const w = Math.max(0, 1 - n.distance);
      const m = `${n.outcome.provider}:${n.outcome.model}`;
      if (!byModel.has(m)) {
        byModel.set(m, { weight: 0, quality: 0, cost: 0, latency: 0, count: 0, sample: n.outcome });
      }
      const agg = byModel.get(m);
      agg.weight += w;
      // `??` rather than `||`: a recorded quality of 0 is a real (bad) outcome
      // and must not be replaced by the neutral default of 50.
      agg.quality += w * (n.outcome.quality ?? 50);
      agg.cost += w * (n.outcome.cost ?? 0);
      agg.latency += w * (n.outcome.latency ?? 0);
      agg.count++;
    }

    let best = null;
    let bestScore = -Infinity;
    for (const agg of byModel.values()) {
      // All neighbours for this model had zero weight: no basis for an average.
      if (agg.weight <= 0) continue;
      const avgQ = agg.quality / agg.weight;
      const avgC = agg.cost / agg.weight;
      // Score = quality / ln(cost * 1000 + 2): reward quality, penalise cost
      // gently. The +2 keeps the denominator positive when cost is 0.
      const score = avgQ / Math.log(avgC * 1000 + 2);
      if (score > bestScore) {
        bestScore = score;
        best = {
          provider: agg.sample.provider,
          model: agg.sample.model,
          tier: agg.sample.tier,
          expectedQuality: avgQ,
          expectedCost: avgC,
          expectedLatency: agg.latency / agg.weight,
          confidence: Math.min(1, agg.weight / K),
          neighborCount: agg.count,
        };
      }
    }

    return best;
  }

  /**
   * @returns {{size: number, maxElements: number, ready: boolean, dim: number}}
   *   Snapshot of the router's current state.
   */
  getStats() {
    return {
      size: this.size,
      maxElements: MAX_ELEMENTS,
      ready: this.ready,
      dim: this.dim,
    };
  }
}
196
+
197
let _instance = null;

/**
 * Return the shared KnnRouter, creating it and triggering its first load()
 * the first time this is called.
 *
 * @returns {KnnRouter} The process-wide router instance.
 */
function getKnnRouter() {
  if (_instance) return _instance;
  _instance = new KnnRouter();
  _instance.load();
  return _instance;
}
205
+
206
+ module.exports = { KnnRouter, getKnnRouter };
@@ -1,80 +1,78 @@
1
1
  /**
2
- * Rolling Latency Tracker
2
+ * Rolling Latency Tracker (per provider:model)
3
3
  *
4
- * Tracks per-provider latency using circular buffers to provide
5
- * P50/P95/P99 percentile statistics for routing decisions.
4
+ * Tracks latency keyed by `${provider}:${model}` so models within a provider
5
+ * (Opus vs Haiku) get separate stats. Backward-compatible: callers that pass
6
+ * only a provider still work — they're tracked under `${provider}:*`.
7
+ *
8
+ * Phase 1.5 of the routing overhaul: previous version keyed by provider only.
6
9
  *
7
10
  * @module routing/latency-tracker
8
11
  */
9
12
 
10
13
  const logger = require("../logger");
11
14
 
12
- /** Size of the circular buffer per provider */
13
15
  const BUFFER_SIZE = 200;
14
-
15
- /** Minimum sample count before penalizeScore returns a meaningful value */
16
16
  const MIN_SAMPLES = 10;
17
17
 
18
- /**
19
- * @typedef {Object} LatencyStats
20
- * @property {number} p50 - 50th percentile latency (ms)
21
- * @property {number} p95 - 95th percentile latency (ms)
22
- * @property {number} p99 - 99th percentile latency (ms)
23
- * @property {number} avg - Average latency (ms)
24
- * @property {number} count - Total measurements recorded
25
- * @property {number} lastUpdated - Timestamp of the last recorded measurement
26
- */
18
/** Placeholder model name for measurements recorded without a model. */
const ANY_MODEL = '*';

/**
 * Build the map key for a (provider, model) pair.
 * A missing or empty model falls back to the wildcard.
 */
function _key(provider, model) {
  const modelPart = model || ANY_MODEL;
  return `${provider}:${modelPart}`;
}
27
24
 
28
25
  class LatencyTracker {
29
26
  constructor() {
30
- /** @type {Map<string, { buffer: number[], index: number, count: number, lastUpdated: number }>} */
31
- this._providers = new Map();
27
+ /** @type {Map<string, { buffer: number[], index: number, count: number, lastUpdated: number, provider: string, model: string }>} */
28
+ this._entries = new Map();
32
29
  }
33
30
 
34
31
  /**
35
- * Record a latency measurement for a provider.
36
- * @param {string} provider - Provider name (e.g. "databricks", "ollama")
37
- * @param {number} latencyMs - Measured latency in milliseconds
32
+ * Record a latency measurement.
33
+ *
34
+ * Signatures:
35
+ * record(provider, latencyMs) // legacy
36
+ * record(provider, model, latencyMs) // preferred
38
37
  */
39
- record(provider, latencyMs) {
40
- if (!provider || typeof latencyMs !== "number" || latencyMs < 0) {
41
- return;
38
+ record(provider, modelOrLatency, maybeLatency) {
39
+ let model;
40
+ let latencyMs;
41
+ if (typeof modelOrLatency === 'number') {
42
+ model = ANY_MODEL;
43
+ latencyMs = modelOrLatency;
44
+ } else {
45
+ model = modelOrLatency || ANY_MODEL;
46
+ latencyMs = maybeLatency;
42
47
  }
43
48
 
44
- let entry = this._providers.get(provider);
49
+ if (!provider || typeof latencyMs !== "number" || latencyMs < 0) return;
50
+
51
+ const k = _key(provider, model);
52
+ let entry = this._entries.get(k);
45
53
  if (!entry) {
46
54
  entry = {
47
55
  buffer: new Array(BUFFER_SIZE).fill(0),
48
56
  index: 0,
49
57
  count: 0,
50
58
  lastUpdated: 0,
59
+ provider,
60
+ model,
51
61
  };
52
- this._providers.set(provider, entry);
62
+ this._entries.set(k, entry);
53
63
  }
54
-
55
64
  entry.buffer[entry.index] = latencyMs;
56
65
  entry.index = (entry.index + 1) % BUFFER_SIZE;
57
66
  entry.count += 1;
58
67
  entry.lastUpdated = Date.now();
59
68
  }
60
69
 
61
- /**
62
- * Get latency statistics for a specific provider.
63
- * @param {string} provider - Provider name
64
- * @returns {LatencyStats|null} Statistics or null if no data
65
- */
66
- getStats(provider) {
67
- const entry = this._providers.get(provider);
68
- if (!entry || entry.count === 0) {
69
- return null;
70
- }
71
-
70
+ _computeStats(entry) {
71
+ if (!entry || entry.count === 0) return null;
72
72
  const sampleCount = Math.min(entry.count, BUFFER_SIZE);
73
73
  const samples = entry.buffer.slice(0, sampleCount);
74
74
  const sorted = samples.slice().sort((a, b) => a - b);
75
-
76
75
  const sum = sorted.reduce((acc, v) => acc + v, 0);
77
-
78
76
  return {
79
77
  p50: sorted[Math.floor(sampleCount * 0.5)],
80
78
  p95: sorted[Math.floor(sampleCount * 0.95)],
@@ -82,61 +80,105 @@ class LatencyTracker {
82
80
  avg: Math.round(sum / sampleCount),
83
81
  count: entry.count,
84
82
  lastUpdated: entry.lastUpdated,
83
+ provider: entry.provider,
84
+ model: entry.model,
85
85
  };
86
86
  }
87
87
 
88
88
  /**
89
- * Calculate a routing score penalty/bonus based on provider latency.
90
- *
91
- * Returns a value from -5 to +10 that can be added to a routing score:
92
- * +10 if P95 > 10000ms (very slow, penalise by boosting complexity toward cloud)
93
- * +5 if P95 > 5000ms
94
- * -5 if P50 < 1000ms (fast, reward)
95
- * 0 otherwise or if insufficient data
96
- *
97
- * @param {string} provider - Provider name
98
- * @returns {number} Score adjustment (-5 to +10)
89
+ * Get stats for a specific (provider, model) pair, or aggregated for a provider
90
+ * if model is omitted.
99
91
  */
100
- penalizeScore(provider) {
101
- const stats = this.getStats(provider);
102
- if (!stats || stats.count < MIN_SAMPLES) {
103
- return 0;
92
+ getStats(provider, model = null) {
93
+ if (model) {
94
+ return this._computeStats(this._entries.get(_key(provider, model)));
95
+ }
96
+ // Aggregate across all models for this provider
97
+ const provEntries = [];
98
+ for (const [k, entry] of this._entries) {
99
+ if (entry.provider === provider) provEntries.push(entry);
104
100
  }
101
+ if (provEntries.length === 0) return null;
102
+ if (provEntries.length === 1) return this._computeStats(provEntries[0]);
103
+
104
+ // Pool samples across model entries to compute combined percentiles
105
+ const pooled = [];
106
+ let total = 0;
107
+ let lastUpdated = 0;
108
+ for (const e of provEntries) {
109
+ const n = Math.min(e.count, BUFFER_SIZE);
110
+ for (let i = 0; i < n; i++) pooled.push(e.buffer[i]);
111
+ total += e.count;
112
+ if (e.lastUpdated > lastUpdated) lastUpdated = e.lastUpdated;
113
+ }
114
+ if (pooled.length === 0) return null;
115
+ pooled.sort((a, b) => a - b);
116
+ const sum = pooled.reduce((acc, v) => acc + v, 0);
117
+ return {
118
+ p50: pooled[Math.floor(pooled.length * 0.5)],
119
+ p95: pooled[Math.floor(pooled.length * 0.95)],
120
+ p99: pooled[Math.floor(pooled.length * 0.99)],
121
+ avg: Math.round(sum / pooled.length),
122
+ count: total,
123
+ lastUpdated,
124
+ provider,
125
+ model: ANY_MODEL,
126
+ };
127
+ }
105
128
 
129
+ /** Latency penalty/bonus used by complexity-analyzer. */
130
+ penalizeScore(provider, model = null) {
131
+ const stats = this.getStats(provider, model);
132
+ if (!stats || stats.count < MIN_SAMPLES) return 0;
106
133
  if (stats.p95 > 10000) return 10;
107
134
  if (stats.p95 > 5000) return 5;
108
135
  if (stats.p50 < 1000) return -5;
109
-
110
136
  return 0;
111
137
  }
112
138
 
113
139
  /**
114
- * Get statistics for all tracked providers.
115
- * @returns {Map<string, LatencyStats>}
140
+ * Phase 1.5: per-model P95 lookup for deadline-aware routing (Phase 6.3).
141
+ * Returns null if insufficient samples.
142
+ */
143
+ getModelP95(provider, model) {
144
+ const stats = this.getStats(provider, model);
145
+ if (!stats || stats.count < MIN_SAMPLES) return null;
146
+ return stats.p95;
147
+ }
148
+
149
+ /**
150
+ * Whether a model is currently degraded (P95 > 2x its historical median).
151
+ * Currently uses a simple absolute threshold — better signal will come in
152
+ * Phase 4.3 (drift detection).
153
+ */
154
+ isDegraded(provider, model) {
155
+ const stats = this.getStats(provider, model);
156
+ if (!stats || stats.count < MIN_SAMPLES) return false;
157
+ return stats.p95 > stats.p50 * 2 && stats.p95 > 5000;
158
+ }
159
+
160
+ /**
161
+ * Get stats for every tracked entry.
162
+ *
163
+ * Backward-compat: when an entry was recorded via the legacy 2-arg
164
+ * `record(provider, latency)` signature, the model is the wildcard `*`
165
+ * and we return it keyed by provider name only. Entries with explicit
166
+ * models use the `provider:model` key.
116
167
  */
117
168
  getAllStats() {
118
169
  const result = new Map();
119
- for (const provider of this._providers.keys()) {
120
- const stats = this.getStats(provider);
121
- if (stats) {
122
- result.set(provider, stats);
123
- }
170
+ for (const [k, entry] of this._entries) {
171
+ const stats = this._computeStats(entry);
172
+ if (!stats) continue;
173
+ const outKey = entry.model === ANY_MODEL ? entry.provider : k;
174
+ result.set(outKey, stats);
124
175
  }
125
176
  return result;
126
177
  }
127
178
  }
128
179
 
129
- // ---------------------------------------------------------------------------
130
- // Singleton
131
- // ---------------------------------------------------------------------------
132
-
133
- /** @type {LatencyTracker|null} */
134
180
  let instance = null;
135
181
 
136
- /**
137
- * Get the singleton LatencyTracker instance.
138
- * @returns {LatencyTracker}
139
- */
140
182
  function getLatencyTracker() {
141
183
  if (!instance) {
142
184
  instance = new LatencyTracker();
@@ -145,4 +187,4 @@ function getLatencyTracker() {
145
187
  return instance;
146
188
  }
147
189
 
148
- module.exports = { LatencyTracker, getLatencyTracker };
190
+ module.exports = { LatencyTracker, getLatencyTracker, ANY_MODEL };
@@ -12,7 +12,10 @@ const config = require('../config');
12
12
  // Load tier config
13
13
  const TIER_CONFIG_PATH = path.join(__dirname, '../../config/model-tiers.json');
14
14
 
15
- // Tier definitions with complexity ranges
15
+ // Phase 1.4: calibrated thresholds (written by scripts/calibrate-thresholds.js)
16
+ const CALIBRATED_PATH = path.join(__dirname, '../../data/calibrated-thresholds.json');
17
+
18
+ // Tier definitions with complexity ranges (defaults; may be overridden by calibration)
16
19
  const TIER_DEFINITIONS = {
17
20
  SIMPLE: {
18
21
  description: 'Greetings, simple Q&A, confirmations',
@@ -41,13 +44,30 @@ class ModelTierSelector {
41
44
  this.tierConfig = null;
42
45
  this.localProviders = {};
43
46
  this.providerAliases = {};
47
+ /** Per-tier ranges, possibly overridden by calibration. */
48
+ this.ranges = null;
44
49
  this._loadConfig();
50
+ this._loadCalibrated();
45
51
  }
46
52
 
47
53
  /**
48
54
  * Load tier configuration from JSON file
49
55
  */
50
56
  _loadConfig() {
57
+ // Check if tier routing mode is active (all 4 TIER_* env vars set)
58
+ const tierRoutingMode = !!(
59
+ config.modelTiers?.SIMPLE?.trim() &&
60
+ config.modelTiers?.MEDIUM?.trim() &&
61
+ config.modelTiers?.COMPLEX?.trim() &&
62
+ config.modelTiers?.REASONING?.trim()
63
+ );
64
+
65
+ if (tierRoutingMode) {
66
+ logger.debug('[ModelTiers] Tier routing mode active, building config from TIER_* env vars');
67
+ this._buildFromEnvVars();
68
+ return;
69
+ }
70
+
51
71
  try {
52
72
  if (fs.existsSync(TIER_CONFIG_PATH)) {
53
73
  const data = JSON.parse(fs.readFileSync(TIER_CONFIG_PATH, 'utf8'));
@@ -65,9 +85,86 @@ class ModelTierSelector {
65
85
  }
66
86
  }
67
87
 
88
+ /**
89
+ * Phase 1.4: load calibrated tier thresholds if the nightly job has produced them.
90
+ * Falls back silently to TIER_DEFINITIONS when absent or malformed.
91
+ */
92
+ _loadCalibrated() {
93
+ this.ranges = this._defaultRanges();
94
+ try {
95
+ if (!fs.existsSync(CALIBRATED_PATH)) return;
96
+ const data = JSON.parse(fs.readFileSync(CALIBRATED_PATH, 'utf8'));
97
+ if (!data?.ranges) return;
98
+ const calibrated = {};
99
+ for (const tier of Object.keys(TIER_DEFINITIONS)) {
100
+ const r = data.ranges[tier];
101
+ if (Array.isArray(r) && r.length === 2 && r[0] <= r[1]) {
102
+ calibrated[tier] = r;
103
+ } else {
104
+ calibrated[tier] = TIER_DEFINITIONS[tier].range;
105
+ }
106
+ }
107
+ this.ranges = calibrated;
108
+ logger.info({ ranges: this.ranges, calibratedAt: data.calibratedAt }, '[ModelTiers] Using calibrated thresholds');
109
+ } catch (err) {
110
+ logger.debug({ err: err.message }, '[ModelTiers] Calibrated thresholds load failed; using defaults');
111
+ }
112
+ }
113
+
114
+ _defaultRanges() {
115
+ const ranges = {};
116
+ for (const [tier, def] of Object.entries(TIER_DEFINITIONS)) {
117
+ ranges[tier] = def.range.slice();
118
+ }
119
+ return ranges;
120
+ }
121
+
68
122
  /**
69
123
  * Load default tier config
70
124
  */
125
+ /**
126
+ * Build tier config from TIER_* environment variables
127
+ * Format: TIER_SIMPLE=provider:model
128
+ */
129
+ _buildFromEnvVars() {
130
+ this.tierConfig = {};
131
+ this.localProviders = {
132
+ ollama: { free: true, defaultTier: 'SIMPLE' },
133
+ llamacpp: { free: true, defaultTier: 'SIMPLE' },
134
+ lmstudio: { free: true, defaultTier: 'SIMPLE' },
135
+ mlx: { free: true, defaultTier: 'SIMPLE' },
136
+ };
137
+
138
+ const tiers = ['SIMPLE', 'MEDIUM', 'COMPLEX', 'REASONING'];
139
+ for (const tier of tiers) {
140
+ const envValue = config.modelTiers?.[tier]?.trim();
141
+ if (!envValue) continue;
142
+
143
+ // Parse provider:model format
144
+ const match = envValue.match(/^([a-z-]+):(.+)$/);
145
+ if (!match) {
146
+ logger.warn({ tier, value: envValue }, '[ModelTiers] Invalid TIER format, expected provider:model');
147
+ continue;
148
+ }
149
+
150
+ const [, provider, model] = match;
151
+
152
+ // Initialize tier config if not exists
153
+ if (!this.tierConfig[tier]) {
154
+ this.tierConfig[tier] = { preferred: {} };
155
+ }
156
+
157
+ // Set this as the ONLY preferred model for this tier+provider
158
+ this.tierConfig[tier].preferred[provider] = [model];
159
+
160
+ logger.debug({
161
+ tier,
162
+ provider,
163
+ model
164
+ }, '[ModelTiers] Tier configured from env');
165
+ }
166
+ }
167
+
71
168
  _loadDefaults() {
72
169
  this.tierConfig = {
73
170
  SIMPLE: { preferred: { ollama: ['llama3.2'], openai: ['gpt-4o-mini'] } },
@@ -92,20 +189,73 @@ class ModelTierSelector {
92
189
  }
93
190
 
94
191
  /**
95
- * Get tier from complexity score
192
+ * Get tier from complexity score.
193
+ * Phase 1.4: honors calibrated ranges when present.
96
194
  * @param {number} complexityScore - Score from 0-100
97
195
  * @returns {string} Tier name (SIMPLE, MEDIUM, COMPLEX, REASONING)
98
196
  */
99
197
  getTier(complexityScore) {
100
198
  const score = Math.max(0, Math.min(100, complexityScore || 0));
199
+ const ranges = this.ranges || this._defaultRanges();
200
+ for (const tier of Object.keys(TIER_DEFINITIONS)) {
201
+ const [lo, hi] = ranges[tier];
202
+ if (score >= lo && score <= hi) return tier;
203
+ }
204
+ return score > 75 ? 'REASONING' : 'SIMPLE';
205
+ }
101
206
 
102
- for (const [tier, def] of Object.entries(TIER_DEFINITIONS)) {
103
- if (score >= def.range[0] && score <= def.range[1]) {
104
- return tier;
207
+ /**
208
+ * Phase 1.3: find a model with at least `minContext` context window.
209
+ * Returns null when no qualifying model is available.
210
+ */
211
+ findContextCapable(minContext, preferredTier = null) {
212
+ const { getModelRegistrySync } = require('./model-registry');
213
+ const registry = getModelRegistrySync();
214
+ const tierOrder = preferredTier
215
+ ? [preferredTier, 'REASONING', 'COMPLEX', 'MEDIUM', 'SIMPLE']
216
+ : ['REASONING', 'COMPLEX', 'MEDIUM', 'SIMPLE'];
217
+ const seen = new Set();
218
+ for (const tier of tierOrder) {
219
+ if (seen.has(tier)) continue;
220
+ seen.add(tier);
221
+ const tierConfig = this.tierConfig[tier];
222
+ if (!tierConfig?.preferred) continue;
223
+ for (const [provider, models] of Object.entries(tierConfig.preferred)) {
224
+ for (const model of models) {
225
+ const cost = registry.getCost(model);
226
+ if (cost?.context && cost.context >= minContext) {
227
+ return { provider, model, tier, context: cost.context };
228
+ }
229
+ }
105
230
  }
106
231
  }
232
+ return null;
233
+ }
107
234
 
108
- return score > 75 ? 'REASONING' : 'SIMPLE';
235
+ /**
236
+ * Find a vision-capable model at or above `preferredTier`.
237
+ * Walks tier order from preferred upward; returns null when none available.
238
+ */
239
+ findVisionCapable(preferredTier = null) {
240
+ const { getModelRegistrySync } = require('./model-registry');
241
+ const registry = getModelRegistrySync();
242
+ const tierOrder = preferredTier
243
+ ? [preferredTier, 'COMPLEX', 'REASONING', 'MEDIUM', 'SIMPLE']
244
+ : ['COMPLEX', 'REASONING', 'MEDIUM', 'SIMPLE'];
245
+ const seen = new Set();
246
+ for (const t of tierOrder) {
247
+ if (seen.has(t)) continue;
248
+ seen.add(t);
249
+ const tierConfig = this.tierConfig[t];
250
+ if (!tierConfig?.preferred) continue;
251
+ for (const [provider, models] of Object.entries(tierConfig.preferred)) {
252
+ for (const model of models) {
253
+ const info = registry.getCost(model);
254
+ if (info?.vision) return { provider, model, tier: t };
255
+ }
256
+ }
257
+ }
258
+ return null;
109
259
  }
110
260
 
111
261
  /**