lynkr 9.0.2 → 9.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -10
- package/bin/cli.js +18 -1
- package/bin/lynkr-trajectory.js +136 -0
- package/bin/lynkr-usage.js +219 -0
- package/funding.json +110 -0
- package/package.json +4 -2
- package/public/dashboard.html +665 -0
- package/scripts/build-knn-index.js +130 -0
- package/scripts/calibrate-thresholds.js +197 -0
- package/scripts/compare-policies.js +67 -0
- package/scripts/learn-output-ratios.js +162 -0
- package/scripts/refresh-pricing.js +122 -0
- package/scripts/run-routerarena.js +26 -0
- package/scripts/sample-regret.js +84 -0
- package/scripts/train-risk-classifier.js +191 -0
- package/src/api/files-router.js +6 -6
- package/src/api/middleware/budget-enforcer.js +60 -0
- package/src/api/middleware/budget.js +19 -1
- package/src/api/middleware/load-shedding.js +17 -0
- package/src/api/middleware/tenant.js +21 -0
- package/src/api/openai-router.js +1 -1
- package/src/api/router.js +204 -87
- package/src/budget/hierarchical-budget.js +159 -0
- package/src/cache/semantic.js +28 -2
- package/src/clients/databricks.js +68 -10
- package/src/clients/openai-format.js +31 -5
- package/src/config/index.js +246 -43
- package/src/context/toon.js +5 -4
- package/src/dashboard/api.js +170 -0
- package/src/dashboard/router.js +13 -0
- package/src/headroom/client.js +3 -109
- package/src/headroom/index.js +0 -14
- package/src/memory/search.js +0 -50
- package/src/orchestrator/index.js +106 -11
- package/src/orchestrator/preflight.js +188 -0
- package/src/prompts/system.js +34 -6
- package/src/routing/bandit.js +246 -0
- package/src/routing/cascade.js +106 -0
- package/src/routing/complexity-analyzer.js +7 -15
- package/src/routing/confidence-scorer.js +121 -0
- package/src/routing/context-validator.js +71 -0
- package/src/routing/cost-optimizer.js +5 -2
- package/src/routing/deadline.js +52 -0
- package/src/routing/drift-monitor.js +113 -0
- package/src/routing/embedding-cache.js +77 -0
- package/src/routing/index.js +374 -4
- package/src/routing/interaction.js +183 -0
- package/src/routing/knn-router.js +206 -0
- package/src/routing/latency-tracker.js +113 -71
- package/src/routing/model-tiers.js +156 -6
- package/src/routing/output-ratios.js +57 -0
- package/src/routing/regret-estimator.js +91 -0
- package/src/routing/reward-pipeline.js +62 -0
- package/src/routing/risk-analyzer.js +194 -0
- package/src/routing/risk-classifier.js +130 -0
- package/src/routing/shadow-mode.js +77 -0
- package/src/routing/telemetry.js +7 -0
- package/src/routing/tenant-policy.js +96 -0
- package/src/routing/tokenizer.js +162 -0
- package/src/server.js +12 -0
- package/src/stores/file-store.js +42 -7
- package/src/tools/smart-selection.js +11 -2
- package/src/training/trajectory-compressor.js +266 -0
- package/src/usage/aggregator.js +206 -0
- package/src/utils/markdown-ansi.js +146 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* kNN-based routing decision (Phase 3.1).
|
|
3
|
+
*
|
|
4
|
+
* Embeds the incoming query, finds the K nearest historical queries from the
|
|
5
|
+
* hnswlib-node index, and returns a confidence-weighted recommendation
|
|
6
|
+
* (model, expected quality, expected cost) based on those neighbors' actual
|
|
7
|
+
* outcomes from telemetry.
|
|
8
|
+
*
|
|
9
|
+
* Behavior:
|
|
10
|
+
* - Empty index → returns null. Caller falls back to heuristic router.
|
|
11
|
+
* - Sparse index (N < MIN_INDEX_SIZE) → returns null. Heuristic wins until
|
|
12
|
+
* we have enough data to be confident.
|
|
13
|
+
* - Embedder unavailable → returns null. Same fallback path.
|
|
14
|
+
*
|
|
15
|
+
* Bootstrap: scripts/build-knn-index.js (also accepts optional RouterBench
|
|
16
|
+
* corpus path to seed the index).
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
const fs = require('fs');
|
|
20
|
+
const path = require('path');
|
|
21
|
+
const logger = require('../logger');
|
|
22
|
+
const { generateEmbedding } = require('../cache/embeddings');
|
|
23
|
+
const { getEmbeddingCache } = require('./embedding-cache');
|
|
24
|
+
|
|
25
|
+
const INDEX_DIR = path.join(__dirname, '../../data/knn');
|
|
26
|
+
const INDEX_FILE = path.join(INDEX_DIR, 'index.hnsw');
|
|
27
|
+
const META_FILE = path.join(INDEX_DIR, 'meta.json');
|
|
28
|
+
|
|
29
|
+
const MAX_ELEMENTS = 50000;
|
|
30
|
+
const DIM = 768; // nomic-embed-text default
|
|
31
|
+
const K = 10;
|
|
32
|
+
const MIN_INDEX_SIZE = 1000;
|
|
33
|
+
|
|
34
|
+
let _hnsw = null;
|
|
35
|
+
let _hnswLoaded = false;
|
|
36
|
+
/**
 * Lazily resolve the optional `hnswlib-node` dependency.
 *
 * The require is attempted at most once per process; the outcome (the module,
 * or null when it cannot be loaded) is memoised in the module-level
 * `_hnsw` / `_hnswLoaded` pair.
 *
 * @returns {object|null} The hnswlib-node module, or null when unavailable.
 */
function _loadHnsw() {
  if (!_hnswLoaded) {
    _hnswLoaded = true;
    let resolved = null;
    try {
      resolved = require('hnswlib-node');
    } catch (err) {
      logger.debug({ err: err.message }, '[KnnRouter] hnswlib-node not available');
    }
    _hnsw = resolved;
  }
  return _hnsw;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Nearest-neighbour router backed by an hnswlib-node HNSW index.
 *
 * Every point in the index is the embedding of a past query, and `meta[id]`
 * holds the outcome recorded for the point with that id. `query()` embeds a
 * new prompt, fetches its K nearest neighbours and recommends the
 * provider/model whose neighbours show the best similarity-weighted
 * quality-to-cost score.
 */
class KnnRouter {
  constructor() {
    this.index = null;
    this.meta = []; // parallel to index: per-id outcome { query, model, quality, cost, latency, tier }
    this.size = 0;
    this.dim = DIM;
    this.ready = false;
  }

  /**
   * Load the persisted index + metadata, or start an empty index when nothing
   * has been persisted yet.
   *
   * Instance state is only committed once every step has succeeded, so a
   * failed load never leaves a half-initialised router behind.
   *
   * @returns {boolean} true when the router is ready, false when hnswlib-node
   *   is unavailable or the load failed.
   */
  load() {
    const hnsw = _loadHnsw();
    if (!hnsw) return false;
    try {
      const hasPersistedIndex = fs.existsSync(INDEX_FILE) && fs.existsSync(META_FILE);
      if (!hasPersistedIndex) {
        // Initialize empty index (caller can add() later)
        const emptyIndex = new hnsw.HierarchicalNSW('cosine', this.dim);
        emptyIndex.initIndex(MAX_ELEMENTS);
        this.index = emptyIndex;
        this.meta = [];
        this.size = 0;
        this.ready = true;
        return true;
      }

      const metaData = JSON.parse(fs.readFileSync(META_FILE, 'utf8'));
      const dim = metaData.dim || DIM;
      const entries = Array.isArray(metaData.entries) ? metaData.entries : [];

      const index = new hnsw.HierarchicalNSW('cosine', dim);
      // readIndexSync's optional second argument is a boolean
      // (allowReplaceDeleted), not a capacity, so only the filename is passed.
      index.readIndexSync(INDEX_FILE);
      // Make sure an index written with a smaller capacity can still grow to
      // MAX_ELEMENTS, which is the limit add() enforces.
      if (index.getMaxElements() < MAX_ELEMENTS) {
        index.resizeIndex(MAX_ELEMENTS);
      }

      this.dim = dim;
      this.meta = entries;
      this.size = entries.length;
      this.index = index;
      this.ready = true;
      logger.info({ size: this.size, dim: this.dim }, '[KnnRouter] Index loaded');
      return true;
    } catch (err) {
      logger.warn({ err: err.message }, '[KnnRouter] Index load failed');
      return false;
    }
  }

  /**
   * Persist the index and its metadata to INDEX_DIR. Best-effort: failures are
   * logged and swallowed.
   */
  save() {
    if (!this.ready || !this.index) return;
    try {
      fs.mkdirSync(INDEX_DIR, { recursive: true });
      this.index.writeIndexSync(INDEX_FILE);
      fs.writeFileSync(META_FILE, JSON.stringify({ dim: this.dim, entries: this.meta }, null, 0));
    } catch (err) {
      logger.warn({ err: err.message }, '[KnnRouter] Index save failed');
    }
  }

  /**
   * Append one observation to the index.
   *
   * Silently ignored when the router is not ready, the embedding is not a
   * plain array of the index dimension, or the index is full.
   *
   * @param {number[]} embedding - Query embedding; must have `this.dim` entries.
   * @param {object} outcome - Outcome stored at the new point's id.
   */
  add(embedding, outcome) {
    if (!this.ready || !this.index || !Array.isArray(embedding)) return;
    if (embedding.length !== this.dim) {
      // Same policy as query(): a differently-sized embedding is skipped
      // instead of being handed to the native index.
      logger.debug(
        { expected: this.dim, got: embedding.length },
        '[KnnRouter] Embedding dimension mismatch, skipping add'
      );
      return;
    }
    // No eviction is implemented: once the index is full, new points are dropped.
    if (this.size >= MAX_ELEMENTS) return;

    this.index.addPoint(embedding, this.size);
    this.meta.push(outcome);
    this.size++;
  }

  /**
   * Recommend a provider/model for `text` from its K nearest neighbours.
   *
   * @param {string} text - Incoming query text.
   * @returns {Promise<object|null>} Recommendation
   *   `{ provider, model, tier, expectedQuality, expectedCost, expectedLatency,
   *   confidence, neighborCount }`, or null when the index is not ready, holds
   *   fewer than MIN_INDEX_SIZE points, the text cannot be embedded, or no
   *   usable neighbour is found.
   */
  async query(text) {
    if (!this.ready) this.load();
    if (!this.ready || !this.index || this.size < MIN_INDEX_SIZE) return null;
    if (!text || typeof text !== 'string') return null;

    const cache = getEmbeddingCache();
    let embedding = cache.get(text);
    if (!embedding) {
      try {
        embedding = await generateEmbedding(text);
        if (!embedding || embedding.length !== this.dim) {
          // Skip if dim mismatch (embedder produced different dimensions)
          return null;
        }
        cache.set(text, embedding);
      } catch (err) {
        logger.debug({ err: err.message }, '[KnnRouter] Embedding failed, skipping');
        return null;
      }
    }

    let result;
    try {
      result = this.index.searchKnn(embedding, K);
    } catch (err) {
      logger.debug({ err: err.message }, '[KnnRouter] Search failed');
      return null;
    }

    // Drop neighbours whose id has no metadata entry.
    const neighbors = (result.neighbors || [])
      .map((id, i) => ({
        id,
        distance: result.distances?.[i] ?? 1,
        outcome: this.meta[id],
      }))
      .filter((n) => n.outcome);

    if (neighbors.length === 0) return null;

    // Confidence-weighted aggregation per candidate model.
    // weight = 1 - distance (cosine distance → similarity), floored at 0.
    const byModel = new Map();
    for (const { distance, outcome } of neighbors) {
      const w = Math.max(0, 1 - distance);
      const m = `${outcome.provider}:${outcome.model}`;
      if (!byModel.has(m)) {
        byModel.set(m, { weight: 0, quality: 0, cost: 0, latency: 0, count: 0, sample: outcome });
      }
      // A recorded quality of 0 is a real observation and must count as 0;
      // only a missing or non-numeric quality falls back to the neutral 50.
      const quality = Number.isFinite(outcome.quality) ? outcome.quality : 50;
      const agg = byModel.get(m);
      agg.weight += w;
      agg.quality += w * quality;
      agg.cost += w * (outcome.cost || 0);
      agg.latency += w * (outcome.latency || 0);
      agg.count++;
    }

    let best = null;
    let bestScore = -Infinity;
    for (const agg of byModel.values()) {
      // A candidate with zero total weight has no usable evidence; dividing by
      // it would only produce NaN averages.
      if (!(agg.weight > 0)) continue;

      const avgQ = agg.quality / agg.weight;
      const avgC = agg.cost / agg.weight;
      // Score = quality / ln(cost * 1000 + 2) — reward quality, penalise cost
      // gently. The +2 keeps the divisor at or above ln(2) for a zero cost.
      const score = avgQ / Math.log(avgC * 1000 + 2);
      if (score > bestScore) {
        bestScore = score;
        best = {
          provider: agg.sample.provider,
          model: agg.sample.model,
          tier: agg.sample.tier,
          expectedQuality: avgQ,
          expectedCost: avgC,
          expectedLatency: agg.latency / agg.weight,
          confidence: Math.min(1, agg.weight / K),
          neighborCount: agg.count,
        };
      }
    }

    return best;
  }

  /**
   * @returns {{size: number, maxElements: number, ready: boolean, dim: number}}
   *   Snapshot of the router's current state.
   */
  getStats() {
    return {
      size: this.size,
      maxElements: MAX_ELEMENTS,
      ready: this.ready,
      dim: this.dim,
    };
  }
}
|
|
196
|
+
|
|
197
|
+
let _instance = null;
|
|
198
|
+
/**
 * Return the process-wide KnnRouter, creating it and triggering its initial
 * load() on first use.
 *
 * @returns {KnnRouter} The shared router instance.
 */
function getKnnRouter() {
  if (_instance) return _instance;

  const router = new KnnRouter();
  _instance = router;
  router.load();
  return _instance;
}
|
|
205
|
+
|
|
206
|
+
module.exports = { KnnRouter, getKnnRouter };
|
|
@@ -1,80 +1,78 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Rolling Latency Tracker
|
|
2
|
+
* Rolling Latency Tracker (per provider:model)
|
|
3
3
|
*
|
|
4
|
-
* Tracks
|
|
5
|
-
*
|
|
4
|
+
* Tracks latency keyed by `${provider}:${model}` so models within a provider
|
|
5
|
+
* (Opus vs Haiku) get separate stats. Backward-compatible: callers that pass
|
|
6
|
+
* only a provider still work — they're tracked under `${provider}:*`.
|
|
7
|
+
*
|
|
8
|
+
* Phase 1.5 of the routing overhaul: previous version keyed by provider only.
|
|
6
9
|
*
|
|
7
10
|
* @module routing/latency-tracker
|
|
8
11
|
*/
|
|
9
12
|
|
|
10
13
|
const logger = require("../logger");
|
|
11
14
|
|
|
12
|
-
/** Size of the circular buffer per provider */
|
|
13
15
|
const BUFFER_SIZE = 200;
|
|
14
|
-
|
|
15
|
-
/** Minimum sample count before penalizeScore returns a meaningful value */
|
|
16
16
|
const MIN_SAMPLES = 10;
|
|
17
17
|
|
|
18
|
-
/**
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
* @property {number} count - Total measurements recorded
|
|
25
|
-
* @property {number} lastUpdated - Timestamp of the last recorded measurement
|
|
26
|
-
*/
|
|
18
|
+
/** Wildcard model used when caller doesn't specify one. */
|
|
19
|
+
const ANY_MODEL = '*';
|
|
20
|
+
|
|
21
|
+
/**
 * Build the map key for a (provider, model) pair.
 * A falsy model is replaced by the ANY_MODEL wildcard.
 *
 * @param {string} provider
 * @param {string} [model]
 * @returns {string} `${provider}:${model}`
 */
function _key(provider, model) {
  const modelPart = model ? model : ANY_MODEL;
  return `${provider}:${modelPart}`;
}
|
|
27
24
|
|
|
28
25
|
class LatencyTracker {
|
|
29
26
|
constructor() {
|
|
30
|
-
/** @type {Map<string, { buffer: number[], index: number, count: number, lastUpdated: number }>} */
|
|
31
|
-
this.
|
|
27
|
+
/** @type {Map<string, { buffer: number[], index: number, count: number, lastUpdated: number, provider: string, model: string }>} */
|
|
28
|
+
this._entries = new Map();
|
|
32
29
|
}
|
|
33
30
|
|
|
34
31
|
/**
|
|
35
|
-
* Record a latency measurement
|
|
36
|
-
*
|
|
37
|
-
*
|
|
32
|
+
* Record a latency measurement.
|
|
33
|
+
*
|
|
34
|
+
* Signatures:
|
|
35
|
+
* record(provider, latencyMs) // legacy
|
|
36
|
+
* record(provider, model, latencyMs) // preferred
|
|
38
37
|
*/
|
|
39
|
-
record(provider,
|
|
40
|
-
|
|
41
|
-
|
|
38
|
+
record(provider, modelOrLatency, maybeLatency) {
|
|
39
|
+
let model;
|
|
40
|
+
let latencyMs;
|
|
41
|
+
if (typeof modelOrLatency === 'number') {
|
|
42
|
+
model = ANY_MODEL;
|
|
43
|
+
latencyMs = modelOrLatency;
|
|
44
|
+
} else {
|
|
45
|
+
model = modelOrLatency || ANY_MODEL;
|
|
46
|
+
latencyMs = maybeLatency;
|
|
42
47
|
}
|
|
43
48
|
|
|
44
|
-
|
|
49
|
+
if (!provider || typeof latencyMs !== "number" || latencyMs < 0) return;
|
|
50
|
+
|
|
51
|
+
const k = _key(provider, model);
|
|
52
|
+
let entry = this._entries.get(k);
|
|
45
53
|
if (!entry) {
|
|
46
54
|
entry = {
|
|
47
55
|
buffer: new Array(BUFFER_SIZE).fill(0),
|
|
48
56
|
index: 0,
|
|
49
57
|
count: 0,
|
|
50
58
|
lastUpdated: 0,
|
|
59
|
+
provider,
|
|
60
|
+
model,
|
|
51
61
|
};
|
|
52
|
-
this.
|
|
62
|
+
this._entries.set(k, entry);
|
|
53
63
|
}
|
|
54
|
-
|
|
55
64
|
entry.buffer[entry.index] = latencyMs;
|
|
56
65
|
entry.index = (entry.index + 1) % BUFFER_SIZE;
|
|
57
66
|
entry.count += 1;
|
|
58
67
|
entry.lastUpdated = Date.now();
|
|
59
68
|
}
|
|
60
69
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
* @param {string} provider - Provider name
|
|
64
|
-
* @returns {LatencyStats|null} Statistics or null if no data
|
|
65
|
-
*/
|
|
66
|
-
getStats(provider) {
|
|
67
|
-
const entry = this._providers.get(provider);
|
|
68
|
-
if (!entry || entry.count === 0) {
|
|
69
|
-
return null;
|
|
70
|
-
}
|
|
71
|
-
|
|
70
|
+
_computeStats(entry) {
|
|
71
|
+
if (!entry || entry.count === 0) return null;
|
|
72
72
|
const sampleCount = Math.min(entry.count, BUFFER_SIZE);
|
|
73
73
|
const samples = entry.buffer.slice(0, sampleCount);
|
|
74
74
|
const sorted = samples.slice().sort((a, b) => a - b);
|
|
75
|
-
|
|
76
75
|
const sum = sorted.reduce((acc, v) => acc + v, 0);
|
|
77
|
-
|
|
78
76
|
return {
|
|
79
77
|
p50: sorted[Math.floor(sampleCount * 0.5)],
|
|
80
78
|
p95: sorted[Math.floor(sampleCount * 0.95)],
|
|
@@ -82,61 +80,105 @@ class LatencyTracker {
|
|
|
82
80
|
avg: Math.round(sum / sampleCount),
|
|
83
81
|
count: entry.count,
|
|
84
82
|
lastUpdated: entry.lastUpdated,
|
|
83
|
+
provider: entry.provider,
|
|
84
|
+
model: entry.model,
|
|
85
85
|
};
|
|
86
86
|
}
|
|
87
87
|
|
|
88
88
|
/**
|
|
89
|
-
*
|
|
90
|
-
*
|
|
91
|
-
* Returns a value from -5 to +10 that can be added to a routing score:
|
|
92
|
-
* +10 if P95 > 10000ms (very slow, penalise by boosting complexity toward cloud)
|
|
93
|
-
* +5 if P95 > 5000ms
|
|
94
|
-
* -5 if P50 < 1000ms (fast, reward)
|
|
95
|
-
* 0 otherwise or if insufficient data
|
|
96
|
-
*
|
|
97
|
-
* @param {string} provider - Provider name
|
|
98
|
-
* @returns {number} Score adjustment (-5 to +10)
|
|
89
|
+
* Get stats for a specific (provider, model) pair, or aggregated for a provider
|
|
90
|
+
* if model is omitted.
|
|
99
91
|
*/
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
92
|
+
getStats(provider, model = null) {
|
|
93
|
+
if (model) {
|
|
94
|
+
return this._computeStats(this._entries.get(_key(provider, model)));
|
|
95
|
+
}
|
|
96
|
+
// Aggregate across all models for this provider
|
|
97
|
+
const provEntries = [];
|
|
98
|
+
for (const [k, entry] of this._entries) {
|
|
99
|
+
if (entry.provider === provider) provEntries.push(entry);
|
|
104
100
|
}
|
|
101
|
+
if (provEntries.length === 0) return null;
|
|
102
|
+
if (provEntries.length === 1) return this._computeStats(provEntries[0]);
|
|
103
|
+
|
|
104
|
+
// Pool samples across model entries to compute combined percentiles
|
|
105
|
+
const pooled = [];
|
|
106
|
+
let total = 0;
|
|
107
|
+
let lastUpdated = 0;
|
|
108
|
+
for (const e of provEntries) {
|
|
109
|
+
const n = Math.min(e.count, BUFFER_SIZE);
|
|
110
|
+
for (let i = 0; i < n; i++) pooled.push(e.buffer[i]);
|
|
111
|
+
total += e.count;
|
|
112
|
+
if (e.lastUpdated > lastUpdated) lastUpdated = e.lastUpdated;
|
|
113
|
+
}
|
|
114
|
+
if (pooled.length === 0) return null;
|
|
115
|
+
pooled.sort((a, b) => a - b);
|
|
116
|
+
const sum = pooled.reduce((acc, v) => acc + v, 0);
|
|
117
|
+
return {
|
|
118
|
+
p50: pooled[Math.floor(pooled.length * 0.5)],
|
|
119
|
+
p95: pooled[Math.floor(pooled.length * 0.95)],
|
|
120
|
+
p99: pooled[Math.floor(pooled.length * 0.99)],
|
|
121
|
+
avg: Math.round(sum / pooled.length),
|
|
122
|
+
count: total,
|
|
123
|
+
lastUpdated,
|
|
124
|
+
provider,
|
|
125
|
+
model: ANY_MODEL,
|
|
126
|
+
};
|
|
127
|
+
}
|
|
105
128
|
|
|
129
|
+
/** Latency penalty/bonus used by complexity-analyzer. */
|
|
130
|
+
penalizeScore(provider, model = null) {
|
|
131
|
+
const stats = this.getStats(provider, model);
|
|
132
|
+
if (!stats || stats.count < MIN_SAMPLES) return 0;
|
|
106
133
|
if (stats.p95 > 10000) return 10;
|
|
107
134
|
if (stats.p95 > 5000) return 5;
|
|
108
135
|
if (stats.p50 < 1000) return -5;
|
|
109
|
-
|
|
110
136
|
return 0;
|
|
111
137
|
}
|
|
112
138
|
|
|
113
139
|
/**
|
|
114
|
-
*
|
|
115
|
-
*
|
|
140
|
+
* Phase 1.5: per-model P95 lookup for deadline-aware routing (Phase 6.3).
|
|
141
|
+
* Returns null if insufficient samples.
|
|
142
|
+
*/
|
|
143
|
+
getModelP95(provider, model) {
|
|
144
|
+
const stats = this.getStats(provider, model);
|
|
145
|
+
if (!stats || stats.count < MIN_SAMPLES) return null;
|
|
146
|
+
return stats.p95;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Whether a model is currently degraded (P95 > 2x its historical median).
|
|
151
|
+
* Currently uses a simple absolute threshold — better signal will come in
|
|
152
|
+
* Phase 4.3 (drift detection).
|
|
153
|
+
*/
|
|
154
|
+
isDegraded(provider, model) {
|
|
155
|
+
const stats = this.getStats(provider, model);
|
|
156
|
+
if (!stats || stats.count < MIN_SAMPLES) return false;
|
|
157
|
+
return stats.p95 > stats.p50 * 2 && stats.p95 > 5000;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
/**
|
|
161
|
+
* Get stats for every tracked entry.
|
|
162
|
+
*
|
|
163
|
+
* Backward-compat: when an entry was recorded via the legacy 2-arg
|
|
164
|
+
* `record(provider, latency)` signature, the model is the wildcard `*`
|
|
165
|
+
* and we return it keyed by provider name only. Entries with explicit
|
|
166
|
+
* models use the `provider:model` key.
|
|
116
167
|
*/
|
|
117
168
|
getAllStats() {
|
|
118
169
|
const result = new Map();
|
|
119
|
-
for (const
|
|
120
|
-
const stats = this.
|
|
121
|
-
if (stats)
|
|
122
|
-
|
|
123
|
-
|
|
170
|
+
for (const [k, entry] of this._entries) {
|
|
171
|
+
const stats = this._computeStats(entry);
|
|
172
|
+
if (!stats) continue;
|
|
173
|
+
const outKey = entry.model === ANY_MODEL ? entry.provider : k;
|
|
174
|
+
result.set(outKey, stats);
|
|
124
175
|
}
|
|
125
176
|
return result;
|
|
126
177
|
}
|
|
127
178
|
}
|
|
128
179
|
|
|
129
|
-
// ---------------------------------------------------------------------------
|
|
130
|
-
// Singleton
|
|
131
|
-
// ---------------------------------------------------------------------------
|
|
132
|
-
|
|
133
|
-
/** @type {LatencyTracker|null} */
|
|
134
180
|
let instance = null;
|
|
135
181
|
|
|
136
|
-
/**
|
|
137
|
-
* Get the singleton LatencyTracker instance.
|
|
138
|
-
* @returns {LatencyTracker}
|
|
139
|
-
*/
|
|
140
182
|
function getLatencyTracker() {
|
|
141
183
|
if (!instance) {
|
|
142
184
|
instance = new LatencyTracker();
|
|
@@ -145,4 +187,4 @@ function getLatencyTracker() {
|
|
|
145
187
|
return instance;
|
|
146
188
|
}
|
|
147
189
|
|
|
148
|
-
module.exports = { LatencyTracker, getLatencyTracker };
|
|
190
|
+
module.exports = { LatencyTracker, getLatencyTracker, ANY_MODEL };
|
|
@@ -12,7 +12,10 @@ const config = require('../config');
|
|
|
12
12
|
// Load tier config
|
|
13
13
|
const TIER_CONFIG_PATH = path.join(__dirname, '../../config/model-tiers.json');
|
|
14
14
|
|
|
15
|
-
//
|
|
15
|
+
// Phase 1.4: calibrated thresholds (written by scripts/calibrate-thresholds.js)
|
|
16
|
+
const CALIBRATED_PATH = path.join(__dirname, '../../data/calibrated-thresholds.json');
|
|
17
|
+
|
|
18
|
+
// Tier definitions with complexity ranges (defaults; may be overridden by calibration)
|
|
16
19
|
const TIER_DEFINITIONS = {
|
|
17
20
|
SIMPLE: {
|
|
18
21
|
description: 'Greetings, simple Q&A, confirmations',
|
|
@@ -41,13 +44,30 @@ class ModelTierSelector {
|
|
|
41
44
|
this.tierConfig = null;
|
|
42
45
|
this.localProviders = {};
|
|
43
46
|
this.providerAliases = {};
|
|
47
|
+
/** Per-tier ranges, possibly overridden by calibration. */
|
|
48
|
+
this.ranges = null;
|
|
44
49
|
this._loadConfig();
|
|
50
|
+
this._loadCalibrated();
|
|
45
51
|
}
|
|
46
52
|
|
|
47
53
|
/**
|
|
48
54
|
* Load tier configuration from JSON file
|
|
49
55
|
*/
|
|
50
56
|
_loadConfig() {
|
|
57
|
+
// Check if tier routing mode is active (all 4 TIER_* env vars set)
|
|
58
|
+
const tierRoutingMode = !!(
|
|
59
|
+
config.modelTiers?.SIMPLE?.trim() &&
|
|
60
|
+
config.modelTiers?.MEDIUM?.trim() &&
|
|
61
|
+
config.modelTiers?.COMPLEX?.trim() &&
|
|
62
|
+
config.modelTiers?.REASONING?.trim()
|
|
63
|
+
);
|
|
64
|
+
|
|
65
|
+
if (tierRoutingMode) {
|
|
66
|
+
logger.debug('[ModelTiers] Tier routing mode active, building config from TIER_* env vars');
|
|
67
|
+
this._buildFromEnvVars();
|
|
68
|
+
return;
|
|
69
|
+
}
|
|
70
|
+
|
|
51
71
|
try {
|
|
52
72
|
if (fs.existsSync(TIER_CONFIG_PATH)) {
|
|
53
73
|
const data = JSON.parse(fs.readFileSync(TIER_CONFIG_PATH, 'utf8'));
|
|
@@ -65,9 +85,86 @@ class ModelTierSelector {
|
|
|
65
85
|
}
|
|
66
86
|
}
|
|
67
87
|
|
|
88
|
+
/**
|
|
89
|
+
* Phase 1.4: load calibrated tier thresholds if the nightly job has produced them.
|
|
90
|
+
* Falls back silently to TIER_DEFINITIONS when absent or malformed.
|
|
91
|
+
*/
|
|
92
|
+
_loadCalibrated() {
|
|
93
|
+
this.ranges = this._defaultRanges();
|
|
94
|
+
try {
|
|
95
|
+
if (!fs.existsSync(CALIBRATED_PATH)) return;
|
|
96
|
+
const data = JSON.parse(fs.readFileSync(CALIBRATED_PATH, 'utf8'));
|
|
97
|
+
if (!data?.ranges) return;
|
|
98
|
+
const calibrated = {};
|
|
99
|
+
for (const tier of Object.keys(TIER_DEFINITIONS)) {
|
|
100
|
+
const r = data.ranges[tier];
|
|
101
|
+
if (Array.isArray(r) && r.length === 2 && r[0] <= r[1]) {
|
|
102
|
+
calibrated[tier] = r;
|
|
103
|
+
} else {
|
|
104
|
+
calibrated[tier] = TIER_DEFINITIONS[tier].range;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
this.ranges = calibrated;
|
|
108
|
+
logger.info({ ranges: this.ranges, calibratedAt: data.calibratedAt }, '[ModelTiers] Using calibrated thresholds');
|
|
109
|
+
} catch (err) {
|
|
110
|
+
logger.debug({ err: err.message }, '[ModelTiers] Calibrated thresholds load failed; using defaults');
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
_defaultRanges() {
|
|
115
|
+
const ranges = {};
|
|
116
|
+
for (const [tier, def] of Object.entries(TIER_DEFINITIONS)) {
|
|
117
|
+
ranges[tier] = def.range.slice();
|
|
118
|
+
}
|
|
119
|
+
return ranges;
|
|
120
|
+
}
|
|
121
|
+
|
|
68
122
|
/**
|
|
69
123
|
* Load default tier config
|
|
70
124
|
*/
|
|
125
|
+
/**
|
|
126
|
+
* Build tier config from TIER_* environment variables
|
|
127
|
+
* Format: TIER_SIMPLE=provider:model
|
|
128
|
+
*/
|
|
129
|
+
_buildFromEnvVars() {
|
|
130
|
+
this.tierConfig = {};
|
|
131
|
+
this.localProviders = {
|
|
132
|
+
ollama: { free: true, defaultTier: 'SIMPLE' },
|
|
133
|
+
llamacpp: { free: true, defaultTier: 'SIMPLE' },
|
|
134
|
+
lmstudio: { free: true, defaultTier: 'SIMPLE' },
|
|
135
|
+
mlx: { free: true, defaultTier: 'SIMPLE' },
|
|
136
|
+
};
|
|
137
|
+
|
|
138
|
+
const tiers = ['SIMPLE', 'MEDIUM', 'COMPLEX', 'REASONING'];
|
|
139
|
+
for (const tier of tiers) {
|
|
140
|
+
const envValue = config.modelTiers?.[tier]?.trim();
|
|
141
|
+
if (!envValue) continue;
|
|
142
|
+
|
|
143
|
+
// Parse provider:model format
|
|
144
|
+
const match = envValue.match(/^([a-z-]+):(.+)$/);
|
|
145
|
+
if (!match) {
|
|
146
|
+
logger.warn({ tier, value: envValue }, '[ModelTiers] Invalid TIER format, expected provider:model');
|
|
147
|
+
continue;
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
const [, provider, model] = match;
|
|
151
|
+
|
|
152
|
+
// Initialize tier config if not exists
|
|
153
|
+
if (!this.tierConfig[tier]) {
|
|
154
|
+
this.tierConfig[tier] = { preferred: {} };
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
// Set this as the ONLY preferred model for this tier+provider
|
|
158
|
+
this.tierConfig[tier].preferred[provider] = [model];
|
|
159
|
+
|
|
160
|
+
logger.debug({
|
|
161
|
+
tier,
|
|
162
|
+
provider,
|
|
163
|
+
model
|
|
164
|
+
}, '[ModelTiers] Tier configured from env');
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
|
|
71
168
|
_loadDefaults() {
|
|
72
169
|
this.tierConfig = {
|
|
73
170
|
SIMPLE: { preferred: { ollama: ['llama3.2'], openai: ['gpt-4o-mini'] } },
|
|
@@ -92,20 +189,73 @@ class ModelTierSelector {
|
|
|
92
189
|
}
|
|
93
190
|
|
|
94
191
|
/**
|
|
95
|
-
* Get tier from complexity score
|
|
192
|
+
* Get tier from complexity score.
|
|
193
|
+
* Phase 1.4: honors calibrated ranges when present.
|
|
96
194
|
* @param {number} complexityScore - Score from 0-100
|
|
97
195
|
* @returns {string} Tier name (SIMPLE, MEDIUM, COMPLEX, REASONING)
|
|
98
196
|
*/
|
|
99
197
|
getTier(complexityScore) {
|
|
100
198
|
const score = Math.max(0, Math.min(100, complexityScore || 0));
|
|
199
|
+
const ranges = this.ranges || this._defaultRanges();
|
|
200
|
+
for (const tier of Object.keys(TIER_DEFINITIONS)) {
|
|
201
|
+
const [lo, hi] = ranges[tier];
|
|
202
|
+
if (score >= lo && score <= hi) return tier;
|
|
203
|
+
}
|
|
204
|
+
return score > 75 ? 'REASONING' : 'SIMPLE';
|
|
205
|
+
}
|
|
101
206
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
207
|
+
/**
|
|
208
|
+
* Phase 1.3: find a model with at least `minContext` context window.
|
|
209
|
+
* Returns null when no qualifying model is available.
|
|
210
|
+
*/
|
|
211
|
+
findContextCapable(minContext, preferredTier = null) {
|
|
212
|
+
const { getModelRegistrySync } = require('./model-registry');
|
|
213
|
+
const registry = getModelRegistrySync();
|
|
214
|
+
const tierOrder = preferredTier
|
|
215
|
+
? [preferredTier, 'REASONING', 'COMPLEX', 'MEDIUM', 'SIMPLE']
|
|
216
|
+
: ['REASONING', 'COMPLEX', 'MEDIUM', 'SIMPLE'];
|
|
217
|
+
const seen = new Set();
|
|
218
|
+
for (const tier of tierOrder) {
|
|
219
|
+
if (seen.has(tier)) continue;
|
|
220
|
+
seen.add(tier);
|
|
221
|
+
const tierConfig = this.tierConfig[tier];
|
|
222
|
+
if (!tierConfig?.preferred) continue;
|
|
223
|
+
for (const [provider, models] of Object.entries(tierConfig.preferred)) {
|
|
224
|
+
for (const model of models) {
|
|
225
|
+
const cost = registry.getCost(model);
|
|
226
|
+
if (cost?.context && cost.context >= minContext) {
|
|
227
|
+
return { provider, model, tier, context: cost.context };
|
|
228
|
+
}
|
|
229
|
+
}
|
|
105
230
|
}
|
|
106
231
|
}
|
|
232
|
+
return null;
|
|
233
|
+
}
|
|
107
234
|
|
|
108
|
-
|
|
235
|
+
/**
|
|
236
|
+
* Find a vision-capable model at or above `preferredTier`.
|
|
237
|
+
* Walks tier order from preferred upward; returns null when none available.
|
|
238
|
+
*/
|
|
239
|
+
findVisionCapable(preferredTier = null) {
|
|
240
|
+
const { getModelRegistrySync } = require('./model-registry');
|
|
241
|
+
const registry = getModelRegistrySync();
|
|
242
|
+
const tierOrder = preferredTier
|
|
243
|
+
? [preferredTier, 'COMPLEX', 'REASONING', 'MEDIUM', 'SIMPLE']
|
|
244
|
+
: ['COMPLEX', 'REASONING', 'MEDIUM', 'SIMPLE'];
|
|
245
|
+
const seen = new Set();
|
|
246
|
+
for (const t of tierOrder) {
|
|
247
|
+
if (seen.has(t)) continue;
|
|
248
|
+
seen.add(t);
|
|
249
|
+
const tierConfig = this.tierConfig[t];
|
|
250
|
+
if (!tierConfig?.preferred) continue;
|
|
251
|
+
for (const [provider, models] of Object.entries(tierConfig.preferred)) {
|
|
252
|
+
for (const model of models) {
|
|
253
|
+
const info = registry.getCost(model);
|
|
254
|
+
if (info?.vision) return { provider, model, tier: t };
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
return null;
|
|
109
259
|
}
|
|
110
260
|
|
|
111
261
|
/**
|