lynkr 9.1.2 → 9.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -10
- package/package.json +3 -1
- package/scripts/build-knn-index.js +130 -0
- package/scripts/calibrate-thresholds.js +197 -0
- package/scripts/compare-policies.js +67 -0
- package/scripts/learn-output-ratios.js +162 -0
- package/scripts/refresh-pricing.js +122 -0
- package/scripts/run-routerarena.js +26 -0
- package/scripts/sample-regret.js +84 -0
- package/scripts/train-risk-classifier.js +191 -0
- package/src/api/middleware/budget-enforcer.js +60 -0
- package/src/api/middleware/load-shedding.js +11 -1
- package/src/api/middleware/tenant.js +21 -0
- package/src/api/router.js +19 -40
- package/src/budget/hierarchical-budget.js +159 -0
- package/src/cache/semantic.js +28 -2
- package/src/clients/databricks.js +59 -5
- package/src/config/index.js +239 -43
- package/src/context/toon.js +5 -4
- package/src/orchestrator/index.js +44 -6
- package/src/prompts/system.js +34 -6
- package/src/routing/bandit.js +246 -0
- package/src/routing/cascade.js +106 -0
- package/src/routing/complexity-analyzer.js +7 -15
- package/src/routing/confidence-scorer.js +121 -0
- package/src/routing/context-validator.js +71 -0
- package/src/routing/cost-optimizer.js +5 -2
- package/src/routing/deadline.js +52 -0
- package/src/routing/drift-monitor.js +113 -0
- package/src/routing/embedding-cache.js +77 -0
- package/src/routing/index.js +314 -5
- package/src/routing/knn-router.js +206 -0
- package/src/routing/latency-tracker.js +113 -71
- package/src/routing/model-tiers.js +156 -6
- package/src/routing/output-ratios.js +57 -0
- package/src/routing/regret-estimator.js +91 -0
- package/src/routing/reward-pipeline.js +62 -0
- package/src/routing/risk-classifier.js +130 -0
- package/src/routing/shadow-mode.js +77 -0
- package/src/routing/tenant-policy.js +96 -0
- package/src/routing/tokenizer.js +162 -0
- package/src/server.js +9 -0
|
@@ -1,80 +1,78 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Rolling Latency Tracker
|
|
2
|
+
* Rolling Latency Tracker (per provider:model)
|
|
3
3
|
*
|
|
4
|
-
* Tracks
|
|
5
|
-
*
|
|
4
|
+
* Tracks latency keyed by `${provider}:${model}` so models within a provider
|
|
5
|
+
* (Opus vs Haiku) get separate stats. Backward-compatible: callers that pass
|
|
6
|
+
* only a provider still work — they're tracked under `${provider}:*`.
|
|
7
|
+
*
|
|
8
|
+
* Phase 1.5 of the routing overhaul: previous version keyed by provider only.
|
|
6
9
|
*
|
|
7
10
|
* @module routing/latency-tracker
|
|
8
11
|
*/
|
|
9
12
|
|
|
10
13
|
const logger = require("../logger");
|
|
11
14
|
|
|
12
|
-
/** Size of the circular buffer per provider */
|
|
13
15
|
const BUFFER_SIZE = 200;
|
|
14
|
-
|
|
15
|
-
/** Minimum sample count before penalizeScore returns a meaningful value */
|
|
16
16
|
const MIN_SAMPLES = 10;
|
|
17
17
|
|
|
18
|
-
/**
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
* @property {number} count - Total measurements recorded
|
|
25
|
-
* @property {number} lastUpdated - Timestamp of the last recorded measurement
|
|
26
|
-
*/
|
|
18
|
+
/** Wildcard model used when caller doesn't specify one. */
|
|
19
|
+
const ANY_MODEL = '*';
|
|
20
|
+
|
|
21
|
+
/**
 * Build the map key for a (provider, model) pair.
 * A missing or empty model maps to the wildcard bucket.
 */
function _key(provider, model) {
  const modelPart = model || ANY_MODEL;
  return `${provider}:${modelPart}`;
}
|
|
27
24
|
|
|
28
25
|
class LatencyTracker {
|
|
29
26
|
constructor() {
|
|
30
|
-
/** @type {Map<string, { buffer: number[], index: number, count: number, lastUpdated: number }>} */
|
|
31
|
-
this.
|
|
27
|
+
/** @type {Map<string, { buffer: number[], index: number, count: number, lastUpdated: number, provider: string, model: string }>} */
|
|
28
|
+
this._entries = new Map();
|
|
32
29
|
}
|
|
33
30
|
|
|
34
31
|
/**
|
|
35
|
-
* Record a latency measurement
|
|
36
|
-
*
|
|
37
|
-
*
|
|
32
|
+
* Record a latency measurement.
|
|
33
|
+
*
|
|
34
|
+
* Signatures:
|
|
35
|
+
* record(provider, latencyMs) // legacy
|
|
36
|
+
* record(provider, model, latencyMs) // preferred
|
|
38
37
|
*/
|
|
39
|
-
record(provider,
|
|
40
|
-
|
|
41
|
-
|
|
38
|
+
record(provider, modelOrLatency, maybeLatency) {
|
|
39
|
+
let model;
|
|
40
|
+
let latencyMs;
|
|
41
|
+
if (typeof modelOrLatency === 'number') {
|
|
42
|
+
model = ANY_MODEL;
|
|
43
|
+
latencyMs = modelOrLatency;
|
|
44
|
+
} else {
|
|
45
|
+
model = modelOrLatency || ANY_MODEL;
|
|
46
|
+
latencyMs = maybeLatency;
|
|
42
47
|
}
|
|
43
48
|
|
|
44
|
-
|
|
49
|
+
if (!provider || typeof latencyMs !== "number" || latencyMs < 0) return;
|
|
50
|
+
|
|
51
|
+
const k = _key(provider, model);
|
|
52
|
+
let entry = this._entries.get(k);
|
|
45
53
|
if (!entry) {
|
|
46
54
|
entry = {
|
|
47
55
|
buffer: new Array(BUFFER_SIZE).fill(0),
|
|
48
56
|
index: 0,
|
|
49
57
|
count: 0,
|
|
50
58
|
lastUpdated: 0,
|
|
59
|
+
provider,
|
|
60
|
+
model,
|
|
51
61
|
};
|
|
52
|
-
this.
|
|
62
|
+
this._entries.set(k, entry);
|
|
53
63
|
}
|
|
54
|
-
|
|
55
64
|
entry.buffer[entry.index] = latencyMs;
|
|
56
65
|
entry.index = (entry.index + 1) % BUFFER_SIZE;
|
|
57
66
|
entry.count += 1;
|
|
58
67
|
entry.lastUpdated = Date.now();
|
|
59
68
|
}
|
|
60
69
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
* @param {string} provider - Provider name
|
|
64
|
-
* @returns {LatencyStats|null} Statistics or null if no data
|
|
65
|
-
*/
|
|
66
|
-
getStats(provider) {
|
|
67
|
-
const entry = this._providers.get(provider);
|
|
68
|
-
if (!entry || entry.count === 0) {
|
|
69
|
-
return null;
|
|
70
|
-
}
|
|
71
|
-
|
|
70
|
+
_computeStats(entry) {
|
|
71
|
+
if (!entry || entry.count === 0) return null;
|
|
72
72
|
const sampleCount = Math.min(entry.count, BUFFER_SIZE);
|
|
73
73
|
const samples = entry.buffer.slice(0, sampleCount);
|
|
74
74
|
const sorted = samples.slice().sort((a, b) => a - b);
|
|
75
|
-
|
|
76
75
|
const sum = sorted.reduce((acc, v) => acc + v, 0);
|
|
77
|
-
|
|
78
76
|
return {
|
|
79
77
|
p50: sorted[Math.floor(sampleCount * 0.5)],
|
|
80
78
|
p95: sorted[Math.floor(sampleCount * 0.95)],
|
|
@@ -82,61 +80,105 @@ class LatencyTracker {
|
|
|
82
80
|
avg: Math.round(sum / sampleCount),
|
|
83
81
|
count: entry.count,
|
|
84
82
|
lastUpdated: entry.lastUpdated,
|
|
83
|
+
provider: entry.provider,
|
|
84
|
+
model: entry.model,
|
|
85
85
|
};
|
|
86
86
|
}
|
|
87
87
|
|
|
88
88
|
/**
|
|
89
|
-
*
|
|
90
|
-
*
|
|
91
|
-
* Returns a value from -5 to +10 that can be added to a routing score:
|
|
92
|
-
* +10 if P95 > 10000ms (very slow, penalise by boosting complexity toward cloud)
|
|
93
|
-
* +5 if P95 > 5000ms
|
|
94
|
-
* -5 if P50 < 1000ms (fast, reward)
|
|
95
|
-
* 0 otherwise or if insufficient data
|
|
96
|
-
*
|
|
97
|
-
* @param {string} provider - Provider name
|
|
98
|
-
* @returns {number} Score adjustment (-5 to +10)
|
|
89
|
+
* Get stats for a specific (provider, model) pair, or aggregated for a provider
|
|
90
|
+
* if model is omitted.
|
|
99
91
|
*/
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
92
|
+
getStats(provider, model = null) {
|
|
93
|
+
if (model) {
|
|
94
|
+
return this._computeStats(this._entries.get(_key(provider, model)));
|
|
95
|
+
}
|
|
96
|
+
// Aggregate across all models for this provider
|
|
97
|
+
const provEntries = [];
|
|
98
|
+
for (const [k, entry] of this._entries) {
|
|
99
|
+
if (entry.provider === provider) provEntries.push(entry);
|
|
104
100
|
}
|
|
101
|
+
if (provEntries.length === 0) return null;
|
|
102
|
+
if (provEntries.length === 1) return this._computeStats(provEntries[0]);
|
|
103
|
+
|
|
104
|
+
// Pool samples across model entries to compute combined percentiles
|
|
105
|
+
const pooled = [];
|
|
106
|
+
let total = 0;
|
|
107
|
+
let lastUpdated = 0;
|
|
108
|
+
for (const e of provEntries) {
|
|
109
|
+
const n = Math.min(e.count, BUFFER_SIZE);
|
|
110
|
+
for (let i = 0; i < n; i++) pooled.push(e.buffer[i]);
|
|
111
|
+
total += e.count;
|
|
112
|
+
if (e.lastUpdated > lastUpdated) lastUpdated = e.lastUpdated;
|
|
113
|
+
}
|
|
114
|
+
if (pooled.length === 0) return null;
|
|
115
|
+
pooled.sort((a, b) => a - b);
|
|
116
|
+
const sum = pooled.reduce((acc, v) => acc + v, 0);
|
|
117
|
+
return {
|
|
118
|
+
p50: pooled[Math.floor(pooled.length * 0.5)],
|
|
119
|
+
p95: pooled[Math.floor(pooled.length * 0.95)],
|
|
120
|
+
p99: pooled[Math.floor(pooled.length * 0.99)],
|
|
121
|
+
avg: Math.round(sum / pooled.length),
|
|
122
|
+
count: total,
|
|
123
|
+
lastUpdated,
|
|
124
|
+
provider,
|
|
125
|
+
model: ANY_MODEL,
|
|
126
|
+
};
|
|
127
|
+
}
|
|
105
128
|
|
|
129
|
+
/** Latency penalty/bonus used by complexity-analyzer. */
|
|
130
|
+
penalizeScore(provider, model = null) {
|
|
131
|
+
const stats = this.getStats(provider, model);
|
|
132
|
+
if (!stats || stats.count < MIN_SAMPLES) return 0;
|
|
106
133
|
if (stats.p95 > 10000) return 10;
|
|
107
134
|
if (stats.p95 > 5000) return 5;
|
|
108
135
|
if (stats.p50 < 1000) return -5;
|
|
109
|
-
|
|
110
136
|
return 0;
|
|
111
137
|
}
|
|
112
138
|
|
|
113
139
|
/**
|
|
114
|
-
*
|
|
115
|
-
*
|
|
140
|
+
* Phase 1.5: per-model P95 lookup for deadline-aware routing (Phase 6.3).
|
|
141
|
+
* Returns null if insufficient samples.
|
|
142
|
+
*/
|
|
143
|
+
getModelP95(provider, model) {
|
|
144
|
+
const stats = this.getStats(provider, model);
|
|
145
|
+
if (!stats || stats.count < MIN_SAMPLES) return null;
|
|
146
|
+
return stats.p95;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Whether a model is currently degraded (rolling P95 > 2x rolling P50 and P95 > 5000 ms).
|
|
151
|
+
* Currently uses a simple absolute threshold — better signal will come in
|
|
152
|
+
* Phase 4.3 (drift detection).
|
|
153
|
+
*/
|
|
154
|
+
isDegraded(provider, model) {
|
|
155
|
+
const stats = this.getStats(provider, model);
|
|
156
|
+
if (!stats || stats.count < MIN_SAMPLES) return false;
|
|
157
|
+
return stats.p95 > stats.p50 * 2 && stats.p95 > 5000;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
/**
|
|
161
|
+
* Get stats for every tracked entry.
|
|
162
|
+
*
|
|
163
|
+
* Backward-compat: when an entry was recorded via the legacy 2-arg
|
|
164
|
+
* `record(provider, latency)` signature, the model is the wildcard `*`
|
|
165
|
+
* and we return it keyed by provider name only. Entries with explicit
|
|
166
|
+
* models use the `provider:model` key.
|
|
116
167
|
*/
|
|
117
168
|
getAllStats() {
|
|
118
169
|
const result = new Map();
|
|
119
|
-
for (const
|
|
120
|
-
const stats = this.
|
|
121
|
-
if (stats)
|
|
122
|
-
|
|
123
|
-
|
|
170
|
+
for (const [k, entry] of this._entries) {
|
|
171
|
+
const stats = this._computeStats(entry);
|
|
172
|
+
if (!stats) continue;
|
|
173
|
+
const outKey = entry.model === ANY_MODEL ? entry.provider : k;
|
|
174
|
+
result.set(outKey, stats);
|
|
124
175
|
}
|
|
125
176
|
return result;
|
|
126
177
|
}
|
|
127
178
|
}
|
|
128
179
|
|
|
129
|
-
// ---------------------------------------------------------------------------
|
|
130
|
-
// Singleton
|
|
131
|
-
// ---------------------------------------------------------------------------
|
|
132
|
-
|
|
133
|
-
/** @type {LatencyTracker|null} */
|
|
134
180
|
let instance = null;
|
|
135
181
|
|
|
136
|
-
/**
|
|
137
|
-
* Get the singleton LatencyTracker instance.
|
|
138
|
-
* @returns {LatencyTracker}
|
|
139
|
-
*/
|
|
140
182
|
function getLatencyTracker() {
|
|
141
183
|
if (!instance) {
|
|
142
184
|
instance = new LatencyTracker();
|
|
@@ -145,4 +187,4 @@ function getLatencyTracker() {
|
|
|
145
187
|
return instance;
|
|
146
188
|
}
|
|
147
189
|
|
|
148
|
-
module.exports = { LatencyTracker, getLatencyTracker };
|
|
190
|
+
module.exports = { LatencyTracker, getLatencyTracker, ANY_MODEL };
|
|
@@ -12,7 +12,10 @@ const config = require('../config');
|
|
|
12
12
|
// Load tier config
|
|
13
13
|
const TIER_CONFIG_PATH = path.join(__dirname, '../../config/model-tiers.json');
|
|
14
14
|
|
|
15
|
-
//
|
|
15
|
+
// Phase 1.4: calibrated thresholds (written by scripts/calibrate-thresholds.js)
|
|
16
|
+
const CALIBRATED_PATH = path.join(__dirname, '../../data/calibrated-thresholds.json');
|
|
17
|
+
|
|
18
|
+
// Tier definitions with complexity ranges (defaults; may be overridden by calibration)
|
|
16
19
|
const TIER_DEFINITIONS = {
|
|
17
20
|
SIMPLE: {
|
|
18
21
|
description: 'Greetings, simple Q&A, confirmations',
|
|
@@ -41,13 +44,30 @@ class ModelTierSelector {
|
|
|
41
44
|
this.tierConfig = null;
|
|
42
45
|
this.localProviders = {};
|
|
43
46
|
this.providerAliases = {};
|
|
47
|
+
/** Per-tier ranges, possibly overridden by calibration. */
|
|
48
|
+
this.ranges = null;
|
|
44
49
|
this._loadConfig();
|
|
50
|
+
this._loadCalibrated();
|
|
45
51
|
}
|
|
46
52
|
|
|
47
53
|
/**
|
|
48
54
|
* Load tier configuration from JSON file
|
|
49
55
|
*/
|
|
50
56
|
_loadConfig() {
|
|
57
|
+
// Check if tier routing mode is active (all 4 TIER_* env vars set)
|
|
58
|
+
const tierRoutingMode = !!(
|
|
59
|
+
config.modelTiers?.SIMPLE?.trim() &&
|
|
60
|
+
config.modelTiers?.MEDIUM?.trim() &&
|
|
61
|
+
config.modelTiers?.COMPLEX?.trim() &&
|
|
62
|
+
config.modelTiers?.REASONING?.trim()
|
|
63
|
+
);
|
|
64
|
+
|
|
65
|
+
if (tierRoutingMode) {
|
|
66
|
+
logger.debug('[ModelTiers] Tier routing mode active, building config from TIER_* env vars');
|
|
67
|
+
this._buildFromEnvVars();
|
|
68
|
+
return;
|
|
69
|
+
}
|
|
70
|
+
|
|
51
71
|
try {
|
|
52
72
|
if (fs.existsSync(TIER_CONFIG_PATH)) {
|
|
53
73
|
const data = JSON.parse(fs.readFileSync(TIER_CONFIG_PATH, 'utf8'));
|
|
@@ -65,9 +85,86 @@ class ModelTierSelector {
|
|
|
65
85
|
}
|
|
66
86
|
}
|
|
67
87
|
|
|
88
|
+
/**
|
|
89
|
+
* Phase 1.4: load calibrated tier thresholds if the nightly job has produced them.
|
|
90
|
+
* Falls back silently to TIER_DEFINITIONS when absent or malformed.
|
|
91
|
+
*/
|
|
92
|
+
_loadCalibrated() {
|
|
93
|
+
this.ranges = this._defaultRanges();
|
|
94
|
+
try {
|
|
95
|
+
if (!fs.existsSync(CALIBRATED_PATH)) return;
|
|
96
|
+
const data = JSON.parse(fs.readFileSync(CALIBRATED_PATH, 'utf8'));
|
|
97
|
+
if (!data?.ranges) return;
|
|
98
|
+
const calibrated = {};
|
|
99
|
+
for (const tier of Object.keys(TIER_DEFINITIONS)) {
|
|
100
|
+
const r = data.ranges[tier];
|
|
101
|
+
if (Array.isArray(r) && r.length === 2 && r[0] <= r[1]) {
|
|
102
|
+
calibrated[tier] = r;
|
|
103
|
+
} else {
|
|
104
|
+
calibrated[tier] = TIER_DEFINITIONS[tier].range;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
this.ranges = calibrated;
|
|
108
|
+
logger.info({ ranges: this.ranges, calibratedAt: data.calibratedAt }, '[ModelTiers] Using calibrated thresholds');
|
|
109
|
+
} catch (err) {
|
|
110
|
+
logger.debug({ err: err.message }, '[ModelTiers] Calibrated thresholds load failed; using defaults');
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
_defaultRanges() {
|
|
115
|
+
const ranges = {};
|
|
116
|
+
for (const [tier, def] of Object.entries(TIER_DEFINITIONS)) {
|
|
117
|
+
ranges[tier] = def.range.slice();
|
|
118
|
+
}
|
|
119
|
+
return ranges;
|
|
120
|
+
}
|
|
121
|
+
|
|
68
122
|
/**
|
|
69
123
|
* Load default tier config
|
|
70
124
|
*/
|
|
125
|
+
/**
|
|
126
|
+
* Build tier config from TIER_* environment variables
|
|
127
|
+
* Format: TIER_SIMPLE=provider:model
|
|
128
|
+
*/
|
|
129
|
+
_buildFromEnvVars() {
|
|
130
|
+
this.tierConfig = {};
|
|
131
|
+
this.localProviders = {
|
|
132
|
+
ollama: { free: true, defaultTier: 'SIMPLE' },
|
|
133
|
+
llamacpp: { free: true, defaultTier: 'SIMPLE' },
|
|
134
|
+
lmstudio: { free: true, defaultTier: 'SIMPLE' },
|
|
135
|
+
mlx: { free: true, defaultTier: 'SIMPLE' },
|
|
136
|
+
};
|
|
137
|
+
|
|
138
|
+
const tiers = ['SIMPLE', 'MEDIUM', 'COMPLEX', 'REASONING'];
|
|
139
|
+
for (const tier of tiers) {
|
|
140
|
+
const envValue = config.modelTiers?.[tier]?.trim();
|
|
141
|
+
if (!envValue) continue;
|
|
142
|
+
|
|
143
|
+
// Parse provider:model format
|
|
144
|
+
const match = envValue.match(/^([a-z-]+):(.+)$/);
|
|
145
|
+
if (!match) {
|
|
146
|
+
logger.warn({ tier, value: envValue }, '[ModelTiers] Invalid TIER format, expected provider:model');
|
|
147
|
+
continue;
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
const [, provider, model] = match;
|
|
151
|
+
|
|
152
|
+
// Initialize tier config if not exists
|
|
153
|
+
if (!this.tierConfig[tier]) {
|
|
154
|
+
this.tierConfig[tier] = { preferred: {} };
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
// Set this as the ONLY preferred model for this tier+provider
|
|
158
|
+
this.tierConfig[tier].preferred[provider] = [model];
|
|
159
|
+
|
|
160
|
+
logger.debug({
|
|
161
|
+
tier,
|
|
162
|
+
provider,
|
|
163
|
+
model
|
|
164
|
+
}, '[ModelTiers] Tier configured from env');
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
|
|
71
168
|
_loadDefaults() {
|
|
72
169
|
this.tierConfig = {
|
|
73
170
|
SIMPLE: { preferred: { ollama: ['llama3.2'], openai: ['gpt-4o-mini'] } },
|
|
@@ -92,20 +189,73 @@ class ModelTierSelector {
|
|
|
92
189
|
}
|
|
93
190
|
|
|
94
191
|
/**
|
|
95
|
-
* Get tier from complexity score
|
|
192
|
+
* Get tier from complexity score.
|
|
193
|
+
* Phase 1.4: honors calibrated ranges when present.
|
|
96
194
|
* @param {number} complexityScore - Score from 0-100
|
|
97
195
|
* @returns {string} Tier name (SIMPLE, MEDIUM, COMPLEX, REASONING)
|
|
98
196
|
*/
|
|
99
197
|
getTier(complexityScore) {
|
|
100
198
|
const score = Math.max(0, Math.min(100, complexityScore || 0));
|
|
199
|
+
const ranges = this.ranges || this._defaultRanges();
|
|
200
|
+
for (const tier of Object.keys(TIER_DEFINITIONS)) {
|
|
201
|
+
const [lo, hi] = ranges[tier];
|
|
202
|
+
if (score >= lo && score <= hi) return tier;
|
|
203
|
+
}
|
|
204
|
+
return score > 75 ? 'REASONING' : 'SIMPLE';
|
|
205
|
+
}
|
|
101
206
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
207
|
+
/**
|
|
208
|
+
* Phase 1.3: find a model with at least `minContext` context window.
|
|
209
|
+
* Returns null when no qualifying model is available.
|
|
210
|
+
*/
|
|
211
|
+
findContextCapable(minContext, preferredTier = null) {
|
|
212
|
+
const { getModelRegistrySync } = require('./model-registry');
|
|
213
|
+
const registry = getModelRegistrySync();
|
|
214
|
+
const tierOrder = preferredTier
|
|
215
|
+
? [preferredTier, 'REASONING', 'COMPLEX', 'MEDIUM', 'SIMPLE']
|
|
216
|
+
: ['REASONING', 'COMPLEX', 'MEDIUM', 'SIMPLE'];
|
|
217
|
+
const seen = new Set();
|
|
218
|
+
for (const tier of tierOrder) {
|
|
219
|
+
if (seen.has(tier)) continue;
|
|
220
|
+
seen.add(tier);
|
|
221
|
+
const tierConfig = this.tierConfig[tier];
|
|
222
|
+
if (!tierConfig?.preferred) continue;
|
|
223
|
+
for (const [provider, models] of Object.entries(tierConfig.preferred)) {
|
|
224
|
+
for (const model of models) {
|
|
225
|
+
const cost = registry.getCost(model);
|
|
226
|
+
if (cost?.context && cost.context >= minContext) {
|
|
227
|
+
return { provider, model, tier, context: cost.context };
|
|
228
|
+
}
|
|
229
|
+
}
|
|
105
230
|
}
|
|
106
231
|
}
|
|
232
|
+
return null;
|
|
233
|
+
}
|
|
107
234
|
|
|
108
|
-
|
|
235
|
+
/**
|
|
236
|
+
* Find a vision-capable model, trying `preferredTier` first.
|
|
237
|
+
* Then falls back through COMPLEX, REASONING, MEDIUM, SIMPLE; returns null when none available.
|
|
238
|
+
*/
|
|
239
|
+
findVisionCapable(preferredTier = null) {
|
|
240
|
+
const { getModelRegistrySync } = require('./model-registry');
|
|
241
|
+
const registry = getModelRegistrySync();
|
|
242
|
+
const tierOrder = preferredTier
|
|
243
|
+
? [preferredTier, 'COMPLEX', 'REASONING', 'MEDIUM', 'SIMPLE']
|
|
244
|
+
: ['COMPLEX', 'REASONING', 'MEDIUM', 'SIMPLE'];
|
|
245
|
+
const seen = new Set();
|
|
246
|
+
for (const t of tierOrder) {
|
|
247
|
+
if (seen.has(t)) continue;
|
|
248
|
+
seen.add(t);
|
|
249
|
+
const tierConfig = this.tierConfig[t];
|
|
250
|
+
if (!tierConfig?.preferred) continue;
|
|
251
|
+
for (const [provider, models] of Object.entries(tierConfig.preferred)) {
|
|
252
|
+
for (const model of models) {
|
|
253
|
+
const info = registry.getCost(model);
|
|
254
|
+
if (info?.vision) return { provider, model, tier: t };
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
return null;
|
|
109
259
|
}
|
|
110
260
|
|
|
111
261
|
/**
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Output-token ratio lookup (Phase 2.3).
|
|
3
|
+
*
|
|
4
|
+
* Reads data/output-ratios.json (built by scripts/learn-output-ratios.js).
|
|
5
|
+
* Falls back to hardcoded defaults when the file is absent.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
const fs = require('fs');
|
|
9
|
+
const path = require('path');
|
|
10
|
+
const logger = require('../logger');
|
|
11
|
+
|
|
12
|
+
const FILE_PATH = path.join(__dirname, '../../data/output-ratios.json');
|
|
13
|
+
|
|
14
|
+
const DEFAULT_RATIOS = {
|
|
15
|
+
simple_qa: 0.30,
|
|
16
|
+
code_gen: 2.10,
|
|
17
|
+
code_edit: 1.40,
|
|
18
|
+
summarization: 0.15,
|
|
19
|
+
reasoning: 1.50,
|
|
20
|
+
tool_use: 0.80,
|
|
21
|
+
default: 0.50,
|
|
22
|
+
};
|
|
23
|
+
|
|
24
|
+
let _cached = null;
|
|
25
|
+
let _cacheLoadedAt = 0;
|
|
26
|
+
const RELOAD_INTERVAL_MS = 60_000;
|
|
27
|
+
|
|
28
|
+
/**
 * Return the active ratio table, re-reading FILE_PATH at most once per
 * RELOAD_INTERVAL_MS.
 *
 * Learned ratios are merged over DEFAULT_RATIOS. Only finite, non-negative
 * numbers are accepted from the file, so a malformed entry (string, null,
 * negative, NaN) cannot replace a sane default. Any read or parse failure
 * falls back to DEFAULT_RATIOS.
 *
 * @returns {Object<string, number>} task type -> output/input token ratio
 */
function _load() {
  if (_cached && Date.now() - _cacheLoadedAt < RELOAD_INTERVAL_MS) return _cached;

  let ratios = DEFAULT_RATIOS;
  try {
    if (fs.existsSync(FILE_PATH)) {
      const data = JSON.parse(fs.readFileSync(FILE_PATH, 'utf8'));
      const learned = data?.ratios;
      // Arrays are typeof 'object' too; they are not a ratio table.
      if (learned && typeof learned === 'object' && !Array.isArray(learned)) {
        const usable = Object.entries(learned).filter(
          ([, value]) => Number.isFinite(value) && value >= 0
        );
        ratios = { ...DEFAULT_RATIOS, ...Object.fromEntries(usable) };
      }
    }
  } catch (err) {
    logger.debug({ err: err.message }, '[OutputRatios] Load failed, using defaults');
  }

  // Cache the outcome (including the fallback) so a missing or broken file
  // is not re-read on every call.
  _cached = ratios;
  _cacheLoadedAt = Date.now();
  return _cached;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Look up the expected output/input token ratio for a task type.
 *
 * The lookup is case-insensitive and restricted to the table's own keys, so
 * names such as "constructor" or "toString" cannot resolve to inherited
 * Object.prototype members. Unknown, empty or non-string task types yield
 * the table's `default` ratio (0.5 when that is missing or not a number).
 *
 * @param {string} [taskType] - task category, e.g. "code_gen"
 * @returns {number} the ratio for the task type, or the default ratio
 */
function ratioFor(taskType) {
  const ratios = _load();
  const fallback = Number.isFinite(ratios.default) ? ratios.default : 0.5;

  // Non-string input would throw on toLowerCase(); treat it as unknown.
  if (typeof taskType !== 'string' || taskType === '') return fallback;

  const key = taskType.toLowerCase();
  const isOwn = Object.prototype.hasOwnProperty.call(ratios, key);
  const value = isOwn ? ratios[key] : undefined;
  return Number.isFinite(value) ? value : fallback;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Drop the cached ratio table so the next lookup re-reads it from disk
 * instead of waiting for the reload interval to elapse.
 */
function reload() {
  _cached = null;
}
|
|
56
|
+
|
|
57
|
+
module.exports = { ratioFor, reload, DEFAULT_RATIOS };
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Regret estimator (Phase 4.2).
|
|
3
|
+
*
|
|
4
|
+
* Periodically samples a fraction of yesterday's requests, re-runs them
|
|
5
|
+
* through a strictly-better model (Opus), and compares quality. If the
|
|
6
|
+
* routed model consistently underperforms vs Opus by >10%, this writes an
|
|
7
|
+
* alert to data/regret-alerts.json.
|
|
8
|
+
*
|
|
9
|
+
* Off by default (costs real money). Enable with LYNKR_REGRET_ESTIMATOR=true
|
|
10
|
+
* and run via cron: `node scripts/sample-regret.js`.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
const fs = require('fs');
|
|
14
|
+
const path = require('path');
|
|
15
|
+
const logger = require('../logger');
|
|
16
|
+
|
|
17
|
+
const ALERTS_PATH = path.join(__dirname, '../../data/regret-alerts.json');
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* @param {object} args
|
|
21
|
+
* @param {Array<{request: object, response: object, model: string, quality: number}>} args.samples
|
|
22
|
+
* @param {function} args.runOpus — async (request) → { response, quality }
|
|
23
|
+
* @param {number} args.threshold — fractional underperformance threshold (default 0.10)
|
|
24
|
+
* @returns {Promise<{ regret, sampledCount, alerts }>}
|
|
25
|
+
*/
|
|
26
|
+
async function estimate(args) {
  const threshold = args.threshold ?? 0.10;
  // A missing or non-array `samples` means there is nothing to re-run.
  const samples = Array.isArray(args.samples) ? args.samples : [];

  const results = [];
  // Re-runs are issued one at a time; each call costs real money and a failed
  // re-run only drops that sample.
  for (const s of samples) {
    try {
      const opus = await args.runOpus(s.request);
      // A NaN or non-numeric quality would turn every average below into NaN,
      // so such samples are skipped instead of recorded.
      if (!Number.isFinite(opus?.quality) || !Number.isFinite(s.quality)) {
        logger.debug({ model: s.model }, '[RegretEstimator] Skipping sample with non-numeric quality');
        continue;
      }
      // Relative shortfall versus the reference model; the denominator is
      // floored at 1 to avoid dividing by zero or tiny scores.
      const delta = (opus.quality - s.quality) / Math.max(1, opus.quality);
      results.push({
        model: s.model,
        routedQuality: s.quality,
        opusQuality: opus.quality,
        regret: Math.max(0, delta),
        underperforming: delta > threshold,
      });
    } catch (err) {
      logger.debug({ err: err.message }, '[RegretEstimator] Opus re-run failed');
    }
  }

  const byModel = new Map();
  for (const r of results) {
    if (!byModel.has(r.model)) byModel.set(r.model, []);
    byModel.get(r.model).push(r);
  }

  // Alert only when a model underperforms on more than half of at least
  // five sampled runs.
  const alerts = [];
  for (const [model, runs] of byModel) {
    const underperforming = runs.filter((r) => r.underperforming).length;
    const rate = underperforming / runs.length;
    if (rate > 0.5 && runs.length >= 5) {
      alerts.push({
        model,
        underperformingRate: rate,
        sampleSize: runs.length,
        avgRegret: runs.reduce((sum, r) => sum + r.regret, 0) / runs.length,
        timestamp: Date.now(),
      });
    }
  }

  if (alerts.length > 0) {
    try {
      _persistAlerts(alerts);
    } catch (err) {
      // Best effort: the caller still receives the alerts in the return value.
      logger.warn({ err: err.message }, '[RegretEstimator] Alert write failed');
    }
  }

  const totalRegret = results.reduce((sum, r) => sum + r.regret, 0) / Math.max(1, results.length);
  return { regret: totalRegret, sampledCount: results.length, alerts };
}

/**
 * Append alerts to the JSON log at ALERTS_PATH, keeping the newest 100.
 * An unreadable or non-array existing file is replaced rather than merged.
 *
 * @param {Array<object>} alerts - alert records to append
 */
function _persistAlerts(alerts) {
  fs.mkdirSync(path.dirname(ALERTS_PATH), { recursive: true });
  let existing = [];
  if (fs.existsSync(ALERTS_PATH)) {
    try {
      existing = JSON.parse(fs.readFileSync(ALERTS_PATH, 'utf8'));
    } catch (err) {
      logger.debug({ err: err.message }, '[RegretEstimator] Existing alert file unreadable; starting fresh');
    }
  }
  const merged = Array.isArray(existing) ? existing : [];
  merged.push(...alerts);
  fs.writeFileSync(ALERTS_PATH, JSON.stringify(merged.slice(-100), null, 2));
}
|
|
86
|
+
|
|
87
|
+
/**
 * Whether the regret estimator is switched on.
 * Only the exact string "true" in LYNKR_REGRET_ESTIMATOR enables it.
 */
function isEnabled() {
  const flag = process.env.LYNKR_REGRET_ESTIMATOR;
  return flag === 'true';
}
|
|
90
|
+
|
|
91
|
+
module.exports = { estimate, isEnabled };
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reward pipeline for the LinUCB bandit (Phase 4.1).
|
|
3
|
+
*
|
|
4
|
+
* Combines quality score, normalised cost, and normalised latency into a
|
|
5
|
+
* single scalar reward in [0, 100]. The bandit then rescales to [0, 1].
|
|
6
|
+
*
|
|
7
|
+
* reward = quality - λ·norm_cost·100 - μ·norm_latency·100
|
|
8
|
+
*
|
|
9
|
+
* Normalisation uses running min/max so we don't need to pre-compute global
|
|
10
|
+
* scales.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
const logger = require('../logger');
|
|
14
|
+
|
|
15
|
+
const DEFAULT_LAMBDA = 0.3;
|
|
16
|
+
const DEFAULT_MU = 0.1;
|
|
17
|
+
|
|
18
|
+
/**
 * Turns a (quality, cost, latency) observation into one scalar reward.
 *
 * Cost and latency are min-max normalised against the finite values seen so
 * far, weighted by `lambda` and `mu`, and subtracted from the quality score.
 */
class RewardPipeline {
  /**
   * @param {object} [opts]
   * @param {number} [opts.lambda] - weight of the normalised cost penalty
   * @param {number} [opts.mu] - weight of the normalised latency penalty
   */
  constructor({ lambda = DEFAULT_LAMBDA, mu = DEFAULT_MU } = {}) {
    this.lambda = lambda;
    this.mu = mu;
    // Ranges start inverted so the first observation sets both bounds.
    this.costRange = { min: Infinity, max: -Infinity };
    this.latencyRange = { min: Infinity, max: -Infinity };
  }

  /**
   * Widen the running min/max ranges with one observation.
   * Only finite, non-negative values count: a single Infinity would make the
   * range bound non-finite and switch normalisation off for good.
   *
   * @param {object} [obs] - { cost, latency }
   */
  observe({ cost, latency } = {}) {
    if (Number.isFinite(cost) && cost >= 0) {
      this.costRange.min = Math.min(this.costRange.min, cost);
      this.costRange.max = Math.max(this.costRange.max, cost);
    }
    if (Number.isFinite(latency) && latency >= 0) {
      this.latencyRange.min = Math.min(this.latencyRange.min, latency);
      this.latencyRange.max = Math.max(this.latencyRange.max, latency);
    }
  }

  /**
   * Map `value` into [0, 1] relative to `range`.
   * Returns 0 when the range is empty or degenerate, or when `value` is NaN,
   * so a bad measurement contributes no penalty rather than a NaN reward.
   */
  _normalize(value, range) {
    if (Number.isNaN(value)) return 0;
    const usable =
      Number.isFinite(range.min) && Number.isFinite(range.max) && range.max > range.min;
    if (!usable) return 0;
    const clamped = Math.max(range.min, Math.min(range.max, value));
    return (clamped - range.min) / (range.max - range.min);
  }

  /**
   * @param {object} obs - { quality: 0-100, cost: dollars, latency: ms }
   * @returns {number} reward in [0, 100]; quality defaults to 50 when it is
   *   missing, not a number, or NaN
   */
  reward(obs = {}) {
    // The current observation is folded into the ranges before normalising.
    this.observe(obs);
    const hasQuality = typeof obs.quality === 'number' && !Number.isNaN(obs.quality);
    const q = hasQuality ? obs.quality : 50;
    const cn = this._normalize(obs.cost ?? 0, this.costRange);
    const ln = this._normalize(obs.latency ?? 0, this.latencyRange);
    const raw = q - this.lambda * cn * 100 - this.mu * ln * 100;
    return Math.max(0, Math.min(100, raw));
  }
}
|
|
55
|
+
|
|
56
|
+
/** Lazily created process-wide pipeline instance. */
let _instance = null;

/**
 * Return the shared RewardPipeline, creating it with default weights on
 * first use.
 */
function getRewardPipeline() {
  _instance = _instance || new RewardPipeline();
  return _instance;
}
|
|
61
|
+
|
|
62
|
+
module.exports = { RewardPipeline, getRewardPipeline };
|