lynkr 9.1.2 → 9.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -10
- package/package.json +3 -1
- package/scripts/build-knn-index.js +130 -0
- package/scripts/calibrate-thresholds.js +197 -0
- package/scripts/compare-policies.js +67 -0
- package/scripts/learn-output-ratios.js +162 -0
- package/scripts/refresh-pricing.js +122 -0
- package/scripts/run-routerarena.js +26 -0
- package/scripts/sample-regret.js +84 -0
- package/scripts/train-risk-classifier.js +191 -0
- package/src/api/middleware/budget-enforcer.js +60 -0
- package/src/api/middleware/load-shedding.js +11 -1
- package/src/api/middleware/tenant.js +21 -0
- package/src/api/router.js +19 -40
- package/src/budget/hierarchical-budget.js +159 -0
- package/src/cache/semantic.js +28 -2
- package/src/clients/databricks.js +59 -5
- package/src/config/index.js +239 -43
- package/src/context/toon.js +5 -4
- package/src/orchestrator/index.js +44 -6
- package/src/prompts/system.js +34 -6
- package/src/routing/bandit.js +246 -0
- package/src/routing/cascade.js +106 -0
- package/src/routing/complexity-analyzer.js +7 -15
- package/src/routing/confidence-scorer.js +121 -0
- package/src/routing/context-validator.js +71 -0
- package/src/routing/cost-optimizer.js +5 -2
- package/src/routing/deadline.js +52 -0
- package/src/routing/drift-monitor.js +113 -0
- package/src/routing/embedding-cache.js +77 -0
- package/src/routing/index.js +314 -5
- package/src/routing/knn-router.js +206 -0
- package/src/routing/latency-tracker.js +113 -71
- package/src/routing/model-tiers.js +156 -6
- package/src/routing/output-ratios.js +57 -0
- package/src/routing/regret-estimator.js +91 -0
- package/src/routing/reward-pipeline.js +62 -0
- package/src/routing/risk-classifier.js +130 -0
- package/src/routing/shadow-mode.js +77 -0
- package/src/routing/tenant-policy.js +96 -0
- package/src/routing/tokenizer.js +162 -0
- package/src/server.js +9 -0
package/README.md
CHANGED
|
@@ -225,14 +225,15 @@ Routes requests to the right model based on 5-phase complexity analysis. Simple
|
|
|
225
225
|
- **Graphify integration** — AST-based knowledge graph detects god nodes, community cohesion, blast radius across 19 languages
|
|
226
226
|
- **Routing telemetry** — every decision recorded with quality scoring (0-100) and latency tracking (P50/P95/P99)
|
|
227
227
|
|
|
228
|
-
### Token Optimization (
|
|
229
|
-
- **
|
|
230
|
-
- **
|
|
231
|
-
- **
|
|
232
|
-
- **
|
|
233
|
-
- **
|
|
234
|
-
- **
|
|
235
|
-
- **
|
|
228
|
+
### Token Optimization (8 Phases)
|
|
229
|
+
- **MCP Code Mode** — replaces 100+ MCP tool schemas with 4 meta-tools (~96% reduction, lazy tool discovery)
|
|
230
|
+
- **Smart tool selection** — only sends tools relevant to the current task (50-70% reduction)
|
|
231
|
+
- **Prompt caching** — SHA-256 keyed LRU cache (30-45% reduction on repeated prompts)
|
|
232
|
+
- **Memory deduplication** — eliminates repeated information across turns (20-30% reduction)
|
|
233
|
+
- **Tool response truncation** — intelligent truncation of long outputs (15-25% reduction)
|
|
234
|
+
- **Dynamic system prompts** — adapt complexity to request type (10-20% reduction)
|
|
235
|
+
- **Distill compression** — structural similarity, delta rendering, smart dedup of repetitive tool outputs (20-40% reduction)
|
|
236
|
+
- **Headroom sidecar** — optional ML-based compression: Smart Crusher, CCR, LLMLingua (47-92% reduction)
|
|
236
237
|
|
|
237
238
|
### Enterprise Resilience
|
|
238
239
|
- **Circuit breakers** — automatic failover with half-open probe recovery
|
|
@@ -254,12 +255,22 @@ SEMANTIC_CACHE_THRESHOLD=0.95
|
|
|
254
255
|
```
|
|
255
256
|
|
|
256
257
|
### MCP Integration + Code Mode
|
|
257
|
-
Automatic Model Context Protocol server discovery and orchestration. Your MCP tools work through Lynkr without configuration.
|
|
258
|
+
Automatic Model Context Protocol server discovery and orchestration. Your MCP tools work through Lynkr without configuration.
|
|
259
|
+
|
|
260
|
+
**MCP Code Mode** — Token optimization for heavy MCP setups:
|
|
261
|
+
- Replaces 100+ individual MCP tool schemas with 4 meta-tools
|
|
262
|
+
- Reduces tool catalog from ~17,500 tokens to ~700 tokens (**96% reduction**)
|
|
263
|
+
- Enables lazy tool discovery: model queries `mcp_list_tools`, then `mcp_tool_info`, then `mcp_execute`
|
|
264
|
+
- Best for: 50+ MCP tools, long conversations, context-constrained setups
|
|
265
|
+
- Trade-off: 3 sequential calls instead of 1 (adds ~2-3s latency)
|
|
258
266
|
|
|
259
267
|
```bash
|
|
260
|
-
CODE_MODE_ENABLED=true
|
|
268
|
+
CODE_MODE_ENABLED=true # Enable Code Mode
|
|
269
|
+
CODE_MODE_CACHE_TTL=60000 # Tool list cache TTL (ms)
|
|
261
270
|
```
|
|
262
271
|
|
|
272
|
+
See [Token Optimization Guide](documentation/token-optimization.md#phase-0-mcp-code-mode-96-reduction-for-mcp-tools) and [Tools Documentation](documentation/tools.md#mcp-code-mode-token-optimization) for details.
|
|
273
|
+
|
|
263
274
|
---
|
|
264
275
|
|
|
265
276
|
## Deployment Options
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "lynkr",
|
|
3
|
-
"version": "9.1.2",
|
|
3
|
+
"version": "9.1.4",
|
|
4
4
|
"description": "Self-hosted Claude Code & Cursor proxy with Databricks,AWS BedRock,Azure adapters, openrouter, Ollama,llamacpp,LM Studio, workspace tooling, and MCP integration.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"bin": {
|
|
@@ -55,6 +55,8 @@
|
|
|
55
55
|
"express": "^5.1.0",
|
|
56
56
|
"express-rate-limit": "^8.2.1",
|
|
57
57
|
"fast-glob": "^3.3.2",
|
|
58
|
+
"hnswlib-node": "^3.0.0",
|
|
59
|
+
"js-tiktoken": "^1.0.20",
|
|
58
60
|
"js-yaml": "^4.1.1",
|
|
59
61
|
"openai": "^6.14.0",
|
|
60
62
|
"pino": "^8.17.2",
|
package/scripts/build-knn-index.js
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Build the kNN router index from telemetry (and optional RouterBench bootstrap).
|
|
4
|
+
*
|
|
5
|
+
* Phase 3.1 of the routing overhaul. Should be run nightly:
|
|
6
|
+
* node scripts/build-knn-index.js [--days 30] [--bootstrap path/to/routerbench.jsonl]
|
|
7
|
+
*
|
|
8
|
+
* RouterBench bootstrap format (one JSON per line):
|
|
9
|
+
* { "query": "...", "provider": "anthropic", "model": "claude-...",
|
|
10
|
+
* "quality": 87, "cost": 0.0034, "latency": 1200, "tier": "COMPLEX" }
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
const fs = require('fs');
|
|
14
|
+
const path = require('path');
|
|
15
|
+
const { generateEmbedding } = require('../src/cache/embeddings');
|
|
16
|
+
const { getKnnRouter } = require('../src/routing/knn-router');
|
|
17
|
+
|
|
18
|
+
const DEFAULT_DAYS = 30;
|
|
19
|
+
const TELEMETRY_DB_CANDIDATES = [
|
|
20
|
+
path.join(__dirname, '../.lynkr/telemetry.db'),
|
|
21
|
+
path.join(__dirname, '../data/lynkr.db'),
|
|
22
|
+
];
|
|
23
|
+
|
|
24
|
+
function _findDb() {
|
|
25
|
+
for (const p of TELEMETRY_DB_CANDIDATES) if (fs.existsSync(p)) return p;
|
|
26
|
+
return null;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
function _parseArgs(argv) {
|
|
30
|
+
const out = { days: DEFAULT_DAYS, bootstrap: null };
|
|
31
|
+
for (let i = 0; i < argv.length; i++) {
|
|
32
|
+
if (argv[i] === '--days') out.days = Number(argv[++i]) || DEFAULT_DAYS;
|
|
33
|
+
else if (argv[i] === '--bootstrap') out.bootstrap = argv[++i];
|
|
34
|
+
}
|
|
35
|
+
return out;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
async function _readTelemetry(days) {
|
|
39
|
+
const dbPath = _findDb();
|
|
40
|
+
if (!dbPath) return [];
|
|
41
|
+
let Database;
|
|
42
|
+
try {
|
|
43
|
+
Database = require('better-sqlite3');
|
|
44
|
+
} catch {
|
|
45
|
+
console.error('better-sqlite3 not installed');
|
|
46
|
+
return [];
|
|
47
|
+
}
|
|
48
|
+
const db = new Database(dbPath, { readonly: true, fileMustExist: true });
|
|
49
|
+
try {
|
|
50
|
+
const since = Date.now() - days * 24 * 3600 * 1000;
|
|
51
|
+
return db
|
|
52
|
+
.prepare(
|
|
53
|
+
`SELECT request_text AS query, provider, model, quality_score AS quality,
|
|
54
|
+
cost, total_latency_ms AS latency, tier
|
|
55
|
+
FROM routing_telemetry
|
|
56
|
+
WHERE timestamp >= ?
|
|
57
|
+
AND quality_score IS NOT NULL
|
|
58
|
+
AND request_text IS NOT NULL
|
|
59
|
+
AND request_text != ''`
|
|
60
|
+
)
|
|
61
|
+
.all(since);
|
|
62
|
+
} catch (err) {
|
|
63
|
+
console.error(`Telemetry query failed: ${err.message}`);
|
|
64
|
+
return [];
|
|
65
|
+
} finally {
|
|
66
|
+
try { db.close(); } catch {}
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
async function _readBootstrap(filePath) {
|
|
71
|
+
if (!filePath || !fs.existsSync(filePath)) return [];
|
|
72
|
+
const lines = fs.readFileSync(filePath, 'utf8').split('\n').filter(Boolean);
|
|
73
|
+
const out = [];
|
|
74
|
+
for (const line of lines) {
|
|
75
|
+
try {
|
|
76
|
+
out.push(JSON.parse(line));
|
|
77
|
+
} catch {
|
|
78
|
+
// skip malformed
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
return out;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
async function build({ days = DEFAULT_DAYS, bootstrap = null } = {}) {
|
|
85
|
+
const router = getKnnRouter();
|
|
86
|
+
if (!router.ready) {
|
|
87
|
+
console.error('Router index not ready (hnswlib-node may be missing). Aborting.');
|
|
88
|
+
process.exit(2);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
const teleRows = await _readTelemetry(days);
|
|
92
|
+
const bootRows = await _readBootstrap(bootstrap);
|
|
93
|
+
const all = [...bootRows, ...teleRows];
|
|
94
|
+
console.log(`Building index from ${bootRows.length} bootstrap + ${teleRows.length} telemetry rows`);
|
|
95
|
+
|
|
96
|
+
let added = 0;
|
|
97
|
+
let failed = 0;
|
|
98
|
+
for (const row of all) {
|
|
99
|
+
const text = row.query || row.request_text;
|
|
100
|
+
if (!text) continue;
|
|
101
|
+
try {
|
|
102
|
+
const emb = await generateEmbedding(text);
|
|
103
|
+
router.add(emb, {
|
|
104
|
+
provider: row.provider,
|
|
105
|
+
model: row.model,
|
|
106
|
+
quality: row.quality,
|
|
107
|
+
cost: row.cost,
|
|
108
|
+
latency: row.latency,
|
|
109
|
+
tier: row.tier,
|
|
110
|
+
});
|
|
111
|
+
added++;
|
|
112
|
+
if (added % 100 === 0) console.log(` ${added} indexed...`);
|
|
113
|
+
} catch (err) {
|
|
114
|
+
failed++;
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
router.save();
|
|
119
|
+
console.log(`Indexed ${added}, failed ${failed}. Index size: ${router.size}`);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
if (require.main === module) {
|
|
123
|
+
const opts = _parseArgs(process.argv.slice(2));
|
|
124
|
+
build(opts).catch((err) => {
|
|
125
|
+
console.error(err.stack || err.message);
|
|
126
|
+
process.exit(1);
|
|
127
|
+
});
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
module.exports = { build };
|
package/scripts/calibrate-thresholds.js
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Calibrate tier thresholds from telemetry.
|
|
4
|
+
*
|
|
5
|
+
* Phase 1.4 of the routing overhaul. Reads quality_score history from the
|
|
6
|
+
* routing_telemetry table, finds where each tier's median quality drops below
|
|
7
|
+
* acceptable, and writes adjusted [lo, hi] ranges to
|
|
8
|
+
* data/calibrated-thresholds.json. ModelTierSelector picks the file up on
|
|
9
|
+
* next start.
|
|
10
|
+
*
|
|
11
|
+
* Usage: node scripts/calibrate-thresholds.js [--days N] [--dry-run]
|
|
12
|
+
* npx lynkr calibrate
|
|
13
|
+
*
|
|
14
|
+
* Behavior when telemetry is sparse (<100 rows with quality_score):
|
|
15
|
+
* - No file is written and existing calibration is left alone.
|
|
16
|
+
* - Exits 0 with a "skipped" message.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
const fs = require('fs');
|
|
20
|
+
const path = require('path');
|
|
21
|
+
|
|
22
|
+
const DEFAULT_DAYS = 7;
|
|
23
|
+
const MIN_SAMPLES = 100;
|
|
24
|
+
/** Quality score below which a complexity bucket is "underperforming" for its tier. */
|
|
25
|
+
const QUALITY_FLOOR = {
|
|
26
|
+
SIMPLE: 55,
|
|
27
|
+
MEDIUM: 60,
|
|
28
|
+
COMPLEX: 65,
|
|
29
|
+
REASONING: 70,
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
const OUTPUT_PATH = path.join(__dirname, '../data/calibrated-thresholds.json');
|
|
33
|
+
const TELEMETRY_DB_CANDIDATES = [
|
|
34
|
+
path.join(__dirname, '../.lynkr/telemetry.db'),
|
|
35
|
+
path.join(__dirname, '../data/lynkr.db'),
|
|
36
|
+
];
|
|
37
|
+
|
|
38
|
+
function _findDb() {
|
|
39
|
+
for (const p of TELEMETRY_DB_CANDIDATES) {
|
|
40
|
+
if (fs.existsSync(p)) return p;
|
|
41
|
+
}
|
|
42
|
+
return null;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
function _parseArgs(argv) {
|
|
46
|
+
const out = { days: DEFAULT_DAYS, dryRun: false };
|
|
47
|
+
for (let i = 0; i < argv.length; i++) {
|
|
48
|
+
const a = argv[i];
|
|
49
|
+
if (a === '--days') out.days = Number(argv[++i]) || DEFAULT_DAYS;
|
|
50
|
+
else if (a === '--dry-run') out.dryRun = true;
|
|
51
|
+
}
|
|
52
|
+
return out;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
const DEFAULT_RANGES = {
|
|
56
|
+
SIMPLE: [0, 25],
|
|
57
|
+
MEDIUM: [26, 50],
|
|
58
|
+
COMPLEX: [51, 75],
|
|
59
|
+
REASONING: [76, 100],
|
|
60
|
+
};
|
|
61
|
+
|
|
62
|
+
function _openDb(dbPath) {
|
|
63
|
+
let Database;
|
|
64
|
+
try {
|
|
65
|
+
Database = require('better-sqlite3');
|
|
66
|
+
} catch (err) {
|
|
67
|
+
console.error('better-sqlite3 not installed. Install with: npm install --save-optional better-sqlite3');
|
|
68
|
+
process.exit(2);
|
|
69
|
+
}
|
|
70
|
+
return new Database(dbPath, { readonly: true, fileMustExist: true });
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
function calibrate({ days = DEFAULT_DAYS, dryRun = false } = {}) {
|
|
74
|
+
const dbPath = _findDb();
|
|
75
|
+
if (!dbPath) {
|
|
76
|
+
console.log('No telemetry DB found — skipping calibration.');
|
|
77
|
+
return { skipped: true, reason: 'no_db' };
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
let db;
|
|
81
|
+
try {
|
|
82
|
+
db = _openDb(dbPath);
|
|
83
|
+
} catch (err) {
|
|
84
|
+
console.error(`Failed to open telemetry DB: ${err.message}`);
|
|
85
|
+
return { skipped: true, reason: 'db_open_failed', error: err.message };
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
const since = Date.now() - days * 24 * 3600 * 1000;
|
|
89
|
+
let rows;
|
|
90
|
+
try {
|
|
91
|
+
rows = db
|
|
92
|
+
.prepare(
|
|
93
|
+
`SELECT tier, complexity_score AS score, quality_score AS q
|
|
94
|
+
FROM routing_telemetry
|
|
95
|
+
WHERE timestamp >= ?
|
|
96
|
+
AND quality_score IS NOT NULL
|
|
97
|
+
AND complexity_score IS NOT NULL
|
|
98
|
+
AND tier IS NOT NULL`
|
|
99
|
+
)
|
|
100
|
+
.all(since);
|
|
101
|
+
} catch (err) {
|
|
102
|
+
console.error(`Telemetry query failed (DB may be corrupt or schema missing): ${err.message}`);
|
|
103
|
+
return { skipped: true, reason: 'query_failed', error: err.message };
|
|
104
|
+
} finally {
|
|
105
|
+
try { db.close(); } catch {}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
if (!rows || rows.length < MIN_SAMPLES) {
|
|
109
|
+
console.log(`Only ${rows ? rows.length : 0} rows with quality_score in last ${days}d (need ≥${MIN_SAMPLES}). Skipping.`);
|
|
110
|
+
return { skipped: true, reason: 'insufficient_samples', count: rows ? rows.length : 0 };
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// Bucket by score (0-100 in width-5 buckets) per tier, compute median quality.
|
|
114
|
+
const buckets = new Map(); // tier -> Map<bucketLowerBound, q-values[]>
|
|
115
|
+
for (const row of rows) {
|
|
116
|
+
const s = Math.max(0, Math.min(100, Math.floor(row.score)));
|
|
117
|
+
const bucket = Math.floor(s / 5) * 5;
|
|
118
|
+
if (!buckets.has(row.tier)) buckets.set(row.tier, new Map());
|
|
119
|
+
const b = buckets.get(row.tier);
|
|
120
|
+
if (!b.has(bucket)) b.set(bucket, []);
|
|
121
|
+
b.get(bucket).push(row.q);
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
const _median = (arr) => {
|
|
125
|
+
const s = arr.slice().sort((a, b) => a - b);
|
|
126
|
+
const m = Math.floor(s.length / 2);
|
|
127
|
+
return s.length % 2 ? s[m] : (s[m - 1] + s[m]) / 2;
|
|
128
|
+
};
|
|
129
|
+
|
|
130
|
+
// Default ranges; will adjust per-tier upper bound if late buckets show poor quality.
|
|
131
|
+
const ranges = { ...DEFAULT_RANGES };
|
|
132
|
+
const tierOrder = ['SIMPLE', 'MEDIUM', 'COMPLEX', 'REASONING'];
|
|
133
|
+
const stats = {};
|
|
134
|
+
|
|
135
|
+
for (const tier of tierOrder) {
|
|
136
|
+
const floor = QUALITY_FLOOR[tier];
|
|
137
|
+
const tierBuckets = buckets.get(tier);
|
|
138
|
+
if (!tierBuckets) {
|
|
139
|
+
stats[tier] = { samples: 0, adjusted: false };
|
|
140
|
+
continue;
|
|
141
|
+
}
|
|
142
|
+
const ordered = Array.from(tierBuckets.entries()).sort((a, b) => a[0] - b[0]);
|
|
143
|
+
let suggestedUpper = DEFAULT_RANGES[tier][1];
|
|
144
|
+
const buckets_summary = [];
|
|
145
|
+
for (const [lo, vals] of ordered) {
|
|
146
|
+
if (vals.length < 5) {
|
|
147
|
+
buckets_summary.push({ bucket: lo, samples: vals.length, median: null });
|
|
148
|
+
continue;
|
|
149
|
+
}
|
|
150
|
+
const med = _median(vals);
|
|
151
|
+
buckets_summary.push({ bucket: lo, samples: vals.length, median: med });
|
|
152
|
+
if (med < floor && lo + 4 < suggestedUpper) {
|
|
153
|
+
suggestedUpper = lo + 4; // shrink tier upper bound just below the failing bucket
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
if (suggestedUpper !== DEFAULT_RANGES[tier][1]) {
|
|
157
|
+
ranges[tier] = [DEFAULT_RANGES[tier][0], suggestedUpper];
|
|
158
|
+
stats[tier] = { samples: ordered.reduce((s, [, v]) => s + v.length, 0), adjusted: true, buckets: buckets_summary };
|
|
159
|
+
} else {
|
|
160
|
+
stats[tier] = { samples: ordered.reduce((s, [, v]) => s + v.length, 0), adjusted: false, buckets: buckets_summary };
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// Re-stitch ranges so they don't overlap or leave gaps.
|
|
165
|
+
for (let i = 1; i < tierOrder.length; i++) {
|
|
166
|
+
const prev = ranges[tierOrder[i - 1]];
|
|
167
|
+
const cur = ranges[tierOrder[i]];
|
|
168
|
+
if (cur[0] !== prev[1] + 1) cur[0] = prev[1] + 1;
|
|
169
|
+
if (cur[0] > cur[1]) cur[1] = cur[0]; // collapsed; tier disabled in practice
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
const out = {
|
|
173
|
+
calibratedAt: new Date().toISOString(),
|
|
174
|
+
days,
|
|
175
|
+
sampleCount: rows.length,
|
|
176
|
+
ranges,
|
|
177
|
+
stats,
|
|
178
|
+
};
|
|
179
|
+
|
|
180
|
+
if (dryRun) {
|
|
181
|
+
console.log(JSON.stringify(out, null, 2));
|
|
182
|
+
return { ...out, dryRun: true };
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
fs.mkdirSync(path.dirname(OUTPUT_PATH), { recursive: true });
|
|
186
|
+
fs.writeFileSync(OUTPUT_PATH, JSON.stringify(out, null, 2));
|
|
187
|
+
console.log(`Wrote ${OUTPUT_PATH}`);
|
|
188
|
+
console.log(`Ranges: ${tierOrder.map((t) => `${t}=${ranges[t].join('-')}`).join(', ')}`);
|
|
189
|
+
return out;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
if (require.main === module) {
|
|
193
|
+
const opts = _parseArgs(process.argv.slice(2));
|
|
194
|
+
calibrate(opts);
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
module.exports = { calibrate };
|
package/scripts/compare-policies.js
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Compare active vs shadow routing policies (Phase 4.4).
|
|
4
|
+
*
|
|
5
|
+
* Reads data/shadow-decisions.jsonl and reports agreement rate and the
|
|
6
|
+
* disagreement breakdown by (active model → shadow model).
|
|
7
|
+
*
|
|
8
|
+
* Run weekly: node scripts/compare-policies.js [--days 7]
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
const fs = require('fs');
|
|
12
|
+
const path = require('path');
|
|
13
|
+
const { LOG_PATH } = require('../src/routing/shadow-mode');
|
|
14
|
+
|
|
15
|
+
function _parseArgs(argv) {
|
|
16
|
+
let days = 7;
|
|
17
|
+
for (let i = 0; i < argv.length; i++) {
|
|
18
|
+
if (argv[i] === '--days') days = Number(argv[++i]) || 7;
|
|
19
|
+
}
|
|
20
|
+
return { days };
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
function main() {
|
|
24
|
+
const { days } = _parseArgs(process.argv.slice(2));
|
|
25
|
+
if (!fs.existsSync(LOG_PATH)) {
|
|
26
|
+
console.log('No shadow decisions logged yet.');
|
|
27
|
+
return;
|
|
28
|
+
}
|
|
29
|
+
const since = Date.now() - days * 24 * 3600 * 1000;
|
|
30
|
+
const lines = fs.readFileSync(LOG_PATH, 'utf8').split('\n').filter(Boolean);
|
|
31
|
+
|
|
32
|
+
let total = 0;
|
|
33
|
+
let agree = 0;
|
|
34
|
+
const disagreement = new Map(); // "active → shadow" -> count
|
|
35
|
+
for (const line of lines) {
|
|
36
|
+
let entry;
|
|
37
|
+
try {
|
|
38
|
+
entry = JSON.parse(line);
|
|
39
|
+
} catch {
|
|
40
|
+
continue;
|
|
41
|
+
}
|
|
42
|
+
if (entry.timestamp < since) continue;
|
|
43
|
+
total++;
|
|
44
|
+
if (entry.agree) {
|
|
45
|
+
agree++;
|
|
46
|
+
} else if (entry.shadow) {
|
|
47
|
+
const key = `${entry.active.provider}:${entry.active.model} → ${entry.shadow.provider}:${entry.shadow.model}`;
|
|
48
|
+
disagreement.set(key, (disagreement.get(key) || 0) + 1);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
if (total === 0) {
|
|
53
|
+
console.log(`No decisions in last ${days} days.`);
|
|
54
|
+
return;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
console.log(`Last ${days}d: ${total} decisions, ${(agree / total * 100).toFixed(1)}% agreement`);
|
|
58
|
+
if (disagreement.size > 0) {
|
|
59
|
+
console.log('\nTop disagreements:');
|
|
60
|
+
const sorted = Array.from(disagreement.entries()).sort((a, b) => b[1] - a[1]).slice(0, 10);
|
|
61
|
+
for (const [k, c] of sorted) {
|
|
62
|
+
console.log(` ${c}× ${k}`);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
main();
|
package/scripts/learn-output-ratios.js
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Learn per-task-type output-token ratios from telemetry.
|
|
4
|
+
*
|
|
5
|
+
* Phase 2.3 of the routing overhaul. The cost-optimizer's default assumption
|
|
6
|
+
* of `output = 0.5 × input` is wrong for code generation (typically 1.5-3×)
|
|
7
|
+
* and summarization (typically 0.1-0.2×). This script builds an empirical
|
|
8
|
+
* ratio table from past completions, written to data/output-ratios.json.
|
|
9
|
+
*
|
|
10
|
+
* The cost-optimizer reads this file when estimating cost during routing.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
const fs = require('fs');
|
|
14
|
+
const path = require('path');
|
|
15
|
+
|
|
16
|
+
const DEFAULT_DAYS = 30;
|
|
17
|
+
const MIN_SAMPLES_PER_TASK = 30;
|
|
18
|
+
const OUTPUT_PATH = path.join(__dirname, '../data/output-ratios.json');
|
|
19
|
+
const TELEMETRY_DB_CANDIDATES = [
|
|
20
|
+
path.join(__dirname, '../.lynkr/telemetry.db'),
|
|
21
|
+
path.join(__dirname, '../data/lynkr.db'),
|
|
22
|
+
];
|
|
23
|
+
|
|
24
|
+
// Fallback ratios when no telemetry exists.
|
|
25
|
+
// Derived from public benchmark data (RouterBench task distribution).
|
|
26
|
+
const FALLBACK_RATIOS = {
|
|
27
|
+
simple_qa: 0.30,
|
|
28
|
+
code_gen: 2.10,
|
|
29
|
+
code_edit: 1.40,
|
|
30
|
+
summarization: 0.15,
|
|
31
|
+
reasoning: 1.50,
|
|
32
|
+
tool_use: 0.80,
|
|
33
|
+
default: 0.50,
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
function _findDb() {
|
|
37
|
+
for (const p of TELEMETRY_DB_CANDIDATES) if (fs.existsSync(p)) return p;
|
|
38
|
+
return null;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
function _openDb(dbPath) {
|
|
42
|
+
let Database;
|
|
43
|
+
try {
|
|
44
|
+
Database = require('better-sqlite3');
|
|
45
|
+
} catch {
|
|
46
|
+
console.error('better-sqlite3 not installed. Install with: npm install --save-optional better-sqlite3');
|
|
47
|
+
process.exit(2);
|
|
48
|
+
}
|
|
49
|
+
return new Database(dbPath, { readonly: true, fileMustExist: true });
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
function _median(arr) {
|
|
53
|
+
const s = arr.slice().sort((a, b) => a - b);
|
|
54
|
+
const m = Math.floor(s.length / 2);
|
|
55
|
+
return s.length % 2 ? s[m] : (s[m - 1] + s[m]) / 2;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
function _parseArgs(argv) {
|
|
59
|
+
const out = { days: DEFAULT_DAYS, dryRun: false };
|
|
60
|
+
for (let i = 0; i < argv.length; i++) {
|
|
61
|
+
if (argv[i] === '--days') out.days = Number(argv[++i]) || DEFAULT_DAYS;
|
|
62
|
+
else if (argv[i] === '--dry-run') out.dryRun = true;
|
|
63
|
+
}
|
|
64
|
+
return out;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
function learn({ days = DEFAULT_DAYS, dryRun = false } = {}) {
|
|
68
|
+
const dbPath = _findDb();
|
|
69
|
+
if (!dbPath) {
|
|
70
|
+
console.log('No telemetry DB — writing fallback ratios.');
|
|
71
|
+
if (!dryRun) {
|
|
72
|
+
fs.mkdirSync(path.dirname(OUTPUT_PATH), { recursive: true });
|
|
73
|
+
fs.writeFileSync(OUTPUT_PATH, JSON.stringify({
|
|
74
|
+
learnedAt: new Date().toISOString(),
|
|
75
|
+
source: 'fallback',
|
|
76
|
+
ratios: FALLBACK_RATIOS,
|
|
77
|
+
}, null, 2));
|
|
78
|
+
}
|
|
79
|
+
return { source: 'fallback', ratios: FALLBACK_RATIOS };
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
let db;
|
|
83
|
+
try {
|
|
84
|
+
db = _openDb(dbPath);
|
|
85
|
+
} catch (err) {
|
|
86
|
+
console.error(`Failed to open telemetry DB: ${err.message}. Writing fallback ratios.`);
|
|
87
|
+
if (!dryRun) {
|
|
88
|
+
fs.mkdirSync(path.dirname(OUTPUT_PATH), { recursive: true });
|
|
89
|
+
fs.writeFileSync(OUTPUT_PATH, JSON.stringify({
|
|
90
|
+
learnedAt: new Date().toISOString(),
|
|
91
|
+
source: 'fallback',
|
|
92
|
+
ratios: FALLBACK_RATIOS,
|
|
93
|
+
}, null, 2));
|
|
94
|
+
}
|
|
95
|
+
return { source: 'fallback', ratios: FALLBACK_RATIOS };
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
const since = Date.now() - days * 24 * 3600 * 1000;
|
|
99
|
+
let rows;
|
|
100
|
+
try {
|
|
101
|
+
rows = db
|
|
102
|
+
.prepare(
|
|
103
|
+
`SELECT task_type, input_tokens AS i, output_tokens AS o
|
|
104
|
+
FROM routing_telemetry
|
|
105
|
+
WHERE timestamp >= ?
|
|
106
|
+
AND input_tokens > 0
|
|
107
|
+
AND output_tokens > 0
|
|
108
|
+
AND task_type IS NOT NULL`
|
|
109
|
+
)
|
|
110
|
+
.all(since);
|
|
111
|
+
} catch (err) {
|
|
112
|
+
console.error(`Query failed: ${err.message}. Writing fallback.`);
|
|
113
|
+
rows = [];
|
|
114
|
+
} finally {
|
|
115
|
+
try { db.close(); } catch {}
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
// Bucket by task type
|
|
119
|
+
const buckets = new Map();
|
|
120
|
+
for (const row of rows) {
|
|
121
|
+
const key = String(row.task_type || 'default').toLowerCase();
|
|
122
|
+
if (!buckets.has(key)) buckets.set(key, []);
|
|
123
|
+
buckets.get(key).push(row.o / row.i);
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
const ratios = { ...FALLBACK_RATIOS };
|
|
127
|
+
const stats = {};
|
|
128
|
+
for (const [task, vals] of buckets) {
|
|
129
|
+
if (vals.length >= MIN_SAMPLES_PER_TASK) {
|
|
130
|
+
ratios[task] = +_median(vals).toFixed(3);
|
|
131
|
+
stats[task] = { samples: vals.length, median: ratios[task] };
|
|
132
|
+
} else {
|
|
133
|
+
stats[task] = { samples: vals.length, median: null, used_fallback: true };
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
const out = {
|
|
138
|
+
learnedAt: new Date().toISOString(),
|
|
139
|
+
days,
|
|
140
|
+
source: rows.length > 0 ? 'telemetry' : 'fallback',
|
|
141
|
+
sampleCount: rows.length,
|
|
142
|
+
ratios,
|
|
143
|
+
stats,
|
|
144
|
+
};
|
|
145
|
+
|
|
146
|
+
if (dryRun) {
|
|
147
|
+
console.log(JSON.stringify(out, null, 2));
|
|
148
|
+
return out;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
fs.mkdirSync(path.dirname(OUTPUT_PATH), { recursive: true });
|
|
152
|
+
fs.writeFileSync(OUTPUT_PATH, JSON.stringify(out, null, 2));
|
|
153
|
+
console.log(`Wrote ${OUTPUT_PATH} (source=${out.source}, samples=${out.sampleCount})`);
|
|
154
|
+
return out;
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
if (require.main === module) {
|
|
158
|
+
const opts = _parseArgs(process.argv.slice(2));
|
|
159
|
+
learn(opts);
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
module.exports = { learn, FALLBACK_RATIOS };
|