lynkr 9.1.2 → 9.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -10
- package/package.json +3 -1
- package/scripts/build-knn-index.js +130 -0
- package/scripts/calibrate-thresholds.js +197 -0
- package/scripts/compare-policies.js +67 -0
- package/scripts/learn-output-ratios.js +162 -0
- package/scripts/refresh-pricing.js +122 -0
- package/scripts/run-routerarena.js +26 -0
- package/scripts/sample-regret.js +84 -0
- package/scripts/train-risk-classifier.js +191 -0
- package/src/api/middleware/budget-enforcer.js +60 -0
- package/src/api/middleware/load-shedding.js +11 -1
- package/src/api/middleware/tenant.js +21 -0
- package/src/api/router.js +19 -40
- package/src/budget/hierarchical-budget.js +159 -0
- package/src/cache/semantic.js +28 -2
- package/src/clients/databricks.js +59 -5
- package/src/config/index.js +239 -43
- package/src/context/toon.js +5 -4
- package/src/orchestrator/index.js +44 -6
- package/src/prompts/system.js +34 -6
- package/src/routing/bandit.js +246 -0
- package/src/routing/cascade.js +106 -0
- package/src/routing/complexity-analyzer.js +7 -15
- package/src/routing/confidence-scorer.js +121 -0
- package/src/routing/context-validator.js +71 -0
- package/src/routing/cost-optimizer.js +5 -2
- package/src/routing/deadline.js +52 -0
- package/src/routing/drift-monitor.js +113 -0
- package/src/routing/embedding-cache.js +77 -0
- package/src/routing/index.js +314 -5
- package/src/routing/knn-router.js +206 -0
- package/src/routing/latency-tracker.js +113 -71
- package/src/routing/model-tiers.js +156 -6
- package/src/routing/output-ratios.js +57 -0
- package/src/routing/regret-estimator.js +91 -0
- package/src/routing/reward-pipeline.js +62 -0
- package/src/routing/risk-classifier.js +130 -0
- package/src/routing/shadow-mode.js +77 -0
- package/src/routing/tenant-policy.js +96 -0
- package/src/routing/tokenizer.js +162 -0
- package/src/server.js +9 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Refresh model pricing data.
|
|
4
|
+
*
|
|
5
|
+
* Phase 2.2 of the routing overhaul. Cron-friendly entrypoint that forces a
|
|
6
|
+
* fresh pull of LiteLLM + models.dev pricing, compares to the last cached
|
|
7
|
+
* snapshot, and logs anything that moved more than 5%.
|
|
8
|
+
*
|
|
9
|
+
* Usage: node scripts/refresh-pricing.js [--diff-only] [--threshold 0.05]
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
const fs = require('fs');
|
|
13
|
+
const path = require('path');
|
|
14
|
+
|
|
15
|
+
const CACHE_FILE = path.join(__dirname, '../data/model-prices-cache.json');
|
|
16
|
+
const PREV_FILE = path.join(__dirname, '../data/model-prices-cache.prev.json');
|
|
17
|
+
const DEFAULT_THRESHOLD = 0.05;
|
|
18
|
+
|
|
19
|
+
/**
 * Parse the CLI flags understood by the pricing refresh script.
 *
 * Recognised flags:
 *   --diff-only        only compare the existing snapshots, do not fetch
 *   --threshold <n>    minimum relative change (a fraction, e.g. 0.05) to report
 *
 * The threshold must be a finite, non-negative number. A missing value,
 * non-numeric text, or a negative number falls back to DEFAULT_THRESHOLD.
 * An explicit `0` is honoured (it means "report every movement"); a plain
 * `Number(x) || DEFAULT_THRESHOLD` would silently discard it.
 *
 * @param {string[]} argv - Arguments following the script name.
 * @returns {{diffOnly: boolean, threshold: number}} Parsed options.
 */
function _parseArgs(argv) {
  const out = { diffOnly: false, threshold: DEFAULT_THRESHOLD };
  for (let i = 0; i < argv.length; i++) {
    if (argv[i] === '--diff-only') {
      out.diffOnly = true;
    } else if (argv[i] === '--threshold') {
      const raw = argv[++i];
      // Number('') is 0, so treat a missing/blank value as invalid explicitly.
      const parsed = raw === undefined || String(raw).trim() === '' ? NaN : Number(raw);
      out.threshold = Number.isFinite(parsed) && parsed >= 0 ? parsed : DEFAULT_THRESHOLD;
    }
  }
  return out;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Best-effort JSON loader.
 *
 * @param {string} p - Path of the JSON file to load.
 * @returns {*} The parsed contents, or null when the file does not exist,
 *   cannot be read, or does not contain valid JSON.
 */
function _readJson(p) {
  let parsed = null;
  try {
    if (fs.existsSync(p)) {
      const raw = fs.readFileSync(p, 'utf8');
      parsed = JSON.parse(raw);
    }
  } catch {
    // Any read/parse failure is reported to the caller as "no data".
    parsed = null;
  }
  return parsed;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Compare two pricing snapshots and list the models whose price moved.
 *
 * Each snapshot is either an object with a `modelIndex` map or the map
 * itself. A model's price is compared as `input + output`.
 *
 * Reported move types:
 *   - 'removed'   : present in `prev`, missing (falsy) in `next`
 *   - 'added'     : present in `next`, missing (falsy) in `prev`
 *   - 'increased' / 'decreased' : relative change of the combined price is at
 *     least `threshold`
 *
 * A model that used to cost nothing and now has a positive price is reported
 * as 'increased' with `deltaPct: 'n/a'`: the relative change is undefined
 * (division by zero), but the move must not be dropped silently.
 *
 * @param {object|null} prev - Previous snapshot.
 * @param {object|null} next - Current snapshot.
 * @param {number} threshold - Minimum relative change (fraction) to report.
 * @returns {object[]} Moves; empty when either snapshot is missing.
 */
function _diff(prev, next, threshold) {
  if (!prev || !next) return [];
  const prevModels = prev.modelIndex || prev;
  const nextModels = next.modelIndex || next;
  const moves = [];

  for (const [modelId, oldCost] of Object.entries(prevModels)) {
    const newCost = nextModels[modelId];
    if (!newCost) {
      moves.push({ model: modelId, type: 'removed', oldCost });
      continue;
    }

    const oldTotal = (oldCost.input || 0) + (oldCost.output || 0);
    const newTotal = (newCost.input || 0) + (newCost.output || 0);

    if (oldTotal === 0) {
      // No baseline to divide by; still surface a free -> paid transition.
      if (newTotal > 0) {
        moves.push({
          model: modelId,
          type: 'increased',
          oldInput: oldCost.input,
          newInput: newCost.input,
          oldOutput: oldCost.output,
          newOutput: newCost.output,
          deltaPct: 'n/a',
        });
      }
      continue;
    }

    const delta = (newTotal - oldTotal) / oldTotal;
    if (Math.abs(delta) >= threshold) {
      moves.push({
        model: modelId,
        type: delta > 0 ? 'increased' : 'decreased',
        oldInput: oldCost.input,
        newInput: newCost.input,
        oldOutput: oldCost.output,
        newOutput: newCost.output,
        deltaPct: (delta * 100).toFixed(2) + '%',
      });
    }
  }

  for (const modelId of Object.keys(nextModels)) {
    if (!prevModels[modelId]) {
      moves.push({ model: modelId, type: 'added', newCost: nextModels[modelId] });
    }
  }
  return moves;
}
|
|
71
|
+
|
|
72
|
+
async function refresh({ diffOnly = false, threshold = DEFAULT_THRESHOLD } = {}) {
|
|
73
|
+
if (!diffOnly) {
|
|
74
|
+
// Snapshot current cache as "previous" before fetching
|
|
75
|
+
if (fs.existsSync(CACHE_FILE)) {
|
|
76
|
+
try {
|
|
77
|
+
fs.copyFileSync(CACHE_FILE, PREV_FILE);
|
|
78
|
+
} catch (err) {
|
|
79
|
+
console.error(`Failed to snapshot previous cache: ${err.message}`);
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
const { getModelRegistry } = require('../src/routing/model-registry');
|
|
84
|
+
const registry = await getModelRegistry();
|
|
85
|
+
// Force a refresh
|
|
86
|
+
if (typeof registry._fetchAll === 'function') {
|
|
87
|
+
await registry._fetchAll();
|
|
88
|
+
}
|
|
89
|
+
console.log(`Refreshed pricing data (cache: ${CACHE_FILE})`);
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
const prev = _readJson(PREV_FILE);
|
|
93
|
+
const next = _readJson(CACHE_FILE);
|
|
94
|
+
const moves = _diff(prev, next, threshold);
|
|
95
|
+
|
|
96
|
+
if (moves.length === 0) {
|
|
97
|
+
console.log(`No pricing changes ≥${(threshold * 100).toFixed(1)}%.`);
|
|
98
|
+
return { moves: [] };
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
console.log(`${moves.length} pricing change(s) ≥${(threshold * 100).toFixed(1)}%:`);
|
|
102
|
+
for (const move of moves) {
|
|
103
|
+
if (move.type === 'added') {
|
|
104
|
+
console.log(` + ${move.model}: input=${move.newCost.input}, output=${move.newCost.output}`);
|
|
105
|
+
} else if (move.type === 'removed') {
|
|
106
|
+
console.log(` - ${move.model}: was input=${move.oldCost.input}, output=${move.oldCost.output}`);
|
|
107
|
+
} else {
|
|
108
|
+
console.log(` ${move.type === 'increased' ? '↑' : '↓'} ${move.model}: ${move.oldInput}/${move.oldOutput} → ${move.newInput}/${move.newOutput} (${move.deltaPct})`);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
return { moves };
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// CLI entry point: runs only when this file is executed directly, not when it
// is loaded with require(). A failed refresh prints the error message and
// exits with status 1.
if (require.main === module) {
  const cliOptions = _parseArgs(process.argv.slice(2));
  refresh(cliOptions).catch((error) => {
    console.error(error.message);
    process.exit(1);
  });
}

module.exports = { refresh };
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* RouterArena evaluation harness (Phase 6.4 — STUB).
|
|
4
|
+
*
|
|
5
|
+
* This is intentionally not wired to CI yet. The plan defers RouterArena
|
|
6
|
+
* integration until after Phases 1-4 have produced 2-4 weeks of telemetry
|
|
7
|
+
* to baseline against.
|
|
8
|
+
*
|
|
9
|
+
* To wire it up:
|
|
10
|
+
* 1. Clone https://github.com/RouteWorks/RouterArena into ./routerarena/
|
|
11
|
+
* 2. Install RouterArena's Python dependencies (transformers, datasets,
|
|
12
|
+
* anthropic, openai)
|
|
13
|
+
* 3. Decide on a subset size for PR-blocking CI (recommend 100-200 queries
|
|
14
|
+
* sampled stratified by difficulty); leave the full benchmark for nightly
|
|
15
|
+
* 4. Wire to GitHub Actions with `paths: [src/routing/**]` trigger
|
|
16
|
+
* 5. Compare PR's router decisions vs main's router on the same query set,
|
|
17
|
+
* report cost/quality delta as a PR comment
|
|
18
|
+
*
|
|
19
|
+
* The intent is to use RouterArena to *catch regressions*, not to gate
|
|
20
|
+
* routing changes on absolute benchmark scores.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
// Stub behaviour: print a short notice, one line per entry, then exit with
// status 0.
const STUB_NOTICE = [
  'RouterArena integration is a stub.',
  'See scripts/run-routerarena.js for setup steps.',
  'Phase 6.4 of docs/routing-improvement-plan.md.',
];
for (const line of STUB_NOTICE) {
  console.log(line);
}
process.exit(0);
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Sample yesterday's traffic for regret estimation (Phase 4.2).
|
|
4
|
+
*
|
|
5
|
+
* Reads 0.5% of yesterday's requests from telemetry, re-runs them through
|
|
6
|
+
* Opus, and writes alerts if the routed model consistently underperforms.
|
|
7
|
+
*
|
|
8
|
+
* Costs real money — only runs when LYNKR_REGRET_ESTIMATOR=true.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
const path = require('path');
|
|
12
|
+
const fs = require('fs');
|
|
13
|
+
const { estimate, isEnabled } = require('../src/routing/regret-estimator');
|
|
14
|
+
|
|
15
|
+
const SAMPLE_RATE = 0.005;
|
|
16
|
+
|
|
17
|
+
async function main() {
|
|
18
|
+
if (!isEnabled()) {
|
|
19
|
+
console.log('LYNKR_REGRET_ESTIMATOR not set; skipping.');
|
|
20
|
+
return;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
let Database;
|
|
24
|
+
try {
|
|
25
|
+
Database = require('better-sqlite3');
|
|
26
|
+
} catch {
|
|
27
|
+
console.error('better-sqlite3 not installed');
|
|
28
|
+
process.exit(2);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
const dbPath = path.join(__dirname, '../.lynkr/telemetry.db');
|
|
32
|
+
if (!fs.existsSync(dbPath)) {
|
|
33
|
+
console.log('No telemetry DB; skipping.');
|
|
34
|
+
return;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
const db = new Database(dbPath, { readonly: true });
|
|
38
|
+
const yesterday = Date.now() - 24 * 3600 * 1000;
|
|
39
|
+
const rows = db.prepare(
|
|
40
|
+
`SELECT request_text, response_text, model, quality_score
|
|
41
|
+
FROM routing_telemetry
|
|
42
|
+
WHERE timestamp >= ?
|
|
43
|
+
AND quality_score IS NOT NULL
|
|
44
|
+
AND request_text IS NOT NULL`
|
|
45
|
+
).all(yesterday);
|
|
46
|
+
db.close();
|
|
47
|
+
|
|
48
|
+
if (rows.length === 0) {
|
|
49
|
+
console.log('No eligible rows yesterday.');
|
|
50
|
+
return;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
const sampleSize = Math.max(5, Math.floor(rows.length * SAMPLE_RATE));
|
|
54
|
+
const sampled = [];
|
|
55
|
+
while (sampled.length < sampleSize && rows.length > 0) {
|
|
56
|
+
const idx = Math.floor(Math.random() * rows.length);
|
|
57
|
+
sampled.push(rows.splice(idx, 1)[0]);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
console.log(`Sampling ${sampled.length} rows for regret estimation`);
|
|
61
|
+
|
|
62
|
+
// Caller must wire an actual Opus invocation; default to a no-op for safety.
|
|
63
|
+
const runOpus = async (req) => {
|
|
64
|
+
console.warn('No opus runner wired — implement runOpus in scripts/sample-regret.js or override via LYNKR_REGRET_OPUS_RUNNER');
|
|
65
|
+
return { response: null, quality: 0 };
|
|
66
|
+
};
|
|
67
|
+
|
|
68
|
+
const samples = sampled.map(r => ({
|
|
69
|
+
request: { messages: [{ role: 'user', content: r.request_text }] },
|
|
70
|
+
response: r.response_text,
|
|
71
|
+
model: r.model,
|
|
72
|
+
quality: r.quality_score,
|
|
73
|
+
}));
|
|
74
|
+
|
|
75
|
+
const result = await estimate({ samples, runOpus });
|
|
76
|
+
console.log(`Regret: ${result.regret.toFixed(3)} over ${result.sampledCount} samples; ${result.alerts.length} alert(s) written.`);
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// CLI entry point: runs only when executed directly. Any rejection from
// main() is printed (stack when available, otherwise the message) and the
// process exits with status 1.
if (require.main === module) {
  main().catch((error) => {
    console.error(error.stack || error.message);
    process.exit(1);
  });
}
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Train the risk classifier (Phase 3.4).
|
|
4
|
+
*
|
|
5
|
+
* Two label sources, fused:
|
|
6
|
+
* 1. Bootstrap: run the existing regex risk-analyzer over recent telemetry
|
|
7
|
+
* to produce weak labels.
|
|
8
|
+
* 2. Confirmed: requests with x-lynkr-risk-confirmed:true header logged in
|
|
9
|
+
* telemetry are treated as strong positive labels.
|
|
10
|
+
*
|
|
11
|
+
* Writes data/risk-classifier.json (weights + bias). Logistic regression
|
|
12
|
+
* trained with simple SGD over TF features (unigrams + bigrams).
|
|
13
|
+
*
|
|
14
|
+
* Usage: node scripts/train-risk-classifier.js [--days 30] [--epochs 10]
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
const fs = require('fs');
|
|
18
|
+
const path = require('path');
|
|
19
|
+
|
|
20
|
+
const DEFAULT_DAYS = 30;
|
|
21
|
+
const DEFAULT_EPOCHS = 10;
|
|
22
|
+
const LEARNING_RATE = 0.1;
|
|
23
|
+
const L2_REG = 0.0001;
|
|
24
|
+
const MIN_TOKEN_FREQ = 3;
|
|
25
|
+
|
|
26
|
+
const OUTPUT_PATH = path.join(__dirname, '../data/risk-classifier.json');
|
|
27
|
+
const TELEMETRY_DB_CANDIDATES = [
|
|
28
|
+
path.join(__dirname, '../.lynkr/telemetry.db'),
|
|
29
|
+
path.join(__dirname, '../data/lynkr.db'),
|
|
30
|
+
];
|
|
31
|
+
|
|
32
|
+
/**
 * Locate the telemetry database.
 *
 * @returns {string|null} The first path in TELEMETRY_DB_CANDIDATES that
 *   exists on disk, or null when none of them does.
 */
function _findDb() {
  const existing = TELEMETRY_DB_CANDIDATES.find((candidate) => fs.existsSync(candidate));
  return existing === undefined ? null : existing;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Split text into lower-case tokens.
 *
 * A token is a maximal run of the characters a-z, 0-9, `_`, `-`, `/` and `.`
 * after lower-casing; everything else acts as a separator.
 *
 * @param {*} text - Value to tokenize; falsy values yield no tokens.
 * @returns {string[]} Tokens in order of appearance.
 */
function _tokenize(text) {
  if (!text) return [];
  const lowered = String(text).toLowerCase();
  // Matching runs of token characters is equivalent to splitting on runs of
  // non-token characters and discarding empty pieces.
  return lowered.match(/[a-z0-9_\-/.]+/g) || [];
}
|
|
41
|
+
|
|
42
|
+
/**
 * Build term-frequency features for a piece of text.
 *
 * @param {*} text - Input passed to `_tokenize`.
 * @returns {Map<string, number>} Occurrence counts keyed by unigram (the
 *   token) and bigram (two adjacent tokens joined by a single space).
 */
function _features(text) {
  const counts = new Map();
  const bump = (key) => counts.set(key, (counts.get(key) || 0) + 1);

  const tokens = _tokenize(text);
  tokens.forEach((token, idx) => {
    bump(token);
    // Every token except the last also contributes a bigram with its successor.
    if (idx + 1 < tokens.length) {
      bump(`${token} ${tokens[idx + 1]}`);
    }
  });
  return counts;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Logistic function 1 / (1 + e^-z).
 *
 * The negative branch uses the algebraically equal form e^z / (1 + e^z), so
 * Math.exp is only ever called with a non-positive argument.
 *
 * @param {number} z - Input value.
 * @returns {number} A value between 0 and 1.
 */
function _sigmoid(z) {
  if (z < 0) {
    const expZ = Math.exp(z);
    return expZ / (1 + expZ);
  }
  return 1 / (1 + Math.exp(-z));
}
|
|
60
|
+
|
|
61
|
+
/**
 * Parse the CLI flags of the risk-classifier training script.
 *
 * Recognised flags:
 *   --days <n>     look-back window in days; must be a finite number > 0
 *   --epochs <n>   number of training passes; must be an integer > 0
 *
 * Invalid values (missing, non-numeric, zero, negative, infinite, or a
 * fractional epoch count) fall back to the defaults. Without this check a
 * negative epoch count would skip training entirely and an infinite one
 * would never terminate.
 *
 * @param {string[]} argv - Arguments following the script name.
 * @returns {{days: number, epochs: number}} Parsed options.
 */
function _parseArgs(argv) {
  const out = { days: DEFAULT_DAYS, epochs: DEFAULT_EPOCHS };
  for (let i = 0; i < argv.length; i++) {
    if (argv[i] === '--days') {
      const days = Number(argv[++i]);
      out.days = Number.isFinite(days) && days > 0 ? days : DEFAULT_DAYS;
    } else if (argv[i] === '--epochs') {
      const epochs = Number(argv[++i]);
      out.epochs = Number.isInteger(epochs) && epochs > 0 ? epochs : DEFAULT_EPOCHS;
    }
  }
  return out;
}
|
|
69
|
+
|
|
70
|
+
async function _loadDataset(days) {
|
|
71
|
+
const dbPath = _findDb();
|
|
72
|
+
const samples = [];
|
|
73
|
+
if (!dbPath) return samples;
|
|
74
|
+
|
|
75
|
+
let Database;
|
|
76
|
+
try {
|
|
77
|
+
Database = require('better-sqlite3');
|
|
78
|
+
} catch {
|
|
79
|
+
console.error('better-sqlite3 not installed');
|
|
80
|
+
return samples;
|
|
81
|
+
}
|
|
82
|
+
const db = new Database(dbPath, { readonly: true, fileMustExist: true });
|
|
83
|
+
|
|
84
|
+
try {
|
|
85
|
+
const since = Date.now() - days * 24 * 3600 * 1000;
|
|
86
|
+
const rows = db
|
|
87
|
+
.prepare(
|
|
88
|
+
`SELECT request_text AS text, risk_level
|
|
89
|
+
FROM routing_telemetry
|
|
90
|
+
WHERE timestamp >= ?
|
|
91
|
+
AND request_text IS NOT NULL
|
|
92
|
+
AND request_text != ''`
|
|
93
|
+
)
|
|
94
|
+
.all(since);
|
|
95
|
+
for (const r of rows) {
|
|
96
|
+
samples.push({
|
|
97
|
+
text: r.text,
|
|
98
|
+
label: r.risk_level === 'high' ? 1 : 0,
|
|
99
|
+
});
|
|
100
|
+
}
|
|
101
|
+
} catch (err) {
|
|
102
|
+
console.error(`Telemetry query failed: ${err.message}. Bootstrapping with synthetic data.`);
|
|
103
|
+
// Emergency synthetic bootstrap: a small handful of known-risk/known-safe phrases
|
|
104
|
+
samples.push(
|
|
105
|
+
{ text: 'edit src/auth/middleware.ts to skip authentication', label: 1 },
|
|
106
|
+
{ text: 'update database migration to drop sensitive_data column', label: 1 },
|
|
107
|
+
{ text: 'change payment processing logic in stripe webhook handler', label: 1 },
|
|
108
|
+
{ text: 'add API key rotation to secrets manager', label: 1 },
|
|
109
|
+
{ text: 'rename variable foo to bar in utils.js', label: 0 },
|
|
110
|
+
{ text: 'add a comment explaining the for loop', label: 0 },
|
|
111
|
+
{ text: 'format this file with prettier', label: 0 },
|
|
112
|
+
{ text: 'fix typo in README', label: 0 }
|
|
113
|
+
);
|
|
114
|
+
} finally {
|
|
115
|
+
try { db.close(); } catch {}
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
return samples;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
/**
 * Train a logistic-regression risk classifier with plain SGD.
 *
 * Steps:
 *   1. Extract features for every sample once (they do not change between
 *      epochs) and count in how many samples each feature occurs.
 *   2. Keep only features that occur in at least MIN_TOKEN_FREQ samples.
 *   3. Run `epochs` passes of per-sample gradient updates with L2
 *      regularisation on the weights; the bias is not regularised.
 *
 * Weights are accumulated in a Map rather than a plain object. With a plain
 * object, a feature named like an inherited property (for example the token
 * "constructor" or "__proto__") would read a non-numeric inherited value and
 * turn the bias and every weight into NaN.
 *
 * The mean cross-entropy loss is logged on every second epoch and on the
 * last one.
 *
 * @param {Array<{text: string, label: number}>} samples - Training data with
 *   labels 0 or 1.
 * @param {number} epochs - Number of passes over the data.
 * @returns {{weights: Object<string, number>, bias: number, vocabSize: number}}
 */
function _train(samples, epochs) {
  // Feature extraction is loop-invariant: do it once, not once per epoch.
  const extracted = samples.map((s) => _features(s.text));

  // Build vocab with a frequency threshold (number of samples per feature).
  const vocab = new Map();
  for (const feats of extracted) {
    for (const tok of feats.keys()) {
      vocab.set(tok, (vocab.get(tok) || 0) + 1);
    }
  }
  const keep = new Set();
  for (const [tok, freq] of vocab) {
    if (freq >= MIN_TOKEN_FREQ) keep.add(tok);
  }

  // Per-sample [feature, count] pairs restricted to the kept vocabulary.
  const prepared = samples.map((s, i) => ({
    label: s.label,
    feats: [...extracted[i]].filter(([tok]) => keep.has(tok)),
  }));

  const weights = new Map();
  let bias = 0;

  for (let epoch = 0; epoch < epochs; epoch++) {
    let lossSum = 0;
    for (const { label, feats } of prepared) {
      let z = bias;
      for (const [tok, count] of feats) {
        z += (weights.get(tok) || 0) * count;
      }
      const pred = _sigmoid(z);
      const err = pred - label;
      // The small epsilon keeps log() finite when pred saturates at 0 or 1.
      lossSum += -(label * Math.log(pred + 1e-9) + (1 - label) * Math.log(1 - pred + 1e-9));
      bias -= LEARNING_RATE * err;
      for (const [tok, count] of feats) {
        const w = weights.get(tok) || 0;
        weights.set(tok, w - LEARNING_RATE * (err * count + L2_REG * w));
      }
    }
    if (epoch % 2 === 0 || epoch === epochs - 1) {
      console.log(` epoch ${epoch + 1}/${epochs} loss=${(lossSum / samples.length).toFixed(4)}`);
    }
  }

  // Object.fromEntries defines own data properties, so even a "__proto__"
  // feature ends up as a regular, JSON-serialisable key.
  return { weights: Object.fromEntries(weights), bias, vocabSize: keep.size };
}
|
|
163
|
+
|
|
164
|
+
/**
 * Script entry point: load the dataset, train, and write the model file.
 *
 * Exits with status 1 when fewer than 10 samples are available. Otherwise
 * the trained model is written to OUTPUT_PATH as compact JSON together with
 * the training timestamp, the sample count and the epoch count; the output
 * directory is created if needed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { days, epochs } = _parseArgs(process.argv.slice(2));
  const samples = await _loadDataset(days);

  if (samples.length < 10) {
    console.error(`Only ${samples.length} samples — too few. Skipping training.`);
    process.exit(1);
  }

  const positiveCount = samples.filter((sample) => sample.label === 1).length;
  console.log(`Training on ${samples.length} samples (${positiveCount} positive)`);
  const model = _train(samples, epochs);

  fs.mkdirSync(path.dirname(OUTPUT_PATH), { recursive: true });
  const payload = {
    trainedAt: new Date().toISOString(),
    samples: samples.length,
    epochs,
    ...model,
  };
  fs.writeFileSync(OUTPUT_PATH, JSON.stringify(payload, null, 0));
  console.log(`Wrote ${OUTPUT_PATH} (vocab=${model.vocabSize})`);
}
|
|
183
|
+
|
|
184
|
+
// CLI entry point: runs only when executed directly. Any rejection from
// main() is printed (stack when available, otherwise the message) and the
// process exits with status 1.
if (require.main === module) {
  main().catch((error) => {
    console.error(error.stack || error.message);
    process.exit(1);
  });
}

// Internal helpers are exported alongside nothing else; main() is CLI-only.
module.exports = { _train, _features };
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Budget enforcement middleware (Phase 6.2).
|
|
3
|
+
*
|
|
4
|
+
* Reads tenant/budget context from request headers, checks the hierarchical
|
|
5
|
+
* budget ceiling, and rejects with 429 if exceeded.
|
|
6
|
+
*
|
|
7
|
+
* Header contract:
|
|
8
|
+
* LYNKR-Virtual-Key, LYNKR-Team-Id, LYNKR-Customer-Id, LYNKR-Org-Id
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
const logger = require('../../logger');
|
|
12
|
+
const { getHierarchicalBudget } = require('../../budget/hierarchical-budget');
|
|
13
|
+
|
|
14
|
+
/**
 * Extract the budget context from the LYNKR-* request headers.
 *
 * Header names are looked up in lower case. A missing or empty header is
 * reported as null.
 *
 * @param {object} req - Request object; `req.headers` may be absent.
 * @returns {{virtual_key: ?string, team: ?string, customer: ?string, org: ?string}}
 */
function _readContext(req) {
  const headers = req.headers || {};
  const headerOrNull = (name) => headers[name] || null;
  return {
    virtual_key: headerOrNull('lynkr-virtual-key'),
    team: headerOrNull('lynkr-team-id'),
    customer: headerOrNull('lynkr-customer-id'),
    org: headerOrNull('lynkr-org-id'),
  };
}
|
|
23
|
+
|
|
24
|
+
/**
 * Express middleware that rejects requests from accounts whose budget is
 * already exhausted.
 *
 * Behaviour:
 *   - When LYNKR_BUDGET_ENFORCER is the string 'false', the check is skipped
 *     and `next()` is called immediately.
 *   - Otherwise the budget context is read from the request headers and
 *     checked against the hierarchical budget with a fixed nominal amount of
 *     $0.01 (no per-request cost estimate is made here).
 *   - If the check fails, a warning is logged and a 429 JSON error is sent.
 *   - If it passes, the context is stored on `res.locals.budgetContext` so a
 *     later stage can record the actual spend, and `next()` is called.
 */
function budgetEnforcer(req, res, next) {
  const enforcementDisabled = process.env.LYNKR_BUDGET_ENFORCER === 'false';
  if (enforcementDisabled) return next();

  const context = _readContext(req);
  // A non-zero nominal amount means an account sitting exactly at its
  // ceiling is rejected before any routing work is done.
  const verdict = getHierarchicalBudget().check(context, 0.01);

  if (verdict.ok) {
    res.locals = res.locals || {};
    res.locals.budgetContext = context;
    next();
    return;
  }

  const { exceeded } = verdict;
  logger.warn({ exceeded }, '[BudgetEnforcer] Budget exceeded');
  return res.status(429).json({
    error: {
      type: 'budget_exceeded',
      message: `Budget exceeded for ${exceeded.level}=${exceeded.id}`,
      ...exceeded,
    },
  });
}
|
|
50
|
+
|
|
51
|
+
/**
 * Record the cost of a completed request against the hierarchical budget.
 *
 * Does nothing when no budget context is supplied (for example when the
 * enforcer middleware did not run for the request).
 *
 * @param {object|null|undefined} context - Budget context for the request.
 * @param {number} amount - Actual cost to record.
 */
function recordSpend(context, amount) {
  if (context) {
    getHierarchicalBudget().record(context, amount);
  }
}
|
|
59
|
+
|
|
60
|
+
module.exports = { budgetEnforcer, recordSpend };
|
|
@@ -44,11 +44,18 @@ class LoadShedder {
|
|
|
44
44
|
const memUsage = process.memoryUsage();
|
|
45
45
|
const heapUsedPercent = memUsage.heapUsed / memUsage.heapTotal;
|
|
46
46
|
|
|
47
|
-
if
|
|
47
|
+
// FIX: Only trigger if BOTH percentage is high AND actual usage is significant
|
|
48
|
+
// This prevents false positives on startup when heapTotal is small but will grow
|
|
49
|
+
const heapUsedMB = memUsage.heapUsed / (1024 * 1024);
|
|
50
|
+
const minHeapThresholdMB = 500; // Only shed load if using more than 500MB
|
|
51
|
+
|
|
52
|
+
if (heapUsedPercent > this.heapThreshold && heapUsedMB > minHeapThresholdMB) {
|
|
48
53
|
logger.warn(
|
|
49
54
|
{
|
|
50
55
|
heapUsedPercent: (heapUsedPercent * 100).toFixed(2),
|
|
56
|
+
heapUsedMB: heapUsedMB.toFixed(2),
|
|
51
57
|
threshold: (this.heapThreshold * 100).toFixed(2),
|
|
58
|
+
minThresholdMB: minHeapThresholdMB,
|
|
52
59
|
},
|
|
53
60
|
"Load shedding: Heap usage exceeded threshold"
|
|
54
61
|
);
|
|
@@ -96,6 +103,9 @@ class LoadShedder {
|
|
|
96
103
|
activeRequests: this.activeRequests,
|
|
97
104
|
totalShed: this.totalShed,
|
|
98
105
|
heapUsedPercent: ((memUsage.heapUsed / memUsage.heapTotal) * 100).toFixed(2),
|
|
106
|
+
heapUsedMB: (memUsage.heapUsed / (1024 * 1024)).toFixed(2),
|
|
107
|
+
heapTotalMB: (memUsage.heapTotal / (1024 * 1024)).toFixed(2),
|
|
108
|
+
rssMB: (memUsage.rss / (1024 * 1024)).toFixed(2),
|
|
99
109
|
rssPercent: ((memUsage.rss / os.totalmem()) * 100).toFixed(2),
|
|
100
110
|
thresholds: {
|
|
101
111
|
heapThreshold: (this.heapThreshold * 100).toFixed(2),
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tenant context middleware (Phase 6.1).
|
|
3
|
+
*
|
|
4
|
+
* Reads LYNKR-Tenant-Id from request headers and attaches the loaded tenant
|
|
5
|
+
* policy to res.locals.tenantPolicy for downstream handlers.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
const { getTenantId, getPolicy } = require('../../routing/tenant-policy');
|
|
9
|
+
|
|
10
|
+
/**
 * Express middleware that attaches the tenant id and its policy to
 * `res.locals` when the request identifies a tenant.
 *
 * `res.locals` is always initialised; `tenantId` and `tenantPolicy` are set
 * only when `getTenantId(req)` returns a truthy id. `next()` is always called.
 */
function tenantMiddleware(req, res, next) {
  const tenantId = getTenantId(req);
  res.locals = res.locals || {};

  if (!tenantId) {
    next();
    return;
  }

  // The policy is resolved before anything is written to res.locals.
  Object.assign(res.locals, { tenantId, tenantPolicy: getPolicy(tenantId) });
  next();
}
|
|
20
|
+
|
|
21
|
+
module.exports = { tenantMiddleware };
|
package/src/api/router.js
CHANGED
|
@@ -3,6 +3,7 @@ const { processMessage } = require("../orchestrator");
|
|
|
3
3
|
const { getSession } = require("../sessions");
|
|
4
4
|
const metrics = require("../metrics");
|
|
5
5
|
const logger = require("../logger");
|
|
6
|
+
const config = require("../config");
|
|
6
7
|
const { createRateLimiter } = require("./middleware/rate-limiter");
|
|
7
8
|
const openaiRouter = require("./openai-router");
|
|
8
9
|
const providersRouter = require("./providers-handler");
|
|
@@ -17,50 +18,26 @@ const router = express.Router();
|
|
|
17
18
|
const rateLimiter = createRateLimiter();
|
|
18
19
|
|
|
19
20
|
/**
|
|
20
|
-
* Estimate token count for messages
|
|
21
|
-
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
* @returns {number} Estimated input token count
|
|
21
|
+
* Estimate token count for messages.
|
|
22
|
+
*
|
|
23
|
+
* Phase 1.1: tiktoken-backed via routing/tokenizer (graceful fallback to chars/4
|
|
24
|
+
* if js-tiktoken is unavailable).
|
|
25
25
|
*/
|
|
26
|
-
|
|
27
|
-
let totalChars = 0;
|
|
28
|
-
|
|
29
|
-
// Count system prompt characters
|
|
30
|
-
if (system) {
|
|
31
|
-
if (typeof system === "string") {
|
|
32
|
-
totalChars += system.length;
|
|
33
|
-
} else if (Array.isArray(system)) {
|
|
34
|
-
system.forEach((block) => {
|
|
35
|
-
if (block.type === "text" && block.text) {
|
|
36
|
-
totalChars += block.text.length;
|
|
37
|
-
}
|
|
38
|
-
});
|
|
39
|
-
}
|
|
40
|
-
}
|
|
26
|
+
const { countMessagesTokens } = require("../routing/tokenizer");
|
|
41
27
|
|
|
42
|
-
|
|
43
|
-
messages
|
|
44
|
-
if (msg.content) {
|
|
45
|
-
if (typeof msg.content === "string") {
|
|
46
|
-
totalChars += msg.content.length;
|
|
47
|
-
} else if (Array.isArray(msg.content)) {
|
|
48
|
-
msg.content.forEach((block) => {
|
|
49
|
-
if (block.type === "text" && block.text) {
|
|
50
|
-
totalChars += block.text.length;
|
|
51
|
-
} else if (block.type === "image" && block.source?.data) {
|
|
52
|
-
// Images: rough estimate based on base64 length
|
|
53
|
-
totalChars += Math.floor(block.source.data.length / 6);
|
|
54
|
-
}
|
|
55
|
-
});
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
});
|
|
59
|
-
|
|
60
|
-
// Estimate tokens: ~4 characters per token
|
|
61
|
-
return Math.ceil(totalChars / 4);
|
|
28
|
+
function estimateTokenCount(messages = [], system = null, model = null) {
  // Thin wrapper: the counting itself lives in routing/tokenizer.
  const tokenTotal = countMessagesTokens(messages, system, model);
  return tokenTotal;
}
|
|
63
31
|
|
|
32
|
+
// Root health check. HEAD / answers 200 with an empty body; GET / answers
// with a small JSON status document.
router.head("/", (_req, res) => {
  res.status(200).end();
});

router.get("/", (_req, res) => {
  const payload = { status: "ok", service: "lynkr" };
  res.json(payload);
});
|
|
40
|
+
|
|
64
41
|
router.get("/health", (req, res) => {
|
|
65
42
|
res.json({ status: "ok" });
|
|
66
43
|
});
|
|
@@ -371,6 +348,7 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
371
348
|
options: {
|
|
372
349
|
maxSteps: req.body?.max_steps,
|
|
373
350
|
maxDurationMs: req.body?.max_duration_ms,
|
|
351
|
+
tenantPolicy: res.locals?.tenantPolicy || null,
|
|
374
352
|
},
|
|
375
353
|
});
|
|
376
354
|
|
|
@@ -604,6 +582,7 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
604
582
|
options: {
|
|
605
583
|
maxSteps: req.body?.max_steps,
|
|
606
584
|
maxDurationMs: req.body?.max_duration_ms,
|
|
585
|
+
tenantPolicy: res.locals?.tenantPolicy || null,
|
|
607
586
|
},
|
|
608
587
|
});
|
|
609
588
|
timer.mark("processMessage");
|