lynkr 9.0.2 → 9.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -10
- package/bin/cli.js +18 -1
- package/bin/lynkr-trajectory.js +136 -0
- package/bin/lynkr-usage.js +219 -0
- package/funding.json +110 -0
- package/package.json +4 -2
- package/public/dashboard.html +665 -0
- package/scripts/build-knn-index.js +130 -0
- package/scripts/calibrate-thresholds.js +197 -0
- package/scripts/compare-policies.js +67 -0
- package/scripts/learn-output-ratios.js +162 -0
- package/scripts/refresh-pricing.js +122 -0
- package/scripts/run-routerarena.js +26 -0
- package/scripts/sample-regret.js +84 -0
- package/scripts/train-risk-classifier.js +191 -0
- package/src/api/files-router.js +6 -6
- package/src/api/middleware/budget-enforcer.js +60 -0
- package/src/api/middleware/budget.js +19 -1
- package/src/api/middleware/load-shedding.js +17 -0
- package/src/api/middleware/tenant.js +21 -0
- package/src/api/openai-router.js +1 -1
- package/src/api/router.js +204 -87
- package/src/budget/hierarchical-budget.js +159 -0
- package/src/cache/semantic.js +28 -2
- package/src/clients/databricks.js +68 -10
- package/src/clients/openai-format.js +31 -5
- package/src/config/index.js +246 -43
- package/src/context/toon.js +5 -4
- package/src/dashboard/api.js +170 -0
- package/src/dashboard/router.js +13 -0
- package/src/headroom/client.js +3 -109
- package/src/headroom/index.js +0 -14
- package/src/memory/search.js +0 -50
- package/src/orchestrator/index.js +106 -11
- package/src/orchestrator/preflight.js +188 -0
- package/src/prompts/system.js +34 -6
- package/src/routing/bandit.js +246 -0
- package/src/routing/cascade.js +106 -0
- package/src/routing/complexity-analyzer.js +7 -15
- package/src/routing/confidence-scorer.js +121 -0
- package/src/routing/context-validator.js +71 -0
- package/src/routing/cost-optimizer.js +5 -2
- package/src/routing/deadline.js +52 -0
- package/src/routing/drift-monitor.js +113 -0
- package/src/routing/embedding-cache.js +77 -0
- package/src/routing/index.js +374 -4
- package/src/routing/interaction.js +183 -0
- package/src/routing/knn-router.js +206 -0
- package/src/routing/latency-tracker.js +113 -71
- package/src/routing/model-tiers.js +156 -6
- package/src/routing/output-ratios.js +57 -0
- package/src/routing/regret-estimator.js +91 -0
- package/src/routing/reward-pipeline.js +62 -0
- package/src/routing/risk-analyzer.js +194 -0
- package/src/routing/risk-classifier.js +130 -0
- package/src/routing/shadow-mode.js +77 -0
- package/src/routing/telemetry.js +7 -0
- package/src/routing/tenant-policy.js +96 -0
- package/src/routing/tokenizer.js +162 -0
- package/src/server.js +12 -0
- package/src/stores/file-store.js +42 -7
- package/src/tools/smart-selection.js +11 -2
- package/src/training/trajectory-compressor.js +266 -0
- package/src/usage/aggregator.js +206 -0
- package/src/utils/markdown-ansi.js +146 -0
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Sample yesterday's traffic for regret estimation (Phase 4.2).
|
|
4
|
+
*
|
|
5
|
+
* Reads 0.5% (at least 5 rows) of the last 24 hours of scored requests from telemetry, re-runs them through
|
|
6
|
+
* Opus, and writes alerts if the routed model consistently underperforms.
|
|
7
|
+
*
|
|
8
|
+
* Costs real money — only runs when LYNKR_REGRET_ESTIMATOR=true.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
const path = require('path');
|
|
12
|
+
const fs = require('fs');
|
|
13
|
+
const { estimate, isEnabled } = require('../src/routing/regret-estimator');
|
|
14
|
+
|
|
15
|
+
// Fraction of eligible telemetry rows to re-run (0.5%); main() still samples at least 5 rows.
const SAMPLE_RATE = 0.005;
|
|
16
|
+
|
|
17
|
+
/**
 * Samples recent telemetry rows and hands them to the regret estimator.
 *
 * Flow:
 *   1. Bail out unless the estimator is enabled (isEnabled()).
 *   2. Load rows from the last 24 hours that have both a request text and a
 *      quality score from `.lynkr/telemetry.db`.
 *   3. Pick max(5, SAMPLE_RATE * rowCount) rows uniformly at random (capped
 *      at the number of rows available).
 *   4. Call estimate({ samples, runOpus }) and print a one-line summary.
 *
 * Exits the process with code 2 when better-sqlite3 is not installed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  if (!isEnabled()) {
    console.log('LYNKR_REGRET_ESTIMATOR not set; skipping.');
    return;
  }

  let Database;
  try {
    Database = require('better-sqlite3');
  } catch {
    console.error('better-sqlite3 not installed');
    process.exit(2);
  }

  const dbPath = path.join(__dirname, '../.lynkr/telemetry.db');
  if (!fs.existsSync(dbPath)) {
    console.log('No telemetry DB; skipping.');
    return;
  }

  const since = Date.now() - 24 * 3600 * 1000;
  const db = new Database(dbPath, { readonly: true });
  let rows;
  try {
    rows = db.prepare(
      `SELECT request_text, response_text, model, quality_score
       FROM routing_telemetry
       WHERE timestamp >= ?
         AND quality_score IS NOT NULL
         AND request_text IS NOT NULL`
    ).all(since);
  } finally {
    // Release the handle even when the query throws (e.g. missing table).
    db.close();
  }

  if (rows.length === 0) {
    console.log('No eligible rows yesterday.');
    return;
  }

  // Partial Fisher-Yates shuffle: the first `sampleSize` slots end up holding
  // a uniform random sample without repeatedly splicing the array.
  const sampleSize = Math.min(
    rows.length,
    Math.max(5, Math.floor(rows.length * SAMPLE_RATE))
  );
  for (let i = 0; i < sampleSize; i++) {
    const j = i + Math.floor(Math.random() * (rows.length - i));
    [rows[i], rows[j]] = [rows[j], rows[i]];
  }
  const sampled = rows.slice(0, sampleSize);

  console.log(`Sampling ${sampled.length} rows for regret estimation`);

  // Caller must wire an actual Opus invocation; default to a no-op for safety.
  // The warning is emitted once rather than once per sampled request.
  let warned = false;
  const runOpus = async () => {
    if (!warned) {
      warned = true;
      console.warn('No opus runner wired — implement runOpus in scripts/sample-regret.js or override via LYNKR_REGRET_OPUS_RUNNER');
    }
    return { response: null, quality: 0 };
  };

  const samples = sampled.map((r) => ({
    request: { messages: [{ role: 'user', content: r.request_text }] },
    response: r.response_text,
    model: r.model,
    quality: r.quality_score,
  }));

  const result = await estimate({ samples, runOpus });
  console.log(`Regret: ${result.regret.toFixed(3)} over ${result.sampledCount} samples; ${result.alerts.length} alert(s) written.`);
}
|
|
78
|
+
|
|
79
|
+
// Only run when this file is executed directly, not when it is required.
if (module === require.main) {
  main().catch((failure) => {
    const detail = failure.stack || failure.message;
    console.error(detail);
    process.exit(1);
  });
}
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Train the risk classifier (Phase 3.4).
|
|
4
|
+
*
|
|
5
|
+
* Two label sources, fused:
|
|
6
|
+
* 1. Bootstrap: run the existing regex risk-analyzer over recent telemetry
|
|
7
|
+
* to produce weak labels.
|
|
8
|
+
* 2. Confirmed: requests with x-lynkr-risk-confirmed:true header logged in
|
|
9
|
+
* telemetry are treated as strong positive labels.
|
|
10
|
+
*
|
|
11
|
+
* Writes data/risk-classifier.json (weights + bias). Logistic regression
|
|
12
|
+
* trained with simple SGD over TF features (unigrams + bigrams).
|
|
13
|
+
*
|
|
14
|
+
* Usage: node scripts/train-risk-classifier.js [--days 30] [--epochs 10]
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
const fs = require('fs');
|
|
18
|
+
const path = require('path');
|
|
19
|
+
|
|
20
|
+
// Defaults used when the matching CLI flag is absent or unusable.
const DEFAULT_DAYS = 30;
const DEFAULT_EPOCHS = 10;

// SGD step size and the L2 penalty coefficient applied to feature weights.
const LEARNING_RATE = 0.1;
const L2_REG = 0.0001;

// A feature must appear in at least this many samples to enter the vocabulary.
const MIN_TOKEN_FREQ = 3;

// Where the trained model is written.
const OUTPUT_PATH = path.join(__dirname, '../data/risk-classifier.json');

// Telemetry databases to probe, in priority order.
const TELEMETRY_DB_CANDIDATES = ['../.lynkr/telemetry.db', '../data/lynkr.db'].map(
  (relative) => path.join(__dirname, relative)
);
|
|
31
|
+
|
|
32
|
+
/**
 * Returns the first telemetry DB candidate path that exists on disk.
 *
 * @returns {string|null} The existing path, or null when none exists.
 */
function _findDb() {
  const existing = TELEMETRY_DB_CANDIDATES.find((candidate) => fs.existsSync(candidate));
  return existing === undefined ? null : existing;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Lower-cases the input and splits it into tokens.
 *
 * A token is a maximal run of the characters a-z, 0-9, `_`, `-`, `/` and `.`;
 * everything else acts as a separator.
 *
 * @param {*} text - Value to tokenize; falsy values yield no tokens.
 * @returns {string[]} Non-empty tokens in order of appearance.
 */
function _tokenize(text) {
  if (!text) {
    return [];
  }
  const lowered = String(text).toLowerCase();
  const pieces = lowered.split(/[^a-z0-9_\-/.]+/);
  return pieces.filter((piece) => piece.length > 0);
}
|
|
41
|
+
|
|
42
|
+
/**
 * Builds a term-frequency map of unigrams and adjacent-token bigrams.
 *
 * Bigram keys are the two tokens joined by a single space.
 *
 * @param {*} text - Raw text, tokenized with _tokenize.
 * @returns {Map<string, number>} Feature -> occurrence count.
 */
function _features(text) {
  const tokens = _tokenize(text);
  const counts = new Map();
  const bump = (key) => {
    counts.set(key, (counts.get(key) || 0) + 1);
  };

  tokens.forEach((token, position) => {
    bump(token);
    const following = position + 1;
    if (following < tokens.length) {
      bump(`${token} ${tokens[following]}`);
    }
  });

  return counts;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Logistic function 1 / (1 + e^-z).
 *
 * Negative inputs use the equivalent form e^z / (1 + e^z) so that the
 * exponential is never taken of a large positive number.
 *
 * @param {number} z
 * @returns {number} Value in [0, 1] (NaN for NaN input).
 */
function _sigmoid(z) {
  if (z < 0) {
    const expZ = Math.exp(z);
    return expZ / (1 + expZ);
  }
  return 1 / (1 + Math.exp(-z));
}
|
|
60
|
+
|
|
61
|
+
/**
 * Parses the `--days` and `--epochs` command-line options.
 *
 * Accepts both `--flag value` and `--flag=value`. A value that is missing,
 * non-numeric, non-finite or not strictly positive falls back to the default
 * for that flag. A flag with no value never consumes the following flag, so
 * `--days --epochs 5` still sets epochs to 5. Unknown arguments are ignored.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{days: number, epochs: number}}
 */
function _parseArgs(argv) {
  const defaults = { days: DEFAULT_DAYS, epochs: DEFAULT_EPOCHS };
  const out = { ...defaults };

  for (let i = 0; i < argv.length; i++) {
    const match = /^--(days|epochs)(?:=(.*))?$/.exec(String(argv[i]));
    if (!match) continue;

    const key = match[1];
    let raw = match[2];
    if (raw === undefined) {
      const next = argv[i + 1];
      // Missing value: keep the default and leave the next flag untouched.
      if (next === undefined || String(next).startsWith('--')) continue;
      raw = next;
      i++;
    }

    const value = Number(raw);
    out[key] = Number.isFinite(value) && value > 0 ? value : defaults[key];
  }

  return out;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Loads labelled training samples from the telemetry database.
 *
 * Each row of `routing_telemetry` newer than `days` days with a non-empty
 * request text becomes `{ text, label }`, where label is 1 when the stored
 * risk_level is 'high' and 0 otherwise.
 *
 * Returns an empty list when no database file is found or better-sqlite3
 * cannot be loaded. If the query itself fails, a small hard-coded set of
 * synthetic examples is appended instead.
 *
 * NOTE(review): the synthetic fallback holds 8 samples while main() refuses
 * to train on fewer than 10, so the fallback alone looks insufficient to
 * train — confirm whether that is intended.
 *
 * @param {number} days - Look-back window in days.
 * @returns {Promise<Array<{text: string, label: number}>>}
 */
async function _loadDataset(days) {
  const dataset = [];
  const dbPath = _findDb();
  if (!dbPath) {
    return dataset;
  }

  let Database;
  try {
    Database = require('better-sqlite3');
  } catch {
    console.error('better-sqlite3 not installed');
    return dataset;
  }
  const db = new Database(dbPath, { readonly: true, fileMustExist: true });

  const query = [
    'SELECT request_text AS text, risk_level',
    'FROM routing_telemetry',
    'WHERE timestamp >= ?',
    'AND request_text IS NOT NULL',
    "AND request_text != ''",
  ].join('\n');

  try {
    const cutoff = Date.now() - days * 24 * 3600 * 1000;
    db.prepare(query)
      .all(cutoff)
      .forEach(({ text, risk_level: riskLevel }) => {
        dataset.push({ text, label: riskLevel === 'high' ? 1 : 0 });
      });
  } catch (err) {
    console.error(`Telemetry query failed: ${err.message}. Bootstrapping with synthetic data.`);
    // Emergency synthetic bootstrap: known-risk phrases first, then known-safe ones.
    const risky = [
      'edit src/auth/middleware.ts to skip authentication',
      'update database migration to drop sensitive_data column',
      'change payment processing logic in stripe webhook handler',
      'add API key rotation to secrets manager',
    ];
    const benign = [
      'rename variable foo to bar in utils.js',
      'add a comment explaining the for loop',
      'format this file with prettier',
      'fix typo in README',
    ];
    dataset.push(
      ...risky.map((text) => ({ text, label: 1 })),
      ...benign.map((text) => ({ text, label: 0 }))
    );
  } finally {
    // Best-effort close; a failure here must not mask the result.
    try { db.close(); } catch {}
  }

  return dataset;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Trains a logistic-regression model with per-sample SGD and L2 weight decay.
 *
 * Features come from _features(); only features that occur in at least
 * MIN_TOKEN_FREQ samples are kept. Progress is logged every other epoch and
 * on the last epoch.
 *
 * Weights are accumulated in a Map rather than a plain object: tokens such as
 * "constructor" or "__proto__" would otherwise read inherited
 * Object.prototype members as their initial weight and turn every parameter
 * into NaN.
 *
 * @param {Array<{text: string, label: number}>} samples - label is 0 or 1.
 * @param {number} epochs - Number of passes over the samples.
 * @returns {{weights: Object<string, number>, bias: number, vocabSize: number}}
 */
function _train(samples, epochs) {
  // Feature extraction does not depend on the epoch, so do it once.
  const featurized = samples.map((s) => ({ label: s.label, feats: _features(s.text) }));

  // Build vocab with frequency threshold (number of samples containing the feature).
  const sampleFreq = new Map();
  for (const { feats } of featurized) {
    for (const tok of feats.keys()) {
      sampleFreq.set(tok, (sampleFreq.get(tok) || 0) + 1);
    }
  }
  const keep = new Set();
  for (const [tok, freq] of sampleFreq) {
    if (freq >= MIN_TOKEN_FREQ) keep.add(tok);
  }

  // Drop out-of-vocabulary features up front; order within a sample is preserved.
  const dataset = featurized.map(({ label, feats }) => ({
    label,
    active: [...feats].filter(([tok]) => keep.has(tok)),
  }));

  const weights = new Map();
  let bias = 0;

  for (let epoch = 0; epoch < epochs; epoch++) {
    let lossSum = 0;
    for (const { label, active } of dataset) {
      let z = bias;
      for (const [tok, count] of active) {
        z += (weights.get(tok) || 0) * count;
      }
      const pred = _sigmoid(z);
      const err = pred - label;
      // Cross-entropy with a small epsilon to avoid log(0).
      lossSum += -(label * Math.log(pred + 1e-9) + (1 - label) * Math.log(1 - pred + 1e-9));
      bias -= LEARNING_RATE * err;
      for (const [tok, count] of active) {
        const w = weights.get(tok) || 0;
        weights.set(tok, w - LEARNING_RATE * (err * count + L2_REG * w));
      }
    }
    if (epoch % 2 === 0 || epoch === epochs - 1) {
      const meanLoss = samples.length > 0 ? lossSum / samples.length : 0;
      console.log(`  epoch ${epoch + 1}/${epochs} loss=${meanLoss.toFixed(4)}`);
    }
  }

  return { weights: Object.fromEntries(weights), bias, vocabSize: keep.size };
}
|
|
163
|
+
|
|
164
|
+
/**
 * CLI entry point: loads the dataset, trains the classifier and writes the
 * resulting model (plus training metadata) to OUTPUT_PATH as JSON.
 *
 * Exits with code 1 when fewer than 10 samples are available.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { days, epochs } = _parseArgs(process.argv.slice(2));
  const samples = await _loadDataset(days);
  const total = samples.length;

  if (total < 10) {
    console.error(`Only ${total} samples — too few. Skipping training.`);
    process.exit(1);
  }

  const positives = samples.reduce((n, sample) => (sample.label === 1 ? n + 1 : n), 0);
  console.log(`Training on ${total} samples (${positives} positive)`);

  const model = _train(samples, epochs);
  const payload = {
    trainedAt: new Date().toISOString(),
    samples: total,
    epochs,
    ...model,
  };

  const outputDir = path.dirname(OUTPUT_PATH);
  fs.mkdirSync(outputDir, { recursive: true });
  fs.writeFileSync(OUTPUT_PATH, JSON.stringify(payload, null, 0));
  console.log(`Wrote ${OUTPUT_PATH} (vocab=${model.vocabSize})`);
}
|
|
183
|
+
|
|
184
|
+
// Only run when this file is executed directly, not when it is required.
if (module === require.main) {
  main().catch((failure) => {
    const detail = failure.stack || failure.message;
    console.error(detail);
    process.exit(1);
  });
}
|
|
190
|
+
|
|
191
|
+
// Expose the trainer and the feature extractor to modules that require this script.
module.exports = { _train, _features };
|
package/src/api/files-router.js
CHANGED
|
@@ -33,7 +33,7 @@ router.post("/files", async (req, res) => {
|
|
|
33
33
|
filename = parsed.filename || filename;
|
|
34
34
|
mimeType = parsed.mimeType || mimeType;
|
|
35
35
|
purpose = parsed.purpose || purpose;
|
|
36
|
-
const entry = fileStore.storeFile(parsed.file, { filename, purpose, mimeType });
|
|
36
|
+
const entry = await fileStore.storeFile(parsed.file, { filename, purpose, mimeType });
|
|
37
37
|
return res.json(entry);
|
|
38
38
|
}
|
|
39
39
|
}
|
|
@@ -43,7 +43,7 @@ router.post("/files", async (req, res) => {
|
|
|
43
43
|
mimeType = contentType.split(";")[0].trim() || mimeType;
|
|
44
44
|
filename = req.headers["x-filename"] || filename;
|
|
45
45
|
purpose = req.query.purpose || purpose;
|
|
46
|
-
const entry = fileStore.storeFile(buffer, { filename, purpose, mimeType });
|
|
46
|
+
const entry = await fileStore.storeFile(buffer, { filename, purpose, mimeType });
|
|
47
47
|
res.json(entry);
|
|
48
48
|
} catch (err) {
|
|
49
49
|
logger.error({ err }, "File upload failed");
|
|
@@ -62,18 +62,18 @@ router.get("/files/:id", (req, res) => {
|
|
|
62
62
|
res.json(file);
|
|
63
63
|
});
|
|
64
64
|
|
|
65
|
-
router.get("/files/:id/content", (req, res) => {
|
|
65
|
+
router.get("/files/:id/content", async (req, res) => {
|
|
66
66
|
const file = fileStore.getFile(req.params.id);
|
|
67
67
|
if (!file) return res.status(404).json({ error: { message: "File not found" } });
|
|
68
|
-
const content = fileStore.getFileContent(req.params.id);
|
|
68
|
+
const content = await fileStore.getFileContent(req.params.id);
|
|
69
69
|
if (!content) return res.status(404).json({ error: { message: "File content not found" } });
|
|
70
70
|
res.setHeader("Content-Type", file.mime_type);
|
|
71
71
|
res.setHeader("Content-Disposition", `attachment; filename="${file.filename}"`);
|
|
72
72
|
res.send(content);
|
|
73
73
|
});
|
|
74
74
|
|
|
75
|
-
router.delete("/files/:id", (req, res) => {
|
|
76
|
-
const deleted = fileStore.deleteFile(req.params.id);
|
|
75
|
+
router.delete("/files/:id", async (req, res) => {
|
|
76
|
+
const deleted = await fileStore.deleteFile(req.params.id);
|
|
77
77
|
if (!deleted) return res.status(404).json({ error: { message: "File not found" } });
|
|
78
78
|
res.json({ id: req.params.id, object: "file", deleted: true });
|
|
79
79
|
});
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Budget enforcement middleware (Phase 6.2).
|
|
3
|
+
*
|
|
4
|
+
* Reads tenant/budget context from request headers, checks the hierarchical
|
|
5
|
+
* budget ceiling, and rejects with 429 if exceeded.
|
|
6
|
+
*
|
|
7
|
+
* Header contract:
|
|
8
|
+
* LYNKR-Virtual-Key, LYNKR-Team-Id, LYNKR-Customer-Id, LYNKR-Org-Id
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
const logger = require('../../logger');
|
|
12
|
+
const { getHierarchicalBudget } = require('../../budget/hierarchical-budget');
|
|
13
|
+
|
|
14
|
+
/**
 * Extracts the budget hierarchy identifiers from the request headers.
 *
 * @param {{headers?: Object}} req - Incoming request.
 * @returns {{virtual_key: ?string, team: ?string, customer: ?string, org: ?string}}
 *   Each field is the header value, or null when the header is absent or empty.
 */
function _readContext(req) {
  const headers = req.headers || {};
  const pick = (name) => headers[name] || null;

  return {
    virtual_key: pick('lynkr-virtual-key'),
    team: pick('lynkr-team-id'),
    customer: pick('lynkr-customer-id'),
    org: pick('lynkr-org-id'),
  };
}
|
|
23
|
+
|
|
24
|
+
/**
 * Express middleware that rejects requests whose budget is already exhausted.
 *
 * Reads the budget context from the request headers and asks the hierarchical
 * budget whether a nominal minimum charge still fits. When it does not, the
 * request is answered with HTTP 429 and a `budget_exceeded` error; otherwise
 * the context is stored on `res.locals.budgetContext` and the chain continues.
 *
 * This middleware does not record spend itself. Setting the environment
 * variable LYNKR_BUDGET_ENFORCER to 'false' disables the check entirely.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @param {Function} next - Next middleware.
 */
function budgetEnforcer(req, res, next) {
  if (process.env.LYNKR_BUDGET_ENFORCER === 'false') return next();

  // Nominal "minimum" charge used for the pre-check so exhausted accounts are
  // rejected before any routing happens.
  const MINIMUM_CHARGE_USD = 0.01;

  const context = _readContext(req);
  const check = getHierarchicalBudget().check(context, MINIMUM_CHARGE_USD);

  if (!check.ok) {
    const exceeded = check.exceeded || {};
    logger.warn({ exceeded: check.exceeded }, '[BudgetEnforcer] Budget exceeded');
    return res.status(429).json({
      error: {
        // Spread first so budget-internal fields can never overwrite the
        // public `type` / `message` contract of the error payload.
        ...exceeded,
        type: 'budget_exceeded',
        message: `Budget exceeded for ${exceeded.level}=${exceeded.id}`,
      },
    });
  }

  res.locals = res.locals || {};
  res.locals.budgetContext = context;
  next();
}
|
|
50
|
+
|
|
51
|
+
/**
 * Records actual spend against the hierarchical budget once a request is done.
 * Intended to be called by request handlers with the real cost.
 *
 * @param {object|null|undefined} context - Budget context; nothing is recorded when falsy.
 * @param {number} amount - Amount to record.
 */
function recordSpend(context, amount) {
  if (context) {
    const budget = getHierarchicalBudget();
    budget.record(context, amount);
  }
}
|
|
59
|
+
|
|
60
|
+
// Public API: the Express middleware and the helper for recording spend.
module.exports = { budgetEnforcer, recordSpend };
|
|
@@ -57,12 +57,30 @@ function budgetMiddleware(req, res, next) {
|
|
|
57
57
|
}, 'Budget warning: approaching limits');
|
|
58
58
|
}
|
|
59
59
|
|
|
60
|
-
// Attach budget info to request for usage recording later
|
|
61
60
|
req.budgetInfo = {
|
|
62
61
|
userId,
|
|
63
62
|
budgetCheck,
|
|
63
|
+
startTime: Date.now(),
|
|
64
64
|
};
|
|
65
65
|
|
|
66
|
+
// Record usage after response completes
|
|
67
|
+
res.on('finish', () => {
|
|
68
|
+
try {
|
|
69
|
+
const usage = res.locals.usage;
|
|
70
|
+
if (!usage) return;
|
|
71
|
+
budgetManager.recordUsage(userId, req.session?.id || null, {
|
|
72
|
+
tokensInput: usage.prompt_tokens || usage.input_tokens || 0,
|
|
73
|
+
tokensOutput: usage.completion_tokens || usage.output_tokens || 0,
|
|
74
|
+
costUsd: usage.cost_usd || 0,
|
|
75
|
+
model: usage.model || null,
|
|
76
|
+
endpoint: req.path,
|
|
77
|
+
latencyMs: Date.now() - req.budgetInfo.startTime,
|
|
78
|
+
});
|
|
79
|
+
} catch (err) {
|
|
80
|
+
logger.warn({ err: err.message }, 'Failed to record usage after response');
|
|
81
|
+
}
|
|
82
|
+
});
|
|
83
|
+
|
|
66
84
|
next();
|
|
67
85
|
}
|
|
68
86
|
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
const os = require("os");
|
|
1
2
|
const logger = require("../../logger");
|
|
2
3
|
const { ServiceUnavailableError } = require("./error-handling");
|
|
3
4
|
|
|
@@ -55,6 +56,20 @@ class LoadShedder {
|
|
|
55
56
|
return true;
|
|
56
57
|
}
|
|
57
58
|
|
|
59
|
+
// Check RSS / system memory
|
|
60
|
+
const rssPercent = memUsage.rss / os.totalmem();
|
|
61
|
+
if (rssPercent > this.memoryThreshold) {
|
|
62
|
+
logger.warn(
|
|
63
|
+
{
|
|
64
|
+
rssPercent: (rssPercent * 100).toFixed(2),
|
|
65
|
+
threshold: (this.memoryThreshold * 100).toFixed(2),
|
|
66
|
+
},
|
|
67
|
+
"Load shedding: RSS memory usage exceeded threshold"
|
|
68
|
+
);
|
|
69
|
+
this.cachedOverloadState = true;
|
|
70
|
+
return true;
|
|
71
|
+
}
|
|
72
|
+
|
|
58
73
|
// Check active requests
|
|
59
74
|
if (this.activeRequests > this.activeRequestsThreshold) {
|
|
60
75
|
logger.warn(
|
|
@@ -81,8 +96,10 @@ class LoadShedder {
|
|
|
81
96
|
activeRequests: this.activeRequests,
|
|
82
97
|
totalShed: this.totalShed,
|
|
83
98
|
heapUsedPercent: ((memUsage.heapUsed / memUsage.heapTotal) * 100).toFixed(2),
|
|
99
|
+
rssPercent: ((memUsage.rss / os.totalmem()) * 100).toFixed(2),
|
|
84
100
|
thresholds: {
|
|
85
101
|
heapThreshold: (this.heapThreshold * 100).toFixed(2),
|
|
102
|
+
memoryThreshold: (this.memoryThreshold * 100).toFixed(2),
|
|
86
103
|
activeRequestsThreshold: this.activeRequestsThreshold,
|
|
87
104
|
},
|
|
88
105
|
};
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tenant context middleware (Phase 6.1).
|
|
3
|
+
*
|
|
4
|
+
* Reads LYNKR-Tenant-Id from request headers and attaches the loaded tenant
|
|
5
|
+
* policy to res.locals.tenantPolicy for downstream handlers.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
const { getTenantId, getPolicy } = require('../../routing/tenant-policy');
|
|
9
|
+
|
|
10
|
+
/**
 * Express middleware that attaches tenant information to the response locals.
 *
 * When getTenantId(req) yields a tenant id, its policy is looked up with
 * getPolicy and both are stored as `res.locals.tenantId` and
 * `res.locals.tenantPolicy`. Requests without a tenant id pass through
 * unchanged apart from `res.locals` being initialised.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @param {Function} next - Next middleware.
 */
function tenantMiddleware(req, res, next) {
  const tenantId = getTenantId(req);

  if (!res.locals) {
    res.locals = {};
  }

  if (tenantId) {
    // Resolve the policy before touching locals so a lookup failure leaves them untouched.
    const tenantPolicy = getPolicy(tenantId);
    Object.assign(res.locals, { tenantId, tenantPolicy });
  }

  next();
}
|
|
20
|
+
|
|
21
|
+
// Public API: the tenant-context Express middleware.
module.exports = { tenantMiddleware };
|
package/src/api/openai-router.js
CHANGED
|
@@ -366,7 +366,7 @@ router.post("/chat/completions", async (req, res) => {
|
|
|
366
366
|
role: m.role,
|
|
367
367
|
contentPreview: typeof m.content === 'string'
|
|
368
368
|
? m.content.substring(0, 200)
|
|
369
|
-
: JSON.stringify(m.content).substring(0, 200)
|
|
369
|
+
: (m.content == null ? null : (JSON.stringify(m.content) ?? '').substring(0, 200))
|
|
370
370
|
}));
|
|
371
371
|
|
|
372
372
|
logger.debug({
|