lynkr 9.0.2 → 9.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/README.md +21 -10
  2. package/bin/cli.js +18 -1
  3. package/bin/lynkr-trajectory.js +136 -0
  4. package/bin/lynkr-usage.js +219 -0
  5. package/funding.json +110 -0
  6. package/package.json +4 -2
  7. package/public/dashboard.html +665 -0
  8. package/scripts/build-knn-index.js +130 -0
  9. package/scripts/calibrate-thresholds.js +197 -0
  10. package/scripts/compare-policies.js +67 -0
  11. package/scripts/learn-output-ratios.js +162 -0
  12. package/scripts/refresh-pricing.js +122 -0
  13. package/scripts/run-routerarena.js +26 -0
  14. package/scripts/sample-regret.js +84 -0
  15. package/scripts/train-risk-classifier.js +191 -0
  16. package/src/api/files-router.js +6 -6
  17. package/src/api/middleware/budget-enforcer.js +60 -0
  18. package/src/api/middleware/budget.js +19 -1
  19. package/src/api/middleware/load-shedding.js +17 -0
  20. package/src/api/middleware/tenant.js +21 -0
  21. package/src/api/openai-router.js +1 -1
  22. package/src/api/router.js +204 -87
  23. package/src/budget/hierarchical-budget.js +159 -0
  24. package/src/cache/semantic.js +28 -2
  25. package/src/clients/databricks.js +68 -10
  26. package/src/clients/openai-format.js +31 -5
  27. package/src/config/index.js +246 -43
  28. package/src/context/toon.js +5 -4
  29. package/src/dashboard/api.js +170 -0
  30. package/src/dashboard/router.js +13 -0
  31. package/src/headroom/client.js +3 -109
  32. package/src/headroom/index.js +0 -14
  33. package/src/memory/search.js +0 -50
  34. package/src/orchestrator/index.js +106 -11
  35. package/src/orchestrator/preflight.js +188 -0
  36. package/src/prompts/system.js +34 -6
  37. package/src/routing/bandit.js +246 -0
  38. package/src/routing/cascade.js +106 -0
  39. package/src/routing/complexity-analyzer.js +7 -15
  40. package/src/routing/confidence-scorer.js +121 -0
  41. package/src/routing/context-validator.js +71 -0
  42. package/src/routing/cost-optimizer.js +5 -2
  43. package/src/routing/deadline.js +52 -0
  44. package/src/routing/drift-monitor.js +113 -0
  45. package/src/routing/embedding-cache.js +77 -0
  46. package/src/routing/index.js +374 -4
  47. package/src/routing/interaction.js +183 -0
  48. package/src/routing/knn-router.js +206 -0
  49. package/src/routing/latency-tracker.js +113 -71
  50. package/src/routing/model-tiers.js +156 -6
  51. package/src/routing/output-ratios.js +57 -0
  52. package/src/routing/regret-estimator.js +91 -0
  53. package/src/routing/reward-pipeline.js +62 -0
  54. package/src/routing/risk-analyzer.js +194 -0
  55. package/src/routing/risk-classifier.js +130 -0
  56. package/src/routing/shadow-mode.js +77 -0
  57. package/src/routing/telemetry.js +7 -0
  58. package/src/routing/tenant-policy.js +96 -0
  59. package/src/routing/tokenizer.js +162 -0
  60. package/src/server.js +12 -0
  61. package/src/stores/file-store.js +42 -7
  62. package/src/tools/smart-selection.js +11 -2
  63. package/src/training/trajectory-compressor.js +266 -0
  64. package/src/usage/aggregator.js +206 -0
  65. package/src/utils/markdown-ansi.js +146 -0
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Output-token ratio lookup (Phase 2.3).
3
+ *
4
+ * Reads data/output-ratios.json (built by scripts/learn-output-ratios.js).
5
+ * Falls back to hardcoded defaults when the file is absent.
6
+ */
7
+
8
+ const fs = require('fs');
9
+ const path = require('path');
10
+ const logger = require('../logger');
11
+
12
+ const FILE_PATH = path.join(__dirname, '../../data/output-ratios.json');
13
+
14
+ const DEFAULT_RATIOS = {
15
+ simple_qa: 0.30,
16
+ code_gen: 2.10,
17
+ code_edit: 1.40,
18
+ summarization: 0.15,
19
+ reasoning: 1.50,
20
+ tool_use: 0.80,
21
+ default: 0.50,
22
+ };
23
+
24
+ let _cached = null;
25
+ let _cacheLoadedAt = 0;
26
+ const RELOAD_INTERVAL_MS = 60_000;
27
+
28
/**
 * Return the effective task-type -> output-ratio table.
 *
 * The table is cached for RELOAD_INTERVAL_MS; after that FILE_PATH is read
 * again. Entries from the file's `ratios` object are layered over
 * DEFAULT_RATIOS, but only when the value is a finite, non-negative number,
 * so a corrupt or hand-edited file cannot replace a default with a string,
 * null, NaN or a negative ratio.
 *
 * A read or parse failure is logged at debug level and the defaults are used.
 *
 * @returns {Object<string, number>} merged ratio table, or DEFAULT_RATIOS
 *   itself when the file is absent, unreadable or has no usable `ratios`.
 */
function _load() {
  if (_cached && Date.now() - _cacheLoadedAt < RELOAD_INTERVAL_MS) return _cached;

  let table = DEFAULT_RATIOS;
  try {
    if (fs.existsSync(FILE_PATH)) {
      const data = JSON.parse(fs.readFileSync(FILE_PATH, 'utf8'));
      const learned = data?.ratios;
      if (learned && typeof learned === 'object' && !Array.isArray(learned)) {
        // Keep only values that can safely take part in token arithmetic.
        const usable = Object.entries(learned).filter(
          ([, ratio]) => Number.isFinite(ratio) && ratio >= 0,
        );
        table = { ...DEFAULT_RATIOS, ...Object.fromEntries(usable) };
      }
    }
  } catch (err) {
    logger.debug({ err: err.message }, '[OutputRatios] Load failed, using defaults');
  }

  // Cache the fallback too, so a missing file is not re-probed on every call.
  _cached = table;
  _cacheLoadedAt = Date.now();
  return _cached;
}
46
+
47
/**
 * Look up the output-token ratio for a task type.
 *
 * The lookup is case-insensitive. Anything that is not a non-empty string
 * (null, undefined, numbers, objects, ...) is treated as the "default" task
 * type instead of throwing.
 *
 * @param {string} [taskType] - task type name, e.g. "code_gen"
 * @returns {number} the ratio for that task type, else the table's `default`
 *   entry, else 0.5
 */
function ratioFor(taskType) {
  const ratios = _load();
  const key = typeof taskType === 'string' && taskType !== ''
    ? taskType.toLowerCase()
    : 'default';
  // Own-property check: keys such as "constructor" or "toString" must not
  // resolve to inherited Object.prototype members.
  const own = Object.prototype.hasOwnProperty.call(ratios, key) ? ratios[key] : undefined;
  return own ?? ratios.default ?? 0.5;
}
52
+
53
/**
 * Discard the cached ratio table. Clearing `_cached` is the only effect;
 * nothing is read from disk here.
 */
function reload() {
  _cached = null;
}
56
+
57
+ module.exports = { ratioFor, reload, DEFAULT_RATIOS };
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Regret estimator (Phase 4.2).
3
+ *
4
+ * Periodically samples a fraction of yesterday's requests, re-runs them
5
+ * through a strictly-better model (Opus), and compares quality. If the
6
+ * routed model consistently underperforms vs Opus by >10%, this writes an
7
+ * alert to data/regret-alerts.json.
8
+ *
9
+ * Off by default (costs real money). Enable with LYNKR_REGRET_ESTIMATOR=true
10
+ * and run via cron: `node scripts/sample-regret.js`.
11
+ */
12
+
13
+ const fs = require('fs');
14
+ const path = require('path');
15
+ const logger = require('../logger');
16
+
17
+ const ALERTS_PATH = path.join(__dirname, '../../data/regret-alerts.json');
18
+
19
/**
 * Estimate routing regret by re-running sampled requests through Opus.
 *
 * For every sample the relative quality gap
 * `(opusQuality - routedQuality) / max(1, opusQuality)` is computed. A sample
 * counts as underperforming when that gap exceeds `threshold`. A model with
 * at least 5 scored samples of which more than half underperform produces an
 * alert; alerts are appended to ALERTS_PATH (only the last 100 are kept).
 *
 * Samples without a finite numeric quality, on either side, are skipped so a
 * single missing score cannot turn the averages into NaN. Samples whose
 * re-run throws are skipped as well. Neither kind counts towards
 * `sampledCount`.
 *
 * @param {object} args
 * @param {Array<{request: object, response: object, model: string, quality: number}>} [args.samples]
 *   routed requests to re-run; a missing or non-array value means "no samples"
 * @param {function} args.runOpus - async (request) -> { response, quality }
 * @param {number} [args.threshold] - fractional underperformance threshold (default 0.10)
 * @returns {Promise<{ regret: number, sampledCount: number, alerts: object[] }>}
 *   mean regret over the scored samples (0 when there are none), the number
 *   of scored samples, and the alerts raised by this run
 */
async function estimate(args) {
  const threshold = args.threshold ?? 0.10;
  const samples = Array.isArray(args.samples) ? args.samples : [];
  const results = [];
  for (const s of samples) {
    try {
      // Check the routed score first: no point paying for a re-run we cannot compare.
      if (!Number.isFinite(s.quality)) {
        logger.debug({ model: s.model }, '[RegretEstimator] Sample has no numeric quality, skipped');
        continue;
      }
      const opus = await args.runOpus(s.request);
      if (!Number.isFinite(opus?.quality)) {
        logger.debug({ model: s.model }, '[RegretEstimator] Opus re-run returned no numeric quality, skipped');
        continue;
      }
      const delta = (opus.quality - s.quality) / Math.max(1, opus.quality);
      results.push({
        model: s.model,
        routedQuality: s.quality,
        opusQuality: opus.quality,
        regret: Math.max(0, delta),
        underperforming: delta > threshold,
      });
    } catch (err) {
      logger.debug({ err: err.message }, '[RegretEstimator] Opus re-run failed');
    }
  }

  const byModel = new Map();
  for (const r of results) {
    if (!byModel.has(r.model)) byModel.set(r.model, []);
    byModel.get(r.model).push(r);
  }

  const alerts = [];
  for (const [model, runs] of byModel) {
    const underperforming = runs.filter((r) => r.underperforming).length;
    const rate = underperforming / runs.length;
    if (rate > 0.5 && runs.length >= 5) {
      alerts.push({
        model,
        underperformingRate: rate,
        sampleSize: runs.length,
        avgRegret: runs.reduce((sum, r) => sum + r.regret, 0) / runs.length,
        timestamp: Date.now(),
      });
    }
  }

  if (alerts.length > 0) {
    try {
      fs.mkdirSync(path.dirname(ALERTS_PATH), { recursive: true });
      let existing = [];
      if (fs.existsSync(ALERTS_PATH)) {
        try {
          existing = JSON.parse(fs.readFileSync(ALERTS_PATH, 'utf8'));
        } catch (parseErr) {
          // Best effort: start a fresh history, but leave a trace of why.
          logger.warn({ err: parseErr.message }, '[RegretEstimator] Existing alerts file unreadable, starting fresh');
        }
      }
      const out = Array.isArray(existing) ? existing : [];
      out.push(...alerts);
      // Keep last 100 alerts
      const trimmed = out.slice(-100);
      fs.writeFileSync(ALERTS_PATH, JSON.stringify(trimmed, null, 2));
    } catch (err) {
      logger.warn({ err: err.message }, '[RegretEstimator] Alert write failed');
    }
  }

  const totalRegret = results.reduce((sum, r) => sum + r.regret, 0) / Math.max(1, results.length);
  return { regret: totalRegret, sampledCount: results.length, alerts };
}
86
+
87
/**
 * Report whether the regret estimator is switched on.
 * @returns {boolean} true only when the LYNKR_REGRET_ESTIMATOR environment
 *   variable is exactly the string "true"
 */
function isEnabled() {
  const flag = process.env.LYNKR_REGRET_ESTIMATOR;
  return flag === 'true';
}
90
+
91
+ module.exports = { estimate, isEnabled };
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Reward pipeline for the LinUCB bandit (Phase 4.1).
3
+ *
4
+ * Combines quality score, normalised cost, and normalised latency into a
5
+ * single scalar reward in [0, 100]. The bandit then rescales to [0, 1].
6
+ *
7
+ * reward = quality - λ·norm_cost·100 - μ·norm_latency·100
8
+ *
9
+ * Normalisation uses running min/max so we don't need to pre-compute global
10
+ * scales.
11
+ */
12
+
13
+ const logger = require('../logger');
14
+
15
+ const DEFAULT_LAMBDA = 0.3;
16
+ const DEFAULT_MU = 0.1;
17
+
18
/**
 * Turns (quality, cost, latency) observations into a scalar reward.
 *
 *   reward = quality - lambda * norm_cost * 100 - mu * norm_latency * 100
 *
 * clamped to [0, 100]. Cost and latency are normalised against the running
 * min/max of the values observed so far.
 */
class RewardPipeline {
  /**
   * @param {object} [weights]
   * @param {number} [weights.lambda] - cost penalty weight (defaults to DEFAULT_LAMBDA)
   * @param {number} [weights.mu] - latency penalty weight (defaults to DEFAULT_MU)
   */
  constructor({ lambda = DEFAULT_LAMBDA, mu = DEFAULT_MU } = {}) {
    this.lambda = lambda;
    this.mu = mu;
    this.costRange = { min: Infinity, max: -Infinity };
    this.latencyRange = { min: Infinity, max: -Infinity };
  }

  /**
   * Widen the running cost/latency ranges with one observation.
   * Only finite, non-negative numbers are accepted: a single Infinity would
   * otherwise pin the range's max at Infinity and switch normalisation off
   * for the lifetime of the instance.
   *
   * @param {object} [obs]
   * @param {number} [obs.cost]
   * @param {number} [obs.latency]
   */
  observe({ cost, latency } = {}) {
    if (Number.isFinite(cost) && cost >= 0) {
      this.costRange.min = Math.min(this.costRange.min, cost);
      this.costRange.max = Math.max(this.costRange.max, cost);
    }
    if (Number.isFinite(latency) && latency >= 0) {
      this.latencyRange.min = Math.min(this.latencyRange.min, latency);
      this.latencyRange.max = Math.max(this.latencyRange.max, latency);
    }
  }

  /**
   * Map `value` onto [0, 1] relative to `range`.
   * @param {number} value
   * @param {{min: number, max: number}} range
   * @returns {number} 0 when the range is empty or degenerate (max <= min)
   *   or when `value` is not a finite number; otherwise the clamped position
   *   of `value` inside the range
   */
  _normalize(value, range) {
    if (!Number.isFinite(range.min) || !Number.isFinite(range.max) || range.max <= range.min) return 0;
    if (!Number.isFinite(value)) return 0;
    const clamped = Math.max(range.min, Math.min(range.max, value));
    return (clamped - range.min) / (range.max - range.min);
  }

  /**
   * Record the observation, then score it.
   * @param {object} obs - { quality: 0-100, cost: dollars, latency: ms }
   * @returns {number} reward in [0, 100]; a missing or non-finite quality is
   *   scored as 50
   */
  reward(obs) {
    this.observe(obs);
    const quality = Number.isFinite(obs.quality) ? obs.quality : 50;
    const normCost = this._normalize(obs.cost ?? 0, this.costRange);
    const normLatency = this._normalize(obs.latency ?? 0, this.latencyRange);
    const raw = quality - this.lambda * normCost * 100 - this.mu * normLatency * 100;
    return Math.max(0, Math.min(100, raw));
  }
}
55
+
56
// Module-level singleton, created on first request.
let _instance = null;

/**
 * Return the shared RewardPipeline, constructing it with default weights the
 * first time it is asked for.
 * @returns {RewardPipeline}
 */
function getRewardPipeline() {
  if (_instance === null) {
    _instance = new RewardPipeline();
  }
  return _instance;
}
61
+
62
+ module.exports = { RewardPipeline, getRewardPipeline };
@@ -0,0 +1,194 @@
1
+ /**
2
+ * Risk Analyzer
3
+ *
4
+ * Scores a request along a risk axis that is orthogonal to complexity.
5
+ * A trivially short edit to `auth/middleware.ts` is still high risk and
6
+ * should not be served by a cheap local model.
7
+ *
8
+ * @module routing/risk-analyzer
9
+ */
10
+
11
+ const { extractContent } = require('./complexity-analyzer');
12
+
13
+ // Substring keywords found in file paths or instruction text.
14
+ // Matched case-insensitively as raw substrings, so "auth" hits
15
+ // "src/auth/login.ts" and "authentication".
16
+ const PROTECTED_PATH_KEYWORDS = [
17
+ 'auth', 'oauth', 'jwt', 'session', 'security', 'permission', 'rbac',
18
+ 'payment', 'payments', 'billing', 'invoice', 'subscription',
19
+ 'migration', 'migrations', 'schema',
20
+ 'infra', 'terraform', 'kustomize', 'helm', 'kubernetes',
21
+ '.github/workflows', '.env', 'secret', 'credential',
22
+ 'api-key', 'api_key', 'apikey', 'token',
23
+ 'webhook', 'admin',
24
+ ];
25
+
26
+ // Instruction keywords (matched as case-insensitive substrings) that signal sensitive intent regardless
27
+ // of which files are involved. Higher signal than path keywords because
28
+ // they reflect what the user is *asking for*.
29
+ const HIGH_RISK_INSTRUCTION_KEYWORDS = [
30
+ 'authentication', 'authorization', 'permission', 'security',
31
+ 'payment', 'billing', 'migration', 'database schema',
32
+ 'encrypt', 'decrypt', 'secret', 'credential', 'api key',
33
+ 'production', 'deploy', 'rollout', 'rollback',
34
+ ];
35
+
36
+ // Path-extracting patterns. We look at:
37
+ // 1. Anything that looks like a file path inside the instruction text.
38
+ // 2. Explicit path-like fields in tool inputs (e.g. tool_use blocks).
39
+ const PATH_LIKE_RE = /(?:^|[\s`'"([])([./a-zA-Z0-9_-]+\.[a-zA-Z0-9]{1,8})(?=[\s`'")\]:,;]|$)/g;
40
+ const SLASHED_PATH_RE = /(?:^|[\s`'"([])((?:[a-zA-Z0-9_.-]+\/)+[a-zA-Z0-9_.-]+)(?=[\s`'")\]:,;]|$)/g;
41
+
42
/**
 * Collect every path-shaped substring from free-form text.
 *
 * Two patterns are applied in turn: PATH_LIKE_RE (something ending in a short
 * file extension) and SLASHED_PATH_RE (slash-separated segments). Results are
 * de-duplicated and returned in first-seen order.
 *
 * @param {string} text
 * @returns {string[]} unique path candidates; empty for falsy input
 */
function extractPathsFromText(text) {
  if (!text) return [];
  const source = String(text);
  const found = new Set();
  for (const pattern of [PATH_LIKE_RE, SLASHED_PATH_RE]) {
    for (const match of source.matchAll(pattern)) {
      found.add(match[1]);
    }
  }
  return [...found];
}
59
+
60
/**
 * Collect path-looking strings from the inputs of every `tool_use` block in
 * the conversation.
 *
 * Each tool input is traversed depth-first with an explicit stack, so nested
 * objects and arrays are covered. A string is kept when it contains "/" or
 * "." and is at most 200 characters long.
 *
 * @param {object} payload - request payload; `payload.messages` is scanned
 * @returns {string[]} unique candidate strings; empty when there are no messages
 */
function extractPathsFromToolUses(payload) {
  const messages = payload?.messages;
  if (!Array.isArray(messages)) return [];

  const looksLikePath = (value) =>
    (value.includes('/') || value.includes('.')) && value.length <= 200;

  const found = new Set();
  for (const message of messages) {
    const blocks = message?.content;
    if (!Array.isArray(blocks)) continue;

    for (const block of blocks) {
      const isToolUse = block?.type === 'tool_use';
      if (!isToolUse || !block.input) continue;

      const pending = [block.input];
      while (pending.length > 0) {
        const current = pending.pop();
        if (typeof current === 'string') {
          if (looksLikePath(current)) found.add(current);
          continue;
        }
        if (current === null || typeof current !== 'object') continue;
        const children = Array.isArray(current) ? current : Object.values(current);
        for (const child of children) pending.push(child);
      }
    }
  }
  return [...found];
}
94
+
95
/**
 * Report which of `keywords` occur anywhere in `haystack`.
 *
 * Matching is a case-insensitive substring test against all haystack entries
 * joined by newlines, so "auth" is found in "src/auth/login.ts" as well as in
 * "authorization".
 *
 * @param {string[]} keywords
 * @param {string[]} haystack
 * @returns {string[]} the matching keywords, de-duplicated and sorted
 */
function findHits(keywords, haystack) {
  const corpus = haystack.join('\n').toLowerCase();
  const matched = keywords.filter((keyword) => corpus.includes(keyword.toLowerCase()));
  return [...new Set(matched)].sort();
}
111
+
112
/**
 * Classify how risky a request is, independently of how complex it is.
 *
 * Decision order:
 *   1. high   - the instruction text contains a HIGH_RISK_INSTRUCTION_KEYWORDS entry
 *   2. low    - no PROTECTED_PATH_KEYWORDS entry is found in the extracted
 *               paths or in the instruction text
 *   3. medium - a protected keyword is found and the text contains a
 *               read-only verb (explain, summarize, list, ...)
 *   4. high   - a protected keyword is found and no read-only verb is present
 *
 * @param {object} payload - Anthropic-format request payload
 * @returns {{ level: 'low'|'medium'|'high',
 *             reason: string,
 *             pathHits: string[],
 *             instructionHits: string[],
 *             paths: string[] }}
 */
function analyzeRisk(payload) {
  const instructionText = extractContent(payload) || '';

  // Paths mentioned in the text plus paths passed to earlier tool calls.
  const paths = [
    ...new Set([
      ...extractPathsFromText(instructionText),
      ...extractPathsFromToolUses(payload),
    ]),
  ];

  const instructionHits = findHits(HIGH_RISK_INSTRUCTION_KEYWORDS, [instructionText]);

  // Protected keywords are checked against the extracted paths and against
  // the raw text, so "update the auth flow" counts even with no path named.
  const pathHits = [
    ...new Set([
      ...findHits(PROTECTED_PATH_KEYWORDS, paths),
      ...findHits(PROTECTED_PATH_KEYWORDS, [instructionText]),
    ]),
  ].sort();

  const verdict = (level, reason, hitsOnPaths, hitsInInstruction) => ({
    level,
    reason,
    pathHits: hitsOnPaths,
    instructionHits: hitsInInstruction,
    paths,
  });

  if (instructionHits.length > 0) {
    return verdict('high', 'High-risk instruction keyword detected.', pathHits, instructionHits);
  }

  if (pathHits.length === 0) {
    return verdict('low', 'No risk signals detected.', [], []);
  }

  // Heuristic: a read-style verb anywhere in the text marks the task as read-only.
  const readOnly = /\b(explain|summarize|describe|what does|walk me through|read|show|list|search|find|grep|locate)\b/i
    .test(instructionText.toLowerCase());

  return readOnly
    ? verdict('medium', 'Protected paths involved but task appears read-only.', pathHits, [])
    : verdict('high', 'Protected path referenced with write-capable intent.', pathHits, []);
}
186
+
187
+ module.exports = {
188
+ analyzeRisk,
189
+ PROTECTED_PATH_KEYWORDS,
190
+ HIGH_RISK_INSTRUCTION_KEYWORDS,
191
+ // Exposed for tests
192
+ extractPathsFromText,
193
+ extractPathsFromToolUses,
194
+ };
@@ -0,0 +1,130 @@
1
+ /**
2
+ * Risk classifier (Phase 3.4).
3
+ *
4
+ * Replaces the regex-based risk-analyzer with a small logistic-regression
5
+ * model trained on TF-IDF of unigrams + bigrams. Bootstrap labels come from
6
+ * the existing regex matcher; subsequent training uses telemetry-flagged
7
+ * outcomes (set the request header `x-lynkr-risk-confirmed: true` to mark a
8
+ * request as truly risky for training).
9
+ *
10
+ * Falls back to the existing regex analyzer when no model artifact is present
11
+ * at data/risk-classifier.json. Model weights are JSON-serializable so they
12
+ * load fast and can be diffed in PRs.
13
+ */
14
+
15
+ const fs = require('fs');
16
+ const path = require('path');
17
+ const logger = require('../logger');
18
+ const { analyzeRisk: regexAnalyzeRisk } = require('./risk-analyzer');
19
+
20
+ const MODEL_PATH = path.join(__dirname, '../../data/risk-classifier.json');
21
+ const DECISION_THRESHOLD = 0.5;
22
+
23
+ let _model = null;
24
+ let _modelLoaded = false;
25
+
26
/**
 * Split text into lower-case tokens.
 *
 * A token is a maximal run of letters, digits, "_", "-", "/" and "."; every
 * other character acts as a separator, so paths such as "src/auth.ts" stay
 * in one piece.
 *
 * @param {string} text
 * @returns {string[]} tokens; empty for empty or non-string input
 */
function _tokenize(text) {
  if (typeof text !== 'string' || text.length === 0) return [];
  const pieces = text.toLowerCase().split(/[^a-z0-9_\-/.]+/);
  return pieces.filter((piece) => piece.length > 0);
}
30
+
31
/**
 * Build the bag-of-features for a text: raw occurrence counts of every
 * unigram and of every adjacent-token bigram (the two tokens joined by a
 * single space).
 *
 * @param {string} text
 * @returns {Map<string, number>} feature -> count, in first-seen order
 */
function _features(text) {
  const tokens = _tokenize(text);
  const counts = new Map();
  const bump = (feature) => counts.set(feature, (counts.get(feature) || 0) + 1);

  tokens.forEach((token, index) => {
    bump(token);
    const hasNext = index + 1 < tokens.length;
    if (hasNext) bump(`${token} ${tokens[index + 1]}`);
  });
  return counts;
}
43
+
44
/**
 * Load the classifier artifact from MODEL_PATH, at most once.
 *
 * The outcome of the first attempt, success or failure, is remembered via
 * `_modelLoaded`, so the file is not probed again on later calls.
 *
 * An artifact is accepted when `weights` is a non-null object and `bias` is a
 * finite number. A bias of exactly 0 is valid and must not be mistaken for
 * "missing".
 *
 * @returns {object|null} the parsed model, or null when the file is absent,
 *   malformed or unreadable (read/parse errors are logged at debug level)
 */
function _loadModel() {
  if (_modelLoaded) return _model;
  // Set before the attempt so a failing load is not retried on every request.
  _modelLoaded = true;
  try {
    if (!fs.existsSync(MODEL_PATH)) return null;
    const raw = JSON.parse(fs.readFileSync(MODEL_PATH, 'utf8'));
    const hasWeights = raw?.weights !== null && typeof raw?.weights === 'object';
    if (!hasWeights || !Number.isFinite(raw.bias)) return null;
    _model = raw;
    return _model;
  } catch (err) {
    logger.debug({ err: err.message }, '[RiskClassifier] Model load failed');
    return null;
  }
}
58
+
59
/**
 * Logistic function 1 / (1 + e^-z).
 *
 * The two branches are algebraically equal; each one only ever exponentiates
 * a non-positive number, so Math.exp cannot overflow for large |z|.
 *
 * @param {number} z
 * @returns {number} value in [0, 1]
 */
function _sigmoid(z) {
  if (z >= 0) {
    const decay = Math.exp(-z);
    return 1 / (1 + decay);
  }
  const growth = Math.exp(z);
  return growth / (1 + growth);
}
64
+
65
/**
 * Score a text with the linear model.
 *
 * Starting from `model.bias`, each feature's count is multiplied by its
 * weight and added up; features without a numeric weight contribute nothing.
 * The sum is passed through the sigmoid.
 *
 * @param {string} text
 * @param {{bias: number, weights: Object<string, number>}} model
 * @returns {number} sigmoid of the weighted sum
 */
function _predict(text, model) {
  const featureCounts = _features(text);
  let logit = model.bias;
  featureCounts.forEach((count, feature) => {
    const weight = model.weights[feature];
    if (typeof weight === 'number') logit += weight * count;
  });
  return _sigmoid(logit);
}
74
+
75
/**
 * Drop-in replacement for the regex analyzer's analyzeRisk(payload).
 *
 * The regex analyzer always runs. When no model is available its result is
 * returned untouched. Otherwise the classifier scores the most recent user
 * message (plain string, or the joined text blocks of an array) followed by
 * the system prompt when that is a string, and the probability is mapped to
 * a level:
 *   prob >= 0.75                -> 'high'
 *   prob >= DECISION_THRESHOLD  -> 'medium'
 *   otherwise                   -> 'low'
 * A regex verdict of 'high' always wins, so the classifier can never lower a
 * regex-flagged high-risk request.
 *
 * @param {object} payload - request payload
 * @returns {object} the regex result; when the classifier ran, `level` is
 *   replaced and `score` / `classifierUsed: true` are added
 */
function analyzeRisk(payload) {
  // Kept even when the model is used: its hit details feed telemetry.
  const regexResult = regexAnalyzeRisk(payload);

  const model = _loadModel();
  if (!model) return regexResult;

  let text = '';
  const messages = payload?.messages;
  if (Array.isArray(messages)) {
    const latestUser = [...messages].reverse().find((msg) => msg?.role === 'user');
    const content = latestUser?.content;
    if (typeof content === 'string') {
      text = content;
    } else if (Array.isArray(content)) {
      text = content
        .filter((block) => block?.type === 'text')
        .map((block) => block.text)
        .join(' ');
    }
  }
  if (typeof payload?.system === 'string') {
    text = `${text} ${payload.system}`;
  }

  const prob = _predict(text, model);

  let level = 'low';
  if (prob >= 0.75) {
    level = 'high';
  } else if (prob >= DECISION_THRESHOLD) {
    level = 'medium';
  }

  // Never silently downgrade a request the regex analyzer flagged as high.
  if (regexResult?.level === 'high') level = 'high';

  return {
    ...regexResult,
    level,
    score: prob,
    classifierUsed: true,
  };
}
120
+
121
/**
 * Forget the loaded model and clear the "already attempted" flag, so the
 * artifact is read from disk again the next time the model is requested.
 */
function reloadModel() {
  _model = null;
  _modelLoaded = false;
}
125
+
126
+ module.exports = {
127
+ analyzeRisk,
128
+ reloadModel,
129
+ _internal: { _features, _predict },
130
+ };
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Shadow-mode policy A/B testing (Phase 4.4).
3
+ *
4
+ * Lets us test a new routing policy against production without serving its
5
+ * decisions. The shadow policy runs alongside the active policy, makes its
6
+ * decision, and that decision is logged. A weekly comparison job
7
+ * (scripts/compare-policies.js) summarises agreement, cost delta, and (via
8
+ * the regret estimator) projected quality delta on the disagreed-on subset.
9
+ *
10
+ * Activation:
11
+ * - Set LYNKR_SHADOW_POLICY=<name> to enable
12
+ * - Implement and register policies via registerPolicy()
13
+ */
14
+
15
+ const fs = require('fs');
16
+ const path = require('path');
17
+ const logger = require('../logger');
18
+
19
+ const LOG_PATH = path.join(__dirname, '../../data/shadow-decisions.jsonl');
20
+
21
+ const _registry = new Map();
22
+
23
/**
 * Register a routing policy under `name`, replacing any policy previously
 * stored under the same name.
 *
 * @param {string} name - registry key
 * @param {function} fn - the policy implementation
 * @throws {Error} when `fn` is not a function
 */
function registerPolicy(name, fn) {
  const isCallable = typeof fn === 'function';
  if (!isCallable) {
    throw new Error('Policy must be a function');
  }
  _registry.set(name, fn);
}
27
+
28
/**
 * Report whether shadow mode is active.
 * @returns {boolean} true when LYNKR_SHADOW_POLICY is set to a non-empty
 *   name and a policy has been registered under that name
 */
function isEnabled() {
  const policyName = process.env.LYNKR_SHADOW_POLICY;
  if (!policyName) return false;
  return _registry.has(policyName);
}
31
+
32
/**
 * Fetch the policy function selected by LYNKR_SHADOW_POLICY.
 * @returns {function|null} the registered policy, or null when shadow mode
 *   is not enabled
 */
function getShadowPolicy() {
  return isEnabled()
    ? _registry.get(process.env.LYNKR_SHADOW_POLICY)
    : null;
}
36
+
37
/**
 * Append one entry to the shadow-decision log as a single JSON line,
 * creating the log directory if it does not exist yet.
 *
 * Best effort: any failure is logged at debug level and not rethrown.
 *
 * @param {object} entry - value to serialise
 */
function _appendLog(entry) {
  try {
    const logDir = path.dirname(LOG_PATH);
    fs.mkdirSync(logDir, { recursive: true });
    const line = `${JSON.stringify(entry)}\n`;
    fs.appendFileSync(LOG_PATH, line);
  } catch (err) {
    const { message } = err;
    logger.debug({ err: message }, '[ShadowMode] Log append failed');
  }
}
45
+
46
/**
 * Run the shadow policy on the same payload as the active policy and log how
 * the two decisions compare. Purely observational: nothing here changes which
 * decision is served, and the caller keeps using `activeDecision`.
 *
 * The decisions agree when the shadow policy returned a decision whose
 * `provider` and `model` both equal the active decision's. A shadow policy
 * that returns nothing never counts as agreement.
 *
 * @param {object} [args]
 * @param {object} args.payload - request payload handed to the shadow policy
 * @param {object} args.activeDecision - decision actually being served
 * @param {function} args.shadowFn - shadow policy: (payload) -> decision, may be async
 * @returns {Promise<{agree: boolean, shadow: object}|null>} null when there
 *   is no shadow policy, no active decision to compare against, or the
 *   shadow policy threw; nothing is logged in those cases
 */
async function compareAndLog({ payload, activeDecision, shadowFn } = {}) {
  // Without both sides there is nothing to compare, and this path must
  // never throw into the serving code.
  if (!shadowFn || !activeDecision) return null;

  let shadowDecision;
  try {
    shadowDecision = await shadowFn(payload);
  } catch (err) {
    logger.debug({ err: err.message }, '[ShadowMode] Shadow policy failed');
    return null;
  }

  const agree = Boolean(shadowDecision)
    && activeDecision.provider === shadowDecision.provider
    && activeDecision.model === shadowDecision.model;

  // Log only the routing-relevant fields, not the whole decision object.
  const summarize = (decision) => ({
    provider: decision.provider,
    model: decision.model,
    tier: decision.tier,
    score: decision.score,
  });

  _appendLog({
    timestamp: Date.now(),
    policy: process.env.LYNKR_SHADOW_POLICY,
    agree,
    active: summarize(activeDecision),
    shadow: shadowDecision ? summarize(shadowDecision) : null,
  });
  return { agree, shadow: shadowDecision };
}
70
+
71
+ module.exports = {
72
+ registerPolicy,
73
+ isEnabled,
74
+ getShadowPolicy,
75
+ compareAndLog,
76
+ LOG_PATH,
77
+ };
@@ -105,6 +105,9 @@ function init() {
105
105
 
106
106
  CREATE INDEX IF NOT EXISTS idx_telemetry_timestamp
107
107
  ON routing_telemetry(timestamp);
108
+
109
+ CREATE INDEX IF NOT EXISTS idx_telemetry_session_id
110
+ ON routing_telemetry(session_id, timestamp);
108
111
  `);
109
112
 
110
113
  logger.info({ dbPath }, "Routing telemetry database initialised");
@@ -233,6 +236,10 @@ function query(filters = {}) {
233
236
  clauses.push("timestamp >= @since");
234
237
  params.since = filters.since;
235
238
  }
239
+ if (filters.session_id) {
240
+ clauses.push("session_id = @session_id");
241
+ params.session_id = filters.session_id;
242
+ }
236
243
 
237
244
  const where = clauses.length > 0 ? `WHERE ${clauses.join(" AND ")}` : "";
238
245
  const limit = filters.limit ?? 100;