lynkr 9.1.2 → 9.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +21 -10
  2. package/package.json +3 -1
  3. package/scripts/build-knn-index.js +130 -0
  4. package/scripts/calibrate-thresholds.js +197 -0
  5. package/scripts/compare-policies.js +67 -0
  6. package/scripts/learn-output-ratios.js +162 -0
  7. package/scripts/refresh-pricing.js +122 -0
  8. package/scripts/run-routerarena.js +26 -0
  9. package/scripts/sample-regret.js +84 -0
  10. package/scripts/train-risk-classifier.js +191 -0
  11. package/src/api/middleware/budget-enforcer.js +60 -0
  12. package/src/api/middleware/tenant.js +21 -0
  13. package/src/api/router.js +19 -40
  14. package/src/budget/hierarchical-budget.js +159 -0
  15. package/src/cache/semantic.js +28 -2
  16. package/src/clients/databricks.js +59 -5
  17. package/src/config/index.js +239 -43
  18. package/src/context/toon.js +5 -4
  19. package/src/orchestrator/index.js +44 -6
  20. package/src/prompts/system.js +34 -6
  21. package/src/routing/bandit.js +246 -0
  22. package/src/routing/cascade.js +106 -0
  23. package/src/routing/complexity-analyzer.js +7 -15
  24. package/src/routing/confidence-scorer.js +121 -0
  25. package/src/routing/context-validator.js +71 -0
  26. package/src/routing/cost-optimizer.js +5 -2
  27. package/src/routing/deadline.js +52 -0
  28. package/src/routing/drift-monitor.js +113 -0
  29. package/src/routing/embedding-cache.js +77 -0
  30. package/src/routing/index.js +314 -5
  31. package/src/routing/knn-router.js +206 -0
  32. package/src/routing/latency-tracker.js +113 -71
  33. package/src/routing/model-tiers.js +156 -6
  34. package/src/routing/output-ratios.js +57 -0
  35. package/src/routing/regret-estimator.js +91 -0
  36. package/src/routing/reward-pipeline.js +62 -0
  37. package/src/routing/risk-classifier.js +130 -0
  38. package/src/routing/shadow-mode.js +77 -0
  39. package/src/routing/tenant-policy.js +96 -0
  40. package/src/routing/tokenizer.js +162 -0
  41. package/src/server.js +9 -0
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Refresh model pricing data.
4
+ *
5
+ * Phase 2.2 of the routing overhaul. Cron-friendly entrypoint that forces a
6
+ * fresh pull of LiteLLM + models.dev pricing, compares to the last cached
7
+ * snapshot, and logs anything that moved more than 5%.
8
+ *
9
+ * Usage: node scripts/refresh-pricing.js [--diff-only] [--threshold 0.05]
10
+ */
11
+
12
+ const fs = require('fs');
13
+ const path = require('path');
14
+
15
+ const CACHE_FILE = path.join(__dirname, '../data/model-prices-cache.json');
16
+ const PREV_FILE = path.join(__dirname, '../data/model-prices-cache.prev.json');
17
+ const DEFAULT_THRESHOLD = 0.05;
18
+
19
/**
 * Parse the command-line flags of the pricing refresh script.
 *
 * Recognised flags:
 *   --diff-only       only diff the existing snapshots, do not fetch
 *   --threshold <n>   minimum relative change to report (0.05 means 5%)
 *
 * @param {string[]} argv - Arguments following the script name.
 * @returns {{diffOnly: boolean, threshold: number}} Parsed options; an absent
 *   or invalid threshold falls back to DEFAULT_THRESHOLD.
 */
function _parseArgs(argv) {
  const out = { diffOnly: false, threshold: DEFAULT_THRESHOLD };
  for (let i = 0; i < argv.length; i++) {
    if (argv[i] === '--diff-only') {
      out.diffOnly = true;
    } else if (argv[i] === '--threshold') {
      const raw = argv[++i];
      // Number('') is 0, so treat a missing/blank value as invalid up front.
      const parsed = raw === undefined || String(raw).trim() === '' ? NaN : Number(raw);
      // 0 is a legitimate threshold ("report every change"), so validate
      // explicitly instead of relying on truthiness; negatives are rejected
      // because they would match every model.
      out.threshold = Number.isFinite(parsed) && parsed >= 0 ? parsed : DEFAULT_THRESHOLD;
    }
  }
  return out;
}
27
+
28
/**
 * Read and parse a JSON file on a best-effort basis.
 *
 * A missing file is an expected condition (e.g. no previous snapshot yet) and
 * yields null silently. Any other read failure, or malformed JSON, also
 * yields null but is reported on stderr so a corrupt snapshot is not mistaken
 * for "nothing changed".
 *
 * @param {string} p - Path of the JSON file.
 * @returns {*} The parsed value, or null when the file is missing, unreadable
 *   or not valid JSON.
 */
function _readJson(p) {
  let raw;
  try {
    raw = fs.readFileSync(p, 'utf8');
  } catch (err) {
    if (err.code !== 'ENOENT') {
      console.error(`Failed to read ${p}: ${err.message}`);
    }
    return null;
  }
  try {
    return JSON.parse(raw);
  } catch (err) {
    console.error(`Ignoring malformed JSON in ${p}: ${err.message}`);
    return null;
  }
}
36
+
37
/**
 * Compare two pricing snapshots and list the models whose price moved.
 *
 * Each snapshot is either an object with a `modelIndex` table or the table
 * itself (model id -> { input, output }). A model's price is summarised as
 * input + output; a move is reported when the relative change of that sum is
 * at least `threshold`.
 *
 * @param {object|null} prev - Previous snapshot.
 * @param {object|null} next - Current snapshot.
 * @param {number} threshold - Minimum relative change (0.05 means 5%).
 * @returns {Array<object>} Moves of type 'removed', 'increased', 'decreased'
 *   or 'added'. Empty when either snapshot is missing.
 */
function _diff(prev, next, threshold) {
  if (!prev || !next) return [];
  const prevModels = prev.modelIndex || prev;
  const nextModels = next.modelIndex || next;

  // Own-property lookup only: a model id such as "constructor" or "toString"
  // must not resolve to something inherited from Object.prototype.
  const own = (table, key) =>
    Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
  const total = (cost) => (cost.input || 0) + (cost.output || 0);

  const moves = [];
  for (const [modelId, oldCost] of Object.entries(prevModels)) {
    // A null/empty previous entry carries no price; if the model now has one
    // it is picked up as 'added' by the second pass.
    if (!oldCost) continue;

    const newCost = own(nextModels, modelId);
    if (!newCost) {
      moves.push({ model: modelId, type: 'removed', oldCost });
      continue;
    }

    const oldTotal = total(oldCost);
    const newTotal = total(newCost);
    if (oldTotal === 0) {
      // No relative delta exists from a zero base, but a free model that
      // started costing money is still a move worth reporting.
      if (newTotal > 0) {
        moves.push({
          model: modelId,
          type: 'increased',
          oldInput: oldCost.input,
          newInput: newCost.input,
          oldOutput: oldCost.output,
          newOutput: newCost.output,
          deltaPct: 'n/a',
        });
      }
      continue;
    }

    const delta = (newTotal - oldTotal) / oldTotal;
    if (Math.abs(delta) >= threshold) {
      moves.push({
        model: modelId,
        type: delta > 0 ? 'increased' : 'decreased',
        oldInput: oldCost.input,
        newInput: newCost.input,
        oldOutput: oldCost.output,
        newOutput: newCost.output,
        deltaPct: `${(delta * 100).toFixed(2)}%`,
      });
    }
  }

  for (const [modelId, newCost] of Object.entries(nextModels)) {
    if (newCost && !own(prevModels, modelId)) {
      moves.push({ model: modelId, type: 'added', newCost });
    }
  }
  return moves;
}
71
+
72
/**
 * Refresh the pricing cache (unless `diffOnly`) and report price movements.
 *
 * When fetching, the current cache file is first copied to PREV_FILE so the
 * diff afterwards compares "before" and "after". The forced refresh goes
 * through the registry's `_fetchAll()` when it exists.
 * NOTE(review): presumably `_fetchAll()` rewrites CACHE_FILE; that is not
 * visible from this script, so confirm against the model registry.
 *
 * @param {object} [options]
 * @param {boolean} [options.diffOnly=false] - Skip snapshot + fetch and only
 *   diff the files already on disk.
 * @param {number} [options.threshold=DEFAULT_THRESHOLD] - Minimum relative
 *   change to report.
 * @returns {Promise<{moves: Array<object>}>} The detected moves.
 */
async function refresh({ diffOnly = false, threshold = DEFAULT_THRESHOLD } = {}) {
  if (!diffOnly) {
    // Snapshot the current cache as "previous" before fetching. A failed
    // snapshot is reported but does not abort the refresh.
    if (fs.existsSync(CACHE_FILE)) {
      try {
        fs.copyFileSync(CACHE_FILE, PREV_FILE);
      } catch (err) {
        console.error(`Failed to snapshot previous cache: ${err.message}`);
      }
    }

    const { getModelRegistry } = require('../src/routing/model-registry');
    const registry = await getModelRegistry();
    if (typeof registry._fetchAll === 'function') {
      await registry._fetchAll();
      console.log(`Refreshed pricing data (cache: ${CACHE_FILE})`);
    } else {
      // Do not claim a refresh happened when there was no way to force one.
      console.warn('Model registry has no _fetchAll(); could not force a pricing refresh.');
    }
  }

  const prev = _readJson(PREV_FILE);
  const next = _readJson(CACHE_FILE);
  const moves = _diff(prev, next, threshold);
  const thresholdLabel = `${(threshold * 100).toFixed(1)}%`;

  if (moves.length === 0) {
    console.log(`No pricing changes ≥${thresholdLabel}.`);
    return { moves: [] };
  }

  console.log(`${moves.length} pricing change(s) ≥${thresholdLabel}:`);
  for (const move of moves) {
    if (move.type === 'added') {
      console.log(` + ${move.model}: input=${move.newCost.input}, output=${move.newCost.output}`);
    } else if (move.type === 'removed') {
      console.log(` - ${move.model}: was input=${move.oldCost.input}, output=${move.oldCost.output}`);
    } else {
      const arrow = move.type === 'increased' ? '↑' : '↓';
      console.log(` ${arrow} ${move.model}: ${move.oldInput}/${move.oldOutput} → ${move.newInput}/${move.newOutput} (${move.deltaPct})`);
    }
  }
  return { moves };
}
113
+
114
+ if (require.main === module) {
115
+ const opts = _parseArgs(process.argv.slice(2));
116
+ refresh(opts).catch((err) => {
117
+ console.error(err.message);
118
+ process.exit(1);
119
+ });
120
+ }
121
+
122
+ module.exports = { refresh };
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * RouterArena evaluation harness (Phase 6.4 — STUB).
4
+ *
5
+ * This is intentionally not wired to CI yet. The plan defers RouterArena
6
+ * integration until after Phases 1-4 have produced 2-4 weeks of telemetry
7
+ * to baseline against.
8
+ *
9
+ * To wire it up:
10
+ * 1. Clone https://github.com/RouteWorks/RouterArena into ./routerarena/
11
+ * 2. Install RouterArena's Python dependencies (transformers, datasets,
12
+ * anthropic, openai)
13
+ * 3. Decide on a subset size for PR-blocking CI (recommend 100-200 queries
14
+ * sampled stratified by difficulty); leave the full benchmark for nightly
15
+ * 4. Wire to GitHub Actions with `paths: [src/routing/**]` trigger
16
+ * 5. Compare PR's router decisions vs main's router on the same query set,
17
+ * report cost/quality delta as a PR comment
18
+ *
19
+ * The intent is to use RouterArena to *catch regressions*, not to gate
20
+ * routing changes on absolute benchmark scores.
21
+ */
22
+
23
// Stub entry point: print where the setup notes live and finish successfully.
// The process is left to end on its own rather than via process.exit(), which
// can terminate before pending stdout writes are flushed when output is piped.
console.log('RouterArena integration is a stub.');
console.log('See scripts/run-routerarena.js for setup steps.');
console.log('Phase 6.4 of docs/routing-improvement-plan.md.');
process.exitCode = 0;
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Sample yesterday's traffic for regret estimation (Phase 4.2).
4
+ *
5
+ * Reads 0.5% of yesterday's requests from telemetry, re-runs them through
6
+ * Opus, and writes alerts if the routed model consistently underperforms.
7
+ *
8
+ * Costs real money — only runs when LYNKR_REGRET_ESTIMATOR=true.
9
+ */
10
+
11
+ const path = require('path');
12
+ const fs = require('fs');
13
+ const { estimate, isEnabled } = require('../src/routing/regret-estimator');
14
+
15
+ const SAMPLE_RATE = 0.005;
16
+
17
/**
 * Sample recent telemetry rows and hand them to the regret estimator.
 *
 * Does nothing unless the estimator is enabled and the telemetry database
 * exists. Rows from the last 24 hours that carry a quality score are sampled
 * at SAMPLE_RATE (but at least 5 rows, or all rows if fewer exist).
 *
 * @returns {Promise<void>}
 */
async function main() {
  if (!isEnabled()) {
    console.log('LYNKR_REGRET_ESTIMATOR not set; skipping.');
    return;
  }

  let Database;
  try {
    Database = require('better-sqlite3');
  } catch {
    console.error('better-sqlite3 not installed');
    process.exit(2);
  }

  const dbPath = path.join(__dirname, '../.lynkr/telemetry.db');
  if (!fs.existsSync(dbPath)) {
    console.log('No telemetry DB; skipping.');
    return;
  }

  // NOTE(review): assumes `timestamp` is stored as epoch milliseconds; confirm
  // against the telemetry writer.
  const since = Date.now() - 24 * 3600 * 1000;
  const db = new Database(dbPath, { readonly: true });
  let rows;
  try {
    rows = db.prepare(
      `SELECT request_text, response_text, model, quality_score
       FROM routing_telemetry
       WHERE timestamp >= ?
       AND quality_score IS NOT NULL
       AND request_text IS NOT NULL`
    ).all(since);
  } finally {
    // Release the handle even when the query throws (e.g. missing table).
    db.close();
  }

  if (rows.length === 0) {
    console.log('No eligible rows yesterday.');
    return;
  }

  // Draw without replacement by removing each picked row from the pool.
  const sampleSize = Math.max(5, Math.floor(rows.length * SAMPLE_RATE));
  const sampled = [];
  while (sampled.length < sampleSize && rows.length > 0) {
    const idx = Math.floor(Math.random() * rows.length);
    sampled.push(rows.splice(idx, 1)[0]);
  }

  console.log(`Sampling ${sampled.length} rows for regret estimation`);

  // Caller must wire an actual Opus invocation; default to a no-op for safety.
  // NOTE(review): the warning mentions LYNKR_REGRET_OPUS_RUNNER, but this
  // script never reads that variable. Confirm where the override is meant to
  // be honoured.
  const runOpus = async () => {
    console.warn('No opus runner wired — implement runOpus in scripts/sample-regret.js or override via LYNKR_REGRET_OPUS_RUNNER');
    return { response: null, quality: 0 };
  };

  const samples = sampled.map((r) => ({
    request: { messages: [{ role: 'user', content: r.request_text }] },
    response: r.response_text,
    model: r.model,
    quality: r.quality_score,
  }));

  const result = await estimate({ samples, runOpus });
  console.log(`Regret: ${result.regret.toFixed(3)} over ${result.sampledCount} samples; ${result.alerts.length} alert(s) written.`);
}
78
+
79
+ if (require.main === module) {
80
+ main().catch(err => {
81
+ console.error(err.stack || err.message);
82
+ process.exit(1);
83
+ });
84
+ }
@@ -0,0 +1,191 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Train the risk classifier (Phase 3.4).
4
+ *
5
+ * Two label sources, fused:
6
+ * 1. Bootstrap: run the existing regex risk-analyzer over recent telemetry
7
+ * to produce weak labels.
8
+ * 2. Confirmed: requests with x-lynkr-risk-confirmed:true header logged in
9
+ * telemetry are treated as strong positive labels.
10
+ *
11
+ * Writes data/risk-classifier.json (weights + bias). Logistic regression
12
+ * trained with simple SGD over TF features (unigrams + bigrams).
13
+ *
14
+ * Usage: node scripts/train-risk-classifier.js [--days 30] [--epochs 10]
15
+ */
16
+
17
+ const fs = require('fs');
18
+ const path = require('path');
19
+
20
+ const DEFAULT_DAYS = 30;
21
+ const DEFAULT_EPOCHS = 10;
22
+ const LEARNING_RATE = 0.1;
23
+ const L2_REG = 0.0001;
24
+ const MIN_TOKEN_FREQ = 3;
25
+
26
+ const OUTPUT_PATH = path.join(__dirname, '../data/risk-classifier.json');
27
+ const TELEMETRY_DB_CANDIDATES = [
28
+ path.join(__dirname, '../.lynkr/telemetry.db'),
29
+ path.join(__dirname, '../data/lynkr.db'),
30
+ ];
31
+
32
/**
 * Locate the telemetry database.
 *
 * @returns {string|null} The first path in TELEMETRY_DB_CANDIDATES that
 *   exists on disk, or null when none does.
 */
function _findDb() {
  const hit = TELEMETRY_DB_CANDIDATES.find((candidate) => fs.existsSync(candidate));
  return hit === undefined ? null : hit;
}
36
+
37
/**
 * Split text into lower-cased tokens.
 *
 * Letters, digits and the characters `_ - / .` are kept inside a token, so
 * file paths stay in one piece; every other character acts as a separator.
 *
 * @param {*} text - Input; falsy values produce no tokens, anything else is
 *   stringified first.
 * @returns {string[]} The non-empty tokens in order of appearance.
 */
function _tokenize(text) {
  if (!text) return [];
  const lowered = String(text).toLowerCase();
  const pieces = lowered.split(/[^a-z0-9_\-/.]+/);
  return pieces.filter((piece) => piece.length > 0);
}
41
+
42
/**
 * Build term-frequency features for a piece of text.
 *
 * @param {*} text - Input passed to _tokenize.
 * @returns {Map<string, number>} Occurrence counts keyed by unigram and by
 *   space-joined bigram of adjacent tokens.
 */
function _features(text) {
  const tokens = _tokenize(text);
  const counts = new Map();
  const bump = (key) => {
    counts.set(key, (counts.get(key) || 0) + 1);
  };
  tokens.forEach((token, position) => {
    bump(token);
    // Every token except the last also starts a bigram with its successor.
    if (position + 1 < tokens.length) {
      bump(`${token} ${tokens[position + 1]}`);
    }
  });
  return counts;
}
54
+
55
/**
 * Logistic function 1 / (1 + e^-z).
 *
 * The negative branch uses the algebraically equal form e^z / (1 + e^z) so
 * that Math.exp is only ever called with a non-positive argument.
 *
 * @param {number} z - Input value.
 * @returns {number} A value between 0 and 1.
 */
function _sigmoid(z) {
  if (z < 0) {
    const expZ = Math.exp(z);
    return expZ / (1 + expZ);
  }
  return 1 / (1 + Math.exp(-z));
}
60
+
61
/**
 * Parse the command-line flags of the training script.
 *
 * Recognised flags:
 *   --days <n>     size of the telemetry window in days (positive number)
 *   --epochs <n>   number of training passes (positive integer)
 *
 * @param {string[]} argv - Arguments following the script name.
 * @returns {{days: number, epochs: number}} Parsed options; a missing or
 *   invalid value falls back to DEFAULT_DAYS / DEFAULT_EPOCHS.
 */
function _parseArgs(argv) {
  const out = { days: DEFAULT_DAYS, epochs: DEFAULT_EPOCHS };
  for (let i = 0; i < argv.length; i++) {
    if (argv[i] === '--days') {
      const days = Number(argv[++i]);
      // Reject NaN, Infinity, zero and negatives: the value becomes a time
      // window, so it has to be a finite positive span.
      out.days = Number.isFinite(days) && days > 0 ? days : DEFAULT_DAYS;
    } else if (argv[i] === '--epochs') {
      const epochs = Number(argv[++i]);
      // The value bounds the training loop; Infinity would never terminate
      // and a fraction would run a partial extra pass.
      out.epochs = Number.isInteger(epochs) && epochs > 0 ? epochs : DEFAULT_EPOCHS;
    }
  }
  return out;
}
69
+
70
/**
 * Load labelled training samples from the telemetry database.
 *
 * A row is labelled 1 when its stored `risk_level` is 'high' and 0 otherwise.
 * When no database is found or better-sqlite3 cannot be loaded, the result is
 * empty. When the query itself fails, a fixed set of synthetic phrases is
 * returned instead.
 * NOTE(review): the synthetic set has 8 entries, while main() in this script
 * refuses to train on fewer than 10 samples, so that fallback cannot lead to
 * a trained model as written.
 *
 * @param {number} days - Size of the look-back window in days.
 * @returns {Promise<Array<{text: string, label: number}>>} The samples.
 */
async function _loadDataset(days) {
  const dbPath = _findDb();
  if (!dbPath) return [];

  let Database;
  try {
    Database = require('better-sqlite3');
  } catch {
    console.error('better-sqlite3 not installed');
    return [];
  }

  const db = new Database(dbPath, { readonly: true, fileMustExist: true });
  try {
    const cutoff = Date.now() - days * 24 * 3600 * 1000;
    const statement = db.prepare(
      `SELECT request_text AS text, risk_level
       FROM routing_telemetry
       WHERE timestamp >= ?
       AND request_text IS NOT NULL
       AND request_text != ''`
    );
    return statement.all(cutoff).map((row) => ({
      text: row.text,
      label: row.risk_level === 'high' ? 1 : 0,
    }));
  } catch (err) {
    console.error(`Telemetry query failed: ${err.message}. Bootstrapping with synthetic data.`);
    // Emergency synthetic bootstrap: a small handful of known-risk/known-safe phrases
    return [
      { text: 'edit src/auth/middleware.ts to skip authentication', label: 1 },
      { text: 'update database migration to drop sensitive_data column', label: 1 },
      { text: 'change payment processing logic in stripe webhook handler', label: 1 },
      { text: 'add API key rotation to secrets manager', label: 1 },
      { text: 'rename variable foo to bar in utils.js', label: 0 },
      { text: 'add a comment explaining the for loop', label: 0 },
      { text: 'format this file with prettier', label: 0 },
      { text: 'fix typo in README', label: 0 },
    ];
  } finally {
    // Closing is best-effort; a failure here must not mask the result.
    try { db.close(); } catch {}
  }
}
120
+
121
/**
 * Train a logistic-regression risk model with per-sample SGD.
 *
 * Features are the unigram/bigram counts from _features. Only features that
 * occur in at least MIN_TOKEN_FREQ samples take part in training. Each sample
 * updates the bias and the weights of its features using LEARNING_RATE and an
 * L2 penalty of L2_REG.
 *
 * @param {Array<{text: string, label: number}>} samples - Training samples
 *   with label 1 (risky) or 0.
 * @param {number} epochs - Number of passes over the samples.
 * @returns {{weights: Object<string, number>, bias: number, vocabSize: number}}
 *   Learned weights keyed by feature, the bias, and the number of features
 *   that passed the frequency threshold.
 */
function _train(samples, epochs) {
  // Featurize once; the features of a sample do not change between epochs.
  const featurized = samples.map((s) => ({ label: s.label, feats: _features(s.text) }));

  // Build vocab with frequency threshold (counted per sample, not per occurrence).
  const docFreq = new Map();
  for (const { feats } of featurized) {
    for (const tok of feats.keys()) {
      docFreq.set(tok, (docFreq.get(tok) || 0) + 1);
    }
  }
  const keep = new Set();
  for (const [tok, freq] of docFreq) {
    if (freq >= MIN_TOKEN_FREQ) keep.add(tok);
  }

  const rows = featurized.map(({ label, feats }) => ({
    label,
    feats: [...feats].filter(([tok]) => keep.has(tok)),
  }));

  // A Map rather than a plain object: tokens come from user text, and a token
  // such as "constructor" would otherwise read Object.prototype.constructor as
  // its initial weight and turn every parameter into NaN.
  const weights = new Map();
  let bias = 0;

  for (let epoch = 0; epoch < epochs; epoch++) {
    let lossSum = 0;
    for (const { label, feats } of rows) {
      let z = bias;
      for (const [tok, count] of feats) {
        z += (weights.get(tok) || 0) * count;
      }
      const pred = _sigmoid(z);
      const err = pred - label;
      // Cross-entropy, with a small epsilon so log(0) cannot occur.
      lossSum += -(label * Math.log(pred + 1e-9) + (1 - label) * Math.log(1 - pred + 1e-9));
      bias -= LEARNING_RATE * err;
      for (const [tok, count] of feats) {
        const w = weights.get(tok) || 0;
        weights.set(tok, w - LEARNING_RATE * (err * count + L2_REG * w));
      }
    }
    if (epoch % 2 === 0 || epoch === epochs - 1) {
      const meanLoss = rows.length > 0 ? lossSum / rows.length : 0;
      console.log(` epoch ${epoch + 1}/${epochs} loss=${meanLoss.toFixed(4)}`);
    }
  }

  // Object.fromEntries defines own properties, so keys like "__proto__" are
  // stored as data and survive JSON serialisation.
  return { weights: Object.fromEntries(weights), bias, vocabSize: keep.size };
}
163
+
164
/**
 * Script entry point: load samples, train, and write the model file.
 *
 * Exits with status 1 when fewer than 10 samples are available. Otherwise the
 * trained model is written to OUTPUT_PATH as compact JSON together with the
 * training timestamp, sample count and epoch count.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { days, epochs } = _parseArgs(process.argv.slice(2));
  const samples = await _loadDataset(days);

  if (samples.length < 10) {
    console.error(`Only ${samples.length} samples — too few. Skipping training.`);
    process.exit(1);
  }

  const positives = samples.reduce((count, s) => (s.label === 1 ? count + 1 : count), 0);
  console.log(`Training on ${samples.length} samples (${positives} positive)`);
  const model = _train(samples, epochs);

  fs.mkdirSync(path.dirname(OUTPUT_PATH), { recursive: true });
  const payload = {
    trainedAt: new Date().toISOString(),
    samples: samples.length,
    epochs,
    ...model,
  };
  fs.writeFileSync(OUTPUT_PATH, JSON.stringify(payload, null, 0));
  console.log(`Wrote ${OUTPUT_PATH} (vocab=${model.vocabSize})`);
}
183
+
184
+ if (require.main === module) {
185
+ main().catch(err => {
186
+ console.error(err.stack || err.message);
187
+ process.exit(1);
188
+ });
189
+ }
190
+
191
+ module.exports = { _train, _features };
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Budget enforcement middleware (Phase 6.2).
3
+ *
4
+ * Reads tenant/budget context from request headers, checks the hierarchical
5
+ * budget ceiling, and rejects with 429 if exceeded.
6
+ *
7
+ * Header contract:
8
+ * LYNKR-Virtual-Key, LYNKR-Team-Id, LYNKR-Customer-Id, LYNKR-Org-Id
9
+ */
10
+
11
+ const logger = require('../../logger');
12
+ const { getHierarchicalBudget } = require('../../budget/hierarchical-budget');
13
+
14
/**
 * Extract the budget context from the request headers.
 *
 * @param {object} req - Request object; `req.headers` is read with
 *   lower-cased header names.
 * @returns {{virtual_key: *, team: *, customer: *, org: *}} One entry per
 *   budget level; an absent or empty header becomes null.
 */
function _readContext(req) {
  const headers = req.headers || {};
  const pick = (name) => headers[name] || null;
  return {
    virtual_key: pick('lynkr-virtual-key'),
    team: pick('lynkr-team-id'),
    customer: pick('lynkr-customer-id'),
    org: pick('lynkr-org-id'),
  };
}
23
+
24
/**
 * Express middleware that rejects requests from exhausted budgets.
 *
 * The request cost is not estimated here: the hierarchical budget is asked
 * whether a fixed minimum charge still fits, so that accounts already at
 * their ceiling are rejected with HTTP 429 before any routing happens. On
 * success the budget context is stored in `res.locals.budgetContext`; this
 * middleware does not record spend itself.
 *
 * Enforcement is skipped when LYNKR_BUDGET_ENFORCER is set to 'false'.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @param {Function} next - Express continuation.
 */
function budgetEnforcer(req, res, next) {
  if (process.env.LYNKR_BUDGET_ENFORCER === 'false') return next();

  // Nominal amount used for the ceiling pre-check (no real estimate yet).
  const minimumCharge = 0.01;
  const context = _readContext(req);
  const budget = getHierarchicalBudget();
  const check = budget.check(context, minimumCharge);

  if (!check.ok) {
    logger.warn({ exceeded: check.exceeded }, '[BudgetEnforcer] Budget exceeded');
    // Keep the envelope's own `type` and `message` authoritative: details
    // from the budget check are added, but may not overwrite them.
    const { type: _type, message: _message, ...details } = check.exceeded || {};
    return res.status(429).json({
      error: {
        type: 'budget_exceeded',
        message: `Budget exceeded for ${details.level}=${details.id}`,
        ...details,
      },
    });
  }

  res.locals = res.locals || {};
  res.locals.budgetContext = context;
  next();
}
50
+
51
/**
 * Record spend against the hierarchical budget once a request has finished.
 * Intended to be called by request handlers with the actual cost.
 *
 * @param {object|null} context - Budget context of the request; nothing is
 *   recorded when it is missing.
 * @param {number} amount - Amount to record.
 */
function recordSpend(context, amount) {
  if (context) {
    const budget = getHierarchicalBudget();
    budget.record(context, amount);
  }
}
59
+
60
+ module.exports = { budgetEnforcer, recordSpend };
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Tenant context middleware (Phase 6.1).
3
+ *
4
+ * Reads LYNKR-Tenant-Id from request headers and attaches the loaded tenant
5
+ * policy to res.locals.tenantPolicy for downstream handlers.
6
+ */
7
+
8
+ const { getTenantId, getPolicy } = require('../../routing/tenant-policy');
9
+
10
/**
 * Express middleware that attaches the tenant and its policy to the response.
 *
 * When getTenantId yields an id for the request, `res.locals.tenantId` and
 * `res.locals.tenantPolicy` are set; otherwise `res.locals` is only ensured
 * to exist. The chain always continues.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @param {Function} next - Express continuation.
 */
function tenantMiddleware(req, res, next) {
  const tenantId = getTenantId(req);
  res.locals = res.locals || {};
  if (!tenantId) {
    next();
    return;
  }
  // Resolve the policy before touching res.locals so a failing lookup leaves
  // no partial tenant state behind.
  const tenantPolicy = getPolicy(tenantId);
  Object.assign(res.locals, { tenantId, tenantPolicy });
  next();
}
20
+
21
+ module.exports = { tenantMiddleware };
package/src/api/router.js CHANGED
@@ -3,6 +3,7 @@ const { processMessage } = require("../orchestrator");
3
3
  const { getSession } = require("../sessions");
4
4
  const metrics = require("../metrics");
5
5
  const logger = require("../logger");
6
+ const config = require("../config");
6
7
  const { createRateLimiter } = require("./middleware/rate-limiter");
7
8
  const openaiRouter = require("./openai-router");
8
9
  const providersRouter = require("./providers-handler");
@@ -17,50 +18,26 @@ const router = express.Router();
17
18
  const rateLimiter = createRateLimiter();
18
19
 
19
20
  /**
20
- * Estimate token count for messages
21
- * Uses rough approximation of ~4 characters per token
22
- * @param {Array} messages - Array of message objects with role and content
23
- * @param {string|Array} system - System prompt (string or array of content blocks)
24
- * @returns {number} Estimated input token count
21
+ * Estimate token count for messages.
22
+ *
23
+ * Phase 1.1: tiktoken-backed via routing/tokenizer (graceful fallback to chars/4
24
+ * if js-tiktoken is unavailable).
25
25
  */
26
- function estimateTokenCount(messages = [], system = null) {
27
- let totalChars = 0;
28
-
29
- // Count system prompt characters
30
- if (system) {
31
- if (typeof system === "string") {
32
- totalChars += system.length;
33
- } else if (Array.isArray(system)) {
34
- system.forEach((block) => {
35
- if (block.type === "text" && block.text) {
36
- totalChars += block.text.length;
37
- }
38
- });
39
- }
40
- }
26
+ const { countMessagesTokens } = require("../routing/tokenizer");
41
27
 
42
- // Count message characters
43
- messages.forEach((msg) => {
44
- if (msg.content) {
45
- if (typeof msg.content === "string") {
46
- totalChars += msg.content.length;
47
- } else if (Array.isArray(msg.content)) {
48
- msg.content.forEach((block) => {
49
- if (block.type === "text" && block.text) {
50
- totalChars += block.text.length;
51
- } else if (block.type === "image" && block.source?.data) {
52
- // Images: rough estimate based on base64 length
53
- totalChars += Math.floor(block.source.data.length / 6);
54
- }
55
- });
56
- }
57
- }
58
- });
59
-
60
- // Estimate tokens: ~4 characters per token
61
- return Math.ceil(totalChars / 4);
28
+ function estimateTokenCount(messages = [], system = null, model = null) {
29
+ return countMessagesTokens(messages, system, model);
62
30
  }
63
31
 
32
+ // Root health check (for HEAD / and GET /)
33
+ router.head("/", (req, res) => {
34
+ res.status(200).end();
35
+ });
36
+
37
+ router.get("/", (req, res) => {
38
+ res.json({ status: "ok", service: "lynkr" });
39
+ });
40
+
64
41
  router.get("/health", (req, res) => {
65
42
  res.json({ status: "ok" });
66
43
  });
@@ -371,6 +348,7 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
371
348
  options: {
372
349
  maxSteps: req.body?.max_steps,
373
350
  maxDurationMs: req.body?.max_duration_ms,
351
+ tenantPolicy: res.locals?.tenantPolicy || null,
374
352
  },
375
353
  });
376
354
 
@@ -604,6 +582,7 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
604
582
  options: {
605
583
  maxSteps: req.body?.max_steps,
606
584
  maxDurationMs: req.body?.max_duration_ms,
585
+ tenantPolicy: res.locals?.tenantPolicy || null,
607
586
  },
608
587
  });
609
588
  timer.mark("processMessage");