lynkr 9.0.2 → 9.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -10
- package/bin/cli.js +18 -1
- package/bin/lynkr-trajectory.js +136 -0
- package/bin/lynkr-usage.js +219 -0
- package/funding.json +110 -0
- package/package.json +4 -2
- package/public/dashboard.html +665 -0
- package/scripts/build-knn-index.js +130 -0
- package/scripts/calibrate-thresholds.js +197 -0
- package/scripts/compare-policies.js +67 -0
- package/scripts/learn-output-ratios.js +162 -0
- package/scripts/refresh-pricing.js +122 -0
- package/scripts/run-routerarena.js +26 -0
- package/scripts/sample-regret.js +84 -0
- package/scripts/train-risk-classifier.js +191 -0
- package/src/api/files-router.js +6 -6
- package/src/api/middleware/budget-enforcer.js +60 -0
- package/src/api/middleware/budget.js +19 -1
- package/src/api/middleware/load-shedding.js +17 -0
- package/src/api/middleware/tenant.js +21 -0
- package/src/api/openai-router.js +1 -1
- package/src/api/router.js +204 -87
- package/src/budget/hierarchical-budget.js +159 -0
- package/src/cache/semantic.js +28 -2
- package/src/clients/databricks.js +68 -10
- package/src/clients/openai-format.js +31 -5
- package/src/config/index.js +246 -43
- package/src/context/toon.js +5 -4
- package/src/dashboard/api.js +170 -0
- package/src/dashboard/router.js +13 -0
- package/src/headroom/client.js +3 -109
- package/src/headroom/index.js +0 -14
- package/src/memory/search.js +0 -50
- package/src/orchestrator/index.js +106 -11
- package/src/orchestrator/preflight.js +188 -0
- package/src/prompts/system.js +34 -6
- package/src/routing/bandit.js +246 -0
- package/src/routing/cascade.js +106 -0
- package/src/routing/complexity-analyzer.js +7 -15
- package/src/routing/confidence-scorer.js +121 -0
- package/src/routing/context-validator.js +71 -0
- package/src/routing/cost-optimizer.js +5 -2
- package/src/routing/deadline.js +52 -0
- package/src/routing/drift-monitor.js +113 -0
- package/src/routing/embedding-cache.js +77 -0
- package/src/routing/index.js +374 -4
- package/src/routing/interaction.js +183 -0
- package/src/routing/knn-router.js +206 -0
- package/src/routing/latency-tracker.js +113 -71
- package/src/routing/model-tiers.js +156 -6
- package/src/routing/output-ratios.js +57 -0
- package/src/routing/regret-estimator.js +91 -0
- package/src/routing/reward-pipeline.js +62 -0
- package/src/routing/risk-analyzer.js +194 -0
- package/src/routing/risk-classifier.js +130 -0
- package/src/routing/shadow-mode.js +77 -0
- package/src/routing/telemetry.js +7 -0
- package/src/routing/tenant-policy.js +96 -0
- package/src/routing/tokenizer.js +162 -0
- package/src/server.js +12 -0
- package/src/stores/file-store.js +42 -7
- package/src/tools/smart-selection.js +11 -2
- package/src/training/trajectory-compressor.js +266 -0
- package/src/usage/aggregator.js +206 -0
- package/src/utils/markdown-ansi.js +146 -0
package/src/routing/index.js
CHANGED
|
@@ -22,15 +22,59 @@ const {
|
|
|
22
22
|
const { getAgenticDetector, AGENT_TYPES } = require('./agentic-detector');
|
|
23
23
|
const { getModelTierSelector, TIER_DEFINITIONS } = require('./model-tiers');
|
|
24
24
|
const { getCostOptimizer } = require('./cost-optimizer');
|
|
25
|
+
const { analyzeRisk } = require('./risk-classifier');
|
|
26
|
+
|
|
27
|
+
// Phase 3-6 routing modules
|
|
28
|
+
const { getKnnRouter } = require('./knn-router');
|
|
29
|
+
const { getBandit } = require('./bandit');
|
|
30
|
+
const { getShadowPolicy, compareAndLog: shadowCompareAndLog } = require('./shadow-mode');
|
|
31
|
+
const { chooseFastest } = require('./deadline');
|
|
32
|
+
const { applyTenantOverrides } = require('./tenant-policy');
|
|
25
33
|
|
|
26
34
|
// Telemetry modules
|
|
27
35
|
const telemetry = require('./telemetry');
|
|
28
36
|
const { scoreResponseQuality } = require('./quality-scorer');
|
|
29
37
|
const { getLatencyTracker } = require('./latency-tracker');
|
|
30
38
|
|
|
39
|
+
// Phase 1 modules
|
|
40
|
+
const contextValidator = require('./context-validator');
|
|
41
|
+
const { countPayloadTokens } = require('./tokenizer');
|
|
42
|
+
|
|
31
43
|
// Local providers
|
|
32
44
|
const LOCAL_PROVIDERS = ['ollama', 'llamacpp', 'lmstudio'];
|
|
33
45
|
|
|
46
|
+
/**
 * Returns true when the payload carries at least one image content block.
 *
 * Only structured (array) message content is inspected; plain string
 * content cannot hold an image block and yields false. Blocks whose `type`
 * is `image` or `image_url` count as images. `tool_result` blocks are
 * searched recursively, because their `content` may itself be an array of
 * blocks that includes images — without that, an image returned by a tool
 * would bypass the vision guard.
 *
 * @param {object} payload - Request payload; only `payload.messages` is read.
 * @returns {boolean} True if any message (or nested tool result) has an image block.
 */
function _payloadHasImages(payload) {
  const messages = payload?.messages;
  if (!Array.isArray(messages)) return false;

  const IMAGE_TYPES = ['image', 'image_url'];

  // Non-array content (strings, null, undefined) can never contain an image block.
  const hasImage = (blocks) => {
    if (!Array.isArray(blocks)) return false;
    return blocks.some((block) => {
      if (IMAGE_TYPES.includes(block?.type)) return true;
      // Tool results may wrap their own list of content blocks.
      return block?.type === 'tool_result' && hasImage(block.content);
    });
  };

  return messages.some((msg) => hasImage(msg?.content));
}
|
|
59
|
+
|
|
60
|
+
/**
 * Collects the names of providers whose required settings are present in
 * `config`. Used by the Phase 1.2 cost-optimizer override to scope candidates.
 *
 * A provider is listed when every setting checked for it is truthy: cloud
 * providers need an API key (plus a URL/endpoint where one is checked),
 * local providers only need an endpoint.
 *
 * @returns {string[]} Provider names, in the fixed order of the table below.
 */
function _enabledProviders() {
  // [provider name, whether its required settings are all present]
  const readiness = [
    ['databricks', config.databricks?.url && config.databricks?.apiKey],
    ['azure-anthropic', config.azureAnthropic?.endpoint && config.azureAnthropic?.apiKey],
    ['bedrock', config.bedrock?.apiKey],
    ['openrouter', config.openrouter?.apiKey],
    ['openai', config.openai?.apiKey],
    ['azure-openai', config.azureOpenAI?.endpoint && config.azureOpenAI?.apiKey],
    ['ollama', config.ollama?.endpoint],
    ['llamacpp', config.llamacpp?.endpoint],
    ['lmstudio', config.lmstudio?.endpoint],
  ];

  return readiness
    .filter(([, configured]) => configured)
    .map(([name]) => name);
}
|
|
77
|
+
|
|
34
78
|
/**
|
|
35
79
|
* Check if a provider is local
|
|
36
80
|
*/
|
|
@@ -40,15 +84,28 @@ function isLocalProvider(provider) {
|
|
|
40
84
|
|
|
41
85
|
/**
 * Reports whether provider fallback is active.
 *
 * With tier routing on (`config.modelTiers.enabled` truthy) fallback is
 * always enabled. In static provider mode it is enabled unless
 * `config.modelProvider.fallbackEnabled` is explicitly `false`.
 *
 * @returns {boolean}
 */
function isFallbackEnabled() {
  const tierRoutingOn = Boolean(config.modelTiers?.enabled);
  // Static provider mode honours FALLBACK_ENABLED; tier routing overrides it.
  return tierRoutingOn || config.modelProvider?.fallbackEnabled !== false;
}
|
|
47
97
|
|
|
48
98
|
/**
 * Resolves the provider to fall back to.
 *
 * In tier routing mode the provider is the lowercase `name:` prefix of
 * `config.modelTiers.REASONING` (letters and hyphens only). When tier
 * routing is off, no REASONING tier is set, or the value has no such
 * prefix, the static `config.modelProvider.fallbackProvider` is used,
 * defaulting to `'databricks'`.
 *
 * @returns {string} Provider name.
 */
function getFallbackProvider() {
  const reasoningTier = config.modelTiers?.enabled ? config.modelTiers?.REASONING : null;

  if (reasoningTier) {
    // Tier routing mode: the provider is whatever precedes the first colon.
    const [, tierProvider] = reasoningTier.match(/^([a-z-]+):/) ?? [];
    if (tierProvider) return tierProvider;
  }

  // Static provider mode: use FALLBACK_PROVIDER.
  return config.modelProvider?.fallbackProvider ?? 'databricks';
}
|
|
54
111
|
|
|
@@ -97,6 +154,18 @@ function getBestLocalProvider() {
|
|
|
97
154
|
async function determineProviderSmart(payload, options = {}) {
|
|
98
155
|
const primaryProvider = config.modelProvider?.type ?? 'databricks';
|
|
99
156
|
|
|
157
|
+
// Risk analysis runs orthogonally to complexity. We compute it once
|
|
158
|
+
// up-front so it can short-circuit force_local and feed the tier
|
|
159
|
+
// selector below. Even when tier routing is disabled we still surface
|
|
160
|
+
// the signal for telemetry.
|
|
161
|
+
let risk = null;
|
|
162
|
+
try {
|
|
163
|
+
risk = analyzeRisk(payload);
|
|
164
|
+
} catch (err) {
|
|
165
|
+
logger.debug({ err: err.message }, '[Routing] Risk analysis failed, ignoring');
|
|
166
|
+
risk = null;
|
|
167
|
+
}
|
|
168
|
+
|
|
100
169
|
// If tier routing is disabled, use static configuration
|
|
101
170
|
if (!config.modelTiers?.enabled) {
|
|
102
171
|
return {
|
|
@@ -104,9 +173,39 @@ async function determineProviderSmart(payload, options = {}) {
|
|
|
104
173
|
model: null,
|
|
105
174
|
method: 'static',
|
|
106
175
|
reason: 'tier_routing_disabled',
|
|
176
|
+
risk,
|
|
107
177
|
};
|
|
108
178
|
}
|
|
109
179
|
|
|
180
|
+
// High-risk requests jump straight to COMPLEX and skip the rest of
|
|
181
|
+
// the analysis. This is independent of complexity score — a one-line
|
|
182
|
+
// edit to auth/middleware.ts should never go to a local model.
|
|
183
|
+
if (risk?.level === 'high' && isFallbackEnabled()) {
|
|
184
|
+
try {
|
|
185
|
+
const selector = getModelTierSelector();
|
|
186
|
+
const modelSelection = selector.selectModel('COMPLEX', null);
|
|
187
|
+
const decision = {
|
|
188
|
+
provider: modelSelection.provider,
|
|
189
|
+
model: modelSelection.model,
|
|
190
|
+
tier: 'COMPLEX',
|
|
191
|
+
method: 'risk',
|
|
192
|
+
reason: 'high_risk_forced_tier',
|
|
193
|
+
score: 100,
|
|
194
|
+
risk,
|
|
195
|
+
};
|
|
196
|
+
routingMetrics.record(decision);
|
|
197
|
+
logger.debug({
|
|
198
|
+
tier: 'COMPLEX',
|
|
199
|
+
provider: decision.provider,
|
|
200
|
+
instructionHits: risk.instructionHits,
|
|
201
|
+
pathHits: risk.pathHits,
|
|
202
|
+
}, '[Routing] High risk → forcing tier');
|
|
203
|
+
return decision;
|
|
204
|
+
} catch (err) {
|
|
205
|
+
logger.debug({ err: err.message }, '[Routing] Risk-forced tier selection failed, falling through');
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
|
|
110
209
|
// Quick check for force patterns
|
|
111
210
|
if (shouldForceLocal(payload)) {
|
|
112
211
|
// When tier routing is enabled, respect TIER_SIMPLE instead of blindly choosing local
|
|
@@ -121,6 +220,7 @@ async function determineProviderSmart(payload, options = {}) {
|
|
|
121
220
|
method: 'force',
|
|
122
221
|
reason: 'force_local_pattern',
|
|
123
222
|
score: 0,
|
|
223
|
+
risk,
|
|
124
224
|
};
|
|
125
225
|
routingMetrics.record(decision);
|
|
126
226
|
return decision;
|
|
@@ -135,6 +235,7 @@ async function determineProviderSmart(payload, options = {}) {
|
|
|
135
235
|
method: 'force',
|
|
136
236
|
reason: 'force_local_pattern',
|
|
137
237
|
score: 0,
|
|
238
|
+
risk,
|
|
138
239
|
};
|
|
139
240
|
routingMetrics.record(decision);
|
|
140
241
|
return decision;
|
|
@@ -148,6 +249,7 @@ async function determineProviderSmart(payload, options = {}) {
|
|
|
148
249
|
method: 'force',
|
|
149
250
|
reason: 'force_cloud_pattern',
|
|
150
251
|
score: 100,
|
|
252
|
+
risk,
|
|
151
253
|
};
|
|
152
254
|
routingMetrics.record(decision);
|
|
153
255
|
return decision;
|
|
@@ -201,6 +303,7 @@ async function determineProviderSmart(payload, options = {}) {
|
|
|
201
303
|
reason: 'autonomous_workflow',
|
|
202
304
|
score: analysis.score,
|
|
203
305
|
agenticResult,
|
|
306
|
+
risk,
|
|
204
307
|
};
|
|
205
308
|
routingMetrics.record(decision);
|
|
206
309
|
return decision;
|
|
@@ -236,9 +339,11 @@ async function determineProviderSmart(payload, options = {}) {
|
|
|
236
339
|
}
|
|
237
340
|
}
|
|
238
341
|
|
|
239
|
-
// Apply routing decision based on tier config (TIER_* env vars
|
|
342
|
+
// Apply routing decision based on tier config (TIER_* env vars take precedence
|
|
343
|
+
// but Phase 1.2 lets the cost-optimizer pick a cheaper qualifying model when safe).
|
|
240
344
|
let provider;
|
|
241
345
|
let method = 'tier_config';
|
|
346
|
+
let costOptimized = false;
|
|
242
347
|
|
|
243
348
|
const selector = getModelTierSelector();
|
|
244
349
|
const modelSelection = selector.selectModel(tier, null);
|
|
@@ -247,8 +352,242 @@ async function determineProviderSmart(payload, options = {}) {
|
|
|
247
352
|
selectedModel = modelSelection.model;
|
|
248
353
|
logger.debug({ tier, provider, model: selectedModel }, '[Routing] Using tier config');
|
|
249
354
|
|
|
250
|
-
//
|
|
251
|
-
//
|
|
355
|
+
// Phase 1.2 — cost-optimizer override.
|
|
356
|
+
// Only kick in when:
|
|
357
|
+
// - feature flag enabled (default true, disable with LYNKR_COST_OPTIMIZE=false)
|
|
358
|
+
// - risk level is not high (high-risk keeps the explicitly-configured model)
|
|
359
|
+
// - the optimizer finds a meaningfully cheaper qualifying model
|
|
360
|
+
const costOptimizeEnabled = process.env.LYNKR_COST_OPTIMIZE !== 'false'
|
|
361
|
+
&& config.routing?.costOptimize !== false;
|
|
362
|
+
if (costOptimizeEnabled && risk?.level !== 'high') {
|
|
363
|
+
try {
|
|
364
|
+
const optimizer = getCostOptimizer();
|
|
365
|
+
const availableProviders = _enabledProviders();
|
|
366
|
+
const cheapest = optimizer.findCheapestForTier(tier, availableProviders);
|
|
367
|
+
if (cheapest && cheapest.model && cheapest.model !== selectedModel) {
|
|
368
|
+
const current = optimizer.estimateCost(selectedModel, 1000);
|
|
369
|
+
const candidate = optimizer.estimateCost(cheapest.model, 1000);
|
|
370
|
+
if (candidate.totalEstimate > 0 && candidate.totalEstimate < current.totalEstimate * 0.75) {
|
|
371
|
+
logger.debug({
|
|
372
|
+
tier,
|
|
373
|
+
from: `${provider}:${selectedModel}`,
|
|
374
|
+
to: `${cheapest.provider}:${cheapest.model}`,
|
|
375
|
+
savedPerK: (current.totalEstimate - candidate.totalEstimate).toFixed(6),
|
|
376
|
+
}, '[Routing] Cost-optimizer override');
|
|
377
|
+
provider = cheapest.provider;
|
|
378
|
+
selectedModel = cheapest.model;
|
|
379
|
+
method = 'tier_config+cost_optimized';
|
|
380
|
+
costOptimized = true;
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
} catch (err) {
|
|
384
|
+
logger.debug({ err: err.message }, '[Routing] Cost-optimize failed, keeping tier_config selection');
|
|
385
|
+
}
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
// Phase 1.3 — context window validation. If estimated tokens exceed the
|
|
389
|
+
// selected model's context (with response headroom), escalate to a
|
|
390
|
+
// context-capable model regardless of tier.
|
|
391
|
+
try {
|
|
392
|
+
const estimatedTokens = countPayloadTokens(payload, selectedModel);
|
|
393
|
+
const ctxResult = contextValidator.validate(selectedModel, estimatedTokens);
|
|
394
|
+
if (!ctxResult.ok) {
|
|
395
|
+
const capable = selector.findContextCapable(estimatedTokens, tier);
|
|
396
|
+
if (capable) {
|
|
397
|
+
logger.info({
|
|
398
|
+
from: `${provider}:${selectedModel}`,
|
|
399
|
+
to: `${capable.provider}:${capable.model}`,
|
|
400
|
+
required: estimatedTokens,
|
|
401
|
+
oldContext: ctxResult.context,
|
|
402
|
+
newContext: capable.context,
|
|
403
|
+
}, '[Routing] Context window escalation');
|
|
404
|
+
provider = capable.provider;
|
|
405
|
+
selectedModel = capable.model;
|
|
406
|
+
if (capable.tier) tier = capable.tier;
|
|
407
|
+
method = method + '+context_escalated';
|
|
408
|
+
} else {
|
|
409
|
+
logger.warn({
|
|
410
|
+
model: selectedModel,
|
|
411
|
+
required: estimatedTokens,
|
|
412
|
+
available: ctxResult.context,
|
|
413
|
+
}, '[Routing] No context-capable fallback — request may fail upstream');
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
} catch (err) {
|
|
417
|
+
logger.debug({ err: err.message }, '[Routing] Context validation failed, proceeding without check');
|
|
418
|
+
}
|
|
419
|
+
|
|
420
|
+
// Phase 1.4 — vision capability guard.
|
|
421
|
+
// If the payload contains image content blocks but the selected model lacks
|
|
422
|
+
// vision support, silently swap to the cheapest vision-capable model at or
|
|
423
|
+
// above the current tier. Prevents silent upstream failures.
|
|
424
|
+
if (_payloadHasImages(payload)) {
|
|
425
|
+
try {
|
|
426
|
+
const { getModelRegistrySync } = require('./model-registry');
|
|
427
|
+
const registry = getModelRegistrySync();
|
|
428
|
+
const modelInfo = registry.getCost(selectedModel);
|
|
429
|
+
if (!modelInfo?.vision) {
|
|
430
|
+
const visionModel = selector.findVisionCapable(tier);
|
|
431
|
+
if (visionModel) {
|
|
432
|
+
logger.info({
|
|
433
|
+
from: `${provider}:${selectedModel}`,
|
|
434
|
+
to: `${visionModel.provider}:${visionModel.model}`,
|
|
435
|
+
tier: visionModel.tier,
|
|
436
|
+
}, '[Routing] Vision guard — upgrading to vision-capable model');
|
|
437
|
+
provider = visionModel.provider;
|
|
438
|
+
selectedModel = visionModel.model;
|
|
439
|
+
if (visionModel.tier !== tier) tier = visionModel.tier;
|
|
440
|
+
method = method + '+vision_guard';
|
|
441
|
+
} else {
|
|
442
|
+
logger.warn({ model: selectedModel }, '[Routing] Vision guard — no vision-capable model found, request may fail');
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
} catch (err) {
|
|
446
|
+
logger.debug({ err: err.message }, '[Routing] Vision guard check failed, proceeding');
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
// Phase 3.1 — kNN routing hint.
|
|
451
|
+
// If the index has enough entries, query it with the last user message.
|
|
452
|
+
// A high-confidence kNN suggestion overrides the heuristic selection.
|
|
453
|
+
let knnResult = null;
|
|
454
|
+
if (config.routing?.knnEnabled !== false) {
|
|
455
|
+
try {
|
|
456
|
+
const msgs = payload?.messages;
|
|
457
|
+
const lastMsg = Array.isArray(msgs) ? msgs[msgs.length - 1]?.content : null;
|
|
458
|
+
const queryText = typeof lastMsg === 'string' ? lastMsg
|
|
459
|
+
: Array.isArray(lastMsg) ? lastMsg.filter(b => b?.type === 'text').map(b => b.text || '').join(' ')
|
|
460
|
+
: null;
|
|
461
|
+
if (queryText) {
|
|
462
|
+
knnResult = await getKnnRouter().query(queryText);
|
|
463
|
+
if (knnResult && knnResult.confidence > 0.7 && knnResult.model && knnResult.model !== selectedModel) {
|
|
464
|
+
// High confidence — trust kNN's model recommendation directly.
|
|
465
|
+
logger.debug({
|
|
466
|
+
from: `${provider}:${selectedModel}`,
|
|
467
|
+
to: `${knnResult.provider}:${knnResult.model}`,
|
|
468
|
+
confidence: knnResult.confidence.toFixed(3),
|
|
469
|
+
}, '[Routing] kNN override');
|
|
470
|
+
provider = knnResult.provider;
|
|
471
|
+
selectedModel = knnResult.model;
|
|
472
|
+
method = method + '+knn';
|
|
473
|
+
} else if (knnResult && knnResult.confidence > 0.4 && knnResult.confidence <= 0.7) {
|
|
474
|
+
// Ambiguous signal — neighbors are split, we can't trust any single model
|
|
475
|
+
// recommendation. Err on quality: bump the current tier one step up so the
|
|
476
|
+
// request gets a more capable model rather than risking a bad answer from
|
|
477
|
+
// a model that was borderline for similar past requests.
|
|
478
|
+
const TIER_ORDER = ['SIMPLE', 'MEDIUM', 'COMPLEX', 'REASONING'];
|
|
479
|
+
const currentIdx = TIER_ORDER.indexOf(tier);
|
|
480
|
+
if (currentIdx >= 0 && currentIdx < TIER_ORDER.length - 1) {
|
|
481
|
+
const upgradedTier = TIER_ORDER[currentIdx + 1];
|
|
482
|
+
try {
|
|
483
|
+
const upgraded = selector.selectModel(upgradedTier, null);
|
|
484
|
+
logger.debug({
|
|
485
|
+
from: `${tier}:${provider}:${selectedModel}`,
|
|
486
|
+
to: `${upgradedTier}:${upgraded.provider}:${upgraded.model}`,
|
|
487
|
+
confidence: knnResult.confidence.toFixed(3),
|
|
488
|
+
}, '[Routing] kNN ambiguous — escalating tier for safety');
|
|
489
|
+
provider = upgraded.provider;
|
|
490
|
+
selectedModel = upgraded.model;
|
|
491
|
+
tier = upgradedTier;
|
|
492
|
+
method = method + '+knn_ambiguous_escalate';
|
|
493
|
+
} catch (err) {
|
|
494
|
+
logger.debug({ err: err.message }, '[Routing] kNN ambiguous escalation failed, keeping current tier');
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
} catch (err) {
|
|
500
|
+
logger.debug({ err: err.message }, '[Routing] kNN query failed, ignoring');
|
|
501
|
+
}
|
|
502
|
+
}
|
|
503
|
+
|
|
504
|
+
// Phase 4.1 — LinUCB bandit intra-tier selection.
|
|
505
|
+
// When there are two candidates (heuristic vs kNN), the bandit picks the
|
|
506
|
+
// one with the highest estimated UCB score for the current context.
|
|
507
|
+
if (config.routing?.banditEnabled !== false && knnResult && knnResult.model) {
|
|
508
|
+
try {
|
|
509
|
+
// Build candidates: current selection and kNN alternative if different
|
|
510
|
+
const allCandidates = [{ provider, model: selectedModel }];
|
|
511
|
+
if (knnResult.model !== selectedModel) {
|
|
512
|
+
allCandidates.push({ provider: knnResult.provider, model: knnResult.model });
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
if (allCandidates.length > 1) {
|
|
516
|
+
const bandit = getBandit();
|
|
517
|
+
const TASK_TYPES = ['code_gen', 'summarization', 'reasoning', 'factoid', 'chat', 'other'];
|
|
518
|
+
const inferredTask = (analysis.breakdown?.taskType?.reason || 'other').toLowerCase();
|
|
519
|
+
const taskIdx = Math.max(0, TASK_TYPES.findIndex(t => inferredTask.includes(t)));
|
|
520
|
+
const ctx = [
|
|
521
|
+
(analysis.score || 0) / 100,
|
|
522
|
+
Math.log(Math.max(1, analysis.breakdown?.tokenCount || 0) + 1) / 15,
|
|
523
|
+
((payload?.tools?.length ?? 0) > 0) ? 1 : 0,
|
|
524
|
+
options.streaming ? 1 : 0,
|
|
525
|
+
risk?.level === 'high' ? 1 : risk?.level === 'medium' ? 0.5 : 0,
|
|
526
|
+
agenticResult?.isAgentic ? 1 : 0,
|
|
527
|
+
...TASK_TYPES.map((_, i) => i === taskIdx ? 1 : 0),
|
|
528
|
+
];
|
|
529
|
+
const picked = bandit.pick(tier, allCandidates, ctx);
|
|
530
|
+
if (picked && picked.model !== selectedModel) {
|
|
531
|
+
logger.debug({
|
|
532
|
+
from: `${provider}:${selectedModel}`,
|
|
533
|
+
to: `${picked.provider}:${picked.model}`,
|
|
534
|
+
ucb: picked.ucb?.toFixed(4),
|
|
535
|
+
explored: picked.explored,
|
|
536
|
+
}, '[Routing] Bandit override');
|
|
537
|
+
provider = picked.provider;
|
|
538
|
+
selectedModel = picked.model;
|
|
539
|
+
method = method + (picked.explored ? '+bandit_explore' : '+bandit');
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
} catch (err) {
|
|
543
|
+
logger.debug({ err: err.message }, '[Routing] Bandit pick failed, ignoring');
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
// Phase 6.3 — deadline-aware fastest-model selection.
|
|
548
|
+
// Payload carries _deadlineMs injected by the orchestrator from the
|
|
549
|
+
// LYNKR-Deadline-Ms request header.
|
|
550
|
+
const deadlineMs = payload?._deadlineMs ?? null;
|
|
551
|
+
if (deadlineMs) {
|
|
552
|
+
try {
|
|
553
|
+
const fastest = chooseFastest([{ provider, model: selectedModel }], deadlineMs);
|
|
554
|
+
if (fastest && fastest.model !== selectedModel) {
|
|
555
|
+
logger.debug({
|
|
556
|
+
from: `${provider}:${selectedModel}`,
|
|
557
|
+
to: `${fastest.provider}:${fastest.model}`,
|
|
558
|
+
deadlineMs,
|
|
559
|
+
}, '[Routing] Deadline override');
|
|
560
|
+
provider = fastest.provider;
|
|
561
|
+
selectedModel = fastest.model;
|
|
562
|
+
method = method + '+deadline';
|
|
563
|
+
}
|
|
564
|
+
} catch (err) {
|
|
565
|
+
logger.debug({ err: err.message }, '[Routing] Deadline check failed, ignoring');
|
|
566
|
+
}
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
// Phase 6.1 — per-tenant policy overrides.
|
|
570
|
+
// tenantPolicy comes from options (threaded from Express res.locals via
|
|
571
|
+
// orchestrator → databricks → here).
|
|
572
|
+
if (options.tenantPolicy) {
|
|
573
|
+
try {
|
|
574
|
+
const overridden = applyTenantOverrides(
|
|
575
|
+
{ provider, model: selectedModel, tier, method },
|
|
576
|
+
options.tenantPolicy,
|
|
577
|
+
);
|
|
578
|
+
if (overridden && overridden.model !== selectedModel) {
|
|
579
|
+
logger.debug({
|
|
580
|
+
from: `${provider}:${selectedModel}`,
|
|
581
|
+
to: `${overridden.provider}:${overridden.model}`,
|
|
582
|
+
}, '[Routing] Tenant override');
|
|
583
|
+
provider = overridden.provider;
|
|
584
|
+
selectedModel = overridden.model;
|
|
585
|
+
method = overridden.method;
|
|
586
|
+
}
|
|
587
|
+
} catch (err) {
|
|
588
|
+
logger.debug({ err: err.message }, '[Routing] Tenant override failed, ignoring');
|
|
589
|
+
}
|
|
590
|
+
}
|
|
252
591
|
|
|
253
592
|
const decision = {
|
|
254
593
|
provider,
|
|
@@ -262,9 +601,19 @@ async function determineProviderSmart(payload, options = {}) {
|
|
|
262
601
|
analysis,
|
|
263
602
|
embeddingsResult,
|
|
264
603
|
agenticResult,
|
|
265
|
-
costOptimized
|
|
604
|
+
costOptimized,
|
|
605
|
+
risk,
|
|
606
|
+
knnResult,
|
|
266
607
|
};
|
|
267
608
|
|
|
609
|
+
// Phase 4.4 — shadow-mode policy comparison (fire-and-forget).
|
|
610
|
+
const shadowFn = getShadowPolicy();
|
|
611
|
+
if (shadowFn) {
|
|
612
|
+
setImmediate(() =>
|
|
613
|
+
shadowCompareAndLog({ payload, activeDecision: decision, shadowFn }).catch(() => {})
|
|
614
|
+
);
|
|
615
|
+
}
|
|
616
|
+
|
|
268
617
|
// Phase 3: Record metrics
|
|
269
618
|
routingMetrics.record(decision);
|
|
270
619
|
|
|
@@ -322,6 +671,18 @@ function getRoutingHeaders(decision) {
|
|
|
322
671
|
headers['X-Lynkr-Cost-Optimized'] = 'true';
|
|
323
672
|
}
|
|
324
673
|
|
|
674
|
+
if (decision.risk?.level) {
|
|
675
|
+
headers['X-Lynkr-Risk'] = decision.risk.level;
|
|
676
|
+
const hits = Array.from(new Set([
|
|
677
|
+
...(decision.risk.instructionHits || []),
|
|
678
|
+
...(decision.risk.pathHits || []),
|
|
679
|
+
]));
|
|
680
|
+
if (hits.length > 0) {
|
|
681
|
+
// Header values are ASCII-only; comma-join the first few hits.
|
|
682
|
+
headers['X-Lynkr-Risk-Hits'] = hits.slice(0, 8).join(',');
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
|
|
325
686
|
return headers;
|
|
326
687
|
}
|
|
327
688
|
|
|
@@ -350,6 +711,7 @@ module.exports = {
|
|
|
350
711
|
|
|
351
712
|
// Re-export analyzer for direct access
|
|
352
713
|
analyzeComplexity: require('./complexity-analyzer').analyzeComplexity,
|
|
714
|
+
analyzeRisk,
|
|
353
715
|
|
|
354
716
|
// Intelligent routing modules
|
|
355
717
|
getAgenticDetector,
|
|
@@ -358,6 +720,14 @@ module.exports = {
|
|
|
358
720
|
AGENT_TYPES,
|
|
359
721
|
TIER_DEFINITIONS,
|
|
360
722
|
|
|
723
|
+
// Phase 3-6 modules
|
|
724
|
+
getKnnRouter,
|
|
725
|
+
getBandit,
|
|
726
|
+
getShadowPolicy,
|
|
727
|
+
shadowCompareAndLog,
|
|
728
|
+
chooseFastest,
|
|
729
|
+
applyTenantOverrides,
|
|
730
|
+
|
|
361
731
|
// Telemetry
|
|
362
732
|
telemetry,
|
|
363
733
|
scoreResponseQuality,
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Routing Interaction Block
|
|
3
|
+
*
|
|
4
|
+
* Builds an "interaction" block that explains, in plain text, what
|
|
5
|
+
* Lynkr decided to do with a request — which tier, which provider,
|
|
6
|
+
* why it routed there, and what (if anything) the user should do next.
|
|
7
|
+
*
|
|
8
|
+
* Lynkr already surfaces this information via X-Lynkr-* response
|
|
9
|
+
* headers, but headers are invisible to most users in Claude Code /
|
|
10
|
+
* Cursor / Codex. The interaction block lives in the response body
|
|
11
|
+
* so it shows up alongside the model's reply when the visible-routing
|
|
12
|
+
* env flag is on (LYNKR_VISIBLE_ROUTING=true).
|
|
13
|
+
*
|
|
14
|
+
* @module routing/interaction
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
/**
 * Rough estimate of cost savings vs an always-COMPLEX baseline. Not
 * invoice-grade, just a reproducible number for users to glance at.
 *
 * Bands (tier is matched case-insensitively):
 *   SIMPLE    -> 70, or 100 on a local provider
 *   MEDIUM    -> 45, or 90 on a local provider
 *   COMPLEX   -> 10
 *   REASONING and anything unrecognised -> 0
 *
 * A missing, empty, or non-string tier yields 0 instead of throwing, since
 * the value comes from a loosely-typed routing decision object.
 *
 * @param {string|null} tier
 * @param {string|null} provider
 * @returns {number} 0-100
 */
function estimateSavingsPercent(tier, provider) {
  if (typeof tier !== 'string' || tier === '') return 0;

  // Local providers get a higher savings band than cloud for SIMPLE/MEDIUM.
  const isLocal = ['ollama', 'llamacpp', 'lmstudio'].includes(provider);

  switch (tier.toUpperCase()) {
    case 'SIMPLE':
      return isLocal ? 100 : 70;
    case 'MEDIUM':
      return isLocal ? 90 : 45;
    case 'COMPLEX':
      return 10;
    default:
      // REASONING and unknown tiers: no estimated saving.
      return 0;
  }
}
|
|
36
|
+
|
|
37
|
+
/**
 * Maps a routing decision onto the mode label describing what happened.
 *
 * The label is derived from `decision.method`, with `decision.reason`
 * distinguishing the two forced modes. Anything not recognised is
 * reported as `'tier_routed'`.
 *
 * @param {object} decision
 * @returns {string}
 */
function modeFor(decision) {
  const { method, reason } = decision;

  switch (method) {
    case 'risk':
      return 'risk_forced_tier';
    case 'agentic':
      return 'agentic_workflow';
    case 'static':
      return 'static';
    case 'force':
      if (reason === 'force_local_pattern') return 'force_local';
      if (reason === 'force_cloud_pattern') return 'force_cloud';
      return 'tier_routed';
    default:
      return 'tier_routed';
  }
}
|
|
51
|
+
|
|
52
|
+
/**
 * Builds a single-line, terminal-friendly summary of a routing decision, e.g.
 *   "[Lynkr] tier=COMPLEX provider=databricks risk=high score=78"
 *
 * `tier`, `provider`, `model` and `risk` are included only when truthy;
 * `score` is included whenever it is of type number (so 0 is shown).
 *
 * @param {object} decision
 * @returns {string}
 */
function routeLabel(decision) {
  const optionalFields = [
    ['tier', decision.tier],
    ['provider', decision.provider],
    ['model', decision.model],
    ['risk', decision.risk?.level],
  ];

  const segments = optionalFields
    .filter(([, value]) => value)
    .map(([key, value]) => `${key}=${value}`);

  if (typeof decision.score === 'number') {
    segments.push(`score=${decision.score}`);
  }

  return ['[Lynkr]', ...segments].join(' ');
}
|
|
68
|
+
|
|
69
|
+
/**
 * Picks the headline and next_step prose for a decision, keyed on the
 * mode label returned by modeFor(). The text is kept terse so it doesn't
 * pollute the user's view when the model echoes it back.
 *
 * @param {object} decision
 * @returns {{ headline: string, next_step: string }}
 */
function copyFor(decision) {
  switch (modeFor(decision)) {
    case 'risk_forced_tier':
      return {
        headline: `Lynkr routed to ${decision.tier} tier because the request touches a protected domain.`,
        next_step: 'Review the response carefully — sensitive logic was involved.',
      };
    case 'agentic_workflow':
      return {
        headline: `Lynkr detected an agentic workflow and routed to ${decision.provider || decision.tier}.`,
        next_step: 'No action needed — autonomous workflows always use cloud providers.',
      };
    case 'force_local':
      return {
        headline: 'Lynkr routed to the local tier (greeting or trivial request).',
        next_step: 'No action needed.',
      };
    case 'force_cloud':
      return {
        headline: `Lynkr forced cloud routing (${decision.provider || 'cloud'}) for this request.`,
        next_step: 'No action needed.',
      };
    case 'static':
      return {
        headline: `Lynkr used the static provider ${decision.provider}.`,
        next_step: 'Tier routing is disabled — set TIER_* env vars to enable.',
      };
    default:
      // Ordinary tier-routed request.
      return {
        headline: `Lynkr routed to the ${decision.tier || 'default'} tier (${decision.provider || 'unknown'}).`,
        next_step: 'No action needed.',
      };
  }
}
|
|
113
|
+
|
|
114
|
+
/**
 * Assembles the full interaction block for a routing decision.
 *
 * @param {object} decision - The routing decision (from determineProviderSmart
 *   or the pre-route in api/router.js). Must at least have `provider`; ideally
 *   includes `tier`, `model`, `method`, `reason`, `score`, and `risk`.
 * @returns {object|null} The interaction block, or null when `decision` is
 *   falsy or not an object.
 */
function buildInteractionBlock(decision) {
  const usable = Boolean(decision) && typeof decision === 'object';
  if (!usable) return null;

  const copy = copyFor(decision);

  return {
    tool: 'lynkr.route',
    mode: modeFor(decision),
    headline: copy.headline,
    route_label: routeLabel(decision),
    reason: decision.reason || 'unspecified',
    tier: decision.tier || null,
    provider: decision.provider || null,
    model: decision.model || null,
    risk: decision.risk?.level || 'low',
    // Instruction and path hits merged, duplicates removed, first-seen order kept.
    risk_hits: [
      ...new Set([
        ...(decision.risk?.instructionHits || []),
        ...(decision.risk?.pathHits || []),
      ]),
    ],
    complexity_score: typeof decision.score === 'number' ? decision.score : null,
    estimated_savings_percent: estimateSavingsPercent(decision.tier, decision.provider),
    next_step: copy.next_step,
  };
}
|
|
144
|
+
|
|
145
|
+
/**
 * Adds the interaction block to an Anthropic-format response body under
 * the top-level `lynkr_interaction` key. The body is mutated in place and
 * returned; when either argument is falsy the body is returned untouched.
 *
 * Relies on clients ignoring unknown top-level fields.
 *
 * @param {object} body
 * @param {object} interaction
 * @returns {object}
 */
function attachToAnthropicResponse(body, interaction) {
  if (body && interaction) {
    body.lynkr_interaction = interaction;
  }
  return body;
}
|
|
160
|
+
|
|
161
|
+
/**
 * Adds the interaction block to an OpenAI chat-completions response body
 * under the top-level `lynkr_interaction` key. The body is mutated in
 * place and returned; when either argument is falsy the body is returned
 * untouched.
 *
 * @param {object} body
 * @param {object} interaction
 * @returns {object}
 */
function attachToOpenAIResponse(body, interaction) {
  if (body && interaction) {
    body.lynkr_interaction = interaction;
  }
  return body;
}
|
|
174
|
+
|
|
175
|
+
module.exports = {
|
|
176
|
+
buildInteractionBlock,
|
|
177
|
+
attachToAnthropicResponse,
|
|
178
|
+
attachToOpenAIResponse,
|
|
179
|
+
// Exposed for tests
|
|
180
|
+
estimateSavingsPercent,
|
|
181
|
+
modeFor,
|
|
182
|
+
routeLabel,
|
|
183
|
+
};
|