lynkr 9.1.2 → 9.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/README.md +21 -10
  2. package/package.json +3 -1
  3. package/scripts/build-knn-index.js +130 -0
  4. package/scripts/calibrate-thresholds.js +197 -0
  5. package/scripts/compare-policies.js +67 -0
  6. package/scripts/learn-output-ratios.js +162 -0
  7. package/scripts/refresh-pricing.js +122 -0
  8. package/scripts/run-routerarena.js +26 -0
  9. package/scripts/sample-regret.js +84 -0
  10. package/scripts/train-risk-classifier.js +191 -0
  11. package/src/api/middleware/budget-enforcer.js +60 -0
  12. package/src/api/middleware/tenant.js +21 -0
  13. package/src/api/router.js +19 -40
  14. package/src/budget/hierarchical-budget.js +159 -0
  15. package/src/cache/semantic.js +28 -2
  16. package/src/clients/databricks.js +59 -5
  17. package/src/config/index.js +239 -43
  18. package/src/context/toon.js +5 -4
  19. package/src/orchestrator/index.js +44 -6
  20. package/src/prompts/system.js +34 -6
  21. package/src/routing/bandit.js +246 -0
  22. package/src/routing/cascade.js +106 -0
  23. package/src/routing/complexity-analyzer.js +7 -15
  24. package/src/routing/confidence-scorer.js +121 -0
  25. package/src/routing/context-validator.js +71 -0
  26. package/src/routing/cost-optimizer.js +5 -2
  27. package/src/routing/deadline.js +52 -0
  28. package/src/routing/drift-monitor.js +113 -0
  29. package/src/routing/embedding-cache.js +77 -0
  30. package/src/routing/index.js +314 -5
  31. package/src/routing/knn-router.js +206 -0
  32. package/src/routing/latency-tracker.js +113 -71
  33. package/src/routing/model-tiers.js +156 -6
  34. package/src/routing/output-ratios.js +57 -0
  35. package/src/routing/regret-estimator.js +91 -0
  36. package/src/routing/reward-pipeline.js +62 -0
  37. package/src/routing/risk-classifier.js +130 -0
  38. package/src/routing/shadow-mode.js +77 -0
  39. package/src/routing/tenant-policy.js +96 -0
  40. package/src/routing/tokenizer.js +162 -0
  41. package/src/server.js +9 -0
@@ -0,0 +1,159 @@
1
+ /**
2
+ * Hierarchical budget controls (Phase 6.2).
3
+ *
4
+ * Tracks spend at four levels: virtual_key → team → customer → org.
5
+ * Each level has a ceiling; a request must pass *every* level it belongs
6
+ * to.
7
+ *
8
+ * Storage: in-process Map by default. Operations are atomic-by-design (single
9
+ * Node event loop), so no locking needed. For multi-process deployments,
10
+ * swap the storage implementation for Redis (the interface is stable; see
11
+ * RedisBudgetStore stub at the bottom of the file).
12
+ */
13
+
14
+ const fs = require('fs');
15
+ const path = require('path');
16
+ const logger = require('../logger');
17
+
18
+ const CONFIG_PATH = path.join(__dirname, '../../data/budgets.json');
19
+ const RELOAD_INTERVAL_MS = 60_000;
20
+
21
// Budget levels in the order check()/record()/status() walk them.
// Frozen because the array is exported and shared by every caller; a stray
// push/splice elsewhere would silently change which ceilings are enforced.
const LEVELS = Object.freeze(['virtual_key', 'team', 'customer', 'org']);
22
+
23
/**
 * Spend store backed by a Map held in this process.
 *
 * Entries are keyed by `${level}:${id}` and hold `{ spent, periodStart }`.
 * Unknown keys read as a fresh `{ spent: 0, periodStart: now }` record that
 * is NOT persisted until something writes it back with set()/incr().
 */
class MapBudgetStore {
  constructor() {
    this._spend = new Map(); // `${level}:${id}` → { spent, periodStart }
  }

  /** Build the Map key for a (level, id) pair. */
  _key(level, id) {
    return `${level}:${id}`;
  }

  /**
   * Read the record for (level, id).
   * @returns {{ spent: number, periodStart: number }} the stored record
   *   (live reference), or an unsaved zero record when none exists.
   */
  get(level, id) {
    return this._spend.get(this._key(level, id)) || { spent: 0, periodStart: Date.now() };
  }

  /** Store `value` as the record for (level, id). */
  set(level, id, value) {
    this._spend.set(this._key(level, id), value);
  }

  /**
   * Add `amount` to the recorded spend and persist the record.
   *
   * Non-finite or non-numeric amounts are ignored: adding NaN/Infinity would
   * leave `spent` permanently non-comparable, and adding a string would turn
   * it into a concatenated string — either way every later
   * `spent + amount > limit` test would stop rejecting requests.
   *
   * @param {string} level
   * @param {string} id
   * @param {number} amount
   * @returns {{ spent: number, periodStart: number }} the current record
   */
  incr(level, id, amount) {
    const current = this.get(level, id);
    if (!Number.isFinite(amount)) return current;
    current.spent += amount;
    this.set(level, id, current);
    return current;
  }

  /**
   * Zero the record and restart its period when more than `periodMs` has
   * elapsed since `periodStart`; otherwise leave it untouched.
   * @returns {{ spent: number, periodStart: number }} the current record
   */
  resetIfStale(level, id, periodMs) {
    const current = this.get(level, id);
    if (Date.now() - current.periodStart > periodMs) {
      current.spent = 0;
      current.periodStart = Date.now();
      this.set(level, id, current);
    }
    return current;
  }
}
57
+
58
let _config = null;
let _configLoadedAt = 0;

/**
 * Return the budget configuration, re-reading CONFIG_PATH at most once per
 * RELOAD_INTERVAL_MS.
 *
 * - File present and valid: the parsed object becomes the active config.
 * - File absent: built-in defaults (24h period, no limits), as before.
 * - Read/parse failure or a non-object JSON body: the failure is logged at
 *   warn level and the last successfully loaded config stays in force, so a
 *   half-written or corrupt file cannot silently remove every ceiling.
 *   Defaults are used only when nothing has ever been loaded.
 *
 * @returns {object} config shaped like `{ defaults: {...}, limits: {...} }`
 */
function _loadConfig() {
  if (_config && Date.now() - _configLoadedAt < RELOAD_INTERVAL_MS) return _config;
  try {
    if (fs.existsSync(CONFIG_PATH)) {
      const parsed = JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8'));
      // `null`, numbers, strings and arrays are valid JSON but would crash or
      // confuse the `config.defaults` / `config.limits` lookups downstream.
      if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
        throw new Error('budgets.json must contain a JSON object');
      }
      _config = parsed;
      _configLoadedAt = Date.now();
      return _config;
    }
  } catch (err) {
    logger.warn({ err: err.message }, '[HierarchicalBudget] Config load failed');
    if (_config) {
      // Keep enforcing the last good limits; retry after the normal interval.
      _configLoadedAt = Date.now();
      return _config;
    }
  }
  _config = { defaults: { periodMs: 86400000 }, limits: {} };
  _configLoadedAt = Date.now();
  return _config;
}
75
+
76
/**
 * Enforces spend ceilings across every level in LEVELS.
 *
 * A request context names at most one id per level; a level with no id in
 * the context, or with no numeric limit configured, is skipped.
 */
class HierarchicalBudget {
  /**
   * @param {object} [store] — object exposing incr() and resetIfStale();
   *   defaults to a new MapBudgetStore.
   */
  constructor(store = new MapBudgetStore()) {
    this.store = store;
  }

  /**
   * Resolve the ceiling for (level, id): the per-id limit when one is set,
   * otherwise the level default.
   *
   * Only OWN properties of the per-level table are consulted. A plain
   * `limits[level][id]` lookup would pick up inherited members for ids such
   * as "constructor" or "toString", yield a non-number, and make check()
   * skip the level entirely instead of applying the default ceiling.
   */
  _limitFor(config, level, id) {
    const perLevel = config.limits?.[level];
    if (perLevel && Object.prototype.hasOwnProperty.call(perLevel, id)) {
      const specific = perLevel[id];
      if (specific !== null && specific !== undefined) return specific;
    }
    return config.defaults?.[level];
  }

  /**
   * Check whether all relevant ceilings still allow `amount` of spend.
   * @param {object} context — { virtual_key, team, customer, org }; a
   *   null/undefined context is treated as empty.
   * @param {number} amount — dollars. Non-finite, non-numeric or negative
   *   values are treated as 0 so they cannot slip past an exhausted budget
   *   (any comparison against NaN is false, and a negative amount would
   *   offset existing spend).
   * @returns {{ ok: boolean, exceeded?: { level, id, limit, spent } }}
   */
  check(context, amount) {
    const config = _loadConfig();
    const periodMs = config.defaults?.periodMs || 86400000;
    const scope = context ?? {};
    const cost = Number.isFinite(amount) && amount > 0 ? amount : 0;
    for (const level of LEVELS) {
      const id = scope[level];
      if (!id) continue;
      const limit = this._limitFor(config, level, id);
      if (typeof limit !== 'number') continue;
      const current = this.store.resetIfStale(level, id, periodMs);
      if (current.spent + cost > limit) {
        return {
          ok: false,
          exceeded: { level, id, limit, spent: current.spent },
        };
      }
    }
    return { ok: true };
  }

  /**
   * Record spend after a request completes. Increments all relevant levels.
   * Amounts that are not finite positive numbers are ignored; in particular
   * NaN (whose typeof is 'number') must never reach the store, where it
   * would turn `spent` into NaN and disable every later check.
   */
  record(context, amount) {
    if (!Number.isFinite(amount) || amount <= 0) return;
    const scope = context ?? {};
    for (const level of LEVELS) {
      const id = scope[level];
      if (!id) continue;
      this.store.incr(level, id, amount);
    }
  }

  /**
   * Summary for the dashboard.
   * @returns {object} one `{ id, spent, limit, periodStart }` entry per level
   *   present in the context; `limit` is undefined when none is configured.
   */
  status(context) {
    const config = _loadConfig();
    const periodMs = config.defaults?.periodMs || 86400000;
    const scope = context ?? {};
    const out = {};
    for (const level of LEVELS) {
      const id = scope[level];
      if (!id) continue;
      const limit = this._limitFor(config, level, id);
      const current = this.store.resetIfStale(level, id, periodMs);
      out[level] = { id, spent: current.spent, limit, periodStart: current.periodStart };
    }
    return out;
  }
}
135
+
136
let _instance = null;

/**
 * Return the shared HierarchicalBudget for this module, constructing it
 * (with its default store) the first time it is requested.
 */
function getHierarchicalBudget() {
  if (_instance === null) {
    _instance = new HierarchicalBudget();
  }
  return _instance;
}
141
+
142
/**
 * Placeholder for a Redis-backed store, intended for deployments that run
 * more than one Node process. It is meant to expose the same methods as
 * MapBudgetStore so HierarchicalBudget can accept either; until it is
 * implemented, constructing it always throws.
 */
class RedisBudgetStore {
  constructor(_redisClient) {
    const message = 'RedisBudgetStore not implemented. Stub — wire your Redis client and use INCRBY with periodic TTL.';
    throw new Error(message);
  }
}
152
+
153
+ module.exports = {
154
+ HierarchicalBudget,
155
+ MapBudgetStore,
156
+ RedisBudgetStore,
157
+ getHierarchicalBudget,
158
+ LEVELS,
159
+ };
@@ -14,16 +14,29 @@ const logger = require('../logger');
14
14
  const config = require('../config');
15
15
 
16
16
  // Default configuration (can be overridden via config.semanticCache)
17
+ //
18
+ // Phase 2.1 of the routing overhaul: defaults aligned with the plan
19
+ // (10K entries; the 0.95 threshold suggested by the GPT Semantic Cache research is NOT applied here — the default below is still 0.92).
20
+ // Short-TTL keywords trigger a reduced TTL rather than blocking caching.
17
21
  function getDefaultConfig() {
18
22
  const configOverrides = config.semanticCache || {};
19
23
  return {
20
24
  enabled: configOverrides.enabled ?? true,
21
25
  similarityThreshold: configOverrides.similarityThreshold ?? 0.92,
22
- maxEntries: configOverrides.maxEntries ?? 500,
26
+ maxEntries: configOverrides.maxEntries ?? 10000,
23
27
  ttlMs: configOverrides.ttlMs ?? 3600000, // 1 hour
28
+ shortTtlMs: configOverrides.shortTtlMs ?? 300000, // 5 min for time-sensitive queries
29
+ shortTtlPatterns: [
30
+ /\bnow\b/i,
31
+ /\btoday\b/i,
32
+ /\bcurrent\b/i,
33
+ /\blatest\b/i,
34
+ /\brecent\b/i,
35
+ /\bjust\s+now\b/i,
36
+ ],
24
37
  minPromptLength: 20, // Don't cache very short prompts
25
38
  maxPromptLength: 5000, // Don't cache very long prompts (too specific)
26
- excludePatterns: [ // Patterns to exclude from caching
39
+ excludePatterns: [ // Patterns to fully exclude from caching
27
40
  /current time/i,
28
41
  /today's date/i,
29
42
  /right now/i,
@@ -33,6 +46,19 @@ function getDefaultConfig() {
33
46
  };
34
47
  }
35
48
 
49
/**
 * Phase 2.1 helper: determine the TTL to apply to a given prompt.
 * Time-sensitive keywords ("now", "today", "current") get a short TTL so
 * stale answers don't persist for an hour.
 *
 * @param {string} promptText — prompt to inspect; empty/missing → normal TTL.
 * @param {object} cfg — cache config; reads `ttlMs`, `shortTtlMs` and
 *   `shortTtlPatterns`.
 * @returns {number} `cfg.shortTtlMs` when any pattern matches (falling back
 *   to `cfg.ttlMs` if no short TTL is configured), otherwise `cfg.ttlMs`.
 */
function _ttlForPrompt(promptText, cfg) {
  if (!promptText || !Array.isArray(cfg.shortTtlPatterns)) return cfg.ttlMs;
  for (const re of cfg.shortTtlPatterns) {
    // Caller-supplied pattern lists may contain non-RegExp entries (e.g.
    // strings from a JSON config); skip them instead of throwing.
    if (!(re instanceof RegExp)) continue;
    // A /g or /y pattern remembers lastIndex between test() calls, which
    // would make the same prompt match only every other time.
    re.lastIndex = 0;
    if (re.test(promptText)) return cfg.shortTtlMs ?? cfg.ttlMs;
  }
  return cfg.ttlMs;
}
61
+
36
62
  class SemanticCache {
37
63
  constructor(options = {}) {
38
64
  this.config = { ...getDefaultConfig(), ...options };
@@ -107,8 +107,17 @@ async function performJsonRequest(url, { headers = {}, body }, providerLabel) {
107
107
  let json;
108
108
  try {
109
109
  json = JSON.parse(text);
110
- } catch {
110
+ } catch (parseError) {
111
111
  json = null;
112
+ // Log non-JSON responses for debugging
113
+ if (response.ok) {
114
+ logger.warn({
115
+ provider: providerLabel,
116
+ status: response.status,
117
+ contentType: response.headers.get("content-type"),
118
+ textPreview: text.substring(0, 200),
119
+ }, `${providerLabel} returned non-JSON response (status ${response.status})`);
120
+ }
112
121
  }
113
122
 
114
123
  const result = {
@@ -256,7 +265,7 @@ async function invokeOllama(body) {
256
265
  toolCount,
257
266
  toolsInjected,
258
267
  supportsTools,
259
- toolNames: (Array.isArray(toolsToSend) && toolsToSend.length > 0) ? toolsToSend.map(t => t.name) : []
268
+ toolNames: (Array.isArray(toolsToSend) && toolsToSend.length > 0) ? toolsToSend.map(t => t.name || t.function?.name || 'unnamed') : []
260
269
  }, `=== Ollama STANDARD TOOLS INJECTION for ${config.ollama.model} === ${logMessage}`);
261
270
 
262
271
  // ---- Anthropic-native path (Ollama v0.14.0+) ----
@@ -2036,9 +2045,10 @@ async function invokeModel(body, options = {}) {
2036
2045
  // Determine provider via async tier routing
2037
2046
  // Thread workspace for code-graph integration (from X-Lynkr-Workspace header or body._workspace)
2038
2047
  const workspace = body._workspace || options.workspace || null;
2048
+ const tenantPolicy = body._tenantPolicy || options.tenantPolicy || null;
2039
2049
  const routingResult = options.forceProvider
2040
2050
  ? { provider: options.forceProvider, model: null, method: 'forced' }
2041
- : await determineProviderSmart(body, { workspace });
2051
+ : await determineProviderSmart(body, { workspace, tenantPolicy });
2042
2052
  const initialProvider = routingResult.provider;
2043
2053
  const tierSelectedModel = routingResult.model;
2044
2054
 
@@ -2075,6 +2085,50 @@ async function invokeModel(body, options = {}) {
2075
2085
  method: routingResult.method,
2076
2086
  }, "Provider routing decision");
2077
2087
 
2088
+ // Phase 3.3 — small-first cascade (LYNKR_CASCADE_ENABLED=true to opt in).
2089
+ // _cascadeInner prevents recursive cascade when invokeModel is called from inside.
2090
+ if (!options._cascadeInner) {
2091
+ const cascadeModule = require('../routing/cascade');
2092
+ const hasTools = Array.isArray(body.tools) && body.tools.length > 0;
2093
+ if (cascadeModule.shouldCascade({
2094
+ tier: routingDecision.tier,
2095
+ streaming: !!body.stream,
2096
+ hasTools,
2097
+ })) {
2098
+ try {
2099
+ const { getModelTierSelector } = require('../routing/model-tiers');
2100
+ const simpleSelection = getModelTierSelector().selectModel('SIMPLE', null);
2101
+ const cascadeResult = await cascadeModule.run({
2102
+ payload: body,
2103
+ smallModel: simpleSelection,
2104
+ bigModel: { provider: initialProvider, model: tierSelectedModel },
2105
+ invoke: async (provider, model, payload) => {
2106
+ const cloned = { ...payload };
2107
+ if (model) cloned._tierModel = model;
2108
+ const resp = await invokeModel(cloned, { forceProvider: provider, _cascadeInner: true });
2109
+ return resp.json; // confidence-scorer needs response body (.content)
2110
+ },
2111
+ taskType: body._taskType || routingResult.reason || 'reasoning',
2112
+ threshold: 0.85,
2113
+ });
2114
+ logger.debug({
2115
+ accepted: cascadeResult.cascadeStats.accepted,
2116
+ usedModel: cascadeResult.usedModel,
2117
+ totalMs: cascadeResult.cascadeStats.totalLatency,
2118
+ }, '[Cascade] Result');
2119
+ return {
2120
+ ok: true,
2121
+ status: 200,
2122
+ json: cascadeResult.response,
2123
+ stream: null,
2124
+ routingDecision: { ...routingDecision, cascadeStats: cascadeResult.cascadeStats, usedModel: cascadeResult.usedModel },
2125
+ };
2126
+ } catch (err) {
2127
+ logger.debug({ err: err.message }, '[Cascade] Failed, falling through to normal routing');
2128
+ }
2129
+ }
2130
+ }
2131
+
2078
2132
  metricsCollector.recordProviderRouting(initialProvider);
2079
2133
 
2080
2134
  // Get circuit breaker for initial provider
@@ -2202,7 +2256,7 @@ async function invokeModel(body, options = {}) {
2202
2256
  const failLatency = Date.now() - startTime;
2203
2257
  metricsCollector.recordProviderFailure(initialProvider);
2204
2258
  healthTracker.recordFailure(initialProvider, err, err.status);
2205
- getLatencyTracker().record(initialProvider, failLatency);
2259
+ getLatencyTracker().record(initialProvider, routingDecision?.model, failLatency);
2206
2260
 
2207
2261
  // Check if we should fallback (any provider can fall back, not just ollama)
2208
2262
  const shouldFallback =
@@ -2313,7 +2367,7 @@ async function invokeModel(body, options = {}) {
2313
2367
  }, "Fallback to cloud provider succeeded");
2314
2368
 
2315
2369
  // Record latency for fallback provider
2316
- getLatencyTracker().record(fallbackProvider, fallbackLatency);
2370
+ getLatencyTracker().record(fallbackProvider, routingDecision?.model, fallbackLatency);
2317
2371
 
2318
2372
  // Capture fallback telemetry
2319
2373
  const fbOutputTokens = fallbackResult.json?.usage?.output_tokens || fallbackResult.json?.usage?.completion_tokens || 0;
@@ -76,8 +76,8 @@ if (!SUPPORTED_MODEL_PROVIDERS.has(rawModelProvider)) {
76
76
 
77
77
  const modelProvider = rawModelProvider;
78
78
 
79
- const rawBaseUrl = trimTrailingSlash(process.env.DATABRICKS_API_BASE);
80
- const apiKey = process.env.DATABRICKS_API_KEY;
79
+ let rawBaseUrl = trimTrailingSlash(process.env.DATABRICKS_API_BASE);
80
+ let apiKey = process.env.DATABRICKS_API_KEY;
81
81
 
82
82
  const azureAnthropicEndpoint = process.env.AZURE_ANTHROPIC_ENDPOINT ?? null;
83
83
  const azureAnthropicApiKey = process.env.AZURE_ANTHROPIC_API_KEY ?? null;
@@ -255,33 +255,8 @@ const headroomLlmlinguaDevice = process.env.HEADROOM_LLMLINGUA_DEVICE ?? "auto";
255
255
  const headroomProvider = process.env.HEADROOM_PROVIDER ?? "anthropic";
256
256
  const headroomLogLevel = process.env.HEADROOM_LOG_LEVEL ?? "info";
257
257
 
258
- // Only require Databricks credentials if it's the primary provider or used as fallback
259
- if (modelProvider === "databricks" && (!rawBaseUrl || !apiKey)) {
260
- throw new Error("Set DATABRICKS_API_BASE and DATABRICKS_API_KEY before starting the proxy.");
261
- } else if (modelProvider === "ollama" && !fallbackEnabled && (!rawBaseUrl || !apiKey)) {
262
- // Relaxed: Allow mock credentials for true Ollama-only mode (fallback disabled)
263
- if (!rawBaseUrl) process.env.DATABRICKS_API_BASE = "http://localhost:8080";
264
- if (!apiKey) process.env.DATABRICKS_API_KEY = "mock-key-for-ollama-only";
265
- console.log("[CONFIG] Using mock Databricks credentials (Ollama-only mode with fallback disabled)");
266
- }
267
-
268
- if (modelProvider === "azure-anthropic" && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
269
- throw new Error(
270
- "Set AZURE_ANTHROPIC_ENDPOINT and AZURE_ANTHROPIC_API_KEY before starting the proxy.",
271
- );
272
- }
273
-
274
- if (modelProvider === "azure-openai" && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
275
- throw new Error(
276
- "Set AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY before starting the proxy.",
277
- );
278
- }
279
-
280
- if (modelProvider === "openai" && !openAIApiKey) {
281
- throw new Error(
282
- "Set OPENAI_API_KEY before starting the proxy.",
283
- );
284
- }
258
+ // Credential validation is deferred until after tier routing mode detection
259
+ // (see line ~430 for the actual validation logic)
285
260
 
286
261
  if (modelProvider === "ollama") {
287
262
  try {
@@ -320,34 +295,254 @@ if (process.env.PREFER_OLLAMA) {
320
295
  console.warn('[DEPRECATION] PREFER_OLLAMA is removed. Use TIER_* env vars for routing. See documentation/routing.md');
321
296
  }
322
297
 
323
- // Warn about misconfigured fallback provider (only when tier routing is active,
324
- // since that's the only path that triggers provider fallback)
298
+ // ═══════════════════════════════════════════════════════════════════════════
299
+ // TIER ROUTING MODE DETECTION
300
+ // ═══════════════════════════════════════════════════════════════════════════
301
+ // When all 4 TIER_* variables are set, Lynkr operates in "Tier Routing Mode"
302
+ // In this mode:
303
+ // - MODEL_PROVIDER is auto-detected from TIER_SIMPLE
304
+ // - FALLBACK_PROVIDER is auto-detected from TIER_REASONING
305
+ // - FALLBACK_ENABLED is always true
306
+ // - Only credentials for providers used in tiers are validated
307
+ // ═══════════════════════════════════════════════════════════════════════════
308
+
325
309
  const tiersConfigured = !!(
326
310
  process.env.TIER_SIMPLE?.trim() &&
327
311
  process.env.TIER_MEDIUM?.trim() &&
328
312
  process.env.TIER_COMPLEX?.trim() &&
329
313
  process.env.TIER_REASONING?.trim()
330
314
  );
331
- if (fallbackEnabled && tiersConfigured) {
315
+
316
+ let tierRoutingMode = tiersConfigured;
317
+ let autoDetectedProvider = null;
318
+ let autoDetectedFallback = null;
319
+
320
+ if (tierRoutingMode) {
321
+ console.log('[Config] ✓ Tier routing mode active (all 4 TIER_* variables set)');
322
+
323
+ // Phase 3: Error if legacy variables are set
324
+ if (process.env.MODEL_PROVIDER) {
325
+ throw new Error(
326
+ 'MODEL_PROVIDER not allowed in tier routing mode.\n' +
327
+ 'Remove MODEL_PROVIDER from your .env file.\n' +
328
+ 'Provider is auto-detected from TIER_SIMPLE.\n' +
329
+ 'See: documentation/routing.md'
330
+ );
331
+ }
332
+
333
+ if (process.env.FALLBACK_PROVIDER) {
334
+ throw new Error(
335
+ 'FALLBACK_PROVIDER not allowed in tier routing mode.\n' +
336
+ 'Remove FALLBACK_PROVIDER from your .env file.\n' +
337
+ 'Fallback is auto-detected from TIER_REASONING.\n' +
338
+ 'See: documentation/routing.md'
339
+ );
340
+ }
341
+
342
+ if (process.env.FALLBACK_ENABLED !== undefined) {
343
+ throw new Error(
344
+ 'FALLBACK_ENABLED not allowed in tier routing mode.\n' +
345
+ 'Remove FALLBACK_ENABLED from your .env file.\n' +
346
+ 'Fallback is automatic when TIER_REASONING uses a cloud provider.\n' +
347
+ 'See: documentation/routing.md'
348
+ );
349
+ }
350
+
351
+ // Auto-detect primary provider from TIER_SIMPLE
352
+ const tierSimple = process.env.TIER_SIMPLE.trim();
353
+ const tierReasoning = process.env.TIER_REASONING.trim();
354
+
355
+ const simpleMatch = tierSimple.match(/^([a-z-]+):(.+)$/);
356
+ const reasoningMatch = tierReasoning.match(/^([a-z-]+):(.+)$/);
357
+
358
+ if (!simpleMatch) {
359
+ throw new Error(`TIER_SIMPLE must be in format "provider:model" (got: "${tierSimple}")`);
360
+ }
361
+ if (!reasoningMatch) {
362
+ throw new Error(`TIER_REASONING must be in format "provider:model" (got: "${tierReasoning}")`);
363
+ }
364
+
365
+ autoDetectedProvider = simpleMatch[1];
366
+ autoDetectedFallback = reasoningMatch[1];
367
+
368
+ console.log(`[Config] Auto-detected MODEL_PROVIDER="${autoDetectedProvider}" from TIER_SIMPLE`);
369
+ console.log(`[Config] Auto-detected FALLBACK_PROVIDER="${autoDetectedFallback}" from TIER_REASONING`);
370
+
371
+ // Validate auto-detected providers
372
+ if (!SUPPORTED_MODEL_PROVIDERS.has(autoDetectedProvider)) {
373
+ throw new Error(
374
+ `Invalid provider in TIER_SIMPLE: "${autoDetectedProvider}"\n` +
375
+ `Valid providers: ${Array.from(SUPPORTED_MODEL_PROVIDERS).sort().join(', ')}`
376
+ );
377
+ }
378
+ if (!SUPPORTED_MODEL_PROVIDERS.has(autoDetectedFallback)) {
379
+ throw new Error(
380
+ `Invalid provider in TIER_REASONING: "${autoDetectedFallback}"\n` +
381
+ `Valid providers: ${Array.from(SUPPORTED_MODEL_PROVIDERS).sort().join(', ')}`
382
+ );
383
+ }
384
+
385
+ // Override MODEL_PROVIDER and FALLBACK_PROVIDER internally
386
+ process.env.MODEL_PROVIDER = autoDetectedProvider;
387
+ process.env.FALLBACK_PROVIDER = autoDetectedFallback;
388
+ process.env.FALLBACK_ENABLED = 'true';
389
+ }
390
+
391
+ // Re-read modelProvider and fallbackProvider after tier routing auto-detection
392
+ // This ensures the config object uses the auto-detected values
393
+ const finalModelProvider = (process.env.MODEL_PROVIDER ?? "databricks").toLowerCase();
394
+ const finalFallbackProvider = (process.env.FALLBACK_PROVIDER ?? "databricks").toLowerCase();
395
+ const finalFallbackEnabled = process.env.FALLBACK_ENABLED === "true";
396
+
397
+ // Warn about misconfigured fallback provider (only when tier routing is active,
398
+ // since that's the only path that triggers provider fallback)
399
+ if (finalFallbackEnabled && tiersConfigured) {
332
400
  const localProviders = ["ollama", "llamacpp", "lmstudio"];
333
- if (localProviders.includes(fallbackProvider)) {
334
- throw new Error(`FALLBACK_PROVIDER cannot be '${fallbackProvider}' (local providers should not be fallbacks). Use cloud providers: databricks, azure-anthropic, azure-openai, openrouter, openai, bedrock`);
401
+ // Only warn (not error) if fallback is local - it just means fallback won't work
402
+ if (localProviders.includes(finalFallbackProvider) && finalFallbackProvider !== finalModelProvider) {
403
+ console.warn(`[WARN] FALLBACK_PROVIDER='${finalFallbackProvider}' is a local provider. Fallback should use a cloud provider for redundancy.`);
335
404
  }
336
405
  let fallbackMisconfigured = false;
337
- if (fallbackProvider === "databricks" && (!rawBaseUrl || !apiKey)) {
406
+ if (finalFallbackProvider === "databricks" && (!rawBaseUrl || !apiKey)) {
338
407
  fallbackMisconfigured = true;
339
408
  }
340
- if (fallbackProvider === "azure-anthropic" && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
409
+ if (finalFallbackProvider === "azure-anthropic" && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
341
410
  fallbackMisconfigured = true;
342
411
  }
343
- if (fallbackProvider === "azure-openai" && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
412
+ if (finalFallbackProvider === "azure-openai" && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
344
413
  fallbackMisconfigured = true;
345
414
  }
346
- if (fallbackProvider === "bedrock" && !bedrockApiKey) {
415
+ if (finalFallbackProvider === "bedrock" && !bedrockApiKey) {
347
416
  fallbackMisconfigured = true;
348
417
  }
349
418
  if (fallbackMisconfigured) {
350
- console.warn(`[WARN] FALLBACK_PROVIDER='${fallbackProvider}' is enabled but missing credentials. Fallback will not work until configured.`);
419
+ console.warn(`[WARN] FALLBACK_PROVIDER='${finalFallbackProvider}' is enabled but missing credentials. Fallback will not work until configured.`);
420
+ }
421
+ }
422
+
423
+ // ═══════════════════════════════════════════════════════════════════════════
424
+ // SMART CREDENTIAL VALIDATION (TIER ROUTING MODE)
425
+ // ═══════════════════════════════════════════════════════════════════════════
426
+ // Only validate credentials for providers actually used in tier config
427
+ // ═══════════════════════════════════════════════════════════════════════════
428
+
429
+ if (tierRoutingMode) {
430
+ // Extract all unique providers from tier config
431
+ const usedProviders = new Set();
432
+ [
433
+ process.env.TIER_SIMPLE,
434
+ process.env.TIER_MEDIUM,
435
+ process.env.TIER_COMPLEX,
436
+ process.env.TIER_REASONING
437
+ ].forEach(tierValue => {
438
+ const match = tierValue?.match(/^([a-z-]+):/);
439
+ if (match) usedProviders.add(match[1]);
440
+ });
441
+
442
+ console.log(`[Config] Tier routing uses providers: ${Array.from(usedProviders).join(', ')}`);
443
+
444
+ // Validate only providers used in tiers
445
+ if (usedProviders.has('databricks')) {
446
+ if (!rawBaseUrl || !apiKey) {
447
+ throw new Error(
448
+ 'DATABRICKS_API_BASE and DATABRICKS_API_KEY required.\n' +
449
+ 'Databricks is used in your tier routing config.'
450
+ );
451
+ }
452
+ } else {
453
+ // Mock credentials if Databricks not used
454
+ if (!rawBaseUrl) {
455
+ process.env.DATABRICKS_API_BASE = "http://localhost:8080";
456
+ rawBaseUrl = "http://localhost:8080";
457
+ }
458
+ if (!apiKey) {
459
+ process.env.DATABRICKS_API_KEY = "mock-key-unused";
460
+ apiKey = "mock-key-unused";
461
+ }
462
+ }
463
+
464
+ if (usedProviders.has('azure-anthropic') && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
465
+ throw new Error(
466
+ 'AZURE_ANTHROPIC_ENDPOINT and AZURE_ANTHROPIC_API_KEY required.\n' +
467
+ 'Azure Anthropic is used in your tier routing config.'
468
+ );
469
+ }
470
+
471
+ if (usedProviders.has('azure-openai') && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
472
+ throw new Error(
473
+ 'AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY required.\n' +
474
+ 'Azure OpenAI is used in your tier routing config.'
475
+ );
476
+ }
477
+
478
+ if (usedProviders.has('openai') && !openAIApiKey) {
479
+ throw new Error(
480
+ 'OPENAI_API_KEY required.\n' +
481
+ 'OpenAI is used in your tier routing config.'
482
+ );
483
+ }
484
+
485
+ if (usedProviders.has('openrouter') && !openRouterApiKey) {
486
+ throw new Error(
487
+ 'OPENROUTER_API_KEY required.\n' +
488
+ 'OpenRouter is used in your tier routing config.'
489
+ );
490
+ }
491
+
492
+ if (usedProviders.has('bedrock') && !bedrockApiKey) {
493
+ throw new Error(
494
+ 'AWS_BEDROCK_API_KEY required.\n' +
495
+ 'Bedrock is used in your tier routing config.'
496
+ );
497
+ }
498
+
499
+ // Ollama endpoint validation
500
+ if (usedProviders.has('ollama')) {
501
+ try {
502
+ new URL(ollamaEndpoint);
503
+ } catch (err) {
504
+ throw new Error(`Invalid OLLAMA_ENDPOINT: "${ollamaEndpoint}". Must be a valid URL.`);
505
+ }
506
+ }
507
+
508
+ } else {
509
+ // ═══════════════════════════════════════════════════════════════════════════
510
+ // STATIC PROVIDER MODE - Original validation logic
511
+ // ═══════════════════════════════════════════════════════════════════════════
512
+
513
+ if (modelProvider === "databricks" && (!rawBaseUrl || !apiKey)) {
514
+ throw new Error("Set DATABRICKS_API_BASE and DATABRICKS_API_KEY before starting the proxy.");
515
+ } else if (modelProvider === "ollama" && !fallbackEnabled && (!rawBaseUrl || !apiKey)) {
516
+ // Relaxed: Allow mock credentials for true Ollama-only mode (fallback disabled)
517
+ if (!rawBaseUrl) {
518
+ process.env.DATABRICKS_API_BASE = "http://localhost:8080";
519
+ rawBaseUrl = "http://localhost:8080";
520
+ }
521
+ if (!apiKey) {
522
+ process.env.DATABRICKS_API_KEY = "mock-key-for-ollama-only";
523
+ apiKey = "mock-key-for-ollama-only";
524
+ }
525
+ console.log("[CONFIG] Using mock Databricks credentials (Ollama-only mode with fallback disabled)");
526
+ }
527
+
528
if (modelProvider === "azure-anthropic" && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
  // Static provider mode: Azure Anthropic needs both its endpoint and its key.
  // Message casing matches the sibling provider checks ("Set ...").
  throw new Error("Set AZURE_ANTHROPIC_ENDPOINT and AZURE_ANTHROPIC_API_KEY before starting the proxy.");
}
531
+
532
+ if (modelProvider === "azure-openai" && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
533
+ throw new Error("Set AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY before starting the proxy.");
534
+ }
535
+
536
+ if (modelProvider === "openai" && !openAIApiKey) {
537
+ throw new Error("Set OPENAI_API_KEY before starting the proxy.");
538
+ }
539
+
540
+ if (modelProvider === "ollama") {
541
+ try {
542
+ new URL(ollamaEndpoint);
543
+ } catch (err) {
544
+ throw new Error(`Invalid OLLAMA_ENDPOINT: "${ollamaEndpoint}". Must be a valid URL.`);
545
+ }
351
546
  }
352
547
  }
353
548
 
@@ -547,7 +742,7 @@ const workerTaskTimeoutMs = Number.parseInt(process.env.WORKER_TASK_TIMEOUT_MS ?
547
742
  const workerOffloadThresholdBytes = Number.parseInt(process.env.WORKER_OFFLOAD_THRESHOLD_BYTES ?? "10000", 10);
548
743
 
549
744
  var config = {
550
- env: process.env.NODE_ENV ?? "development",
745
+ env: process.env.NODE_ENV ?? "production",
551
746
  port: Number.isNaN(port) ? 8080 : port,
552
747
  databricks: {
553
748
  baseUrl: rawBaseUrl,
@@ -629,13 +824,13 @@ var config = {
629
824
  debounceMs: Number.isNaN(hotReloadDebounceMs) ? 1000 : hotReloadDebounceMs,
630
825
  },
631
826
  modelProvider: {
632
- type: modelProvider,
827
+ type: finalModelProvider,
633
828
  defaultModel,
634
829
  suggestionModeModel,
635
- fallbackEnabled,
830
+ fallbackEnabled: finalFallbackEnabled,
636
831
  ollamaMaxToolsForRouting,
637
832
  openRouterMaxToolsForRouting,
638
- fallbackProvider,
833
+ fallbackProvider: finalFallbackProvider,
639
834
  },
640
835
  toolExecutionMode,
641
836
  toolResultCompression: {
@@ -918,6 +1113,7 @@ var config = {
918
1113
  // Intelligent Routing
919
1114
  routing: {
920
1115
  weightedScoring: true,
1116
+ // Cost optimization now respects tier routing mode (only uses TIER_* configured models)
921
1117
  costOptimization: true,
922
1118
  agenticDetection: true,
923
1119
  // Embed an interaction block in the response body so the user can