lynkr 9.0.2 → 9.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -10
- package/bin/cli.js +18 -1
- package/bin/lynkr-trajectory.js +136 -0
- package/bin/lynkr-usage.js +219 -0
- package/funding.json +110 -0
- package/package.json +4 -2
- package/public/dashboard.html +665 -0
- package/scripts/build-knn-index.js +130 -0
- package/scripts/calibrate-thresholds.js +197 -0
- package/scripts/compare-policies.js +67 -0
- package/scripts/learn-output-ratios.js +162 -0
- package/scripts/refresh-pricing.js +122 -0
- package/scripts/run-routerarena.js +26 -0
- package/scripts/sample-regret.js +84 -0
- package/scripts/train-risk-classifier.js +191 -0
- package/src/api/files-router.js +6 -6
- package/src/api/middleware/budget-enforcer.js +60 -0
- package/src/api/middleware/budget.js +19 -1
- package/src/api/middleware/load-shedding.js +17 -0
- package/src/api/middleware/tenant.js +21 -0
- package/src/api/openai-router.js +1 -1
- package/src/api/router.js +204 -87
- package/src/budget/hierarchical-budget.js +159 -0
- package/src/cache/semantic.js +28 -2
- package/src/clients/databricks.js +68 -10
- package/src/clients/openai-format.js +31 -5
- package/src/config/index.js +246 -43
- package/src/context/toon.js +5 -4
- package/src/dashboard/api.js +170 -0
- package/src/dashboard/router.js +13 -0
- package/src/headroom/client.js +3 -109
- package/src/headroom/index.js +0 -14
- package/src/memory/search.js +0 -50
- package/src/orchestrator/index.js +106 -11
- package/src/orchestrator/preflight.js +188 -0
- package/src/prompts/system.js +34 -6
- package/src/routing/bandit.js +246 -0
- package/src/routing/cascade.js +106 -0
- package/src/routing/complexity-analyzer.js +7 -15
- package/src/routing/confidence-scorer.js +121 -0
- package/src/routing/context-validator.js +71 -0
- package/src/routing/cost-optimizer.js +5 -2
- package/src/routing/deadline.js +52 -0
- package/src/routing/drift-monitor.js +113 -0
- package/src/routing/embedding-cache.js +77 -0
- package/src/routing/index.js +374 -4
- package/src/routing/interaction.js +183 -0
- package/src/routing/knn-router.js +206 -0
- package/src/routing/latency-tracker.js +113 -71
- package/src/routing/model-tiers.js +156 -6
- package/src/routing/output-ratios.js +57 -0
- package/src/routing/regret-estimator.js +91 -0
- package/src/routing/reward-pipeline.js +62 -0
- package/src/routing/risk-analyzer.js +194 -0
- package/src/routing/risk-classifier.js +130 -0
- package/src/routing/shadow-mode.js +77 -0
- package/src/routing/telemetry.js +7 -0
- package/src/routing/tenant-policy.js +96 -0
- package/src/routing/tokenizer.js +162 -0
- package/src/server.js +12 -0
- package/src/stores/file-store.js +42 -7
- package/src/tools/smart-selection.js +11 -2
- package/src/training/trajectory-compressor.js +266 -0
- package/src/usage/aggregator.js +206 -0
- package/src/utils/markdown-ansi.js +146 -0
package/src/clients/databricks.js
CHANGED
|
@@ -107,8 +107,17 @@ async function performJsonRequest(url, { headers = {}, body }, providerLabel) {
|
|
|
107
107
|
let json;
|
|
108
108
|
try {
|
|
109
109
|
json = JSON.parse(text);
|
|
110
|
-
} catch {
|
|
110
|
+
} catch (parseError) {
|
|
111
111
|
json = null;
|
|
112
|
+
// Log non-JSON responses for debugging
|
|
113
|
+
if (response.ok) {
|
|
114
|
+
logger.warn({
|
|
115
|
+
provider: providerLabel,
|
|
116
|
+
status: response.status,
|
|
117
|
+
contentType: response.headers.get("content-type"),
|
|
118
|
+
textPreview: text.substring(0, 200),
|
|
119
|
+
}, `${providerLabel} returned non-JSON response (status ${response.status})`);
|
|
120
|
+
}
|
|
112
121
|
}
|
|
113
122
|
|
|
114
123
|
const result = {
|
|
@@ -221,7 +230,7 @@ async function invokeOllama(body) {
|
|
|
221
230
|
const useAnthropicApi = await hasAnthropicEndpoint(config.ollama.endpoint);
|
|
222
231
|
|
|
223
232
|
// Check if model supports tools FIRST (before wasteful injection)
|
|
224
|
-
const supportsTools = await checkOllamaToolSupport(
|
|
233
|
+
const supportsTools = await checkOllamaToolSupport(modelName);
|
|
225
234
|
const injectToolsOllama = process.env.INJECT_TOOLS_OLLAMA !== "false";
|
|
226
235
|
|
|
227
236
|
// Determine tools to send
|
|
@@ -256,7 +265,7 @@ async function invokeOllama(body) {
|
|
|
256
265
|
toolCount,
|
|
257
266
|
toolsInjected,
|
|
258
267
|
supportsTools,
|
|
259
|
-
toolNames: (Array.isArray(toolsToSend) && toolsToSend.length > 0) ? toolsToSend.map(t => t.name) : []
|
|
268
|
+
toolNames: (Array.isArray(toolsToSend) && toolsToSend.length > 0) ? toolsToSend.map(t => t.name || t.function?.name || 'unnamed') : []
|
|
260
269
|
}, `=== Ollama STANDARD TOOLS INJECTION for ${config.ollama.model} === ${logMessage}`);
|
|
261
270
|
|
|
262
271
|
// ---- Anthropic-native path (Ollama v0.14.0+) ----
|
|
@@ -476,13 +485,17 @@ async function invokeAzureOpenAI(body) {
|
|
|
476
485
|
// System prompt injection disabled - breaks model response
|
|
477
486
|
// Tool guidance now provided via tool descriptions instead
|
|
478
487
|
|
|
488
|
+
const azureDeployment = body._suggestionModeModel || body._tierModel || config.azureOpenAI.deployment || "";
|
|
489
|
+
const isGpt5 = /gpt-5/i.test(azureDeployment);
|
|
490
|
+
const maxTokensKey = isGpt5 ? "max_completion_tokens" : "max_tokens";
|
|
491
|
+
|
|
479
492
|
const azureBody = {
|
|
480
493
|
messages,
|
|
481
|
-
temperature: body.temperature ?? 0.3,
|
|
482
|
-
|
|
494
|
+
temperature: body.temperature ?? 0.3,
|
|
495
|
+
[maxTokensKey]: Math.min(body.max_tokens ?? 16384, 16384),
|
|
483
496
|
top_p: body.top_p ?? 1.0,
|
|
484
|
-
stream: false,
|
|
485
|
-
model:
|
|
497
|
+
stream: false,
|
|
498
|
+
model: azureDeployment
|
|
486
499
|
};
|
|
487
500
|
|
|
488
501
|
// Add tools - inject standard tools if client didn't send any (passthrough mode)
|
|
@@ -2032,9 +2045,10 @@ async function invokeModel(body, options = {}) {
|
|
|
2032
2045
|
// Determine provider via async tier routing
|
|
2033
2046
|
// Thread workspace for code-graph integration (from X-Lynkr-Workspace header or body._workspace)
|
|
2034
2047
|
const workspace = body._workspace || options.workspace || null;
|
|
2048
|
+
const tenantPolicy = body._tenantPolicy || options.tenantPolicy || null;
|
|
2035
2049
|
const routingResult = options.forceProvider
|
|
2036
2050
|
? { provider: options.forceProvider, model: null, method: 'forced' }
|
|
2037
|
-
: await determineProviderSmart(body, { workspace });
|
|
2051
|
+
: await determineProviderSmart(body, { workspace, tenantPolicy });
|
|
2038
2052
|
const initialProvider = routingResult.provider;
|
|
2039
2053
|
const tierSelectedModel = routingResult.model;
|
|
2040
2054
|
|
|
@@ -2071,6 +2085,50 @@ async function invokeModel(body, options = {}) {
|
|
|
2071
2085
|
method: routingResult.method,
|
|
2072
2086
|
}, "Provider routing decision");
|
|
2073
2087
|
|
|
2088
|
+
// Phase 3.3 — small-first cascade (LYNKR_CASCADE_ENABLED=true to opt in).
|
|
2089
|
+
// _cascadeInner prevents recursive cascade when invokeModel is called from inside.
|
|
2090
|
+
if (!options._cascadeInner) {
|
|
2091
|
+
const cascadeModule = require('../routing/cascade');
|
|
2092
|
+
const hasTools = Array.isArray(body.tools) && body.tools.length > 0;
|
|
2093
|
+
if (cascadeModule.shouldCascade({
|
|
2094
|
+
tier: routingDecision.tier,
|
|
2095
|
+
streaming: !!body.stream,
|
|
2096
|
+
hasTools,
|
|
2097
|
+
})) {
|
|
2098
|
+
try {
|
|
2099
|
+
const { getModelTierSelector } = require('../routing/model-tiers');
|
|
2100
|
+
const simpleSelection = getModelTierSelector().selectModel('SIMPLE', null);
|
|
2101
|
+
const cascadeResult = await cascadeModule.run({
|
|
2102
|
+
payload: body,
|
|
2103
|
+
smallModel: simpleSelection,
|
|
2104
|
+
bigModel: { provider: initialProvider, model: tierSelectedModel },
|
|
2105
|
+
invoke: async (provider, model, payload) => {
|
|
2106
|
+
const cloned = { ...payload };
|
|
2107
|
+
if (model) cloned._tierModel = model;
|
|
2108
|
+
const resp = await invokeModel(cloned, { forceProvider: provider, _cascadeInner: true });
|
|
2109
|
+
return resp.json; // confidence-scorer needs response body (.content)
|
|
2110
|
+
},
|
|
2111
|
+
taskType: body._taskType || routingResult.reason || 'reasoning',
|
|
2112
|
+
threshold: 0.85,
|
|
2113
|
+
});
|
|
2114
|
+
logger.debug({
|
|
2115
|
+
accepted: cascadeResult.cascadeStats.accepted,
|
|
2116
|
+
usedModel: cascadeResult.usedModel,
|
|
2117
|
+
totalMs: cascadeResult.cascadeStats.totalLatency,
|
|
2118
|
+
}, '[Cascade] Result');
|
|
2119
|
+
return {
|
|
2120
|
+
ok: true,
|
|
2121
|
+
status: 200,
|
|
2122
|
+
json: cascadeResult.response,
|
|
2123
|
+
stream: null,
|
|
2124
|
+
routingDecision: { ...routingDecision, cascadeStats: cascadeResult.cascadeStats, usedModel: cascadeResult.usedModel },
|
|
2125
|
+
};
|
|
2126
|
+
} catch (err) {
|
|
2127
|
+
logger.debug({ err: err.message }, '[Cascade] Failed, falling through to normal routing');
|
|
2128
|
+
}
|
|
2129
|
+
}
|
|
2130
|
+
}
|
|
2131
|
+
|
|
2074
2132
|
metricsCollector.recordProviderRouting(initialProvider);
|
|
2075
2133
|
|
|
2076
2134
|
// Get circuit breaker for initial provider
|
|
@@ -2198,7 +2256,7 @@ async function invokeModel(body, options = {}) {
|
|
|
2198
2256
|
const failLatency = Date.now() - startTime;
|
|
2199
2257
|
metricsCollector.recordProviderFailure(initialProvider);
|
|
2200
2258
|
healthTracker.recordFailure(initialProvider, err, err.status);
|
|
2201
|
-
getLatencyTracker().record(initialProvider, failLatency);
|
|
2259
|
+
getLatencyTracker().record(initialProvider, routingDecision?.model, failLatency);
|
|
2202
2260
|
|
|
2203
2261
|
// Check if we should fallback (any provider can fall back, not just ollama)
|
|
2204
2262
|
const shouldFallback =
|
|
@@ -2309,7 +2367,7 @@ async function invokeModel(body, options = {}) {
|
|
|
2309
2367
|
}, "Fallback to cloud provider succeeded");
|
|
2310
2368
|
|
|
2311
2369
|
// Record latency for fallback provider
|
|
2312
|
-
getLatencyTracker().record(fallbackProvider, fallbackLatency);
|
|
2370
|
+
getLatencyTracker().record(fallbackProvider, routingDecision?.model, fallbackLatency);
|
|
2313
2371
|
|
|
2314
2372
|
// Capture fallback telemetry
|
|
2315
2373
|
const fbOutputTokens = fallbackResult.json?.usage?.output_tokens || fallbackResult.json?.usage?.completion_tokens || 0;
|
package/src/clients/openai-format.js
CHANGED
|
@@ -203,24 +203,37 @@ function convertAnthropicToOpenAI(anthropicResponse, model = "claude-3-5-sonnet-
|
|
|
203
203
|
|
|
204
204
|
const { id, content, stop_reason, usage } = anthropicResponse;
|
|
205
205
|
|
|
206
|
-
//
|
|
207
|
-
|
|
208
|
-
|
|
206
|
+
// Tolerant fallback: providers sometimes return reasoning-only responses
|
|
207
|
+
// (Minimax/DeepSeek), error envelopes, or empty bodies. Treat missing/invalid
|
|
208
|
+
// content as an empty turn so jcode/Pi/Codex don't crash on the response.
|
|
209
|
+
const safeContent = Array.isArray(content) ? content : [];
|
|
210
|
+
if (safeContent.length === 0) {
|
|
211
|
+
logger.warn({
|
|
212
|
+
hasContent: content !== undefined,
|
|
213
|
+
contentType: typeof content,
|
|
214
|
+
stop_reason,
|
|
215
|
+
responseKeys: Object.keys(anthropicResponse),
|
|
216
|
+
hasError: !!anthropicResponse.error,
|
|
217
|
+
errorMessage: anthropicResponse.error?.message,
|
|
218
|
+
}, "convertAnthropicToOpenAI: empty/missing content, returning empty assistant message");
|
|
209
219
|
}
|
|
210
220
|
|
|
211
221
|
// Convert content blocks to OpenAI format
|
|
212
222
|
let messageContent = "";
|
|
223
|
+
let reasoningContent = "";
|
|
213
224
|
const toolCalls = [];
|
|
214
225
|
let citations = [];
|
|
215
226
|
|
|
216
|
-
for (const block of
|
|
227
|
+
for (const block of safeContent) {
|
|
217
228
|
if (block.type === "text") {
|
|
218
229
|
messageContent += block.text;
|
|
219
230
|
if (Array.isArray(block.citations)) {
|
|
220
231
|
citations.push(...block.citations);
|
|
221
232
|
}
|
|
222
233
|
} else if (block.type === "thinking") {
|
|
223
|
-
//
|
|
234
|
+
// Preserve reasoning text so reasoning-only models (Minimax, DeepSeek-R1)
|
|
235
|
+
// surface visible output to OpenAI clients that don't render thinking blocks
|
|
236
|
+
reasoningContent += (block.thinking || "");
|
|
224
237
|
} else if (block.type === "tool_use") {
|
|
225
238
|
toolCalls.push({
|
|
226
239
|
id: block.id,
|
|
@@ -233,6 +246,12 @@ function convertAnthropicToOpenAI(anthropicResponse, model = "claude-3-5-sonnet-
|
|
|
233
246
|
}
|
|
234
247
|
}
|
|
235
248
|
|
|
249
|
+
// Fallback: if the model returned only reasoning (no visible text and no tools),
|
|
250
|
+
// promote reasoning into the visible content so jcode/Pi/Codex see something
|
|
251
|
+
if (!messageContent && !toolCalls.length && reasoningContent) {
|
|
252
|
+
messageContent = reasoningContent;
|
|
253
|
+
}
|
|
254
|
+
|
|
236
255
|
// Build OpenAI response
|
|
237
256
|
// Ensure ID has the chatcmpl- prefix that OpenAI clients expect
|
|
238
257
|
const responseId = id && id.startsWith("chatcmpl-") ? id : `chatcmpl-${Date.now()}`;
|
|
@@ -263,6 +282,13 @@ function convertAnthropicToOpenAI(anthropicResponse, model = "claude-3-5-sonnet-
|
|
|
263
282
|
openaiResponse.citations = citations;
|
|
264
283
|
}
|
|
265
284
|
|
|
285
|
+
// Add reasoning_content as a side-channel field so clients that render
|
|
286
|
+
// thinking (e.g. some jcode / OpenRouter setups) can show it without losing
|
|
287
|
+
// it from the visible content fallback above
|
|
288
|
+
if (reasoningContent && reasoningContent !== messageContent) {
|
|
289
|
+
openaiResponse.choices[0].message.reasoning_content = reasoningContent;
|
|
290
|
+
}
|
|
291
|
+
|
|
266
292
|
// Add tool_calls if present
|
|
267
293
|
if (toolCalls.length > 0) {
|
|
268
294
|
openaiResponse.choices[0].message.tool_calls = toolCalls;
|
package/src/config/index.js
CHANGED
|
@@ -76,8 +76,8 @@ if (!SUPPORTED_MODEL_PROVIDERS.has(rawModelProvider)) {
|
|
|
76
76
|
|
|
77
77
|
const modelProvider = rawModelProvider;
|
|
78
78
|
|
|
79
|
-
|
|
80
|
-
|
|
79
|
+
let rawBaseUrl = trimTrailingSlash(process.env.DATABRICKS_API_BASE);
|
|
80
|
+
let apiKey = process.env.DATABRICKS_API_KEY;
|
|
81
81
|
|
|
82
82
|
const azureAnthropicEndpoint = process.env.AZURE_ANTHROPIC_ENDPOINT ?? null;
|
|
83
83
|
const azureAnthropicApiKey = process.env.AZURE_ANTHROPIC_API_KEY ?? null;
|
|
@@ -255,33 +255,8 @@ const headroomLlmlinguaDevice = process.env.HEADROOM_LLMLINGUA_DEVICE ?? "auto";
|
|
|
255
255
|
const headroomProvider = process.env.HEADROOM_PROVIDER ?? "anthropic";
|
|
256
256
|
const headroomLogLevel = process.env.HEADROOM_LOG_LEVEL ?? "info";
|
|
257
257
|
|
|
258
|
-
//
|
|
259
|
-
|
|
260
|
-
throw new Error("Set DATABRICKS_API_BASE and DATABRICKS_API_KEY before starting the proxy.");
|
|
261
|
-
} else if (modelProvider === "ollama" && !fallbackEnabled && (!rawBaseUrl || !apiKey)) {
|
|
262
|
-
// Relaxed: Allow mock credentials for true Ollama-only mode (fallback disabled)
|
|
263
|
-
if (!rawBaseUrl) process.env.DATABRICKS_API_BASE = "http://localhost:8080";
|
|
264
|
-
if (!apiKey) process.env.DATABRICKS_API_KEY = "mock-key-for-ollama-only";
|
|
265
|
-
console.log("[CONFIG] Using mock Databricks credentials (Ollama-only mode with fallback disabled)");
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
if (modelProvider === "azure-anthropic" && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
|
|
269
|
-
throw new Error(
|
|
270
|
-
"Set AZURE_ANTHROPIC_ENDPOINT and AZURE_ANTHROPIC_API_KEY before starting the proxy.",
|
|
271
|
-
);
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
if (modelProvider === "azure-openai" && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
|
|
275
|
-
throw new Error(
|
|
276
|
-
"Set AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY before starting the proxy.",
|
|
277
|
-
);
|
|
278
|
-
}
|
|
279
|
-
|
|
280
|
-
if (modelProvider === "openai" && !openAIApiKey) {
|
|
281
|
-
throw new Error(
|
|
282
|
-
"Set OPENAI_API_KEY before starting the proxy.",
|
|
283
|
-
);
|
|
284
|
-
}
|
|
258
|
+
// Credential validation is deferred until after tier routing mode detection
|
|
259
|
+
// (see line ~430 for the actual validation logic)
|
|
285
260
|
|
|
286
261
|
if (modelProvider === "ollama") {
|
|
287
262
|
try {
|
|
@@ -320,34 +295,254 @@ if (process.env.PREFER_OLLAMA) {
|
|
|
320
295
|
console.warn('[DEPRECATION] PREFER_OLLAMA is removed. Use TIER_* env vars for routing. See documentation/routing.md');
|
|
321
296
|
}
|
|
322
297
|
|
|
323
|
-
//
|
|
324
|
-
//
|
|
298
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
299
|
+
// TIER ROUTING MODE DETECTION
|
|
300
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
301
|
+
// When all 4 TIER_* variables are set, Lynkr operates in "Tier Routing Mode"
|
|
302
|
+
// In this mode:
|
|
303
|
+
// - MODEL_PROVIDER is auto-detected from TIER_SIMPLE
|
|
304
|
+
// - FALLBACK_PROVIDER is auto-detected from TIER_REASONING
|
|
305
|
+
// - FALLBACK_ENABLED is always true
|
|
306
|
+
// - Only credentials for providers used in tiers are validated
|
|
307
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
308
|
+
|
|
325
309
|
const tiersConfigured = !!(
|
|
326
310
|
process.env.TIER_SIMPLE?.trim() &&
|
|
327
311
|
process.env.TIER_MEDIUM?.trim() &&
|
|
328
312
|
process.env.TIER_COMPLEX?.trim() &&
|
|
329
313
|
process.env.TIER_REASONING?.trim()
|
|
330
314
|
);
|
|
331
|
-
|
|
315
|
+
|
|
316
|
+
let tierRoutingMode = tiersConfigured;
|
|
317
|
+
let autoDetectedProvider = null;
|
|
318
|
+
let autoDetectedFallback = null;
|
|
319
|
+
|
|
320
|
+
if (tierRoutingMode) {
|
|
321
|
+
console.log('[Config] ✓ Tier routing mode active (all 4 TIER_* variables set)');
|
|
322
|
+
|
|
323
|
+
// Phase 3: Error if legacy variables are set
|
|
324
|
+
if (process.env.MODEL_PROVIDER) {
|
|
325
|
+
throw new Error(
|
|
326
|
+
'MODEL_PROVIDER not allowed in tier routing mode.\n' +
|
|
327
|
+
'Remove MODEL_PROVIDER from your .env file.\n' +
|
|
328
|
+
'Provider is auto-detected from TIER_SIMPLE.\n' +
|
|
329
|
+
'See: documentation/routing.md'
|
|
330
|
+
);
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
if (process.env.FALLBACK_PROVIDER) {
|
|
334
|
+
throw new Error(
|
|
335
|
+
'FALLBACK_PROVIDER not allowed in tier routing mode.\n' +
|
|
336
|
+
'Remove FALLBACK_PROVIDER from your .env file.\n' +
|
|
337
|
+
'Fallback is auto-detected from TIER_REASONING.\n' +
|
|
338
|
+
'See: documentation/routing.md'
|
|
339
|
+
);
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
if (process.env.FALLBACK_ENABLED !== undefined) {
|
|
343
|
+
throw new Error(
|
|
344
|
+
'FALLBACK_ENABLED not allowed in tier routing mode.\n' +
|
|
345
|
+
'Remove FALLBACK_ENABLED from your .env file.\n' +
|
|
346
|
+
'Fallback is automatic when TIER_REASONING uses a cloud provider.\n' +
|
|
347
|
+
'See: documentation/routing.md'
|
|
348
|
+
);
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
// Auto-detect primary provider from TIER_SIMPLE
|
|
352
|
+
const tierSimple = process.env.TIER_SIMPLE.trim();
|
|
353
|
+
const tierReasoning = process.env.TIER_REASONING.trim();
|
|
354
|
+
|
|
355
|
+
const simpleMatch = tierSimple.match(/^([a-z-]+):(.+)$/);
|
|
356
|
+
const reasoningMatch = tierReasoning.match(/^([a-z-]+):(.+)$/);
|
|
357
|
+
|
|
358
|
+
if (!simpleMatch) {
|
|
359
|
+
throw new Error(`TIER_SIMPLE must be in format "provider:model" (got: "${tierSimple}")`);
|
|
360
|
+
}
|
|
361
|
+
if (!reasoningMatch) {
|
|
362
|
+
throw new Error(`TIER_REASONING must be in format "provider:model" (got: "${tierReasoning}")`);
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
autoDetectedProvider = simpleMatch[1];
|
|
366
|
+
autoDetectedFallback = reasoningMatch[1];
|
|
367
|
+
|
|
368
|
+
console.log(`[Config] Auto-detected MODEL_PROVIDER="${autoDetectedProvider}" from TIER_SIMPLE`);
|
|
369
|
+
console.log(`[Config] Auto-detected FALLBACK_PROVIDER="${autoDetectedFallback}" from TIER_REASONING`);
|
|
370
|
+
|
|
371
|
+
// Validate auto-detected providers
|
|
372
|
+
if (!SUPPORTED_MODEL_PROVIDERS.has(autoDetectedProvider)) {
|
|
373
|
+
throw new Error(
|
|
374
|
+
`Invalid provider in TIER_SIMPLE: "${autoDetectedProvider}"\n` +
|
|
375
|
+
`Valid providers: ${Array.from(SUPPORTED_MODEL_PROVIDERS).sort().join(', ')}`
|
|
376
|
+
);
|
|
377
|
+
}
|
|
378
|
+
if (!SUPPORTED_MODEL_PROVIDERS.has(autoDetectedFallback)) {
|
|
379
|
+
throw new Error(
|
|
380
|
+
`Invalid provider in TIER_REASONING: "${autoDetectedFallback}"\n` +
|
|
381
|
+
`Valid providers: ${Array.from(SUPPORTED_MODEL_PROVIDERS).sort().join(', ')}`
|
|
382
|
+
);
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
// Override MODEL_PROVIDER and FALLBACK_PROVIDER internally
|
|
386
|
+
process.env.MODEL_PROVIDER = autoDetectedProvider;
|
|
387
|
+
process.env.FALLBACK_PROVIDER = autoDetectedFallback;
|
|
388
|
+
process.env.FALLBACK_ENABLED = 'true';
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
// Re-read modelProvider and fallbackProvider after tier routing auto-detection
|
|
392
|
+
// This ensures the config object uses the auto-detected values
|
|
393
|
+
const finalModelProvider = (process.env.MODEL_PROVIDER ?? "databricks").toLowerCase();
|
|
394
|
+
const finalFallbackProvider = (process.env.FALLBACK_PROVIDER ?? "databricks").toLowerCase();
|
|
395
|
+
const finalFallbackEnabled = process.env.FALLBACK_ENABLED === "true";
|
|
396
|
+
|
|
397
|
+
// Warn about misconfigured fallback provider (only when tier routing is active,
|
|
398
|
+
// since that's the only path that triggers provider fallback)
|
|
399
|
+
if (finalFallbackEnabled && tiersConfigured) {
|
|
332
400
|
const localProviders = ["ollama", "llamacpp", "lmstudio"];
|
|
333
|
-
|
|
334
|
-
|
|
401
|
+
// Only warn (not error) if fallback is local - it just means fallback won't work
|
|
402
|
+
if (localProviders.includes(finalFallbackProvider) && finalFallbackProvider !== finalModelProvider) {
|
|
403
|
+
console.warn(`[WARN] FALLBACK_PROVIDER='${finalFallbackProvider}' is a local provider. Fallback should use a cloud provider for redundancy.`);
|
|
335
404
|
}
|
|
336
405
|
let fallbackMisconfigured = false;
|
|
337
|
-
if (
|
|
406
|
+
if (finalFallbackProvider === "databricks" && (!rawBaseUrl || !apiKey)) {
|
|
338
407
|
fallbackMisconfigured = true;
|
|
339
408
|
}
|
|
340
|
-
if (
|
|
409
|
+
if (finalFallbackProvider === "azure-anthropic" && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
|
|
341
410
|
fallbackMisconfigured = true;
|
|
342
411
|
}
|
|
343
|
-
if (
|
|
412
|
+
if (finalFallbackProvider === "azure-openai" && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
|
|
344
413
|
fallbackMisconfigured = true;
|
|
345
414
|
}
|
|
346
|
-
if (
|
|
415
|
+
if (finalFallbackProvider === "bedrock" && !bedrockApiKey) {
|
|
347
416
|
fallbackMisconfigured = true;
|
|
348
417
|
}
|
|
349
418
|
if (fallbackMisconfigured) {
|
|
350
|
-
console.warn(`[WARN] FALLBACK_PROVIDER='${
|
|
419
|
+
console.warn(`[WARN] FALLBACK_PROVIDER='${finalFallbackProvider}' is enabled but missing credentials. Fallback will not work until configured.`);
|
|
420
|
+
}
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
424
|
+
// SMART CREDENTIAL VALIDATION (TIER ROUTING MODE)
|
|
425
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
426
|
+
// Only validate credentials for providers actually used in tier config
|
|
427
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
428
|
+
|
|
429
|
+
if (tierRoutingMode) {
|
|
430
|
+
// Extract all unique providers from tier config
|
|
431
|
+
const usedProviders = new Set();
|
|
432
|
+
[
|
|
433
|
+
process.env.TIER_SIMPLE,
|
|
434
|
+
process.env.TIER_MEDIUM,
|
|
435
|
+
process.env.TIER_COMPLEX,
|
|
436
|
+
process.env.TIER_REASONING
|
|
437
|
+
].forEach(tierValue => {
|
|
438
|
+
const match = tierValue?.match(/^([a-z-]+):/);
|
|
439
|
+
if (match) usedProviders.add(match[1]);
|
|
440
|
+
});
|
|
441
|
+
|
|
442
|
+
console.log(`[Config] Tier routing uses providers: ${Array.from(usedProviders).join(', ')}`);
|
|
443
|
+
|
|
444
|
+
// Validate only providers used in tiers
|
|
445
|
+
if (usedProviders.has('databricks')) {
|
|
446
|
+
if (!rawBaseUrl || !apiKey) {
|
|
447
|
+
throw new Error(
|
|
448
|
+
'DATABRICKS_API_BASE and DATABRICKS_API_KEY required.\n' +
|
|
449
|
+
'Databricks is used in your tier routing config.'
|
|
450
|
+
);
|
|
451
|
+
}
|
|
452
|
+
} else {
|
|
453
|
+
// Mock credentials if Databricks not used
|
|
454
|
+
if (!rawBaseUrl) {
|
|
455
|
+
process.env.DATABRICKS_API_BASE = "http://localhost:8080";
|
|
456
|
+
rawBaseUrl = "http://localhost:8080";
|
|
457
|
+
}
|
|
458
|
+
if (!apiKey) {
|
|
459
|
+
process.env.DATABRICKS_API_KEY = "mock-key-unused";
|
|
460
|
+
apiKey = "mock-key-unused";
|
|
461
|
+
}
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
if (usedProviders.has('azure-anthropic') && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
|
|
465
|
+
throw new Error(
|
|
466
|
+
'AZURE_ANTHROPIC_ENDPOINT and AZURE_ANTHROPIC_API_KEY required.\n' +
|
|
467
|
+
'Azure Anthropic is used in your tier routing config.'
|
|
468
|
+
);
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
if (usedProviders.has('azure-openai') && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
|
|
472
|
+
throw new Error(
|
|
473
|
+
'AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY required.\n' +
|
|
474
|
+
'Azure OpenAI is used in your tier routing config.'
|
|
475
|
+
);
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
if (usedProviders.has('openai') && !openAIApiKey) {
|
|
479
|
+
throw new Error(
|
|
480
|
+
'OPENAI_API_KEY required.\n' +
|
|
481
|
+
'OpenAI is used in your tier routing config.'
|
|
482
|
+
);
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
if (usedProviders.has('openrouter') && !openRouterApiKey) {
|
|
486
|
+
throw new Error(
|
|
487
|
+
'OPENROUTER_API_KEY required.\n' +
|
|
488
|
+
'OpenRouter is used in your tier routing config.'
|
|
489
|
+
);
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
if (usedProviders.has('bedrock') && !bedrockApiKey) {
|
|
493
|
+
throw new Error(
|
|
494
|
+
'AWS_BEDROCK_API_KEY required.\n' +
|
|
495
|
+
'Bedrock is used in your tier routing config.'
|
|
496
|
+
);
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
// Ollama endpoint validation
|
|
500
|
+
if (usedProviders.has('ollama')) {
|
|
501
|
+
try {
|
|
502
|
+
new URL(ollamaEndpoint);
|
|
503
|
+
} catch (err) {
|
|
504
|
+
throw new Error(`Invalid OLLAMA_ENDPOINT: "${ollamaEndpoint}". Must be a valid URL.`);
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
} else {
|
|
509
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
510
|
+
// STATIC PROVIDER MODE - Original validation logic
|
|
511
|
+
// ═══════════════════════════════════════════════════════════════════════════
|
|
512
|
+
|
|
513
|
+
if (modelProvider === "databricks" && (!rawBaseUrl || !apiKey)) {
|
|
514
|
+
throw new Error("Set DATABRICKS_API_BASE and DATABRICKS_API_KEY before starting the proxy.");
|
|
515
|
+
} else if (modelProvider === "ollama" && !fallbackEnabled && (!rawBaseUrl || !apiKey)) {
|
|
516
|
+
// Relaxed: Allow mock credentials for true Ollama-only mode (fallback disabled)
|
|
517
|
+
if (!rawBaseUrl) {
|
|
518
|
+
process.env.DATABRICKS_API_BASE = "http://localhost:8080";
|
|
519
|
+
rawBaseUrl = "http://localhost:8080";
|
|
520
|
+
}
|
|
521
|
+
if (!apiKey) {
|
|
522
|
+
process.env.DATABRICKS_API_KEY = "mock-key-for-ollama-only";
|
|
523
|
+
apiKey = "mock-key-for-ollama-only";
|
|
524
|
+
}
|
|
525
|
+
console.log("[CONFIG] Using mock Databricks credentials (Ollama-only mode with fallback disabled)");
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
if (modelProvider === "azure-anthropic" && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
|
|
529
|
+
throw new Error("SET AZURE_ANTHROPIC_ENDPOINT and AZURE_ANTHROPIC_API_KEY before starting the proxy.");
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
if (modelProvider === "azure-openai" && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
|
|
533
|
+
throw new Error("Set AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY before starting the proxy.");
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
if (modelProvider === "openai" && !openAIApiKey) {
|
|
537
|
+
throw new Error("Set OPENAI_API_KEY before starting the proxy.");
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
if (modelProvider === "ollama") {
|
|
541
|
+
try {
|
|
542
|
+
new URL(ollamaEndpoint);
|
|
543
|
+
} catch (err) {
|
|
544
|
+
throw new Error(`Invalid OLLAMA_ENDPOINT: "${ollamaEndpoint}". Must be a valid URL.`);
|
|
545
|
+
}
|
|
351
546
|
}
|
|
352
547
|
}
|
|
353
548
|
|
|
@@ -547,7 +742,7 @@ const workerTaskTimeoutMs = Number.parseInt(process.env.WORKER_TASK_TIMEOUT_MS ?
|
|
|
547
742
|
const workerOffloadThresholdBytes = Number.parseInt(process.env.WORKER_OFFLOAD_THRESHOLD_BYTES ?? "10000", 10);
|
|
548
743
|
|
|
549
744
|
var config = {
|
|
550
|
-
env: process.env.NODE_ENV ?? "
|
|
745
|
+
env: process.env.NODE_ENV ?? "production",
|
|
551
746
|
port: Number.isNaN(port) ? 8080 : port,
|
|
552
747
|
databricks: {
|
|
553
748
|
baseUrl: rawBaseUrl,
|
|
@@ -629,13 +824,13 @@ var config = {
|
|
|
629
824
|
debounceMs: Number.isNaN(hotReloadDebounceMs) ? 1000 : hotReloadDebounceMs,
|
|
630
825
|
},
|
|
631
826
|
modelProvider: {
|
|
632
|
-
type:
|
|
827
|
+
type: finalModelProvider,
|
|
633
828
|
defaultModel,
|
|
634
829
|
suggestionModeModel,
|
|
635
|
-
fallbackEnabled,
|
|
830
|
+
fallbackEnabled: finalFallbackEnabled,
|
|
636
831
|
ollamaMaxToolsForRouting,
|
|
637
832
|
openRouterMaxToolsForRouting,
|
|
638
|
-
fallbackProvider,
|
|
833
|
+
fallbackProvider: finalFallbackProvider,
|
|
639
834
|
},
|
|
640
835
|
toolExecutionMode,
|
|
641
836
|
toolResultCompression: {
|
|
@@ -918,8 +1113,16 @@ var config = {
|
|
|
918
1113
|
// Intelligent Routing
|
|
919
1114
|
routing: {
|
|
920
1115
|
weightedScoring: true,
|
|
1116
|
+
// Cost optimization now respects tier routing mode (only uses TIER_* configured models)
|
|
921
1117
|
costOptimization: true,
|
|
922
1118
|
agenticDetection: true,
|
|
1119
|
+
// Embed an interaction block in the response body so the user can
|
|
1120
|
+
// see *why* a particular tier/provider was chosen.
|
|
1121
|
+
visibleInteraction: process.env.LYNKR_VISIBLE_ROUTING === 'true',
|
|
1122
|
+
// Run user-supplied preflight commands before invoking the model.
|
|
1123
|
+
// If all exit 0, short-circuit the request with zero LLM cost.
|
|
1124
|
+
preflightEnabled: process.env.LYNKR_PREFLIGHT_ENABLED === 'true',
|
|
1125
|
+
preflightTimeoutMs: Number(process.env.LYNKR_PREFLIGHT_TIMEOUT_MS) || 120000,
|
|
923
1126
|
},
|
|
924
1127
|
|
|
925
1128
|
// Model Tier Configuration (REQUIRED)
|
package/src/context/toon.js
CHANGED
|
@@ -15,11 +15,12 @@ function normaliseSettings(settings = {}) {
|
|
|
15
15
|
};
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
-
function resolveEncodeFn(overrideEncode) {
|
|
18
|
+
async function resolveEncodeFn(overrideEncode) {
|
|
19
19
|
if (typeof overrideEncode === "function") return overrideEncode;
|
|
20
20
|
if (cachedEncode !== undefined) return cachedEncode;
|
|
21
21
|
try {
|
|
22
|
-
|
|
22
|
+
// Use dynamic import for ES module compatibility
|
|
23
|
+
const toon = await import("@toon-format/toon");
|
|
23
24
|
cachedEncode = typeof toon?.encode === "function" ? toon.encode : null;
|
|
24
25
|
cachedLoadError = cachedEncode ? null : new Error("Missing encode() export from @toon-format/toon");
|
|
25
26
|
} catch (err) {
|
|
@@ -89,7 +90,7 @@ function compressStringContent(content, cfg, encodeFn, stats) {
|
|
|
89
90
|
return toonText;
|
|
90
91
|
}
|
|
91
92
|
|
|
92
|
-
function applyToonCompression(payload, settings = {}, options = {}) {
|
|
93
|
+
async function applyToonCompression(payload, settings = {}, options = {}) {
|
|
93
94
|
const cfg = normaliseSettings(settings);
|
|
94
95
|
const stats = {
|
|
95
96
|
enabled: cfg.enabled,
|
|
@@ -109,7 +110,7 @@ function applyToonCompression(payload, settings = {}, options = {}) {
|
|
|
109
110
|
return { payload, stats };
|
|
110
111
|
}
|
|
111
112
|
|
|
112
|
-
const encodeFn = resolveEncodeFn(options.encode);
|
|
113
|
+
const encodeFn = await resolveEncodeFn(options.encode);
|
|
113
114
|
if (typeof encodeFn !== "function") {
|
|
114
115
|
stats.available = false;
|
|
115
116
|
const err = cachedLoadError ?? new Error("TOON encoder unavailable");
|