lynkr 9.1.6 → 9.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/api/router.js +0 -10
- package/src/clients/databricks.js +2 -11
- package/src/config/index.js +43 -251
- package/src/context/toon.js +4 -5
- package/src/orchestrator/index.js +6 -33
- package/src/prompts/system.js +6 -34
- package/src/routing/index.js +0 -13
- package/src/routing/model-tiers.js +0 -57
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "lynkr",
|
|
3
|
-
"version": "9.1.6",
|
|
3
|
+
"version": "9.1.8",
|
|
4
4
|
"description": "Self-hosted Claude Code & Cursor proxy with Databricks,AWS BedRock,Azure adapters, openrouter, Ollama,llamacpp,LM Studio, workspace tooling, and MCP integration.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"bin": {
|
package/src/api/router.js
CHANGED
|
@@ -3,7 +3,6 @@ const { processMessage } = require("../orchestrator");
|
|
|
3
3
|
const { getSession } = require("../sessions");
|
|
4
4
|
const metrics = require("../metrics");
|
|
5
5
|
const logger = require("../logger");
|
|
6
|
-
const config = require("../config");
|
|
7
6
|
const { createRateLimiter } = require("./middleware/rate-limiter");
|
|
8
7
|
const openaiRouter = require("./openai-router");
|
|
9
8
|
const providersRouter = require("./providers-handler");
|
|
@@ -29,15 +28,6 @@ function estimateTokenCount(messages = [], system = null, model = null) {
|
|
|
29
28
|
return countMessagesTokens(messages, system, model);
|
|
30
29
|
}
|
|
31
30
|
|
|
32
|
-
// Root health check (for HEAD / and GET /)
|
|
33
|
-
router.head("/", (req, res) => {
|
|
34
|
-
res.status(200).end();
|
|
35
|
-
});
|
|
36
|
-
|
|
37
|
-
router.get("/", (req, res) => {
|
|
38
|
-
res.json({ status: "ok", service: "lynkr" });
|
|
39
|
-
});
|
|
40
|
-
|
|
41
31
|
router.get("/health", (req, res) => {
|
|
42
32
|
res.json({ status: "ok" });
|
|
43
33
|
});
|
|
package/src/clients/databricks.js
CHANGED
|
@@ -107,17 +107,8 @@ async function performJsonRequest(url, { headers = {}, body }, providerLabel) {
|
|
|
107
107
|
let json;
|
|
108
108
|
try {
|
|
109
109
|
json = JSON.parse(text);
|
|
110
|
-
} catch
|
|
110
|
+
} catch {
|
|
111
111
|
json = null;
|
|
112
|
-
// Log non-JSON responses for debugging
|
|
113
|
-
if (response.ok) {
|
|
114
|
-
logger.warn({
|
|
115
|
-
provider: providerLabel,
|
|
116
|
-
status: response.status,
|
|
117
|
-
contentType: response.headers.get("content-type"),
|
|
118
|
-
textPreview: text.substring(0, 200),
|
|
119
|
-
}, `${providerLabel} returned non-JSON response (status ${response.status})`);
|
|
120
|
-
}
|
|
121
112
|
}
|
|
122
113
|
|
|
123
114
|
const result = {
|
|
@@ -265,7 +256,7 @@ async function invokeOllama(body) {
|
|
|
265
256
|
toolCount,
|
|
266
257
|
toolsInjected,
|
|
267
258
|
supportsTools,
|
|
268
|
-
toolNames: (Array.isArray(toolsToSend) && toolsToSend.length > 0) ? toolsToSend.map(t => t.name
|
|
259
|
+
toolNames: (Array.isArray(toolsToSend) && toolsToSend.length > 0) ? toolsToSend.map(t => t.name) : []
|
|
269
260
|
}, `=== Ollama STANDARD TOOLS INJECTION for ${config.ollama.model} === ${logMessage}`);
|
|
270
261
|
|
|
271
262
|
// ---- Anthropic-native path (Ollama v0.14.0+) ----
|
package/src/config/index.js
CHANGED
|
@@ -76,8 +76,8 @@ if (!SUPPORTED_MODEL_PROVIDERS.has(rawModelProvider)) {
|
|
|
76
76
|
|
|
77
77
|
const modelProvider = rawModelProvider;
|
|
78
78
|
|
|
79
|
-
|
|
80
|
-
|
|
79
|
+
const rawBaseUrl = trimTrailingSlash(process.env.DATABRICKS_API_BASE);
|
|
80
|
+
const apiKey = process.env.DATABRICKS_API_KEY;
|
|
81
81
|
|
|
82
82
|
const azureAnthropicEndpoint = process.env.AZURE_ANTHROPIC_ENDPOINT ?? null;
|
|
83
83
|
const azureAnthropicApiKey = process.env.AZURE_ANTHROPIC_API_KEY ?? null;
|
|
@@ -255,8 +255,33 @@ const headroomLlmlinguaDevice = process.env.HEADROOM_LLMLINGUA_DEVICE ?? "auto";
|
|
|
255
255
|
const headroomProvider = process.env.HEADROOM_PROVIDER ?? "anthropic";
|
|
256
256
|
const headroomLogLevel = process.env.HEADROOM_LOG_LEVEL ?? "info";
|
|
257
257
|
|
|
258
|
-
//
|
|
259
|
-
|
|
258
|
+
// Only require Databricks credentials if it's the primary provider or used as fallback
|
|
259
|
+
if (modelProvider === "databricks" && (!rawBaseUrl || !apiKey)) {
|
|
260
|
+
throw new Error("Set DATABRICKS_API_BASE and DATABRICKS_API_KEY before starting the proxy.");
|
|
261
|
+
} else if (modelProvider === "ollama" && !fallbackEnabled && (!rawBaseUrl || !apiKey)) {
|
|
262
|
+
// Relaxed: Allow mock credentials for true Ollama-only mode (fallback disabled)
|
|
263
|
+
if (!rawBaseUrl) process.env.DATABRICKS_API_BASE = "http://localhost:8080";
|
|
264
|
+
if (!apiKey) process.env.DATABRICKS_API_KEY = "mock-key-for-ollama-only";
|
|
265
|
+
console.log("[CONFIG] Using mock Databricks credentials (Ollama-only mode with fallback disabled)");
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
if (modelProvider === "azure-anthropic" && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
|
|
269
|
+
throw new Error(
|
|
270
|
+
"Set AZURE_ANTHROPIC_ENDPOINT and AZURE_ANTHROPIC_API_KEY before starting the proxy.",
|
|
271
|
+
);
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
if (modelProvider === "azure-openai" && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
|
|
275
|
+
throw new Error(
|
|
276
|
+
"Set AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY before starting the proxy.",
|
|
277
|
+
);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
if (modelProvider === "openai" && !openAIApiKey) {
|
|
281
|
+
throw new Error(
|
|
282
|
+
"Set OPENAI_API_KEY before starting the proxy.",
|
|
283
|
+
);
|
|
284
|
+
}
|
|
260
285
|
|
|
261
286
|
if (modelProvider === "ollama") {
|
|
262
287
|
try {
|
|
@@ -295,266 +320,34 @@ if (process.env.PREFER_OLLAMA) {
|
|
|
295
320
|
console.warn('[DEPRECATION] PREFER_OLLAMA is removed. Use TIER_* env vars for routing. See documentation/routing.md');
|
|
296
321
|
}
|
|
297
322
|
|
|
298
|
-
//
|
|
299
|
-
//
|
|
300
|
-
// ═══════════════════════════════════════════════════════════════════════════
|
|
301
|
-
// When all 4 TIER_* variables are set, Lynkr operates in "Tier Routing Mode"
|
|
302
|
-
// In this mode:
|
|
303
|
-
// - MODEL_PROVIDER is auto-detected from TIER_SIMPLE
|
|
304
|
-
// - FALLBACK_PROVIDER is auto-detected from TIER_REASONING
|
|
305
|
-
// - FALLBACK_ENABLED is always true
|
|
306
|
-
// - Only credentials for providers used in tiers are validated
|
|
307
|
-
// ═══════════════════════════════════════════════════════════════════════════
|
|
308
|
-
|
|
323
|
+
// Warn about misconfigured fallback provider (only when tier routing is active,
|
|
324
|
+
// since that's the only path that triggers provider fallback)
|
|
309
325
|
const tiersConfigured = !!(
|
|
310
326
|
process.env.TIER_SIMPLE?.trim() &&
|
|
311
327
|
process.env.TIER_MEDIUM?.trim() &&
|
|
312
328
|
process.env.TIER_COMPLEX?.trim() &&
|
|
313
329
|
process.env.TIER_REASONING?.trim()
|
|
314
330
|
);
|
|
315
|
-
|
|
316
|
-
let tierRoutingMode = tiersConfigured;
|
|
317
|
-
let autoDetectedProvider = null;
|
|
318
|
-
let autoDetectedFallback = null;
|
|
319
|
-
|
|
320
|
-
if (tierRoutingMode) {
|
|
321
|
-
console.log('[Config] ✓ Tier routing mode active (all 4 TIER_* variables set)');
|
|
322
|
-
|
|
323
|
-
// Phase 3: Error if legacy variables are set
|
|
324
|
-
if (process.env.MODEL_PROVIDER) {
|
|
325
|
-
throw new Error(
|
|
326
|
-
'MODEL_PROVIDER not allowed in tier routing mode.\n' +
|
|
327
|
-
'Remove MODEL_PROVIDER from your .env file.\n' +
|
|
328
|
-
'Provider is auto-detected from TIER_SIMPLE.\n' +
|
|
329
|
-
'See: documentation/routing.md'
|
|
330
|
-
);
|
|
331
|
-
}
|
|
332
|
-
|
|
333
|
-
if (process.env.FALLBACK_PROVIDER) {
|
|
334
|
-
throw new Error(
|
|
335
|
-
'FALLBACK_PROVIDER not allowed in tier routing mode.\n' +
|
|
336
|
-
'Remove FALLBACK_PROVIDER from your .env file.\n' +
|
|
337
|
-
'Fallback is auto-detected from TIER_REASONING.\n' +
|
|
338
|
-
'See: documentation/routing.md'
|
|
339
|
-
);
|
|
340
|
-
}
|
|
341
|
-
|
|
342
|
-
if (process.env.FALLBACK_ENABLED !== undefined) {
|
|
343
|
-
throw new Error(
|
|
344
|
-
'FALLBACK_ENABLED not allowed in tier routing mode.\n' +
|
|
345
|
-
'Remove FALLBACK_ENABLED from your .env file.\n' +
|
|
346
|
-
'Fallback is automatic when TIER_REASONING uses a cloud provider.\n' +
|
|
347
|
-
'See: documentation/routing.md'
|
|
348
|
-
);
|
|
349
|
-
}
|
|
350
|
-
|
|
351
|
-
// Auto-detect primary provider from TIER_SIMPLE
|
|
352
|
-
const tierSimple = process.env.TIER_SIMPLE.trim();
|
|
353
|
-
const tierReasoning = process.env.TIER_REASONING.trim();
|
|
354
|
-
|
|
355
|
-
const simpleMatch = tierSimple.match(/^([a-z-]+):(.+)$/);
|
|
356
|
-
const reasoningMatch = tierReasoning.match(/^([a-z-]+):(.+)$/);
|
|
357
|
-
|
|
358
|
-
if (!simpleMatch) {
|
|
359
|
-
throw new Error(`TIER_SIMPLE must be in format "provider:model" (got: "${tierSimple}")`);
|
|
360
|
-
}
|
|
361
|
-
if (!reasoningMatch) {
|
|
362
|
-
throw new Error(`TIER_REASONING must be in format "provider:model" (got: "${tierReasoning}")`);
|
|
363
|
-
}
|
|
364
|
-
|
|
365
|
-
autoDetectedProvider = simpleMatch[1];
|
|
366
|
-
autoDetectedFallback = reasoningMatch[1];
|
|
367
|
-
|
|
368
|
-
console.log(`[Config] Auto-detected MODEL_PROVIDER="${autoDetectedProvider}" from TIER_SIMPLE`);
|
|
369
|
-
console.log(`[Config] Auto-detected FALLBACK_PROVIDER="${autoDetectedFallback}" from TIER_REASONING`);
|
|
370
|
-
|
|
371
|
-
// Validate auto-detected providers
|
|
372
|
-
if (!SUPPORTED_MODEL_PROVIDERS.has(autoDetectedProvider)) {
|
|
373
|
-
throw new Error(
|
|
374
|
-
`Invalid provider in TIER_SIMPLE: "${autoDetectedProvider}"\n` +
|
|
375
|
-
`Valid providers: ${Array.from(SUPPORTED_MODEL_PROVIDERS).sort().join(', ')}`
|
|
376
|
-
);
|
|
377
|
-
}
|
|
378
|
-
if (!SUPPORTED_MODEL_PROVIDERS.has(autoDetectedFallback)) {
|
|
379
|
-
throw new Error(
|
|
380
|
-
`Invalid provider in TIER_REASONING: "${autoDetectedFallback}"\n` +
|
|
381
|
-
`Valid providers: ${Array.from(SUPPORTED_MODEL_PROVIDERS).sort().join(', ')}`
|
|
382
|
-
);
|
|
383
|
-
}
|
|
384
|
-
|
|
385
|
-
// Override MODEL_PROVIDER and FALLBACK_PROVIDER internally
|
|
386
|
-
process.env.MODEL_PROVIDER = autoDetectedProvider;
|
|
387
|
-
process.env.FALLBACK_PROVIDER = autoDetectedFallback;
|
|
388
|
-
process.env.FALLBACK_ENABLED = 'true';
|
|
389
|
-
}
|
|
390
|
-
|
|
391
|
-
// Re-read modelProvider and fallbackProvider after tier routing auto-detection
|
|
392
|
-
// This ensures the config object uses the auto-detected values
|
|
393
|
-
const finalModelProvider = (process.env.MODEL_PROVIDER ?? "databricks").toLowerCase();
|
|
394
|
-
const finalFallbackProvider = (process.env.FALLBACK_PROVIDER ?? "databricks").toLowerCase();
|
|
395
|
-
const finalFallbackEnabled = process.env.FALLBACK_ENABLED === "true";
|
|
396
|
-
|
|
397
|
-
// Safety check: prevent self-loop when tier routing is active and provider is not databricks
|
|
398
|
-
// If using tier routing with ollama/llamacpp/lmstudio, clear databricks URL to prevent agents from calling back to self
|
|
399
|
-
if (tierRoutingMode && finalModelProvider !== "databricks" && rawBaseUrl) {
|
|
400
|
-
const isLocalhost = rawBaseUrl.includes('localhost') || rawBaseUrl.includes('127.0.0.1');
|
|
401
|
-
const matchesServerPort = rawBaseUrl.includes(`:${port}`);
|
|
402
|
-
if (isLocalhost && matchesServerPort) {
|
|
403
|
-
console.warn(`[WARN] DATABRICKS_API_BASE points to this server (${rawBaseUrl}). Clearing to prevent self-loop.`);
|
|
404
|
-
rawBaseUrl = null;
|
|
405
|
-
apiKey = null;
|
|
406
|
-
}
|
|
407
|
-
}
|
|
408
|
-
|
|
409
|
-
// Warn about misconfigured fallback provider (only when tier routing is active,
|
|
410
|
-
// since that's the only path that triggers provider fallback)
|
|
411
|
-
if (finalFallbackEnabled && tiersConfigured) {
|
|
331
|
+
if (fallbackEnabled && tiersConfigured) {
|
|
412
332
|
const localProviders = ["ollama", "llamacpp", "lmstudio"];
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
console.warn(`[WARN] FALLBACK_PROVIDER='${finalFallbackProvider}' is a local provider. Fallback should use a cloud provider for redundancy.`);
|
|
333
|
+
if (localProviders.includes(fallbackProvider)) {
|
|
334
|
+
throw new Error(`FALLBACK_PROVIDER cannot be '${fallbackProvider}' (local providers should not be fallbacks). Use cloud providers: databricks, azure-anthropic, azure-openai, openrouter, openai, bedrock`);
|
|
416
335
|
}
|
|
417
336
|
let fallbackMisconfigured = false;
|
|
418
|
-
if (
|
|
337
|
+
if (fallbackProvider === "databricks" && (!rawBaseUrl || !apiKey)) {
|
|
419
338
|
fallbackMisconfigured = true;
|
|
420
339
|
}
|
|
421
|
-
if (
|
|
340
|
+
if (fallbackProvider === "azure-anthropic" && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
|
|
422
341
|
fallbackMisconfigured = true;
|
|
423
342
|
}
|
|
424
|
-
if (
|
|
343
|
+
if (fallbackProvider === "azure-openai" && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
|
|
425
344
|
fallbackMisconfigured = true;
|
|
426
345
|
}
|
|
427
|
-
if (
|
|
346
|
+
if (fallbackProvider === "bedrock" && !bedrockApiKey) {
|
|
428
347
|
fallbackMisconfigured = true;
|
|
429
348
|
}
|
|
430
349
|
if (fallbackMisconfigured) {
|
|
431
|
-
console.warn(`[WARN] FALLBACK_PROVIDER='${
|
|
432
|
-
}
|
|
433
|
-
}
|
|
434
|
-
|
|
435
|
-
// ═══════════════════════════════════════════════════════════════════════════
|
|
436
|
-
// SMART CREDENTIAL VALIDATION (TIER ROUTING MODE)
|
|
437
|
-
// ═══════════════════════════════════════════════════════════════════════════
|
|
438
|
-
// Only validate credentials for providers actually used in tier config
|
|
439
|
-
// ═══════════════════════════════════════════════════════════════════════════
|
|
440
|
-
|
|
441
|
-
if (tierRoutingMode) {
|
|
442
|
-
// Extract all unique providers from tier config
|
|
443
|
-
const usedProviders = new Set();
|
|
444
|
-
[
|
|
445
|
-
process.env.TIER_SIMPLE,
|
|
446
|
-
process.env.TIER_MEDIUM,
|
|
447
|
-
process.env.TIER_COMPLEX,
|
|
448
|
-
process.env.TIER_REASONING
|
|
449
|
-
].forEach(tierValue => {
|
|
450
|
-
const match = tierValue?.match(/^([a-z-]+):/);
|
|
451
|
-
if (match) usedProviders.add(match[1]);
|
|
452
|
-
});
|
|
453
|
-
|
|
454
|
-
console.log(`[Config] Tier routing uses providers: ${Array.from(usedProviders).join(', ')}`);
|
|
455
|
-
|
|
456
|
-
// Validate only providers used in tiers
|
|
457
|
-
if (usedProviders.has('databricks')) {
|
|
458
|
-
if (!rawBaseUrl || !apiKey) {
|
|
459
|
-
throw new Error(
|
|
460
|
-
'DATABRICKS_API_BASE and DATABRICKS_API_KEY required.\n' +
|
|
461
|
-
'Databricks is used in your tier routing config.'
|
|
462
|
-
);
|
|
463
|
-
}
|
|
464
|
-
} else {
|
|
465
|
-
// Mock credentials if Databricks not used
|
|
466
|
-
if (!rawBaseUrl) {
|
|
467
|
-
process.env.DATABRICKS_API_BASE = "http://localhost:8080";
|
|
468
|
-
rawBaseUrl = "http://localhost:8080";
|
|
469
|
-
}
|
|
470
|
-
if (!apiKey) {
|
|
471
|
-
process.env.DATABRICKS_API_KEY = "mock-key-unused";
|
|
472
|
-
apiKey = "mock-key-unused";
|
|
473
|
-
}
|
|
474
|
-
}
|
|
475
|
-
|
|
476
|
-
if (usedProviders.has('azure-anthropic') && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
|
|
477
|
-
throw new Error(
|
|
478
|
-
'AZURE_ANTHROPIC_ENDPOINT and AZURE_ANTHROPIC_API_KEY required.\n' +
|
|
479
|
-
'Azure Anthropic is used in your tier routing config.'
|
|
480
|
-
);
|
|
481
|
-
}
|
|
482
|
-
|
|
483
|
-
if (usedProviders.has('azure-openai') && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
|
|
484
|
-
throw new Error(
|
|
485
|
-
'AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY required.\n' +
|
|
486
|
-
'Azure OpenAI is used in your tier routing config.'
|
|
487
|
-
);
|
|
488
|
-
}
|
|
489
|
-
|
|
490
|
-
if (usedProviders.has('openai') && !openAIApiKey) {
|
|
491
|
-
throw new Error(
|
|
492
|
-
'OPENAI_API_KEY required.\n' +
|
|
493
|
-
'OpenAI is used in your tier routing config.'
|
|
494
|
-
);
|
|
495
|
-
}
|
|
496
|
-
|
|
497
|
-
if (usedProviders.has('openrouter') && !openRouterApiKey) {
|
|
498
|
-
throw new Error(
|
|
499
|
-
'OPENROUTER_API_KEY required.\n' +
|
|
500
|
-
'OpenRouter is used in your tier routing config.'
|
|
501
|
-
);
|
|
502
|
-
}
|
|
503
|
-
|
|
504
|
-
if (usedProviders.has('bedrock') && !bedrockApiKey) {
|
|
505
|
-
throw new Error(
|
|
506
|
-
'AWS_BEDROCK_API_KEY required.\n' +
|
|
507
|
-
'Bedrock is used in your tier routing config.'
|
|
508
|
-
);
|
|
509
|
-
}
|
|
510
|
-
|
|
511
|
-
// Ollama endpoint validation
|
|
512
|
-
if (usedProviders.has('ollama')) {
|
|
513
|
-
try {
|
|
514
|
-
new URL(ollamaEndpoint);
|
|
515
|
-
} catch (err) {
|
|
516
|
-
throw new Error(`Invalid OLLAMA_ENDPOINT: "${ollamaEndpoint}". Must be a valid URL.`);
|
|
517
|
-
}
|
|
518
|
-
}
|
|
519
|
-
|
|
520
|
-
} else {
|
|
521
|
-
// ═══════════════════════════════════════════════════════════════════════════
|
|
522
|
-
// STATIC PROVIDER MODE - Original validation logic
|
|
523
|
-
// ═══════════════════════════════════════════════════════════════════════════
|
|
524
|
-
|
|
525
|
-
if (modelProvider === "databricks" && (!rawBaseUrl || !apiKey)) {
|
|
526
|
-
throw new Error("Set DATABRICKS_API_BASE and DATABRICKS_API_KEY before starting the proxy.");
|
|
527
|
-
} else if (modelProvider === "ollama" && !fallbackEnabled && (!rawBaseUrl || !apiKey)) {
|
|
528
|
-
// Relaxed: Allow mock credentials for true Ollama-only mode (fallback disabled)
|
|
529
|
-
if (!rawBaseUrl) {
|
|
530
|
-
process.env.DATABRICKS_API_BASE = "http://localhost:8080";
|
|
531
|
-
rawBaseUrl = "http://localhost:8080";
|
|
532
|
-
}
|
|
533
|
-
if (!apiKey) {
|
|
534
|
-
process.env.DATABRICKS_API_KEY = "mock-key-for-ollama-only";
|
|
535
|
-
apiKey = "mock-key-for-ollama-only";
|
|
536
|
-
}
|
|
537
|
-
console.log("[CONFIG] Using mock Databricks credentials (Ollama-only mode with fallback disabled)");
|
|
538
|
-
}
|
|
539
|
-
|
|
540
|
-
if (modelProvider === "azure-anthropic" && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
|
|
541
|
-
throw new Error("SET AZURE_ANTHROPIC_ENDPOINT and AZURE_ANTHROPIC_API_KEY before starting the proxy.");
|
|
542
|
-
}
|
|
543
|
-
|
|
544
|
-
if (modelProvider === "azure-openai" && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
|
|
545
|
-
throw new Error("Set AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY before starting the proxy.");
|
|
546
|
-
}
|
|
547
|
-
|
|
548
|
-
if (modelProvider === "openai" && !openAIApiKey) {
|
|
549
|
-
throw new Error("Set OPENAI_API_KEY before starting the proxy.");
|
|
550
|
-
}
|
|
551
|
-
|
|
552
|
-
if (modelProvider === "ollama") {
|
|
553
|
-
try {
|
|
554
|
-
new URL(ollamaEndpoint);
|
|
555
|
-
} catch (err) {
|
|
556
|
-
throw new Error(`Invalid OLLAMA_ENDPOINT: "${ollamaEndpoint}". Must be a valid URL.`);
|
|
557
|
-
}
|
|
350
|
+
console.warn(`[WARN] FALLBACK_PROVIDER='${fallbackProvider}' is enabled but missing credentials. Fallback will not work until configured.`);
|
|
558
351
|
}
|
|
559
352
|
}
|
|
560
353
|
|
|
@@ -754,7 +547,7 @@ const workerTaskTimeoutMs = Number.parseInt(process.env.WORKER_TASK_TIMEOUT_MS ?
|
|
|
754
547
|
const workerOffloadThresholdBytes = Number.parseInt(process.env.WORKER_OFFLOAD_THRESHOLD_BYTES ?? "10000", 10);
|
|
755
548
|
|
|
756
549
|
var config = {
|
|
757
|
-
env: process.env.NODE_ENV ?? "
|
|
550
|
+
env: process.env.NODE_ENV ?? "development",
|
|
758
551
|
port: Number.isNaN(port) ? 8080 : port,
|
|
759
552
|
databricks: {
|
|
760
553
|
baseUrl: rawBaseUrl,
|
|
@@ -836,13 +629,13 @@ var config = {
|
|
|
836
629
|
debounceMs: Number.isNaN(hotReloadDebounceMs) ? 1000 : hotReloadDebounceMs,
|
|
837
630
|
},
|
|
838
631
|
modelProvider: {
|
|
839
|
-
type:
|
|
632
|
+
type: modelProvider,
|
|
840
633
|
defaultModel,
|
|
841
634
|
suggestionModeModel,
|
|
842
|
-
fallbackEnabled
|
|
635
|
+
fallbackEnabled,
|
|
843
636
|
ollamaMaxToolsForRouting,
|
|
844
637
|
openRouterMaxToolsForRouting,
|
|
845
|
-
fallbackProvider
|
|
638
|
+
fallbackProvider,
|
|
846
639
|
},
|
|
847
640
|
toolExecutionMode,
|
|
848
641
|
toolResultCompression: {
|
|
@@ -1125,7 +918,6 @@ var config = {
|
|
|
1125
918
|
// Intelligent Routing
|
|
1126
919
|
routing: {
|
|
1127
920
|
weightedScoring: true,
|
|
1128
|
-
// Cost optimization now respects tier routing mode (only uses TIER_* configured models)
|
|
1129
921
|
costOptimization: true,
|
|
1130
922
|
agenticDetection: true,
|
|
1131
923
|
// Embed an interaction block in the response body so the user can
|
package/src/context/toon.js
CHANGED
|
@@ -15,12 +15,11 @@ function normaliseSettings(settings = {}) {
|
|
|
15
15
|
};
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
function resolveEncodeFn(overrideEncode) {
|
|
19
19
|
if (typeof overrideEncode === "function") return overrideEncode;
|
|
20
20
|
if (cachedEncode !== undefined) return cachedEncode;
|
|
21
21
|
try {
|
|
22
|
-
|
|
23
|
-
const toon = await import("@toon-format/toon");
|
|
22
|
+
const toon = require("@toon-format/toon");
|
|
24
23
|
cachedEncode = typeof toon?.encode === "function" ? toon.encode : null;
|
|
25
24
|
cachedLoadError = cachedEncode ? null : new Error("Missing encode() export from @toon-format/toon");
|
|
26
25
|
} catch (err) {
|
|
@@ -90,7 +89,7 @@ function compressStringContent(content, cfg, encodeFn, stats) {
|
|
|
90
89
|
return toonText;
|
|
91
90
|
}
|
|
92
91
|
|
|
93
|
-
|
|
92
|
+
function applyToonCompression(payload, settings = {}, options = {}) {
|
|
94
93
|
const cfg = normaliseSettings(settings);
|
|
95
94
|
const stats = {
|
|
96
95
|
enabled: cfg.enabled,
|
|
@@ -110,7 +109,7 @@ async function applyToonCompression(payload, settings = {}, options = {}) {
|
|
|
110
109
|
return { payload, stats };
|
|
111
110
|
}
|
|
112
111
|
|
|
113
|
-
const encodeFn =
|
|
112
|
+
const encodeFn = resolveEncodeFn(options.encode);
|
|
114
113
|
if (typeof encodeFn !== "function") {
|
|
115
114
|
stats.available = false;
|
|
116
115
|
const err = cachedLoadError ?? new Error("TOON encoder unavailable");
|
|
package/src/orchestrator/index.js
CHANGED
|
@@ -1101,7 +1101,7 @@ function toAnthropicResponse(openai, requestedModel, wantsThinking) {
|
|
|
1101
1101
|
};
|
|
1102
1102
|
}
|
|
1103
1103
|
|
|
1104
|
-
|
|
1104
|
+
function sanitizePayload(payload) {
|
|
1105
1105
|
const { clonePayloadSmart } = require("../utils/payload");
|
|
1106
1106
|
const providerType = config.modelProvider?.type ?? "databricks";
|
|
1107
1107
|
const willFlatten = providerType !== "azure-anthropic";
|
|
@@ -1418,7 +1418,7 @@ async function sanitizePayload(payload) {
|
|
|
1418
1418
|
|
|
1419
1419
|
// Optional TOON conversion for large JSON message payloads (prompt context only).
|
|
1420
1420
|
// Run this BEFORE message coalescing to preserve parseable JSON boundaries.
|
|
1421
|
-
|
|
1421
|
+
applyToonCompression(clean, config.toon, { logger });
|
|
1422
1422
|
|
|
1423
1423
|
// FIX: Handle consecutive messages with the same role (causes llama.cpp 400 error)
|
|
1424
1424
|
// Strategy: Merge consecutive same-role messages, but NEVER merge messages
|
|
@@ -1529,35 +1529,12 @@ function getToolCallSignature(toolCall) {
|
|
|
1529
1529
|
}
|
|
1530
1530
|
|
|
1531
1531
|
function buildNonJsonResponse(databricksResponse) {
|
|
1532
|
-
// Convert plain text response to Anthropic message format
|
|
1533
|
-
// so SSE handler can properly render it
|
|
1534
|
-
const textContent = databricksResponse.text || "";
|
|
1535
|
-
|
|
1536
1532
|
return {
|
|
1537
1533
|
status: databricksResponse.status,
|
|
1538
1534
|
headers: {
|
|
1539
|
-
"Content-Type":
|
|
1540
|
-
},
|
|
1541
|
-
body: {
|
|
1542
|
-
id: `msg_${Date.now()}`,
|
|
1543
|
-
type: "message",
|
|
1544
|
-
role: "assistant",
|
|
1545
|
-
model: "unknown",
|
|
1546
|
-
content: [
|
|
1547
|
-
{
|
|
1548
|
-
type: "text",
|
|
1549
|
-
text: textContent
|
|
1550
|
-
}
|
|
1551
|
-
],
|
|
1552
|
-
stop_reason: "end_turn",
|
|
1553
|
-
stop_sequence: null,
|
|
1554
|
-
usage: {
|
|
1555
|
-
input_tokens: 0,
|
|
1556
|
-
output_tokens: 0,
|
|
1557
|
-
cache_creation_input_tokens: 0,
|
|
1558
|
-
cache_read_input_tokens: 0,
|
|
1559
|
-
}
|
|
1535
|
+
"Content-Type": databricksResponse.contentType ?? "text/plain",
|
|
1560
1536
|
},
|
|
1537
|
+
body: databricksResponse.text,
|
|
1561
1538
|
terminationReason: "non_json_response",
|
|
1562
1539
|
};
|
|
1563
1540
|
}
|
|
@@ -3929,7 +3906,7 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
|
|
|
3929
3906
|
const { createTimer } = require("../utils/perf-timer");
|
|
3930
3907
|
const pTimer = createTimer("processMessage");
|
|
3931
3908
|
|
|
3932
|
-
const cleanPayload =
|
|
3909
|
+
const cleanPayload = sanitizePayload(payload);
|
|
3933
3910
|
pTimer.mark("sanitizePayload");
|
|
3934
3911
|
|
|
3935
3912
|
// Proactively load tools based on prompt content (lazy loading)
|
|
@@ -4067,11 +4044,7 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
|
|
|
4067
4044
|
if (semanticCache.isEnabled() && semanticLookupResult && !semanticLookupResult.hit) {
|
|
4068
4045
|
if (loopResult.response?.status === 200 && loopResult.response?.body) {
|
|
4069
4046
|
try {
|
|
4070
|
-
|
|
4071
|
-
const body = loopResult.response.body;
|
|
4072
|
-
if (typeof body === 'object' || (typeof body === 'string' && body.trim().startsWith('{'))) {
|
|
4073
|
-
await semanticCache.store(semanticLookupResult, body);
|
|
4074
|
-
}
|
|
4047
|
+
await semanticCache.store(semanticLookupResult, loopResult.response.body);
|
|
4075
4048
|
} catch (err) {
|
|
4076
4049
|
logger.debug({ error: err.message }, "Semantic cache store failed");
|
|
4077
4050
|
}
|
package/src/prompts/system.js
CHANGED
|
@@ -70,41 +70,13 @@ function compressToolDescriptions(tools, mode = null) {
|
|
|
70
70
|
return tools; // Return unmodified if not in minimal mode
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
-
|
|
74
|
-
// Handle both Anthropic format (name + input_schema) and OpenAI format (function.name)
|
|
75
|
-
const hasAnthropicFormat = tool && tool.name && tool.input_schema;
|
|
76
|
-
const hasOpenAIFormat = tool && tool.function && tool.function.name;
|
|
77
|
-
const isValid = hasAnthropicFormat || hasOpenAIFormat;
|
|
78
|
-
|
|
79
|
-
if (!isValid) {
|
|
80
|
-
logger.debug({
|
|
81
|
-
hasName: !!tool?.name,
|
|
82
|
-
hasSchema: !!tool?.input_schema,
|
|
83
|
-
hasFunctionName: !!tool?.function?.name,
|
|
84
|
-
toolType: typeof tool
|
|
85
|
-
}, 'Filtered out malformed tool');
|
|
86
|
-
}
|
|
87
|
-
return isValid;
|
|
88
|
-
});
|
|
89
|
-
|
|
90
|
-
if (validTools.length === 0 && tools.length > 0) {
|
|
91
|
-
logger.warn({ originalCount: tools.length }, 'All tools filtered out as malformed - returning original');
|
|
92
|
-
return tools;
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
return validTools.map(tool => {
|
|
96
|
-
// If already in OpenAI format, return as-is (no compression for OpenAI format)
|
|
97
|
-
if (tool.function && !tool.input_schema) {
|
|
98
|
-
return tool;
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
// Compress Anthropic format
|
|
73
|
+
return tools.map(tool => {
|
|
102
74
|
const compressed = {
|
|
103
75
|
name: tool.name,
|
|
104
76
|
input_schema: {
|
|
105
|
-
type: tool.input_schema
|
|
77
|
+
type: tool.input_schema.type,
|
|
106
78
|
properties: {},
|
|
107
|
-
required: tool.input_schema
|
|
79
|
+
required: tool.input_schema.required || [],
|
|
108
80
|
}
|
|
109
81
|
};
|
|
110
82
|
|
|
@@ -218,7 +190,7 @@ function optimizeSystemPrompt(system, context = {}, mode = null) {
|
|
|
218
190
|
|
|
219
191
|
// 2. Remove file operation guidelines if no file tools
|
|
220
192
|
const hasFileTools = context.tools?.some(t =>
|
|
221
|
-
|
|
193
|
+
['Read', 'Write', 'Edit', 'Glob', 'Grep'].includes(t.name)
|
|
222
194
|
);
|
|
223
195
|
if (!hasFileTools) {
|
|
224
196
|
text = removeSection(text, /# File Operations?[\s\S]*?(?=\n#|\n\n[A-Z]|$)/gi, optimizations, 'file operations');
|
|
@@ -226,7 +198,7 @@ function optimizeSystemPrompt(system, context = {}, mode = null) {
|
|
|
226
198
|
|
|
227
199
|
// 3. Remove git guidelines if no git tools
|
|
228
200
|
const hasGitTools = context.tools?.some(t =>
|
|
229
|
-
t
|
|
201
|
+
t.name.toLowerCase().includes('git')
|
|
230
202
|
);
|
|
231
203
|
if (!hasGitTools) {
|
|
232
204
|
text = removeSection(text, /# Git.*?[\s\S]*?(?=\n#|\n\n[A-Z]|$)/gi, optimizations, 'git guidelines');
|
|
@@ -235,7 +207,7 @@ function optimizeSystemPrompt(system, context = {}, mode = null) {
|
|
|
235
207
|
|
|
236
208
|
// 4. Remove web search guidelines if no web tools
|
|
237
209
|
const hasWebTools = context.tools?.some(t =>
|
|
238
|
-
|
|
210
|
+
['WebSearch', 'WebFetch'].includes(t.name)
|
|
239
211
|
);
|
|
240
212
|
if (!hasWebTools) {
|
|
241
213
|
text = removeSection(text, /# Web.*?[\s\S]*?(?=\n#|\n\n[A-Z]|$)/gi, optimizations, 'web guidelines');
|
package/src/routing/index.js
CHANGED
|
@@ -84,28 +84,15 @@ function isLocalProvider(provider) {
|
|
|
84
84
|
|
|
85
85
|
/**
|
|
86
86
|
* Check if fallback is enabled
|
|
87
|
-
* In tier routing mode, fallback is always enabled
|
|
88
87
|
*/
|
|
89
88
|
function isFallbackEnabled() {
|
|
90
|
-
if (config.modelTiers?.enabled) {
|
|
91
|
-
// Tier routing mode: fallback always enabled
|
|
92
|
-
return true;
|
|
93
|
-
}
|
|
94
|
-
// Static provider mode: use FALLBACK_ENABLED
|
|
95
89
|
return config.modelProvider?.fallbackEnabled !== false;
|
|
96
90
|
}
|
|
97
91
|
|
|
98
92
|
/**
|
|
99
93
|
* Get the configured fallback provider
|
|
100
|
-
* In tier routing mode, fallback = TIER_REASONING provider
|
|
101
94
|
*/
|
|
102
95
|
function getFallbackProvider() {
|
|
103
|
-
if (config.modelTiers?.enabled && config.modelTiers?.REASONING) {
|
|
104
|
-
// Tier routing mode: extract provider from TIER_REASONING
|
|
105
|
-
const match = config.modelTiers.REASONING.match(/^([a-z-]+):/);
|
|
106
|
-
if (match) return match[1];
|
|
107
|
-
}
|
|
108
|
-
// Static provider mode: use FALLBACK_PROVIDER
|
|
109
96
|
return config.modelProvider?.fallbackProvider ?? 'databricks';
|
|
110
97
|
}
|
|
111
98
|
|
|
package/src/routing/model-tiers.js
CHANGED
|
@@ -54,20 +54,6 @@ class ModelTierSelector {
|
|
|
54
54
|
* Load tier configuration from JSON file
|
|
55
55
|
*/
|
|
56
56
|
_loadConfig() {
|
|
57
|
-
// Check if tier routing mode is active (all 4 TIER_* env vars set)
|
|
58
|
-
const tierRoutingMode = !!(
|
|
59
|
-
config.modelTiers?.SIMPLE?.trim() &&
|
|
60
|
-
config.modelTiers?.MEDIUM?.trim() &&
|
|
61
|
-
config.modelTiers?.COMPLEX?.trim() &&
|
|
62
|
-
config.modelTiers?.REASONING?.trim()
|
|
63
|
-
);
|
|
64
|
-
|
|
65
|
-
if (tierRoutingMode) {
|
|
66
|
-
logger.debug('[ModelTiers] Tier routing mode active, building config from TIER_* env vars');
|
|
67
|
-
this._buildFromEnvVars();
|
|
68
|
-
return;
|
|
69
|
-
}
|
|
70
|
-
|
|
71
57
|
try {
|
|
72
58
|
if (fs.existsSync(TIER_CONFIG_PATH)) {
|
|
73
59
|
const data = JSON.parse(fs.readFileSync(TIER_CONFIG_PATH, 'utf8'));
|
|
@@ -122,49 +108,6 @@ class ModelTierSelector {
|
|
|
122
108
|
/**
|
|
123
109
|
* Load default tier config
|
|
124
110
|
*/
|
|
125
|
-
/**
|
|
126
|
-
* Build tier config from TIER_* environment variables
|
|
127
|
-
* Format: TIER_SIMPLE=provider:model
|
|
128
|
-
*/
|
|
129
|
-
_buildFromEnvVars() {
|
|
130
|
-
this.tierConfig = {};
|
|
131
|
-
this.localProviders = {
|
|
132
|
-
ollama: { free: true, defaultTier: 'SIMPLE' },
|
|
133
|
-
llamacpp: { free: true, defaultTier: 'SIMPLE' },
|
|
134
|
-
lmstudio: { free: true, defaultTier: 'SIMPLE' },
|
|
135
|
-
mlx: { free: true, defaultTier: 'SIMPLE' },
|
|
136
|
-
};
|
|
137
|
-
|
|
138
|
-
const tiers = ['SIMPLE', 'MEDIUM', 'COMPLEX', 'REASONING'];
|
|
139
|
-
for (const tier of tiers) {
|
|
140
|
-
const envValue = config.modelTiers?.[tier]?.trim();
|
|
141
|
-
if (!envValue) continue;
|
|
142
|
-
|
|
143
|
-
// Parse provider:model format
|
|
144
|
-
const match = envValue.match(/^([a-z-]+):(.+)$/);
|
|
145
|
-
if (!match) {
|
|
146
|
-
logger.warn({ tier, value: envValue }, '[ModelTiers] Invalid TIER format, expected provider:model');
|
|
147
|
-
continue;
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
const [, provider, model] = match;
|
|
151
|
-
|
|
152
|
-
// Initialize tier config if not exists
|
|
153
|
-
if (!this.tierConfig[tier]) {
|
|
154
|
-
this.tierConfig[tier] = { preferred: {} };
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
// Set this as the ONLY preferred model for this tier+provider
|
|
158
|
-
this.tierConfig[tier].preferred[provider] = [model];
|
|
159
|
-
|
|
160
|
-
logger.debug({
|
|
161
|
-
tier,
|
|
162
|
-
provider,
|
|
163
|
-
model
|
|
164
|
-
}, '[ModelTiers] Tier configured from env');
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
|
|
168
111
|
_loadDefaults() {
|
|
169
112
|
this.tierConfig = {
|
|
170
113
|
SIMPLE: { preferred: { ollama: ['llama3.2'], openai: ['gpt-4o-mini'] } },
|