lynkr 9.1.6 → 9.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "lynkr",
3
- "version": "9.1.6",
3
+ "version": "9.1.7",
4
4
  "description": "Self-hosted Claude Code & Cursor proxy with Databricks,AWS BedRock,Azure adapters, openrouter, Ollama,llamacpp,LM Studio, workspace tooling, and MCP integration.",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -44,18 +44,11 @@ class LoadShedder {
44
44
  const memUsage = process.memoryUsage();
45
45
  const heapUsedPercent = memUsage.heapUsed / memUsage.heapTotal;
46
46
 
47
- // FIX: Only trigger if BOTH percentage is high AND actual usage is significant
48
- // This prevents false positives on startup when heapTotal is small but will grow
49
- const heapUsedMB = memUsage.heapUsed / (1024 * 1024);
50
- const minHeapThresholdMB = 500; // Only shed load if using more than 500MB
51
-
52
- if (heapUsedPercent > this.heapThreshold && heapUsedMB > minHeapThresholdMB) {
47
+ if (heapUsedPercent > this.heapThreshold) {
53
48
  logger.warn(
54
49
  {
55
50
  heapUsedPercent: (heapUsedPercent * 100).toFixed(2),
56
- heapUsedMB: heapUsedMB.toFixed(2),
57
51
  threshold: (this.heapThreshold * 100).toFixed(2),
58
- minThresholdMB: minHeapThresholdMB,
59
52
  },
60
53
  "Load shedding: Heap usage exceeded threshold"
61
54
  );
@@ -103,9 +96,6 @@ class LoadShedder {
103
96
  activeRequests: this.activeRequests,
104
97
  totalShed: this.totalShed,
105
98
  heapUsedPercent: ((memUsage.heapUsed / memUsage.heapTotal) * 100).toFixed(2),
106
- heapUsedMB: (memUsage.heapUsed / (1024 * 1024)).toFixed(2),
107
- heapTotalMB: (memUsage.heapTotal / (1024 * 1024)).toFixed(2),
108
- rssMB: (memUsage.rss / (1024 * 1024)).toFixed(2),
109
99
  rssPercent: ((memUsage.rss / os.totalmem()) * 100).toFixed(2),
110
100
  thresholds: {
111
101
  heapThreshold: (this.heapThreshold * 100).toFixed(2),
package/src/api/router.js CHANGED
@@ -3,7 +3,6 @@ const { processMessage } = require("../orchestrator");
3
3
  const { getSession } = require("../sessions");
4
4
  const metrics = require("../metrics");
5
5
  const logger = require("../logger");
6
- const config = require("../config");
7
6
  const { createRateLimiter } = require("./middleware/rate-limiter");
8
7
  const openaiRouter = require("./openai-router");
9
8
  const providersRouter = require("./providers-handler");
@@ -29,15 +28,6 @@ function estimateTokenCount(messages = [], system = null, model = null) {
29
28
  return countMessagesTokens(messages, system, model);
30
29
  }
31
30
 
32
- // Root health check (for HEAD / and GET /)
33
- router.head("/", (req, res) => {
34
- res.status(200).end();
35
- });
36
-
37
- router.get("/", (req, res) => {
38
- res.json({ status: "ok", service: "lynkr" });
39
- });
40
-
41
31
  router.get("/health", (req, res) => {
42
32
  res.json({ status: "ok" });
43
33
  });
@@ -107,17 +107,8 @@ async function performJsonRequest(url, { headers = {}, body }, providerLabel) {
107
107
  let json;
108
108
  try {
109
109
  json = JSON.parse(text);
110
- } catch (parseError) {
110
+ } catch {
111
111
  json = null;
112
- // Log non-JSON responses for debugging
113
- if (response.ok) {
114
- logger.warn({
115
- provider: providerLabel,
116
- status: response.status,
117
- contentType: response.headers.get("content-type"),
118
- textPreview: text.substring(0, 200),
119
- }, `${providerLabel} returned non-JSON response (status ${response.status})`);
120
- }
121
112
  }
122
113
 
123
114
  const result = {
@@ -265,7 +256,7 @@ async function invokeOllama(body) {
265
256
  toolCount,
266
257
  toolsInjected,
267
258
  supportsTools,
268
- toolNames: (Array.isArray(toolsToSend) && toolsToSend.length > 0) ? toolsToSend.map(t => t.name || t.function?.name || 'unnamed') : []
259
+ toolNames: (Array.isArray(toolsToSend) && toolsToSend.length > 0) ? toolsToSend.map(t => t.name) : []
269
260
  }, `=== Ollama STANDARD TOOLS INJECTION for ${config.ollama.model} === ${logMessage}`);
270
261
 
271
262
  // ---- Anthropic-native path (Ollama v0.14.0+) ----
@@ -76,8 +76,8 @@ if (!SUPPORTED_MODEL_PROVIDERS.has(rawModelProvider)) {
76
76
 
77
77
  const modelProvider = rawModelProvider;
78
78
 
79
- let rawBaseUrl = trimTrailingSlash(process.env.DATABRICKS_API_BASE);
80
- let apiKey = process.env.DATABRICKS_API_KEY;
79
+ const rawBaseUrl = trimTrailingSlash(process.env.DATABRICKS_API_BASE);
80
+ const apiKey = process.env.DATABRICKS_API_KEY;
81
81
 
82
82
  const azureAnthropicEndpoint = process.env.AZURE_ANTHROPIC_ENDPOINT ?? null;
83
83
  const azureAnthropicApiKey = process.env.AZURE_ANTHROPIC_API_KEY ?? null;
@@ -255,8 +255,33 @@ const headroomLlmlinguaDevice = process.env.HEADROOM_LLMLINGUA_DEVICE ?? "auto";
255
255
  const headroomProvider = process.env.HEADROOM_PROVIDER ?? "anthropic";
256
256
  const headroomLogLevel = process.env.HEADROOM_LOG_LEVEL ?? "info";
257
257
 
258
- // Credential validation is deferred until after tier routing mode detection
259
- // (see line ~430 for the actual validation logic)
258
+ // Only require Databricks credentials if it's the primary provider or used as fallback
259
+ if (modelProvider === "databricks" && (!rawBaseUrl || !apiKey)) {
260
+ throw new Error("Set DATABRICKS_API_BASE and DATABRICKS_API_KEY before starting the proxy.");
261
+ } else if (modelProvider === "ollama" && !fallbackEnabled && (!rawBaseUrl || !apiKey)) {
262
+ // Relaxed: Allow mock credentials for true Ollama-only mode (fallback disabled)
263
+ if (!rawBaseUrl) process.env.DATABRICKS_API_BASE = "http://localhost:8080";
264
+ if (!apiKey) process.env.DATABRICKS_API_KEY = "mock-key-for-ollama-only";
265
+ console.log("[CONFIG] Using mock Databricks credentials (Ollama-only mode with fallback disabled)");
266
+ }
267
+
268
+ if (modelProvider === "azure-anthropic" && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
269
+ throw new Error(
270
+ "Set AZURE_ANTHROPIC_ENDPOINT and AZURE_ANTHROPIC_API_KEY before starting the proxy.",
271
+ );
272
+ }
273
+
274
+ if (modelProvider === "azure-openai" && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
275
+ throw new Error(
276
+ "Set AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY before starting the proxy.",
277
+ );
278
+ }
279
+
280
+ if (modelProvider === "openai" && !openAIApiKey) {
281
+ throw new Error(
282
+ "Set OPENAI_API_KEY before starting the proxy.",
283
+ );
284
+ }
260
285
 
261
286
  if (modelProvider === "ollama") {
262
287
  try {
@@ -295,266 +320,34 @@ if (process.env.PREFER_OLLAMA) {
295
320
  console.warn('[DEPRECATION] PREFER_OLLAMA is removed. Use TIER_* env vars for routing. See documentation/routing.md');
296
321
  }
297
322
 
298
- // ═══════════════════════════════════════════════════════════════════════════
299
- // TIER ROUTING MODE DETECTION
300
- // ═══════════════════════════════════════════════════════════════════════════
301
- // When all 4 TIER_* variables are set, Lynkr operates in "Tier Routing Mode"
302
- // In this mode:
303
- // - MODEL_PROVIDER is auto-detected from TIER_SIMPLE
304
- // - FALLBACK_PROVIDER is auto-detected from TIER_REASONING
305
- // - FALLBACK_ENABLED is always true
306
- // - Only credentials for providers used in tiers are validated
307
- // ═══════════════════════════════════════════════════════════════════════════
308
-
323
+ // Warn about misconfigured fallback provider (only when tier routing is active,
324
+ // since that's the only path that triggers provider fallback)
309
325
  const tiersConfigured = !!(
310
326
  process.env.TIER_SIMPLE?.trim() &&
311
327
  process.env.TIER_MEDIUM?.trim() &&
312
328
  process.env.TIER_COMPLEX?.trim() &&
313
329
  process.env.TIER_REASONING?.trim()
314
330
  );
315
-
316
- let tierRoutingMode = tiersConfigured;
317
- let autoDetectedProvider = null;
318
- let autoDetectedFallback = null;
319
-
320
- if (tierRoutingMode) {
321
- console.log('[Config] ✓ Tier routing mode active (all 4 TIER_* variables set)');
322
-
323
- // Phase 3: Error if legacy variables are set
324
- if (process.env.MODEL_PROVIDER) {
325
- throw new Error(
326
- 'MODEL_PROVIDER not allowed in tier routing mode.\n' +
327
- 'Remove MODEL_PROVIDER from your .env file.\n' +
328
- 'Provider is auto-detected from TIER_SIMPLE.\n' +
329
- 'See: documentation/routing.md'
330
- );
331
- }
332
-
333
- if (process.env.FALLBACK_PROVIDER) {
334
- throw new Error(
335
- 'FALLBACK_PROVIDER not allowed in tier routing mode.\n' +
336
- 'Remove FALLBACK_PROVIDER from your .env file.\n' +
337
- 'Fallback is auto-detected from TIER_REASONING.\n' +
338
- 'See: documentation/routing.md'
339
- );
340
- }
341
-
342
- if (process.env.FALLBACK_ENABLED !== undefined) {
343
- throw new Error(
344
- 'FALLBACK_ENABLED not allowed in tier routing mode.\n' +
345
- 'Remove FALLBACK_ENABLED from your .env file.\n' +
346
- 'Fallback is automatic when TIER_REASONING uses a cloud provider.\n' +
347
- 'See: documentation/routing.md'
348
- );
349
- }
350
-
351
- // Auto-detect primary provider from TIER_SIMPLE
352
- const tierSimple = process.env.TIER_SIMPLE.trim();
353
- const tierReasoning = process.env.TIER_REASONING.trim();
354
-
355
- const simpleMatch = tierSimple.match(/^([a-z-]+):(.+)$/);
356
- const reasoningMatch = tierReasoning.match(/^([a-z-]+):(.+)$/);
357
-
358
- if (!simpleMatch) {
359
- throw new Error(`TIER_SIMPLE must be in format "provider:model" (got: "${tierSimple}")`);
360
- }
361
- if (!reasoningMatch) {
362
- throw new Error(`TIER_REASONING must be in format "provider:model" (got: "${tierReasoning}")`);
363
- }
364
-
365
- autoDetectedProvider = simpleMatch[1];
366
- autoDetectedFallback = reasoningMatch[1];
367
-
368
- console.log(`[Config] Auto-detected MODEL_PROVIDER="${autoDetectedProvider}" from TIER_SIMPLE`);
369
- console.log(`[Config] Auto-detected FALLBACK_PROVIDER="${autoDetectedFallback}" from TIER_REASONING`);
370
-
371
- // Validate auto-detected providers
372
- if (!SUPPORTED_MODEL_PROVIDERS.has(autoDetectedProvider)) {
373
- throw new Error(
374
- `Invalid provider in TIER_SIMPLE: "${autoDetectedProvider}"\n` +
375
- `Valid providers: ${Array.from(SUPPORTED_MODEL_PROVIDERS).sort().join(', ')}`
376
- );
377
- }
378
- if (!SUPPORTED_MODEL_PROVIDERS.has(autoDetectedFallback)) {
379
- throw new Error(
380
- `Invalid provider in TIER_REASONING: "${autoDetectedFallback}"\n` +
381
- `Valid providers: ${Array.from(SUPPORTED_MODEL_PROVIDERS).sort().join(', ')}`
382
- );
383
- }
384
-
385
- // Override MODEL_PROVIDER and FALLBACK_PROVIDER internally
386
- process.env.MODEL_PROVIDER = autoDetectedProvider;
387
- process.env.FALLBACK_PROVIDER = autoDetectedFallback;
388
- process.env.FALLBACK_ENABLED = 'true';
389
- }
390
-
391
- // Re-read modelProvider and fallbackProvider after tier routing auto-detection
392
- // This ensures the config object uses the auto-detected values
393
- const finalModelProvider = (process.env.MODEL_PROVIDER ?? "databricks").toLowerCase();
394
- const finalFallbackProvider = (process.env.FALLBACK_PROVIDER ?? "databricks").toLowerCase();
395
- const finalFallbackEnabled = process.env.FALLBACK_ENABLED === "true";
396
-
397
- // Safety check: prevent self-loop when tier routing is active and provider is not databricks
398
- // If using tier routing with ollama/llamacpp/lmstudio, clear databricks URL to prevent agents from calling back to self
399
- if (tierRoutingMode && finalModelProvider !== "databricks" && rawBaseUrl) {
400
- const isLocalhost = rawBaseUrl.includes('localhost') || rawBaseUrl.includes('127.0.0.1');
401
- const matchesServerPort = rawBaseUrl.includes(`:${port}`);
402
- if (isLocalhost && matchesServerPort) {
403
- console.warn(`[WARN] DATABRICKS_API_BASE points to this server (${rawBaseUrl}). Clearing to prevent self-loop.`);
404
- rawBaseUrl = null;
405
- apiKey = null;
406
- }
407
- }
408
-
409
- // Warn about misconfigured fallback provider (only when tier routing is active,
410
- // since that's the only path that triggers provider fallback)
411
- if (finalFallbackEnabled && tiersConfigured) {
331
+ if (fallbackEnabled && tiersConfigured) {
412
332
  const localProviders = ["ollama", "llamacpp", "lmstudio"];
413
- // Only warn (not error) if fallback is local - it just means fallback won't work
414
- if (localProviders.includes(finalFallbackProvider) && finalFallbackProvider !== finalModelProvider) {
415
- console.warn(`[WARN] FALLBACK_PROVIDER='${finalFallbackProvider}' is a local provider. Fallback should use a cloud provider for redundancy.`);
333
+ if (localProviders.includes(fallbackProvider)) {
334
+ throw new Error(`FALLBACK_PROVIDER cannot be '${fallbackProvider}' (local providers should not be fallbacks). Use cloud providers: databricks, azure-anthropic, azure-openai, openrouter, openai, bedrock`);
416
335
  }
417
336
  let fallbackMisconfigured = false;
418
- if (finalFallbackProvider === "databricks" && (!rawBaseUrl || !apiKey)) {
337
+ if (fallbackProvider === "databricks" && (!rawBaseUrl || !apiKey)) {
419
338
  fallbackMisconfigured = true;
420
339
  }
421
- if (finalFallbackProvider === "azure-anthropic" && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
340
+ if (fallbackProvider === "azure-anthropic" && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
422
341
  fallbackMisconfigured = true;
423
342
  }
424
- if (finalFallbackProvider === "azure-openai" && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
343
+ if (fallbackProvider === "azure-openai" && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
425
344
  fallbackMisconfigured = true;
426
345
  }
427
- if (finalFallbackProvider === "bedrock" && !bedrockApiKey) {
346
+ if (fallbackProvider === "bedrock" && !bedrockApiKey) {
428
347
  fallbackMisconfigured = true;
429
348
  }
430
349
  if (fallbackMisconfigured) {
431
- console.warn(`[WARN] FALLBACK_PROVIDER='${finalFallbackProvider}' is enabled but missing credentials. Fallback will not work until configured.`);
432
- }
433
- }
434
-
435
- // ═══════════════════════════════════════════════════════════════════════════
436
- // SMART CREDENTIAL VALIDATION (TIER ROUTING MODE)
437
- // ═══════════════════════════════════════════════════════════════════════════
438
- // Only validate credentials for providers actually used in tier config
439
- // ═══════════════════════════════════════════════════════════════════════════
440
-
441
- if (tierRoutingMode) {
442
- // Extract all unique providers from tier config
443
- const usedProviders = new Set();
444
- [
445
- process.env.TIER_SIMPLE,
446
- process.env.TIER_MEDIUM,
447
- process.env.TIER_COMPLEX,
448
- process.env.TIER_REASONING
449
- ].forEach(tierValue => {
450
- const match = tierValue?.match(/^([a-z-]+):/);
451
- if (match) usedProviders.add(match[1]);
452
- });
453
-
454
- console.log(`[Config] Tier routing uses providers: ${Array.from(usedProviders).join(', ')}`);
455
-
456
- // Validate only providers used in tiers
457
- if (usedProviders.has('databricks')) {
458
- if (!rawBaseUrl || !apiKey) {
459
- throw new Error(
460
- 'DATABRICKS_API_BASE and DATABRICKS_API_KEY required.\n' +
461
- 'Databricks is used in your tier routing config.'
462
- );
463
- }
464
- } else {
465
- // Mock credentials if Databricks not used
466
- if (!rawBaseUrl) {
467
- process.env.DATABRICKS_API_BASE = "http://localhost:8080";
468
- rawBaseUrl = "http://localhost:8080";
469
- }
470
- if (!apiKey) {
471
- process.env.DATABRICKS_API_KEY = "mock-key-unused";
472
- apiKey = "mock-key-unused";
473
- }
474
- }
475
-
476
- if (usedProviders.has('azure-anthropic') && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
477
- throw new Error(
478
- 'AZURE_ANTHROPIC_ENDPOINT and AZURE_ANTHROPIC_API_KEY required.\n' +
479
- 'Azure Anthropic is used in your tier routing config.'
480
- );
481
- }
482
-
483
- if (usedProviders.has('azure-openai') && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
484
- throw new Error(
485
- 'AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY required.\n' +
486
- 'Azure OpenAI is used in your tier routing config.'
487
- );
488
- }
489
-
490
- if (usedProviders.has('openai') && !openAIApiKey) {
491
- throw new Error(
492
- 'OPENAI_API_KEY required.\n' +
493
- 'OpenAI is used in your tier routing config.'
494
- );
495
- }
496
-
497
- if (usedProviders.has('openrouter') && !openRouterApiKey) {
498
- throw new Error(
499
- 'OPENROUTER_API_KEY required.\n' +
500
- 'OpenRouter is used in your tier routing config.'
501
- );
502
- }
503
-
504
- if (usedProviders.has('bedrock') && !bedrockApiKey) {
505
- throw new Error(
506
- 'AWS_BEDROCK_API_KEY required.\n' +
507
- 'Bedrock is used in your tier routing config.'
508
- );
509
- }
510
-
511
- // Ollama endpoint validation
512
- if (usedProviders.has('ollama')) {
513
- try {
514
- new URL(ollamaEndpoint);
515
- } catch (err) {
516
- throw new Error(`Invalid OLLAMA_ENDPOINT: "${ollamaEndpoint}". Must be a valid URL.`);
517
- }
518
- }
519
-
520
- } else {
521
- // ═══════════════════════════════════════════════════════════════════════════
522
- // STATIC PROVIDER MODE - Original validation logic
523
- // ═══════════════════════════════════════════════════════════════════════════
524
-
525
- if (modelProvider === "databricks" && (!rawBaseUrl || !apiKey)) {
526
- throw new Error("Set DATABRICKS_API_BASE and DATABRICKS_API_KEY before starting the proxy.");
527
- } else if (modelProvider === "ollama" && !fallbackEnabled && (!rawBaseUrl || !apiKey)) {
528
- // Relaxed: Allow mock credentials for true Ollama-only mode (fallback disabled)
529
- if (!rawBaseUrl) {
530
- process.env.DATABRICKS_API_BASE = "http://localhost:8080";
531
- rawBaseUrl = "http://localhost:8080";
532
- }
533
- if (!apiKey) {
534
- process.env.DATABRICKS_API_KEY = "mock-key-for-ollama-only";
535
- apiKey = "mock-key-for-ollama-only";
536
- }
537
- console.log("[CONFIG] Using mock Databricks credentials (Ollama-only mode with fallback disabled)");
538
- }
539
-
540
- if (modelProvider === "azure-anthropic" && (!azureAnthropicEndpoint || !azureAnthropicApiKey)) {
541
- throw new Error("SET AZURE_ANTHROPIC_ENDPOINT and AZURE_ANTHROPIC_API_KEY before starting the proxy.");
542
- }
543
-
544
- if (modelProvider === "azure-openai" && (!azureOpenAIEndpoint || !azureOpenAIApiKey)) {
545
- throw new Error("Set AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY before starting the proxy.");
546
- }
547
-
548
- if (modelProvider === "openai" && !openAIApiKey) {
549
- throw new Error("Set OPENAI_API_KEY before starting the proxy.");
550
- }
551
-
552
- if (modelProvider === "ollama") {
553
- try {
554
- new URL(ollamaEndpoint);
555
- } catch (err) {
556
- throw new Error(`Invalid OLLAMA_ENDPOINT: "${ollamaEndpoint}". Must be a valid URL.`);
557
- }
350
+ console.warn(`[WARN] FALLBACK_PROVIDER='${fallbackProvider}' is enabled but missing credentials. Fallback will not work until configured.`);
558
351
  }
559
352
  }
560
353
 
@@ -754,7 +547,7 @@ const workerTaskTimeoutMs = Number.parseInt(process.env.WORKER_TASK_TIMEOUT_MS ?
754
547
  const workerOffloadThresholdBytes = Number.parseInt(process.env.WORKER_OFFLOAD_THRESHOLD_BYTES ?? "10000", 10);
755
548
 
756
549
  var config = {
757
- env: process.env.NODE_ENV ?? "production",
550
+ env: process.env.NODE_ENV ?? "development",
758
551
  port: Number.isNaN(port) ? 8080 : port,
759
552
  databricks: {
760
553
  baseUrl: rawBaseUrl,
@@ -836,13 +629,13 @@ var config = {
836
629
  debounceMs: Number.isNaN(hotReloadDebounceMs) ? 1000 : hotReloadDebounceMs,
837
630
  },
838
631
  modelProvider: {
839
- type: finalModelProvider,
632
+ type: modelProvider,
840
633
  defaultModel,
841
634
  suggestionModeModel,
842
- fallbackEnabled: finalFallbackEnabled,
635
+ fallbackEnabled,
843
636
  ollamaMaxToolsForRouting,
844
637
  openRouterMaxToolsForRouting,
845
- fallbackProvider: finalFallbackProvider,
638
+ fallbackProvider,
846
639
  },
847
640
  toolExecutionMode,
848
641
  toolResultCompression: {
@@ -1125,7 +918,6 @@ var config = {
1125
918
  // Intelligent Routing
1126
919
  routing: {
1127
920
  weightedScoring: true,
1128
- // Cost optimization now respects tier routing mode (only uses TIER_* configured models)
1129
921
  costOptimization: true,
1130
922
  agenticDetection: true,
1131
923
  // Embed an interaction block in the response body so the user can
@@ -15,12 +15,11 @@ function normaliseSettings(settings = {}) {
15
15
  };
16
16
  }
17
17
 
18
- async function resolveEncodeFn(overrideEncode) {
18
+ function resolveEncodeFn(overrideEncode) {
19
19
  if (typeof overrideEncode === "function") return overrideEncode;
20
20
  if (cachedEncode !== undefined) return cachedEncode;
21
21
  try {
22
- // Use dynamic import for ES module compatibility
23
- const toon = await import("@toon-format/toon");
22
+ const toon = require("@toon-format/toon");
24
23
  cachedEncode = typeof toon?.encode === "function" ? toon.encode : null;
25
24
  cachedLoadError = cachedEncode ? null : new Error("Missing encode() export from @toon-format/toon");
26
25
  } catch (err) {
@@ -90,7 +89,7 @@ function compressStringContent(content, cfg, encodeFn, stats) {
90
89
  return toonText;
91
90
  }
92
91
 
93
- async function applyToonCompression(payload, settings = {}, options = {}) {
92
+ function applyToonCompression(payload, settings = {}, options = {}) {
94
93
  const cfg = normaliseSettings(settings);
95
94
  const stats = {
96
95
  enabled: cfg.enabled,
@@ -110,7 +109,7 @@ async function applyToonCompression(payload, settings = {}, options = {}) {
110
109
  return { payload, stats };
111
110
  }
112
111
 
113
- const encodeFn = await resolveEncodeFn(options.encode);
112
+ const encodeFn = resolveEncodeFn(options.encode);
114
113
  if (typeof encodeFn !== "function") {
115
114
  stats.available = false;
116
115
  const err = cachedLoadError ?? new Error("TOON encoder unavailable");
@@ -1101,7 +1101,7 @@ function toAnthropicResponse(openai, requestedModel, wantsThinking) {
1101
1101
  };
1102
1102
  }
1103
1103
 
1104
- async function sanitizePayload(payload) {
1104
+ function sanitizePayload(payload) {
1105
1105
  const { clonePayloadSmart } = require("../utils/payload");
1106
1106
  const providerType = config.modelProvider?.type ?? "databricks";
1107
1107
  const willFlatten = providerType !== "azure-anthropic";
@@ -1418,7 +1418,7 @@ async function sanitizePayload(payload) {
1418
1418
 
1419
1419
  // Optional TOON conversion for large JSON message payloads (prompt context only).
1420
1420
  // Run this BEFORE message coalescing to preserve parseable JSON boundaries.
1421
- await applyToonCompression(clean, config.toon, { logger });
1421
+ applyToonCompression(clean, config.toon, { logger });
1422
1422
 
1423
1423
  // FIX: Handle consecutive messages with the same role (causes llama.cpp 400 error)
1424
1424
  // Strategy: Merge consecutive same-role messages, but NEVER merge messages
@@ -1529,35 +1529,12 @@ function getToolCallSignature(toolCall) {
1529
1529
  }
1530
1530
 
1531
1531
  function buildNonJsonResponse(databricksResponse) {
1532
- // Convert plain text response to Anthropic message format
1533
- // so SSE handler can properly render it
1534
- const textContent = databricksResponse.text || "";
1535
-
1536
1532
  return {
1537
1533
  status: databricksResponse.status,
1538
1534
  headers: {
1539
- "Content-Type": "application/json", // Changed from text/plain
1540
- },
1541
- body: {
1542
- id: `msg_${Date.now()}`,
1543
- type: "message",
1544
- role: "assistant",
1545
- model: "unknown",
1546
- content: [
1547
- {
1548
- type: "text",
1549
- text: textContent
1550
- }
1551
- ],
1552
- stop_reason: "end_turn",
1553
- stop_sequence: null,
1554
- usage: {
1555
- input_tokens: 0,
1556
- output_tokens: 0,
1557
- cache_creation_input_tokens: 0,
1558
- cache_read_input_tokens: 0,
1559
- }
1535
+ "Content-Type": databricksResponse.contentType ?? "text/plain",
1560
1536
  },
1537
+ body: databricksResponse.text,
1561
1538
  terminationReason: "non_json_response",
1562
1539
  };
1563
1540
  }
@@ -3929,7 +3906,7 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
3929
3906
  const { createTimer } = require("../utils/perf-timer");
3930
3907
  const pTimer = createTimer("processMessage");
3931
3908
 
3932
- const cleanPayload = await sanitizePayload(payload);
3909
+ const cleanPayload = sanitizePayload(payload);
3933
3910
  pTimer.mark("sanitizePayload");
3934
3911
 
3935
3912
  // Proactively load tools based on prompt content (lazy loading)
@@ -4067,11 +4044,7 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
4067
4044
  if (semanticCache.isEnabled() && semanticLookupResult && !semanticLookupResult.hit) {
4068
4045
  if (loopResult.response?.status === 200 && loopResult.response?.body) {
4069
4046
  try {
4070
- // Only cache valid JSON responses, not HTML error pages
4071
- const body = loopResult.response.body;
4072
- if (typeof body === 'object' || (typeof body === 'string' && body.trim().startsWith('{'))) {
4073
- await semanticCache.store(semanticLookupResult, body);
4074
- }
4047
+ await semanticCache.store(semanticLookupResult, loopResult.response.body);
4075
4048
  } catch (err) {
4076
4049
  logger.debug({ error: err.message }, "Semantic cache store failed");
4077
4050
  }
@@ -70,41 +70,13 @@ function compressToolDescriptions(tools, mode = null) {
70
70
  return tools; // Return unmodified if not in minimal mode
71
71
  }
72
72
 
73
- const validTools = tools.filter(tool => {
74
- // Handle both Anthropic format (name + input_schema) and OpenAI format (function.name)
75
- const hasAnthropicFormat = tool && tool.name && tool.input_schema;
76
- const hasOpenAIFormat = tool && tool.function && tool.function.name;
77
- const isValid = hasAnthropicFormat || hasOpenAIFormat;
78
-
79
- if (!isValid) {
80
- logger.debug({
81
- hasName: !!tool?.name,
82
- hasSchema: !!tool?.input_schema,
83
- hasFunctionName: !!tool?.function?.name,
84
- toolType: typeof tool
85
- }, 'Filtered out malformed tool');
86
- }
87
- return isValid;
88
- });
89
-
90
- if (validTools.length === 0 && tools.length > 0) {
91
- logger.warn({ originalCount: tools.length }, 'All tools filtered out as malformed - returning original');
92
- return tools;
93
- }
94
-
95
- return validTools.map(tool => {
96
- // If already in OpenAI format, return as-is (no compression for OpenAI format)
97
- if (tool.function && !tool.input_schema) {
98
- return tool;
99
- }
100
-
101
- // Compress Anthropic format
73
+ return tools.map(tool => {
102
74
  const compressed = {
103
75
  name: tool.name,
104
76
  input_schema: {
105
- type: tool.input_schema?.type || "object",
77
+ type: tool.input_schema.type,
106
78
  properties: {},
107
- required: tool.input_schema?.required || [],
79
+ required: tool.input_schema.required || [],
108
80
  }
109
81
  };
110
82
 
@@ -218,7 +190,7 @@ function optimizeSystemPrompt(system, context = {}, mode = null) {
218
190
 
219
191
  // 2. Remove file operation guidelines if no file tools
220
192
  const hasFileTools = context.tools?.some(t =>
221
- t?.name && ['Read', 'Write', 'Edit', 'Glob', 'Grep'].includes(t.name)
193
+ ['Read', 'Write', 'Edit', 'Glob', 'Grep'].includes(t.name)
222
194
  );
223
195
  if (!hasFileTools) {
224
196
  text = removeSection(text, /# File Operations?[\s\S]*?(?=\n#|\n\n[A-Z]|$)/gi, optimizations, 'file operations');
@@ -226,7 +198,7 @@ function optimizeSystemPrompt(system, context = {}, mode = null) {
226
198
 
227
199
  // 3. Remove git guidelines if no git tools
228
200
  const hasGitTools = context.tools?.some(t =>
229
- t?.name && t.name.toLowerCase().includes('git')
201
+ t.name.toLowerCase().includes('git')
230
202
  );
231
203
  if (!hasGitTools) {
232
204
  text = removeSection(text, /# Git.*?[\s\S]*?(?=\n#|\n\n[A-Z]|$)/gi, optimizations, 'git guidelines');
@@ -235,7 +207,7 @@ function optimizeSystemPrompt(system, context = {}, mode = null) {
235
207
 
236
208
  // 4. Remove web search guidelines if no web tools
237
209
  const hasWebTools = context.tools?.some(t =>
238
- t?.name && ['WebSearch', 'WebFetch'].includes(t.name)
210
+ ['WebSearch', 'WebFetch'].includes(t.name)
239
211
  );
240
212
  if (!hasWebTools) {
241
213
  text = removeSection(text, /# Web.*?[\s\S]*?(?=\n#|\n\n[A-Z]|$)/gi, optimizations, 'web guidelines');
@@ -84,28 +84,15 @@ function isLocalProvider(provider) {
84
84
 
85
85
  /**
86
86
  * Check if fallback is enabled
87
- * In tier routing mode, fallback is always enabled
88
87
  */
89
88
  function isFallbackEnabled() {
90
- if (config.modelTiers?.enabled) {
91
- // Tier routing mode: fallback always enabled
92
- return true;
93
- }
94
- // Static provider mode: use FALLBACK_ENABLED
95
89
  return config.modelProvider?.fallbackEnabled !== false;
96
90
  }
97
91
 
98
92
  /**
99
93
  * Get the configured fallback provider
100
- * In tier routing mode, fallback = TIER_REASONING provider
101
94
  */
102
95
  function getFallbackProvider() {
103
- if (config.modelTiers?.enabled && config.modelTiers?.REASONING) {
104
- // Tier routing mode: extract provider from TIER_REASONING
105
- const match = config.modelTiers.REASONING.match(/^([a-z-]+):/);
106
- if (match) return match[1];
107
- }
108
- // Static provider mode: use FALLBACK_PROVIDER
109
96
  return config.modelProvider?.fallbackProvider ?? 'databricks';
110
97
  }
111
98
 
@@ -54,20 +54,6 @@ class ModelTierSelector {
54
54
  * Load tier configuration from JSON file
55
55
  */
56
56
  _loadConfig() {
57
- // Check if tier routing mode is active (all 4 TIER_* env vars set)
58
- const tierRoutingMode = !!(
59
- config.modelTiers?.SIMPLE?.trim() &&
60
- config.modelTiers?.MEDIUM?.trim() &&
61
- config.modelTiers?.COMPLEX?.trim() &&
62
- config.modelTiers?.REASONING?.trim()
63
- );
64
-
65
- if (tierRoutingMode) {
66
- logger.debug('[ModelTiers] Tier routing mode active, building config from TIER_* env vars');
67
- this._buildFromEnvVars();
68
- return;
69
- }
70
-
71
57
  try {
72
58
  if (fs.existsSync(TIER_CONFIG_PATH)) {
73
59
  const data = JSON.parse(fs.readFileSync(TIER_CONFIG_PATH, 'utf8'));
@@ -122,49 +108,6 @@ class ModelTierSelector {
122
108
  /**
123
109
  * Load default tier config
124
110
  */
125
- /**
126
- * Build tier config from TIER_* environment variables
127
- * Format: TIER_SIMPLE=provider:model
128
- */
129
- _buildFromEnvVars() {
130
- this.tierConfig = {};
131
- this.localProviders = {
132
- ollama: { free: true, defaultTier: 'SIMPLE' },
133
- llamacpp: { free: true, defaultTier: 'SIMPLE' },
134
- lmstudio: { free: true, defaultTier: 'SIMPLE' },
135
- mlx: { free: true, defaultTier: 'SIMPLE' },
136
- };
137
-
138
- const tiers = ['SIMPLE', 'MEDIUM', 'COMPLEX', 'REASONING'];
139
- for (const tier of tiers) {
140
- const envValue = config.modelTiers?.[tier]?.trim();
141
- if (!envValue) continue;
142
-
143
- // Parse provider:model format
144
- const match = envValue.match(/^([a-z-]+):(.+)$/);
145
- if (!match) {
146
- logger.warn({ tier, value: envValue }, '[ModelTiers] Invalid TIER format, expected provider:model');
147
- continue;
148
- }
149
-
150
- const [, provider, model] = match;
151
-
152
- // Initialize tier config if not exists
153
- if (!this.tierConfig[tier]) {
154
- this.tierConfig[tier] = { preferred: {} };
155
- }
156
-
157
- // Set this as the ONLY preferred model for this tier+provider
158
- this.tierConfig[tier].preferred[provider] = [model];
159
-
160
- logger.debug({
161
- tier,
162
- provider,
163
- model
164
- }, '[ModelTiers] Tier configured from env');
165
- }
166
- }
167
-
168
111
  _loadDefaults() {
169
112
  this.tierConfig = {
170
113
  SIMPLE: { preferred: { ollama: ['llama3.2'], openai: ['gpt-4o-mini'] } },