@blockrun/clawrouter 0.8.31 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1573,9 +1573,10 @@ var DEFAULT_ROUTING_CONFIG = {
1573
1573
  primary: "nvidia/kimi-k2.5",
1574
1574
  // $0.55/$2.5 - best quality/price for simple tasks
1575
1575
  fallback: [
1576
+ "google/gemini-2.5-flash",
1577
+ // 1M context, cost-effective
1576
1578
  "nvidia/gpt-oss-120b",
1577
1579
  // FREE fallback
1578
- "google/gemini-2.5-flash",
1579
1580
  "deepseek/deepseek-chat"
1580
1581
  ]
1581
1582
  },
@@ -1583,17 +1584,22 @@ var DEFAULT_ROUTING_CONFIG = {
1583
1584
  primary: "xai/grok-code-fast-1",
1584
1585
  // Code specialist, $0.20/$1.50
1585
1586
  fallback: [
1586
- "xai/grok-4-1-fast-non-reasoning",
1587
- // Upgraded Grok 4.1
1587
+ "google/gemini-2.5-flash",
1588
+ // 1M context, cost-effective
1588
1589
  "deepseek/deepseek-chat",
1589
- "google/gemini-2.5-flash"
1590
+ "xai/grok-4-1-fast-non-reasoning"
1591
+ // Upgraded Grok 4.1
1590
1592
  ]
1591
1593
  },
1592
1594
  COMPLEX: {
1593
1595
  primary: "google/gemini-3-pro-preview",
1594
1596
  // Latest Gemini - upgraded from 2.5
1595
1597
  fallback: [
1598
+ "google/gemini-2.5-flash",
1599
+ // CRITICAL: 1M context, cheap failsafe before expensive models
1596
1600
  "google/gemini-2.5-pro",
1601
+ "deepseek/deepseek-chat",
1602
+ // Another cheap option
1597
1603
  "xai/grok-4-0709",
1598
1604
  "openai/gpt-4o",
1599
1605
  "openai/gpt-5.2",
@@ -1604,11 +1610,12 @@ var DEFAULT_ROUTING_CONFIG = {
1604
1610
  primary: "xai/grok-4-1-fast-reasoning",
1605
1611
  // Upgraded Grok 4.1 reasoning $0.20/$0.50
1606
1612
  fallback: [
1613
+ "deepseek/deepseek-reasoner",
1614
+ // Cheap reasoning model as first fallback
1607
1615
  "xai/grok-4-fast-reasoning",
1608
1616
  "openai/o3",
1609
1617
  "openai/o4-mini",
1610
1618
  // Latest o-series mini
1611
- "deepseek/deepseek-reasoner",
1612
1619
  "moonshot/kimi-k2.5"
1613
1620
  ]
1614
1621
  }
@@ -2308,6 +2315,739 @@ var BalanceMonitor = class {
2308
2315
  }
2309
2316
  };
2310
2317
 
2318
+ // src/compression/types.ts
2319
/**
 * Baseline compression settings.
 *
 * Only the layers marked "safe" are switched on; the layers that replace text
 * with codes, or that may drop context, stay off by default.
 */
var DEFAULT_COMPRESSION_CONFIG = {
  enabled: true,
  preserveRaw: true,
  layers: {
    deduplication: true, // Safe: removes duplicate messages
    whitespace: true, // Safe: normalizes whitespace
    dictionary: false, // DISABLED: requires model to understand codebook
    paths: false, // DISABLED: requires model to understand path codes
    jsonCompact: true, // Safe: just removes JSON whitespace
    observation: false, // DISABLED: may lose important context
    dynamicCodebook: false // DISABLED: requires model to understand codes
  },
  dictionary: {
    maxEntries: 50,
    minPhraseLength: 15,
    includeCodebookHeader: false // No codebook header needed
  }
};
2345
+
2346
+ // src/compression/layers/deduplication.ts
2347
+ import crypto2 from "crypto";
2348
/**
 * Computes an MD5 fingerprint of a chat message for duplicate detection.
 *
 * The fingerprint covers the role, content, tool_call_id, name and, when
 * present, the name/arguments of every tool call.
 *
 * @param {object} message - Chat message to fingerprint.
 * @returns {string} Hex-encoded MD5 digest.
 */
function hashMessage(message) {
  const { content } = message;
  // Structured (array/object) content must be serialized explicitly: letting
  // Array.prototype.join coerce it yields "[object Object]" for every
  // multi-part message, which makes distinct messages hash identically.
  let contentText = "";
  if (typeof content === "string") {
    contentText = content;
  } else if (content) {
    contentText = JSON.stringify(content);
  }
  const parts = [
    message.role,
    contentText,
    message.tool_call_id || "",
    message.name || ""
  ];
  if (message.tool_calls) {
    parts.push(
      JSON.stringify(
        // Optional chaining: a tool call without a `function` payload must
        // not crash hashing.
        message.tool_calls.map((tc) => ({
          name: tc.function?.name,
          args: tc.function?.arguments
        }))
      )
    );
  }
  return crypto2.createHash("md5").update(parts.join("|")).digest("hex");
}
2368
/**
 * Drops repeated messages from a conversation.
 *
 * System, user and tool messages are always kept, as are assistant messages
 * carrying a tool call that some tool message answers. Every other message is
 * kept only the first time its hash is seen.
 *
 * @param {object[]} messages - Conversation messages.
 * @returns {{messages: object[], duplicatesRemoved: number, originalCount: number}}
 */
function deduplicateMessages(messages) {
  // Tool-call ids that have a matching tool result somewhere in the history.
  const answeredCallIds = new Set(
    messages.filter((m) => m.role === "tool" && m.tool_call_id).map((m) => m.tool_call_id)
  );
  const alwaysKeptRoles = new Set(["system", "user", "tool"]);
  const seenHashes = new Set();
  let duplicatesRemoved = 0;

  const kept = messages.filter((message) => {
    if (alwaysKeptRoles.has(message.role)) return true;
    const answersExist =
      message.role === "assistant" &&
      message.tool_calls &&
      message.tool_calls.some((tc) => answeredCallIds.has(tc.id));
    if (answersExist) return true;

    const digest = hashMessage(message);
    if (seenHashes.has(digest)) {
      duplicatesRemoved += 1;
      return false;
    }
    seenHashes.add(digest);
    return true;
  });

  return {
    messages: kept,
    duplicatesRemoved,
    originalCount: messages.length
  };
}
2414
+
2415
+ // src/compression/layers/whitespace.ts
2416
/**
 * Normalizes whitespace in a text block without changing its structure.
 *
 * Steps: CRLF/CR -> LF, strip trailing spaces/tabs per line, collapse runs of
 * 3+ newlines to a blank line, collapse runs of spaces that follow a
 * non-whitespace character, turn each tab into one space, trim the ends.
 *
 * Leading indentation is left alone. The previous inner-space rule
 * (`([^\n]) {2,}`) also matched leading spaces, flattening every indent of
 * three or more spaces to two (which made the later deep-indent rule
 * unreachable) and destroying the nesting of indentation-sensitive text such
 * as Python or YAML.
 *
 * @param {string} content - Text to normalize; falsy or non-string values are
 *   returned unchanged.
 * @returns {string} Normalized text.
 */
function normalizeWhitespace(content) {
  if (!content || typeof content !== "string") return content;
  return content
    .replace(/\r\n/g, "\n")
    .replace(/\r/g, "\n")
    // Strip trailing whitespace first so whitespace-only lines count as blank
    // lines when newline runs are collapsed below.
    .replace(/[ \t]+$/gm, "")
    .replace(/\n{3,}/g, "\n\n")
    // Only collapse spaces that follow visible text; indentation is preserved.
    .replace(/(\S) {2,}/g, "$1 ")
    .replace(/\t/g, " ")
    .trim();
}
2420
/**
 * Applies normalizeWhitespace to the text content of every message.
 *
 * Messages whose content is empty or not a plain string (for example an array
 * of content parts) are passed through untouched; previously such content
 * reached String-only methods and threw.
 *
 * @param {object[]} messages - Conversation messages.
 * @returns {{messages: object[], charsSaved: number}} New message list and the
 *   number of characters removed.
 */
function normalizeMessagesWhitespace(messages) {
  let charsSaved = 0;
  const result = messages.map((message) => {
    const { content } = message;
    if (typeof content !== "string" || content === "") return message;
    const normalizedContent = normalizeWhitespace(content);
    charsSaved += content.length - normalizedContent.length;
    return {
      ...message,
      content: normalizedContent
    };
  });
  return {
    messages: result,
    charsSaved
  };
}
2437
+
2438
+ // src/compression/codebook.ts
2439
/**
 * Fixed dictionary mapping short codes to phrases they stand for.
 * Entry order is significant to consumers that iterate the object.
 */
var STATIC_CODEBOOK = {
  // High-impact: OpenClaw/Agent system prompt patterns (very common)
  "$OC01": "unbrowse_", // Common prefix in tool names
  "$OC02": "<location>",
  "$OC03": "</location>",
  "$OC04": "<name>",
  "$OC05": "</name>",
  "$OC06": "<description>",
  "$OC07": "</description>",
  "$OC08": "(may need login)",
  "$OC09": "API skill for OpenClaw",
  "$OC10": "endpoints",

  // Skill/tool markers
  "$SK01": "<available_skills>",
  "$SK02": "</available_skills>",
  "$SK03": "<skill>",
  "$SK04": "</skill>",

  // Schema patterns (very common in tool definitions)
  "$T01": 'type: "function"',
  "$T02": '"type": "function"',
  "$T03": '"type": "string"',
  "$T04": '"type": "object"',
  "$T05": '"type": "array"',
  "$T06": '"type": "boolean"',
  "$T07": '"type": "number"',

  // Common descriptions
  "$D01": "description:",
  "$D02": '"description":',

  // Common instructions
  "$I01": "You are a personal assistant",
  "$I02": "Tool names are case-sensitive",
  "$I03": "Call tools exactly as listed",
  "$I04": "Use when",
  "$I05": "without asking",

  // Safety phrases
  "$S01": "Do not manipulate or persuade",
  "$S02": "Prioritize safety and human oversight",
  "$S03": "unless explicitly requested",

  // JSON patterns
  "$J01": '"required": ["',
  "$J02": '"properties": {',
  "$J03": '"additionalProperties": false',

  // Heartbeat patterns
  "$H01": "HEARTBEAT_OK",
  "$H02": "Read HEARTBEAT.md if it exists",

  // Role markers
  "$R01": '"role": "system"',
  "$R02": '"role": "user"',
  "$R03": '"role": "assistant"',
  "$R04": '"role": "tool"',

  // Common endings/phrases
  "$E01": "would you like to",
  "$E02": "Let me know if you",
  "$E03": "internal APIs",
  "$E04": "session cookies",

  // BlockRun model aliases (common in prompts)
  "$M01": "blockrun/",
  "$M02": "openai/",
  "$M03": "anthropic/",
  "$M04": "google/",
  "$M05": "xai/"
};
2502
/**
 * Builds the phrase -> code lookup from STATIC_CODEBOOK.
 *
 * @returns {Object<string, string>} Map keyed by phrase, valued by code.
 */
function getInverseCodebook() {
  return Object.entries(STATIC_CODEBOOK).reduce((phraseToCode, [code, phrase]) => {
    phraseToCode[phrase] = code;
    return phraseToCode;
  }, {});
}
2509
/**
 * Renders the legend describing which dictionary codes and path codes were
 * used, as up to two lines: "[Dict: ...]" and "[Paths: ...]".
 *
 * @param {Set<string>} usedCodes - Static codebook codes that were applied.
 * @param {Object<string, string>} [pathMap={}] - Path code -> prefix map.
 * @returns {string} Header text, or "" when nothing was used.
 */
function generateCodebookHeader(usedCodes, pathMap = {}) {
  const pathPairs = Object.entries(pathMap);
  if (usedCodes.size === 0 && pathPairs.length === 0) {
    return "";
  }
  const sections = [];
  if (usedCodes.size > 0) {
    const dictText = [...usedCodes]
      .map((code) => `${code}=${STATIC_CODEBOOK[code]}`)
      .join(", ");
    sections.push(`[Dict: ${dictText}]`);
  }
  if (pathPairs.length > 0) {
    const pathText = pathPairs.map(([code, path]) => `${code}=${path}`).join(", ");
    sections.push(`[Paths: ${pathText}]`);
  }
  return sections.join("\n");
}
2524
+
2525
+ // src/compression/layers/dictionary.ts
2526
/**
 * Replaces every codebook phrase found in `content` with its short code.
 *
 * Phrases are tried longest-first so a long phrase wins over a shorter one it
 * contains. A phrase is skipped when its code is not strictly shorter than the
 * phrase itself (e.g. "xai/" -> "$M05"), because such a substitution would
 * grow the text.
 *
 * Replacement uses split/join rather than String.replace, so the code is
 * inserted literally: codes start with "$", which String.replace would
 * otherwise treat as the start of a replacement pattern.
 *
 * @param {string} content - Text to encode.
 * @param {Object<string, string>} inverseCodebook - Phrase -> code map.
 * @returns {{encoded: string, substitutions: number, codes: Set<string>, charsSaved: number}}
 */
function encodeContent(content, inverseCodebook) {
  let encoded = content;
  let substitutions = 0;
  let charsSaved = 0;
  const codes = new Set();
  const phrases = Object.keys(inverseCodebook).sort((a, b) => b.length - a.length);
  for (const phrase of phrases) {
    const code = inverseCodebook[phrase];
    // Also rejects the empty phrase, which would otherwise match everywhere.
    if (phrase.length <= code.length) continue;
    const pieces = encoded.split(phrase);
    const occurrences = pieces.length - 1;
    if (occurrences === 0) continue;
    encoded = pieces.join(code);
    substitutions += occurrences;
    charsSaved += occurrences * (phrase.length - code.length);
    codes.add(code);
  }
  return { encoded, substitutions, codes, charsSaved };
}
2545
/**
 * Backslash-escapes regular-expression metacharacters so `str` can be used as
 * a literal inside a RegExp.
 *
 * @param {string} str - Raw text.
 * @returns {string} Escaped text.
 */
function escapeRegex(str) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metacharacters, (metachar) => `\\${metachar}`);
}
2548
/**
 * Runs static-dictionary encoding over the text content of every message.
 *
 * Messages with empty or non-string content (e.g. an array of content parts)
 * are passed through unchanged instead of being handed to string-only
 * encoding, which would throw.
 *
 * @param {object[]} messages - Conversation messages.
 * @returns {{messages: object[], substitutionCount: number, usedCodes: Set<string>, charsSaved: number}}
 */
function encodeMessages(messages) {
  const inverseCodebook = getInverseCodebook();
  let totalSubstitutions = 0;
  let totalCharsSaved = 0;
  const allUsedCodes = new Set();
  const result = messages.map((message) => {
    if (typeof message.content !== "string" || message.content === "") return message;
    const { encoded, substitutions, codes, charsSaved } = encodeContent(
      message.content,
      inverseCodebook
    );
    totalSubstitutions += substitutions;
    totalCharsSaved += charsSaved;
    for (const code of codes) allUsedCodes.add(code);
    return {
      ...message,
      content: encoded
    };
  });
  return {
    messages: result,
    substitutionCount: totalSubstitutions,
    usedCodes: allUsedCodes,
    charsSaved: totalCharsSaved
  };
}
2574
+
2575
+ // src/compression/layers/paths.ts
2576
// Matches slash-separated paths with at least three segments, e.g. /usr/local/bin.
var PATH_REGEX = /(?:\/[\w.-]+){3,}/g;

/**
 * Collects every path-like substring from the text content of the messages.
 *
 * Non-string content (e.g. an array of content parts) is skipped; it has no
 * `match` method and previously caused a TypeError.
 *
 * @param {object[]} messages - Conversation messages.
 * @returns {string[]} All matches, in order of appearance (duplicates kept).
 */
function extractPaths(messages) {
  const paths = [];
  for (const message of messages) {
    const { content } = message;
    if (typeof content !== "string" || content === "") continue;
    // Push one at a time: spreading a very large match list into push() can
    // exceed the engine's argument limit.
    for (const found of content.match(PATH_REGEX) ?? []) {
      paths.push(found);
    }
  }
  return paths;
}
2588
/**
 * Finds directory prefixes (two or more segments deep, never the full path)
 * that occur in at least three of the given paths.
 *
 * @param {string[]} paths - Path strings.
 * @returns {string[]} Up to five prefixes, longest first, each with a leading
 *   and trailing "/".
 */
function findFrequentPrefixes(paths) {
  const tally = new Map();
  paths.forEach((fullPath) => {
    const segments = fullPath.split("/").filter((segment) => segment !== "");
    for (let depth = 2; depth < segments.length; depth += 1) {
      const prefix = `/${segments.slice(0, depth).join("/")}/`;
      const seen = tally.get(prefix) || 0;
      tally.set(prefix, seen + 1);
    }
  });
  const frequent = [...tally].filter(([, hits]) => hits >= 3);
  frequent.sort((left, right) => right[0].length - left[0].length);
  return frequent.slice(0, 5).map(([prefix]) => prefix);
}
2599
/**
 * Replaces frequently repeated path prefixes with short codes ($P1, $P2, ...).
 *
 * Nothing is changed when fewer than five paths are found or when no prefix is
 * frequent enough. Messages with empty or non-string content are passed
 * through untouched (array content has no `split` and previously threw).
 *
 * @param {object[]} messages - Conversation messages.
 * @returns {{messages: object[], pathMap: Object<string, string>, charsSaved: number}}
 */
function shortenPaths(messages) {
  const unchanged = { messages, pathMap: {}, charsSaved: 0 };
  const allPaths = extractPaths(messages);
  if (allPaths.length < 5) return unchanged;
  const prefixes = findFrequentPrefixes(allPaths);
  if (prefixes.length === 0) return unchanged;

  const pathMap = {};
  prefixes.forEach((prefix, i) => {
    pathMap[`$P${i + 1}`] = prefix;
  });
  const replacements = Object.entries(pathMap);

  let charsSaved = 0;
  const result = messages.map((message) => {
    if (typeof message.content !== "string" || message.content === "") return message;
    let content = message.content;
    const originalLength = content.length;
    for (const [code, prefix] of replacements) {
      // The prefix ends with "/", so the separator is re-added after the code.
      content = content.split(prefix).join(code + "/");
    }
    charsSaved += originalLength - content.length;
    return {
      ...message,
      content
    };
  });
  return {
    messages: result,
    pathMap,
    charsSaved
  };
}
2640
+
2641
+ // src/compression/layers/json-compact.ts
2642
/**
 * Removes insignificant whitespace from a JSON document.
 *
 * The text is validated with JSON.parse but NOT re-serialized: a
 * parse/stringify round trip silently alters data (integers above 2^53 lose
 * precision, `2.0` becomes `2`, duplicate keys are dropped). Whitespace
 * outside string literals is stripped from the original text, so every token
 * is preserved exactly.
 *
 * @param {string} jsonString - Candidate JSON text.
 * @returns {string} Compacted JSON, or the input unchanged when it is not a
 *   string or not valid JSON.
 */
function compactJson(jsonString) {
  if (typeof jsonString !== "string") return jsonString;
  try {
    JSON.parse(jsonString);
  } catch {
    return jsonString;
  }
  // In valid JSON every double quote outside a string opens a string literal,
  // so the first alternative consumes whole strings (kept verbatim) and the
  // second only ever sees whitespace between tokens.
  return jsonString.replace(/"(?:[^"\\]|\\.)*"|\s+/g, (token) =>
    token[0] === '"' ? token : ""
  );
}
2650
/**
 * Cheap shape test: does the trimmed text start and end like a JSON object or
 * array? No parsing is done.
 *
 * @param {string} str - Text to inspect.
 * @returns {boolean} True for "{...}" or "[...]" shaped text.
 */
function looksLikeJson(str) {
  const text = str.trim();
  const isWrappedBy = (open, close) => text.startsWith(open) && text.endsWith(close);
  return isWrappedBy("{", "}") || isWrappedBy("[", "]");
}
2654
/**
 * Compacts the JSON `arguments` string of each function tool call.
 *
 * Tool calls that have no `function` payload, or whose arguments are not a
 * string, are returned as-is; dereferencing `tc.function.arguments` on them
 * previously threw.
 *
 * @param {object[]} toolCalls - Tool calls from an assistant message.
 * @returns {object[]} New array; compacted entries are shallow copies.
 */
function compactToolCalls(toolCalls) {
  return toolCalls.map((tc) => {
    if (typeof tc?.function?.arguments !== "string") return tc;
    return {
      ...tc,
      function: {
        ...tc.function,
        arguments: compactJson(tc.function.arguments)
      }
    };
  });
}
2663
/**
 * Compacts JSON carried by messages: the arguments of assistant tool calls and
 * the content of tool messages that look like JSON.
 *
 * Tool content is only inspected when it is a string; array content (content
 * parts) previously reached `looksLikeJson` and threw on `trim`.
 *
 * @param {object[]} messages - Conversation messages.
 * @returns {{messages: object[], charsSaved: number}} Shallow-copied messages
 *   and the number of characters removed.
 */
function compactMessagesJson(messages) {
  let charsSaved = 0;
  const result = messages.map((message) => {
    const newMessage = { ...message };
    if (message.tool_calls && message.tool_calls.length > 0) {
      const originalLength = JSON.stringify(message.tool_calls).length;
      newMessage.tool_calls = compactToolCalls(message.tool_calls);
      charsSaved += originalLength - JSON.stringify(newMessage.tool_calls).length;
    }
    if (
      message.role === "tool" &&
      typeof message.content === "string" &&
      message.content !== "" &&
      looksLikeJson(message.content)
    ) {
      const compacted = compactJson(message.content);
      charsSaved += message.content.length - compacted.length;
      newMessage.content = compacted;
    }
    return newMessage;
  });
  return {
    messages: result,
    charsSaved
  };
}
2686
+
2687
+ // src/compression/layers/observation.ts
2688
+ var TOOL_RESULT_THRESHOLD = 500;
2689
+ var COMPRESSED_RESULT_MAX = 300;
2690
+ function compressToolResult(content) {
2691
+ if (!content || content.length <= TOOL_RESULT_THRESHOLD) {
2692
+ return content;
2693
+ }
2694
+ const lines = content.split("\n").map((l) => l.trim()).filter(Boolean);
2695
+ const errorLines = lines.filter(
2696
+ (l) => /error|exception|failed|denied|refused|timeout|invalid/i.test(l) && l.length < 200
2697
+ );
2698
+ const statusLines = lines.filter(
2699
+ (l) => /success|complete|created|updated|found|result|status|total|count/i.test(l) && l.length < 150
2700
+ );
2701
+ const jsonMatches = [];
2702
+ const jsonPattern = /"(id|name|status|error|message|count|total|url|path)":\s*"?([^",}\n]+)"?/gi;
2703
+ let match;
2704
+ while ((match = jsonPattern.exec(content)) !== null) {
2705
+ jsonMatches.push(`${match[1]}: ${match[2].slice(0, 50)}`);
2706
+ }
2707
+ const firstLine = lines[0]?.slice(0, 100);
2708
+ const lastLine = lines.length > 1 ? lines[lines.length - 1]?.slice(0, 100) : "";
2709
+ const parts = [];
2710
+ if (errorLines.length > 0) {
2711
+ parts.push("[ERR] " + errorLines.slice(0, 3).join(" | "));
2712
+ }
2713
+ if (statusLines.length > 0) {
2714
+ parts.push(statusLines.slice(0, 3).join(" | "));
2715
+ }
2716
+ if (jsonMatches.length > 0) {
2717
+ parts.push(jsonMatches.slice(0, 5).join(", "));
2718
+ }
2719
+ if (parts.length === 0) {
2720
+ parts.push(firstLine || "");
2721
+ if (lines.length > 2) {
2722
+ parts.push(`[...${lines.length - 2} lines...]`);
2723
+ }
2724
+ if (lastLine && lastLine !== firstLine) {
2725
+ parts.push(lastLine);
2726
+ }
2727
+ }
2728
+ let result = parts.join("\n");
2729
+ if (result.length > COMPRESSED_RESULT_MAX) {
2730
+ result = result.slice(0, COMPRESSED_RESULT_MAX - 20) + "\n[...truncated]";
2731
+ }
2732
+ return result;
2733
+ }
2734
/**
 * Replaces repeated large message bodies (500+ characters) with a short
 * pointer to the first message that carried the same text.
 *
 * Messages are matched on their complete content. Matching on only the first
 * 200 characters, as before, treated different messages that merely share a
 * prefix as duplicates and discarded the later one's content.
 *
 * @param {object[]} messages - Conversation messages.
 * @returns {{messages: object[], charsSaved: number}}
 */
function deduplicateLargeBlocks(messages) {
  const firstIndexByContent = new Map();
  let charsSaved = 0;
  const result = messages.map((msg, idx) => {
    const { content } = msg;
    if (typeof content !== "string" || content.length < 500) {
      return msg;
    }
    const firstIdx = firstIndexByContent.get(content);
    if (firstIdx === undefined) {
      firstIndexByContent.set(content, idx);
      return msg;
    }
    // Message numbers in the pointer are 1-based.
    const compressed = `[See message #${firstIdx + 1} - same content]`;
    charsSaved += content.length - compressed.length;
    return { ...msg, content: compressed };
  });
  return { messages: result, charsSaved };
}
2754
/**
 * Shrinks tool observations: summarizes tool results longer than
 * TOOL_RESULT_THRESHOLD (only when that saves more than 50 characters), then
 * replaces repeated large blocks with pointers.
 *
 * @param {object[]} messages - Conversation messages.
 * @returns {{messages: object[], charsSaved: number, observationsCompressed: number}}
 */
function compressObservations(messages) {
  let summarySavings = 0;
  let observationsCompressed = 0;

  const summarized = messages.map((msg) => {
    const isLargeToolResult =
      msg.role === "tool" && msg.content && msg.content.length > TOOL_RESULT_THRESHOLD;
    if (!isLargeToolResult) {
      return msg;
    }
    const compressed = compressToolResult(msg.content);
    const saved = msg.content.length - compressed.length;
    if (saved <= 50) {
      // Not worth the loss of detail.
      return msg;
    }
    summarySavings += saved;
    observationsCompressed += 1;
    return { ...msg, content: compressed };
  });

  const { messages: deduplicated, charsSaved: dedupSavings } = deduplicateLargeBlocks(summarized);
  return {
    messages: deduplicated,
    charsSaved: summarySavings + dedupSavings,
    observationsCompressed
  };
}
2783
+
2784
+ // src/compression/layers/dynamic-codebook.ts
2785
// Candidate phrases must be between MIN_PHRASE_LENGTH and MAX_PHRASE_LENGTH characters.
var MIN_PHRASE_LENGTH = 20;
var MAX_PHRASE_LENGTH = 200;
// A phrase must occur at least this many times to earn a code.
var MIN_FREQUENCY = 3;
// Maximum number of dynamic codebook entries.
var MAX_ENTRIES = 100;
// NOTE(review): "$D01"/"$D02" are also keys of the static codebook, so dynamic
// codes can collide with static ones if both layers are enabled - confirm
// before enabling them together.
var CODE_PREFIX = "$D";

/**
 * Counts candidate phrases in the combined conversation text.
 *
 * Candidates come from two views of the text: sentence-like segments and whole
 * lines. A phrase that shows up in both views (a line that is also a complete
 * sentence) is the same occurrence seen twice, so the two tallies are merged
 * with max() instead of being added; adding them doubled the frequency and let
 * phrases pass the frequency threshold too early.
 *
 * @param {string} allContent - Concatenated message text.
 * @returns {Map<string, number>} Phrase -> occurrence count.
 */
function findRepeatedPhrases(allContent) {
  const tally = (pieces) => {
    const counts = new Map();
    for (const piece of pieces) {
      const trimmed = piece.trim();
      if (trimmed.length >= MIN_PHRASE_LENGTH && trimmed.length <= MAX_PHRASE_LENGTH) {
        counts.set(trimmed, (counts.get(trimmed) ?? 0) + 1);
      }
    }
    return counts;
  };

  const phrases = tally(allContent.split(/(?<=[.!?\n])\s+/));
  for (const [phrase, lineCount] of tally(allContent.split("\n"))) {
    phrases.set(phrase, Math.max(phrases.get(phrase) ?? 0, lineCount));
  }
  return phrases;
}
2808
/**
 * Builds a per-conversation codebook from phrases that repeat often enough to
 * be worth replacing.
 *
 * Only string content is scanned. Structured content (e.g. an array of content
 * parts) was previously concatenated as "[object Object]", polluting the
 * phrase statistics.
 *
 * @param {object[]} messages - Conversation messages.
 * @returns {Object<string, string>} Code -> phrase map, best savings first,
 *   at most MAX_ENTRIES entries.
 */
function buildDynamicCodebook(messages) {
  let allContent = "";
  for (const msg of messages) {
    if (typeof msg.content === "string" && msg.content !== "") {
      allContent += msg.content + "\n";
    }
  }
  const phrases = findRepeatedPhrases(allContent);
  // Nominal code length used for the savings estimate.
  const codeLength = 4;
  const candidates = [];
  for (const [phrase, count] of phrases.entries()) {
    if (count < MIN_FREQUENCY) continue;
    const savings = (phrase.length - codeLength) * count;
    if (savings > 50) {
      candidates.push({ phrase, count, savings });
    }
  }
  candidates.sort((a, b) => b.savings - a.savings);
  const codebook = {};
  candidates.slice(0, MAX_ENTRIES).forEach((candidate, i) => {
    const code = `${CODE_PREFIX}${String(i + 1).padStart(2, "0")}`;
    codebook[code] = candidate.phrase;
  });
  return codebook;
}
2835
/**
 * Escapes regular-expression metacharacters in `str` so it matches literally.
 *
 * @param {string} str - Raw text.
 * @returns {string} Text with each metacharacter prefixed by a backslash.
 */
function escapeRegex2(str) {
  const special = /[.*+?^${}()|[\]\\]/g;
  return str.replace(special, (ch) => "\\" + ch);
}
2838
/**
 * Builds a dynamic codebook for the conversation and substitutes its phrases
 * with their codes in every message.
 *
 * Messages with empty or non-string content are passed through unchanged
 * (array content previously threw on `match`). Substitution uses split/join so
 * the "$"-prefixed code is inserted literally rather than being interpreted as
 * a String.replace replacement pattern.
 *
 * @param {object[]} messages - Conversation messages.
 * @returns {{messages: object[], charsSaved: number, dynamicCodes: Object<string, string>, substitutions: number}}
 */
function applyDynamicCodebook(messages) {
  const codebook = buildDynamicCodebook(messages);
  const codebookEntries = Object.entries(codebook);
  if (codebookEntries.length === 0) {
    return {
      messages,
      charsSaved: 0,
      dynamicCodes: {},
      substitutions: 0
    };
  }
  const phraseToCode = {};
  for (const [code, phrase] of codebookEntries) {
    phraseToCode[phrase] = code;
  }
  // Longest phrases first so they are not broken up by shorter ones they contain.
  const sortedPhrases = Object.keys(phraseToCode).sort((a, b) => b.length - a.length);
  let charsSaved = 0;
  let substitutions = 0;
  const result = messages.map((msg) => {
    if (typeof msg.content !== "string" || msg.content === "") return msg;
    let content = msg.content;
    for (const phrase of sortedPhrases) {
      const code = phraseToCode[phrase];
      const pieces = content.split(phrase);
      const occurrences = pieces.length - 1;
      if (occurrences === 0) continue;
      content = pieces.join(code);
      charsSaved += (phrase.length - code.length) * occurrences;
      substitutions += occurrences;
    }
    return { ...msg, content };
  });
  return {
    messages: result,
    charsSaved,
    dynamicCodes: codebook,
    substitutions
  };
}
2877
/**
 * Renders a "[DynDict: ...]" legend for a dynamic codebook.
 *
 * Only the first 20 entries are listed, and phrases longer than 40 characters
 * are shown as their first 37 characters followed by "...".
 *
 * @param {Object<string, string>} codebook - Code -> phrase map.
 * @returns {string} Legend text, or "" for an empty codebook.
 */
function generateDynamicCodebookHeader(codebook) {
  const pairs = Object.entries(codebook);
  if (pairs.length === 0) return "";
  const shown = [];
  for (const [code, phrase] of pairs.slice(0, 20)) {
    const preview = phrase.length > 40 ? `${phrase.slice(0, 37)}...` : phrase;
    shown.push(`${code}=${preview}`);
  }
  return `[DynDict: ${shown.join(", ")}]`;
}
2885
+
2886
+ // src/compression/index.ts
2887
/**
 * Estimates the size of a conversation in characters: message content plus
 * the JSON-serialized tool calls.
 *
 * String content counts by its length. Structured content (e.g. an array of
 * content parts) counts by its serialized length; taking `.length` of such an
 * array, as before, returned the number of parts rather than their size.
 *
 * @param {object[]} messages - Conversation messages.
 * @returns {number} Total character count.
 */
function calculateTotalChars(messages) {
  return messages.reduce((total, msg) => {
    const { content, tool_calls: toolCalls } = msg;
    let chars = 0;
    if (typeof content === "string") {
      chars = content.length;
    } else if (content) {
      chars = JSON.stringify(content).length;
    }
    if (toolCalls) {
      chars += JSON.stringify(toolCalls).length;
    }
    return total + chars;
  }, 0);
}
2896
/**
 * Deep-copies the message list via a JSON round trip, so only JSON-
 * representable data survives (e.g. `undefined` properties are dropped).
 *
 * @param {object[]} messages - Conversation messages.
 * @returns {object[]} Independent copy.
 */
function cloneMessages(messages) {
  const serialized = JSON.stringify(messages);
  return JSON.parse(serialized);
}
2899
/**
 * Puts the codebook legend in front of the first user message, or in a new
 * leading system message when the conversation has no user message.
 *
 * When the user message holds an array of content parts, the legend is added
 * as a leading text part; interpolating the array into a string, as before,
 * produced "[object Object]" and destroyed the message.
 *
 * @param {object[]} messages - Conversation messages.
 * @param {Set<string>} usedCodes - Static codes that were applied.
 * @param {Object<string, string>} pathMap - Path code -> prefix map.
 * @returns {object[]} Messages with the legend attached, or the input list
 *   itself when there is nothing to announce.
 */
function prependCodebookHeader(messages, usedCodes, pathMap) {
  const header = generateCodebookHeader(usedCodes, pathMap);
  if (!header) return messages;
  const userIndex = messages.findIndex((m) => m.role === "user");
  if (userIndex === -1) {
    return [{ role: "system", content: header }, ...messages];
  }
  return messages.map((msg, i) => {
    if (i !== userIndex) return msg;
    if (Array.isArray(msg.content)) {
      // assumes OpenAI-style content parts ({ type: "text", text }) - TODO confirm
      return { ...msg, content: [{ type: "text", text: header }, ...msg.content] };
    }
    return {
      ...msg,
      content: `${header}\n\n${msg.content || ""}`
    };
  });
}
2918
/**
 * Runs the enabled compression layers over a conversation, in this order:
 * deduplication, whitespace, dictionary, paths, JSON compaction, observation
 * summarizing, dynamic codebook. Optionally prepends codebook legends.
 *
 * Fixes over the previous version:
 * - `compressionRatio` is 1 (not NaN) when the input has zero characters.
 * - The dynamic codebook legend is no longer dropped when the conversation has
 *   no system message; a leading system message is created for it.
 * - A system message with array content gets the legend as a text part instead
 *   of being stringified.
 *
 * @param {object[]} messages - Conversation messages (not mutated).
 * @param {object} [config={}] - Partial config merged over
 *   DEFAULT_COMPRESSION_CONFIG (`layers` and `dictionary` merged key by key).
 * @returns {Promise<object>} Resolves to `{ messages, originalMessages,
 *   originalChars, compressedChars, compressionRatio, stats, codebook,
 *   pathMap, dynamicCodes }`.
 */
async function compressContext(messages, config = {}) {
  const fullConfig = {
    ...DEFAULT_COMPRESSION_CONFIG,
    ...config,
    layers: {
      ...DEFAULT_COMPRESSION_CONFIG.layers,
      ...config.layers
    },
    dictionary: {
      ...DEFAULT_COMPRESSION_CONFIG.dictionary,
      ...config.dictionary
    }
  };
  const createEmptyStats = () => ({
    duplicatesRemoved: 0,
    whitespaceSavedChars: 0,
    dictionarySubstitutions: 0,
    pathsShortened: 0,
    jsonCompactedChars: 0,
    observationsCompressed: 0,
    observationCharsSaved: 0,
    dynamicSubstitutions: 0,
    dynamicCharsSaved: 0
  });
  const originalChars = calculateTotalChars(messages);

  if (!fullConfig.enabled) {
    return {
      messages,
      originalMessages: messages,
      originalChars,
      compressedChars: originalChars,
      compressionRatio: 1,
      stats: createEmptyStats(),
      codebook: {},
      pathMap: {},
      dynamicCodes: {}
    };
  }

  const originalMessages = fullConfig.preserveRaw ? cloneMessages(messages) : messages;
  const stats = createEmptyStats();
  // Work on a copy so the caller's messages are never modified.
  let result = cloneMessages(messages);
  let usedCodes = new Set();
  let pathMap = {};
  let dynamicCodes = {};

  if (fullConfig.layers.deduplication) {
    const dedupResult = deduplicateMessages(result);
    result = dedupResult.messages;
    stats.duplicatesRemoved = dedupResult.duplicatesRemoved;
  }
  if (fullConfig.layers.whitespace) {
    const wsResult = normalizeMessagesWhitespace(result);
    result = wsResult.messages;
    stats.whitespaceSavedChars = wsResult.charsSaved;
  }
  if (fullConfig.layers.dictionary) {
    const dictResult = encodeMessages(result);
    result = dictResult.messages;
    stats.dictionarySubstitutions = dictResult.substitutionCount;
    usedCodes = dictResult.usedCodes;
  }
  if (fullConfig.layers.paths) {
    const pathResult = shortenPaths(result);
    result = pathResult.messages;
    pathMap = pathResult.pathMap;
    stats.pathsShortened = Object.keys(pathMap).length;
  }
  if (fullConfig.layers.jsonCompact) {
    const jsonResult = compactMessagesJson(result);
    result = jsonResult.messages;
    stats.jsonCompactedChars = jsonResult.charsSaved;
  }
  if (fullConfig.layers.observation) {
    const obsResult = compressObservations(result);
    result = obsResult.messages;
    stats.observationsCompressed = obsResult.observationsCompressed;
    stats.observationCharsSaved = obsResult.charsSaved;
  }
  if (fullConfig.layers.dynamicCodebook) {
    const dynResult = applyDynamicCodebook(result);
    result = dynResult.messages;
    stats.dynamicSubstitutions = dynResult.substitutions;
    stats.dynamicCharsSaved = dynResult.charsSaved;
    dynamicCodes = dynResult.dynamicCodes;
  }

  const hasDynamicCodes = Object.keys(dynamicCodes).length > 0;
  const hasAnyCodes = usedCodes.size > 0 || Object.keys(pathMap).length > 0 || hasDynamicCodes;
  if (fullConfig.dictionary.includeCodebookHeader && hasAnyCodes) {
    result = prependCodebookHeader(result, usedCodes, pathMap);
    const dynHeader = hasDynamicCodes ? generateDynamicCodebookHeader(dynamicCodes) : "";
    if (dynHeader) {
      const systemIndex = result.findIndex((m) => m.role === "system");
      if (systemIndex === -1) {
        // Without a legend the substituted codes cannot be interpreted.
        result = [{ role: "system", content: dynHeader }, ...result];
      } else {
        const systemMessage = result[systemIndex];
        const content = Array.isArray(systemMessage.content)
          ? // assumes OpenAI-style content parts - TODO confirm
            [{ type: "text", text: dynHeader }, ...systemMessage.content]
          : `${dynHeader}\n${systemMessage.content || ""}`;
        result = result.map((m, i) => (i === systemIndex ? { ...systemMessage, content } : m));
      }
    }
  }

  const compressedChars = calculateTotalChars(result);
  // Guard the division: an empty conversation is "uncompressed", not NaN.
  const compressionRatio = originalChars > 0 ? compressedChars / originalChars : 1;
  const usedCodebook = {};
  for (const code of usedCodes) {
    usedCodebook[code] = STATIC_CODEBOOK[code];
  }
  return {
    messages: result,
    originalMessages,
    originalChars,
    compressedChars,
    compressionRatio,
    stats,
    codebook: usedCodebook,
    pathMap,
    dynamicCodes
  };
}
3046
/**
 * Decides whether a conversation is large enough to be worth compressing.
 *
 * @param {object[]} messages - Conversation messages.
 * @returns {boolean} True when the total size exceeds 5000 characters.
 */
function shouldCompress(messages) {
  const minCharsWorthCompressing = 5e3;
  return calculateTotalChars(messages) > minCharsWorthCompressing;
}
3050
+
2311
3051
  // src/session.ts
2312
3052
  var DEFAULT_SESSION_CONFIG = {
2313
3053
  enabled: false,
@@ -2503,7 +3243,7 @@ var ROUTING_PROFILES = /* @__PURE__ */ new Set([
2503
3243
  var FREE_MODEL = "nvidia/gpt-oss-120b";
2504
3244
  var HEARTBEAT_INTERVAL_MS = 2e3;
2505
3245
  var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
2506
- var MAX_FALLBACK_ATTEMPTS = 3;
3246
+ var MAX_FALLBACK_ATTEMPTS = 5;
2507
3247
  var HEALTH_CHECK_TIMEOUT_MS = 2e3;
2508
3248
  var RATE_LIMIT_COOLDOWN_MS = 6e4;
2509
3249
  var PORT_RETRY_ATTEMPTS = 5;
@@ -2637,7 +3377,10 @@ var PROVIDER_ERROR_PATTERNS = [
2637
3377
  /overloaded/i,
2638
3378
  /temporarily.*unavailable/i,
2639
3379
  /api.*key.*invalid/i,
2640
- /authentication.*failed/i
3380
+ /authentication.*failed/i,
3381
+ /request too large/i,
3382
+ /request.*size.*exceeds/i,
3383
+ /payload too large/i
2641
3384
  ];
2642
3385
  var FALLBACK_STATUS_CODES = [
2643
3386
  400,
@@ -3249,7 +3992,9 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3249
3992
  const tools = parsed.tools;
3250
3993
  const hasTools = Array.isArray(tools) && tools.length > 0;
3251
3994
  if (hasTools) {
3252
- console.log(`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`);
3995
+ console.log(
3996
+ `[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`
3997
+ );
3253
3998
  }
3254
3999
  routingDecision = route(prompt, systemPrompt, maxTokens, {
3255
4000
  ...routerOpts,
@@ -3277,6 +4022,87 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3277
4022
  options.onError?.(new Error(`Routing failed: ${errorMsg}`));
3278
4023
  }
3279
4024
  }
4025
+ const autoCompress = options.autoCompressRequests ?? true;
4026
+ const compressionThreshold = options.compressionThresholdKB ?? 180;
4027
+ const sizeLimit = options.maxRequestSizeKB ?? 200;
4028
+ const requestSizeKB = Math.ceil(body.length / 1024);
4029
+ if (autoCompress && requestSizeKB > compressionThreshold) {
4030
+ try {
4031
+ console.log(
4032
+ `[ClawRouter] Request size ${requestSizeKB}KB exceeds threshold ${compressionThreshold}KB, applying compression...`
4033
+ );
4034
+ const parsed = JSON.parse(body.toString());
4035
+ if (parsed.messages && parsed.messages.length > 0 && shouldCompress(parsed.messages)) {
4036
+ const compressionResult = await compressContext(parsed.messages, {
4037
+ enabled: true,
4038
+ preserveRaw: false,
4039
+ // Don't need originals in proxy
4040
+ layers: {
4041
+ deduplication: true,
4042
+ // Safe: removes duplicate messages
4043
+ whitespace: true,
4044
+ // Safe: normalizes whitespace
4045
+ dictionary: false,
4046
+ // Disabled: requires model to understand codebook
4047
+ paths: false,
4048
+ // Disabled: requires model to understand path codes
4049
+ jsonCompact: true,
4050
+ // Safe: just removes JSON whitespace
4051
+ observation: false,
4052
+ // Disabled: may lose important context
4053
+ dynamicCodebook: false
4054
+ // Disabled: requires model to understand codes
4055
+ },
4056
+ dictionary: {
4057
+ maxEntries: 50,
4058
+ minPhraseLength: 15,
4059
+ includeCodebookHeader: false
4060
+ }
4061
+ });
4062
+ const compressedSizeKB = Math.ceil(compressionResult.compressedChars / 1024);
4063
+ const savings = ((requestSizeKB - compressedSizeKB) / requestSizeKB * 100).toFixed(1);
4064
+ console.log(
4065
+ `[ClawRouter] Compressed ${requestSizeKB}KB \u2192 ${compressedSizeKB}KB (${savings}% reduction)`
4066
+ );
4067
+ parsed.messages = compressionResult.messages;
4068
+ body = Buffer.from(JSON.stringify(parsed));
4069
+ if (compressedSizeKB > sizeLimit) {
4070
+ const errorMsg = {
4071
+ error: {
4072
+ message: `Request size ${compressedSizeKB}KB still exceeds limit after compression (original: ${requestSizeKB}KB). Please reduce context size.`,
4073
+ type: "request_too_large",
4074
+ original_size_kb: requestSizeKB,
4075
+ compressed_size_kb: compressedSizeKB,
4076
+ limit_kb: sizeLimit,
4077
+ help: "Try: 1) Remove old messages from history, 2) Summarize large tool results, 3) Use direct API for very large contexts"
4078
+ }
4079
+ };
4080
+ res.writeHead(413, { "Content-Type": "application/json" });
4081
+ res.end(JSON.stringify(errorMsg));
4082
+ return;
4083
+ }
4084
+ }
4085
+ } catch (err) {
4086
+ console.warn(
4087
+ `[ClawRouter] Compression failed: ${err instanceof Error ? err.message : String(err)}`
4088
+ );
4089
+ }
4090
+ }
4091
+ const finalSizeKB = Math.ceil(body.length / 1024);
4092
+ if (finalSizeKB > sizeLimit) {
4093
+ const errorMsg = {
4094
+ error: {
4095
+ message: `Request size ${finalSizeKB}KB exceeds limit ${sizeLimit}KB. Please reduce context size.`,
4096
+ type: "request_too_large",
4097
+ size_kb: finalSizeKB,
4098
+ limit_kb: sizeLimit,
4099
+ help: "Try: 1) Remove old messages from history, 2) Summarize large tool results, 3) Enable compression (autoCompressRequests: true)"
4100
+ }
4101
+ };
4102
+ res.writeHead(413, { "Content-Type": "application/json" });
4103
+ res.end(JSON.stringify(errorMsg));
4104
+ return;
4105
+ }
3280
4106
  const dedupKey = RequestDeduplicator.hash(body);
3281
4107
  const cached = deduplicator.getCached(dedupKey);
3282
4108
  if (cached) {
@@ -3940,12 +4766,7 @@ function injectModelsConfig(logger) {
3940
4766
  { id: "gemini", alias: "gemini" },
3941
4767
  { id: "flash", alias: "flash" }
3942
4768
  ];
3943
- const DEPRECATED_ALIASES = [
3944
- "blockrun/nvidia",
3945
- "blockrun/gpt",
3946
- "blockrun/o3",
3947
- "blockrun/grok"
3948
- ];
4769
+ const DEPRECATED_ALIASES = ["blockrun/nvidia", "blockrun/gpt", "blockrun/o3", "blockrun/grok"];
3949
4770
  if (!defaults.models) {
3950
4771
  defaults.models = {};
3951
4772
  needsWrite = true;