@blockrun/clawrouter 0.9.9 → 0.9.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js DELETED
@@ -1,4803 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- // src/proxy.ts
4
- import { createServer } from "http";
5
- import { finished } from "stream";
6
- import { privateKeyToAccount as privateKeyToAccount2 } from "viem/accounts";
7
-
8
- // src/x402.ts
9
- import { signTypedData, privateKeyToAccount } from "viem/accounts";
10
-
11
- // src/payment-cache.ts
12
// Default lifetime of a cache entry, in milliseconds (36e5 ms = one hour).
var DEFAULT_TTL_MS = 36e5;

/**
 * In-memory store of payment parameters, keyed by endpoint path.
 *
 * Each stored entry is stamped with the time it was written; an entry whose
 * age exceeds `ttlMs` is removed the next time it is looked up.
 */
var PaymentCache = class {
  cache = /* @__PURE__ */ new Map();
  ttlMs;

  /**
   * @param {number} [ttlMs] Maximum age of an entry in milliseconds.
   */
  constructor(ttlMs = DEFAULT_TTL_MS) {
    this.ttlMs = ttlMs;
  }

  /**
   * Look up the cached payment params for an endpoint path.
   *
   * @param {string} endpointPath Cache key.
   * @returns {object|undefined} The stored entry (including `cachedAt`), or
   *   `undefined` when nothing is stored or the entry has expired.
   */
  get(endpointPath) {
    const hit = this.cache.get(endpointPath);
    if (hit) {
      const expired = Date.now() - hit.cachedAt > this.ttlMs;
      if (!expired) {
        return hit;
      }
      // Stale entries are evicted lazily, at lookup time.
      this.cache.delete(endpointPath);
    }
    return void 0;
  }

  /**
   * Store a copy of `params` under `endpointPath`, stamped with the current time.
   *
   * @param {string} endpointPath Cache key.
   * @param {object} params Payment params to remember.
   */
  set(endpointPath, params) {
    const stamped = { ...params, cachedAt: Date.now() };
    this.cache.set(endpointPath, stamped);
  }

  /**
   * Drop whatever is stored for `endpointPath` (e.g., if payTo changed).
   *
   * @param {string} endpointPath Cache key.
   */
  invalidate(endpointPath) {
    this.cache.delete(endpointPath);
  }
};
38
-
39
- // src/x402.ts
40
// Numeric chain id placed in the typed-data domain below.
var BASE_CHAIN_ID = 8453;

// Token contract address: used as the domain's verifying contract and as the
// `asset` advertised in payment payloads.
var USDC_BASE = "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913";

// Typed-data domain handed to the signer when authorizing a transfer.
var USDC_DOMAIN = {
  name: "USD Coin",
  version: "2",
  chainId: BASE_CHAIN_ID,
  verifyingContract: USDC_BASE
};

// Field layout of the TransferWithAuthorization message, in signing order.
var TRANSFER_TYPES = {
  TransferWithAuthorization: [
    ["from", "address"],
    ["to", "address"],
    ["value", "uint256"],
    ["validAfter", "uint256"],
    ["validBefore", "uint256"],
    ["nonce", "bytes32"]
  ].map(([name, type]) => ({ name, type }))
};
58
- function createNonce() {
59
- const bytes = new Uint8Array(32);
60
- crypto.getRandomValues(bytes);
61
- return `0x${Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("")}`;
62
- }
63
/**
 * Decode the base64-encoded JSON carried by a payment-required header.
 *
 * `atob` returns a "binary string" with one character per byte, so the bytes
 * are re-interpreted as UTF-8 before parsing. Without that step any non-ASCII
 * text in the payload (e.g. a description) would be silently garbled. Pure
 * ASCII payloads decode exactly as before.
 *
 * @param {string} headerValue Base64 text of a JSON document.
 * @returns {*} The parsed JSON value.
 * @throws {DOMException} If `headerValue` is not valid base64.
 * @throws {SyntaxError} If the decoded text is not valid JSON.
 */
function parsePaymentRequired(headerValue) {
  const binary = atob(headerValue);
  const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
  const json = new TextDecoder().decode(bytes);
  return JSON.parse(json);
}
67
/**
 * Sign a TransferWithAuthorization message and wrap it in a base64-encoded
 * payment envelope.
 *
 * The authorization window opens 600 seconds before "now" and closes 300
 * seconds after it, both in whole seconds.
 *
 * @param {string} privateKey Key handed to `signTypedData`.
 * @param {string} fromAddress Address placed in the authorization's `from`.
 * @param {string} recipient Address placed in `to` / `payTo`.
 * @param {string} amount Amount to authorize; converted with `BigInt` for
 *   signing and passed through unchanged in the envelope.
 * @param {string} resourceUrl URL recorded as the paid resource.
 * @returns {Promise<string>} Base64 of the JSON-serialized envelope.
 */
async function createPaymentPayload(privateKey, fromAddress, recipient, amount, resourceUrl) {
  const nowSeconds = Math.floor(Date.now() / 1e3);
  const windowStart = nowSeconds - 600;
  const windowEnd = nowSeconds + 300;
  const nonce = createNonce();

  // The signed message carries numeric fields as BigInt.
  const typedMessage = {
    from: fromAddress,
    to: recipient,
    value: BigInt(amount),
    validAfter: BigInt(windowStart),
    validBefore: BigInt(windowEnd),
    nonce
  };
  const signature = await signTypedData({
    privateKey,
    domain: USDC_DOMAIN,
    types: TRANSFER_TYPES,
    primaryType: "TransferWithAuthorization",
    message: typedMessage
  });

  // The envelope repeats the authorization with numeric fields as strings
  // (BigInt cannot be JSON-serialized).
  const authorization = {
    from: fromAddress,
    to: recipient,
    value: amount,
    validAfter: windowStart.toString(),
    validBefore: windowEnd.toString(),
    nonce
  };
  const resource = {
    url: resourceUrl,
    description: "BlockRun AI API call",
    mimeType: "application/json"
  };
  const accepted = {
    scheme: "exact",
    network: "eip155:8453",
    amount,
    asset: USDC_BASE,
    payTo: recipient,
    maxTimeoutSeconds: 300,
    extra: { name: "USD Coin", version: "2" }
  };
  const envelope = {
    x402Version: 2,
    resource,
    accepted,
    payload: { signature, authorization },
    extensions: {}
  };
  return btoa(JSON.stringify(envelope));
}
117
/**
 * Build a fetch wrapper that answers HTTP 402 responses by attaching a signed
 * payment header and retrying.
 *
 * @param {string} privateKey Key used to derive the paying address and to
 *   sign each payment payload.
 * @returns {{ fetch: Function, cache: PaymentCache }} The wrapped fetch
 *   `(input, init, preAuth)` and the cache of per-endpoint payment params.
 */
function createPaymentFetch(privateKey) {
  const { address: walletAddress } = privateKeyToAccount(privateKey);
  const paymentCache = new PaymentCache();

  // Sign a payment for `amount` to `payTo` and return `init` with the
  // "payment-signature" header added.
  const withPayment = async (init, payTo, amount, url) => {
    const signedPayload = await createPaymentPayload(
      privateKey,
      walletAddress,
      payTo,
      amount,
      url
    );
    const headers = new Headers(init?.headers);
    headers.set("payment-signature", signedPayload);
    return { ...init, headers };
  };

  // Parse the server's payment requirements, remember them for this endpoint,
  // then retry the request once with a signed payment for the first option.
  const handle402 = async (input, init, url, endpointPath, paymentHeader) => {
    const requirements = parsePaymentRequired(paymentHeader);
    const option = requirements.accepts?.[0];
    if (!option) {
      throw new Error("No payment options in 402 response");
    }
    const amount = option.amount || option.maxAmountRequired;
    if (!amount) {
      throw new Error("No amount in payment requirements");
    }
    const { payTo, asset, scheme, network, extra, maxTimeoutSeconds } = option;
    paymentCache.set(endpointPath, { payTo, asset, scheme, network, extra, maxTimeoutSeconds });
    const paidInit = await withPayment(init, payTo, amount, url);
    return fetch(input, paidInit);
  };

  const payFetch = async (input, init, preAuth) => {
    let url;
    if (typeof input === "string") {
      url = input;
    } else if (input instanceof URL) {
      url = input.href;
    } else {
      url = input.url;
    }
    const endpointPath = new URL(url).pathname;
    const cached = paymentCache.get(endpointPath);

    if (cached && preAuth?.estimatedAmount) {
      // Optimistic path: pay the cached recipient the caller's estimate up
      // front, skipping the initial unpaid round trip.
      const preAuthInit = await withPayment(init, cached.payTo, preAuth.estimatedAmount, url);
      const optimistic = await fetch(input, preAuthInit);
      if (optimistic.status !== 402) {
        return optimistic;
      }
      const freshTerms = optimistic.headers.get("x-payment-required");
      if (freshTerms) {
        return handle402(input, init, url, endpointPath, freshTerms);
      }
      // A 402 without terms: forget the cached params and fall through to the
      // plain (unpaid) request below.
      paymentCache.invalidate(endpointPath);
    }

    const plain = await fetch(input, init);
    if (plain.status !== 402) {
      return plain;
    }
    const terms = plain.headers.get("x-payment-required");
    if (!terms) {
      throw new Error("402 response missing x-payment-required header");
    }
    return handle402(input, init, url, endpointPath, terms);
  };

  return { fetch: payFetch, cache: paymentCache };
}
198
-
199
- // src/router/rules.ts
200
/**
 * Score a request by its estimated token count.
 *
 * @param {number} estimatedTokens Estimated size of the request.
 * @param {{ simple: number, complex: number }} thresholds Counts strictly
 *   below `simple` score -1; counts strictly above `complex` score 1.
 * @returns {{ name: string, score: number, signal: (string|null) }}
 */
function scoreTokenCount(estimatedTokens, thresholds) {
  const name = "tokenCount";
  let label = null;
  if (estimatedTokens < thresholds.simple) {
    label = "short";
  } else if (estimatedTokens > thresholds.complex) {
    label = "long";
  }
  if (label === null) {
    return { name, score: 0, signal: null };
  }
  return {
    name,
    score: label === "short" ? -1 : 1,
    signal: `${label} (${estimatedTokens} tokens)`
  };
}
209
/**
 * Score one dimension by how many keywords occur in `text`.
 *
 * Each keyword is lowercased before the substring test; `text` is used as
 * given. The signal lists at most the first three matching keywords, in
 * their original spelling.
 *
 * @param {string} text Text to search.
 * @param {string[]} keywords Candidate keywords.
 * @param {string} name Dimension name copied into the result.
 * @param {string} signalLabel Prefix of the signal string.
 * @param {{ low: number, high: number }} thresholds Minimum match counts.
 * @param {{ none: number, low: number, high: number }} scores Score per level.
 * @returns {{ name: string, score: number, signal: (string|null) }}
 */
function scoreKeywordMatch(text, keywords, name, signalLabel, thresholds, scores) {
  const found = [];
  for (const keyword of keywords) {
    if (text.includes(keyword.toLowerCase())) {
      found.push(keyword);
    }
  }

  let level;
  if (found.length >= thresholds.high) {
    level = "high";
  } else if (found.length >= thresholds.low) {
    level = "low";
  } else {
    return { name, score: scores.none, signal: null };
  }

  const preview = found.slice(0, 3).join(", ");
  return { name, score: scores[level], signal: `${signalLabel} (${preview})` };
}
227
/**
 * Detect multi-step phrasing: "first ... then" on one line, "step <digit>",
 * or a digit followed by a period and whitespace.
 *
 * @param {string} text Text to inspect.
 * @returns {{ name: string, score: number, signal: (string|null) }} Score 0.5
 *   when any pattern matches, otherwise 0.
 */
function scoreMultiStep(text) {
  const looksMultiStep =
    /first.*then/i.test(text) || /step \d/i.test(text) || /\d\.\s/.test(text);
  return looksMultiStep
    ? { name: "multiStepPatterns", score: 0.5, signal: "multi-step" }
    : { name: "multiStepPatterns", score: 0, signal: null };
}
235
/**
 * Score a prompt by the number of question marks it contains.
 *
 * @param {string} prompt Prompt text.
 * @returns {{ name: string, score: number, signal: (string|null) }} Score 0.5
 *   when there are more than three question marks, otherwise 0.
 */
function scoreQuestionComplexity(prompt) {
  // Splitting on "?" yields one more piece than there are question marks.
  const questionMarks = prompt.split("?").length - 1;
  const name = "questionComplexity";
  if (questionMarks <= 3) {
    return { name, score: 0, signal: null };
  }
  return { name, score: 0.5, signal: `${questionMarks} questions` };
}
242
/**
 * Score how agentic a request looks from keyword hits.
 *
 * Each keyword is lowercased before the substring test; `text` is used as
 * given. Match counts map to scores as: 4 or more -> 1, exactly 3 -> 0.6,
 * 1 or 2 -> 0.2, none -> 0. The signal lists at most the first three matching
 * keywords.
 *
 * @param {string} text Text to search.
 * @param {string[]} keywords Agentic-task keywords.
 * @returns {{ dimensionScore: { name: string, score: number, signal: (string|null) }, agenticScore: number }}
 */
function scoreAgenticTask(text, keywords) {
  const hits = keywords.filter((keyword) => text.includes(keyword.toLowerCase()));
  // [minimum matches, score, signal label], strongest tier first.
  const tiers = [
    [4, 1, "agentic"],
    [3, 0.6, "agentic"],
    [1, 0.2, "agentic-light"]
  ];
  const tier = tiers.find(([minimum]) => hits.length >= minimum);
  if (!tier) {
    return {
      dimensionScore: { name: "agenticTask", score: 0, signal: null },
      agenticScore: 0
    };
  }
  const [, score, label] = tier;
  const preview = hits.slice(0, 3).join(", ");
  return {
    dimensionScore: { name: "agenticTask", score, signal: `${label} (${preview})` },
    agenticScore: score
  };
}
286
/**
 * Classify a request into a tier using weighted rule-based dimensions.
 *
 * Every dimension yields a score and an optional signal; the scores are
 * combined using `config.dimensionWeights` (a dimension with no weight counts
 * as 0). Two or more reasoning keywords in the user prompt force the
 * "REASONING" tier with confidence of at least 0.85. Otherwise the tier comes
 * from `config.tierBoundaries`, and is reported as `null` when the calibrated
 * confidence is below `config.confidenceThreshold`.
 *
 * @param {string} prompt User prompt.
 * @param {string|null|undefined} systemPrompt System prompt, if any.
 * @param {number} estimatedTokens Estimated token count of the request.
 * @param {object} config Scoring configuration (keyword lists, thresholds,
 *   weights, tier boundaries, confidence settings).
 * @returns {{ score: number, tier: (string|null), confidence: number, signals: string[], agenticScore: number }}
 */
function classifyByRules(prompt, systemPrompt, estimatedTokens, config) {
  const text = `${systemPrompt ?? ""} ${prompt}`.toLowerCase();
  const userText = prompt.toLowerCase();

  // Each row: [searched text, keywords, dimension name, signal label,
  //            match-count thresholds, score per level].
  const coreKeywordSpecs = [
    [text, config.codeKeywords, "codePresence", "code",
      { low: 1, high: 2 }, { none: 0, low: 0.5, high: 1 }],
    // Reasoning markers use USER prompt only — system prompt "step by step" shouldn't trigger reasoning
    [userText, config.reasoningKeywords, "reasoningMarkers", "reasoning",
      { low: 1, high: 2 }, { none: 0, low: 0.7, high: 1 }],
    [text, config.technicalKeywords, "technicalTerms", "technical",
      { low: 2, high: 4 }, { none: 0, low: 0.5, high: 1 }],
    [text, config.creativeKeywords, "creativeMarkers", "creative",
      { low: 1, high: 2 }, { none: 0, low: 0.5, high: 0.7 }],
    [text, config.simpleKeywords, "simpleIndicators", "simple",
      { low: 1, high: 2 }, { none: 0, low: -1, high: -1 }]
  ];
  const extendedKeywordSpecs = [
    [text, config.imperativeVerbs, "imperativeVerbs", "imperative",
      { low: 1, high: 2 }, { none: 0, low: 0.3, high: 0.5 }],
    [text, config.constraintIndicators, "constraintCount", "constraints",
      { low: 1, high: 3 }, { none: 0, low: 0.3, high: 0.7 }],
    [text, config.outputFormatKeywords, "outputFormat", "format",
      { low: 1, high: 2 }, { none: 0, low: 0.4, high: 0.7 }],
    [text, config.referenceKeywords, "referenceComplexity", "references",
      { low: 1, high: 2 }, { none: 0, low: 0.3, high: 0.5 }],
    [text, config.negationKeywords, "negationComplexity", "negation",
      { low: 2, high: 3 }, { none: 0, low: 0.3, high: 0.5 }],
    [text, config.domainSpecificKeywords, "domainSpecificity", "domain-specific",
      { low: 1, high: 2 }, { none: 0, low: 0.5, high: 0.8 }]
  ];
  const runSpec = (spec) => scoreKeywordMatch(...spec);

  // Dimension order is significant: it fixes both the order of `signals` and
  // the order in which the weighted sum is accumulated.
  const dimensions = [
    scoreTokenCount(estimatedTokens, config.tokenCountThresholds),
    ...coreKeywordSpecs.map(runSpec),
    scoreMultiStep(text),
    scoreQuestionComplexity(prompt),
    ...extendedKeywordSpecs.map(runSpec)
  ];
  const { dimensionScore: agenticDimension, agenticScore } = scoreAgenticTask(
    text,
    config.agenticTaskKeywords
  );
  dimensions.push(agenticDimension);

  const signals = [];
  for (const dimension of dimensions) {
    if (dimension.signal !== null) {
      signals.push(dimension.signal);
    }
  }

  const weights = config.dimensionWeights;
  let weightedScore = 0;
  for (const { name, score } of dimensions) {
    weightedScore += score * (weights[name] ?? 0);
  }

  // Override: two or more reasoning keywords in the user prompt.
  const reasoningHitCount = config.reasoningKeywords.filter(
    (keyword) => userText.includes(keyword.toLowerCase())
  ).length;
  if (reasoningHitCount >= 2) {
    const overrideConfidence = calibrateConfidence(
      // ensure positive for confidence calc
      Math.max(weightedScore, 0.3),
      config.confidenceSteepness
    );
    return {
      score: weightedScore,
      tier: "REASONING",
      confidence: Math.max(overrideConfidence, 0.85),
      signals,
      agenticScore
    };
  }

  const { simpleMedium, mediumComplex, complexReasoning } = config.tierBoundaries;
  // Pick the tier and measure how far the score sits from the nearest
  // boundary of that tier.
  const locate = (value) => {
    if (value < simpleMedium) {
      return ["SIMPLE", simpleMedium - value];
    }
    if (value < mediumComplex) {
      return ["MEDIUM", Math.min(value - simpleMedium, mediumComplex - value)];
    }
    if (value < complexReasoning) {
      return ["COMPLEX", Math.min(value - mediumComplex, complexReasoning - value)];
    }
    return ["REASONING", value - complexReasoning];
  };
  const [tier, distanceFromBoundary] = locate(weightedScore);

  const confidence = calibrateConfidence(distanceFromBoundary, config.confidenceSteepness);
  const tooUncertain = confidence < config.confidenceThreshold;
  return {
    score: weightedScore,
    tier: tooUncertain ? null : tier,
    confidence,
    signals,
    agenticScore
  };
}
437
/**
 * Map a distance onto (0, 1) with a logistic curve.
 *
 * @param {number} distance Input value; 0 maps to 0.5.
 * @param {number} steepness Multiplier applied to `distance` before the curve.
 * @returns {number} `1 / (1 + e^(-steepness * distance))`.
 */
function calibrateConfidence(distance, steepness) {
  const decay = Math.exp(-steepness * distance);
  return 1 / (1 + decay);
}
440
-
441
- // src/router/selector.ts
442
/**
 * Pick the primary model of a tier and estimate its cost against a fixed
 * baseline model ("anthropic/claude-opus-4.5").
 *
 * Prices are applied per million tokens. A model missing from `modelPricing`
 * is priced at 0. `savings` is the fraction saved relative to the baseline,
 * floored at 0; it is 0 for the "premium" profile or when the baseline cost
 * is not positive.
 *
 * @param {string} tier Key into `tierConfigs`.
 * @param {number} confidence Passed through to the result.
 * @param {string} method Passed through to the result.
 * @param {string} reasoning Passed through to the result.
 * @param {object} tierConfigs Map of tier -> `{ primary, ... }`.
 * @param {Map<string, {inputPrice: number, outputPrice: number}>} modelPricing
 * @param {number} estimatedInputTokens
 * @param {number} maxOutputTokens
 * @param {string} routingProfile
 * @returns {{ model: string, tier: string, confidence: number, method: string, reasoning: string, costEstimate: number, baselineCost: number, savings: number }}
 */
function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
  const model = tierConfigs[tier].primary;

  // Input plus output cost of one request on the given model.
  const costOf = (modelId) => {
    const entry = modelPricing.get(modelId);
    const inputPart = estimatedInputTokens / 1e6 * (entry?.inputPrice ?? 0);
    const outputPart = maxOutputTokens / 1e6 * (entry?.outputPrice ?? 0);
    return inputPart + outputPart;
  };

  const costEstimate = costOf(model);
  const baselineCost = costOf("anthropic/claude-opus-4.5");

  let savings = 0;
  if (routingProfile !== "premium" && baselineCost > 0) {
    savings = Math.max(0, (baselineCost - costEstimate) / baselineCost);
  }

  return { model, tier, confidence, method, reasoning, costEstimate, baselineCost, savings };
}
469
/**
 * List the models to try for a tier: the primary first, then its fallbacks
 * in configured order.
 *
 * @param {string} tier Key into `tierConfigs`.
 * @param {object} tierConfigs Map of tier -> `{ primary, fallback }`.
 * @returns {string[]} A new array; the configured `fallback` is not modified.
 */
function getFallbackChain(tier, tierConfigs) {
  const { primary, fallback } = tierConfigs[tier];
  const chain = [primary];
  chain.push(...fallback);
  return chain;
}
473
/**
 * Estimate the cost of one request on `model` and compare it with a fixed
 * baseline model ("anthropic/claude-opus-4.5").
 *
 * Prices are applied per million tokens. A model missing from `modelPricing`
 * is priced at 0. `savings` is the fraction saved relative to the baseline,
 * floored at 0; it is 0 for the "premium" profile or when the baseline cost
 * is not positive.
 *
 * @param {string} model Model id to price.
 * @param {Map<string, {inputPrice: number, outputPrice: number}>} modelPricing
 * @param {number} estimatedInputTokens
 * @param {number} maxOutputTokens
 * @param {string} routingProfile
 * @returns {{ costEstimate: number, baselineCost: number, savings: number }}
 */
function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
  const inputMillions = estimatedInputTokens / 1e6;
  const outputMillions = maxOutputTokens / 1e6;

  const [costEstimate, baselineCost] = [model, "anthropic/claude-opus-4.5"].map((modelId) => {
    const entry = modelPricing.get(modelId);
    const inputPart = inputMillions * (entry?.inputPrice ?? 0);
    const outputPart = outputMillions * (entry?.outputPrice ?? 0);
    return inputPart + outputPart;
  });

  if (routingProfile === "premium" || !(baselineCost > 0)) {
    return { costEstimate, baselineCost, savings: 0 };
  }
  const savings = Math.max(0, (baselineCost - costEstimate) / baselineCost);
  return { costEstimate, baselineCost, savings };
}
489
/**
 * Fallback chain for a tier, restricted to models whose context window can
 * hold the request with a 10% margin.
 *
 * Models whose context window is unknown (`getContextWindow` returns
 * `undefined`) are kept. If no model qualifies, the unfiltered chain is
 * returned instead of an empty list.
 *
 * @param {string} tier Key into `tierConfigs`.
 * @param {object} tierConfigs Map of tier -> `{ primary, fallback }`.
 * @param {number} estimatedTotalTokens Estimated size of the request.
 * @param {(modelId: string) => (number|undefined)} getContextWindow
 * @returns {string[]} Model ids in chain order.
 */
function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
  const candidates = getFallbackChain(tier, tierConfigs);
  const fitting = [];
  for (const modelId of candidates) {
    const contextWindow = getContextWindow(modelId);
    if (contextWindow === void 0 || contextWindow >= estimatedTotalTokens * 1.1) {
      fitting.push(modelId);
    }
  }
  return fitting.length > 0 ? fitting : candidates;
}
503
-
504
- // src/router/config.ts
505
- var DEFAULT_ROUTING_CONFIG = {
506
- version: "2.0",
507
- classifier: {
508
- llmModel: "google/gemini-2.5-flash",
509
- llmMaxTokens: 10,
510
- llmTemperature: 0,
511
- promptTruncationChars: 500,
512
- cacheTtlMs: 36e5
513
- // 1 hour
514
- },
515
- scoring: {
516
- tokenCountThresholds: { simple: 50, complex: 500 },
517
- // Multilingual keywords: English + Chinese (中文) + Japanese (日本語) + Russian (Русский) + German (Deutsch)
518
- codeKeywords: [
519
- // English
520
- "function",
521
- "class",
522
- "import",
523
- "def",
524
- "SELECT",
525
- "async",
526
- "await",
527
- "const",
528
- "let",
529
- "var",
530
- "return",
531
- "```",
532
- // Chinese
533
- "\u51FD\u6570",
534
- "\u7C7B",
535
- "\u5BFC\u5165",
536
- "\u5B9A\u4E49",
537
- "\u67E5\u8BE2",
538
- "\u5F02\u6B65",
539
- "\u7B49\u5F85",
540
- "\u5E38\u91CF",
541
- "\u53D8\u91CF",
542
- "\u8FD4\u56DE",
543
- // Japanese
544
- "\u95A2\u6570",
545
- "\u30AF\u30E9\u30B9",
546
- "\u30A4\u30F3\u30DD\u30FC\u30C8",
547
- "\u975E\u540C\u671F",
548
- "\u5B9A\u6570",
549
- "\u5909\u6570",
550
- // Russian
551
- "\u0444\u0443\u043D\u043A\u0446\u0438\u044F",
552
- "\u043A\u043B\u0430\u0441\u0441",
553
- "\u0438\u043C\u043F\u043E\u0440\u0442",
554
- "\u043E\u043F\u0440\u0435\u0434\u0435\u043B",
555
- "\u0437\u0430\u043F\u0440\u043E\u0441",
556
- "\u0430\u0441\u0438\u043D\u0445\u0440\u043E\u043D\u043D\u044B\u0439",
557
- "\u043E\u0436\u0438\u0434\u0430\u0442\u044C",
558
- "\u043A\u043E\u043D\u0441\u0442\u0430\u043D\u0442\u0430",
559
- "\u043F\u0435\u0440\u0435\u043C\u0435\u043D\u043D\u0430\u044F",
560
- "\u0432\u0435\u0440\u043D\u0443\u0442\u044C",
561
- // German
562
- "funktion",
563
- "klasse",
564
- "importieren",
565
- "definieren",
566
- "abfrage",
567
- "asynchron",
568
- "erwarten",
569
- "konstante",
570
- "variable",
571
- "zur\xFCckgeben"
572
- ],
573
- reasoningKeywords: [
574
- // English
575
- "prove",
576
- "theorem",
577
- "derive",
578
- "step by step",
579
- "chain of thought",
580
- "formally",
581
- "mathematical",
582
- "proof",
583
- "logically",
584
- // Chinese
585
- "\u8BC1\u660E",
586
- "\u5B9A\u7406",
587
- "\u63A8\u5BFC",
588
- "\u9010\u6B65",
589
- "\u601D\u7EF4\u94FE",
590
- "\u5F62\u5F0F\u5316",
591
- "\u6570\u5B66",
592
- "\u903B\u8F91",
593
- // Japanese
594
- "\u8A3C\u660E",
595
- "\u5B9A\u7406",
596
- "\u5C0E\u51FA",
597
- "\u30B9\u30C6\u30C3\u30D7\u30D0\u30A4\u30B9\u30C6\u30C3\u30D7",
598
- "\u8AD6\u7406\u7684",
599
- // Russian
600
- "\u0434\u043E\u043A\u0430\u0437\u0430\u0442\u044C",
601
- "\u0434\u043E\u043A\u0430\u0436\u0438",
602
- "\u0434\u043E\u043A\u0430\u0437\u0430\u0442\u0435\u043B\u044C\u0441\u0442\u0432",
603
- "\u0442\u0435\u043E\u0440\u0435\u043C\u0430",
604
- "\u0432\u044B\u0432\u0435\u0441\u0442\u0438",
605
- "\u0448\u0430\u0433 \u0437\u0430 \u0448\u0430\u0433\u043E\u043C",
606
- "\u043F\u043E\u0448\u0430\u0433\u043E\u0432\u043E",
607
- "\u043F\u043E\u044D\u0442\u0430\u043F\u043D\u043E",
608
- "\u0446\u0435\u043F\u043E\u0447\u043A\u0430 \u0440\u0430\u0441\u0441\u0443\u0436\u0434\u0435\u043D\u0438\u0439",
609
- "\u0440\u0430\u0441\u0441\u0443\u0436\u0434\u0435\u043D\u0438",
610
- "\u0444\u043E\u0440\u043C\u0430\u043B\u044C\u043D\u043E",
611
- "\u043C\u0430\u0442\u0435\u043C\u0430\u0442\u0438\u0447\u0435\u0441\u043A\u0438",
612
- "\u043B\u043E\u0433\u0438\u0447\u0435\u0441\u043A\u0438",
613
- // German
614
- "beweisen",
615
- "beweis",
616
- "theorem",
617
- "ableiten",
618
- "schritt f\xFCr schritt",
619
- "gedankenkette",
620
- "formal",
621
- "mathematisch",
622
- "logisch"
623
- ],
624
- simpleKeywords: [
625
- // English
626
- "what is",
627
- "define",
628
- "translate",
629
- "hello",
630
- "yes or no",
631
- "capital of",
632
- "how old",
633
- "who is",
634
- "when was",
635
- // Chinese
636
- "\u4EC0\u4E48\u662F",
637
- "\u5B9A\u4E49",
638
- "\u7FFB\u8BD1",
639
- "\u4F60\u597D",
640
- "\u662F\u5426",
641
- "\u9996\u90FD",
642
- "\u591A\u5927",
643
- "\u8C01\u662F",
644
- "\u4F55\u65F6",
645
- // Japanese
646
- "\u3068\u306F",
647
- "\u5B9A\u7FA9",
648
- "\u7FFB\u8A33",
649
- "\u3053\u3093\u306B\u3061\u306F",
650
- "\u306F\u3044\u304B\u3044\u3044\u3048",
651
- "\u9996\u90FD",
652
- "\u8AB0",
653
- // Russian
654
- "\u0447\u0442\u043E \u0442\u0430\u043A\u043E\u0435",
655
- "\u043E\u043F\u0440\u0435\u0434\u0435\u043B\u0435\u043D\u0438\u0435",
656
- "\u043F\u0435\u0440\u0435\u0432\u0435\u0441\u0442\u0438",
657
- "\u043F\u0435\u0440\u0435\u0432\u0435\u0434\u0438",
658
- "\u043F\u0440\u0438\u0432\u0435\u0442",
659
- "\u0434\u0430 \u0438\u043B\u0438 \u043D\u0435\u0442",
660
- "\u0441\u0442\u043E\u043B\u0438\u0446\u0430",
661
- "\u0441\u043A\u043E\u043B\u044C\u043A\u043E \u043B\u0435\u0442",
662
- "\u043A\u0442\u043E \u0442\u0430\u043A\u043E\u0439",
663
- "\u043A\u043E\u0433\u0434\u0430",
664
- "\u043E\u0431\u044A\u044F\u0441\u043D\u0438",
665
- // German
666
- "was ist",
667
- "definiere",
668
- "\xFCbersetze",
669
- "hallo",
670
- "ja oder nein",
671
- "hauptstadt",
672
- "wie alt",
673
- "wer ist",
674
- "wann",
675
- "erkl\xE4re"
676
- ],
677
- technicalKeywords: [
678
- // English
679
- "algorithm",
680
- "optimize",
681
- "architecture",
682
- "distributed",
683
- "kubernetes",
684
- "microservice",
685
- "database",
686
- "infrastructure",
687
- // Chinese
688
- "\u7B97\u6CD5",
689
- "\u4F18\u5316",
690
- "\u67B6\u6784",
691
- "\u5206\u5E03\u5F0F",
692
- "\u5FAE\u670D\u52A1",
693
- "\u6570\u636E\u5E93",
694
- "\u57FA\u7840\u8BBE\u65BD",
695
- // Japanese
696
- "\u30A2\u30EB\u30B4\u30EA\u30BA\u30E0",
697
- "\u6700\u9069\u5316",
698
- "\u30A2\u30FC\u30AD\u30C6\u30AF\u30C1\u30E3",
699
- "\u5206\u6563",
700
- "\u30DE\u30A4\u30AF\u30ED\u30B5\u30FC\u30D3\u30B9",
701
- "\u30C7\u30FC\u30BF\u30D9\u30FC\u30B9",
702
- // Russian
703
- "\u0430\u043B\u0433\u043E\u0440\u0438\u0442\u043C",
704
- "\u043E\u043F\u0442\u0438\u043C\u0438\u0437\u0438\u0440\u043E\u0432\u0430\u0442\u044C",
705
- "\u043E\u043F\u0442\u0438\u043C\u0438\u0437\u0430\u0446\u0438",
706
- "\u043E\u043F\u0442\u0438\u043C\u0438\u0437\u0438\u0440\u0443\u0439",
707
- "\u0430\u0440\u0445\u0438\u0442\u0435\u043A\u0442\u0443\u0440\u0430",
708
- "\u0440\u0430\u0441\u043F\u0440\u0435\u0434\u0435\u043B\u0451\u043D\u043D\u044B\u0439",
709
- "\u043C\u0438\u043A\u0440\u043E\u0441\u0435\u0440\u0432\u0438\u0441",
710
- "\u0431\u0430\u0437\u0430 \u0434\u0430\u043D\u043D\u044B\u0445",
711
- "\u0438\u043D\u0444\u0440\u0430\u0441\u0442\u0440\u0443\u043A\u0442\u0443\u0440\u0430",
712
- // German
713
- "algorithmus",
714
- "optimieren",
715
- "architektur",
716
- "verteilt",
717
- "kubernetes",
718
- "mikroservice",
719
- "datenbank",
720
- "infrastruktur"
721
- ],
722
- creativeKeywords: [
723
- // English
724
- "story",
725
- "poem",
726
- "compose",
727
- "brainstorm",
728
- "creative",
729
- "imagine",
730
- "write a",
731
- // Chinese
732
- "\u6545\u4E8B",
733
- "\u8BD7",
734
- "\u521B\u4F5C",
735
- "\u5934\u8111\u98CE\u66B4",
736
- "\u521B\u610F",
737
- "\u60F3\u8C61",
738
- "\u5199\u4E00\u4E2A",
739
- // Japanese
740
- "\u7269\u8A9E",
741
- "\u8A69",
742
- "\u4F5C\u66F2",
743
- "\u30D6\u30EC\u30A4\u30F3\u30B9\u30C8\u30FC\u30E0",
744
- "\u5275\u9020\u7684",
745
- "\u60F3\u50CF",
746
- // Russian
747
- "\u0438\u0441\u0442\u043E\u0440\u0438\u044F",
748
- "\u0440\u0430\u0441\u0441\u043A\u0430\u0437",
749
- "\u0441\u0442\u0438\u0445\u043E\u0442\u0432\u043E\u0440\u0435\u043D\u0438\u0435",
750
- "\u0441\u043E\u0447\u0438\u043D\u0438\u0442\u044C",
751
- "\u0441\u043E\u0447\u0438\u043D\u0438",
752
- "\u043C\u043E\u0437\u0433\u043E\u0432\u043E\u0439 \u0448\u0442\u0443\u0440\u043C",
753
- "\u0442\u0432\u043E\u0440\u0447\u0435\u0441\u043A\u0438\u0439",
754
- "\u043F\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u0438\u0442\u044C",
755
- "\u043F\u0440\u0438\u0434\u0443\u043C\u0430\u0439",
756
- "\u043D\u0430\u043F\u0438\u0448\u0438",
757
- // German
758
- "geschichte",
759
- "gedicht",
760
- "komponieren",
761
- "brainstorming",
762
- "kreativ",
763
- "vorstellen",
764
- "schreibe",
765
- "erz\xE4hlung"
766
- ],
767
- // New dimension keyword lists (multilingual)
768
- imperativeVerbs: [
769
- // English
770
- "build",
771
- "create",
772
- "implement",
773
- "design",
774
- "develop",
775
- "construct",
776
- "generate",
777
- "deploy",
778
- "configure",
779
- "set up",
780
- // Chinese
781
- "\u6784\u5EFA",
782
- "\u521B\u5EFA",
783
- "\u5B9E\u73B0",
784
- "\u8BBE\u8BA1",
785
- "\u5F00\u53D1",
786
- "\u751F\u6210",
787
- "\u90E8\u7F72",
788
- "\u914D\u7F6E",
789
- "\u8BBE\u7F6E",
790
- // Japanese
791
- "\u69CB\u7BC9",
792
- "\u4F5C\u6210",
793
- "\u5B9F\u88C5",
794
- "\u8A2D\u8A08",
795
- "\u958B\u767A",
796
- "\u751F\u6210",
797
- "\u30C7\u30D7\u30ED\u30A4",
798
- "\u8A2D\u5B9A",
799
- // Russian
800
- "\u043F\u043E\u0441\u0442\u0440\u043E\u0438\u0442\u044C",
801
- "\u043F\u043E\u0441\u0442\u0440\u043E\u0439",
802
- "\u0441\u043E\u0437\u0434\u0430\u0442\u044C",
803
- "\u0441\u043E\u0437\u0434\u0430\u0439",
804
- "\u0440\u0435\u0430\u043B\u0438\u0437\u043E\u0432\u0430\u0442\u044C",
805
- "\u0440\u0435\u0430\u043B\u0438\u0437\u0443\u0439",
806
- "\u0441\u043F\u0440\u043E\u0435\u043A\u0442\u0438\u0440\u043E\u0432\u0430\u0442\u044C",
807
- "\u0440\u0430\u0437\u0440\u0430\u0431\u043E\u0442\u0430\u0442\u044C",
808
- "\u0440\u0430\u0437\u0440\u0430\u0431\u043E\u0442\u0430\u0439",
809
- "\u0441\u043A\u043E\u043D\u0441\u0442\u0440\u0443\u0438\u0440\u043E\u0432\u0430\u0442\u044C",
810
- "\u0441\u0433\u0435\u043D\u0435\u0440\u0438\u0440\u043E\u0432\u0430\u0442\u044C",
811
- "\u0441\u0433\u0435\u043D\u0435\u0440\u0438\u0440\u0443\u0439",
812
- "\u0440\u0430\u0437\u0432\u0435\u0440\u043D\u0443\u0442\u044C",
813
- "\u0440\u0430\u0437\u0432\u0435\u0440\u043D\u0438",
814
- "\u043D\u0430\u0441\u0442\u0440\u043E\u0438\u0442\u044C",
815
- "\u043D\u0430\u0441\u0442\u0440\u043E\u0439",
816
- // German
817
- "erstellen",
818
- "bauen",
819
- "implementieren",
820
- "entwerfen",
821
- "entwickeln",
822
- "konstruieren",
823
- "generieren",
824
- "bereitstellen",
825
- "konfigurieren",
826
- "einrichten"
827
- ],
828
- constraintIndicators: [
829
- // English
830
- "under",
831
- "at most",
832
- "at least",
833
- "within",
834
- "no more than",
835
- "o(",
836
- "maximum",
837
- "minimum",
838
- "limit",
839
- "budget",
840
- // Chinese
841
- "\u4E0D\u8D85\u8FC7",
842
- "\u81F3\u5C11",
843
- "\u6700\u591A",
844
- "\u5728\u5185",
845
- "\u6700\u5927",
846
- "\u6700\u5C0F",
847
- "\u9650\u5236",
848
- "\u9884\u7B97",
849
- // Japanese
850
- "\u4EE5\u4E0B",
851
- "\u6700\u5927",
852
- "\u6700\u5C0F",
853
- "\u5236\u9650",
854
- "\u4E88\u7B97",
855
- // Russian
856
- "\u043D\u0435 \u0431\u043E\u043B\u0435\u0435",
857
- "\u043D\u0435 \u043C\u0435\u043D\u0435\u0435",
858
- "\u043A\u0430\u043A \u043C\u0438\u043D\u0438\u043C\u0443\u043C",
859
- "\u0432 \u043F\u0440\u0435\u0434\u0435\u043B\u0430\u0445",
860
- "\u043C\u0430\u043A\u0441\u0438\u043C\u0443\u043C",
861
- "\u043C\u0438\u043D\u0438\u043C\u0443\u043C",
862
- "\u043E\u0433\u0440\u0430\u043D\u0438\u0447\u0435\u043D\u0438\u0435",
863
- "\u0431\u044E\u0434\u0436\u0435\u0442",
864
- // German
865
- "h\xF6chstens",
866
- "mindestens",
867
- "innerhalb",
868
- "nicht mehr als",
869
- "maximal",
870
- "minimal",
871
- "grenze",
872
- "budget"
873
- ],
874
- outputFormatKeywords: [
875
- // English
876
- "json",
877
- "yaml",
878
- "xml",
879
- "table",
880
- "csv",
881
- "markdown",
882
- "schema",
883
- "format as",
884
- "structured",
885
- // Chinese
886
- "\u8868\u683C",
887
- "\u683C\u5F0F\u5316\u4E3A",
888
- "\u7ED3\u6784\u5316",
889
- // Japanese
890
- "\u30C6\u30FC\u30D6\u30EB",
891
- "\u30D5\u30A9\u30FC\u30DE\u30C3\u30C8",
892
- "\u69CB\u9020\u5316",
893
- // Russian
894
- "\u0442\u0430\u0431\u043B\u0438\u0446\u0430",
895
- "\u0444\u043E\u0440\u043C\u0430\u0442\u0438\u0440\u043E\u0432\u0430\u0442\u044C \u043A\u0430\u043A",
896
- "\u0441\u0442\u0440\u0443\u043A\u0442\u0443\u0440\u0438\u0440\u043E\u0432\u0430\u043D\u043D\u044B\u0439",
897
- // German
898
- "tabelle",
899
- "formatieren als",
900
- "strukturiert"
901
- ],
902
- referenceKeywords: [
903
- // English
904
- "above",
905
- "below",
906
- "previous",
907
- "following",
908
- "the docs",
909
- "the api",
910
- "the code",
911
- "earlier",
912
- "attached",
913
- // Chinese
914
- "\u4E0A\u9762",
915
- "\u4E0B\u9762",
916
- "\u4E4B\u524D",
917
- "\u63A5\u4E0B\u6765",
918
- "\u6587\u6863",
919
- "\u4EE3\u7801",
920
- "\u9644\u4EF6",
921
- // Japanese
922
- "\u4E0A\u8A18",
923
- "\u4E0B\u8A18",
924
- "\u524D\u306E",
925
- "\u6B21\u306E",
926
- "\u30C9\u30AD\u30E5\u30E1\u30F3\u30C8",
927
- "\u30B3\u30FC\u30C9",
928
- // Russian
929
- "\u0432\u044B\u0448\u0435",
930
- "\u043D\u0438\u0436\u0435",
931
- "\u043F\u0440\u0435\u0434\u044B\u0434\u0443\u0449\u0438\u0439",
932
- "\u0441\u043B\u0435\u0434\u0443\u044E\u0449\u0438\u0439",
933
- "\u0434\u043E\u043A\u0443\u043C\u0435\u043D\u0442\u0430\u0446\u0438\u044F",
934
- "\u043A\u043E\u0434",
935
- "\u0440\u0430\u043D\u0435\u0435",
936
- "\u0432\u043B\u043E\u0436\u0435\u043D\u0438\u0435",
937
- // German
938
- "oben",
939
- "unten",
940
- "vorherige",
941
- "folgende",
942
- "dokumentation",
943
- "der code",
944
- "fr\xFCher",
945
- "anhang"
946
- ],
947
- negationKeywords: [
948
- // English
949
- "don't",
950
- "do not",
951
- "avoid",
952
- "never",
953
- "without",
954
- "except",
955
- "exclude",
956
- "no longer",
957
- // Chinese
958
- "\u4E0D\u8981",
959
- "\u907F\u514D",
960
- "\u4ECE\u4E0D",
961
- "\u6CA1\u6709",
962
- "\u9664\u4E86",
963
- "\u6392\u9664",
964
- // Japanese
965
- "\u3057\u306A\u3044\u3067",
966
- "\u907F\u3051\u308B",
967
- "\u6C7A\u3057\u3066",
968
- "\u306A\u3057\u3067",
969
- "\u9664\u304F",
970
- // Russian
971
- "\u043D\u0435 \u0434\u0435\u043B\u0430\u0439",
972
- "\u043D\u0435 \u043D\u0430\u0434\u043E",
973
- "\u043D\u0435\u043B\u044C\u0437\u044F",
974
- "\u0438\u0437\u0431\u0435\u0433\u0430\u0442\u044C",
975
- "\u043D\u0438\u043A\u043E\u0433\u0434\u0430",
976
- "\u0431\u0435\u0437",
977
- "\u043A\u0440\u043E\u043C\u0435",
978
- "\u0438\u0441\u043A\u043B\u044E\u0447\u0438\u0442\u044C",
979
- "\u0431\u043E\u043B\u044C\u0448\u0435 \u043D\u0435",
980
- // German
981
- "nicht",
982
- "vermeide",
983
- "niemals",
984
- "ohne",
985
- "au\xDFer",
986
- "ausschlie\xDFen",
987
- "nicht mehr"
988
- ],
989
- domainSpecificKeywords: [
990
- // English
991
- "quantum",
992
- "fpga",
993
- "vlsi",
994
- "risc-v",
995
- "asic",
996
- "photonics",
997
- "genomics",
998
- "proteomics",
999
- "topological",
1000
- "homomorphic",
1001
- "zero-knowledge",
1002
- "lattice-based",
1003
- // Chinese
1004
- "\u91CF\u5B50",
1005
- "\u5149\u5B50\u5B66",
1006
- "\u57FA\u56E0\u7EC4\u5B66",
1007
- "\u86CB\u767D\u8D28\u7EC4\u5B66",
1008
- "\u62D3\u6251",
1009
- "\u540C\u6001",
1010
- "\u96F6\u77E5\u8BC6",
1011
- "\u683C\u5BC6\u7801",
1012
- // Japanese
1013
- "\u91CF\u5B50",
1014
- "\u30D5\u30A9\u30C8\u30CB\u30AF\u30B9",
1015
- "\u30B2\u30CE\u30DF\u30AF\u30B9",
1016
- "\u30C8\u30DD\u30ED\u30B8\u30AB\u30EB",
1017
- // Russian
1018
- "\u043A\u0432\u0430\u043D\u0442\u043E\u0432\u044B\u0439",
1019
- "\u0444\u043E\u0442\u043E\u043D\u0438\u043A\u0430",
1020
- "\u0433\u0435\u043D\u043E\u043C\u0438\u043A\u0430",
1021
- "\u043F\u0440\u043E\u0442\u0435\u043E\u043C\u0438\u043A\u0430",
1022
- "\u0442\u043E\u043F\u043E\u043B\u043E\u0433\u0438\u0447\u0435\u0441\u043A\u0438\u0439",
1023
- "\u0433\u043E\u043C\u043E\u043C\u043E\u0440\u0444\u043D\u044B\u0439",
1024
- "\u0441 \u043D\u0443\u043B\u0435\u0432\u044B\u043C \u0440\u0430\u0437\u0433\u043B\u0430\u0448\u0435\u043D\u0438\u0435\u043C",
1025
- "\u043D\u0430 \u043E\u0441\u043D\u043E\u0432\u0435 \u0440\u0435\u0448\u0451\u0442\u043E\u043A",
1026
- // German
1027
- "quanten",
1028
- "photonik",
1029
- "genomik",
1030
- "proteomik",
1031
- "topologisch",
1032
- "homomorph",
1033
- "zero-knowledge",
1034
- "gitterbasiert"
1035
- ],
1036
- // Agentic task keywords - file ops, execution, multi-step, iterative work
1037
- // Pruned: removed overly common words like "then", "first", "run", "test", "build"
1038
- agenticTaskKeywords: [
1039
- // English - File operations (clearly agentic)
1040
- "read file",
1041
- "read the file",
1042
- "look at",
1043
- "check the",
1044
- "open the",
1045
- "edit",
1046
- "modify",
1047
- "update the",
1048
- "change the",
1049
- "write to",
1050
- "create file",
1051
- // English - Execution (specific commands only)
1052
- "execute",
1053
- "deploy",
1054
- "install",
1055
- "npm",
1056
- "pip",
1057
- "compile",
1058
- // English - Multi-step patterns (specific only)
1059
- "after that",
1060
- "and also",
1061
- "once done",
1062
- "step 1",
1063
- "step 2",
1064
- // English - Iterative work
1065
- "fix",
1066
- "debug",
1067
- "until it works",
1068
- "keep trying",
1069
- "iterate",
1070
- "make sure",
1071
- "verify",
1072
- "confirm",
1073
- // Chinese (keep specific ones)
1074
- "\u8BFB\u53D6\u6587\u4EF6",
1075
- "\u67E5\u770B",
1076
- "\u6253\u5F00",
1077
- "\u7F16\u8F91",
1078
- "\u4FEE\u6539",
1079
- "\u66F4\u65B0",
1080
- "\u521B\u5EFA",
1081
- "\u6267\u884C",
1082
- "\u90E8\u7F72",
1083
- "\u5B89\u88C5",
1084
- "\u7B2C\u4E00\u6B65",
1085
- "\u7B2C\u4E8C\u6B65",
1086
- "\u4FEE\u590D",
1087
- "\u8C03\u8BD5",
1088
- "\u76F4\u5230",
1089
- "\u786E\u8BA4",
1090
- "\u9A8C\u8BC1"
1091
- ],
1092
- // Dimension weights (sum to 1.0)
1093
- dimensionWeights: {
1094
- tokenCount: 0.08,
1095
- codePresence: 0.15,
1096
- reasoningMarkers: 0.18,
1097
- technicalTerms: 0.1,
1098
- creativeMarkers: 0.05,
1099
- simpleIndicators: 0.02,
1100
- // Reduced from 0.12 to make room for agenticTask
1101
- multiStepPatterns: 0.12,
1102
- questionComplexity: 0.05,
1103
- imperativeVerbs: 0.03,
1104
- constraintCount: 0.04,
1105
- outputFormat: 0.03,
1106
- referenceComplexity: 0.02,
1107
- negationComplexity: 0.01,
1108
- domainSpecificity: 0.02,
1109
- agenticTask: 0.04
1110
- // Reduced - agentic signals influence tier selection, not dominate it
1111
- },
1112
- // Tier boundaries on weighted score axis
1113
- tierBoundaries: {
1114
- simpleMedium: 0,
1115
- mediumComplex: 0.3,
1116
- // Raised from 0.18 - prevent simple tasks from reaching expensive COMPLEX tier
1117
- complexReasoning: 0.5
1118
- // Raised from 0.4 - reserve for true reasoning tasks
1119
- },
1120
- // Sigmoid steepness for confidence calibration
1121
- confidenceSteepness: 12,
1122
- // Below this confidence → ambiguous (null tier)
1123
- confidenceThreshold: 0.7
1124
- },
1125
- // Auto (balanced) tier configs - current default smart routing
1126
- tiers: {
1127
- SIMPLE: {
1128
- primary: "moonshot/kimi-k2.5",
1129
- // $0.50/$2.40 - best quality/price for simple tasks
1130
- fallback: [
1131
- "google/gemini-2.5-flash",
1132
- // 1M context, cost-effective
1133
- "nvidia/gpt-oss-120b",
1134
- // FREE fallback
1135
- "deepseek/deepseek-chat"
1136
- ]
1137
- },
1138
- MEDIUM: {
1139
- primary: "xai/grok-code-fast-1",
1140
- // Code specialist, $0.20/$1.50
1141
- fallback: [
1142
- "google/gemini-2.5-flash",
1143
- // 1M context, cost-effective
1144
- "deepseek/deepseek-chat",
1145
- "xai/grok-4-1-fast-non-reasoning"
1146
- // Upgraded Grok 4.1
1147
- ]
1148
- },
1149
- COMPLEX: {
1150
- primary: "google/gemini-3-pro-preview",
1151
- // Latest Gemini - upgraded from 2.5
1152
- fallback: [
1153
- "google/gemini-2.5-flash",
1154
- // CRITICAL: 1M context, cheap failsafe before expensive models
1155
- "google/gemini-2.5-pro",
1156
- "deepseek/deepseek-chat",
1157
- // Another cheap option
1158
- "xai/grok-4-0709",
1159
- "openai/gpt-4o",
1160
- "openai/gpt-5.2",
1161
- "anthropic/claude-sonnet-4"
1162
- ]
1163
- },
1164
- REASONING: {
1165
- primary: "xai/grok-4-1-fast-reasoning",
1166
- // Upgraded Grok 4.1 reasoning $0.20/$0.50
1167
- fallback: [
1168
- "deepseek/deepseek-reasoner",
1169
- // Cheap reasoning model as first fallback
1170
- "xai/grok-4-fast-reasoning",
1171
- "openai/o3",
1172
- "openai/o4-mini",
1173
- // Latest o-series mini
1174
- "moonshot/kimi-k2.5"
1175
- ]
1176
- }
1177
- },
1178
- // Eco tier configs - ultra cost-optimized (blockrun/eco)
1179
- ecoTiers: {
1180
- SIMPLE: {
1181
- primary: "moonshot/kimi-k2.5",
1182
- // $0.50/$2.40
1183
- fallback: ["nvidia/gpt-oss-120b", "deepseek/deepseek-chat", "google/gemini-2.5-flash"]
1184
- },
1185
- MEDIUM: {
1186
- primary: "deepseek/deepseek-chat",
1187
- // $0.14/$0.28
1188
- fallback: ["xai/grok-code-fast-1", "google/gemini-2.5-flash", "moonshot/kimi-k2.5"]
1189
- },
1190
- COMPLEX: {
1191
- primary: "xai/grok-4-0709",
1192
- // $0.20/$1.50
1193
- fallback: ["deepseek/deepseek-chat", "google/gemini-2.5-flash", "openai/gpt-4o-mini"]
1194
- },
1195
- REASONING: {
1196
- primary: "deepseek/deepseek-reasoner",
1197
- // $0.55/$2.19
1198
- fallback: ["xai/grok-4-fast-reasoning", "moonshot/kimi-k2.5"]
1199
- }
1200
- },
1201
- // Premium tier configs - best quality (blockrun/premium)
1202
- // codex=complex coding, kimi=simple coding, sonnet=reasoning/instructions, opus=architecture/PM/audits
1203
- premiumTiers: {
1204
- SIMPLE: {
1205
- primary: "moonshot/kimi-k2.5",
1206
- // $0.50/$2.40 - good for simple coding
1207
- fallback: ["anthropic/claude-haiku-4.5", "google/gemini-2.5-flash", "xai/grok-code-fast-1"]
1208
- },
1209
- MEDIUM: {
1210
- primary: "anthropic/claude-sonnet-4",
1211
- // $3/$15 - reasoning/instructions
1212
- fallback: [
1213
- "openai/gpt-5.2-codex",
1214
- "moonshot/kimi-k2.5",
1215
- "google/gemini-2.5-pro",
1216
- "xai/grok-4-0709"
1217
- ]
1218
- },
1219
- COMPLEX: {
1220
- primary: "openai/gpt-5.2-codex",
1221
- // $2.50/$10 - complex coding (78% cost savings vs Opus)
1222
- fallback: [
1223
- "anthropic/claude-opus-4.6",
1224
- "anthropic/claude-opus-4.5",
1225
- "anthropic/claude-sonnet-4",
1226
- "google/gemini-3-pro-preview",
1227
- "moonshot/kimi-k2.5"
1228
- ]
1229
- },
1230
- REASONING: {
1231
- primary: "anthropic/claude-sonnet-4",
1232
- // $3/$15 - best for reasoning/instructions
1233
- fallback: [
1234
- "anthropic/claude-opus-4.6",
1235
- "anthropic/claude-opus-4.5",
1236
- "openai/o3",
1237
- "xai/grok-4-1-fast-reasoning"
1238
- ]
1239
- }
1240
- },
1241
- // Agentic tier configs - models that excel at multi-step autonomous tasks
1242
- agenticTiers: {
1243
- SIMPLE: {
1244
- primary: "moonshot/kimi-k2.5",
1245
- // Cheaper than Haiku ($0.5/$2.4 vs $1/$5), larger context
1246
- fallback: [
1247
- "anthropic/claude-haiku-4.5",
1248
- "xai/grok-4-fast-non-reasoning",
1249
- "openai/gpt-4o-mini"
1250
- ]
1251
- },
1252
- MEDIUM: {
1253
- primary: "xai/grok-code-fast-1",
1254
- // Code specialist for agentic coding
1255
- fallback: ["moonshot/kimi-k2.5", "anthropic/claude-haiku-4.5", "anthropic/claude-sonnet-4"]
1256
- },
1257
- COMPLEX: {
1258
- primary: "anthropic/claude-sonnet-4",
1259
- fallback: [
1260
- "anthropic/claude-opus-4.6",
1261
- // Latest Opus - best agentic
1262
- "openai/gpt-5.2",
1263
- "google/gemini-3-pro-preview",
1264
- "xai/grok-4-0709"
1265
- ]
1266
- },
1267
- REASONING: {
1268
- primary: "anthropic/claude-sonnet-4",
1269
- // Strong tool use + reasoning for agentic tasks
1270
- fallback: [
1271
- "anthropic/claude-opus-4.6",
1272
- "xai/grok-4-fast-reasoning",
1273
- "moonshot/kimi-k2.5",
1274
- "deepseek/deepseek-reasoner"
1275
- ]
1276
- }
1277
- },
1278
- overrides: {
1279
- maxTokensForceComplex: 1e5,
1280
- structuredOutputMinTier: "MEDIUM",
1281
- ambiguousDefaultTier: "MEDIUM",
1282
- agenticMode: false
1283
- }
1284
- };
1285
-
1286
- // src/router/index.ts
1287
/**
 * Choose a model for a request using the rule-based classifier.
 *
 * The input size is estimated as ceil(characters / 4) over the system prompt
 * and prompt joined by a single space. The tier table is picked from the
 * routing profile ("eco" / "premium" when the matching table is configured),
 * otherwise from the agentic table when the classifier's agentic score is at
 * least 0.5 or `overrides.agenticMode` is set, otherwise from the default
 * tiers.
 *
 * @param prompt           User prompt text.
 * @param systemPrompt     Optional system prompt; also scanned for
 *                         structured-output hints (json / structured / schema).
 * @param maxOutputTokens  Passed through to `selectModel`.
 * @param options          `{ config, modelPricing, routingProfile }`.
 * @returns Whatever `selectModel` returns for the chosen tier.
 */
function route(prompt, systemPrompt, maxOutputTokens, options) {
  const { config, modelPricing, routingProfile } = options;
  const { overrides } = config;

  const estimatedTokens = Math.ceil(`${systemPrompt ?? ""} ${prompt}`.length / 4);
  const ruleResult = classifyByRules(prompt, systemPrompt, estimatedTokens, config.scoring);

  // Resolve which tier table applies and the tag appended to the reasoning text.
  let tierConfigs;
  let profileSuffix = "";
  if (routingProfile === "eco" && config.ecoTiers) {
    tierConfigs = config.ecoTiers;
    profileSuffix = " | eco";
  } else if (routingProfile === "premium" && config.premiumTiers) {
    tierConfigs = config.premiumTiers;
    profileSuffix = " | premium";
  } else {
    const explicitAgentic = Boolean(overrides.agenticMode);
    const autoAgentic = (ruleResult.agenticScore ?? 0) >= 0.5;
    if ((autoAgentic || explicitAgentic) && config.agenticTiers != null) {
      tierConfigs = config.agenticTiers;
      profileSuffix = " | agentic";
    } else {
      tierConfigs = config.tiers;
    }
  }

  // Every exit goes through selectModel with the same trailing arguments.
  const finish = (chosenTier, chosenConfidence, why) =>
    selectModel(
      chosenTier,
      chosenConfidence,
      "rules",
      why,
      tierConfigs,
      modelPricing,
      estimatedTokens,
      maxOutputTokens,
      routingProfile
    );

  // Very large inputs bypass scoring entirely.
  if (estimatedTokens > overrides.maxTokensForceComplex) {
    return finish(
      "COMPLEX",
      0.95,
      `Input exceeds ${overrides.maxTokensForceComplex} tokens${profileSuffix}`
    );
  }

  const wantsStructuredOutput = Boolean(systemPrompt) && /json|structured|schema/i.test(systemPrompt);
  const reasoningParts = [`score=${ruleResult.score.toFixed(2)} | ${ruleResult.signals.join(", ")}`];

  let tier;
  let confidence;
  if (ruleResult.tier === null) {
    // Classifier was not confident enough: fall back to the configured default.
    tier = overrides.ambiguousDefaultTier;
    confidence = 0.5;
    reasoningParts.push(` | ambiguous -> default: ${tier}`);
  } else {
    tier = ruleResult.tier;
    confidence = ruleResult.confidence;
  }

  if (wantsStructuredOutput) {
    const rankOf = { SIMPLE: 0, MEDIUM: 1, COMPLEX: 2, REASONING: 3 };
    const floorTier = overrides.structuredOutputMinTier;
    if (rankOf[tier] < rankOf[floorTier]) {
      reasoningParts.push(` | upgraded to ${floorTier} (structured output)`);
      tier = floorTier;
    }
  }

  reasoningParts.push(profileSuffix);
  return finish(tier, confidence, reasoningParts.join(""));
}
1356
-
1357
- // src/models.ts
1358
// Short, user-friendly aliases mapped to full model IDs.
var MODEL_ALIASES = {
  // Claude
  claude: "anthropic/claude-sonnet-4",
  sonnet: "anthropic/claude-sonnet-4",
  opus: "anthropic/claude-opus-4.6",
  // Updated to latest Opus 4.6
  "opus-46": "anthropic/claude-opus-4.6",
  "opus-45": "anthropic/claude-opus-4.5",
  haiku: "anthropic/claude-haiku-4.5",
  // OpenAI
  gpt: "openai/gpt-4o",
  gpt4: "openai/gpt-4o",
  gpt5: "openai/gpt-5.2",
  codex: "openai/gpt-5.2-codex",
  mini: "openai/gpt-4o-mini",
  o3: "openai/o3",
  // DeepSeek
  deepseek: "deepseek/deepseek-chat",
  reasoner: "deepseek/deepseek-reasoner",
  // Kimi / Moonshot
  kimi: "moonshot/kimi-k2.5",
  // Google
  gemini: "google/gemini-2.5-pro",
  flash: "google/gemini-2.5-flash",
  // xAI
  grok: "xai/grok-3",
  "grok-fast": "xai/grok-4-fast-reasoning",
  "grok-code": "xai/grok-code-fast-1",
  // NVIDIA
  nvidia: "nvidia/gpt-oss-120b",
  "gpt-120b": "nvidia/gpt-oss-120b"
  // Note: auto, free, eco, premium are virtual routing profiles registered in BLOCKRUN_MODELS
  // They don't need aliases since they're already top-level model IDs
};

/**
 * Resolve a model alias to its full model ID.
 *
 * The input is trimmed and lower-cased before lookup. A leading "blockrun/"
 * prefix is also accepted: the remainder is looked up as an alias and, when it
 * is not one, returned (lower-cased) without the prefix. Anything else is
 * returned exactly as given.
 *
 * Only own properties of MODEL_ALIASES count as aliases, so names such as
 * "constructor" or "__proto__" are never resolved to inherited
 * Object.prototype members.
 *
 * @param model  Alias or model ID supplied by the caller.
 * @returns The resolved model ID string.
 */
function resolveModelAlias(model) {
  const normalized = model.trim().toLowerCase();
  if (Object.hasOwn(MODEL_ALIASES, normalized)) {
    return MODEL_ALIASES[normalized];
  }
  if (normalized.startsWith("blockrun/")) {
    const withoutPrefix = normalized.slice("blockrun/".length);
    return Object.hasOwn(MODEL_ALIASES, withoutPrefix)
      ? MODEL_ALIASES[withoutPrefix]
      : withoutPrefix;
  }
  return model;
}
1404
- var BLOCKRUN_MODELS = [
1405
- // Smart routing meta-models — proxy replaces with actual model
1406
- // NOTE: Model IDs are WITHOUT provider prefix (OpenClaw adds "blockrun/" automatically)
1407
- {
1408
- id: "auto",
1409
- name: "Auto (Smart Router - Balanced)",
1410
- inputPrice: 0,
1411
- outputPrice: 0,
1412
- contextWindow: 105e4,
1413
- maxOutput: 128e3
1414
- },
1415
- {
1416
- id: "free",
1417
- name: "Free (NVIDIA GPT-OSS-120B only)",
1418
- inputPrice: 0,
1419
- outputPrice: 0,
1420
- contextWindow: 128e3,
1421
- maxOutput: 4096
1422
- },
1423
- {
1424
- id: "eco",
1425
- name: "Eco (Smart Router - Cost Optimized)",
1426
- inputPrice: 0,
1427
- outputPrice: 0,
1428
- contextWindow: 105e4,
1429
- maxOutput: 128e3
1430
- },
1431
- {
1432
- id: "premium",
1433
- name: "Premium (Smart Router - Best Quality)",
1434
- inputPrice: 0,
1435
- outputPrice: 0,
1436
- contextWindow: 2e6,
1437
- maxOutput: 2e5
1438
- },
1439
- // OpenAI GPT-5 Family
1440
- {
1441
- id: "openai/gpt-5.2",
1442
- name: "GPT-5.2",
1443
- inputPrice: 1.75,
1444
- outputPrice: 14,
1445
- contextWindow: 4e5,
1446
- maxOutput: 128e3,
1447
- reasoning: true,
1448
- vision: true,
1449
- agentic: true
1450
- },
1451
- {
1452
- id: "openai/gpt-5-mini",
1453
- name: "GPT-5 Mini",
1454
- inputPrice: 0.25,
1455
- outputPrice: 2,
1456
- contextWindow: 2e5,
1457
- maxOutput: 65536
1458
- },
1459
- {
1460
- id: "openai/gpt-5-nano",
1461
- name: "GPT-5 Nano",
1462
- inputPrice: 0.05,
1463
- outputPrice: 0.4,
1464
- contextWindow: 128e3,
1465
- maxOutput: 32768
1466
- },
1467
- {
1468
- id: "openai/gpt-5.2-pro",
1469
- name: "GPT-5.2 Pro",
1470
- inputPrice: 21,
1471
- outputPrice: 168,
1472
- contextWindow: 4e5,
1473
- maxOutput: 128e3,
1474
- reasoning: true
1475
- },
1476
- // OpenAI Codex Family
1477
- {
1478
- id: "openai/gpt-5.2-codex",
1479
- name: "GPT-5.2 Codex",
1480
- inputPrice: 2.5,
1481
- outputPrice: 12,
1482
- contextWindow: 128e3,
1483
- maxOutput: 32e3,
1484
- agentic: true
1485
- },
1486
- // OpenAI GPT-4 Family
1487
- {
1488
- id: "openai/gpt-4.1",
1489
- name: "GPT-4.1",
1490
- inputPrice: 2,
1491
- outputPrice: 8,
1492
- contextWindow: 128e3,
1493
- maxOutput: 16384,
1494
- vision: true
1495
- },
1496
- {
1497
- id: "openai/gpt-4.1-mini",
1498
- name: "GPT-4.1 Mini",
1499
- inputPrice: 0.4,
1500
- outputPrice: 1.6,
1501
- contextWindow: 128e3,
1502
- maxOutput: 16384
1503
- },
1504
- // gpt-4.1-nano removed - replaced by gpt-5-nano
1505
- {
1506
- id: "openai/gpt-4o",
1507
- name: "GPT-4o",
1508
- inputPrice: 2.5,
1509
- outputPrice: 10,
1510
- contextWindow: 128e3,
1511
- maxOutput: 16384,
1512
- vision: true,
1513
- agentic: true
1514
- },
1515
- {
1516
- id: "openai/gpt-4o-mini",
1517
- name: "GPT-4o Mini",
1518
- inputPrice: 0.15,
1519
- outputPrice: 0.6,
1520
- contextWindow: 128e3,
1521
- maxOutput: 16384
1522
- },
1523
- // OpenAI O-series (Reasoning) - o1/o1-mini removed, replaced by o3/o4
1524
- {
1525
- id: "openai/o3",
1526
- name: "o3",
1527
- inputPrice: 2,
1528
- outputPrice: 8,
1529
- contextWindow: 2e5,
1530
- maxOutput: 1e5,
1531
- reasoning: true
1532
- },
1533
- {
1534
- id: "openai/o3-mini",
1535
- name: "o3-mini",
1536
- inputPrice: 1.1,
1537
- outputPrice: 4.4,
1538
- contextWindow: 128e3,
1539
- maxOutput: 65536,
1540
- reasoning: true
1541
- },
1542
- {
1543
- id: "openai/o4-mini",
1544
- name: "o4-mini",
1545
- inputPrice: 1.1,
1546
- outputPrice: 4.4,
1547
- contextWindow: 128e3,
1548
- maxOutput: 65536,
1549
- reasoning: true
1550
- },
1551
- // Anthropic - all Claude models excel at agentic workflows
1552
- {
1553
- id: "anthropic/claude-haiku-4.5",
1554
- name: "Claude Haiku 4.5",
1555
- inputPrice: 1,
1556
- outputPrice: 5,
1557
- contextWindow: 2e5,
1558
- maxOutput: 8192,
1559
- agentic: true
1560
- },
1561
- {
1562
- id: "anthropic/claude-sonnet-4",
1563
- name: "Claude Sonnet 4",
1564
- inputPrice: 3,
1565
- outputPrice: 15,
1566
- contextWindow: 2e5,
1567
- maxOutput: 64e3,
1568
- reasoning: true,
1569
- agentic: true
1570
- },
1571
- {
1572
- id: "anthropic/claude-opus-4",
1573
- name: "Claude Opus 4",
1574
- inputPrice: 15,
1575
- outputPrice: 75,
1576
- contextWindow: 2e5,
1577
- maxOutput: 32e3,
1578
- reasoning: true,
1579
- agentic: true
1580
- },
1581
- {
1582
- id: "anthropic/claude-opus-4.5",
1583
- name: "Claude Opus 4.5",
1584
- inputPrice: 5,
1585
- outputPrice: 25,
1586
- contextWindow: 2e5,
1587
- maxOutput: 32e3,
1588
- reasoning: true,
1589
- agentic: true
1590
- },
1591
- {
1592
- id: "anthropic/claude-opus-4.6",
1593
- name: "Claude Opus 4.6",
1594
- inputPrice: 5,
1595
- outputPrice: 25,
1596
- contextWindow: 2e5,
1597
- maxOutput: 64e3,
1598
- reasoning: true,
1599
- vision: true,
1600
- agentic: true
1601
- },
1602
- // Google
1603
- {
1604
- id: "google/gemini-3-pro-preview",
1605
- name: "Gemini 3 Pro Preview",
1606
- inputPrice: 2,
1607
- outputPrice: 12,
1608
- contextWindow: 105e4,
1609
- maxOutput: 65536,
1610
- reasoning: true,
1611
- vision: true
1612
- },
1613
- {
1614
- id: "google/gemini-2.5-pro",
1615
- name: "Gemini 2.5 Pro",
1616
- inputPrice: 1.25,
1617
- outputPrice: 10,
1618
- contextWindow: 105e4,
1619
- maxOutput: 65536,
1620
- reasoning: true,
1621
- vision: true
1622
- },
1623
- {
1624
- id: "google/gemini-2.5-flash",
1625
- name: "Gemini 2.5 Flash",
1626
- inputPrice: 0.15,
1627
- outputPrice: 0.6,
1628
- contextWindow: 1e6,
1629
- maxOutput: 65536
1630
- },
1631
- // DeepSeek
1632
- {
1633
- id: "deepseek/deepseek-chat",
1634
- name: "DeepSeek V3.2 Chat",
1635
- inputPrice: 0.28,
1636
- outputPrice: 0.42,
1637
- contextWindow: 128e3,
1638
- maxOutput: 8192
1639
- },
1640
- {
1641
- id: "deepseek/deepseek-reasoner",
1642
- name: "DeepSeek V3.2 Reasoner",
1643
- inputPrice: 0.28,
1644
- outputPrice: 0.42,
1645
- contextWindow: 128e3,
1646
- maxOutput: 8192,
1647
- reasoning: true
1648
- },
1649
- // Moonshot / Kimi - optimized for agentic workflows
1650
- {
1651
- id: "moonshot/kimi-k2.5",
1652
- name: "Kimi K2.5",
1653
- inputPrice: 0.5,
1654
- outputPrice: 2.4,
1655
- contextWindow: 262144,
1656
- maxOutput: 8192,
1657
- reasoning: true,
1658
- vision: true,
1659
- agentic: true
1660
- },
1661
- // xAI / Grok
1662
- {
1663
- id: "xai/grok-3",
1664
- name: "Grok 3",
1665
- inputPrice: 3,
1666
- outputPrice: 15,
1667
- contextWindow: 131072,
1668
- maxOutput: 16384,
1669
- reasoning: true
1670
- },
1671
- // grok-3-fast removed - too expensive ($5/$25), use grok-4-fast instead
1672
- {
1673
- id: "xai/grok-3-mini",
1674
- name: "Grok 3 Mini",
1675
- inputPrice: 0.3,
1676
- outputPrice: 0.5,
1677
- contextWindow: 131072,
1678
- maxOutput: 16384
1679
- },
1680
- // xAI Grok 4 Family - Ultra-cheap fast models
1681
- {
1682
- id: "xai/grok-4-fast-reasoning",
1683
- name: "Grok 4 Fast Reasoning",
1684
- inputPrice: 0.2,
1685
- outputPrice: 0.5,
1686
- contextWindow: 131072,
1687
- maxOutput: 16384,
1688
- reasoning: true
1689
- },
1690
- {
1691
- id: "xai/grok-4-fast-non-reasoning",
1692
- name: "Grok 4 Fast",
1693
- inputPrice: 0.2,
1694
- outputPrice: 0.5,
1695
- contextWindow: 131072,
1696
- maxOutput: 16384
1697
- },
1698
- {
1699
- id: "xai/grok-4-1-fast-reasoning",
1700
- name: "Grok 4.1 Fast Reasoning",
1701
- inputPrice: 0.2,
1702
- outputPrice: 0.5,
1703
- contextWindow: 131072,
1704
- maxOutput: 16384,
1705
- reasoning: true
1706
- },
1707
- {
1708
- id: "xai/grok-4-1-fast-non-reasoning",
1709
- name: "Grok 4.1 Fast",
1710
- inputPrice: 0.2,
1711
- outputPrice: 0.5,
1712
- contextWindow: 131072,
1713
- maxOutput: 16384
1714
- },
1715
- {
1716
- id: "xai/grok-code-fast-1",
1717
- name: "Grok Code Fast",
1718
- inputPrice: 0.2,
1719
- outputPrice: 1.5,
1720
- contextWindow: 131072,
1721
- maxOutput: 16384,
1722
- agentic: true
1723
- // Good for coding tasks
1724
- },
1725
- {
1726
- id: "xai/grok-4-0709",
1727
- name: "Grok 4 (0709)",
1728
- inputPrice: 0.2,
1729
- outputPrice: 1.5,
1730
- contextWindow: 131072,
1731
- maxOutput: 16384,
1732
- reasoning: true
1733
- },
1734
- // grok-2-vision removed - old, 0 transactions
1735
- // NVIDIA - Free/cheap models
1736
- {
1737
- id: "nvidia/gpt-oss-120b",
1738
- name: "NVIDIA GPT-OSS 120B",
1739
- inputPrice: 0,
1740
- outputPrice: 0,
1741
- contextWindow: 128e3,
1742
- maxOutput: 16384
1743
- },
1744
- {
1745
- id: "nvidia/kimi-k2.5",
1746
- name: "NVIDIA Kimi K2.5",
1747
- inputPrice: 0.55,
1748
- outputPrice: 2.5,
1749
- contextWindow: 262144,
1750
- maxOutput: 16384
1751
- }
1752
- ];
1753
/**
 * Convert an internal model record into the model-definition shape built here
 * for OpenClaw: API fixed to "openai-completions", cache costs fixed to 0,
 * `reasoning` defaulting to false, and image input listed only when the
 * record is flagged `vision`.
 *
 * @param m  Record with id, name, inputPrice, outputPrice, contextWindow,
 *           maxOutput and optional reasoning / vision flags.
 * @returns A new model-definition object.
 */
function toOpenClawModel(m) {
  const { id, name, inputPrice, outputPrice, contextWindow, maxOutput } = m;

  const input = ["text"];
  if (m.vision) {
    input.push("image");
  }

  const cost = {
    input: inputPrice,
    output: outputPrice,
    cacheRead: 0,
    cacheWrite: 0
  };

  return {
    id,
    name,
    api: "openai-completions",
    reasoning: m.reasoning ?? false,
    input,
    cost,
    contextWindow,
    maxTokens: maxOutput
  };
}
1770
// One model definition per alias whose target exists in BLOCKRUN_MODELS;
// aliases pointing at an unknown model ID are left out.
var ALIAS_MODELS = [];
for (const [alias, targetId] of Object.entries(MODEL_ALIASES)) {
  const target = BLOCKRUN_MODELS.find((candidate) => candidate.id === targetId);
  if (target) {
    ALIAS_MODELS.push(
      toOpenClawModel({ ...target, id: alias, name: `${alias} \u2192 ${target.name}` })
    );
  }
}

// Full list: every real model first, followed by the alias entries.
var OPENCLAW_MODELS = BLOCKRUN_MODELS.map(toOpenClawModel).concat(ALIAS_MODELS);
1779
/**
 * Look up the context window of a model.
 *
 * A leading "blockrun/" prefix is removed before matching against
 * BLOCKRUN_MODELS; the same text elsewhere in the ID is left untouched.
 *
 * @param modelId  Model ID, optionally prefixed with "blockrun/".
 * @returns The model's `contextWindow`, or undefined when the ID is unknown.
 */
function getModelContextWindow(modelId) {
  const prefix = "blockrun/";
  const normalized = modelId.startsWith(prefix) ? modelId.slice(prefix.length) : modelId;
  const model = BLOCKRUN_MODELS.find((m) => m.id === normalized);
  return model?.contextWindow;
}

/**
 * Report whether a model is flagged as a reasoning model.
 *
 * A leading "blockrun/" prefix is removed before matching against
 * BLOCKRUN_MODELS; the same text elsewhere in the ID is left untouched.
 *
 * @param modelId  Model ID, optionally prefixed with "blockrun/".
 * @returns The model's `reasoning` flag; false when the flag is absent or the
 *          ID is unknown.
 */
function isReasoningModel(modelId) {
  const prefix = "blockrun/";
  const normalized = modelId.startsWith(prefix) ? modelId.slice(prefix.length) : modelId;
  const model = BLOCKRUN_MODELS.find((m) => m.id === normalized);
  return model?.reasoning ?? false;
}
1789
-
1790
- // src/logger.ts
1791
- import { appendFile, mkdir } from "fs/promises";
1792
- import { join } from "path";
1793
- import { homedir } from "os";
1794
// Directory that receives the usage log files.
var LOG_DIR = join(homedir(), ".openclaw", "blockrun", "logs");

// Set once the directory has been created so later calls skip the mkdir.
var dirReady = false;

/**
 * Create LOG_DIR (including missing parents) the first time it is needed.
 * The flag is only set after mkdir succeeds, so a failed attempt is retried
 * on the next call.
 */
async function ensureDir() {
  if (!dirReady) {
    await mkdir(LOG_DIR, { recursive: true });
    dirReady = true;
  }
}
1801
/**
 * Append one usage record as a JSON line to the log file for its day.
 *
 * The file name is derived from the first 10 characters of `entry.timestamp`
 * (`usage-<those characters>.jsonl` inside LOG_DIR).
 *
 * @param entry  Usage record; must carry a string `timestamp`.
 */
async function logUsage(entry) {
  try {
    await ensureDir();
    const day = entry.timestamp.slice(0, 10);
    const target = join(LOG_DIR, `usage-${day}.jsonl`);
    const line = `${JSON.stringify(entry)}\n`;
    await appendFile(target, line);
  } catch {
    // Every failure is swallowed here, so a logging problem is never
    // propagated to the caller.
  }
}
1810
-
1811
- // src/stats.ts
1812
- import { readFile, readdir } from "fs/promises";
1813
- import { join as join3 } from "path";
1814
- import { homedir as homedir2 } from "os";
1815
-
1816
- // src/version.ts
1817
- import { createRequire } from "module";
1818
- import { fileURLToPath } from "url";
1819
- import { dirname, join as join2 } from "path";
1820
// ES modules have no __filename / __dirname / require, so rebuild them from
// import.meta.url.
var __filename = fileURLToPath(import.meta.url);
var __dirname = dirname(__filename);
var require2 = createRequire(import.meta.url);

// package.json is read from the directory above this file.
var pkg = require2(join2(__dirname, "..", "package.json"));
var { version: VERSION } = pkg;

// User-Agent string built from the package version.
var USER_AGENT = `clawrouter/${VERSION}`;
1826
-
1827
- // src/stats.ts
1828
- var LOG_DIR2 = join3(homedir2(), ".openclaw", "blockrun", "logs");
1829
/**
 * Read a JSONL usage log and return its records in normalized form.
 *
 * Each non-blank line is parsed on its own. Lines that are not valid JSON, or
 * that do not decode to an object, are skipped so that a single truncated or
 * corrupted line does not discard every other record in the file.
 *
 * Missing (or falsy) fields fall back to defaults: the current time for
 * `timestamp`, "unknown" / "UNKNOWN" for `model` / `tier`, the entry's own
 * cost for `baselineCost`, and 0 for the remaining numbers.
 *
 * @param filePath  Path of the `.jsonl` file to read.
 * @returns Array of normalized records; empty when the file cannot be read.
 */
async function parseLogFile(filePath) {
  let content;
  try {
    content = await readFile(filePath, "utf-8");
  } catch {
    // Unreadable or missing file: report no entries rather than failing.
    return [];
  }

  const entries = [];
  for (const line of content.split("\n")) {
    if (line.trim() === "") continue;

    let entry;
    try {
      entry = JSON.parse(line);
    } catch {
      // Malformed line (for example a partially written record): skip it.
      continue;
    }
    if (entry === null || typeof entry !== "object") continue;

    entries.push({
      timestamp: entry.timestamp || (/* @__PURE__ */ new Date()).toISOString(),
      model: entry.model || "unknown",
      tier: entry.tier || "UNKNOWN",
      cost: entry.cost || 0,
      baselineCost: entry.baselineCost || entry.cost || 0,
      savings: entry.savings || 0,
      latencyMs: entry.latencyMs || 0
    });
  }
  return entries;
}
1849
/**
 * List the usage log files in LOG_DIR2, newest first.
 *
 * Only names of the form `usage-*.jsonl` are kept; they are sorted as strings
 * and then reversed.
 *
 * @returns Array of file names (not full paths); empty when the directory
 *          cannot be read.
 */
async function getLogFiles() {
  let names;
  try {
    names = await readdir(LOG_DIR2);
  } catch {
    return [];
  }
  const usageLogs = names.filter((name) => name.startsWith("usage-") && name.endsWith(".jsonl"));
  usageLogs.sort();
  usageLogs.reverse();
  return usageLogs;
}
1857
/**
 * Summarize one day's usage records.
 *
 * Per-tier and per-model buckets are accumulated in Maps and converted to
 * plain objects at the end, so tier or model names that collide with
 * Object.prototype members (such as "__proto__" or "constructor") become
 * ordinary buckets instead of writing counters onto shared prototypes.
 *
 * @param date     Label for the day; copied into the result unchanged.
 * @param entries  Records with `tier`, `model`, `cost`, `baselineCost` and
 *                 `latencyMs`.
 * @returns `{ date, totalRequests, totalCost, totalBaselineCost, totalSavings,
 *          avgLatencyMs, byTier, byModel }`; `avgLatencyMs` is 0 for an empty
 *          list.
 */
function aggregateDay(date, entries) {
  const tierBuckets = new Map();
  const modelBuckets = new Map();
  let totalCost = 0;
  let totalBaselineCost = 0;
  let totalLatency = 0;

  // Add one request with the given cost to the bucket named `key`.
  const addTo = (buckets, key, cost) => {
    const name = String(key);
    let bucket = buckets.get(name);
    if (!bucket) {
      bucket = { count: 0, cost: 0 };
      buckets.set(name, bucket);
    }
    bucket.count++;
    bucket.cost += cost;
  };

  for (const entry of entries) {
    addTo(tierBuckets, entry.tier, entry.cost);
    addTo(modelBuckets, entry.model, entry.cost);
    totalCost += entry.cost;
    totalBaselineCost += entry.baselineCost;
    totalLatency += entry.latencyMs;
  }

  return {
    date,
    totalRequests: entries.length,
    totalCost,
    totalBaselineCost,
    totalSavings: totalBaselineCost - totalCost,
    avgLatencyMs: entries.length > 0 ? totalLatency / entries.length : 0,
    byTier: Object.fromEntries(tierBuckets),
    byModel: Object.fromEntries(modelBuckets)
  };
}
1883
/**
 * Aggregate usage statistics over the most recent log files.
 *
 * Reads up to `days` files (newest first, as returned by getLogFiles), builds
 * a per-day summary for every non-empty file, and combines them into overall
 * totals plus per-tier and per-model breakdowns with request-share
 * percentages.
 *
 * @param days  Maximum number of log files to include (default 7).
 * @returns Overall totals, breakdowns, the per-day summaries ordered oldest
 *          first, and `entriesWithBaseline`: the request count of the days
 *          whose baseline total differs from their cost total.
 */
async function getStats(days = 7) {
  const recentFiles = (await getLogFiles()).slice(0, days);

  const dailyBreakdown = [];
  const tierTotals = {};
  const modelTotals = {};
  let totalRequests = 0;
  let totalCost = 0;
  let totalBaselineCost = 0;
  let totalLatency = 0;

  // Add every bucket of `source` onto the bucket of the same name in `target`.
  const foldInto = (target, source) => {
    for (const [name, bucket] of Object.entries(source)) {
      if (!target[name]) target[name] = { count: 0, cost: 0 };
      target[name].count += bucket.count;
      target[name].cost += bucket.cost;
    }
  };

  for (const fileName of recentFiles) {
    const entries = await parseLogFile(join3(LOG_DIR2, fileName));
    if (entries.length === 0) continue;

    const date = fileName.replace("usage-", "").replace(".jsonl", "");
    const day = aggregateDay(date, entries);
    dailyBreakdown.push(day);

    totalRequests += day.totalRequests;
    totalCost += day.totalCost;
    totalBaselineCost += day.totalBaselineCost;
    // Undo the per-day averaging so the overall average is request-weighted.
    totalLatency += day.avgLatencyMs * day.totalRequests;

    foldInto(tierTotals, day.byTier);
    foldInto(modelTotals, day.byModel);
  }

  // Copy each bucket and attach its share of all requests, in percent.
  const withPercentage = (buckets) => {
    const out = {};
    for (const [name, bucket] of Object.entries(buckets)) {
      out[name] = {
        ...bucket,
        percentage: totalRequests > 0 ? bucket.count / totalRequests * 100 : 0
      };
    }
    return out;
  };

  const totalSavings = totalBaselineCost - totalCost;
  const entriesWithBaseline = dailyBreakdown.reduce(
    (sum, day) => (day.totalBaselineCost !== day.totalCost ? sum + day.totalRequests : sum),
    0
  );

  return {
    period: days === 1 ? "today" : `last ${days} days`,
    totalRequests,
    totalCost,
    totalBaselineCost,
    totalSavings,
    savingsPercentage: totalBaselineCost > 0 ? totalSavings / totalBaselineCost * 100 : 0,
    avgLatencyMs: totalRequests > 0 ? totalLatency / totalRequests : 0,
    avgCostPerRequest: totalRequests > 0 ? totalCost / totalRequests : 0,
    byTier: withPercentage(tierTotals),
    byModel: withPercentage(modelTotals),
    // Oldest first for charts
    dailyBreakdown: dailyBreakdown.reverse(),
    entriesWithBaseline
  };
}
1954
-
1955
- // src/dedup.ts
1956
- import { createHash } from "crypto";
1957
- var DEFAULT_TTL_MS2 = 3e4;
1958
- var MAX_BODY_SIZE = 1048576;
1959
/**
 * Return a copy of a JSON-like value in which every object's keys are sorted,
 * recursively, so that two values differing only in key order serialize
 * identically.
 *
 * Objects are rebuilt with Object.fromEntries, which defines own properties;
 * an own "__proto__" key in the input is therefore preserved as data rather
 * than being applied as the copy's prototype and vanishing from the output.
 *
 * @param obj  Any JSON-like value.
 * @returns Primitives and null unchanged; arrays and objects as new copies.
 */
function canonicalize(obj) {
  if (obj === null || typeof obj !== "object") {
    return obj;
  }
  if (Array.isArray(obj)) {
    return obj.map((item) => canonicalize(item));
  }
  return Object.fromEntries(
    Object.keys(obj)
      .sort()
      .map((key) => [key, canonicalize(obj[key])])
  );
}
1972
// Matches a leading "[Www YYYY-MM-DD HH:MM ZONE] " tag at the start of a string.
var TIMESTAMP_PATTERN = /^\[\w{3}\s+\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}\s+\w+\]\s*/;

/**
 * Return a copy of a JSON-like value with the leading timestamp tag removed
 * from every string stored under a `content` key, at any depth.
 *
 * Objects are rebuilt with Object.fromEntries, which defines own properties;
 * an own "__proto__" key in the input is therefore preserved as data rather
 * than being applied as the copy's prototype and vanishing from the output.
 *
 * @param obj  Any JSON-like value; it is not modified.
 * @returns Primitives and null unchanged; arrays and objects as new copies.
 */
function stripTimestamps(obj) {
  if (obj === null || typeof obj !== "object") {
    return obj;
  }
  if (Array.isArray(obj)) {
    return obj.map((item) => stripTimestamps(item));
  }
  return Object.fromEntries(
    Object.entries(obj).map(([key, value]) => [
      key,
      key === "content" && typeof value === "string"
        ? value.replace(TIMESTAMP_PATTERN, "")
        : stripTimestamps(value)
    ])
  );
}
1990
/**
 * Tracks in-flight and recently completed requests by a hash of their body so
 * that identical requests can share one result.
 *
 * - `inflight` maps a key to the resolvers of callers waiting on that request.
 * - `completed` maps a key to a finished result, kept for `ttlMs` milliseconds.
 */
var RequestDeduplicator = class {
  inflight = /* @__PURE__ */ new Map();
  completed = /* @__PURE__ */ new Map();
  ttlMs;
  /** @param ttlMs  How long completed results stay cached, in milliseconds. */
  constructor(ttlMs = DEFAULT_TTL_MS2) {
    this.ttlMs = ttlMs;
  }
  /**
   * Hash request body to create a dedup key.
   * JSON bodies are hashed in canonical form (timestamp tags stripped, keys
   * sorted); anything that does not parse as JSON is hashed as-is. The key is
   * the first 16 hex characters of the SHA-256 digest.
   */
  static hash(body) {
    let content = body;
    try {
      const parsed = JSON.parse(body.toString());
      const stripped = stripTimestamps(parsed);
      const canonical = canonicalize(stripped);
      content = Buffer.from(JSON.stringify(canonical));
    } catch {
      // Not JSON: fall back to hashing the raw body.
    }
    return createHash("sha256").update(content).digest("hex").slice(0, 16);
  }
  /** Check if a response is cached for this key. Expired entries are evicted. */
  getCached(key) {
    const entry = this.completed.get(key);
    if (!entry) return void 0;
    if (Date.now() - entry.completedAt > this.ttlMs) {
      this.completed.delete(key);
      return void 0;
    }
    return entry;
  }
  /** Check if a request with this key is currently in-flight. Returns a promise to wait on. */
  getInflight(key) {
    const entry = this.inflight.get(key);
    if (!entry) return void 0;
    return new Promise((resolve) => {
      entry.resolvers.push(resolve);
    });
  }
  /**
   * Mark a request as in-flight.
   * An existing entry is left in place so callers already waiting on it are
   * not dropped (they would otherwise never be resolved).
   */
  markInflight(key) {
    if (this.inflight.has(key)) return;
    this.inflight.set(key, {
      resolvers: []
    });
  }
  /**
   * Complete an in-flight request — cache result and notify waiters.
   * Results whose body exceeds MAX_BODY_SIZE are delivered to waiters but not
   * cached.
   */
  complete(key, result) {
    if (result.body.length <= MAX_BODY_SIZE) {
      this.completed.set(key, result);
    }
    const entry = this.inflight.get(key);
    if (entry) {
      for (const resolve of entry.resolvers) {
        resolve(result);
      }
      this.inflight.delete(key);
    }
    this.prune();
  }
  /**
   * Remove an in-flight entry on error (don't cache failures).
   * Waiters are resolved with a 503 JSON error response (their promise is not
   * rejected) so they can retry independently.
   */
  removeInflight(key) {
    const entry = this.inflight.get(key);
    if (entry) {
      const errorBody = Buffer.from(
        JSON.stringify({
          error: { message: "Original request failed, please retry", type: "dedup_origin_failed" }
        })
      );
      for (const resolve of entry.resolvers) {
        resolve({
          status: 503,
          headers: { "content-type": "application/json" },
          body: errorBody,
          completedAt: Date.now()
        });
      }
      this.inflight.delete(key);
    }
  }
  /** Prune expired completed entries. */
  prune() {
    const now = Date.now();
    for (const [key, entry] of this.completed) {
      if (now - entry.completedAt > this.ttlMs) {
        this.completed.delete(key);
      }
    }
  }
};
2078
-
2079
- // src/response-cache.ts
2080
- import { createHash as createHash2 } from "crypto";
2081
// Fallback settings merged under whatever the ResponseCache constructor receives.
var DEFAULT_CONFIG = {
  // Entry count at which set() starts evicting.
  maxSize: 200,
  // Lifetime in seconds used when set() is called without a TTL.
  defaultTTL: 600,
  // Largest response body that will be stored: 1MB.
  maxItemSize: 1024 * 1024,
  // Master switch for the cache.
  enabled: true
};
2088
/**
 * Return a copy of a JSON-like value in which every object has its keys in
 * sorted order, recursively. Primitives and null are returned unchanged and
 * arrays keep their element order.
 */
function canonicalize2(obj) {
  const isContainer = typeof obj === "object" && obj !== null;
  if (!isContainer) {
    return obj;
  }
  if (Array.isArray(obj)) {
    return obj.map(canonicalize2);
  }
  const ordered = {};
  const names = Object.keys(obj);
  names.sort();
  names.forEach((name) => {
    ordered[name] = canonicalize2(obj[name]);
  });
  return ordered;
}
2101
// Leading "[Www YYYY-MM-DD HH:MM ZONE] " style prefix at the start of message content.
var TIMESTAMP_PATTERN2 = /^\[\w{3}\s+\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}\s+\w+\]\s*/;
/**
 * Build the object used for cache-key hashing from a parsed request body.
 *
 * The `stream`, `user`, `request_id` and `x-request-id` fields are dropped,
 * and string message contents lose a leading timestamp prefix. Everything
 * else is copied by reference.
 */
function normalizeForCache(obj) {
  const ignoredFields = ["stream", "user", "request_id", "x-request-id"];
  const withoutTimestamp = (msg) => {
    const hasTextContent = typeof msg === "object" && msg !== null && typeof msg.content === "string";
    if (!hasTextContent) {
      return msg;
    }
    return { ...msg, content: msg.content.replace(TIMESTAMP_PATTERN2, "") };
  };
  const normalized = {};
  for (const [field, fieldValue] of Object.entries(obj)) {
    if (ignoredFields.includes(field)) {
      continue;
    }
    const isMessageList = field === "messages" && Array.isArray(fieldValue);
    normalized[field] = isMessageList ? fieldValue.map((msg) => withoutTimestamp(msg)) : fieldValue;
  }
  return normalized;
}
2124
/**
 * In-memory response cache with per-entry TTL and a bounded entry count.
 */
var ResponseCache = class {
  /** Cache key -> stored response (with `cachedAt` / `expiresAt`). */
  cache = /* @__PURE__ */ new Map();
  /**
   * `{ expiresAt, key }` records, one per `set()` call. Despite the name this
   * is a plain array that `evict()` sorts on demand; records for keys that
   * were overwritten or removed are detected and skipped there.
   */
  expirationHeap = [];
  /** DEFAULT_CONFIG overlaid with the defined values passed to the constructor. */
  config;
  // Stats for monitoring
  stats = {
    hits: 0,
    misses: 0,
    evictions: 0
  };
  /**
   * @param config - Partial configuration; `undefined` values are ignored so
   *   they cannot mask a default.
   */
  constructor(config = {}) {
    const filtered = Object.fromEntries(
      Object.entries(config).filter(([, v]) => v !== void 0)
    );
    this.config = { ...DEFAULT_CONFIG, ...filtered };
  }
  /**
   * Generate cache key from request body.
   *
   * JSON bodies are normalized and canonicalized before hashing; a body that
   * cannot be processed that way is hashed as raw text.
   *
   * @returns First 32 hex characters of the SHA-256 digest.
   */
  static generateKey(body) {
    try {
      const parsed = JSON.parse(typeof body === "string" ? body : body.toString());
      const normalized = normalizeForCache(parsed);
      const canonical = canonicalize2(normalized);
      const keyContent = JSON.stringify(canonical);
      return createHash2("sha256").update(keyContent).digest("hex").slice(0, 32);
    } catch {
      const content = typeof body === "string" ? body : body.toString();
      return createHash2("sha256").update(content).digest("hex").slice(0, 32);
    }
  }
  /**
   * Check if caching is enabled for this request.
   *
   * Returns false when the cache is disabled, when the `cache-control` header
   * contains `no-cache`, or when the JSON body sets `cache: false` or
   * `no_cache: true`.
   */
  shouldCache(body, headers) {
    if (!this.config.enabled) return false;
    if (headers?.["cache-control"]?.includes("no-cache")) {
      return false;
    }
    try {
      const parsed = JSON.parse(typeof body === "string" ? body : body.toString());
      if (parsed.cache === false || parsed.no_cache === true) {
        return false;
      }
    } catch {
      // Body is not inspectable JSON: no opt-out can be expressed, so cache.
    }
    return true;
  }
  /**
   * Get cached response if available and not expired.
   * Updates the hit/miss counters; an expired entry is deleted and counted
   * as a miss.
   */
  get(key) {
    const entry = this.cache.get(key);
    if (!entry) {
      this.stats.misses++;
      return void 0;
    }
    if (Date.now() > entry.expiresAt) {
      this.cache.delete(key);
      this.stats.misses++;
      return void 0;
    }
    this.stats.hits++;
    return entry;
  }
  /**
   * Cache a response with optional custom TTL.
   *
   * Nothing is stored when the cache is disabled, `maxSize` is not positive,
   * the body exceeds `maxItemSize`, or the status is 400 or above.
   *
   * @param ttlSeconds - Lifetime in seconds; defaults to `config.defaultTTL`.
   */
  set(key, response, ttlSeconds) {
    if (!this.config.enabled || this.config.maxSize <= 0) return;
    if (response.body.length > this.config.maxItemSize) {
      console.log(`[ResponseCache] Skipping cache - item too large: ${response.body.length} bytes`);
      return;
    }
    if (response.status >= 400) {
      return;
    }
    // Overwriting an existing key does not grow the cache, so evicting here
    // would throw away an unrelated live entry for no reason.
    if (!this.cache.has(key) && this.cache.size >= this.config.maxSize) {
      this.evict();
    }
    const now = Date.now();
    const ttl = ttlSeconds ?? this.config.defaultTTL;
    const expiresAt = now + ttl * 1e3;
    const entry = {
      ...response,
      cachedAt: now,
      expiresAt
    };
    this.cache.set(key, entry);
    this.expirationHeap.push({ expiresAt, key });
  }
  /**
   * Evict expired and oldest entries to make room.
   *
   * First drops stale expiry records and expired entries from the front of
   * the expiry-sorted list, then, while the cache is still full, removes the
   * entries that would expire soonest.
   */
  evict() {
    const now = Date.now();
    this.expirationHeap.sort((a, b) => a.expiresAt - b.expiresAt);
    while (this.expirationHeap.length > 0) {
      const oldest = this.expirationHeap[0];
      const entry = this.cache.get(oldest.key);
      // A record is stale when its key is gone or was re-set with a new expiry.
      if (!entry || entry.expiresAt !== oldest.expiresAt) {
        this.expirationHeap.shift();
        continue;
      }
      if (oldest.expiresAt <= now) {
        this.cache.delete(oldest.key);
        this.expirationHeap.shift();
        this.stats.evictions++;
      } else {
        break;
      }
    }
    while (this.cache.size >= this.config.maxSize && this.expirationHeap.length > 0) {
      const oldest = this.expirationHeap.shift();
      if (this.cache.has(oldest.key)) {
        this.cache.delete(oldest.key);
        this.stats.evictions++;
      }
    }
  }
  /**
   * Get cache statistics.
   * `hitRate` is a percentage string with one decimal, or "0%" before any lookup.
   */
  getStats() {
    const total = this.stats.hits + this.stats.misses;
    const hitRate = total > 0 ? (this.stats.hits / total * 100).toFixed(1) + "%" : "0%";
    return {
      size: this.cache.size,
      maxSize: this.config.maxSize,
      hits: this.stats.hits,
      misses: this.stats.misses,
      evictions: this.stats.evictions,
      hitRate
    };
  }
  /**
   * Clear all cached entries. The counters are left untouched.
   */
  clear() {
    this.cache.clear();
    this.expirationHeap = [];
  }
  /**
   * Check if cache is enabled.
   */
  isEnabled() {
    return this.config.enabled;
  }
};
2275
-
2276
- // src/balance.ts
2277
- import { createPublicClient, http, erc20Abi } from "viem";
2278
- import { base } from "viem/chains";
2279
-
2280
- // src/errors.ts
2281
/**
 * Error raised when an RPC call fails.
 *
 * The underlying failure is kept on `originalError` and, when one is
 * supplied, is also chained through the standard `cause` property so that
 * generic tooling can follow the error chain.
 */
var RpcError = class extends Error {
  /** Stable machine-readable error code. */
  code = "RPC_ERROR";
  /** The value that was caught when the RPC call failed. */
  originalError;
  /**
   * @param message - Short description embedded in the final message.
   * @param originalError - The underlying error, if any.
   */
  constructor(message, originalError) {
    super(
      `RPC error: ${message}. Check network connectivity.`,
      // Only pass options when there is something to chain, so errors built
      // without an original error do not gain an own `cause: undefined`.
      originalError === void 0 ? void 0 : { cause: originalError }
    );
    this.name = "RpcError";
    this.originalError = originalError;
  }
};
2290
-
2291
- // src/balance.ts
2292
// Contract address queried for the token balance.
var USDC_BASE2 = "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913";
// A fetched balance is reused for 30 seconds.
var CACHE_TTL_MS = 3e4;
var BALANCE_THRESHOLDS = {
  /** Low balance warning threshold: $1.00 */
  LOW_BALANCE_MICROS: 1000000n,
  /** Effectively zero threshold: $0.0001 (covers dust/rounding) */
  ZERO_THRESHOLD: 100n
};
/**
 * Tracks the USDC balance of one wallet, caching the last value read over RPC.
 */
var BalanceMonitor = class {
  client;
  walletAddress;
  /** Last balance read from RPC, or null when nothing is cached. */
  cachedBalance = null;
  /** Timestamp (ms) associated with the cached balance. */
  cachedAt = 0;
  /**
   * @param walletAddress - Address whose balance is monitored.
   */
  constructor(walletAddress) {
    this.walletAddress = walletAddress;
    // 10 second timeout to prevent hanging on slow RPC
    const transport = http(void 0, { timeout: 1e4 });
    this.client = createPublicClient({ chain: base, transport });
  }
  /**
   * Return balance info, served from the cache while it is younger than
   * CACHE_TTL_MS and otherwise fetched from RPC.
   */
  async checkBalance() {
    const now = Date.now();
    const cacheIsFresh = this.cachedBalance !== null && now - this.cachedAt < CACHE_TTL_MS;
    if (cacheIsFresh) {
      return this.buildInfo(this.cachedBalance);
    }
    const fetched = await this.fetchBalance();
    this.cachedBalance = fetched;
    // The timestamp taken before the fetch is the one recorded.
    this.cachedAt = now;
    return this.buildInfo(fetched);
  }
  /**
   * Compare the current balance with an estimated cost.
   *
   * @param estimatedCostMicros - Estimated cost in USDC smallest unit (6 decimals)
   * @returns `{ sufficient, info }`, plus a formatted `shortfall` when insufficient.
   */
  async checkSufficient(estimatedCostMicros) {
    const info = await this.checkBalance();
    const missing = estimatedCostMicros - info.balance;
    if (missing <= 0n) {
      return { sufficient: true, info };
    }
    return { sufficient: false, info, shortfall: this.formatUSDC(missing) };
  }
  /**
   * Subtract an amount from the cached balance without contacting RPC.
   * Does nothing when no balance is cached or the cached balance is smaller
   * than the amount.
   *
   * @param amountMicros - Amount to deduct in USDC smallest unit
   */
  deductEstimated(amountMicros) {
    if (this.cachedBalance === null) return;
    if (this.cachedBalance < amountMicros) return;
    this.cachedBalance -= amountMicros;
  }
  /**
   * Drop the cached balance so the next checkBalance() goes to RPC.
   */
  invalidate() {
    this.cachedAt = 0;
    this.cachedBalance = null;
  }
  /**
   * Invalidate the cache and read the balance again.
   */
  async refresh() {
    this.invalidate();
    return this.checkBalance();
  }
  /**
   * Format a micro-USDC amount as "$X.XX".
   */
  formatUSDC(amountMicros) {
    const dollars = Number(amountMicros) / 1e6;
    return "$" + dollars.toFixed(2);
  }
  /**
   * Address this monitor was created for.
   */
  getWalletAddress() {
    return this.walletAddress;
  }
  /** Read `balanceOf` for the wallet; any failure is wrapped in RpcError. */
  async fetchBalance() {
    const request = {
      address: USDC_BASE2,
      abi: erc20Abi,
      functionName: "balanceOf",
      args: [this.walletAddress]
    };
    try {
      return await this.client.readContract(request);
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      throw new RpcError(reason, error);
    }
  }
  /** Derive the BalanceInfo record (formatted value and threshold flags) from a raw balance. */
  buildInfo(balance) {
    const { LOW_BALANCE_MICROS, ZERO_THRESHOLD } = BALANCE_THRESHOLDS;
    return {
      balance,
      balanceUSD: this.formatUSDC(balance),
      isLow: balance < LOW_BALANCE_MICROS,
      isEmpty: balance < ZERO_THRESHOLD,
      walletAddress: this.walletAddress
    };
  }
};
2412
-
2413
- // src/compression/types.ts
2414
// Baseline compression settings; caller-supplied options are layered on top.
var DEFAULT_COMPRESSION_CONFIG = {
  enabled: true,
  preserveRaw: true,
  layers: {
    // Safe: removes duplicate messages
    deduplication: true,
    // Safe: normalizes whitespace
    whitespace: true,
    // DISABLED: requires model to understand codebook
    dictionary: false,
    // DISABLED: requires model to understand path codes
    paths: false,
    // Safe: just removes JSON whitespace
    jsonCompact: true,
    // DISABLED: may lose important context
    observation: false,
    // DISABLED: requires model to understand codes
    dynamicCodebook: false
  },
  dictionary: {
    maxEntries: 50,
    minPhraseLength: 15,
    // No codebook header needed
    includeCodebookHeader: false
  }
};
2440
-
2441
- // src/compression/layers/deduplication.ts
2442
- import crypto2 from "crypto";
2443
/**
 * Compute an MD5 fingerprint of a chat message for duplicate detection.
 *
 * The fingerprint covers role, content, tool_call_id, name and, when present,
 * the name/arguments of every tool call (tool call ids are ignored).
 *
 * The fields are serialized as a JSON array rather than joined with "|", so a
 * "|" inside one field can no longer make two different messages hash alike.
 * Non-string content is serialized as JSON instead of being coerced to a
 * string, which would turn every object into "[object Object]".
 * NOTE(review): other layers in this file treat content as a string; the
 * non-string branch is defensive — confirm against the message type.
 *
 * @param message - Chat message to fingerprint.
 * @returns 32-character hex digest.
 */
function hashMessage(message) {
  const rawContent = message.content;
  let content = "";
  if (typeof rawContent === "string") {
    content = rawContent;
  } else if (rawContent) {
    content = JSON.stringify(rawContent);
  }
  const toolCalls = message.tool_calls
    ? message.tool_calls.map((tc) => ({
        name: tc.function.name,
        args: tc.function.arguments
      }))
    : null;
  const fingerprint = JSON.stringify([
    message.role,
    content,
    message.tool_call_id || "",
    message.name || "",
    toolCalls
  ]);
  // MD5 is used purely as a fast fingerprint here, not for security.
  return crypto2.createHash("md5").update(fingerprint).digest("hex");
}
2463
/**
 * Drop repeated messages from a conversation.
 *
 * System, user and tool messages are always kept, as is any assistant message
 * with a tool call whose id is referenced by a tool message. Every other
 * message is kept only the first time its hashMessage() fingerprint is seen.
 *
 * @returns `{ messages, duplicatesRemoved, originalCount }`
 */
function deduplicateMessages(messages) {
  // Ids of tool calls that some tool message responds to.
  const answeredCallIds = /* @__PURE__ */ new Set();
  for (const { role, tool_call_id: callId } of messages) {
    if (role === "tool" && callId) {
      answeredCallIds.add(callId);
    }
  }
  const isProtected = (msg) => {
    if (msg.role === "system" || msg.role === "user" || msg.role === "tool") {
      return true;
    }
    if (msg.role !== "assistant" || !msg.tool_calls) {
      return false;
    }
    return msg.tool_calls.some((tc) => answeredCallIds.has(tc.id));
  };
  const fingerprints = /* @__PURE__ */ new Set();
  const kept = [];
  let duplicatesRemoved = 0;
  for (const msg of messages) {
    if (isProtected(msg)) {
      kept.push(msg);
      continue;
    }
    const fingerprint = hashMessage(msg);
    if (fingerprints.has(fingerprint)) {
      duplicatesRemoved += 1;
      continue;
    }
    fingerprints.add(fingerprint);
    kept.push(msg);
  }
  return { messages: kept, duplicatesRemoved, originalCount: messages.length };
}
2509
-
2510
- // src/compression/layers/whitespace.ts
2511
/**
 * Normalize whitespace in a block of text. Falsy input is returned as-is.
 *
 * The rewrites run strictly in the listed order and the result is trimmed.
 */
function normalizeWhitespace(content) {
  if (!content) return content;
  const rewrites = [
    // Unify line endings to "\n".
    [/\r\n/g, "\n"],
    [/\r/g, "\n"],
    // At most one blank line in a row.
    [/\n{3,}/g, "\n\n"],
    // Strip trailing spaces/tabs on every line.
    [/[ \t]+$/gm, ""],
    // Collapse a run of spaces that follows any non-newline character.
    [/([^\n]) {2,}/g, "$1 "],
    // Shrink deep leading indentation.
    // NOTE(review): the previous rule already collapses runs of 3+ spaces, so
    // this one looks unreachable — confirm the intended rule order.
    [/^[ ]{8,}/gm, (match) => " ".repeat(Math.ceil(match.length / 4))],
    // Tabs become a single space.
    [/\t/g, " "]
  ];
  let text = content;
  for (const [pattern, replacement] of rewrites) {
    text = text.replace(pattern, replacement);
  }
  return text.trim();
}
2515
/**
 * Apply normalizeWhitespace() to the content of every message that has some.
 *
 * @returns `{ messages, charsSaved }`: copies with normalized content
 *   (messages without content are passed through by reference) and the total
 *   number of characters removed.
 */
function normalizeMessagesWhitespace(messages) {
  let charsSaved = 0;
  const cleanOne = (message) => {
    const before = message.content;
    if (!before) return message;
    const after = normalizeWhitespace(before);
    charsSaved += before.length - after.length;
    return { ...message, content: after };
  };
  const cleaned = messages.map((message) => cleanOne(message));
  return { messages: cleaned, charsSaved };
}
2532
-
2533
- // src/compression/codebook.ts
2534
// Fixed short-code -> phrase table used by the dictionary compression layer.
// Entry order is significant: it is the insertion order seen by consumers.
var STATIC_CODEBOOK = {
  // --- OpenClaw/Agent system prompt patterns (high impact, very common) ---
  "$OC01": "unbrowse_", // common prefix in tool names
  "$OC02": "<location>",
  "$OC03": "</location>",
  "$OC04": "<name>",
  "$OC05": "</name>",
  "$OC06": "<description>",
  "$OC07": "</description>",
  "$OC08": "(may need login)",
  "$OC09": "API skill for OpenClaw",
  "$OC10": "endpoints",
  // --- Skill/tool markers ---
  "$SK01": "<available_skills>",
  "$SK02": "</available_skills>",
  "$SK03": "<skill>",
  "$SK04": "</skill>",
  // --- Schema patterns (very common in tool definitions) ---
  "$T01": 'type: "function"',
  "$T02": '"type": "function"',
  "$T03": '"type": "string"',
  "$T04": '"type": "object"',
  "$T05": '"type": "array"',
  "$T06": '"type": "boolean"',
  "$T07": '"type": "number"',
  // --- Common descriptions ---
  "$D01": "description:",
  "$D02": '"description":',
  // --- Common instructions ---
  "$I01": "You are a personal assistant",
  "$I02": "Tool names are case-sensitive",
  "$I03": "Call tools exactly as listed",
  "$I04": "Use when",
  "$I05": "without asking",
  // --- Safety phrases ---
  "$S01": "Do not manipulate or persuade",
  "$S02": "Prioritize safety and human oversight",
  "$S03": "unless explicitly requested",
  // --- JSON patterns ---
  "$J01": '"required": ["',
  "$J02": '"properties": {',
  "$J03": '"additionalProperties": false',
  // --- Heartbeat patterns ---
  "$H01": "HEARTBEAT_OK",
  "$H02": "Read HEARTBEAT.md if it exists",
  // --- Role markers ---
  "$R01": '"role": "system"',
  "$R02": '"role": "user"',
  "$R03": '"role": "assistant"',
  "$R04": '"role": "tool"',
  // --- Common endings/phrases ---
  "$E01": "would you like to",
  "$E02": "Let me know if you",
  "$E03": "internal APIs",
  "$E04": "session cookies",
  // --- BlockRun model aliases (common in prompts) ---
  "$M01": "blockrun/",
  "$M02": "openai/",
  "$M03": "anthropic/",
  "$M04": "google/",
  "$M05": "xai/"
};
2597
/**
 * Build the reverse lookup of STATIC_CODEBOOK: phrase -> code.
 * A fresh object is created on every call.
 */
function getInverseCodebook() {
  const phraseToCode = {};
  Object.keys(STATIC_CODEBOOK).forEach((code) => {
    phraseToCode[STATIC_CODEBOOK[code]] = code;
  });
  return phraseToCode;
}
2604
/**
 * Render the legend for the codes that were substituted into a prompt.
 *
 * @param usedCodes - Set of STATIC_CODEBOOK codes that were used.
 * @param pathMap - Path code -> prefix mapping (defaults to none).
 * @returns Up to two lines, "[Dict: ...]" then "[Paths: ...]", or "" when
 *   there is nothing to describe.
 */
function generateCodebookHeader(usedCodes, pathMap = {}) {
  const sections = [];
  if (usedCodes.size > 0) {
    const dictItems = [];
    for (const code of usedCodes) {
      dictItems.push(`${code}=${STATIC_CODEBOOK[code]}`);
    }
    sections.push(`[Dict: ${dictItems.join(", ")}]`);
  }
  const pathPairs = Object.entries(pathMap);
  if (pathPairs.length > 0) {
    const pathItems = pathPairs.map(([code, path]) => `${code}=${path}`);
    sections.push(`[Paths: ${pathItems.join(", ")}]`);
  }
  return sections.join("\n");
}
2619
-
2620
- // src/compression/layers/dictionary.ts
2621
/**
 * Replace every occurrence of the given phrases in `content` with their codes.
 *
 * Phrases are applied longest first, so a phrase containing a shorter one is
 * substituted before the shorter one gets a chance to match.
 *
 * @param content - Text to encode.
 * @param inverseCodebook - phrase -> code mapping.
 * @returns `{ encoded, substitutions, codes, charsSaved }`
 */
function encodeContent(content, inverseCodebook) {
  const codes = /* @__PURE__ */ new Set();
  const byLengthDesc = Object.keys(inverseCodebook).sort((a, b) => b.length - a.length);
  let encoded = content;
  let substitutions = 0;
  let charsSaved = 0;
  byLengthDesc.forEach((phrase) => {
    const pattern = new RegExp(escapeRegex(phrase), "g");
    const hits = encoded.match(pattern);
    if (!hits || hits.length === 0) {
      return;
    }
    const code = inverseCodebook[phrase];
    encoded = encoded.replace(pattern, code);
    substitutions += hits.length;
    charsSaved += hits.length * (phrase.length - code.length);
    codes.add(code);
  });
  return { encoded, substitutions, codes, charsSaved };
}
2640
/** Backslash-escape every regex metacharacter in `str` so it matches literally. */
function escapeRegex(str) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metaChars, (ch) => "\\" + ch);
}
2643
/**
 * Run dictionary encoding over the content of every message.
 *
 * @returns `{ messages, substitutionCount, usedCodes, charsSaved }`: encoded
 *   copies (messages without content are passed through by reference) and
 *   the totals accumulated across all messages.
 */
function encodeMessages(messages) {
  const inverseCodebook = getInverseCodebook();
  const usedCodes = /* @__PURE__ */ new Set();
  let substitutionCount = 0;
  let charsSaved = 0;
  const encodedMessages = messages.map((message) => {
    if (!message.content) return message;
    const outcome = encodeContent(message.content, inverseCodebook);
    substitutionCount += outcome.substitutions;
    charsSaved += outcome.charsSaved;
    for (const code of outcome.codes) {
      usedCodes.add(code);
    }
    return { ...message, content: outcome.encoded };
  });
  return { messages: encodedMessages, substitutionCount, usedCodes, charsSaved };
}
2669
-
2670
- // src/compression/layers/paths.ts
2671
// Matches slash-separated paths with at least three segments, e.g. "/a/b/c".
var PATH_REGEX = /(?:\/[\w.-]+){3,}/g;
/**
 * Collect every path-like substring found in the messages' content, in order
 * of appearance. Messages without content are ignored.
 */
function extractPaths(messages) {
  const found = [];
  for (const { content } of messages) {
    const hits = content ? content.match(PATH_REGEX) : null;
    if (hits) {
      found.push(...hits);
    }
  }
  return found;
}
2683
/**
 * Pick directory prefixes worth abbreviating.
 *
 * Every proper prefix of two or more segments is counted once per path.
 * Prefixes seen at least three times are ranked longest first and the top
 * five are returned, each with a leading and trailing "/".
 */
function findFrequentPrefixes(paths) {
  const tally = /* @__PURE__ */ new Map();
  for (const path of paths) {
    const segments = path.split("/").filter(Boolean);
    for (let depth = 2; depth < segments.length; depth++) {
      const prefix = `/${segments.slice(0, depth).join("/")}/`;
      tally.set(prefix, (tally.get(prefix) || 0) + 1);
    }
  }
  const frequent = [...tally].filter(([, seen]) => seen >= 3);
  frequent.sort(([left], [right]) => right.length - left.length);
  return frequent.slice(0, 5).map(([prefix]) => prefix);
}
2694
/**
 * Abbreviate frequently repeated path prefixes with `$P<n>` codes.
 *
 * Nothing is changed when fewer than five paths are found or no prefix is
 * frequent enough; the input array is then returned as-is.
 *
 * @returns `{ messages, pathMap, charsSaved }`, where `pathMap` maps each
 *   code to the prefix it stands for.
 */
function shortenPaths(messages) {
  const allPaths = extractPaths(messages);
  const prefixes = allPaths.length < 5 ? [] : findFrequentPrefixes(allPaths);
  if (prefixes.length === 0) {
    return { messages, pathMap: {}, charsSaved: 0 };
  }
  const pathMap = {};
  for (const [position, prefix] of prefixes.entries()) {
    pathMap[`$P${position + 1}`] = prefix;
  }
  const substitutions = Object.entries(pathMap);
  let charsSaved = 0;
  const shortened = messages.map((message) => {
    const before = message.content;
    if (!before) return message;
    // Each prefix (with its trailing "/") becomes "<code>/".
    const content = substitutions.reduce(
      (text, [code, prefix]) => text.split(prefix).join(code + "/"),
      before
    );
    charsSaved += before.length - content.length;
    return { ...message, content };
  });
  return { messages: shortened, pathMap, charsSaved };
}
2735
-
2736
- // src/compression/layers/json-compact.ts
2737
/**
 * Remove insignificant whitespace from a JSON document.
 *
 * The text is only validated with JSON.parse; the output is produced by
 * dropping whitespace that sits outside string literals. Re-serializing the
 * parsed value instead would silently change the data: integers beyond 2^53
 * lose precision, `2.0` becomes `2`, and duplicate keys disappear.
 *
 * @param jsonString - Candidate JSON text.
 * @returns The compacted text, or the input unchanged when it is not valid JSON.
 */
function compactJson(jsonString) {
  let parsed;
  try {
    parsed = JSON.parse(jsonString);
  } catch {
    // Not JSON: leave the caller's value untouched.
    return jsonString;
  }
  if (typeof jsonString !== "string") {
    // Non-string input was coerced by JSON.parse; keep the historical result.
    return JSON.stringify(parsed);
  }
  let compacted = "";
  let inString = false;
  let escaped = false;
  for (let i = 0; i < jsonString.length; i++) {
    const ch = jsonString[i];
    if (inString) {
      compacted += ch;
      if (escaped) {
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    // The JSON grammar allows exactly these four whitespace characters.
    if (ch === " " || ch === "\n" || ch === "\r" || ch === "\t") {
      continue;
    }
    if (ch === '"') {
      inString = true;
    }
    compacted += ch;
  }
  return compacted;
}
2745
/**
 * Cheap shape check: after trimming, is the text wrapped in `{...}` or `[...]`?
 * The content itself is not validated.
 */
function looksLikeJson(str) {
  const text = str.trim();
  const isWrappedIn = (open, close) => text.startsWith(open) && text.endsWith(close);
  return isWrappedIn("{", "}") || isWrappedIn("[", "]");
}
2749
/**
 * Return copies of the tool calls with `function.arguments` passed through
 * compactJson(). The input objects are not modified.
 */
function compactToolCalls(toolCalls) {
  const compactOne = (call) => {
    const compactedFunction = {
      ...call.function,
      arguments: compactJson(call.function.arguments)
    };
    return { ...call, function: compactedFunction };
  };
  return toolCalls.map((call) => compactOne(call));
}
2758
/**
 * Compact the JSON carried by messages: tool-call arguments on any message
 * and the content of tool messages that look like JSON.
 *
 * @returns `{ messages, charsSaved }`: every message is returned as a shallow
 *   copy; `charsSaved` is the total length reduction.
 */
function compactMessagesJson(messages) {
  let charsSaved = 0;
  const serializedLength = (value) => JSON.stringify(value).length;
  const compacted = messages.map((message) => {
    const copy = { ...message };
    const calls = message.tool_calls;
    if (calls && calls.length > 0) {
      const lengthBefore = serializedLength(calls);
      copy.tool_calls = compactToolCalls(calls);
      charsSaved += lengthBefore - serializedLength(copy.tool_calls);
    }
    const isJsonToolResult = message.role === "tool" && message.content && looksLikeJson(message.content);
    if (isJsonToolResult) {
      const smaller = compactJson(message.content);
      charsSaved += message.content.length - smaller.length;
      copy.content = smaller;
    }
    return copy;
  });
  return { messages: compacted, charsSaved };
}
2781
-
2782
- // src/compression/layers/observation.ts
2783
// Tool results up to this many characters are left untouched.
var TOOL_RESULT_THRESHOLD = 500;
// Upper bound on the length of a summarized result.
var COMPRESSED_RESULT_MAX = 300;
/**
 * Summarize a long tool result.
 *
 * Content at or below TOOL_RESULT_THRESHOLD (or empty) is returned unchanged.
 * Otherwise the summary is built from, in this order: up to three short
 * error-like lines, up to three short status-like lines, and up to five
 * well-known JSON fields. When none of those exist it falls back to the first
 * line, a skipped-lines marker and the last line. The summary is truncated
 * to stay within COMPRESSED_RESULT_MAX.
 */
function compressToolResult(content) {
  if (!content || content.length <= TOOL_RESULT_THRESHOLD) {
    return content;
  }
  const lines = content.split("\n").map((line) => line.trim()).filter(Boolean);
  const errorPattern = /error|exception|failed|denied|refused|timeout|invalid/i;
  const statusPattern = /success|complete|created|updated|found|result|status|total|count/i;
  const errorLines = lines.filter((line) => errorPattern.test(line) && line.length < 200);
  const statusLines = lines.filter((line) => statusPattern.test(line) && line.length < 150);
  const fieldPattern = /"(id|name|status|error|message|count|total|url|path)":\s*"?([^",}\n]+)"?/gi;
  const fields = [];
  for (const hit of content.matchAll(fieldPattern)) {
    fields.push(`${hit[1]}: ${hit[2].slice(0, 50)}`);
  }
  const summary = [];
  if (errorLines.length > 0) {
    summary.push("[ERR] " + errorLines.slice(0, 3).join(" | "));
  }
  if (statusLines.length > 0) {
    summary.push(statusLines.slice(0, 3).join(" | "));
  }
  if (fields.length > 0) {
    summary.push(fields.slice(0, 5).join(", "));
  }
  if (summary.length === 0) {
    const firstLine = lines[0]?.slice(0, 100);
    const lastLine = lines.length > 1 ? lines[lines.length - 1]?.slice(0, 100) : "";
    summary.push(firstLine || "");
    if (lines.length > 2) {
      summary.push(`[...${lines.length - 2} lines...]`);
    }
    if (lastLine && lastLine !== firstLine) {
      summary.push(lastLine);
    }
  }
  const joined = summary.join("\n");
  if (joined.length <= COMPRESSED_RESULT_MAX) {
    return joined;
  }
  return joined.slice(0, COMPRESSED_RESULT_MAX - 20) + "\n[...truncated]";
}
2829
/**
 * Replace repeated large message bodies with a pointer to their first occurrence.
 *
 * A message of 500+ characters whose content is identical to that of an
 * earlier message becomes "[See message #N - same content]", where N is the
 * 1-based position of the first occurrence in `messages`.
 *
 * Duplicates are matched on the complete content. Matching on a leading
 * slice only would treat messages that merely share a prefix (the same
 * header, for instance) as identical and discard the differing remainder.
 *
 * @returns `{ messages, charsSaved }`
 */
function deduplicateLargeBlocks(messages) {
  const firstIndexByContent = /* @__PURE__ */ new Map();
  let charsSaved = 0;
  const result = messages.map((msg, idx) => {
    if (!msg.content || msg.content.length < 500) {
      return msg;
    }
    const firstIdx = firstIndexByContent.get(msg.content);
    if (firstIdx === void 0) {
      firstIndexByContent.set(msg.content, idx);
      return msg;
    }
    const compressed = `[See message #${firstIdx + 1} - same content]`;
    charsSaved += msg.content.length - compressed.length;
    return { ...msg, content: compressed };
  });
  return { messages: result, charsSaved };
}
2849
/**
 * Shrink long tool results, then collapse repeated large blocks.
 *
 * A tool message longer than TOOL_RESULT_THRESHOLD is replaced by its
 * compressToolResult() summary when that saves more than 50 characters.
 * The resulting list is then passed through deduplicateLargeBlocks().
 *
 * @returns `{ messages, charsSaved, observationsCompressed }`
 */
function compressObservations(messages) {
  let summarySavings = 0;
  let observationsCompressed = 0;
  const summarize = (msg) => {
    const isLongToolResult = msg.role === "tool" && Boolean(msg.content) && msg.content.length > TOOL_RESULT_THRESHOLD;
    if (!isLongToolResult) {
      return msg;
    }
    const summary = compressToolResult(msg.content);
    const saved = msg.content.length - summary.length;
    if (saved <= 50) {
      return msg;
    }
    summarySavings += saved;
    observationsCompressed += 1;
    return { ...msg, content: summary };
  };
  const summarized = messages.map((msg) => summarize(msg));
  const deduped = deduplicateLargeBlocks(summarized);
  return {
    messages: deduped.messages,
    charsSaved: summarySavings + deduped.charsSaved,
    observationsCompressed
  };
}
2878
-
2879
- // src/compression/layers/dynamic-codebook.ts
2880
// Phrase length bounds (in characters) for dynamic codebook candidates.
var MIN_PHRASE_LENGTH = 20;
var MAX_PHRASE_LENGTH = 200;
// A phrase must be counted at least this many times to be considered.
var MIN_FREQUENCY = 3;
// Maximum number of dynamic codebook entries.
var MAX_ENTRIES = 100;
// Prefix of generated dynamic codes ("$X01", "$X02", ...).
// It must not reuse a static codebook or path prefix: the static codebook
// defines $D01 and $D02, so generating "$D.." codes here produced two
// different meanings for the same code when both layers were enabled.
var CODE_PREFIX = "$X";
2885
/**
 * Count candidate phrases in a body of text.
 *
 * Two passes feed the same counter: sentence-like segments (split on
 * whitespace following ".", "!", "?" or a newline) and whole lines. A trimmed
 * piece is counted only when its length lies between MIN_PHRASE_LENGTH and
 * MAX_PHRASE_LENGTH. Text that is both a segment and a full line is therefore
 * counted by both passes.
 *
 * @returns Map of phrase -> count.
 */
function findRepeatedPhrases(allContent) {
  const counts = /* @__PURE__ */ new Map();
  const tally = (pieces) => {
    for (const piece of pieces) {
      const text = piece.trim();
      const isCandidate = text.length >= MIN_PHRASE_LENGTH && text.length <= MAX_PHRASE_LENGTH;
      if (isCandidate) {
        counts.set(text, (counts.get(text) || 0) + 1);
      }
    }
  };
  tally(allContent.split(/(?<=[.!?\n])\s+/));
  tally(allContent.split("\n"));
  return counts;
}
2903
/**
 * Derive a conversation-specific codebook from phrases that repeat in the
 * messages' content.
 *
 * A phrase qualifies when it is counted at least MIN_FREQUENCY times and
 * substituting a 4-character code for it would save more than 50 characters
 * overall. The best MAX_ENTRIES phrases, ranked by estimated savings, get
 * codes made of CODE_PREFIX plus a two-digit-padded counter starting at 1.
 *
 * @returns Object mapping code -> phrase.
 */
function buildDynamicCodebook(messages) {
  const ASSUMED_CODE_LENGTH = 4;
  const allContent = messages
    .filter((msg) => msg.content)
    .map((msg) => msg.content + "\n")
    .join("");
  const candidates = [];
  for (const [phrase, count] of findRepeatedPhrases(allContent)) {
    if (count < MIN_FREQUENCY) continue;
    const savings = (phrase.length - ASSUMED_CODE_LENGTH) * count;
    if (savings > 50) {
      candidates.push({ phrase, count, savings });
    }
  }
  candidates.sort((left, right) => right.savings - left.savings);
  const codebook = {};
  let ordinal = 0;
  for (const { phrase } of candidates.slice(0, MAX_ENTRIES)) {
    ordinal += 1;
    codebook[`${CODE_PREFIX}${String(ordinal).padStart(2, "0")}`] = phrase;
  }
  return codebook;
}
2930
/** Backslash-escape every regex metacharacter in `str` so it matches literally. */
function escapeRegex2(str) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metaChars, (ch) => "\\" + ch);
}
2933
/**
 * Build a dynamic codebook for the messages and substitute its codes into
 * their content, longest phrase first.
 *
 * When no codebook can be built the input array is returned untouched.
 *
 * @returns `{ messages, charsSaved, dynamicCodes, substitutions }`, where
 *   `dynamicCodes` is the code -> phrase table that was applied.
 */
function applyDynamicCodebook(messages) {
  const codebook = buildDynamicCodebook(messages);
  const codebookEntries = Object.entries(codebook);
  if (codebookEntries.length === 0) {
    return { messages, charsSaved: 0, dynamicCodes: {}, substitutions: 0 };
  }
  const codeByPhrase = new Map(codebookEntries.map(([code, phrase]) => [phrase, code]));
  const longestFirst = [...codeByPhrase.keys()].sort((a, b) => b.length - a.length);
  let charsSaved = 0;
  let substitutions = 0;
  const encodeText = (text) => {
    let current = text;
    for (const phrase of longestFirst) {
      const pattern = new RegExp(escapeRegex2(phrase), "g");
      const hits = current.match(pattern);
      if (!hits) continue;
      const code = codeByPhrase.get(phrase);
      current = current.replace(pattern, code);
      charsSaved += (phrase.length - code.length) * hits.length;
      substitutions += hits.length;
    }
    return current;
  };
  const encodedMessages = messages.map((msg) =>
    msg.content ? { ...msg, content: encodeText(msg.content) } : msg
  );
  return { messages: encodedMessages, charsSaved, dynamicCodes: codebook, substitutions };
}
2972
/**
 * Render a "[DynDict: ...]" legend for a dynamic codebook.
 *
 * Only the first 20 entries are listed and phrases longer than 40 characters
 * are cut to 37 characters plus "...". An empty codebook yields "".
 */
function generateDynamicCodebookHeader(codebook) {
  const pairs = Object.entries(codebook);
  if (pairs.length === 0) return "";
  const items = [];
  for (const [code, phrase] of pairs.slice(0, 20)) {
    const label = phrase.length > 40 ? `${phrase.slice(0, 37)}...` : phrase;
    items.push(`${code}=${label}`);
  }
  return `[DynDict: ${items.join(", ")}]`;
}
2980
-
2981
- // src/compression/index.ts
2982
/**
 * Sum the size of a conversation in characters: each message's content
 * length plus the length of its JSON-serialized tool calls, if any.
 */
function calculateTotalChars(messages) {
  let total = 0;
  for (const msg of messages) {
    total += msg.content?.length || 0;
    if (msg.tool_calls) {
      total += JSON.stringify(msg.tool_calls).length;
    }
  }
  return total;
}
2991
/**
 * Deep-copy messages through a JSON round trip.
 * Only JSON-representable data survives (undefined properties are dropped).
 */
function cloneMessages(messages) {
  const serialized = JSON.stringify(messages);
  return JSON.parse(serialized);
}
2994
/**
 * Insert the codebook legend into a conversation.
 *
 * The legend is placed in front of the first user message's content,
 * separated by a blank line. Without a user message it is added as a new
 * leading system message. When there is no legend to add, the input array is
 * returned unchanged.
 */
function prependCodebookHeader(messages, usedCodes, pathMap) {
  const header = generateCodebookHeader(usedCodes, pathMap);
  if (!header) return messages;
  const userIndex = messages.findIndex((m) => m.role === "user");
  if (userIndex === -1) {
    return [{ role: "system", content: header }, ...messages];
  }
  const firstUser = messages[userIndex];
  const withHeader = { ...firstUser, content: `${header}\n\n${firstUser.content || ""}` };
  return messages.map((msg, i) => (i === userIndex ? withHeader : msg));
}
3013
/**
 * Run the configured compression layers over a conversation.
 *
 * Layers run in a fixed order: deduplication, whitespace, dictionary, paths,
 * JSON compaction, observation compression, dynamic codebook. Each layer is
 * skipped unless enabled in `config.layers`. The input messages are never
 * mutated; all work happens on a deep copy.
 *
 * @param {Array<object>} messages - Conversation to compress.
 * @param {object} [config] - Partial overrides merged over
 *   DEFAULT_COMPRESSION_CONFIG (`layers` and `dictionary` are merged one level deep).
 * @returns {Promise<object>} The compressed messages, the original messages,
 *   character counts, `compressionRatio` (compressed / original, or 1 when the
 *   original is empty), per-layer stats and the codebooks that were used.
 */
async function compressContext(messages, config = {}) {
  const fullConfig = {
    ...DEFAULT_COMPRESSION_CONFIG,
    ...config,
    layers: {
      ...DEFAULT_COMPRESSION_CONFIG.layers,
      ...config.layers
    },
    dictionary: {
      ...DEFAULT_COMPRESSION_CONFIG.dictionary,
      ...config.dictionary
    }
  };
  // Single source for the zeroed stats record used by both return paths.
  const createEmptyStats = () => ({
    duplicatesRemoved: 0,
    whitespaceSavedChars: 0,
    dictionarySubstitutions: 0,
    pathsShortened: 0,
    jsonCompactedChars: 0,
    observationsCompressed: 0,
    observationCharsSaved: 0,
    dynamicSubstitutions: 0,
    dynamicCharsSaved: 0
  });
  if (!fullConfig.enabled) {
    const untouchedChars = calculateTotalChars(messages);
    return {
      messages,
      originalMessages: messages,
      originalChars: untouchedChars,
      compressedChars: untouchedChars,
      compressionRatio: 1,
      stats: createEmptyStats(),
      codebook: {},
      pathMap: {},
      dynamicCodes: {}
    };
  }
  const originalMessages = fullConfig.preserveRaw ? cloneMessages(messages) : messages;
  const originalChars = calculateTotalChars(messages);
  const stats = createEmptyStats();
  let result = cloneMessages(messages);
  let usedCodes = /* @__PURE__ */ new Set();
  let pathMap = {};
  let dynamicCodes = {};
  if (fullConfig.layers.deduplication) {
    const dedupResult = deduplicateMessages(result);
    result = dedupResult.messages;
    stats.duplicatesRemoved = dedupResult.duplicatesRemoved;
  }
  if (fullConfig.layers.whitespace) {
    const wsResult = normalizeMessagesWhitespace(result);
    result = wsResult.messages;
    stats.whitespaceSavedChars = wsResult.charsSaved;
  }
  if (fullConfig.layers.dictionary) {
    const dictResult = encodeMessages(result);
    result = dictResult.messages;
    stats.dictionarySubstitutions = dictResult.substitutionCount;
    usedCodes = dictResult.usedCodes;
  }
  if (fullConfig.layers.paths) {
    const pathResult = shortenPaths(result);
    result = pathResult.messages;
    pathMap = pathResult.pathMap;
    stats.pathsShortened = Object.keys(pathMap).length;
  }
  if (fullConfig.layers.jsonCompact) {
    const jsonResult = compactMessagesJson(result);
    result = jsonResult.messages;
    stats.jsonCompactedChars = jsonResult.charsSaved;
  }
  if (fullConfig.layers.observation) {
    const obsResult = compressObservations(result);
    result = obsResult.messages;
    stats.observationsCompressed = obsResult.observationsCompressed;
    stats.observationCharsSaved = obsResult.charsSaved;
  }
  if (fullConfig.layers.dynamicCodebook) {
    const dynResult = applyDynamicCodebook(result);
    result = dynResult.messages;
    stats.dynamicSubstitutions = dynResult.substitutions;
    stats.dynamicCharsSaved = dynResult.charsSaved;
    dynamicCodes = dynResult.dynamicCodes;
  }
  const hasAnyCodes = usedCodes.size > 0 || Object.keys(pathMap).length > 0 || Object.keys(dynamicCodes).length > 0;
  if (fullConfig.dictionary.includeCodebookHeader && hasAnyCodes) {
    result = prependCodebookHeader(result, usedCodes, pathMap);
    // Returns "" when there are no dynamic codes, so no separate emptiness check is needed.
    const dynHeader = generateDynamicCodebookHeader(dynamicCodes);
    if (dynHeader) {
      const systemIndex = result.findIndex((m) => m.role === "system");
      if (systemIndex >= 0) {
        result[systemIndex] = {
          ...result[systemIndex],
          content: `${dynHeader}\n${result[systemIndex].content || ""}`
        };
      } else {
        // Without a system message the legend used to be dropped while the
        // substituted codes stayed in the content; carry it in a new system
        // message so the codes remain decodable.
        result = [{ role: "system", content: dynHeader }, ...result];
      }
    }
  }
  const compressedChars = calculateTotalChars(result);
  // Guard the 0 / 0 case (empty conversation) which would otherwise yield NaN.
  const compressionRatio = originalChars > 0 ? compressedChars / originalChars : 1;
  const usedCodebook = {};
  usedCodes.forEach((code) => {
    usedCodebook[code] = STATIC_CODEBOOK[code];
  });
  return {
    messages: result,
    originalMessages,
    originalChars,
    compressedChars,
    compressionRatio,
    stats,
    codebook: usedCodebook,
    pathMap,
    dynamicCodes
  };
}
3141
/**
 * Decide whether a conversation is large enough to compress.
 *
 * @param {Array<object>} messages - Conversation messages.
 * @returns {boolean} True when the total size exceeds 5000 characters.
 */
function shouldCompress(messages) {
  return calculateTotalChars(messages) > 5e3;
}
3145
-
3146
- // src/session.ts
3147
// Defaults applied to every SessionStore; callers override individual fields.
var DEFAULT_SESSION_CONFIG = {
  enabled: false,
  timeoutMs: 30 * 60 * 1e3,
  // 30 minutes
  headerName: "x-session-id"
};
/**
 * In-memory map of session id -> pinned model.
 *
 * When the store is disabled (the default) reads return undefined and writes
 * are ignored. Entries expire once they have been idle for longer than
 * `config.timeoutMs`; expiry is checked on read and by a sweep every 5 minutes.
 */
var SessionStore = class {
  sessions = /* @__PURE__ */ new Map();
  config;
  cleanupInterval = null;
  /**
   * @param {object} [config] - Partial overrides for DEFAULT_SESSION_CONFIG.
   */
  constructor(config = {}) {
    this.config = { ...DEFAULT_SESSION_CONFIG, ...config };
    if (this.config.enabled) {
      this.cleanupInterval = setInterval(() => this.cleanup(), 5 * 60 * 1e3);
      // The sweep is housekeeping only: it must not keep the process alive
      // when an owner never gets to call close().
      this.cleanupInterval.unref?.();
    }
  }
  /**
   * Get the pinned model for a session, if any.
   * Expired entries are removed and reported as missing.
   */
  getSession(sessionId) {
    if (!this.config.enabled || !sessionId) {
      return void 0;
    }
    const entry = this.sessions.get(sessionId);
    if (!entry) {
      return void 0;
    }
    const now = Date.now();
    if (now - entry.lastUsedAt > this.config.timeoutMs) {
      this.sessions.delete(sessionId);
      return void 0;
    }
    return entry;
  }
  /**
   * Pin a model to a session.
   * An existing entry is refreshed and counted; its model and tier are only
   * replaced when the model differs.
   */
  setSession(sessionId, model, tier) {
    if (!this.config.enabled || !sessionId) {
      return;
    }
    const existing = this.sessions.get(sessionId);
    const now = Date.now();
    if (existing) {
      existing.lastUsedAt = now;
      existing.requestCount++;
      if (existing.model !== model) {
        existing.model = model;
        existing.tier = tier;
      }
    } else {
      this.sessions.set(sessionId, {
        model,
        tier,
        createdAt: now,
        lastUsedAt: now,
        requestCount: 1
      });
    }
  }
  /**
   * Touch a session to extend its timeout.
   */
  touchSession(sessionId) {
    if (!this.config.enabled || !sessionId) {
      return;
    }
    const entry = this.sessions.get(sessionId);
    if (entry) {
      entry.lastUsedAt = Date.now();
      entry.requestCount++;
    }
  }
  /**
   * Clear a specific session.
   */
  clearSession(sessionId) {
    this.sessions.delete(sessionId);
  }
  /**
   * Clear all sessions.
   */
  clearAll() {
    this.sessions.clear();
  }
  /**
   * Get session stats for debugging.
   * Ids are cut to their first 8 characters; age is in whole seconds.
   */
  getStats() {
    const now = Date.now();
    const sessions = Array.from(this.sessions.entries()).map(([id, entry]) => ({
      id: id.slice(0, 8) + "...",
      model: entry.model,
      age: Math.round((now - entry.createdAt) / 1e3)
    }));
    return { count: this.sessions.size, sessions };
  }
  /**
   * Clean up expired sessions.
   */
  cleanup() {
    const now = Date.now();
    for (const [id, entry] of this.sessions) {
      if (now - entry.lastUsedAt > this.config.timeoutMs) {
        this.sessions.delete(id);
      }
    }
  }
  /**
   * Stop the cleanup interval.
   */
  close() {
    if (this.cleanupInterval) {
      clearInterval(this.cleanupInterval);
      this.cleanupInterval = null;
    }
  }
};
3265
/**
 * Read the session id from a headers object.
 *
 * The header is looked up under the given name first and under its
 * lower-cased form second. A non-empty string is returned as-is; for a
 * non-empty array the first element is returned.
 *
 * @param {Record<string, string | string[] | undefined>} headers - Request headers.
 * @param {string} [headerName] - Header to read; defaults to the configured session header.
 * @returns {string | undefined} The session id, or undefined when absent or empty.
 */
function getSessionId(headers, headerName = DEFAULT_SESSION_CONFIG.headerName) {
  const raw = headers[headerName] || headers[headerName.toLowerCase()];
  if (Array.isArray(raw)) {
    return raw.length > 0 ? raw[0] : void 0;
  }
  return typeof raw === "string" && raw.length > 0 ? raw : void 0;
}
3275
-
3276
- // src/updater.ts
3277
// Registry document queried by checkForUpdates; its `version` field is compared to the running version.
var NPM_REGISTRY = "https://registry.npmjs.org/@blockrun/clawrouter/latest";
// Installer script URL shown in the "update available" hint.
var UPDATE_URL = "https://blockrun.ai/ClawRouter-update";
// Abort the registry lookup after 5 seconds.
var CHECK_TIMEOUT_MS = 5 * 1e3;
3280
/**
 * Compare two version strings by major.minor.patch.
 *
 * A leading "v", build metadata ("+...") and a pre-release suffix ("-...")
 * are tolerated instead of turning a component into NaN. When the numeric
 * cores are equal, a pre-release ranks below the plain release; pre-release
 * identifiers themselves are not compared with each other.
 *
 * @param {string} a - First version.
 * @param {string} b - Second version.
 * @returns {1 | 0 | -1} 1 when a is newer, -1 when b is newer, 0 otherwise.
 */
function compareSemver(a, b) {
  const parse = (version) => {
    const [withoutBuild] = String(version).trim().replace(/^v/i, "").split("+");
    const dash = withoutBuild.indexOf("-");
    const core = dash === -1 ? withoutBuild : withoutBuild.slice(0, dash);
    return {
      // Missing or non-numeric components count as 0, as before.
      numbers: core.split(".").map((part) => Number.parseInt(part, 10) || 0),
      isPrerelease: dash !== -1
    };
  };
  const left = parse(a);
  const right = parse(b);
  for (let i = 0; i < 3; i++) {
    const l = left.numbers[i] || 0;
    const r = right.numbers[i] || 0;
    if (l > r) return 1;
    if (l < r) return -1;
  }
  if (left.isPrerelease !== right.isPrerelease) {
    return left.isPrerelease ? -1 : 1;
  }
  return 0;
}
3289
/**
 * Look up the latest published version and print an upgrade hint when it is
 * newer than the running VERSION.
 *
 * Best-effort by design: network errors, timeouts and malformed responses are
 * ignored so an update check can never break startup.
 *
 * @returns {Promise<void>}
 */
async function checkForUpdates() {
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), CHECK_TIMEOUT_MS);
  try {
    const res = await fetch(NPM_REGISTRY, {
      signal: controller.signal,
      headers: { Accept: "application/json" }
    });
    if (!res.ok) return;
    // The deadline stays armed while the body is read, so a stalled body is aborted too.
    const data = await res.json();
    const latest = data.version;
    if (!latest) return;
    if (compareSemver(latest, VERSION) > 0) {
      console.log("");
      console.log(`\x1B[33m\u2B06\uFE0F ClawRouter ${latest} available (you have ${VERSION})\x1B[0m`);
      console.log(` Run: \x1B[36mcurl -fsSL ${UPDATE_URL} | bash\x1B[0m`);
      console.log("");
    }
  } catch {
    // Intentionally silent: the update check is optional.
  } finally {
    // Always release the timer, including when fetch rejects early.
    clearTimeout(timeout);
  }
}
3311
-
3312
- // src/config.ts
3313
// Port used when BLOCKRUN_PROXY_PORT is unset or not a usable port number.
var DEFAULT_PORT = 8402;
// Resolved once at module load: the BLOCKRUN_PROXY_PORT override when it
// parses to an integer in 1..65535, otherwise DEFAULT_PORT.
var PROXY_PORT = (() => {
  const raw = process.env.BLOCKRUN_PROXY_PORT;
  if (!raw) {
    return DEFAULT_PORT;
  }
  const candidate = parseInt(raw, 10);
  const usable = !isNaN(candidate) && candidate > 0 && candidate < 65536;
  return usable ? candidate : DEFAULT_PORT;
})();
3324
-
3325
- // src/proxy.ts
3326
// Upstream API base used when startProxy is not given options.apiBase.
var BLOCKRUN_API = "https://blockrun.ai/api";
// Virtual model id; it is excluded from the pricing table.
var AUTO_MODEL = "blockrun/auto";
// Routing profile names, each accepted with and without the "blockrun/" prefix.
var ROUTING_PROFILES = /* @__PURE__ */ new Set(
  ["free", "eco", "auto", "premium"].flatMap((profile) => [`blockrun/${profile}`, profile])
);
var FREE_MODEL = "nvidia/gpt-oss-120b";
// Conversations longer than this are cut down by truncateMessages.
var MAX_MESSAGES = 200;
var HEARTBEAT_INTERVAL_MS = 2 * 1e3;
var DEFAULT_REQUEST_TIMEOUT_MS = 3 * 60 * 1e3;
var MAX_FALLBACK_ATTEMPTS = 5;
// Deadline for probing an already-running proxy's /health endpoint.
var HEALTH_CHECK_TIMEOUT_MS = 2 * 1e3;
// How long a model stays deprioritized after being marked rate-limited.
var RATE_LIMIT_COOLDOWN_MS = 60 * 1e3;
// Listen retry policy used by startProxy when the port is still in use.
var PORT_RETRY_ATTEMPTS = 5;
var PORT_RETRY_DELAY_MS = 1e3;
3347
/**
 * Rewrite known upstream payment failures into a structured, user-facing
 * error body.
 *
 * Recognized shapes:
 *  - "Payment verification failed" whose details embed a JSON verdict with
 *    `invalidReason` "insufficient_funds" (amounts parsed from the verdict's
 *    message and divided by 1e6 for display) or "invalid_payload";
 *  - "Settlement failed", either as the error itself or inside the details.
 * Anything else, including bodies that are not valid JSON, is returned unchanged.
 *
 * @param {string} errorBody - Raw upstream error body.
 * @returns {string} JSON string of the rewritten error, or the original body.
 */
function transformPaymentError(errorBody) {
  try {
    const payload = JSON.parse(errorBody);
    const isVerificationFailure = payload.error === "Payment verification failed" && payload.details;
    const embedded = isVerificationFailure ? payload.details.match(/Verification failed:\s*(\{.*\})/s) : null;
    if (embedded) {
      const verdict = JSON.parse(embedded[1]);
      const shortfall = verdict.invalidReason === "insufficient_funds" && verdict.invalidMessage ? verdict.invalidMessage.match(
        /insufficient balance:\s*(\d+)\s*<\s*(\d+)/i
      ) : null;
      if (shortfall) {
        const [, haveMicros, needMicros] = shortfall;
        const currentUSD = (parseInt(haveMicros, 10) / 1e6).toFixed(6);
        const requiredUSD = (parseInt(needMicros, 10) / 1e6).toFixed(6);
        const wallet = verdict.payer || "unknown";
        // Long addresses are shown as "first six...last four" in the help text.
        let shortWallet = wallet;
        if (wallet.length > 12) {
          shortWallet = `${wallet.slice(0, 6)}...${wallet.slice(-4)}`;
        }
        return JSON.stringify({
          error: {
            message: `Insufficient USDC balance. Current: $${currentUSD}, Required: ~$${requiredUSD}`,
            type: "insufficient_funds",
            wallet,
            current_balance_usd: currentUSD,
            required_usd: requiredUSD,
            help: `Fund wallet ${shortWallet} with USDC on Base, or use free model: /model free`
          }
        });
      }
      if (verdict.invalidReason === "invalid_payload") {
        return JSON.stringify({
          error: {
            message: "Payment signature invalid. This may be a temporary issue.",
            type: "invalid_payload",
            help: "Try again. If this persists, reinstall ClawRouter: curl -fsSL https://blockrun.ai/ClawRouter-update | bash"
          }
        });
      }
    }
    // Reached for non-verification errors and for verification errors not handled above.
    const isSettlementFailure = payload.error === "Settlement failed" || payload.details?.includes("Settlement failed");
    if (isSettlementFailure) {
      const detailText = payload.details || "";
      let message = "Payment settlement failed. Try again in a moment.";
      if (detailText.includes("unable to estimate gas")) {
        message = "Payment failed: network congestion or gas issue. Try again.";
      }
      return JSON.stringify({
        error: {
          message,
          type: "settlement_failed",
          help: "This is usually temporary. If it persists, try: /model free"
        }
      });
    }
  } catch {
    // Unparseable or unexpected shape: fall through and hand back the original body.
  }
  return errorBody;
}
3403
// Model id -> timestamp (ms) at which the model was last marked rate-limited.
var rateLimitedModels = /* @__PURE__ */ new Map();
/**
 * Report whether a model is still inside its rate-limit cooldown.
 * An entry whose cooldown has elapsed is removed as a side effect.
 *
 * @param {string} modelId - Model identifier.
 * @returns {boolean} True while the cooldown is running.
 */
function isRateLimited(modelId) {
  const markedAt = rateLimitedModels.get(modelId);
  if (!markedAt) {
    return false;
  }
  const stillCooling = Date.now() - markedAt < RATE_LIMIT_COOLDOWN_MS;
  if (!stillCooling) {
    rateLimitedModels.delete(modelId);
  }
  return stillCooling;
}
3414
/**
 * Record that a model was just rate-limited so it is tried after models that
 * are not in cooldown.
 *
 * @param {string} modelId - Model identifier.
 */
function markRateLimited(modelId) {
  rateLimitedModels.set(modelId, Date.now());
  // Derive the duration from the constant so the log cannot drift from the real cooldown.
  const cooldownSeconds = RATE_LIMIT_COOLDOWN_MS / 1e3;
  console.log(`[ClawRouter] Model ${modelId} rate-limited, will deprioritize for ${cooldownSeconds}s`);
}
3418
/**
 * Reorder candidate models so that those in rate-limit cooldown come last.
 * Relative order inside each group is preserved.
 *
 * @param {Iterable<string>} models - Candidate model ids in preference order.
 * @returns {string[]} Models not in cooldown first, then the rest.
 */
function prioritizeNonRateLimited(models) {
  const ready = [];
  const deferred = [];
  for (const candidate of models) {
    const bucket = isRateLimited(candidate) ? deferred : ready;
    bucket.push(candidate);
  }
  return ready.concat(deferred);
}
3430
/**
 * Check whether a response can still accept data.
 *
 * @param {object} res - Response object with `writableEnded`, `destroyed` and `socket`.
 * @returns {boolean} False when the response is ended or destroyed, or its
 *   socket is null or destroyed; otherwise the socket's `writable` flag.
 */
function canWrite(res) {
  if (res.writableEnded || res.destroyed) {
    return false;
  }
  const { socket } = res;
  if (socket === null || socket.destroyed) {
    return false;
  }
  return socket.writable;
}
3433
/**
 * Write to a response only if it can still accept data.
 *
 * @param {object} res - Response to write to.
 * @param {string | Buffer} data - Chunk to write.
 * @returns {boolean} False when the write was skipped, otherwise the result of `res.write`.
 */
function safeWrite(res, data) {
  return canWrite(res) ? res.write(data) : false;
}
3439
// NOTE(review): not referenced in this part of the file; presumably a safety
// multiplier for pre-request balance checks — confirm at the use site.
var BALANCE_CHECK_BUFFER = 1.5;
/**
 * Port the proxy listens on by default.
 *
 * @returns {number} The module-level PROXY_PORT value.
 */
function getProxyPort() {
  return PROXY_PORT;
}
3443
/**
 * Probe 127.0.0.1:<port>/health for an already-running proxy.
 *
 * @param {number} port - Local port to probe.
 * @returns {Promise<string | undefined>} The wallet reported by the running
 *   proxy, or undefined when nothing answers with `status: "ok"` and a wallet
 *   within HEALTH_CHECK_TIMEOUT_MS.
 */
async function checkExistingProxy(port) {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), HEALTH_CHECK_TIMEOUT_MS);
  try {
    const response = await fetch(`http://127.0.0.1:${port}/health`, {
      signal: controller.signal
    });
    if (!response.ok) {
      return void 0;
    }
    // The deadline stays armed here so a stalled response body is aborted as well.
    const data = await response.json();
    return data.status === "ok" && data.wallet ? data.wallet : void 0;
  } catch {
    // Connection refused, timeout or malformed JSON all mean "no usable proxy".
    return void 0;
  } finally {
    clearTimeout(timeoutId);
  }
}
3463
// Body fragments that identify a provider-side failure.
var PROVIDER_ERROR_PATTERNS = [
  /billing/i,
  /insufficient.*balance/i,
  /credits/i,
  /quota.*exceeded/i,
  /rate.*limit/i,
  /model.*unavailable/i,
  /model.*not.*available/i,
  /service.*unavailable/i,
  /capacity/i,
  /overloaded/i,
  /temporarily.*unavailable/i,
  /api.*key.*invalid/i,
  /authentication.*failed/i,
  /request too large/i,
  /request.*size.*exceeds/i,
  /payload too large/i
];
// Upstream statuses that may trigger a fallback.
var FALLBACK_STATUS_CODES = [
  400, // Bad request - sometimes used for billing errors
  401, // Unauthorized - provider API key issues
  402, // Payment required - but from upstream, not x402
  403, // Forbidden - provider restrictions
  413, // Payload too large - request exceeds model's context limit
  429, // Rate limited
  500, // Internal server error
  502, // Bad gateway
  503, // Service unavailable
  504 // Gateway timeout
];
/**
 * Classify an upstream response as a provider-side failure.
 *
 * @param {number} status - Upstream HTTP status.
 * @param {string} body - Upstream response body.
 * @returns {boolean} True for any listed 5xx status, and for a listed 4xx
 *   status whose body matches one of PROVIDER_ERROR_PATTERNS.
 */
function isProviderError(status, body) {
  if (!FALLBACK_STATUS_CODES.includes(status)) {
    return false;
  }
  if (status >= 500) {
    return true;
  }
  for (const pattern of PROVIDER_ERROR_PATTERNS) {
    if (pattern.test(body)) {
      return true;
    }
  }
  return false;
}
3512
// Roles passed through untouched by normalizeMessageRoles.
var VALID_ROLES = /* @__PURE__ */ new Set(["system", "user", "assistant", "tool", "function"]);
// Known aliases and the role each is rewritten to.
var ROLE_MAPPINGS = {
  // OpenAI's newer API uses "developer" for system messages
  developer: "system",
  // Some APIs use "model" instead of "assistant"
  model: "assistant"
};
// A tool id is acceptable when it consists only of letters, digits, "_" and "-".
var VALID_TOOL_ID_PATTERN = /^[a-zA-Z0-9_-]+$/;
/**
 * Make a tool id conform to VALID_TOOL_ID_PATTERN.
 *
 * @param {*} id - Candidate id.
 * @returns {*} Empty or non-string values are returned unchanged; otherwise
 *   the id with every disallowed character replaced by "_".
 */
function sanitizeToolId(id) {
  const isNonEmptyString = typeof id === "string" && id.length > 0;
  if (!isNonEmptyString) {
    return id;
  }
  return VALID_TOOL_ID_PATTERN.test(id) ? id : id.replace(/[^a-zA-Z0-9_-]/g, "_");
}
3525
/**
 * Sanitize every tool id found in a conversation.
 *
 * Covers `tool_calls[].id`, `tool_call_id`, and inside array content the `id`
 * of "tool_use" blocks and the `tool_use_id` of "tool_result" blocks.
 * Messages and blocks that need no change keep their identity, and the input
 * array itself is returned when nothing changed.
 *
 * @param {Array<object>} messages - Conversation messages.
 * @returns {Array<object>} Messages with sanitized tool ids.
 */
function sanitizeToolIds(messages) {
  if (!messages || messages.length === 0) return messages;
  const isText = (value) => Boolean(value) && typeof value === "string";
  let anyMessageTouched = false;
  const output = messages.map((msg) => {
    // Replacement values for the fields of this message that actually change.
    const patch = {};
    let touched = false;
    if (msg.tool_calls && Array.isArray(msg.tool_calls)) {
      const calls = msg.tool_calls.map((call) => {
        if (!isText(call.id)) return call;
        const cleanId = sanitizeToolId(call.id);
        if (cleanId === call.id) return call;
        touched = true;
        return { ...call, id: cleanId };
      });
      if (touched) {
        patch.tool_calls = calls;
      }
    }
    if (isText(msg.tool_call_id)) {
      const cleanId = sanitizeToolId(msg.tool_call_id);
      if (cleanId !== msg.tool_call_id) {
        touched = true;
        patch.tool_call_id = cleanId;
      }
    }
    if (Array.isArray(msg.content)) {
      const blocks = msg.content.map((block) => {
        if (!block || typeof block !== "object") return block;
        const blockPatch = {};
        if (block.type === "tool_use" && isText(block.id)) {
          const cleanId = sanitizeToolId(block.id);
          if (cleanId !== block.id) {
            blockPatch.id = cleanId;
          }
        }
        if (block.type === "tool_result" && isText(block.tool_use_id)) {
          const cleanId = sanitizeToolId(block.tool_use_id);
          if (cleanId !== block.tool_use_id) {
            blockPatch.tool_use_id = cleanId;
          }
        }
        if (Object.keys(blockPatch).length === 0) return block;
        touched = true;
        return { ...block, ...blockPatch };
      });
      // The mapped array is adopted whenever the message changed at all.
      if (touched) {
        patch.content = blocks;
      }
    }
    if (!touched) return msg;
    anyMessageTouched = true;
    return { ...msg, ...patch };
  });
  return anyMessageTouched ? output : messages;
}
3591
/**
 * Rewrite message roles that are not in VALID_ROLES.
 *
 * Roles listed in ROLE_MAPPINGS are replaced by their mapped role; any other
 * unknown role becomes "user". Messages with a valid role keep their
 * identity, and the input array is returned when nothing changed.
 *
 * @param {Array<object>} messages - Conversation messages.
 * @returns {Array<object>} Messages with normalized roles.
 */
function normalizeMessageRoles(messages) {
  if (!messages || messages.length === 0) return messages;
  let hasChanges = false;
  const normalized = messages.map((msg) => {
    if (VALID_ROLES.has(msg.role)) return msg;
    hasChanges = true;
    // Only own keys count as mappings: a plain lookup would resolve roles such
    // as "constructor" or "toString" to inherited functions, and the role
    // would then vanish when the request is serialized.
    const mappedRole = Object.hasOwn(ROLE_MAPPINGS, msg.role) ? ROLE_MAPPINGS[msg.role] : void 0;
    return { ...msg, role: mappedRole || "user" };
  });
  return hasChanges ? normalized : messages;
}
3606
/**
 * Make sure the first non-system message is a user turn.
 *
 * When the conversation (ignoring leading system messages) opens with an
 * "assistant" or "model" message, a placeholder user message is inserted in
 * front of it. Every other input is returned unchanged.
 *
 * @param {Array<object>} messages - Conversation messages.
 * @returns {Array<object>} The same array, or a copy containing the placeholder.
 */
function normalizeMessagesForGoogle(messages) {
  if (!messages || messages.length === 0) return messages;
  const firstTurn = messages.findIndex((m) => m.role !== "system");
  if (firstTurn === -1) return messages;
  const opener = messages[firstTurn].role;
  if (opener !== "assistant" && opener !== "model") {
    return messages;
  }
  return [
    ...messages.slice(0, firstTurn),
    { role: "user", content: "(continuing conversation)" },
    ...messages.slice(firstTurn)
  ];
}
3628
/**
 * Tell whether a model id names a Google model.
 *
 * @param {string} modelId - Model identifier.
 * @returns {boolean} True when the id starts with "google/" or "gemini".
 */
function isGoogleModel(modelId) {
  const prefixes = ["google/", "gemini"];
  return prefixes.some((prefix) => modelId.startsWith(prefix));
}
3631
/**
 * Add an empty `reasoning_content` to assistant messages that issue tool
 * calls but carry no `reasoning_content` field.
 *
 * A message issues tool calls when it has a non-empty `tool_calls` array or
 * an array content containing a "tool_use" block. Untouched messages keep
 * their identity, and the input array is returned when nothing changed.
 *
 * @param {Array<object>} messages - Conversation messages.
 * @returns {Array<object>} Messages with the placeholder added where needed.
 */
function normalizeMessagesForThinking(messages) {
  if (!messages || messages.length === 0) return messages;
  const needsPlaceholder = (msg) => {
    if (msg.role !== "assistant" || msg.reasoning_content !== void 0) {
      return false;
    }
    const viaToolCalls = msg.tool_calls && Array.isArray(msg.tool_calls) && msg.tool_calls.length > 0;
    const viaContentBlocks = Array.isArray(msg.content) && msg.content.some((block) => block?.type === "tool_use");
    return Boolean(viaToolCalls || viaContentBlocks);
  };
  let patched = false;
  const output = messages.map((msg) => {
    if (!needsPlaceholder(msg)) {
      return msg;
    }
    patched = true;
    return { ...msg, reasoning_content: "" };
  });
  return patched ? output : messages;
}
3648
/**
 * Cap a conversation at `maxMessages` entries.
 *
 * All system messages are kept and moved to the front; the remaining budget
 * is filled with the most recent non-system messages. At least the single
 * most recent non-system message is always kept, even when system messages
 * alone use up the budget, so the request still carries a conversational turn.
 *
 * @param {Array<object>} messages - Conversation messages.
 * @param {number} [maxMessages=MAX_MESSAGES] - Maximum number of messages to keep.
 * @returns {Array<object>} The input itself when already within the limit,
 *   otherwise a new truncated array.
 */
function truncateMessages(messages, maxMessages = MAX_MESSAGES) {
  if (!messages || messages.length <= maxMessages) return messages;
  const systemMsgs = messages.filter((m) => m.role === "system");
  const conversationMsgs = messages.filter((m) => m.role !== "system");
  // Clamp to >= 1: slice(-0) would keep everything and a negative budget
  // would drop messages from the front instead of keeping the tail.
  const maxConversation = Math.max(1, maxMessages - systemMsgs.length);
  const truncatedConversation = conversationMsgs.slice(-maxConversation);
  console.log(
    `[ClawRouter] Truncated messages: ${messages.length} \u2192 ${systemMsgs.length + truncatedConversation.length} (kept ${systemMsgs.length} system + ${truncatedConversation.length} recent)`
  );
  return [...systemMsgs, ...truncatedConversation];
}
3659
// "<|...begin...|> ... <|...end...|>" spans, removed together with their content.
// NOTE(review): the character class lists "|" twice; presumably one of them was
// a different (full-width) bar character in the upstream source — verify.
var KIMI_BLOCK_RE = /<[||][^<>]*begin[^<>]*[||]>[\s\S]*?<[||][^<>]*end[^<>]*[||]>/gi;
// Any remaining single "<|...|>" token.
var KIMI_TOKEN_RE = /<[||][^<>]*[||]>/g;
// A lone opening or closing think/thinking/thought/antthinking tag.
var THINKING_TAG_RE = /<\s*\/?\s*(?:think(?:ing)?|thought|antthinking)\b[^>]*>/gi;
// A complete thinking element including its content.
var THINKING_BLOCK_RE = /<\s*(?:think(?:ing)?|thought|antthinking)\b[^>]*>[\s\S]*?<\s*\/\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi;
/**
 * Remove model "thinking" markup from generated text.
 *
 * The patterns are applied in a fixed order: delimited token blocks, single
 * tokens, complete thinking elements, and finally stray thinking tags.
 *
 * @param {string} content - Text to clean.
 * @returns {string} The cleaned text; falsy input is returned unchanged.
 */
function stripThinkingTokens(content) {
  if (!content) return content;
  const passes = [KIMI_BLOCK_RE, KIMI_TOKEN_RE, THINKING_BLOCK_RE, THINKING_TAG_RE];
  return passes.reduce((text, pattern) => text.replace(pattern, ""), content);
}
3671
/**
 * Build the pricing lookup from BLOCKRUN_MODELS.
 * The AUTO_MODEL entry is left out.
 *
 * @returns {Map<string, {inputPrice: number, outputPrice: number}>} Pricing keyed by model id.
 */
function buildModelPricing() {
  const priced = BLOCKRUN_MODELS.filter((m) => m.id !== AUTO_MODEL).map((m) => [
    m.id,
    { inputPrice: m.inputPrice, outputPrice: m.outputPrice }
  ]);
  return new Map(priced);
}
3679
/**
 * Layer user overrides over DEFAULT_ROUTING_CONFIG.
 *
 * Top-level keys are replaced; the `classifier`, `scoring`, `tiers` and
 * `overrides` sections are merged one level deep.
 *
 * @param {object} [overrides] - Partial routing configuration.
 * @returns {object} DEFAULT_ROUTING_CONFIG itself when no overrides are given,
 *   otherwise a new merged object.
 */
function mergeRoutingConfig(overrides) {
  if (!overrides) return DEFAULT_ROUTING_CONFIG;
  const merged = { ...DEFAULT_ROUTING_CONFIG, ...overrides };
  for (const section of ["classifier", "scoring", "tiers", "overrides"]) {
    merged[section] = { ...DEFAULT_ROUTING_CONFIG[section], ...overrides[section] };
  }
  return merged;
}
3690
/**
 * Estimate the amount to authorize for a request.
 *
 * Input tokens are approximated as one per four body bytes; output tokens
 * use `maxTokens`, then the model's `maxOutput`, then 4096. The cost is
 * padded by 20%, multiplied by 1e6, rounded up and floored at 100.
 *
 * @param {string} modelId - Model to price.
 * @param {number} bodyLength - Request body length.
 * @param {number} [maxTokens] - Requested output token limit.
 * @returns {string | undefined} The amount as a decimal string, or undefined
 *   when the model is not in BLOCKRUN_MODELS.
 */
function estimateAmount(modelId, bodyLength, maxTokens) {
  const model = BLOCKRUN_MODELS.find((m) => m.id === modelId);
  if (!model) {
    return void 0;
  }
  const inputTokens = Math.ceil(bodyLength / 4);
  const outputTokens = maxTokens || model.maxOutput || 4096;
  const inputCost = inputTokens / 1e6 * model.inputPrice;
  const outputCost = outputTokens / 1e6 * model.outputPrice;
  const costUsd = inputCost + outputCost;
  const padded = Math.ceil(costUsd * 1.2 * 1e6);
  return String(Math.max(100, padded));
}
3699
/**
 * Start the local proxy on 127.0.0.1, or attach to one that is already
 * running on the same port.
 *
 * Routes served: `/health` (optionally `?full=true` for balance details),
 * `/cache`, `/stats` (`?days=N`, clamped to 1..30, default 7), `GET /v1/models`,
 * and everything else under `/v1` which is forwarded through proxyRequest.
 *
 * @param {object} options - Proxy options. Read here: `walletKey`, `apiBase`,
 *   `port`, `routingConfig`, `cacheConfig`, `sessionConfig`, `onReady`, `onError`.
 * @returns {Promise<{port: number, baseUrl: string, walletAddress: string,
 *   balanceMonitor: object, close: () => Promise<void>}>} Handle for the
 *   proxy. When an existing proxy is reused, `close` is a no-op and
 *   `walletAddress` is the wallet reported by that proxy.
 * @throws The listen error when the port cannot be bound and no existing
 *   proxy answers on it.
 */
async function startProxy(options) {
  const apiBase = options.apiBase ?? BLOCKRUN_API;
  const listenPort = options.port ?? getProxyPort();
  const existingWallet = await checkExistingProxy(listenPort);
  if (existingWallet) {
    const account2 = privateKeyToAccount2(options.walletKey);
    const balanceMonitor2 = new BalanceMonitor(account2.address);
    const baseUrl2 = `http://127.0.0.1:${listenPort}`;
    if (existingWallet !== account2.address) {
      console.warn(
        `[ClawRouter] Existing proxy on port ${listenPort} uses wallet ${existingWallet}, but current config uses ${account2.address}. Reusing existing proxy.`
      );
    }
    options.onReady?.(listenPort);
    return {
      port: listenPort,
      baseUrl: baseUrl2,
      walletAddress: existingWallet,
      balanceMonitor: balanceMonitor2,
      close: async () => {
      }
    };
  }
  const account = privateKeyToAccount2(options.walletKey);
  const { fetch: payFetch } = createPaymentFetch(options.walletKey);
  const balanceMonitor = new BalanceMonitor(account.address);
  const routingConfig = mergeRoutingConfig(options.routingConfig);
  const modelPricing = buildModelPricing();
  const routerOpts = {
    config: routingConfig,
    modelPricing
  };
  const deduplicator = new RequestDeduplicator();
  const responseCache = new ResponseCache(options.cacheConfig);
  const sessionStore = new SessionStore(options.sessionConfig);
  // Open sockets are tracked so close() can tear them down immediately.
  const connections = /* @__PURE__ */ new Set();
  const server = createServer(async (req, res) => {
    req.on("error", (err) => {
      console.error(`[ClawRouter] Request stream error: ${err.message}`);
    });
    res.on("error", (err) => {
      console.error(`[ClawRouter] Response stream error: ${err.message}`);
    });
    finished(res, (err) => {
      if (err && err.code !== "ERR_STREAM_DESTROYED") {
        console.error(`[ClawRouter] Response finished with error: ${err.message}`);
      }
    });
    finished(req, (err) => {
      if (err && err.code !== "ERR_STREAM_DESTROYED") {
        console.error(`[ClawRouter] Request finished with error: ${err.message}`);
      }
    });
    if (req.url === "/health" || req.url?.startsWith("/health?")) {
      const url = new URL(req.url, "http://localhost");
      const full = url.searchParams.get("full") === "true";
      const response = {
        status: "ok",
        wallet: account.address
      };
      if (full) {
        try {
          const balanceInfo = await balanceMonitor.checkBalance();
          response.balance = balanceInfo.balanceUSD;
          response.isLow = balanceInfo.isLow;
          response.isEmpty = balanceInfo.isEmpty;
        } catch {
          response.balanceError = "Could not fetch balance";
        }
      }
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(JSON.stringify(response));
      return;
    }
    if (req.url === "/cache" || req.url?.startsWith("/cache?")) {
      const stats = responseCache.getStats();
      res.writeHead(200, {
        "Content-Type": "application/json",
        "Cache-Control": "no-cache"
      });
      res.end(JSON.stringify(stats, null, 2));
      return;
    }
    if (req.url === "/stats" || req.url?.startsWith("/stats?")) {
      try {
        const url = new URL(req.url, "http://localhost");
        const requestedDays = parseInt(url.searchParams.get("days") || "7", 10);
        // A non-numeric value used to reach getStats as NaN; fall back to the
        // default window and keep numeric values within 1..30.
        const days = Number.isNaN(requestedDays) ? 7 : Math.min(Math.max(requestedDays, 1), 30);
        const stats = await getStats(days);
        res.writeHead(200, {
          "Content-Type": "application/json",
          "Cache-Control": "no-cache"
        });
        res.end(JSON.stringify(stats, null, 2));
      } catch (err) {
        res.writeHead(500, { "Content-Type": "application/json" });
        res.end(
          JSON.stringify({
            error: `Failed to get stats: ${err instanceof Error ? err.message : String(err)}`
          })
        );
      }
      return;
    }
    if (req.url === "/v1/models" && req.method === "GET") {
      const models = BLOCKRUN_MODELS.filter((m) => m.id !== "blockrun/auto").map((m) => ({
        id: m.id,
        object: "model",
        created: Math.floor(Date.now() / 1e3),
        owned_by: m.id.split("/")[0] || "unknown"
      }));
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(JSON.stringify({ object: "list", data: models }));
      return;
    }
    if (!req.url?.startsWith("/v1")) {
      res.writeHead(404, { "Content-Type": "application/json" });
      res.end(JSON.stringify({ error: "Not found" }));
      return;
    }
    try {
      await proxyRequest(
        req,
        res,
        apiBase,
        payFetch,
        options,
        routerOpts,
        deduplicator,
        balanceMonitor,
        sessionStore,
        responseCache
      );
    } catch (err) {
      const error = err instanceof Error ? err : new Error(String(err));
      options.onError?.(error);
      if (!res.headersSent) {
        res.writeHead(502, { "Content-Type": "application/json" });
        res.end(
          JSON.stringify({
            error: { message: `Proxy error: ${error.message}`, type: "proxy_error" }
          })
        );
      } else if (!res.writableEnded) {
        // Headers already went out, so report the failure as a final SSE event.
        res.write(
          `data: ${JSON.stringify({ error: { message: error.message, type: "proxy_error" } })}\n\n`
        );
        res.write("data: [DONE]\n\n");
        res.end();
      }
    }
  });
  // One listen attempt. Rejects with { code: "REUSE_EXISTING" } when another
  // proxy answers on the port, { code: "RETRY" } when the port is busy but
  // attempts remain, or the raw listen error otherwise.
  const tryListen = (attempt) => {
    return new Promise((resolveAttempt, rejectAttempt) => {
      const onError = async (err) => {
        server.removeListener("error", onError);
        if (err.code === "EADDRINUSE") {
          const existingWallet2 = await checkExistingProxy(listenPort);
          if (existingWallet2) {
            console.log(`[ClawRouter] Existing proxy detected on port ${listenPort}, reusing`);
            rejectAttempt({ code: "REUSE_EXISTING", wallet: existingWallet2 });
            return;
          }
          if (attempt < PORT_RETRY_ATTEMPTS) {
            console.log(
              `[ClawRouter] Port ${listenPort} in TIME_WAIT, retrying in ${PORT_RETRY_DELAY_MS}ms (attempt ${attempt}/${PORT_RETRY_ATTEMPTS})`
            );
            rejectAttempt({ code: "RETRY", attempt });
            return;
          }
          console.error(
            `[ClawRouter] Port ${listenPort} still in use after ${PORT_RETRY_ATTEMPTS} attempts`
          );
          rejectAttempt(err);
          return;
        }
        rejectAttempt(err);
      };
      server.once("error", onError);
      server.listen(listenPort, "127.0.0.1", () => {
        server.removeListener("error", onError);
        resolveAttempt();
      });
    });
  };
  let lastError;
  for (let attempt = 1; attempt <= PORT_RETRY_ATTEMPTS; attempt++) {
    try {
      await tryListen(attempt);
      break;
    } catch (err) {
      const error = err;
      if (error.code === "REUSE_EXISTING" && error.wallet) {
        // This instance will never serve requests, so stop the session sweep
        // timer it may have started.
        sessionStore.close();
        const baseUrl2 = `http://127.0.0.1:${listenPort}`;
        options.onReady?.(listenPort);
        return {
          port: listenPort,
          baseUrl: baseUrl2,
          walletAddress: error.wallet,
          balanceMonitor,
          close: async () => {
          }
        };
      }
      if (error.code === "RETRY") {
        await new Promise((r) => setTimeout(r, PORT_RETRY_DELAY_MS));
        continue;
      }
      lastError = err;
      break;
    }
  }
  if (lastError) {
    // Startup failed: release the session sweep timer before surfacing the error.
    sessionStore.close();
    throw lastError;
  }
  const addr = server.address();
  const port = addr.port;
  const baseUrl = `http://127.0.0.1:${port}`;
  options.onReady?.(port);
  // Fire-and-forget; checkForUpdates handles its own errors.
  checkForUpdates();
  server.on("error", (err) => {
    console.error(`[ClawRouter] Server runtime error: ${err.message}`);
    options.onError?.(err);
  });
  server.on("clientError", (err, socket) => {
    console.error(`[ClawRouter] Client error: ${err.message}`);
    if (socket.writable && !socket.destroyed) {
      socket.end("HTTP/1.1 400 Bad Request\r\n\r\n");
    }
  });
  server.on("connection", (socket) => {
    connections.add(socket);
    socket.setTimeout(3e5);
    socket.on("timeout", () => {
      console.error(`[ClawRouter] Socket timeout, destroying connection`);
      socket.destroy();
    });
    socket.on("end", () => {
    });
    socket.on("error", (err) => {
      console.error(`[ClawRouter] Socket error: ${err.message}`);
    });
    socket.on("close", () => {
      connections.delete(socket);
    });
  });
  return {
    port,
    baseUrl,
    walletAddress: account.address,
    balanceMonitor,
    // Destroys open sockets and stops the server; rejects if that takes longer than 4s.
    close: () => new Promise((res, rej) => {
      const timeout = setTimeout(() => {
        rej(new Error("[ClawRouter] Close timeout after 4s"));
      }, 4e3);
      sessionStore.close();
      for (const socket of connections) {
        socket.destroy();
      }
      connections.clear();
      server.close((err) => {
        clearTimeout(timeout);
        if (err) {
          rej(err);
        } else {
          res();
        }
      });
    })
  };
}
3971
/**
 * Perform a single upstream attempt for one specific model and classify the
 * outcome so the caller can decide whether to fall back to another model.
 *
 * The JSON body is re-serialized with `model` replaced by `modelId` and the
 * `messages` array passed through the normalization helpers. If the body is
 * not valid JSON (or a helper throws) the original bytes are sent unchanged.
 *
 * @param {string} upstreamUrl - Full upstream URL to call.
 * @param {string} method - HTTP method to use.
 * @param {Record<string, string>} headers - Headers forwarded upstream.
 * @param {Buffer} body - Original request body.
 * @param {string} modelId - Model to use for this attempt.
 * @param {number} maxTokens - Used only for the pre-auth cost estimate.
 * @param {Function} payFetch - Called as `payFetch(url, init, preAuth)`.
 * @param {unknown} balanceMonitor - Not used by this function; kept so the
 *   positional signature stays compatible with existing callers.
 * @param {AbortSignal} [signal] - Abort signal passed to `payFetch`.
 * @returns {Promise<{success: boolean, response?: Response, errorBody?: string,
 *   errorStatus?: number, isProviderError?: boolean}>} `success: true` with the
 *   response only for HTTP 200; otherwise the error text, status, and whether
 *   the failure is worth retrying on another model.
 */
async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxTokens, payFetch, balanceMonitor, signal) {
  let requestBody = body;
  try {
    const parsed = JSON.parse(body.toString());
    parsed.model = modelId;
    // Each step re-checks the array because the previous helper's return value
    // replaces parsed.messages.
    if (Array.isArray(parsed.messages)) {
      parsed.messages = normalizeMessageRoles(parsed.messages);
    }
    if (Array.isArray(parsed.messages)) {
      parsed.messages = truncateMessages(parsed.messages);
    }
    if (Array.isArray(parsed.messages)) {
      parsed.messages = sanitizeToolIds(parsed.messages);
    }
    if (isGoogleModel(modelId) && Array.isArray(parsed.messages)) {
      parsed.messages = normalizeMessagesForGoogle(parsed.messages);
    }
    const hasThinkingEnabled = !!(parsed.thinking || parsed.extended_thinking || isReasoningModel(modelId));
    if (hasThinkingEnabled && Array.isArray(parsed.messages)) {
      parsed.messages = normalizeMessagesForThinking(parsed.messages);
    }
    requestBody = Buffer.from(JSON.stringify(parsed));
  } catch {
    // Deliberate best-effort: on any failure the original body is forwarded as-is.
  }
  const estimated = estimateAmount(modelId, requestBody.length, maxTokens);
  const preAuth = estimated ? { estimatedAmount: estimated } : void 0;
  try {
    const response = await payFetch(
      upstreamUrl,
      {
        method,
        headers,
        body: requestBody.length > 0 ? new Uint8Array(requestBody) : void 0,
        signal
      },
      preAuth
    );
    if (response.status !== 200) {
      const errorBody = await response.text();
      return {
        success: false,
        errorBody,
        errorStatus: response.status,
        isProviderError: isProviderError(response.status, errorBody)
      };
    }
    return { success: true, response };
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    // Network errors are retryable on another model, but once the signal has
    // been aborted any further attempt using the same signal would be rejected
    // as well, so an aborted attempt is reported as non-retryable.
    const aborted = signal?.aborted === true;
    return {
      success: false,
      errorBody: errorMsg,
      errorStatus: 500,
      isProviderError: !aborted
    };
  }
}
4030
/**
 * Handle one proxied HTTP request end to end: read the body, optionally route
 * it to a model, compress oversized payloads, serve from the response cache or
 * the deduplicator, check the wallet balance, call upstream with model
 * fallback, and relay the result (as synthesized SSE when the client asked for
 * streaming).
 *
 * @param {import("http").IncomingMessage} req - Incoming client request.
 * @param {import("http").ServerResponse} res - Response written to the client.
 * @param {string} apiBase - Upstream base URL; `req.url` is appended to it.
 * @param {Function} payFetch - Fetch wrapper forwarded to `tryModelRequest`.
 * @param {object} options - Reads `onRouted`, `onError`, `onLowBalance`,
 *   `autoCompressRequests`, `compressionThresholdKB`, `skipBalanceCheck` and
 *   `requestTimeoutMs`.
 * @param {object} routerOpts - Reads `config.tiers`, `config.agenticTiers` and
 *   `modelPricing`; also spread into the options passed to `route`.
 * @param {object} deduplicator - Provides `getCached`, `getInflight`,
 *   `markInflight`, `removeInflight` and `complete`.
 * @param {object} balanceMonitor - Provides `checkSufficient`,
 *   `deductEstimated` and `invalidate`.
 * @param {object} sessionStore - Provides `getSession`, `setSession` and
 *   `touchSession`.
 * @param {object} responseCache - Provides `shouldCache`, `get` and `set`.
 * @returns {Promise<void>}
 * @throws {Error} "Request timed out after <n>ms" when an AbortError escapes
 *   the upstream phase; any other error from that phase is rethrown as-is.
 *   Errors from the balance pre-check are rethrown after releasing the
 *   in-flight dedup entry.
 */
async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, deduplicator, balanceMonitor, sessionStore, responseCache) {
  const startTime = Date.now();
  const upstreamUrl = `${apiBase}${req.url}`;

  // Buffer the entire request body before doing anything else.
  const bodyChunks = [];
  for await (const chunk of req) {
    bodyChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
  }
  let body = Buffer.concat(bodyChunks);
  let routingDecision;
  let isStreaming = false;
  let modelId = "";
  let maxTokens = 4096;
  let routingProfile = null;

  // ---- Model resolution / routing (chat completions only) ----
  const isChatCompletion = req.url?.includes("/chat/completions");
  if (isChatCompletion && body.length > 0) {
    try {
      const parsed = JSON.parse(body.toString());
      isStreaming = parsed.stream === true;
      modelId = parsed.model || "";
      maxTokens = parsed.max_tokens || 4096;
      let bodyModified = false;
      // Upstream is always called non-streaming; when the client asked for a
      // stream, SSE chunks are synthesized from the JSON response further down.
      if (parsed.stream === true) {
        parsed.stream = false;
        bodyModified = true;
      }
      const normalizedModel = typeof parsed.model === "string" ? parsed.model.trim().toLowerCase() : "";
      const resolvedModel = resolveModelAlias(normalizedModel);
      const wasAlias = resolvedModel !== normalizedModel;
      const isRoutingProfile = ROUTING_PROFILES.has(normalizedModel);
      if (isRoutingProfile) {
        routingProfile = normalizedModel.replace("blockrun/", "");
      }
      console.log(
        `[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}"${wasAlias ? ` -> alias: "${resolvedModel}"` : ""}${routingProfile ? `, profile: ${routingProfile}` : ""}`
      );
      if (wasAlias && !isRoutingProfile) {
        parsed.model = resolvedModel;
        modelId = resolvedModel;
        bodyModified = true;
      }
      if (isRoutingProfile) {
        if (routingProfile === "free") {
          // NOTE(review): hard-coded id; presumably the same value as FREE_MODEL — confirm.
          const freeModel = "nvidia/gpt-oss-120b";
          console.log(`[ClawRouter] Free profile - using ${freeModel} directly`);
          parsed.model = freeModel;
          modelId = freeModel;
          bodyModified = true;
          await logUsage({
            timestamp: (/* @__PURE__ */ new Date()).toISOString(),
            model: freeModel,
            tier: "SIMPLE",
            cost: 0,
            baselineCost: 0,
            savings: 1,
            // 100% savings
            latencyMs: 0
          });
        } else {
          const sessionId = getSessionId(req.headers);
          const existingSession = sessionId ? sessionStore.getSession(sessionId) : void 0;
          if (existingSession) {
            // A session that already has a model keeps using it.
            console.log(
              `[ClawRouter] Session ${sessionId?.slice(0, 8)}... using pinned model: ${existingSession.model}`
            );
            parsed.model = existingSession.model;
            modelId = existingSession.model;
            bodyModified = true;
            sessionStore.touchSession(sessionId);
          } else {
            // Route on the most recent user message plus the first system message.
            const messages = parsed.messages;
            let lastUserMsg;
            if (messages) {
              for (let i = messages.length - 1; i >= 0; i--) {
                if (messages[i].role === "user") {
                  lastUserMsg = messages[i];
                  break;
                }
              }
            }
            const systemMsg = messages?.find((m) => m.role === "system");
            const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
            const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
            const tools = parsed.tools;
            const hasTools = Array.isArray(tools) && tools.length > 0;
            if (hasTools) {
              console.log(
                `[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`
              );
            }
            routingDecision = route(prompt, systemPrompt, maxTokens, {
              ...routerOpts,
              routingProfile: routingProfile ?? void 0
            });
            parsed.model = routingDecision.model;
            modelId = routingDecision.model;
            bodyModified = true;
            if (sessionId) {
              sessionStore.setSession(sessionId, routingDecision.model, routingDecision.tier);
              console.log(
                `[ClawRouter] Session ${sessionId.slice(0, 8)}... pinned to model: ${routingDecision.model}`
              );
            }
            options.onRouted?.(routingDecision);
          }
        }
      }
      if (bodyModified) {
        body = Buffer.from(JSON.stringify(parsed));
      }
    } catch (err) {
      // Routing failures are reported but do not abort the request; the body
      // is forwarded in whatever state it had before the failure.
      const errorMsg = err instanceof Error ? err.message : String(err);
      console.error(`[ClawRouter] Routing error: ${errorMsg}`);
      options.onError?.(new Error(`Routing failed: ${errorMsg}`));
    }
  }

  // ---- Optional compression of oversized request bodies ----
  const autoCompress = options.autoCompressRequests ?? true;
  const compressionThreshold = options.compressionThresholdKB ?? 180;
  const requestSizeKB = Math.ceil(body.length / 1024);
  if (autoCompress && requestSizeKB > compressionThreshold) {
    try {
      console.log(
        `[ClawRouter] Request size ${requestSizeKB}KB exceeds threshold ${compressionThreshold}KB, applying compression...`
      );
      const parsed = JSON.parse(body.toString());
      if (parsed.messages && parsed.messages.length > 0 && shouldCompress(parsed.messages)) {
        const compressionResult = await compressContext(parsed.messages, {
          enabled: true,
          preserveRaw: false,
          // Don't need originals in proxy
          layers: {
            deduplication: true,
            // Safe: removes duplicate messages
            whitespace: true,
            // Safe: normalizes whitespace
            dictionary: false,
            // Disabled: requires model to understand codebook
            paths: false,
            // Disabled: requires model to understand path codes
            jsonCompact: true,
            // Safe: just removes JSON whitespace
            observation: false,
            // Disabled: may lose important context
            dynamicCodebook: false
            // Disabled: requires model to understand codes
          },
          dictionary: {
            maxEntries: 50,
            minPhraseLength: 15,
            includeCodebookHeader: false
          }
        });
        const compressedSizeKB = Math.ceil(compressionResult.compressedChars / 1024);
        const savings = ((requestSizeKB - compressedSizeKB) / requestSizeKB * 100).toFixed(1);
        console.log(
          `[ClawRouter] Compressed ${requestSizeKB}KB \u2192 ${compressedSizeKB}KB (${savings}% reduction)`
        );
        parsed.messages = compressionResult.messages;
        body = Buffer.from(JSON.stringify(parsed));
      }
    } catch (err) {
      // Compression is optional: on failure the uncompressed body is sent.
      console.warn(
        `[ClawRouter] Compression failed: ${err instanceof Error ? err.message : String(err)}`
      );
    }
  }

  // ---- Response cache lookup ----
  const cacheKey = ResponseCache.generateKey(body);
  const reqHeaders = {};
  for (const [key, value] of Object.entries(req.headers)) {
    if (typeof value === "string") reqHeaders[key] = value;
  }
  if (responseCache.shouldCache(body, reqHeaders)) {
    const cachedResponse = responseCache.get(cacheKey);
    if (cachedResponse) {
      console.log(`[ClawRouter] Cache HIT for ${cachedResponse.model} (saved API call)`);
      res.writeHead(cachedResponse.status, cachedResponse.headers);
      res.end(cachedResponse.body);
      return;
    }
  }

  // ---- Request de-duplication (completed and in-flight) ----
  const dedupKey = RequestDeduplicator.hash(body);
  const cached = deduplicator.getCached(dedupKey);
  if (cached) {
    res.writeHead(cached.status, cached.headers);
    res.end(cached.body);
    return;
  }
  const inflight = deduplicator.getInflight(dedupKey);
  if (inflight) {
    const result = await inflight;
    res.writeHead(result.status, result.headers);
    res.end(result.body);
    return;
  }
  deduplicator.markInflight(dedupKey);

  // ---- Balance pre-check; falls back to the free model when funds are short ----
  let estimatedCostMicros;
  const isFreeModel = modelId === FREE_MODEL;
  if (modelId && !options.skipBalanceCheck && !isFreeModel) {
    try {
      const estimated = estimateAmount(modelId, body.length, maxTokens);
      if (estimated) {
        estimatedCostMicros = BigInt(estimated);
        const bufferedCostMicros = estimatedCostMicros * BigInt(Math.ceil(BALANCE_CHECK_BUFFER * 100)) / 100n;
        const sufficiency = await balanceMonitor.checkSufficient(bufferedCostMicros);
        if (sufficiency.info.isEmpty || !sufficiency.sufficient) {
          const originalModel = modelId;
          console.log(
            `[ClawRouter] Wallet ${sufficiency.info.isEmpty ? "empty" : "insufficient"} ($${sufficiency.info.balanceUSD}), falling back to free model: ${FREE_MODEL} (requested: ${originalModel})`
          );
          modelId = FREE_MODEL;
          const parsed = JSON.parse(body.toString());
          parsed.model = FREE_MODEL;
          body = Buffer.from(JSON.stringify(parsed));
          options.onLowBalance?.({
            balanceUSD: sufficiency.info.balanceUSD,
            walletAddress: sufficiency.info.walletAddress
          });
        } else if (sufficiency.info.isLow) {
          options.onLowBalance?.({
            balanceUSD: sufficiency.info.balanceUSD,
            walletAddress: sufficiency.info.walletAddress
          });
        }
      }
    } catch (err) {
      // This section runs after markInflight but before the close handler and
      // the main try block exist, so nothing else would release the dedup key
      // if it throws. Release it here, then propagate the failure unchanged.
      deduplicator.removeInflight(dedupKey);
      throw err;
    }
  }

  // ---- Early SSE headers + heartbeat for streaming clients ----
  let heartbeatInterval;
  let headersSentEarly = false;
  if (isStreaming) {
    res.writeHead(200, {
      "content-type": "text/event-stream",
      "cache-control": "no-cache",
      connection: "keep-alive"
    });
    headersSentEarly = true;
    safeWrite(res, ": heartbeat\n\n");
    heartbeatInterval = setInterval(() => {
      if (canWrite(res)) {
        safeWrite(res, ": heartbeat\n\n");
      } else {
        clearInterval(heartbeatInterval);
        heartbeatInterval = void 0;
      }
    }, HEARTBEAT_INTERVAL_MS);
  }

  // ---- Upstream headers: copy single-valued headers, minus hop/length headers ----
  const headers = {};
  for (const [key, value] of Object.entries(req.headers)) {
    if (key === "host" || key === "connection" || key === "transfer-encoding" || key === "content-length")
      continue;
    if (typeof value === "string") {
      headers[key] = value;
    }
  }
  if (!headers["content-type"]) {
    headers["content-type"] = "application/json";
  }
  headers["user-agent"] = USER_AGENT;

  // `completed` records that a result was handed to deduplicator.complete, so
  // the close handler only drops the in-flight entry for unfinished requests.
  let completed = false;
  res.on("close", () => {
    if (heartbeatInterval) {
      clearInterval(heartbeatInterval);
      heartbeatInterval = void 0;
    }
    if (!completed) {
      deduplicator.removeInflight(dedupKey);
    }
  });

  const timeoutMs = options.requestTimeoutMs ?? DEFAULT_REQUEST_TIMEOUT_MS;
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
  try {
    // ---- Build the ordered list of models to attempt ----
    let modelsToTry;
    if (routingDecision) {
      // Rough token estimate: 4 bytes of body per token.
      const estimatedInputTokens = Math.ceil(body.length / 4);
      const estimatedTotalTokens = estimatedInputTokens + maxTokens;
      const useAgenticTiers = routingDecision.reasoning?.includes("agentic") && routerOpts.config.agenticTiers;
      const tierConfigs = useAgenticTiers ? routerOpts.config.agenticTiers : routerOpts.config.tiers;
      const fullChain = getFallbackChain(routingDecision.tier, tierConfigs);
      const contextFiltered = getFallbackChainFiltered(
        routingDecision.tier,
        tierConfigs,
        estimatedTotalTokens,
        getModelContextWindow
      );
      const contextExcluded = fullChain.filter((m) => !contextFiltered.includes(m));
      if (contextExcluded.length > 0) {
        console.log(
          `[ClawRouter] Context filter (~${estimatedTotalTokens} tokens): excluded ${contextExcluded.join(", ")}`
        );
      }
      modelsToTry = contextFiltered.slice(0, MAX_FALLBACK_ATTEMPTS);
      modelsToTry = prioritizeNonRateLimited(modelsToTry);
    } else {
      if (modelId && modelId !== FREE_MODEL) {
        // Explicit model: try it, then the free model as a last resort.
        modelsToTry = [modelId, FREE_MODEL];
      } else {
        modelsToTry = modelId ? [modelId] : [];
      }
    }

    // ---- Attempt loop with fallback on provider errors ----
    let upstream;
    let lastError;
    let actualModelUsed = modelId;
    for (let i = 0; i < modelsToTry.length; i++) {
      const tryModel = modelsToTry[i];
      const isLastAttempt = i === modelsToTry.length - 1;
      console.log(`[ClawRouter] Trying model ${i + 1}/${modelsToTry.length}: ${tryModel}`);
      const result = await tryModelRequest(
        upstreamUrl,
        req.method ?? "POST",
        headers,
        body,
        tryModel,
        maxTokens,
        payFetch,
        balanceMonitor,
        controller.signal
      );
      if (result.success && result.response) {
        upstream = result.response;
        actualModelUsed = tryModel;
        console.log(`[ClawRouter] Success with model: ${tryModel}`);
        break;
      }
      lastError = {
        body: result.errorBody || "Unknown error",
        status: result.errorStatus || 500
      };
      if (result.isProviderError && !isLastAttempt) {
        if (result.errorStatus === 429) {
          markRateLimited(tryModel);
        }
        console.log(
          `[ClawRouter] Provider error from ${tryModel}, trying fallback: ${result.errorBody?.slice(0, 100)}`
        );
        continue;
      }
      if (!result.isProviderError) {
        console.log(
          `[ClawRouter] Non-provider error from ${tryModel}, not retrying: ${result.errorBody?.slice(0, 100)}`
        );
      }
      break;
    }
    clearTimeout(timeoutId);
    if (heartbeatInterval) {
      clearInterval(heartbeatInterval);
      heartbeatInterval = void 0;
    }

    // If a fallback model answered, recompute the cost figures and re-notify.
    if (routingDecision && actualModelUsed !== routingDecision.model) {
      const estimatedInputTokens = Math.ceil(body.length / 4);
      const newCosts = calculateModelCost(
        actualModelUsed,
        routerOpts.modelPricing,
        estimatedInputTokens,
        maxTokens,
        routingProfile ?? void 0
      );
      routingDecision = {
        ...routingDecision,
        model: actualModelUsed,
        reasoning: `${routingDecision.reasoning} | fallback to ${actualModelUsed}`,
        costEstimate: newCosts.costEstimate,
        baselineCost: newCosts.baselineCost,
        savings: newCosts.savings
      };
      options.onRouted?.(routingDecision);
    }

    // ---- Every attempt failed: relay the last error ----
    if (!upstream) {
      const rawErrBody = lastError?.body || "All models in fallback chain failed";
      const errStatus = lastError?.status || 502;
      const transformedErr = transformPaymentError(rawErrBody);
      if (headersSentEarly) {
        // The 200 SSE headers are already out, so the error is sent as an event.
        let errPayload;
        try {
          errPayload = JSON.stringify(JSON.parse(transformedErr));
        } catch {
          errPayload = JSON.stringify({
            error: { message: rawErrBody, type: "provider_error", status: errStatus }
          });
        }
        const errEvent = `data: ${errPayload}\n\n`;
        safeWrite(res, errEvent);
        safeWrite(res, "data: [DONE]\n\n");
        res.end();
        const errBuf = Buffer.from(errEvent + "data: [DONE]\n\n");
        deduplicator.complete(dedupKey, {
          status: 200,
          headers: { "content-type": "text/event-stream" },
          body: errBuf,
          completedAt: Date.now()
        });
      } else {
        res.writeHead(errStatus, { "Content-Type": "application/json" });
        res.end(transformedErr);
        deduplicator.complete(dedupKey, {
          status: errStatus,
          headers: { "content-type": "application/json" },
          body: Buffer.from(transformedErr),
          completedAt: Date.now()
        });
      }
      // The result was handed to the deduplicator, so the close handler must
      // not drop the entry afterwards.
      completed = true;
      return;
    }

    // ---- Relay the successful response ----
    const responseChunks = [];
    if (headersSentEarly) {
      // Streaming client: read the whole JSON response, then emit it as
      // chat.completion.chunk SSE events (role, content, tool calls, finish).
      if (upstream.body) {
        const reader = upstream.body.getReader();
        const chunks = [];
        try {
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            chunks.push(value);
          }
        } finally {
          reader.releaseLock();
        }
        const jsonBody = Buffer.concat(chunks);
        const jsonStr = jsonBody.toString();
        try {
          const rsp = JSON.parse(jsonStr);
          const baseChunk = {
            id: rsp.id ?? `chatcmpl-${Date.now()}`,
            object: "chat.completion.chunk",
            created: rsp.created ?? Math.floor(Date.now() / 1e3),
            model: rsp.model ?? "unknown",
            system_fingerprint: null
          };
          if (rsp.choices && Array.isArray(rsp.choices)) {
            for (const choice of rsp.choices) {
              const rawContent = choice.message?.content ?? choice.delta?.content ?? "";
              const content = stripThinkingTokens(rawContent);
              const role = choice.message?.role ?? choice.delta?.role ?? "assistant";
              const index = choice.index ?? 0;
              const roleChunk = {
                ...baseChunk,
                choices: [{ index, delta: { role }, logprobs: null, finish_reason: null }]
              };
              const roleData = `data: ${JSON.stringify(roleChunk)}\n\n`;
              safeWrite(res, roleData);
              responseChunks.push(Buffer.from(roleData));
              if (content) {
                const contentChunk = {
                  ...baseChunk,
                  choices: [{ index, delta: { content }, logprobs: null, finish_reason: null }]
                };
                const contentData = `data: ${JSON.stringify(contentChunk)}\n\n`;
                safeWrite(res, contentData);
                responseChunks.push(Buffer.from(contentData));
              }
              const toolCalls = choice.message?.tool_calls ?? choice.delta?.tool_calls;
              if (toolCalls && toolCalls.length > 0) {
                const toolCallChunk = {
                  ...baseChunk,
                  choices: [
                    {
                      index,
                      delta: { tool_calls: toolCalls },
                      logprobs: null,
                      finish_reason: null
                    }
                  ]
                };
                const toolCallData = `data: ${JSON.stringify(toolCallChunk)}\n\n`;
                safeWrite(res, toolCallData);
                responseChunks.push(Buffer.from(toolCallData));
              }
              const finishChunk = {
                ...baseChunk,
                choices: [
                  {
                    index,
                    delta: {},
                    logprobs: null,
                    finish_reason: toolCalls && toolCalls.length > 0 ? "tool_calls" : choice.finish_reason ?? "stop"
                  }
                ]
              };
              const finishData = `data: ${JSON.stringify(finishChunk)}\n\n`;
              safeWrite(res, finishData);
              responseChunks.push(Buffer.from(finishData));
            }
          }
        } catch {
          // Not parseable as JSON (or conversion failed): forward the raw text
          // as a single SSE event.
          const sseData = `data: ${jsonStr}\n\n`;
          safeWrite(res, sseData);
          responseChunks.push(Buffer.from(sseData));
        }
      }
      safeWrite(res, "data: [DONE]\n\n");
      responseChunks.push(Buffer.from("data: [DONE]\n\n"));
      res.end();
      deduplicator.complete(dedupKey, {
        status: 200,
        headers: { "content-type": "text/event-stream" },
        body: Buffer.concat(responseChunks),
        completedAt: Date.now()
      });
    } else {
      // Non-streaming client: pass status, headers and body through.
      const responseHeaders = {};
      upstream.headers.forEach((value, key) => {
        if (key === "transfer-encoding" || key === "connection" || key === "content-encoding")
          return;
        responseHeaders[key] = value;
      });
      res.writeHead(upstream.status, responseHeaders);
      if (upstream.body) {
        const reader = upstream.body.getReader();
        try {
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            const chunk = Buffer.from(value);
            safeWrite(res, chunk);
            responseChunks.push(chunk);
          }
        } finally {
          reader.releaseLock();
        }
      }
      res.end();
      const responseBody = Buffer.concat(responseChunks);
      deduplicator.complete(dedupKey, {
        status: upstream.status,
        headers: responseHeaders,
        body: responseBody,
        completedAt: Date.now()
      });
      if (upstream.status === 200 && responseCache.shouldCache(body)) {
        responseCache.set(cacheKey, {
          body: responseBody,
          status: upstream.status,
          headers: responseHeaders,
          model: modelId
        });
        console.log(`[ClawRouter] Cached response for ${modelId} (${responseBody.length} bytes)`);
      }
    }
    if (estimatedCostMicros !== void 0) {
      balanceMonitor.deductEstimated(estimatedCostMicros);
    }
    completed = true;
  } catch (err) {
    clearTimeout(timeoutId);
    if (heartbeatInterval) {
      clearInterval(heartbeatInterval);
      heartbeatInterval = void 0;
    }
    deduplicator.removeInflight(dedupKey);
    balanceMonitor.invalidate();
    if (err instanceof Error && err.name === "AbortError") {
      throw new Error(`Request timed out after ${timeoutMs}ms`);
    }
    throw err;
  }

  // ---- Usage logging for routed requests (fire-and-forget) ----
  if (routingDecision) {
    const estimatedInputTokens = Math.ceil(body.length / 4);
    const accurateCosts = calculateModelCost(
      routingDecision.model,
      routerOpts.modelPricing,
      estimatedInputTokens,
      maxTokens,
      routingProfile ?? void 0
    );
    // Logged cost and baseline are the estimates multiplied by 1.2.
    const costWithBuffer = accurateCosts.costEstimate * 1.2;
    const baselineWithBuffer = accurateCosts.baselineCost * 1.2;
    const entry = {
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      model: routingDecision.model,
      tier: routingDecision.tier,
      cost: costWithBuffer,
      baselineCost: baselineWithBuffer,
      savings: accurateCosts.savings,
      latencyMs: Date.now() - startTime
    };
    logUsage(entry).catch(() => {
      // Usage logging must never fail the request.
    });
  }
}
4622
-
4623
- // src/auth.ts
4624
- import { writeFile, readFile as readFile2, mkdir as mkdir2 } from "fs/promises";
4625
- import { join as join4 } from "path";
4626
- import { homedir as homedir3 } from "os";
4627
- import { generatePrivateKey, privateKeyToAccount as privateKeyToAccount3 } from "viem/accounts";
4628
/** Directory under the user's home that holds the wallet key file. */
const WALLET_DIR = join4(homedir3(), ".openclaw", "blockrun");
/** Path of the file the wallet private key is read from and written to. */
const WALLET_FILE = join4(WALLET_DIR, "wallet.key");
4630
/**
 * Read the wallet key stored in WALLET_FILE.
 *
 * @returns {Promise<string|undefined>} The trimmed file contents when they
 *   start with "0x" and are exactly 66 characters long; `undefined` when the
 *   file is missing, unreadable, or has any other shape. A read failure other
 *   than ENOENT is logged, never thrown.
 */
async function loadSavedWallet() {
  try {
    const raw = await readFile2(WALLET_FILE, "utf-8");
    const key = raw.trim();
    const hasExpectedShape = key.startsWith("0x") && key.length === 66;
    if (hasExpectedShape) {
      console.log(`[ClawRouter] \u2713 Loaded existing wallet from ${WALLET_FILE}`);
      return key;
    }
    console.warn(`[ClawRouter] \u26A0 Wallet file exists but is invalid (wrong format)`);
  } catch (err) {
    // A missing file is the normal first-run case and stays silent.
    const isMissingFile = err.code === "ENOENT";
    if (!isMissingFile) {
      const detail = err instanceof Error ? err.message : String(err);
      console.error(
        `[ClawRouter] \u2717 Failed to read wallet file: ${detail}`
      );
    }
  }
  return void 0;
}
4647
/**
 * Generate a new private key, write it to WALLET_FILE and verify the write by
 * reading the file back.
 *
 * @returns {Promise<{key: string, address: string}>} The generated key and the
 *   address of the account derived from it.
 * @throws {Error} "Failed to verify wallet file after creation: ..." when the
 *   read-back fails or the contents differ; the underlying error is attached
 *   as `cause`. Errors from creating the directory or writing the file
 *   propagate unchanged.
 */
async function generateAndSaveWallet() {
  const key = generatePrivateKey();
  const account = privateKeyToAccount3(key);
  await mkdir2(WALLET_DIR, { recursive: true });
  // 0o600 (decimal 384): read/write for the owner only.
  await writeFile(WALLET_FILE, key + "\n", { mode: 0o600 });
  try {
    const verification = (await readFile2(WALLET_FILE, "utf-8")).trim();
    if (verification !== key) {
      throw new Error("Wallet file verification failed - content mismatch");
    }
    console.log(`[ClawRouter] \u2713 Wallet saved and verified at ${WALLET_FILE}`);
  } catch (err) {
    // Keep the original error reachable for debugging instead of only its text.
    throw new Error(
      `Failed to verify wallet file after creation: ${err instanceof Error ? err.message : String(err)}`,
      { cause: err }
    );
  }
  return { key, address: account.address };
}
4665
/**
 * Resolve the wallet key to use, in this order: the key saved in the wallet
 * file, the BLOCKRUN_WALLET_KEY environment variable, a freshly generated key.
 *
 * @returns {Promise<{key: string, address: string, source: "saved"|"env"|"generated"}>}
 *   The key, the address derived from it, and where the key came from.
 */
async function resolveOrGenerateWalletKey() {
  const saved = await loadSavedWallet();
  if (saved) {
    const account = privateKeyToAccount3(saved);
    return { key: saved, address: account.address, source: "saved" };
  }
  // Trim so stray whitespace or a trailing newline in the environment value
  // does not fail the length check (the saved-file path trims as well).
  const rawEnvKey = process.env.BLOCKRUN_WALLET_KEY;
  const envKey = typeof rawEnvKey === "string" ? rawEnvKey.trim() : void 0;
  if (envKey) {
    if (envKey.startsWith("0x") && envKey.length === 66) {
      const account = privateKeyToAccount3(envKey);
      return { key: envKey, address: account.address, source: "env" };
    }
    // Without this warning a malformed value is silently replaced by a brand
    // new wallet. The value itself is never printed.
    console.warn(
      `[ClawRouter] \u26A0 BLOCKRUN_WALLET_KEY is set but invalid (wrong format), ignoring it`
    );
  }
  const { key, address } = await generateAndSaveWallet();
  return { key, address, source: "generated" };
}
4679
-
4680
- // src/cli.ts
4681
/**
 * Print the CLI usage text to stdout. The text embeds VERSION and the value
 * returned by getProxyPort().
 */
function printHelp() {
  const helpLines = [
    "",
    `ClawRouter v${VERSION} - Smart LLM Router`,
    "",
    "Usage:",
    "clawrouter [options]",
    "",
    "Options:",
    "--version, -v Show version number",
    "--help, -h Show this help message",
    `--port <number> Port to listen on (default: ${getProxyPort()})`,
    "",
    "Examples:",
    "# Start standalone proxy (survives gateway restarts)",
    "npx @blockrun/clawrouter",
    "",
    "# Start on custom port",
    "npx @blockrun/clawrouter --port 9000",
    "",
    "# Production deployment with PM2",
    'pm2 start "npx @blockrun/clawrouter" --name clawrouter',
    "",
    "Environment Variables:",
    "BLOCKRUN_WALLET_KEY Private key for x402 payments (auto-generated if not set)",
    "BLOCKRUN_PROXY_PORT Default proxy port (default: 8402)",
    "",
    "For more info: https://github.com/BlockRunAI/ClawRouter",
    ""
  ];
  console.log(helpLines.join("\n"));
}
4710
/**
 * Parse the CLI arguments.
 *
 * Recognized flags: `--version`/`-v`, `--help`/`-h` and `--port <number>`.
 * Unrecognized arguments are ignored.
 *
 * @param {string[]} args - Arguments after the script name.
 * @returns {{version: boolean, help: boolean, port: number|undefined}}
 * @throws {Error} When `--port` has no value, or its value is not an integer
 *   between 0 and 65535.
 */
function parseArgs(args) {
  const result = { version: false, help: false, port: void 0 };
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === "--version" || arg === "-v") {
      result.version = true;
    } else if (arg === "--help" || arg === "-h") {
      result.help = true;
    } else if (arg === "--port") {
      const raw = args[i + 1];
      if (raw === void 0 || raw === "") {
        throw new Error("--port requires a value");
      }
      // Digits only: rejects input such as "abc" or "9000abc" that parseInt
      // would turn into NaN or silently truncate.
      const port = /^\d+$/.test(raw) ? Number.parseInt(raw, 10) : Number.NaN;
      if (!Number.isInteger(port) || port > 65535) {
        throw new Error(`Invalid --port value: "${raw}" (expected an integer between 0 and 65535)`);
      }
      result.port = port;
      i++;
    }
  }
  return result;
}
4725
/**
 * CLI entry point: handle `--version` / `--help`, resolve the wallet key,
 * start the proxy, print the wallet balance, then stay alive until SIGINT or
 * SIGTERM triggers a shutdown.
 *
 * @returns {Promise<void>} Never resolves in normal operation; the process
 *   exits from the signal handler.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.version) {
    console.log(VERSION);
    process.exit(0);
  }
  if (args.help) {
    printHelp();
    process.exit(0);
  }
  const { key: walletKey, address, source } = await resolveOrGenerateWalletKey();
  if (source === "generated") {
    console.log(`[ClawRouter] Generated new wallet: ${address}`);
  } else if (source === "saved") {
    console.log(`[ClawRouter] Using saved wallet: ${address}`);
  } else {
    console.log(`[ClawRouter] Using wallet from BLOCKRUN_WALLET_KEY: ${address}`);
  }
  const proxy = await startProxy({
    walletKey,
    port: args.port,
    onReady: (port) => {
      console.log(`[ClawRouter] Proxy listening on http://127.0.0.1:${port}`);
      console.log(`[ClawRouter] Health check: http://127.0.0.1:${port}/health`);
    },
    onError: (error) => {
      console.error(`[ClawRouter] Error: ${error.message}`);
    },
    onRouted: (decision) => {
      const cost = decision.costEstimate.toFixed(4);
      const saved = (decision.savings * 100).toFixed(0);
      console.log(`[ClawRouter] [${decision.tier}] ${decision.model} $${cost} (saved ${saved}%)`);
    },
    onLowBalance: (info) => {
      console.warn(`[ClawRouter] Low balance: ${info.balanceUSD}. Fund: ${info.walletAddress}`);
    },
    onInsufficientFunds: (info) => {
      console.error(
        `[ClawRouter] Insufficient funds. Balance: ${info.balanceUSD}, Need: ${info.requiredUSD}`
      );
    }
  });

  // One-off balance report at startup; a failed check is not fatal.
  const monitor = new BalanceMonitor(address);
  try {
    const balance = await monitor.checkBalance();
    if (balance.isEmpty) {
      console.log(`[ClawRouter] Wallet balance: $0.00 (using FREE model)`);
      console.log(`[ClawRouter] Fund wallet for premium models: ${address}`);
    } else if (balance.isLow) {
      console.log(`[ClawRouter] Wallet balance: ${balance.balanceUSD} (low)`);
    } else {
      console.log(`[ClawRouter] Wallet balance: ${balance.balanceUSD}`);
    }
  } catch {
    console.log(`[ClawRouter] Wallet: ${address} (balance check pending)`);
  }
  console.log(`[ClawRouter] Ready - Ctrl+C to stop`);

  // Guard against re-entry: a second signal while the first shutdown is still
  // closing the proxy would otherwise call proxy.close() again.
  let shuttingDown = false;
  const shutdown = async (signal) => {
    if (shuttingDown) return;
    shuttingDown = true;
    console.log(`
[ClawRouter] Received ${signal}, shutting down...`);
    try {
      await proxy.close();
      console.log(`[ClawRouter] Proxy closed`);
      process.exit(0);
    } catch (err) {
      console.error(`[ClawRouter] Error during shutdown: ${err}`);
      process.exit(1);
    }
  };
  process.on("SIGINT", () => shutdown("SIGINT"));
  process.on("SIGTERM", () => shutdown("SIGTERM"));

  // Never settles: keeps main() pending until a signal handler exits the process.
  await new Promise(() => {
  });
}
main().catch((err) => {
  // Rejections are not guaranteed to be Error instances.
  console.error(`[ClawRouter] Fatal error: ${err instanceof Error ? err.message : String(err)}`);
  process.exit(1);
});
4803
- //# sourceMappingURL=cli.js.map