clawmux 0.3.10 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -15597,8 +15597,8 @@ async function defaultParseResponse(client, props) {
15597
15597
  const mediaType = contentType?.split(";")[0]?.trim();
15598
15598
  const isJSON = mediaType?.includes("application/json") || mediaType?.endsWith("+json");
15599
15599
  if (isJSON) {
15600
- const contentLength = response.headers.get("content-length");
15601
- if (contentLength === "0") {
15600
+ const contentLength2 = response.headers.get("content-length");
15601
+ if (contentLength2 === "0") {
15602
15602
  return;
15603
15603
  }
15604
15604
  const json = await response.json();
@@ -21884,8 +21884,8 @@ async function defaultParseResponse2(client, props) {
21884
21884
  const mediaType = contentType?.split(";")[0]?.trim();
21885
21885
  const isJSON = mediaType?.includes("application/json") || mediaType?.endsWith("+json");
21886
21886
  if (isJSON) {
21887
- const contentLength = response.headers.get("content-length");
21888
- if (contentLength === "0") {
21887
+ const contentLength2 = response.headers.get("content-length");
21888
+ if (contentLength2 === "0") {
21889
21889
  return;
21890
21890
  }
21891
21891
  const json = await response.json();
@@ -34508,7 +34508,7 @@ async function consumeBody(data) {
34508
34508
  throw new FetchError(`Premature close of server response while trying to fetch ${data.url}`);
34509
34509
  }
34510
34510
  }
34511
- var import_node_stream, import_node_util, import_node_buffer, pipeline2, INTERNALS, clone = (instance, highWaterMark) => {
34511
+ var import_node_stream, import_node_util, import_node_buffer, pipeline, INTERNALS, clone = (instance, highWaterMark) => {
34512
34512
  let p1;
34513
34513
  let p2;
34514
34514
  let { body } = instance[INTERNALS];
@@ -34569,7 +34569,7 @@ var import_node_stream, import_node_util, import_node_buffer, pipeline2, INTERNA
34569
34569
  if (body === null) {
34570
34570
  dest.end();
34571
34571
  } else {
34572
- await pipeline2(body, dest);
34572
+ await pipeline(body, dest);
34573
34573
  }
34574
34574
  };
34575
34575
  var init_body = __esm(() => {
@@ -34581,7 +34581,7 @@ var init_body = __esm(() => {
34581
34581
  import_node_stream = __toESM(require("node:stream"));
34582
34582
  import_node_util = require("node:util");
34583
34583
  import_node_buffer = require("node:buffer");
34584
- pipeline2 = import_node_util.promisify(import_node_stream.default.pipeline);
34584
+ pipeline = import_node_util.promisify(import_node_stream.default.pipeline);
34585
34585
  INTERNALS = Symbol("Body internals");
34586
34586
  Body.prototype.buffer = import_node_util.deprecate(Body.prototype.buffer, "Please use 'response.arrayBuffer()' instead of 'response.buffer()'", "node-fetch#buffer");
34587
34587
  Object.defineProperties(Body.prototype, {
@@ -57734,8 +57734,8 @@ async function defaultParseResponse3(client, props) {
57734
57734
  const mediaType = (_a4 = contentType === null || contentType === undefined ? undefined : contentType.split(";")[0]) === null || _a4 === undefined ? undefined : _a4.trim();
57735
57735
  const isJSON = (mediaType === null || mediaType === undefined ? undefined : mediaType.includes("application/json")) || (mediaType === null || mediaType === undefined ? undefined : mediaType.endsWith("+json"));
57736
57736
  if (isJSON) {
57737
- const contentLength = response.headers.get("content-length");
57738
- if (contentLength === "0") {
57737
+ const contentLength2 = response.headers.get("content-length");
57738
+ if (contentLength2 === "0") {
57739
57739
  return;
57740
57740
  }
57741
57741
  const json = await response.json();
@@ -112220,7 +112220,7 @@ var import_node_child_process = require("node:child_process");
112220
112220
  var import_node_os2 = require("node:os");
112221
112221
 
112222
112222
  // src/proxy/router.ts
112223
- var VERSION = process.env.npm_package_version ?? "0.3.10";
112223
+ var VERSION = process.env.npm_package_version ?? "0.3.12";
112224
112224
  function jsonResponse(body, status = 200) {
112225
112225
  return new Response(JSON.stringify(body), {
112226
112226
  status,
@@ -112429,6 +112429,12 @@ var import_promises2 = require("node:fs/promises");
112429
112429
  var import_node_path = require("node:path");
112430
112430
 
112431
112431
  // src/config/defaults.ts
112432
+ var ESCALATION_DEFAULTS = {
112433
+ activeThresholdMs: 300000,
112434
+ maxLifetimeMs: 7200000,
112435
+ fingerprintRootCount: 5,
112436
+ enabled: true
112437
+ };
112432
112438
  var DEFAULT_CONFIG = {
112433
112439
  compression: {
112434
112440
  threshold: 0.75,
@@ -112441,7 +112447,8 @@ var DEFAULT_CONFIG = {
112441
112447
  MEDIUM: "",
112442
112448
  HEAVY: ""
112443
112449
  },
112444
- contextWindows: {}
112450
+ contextWindows: {},
112451
+ escalation: ESCALATION_DEFAULTS
112445
112452
  },
112446
112453
  server: {
112447
112454
  port: 3456,
@@ -112462,7 +112469,13 @@ function applyDefaults(partial) {
112462
112469
  MEDIUM: partial.routing.models.MEDIUM ?? defaults.routing.models.MEDIUM,
112463
112470
  HEAVY: partial.routing.models.HEAVY ?? defaults.routing.models.HEAVY
112464
112471
  },
112465
- contextWindows: { ...defaults.routing.contextWindows, ...partial.routing.contextWindows }
112472
+ contextWindows: { ...defaults.routing.contextWindows, ...partial.routing.contextWindows },
112473
+ escalation: {
112474
+ activeThresholdMs: partial.routing.escalation?.activeThresholdMs ?? ESCALATION_DEFAULTS.activeThresholdMs,
112475
+ maxLifetimeMs: partial.routing.escalation?.maxLifetimeMs ?? ESCALATION_DEFAULTS.maxLifetimeMs,
112476
+ fingerprintRootCount: partial.routing.escalation?.fingerprintRootCount ?? ESCALATION_DEFAULTS.fingerprintRootCount,
112477
+ enabled: partial.routing.escalation?.enabled !== undefined ? partial.routing.escalation.enabled : ESCALATION_DEFAULTS.enabled
112478
+ }
112466
112479
  },
112467
112480
  server: {
112468
112481
  port: partial.server?.port ?? defaults.server.port,
@@ -112564,6 +112577,39 @@ function validateConfig(raw) {
112564
112577
  }
112565
112578
  }
112566
112579
  }
112580
+ if (routing.escalation !== undefined) {
112581
+ if (!isObject(routing.escalation)) {
112582
+ errors.push("routing.escalation: must be an object");
112583
+ } else {
112584
+ const esc = routing.escalation;
112585
+ if (esc.activeThresholdMs !== undefined) {
112586
+ if (typeof esc.activeThresholdMs !== "number") {
112587
+ errors.push(`routing.escalation.activeThresholdMs: must be a number, got ${typeof esc.activeThresholdMs}`);
112588
+ } else if (esc.activeThresholdMs <= 0) {
112589
+ errors.push(`routing.escalation.activeThresholdMs: must be a positive number, got ${esc.activeThresholdMs}`);
112590
+ }
112591
+ }
112592
+ if (esc.maxLifetimeMs !== undefined) {
112593
+ if (typeof esc.maxLifetimeMs !== "number") {
112594
+ errors.push(`routing.escalation.maxLifetimeMs: must be a number, got ${typeof esc.maxLifetimeMs}`);
112595
+ } else if (esc.maxLifetimeMs <= 0) {
112596
+ errors.push(`routing.escalation.maxLifetimeMs: must be a positive number, got ${esc.maxLifetimeMs}`);
112597
+ }
112598
+ }
112599
+ if (esc.fingerprintRootCount !== undefined) {
112600
+ if (typeof esc.fingerprintRootCount !== "number") {
112601
+ errors.push(`routing.escalation.fingerprintRootCount: must be a number, got ${typeof esc.fingerprintRootCount}`);
112602
+ } else if (!Number.isInteger(esc.fingerprintRootCount) || esc.fingerprintRootCount < 1) {
112603
+ errors.push(`routing.escalation.fingerprintRootCount: must be an integer >= 1, got ${esc.fingerprintRootCount}`);
112604
+ }
112605
+ }
112606
+ if (esc.enabled !== undefined) {
112607
+ if (typeof esc.enabled !== "boolean") {
112608
+ errors.push(`routing.escalation.enabled: must be a boolean, got ${typeof esc.enabled}`);
112609
+ }
112610
+ }
112611
+ }
112612
+ }
112567
112613
  const server = obj.server !== undefined && isObject(obj.server) ? obj.server : null;
112568
112614
  if (server !== null && server.port !== undefined) {
112569
112615
  checkOptionalNumberRange(errors, "server.port", server.port, 1024, 65535);
@@ -114974,404 +115020,305 @@ function buildSyntheticHttpResponse(parsed, adapter) {
114974
115020
  });
114975
115021
  }
114976
115022
 
114977
- // src/routing/local-classifier.ts
114978
- var import_transformers = require("@huggingface/transformers");
114979
- var CAT_L = "L";
114980
- var CAT_M = "M";
114981
- var CAT_H = "H";
114982
- var CAT_Q = "Q";
114983
- var TIER_MAP = {
114984
- L: "LIGHT",
114985
- M: "MEDIUM",
114986
- H: "HEAVY"
114987
- };
114988
- var MODEL_ID = "Xenova/multilingual-e5-small";
114989
- var E5_PREFIX = "query: ";
114990
- var BATCH_SIZE = 32;
114991
- var TRAINING_LIGHT = [
114992
- "안녕하세요",
114993
- "안녕",
114994
- "안녕히 가세요",
114995
- "안녕히 계세요",
114996
- "반갑습니다",
114997
- "잘 지내시죠",
114998
- "오랜만이에요",
114999
- "고마워",
115000
- "감사합니다",
115001
- "고맙습니다",
115002
- " 고마워요",
115003
- "정말 감사합니다",
115004
- "도와줘서 고마워",
115005
- "네",
115006
- "",
115007
- "아니요",
115008
- "좋아요",
115009
- "알겠습니다",
115010
- "확인했습니다",
115011
- "그래요",
115012
- "맞아요",
115013
- "아 네",
115014
- "Python이 뭐야?",
115015
- "JavaScript가 뭐야?",
115016
- "오늘 날씨 어때?",
115017
- "지금 몇 시야?",
115018
- "이거 뭐야?",
115019
- "TypeScript가 뭐예요?",
115020
- "API가 뭐야?",
115021
- "HTML이 뭐야?",
115022
- "CSS가 뭐야?",
115023
- "Hello",
115024
- "Hi",
115025
- "Hey there",
115026
- "Good morning",
115027
- "Good afternoon",
115028
- "How are you",
115029
- "What's up",
115030
- "Thanks",
115031
- "Thank you",
115032
- "Got it",
115033
- "OK",
115034
- "Sounds good",
115035
- "I see",
115036
- "Understood",
115037
- "Great thanks",
115038
- "What is Python?",
115039
- "What time is it?",
115040
- "What's the weather?",
115041
- "Who is Einstein?",
115042
- "Where is Seoul?",
115043
- "How old are you?",
115044
- "yes",
115045
- "no",
115046
- "maybe",
115047
- "sure",
115048
- "please",
115049
- "done",
115050
- "ok",
115051
- "cool",
115052
- "nice",
115053
- "awesome"
115054
- ];
115055
- var TRAINING_MEDIUM = [
115056
- "Write a quicksort function in TypeScript",
115057
- "Implement a binary search tree with insert and delete",
115058
- "Create a REST API endpoint for user authentication",
115059
- "Write a function to merge two sorted arrays",
115060
- "Implement a linked list in Python",
115061
- "Write a unit test for the calculator module",
115062
- "Create a simple Express.js middleware for logging",
115063
- "Write a regex to validate email addresses",
115064
- "Implement a LRU cache with get and put operations",
115065
- "Create a React component for a todo list",
115066
- "Write a SQL query to join two tables",
115067
- "Implement a basic JWT authentication flow",
115068
- "Write a function to parse CSV files",
115069
- "Create a simple WebSocket server",
115070
- "Implement bubble sort in Java",
115071
- "Write a Python script to read a JSON file",
115072
- "Create a Docker compose file for a web app",
115073
- "Write a Git pre-commit hook",
115074
- "REST API에 로그인 엔드포인트 추가해줘",
115075
- "이 함수에 에러 핸들링 추가해줘",
115076
- "TypeScript로 이벤트 이미터 만들어줘",
115077
- "데이터베이스 마이그레이션 스크립트 작성해줘",
115078
- "React 컴포넌트에 상태 관리 추가해줘",
115079
- "Express 라우터에 CORS 미들웨어 추가해줘",
115080
- "테스트 코드 작성해줘",
115081
- "이 코드 리팩토링해줘",
115082
- "Explain the difference between let and const in JavaScript",
115083
- "What's the difference between SQL and NoSQL databases",
115084
- "Explain how async await works in Python",
115085
- "Describe the MVC architecture pattern",
115086
- "Explain what Docker containers are",
115087
- "REST와 GraphQL의 차이점을 설명해줘",
115088
- "이벤트 루프가 어떻게 동작하는지 설명해줘",
115089
- "클로저가 뭐야? 설명해줘",
115090
- "Set up a Node.js project with TypeScript and ESLint",
115091
- "Create a basic CI/CD pipeline using GitHub Actions",
115092
- "Configure Nginx as a reverse proxy for a Node.js app",
115093
- `이 함수를 리팩토링해줘:
115094
- function processUsers(data) {
115095
- var result = [];
115096
- for (var i = 0; i < data.length; i++) {
115097
- if (data[i].active == true && data[i].age > 18) {
115098
- var name = data[i].firstName + ' ' + data[i].lastName;
115099
- var obj = { name: name, email: data[i].email, role: data[i].isAdmin ? 'admin' : 'user' };
115100
- if (data[i].department !== null && data[i].department !== undefined) {
115101
- obj.department = data[i].department.name;
115102
- obj.manager = data[i].department.manager ? data[i].department.manager.name : 'N/A';
115103
- }
115104
- result.push(obj);
115105
- }
115106
- }
115107
- result.sort(function(a, b) { return a.name > b.name ? 1 : -1; });
115108
- return result;
115109
- }`,
115110
- `Refactor this code to use modern JavaScript:
115111
- function getItems(list) {
115112
- var items = [];
115113
- for (var i = 0; i < list.length; i++) {
115114
- if (list[i].active === true) {
115115
- items.push(list[i].name);
115023
+ // src/routing/signal-detector.ts
115024
+ var ESCALATE_SIGNAL = "===CLAWMUX_ESCALATE===";
115025
+
115026
+ class SignalDetector {
115027
+ buffer = "";
115028
+ detected = false;
115029
+ pending = [];
115030
+ feed(char) {
115031
+ if (this.pending.length > 0) {
115032
+ return this.pending.shift();
115033
+ }
115034
+ if (this.detected) {
115035
+ return { type: "signal_detected" };
115036
+ }
115037
+ if (this.buffer.length === 0) {
115038
+ if (char !== "=") {
115039
+ return { type: "passthrough", text: char };
115040
+ }
115041
+ this.buffer = char;
115042
+ return { type: "buffering" };
115043
+ }
115044
+ const candidate = this.buffer + char;
115045
+ if (candidate === ESCALATE_SIGNAL) {
115046
+ this.detected = true;
115047
+ this.buffer = "";
115048
+ return { type: "signal_detected" };
115049
+ }
115050
+ if (ESCALATE_SIGNAL.startsWith(candidate)) {
115051
+ this.buffer = candidate;
115052
+ return { type: "buffering" };
115116
115053
  }
115054
+ this.buffer = "";
115055
+ this.pending.push({ type: "passthrough", text: char });
115056
+ return { type: "passthrough", text: candidate.slice(0, -1) };
115117
115057
  }
115118
- return items;
115119
- }`
115120
- ];
115121
- var TRAINING_HEAVY = [
115122
- "Design a distributed consensus algorithm for a multi-region database with strong consistency and Byzantine fault tolerance",
115123
- "Explain the theoretical foundations of quantum computing and how quantum entanglement can be used for cryptographic key distribution",
115124
- "Analyze the trade-offs between eventual consistency and strong consistency in distributed systems, including CAP theorem implications",
115125
- "Design a fault-tolerant microservices architecture for a real-time trading platform handling millions of transactions per second",
115126
- "Propose a novel approach to solving the traveling salesman problem that improves upon current approximation algorithms",
115127
- "Design a machine learning pipeline for real-time fraud detection in financial transactions with sub-millisecond latency requirements",
115128
- "Compare and contrast different consensus protocols (Paxos, Raft, PBFT) and recommend the best one for a blockchain-based supply chain system",
115129
- "Architect a system that can handle 10 million concurrent WebSocket connections with horizontal scaling",
115130
- "Design a real-time data streaming architecture combining Kafka, Flink, and a time-series database for IoT sensor data",
115131
- "메모리 릭이 발생하는데 프로파일러에서 이벤트 루프 블로킹과 GC 지연이 동시에 나타나. 마이크로서비스 간 gRPC 연결 풀링도 의심되는 상황인데 원인 분석 방법을 단계별로 설명해줘",
115132
- "대규모 분산 시스템에서 파티션 톨런스와 일관성을 동시에 보장하는 방법을 설계해줘",
115133
- "실시간 추천 시스템을 위한 아키텍처를 설계해줘. 1초 이내에 개인화된 추천을 제공해야 해",
115134
- "카프카 기반 이벤트 드리븐 아키텍처에서 순서 보장과 정확히 한 번 처리를 어떻게 보장할 수 있을까?",
115135
- "마이크로서비스 간의 분산 트랜잭션을 사가 패턴으로 구현하는 방법을 단계별로 설명해줘",
115136
- "Debug a memory leak in a production Node.js application where the heap grows indefinitely but garbage collection logs show normal behavior",
115137
- "Investigate why our Kubernetes pods are being OOMKilled despite having memory limits set to 4GB and actual usage reported as 2GB",
115138
- "Find the root cause of intermittent 500ms latency spikes in our PostgreSQL queries that happen every 15 minutes",
115139
- "Design a multi-tenant SaaS platform with shared infrastructure but isolated data, supporting custom domains and white-labeling",
115140
- "Implement a distributed task scheduler that guarantees at-least-once execution with idempotency support across multiple data centers"
115141
- ];
115142
- var TRAINING_Q = [
115143
- "아까 그거 다시 해줘",
115144
- "그거 좀 더 자세히 설명해줘",
115145
- "아까 말한 거 그대로 해줘",
115146
- "이거 수정해줘",
115147
- "저거 어디 있지",
115148
- "그거 어떻게 됐어",
115149
- "위에꺼 다시 한번",
115150
- "그거 그대로 해줘",
115151
- "아까 한 거 다시",
115152
- "그 코드 다시 보여줘",
115153
- "저번에 한 거 기억나?",
115154
- "그 부분 수정해줘",
115155
- "Do that again",
115156
- "What about the thing we discussed earlier",
115157
- "Show me that again",
115158
- "Can you fix that",
115159
- "Change it like I said before",
115160
- "Continue from where we left off",
115161
- "That thing from earlier, do it again",
115162
- "Remember what we were working on",
115163
- "Go back to the previous one",
115164
- "Make it like the other one",
115165
- "The same thing but different",
115166
- "Update the one from before",
115167
- "그거 해줘",
115168
- "이거 해줘",
115169
- "저거 어때",
115170
- "How about this one",
115171
- "What about that",
115172
- "Try the other approach",
115173
- "Use the one I mentioned",
115174
- "Fix the issue",
115175
- "그냥 그거",
115176
- "이건 어때",
115177
- "Make it better",
115178
- "Change it",
115179
- "이거 수정해"
115180
- ];
115181
- var Q_PATTERNS = [
115182
- /^(아까|그거|저거|이거|그|위에|아래|저번|이전|전에).*(다시|해줘|해|보여|설명|수정|변경|삭제|추가|해봐)/,
115183
- /^(그거|저거|이거|그|이|저)(만|만큼|대로|처럼|같이)?\s*(해줘|해|놔|둬|봐|어때|어떻게)/,
115184
- /^(그거|저거|이거)\s*$/,
115185
- /(아까|저번에|전에|위에서|앞에서|이전에).*(그|그거|그것|그때|했던|말한)/,
115186
- /^(이거|저거|그거)(\s*.*)?$/
115187
- ];
115188
- var DEICTIC_WORDS = new Set(["그거", "저거", "이거", "그것", "이것", "저것", "아까", "저번"]);
115189
- function matchesQPattern(text) {
115190
- const trimmed = text.trim();
115191
- for (const pattern of Q_PATTERNS) {
115192
- if (pattern.test(trimmed))
115193
- return true;
115058
+ feedChunk(chunk) {
115059
+ const out = [];
115060
+ for (let i = 0;i < chunk.length; i++) {
115061
+ out.push(this.feed(chunk[i]));
115062
+ while (this.pending.length > 0) {
115063
+ out.push(this.pending.shift());
115064
+ }
115065
+ }
115066
+ return out;
115194
115067
  }
115195
- if (trimmed.length < 20) {
115196
- for (const word of DEICTIC_WORDS) {
115197
- if (trimmed.includes(word))
115198
- return true;
115068
+ flush() {
115069
+ if (this.buffer.length === 0)
115070
+ return null;
115071
+ const text = this.buffer;
115072
+ this.buffer = "";
115073
+ return text;
115074
+ }
115075
+ reset() {
115076
+ this.buffer = "";
115077
+ this.detected = false;
115078
+ this.pending.length = 0;
115079
+ }
115080
+ get isBuffering() {
115081
+ return this.buffer.length > 0;
115082
+ }
115083
+ }
115084
+
115085
+ // src/routing/escalation-memory.ts
115086
+ function djb2Hash(text) {
115087
+ let hash = 5381;
115088
+ for (let i = 0;i < text.length; i++) {
115089
+ hash = (hash << 5) + hash ^ text.charCodeAt(i);
115090
+ }
115091
+ return (hash >>> 0).toString(16);
115092
+ }
115093
+ function extractText(content) {
115094
+ if (typeof content === "string") {
115095
+ return content.slice(0, 200);
115096
+ }
115097
+ let concatenated = "";
115098
+ for (const block of content) {
115099
+ if (block.type === "text" && block.text !== undefined) {
115100
+ concatenated += block.text;
115199
115101
  }
115200
115102
  }
115201
- return false;
115103
+ return concatenated.slice(0, 200);
115202
115104
  }
115203
- var CODE_PATTERN = /[{}();]|function |const |let |var |class |import |export |=>|\bdef \b|\bfn\b/;
115204
- var TECH_TERMS = /\b(implement|create|design|architect|debug|refactor|migrate|deploy|build|write|develop)\b/i;
115205
- function isLikelyLight(text) {
115206
- const trimmed = text.trim();
115207
- if (trimmed.length <= 20 && !CODE_PATTERN.test(trimmed) && !TECH_TERMS.test(trimmed)) {
115208
- return true;
115105
+ function contentLength(content) {
115106
+ if (typeof content === "string") {
115107
+ return content.length;
115209
115108
  }
115210
- return false;
115109
+ return extractText(content).length;
115211
115110
  }
115212
- var extractorPromise = null;
115213
- function getExtractor() {
115214
- if (!extractorPromise) {
115215
- const dtype = process.env.CLAWMUX_EMBEDDING_DTYPE ?? "fp16";
115216
- console.log(`[clawmux] Loading embedding model (dtype=${dtype})...`);
115217
- extractorPromise = import_transformers.pipeline("feature-extraction", MODEL_ID, { dtype }).then((pipe) => {
115218
- console.log("[clawmux] Embedding model loaded");
115219
- return pipe;
115111
+
115112
+ class EscalationMemory {
115113
+ config;
115114
+ store;
115115
+ constructor(config) {
115116
+ this.config = config;
115117
+ this.store = new Map;
115118
+ }
115119
+ fingerprint(messages) {
115120
+ const count = this.config.fingerprintRootCount;
115121
+ const segments = [];
115122
+ for (let i = 0;i < count; i++) {
115123
+ const msg = messages[i];
115124
+ if (msg === undefined) {
115125
+ segments.push("::");
115126
+ continue;
115127
+ }
115128
+ const text = extractText(msg.content);
115129
+ const len = contentLength(msg.content);
115130
+ segments.push(`${msg.role}:${String(len)}:${djb2Hash(text)}`);
115131
+ }
115132
+ return segments.join("|");
115133
+ }
115134
+ lookup(messages, nowMs) {
115135
+ this.evict(nowMs);
115136
+ return this.store.get(this.fingerprint(messages)) ?? null;
115137
+ }
115138
+ record(messages, tier, nowMs) {
115139
+ const now = nowMs ?? Date.now();
115140
+ const fp = this.fingerprint(messages);
115141
+ const existing = this.store.get(fp);
115142
+ if (existing !== undefined && existing.tier === "HEAVY" && tier === "MEDIUM") {
115143
+ existing.lastActivityAt = now;
115144
+ return;
115145
+ }
115146
+ this.store.set(fp, {
115147
+ tier,
115148
+ firstEscalatedAt: existing?.firstEscalatedAt ?? now,
115149
+ lastActivityAt: now
115220
115150
  });
115221
115151
  }
115222
- return extractorPromise;
115223
- }
115224
- var centroidsPromise = null;
115225
- async function computeMeanEmbedding(texts) {
115226
- const extractor = await getExtractor();
115227
- const allEmbeddings = [];
115228
- for (let i = 0;i < texts.length; i += BATCH_SIZE) {
115229
- const batch = texts.slice(i, i + BATCH_SIZE).map((t) => E5_PREFIX + t);
115230
- const output = await extractor(batch, { pooling: "mean", normalize: true });
115231
- const list = output.tolist();
115232
- for (const emb of list) {
115233
- allEmbeddings.push(emb);
115152
+ touch(messages, nowMs) {
115153
+ const now = nowMs ?? Date.now();
115154
+ const record = this.store.get(this.fingerprint(messages));
115155
+ if (record !== undefined) {
115156
+ record.lastActivityAt = now;
115234
115157
  }
115235
115158
  }
115236
- if (allEmbeddings.length === 0)
115237
- return [];
115238
- const dim = allEmbeddings[0].length;
115239
- const mean = new Array(dim).fill(0);
115240
- for (const emb of allEmbeddings) {
115241
- for (let j = 0;j < dim; j++) {
115242
- mean[j] += emb[j] / allEmbeddings.length;
115243
- }
115244
- }
115245
- const magnitude = Math.sqrt(mean.reduce((sum, v) => sum + v * v, 0));
115246
- if (magnitude > 0) {
115247
- for (let j = 0;j < dim; j++)
115248
- mean[j] /= magnitude;
115249
- }
115250
- return mean;
115251
- }
115252
- function getCentroids() {
115253
- if (!centroidsPromise) {
115254
- centroidsPromise = (async () => {
115255
- console.log("[clawmux] Computing category centroids...");
115256
- const [cL, cM, cH, cQ] = await Promise.all([
115257
- computeMeanEmbedding(TRAINING_LIGHT),
115258
- computeMeanEmbedding(TRAINING_MEDIUM),
115259
- computeMeanEmbedding(TRAINING_HEAVY),
115260
- computeMeanEmbedding(TRAINING_Q)
115261
- ]);
115262
- console.log(`[clawmux] Centroids ready: L=${TRAINING_LIGHT.length} M=${TRAINING_MEDIUM.length} ` + `H=${TRAINING_HEAVY.length} Q=${TRAINING_Q.length} samples`);
115263
- return { [CAT_L]: cL, [CAT_M]: cM, [CAT_H]: cH, [CAT_Q]: cQ };
115264
- })();
115159
+ evict(nowMs) {
115160
+ const now = nowMs ?? Date.now();
115161
+ let evicted = 0;
115162
+ for (const [fp, record] of this.store) {
115163
+ const inactive = now - record.lastActivityAt > this.config.activeThresholdMs;
115164
+ const expired = now - record.firstEscalatedAt > this.config.maxLifetimeMs;
115165
+ if (inactive || expired) {
115166
+ this.store.delete(fp);
115167
+ evicted++;
115168
+ }
115169
+ }
115170
+ return evicted;
115171
+ }
115172
+ clear() {
115173
+ this.store.clear();
115174
+ }
115175
+ get size() {
115176
+ return this.store.size;
115265
115177
  }
115266
- return centroidsPromise;
115267
115178
  }
115268
- function cosineSimilarity(a, b) {
115269
- let dot = 0;
115270
- let magA = 0;
115271
- let magB = 0;
115272
- for (let i = 0;i < a.length; i++) {
115273
- dot += a[i] * b[i];
115274
- magA += a[i] * a[i];
115275
- magB += b[i] * b[i];
115179
+
115180
+ // src/routing/instruction-injector.ts
115181
+ var INJECT_FOR_TIERS = new Set(["LIGHT"]);
115182
+ var ESCALATION_INSTRUCTION = `If you cannot handle this request fully (due to complexity, missing context, or capability limits), output EXACTLY the following marker with no other text on that line: ${ESCALATE_SIGNAL}
115183
+ Do not explain. Do not ask permission. Just emit the marker and stop.`;
115184
+ function injectEscalationInstruction(messages) {
115185
+ if (messages.length === 0) {
115186
+ return [{ role: "system", content: ESCALATION_INSTRUCTION }];
115187
+ }
115188
+ const first = messages[0];
115189
+ const rest = messages.slice(1).map((m) => ({ ...m }));
115190
+ if (first.role !== "system") {
115191
+ return [
115192
+ { role: "system", content: ESCALATION_INSTRUCTION },
115193
+ { ...first },
115194
+ ...rest
115195
+ ];
115276
115196
  }
115277
- const denom = Math.sqrt(magA) * Math.sqrt(magB);
115278
- return denom > 0 ? dot / denom : 0;
115197
+ if (typeof first.content === "string") {
115198
+ return [
115199
+ { role: "system", content: first.content + `
115200
+
115201
+ ` + ESCALATION_INSTRUCTION },
115202
+ ...rest
115203
+ ];
115204
+ }
115205
+ return [
115206
+ {
115207
+ role: "system",
115208
+ content: [...first.content, { type: "text", text: ESCALATION_INSTRUCTION }]
115209
+ },
115210
+ ...rest
115211
+ ];
115279
115212
  }
115280
- async function classifyLocal(messages, config) {
115281
- const userText = extractLastUserText(messages);
115282
- if (!userText) {
115283
- return {
115284
- tier: "MEDIUM",
115285
- confidence: 0,
115286
- reasoning: "No user message found",
115287
- error: "No user message found in request"
115288
- };
115289
- }
115290
- const centroids = await getCentroids();
115291
- const extractor = await getExtractor();
115292
- const output = await extractor([E5_PREFIX + userText], { pooling: "mean", normalize: true });
115293
- const inputEmb = output.tolist()[0];
115294
- let bestCat = CAT_M;
115295
- let bestSim = -Infinity;
115296
- for (const [cat, centroid] of Object.entries(centroids)) {
115297
- const sim = cosineSimilarity(inputEmb, centroid);
115298
- if (sim > bestSim) {
115299
- bestSim = sim;
115300
- bestCat = cat;
115301
- }
115302
- }
115303
- if (isLikelyLight(userText) && bestCat !== CAT_Q) {
115304
- bestCat = CAT_L;
115305
- bestSim = Math.max(bestSim, 0.7);
115306
- }
115307
- const heuristicQ = matchesQPattern(userText);
115308
- if (bestCat === CAT_Q || heuristicQ) {
115309
- const contextText = buildContextText(messages, userText, config?.contextMessages ?? 10);
115310
- const ctxOutput = await extractor([E5_PREFIX + contextText], { pooling: "mean", normalize: true });
115311
- const contextEmb = ctxOutput.tolist()[0];
115312
- let reBestCat = CAT_M;
115313
- let reBestSim = -Infinity;
115314
- for (const [cat, centroid] of Object.entries(centroids)) {
115315
- if (cat === CAT_Q)
115316
- continue;
115317
- const sim = cosineSimilarity(contextEmb, centroid);
115318
- if (sim > reBestSim) {
115319
- reBestSim = sim;
115320
- reBestCat = cat;
115321
- }
115213
+
115214
+ // src/routing/signal-router.ts
115215
+ var NEXT_TIER = {
115216
+ LIGHT: "MEDIUM",
115217
+ MEDIUM: "HEAVY",
115218
+ HEAVY: null
115219
+ };
115220
+
115221
+ class SignalRouter {
115222
+ _memory;
115223
+ _enabled;
115224
+ constructor(config) {
115225
+ this._memory = new EscalationMemory(config.escalation);
115226
+ this._enabled = config.enabled;
115227
+ }
115228
+ selectInitialTier(messages, nowMs) {
115229
+ if (!this._enabled)
115230
+ return "MEDIUM";
115231
+ const record = this._memory.lookup(messages, nowMs);
115232
+ if (record !== null)
115233
+ return record.tier;
115234
+ return "LIGHT";
115235
+ }
115236
+ shouldInjectInstruction(tier) {
115237
+ return this._enabled && INJECT_FOR_TIERS.has(tier);
115238
+ }
115239
+ injectInstructionIfNeeded(tier, messages) {
115240
+ if (this.shouldInjectInstruction(tier)) {
115241
+ return injectEscalationInstruction(messages);
115322
115242
  }
115323
- const tier2 = TIER_MAP[reBestCat] ?? "MEDIUM";
115324
- return {
115325
- tier: tier2,
115326
- confidence: reBestSim,
115327
- reasoning: `Re-classified with context (initial: Q, heuristic: ${heuristicQ})`
115328
- };
115243
+ return messages;
115244
+ }
115245
+ createSignalDetector() {
115246
+ return new SignalDetector;
115247
+ }
115248
+ handleEscalation(_messages, fromTier) {
115249
+ return NEXT_TIER[fromTier];
115250
+ }
115251
+ recordSuccessfulEscalation(messages, tier, nowMs) {
115252
+ this._memory.record(messages, tier, nowMs);
115253
+ }
115254
+ touchActivity(messages, nowMs) {
115255
+ this._memory.touch(messages, nowMs);
115256
+ }
115257
+ get memory() {
115258
+ return this._memory;
115259
+ }
115260
+ get enabled() {
115261
+ return this._enabled;
115329
115262
  }
115330
- const tier = TIER_MAP[bestCat] ?? "MEDIUM";
115331
- return { tier, confidence: bestSim };
115332
115263
  }
115333
- function extractLastUserText(messages) {
115334
- for (let i = messages.length - 1;i >= 0; i--) {
115335
- const msg = messages[i];
115336
- if (msg.role !== "user")
115264
+
115265
+ // src/proxy/signal-detecting-stream.ts
115266
+ function createSignalDetectionState() {
115267
+ return { signalDetected: false, preSignalText: "" };
115268
+ }
115269
+ async function* detectSignalInStream(stream, detector, state, onSignal) {
115270
+ let signaled = false;
115271
+ for await (const event of stream) {
115272
+ if (signaled) {
115273
+ if (event.type === "done" || event.type === "error") {
115274
+ yield event;
115275
+ }
115337
115276
  continue;
115338
- if (typeof msg.content === "string") {
115339
- return msg.content;
115340
115277
  }
115341
- if (Array.isArray(msg.content)) {
115342
- const parts = [];
115343
- for (const block of msg.content) {
115344
- if (block.type === "text" && block.text) {
115345
- parts.push(block.text);
115278
+ if (event.type === "text_delta" && typeof event.delta === "string") {
115279
+ const results = detector.feedChunk(event.delta);
115280
+ let confirmedText = "";
115281
+ for (const r of results) {
115282
+ if (r.type === "passthrough") {
115283
+ state.preSignalText += r.text;
115284
+ confirmedText += r.text;
115285
+ } else if (r.type === "signal_detected") {
115286
+ signaled = true;
115287
+ state.signalDetected = true;
115288
+ onSignal();
115289
+ }
115290
+ }
115291
+ if (signaled) {
115292
+ if (confirmedText.length > 0) {
115293
+ yield {
115294
+ ...event,
115295
+ delta: confirmedText
115296
+ };
115346
115297
  }
115298
+ continue;
115299
+ }
115300
+ if (confirmedText.length > 0) {
115301
+ yield {
115302
+ ...event,
115303
+ delta: confirmedText
115304
+ };
115347
115305
  }
115348
- if (parts.length > 0)
115349
- return parts.join(" ");
115350
- }
115351
- }
115352
- return;
115353
- }
115354
- function buildContextText(allMessages, currentText, contextCount) {
115355
- const relevantMessages = allMessages.filter((m) => m.role === "user" || m.role === "assistant");
115356
- const lastN = relevantMessages.slice(-contextCount);
115357
- const parts = [];
115358
- for (const msg of lastN) {
115359
- let text;
115360
- if (typeof msg.content === "string") {
115361
- text = msg.content;
115362
- } else if (Array.isArray(msg.content)) {
115363
- text = msg.content.filter((b) => b.type === "text" && b.text).map((b) => b.text).join(" ");
115364
- } else {
115365
115306
  continue;
115366
115307
  }
115367
- parts.push(`[${msg.role}]: ${text}`);
115368
- }
115369
- const lastPart = parts[parts.length - 1];
115370
- if (!lastPart || !lastPart.includes(currentText)) {
115371
- parts.push(`[user]: ${currentText}`);
115308
+ if (event.type === "done" || event.type === "error") {
115309
+ const flushed = detector.flush();
115310
+ if (flushed !== null) {
115311
+ state.preSignalText += flushed;
115312
+ yield {
115313
+ type: "text_delta",
115314
+ contentIndex: 0,
115315
+ delta: flushed,
115316
+ partial: event.type === "done" ? event.message : event.error
115317
+ };
115318
+ }
115319
+ }
115320
+ yield event;
115372
115321
  }
115373
- return parts.join(`
115374
- `);
115375
115322
  }
115376
115323
 
115377
115324
  // src/openclaw/auth-resolver.ts
@@ -115674,7 +115621,7 @@ function isStreamContentType(contentType) {
115674
115621
  }
115675
115622
 
115676
115623
  // src/compression/session-store.ts
115677
- function djb2Hash(str) {
115624
+ function djb2Hash2(str) {
115678
115625
  let hash = 5381;
115679
115626
  for (let i = 0;i < str.length; i++) {
115680
115627
  hash = (hash << 5) + hash + str.charCodeAt(i) | 0;
@@ -115686,7 +115633,7 @@ function generateSessionId(messages) {
115686
115633
  if (!firstUserMessage)
115687
115634
  return "empty-session";
115688
115635
  const content = typeof firstUserMessage.content === "string" ? firstUserMessage.content : JSON.stringify(firstUserMessage.content);
115689
- return `session-${djb2Hash(content)}`;
115636
+ return `session-${djb2Hash2(content)}`;
115690
115637
  }
115691
115638
  function createSessionStore(maxSessions = 500) {
115692
115639
  const store = new Map;
@@ -120558,7 +120505,7 @@ function computeRetryDelay(response, attempt, baseDelayMs, maxDelayMs) {
120558
120505
  const jitter = Math.random() * 300;
120559
120506
  return Math.min(baseDelayMs * 2 ** attempt + jitter, maxDelayMs);
120560
120507
  }
120561
- async function handleApiRequest(req, body, apiType, config, openclawConfig, authProfiles, compressionMiddleware) {
120508
+ async function handleApiRequest(req, body, apiType, config, openclawConfig, authProfiles, compressionMiddleware, signalRouter) {
120562
120509
  const adapter = getAdapter(apiType);
120563
120510
  if (!adapter) {
120564
120511
  return jsonErrorResponse(`Unknown API type: ${apiType}`, 500);
@@ -120591,12 +120538,11 @@ async function handleApiRequest(req, body, apiType, config, openclawConfig, auth
120591
120538
  }
120592
120539
  }
120593
120540
  const messages = effectiveParsed.messages;
120594
- const classification = await classifyLocal(messages);
120541
+ const initialTier = signalRouter.selectInitialTier(messages);
120595
120542
  const decision = {
120596
- tier: classification.tier,
120597
- model: config.routing.models[classification.tier],
120598
- confidence: classification.confidence,
120599
- overrideReason: classification.reasoning
120543
+ tier: initialTier,
120544
+ model: config.routing.models[initialTier],
120545
+ confidence: 1
120600
120546
  };
120601
120547
  const lookup = findProviderForModel(decision.model, openclawConfig);
120602
120548
  let providerName;
@@ -120640,67 +120586,90 @@ async function handleApiRequest(req, body, apiType, config, openclawConfig, auth
120640
120586
  const piEligible = piEnabled && PI_CLIENT_APIS.has(apiType) && targetApiType !== "ollama" && targetApiType !== "bedrock-converse-stream";
120641
120587
  if (piEligible) {
120642
120588
  try {
120643
- const model = buildPiAiModel(providerName, actualModelId, openclawConfig);
120644
- const piContext = buildPiContext(effectiveParsed);
120645
- applyCodexSystemPromptFallback(piContext, targetApiType);
120646
- const piOptions = buildPiOptions(effectiveParsed, authInfo, providerName);
120647
120589
  const lastUserMsg2 = [...parsed.messages].reverse().find((m2) => m2.role === "user");
120648
120590
  const msgText2 = typeof lastUserMsg2?.content === "string" ? lastUserMsg2.content : Array.isArray(lastUserMsg2?.content) ? lastUserMsg2.content.filter((b) => b.type === "text").map((b) => b.text ?? "").join(" ") : "";
120649
120591
  const preview2 = msgText2.replace(/\s+/g, " ").trim().slice(0, 100);
120650
- console.log(`[clawmux] [llm] ${decision.tier} → ${decision.model} | conf=${classification.confidence.toFixed(2)} | pi-ai (${apiType})${preview2 ? ` | "${preview2}${msgText2.length > 100 ? "…" : ""}"` : ""}`);
120651
120592
  if (compressionMiddleware) {
120652
120593
  compressionMiddleware.afterResponse(parsed);
120653
120594
  }
120654
- const piStreamHandle = stream(model, piContext, piOptions);
120655
120595
  const wantsStream = effectiveParsed.stream === true;
120656
- if (apiType === "anthropic-messages") {
120596
+ let currentTier = initialTier;
120597
+ const MAX_ESCALATION_ATTEMPTS = 3;
120598
+ for (let attempt = 0;attempt < MAX_ESCALATION_ATTEMPTS; attempt++) {
120599
+ const currentModel = config.routing.models[currentTier];
120600
+ const currentActualModelId = currentModel.split("/").slice(1).join("/");
120601
+ const currentProviderName = findProviderForModel(currentModel, openclawConfig)?.providerName ?? providerName;
120602
+ const currentAuth = resolveApiKey(currentProviderName, openclawConfig, authProfiles);
120603
+ if (!currentAuth) {
120604
+ return jsonErrorResponse(`No auth credentials found for provider: ${currentProviderName}`, 502);
120605
+ }
120606
+ const currentAuthInfo = {
120607
+ apiKey: currentAuth.apiKey,
120608
+ headerName: currentAuth.headerName,
120609
+ headerValue: currentAuth.headerValue,
120610
+ awsAccessKeyId: currentAuth.awsAccessKeyId,
120611
+ awsSecretKey: currentAuth.awsSecretKey,
120612
+ awsSessionToken: currentAuth.awsSessionToken,
120613
+ awsRegion: currentAuth.awsRegion,
120614
+ accountId: currentAuth.accountId
120615
+ };
120616
+ const model = buildPiAiModel(currentProviderName, currentActualModelId, openclawConfig);
120617
+ const injectedMessages = signalRouter.injectInstructionIfNeeded(currentTier, messages);
120618
+ const injectedParsed = {
120619
+ ...effectiveParsed,
120620
+ messages: injectedMessages,
120621
+ rawBody: adapter.modifyMessages(effectiveParsed.rawBody, injectedMessages)
120622
+ };
120623
+ const piContext = buildPiContext(injectedParsed);
120624
+ applyCodexSystemPromptFallback(piContext, targetApiType);
120625
+ const shouldDetect = signalRouter.enabled && NEXT_TIER[currentTier] !== null;
120626
+ const abortCtrl = new AbortController;
120627
+ const piOptions = buildPiOptions(injectedParsed, currentAuthInfo, currentProviderName, abortCtrl.signal);
120628
+ console.log(`[clawmux] [llm] ${currentTier} → ${currentModel} | attempt=${attempt + 1} | pi-ai (${apiType})${preview2 ? ` | "${preview2}${msgText2.length > 100 ? "…" : ""}"` : ""}`);
120629
+ const piStreamHandle = stream(model, piContext, piOptions);
120630
+ if (!shouldDetect) {
120631
+ return await yieldPiAiResponse(piStreamHandle, apiType, wantsStream);
120632
+ }
120633
+ const detector = signalRouter.createSignalDetector();
120634
+ const detectionState = createSignalDetectionState();
120657
120635
  if (wantsStream) {
120658
- return new Response(piStreamToAnthropicSse(piStreamHandle), {
120636
+ const signalGen = detectSignalInStream(piStreamHandle, detector, detectionState, () => {});
120637
+ const sseBody = piStreamToAnthropicSseFromGenerator(signalGen);
120638
+ const response = new Response(sseBody, {
120659
120639
  status: 200,
120660
120640
  headers: { "content-type": "text/event-stream" }
120661
120641
  });
120662
- }
120663
- const json = await piStreamToAnthropicJson(piStreamHandle);
120664
- return new Response(JSON.stringify(json), {
120665
- status: 200,
120666
- headers: { "content-type": "application/json" }
120667
- });
120668
- }
120669
- if (apiType === "openai-completions") {
120670
- if (wantsStream) {
120671
- return new Response(piStreamToOpenAiCompletionsSse(piStreamHandle), {
120672
- status: 200,
120673
- headers: { "content-type": "text/event-stream" }
120642
+ response.clone().text().then(() => {
120643
+ if (detectionState.signalDetected) {
120644
+ abortCtrl.abort();
120645
+ const nextTier = signalRouter.handleEscalation(messages, currentTier);
120646
+ if (nextTier !== null) {
120647
+ console.log(`[clawmux] [escalation] ${currentTier} → ${nextTier} (signal detected)`);
120648
+ currentTier = nextTier;
120649
+ return;
120650
+ }
120651
+ }
120652
+ signalRouter.touchActivity(messages);
120653
+ if (attempt > 0) {
120654
+ signalRouter.recordSuccessfulEscalation(messages, currentTier);
120655
+ }
120674
120656
  });
120657
+ return response;
120675
120658
  }
120676
- const json = await piStreamToOpenAiCompletionsJson(piStreamHandle);
120677
- return new Response(JSON.stringify(json), {
120678
- status: 200,
120679
- headers: { "content-type": "application/json" }
120680
- });
120681
- }
120682
- if (apiType === "openai-responses") {
120683
- if (wantsStream) {
120684
- return new Response(piStreamToOpenAiResponsesSse(piStreamHandle), {
120685
- status: 200,
120686
- headers: { "content-type": "text/event-stream" }
120687
- });
120659
+ const fullText = await collectPiStreamText(piStreamHandle, detector, detectionState);
120660
+ if (detectionState.signalDetected) {
120661
+ const nextTier = signalRouter.handleEscalation(messages, currentTier);
120662
+ if (nextTier !== null) {
120663
+ console.log(`[clawmux] [escalation] ${currentTier} → ${nextTier} (signal detected in non-streaming)`);
120664
+ currentTier = nextTier;
120665
+ continue;
120666
+ }
120688
120667
  }
120689
- const json = await piStreamToOpenAiResponsesJson(piStreamHandle);
120690
- return new Response(JSON.stringify(json), {
120691
- status: 200,
120692
- headers: { "content-type": "application/json" }
120693
- });
120694
- }
120695
- if (apiType === "google-generative-ai") {
120696
- if (wantsStream) {
120697
- return new Response(piStreamToGoogleSse(piStreamHandle), {
120698
- status: 200,
120699
- headers: { "content-type": "text/event-stream" }
120700
- });
120668
+ signalRouter.touchActivity(messages);
120669
+ if (attempt > 0) {
120670
+ signalRouter.recordSuccessfulEscalation(messages, currentTier);
120701
120671
  }
120702
- const json = await piStreamToGoogleJson(piStreamHandle);
120703
- return new Response(JSON.stringify(json), {
120672
+ return new Response(JSON.stringify(fullText), {
120704
120673
  status: 200,
120705
120674
  headers: { "content-type": "application/json" }
120706
120675
  });
@@ -120729,7 +120698,7 @@ async function handleApiRequest(req, body, apiType, config, openclawConfig, auth
120729
120698
  const lastUserMsg = [...parsed.messages].reverse().find((m2) => m2.role === "user");
120730
120699
  const msgText = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : Array.isArray(lastUserMsg?.content) ? lastUserMsg.content.filter((b) => b.type === "text").map((b) => b.text ?? "").join(" ") : "";
120731
120700
  const preview = msgText.replace(/\s+/g, " ").trim().slice(0, 100);
120732
- console.log(`[clawmux] [llm] ${decision.tier} → ${decision.model} | conf=${classification.confidence.toFixed(2)}${classification.reasoning ? ` | ${classification.reasoning}` : ""}${preview ? ` | "${preview}${msgText.length > 100 ? "…" : ""}"` : ""}`);
120701
+ console.log(`[clawmux] [llm] ${decision.tier} → ${decision.model} | legacy${preview ? ` | "${preview}${msgText.length > 100 ? "…" : ""}"` : ""}`);
120733
120702
  if (compressionMiddleware && upstreamResponse.ok) {
120734
120703
  compressionMiddleware.afterResponse(parsed);
120735
120704
  }
@@ -120891,6 +120860,220 @@ function createResolvedCompressionMiddleware(config, openclawConfig, authProfile
120891
120860
  statsTracker
120892
120861
  });
120893
120862
  }
120863
/**
 * Wraps a pi-ai stream handle in an HTTP `Response` encoded for the client's API dialect.
 *
 * The handle is passed unchanged to the converter pair that belongs to `apiType`:
 * the SSE converter when the client asked for streaming, otherwise the JSON
 * converter, whose awaited result is serialized with `JSON.stringify`.
 *
 * @param {*} piStreamHandle - stream handle forwarded as-is to the selected converter.
 * @param {string} apiType - "anthropic-messages", "openai-completions",
 *   "openai-responses" or "google-generative-ai".
 * @param {boolean} wantsStream - truthy for a `text/event-stream` response,
 *   falsy for an `application/json` response.
 * @returns {Promise<Response>} a 200 response in the requested dialect.
 * @throws {Error} when `apiType` is not one of the four supported dialects.
 */
async function yieldPiAiResponse(piStreamHandle, apiType, wantsStream) {
  let toSse;
  let toJson;
  switch (apiType) {
    case "anthropic-messages":
      toSse = piStreamToAnthropicSse;
      toJson = piStreamToAnthropicJson;
      break;
    case "openai-completions":
      toSse = piStreamToOpenAiCompletionsSse;
      toJson = piStreamToOpenAiCompletionsJson;
      break;
    case "openai-responses":
      toSse = piStreamToOpenAiResponsesSse;
      toJson = piStreamToOpenAiResponsesJson;
      break;
    case "google-generative-ai":
      toSse = piStreamToGoogleSse;
      toJson = piStreamToGoogleJson;
      break;
    default:
      throw new Error(`Unsupported pi-ai apiType: ${apiType}`);
  }

  if (wantsStream) {
    // Streaming: hand the converter's body straight to the Response.
    return new Response(toSse(piStreamHandle), {
      status: 200,
      headers: { "content-type": "text/event-stream" }
    });
  }

  // Non-streaming: wait for the complete payload, then serialize it once.
  const payload = await toJson(piStreamHandle);
  return new Response(JSON.stringify(payload), {
    status: 200,
    headers: { "content-type": "application/json" }
  });
}
120918
/**
 * Converts an async iterable of pi-ai stream events into an Anthropic-Messages
 * style SSE byte stream.
 *
 * Handled event types: `start`, `text_start`, `text_delta`, `text_end`, `done`
 * and `error`. Any other event type is ignored. A `message_start` frame is
 * emitted at most once, before the first content frame.
 *
 * `event.partial` is read defensively: a `text_delta` synthesized while flushing
 * on an error may carry no partial message, and must not abort the stream with a
 * TypeError before the buffered text and the real error frame are delivered.
 *
 * @param {AsyncIterable<object>} gen - event source; consumed to completion.
 * @returns {ReadableStream<Uint8Array>} UTF-8 encoded SSE frames.
 */
function piStreamToAnthropicSseFromGenerator(gen) {
  const encoder6 = new TextEncoder();

  // Every frame's JSON payload starts with a `type` field equal to the SSE event name.
  const sseFrame2 = (event, data = {}) =>
    encoder6.encode(`event: ${event}\ndata: ${JSON.stringify({ type: event, ...data })}\n\n`);

  return new ReadableStream({
    async start(controller) {
      let messageStarted = false;
      // contentIndex -> block kind, for blocks that were started but not yet stopped.
      const openBlocks = new Map();

      const send = (event, data) => controller.enqueue(sseFrame2(event, data));

      // Emits the one-time message_start frame; tolerates a missing partial message.
      const ensureMessageStart = (partial) => {
        if (messageStarted) {
          return;
        }
        messageStarted = true;
        send("message_start", {
          message: {
            id: `msg_${Date.now().toString(36)}`,
            type: "message",
            role: "assistant",
            content: [],
            model: partial?.model || "",
            stop_reason: null,
            stop_sequence: null,
            usage: { input_tokens: 0, output_tokens: 0 }
          }
        });
      };

      // Opens a text content block at the event's index.
      const openTextBlock = (event) => {
        ensureMessageStart(event.partial);
        openBlocks.set(event.contentIndex, "text");
        send("content_block_start", {
          index: event.contentIndex,
          content_block: { type: "text", text: "" }
        });
      };

      // Stops every block that is still open so the message is well-formed before it ends.
      const closeOpenBlocks = () => {
        for (const index of openBlocks.keys()) {
          send("content_block_stop", { index });
        }
        openBlocks.clear();
      };

      try {
        for await (const event of gen) {
          switch (event.type) {
            case "start":
              ensureMessageStart(event.partial);
              break;
            case "text_start":
              openTextBlock(event);
              break;
            case "text_delta":
              // A delta may arrive without a preceding text_start (e.g. flushed text).
              if (openBlocks.get(event.contentIndex) !== "text") {
                openTextBlock(event);
              }
              send("content_block_delta", {
                index: event.contentIndex,
                delta: { type: "text_delta", text: event.delta }
              });
              break;
            case "text_end":
              if (openBlocks.get(event.contentIndex) === "text") {
                send("content_block_stop", { index: event.contentIndex });
                openBlocks.delete(event.contentIndex);
              }
              break;
            case "done":
              closeOpenBlocks();
              send("message_delta", {
                delta: { stop_reason: "end_turn", stop_sequence: null },
                usage: { output_tokens: event.message.usage?.output ?? 0 }
              });
              send("message_stop");
              break;
            case "error":
              closeOpenBlocks();
              send("error", {
                error: {
                  type: "api_error",
                  message: event.error?.errorMessage ?? "Unknown error"
                }
              });
              break;
            default:
              break;
          }
        }
        controller.close();
      } catch (err) {
        // Report failures of the event source to the client as an SSE error frame.
        const msg = err instanceof Error ? err.message : String(err);
        send("error", { error: { type: "api_error", message: msg } });
        controller.close();
      }
    }
  });
}
121026
/**
 * Awaits the final pi-ai message, scans its text for an escalation signal, and
 * returns the message in Anthropic Messages JSON shape.
 *
 * The text of all `text` content parts is concatenated and fed to the detector
 * in a single chunk. If any detector result has type `signal_detected`,
 * `state.signalDetected` is set to `true` (it is never reset to `false` here).
 * The detector is flushed afterwards and the flushed text is discarded.
 *
 * @param {{ result(): Promise<object> }} piStreamHandle - handle whose `result()`
 *   resolves to the final message (`content`, `model`, `stopReason`, `usage`).
 * @param {{ feedChunk(text: string): Array<{type: string}>, flush(): * }} detector
 * @param {{ signalDetected?: boolean }} state - mutated when a signal is found.
 * @returns {Promise<object>} Anthropic-style message object. Content parts other
 *   than `text`, `thinking` and `toolCall` are omitted.
 */
async function collectPiStreamText(piStreamHandle, detector, state) {
  const msg = await piStreamHandle.result();

  const fullText = msg.content
    .filter((part) => part.type === "text")
    .map((part) => part.text)
    .join("");
  if (detector.feedChunk(fullText).some((result) => result.type === "signal_detected")) {
    state.signalDetected = true;
  }
  detector.flush();

  // A Map avoids resolving inherited Object.prototype keys (e.g. "constructor")
  // to non-string values that JSON.stringify would silently drop.
  const stopReasons = new Map([
    ["stop", "end_turn"],
    ["length", "max_tokens"],
    ["toolUse", "tool_use"],
    ["error", "end_turn"],
    ["aborted", "end_turn"]
  ]);

  const blocks = [];
  for (const part of msg.content) {
    switch (part.type) {
      case "text":
        blocks.push({ type: "text", text: part.text });
        break;
      case "thinking":
        blocks.push({
          type: "thinking",
          thinking: part.thinking,
          // The signature is only included when the provider supplied one.
          ...(part.thinkingSignature ? { signature: part.thinkingSignature } : {})
        });
        break;
      case "toolCall":
        blocks.push({
          type: "tool_use",
          id: part.id,
          name: part.name,
          input: part.arguments ?? {}
        });
        break;
      default:
        break;
    }
  }

  return {
    id: `msg_${Date.now().toString(36)}`,
    type: "message",
    role: "assistant",
    content: blocks,
    model: msg.model || "",
    stop_reason: stopReasons.get(msg.stopReason) ?? "end_turn",
    stop_sequence: null,
    usage: {
      input_tokens: msg.usage?.input ?? 0,
      output_tokens: msg.usage?.output ?? 0
    }
  };
}
120894
121077
  var ROUTE_MAPPINGS = [
120895
121078
  { apiType: "anthropic-messages", key: "/v1/messages" },
120896
121079
  { apiType: "openai-completions", key: "/v1/chat/completions" },
@@ -120900,8 +121083,17 @@ var ROUTE_MAPPINGS = [
120900
121083
  { apiType: "bedrock-converse-stream", key: "/model/*/converse-stream" }
120901
121084
  ];
120902
121085
  function setupPipelineRoutes(config, openclawConfig, authProfiles, compressionMiddleware) {
121086
+ const escalationConfig = config.routing.escalation;
121087
+ const signalRouter = new SignalRouter({
121088
+ escalation: {
121089
+ activeThresholdMs: escalationConfig?.activeThresholdMs ?? 300000,
121090
+ maxLifetimeMs: escalationConfig?.maxLifetimeMs ?? 7200000,
121091
+ fingerprintRootCount: escalationConfig?.fingerprintRootCount ?? 5
121092
+ },
121093
+ enabled: escalationConfig?.enabled ?? true
121094
+ });
120903
121095
  for (const mapping of ROUTE_MAPPINGS) {
120904
- setRouteHandler(mapping.key, (req, body) => handleApiRequest(req, body, mapping.apiType, config, openclawConfig, authProfiles, compressionMiddleware));
121096
+ setRouteHandler(mapping.key, (req, body) => handleApiRequest(req, body, mapping.apiType, config, openclawConfig, authProfiles, compressionMiddleware, signalRouter));
120905
121097
  }
120906
121098
  }
120907
121099
 
@@ -121066,15 +121258,24 @@ function detectInstallMethod() {
121066
121258
  return "npm";
121067
121259
  }
121068
121260
  }
121261
/**
 * Resolves `command` to the path reported by `which`, for use where the
 * caller's PATH may not be available (e.g. in a generated service definition).
 *
 * `which` is invoked without a shell and `command` is passed as a single argv
 * entry, so shell metacharacters in `command` are never interpreted.
 *
 * @param {string} command - executable name to look up, e.g. "npx".
 * @returns {string} the trimmed `which` output, or `command` unchanged when the
 *   lookup fails or produces no output.
 */
function resolveBinPath(command) {
  try {
    const resolved = import_node_child_process.execFileSync("which", [command], {
      encoding: "utf-8",
      // Keep `which` diagnostics out of the parent's stderr; failure is handled below.
      stdio: ["ignore", "pipe", "ignore"]
    }).trim();
    // An empty result would turn into a broken command line downstream.
    return resolved.length > 0 ? resolved : command;
  } catch {
    // Best effort: `which` is missing or the command is not on PATH.
    return command;
  }
}
121069
121268
  function resolveClawmuxBin() {
121070
121269
  try {
121071
121270
  const bin = import_node_child_process.execSync("which clawmux", { encoding: "utf-8" }).trim();
121072
121271
  if (bin.includes("/tmp/") || bin.includes("bunx-") || bin.includes("npx-")) {
121073
- return detectInstallMethod() === "bun" ? "bunx clawmux" : "npx clawmux";
121272
+ const runtime = detectInstallMethod();
121273
+ return runtime === "bun" ? `${resolveBinPath("bunx")} clawmux` : `${resolveBinPath("npx")} clawmux`;
121074
121274
  }
121075
121275
  return bin;
121076
121276
  } catch {
121077
- return detectInstallMethod() === "bun" ? "bunx clawmux" : "npx clawmux";
121277
+ const runtime = detectInstallMethod();
121278
+ return runtime === "bun" ? `${resolveBinPath("bunx")} clawmux` : `${resolveBinPath("npx")} clawmux`;
121078
121279
  }
121079
121280
  }
121080
121281
  function getHomeDir3() {
@@ -121085,6 +121286,14 @@ var SYSTEMD_PATH = import_node_path7.join(SYSTEMD_DIR, `${SERVICE_NAME}.service`
121085
121286
  var LAUNCHD_DIR = import_node_path7.join(getHomeDir3(), "Library", "LaunchAgents");
121086
121287
  var LAUNCHD_PATH = import_node_path7.join(LAUNCHD_DIR, `com.${SERVICE_NAME}.plist`);
121087
121288
  function buildSystemdUnit(bin, port, workDir) {
121289
+ const userPaths = [
121290
+ import_node_path7.join(getHomeDir3(), ".bun", "bin"),
121291
+ import_node_path7.join(getHomeDir3(), ".local", "bin"),
121292
+ import_node_path7.join(getHomeDir3(), ".npm-global", "bin"),
121293
+ "/usr/local/bin",
121294
+ "/usr/bin",
121295
+ "/bin"
121296
+ ].join(":");
121088
121297
  return `[Unit]
121089
121298
  Description=ClawMux - Smart model routing proxy
121090
121299
  After=network.target
@@ -121096,6 +121305,7 @@ WorkingDirectory=${workDir}
121096
121305
  Restart=on-failure
121097
121306
  RestartSec=5
121098
121307
  Environment=CLAWMUX_PORT=${port}
121308
+ Environment=PATH=${userPaths}
121099
121309
 
121100
121310
  [Install]
121101
121311
  WantedBy=default.target