clawmux 0.3.10 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -15614,8 +15614,8 @@ async function defaultParseResponse(client, props) {
15614
15614
  const mediaType = contentType?.split(";")[0]?.trim();
15615
15615
  const isJSON = mediaType?.includes("application/json") || mediaType?.endsWith("+json");
15616
15616
  if (isJSON) {
15617
- const contentLength = response.headers.get("content-length");
15618
- if (contentLength === "0") {
15617
+ const contentLength2 = response.headers.get("content-length");
15618
+ if (contentLength2 === "0") {
15619
15619
  return;
15620
15620
  }
15621
15621
  const json = await response.json();
@@ -21901,8 +21901,8 @@ async function defaultParseResponse2(client, props) {
21901
21901
  const mediaType = contentType?.split(";")[0]?.trim();
21902
21902
  const isJSON = mediaType?.includes("application/json") || mediaType?.endsWith("+json");
21903
21903
  if (isJSON) {
21904
- const contentLength = response.headers.get("content-length");
21905
- if (contentLength === "0") {
21904
+ const contentLength2 = response.headers.get("content-length");
21905
+ if (contentLength2 === "0") {
21906
21906
  return;
21907
21907
  }
21908
21908
  const json = await response.json();
@@ -34525,7 +34525,7 @@ async function consumeBody(data) {
34525
34525
  throw new FetchError(`Premature close of server response while trying to fetch ${data.url}`);
34526
34526
  }
34527
34527
  }
34528
- var import_node_stream, import_node_util, import_node_buffer, pipeline2, INTERNALS, clone = (instance, highWaterMark) => {
34528
+ var import_node_stream, import_node_util, import_node_buffer, pipeline, INTERNALS, clone = (instance, highWaterMark) => {
34529
34529
  let p1;
34530
34530
  let p2;
34531
34531
  let { body } = instance[INTERNALS];
@@ -34586,7 +34586,7 @@ var import_node_stream, import_node_util, import_node_buffer, pipeline2, INTERNA
34586
34586
  if (body === null) {
34587
34587
  dest.end();
34588
34588
  } else {
34589
- await pipeline2(body, dest);
34589
+ await pipeline(body, dest);
34590
34590
  }
34591
34591
  };
34592
34592
  var init_body = __esm(() => {
@@ -34598,7 +34598,7 @@ var init_body = __esm(() => {
34598
34598
  import_node_stream = __toESM(require("node:stream"));
34599
34599
  import_node_util = require("node:util");
34600
34600
  import_node_buffer = require("node:buffer");
34601
- pipeline2 = import_node_util.promisify(import_node_stream.default.pipeline);
34601
+ pipeline = import_node_util.promisify(import_node_stream.default.pipeline);
34602
34602
  INTERNALS = Symbol("Body internals");
34603
34603
  Body.prototype.buffer = import_node_util.deprecate(Body.prototype.buffer, "Please use 'response.arrayBuffer()' instead of 'response.buffer()'", "node-fetch#buffer");
34604
34604
  Object.defineProperties(Body.prototype, {
@@ -57751,8 +57751,8 @@ async function defaultParseResponse3(client, props) {
57751
57751
  const mediaType = (_a4 = contentType === null || contentType === undefined ? undefined : contentType.split(";")[0]) === null || _a4 === undefined ? undefined : _a4.trim();
57752
57752
  const isJSON = (mediaType === null || mediaType === undefined ? undefined : mediaType.includes("application/json")) || (mediaType === null || mediaType === undefined ? undefined : mediaType.endsWith("+json"));
57753
57753
  if (isJSON) {
57754
- const contentLength = response.headers.get("content-length");
57755
- if (contentLength === "0") {
57754
+ const contentLength2 = response.headers.get("content-length");
57755
+ if (contentLength2 === "0") {
57756
57756
  return;
57757
57757
  }
57758
57758
  const json = await response.json();
@@ -112238,7 +112238,7 @@ __export(exports_src2, {
112238
112238
  module.exports = __toCommonJS(exports_src2);
112239
112239
 
112240
112240
  // src/proxy/router.ts
112241
- var VERSION = process.env.npm_package_version ?? "0.3.10";
112241
+ var VERSION = process.env.npm_package_version ?? "0.3.12";
112242
112242
  function jsonResponse(body, status = 200) {
112243
112243
  return new Response(JSON.stringify(body), {
112244
112244
  status,
@@ -112447,6 +112447,12 @@ var import_promises2 = require("node:fs/promises");
112447
112447
  var import_node_path = require("node:path");
112448
112448
 
112449
112449
  // src/config/defaults.ts
112450
+ var ESCALATION_DEFAULTS = {
112451
+ activeThresholdMs: 300000,
112452
+ maxLifetimeMs: 7200000,
112453
+ fingerprintRootCount: 5,
112454
+ enabled: true
112455
+ };
112450
112456
  var DEFAULT_CONFIG = {
112451
112457
  compression: {
112452
112458
  threshold: 0.75,
@@ -112459,7 +112465,8 @@ var DEFAULT_CONFIG = {
112459
112465
  MEDIUM: "",
112460
112466
  HEAVY: ""
112461
112467
  },
112462
- contextWindows: {}
112468
+ contextWindows: {},
112469
+ escalation: ESCALATION_DEFAULTS
112463
112470
  },
112464
112471
  server: {
112465
112472
  port: 3456,
@@ -112480,7 +112487,13 @@ function applyDefaults(partial) {
112480
112487
  MEDIUM: partial.routing.models.MEDIUM ?? defaults.routing.models.MEDIUM,
112481
112488
  HEAVY: partial.routing.models.HEAVY ?? defaults.routing.models.HEAVY
112482
112489
  },
112483
- contextWindows: { ...defaults.routing.contextWindows, ...partial.routing.contextWindows }
112490
+ contextWindows: { ...defaults.routing.contextWindows, ...partial.routing.contextWindows },
112491
+ escalation: {
112492
+ activeThresholdMs: partial.routing.escalation?.activeThresholdMs ?? ESCALATION_DEFAULTS.activeThresholdMs,
112493
+ maxLifetimeMs: partial.routing.escalation?.maxLifetimeMs ?? ESCALATION_DEFAULTS.maxLifetimeMs,
112494
+ fingerprintRootCount: partial.routing.escalation?.fingerprintRootCount ?? ESCALATION_DEFAULTS.fingerprintRootCount,
112495
+ enabled: partial.routing.escalation?.enabled !== undefined ? partial.routing.escalation.enabled : ESCALATION_DEFAULTS.enabled
112496
+ }
112484
112497
  },
112485
112498
  server: {
112486
112499
  port: partial.server?.port ?? defaults.server.port,
@@ -112582,6 +112595,39 @@ function validateConfig(raw) {
112582
112595
  }
112583
112596
  }
112584
112597
  }
112598
+ if (routing.escalation !== undefined) {
112599
+ if (!isObject(routing.escalation)) {
112600
+ errors.push("routing.escalation: must be an object");
112601
+ } else {
112602
+ const esc = routing.escalation;
112603
+ if (esc.activeThresholdMs !== undefined) {
112604
+ if (typeof esc.activeThresholdMs !== "number") {
112605
+ errors.push(`routing.escalation.activeThresholdMs: must be a number, got ${typeof esc.activeThresholdMs}`);
112606
+ } else if (esc.activeThresholdMs <= 0) {
112607
+ errors.push(`routing.escalation.activeThresholdMs: must be a positive number, got ${esc.activeThresholdMs}`);
112608
+ }
112609
+ }
112610
+ if (esc.maxLifetimeMs !== undefined) {
112611
+ if (typeof esc.maxLifetimeMs !== "number") {
112612
+ errors.push(`routing.escalation.maxLifetimeMs: must be a number, got ${typeof esc.maxLifetimeMs}`);
112613
+ } else if (esc.maxLifetimeMs <= 0) {
112614
+ errors.push(`routing.escalation.maxLifetimeMs: must be a positive number, got ${esc.maxLifetimeMs}`);
112615
+ }
112616
+ }
112617
+ if (esc.fingerprintRootCount !== undefined) {
112618
+ if (typeof esc.fingerprintRootCount !== "number") {
112619
+ errors.push(`routing.escalation.fingerprintRootCount: must be a number, got ${typeof esc.fingerprintRootCount}`);
112620
+ } else if (!Number.isInteger(esc.fingerprintRootCount) || esc.fingerprintRootCount < 1) {
112621
+ errors.push(`routing.escalation.fingerprintRootCount: must be an integer >= 1, got ${esc.fingerprintRootCount}`);
112622
+ }
112623
+ }
112624
+ if (esc.enabled !== undefined) {
112625
+ if (typeof esc.enabled !== "boolean") {
112626
+ errors.push(`routing.escalation.enabled: must be a boolean, got ${typeof esc.enabled}`);
112627
+ }
112628
+ }
112629
+ }
112630
+ }
112585
112631
  const server = obj.server !== undefined && isObject(obj.server) ? obj.server : null;
112586
112632
  if (server !== null && server.port !== undefined) {
112587
112633
  checkOptionalNumberRange(errors, "server.port", server.port, 1024, 65535);
@@ -114992,404 +115038,305 @@ function buildSyntheticHttpResponse(parsed, adapter) {
114992
115038
  });
114993
115039
  }
114994
115040
 
114995
- // src/routing/local-classifier.ts
114996
- var import_transformers = require("@huggingface/transformers");
114997
- var CAT_L = "L";
114998
- var CAT_M = "M";
114999
- var CAT_H = "H";
115000
- var CAT_Q = "Q";
115001
- var TIER_MAP = {
115002
- L: "LIGHT",
115003
- M: "MEDIUM",
115004
- H: "HEAVY"
115005
- };
115006
- var MODEL_ID = "Xenova/multilingual-e5-small";
115007
- var E5_PREFIX = "query: ";
115008
- var BATCH_SIZE = 32;
115009
- var TRAINING_LIGHT = [
115010
- "안녕하세요",
115011
- "안녕",
115012
- "안녕히 가세요",
115013
- "안녕히 계세요",
115014
- "반갑습니다",
115015
- "잘 지내시죠",
115016
- "오랜만이에요",
115017
- "고마워",
115018
- "감사합니다",
115019
- "고맙습니다",
115020
- "네 고마워요",
115021
- "정말 감사합니다",
115022
- "도와줘서 고마워",
115023
- "네",
115024
- "예",
115025
- "아니요",
115026
- "좋아요",
115027
- "알겠습니다",
115028
- "확인했습니다",
115029
- "그래요",
115030
- "맞아요",
115031
- "아 네",
115032
- "Python이 뭐야?",
115033
- "JavaScript가 뭐야?",
115034
- "오늘 날씨 어때?",
115035
- "지금 몇 시야?",
115036
- "이거 뭐야?",
115037
- "TypeScript가 뭐예요?",
115038
- "API가 뭐야?",
115039
- "HTML이 뭐야?",
115040
- "CSS가 뭐야?",
115041
- "Hello",
115042
- "Hi",
115043
- "Hey there",
115044
- "Good morning",
115045
- "Good afternoon",
115046
- "How are you",
115047
- "What's up",
115048
- "Thanks",
115049
- "Thank you",
115050
- "Got it",
115051
- "OK",
115052
- "Sounds good",
115053
- "I see",
115054
- "Understood",
115055
- "Great thanks",
115056
- "What is Python?",
115057
- "What time is it?",
115058
- "What's the weather?",
115059
- "Who is Einstein?",
115060
- "Where is Seoul?",
115061
- "How old are you?",
115062
- "yes",
115063
- "no",
115064
- "maybe",
115065
- "sure",
115066
- "please",
115067
- "done",
115068
- "ok",
115069
- "cool",
115070
- "nice",
115071
- "awesome"
115072
- ];
115073
- var TRAINING_MEDIUM = [
115074
- "Write a quicksort function in TypeScript",
115075
- "Implement a binary search tree with insert and delete",
115076
- "Create a REST API endpoint for user authentication",
115077
- "Write a function to merge two sorted arrays",
115078
- "Implement a linked list in Python",
115079
- "Write a unit test for the calculator module",
115080
- "Create a simple Express.js middleware for logging",
115081
- "Write a regex to validate email addresses",
115082
- "Implement a LRU cache with get and put operations",
115083
- "Create a React component for a todo list",
115084
- "Write a SQL query to join two tables",
115085
- "Implement a basic JWT authentication flow",
115086
- "Write a function to parse CSV files",
115087
- "Create a simple WebSocket server",
115088
- "Implement bubble sort in Java",
115089
- "Write a Python script to read a JSON file",
115090
- "Create a Docker compose file for a web app",
115091
- "Write a Git pre-commit hook",
115092
- "REST API에 로그인 엔드포인트 추가해줘",
115093
- "이 함수에 에러 핸들링 추가해줘",
115094
- "TypeScript로 이벤트 이미터 만들어줘",
115095
- "데이터베이스 마이그레이션 스크립트 작성해줘",
115096
- "React 컴포넌트에 상태 관리 추가해줘",
115097
- "Express 라우터에 CORS 미들웨어 추가해줘",
115098
- "테스트 코드 작성해줘",
115099
- "이 코드 리팩토링해줘",
115100
- "Explain the difference between let and const in JavaScript",
115101
- "What's the difference between SQL and NoSQL databases",
115102
- "Explain how async await works in Python",
115103
- "Describe the MVC architecture pattern",
115104
- "Explain what Docker containers are",
115105
- "REST와 GraphQL의 차이점을 설명해줘",
115106
- "이벤트 루프가 어떻게 동작하는지 설명해줘",
115107
- "클로저가 뭐야? 설명해줘",
115108
- "Set up a Node.js project with TypeScript and ESLint",
115109
- "Create a basic CI/CD pipeline using GitHub Actions",
115110
- "Configure Nginx as a reverse proxy for a Node.js app",
115111
- `이 함수를 리팩토링해줘:
115112
- function processUsers(data) {
115113
- var result = [];
115114
- for (var i = 0; i < data.length; i++) {
115115
- if (data[i].active == true && data[i].age > 18) {
115116
- var name = data[i].firstName + ' ' + data[i].lastName;
115117
- var obj = { name: name, email: data[i].email, role: data[i].isAdmin ? 'admin' : 'user' };
115118
- if (data[i].department !== null && data[i].department !== undefined) {
115119
- obj.department = data[i].department.name;
115120
- obj.manager = data[i].department.manager ? data[i].department.manager.name : 'N/A';
115121
- }
115122
- result.push(obj);
115123
- }
115124
- }
115125
- result.sort(function(a, b) { return a.name > b.name ? 1 : -1; });
115126
- return result;
115127
- }`,
115128
- `Refactor this code to use modern JavaScript:
115129
- function getItems(list) {
115130
- var items = [];
115131
- for (var i = 0; i < list.length; i++) {
115132
- if (list[i].active === true) {
115133
- items.push(list[i].name);
115041
+ // src/routing/signal-detector.ts
115042
+ var ESCALATE_SIGNAL = "===CLAWMUX_ESCALATE===";
115043
+
115044
+ class SignalDetector {
115045
+ buffer = "";
115046
+ detected = false;
115047
+ pending = [];
115048
+ feed(char) {
115049
+ if (this.pending.length > 0) {
115050
+ return this.pending.shift();
115134
115051
  }
115052
+ if (this.detected) {
115053
+ return { type: "signal_detected" };
115054
+ }
115055
+ if (this.buffer.length === 0) {
115056
+ if (char !== "=") {
115057
+ return { type: "passthrough", text: char };
115058
+ }
115059
+ this.buffer = char;
115060
+ return { type: "buffering" };
115061
+ }
115062
+ const candidate = this.buffer + char;
115063
+ if (candidate === ESCALATE_SIGNAL) {
115064
+ this.detected = true;
115065
+ this.buffer = "";
115066
+ return { type: "signal_detected" };
115067
+ }
115068
+ if (ESCALATE_SIGNAL.startsWith(candidate)) {
115069
+ this.buffer = candidate;
115070
+ return { type: "buffering" };
115071
+ }
115072
+ this.buffer = "";
115073
+ this.pending.push({ type: "passthrough", text: char });
115074
+ return { type: "passthrough", text: candidate.slice(0, -1) };
115135
115075
  }
115136
- return items;
115137
- }`
115138
- ];
115139
- var TRAINING_HEAVY = [
115140
- "Design a distributed consensus algorithm for a multi-region database with strong consistency and Byzantine fault tolerance",
115141
- "Explain the theoretical foundations of quantum computing and how quantum entanglement can be used for cryptographic key distribution",
115142
- "Analyze the trade-offs between eventual consistency and strong consistency in distributed systems, including CAP theorem implications",
115143
- "Design a fault-tolerant microservices architecture for a real-time trading platform handling millions of transactions per second",
115144
- "Propose a novel approach to solving the traveling salesman problem that improves upon current approximation algorithms",
115145
- "Design a machine learning pipeline for real-time fraud detection in financial transactions with sub-millisecond latency requirements",
115146
- "Compare and contrast different consensus protocols (Paxos, Raft, PBFT) and recommend the best one for a blockchain-based supply chain system",
115147
- "Architect a system that can handle 10 million concurrent WebSocket connections with horizontal scaling",
115148
- "Design a real-time data streaming architecture combining Kafka, Flink, and a time-series database for IoT sensor data",
115149
- "메모리 릭이 발생하는데 프로파일러에서 이벤트 루프 블로킹과 GC 지연이 동시에 나타나. 마이크로서비스 간 gRPC 연결 풀링도 의심되는 상황인데 원인 분석 방법을 단계별로 설명해줘",
115150
- "대규모 분산 시스템에서 파티션 톨런스와 일관성을 동시에 보장하는 방법을 설계해줘",
115151
- "실시간 추천 시스템을 위한 아키텍처를 설계해줘. 1초 이내에 개인화된 추천을 제공해야 해",
115152
- "카프카 기반 이벤트 드리븐 아키텍처에서 순서 보장과 정확히 한 번 처리를 어떻게 보장할 수 있을까?",
115153
- "마이크로서비스 간의 분산 트랜잭션을 사가 패턴으로 구현하는 방법을 단계별로 설명해줘",
115154
- "Debug a memory leak in a production Node.js application where the heap grows indefinitely but garbage collection logs show normal behavior",
115155
- "Investigate why our Kubernetes pods are being OOMKilled despite having memory limits set to 4GB and actual usage reported as 2GB",
115156
- "Find the root cause of intermittent 500ms latency spikes in our PostgreSQL queries that happen every 15 minutes",
115157
- "Design a multi-tenant SaaS platform with shared infrastructure but isolated data, supporting custom domains and white-labeling",
115158
- "Implement a distributed task scheduler that guarantees at-least-once execution with idempotency support across multiple data centers"
115159
- ];
115160
- var TRAINING_Q = [
115161
- "아까 그거 다시 해줘",
115162
- "그거 좀 더 자세히 설명해줘",
115163
- "아까 말한 거 그대로 해줘",
115164
- "이거 수정해줘",
115165
- "저거 어디 있지",
115166
- "그거 어떻게 됐어",
115167
- "위에꺼 다시 한번",
115168
- "그거 그대로 해줘",
115169
- "아까 한 거 다시",
115170
- "그 코드 다시 보여줘",
115171
- "저번에 한 거 기억나?",
115172
- "그 부분 수정해줘",
115173
- "Do that again",
115174
- "What about the thing we discussed earlier",
115175
- "Show me that again",
115176
- "Can you fix that",
115177
- "Change it like I said before",
115178
- "Continue from where we left off",
115179
- "That thing from earlier, do it again",
115180
- "Remember what we were working on",
115181
- "Go back to the previous one",
115182
- "Make it like the other one",
115183
- "The same thing but different",
115184
- "Update the one from before",
115185
- "그거 해줘",
115186
- "이거 해줘",
115187
- "저거 어때",
115188
- "How about this one",
115189
- "What about that",
115190
- "Try the other approach",
115191
- "Use the one I mentioned",
115192
- "Fix the issue",
115193
- "그냥 그거",
115194
- "이건 어때",
115195
- "Make it better",
115196
- "Change it",
115197
- "이거 수정해"
115198
- ];
115199
- var Q_PATTERNS = [
115200
- /^(아까|그거|저거|이거|그|위에|아래|저번|이전|전에).*(다시|해줘|해|보여|설명|수정|변경|삭제|추가|해봐)/,
115201
- /^(그거|저거|이거|그|이|저)(만|만큼|대로|처럼|같이)?\s*(해줘|해|놔|둬|봐|어때|어떻게)/,
115202
- /^(그거|저거|이거)\s*$/,
115203
- /(아까|저번에|전에|위에서|앞에서|이전에).*(그|그거|그것|그때|했던|말한)/,
115204
- /^(이거|저거|그거)(\s*.*)?$/
115205
- ];
115206
- var DEICTIC_WORDS = new Set(["그거", "저거", "이거", "그것", "이것", "저것", "아까", "저번"]);
115207
- function matchesQPattern(text) {
115208
- const trimmed = text.trim();
115209
- for (const pattern of Q_PATTERNS) {
115210
- if (pattern.test(trimmed))
115211
- return true;
115076
+ feedChunk(chunk) {
115077
+ const out = [];
115078
+ for (let i = 0;i < chunk.length; i++) {
115079
+ out.push(this.feed(chunk[i]));
115080
+ while (this.pending.length > 0) {
115081
+ out.push(this.pending.shift());
115082
+ }
115083
+ }
115084
+ return out;
115212
115085
  }
115213
- if (trimmed.length < 20) {
115214
- for (const word of DEICTIC_WORDS) {
115215
- if (trimmed.includes(word))
115216
- return true;
115086
+ flush() {
115087
+ if (this.buffer.length === 0)
115088
+ return null;
115089
+ const text = this.buffer;
115090
+ this.buffer = "";
115091
+ return text;
115092
+ }
115093
+ reset() {
115094
+ this.buffer = "";
115095
+ this.detected = false;
115096
+ this.pending.length = 0;
115097
+ }
115098
+ get isBuffering() {
115099
+ return this.buffer.length > 0;
115100
+ }
115101
+ }
115102
+
115103
+ // src/routing/escalation-memory.ts
115104
+ function djb2Hash(text) {
115105
+ let hash = 5381;
115106
+ for (let i = 0;i < text.length; i++) {
115107
+ hash = (hash << 5) + hash ^ text.charCodeAt(i);
115108
+ }
115109
+ return (hash >>> 0).toString(16);
115110
+ }
115111
+ function extractText(content) {
115112
+ if (typeof content === "string") {
115113
+ return content.slice(0, 200);
115114
+ }
115115
+ let concatenated = "";
115116
+ for (const block of content) {
115117
+ if (block.type === "text" && block.text !== undefined) {
115118
+ concatenated += block.text;
115217
115119
  }
115218
115120
  }
115219
- return false;
115121
+ return concatenated.slice(0, 200);
115220
115122
  }
115221
- var CODE_PATTERN = /[{}();]|function |const |let |var |class |import |export |=>|\bdef \b|\bfn\b/;
115222
- var TECH_TERMS = /\b(implement|create|design|architect|debug|refactor|migrate|deploy|build|write|develop)\b/i;
115223
- function isLikelyLight(text) {
115224
- const trimmed = text.trim();
115225
- if (trimmed.length <= 20 && !CODE_PATTERN.test(trimmed) && !TECH_TERMS.test(trimmed)) {
115226
- return true;
115123
+ function contentLength(content) {
115124
+ if (typeof content === "string") {
115125
+ return content.length;
115227
115126
  }
115228
- return false;
115127
+ return extractText(content).length;
115229
115128
  }
115230
- var extractorPromise = null;
115231
- function getExtractor() {
115232
- if (!extractorPromise) {
115233
- const dtype = process.env.CLAWMUX_EMBEDDING_DTYPE ?? "fp16";
115234
- console.log(`[clawmux] Loading embedding model (dtype=${dtype})...`);
115235
- extractorPromise = import_transformers.pipeline("feature-extraction", MODEL_ID, { dtype }).then((pipe) => {
115236
- console.log("[clawmux] Embedding model loaded");
115237
- return pipe;
115129
+
115130
+ class EscalationMemory {
115131
+ config;
115132
+ store;
115133
+ constructor(config) {
115134
+ this.config = config;
115135
+ this.store = new Map;
115136
+ }
115137
+ fingerprint(messages) {
115138
+ const count = this.config.fingerprintRootCount;
115139
+ const segments = [];
115140
+ for (let i = 0;i < count; i++) {
115141
+ const msg = messages[i];
115142
+ if (msg === undefined) {
115143
+ segments.push("::");
115144
+ continue;
115145
+ }
115146
+ const text = extractText(msg.content);
115147
+ const len = contentLength(msg.content);
115148
+ segments.push(`${msg.role}:${String(len)}:${djb2Hash(text)}`);
115149
+ }
115150
+ return segments.join("|");
115151
+ }
115152
+ lookup(messages, nowMs) {
115153
+ this.evict(nowMs);
115154
+ return this.store.get(this.fingerprint(messages)) ?? null;
115155
+ }
115156
+ record(messages, tier, nowMs) {
115157
+ const now = nowMs ?? Date.now();
115158
+ const fp = this.fingerprint(messages);
115159
+ const existing = this.store.get(fp);
115160
+ if (existing !== undefined && existing.tier === "HEAVY" && tier === "MEDIUM") {
115161
+ existing.lastActivityAt = now;
115162
+ return;
115163
+ }
115164
+ this.store.set(fp, {
115165
+ tier,
115166
+ firstEscalatedAt: existing?.firstEscalatedAt ?? now,
115167
+ lastActivityAt: now
115238
115168
  });
115239
115169
  }
115240
- return extractorPromise;
115241
- }
115242
- var centroidsPromise = null;
115243
- async function computeMeanEmbedding(texts) {
115244
- const extractor = await getExtractor();
115245
- const allEmbeddings = [];
115246
- for (let i = 0;i < texts.length; i += BATCH_SIZE) {
115247
- const batch = texts.slice(i, i + BATCH_SIZE).map((t) => E5_PREFIX + t);
115248
- const output = await extractor(batch, { pooling: "mean", normalize: true });
115249
- const list = output.tolist();
115250
- for (const emb of list) {
115251
- allEmbeddings.push(emb);
115170
+ touch(messages, nowMs) {
115171
+ const now = nowMs ?? Date.now();
115172
+ const record = this.store.get(this.fingerprint(messages));
115173
+ if (record !== undefined) {
115174
+ record.lastActivityAt = now;
115252
115175
  }
115253
115176
  }
115254
- if (allEmbeddings.length === 0)
115255
- return [];
115256
- const dim = allEmbeddings[0].length;
115257
- const mean = new Array(dim).fill(0);
115258
- for (const emb of allEmbeddings) {
115259
- for (let j = 0;j < dim; j++) {
115260
- mean[j] += emb[j] / allEmbeddings.length;
115261
- }
115262
- }
115263
- const magnitude = Math.sqrt(mean.reduce((sum, v) => sum + v * v, 0));
115264
- if (magnitude > 0) {
115265
- for (let j = 0;j < dim; j++)
115266
- mean[j] /= magnitude;
115267
- }
115268
- return mean;
115269
- }
115270
- function getCentroids() {
115271
- if (!centroidsPromise) {
115272
- centroidsPromise = (async () => {
115273
- console.log("[clawmux] Computing category centroids...");
115274
- const [cL, cM, cH, cQ] = await Promise.all([
115275
- computeMeanEmbedding(TRAINING_LIGHT),
115276
- computeMeanEmbedding(TRAINING_MEDIUM),
115277
- computeMeanEmbedding(TRAINING_HEAVY),
115278
- computeMeanEmbedding(TRAINING_Q)
115279
- ]);
115280
- console.log(`[clawmux] Centroids ready: L=${TRAINING_LIGHT.length} M=${TRAINING_MEDIUM.length} ` + `H=${TRAINING_HEAVY.length} Q=${TRAINING_Q.length} samples`);
115281
- return { [CAT_L]: cL, [CAT_M]: cM, [CAT_H]: cH, [CAT_Q]: cQ };
115282
- })();
115177
+ evict(nowMs) {
115178
+ const now = nowMs ?? Date.now();
115179
+ let evicted = 0;
115180
+ for (const [fp, record] of this.store) {
115181
+ const inactive = now - record.lastActivityAt > this.config.activeThresholdMs;
115182
+ const expired = now - record.firstEscalatedAt > this.config.maxLifetimeMs;
115183
+ if (inactive || expired) {
115184
+ this.store.delete(fp);
115185
+ evicted++;
115186
+ }
115187
+ }
115188
+ return evicted;
115189
+ }
115190
+ clear() {
115191
+ this.store.clear();
115192
+ }
115193
+ get size() {
115194
+ return this.store.size;
115283
115195
  }
115284
- return centroidsPromise;
115285
115196
  }
115286
- function cosineSimilarity(a, b) {
115287
- let dot = 0;
115288
- let magA = 0;
115289
- let magB = 0;
115290
- for (let i = 0;i < a.length; i++) {
115291
- dot += a[i] * b[i];
115292
- magA += a[i] * a[i];
115293
- magB += b[i] * b[i];
115197
+
115198
+ // src/routing/instruction-injector.ts
115199
+ var INJECT_FOR_TIERS = new Set(["LIGHT"]);
115200
+ var ESCALATION_INSTRUCTION = `If you cannot handle this request fully (due to complexity, missing context, or capability limits), output EXACTLY the following marker with no other text on that line: ${ESCALATE_SIGNAL}
115201
+ Do not explain. Do not ask permission. Just emit the marker and stop.`;
115202
+ function injectEscalationInstruction(messages) {
115203
+ if (messages.length === 0) {
115204
+ return [{ role: "system", content: ESCALATION_INSTRUCTION }];
115205
+ }
115206
+ const first = messages[0];
115207
+ const rest = messages.slice(1).map((m) => ({ ...m }));
115208
+ if (first.role !== "system") {
115209
+ return [
115210
+ { role: "system", content: ESCALATION_INSTRUCTION },
115211
+ { ...first },
115212
+ ...rest
115213
+ ];
115214
+ }
115215
+ if (typeof first.content === "string") {
115216
+ return [
115217
+ { role: "system", content: first.content + `
115218
+
115219
+ ` + ESCALATION_INSTRUCTION },
115220
+ ...rest
115221
+ ];
115294
115222
  }
115295
- const denom = Math.sqrt(magA) * Math.sqrt(magB);
115296
- return denom > 0 ? dot / denom : 0;
115223
+ return [
115224
+ {
115225
+ role: "system",
115226
+ content: [...first.content, { type: "text", text: ESCALATION_INSTRUCTION }]
115227
+ },
115228
+ ...rest
115229
+ ];
115297
115230
  }
115298
- async function classifyLocal(messages, config) {
115299
- const userText = extractLastUserText(messages);
115300
- if (!userText) {
115301
- return {
115302
- tier: "MEDIUM",
115303
- confidence: 0,
115304
- reasoning: "No user message found",
115305
- error: "No user message found in request"
115306
- };
115307
- }
115308
- const centroids = await getCentroids();
115309
- const extractor = await getExtractor();
115310
- const output = await extractor([E5_PREFIX + userText], { pooling: "mean", normalize: true });
115311
- const inputEmb = output.tolist()[0];
115312
- let bestCat = CAT_M;
115313
- let bestSim = -Infinity;
115314
- for (const [cat, centroid] of Object.entries(centroids)) {
115315
- const sim = cosineSimilarity(inputEmb, centroid);
115316
- if (sim > bestSim) {
115317
- bestSim = sim;
115318
- bestCat = cat;
115319
- }
115320
- }
115321
- if (isLikelyLight(userText) && bestCat !== CAT_Q) {
115322
- bestCat = CAT_L;
115323
- bestSim = Math.max(bestSim, 0.7);
115324
- }
115325
- const heuristicQ = matchesQPattern(userText);
115326
- if (bestCat === CAT_Q || heuristicQ) {
115327
- const contextText = buildContextText(messages, userText, config?.contextMessages ?? 10);
115328
- const ctxOutput = await extractor([E5_PREFIX + contextText], { pooling: "mean", normalize: true });
115329
- const contextEmb = ctxOutput.tolist()[0];
115330
- let reBestCat = CAT_M;
115331
- let reBestSim = -Infinity;
115332
- for (const [cat, centroid] of Object.entries(centroids)) {
115333
- if (cat === CAT_Q)
115334
- continue;
115335
- const sim = cosineSimilarity(contextEmb, centroid);
115336
- if (sim > reBestSim) {
115337
- reBestSim = sim;
115338
- reBestCat = cat;
115339
- }
115231
+
115232
+ // src/routing/signal-router.ts
115233
+ var NEXT_TIER = {
115234
+ LIGHT: "MEDIUM",
115235
+ MEDIUM: "HEAVY",
115236
+ HEAVY: null
115237
+ };
115238
+
115239
+ class SignalRouter {
115240
+ _memory;
115241
+ _enabled;
115242
+ constructor(config) {
115243
+ this._memory = new EscalationMemory(config.escalation);
115244
+ this._enabled = config.enabled;
115245
+ }
115246
+ selectInitialTier(messages, nowMs) {
115247
+ if (!this._enabled)
115248
+ return "MEDIUM";
115249
+ const record = this._memory.lookup(messages, nowMs);
115250
+ if (record !== null)
115251
+ return record.tier;
115252
+ return "LIGHT";
115253
+ }
115254
+ shouldInjectInstruction(tier) {
115255
+ return this._enabled && INJECT_FOR_TIERS.has(tier);
115256
+ }
115257
+ injectInstructionIfNeeded(tier, messages) {
115258
+ if (this.shouldInjectInstruction(tier)) {
115259
+ return injectEscalationInstruction(messages);
115340
115260
  }
115341
- const tier2 = TIER_MAP[reBestCat] ?? "MEDIUM";
115342
- return {
115343
- tier: tier2,
115344
- confidence: reBestSim,
115345
- reasoning: `Re-classified with context (initial: Q, heuristic: ${heuristicQ})`
115346
- };
115261
+ return messages;
115262
+ }
115263
+ createSignalDetector() {
115264
+ return new SignalDetector;
115265
+ }
115266
+ handleEscalation(_messages, fromTier) {
115267
+ return NEXT_TIER[fromTier];
115268
+ }
115269
+ recordSuccessfulEscalation(messages, tier, nowMs) {
115270
+ this._memory.record(messages, tier, nowMs);
115271
+ }
115272
+ touchActivity(messages, nowMs) {
115273
+ this._memory.touch(messages, nowMs);
115274
+ }
115275
+ get memory() {
115276
+ return this._memory;
115277
+ }
115278
+ get enabled() {
115279
+ return this._enabled;
115347
115280
  }
115348
- const tier = TIER_MAP[bestCat] ?? "MEDIUM";
115349
- return { tier, confidence: bestSim };
115350
115281
  }
115351
- function extractLastUserText(messages) {
115352
- for (let i = messages.length - 1;i >= 0; i--) {
115353
- const msg = messages[i];
115354
- if (msg.role !== "user")
115282
+
115283
+ // src/proxy/signal-detecting-stream.ts
115284
+ function createSignalDetectionState() {
115285
+ return { signalDetected: false, preSignalText: "" };
115286
+ }
115287
+ async function* detectSignalInStream(stream, detector, state, onSignal) {
115288
+ let signaled = false;
115289
+ for await (const event of stream) {
115290
+ if (signaled) {
115291
+ if (event.type === "done" || event.type === "error") {
115292
+ yield event;
115293
+ }
115355
115294
  continue;
115356
- if (typeof msg.content === "string") {
115357
- return msg.content;
115358
115295
  }
115359
- if (Array.isArray(msg.content)) {
115360
- const parts = [];
115361
- for (const block of msg.content) {
115362
- if (block.type === "text" && block.text) {
115363
- parts.push(block.text);
115296
+ if (event.type === "text_delta" && typeof event.delta === "string") {
115297
+ const results = detector.feedChunk(event.delta);
115298
+ let confirmedText = "";
115299
+ for (const r of results) {
115300
+ if (r.type === "passthrough") {
115301
+ state.preSignalText += r.text;
115302
+ confirmedText += r.text;
115303
+ } else if (r.type === "signal_detected") {
115304
+ signaled = true;
115305
+ state.signalDetected = true;
115306
+ onSignal();
115307
+ }
115308
+ }
115309
+ if (signaled) {
115310
+ if (confirmedText.length > 0) {
115311
+ yield {
115312
+ ...event,
115313
+ delta: confirmedText
115314
+ };
115364
115315
  }
115316
+ continue;
115317
+ }
115318
+ if (confirmedText.length > 0) {
115319
+ yield {
115320
+ ...event,
115321
+ delta: confirmedText
115322
+ };
115365
115323
  }
115366
- if (parts.length > 0)
115367
- return parts.join(" ");
115368
- }
115369
- }
115370
- return;
115371
- }
115372
- function buildContextText(allMessages, currentText, contextCount) {
115373
- const relevantMessages = allMessages.filter((m) => m.role === "user" || m.role === "assistant");
115374
- const lastN = relevantMessages.slice(-contextCount);
115375
- const parts = [];
115376
- for (const msg of lastN) {
115377
- let text;
115378
- if (typeof msg.content === "string") {
115379
- text = msg.content;
115380
- } else if (Array.isArray(msg.content)) {
115381
- text = msg.content.filter((b) => b.type === "text" && b.text).map((b) => b.text).join(" ");
115382
- } else {
115383
115324
  continue;
115384
115325
  }
115385
- parts.push(`[${msg.role}]: ${text}`);
115386
- }
115387
- const lastPart = parts[parts.length - 1];
115388
- if (!lastPart || !lastPart.includes(currentText)) {
115389
- parts.push(`[user]: ${currentText}`);
115326
+ if (event.type === "done" || event.type === "error") {
115327
+ const flushed = detector.flush();
115328
+ if (flushed !== null) {
115329
+ state.preSignalText += flushed;
115330
+ yield {
115331
+ type: "text_delta",
115332
+ contentIndex: 0,
115333
+ delta: flushed,
115334
+ partial: event.type === "done" ? event.message : event.error
115335
+ };
115336
+ }
115337
+ }
115338
+ yield event;
115390
115339
  }
115391
- return parts.join(`
115392
- `);
115393
115340
  }
115394
115341
 
115395
115342
  // src/openclaw/auth-resolver.ts
@@ -115692,7 +115639,7 @@ function isStreamContentType(contentType) {
115692
115639
  }
115693
115640
 
115694
115641
  // src/compression/session-store.ts
115695
- function djb2Hash(str) {
115642
+ function djb2Hash2(str) {
115696
115643
  let hash = 5381;
115697
115644
  for (let i = 0;i < str.length; i++) {
115698
115645
  hash = (hash << 5) + hash + str.charCodeAt(i) | 0;
@@ -115704,7 +115651,7 @@ function generateSessionId(messages) {
115704
115651
  if (!firstUserMessage)
115705
115652
  return "empty-session";
115706
115653
  const content = typeof firstUserMessage.content === "string" ? firstUserMessage.content : JSON.stringify(firstUserMessage.content);
115707
- return `session-${djb2Hash(content)}`;
115654
+ return `session-${djb2Hash2(content)}`;
115708
115655
  }
115709
115656
  function createSessionStore(maxSessions = 500) {
115710
115657
  const store = new Map;
@@ -120576,7 +120523,7 @@ function computeRetryDelay(response, attempt, baseDelayMs, maxDelayMs) {
120576
120523
  const jitter = Math.random() * 300;
120577
120524
  return Math.min(baseDelayMs * 2 ** attempt + jitter, maxDelayMs);
120578
120525
  }
120579
- async function handleApiRequest(req, body, apiType, config, openclawConfig, authProfiles, compressionMiddleware) {
120526
+ async function handleApiRequest(req, body, apiType, config, openclawConfig, authProfiles, compressionMiddleware, signalRouter) {
120580
120527
  const adapter = getAdapter(apiType);
120581
120528
  if (!adapter) {
120582
120529
  return jsonErrorResponse(`Unknown API type: ${apiType}`, 500);
@@ -120609,12 +120556,11 @@ async function handleApiRequest(req, body, apiType, config, openclawConfig, auth
120609
120556
  }
120610
120557
  }
120611
120558
  const messages = effectiveParsed.messages;
120612
- const classification = await classifyLocal(messages);
120559
+ const initialTier = signalRouter.selectInitialTier(messages);
120613
120560
  const decision = {
120614
- tier: classification.tier,
120615
- model: config.routing.models[classification.tier],
120616
- confidence: classification.confidence,
120617
- overrideReason: classification.reasoning
120561
+ tier: initialTier,
120562
+ model: config.routing.models[initialTier],
120563
+ confidence: 1
120618
120564
  };
120619
120565
  const lookup = findProviderForModel(decision.model, openclawConfig);
120620
120566
  let providerName;
@@ -120658,67 +120604,90 @@ async function handleApiRequest(req, body, apiType, config, openclawConfig, auth
120658
120604
  const piEligible = piEnabled && PI_CLIENT_APIS.has(apiType) && targetApiType !== "ollama" && targetApiType !== "bedrock-converse-stream";
120659
120605
  if (piEligible) {
120660
120606
  try {
120661
- const model = buildPiAiModel(providerName, actualModelId, openclawConfig);
120662
- const piContext = buildPiContext(effectiveParsed);
120663
- applyCodexSystemPromptFallback(piContext, targetApiType);
120664
- const piOptions = buildPiOptions(effectiveParsed, authInfo, providerName);
120665
120607
  const lastUserMsg2 = [...parsed.messages].reverse().find((m2) => m2.role === "user");
120666
120608
  const msgText2 = typeof lastUserMsg2?.content === "string" ? lastUserMsg2.content : Array.isArray(lastUserMsg2?.content) ? lastUserMsg2.content.filter((b) => b.type === "text").map((b) => b.text ?? "").join(" ") : "";
120667
120609
  const preview2 = msgText2.replace(/\s+/g, " ").trim().slice(0, 100);
120668
- console.log(`[clawmux] [llm] ${decision.tier} → ${decision.model} | conf=${classification.confidence.toFixed(2)} | pi-ai (${apiType})${preview2 ? ` | "${preview2}${msgText2.length > 100 ? "…" : ""}"` : ""}`);
120669
120610
  if (compressionMiddleware) {
120670
120611
  compressionMiddleware.afterResponse(parsed);
120671
120612
  }
120672
- const piStreamHandle = stream(model, piContext, piOptions);
120673
120613
  const wantsStream = effectiveParsed.stream === true;
120674
- if (apiType === "anthropic-messages") {
120614
+ let currentTier = initialTier;
120615
+ const MAX_ESCALATION_ATTEMPTS = 3;
120616
+ for (let attempt = 0;attempt < MAX_ESCALATION_ATTEMPTS; attempt++) {
120617
+ const currentModel = config.routing.models[currentTier];
120618
+ const currentActualModelId = currentModel.split("/").slice(1).join("/");
120619
+ const currentProviderName = findProviderForModel(currentModel, openclawConfig)?.providerName ?? providerName;
120620
+ const currentAuth = resolveApiKey(currentProviderName, openclawConfig, authProfiles);
120621
+ if (!currentAuth) {
120622
+ return jsonErrorResponse(`No auth credentials found for provider: ${currentProviderName}`, 502);
120623
+ }
120624
+ const currentAuthInfo = {
120625
+ apiKey: currentAuth.apiKey,
120626
+ headerName: currentAuth.headerName,
120627
+ headerValue: currentAuth.headerValue,
120628
+ awsAccessKeyId: currentAuth.awsAccessKeyId,
120629
+ awsSecretKey: currentAuth.awsSecretKey,
120630
+ awsSessionToken: currentAuth.awsSessionToken,
120631
+ awsRegion: currentAuth.awsRegion,
120632
+ accountId: currentAuth.accountId
120633
+ };
120634
+ const model = buildPiAiModel(currentProviderName, currentActualModelId, openclawConfig);
120635
+ const injectedMessages = signalRouter.injectInstructionIfNeeded(currentTier, messages);
120636
+ const injectedParsed = {
120637
+ ...effectiveParsed,
120638
+ messages: injectedMessages,
120639
+ rawBody: adapter.modifyMessages(effectiveParsed.rawBody, injectedMessages)
120640
+ };
120641
+ const piContext = buildPiContext(injectedParsed);
120642
+ applyCodexSystemPromptFallback(piContext, targetApiType);
120643
+ const shouldDetect = signalRouter.enabled && NEXT_TIER[currentTier] !== null;
120644
+ const abortCtrl = new AbortController;
120645
+ const piOptions = buildPiOptions(injectedParsed, currentAuthInfo, currentProviderName, abortCtrl.signal);
120646
+ console.log(`[clawmux] [llm] ${currentTier} → ${currentModel} | attempt=${attempt + 1} | pi-ai (${apiType})${preview2 ? ` | "${preview2}${msgText2.length > 100 ? "…" : ""}"` : ""}`);
120647
+ const piStreamHandle = stream(model, piContext, piOptions);
120648
+ if (!shouldDetect) {
120649
+ return await yieldPiAiResponse(piStreamHandle, apiType, wantsStream);
120650
+ }
120651
+ const detector = signalRouter.createSignalDetector();
120652
+ const detectionState = createSignalDetectionState();
120675
120653
  if (wantsStream) {
120676
- return new Response(piStreamToAnthropicSse(piStreamHandle), {
120654
+ const signalGen = detectSignalInStream(piStreamHandle, detector, detectionState, () => {});
120655
+ const sseBody = piStreamToAnthropicSseFromGenerator(signalGen);
120656
+ const response = new Response(sseBody, {
120677
120657
  status: 200,
120678
120658
  headers: { "content-type": "text/event-stream" }
120679
120659
  });
120680
- }
120681
- const json = await piStreamToAnthropicJson(piStreamHandle);
120682
- return new Response(JSON.stringify(json), {
120683
- status: 200,
120684
- headers: { "content-type": "application/json" }
120685
- });
120686
- }
120687
- if (apiType === "openai-completions") {
120688
- if (wantsStream) {
120689
- return new Response(piStreamToOpenAiCompletionsSse(piStreamHandle), {
120690
- status: 200,
120691
- headers: { "content-type": "text/event-stream" }
120660
+ response.clone().text().then(() => {
120661
+ if (detectionState.signalDetected) {
120662
+ abortCtrl.abort();
120663
+ const nextTier = signalRouter.handleEscalation(messages, currentTier);
120664
+ if (nextTier !== null) {
120665
+ console.log(`[clawmux] [escalation] ${currentTier} → ${nextTier} (signal detected)`);
120666
+ currentTier = nextTier;
120667
+ return;
120668
+ }
120669
+ }
120670
+ signalRouter.touchActivity(messages);
120671
+ if (attempt > 0) {
120672
+ signalRouter.recordSuccessfulEscalation(messages, currentTier);
120673
+ }
120692
120674
  });
120675
+ return response;
120693
120676
  }
120694
- const json = await piStreamToOpenAiCompletionsJson(piStreamHandle);
120695
- return new Response(JSON.stringify(json), {
120696
- status: 200,
120697
- headers: { "content-type": "application/json" }
120698
- });
120699
- }
120700
- if (apiType === "openai-responses") {
120701
- if (wantsStream) {
120702
- return new Response(piStreamToOpenAiResponsesSse(piStreamHandle), {
120703
- status: 200,
120704
- headers: { "content-type": "text/event-stream" }
120705
- });
120677
+ const fullText = await collectPiStreamText(piStreamHandle, detector, detectionState);
120678
+ if (detectionState.signalDetected) {
120679
+ const nextTier = signalRouter.handleEscalation(messages, currentTier);
120680
+ if (nextTier !== null) {
120681
+ console.log(`[clawmux] [escalation] ${currentTier} → ${nextTier} (signal detected in non-streaming)`);
120682
+ currentTier = nextTier;
120683
+ continue;
120684
+ }
120706
120685
  }
120707
- const json = await piStreamToOpenAiResponsesJson(piStreamHandle);
120708
- return new Response(JSON.stringify(json), {
120709
- status: 200,
120710
- headers: { "content-type": "application/json" }
120711
- });
120712
- }
120713
- if (apiType === "google-generative-ai") {
120714
- if (wantsStream) {
120715
- return new Response(piStreamToGoogleSse(piStreamHandle), {
120716
- status: 200,
120717
- headers: { "content-type": "text/event-stream" }
120718
- });
120686
+ signalRouter.touchActivity(messages);
120687
+ if (attempt > 0) {
120688
+ signalRouter.recordSuccessfulEscalation(messages, currentTier);
120719
120689
  }
120720
- const json = await piStreamToGoogleJson(piStreamHandle);
120721
- return new Response(JSON.stringify(json), {
120690
+ return new Response(JSON.stringify(fullText), {
120722
120691
  status: 200,
120723
120692
  headers: { "content-type": "application/json" }
120724
120693
  });
@@ -120747,7 +120716,7 @@ async function handleApiRequest(req, body, apiType, config, openclawConfig, auth
120747
120716
  const lastUserMsg = [...parsed.messages].reverse().find((m2) => m2.role === "user");
120748
120717
  const msgText = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : Array.isArray(lastUserMsg?.content) ? lastUserMsg.content.filter((b) => b.type === "text").map((b) => b.text ?? "").join(" ") : "";
120749
120718
  const preview = msgText.replace(/\s+/g, " ").trim().slice(0, 100);
120750
- console.log(`[clawmux] [llm] ${decision.tier} → ${decision.model} | conf=${classification.confidence.toFixed(2)}${classification.reasoning ? ` | ${classification.reasoning}` : ""}${preview ? ` | "${preview}${msgText.length > 100 ? "…" : ""}"` : ""}`);
120719
+ console.log(`[clawmux] [llm] ${decision.tier} → ${decision.model} | legacy${preview ? ` | "${preview}${msgText.length > 100 ? "…" : ""}"` : ""}`);
120751
120720
  if (compressionMiddleware && upstreamResponse.ok) {
120752
120721
  compressionMiddleware.afterResponse(parsed);
120753
120722
  }
@@ -120909,6 +120878,220 @@ function createResolvedCompressionMiddleware(config, openclawConfig, authProfile
120909
120878
  statsTracker
120910
120879
  });
120911
120880
  }
120881
+ async function yieldPiAiResponse(piStreamHandle, apiType, wantsStream) {
120882
+ if (apiType === "anthropic-messages") {
120883
+ if (wantsStream) {
120884
+ return new Response(piStreamToAnthropicSse(piStreamHandle), {
120885
+ status: 200,
120886
+ headers: { "content-type": "text/event-stream" }
120887
+ });
120888
+ }
120889
+ const json = await piStreamToAnthropicJson(piStreamHandle);
120890
+ return new Response(JSON.stringify(json), {
120891
+ status: 200,
120892
+ headers: { "content-type": "application/json" }
120893
+ });
120894
+ }
120895
+ if (apiType === "openai-completions") {
120896
+ if (wantsStream) {
120897
+ return new Response(piStreamToOpenAiCompletionsSse(piStreamHandle), {
120898
+ status: 200,
120899
+ headers: { "content-type": "text/event-stream" }
120900
+ });
120901
+ }
120902
+ const json = await piStreamToOpenAiCompletionsJson(piStreamHandle);
120903
+ return new Response(JSON.stringify(json), {
120904
+ status: 200,
120905
+ headers: { "content-type": "application/json" }
120906
+ });
120907
+ }
120908
+ if (apiType === "openai-responses") {
120909
+ if (wantsStream) {
120910
+ return new Response(piStreamToOpenAiResponsesSse(piStreamHandle), {
120911
+ status: 200,
120912
+ headers: { "content-type": "text/event-stream" }
120913
+ });
120914
+ }
120915
+ const json = await piStreamToOpenAiResponsesJson(piStreamHandle);
120916
+ return new Response(JSON.stringify(json), {
120917
+ status: 200,
120918
+ headers: { "content-type": "application/json" }
120919
+ });
120920
+ }
120921
+ if (apiType === "google-generative-ai") {
120922
+ if (wantsStream) {
120923
+ return new Response(piStreamToGoogleSse(piStreamHandle), {
120924
+ status: 200,
120925
+ headers: { "content-type": "text/event-stream" }
120926
+ });
120927
+ }
120928
+ const json = await piStreamToGoogleJson(piStreamHandle);
120929
+ return new Response(JSON.stringify(json), {
120930
+ status: 200,
120931
+ headers: { "content-type": "application/json" }
120932
+ });
120933
+ }
120934
+ throw new Error(`Unsupported pi-ai apiType: ${apiType}`);
120935
+ }
120936
+ function piStreamToAnthropicSseFromGenerator(gen) {
120937
+ const encoder6 = new TextEncoder;
120938
+ function sseFrame2(event, data) {
120939
+ return encoder6.encode(`event: ${event}
120940
+ data: ${JSON.stringify(data)}
120941
+
120942
+ `);
120943
+ }
120944
+ return new ReadableStream({
120945
+ async start(controller) {
120946
+ try {
120947
+ let messageStarted = false;
120948
+ const openBlocks = new Map;
120949
+ const ensureMessageStart = (model) => {
120950
+ if (messageStarted)
120951
+ return;
120952
+ messageStarted = true;
120953
+ controller.enqueue(sseFrame2("message_start", {
120954
+ type: "message_start",
120955
+ message: {
120956
+ id: "msg_" + Date.now().toString(36),
120957
+ type: "message",
120958
+ role: "assistant",
120959
+ content: [],
120960
+ model,
120961
+ stop_reason: null,
120962
+ stop_sequence: null,
120963
+ usage: { input_tokens: 0, output_tokens: 0 }
120964
+ }
120965
+ }));
120966
+ };
120967
+ for await (const event of gen) {
120968
+ if (event.type === "start") {
120969
+ ensureMessageStart(event.partial.model || "");
120970
+ } else if (event.type === "text_start") {
120971
+ ensureMessageStart(event.partial.model || "");
120972
+ openBlocks.set(event.contentIndex, "text");
120973
+ controller.enqueue(sseFrame2("content_block_start", {
120974
+ type: "content_block_start",
120975
+ index: event.contentIndex,
120976
+ content_block: { type: "text", text: "" }
120977
+ }));
120978
+ } else if (event.type === "text_delta") {
120979
+ if (openBlocks.get(event.contentIndex) !== "text") {
120980
+ ensureMessageStart(event.partial.model || "");
120981
+ openBlocks.set(event.contentIndex, "text");
120982
+ controller.enqueue(sseFrame2("content_block_start", {
120983
+ type: "content_block_start",
120984
+ index: event.contentIndex,
120985
+ content_block: { type: "text", text: "" }
120986
+ }));
120987
+ }
120988
+ controller.enqueue(sseFrame2("content_block_delta", {
120989
+ type: "content_block_delta",
120990
+ index: event.contentIndex,
120991
+ delta: { type: "text_delta", text: event.delta }
120992
+ }));
120993
+ } else if (event.type === "text_end") {
120994
+ if (openBlocks.get(event.contentIndex) === "text") {
120995
+ controller.enqueue(sseFrame2("content_block_stop", {
120996
+ type: "content_block_stop",
120997
+ index: event.contentIndex
120998
+ }));
120999
+ openBlocks.delete(event.contentIndex);
121000
+ }
121001
+ } else if (event.type === "done") {
121002
+ for (const [idx] of openBlocks) {
121003
+ controller.enqueue(sseFrame2("content_block_stop", {
121004
+ type: "content_block_stop",
121005
+ index: idx
121006
+ }));
121007
+ }
121008
+ openBlocks.clear();
121009
+ controller.enqueue(sseFrame2("message_delta", {
121010
+ type: "message_delta",
121011
+ delta: { stop_reason: "end_turn", stop_sequence: null },
121012
+ usage: { output_tokens: event.message.usage?.output ?? 0 }
121013
+ }));
121014
+ controller.enqueue(sseFrame2("message_stop", { type: "message_stop" }));
121015
+ } else if (event.type === "error") {
121016
+ for (const [idx] of openBlocks) {
121017
+ controller.enqueue(sseFrame2("content_block_stop", {
121018
+ type: "content_block_stop",
121019
+ index: idx
121020
+ }));
121021
+ }
121022
+ openBlocks.clear();
121023
+ controller.enqueue(sseFrame2("error", {
121024
+ type: "error",
121025
+ error: {
121026
+ type: "api_error",
121027
+ message: event.error?.errorMessage ?? "Unknown error"
121028
+ }
121029
+ }));
121030
+ }
121031
+ }
121032
+ controller.close();
121033
+ } catch (err) {
121034
+ const msg = err instanceof Error ? err.message : String(err);
121035
+ controller.enqueue(sseFrame2("error", {
121036
+ type: "error",
121037
+ error: { type: "api_error", message: msg }
121038
+ }));
121039
+ controller.close();
121040
+ }
121041
+ }
121042
+ });
121043
+ }
121044
+ async function collectPiStreamText(piStreamHandle, detector, state) {
121045
+ const msg = await piStreamHandle.result();
121046
+ const fullText = msg.content.filter((c) => c.type === "text").map((c) => c.text).join("");
121047
+ const results = detector.feedChunk(fullText);
121048
+ for (const r2 of results) {
121049
+ if (r2.type === "signal_detected") {
121050
+ state.signalDetected = true;
121051
+ break;
121052
+ }
121053
+ }
121054
+ detector.flush();
121055
+ const STOP_REASON_MAP2 = {
121056
+ stop: "end_turn",
121057
+ length: "max_tokens",
121058
+ toolUse: "tool_use",
121059
+ error: "end_turn",
121060
+ aborted: "end_turn"
121061
+ };
121062
+ const blocks = [];
121063
+ for (const c of msg.content) {
121064
+ if (c.type === "text") {
121065
+ blocks.push({ type: "text", text: c.text });
121066
+ } else if (c.type === "thinking") {
121067
+ blocks.push({
121068
+ type: "thinking",
121069
+ thinking: c.thinking,
121070
+ ...c.thinkingSignature ? { signature: c.thinkingSignature } : {}
121071
+ });
121072
+ } else if (c.type === "toolCall") {
121073
+ blocks.push({
121074
+ type: "tool_use",
121075
+ id: c.id,
121076
+ name: c.name,
121077
+ input: c.arguments ?? {}
121078
+ });
121079
+ }
121080
+ }
121081
+ return {
121082
+ id: "msg_" + Date.now().toString(36),
121083
+ type: "message",
121084
+ role: "assistant",
121085
+ content: blocks,
121086
+ model: msg.model || "",
121087
+ stop_reason: STOP_REASON_MAP2[msg.stopReason] ?? "end_turn",
121088
+ stop_sequence: null,
121089
+ usage: {
121090
+ input_tokens: msg.usage?.input ?? 0,
121091
+ output_tokens: msg.usage?.output ?? 0
121092
+ }
121093
+ };
121094
+ }
120912
121095
  var ROUTE_MAPPINGS = [
120913
121096
  { apiType: "anthropic-messages", key: "/v1/messages" },
120914
121097
  { apiType: "openai-completions", key: "/v1/chat/completions" },
@@ -120918,8 +121101,17 @@ var ROUTE_MAPPINGS = [
120918
121101
  { apiType: "bedrock-converse-stream", key: "/model/*/converse-stream" }
120919
121102
  ];
120920
121103
  function setupPipelineRoutes(config, openclawConfig, authProfiles, compressionMiddleware) {
121104
+ const escalationConfig = config.routing.escalation;
121105
+ const signalRouter = new SignalRouter({
121106
+ escalation: {
121107
+ activeThresholdMs: escalationConfig?.activeThresholdMs ?? 300000,
121108
+ maxLifetimeMs: escalationConfig?.maxLifetimeMs ?? 7200000,
121109
+ fingerprintRootCount: escalationConfig?.fingerprintRootCount ?? 5
121110
+ },
121111
+ enabled: escalationConfig?.enabled ?? true
121112
+ });
120921
121113
  for (const mapping of ROUTE_MAPPINGS) {
120922
- setRouteHandler(mapping.key, (req, body) => handleApiRequest(req, body, mapping.apiType, config, openclawConfig, authProfiles, compressionMiddleware));
121114
+ setRouteHandler(mapping.key, (req, body) => handleApiRequest(req, body, mapping.apiType, config, openclawConfig, authProfiles, compressionMiddleware, signalRouter));
120923
121115
  }
120924
121116
  }
120925
121117