reasonix 0.5.0 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -696,6 +696,170 @@ async function runHooks(opts) {
696
696
  return { event, outcomes, blocked };
697
697
  }
698
698
 
699
+ // src/tokenizer.ts
700
+ import { readFileSync as readFileSync3 } from "fs";
701
+ import { createRequire } from "module";
702
+ import { dirname as dirname2, join as join3 } from "path";
703
+ import { fileURLToPath } from "url";
704
+ import { gunzipSync } from "zlib";
705
/**
 * Build the byte -> character lookup table used for byte-level BPE.
 *
 * Bytes in the ranges 33-126, 161-172 and 174-255 map to the code point
 * with the same numeric value. Every other byte, visited in ascending
 * order, is assigned the next unused code point starting at 256, so all
 * 256 byte values end up with a distinct single-character string.
 *
 * @returns A 256-entry array indexed by byte value.
 */
function buildByteToChar() {
  // A Set gives O(1) membership instead of rescanning an array per byte.
  const keepsOwnCodePoint = new Set();
  for (let b = 33; b <= 126; b++) keepsOwnCodePoint.add(b);
  for (let b = 161; b <= 172; b++) keepsOwnCodePoint.add(b);
  for (let b = 174; b <= 255; b++) keepsOwnCodePoint.add(b);

  const result = new Array(256);
  let remapped = 0;
  for (let b = 0; b < 256; b++) {
    // Remapped bytes take 256, 257, ... in the order they are encountered.
    const codePoint = keepsOwnCodePoint.has(b) ? b : 256 + remapped++;
    result[b] = String.fromCodePoint(codePoint);
  }
  return result;
}
725
+ var cached = null;
726
/**
 * Work out where the gzipped tokenizer data file lives.
 *
 * A non-empty REASONIX_TOKENIZER_PATH environment variable wins outright.
 * Otherwise the file is looked up in the `data` directory next to this
 * module's parent directory; if that computation throws, the location is
 * derived from the resolved `reasonix/package.json` instead.
 *
 * @returns Path to `deepseek-tokenizer.json.gz` (or the override).
 */
function resolveDataPath() {
  const override = process.env.REASONIX_TOKENIZER_PATH;
  if (override) return override;

  const fileName = "deepseek-tokenizer.json.gz";
  try {
    const moduleDir = dirname2(fileURLToPath(import.meta.url));
    return join3(moduleDir, "..", "data", fileName);
  } catch {
    const requireFromHere = createRequire(import.meta.url);
    const packageRoot = dirname2(requireFromHere.resolve("reasonix/package.json"));
    return join3(packageRoot, "data", fileName);
  }
}
740
/**
 * Load, parse and memoize the tokenizer definition.
 *
 * The gzipped JSON at `resolveDataPath()` is read synchronously on the
 * first call; later calls return the module-level `cached` object.
 *
 * The returned object holds:
 *  - `vocab`: piece -> id table taken from `model.vocab`
 *  - `mergeRank`: Map from `"left right"` to merge priority (lower wins)
 *  - `splitRegexes`: one global/unicode RegExp per usable `Split` step
 *  - `byteToChar`: byte-level alphabet from `buildByteToChar()`
 *  - `addedPattern` / `addedMap`: matcher and ids for non-special added
 *    tokens (`addedPattern` is null when there are none)
 *
 * @returns The cached tokenizer tables.
 * @throws If the file cannot be read, gunzipped or parsed.
 */
function loadTokenizer() {
  if (cached) return cached;
  const buf = readFileSync3(resolveDataPath());
  const data = JSON.parse(gunzipSync(buf).toString("utf8"));

  const mergeRank = new Map();
  const merges = data.model.merges;
  for (let i = 0; i < merges.length; i++) {
    const merge = merges[i];
    // Accept both serializations: "left right" strings and [left, right]
    // pairs. Lookups always use the space-joined string form.
    mergeRank.set(Array.isArray(merge) ? merge.join(" ") : merge, i);
  }

  // A pre-tokenizer may be a sequence of steps or a single step; treat a
  // single step as a one-element sequence and a missing one as empty.
  const pre = data.pre_tokenizer;
  const preSteps = pre?.pretokenizers ?? (pre ? [pre] : []);
  const splitRegexes = [];
  for (const step of preSteps) {
    // Only regex-based Split steps are usable; building a RegExp from a
    // missing pattern would yield an empty regex that matches everywhere.
    if (step.type === "Split" && typeof step.pattern?.Regex === "string") {
      splitRegexes.push(new RegExp(step.pattern.Regex, "gu"));
    }
  }

  const addedMap = new Map();
  const addedContents = [];
  for (const token of data.added_tokens ?? []) {
    if (!token.special) {
      addedMap.set(token.content, token.id);
      addedContents.push(token.content);
    }
  }
  // Longest first so the alternation prefers the longest added token.
  addedContents.sort((a, b) => b.length - a.length);
  const addedPattern = addedContents.length
    ? new RegExp(addedContents.map(escapeRegex).join("|"), "g")
    : null;

  cached = {
    vocab: data.model.vocab,
    mergeRank,
    splitRegexes,
    byteToChar: buildByteToChar(),
    addedPattern,
    addedMap
  };
  return cached;
}
775
/**
 * Backslash-escape every regular-expression metacharacter in `s` so the
 * result can be embedded in a pattern and match the text literally.
 */
function escapeRegex(s) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaChars, (ch) => `\\${ch}`);
}
778
/**
 * Split every chunk around the matches of `re`, keeping both the matches
 * and the text between them, in order.
 *
 * Empty chunks are skipped and zero-length matches are not emitted.
 *
 * @param chunks Strings to split.
 * @param re Global regular expression; its `lastIndex` is reset per chunk.
 * @returns Flat list of the resulting pieces.
 */
function applySplit(chunks, re) {
  const pieces = [];
  for (const text of chunks) {
    if (text) {
      re.lastIndex = 0;
      let cursor = 0;
      for (const match of text.matchAll(re)) {
        const start = match.index ?? 0;
        const matched = match[0];
        // Text between the previous match and this one.
        if (start > cursor) pieces.push(text.slice(cursor, start));
        if (matched.length > 0) pieces.push(matched);
        cursor = start + matched.length;
      }
      // Trailing text after the last match (or the whole chunk if none).
      if (cursor < text.length) pieces.push(text.slice(cursor));
    }
  }
  return pieces;
}
794
/**
 * Re-express `s` in the byte-level alphabet: encode it as UTF-8 and
 * replace each byte with its entry in `byteToChar`.
 *
 * @param s Text to convert.
 * @param byteToChar Lookup table indexed by byte value.
 * @returns The concatenated table entries, one per UTF-8 byte.
 */
function byteLevelEncode(s, byteToChar) {
  const utf8 = new TextEncoder().encode(s);
  return utf8.reduce((acc, byte) => acc + byteToChar[byte], "");
}
800
/**
 * Apply byte-pair merges to one piece.
 *
 * The piece starts as a list of its code points. On every round the
 * adjacent pair with the lowest rank in `mergeRank` (leftmost on ties)
 * is fused into a single symbol; the process stops when no adjacent pair
 * has a rank or only one symbol is left.
 *
 * @param piece Text to merge.
 * @param mergeRank Map from `"left right"` to merge priority (lower first).
 * @returns The final list of symbols; empty for an empty piece.
 */
function bpeEncode(piece, mergeRank) {
  if (piece.length <= 1) return piece.length === 0 ? [] : [piece];
  const symbols = Array.from(piece);
  for (;;) {
    let at = -1;
    let lowest = Number.POSITIVE_INFINITY;
    for (let i = 0; i + 1 < symbols.length; i++) {
      const rank = mergeRank.get(symbols[i] + " " + symbols[i + 1]);
      if (rank === void 0 || rank >= lowest) continue;
      lowest = rank;
      at = i;
      // Rank 0 cannot be beaten, so stop scanning early.
      if (rank === 0) break;
    }
    if (at < 0) break;
    symbols.splice(at, 2, symbols[at] + symbols[at + 1]);
    if (symbols.length === 1) break;
  }
  return symbols;
}
825
/**
 * Convert text into a list of token ids.
 *
 * Non-special added tokens are matched first and emitted as their own
 * ids. The text between them goes through the split regexes, the
 * byte-level alphabet and the BPE merges, and each resulting piece is
 * looked up in the vocabulary. Pieces with no vocabulary entry are
 * skipped.
 *
 * @param text Input text; an empty value yields an empty list.
 * @returns Token ids in order of appearance.
 */
function encode(text) {
  if (!text) return [];
  const tok = loadTokenizer();
  const ids = [];

  // Encode a stretch of text that contains no added tokens.
  const encodePlain = (segment) => {
    if (!segment) return;
    const chunks = tok.splitRegexes.reduce((acc, re) => applySplit(acc, re), [segment]);
    for (const chunk of chunks) {
      if (!chunk) continue;
      const symbols = bpeEncode(byteLevelEncode(chunk, tok.byteToChar), tok.mergeRank);
      for (const symbol of symbols) {
        const id = tok.vocab[symbol];
        if (id !== void 0) ids.push(id);
      }
    }
  };

  if (!tok.addedPattern) {
    encodePlain(text);
    return ids;
  }

  tok.addedPattern.lastIndex = 0;
  let cursor = 0;
  for (const hit of text.matchAll(tok.addedPattern)) {
    const start = hit.index ?? 0;
    if (start > cursor) encodePlain(text.slice(cursor, start));
    const addedId = tok.addedMap.get(hit[0]);
    if (addedId !== void 0) ids.push(addedId);
    cursor = start + hit[0].length;
  }
  if (cursor < text.length) encodePlain(text.slice(cursor));
  return ids;
}
859
/**
 * Count how many token ids `encode` produces for `text`.
 *
 * @param text Text to measure.
 * @returns Number of token ids.
 */
function countTokens(text) {
  const ids = encode(text);
  return ids.length;
}
862
+
699
863
  // src/repair/flatten.ts
700
864
  function analyzeSchema(schema) {
701
865
  if (!schema) return { shouldFlatten: false, leafCount: 0, maxDepth: 0 };
@@ -846,7 +1010,14 @@ var ToolRegistry = class {
846
1010
  try {
847
1011
  const result = await tool.fn(args, { signal: opts.signal });
848
1012
  const str = typeof result === "string" ? result : JSON.stringify(result);
849
- return opts.maxResultChars ? truncateForModel(str, opts.maxResultChars) : str;
1013
+ let clipped = str;
1014
+ if (opts.maxResultTokens !== void 0) {
1015
+ clipped = truncateForModelByTokens(clipped, opts.maxResultTokens);
1016
+ }
1017
+ if (opts.maxResultChars !== void 0) {
1018
+ clipped = truncateForModel(clipped, opts.maxResultChars);
1019
+ }
1020
+ return clipped;
850
1021
  } catch (err) {
851
1022
  const e = err;
852
1023
  if (typeof e.toToolResult === "function") {
@@ -880,6 +1051,7 @@ function hasDotKey(obj) {
880
1051
 
881
1052
  // src/mcp/registry.ts
882
1053
  var DEFAULT_MAX_RESULT_CHARS = 32e3;
1054
+ var DEFAULT_MAX_RESULT_TOKENS = 8e3;
883
1055
  async function bridgeMcpTools(client, opts = {}) {
884
1056
  const registry = opts.registry ?? new ToolRegistry({ autoFlatten: opts.autoFlatten });
885
1057
  const prefix = opts.namePrefix ?? "";
@@ -936,6 +1108,61 @@ function truncateForModel(s, maxChars) {
936
1108
 
937
1109
  ${tail}`;
938
1110
  }
1111
/**
 * Clip `s` to roughly `maxTokens` tokens, keeping a head and a short tail
 * with a marker in between that reports how much was dropped.
 *
 * The content budget is `maxTokens` minus a fixed 48-token allowance for
 * the marker. The tail gets 10% of that budget (at most 256 tokens) and
 * the head gets the rest. The dropped-token figure in the marker is an
 * estimate, extrapolated from the tokens-per-char ratio of the kept text.
 *
 * @param s Text to clip.
 * @param maxTokens Token cap; values <= 0 yield an empty string.
 * @returns `s` unchanged when it fits, otherwise head + marker + tail.
 */
function truncateForModelByTokens(s, maxTokens) {
  if (maxTokens <= 0) return "";
  // Cheap "certainly fits" check. One token covers at least one UTF-8 byte
  // and one UTF-16 code unit encodes to at most three bytes, so the token
  // count cannot exceed 3 * s.length. Comparing s.length alone is unsound:
  // text whose bytes do not merge can cost more than one token per unit.
  if (s.length * 3 <= maxTokens) return s;
  // Only tokenize the whole string when it is short enough to plausibly
  // fit; longer strings go straight to truncation.
  if (s.length <= maxTokens * 4) {
    if (countTokens(s) <= maxTokens) return s;
  }
  const markerOverhead = 48;
  const contentBudget = Math.max(0, maxTokens - markerOverhead);
  const tailBudget = Math.min(256, Math.floor(contentBudget * 0.1));
  const headBudget = Math.max(0, contentBudget - tailBudget);
  const head = sizePrefixToTokens(s, headBudget);
  const tail = sizeSuffixToTokens(s, tailBudget);
  const droppedChars = s.length - head.length - tail.length;
  const headTokens = head ? countTokens(head) : 0;
  const tailTokens = tail ? countTokens(tail) : 0;
  const sampleChars = head.length + tail.length;
  const sampleTokens = headTokens + tailTokens;
  // Fall back to 0.3 tokens per char when nothing was kept to sample from.
  const ratio = sampleChars > 0 ? sampleTokens / sampleChars : 0.3;
  const estTotalTokens = Math.ceil(s.length * ratio);
  const droppedTokens = Math.max(0, estTotalTokens - sampleTokens);
  return `${head}\n\n[\u2026truncated ~${droppedTokens} tokens (${droppedChars} chars) \u2014 raise BridgeOptions.maxResultTokens, or call the tool with a narrower scope (filter, head, pagination)\u2026]\n\n${tail}`;
}
1138
/**
 * Return the longest prefix of `s` found by proportional search whose
 * token count does not exceed `budget`.
 *
 * The search starts at `budget * 4` characters and shrinks the candidate
 * in proportion to how far over budget it is (with a 5% safety margin).
 * Every returned prefix has been measured to fit. After six proportional
 * steps the candidate is additionally at least halved each round, so the
 * loop stays logarithmic in the worst case. The cut never lands between
 * the two halves of a surrogate pair.
 *
 * @param s Source text.
 * @param budget Maximum number of tokens; <= 0 yields "".
 * @returns A prefix of `s` within budget, possibly empty.
 */
function sizePrefixToTokens(s, budget) {
  if (budget <= 0 || s.length === 0) return "";
  // Step back one unit if the cut would separate a high surrogate from
  // its low surrogate; a lone surrogate would be encoded as U+FFFD.
  const snap = (n) => {
    if (n > 0 && n < s.length) {
      const last = s.charCodeAt(n - 1);
      const following = s.charCodeAt(n);
      const splitsPair =
        last >= 0xd800 && last <= 0xdbff && following >= 0xdc00 && following <= 0xdfff;
      if (splitsPair) return n - 1;
    }
    return n;
  };
  let size = snap(Math.floor(Math.min(s.length, budget * 4)));
  for (let iter = 0; size > 0; iter++) {
    const slice = s.slice(0, size);
    const count = countTokens(slice);
    if (count <= budget) return slice;
    const proportional = Math.floor(size * (budget / count) * 0.95);
    // Past the usual convergence window, force at least a halving.
    const next = iter < 6 ? proportional : Math.min(proportional, Math.floor(size / 2));
    // Always make progress, even if rounding left the size unchanged.
    size = snap(Math.min(next, size - 1));
  }
  return "";
}
1152
/**
 * Return the longest suffix of `s` found by proportional search whose
 * token count does not exceed `budget`.
 *
 * The search starts at `budget * 4` characters and shrinks the candidate
 * in proportion to how far over budget it is (with a 5% safety margin).
 * Every returned suffix has been measured to fit. After six proportional
 * steps the candidate is additionally at least halved each round. The
 * suffix never begins on the low half of a surrogate pair.
 *
 * The suffix is taken with an explicit start index rather than a negative
 * one: `s.slice(-0)` is `s.slice(0)` and would return the whole string
 * when the candidate size reaches zero.
 *
 * @param s Source text.
 * @param budget Maximum number of tokens; <= 0 yields "".
 * @returns A suffix of `s` within budget, possibly empty.
 */
function sizeSuffixToTokens(s, budget) {
  if (budget <= 0 || s.length === 0) return "";
  // Drop one unit if the suffix would start on a low surrogate whose high
  // surrogate is being cut off; a lone surrogate would encode as U+FFFD.
  const snap = (n) => {
    const start = s.length - n;
    if (n > 0 && start > 0) {
      const previous = s.charCodeAt(start - 1);
      const first = s.charCodeAt(start);
      const splitsPair =
        previous >= 0xd800 && previous <= 0xdbff && first >= 0xdc00 && first <= 0xdfff;
      if (splitsPair) return n - 1;
    }
    return n;
  };
  let size = snap(Math.floor(Math.min(s.length, budget * 4)));
  for (let iter = 0; size > 0; iter++) {
    const slice = s.slice(s.length - size);
    const count = countTokens(slice);
    if (count <= budget) return slice;
    const proportional = Math.floor(size * (budget / count) * 0.95);
    // Past the usual convergence window, force at least a halving.
    const next = iter < 6 ? proportional : Math.min(proportional, Math.floor(size / 2));
    // Always make progress, even if rounding left the size unchanged.
    size = snap(Math.min(next, size - 1));
  }
  return "";
}
939
1166
  function blockToString(block) {
940
1167
  if (block.type === "text") return block.text;
941
1168
  if (block.type === "image") return `[image ${block.mimeType}, ${block.data.length} chars base64]`;
@@ -1321,19 +1548,19 @@ import {
1321
1548
  chmodSync as chmodSync2,
1322
1549
  existsSync as existsSync2,
1323
1550
  mkdirSync as mkdirSync2,
1324
- readFileSync as readFileSync3,
1551
+ readFileSync as readFileSync4,
1325
1552
  readdirSync,
1326
1553
  statSync,
1327
1554
  unlinkSync,
1328
1555
  writeFileSync as writeFileSync2
1329
1556
  } from "fs";
1330
1557
  import { homedir as homedir3 } from "os";
1331
- import { dirname as dirname2, join as join3 } from "path";
1558
+ import { dirname as dirname3, join as join4 } from "path";
1332
1559
  function sessionsDir() {
1333
- return join3(homedir3(), ".reasonix", "sessions");
1560
+ return join4(homedir3(), ".reasonix", "sessions");
1334
1561
  }
1335
1562
  function sessionPath(name) {
1336
- return join3(sessionsDir(), `${sanitizeName(name)}.jsonl`);
1563
+ return join4(sessionsDir(), `${sanitizeName(name)}.jsonl`);
1337
1564
  }
1338
1565
  function sanitizeName(name) {
1339
1566
  const cleaned = name.replace(/[^\w\-\u4e00-\u9fa5]/g, "_").slice(0, 64);
@@ -1343,7 +1570,7 @@ function loadSessionMessages(name) {
1343
1570
  const path = sessionPath(name);
1344
1571
  if (!existsSync2(path)) return [];
1345
1572
  try {
1346
- const raw = readFileSync3(path, "utf8");
1573
+ const raw = readFileSync4(path, "utf8");
1347
1574
  const out = [];
1348
1575
  for (const line of raw.split(/\r?\n/)) {
1349
1576
  const trimmed = line.trim();
@@ -1361,7 +1588,7 @@ function loadSessionMessages(name) {
1361
1588
  }
1362
1589
  function appendSessionMessage(name, message) {
1363
1590
  const path = sessionPath(name);
1364
- mkdirSync2(dirname2(path), { recursive: true });
1591
+ mkdirSync2(dirname3(path), { recursive: true });
1365
1592
  appendFileSync(path, `${JSON.stringify(message)}
1366
1593
  `, "utf8");
1367
1594
  try {
@@ -1375,7 +1602,7 @@ function listSessions() {
1375
1602
  try {
1376
1603
  const files = readdirSync(dir).filter((f) => f.endsWith(".jsonl"));
1377
1604
  return files.map((file) => {
1378
- const path = join3(dir, file);
1605
+ const path = join4(dir, file);
1379
1606
  const stat = statSync(path);
1380
1607
  const name = file.replace(/\.jsonl$/, "");
1381
1608
  const messageCount = countLines(path);
@@ -1396,7 +1623,7 @@ function deleteSession(name) {
1396
1623
  }
1397
1624
  function rewriteSession(name, messages) {
1398
1625
  const path = sessionPath(name);
1399
- mkdirSync2(dirname2(path), { recursive: true });
1626
+ mkdirSync2(dirname3(path), { recursive: true });
1400
1627
  const body = messages.map((m) => JSON.stringify(m)).join("\n");
1401
1628
  writeFileSync2(path, body ? `${body}
1402
1629
  ` : "", "utf8");
@@ -1407,7 +1634,7 @@ function rewriteSession(name, messages) {
1407
1634
  }
1408
1635
  function countLines(path) {
1409
1636
  try {
1410
- const raw = readFileSync3(path, "utf8");
1637
+ const raw = readFileSync4(path, "utf8");
1411
1638
  return raw.split(/\r?\n/).filter((l) => l.trim()).length;
1412
1639
  } catch {
1413
1640
  return 0;
@@ -1589,20 +1816,26 @@ var CacheFirstLoop = class {
1589
1816
  }
1590
1817
  /**
1591
1818
  * Shrink the log by re-truncating oversized tool results to a tighter
1592
- * cap, and persist the result back to disk so the next launch doesn't
1593
- * re-inherit a fat session file. Returns a summary the TUI can
1594
- * display.
1819
+ * token cap, and persist the result back to disk so the next launch
1820
+ * doesn't re-inherit a fat session file. Returns a summary the TUI
1821
+ * can display.
1822
+ *
1823
+ * The cap is in DeepSeek V3 tokens (not chars) — so CJK text gets
1824
+ * capped at the same effective context footprint as English instead
1825
+ * of slipping past a char cap at 2× the token cost. Default 4000
1826
+ * tokens, matching the token-aware dispatch cap from 0.5.2.
1595
1827
  *
1596
1828
  * Only tool-role messages are touched (same rationale as
1597
1829
  * {@link healLoadedMessages}). User and assistant messages carry
1598
1830
  * authored intent we can't mechanically shrink without losing
1599
1831
  * meaning.
1600
1832
  */
1601
- compact(tightCapChars = 4e3) {
1833
+ compact(maxTokens = 4e3) {
1602
1834
  const before = this.log.toMessages();
1603
- const { messages, healedCount, healedFrom } = shrinkOversizedToolResults(before, tightCapChars);
1604
- const afterBytes = messages.filter((m) => m.role === "tool").reduce((s, m) => s + (typeof m.content === "string" ? m.content.length : 0), 0);
1605
- const charsSaved = healedFrom - afterBytes;
1835
+ const { messages, healedCount, tokensSaved, charsSaved } = shrinkOversizedToolResultsByTokens(
1836
+ before,
1837
+ maxTokens
1838
+ );
1606
1839
  if (healedCount > 0) {
1607
1840
  this.log.compactInPlace(messages);
1608
1841
  if (this.sessionName) {
@@ -1612,7 +1845,7 @@ var CacheFirstLoop = class {
1612
1845
  }
1613
1846
  }
1614
1847
  }
1615
- return { healedCount, charsSaved };
1848
+ return { healedCount, tokensSaved, charsSaved };
1616
1849
  }
1617
1850
  appendAndPersist(message) {
1618
1851
  this.log.append(message);
@@ -1976,30 +2209,28 @@ var CacheFirstLoop = class {
1976
2209
  const ratio = usage.promptTokens / ctxMax;
1977
2210
  if (ratio > 0.6 && ratio <= 0.8) {
1978
2211
  const before = usage.promptTokens;
1979
- const soft = this.compact(16e3);
2212
+ const soft = this.compact(4e3);
1980
2213
  if (soft.healedCount > 0) {
1981
- const approxSaved = Math.round(soft.charsSaved / 4);
1982
- const after = Math.max(0, before - approxSaved);
2214
+ const after = Math.max(0, before - soft.tokensSaved);
1983
2215
  yield {
1984
2216
  turn: this._turn,
1985
2217
  role: "warning",
1986
2218
  content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} (${Math.round(
1987
2219
  ratio * 100
1988
- )}%) \u2014 proactively compacted ${soft.healedCount} tool result(s) to 16k, saved ~${approxSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Staying ahead of the 80% guard.`
2220
+ )}%) \u2014 proactively compacted ${soft.healedCount} tool result(s) to 4k tokens, saved ${soft.tokensSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Staying ahead of the 80% guard.`
1989
2221
  };
1990
2222
  }
1991
2223
  }
1992
2224
  }
1993
2225
  if (usage && usage.promptTokens / ctxMax > 0.8) {
1994
2226
  const before = usage.promptTokens;
1995
- const compactResult = this.compact(4e3);
2227
+ const compactResult = this.compact(1e3);
1996
2228
  if (compactResult.healedCount > 0) {
1997
- const approxSaved = Math.round(compactResult.charsSaved / 4);
1998
- const after = before - approxSaved;
2229
+ const after = Math.max(0, before - compactResult.tokensSaved);
1999
2230
  yield {
2000
2231
  turn: this._turn,
2001
2232
  role: "warning",
2002
- content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} \u2014 auto-compacted ${compactResult.healedCount} oversized tool result(s), saved ~${approxSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Continuing.`
2233
+ content: `context ${before.toLocaleString()}/${ctxMax.toLocaleString()} \u2014 auto-compacted ${compactResult.healedCount} oversized tool result(s), saved ${compactResult.tokensSaved.toLocaleString()} tokens (now ~${after.toLocaleString()}). Continuing.`
2003
2234
  };
2004
2235
  } else {
2005
2236
  yield {
@@ -2054,7 +2285,7 @@ ${reason}`;
2054
2285
  } else {
2055
2286
  result = await this.tools.dispatch(name, args, {
2056
2287
  signal,
2057
- maxResultChars: DEFAULT_MAX_RESULT_CHARS
2288
+ maxResultTokens: DEFAULT_MAX_RESULT_TOKENS
2058
2289
  });
2059
2290
  const postReport = await runHooks({
2060
2291
  hooks: this.hooks,
@@ -2200,6 +2431,25 @@ function shrinkOversizedToolResults(messages, maxChars) {
2200
2431
  });
2201
2432
  return { messages: out, healedCount, healedFrom };
2202
2433
  }
2434
/**
 * Re-truncate tool-role messages whose content exceeds `maxTokens`.
 *
 * Messages with any other role are passed through untouched. A tool
 * message is replaced only when truncation actually lowers its token
 * count, so a message is never rewritten into something just as large.
 *
 * @param messages Conversation messages; the array is not mutated.
 * @param maxTokens Token cap applied to each tool result.
 * @returns The new message list, the number of messages shrunk, and the
 *   tokens and characters saved across them.
 */
function shrinkOversizedToolResultsByTokens(messages, maxTokens) {
  let healedCount = 0;
  let tokensSaved = 0;
  let charsSaved = 0;
  const out = messages.map((msg) => {
    if (msg.role !== "tool") return msg;
    const content = typeof msg.content === "string" ? msg.content : "";
    // Cheap "certainly fits" check: a token covers at least one UTF-8 byte
    // and a UTF-16 unit is at most three bytes, so tokens <= 3 * length.
    // Comparing the raw length alone can let multi-token characters
    // slip past the cap.
    if (content.length * 3 <= maxTokens) return msg;
    const beforeTokens = countTokens(content);
    if (beforeTokens <= maxTokens) return msg;
    const truncated = truncateForModelByTokens(content, maxTokens);
    const afterTokens = countTokens(truncated);
    // Keep the original when truncation (including its marker) saved nothing.
    if (afterTokens >= beforeTokens) return msg;
    healedCount += 1;
    tokensSaved += beforeTokens - afterTokens;
    charsSaved += Math.max(0, content.length - truncated.length);
    return { ...msg, content: truncated };
  });
  return { messages: out, healedCount, tokensSaved, charsSaved };
}
2203
2453
  function healLoadedMessages(messages, maxChars) {
2204
2454
  const shrunk = shrinkOversizedToolResults(messages, maxChars);
2205
2455
  let healedCount = shrunk.healedCount;
@@ -3162,6 +3412,50 @@ function tokenizeCommand(cmd) {
3162
3412
  if (cur.length > 0) out.push(cur);
3163
3413
  return out;
3164
3414
  }
3415
/**
 * Find the first shell operator that appears as the start of an unquoted
 * token in `cmd`.
 *
 * The command is scanned into tokens separated by spaces or tabs, with
 * single and double quotes grouping text (a backslash escapes the next
 * character inside double quotes only). A token is reported when no part
 * of it was quoted and it begins with one of: `2>&1`, `&>`, `|`, `||`,
 * `&`, `&&`, `2>`, `2>>`, `>`, `>>`, `<`, `<<`.
 *
 * Operators embedded later in a token (`a|b`) and tokens containing any
 * quoted part are not reported.
 *
 * @param cmd Raw command line.
 * @returns The matched operator text, or null when none is found or the
 *   command ends inside an open quote.
 */
function detectShellOperator(cmd) {
  const opPrefix = /^(?:2>&1|&>|\|{1,2}|&{1,2}|2>{1,2}|>{1,2}|<{1,2})/;
  let cur = "";
  let curQuoted = false;
  let quote = null;
  // Inspect the token accumulated so far; quoted tokens are literals.
  const check = () => {
    if (cur.length === 0 || curQuoted) return null;
    const m = opPrefix.exec(cur);
    return m ? (m[0] ?? null) : null;
  };
  for (let i = 0; i < cmd.length; i++) {
    const ch = cmd[i];
    if (quote) {
      if (ch === quote) {
        quote = null;
      } else if (ch === "\\" && quote === '"' && i + 1 < cmd.length) {
        // Inside double quotes a backslash takes the next character verbatim.
        cur += cmd[++i];
        curQuoted = true;
      } else {
        cur += ch;
        curQuoted = true;
      }
      continue;
    }
    if (ch === '"' || ch === "'") {
      quote = ch;
      curQuoted = true;
      continue;
    }
    // Both space and tab end a token, so `ls<TAB>| wc` is still caught.
    if (ch === " " || ch === "\t") {
      const op = check();
      if (op) return op;
      cur = "";
      curQuoted = false;
      continue;
    }
    cur += ch;
  }
  // Unterminated quote: the trailing text is not a complete token.
  if (quote) return null;
  return check();
}
3165
3459
  function isAllowed(cmd, extra = []) {
3166
3460
  const normalized = cmd.trim().replace(/\s+/g, " ");
3167
3461
  const allowlist = [...BUILTIN_ALLOWLIST, ...extra];
@@ -3174,6 +3468,12 @@ function isAllowed(cmd, extra = []) {
3174
3468
  async function runCommand(cmd, opts) {
3175
3469
  const argv = tokenizeCommand(cmd);
3176
3470
  if (argv.length === 0) throw new Error("run_command: empty command");
3471
+ const operator = detectShellOperator(cmd);
3472
+ if (operator !== null) {
3473
+ throw new Error(
3474
+ `run_command: shell operator "${operator}" is not supported \u2014 this tool spawns one process, no shell expansion. Split into separate run_command calls and combine the output in your reasoning (e.g. instead of \`grep foo *.ts | wc -l\`, call \`grep -c foo *.ts\` or two separate commands). To pass "${operator}" as a literal argument, wrap it in quotes.`
3475
+ );
3476
+ }
3177
3477
  const timeoutMs = (opts.timeoutSec ?? DEFAULT_TIMEOUT_SEC) * 1e3;
3178
3478
  const maxChars = opts.maxOutputChars ?? DEFAULT_MAX_OUTPUT_CHARS;
3179
3479
  const spawnOpts = {
@@ -3351,7 +3651,7 @@ function registerShellTools(registry, opts) {
3351
3651
  properties: {
3352
3652
  command: {
3353
3653
  type: "string",
3354
- description: "Full command line. Tokenized with POSIX-ish quoting; no shell expansion, no pipes, no redirects."
3654
+ description: 'Full command line. Tokenized with POSIX-ish quoting; no shell expansion. Pipes (`|`), redirects (`>`, `<`, `2>`), and `&&`/`||` chaining are rejected with an error \u2014 split into separate calls instead. To pass an operator character as a literal argument (e.g. a regex), wrap it in quotes: `grep "a|b" file.txt`.'
3355
3655
  },
3356
3656
  timeoutSec: {
3357
3657
  type: "integer",
@@ -3570,12 +3870,12 @@ ${i + 1}. ${r.title}`);
3570
3870
  }
3571
3871
 
3572
3872
  // src/env.ts
3573
- import { readFileSync as readFileSync4 } from "fs";
3873
+ import { readFileSync as readFileSync5 } from "fs";
3574
3874
  import { resolve as resolve3 } from "path";
3575
3875
  function loadDotenv(path = ".env") {
3576
3876
  let raw;
3577
3877
  try {
3578
- raw = readFileSync4(resolve3(process.cwd(), path), "utf8");
3878
+ raw = readFileSync5(resolve3(process.cwd(), path), "utf8");
3579
3879
  } catch {
3580
3880
  return;
3581
3881
  }
@@ -3594,7 +3894,7 @@ function loadDotenv(path = ".env") {
3594
3894
  }
3595
3895
 
3596
3896
  // src/transcript.ts
3597
- import { createWriteStream, readFileSync as readFileSync5 } from "fs";
3897
+ import { createWriteStream, readFileSync as readFileSync6 } from "fs";
3598
3898
  function recordFromLoopEvent(ev, extra) {
3599
3899
  const rec = {
3600
3900
  ts: (/* @__PURE__ */ new Date()).toISOString(),
@@ -3645,7 +3945,7 @@ function openTranscriptFile(path, meta) {
3645
3945
  return stream;
3646
3946
  }
3647
3947
  function readTranscript(path) {
3648
- const raw = readFileSync5(path, "utf8");
3948
+ const raw = readFileSync6(path, "utf8");
3649
3949
  return parseTranscript(raw);
3650
3950
  }
3651
3951
  function isPlanStateEmptyShape(s) {
@@ -4731,8 +5031,8 @@ async function trySection(load) {
4731
5031
  }
4732
5032
 
4733
5033
  // src/code/edit-blocks.ts
4734
- import { existsSync as existsSync4, mkdirSync as mkdirSync3, readFileSync as readFileSync6, unlinkSync as unlinkSync2, writeFileSync as writeFileSync3 } from "fs";
4735
- import { dirname as dirname4, resolve as resolve4 } from "path";
5034
+ import { existsSync as existsSync4, mkdirSync as mkdirSync3, readFileSync as readFileSync7, unlinkSync as unlinkSync2, writeFileSync as writeFileSync3 } from "fs";
5035
+ import { dirname as dirname5, resolve as resolve4 } from "path";
4736
5036
  var BLOCK_RE = /^(\S[^\n]*)\n<{7} SEARCH\n([\s\S]*?)\n?={7}\n([\s\S]*?)\n?>{7} REPLACE/gm;
4737
5037
  function parseEditBlocks(text) {
4738
5038
  const out = [];
@@ -4770,11 +5070,11 @@ function applyEditBlock(block, rootDir) {
4770
5070
  message: "file does not exist; to create it, use an empty SEARCH block"
4771
5071
  };
4772
5072
  }
4773
- mkdirSync3(dirname4(absTarget), { recursive: true });
5073
+ mkdirSync3(dirname5(absTarget), { recursive: true });
4774
5074
  writeFileSync3(absTarget, block.replace, "utf8");
4775
5075
  return { path: block.path, status: "created" };
4776
5076
  }
4777
- const content = readFileSync6(absTarget, "utf8");
5077
+ const content = readFileSync7(absTarget, "utf8");
4778
5078
  if (searchEmpty) {
4779
5079
  return {
4780
5080
  path: block.path,
@@ -4813,7 +5113,7 @@ function snapshotBeforeEdits(blocks, rootDir) {
4813
5113
  continue;
4814
5114
  }
4815
5115
  try {
4816
- snapshots.push({ path: b.path, prevContent: readFileSync6(abs, "utf8") });
5116
+ snapshots.push({ path: b.path, prevContent: readFileSync7(abs, "utf8") });
4817
5117
  } catch {
4818
5118
  snapshots.push({ path: b.path, prevContent: null });
4819
5119
  }
@@ -4856,25 +5156,25 @@ function sep() {
4856
5156
  }
4857
5157
 
4858
5158
  // src/version.ts
4859
- import { existsSync as existsSync5, mkdirSync as mkdirSync4, readFileSync as readFileSync7, writeFileSync as writeFileSync4 } from "fs";
5159
+ import { existsSync as existsSync5, mkdirSync as mkdirSync4, readFileSync as readFileSync8, writeFileSync as writeFileSync4 } from "fs";
4860
5160
  import { homedir as homedir4 } from "os";
4861
- import { dirname as dirname5, join as join5 } from "path";
4862
- import { fileURLToPath } from "url";
5161
+ import { dirname as dirname6, join as join6 } from "path";
5162
+ import { fileURLToPath as fileURLToPath2 } from "url";
4863
5163
  var REGISTRY_URL = "https://registry.npmjs.org/reasonix/latest";
4864
5164
  var LATEST_CACHE_TTL_MS = 24 * 60 * 60 * 1e3;
4865
5165
  var LATEST_FETCH_TIMEOUT_MS = 2e3;
4866
5166
  function readPackageVersion() {
4867
5167
  try {
4868
- let dir = dirname5(fileURLToPath(import.meta.url));
5168
+ let dir = dirname6(fileURLToPath2(import.meta.url));
4869
5169
  for (let i = 0; i < 6; i++) {
4870
- const p = join5(dir, "package.json");
5170
+ const p = join6(dir, "package.json");
4871
5171
  if (existsSync5(p)) {
4872
- const pkg = JSON.parse(readFileSync7(p, "utf8"));
5172
+ const pkg = JSON.parse(readFileSync8(p, "utf8"));
4873
5173
  if (pkg?.name === "reasonix" && typeof pkg.version === "string") {
4874
5174
  return pkg.version;
4875
5175
  }
4876
5176
  }
4877
- const parent = dirname5(dir);
5177
+ const parent = dirname6(dir);
4878
5178
  if (parent === dir) break;
4879
5179
  dir = parent;
4880
5180
  }
@@ -4884,11 +5184,11 @@ function readPackageVersion() {
4884
5184
  }
4885
5185
  var VERSION = readPackageVersion();
4886
5186
  function cachePath(homeDirOverride) {
4887
- return join5(homeDirOverride ?? homedir4(), ".reasonix", "version-cache.json");
5187
+ return join6(homeDirOverride ?? homedir4(), ".reasonix", "version-cache.json");
4888
5188
  }
4889
5189
  function readCache(homeDirOverride) {
4890
5190
  try {
4891
- const raw = readFileSync7(cachePath(homeDirOverride), "utf8");
5191
+ const raw = readFileSync8(cachePath(homeDirOverride), "utf8");
4892
5192
  const parsed = JSON.parse(raw);
4893
5193
  if (parsed && typeof parsed.version === "string" && typeof parsed.checkedAt === "number") {
4894
5194
  return parsed;
@@ -4900,7 +5200,7 @@ function readCache(homeDirOverride) {
4900
5200
  function writeCache(entry, homeDirOverride) {
4901
5201
  try {
4902
5202
  const p = cachePath(homeDirOverride);
4903
- mkdirSync4(dirname5(p), { recursive: true });
5203
+ mkdirSync4(dirname6(p), { recursive: true });
4904
5204
  writeFileSync4(p, JSON.stringify(entry), "utf8");
4905
5205
  } catch {
4906
5206
  }
@@ -4957,11 +5257,11 @@ function isNpxInstall() {
4957
5257
  }
4958
5258
 
4959
5259
  // src/usage.ts
4960
- import { appendFileSync as appendFileSync2, existsSync as existsSync6, mkdirSync as mkdirSync5, readFileSync as readFileSync8, statSync as statSync3 } from "fs";
5260
+ import { appendFileSync as appendFileSync2, existsSync as existsSync6, mkdirSync as mkdirSync5, readFileSync as readFileSync9, statSync as statSync3 } from "fs";
4961
5261
  import { homedir as homedir5 } from "os";
4962
- import { dirname as dirname6, join as join6 } from "path";
5262
+ import { dirname as dirname7, join as join7 } from "path";
4963
5263
  function defaultUsageLogPath(homeDirOverride) {
4964
- return join6(homeDirOverride ?? homedir5(), ".reasonix", "usage.jsonl");
5264
+ return join7(homeDirOverride ?? homedir5(), ".reasonix", "usage.jsonl");
4965
5265
  }
4966
5266
  function appendUsage(input) {
4967
5267
  const record = {
@@ -4977,7 +5277,7 @@ function appendUsage(input) {
4977
5277
  };
4978
5278
  const path = input.path ?? defaultUsageLogPath();
4979
5279
  try {
4980
- mkdirSync5(dirname6(path), { recursive: true });
5280
+ mkdirSync5(dirname7(path), { recursive: true });
4981
5281
  appendFileSync2(path, `${JSON.stringify(record)}
4982
5282
  `, "utf8");
4983
5283
  } catch {
@@ -4988,7 +5288,7 @@ function readUsageLog(path = defaultUsageLogPath()) {
4988
5288
  if (!existsSync6(path)) return [];
4989
5289
  let raw;
4990
5290
  try {
4991
- raw = readFileSync8(path, "utf8");
5291
+ raw = readFileSync9(path, "utf8");
4992
5292
  } catch {
4993
5293
  return [];
4994
5294
  }
@@ -5181,8 +5481,8 @@ function PlanStateBlock({ planState }) {
5181
5481
  }
5182
5482
 
5183
5483
  // src/cli/ui/markdown.tsx
5184
- import { readFileSync as readFileSync9, statSync as statSync4 } from "fs";
5185
- import { isAbsolute as isAbsolute3, join as join7 } from "path";
5484
+ import { readFileSync as readFileSync10, statSync as statSync4 } from "fs";
5485
+ import { isAbsolute as isAbsolute3, join as join8 } from "path";
5186
5486
  import { Box as Box2, Text as Text2 } from "ink";
5187
5487
  import React2 from "react";
5188
5488
  var SUPERSCRIPT = {
@@ -5260,7 +5560,7 @@ function parseCitationUrl(url) {
5260
5560
  function validateCitation(url, projectRoot) {
5261
5561
  const parts = parseCitationUrl(url);
5262
5562
  if (!parts || !parts.path) return { ok: false, reason: "empty path" };
5263
- const fullPath = isAbsolute3(parts.path) ? parts.path : join7(projectRoot, parts.path);
5563
+ const fullPath = isAbsolute3(parts.path) ? parts.path : join8(projectRoot, parts.path);
5264
5564
  let stat;
5265
5565
  try {
5266
5566
  stat = statSync4(fullPath);
@@ -5271,7 +5571,7 @@ function validateCitation(url, projectRoot) {
5271
5571
  if (parts.startLine === void 0) return { ok: true };
5272
5572
  let lineCount;
5273
5573
  try {
5274
- lineCount = readFileSync9(fullPath, "utf8").split("\n").length;
5574
+ lineCount = readFileSync10(fullPath, "utf8").split("\n").length;
5275
5575
  } catch {
5276
5576
  return { ok: false, reason: "unreadable" };
5277
5577
  }
@@ -6450,170 +6750,6 @@ function formatTokens(n) {
6450
6750
  // src/cli/ui/slash.ts
6451
6751
  import { spawnSync } from "child_process";
6452
6752
 
6453
- // src/tokenizer.ts
6454
- import { readFileSync as readFileSync10 } from "fs";
6455
- import { createRequire } from "module";
6456
- import { dirname as dirname7, join as join8 } from "path";
6457
- import { fileURLToPath as fileURLToPath2 } from "url";
6458
- import { gunzipSync } from "zlib";
6459
- function buildByteToChar() {
6460
- const result = new Array(256);
6461
- const bs = [];
6462
- for (let b = 33; b <= 126; b++) bs.push(b);
6463
- for (let b = 161; b <= 172; b++) bs.push(b);
6464
- for (let b = 174; b <= 255; b++) bs.push(b);
6465
- const cs = bs.slice();
6466
- let n = 0;
6467
- for (let b = 0; b < 256; b++) {
6468
- if (!bs.includes(b)) {
6469
- bs.push(b);
6470
- cs.push(256 + n);
6471
- n++;
6472
- }
6473
- }
6474
- for (let i = 0; i < bs.length; i++) {
6475
- result[bs[i]] = String.fromCodePoint(cs[i]);
6476
- }
6477
- return result;
6478
- }
6479
- var cached = null;
6480
- function resolveDataPath() {
6481
- if (process.env.REASONIX_TOKENIZER_PATH) return process.env.REASONIX_TOKENIZER_PATH;
6482
- try {
6483
- const here = dirname7(fileURLToPath2(import.meta.url));
6484
- return join8(here, "..", "data", "deepseek-tokenizer.json.gz");
6485
- } catch {
6486
- const req = createRequire(import.meta.url);
6487
- return join8(
6488
- dirname7(req.resolve("reasonix/package.json")),
6489
- "data",
6490
- "deepseek-tokenizer.json.gz"
6491
- );
6492
- }
6493
- }
6494
- function loadTokenizer() {
6495
- if (cached) return cached;
6496
- const buf = readFileSync10(resolveDataPath());
6497
- const json = gunzipSync(buf).toString("utf8");
6498
- const data = JSON.parse(json);
6499
- const mergeRank = /* @__PURE__ */ new Map();
6500
- for (let i = 0; i < data.model.merges.length; i++) {
6501
- mergeRank.set(data.model.merges[i], i);
6502
- }
6503
- const splitRegexes = [];
6504
- for (const p of data.pre_tokenizer.pretokenizers) {
6505
- if (p.type === "Split") {
6506
- splitRegexes.push(new RegExp(p.pattern.Regex, "gu"));
6507
- }
6508
- }
6509
- const addedMap = /* @__PURE__ */ new Map();
6510
- const addedContents = [];
6511
- for (const t of data.added_tokens) {
6512
- if (!t.special) {
6513
- addedMap.set(t.content, t.id);
6514
- addedContents.push(t.content);
6515
- }
6516
- }
6517
- addedContents.sort((a, b) => b.length - a.length);
6518
- const addedPattern = addedContents.length ? new RegExp(addedContents.map(escapeRegex).join("|"), "g") : null;
6519
- cached = {
6520
- vocab: data.model.vocab,
6521
- mergeRank,
6522
- splitRegexes,
6523
- byteToChar: buildByteToChar(),
6524
- addedPattern,
6525
- addedMap
6526
- };
6527
- return cached;
6528
- }
6529
- function escapeRegex(s) {
6530
- return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
6531
- }
6532
- function applySplit(chunks, re) {
6533
- const out = [];
6534
- for (const chunk of chunks) {
6535
- if (!chunk) continue;
6536
- re.lastIndex = 0;
6537
- let last = 0;
6538
- for (const m of chunk.matchAll(re)) {
6539
- const idx = m.index ?? 0;
6540
- if (idx > last) out.push(chunk.slice(last, idx));
6541
- if (m[0].length > 0) out.push(m[0]);
6542
- last = idx + m[0].length;
6543
- }
6544
- if (last < chunk.length) out.push(chunk.slice(last));
6545
- }
6546
- return out;
6547
- }
6548
- function byteLevelEncode(s, byteToChar) {
6549
- const bytes = new TextEncoder().encode(s);
6550
- let out = "";
6551
- for (let i = 0; i < bytes.length; i++) out += byteToChar[bytes[i]];
6552
- return out;
6553
- }
6554
- function bpeEncode(piece, mergeRank) {
6555
- if (piece.length <= 1) return piece ? [piece] : [];
6556
- let word = Array.from(piece);
6557
- while (true) {
6558
- let bestIdx = -1;
6559
- let bestRank = Number.POSITIVE_INFINITY;
6560
- for (let i = 0; i < word.length - 1; i++) {
6561
- const pair = `${word[i]} ${word[i + 1]}`;
6562
- const rank = mergeRank.get(pair);
6563
- if (rank !== void 0 && rank < bestRank) {
6564
- bestRank = rank;
6565
- bestIdx = i;
6566
- if (rank === 0) break;
6567
- }
6568
- }
6569
- if (bestIdx < 0) break;
6570
- word = [
6571
- ...word.slice(0, bestIdx),
6572
- word[bestIdx] + word[bestIdx + 1],
6573
- ...word.slice(bestIdx + 2)
6574
- ];
6575
- if (word.length === 1) break;
6576
- }
6577
- return word;
6578
- }
6579
- function encode(text) {
6580
- if (!text) return [];
6581
- const t = loadTokenizer();
6582
- const ids = [];
6583
- const process2 = (segment) => {
6584
- if (!segment) return;
6585
- let chunks = [segment];
6586
- for (const re of t.splitRegexes) chunks = applySplit(chunks, re);
6587
- for (const chunk of chunks) {
6588
- if (!chunk) continue;
6589
- const byteLevel = byteLevelEncode(chunk, t.byteToChar);
6590
- const pieces = bpeEncode(byteLevel, t.mergeRank);
6591
- for (const p of pieces) {
6592
- const id = t.vocab[p];
6593
- if (id !== void 0) ids.push(id);
6594
- }
6595
- }
6596
- };
6597
- if (t.addedPattern) {
6598
- t.addedPattern.lastIndex = 0;
6599
- let last = 0;
6600
- for (const m of text.matchAll(t.addedPattern)) {
6601
- const idx = m.index ?? 0;
6602
- if (idx > last) process2(text.slice(last, idx));
6603
- const id = t.addedMap.get(m[0]);
6604
- if (id !== void 0) ids.push(id);
6605
- last = idx + m[0].length;
6606
- }
6607
- if (last < text.length) process2(text.slice(last));
6608
- } else {
6609
- process2(text);
6610
- }
6611
- return ids;
6612
- }
6613
- function countTokens(text) {
6614
- return encode(text).length;
6615
- }
6616
-
6617
6753
  // src/cli/commands/stats.ts
6618
6754
  import { existsSync as existsSync7, readFileSync as readFileSync11 } from "fs";
6619
6755
  function statsCommand(opts) {
@@ -6764,7 +6900,11 @@ var SLASH_COMMANDS = [
6764
6900
  summary: "break down where context tokens are going: system / tools / per-turn log"
6765
6901
  },
6766
6902
  { cmd: "retry", summary: "truncate & resend your last message (fresh sample)" },
6767
- { cmd: "compact", argsHint: "[cap]", summary: "shrink oversized tool results in the log" },
6903
+ {
6904
+ cmd: "compact",
6905
+ argsHint: "[tokens]",
6906
+ summary: "shrink oversized tool results in the log (cap in tokens, default 4000)"
6907
+ },
6768
6908
  { cmd: "sessions", summary: "list saved sessions (current marked with \u25B8)" },
6769
6909
  { cmd: "forget", summary: "delete the current session from disk" },
6770
6910
  { cmd: "setup", summary: "reminds you to exit and run `reasonix setup`" },
@@ -6838,7 +6978,7 @@ function handleSlash(cmd, args, loop, ctx = {}) {
6838
6978
  " /branch <N|off> run N parallel samples (N>=2), pick most confident",
6839
6979
  " /mcp list MCP servers + tools attached to this session",
6840
6980
  " /setup (exit + reconfigure) \u2192 run `reasonix setup`",
6841
- " /compact [cap] shrink large tool results in history (default 4k/result)",
6981
+ " /compact [tokens] shrink large tool results in history (default 4000 tokens/result)",
6842
6982
  " /think dump the most recent turn's full R1 reasoning (reasoner only)",
6843
6983
  " /tool [N] list tool calls (or dump full output of #N, 1=most recent)",
6844
6984
  " /memory [sub] show pinned memory (REASONIX.md + ~/.reasonix/memory).",
@@ -7066,15 +7206,15 @@ ${entry.text}`
7066
7206
  }
7067
7207
  case "compact": {
7068
7208
  const tight = Number.parseInt(args[0] ?? "", 10);
7069
- const cap = Number.isFinite(tight) && tight >= 500 ? tight : 4e3;
7070
- const { healedCount, charsSaved } = loop.compact(cap);
7209
+ const cap = Number.isFinite(tight) && tight >= 100 ? tight : 4e3;
7210
+ const { healedCount, tokensSaved, charsSaved } = loop.compact(cap);
7071
7211
  if (healedCount === 0) {
7072
7212
  return {
7073
- info: `\u25B8 nothing to compact \u2014 no tool result in history exceeds ${cap.toLocaleString()} chars.`
7213
+ info: `\u25B8 nothing to compact \u2014 no tool result in history exceeds ${cap.toLocaleString()} tokens.`
7074
7214
  };
7075
7215
  }
7076
7216
  return {
7077
- info: `\u25B8 compacted ${healedCount} tool result(s), saved ${charsSaved.toLocaleString()} chars (~${Math.round(charsSaved / 4).toLocaleString()} tokens). Session file rewritten.`
7217
+ info: `\u25B8 compacted ${healedCount} tool result(s) to ${cap.toLocaleString()} tokens each, saved ${tokensSaved.toLocaleString()} tokens (${charsSaved.toLocaleString()} chars). Session file rewritten.`
7078
7218
  };
7079
7219
  }
7080
7220
  case "sessions": {