context-compress 2026.3.13 → 2026.3.21

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -10861,7 +10861,9 @@ var DEFAULTS = {
10861
10861
  searchWindowMs: 6e4,
10862
10862
  searchReduceAfter: 3,
10863
10863
  searchBlockAfter: 8,
10864
- compressionLevel: "normal"
10864
+ compressionLevel: "normal",
10865
+ persistDb: false,
10866
+ dbDir: null
10865
10867
  };
10866
10868
  var LEVEL_OVERRIDES = {
10867
10869
  normal: {},
@@ -10896,7 +10898,9 @@ var ConfigSchema = external_exports.object({
10896
10898
  searchWindowMs: external_exports.number().int().positive().optional(),
10897
10899
  searchReduceAfter: external_exports.number().int().nonnegative().optional(),
10898
10900
  searchBlockAfter: external_exports.number().int().positive().optional(),
10899
- compressionLevel: external_exports.enum(["normal", "compact", "ultra"]).optional()
10901
+ compressionLevel: external_exports.enum(["normal", "compact", "ultra"]).optional(),
10902
+ persistDb: external_exports.boolean().optional(),
10903
+ dbDir: external_exports.string().nullable().optional()
10900
10904
  });
10901
10905
  function parseIntEnv(key) {
10902
10906
  const val = process.env[key];
@@ -10965,6 +10969,12 @@ function loadEnvConfig() {
10965
10969
  if (level === "normal" || level === "compact" || level === "ultra") {
10966
10970
  partial2.compressionLevel = level;
10967
10971
  }
10972
+ if (process.env.CONTEXT_COMPRESS_PERSIST_DB === "1") {
10973
+ partial2.persistDb = true;
10974
+ }
10975
+ if (process.env.CONTEXT_COMPRESS_DB_DIR) {
10976
+ partial2.dbDir = process.env.CONTEXT_COMPRESS_DB_DIR;
10977
+ }
10968
10978
  return partial2;
10969
10979
  }
10970
10980
  var _config = null;
@@ -10980,6 +10990,60 @@ function loadConfig(projectDir2) {
10980
10990
  merged[k] = value;
10981
10991
  }
10982
10992
  }
10993
+ if (merged.maxOutputBytes < 1024) {
10994
+ console.error(
10995
+ `[context-compress] Config: maxOutputBytes clamped from ${merged.maxOutputBytes} to 1024`
10996
+ );
10997
+ merged.maxOutputBytes = 1024;
10998
+ }
10999
+ if (merged.hardCapBytes < merged.maxOutputBytes) {
11000
+ console.error(
11001
+ `[context-compress] Config: hardCapBytes clamped from ${merged.hardCapBytes} to ${merged.maxOutputBytes}`
11002
+ );
11003
+ merged.hardCapBytes = merged.maxOutputBytes;
11004
+ }
11005
+ if (merged.intentSearchThreshold < 0) {
11006
+ console.error(
11007
+ `[context-compress] Config: intentSearchThreshold clamped from ${merged.intentSearchThreshold} to 0`
11008
+ );
11009
+ merged.intentSearchThreshold = 0;
11010
+ }
11011
+ if (merged.searchLimit < 1) {
11012
+ console.error(`[context-compress] Config: searchLimit clamped from ${merged.searchLimit} to 1`);
11013
+ merged.searchLimit = 1;
11014
+ }
11015
+ if (merged.searchWindowMs < 1e3) {
11016
+ console.error(
11017
+ `[context-compress] Config: searchWindowMs clamped from ${merged.searchWindowMs} to 1000`
11018
+ );
11019
+ merged.searchWindowMs = 1e3;
11020
+ }
11021
+ if (merged.searchReduceAfter < 1) {
11022
+ console.error(
11023
+ `[context-compress] Config: searchReduceAfter clamped from ${merged.searchReduceAfter} to 1`
11024
+ );
11025
+ merged.searchReduceAfter = 1;
11026
+ }
11027
+ if (merged.searchBlockAfter < merged.searchReduceAfter + 1) {
11028
+ const minVal = merged.searchReduceAfter + 1;
11029
+ console.error(
11030
+ `[context-compress] Config: searchBlockAfter clamped from ${merged.searchBlockAfter} to ${minVal}`
11031
+ );
11032
+ merged.searchBlockAfter = minVal;
11033
+ }
11034
+ if (merged.searchMaxBytes < 1024) {
11035
+ console.error(
11036
+ `[context-compress] Config: searchMaxBytes clamped from ${merged.searchMaxBytes} to 1024`
11037
+ );
11038
+ merged.searchMaxBytes = 1024;
11039
+ }
11040
+ if (merged.batchMaxBytes < 1024) {
11041
+ console.error(
11042
+ `[context-compress] Config: batchMaxBytes clamped from ${merged.batchMaxBytes} to 1024`
11043
+ );
11044
+ merged.batchMaxBytes = 1024;
11045
+ }
11046
+ if (merged.dbDir) merged.persistDb = true;
10983
11047
  _config = merged;
10984
11048
  return _config;
10985
11049
  }
@@ -10997,7 +11061,7 @@ function debug(...args) {
10997
11061
  }
10998
11062
 
10999
11063
  // src/server.ts
11000
- import { readFileSync as readFileSync2, statSync } from "node:fs";
11064
+ import { readFileSync as readFileSync2, realpathSync, statSync } from "node:fs";
11001
11065
  import { dirname, join as join4, resolve } from "node:path";
11002
11066
  import { fileURLToPath } from "node:url";
11003
11067
 
@@ -21152,6 +21216,54 @@ import { execFileSync, spawn } from "node:child_process";
21152
21216
  import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
21153
21217
  import { tmpdir } from "node:os";
21154
21218
  import { join as join2 } from "node:path";
21219
+
21220
+ // src/utils.ts
21221
+ function detectInjectionPatterns(content) {
21222
+ const warnings = [];
21223
+ const patterns = [
21224
+ { re: /ignore\s+(all\s+)?previous\s+instructions/i, label: "instruction override" },
21225
+ { re: /you\s+are\s+now\s+/i, label: "role reassignment" },
21226
+ {
21227
+ re: /(?:^|\n)\s*system\s*:\s*(?:you are|you're|as an? )/im,
21228
+ label: "system prompt injection"
21229
+ },
21230
+ { re: /\[INST\]|\[\/INST\]|<\|im_start\|>|<\|im_end\|>/i, label: "chat template injection" },
21231
+ { re: /\n\n(?:Human|Assistant):/m, label: "chat delimiter injection" },
21232
+ { re: /reveal\s+(your|the)\s+(system|secret|confidential)/i, label: "data exfiltration" },
21233
+ { re: /act\s+as\s+(if\s+you\s+are|a)\s+/i, label: "role manipulation" }
21234
+ ];
21235
+ for (const { re, label } of patterns) {
21236
+ if (re.test(content)) {
21237
+ warnings.push(label);
21238
+ }
21239
+ }
21240
+ return warnings;
21241
+ }
21242
+ async function limitConcurrency(tasks, limit) {
21243
+ const results = new Array(tasks.length);
21244
+ let nextIndex = 0;
21245
+ async function runNext() {
21246
+ while (nextIndex < tasks.length) {
21247
+ const index = nextIndex++;
21248
+ try {
21249
+ const value = await tasks[index]();
21250
+ results[index] = { status: "fulfilled", value };
21251
+ } catch (reason) {
21252
+ results[index] = { status: "rejected", reason };
21253
+ }
21254
+ }
21255
+ }
21256
+ const workers = Array.from({ length: Math.min(limit, tasks.length) }, () => runNext());
21257
+ await Promise.all(workers);
21258
+ return results;
21259
+ }
21260
+ function formatBytes(bytes) {
21261
+ if (bytes < 1024) return `${bytes}B`;
21262
+ if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)}KB`;
21263
+ return `${(bytes / (1024 * 1024)).toFixed(1)}MB`;
21264
+ }
21265
+
21266
+ // src/executor.ts
21155
21267
  var DEFAULT_TIMEOUT = 3e4;
21156
21268
  var SAFE_ENV_KEYS = [
21157
21269
  "PATH",
@@ -21316,20 +21428,26 @@ function smartTruncate(output, maxBytes) {
21316
21428
  `;
21317
21429
  return headLines.join("\n") + separator + tailLines.join("\n");
21318
21430
  }
21319
- function formatBytes(bytes) {
21320
- if (bytes < 1024) return `${bytes}B`;
21321
- if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)}KB`;
21322
- return `${(bytes / (1024 * 1024)).toFixed(1)}MB`;
21323
- }
21324
21431
  var SubprocessExecutor = class {
21325
21432
  runtimes;
21326
21433
  config;
21327
21434
  env;
21435
+ activeProcesses = /* @__PURE__ */ new Set();
21328
21436
  constructor(runtimes, config3) {
21329
21437
  this.runtimes = runtimes;
21330
21438
  this.config = config3;
21331
21439
  this.env = buildEnv(config3);
21332
21440
  }
21441
+ /** Kill all active child processes and their process trees. */
21442
+ shutdown() {
21443
+ for (const proc of this.activeProcesses) {
21444
+ try {
21445
+ if (proc.pid) killProcessTree(proc.pid);
21446
+ } catch {
21447
+ }
21448
+ }
21449
+ this.activeProcesses.clear();
21450
+ }
21333
21451
  /**
21334
21452
  * Execute code in a subprocess.
21335
21453
  */
@@ -21431,6 +21549,7 @@ var SubprocessExecutor = class {
21431
21549
  shell: useShell,
21432
21550
  detached: process.platform !== "win32"
21433
21551
  });
21552
+ this.activeProcesses.add(proc);
21434
21553
  proc.stdout?.on("data", (chunk) => {
21435
21554
  totalBytes += chunk.length;
21436
21555
  if (totalBytes > hardCap) {
@@ -21451,6 +21570,7 @@ var SubprocessExecutor = class {
21451
21570
  });
21452
21571
  proc.on("error", (err) => {
21453
21572
  debug("Process error:", err.message);
21573
+ this.activeProcesses.delete(proc);
21454
21574
  if (!resolved) {
21455
21575
  resolved = true;
21456
21576
  resolve2({
@@ -21463,6 +21583,7 @@ var SubprocessExecutor = class {
21463
21583
  }
21464
21584
  });
21465
21585
  proc.on("close", (code) => {
21586
+ this.activeProcesses.delete(proc);
21466
21587
  if (resolved) return;
21467
21588
  resolved = true;
21468
21589
  let stdout = Buffer.concat(stdoutChunks).toString("utf-8");
@@ -21476,8 +21597,10 @@ var SubprocessExecutor = class {
21476
21597
  stdout += `
21477
21598
  [output capped at ${formatBytes(hardCap)} \u2014 process killed]`;
21478
21599
  }
21479
- stdout = deduplicateLines(stdout);
21480
- stdout = groupErrorLines(stdout);
21600
+ if (stdout.length > 1e4) {
21601
+ stdout = deduplicateLines(stdout);
21602
+ stdout = groupErrorLines(stdout);
21603
+ }
21481
21604
  const truncated = Buffer.byteLength(stdout) > maxOutput;
21482
21605
  if (truncated) {
21483
21606
  stdout = smartTruncate(stdout, maxOutput);
@@ -21516,10 +21639,12 @@ __cm_main().then(()=>{${epilogue}}).catch(e=>{console.error(e);${epilogue}proces
21516
21639
  }
21517
21640
 
21518
21641
  // src/network.ts
21642
+ import dns from "node:dns";
21519
21643
  function isPrivateHost(hostname2) {
21520
21644
  const h = hostname2.startsWith("[") && hostname2.endsWith("]") ? hostname2.slice(1, -1) : hostname2;
21521
21645
  const lower = h.toLowerCase();
21522
21646
  if (lower === "localhost" || lower === "0.0.0.0") return true;
21647
+ if (/^0\./.test(h)) return true;
21523
21648
  if (/^127\./.test(h)) return true;
21524
21649
  if (/^10\./.test(h)) return true;
21525
21650
  if (/^172\.(1[6-9]|2\d|3[01])\./.test(h)) return true;
@@ -21527,12 +21652,50 @@ function isPrivateHost(hostname2) {
21527
21652
  if (/^169\.254\./.test(h)) return true;
21528
21653
  if (/^100\.(6[4-9]|[7-9]\d|1[01]\d|12[0-7])\./.test(h)) return true;
21529
21654
  if (lower === "::1") return true;
21655
+ if (lower === "::" || lower === "0:0:0:0:0:0:0:0") return true;
21530
21656
  const mappedMatch = lower.match(/^::ffff:(\d+\.\d+\.\d+\.\d+)$/);
21531
21657
  if (mappedMatch) return isPrivateHost(mappedMatch[1]);
21532
21658
  if (/^fe[89ab]/i.test(h)) return true;
21533
21659
  if (/^f[cd]/i.test(h)) return true;
21534
21660
  return false;
21535
21661
  }
21662
+ async function resolveAndValidate(url) {
21663
+ const parsed = new URL(url);
21664
+ const hostname2 = parsed.hostname;
21665
+ if (/^\d+\.\d+\.\d+\.\d+$/.test(hostname2) || hostname2.includes(":")) {
21666
+ if (isPrivateHost(hostname2)) {
21667
+ throw new Error(`Blocked: resolved IP ${hostname2} is a private/internal address`);
21668
+ }
21669
+ return { url, resolvedIp: null };
21670
+ }
21671
+ let resolvedIp = null;
21672
+ let v4Error = false;
21673
+ let v6Error = false;
21674
+ const [v4Result, v6Result] = await Promise.allSettled([
21675
+ dns.promises.lookup(hostname2, { family: 4 }),
21676
+ dns.promises.lookup(hostname2, { family: 6 })
21677
+ ]);
21678
+ if (v4Result.status === "fulfilled") {
21679
+ if (isPrivateHost(v4Result.value.address)) {
21680
+ throw new Error(`Blocked: ${hostname2} resolved to private IP ${v4Result.value.address}`);
21681
+ }
21682
+ resolvedIp = v4Result.value.address;
21683
+ } else {
21684
+ v4Error = true;
21685
+ }
21686
+ if (v6Result.status === "fulfilled") {
21687
+ if (isPrivateHost(v6Result.value.address)) {
21688
+ throw new Error(`Blocked: ${hostname2} resolved to private IPv6 ${v6Result.value.address}`);
21689
+ }
21690
+ if (!resolvedIp) resolvedIp = v6Result.value.address;
21691
+ } else {
21692
+ v6Error = true;
21693
+ }
21694
+ if (v4Error && v6Error) {
21695
+ throw new Error(`DNS resolution failed for ${hostname2}: unable to verify host safety`);
21696
+ }
21697
+ return { url, resolvedIp };
21698
+ }
21536
21699
 
21537
21700
  // src/runtime/index.ts
21538
21701
  import { exec } from "node:child_process";
@@ -21857,8 +22020,9 @@ function asciiBar(ratio, width = BAR_WIDTH) {
21857
22020
  return `[${"\u2588".repeat(filled)}${"\u2591".repeat(empty)}] ${(ratio * 100).toFixed(0)}%`;
21858
22021
  }
21859
22022
  function tokenCost(tokens) {
21860
- const cost = tokens / 1e6 * 3;
21861
- return cost >= 0.01 ? `~$${cost.toFixed(2)}` : "<$0.01";
22023
+ const sonnetCost = tokens / 1e6 * 3;
22024
+ if (sonnetCost < 0.01) return "<$0.01";
22025
+ return `~$${sonnetCost.toFixed(2)} (Sonnet)`;
21862
22026
  }
21863
22027
  var SessionTracker = class {
21864
22028
  stats = {
@@ -21890,10 +22054,14 @@ var SessionTracker = class {
21890
22054
  const totalReturned = Object.values(snap.bytesReturned).reduce((a, b) => a + b, 0);
21891
22055
  const keptOut = snap.bytesIndexed + snap.bytesSandboxed;
21892
22056
  const totalProcessed = keptOut + totalReturned;
21893
- const savingsRatio = totalReturned > 0 ? totalProcessed / totalReturned : 1;
22057
+ const savingsRatio = totalReturned > 0 ? totalProcessed / totalReturned : keptOut > 0 ? Number.POSITIVE_INFINITY : 1;
21894
22058
  const reductionPct = totalProcessed > 0 ? ((1 - totalReturned / totalProcessed) * 100).toFixed(1) : "0.0";
21895
- const estTokens = Math.round(totalReturned / 4);
21896
- const estTokensAvoided = Math.round(keptOut / 4);
22059
+ const estTokensLo = Math.round(totalReturned / 5);
22060
+ const estTokensHi = Math.round(totalReturned / 3);
22061
+ const estTokensAvoidedLo = Math.round(keptOut / 5);
22062
+ const estTokensAvoidedHi = Math.round(keptOut / 3);
22063
+ const estTokensMid = Math.round(totalReturned / 4);
22064
+ const estTokensAvoidedMid = Math.round(keptOut / 4);
21897
22065
  const lines = [];
21898
22066
  lines.push("## Session Statistics\n");
21899
22067
  lines.push("| Metric | Value |");
@@ -21903,13 +22071,14 @@ var SessionTracker = class {
21903
22071
  lines.push(`| Total data processed | ${formatBytes(totalProcessed)} |`);
21904
22072
  lines.push(`| Kept in sandbox | ${formatBytes(keptOut)} |`);
21905
22073
  lines.push(`| Context consumed | ${formatBytes(totalReturned)} |`);
21906
- lines.push(`| Est. tokens used | ~${estTokens.toLocaleString()} (${tokenCost(estTokens)}) |`);
21907
22074
  lines.push(
21908
- `| Est. tokens saved | ~${estTokensAvoided.toLocaleString()} (${tokenCost(estTokensAvoided)}) |`
22075
+ `| Est. tokens used | ~${estTokensLo.toLocaleString()}-${estTokensHi.toLocaleString()} tokens (${tokenCost(estTokensMid)}) |`
21909
22076
  );
21910
22077
  lines.push(
21911
- `| **Savings ratio** | **${savingsRatio.toFixed(1)}x** (${reductionPct}% reduction) |`
22078
+ `| Est. tokens saved | ~${estTokensAvoidedLo.toLocaleString()}-${estTokensAvoidedHi.toLocaleString()} tokens (${tokenCost(estTokensAvoidedMid)}) |`
21912
22079
  );
22080
+ const savingsLabel = Number.isFinite(savingsRatio) ? `${savingsRatio.toFixed(1)}x` : "\u221E";
22081
+ lines.push(`| **Savings ratio** | **${savingsLabel}** (${reductionPct}% reduction) |`);
21913
22082
  if (totalProcessed > 0) {
21914
22083
  const savingsBar = asciiBar(keptOut / totalProcessed);
21915
22084
  lines.push(`
@@ -21923,11 +22092,12 @@ var SessionTracker = class {
21923
22092
  const maxBytes = Math.max(...Object.values(snap.bytesReturned));
21924
22093
  for (const [name, calls] of Object.entries(snap.calls)) {
21925
22094
  const bytes = snap.bytesReturned[name] ?? 0;
21926
- const tokens = Math.round(bytes / 4);
22095
+ const tokLo = Math.round(bytes / 5);
22096
+ const tokHi = Math.round(bytes / 3);
21927
22097
  const barRatio = maxBytes > 0 ? bytes / maxBytes : 0;
21928
22098
  const bar = "\u2588".repeat(Math.max(1, Math.round(barRatio * 15)));
21929
22099
  lines.push(
21930
- ` ${name.padEnd(16)} ${String(calls).padStart(3)} calls ${bar} ${formatBytes(bytes)} (~${tokens.toLocaleString()} tok)`
22100
+ ` ${name.padEnd(16)} ${String(calls).padStart(3)} calls ${bar} ${formatBytes(bytes)} (~${tokLo.toLocaleString()}-${tokHi.toLocaleString()} tok)`
21931
22101
  );
21932
22102
  }
21933
22103
  }
@@ -21940,7 +22110,7 @@ Context-compress kept ${formatBytes(keptOut)} out of context (${reductionPct}% s
21940
22110
  };
21941
22111
 
21942
22112
  // src/store.ts
21943
- import { readdirSync, unlinkSync } from "node:fs";
22113
+ import { mkdirSync as mkdirSync2, readdirSync, unlinkSync } from "node:fs";
21944
22114
  import { tmpdir as tmpdir2 } from "node:os";
21945
22115
  import { join as join3 } from "node:path";
21946
22116
  import Database from "better-sqlite3";
@@ -22031,20 +22201,15 @@ var STOPWORDS = /* @__PURE__ */ new Set([
22031
22201
  "how",
22032
22202
  "its",
22033
22203
  "may",
22034
- "new",
22035
22204
  "now",
22036
22205
  "old",
22037
22206
  "see",
22038
22207
  "way",
22039
22208
  "who",
22040
22209
  "did",
22041
- "get",
22042
- "got",
22043
- "let",
22044
22210
  "say",
22045
22211
  "she",
22046
22212
  "too",
22047
- "use",
22048
22213
  "will",
22049
22214
  "with",
22050
22215
  "this",
@@ -22058,7 +22223,6 @@ var STOPWORDS = /* @__PURE__ */ new Set([
22058
22223
  "them",
22059
22224
  "than",
22060
22225
  "each",
22061
- "make",
22062
22226
  "like",
22063
22227
  "just",
22064
22228
  "over",
@@ -22098,21 +22262,7 @@ var STOPWORDS = /* @__PURE__ */ new Set([
22098
22262
  "where",
22099
22263
  "here",
22100
22264
  "were",
22101
- "much",
22102
- "update",
22103
- "updates",
22104
- "updated",
22105
- "deps",
22106
- "dev",
22107
- "tests",
22108
- "test",
22109
- "add",
22110
- "added",
22111
- "fix",
22112
- "fixed",
22113
- "run",
22114
- "running",
22115
- "using"
22265
+ "much"
22116
22266
  ]);
22117
22267
  var HEADING_RE = /^(#{1,4})\s+(.+)$/;
22118
22268
  var SEPARATOR_RE = /^[-_*]{3,}\s*$/;
@@ -22125,17 +22275,20 @@ function sanitizeQuery(raw) {
22125
22275
  const words = q.split(/\s+/).filter((w) => w.length >= 2).map((w) => `"${w}"`);
22126
22276
  return words.length > 0 ? words.join(" OR ") : "";
22127
22277
  }
22128
- function levenshtein(a, b) {
22278
+ function levenshtein(a, b, maxDist) {
22129
22279
  if (a.length === 0) return b.length;
22130
22280
  if (b.length === 0) return a.length;
22131
22281
  let prev = Array.from({ length: b.length + 1 }, (_, i) => i);
22132
22282
  let curr = new Array(b.length + 1);
22133
22283
  for (let i = 1; i <= a.length; i++) {
22134
22284
  curr[0] = i;
22285
+ let rowMin = curr[0];
22135
22286
  for (let j = 1; j <= b.length; j++) {
22136
22287
  const cost = a[i - 1] === b[j - 1] ? 0 : 1;
22137
22288
  curr[j] = Math.min(prev[j] + 1, curr[j - 1] + 1, prev[j - 1] + cost);
22289
+ if (curr[j] < rowMin) rowMin = curr[j];
22138
22290
  }
22291
+ if (maxDist !== void 0 && rowMin > maxDist) return maxDist + 1;
22139
22292
  [prev, curr] = [curr, prev];
22140
22293
  }
22141
22294
  return prev[b.length];
@@ -22148,8 +22301,18 @@ var ContentStore = class {
22148
22301
  insertChunkStmt;
22149
22302
  vocabCountStmt;
22150
22303
  vocabInsertStmt;
22151
- constructor(dbPath) {
22152
- const path = dbPath ?? join3(tmpdir2(), `context-compress-${process.pid}.db`);
22304
+ constructor(options) {
22305
+ let path;
22306
+ if (typeof options === "string") {
22307
+ path = options;
22308
+ } else if (options?.persistDb || options?.dbDir) {
22309
+ const dir = options.dbDir ?? join3(process.env.CLAUDE_PROJECT_DIR ?? process.cwd(), ".context-compress");
22310
+ mkdirSync2(dir, { recursive: true });
22311
+ path = join3(dir, "store.db");
22312
+ debug("Using persistent DB at", path);
22313
+ } else {
22314
+ path = (typeof options === "object" ? options?.dbPath : void 0) ?? join3(tmpdir2(), `context-compress-${process.pid}.db`);
22315
+ }
22153
22316
  this.db = new Database(path);
22154
22317
  this.db.pragma("journal_mode = WAL");
22155
22318
  this.db.pragma("synchronous = NORMAL");
@@ -22267,22 +22430,22 @@ var ContentStore = class {
22267
22430
  }
22268
22431
  return { query, results: [] };
22269
22432
  }
22270
- porterSearch(sanitized, source, limit) {
22433
+ ftsSearch(table, sanitized, source, limit) {
22271
22434
  const sourceFilter = source ? "AND sources.label LIKE '%' || ? || '%'" : "";
22272
22435
  const params = [sanitized];
22273
22436
  if (source) params.push(source);
22274
22437
  params.push(limit);
22275
22438
  const sql = `
22276
22439
  SELECT
22277
- chunks.title,
22278
- chunks.content,
22279
- chunks.content_type,
22440
+ ${table}.title,
22441
+ ${table}.content,
22442
+ ${table}.content_type,
22280
22443
  sources.label,
22281
- bm25(chunks, 2.0, 1.0) AS rank,
22282
- highlight(chunks, 1, char(2), char(3)) AS highlighted
22283
- FROM chunks
22284
- JOIN sources ON sources.id = chunks.source_id
22285
- WHERE chunks MATCH ? ${sourceFilter}
22444
+ bm25(${table}, 2.0, 1.0) AS rank,
22445
+ highlight(${table}, 1, char(2), char(3)) AS highlighted
22446
+ FROM ${table}
22447
+ JOIN sources ON sources.id = ${table}.source_id
22448
+ WHERE ${table} MATCH ? ${sourceFilter}
22286
22449
  ORDER BY rank
22287
22450
  LIMIT ?
22288
22451
  `;
@@ -22295,41 +22458,15 @@ var ContentStore = class {
22295
22458
  score: Math.abs(row.rank)
22296
22459
  }));
22297
22460
  } catch (e) {
22298
- debug("Porter search error:", e);
22461
+ debug(`FTS search error (${table}):`, e);
22299
22462
  return [];
22300
22463
  }
22301
22464
  }
22465
+ porterSearch(sanitized, source, limit) {
22466
+ return this.ftsSearch("chunks", sanitized, source, limit);
22467
+ }
22302
22468
  trigramSearch(sanitized, source, limit) {
22303
- const sourceFilter = source ? "AND sources.label LIKE '%' || ? || '%'" : "";
22304
- const params = [sanitized];
22305
- if (source) params.push(source);
22306
- params.push(limit);
22307
- const sql = `
22308
- SELECT
22309
- chunks_trigram.title,
22310
- chunks_trigram.content,
22311
- chunks_trigram.content_type,
22312
- sources.label,
22313
- bm25(chunks_trigram, 2.0, 1.0) AS rank,
22314
- highlight(chunks_trigram, 1, char(2), char(3)) AS highlighted
22315
- FROM chunks_trigram
22316
- JOIN sources ON sources.id = chunks_trigram.source_id
22317
- WHERE chunks_trigram MATCH ? ${sourceFilter}
22318
- ORDER BY rank
22319
- LIMIT ?
22320
- `;
22321
- try {
22322
- const rows = this.db.prepare(sql).all(...params);
22323
- return rows.map((row) => ({
22324
- title: row.title,
22325
- snippet: extractSnippet(row.highlighted),
22326
- source: row.label,
22327
- score: Math.abs(row.rank)
22328
- }));
22329
- } catch (e) {
22330
- debug("Trigram search error:", e);
22331
- return [];
22332
- }
22469
+ return this.ftsSearch("chunks_trigram", sanitized, source, limit);
22333
22470
  }
22334
22471
  /**
22335
22472
  * Fuzzy correction using vocabulary + Levenshtein distance.
@@ -22347,7 +22484,7 @@ var ContentStore = class {
22347
22484
  let bestWord = word;
22348
22485
  let bestDist = maxDist + 1;
22349
22486
  for (const { word: candidate } of candidates) {
22350
- const dist = levenshtein(word.toLowerCase(), candidate.toLowerCase());
22487
+ const dist = levenshtein(word.toLowerCase(), candidate.toLowerCase(), maxDist);
22351
22488
  if (dist < bestDist && dist <= maxDist) {
22352
22489
  bestDist = dist;
22353
22490
  bestWord = candidate;
@@ -22364,7 +22501,8 @@ var ContentStore = class {
22364
22501
  updateVocabulary(content) {
22365
22502
  const currentCount = this.vocabCountStmt.get().cnt;
22366
22503
  if (currentCount >= MAX_VOCABULARY) return;
22367
- const words = content.split(WORD_SPLIT_RE).filter((w) => w.length >= 3 && !STOPWORDS.has(w.toLowerCase()));
22504
+ const sample = content.length > 51200 ? content.slice(0, 51200) : content;
22505
+ const words = sample.split(WORD_SPLIT_RE).filter((w) => w.length >= 3 && !STOPWORDS.has(w.toLowerCase()));
22368
22506
  const unique = new Set(words.map((w) => w.toLowerCase()));
22369
22507
  const insert = this.vocabInsertStmt;
22370
22508
  let added = 0;
@@ -22486,6 +22624,10 @@ function chunkMarkdown(content) {
22486
22624
  }
22487
22625
  currentLines.push(line);
22488
22626
  }
22627
+ if (inFence) {
22628
+ debug("Warning: unclosed code fence detected during markdown chunking");
22629
+ hasCode = true;
22630
+ }
22489
22631
  flush();
22490
22632
  return chunks;
22491
22633
  }
@@ -22499,7 +22641,7 @@ function chunkPlainText(content, linesPerChunk = 20, overlap = 2) {
22499
22641
  return {
22500
22642
  title: trimmed.split("\n")[0].slice(0, 80),
22501
22643
  content: trimmed,
22502
- hasCode: FENCE_RE.test(trimmed)
22644
+ hasCode: /`{3,}/.test(trimmed)
22503
22645
  };
22504
22646
  }).filter(Boolean);
22505
22647
  }
@@ -22567,8 +22709,14 @@ var ALL_LANGUAGES = [
22567
22709
  var LANGUAGE_ENUM = ALL_LANGUAGES;
22568
22710
  var projectDir = process.env.CLAUDE_PROJECT_DIR ?? process.cwd();
22569
22711
  function isWithinProject(absPath) {
22570
- const normalized = resolve(absPath);
22571
- return normalized === projectDir || normalized.startsWith(`${projectDir}/`);
22712
+ try {
22713
+ const normalized = realpathSync(resolve(absPath));
22714
+ const realProjectDir = realpathSync(projectDir);
22715
+ return normalized === realProjectDir || normalized.startsWith(`${realProjectDir}/`);
22716
+ } catch {
22717
+ const normalized = resolve(absPath);
22718
+ return normalized === projectDir || normalized.startsWith(`${projectDir}/`);
22719
+ }
22572
22720
  }
22573
22721
  function getVersion() {
22574
22722
  try {
@@ -22600,9 +22748,58 @@ async function createServer(config3) {
22600
22748
  const bunDetected = hasBun(runtimes);
22601
22749
  debug("Runtimes detected:", runtimes.size);
22602
22750
  const executor = new SubprocessExecutor(runtimes, config3);
22603
- const store = new ContentStore();
22751
+ let store;
22752
+ let dbFallback = false;
22753
+ try {
22754
+ store = new ContentStore({ persistDb: config3.persistDb, dbDir: config3.dbDir });
22755
+ } catch (e) {
22756
+ debug("Failed to create DB, falling back to in-memory:", e);
22757
+ store = new ContentStore(":memory:");
22758
+ dbFallback = true;
22759
+ }
22604
22760
  const tracker = new SessionTracker();
22761
+ let activeExecutions = 0;
22762
+ const MAX_CONCURRENT_EXECUTIONS = 8;
22763
+ const EXECUTION_LIMIT_ERROR = "Error: too many concurrent executions. Try again shortly.";
22764
+ async function withExecutionLimit(fn) {
22765
+ if (activeExecutions >= MAX_CONCURRENT_EXECUTIONS) {
22766
+ throw new Error(EXECUTION_LIMIT_ERROR);
22767
+ }
22768
+ activeExecutions++;
22769
+ try {
22770
+ return await fn();
22771
+ } finally {
22772
+ activeExecutions--;
22773
+ }
22774
+ }
22775
+ function applyIntentFilter(output, intent, sourceLabel) {
22776
+ if (Buffer.byteLength(output) <= config3.intentSearchThreshold) return output;
22777
+ const indexed = store.index(output, sourceLabel);
22778
+ tracker.trackIndexed(Buffer.byteLength(output));
22779
+ const searchResults = store.search(intent, { limit: 3 });
22780
+ const terms = store.getDistinctiveTerms(indexed.sourceId);
22781
+ let filtered = `Indexed ${indexed.totalChunks} sections from ${sourceLabel}.
22782
+ `;
22783
+ filtered += `${searchResults.results.length} sections matched "${intent}":
22784
+
22785
+ `;
22786
+ for (const hit of searchResults.results) {
22787
+ filtered += ` - **${hit.title}**: ${hit.snippet.slice(0, 200)}
22788
+ `;
22789
+ }
22790
+ if (terms.length > 0 && config3.compressionLevel !== "ultra") {
22791
+ filtered += `
22792
+ Searchable terms: ${terms.join(", ")}
22793
+ `;
22794
+ }
22795
+ filtered += "\nUse search(queries: [...]) to retrieve full content of any section.";
22796
+ return compactLabel(filtered, config3.compressionLevel);
22797
+ }
22605
22798
  const shutdown = () => {
22799
+ try {
22800
+ executor.shutdown();
22801
+ } catch {
22802
+ }
22606
22803
  try {
22607
22804
  store.close();
22608
22805
  } catch {
@@ -22611,6 +22808,16 @@ async function createServer(config3) {
22611
22808
  process.on("SIGINT", shutdown);
22612
22809
  process.on("SIGTERM", shutdown);
22613
22810
  process.on("beforeExit", shutdown);
22811
+ process.on("uncaughtException", (err) => {
22812
+ debug("Uncaught exception:", err);
22813
+ shutdown();
22814
+ process.exit(1);
22815
+ });
22816
+ process.on("unhandledRejection", (err) => {
22817
+ debug("Unhandled rejection:", err);
22818
+ shutdown();
22819
+ process.exit(1);
22820
+ });
22614
22821
  const searchCalls = [];
22615
22822
  const server2 = new McpServer({
22616
22823
  name: "context-compress",
@@ -22618,7 +22825,7 @@ async function createServer(config3) {
22618
22825
  });
22619
22826
  server2.tool(
22620
22827
  "execute",
22621
- `Execute code in a sandboxed subprocess. Only stdout enters context \u2014 raw data stays in the subprocess. Use instead of bash/cat when output would exceed 20 lines. ${bunDetected ? "(Bun detected \u2014 JS/TS runs 3-5x faster) " : ""}Available: ${ALL_LANGUAGES.join(", ")}.
22828
+ `Execute code in a sandboxed subprocess. Only stdout enters context \u2014 raw data stays in the subprocess. Use instead of bash/cat when output would exceed ~5KB. ${bunDetected ? "(Bun detected \u2014 JS/TS runs 3-5x faster) " : ""}Available: ${ALL_LANGUAGES.join(", ")}.
22622
22829
 
22623
22830
  PREFER THIS OVER BASH for: API calls (gh, curl, aws), test runners (npm test, pytest), git queries (git log, git diff), data processing, and ANY CLI command that may produce large output. Bash should only be used for file mutations, git writes, and navigation.`,
22624
22831
  {
@@ -22632,7 +22839,24 @@ PREFER THIS OVER BASH for: API calls (gh, curl, aws), test runners (npm test, py
22632
22839
  timeout: external_exports.number().default(3e4).describe("Max execution time in ms")
22633
22840
  },
22634
22841
  async ({ language, code, intent, timeout }) => {
22635
- const result = await executor.execute({ language, code, timeout });
22842
+ const codeBytes = Buffer.byteLength(code);
22843
+ if (codeBytes > 1024e3) {
22844
+ return {
22845
+ content: [
22846
+ {
22847
+ type: "text",
22848
+ text: `Error: code too large (${(codeBytes / 1024).toFixed(0)}KB). Max 1MB.`
22849
+ }
22850
+ ]
22851
+ };
22852
+ }
22853
+ let result;
22854
+ try {
22855
+ result = await withExecutionLimit(() => executor.execute({ language, code, timeout }));
22856
+ } catch (e) {
22857
+ const msg = e instanceof Error ? e.message : String(e);
22858
+ return { content: [{ type: "text", text: msg }] };
22859
+ }
22636
22860
  if (result.networkBytes) {
22637
22861
  tracker.trackSandboxed(result.networkBytes);
22638
22862
  }
@@ -22643,27 +22867,8 @@ PREFER THIS OVER BASH for: API calls (gh, curl, aws), test runners (npm test, py
22643
22867
  STDERR:
22644
22868
  ${result.stderr}`;
22645
22869
  }
22646
- if (intent && Buffer.byteLength(output) > config3.intentSearchThreshold) {
22647
- const indexed = store.index(output, `execute:${language}`);
22648
- tracker.trackIndexed(Buffer.byteLength(output));
22649
- const searchResults = store.search(intent, { limit: 3 });
22650
- const terms = store.getDistinctiveTerms(indexed.sourceId);
22651
- let filtered = `Indexed ${indexed.totalChunks} sections from execute output.
22652
- `;
22653
- filtered += `${searchResults.results.length} sections matched "${intent}":
22654
-
22655
- `;
22656
- for (const hit of searchResults.results) {
22657
- filtered += ` - **${hit.title}**: ${hit.snippet.slice(0, 200)}
22658
- `;
22659
- }
22660
- if (terms.length > 0 && config3.compressionLevel !== "ultra") {
22661
- filtered += `
22662
- Searchable terms: ${terms.join(", ")}
22663
- `;
22664
- }
22665
- filtered += "\nUse search(queries: [...]) to retrieve full content of any section.";
22666
- output = compactLabel(filtered, config3.compressionLevel);
22870
+ if (intent) {
22871
+ output = applyIntentFilter(output, intent, `execute:${language}`);
22667
22872
  }
22668
22873
  const responseBytes = Buffer.byteLength(output);
22669
22874
  tracker.trackCall("execute", responseBytes);
@@ -22683,6 +22888,17 @@ Searchable terms: ${terms.join(", ")}
22683
22888
  timeout: external_exports.number().default(3e4).describe("Max execution time in ms")
22684
22889
  },
22685
22890
  async ({ path: filePath, language, code, intent, timeout }) => {
22891
+ const codeBytes = Buffer.byteLength(code);
22892
+ if (codeBytes > 1024e3) {
22893
+ return {
22894
+ content: [
22895
+ {
22896
+ type: "text",
22897
+ text: `Error: code too large (${(codeBytes / 1024).toFixed(0)}KB). Max 1MB.`
22898
+ }
22899
+ ]
22900
+ };
22901
+ }
22686
22902
  const absPath = resolve(projectDir, filePath);
22687
22903
  if (!isWithinProject(absPath)) {
22688
22904
  return {
@@ -22694,12 +22910,20 @@ Searchable terms: ${terms.join(", ")}
22694
22910
  ]
22695
22911
  };
22696
22912
  }
22697
- const result = await executor.executeFile({
22698
- language,
22699
- code,
22700
- filePath: absPath,
22701
- timeout
22702
- });
22913
+ let result;
22914
+ try {
22915
+ result = await withExecutionLimit(
22916
+ () => executor.executeFile({
22917
+ language,
22918
+ code,
22919
+ filePath: absPath,
22920
+ timeout
22921
+ })
22922
+ );
22923
+ } catch (e) {
22924
+ const msg = e instanceof Error ? e.message : String(e);
22925
+ return { content: [{ type: "text", text: msg }] };
22926
+ }
22703
22927
  let output = result.stdout;
22704
22928
  if (result.stderr && result.exitCode !== 0) {
22705
22929
  output += `
@@ -22707,27 +22931,8 @@ Searchable terms: ${terms.join(", ")}
22707
22931
  STDERR:
22708
22932
  ${result.stderr}`;
22709
22933
  }
22710
- if (intent && Buffer.byteLength(output) > config3.intentSearchThreshold) {
22711
- const indexed = store.index(output, `file:${filePath}`);
22712
- tracker.trackIndexed(Buffer.byteLength(output));
22713
- const searchResults = store.search(intent, { limit: 3 });
22714
- const terms = store.getDistinctiveTerms(indexed.sourceId);
22715
- let filtered = `Indexed ${indexed.totalChunks} sections from "${filePath}" into knowledge base.
22716
- `;
22717
- filtered += `${searchResults.results.length} sections matched "${intent}":
22718
-
22719
- `;
22720
- for (const hit of searchResults.results) {
22721
- filtered += ` - **${hit.title}**: ${hit.snippet.slice(0, 200)}
22722
- `;
22723
- }
22724
- if (terms.length > 0 && config3.compressionLevel !== "ultra") {
22725
- filtered += `
22726
- Searchable terms: ${terms.join(", ")}
22727
- `;
22728
- }
22729
- filtered += "\nUse search(queries: [...]) to retrieve full content of any section.";
22730
- output = compactLabel(filtered, config3.compressionLevel);
22934
+ if (intent) {
22935
+ output = applyIntentFilter(output, intent, `file:${filePath}`);
22731
22936
  }
22732
22937
  const responseBytes = Buffer.byteLength(output);
22733
22938
  tracker.trackCall("execute_file", responseBytes);
@@ -22757,20 +22962,38 @@ Searchable terms: ${terms.join(", ")}
22757
22962
  ]
22758
22963
  };
22759
22964
  }
22760
- const fileStat = statSync(absPath);
22761
- if (fileStat.size > 50 * 1024 * 1024) {
22965
+ try {
22966
+ const fileStat = statSync(absPath);
22967
+ if (fileStat.size > 50 * 1024 * 1024) {
22968
+ return {
22969
+ content: [
22970
+ {
22971
+ type: "text",
22972
+ text: `Error: file "${filePath}" is too large (${(fileStat.size / 1024 / 1024).toFixed(1)}MB). Max 50MB.`
22973
+ }
22974
+ ]
22975
+ };
22976
+ }
22977
+ text = readFileSync2(absPath, "utf-8");
22978
+ label = source ?? filePath;
22979
+ } catch (e) {
22980
+ const msg = e instanceof Error ? e.message : String(e);
22981
+ return {
22982
+ content: [{ type: "text", text: `Error reading "${filePath}": ${msg}` }]
22983
+ };
22984
+ }
22985
+ } else if (content) {
22986
+ const contentBytes = Buffer.byteLength(content);
22987
+ if (contentBytes > 50 * 1024 * 1024) {
22762
22988
  return {
22763
22989
  content: [
22764
22990
  {
22765
22991
  type: "text",
22766
- text: `Error: file "${filePath}" is too large (${(fileStat.size / 1024 / 1024).toFixed(1)}MB). Max 50MB.`
22992
+ text: `Error: content too large (${(contentBytes / 1024 / 1024).toFixed(1)}MB). Max 50MB.`
22767
22993
  }
22768
22994
  ]
22769
22995
  };
22770
22996
  }
22771
- text = readFileSync2(absPath, "utf-8");
22772
- label = source ?? filePath;
22773
- } else if (content) {
22774
22997
  text = content;
22775
22998
  } else {
22776
22999
  return {
@@ -22869,13 +23092,35 @@ ${hit.snippet}
22869
23092
  content: [{ type: "text", text: `Error: invalid URL "${url}"` }]
22870
23093
  };
22871
23094
  }
23095
+ let resolvedIp = null;
23096
+ try {
23097
+ const validated = await resolveAndValidate(url);
23098
+ resolvedIp = validated.resolvedIp;
23099
+ } catch (err) {
23100
+ return {
23101
+ content: [
23102
+ {
23103
+ type: "text",
23104
+ text: `Error: ${err instanceof Error ? err.message : "DNS validation failed"}`
23105
+ }
23106
+ ]
23107
+ };
23108
+ }
22872
23109
  const label = source ?? url;
22873
- const fetchCode = buildFetchCode(url);
22874
- const result = await executor.execute({
22875
- language: "javascript",
22876
- code: fetchCode,
22877
- timeout: 3e4
22878
- });
23110
+ const fetchCode = buildFetchCode(url, resolvedIp);
23111
+ let result;
23112
+ try {
23113
+ result = await withExecutionLimit(
23114
+ () => executor.execute({
23115
+ language: "javascript",
23116
+ code: fetchCode,
23117
+ timeout: 3e4
23118
+ })
23119
+ );
23120
+ } catch (e) {
23121
+ const msg = e instanceof Error ? e.message : String(e);
23122
+ return { content: [{ type: "text", text: msg }] };
23123
+ }
22879
23124
  if (result.exitCode !== 0 || !result.stdout.trim()) {
22880
23125
  const errMsg = `Failed to fetch ${url}: ${result.stderr || "empty response"}`;
22881
23126
  tracker.trackCall("fetch_and_index", Buffer.byteLength(errMsg));
@@ -22883,6 +23128,7 @@ ${hit.snippet}
22883
23128
  }
22884
23129
  const markdown = result.stdout;
22885
23130
  tracker.trackSandboxed(result.networkBytes ?? 0);
23131
+ const injectionWarnings = detectInjectionPatterns(markdown);
22886
23132
  const indexed = store.index(markdown, label);
22887
23133
  tracker.trackIndexed(Buffer.byteLength(markdown));
22888
23134
  const preview = markdown.slice(0, 3072);
@@ -22899,6 +23145,11 @@ ${preview}`;
22899
23145
  Searchable terms: ${terms.join(", ")}`;
22900
23146
  }
22901
23147
  output += "\n\nUse search(queries: [...]) to retrieve full content of any section.";
23148
+ if (injectionWarnings.length > 0) {
23149
+ output += `
23150
+
23151
+ \u26A0 Content safety notice: detected patterns (${injectionWarnings.join(", ")}). Review indexed content before relying on it.`;
23152
+ }
22902
23153
  tracker.trackCall("fetch_and_index", Buffer.byteLength(output));
22903
23154
  return { content: [{ type: "text", text: output }] };
22904
23155
  }
@@ -22919,15 +23170,18 @@ Searchable terms: ${terms.join(", ")}`;
22919
23170
  timeout: external_exports.number().default(6e4).describe("Max execution time in ms (default: 60s)")
22920
23171
  },
22921
23172
  async ({ commands, queries, timeout }) => {
22922
- const commandResults = await Promise.allSettled(
22923
- commands.map(async (cmd) => {
22924
- const result = await executor.execute({
22925
- language: "shell",
22926
- code: cmd.command,
22927
- timeout
22928
- });
23173
+ const commandResults = await limitConcurrency(
23174
+ commands.map((cmd) => async () => {
23175
+ const result = await withExecutionLimit(
23176
+ () => executor.execute({
23177
+ language: "shell",
23178
+ code: cmd.command,
23179
+ timeout
23180
+ })
23181
+ );
22929
23182
  return { label: cmd.label, result };
22930
- })
23183
+ }),
23184
+ 4
22931
23185
  );
22932
23186
  let combined = "";
22933
23187
  const inventory = [];
@@ -23072,6 +23326,11 @@ Searchable terms: ${terms.join(", ")}`;
23072
23326
  );
23073
23327
  }
23074
23328
  }
23329
+ if (dbFallback) {
23330
+ lines.push(
23331
+ "\n\u26A0 **Warning:** Persistent DB creation failed \u2014 using in-memory storage. Indexed data will not survive restarts."
23332
+ );
23333
+ }
23075
23334
  const output = lines.join("\n");
23076
23335
  tracker.trackCall("discover", Buffer.byteLength(output));
23077
23336
  return { content: [{ type: "text", text: output }] };
@@ -23085,13 +23344,30 @@ Searchable terms: ${terms.join(", ")}`;
23085
23344
  }
23086
23345
  };
23087
23346
  }
23088
- function buildFetchCode(url) {
23089
- const escaped = JSON.stringify(url);
23090
- return `
23091
- const url = ${escaped};
23092
- const resp = await fetch(url);
23347
+ function buildFetchCode(url, resolvedIp) {
23348
+ let fetchSetup;
23349
+ if (resolvedIp) {
23350
+ const pinnedUrl = new URL(url);
23351
+ const originalHost = pinnedUrl.host;
23352
+ pinnedUrl.hostname = resolvedIp;
23353
+ fetchSetup = `
23354
+ const url = ${JSON.stringify(pinnedUrl.toString())};
23355
+ const resp = await fetch(url, { headers: { 'Host': ${JSON.stringify(originalHost)} }, redirect: 'error' });`;
23356
+ } else {
23357
+ fetchSetup = `
23358
+ const url = ${JSON.stringify(url)};
23359
+ const resp = await fetch(url, { redirect: 'error' });`;
23360
+ }
23361
+ return `${fetchSetup}
23093
23362
  if (!resp.ok) { console.error("HTTP " + resp.status); process.exit(1); }
23363
+ const cl = resp.headers.get('content-length');
23364
+ if (cl && parseInt(cl, 10) > 10 * 1024 * 1024) {
23365
+ console.error("Response too large: " + cl + " bytes"); process.exit(1);
23366
+ }
23094
23367
  const html = await resp.text();
23368
+ if (html.length > 10 * 1024 * 1024) {
23369
+ console.error("Response body too large: " + html.length + " chars"); process.exit(1);
23370
+ }
23095
23371
 
23096
23372
  // Strip unwanted tags
23097
23373
  let md = html
@@ -23125,12 +23401,15 @@ md = md.replace(/<br\\s*\\/?>/gi, "\\n");
23125
23401
  md = md.replace(/<[^>]+>/g, "");
23126
23402
 
23127
23403
  // Decode entities
23128
- md = md.replace(/&amp;/g, "&")
23129
- .replace(/&lt;/g, "<")
23404
+ md = md.replace(/&lt;/g, "<")
23130
23405
  .replace(/&gt;/g, ">")
23131
23406
  .replace(/&quot;/g, '"')
23132
23407
  .replace(/&#39;/g, "'")
23133
- .replace(/&nbsp;/g, " ");
23408
+ .replace(/&apos;/g, "'")
23409
+ .replace(/&nbsp;/g, " ")
23410
+ .replace(/&#(\\d+);/g, (_, n) => { const c = parseInt(n, 10); return c > 0 && c <= 0x10FFFF ? String.fromCodePoint(c) : ''; })
23411
+ .replace(/&#x([0-9a-fA-F]+);/g, (_, h) => { const c = parseInt(h, 16); return c > 0 && c <= 0x10FFFF ? String.fromCodePoint(c) : ''; })
23412
+ .replace(/&amp;/g, "&");
23134
23413
 
23135
23414
  // Clean whitespace
23136
23415
  md = md.replace(/\\n{3,}/g, "\\n\\n").trim();