nubos-pilot 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/bin/np-tools/_commands.cjs +2 -0
  2. package/bin/np-tools/_elision-proxy-entry.cjs +13 -0
  3. package/bin/np-tools/elision-bench.cjs +67 -0
  4. package/bin/np-tools/elision-get.cjs +48 -0
  5. package/bin/np-tools/elision-get.test.cjs +66 -0
  6. package/bin/np-tools/loop-run-round.cjs +25 -11
  7. package/bin/np-tools/plan-milestone.cjs +1 -0
  8. package/bin/np-tools/research-phase.cjs +1 -1
  9. package/bin/np-tools/spawn-headless.cjs +62 -9
  10. package/lib/cache-align.cjs +78 -0
  11. package/lib/cache-align.test.cjs +69 -0
  12. package/lib/compress.cjs +495 -0
  13. package/lib/compress.test.cjs +267 -0
  14. package/lib/config-defaults.cjs +39 -0
  15. package/lib/config-schema.cjs +40 -4
  16. package/lib/elision-bench.cjs +409 -0
  17. package/lib/elision-bench.test.cjs +89 -0
  18. package/lib/elision-proxy.cjs +158 -0
  19. package/lib/elision-proxy.test.cjs +243 -0
  20. package/lib/elision.cjs +163 -0
  21. package/lib/elision.test.cjs +143 -0
  22. package/lib/nubosloop.cjs +1 -1
  23. package/lib/output-steering.cjs +68 -0
  24. package/lib/output-steering.test.cjs +74 -0
  25. package/lib/researcher-swarm.cjs +14 -3
  26. package/lib/runtime/agent-loop.cjs +36 -6
  27. package/lib/runtime/agent-loop.test.cjs +105 -0
  28. package/lib/runtime/dispatch.cjs +6 -6
  29. package/lib/runtime/dispatch.test.cjs +17 -3
  30. package/lib/runtime/providers/openai-compat.cjs +2 -1
  31. package/lib/runtime/providers/openai-compat.test.cjs +9 -0
  32. package/lib/runtime/tools/index.cjs +33 -1
  33. package/lib/runtime/tools/index.test.cjs +24 -0
  34. package/lib/schemas/data/elision-entry.v1.json +16 -0
  35. package/lib/token-cost.cjs +46 -0
  36. package/lib/token-cost.test.cjs +42 -0
  37. package/np-tools.cjs +2 -0
  38. package/package.json +1 -1
@@ -0,0 +1,409 @@
1
+ 'use strict';
2
+
3
+ const fs = require('node:fs');
4
+ const os = require('node:os');
5
+ const path = require('node:path');
6
+ const crypto = require('node:crypto');
7
+
8
+ const compress = require('./compress.cjs');
9
+ const elision = require('./elision.cjs');
10
+ const tokenCost = require('./token-cost.cjs');
11
+
12
// Case-insensitive pattern used by criticalLines() to pick out lines that mention a failure keyword.
+ const CRITICAL_RE = /\b(ERROR|FAIL(ED|URE)?|FATAL|Exception|Traceback|panic|AssertionError|assert(ion)? failed|denied|timeout)\b/i;
13
// Matches an elision marker and captures its 12-character lowercase hex hash in group 1.
+ const MARKER_HASH_RE = /⟦elided:([a-f0-9]{12})\b/;
14
+
15
/**
 * Size of a value in UTF-8 bytes after coercing it to a string.
 *
 * @param {*} s - Value to measure.
 * @returns {number} Byte length of the string form.
 */
function _bytes(s) {
  const text = String(s);
  return Buffer.byteLength(text, 'utf-8');
}
18
+
19
/**
 * Collect the trimmed, non-empty lines of `text` that match CRITICAL_RE.
 *
 * @param {*} text - Text to scan (coerced to a string, split on "\n").
 * @returns {string[]} Matching lines in their original order.
 */
function criticalLines(text) {
  const hits = [];
  for (const line of String(text).split('\n')) {
    if (!CRITICAL_RE.test(line)) continue;
    const trimmed = line.trim();
    if (trimmed) hits.push(trimmed);
  }
  return hits;
}
22
+
23
/**
 * Build the five fixed benchmark fixtures (JSON array, build log, grep
 * results, unified diff, source code). Each fixture carries the text to
 * compress plus the substrings that must survive compression.
 *
 * @returns {Array<{name: string, text: string, critical: string[]}>}
 */
function buildCorpus() {
  const stamp = (i) => `[2026-06-23T10:00:${String(i).padStart(2, '0')}Z]`;

  // 60 JSON rows, exactly one of which (id 41) reports an error.
  const jsonItems = Array.from({ length: 60 }, (_, i) => {
    if (i === 41) return { id: i, status: 'error', message: 'connection refused to db-primary' };
    return { id: i, status: 'ok', message: `row ${i} processed cleanly with no remarks at all` };
  });

  // 80 log lines with a single FATAL entry at index 57.
  const logLines = Array.from({ length: 80 }, (_, i) => (i === 57
    ? `${stamp(i)} FATAL migration 0042 failed: duplicate key value violates unique constraint`
    : `${stamp(i)} INFO step ${i} completed, 0 warnings, elapsed 12ms nominal`));

  // 20 files x 6 near-identical hits, then one distinctive hit at the end.
  const searchLines = [];
  for (let file = 0; file < 20; file += 1) {
    for (let hit = 0; hit < 6; hit += 1) {
      searchLines.push(`src/module${file}/file${file}.cjs:${10 + hit}: const handler = resolve(${hit});`);
    }
  }
  searchLines.push('src/auth/login.cjs:88: throw new Error("invalid credentials");');

  // Synthetic class with a constructor and two bulky methods.
  const codeLines = [
    "'use strict';",
    "const { compute } = require('./engine.cjs');",
    "const { validate } = require('./validate.cjs');",
    '',
    'class PaymentProcessor {',
    ' constructor(config) {',
  ];
  for (let i = 0; i < 10; i += 1) {
    codeLines.push(` this.option${i} = config.option${i} !== undefined ? config.option${i} : defaultFor(${i});`);
  }
  codeLines.push(' }', '');
  codeLines.push(' chargeCustomer(amount, currency) {');
  for (let i = 0; i < 12; i += 1) {
    codeLines.push(` const intermediateStep${i} = compute(amount, currency, this.option${i % 10}, ${i});`);
  }
  codeLines.push(' if (amount <= 0) throw new Error("invalid charge amount");');
  for (let i = 0; i < 8; i += 1) {
    codeLines.push(` this.ledger.push({ step: ${i}, value: intermediateStep${i}, recordedAt: nowIsoString() });`);
  }
  codeLines.push(' return runningTotal;', ' }', '');
  codeLines.push(' refundCustomer(transactionId) {');
  for (let i = 0; i < 12; i += 1) {
    codeLines.push(` const reversalEntry${i} = validate(transactionId, this.option${i % 10}, ${i});`);
  }
  codeLines.push(' return reversalReceipt;', ' }', '}', '', 'module.exports = { PaymentProcessor };');

  // One hunk: 39 context lines around a single changed pair at position 20.
  const diffLines = ['--- a/lib/pay.cjs', '+++ b/lib/pay.cjs', '@@ -1,40 +1,42 @@'];
  for (let i = 0; i < 40; i += 1) {
    if (i !== 20) {
      diffLines.push(` context line ${i} unchanged surrounding code that adds bulk to the hunk`);
      continue;
    }
    diffLines.push('- const fee = base * 0.1;');
    diffLines.push('+ const fee = base * 0.15; // FATAL pricing change');
  }

  const fixture = (name, text, critical) => ({ name, text, critical });
  return [
    fixture('json-array-with-error', JSON.stringify(jsonItems),
      ['connection refused to db-primary']),
    fixture('build-log-with-fatal', logLines.join('\n'),
      ['FATAL migration 0042 failed', 'duplicate key value violates unique constraint']),
    fixture('grep-search-results', searchLines.join('\n'),
      ['src/auth/login.cjs:88', 'invalid credentials']),
    fixture('unified-diff', diffLines.join('\n'),
      ['FATAL pricing change', '0.15']),
    fixture('source-code', codeLines.join('\n'),
      ['class PaymentProcessor {', 'chargeCustomer(amount, currency) {', 'throw new Error("invalid charge amount")', 'module.exports = { PaymentProcessor };']),
  ];
}
80
+
81
/**
 * Build one seeded variant of the five fixture kinds. The seed shifts where
 * the critical line sits in each fixture and is embedded in the generated
 * text and fixture names, so different seeds give distinct fixtures.
 *
 * @param {number} seed - Variant index.
 * @returns {Array<{name: string, text: string, critical: string[]}>}
 */
function _variantFixtures(seed) {
  const stamp = (i) => `[2026-06-23T10:00:${String(i).padStart(2, '0')}Z]`;
  const migration = String(42 + seed).padStart(4, '0');

  const errAt = (41 + seed) % 60;
  const jsonItems = Array.from({ length: 60 }, (_, i) => (i === errAt
    ? { id: i, seed, status: 'error', message: 'connection refused to db-primary shard ' + seed }
    : { id: i, seed, status: 'ok', message: `row ${i} processed cleanly with no remarks at all` }));

  const fatalAt = (57 + seed) % 80;
  const logLines = Array.from({ length: 80 }, (_, i) => (i === fatalAt
    ? `${stamp(i)} FATAL migration ${migration} failed: duplicate key value violates unique constraint`
    : `${stamp(i)} INFO step ${i} completed, 0 warnings, elapsed 12ms nominal run ${seed}`));

  const searchLines = [];
  for (let file = 0; file < 20; file += 1) {
    for (let hit = 0; hit < 6; hit += 1) {
      searchLines.push(`src/module${file}/file${file}_${seed}.cjs:${10 + hit}: const handler = resolve(${hit});`);
    }
  }
  searchLines.push(`src/auth/login${seed}.cjs:88: throw new Error("invalid credentials");`);

  const codeLines = [
    "'use strict';",
    "const { compute } = require('./engine.cjs');",
    "const { validate } = require('./validate.cjs');",
    '',
    `class Processor${seed} {`,
    ' chargeCustomer(amount, currency) {',
  ];
  for (let i = 0; i < 20; i += 1) {
    codeLines.push(` const intermediateStep${i} = compute(amount, currency, this.option${i % 7}, ${i + seed});`);
  }
  codeLines.push(' if (amount <= 0) throw new Error("invalid charge amount");');
  for (let i = 0; i < 16; i += 1) {
    codeLines.push(` this.ledger.push({ step: ${i}, value: intermediateStep${i}, recordedAt: nowIsoString(${seed}) });`);
  }
  codeLines.push(' return runningTotal;', ' }', '', ' refundCustomer(transactionId) {');
  for (let i = 0; i < 16; i += 1) {
    codeLines.push(` const reversalEntry${i} = validate(transactionId, this.option${i % 7}, ${i + seed});`);
  }
  codeLines.push(' return reversalReceipt;', ' }', '}', '', `module.exports = { Processor${seed} };`);

  const changeAt = (20 + seed) % 40;
  const diffLines = [`--- a/lib/pay${seed}.cjs`, `+++ b/lib/pay${seed}.cjs`, '@@ -1,40 +1,42 @@'];
  for (let i = 0; i < 40; i += 1) {
    if (i !== changeAt) {
      diffLines.push(` context line ${i} unchanged surrounding code that adds bulk to the hunk`);
      continue;
    }
    diffLines.push('- const fee = base * 0.1;');
    diffLines.push('+ const fee = base * 0.15; // FATAL pricing change ' + seed);
  }

  const sfx = '-s' + seed;
  const fixture = (base, lines, critical) => ({ name: base + sfx, text: lines, critical });
  return [
    fixture('json-array-with-error', JSON.stringify(jsonItems), ['connection refused to db-primary shard ' + seed]),
    fixture('build-log-with-fatal', logLines.join('\n'), [`FATAL migration ${migration} failed`, 'duplicate key value violates unique constraint']),
    fixture('grep-search-results', searchLines.join('\n'), [`src/auth/login${seed}.cjs:88`, 'invalid credentials']),
    fixture('unified-diff', diffLines.join('\n'), ['FATAL pricing change ' + seed, '0.15']),
    fixture('source-code', codeLines.join('\n'), [`class Processor${seed} {`, 'throw new Error("invalid charge amount")', `module.exports = { Processor${seed} };`]),
  ];
}
125
+
126
// Number of seeded variants generated per corpus size.
const SCALE_SEEDS = Object.freeze({ small: 1, medium: 12, large: 60 });

/**
 * Build a benchmark corpus of the requested size.
 *
 * "small" (or a falsy size) returns the fixed base corpus from buildCorpus();
 * other sizes concatenate SCALE_SEEDS[size] seeded variants.
 *
 * @param {string} [size] - One of "small", "medium", "large".
 * @returns {Array<{name: string, text: string, critical: string[]}>}
 * @throws {Error} When `size` is not a known corpus size.
 */
function buildCorpusScale(size) {
  if (size === 'small' || !size) return buildCorpus();
  // Own-property check: a plain lookup would also resolve inherited keys such
  // as "constructor" or "toString" and silently yield an empty corpus.
  const known = Object.prototype.hasOwnProperty.call(SCALE_SEEDS, size);
  if (!known) throw new Error('unknown corpus size "' + size + '" (small|medium|large)');
  const seeds = SCALE_SEEDS[size];
  const out = [];
  for (let s = 0; s < seeds; s += 1) out.push(..._variantFixtures(s));
  return out;
}
136
+
137
/**
 * Deterministically assign a fixture to the "control" or "treatment" arm.
 *
 * The first 32 bits of sha256(name) are mapped to a fraction in [0, 1);
 * fractions below `holdoutRatio` land in the control arm.
 *
 * @param {*} name - Fixture name (coerced to a string before hashing).
 * @param {number} holdoutRatio - Share of fixtures to hold out as control.
 * @returns {'control'|'treatment'}
 */
function _arm(name, holdoutRatio) {
  const digest = crypto.createHash('sha256').update(String(name)).digest('hex');
  const bucket = parseInt(digest.slice(0, 8), 16) / 0x100000000;
  if (bucket < holdoutRatio) return 'control';
  return 'treatment';
}
142
+
143
/**
 * Run the fidelity benchmark over a scaled corpus, split into a held-out
 * control arm and a treatment arm by _arm().
 *
 * Per-type strata are accumulated for treatment cases only; the summary
 * reports savings and invariant status for both arms.
 *
 * @param {object} [opts]
 * @param {string} [opts.size='medium'] - Corpus size passed to buildCorpusScale().
 * @param {number} [opts.holdoutRatio=0.2] - Control share (used when finite).
 * @param {number} [opts.maxCases] - Optional cap on the number of fixtures.
 * @returns {{strata: object[], summary: object}}
 */
function runScale(opts) {
  const o = opts || {};
  const size = o.size || 'medium';
  const holdoutRatio = Number.isFinite(o.holdoutRatio) ? o.holdoutRatio : 0.2;
  const everything = buildCorpusScale(size);
  const capped = Number.isFinite(o.maxCases) && o.maxCases > 0;
  const corpus = capped ? everything.slice(0, o.maxCases) : everything;

  const arms = { control: [], treatment: [] };
  const strata = {};
  for (const fixture of corpus) {
    const arm = _arm(fixture.name, holdoutRatio);
    const result = fidelityCase(fixture, o);
    result.arm = arm;
    arms[arm].push(result);
    if (arm === 'treatment') {
      const kind = result.type;
      const bucket = strata[kind]
        || (strata[kind] = { type: kind, n: 0, bytes_before: 0, bytes_after: 0, invariants_ok: true });
      bucket.n += 1;
      bucket.bytes_before += result.bytes_before;
      bucket.bytes_after += result.bytes_after;
      if (!result.ok) bucket.invariants_ok = false;
    }
  }

  const total = (list, key) => list.reduce((running, item) => running + item[key], 0);
  const pctSaved = (before, after) => {
    if (!before) return 0;
    return Math.round((1 - after / before) * 100);
  };
  const allOk = (list) => list.every((item) => item.ok);

  const tBefore = total(arms.treatment, 'bytes_before');
  const tAfter = total(arms.treatment, 'bytes_after');
  const tSaved = pctSaved(tBefore, tAfter);
  // With no control cases there is nothing to compare against, so report null.
  let cSaved = null;
  if (arms.control.length) {
    cSaved = pctSaved(total(arms.control, 'bytes_before'), total(arms.control, 'bytes_after'));
  }
  const tOk = allOk(arms.treatment);
  const cOk = allOk(arms.control);

  const strataOut = Object.values(strata).map(
    (s) => Object.assign({}, s, { saved_pct: pctSaved(s.bytes_before, s.bytes_after) }),
  );
  const savings = tokenCost.summarizeSavings({
    bytesBefore: tBefore,
    bytesAfter: tAfter,
    charsPerToken: o.charsPerToken,
    pricePerMTok: o.pricePerMTok,
    currency: o.currency,
  });
  const failed = arms.treatment.concat(arms.control).filter((item) => !item.ok).map((item) => item.name);

  return {
    strata: strataOut,
    summary: {
      size,
      fixtures: corpus.length,
      holdout_ratio: holdoutRatio,
      control_n: arms.control.length,
      treatment_n: arms.treatment.length,
      treatment_saved_pct: tSaved,
      control_saved_pct: cSaved,
      generalization_gap_pct: cSaved === null ? null : Math.abs(tSaved - cSaved),
      savings_est: savings,
      treatment_invariants_ok: tOk,
      control_invariants_ok: cOk,
      invariants_ok: tOk && cOk,
      failed,
    },
  };
}
194
+
195
/**
 * Create a throwaway elision store in a fresh temp directory.
 *
 * @returns {{dir: string, storeFn: function(string, string): *}} The temp
 *   directory plus a store callback that returns null when elision.store throws.
 */
function _tmpStore() {
  const prefix = path.join(os.tmpdir(), 'np-elision-bench-');
  const dir = fs.mkdtempSync(prefix);
  function storeFn(text, type) {
    try {
      return elision.store(text, { type }, dir);
    } catch {
      // Best effort: a failed store is reported to the caller as null.
      return null;
    }
  }
  return { dir, storeFn };
}
203
+
204
/**
 * Compress one fixture and check the fidelity invariants: every critical
 * substring survives, and (when the text changed) the first elision marker
 * retrieves the byte-exact original from a temporary store.
 *
 * @param {{name: string, text: string, critical?: string[]}} fixture
 * @param {object} [opts]
 * @param {number} [opts.minBlockBytes] - Forwarded to compressBlock when finite.
 * @returns {object} Per-fixture result record (sizes, ratio, invariant flags).
 */
function fidelityCase(fixture, opts) {
  const o = opts || {};
  const { dir, storeFn } = _tmpStore();
  try {
    const minBlockBytes = Number.isFinite(o.minBlockBytes) ? o.minBlockBytes : undefined;
    const res = compress.compressBlock(fixture.text, { minBlockBytes, store: storeFn });
    const before = _bytes(fixture.text);
    const after = _bytes(res.compressed);

    // Use the fixture's declared critical substrings, else derive them from the text.
    const declared = fixture.critical && fixture.critical.length;
    const crit = declared ? fixture.critical : criticalLines(fixture.text);
    const critPreserved = crit.every((needle) => res.compressed.includes(needle));

    // Unchanged text is trivially reversible; changed text must round-trip
    // through the marker hash.
    let roundTrips = true;
    if (res.changed) {
      roundTrips = false;
      const marker = res.compressed.match(MARKER_HASH_RE);
      if (marker) {
        const restored = elision.retrieve(marker[1], dir);
        roundTrips = restored.status === 'ok' && restored.original === fixture.text;
      }
    }

    const shrink = before ? after / before : 1;
    return {
      name: fixture.name,
      type: res.type,
      changed: res.changed,
      bytes_before: before,
      bytes_after: after,
      ratio: shrink,
      saved_pct: before ? Math.round((1 - after / before) * 100) : 0,
      critical_total: crit.length,
      critical_preserved: critPreserved,
      reversible: roundTrips,
      retrieval_exact: roundTrips,
      ok: critPreserved && roundTrips,
    };
  } finally {
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch { /* best effort */ }
  }
}
247
+
248
/**
 * Run fidelityCase() over a corpus and aggregate the results.
 *
 * @param {object} [opts] - Forwarded to fidelityCase(); `opts.corpus`
 *   replaces the default buildCorpus() fixtures when non-empty.
 * @returns {{cases: object[], summary: object}}
 */
function runFidelity(opts) {
  const o = opts || {};
  const hasCustom = o.corpus && o.corpus.length;
  const corpus = hasCustom ? o.corpus : buildCorpus();
  const cases = corpus.map((fixture) => fidelityCase(fixture, o));

  let totBefore = 0;
  let totAfter = 0;
  let ratioSum = 0;
  for (const c of cases) {
    totBefore += c.bytes_before;
    totAfter += c.bytes_after;
    ratioSum += c.ratio;
  }
  const failing = cases.filter((c) => !c.ok);

  return {
    cases,
    summary: {
      fixtures: cases.length,
      compressed: cases.filter((c) => c.changed).length,
      avg_ratio: cases.length ? ratioSum / cases.length : 1,
      total_saved_pct: totBefore ? Math.round((1 - totAfter / totBefore) * 100) : 0,
      invariants_ok: cases.every((c) => c.ok),
      failed: failing.map((c) => c.name),
    },
  };
}
266
+
267
/**
 * Build the default question/answer cases for runEquivalence(), using two
 * fixtures from buildCorpus() as context.
 *
 * @returns {Array<{name: string, context: string, question: string, must_contain: string[]}>}
 */
function buildEqCases() {
  const textByName = new Map();
  for (const fixture of buildCorpus()) textByName.set(fixture.name, fixture.text);

  const errorItem = {
    name: 'find-the-error-item',
    context: textByName.get('json-array-with-error'),
    question: 'Which item id has status "error", and what is its message? Answer concisely.',
    must_contain: ['41', 'connection refused'],
  };
  const fatalLine = {
    name: 'find-the-fatal',
    context: textByName.get('build-log-with-fatal'),
    question: 'What FATAL error occurred during migration, and which migration number?',
    must_contain: ['0042', 'duplicate key'],
  };
  return [errorItem, fatalLine];
}
279
+
280
/**
 * Tool definition list offering the single `context-expand` function tool,
 * which takes a required string `hash`.
 *
 * @returns {object[]} A fresh one-element tool array on every call.
 */
function _expandToolSchema() {
  const parameters = {
    type: 'object',
    properties: { hash: { type: 'string' } },
    required: ['hash'],
  };
  const expandTool = {
    name: 'context-expand',
    description: 'Retrieve the full original text behind a ⟦elided:<hash>⟧ marker. Pass the 12-char hash.',
    parameters,
  };
  return [{ type: 'function', function: expandTool }];
}
290
+
291
/**
 * Ask the model one question about `context`, optionally letting it call the
 * `context-expand` tool, for at most four chat rounds.
 *
 * @param {function(object): Promise<{content?: string, toolCalls?: object[]}>} chat
 *   Chat implementation; called with the provider fields plus `messages` and `tools`.
 * @param {object} provider - Provider settings spread into every chat call.
 * @param {string} context - Context text shown to the model.
 * @param {string} question - Question to answer.
 * @param {string} dir - Elision store directory used to resolve hashes.
 * @param {boolean} useTools - Whether to offer the context-expand tool.
 * @returns {Promise<string>} The model's final text. If the round budget runs
 *   out while the model is still calling tools, the text of its last
 *   assistant turn (possibly empty).
 */
async function _ask(chat, provider, context, question, dir, useTools) {
  const sys = 'You answer strictly from the provided context. If a ⟦elided:<hash>⟧ marker hides '
    + 'detail you need, call context-expand with the hash. Be concise.';
  const messages = [
    { role: 'system', content: sys },
    { role: 'user', content: 'Context:\n' + context + '\n\nQuestion: ' + question },
  ];
  const tools = useTools ? _expandToolSchema() : undefined;

  // Resolve one tool call to the text sent back to the model. Malformed
  // arguments become an error string instead of aborting the whole run.
  const runTool = (tc) => {
    if (tc.name !== 'context-expand') return 'Error: unknown tool';
    let parsed = tc.arguments || {};
    if (typeof tc.arguments === 'string') {
      try {
        parsed = JSON.parse(tc.arguments || '{}');
      } catch {
        return 'Error: invalid tool arguments';
      }
    }
    if (!parsed || typeof parsed !== 'object') return 'Error: invalid tool arguments';
    const r = elision.retrieve(parsed.hash, dir);
    return r.status === 'ok' ? r.original : 'Error: ' + r.status;
  };

  let lastAssistantText = '';
  for (let round = 0; round < 4; round += 1) {
    const resp = await chat({ ...provider, messages, tools });
    lastAssistantText = resp.content || '';
    if (!resp.toolCalls || !resp.toolCalls.length) return lastAssistantText;
    messages.push({
      role: 'assistant',
      content: resp.content || '',
      tool_calls: resp.toolCalls.map((tc) => ({
        id: tc.id,
        type: 'function',
        function: {
          name: tc.name,
          arguments: typeof tc.arguments === 'string' ? tc.arguments : JSON.stringify(tc.arguments || {}),
        },
      })),
    });
    for (const tc of resp.toolCalls) {
      messages.push({ role: 'tool', tool_call_id: tc.id, content: runTool(tc) });
    }
  }
  // Budget exhausted: the last message is a tool result (often the full
  // original text). Returning it would score the raw context as the model's
  // answer, so return what the model itself last said.
  return lastAssistantText;
}
318
+
319
/**
 * Case-insensitive check that `answer` contains every needle.
 *
 * @param {*} answer - Text to search (coerced to a string).
 * @param {Array<*>} needles - Required substrings (each coerced to a string).
 * @returns {boolean} True when no needle is missing (vacuously true for []).
 */
function _hasAll(answer, needles) {
  const haystack = String(answer).toLowerCase();
  const missing = needles.some((needle) => {
    const wanted = String(needle).toLowerCase();
    return !haystack.includes(wanted);
  });
  return !missing;
}
323
+
324
/**
 * Compare model answers on raw versus compressed context for each case.
 *
 * Each case is asked twice through the supplied chat implementation: once on
 * the raw context without tools, once on the compressed context with the
 * context-expand tool. A case counts as a regression when the raw answer
 * contains every `must_contain` needle and the compressed answer does not.
 *
 * @param {object} args
 * @param {function} args.chatImpl - Chat function (required).
 * @param {object} args.provider - Provider settings (required).
 * @param {object[]} [args.cases] - Cases; defaults to buildEqCases().
 * @param {number} [args.minBlockBytes] - Forwarded to compressBlock.
 * @returns {Promise<{cases: object[], summary: object}>}
 * @throws {Error} When chatImpl is not a function or provider is missing.
 */
async function runEquivalence(args) {
  const a = args || {};
  const { chatImpl: chat, provider } = a;
  const usable = typeof chat === 'function' && provider;
  if (!usable) {
    throw new Error('runEquivalence requires { chatImpl, provider }');
  }
  const cases = (a.cases && a.cases.length) ? a.cases : buildEqCases();
  const { dir, storeFn } = _tmpStore();
  const results = [];
  try {
    for (const item of cases) {
      const res = compress.compressBlock(item.context, { minBlockBytes: a.minBlockBytes, store: storeFn });
      const shown = res.changed ? res.compressed : item.context;
      const rawAnswer = await _ask(chat, provider, item.context, item.question, dir, false);
      const squeezedAnswer = await _ask(chat, provider, shown, item.question, dir, true);
      const rawOk = _hasAll(rawAnswer, item.must_contain);
      const cmpOk = _hasAll(squeezedAnswer, item.must_contain);
      results.push({
        name: item.name,
        compressed_block: res.changed,
        type: res.type,
        raw_ok: rawOk,
        compressed_ok: cmpOk,
        equivalent: rawOk === cmpOk,
        regression: rawOk && !cmpOk,
      });
    }
  } finally {
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch { /* best effort */ }
  }

  const regressed = results.filter((r) => r.regression);
  return {
    cases: results,
    summary: {
      cases: results.length,
      equivalent: results.filter((r) => r.equivalent).length,
      regressions: regressed.map((r) => r.name),
      no_regression: results.every((r) => !r.regression),
    },
  };
}
361
+
362
/**
 * Render a runFidelity() report as text: one headline followed by one line
 * per case.
 *
 * @param {{summary: object, cases: object[]}} report
 * @returns {string} Newline-joined report.
 */
function formatReport(report) {
  const { summary } = report;
  const verdict = summary.invariants_ok ? 'OK' : 'FAILED: ' + summary.failed.join(', ');
  const headline = `Elision fidelity — ${summary.compressed}/${summary.fixtures}`
    + ` fixtures crushed, avg ratio ${summary.avg_ratio.toFixed(2)}`
    + `, total ${summary.total_saved_pct}% saved, invariants ${verdict}`;

  const rows = [];
  for (const c of report.cases) {
    const mark = c.ok ? '✓' : '✗';
    const kept = c.critical_preserved ? 'kept' : 'LOST';
    const undo = c.reversible ? 'reversible' : 'IRREVERSIBLE';
    rows.push(` ${mark} ${c.name.padEnd(24)} [${c.type}] ${c.saved_pct}% saved, ${c.critical_total} critical line(s) ${kept}, ${undo}`);
  }
  return [headline, ...rows].join('\n');
}
376
+
377
/**
 * Render a runScale() report as text: a headline, an arm comparison line, an
 * optional savings estimate, and one line per stratum sorted by type.
 *
 * The report is not modified; strata are sorted on a copy.
 *
 * @param {{summary: object, strata: object[]}} report
 * @returns {string} Newline-joined report.
 */
function formatScale(report) {
  const s = report.summary;
  const lines = [];
  lines.push('Elision scale fidelity — size=' + s.size + ', ' + s.fixtures + ' fixtures, holdout '
    + Math.round(s.holdout_ratio * 100) + '% (' + s.control_n + ' control / ' + s.treatment_n + ' treatment)');
  lines.push(' treatment: ' + s.treatment_saved_pct + '% saved, invariants ' + (s.treatment_invariants_ok ? 'OK' : 'FAILED')
    + ' · held-out control: ' + (s.control_saved_pct === null ? 'n/a' : s.control_saved_pct + '% saved, invariants ' + (s.control_invariants_ok ? 'OK' : 'FAILED'))
    + (s.generalization_gap_pct === null ? '' : ' · generalization gap ' + s.generalization_gap_pct + '%')
    + (s.failed.length ? ' · failed: ' + s.failed.join(', ') : ''));
  if (s.savings_est) {
    const e = s.savings_est;
    lines.push(' est. saved: ~' + e.tokens_saved_est.toLocaleString('en-US') + ' tokens (treatment, @ ' + e.chars_per_token + ' chars/tok)'
      + (e.cost_saved_est !== undefined ? ' ≈ ' + e.cost_saved_est + ' ' + e.currency + ' @ ' + e.price_per_mtok + '/Mtok' : ' — pass --price-per-mtok for a cost estimate'));
  }
  // Array.prototype.sort works in place; sort a shallow copy so the caller's
  // report keeps its original stratum order.
  const ordered = report.strata.slice().sort((a, b) => a.type.localeCompare(b.type));
  for (const st of ordered) {
    lines.push(' ' + (st.invariants_ok ? '✓' : '✗') + ' ' + st.type.padEnd(12) + ' n=' + String(st.n).padEnd(4) + st.saved_pct + '% saved');
  }
  return lines.join('\n');
}
396
+
397
+ module.exports = {
398
+ CRITICAL_RE,
399
+ criticalLines,
400
+ buildCorpus,
401
+ buildCorpusScale,
402
+ buildEqCases,
403
+ fidelityCase,
404
+ runFidelity,
405
+ runScale,
406
+ runEquivalence,
407
+ formatReport,
408
+ formatScale,
409
+ };
@@ -0,0 +1,89 @@
1
+ 'use strict';
2
+
3
+ const test = require('node:test');
4
+ const assert = require('node:assert');
5
+
6
+ const bench = require('./elision-bench.cjs');
7
+
8
// BENCH-1: the default corpus must compress in full while every fidelity
// invariant holds for each individual case.
test('BENCH-1: fidelity crushes every fixture and holds all invariants', () => {
  const { cases, summary } = bench.runFidelity();
  assert.equal(cases.length, 5);
  assert.ok(summary.invariants_ok, 'invariants must hold: ' + summary.failed.join(', '));
  assert.equal(summary.compressed, 5, 'all fixtures should compress');
  cases.forEach((entry) => {
    assert.ok(entry.critical_preserved, entry.name + ' lost a critical line');
    assert.ok(entry.retrieval_exact, entry.name + ' is not byte-exact reversible');
    assert.ok(entry.ratio < 0.9, entry.name + ' did not save enough to count');
  });
});
19
+
20
// BENCH-2: each default fixture must be detected as its intended content type.
test('BENCH-2: detection routes each fixture to the intended crusher', () => {
  const byName = {};
  for (const entry of bench.runFidelity().cases) byName[entry.name] = entry.type;
  const expected = [
    ['json-array-with-error', 'json-array'],
    ['build-log-with-fatal', 'log'],
    ['grep-search-results', 'search'],
    ['unified-diff', 'diff'],
    ['source-code', 'code'],
  ];
  for (const [fixture, crusher] of expected) {
    assert.equal(byName[fixture], crusher);
  }
});
28
+
29
// BENCH-5: the medium corpus must cover all five crusher types, keep every
// invariant in both arms, and show comparable savings across arms.
test('BENCH-5: scale corpus exercises every crusher type at size and holds invariants', () => {
  const { summary, strata } = bench.runScale({ size: 'medium', holdoutRatio: 0.2 });
  assert.equal(summary.fixtures, 60);
  assert.ok(summary.invariants_ok, 'treatment + held-out control invariants must hold: ' + summary.failed.join(', '));
  assert.ok(summary.control_invariants_ok, 'held-out control fixtures must also pass every fidelity invariant');
  assert.ok(summary.treatment_saved_pct > 50, 'meaningful savings on treatment');
  assert.ok(summary.generalization_gap_pct <= 25, 'control savings track treatment — no overfit (gap ' + summary.generalization_gap_pct + '%)');
  const seenTypes = strata.map((stratum) => stratum.type).sort();
  assert.deepEqual(seenTypes, ['code', 'diff', 'json-array', 'log', 'search'], 'all five crusher types represented');
  strata.forEach((stratum) => {
    assert.ok(stratum.invariants_ok, stratum.type + ' stratum lost an invariant');
  });
});
40
+
41
// BENCH-6: repeated runs must partition identically, and raising the holdout
// ratio must never shrink the control arm.
test('BENCH-6: holdout arm assignment is deterministic and ratio-monotone', () => {
  const run = (holdoutRatio) => bench.runScale({ size: 'medium', holdoutRatio }).summary;
  const first = run(0.2);
  const second = run(0.2);
  assert.equal(first.control_n, second.control_n, 'same corpus + ratio → same partition (sha256, not random)');
  const wider = run(0.5);
  assert.ok(wider.control_n >= first.control_n, 'a larger holdout ratio never shrinks the control arm');
  assert.equal(first.control_n + first.treatment_n, 60);
});
49
+
50
// BENCH-7: each known size yields its expected fixture count; unknown sizes throw.
test('BENCH-7: buildCorpusScale sizes and rejects an unknown size', () => {
  const expectedCounts = [['small', 5], ['medium', 60], ['large', 300]];
  for (const [size, count] of expectedCounts) {
    assert.equal(bench.buildCorpusScale(size).length, count);
  }
  assert.throws(() => bench.buildCorpusScale('huge'), /unknown corpus size/);
});
56
+
57
/**
 * Build a single equivalence case whose answer (the step-40 token) sits on an
 * ordinary INFO line of an 80-line log, with one FATAL line at index 57.
 *
 * @returns {Array<{name: string, context: string, question: string, must_contain: string[]}>}
 */
function needsExpandCase() {
  const logLines = Array.from({ length: 80 }, (_, i) => {
    if (i === 57) return '[2026-06-23T10:00:57Z] FATAL migration 0042 failed: duplicate key';
    const seconds = String(i).padStart(2, '0');
    return `[2026-06-23T10:00:${seconds}Z] INFO step ${i} done token-${i}-marker`;
  });
  return [{
    name: 'needs-expand',
    context: logLines.join('\n'),
    question: 'What token is logged on step 40?',
    must_contain: ['token-40-marker'],
  }];
}
67
+
68
// BENCH-3: a stub model that expands the first marker it sees (once) must be
// able to answer from the retrieved original text.
test('BENCH-3: expanding the marker recovers an elided fact (stub round-trip)', async () => {
  const markerRe = /⟦elided:([a-f0-9]{12})\b/;
  const chat = async ({ messages, tools }) => {
    const transcript = messages.map((m) => String(m.content || '')).join('\n');
    const found = transcript.match(markerRe);
    const expandedBefore = messages.some((m) => m.role === 'tool');
    // Answer with the whole transcript unless an unexpanded marker can be expanded.
    if (!tools || !found || expandedBefore) {
      return { content: transcript, toolCalls: [] };
    }
    return { content: '', toolCalls: [{ id: 't1', name: 'context-expand', arguments: { hash: found[1] } }] };
  };
  const provider = { baseUrl: 'http://stub', model: 'stub' };
  const report = await bench.runEquivalence({ chatImpl: chat, provider, cases: needsExpandCase() });
  assert.ok(report.summary.no_regression, 'regressions: ' + report.summary.regressions.join(', '));
  assert.ok(report.cases[0].compressed_ok, 'expanding agent should recover the elided token');
});
82
+
83
// BENCH-4: a stub model that never calls tools must be flagged as a regression.
test('BENCH-4: a marker-blind model surfaces as a regression', async () => {
  const echo = async ({ messages }) => {
    const transcript = messages.map((m) => String(m.content || '')).join('\n');
    return { content: transcript, toolCalls: [] };
  };
  const provider = { baseUrl: 'http://stub', model: 'stub' };
  const report = await bench.runEquivalence({ chatImpl: echo, provider, cases: needsExpandCase() });
  const [only] = report.cases;
  assert.equal(only.raw_ok, true, 'raw context must contain the answer');
  assert.equal(only.compressed_ok, false, 'marker-blind compressed answer must miss the elided token');
  assert.ok(report.summary.regressions.length >= 1, 'must be reported as a regression');
});
@@ -0,0 +1,158 @@
1
+ 'use strict';
2
+
3
+ const http = require('node:http');
4
+ const https = require('node:https');
5
+ const { URL } = require('node:url');
6
+
7
+ const compress = require('./compress.cjs');
8
+ const elision = require('./elision.cjs');
9
+ const cacheAlign = require('./cache-align.cjs');
10
+ const config = require('./config.cjs');
11
+ const { DEFAULT_COMPRESSION } = require('./config-defaults.cjs');
12
+ const logger = require('./logger.cjs').child('elision-proxy');
13
+
14
// Upstream base URL that createServer() falls back to when opts.upstream is not given.
+ const DEFAULT_UPSTREAM = 'https://api.anthropic.com';
15
+
16
/**
 * Whether the elision proxy should run for the project at `cwd`.
 *
 * Requires the compression context to be enabled and the `compression`
 * config section to set `proxy.enabled` to exactly true. A config read that
 * throws or returns nothing falls back to DEFAULT_COMPRESSION.
 *
 * @param {string} cwd - Project directory.
 * @returns {boolean}
 */
function proxyEnabled(cwd) {
  const cx = elision.compressionContext(cwd);
  if (!cx.enabled) return false;

  let settings = DEFAULT_COMPRESSION;
  try {
    settings = config.tryReadConfigPath(cwd, 'compression', DEFAULT_COMPRESSION) || DEFAULT_COMPRESSION;
  } catch {
    // Unreadable config: keep the defaults assigned above.
  }
  const { proxy } = settings;
  return Boolean(proxy && proxy.enabled === true);
}
23
+
24
/**
 * Compress one text block, recording the effect in `acc`.
 *
 * @param {string} text - Block text.
 * @param {{minBlockBytes: *, store: function}} cx - Compression context.
 * @param {{blocks_compressed: number, bytes_before: number, bytes_after: number}} acc
 *   Running totals; updated only when the block actually changed.
 * @returns {string} The compressed text, or `text` unchanged when compression
 *   throws or reports no change.
 */
function _squeeze(text, cx, acc) {
  let outcome = null;
  try {
    outcome = compress.compressBlock(text, { minBlockBytes: cx.minBlockBytes, store: cx.store });
  } catch {
    // A compressor failure must never alter the payload.
    return text;
  }
  const changed = Boolean(outcome && outcome.changed);
  if (!changed) return text;

  const { compressed } = outcome;
  acc.blocks_compressed += 1;
  acc.bytes_before += Buffer.byteLength(text, 'utf-8');
  acc.bytes_after += Buffer.byteLength(compressed, 'utf-8');
  return compressed;
}
37
+
38
/**
 * Return a copy of a messages request body with `tool_result` payloads
 * compressed through _squeeze().
 *
 * Only `tool_result` blocks are touched: string content directly, or the
 * `text` of `{type: 'text'}` parts when content is an array. The input body
 * is not mutated; untouched blocks are reused by reference.
 *
 * @param {object} body - Parsed request body (expects a `messages` array).
 * @param {object} cx - Compression context; must be enabled and have a `store` function.
 * @returns {{body: object, stats: {blocks_compressed: number, bytes_before: number, bytes_after: number}}}
 *   The original `body` itself when nothing can be done.
 */
function compressAnthropicBody(body, cx) {
  const acc = { blocks_compressed: 0, bytes_before: 0, bytes_after: 0 };
  const usable = cx && cx.enabled && typeof cx.store === 'function' && body && Array.isArray(body.messages);
  if (!usable) {
    return { body, stats: acc };
  }

  // Compress one part of an array-valued tool_result; non-text parts pass through.
  const squeezePart = (part) => {
    if (!part || part.type !== 'text' || typeof part.text !== 'string') return part;
    const squeezed = _squeeze(part.text, cx, acc);
    return squeezed === part.text ? part : Object.assign({}, part, { text: squeezed });
  };

  const squeezeBlock = (block) => {
    if (!block || block.type !== 'tool_result') return block;
    const payload = block.content;
    if (typeof payload === 'string') {
      const squeezed = _squeeze(payload, cx, acc);
      return squeezed === payload ? block : Object.assign({}, block, { content: squeezed });
    }
    if (!Array.isArray(payload)) return block;
    const parts = payload.map((part) => squeezePart(part));
    const touched = parts.some((part, i) => part !== payload[i]);
    return touched ? Object.assign({}, block, { content: parts }) : block;
  };

  const messages = body.messages.map((msg) => {
    if (!msg || !Array.isArray(msg.content)) return msg;
    return Object.assign({}, msg, { content: msg.content.map((block) => squeezeBlock(block)) });
  });
  return { body: Object.assign({}, body, { messages }), stats: acc };
}
69
+
70
/**
 * Join the upstream URL's base path with the incoming request URL.
 *
 * @param {{pathname: string}} upstreamUrl - Parsed upstream URL.
 * @param {string} reqUrl - Incoming request path plus query string.
 * @returns {string} `reqUrl` prefixed with the upstream pathname; a root
 *   pathname adds no prefix and one trailing slash is dropped.
 */
function _upstreamPath(upstreamUrl, reqUrl) {
  const { pathname } = upstreamUrl;
  let prefix = '';
  if (pathname !== '/') {
    prefix = pathname.replace(/\/$/, '');
  }
  return prefix + reqUrl;
}
74
+
75
/**
 * Create (but do not start) the local HTTP proxy.
 *
 * Each request body is buffered in full. When compression is enabled and the
 * body is non-empty JSON, tool_result payloads are compressed and, if
 * `cx.cacheAlign` is set, the body is passed through cache alignment. The
 * request is then forwarded to the upstream and the response is streamed
 * back. Any failure while transforming the body leaves it unchanged.
 *
 * @param {object} [opts]
 * @param {string} [opts.cwd] - Project directory passed to the compression context.
 * @param {string} [opts.upstream] - Upstream base URL (defaults to DEFAULT_UPSTREAM).
 * @param {function(object): void} [opts.onStats] - Called with the compression stats
 *   of every request in which at least one block was compressed.
 * @param {function(string): object} [opts.compressionContext] - Replaces
 *   elision.compressionContext.
 * @returns {import('node:http').Server}
 */
function createServer(opts) {
  const o = opts || {};
  const cwd = o.cwd;
  const upstreamUrl = new URL(o.upstream || DEFAULT_UPSTREAM);
  const isHttps = upstreamUrl.protocol === 'https:';
  const transport = isHttps ? https : http;
  const onStats = typeof o.onStats === 'function' ? o.onStats : null;
  const cxImpl = o.compressionContext || elision.compressionContext;
  // Finding kinds already warned about, so each is logged once per server.
  const warnedVolatile = new Set();

  return http.createServer((req, res) => {
    const chunks = [];
    req.on('error', () => { try { res.destroy(); } catch { /* client gone */ } });
    req.on('data', (c) => chunks.push(c));
    req.on('end', () => {
      let outBody = Buffer.concat(chunks);
      try {
        const cx = cxImpl(cwd);
        // Bodiless requests (GET, HEAD, ...) have nothing to transform; skipping
        // them avoids a JSON.parse failure and a warning on every such request.
        if (cx && cx.enabled && outBody.length > 0) {
          const parsed = JSON.parse(outBody.toString('utf-8'));
          const t = compressAnthropicBody(parsed, cx);
          let nextBody = t.body;
          let dirty = t.stats.blocks_compressed > 0;
          if (t.stats.blocks_compressed > 0 && onStats) {
            try { onStats(t.stats); } catch { /* observer must not break proxying */ }
          }
          if (cx.cacheAlign) {
            const a = cacheAlign.alignAnthropicBody(nextBody);
            for (const f of a.findings) {
              if (warnedVolatile.has(f.kind)) continue;
              warnedVolatile.add(f.kind);
              logger.warn('volatile token in system prompt breaks prompt cache', { kind: f.kind });
            }
            if (a.applied) { nextBody = a.body; dirty = true; }
          }
          // Re-serialize only when something changed, so untouched requests
          // are forwarded byte-for-byte.
          if (dirty) outBody = Buffer.from(JSON.stringify(nextBody), 'utf-8');
        }
      } catch (err) {
        logger.warn('passthrough (body not transformed)', { cause: err && err.message });
      }

      const headers = Object.assign({}, req.headers);
      headers.host = upstreamUrl.host;
      headers['content-length'] = String(Buffer.byteLength(outBody));
      // The body is sent as one buffer with an explicit length, so a client's
      // chunked Transfer-Encoding must not be forwarded alongside it.
      delete headers['transfer-encoding'];

      const fwd = transport.request({
        protocol: upstreamUrl.protocol,
        hostname: upstreamUrl.hostname,
        port: upstreamUrl.port || (isHttps ? 443 : 80),
        method: req.method,
        path: _upstreamPath(upstreamUrl, req.url),
        headers,
      }, (up) => {
        res.writeHead(up.statusCode || 502, up.headers);
        up.pipe(res);
      });
      fwd.on('error', (err) => {
        logger.warn('upstream error', { cause: err && err.message });
        if (res.headersSent) {
          // Part of the upstream response is already on the wire; appending a
          // JSON error would corrupt it, so abort the client connection.
          res.destroy();
          return;
        }
        res.writeHead(502, { 'content-type': 'application/json' });
        res.end(JSON.stringify({ error: { type: 'elision_proxy_upstream_error', message: String(err && err.message) } }));
      });
      fwd.end(outBody);
    });
  });
}
140
+
141
/**
 * Create the proxy server and start listening on 127.0.0.1.
 *
 * @param {object} [opts] - Passed to createServer(); `opts.port` selects the
 *   port (0 or unset lets the OS choose one).
 * @returns {Promise<{server: object, port: number, baseUrl: string}>} Resolves
 *   once listening; rejects on the server's first 'error' event.
 */
function start(opts) {
  const server = createServer(opts);
  const wantedPort = (opts && opts.port) || 0;
  return new Promise((resolve, reject) => {
    const onListening = () => {
      const bound = server.address().port;
      resolve({ server, port: bound, baseUrl: 'http://127.0.0.1:' + bound });
    };
    server.once('error', reject);
    server.listen(wantedPort, '127.0.0.1', onListening);
  });
}
151
+
152
+ module.exports = {
153
+ DEFAULT_UPSTREAM,
154
+ proxyEnabled,
155
+ compressAnthropicBody,
156
+ createServer,
157
+ start,
158
+ };