nubos-pilot 1.3.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/bin/np-tools/_commands.cjs +2 -0
- package/bin/np-tools/_elision-proxy-entry.cjs +13 -0
- package/bin/np-tools/doctor.cjs +25 -3
- package/bin/np-tools/elision-bench.cjs +67 -0
- package/bin/np-tools/elision-get.cjs +48 -0
- package/bin/np-tools/elision-get.test.cjs +66 -0
- package/bin/np-tools/loop-run-round.cjs +25 -11
- package/bin/np-tools/plan-milestone.cjs +1 -0
- package/bin/np-tools/research-phase.cjs +1 -1
- package/bin/np-tools/resume-work.cjs +9 -0
- package/bin/np-tools/resume-work.test.cjs +21 -1
- package/bin/np-tools/spawn-headless.cjs +62 -9
- package/lib/cache-align.cjs +78 -0
- package/lib/cache-align.test.cjs +69 -0
- package/lib/checkpoint-reconcile.cjs +42 -0
- package/lib/checkpoint-reconcile.test.cjs +106 -0
- package/lib/compress.cjs +495 -0
- package/lib/compress.test.cjs +267 -0
- package/lib/config-defaults.cjs +39 -0
- package/lib/config-schema.cjs +40 -4
- package/lib/elision-bench.cjs +409 -0
- package/lib/elision-bench.test.cjs +89 -0
- package/lib/elision-proxy.cjs +158 -0
- package/lib/elision-proxy.test.cjs +243 -0
- package/lib/elision.cjs +163 -0
- package/lib/elision.test.cjs +143 -0
- package/lib/git.cjs +4 -2
- package/lib/nubosloop.cjs +1 -1
- package/lib/output-steering.cjs +68 -0
- package/lib/output-steering.test.cjs +74 -0
- package/lib/researcher-swarm.cjs +14 -3
- package/lib/runtime/agent-loop.cjs +36 -6
- package/lib/runtime/agent-loop.test.cjs +105 -0
- package/lib/runtime/dispatch.cjs +6 -6
- package/lib/runtime/dispatch.test.cjs +17 -3
- package/lib/runtime/providers/openai-compat.cjs +2 -1
- package/lib/runtime/providers/openai-compat.test.cjs +9 -0
- package/lib/runtime/tools/index.cjs +33 -1
- package/lib/runtime/tools/index.test.cjs +24 -0
- package/lib/schemas/data/elision-entry.v1.json +16 -0
- package/lib/token-cost.cjs +46 -0
- package/lib/token-cost.test.cjs +42 -0
- package/np-tools.cjs +2 -0
- package/package.json +1 -1
- package/workflows/execute-phase.md +10 -2
package/lib/compress.cjs
ADDED
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Default minimum size (UTF-8 bytes) a block must reach before compressBlock
// attempts to compress it.
const MIN_BLOCK_BYTES = 2048;
// compressBlock discards a result whose compressed/original length ratio is
// above this value.
const MAX_RATIO = 0.9;

// crushJsonArray: minimum array length, number of leading/trailing items that
// are always kept, and the fraction of items targeted by sampling.
const JSON_MIN_ITEMS = 20;
const JSON_KEEP_HEAD = 5;
const JSON_KEEP_TAIL = 3;
const JSON_TARGET_RATIO = 0.3;

// crushSearch: number of leading files considered, non-error matches sampled
// per file, and the cap applied while adding sampled matches.
const SEARCH_MAX_FILES = 15;
const SEARCH_MAX_PER_FILE = 5;
const SEARCH_MAX_TOTAL = 30;

// crushDiff: context lines kept on each side of a kept diff line.
const DIFF_CONTEXT = 2;

// Brace-language heuristics used by _looksLikeCode and crushCode.
// CODE_SIG_RE: line that looks like a declaration/signature.
const CODE_SIG_RE = /(^|[\s(])(function|class|def|func|fn|interface|struct|impl|enum|namespace|trait|module|public|private|protected|static|async|export|import|const|let|var|type)\b|=>\s*\{|\)\s*\{\s*$/;
// CODE_OPEN_RE: line that ends by opening a block.
const CODE_OPEN_RE = /\{\s*$/;
// CODE_KEEP_RE: lines crushCode keeps regardless of nesting depth.
const CODE_KEEP_RE = /\b(throw|TODO|FIXME|XXX|HACK)\b/i;
// Minimum line count before text is considered code (also used for Python).
const CODE_MIN_LINES = 15;
// CODE_ISH_RE: line containing a brace or ending in a semicolon.
const CODE_ISH_RE = /[{}]|;\s*$/;
// Fraction of non-empty lines that must look code-like in _looksLikeCode.
const CODE_ISH_RATIO = 0.6;

// Python heuristics used by _looksLikePython and crushPython.
// PY_HEADER_RE: decorator, def/class, or a control-flow header.
const PY_HEADER_RE = /^\s*(@|async\s+def\s|def\s|class\s|if\s|elif\s|else\b|for\s|while\s|with\s|try\b|except\b|finally\b)/;
// PY_KEEP_RE: lines crushPython always keeps.
const PY_KEEP_RE = /\b(raise|assert|TODO|FIXME|XXX|HACK)\b/;
// PY_SIG_RE: decorator or def/class line.
const PY_SIG_RE = /^\s*(@|async\s+def\s|def\s|class\s)/;

// Prose heuristics used by _looksLikeProse and crushProse: size and sentence
// thresholds, head/tail sentences always kept, sampling target, and the
// minimum share of alphabetic characters among non-whitespace characters.
const PROSE_MIN_BYTES = 800;
const PROSE_MIN_SENTENCES = 8;
const PROSE_KEEP_HEAD = 3;
const PROSE_KEEP_TAIL = 2;
const PROSE_TARGET_RATIO = 0.4;
const PROSE_ALPHA_RATIO = 0.6;
// Sentences matching this are always kept by crushProse.
const PROSE_CRITICAL_RE = /\b(ERROR|FAIL(ED|URE)?|FATAL|WARN(ING)?|TODO|FIXME|XXX|HACK|must|important|note|caveat|deprecated|do not|never|always)\b/i;

// ERROR_RE: line mentioning an error/failure; kept by the log and code crushers.
const ERROR_RE = /\b(ERROR|FAIL(ED|URE)?|FATAL|Exception|Traceback|panic|AssertionError|assert(ion)? failed)\b/i;
// STACK_RE: indented line shaped like a stack frame; kept by crushLog and
// crushLogToBudget.
const STACK_RE = /^\s+(at\s|File "|\.{3}|[A-Za-z_$][\w$.]*\s*\()/;
// WARN_RE: counted together with ERROR_RE when _detectNonJson classifies a log.
const WARN_RE = /\bWARN(ING)?\b/i;
// SEARCH_LINE_RE: `file:line:` prefix; capture 1 is the file used for grouping.
const SEARCH_LINE_RE = /^(.+?):(\d+):/;
// SEARCH_DETECT_RE: stricter `file:line:` form (path with a slash or a file
// extension) used only for type detection.
const SEARCH_DETECT_RE = /^(?:[^\s:]*\/[^\s:]*|[^\s:]+\.[A-Za-z0-9]+):(\d+):/;
// VALUE_ERROR_RE: error-like wording in JSON values/keys and search matches.
const VALUE_ERROR_RE = /\b(error|fail|fatal|exception|denied|invalid|timeout)\b/i;
// GUTTER_RE: leading "<number><TAB>" line-number gutter removed by _degutter.
const GUTTER_RE = /^\s*\d+\t/;
|
|
43
|
+
|
|
44
|
+
/**
 * Size of a string when encoded as UTF-8.
 *
 * @param {string} s - Text to measure.
 * @returns {number} Byte length of `s` in UTF-8.
 */
function _bytes(s) {
  const size = Buffer.byteLength(s, 'utf-8');
  return size;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Index of the quote that closes the string literal opened at `start`, or -1
 * when the literal is not terminated on this line. A backslash escapes the
 * character that follows it.
 *
 * @param {string} line - Line being scanned.
 * @param {number} start - Index of the opening quote character.
 * @returns {number} Index of the matching closing quote, or -1.
 */
function _closingQuoteIndex(line, start) {
  const quote = line[start];
  for (let i = start + 1; i < line.length; i += 1) {
    if (line[i] === '\\') {
      i += 1; // skip the escaped character
      continue;
    }
    if (line[i] === quote) return i;
  }
  return -1;
}

/**
 * Neutralise comments and string literals on a single line so that callers
 * can count structural characters (braces) without false hits.
 *
 * The line is scanned once, left to right:
 * - a closed `/* ... *\/` comment is removed;
 * - `//` or `#` outside a string drops the rest of the line;
 * - a closed "...", '...' or `...` literal is replaced by an empty literal;
 * - an unterminated quote or `/*` is left in place as ordinary text.
 *
 * Scanning in order matters: stripping comments before masking strings would
 * treat `//` or `#` inside a literal (for example "http://host" or '#fff') as
 * a comment start and discard the remainder of the line, including real
 * braces that follow the literal.
 *
 * @param {string} line - One line of source text (no newline expected).
 * @returns {string} The line with comments removed and literals emptied.
 */
function _maskLiterals(line) {
  let out = '';
  let i = 0;
  while (i < line.length) {
    const ch = line[i];
    const next = line[i + 1];
    if (ch === '/' && next === '*') {
      const end = line.indexOf('*/', i + 2);
      if (end !== -1) {
        i = end + 2;
        continue;
      }
    } else if ((ch === '/' && next === '/') || ch === '#') {
      break; // line comment: nothing after this point is code
    } else if (ch === '"' || ch === "'" || ch === '`') {
      const end = _closingQuoteIndex(line, i);
      if (end !== -1) {
        out += ch + ch;
        i = end + 1;
        continue;
      }
    }
    out += ch;
    i += 1;
  }
  return out;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Split text into lines with comments and string literals neutralised.
 *
 * Block comments (which may span lines) are first overwritten with spaces,
 * keeping every newline so line indices stay aligned with the input; each
 * resulting line is then passed through _maskLiterals.
 *
 * @param {string} text - Full source text.
 * @returns {string[]} One masked string per input line.
 */
function _maskedLines(text) {
  const blankOut = (comment) => comment.replace(/[^\n]/g, ' ');
  const withoutBlockComments = text.replace(/\/\*[\s\S]*?\*\//g, (comment) => blankOut(comment));
  const masked = [];
  for (const line of withoutBlockComments.split('\n')) {
    masked.push(_maskLiterals(line));
  }
  return masked;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Width of a line's leading run of spaces and tabs, with each tab replaced
 * by the same replacement string before measuring.
 *
 * @param {string} line - Line to inspect.
 * @returns {number} Length of the leading whitespace; 0 when there is none.
 */
function _indentWidth(line) {
  const lead = line.match(/^[ \t]*/);
  if (!lead) return 0;
  const expanded = lead[0].replace(/\t/g, ' ');
  return expanded.length;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Heuristic: do these lines look like Python source?
 *
 * Requires at least CODE_MIN_LINES non-blank lines, at least one
 * decorator/def/class line, at least one header line ending in a colon, and
 * braces (outside comments/strings) on no more than 10% of the non-blank
 * line count.
 *
 * @param {string[]} lines - Text split into lines.
 * @returns {boolean} True when the lines match the Python heuristic.
 */
function _looksLikePython(lines) {
  const candidates = lines.filter((line) => line.trim());
  let braceCount = 0;
  let sawSignature = false;
  let sawColonHeader = false;
  for (const line of candidates) {
    const found = _maskLiterals(line).match(/[{}]/g);
    if (found) braceCount += found.length;
    sawSignature = sawSignature || PY_SIG_RE.test(line);
    sawColonHeader = sawColonHeader || (/:\s*$/.test(line) && PY_HEADER_RE.test(line));
  }
  const total = candidates.length;
  if (total < CODE_MIN_LINES) return false;
  if (!sawSignature || !sawColonHeader) return false;
  return braceCount <= total * 0.1;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Split text into trimmed, non-empty sentences.
 *
 * A sentence boundary is whitespace that follows `.`, `!` or `?`; the
 * punctuation stays attached to the sentence it ends.
 *
 * @param {*} text - Value to split; it is converted with String() first.
 * @returns {string[]} Sentences in their original order.
 */
function _splitSentences(text) {
  const sentences = [];
  for (const piece of String(text).split(/(?<=[.!?])\s+/)) {
    const clean = piece.trim();
    if (clean.length > 0) sentences.push(clean);
  }
  return sentences;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Heuristic: is this text natural-language prose?
 *
 * The text must be at least PROSE_MIN_BYTES long, contain at least
 * PROSE_MIN_SENTENCES sentences, and have letters make up at least
 * PROSE_ALPHA_RATIO of its non-whitespace characters.
 *
 * @param {string} text - Candidate text.
 * @returns {boolean} True when the text passes all three checks.
 */
function _looksLikeProse(text) {
  const longEnough = _bytes(text) >= PROSE_MIN_BYTES
    && _splitSentences(text).length >= PROSE_MIN_SENTENCES;
  if (!longEnough) return false;
  const letters = text.match(/[A-Za-zÀ-ɏ]/g);
  const letterCount = letters ? letters.length : 0;
  const visible = text.replace(/\s/g, '').length;
  if (visible === 0) return false;
  return letterCount >= visible * PROSE_ALPHA_RATIO;
}
|
|
99
|
+
|
|
100
|
+
/**
 * Remove a "<number><TAB>" line-number gutter when the text has one.
 *
 * The gutter is stripped only if at least 80% of the non-empty lines carry
 * it; otherwise the text is returned untouched.
 *
 * @param {string} text - Possibly line-numbered text.
 * @returns {string} Text without the gutter, or the input unchanged.
 */
function _degutter(text) {
  const rows = text.split('\n');
  const filled = rows.filter((row) => row);
  if (filled.length === 0) return text;
  const numbered = filled.filter((row) => GUTTER_RE.test(row)).length;
  if (numbered < filled.length * 0.8) return text;
  return rows.map((row) => row.replace(GUTTER_RE, '')).join('\n');
}
|
|
112
|
+
|
|
113
|
+
/**
 * Build the elision marker that is appended to a compressed block.
 *
 * @param {string} hash - Identifier under which the original text was stored.
 * @param {string} note - Human-readable description of what was elided.
 * @returns {string} Marker text including the retrieval command for `hash`.
 */
function marker(hash, note) {
  return `⟦elided:${hash} ${note} · retrieve: nubos elision-get ${hash}⟧`;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Build an elision marker from crusher statistics.
 *
 * @param {string} hash - Identifier of the stored original.
 * @param {number} dropped - How many units were removed.
 * @param {string} unit - Unit name as reported by the crusher (e.g. 'lines').
 * @param {string} [summary] - Optional extra detail appended after ' · '.
 * @returns {string} The formatted marker.
 */
function _marker(hash, dropped, unit, summary) {
  const parts = [`${dropped} ${unit} elided`];
  if (summary) parts.push(summary);
  return marker(hash, parts.join(' · '));
}
|
|
121
|
+
|
|
122
|
+
/**
 * Placeholder text standing in for a run of removed items.
 *
 * @param {number} n - Number of removed items.
 * @param {string} unit - Singular unit name; an 's' is appended unless n is 1.
 * @returns {string} Text of the form '… N units elided …'.
 */
function _gapNote(n, unit) {
  const label = n === 1 ? unit : `${unit}s`;
  return `… ${n} ${label} elided …`;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Join the kept items, replacing each consecutive run of dropped items with
 * a single gap note.
 *
 * @param {string[]} items - All items in order.
 * @param {Set<number>} keepIdx - Indices of the items to keep.
 * @param {string} joiner - Separator placed between output pieces.
 * @param {string} unit - Singular unit name used in gap notes.
 * @returns {string} The rendered text.
 */
function _renderGaps(items, keepIdx, joiner, unit) {
  const pieces = [];
  let skipped = 0;
  const flush = () => {
    if (skipped === 0) return;
    pieces.push(_gapNote(skipped, unit));
    skipped = 0;
  };
  for (const [index, item] of items.entries()) {
    if (!keepIdx.has(index)) {
      skipped += 1;
      continue;
    }
    flush();
    pieces.push(item);
  }
  flush();
  return pieces.join(joiner);
}
|
|
140
|
+
|
|
141
|
+
/**
 * Classify a block of text so the matching crusher can be chosen.
 *
 * Text that starts with `[` or `{` and parses as JSON is 'json-array' when
 * the parsed value is an array and 'plain' otherwise. Everything else,
 * including JSON-looking text that fails to parse, is classified by
 * _detectNonJson. Non-strings and empty strings are 'plain'.
 *
 * @param {*} text - Text to classify.
 * @returns {string} The detected type name.
 */
function detectType(text) {
  if (typeof text !== 'string' || text.length === 0) return 'plain';
  const body = _degutter(text);
  const core = body.trim();
  const jsonish = core.startsWith('[') || core.startsWith('{');
  if (!jsonish) return _detectNonJson(body);
  let value;
  try {
    value = JSON.parse(core);
  } catch {
    return _detectNonJson(body);
  }
  return Array.isArray(value) ? 'json-array' : 'plain';
}
|
|
154
|
+
|
|
155
|
+
/**
 * Heuristic: do these lines look like brace-delimited source code?
 *
 * Requires at least CODE_MIN_LINES lines, at least one line that ends by
 * opening a block, opening and closing braces (outside comments/strings)
 * that differ by no more than 2, and at least CODE_ISH_RATIO of the
 * non-blank lines matching a signature or code-like pattern.
 *
 * @param {string[]} lines - Text split into lines.
 * @returns {boolean} True when the lines match the heuristic.
 */
function _looksLikeCode(lines) {
  if (lines.length < CODE_MIN_LINES) return false;
  const count = (s, re) => (s.match(re) || []).length;
  let blockOpeners = 0;
  let codeLike = 0;
  let total = 0;
  let braceBalance = 0;
  for (const line of lines) {
    if (!line.trim()) continue;
    total += 1;
    const bare = _maskLiterals(line);
    if (CODE_OPEN_RE.test(bare)) blockOpeners += 1;
    braceBalance += count(bare, /\{/g) - count(bare, /\}/g);
    if (CODE_SIG_RE.test(line) || CODE_ISH_RE.test(line)) codeLike += 1;
  }
  if (blockOpeners < 1 || total === 0) return false;
  if (Math.abs(braceBalance) > 2) return false;
  return codeLike >= total * CODE_ISH_RATIO;
}
|
|
175
|
+
|
|
176
|
+
/**
 * Classify text that is not a JSON array. Checks run in this order and the
 * first match wins: 'diff', 'search', 'code', 'log', 'prose', then 'plain'.
 *
 * @param {string} text - Text to classify.
 * @returns {string} The detected type name.
 */
function _detectNonJson(text) {
  // A unified diff carries all three of: '--- ', '+++ ' and '@@ ' line starts.
  const isDiff = [/^---\s/m, /^\+\+\+\s/m, /^@@ /m].every((re) => re.test(text));
  if (isDiff) return 'diff';

  const lines = text.split('\n');
  const searchHits = lines.filter((line) => SEARCH_DETECT_RE.test(line)).length;
  if (searchHits >= 5 && searchHits >= lines.length * 0.5) return 'search';

  if (_looksLikeCode(lines) || _looksLikePython(lines)) return 'code';

  const mentionsProblem = (line) => ERROR_RE.test(line) || WARN_RE.test(line);
  if (lines.length >= 10 && lines.some(mentionsProblem)) return 'log';

  return _looksLikeProse(text) ? 'prose' : 'plain';
}
|
|
189
|
+
|
|
190
|
+
/**
 * Decide whether a JSON array item looks error-related.
 *
 * A string item is tested directly. An object item is flagged when any of
 * its string values, or any of its own key names, matches VALUE_ERROR_RE.
 * Everything else (null, undefined, numbers, booleans) is not flagged.
 *
 * @param {*} item - Parsed JSON value.
 * @returns {boolean} True when the item should be kept as an error.
 */
function _isErrorItem(item) {
  if (item == null) return false;
  if (typeof item === 'string') return VALUE_ERROR_RE.test(item);
  if (typeof item !== 'object') return false;
  const keys = Object.keys(item);
  const hasErrorValue = keys.some(
    (key) => typeof item[key] === 'string' && VALUE_ERROR_RE.test(item[key]),
  );
  if (hasErrorValue) return true;
  // The pattern only matches single words, so testing each key name is
  // equivalent to testing all key names joined by spaces.
  return keys.some((key) => VALUE_ERROR_RE.test(key));
}
|
|
203
|
+
|
|
204
|
+
/**
 * Shrink a JSON array by keeping error-like items, the first JSON_KEEP_HEAD
 * and last JSON_KEEP_TAIL items, and an evenly spaced sample up to
 * JSON_TARGET_RATIO of the array.
 *
 * @param {string} text - JSON text expected to hold an array.
 * @returns {?{compressed: string, dropped: number, unit: string, summary: string}}
 *   The crushed array as JSON with statistics, or null when the text is not
 *   valid JSON, is not an array, has fewer than JSON_MIN_ITEMS items, or
 *   nothing would be dropped.
 */
function crushJsonArray(text) {
  let items;
  try {
    items = JSON.parse(text);
  } catch {
    return null;
  }
  if (!Array.isArray(items) || items.length < JSON_MIN_ITEMS) return null;

  const total = items.length;
  const chosen = new Set();
  let flagged = 0;
  items.forEach((item, index) => {
    if (!_isErrorItem(item)) return;
    chosen.add(index);
    flagged += 1;
  });
  const headEnd = Math.min(JSON_KEEP_HEAD, total);
  for (let index = 0; index < headEnd; index += 1) chosen.add(index);
  for (let index = Math.max(0, total - JSON_KEEP_TAIL); index < total; index += 1) chosen.add(index);

  const target = Math.max(chosen.size, Math.ceil(total * JSON_TARGET_RATIO));
  if (target < total) {
    // Walk the array in equal strides; positions that are already chosen do
    // not grow the set, so fewer than `target` items may end up selected.
    const stride = total / (target - chosen.size + 1 || 1);
    let cursor = stride;
    while (chosen.size < target && cursor < total) {
      chosen.add(Math.floor(cursor));
      cursor += stride;
    }
  }
  if (chosen.size >= total) return null;

  const survivors = [...chosen].sort((a, b) => a - b).map((index) => items[index]);
  return {
    compressed: JSON.stringify(survivors),
    dropped: total - survivors.length,
    unit: 'items',
    summary: flagged ? `${flagged} flagged kept` : 'head+tail+sample kept',
  };
}
|
|
230
|
+
|
|
231
|
+
/**
 * Render kept lines joined by newlines, with gap notes counted in 'line'
 * units wherever lines were dropped.
 *
 * @param {string[]} lines - All lines in order.
 * @param {Set<number>} keepIdx - Indices of the lines to keep.
 * @returns {string} The rendered text.
 */
function _renderKept(lines, keepIdx) {
  const joiner = '\n';
  const unit = 'line';
  return _renderGaps(lines, keepIdx, joiner, unit);
}
|
|
234
|
+
|
|
235
|
+
/**
 * Shrink log output: keep every error or stack-frame line together with its
 * immediate neighbours, plus the first 3 and last 5 lines.
 *
 * @param {string} text - Log text.
 * @returns {?{compressed: string, dropped: number, unit: string, summary: string}}
 *   The crushed log with statistics, or null when nothing would be dropped.
 */
function crushLog(text) {
  const lines = text.split('\n');
  const total = lines.length;
  const keep = new Set();
  let errKept = 0;
  lines.forEach((line, index) => {
    if (!ERROR_RE.test(line) && !STACK_RE.test(line)) return;
    errKept += 1;
    for (const near of [index - 1, index, index + 1]) {
      if (near >= 0 && near < total) keep.add(near);
    }
  });
  const headEnd = Math.min(3, total);
  for (let index = 0; index < headEnd; index += 1) keep.add(index);
  for (let index = Math.max(0, total - 5); index < total; index += 1) keep.add(index);
  if (keep.size >= total) return null;

  const noun = errKept === 1 ? 'line' : 'lines';
  return {
    compressed: _renderKept(lines, keep),
    dropped: total - keep.size,
    unit: 'lines',
    summary: errKept ? `${errKept} error/stack ${noun} kept` : 'head+tail kept',
  };
}
|
|
255
|
+
|
|
256
|
+
/**
 * Shrink grep-style search output (`file:line:...`).
 *
 * Matches are grouped per file in first-seen order. The first
 * SEARCH_MAX_FILES files are considered, plus any later file containing an
 * error-like match. Within a file, error-like matches are always emitted
 * first; the remaining matches are sampled down to SEARCH_MAX_PER_FILE and
 * added only while fewer than SEARCH_MAX_TOTAL matches have been emitted.
 * Lines that do not match the `file:line:` shape are not carried over.
 *
 * @param {string} text - Search output.
 * @returns {?{compressed: string, dropped: number, unit: string, summary: string}}
 *   The crushed output with statistics, or null when no line parses or
 *   nothing would be dropped.
 */
function crushSearch(text) {
  const groups = new Map(); // file -> matching lines, in first-seen file order
  let parsed = 0;
  for (const line of text.split('\n')) {
    const hit = line.match(SEARCH_LINE_RE);
    if (!hit) continue;
    parsed += 1;
    const bucket = groups.get(hit[1]);
    if (bucket) bucket.push(line);
    else groups.set(hit[1], [line]);
  }
  if (parsed === 0) return null;

  const order = [...groups.keys()];
  const isError = (line) => VALUE_ERROR_RE.test(line);
  const errorFiles = order.filter((file) => groups.get(file).some(isError));
  // A Set keeps insertion order, so error files already among the head files
  // are not repeated and the rest are appended after them.
  const files = [...new Set([...order.slice(0, SEARCH_MAX_FILES), ...errorFiles])];

  const out = [];
  let kept = 0;
  for (const file of files) {
    const hits = groups.get(file);
    const errs = hits.filter(isError);
    const rest = hits.filter((line) => !isError(line));

    let sampled = rest;
    if (rest.length > SEARCH_MAX_PER_FILE) {
      // First and last match plus evenly spaced picks from the middle.
      const middle = rest.slice(1, -1);
      const stride = middle.length / (SEARCH_MAX_PER_FILE - 1 || 1);
      sampled = [rest[0]];
      let pos = stride;
      while (sampled.length < SEARCH_MAX_PER_FILE - 1 && pos < middle.length) {
        sampled.push(middle[Math.floor(pos)]);
        pos += stride;
      }
      sampled.push(rest[rest.length - 1]);
    }

    for (const line of errs) out.push(line);
    kept += errs.length;
    for (const line of sampled) {
      if (kept >= SEARCH_MAX_TOTAL) break;
      out.push(line);
      kept += 1;
    }
  }
  if (kept >= parsed && files.length === order.length) return null;

  const errorNote = errorFiles.length ? `, ${errorFiles.length} with errors` : '';
  return {
    compressed: out.join('\n'),
    dropped: parsed - kept,
    unit: 'matches',
    summary: `${files.length}/${order.length} files${errorNote}`,
  };
}
|
|
301
|
+
|
|
302
|
+
/**
 * Shrink a unified diff: keep headers, hunk markers and every added/removed
 * line, plus up to DIFF_CONTEXT context lines (lines starting with a space)
 * on each side of any kept line.
 *
 * @param {string} text - Unified diff text.
 * @returns {?{compressed: string, dropped: number, unit: string, summary: string}}
 *   The crushed diff with statistics, or null when nothing would be dropped.
 */
function crushDiff(text) {
  const lines = text.split('\n');
  const keep = new Set();
  let changed = 0;
  const isContext = (index) => index >= 0 && index < lines.length && /^[ ]/.test(lines[index]);

  lines.forEach((line, index) => {
    const anchored = /^(\+\+\+|---|@@|diff |index )/.test(line) || /^[+-]/.test(line);
    if (!anchored) return;
    keep.add(index);
    // '+++' / '---' file headers start with +/- but are not content changes.
    if (/^[+-]/.test(line) && !/^(\+\+\+|---)/.test(line)) changed += 1;
    for (let offset = 1; offset <= DIFF_CONTEXT; offset += 1) {
      if (isContext(index - offset)) keep.add(index - offset);
      if (isContext(index + offset)) keep.add(index + offset);
    }
  });
  if (keep.size >= lines.length) return null;

  const noun = changed === 1 ? 'line' : 'lines';
  return {
    compressed: _renderKept(lines, keep),
    dropped: lines.length - keep.size,
    unit: 'lines',
    summary: `all ${changed} changed ${noun} kept`,
  };
}
|
|
324
|
+
|
|
325
|
+
/**
 * Shrink Python source: keep non-blank lines that are unindented, are
 * decorator/def/class/control-flow headers, or match PY_KEEP_RE or ERROR_RE,
 * plus the first and last two lines.
 *
 * @param {string} text - Python source text.
 * @returns {?{compressed: string, dropped: number, unit: string, summary: string}}
 *   The crushed source with statistics, or null when nothing would be dropped.
 */
function crushPython(text) {
  const lines = text.split('\n');
  const total = lines.length;
  const keep = new Set();
  const significant = (line) => _indentWidth(line) === 0
    || PY_HEADER_RE.test(line)
    || PY_KEEP_RE.test(line)
    || ERROR_RE.test(line);

  lines.forEach((line, index) => {
    if (line.trim() && significant(line)) keep.add(index);
  });
  const headEnd = Math.min(2, total);
  for (let index = 0; index < headEnd; index += 1) keep.add(index);
  for (let index = Math.max(0, total - 2); index < total; index += 1) keep.add(index);
  if (keep.size >= total) return null;

  const defs = [...keep].filter((index) => PY_SIG_RE.test(lines[index])).length;
  return {
    compressed: _renderKept(lines, keep),
    dropped: total - keep.size,
    unit: 'lines',
    summary: `${defs} def/class kept`,
  };
}
|
|
345
|
+
|
|
346
|
+
/**
 * Shrink source code. Python-looking input is delegated to crushPython.
 *
 * For other code, brace depth is tracked on a copy with comments and string
 * literals masked. A line is kept when it sits at depth 0, changes the depth
 * (unequal opening and closing braces), or matches CODE_KEEP_RE or ERROR_RE.
 * The first and last two lines are always kept.
 *
 * @param {string} text - Source text.
 * @returns {?{compressed: string, dropped: number, unit: string, summary: string}}
 *   The crushed source with statistics, or null when nothing would be dropped.
 */
function crushCode(text) {
  const lines = text.split('\n');
  if (_looksLikePython(lines)) return crushPython(text);

  const masked = _maskedLines(text);
  const count = (s, re) => (s.match(re) || []).length;
  const keep = new Set();
  let depth = 0;
  lines.forEach((line, index) => {
    const bare = masked[index] || '';
    const delta = count(bare, /\{/g) - count(bare, /\}/g);
    // The depth test uses the depth *before* this line is applied.
    if (depth === 0 || delta !== 0 || CODE_KEEP_RE.test(line) || ERROR_RE.test(line)) {
      keep.add(index);
    }
    depth = Math.max(0, depth + delta);
  });
  const headEnd = Math.min(2, lines.length);
  for (let index = 0; index < headEnd; index += 1) keep.add(index);
  for (let index = Math.max(0, lines.length - 2); index < lines.length; index += 1) keep.add(index);
  if (keep.size >= lines.length) return null;

  const sigs = [...keep].filter((index) => CODE_SIG_RE.test(lines[index])).length;
  const noun = sigs === 1 ? 'signature' : 'signatures';
  return {
    compressed: _renderKept(lines, keep),
    dropped: lines.length - keep.size,
    unit: 'lines',
    summary: `${sigs} ${noun} kept`,
  };
}
|
|
372
|
+
|
|
373
|
+
/**
 * Shrink prose: keep the first PROSE_KEEP_HEAD and last PROSE_KEEP_TAIL
 * sentences, every sentence matching PROSE_CRITICAL_RE, and an evenly spaced
 * sample up to PROSE_TARGET_RATIO of the sentences. Kept sentences are
 * re-joined with single spaces.
 *
 * @param {string} text - Prose text.
 * @returns {?{compressed: string, dropped: number, unit: string, summary: string}}
 *   The crushed prose with statistics, or null when there are fewer than
 *   PROSE_MIN_SENTENCES sentences or nothing would be dropped.
 */
function crushProse(text) {
  const sentences = _splitSentences(text);
  const total = sentences.length;
  if (total < PROSE_MIN_SENTENCES) return null;

  const keep = new Set();
  const headEnd = Math.min(PROSE_KEEP_HEAD, total);
  for (let index = 0; index < headEnd; index += 1) keep.add(index);
  for (let index = Math.max(0, total - PROSE_KEEP_TAIL); index < total; index += 1) keep.add(index);
  sentences.forEach((sentence, index) => {
    if (PROSE_CRITICAL_RE.test(sentence)) keep.add(index);
  });

  const target = Math.max(keep.size, Math.ceil(total * PROSE_TARGET_RATIO));
  if (target < total) {
    const stride = total / (target - keep.size + 1 || 1);
    let cursor = stride;
    while (keep.size < target && cursor < total) {
      keep.add(Math.floor(cursor));
      cursor += stride;
    }
  }
  if (keep.size >= total) return null;

  const keyCount = sentences
    .filter((sentence, index) => keep.has(index) && PROSE_CRITICAL_RE.test(sentence))
    .length;
  const noun = keyCount === 1 ? 'sentence' : 'sentences';
  return {
    compressed: _renderGaps(sentences, keep, ' ', 'sentence'),
    dropped: total - keep.size,
    unit: 'sentences',
    summary: keyCount ? `${keyCount} key ${noun} kept` : 'head+tail kept',
  };
}
|
|
393
|
+
|
|
394
|
+
// Maps a type name returned by detectType to the function that compresses it.
// A type with no entry here (such as 'plain') is returned unchanged by
// compressBlock.
const _CRUSHERS = {
  'json-array': crushJsonArray,
  log: crushLog,
  search: crushSearch,
  diff: crushDiff,
  code: crushCode,
  prose: crushProse,
};
|
|
402
|
+
|
|
403
|
+
/**
 * Compress one block of text using the crusher that matches its detected type.
 *
 * The block is returned unchanged (changed: false, ratio: 1) when it is not a
 * string, is smaller than the byte threshold, has no crusher for its type,
 * the crusher declines, the compressed/original length ratio exceeds
 * MAX_RATIO, or `opts.store` returns a falsy value.
 *
 * @param {string} text - Block to compress.
 * @param {object} [opts]
 * @param {number} [opts.minBlockBytes] - Overrides MIN_BLOCK_BYTES when finite.
 * @param {function(string, string): string} [opts.store] - Receives the
 *   original text and its type and returns an identifier; when supplied, an
 *   elision marker carrying that identifier is appended on a new line.
 * @returns {{compressed: string, ratio: number, dropped: number, type: string, changed: boolean}}
 */
function compressBlock(text, opts) {
  const options = opts || {};
  const threshold = Number.isFinite(options.minBlockBytes)
    ? options.minBlockBytes
    : MIN_BLOCK_BYTES;
  const passthrough = (type) => ({
    compressed: text, ratio: 1, dropped: 0, type, changed: false,
  });
  if (typeof text !== 'string' || _bytes(text) < threshold) return passthrough('plain');

  const work = _degutter(text);
  const type = detectType(work);
  const crusher = _CRUSHERS[type];
  const result = crusher ? crusher(work) : null;
  if (!result) return passthrough(type);

  // The ratio is measured before any marker is appended.
  const ratio = text.length ? result.compressed.length / text.length : 1;
  if (ratio > MAX_RATIO) return passthrough(type);

  if (typeof options.store !== 'function') {
    return { compressed: result.compressed, ratio, dropped: result.dropped, type, changed: true };
  }
  const hash = options.store(text, type);
  if (!hash) return passthrough(type);
  const tail = _marker(hash, result.dropped, result.unit, result.summary);
  return {
    compressed: result.compressed + '\n' + tail,
    ratio,
    dropped: result.dropped,
    type,
    changed: true,
  };
}
|
|
424
|
+
|
|
425
|
+
// Triple-backtick fenced block: (opening fence line)(body)(closing fence).
const _FENCE_RE = /(```[^\n]*\n)([\s\S]*?)(\n```)/g;

/**
 * Compress the body of every fenced code block inside a prompt, leaving the
 * fences and all text outside them untouched.
 *
 * @param {string} blob - Prompt text.
 * @param {object} [opts] - Options passed through to compressBlock.
 * @returns {{text: string, stats: {bytes_before: number, bytes_after: number, blocks_compressed: number}}}
 *   The rewritten prompt and byte/block statistics. A non-string or empty
 *   `blob` is returned as-is with all statistics set to 0.
 */
function compressPrompt(blob, opts) {
  if (typeof blob !== 'string' || blob.length === 0) {
    return { text: blob, stats: { bytes_before: 0, bytes_after: 0, blocks_compressed: 0 } };
  }
  const options = opts || {};
  let blocks = 0;
  const rewriteFence = (whole, opener, inner, closer) => {
    const outcome = compressBlock(inner, options);
    if (!outcome.changed) return whole;
    blocks += 1;
    return `${opener}${outcome.compressed}${closer}`;
  };
  const bytesBefore = _bytes(blob);
  const text = blob.replace(_FENCE_RE, rewriteFence);
  return {
    text,
    stats: { bytes_before: bytesBefore, bytes_after: _bytes(text), blocks_compressed: blocks },
  };
}
|
|
445
|
+
|
|
446
|
+
/**
 * Cut a string down to at most `maxBytes` UTF-8 bytes without splitting a
 * multi-byte character.
 *
 * @param {string} str - Text to truncate.
 * @param {number} maxBytes - Byte limit; non-finite or negative counts as 0.
 * @returns {string} A prefix of `str` whose UTF-8 size is <= the limit.
 */
function _truncateUtf8(str, maxBytes) {
  const limit = Number.isFinite(maxBytes) ? Math.max(0, Math.floor(maxBytes)) : 0;
  const buf = Buffer.from(str, 'utf-8');
  if (buf.length <= limit) return str;
  let end = limit;
  // Bytes of the form 10xxxxxx continue a character; back up to its start.
  while (end > 0 && (buf[end] & 0xc0) === 0x80) end -= 1;
  return buf.toString('utf-8', 0, end);
}

/**
 * Reduce log text so that its UTF-8 size fits within `maxBytes`.
 *
 * Text already within the limit is returned unchanged. Otherwise lines are
 * selected in two passes, both walking from the end of the log backwards:
 * error/stack lines first (up to 60% of the working budget), then any other
 * lines until the budget is used. Dropped runs are replaced by gap notes. If
 * the rendered result is still too large, leading lines are removed, and a
 * single remaining line is truncated on a character boundary.
 *
 * @param {string} text - Log text.
 * @param {number} maxBytes - Maximum size of the result in UTF-8 bytes.
 * @returns {string} The reduced log, or '' when `text` is not a string.
 */
function crushLogToBudget(text, maxBytes) {
  if (typeof text !== 'string') return '';
  if (_bytes(text) <= maxBytes) return text;

  const lines = text.split('\n');
  const errIdx = [];
  lines.forEach((line, index) => {
    if (ERROR_RE.test(line) || STACK_RE.test(line)) errIdx.push(index);
  });

  const keep = new Set();
  let used = 0;
  // NOTE(review): the 80 bytes held back look like headroom for gap notes,
  // and 256 is a floor for tiny limits; the trim loop below enforces maxBytes.
  const budget = Math.max(256, maxBytes - 80);

  // Pass 1: most recent error/stack lines, limited to 60% of the budget.
  for (let j = errIdx.length - 1; j >= 0; j -= 1) {
    const index = errIdx[j];
    const cost = _bytes(lines[index]) + 1; // +1 for the newline
    if (used + cost > budget * 0.6) break;
    keep.add(index);
    used += cost;
  }
  // Pass 2: fill the rest of the budget from the tail of the log.
  for (let index = lines.length - 1; index >= 0; index -= 1) {
    if (keep.has(index)) continue;
    const cost = _bytes(lines[index]) + 1;
    if (used + cost > budget) break;
    keep.add(index);
    used += cost;
  }

  let out = _renderKept(lines, keep);
  while (_bytes(out) > maxBytes && out.length) {
    const nl = out.indexOf('\n');
    // Gap notes contain multi-byte characters, so the cut must be made in
    // bytes, not UTF-16 units, to stay within the limit.
    if (nl < 0) return _truncateUtf8(out, maxBytes);
    out = out.slice(nl + 1);
  }
  return out;
}
|
|
479
|
+
|
|
480
|
+
// Exported API: the two size thresholds, the marker builder, type detection,
// the individual crushers, and the block/prompt/budget entry points.
module.exports = {
  MIN_BLOCK_BYTES,
  MAX_RATIO,
  marker,
  detectType,
  crushJsonArray,
  crushLog,
  crushSearch,
  crushDiff,
  crushCode,
  crushPython,
  crushProse,
  compressBlock,
  compressPrompt,
  crushLogToBudget,
};
|