@shrkcrft/compress 0.1.0-alpha.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +21 -0
- package/dist/cache/align-volatile-tokens.d.ts +13 -0
- package/dist/cache/align-volatile-tokens.d.ts.map +1 -0
- package/dist/cache/align-volatile-tokens.js +51 -0
- package/dist/cache/alignment-map.d.ts +23 -0
- package/dist/cache/alignment-map.d.ts.map +1 -0
- package/dist/cache/alignment-map.js +1 -0
- package/dist/cache/alignment-result.d.ts +11 -0
- package/dist/cache/alignment-result.d.ts.map +1 -0
- package/dist/cache/alignment-result.js +1 -0
- package/dist/cache/detect-volatile-tokens.d.ts +10 -0
- package/dist/cache/detect-volatile-tokens.d.ts.map +1 -0
- package/dist/cache/detect-volatile-tokens.js +41 -0
- package/dist/cache/placeholder.d.ts +28 -0
- package/dist/cache/placeholder.d.ts.map +1 -0
- package/dist/cache/placeholder.js +0 -0
- package/dist/cache/restore-volatile-tokens.d.ts +10 -0
- package/dist/cache/restore-volatile-tokens.d.ts.map +1 -0
- package/dist/cache/restore-volatile-tokens.js +21 -0
- package/dist/cache/volatile-classify.d.ts +11 -0
- package/dist/cache/volatile-classify.d.ts.map +1 -0
- package/dist/cache/volatile-classify.js +35 -0
- package/dist/cache/volatile-kind.d.ts +13 -0
- package/dist/cache/volatile-kind.d.ts.map +1 -0
- package/dist/cache/volatile-kind.js +13 -0
- package/dist/cache/volatile-token.d.ts +14 -0
- package/dist/cache/volatile-token.d.ts.map +1 -0
- package/dist/cache/volatile-token.js +1 -0
- package/dist/ccr/ccr-entry.d.ts +13 -0
- package/dist/ccr/ccr-entry.d.ts.map +1 -0
- package/dist/ccr/ccr-entry.js +1 -0
- package/dist/ccr/ccr-key.d.ts +9 -0
- package/dist/ccr/ccr-key.d.ts.map +1 -0
- package/dist/ccr/ccr-key.js +19 -0
- package/dist/ccr/ccr-marker.d.ts +23 -0
- package/dist/ccr/ccr-marker.d.ts.map +1 -0
- package/dist/ccr/ccr-marker.js +30 -0
- package/dist/ccr/ccr-store.d.ts +18 -0
- package/dist/ccr/ccr-store.d.ts.map +1 -0
- package/dist/ccr/ccr-store.js +1 -0
- package/dist/ccr/file-ccr-store.d.ts +19 -0
- package/dist/ccr/file-ccr-store.d.ts.map +1 -0
- package/dist/ccr/file-ccr-store.js +53 -0
- package/dist/ccr/in-memory-ccr-store.d.ts +21 -0
- package/dist/ccr/in-memory-ccr-store.d.ts.map +1 -0
- package/dist/ccr/in-memory-ccr-store.js +45 -0
- package/dist/ccr/ttl-file-ccr-store.d.ts +43 -0
- package/dist/ccr/ttl-file-ccr-store.d.ts.map +1 -0
- package/dist/ccr/ttl-file-ccr-store.js +117 -0
- package/dist/code/compress-code.d.ts +4 -0
- package/dist/code/compress-code.d.ts.map +1 -0
- package/dist/code/compress-code.js +294 -0
- package/dist/compress-content.d.ts +11 -0
- package/dist/compress-content.d.ts.map +1 -0
- package/dist/compress-content.js +79 -0
- package/dist/content/content-type.d.ts +28 -0
- package/dist/content/content-type.d.ts.map +1 -0
- package/dist/content/content-type.js +28 -0
- package/dist/content/detect-content-type.d.ts +9 -0
- package/dist/content/detect-content-type.d.ts.map +1 -0
- package/dist/content/detect-content-type.js +184 -0
- package/dist/content/segment.d.ts +21 -0
- package/dist/content/segment.d.ts.map +1 -0
- package/dist/content/segment.js +117 -0
- package/dist/index.d.ts +61 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +49 -0
- package/dist/json/compress-json.d.ts +18 -0
- package/dist/json/compress-json.d.ts.map +1 -0
- package/dist/json/compress-json.js +139 -0
- package/dist/json/render-compact-json.d.ts +10 -0
- package/dist/json/render-compact-json.d.ts.map +1 -0
- package/dist/json/render-compact-json.js +18 -0
- package/dist/relevance/bm25.d.ts +26 -0
- package/dist/relevance/bm25.d.ts.map +1 -0
- package/dist/relevance/bm25.js +115 -0
- package/dist/result/compress-options.d.ts +26 -0
- package/dist/result/compress-options.d.ts.map +1 -0
- package/dist/result/compress-options.js +1 -0
- package/dist/result/compression-result.d.ts +26 -0
- package/dist/result/compression-result.d.ts.map +1 -0
- package/dist/result/compression-result.js +1 -0
- package/dist/result/compression-strategy.d.ts +30 -0
- package/dist/result/compression-strategy.d.ts.map +1 -0
- package/dist/result/compression-strategy.js +30 -0
- package/dist/table/adaptive-size.d.ts +46 -0
- package/dist/table/adaptive-size.d.ts.map +1 -0
- package/dist/table/adaptive-size.js +170 -0
- package/dist/table/apply-value-dictionaries.d.ts +30 -0
- package/dist/table/apply-value-dictionaries.d.ts.map +1 -0
- package/dist/table/apply-value-dictionaries.js +99 -0
- package/dist/table/column-presence.d.ts +20 -0
- package/dist/table/column-presence.d.ts.map +1 -0
- package/dist/table/column-presence.js +52 -0
- package/dist/table/columnar-json.d.ts +24 -0
- package/dist/table/columnar-json.d.ts.map +1 -0
- package/dist/table/columnar-json.js +83 -0
- package/dist/table/columnar-table.d.ts +24 -0
- package/dist/table/columnar-table.d.ts.map +1 -0
- package/dist/table/columnar-table.js +1 -0
- package/dist/table/compact-object-array.d.ts +12 -0
- package/dist/table/compact-object-array.d.ts.map +1 -0
- package/dist/table/compact-object-array.js +88 -0
- package/dist/table/field-spec.d.ts +13 -0
- package/dist/table/field-spec.d.ts.map +1 -0
- package/dist/table/field-spec.js +1 -0
- package/dist/table/object-map.d.ts +28 -0
- package/dist/table/object-map.d.ts.map +1 -0
- package/dist/table/object-map.js +119 -0
- package/dist/table/render-table.d.ts +11 -0
- package/dist/table/render-table.d.ts.map +1 -0
- package/dist/table/render-table.js +39 -0
- package/dist/table/sample-object-array.d.ts +11 -0
- package/dist/table/sample-object-array.d.ts.map +1 -0
- package/dist/table/sample-object-array.js +171 -0
- package/dist/table/sample-options.d.ts +29 -0
- package/dist/table/sample-options.d.ts.map +1 -0
- package/dist/table/sample-options.js +1 -0
- package/dist/table/sampled-table.d.ts +33 -0
- package/dist/table/sampled-table.d.ts.map +1 -0
- package/dist/table/sampled-table.js +8 -0
- package/dist/table/table-compaction.d.ts +19 -0
- package/dist/table/table-compaction.d.ts.map +1 -0
- package/dist/table/table-compaction.js +1 -0
- package/dist/table/table-formats.d.ts +23 -0
- package/dist/table/table-formats.d.ts.map +1 -0
- package/dist/table/table-formats.js +233 -0
- package/dist/text/compress-diff.d.ts +20 -0
- package/dist/text/compress-diff.d.ts.map +1 -0
- package/dist/text/compress-diff.js +344 -0
- package/dist/text/compress-lines.d.ts +12 -0
- package/dist/text/compress-lines.d.ts.map +1 -0
- package/dist/text/compress-lines.js +44 -0
- package/dist/text/compress-log.d.ts +12 -0
- package/dist/text/compress-log.d.ts.map +1 -0
- package/dist/text/compress-log.js +202 -0
- package/dist/text/compress-markdown.d.ts +15 -0
- package/dist/text/compress-markdown.d.ts.map +1 -0
- package/dist/text/compress-markdown.js +96 -0
- package/dist/text/compress-search.d.ts +11 -0
- package/dist/text/compress-search.d.ts.map +1 -0
- package/dist/text/compress-search.js +78 -0
- package/dist/text/finalize.d.ts +21 -0
- package/dist/text/finalize.d.ts.map +1 -0
- package/dist/text/finalize.js +54 -0
- package/dist/text/line-utils.d.ts +20 -0
- package/dist/text/line-utils.d.ts.map +1 -0
- package/dist/text/line-utils.js +65 -0
- package/dist/text/lockfile-names.d.ts +3 -0
- package/dist/text/lockfile-names.d.ts.map +1 -0
- package/dist/text/lockfile-names.js +33 -0
- package/dist/text/log-template.d.ts +31 -0
- package/dist/text/log-template.d.ts.map +1 -0
- package/dist/text/log-template.js +239 -0
- package/dist/tokens/estimate-tokens.d.ts +17 -0
- package/dist/tokens/estimate-tokens.d.ts.map +1 -0
- package/dist/tokens/estimate-tokens.js +53 -0
- package/dist/tokens/token-savings.d.ts +20 -0
- package/dist/tokens/token-savings.d.ts.map +1 -0
- package/dist/tokens/token-savings.js +1 -0
- package/package.json +52 -0
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
import { EContentType } from "../content/content-type.js";
|
|
2
|
+
import { ECompressionStrategy } from "../result/compression-strategy.js";
|
|
3
|
+
import { splitLines, queryTokens, queryOverlap, elide } from "./line-utils.js";
|
|
4
|
+
import { finalizeLossy, passthroughResult } from "./finalize.js";
|
|
5
|
+
import { formatCcrMarker } from "../ccr/ccr-marker.js";
|
|
6
|
+
import { isLockfileName } from "./lockfile-names.js";
|
|
7
|
+
// Header tokens that are unambiguous at column 0 (a hunk body line starts with
|
|
8
|
+
// ` `/`+`/`-`, never these). `--- `/`+++ ` are handled separately because they
|
|
9
|
+
// collide with deleted/added content lines.
|
|
10
|
+
const SAFE_HEADER_RE = /^(?:diff --git |index |new file|deleted file|old mode|new mode|similarity |rename |copy )/;
|
|
11
|
+
const HUNK_HEADER_RE = /^@@ /;
|
|
12
|
+
/**
 * Tell whether a diff line is an added or removed content line.
 *
 * A line counts as a change when it begins with a `+` or `-` marker. Lines
 * beginning with `+++` or `---` are treated as file-header lines and are not
 * counted.
 *
 * @param line One line of a unified diff.
 * @returns `true` for an added/removed line, `false` for anything else.
 */
function isChangeLine(line) {
    const marker = line.charAt(0);
    if (marker === '+')
        return !line.startsWith('+++');
    if (marker === '-')
        return !line.startsWith('---');
    return false;
}
|
|
19
|
+
/**
 * Compress a unified diff in two stages.
 *
 * 1. **Noise offload.** When the diff splits cleanly into `diff --git`
 *    sections and at least one of them is a lockfile or contains a
 *    whitespace-only hunk, each lockfile section is reduced to its header
 *    lines plus a one-line `[lockfile …]` marker, and the whitespace-only
 *    hunks of a section are replaced by a single `[whitespace-only: …]`
 *    marker. If `opts.store` is set, the elided text is put into it and the
 *    resulting key is appended to the marker.
 * 2. **Core compression.** Everything else goes through
 *    {@link compressDiffCore}. A diff with no noise (or one that does not
 *    start with `diff --git`) is handed to the core compressor as a whole.
 *
 * @param text The full diff text.
 * @param opts Compression options; `store` is used here, the rest is passed on.
 * @returns The result produced by `compressDiffCore` or `finalizeLossy`.
 */
export function compressDiff(text, opts = {}) {
    const sections = segmentDiffSections(splitLines(text));
    const isNoisy = (section) => section.isLockfile || section.hunks.some((h) => h.whitespaceOnly);
    // Anything that does not segment cleanly, or has nothing to offload, takes
    // the core path untouched.
    if (sections === null || !sections.some(isNoisy))
        return compressDiffCore(text, opts);
    // Sub-sections are compressed without a store; the whole diff is finalized
    // once at the end with the caller's options.
    const compressCore = (sectionText) => compressDiffCore(sectionText, { ...opts, store: undefined }).compressed;
    // Stash `payload` in the store (when there is one) and render the marker suffix.
    const ccrSuffix = (payload) => {
        const key = opts.store ? opts.store.put(payload) : undefined;
        return key ? ` ${formatCcrMarker(key)}` : '';
    };
    const pieces = [];
    let lockfiles = 0;
    let whitespaceHunks = 0;
    for (const section of sections) {
        if (section.isLockfile) {
            lockfiles += 1;
            const bodyLineCount = section.lines.length - section.headerLines.length;
            const suffix = ccrSuffix(section.lines.join('\n'));
            const lockMarker = `[lockfile ${section.basename}: ${plural(bodyLineCount, 'line')} elided${suffix}]`;
            pieces.push([...section.headerLines, lockMarker].join('\n'));
            continue;
        }
        // Split the section's hunks into whitespace-only noise and real changes.
        const noise = [];
        const real = [];
        for (const hunk of section.hunks)
            (hunk.whitespaceOnly ? noise : real).push(hunk);
        if (noise.length === 0) {
            pieces.push(compressCore(section.lines.join('\n')));
            continue;
        }
        whitespaceHunks += noise.length;
        // Real hunks keep their file header; a section that is all noise keeps
        // only the header lines.
        const realPart = real.length > 0
            ? compressCore([...section.headerLines, ...real.flatMap((h) => h.lines)].join('\n'))
            : section.headerLines.join('\n');
        const noiseLines = noise.flatMap((h) => h.lines);
        const suffix = ccrSuffix(noiseLines.join('\n'));
        const noiseMarker = `[whitespace-only: ${plural(noise.length, 'hunk')}, ${plural(noiseLines.length, 'line')} elided${suffix}]`;
        pieces.push([realPart, noiseMarker].join('\n'));
    }
    return finalizeLossy({
        original: text,
        body: pieces.join('\n'),
        contentType: EContentType.GitDiff,
        strategy: ECompressionStrategy.Diff,
        opts,
        note: `full diff: ${plural(lockfiles, 'lockfile')} + ${plural(whitespaceHunks, 'whitespace hunk')} offloaded`,
    });
}
|
|
86
|
+
/**
 * Render a count with its noun, adding a plain `s` unless the count is exactly 1.
 *
 * @param n The count.
 * @param noun The singular noun.
 * @returns For example `"1 line"` or `"3 lines"`.
 */
function plural(n, noun) {
    const suffix = n === 1 ? '' : 's';
    return `${n} ${noun}${suffix}`;
}
|
|
89
|
+
/**
 * Cut a diff into one section per `diff --git` line.
 *
 * Each section runs from its `diff --git` line up to (not including) the next
 * one, or to the end of the input, and is passed through `buildSection`.
 *
 * @param lines The diff, already split into lines.
 * @returns The built sections, or `null` when the very first line is not a
 *   `diff --git` line (empty input, preamble text, headerless diff), so the
 *   caller can fall back to the core path.
 */
function segmentDiffSections(lines) {
    const opensSection = (l) => (l ?? '').startsWith('diff --git ');
    if (lines.length === 0 || !opensSection(lines[0]))
        return null;
    const starts = [];
    lines.forEach((l, idx) => {
        if (opensSection(l))
            starts.push(idx);
    });
    // A section ends where the next one starts; the last one ends with the input.
    return starts.map((begin, k) => buildSection(lines.slice(begin, starts[k + 1] ?? lines.length)));
}
|
|
112
|
+
/**
 * Turn the lines of one `diff --git` section into a structured record.
 *
 * Everything before the first `@@ ` line is the header. From there on, each
 * `@@ ` line opens a new hunk that owns all lines up to the next `@@ ` line.
 *
 * @param sectionLines The lines of a single section.
 * @returns `{ lines, headerLines, hunks, basename, isLockfile }`, where every
 *   hunk is `{ lines, whitespaceOnly }`.
 */
function buildSection(sectionLines) {
    const hunkStart = sectionLines.findIndex((l) => HUNK_HEADER_RE.test(l));
    const headerEnd = hunkStart < 0 ? sectionLines.length : hunkStart;
    const headerLines = sectionLines.slice(0, headerEnd);
    const hunks = [];
    let pending = null;
    // Close the hunk being collected, classifying it as it is stored.
    const flush = () => {
        if (pending)
            hunks.push({ lines: pending, whitespaceOnly: isWhitespaceOnlyHunk(pending) });
    };
    for (const raw of sectionLines.slice(headerEnd)) {
        const line = raw ?? '';
        if (HUNK_HEADER_RE.test(line)) {
            flush();
            pending = [line];
            continue;
        }
        if (pending)
            pending.push(line);
    }
    flush();
    const basename = sectionPath(headerLines);
    const isLockfile = basename.length > 0 && isLockfileName(basename);
    return {
        lines: sectionLines,
        headerLines,
        hunks,
        basename,
        isLockfile,
    };
}
|
|
141
|
+
/**
 * Extract the basename of the file a section is about.
 *
 * The first `+++ b/<path>` header line wins. When there is none (or its path
 * is empty), the `b/` side of the `diff --git` line at index 0 is used instead.
 * Anything after a tab is discarded and the rest is trimmed before the last
 * `/`-separated segment is returned.
 *
 * @param headerLines The header lines of one section.
 * @returns The basename, or `''` when no path could be parsed.
 */
function sectionPath(headerLines) {
    const plusMatch = headerLines
        .map((l) => /^\+\+\+ b\/(.*)$/.exec(l))
        .find((m) => m !== null);
    let raw = plusMatch ? (plusMatch[1] ?? '') : '';
    if (raw === '') {
        const gitMatch = /^diff --git a\/.+ b\/(.+)$/.exec(headerLines[0] ?? '');
        if (gitMatch)
            raw = gitMatch[1] ?? '';
    }
    // Drop a tab-separated trailer, should one be present.
    const [beforeTab] = raw.split('\t');
    const cleaned = (beforeTab ?? '').trim();
    const segments = cleaned.split('/');
    return segments[segments.length - 1] ?? '';
}
|
|
160
|
+
/**
 * Decide whether a hunk changes nothing but whitespace.
 *
 * Every removed (`-`) and added (`+`) line has its marker stripped, its runs of
 * whitespace collapsed to a single space, and its ends trimmed. The hunk is
 * whitespace-only when the removed and added lines then form the same multiset.
 * A hunk with no removed lines, or with unequal removed/added counts, is never
 * whitespace-only, so pure-context hunks return `false`.
 *
 * The input is a single hunk (its `@@` line plus body), so it holds no
 * `---` / `+++` file headers. A body line starting with `---` or `+++` is
 * therefore a real removal/addition whose content begins with `--` / `++`
 * (SQL or Lua comment, YAML document marker, Markdown rule) and is counted
 * like any other change. Skipping such lines would let a hunk containing a
 * genuine deletion be classified as whitespace-only.
 *
 * @param hunkLines The lines of one hunk, starting with its `@@` header.
 * @returns `true` when the hunk's only changes are whitespace.
 */
function isWhitespaceOnlyHunk(hunkLines) {
    const normalize = (s) => s.replace(/\s+/g, ' ').trim();
    const removed = [];
    const added = [];
    for (const line of hunkLines) {
        // The `@@` header and context lines start with neither marker.
        if (line.startsWith('-'))
            removed.push(normalize(line.slice(1)));
        else if (line.startsWith('+'))
            added.push(normalize(line.slice(1)));
    }
    if (removed.length === 0 || removed.length !== added.length)
        return false;
    // Equal-length sorted lists are equal element-wise iff the multisets match.
    removed.sort();
    added.sort();
    return removed.every((value, i) => value === added[i]);
}
|
|
183
|
+
/**
 * Shrink a unified diff to its changed lines plus nearby context.
 *
 * The diff is scanned once to group `@@` hunks by file and to record each
 * file's header lines. Per file, at most `opts.maxItems` hunks (default 12)
 * are kept: the first, the last when the cap allows two, then the best-ranked
 * ones (query overlap, then number of changed lines, then position). Inside a
 * kept hunk only changed lines and body lines within three positions of a
 * changed line survive. Header lines are kept for every file that has a kept
 * hunk. The surviving line indices are handed to `elide` and the result is
 * wrapped by `finalizeLossy`.
 *
 * Inputs shorter than `opts.minLines` (default 12), or without any hunk, are
 * returned through `passthroughResult`.
 *
 * @param text The diff text.
 * @param opts Compression options (`minLines`, `maxItems`, `query`, …).
 * @returns The result of `passthroughResult` or `finalizeLossy`.
 */
function compressDiffCore(text, opts = {}) {
    const lines = splitLines(text);
    if (lines.length < (opts.minLines ?? 12))
        return passthroughResult(text, EContentType.GitDiff);
    const queryTerms = queryTokens(opts.query);
    const CONTEXT_RADIUS = 3;
    const hunkCap = opts.maxItems ?? 12;
    const headersByFile = new Map();
    const allHunks = [];
    let fileNo = -1;
    let openHunk = null;
    let gitHeaderOpen = false; // the current file was opened by a `diff --git` line
    let awaitingPlus = false; // a `--- ` header was just accepted; `+++ ` should follow
    // Content seen before any file header belongs to an implicit file 0.
    const ensureFile = () => {
        if (fileNo >= 0)
            return;
        fileNo = 0;
        if (!headersByFile.has(0))
            headersByFile.set(0, []);
    };
    // Record a header line for the current file; headers always close the open hunk.
    const addHeaderLine = (idx) => {
        ensureFile();
        const bucket = headersByFile.get(fileNo) ?? [];
        bucket.push(idx);
        headersByFile.set(fileNo, bucket);
        openHunk = null;
    };
    for (let idx = 0; idx < lines.length; idx += 1) {
        const line = lines[idx] ?? '';
        if (line.startsWith('diff --git ')) {
            fileNo += 1;
            headersByFile.set(fileNo, [idx]);
            openHunk = null;
            gitHeaderOpen = true;
            awaitingPlus = false;
        }
        else if (HUNK_HEADER_RE.test(line)) {
            ensureFile();
            openHunk = { file: fileNo, header: idx, body: [], changeCount: 0, score: 0 };
            allHunks.push(openHunk);
            gitHeaderOpen = false;
            awaitingPlus = false;
        }
        else if (line.startsWith('--- ') &&
            (lines[idx + 1] ?? '').startsWith('+++ ') &&
            (lines[idx + 2] ?? '').startsWith('@@')) {
            // Only a `--- ` / `+++ ` pair directly followed by a hunk header is a
            // file header; otherwise the line is hunk content. The pair starts a
            // new file unless a `diff --git` line already opened one.
            if (!gitHeaderOpen) {
                fileNo += 1;
                headersByFile.set(fileNo, []);
            }
            addHeaderLine(idx);
            gitHeaderOpen = false;
            awaitingPlus = true;
        }
        else if (awaitingPlus && line.startsWith('+++ ')) {
            addHeaderLine(idx);
            awaitingPlus = false;
        }
        else if (SAFE_HEADER_RE.test(line)) {
            addHeaderLine(idx);
        }
        else if (openHunk) {
            openHunk.body.push(idx);
            if (isChangeLine(line)) {
                openHunk.changeCount += 1;
                openHunk.score += queryOverlap(line, queryTerms) * 0.3;
            }
        }
    }
    if (allHunks.length === 0)
        return passthroughResult(text, EContentType.GitDiff);
    // Group hunks by file so the cap can be applied per file.
    const hunksByFile = new Map();
    for (const hunk of allHunks) {
        const bucket = hunksByFile.get(hunk.file);
        if (bucket)
            bucket.push(hunk);
        else
            hunksByFile.set(hunk.file, [hunk]);
    }
    const selected = new Set();
    for (const fileHunks of hunksByFile.values()) {
        let chosen = fileHunks;
        if (fileHunks.length > hunkCap) {
            // Over the cap: first hunk, last hunk (if two fit), then by rank until full.
            const picked = new Set([fileHunks[0]]);
            if (hunkCap >= 2)
                picked.add(fileHunks[fileHunks.length - 1]);
            const byRank = [...fileHunks].sort((a, b) => (b.score - a.score) || (b.changeCount - a.changeCount) || (a.header - b.header));
            for (const candidate of byRank) {
                if (picked.size >= hunkCap)
                    break;
                picked.add(candidate);
            }
            chosen = picked;
        }
        for (const hunk of chosen)
            selected.add(hunk);
    }
    const keep = new Set();
    const filesShown = new Set();
    for (const hunk of selected)
        filesShown.add(hunk.file);
    for (const [file, headerIdxs] of headersByFile) {
        if (!filesShown.has(file))
            continue;
        for (const idx of headerIdxs)
            keep.add(idx);
    }
    for (const hunk of selected) {
        keep.add(hunk.header);
        const { body } = hunk;
        // Flag each changed position together with its surrounding window.
        const wanted = new Array(body.length).fill(false);
        body.forEach((lineIdx, pos) => {
            if (!isChangeLine(lines[lineIdx] ?? ''))
                return;
            const from = Math.max(0, pos - CONTEXT_RADIUS);
            const to = Math.min(body.length - 1, pos + CONTEXT_RADIUS);
            for (let q = from; q <= to; q += 1)
                wanted[q] = true;
        });
        body.forEach((lineIdx, pos) => {
            if (wanted[pos])
                keep.add(lineIdx);
        });
    }
    return finalizeLossy({
        original: text,
        body: elide(lines, keep),
        contentType: EContentType.GitDiff,
        strategy: ECompressionStrategy.Diff,
        opts,
        note: `full diff: ${allHunks.length} hunks across ${hunksByFile.size} files`,
    });
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { EContentType } from '../content/content-type.js';
|
|
2
|
+
import type { ICompressionResult } from '../result/compression-result.js';
|
|
3
|
+
import type { ICompressOptions } from '../result/compress-options.js';
|
|
4
|
+
/**
|
|
5
|
+
* Conservative generic reduction for prose / plain text: drop exact-duplicate
|
|
6
|
+
* non-blank lines (keeping the first occurrence) and collapse runs of blank
|
|
7
|
+
* lines. Prose with little repetition passes through unchanged — which is the
|
|
8
|
+
* honest outcome; structured content should route to a typed compressor
|
|
9
|
+
* instead. Recoverable via CCR.
|
|
10
|
+
*/
|
|
11
|
+
export declare function compressLines(text: string, contentType?: EContentType, opts?: ICompressOptions): ICompressionResult;
|
|
12
|
+
//# sourceMappingURL=compress-lines.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"compress-lines.d.ts","sourceRoot":"","sources":["../../src/text/compress-lines.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAE1D,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AAC1E,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AAItE;;;;;;GAMG;AACH,wBAAgB,aAAa,CAC3B,IAAI,EAAE,MAAM,EACZ,WAAW,GAAE,YAAqC,EAClD,IAAI,GAAE,gBAAqB,GAC1B,kBAAkB,CA+BpB"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import { EContentType } from "../content/content-type.js";
|
|
2
|
+
import { ECompressionStrategy } from "../result/compression-strategy.js";
|
|
3
|
+
import { splitLines, elide } from "./line-utils.js";
|
|
4
|
+
import { finalizeLossy, passthroughResult } from "./finalize.js";
|
|
5
|
+
/**
 * Generic line-level reduction for plain text.
 *
 * Keeps the first line of every run of blank (whitespace-only) lines and the
 * first occurrence of every distinct non-blank line; later exact duplicates
 * are dropped. The kept line indices are handed to `elide` and the result is
 * wrapped by `finalizeLossy`. Text shorter than `opts.minLines` (default 8) is
 * returned through `passthroughResult`.
 *
 * @param text The text to reduce.
 * @param contentType Content type recorded on the result; defaults to plain text.
 * @param opts Compression options (`minLines`, …).
 * @returns The result of `passthroughResult` or `finalizeLossy`.
 */
export function compressLines(text, contentType = EContentType.PlainText, opts = {}) {
    const lines = splitLines(text);
    if (lines.length < (opts.minLines ?? 8))
        return passthroughResult(text, contentType);
    const keep = new Set();
    const seen = new Set();
    let inBlankRun = false;
    for (const [idx, raw] of lines.entries()) {
        const line = raw ?? '';
        const isBlank = line.trim() === '';
        if (isBlank) {
            // Only the first blank of a run survives.
            if (!inBlankRun)
                keep.add(idx);
        }
        else if (!seen.has(line)) {
            seen.add(line);
            keep.add(idx);
        }
        inBlankRun = isBlank;
    }
    return finalizeLossy({
        original: text,
        body: elide(lines, keep),
        contentType,
        strategy: ECompressionStrategy.Lines,
        opts,
        note: `full text: ${lines.length} lines`,
    });
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { ICompressionResult } from '../result/compression-result.js';
|
|
2
|
+
import type { ICompressOptions } from '../result/compress-options.js';
|
|
3
|
+
/**
|
|
4
|
+
* Reduce build / test / runtime logs to their signal: errors and their FULL
|
|
5
|
+
* multi-frame stack traces, the exception punchline, de-duplicated warnings,
|
|
6
|
+
* summary lines, and first/last anchors. The rest is elided. When a hard
|
|
7
|
+
* `maxItems` cap applies, lines are dropped by PRIORITY (summaries > errors >
|
|
8
|
+
* anchors > other), never by position — so the closing summary always survives.
|
|
9
|
+
* Deterministic and order-preserving; the full log is recoverable via CCR.
|
|
10
|
+
*/
|
|
11
|
+
export declare function compressLog(text: string, opts?: ICompressOptions): ICompressionResult;
|
|
12
|
+
//# sourceMappingURL=compress-log.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"compress-log.d.ts","sourceRoot":"","sources":["../../src/text/compress-log.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AAC1E,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AA0BtE;;;;;;;GAOG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,GAAE,gBAAqB,GAAG,kBAAkB,CAsIzF"}
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
import { EContentType } from "../content/content-type.js";
|
|
2
|
+
import { ECompressionStrategy } from "../result/compression-strategy.js";
|
|
3
|
+
import { splitLines, dedupeKey, queryTokens } from "./line-utils.js";
|
|
4
|
+
import { finalizeLossy, passthroughResult } from "./finalize.js";
|
|
5
|
+
import { mineLogTemplates } from "./log-template.js";
|
|
6
|
+
import { bm25Scores } from "../relevance/bm25.js";
|
|
7
|
+
import { formatCcrMarker } from "../ccr/ccr-marker.js";
|
|
8
|
+
const ERROR_RE = /\b(?:ERROR|FATAL|FAIL(?:ED|URE)?|EXCEPTION|panic)\b/i;
|
|
9
|
+
// Failure lines that are often the root cause yet carry none of the
// ERROR/FATAL/FAIL keywords: native crashes (segfault, core dump, bus error),
// the OOM killer, linker errors, fatal POSIX signals, and assertion failures.
// They are treated exactly like an error line. An occasional benign match
// costs one extra kept line, which is far cheaper than dropping the cause.
//
// The whole alternation is closed by a trailing `\b`, so every alternative has
// to end where a word boundary can follow. `cannot find -l` is always followed
// directly by the library name (`-lssl`), so the name is consumed with `\w*`;
// without that the linker error could never match.
const FATAL_SIGNAL_RE = /\b(?:segmentation fault|segfault|core dumped|bus error|out of memory|oom[- ]?kill(?:er|ed)?|killed process|undefined reference to|undefined symbol|symbol\(s\) not found|cannot find -l\w*|assertion (?:failed|.*failed)|SIG(?:SEGV|ABRT|KILL|BUS|FPE|ILL)\b|signal \d+|Aborted)\b/i;
|
|
17
|
+
const WARN_RE = /\bWARN(?:ING)?\b/i;
|
|
18
|
+
// Result / summary lines: test counts, Jest-style `Tests:` / `Test Suites:`
// rows, pytest `collected N`, `BUILD SUCCESS|FAILED`, `Summary:`, or a line
// opening with a check/cross mark. The closing `\b` is attached only to the
// alternatives that end in a word character: after a colon it would demand a
// word character immediately behind it, so `Tests: …` and `Summary: …` could
// never match on their own.
const SUMMARY_RE = /\b(?:\d+ (?:passed|failed|error|errors|skipped)\b|Tests:|Test Suites:|collected \d+\b|BUILD (?:SUCCESS|FAIL(?:ED|URE)?)\b|Summary:)|^[✓✗×]/;
|
|
19
|
+
const STACK_RE = /^\s+(?:at\s+\S+|File ".*", line \d+)/;
|
|
20
|
+
// Start of a multi-frame trace region.
|
|
21
|
+
const TRACEBACK_START = /^\s*Traceback\b|^\s*Caused by:|^\s*Exception in thread\b/;
|
|
22
|
+
// The punchline of a trace: `ValueError: boom`, `java.lang.NullPointerException: null`.
|
|
23
|
+
const EXCEPTION_SUMMARY = /^[\w.$]*(?:Error|Exception|Warning|Panic)\b.*:/;
|
|
24
|
+
/**
 * Reduce build / test / runtime logs to their signal: errors and their FULL
 * multi-frame stack traces, the exception punchline, de-duplicated warnings,
 * summary lines, query-relevant lines, and first/last anchors. The rest is
 * elided.
 *
 * When a hard `maxItems` cap applies, lines are dropped by PRIORITY, never by
 * position: the closing (last) summary line is force-kept first, then the
 * remaining budget is filled errors > other summaries > query-relevant >
 * anchors > everything else. Inside a tier the line with the higher BM25
 * relevance wins, then the earlier line.
 *
 * Deterministic and order-preserving; the full log is recoverable via CCR.
 *
 * @param text Raw log text.
 * @param opts Options. Read here: `minLines` (default 12 — shorter inputs are
 *   passed through untouched), `query`, `maxItems` and `store`. The whole
 *   object is also forwarded to `finalizeLossy`.
 * @returns The value produced by `passthroughResult` (input shorter than
 *   `minLines`) or by `finalizeLossy` (compressed body).
 */
export function compressLog(text, opts = {}) {
    const lines = splitLines(text);
    const minLines = opts.minLines ?? 12;
    if (lines.length < minLines)
        return passthroughResult(text, EContentType.BuildLog);
    // P3.2: BM25 relevance for the query (idf-weighted, length-normalized, ID-term
    // boosted). Computed only when a query is present, so the no-query path is
    // unchanged.
    const relScores = opts.query ? bm25Scores(opts.query, lines) : null;
    const keep = new Set(); // every line index that survives
    const errorIdx = new Set(); // error lines, trace frames and frame source lines
    const summaryIdx = new Set(); // lines matching SUMMARY_RE
    const anchorIdx = new Set(); // first two / last two lines of the log
    const queryIdx = new Set(); // lines with a positive relevance score
    const seenWarn = new Set(); // dedupe keys of warnings already kept
    let stackActive = false; // we are inside a trace region
    let inFrameSource = false; // we are inside a frame's indented source block
    // Anchors: the first two and last two lines (bounds-checked for short logs).
    for (const i of [0, 1, lines.length - 2, lines.length - 1]) {
        if (i >= 0 && i < lines.length) {
            keep.add(i);
            anchorIdx.add(i);
        }
    }
    for (let i = 0; i < lines.length; i += 1) {
        const line = lines[i] ?? '';
        const isSummary = SUMMARY_RE.test(line);
        // Errors / trace starts are handled FIRST so trace control flow is correct,
        // but a line that is ALSO a summary (e.g. "Tests: 1 failed" — "failed"
        // matches ERROR_RE) is still tagged into summaryIdx so the cap ranks it as
        // a summary. This keeps multi-frame traces intact while letting the closing
        // result survive a tight cap.
        if (ERROR_RE.test(line) ||
            FATAL_SIGNAL_RE.test(line) ||
            TRACEBACK_START.test(line) ||
            EXCEPTION_SUMMARY.test(line)) {
            keep.add(i);
            errorIdx.add(i);
            if (isSummary)
                summaryIdx.add(i);
            // Keep one line of leading context; it is not tagged, so under a cap it
            // ranks with "everything else" unless another rule tagged it.
            if (i - 1 >= 0)
                keep.add(i - 1);
            stackActive = true;
            inFrameSource = false;
            continue;
        }
        if (stackActive) {
            if (line.trim().length === 0) {
                stackActive = false; // a blank line ends the trace region
                inFrameSource = false;
            }
            else if (STACK_RE.test(line)) {
                keep.add(i); // a real stack frame (`at …` / `File …`)
                errorIdx.add(i);
                inFrameSource = true;
                continue;
            }
            else if (/^\s/.test(line)) {
                // Indented line inside the trace region. Directly under a frame it is
                // that frame's source block — keep the whole block. Otherwise (e.g. a
                // captured-stdout / locals dump straight after the error) drop it.
                // Either way we stay in the trace region.
                if (inFrameSource) {
                    keep.add(i);
                    errorIdx.add(i);
                }
                continue;
            }
            else {
                stackActive = false; // a dedented non-trace line ends the region — re-check it below
                inFrameSource = false;
            }
        }
        if (isSummary) {
            keep.add(i);
            summaryIdx.add(i);
            continue;
        }
        if (WARN_RE.test(line)) {
            // Only the first occurrence of each distinct warning is kept.
            const k = dedupeKey(line);
            if (!seenWarn.has(k)) {
                seenWarn.add(k);
                keep.add(i);
            }
            continue;
        }
        if (relScores && relScores[i] > 0) {
            keep.add(i);
            queryIdx.add(i);
        }
    }
    // Hard cap: force-keep the CLOSING summary (the last summary line) so the
    // test/build result always survives, then fill the rest errors-first, then
    // other summaries, then query hits, then anchors, then the remainder. This
    // keeps both the real error and the closing result even when summary-shaped
    // noise is abundant.
    if (opts.maxItems && keep.size > opts.maxItems) {
        const cap = opts.maxItems;
        const chosen = new Set();
        // Highest summary index, found with a linear scan (no copy + sort needed).
        let closingSummary = -1;
        for (const idx of summaryIdx) {
            if (idx > closingSummary)
                closingSummary = idx;
        }
        if (closingSummary >= 0)
            chosen.add(closingSummary);
        const rank = (i) => errorIdx.has(i) ? 0 : summaryIdx.has(i) ? 1 : queryIdx.has(i) ? 2 : anchorIdx.has(i) ? 3 : 4;
        // Within a tier, the more query-relevant line (higher BM25) wins; rel is 0
        // for non-query lines, so this is a no-op tiebreak without a query.
        const rel = (i) => (relScores ? relScores[i] : 0);
        const rest = [...keep]
            .filter((i) => !chosen.has(i))
            .sort((a, b) => rank(a) - rank(b) || rel(b) - rel(a) || a - b);
        for (const i of rest) {
            if (chosen.size >= cap)
                break;
            chosen.add(i);
        }
        keep.clear();
        for (const i of chosen)
            keep.add(i);
    }
    // P2.2: collapse repetitive runs of KEPT lines (summary/query spam) into
    // lossless template blocks. Mining only the *kept* runs is the key: lines the
    // selector drops stay dropped (a one-line `… omitted …` always beats keeping
    // a template block), so noise logs never regress — only signal the agent
    // actually sees gets the lossless columnar collapse.
    //
    // P4.5: when a CCR store is present, cache the original up front and stamp its
    // key into each elision hint, so the agent can tell a root cause was dropped
    // RIGHT THERE and retrieve it. finalizeLossy reuses this same key (and skips
    // its own trailing marker since the body already carries it).
    const ccrKey = opts.store ? opts.store.put(text) : undefined;
    const body = elideWithTemplates(lines, keep, ccrKey);
    return finalizeLossy({
        original: text,
        body,
        contentType: EContentType.BuildLog,
        strategy: ECompressionStrategy.Log,
        opts,
        note: `full log: ${lines.length} lines`,
    });
}
|
|
169
|
+
/**
 * Render `lines` as alternating runs of kept and dropped lines.
 *
 * Every maximal run of consecutive indices present in `keep` is handed to
 * {@link mineLogTemplates} and the `lines` it returns are emitted in its place.
 * Every maximal run of indices absent from `keep` is replaced by one
 * `… N line(s) omitted` hint; when `ccrKey` is truthy the hint is suffixed with
 * ` → ` plus `formatCcrMarker(ccrKey)` so the elided detail can be looked up in
 * place (P4.5).
 *
 * @param lines All lines of the original log, in order.
 * @param keep Set of line indices to keep.
 * @param ccrKey Optional CCR key stamped into each omission hint.
 * @returns The rendered body, joined with `\n`.
 */
function elideWithTemplates(lines, keep, ccrKey) {
    const rendered = [];
    const total = lines.length;
    let start = 0;
    while (start < total) {
        // Extend [start, end) over the maximal run sharing start's kept/dropped state.
        const kept = keep.has(start);
        let end = start + 1;
        while (end < total && keep.has(end) === kept) {
            end += 1;
        }
        if (kept) {
            for (const mined of mineLogTemplates(lines.slice(start, end)).lines) {
                rendered.push(mined);
            }
        }
        else {
            const count = end - start;
            const suffix = ccrKey ? ` → ${formatCcrMarker(ccrKey)}` : '';
            rendered.push(`… ${count} line${count === 1 ? '' : 's'} omitted${suffix}`);
        }
        start = end;
    }
    return rendered.join('\n');
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { ICompressionResult } from '../result/compression-result.js';
|
|
2
|
+
import type { ICompressOptions } from '../result/compress-options.js';
|
|
3
|
+
/**
 * Markdown-aware reduction that keeps a document's SKELETON — every header, the
 * first line of each section/paragraph, table rows, and a capped run of list
 * items — while thinning paragraph continuations and collapsing fenced code
 * block bodies. Structure is never dropped (headers always survive), so the
 * outline stays navigable; the full document is recoverable via CCR.
 *
 * Note: this runs only when an agent explicitly compresses markdown (via
 * `shrk compress` / `compress_context`). SharkCraft's own briefs/context are
 * never silently passed through it.
 *
 * @param text The markdown document to reduce.
 * @param opts Optional compression options. NOTE(review): which option fields
 *   this function honours is not visible from the declaration — confirm
 *   against the implementation.
 * @returns The compression result for the reduced document.
 */
|
|
14
|
+
export declare function compressMarkdown(text: string, opts?: ICompressOptions): ICompressionResult;
|
|
15
|
+
//# sourceMappingURL=compress-markdown.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"compress-markdown.d.ts","sourceRoot":"","sources":["../../src/text/compress-markdown.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AAC1E,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AAStE;;;;;;;;;;GAUG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,GAAE,gBAAqB,GAAG,kBAAkB,CA8E9F"}
|