context-compress 2026.3.22 → 2026.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +17 -0
- package/.claude-plugin/plugin.json +12 -0
- package/.codex-plugin/plugin.json +40 -0
- package/.mcp.json +11 -0
- package/README.md +275 -44
- package/dist/cli/doctor.d.ts.map +1 -1
- package/dist/cli/doctor.js +2 -10
- package/dist/cli/doctor.js.map +1 -1
- package/dist/cli/filter.d.ts +52 -0
- package/dist/cli/filter.d.ts.map +1 -0
- package/dist/cli/filter.js +200 -0
- package/dist/cli/filter.js.map +1 -0
- package/dist/cli/index.d.ts +8 -4
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +19 -6
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/lite.d.ts +15 -0
- package/dist/cli/lite.d.ts.map +1 -0
- package/dist/cli/lite.js +37 -0
- package/dist/cli/lite.js.map +1 -0
- package/dist/cli/setup.d.ts +23 -1
- package/dist/cli/setup.d.ts.map +1 -1
- package/dist/cli/setup.js +122 -21
- package/dist/cli/setup.js.map +1 -1
- package/dist/executor.d.ts.map +1 -1
- package/dist/executor.js +7 -4
- package/dist/executor.js.map +1 -1
- package/dist/filters.d.ts +39 -5
- package/dist/filters.d.ts.map +1 -1
- package/dist/filters.js +577 -25
- package/dist/filters.js.map +1 -1
- package/dist/hooks/pretooluse.js +57 -0
- package/dist/hooks/pretooluse.js.map +1 -1
- package/dist/network.d.ts.map +1 -1
- package/dist/network.js +11 -0
- package/dist/network.js.map +1 -1
- package/dist/server.bundle.mjs +1140 -641
- package/dist/server.bundle.mjs.map +4 -4
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +36 -612
- package/dist/server.js.map +1 -1
- package/dist/stats.js +1 -1
- package/dist/stats.js.map +1 -1
- package/dist/store.d.ts +1 -0
- package/dist/store.d.ts.map +1 -1
- package/dist/store.js +15 -2
- package/dist/store.js.map +1 -1
- package/dist/tools/batch-execute.d.ts +4 -0
- package/dist/tools/batch-execute.d.ts.map +1 -0
- package/dist/tools/batch-execute.js +75 -0
- package/dist/tools/batch-execute.js.map +1 -0
- package/dist/tools/context.d.ts +17 -0
- package/dist/tools/context.d.ts.map +1 -0
- package/dist/tools/context.js +2 -0
- package/dist/tools/context.js.map +1 -0
- package/dist/tools/discover.d.ts +4 -0
- package/dist/tools/discover.d.ts.map +1 -0
- package/dist/tools/discover.js +65 -0
- package/dist/tools/discover.js.map +1 -0
- package/dist/tools/execute-file.d.ts +4 -0
- package/dist/tools/execute-file.d.ts.map +1 -0
- package/dist/tools/execute-file.js +66 -0
- package/dist/tools/execute-file.js.map +1 -0
- package/dist/tools/execute.d.ts +4 -0
- package/dist/tools/execute.d.ts.map +1 -0
- package/dist/tools/execute.js +54 -0
- package/dist/tools/execute.js.map +1 -0
- package/dist/tools/fetch-and-index.d.ts +4 -0
- package/dist/tools/fetch-and-index.d.ts.map +1 -0
- package/dist/tools/fetch-and-index.js +91 -0
- package/dist/tools/fetch-and-index.js.map +1 -0
- package/dist/tools/index-content.d.ts +4 -0
- package/dist/tools/index-content.d.ts.map +1 -0
- package/dist/tools/index-content.js +85 -0
- package/dist/tools/index-content.js.map +1 -0
- package/dist/tools/search.d.ts +4 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +57 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/tools/stats.d.ts +4 -0
- package/dist/tools/stats.d.ts.map +1 -0
- package/dist/tools/stats.js +10 -0
- package/dist/tools/stats.js.map +1 -0
- package/dist/types.d.ts +0 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/util/auto-mode.d.ts +40 -0
- package/dist/util/auto-mode.d.ts.map +1 -0
- package/dist/util/auto-mode.js +181 -0
- package/dist/util/auto-mode.js.map +1 -0
- package/dist/util/fetch-code.d.ts +10 -0
- package/dist/util/fetch-code.d.ts.map +1 -0
- package/dist/util/fetch-code.js +87 -0
- package/dist/util/fetch-code.js.map +1 -0
- package/dist/util/intent-filter.d.ts +17 -0
- package/dist/util/intent-filter.d.ts.map +1 -0
- package/dist/util/intent-filter.js +28 -0
- package/dist/util/intent-filter.js.map +1 -0
- package/dist/util/label.d.ts +4 -0
- package/dist/util/label.d.ts.map +1 -0
- package/dist/util/label.js +14 -0
- package/dist/util/label.js.map +1 -0
- package/dist/util/path.d.ts +8 -0
- package/dist/util/path.d.ts.map +1 -0
- package/dist/util/path.js +21 -0
- package/dist/util/path.js.map +1 -0
- package/dist/util/stream-compress.d.ts +36 -0
- package/dist/util/stream-compress.d.ts.map +1 -0
- package/dist/util/stream-compress.js +104 -0
- package/dist/util/stream-compress.js.map +1 -0
- package/dist/util/version.d.ts +2 -0
- package/dist/util/version.d.ts.map +1 -0
- package/dist/util/version.js +15 -0
- package/dist/util/version.js.map +1 -0
- package/docs/agentic-benchmark.md +110 -0
- package/docs/token-reduction-report.md +47 -18
- package/hooks/claude-codex-hooks.json +19 -0
- package/hooks/pretooluse.mjs +38 -0
- package/package.json +12 -8
- package/skills/context-compress-audit/SKILL.md +49 -0
- package/skills/context-compress-audit/agents/openai.yaml +13 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Build a self-contained JS snippet that fetches a URL, converts HTML→markdown,
|
|
3
|
+
* and prints the result. Runs inside the sandbox subprocess.
|
|
4
|
+
*
|
|
5
|
+
* When `resolvedIp` is provided, the URL is rewritten to connect to that IP
|
|
6
|
+
* with the original Host header preserved — defeats DNS rebinding (TOCTOU)
|
|
7
|
+
* between the validation step and the actual fetch.
|
|
8
|
+
*/
|
|
9
|
+
export declare function buildFetchCode(url: string, resolvedIp?: string | null): string;
|
|
10
|
+
//# sourceMappingURL=fetch-code.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-code.d.ts","sourceRoot":"","sources":["../../src/util/fetch-code.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,GAAG,MAAM,CA6E9E"}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Build a self-contained JS snippet that fetches a URL, converts HTML→markdown,
|
|
3
|
+
* and prints the result. Runs inside the sandbox subprocess.
|
|
4
|
+
*
|
|
5
|
+
* When `resolvedIp` is provided, the URL is rewritten to connect to that IP
|
|
6
|
+
* with the original Host header preserved — defeats DNS rebinding (TOCTOU)
|
|
7
|
+
* between the validation step and the actual fetch.
|
|
8
|
+
*/
|
|
9
|
+
export function buildFetchCode(url, resolvedIp) {
|
|
10
|
+
let fetchSetup;
|
|
11
|
+
if (resolvedIp) {
|
|
12
|
+
const pinnedUrl = new URL(url);
|
|
13
|
+
const originalHost = pinnedUrl.host;
|
|
14
|
+
// URL.hostname setter requires IPv6 literals to be bracketed; raw forms
|
|
15
|
+
// like "2001:db8::1" parse incorrectly (the first ":" is treated as a
|
|
16
|
+
// port delimiter). Detect IPv6 by colon-presence and wrap.
|
|
17
|
+
const hostnameValue = resolvedIp.includes(":") && !resolvedIp.startsWith("[") ? `[${resolvedIp}]` : resolvedIp;
|
|
18
|
+
pinnedUrl.hostname = hostnameValue;
|
|
19
|
+
fetchSetup = `
|
|
20
|
+
const url = ${JSON.stringify(pinnedUrl.toString())};
|
|
21
|
+
const resp = await fetch(url, { headers: { 'Host': ${JSON.stringify(originalHost)} }, redirect: 'error' });`;
|
|
22
|
+
}
|
|
23
|
+
else {
|
|
24
|
+
fetchSetup = `
|
|
25
|
+
const url = ${JSON.stringify(url)};
|
|
26
|
+
const resp = await fetch(url, { redirect: 'error' });`;
|
|
27
|
+
}
|
|
28
|
+
return `${fetchSetup}
|
|
29
|
+
if (!resp.ok) { console.error("HTTP " + resp.status); process.exit(1); }
|
|
30
|
+
const cl = resp.headers.get('content-length');
|
|
31
|
+
if (cl && parseInt(cl, 10) > 10 * 1024 * 1024) {
|
|
32
|
+
console.error("Response too large: " + cl + " bytes"); process.exit(1);
|
|
33
|
+
}
|
|
34
|
+
const html = await resp.text();
|
|
35
|
+
if (html.length > 10 * 1024 * 1024) {
|
|
36
|
+
console.error("Response body too large: " + html.length + " chars"); process.exit(1);
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
// Strip unwanted tags
|
|
40
|
+
let md = html
|
|
41
|
+
.replace(/<script[^>]*>[\\s\\S]*?<\\/script>/gi, "")
|
|
42
|
+
.replace(/<style[^>]*>[\\s\\S]*?<\\/style>/gi, "")
|
|
43
|
+
.replace(/<nav[^>]*>[\\s\\S]*?<\\/nav>/gi, "")
|
|
44
|
+
.replace(/<header[^>]*>[\\s\\S]*?<\\/header>/gi, "")
|
|
45
|
+
.replace(/<footer[^>]*>[\\s\\S]*?<\\/footer>/gi, "");
|
|
46
|
+
|
|
47
|
+
// Convert headings
|
|
48
|
+
md = md.replace(/<h1[^>]*>(.*?)<\\/h1>/gi, "# $1\\n");
|
|
49
|
+
md = md.replace(/<h2[^>]*>(.*?)<\\/h2>/gi, "## $1\\n");
|
|
50
|
+
md = md.replace(/<h3[^>]*>(.*?)<\\/h3>/gi, "### $1\\n");
|
|
51
|
+
md = md.replace(/<h4[^>]*>(.*?)<\\/h4>/gi, "#### $1\\n");
|
|
52
|
+
|
|
53
|
+
// Convert code blocks
|
|
54
|
+
md = md.replace(/<pre[^>]*><code[^>]*>(.*?)<\\/code><\\/pre>/gis, "\`\`\`\\n$1\\n\`\`\`\\n");
|
|
55
|
+
md = md.replace(/<code[^>]*>(.*?)<\\/code>/gi, "\`$1\`");
|
|
56
|
+
|
|
57
|
+
// Convert links
|
|
58
|
+
md = md.replace(/<a[^>]*href="([^"]*)"[^>]*>(.*?)<\\/a>/gi, "[$2]($1)");
|
|
59
|
+
|
|
60
|
+
// Convert lists
|
|
61
|
+
md = md.replace(/<li[^>]*>(.*?)<\\/li>/gi, "- $1\\n");
|
|
62
|
+
|
|
63
|
+
// Convert paragraphs
|
|
64
|
+
md = md.replace(/<p[^>]*>(.*?)<\\/p>/gis, "$1\\n\\n");
|
|
65
|
+
md = md.replace(/<br\\s*\\/?>/gi, "\\n");
|
|
66
|
+
|
|
67
|
+
// Strip remaining tags
|
|
68
|
+
md = md.replace(/<[^>]+>/g, "");
|
|
69
|
+
|
|
70
|
+
// Decode entities
|
|
71
|
+
md = md.replace(/</g, "<")
|
|
72
|
+
.replace(/>/g, ">")
|
|
73
|
+
.replace(/"/g, '"')
|
|
74
|
+
.replace(/'/g, "'")
|
|
75
|
+
.replace(/'/g, "'")
|
|
76
|
+
.replace(/ /g, " ")
|
|
77
|
+
.replace(/&#(\\d+);/g, (_, n) => { const c = parseInt(n, 10); return c > 0 && c <= 0x10FFFF ? String.fromCodePoint(c) : ''; })
|
|
78
|
+
.replace(/&#x([0-9a-fA-F]+);/g, (_, h) => { const c = parseInt(h, 16); return c > 0 && c <= 0x10FFFF ? String.fromCodePoint(c) : ''; })
|
|
79
|
+
.replace(/&/g, "&");
|
|
80
|
+
|
|
81
|
+
// Clean whitespace
|
|
82
|
+
md = md.replace(/\\n{3,}/g, "\\n\\n").trim();
|
|
83
|
+
|
|
84
|
+
console.log(md);
|
|
85
|
+
`;
|
|
86
|
+
}
|
|
87
|
+
//# sourceMappingURL=fetch-code.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-code.js","sourceRoot":"","sources":["../../src/util/fetch-code.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AACH,MAAM,UAAU,cAAc,CAAC,GAAW,EAAE,UAA0B;IACrE,IAAI,UAAkB,CAAC;IACvB,IAAI,UAAU,EAAE,CAAC;QAChB,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC/B,MAAM,YAAY,GAAG,SAAS,CAAC,IAAI,CAAC;QACpC,wEAAwE;QACxE,sEAAsE;QACtE,2DAA2D;QAC3D,MAAM,aAAa,GAClB,UAAU,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,UAAU,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC;QAC1F,SAAS,CAAC,QAAQ,GAAG,aAAa,CAAC;QACnC,UAAU,GAAG;cACD,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,QAAQ,EAAE,CAAC;qDACG,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,2BAA2B,CAAC;IAC5G,CAAC;SAAM,CAAC;QACP,UAAU,GAAG;cACD,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC;sDACqB,CAAC;IACtD,CAAC;IACD,OAAO,GAAG,UAAU;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAyDpB,CAAC;AACF,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { Config } from "../config.js";
|
|
2
|
+
import type { SessionTracker } from "../stats.js";
|
|
3
|
+
import type { ContentStore } from "../store.js";
|
|
4
|
+
interface IntentFilterDeps {
|
|
5
|
+
config: Config;
|
|
6
|
+
store: ContentStore;
|
|
7
|
+
tracker: SessionTracker;
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* Index large output and return a compact summary keyed to `intent`.
|
|
11
|
+
* For small output (<= config.intentSearchThreshold bytes), returns the
|
|
12
|
+
* original output unchanged so callers don't pay for trivial filtering.
|
|
13
|
+
*/
|
|
14
|
+
export declare function createIntentFilter(deps: IntentFilterDeps): (output: string, intent: string, sourceLabel: string) => string;
|
|
15
|
+
export type ApplyIntentFilter = ReturnType<typeof createIntentFilter>;
|
|
16
|
+
export {};
|
|
17
|
+
//# sourceMappingURL=intent-filter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"intent-filter.d.ts","sourceRoot":"","sources":["../../src/util/intent-filter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAC3C,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAGhD,UAAU,gBAAgB;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,YAAY,CAAC;IACpB,OAAO,EAAE,cAAc,CAAC;CACxB;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,gBAAgB,IAGtB,QAAQ,MAAM,EAAE,QAAQ,MAAM,EAAE,aAAa,MAAM,KAAG,MAAM,CAoB9F;AAED,MAAM,MAAM,iBAAiB,GAAG,UAAU,CAAC,OAAO,kBAAkB,CAAC,CAAC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { compactLabel } from "./label.js";
|
|
2
|
+
/**
 * Create a filter that swaps large tool output for a short, intent-focused
 * summary while the full text is kept in the content store.
 *
 * Output whose UTF-8 byte length is at or below `config.intentSearchThreshold`
 * is returned untouched, so small results are never indexed or summarised.
 *
 * @param deps - Collaborators: `config` (threshold and compression level),
 *   `store` (index / search / distinctive terms) and `tracker` (byte accounting).
 * @returns A function `(output, intent, sourceLabel) => string`.
 */
export function createIntentFilter(deps) {
    const { config, store, tracker } = deps;
    return function applyIntentFilter(output, intent, sourceLabel) {
        const outputBytes = Buffer.byteLength(output);
        if (!(outputBytes > config.intentSearchThreshold)) {
            return output;
        }
        // Order matters: index first, record the indexed size, then query.
        const indexed = store.index(output, sourceLabel);
        tracker.trackIndexed(outputBytes);
        const found = store.search(intent, { limit: 3 });
        const terms = store.getDistinctiveTerms(indexed.sourceId);
        const pieces = [
            `Indexed ${indexed.totalChunks} sections from ${sourceLabel}.\n`,
            `${found.results.length} sections matched "${intent}":\n\n`,
        ];
        for (const hit of found.results) {
            // Snippets are capped at 200 characters to keep the summary compact.
            pieces.push(` - **${hit.title}**: ${hit.snippet.slice(0, 200)}\n`);
        }
        const showTerms = terms.length > 0 && config.compressionLevel !== "ultra";
        if (showTerms) {
            pieces.push(`\nSearchable terms: ${terms.join(", ")}\n`);
        }
        pieces.push("\nUse search(queries: [...]) to retrieve full content of any section.");
        return compactLabel(pieces.join(""), config.compressionLevel);
    };
}
|
|
28
|
+
//# sourceMappingURL=intent-filter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"intent-filter.js","sourceRoot":"","sources":["../../src/util/intent-filter.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAQ1C;;;;GAIG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAsB;IACxD,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,IAAI,CAAC;IAExC,OAAO,SAAS,iBAAiB,CAAC,MAAc,EAAE,MAAc,EAAE,WAAmB;QACpF,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,MAAM,CAAC,qBAAqB;YAAE,OAAO,MAAM,CAAC;QAE7E,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;QACjD,OAAO,CAAC,YAAY,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC;QAEhD,MAAM,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;QACzD,MAAM,KAAK,GAAG,KAAK,CAAC,mBAAmB,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QAE1D,IAAI,QAAQ,GAAG,WAAW,OAAO,CAAC,WAAW,kBAAkB,WAAW,KAAK,CAAC;QAChF,QAAQ,IAAI,GAAG,aAAa,CAAC,OAAO,CAAC,MAAM,sBAAsB,MAAM,QAAQ,CAAC;QAChF,KAAK,MAAM,GAAG,IAAI,aAAa,CAAC,OAAO,EAAE,CAAC;YACzC,QAAQ,IAAI,SAAS,GAAG,CAAC,KAAK,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,CAAC;QACpE,CAAC;QACD,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,MAAM,CAAC,gBAAgB,KAAK,OAAO,EAAE,CAAC;YAC7D,QAAQ,IAAI,uBAAuB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;QACzD,CAAC;QACD,QAAQ,IAAI,uEAAuE,CAAC;QACpF,OAAO,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,gBAAgB,CAAC,CAAC;IACxD,CAAC,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"label.d.ts","sourceRoot":"","sources":["../../src/util/label.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAErD,gDAAgD;AAChD,wBAAgB,YAAY,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,gBAAgB,GAAG,MAAM,CAc5E"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
 * Shorten labels based on compression level.
 *
 * - `"ultra"`: removes markdown bold markers (`**`), replaces every
 *   "Use search(queries: [...]) to retrieve…" hint line with a short arrow
 *   hint, and blanks every "Searchable terms: …" line.
 * - `"compact"`: replaces every full-length search hint with a short arrow hint.
 * - any other level: returns the text unchanged.
 *
 * @param normal - Label text in its full (uncompressed) wording.
 * @param level - Compression level name.
 * @returns The label rewritten for the requested level.
 */
export function compactLabel(normal, level) {
    if (level === "ultra") {
        return normal
            .replace(/\*\*/g, "")
            .replace(/Use search\(queries: \[\.\.\.\]\) to retrieve.*$/gm, "→ search() for more")
            .replace(/Searchable terms: .+$/gm, "");
    }
    if (level === "compact") {
        // Global, matching the "ultra" branch: text that concatenates several
        // summaries must have every hint shortened, not only the first.
        return normal.replace(/Use search\(queries: \[\.\.\.\]\) to retrieve full content of any section\./g, "→ search() for details");
    }
    return normal;
}
|
|
14
|
+
//# sourceMappingURL=label.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"label.js","sourceRoot":"","sources":["../../src/util/label.ts"],"names":[],"mappings":"AAEA,gDAAgD;AAChD,MAAM,UAAU,YAAY,CAAC,MAAc,EAAE,KAAuB;IACnE,IAAI,KAAK,KAAK,OAAO,EAAE,CAAC;QACvB,OAAO,MAAM;aACX,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC;aACpB,OAAO,CAAC,mDAAmD,EAAE,qBAAqB,CAAC;aACnF,OAAO,CAAC,yBAAyB,EAAE,EAAE,CAAC,CAAC;IAC1C,CAAC;IACD,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QACzB,OAAO,MAAM,CAAC,OAAO,CACpB,4EAA4E,EAC5E,wBAAwB,CACxB,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AACf,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Returns true when `absPath` resolves inside (or equal to) `projectDir`.
|
|
3
|
+
* Uses realpathSync to defeat symlink-based escapes when the path exists,
|
|
4
|
+
* falling back to a string-prefix check for paths that don't exist yet
|
|
5
|
+
* (e.g. files about to be written).
|
|
6
|
+
*/
|
|
7
|
+
export declare function isWithinProject(absPath: string, projectDir: string): boolean;
|
|
8
|
+
//# sourceMappingURL=path.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"path.d.ts","sourceRoot":"","sources":["../../src/util/path.ts"],"names":[],"mappings":"AAGA;;;;;GAKG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO,CAU5E"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { realpathSync } from "node:fs";
|
|
2
|
+
import { resolve } from "node:path";
|
|
3
|
+
/**
 * Resolve `targetPath` to an absolute path with symlinks expanded as far as
 * the filesystem allows: the deepest existing ancestor is passed through
 * realpathSync and the not-yet-existing trailing segments are re-appended.
 */
function resolveThroughExistingAncestor(targetPath) {
    let current = resolve(targetPath);
    const missing = []; // trailing segments that do not exist (yet), outermost first
    for (;;) {
        try {
            return resolve(realpathSync(current), ...missing);
        }
        catch {
            const parent = resolve(current, "..");
            // Reached the filesystem root without anything resolvable: fall
            // back to the purely lexical form.
            if (parent === current)
                return resolve(targetPath);
            missing.unshift(current.slice(parent.length).replace(/^[\\/]+/, ""));
            current = parent;
        }
    }
}
/**
 * Returns true when `absPath` resolves inside (or equal to) `projectDir`.
 *
 * Both paths are canonicalised with realpathSync to defeat symlink-based
 * escapes. For paths that don't exist yet (e.g. files about to be written)
 * the nearest existing ancestor is canonicalised instead, so a new file
 * beneath a symlinked directory that points outside the project is rejected
 * rather than accepted by a plain string-prefix comparison.
 *
 * A `projectDir` that is the filesystem root is handled explicitly; appending
 * "/" to "/" would otherwise produce a prefix that no path starts with.
 *
 * NOTE(review): the containment check uses "/" as the separator, as the
 * original did; Windows-style paths would need `path.sep` — confirm whether
 * Windows is a supported platform.
 * NOTE(review): a dangling symlink as the final path segment is still treated
 * lexically; detecting it would need lstat/readlink.
 *
 * @param absPath - Path to test (resolved against the CWD if relative).
 * @param projectDir - Directory that must contain `absPath`.
 * @returns Whether `absPath` is `projectDir` itself or lies beneath it.
 */
export function isWithinProject(absPath, projectDir) {
    const target = resolveThroughExistingAncestor(absPath);
    const root = resolveThroughExistingAncestor(projectDir);
    if (target === root)
        return true;
    const prefix = root.endsWith("/") ? root : `${root}/`;
    return target.startsWith(prefix);
}
|
|
21
|
+
//# sourceMappingURL=path.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"path.js","sourceRoot":"","sources":["../../src/util/path.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC;;;;;GAKG;AACH,MAAM,UAAU,eAAe,CAAC,OAAe,EAAE,UAAkB;IAClE,IAAI,CAAC;QACJ,MAAM,UAAU,GAAG,YAAY,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;QAClD,MAAM,cAAc,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC;QAChD,OAAO,UAAU,KAAK,cAAc,IAAI,UAAU,CAAC,UAAU,CAAC,GAAG,cAAc,GAAG,CAAC,CAAC;IACrF,CAAC;IAAC,MAAM,CAAC;QACR,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;QACpC,MAAM,iBAAiB,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;QAC9C,OAAO,UAAU,KAAK,iBAAiB,IAAI,UAAU,CAAC,UAAU,CAAC,GAAG,iBAAiB,GAAG,CAAC,CAAC;IAC3F,CAAC;AACF,CAAC"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Line-by-line streaming compressor for long-running commands.
|
|
3
|
+
*
|
|
4
|
+
* Unlike the buffered pipeline (executor.ts / cli/filter.ts), this can emit
|
|
5
|
+
* compressed output before the child process exits. Necessary for
|
|
6
|
+
* `tail -f`, `cargo watch`, build commands with progressive output, etc.
|
|
7
|
+
*
|
|
8
|
+
* Trade-off: only stream-safe transformations are applied —
|
|
9
|
+
* - ANSI stripping (per-line, no state)
|
|
10
|
+
* - Progress/spinner line removal (per-line, no state)
|
|
11
|
+
* - Adjacent-duplicate dedup (single-line lookback)
|
|
12
|
+
*
|
|
13
|
+
* Skipped because they need full output:
|
|
14
|
+
* - applyCommandFilter (needs to detect summary/test markers globally)
|
|
15
|
+
* - groupErrorLines (needs full set of error patterns)
|
|
16
|
+
* - smartTruncate (needs final length)
|
|
17
|
+
*/
|
|
18
|
+
/**
|
|
19
|
+
* Stateful streaming compressor. Feed it chunks via `process(chunk)`, get
|
|
20
|
+
* compressed output back. Call `flush()` at end-of-stream to drain any
|
|
21
|
+
* buffered incomplete line + emit any pending dedup counter.
|
|
22
|
+
*/
|
|
23
|
+
export declare class StreamCompressor {
|
|
24
|
+
private buffer;
|
|
25
|
+
private prevLine;
|
|
26
|
+
private repeatCount;
|
|
27
|
+
/** Process a chunk; returns the filtered output ready to emit (may be empty). */
|
|
28
|
+
process(chunk: string): string;
|
|
29
|
+
/** Drain any remaining buffered line and emit final dedup counter. */
|
|
30
|
+
flush(): string;
|
|
31
|
+
/** Process the partial line in `buffer` (no trailing newline) and clear the buffer. */
|
|
32
|
+
private consumeBufferedLine;
|
|
33
|
+
/** Emit "(×N identical lines)" if a repeat counter is pending; reset state. */
|
|
34
|
+
private drainRepeatCounter;
|
|
35
|
+
}
|
|
36
|
+
//# sourceMappingURL=stream-compress.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stream-compress.d.ts","sourceRoot":"","sources":["../../src/util/stream-compress.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAqBH;;;;GAIG;AACH,qBAAa,gBAAgB;IAC5B,OAAO,CAAC,MAAM,CAAM;IACpB,OAAO,CAAC,QAAQ,CAAuB;IACvC,OAAO,CAAC,WAAW,CAAK;IAExB,iFAAiF;IACjF,OAAO,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM;IA2B9B,sEAAsE;IACtE,KAAK,IAAI,MAAM;IASf,uFAAuF;IACvF,OAAO,CAAC,mBAAmB;IAa3B,+EAA+E;IAC/E,OAAO,CAAC,kBAAkB;CAK1B"}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Line-by-line streaming compressor for long-running commands.
|
|
3
|
+
*
|
|
4
|
+
* Unlike the buffered pipeline (executor.ts / cli/filter.ts), this can emit
|
|
5
|
+
* compressed output before the child process exits. Necessary for
|
|
6
|
+
* `tail -f`, `cargo watch`, build commands with progressive output, etc.
|
|
7
|
+
*
|
|
8
|
+
* Trade-off: only stream-safe transformations are applied —
|
|
9
|
+
* - ANSI stripping (per-line, no state)
|
|
10
|
+
* - Progress/spinner line removal (per-line, no state)
|
|
11
|
+
* - Adjacent-duplicate dedup (single-line lookback)
|
|
12
|
+
*
|
|
13
|
+
* Skipped because they need full output:
|
|
14
|
+
* - applyCommandFilter (needs to detect summary/test markers globally)
|
|
15
|
+
* - groupErrorLines (needs full set of error patterns)
|
|
16
|
+
* - smartTruncate (needs final length)
|
|
17
|
+
*/
|
|
18
|
+
// Matches any CSI control sequence per ECMA-48: ESC [ , parameter bytes
// (0x30–0x3F, which includes "?" used by private modes such as ESC[?25l),
// intermediate bytes (0x20–0x2F), then one final byte (0x40–0x7E).
// biome-ignore lint/suspicious/noControlCharactersInRegex: ANSI escape detection requires \x1b
const ANSI_RE_G = /\x1b\[[0-?]*[ -\/]*[@-~]/g;
// A line made up solely of bar glyphs, brackets, digits, dots, "%" etc.
const PROGRESS_BAR_RE = /^[\s\[│├└─═━▓░█▒▏▎▍▌▋▊▉\]>=#\-.\d%]+$/;
// A spinner frame followed by whitespace at the start of the line.
// NOTE(review): the ASCII frames (- \ | /) also match ordinary content such
// as "- list item" or "| table row"; confirm this is acceptable for streams.
const SPINNER_RE = /^[⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏\-\\|/]\s/;
const DOWNLOAD_RE = /(?:downloading|uploading|fetching|resolving)\s+[\d.]+\s*[kmg]?b/i;
const SPEED_ETA_RE = /\d+\.?\d*\s*[kmg]?b\/s/i;
const ETA_RE = /eta|remaining/i;
/**
 * Decide whether a single (ANSI-stripped) line is transient progress output.
 * Blank lines are never progress: they delimit blocks and are kept.
 */
function isProgressLine(line) {
    const t = line.trim();
    if (t === "")
        return false; // keep empty lines (they delimit blocks)
    // Length guard avoids dropping very short lines such as "1" or "--".
    if (PROGRESS_BAR_RE.test(t) && t.length > 3)
        return true;
    if (SPINNER_RE.test(t))
        return true;
    if (DOWNLOAD_RE.test(t))
        return true;
    // A transfer rate only counts as progress when an ETA is on the same line.
    if (SPEED_ETA_RE.test(t) && ETA_RE.test(t))
        return true;
    return false;
}
/**
 * Stateful streaming compressor. Feed it chunks via `process(chunk)`, get
 * compressed output back. Call `flush()` at end-of-stream to drain any
 * buffered incomplete line + emit any pending dedup counter.
 *
 * State:
 *  - `buffer`: text received after the last newline (an incomplete line),
 *  - `prevLine`: the last line that was emitted, used for adjacent dedup,
 *  - `repeatCount`: how many consecutive copies of `prevLine` were suppressed.
 */
export class StreamCompressor {
    buffer = "";
    prevLine = null;
    repeatCount = 0;
    /** Process a chunk; returns the filtered output ready to emit (may be empty). */
    process(chunk) {
        this.buffer += chunk;
        const lastNl = this.buffer.lastIndexOf("\n");
        if (lastNl < 0)
            return ""; // no complete line yet
        const complete = this.buffer.slice(0, lastNl);
        // Keep whatever follows the last newline for the next call.
        this.buffer = this.buffer.slice(lastNl + 1);
        let out = "";
        for (const raw of complete.split("\n")) {
            out += this.compressLine(raw);
        }
        return out;
    }
    /** Drain any remaining buffered line and emit final dedup counter. */
    flush() {
        let out = "";
        if (this.buffer.length > 0) {
            out += this.consumeBufferedLine();
        }
        out += this.drainRepeatCounter();
        return out;
    }
    /** Process the partial line in `buffer` (no trailing newline) and clear the buffer. */
    consumeBufferedLine() {
        const pending = this.buffer;
        this.buffer = "";
        return this.compressLine(pending);
    }
    /**
     * Filter one line (without its newline): strip ANSI sequences, drop
     * progress lines, count adjacent non-blank duplicates, otherwise emit the
     * pending repeat marker followed by the line and a newline.
     */
    compressLine(raw) {
        const filtered = raw.replace(ANSI_RE_G, "");
        if (isProgressLine(filtered))
            return "";
        if (filtered === this.prevLine && filtered.trim() !== "") {
            this.repeatCount++;
            return "";
        }
        const counter = this.drainRepeatCounter();
        this.prevLine = filtered;
        return `${counter}${filtered}\n`;
    }
    /** Emit "(×N identical lines)" if a repeat counter is pending; reset state. */
    drainRepeatCounter() {
        // A single suppressed duplicate (count of 1) is dropped without a marker.
        const out = this.repeatCount > 1 ? ` ... (×${this.repeatCount} identical lines)\n` : "";
        this.repeatCount = 0;
        return out;
    }
}
|
|
104
|
+
//# sourceMappingURL=stream-compress.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stream-compress.js","sourceRoot":"","sources":["../../src/util/stream-compress.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,+FAA+F;AAC/F,MAAM,SAAS,GAAG,wBAAwB,CAAC;AAE3C,MAAM,eAAe,GAAG,uCAAuC,CAAC;AAChE,MAAM,UAAU,GAAG,uBAAuB,CAAC;AAC3C,MAAM,WAAW,GAAG,kEAAkE,CAAC;AACvF,MAAM,YAAY,GAAG,yBAAyB,CAAC;AAC/C,MAAM,MAAM,GAAG,gBAAgB,CAAC;AAEhC,SAAS,cAAc,CAAC,IAAY;IACnC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IACtB,IAAI,CAAC,KAAK,EAAE;QAAE,OAAO,KAAK,CAAC,CAAC,yCAAyC;IACrE,IAAI,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IACzD,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACpC,IAAI,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACrC,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACxD,OAAO,KAAK,CAAC;AACd,CAAC;AAED;;;;GAIG;AACH,MAAM,OAAO,gBAAgB;IACpB,MAAM,GAAG,EAAE,CAAC;IACZ,QAAQ,GAAkB,IAAI,CAAC;IAC/B,WAAW,GAAG,CAAC,CAAC;IAExB,iFAAiF;IACjF,OAAO,CAAC,KAAa;QACpB,IAAI,CAAC,MAAM,IAAI,KAAK,CAAC;QACrB,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QAC7C,IAAI,MAAM,GAAG,CAAC;YAAE,OAAO,EAAE,CAAC,CAAC,uBAAuB;QAClD,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;QAC9C,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAE5C,IAAI,GAAG,GAAG,EAAE,CAAC;QACb,KAAK,MAAM,GAAG,IAAI,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;YACxC,MAAM,QAAQ,GAAG,GAAG,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;YAC5C,IAAI,cAAc,CAAC,QAAQ,CAAC;gBAAE,SAAS;YAEvC,IAAI,QAAQ,KAAK,IAAI,CAAC,QAAQ,IAAI,QAAQ,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBAC1D,IAAI,CAAC,WAAW,EAAE,CAAC;gBACnB,SAAS;YACV,CAAC;YAED,IAAI,IAAI,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC;gBAC1B,GAAG,IAAI,WAAW,IAAI,CAAC,WAAW,qBAAqB,CAAC;YACzD,CAAC;YACD,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;YACrB,GAAG,IAAI,GAAG,QAAQ,IAAI,CAAC;YACvB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QAC1B,CAAC;QACD,OAAO,GAAG,CAAC;IACZ,CAAC;IAED,sEAAsE;IACtE,KAAK;QACJ,IAAI,GAAG,GAAG,EAAE,CAAC;QACb,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YA
C5B,GAAG,IAAI,IAAI,CAAC,mBAAmB,EAAE,CAAC;QACnC,CAAC;QACD,GAAG,IAAI,IAAI,CAAC,kBAAkB,EAAE,CAAC;QACjC,OAAO,GAAG,CAAC;IACZ,CAAC;IAED,uFAAuF;IAC/E,mBAAmB;QAC1B,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QACpD,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;QACjB,IAAI,cAAc,CAAC,QAAQ,CAAC;YAAE,OAAO,EAAE,CAAC;QACxC,IAAI,QAAQ,KAAK,IAAI,CAAC,QAAQ,IAAI,QAAQ,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;YAC1D,IAAI,CAAC,WAAW,EAAE,CAAC;YACnB,OAAO,EAAE,CAAC;QACX,CAAC;QACD,MAAM,OAAO,GAAG,IAAI,CAAC,kBAAkB,EAAE,CAAC;QAC1C,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,OAAO,OAAO,GAAG,QAAQ,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IACnE,CAAC;IAED,+EAA+E;IACvE,kBAAkB;QACzB,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,IAAI,CAAC,WAAW,qBAAqB,CAAC,CAAC,CAAC,EAAE,CAAC;QACzF,IAAI,CAAC,WAAW,GAAG,CAAC,CAAC;QACrB,OAAO,GAAG,CAAC;IACZ,CAAC;CACD"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"version.d.ts","sourceRoot":"","sources":["../../src/util/version.ts"],"names":[],"mappings":"AAMA,wBAAgB,UAAU,CAAC,QAAQ,SAAU,GAAG,MAAM,CAQrD"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import { dirname, resolve } from "node:path";
|
|
3
|
+
import { fileURLToPath } from "node:url";
|
|
4
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
/**
 * Read this package's version from its package.json.
 *
 * Candidate locations are tried in order:
 *  1. `<here>/../../package.json` — correct when this module runs from a
 *     directory two levels below the package root;
 *  2. `<here>/../package.json` — presumably correct when the code has been
 *     bundled into a file one level below the package root (NOTE(review):
 *     confirm against the bundle layout).
 *
 * A candidate is accepted only if it parses as JSON and has a non-empty
 * string `version`; unreadable, malformed or version-less files are skipped.
 *
 * @param fallback - Value returned when no candidate yields a version.
 * @returns The version string, or `fallback`.
 */
export function getVersion(fallback = "1.0.0") {
    const candidates = [
        resolve(__dirname, "..", "..", "package.json"),
        resolve(__dirname, "..", "package.json"),
    ];
    for (const pkgPath of candidates) {
        try {
            const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
            if (pkg !== null && typeof pkg === "object" && typeof pkg.version === "string" && pkg.version !== "") {
                return pkg.version;
            }
        }
        catch {
            // Missing or malformed file: best-effort lookup, try the next candidate.
        }
    }
    return fallback;
}
|
|
15
|
+
//# sourceMappingURL=version.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"version.js","sourceRoot":"","sources":["../../src/util/version.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEzC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAE1D,MAAM,UAAU,UAAU,CAAC,QAAQ,GAAG,OAAO;IAC5C,IAAI,CAAC;QACJ,MAAM,OAAO,GAAG,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,cAAc,CAAC,CAAC;QAC/D,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC;QACvD,OAAO,GAAG,CAAC,OAAO,IAAI,QAAQ,CAAC;IAChC,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,QAAQ,CAAC;IACjB,CAAC;AACF,CAAC"}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# Agentic Benchmark Plan
|
|
2
|
+
|
|
3
|
+
This benchmark measures context-compress in real agent sessions, not synthetic command output alone.
|
|
4
|
+
|
|
5
|
+
The claim to test:
|
|
6
|
+
|
|
7
|
+
> Large tool output should stay searchable outside the conversation, while the agent still solves the same task with less context pressure.
|
|
8
|
+
|
|
9
|
+
## Why This Exists
|
|
10
|
+
|
|
11
|
+
`docs/token-reduction-report.md` measures byte and token reduction for common operations. That is necessary, but it does not fully answer whether an agent remains effective across a real coding task.
|
|
12
|
+
|
|
13
|
+
This benchmark adds the missing layer: run the same task with and without context-compress, isolate each arm, and compare context usage, task success, cost, and time.
|
|
14
|
+
|
|
15
|
+
## Arms
|
|
16
|
+
|
|
17
|
+
| Arm | Setup | Purpose |
|
|
18
|
+
| --- | --- | --- |
|
|
19
|
+
| `baseline` | No context-compress MCP, no hook | Measures normal agent behavior. |
|
|
20
|
+
| `mcp-only` | MCP server registered, no PreToolUse hook | Measures explicit tool adoption. |
|
|
21
|
+
| `hook-balanced` | MCP plus PreToolUse hook, `CONTEXT_COMPRESS_MODE=balanced` | Default recommended setup. |
|
|
22
|
+
| `hook-aggressive` | MCP plus PreToolUse hook, `CONTEXT_COMPRESS_MODE=aggressive` | Maximum compression trade-off. |
|
|
23
|
+
|
|
24
|
+
Each arm must run in a fresh workspace with isolated agent settings. Do not allow global plugins, global MCP servers, or previous conversation state to leak into the run.
|
|
25
|
+
|
|
26
|
+
## Task Set
|
|
27
|
+
|
|
28
|
+
Use tasks that naturally produce large outputs:
|
|
29
|
+
|
|
30
|
+
1. Diagnose a failing test suite and patch the root cause.
|
|
31
|
+
2. Review a multi-commit diff and summarize risky changes.
|
|
32
|
+
3. Inspect a large API response and implement one missing field mapping.
|
|
33
|
+
4. Analyze a generated Playwright snapshot and fix one selector bug.
|
|
34
|
+
5. Audit dependency output and identify one vulnerable or outdated package.
|
|
35
|
+
6. Search a large log file and explain the first recurring failure.
|
|
36
|
+
|
|
37
|
+
Pin every input repository and fixture by commit hash. Preserve every run directory so metrics can be recomputed.
|
|
38
|
+
|
|
39
|
+
## Metrics
|
|
40
|
+
|
|
41
|
+
| Metric | How to collect |
|
|
42
|
+
| --- | --- |
|
|
43
|
+
| Context bytes returned by tools | Sum raw tool payloads in agent logs. |
|
|
44
|
+
| Compressed bytes returned | Sum context-compress tool responses. |
|
|
45
|
+
| Indexed bytes | Use `stats` output and session DB stats. |
|
|
46
|
+
| Task success | Deterministic test, assertion, or scorer per task. |
|
|
47
|
+
| Cost/time | Agent runner JSON output when available. |
|
|
48
|
+
| Follow-up retrieval quality | Record whether the final answer cites indexed/search results when the task requires them. |
|
|
49
|
+
|
|
50
|
+
Report raw numbers and relative deltas. Do not report only the best percentage.
|
|
51
|
+
|
|
52
|
+
## Isolation Rules
|
|
53
|
+
|
|
54
|
+
- Use a new temp workspace for every `(task, arm, run)` cell.
|
|
55
|
+
- Disable user/global plugin sources for the baseline arm.
|
|
56
|
+
- Install exactly the intended plugin or MCP config for non-baseline arms.
|
|
57
|
+
- Clear persistent context-compress DBs between runs unless the task explicitly tests persistence.
|
|
58
|
+
- Keep model, prompt, timeout, and working tree identical across arms.
|
|
59
|
+
- Record the exact agent version, model, OS, Node version, and context-compress version.
|
|
60
|
+
|
|
61
|
+
## Safety Checks
|
|
62
|
+
|
|
63
|
+
Compression must not hide important failures. Every task needs one deterministic scorer:
|
|
64
|
+
|
|
65
|
+
- tests pass after the agent patch,
|
|
66
|
+
- expected files changed and unrelated files did not,
|
|
67
|
+
- security-relevant details are still retrievable with `search`,
|
|
68
|
+
- final answer includes the actual root cause, not just a compressed summary.
|
|
69
|
+
|
|
70
|
+
If an arm uses fewer tokens but fails the scorer, mark it as a failure, not a win.
|
|
71
|
+
|
|
72
|
+
## Reporting Template
|
|
73
|
+
|
|
74
|
+
```md
|
|
75
|
+
# Agentic benchmark: context-compress on real coding tasks
|
|
76
|
+
|
|
77
|
+
Date:
|
|
78
|
+
Agent:
|
|
79
|
+
Model:
|
|
80
|
+
context-compress:
|
|
81
|
+
Repo/fixture commits:
|
|
82
|
+
|
|
83
|
+
## Summary
|
|
84
|
+
|
|
85
|
+
| Arm | Success | Tool bytes in context | Indexed bytes | Cost | Time |
|
|
86
|
+
| --- | ---: | ---: | ---: | ---: | ---: |
|
|
87
|
+
|
|
88
|
+
## Per-task Results
|
|
89
|
+
|
|
90
|
+
| Task | Arm | Success | Tool bytes | Indexed bytes | Notes |
|
|
91
|
+
| --- | --- | ---: | ---: | ---: | --- |
|
|
92
|
+
|
|
93
|
+
## Failures And Limits
|
|
94
|
+
|
|
95
|
+
- What failed:
|
|
96
|
+
- What this benchmark does not prove:
|
|
97
|
+
- Known nondeterminism:
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Reproduce
|
|
101
|
+
|
|
102
|
+
Until this harness is automated, prepare the environment manually with:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
npm run build
|
|
106
|
+
context-compress setup --auto
|
|
107
|
+
CONTEXT_COMPRESS_MODE=balanced context-compress doctor
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Then run each task in isolated agent settings and attach the resulting logs plus `context-compress stats` output to the benchmark result.
|