@astudioplus/compressor 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +52 -0
- package/LICENSE +20 -0
- package/README.md +167 -0
- package/dist/adapters/agents-md.d.ts +2 -0
- package/dist/adapters/agents-md.js +91 -0
- package/dist/adapters/apply.d.ts +3 -0
- package/dist/adapters/apply.js +83 -0
- package/dist/adapters/claude-code.d.ts +2 -0
- package/dist/adapters/claude-code.js +403 -0
- package/dist/adapters/copilot.d.ts +2 -0
- package/dist/adapters/copilot.js +418 -0
- package/dist/adapters/cursor.d.ts +2 -0
- package/dist/adapters/cursor.js +149 -0
- package/dist/adapters/index.d.ts +11 -0
- package/dist/adapters/index.js +19 -0
- package/dist/adapters/markers.d.ts +7 -0
- package/dist/adapters/markers.js +129 -0
- package/dist/adapters/types.d.ts +44 -0
- package/dist/adapters/types.js +1 -0
- package/dist/bench/ablate.d.ts +35 -0
- package/dist/bench/ablate.js +163 -0
- package/dist/bench/cell.d.ts +33 -0
- package/dist/bench/cell.js +437 -0
- package/dist/bench/results.d.ts +37 -0
- package/dist/bench/results.js +157 -0
- package/dist/bench/runner.d.ts +24 -0
- package/dist/bench/runner.js +121 -0
- package/dist/bench/tasks.d.ts +4 -0
- package/dist/bench/tasks.js +147 -0
- package/dist/bench/types.d.ts +109 -0
- package/dist/bench/types.js +1 -0
- package/dist/claude/transcripts.d.ts +30 -0
- package/dist/claude/transcripts.js +154 -0
- package/dist/cli/commands/benchmark.d.ts +33 -0
- package/dist/cli/commands/benchmark.js +203 -0
- package/dist/cli/commands/compress.d.ts +8 -0
- package/dist/cli/commands/compress.js +45 -0
- package/dist/cli/commands/count.d.ts +5 -0
- package/dist/cli/commands/count.js +25 -0
- package/dist/cli/commands/hook.d.ts +6 -0
- package/dist/cli/commands/hook.js +30 -0
- package/dist/cli/commands/init.d.ts +16 -0
- package/dist/cli/commands/init.js +76 -0
- package/dist/cli/commands/report.d.ts +90 -0
- package/dist/cli/commands/report.js +464 -0
- package/dist/cli/commands/savings.d.ts +38 -0
- package/dist/cli/commands/savings.js +196 -0
- package/dist/cli/commands/set-mode.d.ts +5 -0
- package/dist/cli/commands/set-mode.js +13 -0
- package/dist/cli/commands/stats.d.ts +5 -0
- package/dist/cli/commands/stats.js +51 -0
- package/dist/cli/commands/status.d.ts +1 -0
- package/dist/cli/commands/status.js +11 -0
- package/dist/cli/commands/uninstall.d.ts +7 -0
- package/dist/cli/commands/uninstall.js +22 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +146 -0
- package/dist/copilot-hook-entry.d.ts +1 -0
- package/dist/copilot-hook-entry.js +36 -0
- package/dist/copilot-hook.js +1000 -0
- package/dist/engine/detect.d.ts +2 -0
- package/dist/engine/detect.js +47 -0
- package/dist/engine/index.d.ts +4 -0
- package/dist/engine/index.js +90 -0
- package/dist/engine/policy.d.ts +2 -0
- package/dist/engine/policy.js +48 -0
- package/dist/engine/tiers/code.d.ts +7 -0
- package/dist/engine/tiers/code.js +206 -0
- package/dist/engine/tiers/logs.d.ts +4 -0
- package/dist/engine/tiers/logs.js +139 -0
- package/dist/engine/tiers/structural.d.ts +28 -0
- package/dist/engine/tiers/structural.js +199 -0
- package/dist/engine/types.d.ts +71 -0
- package/dist/engine/types.js +5 -0
- package/dist/hook/copilot.d.ts +5 -0
- package/dist/hook/copilot.js +136 -0
- package/dist/hook/core.d.ts +36 -0
- package/dist/hook/core.js +138 -0
- package/dist/hook/exit.d.ts +22 -0
- package/dist/hook/exit.js +56 -0
- package/dist/hook/post-tool-use.d.ts +5 -0
- package/dist/hook/post-tool-use.js +57 -0
- package/dist/hook-entry.d.ts +1 -0
- package/dist/hook-entry.js +35 -0
- package/dist/hook.js +946 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +16 -0
- package/dist/ledger/read.d.ts +9 -0
- package/dist/ledger/read.js +91 -0
- package/dist/ledger/write.d.ts +29 -0
- package/dist/ledger/write.js +61 -0
- package/dist/packs/atoms.d.ts +3 -0
- package/dist/packs/atoms.js +108 -0
- package/dist/packs/modes.d.ts +3 -0
- package/dist/packs/modes.js +34 -0
- package/dist/packs/render.d.ts +24 -0
- package/dist/packs/render.js +115 -0
- package/dist/packs/types.d.ts +32 -0
- package/dist/packs/types.js +1 -0
- package/dist/paths.d.ts +29 -0
- package/dist/paths.js +87 -0
- package/dist/tokens/estimate.d.ts +12 -0
- package/dist/tokens/estimate.js +23 -0
- package/dist/tokens/exact.d.ts +5 -0
- package/dist/tokens/exact.js +16 -0
- package/dist/tokens/index.d.ts +2 -0
- package/dist/tokens/index.js +2 -0
- package/package.json +77 -0
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import { exec, execFile } from 'node:child_process';
|
|
2
|
+
import { stat } from 'node:fs/promises';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
import process from 'node:process';
|
|
5
|
+
import { promisify } from 'node:util';
|
|
6
|
+
import { buildVariants } from "../../bench/ablate.js";
|
|
7
|
+
import { balanceWarning } from "../../bench/results.js";
|
|
8
|
+
import { runBenchmark } from "../../bench/runner.js";
|
|
9
|
+
import { loadSuite, suiteFixturesDir } from "../../bench/tasks.js";
|
|
10
|
+
import { resolveHookCommand } from "../../paths.js";
|
|
11
|
+
const execFileAsync = promisify(execFile);
|
|
12
|
+
/**
 * Parse a CLI flag value that must be a strictly positive integer.
 *
 * @param value - Raw flag value; converted with `Number()`.
 * @param flag - Flag name, used only to build the error message.
 * @returns The parsed integer.
 * @throws Error when the converted value is not an integer greater than zero.
 */
function parsePositiveInt(value, flag) {
    const parsed = Number(value);
    const acceptable = Number.isInteger(parsed) && parsed > 0;
    if (acceptable) {
        return parsed;
    }
    throw new Error(`${flag} must be a positive integer, got '${value}'`);
}
|
|
19
|
+
/**
 * Parse a CLI flag value that must be a finite number greater than zero
 * (fractions allowed).
 *
 * @param value - Raw flag value; converted with `Number()`.
 * @param flag - Flag name, used only to build the error message.
 * @returns The parsed number.
 * @throws Error when the converted value is NaN, infinite, zero or negative.
 */
function parsePositiveNumber(value, flag) {
    const parsed = Number(value);
    const acceptable = Number.isFinite(parsed) && parsed > 0;
    if (acceptable) {
        return parsed;
    }
    throw new Error(`${flag} must be a positive number, got '${value}'`);
}
|
|
26
|
+
/**
 * Parse the comma-separated `--modes` flag.
 *
 * Entries are trimmed and empty entries are dropped. Order and duplicates are
 * kept as given.
 *
 * @param value - Raw flag value, e.g. `"full, slim"`.
 * @returns The mode names, each one of `full`, `optimized` or `slim`.
 * @throws Error when no name remains or when a name is not a known mode.
 */
function parseModes(value) {
    const known = new Set(['full', 'optimized', 'slim']);
    const names = [];
    for (const piece of value.split(',')) {
        const trimmed = piece.trim();
        if (trimmed !== '') {
            names.push(trimmed);
        }
    }
    if (names.length === 0) {
        throw new Error(`--modes must name at least one of full|optimized|slim`);
    }
    // reject the first unknown entry, in input order
    for (const name of names) {
        if (!known.has(name)) {
            throw new Error(`unknown mode '${name}' in --modes (expected full|optimized|slim)`);
        }
    }
    return names;
}
|
|
41
|
+
/**
 * Split an optional comma-separated flag into trimmed, non-empty ids.
 *
 * @param value - Raw flag value, or `undefined` when the flag was not given.
 * @returns The ids in input order; an empty array when the flag is absent.
 */
function parseIdList(value) {
    const ids = [];
    if (value === undefined) {
        return ids;
    }
    for (const piece of value.split(',')) {
        const id = piece.trim();
        if (id.length > 0) {
            ids.push(id);
        }
    }
    return ids;
}
|
|
50
|
+
/**
 * Report whether `dirPath` exists and is a directory.
 *
 * Any `stat` failure (missing path, permission error, ...) is deliberately
 * reported as `false` rather than thrown.
 *
 * @param dirPath - Path to probe.
 * @returns `true` only when `stat` succeeds and reports a directory.
 */
async function isDir(dirPath) {
    return stat(dirPath)
        .then((info) => info.isDirectory())
        .catch(() => false);
}
|
|
58
|
+
/**
 * Preflight check: the fixtures root and every task's fixture directory must
 * exist before a benchmark starts.
 *
 * @param fixturesDir - Root directory holding one sub-directory per fixture.
 * @param suite - Loaded suite; each entry of `suite.tasks` supplies `id` and
 *   `fixture` (a path relative to `fixturesDir`).
 * @throws Error naming the first missing directory (root first, then tasks in
 *   suite order).
 */
async function assertFixtures(fixturesDir, suite) {
    const rootExists = await isDir(fixturesDir);
    if (!rootExists) {
        throw new Error(`fixtures dir missing: ${fixturesDir} (expected next to the suite as <suiteDir>/../fixtures)`);
    }
    for (const task of suite.tasks) {
        const taskDir = path.join(fixturesDir, task.fixture);
        if (await isDir(taskDir)) {
            continue;
        }
        throw new Error(`task '${task.id}': fixture dir missing: ${taskDir}`);
    }
}
|
|
69
|
+
/**
 * Shell out with stdin (hook commands are shell strings with quoted paths).
 *
 * Runs `command` through the shell with a 30 s timeout and an 8 MiB output
 * buffer, writes `input` to the child's stdin, then closes it.
 *
 * @param command - Shell command line to run.
 * @param input - Text piped to the child's stdin.
 * @param env - Environment for the child process.
 * @returns Promise resolving with the child's stdout.
 * @throws (rejects) Error carrying exec's failure message when the command
 *   exits non-zero, times out or overflows the buffer.
 */
function execWithInput(command, input, env) {
    return new Promise((resolve, reject) => {
        const child = exec(command, { timeout: 30_000, maxBuffer: 8 * 1024 * 1024, env }, (error, stdout) => {
            if (error !== null) {
                // exec only ever passes an Error (or null) here
                reject(new Error(error.message));
                return;
            }
            resolve(stdout);
        });
        // A command that exits (or closes stdin) before draining the payload
        // makes the pipe emit EPIPE as an 'error' event. With no listener that
        // event becomes an uncaught exception and kills the CLI; swallow it so
        // the exec callback above reports the command's real failure instead.
        child.stdin?.on('error', () => { });
        child.stdin?.end(input);
    });
}
|
|
82
|
+
/**
 * Turn any thrown value into printable text.
 *
 * @param error - Caught value of unknown shape.
 * @returns `error.message` for Error instances, otherwise `String(error)`.
 */
function errorText(error) {
    if (error instanceof Error) {
        return error.message;
    }
    return String(error);
}
|
|
85
|
+
/**
 * Build the synthetic, over-budget PostToolUse payload used by the
 * marker-style preflight.
 *
 * The stdout is 900 distinct numbered rows. Per the original author's note,
 * distinct rows mean only the truncation tier fires (so its marker line
 * carries the style), and the size is large enough to clear every mode's
 * touch and truncate thresholds.
 *
 * @returns JSON text of a `Bash` tool payload with the rows as `stdout`.
 */
function markerStylePreflightPayload() {
    const lines = [];
    for (let index = 0; index < 900; index += 1) {
        const label = String(index).padStart(5, '0');
        lines.push(`row ${label} lorem ipsum dolor sit amet consectetur adipiscing`);
    }
    const toolResponse = {
        stdout: lines.join('\n'),
        stderr: '',
        interrupted: false,
        isImage: false,
    };
    return JSON.stringify({
        tool_name: 'Bash',
        tool_input: { command: 'echo preflight' },
        tool_use_id: 'toolu_preflight',
        tool_response: toolResponse,
    });
}
|
|
97
|
+
/**
 * Preflight for marker-style experiments.
 *
 * The hook entry parses argv fail-open, so a stale dist/hook.js that predates
 * --marker-style silently ignores the flag: every arm would then measure the
 * same 'plain' behavior with no error anywhere. To catch that, the exact
 * installed hook command is run twice on the same over-budget payload, once
 * per style, and the two outputs are required to differ.
 *
 * COMPRESSOR_NO_LEDGER=1 is set for the probes so they stay out of the live
 * savings ledger.
 *
 * @param baseHookCommand - Hook shell command without the `--marker-style` flag.
 * @throws Error when a probe fails, emits nothing, or both probes emit
 *   byte-identical output.
 */
export async function assertHookHandlesMarkerStyle(baseHookCommand) {
    const probeInput = markerStylePreflightPayload();
    const probeEnv = { ...process.env, COMPRESSOR_NO_LEDGER: '1' };
    // Runs one probe and returns its (non-empty) stdout.
    const runProbe = async (style) => {
        const command = `${baseHookCommand} --marker-style ${style}`;
        let stdout;
        try {
            stdout = await execWithInput(command, probeInput, probeEnv);
        }
        catch (error) {
            throw new Error(`marker-style preflight: hook command failed (${command}): ${errorText(error)}`);
        }
        if (stdout.trim() === '') {
            throw new Error(`marker-style preflight: hook emitted nothing for an over-budget payload (${command}) — the installed bundle is broken or stale; run 'npm run build' and retry`);
        }
        return stdout;
    };
    // probes run sequentially, 'plain' first
    const plainOutput = await runProbe('plain');
    const deterrentOutput = await runProbe('deterrent');
    if (plainOutput === deterrentOutput) {
        throw new Error(`marker-style preflight: hook output is byte-identical for --marker-style plain and deterrent — the installed dist/hook.js ignores the flag (stale bundle); run 'npm run build' and retry, or the experiment arms would all measure 'plain'`);
    }
}
|
|
127
|
+
/**
 * Preflight check: the claude binary must answer `--version` within 30 s.
 *
 * @param bin - Binary name or path. Paths ending in `.js`/`.mjs` (test stubs)
 *   are not directly executable, so they are run through the current node
 *   executable instead.
 * @throws Error with install guidance when the invocation fails.
 */
async function assertClaudeAnswersVersion(bin) {
    try {
        const viaNode = /\.(mjs|js)$/.test(bin);
        const file = viaNode ? process.execPath : bin;
        const args = viaNode ? [bin, '--version'] : ['--version'];
        await execFileAsync(file, args, { timeout: 30_000 });
    }
    catch (error) {
        throw new Error(`claude binary '${bin}' failed --version — install Claude Code (or point COMPRESSOR_CLAUDE_BIN at a working binary): ${errorText(error)}`);
    }
}
|
|
142
|
+
/**
 * Entry point of the `benchmark` CLI command.
 *
 * Phases, in order:
 *  1. validate flags (`--trials`, `--concurrency`, `--max-budget-usd`, `--modes`);
 *  2. preflight — load the suite, check fixtures, build variants, verify the
 *     hook bundle and the claude binary — all before any workspace or results
 *     file exists;
 *  3. run the benchmark and print the results location.
 *
 * @param opts - Raw CLI options. Reads `trials`, `concurrency`,
 *   `maxBudgetUsd`, `modes`, `suite`, `out`, `hookArgs`, `markerStyles`,
 *   `ablate`, `ablateAdd`, `ablateGroup`, `hook` and `model`.
 * @throws Error on the first failing validation or preflight check.
 */
export async function runBenchmarkCommand(opts) {
    const trials = parsePositiveInt(opts.trials, '--trials');
    const concurrency = parsePositiveInt(opts.concurrency, '--concurrency');
    const maxBudgetUsd = parsePositiveNumber(opts.maxBudgetUsd, '--max-budget-usd');
    const modes = parseModes(opts.modes);
    const suitePath = path.resolve(opts.suite);
    const outDir = path.resolve(opts.out);
    // preflight: every check below runs before any workspace or results file exists
    const suite = await loadSuite(suitePath);
    const fixturesDir = suiteFixturesDir(suitePath);
    await assertFixtures(fixturesDir, suite);
    const hookArgs = opts.hookArgs?.trim();
    const markerStyles = parseIdList(opts.markerStyles);
    const hasHookArgs = hookArgs !== undefined && hookArgs !== '';
    const variants = buildVariants({
        modes,
        ablate: parseIdList(opts.ablate),
        ablateAdd: parseIdList(opts.ablateAdd),
        ablateGroups: parseIdList(opts.ablateGroup),
        hook: opts.hook,
        // optional keys are omitted entirely rather than passed as undefined/empty
        ...(hasHookArgs ? { hookArgs } : {}),
        ...(markerStyles.length > 0 ? { markerStyles } : {}),
    });
    // NOTE(review): only the FIRST hooked variant decides whether the hook
    // preflight runs — confirm 'full' variants can never sort before a hooked
    // non-full one.
    const firstHooked = variants.find((variant) => variant.hook);
    if (firstHooked !== undefined && firstHooked.baseMode !== 'full') {
        // throws 'run npm run build' when dist/hook.js is missing
        const hookCommand = resolveHookCommand(firstHooked.baseMode);
        // a bundle that EXISTS can still predate --marker-style: verify it
        // before spending a single API dollar on indistinguishable arms
        const usesMarkerStyle = variants.some((v) => v.hook && v.hookArgs?.includes('--marker-style') === true);
        if (usesMarkerStyle) {
            await assertHookHandlesMarkerStyle(hookCommand);
        }
    }
    const bin = process.env.COMPRESSOR_CLAUDE_BIN;
    const apiKey = process.env.ANTHROPIC_API_KEY ?? '';
    // the API key is only mandatory when no custom binary is configured
    if (bin === undefined && apiKey === '') {
        throw new Error('ANTHROPIC_API_KEY is not set: claude --bare never reads OAuth/keychain, so benchmarks need ANTHROPIC_API_KEY exported.');
    }
    await assertClaudeAnswersVersion(bin ?? 'claude');
    const taskCount = suite.tasks.length;
    const variantIds = variants.map((variant) => variant.id).join(', ');
    const cellCount = taskCount * variants.length * trials;
    console.log(`${cellCount} cells: ${taskCount} tasks × ${variants.length} variants (${variantIds}) × ${trials} trials`);
    console.log(`hard ceiling: $${maxBudgetUsd} (--max-budget-usd)`);
    const run = await runBenchmark({
        suite,
        variants,
        trials,
        model: opts.model,
        maxBudgetUsd,
        concurrency,
        outDir,
        fixturesDir,
        onProgress: (line) => console.log(line),
    });
    const { runId, results, resultsFile } = run;
    console.log('');
    // post-run balance assertion: unbalanced variants invalidate comparisons
    const imbalance = balanceWarning(results);
    if (imbalance !== null) {
        console.log(imbalance);
    }
    console.log(`results: ${resultsFile}`);
    console.log(`next: compressor report --run ${runId} --out ${outDir}`);
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import process from 'node:process';
|
|
2
|
+
import { compress, policyFor } from "../../engine/index.js";
|
|
3
|
+
import { tiktokenEstimator } from "../../tokens/estimate.js";
|
|
4
|
+
/**
 * Read all of stdin and decode it as UTF-8.
 *
 * String chunks are converted to Buffers first so the whole input is
 * concatenated as bytes and decoded once at the end.
 *
 * @returns The complete stdin content as a string.
 */
export async function readStdin() {
    const parts = [];
    for await (const piece of process.stdin) {
        if (typeof piece === 'string') {
            parts.push(Buffer.from(piece));
        }
        else {
            parts.push(piece);
        }
    }
    const whole = Buffer.concat(parts);
    return whole.toString('utf8');
}
|
|
11
|
+
/**
 * Validate a compression mode name (strict: unknown names are an error).
 *
 * @param value - Candidate mode.
 * @returns `value` unchanged when it is `full`, `optimized` or `slim`.
 * @throws Error for any other value.
 */
function parseMode(value) {
    switch (value) {
        case 'full':
        case 'optimized':
        case 'slim':
            return value;
        default:
            throw new Error(`unknown mode '${value}' (expected full|optimized|slim)`);
    }
}
|
|
17
|
+
/**
 * Validate a tool-kind name.
 *
 * @param value - Candidate kind.
 * @returns `value` unchanged when it is `read`, `bash`, `search` or `other`.
 * @throws Error for any other value.
 */
function parseKind(value) {
    const kinds = ['read', 'bash', 'search', 'other'];
    if (!kinds.includes(value)) {
        throw new Error(`unknown kind '${value}' (expected read|bash|search|other)`);
    }
    return value;
}
|
|
23
|
+
/**
 * Validate a marker-style name (strict: unknown names are an error).
 *
 * @param value - Candidate style.
 * @returns `value` unchanged when it is `plain`, `deterrent` or `informative`.
 * @throws Error for any other value.
 */
function parseMarkerStyle(value) {
    const styles = ['plain', 'deterrent', 'informative'];
    if (styles.includes(value)) {
        return value;
    }
    throw new Error(`unknown marker style '${value}' (expected plain|deterrent|informative)`);
}
|
|
29
|
+
/**
 * Entry point of the `compress` CLI command: compress stdin and write the
 * result to stdout, with a one-line summary on stderr.
 *
 * @param opts - CLI options. `mode` defaults to `optimized` and `kind` to
 *   `other`; `filePath` and `markerStyle` are optional.
 * @throws Error when `mode`, `kind` or `markerStyle` is not a known value.
 */
export async function runCompress(opts) {
    const mode = parseMode(opts.mode ?? 'optimized');
    const tool = parseKind(opts.kind ?? 'other');
    const meta = {
        tool,
        mode,
        // only present when the caller supplied a path
        ...(opts.filePath !== undefined ? { filePath: opts.filePath } : {}),
    };
    let policy = policyFor(mode);
    if (opts.markerStyle !== undefined) {
        // an explicit style overrides the mode's default policy
        policy = { ...policy, markerStyle: parseMarkerStyle(opts.markerStyle) };
    }
    const text = await readStdin();
    const result = compress(text, meta, policy, tiktokenEstimator());
    process.stdout.write(result.content);
    const { stats } = result;
    const saved = stats.estTokensIn - stats.estTokensOut;
    // an empty input reports 0% rather than dividing by zero
    const pct = stats.estTokensIn === 0 ? 0 : Math.round((saved / stats.estTokensIn) * 100);
    const ids = stats.transforms.map((transform) => transform.id).join(',');
    process.stderr.write(`kind=${stats.kind} ~${stats.estTokensIn} → ~${stats.estTokensOut} est tokens (estimated; -${pct}%) transforms=${ids}\n`);
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { readFile } from 'node:fs/promises';
|
|
2
|
+
/**
 * Entry point of the `count` CLI command: print a token count per file, plus
 * a total when more than one file was given.
 *
 * @param files - Paths of the files to count, read as UTF-8 one at a time.
 * @param opts - `exact: true` selects the exact counter (loaded lazily)
 *   instead of the local estimate; `model` defaults to `claude-sonnet-4-6`
 *   and is only used by the exact counter.
 */
export async function runCount(files, opts) {
    const model = opts.model ?? 'claude-sonnet-4-6';
    const exact = opts.exact === true;
    let total = 0;
    for (const file of files) {
        const text = await readFile(file, 'utf8');
        let report;
        if (exact) {
            const { countTokensExact } = await import("../../tokens/exact.js");
            const count = await countTokensExact(text, model);
            total += count;
            report = `${file}: ${count} tokens (exact, ${model})`;
        }
        else {
            const { estimateTokens } = await import("../../tokens/estimate.js");
            const count = estimateTokens(text);
            total += count;
            report = `${file}: ~${count} tokens (estimated)`;
        }
        console.log(report);
    }
    if (files.length > 1) {
        const summary = exact
            ? `total: ${total} tokens (exact, ${model})`
            : `total: ~${total} tokens (estimated)`;
        console.log(summary);
    }
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { handleCopilotPostToolUse } from "../../hook/copilot.js";
|
|
2
|
+
import { settleThenExit } from "../../hook/exit.js";
|
|
3
|
+
import { handlePostToolUse } from "../../hook/post-tool-use.js";
|
|
4
|
+
import { readStdin } from "./compress.js";
|
|
5
|
+
/**
 * Fail-open mode parser for the hook path: anything that is not a known mode
 * falls back to `optimized` instead of throwing.
 *
 * @param value - Candidate mode.
 * @returns `full`, `optimized` or `slim`.
 */
function parseMode(value) {
    if (value === 'full' || value === 'slim') {
        return value;
    }
    // covers both an explicit 'optimized' and every unknown value
    return 'optimized';
}
|
|
8
|
+
/**
 * Fail-open marker-style parser (hook hot path): an unknown style yields
 * `undefined`, so the policy default applies.
 *
 * @param value - Candidate style.
 * @returns `plain`, `deterrent`, `informative`, or `undefined`.
 */
function parseMarkerStyle(value) {
    switch (value) {
        case 'plain':
        case 'deterrent':
        case 'informative':
            return value;
        default:
            return undefined;
    }
}
|
|
14
|
+
// Both actions are documented protocol surfaces a user can wire into
|
|
15
|
+
// .claude/settings.json or .github/hooks, so they need the SAME hot-path
|
|
16
|
+
// bound as the bundles: settleThenExit writes stdout first, caps the ledger
|
|
17
|
+
// settle at 250ms, and force-terminates on timeout. Without it the process
|
|
18
|
+
// exits only when the event loop drains — i.e. after the fire-and-forget
|
|
19
|
+
// ledger append completes, which is unbounded on a slow disk and infinite on
|
|
20
|
+
// a stuck one. These functions therefore never return.
|
|
21
|
+
/**
 * `hook post-tool-use` action: read the payload from stdin, run the
 * PostToolUse handler and hand its output to `settleThenExit`.
 *
 * @param opts - `mode` (defaults to `optimized`; unknown values fall back to
 *   `optimized`) and an optional `markerStyle` (unknown values are ignored).
 */
export async function runHookPostToolUse(opts) {
    const payload = await readStdin();
    const mode = parseMode(opts.mode ?? 'optimized');
    const markerStyle = parseMarkerStyle(opts.markerStyle);
    const { output } = handlePostToolUse(payload, mode, markerStyle);
    await settleThenExit(output);
}
|
|
26
|
+
/**
 * `hook copilot-post-tool-use` action: read the payload from stdin, run the
 * Copilot postToolUse handler and hand its output to `settleThenExit`.
 *
 * @param opts - `mode` (defaults to `optimized`; unknown values fall back to
 *   `optimized`) and an optional `markerStyle` (unknown values are ignored).
 */
export async function runHookCopilotPostToolUse(opts) {
    const payload = await readStdin();
    const mode = parseMode(opts.mode ?? 'optimized');
    const markerStyle = parseMarkerStyle(opts.markerStyle);
    const { output } = handleCopilotPostToolUse(payload, mode, markerStyle);
    await settleThenExit(output);
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { Adapter, AdapterContext } from '../../adapters/index.ts';
|
|
2
|
+
import type { PackMode } from '../../packs/types.ts';
|
|
3
|
+
/** Scope flags shared by the commands that install into an agent's config. */
export interface ScopeOptions {
    /** Passed to the adapter context as `global`; treated as on only when exactly `true`. */
    global?: boolean;
    /** When exactly `true`, changes are rendered but never applied. */
    dryRun?: boolean;
}
|
|
7
|
+
/** Raw CLI options of the `init` command. */
export interface InitOptions extends ScopeOptions {
    /** Agent names to install for; each must resolve to a known adapter. */
    agent: Array<string>;
    /** Pack mode as typed by the user; validated by `parsePackMode`. */
    mode: string;
}
|
|
11
|
+
/** Join the per-agent "when does this take effect" notes with single spaces. */
export declare function effectNote(agents: ReadonlyArray<Pick<Adapter, 'name'>>): string;
/** Validate a pack mode; throws unless the value is `optimized` or `slim`. */
export declare function parsePackMode(value: string): PackMode;
/** Map agent names to adapters, in input order; throws on the first unknown name. */
export declare function resolveAgents(names: Array<string>): Array<Adapter>;
/**
 * Build the adapter context from the current working directory and the home
 * directory. When `requireHookBundle` is false the hook command is only
 * described, not resolved, so a missing bundle does not throw.
 */
export declare function buildContext(global: boolean, mode: PackMode, requireHookBundle?: boolean): AdapterContext;
/** Install `mode` for each adapter in turn; with `opts.dryRun` changes are printed but not applied. */
export declare function installForAgents(agents: Array<Adapter>, mode: PackMode, opts: ScopeOptions): Promise<void>;
/** `init` command entry point: resolve agents, validate the mode, then install. */
export declare function runInit(opts: InitOptions): Promise<void>;
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { homedir } from 'node:os';
|
|
2
|
+
import process from 'node:process';
|
|
3
|
+
import { applyChanges, getAdapter, renderChanges } from "../../adapters/index.js";
|
|
4
|
+
import { describeHookCommand, resolveCopilotHookCommand, resolveHookCommand, } from "../../paths.js";
|
|
5
|
+
/** One user-facing sentence per agent: when a fresh install takes effect. */
const AGENT_EFFECT_NOTES = {
    'claude-code': 'Claude Code: takes effect on the next session (/clear or new session).',
    copilot: 'Copilot: hook config loads when the CLI starts — restart any running copilot session.',
    cursor: 'Cursor: rules apply to new chats.',
    'agents-md': 'AGENTS.md: read at agent startup.',
};
/**
 * Build the combined "takes effect" note for a set of agents.
 *
 * @param agents - Objects carrying an agent `name`.
 * @returns The agents' notes in input order, separated by single spaces.
 */
export function effectNote(agents) {
    const notes = [];
    for (const agent of agents) {
        notes.push(AGENT_EFFECT_NOTES[agent.name]);
    }
    return notes.join(' ');
}
|
|
14
|
+
/**
 * Validate an installable pack mode.
 *
 * @param value - Candidate mode.
 * @returns `value` unchanged when it is `optimized` or `slim`.
 * @throws Error for any other value.
 */
export function parsePackMode(value) {
    const installable = ['optimized', 'slim'];
    if (!installable.includes(value)) {
        throw new Error(`unknown mode '${value}' (expected optimized|slim)`);
    }
    return value;
}
|
|
20
|
+
/** Agent names accepted on the command line. */
const AGENT_NAMES = ['claude-code', 'copilot', 'cursor', 'agents-md'];
/**
 * Resolve agent names to their adapters.
 *
 * @param names - Agent names as typed by the user.
 * @returns One adapter per name, in input order.
 * @throws Error on the first name that is unknown or has no adapter.
 */
export function resolveAgents(names) {
    const adapters = [];
    for (const name of names) {
        // only names from the known list are ever handed to getAdapter
        const adapter = AGENT_NAMES.includes(name) ? getAdapter(name) : undefined;
        if (adapter === undefined) {
            throw new Error(`no adapter for agent '${name}' (known: ${AGENT_NAMES.join(', ')})`);
        }
        adapters.push(adapter);
    }
    return adapters;
}
|
|
31
|
+
/**
 * Build the context object handed to adapters.
 *
 * @param global - Whether the operation targets the global scope.
 * @param mode - Pack mode used to derive the hook command.
 * @param requireHookBundle - When true (default) the hook command is resolved.
 *   When false it is only described: status/uninstall match against the
 *   command string and must work even when the bundle is missing (for
 *   example when removing a broken install).
 * @returns `{ projectDir, homeDir, global, hookCommand }`.
 */
export function buildContext(global, mode, requireHookBundle = true) {
    const projectDir = process.cwd();
    const homeDir = homedir();
    let hookCommand;
    if (requireHookBundle) {
        hookCommand = resolveHookCommand(mode);
    }
    else {
        hookCommand = describeHookCommand(mode);
    }
    return { projectDir, homeDir, global, hookCommand };
}
|
|
43
|
+
/**
 * Install a pack mode for each adapter, printing the rendered changes and a
 * closing summary.
 *
 * @param agents - Adapters to install, processed sequentially in order.
 * @param mode - Pack mode to install.
 * @param opts - Scope flags; `dryRun: true` prints changes without applying
 *   them, `global: true` selects the global scope.
 * @throws Whatever the hook-command resolvers or the adapters throw.
 */
export async function installForAgents(agents, mode, opts) {
    const agentNames = agents.map((adapter) => adapter.name);
    const isGlobal = opts.global === true;
    const isDryRun = opts.dryRun === true;
    // claude-code installs the PostToolUse hook (dist/hook.js) and copilot the
    // postToolUse hook (dist/copilot-hook.js); instruction-only agents (cursor,
    // agents-md) must not require either bundle to exist
    const needsHookBundle = agentNames.includes('claude-code');
    const ctx = buildContext(isGlobal, mode, needsHookBundle);
    const hasCopilot = agentNames.includes('copilot');
    if (hasCopilot) {
        // existence check only (both scopes — global installs the hook too):
        // refuses to install a hook command that would fail on every tool call
        // (Copilot postToolUse is fail-open = silent no-op)
        resolveCopilotHookCommand(mode);
    }
    for (const adapter of agents) {
        const changes = await adapter.install(mode, ctx);
        const rendered = renderChanges(changes);
        if (rendered !== '') {
            console.log(rendered);
        }
        if (!isDryRun) {
            await applyChanges(changes);
        }
    }
    const suffix = isDryRun ? ' (dry-run: nothing written)' : '';
    console.log(`Mode ${mode} installed for ${agentNames.join(', ')}. ${effectNote(agents)}${suffix}`);
    if (hasCopilot && isGlobal) {
        const verb = isDryRun ? 'would be installed' : 'installed';
        console.log(`Copilot --global: hook ${verb} machine-wide (Copilot CLI); instructions were NOT installed (no global mechanism) — run init --agent copilot in each repo for instruction packs.`);
    }
}
|
|
74
|
+
/**
 * Entry point of the `init` CLI command.
 *
 * @param opts - CLI options; `agent` and `mode` are validated before any
 *   install work starts.
 * @throws Error for an unknown agent or mode.
 */
export async function runInit(opts) {
    const agents = resolveAgents(opts.agent);
    const mode = parsePackMode(opts.mode);
    await installForAgents(agents, mode, opts);
}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import type { VariantAggregate } from '../../bench/results.ts';
|
|
2
|
+
import type { CellResult, RunMeta } from '../../bench/types.ts';
|
|
3
|
+
/** Output formats the report command can render. */
export type ReportFormat =
    | 'table'
    | 'md'
    | 'json';
|
|
4
|
+
/** Raw CLI options of the `report` command. */
export interface ReportCliOptions {
    /** Run id to report on (optional). */
    run?: string;
    /** Results directory. */
    out: string;
    /** Optional list used for comparisons — presumably run ids; confirm against the implementation. */
    compare?: Array<string>;
    /** Output format as typed by the user; see `parseFormat`. */
    format: string;
}
|
|
10
|
+
/** One variant's change relative to a baseline variant — real usage figures, never estimates. */
export interface VariantDelta {
    variantId: string;
    /** Percent change of the median output tokens; null if either side lacks valid cells or the baseline median is 0. */
    outputPct: number | null;
    /** Percent change of the median cost; null if either side is missing or the baseline is 0. */
    costPct: number | null;
    /** Success-rate change in percentage points; null if either side is unjudged. */
    successPp: number | null;
}
|
|
20
|
+
/** Per-atom marginal deltas: each `<baseline>-minus/plus-<atom>` variant measured against that baseline. */
export interface AblationDeltas {
    baselineId: string;
    deltas: Array<VariantDelta>;
}
|
|
25
|
+
/** Data-quality findings for a run, one list per kind of problem. */
export interface DataQualityIssues {
    /** Tasks whose check already passed BEFORE the agent ran — such a cell proves nothing. */
    vacuousTasks: Array<string>;
    /** Cells answered by a model other than the requested one (silent fallback). */
    substitutedCells: Array<string>;
    /** Cells whose result JSON carried no modelUsage, so the served model cannot be verified. */
    unknownServedCells: Array<string>;
    /** Cells the runner never scheduled (budget ceiling / unenforceable ceiling). */
    skippedCells: Array<string>;
    /** Infrastructure failures — kept apart from task failures and excluded from success%. */
    errorCells: Array<string>;
    /** Cells with permission denials — the model spent turns on retries, so the numbers are inflated. */
    deniedCells: Array<string>;
}
|
|
39
|
+
/** A single taskId × variantId entry of the per-task breakdown. */
export interface TaskVariantCell {
    taskId: string;
    variantId: string;
    /** Every row for this pair, infra errors included. */
    cells: number;
    /** Rows without an error — the medians below are taken over these. */
    valid: number;
    /** Valid rows that carry a boolean success verdict. */
    judged: number;
    successes: number;
    /** successes / judged; null when no row was judged. */
    successFraction: number | null;
    /** Median output tokens across valid rows. */
    medianOutput: number;
    /** Median of input + cacheCreation + cacheRead (context volume) across valid rows. */
    medianContext: number;
}
|
|
57
|
+
/** Matrix of tasks by variants: `cells[i][j]` belongs to `taskIds[i]` × `variantIds[j]`; null means no rows. */
export interface ByTaskBreakdown {
    /** Sorted ascending. */
    taskIds: Array<string>;
    /** 'full' comes first when present, the rest alphabetically. */
    variantIds: Array<string>;
    cells: Array<Array<TaskVariantCell | null>>;
}
|
|
65
|
+
/** Everything the report command renders for one run. */
export interface RunReport {
    runId: string;
    meta: RunMeta | null;
    aggregates: Array<VariantAggregate>;
    /** Null when the run contains no 'full' variant to compare against. */
    deltas: Array<VariantDelta> | null;
    /** Marginal per-atom deltas against each ablation baseline; null when there are no ablation variants. */
    ablationDeltas: Array<AblationDeltas> | null;
    byTask: ByTaskBreakdown;
    issues: DataQualityIssues;
}
|
|
76
|
+
/** Convert a raw format string into a `ReportFormat` (validation rules live in the implementation). */
export declare function parseFormat(value: string): ReportFormat;
/** Deltas of each variant against the run's baseline; null when there is nothing to compare against. */
export declare function computeDeltas(aggregates: Array<VariantAggregate>): Array<VariantDelta> | null;
/**
 * The ablation gate question: for every `<baseline>-minus/plus-<atom>`
 * variant, the marginal output and success delta against that baseline —
 * not against full.
 */
export declare function computeAblationDeltas(aggregates: Array<VariantAggregate>): Array<AblationDeltas> | null;
/** Build the task × variant matrix from raw cell results. */
export declare function computeByTask(results: Array<CellResult>): ByTaskBreakdown;
/** Collect the data-quality findings from raw cell results. */
export declare function findIssues(results: Array<CellResult>): DataQualityIssues;
/** Assemble a full `RunReport` from a run's id, metadata and cell results. */
export declare function buildRunReport(runId: string, meta: RunMeta | null, results: Array<CellResult>): RunReport;
/** Load a run from `outDir` and build its report. */
export declare function loadRunReport(outDir: string, runId: string): Promise<RunReport>;
/** Id of the latest run under `outDir`, or null — presumably when no run exists; confirm against the implementation. */
export declare function latestRunId(outDir: string): Promise<string | null>;
/** Render one report in the given format. */
export declare function formatReport(report: RunReport, format: ReportFormat): string;
/** Render a comparison of two reports in the given format. */
export declare function formatComparison(a: RunReport, b: RunReport, format: ReportFormat): string;
/** Entry point of the `report` CLI command. */
export declare function runReport(opts: ReportCliOptions): Promise<void>;
|