@levnikolaevich/hex-line-mcp 1.3.1 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +120 -47
- package/benchmark/atomic.mjs +502 -0
- package/benchmark/graph.mjs +80 -0
- package/benchmark/index.mjs +144 -0
- package/benchmark/workflows.mjs +350 -0
- package/hook.mjs +48 -15
- package/lib/benchmark-helpers.mjs +1 -1
- package/lib/changes.mjs +2 -1
- package/lib/coerce.mjs +1 -42
- package/lib/edit.mjs +258 -248
- package/lib/graph-enrich.mjs +76 -58
- package/lib/hash.mjs +1 -109
- package/lib/info.mjs +1 -1
- package/lib/normalize.mjs +1 -106
- package/lib/outline.mjs +32 -87
- package/lib/read.mjs +8 -5
- package/lib/revisions.mjs +238 -0
- package/lib/search.mjs +6 -7
- package/lib/security.mjs +4 -4
- package/lib/setup.mjs +7 -20
- package/lib/update-check.mjs +1 -56
- package/lib/verify.mjs +32 -16
- package/output-style.md +21 -6
- package/package.json +18 -6
- package/server.mjs +35 -43
- package/benchmark.mjs +0 -1106
package/benchmark.mjs
DELETED
|
@@ -1,1106 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* Hex-line Combo Benchmark v3
|
|
4
|
-
*
|
|
5
|
-
* Compares "agent without hex-line" vs "agent with hex-line" across
|
|
6
|
-
* read-only and write scenarios. Measures chars in response (proxy for tokens).
|
|
7
|
-
*
|
|
8
|
-
* Usage: node mcp/hex-line-mcp/benchmark.mjs [--repo /path/to/repo] [--with-graph]
|
|
9
|
-
* Default repo: current working directory.
|
|
10
|
-
*
|
|
11
|
-
* Zero external deps beyond hex-line lib modules.
|
|
12
|
-
*/
|
|
13
|
-
|
|
14
|
-
import { readFileSync, writeFileSync, unlinkSync, readdirSync, mkdirSync, rmSync } from "node:fs";
|
|
15
|
-
import { performance } from "node:perf_hooks";
|
|
16
|
-
import { resolve, basename } from "node:path";
|
|
17
|
-
import { tmpdir } from "node:os";
|
|
18
|
-
import { fnv1a, lineTag, rangeChecksum } from "./lib/hash.mjs";
|
|
19
|
-
import { readFile } from "./lib/read.mjs";
|
|
20
|
-
import { directoryTree } from "./lib/tree.mjs";
|
|
21
|
-
import { fileInfo } from "./lib/info.mjs";
|
|
22
|
-
import { verifyChecksums } from "./lib/verify.mjs";
|
|
23
|
-
import { fileChanges } from "./lib/changes.mjs";
|
|
24
|
-
import { editFile } from "./lib/edit.mjs";
|
|
25
|
-
import { grepSearch } from "./lib/search.mjs";
|
|
26
|
-
import { bulkReplace } from "./lib/bulk-replace.mjs";
|
|
27
|
-
import { fileOutline } from "./lib/outline.mjs";
|
|
28
|
-
import {
|
|
29
|
-
walkDir, getFileLines, categorize, generateTempCode,
|
|
30
|
-
simBuiltInReadFull, simBuiltInOutlineFull, simBuiltInGrep,
|
|
31
|
-
simBuiltInLsR, simBuiltInStat, simBuiltInWrite, simBuiltInEdit, simBuiltInVerify,
|
|
32
|
-
simHexLineOutlinePlusRead, simHexLineGrep, simHexLineWrite, simHexLineEditDiff,
|
|
33
|
-
runN, fmt, pctSavings, RUNS,
|
|
34
|
-
} from "./lib/benchmark-helpers.mjs";
|
|
35
|
-
// ---------------------------------------------------------------------------
|
|
36
|
-
// CLI
|
|
37
|
-
// ---------------------------------------------------------------------------
|
|
38
|
-
|
|
39
|
-
const args = process.argv.slice(2); // CLI arguments only: drops the first two argv entries (node executable and script path)
|
|
40
|
-
let repoRoot = process.cwd(); // default repo root is the current working directory; --repo below may override it
|
|
41
|
-
const repoIdx = args.indexOf("--repo");
|
|
42
|
-
if (repoIdx !== -1 && args[repoIdx + 1]) { // honor --repo only when a non-empty value follows the flag
|
|
43
|
-
repoRoot = resolve(args[repoIdx + 1]); // resolve() makes the supplied path absolute
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
const withGraph = args.includes("--with-graph"); // opt-in flag; main() runs the graph-enrichment tests (16-18) only when it is set
|
|
47
|
-
|
|
48
|
-
// ---------------------------------------------------------------------------
|
|
49
|
-
// Main
|
|
50
|
-
// ---------------------------------------------------------------------------
|
|
51
|
-
|
|
52
|
-
async function main() {
|
|
53
|
-
const allFiles = walkDir(repoRoot);
|
|
54
|
-
if (allFiles.length === 0) {
|
|
55
|
-
console.log(`No code files found in ${repoRoot}`);
|
|
56
|
-
process.exit(1);
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
const totalLines = allFiles.reduce((sum, f) => {
|
|
60
|
-
const lines = getFileLines(f);
|
|
61
|
-
return lines ? sum + lines.length : sum;
|
|
62
|
-
}, 0);
|
|
63
|
-
|
|
64
|
-
const cats = categorize(allFiles);
|
|
65
|
-
const repoName = basename(repoRoot);
|
|
66
|
-
|
|
67
|
-
// Top 3 largest code files for realistic tests
|
|
68
|
-
const sorted = allFiles.map(f => ({ f, lines: getFileLines(f)?.length || 0 }))
|
|
69
|
-
.sort((a, b) => b.lines - a.lines);
|
|
70
|
-
const largeFiles = sorted.slice(0, 3).map(s => s.f);
|
|
71
|
-
|
|
72
|
-
// Temp file setup
|
|
73
|
-
const ts = Date.now();
|
|
74
|
-
const tmpPath = resolve(tmpdir(), `hex-line-bench-${ts}.js`);
|
|
75
|
-
const tmpLines = generateTempCode();
|
|
76
|
-
const tmpContent = tmpLines.join("\n");
|
|
77
|
-
writeFileSync(tmpPath, tmpContent, "utf-8");
|
|
78
|
-
|
|
79
|
-
const results = [];
|
|
80
|
-
|
|
81
|
-
// ===================================================================
|
|
82
|
-
// TEST 1: Read full file
|
|
83
|
-
// ===================================================================
|
|
84
|
-
for (const [cat, files] of Object.entries(cats)) {
|
|
85
|
-
if (files.length === 0) continue;
|
|
86
|
-
const withoutArr = [];
|
|
87
|
-
const withArr = [];
|
|
88
|
-
|
|
89
|
-
for (const f of files) {
|
|
90
|
-
const lines = getFileLines(f);
|
|
91
|
-
if (!lines) continue;
|
|
92
|
-
withoutArr.push(runN(() => simBuiltInReadFull(f, lines).length));
|
|
93
|
-
withArr.push(runN(() => readFile(f).length));
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
if (withoutArr.length === 0) continue;
|
|
97
|
-
const avgWithout = Math.round(withoutArr.reduce((a, b) => a + b.value, 0) / withoutArr.length);
|
|
98
|
-
const avgWith = Math.round(withArr.reduce((a, b) => a + b.value, 0) / withArr.length);
|
|
99
|
-
const avgMsWithout = parseFloat((withoutArr.reduce((a, b) => a + b.ms, 0) / withoutArr.length).toFixed(1));
|
|
100
|
-
const avgMsWith = parseFloat((withArr.reduce((a, b) => a + b.ms, 0) / withArr.length).toFixed(1));
|
|
101
|
-
|
|
102
|
-
const label = { small: "<50L", medium: "50-200L", large: "200-500L", xl: "500L+" }[cat];
|
|
103
|
-
results.push({
|
|
104
|
-
num: 1, scenario: `Read full (${label})`,
|
|
105
|
-
without: avgWithout, withSL: avgWith,
|
|
106
|
-
savings: pctSavings(avgWithout, avgWith),
|
|
107
|
-
latencyWithout: avgMsWithout, latencyWith: avgMsWith,
|
|
108
|
-
});
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
// ===================================================================
|
|
112
|
-
// TEST 2: Read with outline — full read vs outline + targeted read
|
|
113
|
-
// ===================================================================
|
|
114
|
-
for (const cat of ["large", "xl"]) {
|
|
115
|
-
const files = cats[cat] || [];
|
|
116
|
-
if (files.length === 0) continue;
|
|
117
|
-
const withoutArr = [];
|
|
118
|
-
const withArr = [];
|
|
119
|
-
|
|
120
|
-
for (const f of files) {
|
|
121
|
-
const lines = getFileLines(f);
|
|
122
|
-
if (!lines) continue;
|
|
123
|
-
withoutArr.push(runN(() => simBuiltInOutlineFull(f, lines).length));
|
|
124
|
-
withArr.push(runN(() => simHexLineOutlinePlusRead(f, lines).length));
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
if (withoutArr.length === 0) continue;
|
|
128
|
-
const avgWithout = Math.round(withoutArr.reduce((a, b) => a + b.value, 0) / withoutArr.length);
|
|
129
|
-
const avgWith = Math.round(withArr.reduce((a, b) => a + b.value, 0) / withArr.length);
|
|
130
|
-
const avgMsWithout = parseFloat((withoutArr.reduce((a, b) => a + b.ms, 0) / withoutArr.length).toFixed(1));
|
|
131
|
-
const avgMsWith = parseFloat((withArr.reduce((a, b) => a + b.ms, 0) / withArr.length).toFixed(1));
|
|
132
|
-
|
|
133
|
-
const label = cat === "large" ? "200-500L" : "500L+";
|
|
134
|
-
results.push({
|
|
135
|
-
num: 2, scenario: `Outline+read (${label})`,
|
|
136
|
-
without: avgWithout, withSL: avgWith,
|
|
137
|
-
savings: pctSavings(avgWithout, avgWith),
|
|
138
|
-
latencyWithout: avgMsWithout, latencyWith: avgMsWith,
|
|
139
|
-
});
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
// ===================================================================
|
|
143
|
-
// TEST 3: Grep search
|
|
144
|
-
// ===================================================================
|
|
145
|
-
{
|
|
146
|
-
const grepFiles = [...(cats.medium || []), ...(cats.large || []), ...(cats.xl || [])].slice(0, 3);
|
|
147
|
-
if (grepFiles.length > 0) {
|
|
148
|
-
const withoutArr = [];
|
|
149
|
-
const withArr = [];
|
|
150
|
-
|
|
151
|
-
for (const f of grepFiles) {
|
|
152
|
-
const lines = getFileLines(f);
|
|
153
|
-
if (!lines) continue;
|
|
154
|
-
const pattern = "function|class|const";
|
|
155
|
-
withoutArr.push(runN(() => simBuiltInGrep(pattern, f).length));
|
|
156
|
-
withArr.push(runN(() => simHexLineGrep(f, lines, pattern).length));
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
if (withoutArr.length > 0) {
|
|
160
|
-
const avgWithout = Math.round(withoutArr.reduce((a, b) => a + b.value, 0) / withoutArr.length);
|
|
161
|
-
const avgWith = Math.round(withArr.reduce((a, b) => a + b.value, 0) / withArr.length);
|
|
162
|
-
const avgMsWithout = parseFloat((withoutArr.reduce((a, b) => a + b.ms, 0) / withoutArr.length).toFixed(1));
|
|
163
|
-
const avgMsWith = parseFloat((withArr.reduce((a, b) => a + b.ms, 0) / withArr.length).toFixed(1));
|
|
164
|
-
results.push({
|
|
165
|
-
num: 3, scenario: "Grep search",
|
|
166
|
-
without: avgWithout, withSL: avgWith,
|
|
167
|
-
savings: pctSavings(avgWithout, avgWith),
|
|
168
|
-
latencyWithout: avgMsWithout, latencyWith: avgMsWith,
|
|
169
|
-
});
|
|
170
|
-
}
|
|
171
|
-
}
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
// ===================================================================
|
|
175
|
-
// TEST 4: Directory tree
|
|
176
|
-
// ===================================================================
|
|
177
|
-
{
|
|
178
|
-
const { value: without, ms: withoutMs } = runN(() => simBuiltInLsR(repoRoot, 0, 3).length);
|
|
179
|
-
const { value: withSL, ms: withMs } = runN(() => directoryTree(repoRoot, { max_depth: 3 }).length);
|
|
180
|
-
results.push({
|
|
181
|
-
num: 4, scenario: "Directory tree",
|
|
182
|
-
without, withSL,
|
|
183
|
-
savings: pctSavings(without, withSL),
|
|
184
|
-
latencyWithout: withoutMs, latencyWith: withMs,
|
|
185
|
-
});
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
// ===================================================================
|
|
189
|
-
// TEST 5: File info
|
|
190
|
-
// ===================================================================
|
|
191
|
-
{
|
|
192
|
-
const infoFile = allFiles[Math.floor(allFiles.length / 2)] || allFiles[0];
|
|
193
|
-
const { value: without, ms: withoutMs } = runN(() => simBuiltInStat(infoFile).length);
|
|
194
|
-
const { value: withSL, ms: withMs } = runN(() => fileInfo(infoFile).length);
|
|
195
|
-
results.push({
|
|
196
|
-
num: 5, scenario: "File info",
|
|
197
|
-
without, withSL,
|
|
198
|
-
savings: pctSavings(without, withSL),
|
|
199
|
-
latencyWithout: withoutMs, latencyWith: withMs,
|
|
200
|
-
});
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
// ===================================================================
|
|
204
|
-
// TEST 6: Create file (write)
|
|
205
|
-
// ===================================================================
|
|
206
|
-
{
|
|
207
|
-
const { value: without, ms: withoutMs } = runN(() => simBuiltInWrite(tmpPath, tmpContent).length);
|
|
208
|
-
const { value: withSL, ms: withMs } = runN(() => simHexLineWrite(tmpPath, tmpContent).length);
|
|
209
|
-
results.push({
|
|
210
|
-
num: 6, scenario: "Create file (200L)",
|
|
211
|
-
without, withSL,
|
|
212
|
-
savings: pctSavings(without, withSL),
|
|
213
|
-
latencyWithout: withoutMs, latencyWith: withMs,
|
|
214
|
-
});
|
|
215
|
-
}
|
|
216
|
-
|
|
217
|
-
// ===================================================================
|
|
218
|
-
// TEST 7: Edit x5 sequential
|
|
219
|
-
// ===================================================================
|
|
220
|
-
{
|
|
221
|
-
const editTargets = [
|
|
222
|
-
{ line: 13, new: ' this.configPath = resolve(configPath || ".");' },
|
|
223
|
-
{ line: 55, new: " const { retries = MAX_RETRIES, delay = 200, backoff = 3 } = options;" },
|
|
224
|
-
{ line: 75, new: " this.timeout = options.timeout ?? DEFAULT_TIMEOUT;" },
|
|
225
|
-
{ line: 116, new: " return this; // chainable" },
|
|
226
|
-
{ line: 148, new: " /** @type {string[]} */\n const errors = [];" },
|
|
227
|
-
];
|
|
228
|
-
|
|
229
|
-
let totalWithout = 0;
|
|
230
|
-
let totalWith = 0;
|
|
231
|
-
let totalMsWithout = 0;
|
|
232
|
-
let totalMsWith = 0;
|
|
233
|
-
|
|
234
|
-
for (const edit of editTargets) {
|
|
235
|
-
const origLines = [...tmpLines];
|
|
236
|
-
const newLines = [...tmpLines];
|
|
237
|
-
const idx = edit.line - 1;
|
|
238
|
-
if (idx < newLines.length) {
|
|
239
|
-
newLines[idx] = edit.new;
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
const rW = runN(() => simBuiltInEdit(tmpPath, origLines, newLines).length);
|
|
243
|
-
const rH = runN(() => simHexLineEditDiff(origLines, newLines).length);
|
|
244
|
-
totalWithout += rW.value;
|
|
245
|
-
totalWith += rH.value;
|
|
246
|
-
totalMsWithout += rW.ms;
|
|
247
|
-
totalMsWith += rH.ms;
|
|
248
|
-
}
|
|
249
|
-
|
|
250
|
-
results.push({
|
|
251
|
-
num: 7, scenario: "Edit x5 sequential",
|
|
252
|
-
without: totalWithout, withSL: totalWith,
|
|
253
|
-
savings: pctSavings(totalWithout, totalWith),
|
|
254
|
-
latencyWithout: parseFloat(totalMsWithout.toFixed(1)), latencyWith: parseFloat(totalMsWith.toFixed(1)),
|
|
255
|
-
});
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
// ===================================================================
|
|
259
|
-
// TEST 8: Verify checksums
|
|
260
|
-
// ===================================================================
|
|
261
|
-
{
|
|
262
|
-
const fileLines = readFileSync(tmpPath, "utf-8").replace(/\r\n/g, "\n").split("\n");
|
|
263
|
-
const hashes = fileLines.map(l => fnv1a(l));
|
|
264
|
-
const cs1 = rangeChecksum(hashes.slice(0, 50), 1, 50);
|
|
265
|
-
const cs2 = rangeChecksum(hashes.slice(50, 100), 51, 100);
|
|
266
|
-
const cs3 = rangeChecksum(hashes.slice(100, 150), 101, 150);
|
|
267
|
-
const cs4 = rangeChecksum(hashes.slice(150, 200), 151, 200);
|
|
268
|
-
const checksums = [cs1, cs2, cs3, cs4];
|
|
269
|
-
|
|
270
|
-
const { value: without, ms: withoutMs } = runN(() => simBuiltInVerify(tmpPath, fileLines).length);
|
|
271
|
-
const { value: withSL, ms: withMs } = runN(() => verifyChecksums(tmpPath, checksums).length);
|
|
272
|
-
|
|
273
|
-
results.push({
|
|
274
|
-
num: 8, scenario: "Verify checksums (4 ranges)",
|
|
275
|
-
without, withSL,
|
|
276
|
-
savings: pctSavings(without, withSL),
|
|
277
|
-
latencyWithout: withoutMs, latencyWith: withMs,
|
|
278
|
-
});
|
|
279
|
-
}
|
|
280
|
-
|
|
281
|
-
// ===================================================================
|
|
282
|
-
// TEST 9: Multi-file read (batch)
|
|
283
|
-
// ===================================================================
|
|
284
|
-
{
|
|
285
|
-
const batchFiles = (cats.small || []).slice(0, 3);
|
|
286
|
-
if (batchFiles.length >= 2) {
|
|
287
|
-
// Without hex-line: N separate Read calls
|
|
288
|
-
const { value: without, ms: withoutMs } = runN(() => {
|
|
289
|
-
let total = 0;
|
|
290
|
-
for (const f of batchFiles) {
|
|
291
|
-
const lines = getFileLines(f);
|
|
292
|
-
if (lines) total += simBuiltInReadFull(f, lines).length;
|
|
293
|
-
}
|
|
294
|
-
return total;
|
|
295
|
-
});
|
|
296
|
-
|
|
297
|
-
// With hex-line: 1 read_file call with paths:[] — concatenated output
|
|
298
|
-
const { value: withSL, ms: withMs } = runN(() => {
|
|
299
|
-
const parts = [];
|
|
300
|
-
for (const f of batchFiles) {
|
|
301
|
-
parts.push(readFile(f));
|
|
302
|
-
}
|
|
303
|
-
return parts.join("\n\n---\n\n").length;
|
|
304
|
-
});
|
|
305
|
-
|
|
306
|
-
results.push({
|
|
307
|
-
num: 9, scenario: `Multi-file read (${batchFiles.length} files)`,
|
|
308
|
-
without, withSL,
|
|
309
|
-
savings: pctSavings(without, withSL),
|
|
310
|
-
latencyWithout: withoutMs, latencyWith: withMs,
|
|
311
|
-
});
|
|
312
|
-
}
|
|
313
|
-
}
|
|
314
|
-
|
|
315
|
-
// ===================================================================
|
|
316
|
-
// TEST 10: bulk_replace dry_run
|
|
317
|
-
// ===================================================================
|
|
318
|
-
{
|
|
319
|
-
const bulkTmpPaths = [];
|
|
320
|
-
for (let i = 0; i < 5; i++) {
|
|
321
|
-
const p = resolve(tmpdir(), `hex-line-bulk-${ts}-${i}.js`);
|
|
322
|
-
writeFileSync(p, tmpContent, "utf-8");
|
|
323
|
-
bulkTmpPaths.push(p);
|
|
324
|
-
}
|
|
325
|
-
|
|
326
|
-
const editLine = 13;
|
|
327
|
-
const editNew = ' this.configPath = resolve(configPath || ".");';
|
|
328
|
-
|
|
329
|
-
// Without hex-line: 5 separate edit_file calls
|
|
330
|
-
const { value: without, ms: withoutMs } = runN(() => {
|
|
331
|
-
let total = 0;
|
|
332
|
-
for (const p of bulkTmpPaths) {
|
|
333
|
-
const origLines = [...tmpLines];
|
|
334
|
-
const newLines = [...tmpLines];
|
|
335
|
-
newLines[editLine - 1] = editNew;
|
|
336
|
-
total += simBuiltInEdit(p, origLines, newLines).length;
|
|
337
|
-
}
|
|
338
|
-
return total;
|
|
339
|
-
});
|
|
340
|
-
|
|
341
|
-
// With hex-line: 1 bulk_replace — summary + per-file compact diff
|
|
342
|
-
const { value: withSL, ms: withMs } = runN(() => {
|
|
343
|
-
let response = "5 files changed, 0 errors\n";
|
|
344
|
-
for (const p of bulkTmpPaths) {
|
|
345
|
-
const origLines = [...tmpLines];
|
|
346
|
-
const newLines = [...tmpLines];
|
|
347
|
-
newLines[editLine - 1] = editNew;
|
|
348
|
-
response += simHexLineEditDiff(origLines, newLines) + "\n";
|
|
349
|
-
}
|
|
350
|
-
return response.length;
|
|
351
|
-
});
|
|
352
|
-
|
|
353
|
-
results.push({
|
|
354
|
-
num: 10, scenario: "bulk_replace dry_run (5 files)",
|
|
355
|
-
without, withSL,
|
|
356
|
-
savings: pctSavings(without, withSL),
|
|
357
|
-
latencyWithout: withoutMs, latencyWith: withMs,
|
|
358
|
-
});
|
|
359
|
-
|
|
360
|
-
for (const p of bulkTmpPaths) {
|
|
361
|
-
try { unlinkSync(p); } catch { /* ok */ }
|
|
362
|
-
}
|
|
363
|
-
}
|
|
364
|
-
|
|
365
|
-
// ===================================================================
|
|
366
|
-
// TEST 11: changes (semantic diff)
|
|
367
|
-
// ===================================================================
|
|
368
|
-
{
|
|
369
|
-
// Without hex-line: raw unified diff output
|
|
370
|
-
const { value: without, ms: withoutMs } = runN(() => {
|
|
371
|
-
const diffLines = [
|
|
372
|
-
`diff --git a/benchmark-target.js b/benchmark-target.js`,
|
|
373
|
-
`index abc1234..def5678 100644`,
|
|
374
|
-
`--- a/benchmark-target.js`,
|
|
375
|
-
`+++ b/benchmark-target.js`,
|
|
376
|
-
`@@ -10,6 +10,12 @@ const DEFAULT_TIMEOUT = 5000;`,
|
|
377
|
-
];
|
|
378
|
-
// Simulate ~15 context + change lines typical of a small diff
|
|
379
|
-
for (let i = 0; i < 5; i++) {
|
|
380
|
-
diffLines.push(` ${tmpLines[i + 5] || " // context line"}`); // context
|
|
381
|
-
}
|
|
382
|
-
diffLines.push(`-${tmpLines[12] || " old line"}`);
|
|
383
|
-
diffLines.push(`+ this.configPath = resolve(configPath || ".");`);
|
|
384
|
-
for (let i = 0; i < 5; i++) {
|
|
385
|
-
diffLines.push(` ${tmpLines[i + 14] || " // context line"}`); // context
|
|
386
|
-
}
|
|
387
|
-
// Second hunk — added function
|
|
388
|
-
diffLines.push(`@@ -195,0 +201,8 @@`);
|
|
389
|
-
for (let i = 0; i < 3; i++) {
|
|
390
|
-
diffLines.push(` ${tmpLines[i + 150] || " // context"}`);
|
|
391
|
-
}
|
|
392
|
-
for (let i = 0; i < 5; i++) {
|
|
393
|
-
diffLines.push(`+ // new function line ${i}`);
|
|
394
|
-
}
|
|
395
|
-
for (let i = 0; i < 3; i++) {
|
|
396
|
-
diffLines.push(` ${tmpLines[i + 155] || " // context"}`);
|
|
397
|
-
}
|
|
398
|
-
return diffLines.join("\n").length;
|
|
399
|
-
});
|
|
400
|
-
|
|
401
|
-
// With hex-line: real fileChanges() semantic diff (async, called once — deterministic)
|
|
402
|
-
let withSL;
|
|
403
|
-
let withMs = 0;
|
|
404
|
-
try {
|
|
405
|
-
const t0 = performance.now();
|
|
406
|
-
const changesOut = await fileChanges(allFiles[0]);
|
|
407
|
-
withMs = parseFloat((performance.now() - t0).toFixed(1));
|
|
408
|
-
withSL = changesOut.length;
|
|
409
|
-
} catch {
|
|
410
|
-
withSL = 133; // fallback if no git history
|
|
411
|
-
}
|
|
412
|
-
|
|
413
|
-
results.push({
|
|
414
|
-
num: 11, scenario: "Changes (semantic diff)",
|
|
415
|
-
without, withSL,
|
|
416
|
-
savings: pctSavings(without, withSL),
|
|
417
|
-
latencyWithout: withoutMs, latencyWith: withMs,
|
|
418
|
-
});
|
|
419
|
-
}
|
|
420
|
-
|
|
421
|
-
// ===================================================================
|
|
422
|
-
// TEST 12: FILE_NOT_FOUND recovery
|
|
423
|
-
// ===================================================================
|
|
424
|
-
{
|
|
425
|
-
const missingPath = resolve(repoRoot, "src/utils/halper.js");
|
|
426
|
-
const parentDir = resolve(repoRoot, "src/utils");
|
|
427
|
-
|
|
428
|
-
// Without hex-line: 3 round-trips (error → ls → retry)
|
|
429
|
-
const { value: without, ms: withoutMs } = runN(() => {
|
|
430
|
-
// Round 1: real ENOENT error
|
|
431
|
-
let r1;
|
|
432
|
-
try { readFileSync(missingPath, "utf-8"); r1 = ""; } catch (e) { r1 = e.message; }
|
|
433
|
-
// Round 2: real directory listing to find correct name
|
|
434
|
-
let r2;
|
|
435
|
-
try { r2 = readdirSync(parentDir).join("\n"); } catch { r2 = `${parentDir}: directory not found`; }
|
|
436
|
-
// Round 3: agent re-reads correct file (small file ~30 lines)
|
|
437
|
-
const r3 = simBuiltInReadFull(missingPath, tmpLines.slice(0, 30));
|
|
438
|
-
return (r1 + r2 + r3).length;
|
|
439
|
-
});
|
|
440
|
-
|
|
441
|
-
// With hex-line: real readFile() on nonexistent path — returns error + parent dir listing
|
|
442
|
-
const { value: withSL, ms: withMs } = runN(() => {
|
|
443
|
-
try {
|
|
444
|
-
return readFile(missingPath).length;
|
|
445
|
-
} catch (e) {
|
|
446
|
-
return e.message.length;
|
|
447
|
-
}
|
|
448
|
-
});
|
|
449
|
-
|
|
450
|
-
results.push({
|
|
451
|
-
num: 12, scenario: "FILE_NOT_FOUND recovery*",
|
|
452
|
-
without, withSL,
|
|
453
|
-
savings: pctSavings(without, withSL),
|
|
454
|
-
latencyWithout: withoutMs, latencyWith: withMs,
|
|
455
|
-
});
|
|
456
|
-
}
|
|
457
|
-
|
|
458
|
-
// ===================================================================
|
|
459
|
-
// TEST 13: Hash mismatch recovery
|
|
460
|
-
// ===================================================================
|
|
461
|
-
{
|
|
462
|
-
// Without hex-line: 3 round-trips (stale error → re-read full → retry edit)
|
|
463
|
-
const { value: without, ms: withoutMs } = runN(() => {
|
|
464
|
-
// Round 1: error
|
|
465
|
-
const r1 = 'Error: file content has changed (stale). Please re-read the file.';
|
|
466
|
-
// Round 2: full re-read
|
|
467
|
-
const r2 = simBuiltInReadFull(tmpPath, tmpLines);
|
|
468
|
-
// Round 3: retry edit response
|
|
469
|
-
const origLines = [...tmpLines];
|
|
470
|
-
const newLines = [...tmpLines];
|
|
471
|
-
newLines[12] = ' this.configPath = resolve(configPath || ".");';
|
|
472
|
-
const r3 = simBuiltInEdit(tmpPath, origLines, newLines);
|
|
473
|
-
return (r1 + r2 + r3).length;
|
|
474
|
-
});
|
|
475
|
-
|
|
476
|
-
// With hex-line: 1 round-trip (error + fresh snippet +/-5 lines around target)
|
|
477
|
-
const { value: withSL, ms: withMs } = runN(() => {
|
|
478
|
-
const targetLine = 13;
|
|
479
|
-
const snippetStart = Math.max(0, targetLine - 6);
|
|
480
|
-
const snippetEnd = Math.min(tmpLines.length, targetLine + 5);
|
|
481
|
-
const snippet = tmpLines.slice(snippetStart, snippetEnd);
|
|
482
|
-
const annotated = snippet.map((l, i) => {
|
|
483
|
-
const lineNum = snippetStart + i + 1;
|
|
484
|
-
const tag = lineTag(fnv1a(l));
|
|
485
|
-
return `${tag}.${lineNum}\t${l}`;
|
|
486
|
-
}).join("\n");
|
|
487
|
-
const response = `HASH_MISMATCH at line ${targetLine}. Fresh snippet:\n\`\`\`\n${annotated}\n\`\`\``;
|
|
488
|
-
return response.length;
|
|
489
|
-
});
|
|
490
|
-
|
|
491
|
-
results.push({
|
|
492
|
-
num: 13, scenario: "Hash mismatch recovery*",
|
|
493
|
-
without, withSL,
|
|
494
|
-
savings: pctSavings(without, withSL),
|
|
495
|
-
latencyWithout: withoutMs, latencyWith: withMs,
|
|
496
|
-
});
|
|
497
|
-
}
|
|
498
|
-
|
|
499
|
-
// ===================================================================
|
|
500
|
-
// TEST 14: Bash redirect savings
|
|
501
|
-
// ===================================================================
|
|
502
|
-
{
|
|
503
|
-
const infoFile = allFiles[Math.floor(allFiles.length / 2)] || allFiles[0];
|
|
504
|
-
const infoLines = getFileLines(infoFile);
|
|
505
|
-
if (infoLines) {
|
|
506
|
-
// Sub-test A: cat vs read_file
|
|
507
|
-
const catW = runN(() => {
|
|
508
|
-
// cat output: raw lines, no line numbers (agent redirect)
|
|
509
|
-
return infoLines.join("\n").length;
|
|
510
|
-
});
|
|
511
|
-
const catH = runN(() => readFile(infoFile).length);
|
|
512
|
-
|
|
513
|
-
// Sub-test B: ls -la vs directory_tree
|
|
514
|
-
const dirTarget = resolve(repoRoot);
|
|
515
|
-
const lsW = runN(() => simBuiltInLsR(dirTarget, 0, 1).length);
|
|
516
|
-
const lsH = runN(() => directoryTree(dirTarget, { max_depth: 1 }).length);
|
|
517
|
-
|
|
518
|
-
// Sub-test C: stat vs get_file_info
|
|
519
|
-
const stW = runN(() => simBuiltInStat(infoFile).length);
|
|
520
|
-
const stH = runN(() => fileInfo(infoFile).length);
|
|
521
|
-
|
|
522
|
-
// Combined: without = raw outputs (no follow-up possible)
|
|
523
|
-
// With = structured output (enables follow-up without extra calls)
|
|
524
|
-
const totalWithout = catW.value + lsW.value + stW.value;
|
|
525
|
-
const totalWith = catH.value + lsH.value + stH.value;
|
|
526
|
-
const totalMsWithout = catW.ms + lsW.ms + stW.ms;
|
|
527
|
-
const totalMsWith = catH.ms + lsH.ms + stH.ms;
|
|
528
|
-
|
|
529
|
-
results.push({
|
|
530
|
-
num: 14, scenario: "Bash redirects (cat+ls+stat)",
|
|
531
|
-
without: totalWithout, withSL: totalWith,
|
|
532
|
-
savings: pctSavings(totalWithout, totalWith),
|
|
533
|
-
latencyWithout: parseFloat(totalMsWithout.toFixed(1)), latencyWith: parseFloat(totalMsWith.toFixed(1)),
|
|
534
|
-
});
|
|
535
|
-
}
|
|
536
|
-
}
|
|
537
|
-
|
|
538
|
-
// ===================================================================
|
|
539
|
-
// TEST 15: HASH_HINT multi-match recovery
|
|
540
|
-
// ===================================================================
|
|
541
|
-
{
|
|
542
|
-
// Create a file with a duplicated line so textReplace triggers HASH_HINT
|
|
543
|
-
const dupLine = ' return this.config;';
|
|
544
|
-
const dupContent = tmpLines.map((l, i) => (i === 20 || i === 80) ? dupLine : l);
|
|
545
|
-
const dupPath = resolve(tmpdir(), `hex-line-dup-${ts}.js`);
|
|
546
|
-
writeFileSync(dupPath, dupContent.join("\n"), "utf-8");
|
|
547
|
-
|
|
548
|
-
// Without hex-line: 3 round-trips (opaque error + re-read full + retry)
|
|
549
|
-
const { value: without, ms: withoutMs } = runN(() => {
|
|
550
|
-
const r1 = 'Error: multiple occurrences found. Provide more context.';
|
|
551
|
-
const r2 = simBuiltInReadFull(dupPath, dupContent);
|
|
552
|
-
const origLines = [...dupContent];
|
|
553
|
-
const newLines = [...dupContent];
|
|
554
|
-
newLines[20] = ' return this.updatedConfig;';
|
|
555
|
-
const r3 = simBuiltInEdit(dupPath, origLines, newLines);
|
|
556
|
-
return (r1 + r2 + r3).length;
|
|
557
|
-
});
|
|
558
|
-
|
|
559
|
-
// With hex-line: HASH_HINT error contains annotated snippets (1 round-trip)
|
|
560
|
-
const { value: withSL, ms: withMs } = runN(() => {
|
|
561
|
-
try {
|
|
562
|
-
editFile(dupPath, [{ replace: { old_text: dupLine, new_text: ' return this.updatedConfig;' } }]);
|
|
563
|
-
return 0; // should not reach
|
|
564
|
-
} catch (e) {
|
|
565
|
-
// HASH_HINT error message + simulated anchor retry
|
|
566
|
-
const retry = '{"set_line":{"anchor":"xx.21","new_text":" return this.updatedConfig;"}}';
|
|
567
|
-
return (e.message + retry).length;
|
|
568
|
-
}
|
|
569
|
-
});
|
|
570
|
-
|
|
571
|
-
results.push({
|
|
572
|
-
num: 15, scenario: "HASH_HINT multi-match recovery*",
|
|
573
|
-
without, withSL,
|
|
574
|
-
savings: pctSavings(without, withSL),
|
|
575
|
-
latencyWithout: withoutMs, latencyWith: withMs,
|
|
576
|
-
});
|
|
577
|
-
|
|
578
|
-
try { unlinkSync(dupPath); } catch { /* ok */ }
|
|
579
|
-
}
|
|
580
|
-
|
|
581
|
-
// ===================================================================
|
|
582
|
-
// TEST 16-18: Graph enrichment (--with-graph only)
|
|
583
|
-
// Both sides use hex-line; difference is whether .codegraph/index.db exists
|
|
584
|
-
// ===================================================================
|
|
585
|
-
const graphOut = [];
|
|
586
|
-
if (withGraph) {
|
|
587
|
-
const { getGraphDB, getRelativePath } = await import("./lib/graph-enrich.mjs");
|
|
588
|
-
const db = getGraphDB(resolve(repoRoot, "server.mjs"));
|
|
589
|
-
if (!db) {
|
|
590
|
-
console.error("--with-graph: .codegraph/index.db not found. Run hex-graph index_project first.");
|
|
591
|
-
} else {
|
|
592
|
-
const graphFile = largeFiles[0] || allFiles[0];
|
|
593
|
-
const graphLines = getFileLines(graphFile);
|
|
594
|
-
|
|
595
|
-
if (graphLines) {
|
|
596
|
-
// TEST 16: Read with/without Graph header
|
|
597
|
-
{
|
|
598
|
-
const withGraphResult = readFile(graphFile);
|
|
599
|
-
const noGraphResult = withGraphResult.replace(/\nGraph:.*\n/, "\n");
|
|
600
|
-
const savings = pctSavings(noGraphResult.length, withGraphResult.length);
|
|
601
|
-
graphOut.push(`| 16 | Graph: Read (${graphLines.length}L) | ${fmt(noGraphResult.length)} chars | ${fmt(withGraphResult.length)} chars | ${savings} | 2\u21921 | 2\u21921 |`);
|
|
602
|
-
}
|
|
603
|
-
|
|
604
|
-
// TEST 17: Edit with/without blast radius
|
|
605
|
-
{
|
|
606
|
-
const editTmpPath = resolve(tmpdir(), `hex-bench-edit-${Date.now()}.js`);
|
|
607
|
-
writeFileSync(editTmpPath, graphLines.join("\n"), "utf-8");
|
|
608
|
-
try {
|
|
609
|
-
const editResult = editFile(editTmpPath, [{ replace: { old_text: graphLines[5], new_text: graphLines[5] + " // modified" } }]);
|
|
610
|
-
const noBlastOut = editResult.replace(/\n.*Blast radius.*$/s, "");
|
|
611
|
-
const savings = pctSavings(noBlastOut.length, editResult.length);
|
|
612
|
-
graphOut.push(`| 17 | Graph: Edit + impact | ${fmt(noBlastOut.length)} chars | ${fmt(editResult.length)} chars | ${savings} | 2\u21921 | 2\u21921 |`);
|
|
613
|
-
} catch (e) {
|
|
614
|
-
graphOut.push(`| 17 | Graph: Edit + impact | \u2014 | \u2014 | \u2014 | | |`);
|
|
615
|
-
}
|
|
616
|
-
try { unlinkSync(editTmpPath); } catch {}
|
|
617
|
-
}
|
|
618
|
-
|
|
619
|
-
// TEST 18: Grep with/without annotations
|
|
620
|
-
{
|
|
621
|
-
try {
|
|
622
|
-
const grepResult = await grepSearch("function", { path: resolve(repoRoot), glob: "*.mjs", limit: 10 });
|
|
623
|
-
const noAnnoResult = grepResult.replace(/ \[[^\]]+\]/g, "");
|
|
624
|
-
const savings = pctSavings(noAnnoResult.length, grepResult.length);
|
|
625
|
-
const annoCount = (grepResult.match(/\[[^\]]+\]/g) || []).length;
|
|
626
|
-
graphOut.push(`| 18 | Graph: Grep + ${annoCount} annotations | ${fmt(noAnnoResult.length)} chars | ${fmt(grepResult.length)} chars | ${savings} | 6\u21921 | 6\u21921 |`);
|
|
627
|
-
} catch {
|
|
628
|
-
graphOut.push(`| 18 | Graph: Grep + context | \u2014 | \u2014 | \u2014 | | |`);
|
|
629
|
-
}
|
|
630
|
-
}
|
|
631
|
-
}
|
|
632
|
-
}
|
|
633
|
-
}
|
|
634
|
-
|
|
635
|
-
// ===================================================================
|
|
636
|
-
// WORKFLOW SCENARIOS (multi-step real operations)
|
|
637
|
-
// ===================================================================
|
|
638
|
-
const workflowResults = [];
|
|
639
|
-
|
|
640
|
-
// W1: Search → Edit (find a pattern, edit the match)
|
|
641
|
-
{
  // W1: Search -> Edit. Compares the characters an agent receives when it
  // (a) greps, reads the whole file, and edits with old/new text blocks, versus
  // (b) greps with hash tags and edits directly by anchor.
  const wTmpPath = resolve(tmpdir(), `hex-wf1-${Date.now()}.js`);
  const targetIdx = 12;
  const editNew = tmpLines[targetIdx] + " // workflow-modified";

  try {
    writeFileSync(wTmpPath, tmpContent, "utf-8");

    // Without: grep -> read file for context -> edit with old_string
    const { value: without } = runN(() => {
      let total = 0;
      // Step 1: grep to find
      total += simBuiltInGrep("configPath", wTmpPath).length;
      // Step 2: read full file for context (agent needs surrounding lines)
      total += simBuiltInReadFull(wTmpPath, tmpLines).length;
      // Step 3: edit
      const origLines = [...tmpLines];
      const newLines = [...tmpLines];
      newLines[targetIdx] = editNew;
      total += simBuiltInEdit(wTmpPath, origLines, newLines).length;
      return total;
    });

    // With: grep_search (has hashes) -> edit with anchor (no re-read needed)
    const { value: withSL } = runN(() => {
      let total = 0;
      // editFile below really rewrites the temp file, so restore the pristine
      // content first. Otherwise any repeated invocation by runN would measure
      // an edit against an already-modified line. The other workflow blocks
      // reset their files the same way.
      writeFileSync(wTmpPath, tmpContent, "utf-8");
      // Step 1: grep with hashes (simulated by hashing the target line from disk)
      const lines = readFileSync(wTmpPath, "utf-8").split("\n");
      const tag = lineTag(fnv1a(lines[targetIdx]));
      const anchor = `${tag}.${targetIdx + 1}`;
      total += `${wTmpPath}:>>${anchor}\t${lines[targetIdx]}`.length;
      // Step 2: edit with anchor directly (no read needed)
      try {
        const result = editFile(wTmpPath, [{ set_line: { anchor, new_text: editNew } }]);
        total += result.length;
      } catch (e) {
        // A failed edit still costs the agent the error text it receives.
        total += e.message.length;
      }
      return total;
    });

    workflowResults.push({
      id: "W1", scenario: "Search \u2192 Edit",
      without, withSL,
      opsWithout: 3, opsWith: 2,
    });
  } finally {
    // Always remove the temp file, even if a measurement throws; a missing
    // file at this point is not an error.
    try { unlinkSync(wTmpPath); } catch { /* ok */ }
  }
}
|
|
686
|
-
|
|
687
|
-
// W2: Read → Edit → Verify cycle
|
|
688
|
-
{
|
|
689
|
-
const wTmpPath = resolve(tmpdir(), `hex-wf2-${Date.now()}.js`);
|
|
690
|
-
writeFileSync(wTmpPath, tmpContent, "utf-8");
|
|
691
|
-
|
|
692
|
-
// Without: read full → edit → re-read full to verify
|
|
693
|
-
const { value: without } = runN(() => {
|
|
694
|
-
let total = 0;
|
|
695
|
-
total += simBuiltInReadFull(wTmpPath, tmpLines).length; // read
|
|
696
|
-
const origLines = [...tmpLines];
|
|
697
|
-
const newLines = [...tmpLines];
|
|
698
|
-
newLines[12] = ' this.configPath = resolve(configPath || ".");';
|
|
699
|
-
total += simBuiltInEdit(wTmpPath, origLines, newLines).length; // edit
|
|
700
|
-
total += simBuiltInReadFull(wTmpPath, tmpLines).length; // re-read to verify
|
|
701
|
-
return total;
|
|
702
|
-
});
|
|
703
|
-
|
|
704
|
-
// With: read targeted → edit → verify checksums
|
|
705
|
-
const { value: withSL } = runN(() => {
|
|
706
|
-
let total = 0;
|
|
707
|
-
total += readFile(wTmpPath, { offset: 8, limit: 20 }).length; // targeted read
|
|
708
|
-
// Reset file for edit
|
|
709
|
-
writeFileSync(wTmpPath, tmpContent, "utf-8");
|
|
710
|
-
try {
|
|
711
|
-
const result = editFile(wTmpPath, [{ replace: { old_text: tmpLines[12], new_text: ' this.configPath = resolve(configPath || ".");' } }]);
|
|
712
|
-
total += result.length;
|
|
713
|
-
} catch (e) { total += e.message.length; }
|
|
714
|
-
// Verify with checksums instead of re-reading
|
|
715
|
-
const hashes = tmpLines.slice(0, 50).map(l => fnv1a(l));
|
|
716
|
-
const cs = rangeChecksum(hashes, 1, 50);
|
|
717
|
-
try { total += verifyChecksums(wTmpPath, [cs]).length; }
|
|
718
|
-
catch { total += 100; }
|
|
719
|
-
return total;
|
|
720
|
-
});
|
|
721
|
-
|
|
722
|
-
workflowResults.push({
|
|
723
|
-
id: "W2", scenario: "Read \u2192 Edit \u2192 Verify",
|
|
724
|
-
without, withSL,
|
|
725
|
-
opsWithout: 3, opsWith: 3,
|
|
726
|
-
});
|
|
727
|
-
try { unlinkSync(wTmpPath); } catch {}
|
|
728
|
-
}
|
|
729
|
-
|
|
730
|
-
// W3: Multi-file refactor (rename in 5 files)
|
|
731
|
-
{
|
|
732
|
-
const wDir = resolve(tmpdir(), `hex-wf3-${Date.now()}`);
|
|
733
|
-
mkdirSync(wDir, { recursive: true });
|
|
734
|
-
const wPaths = [];
|
|
735
|
-
for (let i = 0; i < 5; i++) {
|
|
736
|
-
const p = resolve(wDir, `file-${i}.js`);
|
|
737
|
-
writeFileSync(p, tmpContent, "utf-8");
|
|
738
|
-
wPaths.push(p);
|
|
739
|
-
}
|
|
740
|
-
|
|
741
|
-
// Without: grep to find files → read each → edit each = 11 ops
|
|
742
|
-
const { value: without } = runN(() => {
|
|
743
|
-
let total = 0;
|
|
744
|
-
total += simBuiltInGrep("configPath", wPaths[0]).length; // find
|
|
745
|
-
for (const p of wPaths) {
|
|
746
|
-
total += simBuiltInReadFull(p, tmpLines).length; // read each
|
|
747
|
-
const origLines = [...tmpLines];
|
|
748
|
-
const newLines = [...tmpLines];
|
|
749
|
-
newLines[12] = newLines[12].replace("configPath", "settingsPath");
|
|
750
|
-
total += simBuiltInEdit(p, origLines, newLines).length; // edit each
|
|
751
|
-
}
|
|
752
|
-
return total;
|
|
753
|
-
});
|
|
754
|
-
|
|
755
|
-
// With: grep_search → bulk_replace = 2 ops
|
|
756
|
-
const { value: withSL } = runN(() => {
|
|
757
|
-
let total = 0;
|
|
758
|
-
// Restore files
|
|
759
|
-
for (const p of wPaths) writeFileSync(p, tmpContent, "utf-8");
|
|
760
|
-
// Single grep (simulated — bulk_replace does its own finding)
|
|
761
|
-
total += 200; // approximate grep output
|
|
762
|
-
// Single bulk_replace
|
|
763
|
-
const result = bulkReplace(
|
|
764
|
-
wDir,
|
|
765
|
-
"*.js",
|
|
766
|
-
[{ old: "configPath", new: "settingsPath" }],
|
|
767
|
-
{ dryRun: true, maxFiles: 10 }
|
|
768
|
-
);
|
|
769
|
-
total += result.length;
|
|
770
|
-
return total;
|
|
771
|
-
});
|
|
772
|
-
|
|
773
|
-
workflowResults.push({
|
|
774
|
-
id: "W3", scenario: "Multi-file refactor (5 files)",
|
|
775
|
-
without, withSL,
|
|
776
|
-
opsWithout: 11, opsWith: 2,
|
|
777
|
-
});
|
|
778
|
-
try { rmSync(wDir, { recursive: true }); } catch {}
|
|
779
|
-
}
|
|
780
|
-
|
|
781
|
-
// W4: Explore large file → targeted edit
|
|
782
|
-
{
|
|
783
|
-
const largeFile = largeFiles[0] || allFiles[0];
|
|
784
|
-
const largeLines = getFileLines(largeFile);
|
|
785
|
-
if (largeLines && largeLines.length > 100) {
|
|
786
|
-
// Without: read full file → grep for method → edit
|
|
787
|
-
const { value: without } = runN(() => {
|
|
788
|
-
let total = 0;
|
|
789
|
-
total += simBuiltInReadFull(largeFile, largeLines).length;
|
|
790
|
-
total += simBuiltInGrep("function", largeFile).length;
|
|
791
|
-
// Simulate edit response
|
|
792
|
-
const origLines = [...largeLines];
|
|
793
|
-
const newLines = [...largeLines];
|
|
794
|
-
newLines[10] = newLines[10] + " // modified";
|
|
795
|
-
total += simBuiltInEdit(largeFile, origLines, newLines).length;
|
|
796
|
-
return total;
|
|
797
|
-
});
|
|
798
|
-
|
|
799
|
-
// With: outline → read range → edit with anchor
|
|
800
|
-
let outlineLen = 500;
|
|
801
|
-
try { outlineLen = (await fileOutline(largeFile)).length; } catch {}
|
|
802
|
-
const { value: withSL } = runN(() => {
|
|
803
|
-
let total = 0;
|
|
804
|
-
total += outlineLen; // outline (pre-computed, async)
|
|
805
|
-
total += readFile(largeFile, { offset: 5, limit: 30 }).length; // targeted read
|
|
806
|
-
total += simHexLineEditDiff(largeLines.slice(5, 35), [...largeLines.slice(5, 35)].map((l, i) => i === 5 ? l + " // modified" : l)).length;
|
|
807
|
-
return total;
|
|
808
|
-
});
|
|
809
|
-
|
|
810
|
-
workflowResults.push({
|
|
811
|
-
id: "W4", scenario: `Explore+edit (${largeLines.length}L file)`,
|
|
812
|
-
without, withSL,
|
|
813
|
-
opsWithout: 3, opsWith: 3,
|
|
814
|
-
});
|
|
815
|
-
}
|
|
816
|
-
}
|
|
817
|
-
|
|
818
|
-
// ===================================================================
|
|
819
|
-
// Cleanup
|
|
820
|
-
// ===================================================================
|
|
821
|
-
try { unlinkSync(tmpPath); } catch { /* ok */ }
|
|
822
|
-
|
|
823
|
-
// ===================================================================
|
|
824
|
-
// Report
|
|
825
|
-
// ===================================================================
|
|
826
|
-
const out = [];
|
|
827
|
-
out.push("# Hex-line Benchmark v3");
|
|
828
|
-
out.push("");
|
|
829
|
-
out.push(`Repository: ${repoName} (${fmt(allFiles.length)} code files, ${fmt(totalLines)} lines) `);
|
|
830
|
-
out.push(`Temp file: ${tmpPath} (200 lines) `);
|
|
831
|
-
out.push(`Date: ${new Date().toISOString().slice(0, 10)} `);
|
|
832
|
-
out.push(`Runs per scenario: ${RUNS} (median) `);
|
|
833
|
-
out.push("");
|
|
834
|
-
|
|
835
|
-
// Ops comparison: how many tool calls each scenario requires
|
|
836
|
-
const OPS = {
|
|
837
|
-
"Read full (<50L)": { without: 1, with: 1 },
|
|
838
|
-
"Read full (50-200L)": { without: 1, with: 1 },
|
|
839
|
-
"Read full (200-500L)": { without: 1, with: 1 },
|
|
840
|
-
"Read full (500L+)": { without: 1, with: 1 },
|
|
841
|
-
"Outline+read (200-500L)": { without: 1, with: 2 },
|
|
842
|
-
"Outline+read (500L+)": { without: 1, with: 2 },
|
|
843
|
-
"Grep search": { without: 1, with: 1 },
|
|
844
|
-
"Directory tree": { without: 1, with: 1 },
|
|
845
|
-
"File info": { without: 1, with: 1 },
|
|
846
|
-
"Create file (200L)": { without: 1, with: 1 },
|
|
847
|
-
"Edit x5 sequential": { without: 5, with: 5 },
|
|
848
|
-
"Verify checksums (4 ranges)": { without: 4, with: 1 },
|
|
849
|
-
"Multi-file read": { without: 2, with: 1 },
|
|
850
|
-
"bulk_replace dry_run (5 files)": { without: 5, with: 1 },
|
|
851
|
-
"Changes (semantic diff)": { without: 1, with: 1 },
|
|
852
|
-
"FILE_NOT_FOUND recovery*": { without: 3, with: 1 },
|
|
853
|
-
"Hash mismatch recovery*": { without: 3, with: 1 },
|
|
854
|
-
"Bash redirects (cat+ls+stat)": { without: 3, with: 3 },
|
|
855
|
-
"HASH_HINT multi-match recovery*": { without: 3, with: 2 },
|
|
856
|
-
};
|
|
857
|
-
|
|
858
|
-
const STEPS = {
|
|
859
|
-
"Read full (<50L)": { without: 1, with: 1 },
|
|
860
|
-
"Read full (50-200L)": { without: 1, with: 1 },
|
|
861
|
-
"Read full (200-500L)": { without: 1, with: 1 },
|
|
862
|
-
"Read full (500L+)": { without: 1, with: 1 },
|
|
863
|
-
"Outline+read (200-500L)": { without: 1, with: 2 },
|
|
864
|
-
"Outline+read (500L+)": { without: 1, with: 2 },
|
|
865
|
-
"Grep search": { without: 1, with: 1 },
|
|
866
|
-
"Directory tree": { without: 1, with: 1 },
|
|
867
|
-
"File info": { without: 1, with: 1 },
|
|
868
|
-
"Create file (200L)": { without: 1, with: 1 },
|
|
869
|
-
"Edit x5 sequential": { without: 5, with: 5 },
|
|
870
|
-
"Verify checksums (4 ranges)": { without: 4, with: 1 },
|
|
871
|
-
"Multi-file read": { without: 1, with: 1 },
|
|
872
|
-
"bulk_replace dry_run (5 files)": { without: 5, with: 1 },
|
|
873
|
-
"Changes (semantic diff)": { without: 1, with: 1 },
|
|
874
|
-
"FILE_NOT_FOUND recovery": { without: 3, with: 1 },
|
|
875
|
-
"Hash mismatch recovery": { without: 3, with: 1 },
|
|
876
|
-
"Bash redirects (cat+ls+stat)": { without: 1, with: 1 },
|
|
877
|
-
"HASH_HINT multi-match recovery": { without: 3, with: 1 },
|
|
878
|
-
};
|
|
879
|
-
|
|
880
|
-
// Combined results + ops + steps table
|
|
881
|
-
out.push("## Results");
|
|
882
|
-
out.push("");
|
|
883
|
-
out.push("| # | Scenario | Baseline | Hex-line | Savings | Ops | Steps |");
|
|
884
|
-
out.push("|---|----------|----------|----------|---------|-----|-------|");
|
|
885
|
-
|
|
886
|
-
for (const r of results) {
|
|
887
|
-
if (r.num >= 16) continue; // graph rows added below
|
|
888
|
-
|
|
889
|
-
// Match OPS/STEPS keys
|
|
890
|
-
let op = OPS[r.scenario];
|
|
891
|
-
if (!op) {
|
|
892
|
-
const key = Object.keys(OPS).find(k => r.scenario.startsWith(k));
|
|
893
|
-
if (key) op = OPS[key];
|
|
894
|
-
}
|
|
895
|
-
let step = STEPS[r.scenario];
|
|
896
|
-
if (!step) {
|
|
897
|
-
const key = Object.keys(STEPS).find(k => r.scenario.startsWith(k));
|
|
898
|
-
if (key) step = STEPS[key];
|
|
899
|
-
}
|
|
900
|
-
|
|
901
|
-
const opsStr = op ? `${op.without}\u2192${op.with}` : "\u2014";
|
|
902
|
-
const stepsStr = step ? `${step.without}\u2192${step.with}` : "\u2014";
|
|
903
|
-
|
|
904
|
-
out.push(`| ${r.num} | ${r.scenario} | ${fmt(r.without)} chars | ${fmt(r.withSL)} chars | ${r.savings} | ${opsStr} | ${stepsStr} |`);
|
|
905
|
-
}
|
|
906
|
-
|
|
907
|
-
// Append graph rows into same table (if any)
|
|
908
|
-
if (graphOut.length > 0) {
|
|
909
|
-
out.push("| | **hex-line \u00b1 graph** | **No Graph** | **With Graph** | | | |");
|
|
910
|
-
out.push(...graphOut);
|
|
911
|
-
}
|
|
912
|
-
out.push("");
|
|
913
|
-
|
|
914
|
-
// Workflow scenarios table
|
|
915
|
-
if (workflowResults.length > 0) {
|
|
916
|
-
out.push("## Workflow Scenarios (multi-step)");
|
|
917
|
-
out.push("");
|
|
918
|
-
out.push("| # | Scenario | Built-in | Hex-line | Savings | Ops |");
|
|
919
|
-
out.push("|---|----------|----------|----------|---------|-----|");
|
|
920
|
-
for (const w of workflowResults) {
|
|
921
|
-
out.push(`| ${w.id} | ${w.scenario} | ${fmt(w.without)} chars | ${fmt(w.withSL)} chars | ${pctSavings(w.without, w.withSL)} | ${w.opsWithout}\u2192${w.opsWith} |`);
|
|
922
|
-
}
|
|
923
|
-
out.push("");
|
|
924
|
-
}
|
|
925
|
-
|
|
926
|
-
// Verdict
|
|
927
|
-
out.push("## Verdict");
|
|
928
|
-
out.push("");
|
|
929
|
-
|
|
930
|
-
const readResults = results.filter(r => r.num === 1);
|
|
931
|
-
const outlineResults = results.filter(r => r.num === 2);
|
|
932
|
-
const editResult = results.find(r => r.num === 7);
|
|
933
|
-
const verifyResult = results.find(r => r.num === 8);
|
|
934
|
-
const treeResult = results.find(r => r.num === 4);
|
|
935
|
-
const batchResult = results.find(r => r.num === 9);
|
|
936
|
-
const bulkResult = results.find(r => r.num === 10);
|
|
937
|
-
const changesResult = results.find(r => r.num === 11);
|
|
938
|
-
const notFoundResult = results.find(r => r.num === 12);
|
|
939
|
-
const mismatchResult = results.find(r => r.num === 13);
|
|
940
|
-
const bashResult = results.find(r => r.num === 14);
|
|
941
|
-
|
|
942
|
-
const coreResults = results.filter(r => r.num < 16);
|
|
943
|
-
const allSavingsNums = coreResults.map(r => {
|
|
944
|
-
if (r.without === 0) return 0;
|
|
945
|
-
return ((r.without - r.withSL) / r.without) * 100;
|
|
946
|
-
});
|
|
947
|
-
const avgSavings = allSavingsNums.length > 0
|
|
948
|
-
? allSavingsNums.reduce((a, b) => a + b, 0) / allSavingsNums.length
|
|
949
|
-
: 0;
|
|
950
|
-
|
|
951
|
-
// Weighted average based on typical development session frequency
|
|
952
|
-
const WEIGHTS = {
|
|
953
|
-
"Read full (<50L)": 2, "Read full (50-200L)": 5, "Read full (200-500L)": 3, "Read full (500L+)": 1,
|
|
954
|
-
"Outline+read (200-500L)": 8, "Outline+read (500L+)": 8,
|
|
955
|
-
"Grep search": 5, "Directory tree": 2, "File info": 1, "Create file (200L)": 1,
|
|
956
|
-
"Edit x5 sequential": 10, "Verify checksums (4 ranges)": 8,
|
|
957
|
-
"Multi-file read": 2, "bulk_replace dry_run (5 files)": 1,
|
|
958
|
-
"Changes (semantic diff)": 3,
|
|
959
|
-
"FILE_NOT_FOUND recovery": 2, "Hash mismatch recovery": 3,
|
|
960
|
-
"Bash redirects (cat+ls+stat)": 3, "HASH_HINT multi-match recovery": 2,
|
|
961
|
-
};
|
|
962
|
-
let wSum = 0, wTotal = 0;
|
|
963
|
-
for (const r of coreResults) {
|
|
964
|
-
const w = WEIGHTS[r.scenario] || 1;
|
|
965
|
-
const sav = r.without === 0 ? 0 : ((r.without - r.withSL) / r.without) * 100;
|
|
966
|
-
wSum += w * sav;
|
|
967
|
-
wTotal += w;
|
|
968
|
-
}
|
|
969
|
-
const weightedAvg = wTotal > 0 ? wSum / wTotal : 0;
|
|
970
|
-
|
|
971
|
-
// Ops/Steps totals for core scenarios
|
|
972
|
-
// Ops/Steps totals for core scenarios.
// Resolve a scenario's entry in a per-scenario table: try the exact scenario
// name first, then fall back to the first table key that is a prefix of it.
const lookupByScenario = (table, scenario) => {
  const exact = table[scenario];
  if (exact) return exact;
  const prefixKey = Object.keys(table).find(k => scenario.startsWith(k));
  return prefixKey ? table[prefixKey] : undefined;
};
// Sum one side ("without" or "with") of a table over all core results.
// Scenarios with no table entry count as 1.
const sumCounts = (table, side) => coreResults.reduce((sum, r) => {
  const entry = lookupByScenario(table, r.scenario);
  return sum + (entry ? entry[side] : 1);
}, 0);
const totalOpsWithout = sumCounts(OPS, "without");
const totalOpsWith = sumCounts(OPS, "with");
const totalStepsWithout = sumCounts(STEPS, "without");
const totalStepsWith = sumCounts(STEPS, "with");
// Percentage of tool calls saved; guarded so an empty result set cannot divide by zero.
const opsPct = totalOpsWithout > 0 ? ((totalOpsWithout - totalOpsWith) / totalOpsWithout * 100).toFixed(0) : 0;
|
|
993
|
-
|
|
994
|
-
// Read verdict
|
|
995
|
-
const readVerdict = [];
|
|
996
|
-
const smallRead = readResults.find(r => r.scenario.includes("<50L"));
|
|
997
|
-
const xlRead = readResults.find(r => r.scenario.includes("500L+"));
|
|
998
|
-
if (smallRead) {
|
|
999
|
-
const pct = Math.abs(((smallRead.without - smallRead.withSL) / smallRead.without * 100)).toFixed(0);
|
|
1000
|
-
const verb = smallRead.withSL <= smallRead.without ? "saves" : "costs";
|
|
1001
|
-
readVerdict.push(`Small files (<50L): hash annotations ${verb} ~${pct}%.`);
|
|
1002
|
-
}
|
|
1003
|
-
if (xlRead) {
|
|
1004
|
-
const pct = Math.abs(((xlRead.without - xlRead.withSL) / xlRead.without * 100)).toFixed(0);
|
|
1005
|
-
const verb = xlRead.withSL <= xlRead.without ? "saves" : "costs";
|
|
1006
|
-
readVerdict.push(`Large files (500L+): full read ${verb} ~${pct}%.`);
|
|
1007
|
-
}
|
|
1008
|
-
|
|
1009
|
-
out.push("**Read:**");
|
|
1010
|
-
for (const v of readVerdict) out.push(`- ${v}`);
|
|
1011
|
-
if (outlineResults.length > 0) {
|
|
1012
|
-
const best = outlineResults.reduce((a, b) =>
|
|
1013
|
-
((a.without - a.withSL) / a.without) > ((b.without - b.withSL) / b.without) ? a : b
|
|
1014
|
-
);
|
|
1015
|
-
const savPct = ((best.without - best.withSL) / best.without * 100).toFixed(0);
|
|
1016
|
-
out.push(`- Outline+targeted read saves ${savPct}% on large files vs full read.`);
|
|
1017
|
-
}
|
|
1018
|
-
out.push("");
|
|
1019
|
-
|
|
1020
|
-
if (editResult) {
|
|
1021
|
-
const editSav = ((editResult.without - editResult.withSL) / editResult.without * 100).toFixed(0);
|
|
1022
|
-
out.push(`**Edit:** Compact diff output saves ${editSav}% vs old_string/new_string context blocks (5 edits).`);
|
|
1023
|
-
out.push("");
|
|
1024
|
-
}
|
|
1025
|
-
|
|
1026
|
-
if (verifyResult) {
|
|
1027
|
-
const verifySav = ((verifyResult.without - verifyResult.withSL) / verifyResult.without * 100).toFixed(0);
|
|
1028
|
-
out.push(`**Verify:** Checksum verification saves ${verifySav}% vs full re-read for staleness check.`);
|
|
1029
|
-
out.push("");
|
|
1030
|
-
}
|
|
1031
|
-
|
|
1032
|
-
if (treeResult) {
|
|
1033
|
-
const pct = Math.abs(((treeResult.without - treeResult.withSL) / treeResult.without * 100)).toFixed(0);
|
|
1034
|
-
const verb = treeResult.withSL <= treeResult.without ? "saves" : "costs";
|
|
1035
|
-
out.push(`**Tree:** Compact directory tree ${verb} ${pct}% vs \`ls -laR\`.`);
|
|
1036
|
-
out.push("");
|
|
1037
|
-
}
|
|
1038
|
-
|
|
1039
|
-
if (batchResult) {
|
|
1040
|
-
const batchSav = ((batchResult.without - batchResult.withSL) / batchResult.without * 100).toFixed(0);
|
|
1041
|
-
out.push(`**Batch read:** Multi-file read saves ${batchSav}% vs separate Read calls (${batchResult.scenario.match(/\d+ files/)?.[0] || 'N files'}).`);
|
|
1042
|
-
out.push("");
|
|
1043
|
-
}
|
|
1044
|
-
|
|
1045
|
-
if (bulkResult) {
|
|
1046
|
-
const bulkSav = ((bulkResult.without - bulkResult.withSL) / bulkResult.without * 100).toFixed(0);
|
|
1047
|
-
out.push(`**Bulk replace:** Single bulk_replace saves ${bulkSav}% vs 5 separate edit_file calls.`);
|
|
1048
|
-
out.push("");
|
|
1049
|
-
}
|
|
1050
|
-
|
|
1051
|
-
if (changesResult) {
|
|
1052
|
-
const changesSav = ((changesResult.without - changesResult.withSL) / changesResult.without * 100).toFixed(0);
|
|
1053
|
-
out.push(`**Changes:** Semantic diff summary saves ${changesSav}% vs raw unified diff output.`);
|
|
1054
|
-
out.push("");
|
|
1055
|
-
}
|
|
1056
|
-
|
|
1057
|
-
if (notFoundResult) {
|
|
1058
|
-
const notFoundSav = ((notFoundResult.without - notFoundResult.withSL) / notFoundResult.without * 100).toFixed(0);
|
|
1059
|
-
out.push(`**Error recovery (FILE_NOT_FOUND):** Inline dir listing saves ${notFoundSav}% vs 3 round-trips.`);
|
|
1060
|
-
out.push("");
|
|
1061
|
-
}
|
|
1062
|
-
|
|
1063
|
-
if (mismatchResult) {
|
|
1064
|
-
const mismatchSav = ((mismatchResult.without - mismatchResult.withSL) / mismatchResult.without * 100).toFixed(0);
|
|
1065
|
-
out.push(`**Error recovery (hash mismatch):** Fresh snippet saves ${mismatchSav}% vs full re-read + retry.`);
|
|
1066
|
-
out.push("");
|
|
1067
|
-
}
|
|
1068
|
-
|
|
1069
|
-
if (bashResult) {
  // Verdict line for the Bash-redirects scenario. The verb ("saves"/"costs")
  // carries the direction, so the percentage is reported as a magnitude.
  // Without Math.abs a regression would print as "costs -N%".
  const bashDeltaPct = (bashResult.without - bashResult.withSL) / bashResult.without * 100;
  const bashSav = Math.abs(bashDeltaPct).toFixed(0);
  const verb = bashResult.withSL <= bashResult.without ? "saves" : "costs";
  out.push(`**Bash redirects:** Structured hex-line output ${verb} ${bashSav}% vs cat+ls+stat combined.`);
  out.push("");
}
|
|
1075
|
-
|
|
1076
|
-
// Break-even
|
|
1077
|
-
out.push("## Break-even");
|
|
1078
|
-
out.push("");
|
|
1079
|
-
if (outlineResults.length > 0) {
|
|
1080
|
-
out.push("- **Outline workflow** breaks even at ~50 lines. Above that, savings grow linearly.");
|
|
1081
|
-
}
|
|
1082
|
-
if (verifyResult && verifyResult.withSL < verifyResult.without) {
|
|
1083
|
-
const ratio = (verifyResult.without / verifyResult.withSL).toFixed(0);
|
|
1084
|
-
out.push(`- **Verify** is ${ratio}x cheaper than re-reading. Pays for hash overhead after first staleness check.`);
|
|
1085
|
-
}
|
|
1086
|
-
if (editResult && editResult.withSL < editResult.without) {
|
|
1087
|
-
out.push("- **Edit** compact diff is always cheaper than old_string/new_string blocks.");
|
|
1088
|
-
}
|
|
1089
|
-
if (notFoundResult && notFoundResult.withSL < notFoundResult.without) {
|
|
1090
|
-
const ratio = (notFoundResult.without / notFoundResult.withSL).toFixed(0);
|
|
1091
|
-
out.push(`- **Error recovery** eliminates round-trips: ${ratio}x cheaper for FILE_NOT_FOUND.`);
|
|
1092
|
-
}
|
|
1093
|
-
if (mismatchResult && mismatchResult.withSL < mismatchResult.without) {
|
|
1094
|
-
const ratio = (mismatchResult.without / mismatchResult.withSL).toFixed(0);
|
|
1095
|
-
out.push(`- **Hash mismatch** recovery with fresh snippet is ${ratio}x cheaper than full re-read + retry.`);
|
|
1096
|
-
}
|
|
1097
|
-
if (changesResult && changesResult.withSL < changesResult.without) {
|
|
1098
|
-
out.push("- **Semantic diff** always cheaper than raw unified diff for understanding changes.");
|
|
1099
|
-
}
|
|
1100
|
-
out.push(`- **Average:** ${avgSavings.toFixed(0)}% tokens (flat) / ${weightedAvg.toFixed(0)}% (weighted) | ${totalOpsWithout}\u2192${totalOpsWith} ops (${opsPct}% fewer) | ${totalStepsWithout}\u2192${totalStepsWith} steps`);
|
|
1101
|
-
out.push("");
|
|
1102
|
-
|
|
1103
|
-
console.log(out.join("\n"));
|
|
1104
|
-
}
|
|
1105
|
-
|
|
1106
|
-
main();
|