@colbymchenry/codegraph 0.7.9 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -49
- package/dist/bin/codegraph.js +47 -20
- package/dist/bin/codegraph.js.map +1 -1
- package/dist/bin/node-version-check.d.ts +3 -0
- package/dist/bin/node-version-check.d.ts.map +1 -1
- package/dist/bin/node-version-check.js +5 -2
- package/dist/bin/node-version-check.js.map +1 -1
- package/dist/context/index.d.ts.map +1 -1
- package/dist/context/index.js +4 -2
- package/dist/context/index.js.map +1 -1
- package/dist/db/queries.d.ts.map +1 -1
- package/dist/db/queries.js +7 -1
- package/dist/db/queries.js.map +1 -1
- package/dist/extraction/index.d.ts.map +1 -1
- package/dist/extraction/index.js +63 -37
- package/dist/extraction/index.js.map +1 -1
- package/dist/installer/config-writer.d.ts.map +1 -1
- package/dist/installer/config-writer.js +3 -1
- package/dist/installer/config-writer.js.map +1 -1
- package/dist/installer/index.d.ts +12 -0
- package/dist/installer/index.d.ts.map +1 -1
- package/dist/installer/index.js +74 -5
- package/dist/installer/index.js.map +1 -1
- package/dist/installer/instructions-template.d.ts +2 -2
- package/dist/installer/instructions-template.d.ts.map +1 -1
- package/dist/installer/instructions-template.js +3 -2
- package/dist/installer/instructions-template.js.map +1 -1
- package/dist/installer/targets/claude.d.ts +10 -6
- package/dist/installer/targets/claude.d.ts.map +1 -1
- package/dist/installer/targets/claude.js +72 -10
- package/dist/installer/targets/claude.js.map +1 -1
- package/dist/mcp/index.d.ts +12 -0
- package/dist/mcp/index.d.ts.map +1 -1
- package/dist/mcp/index.js +143 -20
- package/dist/mcp/index.js.map +1 -1
- package/dist/mcp/server-instructions.d.ts +1 -1
- package/dist/mcp/server-instructions.d.ts.map +1 -1
- package/dist/mcp/server-instructions.js +14 -2
- package/dist/mcp/server-instructions.js.map +1 -1
- package/dist/mcp/tools.d.ts +75 -5
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +470 -87
- package/dist/mcp/tools.js.map +1 -1
- package/dist/mcp/transport.d.ts +17 -0
- package/dist/mcp/transport.d.ts.map +1 -1
- package/dist/mcp/transport.js +63 -0
- package/dist/mcp/transport.js.map +1 -1
- package/dist/resolution/frameworks/index.d.ts +1 -0
- package/dist/resolution/frameworks/index.d.ts.map +1 -1
- package/dist/resolution/frameworks/index.js +5 -1
- package/dist/resolution/frameworks/index.js.map +1 -1
- package/dist/resolution/frameworks/nestjs.d.ts +26 -0
- package/dist/resolution/frameworks/nestjs.d.ts.map +1 -0
- package/dist/resolution/frameworks/nestjs.js +374 -0
- package/dist/resolution/frameworks/nestjs.js.map +1 -0
- package/dist/search/query-utils.d.ts.map +1 -1
- package/dist/search/query-utils.js +29 -26
- package/dist/search/query-utils.js.map +1 -1
- package/dist/sync/git-hooks.d.ts +45 -0
- package/dist/sync/git-hooks.d.ts.map +1 -0
- package/dist/sync/git-hooks.js +223 -0
- package/dist/sync/git-hooks.js.map +1 -0
- package/dist/sync/index.d.ts +4 -0
- package/dist/sync/index.d.ts.map +1 -1
- package/dist/sync/index.js +12 -1
- package/dist/sync/index.js.map +1 -1
- package/dist/sync/watch-policy.d.ts +48 -0
- package/dist/sync/watch-policy.d.ts.map +1 -0
- package/dist/sync/watch-policy.js +124 -0
- package/dist/sync/watch-policy.js.map +1 -0
- package/dist/sync/watcher.d.ts.map +1 -1
- package/dist/sync/watcher.js +10 -0
- package/dist/sync/watcher.js.map +1 -1
- package/dist/ui/glyphs.d.ts +42 -0
- package/dist/ui/glyphs.d.ts.map +1 -0
- package/dist/ui/glyphs.js +78 -0
- package/dist/ui/glyphs.js.map +1 -0
- package/dist/ui/shimmer-worker.js +17 -11
- package/dist/ui/shimmer-worker.js.map +1 -1
- package/package.json +3 -3
- package/scripts/agent-eval/audit.sh +68 -0
- package/scripts/agent-eval/itrun.sh +107 -0
- package/scripts/agent-eval/parse-run.mjs +45 -0
- package/scripts/agent-eval/parse-session.mjs +93 -0
- package/scripts/agent-eval/run-agent.sh +34 -0
- package/scripts/agent-eval/run-all.sh +67 -0
- package/scripts/extract-release-notes.mjs +130 -0
- package/scripts/release.sh +5 -7
|
@@ -2,15 +2,21 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
const worker_threads_1 = require("worker_threads");
|
|
4
4
|
const fs_1 = require("fs");
|
|
5
|
+
const glyphs_1 = require("./glyphs");
|
|
5
6
|
// Write directly to fd 1 (stdout) instead of writeStdout().
|
|
6
7
|
// In Node.js worker threads, process.stdout is proxied through the main
|
|
7
8
|
// thread's event loop — so if the main thread is blocked (e.g. SQLite),
|
|
8
9
|
// stdout writes from the worker queue up and the animation freezes.
|
|
9
10
|
// fs.writeSync(1, ...) is a direct kernel syscall that bypasses this.
|
|
11
|
+
//
|
|
12
|
+
// Side effect: bypasses Node's TTY-aware encoding conversion on Windows,
|
|
13
|
+
// so UTF-8 bytes hit the console raw and mojibake on OEM codepages.
|
|
14
|
+
// `getGlyphs()` returns ASCII fallbacks on Windows to avoid this (#168).
|
|
10
15
|
function writeStdout(s) {
|
|
11
16
|
(0, fs_1.writeSync)(1, s);
|
|
12
17
|
}
|
|
13
|
-
const
|
|
18
|
+
const G = (0, glyphs_1.getGlyphs)();
|
|
19
|
+
const SPINNER_GLYPHS = G.spinner;
|
|
14
20
|
const ANIM_INTERVAL = 150;
|
|
15
21
|
const FRAMES_PER_GLYPH = 3;
|
|
16
22
|
const RST = '\x1b[0m';
|
|
@@ -36,7 +42,7 @@ function formatNumber(n) {
|
|
|
36
42
|
}
|
|
37
43
|
function renderBar(frame, filled, empty) {
|
|
38
44
|
if (filled === 0)
|
|
39
|
-
return `${DM}${
|
|
45
|
+
return `${DM}${G.barEmpty.repeat(empty)}${RST}`;
|
|
40
46
|
const cycleFrames = 24;
|
|
41
47
|
const shimmerPos = ((frame % cycleFrames) / cycleFrames) * (filled + 6) - 3;
|
|
42
48
|
const shimmerWidth = 3;
|
|
@@ -47,9 +53,9 @@ function renderBar(frame, filled, empty) {
|
|
|
47
53
|
const r = lerp(160, 251, t);
|
|
48
54
|
const g = lerp(100, 191, t);
|
|
49
55
|
const b = lerp(9, 36, t);
|
|
50
|
-
bar += `\x1b[38;2;${r};${g};${b}m${BOLD}
|
|
56
|
+
bar += `\x1b[38;2;${r};${g};${b}m${BOLD}${G.barFilled}`;
|
|
51
57
|
}
|
|
52
|
-
bar += `${RST}${DM}${
|
|
58
|
+
bar += `${RST}${DM}${G.barEmpty.repeat(empty)}${RST}`;
|
|
53
59
|
return bar;
|
|
54
60
|
}
|
|
55
61
|
// Mutable state
|
|
@@ -61,20 +67,20 @@ function render() {
|
|
|
61
67
|
return;
|
|
62
68
|
const frame = animFrame();
|
|
63
69
|
const glyphIdx = Math.floor(frame / FRAMES_PER_GLYPH) % SPINNER_GLYPHS.length;
|
|
64
|
-
const glyph = SPINNER_GLYPHS[glyphIdx] ?? '
|
|
70
|
+
const glyph = SPINNER_GLYPHS[glyphIdx] ?? SPINNER_GLYPHS[0] ?? '.';
|
|
65
71
|
const color = shimmerColor(frame);
|
|
66
72
|
let line;
|
|
67
73
|
if (currentPercent >= 0) {
|
|
68
74
|
const barWidth = 25;
|
|
69
75
|
const filled = Math.round(barWidth * currentPercent / 100);
|
|
70
76
|
const empty = barWidth - filled;
|
|
71
|
-
line = `${DM}
|
|
77
|
+
line = `${DM}${G.rail}${RST} ${color}${glyph}${RST} ${currentMessage} ${renderBar(frame, filled, empty)} ${currentPercent}%`;
|
|
72
78
|
}
|
|
73
79
|
else if (currentCount > 0) {
|
|
74
|
-
line = `${DM}
|
|
80
|
+
line = `${DM}${G.rail}${RST} ${color}${glyph}${RST} ${currentMessage}... ${formatNumber(currentCount)} found`;
|
|
75
81
|
}
|
|
76
82
|
else {
|
|
77
|
-
line = `${DM}
|
|
83
|
+
line = `${DM}${G.rail}${RST} ${color}${glyph}${RST} ${currentMessage}...`;
|
|
78
84
|
}
|
|
79
85
|
writeStdout(`\r\x1b[K${line}`);
|
|
80
86
|
}
|
|
@@ -84,10 +90,10 @@ function finishPhase() {
|
|
|
84
90
|
writeStdout(`\r\x1b[K`);
|
|
85
91
|
let detail = '';
|
|
86
92
|
if (currentPercent >= 0)
|
|
87
|
-
detail =
|
|
93
|
+
detail = ` ${G.dash} done`;
|
|
88
94
|
else if (currentCount > 0)
|
|
89
|
-
detail = `
|
|
90
|
-
writeStdout(`${DM}
|
|
95
|
+
detail = ` ${G.dash} ${formatNumber(currentCount)} found`;
|
|
96
|
+
writeStdout(`${DM}${G.rail}${RST} ${GRN}${G.phaseDone}${RST} ${currentMessage}${detail}\n`);
|
|
91
97
|
currentMessage = '';
|
|
92
98
|
currentPercent = -1;
|
|
93
99
|
currentCount = 0;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"shimmer-worker.js","sourceRoot":"","sources":["../../src/ui/shimmer-worker.ts"],"names":[],"mappings":";;AAAA,mDAAwD;AACxD,2BAA+B;
|
|
1
|
+
{"version":3,"file":"shimmer-worker.js","sourceRoot":"","sources":["../../src/ui/shimmer-worker.ts"],"names":[],"mappings":";;AAAA,mDAAwD;AACxD,2BAA+B;AAC/B,qCAAqC;AAGrC,4DAA4D;AAC5D,wEAAwE;AACxE,wEAAwE;AACxE,oEAAoE;AACpE,sEAAsE;AACtE,EAAE;AACF,yEAAyE;AACzE,oEAAoE;AACpE,yEAAyE;AACzE,SAAS,WAAW,CAAC,CAAS;IAC5B,IAAA,cAAS,EAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,CAAC,GAAG,IAAA,kBAAS,GAAE,CAAC;AACtB,MAAM,cAAc,GAAG,CAAC,CAAC,OAAO,CAAC;AACjC,MAAM,aAAa,GAAG,GAAG,CAAC;AAC1B,MAAM,gBAAgB,GAAG,CAAC,CAAC;AAE3B,MAAM,GAAG,GAAG,SAAS,CAAC;AACtB,MAAM,EAAE,GAAG,SAAS,CAAC;AACrB,MAAM,GAAG,GAAG,UAAU,CAAC;AACvB,MAAM,IAAI,GAAG,SAAS,CAAC;AAEvB,MAAM,SAAS,GAAW,2BAAU,CAAC,SAAS,CAAC;AAE/C,SAAS,SAAS;IAChB,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,GAAG,aAAa,CAAC,CAAC;AAC9D,CAAC;AAED,SAAS,IAAI,CAAC,CAAS,EAAE,CAAS,EAAE,CAAS;IAC3C,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;AACrC,CAAC;AAED,SAAS,YAAY,CAAC,KAAa;IACjC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,GAAG,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;IACvD,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;IAC5B,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;IAC5B,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;IACzB,OAAO,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;AAC5C,CAAC;AAED,SAAS,YAAY,CAAC,CAAS;IAC7B,OAAO,CAAC,CAAC,cAAc,EAAE,CAAC;AAC5B,CAAC;AAED,SAAS,SAAS,CAAC,KAAa,EAAE,MAAc,EAAE,KAAa;IAC7D,IAAI,MAAM,KAAK,CAAC;QAAE,OAAO,GAAG,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,GAAG,EAAE,CAAC;IAClE,MAAM,WAAW,GAAG,EAAE,CAAC;IACvB,MAAM,UAAU,GAAG,CAAC,CAAC,KAAK,GAAG,WAAW,CAAC,GAAG,WAAW,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;IAC5E,MAAM,YAAY,GAAG,CAAC,CAAC;IACvB,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC;QACtC,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,GAAG,YA
AY,CAAC,CAAC;QAC/C,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;QACzB,GAAG,IAAI,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,GAAG,CAAC,CAAC,SAAS,EAAE,CAAC;IAC1D,CAAC;IACD,GAAG,IAAI,GAAG,GAAG,GAAG,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,GAAG,EAAE,CAAC;IACtD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,gBAAgB;AAChB,IAAI,cAAc,GAAG,EAAE,CAAC;AACxB,IAAI,cAAc,GAAG,CAAC,CAAC,CAAC;AACxB,IAAI,YAAY,GAAG,CAAC,CAAC;AAErB,SAAS,MAAM;IACb,IAAI,CAAC,cAAc;QAAE,OAAO;IAC5B,MAAM,KAAK,GAAG,SAAS,EAAE,CAAC;IAC1B,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,gBAAgB,CAAC,GAAG,cAAc,CAAC,MAAM,CAAC;IAC9E,MAAM,KAAK,GAAG,cAAc,CAAC,QAAQ,CAAC,IAAI,cAAc,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC;IACnE,MAAM,KAAK,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;IAElC,IAAI,IAAY,CAAC;IACjB,IAAI,cAAc,IAAI,CAAC,EAAE,CAAC;QACxB,MAAM,QAAQ,GAAG,EAAE,CAAC;QACpB,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,cAAc,GAAG,GAAG,CAAC,CAAC;QAC3D,MAAM,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;QAChC,IAAI,GAAG,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,GAAG,GAAG,KAAK,KAAK,GAAG,KAAK,GAAG,GAAG,IAAI,cAAc,KAAK,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,CAAC,KAAK,cAAc,GAAG,CAAC;IAClI,CAAC;SAAM,IAAI,YAAY,GAAG,CAAC,EAAE,CAAC;QAC5B,IAAI,GAAG,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,GAAG,GAAG,KAAK,KAAK,GAAG,KAAK,GAAG,GAAG,IAAI,cAAc,OAAO,YAAY,CAAC,YAAY,CAAC,QAAQ,CAAC;IACjH,CAAC;SAAM,CAAC;QACN,IAAI,GAAG,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,GAAG,GAAG,KAAK,KAAK,GAAG,KAAK,GAAG,GAAG,IAAI,cAAc,KAAK,CAAC;IAC7E,CAAC;IAED,WAAW,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC;AACjC,CAAC;AAED,SAAS,WAAW;IAClB,IAAI,CAAC,cAAc;QAAE,OAAO;IAC5B,WAAW,CAAC,UAAU,CAAC,CAAC;IACxB,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,IAAI,cAAc,IAAI,CAAC;QAAE,MAAM,GAAG,IAAI,CAAC,CAAC,IAAI,OAAO,CAAC;SAC/C,IAAI,YAAY,GAAG,CAAC;QAAE,MAAM,GAAG,IAAI,CAAC,CAAC,IAAI,IAAI,YAAY,CAAC,YAAY,CAAC,QAAQ,CAAC;IACrF,WAAW,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,GAAG,GAAG,KAAK,GAAG,GAAG,CAAC,CAAC,SAAS,GAAG,GAAG,IAAI,cAAc,GAAG,MAAM,IAAI,CAAC,CAAC;IAC7F,cAAc,GAAG,E
AAE,CAAC;IACpB,cAAc,GAAG,CAAC,CAAC,CAAC;IACpB,YAAY,GAAG,CAAC,CAAC;AACnB,CAAC;AAED,2CAA2C;AAC3C,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;AAE7C,2BAAW,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,GAAyB,EAAE,EAAE;IACtD,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;QAC1B,cAAc,GAAG,GAAG,CAAC,SAAS,CAAC;QAC/B,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC;QAC7B,YAAY,GAAG,GAAG,CAAC,KAAK,CAAC;IAC3B,CAAC;SAAM,IAAI,GAAG,CAAC,IAAI,KAAK,cAAc,EAAE,CAAC;QACvC,WAAW,EAAE,CAAC;IAChB,CAAC;SAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QAC/B,aAAa,CAAC,YAAY,CAAC,CAAC;QAC5B,WAAW,EAAE,CAAC;QACd,2BAAW,CAAC,WAAW,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,CAAC;IAC/C,CAAC;AACH,CAAC,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@colbymchenry/codegraph",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.8.0",
|
|
4
4
|
"description": "Supercharge Claude Code with semantic code intelligence. 94% fewer tool calls • 77% faster exploration • 100% local.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -51,9 +51,9 @@
|
|
|
51
51
|
"vitest": "^2.1.9"
|
|
52
52
|
},
|
|
53
53
|
"optionalDependencies": {
|
|
54
|
-
"better-sqlite3": "^
|
|
54
|
+
"better-sqlite3": "^12.4.1"
|
|
55
55
|
},
|
|
56
56
|
"engines": {
|
|
57
|
-
"node": ">=
|
|
57
|
+
"node": ">=20.0.0 <25.0.0"
|
|
58
58
|
}
|
|
59
59
|
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# One-shot CodeGraph quality audit:
|
|
3
|
+
# set version -> ensure corpus repo -> wipe+reindex with that version ->
|
|
4
|
+
# run with/without A/B -> restore the local dev link.
|
|
5
|
+
#
|
|
6
|
+
# Usage: audit.sh <version> <repo-name> <repo-url> "<question>" [headless|tmux|all]
|
|
7
|
+
# <version> "local" (build + npm link this repo) | "latest" | a version (e.g. 0.7.10)
|
|
8
|
+
# <repo-name> dir name under the corpus dir
|
|
9
|
+
# <repo-url> git URL (cloned --depth 1 when the repo dir is missing)
|
|
10
|
+
# [mode] headless (default) | tmux (interactive tmux arms only) | all (headless + interactive); passed through to run-all.sh
|
|
11
|
+
# Env: CORPUS corpus dir (default: /tmp/codegraph-corpus)
|
|
12
|
+
set -uo pipefail
|
|
13
|
+
|
|
14
|
+
VERSION="${1:?usage: audit.sh <version> <repo-name> <repo-url> \"<question>\" [mode]}"
|
|
15
|
+
NAME="${2:?repo-name required}"
|
|
16
|
+
URL="${3:?repo-url required}"
|
|
17
|
+
Q="${4:?question required}"
|
|
18
|
+
MODE="${5:-headless}"
|
|
19
|
+
|
|
20
|
+
HARNESS="$(cd "$(dirname "$0")" && pwd)"
|
|
21
|
+
REPO_ROOT="$(cd "$HARNESS/../.." && pwd)" # codegraph repo root
|
|
22
|
+
CORPUS="${CORPUS:-/tmp/codegraph-corpus}"
|
|
23
|
+
REPO="$CORPUS/$NAME"
|
|
24
|
+
PKG="@colbymchenry/codegraph"
|
|
25
|
+
|
|
26
|
+
echo "==================== CodeGraph audit ===================="
|
|
27
|
+
echo "version=$VERSION repo=$NAME mode=$MODE corpus=$CORPUS"
|
|
28
|
+
echo
|
|
29
|
+
|
|
30
|
+
# 1. Set the codegraph version under test (mutates the global install).
|
|
31
|
+
if [ "$VERSION" = local ]; then
|
|
32
|
+
echo "→ [1/4] building + linking local dev build (local-install.sh)"
|
|
33
|
+
( cd "$REPO_ROOT" && ./scripts/local-install.sh ) || { echo "local-install.sh failed"; exit 1; }
|
|
34
|
+
else
|
|
35
|
+
echo "→ [1/4] installing $PKG@$VERSION globally"
|
|
36
|
+
npm install -g "$PKG@$VERSION" || { echo "npm install -g $PKG@$VERSION failed"; exit 1; }
|
|
37
|
+
fi
|
|
38
|
+
ACTUAL="$(codegraph --version 2>/dev/null || echo '?')"
|
|
39
|
+
echo " codegraph on PATH: $(command -v codegraph) -> $ACTUAL"
|
|
40
|
+
|
|
41
|
+
# 2. Ensure the corpus repo exists (clone shallow if missing, reuse if present).
|
|
42
|
+
mkdir -p "$CORPUS"
|
|
43
|
+
if [ -d "$REPO/.git" ]; then
|
|
44
|
+
echo "→ [2/4] reusing existing checkout: $REPO"
|
|
45
|
+
else
|
|
46
|
+
echo "→ [2/4] cloning $URL"
|
|
47
|
+
git clone --depth 1 "$URL" "$REPO" || { echo "git clone failed"; exit 1; }
|
|
48
|
+
fi
|
|
49
|
+
|
|
50
|
+
# 3. Wipe + re-index with THIS version (the index must be built by the same
|
|
51
|
+
# binary that serves it — different versions extract differently).
|
|
52
|
+
echo "→ [3/4] wiping .codegraph and re-indexing with $ACTUAL"
|
|
53
|
+
rm -rf "$REPO/.codegraph"
|
|
54
|
+
( cd "$REPO" && codegraph init -i ) || { echo "indexing failed"; exit 1; }
|
|
55
|
+
|
|
56
|
+
# 4. Run the with/without A/B.
|
|
57
|
+
echo "→ [4/4] running A/B harness (mode=$MODE)"
|
|
58
|
+
bash "$HARNESS/run-all.sh" "$REPO" "$Q" "$MODE"
|
|
59
|
+
|
|
60
|
+
# Restore the dev link (the normal working state in this repo).
|
|
61
|
+
echo
|
|
62
|
+
echo "→ restoring local dev link (local-install.sh)"
|
|
63
|
+
if ( cd "$REPO_ROOT" && ./scripts/local-install.sh >/dev/null 2>&1 ); then
|
|
64
|
+
echo " global codegraph restored to dev build"
|
|
65
|
+
else
|
|
66
|
+
echo " WARN: restore failed — run ./scripts/local-install.sh manually"
|
|
67
|
+
fi
|
|
68
|
+
echo "==================== audit complete ===================="
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Drive an INTERACTIVE Claude Code session in tmux, send a prompt, wait for the
|
|
3
|
+
# agent to finish, then print the tool-call breakdown from the session logs.
|
|
4
|
+
#
|
|
5
|
+
# Why interactive (not `claude -p`): headless print-mode picks the
|
|
6
|
+
# general-purpose subagent, while real interactive sessions delegate to the
|
|
7
|
+
# Explore subagent (or drive codegraph from the main thread). Only the
|
|
8
|
+
# interactive TUI reproduces the behavior users actually see. (Idle-detection
|
|
9
|
+
# technique borrowed from devpit's WaitForIdle.)
|
|
10
|
+
#
|
|
11
|
+
# Usage: itrun.sh <repo-path> <label> "<prompt>"
|
|
12
|
+
# Output dir: $AGENT_EVAL_OUT (default /tmp/agent-eval)
|
|
13
|
+
# Requires: tmux 3.0+, a logged-in `claude` CLI, codegraph MCP configured.
|
|
14
|
+
set -uo pipefail
|
|
15
|
+
# Fail with a usage message (rather than a bare "unbound variable" from set -u) when an argument is missing.
REPO="${1:?usage: itrun.sh <repo-path> <label> \"<prompt>\"}"; LABEL="${2:?label required}"; PROMPT="${3:?prompt required}"
|
|
16
|
+
SESSION="cgt_${LABEL}"
|
|
17
|
+
OUT_DIR="${AGENT_EVAL_OUT:-/tmp/agent-eval}"; mkdir -p "$OUT_DIR"
|
|
18
|
+
OUT="$OUT_DIR/itrun-${LABEL}.txt"
|
|
19
|
+
HERE="$(cd "$(dirname "$0")" && pwd)"
|
|
20
|
+
|
|
21
|
+
cap() { tmux capture-pane -p -t "$SESSION" -S -40; }  # print the $SESSION pane to stdout, starting 40 lines back in its history
|
|
22
|
+
|
|
23
|
+
tmux kill-session -t "$SESSION" 2>/dev/null
|
|
24
|
+
|
|
25
|
+
# Wide pane so the TUI doesn't hard-wrap tool lines.
|
|
26
|
+
tmux new-session -d -s "$SESSION" -x 230 -y 60
|
|
27
|
+
# Shell-quote the repo path (%q) so spaces/metacharacters survive being typed into the tmux shell.
tmux send-keys -t "$SESSION" "cd $(printf '%q' "$REPO") && claude --dangerously-skip-permissions ${CLAUDE_EXTRA_ARGS:-}" Enter
|
|
28
|
+
|
|
29
|
+
# Wait for the ❯ prompt (claude drew its UI), up to 60s. NOTE: ❯ appears on the
|
|
30
|
+
# welcome screen seconds before the input actually accepts keystrokes, so this is
|
|
31
|
+
# necessary but NOT sufficient — the type-and-verify loop below is what proves
|
|
32
|
+
# the input is live.
|
|
33
|
+
ready=0
|
|
34
|
+
for _ in $(seq 1 120); do
|
|
35
|
+
cap | grep -q "❯" && { ready=1; break; }
|
|
36
|
+
sleep 0.5
|
|
37
|
+
done
|
|
38
|
+
[ "$ready" = 1 ] || { echo "claude never drew its UI"; cap; tmux kill-session -t "$SESSION" 2>/dev/null; exit 1; }
|
|
39
|
+
|
|
40
|
+
# Accept the per-folder "Is this a project you trust?" dialog if it shows (first
|
|
41
|
+
# time claude opens a given repo). Option 1 ("Yes, I trust this folder") is
|
|
42
|
+
# pre-selected, so Enter accepts. This dialog also contains ❯, so it must be
|
|
43
|
+
# cleared before the type-and-verify loop or keystrokes land on the menu.
|
|
44
|
+
for _ in $(seq 1 20); do
|
|
45
|
+
cap | grep -q "trust this folder" || break
|
|
46
|
+
tmux send-keys -t "$SESSION" Enter
|
|
47
|
+
sleep 1
|
|
48
|
+
done
|
|
49
|
+
|
|
50
|
+
# Type-and-verify: send the prompt, confirm a distinctive chunk of it actually
|
|
51
|
+
# landed in the input box, retry if it didn't (handles the early-❯ race where
|
|
52
|
+
# the welcome screen shows the prompt glyph but MCP init is still eating keys).
|
|
53
|
+
needle="${PROMPT:0:24}"
|
|
54
|
+
typed=0
|
|
55
|
+
for _ in $(seq 1 30); do
|
|
56
|
+
tmux send-keys -l -t "$SESSION" "$PROMPT"
|
|
57
|
+
sleep 1
|
|
58
|
+
if cap | grep -Fq "$needle"; then typed=1; break; fi
|
|
59
|
+
# Clear whatever partial text may have landed, then retry.
|
|
60
|
+
tmux send-keys -t "$SESSION" C-u
|
|
61
|
+
sleep 1
|
|
62
|
+
done
|
|
63
|
+
[ "$typed" = 1 ] || { echo "prompt never landed in the input box"; cap; tmux kill-session -t "$SESSION" 2>/dev/null; exit 1; }
|
|
64
|
+
sleep 0.5
|
|
65
|
+
tmux send-keys -t "$SESSION" Enter
|
|
66
|
+
|
|
67
|
+
# Busy signals. The robust one is the spinner's elapsed-time-in-parens, which
|
|
68
|
+
# EVERY working state shows — both the pre-stream thinking phase
|
|
69
|
+
# "(8s · thinking with max effort)" and the streaming phase
|
|
70
|
+
# "(24s · ↑ 2.5k tokens · …)", and it survives the 32s→"1m 3s" rollover. We OR
|
|
71
|
+
# in the token arrows, "esc to interrupt", and "Initializing" as belt-and-braces
|
|
72
|
+
# (some TUI versions/states show one but not the others).
|
|
73
|
+
BUSY_RE='esc to interrupt|↓ [0-9]|↑ [0-9]|Initializing|\(([0-9]+m )?[0-9]+s ·'
|
|
74
|
+
|
|
75
|
+
# Wait for work to START (busy indicator appears), up to 60s. If it never starts,
|
|
76
|
+
# fail loudly rather than silently reporting an empty run.
|
|
77
|
+
started=0
|
|
78
|
+
for _ in $(seq 1 120); do
|
|
79
|
+
cap | grep -qE "$BUSY_RE" && { started=1; break; }
|
|
80
|
+
sleep 0.5
|
|
81
|
+
done
|
|
82
|
+
[ "$started" = 1 ] || { echo "agent never started working"; cap; tmux kill-session -t "$SESSION" 2>/dev/null; exit 1; }
|
|
83
|
+
|
|
84
|
+
# Poll for idle: not busy AND ❯ present, for 10 consecutive polls (~5s) to ride
|
|
85
|
+
# out mid-conversation thinking gaps that briefly drop the spinner. Up to ~15min.
|
|
86
|
+
consec=0
|
|
87
|
+
for _ in $(seq 1 1800); do
|
|
88
|
+
pane=$(cap)
|
|
89
|
+
if echo "$pane" | grep -qE "$BUSY_RE"; then
|
|
90
|
+
consec=0
|
|
91
|
+
elif echo "$pane" | grep -q "❯"; then
|
|
92
|
+
consec=$((consec+1)); [ "$consec" -ge 10 ] && break
|
|
93
|
+
else
|
|
94
|
+
consec=0
|
|
95
|
+
fi
|
|
96
|
+
sleep 0.5
|
|
97
|
+
done
|
|
98
|
+
sleep 1
|
|
99
|
+
|
|
100
|
+
tmux capture-pane -p -t "$SESSION" -S - > "$OUT"
|
|
101
|
+
echo "captured $(wc -l < "$OUT") lines -> $OUT"
|
|
102
|
+
grep -oE "Done \([^)]*\)" "$OUT" | tail -1
|
|
103
|
+
grep -oE "[0-9.]+k?/[0-9.]+M" "$OUT" | tail -1 | sed 's/^/Context /'
|
|
104
|
+
tmux kill-session -t "$SESSION" 2>/dev/null
|
|
105
|
+
|
|
106
|
+
# Clean tool breakdown from the session logs (main + subagents).
|
|
107
|
+
node "$HERE/parse-session.mjs" "$REPO" 2>/dev/null || true
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Parse a Claude Code stream-json run log: tool-call sequence + token usage.
|
|
3
|
+
import { readFileSync } from 'fs';
|
|
4
|
+
const file = process.argv[2];
|
|
5
|
+
const lines = readFileSync(file, 'utf8').split('\n').filter(Boolean);
|
|
6
|
+
|
|
7
|
+
const toolCalls = [];
|
|
8
|
+
let result = null;
|
|
9
|
+
let initTools = null;
|
|
10
|
+
|
|
11
|
+
for (const line of lines) {
|
|
12
|
+
let ev;
|
|
13
|
+
try { ev = JSON.parse(line); } catch { continue; }
|
|
14
|
+
if (ev.type === 'system' && ev.subtype === 'init') {
|
|
15
|
+
initTools = (ev.tools || []).filter(t => /codegraph/.test(t));
|
|
16
|
+
}
|
|
17
|
+
if (ev.type === 'assistant' && ev.message?.content) {
|
|
18
|
+
for (const block of ev.message.content) {
|
|
19
|
+
if (block.type === 'tool_use') {
|
|
20
|
+
let detail = '';
|
|
21
|
+
if (block.name === 'Task') detail = ` [subagent_type=${block.input?.subagent_type ?? '?'}] ${(block.input?.description ?? '').slice(0,40)}`;
|
|
22
|
+
else if (/codegraph/.test(block.name)) detail = ` ${JSON.stringify(block.input?.query ?? block.input?.task ?? block.input?.symbol ?? '').slice(0,60)}`;
|
|
23
|
+
else if (block.name === 'Bash') detail = ` ${(block.input?.command ?? '').slice(0,50)}`;
|
|
24
|
+
else if (block.name === 'Read') detail = ` ${(block.input?.file_path ?? '').split('/').slice(-1)[0]}`;
|
|
25
|
+
toolCalls.push(`${block.name}${detail}`);
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
if (ev.type === 'result') result = ev;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
console.log(`\n=== ${file.split('/').pop()} ===`);
|
|
33
|
+
console.log(`codegraph tools exposed: ${initTools ? initTools.length : '?'}`);
|
|
34
|
+
console.log(`\nTool calls (${toolCalls.length}):`);
|
|
35
|
+
const counts = {};
|
|
36
|
+
for (const tc of toolCalls) { const n = tc.split(' ')[0]; counts[n] = (counts[n]||0)+1; }
|
|
37
|
+
console.log(' by type:', JSON.stringify(counts));
|
|
38
|
+
toolCalls.forEach((tc, i) => console.log(` ${i+1}. ${tc}`));
|
|
39
|
+
|
|
40
|
+
if (result) {
|
|
41
|
+
const u = result.usage || {};
|
|
42
|
+
const totalIn = (u.input_tokens||0) + (u.cache_read_input_tokens||0) + (u.cache_creation_input_tokens||0);
|
|
43
|
+
console.log(`\nResult: ${result.subtype} | duration ${(result.duration_ms/1000).toFixed(0)}s | turns ${result.num_turns}`);
|
|
44
|
+
console.log(` tokens: in=${totalIn} out=${u.output_tokens||0} | cost $${(result.total_cost_usd||0).toFixed(3)}`);
|
|
45
|
+
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Parse the newest Claude Code session log for a project + its subagent logs,
|
|
3
|
+
// and report the tool-call breakdown (main + subagents). Works for interactive
|
|
4
|
+
// runs (driven via itrun.sh) — Claude Code writes full transcripts to
|
|
5
|
+
// ~/.claude/projects/<escaped-cwd>/<session>.jsonl with subagents/ alongside.
|
|
6
|
+
import { readFileSync, readdirSync, statSync, existsSync, realpathSync } from 'fs';
|
|
7
|
+
import { join } from 'path';
|
|
8
|
+
import { homedir } from 'os';
|
|
9
|
+
|
|
10
|
+
const projectArg = process.argv[2];
|
|
11
|
+
if (!projectArg) { console.error('usage: parse-session.mjs <project-dir>'); process.exit(1); }
|
|
12
|
+
|
|
13
|
+
// Claude Code escapes the (real) cwd by replacing every "/" with "-".
|
|
14
|
+
const real = realpathSync(projectArg);
|
|
15
|
+
const escaped = real.replace(/\//g, '-');
|
|
16
|
+
const projDir = join(homedir(), '.claude', 'projects', escaped);
|
|
17
|
+
if (!existsSync(projDir)) { console.error('no session logs at', projDir); process.exit(1); }
|
|
18
|
+
|
|
19
|
+
// Newest top-level session .jsonl
|
|
20
|
+
const sessions = readdirSync(projDir)
|
|
21
|
+
.filter(f => f.endsWith('.jsonl'))
|
|
22
|
+
.map(f => ({ f, m: statSync(join(projDir, f)).mtimeMs }))
|
|
23
|
+
.sort((a, b) => b.m - a.m);
|
|
24
|
+
if (sessions.length === 0) { console.error('no .jsonl sessions in', projDir); process.exit(1); }
|
|
25
|
+
const sessionId = sessions[0].f.replace('.jsonl', '');
|
|
26
|
+
|
|
27
|
+
function tally(file) {
|
|
28
|
+
const counts = {};
|
|
29
|
+
for (const line of readFileSync(file, 'utf8').split('\n')) {
|
|
30
|
+
if (!line) continue;
|
|
31
|
+
let ev; try { ev = JSON.parse(line); } catch { continue; }
|
|
32
|
+
const content = ev.message?.content;
|
|
33
|
+
if (!Array.isArray(content)) continue;
|
|
34
|
+
for (const b of content) {
|
|
35
|
+
if (b.type === 'tool_use') counts[b.name] = (counts[b.name] || 0) + 1;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
return counts;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// Sum token usage from a transcript. The TUI's "Done (…Xk tokens…)" line only
|
|
42
|
+
// covers a subagent's throughput; this works for main-thread runs too and is
|
|
43
|
+
// consistent across both paths. `gen` = output, `fresh` = uncached input
|
|
44
|
+
// (input + cache_creation), `cached` = cache reads (≈free). No combined total is returned.
|
|
45
|
+
function sumTokens(file) {
|
|
46
|
+
const t = { gen: 0, fresh: 0, cached: 0 };
|
|
47
|
+
for (const line of readFileSync(file, 'utf8').split('\n')) {
|
|
48
|
+
if (!line) continue;
|
|
49
|
+
let ev; try { ev = JSON.parse(line); } catch { continue; }
|
|
50
|
+
const u = ev.message?.usage;
|
|
51
|
+
if (!u) continue;
|
|
52
|
+
t.gen += u.output_tokens || 0;
|
|
53
|
+
t.fresh += (u.input_tokens || 0) + (u.cache_creation_input_tokens || 0);
|
|
54
|
+
t.cached += u.cache_read_input_tokens || 0;
|
|
55
|
+
}
|
|
56
|
+
return t;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
const mainCounts = tally(join(projDir, sessionId + '.jsonl'));
|
|
60
|
+
|
|
61
|
+
// Subagent transcripts live under <session>/subagents/*.jsonl
|
|
62
|
+
const subDir = join(projDir, sessionId, 'subagents');
|
|
63
|
+
const subCounts = {};
|
|
64
|
+
let subAgentFiles = 0;
|
|
65
|
+
if (existsSync(subDir)) {
|
|
66
|
+
for (const f of readdirSync(subDir).filter(f => f.endsWith('.jsonl'))) {
|
|
67
|
+
subAgentFiles++;
|
|
68
|
+
const c = tally(join(subDir, f));
|
|
69
|
+
for (const [k, v] of Object.entries(c)) subCounts[k] = (subCounts[k] || 0) + v;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
const fmt = (counts) => Object.entries(counts).sort((a, b) => b[1] - a[1])
|
|
74
|
+
.map(([k, v]) => ` ${String(v).padStart(3)} ${k}`).join('\n') || ' (none)';
|
|
75
|
+
|
|
76
|
+
console.log(`session: ${sessionId}`);
|
|
77
|
+
console.log(`\nMAIN thread tools:\n${fmt(mainCounts)}`);
|
|
78
|
+
console.log(`\nSUBAGENT tools (${subAgentFiles} subagent transcript${subAgentFiles === 1 ? '' : 's'}):\n${fmt(subCounts)}`);
|
|
79
|
+
|
|
80
|
+
const explore = (subCounts['mcp__codegraph__codegraph_explore'] || 0) + (mainCounts['mcp__codegraph__codegraph_explore'] || 0); // total across main thread + subagents, same as the Read and Grep/Bash tallies
|
|
81
|
+
const reads = (subCounts['Read'] || 0) + (mainCounts['Read'] || 0);
|
|
82
|
+
const greps = (subCounts['Grep'] || 0) + (mainCounts['Grep'] || 0) + (subCounts['Bash'] || 0) + (mainCounts['Bash'] || 0);
|
|
83
|
+
console.log(`\nVERDICT: codegraph_explore used ${explore}x | Read ${reads} | Grep/Bash ${greps}`);
|
|
84
|
+
|
|
85
|
+
// Token totals (main + subagents), consistent across main-thread and subagent runs.
|
|
86
|
+
const tok = { gen: 0, fresh: 0, cached: 0 };
|
|
87
|
+
const addTok = ({ gen, fresh, cached }) => { tok.gen += gen; tok.fresh += fresh; tok.cached += cached; }; // fold one transcript's token counts into the running `tok` totals
|
|
88
|
+
addTok(sumTokens(join(projDir, sessionId + '.jsonl')));
|
|
89
|
+
if (existsSync(subDir)) {
|
|
90
|
+
for (const f of readdirSync(subDir).filter(f => f.endsWith('.jsonl'))) addTok(sumTokens(join(subDir, f)));
|
|
91
|
+
}
|
|
92
|
+
const k = (n) => `${(n / 1000).toFixed(1)}k`; // format a count in thousands with one decimal, e.g. 1500 -> "1.5k"
|
|
93
|
+
console.log(`TOKENS: gen ${k(tok.gen)} | fresh-in ${k(tok.fresh)} | cached-in ${k(tok.cached)} | billable≈ ${k(tok.gen + tok.fresh)}`);
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Headless Claude Code run against a repo with codegraph MCP, capturing the
|
|
3
|
+
# full stream-json so we can see tool calls + token usage. Complements the
|
|
4
|
+
# interactive itrun.sh: headless gives a clean per-tool breakdown + exact
|
|
5
|
+
# tokens/cost, but defaults to the general-purpose subagent (not Explore).
|
|
6
|
+
# To force the Explore path, ask for it in the prompt.
|
|
7
|
+
#
|
|
8
|
+
# Usage: run-agent.sh <repo-path> <label> "<prompt>"
|
|
9
|
+
# Env: AGENT_EVAL_OUT (default /tmp/agent-eval), CG_BIN (codegraph dist binary)
|
|
10
|
+
set -uo pipefail
|
|
11
|
+
|
|
12
|
+
# Fail with a usage message (rather than a bare "unbound variable" from set -u) when an argument is missing.
REPO="${1:?usage: run-agent.sh <repo-path> <label> \"<prompt>\"}"; LABEL="${2:?label required}"; PROMPT="${3:?prompt required}"
|
|
13
|
+
CG_BIN="${CG_BIN:-$(command -v codegraph || echo /usr/local/bin/codegraph)}"
|
|
14
|
+
OUT_DIR="${AGENT_EVAL_OUT:-/tmp/agent-eval}"; mkdir -p "$OUT_DIR"
|
|
15
|
+
OUT="$OUT_DIR/run-${LABEL}.jsonl"
|
|
16
|
+
|
|
17
|
+
MCP_CONFIG=$(cat <<JSON
|
|
18
|
+
{"mcpServers":{"codegraph":{"command":"${CG_BIN}","args":["serve","--mcp","--path","${REPO}"]}}}
|
|
19
|
+
JSON
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
echo "→ running [$LABEL] in $REPO"
|
|
23
|
+
cd "$REPO" || exit 1
|
|
24
|
+
|
|
25
|
+
claude -p "$PROMPT" \
|
|
26
|
+
--output-format stream-json --verbose \
|
|
27
|
+
--permission-mode bypassPermissions \
|
|
28
|
+
--model opus \
|
|
29
|
+
--max-budget-usd 2 \
|
|
30
|
+
--strict-mcp-config --mcp-config "$MCP_CONFIG" \
|
|
31
|
+
> "$OUT" 2>"$OUT_DIR/run-${LABEL}.err"
|
|
32
|
+
|
|
33
|
+
echo "exit: $? | wrote $OUT ($(wc -l < "$OUT") lines)"
|
|
34
|
+
node "$(cd "$(dirname "$0")" && pwd)/parse-run.mjs" "$OUT" 2>/dev/null || true
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# With/without A/B (and optional interactive) eval for a codegraph version on a
|
|
3
|
+
# repo. Codegraph is the ONLY variable: both arms launch claude with
|
|
4
|
+
# --strict-mcp-config — with = codegraph-only MCP (pointed at $CG_BIN),
|
|
5
|
+
# without = empty MCP. Built-in Read/Grep/Bash stay available in both arms.
|
|
6
|
+
#
|
|
7
|
+
# Usage: run-all.sh <repo-path> "<question>" [headless|tmux|all]
|
|
8
|
+
# Env: CG_BIN codegraph binary (default: command -v codegraph)
|
|
9
|
+
# AGENT_EVAL_OUT output dir (default: /tmp/agent-eval)
|
|
10
|
+
set -uo pipefail
|
|
11
|
+
|
|
12
|
+
REPO="${1:?usage: run-all.sh <repo-path> \"<question>\" [headless|tmux|all]}"
|
|
13
|
+
Q="${2:?question required}"
|
|
14
|
+
MODE="${3:-headless}"
|
|
15
|
+
CG_BIN="${CG_BIN:-$(command -v codegraph)}"
|
|
16
|
+
OUT="${AGENT_EVAL_OUT:-/tmp/agent-eval}"
|
|
17
|
+
HARNESS="$(cd "$(dirname "$0")" && pwd)"
|
|
18
|
+
mkdir -p "$OUT"
|
|
19
|
+
|
|
20
|
+
[ -n "$CG_BIN" ] || { echo "no codegraph binary on PATH (set CG_BIN)"; exit 1; }
|
|
21
|
+
[ -d "$REPO/.codegraph" ] || { echo "no .codegraph index at $REPO — index it first"; exit 1; }
|
|
22
|
+
case "$MODE" in headless|tmux|all) ;; *) echo "mode must be headless|tmux|all (got '$MODE')"; exit 1;; esac
|
|
23
|
+
|
|
24
|
+
# MCP config files (path form avoids inline-JSON quoting through tmux).
|
|
25
|
+
cat > "$OUT/mcp-codegraph.json" <<JSON
|
|
26
|
+
{"mcpServers":{"codegraph":{"command":"$CG_BIN","args":["serve","--mcp","--path","$REPO"]}}}
|
|
27
|
+
JSON
|
|
28
|
+
echo '{"mcpServers":{}}' > "$OUT/mcp-empty.json"
|
|
29
|
+
|
|
30
|
+
echo "###### codegraph: $CG_BIN"
|
|
31
|
+
echo "###### repo: $REPO"
|
|
32
|
+
echo "###### question: $Q"
|
|
33
|
+
echo
|
|
34
|
+
|
|
35
|
+
# Headless arm: claude -p with stream-json -> exact tool sequence + tokens/cost.
|
|
36
|
+
headless() {
|
|
37
|
+
local label="$1" cfg="$2"
|
|
38
|
+
echo "############################## HEADLESS [$label] ##############################"
|
|
39
|
+
( cd "$REPO" && claude -p "$Q" \
|
|
40
|
+
--output-format stream-json --verbose \
|
|
41
|
+
--permission-mode bypassPermissions \
|
|
42
|
+
--model opus \
|
|
43
|
+
--max-budget-usd 4 \
|
|
44
|
+
--strict-mcp-config --mcp-config "$cfg" \
|
|
45
|
+
> "$OUT/run-$label.jsonl" 2>"$OUT/run-$label.err" )
|
|
46
|
+
echo "exit $? -> $OUT/run-$label.jsonl ($(wc -l < "$OUT/run-$label.jsonl" | tr -d ' ') lines)"
|
|
47
|
+
tail -2 "$OUT/run-$label.err" 2>/dev/null
|
|
48
|
+
node "$HARNESS/parse-run.mjs" "$OUT/run-$label.jsonl" 2>&1 || true
|
|
49
|
+
echo
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
if [ "$MODE" = headless ] || [ "$MODE" = all ]; then
|
|
53
|
+
headless "headless-with" "$OUT/mcp-codegraph.json"
|
|
54
|
+
headless "headless-without" "$OUT/mcp-empty.json"
|
|
55
|
+
fi
|
|
56
|
+
|
|
57
|
+
if [ "$MODE" = tmux ] || [ "$MODE" = all ]; then
|
|
58
|
+
echo "############################## INTERACTIVE [with] ##############################"
|
|
59
|
+
CLAUDE_EXTRA_ARGS="--model opus --strict-mcp-config --mcp-config $OUT/mcp-codegraph.json" \
|
|
60
|
+
bash "$HARNESS/itrun.sh" "$REPO" "int-with" "$Q" 2>&1 || echo "[itrun WITH failed]"
|
|
61
|
+
echo
|
|
62
|
+
echo "############################## INTERACTIVE [without] ##############################"
|
|
63
|
+
CLAUDE_EXTRA_ARGS="--model opus --strict-mcp-config --mcp-config $OUT/mcp-empty.json" \
|
|
64
|
+
bash "$HARNESS/itrun.sh" "$REPO" "int-without" "$Q" 2>&1 || echo "[itrun WITHOUT failed]"
|
|
65
|
+
echo
|
|
66
|
+
fi
|
|
67
|
+
echo "############################## RUN-ALL COMPLETE ##############################"
|