@colbymchenry/codegraph 0.7.10 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -48
- package/dist/bin/codegraph.js +25 -0
- package/dist/bin/codegraph.js.map +1 -1
- package/dist/context/index.d.ts.map +1 -1
- package/dist/context/index.js +4 -2
- package/dist/context/index.js.map +1 -1
- package/dist/extraction/index.d.ts.map +1 -1
- package/dist/extraction/index.js +63 -37
- package/dist/extraction/index.js.map +1 -1
- package/dist/installer/config-writer.d.ts.map +1 -1
- package/dist/installer/config-writer.js +3 -1
- package/dist/installer/config-writer.js.map +1 -1
- package/dist/installer/index.d.ts +12 -0
- package/dist/installer/index.d.ts.map +1 -1
- package/dist/installer/index.js +72 -4
- package/dist/installer/index.js.map +1 -1
- package/dist/installer/instructions-template.d.ts +2 -2
- package/dist/installer/instructions-template.d.ts.map +1 -1
- package/dist/installer/instructions-template.js +3 -2
- package/dist/installer/instructions-template.js.map +1 -1
- package/dist/installer/targets/claude.d.ts +10 -6
- package/dist/installer/targets/claude.d.ts.map +1 -1
- package/dist/installer/targets/claude.js +72 -10
- package/dist/installer/targets/claude.js.map +1 -1
- package/dist/mcp/index.d.ts +8 -0
- package/dist/mcp/index.d.ts.map +1 -1
- package/dist/mcp/index.js +116 -18
- package/dist/mcp/index.js.map +1 -1
- package/dist/mcp/server-instructions.d.ts +1 -1
- package/dist/mcp/server-instructions.d.ts.map +1 -1
- package/dist/mcp/server-instructions.js +14 -2
- package/dist/mcp/server-instructions.js.map +1 -1
- package/dist/mcp/tools.d.ts +59 -2
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +384 -70
- package/dist/mcp/tools.js.map +1 -1
- package/dist/mcp/transport.d.ts +17 -0
- package/dist/mcp/transport.d.ts.map +1 -1
- package/dist/mcp/transport.js +63 -0
- package/dist/mcp/transport.js.map +1 -1
- package/dist/resolution/frameworks/index.d.ts +1 -0
- package/dist/resolution/frameworks/index.d.ts.map +1 -1
- package/dist/resolution/frameworks/index.js +5 -1
- package/dist/resolution/frameworks/index.js.map +1 -1
- package/dist/resolution/frameworks/nestjs.d.ts +26 -0
- package/dist/resolution/frameworks/nestjs.d.ts.map +1 -0
- package/dist/resolution/frameworks/nestjs.js +374 -0
- package/dist/resolution/frameworks/nestjs.js.map +1 -0
- package/dist/search/query-utils.d.ts.map +1 -1
- package/dist/search/query-utils.js +29 -26
- package/dist/search/query-utils.js.map +1 -1
- package/dist/sync/git-hooks.d.ts +45 -0
- package/dist/sync/git-hooks.d.ts.map +1 -0
- package/dist/sync/git-hooks.js +223 -0
- package/dist/sync/git-hooks.js.map +1 -0
- package/dist/sync/index.d.ts +4 -0
- package/dist/sync/index.d.ts.map +1 -1
- package/dist/sync/index.js +12 -1
- package/dist/sync/index.js.map +1 -1
- package/dist/sync/watch-policy.d.ts +48 -0
- package/dist/sync/watch-policy.d.ts.map +1 -0
- package/dist/sync/watch-policy.js +124 -0
- package/dist/sync/watch-policy.js.map +1 -0
- package/dist/sync/watcher.d.ts.map +1 -1
- package/dist/sync/watcher.js +10 -0
- package/dist/sync/watcher.js.map +1 -1
- package/package.json +3 -3
- package/scripts/agent-eval/audit.sh +68 -0
- package/scripts/agent-eval/itrun.sh +107 -0
- package/scripts/agent-eval/parse-run.mjs +45 -0
- package/scripts/agent-eval/parse-session.mjs +93 -0
- package/scripts/agent-eval/run-agent.sh +34 -0
- package/scripts/agent-eval/run-all.sh +67 -0
- package/scripts/extract-release-notes.mjs +130 -0
- package/scripts/release.sh +5 -7
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Drive an INTERACTIVE Claude Code session in tmux, send a prompt, wait for the
|
|
3
|
+
# agent to finish, then print the tool-call breakdown from the session logs.
|
|
4
|
+
#
|
|
5
|
+
# Why interactive (not `claude -p`): headless print-mode picks the
|
|
6
|
+
# general-purpose subagent, while real interactive sessions delegate to the
|
|
7
|
+
# Explore subagent (or drive codegraph from the main thread). Only the
|
|
8
|
+
# interactive TUI reproduces the behavior users actually see. (Idle-detection
|
|
9
|
+
# technique borrowed from devpit's WaitForIdle.)
|
|
10
|
+
#
|
|
11
|
+
# Usage: itrun.sh <repo-path> <label> "<prompt>"
|
|
12
|
+
# Output dir: $AGENT_EVAL_OUT (default /tmp/agent-eval)
|
|
13
|
+
# Requires: tmux 3.0+, a logged-in `claude` CLI, codegraph MCP configured.
|
|
14
|
+
set -uo pipefail

# Fail fast with a usage line instead of an "unbound variable" error. An empty
# prompt is rejected too: it would make the type-and-verify needle below empty,
# and `grep -F ""` matches any pane content.
USAGE='usage: itrun.sh <repo-path> <label> "<prompt>"'
REPO="${1:?$USAGE}"; LABEL="${2:?$USAGE}"; PROMPT="${3:?$USAGE}"

# "." and ":" are separators in tmux target syntax (session:window.pane), so a
# label containing them would make every `-t "$SESSION"` below miss the session.
SESSION="cgt_${LABEL//[.:]/_}"
OUT_DIR="${AGENT_EVAL_OUT:-/tmp/agent-eval}"; mkdir -p "$OUT_DIR"
OUT="$OUT_DIR/itrun-${LABEL}.txt"
HERE="$(cd "$(dirname "$0")" && pwd)"

# Print the last ~40 lines of the session's pane.
cap() { tmux capture-pane -p -t "$SESSION" -S -40; }

# Drop any stale session left over from a previous run with the same label.
tmux kill-session -t "$SESSION" 2>/dev/null

# Wide pane so the TUI doesn't hard-wrap tool lines.
tmux new-session -d -s "$SESSION" -x 230 -y 60
# The command line is typed into the shell running inside tmux, so the repo
# path must be shell-quoted here; a bare $REPO breaks on spaces/metacharacters.
# CLAUDE_EXTRA_ARGS is deliberately left unquoted: it carries several flags.
tmux send-keys -t "$SESSION" "cd $(printf '%q' "$REPO") && claude --dangerously-skip-permissions ${CLAUDE_EXTRA_ARGS:-}" Enter

# Wait for the ❯ prompt (claude drew its UI), up to 60s. NOTE: ❯ appears on the
# welcome screen seconds before the input actually accepts keystrokes, so this is
# necessary but NOT sufficient — the type-and-verify loop below is what proves
# the input is live.
ready=0
for _ in $(seq 1 120); do
  cap | grep -q "❯" && { ready=1; break; }
  sleep 0.5
done
[ "$ready" = 1 ] || { echo "claude never drew its UI"; cap; tmux kill-session -t "$SESSION" 2>/dev/null; exit 1; }

# Accept the per-folder "Is this a project you trust?" dialog if it shows (first
# time claude opens a given repo). Option 1 ("Yes, I trust this folder") is
# pre-selected, so Enter accepts. This dialog also contains ❯, so it must be
# cleared before the type-and-verify loop or keystrokes land on the menu.
for _ in $(seq 1 20); do
  cap | grep -q "trust this folder" || break
  tmux send-keys -t "$SESSION" Enter
  sleep 1
done

# Type-and-verify: send the prompt, confirm a distinctive chunk of it actually
# landed in the input box, retry if it didn't (handles the early-❯ race where
# the welcome screen shows the prompt glyph but MCP init is still eating keys).
needle="${PROMPT:0:24}"
typed=0
for _ in $(seq 1 30); do
  tmux send-keys -l -t "$SESSION" "$PROMPT"
  sleep 1
  if cap | grep -Fq "$needle"; then typed=1; break; fi
  # Clear whatever partial text may have landed, then retry.
  tmux send-keys -t "$SESSION" C-u
  sleep 1
done
[ "$typed" = 1 ] || { echo "prompt never landed in the input box"; cap; tmux kill-session -t "$SESSION" 2>/dev/null; exit 1; }
sleep 0.5
tmux send-keys -t "$SESSION" Enter

# Busy signals. The robust one is the spinner's elapsed-time-in-parens, which
# EVERY working state shows — both the pre-stream thinking phase
# "(8s · thinking with max effort)" and the streaming phase
# "(24s · ↑ 2.5k tokens · …)", and it survives the 32s→"1m 3s" rollover. We OR
# in the token arrows, "esc to interrupt", and "Initializing" as belt-and-braces
# (some TUI versions/states show one but not the others).
BUSY_RE='esc to interrupt|↓ [0-9]|↑ [0-9]|Initializing|\(([0-9]+m )?[0-9]+s ·'

# Wait for work to START (busy indicator appears), up to 60s. If it never starts,
# fail loudly rather than silently reporting an empty run.
started=0
for _ in $(seq 1 120); do
  cap | grep -qE "$BUSY_RE" && { started=1; break; }
  sleep 0.5
done
[ "$started" = 1 ] || { echo "agent never started working"; cap; tmux kill-session -t "$SESSION" 2>/dev/null; exit 1; }

# Poll for idle: not busy AND ❯ present, for 10 consecutive polls (~5s) to ride
# out mid-conversation thinking gaps that briefly drop the spinner. Up to ~15min.
consec=0
idle=0
for _ in $(seq 1 1800); do
  pane=$(cap)
  if echo "$pane" | grep -qE "$BUSY_RE"; then
    consec=0
  elif echo "$pane" | grep -q "❯"; then
    consec=$((consec+1))
    if [ "$consec" -ge 10 ]; then idle=1; break; fi
  else
    consec=0
  fi
  sleep 0.5
done
# Still capture on timeout (partial output is useful), but say so: the numbers
# below would otherwise look like a finished run.
[ "$idle" = 1 ] || echo "warning: agent not idle after ~15min; capturing a possibly unfinished run" >&2
sleep 1

# Full scrollback (-S -) goes to the output file; then pull the summary lines.
tmux capture-pane -p -t "$SESSION" -S - > "$OUT"
echo "captured $(wc -l < "$OUT") lines -> $OUT"
grep -oE "Done \([^)]*\)" "$OUT" | tail -1
grep -oE "[0-9.]+k?/[0-9.]+M" "$OUT" | tail -1 | sed 's/^/Context /'
tmux kill-session -t "$SESSION" 2>/dev/null

# Clean tool breakdown from the session logs (main + subagents).
node "$HERE/parse-session.mjs" "$REPO" 2>/dev/null || true
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Parse a Claude Code stream-json run log: tool-call sequence + token usage.
|
|
3
|
+
import { readFileSync } from 'fs';
|
|
4
|
+
const file = process.argv[2];

/**
 * Build the short, human-readable suffix shown after a tool name.
 *
 * @param {{name?: string, input?: object}} block - a `tool_use` content block
 * @returns {string} suffix starting with a space, or '' for other tools
 */
function describeToolCall(block) {
  const input = block.input;
  if (block.name === 'Task') {
    return ` [subagent_type=${input?.subagent_type ?? '?'}] ${(input?.description ?? '').slice(0, 40)}`;
  }
  if (/codegraph/.test(block.name)) {
    return ` ${JSON.stringify(input?.query ?? input?.task ?? input?.symbol ?? '').slice(0, 60)}`;
  }
  if (block.name === 'Bash') return ` ${(input?.command ?? '').slice(0, 50)}`;
  if (block.name === 'Read') return ` ${(input?.file_path ?? '').split('/').slice(-1)[0]}`;
  return '';
}

/**
 * Summarize a stream-json run log.
 *
 * @param {string} text - newline-delimited JSON events
 * @returns {{toolCalls: string[], counts: Object<string, number>, result: object|null, initTools: string[]|null}}
 *   `toolCalls` in call order, `counts` per tool name, the last `result`
 *   event (or null), and the codegraph tools listed by the init event (or null).
 */
function summarizeRun(text) {
  const toolCalls = [];
  const counts = {};
  let result = null;
  let initTools = null;

  for (const line of text.split('\n')) {
    if (!line) continue;
    let ev;
    try { ev = JSON.parse(line); } catch { continue; }
    // Valid JSON that is not an object (e.g. `null`) carries no event; reading
    // `.type` off null would throw and abort the whole report.
    if (ev === null || typeof ev !== 'object') continue;

    if (ev.type === 'system' && ev.subtype === 'init') {
      initTools = (ev.tools || []).filter((t) => /codegraph/.test(t));
    }
    if (ev.type === 'assistant' && Array.isArray(ev.message?.content)) {
      for (const block of ev.message.content) {
        if (block?.type !== 'tool_use') continue;
        toolCalls.push(`${block.name}${describeToolCall(block)}`);
        // Count by the tool name itself instead of re-parsing the display string.
        counts[block.name] = (counts[block.name] || 0) + 1;
      }
    }
    if (ev.type === 'result') result = ev;
  }
  return { toolCalls, counts, result, initTools };
}

/**
 * Read a run log from disk and print the tool-call and token report.
 *
 * @param {string} path - path to the stream-json log
 */
function printReport(path) {
  const { toolCalls, counts, result, initTools } = summarizeRun(readFileSync(path, 'utf8'));

  console.log(`\n=== ${path.split('/').pop()} ===`);
  console.log(`codegraph tools exposed: ${initTools ? initTools.length : '?'}`);
  console.log(`\nTool calls (${toolCalls.length}):`);
  console.log(' by type:', JSON.stringify(counts));
  toolCalls.forEach((tc, i) => console.log(` ${i+1}. ${tc}`));

  if (result) {
    const u = result.usage || {};
    const totalIn = (u.input_tokens||0) + (u.cache_read_input_tokens||0) + (u.cache_creation_input_tokens||0);
    console.log(`\nResult: ${result.subtype} | duration ${(result.duration_ms/1000).toFixed(0)}s | turns ${result.num_turns}`);
    console.log(` tokens: in=${totalIn} out=${u.output_tokens||0} | cost $${(result.total_cost_usd||0).toFixed(3)}`);
  }
}

if (file) {
  printReport(file);
} else {
  // A missing argument used to surface as an opaque TypeError from readFileSync.
  console.error('usage: parse-run.mjs <run.jsonl>');
  process.exitCode = 1;
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Parse the newest Claude Code session log for a project + its subagent logs,
|
|
3
|
+
// and report the tool-call breakdown (main + subagents). Works for interactive
|
|
4
|
+
// runs (driven via itrun.sh) — Claude Code writes full transcripts to
|
|
5
|
+
// ~/.claude/projects/<escaped-cwd>/<session>.jsonl with subagents/ alongside.
|
|
6
|
+
import { readFileSync, readdirSync, statSync, existsSync, realpathSync } from 'fs';
|
|
7
|
+
import { join } from 'path';
|
|
8
|
+
import { homedir } from 'os';
|
|
9
|
+
|
|
10
|
+
const projectArg = process.argv[2];
if (!projectArg) { console.error('usage: parse-session.mjs <project-dir>'); process.exit(1); }

// Resolve symlinks first; the log directory name is derived from the real path.
// A path that does not exist is reported instead of crashing with a stack trace.
let real;
try {
  real = realpathSync(projectArg);
} catch (err) {
  console.error(`cannot resolve project dir ${projectArg}: ${err.message}`);
  process.exit(1);
}

// Claude Code escapes the (real) cwd by replacing every "/" with "-".
// NOTE(review): some Claude Code versions appear to replace other
// non-alphanumeric characters (".", "_", …) as well — TODO confirm. The second
// candidate is only used when the "/"-only name has no directory on disk.
const projectsRoot = join(homedir(), '.claude', 'projects');
const candidates = [real.replace(/\//g, '-'), real.replace(/[^A-Za-z0-9]/g, '-')];
const escaped = candidates.find((name) => existsSync(join(projectsRoot, name))) || candidates[0];
const projDir = join(projectsRoot, escaped);
if (!existsSync(projDir)) { console.error('no session logs at', projDir); process.exit(1); }

// Newest top-level session .jsonl
const sessions = readdirSync(projDir)
  .filter(f => f.endsWith('.jsonl'))
  .map(f => ({ f, m: statSync(join(projDir, f)).mtimeMs }))
  .sort((a, b) => b.m - a.m);
if (sessions.length === 0) { console.error('no .jsonl sessions in', projDir); process.exit(1); }
// Strip the extension from the END of the name; replace() would remove the
// first ".jsonl" wherever it occurs.
const sessionId = sessions[0].f.slice(0, -'.jsonl'.length);
|
|
26
|
+
|
|
27
|
+
/**
 * Count tool invocations in one transcript.
 *
 * Reads `file` as newline-delimited JSON and counts every `tool_use` entry
 * found in an event's `message.content` array, keyed by tool name. Blank
 * lines, unparseable lines, and events without a content array are skipped.
 *
 * @param {string} file - path to a .jsonl transcript
 * @returns {Object<string, number>} tool name -> number of calls
 */
function tally(file) {
  const counts = {};
  for (const line of readFileSync(file, 'utf8').split('\n')) {
    if (!line) continue;
    let ev;
    try { ev = JSON.parse(line); } catch { continue; }
    // `ev?.` / `b?.`: a line holding JSON `null` (or a null content entry)
    // parses fine but would throw on property access and abort the tally.
    const content = ev?.message?.content;
    if (!Array.isArray(content)) continue;
    for (const b of content) {
      if (b?.type === 'tool_use') counts[b.name] = (counts[b.name] || 0) + 1;
    }
  }
  return counts;
}
|
|
40
|
+
|
|
41
|
+
// Sum token usage from a transcript. The TUI's "Done (…Xk tokens…)" line only
// covers a subagent's throughput; this works for main-thread runs too and is
// consistent across both paths. `gen` = output, `fresh` = uncached input
// (input + cache_creation), `cached` = cache reads (≈free).
/**
 * @param {string} file - path to a .jsonl transcript
 * @returns {{gen: number, fresh: number, cached: number}} summed over every
 *   event that carries `message.usage`; missing counters count as 0
 */
function sumTokens(file) {
  const t = { gen: 0, fresh: 0, cached: 0 };
  for (const line of readFileSync(file, 'utf8').split('\n')) {
    if (!line) continue;
    let ev;
    try { ev = JSON.parse(line); } catch { continue; }
    // `ev?.`: a line holding JSON `null` parses fine but would throw here.
    const u = ev?.message?.usage;
    if (!u) continue;
    t.gen += u.output_tokens || 0;
    t.fresh += (u.input_tokens || 0) + (u.cache_creation_input_tokens || 0);
    t.cached += u.cache_read_input_tokens || 0;
  }
  return t;
}
|
|
58
|
+
|
|
59
|
+
// Tool counts for the main thread's transcript.
const mainCounts = tally(join(projDir, `${sessionId}.jsonl`));

// Subagent transcripts live under <session>/subagents/*.jsonl
const subDir = join(projDir, sessionId, 'subagents');
const subCounts = {};
let subAgentFiles = 0;
// No subagents directory simply means no subagent transcripts to merge.
const subTranscripts = existsSync(subDir)
  ? readdirSync(subDir).filter((name) => name.endsWith('.jsonl'))
  : [];
for (const name of subTranscripts) {
  subAgentFiles += 1;
  // Fold this transcript's per-tool counts into the combined subagent totals.
  for (const [tool, n] of Object.entries(tally(join(subDir, name)))) {
    subCounts[tool] = (subCounts[tool] || 0) + n;
  }
}
|
|
72
|
+
|
|
73
|
+
/**
 * Render a tool-count map as one line per tool, highest count first, with the
 * count right-aligned to width 3. An empty map renders as a "(none)" line.
 *
 * @param {Object<string, number>} counts - tool name -> number of calls
 * @returns {string} newline-joined rows
 */
const fmt = (counts) => {
  const rows = Object.entries(counts);
  rows.sort((x, y) => y[1] - x[1]);
  const text = rows.map(([tool, n]) => ` ${String(n).padStart(3)} ${tool}`).join('\n');
  return text || ' (none)';
};
|
|
75
|
+
|
|
76
|
+
// Per-thread breakdown: the main transcript first, then all subagents combined.
console.log(`session: ${sessionId}`);
console.log(`\nMAIN thread tools:\n${fmt(mainCounts)}`);
console.log(`\nSUBAGENT tools (${subAgentFiles} subagent transcript${subAgentFiles === 1 ? '' : 's'}):\n${fmt(subCounts)}`);

// One-line verdict. Every figure is main + subagents. `explore` used to be
// `sub || main`, which silently dropped the main-thread calls whenever a
// subagent had also called the tool, unlike `reads` and `greps`, which sum.
const EXPLORE_TOOL = 'mcp__codegraph__codegraph_explore';
const explore = (subCounts[EXPLORE_TOOL] || 0) + (mainCounts[EXPLORE_TOOL] || 0);
const reads = (subCounts['Read'] || 0) + (mainCounts['Read'] || 0);
// Bash is lumped in with Grep, as the "Grep/Bash" label says.
const greps = (subCounts['Grep'] || 0) + (mainCounts['Grep'] || 0) + (subCounts['Bash'] || 0) + (mainCounts['Bash'] || 0);
console.log(`\nVERDICT: codegraph_explore used ${explore}x | Read ${reads} | Grep/Bash ${greps}`);
|
|
84
|
+
|
|
85
|
+
// Token totals (main + subagents), consistent across main-thread and subagent runs.
// Gather every transcript path first (main session, then any subagent files),
// then fold each file's usage into one running total.
const transcripts = [join(projDir, `${sessionId}.jsonl`)];
if (existsSync(subDir)) {
  for (const name of readdirSync(subDir)) {
    if (name.endsWith('.jsonl')) transcripts.push(join(subDir, name));
  }
}
const tok = { gen: 0, fresh: 0, cached: 0 };
const addTok = (t) => {
  tok.gen += t.gen;
  tok.fresh += t.fresh;
  tok.cached += t.cached;
};
for (const path of transcripts) addTok(sumTokens(path));

// Thousands with one decimal, e.g. 1234 -> "1.2k".
const k = (n) => `${(n / 1000).toFixed(1)}k`;
console.log(`TOKENS: gen ${k(tok.gen)} | fresh-in ${k(tok.fresh)} | cached-in ${k(tok.cached)} | billable≈ ${k(tok.gen + tok.fresh)}`);
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Headless Claude Code run against a repo with codegraph MCP, capturing the
|
|
3
|
+
# full stream-json so we can see tool calls + token usage. Complements the
|
|
4
|
+
# interactive itrun.sh: headless gives a clean per-tool breakdown + exact
|
|
5
|
+
# tokens/cost, but defaults to the general-purpose subagent (not Explore).
|
|
6
|
+
# To force the Explore path, ask for it in the prompt.
|
|
7
|
+
#
|
|
8
|
+
# Usage: run-agent.sh <repo-path> <label> "<prompt>"
|
|
9
|
+
# Env: AGENT_EVAL_OUT (default /tmp/agent-eval), CG_BIN (codegraph dist binary)
|
|
10
|
+
set -uo pipefail

REPO="${1:?usage: run-agent.sh <repo-path> <label> \"<prompt>\"}"
LABEL="${2:?label required}"
PROMPT="${3:?prompt required}"
CG_BIN="${CG_BIN:-$(command -v codegraph || echo /usr/local/bin/codegraph)}"

# Resolve every path BEFORE the `cd "$REPO"` below. A relative $0, repo path or
# AGENT_EVAL_OUT would otherwise be re-interpreted relative to the repo; the
# parser lookup in particular failed silently behind `2>/dev/null || true`.
HERE="$(cd "$(dirname "$0")" >/dev/null && pwd)"
abs_repo="$(cd "$REPO" >/dev/null && pwd)" || { echo "cannot enter repo: $REPO" >&2; exit 1; }
REPO="$abs_repo"
OUT_DIR="${AGENT_EVAL_OUT:-/tmp/agent-eval}"; mkdir -p "$OUT_DIR"
abs_out="$(cd "$OUT_DIR" >/dev/null && pwd)" || { echo "cannot enter output dir: $OUT_DIR" >&2; exit 1; }
OUT_DIR="$abs_out"
OUT="$OUT_DIR/run-${LABEL}.jsonl"

# Escape backslashes and double quotes so a path can sit inside a JSON string.
json_escape() {
  local s=${1//\\/\\\\}
  s=${s//\"/\\\"}
  printf '%s' "$s"
}
cg_json="$(json_escape "$CG_BIN")"
repo_json="$(json_escape "$REPO")"

# Inline MCP config: codegraph is the only server, pointed at this repo.
MCP_CONFIG=$(cat <<JSON
{"mcpServers":{"codegraph":{"command":"${cg_json}","args":["serve","--mcp","--path","${repo_json}"]}}}
JSON
)

echo "→ running [$LABEL] in $REPO"
cd "$REPO" || exit 1

# stdout is the stream-json event log; stderr goes to a sibling .err file.
claude -p "$PROMPT" \
  --output-format stream-json --verbose \
  --permission-mode bypassPermissions \
  --model opus \
  --max-budget-usd 2 \
  --strict-mcp-config --mcp-config "$MCP_CONFIG" \
  > "$OUT" 2>"$OUT_DIR/run-${LABEL}.err"

echo "exit: $? | wrote $OUT ($(wc -l < "$OUT") lines)"
# Best-effort summary; a parser failure must not fail the run itself.
node "$HERE/parse-run.mjs" "$OUT" 2>/dev/null || true
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# With/without A/B (and optional interactive) eval for a codegraph version on a
|
|
3
|
+
# repo. Codegraph is the ONLY variable: both arms launch claude with
|
|
4
|
+
# --strict-mcp-config — with = codegraph-only MCP (pointed at $CG_BIN),
|
|
5
|
+
# without = empty MCP. Built-in Read/Grep/Bash stay available in both arms.
|
|
6
|
+
#
|
|
7
|
+
# Usage: run-all.sh <repo-path> "<question>" [headless|tmux|all]
|
|
8
|
+
# Env: CG_BIN codegraph binary (default: command -v codegraph)
|
|
9
|
+
# AGENT_EVAL_OUT output dir (default: /tmp/agent-eval)
|
|
10
|
+
set -uo pipefail

REPO="${1:?usage: run-all.sh <repo-path> \"<question>\" [headless|tmux|all]}"
Q="${2:?question required}"
MODE="${3:-headless}"
CG_BIN="${CG_BIN:-$(command -v codegraph)}"
OUT="${AGENT_EVAL_OUT:-/tmp/agent-eval}"
HARNESS="$(cd "$(dirname "$0")" && pwd)"
mkdir -p "$OUT"

[ -n "$CG_BIN" ] || { echo "no codegraph binary on PATH (set CG_BIN)"; exit 1; }
[ -d "$REPO/.codegraph" ] || { echo "no .codegraph index at $REPO — index it first"; exit 1; }
case "$MODE" in headless|tmux|all) ;; *) echo "mode must be headless|tmux|all (got '$MODE')"; exit 1;; esac

# Make both paths absolute: the headless arm runs inside `cd "$REPO"`, where a
# relative $OUT (redirect targets, --mcp-config) or a relative --path in the
# MCP config would point at the wrong place.
abs_repo="$(cd "$REPO" >/dev/null && pwd)" || { echo "cannot enter repo $REPO"; exit 1; }
REPO="$abs_repo"
abs_out="$(cd "$OUT" >/dev/null && pwd)" || { echo "cannot enter output dir $OUT"; exit 1; }
OUT="$abs_out"

# Escape backslashes and double quotes so a path can sit inside a JSON string.
json_escape() {
  local s=${1//\\/\\\\}
  s=${s//\"/\\\"}
  printf '%s' "$s"
}
cg_json="$(json_escape "$CG_BIN")"
repo_json="$(json_escape "$REPO")"

# MCP config files (path form avoids inline-JSON quoting through tmux).
cat > "$OUT/mcp-codegraph.json" <<JSON
{"mcpServers":{"codegraph":{"command":"${cg_json}","args":["serve","--mcp","--path","${repo_json}"]}}}
JSON
echo '{"mcpServers":{}}' > "$OUT/mcp-empty.json"

echo "###### codegraph: $CG_BIN"
echo "###### repo: $REPO"
echo "###### question: $Q"
echo

# Headless arm: claude -p with stream-json -> exact tool sequence + tokens/cost.
#   $1 = label used in output file names, $2 = path to the MCP config file
headless() {
  local label="$1" cfg="$2"
  echo "############################## HEADLESS [$label] ##############################"
  # Subshell so the cd does not leak into the rest of the script.
  ( cd "$REPO" && claude -p "$Q" \
      --output-format stream-json --verbose \
      --permission-mode bypassPermissions \
      --model opus \
      --max-budget-usd 4 \
      --strict-mcp-config --mcp-config "$cfg" \
      > "$OUT/run-$label.jsonl" 2>"$OUT/run-$label.err" )
  echo "exit $? -> $OUT/run-$label.jsonl ($(wc -l < "$OUT/run-$label.jsonl" | tr -d ' ') lines)"
  tail -2 "$OUT/run-$label.err" 2>/dev/null
  node "$HARNESS/parse-run.mjs" "$OUT/run-$label.jsonl" 2>&1 || true
  echo
}

if [ "$MODE" = headless ] || [ "$MODE" = all ]; then
  headless "headless-with" "$OUT/mcp-codegraph.json"
  headless "headless-without" "$OUT/mcp-empty.json"
fi

if [ "$MODE" = tmux ] || [ "$MODE" = all ]; then
  # itrun.sh pastes CLAUDE_EXTRA_ARGS into a command line typed into a shell,
  # so the config path is shell-quoted (%q) to survive spaces in $OUT.
  echo "############################## INTERACTIVE [with] ##############################"
  CLAUDE_EXTRA_ARGS="--model opus --strict-mcp-config --mcp-config $(printf '%q' "$OUT/mcp-codegraph.json")" \
    bash "$HARNESS/itrun.sh" "$REPO" "int-with" "$Q" 2>&1 || echo "[itrun WITH failed]"
  echo
  echo "############################## INTERACTIVE [without] ##############################"
  CLAUDE_EXTRA_ARGS="--model opus --strict-mcp-config --mcp-config $(printf '%q' "$OUT/mcp-empty.json")" \
    bash "$HARNESS/itrun.sh" "$REPO" "int-without" "$Q" 2>&1 || echo "[itrun WITHOUT failed]"
  echo
fi
echo "############################## RUN-ALL COMPLETE ##############################"
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Extract a release-notes block from CHANGELOG.md for a given version
|
|
4
|
+
* (or unwrap text supplied on stdin), then join hard-wrapped paragraphs.
|
|
5
|
+
*
|
|
6
|
+
* Why: GitHub renders release-note Markdown with GFM hard breaks, so
|
|
7
|
+
* every `\n` becomes `<br>`. The CHANGELOG is hard-wrapped at ~75
|
|
8
|
+
* chars for readable diffs, which then renders as awkward visible
|
|
9
|
+
* line breaks on the release page. This script joins indented
|
|
10
|
+
* continuation lines into a single line per bullet so the GFM
|
|
11
|
+
* renderer produces clean paragraphs.
|
|
12
|
+
*
|
|
13
|
+
* Repo-level CHANGELOG.md viewing is unaffected (CommonMark treats
|
|
14
|
+
* newlines as spaces there).
|
|
15
|
+
*
|
|
16
|
+
* Usage:
|
|
17
|
+
* extract-release-notes.mjs <version> # read CHANGELOG.md
|
|
18
|
+
* extract-release-notes.mjs --stdin # read from stdin (any text)
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { readFileSync } from 'fs';
|
|
22
|
+
|
|
23
|
+
const arg = process.argv[2];
if (!arg) {
  console.error('usage: extract-release-notes.mjs <version> | --stdin');
  process.exit(1);
}

// `block` = the lines to unwrap: all of stdin, or the CHANGELOG section that
// starts at the `## [<version>]` header and ends before the next `## [` header.
let block;
if (arg === '--stdin') {
  block = readFileSync(0, 'utf8').replace(/\r\n?/g, '\n').split('\n');
} else {
  const version = arg;
  // Escape every regex metacharacter, not just ".": a version carrying build
  // metadata ("1.2.0+build.5") would otherwise build a pattern that never
  // matches its own header.
  const escaped = version.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const headerRe = new RegExp(`^## \\[${escaped}\\]`);
  const anyHeaderRe = /^## \[/;
  // Normalise CRLF exactly like the stdin path, so a CRLF checkout does not
  // leave stray "\r" characters inside the joined paragraphs.
  const lines = readFileSync('CHANGELOG.md', 'utf8').replace(/\r\n?/g, '\n').split('\n');
  const start = lines.findIndex((l) => headerRe.test(l));
  if (start === -1) {
    console.error(`no '## [${version}]' entry found in CHANGELOG.md`);
    process.exit(1);
  }
  const after = lines.findIndex((l, i) => i > start && anyHeaderRe.test(l));
  block = lines.slice(start, after === -1 ? lines.length : after);
}
|
|
46
|
+
|
|
47
|
+
// Unwrap state shared by the main loop:
//   out   - finished output lines
//   buf   - the logical line currently being assembled ('' when none is open)
//   stack - `{ indent: number }` frames, one per open list nesting level, so
//           a continuation line can be matched against its ancestors. The
//           case this exists for is a continuation that follows a nested list:
//
//   - top-level
//     - nested
//     back to top-level    <- indented less than the nested item's text
const out = [];
let buf = '';
let stack = [];

/** Move the pending logical line, if there is one, to `out` and clear it. */
function flushBuf() {
  if (buf === '') return;
  out.push(buf);
  buf = '';
}
|
|
64
|
+
|
|
65
|
+
/**
 * Number of whitespace characters at the start of `s`.
 *
 * @param {string} s
 * @returns {number} 0 when `s` is empty or starts with a non-space character
 */
function leadingSpaces(s) {
  return s.length - s.trimStart().length;
}
|
|
69
|
+
|
|
70
|
+
// Bullets: `-`, `*`, `digit.` only. `+` is intentionally excluded — the
// CHANGELOG uses literal `+` inline (`config + instructions`) and we
// don't want to misread those as nested bullets.
const listItemRe = /^(\s*)([-*]|\d+\.)\s+/;
const fenceRe = /^\s*```/;

let inFence = false;

// Close any open paragraph, forget the list nesting, and emit `text` as-is.
const emitVerbatim = (text) => {
  flushBuf();
  stack = [];
  out.push(text);
};

for (const line of block) {
  if (fenceRe.test(line)) {
    // Fence delimiter: emitted untouched, and toggles verbatim mode.
    emitVerbatim(line);
    inFence = !inFence;
  } else if (inFence) {
    // Inside a fenced code block nothing is joined.
    out.push(line);
  } else if (line.trim() === '') {
    // Blank line ends the current paragraph but keeps the list nesting.
    flushBuf();
    out.push('');
  } else if (line.startsWith('#')) {
    // Heading.
    emitVerbatim(line);
  } else {
    const bullet = listItemRe.exec(line);
    if (bullet) {
      // New list item: drop frames at the same or deeper indent, then open
      // a frame for this item and start a fresh logical line with it.
      flushBuf();
      const depth = bullet[1].length;
      while (stack.length > 0 && stack[stack.length - 1].indent >= depth) {
        stack.pop();
      }
      stack.push({ indent: depth });
      buf = line;
    } else if (/^\s/.test(line)) {
      // Indented continuation. Frames at the same or deeper indent are closed
      // first (the outermost frame is always kept); each closed frame flushes
      // the pending line, so the text then starts a new logical line.
      // Otherwise it is appended to the pending line with a single space.
      const depth = leadingSpaces(line);
      while (stack.length > 1 && stack[stack.length - 1].indent >= depth) {
        flushBuf();
        stack.pop();
      }
      const text = line.trimStart();
      buf = buf === '' ? text : `${buf} ${text}`;
    } else {
      // Unindented non-list text: emitted as its own line.
      emitVerbatim(line);
    }
  }
}
flushBuf();

// Print the result, then add a newline unless the last line already ends with one.
const lastLine = out[out.length - 1];
process.stdout.write(out.join('\n'));
if (!lastLine?.endsWith('\n')) process.stdout.write('\n');
|
package/scripts/release.sh
CHANGED
|
@@ -30,13 +30,11 @@ if ! grep -q "^## \[${VERSION}\]" CHANGELOG.md; then
|
|
|
30
30
|
exit 1
|
|
31
31
|
fi
|
|
32
32
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
p
|
|
39
|
-
' CHANGELOG.md)
|
|
33
|
+
# Extract notes with paragraph unwrapping — GitHub Releases render with
|
|
34
|
+
# GFM hard-breaks, so the CHANGELOG's hard-wrapped lines would show as
|
|
35
|
+
# visible `<br>` breaks otherwise. The helper joins continuation lines
|
|
36
|
+
# into a single line per bullet.
|
|
37
|
+
NOTES=$(node scripts/extract-release-notes.mjs "${VERSION}")
|
|
40
38
|
|
|
41
39
|
if [ -z "${NOTES}" ]; then
|
|
42
40
|
echo "error: failed to extract changelog notes for ${VERSION}" >&2
|