@stupidloud/codegraph 0.9.5 → 0.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +252 -116
- package/dist/bin/codegraph.js +52 -82
- package/dist/bin/codegraph.js.map +1 -1
- package/dist/context/formatter.d.ts.map +1 -1
- package/dist/context/formatter.js +25 -6
- package/dist/context/formatter.js.map +1 -1
- package/dist/context/index.d.ts +22 -0
- package/dist/context/index.d.ts.map +1 -1
- package/dist/context/index.js +257 -6
- package/dist/context/index.js.map +1 -1
- package/dist/context/markers.d.ts +19 -0
- package/dist/context/markers.d.ts.map +1 -0
- package/dist/context/markers.js +22 -0
- package/dist/context/markers.js.map +1 -0
- package/dist/db/queries.d.ts +88 -0
- package/dist/db/queries.d.ts.map +1 -1
- package/dist/db/queries.js +251 -7
- package/dist/db/queries.js.map +1 -1
- package/dist/db/sqlite-adapter.d.ts +7 -0
- package/dist/db/sqlite-adapter.d.ts.map +1 -1
- package/dist/db/sqlite-adapter.js +3 -0
- package/dist/db/sqlite-adapter.js.map +1 -1
- package/dist/directory.d.ts.map +1 -1
- package/dist/directory.js +6 -20
- package/dist/directory.js.map +1 -1
- package/dist/extraction/generated-detection.d.ts +30 -0
- package/dist/extraction/generated-detection.d.ts.map +1 -0
- package/dist/extraction/generated-detection.js +80 -0
- package/dist/extraction/generated-detection.js.map +1 -0
- package/dist/extraction/grammars.d.ts +17 -1
- package/dist/extraction/grammars.d.ts.map +1 -1
- package/dist/extraction/grammars.js +65 -1
- package/dist/extraction/grammars.js.map +1 -1
- package/dist/extraction/index.d.ts +15 -2
- package/dist/extraction/index.d.ts.map +1 -1
- package/dist/extraction/index.js +206 -98
- package/dist/extraction/index.js.map +1 -1
- package/dist/extraction/languages/c-cpp.d.ts.map +1 -1
- package/dist/extraction/languages/c-cpp.js +45 -0
- package/dist/extraction/languages/c-cpp.js.map +1 -1
- package/dist/extraction/languages/csharp.d.ts.map +1 -1
- package/dist/extraction/languages/csharp.js +2 -1
- package/dist/extraction/languages/csharp.js.map +1 -1
- package/dist/extraction/languages/go.d.ts.map +1 -1
- package/dist/extraction/languages/go.js +18 -2
- package/dist/extraction/languages/go.js.map +1 -1
- package/dist/extraction/languages/index.d.ts.map +1 -1
- package/dist/extraction/languages/index.js +2 -0
- package/dist/extraction/languages/index.js.map +1 -1
- package/dist/extraction/languages/java.d.ts.map +1 -1
- package/dist/extraction/languages/java.js +6 -0
- package/dist/extraction/languages/java.js.map +1 -1
- package/dist/extraction/languages/kotlin.d.ts.map +1 -1
- package/dist/extraction/languages/kotlin.js +6 -0
- package/dist/extraction/languages/kotlin.js.map +1 -1
- package/dist/extraction/languages/objc.d.ts +3 -0
- package/dist/extraction/languages/objc.d.ts.map +1 -0
- package/dist/extraction/languages/objc.js +133 -0
- package/dist/extraction/languages/objc.js.map +1 -0
- package/dist/extraction/mybatis-extractor.d.ts +48 -0
- package/dist/extraction/mybatis-extractor.d.ts.map +1 -0
- package/dist/extraction/mybatis-extractor.js +198 -0
- package/dist/extraction/mybatis-extractor.js.map +1 -0
- package/dist/extraction/tree-sitter-types.d.ts +14 -0
- package/dist/extraction/tree-sitter-types.d.ts.map +1 -1
- package/dist/extraction/tree-sitter.d.ts +84 -0
- package/dist/extraction/tree-sitter.d.ts.map +1 -1
- package/dist/extraction/tree-sitter.js +681 -20
- package/dist/extraction/tree-sitter.js.map +1 -1
- package/dist/extraction/vue-extractor.d.ts +15 -0
- package/dist/extraction/vue-extractor.d.ts.map +1 -1
- package/dist/extraction/vue-extractor.js +88 -0
- package/dist/extraction/vue-extractor.js.map +1 -1
- package/dist/extraction/wasm-runtime-flags.d.ts.map +1 -1
- package/dist/extraction/wasm-runtime-flags.js +1 -0
- package/dist/extraction/wasm-runtime-flags.js.map +1 -1
- package/dist/graph/traversal.d.ts.map +1 -1
- package/dist/graph/traversal.js +5 -2
- package/dist/graph/traversal.js.map +1 -1
- package/dist/index.d.ts +66 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +105 -1
- package/dist/index.js.map +1 -1
- package/dist/installer/config-writer.d.ts +7 -8
- package/dist/installer/config-writer.d.ts.map +1 -1
- package/dist/installer/config-writer.js +7 -27
- package/dist/installer/config-writer.js.map +1 -1
- package/dist/installer/index.d.ts +3 -20
- package/dist/installer/index.d.ts.map +1 -1
- package/dist/installer/index.js +8 -39
- package/dist/installer/index.js.map +1 -1
- package/dist/installer/instructions-template.d.ts +11 -21
- package/dist/installer/instructions-template.d.ts.map +1 -1
- package/dist/installer/instructions-template.js +12 -56
- package/dist/installer/instructions-template.js.map +1 -1
- package/dist/installer/targets/antigravity.d.ts +57 -0
- package/dist/installer/targets/antigravity.d.ts.map +1 -0
- package/dist/installer/targets/antigravity.js +308 -0
- package/dist/installer/targets/antigravity.js.map +1 -0
- package/dist/installer/targets/claude.d.ts +10 -1
- package/dist/installer/targets/claude.d.ts.map +1 -1
- package/dist/installer/targets/claude.js +25 -40
- package/dist/installer/targets/claude.js.map +1 -1
- package/dist/installer/targets/codex.d.ts.map +1 -1
- package/dist/installer/targets/codex.js +15 -13
- package/dist/installer/targets/codex.js.map +1 -1
- package/dist/installer/targets/cursor.d.ts.map +1 -1
- package/dist/installer/targets/cursor.js +9 -38
- package/dist/installer/targets/cursor.js.map +1 -1
- package/dist/installer/targets/gemini.d.ts +26 -0
- package/dist/installer/targets/gemini.d.ts.map +1 -0
- package/dist/installer/targets/gemini.js +167 -0
- package/dist/installer/targets/gemini.js.map +1 -0
- package/dist/installer/targets/hermes.d.ts.map +1 -1
- package/dist/installer/targets/hermes.js +57 -3
- package/dist/installer/targets/hermes.js.map +1 -1
- package/dist/installer/targets/kiro.d.ts +27 -0
- package/dist/installer/targets/kiro.d.ts.map +1 -0
- package/dist/installer/targets/kiro.js +178 -0
- package/dist/installer/targets/kiro.js.map +1 -0
- package/dist/installer/targets/opencode.d.ts.map +1 -1
- package/dist/installer/targets/opencode.js +15 -13
- package/dist/installer/targets/opencode.js.map +1 -1
- package/dist/installer/targets/registry.d.ts.map +1 -1
- package/dist/installer/targets/registry.js +6 -0
- package/dist/installer/targets/registry.js.map +1 -1
- package/dist/installer/targets/shared.d.ts.map +1 -1
- package/dist/installer/targets/shared.js +3 -2
- package/dist/installer/targets/shared.js.map +1 -1
- package/dist/installer/targets/types.d.ts +1 -16
- package/dist/installer/targets/types.d.ts.map +1 -1
- package/dist/mcp/daemon-paths.d.ts +46 -0
- package/dist/mcp/daemon-paths.d.ts.map +1 -0
- package/dist/mcp/daemon-paths.js +125 -0
- package/dist/mcp/daemon-paths.js.map +1 -0
- package/dist/mcp/daemon.d.ts +161 -0
- package/dist/mcp/daemon.d.ts.map +1 -0
- package/dist/mcp/daemon.js +403 -0
- package/dist/mcp/daemon.js.map +1 -0
- package/dist/mcp/engine.d.ts +105 -0
- package/dist/mcp/engine.d.ts.map +1 -0
- package/dist/mcp/engine.js +270 -0
- package/dist/mcp/engine.js.map +1 -0
- package/dist/mcp/index.d.ts +67 -53
- package/dist/mcp/index.d.ts.map +1 -1
- package/dist/mcp/index.js +315 -388
- package/dist/mcp/index.js.map +1 -1
- package/dist/mcp/proxy.d.ts +81 -0
- package/dist/mcp/proxy.d.ts.map +1 -0
- package/dist/mcp/proxy.js +510 -0
- package/dist/mcp/proxy.js.map +1 -0
- package/dist/mcp/server-instructions.d.ts +1 -1
- package/dist/mcp/server-instructions.d.ts.map +1 -1
- package/dist/mcp/server-instructions.js +21 -21
- package/dist/mcp/session.d.ts +77 -0
- package/dist/mcp/session.d.ts.map +1 -0
- package/dist/mcp/session.js +294 -0
- package/dist/mcp/session.js.map +1 -0
- package/dist/mcp/tools.d.ts +160 -14
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +1622 -322
- package/dist/mcp/tools.js.map +1 -1
- package/dist/mcp/transport.d.ts +111 -29
- package/dist/mcp/transport.d.ts.map +1 -1
- package/dist/mcp/transport.js +181 -71
- package/dist/mcp/transport.js.map +1 -1
- package/dist/mcp/version.d.ts +19 -0
- package/dist/mcp/version.d.ts.map +1 -0
- package/dist/mcp/version.js +71 -0
- package/dist/mcp/version.js.map +1 -0
- package/dist/resolution/callback-synthesizer.d.ts +10 -0
- package/dist/resolution/callback-synthesizer.d.ts.map +1 -0
- package/dist/resolution/callback-synthesizer.js +1300 -0
- package/dist/resolution/callback-synthesizer.js.map +1 -0
- package/dist/resolution/frameworks/csharp.d.ts.map +1 -1
- package/dist/resolution/frameworks/csharp.js +36 -8
- package/dist/resolution/frameworks/csharp.js.map +1 -1
- package/dist/resolution/frameworks/drupal.d.ts.map +1 -1
- package/dist/resolution/frameworks/drupal.js +44 -12
- package/dist/resolution/frameworks/drupal.js.map +1 -1
- package/dist/resolution/frameworks/expo-modules.d.ts +3 -0
- package/dist/resolution/frameworks/expo-modules.d.ts.map +1 -0
- package/dist/resolution/frameworks/expo-modules.js +143 -0
- package/dist/resolution/frameworks/expo-modules.js.map +1 -0
- package/dist/resolution/frameworks/express.d.ts.map +1 -1
- package/dist/resolution/frameworks/express.js +102 -19
- package/dist/resolution/frameworks/express.js.map +1 -1
- package/dist/resolution/frameworks/fabric.d.ts +3 -0
- package/dist/resolution/frameworks/fabric.d.ts.map +1 -0
- package/dist/resolution/frameworks/fabric.js +354 -0
- package/dist/resolution/frameworks/fabric.js.map +1 -0
- package/dist/resolution/frameworks/go.d.ts.map +1 -1
- package/dist/resolution/frameworks/go.js +6 -3
- package/dist/resolution/frameworks/go.js.map +1 -1
- package/dist/resolution/frameworks/index.d.ts +5 -0
- package/dist/resolution/frameworks/index.d.ts.map +1 -1
- package/dist/resolution/frameworks/index.js +25 -1
- package/dist/resolution/frameworks/index.js.map +1 -1
- package/dist/resolution/frameworks/java.d.ts.map +1 -1
- package/dist/resolution/frameworks/java.js +339 -12
- package/dist/resolution/frameworks/java.js.map +1 -1
- package/dist/resolution/frameworks/laravel.d.ts.map +1 -1
- package/dist/resolution/frameworks/laravel.js +17 -8
- package/dist/resolution/frameworks/laravel.js.map +1 -1
- package/dist/resolution/frameworks/nestjs.d.ts.map +1 -1
- package/dist/resolution/frameworks/nestjs.js +324 -0
- package/dist/resolution/frameworks/nestjs.js.map +1 -1
- package/dist/resolution/frameworks/play.d.ts +19 -0
- package/dist/resolution/frameworks/play.d.ts.map +1 -0
- package/dist/resolution/frameworks/play.js +111 -0
- package/dist/resolution/frameworks/play.js.map +1 -0
- package/dist/resolution/frameworks/python.d.ts.map +1 -1
- package/dist/resolution/frameworks/python.js +134 -16
- package/dist/resolution/frameworks/python.js.map +1 -1
- package/dist/resolution/frameworks/react-native.d.ts +3 -0
- package/dist/resolution/frameworks/react-native.d.ts.map +1 -0
- package/dist/resolution/frameworks/react-native.js +360 -0
- package/dist/resolution/frameworks/react-native.js.map +1 -0
- package/dist/resolution/frameworks/react.d.ts.map +1 -1
- package/dist/resolution/frameworks/react.js +96 -3
- package/dist/resolution/frameworks/react.js.map +1 -1
- package/dist/resolution/frameworks/ruby.d.ts.map +1 -1
- package/dist/resolution/frameworks/ruby.js +106 -2
- package/dist/resolution/frameworks/ruby.js.map +1 -1
- package/dist/resolution/frameworks/rust.d.ts.map +1 -1
- package/dist/resolution/frameworks/rust.js +102 -5
- package/dist/resolution/frameworks/rust.js.map +1 -1
- package/dist/resolution/frameworks/swift-objc.d.ts +37 -0
- package/dist/resolution/frameworks/swift-objc.d.ts.map +1 -0
- package/dist/resolution/frameworks/swift-objc.js +252 -0
- package/dist/resolution/frameworks/swift-objc.js.map +1 -0
- package/dist/resolution/frameworks/swift.d.ts.map +1 -1
- package/dist/resolution/frameworks/swift.js +30 -6
- package/dist/resolution/frameworks/swift.js.map +1 -1
- package/dist/resolution/go-module.d.ts +26 -0
- package/dist/resolution/go-module.d.ts.map +1 -0
- package/dist/resolution/go-module.js +78 -0
- package/dist/resolution/go-module.js.map +1 -0
- package/dist/resolution/import-resolver.d.ts +28 -0
- package/dist/resolution/import-resolver.d.ts.map +1 -1
- package/dist/resolution/import-resolver.js +617 -5
- package/dist/resolution/import-resolver.js.map +1 -1
- package/dist/resolution/index.d.ts +11 -0
- package/dist/resolution/index.d.ts.map +1 -1
- package/dist/resolution/index.js +156 -3
- package/dist/resolution/index.js.map +1 -1
- package/dist/resolution/name-matcher.d.ts.map +1 -1
- package/dist/resolution/name-matcher.js +212 -0
- package/dist/resolution/name-matcher.js.map +1 -1
- package/dist/resolution/swift-objc-bridge.d.ts +134 -0
- package/dist/resolution/swift-objc-bridge.d.ts.map +1 -0
- package/dist/resolution/swift-objc-bridge.js +256 -0
- package/dist/resolution/swift-objc-bridge.js.map +1 -0
- package/dist/resolution/types.d.ts +44 -0
- package/dist/resolution/types.d.ts.map +1 -1
- package/dist/resolution/workspace-packages.d.ts +48 -0
- package/dist/resolution/workspace-packages.d.ts.map +1 -0
- package/dist/resolution/workspace-packages.js +208 -0
- package/dist/resolution/workspace-packages.js.map +1 -0
- package/dist/search/query-utils.d.ts +18 -0
- package/dist/search/query-utils.d.ts.map +1 -1
- package/dist/search/query-utils.js +30 -0
- package/dist/search/query-utils.js.map +1 -1
- package/dist/sync/git-hooks.d.ts.map +1 -1
- package/dist/sync/git-hooks.js +2 -0
- package/dist/sync/git-hooks.js.map +1 -1
- package/dist/sync/index.d.ts +3 -1
- package/dist/sync/index.d.ts.map +1 -1
- package/dist/sync/index.js +8 -1
- package/dist/sync/index.js.map +1 -1
- package/dist/sync/watcher.d.ts +212 -8
- package/dist/sync/watcher.d.ts.map +1 -1
- package/dist/sync/watcher.js +465 -51
- package/dist/sync/watcher.js.map +1 -1
- package/dist/sync/worktree.d.ts +54 -0
- package/dist/sync/worktree.d.ts.map +1 -0
- package/dist/sync/worktree.js +137 -0
- package/dist/sync/worktree.js.map +1 -0
- package/dist/types.d.ts +9 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
- package/scripts/agent-eval/arms-F.sh +21 -0
- package/scripts/agent-eval/arms-matrix.sh +37 -0
- package/scripts/agent-eval/bench-readme.sh +28 -0
- package/scripts/agent-eval/bench-why-repo.sh +22 -0
- package/scripts/agent-eval/block-read-hook.sh +19 -0
- package/scripts/agent-eval/hook-settings.json +15 -0
- package/scripts/agent-eval/itrun.sh +24 -11
- package/scripts/agent-eval/parse-arms.mjs +116 -0
- package/scripts/agent-eval/parse-bench-readme.mjs +84 -0
- package/scripts/agent-eval/probe-context.mjs +21 -0
- package/scripts/agent-eval/probe-explore.mjs +40 -0
- package/scripts/agent-eval/probe-node.mjs +20 -0
- package/scripts/agent-eval/probe-sweep.mjs +119 -0
- package/scripts/agent-eval/probe-trace.mjs +20 -0
- package/scripts/agent-eval/run-arms.sh +56 -0
- package/scripts/agent-eval/seq-matrix.mjs +137 -0
- package/scripts/npm-sdk.js +75 -0
- package/scripts/pack-npm.sh +25 -1
- package/scripts/prepare-release.mjs +270 -0
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Aggregate the README A/B (bench-readme.sh output): per repo, median of N runs
|
|
3
|
+
// per arm → time, tool calls, tokens, cost, and % saved. Plus an average row.
|
|
4
|
+
//
|
|
5
|
+
// Tokens = SUM of per-turn assistant `usage` (input + output + cache read +
|
|
6
|
+
// cache creation) — the cumulative "total tokens processed". NOTE: `result.usage`
|
|
7
|
+
// is last-turn-only in current Claude Code, so it under-counts badly; don't use it.
|
|
8
|
+
// `total_cost_usd` and `duration_ms` are already cumulative.
|
|
9
|
+
//
|
|
10
|
+
// Usage: node parse-bench-readme.mjs [/tmp/ab-readme]
|
|
11
|
+
import { readFileSync, existsSync, readdirSync } from 'fs';
|
|
12
|
+
import { join } from 'path';
|
|
13
|
+
const ROOT = process.argv[2] || '/tmp/ab-readme';
|
|
14
|
+
const REPOS = ['vscode', 'excalidraw', 'django', 'tokio', 'okhttp', 'gin', 'alamofire'];
|
|
15
|
+
|
|
16
|
+
/**
 * Summarise one headless-run JSONL transcript.
 *
 * Walks every line of `file`, accumulating:
 *   - tokens: sum of per-turn assistant `usage` (input + output + cache read + cache creation)
 *   - tools / reads / grep / cg: tool_use counts (total, `Read`, `Grep`|`Glob`, names matching /codegraph/);
 *     `ToolSearch` calls are not counted at all
 *   - raced: true when any tool_result text contains "No such tool available"
 *   - the last `result` event, which supplies duration and cost
 *
 * @param {string} file - path to the transcript
 * @returns {{dur:number,tools:number,reads:number,grep:number,cg:number,tokens:number,cost:number,raced:boolean}|null}
 *   metrics, or null when the file is missing or the run did not end with a `success` result
 */
function parse(file) {
  if (!existsSync(file)) return null;
  const lines = readFileSync(file, 'utf8').split('\n').filter(Boolean);
  let tools = 0, reads = 0, grep = 0, cg = 0, tokens = 0, r = null, raced = false;
  for (const line of lines) {
    let e;
    try { e = JSON.parse(line); } catch { continue; }
    // A line can be valid JSON without being an event object (e.g. a bare `null`);
    // dereferencing it would throw and abort the whole aggregate, so skip it.
    if (e === null || typeof e !== 'object') continue;
    if (e.type === 'assistant') {
      const u = e.message?.usage;
      if (u) tokens += (u.input_tokens || 0) + (u.output_tokens || 0) + (u.cache_read_input_tokens || 0) + (u.cache_creation_input_tokens || 0);
      for (const b of (e.message?.content || [])) {
        if (b?.type !== 'tool_use') continue;
        const n = b.name;
        if (n === 'ToolSearch') continue;
        tools++;
        if (n === 'Read') reads++;
        else if (n === 'Grep' || n === 'Glob') grep++;
        else if (/codegraph/.test(n)) cg++;
      }
    }
    // MCP cold-start race: the headless agent fired before `codegraph serve --mcp`
    // finished registering its tools, so early calls returned "No such tool
    // available" and the agent floundered into grep/Read. That measures CodeGraph's
    // startup latency, NOT its steady-state value — flag the run so the aggregate
    // can exclude it (an artifact of headless first-turn timing, not the tool).
    if (e.type === 'user') {
      const blocks = Array.isArray(e.message?.content) ? e.message.content : [];
      for (const b of blocks) {
        if (b?.type !== 'tool_result') continue;
        const t = Array.isArray(b.content) ? b.content.map(c => c?.text || '').join('') : (b.content || '');
        if (/No such tool available/.test(t)) raced = true;
      }
    }
    if (e.type === 'result') r = e;
  }
  if (!r || r.subtype !== 'success') return null;
  return { dur: r.duration_ms / 1000, tools, reads, grep, cg, tokens, cost: r.total_cost_usd || 0, raced };
}
|
|
49
|
+
// Median of a list of numbers: the middle value for odd counts, the mean of the
// two middle values for even counts, and 0 for an empty list. Works on a sorted
// copy, so the caller's array is not reordered.
const median = (arr) => {
  const sorted = Array.from(arr);
  sorted.sort((x, y) => x - y);
  const count = sorted.length;
  if (count === 0) return 0;
  const upper = Math.floor(count / 2);
  if (count % 2) return sorted[upper];
  return (sorted[upper - 1] + sorted[upper]) / 2;
};
|
|
50
|
+
// Format a duration given in seconds as "Ns" (under a minute) or "Mm Ss".
// The value is rounded to whole seconds BEFORE it is split into minutes and
// seconds; rounding the remainder afterwards produced "1m 60s" for 119.6 and
// "60s" for 59.6.
const fmtTime = (s) => {
  const total = Math.round(s);
  return total >= 60 ? `${Math.floor(total / 60)}m ${total % 60}s` : `${total}s`;
};
|
|
51
|
+
// Human-readable token count: one-decimal millions ("1.5M") from 1,000,000 up,
// otherwise whole thousands ("12k").
const fmtTok = (t) => {
  if (t >= 1e6) {
    const millions = t / 1e6;
    return `${millions.toFixed(1)}M`;
  }
  const thousands = Math.round(t / 1000);
  return `${thousands}k`;
};
|
|
52
|
+
// Percentage saved by the WITH arm (`w`) relative to the WITHOUT arm (`wo`),
// rounded to a whole percent. Negative means WITH was more expensive. Returns 0
// when the baseline is not a positive number, avoiding a division by zero.
const pct = (w, wo) => {
  if (!(wo > 0)) return 0;
  const ratio = w / wo;
  return Math.round((1 - ratio) * 100);
};
|
|
53
|
+
|
|
54
|
+
console.log('repo n(w/wo) time WITH→WITHOUT tools W→WO tokens W→WO (saved) cost W→WO (saved)');
|
|
55
|
+
const savings = { cost: [], tokens: [], time: [], tools: [] };
|
|
56
|
+
for (const repo of REPOS) {
|
|
57
|
+
const dir = join(ROOT, repo);
|
|
58
|
+
const runDirs = existsSync(dir) ? readdirSync(dir).filter(d => /^run\d+$/.test(d)) : [];
|
|
59
|
+
// Exclude MCP-cold-start-raced WITH runs by default — they measure a startup
|
|
60
|
+
// race, not steady-state value. `CG_INCLUDE_RACED=1` keeps them (to see the raw
|
|
61
|
+
// distribution). The WITHOUT arm has no MCP, so it's never raced.
|
|
62
|
+
const includeRaced = process.env.CG_INCLUDE_RACED === '1';
|
|
63
|
+
const W = [], WO = []; let racedExcluded = 0;
|
|
64
|
+
for (const rd of runDirs) {
|
|
65
|
+
const w = parse(join(dir, rd, 'run-headless-with.jsonl'));
|
|
66
|
+
if (w) { if (w.raced && !includeRaced) racedExcluded++; else W.push(w); }
|
|
67
|
+
const wo = parse(join(dir, rd, 'run-headless-without.jsonl')); if (wo) WO.push(wo);
|
|
68
|
+
}
|
|
69
|
+
if (!W.length || !WO.length) { console.log(`${repo.padEnd(11)} (incomplete: w=${W.length} wo=${WO.length})`); continue; }
|
|
70
|
+
const m = (arr, k) => median(arr.map(x => x[k]));
|
|
71
|
+
const wT = m(W, 'dur'), woT = m(WO, 'dur'), wTok = m(W, 'tokens'), woTok = m(WO, 'tokens');
|
|
72
|
+
const wC = m(W, 'cost'), woC = m(WO, 'cost'), wTl = m(W, 'tools'), woTl = m(WO, 'tools');
|
|
73
|
+
savings.time.push(pct(wT, woT)); savings.tokens.push(pct(wTok, woTok)); savings.cost.push(pct(wC, woC)); savings.tools.push(pct(wTl, woTl));
|
|
74
|
+
console.log(
|
|
75
|
+
`${repo.padEnd(11)} ${W.length}/${WO.length} ` +
|
|
76
|
+
`${(fmtTime(wT) + '→' + fmtTime(woT)).padEnd(22)}` +
|
|
77
|
+
`${(Math.round(wTl) + '→' + Math.round(woTl)).padEnd(12)}` +
|
|
78
|
+
`${(fmtTok(wTok) + '→' + fmtTok(woTok) + ' (' + pct(wTok, woTok) + '%)').padEnd(24)}` +
|
|
79
|
+
`$${wC.toFixed(2)}→$${woC.toFixed(2)} (${pct(wC, woC)}%)` +
|
|
80
|
+
(racedExcluded ? ` [${racedExcluded} raced run${racedExcluded === 1 ? '' : 's'} excluded]` : '')
|
|
81
|
+
);
|
|
82
|
+
}
|
|
83
|
+
// Arithmetic mean of `a`, rounded to the nearest integer; 0 for an empty list.
const avg = (a) => {
  if (!a.length) return 0;
  const total = a.reduce((running, value) => running + value, 0);
  return Math.round(total / a.length);
};
|
|
84
|
+
console.log(`\nAVERAGE saved: cost ${avg(savings.cost)}% · tokens ${avg(savings.tokens)}% · time ${avg(savings.time)}% · tool calls ${avg(savings.tools)}%`);
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Probe codegraph_context (with call-paths) against an index using the built dist.
|
|
3
|
+
// Usage: node probe-context.mjs <repo-with-.codegraph> <task words...>
|
|
4
|
+
import { pathToFileURL } from 'node:url';
|
|
5
|
+
import { resolve } from 'node:path';
|
|
6
|
+
|
|
7
|
+
const [, , repo, ...taskParts] = process.argv;
|
|
8
|
+
const task = taskParts.join(' ');
|
|
9
|
+
if (!repo || !task) { console.error('usage: probe-context.mjs <repo> <task...>'); process.exit(1); }
|
|
10
|
+
|
|
11
|
+
// Dynamically import a module by filesystem path. The path is resolved against
// the process's current working directory and converted to a file:// URL.
const load = async (rel) => {
  const moduleUrl = pathToFileURL(resolve(rel));
  return import(moduleUrl.href);
};
|
|
12
|
+
const idx = await load('dist/index.js');
|
|
13
|
+
const tools = await load('dist/mcp/tools.js');
|
|
14
|
+
const CodeGraph = idx.default?.default ?? idx.default ?? idx.CodeGraph;
|
|
15
|
+
const ToolHandler = tools.ToolHandler ?? tools.default?.ToolHandler;
|
|
16
|
+
|
|
17
|
+
const cg = CodeGraph.openSync(repo);
|
|
18
|
+
const h = new ToolHandler(cg);
|
|
19
|
+
const res = await h.execute('codegraph_context', { task });
|
|
20
|
+
console.log(res.content?.[0]?.text ?? '(no text)');
|
|
21
|
+
try { cg.close?.(); } catch {}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// One-shot probe: run handleExplore against an existing index using the built
|
|
3
|
+
// dist, print the output + a few stats. Lets us verify explore's coverage fix
|
|
4
|
+
// without a full agent run. Usage: node probe-explore.mjs <repo-with-.codegraph> "<query>"
|
|
5
|
+
import { pathToFileURL } from 'node:url';
|
|
6
|
+
import { resolve } from 'node:path';
|
|
7
|
+
|
|
8
|
+
const [, , repo, query] = process.argv;
|
|
9
|
+
if (!repo || !query) {
|
|
10
|
+
console.error('usage: probe-explore.mjs <repo> "<query>"');
|
|
11
|
+
process.exit(1);
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
// Dynamically import a module by filesystem path. The path is resolved against
// the process's current working directory and converted to a file:// URL.
const load = async (rel) => {
  const absolutePath = resolve(rel);
  const { href } = pathToFileURL(absolutePath);
  return import(href);
};
|
|
15
|
+
const idx = await load('dist/index.js');
|
|
16
|
+
const tools = await load('dist/mcp/tools.js');
|
|
17
|
+
|
|
18
|
+
// esModuleInterop: dynamic import of CJS yields { default: module.exports, ...named }
|
|
19
|
+
const CodeGraph = idx.default?.default ?? idx.default ?? idx.CodeGraph;
|
|
20
|
+
const ToolHandler = tools.ToolHandler ?? tools.default?.ToolHandler;
|
|
21
|
+
|
|
22
|
+
if (typeof CodeGraph?.openSync !== 'function') {
|
|
23
|
+
console.error('could not resolve CodeGraph.openSync; index keys:', Object.keys(idx), 'default keys:', idx.default && Object.keys(idx.default));
|
|
24
|
+
process.exit(2);
|
|
25
|
+
}
|
|
26
|
+
if (typeof ToolHandler !== 'function') {
|
|
27
|
+
console.error('could not resolve ToolHandler; tools keys:', Object.keys(tools));
|
|
28
|
+
process.exit(2);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
const cg = CodeGraph.openSync(repo);
|
|
32
|
+
const h = new ToolHandler(cg);
|
|
33
|
+
const res = await h.execute('codegraph_explore', { query });
|
|
34
|
+
const text = res.content?.[0]?.text ?? '(no text)';
|
|
35
|
+
console.log(text);
|
|
36
|
+
console.error('\n--- PROBE STATS ---');
|
|
37
|
+
console.error('output chars:', text.length);
|
|
38
|
+
console.error('triggerRender body present (-> setState({})):', /triggerRender[\s\S]{0,400}setState\(\{\}\)/.test(text));
|
|
39
|
+
console.error('App.tsx in source section:', /#### .*App\.tsx —/.test(text));
|
|
40
|
+
try { cg.close?.(); } catch {}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Probe codegraph_node (with trail) against an index using the built dist.
|
|
3
|
+
// Usage: node probe-node.mjs <repo-with-.codegraph> <symbol> [code]
|
|
4
|
+
import { pathToFileURL } from 'node:url';
|
|
5
|
+
import { resolve } from 'node:path';
|
|
6
|
+
|
|
7
|
+
const [, , repo, symbol, code] = process.argv;
|
|
8
|
+
if (!repo || !symbol) { console.error('usage: probe-node.mjs <repo> <symbol> [code]'); process.exit(1); }
|
|
9
|
+
|
|
10
|
+
// Dynamically import a module by filesystem path. The path is resolved against
// the process's current working directory and converted to a file:// URL.
const load = async (rel) => {
  const fileUrl = pathToFileURL(resolve(rel));
  return import(fileUrl.href);
};
|
|
11
|
+
const idx = await load('dist/index.js');
|
|
12
|
+
const tools = await load('dist/mcp/tools.js');
|
|
13
|
+
const CodeGraph = idx.default?.default ?? idx.default ?? idx.CodeGraph;
|
|
14
|
+
const ToolHandler = tools.ToolHandler ?? tools.default?.ToolHandler;
|
|
15
|
+
|
|
16
|
+
const cg = CodeGraph.openSync(repo);
|
|
17
|
+
const h = new ToolHandler(cg);
|
|
18
|
+
const res = await h.execute('codegraph_node', { symbol, includeCode: code === 'code' });
|
|
19
|
+
console.log(res.content?.[0]?.text ?? '(no text)');
|
|
20
|
+
try { cg.close?.(); } catch {}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// probe-sweep — direct MCP test across N repos × N tools, no claude needed.
|
|
3
|
+
//
|
|
4
|
+
// Measures response characteristics (size, sections present, signals fired)
|
|
5
|
+
// for each (repo, query) pair against the built dist/. Sub-second per probe;
|
|
6
|
+
// the full sweep below runs in ~10-30s vs hours for a real claude audit.
|
|
7
|
+
//
|
|
8
|
+
// Use this to iterate on backend changes rapidly: change tools.ts /
|
|
9
|
+
// context-builder, npm run build, re-run probe-sweep, compare. Once a
|
|
10
|
+
// change looks good on probe metrics, run a focused claude audit for the
|
|
11
|
+
// few repos that matter to confirm end-to-end cost behavior.
|
|
12
|
+
//
|
|
13
|
+
// Usage: node scripts/agent-eval/probe-sweep.mjs [--tool=context|explore|trace] [--repos=a,b,c]
|
|
14
|
+
import { pathToFileURL } from 'node:url';
|
|
15
|
+
import { resolve } from 'node:path';
|
|
16
|
+
|
|
17
|
+
const args = Object.fromEntries(
|
|
18
|
+
process.argv.slice(2).map(a => a.startsWith('--') ? a.slice(2).split('=') : [a, true])
|
|
19
|
+
);
|
|
20
|
+
const TOOL = args.tool ?? 'context';
|
|
21
|
+
|
|
22
|
+
// Dynamically import a module by filesystem path. The path is resolved against
// the process's current working directory and converted to a file:// URL.
// Returns the import() promise directly.
const load = (rel) => {
  const fileUrl = pathToFileURL(resolve(rel));
  return import(fileUrl.href);
};
|
|
23
|
+
const idx = await load('dist/index.js');
|
|
24
|
+
const tools = await load('dist/mcp/tools.js');
|
|
25
|
+
const CodeGraph = idx.default?.default ?? idx.default ?? idx.CodeGraph;
|
|
26
|
+
const ToolHandler = tools.ToolHandler ?? tools.default?.ToolHandler;
|
|
27
|
+
|
|
28
|
+
// Each entry: id, repo, q (the query). Trace mode does not use q; it always probes from 'main' to 'main'.
|
|
29
|
+
// The query is the same prompt used in the real claude audits, so probe
|
|
30
|
+
// output is directly comparable to the agent's would-be input.
|
|
31
|
+
const SWEEP = [
|
|
32
|
+
// Small realworld template repos (the loss cases from the cross-language sweep)
|
|
33
|
+
{ id: 'gin-rw', repo: '/tmp/codegraph-corpus/gin-realworld', q: 'How does this Gin app route a request through its middleware chain to a handler?' },
|
|
34
|
+
{ id: 'go-mux', repo: '/tmp/codegraph-corpus/go-mux', q: 'How does this gorilla/mux app route a request to its handler?' },
|
|
35
|
+
{ id: 'fastapi-rw', repo: '/tmp/codegraph-corpus/fastapi-realworld', q: 'How does FastAPI route a request through its dependencies to a handler?' },
|
|
36
|
+
{ id: 'spring-pc', repo: '/tmp/codegraph-corpus/spring-petclinic', q: 'How does Spring route an HTTP request to a controller method?' },
|
|
37
|
+
{ id: 'axum-rw', repo: '/tmp/codegraph-corpus/rust-axum-realworld', q: 'How does Axum route a request to its handler in this app?' },
|
|
38
|
+
{ id: 'express-rw', repo: '/tmp/codegraph-corpus/express-realworld', q: 'How does this Express app route a request through middleware to a handler?' },
|
|
39
|
+
{ id: 'kotlin-pc', repo: '/tmp/codegraph-corpus/kotlin-petclinic', q: 'How does the Kotlin Spring app route an HTTP request to its handler?' },
|
|
40
|
+
{ id: 'flask-mb', repo: '/tmp/codegraph-corpus/flask-microblog', q: 'How does this Flask app route a request to a view function?' },
|
|
41
|
+
{ id: 'vapor-tpl', repo: '/tmp/codegraph-corpus/vapor-template', q: 'How does Vapor route an HTTP request to its handler?' },
|
|
42
|
+
{ id: 'cpp-leveldb', repo: '/tmp/codegraph-corpus/cpp-leveldb', q: 'How does LevelDB handle a Put operation through to disk?' },
|
|
43
|
+
{ id: 'lualine', repo: '/tmp/codegraph-corpus/lualine.nvim', q: 'How does lualine assemble and render the statusline?' },
|
|
44
|
+
{ id: 'drupal-admin', repo: '/tmp/codegraph-corpus/drupal-admintoolbar', q: 'How does the Drupal admin toolbar module render its toolbar?' },
|
|
45
|
+
{ id: 'svelte-rw', repo: '/tmp/codegraph-corpus/svelte-realworld', q: 'How does this SvelteKit app route a request to a handler?' },
|
|
46
|
+
{ id: 'react-rw', repo: '/tmp/codegraph-corpus/react-realworld', q: 'How does this React app fetch and display articles?' },
|
|
47
|
+
{ id: 'rails-rw', repo: '/tmp/codegraph-corpus/rails-realworld', q: 'How does Rails route a request to a controller action?' },
|
|
48
|
+
{ id: 'flask-rest', repo: '/tmp/codegraph-corpus/flask-restful-realworld', q: 'How does Flask-RESTful route a request to a resource method?' },
|
|
49
|
+
{ id: 'laravel-rw', repo: '/tmp/codegraph-corpus/laravel-realworld', q: 'How does Laravel route a request to the controller method?' },
|
|
50
|
+
{ id: 'aspnet-rw', repo: '/tmp/codegraph-corpus/aspnet-realworld', q: 'How does ASP.NET route a request to the controller action?' },
|
|
51
|
+
// The iter7 wins/ties (to make sure we don't regress)
|
|
52
|
+
{ id: 'cobra', repo: '/tmp/codegraph-corpus/cobra', q: 'How does cobra parse commands and flags?' },
|
|
53
|
+
{ id: 'sinatra', repo: '/tmp/codegraph-corpus/sinatra', q: 'How does sinatra route a request to its handler?' },
|
|
54
|
+
{ id: 'slim', repo: '/tmp/codegraph-corpus/slim', q: 'How does slim route a request and apply middleware?' },
|
|
55
|
+
];
|
|
56
|
+
|
|
57
|
+
// Detect signals in response text — these are the levers we've added that
|
|
58
|
+
// otherwise only show up via "agent ran X more tool calls" downstream.
|
|
59
|
+
const detect = (text) => ({
|
|
60
|
+
hasEntryPoints: /^### Entry Points/m.test(text),
|
|
61
|
+
hasRelatedSymbols: /^### Related Symbols/m.test(text),
|
|
62
|
+
hasFlowTrace: /^## Inline flow trace/m.test(text),
|
|
63
|
+
hasRouteManifest: /^## Routing manifest/m.test(text),
|
|
64
|
+
hasTopHandler: /^### Top handler file/m.test(text),
|
|
65
|
+
hasSmallRepoTail: /This project is small/.test(text),
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
const filterRepos = args.repos ? new Set(String(args.repos).split(',')) : null;
|
|
69
|
+
const subjects = SWEEP.filter(s => !filterRepos || filterRepos.has(s.id));
|
|
70
|
+
|
|
71
|
+
const t0 = Date.now();
|
|
72
|
+
const rows = [];
|
|
73
|
+
// Run the selected tool once per subject and record one row each: timing,
// response size, and which response sections were detected. A failure for one
// subject is recorded as an error row so the sweep continues with the rest.
for (const s of subjects) {
  let cg;
  try {
    cg = CodeGraph.openSync(s.repo);
    const handler = new ToolHandler(cg);
    const t1 = Date.now();
    // context takes { task }, explore takes { query }; any other tool value is
    // sent the fixed trace arguments main → main.
    const res = await handler.execute('codegraph_' + TOOL,
      TOOL === 'context' ? { task: s.q } :
      TOOL === 'explore' ? { query: s.q } : { from: 'main', to: 'main' });
    const text = res.content?.[0]?.text ?? '';
    const signals = detect(text);
    rows.push({
      id: s.id,
      ms: Date.now() - t1,
      chars: text.length,
      lines: text.split('\n').length,
      ...signals,
    });
  } catch (e) {
    rows.push({ id: s.id, error: String(e).slice(0, 80) });
  } finally {
    // Close in `finally` so an index opened for a subject whose probe throws is
    // still released. Best-effort: a failing close must not abort the sweep.
    try { cg?.close?.(); } catch {}
  }
}
|
|
95
|
+
|
|
96
|
+
// Pretty-print as a compact table.
|
|
97
|
+
const fmt = (r) =>
|
|
98
|
+
r.error
|
|
99
|
+
? ` ${r.id.padEnd(13)} ERROR: ${r.error}`
|
|
100
|
+
: ` ${r.id.padEnd(13)} ${String(r.chars).padStart(6)}c ${String(r.lines).padStart(4)}L ${String(r.ms).padStart(4)}ms` +
|
|
101
|
+
` ${r.hasEntryPoints ? 'EP ' : ' '}` +
|
|
102
|
+
`${r.hasFlowTrace ? 'TRC ' : ' '}` +
|
|
103
|
+
`${r.hasRouteManifest ? 'MAN ' : ' '}` +
|
|
104
|
+
`${r.hasTopHandler ? 'HND ' : ' '}` +
|
|
105
|
+
`${r.hasSmallRepoTail ? 'TAIL' : ' '}`;
|
|
106
|
+
console.log(`=== probe-sweep tool=${TOOL} n=${subjects.length} (${Date.now() - t0}ms total) ===`);
|
|
107
|
+
console.log(' id chars lines ms signals');
|
|
108
|
+
console.log(' ' + '-'.repeat(56));
|
|
109
|
+
for (const r of rows) console.log(fmt(r));
|
|
110
|
+
|
|
111
|
+
// Sum + medians for the size pillar
|
|
112
|
+
const sizes = rows.filter(r => !r.error).map(r => r.chars);
|
|
113
|
+
sizes.sort((a, b) => a - b);
|
|
114
|
+
const median = sizes[Math.floor(sizes.length / 2)];
|
|
115
|
+
const sum = sizes.reduce((a, b) => a + b, 0);
|
|
116
|
+
console.log(` ${'-'.repeat(64)}`);
|
|
117
|
+
console.log(` median=${median}c total=${sum}c ` +
|
|
118
|
+
`manifest=${rows.filter(r => r.hasRouteManifest).length}/${rows.filter(r => !r.error).length} ` +
|
|
119
|
+
`top-handler=${rows.filter(r => r.hasTopHandler).length}/${rows.filter(r => !r.error).length}`);
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Probe codegraph_trace against an index using the built dist.
|
|
3
|
+
// Usage: node probe-trace.mjs <repo-with-.codegraph> <from> <to>
|
|
4
|
+
import { pathToFileURL } from 'node:url';
|
|
5
|
+
import { resolve } from 'node:path';
|
|
6
|
+
|
|
7
|
+
const [, , repo, from, to] = process.argv;
|
|
8
|
+
if (!repo || !from || !to) { console.error('usage: probe-trace.mjs <repo> <from> <to>'); process.exit(1); }
|
|
9
|
+
|
|
10
|
+
// Import a module by filesystem path; relative paths are resolved against the current working directory.
const load = async (rel) => {
  const absolutePath = resolve(rel);
  const { href } = pathToFileURL(absolutePath);
  return import(href);
};
|
|
11
|
+
const idx = await load('dist/index.js');
|
|
12
|
+
const tools = await load('dist/mcp/tools.js');
|
|
13
|
+
const CodeGraph = idx.default?.default ?? idx.default ?? idx.CodeGraph;
|
|
14
|
+
const ToolHandler = tools.ToolHandler ?? tools.default?.ToolHandler;
|
|
15
|
+
|
|
16
|
+
const cg = CodeGraph.openSync(repo);
|
|
17
|
+
const h = new ToolHandler(cg);
|
|
18
|
+
const res = await h.execute('codegraph_trace', { from, to });
|
|
19
|
+
console.log(res.content?.[0]?.text ?? '(no text)');
|
|
20
|
+
try { cg.close?.(); } catch {}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Tool-surface ablation — run ONE repo+question under ONE arm.
|
|
3
|
+
#
|
|
4
|
+
# Arms vary (exposed codegraph tools, trace-first steering). Tools are trimmed
|
|
5
|
+
# SERVER-SIDE via CODEGRAPH_MCP_TOOLS in the MCP config's `env` block, so an
|
|
6
|
+
# ablated tool is genuinely absent from ListTools — no deferred-ToolSearch or
|
|
7
|
+
# denied-call confound (which --disallowedTools would introduce). Steering is
|
|
8
|
+
# injected with --append-system-prompt, so no rebuild of the shipped
|
|
9
|
+
# server-instructions is needed to A/B it.
|
|
10
|
+
#
|
|
11
|
+
# A control all tools no steering
|
|
12
|
+
# B steer all tools trace-first
|
|
13
|
+
# C no-explore hide explore trace-first
|
|
14
|
+
# D trace-centric hide explore+context trace-first
|
|
15
|
+
# E control-probe hide explore+context trace-first (caller passes a NON-flow Q)
|
|
16
|
+
#
|
|
17
|
+
# Usage: run-arms.sh <repo-path> "<question>" <A|B|C|D|E|F|G|H|I> [run-id]
|
|
18
|
+
set -uo pipefail
|
|
19
|
+
REPO="${1:?repo path}"; Q="${2:?question}"; ARM="${3:?arm A-E}"; RID="${4:-1}"
|
|
20
|
+
CG_BIN="${CG_BIN:-$(command -v codegraph)}"
|
|
21
|
+
OUT="${ARMS_OUT:-/tmp/arms}/$(basename "$REPO")"
|
|
22
|
+
mkdir -p "$OUT"
|
|
23
|
+
[ -n "$CG_BIN" ] || { echo "no codegraph binary (set CG_BIN)"; exit 1; }
|
|
24
|
+
[ -d "$REPO/.codegraph" ] || { echo "no .codegraph index at $REPO"; exit 1; }
|
|
25
|
+
|
|
26
|
+
STEER='Flow questions ("how does X reach/become Y", "trace the flow", request to handler, state to render): call codegraph_trace(from,to) FIRST — one call returns the whole path. Use codegraph_context/search only to locate the two endpoint symbols if you do not know them. Do NOT reconstruct the path with repeated search/callers/explore.'
|
|
27
|
+
KEEP_NO_EXPLORE="trace,search,node,context,callers,callees,impact,files,status"
|
|
28
|
+
KEEP_TRACE_CENTRIC="trace,search,node,callers,callees,impact,files,status"
|
|
29
|
+
|
|
30
|
+
case "$ARM" in
|
|
31
|
+
A|G|H|I) TOOLS=""; STEERING="" ;; # no steering; H = body-trace, I = body-trace + destination callees (sufficiency)
|
|
32
|
+
B|F) TOOLS=""; STEERING="$STEER" ;; # F = B's surface, run on the body-inlining trace build
|
|
33
|
+
C) TOOLS="$KEEP_NO_EXPLORE"; STEERING="$STEER" ;;
|
|
34
|
+
D|E) TOOLS="$KEEP_TRACE_CENTRIC"; STEERING="$STEER" ;;
|
|
35
|
+
# Reject unknown arms; the list in the message mirrors every pattern this case statement accepts.
*) echo "bad arm '$ARM' (want A|B|C|D|E|F|G|H|I)"; exit 1 ;;
|
|
36
|
+
esac
|
|
37
|
+
|
|
38
|
+
CFG="$OUT/mcp-$ARM.json"
|
|
39
|
+
if [ -n "$TOOLS" ]; then
|
|
40
|
+
cat > "$CFG" <<JSON
|
|
41
|
+
{"mcpServers":{"codegraph":{"command":"$CG_BIN","args":["serve","--mcp","--path","$REPO"],"env":{"CODEGRAPH_MCP_TOOLS":"$TOOLS"}}}}
|
|
42
|
+
JSON
|
|
43
|
+
else
|
|
44
|
+
cat > "$CFG" <<JSON
|
|
45
|
+
{"mcpServers":{"codegraph":{"command":"$CG_BIN","args":["serve","--mcp","--path","$REPO"]}}}
|
|
46
|
+
JSON
|
|
47
|
+
fi
|
|
48
|
+
|
|
49
|
+
LOG="$OUT/$ARM-r$RID.jsonl"; ERR="$OUT/$ARM-r$RID.err"
|
|
50
|
+
ARGS=( -p "$Q" --output-format stream-json --verbose
|
|
51
|
+
--permission-mode bypassPermissions --model opus --max-budget-usd 4
|
|
52
|
+
--strict-mcp-config --mcp-config "$CFG" )
|
|
53
|
+
[ -n "$STEERING" ] && ARGS+=( --append-system-prompt "$STEERING" )
|
|
54
|
+
|
|
55
|
+
( cd "$REPO" && claude "${ARGS[@]}" > "$LOG" 2>"$ERR" )
|
|
56
|
+
# Save the claude subshell's exit status before anything else runs: in bash the
# $(basename …) substitution below would overwrite $? before it is expanded.
RC=$?
echo "[$(basename "$REPO") $ARM r$RID] exit $RC -> $LOG ($(wc -l < "$LOG" | tr -d ' ') lines)"
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Mine the surviving A/B stream-json logs (/tmp/ab-matrix/<Cell>/run-headless-*.jsonl)
|
|
3
|
+
// for what the aggregate matrix can't see: the call SEQUENCE and per-call output SIZE.
|
|
4
|
+
//
|
|
5
|
+
// Answers three questions:
|
|
6
|
+
// 1. Trace adoption — on a flow question, does the with-arm actually call codegraph_trace?
|
|
7
|
+
// 2. Payload size vs repo size — is trace path-scoped (tiny, size-independent) while
|
|
8
|
+
// explore is breadth-scoped (grows with the repo / over-returns on small repos)?
|
|
9
|
+
// 3. Round-trips — num_turns with vs without (the real wall-clock driver).
|
|
10
|
+
//
|
|
11
|
+
// Usage: node scripts/agent-eval/seq-matrix.mjs [/tmp/ab-matrix]
|
|
12
|
+
import { readFileSync, readdirSync, existsSync } from 'fs';
|
|
13
|
+
import { join } from 'path';
|
|
14
|
+
|
|
15
|
+
const AB = process.argv[2] || '/tmp/ab-matrix';
|
|
16
|
+
// Path of the published matrix table, resolved relative to this script.
// URL#pathname is still percent-encoded (a checkout under "my repo/" yields "my%20repo/"),
// so decode it before handing it to fs; otherwise existsSync(MD) is false and repoMeta stays empty.
const MD = decodeURIComponent(new URL('../../docs/benchmarks/codegraph-ab-matrix.md', import.meta.url).pathname);
|
|
17
|
+
|
|
18
|
+
// repo -> {lang,size,files} from the published matrix table
|
|
19
|
+
const repoMeta = {};
|
|
20
|
+
if (existsSync(MD)) for (const line of readFileSync(MD, 'utf8').split('\n')) {
|
|
21
|
+
const m = line.match(/^\|\s*([^|]+?)\s*\|\s*(S|M|L)\s*\|\s*`([^`]+)`\s*\|\s*(\d+)\s*\|/);
|
|
22
|
+
if (m) repoMeta[m[3]] = { lang: m[1].trim(), size: m[2], files: +m[4] };
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
// Shorten an MCP tool name: drop the first 'mcp__codegraph__codegraph_' occurrence,
// then the first remaining 'mcp__codegraph__' occurrence.
const cgShort = (n) => {
  const withoutLongPrefix = n.replace('mcp__codegraph__codegraph_', '');
  return withoutLongPrefix.replace('mcp__codegraph__', '');
};
|
|
26
|
+
const tag = (n) => n === 'Read' ? 'R' : n === 'Grep' ? 'G' : n === 'Glob' ? 'Gl'
|
|
27
|
+
: n === 'Bash' ? 'B' : n === 'Task' ? 'Ag' : n === 'ToolSearch' ? 'TS'
|
|
28
|
+
: n.includes('codegraph') ? cgShort(n) : n;
|
|
29
|
+
|
|
30
|
+
function parse(file) {
|
|
31
|
+
if (!existsSync(file)) return null;
|
|
32
|
+
const lines = readFileSync(file, 'utf8').split('\n').filter(Boolean);
|
|
33
|
+
const calls = []; let result = null, initCg = 0;
|
|
34
|
+
for (const l of lines) {
|
|
35
|
+
let ev; try { ev = JSON.parse(l); } catch { continue; }
|
|
36
|
+
if (ev.type === 'system' && ev.subtype === 'init') initCg = (ev.tools || []).filter(t => /codegraph/.test(t)).length;
|
|
37
|
+
if (ev.type === 'assistant') for (const b of (ev.message?.content || [])) if (b.type === 'tool_use') {
|
|
38
|
+
const i = b.input || {};
|
|
39
|
+
const q = i.query ?? i.symbol ?? i.task ?? (i.from && i.to ? `${i.from}->${i.to}` : (i.file_path || i.command || ''));
|
|
40
|
+
calls.push({ id: b.id, name: b.name, q: String(q ?? '').slice(0, 38), out: 0 });
|
|
41
|
+
}
|
|
42
|
+
if (ev.type === 'user') for (const b of (ev.message?.content || [])) if (b.type === 'tool_result') {
|
|
43
|
+
const c = b.content;
|
|
44
|
+
const txt = typeof c === 'string' ? c : Array.isArray(c) ? c.map(x => x?.text || '').join('') : '';
|
|
45
|
+
const call = calls.find(k => k.id === b.tool_use_id); if (call) call.out = txt.length;
|
|
46
|
+
}
|
|
47
|
+
if (ev.type === 'result') result = ev;
|
|
48
|
+
}
|
|
49
|
+
const cg = calls.filter(c => c.name.includes('codegraph'));
|
|
50
|
+
const perTool = {};
|
|
51
|
+
for (const c of cg) { const k = cgShort(c.name); (perTool[k] ??= { n: 0, out: 0 }); perTool[k].n++; perTool[k].out += c.out; }
|
|
52
|
+
const traceIdx = cg.findIndex(c => c.name.includes('trace'));
|
|
53
|
+
const u = result?.usage || {};
|
|
54
|
+
return {
|
|
55
|
+
initCg, cg, perTool,
|
|
56
|
+
cgSeq: cg.map(c => cgShort(c.name)),
|
|
57
|
+
seq: calls.map(c => tag(c.name)),
|
|
58
|
+
reads: calls.filter(c => c.name === 'Read').length,
|
|
59
|
+
greps: calls.filter(c => c.name === 'Grep').length,
|
|
60
|
+
cgOut: cg.reduce((s, c) => s + c.out, 0),
|
|
61
|
+
traceUsed: traceIdx >= 0,
|
|
62
|
+
afterTrace: traceIdx >= 0 ? cg.slice(traceIdx + 1).map(c => cgShort(c.name)) : null,
|
|
63
|
+
turns: result?.num_turns ?? null,
|
|
64
|
+
dur: result?.duration_ms ? Math.round(result.duration_ms / 1000) : null,
|
|
65
|
+
cost: result?.total_cost_usd || 0,
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
const cells = [];
|
|
70
|
+
for (const d of readdirSync(AB)) {
|
|
71
|
+
const dir = join(AB, d);
|
|
72
|
+
if (!existsSync(join(dir, 'run-headless-with.jsonl'))) continue;
|
|
73
|
+
const log = existsSync(join(AB, d + '.log')) ? readFileSync(join(AB, d + '.log'), 'utf8') : '';
|
|
74
|
+
const repo = (log.match(/repo:\s*\S*\/([^\s/]+)/) || [])[1] || d;
|
|
75
|
+
const question = (log.match(/question:\s*(.+)/) || [])[1] || '';
|
|
76
|
+
cells.push({ cell: d, repo, question, ...(repoMeta[repo] || {}),
|
|
77
|
+
with: parse(join(dir, 'run-headless-with.jsonl')),
|
|
78
|
+
without: parse(join(dir, 'run-headless-without.jsonl')) });
|
|
79
|
+
}
|
|
80
|
+
cells.sort((a, b) => (a.files || 0) - (b.files || 0));
|
|
81
|
+
|
|
82
|
+
// Render a count in thousands with one decimal place (e.g. 1500 -> '1.5').
const k = (n) => {
  const thousands = n / 1000;
  return thousands.toFixed(1);
};
|
|
83
|
+
// Stringify a value and right-pad it with spaces to at least n characters.
const pad = (s, n) => {
  const text = String(s);
  return text.padEnd(n);
};
|
|
84
|
+
|
|
85
|
+
// ---- per-cell sequence table ----
|
|
86
|
+
console.log('\n=== PER-CELL: with-arm codegraph sequence + payload (sorted by repo size) ===');
|
|
87
|
+
console.log(pad('repo', 22), pad('files', 6), 'trace', pad('cg-call sequence', 40), pad('cgOutK', 7), 'turns(w/wo)');
|
|
88
|
+
for (const c of cells) {
|
|
89
|
+
const w = c.with;
|
|
90
|
+
console.log(
|
|
91
|
+
pad(c.repo, 22), pad(c.files ?? '?', 6),
|
|
92
|
+
pad(w.traceUsed ? 'YES' : 'no', 5),
|
|
93
|
+
pad(w.cgSeq.join(',') || '(none)', 40),
|
|
94
|
+
pad(k(w.cgOut), 7),
|
|
95
|
+
`${w.turns}/${c.without?.turns}`,
|
|
96
|
+
);
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
// ---- trace adoption ----
|
|
100
|
+
const flow = cells; // every matrix question is a canonical flow question by design
|
|
101
|
+
const used = flow.filter(c => c.with.traceUsed);
|
|
102
|
+
console.log(`\n=== TRACE ADOPTION (all ${flow.length} cells are flow questions) ===`);
|
|
103
|
+
console.log(`trace called in ${used.length}/${flow.length} cells`);
|
|
104
|
+
console.log('used trace:', used.map(c => c.repo).join(', ') || '(none)');
|
|
105
|
+
if (used.length) console.log('after-trace follow-ups:', used.map(c => `${c.repo}[${c.with.afterTrace.join(',') || 'none'}]`).join(' '));
|
|
106
|
+
|
|
107
|
+
// ---- payload size by repo-size tier ----
|
|
108
|
+
// Bucket a repo by file count: under 200 is small, under 2000 is medium, anything else is large.
const tier = (f) => {
  if (f < 200) return 'S(<200)';
  if (f < 2000) return 'M(<2000)';
  return 'L(>=2000)';
};
|
|
109
|
+
const byTier = {};
|
|
110
|
+
for (const c of cells) { (byTier[tier(c.files || 0)] ??= []).push(c.with.cgOut); }
|
|
111
|
+
console.log('\n=== with-arm TOTAL codegraph payload by repo-size tier ===');
|
|
112
|
+
for (const t of ['S(<200)', 'M(<2000)', 'L(>=2000)']) {
|
|
113
|
+
const a = byTier[t] || []; if (!a.length) continue;
|
|
114
|
+
const avg = a.reduce((s, x) => s + x, 0) / a.length;
|
|
115
|
+
console.log(` ${pad(t, 10)} n=${a.length} avg cgOut=${k(avg)}K range ${k(Math.min(...a))}-${k(Math.max(...a))}K`);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
// ---- per-tool usage + avg payload (breadth vs path evidence) ----
|
|
119
|
+
const tot = {};
|
|
120
|
+
for (const c of cells) for (const [name, v] of Object.entries(c.with.perTool)) {
|
|
121
|
+
(tot[name] ??= { n: 0, out: 0 }); tot[name].n += v.n; tot[name].out += v.out;
|
|
122
|
+
}
|
|
123
|
+
console.log('\n=== codegraph tool usage across all cells (n calls, avg payload/call) ===');
|
|
124
|
+
for (const [name, v] of Object.entries(tot).sort((a, b) => b[1].n - a[1].n)) {
|
|
125
|
+
console.log(` ${pad(name, 10)} calls=${pad(v.n, 4)} avg=${k(v.out / v.n)}K/call total=${k(v.out)}K`);
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
// ---- round-trips ----
|
|
129
|
+
// Add up f(item) over arr, counting any falsy result (null, undefined, NaN, 0) as 0.
const sum = (arr, f) => {
  let total = 0;
  arr.forEach((item) => {
    total += f(item) || 0;
  });
  return total;
};
|
|
130
|
+
const wTurns = sum(cells, c => c.with.turns), woTurns = sum(cells, c => c.without?.turns);
|
|
131
|
+
const wCalls = sum(cells, c => c.with.cg.length);
|
|
132
|
+
const tsAll = cells.every(c => c.with.seq[0] === 'TS');
|
|
133
|
+
console.log('\n=== ROUND-TRIPS ===');
|
|
134
|
+
// woTurns is 0 when no cell has a without-arm result (only the with-arm log is required),
// so report n/a instead of dividing by zero and printing -Infinity%/NaN%.
console.log(`turns: with=${wTurns} without=${woTurns} (${woTurns > 0 ? `${((1 - wTurns / woTurns) * 100).toFixed(0)}%` : 'n/a'} fewer with)`);
|
|
135
|
+
console.log(`avg turns/cell: with=${(wTurns / cells.length).toFixed(1)} without=${(woTurns / cells.length).toFixed(1)}`);
|
|
136
|
+
console.log(`total codegraph calls=${wCalls} (avg ${(wCalls / cells.length).toFixed(1)}/cell)`);
|
|
137
|
+
console.log(`every with-arm opens with a ToolSearch round-trip (deferred tools): ${tsAll ? 'YES — 1 fixed tax/run' : 'no'}`);
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
//
|
|
3
|
+
// Programmatic / embedded SDK entry for @colbymchenry/codegraph (issue #354).
|
|
4
|
+
//
|
|
5
|
+
// The CLI/MCP `bin` (npm-shim.js) execs the per-platform bundle's OWN Node 24 so
|
|
6
|
+
// the tool never depends on the user's runtime. Embedded library consumers are
|
|
7
|
+
// the opposite case: they already run their own Node and just want the compiled
|
|
8
|
+
// API — `require("@colbymchenry/codegraph")` returning the CodeGraph class et al.
|
|
9
|
+
//
|
|
10
|
+
// The compiled library + its production dependencies (web-tree-sitter,
|
|
11
|
+
// tree-sitter-wasms, …) ship INSIDE the per-platform bundle, at
|
|
12
|
+
// @colbymchenry/codegraph-<platform>-<arch>/lib/dist/index.js
|
|
13
|
+
// (with the deps in the sibling lib/node_modules). Re-exporting that bundle keeps
|
|
14
|
+
// the main package thin — no second 50 MB copy of the grammars — while making the
|
|
15
|
+
// SDK work in the consumer's process. Types are a separate concern: the main
|
|
16
|
+
// package ships its own dist/**/*.d.ts tree (pointed at by `types`), built from
|
|
17
|
+
// the same release so it can never skew from the runtime it re-exports.
|
|
18
|
+
//
|
|
19
|
+
// node:sqlite (Node >= 22.5) is required to OPEN a graph, but only lazily inside
|
|
20
|
+
// the SQLite adapter — so loading this module is safe on older Node, and the
|
|
21
|
+
// node:sqlite requirement surfaces with an actionable error only when a DB is
|
|
22
|
+
// actually opened. Heavy extraction additionally wants the bundled launcher's
|
|
23
|
+
// --liftoff-only flag (the WASM Zone-OOM guard, issues #293/#298); an embedded
|
|
24
|
+
// host that drives large indexing should pass that flag to its own Node.
|
|
25
|
+
|
|
26
|
+
var path = require('path');
|
|
27
|
+
var os = require('os');
|
|
28
|
+
var fs = require('fs');
|
|
29
|
+
|
|
30
|
+
var target = process.platform + '-' + process.arch; // e.g. darwin-arm64, linux-x64
|
|
31
|
+
var pkg = '@colbymchenry/codegraph-' + target;
|
|
32
|
+
|
|
33
|
+
module.exports = require(resolveLibrary());
|
|
34
|
+
|
|
35
|
+
// Locate the compiled library entry inside the installed per-platform bundle.
|
|
36
|
+
// Throws an actionable error (rather than a bare MODULE_NOT_FOUND) when no bundle
|
|
37
|
+
// is present, so an embedded consumer knows exactly what to install.
|
|
38
|
+
function resolveLibrary() {
|
|
39
|
+
// 1) The npm-installed optional dependency — the normal case.
|
|
40
|
+
try {
|
|
41
|
+
return require.resolve(pkg + '/lib/dist/index.js');
|
|
42
|
+
} catch (e) {
|
|
43
|
+
/* fall through to the self-healed cache */
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// 2) A bundle the CLI shim self-healed from GitHub Releases into the cache
|
|
47
|
+
// (issue #303). Same node/lib/bin layout as the npm package. We only REUSE a
|
|
48
|
+
// cached bundle here — unlike the CLI shim we never trigger a network
|
|
49
|
+
// download from inside require(), which must stay synchronous and cheap.
|
|
50
|
+
var cached = cachedLibrary();
|
|
51
|
+
if (cached) return cached;
|
|
52
|
+
|
|
53
|
+
throw new Error(
|
|
54
|
+
'codegraph: the programmatic API is unavailable because the platform bundle\n' +
|
|
55
|
+
'(' + pkg + ') is not installed.\n' +
|
|
56
|
+
'The compiled library ships inside that per-platform optional dependency.\n' +
|
|
57
|
+
'Fixes:\n' +
|
|
58
|
+
' - install from the official npm registry so the matching bundle is fetched:\n' +
|
|
59
|
+
' npm i @colbymchenry/codegraph --registry=https://registry.npmjs.org\n' +
|
|
60
|
+
' - or run the CLI once (e.g. `npx @colbymchenry/codegraph status`) to\n' +
|
|
61
|
+
' self-heal the bundle into ~/.codegraph, then require() will find it.'
|
|
62
|
+
);
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
function cachedLibrary() {
|
|
66
|
+
try {
|
|
67
|
+
var version = require(path.join(__dirname, 'package.json')).version;
|
|
68
|
+
var base = process.env.CODEGRAPH_INSTALL_DIR || path.join(os.homedir(), '.codegraph');
|
|
69
|
+
var lib = path.join(base, 'bundles', target + '-' + version, 'lib', 'dist', 'index.js');
|
|
70
|
+
if (fs.existsSync(lib)) return lib;
|
|
71
|
+
} catch (e) {
|
|
72
|
+
/* no readable cache → caller reports the install guidance */
|
|
73
|
+
}
|
|
74
|
+
return null;
|
|
75
|
+
}
|
package/scripts/pack-npm.sh
CHANGED
|
@@ -72,8 +72,26 @@ for archive in "${archives[@]}"; do
|
|
|
72
72
|
done
|
|
73
73
|
|
|
74
74
|
# Main shim package.
|
|
75
|
+
# npm-shim.js CLI/MCP launcher (execs the bundled Node) — the `bin`.
|
|
76
|
+
# npm-sdk.js programmatic/embedded entry (#354): re-exports the installed
|
|
77
|
+
# platform bundle's compiled library — the `main`.
|
|
78
|
+
# dist/ the .d.ts tree only (types). The runtime .js stays in the
|
|
79
|
+
# per-platform bundle so its deps aren't duplicated here.
|
|
75
80
|
cp "$ROOT/scripts/npm-shim.js" "$NPM/main/npm-shim.js"
|
|
81
|
+
cp "$ROOT/scripts/npm-sdk.js" "$NPM/main/npm-sdk.js"
|
|
76
82
|
[ -f "$ROOT/README.md" ] && cp "$ROOT/README.md" "$NPM/main/README.md"
|
|
83
|
+
|
|
84
|
+
# Ship the type declarations so `types`/`exports.types` resolve. Built from this
|
|
85
|
+
# same release, so they can't skew from the runtime npm-sdk.js re-exports.
|
|
86
|
+
[ -f "$ROOT/dist/index.d.ts" ] || ( echo "[pack-npm] building dist for .d.ts" >&2 && cd "$ROOT" && npm run build >/dev/null )
|
|
87
|
+
ROOT="$ROOT" DEST="$NPM/main" node -e '
|
|
88
|
+
const fs=require("fs"), path=require("path");
|
|
89
|
+
const src=path.join(process.env.ROOT,"dist"), dest=path.join(process.env.DEST,"dist");
|
|
90
|
+
fs.cpSync(src, dest, { recursive:true, filter(s){
|
|
91
|
+
try { return fs.statSync(s).isDirectory() || s.endsWith(".d.ts"); } catch (e) { return false; }
|
|
92
|
+
}});
|
|
93
|
+
'
|
|
94
|
+
|
|
77
95
|
VERSION="$VERSION" SCOPE="$SCOPE" TARGETS="${targets[*]}" \
|
|
78
96
|
node -e '
|
|
79
97
|
const fs=require("fs");
|
|
@@ -85,8 +103,14 @@ VERSION="$VERSION" SCOPE="$SCOPE" TARGETS="${targets[*]}" \
|
|
|
85
103
|
version: process.env.VERSION,
|
|
86
104
|
description: "Local-first code intelligence for AI agents (MCP). Self-contained — bundles its own runtime.",
|
|
87
105
|
bin: { codegraph: "npm-shim.js" },
|
|
106
|
+
main: "npm-sdk.js",
|
|
107
|
+
types: "dist/index.d.ts",
|
|
108
|
+
exports: {
|
|
109
|
+
".": { types: "./dist/index.d.ts", default: "./npm-sdk.js" },
|
|
110
|
+
"./package.json": "./package.json"
|
|
111
|
+
},
|
|
88
112
|
optionalDependencies: opt,
|
|
89
|
-
files: ["npm-shim.js","README.md"],
|
|
113
|
+
files: ["npm-shim.js","npm-sdk.js","dist","README.md"],
|
|
90
114
|
license: "MIT"
|
|
91
115
|
}, null, 2) + "\n");
|
|
92
116
|
' "$NPM/main/package.json"
|