@stupidloud/codegraph 0.8.1 → 0.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +319 -152
- package/dist/bin/codegraph.d.ts +4 -0
- package/dist/bin/codegraph.d.ts.map +1 -1
- package/dist/bin/codegraph.js +354 -90
- package/dist/bin/codegraph.js.map +1 -1
- package/dist/bin/node-version-check.d.ts +17 -0
- package/dist/bin/node-version-check.d.ts.map +1 -1
- package/dist/bin/node-version-check.js +37 -0
- package/dist/bin/node-version-check.js.map +1 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +1 -11
- package/dist/config.js.map +1 -1
- package/dist/context/formatter.d.ts.map +1 -1
- package/dist/context/formatter.js +25 -6
- package/dist/context/formatter.js.map +1 -1
- package/dist/context/index.d.ts +22 -0
- package/dist/context/index.d.ts.map +1 -1
- package/dist/context/index.js +257 -6
- package/dist/context/index.js.map +1 -1
- package/dist/context/markers.d.ts +19 -0
- package/dist/context/markers.d.ts.map +1 -0
- package/dist/context/markers.js +22 -0
- package/dist/context/markers.js.map +1 -0
- package/dist/db/index.d.ts +30 -1
- package/dist/db/index.d.ts.map +1 -1
- package/dist/db/index.js +75 -25
- package/dist/db/index.js.map +1 -1
- package/dist/db/queries.d.ts +104 -0
- package/dist/db/queries.d.ts.map +1 -1
- package/dist/db/queries.js +328 -31
- package/dist/db/queries.js.map +1 -1
- package/dist/db/sqlite-adapter.d.ts +24 -23
- package/dist/db/sqlite-adapter.d.ts.map +1 -1
- package/dist/db/sqlite-adapter.js +54 -174
- package/dist/db/sqlite-adapter.js.map +1 -1
- package/dist/directory.d.ts.map +1 -1
- package/dist/directory.js +6 -20
- package/dist/directory.js.map +1 -1
- package/dist/extraction/generated-detection.d.ts +30 -0
- package/dist/extraction/generated-detection.d.ts.map +1 -0
- package/dist/extraction/generated-detection.js +80 -0
- package/dist/extraction/generated-detection.js.map +1 -0
- package/dist/extraction/grammars.d.ts +23 -1
- package/dist/extraction/grammars.d.ts.map +1 -1
- package/dist/extraction/grammars.js +107 -3
- package/dist/extraction/grammars.js.map +1 -1
- package/dist/extraction/index.d.ts +22 -14
- package/dist/extraction/index.d.ts.map +1 -1
- package/dist/extraction/index.js +272 -183
- package/dist/extraction/index.js.map +1 -1
- package/dist/extraction/languages/c-cpp.d.ts.map +1 -1
- package/dist/extraction/languages/c-cpp.js +45 -0
- package/dist/extraction/languages/c-cpp.js.map +1 -1
- package/dist/extraction/languages/csharp.d.ts.map +1 -1
- package/dist/extraction/languages/csharp.js +2 -1
- package/dist/extraction/languages/csharp.js.map +1 -1
- package/dist/extraction/languages/go.d.ts.map +1 -1
- package/dist/extraction/languages/go.js +18 -2
- package/dist/extraction/languages/go.js.map +1 -1
- package/dist/extraction/languages/index.d.ts.map +1 -1
- package/dist/extraction/languages/index.js +6 -0
- package/dist/extraction/languages/index.js.map +1 -1
- package/dist/extraction/languages/java.d.ts.map +1 -1
- package/dist/extraction/languages/java.js +6 -0
- package/dist/extraction/languages/java.js.map +1 -1
- package/dist/extraction/languages/kotlin.d.ts.map +1 -1
- package/dist/extraction/languages/kotlin.js +6 -0
- package/dist/extraction/languages/kotlin.js.map +1 -1
- package/dist/extraction/languages/lua.d.ts +3 -0
- package/dist/extraction/languages/lua.d.ts.map +1 -0
- package/dist/extraction/languages/lua.js +150 -0
- package/dist/extraction/languages/lua.js.map +1 -0
- package/dist/extraction/languages/luau.d.ts +3 -0
- package/dist/extraction/languages/luau.d.ts.map +1 -0
- package/dist/extraction/languages/luau.js +37 -0
- package/dist/extraction/languages/luau.js.map +1 -0
- package/dist/extraction/languages/objc.d.ts +3 -0
- package/dist/extraction/languages/objc.d.ts.map +1 -0
- package/dist/extraction/languages/objc.js +133 -0
- package/dist/extraction/languages/objc.js.map +1 -0
- package/dist/extraction/mybatis-extractor.d.ts +48 -0
- package/dist/extraction/mybatis-extractor.d.ts.map +1 -0
- package/dist/extraction/mybatis-extractor.js +198 -0
- package/dist/extraction/mybatis-extractor.js.map +1 -0
- package/dist/extraction/tree-sitter-types.d.ts +14 -0
- package/dist/extraction/tree-sitter-types.d.ts.map +1 -1
- package/dist/extraction/tree-sitter.d.ts +84 -0
- package/dist/extraction/tree-sitter.d.ts.map +1 -1
- package/dist/extraction/tree-sitter.js +715 -16
- package/dist/extraction/tree-sitter.js.map +1 -1
- package/dist/extraction/vue-extractor.d.ts +15 -0
- package/dist/extraction/vue-extractor.d.ts.map +1 -1
- package/dist/extraction/vue-extractor.js +88 -0
- package/dist/extraction/vue-extractor.js.map +1 -1
- package/dist/extraction/wasm/tree-sitter-lua.wasm +0 -0
- package/dist/extraction/wasm/tree-sitter-luau.wasm +0 -0
- package/dist/extraction/wasm-runtime-flags.d.ts +38 -0
- package/dist/extraction/wasm-runtime-flags.d.ts.map +1 -0
- package/dist/extraction/wasm-runtime-flags.js +106 -0
- package/dist/extraction/wasm-runtime-flags.js.map +1 -0
- package/dist/graph/traversal.d.ts.map +1 -1
- package/dist/graph/traversal.js +76 -38
- package/dist/graph/traversal.js.map +1 -1
- package/dist/index.d.ts +77 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +133 -19
- package/dist/index.js.map +1 -1
- package/dist/installer/config-writer.d.ts +7 -8
- package/dist/installer/config-writer.d.ts.map +1 -1
- package/dist/installer/config-writer.js +7 -27
- package/dist/installer/config-writer.js.map +1 -1
- package/dist/installer/index.d.ts +51 -16
- package/dist/installer/index.d.ts.map +1 -1
- package/dist/installer/index.js +120 -29
- package/dist/installer/index.js.map +1 -1
- package/dist/installer/instructions-template.d.ts +11 -21
- package/dist/installer/instructions-template.d.ts.map +1 -1
- package/dist/installer/instructions-template.js +12 -56
- package/dist/installer/instructions-template.js.map +1 -1
- package/dist/installer/targets/antigravity.d.ts +57 -0
- package/dist/installer/targets/antigravity.d.ts.map +1 -0
- package/dist/installer/targets/antigravity.js +308 -0
- package/dist/installer/targets/antigravity.js.map +1 -0
- package/dist/installer/targets/claude.d.ts +26 -1
- package/dist/installer/targets/claude.d.ts.map +1 -1
- package/dist/installer/targets/claude.js +118 -40
- package/dist/installer/targets/claude.js.map +1 -1
- package/dist/installer/targets/codex.d.ts.map +1 -1
- package/dist/installer/targets/codex.js +15 -13
- package/dist/installer/targets/codex.js.map +1 -1
- package/dist/installer/targets/cursor.d.ts.map +1 -1
- package/dist/installer/targets/cursor.js +61 -36
- package/dist/installer/targets/cursor.js.map +1 -1
- package/dist/installer/targets/gemini.d.ts +26 -0
- package/dist/installer/targets/gemini.d.ts.map +1 -0
- package/dist/installer/targets/gemini.js +167 -0
- package/dist/installer/targets/gemini.js.map +1 -0
- package/dist/installer/targets/hermes.d.ts +18 -0
- package/dist/installer/targets/hermes.d.ts.map +1 -0
- package/dist/installer/targets/hermes.js +359 -0
- package/dist/installer/targets/hermes.js.map +1 -0
- package/dist/installer/targets/kiro.d.ts +27 -0
- package/dist/installer/targets/kiro.d.ts.map +1 -0
- package/dist/installer/targets/kiro.js +178 -0
- package/dist/installer/targets/kiro.js.map +1 -0
- package/dist/installer/targets/opencode.d.ts.map +1 -1
- package/dist/installer/targets/opencode.js +15 -13
- package/dist/installer/targets/opencode.js.map +1 -1
- package/dist/installer/targets/registry.d.ts.map +1 -1
- package/dist/installer/targets/registry.js +8 -0
- package/dist/installer/targets/registry.js.map +1 -1
- package/dist/installer/targets/shared.d.ts.map +1 -1
- package/dist/installer/targets/shared.js +3 -2
- package/dist/installer/targets/shared.js.map +1 -1
- package/dist/installer/targets/types.d.ts +1 -16
- package/dist/installer/targets/types.d.ts.map +1 -1
- package/dist/mcp/daemon-paths.d.ts +46 -0
- package/dist/mcp/daemon-paths.d.ts.map +1 -0
- package/dist/mcp/daemon-paths.js +125 -0
- package/dist/mcp/daemon-paths.js.map +1 -0
- package/dist/mcp/daemon.d.ts +161 -0
- package/dist/mcp/daemon.d.ts.map +1 -0
- package/dist/mcp/daemon.js +403 -0
- package/dist/mcp/daemon.js.map +1 -0
- package/dist/mcp/engine.d.ts +105 -0
- package/dist/mcp/engine.d.ts.map +1 -0
- package/dist/mcp/engine.js +270 -0
- package/dist/mcp/engine.js.map +1 -0
- package/dist/mcp/index.d.ts +70 -52
- package/dist/mcp/index.d.ts.map +1 -1
- package/dist/mcp/index.js +355 -331
- package/dist/mcp/index.js.map +1 -1
- package/dist/mcp/proxy.d.ts +81 -0
- package/dist/mcp/proxy.d.ts.map +1 -0
- package/dist/mcp/proxy.js +510 -0
- package/dist/mcp/proxy.js.map +1 -0
- package/dist/mcp/server-instructions.d.ts +1 -1
- package/dist/mcp/server-instructions.d.ts.map +1 -1
- package/dist/mcp/server-instructions.js +21 -21
- package/dist/mcp/session.d.ts +77 -0
- package/dist/mcp/session.d.ts.map +1 -0
- package/dist/mcp/session.js +294 -0
- package/dist/mcp/session.js.map +1 -0
- package/dist/mcp/tools.d.ts +171 -15
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +1714 -298
- package/dist/mcp/tools.js.map +1 -1
- package/dist/mcp/transport.d.ts +111 -29
- package/dist/mcp/transport.d.ts.map +1 -1
- package/dist/mcp/transport.js +181 -71
- package/dist/mcp/transport.js.map +1 -1
- package/dist/mcp/version.d.ts +19 -0
- package/dist/mcp/version.d.ts.map +1 -0
- package/dist/mcp/version.js +71 -0
- package/dist/mcp/version.js.map +1 -0
- package/dist/resolution/callback-synthesizer.d.ts +10 -0
- package/dist/resolution/callback-synthesizer.d.ts.map +1 -0
- package/dist/resolution/callback-synthesizer.js +1300 -0
- package/dist/resolution/callback-synthesizer.js.map +1 -0
- package/dist/resolution/frameworks/csharp.d.ts.map +1 -1
- package/dist/resolution/frameworks/csharp.js +36 -8
- package/dist/resolution/frameworks/csharp.js.map +1 -1
- package/dist/resolution/frameworks/drupal.d.ts +51 -0
- package/dist/resolution/frameworks/drupal.d.ts.map +1 -0
- package/dist/resolution/frameworks/drupal.js +367 -0
- package/dist/resolution/frameworks/drupal.js.map +1 -0
- package/dist/resolution/frameworks/expo-modules.d.ts +3 -0
- package/dist/resolution/frameworks/expo-modules.d.ts.map +1 -0
- package/dist/resolution/frameworks/expo-modules.js +143 -0
- package/dist/resolution/frameworks/expo-modules.js.map +1 -0
- package/dist/resolution/frameworks/express.d.ts.map +1 -1
- package/dist/resolution/frameworks/express.js +102 -19
- package/dist/resolution/frameworks/express.js.map +1 -1
- package/dist/resolution/frameworks/fabric.d.ts +3 -0
- package/dist/resolution/frameworks/fabric.d.ts.map +1 -0
- package/dist/resolution/frameworks/fabric.js +354 -0
- package/dist/resolution/frameworks/fabric.js.map +1 -0
- package/dist/resolution/frameworks/go.d.ts.map +1 -1
- package/dist/resolution/frameworks/go.js +6 -3
- package/dist/resolution/frameworks/go.js.map +1 -1
- package/dist/resolution/frameworks/index.d.ts +6 -0
- package/dist/resolution/frameworks/index.d.ts.map +1 -1
- package/dist/resolution/frameworks/index.js +29 -1
- package/dist/resolution/frameworks/index.js.map +1 -1
- package/dist/resolution/frameworks/java.d.ts.map +1 -1
- package/dist/resolution/frameworks/java.js +339 -12
- package/dist/resolution/frameworks/java.js.map +1 -1
- package/dist/resolution/frameworks/laravel.d.ts.map +1 -1
- package/dist/resolution/frameworks/laravel.js +17 -8
- package/dist/resolution/frameworks/laravel.js.map +1 -1
- package/dist/resolution/frameworks/nestjs.d.ts.map +1 -1
- package/dist/resolution/frameworks/nestjs.js +324 -0
- package/dist/resolution/frameworks/nestjs.js.map +1 -1
- package/dist/resolution/frameworks/play.d.ts +19 -0
- package/dist/resolution/frameworks/play.d.ts.map +1 -0
- package/dist/resolution/frameworks/play.js +111 -0
- package/dist/resolution/frameworks/play.js.map +1 -0
- package/dist/resolution/frameworks/python.d.ts.map +1 -1
- package/dist/resolution/frameworks/python.js +134 -16
- package/dist/resolution/frameworks/python.js.map +1 -1
- package/dist/resolution/frameworks/react-native.d.ts +3 -0
- package/dist/resolution/frameworks/react-native.d.ts.map +1 -0
- package/dist/resolution/frameworks/react-native.js +360 -0
- package/dist/resolution/frameworks/react-native.js.map +1 -0
- package/dist/resolution/frameworks/react.d.ts.map +1 -1
- package/dist/resolution/frameworks/react.js +96 -3
- package/dist/resolution/frameworks/react.js.map +1 -1
- package/dist/resolution/frameworks/ruby.d.ts.map +1 -1
- package/dist/resolution/frameworks/ruby.js +106 -2
- package/dist/resolution/frameworks/ruby.js.map +1 -1
- package/dist/resolution/frameworks/rust.d.ts.map +1 -1
- package/dist/resolution/frameworks/rust.js +102 -5
- package/dist/resolution/frameworks/rust.js.map +1 -1
- package/dist/resolution/frameworks/swift-objc.d.ts +37 -0
- package/dist/resolution/frameworks/swift-objc.d.ts.map +1 -0
- package/dist/resolution/frameworks/swift-objc.js +252 -0
- package/dist/resolution/frameworks/swift-objc.js.map +1 -0
- package/dist/resolution/frameworks/swift.d.ts.map +1 -1
- package/dist/resolution/frameworks/swift.js +30 -6
- package/dist/resolution/frameworks/swift.js.map +1 -1
- package/dist/resolution/go-module.d.ts +26 -0
- package/dist/resolution/go-module.d.ts.map +1 -0
- package/dist/resolution/go-module.js +78 -0
- package/dist/resolution/go-module.js.map +1 -0
- package/dist/resolution/import-resolver.d.ts +28 -0
- package/dist/resolution/import-resolver.d.ts.map +1 -1
- package/dist/resolution/import-resolver.js +617 -5
- package/dist/resolution/import-resolver.js.map +1 -1
- package/dist/resolution/index.d.ts +11 -0
- package/dist/resolution/index.d.ts.map +1 -1
- package/dist/resolution/index.js +196 -10
- package/dist/resolution/index.js.map +1 -1
- package/dist/resolution/lru-cache.d.ts +24 -0
- package/dist/resolution/lru-cache.d.ts.map +1 -0
- package/dist/resolution/lru-cache.js +62 -0
- package/dist/resolution/lru-cache.js.map +1 -0
- package/dist/resolution/name-matcher.d.ts.map +1 -1
- package/dist/resolution/name-matcher.js +212 -0
- package/dist/resolution/name-matcher.js.map +1 -1
- package/dist/resolution/swift-objc-bridge.d.ts +134 -0
- package/dist/resolution/swift-objc-bridge.d.ts.map +1 -0
- package/dist/resolution/swift-objc-bridge.js +256 -0
- package/dist/resolution/swift-objc-bridge.js.map +1 -0
- package/dist/resolution/types.d.ts +44 -0
- package/dist/resolution/types.d.ts.map +1 -1
- package/dist/resolution/workspace-packages.d.ts +48 -0
- package/dist/resolution/workspace-packages.d.ts.map +1 -0
- package/dist/resolution/workspace-packages.js +208 -0
- package/dist/resolution/workspace-packages.js.map +1 -0
- package/dist/search/query-utils.d.ts +18 -0
- package/dist/search/query-utils.d.ts.map +1 -1
- package/dist/search/query-utils.js +30 -0
- package/dist/search/query-utils.js.map +1 -1
- package/dist/sync/git-hooks.d.ts.map +1 -1
- package/dist/sync/git-hooks.js +2 -0
- package/dist/sync/git-hooks.js.map +1 -1
- package/dist/sync/index.d.ts +3 -1
- package/dist/sync/index.d.ts.map +1 -1
- package/dist/sync/index.js +8 -1
- package/dist/sync/index.js.map +1 -1
- package/dist/sync/watcher.d.ts +214 -12
- package/dist/sync/watcher.d.ts.map +1 -1
- package/dist/sync/watcher.js +467 -55
- package/dist/sync/watcher.js.map +1 -1
- package/dist/sync/worktree.d.ts +54 -0
- package/dist/sync/worktree.d.ts.map +1 -0
- package/dist/sync/worktree.js +137 -0
- package/dist/sync/worktree.js.map +1 -0
- package/dist/types.d.ts +9 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +14 -0
- package/dist/types.js.map +1 -1
- package/dist/utils.js +1 -1
- package/package.json +2 -2
- package/scripts/add-lang/bench.sh +60 -0
- package/scripts/add-lang/check-grammar.mjs +75 -0
- package/scripts/add-lang/dump-ast.mjs +103 -0
- package/scripts/add-lang/verify-extraction.mjs +70 -0
- package/scripts/agent-eval/arms-F.sh +21 -0
- package/scripts/agent-eval/arms-matrix.sh +37 -0
- package/scripts/agent-eval/bench-readme.sh +28 -0
- package/scripts/agent-eval/bench-why-repo.sh +22 -0
- package/scripts/agent-eval/block-read-hook.sh +19 -0
- package/scripts/agent-eval/hook-settings.json +15 -0
- package/scripts/agent-eval/itrun.sh +24 -11
- package/scripts/agent-eval/parse-arms.mjs +116 -0
- package/scripts/agent-eval/parse-bench-readme.mjs +84 -0
- package/scripts/agent-eval/probe-context.mjs +21 -0
- package/scripts/agent-eval/probe-explore.mjs +40 -0
- package/scripts/agent-eval/probe-node.mjs +20 -0
- package/scripts/agent-eval/probe-sweep.mjs +119 -0
- package/scripts/agent-eval/probe-trace.mjs +20 -0
- package/scripts/agent-eval/run-arms.sh +56 -0
- package/scripts/agent-eval/seq-matrix.mjs +137 -0
- package/scripts/build-bundle.sh +118 -0
- package/scripts/npm-sdk.js +75 -0
- package/scripts/npm-shim.js +246 -0
- package/scripts/pack-npm.sh +119 -0
- package/scripts/prepare-release.mjs +270 -0
- package/scripts/patch-tree-sitter-dart.js +0 -112
- package/scripts/release.sh +0 -68
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Arm F driver: body-inlining trace plus trace-first steering, run over the same
# six repos as arms-matrix.sh. The tool surface matches arm B, so comparing F
# with B isolates the trace-enrichment effect (old thin trace in B, body-inlining
# trace here).
#
# Environment:
#   RUNS    runs per repo (default 2)
#   CORPUS  directory holding the benchmark repos (default /tmp/codegraph-corpus)
#   ARM     arm label handed to run-arms.sh (default F)
set -uo pipefail

harness_dir="$(cd "$(dirname "$0")" && pwd)"
RUNS="${RUNS:-2}"
ARM="${ARM:-F}"
corpus_dir="${CORPUS:-/tmp/codegraph-corpus}"

# One entry per repo: "<repo-path>|<flow question>".
targets=(
  "$corpus_dir/flutter-samples/add_to_app/books/flutter_module_books|How does the books UI build and what child widgets does it show?"
  "$corpus_dir/aspnet-realworld|How is creating an article handled? Trace the controller to the service."
  "$corpus_dir/spring-mall|How is a product-list request handled? Trace the controller to the service."
  "$corpus_dir/vapor-spi|How is a package-show request handled? Name the route and controller."
  "$corpus_dir/excalidraw|How does updating an element re-render the canvas on screen? Trace the flow."
  "$corpus_dir/spring-halo|How is publishing a post handled? Trace the controller to the service."
)

# Invoke run-arms.sh RUNS times for one "<repo-path>|<question>" entry.
run_target() {
  local entry="$1" attempt
  local repo_path="${entry%%|*}"   # text before the first '|'
  local question="${entry#*|}"     # text after the first '|'
  for attempt in $(seq 1 "$RUNS"); do
    bash "$harness_dir/run-arms.sh" "$repo_path" "$question" "$ARM" "$attempt"
  done
}

echo "### ARM $ARM START $(date) RUNS=$RUNS"
for entry in "${targets[@]}"; do
  run_target "$entry"
done
echo "### ARM $ARM COMPLETE $(date)"
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Tool-surface ablation driver: runs the chosen repos against arms A–E.
# Arms A–D are asked the canonical FLOW question for each repo. Arm E is the
# control probe: it asks a NON-flow survey question, which should degrade
# without explore+context.
# Output: /tmp/arms/<repo>/<arm>-r<n>.jsonl (parse with parse-arms.mjs).
#
# Environment:
#   RUNS    runs per repo and arm (default 2)
#   CORPUS  directory holding the benchmark repos (default /tmp/codegraph-corpus)
set -uo pipefail

harness_dir="$(cd "$(dirname "$0")" && pwd)"
RUNS="${RUNS:-2}"
corpus_dir="${CORPUS:-/tmp/codegraph-corpus}"
survey_question='What are the main modules/components of this codebase and what does each one do? Give an overview of how it is organized.'

# "<repo-path>|<flow-question>" (2 small, 2 medium, 2 large, spanning the size range)
targets=(
  "$corpus_dir/flutter-samples/add_to_app/books/flutter_module_books|How does the books UI build and what child widgets does it show?"
  "$corpus_dir/aspnet-realworld|How is creating an article handled? Trace the controller to the service."
  "$corpus_dir/spring-mall|How is a product-list request handled? Trace the controller to the service."
  "$corpus_dir/vapor-spi|How is a package-show request handled? Name the route and controller."
  "$corpus_dir/excalidraw|How does updating an element re-render the canvas on screen? Trace the flow."
  "$corpus_dir/spring-halo|How is publishing a post handled? Trace the controller to the service."
)

# Usage: run_arm <repo-path> <question> <arm>
# Invokes run-arms.sh once per run index, 1..RUNS.
run_arm() {
  local repo_path="$1" question="$2" arm_id="$3" attempt
  for attempt in $(seq 1 "$RUNS"); do
    bash "$harness_dir/run-arms.sh" "$repo_path" "$question" "$arm_id" "$attempt"
  done
}

echo "### ARMS MATRIX START $(date) RUNS=$RUNS"

# Arms A–D: every repo, each with its own flow question.
for entry in "${targets[@]}"; do
  repo_path="${entry%%|*}"
  flow_question="${entry#*|}"
  for arm_id in A B C D; do
    run_arm "$repo_path" "$flow_question" "$arm_id"
  done
done

# Arm E: non-flow control probe on two repos (must degrade without explore+context).
for repo_path in "$corpus_dir/excalidraw" "$corpus_dir/spring-mall"; do
  run_arm "$repo_path" "$survey_question" E
done

echo "### ARMS MATRIX COMPLETE $(date)"
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Re-run the README "Benchmark Results" A/B (with vs without codegraph) on the
# current build: the 7 README repos, same queries, RUNS per arm (default 4).
# Output → /tmp/ab-readme/<repo>/run<n>/run-headless-{with,without}.jsonl
# Aggregate with parse-bench-readme.mjs. Repos must be cloned + indexed under
# $CORPUS (default /tmp/codegraph-corpus) by the build under test.
set -uo pipefail
H="$(cd "$(dirname "$0")" && pwd)"
C="${CORPUS:-/tmp/codegraph-corpus}"
RUNS="${RUNS:-4}"

# "<repo-dir-under-corpus>|<query>"
ROWS=(
  "vscode|How does the extension host communicate with the main process?"
  "excalidraw|How does Excalidraw render and update canvas elements?"
  "django|How does Django's ORM build and execute a query from a QuerySet?"
  "tokio|How does tokio schedule and run async tasks on its runtime?"
  "okhttp|How does OkHttp process a request through its interceptor chain?"
  "gin|How does gin route requests through its middleware chain?"
  "alamofire|How does Alamofire build, send, and validate a request?"
)

echo "### README A/B START $(date) RUNS=$RUNS"
for row in "${ROWS[@]}"; do
  repo="${row%%|*}"; q="${row#*|}"
  echo "===== $repo ====="
  for run in $(seq 1 "$RUNS"); do
    # Only grep's status may decide whether the fallback line is printed.
    # Under pipefail a non-zero exit from run-all.sh would otherwise fail the
    # whole pipeline and print "(no exit line)" even after grep had already
    # shown the exit line, so the producer's status is neutralised here.
    { AGENT_EVAL_OUT="/tmp/ab-readme/$repo/run$run" bash "$H/run-all.sh" "$C/$repo" "$q" headless 2>&1 || true; } \
      | grep -E "exit [0-9]" || echo " run$run: (no exit line)"
  done
done
echo "### README A/B DONE $(date)"
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# One README repo, WITH-codegraph only, N runs. Each run appends a why-Read
# diagnostic so the agent explains any Read/Grep. (The WITHOUT baseline is
# codegraph-independent and already in the README — no point re-running it.)
# Output -> /tmp/ab-why/<repo>/with<n>.jsonl
# Usage: bench-why-repo.sh <repo-path> "<query>" [N]
#
# Environment:
#   CG      path to the codegraph CLI entry point used as the MCP server command
#           (defaults to the original author's local build path)
#   EFFORT  value passed to `claude --effort` (default high)
set -uo pipefail

# Fail with a usage line instead of a bare "unbound variable" from `set -u`.
if [ "$#" -lt 2 ]; then
  echo 'Usage: bench-why-repo.sh <repo-path> "<query>" [N]' >&2
  exit 2
fi

REPO="$1"; Q="$2"; N="${3:-4}"
NAME="$(basename "$REPO")"
# Overridable so the script runs on machines other than the author's.
CG="${CG:-/Users/colby/Development/Personal/codegraph/dist/bin/codegraph.js}"
if [ ! -e "$CG" ]; then
  # Without the server binary the "WITH codegraph" runs would not measure codegraph at all.
  echo "codegraph binary not found: $CG (set CG=/path/to/dist/bin/codegraph.js)" >&2
  exit 1
fi
OUT="/tmp/ab-why/$NAME"; mkdir -p "$OUT"
WHY=$'\n\nIMPORTANT — diagnostic: if you use the Read or Grep tool at ANY point, for EACH such call explain why codegraph_explore / codegraph_node did not already give you what you needed. End your entire answer with a section titled exactly "## Why I read" listing every Read and Grep you made and the precise reason codegraph fell short for it. If you used neither, write "## Why I read" then "none — codegraph was sufficient."'
# MCP config with a single codegraph server pointed at the repo under test.
# NOTE(review): the paths are interpolated without JSON escaping; a path
# containing '"' or '\' would produce invalid JSON.
printf '{"mcpServers":{"codegraph":{"command":"%s","args":["serve","--mcp","--path","%s"]}}}' "$CG" "$REPO" > "$OUT/cg.json"

for i in $(seq 1 "$N"); do
  # Start each run clean: kill every process whose command line matches
  # "serve --mcp" and remove the repo's daemon socket.
  pkill -f "serve --mcp" 2>/dev/null; sleep 1; rm -f "$REPO/.codegraph/daemon.sock"
  ( cd "$REPO" && claude -p "$Q$WHY" --output-format stream-json --verbose \
      --permission-mode bypassPermissions --model opus --effort "${EFFORT:-high}" --max-budget-usd 4 \
      --strict-mcp-config --mcp-config "$OUT/cg.json" > "$OUT/with$i.jsonl" 2>"$OUT/with$i.err" )
  # $? is the subshell's status, so this echo must stay directly after it.
  echo "WITH run $i: exit $? ($(wc -l < "$OUT/with$i.jsonl" | tr -d ' ') lines)"
done
echo "DONE $NAME"
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Experimental PreToolUse hook: refuses Read on source files and points the agent
# at codegraph_explore/codegraph_node instead. The experiment checks whether
# codegraph can FULLY replace Read for code-understanding once that escape hatch
# is gone. Any path whose extension is not listed below (config, .env, markdown,
# new files) is allowed through.
#
# Wire via: claude ... --settings scripts/agent-eval/hook-settings.json
set -uo pipefail

# Succeeds when the given path ends in one of the blocked source extensions.
is_blocked_source() {
  case "$1" in
    *.ts|*.tsx|*.js|*.jsx|*.mjs|*.cjs|*.py|*.go|*.rs|*.java|*.rb|*.php|*.swift|*.kt|*.kts|*.c|*.cc|*.cpp|*.h|*.hpp|*.cs|*.lua|*.vue|*.svelte)
      return 0
      ;;
  esac
  return 1
}

# The hook payload arrives as JSON on stdin. If jq fails or the payload has no
# file_path, the path is empty and the read is allowed.
hook_payload="$(cat)"
requested_path="$(printf '%s' "$hook_payload" | jq -r '.tool_input.file_path // empty' 2>/dev/null)"

if is_blocked_source "$requested_path"; then
  deny_reason="Read is disabled for source files in this session — codegraph already has this file indexed (with line numbers, kept in sync on every change). Use codegraph_explore (several related symbols at once) or codegraph_node (one symbol's full source). If a symbol you need wasn't in a prior explore, run ANOTHER codegraph_explore with its exact name instead of reading the file."
  jq -n --arg m "$deny_reason" '{reason:$m, hookSpecificOutput:{hookEventName:"PreToolUse",permissionDecision:"deny",permissionDecisionReason:$m}}'
fi

# The hook exits 0 in both cases; a denial is carried by the JSON printed above.
exit 0
|
|
@@ -81,25 +81,38 @@ for _ in $(seq 1 120); do
|
|
|
81
81
|
done
|
|
82
82
|
[ "$started" = 1 ] || { echo "agent never started working"; cap; tmux kill-session -t "$SESSION" 2>/dev/null; exit 1; }
|
|
83
83
|
|
|
84
|
-
# Poll for idle:
|
|
85
|
-
#
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
84
|
+
# Poll for idle. CRITICAL: Opus 4.8 (extended thinking) renders NO spinner /
|
|
85
|
+
# "esc to interrupt" / timer while it STREAMS its final answer — those appear
|
|
86
|
+
# only during the thinking + tool-use phases ("✻ Marinating… (32s · ↓ 1.3k
|
|
87
|
+
# tokens · thinking with max effort)"). So BUSY_RE reads "not busy" for the whole
|
|
88
|
+
# 10-30s answer stream, and any short not-busy threshold kills the run mid-answer
|
|
89
|
+
# (the truncation bug). We therefore detect "done" by CONTENT STABILITY, not by a
|
|
90
|
+
# spinner string: while the agent streams, the captured pane changes every poll,
|
|
91
|
+
# so stability never accrues; it accrues only once the agent has finished and the
|
|
92
|
+
# static "✻ Brewed for 1m 9s" summary is all that is left. BUSY_RE still hard-
|
|
93
|
+
# resets stability (covers thinking/tool-use/live-timer, where text can briefly
|
|
94
|
+
# sit still). Need STABLE_NEEDED polls (~8s) of zero pane change + ❯ present.
|
|
95
|
+
# Content-stability is model-agnostic — it survives future spinner re-wordings.
|
|
96
|
+
STABLE_NEEDED=16
|
|
97
|
+
prev=""; stable=0
|
|
98
|
+
for _ in $(seq 1 2400); do # up to ~20 min
|
|
99
|
+
pane="$(cap)"
|
|
100
|
+
sig="$(printf '%s' "$pane" | tr -s '[:space:]' ' ')"
|
|
101
|
+
if printf '%s' "$pane" | grep -qE "$BUSY_RE"; then
|
|
102
|
+
stable=0 # thinking / tool use / live timer → busy
|
|
103
|
+
elif [ -n "$sig" ] && [ "$sig" = "$prev" ] && printf '%s' "$pane" | grep -q "❯"; then
|
|
104
|
+
stable=$((stable+1)); [ "$stable" -ge "$STABLE_NEEDED" ] && break
|
|
93
105
|
else
|
|
94
|
-
|
|
106
|
+
stable=0 # answer still streaming → pane changing
|
|
95
107
|
fi
|
|
108
|
+
prev="$sig"
|
|
96
109
|
sleep 0.5
|
|
97
110
|
done
|
|
98
111
|
sleep 1
|
|
99
112
|
|
|
100
113
|
tmux capture-pane -p -t "$SESSION" -S - > "$OUT"
|
|
101
114
|
echo "captured $(wc -l < "$OUT") lines -> $OUT"
|
|
102
|
-
grep -oE "Done \([^)]*\)" "$OUT" | tail -1
|
|
115
|
+
grep -oE "Done \([^)]*\)|[A-Z][a-z]+ for ([0-9]+m )?[0-9]+s" "$OUT" | tail -1
|
|
103
116
|
grep -oE "[0-9.]+k?/[0-9.]+M" "$OUT" | tail -1 | sed 's/^/Context /'
|
|
104
117
|
tmux kill-session -t "$SESSION" 2>/dev/null
|
|
105
118
|
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Analyze the tool-surface ablation (/tmp/arms/<repo>/<arm>-r<n>.jsonl).
|
|
3
|
+
// Compares arms (A–E documented below; F–I logs are parsed as well) on trace adoption, Read/Grep fallback, codegraph payload,
|
|
4
|
+
// round-trips, and duration — averaged across runs per arm.
|
|
5
|
+
//
|
|
6
|
+
// The decisive signal is READS: if removing a tool raises Reads on a question
|
|
7
|
+
// class, that tool was load-bearing for it (not redundant). If removing it
|
|
8
|
+
// changes nothing, it was redundant.
|
|
9
|
+
//
|
|
10
|
+
// A control all tools no steering (baseline)
|
|
11
|
+
// B steer all tools trace-first (adoption)
|
|
12
|
+
// C no-explore hide explore trace-first (is explore redundant?)
|
|
13
|
+
// D trace-centric hide explore+context trace-first (is the survey pair redundant?)
|
|
14
|
+
// E control-probe hide explore+context trace-first (NON-flow Q — should degrade)
|
|
15
|
+
//
|
|
16
|
+
// Usage: node scripts/agent-eval/parse-arms.mjs [/tmp/arms]
|
|
17
|
+
import { readFileSync, readdirSync, existsSync, statSync } from 'fs';
|
|
18
|
+
import { join } from 'path';
|
|
19
|
+
|
|
20
|
+
const ROOT = process.argv[2] || '/tmp/arms';
|
|
21
|
+
const cgShort = (n) => n.replace('mcp__codegraph__codegraph_', '').replace('mcp__codegraph__', '');
|
|
22
|
+
|
|
23
|
+
/**
 * Summarize one stream-json run log (one JSON event per line).
 *
 * Lines that are blank, are not valid JSON, or parse to a non-object value
 * (e.g. `null`, a number) are skipped, so a single bad line cannot abort the
 * analysis.
 *
 * @param {string} file - Path to the `.jsonl` log.
 * @returns {null | {
 *   initCg: number, reads: number, greps: number, cgCalls: number,
 *   cgSeq: string[], cgOut: number, traceUsed: boolean,
 *   turns: number | null, dur: number | null, cost: number, ok: boolean
 * }} `null` when the file does not exist; otherwise the run summary:
 *   - initCg: tools in the init event whose name matches /codegraph/
 *   - reads / greps: count of `Read` calls / `Grep` plus `Glob` calls
 *   - cgCalls, cgSeq, cgOut: codegraph calls, their short names in call order,
 *     and the total character length of their tool results
 *   - traceUsed: whether any codegraph call name contains "trace"
 *   - turns, dur (whole seconds), cost, ok: taken from the last `result` event
 */
function parse(file) {
  if (!existsSync(file)) return null;

  const calls = [];
  // tool_use id -> call record, so each tool_result is matched in O(1)
  // instead of scanning `calls` for every result.
  const callById = new Map();
  let result = null;
  let initCg = 0;

  // Message content is only walked when it is an array of blocks.
  const blocksOf = (ev) => (Array.isArray(ev.message?.content) ? ev.message.content : []);

  for (const line of readFileSync(file, 'utf8').split('\n')) {
    if (!line) continue;

    let ev;
    try { ev = JSON.parse(line); } catch { continue; }
    // JSON.parse accepts `null` and primitives; only objects are events.
    if (ev === null || typeof ev !== 'object') continue;

    if (ev.type === 'system' && ev.subtype === 'init') {
      initCg = (ev.tools || []).filter((t) => /codegraph/.test(t)).length;
    }

    if (ev.type === 'assistant') {
      for (const block of blocksOf(ev)) {
        if (block?.type !== 'tool_use') continue;
        const call = { id: block.id, name: block.name, out: 0 };
        calls.push(call);
        // Keep the first call for a given id.
        if (!callById.has(call.id)) callById.set(call.id, call);
      }
    }

    if (ev.type === 'user') {
      for (const block of blocksOf(ev)) {
        if (block?.type !== 'tool_result') continue;
        const c = block.content;
        const txt = typeof c === 'string'
          ? c
          : Array.isArray(c) ? c.map((x) => x?.text || '').join('') : '';
        const call = callById.get(block.tool_use_id);
        if (call) call.out = txt.length;
      }
    }

    if (ev.type === 'result') result = ev;
  }

  const cg = calls.filter((c) => typeof c.name === 'string' && c.name.includes('codegraph'));
  const countNamed = (name) => calls.filter((c) => c.name === name).length;

  return {
    initCg,
    reads: countNamed('Read'),
    greps: countNamed('Grep') + countNamed('Glob'), // Glob is counted as a grep-style fallback
    cgCalls: cg.length,
    cgSeq: cg.map((c) => cgShort(c.name)),
    cgOut: cg.reduce((sum, c) => sum + c.out, 0),
    traceUsed: cg.some((c) => c.name.includes('trace')),
    turns: result?.num_turns ?? null,
    dur: result?.duration_ms ? Math.round(result.duration_ms / 1000) : null,
    cost: result?.total_cost_usd || 0,
    ok: result?.subtype === 'success',
  };
}
|
|
54
|
+
|
|
55
|
+
// repo -> arm -> [runs]
|
|
56
|
+
const data = {};
|
|
57
|
+
if (!existsSync(ROOT)) { console.error(`no ${ROOT}`); process.exit(1); }
|
|
58
|
+
for (const repo of readdirSync(ROOT)) {
|
|
59
|
+
const rdir = join(ROOT, repo);
|
|
60
|
+
if (!statSync(rdir).isDirectory()) continue;
|
|
61
|
+
for (const f of readdirSync(rdir)) {
|
|
62
|
+
const m = f.match(/^([A-I])-r(\d+)\.jsonl$/); if (!m) continue;
|
|
63
|
+
const p = parse(join(rdir, f)); if (!p || !p.ok) continue;
|
|
64
|
+
(((data[repo] ??= {})[m[1]]) ??= []).push(p);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// Arithmetic mean of f(x) over the runs in `a`. A falsy projection
// (null, undefined, NaN, 0) contributes 0; an empty list averages to 0.
const avg = (a, f) => {
  if (!a.length) return 0;
  let total = 0;
  for (const item of a) total += f(item) || 0;
  return total / a.length;
};
|
|
69
|
+
// Express a raw count in thousands, fixed to one decimal (1500 -> "1.5").
const k = (n) => {
  const thousands = n / 1000;
  return thousands.toFixed(1);
};
|
|
70
|
+
// Left-align a table cell: stringify the value and space-pad it on the right
// to width n (longer values are returned untruncated).
const pad = (s, n) => {
  const cell = String(s);
  return cell.padEnd(n, ' ');
};
|
|
71
|
+
const ARMS = ['A', 'H', 'I', 'B', 'F', 'G', 'C', 'D', 'E'];
|
|
72
|
+
const LABEL = { A: 'A all/none(old)', H: 'H body-trace/none', I: 'I bodytrace+dest', B: 'B all/steer(thin)', F: 'F all/steer(body)', G: 'G ported(noprompt)', C: 'C no-explore', D: 'D trace-centric', E: 'E nonflow-probe' };
|
|
73
|
+
|
|
74
|
+
// ---- per repo × arm ----
|
|
75
|
+
console.log('\n=== PER REPO × ARM (avg over runs) ===');
|
|
76
|
+
console.log(pad('repo', 22), pad('arm', 16), 'tools', 'trace', pad('reads', 6), pad('cgOutK', 7), pad('turns', 6), 'dur');
|
|
77
|
+
for (const repo of Object.keys(data).sort()) {
|
|
78
|
+
for (const arm of ARMS) {
|
|
79
|
+
const runs = data[repo][arm]; if (!runs?.length) continue;
|
|
80
|
+
console.log(
|
|
81
|
+
pad(repo, 22), pad(LABEL[arm], 16),
|
|
82
|
+
pad(runs[0].initCg, 5),
|
|
83
|
+
pad(runs.filter(r => r.traceUsed).length + '/' + runs.length, 5),
|
|
84
|
+
pad(avg(runs, r => r.reads).toFixed(1), 6),
|
|
85
|
+
pad(k(avg(runs, r => r.cgOut)), 7),
|
|
86
|
+
pad(avg(runs, r => r.turns).toFixed(1), 6),
|
|
87
|
+
avg(runs, r => r.dur).toFixed(0) + 's',
|
|
88
|
+
);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// ---- aggregate per arm (flow arms A–D over the flow repos; E shown apart) ----
|
|
93
|
+
console.log('\n=== AGGREGATE PER ARM (mean across repos) ===');
|
|
94
|
+
console.log(pad('arm', 16), pad('adoption', 9), pad('reads', 7), pad('greps', 7), pad('cgOutK', 8), pad('turns', 7), pad('dur', 6), 'cost');
|
|
95
|
+
for (const arm of ARMS) {
|
|
96
|
+
const all = [];
|
|
97
|
+
for (const repo of Object.keys(data)) for (const r of (data[repo][arm] || [])) all.push({ ...r, repo });
|
|
98
|
+
if (!all.length) continue;
|
|
99
|
+
const repos = new Set(all.map(r => r.repo)).size;
|
|
100
|
+
const adopt = all.filter(r => r.traceUsed).length;
|
|
101
|
+
console.log(
|
|
102
|
+
pad(LABEL[arm], 16),
|
|
103
|
+
pad(`${adopt}/${all.length}`, 9),
|
|
104
|
+
pad(avg(all, r => r.reads).toFixed(2), 7),
|
|
105
|
+
pad(avg(all, r => r.greps).toFixed(2), 7),
|
|
106
|
+
pad(k(avg(all, r => r.cgOut)), 8),
|
|
107
|
+
pad(avg(all, r => r.turns).toFixed(1), 7),
|
|
108
|
+
pad(avg(all, r => r.dur).toFixed(0) + 's', 6),
|
|
109
|
+
'$' + avg(all, r => r.cost).toFixed(3),
|
|
110
|
+
` (${repos} repos)`,
|
|
111
|
+
);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
console.log('\nRead the signal: B vs A = does steering alone fix adoption + cut payload.');
|
|
115
|
+
console.log('C vs B = is explore redundant (reads should NOT jump). D vs C = is context redundant.');
|
|
116
|
+
console.log('E = non-flow under trace-centric; reads SHOULD jump (proves survey tools are load-bearing).');
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Aggregate the README A/B (bench-readme.sh output): per repo, median of N runs
|
|
3
|
+
// per arm → time, tool calls, tokens, cost, and % saved. Plus an average row.
|
|
4
|
+
//
|
|
5
|
+
// Tokens = SUM of per-turn assistant `usage` (input + output + cache read +
|
|
6
|
+
// cache creation) — the cumulative "total tokens processed". NOTE: `result.usage`
|
|
7
|
+
// is last-turn-only in current Claude Code, so it under-counts badly; don't use it.
|
|
8
|
+
// `total_cost_usd` and `duration_ms` are already cumulative.
|
|
9
|
+
//
|
|
10
|
+
// Usage: node parse-bench-readme.mjs [/tmp/ab-readme]
|
|
11
|
+
import { readFileSync, existsSync, readdirSync } from 'fs';
|
|
12
|
+
import { join } from 'path';
|
|
13
|
+
const ROOT = process.argv[2] || '/tmp/ab-readme';
|
|
14
|
+
const REPOS = ['vscode', 'excalidraw', 'django', 'tokio', 'okhttp', 'gin', 'alamofire'];
|
|
15
|
+
|
|
16
|
+
/**
 * Summarize one headless-run transcript (JSONL, one event per line).
 *
 * Lines that are not valid JSON, or that parse to something other than an
 * object (e.g. a bare `null`), are skipped rather than aborting the parse.
 *
 * @param {string} file - Path to the `.jsonl` transcript.
 * @returns {null | {dur: number, tools: number, reads: number, grep: number,
 *   cg: number, tokens: number, cost: number, raced: boolean}}
 *   `null` when the file is missing or the run has no `result` event with
 *   subtype `success`. Otherwise: `dur` in seconds (0 if `duration_ms` is
 *   absent), tool-call counts (`ToolSearch` excluded), `tokens` summed over
 *   every assistant turn's `usage`, `cost` from `total_cost_usd`, and `raced`
 *   when any tool result contained "No such tool available".
 */
function parse(file) {
  if (!existsSync(file)) return null;
  const lines = readFileSync(file, 'utf8').split('\n').filter(Boolean);
  let tools = 0;
  let reads = 0;
  let grep = 0;
  let cg = 0;
  let tokens = 0;
  let r = null;
  let raced = false;
  for (const l of lines) {
    let e;
    try { e = JSON.parse(l); } catch { continue; }
    // Valid JSON is not necessarily an event object; `null.type` would throw.
    if (e === null || typeof e !== 'object') continue;
    const content = Array.isArray(e.message?.content) ? e.message.content : [];
    if (e.type === 'assistant') {
      const u = e.message?.usage;
      if (u) tokens += (u.input_tokens || 0) + (u.output_tokens || 0) + (u.cache_read_input_tokens || 0) + (u.cache_creation_input_tokens || 0);
      for (const b of content) {
        if (b?.type !== 'tool_use') continue;
        const n = b.name;
        if (n === 'ToolSearch') continue;
        tools++;
        if (n === 'Read') reads++;
        else if (n === 'Grep' || n === 'Glob') grep++;
        else if (/codegraph/.test(n)) cg++;
      }
    }
    // MCP cold-start race: the headless agent fired before `codegraph serve --mcp`
    // finished registering its tools, so early calls returned "No such tool
    // available" and the agent floundered into grep/Read. That measures CodeGraph's
    // startup latency, NOT its steady-state value — flag the run so the aggregate
    // can exclude it (an artifact of headless first-turn timing, not the tool).
    if (e.type === 'user') {
      for (const b of content) {
        if (b?.type !== 'tool_result') continue;
        // Content parts may be null or lack `.text`; treat those as empty text.
        const t = Array.isArray(b.content) ? b.content.map((c) => c?.text || '').join('') : (b.content || '');
        if (/No such tool available/.test(t)) raced = true;
      }
    }
    if (e.type === 'result') r = e;
  }
  if (!r || r.subtype !== 'success') return null;
  // Default a missing duration to 0 so a NaN cannot leak into the medians.
  return { dur: (r.duration_ms || 0) / 1000, tools, reads, grep, cg, tokens, cost: r.total_cost_usd || 0, raced };
}
|
|
49
|
+
// Median of a numeric list, computed on a sorted copy so the caller's array is
// untouched: middle value for odd lengths, mean of the two middle values for
// even lengths, 0 for an empty list.
const median = (arr) => {
  const sorted = Array.from(arr);
  sorted.sort((lo, hi) => lo - hi);
  const count = sorted.length;
  if (count === 0) return 0;
  const mid = Math.floor(count / 2);
  if (count % 2 === 1) return sorted[mid];
  return (sorted[mid - 1] + sorted[mid]) / 2;
};
|
|
50
|
+
// Format a duration in seconds as "Ns" (under a minute) or "Mm Ss".
// The value is rounded to whole seconds BEFORE splitting into minutes and
// seconds; rounding the remainder on its own could print "1m 60s" (119.6)
// or "60s" (59.6).
const fmtTime = (s) => {
  const total = Math.round(s);
  return total >= 60 ? `${Math.floor(total / 60)}m ${total % 60}s` : `${total}s`;
};
|
|
51
|
+
// Format a token count as whole thousands ("12k") or millions with one decimal
// ("1.3M"). The switch to millions is decided on the ROUNDED thousands value so
// counts just under one million render as "1.0M" instead of "1000k".
const fmtTok = (t) => {
  const thousands = Math.round(t / 1000);
  return thousands >= 1000 ? `${(t / 1e6).toFixed(1)}M` : `${thousands}k`;
};
|
|
52
|
+
// Whole-percent saving of `w` (with) relative to `wo` (without). Negative when
// `w` is larger; 0 when the baseline is not positive, so there is no division
// by zero.
const pct = (w, wo) => {
  if (!(wo > 0)) return 0;
  const savedFraction = 1 - w / wo;
  return Math.round(savedFraction * 100);
};
|
|
53
|
+
|
|
54
|
+
console.log('repo n(w/wo) time WITH→WITHOUT tools W→WO tokens W→WO (saved) cost W→WO (saved)');
|
|
55
|
+
const savings = { cost: [], tokens: [], time: [], tools: [] };
|
|
56
|
+
for (const repo of REPOS) {
|
|
57
|
+
const dir = join(ROOT, repo);
|
|
58
|
+
const runDirs = existsSync(dir) ? readdirSync(dir).filter(d => /^run\d+$/.test(d)) : [];
|
|
59
|
+
// Exclude MCP-cold-start-raced WITH runs by default — they measure a startup
|
|
60
|
+
// race, not steady-state value. `CG_INCLUDE_RACED=1` keeps them (to see the raw
|
|
61
|
+
// distribution). The WITHOUT arm has no MCP, so it's never raced.
|
|
62
|
+
const includeRaced = process.env.CG_INCLUDE_RACED === '1';
|
|
63
|
+
const W = [], WO = []; let racedExcluded = 0;
|
|
64
|
+
for (const rd of runDirs) {
|
|
65
|
+
const w = parse(join(dir, rd, 'run-headless-with.jsonl'));
|
|
66
|
+
if (w) { if (w.raced && !includeRaced) racedExcluded++; else W.push(w); }
|
|
67
|
+
const wo = parse(join(dir, rd, 'run-headless-without.jsonl')); if (wo) WO.push(wo);
|
|
68
|
+
}
|
|
69
|
+
if (!W.length || !WO.length) { console.log(`${repo.padEnd(11)} (incomplete: w=${W.length} wo=${WO.length})`); continue; }
|
|
70
|
+
const m = (arr, k) => median(arr.map(x => x[k]));
|
|
71
|
+
const wT = m(W, 'dur'), woT = m(WO, 'dur'), wTok = m(W, 'tokens'), woTok = m(WO, 'tokens');
|
|
72
|
+
const wC = m(W, 'cost'), woC = m(WO, 'cost'), wTl = m(W, 'tools'), woTl = m(WO, 'tools');
|
|
73
|
+
savings.time.push(pct(wT, woT)); savings.tokens.push(pct(wTok, woTok)); savings.cost.push(pct(wC, woC)); savings.tools.push(pct(wTl, woTl));
|
|
74
|
+
console.log(
|
|
75
|
+
`${repo.padEnd(11)} ${W.length}/${WO.length} ` +
|
|
76
|
+
`${(fmtTime(wT) + '→' + fmtTime(woT)).padEnd(22)}` +
|
|
77
|
+
`${(Math.round(wTl) + '→' + Math.round(woTl)).padEnd(12)}` +
|
|
78
|
+
`${(fmtTok(wTok) + '→' + fmtTok(woTok) + ' (' + pct(wTok, woTok) + '%)').padEnd(24)}` +
|
|
79
|
+
`$${wC.toFixed(2)}→$${woC.toFixed(2)} (${pct(wC, woC)}%)` +
|
|
80
|
+
(racedExcluded ? ` [${racedExcluded} raced run${racedExcluded === 1 ? '' : 's'} excluded]` : '')
|
|
81
|
+
);
|
|
82
|
+
}
|
|
83
|
+
// Mean of a list of per-repo percentages, rounded to a whole number; 0 when
// the list is empty.
const avg = (a) => {
  if (a.length === 0) return 0;
  let total = 0;
  for (const value of a) total += value;
  return Math.round(total / a.length);
};
|
|
84
|
+
console.log(`\nAVERAGE saved: cost ${avg(savings.cost)}% · tokens ${avg(savings.tokens)}% · time ${avg(savings.time)}% · tool calls ${avg(savings.tools)}%`);
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Probe codegraph_context (with call-paths) against an index using the built dist.
|
|
3
|
+
// Usage: node probe-context.mjs <repo-with-.codegraph> <task words...>
|
|
4
|
+
import { pathToFileURL } from 'node:url';
|
|
5
|
+
import { resolve } from 'node:path';
|
|
6
|
+
|
|
7
|
+
// CLI: first argument is the indexed repo; all remaining words form the task.
const [, , repo, ...taskParts] = process.argv;
const task = taskParts.join(' ');
if (!repo || !task) { console.error('usage: probe-context.mjs <repo> <task...>'); process.exit(1); }

// Paths are resolved against the current working directory.
const load = async (rel) => import(pathToFileURL(resolve(rel)).href);
const idx = await load('dist/index.js');
const tools = await load('dist/mcp/tools.js');
// The export may sit at several nesting levels depending on module interop.
const CodeGraph = idx.default?.default ?? idx.default ?? idx.CodeGraph;
const ToolHandler = tools.ToolHandler ?? tools.default?.ToolHandler;

// Fail with a diagnosable message instead of an opaque TypeError further down.
if (typeof CodeGraph?.openSync !== 'function') {
  console.error('could not resolve CodeGraph.openSync; index keys:', Object.keys(idx), 'default keys:', idx.default && Object.keys(idx.default));
  process.exit(2);
}
if (typeof ToolHandler !== 'function') {
  console.error('could not resolve ToolHandler; tools keys:', Object.keys(tools));
  process.exit(2);
}

const cg = CodeGraph.openSync(repo);
try {
  const h = new ToolHandler(cg);
  const res = await h.execute('codegraph_context', { task });
  console.log(res.content?.[0]?.text ?? '(no text)');
} finally {
  // Best-effort close, also on failure; a close error must not mask the real one.
  try { cg.close?.(); } catch {}
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// One-shot probe: run handleExplore against an existing index using the built
|
|
3
|
+
// dist, print the output + a few stats. Lets us verify explore's coverage fix
|
|
4
|
+
// without a full agent run. Usage: node probe-explore.mjs <repo-with-.codegraph> "<query>"
|
|
5
|
+
import { pathToFileURL } from 'node:url';
|
|
6
|
+
import { resolve } from 'node:path';
|
|
7
|
+
|
|
8
|
+
const [, , repo, query] = process.argv;
|
|
9
|
+
if (!repo || !query) {
|
|
10
|
+
console.error('usage: probe-explore.mjs <repo> "<query>"');
|
|
11
|
+
process.exit(1);
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
const load = async (rel) => import(pathToFileURL(resolve(rel)).href);
|
|
15
|
+
const idx = await load('dist/index.js');
|
|
16
|
+
const tools = await load('dist/mcp/tools.js');
|
|
17
|
+
|
|
18
|
+
// esModuleInterop: dynamic import of CJS yields { default: module.exports, ...named }
|
|
19
|
+
const CodeGraph = idx.default?.default ?? idx.default ?? idx.CodeGraph;
|
|
20
|
+
const ToolHandler = tools.ToolHandler ?? tools.default?.ToolHandler;
|
|
21
|
+
|
|
22
|
+
if (typeof CodeGraph?.openSync !== 'function') {
|
|
23
|
+
console.error('could not resolve CodeGraph.openSync; index keys:', Object.keys(idx), 'default keys:', idx.default && Object.keys(idx.default));
|
|
24
|
+
process.exit(2);
|
|
25
|
+
}
|
|
26
|
+
if (typeof ToolHandler !== 'function') {
|
|
27
|
+
console.error('could not resolve ToolHandler; tools keys:', Object.keys(tools));
|
|
28
|
+
process.exit(2);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
const cg = CodeGraph.openSync(repo);
|
|
32
|
+
const h = new ToolHandler(cg);
|
|
33
|
+
const res = await h.execute('codegraph_explore', { query });
|
|
34
|
+
const text = res.content?.[0]?.text ?? '(no text)';
|
|
35
|
+
console.log(text);
|
|
36
|
+
console.error('\n--- PROBE STATS ---');
|
|
37
|
+
console.error('output chars:', text.length);
|
|
38
|
+
console.error('triggerRender body present (-> setState({})):', /triggerRender[\s\S]{0,400}setState\(\{\}\)/.test(text));
|
|
39
|
+
console.error('App.tsx in source section:', /#### .*App\.tsx —/.test(text));
|
|
40
|
+
try { cg.close?.(); } catch {}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Probe codegraph_node (with trail) against an index using the built dist.
|
|
3
|
+
// Usage: node probe-node.mjs <repo-with-.codegraph> <symbol> [code]
|
|
4
|
+
import { pathToFileURL } from 'node:url';
|
|
5
|
+
import { resolve } from 'node:path';
|
|
6
|
+
|
|
7
|
+
// CLI: <repo> <symbol> [code] — the literal third argument "code" turns on includeCode.
const [, , repo, symbol, code] = process.argv;
if (!repo || !symbol) { console.error('usage: probe-node.mjs <repo> <symbol> [code]'); process.exit(1); }

// Paths are resolved against the current working directory.
const load = async (rel) => import(pathToFileURL(resolve(rel)).href);
const idx = await load('dist/index.js');
const tools = await load('dist/mcp/tools.js');
// The export may sit at several nesting levels depending on module interop.
const CodeGraph = idx.default?.default ?? idx.default ?? idx.CodeGraph;
const ToolHandler = tools.ToolHandler ?? tools.default?.ToolHandler;

// Fail with a diagnosable message instead of an opaque TypeError further down.
if (typeof CodeGraph?.openSync !== 'function') {
  console.error('could not resolve CodeGraph.openSync; index keys:', Object.keys(idx), 'default keys:', idx.default && Object.keys(idx.default));
  process.exit(2);
}
if (typeof ToolHandler !== 'function') {
  console.error('could not resolve ToolHandler; tools keys:', Object.keys(tools));
  process.exit(2);
}

const cg = CodeGraph.openSync(repo);
try {
  const h = new ToolHandler(cg);
  const res = await h.execute('codegraph_node', { symbol, includeCode: code === 'code' });
  console.log(res.content?.[0]?.text ?? '(no text)');
} finally {
  // Best-effort close, also on failure; a close error must not mask the real one.
  try { cg.close?.(); } catch {}
}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// probe-sweep — direct MCP test across N repos × N tools, no claude needed.
|
|
3
|
+
//
|
|
4
|
+
// Measures response characteristics (size, sections present, signals fired)
|
|
5
|
+
// for each (repo, query) pair against the built dist/. Sub-second per probe;
|
|
6
|
+
// the full sweep below runs in ~10-30s vs hours for a real claude audit.
|
|
7
|
+
//
|
|
8
|
+
// Use this to iterate on backend changes rapidly: change tools.ts /
|
|
9
|
+
// context-builder, npm run build, re-run probe-sweep, compare. Once a
|
|
10
|
+
// change looks good on probe metrics, run a focused claude audit for the
|
|
11
|
+
// few repos that matter to confirm end-to-end cost behavior.
|
|
12
|
+
//
|
|
13
|
+
// Usage: node scripts/agent-eval/probe-sweep.mjs [--tool=context|explore|trace] [--repos=a,b,c]
|
|
14
|
+
import { pathToFileURL } from 'node:url';
|
|
15
|
+
import { resolve } from 'node:path';
|
|
16
|
+
|
|
17
|
+
/**
 * Turn CLI arguments into an options object.
 *
 * - `--name=value` -> `{ name: 'value' }`; only the FIRST `=` separates name
 *   from value, so values may themselves contain `=`.
 * - `--name` (no `=`) -> `{ name: undefined }`, so `args.name ?? fallback`
 *   still picks the fallback.
 * - anything not starting with `--` -> `{ [arg]: true }`.
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {Record<string, string | true | undefined>}
 */
const parseArgs = (argv) => Object.fromEntries(argv.map((a) => {
  if (!a.startsWith('--')) return [a, true];
  const body = a.slice(2);
  const eq = body.indexOf('=');
  return eq === -1 ? [body, undefined] : [body.slice(0, eq), body.slice(eq + 1)];
}));
const args = parseArgs(process.argv.slice(2));
|
|
20
|
+
const TOOL = args.tool ?? 'context';
|
|
21
|
+
|
|
22
|
+
const load = (rel) => import(pathToFileURL(resolve(rel)).href);
|
|
23
|
+
const idx = await load('dist/index.js');
|
|
24
|
+
const tools = await load('dist/mcp/tools.js');
|
|
25
|
+
const CodeGraph = idx.default?.default ?? idx.default ?? idx.CodeGraph;
|
|
26
|
+
const ToolHandler = tools.ToolHandler ?? tools.default?.ToolHandler;
|
|
27
|
+
|
|
28
|
+
// Each entry: repo, query, optional 2nd arg for trace (from, to).
|
|
29
|
+
// The query is the same prompt used in the real claude audits, so probe
|
|
30
|
+
// output is directly comparable to the agent's would-be input.
|
|
31
|
+
const SWEEP = [
|
|
32
|
+
// Small realworld template repos (the loss cases from the cross-language sweep)
|
|
33
|
+
{ id: 'gin-rw', repo: '/tmp/codegraph-corpus/gin-realworld', q: 'How does this Gin app route a request through its middleware chain to a handler?' },
|
|
34
|
+
{ id: 'go-mux', repo: '/tmp/codegraph-corpus/go-mux', q: 'How does this gorilla/mux app route a request to its handler?' },
|
|
35
|
+
{ id: 'fastapi-rw', repo: '/tmp/codegraph-corpus/fastapi-realworld', q: 'How does FastAPI route a request through its dependencies to a handler?' },
|
|
36
|
+
{ id: 'spring-pc', repo: '/tmp/codegraph-corpus/spring-petclinic', q: 'How does Spring route an HTTP request to a controller method?' },
|
|
37
|
+
{ id: 'axum-rw', repo: '/tmp/codegraph-corpus/rust-axum-realworld', q: 'How does Axum route a request to its handler in this app?' },
|
|
38
|
+
{ id: 'express-rw', repo: '/tmp/codegraph-corpus/express-realworld', q: 'How does this Express app route a request through middleware to a handler?' },
|
|
39
|
+
{ id: 'kotlin-pc', repo: '/tmp/codegraph-corpus/kotlin-petclinic', q: 'How does the Kotlin Spring app route an HTTP request to its handler?' },
|
|
40
|
+
{ id: 'flask-mb', repo: '/tmp/codegraph-corpus/flask-microblog', q: 'How does this Flask app route a request to a view function?' },
|
|
41
|
+
{ id: 'vapor-tpl', repo: '/tmp/codegraph-corpus/vapor-template', q: 'How does Vapor route an HTTP request to its handler?' },
|
|
42
|
+
{ id: 'cpp-leveldb', repo: '/tmp/codegraph-corpus/cpp-leveldb', q: 'How does LevelDB handle a Put operation through to disk?' },
|
|
43
|
+
{ id: 'lualine', repo: '/tmp/codegraph-corpus/lualine.nvim', q: 'How does lualine assemble and render the statusline?' },
|
|
44
|
+
{ id: 'drupal-admin', repo: '/tmp/codegraph-corpus/drupal-admintoolbar', q: 'How does the Drupal admin toolbar module render its toolbar?' },
|
|
45
|
+
{ id: 'svelte-rw', repo: '/tmp/codegraph-corpus/svelte-realworld', q: 'How does this SvelteKit app route a request to a handler?' },
|
|
46
|
+
{ id: 'react-rw', repo: '/tmp/codegraph-corpus/react-realworld', q: 'How does this React app fetch and display articles?' },
|
|
47
|
+
{ id: 'rails-rw', repo: '/tmp/codegraph-corpus/rails-realworld', q: 'How does Rails route a request to a controller action?' },
|
|
48
|
+
{ id: 'flask-rest', repo: '/tmp/codegraph-corpus/flask-restful-realworld', q: 'How does Flask-RESTful route a request to a resource method?' },
|
|
49
|
+
{ id: 'laravel-rw', repo: '/tmp/codegraph-corpus/laravel-realworld', q: 'How does Laravel route a request to the controller method?' },
|
|
50
|
+
{ id: 'aspnet-rw', repo: '/tmp/codegraph-corpus/aspnet-realworld', q: 'How does ASP.NET route a request to the controller action?' },
|
|
51
|
+
// The iter7 wins/ties (to make sure we don't regress)
|
|
52
|
+
{ id: 'cobra', repo: '/tmp/codegraph-corpus/cobra', q: 'How does cobra parse commands and flags?' },
|
|
53
|
+
{ id: 'sinatra', repo: '/tmp/codegraph-corpus/sinatra', q: 'How does sinatra route a request to its handler?' },
|
|
54
|
+
{ id: 'slim', repo: '/tmp/codegraph-corpus/slim', q: 'How does slim route a request and apply middleware?' },
|
|
55
|
+
];
|
|
56
|
+
|
|
57
|
+
// Detect signals in response text — these are the levers we've added that
|
|
58
|
+
// otherwise only show up via "agent ran X more tool calls" downstream.
|
|
59
|
+
const detect = (text) => ({
|
|
60
|
+
hasEntryPoints: /^### Entry Points/m.test(text),
|
|
61
|
+
hasRelatedSymbols: /^### Related Symbols/m.test(text),
|
|
62
|
+
hasFlowTrace: /^## Inline flow trace/m.test(text),
|
|
63
|
+
hasRouteManifest: /^## Routing manifest/m.test(text),
|
|
64
|
+
hasTopHandler: /^### Top handler file/m.test(text),
|
|
65
|
+
hasSmallRepoTail: /This project is small/.test(text),
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
const filterRepos = args.repos ? new Set(String(args.repos).split(',')) : null;
|
|
69
|
+
const subjects = SWEEP.filter(s => !filterRepos || filterRepos.has(s.id));
|
|
70
|
+
|
|
71
|
+
const t0 = Date.now();
|
|
72
|
+
const rows = [];
|
|
73
|
+
for (const s of subjects) {
|
|
74
|
+
try {
|
|
75
|
+
const cg = CodeGraph.openSync(s.repo);
|
|
76
|
+
const handler = new ToolHandler(cg);
|
|
77
|
+
const t1 = Date.now();
|
|
78
|
+
const res = await handler.execute('codegraph_' + TOOL,
|
|
79
|
+
TOOL === 'context' ? { task: s.q } :
|
|
80
|
+
TOOL === 'explore' ? { query: s.q } : { from: 'main', to: 'main' });
|
|
81
|
+
const text = res.content?.[0]?.text ?? '';
|
|
82
|
+
const signals = detect(text);
|
|
83
|
+
rows.push({
|
|
84
|
+
id: s.id,
|
|
85
|
+
ms: Date.now() - t1,
|
|
86
|
+
chars: text.length,
|
|
87
|
+
lines: text.split('\n').length,
|
|
88
|
+
...signals,
|
|
89
|
+
});
|
|
90
|
+
try { cg.close?.(); } catch {}
|
|
91
|
+
} catch (e) {
|
|
92
|
+
rows.push({ id: s.id, error: String(e).slice(0, 80) });
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// Pretty-print as a compact table.
|
|
97
|
+
const fmt = (r) =>
|
|
98
|
+
r.error
|
|
99
|
+
? ` ${r.id.padEnd(13)} ERROR: ${r.error}`
|
|
100
|
+
: ` ${r.id.padEnd(13)} ${String(r.chars).padStart(6)}c ${String(r.lines).padStart(4)}L ${String(r.ms).padStart(4)}ms` +
|
|
101
|
+
` ${r.hasEntryPoints ? 'EP ' : ' '}` +
|
|
102
|
+
`${r.hasFlowTrace ? 'TRC ' : ' '}` +
|
|
103
|
+
`${r.hasRouteManifest ? 'MAN ' : ' '}` +
|
|
104
|
+
`${r.hasTopHandler ? 'HND ' : ' '}` +
|
|
105
|
+
`${r.hasSmallRepoTail ? 'TAIL' : ' '}`;
|
|
106
|
+
console.log(`=== probe-sweep tool=${TOOL} n=${subjects.length} (${Date.now() - t0}ms total) ===`);
|
|
107
|
+
console.log(' id chars lines ms signals');
|
|
108
|
+
console.log(' ' + '-'.repeat(56));
|
|
109
|
+
for (const r of rows) console.log(fmt(r));
|
|
110
|
+
|
|
111
|
+
// Sum + medians for the size pillar
|
|
112
|
+
// Summary footer over the probes that succeeded (rows without `error`).
const okRows = rows.filter(r => !r.error);
const sizes = okRows.map(r => r.chars).sort((a, b) => a - b);
const mid = Math.floor(sizes.length / 2);
// Median: 0 when every probe errored (instead of printing "undefined"), the
// middle value for odd counts, the mean of the two middle values for even counts.
const median = sizes.length === 0 ? 0
  : sizes.length % 2 ? sizes[mid]
  : (sizes[mid - 1] + sizes[mid]) / 2;
const sum = sizes.reduce((a, b) => a + b, 0);
console.log(`  ${'-'.repeat(64)}`);
console.log(`  median=${median}c  total=${sum}c  ` +
  `manifest=${rows.filter(r => r.hasRouteManifest).length}/${okRows.length}  ` +
  `top-handler=${rows.filter(r => r.hasTopHandler).length}/${okRows.length}`);
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Probe codegraph_trace against an index using the built dist.
|
|
3
|
+
// Usage: node probe-trace.mjs <repo-with-.codegraph> <from> <to>
|
|
4
|
+
import { pathToFileURL } from 'node:url';
|
|
5
|
+
import { resolve } from 'node:path';
|
|
6
|
+
|
|
7
|
+
// CLI: <repo> <from> <to> — the two symbols are passed to codegraph_trace.
const [, , repo, from, to] = process.argv;
if (!repo || !from || !to) { console.error('usage: probe-trace.mjs <repo> <from> <to>'); process.exit(1); }

// Paths are resolved against the current working directory.
const load = async (rel) => import(pathToFileURL(resolve(rel)).href);
const idx = await load('dist/index.js');
const tools = await load('dist/mcp/tools.js');
// The export may sit at several nesting levels depending on module interop.
const CodeGraph = idx.default?.default ?? idx.default ?? idx.CodeGraph;
const ToolHandler = tools.ToolHandler ?? tools.default?.ToolHandler;

// Fail with a diagnosable message instead of an opaque TypeError further down.
if (typeof CodeGraph?.openSync !== 'function') {
  console.error('could not resolve CodeGraph.openSync; index keys:', Object.keys(idx), 'default keys:', idx.default && Object.keys(idx.default));
  process.exit(2);
}
if (typeof ToolHandler !== 'function') {
  console.error('could not resolve ToolHandler; tools keys:', Object.keys(tools));
  process.exit(2);
}

const cg = CodeGraph.openSync(repo);
try {
  const h = new ToolHandler(cg);
  const res = await h.execute('codegraph_trace', { from, to });
  console.log(res.content?.[0]?.text ?? '(no text)');
} finally {
  // Best-effort close, also on failure; a close error must not mask the real one.
  try { cg.close?.(); } catch {}
}
|