@stupidloud/codegraph 0.8.1 → 0.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +319 -152
- package/dist/bin/codegraph.d.ts +4 -0
- package/dist/bin/codegraph.d.ts.map +1 -1
- package/dist/bin/codegraph.js +354 -90
- package/dist/bin/codegraph.js.map +1 -1
- package/dist/bin/node-version-check.d.ts +17 -0
- package/dist/bin/node-version-check.d.ts.map +1 -1
- package/dist/bin/node-version-check.js +37 -0
- package/dist/bin/node-version-check.js.map +1 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +1 -11
- package/dist/config.js.map +1 -1
- package/dist/context/formatter.d.ts.map +1 -1
- package/dist/context/formatter.js +25 -6
- package/dist/context/formatter.js.map +1 -1
- package/dist/context/index.d.ts +22 -0
- package/dist/context/index.d.ts.map +1 -1
- package/dist/context/index.js +257 -6
- package/dist/context/index.js.map +1 -1
- package/dist/context/markers.d.ts +19 -0
- package/dist/context/markers.d.ts.map +1 -0
- package/dist/context/markers.js +22 -0
- package/dist/context/markers.js.map +1 -0
- package/dist/db/index.d.ts +30 -1
- package/dist/db/index.d.ts.map +1 -1
- package/dist/db/index.js +75 -25
- package/dist/db/index.js.map +1 -1
- package/dist/db/queries.d.ts +104 -0
- package/dist/db/queries.d.ts.map +1 -1
- package/dist/db/queries.js +328 -31
- package/dist/db/queries.js.map +1 -1
- package/dist/db/sqlite-adapter.d.ts +24 -23
- package/dist/db/sqlite-adapter.d.ts.map +1 -1
- package/dist/db/sqlite-adapter.js +54 -174
- package/dist/db/sqlite-adapter.js.map +1 -1
- package/dist/directory.d.ts.map +1 -1
- package/dist/directory.js +6 -20
- package/dist/directory.js.map +1 -1
- package/dist/extraction/generated-detection.d.ts +30 -0
- package/dist/extraction/generated-detection.d.ts.map +1 -0
- package/dist/extraction/generated-detection.js +80 -0
- package/dist/extraction/generated-detection.js.map +1 -0
- package/dist/extraction/grammars.d.ts +23 -1
- package/dist/extraction/grammars.d.ts.map +1 -1
- package/dist/extraction/grammars.js +107 -3
- package/dist/extraction/grammars.js.map +1 -1
- package/dist/extraction/index.d.ts +22 -14
- package/dist/extraction/index.d.ts.map +1 -1
- package/dist/extraction/index.js +272 -183
- package/dist/extraction/index.js.map +1 -1
- package/dist/extraction/languages/c-cpp.d.ts.map +1 -1
- package/dist/extraction/languages/c-cpp.js +45 -0
- package/dist/extraction/languages/c-cpp.js.map +1 -1
- package/dist/extraction/languages/csharp.d.ts.map +1 -1
- package/dist/extraction/languages/csharp.js +2 -1
- package/dist/extraction/languages/csharp.js.map +1 -1
- package/dist/extraction/languages/go.d.ts.map +1 -1
- package/dist/extraction/languages/go.js +18 -2
- package/dist/extraction/languages/go.js.map +1 -1
- package/dist/extraction/languages/index.d.ts.map +1 -1
- package/dist/extraction/languages/index.js +6 -0
- package/dist/extraction/languages/index.js.map +1 -1
- package/dist/extraction/languages/java.d.ts.map +1 -1
- package/dist/extraction/languages/java.js +6 -0
- package/dist/extraction/languages/java.js.map +1 -1
- package/dist/extraction/languages/kotlin.d.ts.map +1 -1
- package/dist/extraction/languages/kotlin.js +6 -0
- package/dist/extraction/languages/kotlin.js.map +1 -1
- package/dist/extraction/languages/lua.d.ts +3 -0
- package/dist/extraction/languages/lua.d.ts.map +1 -0
- package/dist/extraction/languages/lua.js +150 -0
- package/dist/extraction/languages/lua.js.map +1 -0
- package/dist/extraction/languages/luau.d.ts +3 -0
- package/dist/extraction/languages/luau.d.ts.map +1 -0
- package/dist/extraction/languages/luau.js +37 -0
- package/dist/extraction/languages/luau.js.map +1 -0
- package/dist/extraction/languages/objc.d.ts +3 -0
- package/dist/extraction/languages/objc.d.ts.map +1 -0
- package/dist/extraction/languages/objc.js +133 -0
- package/dist/extraction/languages/objc.js.map +1 -0
- package/dist/extraction/mybatis-extractor.d.ts +48 -0
- package/dist/extraction/mybatis-extractor.d.ts.map +1 -0
- package/dist/extraction/mybatis-extractor.js +198 -0
- package/dist/extraction/mybatis-extractor.js.map +1 -0
- package/dist/extraction/tree-sitter-types.d.ts +14 -0
- package/dist/extraction/tree-sitter-types.d.ts.map +1 -1
- package/dist/extraction/tree-sitter.d.ts +84 -0
- package/dist/extraction/tree-sitter.d.ts.map +1 -1
- package/dist/extraction/tree-sitter.js +715 -16
- package/dist/extraction/tree-sitter.js.map +1 -1
- package/dist/extraction/vue-extractor.d.ts +15 -0
- package/dist/extraction/vue-extractor.d.ts.map +1 -1
- package/dist/extraction/vue-extractor.js +88 -0
- package/dist/extraction/vue-extractor.js.map +1 -1
- package/dist/extraction/wasm/tree-sitter-lua.wasm +0 -0
- package/dist/extraction/wasm/tree-sitter-luau.wasm +0 -0
- package/dist/extraction/wasm-runtime-flags.d.ts +38 -0
- package/dist/extraction/wasm-runtime-flags.d.ts.map +1 -0
- package/dist/extraction/wasm-runtime-flags.js +106 -0
- package/dist/extraction/wasm-runtime-flags.js.map +1 -0
- package/dist/graph/traversal.d.ts.map +1 -1
- package/dist/graph/traversal.js +76 -38
- package/dist/graph/traversal.js.map +1 -1
- package/dist/index.d.ts +77 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +133 -19
- package/dist/index.js.map +1 -1
- package/dist/installer/config-writer.d.ts +7 -8
- package/dist/installer/config-writer.d.ts.map +1 -1
- package/dist/installer/config-writer.js +7 -27
- package/dist/installer/config-writer.js.map +1 -1
- package/dist/installer/index.d.ts +51 -16
- package/dist/installer/index.d.ts.map +1 -1
- package/dist/installer/index.js +120 -29
- package/dist/installer/index.js.map +1 -1
- package/dist/installer/instructions-template.d.ts +11 -21
- package/dist/installer/instructions-template.d.ts.map +1 -1
- package/dist/installer/instructions-template.js +12 -56
- package/dist/installer/instructions-template.js.map +1 -1
- package/dist/installer/targets/antigravity.d.ts +57 -0
- package/dist/installer/targets/antigravity.d.ts.map +1 -0
- package/dist/installer/targets/antigravity.js +308 -0
- package/dist/installer/targets/antigravity.js.map +1 -0
- package/dist/installer/targets/claude.d.ts +26 -1
- package/dist/installer/targets/claude.d.ts.map +1 -1
- package/dist/installer/targets/claude.js +118 -40
- package/dist/installer/targets/claude.js.map +1 -1
- package/dist/installer/targets/codex.d.ts.map +1 -1
- package/dist/installer/targets/codex.js +15 -13
- package/dist/installer/targets/codex.js.map +1 -1
- package/dist/installer/targets/cursor.d.ts.map +1 -1
- package/dist/installer/targets/cursor.js +61 -36
- package/dist/installer/targets/cursor.js.map +1 -1
- package/dist/installer/targets/gemini.d.ts +26 -0
- package/dist/installer/targets/gemini.d.ts.map +1 -0
- package/dist/installer/targets/gemini.js +167 -0
- package/dist/installer/targets/gemini.js.map +1 -0
- package/dist/installer/targets/hermes.d.ts +18 -0
- package/dist/installer/targets/hermes.d.ts.map +1 -0
- package/dist/installer/targets/hermes.js +359 -0
- package/dist/installer/targets/hermes.js.map +1 -0
- package/dist/installer/targets/kiro.d.ts +27 -0
- package/dist/installer/targets/kiro.d.ts.map +1 -0
- package/dist/installer/targets/kiro.js +178 -0
- package/dist/installer/targets/kiro.js.map +1 -0
- package/dist/installer/targets/opencode.d.ts.map +1 -1
- package/dist/installer/targets/opencode.js +15 -13
- package/dist/installer/targets/opencode.js.map +1 -1
- package/dist/installer/targets/registry.d.ts.map +1 -1
- package/dist/installer/targets/registry.js +8 -0
- package/dist/installer/targets/registry.js.map +1 -1
- package/dist/installer/targets/shared.d.ts.map +1 -1
- package/dist/installer/targets/shared.js +3 -2
- package/dist/installer/targets/shared.js.map +1 -1
- package/dist/installer/targets/types.d.ts +1 -16
- package/dist/installer/targets/types.d.ts.map +1 -1
- package/dist/mcp/daemon-paths.d.ts +46 -0
- package/dist/mcp/daemon-paths.d.ts.map +1 -0
- package/dist/mcp/daemon-paths.js +125 -0
- package/dist/mcp/daemon-paths.js.map +1 -0
- package/dist/mcp/daemon.d.ts +161 -0
- package/dist/mcp/daemon.d.ts.map +1 -0
- package/dist/mcp/daemon.js +403 -0
- package/dist/mcp/daemon.js.map +1 -0
- package/dist/mcp/engine.d.ts +105 -0
- package/dist/mcp/engine.d.ts.map +1 -0
- package/dist/mcp/engine.js +270 -0
- package/dist/mcp/engine.js.map +1 -0
- package/dist/mcp/index.d.ts +70 -52
- package/dist/mcp/index.d.ts.map +1 -1
- package/dist/mcp/index.js +355 -331
- package/dist/mcp/index.js.map +1 -1
- package/dist/mcp/proxy.d.ts +81 -0
- package/dist/mcp/proxy.d.ts.map +1 -0
- package/dist/mcp/proxy.js +510 -0
- package/dist/mcp/proxy.js.map +1 -0
- package/dist/mcp/server-instructions.d.ts +1 -1
- package/dist/mcp/server-instructions.d.ts.map +1 -1
- package/dist/mcp/server-instructions.js +21 -21
- package/dist/mcp/session.d.ts +77 -0
- package/dist/mcp/session.d.ts.map +1 -0
- package/dist/mcp/session.js +294 -0
- package/dist/mcp/session.js.map +1 -0
- package/dist/mcp/tools.d.ts +171 -15
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +1714 -298
- package/dist/mcp/tools.js.map +1 -1
- package/dist/mcp/transport.d.ts +111 -29
- package/dist/mcp/transport.d.ts.map +1 -1
- package/dist/mcp/transport.js +181 -71
- package/dist/mcp/transport.js.map +1 -1
- package/dist/mcp/version.d.ts +19 -0
- package/dist/mcp/version.d.ts.map +1 -0
- package/dist/mcp/version.js +71 -0
- package/dist/mcp/version.js.map +1 -0
- package/dist/resolution/callback-synthesizer.d.ts +10 -0
- package/dist/resolution/callback-synthesizer.d.ts.map +1 -0
- package/dist/resolution/callback-synthesizer.js +1300 -0
- package/dist/resolution/callback-synthesizer.js.map +1 -0
- package/dist/resolution/frameworks/csharp.d.ts.map +1 -1
- package/dist/resolution/frameworks/csharp.js +36 -8
- package/dist/resolution/frameworks/csharp.js.map +1 -1
- package/dist/resolution/frameworks/drupal.d.ts +51 -0
- package/dist/resolution/frameworks/drupal.d.ts.map +1 -0
- package/dist/resolution/frameworks/drupal.js +367 -0
- package/dist/resolution/frameworks/drupal.js.map +1 -0
- package/dist/resolution/frameworks/expo-modules.d.ts +3 -0
- package/dist/resolution/frameworks/expo-modules.d.ts.map +1 -0
- package/dist/resolution/frameworks/expo-modules.js +143 -0
- package/dist/resolution/frameworks/expo-modules.js.map +1 -0
- package/dist/resolution/frameworks/express.d.ts.map +1 -1
- package/dist/resolution/frameworks/express.js +102 -19
- package/dist/resolution/frameworks/express.js.map +1 -1
- package/dist/resolution/frameworks/fabric.d.ts +3 -0
- package/dist/resolution/frameworks/fabric.d.ts.map +1 -0
- package/dist/resolution/frameworks/fabric.js +354 -0
- package/dist/resolution/frameworks/fabric.js.map +1 -0
- package/dist/resolution/frameworks/go.d.ts.map +1 -1
- package/dist/resolution/frameworks/go.js +6 -3
- package/dist/resolution/frameworks/go.js.map +1 -1
- package/dist/resolution/frameworks/index.d.ts +6 -0
- package/dist/resolution/frameworks/index.d.ts.map +1 -1
- package/dist/resolution/frameworks/index.js +29 -1
- package/dist/resolution/frameworks/index.js.map +1 -1
- package/dist/resolution/frameworks/java.d.ts.map +1 -1
- package/dist/resolution/frameworks/java.js +339 -12
- package/dist/resolution/frameworks/java.js.map +1 -1
- package/dist/resolution/frameworks/laravel.d.ts.map +1 -1
- package/dist/resolution/frameworks/laravel.js +17 -8
- package/dist/resolution/frameworks/laravel.js.map +1 -1
- package/dist/resolution/frameworks/nestjs.d.ts.map +1 -1
- package/dist/resolution/frameworks/nestjs.js +324 -0
- package/dist/resolution/frameworks/nestjs.js.map +1 -1
- package/dist/resolution/frameworks/play.d.ts +19 -0
- package/dist/resolution/frameworks/play.d.ts.map +1 -0
- package/dist/resolution/frameworks/play.js +111 -0
- package/dist/resolution/frameworks/play.js.map +1 -0
- package/dist/resolution/frameworks/python.d.ts.map +1 -1
- package/dist/resolution/frameworks/python.js +134 -16
- package/dist/resolution/frameworks/python.js.map +1 -1
- package/dist/resolution/frameworks/react-native.d.ts +3 -0
- package/dist/resolution/frameworks/react-native.d.ts.map +1 -0
- package/dist/resolution/frameworks/react-native.js +360 -0
- package/dist/resolution/frameworks/react-native.js.map +1 -0
- package/dist/resolution/frameworks/react.d.ts.map +1 -1
- package/dist/resolution/frameworks/react.js +96 -3
- package/dist/resolution/frameworks/react.js.map +1 -1
- package/dist/resolution/frameworks/ruby.d.ts.map +1 -1
- package/dist/resolution/frameworks/ruby.js +106 -2
- package/dist/resolution/frameworks/ruby.js.map +1 -1
- package/dist/resolution/frameworks/rust.d.ts.map +1 -1
- package/dist/resolution/frameworks/rust.js +102 -5
- package/dist/resolution/frameworks/rust.js.map +1 -1
- package/dist/resolution/frameworks/swift-objc.d.ts +37 -0
- package/dist/resolution/frameworks/swift-objc.d.ts.map +1 -0
- package/dist/resolution/frameworks/swift-objc.js +252 -0
- package/dist/resolution/frameworks/swift-objc.js.map +1 -0
- package/dist/resolution/frameworks/swift.d.ts.map +1 -1
- package/dist/resolution/frameworks/swift.js +30 -6
- package/dist/resolution/frameworks/swift.js.map +1 -1
- package/dist/resolution/go-module.d.ts +26 -0
- package/dist/resolution/go-module.d.ts.map +1 -0
- package/dist/resolution/go-module.js +78 -0
- package/dist/resolution/go-module.js.map +1 -0
- package/dist/resolution/import-resolver.d.ts +28 -0
- package/dist/resolution/import-resolver.d.ts.map +1 -1
- package/dist/resolution/import-resolver.js +617 -5
- package/dist/resolution/import-resolver.js.map +1 -1
- package/dist/resolution/index.d.ts +11 -0
- package/dist/resolution/index.d.ts.map +1 -1
- package/dist/resolution/index.js +196 -10
- package/dist/resolution/index.js.map +1 -1
- package/dist/resolution/lru-cache.d.ts +24 -0
- package/dist/resolution/lru-cache.d.ts.map +1 -0
- package/dist/resolution/lru-cache.js +62 -0
- package/dist/resolution/lru-cache.js.map +1 -0
- package/dist/resolution/name-matcher.d.ts.map +1 -1
- package/dist/resolution/name-matcher.js +212 -0
- package/dist/resolution/name-matcher.js.map +1 -1
- package/dist/resolution/swift-objc-bridge.d.ts +134 -0
- package/dist/resolution/swift-objc-bridge.d.ts.map +1 -0
- package/dist/resolution/swift-objc-bridge.js +256 -0
- package/dist/resolution/swift-objc-bridge.js.map +1 -0
- package/dist/resolution/types.d.ts +44 -0
- package/dist/resolution/types.d.ts.map +1 -1
- package/dist/resolution/workspace-packages.d.ts +48 -0
- package/dist/resolution/workspace-packages.d.ts.map +1 -0
- package/dist/resolution/workspace-packages.js +208 -0
- package/dist/resolution/workspace-packages.js.map +1 -0
- package/dist/search/query-utils.d.ts +18 -0
- package/dist/search/query-utils.d.ts.map +1 -1
- package/dist/search/query-utils.js +30 -0
- package/dist/search/query-utils.js.map +1 -1
- package/dist/sync/git-hooks.d.ts.map +1 -1
- package/dist/sync/git-hooks.js +2 -0
- package/dist/sync/git-hooks.js.map +1 -1
- package/dist/sync/index.d.ts +3 -1
- package/dist/sync/index.d.ts.map +1 -1
- package/dist/sync/index.js +8 -1
- package/dist/sync/index.js.map +1 -1
- package/dist/sync/watcher.d.ts +214 -12
- package/dist/sync/watcher.d.ts.map +1 -1
- package/dist/sync/watcher.js +467 -55
- package/dist/sync/watcher.js.map +1 -1
- package/dist/sync/worktree.d.ts +54 -0
- package/dist/sync/worktree.d.ts.map +1 -0
- package/dist/sync/worktree.js +137 -0
- package/dist/sync/worktree.js.map +1 -0
- package/dist/types.d.ts +9 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +14 -0
- package/dist/types.js.map +1 -1
- package/dist/utils.js +1 -1
- package/package.json +2 -2
- package/scripts/add-lang/bench.sh +60 -0
- package/scripts/add-lang/check-grammar.mjs +75 -0
- package/scripts/add-lang/dump-ast.mjs +103 -0
- package/scripts/add-lang/verify-extraction.mjs +70 -0
- package/scripts/agent-eval/arms-F.sh +21 -0
- package/scripts/agent-eval/arms-matrix.sh +37 -0
- package/scripts/agent-eval/bench-readme.sh +28 -0
- package/scripts/agent-eval/bench-why-repo.sh +22 -0
- package/scripts/agent-eval/block-read-hook.sh +19 -0
- package/scripts/agent-eval/hook-settings.json +15 -0
- package/scripts/agent-eval/itrun.sh +24 -11
- package/scripts/agent-eval/parse-arms.mjs +116 -0
- package/scripts/agent-eval/parse-bench-readme.mjs +84 -0
- package/scripts/agent-eval/probe-context.mjs +21 -0
- package/scripts/agent-eval/probe-explore.mjs +40 -0
- package/scripts/agent-eval/probe-node.mjs +20 -0
- package/scripts/agent-eval/probe-sweep.mjs +119 -0
- package/scripts/agent-eval/probe-trace.mjs +20 -0
- package/scripts/agent-eval/run-arms.sh +56 -0
- package/scripts/agent-eval/seq-matrix.mjs +137 -0
- package/scripts/build-bundle.sh +118 -0
- package/scripts/npm-sdk.js +75 -0
- package/scripts/npm-shim.js +246 -0
- package/scripts/pack-npm.sh +119 -0
- package/scripts/prepare-release.mjs +270 -0
- package/scripts/patch-tree-sitter-dart.js +0 -112
- package/scripts/release.sh +0 -68
package/dist/mcp/tools.js
CHANGED
|
@@ -4,52 +4,42 @@
|
|
|
4
4
|
*
|
|
5
5
|
* Defines the tools exposed by the CodeGraph MCP server.
|
|
6
6
|
*/
|
|
7
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
8
|
-
if (k2 === undefined) k2 = k;
|
|
9
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
10
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
11
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
12
|
-
}
|
|
13
|
-
Object.defineProperty(o, k2, desc);
|
|
14
|
-
}) : (function(o, m, k, k2) {
|
|
15
|
-
if (k2 === undefined) k2 = k;
|
|
16
|
-
o[k2] = m[k];
|
|
17
|
-
}));
|
|
18
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
19
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
20
|
-
}) : function(o, v) {
|
|
21
|
-
o["default"] = v;
|
|
22
|
-
});
|
|
23
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
24
|
-
var ownKeys = function(o) {
|
|
25
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
26
|
-
var ar = [];
|
|
27
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
28
|
-
return ar;
|
|
29
|
-
};
|
|
30
|
-
return ownKeys(o);
|
|
31
|
-
};
|
|
32
|
-
return function (mod) {
|
|
33
|
-
if (mod && mod.__esModule) return mod;
|
|
34
|
-
var result = {};
|
|
35
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
36
|
-
__setModuleDefault(result, mod);
|
|
37
|
-
return result;
|
|
38
|
-
};
|
|
39
|
-
})();
|
|
40
7
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
41
8
|
exports.ToolHandler = exports.tools = void 0;
|
|
42
9
|
exports.getExploreBudget = getExploreBudget;
|
|
43
10
|
exports.getExploreOutputBudget = getExploreOutputBudget;
|
|
44
|
-
|
|
45
|
-
|
|
11
|
+
exports.formatStaleBanner = formatStaleBanner;
|
|
12
|
+
exports.formatStaleFooter = formatStaleFooter;
|
|
13
|
+
exports.getStaticTools = getStaticTools;
|
|
14
|
+
const directory_1 = require("../directory");
|
|
15
|
+
// Lazy-load the heavy CodeGraph chain off the MCP startup path — see the same
|
|
16
|
+
// helper in engine.ts. ToolHandler must load to answer tools/list (static
|
|
17
|
+
// schemas), but it must NOT drag in sqlite/query layers before the daemon binds;
|
|
18
|
+
// CodeGraph is pulled in only when a tool actually opens a project. require() is
|
|
19
|
+
// sync + cached (CommonJS build).
|
|
20
|
+
const loadCodeGraph = () => require('../index').default;
|
|
21
|
+
const worktree_1 = require("../sync/worktree");
|
|
22
|
+
const query_utils_1 = require("../search/query-utils");
|
|
46
23
|
const fs_1 = require("fs");
|
|
47
24
|
const utils_1 = require("../utils");
|
|
48
|
-
const
|
|
25
|
+
const generated_detection_1 = require("../extraction/generated-detection");
|
|
49
26
|
const path_1 = require("path");
|
|
50
|
-
const db_1 = require("../db");
|
|
51
27
|
/** Maximum output length to prevent context bloat (characters) */
|
|
52
28
|
const MAX_OUTPUT_LENGTH = 15000;
|
|
29
|
+
/**
|
|
30
|
+
* Maximum length for free-form string inputs (query, task, symbol).
|
|
31
|
+
* Bounds memory and CPU when a buggy or hostile MCP client sends a
|
|
32
|
+
* huge payload — without this an attacker could ship a 100MB string
|
|
33
|
+
* and force a full FTS5 scan / OOM the server. 10 000 characters is
|
|
34
|
+
* far beyond any realistic legitimate query.
|
|
35
|
+
*/
|
|
36
|
+
const MAX_INPUT_LENGTH = 10_000;
|
|
37
|
+
/**
|
|
38
|
+
* Maximum length for path-like string inputs (projectPath, path
|
|
39
|
+
* filter, glob pattern). Paths beyond a few thousand chars are
|
|
40
|
+
* never legitimate and signal abuse or a bug upstream.
|
|
41
|
+
*/
|
|
42
|
+
const MAX_PATH_LENGTH = 4_096;
|
|
53
43
|
/**
|
|
54
44
|
* Rust path roots that have no file-system equivalent — `crate` is the
|
|
55
45
|
* current crate, `super` is the parent module, `self` is the current
|
|
@@ -89,38 +79,79 @@ function getExploreBudget(fileCount) {
|
|
|
89
79
|
return 5;
|
|
90
80
|
}
|
|
91
81
|
function getExploreOutputBudget(fileCount) {
|
|
82
|
+
// Tiered budget, scaled to project size. The budget is a CEILING (relevance
|
|
83
|
+
// still gates WHAT is included), and it MUST stay under the agent's INLINE
|
|
84
|
+
// tool-result cap (~25K chars). Above that, the host externalizes the result
|
|
85
|
+
// to a file the agent then Reads back — re-introducing a read AND the
|
|
86
|
+
// cache-write cost — which is exactly what a 35K vscode explore did in the
|
|
87
|
+
// n=4 README A/B. So even large repos cap at ~24K: the answer is the handful
|
|
88
|
+
// of ~100-line flow windows the agent would have grep-located and read (it
|
|
89
|
+
// natively reads ~6–9 files, median 100-line ranges), NOT a sprawl of 12
|
|
90
|
+
// files. Concentration onto the flow emerges from this cap + the named-file-
|
|
91
|
+
// first sort dropping peripheral files. Invariant: a larger tier must never
|
|
92
|
+
// get a smaller `maxCharsPerFile` than a smaller tier.
|
|
93
|
+
if (fileCount < 150) {
|
|
94
|
+
return {
|
|
95
|
+
// ITER3: revert iter2's aggressive body shrink (forced Read fallback —
|
|
96
|
+
// the per-file 2.5K cap pushed the agent to Read instead of node).
|
|
97
|
+
// Back to the iter1 shape (13K/4/3.8K) but keep the test-file
|
|
98
|
+
// hard-exclude. The cost lever for this tier lives in steering the
|
|
99
|
+
// agent to stop after 1-2 calls, not in this budget.
|
|
100
|
+
maxOutputChars: 13000,
|
|
101
|
+
defaultMaxFiles: 4,
|
|
102
|
+
maxCharsPerFile: 3800,
|
|
103
|
+
gapThreshold: 7,
|
|
104
|
+
maxSymbolsInFileHeader: 5,
|
|
105
|
+
maxEdgesPerRelationshipKind: 4,
|
|
106
|
+
includeRelationships: false,
|
|
107
|
+
includeAdditionalFiles: false,
|
|
108
|
+
includeCompletenessSignal: false,
|
|
109
|
+
includeBudgetNote: false,
|
|
110
|
+
excludeLowValueFiles: true,
|
|
111
|
+
};
|
|
112
|
+
}
|
|
92
113
|
if (fileCount < 500) {
|
|
93
114
|
return {
|
|
115
|
+
// ITER3: same revert/keep-filter pattern as <150.
|
|
94
116
|
maxOutputChars: 18000,
|
|
95
117
|
defaultMaxFiles: 5,
|
|
96
118
|
maxCharsPerFile: 3800,
|
|
97
119
|
gapThreshold: 8,
|
|
98
120
|
maxSymbolsInFileHeader: 6,
|
|
99
121
|
maxEdgesPerRelationshipKind: 6,
|
|
100
|
-
includeRelationships:
|
|
122
|
+
includeRelationships: false,
|
|
101
123
|
includeAdditionalFiles: false,
|
|
102
124
|
includeCompletenessSignal: false,
|
|
103
125
|
includeBudgetNote: false,
|
|
126
|
+
excludeLowValueFiles: true,
|
|
104
127
|
};
|
|
105
128
|
}
|
|
106
129
|
if (fileCount < 5000) {
|
|
107
130
|
return {
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
131
|
+
// ~150-line per-file window (the native read unit) × ~6 files, capped at
|
|
132
|
+
// the ~24K inline ceiling so the response is never externalized. Per-file
|
|
133
|
+
// stays ≥ the <500 tier (3800) — monotonic.
|
|
134
|
+
maxOutputChars: 24000,
|
|
135
|
+
defaultMaxFiles: 8,
|
|
136
|
+
maxCharsPerFile: 6500,
|
|
137
|
+
gapThreshold: 12,
|
|
138
|
+
maxSymbolsInFileHeader: 10,
|
|
139
|
+
maxEdgesPerRelationshipKind: 10,
|
|
114
140
|
includeRelationships: true,
|
|
115
141
|
includeAdditionalFiles: true,
|
|
116
142
|
includeCompletenessSignal: true,
|
|
117
143
|
includeBudgetNote: true,
|
|
144
|
+
excludeLowValueFiles: false,
|
|
118
145
|
};
|
|
119
146
|
}
|
|
147
|
+
// Large + very-large repos: SAME ~24K inline ceiling (a bigger response just
|
|
148
|
+
// externalizes — see vscode). More files indexed → more CALLS via
|
|
149
|
+
// getExploreBudget, not a bigger single response. Per-file 7000 (≥ smaller
|
|
150
|
+
// tiers) gives the central file a ~180-line orientation window.
|
|
120
151
|
if (fileCount < 15000) {
|
|
121
152
|
return {
|
|
122
|
-
maxOutputChars:
|
|
123
|
-
defaultMaxFiles:
|
|
153
|
+
maxOutputChars: 24000,
|
|
154
|
+
defaultMaxFiles: 8,
|
|
124
155
|
maxCharsPerFile: 7000,
|
|
125
156
|
gapThreshold: 15,
|
|
126
157
|
maxSymbolsInFileHeader: 15,
|
|
@@ -129,11 +160,12 @@ function getExploreOutputBudget(fileCount) {
|
|
|
129
160
|
includeAdditionalFiles: true,
|
|
130
161
|
includeCompletenessSignal: true,
|
|
131
162
|
includeBudgetNote: true,
|
|
163
|
+
excludeLowValueFiles: false,
|
|
132
164
|
};
|
|
133
165
|
}
|
|
134
166
|
return {
|
|
135
|
-
maxOutputChars:
|
|
136
|
-
defaultMaxFiles:
|
|
167
|
+
maxOutputChars: 24000,
|
|
168
|
+
defaultMaxFiles: 8,
|
|
137
169
|
maxCharsPerFile: 7000,
|
|
138
170
|
gapThreshold: 15,
|
|
139
171
|
maxSymbolsInFileHeader: 15,
|
|
@@ -142,6 +174,7 @@ function getExploreOutputBudget(fileCount) {
|
|
|
142
174
|
includeAdditionalFiles: true,
|
|
143
175
|
includeCompletenessSignal: true,
|
|
144
176
|
includeBudgetNote: true,
|
|
177
|
+
excludeLowValueFiles: false,
|
|
145
178
|
};
|
|
146
179
|
}
|
|
147
180
|
/**
|
|
@@ -158,6 +191,21 @@ function getExploreOutputBudget(fileCount) {
|
|
|
158
191
|
function exploreLineNumbersEnabled() {
|
|
159
192
|
return process.env.CODEGRAPH_EXPLORE_LINENUMS !== '0';
|
|
160
193
|
}
|
|
194
|
+
/**
|
|
195
|
+
* Adaptive explore sizing (default ON). `codegraph_explore` skeletonizes OFF-SPINE
|
|
196
|
+
* polymorphic-sibling files — a file whose class is one of ≥3 interchangeable
|
|
197
|
+
* implementations of a shared interface (e.g. OkHttp's `: Interceptor` classes) —
|
|
198
|
+
* to class + member signatures (bodies elided), keeping the on-spine exemplar full.
|
|
199
|
+
* This sizes the response to the answer instead of the budget cap on sibling-heavy
|
|
200
|
+
* flows (OkHttp interceptor-chain explore 28.5k→16.6k, ~28% cheaper than native
|
|
201
|
+
* search, reads flat). It is PROVABLY INERT elsewhere: distinct pipeline steps (no
|
|
202
|
+
* ≥3-implementer supertype, e.g. Excalidraw's `renderStaticScene`) and on-spine
|
|
203
|
+
* files keep full source — output is byte-identical to shipped on excalidraw /
|
|
204
|
+
* tokio / django / vscode / gin. Set `CODEGRAPH_ADAPTIVE_EXPLORE=0` to disable.
|
|
205
|
+
*/
|
|
206
|
+
function adaptiveExploreEnabled() {
|
|
207
|
+
return process.env.CODEGRAPH_ADAPTIVE_EXPLORE !== '0' && process.env.CODEGRAPH_ADAPTIVE_EXPLORE !== 'false';
|
|
208
|
+
}
|
|
161
209
|
/**
|
|
162
210
|
* Prefix each line of a source slice with its 1-based line number, matching
|
|
163
211
|
* the Read tool's `cat -n` convention (number + tab) so the agent treats it
|
|
@@ -175,18 +223,40 @@ function numberSourceLines(slice, firstLineNumber) {
|
|
|
175
223
|
return out.join('\n');
|
|
176
224
|
}
|
|
177
225
|
/**
|
|
178
|
-
*
|
|
179
|
-
*
|
|
226
|
+
* Per-file staleness banner emitted at the top of a tool response when the
|
|
227
|
+
* file watcher has pending events for files referenced by the response.
|
|
228
|
+
* The agent uses this to fall back to Read for those specific files
|
|
229
|
+
* without waiting for the debounced sync (issue #403).
|
|
180
230
|
*/
|
|
181
|
-
function
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
const
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
231
|
+
function formatStaleBanner(stale) {
|
|
232
|
+
const now = Date.now();
|
|
233
|
+
const lines = stale.map((p) => {
|
|
234
|
+
const ageMs = Math.max(0, now - p.lastSeenMs);
|
|
235
|
+
const label = p.indexing ? 'indexing in progress' : 'pending sync';
|
|
236
|
+
return ` - ${p.path} (edited ${ageMs}ms ago, ${label})`;
|
|
237
|
+
});
|
|
238
|
+
return ('⚠️ Some files referenced below were edited since the last index sync — ' +
|
|
239
|
+
'their codegraph entries may be stale:\n' +
|
|
240
|
+
lines.join('\n') +
|
|
241
|
+
'\nFor accurate content of those specific files, Read them directly. ' +
|
|
242
|
+
'The rest of this response is fresh.');
|
|
243
|
+
}
|
|
244
|
+
/**
|
|
245
|
+
* Compact footer listing pending files that are NOT referenced in this
|
|
246
|
+
* response. Gives the agent a complete project-wide freshness picture
|
|
247
|
+
* without bloating the main banner.
|
|
248
|
+
*/
|
|
249
|
+
function formatStaleFooter(stale) {
|
|
250
|
+
const MAX = 5;
|
|
251
|
+
const now = Date.now();
|
|
252
|
+
const shown = stale.slice(0, MAX);
|
|
253
|
+
const lines = shown.map((p) => {
|
|
254
|
+
const ageMs = Math.max(0, now - p.lastSeenMs);
|
|
255
|
+
return ` - ${p.path} (edited ${ageMs}ms ago)`;
|
|
256
|
+
});
|
|
257
|
+
const more = stale.length > MAX ? `\n - …and ${stale.length - MAX} more` : '';
|
|
258
|
+
return (`(Note: ${stale.length} file(s) elsewhere in this project are pending index ` +
|
|
259
|
+
`sync but were not referenced above:\n${lines.join('\n')}${more})`);
|
|
190
260
|
}
|
|
191
261
|
/**
|
|
192
262
|
* Common projectPath property for cross-project queries
|
|
@@ -198,15 +268,16 @@ const projectPathProperty = {
|
|
|
198
268
|
/**
|
|
199
269
|
* All CodeGraph MCP tools
|
|
200
270
|
*
|
|
201
|
-
* Designed for minimal context usage - use
|
|
202
|
-
* and only use other tools for
|
|
271
|
+
* Designed for minimal context usage - use codegraph_explore as the primary tool
|
|
272
|
+
* (one call usually answers the whole question), and only use other tools for
|
|
273
|
+
* targeted follow-up queries.
|
|
203
274
|
*
|
|
204
275
|
* All tools support cross-project queries via the optional `projectPath` parameter.
|
|
205
276
|
*/
|
|
206
277
|
exports.tools = [
|
|
207
278
|
{
|
|
208
279
|
name: 'codegraph_search',
|
|
209
|
-
description: 'Quick symbol search by name. Returns locations only (no code)
|
|
280
|
+
description: 'Quick symbol search by name. Returns locations only (no code). Use codegraph_explore instead to get the actual source / understand an area in one call.',
|
|
210
281
|
inputSchema: {
|
|
211
282
|
type: 'object',
|
|
212
283
|
properties: {
|
|
@@ -229,34 +300,9 @@ exports.tools = [
|
|
|
229
300
|
required: ['query'],
|
|
230
301
|
},
|
|
231
302
|
},
|
|
232
|
-
{
|
|
233
|
-
name: 'codegraph_context',
|
|
234
|
-
description: 'PRIMARY TOOL — call this FIRST for any "how does X work", architecture, feature, or bug-context question. Composes search + node + callers + callees and returns entry points, related symbols, and key code in ONE call — usually enough to answer with no further search/Read/Grep. Prefer this over chaining codegraph_search + codegraph_node, and over codegraph_explore. NOTE: provides CODE context, not product requirements; for new features still clarify UX/edge cases with the user.',
|
|
235
|
-
inputSchema: {
|
|
236
|
-
type: 'object',
|
|
237
|
-
properties: {
|
|
238
|
-
task: {
|
|
239
|
-
type: 'string',
|
|
240
|
-
description: 'Description of the task, bug, or feature to build context for',
|
|
241
|
-
},
|
|
242
|
-
maxNodes: {
|
|
243
|
-
type: 'number',
|
|
244
|
-
description: 'Maximum symbols to include (default: 20)',
|
|
245
|
-
default: 20,
|
|
246
|
-
},
|
|
247
|
-
includeCode: {
|
|
248
|
-
type: 'boolean',
|
|
249
|
-
description: 'Include code snippets for key symbols (default: true)',
|
|
250
|
-
default: true,
|
|
251
|
-
},
|
|
252
|
-
projectPath: projectPathProperty,
|
|
253
|
-
},
|
|
254
|
-
required: ['task'],
|
|
255
|
-
},
|
|
256
|
-
},
|
|
257
303
|
{
|
|
258
304
|
name: 'codegraph_callers',
|
|
259
|
-
description: '
|
|
305
|
+
description: 'List functions that call <symbol>. For the full flow, use codegraph_explore.',
|
|
260
306
|
inputSchema: {
|
|
261
307
|
type: 'object',
|
|
262
308
|
properties: {
|
|
@@ -276,7 +322,7 @@ exports.tools = [
|
|
|
276
322
|
},
|
|
277
323
|
{
|
|
278
324
|
name: 'codegraph_callees',
|
|
279
|
-
description: '
|
|
325
|
+
description: 'List functions that <symbol> calls. For the full flow, use codegraph_explore.',
|
|
280
326
|
inputSchema: {
|
|
281
327
|
type: 'object',
|
|
282
328
|
properties: {
|
|
@@ -296,7 +342,7 @@ exports.tools = [
|
|
|
296
342
|
},
|
|
297
343
|
{
|
|
298
344
|
name: 'codegraph_impact',
|
|
299
|
-
description: '
|
|
345
|
+
description: 'List symbols affected by changing <symbol>. Use before a refactor.',
|
|
300
346
|
inputSchema: {
|
|
301
347
|
type: 'object',
|
|
302
348
|
properties: {
|
|
@@ -316,7 +362,7 @@ exports.tools = [
|
|
|
316
362
|
},
|
|
317
363
|
{
|
|
318
364
|
name: 'codegraph_node',
|
|
319
|
-
description: '
|
|
365
|
+
description: 'SECONDARY (after codegraph_explore): get ONE symbol in full — its location, signature, callers/callees trail, and verbatim body (includeCode=true). When the name is AMBIGUOUS (an overloaded method, or the same method name on different types), it returns EVERY matching definition\'s full body in a single call — so you never need to Read a file to find the specific overload you want. For a heavily-overloaded name, pass `file` (and/or `line`) to pin the exact definition — e.g. the `file:line` a trail or another tool already showed you. Reach for this when explore trimmed a body you need. Use codegraph_explore for several related symbols or the full flow.',
|
|
320
366
|
inputSchema: {
|
|
321
367
|
type: 'object',
|
|
322
368
|
properties: {
|
|
@@ -329,6 +375,14 @@ exports.tools = [
|
|
|
329
375
|
description: 'Include full source code (default: false to minimize context)',
|
|
330
376
|
default: false,
|
|
331
377
|
},
|
|
378
|
+
file: {
|
|
379
|
+
type: 'string',
|
|
380
|
+
description: 'Optional: disambiguate an overloaded name to the definition in this file (path or basename, e.g. "harness.rs").',
|
|
381
|
+
},
|
|
382
|
+
line: {
|
|
383
|
+
type: 'number',
|
|
384
|
+
description: 'Optional: disambiguate to the definition at/around this line (use with the file:line a trail showed you).',
|
|
385
|
+
},
|
|
332
386
|
projectPath: projectPathProperty,
|
|
333
387
|
},
|
|
334
388
|
required: ['symbol'],
|
|
@@ -336,7 +390,7 @@ exports.tools = [
|
|
|
336
390
|
},
|
|
337
391
|
{
|
|
338
392
|
name: 'codegraph_explore',
|
|
339
|
-
description: 'PRIMARY TOOL
|
|
393
|
+
description: 'PRIMARY TOOL — call FIRST for almost any question: how does X work, architecture, a bug, where/what is X, or surveying an area. Returns the verbatim source of the relevant symbols grouped by file in ONE capped call (Read-equivalent — do NOT re-open shown files). Query can be a natural-language question OR a bag of symbol/file names. Usually the ONLY call you need — answers without further search/node/Read/Grep.',
|
|
340
394
|
inputSchema: {
|
|
341
395
|
type: 'object',
|
|
342
396
|
properties: {
|
|
@@ -356,7 +410,7 @@ exports.tools = [
|
|
|
356
410
|
},
|
|
357
411
|
{
|
|
358
412
|
name: 'codegraph_status',
|
|
359
|
-
description: '
|
|
413
|
+
description: 'Index health check (files / nodes / edges). Skip unless debugging.',
|
|
360
414
|
inputSchema: {
|
|
361
415
|
type: 'object',
|
|
362
416
|
properties: {
|
|
@@ -366,7 +420,7 @@ exports.tools = [
|
|
|
366
420
|
},
|
|
367
421
|
{
|
|
368
422
|
name: 'codegraph_files',
|
|
369
|
-
description: '
|
|
423
|
+
description: 'Indexed file tree with language + symbol counts. Faster than Glob for project layout.',
|
|
370
424
|
inputSchema: {
|
|
371
425
|
type: 'object',
|
|
372
426
|
properties: {
|
|
@@ -398,6 +452,19 @@ exports.tools = [
|
|
|
398
452
|
},
|
|
399
453
|
},
|
|
400
454
|
];
|
|
455
|
+
/**
 * Tool definitions filtered by the CODEGRAPH_MCP_TOOLS allowlist, computed
 * without any CodeGraph instance. This is the static surface used to answer
 * `tools/list` before a project is open; only `exports.tools` and the
 * environment are consulted.
 *
 * The variable is a comma-separated list of tool names; a leading
 * `codegraph_` prefix is optional on either side of the comparison. When the
 * variable is unset, blank, or yields no usable names, the full
 * `exports.tools` array is returned as-is.
 *
 * @returns {Array<object>} The visible tool definitions.
 */
function getStaticTools() {
    const stripPrefix = (toolName) => toolName.replace(/^codegraph_/, '');
    const spec = process.env.CODEGRAPH_MCP_TOOLS;
    if (!spec || !spec.trim()) {
        return exports.tools;
    }
    const permitted = new Set();
    for (const piece of spec.split(',')) {
        const shortName = stripPrefix(piece.trim());
        if (shortName) {
            permitted.add(shortName);
        }
    }
    // A list made only of separators/prefixes means "no restriction".
    if (permitted.size === 0) {
        return exports.tools;
    }
    return exports.tools.filter((tool) => permitted.has(stripPrefix(tool.name)));
}
|
|
401
468
|
/**
|
|
402
469
|
* Tool handler that executes tools against a CodeGraph instance
|
|
403
470
|
*
|
|
@@ -411,6 +478,20 @@ class ToolHandler {
|
|
|
411
478
|
// The directory the server last searched for a default project. Surfaced in
|
|
412
479
|
// the "not initialized" error so users can see why detection missed.
|
|
413
480
|
defaultProjectHint = null;
|
|
481
|
+
// Per-start-path cache of the git worktree/index mismatch (issue #155). The
|
|
482
|
+
// mismatch is a fixed property of (where the request came from → which
|
|
483
|
+
// .codegraph/ it resolves to), so the up-to-two `git rev-parse` spawns run
|
|
484
|
+
// once and every later tool call reuses the result — never shelling out to
|
|
485
|
+
// git on the hot path. `undefined` = not computed yet; `null` = no mismatch.
|
|
486
|
+
worktreeMismatchCache = new Map();
|
|
487
|
+
// Gate that the MCP engine pokes after `cg.open()` so the first tool call
|
|
488
|
+
// blocks on the post-open filesystem reconcile (catch-up sync). Without
|
|
489
|
+
// this, a tool call that races past `catchUpSync()` serves rows for files
|
|
490
|
+
// that were deleted (or edited) while no MCP server was running — and the
|
|
491
|
+
// per-file staleness banner can't help, because `getPendingFiles()` is
|
|
492
|
+
// populated by the watcher, not by catch-up. Cleared on first await so
|
|
493
|
+
// subsequent calls don't pay any cost.
|
|
494
|
+
catchUpGate = null;
|
|
414
495
|
constructor(cg) {
|
|
415
496
|
this.cg = cg;
|
|
416
497
|
}
|
|
@@ -420,6 +501,16 @@ class ToolHandler {
|
|
|
420
501
|
setDefaultCodeGraph(cg) {
|
|
421
502
|
this.cg = cg;
|
|
422
503
|
}
|
|
504
|
+
/**
|
|
505
|
+
* Engine-only: register the catch-up sync promise so the next `execute()`
|
|
506
|
+
* call awaits it before serving. The handler swallows rejections (the
|
|
507
|
+
* engine logs them) so a sync failure never propagates as a tool error;
|
|
508
|
+
* we still want to serve a best-effort result over the same potentially-
|
|
509
|
+
* stale data, which is what would have happened without the gate.
|
|
510
|
+
*/
|
|
511
|
+
setCatchUpGate(p) {
|
|
512
|
+
this.catchUpGate = p;
|
|
513
|
+
}
|
|
423
514
|
/**
|
|
424
515
|
* Record the directory the server tried to resolve the default project from.
|
|
425
516
|
* Used only to make the "no default project" error actionable.
|
|
@@ -433,18 +524,75 @@ class ToolHandler {
|
|
|
433
524
|
hasDefaultCodeGraph() {
|
|
434
525
|
return this.cg !== null;
|
|
435
526
|
}
|
|
527
|
+
/**
|
|
528
|
+
* Optional allowlist of exposed tools, parsed from the CODEGRAPH_MCP_TOOLS
|
|
529
|
+
* env var (comma-separated short names, e.g. "trace,search,node,context").
|
|
530
|
+
* Unset/empty → every tool is exposed. Lets an operator (or an A/B harness)
|
|
531
|
+
* trim the tool surface without rebuilding the client config; the ablated
|
|
532
|
+
* tool is then truly absent from ListTools rather than merely denied on call.
|
|
533
|
+
* Matching is on the short form, so "node" and "codegraph_node" both work.
|
|
534
|
+
*/
|
|
535
|
+
toolAllowlist() {
|
|
536
|
+
const raw = process.env.CODEGRAPH_MCP_TOOLS;
|
|
537
|
+
if (!raw || !raw.trim())
|
|
538
|
+
return null;
|
|
539
|
+
const short = (s) => s.trim().replace(/^codegraph_/, '');
|
|
540
|
+
const set = new Set(raw.split(',').map(short).filter(Boolean));
|
|
541
|
+
return set.size ? set : null;
|
|
542
|
+
}
|
|
543
|
+
/** Whether a tool name passes the CODEGRAPH_MCP_TOOLS allowlist (if any). */
|
|
544
|
+
isToolAllowed(name) {
|
|
545
|
+
const allow = this.toolAllowlist();
|
|
546
|
+
return !allow || allow.has(name.replace(/^codegraph_/, ''));
|
|
547
|
+
}
|
|
436
548
|
/**
|
|
437
549
|
* Get tool definitions with dynamic descriptions based on project size.
|
|
438
550
|
* The codegraph_explore tool description includes a budget recommendation
|
|
439
|
-
* scaled to the number of indexed files.
|
|
551
|
+
* scaled to the number of indexed files. Honors the CODEGRAPH_MCP_TOOLS
|
|
552
|
+
* allowlist so a trimmed surface is reflected in ListTools.
|
|
440
553
|
*/
|
|
441
554
|
getTools() {
|
|
555
|
+
const allow = this.toolAllowlist();
|
|
556
|
+
let visible = allow
|
|
557
|
+
? exports.tools.filter(t => allow.has(t.name.replace(/^codegraph_/, '')))
|
|
558
|
+
: exports.tools;
|
|
442
559
|
if (!this.cg)
|
|
443
|
-
return
|
|
560
|
+
return visible;
|
|
444
561
|
try {
|
|
445
562
|
const stats = this.cg.getStats();
|
|
446
563
|
const budget = getExploreBudget(stats.fileCount);
|
|
447
|
-
|
|
564
|
+
// Tiny-repo tool gating: on projects under TINY_REPO_FILE_THRESHOLD
|
|
565
|
+
// files, only expose the 5 core tools (search, context, node,
|
|
566
|
+
// explore, trace). The 5 omitted tools (callers, callees, impact,
|
|
567
|
+
// status, files) reduce to one grep at this scale.
|
|
568
|
+
//
|
|
569
|
+
// n=2 audits ruled out cutting below 5 tools:
|
|
570
|
+
// - 3-tool gate (search + context + trace): cost regressed on
|
|
571
|
+
// cobra/ky/sinatra. The agent fell back to raw Reads to cover
|
|
572
|
+
// what codegraph_node + codegraph_explore would have answered.
|
|
573
|
+
// - 1-tool gate (search only): catastrophic regression — express
|
|
574
|
+
// went from -43% WIN to +107% LOSS. With only search, the agent
|
|
575
|
+
// can't navigate the call graph structurally and reads everything.
|
|
576
|
+
//
|
|
577
|
+
// 5 is the empirical lower bound. Tools beyond search/context/
|
|
578
|
+
// node/explore/trace pay overhead that the agent doesn't recoup
|
|
579
|
+
// on tiny-repo flow questions.
|
|
580
|
+
// ITER4: raise threshold 150 → 500 so single-file frameworks
|
|
581
|
+
// (sinatra at 159, slim_framework around 200) also get the
|
|
582
|
+
// 5-tool surface. The empirical 5-tool floor was set on <150
|
|
583
|
+
// probes; iter3 measurement showed sinatra is structurally the
|
|
584
|
+
// SAME problem as cobra (single-file WITHOUT-arm Read wins),
|
|
585
|
+
// so it deserves the same gating.
|
|
586
|
+
const TINY_REPO_FILE_THRESHOLD = 500;
|
|
587
|
+
const TINY_REPO_CORE_TOOLS = new Set([
|
|
588
|
+
'codegraph_explore',
|
|
589
|
+
'codegraph_search',
|
|
590
|
+
'codegraph_node',
|
|
591
|
+
]);
|
|
592
|
+
if (stats.fileCount < TINY_REPO_FILE_THRESHOLD) {
|
|
593
|
+
visible = visible.filter(t => TINY_REPO_CORE_TOOLS.has(t.name));
|
|
594
|
+
}
|
|
595
|
+
return visible.map(tool => {
|
|
448
596
|
if (tool.name === 'codegraph_explore') {
|
|
449
597
|
return {
|
|
450
598
|
...tool,
|
|
@@ -455,7 +603,7 @@ class ToolHandler {
|
|
|
455
603
|
});
|
|
456
604
|
}
|
|
457
605
|
catch {
|
|
458
|
-
return
|
|
606
|
+
return visible;
|
|
459
607
|
}
|
|
460
608
|
}
|
|
461
609
|
/**
|
|
@@ -485,11 +633,32 @@ class ToolHandler {
|
|
|
485
633
|
if (this.projectCache.has(projectPath)) {
|
|
486
634
|
return this.projectCache.get(projectPath);
|
|
487
635
|
}
|
|
636
|
+
// Reject sensitive system directories before opening. Only validate a
|
|
637
|
+
// path that actually exists — a nested or not-yet-created sub-path of a
|
|
638
|
+
// real project must still be allowed to resolve UP to its .codegraph/
|
|
639
|
+
// root below (issue #238), so we don't run the existence-checking
|
|
640
|
+
// validator on paths that are meant to walk up.
|
|
641
|
+
if ((0, fs_1.existsSync)(projectPath)) {
|
|
642
|
+
const pathError = (0, utils_1.validateProjectPath)(projectPath);
|
|
643
|
+
if (pathError) {
|
|
644
|
+
throw new Error(pathError);
|
|
645
|
+
}
|
|
646
|
+
}
|
|
488
647
|
// Walk up parent directories to find nearest .codegraph/
|
|
489
|
-
const resolvedRoot = (0,
|
|
648
|
+
const resolvedRoot = (0, directory_1.findNearestCodeGraphRoot)(projectPath);
|
|
490
649
|
if (!resolvedRoot) {
|
|
491
650
|
throw new Error(`CodeGraph not initialized in ${projectPath}. Run 'codegraph init' in that project first.`);
|
|
492
651
|
}
|
|
652
|
+
// If the path resolves to the default project, reuse the already-open
|
|
653
|
+
// default instance rather than opening a SECOND connection to the same DB.
|
|
654
|
+
// A duplicate connection serializes reads against the watcher's auto-sync
|
|
655
|
+
// writes; on the wasm backend (no WAL) that surfaces as intermittent
|
|
656
|
+
// "database is locked" on concurrent tool calls. See issue #238. Deliberately
|
|
657
|
+
// not cached under projectPath — the server owns and closes the default
|
|
658
|
+
// instance, so routing it through projectCache.closeAll() would double-close it.
|
|
659
|
+
if (this.cg && this.cg.getProjectRoot() === resolvedRoot) {
|
|
660
|
+
return this.cg;
|
|
661
|
+
}
|
|
493
662
|
// Check if we already have this resolved root cached (different path, same project)
|
|
494
663
|
if (this.projectCache.has(resolvedRoot)) {
|
|
495
664
|
const cg = this.projectCache.get(resolvedRoot);
|
|
@@ -498,7 +667,7 @@ class ToolHandler {
|
|
|
498
667
|
return cg;
|
|
499
668
|
}
|
|
500
669
|
// Open and cache under both paths
|
|
501
|
-
const cg =
|
|
670
|
+
const cg = loadCodeGraph().openSync(resolvedRoot);
|
|
502
671
|
this.projectCache.set(resolvedRoot, cg);
|
|
503
672
|
if (projectPath !== resolvedRoot) {
|
|
504
673
|
this.projectCache.set(projectPath, cg);
|
|
@@ -513,43 +682,248 @@ class ToolHandler {
|
|
|
513
682
|
cg.close();
|
|
514
683
|
}
|
|
515
684
|
this.projectCache.clear();
|
|
685
|
+
this.worktreeMismatchCache.clear();
|
|
516
686
|
}
|
|
517
687
|
/**
|
|
518
|
-
* Validate that a value is a non-empty string
|
|
688
|
+
* Validate that a value is a non-empty string within length bounds.
|
|
689
|
+
*
|
|
690
|
+
* The `maxLength` cap protects against MCP clients that ship huge
|
|
691
|
+
* payloads (10MB+ query strings either by accident or maliciously).
|
|
692
|
+
* Without this, a single oversized input can pin the FTS5 index or
|
|
693
|
+
* exhaust memory before any real work runs.
|
|
519
694
|
*/
|
|
520
|
-
validateString(value, name) {
|
|
695
|
+
validateString(value, name, maxLength = MAX_INPUT_LENGTH) {
|
|
521
696
|
if (typeof value !== 'string' || value.length === 0) {
|
|
522
697
|
return this.errorResult(`${name} must be a non-empty string`);
|
|
523
698
|
}
|
|
699
|
+
if (value.length > maxLength) {
|
|
700
|
+
return this.errorResult(`${name} exceeds maximum length of ${maxLength} characters (got ${value.length})`);
|
|
701
|
+
}
|
|
524
702
|
return value;
|
|
525
703
|
}
|
|
704
|
+
/**
|
|
705
|
+
* Validate an optional path-like string input. Returns the value if
|
|
706
|
+
* valid (or undefined), or a ToolResult with the error.
|
|
707
|
+
*/
|
|
708
|
+
validateOptionalPath(value, name) {
|
|
709
|
+
if (value === undefined || value === null)
|
|
710
|
+
return undefined;
|
|
711
|
+
if (typeof value !== 'string') {
|
|
712
|
+
return this.errorResult(`${name} must be a string`);
|
|
713
|
+
}
|
|
714
|
+
if (value.length > MAX_PATH_LENGTH) {
|
|
715
|
+
return this.errorResult(`${name} exceeds maximum length of ${MAX_PATH_LENGTH} characters (got ${value.length})`);
|
|
716
|
+
}
|
|
717
|
+
return value;
|
|
718
|
+
}
|
|
719
|
+
/**
|
|
720
|
+
* Cached git worktree/index mismatch for a tool call's effective project.
|
|
721
|
+
*
|
|
722
|
+
* The "effective project" is what the request targets: an explicit
|
|
723
|
+
* `projectPath` arg, else the directory the server resolved its default
|
|
724
|
+
* project from (`defaultProjectHint`), else cwd. Memoized per start path —
|
|
725
|
+
* see `worktreeMismatchCache`. Best-effort: if the project can't be resolved
|
|
726
|
+
* (e.g. nothing initialized yet), it reports "no mismatch" so a tool is never
|
|
727
|
+
* broken by this check.
|
|
728
|
+
*/
|
|
729
|
+
worktreeMismatchFor(projectPath) {
|
|
730
|
+
const startPath = projectPath ?? this.defaultProjectHint ?? process.cwd();
|
|
731
|
+
const cached = this.worktreeMismatchCache.get(startPath);
|
|
732
|
+
if (cached !== undefined)
|
|
733
|
+
return cached;
|
|
734
|
+
let mismatch = null;
|
|
735
|
+
try {
|
|
736
|
+
mismatch = (0, worktree_1.detectWorktreeIndexMismatch)(startPath, this.getCodeGraph(projectPath).getProjectRoot());
|
|
737
|
+
}
|
|
738
|
+
catch {
|
|
739
|
+
// No resolvable project (or any other resolution error) → nothing to warn.
|
|
740
|
+
mismatch = null;
|
|
741
|
+
}
|
|
742
|
+
this.worktreeMismatchCache.set(startPath, mismatch);
|
|
743
|
+
return mismatch;
|
|
744
|
+
}
|
|
745
|
+
/**
|
|
746
|
+
* Prefix a successful read-tool result with a compact worktree-mismatch
|
|
747
|
+
* notice when the resolved index belongs to a different git working tree than
|
|
748
|
+
* the caller's (issue #155). Without this, an agent in a nested worktree
|
|
749
|
+
* silently trusts main-branch results. No-op on error results and when there
|
|
750
|
+
* is no mismatch. `codegraph_status` is excluded — it embeds its own verbose
|
|
751
|
+
* warning — so it stays out of this path.
|
|
752
|
+
*/
|
|
753
|
+
withWorktreeNotice(result, projectPath) {
|
|
754
|
+
if (result.isError)
|
|
755
|
+
return result;
|
|
756
|
+
const mismatch = this.worktreeMismatchFor(projectPath);
|
|
757
|
+
if (!mismatch)
|
|
758
|
+
return result;
|
|
759
|
+
const notice = (0, worktree_1.worktreeMismatchNotice)(mismatch);
|
|
760
|
+
const [first, ...rest] = result.content;
|
|
761
|
+
if (first && first.type === 'text') {
|
|
762
|
+
return { ...result, content: [{ type: 'text', text: `${notice}\n\n${first.text}` }, ...rest] };
|
|
763
|
+
}
|
|
764
|
+
return result;
|
|
765
|
+
}
|
|
766
|
+
/**
|
|
767
|
+
* Annotate a successful read-tool result with per-file staleness — the
|
|
768
|
+
* non-blocking answer to issue #403. The file watcher tracks every event
|
|
769
|
+
* it sees per path; here we intersect "files referenced in this response"
|
|
770
|
+
* against that pending set and prepend a compact banner so the agent can
|
|
771
|
+
* fall back to Read for those *specific* files without waiting for the
|
|
772
|
+
* debounced sync to fire. Other pending files in the project (not
|
|
773
|
+
* referenced by this response) get a small footer so the agent has a
|
|
774
|
+
* complete picture without bloating the banner.
|
|
775
|
+
*
|
|
776
|
+
* Cost when nothing is pending — the common case — is one boolean check.
|
|
777
|
+
* No I/O, no parsing of markdown beyond a per-pending-file substring scan.
|
|
778
|
+
*/
|
|
779
|
+
withStalenessNotice(result, projectPath) {
|
|
780
|
+
if (result.isError)
|
|
781
|
+
return result;
|
|
782
|
+
let cg;
|
|
783
|
+
try {
|
|
784
|
+
cg = this.getCodeGraph(projectPath);
|
|
785
|
+
}
|
|
786
|
+
catch {
|
|
787
|
+
return result; // no default project — leave as is
|
|
788
|
+
}
|
|
789
|
+
// Cross-project `projectPath` calls open a cached CodeGraph WITHOUT a
|
|
790
|
+
// watcher (watchers are only attached to the default session project).
|
|
791
|
+
// When the cross-project path happens to be the same project as the
|
|
792
|
+
// default cg, the cached instance is the wrong one — its pendingFiles is
|
|
793
|
+
// permanently empty. Detect the equal-path case and prefer the default
|
|
794
|
+
// cg so the staleness signal still fires when an agent passes the
|
|
795
|
+
// explicit projectPath form of its own project.
|
|
796
|
+
if (this.cg && cg !== this.cg) {
|
|
797
|
+
try {
|
|
798
|
+
const sameProject = (0, path_1.resolve)(this.cg.getProjectRoot()) === (0, path_1.resolve)(cg.getProjectRoot());
|
|
799
|
+
if (sameProject)
|
|
800
|
+
cg = this.cg;
|
|
801
|
+
}
|
|
802
|
+
catch {
|
|
803
|
+
/* getProjectRoot may throw on a closed instance — leave cg as is */
|
|
804
|
+
}
|
|
805
|
+
}
|
|
806
|
+
// Defensive: some test fakes inject a partial CodeGraph stub without the
|
|
807
|
+
// newer pending-files API. Treat missing/throwing as "no pending files."
|
|
808
|
+
let pending = [];
|
|
809
|
+
try {
|
|
810
|
+
pending = cg.getPendingFiles?.() ?? [];
|
|
811
|
+
}
|
|
812
|
+
catch {
|
|
813
|
+
return result;
|
|
814
|
+
}
|
|
815
|
+
if (pending.length === 0)
|
|
816
|
+
return result;
|
|
817
|
+
const [first, ...rest] = result.content;
|
|
818
|
+
if (!first || first.type !== 'text')
|
|
819
|
+
return result;
|
|
820
|
+
const text = first.text;
|
|
821
|
+
const inResponse = [];
|
|
822
|
+
const elsewhere = [];
|
|
823
|
+
for (const p of pending) {
|
|
824
|
+
// Substring match against the project-relative POSIX path — that's
|
|
825
|
+
// exactly the format both the watcher and every codegraph response
|
|
826
|
+
// emit, so a plain includes() is sufficient and avoids regex pitfalls.
|
|
827
|
+
if (text.includes(p.path))
|
|
828
|
+
inResponse.push(p);
|
|
829
|
+
else
|
|
830
|
+
elsewhere.push(p);
|
|
831
|
+
}
|
|
832
|
+
let banner = '';
|
|
833
|
+
if (inResponse.length > 0) {
|
|
834
|
+
banner = formatStaleBanner(inResponse);
|
|
835
|
+
}
|
|
836
|
+
let footer = '';
|
|
837
|
+
if (elsewhere.length > 0) {
|
|
838
|
+
footer = formatStaleFooter(elsewhere);
|
|
839
|
+
}
|
|
840
|
+
if (!banner && !footer)
|
|
841
|
+
return result;
|
|
842
|
+
const composed = [banner, text, footer].filter(Boolean).join('\n\n');
|
|
843
|
+
return { ...result, content: [{ type: 'text', text: composed }, ...rest] };
|
|
844
|
+
}
|
|
526
845
|
/**
|
|
527
846
|
* Execute a tool by name
|
|
528
847
|
*/
|
|
529
848
|
async execute(toolName, args) {
|
|
530
849
|
try {
|
|
850
|
+
// Block the first tool call on the engine's post-open reconcile so we
|
|
851
|
+
// never serve rows for files deleted/edited while no MCP server was
|
|
852
|
+
// running. The gate is cleared after first await — subsequent calls
|
|
853
|
+
// pay nothing. Catch-up failures are logged by the engine; we
|
|
854
|
+
// proceed regardless so a transient sync error never breaks tools.
|
|
855
|
+
if (this.catchUpGate) {
|
|
856
|
+
const gate = this.catchUpGate;
|
|
857
|
+
this.catchUpGate = null;
|
|
858
|
+
try {
|
|
859
|
+
await gate;
|
|
860
|
+
}
|
|
861
|
+
catch { /* engine already logged */ }
|
|
862
|
+
}
|
|
863
|
+
// Honor the optional tool allowlist (CODEGRAPH_MCP_TOOLS): a trimmed
|
|
864
|
+
// surface rejects ablated tools defensively even if a client cached them.
|
|
865
|
+
if (!this.isToolAllowed(toolName)) {
|
|
866
|
+
return this.errorResult(`Tool ${toolName} is disabled via CODEGRAPH_MCP_TOOLS`);
|
|
867
|
+
}
|
|
868
|
+
// Cross-cutting input validation. All tools accept an optional
|
|
869
|
+
// `projectPath` and most accept either `query`, `task`, or
|
|
870
|
+
// `symbol` — bound their lengths centrally so individual handlers
|
|
871
|
+
// can stay focused on tool-specific logic.
|
|
872
|
+
const pathCheck = this.validateOptionalPath(args.projectPath, 'projectPath');
|
|
873
|
+
if (typeof pathCheck === 'object' && pathCheck !== undefined) {
|
|
874
|
+
return pathCheck;
|
|
875
|
+
}
|
|
876
|
+
// The `path` and `pattern` properties used by codegraph_files are
|
|
877
|
+
// also path-shaped — apply the same cap.
|
|
878
|
+
if (args.path !== undefined) {
|
|
879
|
+
const check = this.validateOptionalPath(args.path, 'path');
|
|
880
|
+
if (typeof check === 'object' && check !== undefined)
|
|
881
|
+
return check;
|
|
882
|
+
}
|
|
883
|
+
if (args.pattern !== undefined) {
|
|
884
|
+
const check = this.validateOptionalPath(args.pattern, 'pattern');
|
|
885
|
+
if (typeof check === 'object' && check !== undefined)
|
|
886
|
+
return check;
|
|
887
|
+
}
|
|
888
|
+
// Read tools resolve through a single result variable so cross-cutting
|
|
889
|
+
// notices — worktree-index mismatch (issue #155) and per-file
|
|
890
|
+
// staleness (issue #403) — can be applied in one place. status embeds
|
|
891
|
+
// its own verbose worktree warning and its own pending-files section,
|
|
892
|
+
// so it returns directly from the switch below and bypasses both
|
|
893
|
+
// wrappers rather than duplicating that information.
|
|
894
|
+
let result;
|
|
531
895
|
switch (toolName) {
|
|
532
896
|
case 'codegraph_search':
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
return await this.handleContext(args);
|
|
897
|
+
result = await this.handleSearch(args);
|
|
898
|
+
break;
|
|
536
899
|
case 'codegraph_callers':
|
|
537
|
-
|
|
900
|
+
result = await this.handleCallers(args);
|
|
901
|
+
break;
|
|
538
902
|
case 'codegraph_callees':
|
|
539
|
-
|
|
903
|
+
result = await this.handleCallees(args);
|
|
904
|
+
break;
|
|
540
905
|
case 'codegraph_impact':
|
|
541
|
-
|
|
906
|
+
result = await this.handleImpact(args);
|
|
907
|
+
break;
|
|
542
908
|
case 'codegraph_explore':
|
|
543
|
-
|
|
909
|
+
result = await this.handleExplore(args);
|
|
910
|
+
break;
|
|
544
911
|
case 'codegraph_node':
|
|
545
|
-
|
|
912
|
+
result = await this.handleNode(args);
|
|
913
|
+
break;
|
|
546
914
|
case 'codegraph_status':
|
|
915
|
+
// status embeds the pending-files list as a first-class section
|
|
916
|
+
// (see handleStatus), so we skip the auto-banner wrapper here to
|
|
917
|
+
// avoid duplicating the same info at the top of the response.
|
|
547
918
|
return await this.handleStatus(args);
|
|
548
919
|
case 'codegraph_files':
|
|
549
|
-
|
|
920
|
+
result = await this.handleFiles(args);
|
|
921
|
+
break;
|
|
550
922
|
default:
|
|
551
923
|
return this.errorResult(`Unknown tool: ${toolName}`);
|
|
552
924
|
}
|
|
925
|
+
const withWorktree = this.withWorktreeNotice(result, args.projectPath);
|
|
926
|
+
return this.withStalenessNotice(withWorktree, args.projectPath);
|
|
553
927
|
}
|
|
554
928
|
catch (err) {
|
|
555
929
|
return this.errorResult(`Tool execution failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
@@ -573,66 +947,16 @@ class ToolHandler {
|
|
|
573
947
|
if (results.length === 0) {
|
|
574
948
|
return this.textResult(`No results found for "${query}"`);
|
|
575
949
|
}
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
const task = this.validateString(args.task, 'task');
|
|
584
|
-
if (typeof task !== 'string')
|
|
585
|
-
return task;
|
|
586
|
-
// Mark session as consulted (enables Grep/Glob/Bash)
|
|
587
|
-
const sessionId = process.env.CLAUDE_SESSION_ID;
|
|
588
|
-
if (sessionId) {
|
|
589
|
-
markSessionConsulted(sessionId);
|
|
590
|
-
}
|
|
591
|
-
const cg = this.getCodeGraph(args.projectPath);
|
|
592
|
-
const maxNodes = args.maxNodes || 20;
|
|
593
|
-
const includeCode = args.includeCode !== false;
|
|
594
|
-
const context = await cg.buildContext(task, {
|
|
595
|
-
maxNodes,
|
|
596
|
-
includeCode,
|
|
597
|
-
format: 'markdown',
|
|
950
|
+
// Down-rank generated files within the FTS-returned set so a search
|
|
951
|
+
// for "Send" surfaces the hand-written keeper before .pb.go stubs
|
|
952
|
+
// that share the name. Stable: only reorders generated vs. not.
|
|
953
|
+
const ranked = [...results].sort((a, b) => {
|
|
954
|
+
const aGen = (0, generated_detection_1.isGeneratedFile)(a.node.filePath) ? 1 : 0;
|
|
955
|
+
const bGen = (0, generated_detection_1.isGeneratedFile)(b.node.filePath) ? 1 : 0;
|
|
956
|
+
return aGen - bGen;
|
|
598
957
|
});
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
const reminder = isFeatureQuery
|
|
602
|
-
? '\n\n⚠️ **Ask user:** UX preferences, edge cases, acceptance criteria'
|
|
603
|
-
: '';
|
|
604
|
-
// buildContext returns string when format is 'markdown'
|
|
605
|
-
if (typeof context === 'string') {
|
|
606
|
-
return this.textResult(context + reminder);
|
|
607
|
-
}
|
|
608
|
-
// If it returns TaskContext, format it
|
|
609
|
-
return this.textResult(this.formatTaskContext(context) + reminder);
|
|
610
|
-
}
|
|
611
|
-
/**
|
|
612
|
-
* Heuristic to detect if a query looks like a feature request
|
|
613
|
-
*/
|
|
614
|
-
looksLikeFeatureRequest(task) {
|
|
615
|
-
const featureKeywords = [
|
|
616
|
-
'add', 'create', 'implement', 'build', 'enable', 'allow',
|
|
617
|
-
'new feature', 'support for', 'ability to', 'want to',
|
|
618
|
-
'should be able', 'need to add', 'swap', 'edit', 'modify'
|
|
619
|
-
];
|
|
620
|
-
const bugKeywords = [
|
|
621
|
-
'fix', 'bug', 'error', 'broken', 'crash', 'issue', 'problem',
|
|
622
|
-
'not working', 'fails', 'undefined', 'null'
|
|
623
|
-
];
|
|
624
|
-
const explorationKeywords = [
|
|
625
|
-
'how does', 'where is', 'what is', 'find', 'show me',
|
|
626
|
-
'explain', 'understand', 'explore'
|
|
627
|
-
];
|
|
628
|
-
const lowerTask = task.toLowerCase();
|
|
629
|
-
// If it's clearly a bug or exploration, not a feature
|
|
630
|
-
if (bugKeywords.some(k => lowerTask.includes(k)))
|
|
631
|
-
return false;
|
|
632
|
-
if (explorationKeywords.some(k => lowerTask.includes(k)))
|
|
633
|
-
return false;
|
|
634
|
-
// If it matches feature keywords, it's likely a feature request
|
|
635
|
-
return featureKeywords.some(k => lowerTask.includes(k));
|
|
958
|
+
const formatted = this.formatSearchResults(ranked);
|
|
959
|
+
return this.textResult(this.truncateOutput(formatted));
|
|
636
960
|
}
|
|
637
961
|
/**
|
|
638
962
|
* Handle codegraph_callers
|
|
@@ -732,6 +1056,388 @@ class ToolHandler {
|
|
|
732
1056
|
const formatted = this.formatImpact(symbol, mergedImpact) + allMatches.note;
|
|
733
1057
|
return this.textResult(this.truncateOutput(formatted));
|
|
734
1058
|
}
|
|
1059
|
+
/**
|
|
1060
|
+
* Describe a synthesized (dynamic-dispatch) edge for human output: how the
|
|
1061
|
+
* callback was wired up — the bridge static parsing can't see. Returns null
|
|
1062
|
+
* for ordinary static edges. Used by trace + the node trail so a synthesized
|
|
1063
|
+
* hop reads as "registered via onUpdate at App.tsx:3148", not a bare arrow.
|
|
1064
|
+
*/
|
|
1065
|
+
synthEdgeNote(edge) {
|
|
1066
|
+
if (!edge || edge.provenance !== 'heuristic')
|
|
1067
|
+
return null;
|
|
1068
|
+
const m = edge.metadata;
|
|
1069
|
+
const registeredAt = typeof m?.registeredAt === 'string' ? m.registeredAt : undefined;
|
|
1070
|
+
const at = registeredAt ? ` @${registeredAt}` : '';
|
|
1071
|
+
if (m?.synthesizedBy === 'callback') {
|
|
1072
|
+
const via = m.via ? `\`${String(m.via)}\`` : 'a registrar';
|
|
1073
|
+
const field = m.field ? ` on .${String(m.field)}` : '';
|
|
1074
|
+
return {
|
|
1075
|
+
label: `callback — registered via ${via}${field} (dynamic dispatch)`,
|
|
1076
|
+
compact: `dynamic: callback via ${via}${at}`,
|
|
1077
|
+
registeredAt,
|
|
1078
|
+
};
|
|
1079
|
+
}
|
|
1080
|
+
if (m?.synthesizedBy === 'event-emitter') {
|
|
1081
|
+
const ev = m.event ? `\`${String(m.event)}\`` : 'an event';
|
|
1082
|
+
return {
|
|
1083
|
+
label: `event ${ev} — emit → handler (dynamic dispatch)`,
|
|
1084
|
+
compact: `dynamic: event ${ev}${at}`,
|
|
1085
|
+
registeredAt,
|
|
1086
|
+
};
|
|
1087
|
+
}
|
|
1088
|
+
if (m?.synthesizedBy === 'react-render') {
|
|
1089
|
+
return {
|
|
1090
|
+
label: `React re-render — \`setState\` re-runs render() (dynamic dispatch)`,
|
|
1091
|
+
compact: `dynamic: React re-render via setState${at}`,
|
|
1092
|
+
registeredAt,
|
|
1093
|
+
};
|
|
1094
|
+
}
|
|
1095
|
+
if (m?.synthesizedBy === 'jsx-render') {
|
|
1096
|
+
const child = m.via ? `<${String(m.via)}>` : 'a child component';
|
|
1097
|
+
return {
|
|
1098
|
+
label: `renders ${child} (JSX child — dynamic dispatch)`,
|
|
1099
|
+
compact: `dynamic: renders ${child}`,
|
|
1100
|
+
registeredAt,
|
|
1101
|
+
};
|
|
1102
|
+
}
|
|
1103
|
+
if (m?.synthesizedBy === 'vue-handler') {
|
|
1104
|
+
const ev = m.event ? `@${String(m.event)}` : 'a template event';
|
|
1105
|
+
return {
|
|
1106
|
+
label: `Vue template handler — bound to ${ev} (dynamic dispatch)`,
|
|
1107
|
+
compact: `dynamic: Vue ${ev} handler`,
|
|
1108
|
+
registeredAt,
|
|
1109
|
+
};
|
|
1110
|
+
}
|
|
1111
|
+
if (m?.synthesizedBy === 'interface-impl') {
|
|
1112
|
+
return {
|
|
1113
|
+
label: `interface/abstract dispatch — runs the implementation override (dynamic dispatch)`,
|
|
1114
|
+
compact: `dynamic: interface → impl${at}`,
|
|
1115
|
+
registeredAt,
|
|
1116
|
+
};
|
|
1117
|
+
}
|
|
1118
|
+
if (m?.synthesizedBy === 'closure-collection') {
|
|
1119
|
+
const field = m.field ? `\`${String(m.field)}\`` : 'a collection';
|
|
1120
|
+
return {
|
|
1121
|
+
label: `closure collection — runs handlers appended to ${field} (dynamic dispatch)`,
|
|
1122
|
+
compact: `dynamic: runs ${field} handlers${at}`,
|
|
1123
|
+
registeredAt,
|
|
1124
|
+
};
|
|
1125
|
+
}
|
|
1126
|
+
return null;
|
|
1127
|
+
}
|
|
1128
|
+
/**
|
|
1129
|
+
* Flow-from-named-symbols: an agent's codegraph_explore query is a bag of
|
|
1130
|
+
* symbol names that usually spans the flow it's investigating (e.g.
|
|
1131
|
+
* "PmsProductController getList PmsProductService list PmsProductServiceImpl").
|
|
1132
|
+
* Surface the longest call chain AMONG those named symbols — scoped to what the
|
|
1133
|
+
* agent explicitly named, so (unlike a fuzzy relevance set) there's no
|
|
1134
|
+
* wrong-feature wandering. Rides synthesized edges, so controller→service-
|
|
1135
|
+
* interface→impl shows up. Returns '' if no chain of >=3 nodes exists.
|
|
1136
|
+
*
|
|
1137
|
+
* Ambiguous tokens (Java `list` → dozens of nodes) are disambiguated by
|
|
1138
|
+
* CO-NAMING: the agent names the class too, so we keep only `list` candidates
|
|
1139
|
+
* whose qualifiedName contains another named token (`PmsProductServiceImpl::list`),
|
|
1140
|
+
* dropping unrelated `OmsOrderService::list`.
|
|
1141
|
+
*/
|
|
1142
|
+
buildFlowFromNamedSymbols(cg, query) {
|
|
1143
|
+
const EMPTY = { text: '', pathNodeIds: new Set(), namedNodeIds: new Set(), uniqueNamedNodeIds: new Set() };
|
|
1144
|
+
try {
|
|
1145
|
+
const CALLABLE = new Set(['method', 'function', 'component', 'constructor']);
|
|
1146
|
+
// Strip only a REAL file extension (Create.cs → Create); KEEP qualified
|
|
1147
|
+
// names (Class.method / Class::method) — the agent's most precise input,
|
|
1148
|
+
// resolved exactly by findAllSymbols. (The old strip mangled Class.method
|
|
1149
|
+
// into Class, throwing the method away.)
|
|
1150
|
+
const FILE_EXT = /\.(?:java|kt|kts|ts|tsx|js|jsx|mjs|cjs|cs|py|go|rb|php|swift|rs|cpp|cc|cxx|c|h|hpp|scala|lua|dart|vue|svelte)$/i;
|
|
1151
|
+
const tokens = [...new Set(query.split(/[\s,()[\]]+/)
|
|
1152
|
+
.map((t) => t.replace(FILE_EXT, '').trim())
|
|
1153
|
+
.filter((t) => t.length >= 3 && /^[A-Za-z_$][\w$]*(?:(?:::|\.)[\w$]+)*$/.test(t)))].slice(0, 16);
|
|
1154
|
+
if (tokens.length < 2)
|
|
1155
|
+
return EMPTY;
|
|
1156
|
+
// Pool of name SEGMENTS (Class + method from every token) used to
|
|
1157
|
+
// disambiguate an ambiguous SIMPLE name: keep a candidate only if its
|
|
1158
|
+
// CONTAINER class is itself named in the query.
|
|
1159
|
+
const segPool = new Set();
|
|
1160
|
+
for (const t of tokens)
|
|
1161
|
+
for (const s of t.toLowerCase().split(/::|\./))
|
|
1162
|
+
if (s)
|
|
1163
|
+
segPool.add(s);
|
|
1164
|
+
const named = new Map();
|
|
1165
|
+
// Nodes whose token is SPECIFIC — a (near-)unique callable name (<=3 defs in
|
|
1166
|
+
// the whole graph). These are safe to SPARE a file on: the agent named THIS
|
|
1167
|
+
// method (`getResponseWithInterceptorChain`, 1 def). A hyper-polymorphic name
|
|
1168
|
+
// (`as_sql`, 110 defs across every Expression/Compiler subclass) is NOT here,
|
|
1169
|
+
// so naming it doesn't keep every backend variant full and flood the budget.
|
|
1170
|
+
const uniqueNamedNodeIds = new Set();
|
|
1171
|
+
for (const t of tokens) {
|
|
1172
|
+
const cands = this.findAllSymbols(cg, t).nodes.filter((n) => CALLABLE.has(n.kind));
|
|
1173
|
+
// A qualified or otherwise-specific name (<=3 hits) keeps all; an
|
|
1174
|
+
// ambiguous simple name keeps only candidates whose container is named.
|
|
1175
|
+
const specific = cands.length <= 3;
|
|
1176
|
+
const pick = specific
|
|
1177
|
+
? cands
|
|
1178
|
+
: cands.filter((n) => {
|
|
1179
|
+
const segs = (n.qualifiedName || '').toLowerCase().split(/::|\./).filter(Boolean);
|
|
1180
|
+
const container = segs.length >= 2 ? segs[segs.length - 2] : '';
|
|
1181
|
+
return !!container && segPool.has(container);
|
|
1182
|
+
});
|
|
1183
|
+
for (const n of pick.slice(0, 6)) {
|
|
1184
|
+
named.set(n.id, n);
|
|
1185
|
+
if (specific)
|
|
1186
|
+
uniqueNamedNodeIds.add(n.id);
|
|
1187
|
+
}
|
|
1188
|
+
if (named.size > 40)
|
|
1189
|
+
break;
|
|
1190
|
+
}
|
|
1191
|
+
if (named.size < 2)
|
|
1192
|
+
return EMPTY;
|
|
1193
|
+
const MAX_HOPS = 7;
|
|
1194
|
+
let best = null;
|
|
1195
|
+
// BFS the full call graph (incl. synth edges) from each named seed, but
|
|
1196
|
+
// only ACCEPT a sink that is also named — both ends anchored to symbols the
|
|
1197
|
+
// agent named, so the chain stays on-topic while bridging intermediates
|
|
1198
|
+
// (e.g. the exact interface overload) that the token resolution missed.
|
|
1199
|
+
for (const seed of [...named.values()].slice(0, 8)) {
|
|
1200
|
+
const parent = new Map();
|
|
1201
|
+
parent.set(seed.id, { prev: null, edge: null, node: seed });
|
|
1202
|
+
const q = [{ id: seed.id, depth: 0, streak: 0 }];
|
|
1203
|
+
let deep = null, deepDepth = 0;
|
|
1204
|
+
const MAX_BRIDGE = 1; // ≤1 consecutive UNNAMED hop: bridge one missing intermediate, never wander a god-function's fan-out
|
|
1205
|
+
for (let h = 0; h < q.length && parent.size < 1500; h++) {
|
|
1206
|
+
const { id, depth, streak } = q[h];
|
|
1207
|
+
if (id !== seed.id && named.has(id) && depth > deepDepth) {
|
|
1208
|
+
deep = id;
|
|
1209
|
+
deepDepth = depth;
|
|
1210
|
+
}
|
|
1211
|
+
if (depth >= MAX_HOPS - 1)
|
|
1212
|
+
continue;
|
|
1213
|
+
for (const c of cg.getCallees(id)) {
|
|
1214
|
+
if (c.edge.kind !== 'calls' || parent.has(c.node.id))
|
|
1215
|
+
continue;
|
|
1216
|
+
const newStreak = named.has(c.node.id) ? 0 : streak + 1;
|
|
1217
|
+
if (newStreak > MAX_BRIDGE)
|
|
1218
|
+
continue;
|
|
1219
|
+
parent.set(c.node.id, { prev: id, edge: c.edge, node: c.node });
|
|
1220
|
+
q.push({ id: c.node.id, depth: depth + 1, streak: newStreak });
|
|
1221
|
+
}
|
|
1222
|
+
}
|
|
1223
|
+
if (!deep)
|
|
1224
|
+
continue;
|
|
1225
|
+
const chain = [];
|
|
1226
|
+
let cur = deep;
|
|
1227
|
+
while (cur) {
|
|
1228
|
+
const p = parent.get(cur);
|
|
1229
|
+
if (!p)
|
|
1230
|
+
break;
|
|
1231
|
+
chain.push({ node: p.node, edge: p.edge });
|
|
1232
|
+
cur = p.prev;
|
|
1233
|
+
}
|
|
1234
|
+
chain.reverse();
|
|
1235
|
+
if (!best || chain.length > best.length)
|
|
1236
|
+
best = chain;
|
|
1237
|
+
}
|
|
1238
|
+
const hasMain = !!best && best.length >= 3;
|
|
1239
|
+
const pathIds = new Set((best ?? []).map((s) => s.node.id));
|
|
1240
|
+
// Supplementary: dynamic-dispatch (synthesized) edges incident to a NAMED
|
|
1241
|
+
// symbol — the indirect hops an agent would otherwise grep/Read to
|
|
1242
|
+
// reconstruct ("where do the appended `validators` actually run?"). The
|
|
1243
|
+
// synth edge IS that answer, so surface it even when the OTHER end wasn't
|
|
1244
|
+
// named (e.g. the agent names `validate` but not the `didCompleteTask`
|
|
1245
|
+
// that drains the collection). On-topic by construction: only heuristic
|
|
1246
|
+
// edges touching a symbol the agent named; skipped when the hop already
|
|
1247
|
+
// shows in the main chain.
|
|
1248
|
+
const synthLines = [];
|
|
1249
|
+
const synthSeen = new Set();
|
|
1250
|
+
for (const n of named.values()) {
|
|
1251
|
+
if (synthLines.length >= 6)
|
|
1252
|
+
break;
|
|
1253
|
+
for (const { node: other, edge } of [...cg.getCallers(n.id), ...cg.getCallees(n.id)]) {
|
|
1254
|
+
if (synthLines.length >= 6)
|
|
1255
|
+
break;
|
|
1256
|
+
if (edge.provenance !== 'heuristic' || other.id === n.id)
|
|
1257
|
+
continue;
|
|
1258
|
+
if (pathIds.has(edge.source) && pathIds.has(edge.target))
|
|
1259
|
+
continue; // already in the main chain
|
|
1260
|
+
const src = edge.source === n.id ? n : other;
|
|
1261
|
+
const tgt = edge.source === n.id ? other : n;
|
|
1262
|
+
const key = `${src.name}>${tgt.name}`;
|
|
1263
|
+
if (synthSeen.has(key))
|
|
1264
|
+
continue;
|
|
1265
|
+
synthSeen.add(key);
|
|
1266
|
+
const note = this.synthEdgeNote(edge);
|
|
1267
|
+
synthLines.push(`- ${src.name} → ${tgt.name} [${note ? note.compact : edge.kind}]`);
|
|
1268
|
+
}
|
|
1269
|
+
}
|
|
1270
|
+
if (!hasMain && synthLines.length === 0)
|
|
1271
|
+
return EMPTY;
|
|
1272
|
+
const out = [];
|
|
1273
|
+
if (hasMain) {
|
|
1274
|
+
out.push('## Flow (call path among the symbols you queried)', '');
|
|
1275
|
+
for (let i = 0; i < best.length; i++) {
|
|
1276
|
+
const step = best[i];
|
|
1277
|
+
if (step.edge) {
|
|
1278
|
+
const sy = this.synthEdgeNote(step.edge);
|
|
1279
|
+
out.push(` ↓ ${sy ? sy.compact : step.edge.kind}`);
|
|
1280
|
+
}
|
|
1281
|
+
out.push(`${i + 1}. ${step.node.name} (${step.node.filePath}:${step.node.startLine})`);
|
|
1282
|
+
}
|
|
1283
|
+
out.push('');
|
|
1284
|
+
}
|
|
1285
|
+
if (synthLines.length) {
|
|
1286
|
+
out.push('## Dynamic-dispatch links among your symbols', '(synthesized — the indirect hops grep/Read would reconstruct; the `@file:line` is the wiring site)', '', ...synthLines, '');
|
|
1287
|
+
}
|
|
1288
|
+
out.push('> Full source for these symbols is below — the call flow among them, followed by their bodies.', '');
|
|
1289
|
+
// namedNodeIds = every callable the agent explicitly named (a superset of
|
|
1290
|
+
// the spine). A file holding one is something the agent asked to SEE, so it
|
|
1291
|
+
// must keep full source even if it's an off-spine polymorphic sibling — the
|
|
1292
|
+
// agent named `getResponseWithInterceptorChain` / `SQLCompiler.execute_sql`
|
|
1293
|
+
// as the mechanism, not as an interchangeable leaf. See the skeleton gate.
|
|
1294
|
+
return { text: out.join('\n'), pathNodeIds: pathIds, namedNodeIds: new Set(named.keys()), uniqueNamedNodeIds };
|
|
1295
|
+
}
|
|
1296
|
+
catch {
|
|
1297
|
+
return EMPTY;
|
|
1298
|
+
}
|
|
1299
|
+
}
|
|
1300
|
+
/**
|
|
1301
|
+
* Compact "blast radius" for the entry symbols of an explore result: who
|
|
1302
|
+
* depends on each (callers) and which test files cover it — LOCATIONS ONLY,
|
|
1303
|
+
* no source, so the agent knows what to update / re-verify before editing
|
|
1304
|
+
* without reaching for a separate impact call. Always-on, but skips symbols
|
|
1305
|
+
* that have no dependents (nothing to warn about), and returns '' when none
|
|
1306
|
+
* qualify so a leaf-only exploration stays clean.
|
|
1307
|
+
*/
|
|
1308
|
+
buildBlastRadiusSection(cg, subgraph) {
|
|
1309
|
+
const ROOT_CAP = 5; // only the symbols the query actually targeted
|
|
1310
|
+
const FILE_CAP = 4; // caller files listed per symbol before "+N more"
|
|
1311
|
+
const MEANINGFUL = new Set([
|
|
1312
|
+
'function', 'method', 'class', 'interface', 'struct', 'trait', 'protocol',
|
|
1313
|
+
'enum', 'type_alias', 'component', 'constant', 'variable', 'property', 'field',
|
|
1314
|
+
]);
|
|
1315
|
+
const rel = (p) => p.replace(/\\/g, '/');
|
|
1316
|
+
const roots = subgraph.roots
|
|
1317
|
+
.map((id) => subgraph.nodes.get(id))
|
|
1318
|
+
.filter((n) => !!n && MEANINGFUL.has(n.kind))
|
|
1319
|
+
.slice(0, ROOT_CAP);
|
|
1320
|
+
if (roots.length === 0)
|
|
1321
|
+
return '';
|
|
1322
|
+
const entries = [];
|
|
1323
|
+
for (const root of roots) {
|
|
1324
|
+
let callers = [];
|
|
1325
|
+
try {
|
|
1326
|
+
callers = cg.getCallers(root.id);
|
|
1327
|
+
}
|
|
1328
|
+
catch { /* skip this root */ }
|
|
1329
|
+
const seen = new Set();
|
|
1330
|
+
const uniq = [];
|
|
1331
|
+
for (const c of callers) {
|
|
1332
|
+
if (c?.node && !seen.has(c.node.id)) {
|
|
1333
|
+
seen.add(c.node.id);
|
|
1334
|
+
uniq.push(c.node);
|
|
1335
|
+
}
|
|
1336
|
+
}
|
|
1337
|
+
if (uniq.length === 0)
|
|
1338
|
+
continue; // no blast radius → nothing to flag
|
|
1339
|
+
const callerFiles = [...new Set(uniq.map((n) => rel(n.filePath)))];
|
|
1340
|
+
const testFiles = callerFiles.filter((f) => (0, query_utils_1.isTestFile)(f));
|
|
1341
|
+
const nonTest = callerFiles.filter((f) => !(0, query_utils_1.isTestFile)(f));
|
|
1342
|
+
const shown = nonTest.slice(0, FILE_CAP).map((f) => `\`${f}\``).join(', ');
|
|
1343
|
+
const more = nonTest.length > FILE_CAP ? ` +${nonTest.length - FILE_CAP} more` : '';
|
|
1344
|
+
const where = nonTest.length > 0 ? ` in ${shown}${more}` : '';
|
|
1345
|
+
const tests = testFiles.length > 0
|
|
1346
|
+
? `; tests: ${testFiles.slice(0, FILE_CAP).map((f) => `\`${f}\``).join(', ')}${testFiles.length > FILE_CAP ? ` +${testFiles.length - FILE_CAP}` : ''}`
|
|
1347
|
+
: '; ⚠️ no covering tests found';
|
|
1348
|
+
entries.push(`- \`${root.name}\` (${rel(root.filePath)}:${root.startLine}) — ${uniq.length} caller${uniq.length === 1 ? '' : 's'}${where}${tests}`);
|
|
1349
|
+
}
|
|
1350
|
+
if (entries.length === 0)
|
|
1351
|
+
return '';
|
|
1352
|
+
return [
|
|
1353
|
+
'### Blast radius — what depends on these (update/verify before editing)',
|
|
1354
|
+
'',
|
|
1355
|
+
...entries,
|
|
1356
|
+
'',
|
|
1357
|
+
].join('\n');
|
|
1358
|
+
}
|
|
1359
|
+
/**
|
|
1360
|
+
* Graph-connectivity relevance via Random-Walk-with-Restart (personalized
|
|
1361
|
+
* PageRank) from the query's matched SEED nodes over the call/reference graph.
|
|
1362
|
+
*
|
|
1363
|
+
* This is the ranking signal text search (FTS/bm25) CANNOT provide, and it's
|
|
1364
|
+
* codegraph's home turf: relevance by STRUCTURE, not words. A file whose
|
|
1365
|
+
* symbols are call-connected to the matched cluster accrues walk mass and
|
|
1366
|
+
* ranks high; a lone TEXT match — e.g. `LensSwitcher.swift` matched the word
|
|
1367
|
+
* "switch" from `switchOrganization`, but calls none of `setUser`/`fetchUser`
|
|
1368
|
+
* — gets only its own restart probability and ranks ~0. Immune to the
|
|
1369
|
+
* tokenization trap that fools term matching, deterministic, no embeddings.
|
|
1370
|
+
*
|
|
1371
|
+
* Undirected adjacency (reachability both ways), restart α=0.25 to the seeds,
|
|
1372
|
+
* power iteration to convergence. Bounded to the already-relevant subgraph, so
|
|
1373
|
+
* it's a few hundred nodes × ~25 iterations — negligible cost.
|
|
1374
|
+
*/
|
|
1375
|
+
computeGraphRelevance(nodeIds, edges, seedIds) {
|
|
1376
|
+
const out = new Map();
|
|
1377
|
+
const n = nodeIds.length;
|
|
1378
|
+
if (n === 0)
|
|
1379
|
+
return out;
|
|
1380
|
+
const idx = new Map();
|
|
1381
|
+
for (let i = 0; i < n; i++)
|
|
1382
|
+
idx.set(nodeIds[i], i);
|
|
1383
|
+
const RANK_EDGES = new Set([
|
|
1384
|
+
'calls', 'references', 'extends', 'implements', 'overrides',
|
|
1385
|
+
'instantiates', 'returns', 'type_of', 'imports',
|
|
1386
|
+
]);
|
|
1387
|
+
const adj = Array.from({ length: n }, () => []);
|
|
1388
|
+
for (const e of edges) {
|
|
1389
|
+
if (!RANK_EDGES.has(e.kind))
|
|
1390
|
+
continue;
|
|
1391
|
+
const i = idx.get(e.source);
|
|
1392
|
+
const j = idx.get(e.target);
|
|
1393
|
+
if (i === undefined || j === undefined || i === j)
|
|
1394
|
+
continue;
|
|
1395
|
+
adj[i].push(j);
|
|
1396
|
+
adj[j].push(i); // undirected — reachable either direction
|
|
1397
|
+
}
|
|
1398
|
+
// Restart vector: uniform over seeds present in the candidate set. (Falls
|
|
1399
|
+
// back to uniform-over-all if no seed landed in the set, so we never return
|
|
1400
|
+
// all-zero.)
|
|
1401
|
+
const r = new Array(n).fill(0);
|
|
1402
|
+
let rsum = 0;
|
|
1403
|
+
for (const id of seedIds) {
|
|
1404
|
+
const i = idx.get(id);
|
|
1405
|
+
if (i !== undefined) {
|
|
1406
|
+
r[i] = 1;
|
|
1407
|
+
rsum += 1;
|
|
1408
|
+
}
|
|
1409
|
+
}
|
|
1410
|
+
if (rsum === 0) {
|
|
1411
|
+
for (let i = 0; i < n; i++)
|
|
1412
|
+
r[i] = 1;
|
|
1413
|
+
rsum = n;
|
|
1414
|
+
}
|
|
1415
|
+
for (let i = 0; i < n; i++)
|
|
1416
|
+
r[i] /= rsum;
|
|
1417
|
+
const alpha = 0.25;
|
|
1418
|
+
let s = r.slice();
|
|
1419
|
+
for (let iter = 0; iter < 25; iter++) {
|
|
1420
|
+
const next = new Array(n).fill(0);
|
|
1421
|
+
for (let i = 0; i < n; i++) {
|
|
1422
|
+
const si = s[i];
|
|
1423
|
+
if (si === 0)
|
|
1424
|
+
continue;
|
|
1425
|
+
const d = adj[i].length;
|
|
1426
|
+
if (d === 0) {
|
|
1427
|
+
next[i] += si;
|
|
1428
|
+
continue;
|
|
1429
|
+
} // dangling: keep its mass
|
|
1430
|
+
const share = si / d;
|
|
1431
|
+
for (const j of adj[i])
|
|
1432
|
+
next[j] += share;
|
|
1433
|
+
}
|
|
1434
|
+
for (let i = 0; i < n; i++)
|
|
1435
|
+
s[i] = (1 - alpha) * next[i] + alpha * r[i];
|
|
1436
|
+
}
|
|
1437
|
+
for (let i = 0; i < n; i++)
|
|
1438
|
+
out.set(nodeIds[i], s[i]);
|
|
1439
|
+
return out;
|
|
1440
|
+
}
|
|
735
1441
|
/**
|
|
736
1442
|
* Handle codegraph_explore — deep exploration in a single call
|
|
737
1443
|
*
|
|
@@ -773,9 +1479,111 @@ class ToolHandler {
|
|
|
773
1479
|
if (subgraph.nodes.size === 0) {
|
|
774
1480
|
return this.textResult(`No relevant code found for "${query}"`);
|
|
775
1481
|
}
|
|
1482
|
+
// Graph-aware glue: findRelevantContext builds the subgraph from name/text
|
|
1483
|
+
// search, so a method that BRIDGES named symbols — e.g. App.tsx's
|
|
1484
|
+
// triggerRender, which calls the named triggerUpdate — is never a search hit
|
|
1485
|
+
// and gets missed, forcing the agent to Read the file to trace it. Pull in
|
|
1486
|
+
// the callers/callees of the entry (root) nodes, but ONLY those that live in
|
|
1487
|
+
// files the subgraph already surfaces (where the agent reads to fill gaps),
|
|
1488
|
+
// so we add wiring without dragging in unrelated files. These get an
|
|
1489
|
+
// importance boost below so they survive the per-file cluster budget.
|
|
1490
|
+
const glueNodeIds = new Set();
|
|
1491
|
+
const subgraphFiles = new Set();
|
|
1492
|
+
for (const n of subgraph.nodes.values())
|
|
1493
|
+
subgraphFiles.add(n.filePath);
|
|
1494
|
+
const GLUE_NODE_CAP = 60;
|
|
1495
|
+
for (const rootId of subgraph.roots) {
|
|
1496
|
+
if (glueNodeIds.size >= GLUE_NODE_CAP)
|
|
1497
|
+
break;
|
|
1498
|
+
let neighbors = [];
|
|
1499
|
+
try {
|
|
1500
|
+
neighbors = [
|
|
1501
|
+
...cg.getCallers(rootId).map(c => c.node),
|
|
1502
|
+
...cg.getCallees(rootId).map(c => c.node),
|
|
1503
|
+
];
|
|
1504
|
+
}
|
|
1505
|
+
catch {
|
|
1506
|
+
continue;
|
|
1507
|
+
}
|
|
1508
|
+
for (const nb of neighbors) {
|
|
1509
|
+
if (glueNodeIds.size >= GLUE_NODE_CAP)
|
|
1510
|
+
break;
|
|
1511
|
+
if (subgraph.nodes.has(nb.id))
|
|
1512
|
+
continue;
|
|
1513
|
+
if (!subgraphFiles.has(nb.filePath))
|
|
1514
|
+
continue;
|
|
1515
|
+
subgraph.nodes.set(nb.id, nb);
|
|
1516
|
+
glueNodeIds.add(nb.id);
|
|
1517
|
+
}
|
|
1518
|
+
}
|
|
1519
|
+
// Named-symbol seeding: findRelevantContext is an FTS/text rank, so a query
|
|
1520
|
+
// that's a BAG of symbol names skewed toward one phase (Alamofire: 5 build
|
|
1521
|
+
// terms, each a high-frequency name, vs 3 validate terms) lets the
|
|
1522
|
+
// lower-frequency names fall below the search cut — their definitions, and
|
|
1523
|
+
// whole files (Validation.swift), never get gathered, so they can never
|
|
1524
|
+
// render and the agent Reads them. Resolve EACH named token to its
|
|
1525
|
+
// substantive definition (skip empty stubs + test files, same relevance the
|
|
1526
|
+
// trace endpoint picker uses) and inject it as an entry, so every symbol the
|
|
1527
|
+
// agent explicitly named is in the subgraph and its file is scored.
|
|
1528
|
+
const namedSeedIds = new Set();
|
|
1529
|
+
{
|
|
1530
|
+
const FILE_EXT = /\.(?:java|kt|kts|ts|tsx|js|jsx|mjs|cjs|cs|py|go|rb|php|swift|rs|cpp|cc|cxx|c|h|hpp|scala|lua|dart|vue|svelte)$/i;
|
|
1531
|
+
const CALLABLE = new Set(['method', 'function', 'component', 'constructor']);
|
|
1532
|
+
const isTestPath = (p) => /(^|\/)(tests?|specs?|__tests__|testdata|mocks?|fixtures?)\//i.test(p) || /\.(test|spec)\.[a-z]+$/i.test(p);
|
|
1533
|
+
const bodyLines = (n) => Math.max(0, (n.endLine ?? n.startLine) - n.startLine);
|
|
1534
|
+
const tokens = [...new Set(query.split(/[\s,()[\]]+/)
|
|
1535
|
+
.map((t) => t.replace(FILE_EXT, '').trim())
|
|
1536
|
+
.filter((t) => t.length >= 3 && /^[A-Za-z_$][\w$]*(?:(?:::|\.)[\w$]+)*$/.test(t)))].slice(0, 16);
|
|
1537
|
+
// PascalCase tokens in the query are type/file disambiguators — when the
|
|
1538
|
+
// agent writes "DataRequest task validate", the `task`/`validate` it wants
|
|
1539
|
+
// are DataRequest's, NOT the same-named overloads in Validation.swift /
|
|
1540
|
+
// Concurrency.swift / the abstract base. Used below to bias overloaded
|
|
1541
|
+
// names toward the file/class the query also names.
|
|
1542
|
+
const typeTokens = tokens.filter((o) => /^[A-Z][A-Za-z0-9]{3,}/.test(o));
|
|
1543
|
+
const inNamedContext = (n) => typeTokens.some((ct) => {
|
|
1544
|
+
const lc = ct.toLowerCase();
|
|
1545
|
+
return n.filePath.toLowerCase().includes(lc) || n.qualifiedName.toLowerCase().includes(lc);
|
|
1546
|
+
});
|
|
1547
|
+
for (const t of tokens) {
|
|
1548
|
+
// Enumerate ALL defs of a bare token via the direct index, not FTS — a
|
|
1549
|
+
// 50+-overload name (tokio `poll`) ranks the wanted def (`Harness::poll`)
|
|
1550
|
+
// below the FTS cut, so findAllSymbols would never see it and the
|
|
1551
|
+
// type-token bias below couldn't pick the harness.rs one. (Same fix as
|
|
1552
|
+
// codegraph_node's findSymbolMatches.) Qualified tokens keep findAllSymbols.
|
|
1553
|
+
const isQual = /[.\/]|::/.test(t);
|
|
1554
|
+
const raw = isQual ? this.findAllSymbols(cg, t).nodes : cg.getNodesByName(t);
|
|
1555
|
+
const cands = raw
|
|
1556
|
+
.filter((n) => CALLABLE.has(n.kind) && !isTestPath(n.filePath))
|
|
1557
|
+
.sort((a, b) => (bodyLines(b) > 1 ? 1 : 0) - (bodyLines(a) > 1 ? 1 : 0) || bodyLines(b) - bodyLines(a));
|
|
1558
|
+
// A specific name (<=3 defs) injects all its defs. An overloaded name
|
|
1559
|
+
// (`validate` = 10, `request` = 44) would flood the subgraph, so inject
|
|
1560
|
+
// only: the overloads whose file/class the query ALSO names (the agent
|
|
1561
|
+
// told us which one it wants — DataRequest's, not Validation.swift's),
|
|
1562
|
+
// capped; else fall back to the single most-substantive def. This is the
|
|
1563
|
+
// explore-side mirror of codegraph_node's overload disambiguation.
|
|
1564
|
+
let picks;
|
|
1565
|
+
if (cands.length <= 3) {
|
|
1566
|
+
picks = cands;
|
|
1567
|
+
}
|
|
1568
|
+
else {
|
|
1569
|
+
const ctx = cands.filter(inNamedContext);
|
|
1570
|
+
picks = ctx.length > 0 ? ctx.slice(0, 4) : cands.slice(0, 1);
|
|
1571
|
+
}
|
|
1572
|
+
for (const n of picks) {
|
|
1573
|
+
if (!subgraph.nodes.has(n.id))
|
|
1574
|
+
subgraph.nodes.set(n.id, n);
|
|
1575
|
+
// Mark as a named seed EVEN IF the FTS gather already had it — being
|
|
1576
|
+
// "named by the agent" is independent of whether search happened to
|
|
1577
|
+
// surface it, and it drives the +50 score, the gate, and the
|
|
1578
|
+
// named-file sort below. (Previously only NEW injections were marked,
|
|
1579
|
+
// so a named symbol FTS already gathered never sorted to the top.)
|
|
1580
|
+
namedSeedIds.add(n.id);
|
|
1581
|
+
}
|
|
1582
|
+
}
|
|
1583
|
+
}
|
|
776
1584
|
// Step 2: Group nodes by file, score by relevance
|
|
777
1585
|
const fileGroups = new Map();
|
|
778
|
-
const entryNodeIds = new Set(subgraph.roots);
|
|
1586
|
+
const entryNodeIds = new Set([...subgraph.roots, ...namedSeedIds]);
|
|
779
1587
|
// Build a set of nodes directly connected to entry points (depth 1)
|
|
780
1588
|
const connectedToEntry = new Set();
|
|
781
1589
|
for (const edge of subgraph.edges) {
|
|
@@ -790,8 +1598,16 @@ class ToolHandler {
|
|
|
790
1598
|
continue;
|
|
791
1599
|
const group = fileGroups.get(node.filePath) || { nodes: [], score: 0 };
|
|
792
1600
|
group.nodes.push(node);
|
|
793
|
-
// Score:
|
|
794
|
-
|
|
1601
|
+
// Score: a NAMED-SEED node (a symbol the agent named that FTS missed, now
|
|
1602
|
+
// injected) is worth far more than a mere reference — its file is where the
|
|
1603
|
+
// answer lives. Without this, an incidental file that name-drops the flow
|
|
1604
|
+
// (Combine.swift references request/task → score 23 from connected nodes)
|
|
1605
|
+
// outranks the file that DEFINES a named symbol (Validation.swift's
|
|
1606
|
+
// `validate` → 10) and steals its render slot. Definition ≫ reference.
|
|
1607
|
+
if (namedSeedIds.has(node.id)) {
|
|
1608
|
+
group.score += 50;
|
|
1609
|
+
}
|
|
1610
|
+
else if (entryNodeIds.has(node.id)) {
|
|
795
1611
|
group.score += 10;
|
|
796
1612
|
}
|
|
797
1613
|
else if (connectedToEntry.has(node.id)) {
|
|
@@ -803,32 +1619,159 @@ class ToolHandler {
|
|
|
803
1619
|
fileGroups.set(node.filePath, group);
|
|
804
1620
|
}
|
|
805
1621
|
// Only include files that have entry points or nodes directly connected to entry points
|
|
806
|
-
|
|
1622
|
+
let relevantFiles = [...fileGroups.entries()].filter(([, group]) => group.score >= 3);
|
|
807
1623
|
// Extract query terms for relevance checking
|
|
808
1624
|
const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length >= 3);
|
|
809
|
-
//
|
|
1625
|
+
// Test/spec/icon/i18n file detector — used both for the pre-sort hard
|
|
1626
|
+
// filter (tiny tier) and the comparator deprioritization (all tiers).
|
|
1627
|
+
const isLowValue = (p) => {
|
|
1628
|
+
const lp = p.toLowerCase();
|
|
1629
|
+
return (/\/(tests?|__tests?__|spec)\//.test(lp) ||
|
|
1630
|
+
/_test\.go$/.test(lp) ||
|
|
1631
|
+
/(?:^|\/)test_[^/]+\.py$/.test(lp) ||
|
|
1632
|
+
/_test\.py$/.test(lp) ||
|
|
1633
|
+
/_spec\.rb$/.test(lp) ||
|
|
1634
|
+
/_test\.rb$/.test(lp) ||
|
|
1635
|
+
/\.(test|spec)\.[jt]sx?$/.test(lp) ||
|
|
1636
|
+
/(test|spec|tests)\.(java|kt|scala)$/.test(lp) ||
|
|
1637
|
+
/(tests?|spec)\.cs$/.test(lp) ||
|
|
1638
|
+
/tests?\.swift$/.test(lp) ||
|
|
1639
|
+
/_test\.dart$/.test(lp) ||
|
|
1640
|
+
/\bicons?\b/.test(lp) ||
|
|
1641
|
+
/\bi18n\b/.test(lp));
|
|
1642
|
+
};
|
|
1643
|
+
// Hard-exclude test/spec files (ALL tiers, not just tiny). One slipped test
|
|
1644
|
+
// file dominates the per-file budget on small repos (cobra's `command_test.go`
|
|
1645
|
+
// displaced `args.go`) AND wastes budget on large ones (Django's
|
|
1646
|
+
// `custom_lookups/tests.py` ate ~2.3 KB of the 28 KB cap, crowding out the
|
|
1647
|
+
// SQLCompiler mechanism the agent then Read). A test file almost never answers
|
|
1648
|
+
// an architecture question. Skip when the query itself is about tests — the
|
|
1649
|
+
// legitimate "explore the tests" case — and only cut if ≥2 non-test candidates
|
|
1650
|
+
// remain (else tests are the only signal for this area).
|
|
1651
|
+
{
|
|
1652
|
+
const queryMentionsTests = /\b(test|tests|testing|spec|verify|verifies)\b/i.test(query);
|
|
1653
|
+
if (!queryMentionsTests) {
|
|
1654
|
+
const nonLow = relevantFiles.filter(([p]) => !isLowValue(p));
|
|
1655
|
+
if (nonLow.length >= 2) {
|
|
1656
|
+
relevantFiles = nonLow;
|
|
1657
|
+
}
|
|
1658
|
+
}
|
|
1659
|
+
}
|
|
1660
|
+
// Secondary signal: how many DISTINCT query terms each file matches (path +
|
|
1661
|
+
// symbol names). Kept only as a tiebreak — the PRIMARY relevance is graph
|
|
1662
|
+
// connectivity below. (Term counting alone tied the real central file with
|
|
1663
|
+
// incidental same-word matches; it's a weak text signal, not the ranker.)
|
|
1664
|
+
const uniqueQueryTerms = [...new Set(queryTerms)].filter(t => t.length >= 3);
|
|
1665
|
+
const fileTermHits = new Map();
|
|
1666
|
+
for (const [fp, group] of relevantFiles) {
|
|
1667
|
+
const hay = fp.toLowerCase() + ' ' + group.nodes.map(n => n.name.toLowerCase()).join(' ');
|
|
1668
|
+
let hits = 0;
|
|
1669
|
+
for (const t of uniqueQueryTerms)
|
|
1670
|
+
if (hay.includes(t))
|
|
1671
|
+
hits++;
|
|
1672
|
+
fileTermHits.set(fp, hits);
|
|
1673
|
+
}
|
|
1674
|
+
// PRIMARY relevance: graph connectivity (Random-Walk-with-Restart from the
|
|
1675
|
+
// matched seeds — see computeGraphRelevance). Aggregate each file's nodes'
|
|
1676
|
+
// walk mass. This is the signal text search lacks: the real cluster
|
|
1677
|
+
// (org-user.storage.ts, call-connected to the matches) accrues mass; a lone
|
|
1678
|
+
// text match (LensSwitcher.swift, matched "switch" but calls nothing in the
|
|
1679
|
+
// flow) gets only its restart probability → ~0, and is dropped by the gate.
|
|
1680
|
+
const nodeRwr = this.computeGraphRelevance([...subgraph.nodes.keys()], subgraph.edges, entryNodeIds);
|
|
1681
|
+
const fileGraphScore = new Map();
|
|
1682
|
+
for (const node of subgraph.nodes.values()) {
|
|
1683
|
+
fileGraphScore.set(node.filePath, (fileGraphScore.get(node.filePath) ?? 0) + (nodeRwr.get(node.id) ?? 0));
|
|
1684
|
+
}
|
|
1685
|
+
const maxGraph = Math.max(0, ...fileGraphScore.values());
|
|
1686
|
+
// Central file(s): the 1-2 most graph-central files that also match the
|
|
1687
|
+
// query textually (so a connected hub-utility with no term match isn't
|
|
1688
|
+
// mistaken for the subject). The heart of the answer — they earn the larger
|
|
1689
|
+
// WHOLE-FILE ceiling below (a god-file central file still exceeds it and
|
|
1690
|
+
// falls to generous full-method sectioning — never a whole dump).
|
|
1691
|
+
const centralFiles = new Set([...fileGraphScore.entries()]
|
|
1692
|
+
.filter(([fp, g]) => g > 0 && (fileTermHits.get(fp) ?? 0) >= 1)
|
|
1693
|
+
.sort((a, b) => b[1] - a[1] || (fileTermHits.get(b[0]) ?? 0) - (fileTermHits.get(a[0]) ?? 0))
|
|
1694
|
+
.slice(0, 2)
|
|
1695
|
+
.map(([f]) => f));
|
|
1696
|
+
// Files that DEFINE a symbol the agent named (or a subgraph root). These are
|
|
1697
|
+
// the highest-relevance files there are — the agent asked for them by name —
|
|
1698
|
+
// so the connectivity gate below must never drop them, even when their RWR
|
|
1699
|
+
// mass is low (a leaf family file like codec.ts is call-connected to little
|
|
1700
|
+
// but is exactly what the agent queried). Without this protection the gate
|
|
1701
|
+
// prunes a named file and the agent Reads it back.
|
|
1702
|
+
const entryFiles = new Set();
|
|
1703
|
+
for (const id of entryNodeIds) {
|
|
1704
|
+
const n = subgraph.nodes.get(id);
|
|
1705
|
+
if (n)
|
|
1706
|
+
entryFiles.add(n.filePath);
|
|
1707
|
+
}
|
|
1708
|
+
// Relevance gate (so the generous budget is a CEILING, not a target): keep a
|
|
1709
|
+
// file only if it is STRUCTURALLY relevant by ANY of:
|
|
1710
|
+
// - graph score within a fraction of the top (it's on/near the flow), OR
|
|
1711
|
+
// - central (one of the top graph-scoring files that also matches a query term), OR
|
|
1712
|
+
// - it DEFINES a symbol the agent named (entryFiles), OR
|
|
1713
|
+
// - it matches >= 2 DISTINCT named query terms — a strong text signal that
|
|
1714
|
+
// the agent is asking about this file even when nothing calls it (codec.ts:
|
|
1715
|
+
// the agent named `encode`/`Codec`/`JsonCodec`, all leaf classes with zero
|
|
1716
|
+
// RWR mass — graph alone wrongly drops it).
|
|
1717
|
+
// A lone text match on one shared word (LensSwitcher: term=1, g~0) is still
|
|
1718
|
+
// dropped, so the budget never fills with incidental files. Guarded so it
|
|
1719
|
+
// never prunes below 2.
|
|
1720
|
+
if (maxGraph > 0) {
|
|
1721
|
+
const gated = relevantFiles.filter(([fp]) => (fileGraphScore.get(fp) ?? 0) >= maxGraph * 0.06
|
|
1722
|
+
|| centralFiles.has(fp)
|
|
1723
|
+
|| entryFiles.has(fp)
|
|
1724
|
+
|| (fileTermHits.get(fp) ?? 0) >= 2);
|
|
1725
|
+
if (gated.length >= 2)
|
|
1726
|
+
relevantFiles = gated;
|
|
1727
|
+
}
|
|
1728
|
+
// Sort files: agent-named first, then graph-central, then distinct-term match, then the
|
|
1729
|
+
// existing low-value/generated/score tiebreaks.
|
|
1730
|
+
// Files that DEFINE a symbol the agent NAMED. These sort first — ahead of
|
|
1731
|
+
// graph connectivity — because the agent asked for them by name. Without
|
|
1732
|
+
// this, a named leaf override reached only by dynamic dispatch (Alamofire's
|
|
1733
|
+
// `DataRequest.task`/`validate`, low RWR mass) sorts below the high-
|
|
1734
|
+
// connectivity abstract base (`Request.swift`) and the same-named overloads
|
|
1735
|
+
// in other files (`Validation.swift`), falls outside the budget, and the
|
|
1736
|
+
// agent Reads it. The named file is the answer — rank it at the top.
|
|
1737
|
+
const namedSeedFiles = new Set();
|
|
1738
|
+
for (const id of namedSeedIds) {
|
|
1739
|
+
const n = subgraph.nodes.get(id);
|
|
1740
|
+
if (n)
|
|
1741
|
+
namedSeedFiles.add(n.filePath);
|
|
1742
|
+
}
|
|
810
1743
|
const sortedFiles = relevantFiles.sort((a, b) => {
|
|
811
1744
|
const aPath = a[0].toLowerCase();
|
|
812
1745
|
const bPath = b[0].toLowerCase();
|
|
813
|
-
//
|
|
814
|
-
const
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
const
|
|
821
|
-
const
|
|
822
|
-
if (
|
|
823
|
-
return
|
|
824
|
-
|
|
825
|
-
const
|
|
826
|
-
|
|
827
|
-
|
|
1746
|
+
// Agent-named files first (it asked for a symbol defined here by name).
|
|
1747
|
+
const aNamed = namedSeedFiles.has(a[0]) ? 1 : 0;
|
|
1748
|
+
const bNamed = namedSeedFiles.has(b[0]) ? 1 : 0;
|
|
1749
|
+
if (aNamed !== bNamed)
|
|
1750
|
+
return bNamed - aNamed;
|
|
1751
|
+
// Graph connectivity is the next key (small epsilon so near-ties fall
|
|
1752
|
+
// through to the text signal rather than coin-flipping on float noise).
|
|
1753
|
+
const aG = fileGraphScore.get(a[0]) ?? 0;
|
|
1754
|
+
const bG = fileGraphScore.get(b[0]) ?? 0;
|
|
1755
|
+
if (Math.abs(aG - bG) > maxGraph * 0.01)
|
|
1756
|
+
return bG - aG;
|
|
1757
|
+
const aHits = fileTermHits.get(a[0]) ?? 0;
|
|
1758
|
+
const bHits = fileTermHits.get(b[0]) ?? 0;
|
|
1759
|
+
if (aHits !== bHits)
|
|
1760
|
+
return bHits - aHits;
|
|
828
1761
|
const aLow = isLowValue(aPath);
|
|
829
1762
|
const bLow = isLowValue(bPath);
|
|
830
1763
|
if (aLow !== bLow)
|
|
831
1764
|
return aLow ? 1 : -1;
|
|
1765
|
+
// Deprioritize generated source (.pb.go / .pulsar.go / _mocks.go / …) —
|
|
1766
|
+
// the agent rarely needs to see the protobuf scaffold or gomock output
|
|
1767
|
+
// when asking about the actual flow, and dumping their bodies inflates
|
|
1768
|
+
// the response (the cosmos Q3 explore otherwise leads with
|
|
1769
|
+
// `expected_keepers_mocks.go`, displacing the real `tally.go` content
|
|
1770
|
+
// and forcing the agent to Read tally.go anyway).
|
|
1771
|
+
const aGen = (0, generated_detection_1.isGeneratedFile)(a[0]);
|
|
1772
|
+
const bGen = (0, generated_detection_1.isGeneratedFile)(b[0]);
|
|
1773
|
+
if (aGen !== bGen)
|
|
1774
|
+
return aGen ? 1 : -1;
|
|
832
1775
|
if (a[1].score !== b[1].score)
|
|
833
1776
|
return b[1].score - a[1].score;
|
|
834
1777
|
return b[1].nodes.length - a[1].nodes.length;
|
|
@@ -840,6 +1783,12 @@ class ToolHandler {
|
|
|
840
1783
|
`Found ${subgraph.nodes.size} symbols across ${fileGroups.size} files.`,
|
|
841
1784
|
'',
|
|
842
1785
|
];
|
|
1786
|
+
// Blast radius (always-on, compact): for the entry symbols, who depends on
|
|
1787
|
+
// them + which tests cover them — locations only, no source — so the agent
|
|
1788
|
+
// knows what to update/verify before editing without a separate call.
|
|
1789
|
+
const blastRadius = this.buildBlastRadiusSection(cg, subgraph);
|
|
1790
|
+
if (blastRadius)
|
|
1791
|
+
lines.push(blastRadius);
|
|
843
1792
|
// Relationship map — show how symbols connect
|
|
844
1793
|
const significantEdges = subgraph.edges.filter(e => e.kind !== 'contains' // skip contains — it's implied by file grouping
|
|
845
1794
|
);
|
|
@@ -871,16 +1820,83 @@ class ToolHandler {
|
|
|
871
1820
|
}
|
|
872
1821
|
}
|
|
873
1822
|
// Step 4: Read contiguous file sections
|
|
1823
|
+
// Compute the flow spine once — used both to prepend the Flow section (below)
|
|
1824
|
+
// and to gate adaptive source sizing: files on the spine get full source,
|
|
1825
|
+
// off-spine peers skeletonize.
|
|
1826
|
+
const flow = this.buildFlowFromNamedSymbols(cg, query);
|
|
1827
|
+
// Polymorphic-sibling detector for adaptive sizing. A class that implements/
|
|
1828
|
+
// extends a supertype shared by >= MIN_SIBLINGS classes is one of many
|
|
1829
|
+
// INTERCHANGEABLE implementations (OkHttp's 14 `: Interceptor` classes —
|
|
1830
|
+
// showing one + the rest as signatures is enough), as opposed to a DISTINCT
|
|
1831
|
+
// pipeline step (Excalidraw's `renderStaticScene`, which shares no supertype and
|
|
1832
|
+
// must stay full or the agent loses real content). Only off-spine sibling files
|
|
1833
|
+
// skeletonize; distinct steps and on-spine files keep full source. Cache
|
|
1834
|
+
// supertype→(has ≥N implementers) so this stays a handful of edge queries.
|
|
1835
|
+
const MIN_SIBLINGS = 3;
|
|
1836
|
+
const siblingSuper = new Map();
|
|
1837
|
+
const isPolymorphicSibling = (nodes) => {
|
|
1838
|
+
for (const n of nodes) {
|
|
1839
|
+
for (const e of cg.getOutgoingEdges(n.id)) {
|
|
1840
|
+
if (e.kind !== 'implements' && e.kind !== 'extends')
|
|
1841
|
+
continue;
|
|
1842
|
+
let many = siblingSuper.get(e.target);
|
|
1843
|
+
if (many === undefined) {
|
|
1844
|
+
many = cg.getIncomingEdges(e.target)
|
|
1845
|
+
.filter((x) => x.kind === 'implements' || x.kind === 'extends').length >= MIN_SIBLINGS;
|
|
1846
|
+
siblingSuper.set(e.target, many);
|
|
1847
|
+
}
|
|
1848
|
+
if (many)
|
|
1849
|
+
return true;
|
|
1850
|
+
}
|
|
1851
|
+
}
|
|
1852
|
+
return false;
|
|
1853
|
+
};
|
|
1854
|
+
// A file that DEFINES a polymorphic supertype (a class/interface with ≥
|
|
1855
|
+
// MIN_SIBLINGS implementers) AND co-locates its subclasses is a redundant
|
|
1856
|
+
// "family" file — Django's compiler.py holds `SQLCompiler` + its 4 subclasses
|
|
1857
|
+
// (SQLInsert/Update/Delete/AggregateCompiler) in 2,266 lines. Such files are
|
|
1858
|
+
// huge and read-anyway, so they should STILL skeletonize even when the agent
|
|
1859
|
+
// named a method in them: a full one eats ~6.5K of the explore budget (Django
|
|
1860
|
+
// is pinned at the 28K cap, truncating), starving the sibling files the agent
|
|
1861
|
+
// then Reads. This flag OVERRIDES the named-callable spare below — it does NOT
|
|
1862
|
+
// by itself spare a file. (OkHttp's RealCall implements the `Lockable` mixin
|
|
1863
|
+
// but defines no ≥3-impl supertype, so the named spare keeps it full.)
|
|
1864
|
+
const superMany = new Map();
|
|
1865
|
+
const definesPolymorphicSupertype = (nodes) => {
|
|
1866
|
+
for (const n of nodes) {
|
|
1867
|
+
if (n.kind !== 'class' && n.kind !== 'interface' && n.kind !== 'struct'
|
|
1868
|
+
&& n.kind !== 'trait' && n.kind !== 'protocol' && n.kind !== 'type_alias')
|
|
1869
|
+
continue;
|
|
1870
|
+
let many = superMany.get(n.id);
|
|
1871
|
+
if (many === undefined) {
|
|
1872
|
+
many = cg.getIncomingEdges(n.id)
|
|
1873
|
+
.filter((x) => x.kind === 'implements' || x.kind === 'extends').length >= MIN_SIBLINGS;
|
|
1874
|
+
superMany.set(n.id, many);
|
|
1875
|
+
}
|
|
1876
|
+
if (many)
|
|
1877
|
+
return true;
|
|
1878
|
+
}
|
|
1879
|
+
return false;
|
|
1880
|
+
};
|
|
874
1881
|
lines.push('### Source Code');
|
|
875
1882
|
lines.push('');
|
|
1883
|
+
lines.push('> The code below is the **verbatim, current on-disk source** of these files — re-read from disk on this call and line-numbered, byte-for-byte identical to what the Read tool returns. It is NOT a summary, outline, or stale cache. Treat each block as a Read you have already performed: do not Read a file shown here.');
|
|
1884
|
+
lines.push('');
|
|
876
1885
|
let totalChars = lines.join('\n').length;
|
|
877
1886
|
let filesIncluded = 0;
|
|
878
1887
|
let anyFileTrimmed = false;
|
|
879
1888
|
for (const [filePath, group] of sortedFiles) {
|
|
880
1889
|
if (filesIncluded >= maxFiles)
|
|
881
1890
|
break;
|
|
882
|
-
|
|
883
|
-
|
|
1891
|
+
// A file DEFINES a named/spine symbol (the answer) vs merely references the
|
|
1892
|
+
// flow. Past 90% budget, stop pulling INCIDENTAL files — but keep scanning
|
|
1893
|
+
// for necessary ones, which render even past the cap (bounded by maxFiles).
|
|
1894
|
+
// Without this `continue` (was an unconditional `break`), the loop stopped
|
|
1895
|
+
// after the build + validators-exec files and never reached the ranked-in
|
|
1896
|
+
// validate-logic file (Alamofire's Validation.swift).
|
|
1897
|
+
const fileNecessary = group.nodes.some(n => entryNodeIds.has(n.id) || flow.pathNodeIds.has(n.id) || flow.uniqueNamedNodeIds.has(n.id));
|
|
1898
|
+
if (!fileNecessary && totalChars > budget.maxOutputChars * 0.9)
|
|
1899
|
+
continue;
|
|
884
1900
|
const absPath = (0, utils_1.validatePathWithinRoot)(projectRoot, filePath);
|
|
885
1901
|
if (!absPath || !(0, fs_1.existsSync)(absPath))
|
|
886
1902
|
continue;
|
|
@@ -893,6 +1909,181 @@ class ToolHandler {
|
|
|
893
1909
|
}
|
|
894
1910
|
const fileLines = fileContent.split('\n');
|
|
895
1911
|
const lang = group.nodes[0]?.language || '';
|
|
1912
|
+
// Adaptive sizing (CODEGRAPH_ADAPTIVE_EXPLORE, default on): collapse a file
|
|
1913
|
+
// to a per-symbol view when it's a redundant member of a polymorphic family.
|
|
1914
|
+
// Engages iff ALL hold (the on-spine god-file case described further down is a separate trigger):
|
|
1915
|
+
// 1. a flow spine exists,
|
|
1916
|
+
// 2. no symbol in the file is on that spine (it's not the mechanism path),
|
|
1917
|
+
// 3. it IS a polymorphic sibling (≥ MIN_SIBLINGS impls of a shared supertype),
|
|
1918
|
+
// 4. it is NOT SPARED, where a file is spared iff the agent named a
|
|
1919
|
+
// (near-)UNIQUE callable in it (`getResponseWithInterceptorChain`, 1 def →
|
|
1920
|
+
// keep RealCall.kt full) UNLESS the file DEFINES the family supertype (a
|
|
1921
|
+
// base+subclasses "family" file like Django's compiler.py — collapse it).
|
|
1922
|
+
// Uniqueness matters: `as_sql` has 110 defs across every Compiler/Expression
|
|
1923
|
+
// subclass; naming it must NOT keep every backend variant + test file full
|
|
1924
|
+
// and flood the budget. That's why the spare reads uniqueNamedNodeIds.
|
|
1925
|
+
// Within a collapsed file the render is PER-SYMBOL (condition B): a method the
|
|
1926
|
+
// agent NAMED or that's on the spine is shown with its FULL body (so the agent
|
|
1927
|
+
// doesn't Read the file back for it — Django's SQLCompiler.execute_sql/as_sql);
|
|
1928
|
+
// every other symbol is just its signature. So the base mechanism survives while
|
|
1929
|
+
// the file's other ~80 symbols + the redundant subclasses collapse to one line each.
|
|
1930
|
+
const spareNamed = group.nodes.some(n => flow.uniqueNamedNodeIds.has(n.id));
|
|
1931
|
+
const fileDefinesSuper = definesPolymorphicSupertype(group.nodes);
|
|
1932
|
+
const spared = spareNamed && !fileDefinesSuper;
|
|
1933
|
+
const CALLABLE_BODY = new Set(['method', 'function', 'constructor', 'component']);
|
|
1934
|
+
const hasSpineNode = group.nodes.some(n => flow.pathNodeIds.has(n.id));
|
|
1935
|
+
// On-spine god-file: the flow path runs THROUGH this file, but it also holds
|
|
1936
|
+
// many OTHER named methods, and rendering all of them in full blows the
|
|
1937
|
+
// per-file budget and starves the other flow files (Alamofire: the agent
|
|
1938
|
+
// names ~7 Session.swift methods — the build spine PLUS off-path
|
|
1939
|
+
// task/didCompleteTask — far past the whole response budget). Engage the
|
|
1940
|
+
// per-symbol view to keep the SPINE full and collapse the off-path named
|
|
1941
|
+
// methods to signatures. Only when there IS off-path content to shed —
|
|
1942
|
+
// otherwise the spine is irreducible (a sequential flow has no redundancy),
|
|
1943
|
+
// so leave it to the normal full render.
|
|
1944
|
+
const namedBodyChars = group.nodes
|
|
1945
|
+
.filter(n => CALLABLE_BODY.has(n.kind) && (flow.pathNodeIds.has(n.id) || flow.uniqueNamedNodeIds.has(n.id)))
|
|
1946
|
+
.reduce((s, n) => s + fileLines.slice(n.startLine - 1, n.endLine).join('\n').length, 0);
|
|
1947
|
+
const onSpineGodFile = hasSpineNode
|
|
1948
|
+
&& namedBodyChars > budget.maxCharsPerFile
|
|
1949
|
+
&& group.nodes.some(n => CALLABLE_BODY.has(n.kind) && flow.uniqueNamedNodeIds.has(n.id) && !flow.pathNodeIds.has(n.id));
|
|
1950
|
+
if (adaptiveExploreEnabled() && flow.pathNodeIds.size > 0
|
|
1951
|
+
&& (onSpineGodFile || (!hasSpineNode && isPolymorphicSibling(group.nodes) && !spared))) {
|
|
1952
|
+
const syms = group.nodes
|
|
1953
|
+
.filter(n => n.kind !== 'import' && n.kind !== 'export' && n.startLine > 0)
|
|
1954
|
+
.sort((a, b) => a.startLine - b.startLine);
|
|
1955
|
+
// Pass 1: choose which symbols get a FULL body, by priority, greedily within
|
|
1956
|
+
// a per-file body cap — so one huge family file can't body every named method
|
|
1957
|
+
// and crowd out the other flow files (Django's query.py). A symbol earns a
|
|
1958
|
+
// body if it's on-spine, or UNIQUELY named (`SQLCompiler.execute_sql`), or a
|
|
1959
|
+
// co-named method WHEN this file DEFINES the family supertype (so the base
|
|
1960
|
+
// `SQLCompiler.as_sql` body shows, but the 110 leaf `as_sql` overrides — and
|
|
1961
|
+
// OkHttp's 5 `intercept`s if the agent names `intercept` — stay signatures).
|
|
1962
|
+
const prio = (n) => !CALLABLE_BODY.has(n.kind) ? 99
|
|
1963
|
+
: flow.pathNodeIds.has(n.id) ? 0
|
|
1964
|
+
: flow.uniqueNamedNodeIds.has(n.id) ? 1
|
|
1965
|
+
: (fileDefinesSuper && flow.namedNodeIds.has(n.id)) ? 2 : 99;
|
|
1966
|
+
// One ~250-line WINDOW per file. syms are taken by priority (spine first,
|
|
1967
|
+
// then uniquely-named, then family-base), and the cap applies to ALL of
|
|
1968
|
+
// them — including the spine — so a big-spine god-file (tokio's worker.rs:
|
|
1969
|
+
// run→run_task→next_task→steal_work) can't eat the whole response and
|
|
1970
|
+
// starve the co-flow file (harness.rs's poll). The native agent windows
|
|
1971
|
+
// such a file too (~190 lines at a time), so this mimics, not truncates.
|
|
1972
|
+
// Always emit ≥1 (never an empty section).
|
|
1973
|
+
const bodyCap = budget.maxCharsPerFile * 1.5;
|
|
1974
|
+
const bodyIds = new Set();
|
|
1975
|
+
let bodyChars = 0;
|
|
1976
|
+
for (const n of syms.filter(n => prio(n) < 99 && n.endLine >= n.startLine).sort((a, b) => prio(a) - prio(b))) {
|
|
1977
|
+
const sz = fileLines.slice(n.startLine - 1, n.endLine).join('\n').length;
|
|
1978
|
+
if (bodyChars + sz > bodyCap && bodyIds.size > 0)
|
|
1979
|
+
continue;
|
|
1980
|
+
bodyIds.add(n.id);
|
|
1981
|
+
bodyChars += sz;
|
|
1982
|
+
}
|
|
1983
|
+
// Pass 2: render in line order — full body for chosen symbols, else the
|
|
1984
|
+
// signature line (capped, with a "+N more" tail so the structure map of a
|
|
1985
|
+
// god-file doesn't itself bloat the budget).
|
|
1986
|
+
const skel = [];
|
|
1987
|
+
let coveredUntil = 0; // skip symbols already inside an emitted body
|
|
1988
|
+
let sigCount = 0, sigDropped = 0;
|
|
1989
|
+
const SIG_MAX = Math.max(12, budget.maxSymbolsInFileHeader * 2);
|
|
1990
|
+
for (const n of syms) {
|
|
1991
|
+
if (n.startLine <= coveredUntil)
|
|
1992
|
+
continue;
|
|
1993
|
+
if (bodyIds.has(n.id)) {
|
|
1994
|
+
const end = n.endLine;
|
|
1995
|
+
const body = fileLines.slice(n.startLine - 1, end).join('\n');
|
|
1996
|
+
skel.push(exploreLineNumbersEnabled() ? numberSourceLines(body, n.startLine) : body);
|
|
1997
|
+
coveredUntil = end;
|
|
1998
|
+
}
|
|
1999
|
+
else {
|
|
2000
|
+
// Elide the body, emit the signature. node.startLine can point at a
|
|
2001
|
+
// decorator/annotation, so scan forward for the line that names the symbol.
|
|
2002
|
+
let lineNo = n.startLine;
|
|
2003
|
+
for (let k = 0; k < 4; k++) {
|
|
2004
|
+
if ((fileLines[n.startLine - 1 + k] || '').includes(n.name)) {
|
|
2005
|
+
lineNo = n.startLine + k;
|
|
2006
|
+
break;
|
|
2007
|
+
}
|
|
2008
|
+
}
|
|
2009
|
+
if (lineNo <= coveredUntil)
|
|
2010
|
+
continue;
|
|
2011
|
+
if (sigCount >= SIG_MAX) {
|
|
2012
|
+
sigDropped++;
|
|
2013
|
+
continue;
|
|
2014
|
+
}
|
|
2015
|
+
const sig = (fileLines[lineNo - 1] || '').trim();
|
|
2016
|
+
if (sig) {
|
|
2017
|
+
skel.push(exploreLineNumbersEnabled() ? `${lineNo}\t${sig}` : sig);
|
|
2018
|
+
sigCount++;
|
|
2019
|
+
}
|
|
2020
|
+
}
|
|
2021
|
+
}
|
|
2022
|
+
if (sigDropped > 0)
|
|
2023
|
+
skel.push(`… +${sigDropped} more (signatures elided)`);
|
|
2024
|
+
if (skel.length > 0) {
|
|
2025
|
+
const names = [...new Set(group.nodes.filter(n => n.kind !== 'import' && n.kind !== 'export').map(n => n.name))]
|
|
2026
|
+
.slice(0, budget.maxSymbolsInFileHeader).join(', ');
|
|
2027
|
+
// Steer the agent to codegraph_explore for an elided body — NEVER to
|
|
2028
|
+
// Read. The old "Read for more" / "Read for a full body" tags invited
|
|
2029
|
+
// a Read of the very file just skeletonized; on a central, wanted file
|
|
2030
|
+
// (Session.swift, DataRequest.swift) that fired an over-investigation
|
|
2031
|
+
// spiral (the agent Read the skeletonized file, then kept digging).
|
|
2032
|
+
// CLAUDE.md: explore output must never tell the agent to Read.
|
|
2033
|
+
const tag = bodyIds.size > 0
|
|
2034
|
+
? 'focused (the methods you named in full, the rest as signatures — codegraph_explore a signature by name for its body; do NOT Read)'
|
|
2035
|
+
: 'skeleton (signatures only — codegraph_explore a name for its full body; do NOT Read)';
|
|
2036
|
+
lines.push(`#### ${filePath} — ${names} · ${tag}`, '', '```' + lang, skel.join('\n'), '```', '');
|
|
2037
|
+
totalChars += skel.join('\n').length + 120;
|
|
2038
|
+
filesIncluded++;
|
|
2039
|
+
continue;
|
|
2040
|
+
}
|
|
2041
|
+
}
|
|
2042
|
+
// Whole-file rule: if a relevant file is small enough to afford, return it
|
|
2043
|
+
// ENTIRELY instead of clustering. Clustering exists to tame god-files
|
|
2044
|
+
// (App.tsx ~13k lines); on a ~134-line component a cluster is a lossy
|
|
2045
|
+
// subset of a file the agent will just Read in full anyway — costing a
|
|
2046
|
+
// round-trip and a re-read every later turn. Reserve clustering for files
|
|
2047
|
+
// too big to ship whole. Incidental files are still bounded by the total maxOutputChars check.
|
|
2048
|
+
//
|
|
2049
|
+
// CENTRAL files (where the query's entry points live) get a larger — but
|
|
2050
|
+
// bounded — ceiling: they're the heart of the answer, the file(s) the agent
|
|
2051
|
+
// would Read whole, so a genuinely small one comes back whole rather than as
|
|
2052
|
+
// thin clusters. A LARGE central file (the 791-line org-user store) exceeds
|
|
2053
|
+
// the ceiling and falls through to sectioning/clustering below — full method
|
|
2054
|
+
// bodies + signatures — so we never dump (or overflow on) a whole god-file.
|
|
2055
|
+
const isCentralFile = centralFiles.has(filePath);
|
|
2056
|
+
// Central files get a slightly larger whole-file window than peripheral ones,
|
|
2057
|
+
// but a TIGHT char ceiling (~1.5× the per-file cap, vs 3× for peripheral files): the native read of a central file
|
|
2058
|
+
// is a ~150–250 line orientation window, NOT the whole file. A flat "whole
|
|
2059
|
+
// central file" both overflowed the inline cap AND starved the co-flow files
|
|
2060
|
+
// (worker.rs ate the budget, dropping harness.rs's poll). A larger central
|
|
2061
|
+
// file falls through to per-method windowing/clustering below.
|
|
2062
|
+
const WHOLE_FILE_MAX_LINES = isCentralFile ? 280 : 220;
|
|
2063
|
+
const WHOLE_FILE_MAX_CHARS = isCentralFile
|
|
2064
|
+
? Math.min(Math.max(0, budget.maxOutputChars - totalChars - 200), Math.round(budget.maxCharsPerFile * 1.5))
|
|
2065
|
+
: budget.maxCharsPerFile * 3;
|
|
2066
|
+
if (fileLines.length <= WHOLE_FILE_MAX_LINES && fileContent.length <= WHOLE_FILE_MAX_CHARS) {
|
|
2067
|
+
const body = fileContent.replace(/\n+$/, '');
|
|
2068
|
+
let wholeSection = exploreLineNumbersEnabled() ? numberSourceLines(body, 1) : body;
|
|
2069
|
+
const uniqSymbols = [...new Set(group.nodes
|
|
2070
|
+
.filter(n => n.kind !== 'import' && n.kind !== 'export')
|
|
2071
|
+
.map(n => `${n.name}(${n.kind})`))];
|
|
2072
|
+
const headerNames = uniqSymbols.slice(0, budget.maxSymbolsInFileHeader);
|
|
2073
|
+
const omitted = uniqSymbols.length - headerNames.length;
|
|
2074
|
+
const wholeHeader = `#### ${filePath} — ${omitted > 0 ? `${headerNames.join(', ')}, +${omitted} more` : headerNames.join(', ')}`;
|
|
2075
|
+
if (!fileNecessary && totalChars + wholeSection.length + 200 > budget.maxOutputChars) {
|
|
2076
|
+
// Don't slice a whole file mid-method: an incidental file that doesn't
|
|
2077
|
+
// fit is skipped; a necessary one (below) renders in full. Half a file
|
|
2078
|
+
// forces the Read this is meant to prevent.
|
|
2079
|
+
anyFileTrimmed = true;
|
|
2080
|
+
continue;
|
|
2081
|
+
}
|
|
2082
|
+
lines.push(wholeHeader, '', '```' + lang, wholeSection, '```', '');
|
|
2083
|
+
totalChars += wholeSection.length + 200;
|
|
2084
|
+
filesIncluded++;
|
|
2085
|
+
continue;
|
|
2086
|
+
}
|
|
896
2087
|
// Cluster nearby symbols to avoid reading huge gaps between distant symbols.
|
|
897
2088
|
// Sort by start line, then merge overlapping/adjacent ranges (within the
|
|
898
2089
|
// adaptive gap threshold). Include both node ranges AND edge source
|
|
@@ -913,14 +2104,35 @@ class ToolHandler {
|
|
|
913
2104
|
// Alamofire is the canonical case: the `Session` class spans ~1,400
|
|
914
2105
|
// lines). We want the granular symbols inside, not the envelope.
|
|
915
2106
|
const ENVELOPE_KINDS = new Set(['file', 'module', 'class', 'struct', 'interface', 'enum', 'namespace', 'protocol', 'trait', 'component']);
|
|
916
|
-
|
|
917
|
-
|
|
2107
|
+
// Cluster from this file's gathered nodes PLUS any callable the agent NAMED that
|
|
2108
|
+
// lives here. Explore's relevance gather can miss a named method def in a huge
|
|
2109
|
+
// non-sibling file — Django's query.py is 3,040 lines and `_fetch_all` (L2237)
|
|
2110
|
+
// was gathered only as call-reference edges, never as a def, so it formed no
|
|
2111
|
+
// cluster and the agent Read it back. Inject named defs directly and rank them
|
|
2112
|
+
// ABOVE connected/glue nodes (importance 9) so their cluster wins the per-file
|
|
2113
|
+
// budget — the agent explicitly asked for these symbols.
|
|
2114
|
+
const rangeNodes = new Map();
|
|
2115
|
+
for (const n of group.nodes)
|
|
2116
|
+
if (n.startLine > 0 && n.endLine > 0)
|
|
2117
|
+
rangeNodes.set(n.id, n);
|
|
2118
|
+
for (const id of flow.namedNodeIds) {
|
|
2119
|
+
if (rangeNodes.has(id))
|
|
2120
|
+
continue;
|
|
2121
|
+
const n = cg.getNode(id);
|
|
2122
|
+
if (n && n.filePath === filePath && n.startLine > 0 && n.endLine > 0)
|
|
2123
|
+
rangeNodes.set(id, n);
|
|
2124
|
+
}
|
|
2125
|
+
const ranges = [...rangeNodes.values()]
|
|
918
2126
|
// Drop whole-file envelope nodes (containers covering >50% of the file).
|
|
919
2127
|
.filter(n => !(ENVELOPE_KINDS.has(n.kind) && (n.endLine - n.startLine + 1) > fileLines.length * 0.5))
|
|
920
2128
|
.map(n => {
|
|
921
2129
|
let importance = 1;
|
|
922
2130
|
if (entryNodeIds.has(n.id))
|
|
923
2131
|
importance = 10;
|
|
2132
|
+
else if (flow.namedNodeIds.has(n.id))
|
|
2133
|
+
importance = 9; // agent named it → keep its cluster
|
|
2134
|
+
else if (glueNodeIds.has(n.id))
|
|
2135
|
+
importance = 6; // bridging caller/callee of an entry
|
|
924
2136
|
else if (connectedToEntry.has(n.id))
|
|
925
2137
|
importance = 3;
|
|
926
2138
|
return { start: n.startLine, end: n.endLine, name: n.name, kind: n.kind, importance };
|
|
@@ -1019,6 +2231,13 @@ class ToolHandler {
|
|
|
1019
2231
|
return b.c.score - a.c.score;
|
|
1020
2232
|
return a.span - b.span;
|
|
1021
2233
|
});
|
|
2234
|
+
// Per-file budget is the SMALLER of the per-file cap and what's left of the
|
|
2235
|
+
// total output cap — so selection (which ranks by importance) keeps the
|
|
2236
|
+
// high-importance clusters and drops peripheral ones, instead of the
|
|
2237
|
+
// downstream source-order trim slicing off whatever comes last in the file.
|
|
2238
|
+
// That source-order slice is what cut Django's `_fetch_all` (L2237, importance
|
|
2239
|
+
// 9 — agent-named) when query.py was the last of four big files to be emitted.
|
|
2240
|
+
const fileBudget = Math.min(budget.maxCharsPerFile, Math.max(0, budget.maxOutputChars - totalChars - 200));
|
|
1022
2241
|
const chosenIndices = new Set();
|
|
1023
2242
|
let projectedChars = 0;
|
|
1024
2243
|
for (const rc of rankedClusters) {
|
|
@@ -1031,7 +2250,7 @@ class ToolHandler {
|
|
|
1031
2250
|
projectedChars += sectionLen;
|
|
1032
2251
|
continue;
|
|
1033
2252
|
}
|
|
1034
|
-
if (projectedChars + sectionLen >
|
|
2253
|
+
if (projectedChars + sectionLen > fileBudget)
|
|
1035
2254
|
continue;
|
|
1036
2255
|
chosenIndices.add(rc.idx);
|
|
1037
2256
|
projectedChars += sectionLen;
|
|
@@ -1039,7 +2258,6 @@ class ToolHandler {
|
|
|
1039
2258
|
// Emit chosen clusters in source order so the file reads top-to-bottom.
|
|
1040
2259
|
let fileSection = '';
|
|
1041
2260
|
const allSymbols = [];
|
|
1042
|
-
let fileTrimmed = false;
|
|
1043
2261
|
for (let i = 0; i < clusters.length; i++) {
|
|
1044
2262
|
if (!chosenIndices.has(i))
|
|
1045
2263
|
continue;
|
|
@@ -1050,13 +2268,12 @@ class ToolHandler {
|
|
|
1050
2268
|
fileSection += section;
|
|
1051
2269
|
allSymbols.push(...cluster.symbols);
|
|
1052
2270
|
}
|
|
1053
|
-
//
|
|
1054
|
-
//
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
if (chosenIndices.size < clusters.length || fileTrimmed) {
|
|
2271
|
+
// A chosen cluster is a COMPLETE method-range — we never cut through a body.
|
|
2272
|
+
// An oversize single cluster (a long monolithic function) renders in FULL:
|
|
2273
|
+
// half a method is useless (the agent just Reads the rest for the other half),
|
|
2274
|
+
// which is the very fallback explore exists to prevent. A pathological file is
|
|
2275
|
+
// bounded by the per-file cluster SELECTION above + the total hard ceiling.
|
|
2276
|
+
if (chosenIndices.size < clusters.length) {
|
|
1060
2277
|
anyFileTrimmed = true;
|
|
1061
2278
|
}
|
|
1062
2279
|
// Dedupe + cap the symbols list shown in the per-file header. Some
|
|
@@ -1077,22 +2294,22 @@ class ToolHandler {
|
|
|
1077
2294
|
? `${headerSymbols.join(', ')}, +${omittedCount} more`
|
|
1078
2295
|
: headerSymbols.join(', ');
|
|
1079
2296
|
const fileHeader = `#### ${filePath} — ${headerSuffix}`;
|
|
1080
|
-
//
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
2297
|
+
// The total cap bounds INCIDENTAL files only. A file that DEFINES a symbol
|
|
2298
|
+
// the agent named (or that's on the flow spine) renders even when the
|
|
2299
|
+
// nominal total is used up — it's the answer, and the set is bounded by
|
|
2300
|
+
// maxFiles AND by true-spine/named-seeding having already trimmed each file
|
|
2301
|
+
// to its necessary content. A file that merely REFERENCES the flow
|
|
2302
|
+
// (Combine.swift name-drops request/task) is incidental → still capped, so
|
|
2303
|
+
// freed budget never leaks into noise. This is the last god-file layer:
|
|
2304
|
+
// build (Session, true-spined) + validators-exec (Request) + validate
|
|
2305
|
+
// (DataRequest/Validation) all render, instead of the cap dropping whichever
|
|
2306
|
+
// phase the file order happened to put last.
|
|
2307
|
+
if (!fileNecessary && totalChars + fileSection.length + 200 > budget.maxOutputChars) {
|
|
2308
|
+
// Incidental file that doesn't fit: SKIP it whole — never slice mid-method.
|
|
2309
|
+
// Keep scanning for necessary files (which bypass this cap and render in
|
|
2310
|
+
// full, bounded by the hard ceiling).
|
|
1094
2311
|
anyFileTrimmed = true;
|
|
1095
|
-
|
|
2312
|
+
continue;
|
|
1096
2313
|
}
|
|
1097
2314
|
lines.push(fileHeader);
|
|
1098
2315
|
lines.push('');
|
|
@@ -1113,7 +2330,7 @@ class ToolHandler {
|
|
|
1113
2330
|
.sort((a, b) => b[1].score - a[1].score);
|
|
1114
2331
|
const remainingFiles = [...remainingRelevant, ...peripheralFiles];
|
|
1115
2332
|
if (remainingFiles.length > 0) {
|
|
1116
|
-
lines.push('###
|
|
2333
|
+
lines.push('### Not shown above — explore these names for their source');
|
|
1117
2334
|
lines.push('');
|
|
1118
2335
|
for (const [filePath, group] of remainingFiles.slice(0, 10)) {
|
|
1119
2336
|
const symbols = group.nodes.map(n => `${n.name}:${n.startLine}`).join(', ');
|
|
@@ -1131,11 +2348,11 @@ class ToolHandler {
|
|
|
1131
2348
|
if (budget.includeCompletenessSignal) {
|
|
1132
2349
|
lines.push('');
|
|
1133
2350
|
lines.push('---');
|
|
1134
|
-
lines.push(`> **Complete source
|
|
2351
|
+
lines.push(`> **Complete source for ${filesIncluded} files is included above — do NOT re-read them.** If your question also needs files/symbols listed under "Not shown above" (or any area this call didn't cover), make ANOTHER codegraph_explore targeting those names — it returns the same source with line numbers and is cheaper and more complete than reading. Reserve Read for a single specific line range explore can't surface.`);
|
|
1135
2352
|
}
|
|
1136
2353
|
else if (anyFileTrimmed) {
|
|
1137
2354
|
lines.push('');
|
|
1138
|
-
lines.push(`> Some file sections were trimmed for size.
|
|
2355
|
+
lines.push(`> Some file sections were trimmed for size. For a specific symbol you still need, run another \`codegraph_explore\` (or \`codegraph_node\`) with its exact name — line-numbered source, cheaper and more complete than Read.`);
|
|
1139
2356
|
}
|
|
1140
2357
|
// Add explore budget note based on project size
|
|
1141
2358
|
if (budget.includeBudgetNote) {
|
|
@@ -1143,24 +2360,33 @@ class ToolHandler {
|
|
|
1143
2360
|
const stats = cg.getStats();
|
|
1144
2361
|
const callBudget = getExploreBudget(stats.fileCount);
|
|
1145
2362
|
lines.push('');
|
|
1146
|
-
lines.push(`> **Explore budget: ${callBudget} calls
|
|
2363
|
+
lines.push(`> **Explore budget: ${callBudget} calls for this project (${stats.fileCount.toLocaleString()} files indexed).** Each call covers ~6 files; if your question spans more, spend your remaining calls on the uncovered area BEFORE falling back to Read — another explore is cheaper and more complete than reading those files. Synthesize once you've used ${callBudget}.`);
|
|
1147
2364
|
}
|
|
1148
2365
|
catch {
|
|
1149
2366
|
// Stats unavailable — skip budget note
|
|
1150
2367
|
}
|
|
1151
2368
|
}
|
|
1152
|
-
//
|
|
1153
|
-
//
|
|
1154
|
-
//
|
|
1155
|
-
//
|
|
1156
|
-
//
|
|
1157
|
-
//
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
2369
|
+
// Final ceiling — an ABSOLUTE inline cap, not a multiple of the budget. The
|
|
2370
|
+
// render loop renders necessary (named/spine) files even a bit past
|
|
2371
|
+
// maxOutputChars and caps only incidental ones, so this is the last safety.
|
|
2372
|
+
// It MUST stay under the host's inline tool-result limit (~25K chars): above
|
|
2373
|
+
// that the result is externalized to a file the agent Reads back (a 35K
|
|
2374
|
+
// vscode explore did exactly this in the n=4 A/B). So allow a little
|
|
2375
|
+
// necessary overflow above the 24K budget, but hard-stop at 25K — never into
|
|
2376
|
+
// externalize territory.
|
|
2377
|
+
const output = flow.text + lines.join('\n');
|
|
2378
|
+
const hardCeiling = Math.min(Math.round(budget.maxOutputChars * 1.5), 25000);
|
|
2379
|
+
if (output.length > hardCeiling) {
|
|
2380
|
+
// Cut at a FILE-SECTION boundary (the last `#### ` header before the
|
|
2381
|
+
// ceiling) so we drop whole trailing file-sections rather than slicing
|
|
2382
|
+
// through a method body — a half-rendered method just forces the Read this
|
|
2383
|
+
// tool exists to prevent. Fall back to a line boundary only if no section
|
|
2384
|
+
// header sits in the back half (degenerate single-giant-section case).
|
|
2385
|
+
const cut = output.slice(0, hardCeiling);
|
|
2386
|
+
const lastSection = cut.lastIndexOf('\n#### ');
|
|
2387
|
+
const boundary = lastSection > hardCeiling * 0.5 ? lastSection : cut.lastIndexOf('\n');
|
|
2388
|
+
const safe = boundary > 0 ? cut.slice(0, boundary) : cut;
|
|
2389
|
+
return this.textResult(safe + '\n\n... (output truncated to budget; the source above is complete and verbatim — treat it as already Read. For any area not covered, run another codegraph_explore with the specific names — do NOT Read these files.)');
|
|
1164
2390
|
}
|
|
1165
2391
|
return this.textResult(output);
|
|
1166
2392
|
}
|
|
@@ -1174,53 +2400,197 @@ class ToolHandler {
|
|
|
1174
2400
|
const cg = this.getCodeGraph(args.projectPath);
|
|
1175
2401
|
// Default to false to minimize context usage
|
|
1176
2402
|
const includeCode = args.includeCode === true;
|
|
1177
|
-
const
|
|
1178
|
-
|
|
2403
|
+
const fileHint = typeof args.file === 'string' && args.file.trim() ? args.file.trim() : undefined;
|
|
2404
|
+
const lineHint = typeof args.line === 'number' && args.line > 0 ? args.line : undefined;
|
|
2405
|
+
let matches = this.findSymbolMatches(cg, symbol);
|
|
2406
|
+
if (matches.length === 0) {
|
|
1179
2407
|
return this.textResult(`Symbol "${symbol}" not found in the codebase`);
|
|
1180
2408
|
}
|
|
2409
|
+
// Disambiguate a heavily-overloaded name to a specific definition the caller
|
|
2410
|
+
// pinned by file/line (the `file:line` a trail or another tool showed it) —
|
|
2411
|
+
// so it can fetch e.g. `Harness::poll` at harness.rs:153 out of 50+ `poll`s
|
|
2412
|
+
// instead of Reading. file matches by path suffix/substring; line prefers the
|
|
2413
|
+
// def whose body contains it, else the nearest start. Only narrows (never
|
|
2414
|
+
// empties — if a hint matches nothing it's ignored).
|
|
2415
|
+
if (matches.length > 1 && (fileHint || lineHint !== undefined)) {
|
|
2416
|
+
const norm = (p) => p.replace(/\\/g, '/').toLowerCase();
|
|
2417
|
+
let narrowed = matches;
|
|
2418
|
+
if (fileHint) {
|
|
2419
|
+
const fh = norm(fileHint);
|
|
2420
|
+
const byFile = narrowed.filter((n) => norm(n.filePath).endsWith(fh) || norm(n.filePath).includes(fh));
|
|
2421
|
+
if (byFile.length > 0)
|
|
2422
|
+
narrowed = byFile;
|
|
2423
|
+
}
|
|
2424
|
+
if (lineHint !== undefined && narrowed.length > 1) {
|
|
2425
|
+
const containing = narrowed.filter((n) => n.startLine <= lineHint && (n.endLine ?? n.startLine) >= lineHint);
|
|
2426
|
+
narrowed = containing.length > 0
|
|
2427
|
+
? containing
|
|
2428
|
+
: [...narrowed].sort((a, b) => Math.abs(a.startLine - lineHint) - Math.abs(b.startLine - lineHint)).slice(0, 1);
|
|
2429
|
+
}
|
|
2430
|
+
if (narrowed.length > 0)
|
|
2431
|
+
matches = narrowed;
|
|
2432
|
+
}
|
|
2433
|
+
// Single definition — the common case.
|
|
2434
|
+
if (matches.length === 1) {
|
|
2435
|
+
return this.textResult(this.truncateOutput(await this.renderNodeSection(cg, matches[0], includeCode)));
|
|
2436
|
+
}
|
|
2437
|
+
// Multiple definitions share this name — overloads, or same-named methods on
|
|
2438
|
+
// different types (Alamofire `didCompleteTask`/`task`/`validate`, gin
|
|
2439
|
+
// `reset`). Returning ONE forces the agent to guess, and when it guesses
|
|
2440
|
+
// wrong it READS the file to find the right overload — the dominant
|
|
2441
|
+
// codegraph_node read cause on Swift/Go. So return them ALL: pack as many
|
|
2442
|
+
// FULL bodies as fit a char budget (the agent gets the one it needs in this
|
|
2443
|
+
// one call, no follow-up parameter to learn), and list any remainder by
|
|
2444
|
+
// file:line so a large overload set can't overflow the per-tool cap.
|
|
2445
|
+
const header = `**${matches.length} definitions named "${symbol}"**`;
|
|
2446
|
+
if (!includeCode) {
|
|
2447
|
+
const list = matches.map((n) => `- \`${n.name}\` (${n.kind}) — ${n.filePath}:${n.startLine}`);
|
|
2448
|
+
return this.textResult(this.truncateOutput([header, '', 'Re-query with `includeCode: true` to get every body in one call — no need to pick one first.', '', ...list].join('\n')));
|
|
2449
|
+
}
|
|
2450
|
+
const BODY_BUDGET = 12000; // leaves room under MAX_OUTPUT_LENGTH for the header + list
|
|
2451
|
+
// The CHAR budget is the real limiter — keep the count cap high so a set of
|
|
2452
|
+
// SHORT overloads (Alamofire's 10 `validate` variants, each a few lines) all
|
|
2453
|
+
// render in full rather than relegating the one the agent wanted to a
|
|
2454
|
+
// bodiless list. Only a set of many LARGE bodies hits the char budget first.
|
|
2455
|
+
const HARD_CAP = 16;
|
|
2456
|
+
const rendered = [];
|
|
2457
|
+
const listed = [];
|
|
2458
|
+
let used = 0;
|
|
2459
|
+
for (const n of matches) {
|
|
2460
|
+
if (rendered.length >= HARD_CAP) {
|
|
2461
|
+
listed.push(n);
|
|
2462
|
+
continue;
|
|
2463
|
+
}
|
|
2464
|
+
const section = await this.renderNodeSection(cg, n, true);
|
|
2465
|
+
// Always emit the first; emit the rest only while within the char budget.
|
|
2466
|
+
if (rendered.length === 0 || used + section.length <= BODY_BUDGET) {
|
|
2467
|
+
rendered.push(section);
|
|
2468
|
+
used += section.length;
|
|
2469
|
+
}
|
|
2470
|
+
else {
|
|
2471
|
+
listed.push(n);
|
|
2472
|
+
}
|
|
2473
|
+
}
|
|
2474
|
+
const out = [
|
|
2475
|
+
header,
|
|
2476
|
+
`Returning ${rendered.length} in full${listed.length ? `; ${listed.length} more listed below` : ''} — pick the one you need (no Read required).`,
|
|
2477
|
+
'',
|
|
2478
|
+
rendered.join('\n\n---\n\n'),
|
|
2479
|
+
];
|
|
2480
|
+
if (listed.length) {
|
|
2481
|
+
const LIST_CAP = 20;
|
|
2482
|
+
const shownList = listed.slice(0, LIST_CAP);
|
|
2483
|
+
out.push('', '### Other definitions', ...shownList.map((n) => `- \`${n.name}\` (${n.kind}) — ${n.filePath}:${n.startLine}`));
|
|
2484
|
+
if (listed.length > LIST_CAP)
|
|
2485
|
+
out.push(`- … +${listed.length - LIST_CAP} more`);
|
|
2486
|
+
out.push('', `> Need one of these in full? Call codegraph_node again with \`file\` (e.g. \`"${listed[0].filePath.split('/').pop()}"\`) or \`line\` — do NOT Read it.`);
|
|
2487
|
+
}
|
|
2488
|
+
return this.textResult(this.truncateOutput(out.join('\n')));
|
|
2489
|
+
}
|
|
2490
|
+
/** Render one symbol: details + (optional) body/outline + its caller/callee trail. */
|
|
2491
|
+
async renderNodeSection(cg, node, includeCode) {
|
|
1181
2492
|
let code = null;
|
|
1182
2493
|
let outline = null;
|
|
1183
2494
|
if (includeCode) {
|
|
1184
2495
|
// For container symbols (class/interface/struct/…), the full body is the
|
|
1185
|
-
// sum of every method body — a wall of source
|
|
1186
|
-
//
|
|
1187
|
-
//
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
if (CONTAINER_NODE_KINDS.has(match.node.kind)) {
|
|
1191
|
-
outline = this.buildContainerOutline(cg, match.node);
|
|
2496
|
+
// sum of every method body — a wall of source. Return a structural outline
|
|
2497
|
+
// (members + signatures + line numbers) instead; leaf symbols return their
|
|
2498
|
+
// full body.
|
|
2499
|
+
if (CONTAINER_NODE_KINDS.has(node.kind)) {
|
|
2500
|
+
outline = this.buildContainerOutline(cg, node);
|
|
1192
2501
|
}
|
|
1193
2502
|
if (!outline) {
|
|
1194
|
-
code = await cg.getCode(
|
|
2503
|
+
code = await cg.getCode(node.id);
|
|
1195
2504
|
}
|
|
1196
2505
|
}
|
|
1197
|
-
|
|
1198
|
-
|
|
2506
|
+
return this.formatNodeDetails(node, code, outline) + this.formatTrail(cg, node);
|
|
2507
|
+
}
|
|
2508
|
+
/**
|
|
2509
|
+
* Build the "trail" for a symbol: its direct callees (what it calls) and
|
|
2510
|
+
* callers (what calls it), each with file:line — so codegraph_node doubles as
|
|
2511
|
+
* the structural Grep→Read→expand primitive: a spot PLUS where to go next.
|
|
2512
|
+
* Capped to stay cheap. Walk the graph by calling codegraph_node on a trail
|
|
2513
|
+
* entry; no Read needed for covered hops. Empty edges on a non-leaf often mean
|
|
2514
|
+
* dynamic dispatch the static graph couldn't resolve — that absence is itself
|
|
2515
|
+
* a signal (read that one hop) rather than a dead end.
|
|
2516
|
+
*/
|
|
2517
|
+
formatTrail(cg, node) {
|
|
2518
|
+
const TRAIL_CAP = 12;
|
|
2519
|
+
const fmt = (e) => {
|
|
2520
|
+
const base = `${e.node.name} (${e.node.filePath}:${e.node.startLine})`;
|
|
2521
|
+
const synth = this.synthEdgeNote(e.edge);
|
|
2522
|
+
return synth ? `${base} [${synth.compact}]` : base;
|
|
2523
|
+
};
|
|
2524
|
+
const collect = (edges) => {
|
|
2525
|
+
const seen = new Set([node.id]);
|
|
2526
|
+
const out = [];
|
|
2527
|
+
for (const e of edges) {
|
|
2528
|
+
if (seen.has(e.node.id))
|
|
2529
|
+
continue;
|
|
2530
|
+
seen.add(e.node.id);
|
|
2531
|
+
out.push(e);
|
|
2532
|
+
}
|
|
2533
|
+
return out;
|
|
2534
|
+
};
|
|
2535
|
+
const callees = collect(cg.getCallees(node.id));
|
|
2536
|
+
const callers = collect(cg.getCallers(node.id));
|
|
2537
|
+
if (callees.length === 0 && callers.length === 0)
|
|
2538
|
+
return '';
|
|
2539
|
+
const lines = ['', '### Trail — codegraph_node any of these to follow it (no Read needed)'];
|
|
2540
|
+
if (callees.length > 0) {
|
|
2541
|
+
lines.push(`**Calls →** ${callees.slice(0, TRAIL_CAP).map(fmt).join(', ')}${callees.length > TRAIL_CAP ? `, +${callees.length - TRAIL_CAP} more` : ''}`);
|
|
2542
|
+
}
|
|
2543
|
+
if (callers.length > 0) {
|
|
2544
|
+
lines.push(`**Called by ←** ${callers.slice(0, TRAIL_CAP).map(fmt).join(', ')}${callers.length > TRAIL_CAP ? `, +${callers.length - TRAIL_CAP} more` : ''}`);
|
|
2545
|
+
}
|
|
2546
|
+
return lines.join('\n');
|
|
1199
2547
|
}
|
|
1200
2548
|
/**
|
|
1201
2549
|
* Handle codegraph_status
|
|
1202
2550
|
*/
|
|
1203
2551
|
async handleStatus(args) {
|
|
1204
|
-
|
|
2552
|
+
let cg = this.getCodeGraph(args.projectPath);
|
|
2553
|
+
// Same trick as withStalenessNotice — when an explicit projectPath
|
|
2554
|
+
// resolves to the same project as the default session cg, prefer the
|
|
2555
|
+
// default so getPendingFiles() (only populated by the default's watcher)
|
|
2556
|
+
// is non-empty when there are pending edits.
|
|
2557
|
+
if (this.cg && cg !== this.cg) {
|
|
2558
|
+
try {
|
|
2559
|
+
if ((0, path_1.resolve)(this.cg.getProjectRoot()) === (0, path_1.resolve)(cg.getProjectRoot())) {
|
|
2560
|
+
cg = this.cg;
|
|
2561
|
+
}
|
|
2562
|
+
}
|
|
2563
|
+
catch { /* closed instance — leave as is */ }
|
|
2564
|
+
}
|
|
1205
2565
|
const stats = cg.getStats();
|
|
2566
|
+
// Warn when this index actually belongs to a different git working tree
|
|
2567
|
+
// (e.g. the server resolved up from a nested worktree to the main checkout).
|
|
2568
|
+
// Queries then reflect that tree's branch, not the worktree being edited.
|
|
2569
|
+
// status shows the verbose, multi-line form; the read tools get the compact
|
|
2570
|
+
// one-liner via withWorktreeNotice. Both share the cached detection.
|
|
2571
|
+
const mismatch = this.worktreeMismatchFor(args.projectPath);
|
|
1206
2572
|
const lines = [
|
|
1207
2573
|
'## CodeGraph Status',
|
|
1208
2574
|
'',
|
|
1209
|
-
`**Files indexed:** ${stats.fileCount}`,
|
|
1210
|
-
`**Total nodes:** ${stats.nodeCount}`,
|
|
1211
|
-
`**Total edges:** ${stats.edgeCount}`,
|
|
1212
|
-
`**Database size:** ${(stats.dbSizeBytes / 1024 / 1024).toFixed(2)} MB`,
|
|
1213
2575
|
];
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
2576
|
+
if (mismatch) {
|
|
2577
|
+
lines.push(`> ⚠ ${(0, worktree_1.worktreeMismatchWarning)(mismatch).replace(/\n/g, '\n> ')}`, '');
|
|
2578
|
+
}
|
|
2579
|
+
lines.push(`**Files indexed:** ${stats.fileCount}`, `**Total nodes:** ${stats.nodeCount}`, `**Total edges:** ${stats.edgeCount}`, `**Database size:** ${(stats.dbSizeBytes / 1024 / 1024).toFixed(2)} MB`);
|
|
2580
|
+
// Surface the active SQLite backend (node:sqlite, Node's built-in real
|
|
2581
|
+
// SQLite — full WAL + FTS5, no native build).
|
|
2582
|
+
lines.push(`**Backend:** node:sqlite (Node built-in) — full WAL + FTS5`);
|
|
2583
|
+
// Effective journal mode. 'wal' ⇒ concurrent reads never block on a writer;
|
|
2584
|
+
// anything else ⇒ they can ("database is locked"). node:sqlite supports WAL
|
|
2585
|
+
// everywhere, so a non-wal mode means the filesystem can't (network/
|
|
2586
|
+
// virtualized mounts, WSL2 /mnt). See issue #238.
|
|
2587
|
+
const journalMode = cg.getJournalMode();
|
|
2588
|
+
if (journalMode === 'wal') {
|
|
2589
|
+
lines.push(`**Journal mode:** wal (concurrent reads safe)`);
|
|
1220
2590
|
}
|
|
1221
2591
|
else {
|
|
1222
|
-
lines.push(`**
|
|
1223
|
-
`
|
|
2592
|
+
lines.push(`**Journal mode:** ⚠ ${journalMode || 'unknown'} — WAL not active, so reads ` +
|
|
2593
|
+
`can block on a concurrent write (WAL appears unsupported on this filesystem)`);
|
|
1224
2594
|
}
|
|
1225
2595
|
lines.push('', '### Nodes by Kind:');
|
|
1226
2596
|
for (const [kind, count] of Object.entries(stats.nodesByKind)) {
|
|
@@ -1234,6 +2604,20 @@ class ToolHandler {
|
|
|
1234
2604
|
lines.push(`- ${lang}: ${count}`);
|
|
1235
2605
|
}
|
|
1236
2606
|
}
|
|
2607
|
+
// Per-file freshness — the inverse of the auto-prepended staleness banner
|
|
2608
|
+
// (issue #403). Surfacing it inside `status` gives the agent a single
|
|
2609
|
+
// place to ask "is the index caught up?" rather than inferring from
|
|
2610
|
+
// banners on other tool calls.
|
|
2611
|
+
const pending = cg.getPendingFiles();
|
|
2612
|
+
if (pending.length > 0) {
|
|
2613
|
+
lines.push('', '### Pending sync:');
|
|
2614
|
+
const now = Date.now();
|
|
2615
|
+
for (const p of pending) {
|
|
2616
|
+
const ageMs = Math.max(0, now - p.lastSeenMs);
|
|
2617
|
+
const label = p.indexing ? 'indexing in progress' : 'pending sync';
|
|
2618
|
+
lines.push(`- ${p.path} (edited ${ageMs}ms ago, ${label})`);
|
|
2619
|
+
}
|
|
2620
|
+
}
|
|
1237
2621
|
return this.textResult(lines.join('\n'));
|
|
1238
2622
|
}
|
|
1239
2623
|
/**
|
|
@@ -1251,9 +2635,20 @@ class ToolHandler {
|
|
|
1251
2635
|
if (allFiles.length === 0) {
|
|
1252
2636
|
return this.textResult('No files indexed. Run `codegraph index` first.');
|
|
1253
2637
|
}
|
|
1254
|
-
// Filter by path prefix
|
|
1255
|
-
|
|
1256
|
-
|
|
2638
|
+
// Filter by path prefix. Stored paths are project-relative POSIX (e.g.
|
|
2639
|
+
// "src/foo.ts"), but agents commonly pass project-root variants like "/",
|
|
2640
|
+
// ".", "./", "" or Windows-style "src\foo" — and prefixes with leading
|
|
2641
|
+
// "/", "./" or "\". Normalize all of those before matching so the agent
|
|
2642
|
+
// gets results instead of falling back to Read/Glob (see #426).
|
|
2643
|
+
const normalizedFilter = pathFilter
|
|
2644
|
+
? pathFilter
|
|
2645
|
+
.replace(/\\/g, '/')
|
|
2646
|
+
.replace(/^(?:\.?\/+)+/, '')
|
|
2647
|
+
.replace(/^\.$/, '')
|
|
2648
|
+
.replace(/\/+$/, '')
|
|
2649
|
+
: '';
|
|
2650
|
+
let files = normalizedFilter
|
|
2651
|
+
? allFiles.filter(f => f.path === normalizedFilter || f.path.startsWith(normalizedFilter + '/'))
|
|
1257
2652
|
: allFiles;
|
|
1258
2653
|
// Filter by glob pattern
|
|
1259
2654
|
if (pattern) {
|
|
@@ -1448,42 +2843,55 @@ class ToolHandler {
|
|
|
1448
2843
|
const segments = node.filePath.split('/').filter((s) => s.length > 0);
|
|
1449
2844
|
return containerHints.every((hint) => segments.some((seg) => seg === hint || seg.replace(/\.[^.]+$/, '') === hint));
|
|
1450
2845
|
}
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
2846
|
+
/**
|
|
2847
|
+
* Find ALL definitions matching a name, ranked, so codegraph_node can return
|
|
2848
|
+
* every overload instead of guessing one (the wrong guess → a Read). Keepers
|
|
2849
|
+
* rank before generated stubs (.pb.go etc.); stable within a group preserves
|
|
2850
|
+
* FTS order. Returns [] when nothing matches; a qualified lookup that finds no
|
|
2851
|
+
* exact match returns [] rather than a misleading fuzzy file hit (#173); a
|
|
2852
|
+
* bare name with no exact match falls back to the single top fuzzy result.
|
|
2853
|
+
*/
|
|
2854
|
+
findSymbolMatches(cg, symbol) {
|
|
1455
2855
|
const isQualified = /[.\/]|::/.test(symbol);
|
|
1456
|
-
|
|
2856
|
+
// For a bare name, enumerate EVERY exact-name definition via the direct index
|
|
2857
|
+
// (not FTS, which caps + ranks): tokio's `poll` has 50+ defs and the one the
|
|
2858
|
+
// caller wants (`Harness::poll` at harness.rs:153) ranks below any search cut,
|
|
2859
|
+
// so it could be neither rendered nor pinned by the file/line disambiguator —
|
|
2860
|
+
// and the agent Read it. With the full set, the multi-overload render + the
|
|
2861
|
+
// file/line filter can both reach it.
|
|
2862
|
+
if (!isQualified) {
|
|
2863
|
+
const exact = cg.getNodesByName(symbol);
|
|
2864
|
+
if (exact.length > 0) {
|
|
2865
|
+
return [...exact].sort((a, b) => ((0, generated_detection_1.isGeneratedFile)(a.filePath) ? 1 : 0) - ((0, generated_detection_1.isGeneratedFile)(b.filePath) ? 1 : 0));
|
|
2866
|
+
}
|
|
2867
|
+
// No exact match — use the single top fuzzy result (e.g. a file basename).
|
|
2868
|
+
const fuzzy = cg.searchNodes(symbol, { limit: 10 });
|
|
2869
|
+
return fuzzy[0] ? [fuzzy[0].node] : [];
|
|
2870
|
+
}
|
|
2871
|
+
// Qualified lookup (`Session.request`, `stage_apply::run`): FTS + matchesSymbol.
|
|
2872
|
+
const limit = 50;
|
|
1457
2873
|
let results = cg.searchNodes(symbol, { limit });
|
|
1458
|
-
// FTS strips colons
|
|
1459
|
-
//
|
|
1460
|
-
//
|
|
2874
|
+
// FTS strips colons, so `stage_apply::run` searches the literal
|
|
2875
|
+
// `stage_applyrun` and finds nothing. Re-search by the bare last part and
|
|
2876
|
+
// let `matchesSymbol` filter by qualifier.
|
|
1461
2877
|
if (isQualified && results.length === 0) {
|
|
1462
2878
|
const tail = lastQualifierPart(symbol);
|
|
1463
2879
|
if (tail && tail !== symbol)
|
|
1464
2880
|
results = cg.searchNodes(tail, { limit });
|
|
1465
2881
|
}
|
|
1466
|
-
if (results.length === 0
|
|
1467
|
-
return
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
}
|
|
1480
|
-
// No exact match. For qualified lookups, don't silently fall back
|
|
1481
|
-
// to a fuzzy result — the user typed a specific qualifier, and
|
|
1482
|
-
// resolving `stage_apply::nonexistent_fn` to the unrelated
|
|
1483
|
-
// `stage_apply.rs` file would be actively misleading (#173).
|
|
1484
|
-
if (isQualified)
|
|
1485
|
-
return null;
|
|
1486
|
-
return { node: results[0].node, note: '' };
|
|
2882
|
+
if (results.length === 0)
|
|
2883
|
+
return [];
|
|
2884
|
+
const exactMatches = results.filter((r) => this.matchesSymbol(r.node, symbol));
|
|
2885
|
+
if (exactMatches.length === 0) {
|
|
2886
|
+
// No exact match — a qualified lookup must not fall back to a fuzzy file
|
|
2887
|
+
// hit (#173); a bare name may use the single top fuzzy result.
|
|
2888
|
+
return isQualified ? [] : results[0] ? [results[0].node] : [];
|
|
2889
|
+
}
|
|
2890
|
+
// Down-rank generated files (.pb.go, .pulsar.go, _grpc.pb.go, …) so a flow
|
|
2891
|
+
// query prefers the keeper implementation over the protobuf-generated stub.
|
|
2892
|
+
return [...exactMatches]
|
|
2893
|
+
.sort((a, b) => ((0, generated_detection_1.isGeneratedFile)(a.node.filePath) ? 1 : 0) - ((0, generated_detection_1.isGeneratedFile)(b.node.filePath) ? 1 : 0))
|
|
2894
|
+
.map((r) => r.node);
|
|
1487
2895
|
}
|
|
1488
2896
|
/**
|
|
1489
2897
|
* Find ALL symbols matching a name. Used by callers/callees/impact to aggregate
|
|
@@ -1507,9 +2915,17 @@ class ToolHandler {
|
|
|
1507
2915
|
const node = exactMatches[0]?.node ?? results[0].node;
|
|
1508
2916
|
return { nodes: [node], note: '' };
|
|
1509
2917
|
}
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
2918
|
+
// Same generated-file down-rank as findSymbol — keeps callers/callees
|
|
2919
|
+
// /impact aggregation aligned (a query against "Send" returns the
|
|
2920
|
+
// hand-written implementations before the protobuf scaffold).
|
|
2921
|
+
const ranked = [...exactMatches].sort((a, b) => {
|
|
2922
|
+
const aGen = (0, generated_detection_1.isGeneratedFile)(a.node.filePath) ? 1 : 0;
|
|
2923
|
+
const bGen = (0, generated_detection_1.isGeneratedFile)(b.node.filePath) ? 1 : 0;
|
|
2924
|
+
return aGen - bGen;
|
|
2925
|
+
});
|
|
2926
|
+
const locations = ranked.map(r => `${r.node.kind} at ${r.node.filePath}:${r.node.startLine}`);
|
|
2927
|
+
const note = `\n\n> **Note:** Aggregated results across ${ranked.length} symbols named "${symbol}": ${locations.join(', ')}`;
|
|
2928
|
+
return { nodes: ranked.map(r => r.node), note };
|
|
1513
2929
|
}
|
|
1514
2930
|
/**
|
|
1515
2931
|
* Truncate output if it exceeds the maximum length
|
|
@@ -1610,13 +3026,13 @@ class ToolHandler {
|
|
|
1610
3026
|
lines.push('', outline, '', `> Structural outline only. Read \`${node.filePath}\` or call codegraph_node on a specific member for its body.`);
|
|
1611
3027
|
}
|
|
1612
3028
|
else if (code) {
|
|
1613
|
-
|
|
3029
|
+
// Line-numbered (cat -n style, like codegraph_explore and Read) so the
|
|
3030
|
+
// agent can cite/edit exact lines without re-Reading the file for them.
|
|
3031
|
+
const numbered = node.startLine ? numberSourceLines(code, node.startLine) : code;
|
|
3032
|
+
lines.push('', '```' + node.language, numbered, '```');
|
|
1614
3033
|
}
|
|
1615
3034
|
return lines.join('\n');
|
|
1616
3035
|
}
|
|
1617
|
-
formatTaskContext(context) {
|
|
1618
|
-
return context.summary || 'No context found';
|
|
1619
|
-
}
|
|
1620
3036
|
textResult(text) {
|
|
1621
3037
|
return {
|
|
1622
3038
|
content: [{ type: 'text', text }],
|