@zuvia-software-solutions/code-mapper 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +215 -0
- package/dist/cli/ai-context.d.ts +19 -0
- package/dist/cli/ai-context.js +168 -0
- package/dist/cli/analyze.d.ts +7 -0
- package/dist/cli/analyze.js +325 -0
- package/dist/cli/augment.d.ts +7 -0
- package/dist/cli/augment.js +27 -0
- package/dist/cli/clean.d.ts +5 -0
- package/dist/cli/clean.js +56 -0
- package/dist/cli/eval-server.d.ts +25 -0
- package/dist/cli/eval-server.js +365 -0
- package/dist/cli/index.d.ts +6 -0
- package/dist/cli/index.js +102 -0
- package/dist/cli/lazy-action.d.ts +6 -0
- package/dist/cli/lazy-action.js +19 -0
- package/dist/cli/list.d.ts +2 -0
- package/dist/cli/list.js +27 -0
- package/dist/cli/mcp.d.ts +8 -0
- package/dist/cli/mcp.js +35 -0
- package/dist/cli/refresh.d.ts +12 -0
- package/dist/cli/refresh.js +165 -0
- package/dist/cli/serve.d.ts +5 -0
- package/dist/cli/serve.js +8 -0
- package/dist/cli/setup.d.ts +6 -0
- package/dist/cli/setup.js +218 -0
- package/dist/cli/status.d.ts +2 -0
- package/dist/cli/status.js +33 -0
- package/dist/cli/tool.d.ts +28 -0
- package/dist/cli/tool.js +87 -0
- package/dist/config/ignore-service.d.ts +32 -0
- package/dist/config/ignore-service.js +282 -0
- package/dist/config/supported-languages.d.ts +23 -0
- package/dist/config/supported-languages.js +52 -0
- package/dist/core/augmentation/engine.d.ts +22 -0
- package/dist/core/augmentation/engine.js +232 -0
- package/dist/core/embeddings/embedder.d.ts +35 -0
- package/dist/core/embeddings/embedder.js +171 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +41 -0
- package/dist/core/embeddings/embedding-pipeline.js +402 -0
- package/dist/core/embeddings/index.d.ts +5 -0
- package/dist/core/embeddings/index.js +6 -0
- package/dist/core/embeddings/text-generator.d.ts +20 -0
- package/dist/core/embeddings/text-generator.js +159 -0
- package/dist/core/embeddings/types.d.ts +60 -0
- package/dist/core/embeddings/types.js +23 -0
- package/dist/core/graph/graph.d.ts +4 -0
- package/dist/core/graph/graph.js +65 -0
- package/dist/core/graph/types.d.ts +69 -0
- package/dist/core/graph/types.js +3 -0
- package/dist/core/incremental/child-process.d.ts +8 -0
- package/dist/core/incremental/child-process.js +649 -0
- package/dist/core/incremental/refresh-coordinator.d.ts +32 -0
- package/dist/core/incremental/refresh-coordinator.js +147 -0
- package/dist/core/incremental/types.d.ts +78 -0
- package/dist/core/incremental/types.js +153 -0
- package/dist/core/incremental/watcher.d.ts +63 -0
- package/dist/core/incremental/watcher.js +338 -0
- package/dist/core/ingestion/ast-cache.d.ts +12 -0
- package/dist/core/ingestion/ast-cache.js +34 -0
- package/dist/core/ingestion/call-processor.d.ts +34 -0
- package/dist/core/ingestion/call-processor.js +937 -0
- package/dist/core/ingestion/call-routing.d.ts +40 -0
- package/dist/core/ingestion/call-routing.js +97 -0
- package/dist/core/ingestion/cluster-enricher.d.ts +30 -0
- package/dist/core/ingestion/cluster-enricher.js +151 -0
- package/dist/core/ingestion/community-processor.d.ts +26 -0
- package/dist/core/ingestion/community-processor.js +272 -0
- package/dist/core/ingestion/constants.d.ts +5 -0
- package/dist/core/ingestion/constants.js +8 -0
- package/dist/core/ingestion/entry-point-scoring.d.ts +23 -0
- package/dist/core/ingestion/entry-point-scoring.js +317 -0
- package/dist/core/ingestion/export-detection.d.ts +11 -0
- package/dist/core/ingestion/export-detection.js +203 -0
- package/dist/core/ingestion/filesystem-walker.d.ts +18 -0
- package/dist/core/ingestion/filesystem-walker.js +64 -0
- package/dist/core/ingestion/framework-detection.d.ts +42 -0
- package/dist/core/ingestion/framework-detection.js +405 -0
- package/dist/core/ingestion/heritage-processor.d.ts +15 -0
- package/dist/core/ingestion/heritage-processor.js +237 -0
- package/dist/core/ingestion/import-processor.d.ts +31 -0
- package/dist/core/ingestion/import-processor.js +416 -0
- package/dist/core/ingestion/language-config.d.ts +32 -0
- package/dist/core/ingestion/language-config.js +161 -0
- package/dist/core/ingestion/mro-processor.d.ts +32 -0
- package/dist/core/ingestion/mro-processor.js +343 -0
- package/dist/core/ingestion/named-binding-extraction.d.ts +51 -0
- package/dist/core/ingestion/named-binding-extraction.js +343 -0
- package/dist/core/ingestion/parsing-processor.d.ts +20 -0
- package/dist/core/ingestion/parsing-processor.js +282 -0
- package/dist/core/ingestion/pipeline.d.ts +3 -0
- package/dist/core/ingestion/pipeline.js +416 -0
- package/dist/core/ingestion/process-processor.d.ts +42 -0
- package/dist/core/ingestion/process-processor.js +357 -0
- package/dist/core/ingestion/resolution-context.d.ts +40 -0
- package/dist/core/ingestion/resolution-context.js +171 -0
- package/dist/core/ingestion/resolvers/csharp.d.ts +10 -0
- package/dist/core/ingestion/resolvers/csharp.js +101 -0
- package/dist/core/ingestion/resolvers/go.d.ts +8 -0
- package/dist/core/ingestion/resolvers/go.js +33 -0
- package/dist/core/ingestion/resolvers/index.d.ts +14 -0
- package/dist/core/ingestion/resolvers/index.js +10 -0
- package/dist/core/ingestion/resolvers/jvm.d.ts +9 -0
- package/dist/core/ingestion/resolvers/jvm.js +74 -0
- package/dist/core/ingestion/resolvers/php.d.ts +7 -0
- package/dist/core/ingestion/resolvers/php.js +30 -0
- package/dist/core/ingestion/resolvers/ruby.d.ts +9 -0
- package/dist/core/ingestion/resolvers/ruby.js +13 -0
- package/dist/core/ingestion/resolvers/rust.d.ts +5 -0
- package/dist/core/ingestion/resolvers/rust.js +62 -0
- package/dist/core/ingestion/resolvers/standard.d.ts +16 -0
- package/dist/core/ingestion/resolvers/standard.js +144 -0
- package/dist/core/ingestion/resolvers/utils.d.ts +18 -0
- package/dist/core/ingestion/resolvers/utils.js +113 -0
- package/dist/core/ingestion/structure-processor.d.ts +4 -0
- package/dist/core/ingestion/structure-processor.js +39 -0
- package/dist/core/ingestion/symbol-table.d.ts +34 -0
- package/dist/core/ingestion/symbol-table.js +48 -0
- package/dist/core/ingestion/tree-sitter-queries.d.ts +20 -0
- package/dist/core/ingestion/tree-sitter-queries.js +691 -0
- package/dist/core/ingestion/type-env.d.ts +52 -0
- package/dist/core/ingestion/type-env.js +349 -0
- package/dist/core/ingestion/type-extractors/c-cpp.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/c-cpp.js +214 -0
- package/dist/core/ingestion/type-extractors/csharp.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/csharp.js +224 -0
- package/dist/core/ingestion/type-extractors/go.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/go.js +261 -0
- package/dist/core/ingestion/type-extractors/index.d.ts +20 -0
- package/dist/core/ingestion/type-extractors/index.js +30 -0
- package/dist/core/ingestion/type-extractors/jvm.d.ts +5 -0
- package/dist/core/ingestion/type-extractors/jvm.js +386 -0
- package/dist/core/ingestion/type-extractors/php.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/php.js +280 -0
- package/dist/core/ingestion/type-extractors/python.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/python.js +175 -0
- package/dist/core/ingestion/type-extractors/ruby.d.ts +12 -0
- package/dist/core/ingestion/type-extractors/ruby.js +218 -0
- package/dist/core/ingestion/type-extractors/rust.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/rust.js +290 -0
- package/dist/core/ingestion/type-extractors/shared.d.ts +81 -0
- package/dist/core/ingestion/type-extractors/shared.js +322 -0
- package/dist/core/ingestion/type-extractors/swift.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/swift.js +140 -0
- package/dist/core/ingestion/type-extractors/types.d.ts +111 -0
- package/dist/core/ingestion/type-extractors/types.js +4 -0
- package/dist/core/ingestion/type-extractors/typescript.d.ts +4 -0
- package/dist/core/ingestion/type-extractors/typescript.js +227 -0
- package/dist/core/ingestion/utils.d.ts +73 -0
- package/dist/core/ingestion/utils.js +992 -0
- package/dist/core/ingestion/workers/parse-worker.d.ts +99 -0
- package/dist/core/ingestion/workers/parse-worker.js +1055 -0
- package/dist/core/ingestion/workers/worker-pool.d.ts +15 -0
- package/dist/core/ingestion/workers/worker-pool.js +123 -0
- package/dist/core/lbug/csv-generator.d.ts +28 -0
- package/dist/core/lbug/csv-generator.js +355 -0
- package/dist/core/lbug/lbug-adapter.d.ts +96 -0
- package/dist/core/lbug/lbug-adapter.js +753 -0
- package/dist/core/lbug/schema.d.ts +46 -0
- package/dist/core/lbug/schema.js +402 -0
- package/dist/core/search/bm25-index.d.ts +20 -0
- package/dist/core/search/bm25-index.js +123 -0
- package/dist/core/search/hybrid-search.d.ts +32 -0
- package/dist/core/search/hybrid-search.js +131 -0
- package/dist/core/search/query-cache.d.ts +18 -0
- package/dist/core/search/query-cache.js +47 -0
- package/dist/core/search/query-expansion.d.ts +19 -0
- package/dist/core/search/query-expansion.js +75 -0
- package/dist/core/search/reranker.d.ts +29 -0
- package/dist/core/search/reranker.js +122 -0
- package/dist/core/search/types.d.ts +154 -0
- package/dist/core/search/types.js +51 -0
- package/dist/core/semantic/tsgo-service.d.ts +67 -0
- package/dist/core/semantic/tsgo-service.js +355 -0
- package/dist/core/tree-sitter/parser-loader.d.ts +12 -0
- package/dist/core/tree-sitter/parser-loader.js +71 -0
- package/dist/lib/memory-guard.d.ts +35 -0
- package/dist/lib/memory-guard.js +70 -0
- package/dist/lib/utils.d.ts +3 -0
- package/dist/lib/utils.js +6 -0
- package/dist/mcp/compatible-stdio-transport.d.ts +32 -0
- package/dist/mcp/compatible-stdio-transport.js +209 -0
- package/dist/mcp/core/embedder.d.ts +24 -0
- package/dist/mcp/core/embedder.js +168 -0
- package/dist/mcp/core/lbug-adapter.d.ts +29 -0
- package/dist/mcp/core/lbug-adapter.js +330 -0
- package/dist/mcp/local/local-backend.d.ts +188 -0
- package/dist/mcp/local/local-backend.js +2759 -0
- package/dist/mcp/resources.d.ts +22 -0
- package/dist/mcp/resources.js +379 -0
- package/dist/mcp/server.d.ts +10 -0
- package/dist/mcp/server.js +217 -0
- package/dist/mcp/staleness.d.ts +10 -0
- package/dist/mcp/staleness.js +25 -0
- package/dist/mcp/tools.d.ts +21 -0
- package/dist/mcp/tools.js +202 -0
- package/dist/server/api.d.ts +5 -0
- package/dist/server/api.js +340 -0
- package/dist/server/mcp-http.d.ts +7 -0
- package/dist/server/mcp-http.js +95 -0
- package/dist/storage/git.d.ts +6 -0
- package/dist/storage/git.js +35 -0
- package/dist/storage/repo-manager.d.ts +87 -0
- package/dist/storage/repo-manager.js +249 -0
- package/dist/types/pipeline.d.ts +35 -0
- package/dist/types/pipeline.js +20 -0
- package/hooks/claude/code-mapper-hook.cjs +238 -0
- package/hooks/claude/pre-tool-use.sh +79 -0
- package/hooks/claude/session-start.sh +42 -0
- package/models/mlx-embedder.py +185 -0
- package/package.json +100 -0
- package/scripts/patch-tree-sitter-swift.cjs +74 -0
- package/vendor/leiden/index.cjs +355 -0
- package/vendor/leiden/utils.cjs +392 -0
|
@@ -0,0 +1,2759 @@
|
|
|
1
|
+
// code-mapper/src/mcp/local/local-backend.ts
|
|
2
|
+
/** @file local-backend.ts
|
|
3
|
+
* @description Tool implementations using local .code-mapper/ indexes
|
|
4
|
+
* Supports multiple indexed repositories via a global registry
|
|
5
|
+
* LadybugDB connections are opened lazily per repo on first query */
|
|
6
|
+
import fs from 'fs/promises';
|
|
7
|
+
import path from 'path';
|
|
8
|
+
import fsSync from 'fs';
|
|
9
|
+
import { execFileSync } from 'child_process';
|
|
10
|
+
import Parser from 'tree-sitter';
|
|
11
|
+
import { initLbug, executeQuery, executeParameterized, closeLbug, isLbugReady } from '../core/lbug-adapter.js';
|
|
12
|
+
import { FileSystemWatcher } from '../../core/incremental/watcher.js';
|
|
13
|
+
import { toRelativeFilePath, toRepoRoot } from '../../core/incremental/types.js';
|
|
14
|
+
import { getLanguageFromFilename, getDefinitionNodeFromCaptures } from '../../core/ingestion/utils.js';
|
|
15
|
+
import { loadParser, loadLanguage, isLanguageAvailable } from '../../core/tree-sitter/parser-loader.js';
|
|
16
|
+
import { LANGUAGE_QUERIES } from '../../core/ingestion/tree-sitter-queries.js';
|
|
17
|
+
import { getTreeSitterBufferSize, TREE_SITTER_MAX_BUFFER } from '../../core/ingestion/constants.js';
|
|
18
|
+
import { generateId } from '../../lib/utils.js';
|
|
19
|
+
import { NODE_TABLES, REL_TABLE_NAME } from '../../core/lbug/schema.js';
|
|
20
|
+
import { FTS_TABLES } from '../../core/search/types.js';
|
|
21
|
+
import { getTsgoService, stopTsgoService } from '../../core/semantic/tsgo-service.js';
|
|
22
|
+
// Embedding imports are lazy (dynamic import) to avoid loading onnxruntime-node
|
|
23
|
+
// at MCP server startup — crashes on unsupported Node ABI versions (#89)
|
|
24
|
+
import { listRegisteredRepos, cleanupOldKuzuFiles, } from '../../storage/repo-manager.js';
|
|
25
|
+
/** Quick test-file detection for filtering impact results across all supported languages.
 * Matches common test-directory segments, test-file infixes, and per-language
 * test-file suffixes (Go, Python, Ruby). */
export function isTestFilePath(filePath) {
    // Normalize: lowercase + forward slashes so Windows-style paths match too.
    const normalized = filePath.toLowerCase().replace(/\\/g, '/');
    const testMarkers = [
        '.test.', '.spec.', '__tests__/', '__mocks__/',
        '/test/', '/tests/', '/testing/', '/fixtures/',
        '/spec/', '/test_', '/conftest.',
    ];
    const testSuffixes = ['_test.go', '_test.py', '_spec.rb', '_test.rb'];
    return testMarkers.some((marker) => normalized.includes(marker))
        || testSuffixes.some((suffix) => normalized.endsWith(suffix));
}
|
|
36
|
+
/** Valid LadybugDB node labels for safe Cypher query construction.
 * Label names interpolated into queries should be checked against this
 * allow-list first — presumably by the query-building tools; verify at call sites. */
export const VALID_NODE_LABELS = new Set([
    'File', 'Folder', 'Function', 'Class', 'Interface', 'Method', 'CodeElement',
    'Community', 'Process', 'Struct', 'Enum', 'Macro', 'Typedef', 'Union',
    'Namespace', 'Trait', 'Impl', 'TypeAlias', 'Const', 'Static', 'Property',
    'Record', 'Delegate', 'Annotation', 'Constructor', 'Template', 'Module',
]);
/** Valid relation types for impact analysis filtering. */
export const VALID_RELATION_TYPES = new Set(['CALLS', 'IMPORTS', 'EXTENDS', 'IMPLEMENTS', 'DEPENDS_ON', 'PROVIDES']);
|
|
45
|
+
/** Regex to detect write operations in user-supplied Cypher queries.
 * Case-insensitive, word-boundary anchored so e.g. "created" does not match. */
export const CYPHER_WRITE_RE = /\b(CREATE|DELETE|SET|MERGE|REMOVE|DROP|ALTER|COPY|DETACH)\b/i;
/** Check if a Cypher query contains write operations.
 * @param {string} query - user-supplied Cypher text
 * @returns {boolean} true when a write keyword appears anywhere in the query */
export function isWriteQuery(query) {
    const containsWriteKeyword = CYPHER_WRITE_RE.test(query);
    return containsWriteKeyword;
}
|
|
51
|
+
/** Structured error logging for query failures — replaces empty catch blocks.
 * @param {string} context - short label for where the failure occurred
 * @param {unknown} err - the caught value; non-Error values are stringified */
function logQueryError(context, err) {
    let detail;
    if (err instanceof Error) {
        detail = err.message;
    } else {
        detail = String(err);
    }
    console.error(`Code Mapper [${context}]: ${detail}`);
}
|
|
56
|
+
export class LocalBackend {
|
|
57
|
+
/** Registered repositories keyed by repo id — NOTE(review): population site is
 * outside this chunk; presumably filled from the repo-manager registry. */
repos = new Map();
/** Cache of previously built context payloads — NOTE(review): read/write sites
 * not visible in this chunk; confirm key/value shape against the tool handlers. */
contextCache = new Map();
/** Repo ids whose DB connection has been initialized (checked via ensureInitialized). */
initializedRepos = new Set();
/** Active FileSystemWatcher instances keyed by repo id (see startWatcher). */
watchers = new Map();
/** Per-repo promise chain that serializes ensureFresh calls.
 * Prevents race: Call 2 skipping refresh while Call 1 is still writing. */
refreshLocks = new Map();
/** Hard ceiling — beyond this, incremental is unreliable, warn prominently */
static MAX_INCREMENTAL_FILES = 200;
/** Optional tsgo LSP service for confidence-1.0 semantic resolution */
tsgoEnabled = false;
|
|
68
|
+
/** Start file system watcher for a repo to detect source changes */
|
|
69
|
+
startWatcher(repoId, handle) {
|
|
70
|
+
if (this.watchers.has(repoId))
|
|
71
|
+
return;
|
|
72
|
+
try {
|
|
73
|
+
const watcher = new FileSystemWatcher(toRepoRoot(handle.repoPath));
|
|
74
|
+
watcher.start();
|
|
75
|
+
this.watchers.set(repoId, watcher);
|
|
76
|
+
}
|
|
77
|
+
catch (err) {
|
|
78
|
+
// Watcher failure is non-fatal — tools still work, just without auto-refresh
|
|
79
|
+
console.error(`Code Mapper: Failed to start watcher for ${handle.name}: ${err instanceof Error ? err.message : err}`);
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
/**
|
|
83
|
+
* Seed the watcher with file changes that happened while the MCP server was
|
|
84
|
+
* not running. Compares meta.json lastCommit against current git HEAD and
|
|
85
|
+
* injects any changed files as dirty entries.
|
|
86
|
+
*/
|
|
87
|
+
seedWatcherFromGit(repoId, handle) {
|
|
88
|
+
if (!handle.lastCommit)
|
|
89
|
+
return;
|
|
90
|
+
const watcher = this.watchers.get(repoId);
|
|
91
|
+
if (!watcher)
|
|
92
|
+
return;
|
|
93
|
+
try {
|
|
94
|
+
const currentHead = execFileSync('git', ['rev-parse', 'HEAD'], {
|
|
95
|
+
cwd: handle.repoPath,
|
|
96
|
+
encoding: 'utf-8',
|
|
97
|
+
stdio: ['pipe', 'pipe', 'pipe'],
|
|
98
|
+
}).trim();
|
|
99
|
+
if (currentHead === handle.lastCommit)
|
|
100
|
+
return;
|
|
101
|
+
const diffOutput = execFileSync('git', ['diff', '--name-status', `${handle.lastCommit}..HEAD`], { cwd: handle.repoPath, encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] }).trim();
|
|
102
|
+
if (!diffOutput)
|
|
103
|
+
return;
|
|
104
|
+
const entries = [];
|
|
105
|
+
for (const line of diffOutput.split('\n')) {
|
|
106
|
+
if (!line)
|
|
107
|
+
continue;
|
|
108
|
+
const [status, ...pathParts] = line.split('\t');
|
|
109
|
+
const filePath = pathParts.join('\t'); // handle tabs in filenames
|
|
110
|
+
if (!filePath)
|
|
111
|
+
continue;
|
|
112
|
+
let changeKind;
|
|
113
|
+
if (status === 'D')
|
|
114
|
+
changeKind = 'deleted';
|
|
115
|
+
else if (status === 'A')
|
|
116
|
+
changeKind = 'created';
|
|
117
|
+
else
|
|
118
|
+
changeKind = 'modified'; // M, R, C, etc.
|
|
119
|
+
try {
|
|
120
|
+
entries.push({
|
|
121
|
+
relativePath: toRelativeFilePath(filePath),
|
|
122
|
+
changeKind,
|
|
123
|
+
});
|
|
124
|
+
}
|
|
125
|
+
catch {
|
|
126
|
+
// Skip absolute paths or other invalid entries
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
if (entries.length > 0) {
|
|
130
|
+
watcher.inject(entries);
|
|
131
|
+
console.error(`Code Mapper: Seeded ${entries.length} file change(s) from git (${handle.lastCommit.slice(0, 7)}..${currentHead.slice(0, 7)})`);
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
catch {
|
|
135
|
+
// Git command failed — non-fatal, watcher will catch future changes
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
/**
|
|
139
|
+
* Serialized entry point — ensures only one refresh runs per repo at a time.
|
|
140
|
+
* Call 2 waits for Call 1 to finish, then re-checks the watcher.
|
|
141
|
+
*/
|
|
142
|
+
ensureFresh(repo) {
|
|
143
|
+
const key = repo.id;
|
|
144
|
+
const prev = this.refreshLocks.get(key) ?? Promise.resolve();
|
|
145
|
+
const next = prev.then(() => this.doEnsureFresh(repo));
|
|
146
|
+
// Store with swallowed errors so the chain doesn't break for future callers
|
|
147
|
+
this.refreshLocks.set(key, next.catch(() => { }));
|
|
148
|
+
return next;
|
|
149
|
+
}
|
|
150
|
+
/** Check for file changes and refresh the DB + embeddings before a tool call */
|
|
151
|
+
async doEnsureFresh(repo) {
|
|
152
|
+
const watcher = this.watchers.get(repo.id);
|
|
153
|
+
if (!watcher)
|
|
154
|
+
return;
|
|
155
|
+
// Flush pending debounce timers — edits within the 500ms window become
|
|
156
|
+
// visible immediately so no tool call can miss a recent save
|
|
157
|
+
await watcher.flush();
|
|
158
|
+
if (!watcher.hasDirtyFiles())
|
|
159
|
+
return;
|
|
160
|
+
const dirtyMap = watcher.drain();
|
|
161
|
+
if (!dirtyMap)
|
|
162
|
+
return;
|
|
163
|
+
const dirtyFiles = [...dirtyMap.values()];
|
|
164
|
+
const totalChanged = dirtyFiles.length;
|
|
165
|
+
// Hard ceiling — incremental is unreliable for huge diffs (branch switch, etc.)
|
|
166
|
+
if (totalChanged > LocalBackend.MAX_INCREMENTAL_FILES) {
|
|
167
|
+
// Re-inject so the files aren't lost; user must run full analyze
|
|
168
|
+
watcher.inject(dirtyFiles);
|
|
169
|
+
console.error(`Code Mapper: ${totalChanged} files changed — exceeds incremental limit (${LocalBackend.MAX_INCREMENTAL_FILES}), run: code-mapper analyze`);
|
|
170
|
+
// Don't silently serve stale — the staleness warning from getStalenessWarning
|
|
171
|
+
// will flag the tool response since git HEAD will differ from meta.lastCommit
|
|
172
|
+
return;
|
|
173
|
+
}
|
|
174
|
+
try {
|
|
175
|
+
// In-process incremental refresh — uses the existing read-write connection
|
|
176
|
+
// pool directly. No child process fork needed, no lock conflicts.
|
|
177
|
+
await this.ensureInitialized(repo.id);
|
|
178
|
+
const result = await this.inProcessRefresh(repo, dirtyFiles);
|
|
179
|
+
console.error(`Code Mapper: Refreshed ${result.filesProcessed} file(s) in ${result.durationMs}ms (${result.nodesInserted} nodes, ${result.edgesInserted} edges)`);
|
|
180
|
+
// Incremental embedding refresh — keep embeddings in sync with the graph
|
|
181
|
+
await this.refreshEmbeddings(repo, dirtyFiles);
|
|
182
|
+
}
|
|
183
|
+
catch (err) {
|
|
184
|
+
// Re-inject dirty files so the next tool call retries them
|
|
185
|
+
watcher.inject(dirtyFiles);
|
|
186
|
+
console.error(`Code Mapper: Incremental refresh failed (will retry next call): ${err instanceof Error ? err.message : err}`);
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
/**
 * Tables that are global metadata — NOT deleted per-file during incremental
 * refresh (their rows are not tied to a single source file).
 */
static SKIP_DELETE_TABLES = new Set(['Community', 'Process']);
/** Tables requiring backtick-quoting in Cypher (reserved words); see quoteTable. */
static BACKTICK_TABLES = new Set([
    'Struct', 'Enum', 'Macro', 'Typedef', 'Union', 'Namespace', 'Trait', 'Impl',
    'TypeAlias', 'Const', 'Static', 'Property', 'Record', 'Delegate', 'Annotation',
    'Constructor', 'Template', 'Module',
]);
|
|
199
|
+
static quoteTable(table) {
|
|
200
|
+
return LocalBackend.BACKTICK_TABLES.has(table) ? `\`${table}\`` : table;
|
|
201
|
+
}
|
|
202
|
+
static escapeCypher(value) {
|
|
203
|
+
return value.replace(/\\/g, '\\\\').replace(/'/g, "\\'");
|
|
204
|
+
}
|
|
205
|
+
/**
|
|
206
|
+
* In-process incremental refresh — parses dirty files with tree-sitter and
|
|
207
|
+
* writes directly to the DB through the existing connection pool.
|
|
208
|
+
*
|
|
209
|
+
* This avoids the LadybugDB lock conflict that prevented the child-process
|
|
210
|
+
* approach from working: LadybugDB on macOS holds an exclusive file lock
|
|
211
|
+
* even for read-only connections, and db.close() segfaults via N-API.
|
|
212
|
+
*/
|
|
213
|
+
async inProcessRefresh(repo, dirtyFiles) {
|
|
214
|
+
const t0 = Date.now();
|
|
215
|
+
const esc = LocalBackend.escapeCypher;
|
|
216
|
+
const qt = LocalBackend.quoteTable;
|
|
217
|
+
let nodesDeleted = 0;
|
|
218
|
+
let nodesInserted = 0;
|
|
219
|
+
let edgesInserted = 0;
|
|
220
|
+
let filesSkipped = 0;
|
|
221
|
+
// Phase 1: Delete old nodes for all dirty files
|
|
222
|
+
for (const entry of dirtyFiles) {
|
|
223
|
+
const escaped = esc(entry.relativePath);
|
|
224
|
+
for (const table of NODE_TABLES) {
|
|
225
|
+
if (LocalBackend.SKIP_DELETE_TABLES.has(table))
|
|
226
|
+
continue;
|
|
227
|
+
try {
|
|
228
|
+
await executeQuery(repo.id, `MATCH (n:${qt(table)}) WHERE n.filePath = '${escaped}' DETACH DELETE n`);
|
|
229
|
+
nodesDeleted++;
|
|
230
|
+
}
|
|
231
|
+
catch (err) {
|
|
232
|
+
console.error(`Code Mapper: [refresh] DELETE ${table} for ${entry.relativePath}: ${err instanceof Error ? err.message : err}`);
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
// Phase 2: Parse modified/created files with tree-sitter
|
|
237
|
+
const parser = await loadParser();
|
|
238
|
+
const filesToProcess = dirtyFiles.filter(f => f.changeKind === 'modified' || f.changeKind === 'created');
|
|
239
|
+
const allDefinitions = [];
|
|
240
|
+
const callSites = [];
|
|
241
|
+
const insertedFilePaths = new Set();
|
|
242
|
+
for (const entry of filesToProcess) {
|
|
243
|
+
const relPath = entry.relativePath;
|
|
244
|
+
const absPath = path.resolve(repo.repoPath, relPath);
|
|
245
|
+
const language = getLanguageFromFilename(relPath);
|
|
246
|
+
if (!language || !isLanguageAvailable(language)) {
|
|
247
|
+
filesSkipped++;
|
|
248
|
+
continue;
|
|
249
|
+
}
|
|
250
|
+
let content;
|
|
251
|
+
try {
|
|
252
|
+
content = fsSync.readFileSync(absPath, 'utf-8');
|
|
253
|
+
}
|
|
254
|
+
catch {
|
|
255
|
+
filesSkipped++;
|
|
256
|
+
continue;
|
|
257
|
+
}
|
|
258
|
+
if (content.length > TREE_SITTER_MAX_BUFFER) {
|
|
259
|
+
filesSkipped++;
|
|
260
|
+
continue;
|
|
261
|
+
}
|
|
262
|
+
try {
|
|
263
|
+
await loadLanguage(language, relPath);
|
|
264
|
+
}
|
|
265
|
+
catch {
|
|
266
|
+
filesSkipped++;
|
|
267
|
+
continue;
|
|
268
|
+
}
|
|
269
|
+
let tree;
|
|
270
|
+
try {
|
|
271
|
+
tree = parser.parse(content, undefined, { bufferSize: getTreeSitterBufferSize(content.length) });
|
|
272
|
+
}
|
|
273
|
+
catch {
|
|
274
|
+
filesSkipped++;
|
|
275
|
+
continue;
|
|
276
|
+
}
|
|
277
|
+
const queryString = LANGUAGE_QUERIES[language];
|
|
278
|
+
if (!queryString) {
|
|
279
|
+
filesSkipped++;
|
|
280
|
+
continue;
|
|
281
|
+
}
|
|
282
|
+
let matches;
|
|
283
|
+
try {
|
|
284
|
+
const tsLang = parser.getLanguage();
|
|
285
|
+
const query = new Parser.Query(tsLang, queryString);
|
|
286
|
+
matches = query.matches(tree.rootNode);
|
|
287
|
+
}
|
|
288
|
+
catch {
|
|
289
|
+
filesSkipped++;
|
|
290
|
+
continue;
|
|
291
|
+
}
|
|
292
|
+
insertedFilePaths.add(relPath);
|
|
293
|
+
for (const match of matches) {
|
|
294
|
+
const captureMap = {};
|
|
295
|
+
for (const c of match.captures)
|
|
296
|
+
captureMap[c.name] = c.node;
|
|
297
|
+
// Skip imports/heritage captures — only extract definitions and calls
|
|
298
|
+
if (captureMap['import'] || captureMap['import.source'])
|
|
299
|
+
continue;
|
|
300
|
+
if (captureMap['heritage'] || captureMap['heritage.impl'])
|
|
301
|
+
continue;
|
|
302
|
+
// Collect call sites for tsgo resolution
|
|
303
|
+
if (captureMap['call'] || captureMap['call.name']) {
|
|
304
|
+
const callNameNode = captureMap['call.name'];
|
|
305
|
+
if (callNameNode) {
|
|
306
|
+
callSites.push({
|
|
307
|
+
filePath: relPath,
|
|
308
|
+
absPath: absPath,
|
|
309
|
+
name: callNameNode.text,
|
|
310
|
+
line: callNameNode.startPosition.row,
|
|
311
|
+
character: callNameNode.startPosition.column,
|
|
312
|
+
});
|
|
313
|
+
}
|
|
314
|
+
continue;
|
|
315
|
+
}
|
|
316
|
+
const nameNode = captureMap['name'];
|
|
317
|
+
if (!nameNode && !captureMap['definition.constructor'])
|
|
318
|
+
continue;
|
|
319
|
+
const nodeName = nameNode ? nameNode.text : 'init';
|
|
320
|
+
let nodeLabel = 'CodeElement';
|
|
321
|
+
if (captureMap['definition.function'])
|
|
322
|
+
nodeLabel = 'Function';
|
|
323
|
+
else if (captureMap['definition.class'])
|
|
324
|
+
nodeLabel = 'Class';
|
|
325
|
+
else if (captureMap['definition.interface'])
|
|
326
|
+
nodeLabel = 'Interface';
|
|
327
|
+
else if (captureMap['definition.method'])
|
|
328
|
+
nodeLabel = 'Method';
|
|
329
|
+
else if (captureMap['definition.struct'])
|
|
330
|
+
nodeLabel = 'Struct';
|
|
331
|
+
else if (captureMap['definition.enum'])
|
|
332
|
+
nodeLabel = 'Enum';
|
|
333
|
+
else if (captureMap['definition.namespace'])
|
|
334
|
+
nodeLabel = 'Namespace';
|
|
335
|
+
else if (captureMap['definition.module'])
|
|
336
|
+
nodeLabel = 'Module';
|
|
337
|
+
else if (captureMap['definition.trait'])
|
|
338
|
+
nodeLabel = 'Trait';
|
|
339
|
+
else if (captureMap['definition.impl'])
|
|
340
|
+
nodeLabel = 'Impl';
|
|
341
|
+
else if (captureMap['definition.type'])
|
|
342
|
+
nodeLabel = 'TypeAlias';
|
|
343
|
+
else if (captureMap['definition.const'])
|
|
344
|
+
nodeLabel = 'Const';
|
|
345
|
+
else if (captureMap['definition.static'])
|
|
346
|
+
nodeLabel = 'Static';
|
|
347
|
+
else if (captureMap['definition.typedef'])
|
|
348
|
+
nodeLabel = 'Typedef';
|
|
349
|
+
else if (captureMap['definition.macro'])
|
|
350
|
+
nodeLabel = 'Macro';
|
|
351
|
+
else if (captureMap['definition.union'])
|
|
352
|
+
nodeLabel = 'Union';
|
|
353
|
+
else if (captureMap['definition.property'])
|
|
354
|
+
nodeLabel = 'Property';
|
|
355
|
+
else if (captureMap['definition.record'])
|
|
356
|
+
nodeLabel = 'Record';
|
|
357
|
+
else if (captureMap['definition.delegate'])
|
|
358
|
+
nodeLabel = 'Delegate';
|
|
359
|
+
else if (captureMap['definition.annotation'])
|
|
360
|
+
nodeLabel = 'Annotation';
|
|
361
|
+
else if (captureMap['definition.constructor'])
|
|
362
|
+
nodeLabel = 'Constructor';
|
|
363
|
+
else if (captureMap['definition.template'])
|
|
364
|
+
nodeLabel = 'Template';
|
|
365
|
+
const defNode = getDefinitionNodeFromCaptures(captureMap);
|
|
366
|
+
const startLine = defNode ? defNode.startPosition.row : (nameNode ? nameNode.startPosition.row : 0);
|
|
367
|
+
const endLine = defNode ? defNode.endPosition.row : startLine;
|
|
368
|
+
const nodeContent = defNode ? (defNode.text || '').slice(0, 50_000) : '';
|
|
369
|
+
allDefinitions.push({
|
|
370
|
+
nodeId: generateId(nodeLabel, `${relPath}:${nodeName}`),
|
|
371
|
+
name: nodeName,
|
|
372
|
+
label: nodeLabel,
|
|
373
|
+
filePath: relPath,
|
|
374
|
+
startLine,
|
|
375
|
+
endLine,
|
|
376
|
+
content: nodeContent,
|
|
377
|
+
});
|
|
378
|
+
}
|
|
379
|
+
}
|
|
380
|
+
// Phase 3: Insert File nodes + symbol nodes
|
|
381
|
+
for (const filePath of insertedFilePaths) {
|
|
382
|
+
const fileId = generateId('File', filePath);
|
|
383
|
+
try {
|
|
384
|
+
await executeQuery(repo.id, `CREATE (n:File {id: '${esc(fileId)}', name: '${esc(path.basename(filePath))}', filePath: '${esc(filePath)}', content: ''})`);
|
|
385
|
+
nodesInserted++;
|
|
386
|
+
}
|
|
387
|
+
catch (err) {
|
|
388
|
+
console.error(`Code Mapper: [refresh] CREATE File ${filePath}: ${err instanceof Error ? err.message : err}`);
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
for (const def of allDefinitions) {
|
|
392
|
+
try {
|
|
393
|
+
await executeQuery(repo.id, `CREATE (n:${qt(def.label)} {id: '${esc(def.nodeId)}', name: '${esc(def.name)}', filePath: '${esc(def.filePath)}', startLine: ${def.startLine}, endLine: ${def.endLine}, content: '${esc(def.content)}', description: ''})`);
|
|
394
|
+
nodesInserted++;
|
|
395
|
+
}
|
|
396
|
+
catch (err) {
|
|
397
|
+
console.error(`Code Mapper: [refresh] CREATE ${def.label} ${def.name}: ${err instanceof Error ? err.message : err}`);
|
|
398
|
+
}
|
|
399
|
+
const fileId = generateId('File', def.filePath);
|
|
400
|
+
try {
|
|
401
|
+
await executeQuery(repo.id, `MATCH (a:File), (b:${qt(def.label)}) WHERE a.id = '${esc(fileId)}' AND b.id = '${esc(def.nodeId)}' CREATE (a)-[:${REL_TABLE_NAME} {type: 'DEFINES', confidence: 1.0, reason: '', step: 0}]->(b)`);
|
|
402
|
+
edgesInserted++;
|
|
403
|
+
}
|
|
404
|
+
catch (err) {
|
|
405
|
+
console.error(`Code Mapper: [refresh] CREATE DEFINES edge for ${def.name}: ${err instanceof Error ? err.message : err}`);
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
// Phase 4: Resolve call edges via tsgo (if enabled)
|
|
409
|
+
if (this.tsgoEnabled && callSites.length > 0) {
|
|
410
|
+
const tsgo = getTsgoService(repo.repoPath);
|
|
411
|
+
if (await tsgo.start()) {
|
|
412
|
+
// Notify tsgo about changed files so it has fresh state
|
|
413
|
+
for (const entry of filesToProcess) {
|
|
414
|
+
const absPath = path.resolve(repo.repoPath, entry.relativePath);
|
|
415
|
+
await tsgo.notifyFileChanged(absPath);
|
|
416
|
+
}
|
|
417
|
+
let tsgoResolved = 0;
|
|
418
|
+
for (const call of callSites) {
|
|
419
|
+
try {
|
|
420
|
+
const def = await tsgo.resolveDefinition(call.absPath, call.line, call.character);
|
|
421
|
+
if (!def)
|
|
422
|
+
continue;
|
|
423
|
+
// Find the target node in the DB by file path + name match
|
|
424
|
+
const targetRows = await executeQuery(repo.id, `MATCH (n) WHERE n.filePath = '${esc(def.filePath)}' AND n.startLine <= ${def.line} AND n.endLine >= ${def.line} RETURN n.id AS id LIMIT 1`);
|
|
425
|
+
if (targetRows.length === 0)
|
|
426
|
+
continue;
|
|
427
|
+
const targetId = String(targetRows[0].id ?? '');
|
|
428
|
+
if (!targetId)
|
|
429
|
+
continue;
|
|
430
|
+
// Find the caller node (the function/method containing this call site)
|
|
431
|
+
const callerRows = await executeQuery(repo.id, `MATCH (n) WHERE n.filePath = '${esc(call.filePath)}' AND n.startLine <= ${call.line} AND n.endLine >= ${call.line} AND NOT n:File RETURN n.id AS id LIMIT 1`);
|
|
432
|
+
const callerId = callerRows.length > 0
|
|
433
|
+
? String(callerRows[0].id ?? '')
|
|
434
|
+
: generateId('File', call.filePath);
|
|
435
|
+
if (callerId === targetId)
|
|
436
|
+
continue; // self-call
|
|
437
|
+
try {
|
|
438
|
+
await executeQuery(repo.id, `MATCH (a), (b) WHERE a.id = '${esc(callerId)}' AND b.id = '${esc(targetId)}' CREATE (a)-[:${REL_TABLE_NAME} {type: 'CALLS', confidence: 1.0, reason: 'tsgo-semantic', step: 0}]->(b)`);
|
|
439
|
+
edgesInserted++;
|
|
440
|
+
tsgoResolved++;
|
|
441
|
+
}
|
|
442
|
+
catch { /* duplicate edge */ }
|
|
443
|
+
}
|
|
444
|
+
catch { /* resolution failed for this call */ }
|
|
445
|
+
}
|
|
446
|
+
if (tsgoResolved > 0) {
|
|
447
|
+
console.error(`Code Mapper: tsgo resolved ${tsgoResolved}/${callSites.length} call(s) at confidence 1.0`);
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
}
|
|
451
|
+
// Phase 5: Rebuild FTS indexes
|
|
452
|
+
for (const { table, index } of FTS_TABLES) {
|
|
453
|
+
try {
|
|
454
|
+
await executeQuery(repo.id, `CALL DROP_FTS_INDEX('${table}', '${index}')`);
|
|
455
|
+
}
|
|
456
|
+
catch { /* may not exist */ }
|
|
457
|
+
try {
|
|
458
|
+
await executeQuery(repo.id, `CALL CREATE_FTS_INDEX('${table}', '${index}', ['name', 'content'], stemmer := 'porter')`);
|
|
459
|
+
}
|
|
460
|
+
catch { /* non-fatal */ }
|
|
461
|
+
}
|
|
462
|
+
return {
|
|
463
|
+
filesProcessed: filesToProcess.length,
|
|
464
|
+
filesSkipped,
|
|
465
|
+
nodesDeleted,
|
|
466
|
+
nodesInserted,
|
|
467
|
+
edgesInserted,
|
|
468
|
+
durationMs: Date.now() - t0,
|
|
469
|
+
};
|
|
470
|
+
}
|
|
471
|
+
/**
|
|
472
|
+
* Update CodeEmbedding rows for dirty files so semantic search is never stale.
|
|
473
|
+
*
|
|
474
|
+
* Runs ONLY when the repo previously had embeddings (stats.embeddings > 0).
|
|
475
|
+
* Steps:
|
|
476
|
+
* 1. Delete stale CodeEmbedding rows for all dirty file paths (always)
|
|
477
|
+
* 2. Query new embeddable nodes for modified/created files
|
|
478
|
+
* 3. Generate text → batch embed using the warm MCP singleton model
|
|
479
|
+
* 4. Insert new CodeEmbedding rows
|
|
480
|
+
* 5. Drop + recreate HNSW vector index
|
|
481
|
+
*
|
|
482
|
+
* If the embedding model fails to load, stale rows are still deleted —
|
|
483
|
+
* semantic search returns fewer results but never wrong ones.
|
|
484
|
+
*/
|
|
485
|
+
async refreshEmbeddings(repo, dirtyFiles) {
|
|
486
|
+
if (!repo.stats?.embeddings || repo.stats.embeddings === 0)
|
|
487
|
+
return;
|
|
488
|
+
if (dirtyFiles.length === 0)
|
|
489
|
+
return;
|
|
490
|
+
const esc = LocalBackend.escapeCypher;
|
|
491
|
+
const dirtyRelPaths = dirtyFiles.map((f) => f.relativePath);
|
|
492
|
+
try {
|
|
493
|
+
// Step 1: Delete stale embeddings for all dirty files
|
|
494
|
+
for (const relPath of dirtyRelPaths) {
|
|
495
|
+
try {
|
|
496
|
+
const rows = await executeQuery(repo.id, `MATCH (n) WHERE n.filePath = '${esc(relPath)}' RETURN n.id AS id`);
|
|
497
|
+
for (const row of rows) {
|
|
498
|
+
const nodeId = String(row.id ?? '');
|
|
499
|
+
if (!nodeId)
|
|
500
|
+
continue;
|
|
501
|
+
try {
|
|
502
|
+
await executeQuery(repo.id, `MATCH (e:CodeEmbedding) WHERE e.nodeId = '${esc(nodeId)}' DETACH DELETE e`);
|
|
503
|
+
}
|
|
504
|
+
catch { /* row may not exist */ }
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
catch { /* file may not have nodes */ }
|
|
508
|
+
}
|
|
509
|
+
// Step 2: Query new embeddable nodes
|
|
510
|
+
const embeddableLabels = ['Function', 'Class', 'Method', 'Interface', 'File'];
|
|
511
|
+
const modifiedPaths = dirtyFiles
|
|
512
|
+
.filter((f) => f.changeKind === 'modified' || f.changeKind === 'created')
|
|
513
|
+
.map((f) => f.relativePath);
|
|
514
|
+
if (modifiedPaths.length === 0) {
|
|
515
|
+
await this.rebuildVectorIndex(repo.id);
|
|
516
|
+
return;
|
|
517
|
+
}
|
|
518
|
+
const newNodes = [];
|
|
519
|
+
for (const relPath of modifiedPaths) {
|
|
520
|
+
for (const label of embeddableLabels) {
|
|
521
|
+
try {
|
|
522
|
+
const q = label === 'File'
|
|
523
|
+
? `MATCH (n:File) WHERE n.filePath = '${esc(relPath)}' RETURN n.id AS id, n.name AS name, 'File' AS label, n.filePath AS filePath, n.content AS content`
|
|
524
|
+
: `MATCH (n:${label}) WHERE n.filePath = '${esc(relPath)}' RETURN n.id AS id, n.name AS name, '${label}' AS label, n.filePath AS filePath, n.content AS content, n.startLine AS startLine, n.endLine AS endLine`;
|
|
525
|
+
const rows = await executeQuery(repo.id, q);
|
|
526
|
+
for (const row of rows) {
|
|
527
|
+
const r = row;
|
|
528
|
+
newNodes.push({
|
|
529
|
+
id: String(r.id ?? ''), name: String(r.name ?? ''), label: String(r.label ?? label),
|
|
530
|
+
filePath: String(r.filePath ?? ''), content: String(r.content ?? ''),
|
|
531
|
+
startLine: r.startLine != null ? Number(r.startLine) : undefined,
|
|
532
|
+
endLine: r.endLine != null ? Number(r.endLine) : undefined,
|
|
533
|
+
});
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
catch { /* table may not exist */ }
|
|
537
|
+
}
|
|
538
|
+
}
|
|
539
|
+
// Step 3: Embed + insert
|
|
540
|
+
if (newNodes.length > 0) {
|
|
541
|
+
try {
|
|
542
|
+
const { generateEmbeddingText } = await import('../../core/embeddings/text-generator.js');
|
|
543
|
+
const { embedBatch } = await import('../core/embedder.js');
|
|
544
|
+
const texts = newNodes.map((node) => generateEmbeddingText(node));
|
|
545
|
+
const embeddings = await embedBatch(texts);
|
|
546
|
+
for (let i = 0; i < newNodes.length; i++) {
|
|
547
|
+
const vecStr = `[${embeddings[i].join(',')}]`;
|
|
548
|
+
try {
|
|
549
|
+
await executeQuery(repo.id, `CREATE (e:CodeEmbedding {nodeId: '${esc(newNodes[i].id)}', embedding: CAST(${vecStr} AS FLOAT[256])})`);
|
|
550
|
+
}
|
|
551
|
+
catch { /* duplicate */ }
|
|
552
|
+
}
|
|
553
|
+
console.error(`Code Mapper: Embedded ${newNodes.length} node(s) incrementally`);
|
|
554
|
+
}
|
|
555
|
+
catch (err) {
|
|
556
|
+
console.error(`Code Mapper: Incremental embedding failed (stale entries removed): ${err instanceof Error ? err.message : err}`);
|
|
557
|
+
}
|
|
558
|
+
}
|
|
559
|
+
// Step 4: Rebuild vector index
|
|
560
|
+
await this.rebuildVectorIndex(repo.id);
|
|
561
|
+
}
|
|
562
|
+
catch (err) {
|
|
563
|
+
console.error(`Code Mapper: Embedding refresh failed: ${err instanceof Error ? err.message : err}`);
|
|
564
|
+
}
|
|
565
|
+
}
|
|
566
|
+
async rebuildVectorIndex(repoId) {
|
|
567
|
+
try {
|
|
568
|
+
await executeQuery(repoId, 'INSTALL VECTOR');
|
|
569
|
+
}
|
|
570
|
+
catch { }
|
|
571
|
+
try {
|
|
572
|
+
await executeQuery(repoId, 'LOAD EXTENSION VECTOR');
|
|
573
|
+
}
|
|
574
|
+
catch { }
|
|
575
|
+
try {
|
|
576
|
+
await executeQuery(repoId, `CALL DROP_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx')`);
|
|
577
|
+
}
|
|
578
|
+
catch { }
|
|
579
|
+
try {
|
|
580
|
+
const rows = await executeQuery(repoId, `MATCH (e:CodeEmbedding) RETURN COUNT(*) AS cnt`);
|
|
581
|
+
const cnt = Number(rows[0]?.cnt ?? 0);
|
|
582
|
+
if (cnt === 0)
|
|
583
|
+
return;
|
|
584
|
+
}
|
|
585
|
+
catch {
|
|
586
|
+
return;
|
|
587
|
+
}
|
|
588
|
+
try {
|
|
589
|
+
await executeQuery(repoId, `CALL CREATE_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx', 'embedding', metric := 'cosine')`);
|
|
590
|
+
}
|
|
591
|
+
catch (err) {
|
|
592
|
+
const msg = err instanceof Error ? err.message : '';
|
|
593
|
+
if (!msg.includes('already exists')) {
|
|
594
|
+
console.error(`Code Mapper: Vector index rebuild failed: ${msg}`);
|
|
595
|
+
}
|
|
596
|
+
}
|
|
597
|
+
}
|
|
598
|
+
// Initialization
|
|
599
|
+
/**
|
|
600
|
+
* Initialize from the global registry, returns true if at least one repo is available.
|
|
601
|
+
* @param opts.tsgo — Enable tsgo semantic resolution (confidence-1.0 call edges)
|
|
602
|
+
*/
|
|
603
|
+
async init(opts) {
|
|
604
|
+
this.tsgoEnabled = opts?.tsgo ?? false;
|
|
605
|
+
await this.refreshRepos();
|
|
606
|
+
// Start file watchers for incremental refresh
|
|
607
|
+
for (const [id, handle] of this.repos) {
|
|
608
|
+
this.startWatcher(id, handle);
|
|
609
|
+
// Seed watcher with changes that happened while the server was down
|
|
610
|
+
this.seedWatcherFromGit(id, handle);
|
|
611
|
+
// Start tsgo LSP for semantic resolution (optional, non-blocking)
|
|
612
|
+
if (this.tsgoEnabled) {
|
|
613
|
+
const svc = getTsgoService(handle.repoPath);
|
|
614
|
+
svc.start().catch(() => { }); // warm up in background
|
|
615
|
+
}
|
|
616
|
+
}
|
|
617
|
+
return this.repos.size > 0;
|
|
618
|
+
}
|
|
619
|
+
/**
|
|
620
|
+
* Re-read the global registry and update the in-memory repo map
|
|
621
|
+
* LadybugDB connections for removed repos idle-timeout naturally
|
|
622
|
+
*/
|
|
623
|
+
async refreshRepos() {
|
|
624
|
+
const entries = await listRegisteredRepos({ validate: true });
|
|
625
|
+
const freshIds = new Set();
|
|
626
|
+
for (const entry of entries) {
|
|
627
|
+
const id = this.repoId(entry.name, entry.path);
|
|
628
|
+
freshIds.add(id);
|
|
629
|
+
const storagePath = entry.storagePath;
|
|
630
|
+
const lbugPath = path.join(storagePath, 'lbug');
|
|
631
|
+
// Clean up leftover KuzuDB files from before the LadybugDB migration
|
|
632
|
+
// Warn if kuzu exists but lbug doesn't (re-analyze needed)
|
|
633
|
+
const kuzu = await cleanupOldKuzuFiles(storagePath);
|
|
634
|
+
if (kuzu.found && kuzu.needsReindex) {
|
|
635
|
+
console.error(`Code Mapper: "${entry.name}" has a stale KuzuDB index. Run: code-mapper analyze ${entry.path}`);
|
|
636
|
+
}
|
|
637
|
+
const handle = {
|
|
638
|
+
id,
|
|
639
|
+
name: entry.name,
|
|
640
|
+
repoPath: entry.path,
|
|
641
|
+
storagePath,
|
|
642
|
+
lbugPath,
|
|
643
|
+
indexedAt: entry.indexedAt,
|
|
644
|
+
lastCommit: entry.lastCommit,
|
|
645
|
+
stats: entry.stats,
|
|
646
|
+
};
|
|
647
|
+
this.repos.set(id, handle);
|
|
648
|
+
// Build lightweight context (no LadybugDB needed)
|
|
649
|
+
const s = entry.stats || {};
|
|
650
|
+
this.contextCache.set(id, {
|
|
651
|
+
projectName: entry.name,
|
|
652
|
+
stats: {
|
|
653
|
+
fileCount: s.files || 0,
|
|
654
|
+
functionCount: s.nodes || 0,
|
|
655
|
+
communityCount: s.communities || 0,
|
|
656
|
+
processCount: s.processes || 0,
|
|
657
|
+
},
|
|
658
|
+
});
|
|
659
|
+
}
|
|
660
|
+
// Prune repos that no longer exist in the registry
|
|
661
|
+
for (const id of this.repos.keys()) {
|
|
662
|
+
if (!freshIds.has(id)) {
|
|
663
|
+
this.repos.delete(id);
|
|
664
|
+
this.contextCache.delete(id);
|
|
665
|
+
this.initializedRepos.delete(id);
|
|
666
|
+
}
|
|
667
|
+
}
|
|
668
|
+
}
|
|
669
|
+
/** Generate a stable repo ID from name + path (appends hash on collision) */
|
|
670
|
+
repoId(name, repoPath) {
|
|
671
|
+
const base = name.toLowerCase();
|
|
672
|
+
// Check for name collision with a different path
|
|
673
|
+
for (const [id, handle] of this.repos) {
|
|
674
|
+
if (id === base && handle.repoPath !== path.resolve(repoPath)) {
|
|
675
|
+
// Collision — use path hash
|
|
676
|
+
const hash = Buffer.from(repoPath).toString('base64url').slice(0, 6);
|
|
677
|
+
return `${base}-${hash}`;
|
|
678
|
+
}
|
|
679
|
+
}
|
|
680
|
+
return base;
|
|
681
|
+
}
|
|
682
|
+
// Repo Resolution
|
|
683
|
+
/**
|
|
684
|
+
* Resolve which repo to use (by name, path, or single-repo default)
|
|
685
|
+
* Re-reads the registry once on miss in case a new repo was indexed
|
|
686
|
+
*/
|
|
687
|
+
async resolveRepo(repoParam) {
|
|
688
|
+
const result = this.resolveRepoFromCache(repoParam);
|
|
689
|
+
if (result)
|
|
690
|
+
return result;
|
|
691
|
+
// Miss — refresh registry and try once more
|
|
692
|
+
await this.refreshRepos();
|
|
693
|
+
const retried = this.resolveRepoFromCache(repoParam);
|
|
694
|
+
if (retried)
|
|
695
|
+
return retried;
|
|
696
|
+
// Still no match — throw with helpful message
|
|
697
|
+
if (this.repos.size === 0) {
|
|
698
|
+
throw new Error('No indexed repositories. Run: code-mapper analyze');
|
|
699
|
+
}
|
|
700
|
+
if (repoParam) {
|
|
701
|
+
const names = [...this.repos.values()].map(h => h.name);
|
|
702
|
+
throw new Error(`Repository "${repoParam}" not found. Available: ${names.join(', ')}`);
|
|
703
|
+
}
|
|
704
|
+
const names = [...this.repos.values()].map(h => h.name);
|
|
705
|
+
throw new Error(`Multiple repositories indexed. Specify which one with the "repo" parameter. Available: ${names.join(', ')}`);
|
|
706
|
+
}
|
|
707
|
+
/** Try to resolve a repo from the in-memory cache, returns null on miss */
|
|
708
|
+
resolveRepoFromCache(repoParam) {
|
|
709
|
+
if (this.repos.size === 0)
|
|
710
|
+
return null;
|
|
711
|
+
if (repoParam) {
|
|
712
|
+
const paramLower = repoParam.toLowerCase();
|
|
713
|
+
// Match by id
|
|
714
|
+
if (this.repos.has(paramLower))
|
|
715
|
+
return this.repos.get(paramLower);
|
|
716
|
+
// Match by name (case-insensitive)
|
|
717
|
+
for (const handle of this.repos.values()) {
|
|
718
|
+
if (handle.name.toLowerCase() === paramLower)
|
|
719
|
+
return handle;
|
|
720
|
+
}
|
|
721
|
+
// Match by path (substring)
|
|
722
|
+
const resolved = path.resolve(repoParam);
|
|
723
|
+
for (const handle of this.repos.values()) {
|
|
724
|
+
if (handle.repoPath === resolved)
|
|
725
|
+
return handle;
|
|
726
|
+
}
|
|
727
|
+
// Match by partial name
|
|
728
|
+
for (const handle of this.repos.values()) {
|
|
729
|
+
if (handle.name.toLowerCase().includes(paramLower))
|
|
730
|
+
return handle;
|
|
731
|
+
}
|
|
732
|
+
return null;
|
|
733
|
+
}
|
|
734
|
+
if (this.repos.size === 1) {
|
|
735
|
+
return this.repos.values().next().value;
|
|
736
|
+
}
|
|
737
|
+
return null; // Multiple repos, no param — ambiguous
|
|
738
|
+
}
|
|
739
|
+
// Lazy LadybugDB Init
|
|
740
|
+
async ensureInitialized(repoId) {
|
|
741
|
+
// Always check the actual pool — the idle timer may have evicted the connection
|
|
742
|
+
if (this.initializedRepos.has(repoId) && isLbugReady(repoId))
|
|
743
|
+
return;
|
|
744
|
+
const handle = this.repos.get(repoId);
|
|
745
|
+
if (!handle)
|
|
746
|
+
throw new Error(`Unknown repo: ${repoId}`);
|
|
747
|
+
try {
|
|
748
|
+
await initLbug(repoId, handle.lbugPath);
|
|
749
|
+
this.initializedRepos.add(repoId);
|
|
750
|
+
}
|
|
751
|
+
catch (err) {
|
|
752
|
+
// If lock error, mark as not initialized so next call retries
|
|
753
|
+
this.initializedRepos.delete(repoId);
|
|
754
|
+
throw err;
|
|
755
|
+
}
|
|
756
|
+
}
|
|
757
|
+
// Public Getters
|
|
758
|
+
/** Get context for a specific repo (or the single repo if only one) */
|
|
759
|
+
getContext(repoId) {
|
|
760
|
+
if (repoId && this.contextCache.has(repoId)) {
|
|
761
|
+
return this.contextCache.get(repoId);
|
|
762
|
+
}
|
|
763
|
+
if (this.repos.size === 1) {
|
|
764
|
+
return this.contextCache.values().next().value ?? null;
|
|
765
|
+
}
|
|
766
|
+
return null;
|
|
767
|
+
}
|
|
768
|
+
/** List all registered repos, re-reading the registry to discover new ones */
|
|
769
|
+
async listRepos() {
|
|
770
|
+
await this.refreshRepos();
|
|
771
|
+
return [...this.repos.values()].map(h => ({
|
|
772
|
+
name: h.name,
|
|
773
|
+
path: h.repoPath,
|
|
774
|
+
indexedAt: h.indexedAt,
|
|
775
|
+
lastCommit: h.lastCommit,
|
|
776
|
+
stats: h.stats,
|
|
777
|
+
}));
|
|
778
|
+
}
|
|
779
|
+
// ── Compact text formatters — optimized for LLM token efficiency ────
|
|
780
|
+
/** Extract signature from content: the declaration line(s), not the full body */
|
|
781
|
+
extractSignature(content, name, _type) {
|
|
782
|
+
if (!content)
|
|
783
|
+
return name || '?';
|
|
784
|
+
const lines = content.split('\n');
|
|
785
|
+
const declKeywords = /^\s*(export\s+)?(default\s+)?(async\s+)?(function|class|interface|type|const|let|var|enum|struct|trait|impl|pub\s|fn\s|def\s|private|protected|public|static|abstract|override|readonly)/;
|
|
786
|
+
const namePattern = name ? new RegExp(`\\b${name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`) : null;
|
|
787
|
+
// Helper: collect multi-line signature (for arrow functions with params on multiple lines)
|
|
788
|
+
const collectSignature = (startIdx) => {
|
|
789
|
+
let sig = lines[startIdx].trim();
|
|
790
|
+
// If the line ends with '(' or has unmatched parens, collect continuation lines
|
|
791
|
+
let openParens = (sig.match(/\(/g) || []).length - (sig.match(/\)/g) || []).length;
|
|
792
|
+
let i = startIdx + 1;
|
|
793
|
+
while (openParens > 0 && i < lines.length && i < startIdx + 8) {
|
|
794
|
+
const next = lines[i].trim();
|
|
795
|
+
if (!next || next.startsWith('//') || next.startsWith('*')) {
|
|
796
|
+
i++;
|
|
797
|
+
continue;
|
|
798
|
+
}
|
|
799
|
+
sig += ' ' + next;
|
|
800
|
+
openParens += (next.match(/\(/g) || []).length - (next.match(/\)/g) || []).length;
|
|
801
|
+
// Stop at => or { after parens are balanced
|
|
802
|
+
if (openParens <= 0)
|
|
803
|
+
break;
|
|
804
|
+
i++;
|
|
805
|
+
}
|
|
806
|
+
// If we found the closing paren but there's a return type annotation, grab it
|
|
807
|
+
if (openParens <= 0 && i + 1 < lines.length) {
|
|
808
|
+
const nextLine = lines[i + 1]?.trim() || '';
|
|
809
|
+
if (nextLine.startsWith('):') || nextLine.startsWith('): ')) {
|
|
810
|
+
sig += ' ' + nextLine;
|
|
811
|
+
}
|
|
812
|
+
}
|
|
813
|
+
return sig.replace(/\s*[{=]>\s*\{?\s*$/, '').replace(/\s+/g, ' ').trim();
|
|
814
|
+
};
|
|
815
|
+
// Strategy 1: Find the line containing the symbol name AND a declaration keyword
|
|
816
|
+
for (let i = 0; i < lines.length; i++) {
|
|
817
|
+
const trimmed = lines[i].trim();
|
|
818
|
+
if (namePattern && namePattern.test(trimmed) && (declKeywords.test(trimmed) || trimmed.includes('(') || trimmed.includes(':'))) {
|
|
819
|
+
return collectSignature(i);
|
|
820
|
+
}
|
|
821
|
+
}
|
|
822
|
+
// Strategy 2: Find any line with a declaration keyword
|
|
823
|
+
for (let i = 0; i < lines.length; i++) {
|
|
824
|
+
const trimmed = lines[i].trim();
|
|
825
|
+
if (declKeywords.test(trimmed)) {
|
|
826
|
+
return collectSignature(i);
|
|
827
|
+
}
|
|
828
|
+
}
|
|
829
|
+
// Strategy 3: Find any line containing the name
|
|
830
|
+
if (namePattern) {
|
|
831
|
+
for (const line of lines) {
|
|
832
|
+
const trimmed = line.trim();
|
|
833
|
+
if (namePattern.test(trimmed) && trimmed.length > 5 && !trimmed.startsWith('//') && !trimmed.startsWith('*')) {
|
|
834
|
+
return trimmed.replace(/\s*\{?\s*$/, '').replace(/\s+/g, ' ');
|
|
835
|
+
}
|
|
836
|
+
}
|
|
837
|
+
}
|
|
838
|
+
return name || '?';
|
|
839
|
+
}
|
|
840
|
+
/** Short file path: strip common prefix if all paths share it */
|
|
841
|
+
shortPath(filePath) {
|
|
842
|
+
return filePath;
|
|
843
|
+
}
|
|
844
|
+
/** C2: Count how many initial steps two flows share (by symbol name) */
|
|
845
|
+
sharedPrefixLength(a, b) {
|
|
846
|
+
let i = 0;
|
|
847
|
+
while (i < a.length && i < b.length && (a[i].name === b[i].name))
|
|
848
|
+
i++;
|
|
849
|
+
return i;
|
|
850
|
+
}
|
|
851
|
+
/** D1: Generate a readable flow description from step names */
|
|
852
|
+
/** Generate readable flow description from step names.
|
|
853
|
+
* Uses step 2 (first dispatch point) as the middle — it's where the entry
|
|
854
|
+
* point specializes and is the most discriminating step in most flows. */
|
|
855
|
+
describeFlow(stepNames) {
|
|
856
|
+
if (stepNames.length === 0)
|
|
857
|
+
return '';
|
|
858
|
+
const humanize = (name) => name
|
|
859
|
+
.replace(/([a-z])([A-Z])/g, '$1 $2')
|
|
860
|
+
.replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
|
|
861
|
+
.toLowerCase();
|
|
862
|
+
const first = humanize(stepNames[0]);
|
|
863
|
+
const last = humanize(stepNames[stepNames.length - 1]);
|
|
864
|
+
if (stepNames.length <= 2)
|
|
865
|
+
return `${first} → ${last}`;
|
|
866
|
+
// Step 2 is the dispatch point — where the flow specializes
|
|
867
|
+
const dispatch = humanize(stepNames[1]);
|
|
868
|
+
return `${first} → ${dispatch} → ${last}`;
|
|
869
|
+
}
|
|
870
|
+
formatQueryAsText(result) {
|
|
871
|
+
if (result.error)
|
|
872
|
+
return `Error: ${result.error}`;
|
|
873
|
+
const lines = [];
|
|
874
|
+
// Match quality signal
|
|
875
|
+
const allSymbols = [...(result.process_symbols || []), ...(result.definitions || [])];
|
|
876
|
+
const bothCount = allSymbols.filter((s) => s.matched_by === 'bm25 + semantic').length;
|
|
877
|
+
const total = allSymbols.length;
|
|
878
|
+
const quality = bothCount >= total * 0.5 ? 'strong' : bothCount >= total * 0.25 ? 'good' : 'partial';
|
|
879
|
+
lines.push(`match: ${quality} | ${bothCount}/${total} bm25+semantic`);
|
|
880
|
+
// Build flow and symbol data
|
|
881
|
+
const processes = result.processes || [];
|
|
882
|
+
const processSymbols = result.process_symbols || [];
|
|
883
|
+
const defs = result.definitions || [];
|
|
884
|
+
const flows = [];
|
|
885
|
+
if (processes.length > 0) {
|
|
886
|
+
for (const proc of processes) {
|
|
887
|
+
const syms = processSymbols
|
|
888
|
+
.filter((s) => s.process_id === proc.id)
|
|
889
|
+
.sort((a, b) => (a.step_index || 0) - (b.step_index || 0));
|
|
890
|
+
if (syms.length > 0)
|
|
891
|
+
flows.push({ proc, syms });
|
|
892
|
+
}
|
|
893
|
+
}
|
|
894
|
+
// Check if any flow has step names that overlap with query terms
|
|
895
|
+
const searchQuery = result._searchQuery || '';
|
|
896
|
+
const qTerms = searchQuery.toLowerCase().split(/\s+/).filter((t) => t.length >= 3);
|
|
897
|
+
const anyFlowRelevant = flows.some(flow => {
|
|
898
|
+
const words = flow.syms.map((s) => (s.name || '').toLowerCase()
|
|
899
|
+
.replace(/([a-z])([A-Z])/g, '$1 $2').replace(/[_\-]/g, ' ')).join(' ');
|
|
900
|
+
return qTerms.some(t => words.includes(t));
|
|
901
|
+
});
|
|
902
|
+
// Helper to render symbols section
|
|
903
|
+
const renderSymbols = () => {
|
|
904
|
+
if (defs.length === 0)
|
|
905
|
+
return;
|
|
906
|
+
lines.push('');
|
|
907
|
+
lines.push('## Symbols');
|
|
908
|
+
for (const d of defs) {
|
|
909
|
+
const sig = d.signature || d.name;
|
|
910
|
+
const mod = d.module ? ` [${d.module}]` : '';
|
|
911
|
+
const start = d.startLine || 0;
|
|
912
|
+
const end = d.endLine || 0;
|
|
913
|
+
const isSmall = end > 0 && start > 0 && (end - start) < 10;
|
|
914
|
+
lines.push(` ${sig} — ${d.type} @ ${this.shortPath(d.filePath)}:${start || '?'}${mod}`);
|
|
915
|
+
if (isSmall && d.content) {
|
|
916
|
+
const src = String(d.content).trim();
|
|
917
|
+
if (src.length < 500) {
|
|
918
|
+
for (const srcLine of src.split('\n')) {
|
|
919
|
+
lines.push(` ${srcLine}`);
|
|
920
|
+
}
|
|
921
|
+
}
|
|
922
|
+
}
|
|
923
|
+
}
|
|
924
|
+
};
|
|
925
|
+
// Helper to render flows section
|
|
926
|
+
const formatStep = (sym) => {
|
|
927
|
+
const sig = sym.signature || sym.name;
|
|
928
|
+
const mod = sym.module ? ` [${sym.module}]` : '';
|
|
929
|
+
const matched = sym.matched_by ? ' ←' : '';
|
|
930
|
+
return ` [${sym.step_index}] ${sig} — ${sym.type} @ ${this.shortPath(sym.filePath)}:${sym.startLine || '?'}${mod}${matched}`;
|
|
931
|
+
};
|
|
932
|
+
const renderFlows = (maxFlows) => {
|
|
933
|
+
const toShow = maxFlows ? flows.slice(0, maxFlows) : flows;
|
|
934
|
+
let i = 0;
|
|
935
|
+
while (i < toShow.length) {
|
|
936
|
+
const group = [toShow[i]];
|
|
937
|
+
for (let j = i + 1; j < toShow.length; j++) {
|
|
938
|
+
if (this.sharedPrefixLength(toShow[i].syms, toShow[j].syms) >= 3)
|
|
939
|
+
group.push(toShow[j]);
|
|
940
|
+
}
|
|
941
|
+
if (group.length >= 2) {
|
|
942
|
+
const prefixLen = this.sharedPrefixLength(group[0].syms, group[1].syms);
|
|
943
|
+
const prefix = group[0].syms.slice(0, prefixLen);
|
|
944
|
+
const desc = this.describeFlow(group[0].syms.map((s) => s.name));
|
|
945
|
+
lines.push('');
|
|
946
|
+
lines.push(`## ${group.length} flows: ${desc} (shared prefix: ${prefixLen} steps)`);
|
|
947
|
+
for (const sym of prefix) {
|
|
948
|
+
lines.push(formatStep(sym));
|
|
949
|
+
}
|
|
950
|
+
for (const flow of group) {
|
|
951
|
+
const suffix = flow.syms.slice(prefixLen);
|
|
952
|
+
if (suffix.length > 0) {
|
|
953
|
+
lines.push(` then → ${flow.proc.summary.split(' → ').pop()}`);
|
|
954
|
+
for (const sym of suffix) {
|
|
955
|
+
lines.push(formatStep(sym));
|
|
956
|
+
}
|
|
957
|
+
}
|
|
958
|
+
}
|
|
959
|
+
i += group.length;
|
|
960
|
+
}
|
|
961
|
+
else {
|
|
962
|
+
const flow = toShow[i];
|
|
963
|
+
const desc = this.describeFlow(flow.syms.map((s) => s.name));
|
|
964
|
+
lines.push('');
|
|
965
|
+
lines.push(`## ${flow.proc.summary}: ${desc} (${flow.proc.step_count} steps)`);
|
|
966
|
+
for (const sym of flow.syms) {
|
|
967
|
+
lines.push(formatStep(sym));
|
|
968
|
+
}
|
|
969
|
+
i++;
|
|
970
|
+
}
|
|
971
|
+
}
|
|
972
|
+
};
|
|
973
|
+
// When flows are relevant (step names match query), show flows first
|
|
974
|
+
// When flows are noise (no name overlap), show Symbols first, then max 2 flows
|
|
975
|
+
if (anyFlowRelevant) {
|
|
976
|
+
renderFlows();
|
|
977
|
+
renderSymbols();
|
|
978
|
+
}
|
|
979
|
+
else {
|
|
980
|
+
renderSymbols();
|
|
981
|
+
if (flows.length > 0) {
|
|
982
|
+
lines.push('');
|
|
983
|
+
lines.push(`## Related flows (${flows.length})`);
|
|
984
|
+
renderFlows(2);
|
|
985
|
+
}
|
|
986
|
+
}
|
|
987
|
+
return lines.join('\n');
|
|
988
|
+
}
|
|
989
|
+
formatContextAsText(result) {
|
|
990
|
+
if (result.error)
|
|
991
|
+
return `Error: ${result.error}`;
|
|
992
|
+
if (result.status === 'ambiguous') {
|
|
993
|
+
const lines = [`Ambiguous: ${result.candidates.length} matches for '${result.message}'`];
|
|
994
|
+
for (const c of result.candidates) {
|
|
995
|
+
lines.push(` ${c.name} — ${c.kind} @ ${c.filePath}:${c.line || '?'}`);
|
|
996
|
+
}
|
|
997
|
+
return lines.join('\n');
|
|
998
|
+
}
|
|
999
|
+
const sym = result.symbol;
|
|
1000
|
+
const lines = [];
|
|
1001
|
+
// Header with signature + C5 module
|
|
1002
|
+
const sig = sym.signature || sym.name;
|
|
1003
|
+
const modTag = sym.module ? ` [${sym.module}]` : '';
|
|
1004
|
+
lines.push(`## ${sym.name}`);
|
|
1005
|
+
lines.push(`${sym.kind || sym.type || 'Symbol'} @ ${sym.filePath}:${sym.startLine || '?'}-${sym.endLine || '?'}${modTag}`);
|
|
1006
|
+
if (sym.signature && sym.signature !== sym.name) {
|
|
1007
|
+
lines.push(`\`${sym.signature}\``);
|
|
1008
|
+
}
|
|
1009
|
+
// Incoming refs (F3: with line numbers, F4: with kind, Fix 6: error-handler/conditional annotations)
|
|
1010
|
+
const incoming = result.incoming || {};
|
|
1011
|
+
const incomingEntries = Object.entries(incoming);
|
|
1012
|
+
if (incomingEntries.length > 0) {
|
|
1013
|
+
lines.push('');
|
|
1014
|
+
for (const [relType, refs] of incomingEntries) {
|
|
1015
|
+
const refList = refs;
|
|
1016
|
+
lines.push(`### ${relType} (${refList.length} incoming)`);
|
|
1017
|
+
for (const ref of refList) {
|
|
1018
|
+
const loc = ref.startLine ? `${ref.filePath}:${ref.startLine}` : (ref.filePath || '?');
|
|
1019
|
+
const annotation = ref.reason === 'error-handler' ? ' (error path)' : ref.reason === 'conditional' ? ' (conditional)' : '';
|
|
1020
|
+
lines.push(` ${ref.name} — ${ref.kind || 'Symbol'} @ ${loc}${annotation}`);
|
|
1021
|
+
}
|
|
1022
|
+
}
|
|
1023
|
+
}
|
|
1024
|
+
// Outgoing refs — sorted by call site line number for execution order
|
|
1025
|
+
const outgoing = result.outgoing || {};
|
|
1026
|
+
const outgoingEntries = Object.entries(outgoing);
|
|
1027
|
+
if (outgoingEntries.length > 0) {
|
|
1028
|
+
lines.push('');
|
|
1029
|
+
for (const [relType, refs] of outgoingEntries) {
|
|
1030
|
+
const refList = refs.slice().sort((a, b) => (a.callLine || 9999) - (b.callLine || 9999));
|
|
1031
|
+
lines.push(`### ${relType} (${refList.length} outgoing)`);
|
|
1032
|
+
for (const ref of refList) {
|
|
1033
|
+
const loc = ref.startLine ? `${ref.filePath}:${ref.startLine}` : (ref.filePath || '?');
|
|
1034
|
+
const annotation = ref.reason === 'error-handler' ? ' (error path)' : ref.reason === 'conditional' ? ' (conditional)' : '';
|
|
1035
|
+
const linePrefix = ref.callLine ? `:${ref.callLine}` : '';
|
|
1036
|
+
lines.push(` ${linePrefix ? `[${linePrefix}] ` : ''}${ref.name} — ${ref.kind || 'Symbol'} @ ${loc}${annotation}`);
|
|
1037
|
+
}
|
|
1038
|
+
}
|
|
1039
|
+
}
|
|
1040
|
+
// Process participation
|
|
1041
|
+
const processes = result.processes || [];
|
|
1042
|
+
if (processes.length > 0) {
|
|
1043
|
+
lines.push('');
|
|
1044
|
+
lines.push(`### Flows (${processes.length})`);
|
|
1045
|
+
for (const p of processes) {
|
|
1046
|
+
lines.push(` ${p.name} (step ${p.step_index}/${p.step_count})`);
|
|
1047
|
+
}
|
|
1048
|
+
}
|
|
1049
|
+
return lines.join('\n');
|
|
1050
|
+
}
|
|
1051
|
+
formatImpactAsText(result) {
|
|
1052
|
+
if (result.error)
|
|
1053
|
+
return `Error: ${result.error}`;
|
|
1054
|
+
const lines = [];
|
|
1055
|
+
const t = result.target;
|
|
1056
|
+
lines.push(`## Impact: ${t.name} (${result.direction})`);
|
|
1057
|
+
lines.push(`Risk: ${result.risk} | ${result.impactedCount} affected | ${result.summary.processes_affected} processes | ${result.summary.modules_affected} modules${result.truncated ? ' | TRUNCATED' : ''}`);
|
|
1058
|
+
// By depth (F4: use actual type, F10: show non-CALLS relation types)
|
|
1059
|
+
const byDepth = result.byDepth || {};
|
|
1060
|
+
const depthLabels = { 1: 'WILL BREAK', 2: 'LIKELY AFFECTED', 3: 'MAY NEED TESTING' };
|
|
1061
|
+
for (const [depth, items] of Object.entries(byDepth)) {
|
|
1062
|
+
const d = Number(depth);
|
|
1063
|
+
const label = depthLabels[d] || `depth ${d}`;
|
|
1064
|
+
const itemList = items;
|
|
1065
|
+
lines.push('');
|
|
1066
|
+
lines.push(`### d=${d} ${label} (${itemList.length})`);
|
|
1067
|
+
for (const item of itemList) {
|
|
1068
|
+
// F10: show relationship type when not CALLS (the interesting cases)
|
|
1069
|
+
const relInfo = item.relationType && item.relationType !== 'CALLS' ? ` via ${item.relationType}` : '';
|
|
1070
|
+
const conf = item.confidence != null && item.confidence < 1 ? ` (${item.confidence})` : '';
|
|
1071
|
+
lines.push(` ${item.name} — ${item.type || 'Symbol'} @ ${this.shortPath(item.filePath)}${relInfo}${conf}`);
|
|
1072
|
+
}
|
|
1073
|
+
}
|
|
1074
|
+
// F5: Cap affected processes at 7, group overflow
|
|
1075
|
+
const procs = result.affected_processes || [];
|
|
1076
|
+
if (procs.length > 0) {
|
|
1077
|
+
const MAX_SHOWN_FLOWS = 7;
|
|
1078
|
+
const shown = procs.slice(0, MAX_SHOWN_FLOWS);
|
|
1079
|
+
const overflow = procs.length - MAX_SHOWN_FLOWS;
|
|
1080
|
+
lines.push('');
|
|
1081
|
+
lines.push(`### Affected flows (${procs.length})`);
|
|
1082
|
+
for (const p of shown) {
|
|
1083
|
+
lines.push(` ${p.name} (${p.hits} hits, ${p.step_count} steps)`);
|
|
1084
|
+
}
|
|
1085
|
+
if (overflow > 0) {
|
|
1086
|
+
lines.push(` ... +${overflow} more flows`);
|
|
1087
|
+
}
|
|
1088
|
+
}
|
|
1089
|
+
// Affected modules
|
|
1090
|
+
const mods = result.affected_modules || [];
|
|
1091
|
+
if (mods.length > 0) {
|
|
1092
|
+
lines.push('');
|
|
1093
|
+
lines.push(`### Modules: ${mods.map((m) => `${m.name} (${m.hits} ${m.impact})`).join(' | ')}`);
|
|
1094
|
+
}
|
|
1095
|
+
return lines.join('\n');
|
|
1096
|
+
}
|
|
1097
|
+
formatDetectChangesAsText(result) {
|
|
1098
|
+
if (result.error)
|
|
1099
|
+
return `Error: ${result.error}`;
|
|
1100
|
+
const summary = result.summary;
|
|
1101
|
+
if (summary.changed_count === 0)
|
|
1102
|
+
return 'No changes detected.';
|
|
1103
|
+
const lines = [];
|
|
1104
|
+
lines.push(`## Changes (${summary.risk_level.toUpperCase()})`);
|
|
1105
|
+
lines.push(`${summary.changed_files} files | ${summary.changed_count} symbols | ${summary.affected_count} processes`);
|
|
1106
|
+
// Fix 7: Show interface change risk summary at the top
|
|
1107
|
+
const interfaceChanges = (result.changed_symbols || []).filter((s) => s.change_type === 'interface changed');
|
|
1108
|
+
if (interfaceChanges.length > 0) {
|
|
1109
|
+
lines.push(`\u26A0 ${interfaceChanges.length} interface changes: ${interfaceChanges.slice(0, 5).map((s) => s.name).join(', ')}${interfaceChanges.length > 5 ? ` +${interfaceChanges.length - 5} more` : ''}`);
|
|
1110
|
+
}
|
|
1111
|
+
// Group changed symbols by file
|
|
1112
|
+
const byFile = new Map();
|
|
1113
|
+
for (const sym of (result.changed_symbols || [])) {
|
|
1114
|
+
const fp = sym.filePath || '';
|
|
1115
|
+
if (!byFile.has(fp))
|
|
1116
|
+
byFile.set(fp, []);
|
|
1117
|
+
byFile.get(fp).push(sym);
|
|
1118
|
+
}
|
|
1119
|
+
// Show files sorted by symbol count, F3: with diff stats
|
|
1120
|
+
const diffStats = result.diff_stats || {};
|
|
1121
|
+
const sortedFiles = Array.from(byFile.entries())
|
|
1122
|
+
.sort((a, b) => b[1].length - a[1].length)
|
|
1123
|
+
.slice(0, 30);
|
|
1124
|
+
if (sortedFiles.length > 0) {
|
|
1125
|
+
lines.push('');
|
|
1126
|
+
lines.push(`### Changed files (${byFile.size})`);
|
|
1127
|
+
for (const [fp, syms] of sortedFiles) {
|
|
1128
|
+
const names = syms.slice(0, 5).map((s) => s.name).join(', ');
|
|
1129
|
+
const more = syms.length > 5 ? ` +${syms.length - 5} more` : '';
|
|
1130
|
+
const stat = diffStats[fp] || diffStats[fp.split('/').slice(-1)[0]] || '';
|
|
1131
|
+
const statSuffix = stat ? ` (${stat})` : '';
|
|
1132
|
+
lines.push(` ${this.shortPath(fp)}${statSuffix} — ${syms.length} symbols: ${names}${more}`);
|
|
1133
|
+
}
|
|
1134
|
+
}
|
|
1135
|
+
// Affected processes
|
|
1136
|
+
const procs = result.affected_processes || [];
|
|
1137
|
+
if (procs.length > 0) {
|
|
1138
|
+
const topProcs = procs.sort((a, b) => b.changed_steps.length - a.changed_steps.length).slice(0, 15);
|
|
1139
|
+
lines.push('');
|
|
1140
|
+
lines.push(`### Affected flows (${procs.length})`);
|
|
1141
|
+
for (const p of topProcs) {
|
|
1142
|
+
const steps = p.changed_steps.map((s) => s.symbol).join(', ');
|
|
1143
|
+
lines.push(` ${p.name} — ${p.changed_steps.length} changed: ${steps}`);
|
|
1144
|
+
}
|
|
1145
|
+
}
|
|
1146
|
+
return lines.join('\n');
|
|
1147
|
+
}
|
|
1148
|
+
// ── Staleness check ────────────────────────────────────────────────
|
|
1149
|
+
/** C3: Check if index is behind HEAD and return a warning prefix */
|
|
1150
|
+
getStalenessWarning(repo) {
|
|
1151
|
+
try {
|
|
1152
|
+
const { checkStaleness } = require('../staleness.js');
|
|
1153
|
+
const info = checkStaleness(repo.repoPath, repo.lastCommit);
|
|
1154
|
+
if (info.isStale) {
|
|
1155
|
+
return `⚠ index ${info.commitsBehind} commit${info.commitsBehind > 1 ? 's' : ''} behind HEAD\n\n`;
|
|
1156
|
+
}
|
|
1157
|
+
}
|
|
1158
|
+
catch { }
|
|
1159
|
+
return '';
|
|
1160
|
+
}
|
|
1161
|
+
// ── Tool Dispatch ─────────────────────────────────────────────────
|
|
1162
|
+
async callTool(method, params) {
|
|
1163
|
+
if (method === 'list_repos') {
|
|
1164
|
+
return this.listRepos();
|
|
1165
|
+
}
|
|
1166
|
+
// Resolve repo from optional param (re-reads registry on miss)
|
|
1167
|
+
const repo = await this.resolveRepo(params?.repo);
|
|
1168
|
+
await this.ensureFresh(repo);
|
|
1169
|
+
// C3: Prepend staleness warning to all tool responses
|
|
1170
|
+
const staleWarning = this.getStalenessWarning(repo);
|
|
1171
|
+
switch (method) {
|
|
1172
|
+
case 'query':
|
|
1173
|
+
return staleWarning + this.formatQueryAsText(await this.query(repo, params));
|
|
1174
|
+
case 'cypher': {
|
|
1175
|
+
const raw = await this.cypher(repo, params);
|
|
1176
|
+
return this.formatCypherAsMarkdown(raw);
|
|
1177
|
+
}
|
|
1178
|
+
case 'context': {
|
|
1179
|
+
// F5: Bulk context — if names array provided, fetch context for each
|
|
1180
|
+
if (params?.names && Array.isArray(params.names) && params.names.length > 0) {
|
|
1181
|
+
const results = [];
|
|
1182
|
+
for (const name of params.names.slice(0, 5)) { // cap at 5 to prevent abuse
|
|
1183
|
+
const result = await this.context(repo, { ...params, name, names: undefined });
|
|
1184
|
+
results.push(this.formatContextAsText(result));
|
|
1185
|
+
}
|
|
1186
|
+
return staleWarning + results.join('\n\n---\n\n');
|
|
1187
|
+
}
|
|
1188
|
+
return staleWarning + this.formatContextAsText(await this.context(repo, params));
|
|
1189
|
+
}
|
|
1190
|
+
case 'impact':
|
|
1191
|
+
return staleWarning + this.formatImpactAsText(await this.impact(repo, params));
|
|
1192
|
+
case 'detect_changes':
|
|
1193
|
+
return staleWarning + this.formatDetectChangesAsText(await this.detectChanges(repo, params));
|
|
1194
|
+
case 'rename':
|
|
1195
|
+
return this.rename(repo, params);
|
|
1196
|
+
// Legacy aliases for backwards compatibility
|
|
1197
|
+
case 'search':
|
|
1198
|
+
return staleWarning + this.formatQueryAsText(await this.query(repo, params));
|
|
1199
|
+
case 'explore':
|
|
1200
|
+
return staleWarning + this.formatContextAsText(await this.context(repo, { name: params?.name, ...params }));
|
|
1201
|
+
case 'overview':
|
|
1202
|
+
return this.overview(repo, params);
|
|
1203
|
+
default:
|
|
1204
|
+
throw new Error(`Unknown tool: ${method}`);
|
|
1205
|
+
}
|
|
1206
|
+
}
|
|
1207
|
+
// Tool Implementations
|
|
1208
|
+
/**
 * Query tool: process-grouped search
 * Hybrid BM25+semantic search, trace to processes, rank by relevance + cohesion
 *
 * @param {object} repo - registry entry; uses repo.id (graph handle) and repo.name
 * @param {object} params - { query, limit?, max_symbols?, include_content?,
 *   goal?, task_context?, file_path? }
 * @returns {Promise<object|string>} { processes, process_symbols, definitions, _searchQuery },
 *   an { error } object when the query is empty, or a markdown string for the
 *   special "overview"/"architecture" query.
 */
async query(repo, params) {
    if (!params.query?.trim()) {
        return { error: 'query parameter is required and cannot be empty.' };
    }
    await this.ensureInitialized(repo.id);
    const searchQuery = params.query.trim();
    // Detect if query is a specific identifier vs natural language
    // Identifiers: camelCase, PascalCase, snake_case, UPPER_CASE, or single words without spaces
    const isIdentifierQuery = /^[a-zA-Z_$][a-zA-Z0-9_$]*$/.test(searchQuery) || /^[A-Z_][A-Z0-9_]*$/.test(searchQuery);
    // Fix 8: Special case for "overview" / "architecture" queries
    if (searchQuery.toLowerCase() === 'overview' || searchQuery.toLowerCase() === 'architecture') {
        // Return top clusters with their key symbols instead of search results
        const clusterResult = await this.queryClusters(repo.name, 10);
        const lines = [`## Codebase Overview: ${repo.name}`];
        for (const cluster of (clusterResult.clusters || []).slice(0, 8)) {
            lines.push(`\n### ${cluster.heuristicLabel || cluster.label} (${cluster.symbolCount} symbols, cohesion: ${(cluster.cohesion || 0).toFixed(2)})`);
            // Fetch top 5 symbols from this cluster
            // (single-quote doubling escapes the label for the Cypher literal)
            try {
                const members = await executeQuery(repo.id, `
                    MATCH (n)-[:CodeRelation {type: 'MEMBER_OF'}]->(c:Community {heuristicLabel: '${(cluster.heuristicLabel || cluster.label || '').replace(/'/g, "''")}'})
                    RETURN n.name AS name, labels(n) AS type, n.filePath AS filePath
                    ORDER BY n.startLine
                    LIMIT 5
                `);
                for (const m of members) {
                    lines.push(` ${m.name || m[0]} — ${m.type || m[1]} @ ${m.filePath || m[2]}`);
                }
            }
            catch { } // cluster membership is optional — skip silently on failure
        }
        return lines.join('\n');
    }
    const { DEFAULT_PROCESS_LIMIT, DEFAULT_MAX_SYMBOLS_PER_PROCESS, DEFAULT_MAX_DEFINITIONS, COHESION_WEIGHT, MAX_BATCH_CANDIDATES, } = await import('../../core/search/types.js');
    const { mergeWithRRF } = await import('../../core/search/hybrid-search.js');
    const processLimit = params.limit || DEFAULT_PROCESS_LIMIT;
    const maxSymbolsPerProcess = params.max_symbols || DEFAULT_MAX_SYMBOLS_PER_PROCESS;
    const includeContent = params.include_content ?? false;
    // Enrich semantic query with task_context/goal for better embeddings
    const semanticQuery = [searchQuery, params.goal, params.task_context]
        .filter(Boolean).join(' — ');
    // Step 1: Hybrid search (BM25 + semantic in parallel)
    // BM25 uses raw query (keyword matching); semantic uses enriched query (better embedding)
    const searchLimit = processLimit * maxSymbolsPerProcess;
    const [bm25Results, semanticResults] = await Promise.all([
        this.bm25Search(repo, searchQuery, searchLimit),
        this.semanticSearch(repo, semanticQuery, searchLimit),
    ]);
    // Step 2: Weighted RRF merge (single implementation — no duplication)
    // BM25 now returns symbol-level results with nodeId, name, type
    const bm25ForRRF = bm25Results.map((r, i) => ({
        nodeId: String(r.nodeId ?? ''),
        name: String(r.name ?? ''),
        type: String(r.type ?? 'File'),
        filePath: String(r.filePath ?? ''),
        score: Number(r.bm25Score ?? 0),
        rank: i + 1,
        startLine: r.startLine,
        endLine: r.endLine,
    }));
    const semanticForRRF = semanticResults.map((r) => ({
        nodeId: String(r.nodeId ?? ''), name: String(r.name ?? ''), label: String(r.type ?? ''),
        filePath: String(r.filePath ?? ''), distance: Number(r.distance ?? 1),
        startLine: r.startLine, endLine: r.endLine,
    }));
    const rrfMerged = mergeWithRRF(bm25ForRRF, semanticForRRF, { limit: searchLimit });
    // Build lookup from original search data (keyed by both nodeId and filePath for cross-referencing)
    // BM25 rows win on key collisions because they are inserted first.
    const searchDataMap = new Map();
    for (const r of bm25Results) {
        if (r.nodeId)
            searchDataMap.set(r.nodeId, r);
        if (r.filePath && !searchDataMap.has(r.filePath))
            searchDataMap.set(r.filePath, r);
    }
    for (const r of semanticResults) {
        const key = r.nodeId || r.filePath;
        if (!searchDataMap.has(key))
            searchDataMap.set(key, r);
    }
    let merged = rrfMerged.map(rrf => ({
        score: rrf.score,
        data: searchDataMap.get(rrf.nodeId ?? '') ?? searchDataMap.get(rrf.filePath) ?? {
            name: rrf.name ?? rrf.filePath.split('/').pop(), type: rrf.label ?? 'File',
            filePath: rrf.filePath, nodeId: rrf.nodeId,
        },
    }));
    // Filter noise: remove test files, config files, docs from results by default
    merged = merged.filter(item => {
        const fp = String(item.data.filePath ?? '').toLowerCase();
        if (isTestFilePath(fp))
            return false;
        if (fp.endsWith('.json') || fp.endsWith('.md') || fp.endsWith('.yml') || fp.endsWith('.yaml'))
            return false;
        if (fp.includes('/skills/') || fp.includes('/fixtures/') || fp.includes('/eval/'))
            return false;
        return true;
    });
    // F1 (new): File path filter — restrict results to a specific file
    if (params.file_path) {
        const fpFilter = params.file_path.toLowerCase();
        merged = merged.filter(item => String(item.data.filePath ?? '').toLowerCase().includes(fpFilter));
    }
    // Exact name match boost: if a symbol name matches the query exactly, pin it high
    const queryLower = searchQuery.toLowerCase();
    for (const item of merged) {
        const name = String(item.data.name ?? '').toLowerCase();
        if (name === queryLower || name.includes(queryLower)) {
            item.score *= 10; // strong boost for exact/substring match
        }
    }
    // F7: Boost symbols whose name matches terms from goal/task_context
    // e.g., goal="find timeout logic" boosts symbol named "withTimeout"
    // Stop-word list filters generic verbs/nouns that would boost everything.
    const contextTerms = [params.goal, params.task_context]
        .filter(Boolean)
        .join(' ')
        .toLowerCase()
        .split(/\s+/)
        .filter(t => t.length >= 3 && !['the', 'and', 'for', 'how', 'does', 'what', 'find', 'understand', 'where', 'when', 'with', 'from', 'that', 'this',
        'extract', 'search', 'get', 'set', 'create', 'add', 'remove', 'delete', 'update', 'make', 'build',
        'use', 'using', 'into', 'code', 'function', 'method', 'class', 'file', 'module', 'adding', 'could',
        'logic', 'existing', 'current', 'new', 'all', 'any', 'should', 'want', 'need'].includes(t));
    if (contextTerms.length > 0) {
        for (const item of merged) {
            const name = String(item.data.name ?? '').toLowerCase();
            for (const term of contextTerms) {
                if (name.includes(term)) {
                    item.score *= 5; // at most one context-term boost per item (break below)
                    break;
                }
            }
        }
    }
    merged.sort((a, b) => b.score - a.score);
    // Step 2b: Cross-encoder re-ranking (optional, improves top-k precision)
    // Uses enriched query (with goal/context) for better reranker scoring
    if (merged.length > 1) {
        try {
            const rerankCandidates = merged.filter(m => m.data.nodeId).slice(0, 30);
            if (rerankCandidates.length > 1) {
                const nodeIdList = rerankCandidates.map(c => `'${String(c.data.nodeId).replace(/'/g, "''")}'`).join(', ');
                const snippetRows = await executeQuery(repo.id, `
                    MATCH (n) WHERE n.id IN [${nodeIdList}]
                    RETURN n.id AS nodeId, COALESCE(n.content, n.name) AS snippet
                `);
                const snippetMap = new Map();
                for (const row of snippetRows) {
                    snippetMap.set(String(row.nodeId ?? row[0]), String(row.snippet ?? row[1] ?? ''));
                }
                const passages = rerankCandidates
                    .map(c => ({ id: String(c.data.nodeId), text: snippetMap.get(c.data.nodeId) ?? String(c.data.name ?? '') }))
                    .filter(p => p.text.length > 0);
                if (passages.length > 1) {
                    const { rerank } = await import('../../core/search/reranker.js');
                    // Use enriched query for reranker — goal/context improves cross-encoder precision
                    const reranked = await rerank(semanticQuery, passages);
                    const rerankedIds = new Set(reranked.map(r => r.id));
                    const nonCandidates = merged.filter(m => !m.data.nodeId || !rerankedIds.has(m.data.nodeId));
                    // C4: Simplified reranker blending — normalize both to [0,1], blend with alpha
                    const maxRRF = Math.max(...rerankCandidates.map(c => c.score), 1e-9);
                    const maxRerank = Math.max(...reranked.map(r => r.score), 1e-9);
                    const minRerank = Math.min(...reranked.map(r => r.score), 0);
                    const rerankRange = maxRerank - minRerank || 1;
                    const rerankScoreMap = new Map(reranked.map(r => [
                        r.id,
                        (r.score - minRerank) / rerankRange, // normalized 0-1
                    ]));
                    const RERANKER_ALPHA = 0.6; // 60% RRF, 40% reranker
                    const rerankedMerged = rerankCandidates
                        .map(item => {
                        const rrfNorm = item.score / maxRRF; // normalized 0-1
                        const rerankNorm = rerankScoreMap.get(item.data.nodeId) ?? 0;
                        return { ...item, score: RERANKER_ALPHA * rrfNorm + (1 - RERANKER_ALPHA) * rerankNorm };
                    })
                        .sort((a, b) => b.score - a.score);
                    // Reranked candidates go first; remaining items keep their RRF order.
                    merged = [...rerankedMerged, ...nonCandidates];
                }
            }
        }
        catch {
            // Re-ranker is optional — fall back to RRF order silently
        }
    }
    // Build matched_by lookup from RRF sources
    const matchedByMap = new Map();
    for (const rrf of rrfMerged) {
        const key = rrf.nodeId || rrf.filePath;
        matchedByMap.set(key, rrf.sources.join(' + '));
    }
    // Cap candidates before expensive batch lookups
    merged = merged.slice(0, MAX_BATCH_CANDIDATES);
    // Step 3: BATCHED process + cluster + content lookup (1-3 queries total, not N×3)
    // Partition: symbols with a graph nodeId get full enrichment; the rest
    // become bare "definitions" entries (File rows only for identifier queries).
    const symbolsWithNodeId = [];
    const definitions = [];
    for (const item of merged) {
        const matchedBy = matchedByMap.get(item.data.nodeId ?? '') || matchedByMap.get(item.data.filePath ?? '') || 'bm25';
        const type = String(item.data.type ?? 'File');
        if (!item.data.nodeId || type === 'File') {
            if (type !== 'File') {
                definitions.push({ name: item.data.name, type, filePath: item.data.filePath, matched_by: matchedBy });
            }
            else if (isIdentifierQuery) {
                // Include File results for identifier queries (e.g. "FTS_TABLES", "TYPE_WEIGHTS")
                // These help find constants, config, and non-function symbols
                definitions.push({ name: item.data.name, type: 'File', filePath: item.data.filePath, matched_by: matchedBy });
            }
            // Natural language queries: File-type results are noise, exclude them
        }
        else {
            symbolsWithNodeId.push({ nodeId: item.data.nodeId, score: item.score, data: { ...item.data, matched_by: matchedBy } });
        }
    }
    const processMap = new Map();
    // Hoisted for F1 full-flow-steps access outside the if block
    let signatureMap = new Map();
    let clusterByNode = new Map();
    if (symbolsWithNodeId.length > 0) {
        const nodeIdList = symbolsWithNodeId.map(s => `'${String(s.nodeId).replace(/'/g, "''")}'`).join(', ');
        // Batch process lookup
        let processRows = [];
        try {
            processRows = await executeQuery(repo.id, `
                MATCH (n)-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process)
                WHERE n.id IN [${nodeIdList}]
                RETURN n.id AS nodeId, p.id AS pid, p.label AS label, p.heuristicLabel AS heuristicLabel,
                       p.processType AS processType, p.stepCount AS stepCount, r.step AS step
            `);
        }
        catch (e) {
            logQueryError('query:batch-process-lookup', e);
        }
        // Batch cluster lookup
        let clusterRows = [];
        try {
            clusterRows = await executeQuery(repo.id, `
                MATCH (n)-[:CodeRelation {type: 'MEMBER_OF'}]->(c:Community)
                WHERE n.id IN [${nodeIdList}]
                RETURN n.id AS nodeId, c.cohesion AS cohesion, c.heuristicLabel AS module
            `);
        }
        catch (e) {
            logQueryError('query:batch-cluster-info', e);
        }
        // Always fetch content for signature extraction (eliminates follow-up Read calls)
        const contentMap = new Map();
        // signatureMap hoisted above for F1 access
        try {
            const contentRows = await executeQuery(repo.id, `
                MATCH (n) WHERE n.id IN [${nodeIdList}] RETURN n.id AS nodeId, n.content AS content
            `);
            // Build a name lookup for signature extraction
            const nodeNameMap = new Map();
            for (const s of symbolsWithNodeId) {
                nodeNameMap.set(s.nodeId, String(s.data.name ?? ''));
            }
            for (const row of contentRows) {
                const nid = String(row.nodeId ?? row[0]);
                const cnt = row.content ?? row[1];
                if (cnt) {
                    const full = String(cnt);
                    // Extract signature: the declaration line only
                    signatureMap.set(nid, this.extractSignature(full, nodeNameMap.get(nid) || '', ''));
                    if (includeContent) {
                        // Snippet = lines up to ~200 chars or the first body opener ('{' / '=>')
                        const lines = full.split('\n');
                        let snippet = '';
                        for (const line of lines) {
                            snippet += (snippet ? '\n' : '') + line;
                            if (snippet.length > 200 || line.includes('{') || line.includes('=>'))
                                break;
                        }
                        contentMap.set(nid, snippet);
                    }
                }
            }
        }
        catch (e) {
            logQueryError('query:batch-content-fetch', e);
        }
        // Index batched results by nodeId
        const processRowsByNode = new Map();
        for (const row of processRows) {
            const nid = String(row.nodeId ?? row[0]);
            if (!processRowsByNode.has(nid))
                processRowsByNode.set(nid, []);
            processRowsByNode.get(nid).push(row);
        }
        // clusterByNode hoisted above for F1 access
        // First cluster row per node wins if a node belongs to several communities.
        for (const row of clusterRows) {
            const nid = String(row.nodeId ?? row[0]);
            if (!clusterByNode.has(nid)) {
                clusterByNode.set(nid, { cohesion: Number(row.cohesion ?? row[1] ?? 0), module: String(row.module ?? row[2] ?? '') });
            }
        }
        // Assemble using batched data
        for (const symInfo of symbolsWithNodeId) {
            const cluster = clusterByNode.get(symInfo.nodeId);
            const module = cluster?.module || undefined;
            const content = contentMap.get(symInfo.nodeId);
            const signature = signatureMap.get(symInfo.nodeId) || symInfo.data.name;
            const symbolEntry = {
                name: symInfo.data.name, type: symInfo.data.type,
                filePath: symInfo.data.filePath, startLine: symInfo.data.startLine, endLine: symInfo.data.endLine,
                signature,
                ...(module ? { module } : {}),
                // F9: always include content for small symbols (< 10 lines) for inline display
                // NOTE(review): contentMap is only populated when includeContent is set,
                // so the "always" in F9 depends on the caller passing include_content — confirm.
                ...(content ? { content } : {}),
                matched_by: symInfo.data.matched_by,
            };
            const symProcessRows = processRowsByNode.get(symInfo.nodeId);
            if (!symProcessRows || symProcessRows.length === 0) {
                // Symbol belongs to no process — surface it as a plain definition.
                definitions.push(symbolEntry);
            }
            else {
                for (const row of symProcessRows) {
                    const pid = String(row.pid ?? row[1]);
                    if (!processMap.has(pid)) {
                        processMap.set(pid, {
                            id: pid, label: String(row.label ?? row[2] ?? ''),
                            heuristicLabel: String(row.heuristicLabel ?? row[3] ?? ''),
                            processType: String(row.processType ?? row[4] ?? ''),
                            stepCount: Number(row.stepCount ?? row[5] ?? 0),
                            bestScore: 0, symbolScoreSum: 0, symbolCount: 0, cohesionBoost: 0, symbols: [],
                        });
                    }
                    const proc = processMap.get(pid);
                    proc.bestScore = Math.max(proc.bestScore, symInfo.score);
                    proc.symbolScoreSum += symInfo.score;
                    proc.symbolCount++;
                    proc.cohesionBoost = Math.max(proc.cohesionBoost, cluster?.cohesion ?? 0);
                    proc.symbols.push({ ...symbolEntry, process_id: pid, step_index: Number(row.step ?? row[6] ?? 0) });
                }
            }
        }
    }
    // Step 4: Rank processes — match density + name overlap + score + cohesion
    // Split query into terms for flow-level name matching
    const queryTerms = queryLower.split(/\s+/).filter(t => t.length >= 3);
    const rankedProcesses = Array.from(processMap.values())
        .map(p => {
        // NOTE(review): the (symbolCount - 1) factors cancel — this expression
        // equals 0.1 * (symbolScoreSum - bestScore). Kept as written.
        const breadthBonus = p.symbolCount > 1
            ? (p.symbolScoreSum - p.bestScore) / (p.symbolCount - 1) * 0.1 * (p.symbolCount - 1)
            : 0;
        const baseScore = p.bestScore + breadthBonus + (p.cohesionBoost * COHESION_WEIGHT);
        // Match density: what fraction of steps in this process matched the search?
        const matchedSteps = p.symbols.filter((s) => s.matched_by).length;
        const density = p.stepCount > 0 ? matchedSteps / p.stepCount : 0;
        // Flow name overlap: how many query terms appear in step names (camelCase split)?
        const stepNames = p.symbols.map((s) => (s.name || '').toLowerCase()
            .replace(/([a-z])([A-Z])/g, '$1 $2').replace(/[_\-]/g, ' ')).join(' ');
        const nameOverlap = queryTerms.filter(t => stepNames.includes(t)).length;
        const nameBoost = queryTerms.length > 0 ? nameOverlap / queryTerms.length : 0;
        // Name overlap is the strongest relevance signal for flows.
        // If zero step names overlap with query terms, the flow likely matched on
        // generic BM25 content, not because it's about the queried concept.
        const relevanceMult = nameBoost > 0
            ? (1 + density) * (1 + nameBoost)
            : 0.1; // 90% penalty — these flows are noise for this query
        return { ...p, priority: baseScore * relevanceMult };
    })
        .sort((a, b) => b.priority - a.priority)
        .slice(0, processLimit);
    // Demote server/CLI entry flows for non-server queries
    const queryMentionsServer = queryLower.includes('server') || queryLower.includes('mcp') || queryLower.includes('http') || queryLower.includes('api');
    if (!queryMentionsServer && rankedProcesses.length > 1) {
        const SERVER_ENTRY_PREFIXES = ['createserver', 'mcpcommand', 'startmcpserver', 'handlerequest', 'servecommand', 'setupcommand', 'statuscommand', 'listcommand', 'cleancommand'];
        for (const proc of rankedProcesses) {
            const entryName = (proc.heuristicLabel || proc.label || '').toLowerCase().split(' ')[0];
            if (SERVER_ENTRY_PREFIXES.some(p => entryName.includes(p))) {
                proc.priority *= 0.5;
            }
        }
        rankedProcesses.sort((a, b) => b.priority - a.priority);
    }
    const processes = rankedProcesses.map(p => ({
        id: p.id, summary: p.heuristicLabel || p.label,
        priority: Math.round(p.priority * 1000) / 1000, // 3 decimal places
        symbol_count: p.symbols.length, process_type: p.processType, step_count: p.stepCount,
    }));
    // F1: Fetch ALL steps for top processes so LLM sees the full flow, not just matched symbols
    const matchedNodeIds = new Set(symbolsWithNodeId.map(s => s.nodeId));
    const topProcIds = rankedProcesses.map(p => p.id);
    let allStepsMap = new Map(); // pid -> all steps
    if (topProcIds.length > 0) {
        try {
            const procIdList = topProcIds.map(id => `'${id.replace(/'/g, "''")}'`).join(', ');
            const allStepsRows = await executeQuery(repo.id, `
                MATCH (s)-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process)
                WHERE p.id IN [${procIdList}]
                RETURN s.id AS nodeId, s.name AS name, labels(s) AS type, s.filePath AS filePath, s.startLine AS startLine,
                       p.id AS pid, r.step AS step
                ORDER BY p.id, r.step
            `);
            for (const row of allStepsRows) {
                const pid = String(row.pid ?? row[5]);
                if (!allStepsMap.has(pid))
                    allStepsMap.set(pid, []);
                const nodeId = String(row.nodeId ?? row[0]);
                const sig = signatureMap.get(nodeId);
                allStepsMap.get(pid).push({
                    nodeId,
                    name: String(row.name ?? row[1]),
                    type: String(row.type ?? row[2] ?? 'Symbol'),
                    filePath: String(row.filePath ?? row[3] ?? ''),
                    startLine: row.startLine ?? row[4],
                    step_index: Number(row.step ?? row[6] ?? 0),
                    matched: matchedNodeIds.has(nodeId),
                    signature: sig || String(row.name ?? row[1]),
                    module: clusterByNode.get(nodeId)?.module,
                });
            }
        }
        catch (e) {
            logQueryError('query:full-flow-steps', e);
        }
    }
    // Build full process symbols: matched symbols enriched, plus unmatched steps for context
    const fullProcessSymbols = [];
    for (const proc of rankedProcesses) {
        const allSteps = allStepsMap.get(proc.id) || [];
        const matchedSymMap = new Map(proc.symbols.map((s) => [s.step_index, s]));
        if (allSteps.length > 0) {
            // Use full steps, preferring matched symbol data where available
            for (const step of allSteps.slice(0, maxSymbolsPerProcess)) {
                const matched = matchedSymMap.get(step.step_index);
                fullProcessSymbols.push(matched || { ...step, process_id: proc.id, matched_by: null });
            }
        }
        else {
            // Fallback: just use matched symbols
            fullProcessSymbols.push(...proc.symbols.slice(0, maxSymbolsPerProcess));
        }
    }
    // Dedupe on (process, node-or-name, step) — processes can share symbols.
    const seen = new Set();
    const dedupedSymbols = fullProcessSymbols.filter(s => {
        const key = `${s.process_id || ''}:${s.nodeId || s.name}:${s.step_index}`;
        if (seen.has(key))
            return false;
        seen.add(key);
        return true;
    });
    return { processes, process_symbols: dedupedSymbols, definitions: definitions.slice(0, DEFAULT_MAX_DEFINITIONS), _searchQuery: searchQuery };
}
|
|
1652
|
+
/**
|
|
1653
|
+
* BM25 keyword search helper - uses LadybugDB FTS for always-fresh results
|
|
1654
|
+
*/
|
|
1655
|
+
async bm25Search(repo, query, limit) {
|
|
1656
|
+
const { searchFTSFromLbug } = await import('../../core/search/bm25-index.js');
|
|
1657
|
+
const { expandQuery } = await import('../../core/search/query-expansion.js');
|
|
1658
|
+
const { PRF_SPARSE_THRESHOLD, PRF_WEAK_SCORE_THRESHOLD } = await import('../../core/search/types.js');
|
|
1659
|
+
try {
|
|
1660
|
+
let results = await searchFTSFromLbug(query, limit, repo.id);
|
|
1661
|
+
// Pseudo-relevance feedback: expand when results are sparse OR top score is weak
|
|
1662
|
+
const topScore = results.length > 0 ? results[0].score : 0;
|
|
1663
|
+
const shouldExpand = results.length > 0 && (results.length < PRF_SPARSE_THRESHOLD || topScore < PRF_WEAK_SCORE_THRESHOLD);
|
|
1664
|
+
if (shouldExpand) {
|
|
1665
|
+
const topSymbolNames = results.slice(0, 3).map(r => r.name);
|
|
1666
|
+
const expandedQuery = expandQuery(query, topSymbolNames);
|
|
1667
|
+
if (expandedQuery !== query) {
|
|
1668
|
+
const expandedResults = await searchFTSFromLbug(expandedQuery, limit, repo.id);
|
|
1669
|
+
const seen = new Set(results.map(r => r.nodeId));
|
|
1670
|
+
for (const r of expandedResults) {
|
|
1671
|
+
if (!seen.has(r.nodeId)) {
|
|
1672
|
+
results.push(r);
|
|
1673
|
+
seen.add(r.nodeId);
|
|
1674
|
+
}
|
|
1675
|
+
}
|
|
1676
|
+
results = results.slice(0, limit);
|
|
1677
|
+
}
|
|
1678
|
+
}
|
|
1679
|
+
// Map to the shape expected by the query pipeline
|
|
1680
|
+
return results.map(r => ({
|
|
1681
|
+
nodeId: r.nodeId,
|
|
1682
|
+
name: r.name,
|
|
1683
|
+
type: r.type,
|
|
1684
|
+
filePath: r.filePath,
|
|
1685
|
+
startLine: r.startLine,
|
|
1686
|
+
endLine: r.endLine,
|
|
1687
|
+
bm25Score: r.score,
|
|
1688
|
+
}));
|
|
1689
|
+
}
|
|
1690
|
+
catch (err) {
|
|
1691
|
+
console.error('Code Mapper: BM25/FTS search failed (FTS indexes may not exist) -', err.message);
|
|
1692
|
+
return [];
|
|
1693
|
+
}
|
|
1694
|
+
}
|
|
1695
|
+
/**
|
|
1696
|
+
* Semantic vector search helper
|
|
1697
|
+
*/
|
|
1698
|
+
async semanticSearch(repo, query, limit) {
|
|
1699
|
+
try {
|
|
1700
|
+
// Check if embedding table exists before loading the model (avoids heavy model init when embeddings are off)
|
|
1701
|
+
const tableCheck = await executeQuery(repo.id, `MATCH (e:CodeEmbedding) RETURN COUNT(*) AS cnt LIMIT 1`);
|
|
1702
|
+
if (!tableCheck.length || (tableCheck[0].cnt ?? tableCheck[0][0]) === 0)
|
|
1703
|
+
return [];
|
|
1704
|
+
const { DEFAULT_MAX_SEMANTIC_DISTANCE } = await import('../../core/search/types.js');
|
|
1705
|
+
const { embedQuery, getEmbeddingDims } = await import('../core/embedder.js');
|
|
1706
|
+
const queryVec = await embedQuery(query);
|
|
1707
|
+
const dims = getEmbeddingDims();
|
|
1708
|
+
const queryVecStr = `[${queryVec.join(',')}]`;
|
|
1709
|
+
const vectorQuery = `
|
|
1710
|
+
CALL QUERY_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx',
|
|
1711
|
+
CAST(${queryVecStr} AS FLOAT[${dims}]), ${limit})
|
|
1712
|
+
YIELD node AS emb, distance
|
|
1713
|
+
RETURN emb.nodeId AS nodeId, distance
|
|
1714
|
+
ORDER BY distance
|
|
1715
|
+
`;
|
|
1716
|
+
const embResults = await executeQuery(repo.id, vectorQuery);
|
|
1717
|
+
if (embResults.length === 0)
|
|
1718
|
+
return [];
|
|
1719
|
+
// Filter by distance threshold — cut irrelevant results before RRF merge
|
|
1720
|
+
const filteredResults = embResults.filter(r => Number(r.distance ?? r[1] ?? 1) < DEFAULT_MAX_SEMANTIC_DISTANCE);
|
|
1721
|
+
if (filteredResults.length === 0)
|
|
1722
|
+
return [];
|
|
1723
|
+
// Batch metadata fetch — single query instead of N per-row queries
|
|
1724
|
+
const nodeIds = filteredResults.map(r => String(r.nodeId ?? r[0]));
|
|
1725
|
+
const distanceMap = new Map();
|
|
1726
|
+
for (const r of filteredResults) {
|
|
1727
|
+
distanceMap.set(String(r.nodeId ?? r[0]), Number(r.distance ?? r[1] ?? 1));
|
|
1728
|
+
}
|
|
1729
|
+
const idList = nodeIds.map(id => `'${id.replace(/'/g, "''")}'`).join(', ');
|
|
1730
|
+
let metaRows = [];
|
|
1731
|
+
try {
|
|
1732
|
+
metaRows = await executeQuery(repo.id, `
|
|
1733
|
+
MATCH (n) WHERE n.id IN [${idList}]
|
|
1734
|
+
RETURN n.id AS nodeId, n.name AS name, labels(n) AS type, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine
|
|
1735
|
+
`);
|
|
1736
|
+
}
|
|
1737
|
+
catch { }
|
|
1738
|
+
return metaRows.map(row => {
|
|
1739
|
+
const nid = String(row.nodeId ?? row[0]);
|
|
1740
|
+
return {
|
|
1741
|
+
nodeId: nid,
|
|
1742
|
+
name: String(row.name ?? row[1] ?? ''),
|
|
1743
|
+
type: String(row.type ?? row[2] ?? 'Unknown'),
|
|
1744
|
+
filePath: String(row.filePath ?? row[3] ?? ''),
|
|
1745
|
+
distance: distanceMap.get(nid) ?? 1,
|
|
1746
|
+
startLine: row.startLine ?? row[4],
|
|
1747
|
+
endLine: row.endLine ?? row[5],
|
|
1748
|
+
};
|
|
1749
|
+
});
|
|
1750
|
+
}
|
|
1751
|
+
catch {
|
|
1752
|
+
// Expected when embeddings are disabled — silently fall back to BM25-only
|
|
1753
|
+
return [];
|
|
1754
|
+
}
|
|
1755
|
+
}
|
|
1756
|
+
async executeCypher(repoName, query) {
|
|
1757
|
+
const repo = await this.resolveRepo(repoName);
|
|
1758
|
+
return this.cypher(repo, { query });
|
|
1759
|
+
}
|
|
1760
|
+
async cypher(repo, params) {
|
|
1761
|
+
await this.ensureInitialized(repo.id);
|
|
1762
|
+
if (!isLbugReady(repo.id)) {
|
|
1763
|
+
return { error: 'LadybugDB not ready. Index may be corrupted.' };
|
|
1764
|
+
}
|
|
1765
|
+
// Block write operations (defense-in-depth — DB is already read-only)
|
|
1766
|
+
if (CYPHER_WRITE_RE.test(params.query)) {
|
|
1767
|
+
return { error: 'Write operations (CREATE, DELETE, SET, MERGE, REMOVE, DROP, ALTER, COPY, DETACH) are not allowed. The knowledge graph is read-only.' };
|
|
1768
|
+
}
|
|
1769
|
+
try {
|
|
1770
|
+
const result = await executeQuery(repo.id, params.query);
|
|
1771
|
+
return result;
|
|
1772
|
+
}
|
|
1773
|
+
catch (err) {
|
|
1774
|
+
return { error: err.message || 'Query failed' };
|
|
1775
|
+
}
|
|
1776
|
+
}
|
|
1777
|
+
/** Format raw Cypher result rows as a markdown table, with raw fallback */
|
|
1778
|
+
formatCypherAsMarkdown(result) {
|
|
1779
|
+
if (!Array.isArray(result) || result.length === 0)
|
|
1780
|
+
return result;
|
|
1781
|
+
const firstRow = result[0];
|
|
1782
|
+
if (typeof firstRow !== 'object' || firstRow === null)
|
|
1783
|
+
return result;
|
|
1784
|
+
const keys = Object.keys(firstRow);
|
|
1785
|
+
if (keys.length === 0)
|
|
1786
|
+
return result;
|
|
1787
|
+
const header = '| ' + keys.join(' | ') + ' |';
|
|
1788
|
+
const separator = '| ' + keys.map(() => '---').join(' | ') + ' |';
|
|
1789
|
+
const dataRows = result.map((row) => '| ' + keys.map(k => {
|
|
1790
|
+
const v = row[k];
|
|
1791
|
+
if (v === null || v === undefined)
|
|
1792
|
+
return '';
|
|
1793
|
+
if (typeof v === 'object')
|
|
1794
|
+
return JSON.stringify(v);
|
|
1795
|
+
return String(v);
|
|
1796
|
+
}).join(' | ') + ' |');
|
|
1797
|
+
return {
|
|
1798
|
+
markdown: [header, separator, ...dataRows].join('\n'),
|
|
1799
|
+
row_count: result.length,
|
|
1800
|
+
};
|
|
1801
|
+
}
|
|
1802
|
+
/** Aggregate same-named clusters by heuristicLabel, filtering tiny clusters (<5 symbols) */
|
|
1803
|
+
aggregateClusters(clusters) {
|
|
1804
|
+
const groups = new Map();
|
|
1805
|
+
for (const c of clusters) {
|
|
1806
|
+
const label = c.heuristicLabel || c.label || 'Unknown';
|
|
1807
|
+
const symbols = c.symbolCount || 0;
|
|
1808
|
+
const cohesion = c.cohesion || 0;
|
|
1809
|
+
const existing = groups.get(label);
|
|
1810
|
+
if (!existing) {
|
|
1811
|
+
groups.set(label, { ids: [c.id], totalSymbols: symbols, weightedCohesion: cohesion * symbols, largest: c });
|
|
1812
|
+
}
|
|
1813
|
+
else {
|
|
1814
|
+
existing.ids.push(c.id);
|
|
1815
|
+
existing.totalSymbols += symbols;
|
|
1816
|
+
existing.weightedCohesion += cohesion * symbols;
|
|
1817
|
+
if (symbols > (existing.largest.symbolCount || 0)) {
|
|
1818
|
+
existing.largest = c;
|
|
1819
|
+
}
|
|
1820
|
+
}
|
|
1821
|
+
}
|
|
1822
|
+
return Array.from(groups.entries())
|
|
1823
|
+
.map(([label, g]) => ({
|
|
1824
|
+
id: g.largest.id,
|
|
1825
|
+
label,
|
|
1826
|
+
heuristicLabel: label,
|
|
1827
|
+
symbolCount: g.totalSymbols,
|
|
1828
|
+
cohesion: g.totalSymbols > 0 ? g.weightedCohesion / g.totalSymbols : 0,
|
|
1829
|
+
subCommunities: g.ids.length,
|
|
1830
|
+
}))
|
|
1831
|
+
.filter(c => c.symbolCount >= 5)
|
|
1832
|
+
.sort((a, b) => b.symbolCount - a.symbolCount);
|
|
1833
|
+
}
|
|
1834
|
+
    /**
     * Repo overview: identity/stats plus the top clusters (Community nodes)
     * and processes (Process nodes) from the graph.
     *
     * @param {object} repo   Repo record (name, repoPath, stats, indexedAt, lastCommit).
     * @param {object} params Optional: `limit` (default 20), `showClusters`,
     *   `showProcesses` (both on unless explicitly `false`).
     * @returns {Promise<object>} Overview object; `clusters` / `processes`
     *   fall back to [] when their queries fail (e.g. nodes not built yet).
     */
    async overview(repo, params) {
        await this.ensureInitialized(repo.id);
        const limit = params.limit || 20;
        // Base payload assembled from the repo record alone — no graph queries.
        const result = {
            repo: repo.name,
            repoPath: repo.repoPath,
            stats: repo.stats,
            indexedAt: repo.indexedAt,
            lastCommit: repo.lastCommit,
        };
        if (params.showClusters !== false) {
            try {
                // Fetch more raw communities than the display limit so aggregation has enough data
                const rawLimit = Math.max(limit * 5, 200);
                const clusters = await executeQuery(repo.id, `
        MATCH (c:Community)
        RETURN c.id AS id, c.label AS label, c.heuristicLabel AS heuristicLabel, c.cohesion AS cohesion, c.symbolCount AS symbolCount
        ORDER BY c.symbolCount DESC
        LIMIT ${rawLimit}
      `);
                // Rows may be keyed objects or positional arrays — handle both.
                const rawClusters = clusters.map((c) => ({
                    id: c.id || c[0],
                    label: c.label || c[1],
                    heuristicLabel: c.heuristicLabel || c[2],
                    cohesion: c.cohesion || c[3],
                    symbolCount: c.symbolCount || c[4],
                }));
                // Merge same-labeled sub-communities, then cap at the display limit.
                result.clusters = this.aggregateClusters(rawClusters).slice(0, limit);
            }
            catch {
                // Best-effort: missing Community table just means no clusters.
                result.clusters = [];
            }
        }
        if (params.showProcesses !== false) {
            try {
                const processes = await executeQuery(repo.id, `
        MATCH (p:Process)
        RETURN p.id AS id, p.label AS label, p.heuristicLabel AS heuristicLabel, p.processType AS processType, p.stepCount AS stepCount
        ORDER BY p.stepCount DESC
        LIMIT ${limit}
      `);
                result.processes = processes.map((p) => ({
                    id: p.id || p[0],
                    label: p.label || p[1],
                    heuristicLabel: p.heuristicLabel || p[2],
                    processType: p.processType || p[3],
                    stepCount: p.stepCount || p[4],
                }));
            }
            catch {
                // Best-effort: missing Process table just means no processes.
                result.processes = [];
            }
        }
        return result;
    }
|
|
1889
|
+
    /** Context tool: 360-degree symbol view with categorized refs and disambiguation */
    /**
     * Build a full context view for one symbol: definition + signature,
     * incoming/outgoing relations grouped by relation type, process
     * participation, and cluster (module) membership.
     *
     * @param {object} repo   Repo record; only `repo.id` is used here.
     * @param {object} params `name` or `uid` required; optional `file_path`
     *   (narrows name matches) and `include_content` (echo raw source).
     * @returns {Promise<object>} { status: 'found', symbol, incoming,
     *   outgoing, processes }, a { status: 'ambiguous', candidates } object
     *   when `name` matches several symbols, or { error } on bad input /
     *   no match.
     */
    async context(repo, params) {
        await this.ensureInitialized(repo.id);
        const { name, uid, file_path, include_content } = params;
        if (!name && !uid) {
            return { error: 'Either "name" or "uid" parameter is required.' };
        }
        // Step 1: Find the symbol
        let symbols;
        if (uid) {
            // Always fetch content for signature extraction (token-efficient alternative to full Read calls)
            symbols = await executeParameterized(repo.id, `
      MATCH (n {id: $uid})
      RETURN n.id AS id, n.name AS name, labels(n) AS type, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine, n.content AS content
      LIMIT 1
    `, { uid });
        }
        else {
            // A name containing '/' or ':' looks like a qualified id — try id match too.
            const isQualified = name.includes('/') || name.includes(':');
            let whereClause;
            let queryParams;
            if (file_path) {
                whereClause = `WHERE n.name = $symName AND n.filePath CONTAINS $filePath`;
                queryParams = { symName: name, filePath: file_path };
            }
            else if (isQualified) {
                whereClause = `WHERE n.id = $symName OR n.name = $symName`;
                queryParams = { symName: name };
            }
            else {
                whereClause = `WHERE n.name = $symName`;
                queryParams = { symName: name };
            }
            symbols = await executeParameterized(repo.id, `
      MATCH (n) ${whereClause}
      RETURN n.id AS id, n.name AS name, labels(n) AS type, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine, n.content AS content
      LIMIT 10
    `, queryParams);
        }
        if (symbols.length === 0) {
            return { error: `Symbol '${name || uid}' not found` };
        }
        // Step 2: Disambiguation
        if (symbols.length > 1 && !uid) {
            return {
                status: 'ambiguous',
                message: `Found ${symbols.length} symbols matching '${name}'. Use uid or file_path to disambiguate.`,
                candidates: symbols.map((s) => ({
                    uid: s.id || s[0],
                    name: s.name || s[1],
                    kind: s.type || s[2],
                    filePath: s.filePath || s[3],
                    line: s.startLine || s[4],
                })),
            };
        }
        // Step 3: Build full context
        const sym = symbols[0];
        const symId = sym.id || sym[0];
        // Categorized incoming refs (F3: line numbers, E1: UNION ALL for single round-trip)
        // KuzuDB bug: `r.type IN [list]` drops results. Use UNION ALL as workaround.
        const REL_TYPES = ['CALLS', 'IMPORTS', 'EXTENDS', 'IMPLEMENTS', 'DEPENDS_ON', 'PROVIDES'];
        // Incoming: use lower confidence threshold (0.5) — callers are more important to see even if fuzzy
        // NOTE: symId is interpolated into the query string (quotes escaped); it
        // originates from the graph itself, not directly from user input.
        const incomingUnion = REL_TYPES.map(t => `MATCH (caller)-[r:CodeRelation {type: '${t}'}]->(n {id: '${symId.replace(/'/g, "''")}'}) WHERE r.confidence >= 0.5 RETURN '${t}' AS relType, caller.id AS uid, caller.name AS name, caller.filePath AS filePath, labels(caller) AS kind, caller.startLine AS startLine, r.reason AS reason LIMIT 15`).join(' UNION ALL ');
        let incomingRows = [];
        try {
            incomingRows = await executeQuery(repo.id, incomingUnion);
        }
        catch { } // best-effort: a failed relation query leaves the category empty
        // Outgoing refs — lower threshold to 0.5 so dynamic imports (global tier) show up,
        // but exclude generic method names that produce false positives at low confidence
        const GENERIC_NAMES_EXCLUDE = ['has', 'get', 'set', 'add', 'remove', 'delete', 'close', 'stop', 'clear', 'reset', 'toString', 'valueOf', 'push', 'pop', 'entries', 'keys', 'values'];
        const genericExclude = GENERIC_NAMES_EXCLUDE.map(n => `'${n}'`).join(', ');
        const outgoingUnion = REL_TYPES.map(t => `MATCH (n {id: '${symId.replace(/'/g, "''")}'} )-[r:CodeRelation {type: '${t}'}]->(target) WHERE r.confidence >= 0.5 AND NOT target.name IN [${genericExclude}] RETURN '${t}' AS relType, target.id AS uid, target.name AS name, target.filePath AS filePath, labels(target) AS kind, target.startLine AS startLine, r.reason AS reason, r.callLine AS callLine LIMIT 15`).join(' UNION ALL ');
        let outgoingRows = [];
        try {
            outgoingRows = await executeQuery(repo.id, outgoingUnion);
        }
        catch { } // best-effort, as above
        // Process participation
        let processRows = [];
        try {
            processRows = await executeParameterized(repo.id, `
      MATCH (n {id: $symId})-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process)
      RETURN p.id AS pid, p.heuristicLabel AS label, r.step AS step, p.stepCount AS stepCount
    `, { symId });
        }
        catch (e) {
            logQueryError('context:process-participation', e);
        }
        // C5: Module/cluster membership
        let module;
        try {
            const clusterRows = await executeParameterized(repo.id, `
      MATCH (n {id: $symId})-[:CodeRelation {type: 'MEMBER_OF'}]->(c:Community)
      RETURN c.heuristicLabel AS module
      LIMIT 1
    `, { symId });
            if (clusterRows.length > 0) {
                module = String(clusterRows[0].module ?? clusterRows[0][0] ?? '');
            }
        }
        catch { } // membership is optional decoration
        // Helper to categorize refs
        // Groups rows into { calls: [...], imports: [...], ... } keyed by the
        // lowercased relation type; handles both keyed and positional rows.
        const categorize = (rows) => {
            const cats = {};
            for (const row of rows) {
                const relType = (row.relType || row[0] || '').toLowerCase();
                const entry = {
                    uid: row.uid || row[1],
                    name: row.name || row[2],
                    filePath: row.filePath || row[3],
                    kind: row.kind || row[4] || 'Symbol',
                    startLine: row.startLine ?? row[5],
                    reason: row.reason || row[6] || '',
                    callLine: row.callLine ?? row[7],
                };
                if (!cats[relType])
                    cats[relType] = [];
                cats[relType].push(entry);
            }
            return cats;
        };
        // Always extract signature for compact display
        const rawContent = sym.content || sym[6] || '';
        const signature = rawContent ? this.extractSignature(String(rawContent), sym.name || sym[1], sym.type || sym[2]) : (sym.name || sym[1]);
        return {
            status: 'found',
            symbol: {
                uid: sym.id || sym[0],
                name: sym.name || sym[1],
                kind: sym.type || sym[2],
                filePath: sym.filePath || sym[3],
                startLine: sym.startLine || sym[4],
                endLine: sym.endLine || sym[5],
                signature,
                // Optional fields only appear when they have a value.
                ...(module ? { module } : {}),
                ...(include_content && rawContent ? { content: rawContent } : {}),
            },
            incoming: categorize(incomingRows),
            outgoing: categorize(outgoingRows),
            processes: processRows.map((r) => ({
                id: r.pid || r[0],
                name: r.label || r[1],
                step_index: r.step || r[2],
                step_count: r.stepCount || r[3],
            })),
        };
    }
|
|
2038
|
+
    /** Legacy explore for backwards compatibility with resources.ts */
    /**
     * Dispatch on `params.type`:
     *  - 'symbol'  → delegates to context() by name.
     *  - 'cluster' → matches Community nodes by label/heuristicLabel, merges
     *                the matched sub-communities (weighted-mean cohesion) and
     *                lists up to 30 member symbols.
     *  - 'process' → matches one Process node and lists its steps in order.
     *
     * @param {object} repo   Repo record; only `repo.id` is used here.
     * @param {object} params { name, type }.
     * @returns {Promise<object>} Shape depends on `type`; { error } on no
     *   match or unknown type.
     */
    async explore(repo, params) {
        await this.ensureInitialized(repo.id);
        const { name, type } = params;
        if (type === 'symbol') {
            return this.context(repo, { name });
        }
        if (type === 'cluster') {
            const clusters = await executeParameterized(repo.id, `
      MATCH (c:Community)
      WHERE c.label = $clusterName OR c.heuristicLabel = $clusterName
      RETURN c.id AS id, c.label AS label, c.heuristicLabel AS heuristicLabel, c.cohesion AS cohesion, c.symbolCount AS symbolCount
    `, { clusterName: name });
            if (clusters.length === 0)
                return { error: `Cluster '${name}' not found` };
            // Rows may be keyed objects or positional arrays — handle both.
            const rawClusters = clusters.map((c) => ({
                id: c.id || c[0], label: c.label || c[1], heuristicLabel: c.heuristicLabel || c[2],
                cohesion: c.cohesion || c[3], symbolCount: c.symbolCount || c[4],
            }));
            // Merge all matched sub-communities: sum sizes, weight cohesion by size.
            let totalSymbols = 0, weightedCohesion = 0;
            for (const c of rawClusters) {
                const s = c.symbolCount || 0;
                totalSymbols += s;
                weightedCohesion += (c.cohesion || 0) * s;
            }
            const members = await executeParameterized(repo.id, `
      MATCH (n)-[:CodeRelation {type: 'MEMBER_OF'}]->(c:Community)
      WHERE c.label = $clusterName OR c.heuristicLabel = $clusterName
      RETURN DISTINCT n.name AS name, labels(n) AS type, n.filePath AS filePath
      LIMIT 30
    `, { clusterName: name });
            return {
                cluster: {
                    // Representative id/label come from the first matched row.
                    id: rawClusters[0].id,
                    label: rawClusters[0].heuristicLabel || rawClusters[0].label,
                    heuristicLabel: rawClusters[0].heuristicLabel || rawClusters[0].label,
                    cohesion: totalSymbols > 0 ? weightedCohesion / totalSymbols : 0,
                    symbolCount: totalSymbols,
                    subCommunities: rawClusters.length,
                },
                members: members.map((m) => ({
                    name: m.name || m[0], type: m.type || m[1], filePath: m.filePath || m[2],
                })),
            };
        }
        if (type === 'process') {
            const processes = await executeParameterized(repo.id, `
      MATCH (p:Process)
      WHERE p.label = $processName OR p.heuristicLabel = $processName
      RETURN p.id AS id, p.label AS label, p.heuristicLabel AS heuristicLabel, p.processType AS processType, p.stepCount AS stepCount
      LIMIT 1
    `, { processName: name });
            if (processes.length === 0)
                return { error: `Process '${name}' not found` };
            const proc = processes[0];
            const procId = proc.id || proc[0];
            // Steps come back ordered by their step index.
            const steps = await executeParameterized(repo.id, `
      MATCH (n)-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p {id: $procId})
      RETURN n.name AS name, labels(n) AS type, n.filePath AS filePath, r.step AS step
      ORDER BY r.step
    `, { procId });
            return {
                process: {
                    id: procId, label: proc.label || proc[1], heuristicLabel: proc.heuristicLabel || proc[2],
                    processType: proc.processType || proc[3], stepCount: proc.stepCount || proc[4],
                },
                steps: steps.map((s) => ({
                    step: s.step || s[3], name: s.name || s[0], type: s.type || s[1], filePath: s.filePath || s[2],
                })),
            };
        }
        return { error: 'Invalid type. Use: symbol, cluster, or process' };
    }
|
|
2111
|
+
    /** Detect changes: git-diff impact analysis mapping changed lines to symbols and processes */
    /**
     * Pipeline:
     *  1. `git diff --name-only` (scope-dependent args) lists changed files;
     *     `--numstat` adds per-file +N/-M line stats.
     *  2. Changed files are mapped to indexed symbols (filePath CONTAINS match,
     *     up to 20 symbols per file).
     *  3. Each symbol's stored signature (from n.content) is compared against
     *     the declaration found in the current file near startLine; a mismatch
     *     marks the symbol 'interface changed'.
     *  4. STEP_IN_PROCESS edges map changed symbols to affected processes, and
     *     a risk level is derived from the affected-process count.
     *
     * @param {object} repo   Repo record (id + repoPath).
     * @param {object} params `scope`: 'unstaged' (default) | 'staged' | 'all'
     *   | 'compare' (requires `base_ref`).
     * @returns {Promise<object>} { summary, changed_symbols,
     *   affected_processes, diff_stats } or { error }.
     */
    async detectChanges(repo, params) {
        await this.ensureInitialized(repo.id);
        const scope = params.scope || 'unstaged';
        const { execFileSync } = await import('child_process');
        // Build git diff args based on scope (using execFileSync to avoid shell injection)
        let diffArgs;
        switch (scope) {
            case 'staged':
                diffArgs = ['diff', '--staged', '--name-only'];
                break;
            case 'all':
                // Working tree + index vs HEAD.
                diffArgs = ['diff', 'HEAD', '--name-only'];
                break;
            case 'compare':
                if (!params.base_ref)
                    return { error: 'base_ref is required for "compare" scope' };
                diffArgs = ['diff', params.base_ref, '--name-only'];
                break;
            case 'unstaged':
            default:
                diffArgs = ['diff', '--name-only'];
                break;
        }
        let changedFiles;
        let diffStatMap = new Map(); // file -> "+N/-M"
        try {
            const output = execFileSync('git', diffArgs, { cwd: repo.repoPath, encoding: 'utf-8' });
            changedFiles = output.trim().split('\n').filter(f => f.length > 0);
        }
        catch (err) {
            return { error: `Git diff failed: ${err.message}` };
        }
        // F3: Get line-level diff stats per file
        try {
            // Reuse the scope args but swap --name-only for --numstat.
            const statArgs = [...diffArgs.filter(a => a !== '--name-only'), '--numstat'];
            const statOutput = execFileSync('git', statArgs, { cwd: repo.repoPath, encoding: 'utf-8' });
            for (const line of statOutput.trim().split('\n')) {
                const parts = line.split('\t');
                if (parts.length >= 3) {
                    // git prints '-' for binary files; surface that as '?'.
                    const added = parts[0] === '-' ? '?' : parts[0];
                    const removed = parts[1] === '-' ? '?' : parts[1];
                    diffStatMap.set(parts[2], `+${added}/-${removed}`);
                }
            }
        }
        catch { } // stats are decoration — ignore failure
        if (changedFiles.length === 0) {
            return {
                summary: { changed_count: 0, affected_count: 0, risk_level: 'none', message: 'No changes detected.' },
                changed_symbols: [],
                affected_processes: [],
            };
        }
        // Map changed files to indexed symbols
        const changedSymbols = [];
        for (const file of changedFiles) {
            // Normalize Windows path separators before matching against the graph.
            const normalizedFile = file.replace(/\\/g, '/');
            try {
                const symbols = await executeParameterized(repo.id, `
        MATCH (n) WHERE n.filePath CONTAINS $filePath
        RETURN n.id AS id, n.name AS name, labels(n) AS type, n.filePath AS filePath, n.startLine AS startLine
        LIMIT 20
      `, { filePath: normalizedFile });
                for (const sym of symbols) {
                    changedSymbols.push({
                        id: sym.id || sym[0],
                        name: sym.name || sym[1],
                        type: sym.type || sym[2],
                        filePath: sym.filePath || sym[3],
                        startLine: sym.startLine ?? sym[4],
                        change_type: 'Modified',
                    });
                }
            }
            catch (e) {
                logQueryError('detect-changes:file-symbols', e);
            }
        }
        // Fix 7: Detect REAL interface changes by comparing signature text
        // Read each changed file from disk and compare the declaration line against
        // the stored n.content signature. Only flags symbols whose actual signature changed,
        // not just any symbol on a line that git touched.
        const fileContentCache = new Map();
        // Memoized file reader; returns null when the file cannot be read.
        const readFileContent = async (fp) => {
            if (fileContentCache.has(fp))
                return fileContentCache.get(fp);
            try {
                const full = path.resolve(repo.repoPath, fp);
                const content = await fs.readFile(full, 'utf-8');
                fileContentCache.set(fp, content);
                return content;
            }
            catch {
                return null;
            }
        };
        // Batch-fetch stored signatures from graph for changed symbols
        const symIds = changedSymbols.filter(s => s.id).map(s => `'${String(s.id).replace(/'/g, "''")}'`);
        const storedSigs = new Map();
        if (symIds.length > 0) {
            try {
                const sigRows = await executeParameterized(repo.id, `
        MATCH (n) WHERE n.id IN [${symIds.join(', ')}]
        RETURN n.id AS id, n.content AS content
      `, {});
                for (const row of sigRows) {
                    const id = String(row.id ?? row[0]);
                    const content = row.content ?? row[1];
                    if (content) {
                        storedSigs.set(id, this.extractSignature(String(content), '', ''));
                    }
                }
            }
            catch { } // no stored signatures → no interface-change detection
        }
        // Compare stored signatures against current file content
        for (const sym of changedSymbols) {
            if (!sym.id || !sym.filePath || !sym.name)
                continue;
            const storedSig = storedSigs.get(sym.id);
            if (!storedSig)
                continue;
            const content = await readFileContent(sym.filePath);
            if (!content)
                continue;
            // Find the current declaration line near the symbol's startLine
            // (±6 lines tolerates the declaration drifting slightly in the edit).
            const lines = content.split('\n');
            const searchStart = Math.max(0, (sym.startLine || 1) - 6);
            const searchEnd = Math.min(lines.length, (sym.startLine || 1) + 6);
            let currentSig = null;
            // Escape the symbol name so regex metacharacters match literally.
            const namePattern = new RegExp(`\\b${sym.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`);
            for (let i = searchStart; i < searchEnd; i++) {
                if (namePattern.test(lines[i])) {
                    currentSig = this.extractSignature(lines.slice(Math.max(0, i - 1), i + 5).join('\n'), sym.name, sym.type);
                    break;
                }
            }
            if (currentSig && storedSig !== currentSig) {
                sym.change_type = 'interface changed';
            }
        }
        // Find affected processes
        const affectedProcesses = new Map();
        for (const sym of changedSymbols) {
            try {
                const procs = await executeParameterized(repo.id, `
        MATCH (n {id: $nodeId})-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process)
        RETURN p.id AS pid, p.heuristicLabel AS label, p.processType AS processType, p.stepCount AS stepCount, r.step AS step
      `, { nodeId: sym.id });
                for (const proc of procs) {
                    const pid = proc.pid || proc[0];
                    if (!affectedProcesses.has(pid)) {
                        affectedProcesses.set(pid, {
                            id: pid,
                            name: proc.label || proc[1],
                            process_type: proc.processType || proc[2],
                            step_count: proc.stepCount || proc[3],
                            changed_steps: [],
                        });
                    }
                    affectedProcesses.get(pid).changed_steps.push({
                        symbol: sym.name,
                        step: proc.step || proc[4],
                    });
                }
            }
            catch (e) {
                logQueryError('detect-changes:process-lookup', e);
            }
        }
        // Risk heuristic: more affected processes → higher blast radius.
        const processCount = affectedProcesses.size;
        const risk = processCount === 0 ? 'low' : processCount <= 5 ? 'medium' : processCount <= 15 ? 'high' : 'critical';
        return {
            summary: {
                changed_count: changedSymbols.length,
                affected_count: processCount,
                changed_files: changedFiles.length,
                risk_level: risk,
            },
            changed_symbols: changedSymbols,
            affected_processes: Array.from(affectedProcesses.values()),
            diff_stats: Object.fromEntries(diffStatMap), // F3: line stats per file
        };
    }
|
|
2296
|
+
/** Rename tool: multi-file coordinated rename using graph (high confidence) + text search */
|
|
2297
|
+
async rename(repo, params) {
|
|
2298
|
+
await this.ensureInitialized(repo.id);
|
|
2299
|
+
const { new_name, file_path } = params;
|
|
2300
|
+
const dry_run = params.dry_run ?? true;
|
|
2301
|
+
if (!params.symbol_name && !params.symbol_uid) {
|
|
2302
|
+
return { error: 'Either symbol_name or symbol_uid is required.' };
|
|
2303
|
+
}
|
|
2304
|
+
/** Guard: ensure a file path resolves within the repo root (prevents path traversal) */
|
|
2305
|
+
const assertSafePath = (filePath) => {
|
|
2306
|
+
const full = path.resolve(repo.repoPath, filePath);
|
|
2307
|
+
if (!full.startsWith(repo.repoPath + path.sep) && full !== repo.repoPath) {
|
|
2308
|
+
throw new Error(`Path traversal blocked: ${filePath}`);
|
|
2309
|
+
}
|
|
2310
|
+
return full;
|
|
2311
|
+
};
|
|
2312
|
+
// Step 1: Find the target symbol (reuse context's lookup)
|
|
2313
|
+
const lookupResult = await this.context(repo, {
|
|
2314
|
+
name: params.symbol_name,
|
|
2315
|
+
uid: params.symbol_uid,
|
|
2316
|
+
file_path,
|
|
2317
|
+
});
|
|
2318
|
+
if (lookupResult.status === 'ambiguous') {
|
|
2319
|
+
return lookupResult; // pass disambiguation through
|
|
2320
|
+
}
|
|
2321
|
+
if (lookupResult.error) {
|
|
2322
|
+
return lookupResult;
|
|
2323
|
+
}
|
|
2324
|
+
const sym = lookupResult.symbol;
|
|
2325
|
+
const oldName = sym.name;
|
|
2326
|
+
if (oldName === new_name) {
|
|
2327
|
+
return { error: 'New name is the same as the current name.' };
|
|
2328
|
+
}
|
|
2329
|
+
// Step 2: Collect edits from graph (high confidence)
|
|
2330
|
+
const changes = new Map();
|
|
2331
|
+
const addEdit = (filePath, line, oldText, newText, confidence) => {
|
|
2332
|
+
if (!changes.has(filePath)) {
|
|
2333
|
+
changes.set(filePath, { file_path: filePath, edits: [] });
|
|
2334
|
+
}
|
|
2335
|
+
changes.get(filePath).edits.push({ line, old_text: oldText, new_text: newText, confidence });
|
|
2336
|
+
};
|
|
2337
|
+
// The definition itself
|
|
2338
|
+
if (sym.filePath && sym.startLine) {
|
|
2339
|
+
try {
|
|
2340
|
+
const content = await fs.readFile(assertSafePath(sym.filePath), 'utf-8');
|
|
2341
|
+
const lines = content.split('\n');
|
|
2342
|
+
const lineIdx = sym.startLine - 1;
|
|
2343
|
+
if (lineIdx >= 0 && lineIdx < lines.length && lines[lineIdx].includes(oldName)) {
|
|
2344
|
+
const defRegex = new RegExp(`\\b${oldName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'g');
|
|
2345
|
+
addEdit(sym.filePath, sym.startLine, lines[lineIdx].trim(), lines[lineIdx].replace(defRegex, new_name).trim(), 'graph');
|
|
2346
|
+
}
|
|
2347
|
+
}
|
|
2348
|
+
catch (e) {
|
|
2349
|
+
logQueryError('rename:read-definition', e);
|
|
2350
|
+
}
|
|
2351
|
+
}
|
|
2352
|
+
// All incoming refs from graph (callers, importers, etc.)
|
|
2353
|
+
const allIncoming = [
|
|
2354
|
+
...(lookupResult.incoming.calls || []),
|
|
2355
|
+
...(lookupResult.incoming.imports || []),
|
|
2356
|
+
...(lookupResult.incoming.extends || []),
|
|
2357
|
+
...(lookupResult.incoming.implements || []),
|
|
2358
|
+
];
|
|
2359
|
+
let graphEdits = changes.size > 0 ? 1 : 0; // count definition edit
|
|
2360
|
+
for (const ref of allIncoming) {
|
|
2361
|
+
if (!ref.filePath)
|
|
2362
|
+
continue;
|
|
2363
|
+
try {
|
|
2364
|
+
const content = await fs.readFile(assertSafePath(ref.filePath), 'utf-8');
|
|
2365
|
+
const lines = content.split('\n');
|
|
2366
|
+
for (let i = 0; i < lines.length; i++) {
|
|
2367
|
+
if (lines[i].includes(oldName)) {
|
|
2368
|
+
addEdit(ref.filePath, i + 1, lines[i].trim(), lines[i].replace(new RegExp(`\\b${oldName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'g'), new_name).trim(), 'graph');
|
|
2369
|
+
graphEdits++;
|
|
2370
|
+
break; // one edit per file from graph refs
|
|
2371
|
+
}
|
|
2372
|
+
}
|
|
2373
|
+
}
|
|
2374
|
+
catch (e) {
|
|
2375
|
+
logQueryError('rename:read-ref', e);
|
|
2376
|
+
}
|
|
2377
|
+
}
|
|
2378
|
+
// Step 3: Text search for refs the graph might have missed
|
|
2379
|
+
let astSearchEdits = 0;
|
|
2380
|
+
const graphFiles = new Set([sym.filePath, ...allIncoming.map(r => r.filePath)].filter(Boolean));
|
|
2381
|
+
// Simple text search across the repo for the old name (in files not already covered by graph)
|
|
2382
|
+
try {
|
|
2383
|
+
const { execFileSync } = await import('child_process');
|
|
2384
|
+
const rgArgs = [
|
|
2385
|
+
'-l',
|
|
2386
|
+
'--type-add', 'code:*.{ts,tsx,js,jsx,py,go,rs,java,c,h,cpp,cc,cxx,hpp,hxx,hh,cs,php,swift}',
|
|
2387
|
+
'-t', 'code',
|
|
2388
|
+
`\\b${oldName}\\b`,
|
|
2389
|
+
'.',
|
|
2390
|
+
];
|
|
2391
|
+
const output = execFileSync('rg', rgArgs, { cwd: repo.repoPath, encoding: 'utf-8', timeout: 5000 });
|
|
2392
|
+
const files = output.trim().split('\n').filter(f => f.length > 0);
|
|
2393
|
+
for (const file of files) {
|
|
2394
|
+
const normalizedFile = file.replace(/\\/g, '/').replace(/^\.\//, '');
|
|
2395
|
+
if (graphFiles.has(normalizedFile))
|
|
2396
|
+
continue; // already covered by graph
|
|
2397
|
+
try {
|
|
2398
|
+
const content = await fs.readFile(assertSafePath(normalizedFile), 'utf-8');
|
|
2399
|
+
const lines = content.split('\n');
|
|
2400
|
+
const regex = new RegExp(`\\b${oldName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'g');
|
|
2401
|
+
for (let i = 0; i < lines.length; i++) {
|
|
2402
|
+
regex.lastIndex = 0;
|
|
2403
|
+
if (regex.test(lines[i])) {
|
|
2404
|
+
regex.lastIndex = 0;
|
|
2405
|
+
addEdit(normalizedFile, i + 1, lines[i].trim(), lines[i].replace(regex, new_name).trim(), 'text_search');
|
|
2406
|
+
astSearchEdits++;
|
|
2407
|
+
}
|
|
2408
|
+
}
|
|
2409
|
+
}
|
|
2410
|
+
catch (e) {
|
|
2411
|
+
logQueryError('rename:text-search-read', e);
|
|
2412
|
+
}
|
|
2413
|
+
}
|
|
2414
|
+
}
|
|
2415
|
+
catch (e) {
|
|
2416
|
+
logQueryError('rename:ripgrep', e);
|
|
2417
|
+
}
|
|
2418
|
+
// Step 4: Apply or preview
|
|
2419
|
+
const allChanges = Array.from(changes.values());
|
|
2420
|
+
const totalEdits = allChanges.reduce((sum, c) => sum + c.edits.length, 0);
|
|
2421
|
+
if (!dry_run) {
|
|
2422
|
+
// Apply edits to files
|
|
2423
|
+
for (const change of allChanges) {
|
|
2424
|
+
try {
|
|
2425
|
+
const fullPath = assertSafePath(change.file_path);
|
|
2426
|
+
let content = await fs.readFile(fullPath, 'utf-8');
|
|
2427
|
+
const regex = new RegExp(`\\b${oldName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'g');
|
|
2428
|
+
content = content.replace(regex, new_name);
|
|
2429
|
+
await fs.writeFile(fullPath, content, 'utf-8');
|
|
2430
|
+
}
|
|
2431
|
+
catch (e) {
|
|
2432
|
+
logQueryError('rename:apply-edit', e);
|
|
2433
|
+
}
|
|
2434
|
+
}
|
|
2435
|
+
}
|
|
2436
|
+
return {
|
|
2437
|
+
status: 'success',
|
|
2438
|
+
old_name: oldName,
|
|
2439
|
+
new_name,
|
|
2440
|
+
files_affected: allChanges.length,
|
|
2441
|
+
total_edits: totalEdits,
|
|
2442
|
+
graph_edits: graphEdits,
|
|
2443
|
+
text_search_edits: astSearchEdits,
|
|
2444
|
+
changes: allChanges,
|
|
2445
|
+
applied: !dry_run,
|
|
2446
|
+
};
|
|
2447
|
+
}
|
|
2448
|
+
async impact(repo, params) {
|
|
2449
|
+
await this.ensureInitialized(repo.id);
|
|
2450
|
+
const { target, direction } = params;
|
|
2451
|
+
const maxDepth = params.maxDepth || 3;
|
|
2452
|
+
const rawRelTypes = params.relationTypes && params.relationTypes.length > 0
|
|
2453
|
+
? params.relationTypes.filter(t => VALID_RELATION_TYPES.has(t))
|
|
2454
|
+
: ['CALLS', 'IMPORTS', 'EXTENDS', 'IMPLEMENTS', 'DEPENDS_ON', 'PROVIDES'];
|
|
2455
|
+
const relationTypes = rawRelTypes.length > 0 ? rawRelTypes : ['CALLS', 'IMPORTS', 'EXTENDS', 'IMPLEMENTS', 'DEPENDS_ON', 'PROVIDES'];
|
|
2456
|
+
const includeTests = params.includeTests ?? false;
|
|
2457
|
+
const minConfidence = params.minConfidence ?? 0.6;
|
|
2458
|
+
// d=1 uses lower threshold (0.5) — direct callers are critical, even dynamic imports
|
|
2459
|
+
// d=2+ uses 0.6 to avoid noise explosion from transitive fuzzy edges
|
|
2460
|
+
const d1Confidence = Math.min(minConfidence, 0.5);
|
|
2461
|
+
// C6: Use OR chain instead of IN list — KuzuDB IN list silently drops results
|
|
2462
|
+
const relTypeFilter = relationTypes.map(t => `r.type = '${t}'`).join(' OR ');
|
|
2463
|
+
// Generic method names that produce false positives at low confidence
|
|
2464
|
+
const IMPACT_GENERIC_NAMES = new Set(['has', 'get', 'set', 'add', 'remove', 'delete', 'close', 'stop', 'clear', 'reset', 'toString', 'valueOf', 'push', 'pop', 'entries', 'keys', 'values']);
|
|
2465
|
+
const targets = await executeParameterized(repo.id, `
|
|
2466
|
+
MATCH (n)
|
|
2467
|
+
WHERE n.name = $targetName
|
|
2468
|
+
RETURN n.id AS id, n.name AS name, labels(n) AS type, n.filePath AS filePath
|
|
2469
|
+
LIMIT 1
|
|
2470
|
+
`, { targetName: target });
|
|
2471
|
+
if (targets.length === 0)
|
|
2472
|
+
return { error: `Target '${target}' not found` };
|
|
2473
|
+
const sym = targets[0];
|
|
2474
|
+
const symId = sym.id || sym[0];
|
|
2475
|
+
// Safety caps: prevent OOM / segfaults on high-fan-in graphs.
|
|
2476
|
+
// Without caps, a hub with 50 callers explodes: d1=50, d2=1500, d3=45000,
|
|
2477
|
+
// producing ~400KB Cypher WHERE-IN clauses that crash LadybugDB.
|
|
2478
|
+
const MAX_IMPACTED = 500;
|
|
2479
|
+
const MAX_FRONTIER_PER_DEPTH = 200;
|
|
2480
|
+
const impacted = [];
|
|
2481
|
+
const visited = new Set([symId]);
|
|
2482
|
+
let frontier = [symId];
|
|
2483
|
+
let truncated = false;
|
|
2484
|
+
for (let depth = 1; depth <= maxDepth && frontier.length > 0; depth++) {
|
|
2485
|
+
const nextFrontier = [];
|
|
2486
|
+
// Cap frontier to prevent massive Cypher queries
|
|
2487
|
+
const effectiveFrontier = frontier.length > MAX_FRONTIER_PER_DEPTH
|
|
2488
|
+
? (truncated = true, frontier.slice(0, MAX_FRONTIER_PER_DEPTH))
|
|
2489
|
+
: frontier;
|
|
2490
|
+
// Batch frontier nodes into a single Cypher query per depth level
|
|
2491
|
+
const idList = effectiveFrontier.map(id => `'${id.replace(/'/g, "''")}'`).join(', ');
|
|
2492
|
+
// Per-depth confidence: d=1 uses lower threshold to catch dynamic imports
|
|
2493
|
+
const depthConfidence = depth === 1 ? d1Confidence : minConfidence;
|
|
2494
|
+
const confidenceFilter = depthConfidence > 0 ? ` AND r.confidence >= ${depthConfidence}` : '';
|
|
2495
|
+
const query = direction === 'upstream'
|
|
2496
|
+
? `MATCH (caller)-[r:CodeRelation]->(n) WHERE n.id IN [${idList}] AND (${relTypeFilter})${confidenceFilter} RETURN n.id AS sourceId, caller.id AS id, caller.name AS name, labels(caller) AS nodeType, caller.filePath AS filePath, r.type AS relType, r.confidence AS confidence`
|
|
2497
|
+
: `MATCH (n)-[r:CodeRelation]->(callee) WHERE n.id IN [${idList}] AND (${relTypeFilter})${confidenceFilter} RETURN n.id AS sourceId, callee.id AS id, callee.name AS name, labels(callee) AS nodeType, callee.filePath AS filePath, r.type AS relType, r.confidence AS confidence`;
|
|
2498
|
+
try {
|
|
2499
|
+
const related = await executeQuery(repo.id, query);
|
|
2500
|
+
for (const rel of related) {
|
|
2501
|
+
const relId = rel.id || rel[1];
|
|
2502
|
+
const filePath = rel.filePath || rel[4] || '';
|
|
2503
|
+
if (!includeTests && isTestFilePath(filePath))
|
|
2504
|
+
continue;
|
|
2505
|
+
// Skip generic method names at low confidence (false positives like Map.has → type-env.has)
|
|
2506
|
+
const relName = rel.name || rel[2] || '';
|
|
2507
|
+
const relConf = rel.confidence || rel[6] || 1.0;
|
|
2508
|
+
if (relConf < 0.6 && IMPACT_GENERIC_NAMES.has(relName))
|
|
2509
|
+
continue;
|
|
2510
|
+
if (!visited.has(relId)) {
|
|
2511
|
+
visited.add(relId);
|
|
2512
|
+
nextFrontier.push(relId);
|
|
2513
|
+
impacted.push({
|
|
2514
|
+
depth,
|
|
2515
|
+
id: relId,
|
|
2516
|
+
name: rel.name || rel[2],
|
|
2517
|
+
type: rel.nodeType || rel[3] || 'Symbol',
|
|
2518
|
+
filePath,
|
|
2519
|
+
relationType: rel.relType || rel[5],
|
|
2520
|
+
confidence: rel.confidence || rel[6] || 1.0,
|
|
2521
|
+
});
|
|
2522
|
+
// Cap total impacted count
|
|
2523
|
+
if (impacted.length >= MAX_IMPACTED) {
|
|
2524
|
+
truncated = true;
|
|
2525
|
+
break;
|
|
2526
|
+
}
|
|
2527
|
+
}
|
|
2528
|
+
}
|
|
2529
|
+
}
|
|
2530
|
+
catch (e) {
|
|
2531
|
+
logQueryError('impact:depth-traversal', e);
|
|
2532
|
+
}
|
|
2533
|
+
if (impacted.length >= MAX_IMPACTED)
|
|
2534
|
+
break;
|
|
2535
|
+
frontier = nextFrontier;
|
|
2536
|
+
}
|
|
2537
|
+
const grouped = {};
|
|
2538
|
+
for (const item of impacted) {
|
|
2539
|
+
if (!grouped[item.depth])
|
|
2540
|
+
grouped[item.depth] = [];
|
|
2541
|
+
grouped[item.depth].push(item);
|
|
2542
|
+
}
|
|
2543
|
+
// ── Enrichment: affected processes, modules, risk ──────────────
|
|
2544
|
+
const directCount = (grouped[1] || []).length;
|
|
2545
|
+
let affectedProcesses = [];
|
|
2546
|
+
let affectedModules = [];
|
|
2547
|
+
if (impacted.length > 0) {
|
|
2548
|
+
const allIds = impacted.map(i => `'${i.id.replace(/'/g, "''")}'`).join(', ');
|
|
2549
|
+
const d1Ids = (grouped[1] || []).map((i) => `'${i.id.replace(/'/g, "''")}'`).join(', ');
|
|
2550
|
+
// Affected processes: which execution flows are broken and at which step
|
|
2551
|
+
const [processRows, moduleRows, directModuleRows] = await Promise.all([
|
|
2552
|
+
executeQuery(repo.id, `
|
|
2553
|
+
MATCH (s)-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process)
|
|
2554
|
+
WHERE s.id IN [${allIds}]
|
|
2555
|
+
RETURN p.heuristicLabel AS name, COUNT(DISTINCT s.id) AS hits, MIN(r.step) AS minStep, p.stepCount AS stepCount
|
|
2556
|
+
ORDER BY hits DESC
|
|
2557
|
+
LIMIT 20
|
|
2558
|
+
`).catch(() => []),
|
|
2559
|
+
executeQuery(repo.id, `
|
|
2560
|
+
MATCH (s)-[:CodeRelation {type: 'MEMBER_OF'}]->(c:Community)
|
|
2561
|
+
WHERE s.id IN [${allIds}]
|
|
2562
|
+
RETURN c.heuristicLabel AS name, COUNT(DISTINCT s.id) AS hits
|
|
2563
|
+
ORDER BY hits DESC
|
|
2564
|
+
LIMIT 20
|
|
2565
|
+
`).catch(() => []),
|
|
2566
|
+
d1Ids ? executeQuery(repo.id, `
|
|
2567
|
+
MATCH (s)-[:CodeRelation {type: 'MEMBER_OF'}]->(c:Community)
|
|
2568
|
+
WHERE s.id IN [${d1Ids}]
|
|
2569
|
+
RETURN DISTINCT c.heuristicLabel AS name
|
|
2570
|
+
LIMIT 20
|
|
2571
|
+
`).catch(() => []) : Promise.resolve([]),
|
|
2572
|
+
]);
|
|
2573
|
+
affectedProcesses = processRows.map((r) => ({
|
|
2574
|
+
name: r.name || r[0],
|
|
2575
|
+
hits: r.hits || r[1],
|
|
2576
|
+
broken_at_step: r.minStep ?? r[2],
|
|
2577
|
+
step_count: r.stepCount ?? r[3],
|
|
2578
|
+
}));
|
|
2579
|
+
const directModuleSet = new Set(directModuleRows.map((r) => r.name || r[0]));
|
|
2580
|
+
affectedModules = moduleRows.map((r) => {
|
|
2581
|
+
const name = r.name || r[0];
|
|
2582
|
+
return {
|
|
2583
|
+
name,
|
|
2584
|
+
hits: r.hits || r[1],
|
|
2585
|
+
impact: directModuleSet.has(name) ? 'direct' : 'indirect',
|
|
2586
|
+
};
|
|
2587
|
+
});
|
|
2588
|
+
}
|
|
2589
|
+
// Risk scoring
|
|
2590
|
+
const processCount = affectedProcesses.length;
|
|
2591
|
+
const moduleCount = affectedModules.length;
|
|
2592
|
+
let risk = 'LOW';
|
|
2593
|
+
if (directCount >= 30 || processCount >= 5 || moduleCount >= 5 || impacted.length >= 200) {
|
|
2594
|
+
risk = 'CRITICAL';
|
|
2595
|
+
}
|
|
2596
|
+
else if (directCount >= 15 || processCount >= 3 || moduleCount >= 3 || impacted.length >= 100) {
|
|
2597
|
+
risk = 'HIGH';
|
|
2598
|
+
}
|
|
2599
|
+
else if (directCount >= 5 || impacted.length >= 30) {
|
|
2600
|
+
risk = 'MEDIUM';
|
|
2601
|
+
}
|
|
2602
|
+
return {
|
|
2603
|
+
target: {
|
|
2604
|
+
id: symId,
|
|
2605
|
+
name: sym.name || sym[1],
|
|
2606
|
+
type: sym.type || sym[2],
|
|
2607
|
+
filePath: sym.filePath || sym[3],
|
|
2608
|
+
},
|
|
2609
|
+
direction,
|
|
2610
|
+
impactedCount: impacted.length,
|
|
2611
|
+
risk,
|
|
2612
|
+
...(truncated ? { truncated: true } : {}),
|
|
2613
|
+
summary: {
|
|
2614
|
+
direct: directCount,
|
|
2615
|
+
processes_affected: processCount,
|
|
2616
|
+
modules_affected: moduleCount,
|
|
2617
|
+
},
|
|
2618
|
+
affected_processes: affectedProcesses,
|
|
2619
|
+
affected_modules: affectedModules,
|
|
2620
|
+
byDepth: grouped,
|
|
2621
|
+
};
|
|
2622
|
+
}
|
|
2623
|
+
// Direct Graph Queries (for resources.ts)
|
|
2624
|
+
/** Query clusters (communities) directly from graph for getClustersResource */
|
|
2625
|
+
async queryClusters(repoName, limit = 100) {
|
|
2626
|
+
const repo = await this.resolveRepo(repoName);
|
|
2627
|
+
await this.ensureInitialized(repo.id);
|
|
2628
|
+
try {
|
|
2629
|
+
const rawLimit = Math.max(limit * 5, 200);
|
|
2630
|
+
const clusters = await executeQuery(repo.id, `
|
|
2631
|
+
MATCH (c:Community)
|
|
2632
|
+
RETURN c.id AS id, c.label AS label, c.heuristicLabel AS heuristicLabel, c.cohesion AS cohesion, c.symbolCount AS symbolCount
|
|
2633
|
+
ORDER BY c.symbolCount DESC
|
|
2634
|
+
LIMIT ${rawLimit}
|
|
2635
|
+
`);
|
|
2636
|
+
const rawClusters = clusters.map((c) => ({
|
|
2637
|
+
id: c.id || c[0],
|
|
2638
|
+
label: c.label || c[1],
|
|
2639
|
+
heuristicLabel: c.heuristicLabel || c[2],
|
|
2640
|
+
cohesion: c.cohesion || c[3],
|
|
2641
|
+
symbolCount: c.symbolCount || c[4],
|
|
2642
|
+
}));
|
|
2643
|
+
return { clusters: this.aggregateClusters(rawClusters).slice(0, limit) };
|
|
2644
|
+
}
|
|
2645
|
+
catch {
|
|
2646
|
+
return { clusters: [] };
|
|
2647
|
+
}
|
|
2648
|
+
}
|
|
2649
|
+
/** Query processes directly from graph for getProcessesResource */
|
|
2650
|
+
async queryProcesses(repoName, limit = 50) {
|
|
2651
|
+
const repo = await this.resolveRepo(repoName);
|
|
2652
|
+
await this.ensureInitialized(repo.id);
|
|
2653
|
+
try {
|
|
2654
|
+
const processes = await executeQuery(repo.id, `
|
|
2655
|
+
MATCH (p:Process)
|
|
2656
|
+
RETURN p.id AS id, p.label AS label, p.heuristicLabel AS heuristicLabel, p.processType AS processType, p.stepCount AS stepCount
|
|
2657
|
+
ORDER BY p.stepCount DESC
|
|
2658
|
+
LIMIT ${limit}
|
|
2659
|
+
`);
|
|
2660
|
+
return {
|
|
2661
|
+
processes: processes.map((p) => ({
|
|
2662
|
+
id: p.id || p[0],
|
|
2663
|
+
label: p.label || p[1],
|
|
2664
|
+
heuristicLabel: p.heuristicLabel || p[2],
|
|
2665
|
+
processType: p.processType || p[3],
|
|
2666
|
+
stepCount: p.stepCount || p[4],
|
|
2667
|
+
})),
|
|
2668
|
+
};
|
|
2669
|
+
}
|
|
2670
|
+
catch {
|
|
2671
|
+
return { processes: [] };
|
|
2672
|
+
}
|
|
2673
|
+
}
|
|
2674
|
+
/** Query cluster detail (members) for getClusterDetailResource */
|
|
2675
|
+
async queryClusterDetail(name, repoName) {
|
|
2676
|
+
const repo = await this.resolveRepo(repoName);
|
|
2677
|
+
await this.ensureInitialized(repo.id);
|
|
2678
|
+
const clusters = await executeParameterized(repo.id, `
|
|
2679
|
+
MATCH (c:Community)
|
|
2680
|
+
WHERE c.label = $clusterName OR c.heuristicLabel = $clusterName
|
|
2681
|
+
RETURN c.id AS id, c.label AS label, c.heuristicLabel AS heuristicLabel, c.cohesion AS cohesion, c.symbolCount AS symbolCount
|
|
2682
|
+
`, { clusterName: name });
|
|
2683
|
+
if (clusters.length === 0)
|
|
2684
|
+
return { error: `Cluster '${name}' not found` };
|
|
2685
|
+
const rawClusters = clusters.map((c) => ({
|
|
2686
|
+
id: c.id || c[0], label: c.label || c[1], heuristicLabel: c.heuristicLabel || c[2],
|
|
2687
|
+
cohesion: c.cohesion || c[3], symbolCount: c.symbolCount || c[4],
|
|
2688
|
+
}));
|
|
2689
|
+
let totalSymbols = 0, weightedCohesion = 0;
|
|
2690
|
+
for (const c of rawClusters) {
|
|
2691
|
+
const s = c.symbolCount || 0;
|
|
2692
|
+
totalSymbols += s;
|
|
2693
|
+
weightedCohesion += (c.cohesion || 0) * s;
|
|
2694
|
+
}
|
|
2695
|
+
const members = await executeParameterized(repo.id, `
|
|
2696
|
+
MATCH (n)-[:CodeRelation {type: 'MEMBER_OF'}]->(c:Community)
|
|
2697
|
+
WHERE c.label = $clusterName OR c.heuristicLabel = $clusterName
|
|
2698
|
+
RETURN DISTINCT n.name AS name, labels(n) AS type, n.filePath AS filePath
|
|
2699
|
+
LIMIT 30
|
|
2700
|
+
`, { clusterName: name });
|
|
2701
|
+
return {
|
|
2702
|
+
cluster: {
|
|
2703
|
+
id: rawClusters[0].id,
|
|
2704
|
+
label: rawClusters[0].heuristicLabel || rawClusters[0].label,
|
|
2705
|
+
heuristicLabel: rawClusters[0].heuristicLabel || rawClusters[0].label,
|
|
2706
|
+
cohesion: totalSymbols > 0 ? weightedCohesion / totalSymbols : 0,
|
|
2707
|
+
symbolCount: totalSymbols,
|
|
2708
|
+
subCommunities: rawClusters.length,
|
|
2709
|
+
},
|
|
2710
|
+
members: members.map((m) => ({
|
|
2711
|
+
name: m.name || m[0], type: m.type || m[1], filePath: m.filePath || m[2],
|
|
2712
|
+
})),
|
|
2713
|
+
};
|
|
2714
|
+
}
|
|
2715
|
+
/** Query process detail (steps) for getProcessDetailResource */
|
|
2716
|
+
async queryProcessDetail(name, repoName) {
|
|
2717
|
+
const repo = await this.resolveRepo(repoName);
|
|
2718
|
+
await this.ensureInitialized(repo.id);
|
|
2719
|
+
const processes = await executeParameterized(repo.id, `
|
|
2720
|
+
MATCH (p:Process)
|
|
2721
|
+
WHERE p.label = $processName OR p.heuristicLabel = $processName
|
|
2722
|
+
RETURN p.id AS id, p.label AS label, p.heuristicLabel AS heuristicLabel, p.processType AS processType, p.stepCount AS stepCount
|
|
2723
|
+
LIMIT 1
|
|
2724
|
+
`, { processName: name });
|
|
2725
|
+
if (processes.length === 0)
|
|
2726
|
+
return { error: `Process '${name}' not found` };
|
|
2727
|
+
const proc = processes[0];
|
|
2728
|
+
const procId = proc.id || proc[0];
|
|
2729
|
+
const steps = await executeParameterized(repo.id, `
|
|
2730
|
+
MATCH (n)-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p {id: $procId})
|
|
2731
|
+
RETURN n.name AS name, labels(n) AS type, n.filePath AS filePath, r.step AS step
|
|
2732
|
+
ORDER BY r.step
|
|
2733
|
+
`, { procId });
|
|
2734
|
+
return {
|
|
2735
|
+
process: {
|
|
2736
|
+
id: procId, label: proc.label || proc[1], heuristicLabel: proc.heuristicLabel || proc[2],
|
|
2737
|
+
processType: proc.processType || proc[3], stepCount: proc.stepCount || proc[4],
|
|
2738
|
+
},
|
|
2739
|
+
steps: steps.map((s) => ({
|
|
2740
|
+
step: s.step || s[3], name: s.name || s[0], type: s.type || s[1], filePath: s.filePath || s[2],
|
|
2741
|
+
})),
|
|
2742
|
+
};
|
|
2743
|
+
}
|
|
2744
|
+
async disconnect() {
|
|
2745
|
+
for (const watcher of this.watchers.values())
|
|
2746
|
+
watcher.stop();
|
|
2747
|
+
this.watchers.clear();
|
|
2748
|
+
stopTsgoService();
|
|
2749
|
+
await closeLbug(); // close all connections
|
|
2750
|
+
// Note: we intentionally do NOT call disposeEmbedder() here.
|
|
2751
|
+
// ONNX Runtime's native cleanup segfaults on macOS and some Linux configs,
|
|
2752
|
+
// and importing the embedder module on Node v24+ crashes if onnxruntime
|
|
2753
|
+
// was never loaded during the session. Since process.exit(0) follows
|
|
2754
|
+
// immediately after disconnect(), the OS reclaims everything. See #38, #89.
|
|
2755
|
+
this.repos.clear();
|
|
2756
|
+
this.contextCache.clear();
|
|
2757
|
+
this.initializedRepos.clear();
|
|
2758
|
+
}
|
|
2759
|
+
}
|