gitnexus 1.4.1 → 1.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +215 -194
- package/dist/cli/ai-context.d.ts +2 -1
- package/dist/cli/ai-context.js +117 -90
- package/dist/cli/analyze.d.ts +2 -0
- package/dist/cli/analyze.js +57 -30
- package/dist/cli/augment.js +1 -1
- package/dist/cli/eval-server.d.ts +1 -1
- package/dist/cli/eval-server.js +14 -6
- package/dist/cli/index.js +18 -25
- package/dist/cli/lazy-action.d.ts +6 -0
- package/dist/cli/lazy-action.js +18 -0
- package/dist/cli/mcp.js +1 -1
- package/dist/cli/setup.js +42 -32
- package/dist/cli/skill-gen.d.ts +26 -0
- package/dist/cli/skill-gen.js +549 -0
- package/dist/cli/status.js +13 -4
- package/dist/cli/tool.d.ts +3 -2
- package/dist/cli/tool.js +48 -13
- package/dist/cli/wiki.js +2 -2
- package/dist/config/ignore-service.d.ts +25 -0
- package/dist/config/ignore-service.js +76 -0
- package/dist/config/supported-languages.d.ts +1 -0
- package/dist/config/supported-languages.js +1 -1
- package/dist/core/augmentation/engine.js +99 -72
- package/dist/core/embeddings/embedder.d.ts +1 -1
- package/dist/core/embeddings/embedder.js +1 -1
- package/dist/core/embeddings/embedding-pipeline.d.ts +3 -3
- package/dist/core/embeddings/embedding-pipeline.js +74 -47
- package/dist/core/embeddings/types.d.ts +1 -1
- package/dist/core/graph/types.d.ts +5 -2
- package/dist/core/ingestion/ast-cache.js +3 -2
- package/dist/core/ingestion/call-processor.d.ts +5 -7
- package/dist/core/ingestion/call-processor.js +430 -283
- package/dist/core/ingestion/call-routing.d.ts +53 -0
- package/dist/core/ingestion/call-routing.js +108 -0
- package/dist/core/ingestion/cluster-enricher.js +16 -16
- package/dist/core/ingestion/constants.d.ts +16 -0
- package/dist/core/ingestion/constants.js +16 -0
- package/dist/core/ingestion/entry-point-scoring.d.ts +2 -1
- package/dist/core/ingestion/entry-point-scoring.js +94 -24
- package/dist/core/ingestion/export-detection.d.ts +18 -0
- package/dist/core/ingestion/export-detection.js +231 -0
- package/dist/core/ingestion/filesystem-walker.js +4 -3
- package/dist/core/ingestion/framework-detection.d.ts +5 -1
- package/dist/core/ingestion/framework-detection.js +48 -8
- package/dist/core/ingestion/heritage-processor.d.ts +13 -5
- package/dist/core/ingestion/heritage-processor.js +109 -55
- package/dist/core/ingestion/import-processor.d.ts +16 -20
- package/dist/core/ingestion/import-processor.js +202 -696
- package/dist/core/ingestion/language-config.d.ts +46 -0
- package/dist/core/ingestion/language-config.js +167 -0
- package/dist/core/ingestion/mro-processor.d.ts +45 -0
- package/dist/core/ingestion/mro-processor.js +369 -0
- package/dist/core/ingestion/named-binding-extraction.d.ts +61 -0
- package/dist/core/ingestion/named-binding-extraction.js +363 -0
- package/dist/core/ingestion/parsing-processor.d.ts +3 -11
- package/dist/core/ingestion/parsing-processor.js +85 -181
- package/dist/core/ingestion/pipeline.d.ts +5 -1
- package/dist/core/ingestion/pipeline.js +192 -116
- package/dist/core/ingestion/process-processor.js +2 -1
- package/dist/core/ingestion/resolution-context.d.ts +53 -0
- package/dist/core/ingestion/resolution-context.js +132 -0
- package/dist/core/ingestion/resolvers/csharp.d.ts +22 -0
- package/dist/core/ingestion/resolvers/csharp.js +109 -0
- package/dist/core/ingestion/resolvers/go.d.ts +19 -0
- package/dist/core/ingestion/resolvers/go.js +42 -0
- package/dist/core/ingestion/resolvers/index.d.ts +18 -0
- package/dist/core/ingestion/resolvers/index.js +13 -0
- package/dist/core/ingestion/resolvers/jvm.d.ts +23 -0
- package/dist/core/ingestion/resolvers/jvm.js +87 -0
- package/dist/core/ingestion/resolvers/php.d.ts +15 -0
- package/dist/core/ingestion/resolvers/php.js +35 -0
- package/dist/core/ingestion/resolvers/python.d.ts +19 -0
- package/dist/core/ingestion/resolvers/python.js +52 -0
- package/dist/core/ingestion/resolvers/ruby.d.ts +12 -0
- package/dist/core/ingestion/resolvers/ruby.js +15 -0
- package/dist/core/ingestion/resolvers/rust.d.ts +15 -0
- package/dist/core/ingestion/resolvers/rust.js +73 -0
- package/dist/core/ingestion/resolvers/standard.d.ts +28 -0
- package/dist/core/ingestion/resolvers/standard.js +123 -0
- package/dist/core/ingestion/resolvers/utils.d.ts +33 -0
- package/dist/core/ingestion/resolvers/utils.js +122 -0
- package/dist/core/ingestion/symbol-table.d.ts +21 -1
- package/dist/core/ingestion/symbol-table.js +40 -12
- package/dist/core/ingestion/tree-sitter-queries.d.ts +12 -11
- package/dist/core/ingestion/tree-sitter-queries.js +642 -485
- package/dist/core/ingestion/type-env.d.ts +49 -0
- package/dist/core/ingestion/type-env.js +611 -0
- package/dist/core/ingestion/type-extractors/c-cpp.d.ts +2 -0
- package/dist/core/ingestion/type-extractors/c-cpp.js +385 -0
- package/dist/core/ingestion/type-extractors/csharp.d.ts +2 -0
- package/dist/core/ingestion/type-extractors/csharp.js +383 -0
- package/dist/core/ingestion/type-extractors/go.d.ts +2 -0
- package/dist/core/ingestion/type-extractors/go.js +467 -0
- package/dist/core/ingestion/type-extractors/index.d.ts +22 -0
- package/dist/core/ingestion/type-extractors/index.js +31 -0
- package/dist/core/ingestion/type-extractors/jvm.d.ts +3 -0
- package/dist/core/ingestion/type-extractors/jvm.js +681 -0
- package/dist/core/ingestion/type-extractors/php.d.ts +2 -0
- package/dist/core/ingestion/type-extractors/php.js +549 -0
- package/dist/core/ingestion/type-extractors/python.d.ts +2 -0
- package/dist/core/ingestion/type-extractors/python.js +406 -0
- package/dist/core/ingestion/type-extractors/ruby.d.ts +2 -0
- package/dist/core/ingestion/type-extractors/ruby.js +389 -0
- package/dist/core/ingestion/type-extractors/rust.d.ts +2 -0
- package/dist/core/ingestion/type-extractors/rust.js +449 -0
- package/dist/core/ingestion/type-extractors/shared.d.ts +133 -0
- package/dist/core/ingestion/type-extractors/shared.js +703 -0
- package/dist/core/ingestion/type-extractors/swift.d.ts +2 -0
- package/dist/core/ingestion/type-extractors/swift.js +137 -0
- package/dist/core/ingestion/type-extractors/types.d.ts +127 -0
- package/dist/core/ingestion/type-extractors/types.js +1 -0
- package/dist/core/ingestion/type-extractors/typescript.d.ts +2 -0
- package/dist/core/ingestion/type-extractors/typescript.js +494 -0
- package/dist/core/ingestion/utils.d.ts +98 -0
- package/dist/core/ingestion/utils.js +1064 -9
- package/dist/core/ingestion/workers/parse-worker.d.ts +38 -4
- package/dist/core/ingestion/workers/parse-worker.js +251 -359
- package/dist/core/ingestion/workers/worker-pool.js +8 -0
- package/dist/core/{kuzu → lbug}/csv-generator.d.ts +1 -1
- package/dist/core/{kuzu → lbug}/csv-generator.js +20 -4
- package/dist/core/{kuzu/kuzu-adapter.d.ts → lbug/lbug-adapter.d.ts} +19 -19
- package/dist/core/{kuzu/kuzu-adapter.js → lbug/lbug-adapter.js} +82 -82
- package/dist/core/{kuzu → lbug}/schema.d.ts +4 -4
- package/dist/core/{kuzu → lbug}/schema.js +304 -289
- package/dist/core/search/bm25-index.d.ts +4 -4
- package/dist/core/search/bm25-index.js +17 -16
- package/dist/core/search/hybrid-search.d.ts +2 -2
- package/dist/core/search/hybrid-search.js +9 -9
- package/dist/core/tree-sitter/parser-loader.js +9 -2
- package/dist/core/wiki/generator.d.ts +4 -52
- package/dist/core/wiki/generator.js +53 -552
- package/dist/core/wiki/graph-queries.d.ts +4 -46
- package/dist/core/wiki/graph-queries.js +103 -282
- package/dist/core/wiki/html-viewer.js +192 -192
- package/dist/core/wiki/llm-client.js +11 -73
- package/dist/core/wiki/prompts.d.ts +8 -52
- package/dist/core/wiki/prompts.js +86 -200
- package/dist/mcp/compatible-stdio-transport.d.ts +25 -0
- package/dist/mcp/compatible-stdio-transport.js +200 -0
- package/dist/mcp/core/{kuzu-adapter.d.ts → lbug-adapter.d.ts} +7 -9
- package/dist/mcp/core/{kuzu-adapter.js → lbug-adapter.js} +77 -79
- package/dist/mcp/local/local-backend.d.ts +7 -6
- package/dist/mcp/local/local-backend.js +176 -147
- package/dist/mcp/resources.js +42 -42
- package/dist/mcp/server.js +18 -19
- package/dist/mcp/tools.js +103 -104
- package/dist/server/api.js +12 -12
- package/dist/server/mcp-http.d.ts +1 -1
- package/dist/server/mcp-http.js +1 -1
- package/dist/storage/repo-manager.d.ts +20 -2
- package/dist/storage/repo-manager.js +55 -1
- package/dist/types/pipeline.d.ts +1 -1
- package/hooks/claude/gitnexus-hook.cjs +238 -155
- package/hooks/claude/pre-tool-use.sh +79 -79
- package/hooks/claude/session-start.sh +42 -42
- package/package.json +99 -96
- package/scripts/patch-tree-sitter-swift.cjs +74 -74
- package/skills/gitnexus-cli.md +82 -82
- package/skills/gitnexus-debugging.md +89 -89
- package/skills/gitnexus-exploring.md +78 -78
- package/skills/gitnexus-guide.md +64 -64
- package/skills/gitnexus-impact-analysis.md +97 -97
- package/skills/gitnexus-pr-review.md +163 -163
- package/skills/gitnexus-refactoring.md +121 -121
- package/vendor/leiden/index.cjs +355 -355
- package/vendor/leiden/utils.cjs +392 -392
- package/dist/core/wiki/diagrams.d.ts +0 -27
- package/dist/core/wiki/diagrams.js +0 -163
package/dist/cli/wiki.js
CHANGED
|
@@ -86,7 +86,7 @@ export const wikiCommand = async (inputPath, options) => {
|
|
|
86
86
|
return;
|
|
87
87
|
}
|
|
88
88
|
// ── Check for existing index ────────────────────────────────────────
|
|
89
|
-
const { storagePath,
|
|
89
|
+
const { storagePath, lbugPath } = getStoragePaths(repoPath);
|
|
90
90
|
const meta = await loadMeta(storagePath);
|
|
91
91
|
if (!meta) {
|
|
92
92
|
console.log(' Error: No GitNexus index found.');
|
|
@@ -217,7 +217,7 @@ export const wikiCommand = async (inputPath, options) => {
|
|
|
217
217
|
baseUrl: options?.baseUrl,
|
|
218
218
|
concurrency: options?.concurrency ? parseInt(options.concurrency, 10) : undefined,
|
|
219
219
|
};
|
|
220
|
-
const generator = new WikiGenerator(repoPath, storagePath,
|
|
220
|
+
const generator = new WikiGenerator(repoPath, storagePath, lbugPath, llmConfig, wikiOptions, (phase, percent, detail) => {
|
|
221
221
|
const label = detail || phase;
|
|
222
222
|
if (label !== lastPhase) {
|
|
223
223
|
lastPhase = label;
|
|
@@ -1 +1,26 @@
|
|
|
1
|
+
import { type Ignore } from 'ignore';
|
|
2
|
+
import type { Path } from 'path-scurry';
|
|
1
3
|
export declare const shouldIgnorePath: (filePath: string) => boolean;
|
|
4
|
+
/** Check if a directory name is in the hardcoded ignore list */
|
|
5
|
+
export declare const isHardcodedIgnoredDirectory: (name: string) => boolean;
|
|
6
|
+
/**
|
|
7
|
+
* Load .gitignore and .gitnexusignore rules from the repo root.
|
|
8
|
+
* Returns an `ignore` instance with all patterns, or null if no files found.
|
|
9
|
+
*/
|
|
10
|
+
export interface IgnoreOptions {
|
|
11
|
+
/** Skip .gitignore parsing, only read .gitnexusignore. Defaults to GITNEXUS_NO_GITIGNORE env var. */
|
|
12
|
+
noGitignore?: boolean;
|
|
13
|
+
}
|
|
14
|
+
export declare const loadIgnoreRules: (repoPath: string, options?: IgnoreOptions) => Promise<Ignore | null>;
|
|
15
|
+
/**
|
|
16
|
+
* Create a glob-compatible ignore filter combining:
|
|
17
|
+
* - .gitignore / .gitnexusignore patterns (via `ignore` package)
|
|
18
|
+
* - Hardcoded DEFAULT_IGNORE_LIST, IGNORED_EXTENSIONS, IGNORED_FILES
|
|
19
|
+
*
|
|
20
|
+
* Returns an IgnoreLike object for glob's `ignore` option,
|
|
21
|
+
* enabling directory-level pruning during traversal.
|
|
22
|
+
*/
|
|
23
|
+
export declare const createIgnoreFilter: (repoPath: string, options?: IgnoreOptions) => Promise<{
|
|
24
|
+
ignored(p: Path): boolean;
|
|
25
|
+
childrenIgnored(p: Path): boolean;
|
|
26
|
+
}>;
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import ignore from 'ignore';
|
|
2
|
+
import fs from 'fs/promises';
|
|
3
|
+
import nodePath from 'path';
|
|
1
4
|
const DEFAULT_IGNORE_LIST = new Set([
|
|
2
5
|
// Version Control
|
|
3
6
|
'.git',
|
|
@@ -161,6 +164,10 @@ const IGNORED_FILES = new Set([
|
|
|
161
164
|
'.env.test',
|
|
162
165
|
'.env.example',
|
|
163
166
|
]);
|
|
167
|
+
// NOTE: Negation patterns in .gitnexusignore (e.g. `!vendor/`) cannot override
|
|
168
|
+
// entries in DEFAULT_IGNORE_LIST — this is intentional. The hardcoded list protects
|
|
169
|
+
// against indexing directories that are almost never source code (node_modules, .git, etc.).
|
|
170
|
+
// Users who need to include such directories should remove them from the hardcoded list.
|
|
164
171
|
export const shouldIgnorePath = (filePath) => {
|
|
165
172
|
const normalizedPath = filePath.replace(/\\/g, '/');
|
|
166
173
|
const parts = normalizedPath.split('/');
|
|
@@ -206,3 +213,72 @@ export const shouldIgnorePath = (filePath) => {
|
|
|
206
213
|
}
|
|
207
214
|
return false;
|
|
208
215
|
};
|
|
216
|
+
/** Check if a directory name is in the hardcoded ignore list */
|
|
217
|
+
export const isHardcodedIgnoredDirectory = (name) => {
|
|
218
|
+
return DEFAULT_IGNORE_LIST.has(name);
|
|
219
|
+
};
|
|
220
|
+
export const loadIgnoreRules = async (repoPath, options) => {
|
|
221
|
+
const ig = ignore();
|
|
222
|
+
let hasRules = false;
|
|
223
|
+
// Allow users to bypass .gitignore parsing (e.g. when .gitignore accidentally excludes source files)
|
|
224
|
+
const skipGitignore = options?.noGitignore ?? !!process.env.GITNEXUS_NO_GITIGNORE;
|
|
225
|
+
const filenames = skipGitignore
|
|
226
|
+
? ['.gitnexusignore']
|
|
227
|
+
: ['.gitignore', '.gitnexusignore'];
|
|
228
|
+
for (const filename of filenames) {
|
|
229
|
+
try {
|
|
230
|
+
const content = await fs.readFile(nodePath.join(repoPath, filename), 'utf-8');
|
|
231
|
+
ig.add(content);
|
|
232
|
+
hasRules = true;
|
|
233
|
+
}
|
|
234
|
+
catch (err) {
|
|
235
|
+
const code = err.code;
|
|
236
|
+
if (code !== 'ENOENT') {
|
|
237
|
+
console.warn(` Warning: could not read ${filename}: ${err.message}`);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
return hasRules ? ig : null;
|
|
242
|
+
};
|
|
243
|
+
/**
|
|
244
|
+
* Create a glob-compatible ignore filter combining:
|
|
245
|
+
* - .gitignore / .gitnexusignore patterns (via `ignore` package)
|
|
246
|
+
* - Hardcoded DEFAULT_IGNORE_LIST, IGNORED_EXTENSIONS, IGNORED_FILES
|
|
247
|
+
*
|
|
248
|
+
* Returns an IgnoreLike object for glob's `ignore` option,
|
|
249
|
+
* enabling directory-level pruning during traversal.
|
|
250
|
+
*/
|
|
251
|
+
export const createIgnoreFilter = async (repoPath, options) => {
|
|
252
|
+
const ig = await loadIgnoreRules(repoPath, options);
|
|
253
|
+
return {
|
|
254
|
+
ignored(p) {
|
|
255
|
+
// path-scurry's Path.relative() returns POSIX paths on all platforms,
|
|
256
|
+
// which is what the `ignore` package expects. No explicit normalization needed.
|
|
257
|
+
const rel = p.relative();
|
|
258
|
+
if (!rel)
|
|
259
|
+
return false;
|
|
260
|
+
// Check .gitignore / .gitnexusignore patterns
|
|
261
|
+
if (ig && ig.ignores(rel))
|
|
262
|
+
return true;
|
|
263
|
+
// Fall back to hardcoded rules
|
|
264
|
+
return shouldIgnorePath(rel);
|
|
265
|
+
},
|
|
266
|
+
childrenIgnored(p) {
|
|
267
|
+
// Fast path: check directory name against hardcoded list.
|
|
268
|
+
// Note: dot-directories (.git, .vscode, etc.) are primarily excluded by
|
|
269
|
+
// glob's `dot: false` option in filesystem-walker.ts. This check is
|
|
270
|
+
// defense-in-depth — do not remove `dot: false` assuming this covers it.
|
|
271
|
+
if (DEFAULT_IGNORE_LIST.has(p.name))
|
|
272
|
+
return true;
|
|
273
|
+
// Check against .gitignore / .gitnexusignore patterns.
|
|
274
|
+
// Test both bare path and path with trailing slash to handle
|
|
275
|
+
// bare-name patterns (e.g. `local`) and dir-only patterns (e.g. `local/`).
|
|
276
|
+
if (ig) {
|
|
277
|
+
const rel = p.relative();
|
|
278
|
+
if (rel && (ig.ignores(rel) || ig.ignores(rel + '/')))
|
|
279
|
+
return true;
|
|
280
|
+
}
|
|
281
|
+
return false;
|
|
282
|
+
},
|
|
283
|
+
};
|
|
284
|
+
};
|
|
@@ -8,9 +8,9 @@ export var SupportedLanguages;
|
|
|
8
8
|
SupportedLanguages["CPlusPlus"] = "cpp";
|
|
9
9
|
SupportedLanguages["CSharp"] = "csharp";
|
|
10
10
|
SupportedLanguages["Go"] = "go";
|
|
11
|
+
SupportedLanguages["Ruby"] = "ruby";
|
|
11
12
|
SupportedLanguages["Rust"] = "rust";
|
|
12
13
|
SupportedLanguages["PHP"] = "php";
|
|
13
14
|
SupportedLanguages["Kotlin"] = "kotlin";
|
|
14
|
-
// Ruby = 'ruby',
|
|
15
15
|
SupportedLanguages["Swift"] = "swift";
|
|
16
16
|
})(SupportedLanguages || (SupportedLanguages = {}));
|
|
@@ -56,7 +56,7 @@ async function findRepoForCwd(cwd) {
|
|
|
56
56
|
return {
|
|
57
57
|
name: bestMatch.name,
|
|
58
58
|
storagePath: bestMatch.storagePath,
|
|
59
|
-
|
|
59
|
+
lbugPath: path.join(bestMatch.storagePath, 'lbug'),
|
|
60
60
|
};
|
|
61
61
|
}
|
|
62
62
|
catch {
|
|
@@ -81,16 +81,16 @@ export async function augment(pattern, cwd) {
|
|
|
81
81
|
const repo = await findRepoForCwd(workDir);
|
|
82
82
|
if (!repo)
|
|
83
83
|
return '';
|
|
84
|
-
// Lazy-load
|
|
85
|
-
const {
|
|
86
|
-
const {
|
|
84
|
+
// Lazy-load lbug adapter (skip unnecessary init)
|
|
85
|
+
const { initLbug, executeQuery, isLbugReady } = await import('../../mcp/core/lbug-adapter.js');
|
|
86
|
+
const { searchFTSFromLbug } = await import('../search/bm25-index.js');
|
|
87
87
|
const repoId = repo.name.toLowerCase();
|
|
88
|
-
// Init
|
|
89
|
-
if (!
|
|
90
|
-
await
|
|
88
|
+
// Init LadybugDB if not already
|
|
89
|
+
if (!isLbugReady(repoId)) {
|
|
90
|
+
await initLbug(repoId, repo.lbugPath);
|
|
91
91
|
}
|
|
92
92
|
// Step 1: BM25 search (fast, no embeddings)
|
|
93
|
-
const bm25Results = await
|
|
93
|
+
const bm25Results = await searchFTSFromLbug(pattern, 10, repoId);
|
|
94
94
|
if (bm25Results.length === 0)
|
|
95
95
|
return '';
|
|
96
96
|
// Step 2: Map BM25 file results to symbols
|
|
@@ -98,11 +98,11 @@ export async function augment(pattern, cwd) {
|
|
|
98
98
|
for (const result of bm25Results.slice(0, 5)) {
|
|
99
99
|
const escaped = result.filePath.replace(/'/g, "''");
|
|
100
100
|
try {
|
|
101
|
-
const symbols = await executeQuery(repoId, `
|
|
102
|
-
MATCH (n) WHERE n.filePath = '${escaped}'
|
|
103
|
-
AND n.name CONTAINS '${pattern.replace(/'/g, "''").split(/\s+/)[0]}'
|
|
104
|
-
RETURN n.id AS id, n.name AS name, labels(n)[0] AS type, n.filePath AS filePath
|
|
105
|
-
LIMIT 3
|
|
101
|
+
const symbols = await executeQuery(repoId, `
|
|
102
|
+
MATCH (n) WHERE n.filePath = '${escaped}'
|
|
103
|
+
AND n.name CONTAINS '${pattern.replace(/'/g, "''").split(/\s+/)[0]}'
|
|
104
|
+
RETURN n.id AS id, n.name AS name, labels(n)[0] AS type, n.filePath AS filePath
|
|
105
|
+
LIMIT 3
|
|
106
106
|
`);
|
|
107
107
|
for (const sym of symbols) {
|
|
108
108
|
symbolMatches.push({
|
|
@@ -118,72 +118,99 @@ export async function augment(pattern, cwd) {
|
|
|
118
118
|
}
|
|
119
119
|
if (symbolMatches.length === 0)
|
|
120
120
|
return '';
|
|
121
|
-
// Step 3:
|
|
122
|
-
//
|
|
123
|
-
const
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
const rows = await executeQuery(repoId, `
|
|
145
|
-
MATCH (n {id: '${escaped}'})-[:CodeRelation {type: 'CALLS'}]->(callee)
|
|
146
|
-
RETURN callee.name AS name
|
|
147
|
-
LIMIT 3
|
|
148
|
-
`);
|
|
149
|
-
callees = rows.map((r) => r.name || r[0]).filter(Boolean);
|
|
121
|
+
// Step 3: Batch-fetch callers/callees/processes/cohesion for top matches
|
|
122
|
+
// Uses batched WHERE n.id IN [...] queries instead of per-symbol queries
|
|
123
|
+
const uniqueSymbols = symbolMatches.slice(0, 5).filter((sym, i, arr) => arr.findIndex(s => s.nodeId === sym.nodeId) === i);
|
|
124
|
+
if (uniqueSymbols.length === 0)
|
|
125
|
+
return '';
|
|
126
|
+
const idList = uniqueSymbols.map(s => `'${s.nodeId.replace(/'/g, "''")}'`).join(', ');
|
|
127
|
+
// Batch fetch callers
|
|
128
|
+
const callersMap = new Map();
|
|
129
|
+
try {
|
|
130
|
+
const rows = await executeQuery(repoId, `
|
|
131
|
+
MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(n)
|
|
132
|
+
WHERE n.id IN [${idList}]
|
|
133
|
+
RETURN n.id AS targetId, caller.name AS name
|
|
134
|
+
LIMIT 15
|
|
135
|
+
`);
|
|
136
|
+
for (const r of rows) {
|
|
137
|
+
const tid = r.targetId || r[0];
|
|
138
|
+
const name = r.name || r[1];
|
|
139
|
+
if (tid && name) {
|
|
140
|
+
if (!callersMap.has(tid))
|
|
141
|
+
callersMap.set(tid, []);
|
|
142
|
+
callersMap.get(tid).push(name);
|
|
143
|
+
}
|
|
150
144
|
}
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
145
|
+
}
|
|
146
|
+
catch { /* skip */ }
|
|
147
|
+
// Batch fetch callees
|
|
148
|
+
const calleesMap = new Map();
|
|
149
|
+
try {
|
|
150
|
+
const rows = await executeQuery(repoId, `
|
|
151
|
+
MATCH (n)-[:CodeRelation {type: 'CALLS'}]->(callee)
|
|
152
|
+
WHERE n.id IN [${idList}]
|
|
153
|
+
RETURN n.id AS sourceId, callee.name AS name
|
|
154
|
+
LIMIT 15
|
|
155
|
+
`);
|
|
156
|
+
for (const r of rows) {
|
|
157
|
+
const sid = r.sourceId || r[0];
|
|
158
|
+
const name = r.name || r[1];
|
|
159
|
+
if (sid && name) {
|
|
160
|
+
if (!calleesMap.has(sid))
|
|
161
|
+
calleesMap.set(sid, []);
|
|
162
|
+
calleesMap.get(sid).push(name);
|
|
163
|
+
}
|
|
165
164
|
}
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
165
|
+
}
|
|
166
|
+
catch { /* skip */ }
|
|
167
|
+
// Batch fetch processes
|
|
168
|
+
const processesMap = new Map();
|
|
169
|
+
try {
|
|
170
|
+
const rows = await executeQuery(repoId, `
|
|
171
|
+
MATCH (n)-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process)
|
|
172
|
+
WHERE n.id IN [${idList}]
|
|
173
|
+
RETURN n.id AS nodeId, p.heuristicLabel AS label, r.step AS step, p.stepCount AS stepCount
|
|
174
|
+
`);
|
|
175
|
+
for (const r of rows) {
|
|
176
|
+
const nid = r.nodeId || r[0];
|
|
177
|
+
const label = r.label || r[1];
|
|
178
|
+
const step = r.step || r[2];
|
|
179
|
+
const stepCount = r.stepCount || r[3];
|
|
180
|
+
if (nid && label) {
|
|
181
|
+
if (!processesMap.has(nid))
|
|
182
|
+
processesMap.set(nid, []);
|
|
183
|
+
processesMap.get(nid).push(`${label} (step ${step}/${stepCount})`);
|
|
177
184
|
}
|
|
178
185
|
}
|
|
179
|
-
|
|
186
|
+
}
|
|
187
|
+
catch { /* skip */ }
|
|
188
|
+
// Batch fetch cohesion
|
|
189
|
+
const cohesionMap = new Map();
|
|
190
|
+
try {
|
|
191
|
+
const rows = await executeQuery(repoId, `
|
|
192
|
+
MATCH (n)-[:CodeRelation {type: 'MEMBER_OF'}]->(c:Community)
|
|
193
|
+
WHERE n.id IN [${idList}]
|
|
194
|
+
RETURN n.id AS nodeId, c.cohesion AS cohesion
|
|
195
|
+
`);
|
|
196
|
+
for (const r of rows) {
|
|
197
|
+
const nid = r.nodeId || r[0];
|
|
198
|
+
const coh = r.cohesion ?? r[1] ?? 0;
|
|
199
|
+
if (nid)
|
|
200
|
+
cohesionMap.set(nid, coh);
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
catch { /* skip */ }
|
|
204
|
+
// Assemble enriched results
|
|
205
|
+
const enriched = [];
|
|
206
|
+
for (const sym of uniqueSymbols) {
|
|
180
207
|
enriched.push({
|
|
181
208
|
name: sym.name,
|
|
182
209
|
filePath: sym.filePath,
|
|
183
|
-
callers,
|
|
184
|
-
callees,
|
|
185
|
-
processes,
|
|
186
|
-
cohesion,
|
|
210
|
+
callers: (callersMap.get(sym.nodeId) || []).slice(0, 3),
|
|
211
|
+
callees: (calleesMap.get(sym.nodeId) || []).slice(0, 3),
|
|
212
|
+
processes: processesMap.get(sym.nodeId) || [],
|
|
213
|
+
cohesion: cohesionMap.get(sym.nodeId) || 0,
|
|
187
214
|
});
|
|
188
215
|
}
|
|
189
216
|
if (enriched.length === 0)
|
|
@@ -50,7 +50,7 @@ export declare const embedText: (text: string) => Promise<Float32Array>;
|
|
|
50
50
|
*/
|
|
51
51
|
export declare const embedBatch: (texts: string[]) => Promise<Float32Array[]>;
|
|
52
52
|
/**
|
|
53
|
-
* Convert Float32Array to regular number array (for
|
|
53
|
+
* Convert Float32Array to regular number array (for LadybugDB storage)
|
|
54
54
|
*/
|
|
55
55
|
export declare const embeddingToArray: (embedding: Float32Array) => number[];
|
|
56
56
|
/**
|
|
@@ -225,7 +225,7 @@ export const embedBatch = async (texts) => {
|
|
|
225
225
|
return embeddings;
|
|
226
226
|
};
|
|
227
227
|
/**
|
|
228
|
-
* Convert Float32Array to regular number array (for
|
|
228
|
+
* Convert Float32Array to regular number array (for LadybugDB storage)
|
|
229
229
|
*/
|
|
230
230
|
export const embeddingToArray = (embedding) => {
|
|
231
231
|
return Array.from(embedding);
|
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
* Embedding Pipeline Module
|
|
3
3
|
*
|
|
4
4
|
* Orchestrates the background embedding process:
|
|
5
|
-
* 1. Query embeddable nodes from
|
|
5
|
+
* 1. Query embeddable nodes from LadybugDB
|
|
6
6
|
* 2. Generate text representations
|
|
7
7
|
* 3. Batch embed using transformers.js
|
|
8
|
-
* 4. Update
|
|
8
|
+
* 4. Update LadybugDB with embeddings
|
|
9
9
|
* 5. Create vector index for semantic search
|
|
10
10
|
*/
|
|
11
11
|
import { type EmbeddingProgress, type EmbeddingConfig, type SemanticSearchResult } from './types.js';
|
|
@@ -16,7 +16,7 @@ export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
|
|
|
16
16
|
/**
|
|
17
17
|
* Run the embedding pipeline
|
|
18
18
|
*
|
|
19
|
-
* @param executeQuery - Function to execute Cypher queries against
|
|
19
|
+
* @param executeQuery - Function to execute Cypher queries against LadybugDB
|
|
20
20
|
* @param executeWithReusedStatement - Function to execute with reused prepared statement
|
|
21
21
|
* @param onProgress - Callback for progress updates
|
|
22
22
|
* @param config - Optional configuration override
|
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
* Embedding Pipeline Module
|
|
3
3
|
*
|
|
4
4
|
* Orchestrates the background embedding process:
|
|
5
|
-
* 1. Query embeddable nodes from
|
|
5
|
+
* 1. Query embeddable nodes from LadybugDB
|
|
6
6
|
* 2. Generate text representations
|
|
7
7
|
* 3. Batch embed using transformers.js
|
|
8
|
-
* 4. Update
|
|
8
|
+
* 4. Update LadybugDB with embeddings
|
|
9
9
|
* 5. Create vector index for semantic search
|
|
10
10
|
*/
|
|
11
11
|
import { initEmbedder, embedBatch, embedText, embeddingToArray, isEmbedderReady } from './embedder.js';
|
|
@@ -13,7 +13,7 @@ import { generateBatchEmbeddingTexts } from './text-generator.js';
|
|
|
13
13
|
import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, } from './types.js';
|
|
14
14
|
const isDev = process.env.NODE_ENV === 'development';
|
|
15
15
|
/**
|
|
16
|
-
* Query all embeddable nodes from
|
|
16
|
+
* Query all embeddable nodes from LadybugDB
|
|
17
17
|
* Uses table-specific queries (File has different schema than code elements)
|
|
18
18
|
*/
|
|
19
19
|
const queryEmbeddableNodes = async (executeQuery) => {
|
|
@@ -24,19 +24,19 @@ const queryEmbeddableNodes = async (executeQuery) => {
|
|
|
24
24
|
let query;
|
|
25
25
|
if (label === 'File') {
|
|
26
26
|
// File nodes don't have startLine/endLine
|
|
27
|
-
query = `
|
|
28
|
-
MATCH (n:File)
|
|
29
|
-
RETURN n.id AS id, n.name AS name, 'File' AS label,
|
|
30
|
-
n.filePath AS filePath, n.content AS content
|
|
27
|
+
query = `
|
|
28
|
+
MATCH (n:File)
|
|
29
|
+
RETURN n.id AS id, n.name AS name, 'File' AS label,
|
|
30
|
+
n.filePath AS filePath, n.content AS content
|
|
31
31
|
`;
|
|
32
32
|
}
|
|
33
33
|
else {
|
|
34
34
|
// Code elements have startLine/endLine
|
|
35
|
-
query = `
|
|
36
|
-
MATCH (n:${label})
|
|
37
|
-
RETURN n.id AS id, n.name AS name, '${label}' AS label,
|
|
38
|
-
n.filePath AS filePath, n.content AS content,
|
|
39
|
-
n.startLine AS startLine, n.endLine AS endLine
|
|
35
|
+
query = `
|
|
36
|
+
MATCH (n:${label})
|
|
37
|
+
RETURN n.id AS id, n.name AS name, '${label}' AS label,
|
|
38
|
+
n.filePath AS filePath, n.content AS content,
|
|
39
|
+
n.startLine AS startLine, n.endLine AS endLine
|
|
40
40
|
`;
|
|
41
41
|
}
|
|
42
42
|
const rows = await executeQuery(query);
|
|
@@ -76,9 +76,22 @@ const batchInsertEmbeddings = async (executeWithReusedStatement, updates) => {
|
|
|
76
76
|
* Create the vector index for semantic search
|
|
77
77
|
* Now indexes the separate CodeEmbedding table
|
|
78
78
|
*/
|
|
79
|
+
let vectorExtensionLoaded = false;
|
|
79
80
|
const createVectorIndex = async (executeQuery) => {
|
|
80
|
-
|
|
81
|
-
|
|
81
|
+
// LadybugDB v0.15+ requires explicit VECTOR extension loading (once per session)
|
|
82
|
+
if (!vectorExtensionLoaded) {
|
|
83
|
+
try {
|
|
84
|
+
await executeQuery('INSTALL VECTOR');
|
|
85
|
+
await executeQuery('LOAD EXTENSION VECTOR');
|
|
86
|
+
vectorExtensionLoaded = true;
|
|
87
|
+
}
|
|
88
|
+
catch {
|
|
89
|
+
// Extension may already be loaded — CREATE_VECTOR_INDEX will fail clearly if not
|
|
90
|
+
vectorExtensionLoaded = true;
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
const cypher = `
|
|
94
|
+
CALL CREATE_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx', 'embedding', metric := 'cosine')
|
|
82
95
|
`;
|
|
83
96
|
try {
|
|
84
97
|
await executeQuery(cypher);
|
|
@@ -93,7 +106,7 @@ const createVectorIndex = async (executeQuery) => {
|
|
|
93
106
|
/**
|
|
94
107
|
* Run the embedding pipeline
|
|
95
108
|
*
|
|
96
|
-
* @param executeQuery - Function to execute Cypher queries against
|
|
109
|
+
* @param executeQuery - Function to execute Cypher queries against LadybugDB
|
|
97
110
|
* @param executeWithReusedStatement - Function to execute with reused prepared statement
|
|
98
111
|
* @param onProgress - Callback for progress updates
|
|
99
112
|
* @param config - Optional configuration override
|
|
@@ -167,7 +180,7 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
167
180
|
const texts = generateBatchEmbeddingTexts(batch, finalConfig);
|
|
168
181
|
// Embed the batch
|
|
169
182
|
const embeddings = await embedBatch(texts);
|
|
170
|
-
// Update
|
|
183
|
+
// Update LadybugDB with embeddings
|
|
171
184
|
const updates = batch.map((node, i) => ({
|
|
172
185
|
id: node.id,
|
|
173
186
|
embedding: embeddingToArray(embeddings[i]),
|
|
@@ -240,62 +253,76 @@ export const semanticSearch = async (executeQuery, query, k = 10, maxDistance =
|
|
|
240
253
|
const queryVec = embeddingToArray(queryEmbedding);
|
|
241
254
|
const queryVecStr = `[${queryVec.join(',')}]`;
|
|
242
255
|
// Query the vector index on CodeEmbedding to get nodeIds and distances
|
|
243
|
-
const vectorQuery = `
|
|
244
|
-
CALL QUERY_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx',
|
|
245
|
-
CAST(${queryVecStr} AS FLOAT[384]), ${k})
|
|
246
|
-
YIELD node AS emb, distance
|
|
247
|
-
WITH emb, distance
|
|
248
|
-
WHERE distance < ${maxDistance}
|
|
249
|
-
RETURN emb.nodeId AS nodeId, distance
|
|
250
|
-
ORDER BY distance
|
|
256
|
+
const vectorQuery = `
|
|
257
|
+
CALL QUERY_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx',
|
|
258
|
+
CAST(${queryVecStr} AS FLOAT[384]), ${k})
|
|
259
|
+
YIELD node AS emb, distance
|
|
260
|
+
WITH emb, distance
|
|
261
|
+
WHERE distance < ${maxDistance}
|
|
262
|
+
RETURN emb.nodeId AS nodeId, distance
|
|
263
|
+
ORDER BY distance
|
|
251
264
|
`;
|
|
252
265
|
const embResults = await executeQuery(vectorQuery);
|
|
253
266
|
if (embResults.length === 0) {
|
|
254
267
|
return [];
|
|
255
268
|
}
|
|
256
|
-
//
|
|
257
|
-
const
|
|
269
|
+
// Group results by label for batched metadata queries
|
|
270
|
+
const byLabel = new Map();
|
|
258
271
|
for (const embRow of embResults) {
|
|
259
272
|
const nodeId = embRow.nodeId ?? embRow[0];
|
|
260
273
|
const distance = embRow.distance ?? embRow[1];
|
|
261
|
-
// Extract label from node ID (format: Label:path:name)
|
|
262
274
|
const labelEndIdx = nodeId.indexOf(':');
|
|
263
275
|
const label = labelEndIdx > 0 ? nodeId.substring(0, labelEndIdx) : 'Unknown';
|
|
264
|
-
|
|
265
|
-
|
|
276
|
+
if (!byLabel.has(label))
|
|
277
|
+
byLabel.set(label, []);
|
|
278
|
+
byLabel.get(label).push({ nodeId, distance });
|
|
279
|
+
}
|
|
280
|
+
// Batch-fetch metadata per label
|
|
281
|
+
const results = [];
|
|
282
|
+
for (const [label, items] of byLabel) {
|
|
283
|
+
const idList = items.map(i => `'${i.nodeId.replace(/'/g, "''")}'`).join(', ');
|
|
266
284
|
try {
|
|
267
285
|
let nodeQuery;
|
|
268
286
|
if (label === 'File') {
|
|
269
|
-
nodeQuery = `
|
|
270
|
-
MATCH (n:File
|
|
271
|
-
RETURN n.name AS name, n.filePath AS filePath
|
|
287
|
+
nodeQuery = `
|
|
288
|
+
MATCH (n:File) WHERE n.id IN [${idList}]
|
|
289
|
+
RETURN n.id AS id, n.name AS name, n.filePath AS filePath
|
|
272
290
|
`;
|
|
273
291
|
}
|
|
274
292
|
else {
|
|
275
|
-
nodeQuery = `
|
|
276
|
-
MATCH (n:${label}
|
|
277
|
-
RETURN n.name AS name, n.filePath AS filePath,
|
|
278
|
-
n.startLine AS startLine, n.endLine AS endLine
|
|
293
|
+
nodeQuery = `
|
|
294
|
+
MATCH (n:${label}) WHERE n.id IN [${idList}]
|
|
295
|
+
RETURN n.id AS id, n.name AS name, n.filePath AS filePath,
|
|
296
|
+
n.startLine AS startLine, n.endLine AS endLine
|
|
279
297
|
`;
|
|
280
298
|
}
|
|
281
299
|
const nodeRows = await executeQuery(nodeQuery);
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
300
|
+
const rowMap = new Map();
|
|
301
|
+
for (const row of nodeRows) {
|
|
302
|
+
const id = row.id ?? row[0];
|
|
303
|
+
rowMap.set(id, row);
|
|
304
|
+
}
|
|
305
|
+
for (const item of items) {
|
|
306
|
+
const nodeRow = rowMap.get(item.nodeId);
|
|
307
|
+
if (nodeRow) {
|
|
308
|
+
results.push({
|
|
309
|
+
nodeId: item.nodeId,
|
|
310
|
+
name: nodeRow.name ?? nodeRow[1] ?? '',
|
|
311
|
+
label,
|
|
312
|
+
filePath: nodeRow.filePath ?? nodeRow[2] ?? '',
|
|
313
|
+
distance: item.distance,
|
|
314
|
+
startLine: label !== 'File' ? (nodeRow.startLine ?? nodeRow[3]) : undefined,
|
|
315
|
+
endLine: label !== 'File' ? (nodeRow.endLine ?? nodeRow[4]) : undefined,
|
|
316
|
+
});
|
|
317
|
+
}
|
|
293
318
|
}
|
|
294
319
|
}
|
|
295
320
|
catch {
|
|
296
321
|
// Table might not exist, skip
|
|
297
322
|
}
|
|
298
323
|
}
|
|
324
|
+
// Re-sort by distance since batch queries may have mixed order
|
|
325
|
+
results.sort((a, b) => a.distance - b.distance);
|
|
299
326
|
return results;
|
|
300
327
|
};
|
|
301
328
|
/**
|
|
@@ -64,7 +64,7 @@ export interface SemanticSearchResult {
|
|
|
64
64
|
endLine?: number;
|
|
65
65
|
}
|
|
66
66
|
/**
|
|
67
|
-
* Node data for embedding (minimal structure from KuzuDB query)
|
|
67
|
+
* Node data for embedding (minimal structure from LadybugDB query)
|
|
68
68
|
*/
|
|
69
69
|
export interface EmbeddableNode {
|
|
70
70
|
id: string;
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
export type NodeLabel = 'Project' | 'Package' | 'Module' | 'Folder' | 'File' | 'Class' | 'Function' | 'Method' | 'Variable' | 'Interface' | 'Enum' | 'Decorator' | 'Import' | 'Type' | 'CodeElement' | 'Community' | 'Process' | 'Struct' | 'Macro' | 'Typedef' | 'Union' | 'Namespace' | 'Trait' | 'Impl' | 'TypeAlias' | 'Const' | 'Static' | 'Property' | 'Record' | 'Delegate' | 'Annotation' | 'Constructor' | 'Template';
|
|
2
|
+
import { SupportedLanguages } from '../../config/supported-languages.js';
|
|
2
3
|
export type NodeProperties = {
|
|
3
4
|
name: string;
|
|
4
5
|
filePath: string;
|
|
5
6
|
startLine?: number;
|
|
6
7
|
endLine?: number;
|
|
7
|
-
language?: string;
|
|
8
|
+
language?: SupportedLanguages;
|
|
8
9
|
isExported?: boolean;
|
|
9
10
|
astFrameworkMultiplier?: number;
|
|
10
11
|
astFrameworkReason?: string;
|
|
@@ -21,8 +22,10 @@ export type NodeProperties = {
|
|
|
21
22
|
terminalId?: string;
|
|
22
23
|
entryPointScore?: number;
|
|
23
24
|
entryPointReason?: string;
|
|
25
|
+
parameterCount?: number;
|
|
26
|
+
returnType?: string;
|
|
24
27
|
};
|
|
25
|
-
export type RelationshipType = 'CONTAINS' | 'CALLS' | 'INHERITS' | 'OVERRIDES' | 'IMPORTS' | 'USES' | 'DEFINES' | 'DECORATES' | 'IMPLEMENTS' | 'EXTENDS' | 'MEMBER_OF' | 'STEP_IN_PROCESS';
|
|
28
|
+
export type RelationshipType = 'CONTAINS' | 'CALLS' | 'INHERITS' | 'OVERRIDES' | 'IMPORTS' | 'USES' | 'DEFINES' | 'DECORATES' | 'IMPLEMENTS' | 'EXTENDS' | 'HAS_METHOD' | 'MEMBER_OF' | 'STEP_IN_PROCESS';
|
|
26
29
|
export interface GraphNode {
|
|
27
30
|
id: string;
|
|
28
31
|
label: NodeLabel;
|