gitnexus 1.1.9 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +50 -59
  2. package/dist/cli/analyze.js +114 -32
  3. package/dist/cli/eval-server.d.ts +30 -0
  4. package/dist/cli/eval-server.js +372 -0
  5. package/dist/cli/index.js +51 -1
  6. package/dist/cli/mcp.js +9 -0
  7. package/dist/cli/setup.js +44 -7
  8. package/dist/cli/tool.d.ts +37 -0
  9. package/dist/cli/tool.js +91 -0
  10. package/dist/cli/wiki.d.ts +13 -0
  11. package/dist/cli/wiki.js +199 -0
  12. package/dist/core/embeddings/embedder.d.ts +2 -2
  13. package/dist/core/embeddings/embedder.js +10 -10
  14. package/dist/core/embeddings/embedding-pipeline.d.ts +2 -1
  15. package/dist/core/embeddings/embedding-pipeline.js +12 -4
  16. package/dist/core/embeddings/types.d.ts +2 -2
  17. package/dist/core/ingestion/call-processor.d.ts +7 -0
  18. package/dist/core/ingestion/call-processor.js +61 -23
  19. package/dist/core/ingestion/community-processor.js +34 -26
  20. package/dist/core/ingestion/filesystem-walker.js +15 -10
  21. package/dist/core/ingestion/heritage-processor.d.ts +6 -0
  22. package/dist/core/ingestion/heritage-processor.js +68 -5
  23. package/dist/core/ingestion/import-processor.d.ts +22 -0
  24. package/dist/core/ingestion/import-processor.js +214 -19
  25. package/dist/core/ingestion/parsing-processor.d.ts +8 -1
  26. package/dist/core/ingestion/parsing-processor.js +66 -25
  27. package/dist/core/ingestion/pipeline.js +103 -39
  28. package/dist/core/ingestion/workers/parse-worker.d.ts +58 -0
  29. package/dist/core/ingestion/workers/parse-worker.js +451 -0
  30. package/dist/core/ingestion/workers/worker-pool.d.ts +22 -0
  31. package/dist/core/ingestion/workers/worker-pool.js +65 -0
  32. package/dist/core/kuzu/kuzu-adapter.d.ts +15 -1
  33. package/dist/core/kuzu/kuzu-adapter.js +177 -67
  34. package/dist/core/kuzu/schema.d.ts +1 -1
  35. package/dist/core/kuzu/schema.js +3 -0
  36. package/dist/core/wiki/generator.d.ts +96 -0
  37. package/dist/core/wiki/generator.js +674 -0
  38. package/dist/core/wiki/graph-queries.d.ts +80 -0
  39. package/dist/core/wiki/graph-queries.js +238 -0
  40. package/dist/core/wiki/html-viewer.d.ts +10 -0
  41. package/dist/core/wiki/html-viewer.js +297 -0
  42. package/dist/core/wiki/llm-client.d.ts +36 -0
  43. package/dist/core/wiki/llm-client.js +111 -0
  44. package/dist/core/wiki/prompts.d.ts +53 -0
  45. package/dist/core/wiki/prompts.js +174 -0
  46. package/dist/mcp/core/embedder.js +4 -2
  47. package/dist/mcp/core/kuzu-adapter.d.ts +2 -1
  48. package/dist/mcp/core/kuzu-adapter.js +35 -15
  49. package/dist/mcp/local/local-backend.js +9 -2
  50. package/dist/mcp/server.js +1 -1
  51. package/dist/storage/git.d.ts +0 -1
  52. package/dist/storage/git.js +1 -8
  53. package/dist/storage/repo-manager.d.ts +17 -0
  54. package/dist/storage/repo-manager.js +26 -0
  55. package/package.json +1 -1
@@ -0,0 +1,199 @@
1
+ /**
2
+ * Wiki Command
3
+ *
4
+ * Generates repository documentation from the knowledge graph.
5
+ * Usage: gitnexus wiki [path] [options]
6
+ */
7
+ import path from 'path';
8
+ import readline from 'readline';
9
+ import cliProgress from 'cli-progress';
10
+ import { getGitRoot, isGitRepo } from '../storage/git.js';
11
+ import { getStoragePaths, loadMeta, loadCLIConfig, saveCLIConfig } from '../storage/repo-manager.js';
12
+ import { WikiGenerator } from '../core/wiki/generator.js';
13
+ import { resolveLLMConfig } from '../core/wiki/llm-client.js';
14
+ /**
15
+ * Prompt the user for input via stdin.
16
+ */
17
+ function prompt(question, hide = false) {
18
+ return new Promise((resolve) => {
19
+ const rl = readline.createInterface({
20
+ input: process.stdin,
21
+ output: process.stdout,
22
+ });
23
+ if (hide && process.stdin.isTTY) {
24
+ // Mask input for API keys
25
+ process.stdout.write(question);
26
+ let input = '';
27
+ process.stdin.setRawMode(true);
28
+ process.stdin.resume();
29
+ process.stdin.setEncoding('utf-8');
30
+ const onData = (char) => {
31
+ if (char === '\n' || char === '\r' || char === '\u0004') {
32
+ process.stdin.setRawMode(false);
33
+ process.stdin.removeListener('data', onData);
34
+ process.stdout.write('\n');
35
+ rl.close();
36
+ resolve(input);
37
+ }
38
+ else if (char === '\u0003') {
39
+ // Ctrl+C
40
+ process.stdin.setRawMode(false);
41
+ rl.close();
42
+ process.exit(1);
43
+ }
44
+ else if (char === '\u007F' || char === '\b') {
45
+ // Backspace
46
+ if (input.length > 0) {
47
+ input = input.slice(0, -1);
48
+ process.stdout.write('\b \b');
49
+ }
50
+ }
51
+ else {
52
+ input += char;
53
+ process.stdout.write('*');
54
+ }
55
+ };
56
+ process.stdin.on('data', onData);
57
+ }
58
+ else {
59
+ rl.question(question, (answer) => {
60
+ rl.close();
61
+ resolve(answer.trim());
62
+ });
63
+ }
64
+ });
65
+ }
66
+ export const wikiCommand = async (inputPath, options) => {
67
+ console.log('\n GitNexus Wiki Generator\n');
68
+ // ── Resolve repo path ───────────────────────────────────────────────
69
+ let repoPath;
70
+ if (inputPath) {
71
+ repoPath = path.resolve(inputPath);
72
+ }
73
+ else {
74
+ const gitRoot = getGitRoot(process.cwd());
75
+ if (!gitRoot) {
76
+ console.log(' Error: Not inside a git repository\n');
77
+ process.exitCode = 1;
78
+ return;
79
+ }
80
+ repoPath = gitRoot;
81
+ }
82
+ if (!isGitRepo(repoPath)) {
83
+ console.log(' Error: Not a git repository\n');
84
+ process.exitCode = 1;
85
+ return;
86
+ }
87
+ // ── Check for existing index ────────────────────────────────────────
88
+ const { storagePath, kuzuPath } = getStoragePaths(repoPath);
89
+ const meta = await loadMeta(storagePath);
90
+ if (!meta) {
91
+ console.log(' Error: No GitNexus index found.');
92
+ console.log(' Run `gitnexus analyze` first to index this repository.\n');
93
+ process.exitCode = 1;
94
+ return;
95
+ }
96
+ // ── Resolve LLM config (with interactive fallback) ─────────────────
97
+ // If --api-key was passed via CLI, save it immediately
98
+ if (options?.apiKey) {
99
+ const existing = await loadCLIConfig();
100
+ await saveCLIConfig({ ...existing, apiKey: options.apiKey });
101
+ console.log(' API key saved to ~/.gitnexus/config.json\n');
102
+ }
103
+ let llmConfig = await resolveLLMConfig({
104
+ model: options?.model,
105
+ baseUrl: options?.baseUrl,
106
+ apiKey: options?.apiKey,
107
+ });
108
+ if (!llmConfig.apiKey) {
109
+ if (!process.stdin.isTTY) {
110
+ console.log(' Error: No LLM API key found.');
111
+ console.log(' Set OPENAI_API_KEY or GITNEXUS_API_KEY environment variable,');
112
+ console.log(' or pass --api-key <key>.\n');
113
+ process.exitCode = 1;
114
+ return;
115
+ }
116
+ console.log(' No API key configured.\n');
117
+ console.log(' The wiki command requires an LLM API key (OpenAI-compatible).');
118
+ console.log(' You can also set OPENAI_API_KEY or GITNEXUS_API_KEY env var.\n');
119
+ const key = await prompt(' Enter your API key: ', true);
120
+ if (!key) {
121
+ console.log('\n No key provided. Aborting.\n');
122
+ process.exitCode = 1;
123
+ return;
124
+ }
125
+ const save = await prompt(' Save key to ~/.gitnexus/config.json for future use? (Y/n): ');
126
+ if (!save || save.toLowerCase() === 'y' || save.toLowerCase() === 'yes') {
127
+ const existing = await loadCLIConfig();
128
+ await saveCLIConfig({ ...existing, apiKey: key });
129
+ console.log(' Key saved.\n');
130
+ }
131
+ else {
132
+ console.log(' Key will be used for this session only.\n');
133
+ }
134
+ llmConfig = { ...llmConfig, apiKey: key };
135
+ }
136
+ // ── Setup progress bar ──────────────────────────────────────────────
137
+ const bar = new cliProgress.SingleBar({
138
+ format: ' {bar} {percentage}% | {phase}',
139
+ barCompleteChar: '\u2588',
140
+ barIncompleteChar: '\u2591',
141
+ hideCursor: true,
142
+ barGlue: '',
143
+ autopadding: true,
144
+ clearOnComplete: false,
145
+ stopOnComplete: false,
146
+ }, cliProgress.Presets.shades_grey);
147
+ bar.start(100, 0, { phase: 'Initializing...' });
148
+ const t0 = Date.now();
149
+ // ── Run generator ───────────────────────────────────────────────────
150
+ const wikiOptions = {
151
+ force: options?.force,
152
+ model: options?.model,
153
+ baseUrl: options?.baseUrl,
154
+ };
155
+ const generator = new WikiGenerator(repoPath, storagePath, kuzuPath, llmConfig, wikiOptions, (phase, percent, detail) => {
156
+ bar.update(percent, { phase: detail || phase });
157
+ });
158
+ try {
159
+ const result = await generator.run();
160
+ bar.update(100, { phase: 'Done' });
161
+ bar.stop();
162
+ const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
163
+ if (result.mode === 'up-to-date' && !options?.force) {
164
+ console.log('\n Wiki is already up to date.');
165
+ console.log(` ${path.join(storagePath, 'wiki')}\n`);
166
+ return;
167
+ }
168
+ const wikiDir = path.join(storagePath, 'wiki');
169
+ console.log(`\n Wiki generated successfully (${elapsed}s)\n`);
170
+ console.log(` Mode: ${result.mode}`);
171
+ console.log(` Pages: ${result.pagesGenerated}`);
172
+ console.log(` Output: ${wikiDir}`);
173
+ console.log(` Viewer: ${path.join(wikiDir, 'index.html')}`);
174
+ if (result.failedModules && result.failedModules.length > 0) {
175
+ console.log(`\n Failed modules (${result.failedModules.length}):`);
176
+ for (const mod of result.failedModules) {
177
+ console.log(` - ${mod}`);
178
+ }
179
+ console.log(' Re-run to retry failed modules (pages will be regenerated).');
180
+ }
181
+ console.log('');
182
+ }
183
+ catch (err) {
184
+ bar.stop();
185
+ if (err.message?.includes('No source files')) {
186
+ console.log(`\n ${err.message}\n`);
187
+ }
188
+ else if (err.message?.includes('API key') || err.message?.includes('API error')) {
189
+ console.log(`\n LLM Error: ${err.message}\n`);
190
+ }
191
+ else {
192
+ console.log(`\n Error: ${err.message}\n`);
193
+ if (process.env.DEBUG) {
194
+ console.error(err);
195
+ }
196
+ }
197
+ process.exitCode = 1;
198
+ }
199
+ };
@@ -15,7 +15,7 @@ export type ModelProgressCallback = (progress: ModelProgress) => void;
15
15
  /**
16
16
  * Get the current device being used for inference
17
17
  */
18
- export declare const getCurrentDevice: () => "webgpu" | "cuda" | "cpu" | "wasm" | null;
18
+ export declare const getCurrentDevice: () => "dml" | "cuda" | "cpu" | "wasm" | null;
19
19
  /**
20
20
  * Initialize the embedding model
21
21
  * Uses singleton pattern - only loads once, subsequent calls return cached instance
@@ -25,7 +25,7 @@ export declare const getCurrentDevice: () => "webgpu" | "cuda" | "cpu" | "wasm"
25
25
  * @param forceDevice - Force a specific device
26
26
  * @returns Promise resolving to the embedder pipeline
27
27
  */
28
- export declare const initEmbedder: (onProgress?: ModelProgressCallback, config?: Partial<EmbeddingConfig>, forceDevice?: "webgpu" | "cuda" | "cpu" | "wasm") => Promise<FeatureExtractionPipeline>;
28
+ export declare const initEmbedder: (onProgress?: ModelProgressCallback, config?: Partial<EmbeddingConfig>, forceDevice?: "dml" | "cuda" | "cpu" | "wasm") => Promise<FeatureExtractionPipeline>;
29
29
  /**
30
30
  * Check if the embedder is initialized and ready
31
31
  */
@@ -37,10 +37,10 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
37
37
  }
38
38
  isInitializing = true;
39
39
  const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
40
- // On Windows, use webgpu for GPU acceleration (via DirectX12/DirectML)
41
- // CUDA is only available on Linux with onnxruntime-node
40
+ // On Windows, use DirectML for GPU acceleration (via DirectX12)
41
+ // CUDA is only available on Linux x64 with onnxruntime-node
42
42
  const isWindows = process.platform === 'win32';
43
- const gpuDevice = isWindows ? 'webgpu' : 'cuda';
43
+ const gpuDevice = isWindows ? 'dml' : 'cuda';
44
44
  let requestedDevice = forceDevice || (finalConfig.device === 'auto' ? gpuDevice : finalConfig.device);
45
45
  initPromise = (async () => {
46
46
  try {
@@ -61,14 +61,14 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
61
61
  onProgress(progress);
62
62
  } : undefined;
63
63
  // Try GPU first if auto, fall back to CPU
64
- // Windows: webgpu (DirectX12/DirectML), Linux: cuda
65
- const devicesToTry = (requestedDevice === 'webgpu' || requestedDevice === 'cuda')
64
+ // Windows: dml (DirectML/DirectX12), Linux: cuda
65
+ const devicesToTry = (requestedDevice === 'dml' || requestedDevice === 'cuda')
66
66
  ? [requestedDevice, 'cpu']
67
67
  : [requestedDevice];
68
68
  for (const device of devicesToTry) {
69
69
  try {
70
- if (isDev && device === 'webgpu') {
71
- console.log('🔧 Trying WebGPU (DirectX12) backend...');
70
+ if (isDev && device === 'dml') {
71
+ console.log('🔧 Trying DirectML (DirectX12) GPU backend...');
72
72
  }
73
73
  else if (isDev && device === 'cuda') {
74
74
  console.log('🔧 Trying CUDA GPU backend...');
@@ -86,7 +86,7 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
86
86
  });
87
87
  currentDevice = device;
88
88
  if (isDev) {
89
- const label = device === 'webgpu' ? 'GPU (WebGPU/DirectX12)'
89
+ const label = device === 'dml' ? 'GPU (DirectML/DirectX12)'
90
90
  : device === 'cuda' ? 'GPU (CUDA)'
91
91
  : device.toUpperCase();
92
92
  console.log(`✅ Using ${label} backend`);
@@ -95,8 +95,8 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
95
95
  return embedderInstance;
96
96
  }
97
97
  catch (deviceError) {
98
- if (isDev && (device === 'cuda' || device === 'webgpu')) {
99
- const gpuType = device === 'webgpu' ? 'WebGPU' : 'CUDA';
98
+ if (isDev && (device === 'cuda' || device === 'dml')) {
99
+ const gpuType = device === 'dml' ? 'DirectML' : 'CUDA';
100
100
  console.log(`⚠️ ${gpuType} not available, falling back to CPU...`);
101
101
  }
102
102
  // Continue to next device in list
@@ -20,8 +20,9 @@ export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
20
20
  * @param executeWithReusedStatement - Function to execute with reused prepared statement
21
21
  * @param onProgress - Callback for progress updates
22
22
  * @param config - Optional configuration override
23
+ * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
23
24
  */
24
- export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>) => Promise<void>;
25
+ export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>) => Promise<void>;
25
26
  /**
26
27
  * Perform semantic search using the vector index
27
28
  *
@@ -97,8 +97,9 @@ const createVectorIndex = async (executeQuery) => {
97
97
  * @param executeWithReusedStatement - Function to execute with reused prepared statement
98
98
  * @param onProgress - Callback for progress updates
99
99
  * @param config - Optional configuration override
100
+ * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
100
101
  */
101
- export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}) => {
102
+ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, skipNodeIds) => {
102
103
  const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
103
104
  try {
104
105
  // Phase 1: Load embedding model
@@ -108,11 +109,10 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
108
109
  modelDownloadPercent: 0,
109
110
  });
110
111
  await initEmbedder((modelProgress) => {
111
- // Report model download progress
112
112
  const downloadPercent = modelProgress.progress ?? 0;
113
113
  onProgress({
114
114
  phase: 'loading-model',
115
- percent: Math.round(downloadPercent * 0.2), // 0-20% for model loading
115
+ percent: Math.round(downloadPercent * 0.2),
116
116
  modelDownloadPercent: downloadPercent,
117
117
  });
118
118
  }, finalConfig);
@@ -125,7 +125,15 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
125
125
  console.log('🔍 Querying embeddable nodes...');
126
126
  }
127
127
  // Phase 2: Query embeddable nodes
128
- const nodes = await queryEmbeddableNodes(executeQuery);
128
+ let nodes = await queryEmbeddableNodes(executeQuery);
129
+ // Incremental mode: filter out nodes that already have embeddings
130
+ if (skipNodeIds && skipNodeIds.size > 0) {
131
+ const beforeCount = nodes.length;
132
+ nodes = nodes.filter(n => !skipNodeIds.has(n.id));
133
+ if (isDev) {
134
+ console.log(`📦 Incremental embeddings: ${beforeCount} total, ${skipNodeIds.size} cached, ${nodes.length} to embed`);
135
+ }
136
+ }
129
137
  const totalNodes = nodes.length;
130
138
  if (isDev) {
131
139
  console.log(`📊 Found ${totalNodes} embeddable nodes`);
@@ -40,8 +40,8 @@ export interface EmbeddingConfig {
40
40
  batchSize: number;
41
41
  /** Embedding vector dimensions */
42
42
  dimensions: number;
43
- /** Device to use for inference: 'auto' tries GPU first, falls back to CPU */
44
- device: 'auto' | 'webgpu' | 'cuda' | 'cpu' | 'wasm';
43
+ /** Device to use for inference: 'auto' tries GPU first (DirectML on Windows, CUDA on Linux), falls back to CPU */
44
+ device: 'auto' | 'dml' | 'cuda' | 'cpu' | 'wasm';
45
45
  /** Maximum characters of code snippet to include */
46
46
  maxSnippetLength: number;
47
47
  }
@@ -2,7 +2,14 @@ import { KnowledgeGraph } from '../graph/types.js';
2
2
  import { ASTCache } from './ast-cache.js';
3
3
  import { SymbolTable } from './symbol-table.js';
4
4
  import { ImportMap } from './import-processor.js';
5
+ import type { ExtractedCall } from './workers/parse-worker.js';
5
6
  export declare const processCalls: (graph: KnowledgeGraph, files: {
6
7
  path: string;
7
8
  content: string;
8
9
  }[], astCache: ASTCache, symbolTable: SymbolTable, importMap: ImportMap, onProgress?: (current: number, total: number) => void) => Promise<void>;
10
+ /**
11
+ * Fast path: resolve pre-extracted call sites from workers.
12
+ * No AST parsing — workers already extracted calledName + sourceId.
13
+ * This function only does symbol table lookups + graph mutations.
14
+ */
15
+ export declare const processCallsFromExtracted: (graph: KnowledgeGraph, extractedCalls: ExtractedCall[], symbolTable: SymbolTable, importMap: ImportMap, onProgress?: (current: number, total: number) => void) => Promise<void>;
@@ -145,6 +145,8 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
145
145
  continue;
146
146
  }
147
147
  wasReparsed = true;
148
+ // Cache re-parsed tree so heritage phase gets hits
149
+ astCache.set(file.path, tree);
148
150
  }
149
151
  let query;
150
152
  let matches;
@@ -155,8 +157,6 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
155
157
  }
156
158
  catch (queryError) {
157
159
  console.warn(`Query error for ${file.path}:`, queryError);
158
- if (wasReparsed)
159
- tree.delete?.();
160
160
  continue;
161
161
  }
162
162
  // 3. Process each call match
@@ -192,10 +192,7 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
192
192
  reason: resolved.reason,
193
193
  });
194
194
  });
195
- // Cleanup if re-parsed
196
- if (wasReparsed) {
197
- tree.delete?.();
198
- }
195
+ // Tree is now owned by the LRU cache — no manual delete needed
199
196
  }
200
197
  };
201
198
  /**
@@ -207,27 +204,27 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
207
204
  * Returns confidence score so agents know what to trust.
208
205
  */
209
206
  const resolveCallTarget = (calledName, currentFile, symbolTable, importMap) => {
210
- // Strategy A: Check imported files (HIGH confidence - we know the import chain)
211
- const importedFiles = importMap.get(currentFile);
212
- if (importedFiles) {
213
- for (const importedFile of importedFiles) {
214
- const nodeId = symbolTable.lookupExact(importedFile, calledName);
215
- if (nodeId) {
216
- return { nodeId, confidence: 0.9, reason: 'import-resolved' };
217
- }
218
- }
219
- }
220
- // Strategy B: Check local file (HIGH confidence - same file definition)
207
+ // Strategy B first (cheapest single map lookup): Check local file
221
208
  const localNodeId = symbolTable.lookupExact(currentFile, calledName);
222
209
  if (localNodeId) {
223
210
  return { nodeId: localNodeId, confidence: 0.85, reason: 'same-file' };
224
211
  }
225
- // Strategy C: Fuzzy global search (LOW confidence - just matching by name)
226
- const fuzzyMatches = symbolTable.lookupFuzzy(calledName);
227
- if (fuzzyMatches.length > 0) {
228
- // Lower confidence if multiple matches exist (more ambiguous)
229
- const confidence = fuzzyMatches.length === 1 ? 0.5 : 0.3;
230
- return { nodeId: fuzzyMatches[0].nodeId, confidence, reason: 'fuzzy-global' };
212
+ // Strategy A: Check if any definition of calledName is in an imported file
213
+ // Reversed: instead of iterating all imports and checking each, get all definitions
214
+ // and check if any is imported. O(definitions) instead of O(imports).
215
+ const allDefs = symbolTable.lookupFuzzy(calledName);
216
+ if (allDefs.length > 0) {
217
+ const importedFiles = importMap.get(currentFile);
218
+ if (importedFiles) {
219
+ for (const def of allDefs) {
220
+ if (importedFiles.has(def.filePath)) {
221
+ return { nodeId: def.nodeId, confidence: 0.9, reason: 'import-resolved' };
222
+ }
223
+ }
224
+ }
225
+ // Strategy C: Fuzzy global (no import match found)
226
+ const confidence = allDefs.length === 1 ? 0.5 : 0.3;
227
+ return { nodeId: allDefs[0].nodeId, confidence, reason: 'fuzzy-global' };
231
228
  }
232
229
  return null;
233
230
  };
@@ -267,3 +264,44 @@ const isBuiltInOrNoise = (name) => {
267
264
  ]);
268
265
  return builtIns.has(name);
269
266
  };
267
+ /**
268
+ * Fast path: resolve pre-extracted call sites from workers.
269
+ * No AST parsing — workers already extracted calledName + sourceId.
270
+ * This function only does symbol table lookups + graph mutations.
271
+ */
272
+ export const processCallsFromExtracted = async (graph, extractedCalls, symbolTable, importMap, onProgress) => {
273
+ // Group by file for progress reporting
274
+ const byFile = new Map();
275
+ for (const call of extractedCalls) {
276
+ let list = byFile.get(call.filePath);
277
+ if (!list) {
278
+ list = [];
279
+ byFile.set(call.filePath, list);
280
+ }
281
+ list.push(call);
282
+ }
283
+ const totalFiles = byFile.size;
284
+ let filesProcessed = 0;
285
+ for (const [_filePath, calls] of byFile) {
286
+ filesProcessed++;
287
+ if (filesProcessed % 100 === 0) {
288
+ onProgress?.(filesProcessed, totalFiles);
289
+ await yieldToEventLoop();
290
+ }
291
+ for (const call of calls) {
292
+ const resolved = resolveCallTarget(call.calledName, call.filePath, symbolTable, importMap);
293
+ if (!resolved)
294
+ continue;
295
+ const relId = generateId('CALLS', `${call.sourceId}:${call.calledName}->${resolved.nodeId}`);
296
+ graph.addRelationship({
297
+ id: relId,
298
+ sourceId: call.sourceId,
299
+ targetId: resolved.nodeId,
300
+ type: 'CALLS',
301
+ confidence: resolved.confidence,
302
+ reason: resolved.reason,
303
+ });
304
+ }
305
+ }
306
+ onProgress?.(totalFiles, totalFiles);
307
+ };
@@ -103,9 +103,19 @@ const buildGraphologyGraph = (knowledgeGraph) => {
103
103
  const graph = new Graph({ type: 'undirected', allowSelfLoops: false });
104
104
  // Symbol types that should be clustered
105
105
  const symbolTypes = new Set(['Function', 'Class', 'Method', 'Interface']);
106
- // Add symbol nodes
106
+ // First pass: collect which nodes participate in clustering edges
107
+ const clusteringRelTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);
108
+ const connectedNodes = new Set();
109
+ knowledgeGraph.relationships.forEach(rel => {
110
+ if (clusteringRelTypes.has(rel.type) && rel.sourceId !== rel.targetId) {
111
+ connectedNodes.add(rel.sourceId);
112
+ connectedNodes.add(rel.targetId);
113
+ }
114
+ });
115
+ // Only add nodes that have at least one clustering edge
116
+ // Isolated nodes would just become singletons (skipped anyway)
107
117
  knowledgeGraph.nodes.forEach(node => {
108
- if (symbolTypes.has(node.label)) {
118
+ if (symbolTypes.has(node.label) && connectedNodes.has(node.id)) {
109
119
  graph.addNode(node.id, {
110
120
  name: node.properties.name,
111
121
  filePath: node.properties.filePath,
@@ -113,15 +123,10 @@ const buildGraphologyGraph = (knowledgeGraph) => {
113
123
  });
114
124
  }
115
125
  });
116
- // Add CALLS edges (primary clustering signal)
117
- // We can also include EXTENDS/IMPLEMENTS for OOP clustering
118
- const clusteringRelTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);
126
+ // Add edges
119
127
  knowledgeGraph.relationships.forEach(rel => {
120
128
  if (clusteringRelTypes.has(rel.type)) {
121
- // Only add edge if both nodes exist in our symbol graph
122
- // Also skip self-loops (recursive calls) - not allowed in undirected graph
123
129
  if (graph.hasNode(rel.sourceId) && graph.hasNode(rel.targetId) && rel.sourceId !== rel.targetId) {
124
- // Avoid duplicate edges
125
130
  if (!graph.hasEdge(rel.sourceId, rel.targetId)) {
126
131
  graph.addEdge(rel.sourceId, rel.targetId);
127
132
  }
@@ -241,29 +246,32 @@ const findCommonPrefix = (strings) => {
241
246
  // HELPER: Calculate community cohesion
242
247
  // ============================================================================
243
248
  /**
244
- * Calculate cohesion score (0-1) based on internal edge density
245
- * Higher cohesion = more internal connections relative to size
249
+ * Estimate cohesion score (0-1) based on internal edge density.
250
+ * Uses sampling for large communities to avoid O(N^2) cost.
246
251
  */
247
252
  const calculateCohesion = (memberIds, graph) => {
248
253
  if (memberIds.length <= 1)
249
254
  return 1.0;
250
255
  const memberSet = new Set(memberIds);
256
+ // Sample up to 50 members for large communities
257
+ const SAMPLE_SIZE = 50;
258
+ const sample = memberIds.length <= SAMPLE_SIZE
259
+ ? memberIds
260
+ : memberIds.slice(0, SAMPLE_SIZE);
251
261
  let internalEdges = 0;
252
- // Count edges within the community
253
- memberIds.forEach(nodeId => {
254
- if (graph.hasNode(nodeId)) {
255
- graph.forEachNeighbor(nodeId, neighbor => {
256
- if (memberSet.has(neighbor)) {
257
- internalEdges++;
258
- }
259
- });
260
- }
261
- });
262
- // Each edge is counted twice (once from each end), so divide by 2
263
- internalEdges = internalEdges / 2;
264
- // Maximum possible internal edges for n nodes: n*(n-1)/2
265
- const maxPossibleEdges = (memberIds.length * (memberIds.length - 1)) / 2;
266
- if (maxPossibleEdges === 0)
262
+ let totalEdges = 0;
263
+ for (const nodeId of sample) {
264
+ if (!graph.hasNode(nodeId))
265
+ continue;
266
+ graph.forEachNeighbor(nodeId, (neighbor) => {
267
+ totalEdges++;
268
+ if (memberSet.has(neighbor)) {
269
+ internalEdges++;
270
+ }
271
+ });
272
+ }
273
+ // Cohesion = fraction of edges that stay internal
274
+ if (totalEdges === 0)
267
275
  return 1.0;
268
- return Math.min(1.0, internalEdges / maxPossibleEdges);
276
+ return Math.min(1.0, internalEdges / totalEdges);
269
277
  };
@@ -2,6 +2,7 @@ import fs from 'fs/promises';
2
2
  import path from 'path';
3
3
  import { glob } from 'glob';
4
4
  import { shouldIgnorePath } from '../../config/ignore-service.js';
5
+ const READ_CONCURRENCY = 32;
5
6
  export const walkRepository = async (repoPath, onProgress) => {
6
7
  const files = await glob('**/*', {
7
8
  cwd: repoPath,
@@ -10,16 +11,20 @@ export const walkRepository = async (repoPath, onProgress) => {
10
11
  });
11
12
  const filtered = files.filter(file => !shouldIgnorePath(file));
12
13
  const entries = [];
13
- for (let i = 0; i < filtered.length; i++) {
14
- const relativePath = filtered[i];
15
- const fullPath = path.join(repoPath, relativePath);
16
- try {
17
- const content = await fs.readFile(fullPath, 'utf-8');
18
- entries.push({ path: relativePath.replace(/\\/g, '/'), content });
19
- onProgress?.(i + 1, filtered.length, relativePath);
20
- }
21
- catch {
22
- onProgress?.(i + 1, filtered.length, relativePath);
14
+ let processed = 0;
15
+ for (let start = 0; start < filtered.length; start += READ_CONCURRENCY) {
16
+ const batch = filtered.slice(start, start + READ_CONCURRENCY);
17
+ const results = await Promise.allSettled(batch.map(relativePath => fs.readFile(path.join(repoPath, relativePath), 'utf-8')
18
+ .then(content => ({ path: relativePath.replace(/\\/g, '/'), content }))));
19
+ for (const result of results) {
20
+ processed++;
21
+ if (result.status === 'fulfilled') {
22
+ entries.push(result.value);
23
+ onProgress?.(processed, filtered.length, result.value.path);
24
+ }
25
+ else {
26
+ onProgress?.(processed, filtered.length, batch[results.indexOf(result)]);
27
+ }
23
28
  }
24
29
  }
25
30
  return entries;
@@ -8,7 +8,13 @@
8
8
  import { KnowledgeGraph } from '../graph/types.js';
9
9
  import { ASTCache } from './ast-cache.js';
10
10
  import { SymbolTable } from './symbol-table.js';
11
+ import type { ExtractedHeritage } from './workers/parse-worker.js';
11
12
  export declare const processHeritage: (graph: KnowledgeGraph, files: {
12
13
  path: string;
13
14
  content: string;
14
15
  }[], astCache: ASTCache, symbolTable: SymbolTable, onProgress?: (current: number, total: number) => void) => Promise<void>;
16
+ /**
17
+ * Fast path: resolve pre-extracted heritage from workers.
18
+ * No AST parsing — workers already extracted className + parentName + kind.
19
+ */
20
+ export declare const processHeritageFromExtracted: (graph: KnowledgeGraph, extractedHeritage: ExtractedHeritage[], symbolTable: SymbolTable, onProgress?: (current: number, total: number) => void) => Promise<void>;