gitnexus 1.1.8 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +50 -59
  2. package/dist/cli/ai-context.js +9 -9
  3. package/dist/cli/analyze.js +139 -47
  4. package/dist/cli/augment.d.ts +13 -0
  5. package/dist/cli/augment.js +33 -0
  6. package/dist/cli/claude-hooks.d.ts +22 -0
  7. package/dist/cli/claude-hooks.js +97 -0
  8. package/dist/cli/eval-server.d.ts +30 -0
  9. package/dist/cli/eval-server.js +372 -0
  10. package/dist/cli/index.js +56 -1
  11. package/dist/cli/mcp.js +9 -0
  12. package/dist/cli/setup.js +184 -5
  13. package/dist/cli/tool.d.ts +37 -0
  14. package/dist/cli/tool.js +91 -0
  15. package/dist/cli/wiki.d.ts +13 -0
  16. package/dist/cli/wiki.js +199 -0
  17. package/dist/core/augmentation/engine.d.ts +26 -0
  18. package/dist/core/augmentation/engine.js +213 -0
  19. package/dist/core/embeddings/embedder.d.ts +2 -2
  20. package/dist/core/embeddings/embedder.js +11 -11
  21. package/dist/core/embeddings/embedding-pipeline.d.ts +2 -1
  22. package/dist/core/embeddings/embedding-pipeline.js +13 -5
  23. package/dist/core/embeddings/types.d.ts +2 -2
  24. package/dist/core/ingestion/call-processor.d.ts +7 -0
  25. package/dist/core/ingestion/call-processor.js +61 -23
  26. package/dist/core/ingestion/community-processor.js +34 -26
  27. package/dist/core/ingestion/filesystem-walker.js +15 -10
  28. package/dist/core/ingestion/heritage-processor.d.ts +6 -0
  29. package/dist/core/ingestion/heritage-processor.js +68 -5
  30. package/dist/core/ingestion/import-processor.d.ts +22 -0
  31. package/dist/core/ingestion/import-processor.js +215 -20
  32. package/dist/core/ingestion/parsing-processor.d.ts +8 -1
  33. package/dist/core/ingestion/parsing-processor.js +66 -25
  34. package/dist/core/ingestion/pipeline.js +104 -40
  35. package/dist/core/ingestion/process-processor.js +1 -1
  36. package/dist/core/ingestion/workers/parse-worker.d.ts +58 -0
  37. package/dist/core/ingestion/workers/parse-worker.js +451 -0
  38. package/dist/core/ingestion/workers/worker-pool.d.ts +22 -0
  39. package/dist/core/ingestion/workers/worker-pool.js +65 -0
  40. package/dist/core/kuzu/kuzu-adapter.d.ts +15 -1
  41. package/dist/core/kuzu/kuzu-adapter.js +177 -63
  42. package/dist/core/kuzu/schema.d.ts +1 -1
  43. package/dist/core/kuzu/schema.js +3 -0
  44. package/dist/core/search/bm25-index.js +13 -15
  45. package/dist/core/wiki/generator.d.ts +96 -0
  46. package/dist/core/wiki/generator.js +674 -0
  47. package/dist/core/wiki/graph-queries.d.ts +80 -0
  48. package/dist/core/wiki/graph-queries.js +238 -0
  49. package/dist/core/wiki/html-viewer.d.ts +10 -0
  50. package/dist/core/wiki/html-viewer.js +297 -0
  51. package/dist/core/wiki/llm-client.d.ts +36 -0
  52. package/dist/core/wiki/llm-client.js +111 -0
  53. package/dist/core/wiki/prompts.d.ts +53 -0
  54. package/dist/core/wiki/prompts.js +174 -0
  55. package/dist/mcp/core/embedder.js +4 -2
  56. package/dist/mcp/core/kuzu-adapter.d.ts +2 -1
  57. package/dist/mcp/core/kuzu-adapter.js +35 -15
  58. package/dist/mcp/local/local-backend.d.ts +54 -1
  59. package/dist/mcp/local/local-backend.js +716 -171
  60. package/dist/mcp/resources.d.ts +1 -1
  61. package/dist/mcp/resources.js +111 -73
  62. package/dist/mcp/server.d.ts +1 -1
  63. package/dist/mcp/server.js +91 -22
  64. package/dist/mcp/tools.js +80 -61
  65. package/dist/storage/git.d.ts +0 -1
  66. package/dist/storage/git.js +1 -8
  67. package/dist/storage/repo-manager.d.ts +17 -0
  68. package/dist/storage/repo-manager.js +26 -0
  69. package/hooks/claude/gitnexus-hook.cjs +135 -0
  70. package/hooks/claude/pre-tool-use.sh +78 -0
  71. package/hooks/claude/session-start.sh +42 -0
  72. package/package.json +4 -2
  73. package/skills/debugging.md +24 -22
  74. package/skills/exploring.md +26 -24
  75. package/skills/impact-analysis.md +19 -13
  76. package/skills/refactoring.md +37 -26
@@ -0,0 +1,213 @@
1
+ /**
2
+ * Augmentation Engine
3
+ *
4
+ * Lightweight, fast-path enrichment of search patterns with knowledge graph context.
5
+ * Designed to be called from platform hooks (Claude Code PreToolUse, Cursor beforeShellExecution)
6
+ * when an agent runs grep/glob/search.
7
+ *
8
+ * Performance target: <500ms cold start, <200ms warm.
9
+ *
10
+ * Design decisions:
11
+ * - Uses only BM25 search (no semantic/embedding) for speed
12
+ * - Clusters used internally for ranking, NEVER in output
13
+ * - Output is pure relationships: callers, callees, process participation
14
+ * - Graceful failure: any error → return empty string
15
+ */
16
+ import path from 'path';
17
+ import { listRegisteredRepos } from '../../storage/repo-manager.js';
18
+ /**
19
+ * Find the best matching repo for a given working directory.
20
+ * Matches by checking if cwd is within the repo's path.
21
+ */
22
+ async function findRepoForCwd(cwd) {
23
+ try {
24
+ const entries = await listRegisteredRepos({ validate: true });
25
+ const resolved = path.resolve(cwd);
26
+ // Normalize to lowercase on Windows (drive letters can differ: D: vs d:)
27
+ const isWindows = process.platform === 'win32';
28
+ const normalizedCwd = isWindows ? resolved.toLowerCase() : resolved;
29
+ const sep = path.sep;
30
+ // Find the LONGEST matching repo path (most specific match wins)
31
+ let bestMatch = null;
32
+ let bestLen = 0;
33
+ for (const entry of entries) {
34
+ const repoResolved = path.resolve(entry.path);
35
+ const normalizedRepo = isWindows ? repoResolved.toLowerCase() : repoResolved;
36
+ // Check if cwd is inside repo OR repo is inside cwd
37
+ // Must match at a path separator boundary to avoid false positives
38
+ // (e.g. /projects/gitnexusv2 should NOT match /projects/gitnexus)
39
+ let matched = false;
40
+ if (normalizedCwd === normalizedRepo) {
41
+ matched = true;
42
+ }
43
+ else if (normalizedCwd.startsWith(normalizedRepo + sep)) {
44
+ matched = true;
45
+ }
46
+ else if (normalizedRepo.startsWith(normalizedCwd + sep)) {
47
+ matched = true;
48
+ }
49
+ if (matched && normalizedRepo.length > bestLen) {
50
+ bestMatch = entry;
51
+ bestLen = normalizedRepo.length;
52
+ }
53
+ }
54
+ if (!bestMatch)
55
+ return null;
56
+ return {
57
+ name: bestMatch.name,
58
+ storagePath: bestMatch.storagePath,
59
+ kuzuPath: path.join(bestMatch.storagePath, 'kuzu'),
60
+ };
61
+ }
62
+ catch {
63
+ return null;
64
+ }
65
+ }
66
+ /**
67
+ * Augment a search pattern with knowledge graph context.
68
+ *
69
+ * 1. BM25 search for the pattern
70
+ * 2. For top matches, fetch callers/callees/processes
71
+ * 3. Rank by internal cluster cohesion (not exposed)
72
+ * 4. Format as structured text block
73
+ *
74
+ * Returns empty string on any error (graceful failure).
75
+ */
76
+ export async function augment(pattern, cwd) {
77
+ if (!pattern || pattern.length < 3)
78
+ return '';
79
+ const workDir = cwd || process.cwd();
80
+ try {
81
+ const repo = await findRepoForCwd(workDir);
82
+ if (!repo)
83
+ return '';
84
+ // Lazy-load kuzu adapter (skip unnecessary init)
85
+ const { initKuzu, executeQuery, isKuzuReady } = await import('../../mcp/core/kuzu-adapter.js');
86
+ const { searchFTSFromKuzu } = await import('../search/bm25-index.js');
87
+ const repoId = repo.name.toLowerCase();
88
+ // Init KuzuDB if not already
89
+ if (!isKuzuReady(repoId)) {
90
+ await initKuzu(repoId, repo.kuzuPath);
91
+ }
92
+ // Step 1: BM25 search (fast, no embeddings)
93
+ const bm25Results = await searchFTSFromKuzu(pattern, 10, repoId);
94
+ if (bm25Results.length === 0)
95
+ return '';
96
+ // Step 2: Map BM25 file results to symbols
97
+ const symbolMatches = [];
98
+ for (const result of bm25Results.slice(0, 5)) {
99
+ const escaped = result.filePath.replace(/'/g, "''");
100
+ try {
101
+ const symbols = await executeQuery(repoId, `
102
+ MATCH (n) WHERE n.filePath = '${escaped}'
103
+ AND n.name CONTAINS '${pattern.replace(/'/g, "''").split(/\s+/)[0]}'
104
+ RETURN n.id AS id, n.name AS name, labels(n)[0] AS type, n.filePath AS filePath
105
+ LIMIT 3
106
+ `);
107
+ for (const sym of symbols) {
108
+ symbolMatches.push({
109
+ nodeId: sym.id || sym[0],
110
+ name: sym.name || sym[1],
111
+ type: sym.type || sym[2],
112
+ filePath: sym.filePath || sym[3],
113
+ score: result.score,
114
+ });
115
+ }
116
+ }
117
+ catch { /* skip */ }
118
+ }
119
+ if (symbolMatches.length === 0)
120
+ return '';
121
+ // Step 3: For top matches, fetch callers/callees/processes
122
+ // Also get cluster cohesion internally for ranking
123
+ const enriched = [];
124
+ const seen = new Set();
125
+ for (const sym of symbolMatches.slice(0, 5)) {
126
+ if (seen.has(sym.nodeId))
127
+ continue;
128
+ seen.add(sym.nodeId);
129
+ const escaped = sym.nodeId.replace(/'/g, "''");
130
+ // Callers
131
+ let callers = [];
132
+ try {
133
+ const rows = await executeQuery(repoId, `
134
+ MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(n {id: '${escaped}'})
135
+ RETURN caller.name AS name
136
+ LIMIT 3
137
+ `);
138
+ callers = rows.map((r) => r.name || r[0]).filter(Boolean);
139
+ }
140
+ catch { /* skip */ }
141
+ // Callees
142
+ let callees = [];
143
+ try {
144
+ const rows = await executeQuery(repoId, `
145
+ MATCH (n {id: '${escaped}'})-[:CodeRelation {type: 'CALLS'}]->(callee)
146
+ RETURN callee.name AS name
147
+ LIMIT 3
148
+ `);
149
+ callees = rows.map((r) => r.name || r[0]).filter(Boolean);
150
+ }
151
+ catch { /* skip */ }
152
+ // Processes
153
+ let processes = [];
154
+ try {
155
+ const rows = await executeQuery(repoId, `
156
+ MATCH (n {id: '${escaped}'})-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process)
157
+ RETURN p.heuristicLabel AS label, r.step AS step, p.stepCount AS stepCount
158
+ `);
159
+ processes = rows.map((r) => {
160
+ const label = r.label || r[0];
161
+ const step = r.step || r[1];
162
+ const stepCount = r.stepCount || r[2];
163
+ return `${label} (step ${step}/${stepCount})`;
164
+ }).filter(Boolean);
165
+ }
166
+ catch { /* skip */ }
167
+ // Cluster cohesion (internal ranking signal)
168
+ let cohesion = 0;
169
+ try {
170
+ const rows = await executeQuery(repoId, `
171
+ MATCH (n {id: '${escaped}'})-[:CodeRelation {type: 'MEMBER_OF'}]->(c:Community)
172
+ RETURN c.cohesion AS cohesion
173
+ LIMIT 1
174
+ `);
175
+ if (rows.length > 0) {
176
+ cohesion = (rows[0].cohesion ?? rows[0][0]) || 0;
177
+ }
178
+ }
179
+ catch { /* skip */ }
180
+ enriched.push({
181
+ name: sym.name,
182
+ filePath: sym.filePath,
183
+ callers,
184
+ callees,
185
+ processes,
186
+ cohesion,
187
+ });
188
+ }
189
+ if (enriched.length === 0)
190
+ return '';
191
+ // Step 4: Rank by cohesion (internal signal) and format
192
+ enriched.sort((a, b) => b.cohesion - a.cohesion);
193
+ const lines = [`[GitNexus] ${enriched.length} related symbols found:`, ''];
194
+ for (const item of enriched) {
195
+ lines.push(`${item.name} (${item.filePath})`);
196
+ if (item.callers.length > 0) {
197
+ lines.push(` Called by: ${item.callers.join(', ')}`);
198
+ }
199
+ if (item.callees.length > 0) {
200
+ lines.push(` Calls: ${item.callees.join(', ')}`);
201
+ }
202
+ if (item.processes.length > 0) {
203
+ lines.push(` Flows: ${item.processes.join(', ')}`);
204
+ }
205
+ lines.push('');
206
+ }
207
+ return lines.join('\n').trim();
208
+ }
209
+ catch {
210
+ // Graceful failure — never break the original tool
211
+ return '';
212
+ }
213
+ }
@@ -15,7 +15,7 @@ export type ModelProgressCallback = (progress: ModelProgress) => void;
15
15
  /**
16
16
  * Get the current device being used for inference
17
17
  */
18
- export declare const getCurrentDevice: () => "webgpu" | "cuda" | "cpu" | "wasm" | null;
18
+ export declare const getCurrentDevice: () => "dml" | "cuda" | "cpu" | "wasm" | null;
19
19
  /**
20
20
  * Initialize the embedding model
21
21
  * Uses singleton pattern - only loads once, subsequent calls return cached instance
@@ -25,7 +25,7 @@ export declare const getCurrentDevice: () => "webgpu" | "cuda" | "cpu" | "wasm"
25
25
  * @param forceDevice - Force a specific device
26
26
  * @returns Promise resolving to the embedder pipeline
27
27
  */
28
- export declare const initEmbedder: (onProgress?: ModelProgressCallback, config?: Partial<EmbeddingConfig>, forceDevice?: "webgpu" | "cuda" | "cpu" | "wasm") => Promise<FeatureExtractionPipeline>;
28
+ export declare const initEmbedder: (onProgress?: ModelProgressCallback, config?: Partial<EmbeddingConfig>, forceDevice?: "dml" | "cuda" | "cpu" | "wasm") => Promise<FeatureExtractionPipeline>;
29
29
  /**
30
30
  * Check if the embedder is initialized and ready
31
31
  */
@@ -37,16 +37,16 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
37
37
  }
38
38
  isInitializing = true;
39
39
  const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
40
- // On Windows, use webgpu for GPU acceleration (via DirectX12/DirectML)
41
- // CUDA is only available on Linux with onnxruntime-node
40
+ // On Windows, use DirectML for GPU acceleration (via DirectX12)
41
+ // CUDA is only available on Linux x64 with onnxruntime-node
42
42
  const isWindows = process.platform === 'win32';
43
- const gpuDevice = isWindows ? 'webgpu' : 'cuda';
43
+ const gpuDevice = isWindows ? 'dml' : 'cuda';
44
44
  let requestedDevice = forceDevice || (finalConfig.device === 'auto' ? gpuDevice : finalConfig.device);
45
45
  initPromise = (async () => {
46
46
  try {
47
47
  // Configure transformers.js environment
48
48
  env.allowLocalModels = false;
49
- const isDev = process.env.NODE_ENV !== 'production';
49
+ const isDev = process.env.NODE_ENV === 'development';
50
50
  if (isDev) {
51
51
  console.log(`🧠 Loading embedding model: ${finalConfig.modelId}`);
52
52
  }
@@ -61,14 +61,14 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
61
61
  onProgress(progress);
62
62
  } : undefined;
63
63
  // Try GPU first if auto, fall back to CPU
64
- // Windows: webgpu (DirectX12/DirectML), Linux: cuda
65
- const devicesToTry = (requestedDevice === 'webgpu' || requestedDevice === 'cuda')
64
+ // Windows: dml (DirectML/DirectX12), Linux: cuda
65
+ const devicesToTry = (requestedDevice === 'dml' || requestedDevice === 'cuda')
66
66
  ? [requestedDevice, 'cpu']
67
67
  : [requestedDevice];
68
68
  for (const device of devicesToTry) {
69
69
  try {
70
- if (isDev && device === 'webgpu') {
71
- console.log('🔧 Trying WebGPU (DirectX12) backend...');
70
+ if (isDev && device === 'dml') {
71
+ console.log('🔧 Trying DirectML (DirectX12) GPU backend...');
72
72
  }
73
73
  else if (isDev && device === 'cuda') {
74
74
  console.log('🔧 Trying CUDA GPU backend...');
@@ -86,7 +86,7 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
86
86
  });
87
87
  currentDevice = device;
88
88
  if (isDev) {
89
- const label = device === 'webgpu' ? 'GPU (WebGPU/DirectX12)'
89
+ const label = device === 'dml' ? 'GPU (DirectML/DirectX12)'
90
90
  : device === 'cuda' ? 'GPU (CUDA)'
91
91
  : device.toUpperCase();
92
92
  console.log(`✅ Using ${label} backend`);
@@ -95,8 +95,8 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
95
95
  return embedderInstance;
96
96
  }
97
97
  catch (deviceError) {
98
- if (isDev && (device === 'cuda' || device === 'webgpu')) {
99
- const gpuType = device === 'webgpu' ? 'WebGPU' : 'CUDA';
98
+ if (isDev && (device === 'cuda' || device === 'dml')) {
99
+ const gpuType = device === 'dml' ? 'DirectML' : 'CUDA';
100
100
  console.log(`⚠️ ${gpuType} not available, falling back to CPU...`);
101
101
  }
102
102
  // Continue to next device in list
@@ -20,8 +20,9 @@ export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
20
20
  * @param executeWithReusedStatement - Function to execute with reused prepared statement
21
21
  * @param onProgress - Callback for progress updates
22
22
  * @param config - Optional configuration override
23
+ * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
23
24
  */
24
- export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>) => Promise<void>;
25
+ export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>) => Promise<void>;
25
26
  /**
26
27
  * Perform semantic search using the vector index
27
28
  *
@@ -11,7 +11,7 @@
11
11
  import { initEmbedder, embedBatch, embedText, embeddingToArray, isEmbedderReady } from './embedder.js';
12
12
  import { generateBatchEmbeddingTexts } from './text-generator.js';
13
13
  import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, } from './types.js';
14
- const isDev = process.env.NODE_ENV !== 'production';
14
+ const isDev = process.env.NODE_ENV === 'development';
15
15
  /**
16
16
  * Query all embeddable nodes from KuzuDB
17
17
  * Uses table-specific queries (File has different schema than code elements)
@@ -97,8 +97,9 @@ const createVectorIndex = async (executeQuery) => {
97
97
  * @param executeWithReusedStatement - Function to execute with reused prepared statement
98
98
  * @param onProgress - Callback for progress updates
99
99
  * @param config - Optional configuration override
100
+ * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
100
101
  */
101
- export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}) => {
102
+ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, skipNodeIds) => {
102
103
  const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
103
104
  try {
104
105
  // Phase 1: Load embedding model
@@ -108,11 +109,10 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
108
109
  modelDownloadPercent: 0,
109
110
  });
110
111
  await initEmbedder((modelProgress) => {
111
- // Report model download progress
112
112
  const downloadPercent = modelProgress.progress ?? 0;
113
113
  onProgress({
114
114
  phase: 'loading-model',
115
- percent: Math.round(downloadPercent * 0.2), // 0-20% for model loading
115
+ percent: Math.round(downloadPercent * 0.2),
116
116
  modelDownloadPercent: downloadPercent,
117
117
  });
118
118
  }, finalConfig);
@@ -125,7 +125,15 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
125
125
  console.log('🔍 Querying embeddable nodes...');
126
126
  }
127
127
  // Phase 2: Query embeddable nodes
128
- const nodes = await queryEmbeddableNodes(executeQuery);
128
+ let nodes = await queryEmbeddableNodes(executeQuery);
129
+ // Incremental mode: filter out nodes that already have embeddings
130
+ if (skipNodeIds && skipNodeIds.size > 0) {
131
+ const beforeCount = nodes.length;
132
+ nodes = nodes.filter(n => !skipNodeIds.has(n.id));
133
+ if (isDev) {
134
+ console.log(`📦 Incremental embeddings: ${beforeCount} total, ${skipNodeIds.size} cached, ${nodes.length} to embed`);
135
+ }
136
+ }
129
137
  const totalNodes = nodes.length;
130
138
  if (isDev) {
131
139
  console.log(`📊 Found ${totalNodes} embeddable nodes`);
@@ -40,8 +40,8 @@ export interface EmbeddingConfig {
40
40
  batchSize: number;
41
41
  /** Embedding vector dimensions */
42
42
  dimensions: number;
43
- /** Device to use for inference: 'auto' tries GPU first, falls back to CPU */
44
- device: 'auto' | 'webgpu' | 'cuda' | 'cpu' | 'wasm';
43
+ /** Device to use for inference: 'auto' tries GPU first (DirectML on Windows, CUDA on Linux), falls back to CPU */
44
+ device: 'auto' | 'dml' | 'cuda' | 'cpu' | 'wasm';
45
45
  /** Maximum characters of code snippet to include */
46
46
  maxSnippetLength: number;
47
47
  }
@@ -2,7 +2,14 @@ import { KnowledgeGraph } from '../graph/types.js';
2
2
  import { ASTCache } from './ast-cache.js';
3
3
  import { SymbolTable } from './symbol-table.js';
4
4
  import { ImportMap } from './import-processor.js';
5
+ import type { ExtractedCall } from './workers/parse-worker.js';
5
6
  export declare const processCalls: (graph: KnowledgeGraph, files: {
6
7
  path: string;
7
8
  content: string;
8
9
  }[], astCache: ASTCache, symbolTable: SymbolTable, importMap: ImportMap, onProgress?: (current: number, total: number) => void) => Promise<void>;
10
+ /**
11
+ * Fast path: resolve pre-extracted call sites from workers.
12
+ * No AST parsing — workers already extracted calledName + sourceId.
13
+ * This function only does symbol table lookups + graph mutations.
14
+ */
15
+ export declare const processCallsFromExtracted: (graph: KnowledgeGraph, extractedCalls: ExtractedCall[], symbolTable: SymbolTable, importMap: ImportMap, onProgress?: (current: number, total: number) => void) => Promise<void>;
@@ -145,6 +145,8 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
145
145
  continue;
146
146
  }
147
147
  wasReparsed = true;
148
+ // Cache re-parsed tree so heritage phase gets hits
149
+ astCache.set(file.path, tree);
148
150
  }
149
151
  let query;
150
152
  let matches;
@@ -155,8 +157,6 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
155
157
  }
156
158
  catch (queryError) {
157
159
  console.warn(`Query error for ${file.path}:`, queryError);
158
- if (wasReparsed)
159
- tree.delete?.();
160
160
  continue;
161
161
  }
162
162
  // 3. Process each call match
@@ -192,10 +192,7 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
192
192
  reason: resolved.reason,
193
193
  });
194
194
  });
195
- // Cleanup if re-parsed
196
- if (wasReparsed) {
197
- tree.delete?.();
198
- }
195
+ // Tree is now owned by the LRU cache — no manual delete needed
199
196
  }
200
197
  };
201
198
  /**
@@ -207,27 +204,27 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
207
204
  * Returns confidence score so agents know what to trust.
208
205
  */
209
206
  const resolveCallTarget = (calledName, currentFile, symbolTable, importMap) => {
210
- // Strategy A: Check imported files (HIGH confidence - we know the import chain)
211
- const importedFiles = importMap.get(currentFile);
212
- if (importedFiles) {
213
- for (const importedFile of importedFiles) {
214
- const nodeId = symbolTable.lookupExact(importedFile, calledName);
215
- if (nodeId) {
216
- return { nodeId, confidence: 0.9, reason: 'import-resolved' };
217
- }
218
- }
219
- }
220
- // Strategy B: Check local file (HIGH confidence - same file definition)
207
+ // Strategy B first (cheapest single map lookup): Check local file
221
208
  const localNodeId = symbolTable.lookupExact(currentFile, calledName);
222
209
  if (localNodeId) {
223
210
  return { nodeId: localNodeId, confidence: 0.85, reason: 'same-file' };
224
211
  }
225
- // Strategy C: Fuzzy global search (LOW confidence - just matching by name)
226
- const fuzzyMatches = symbolTable.lookupFuzzy(calledName);
227
- if (fuzzyMatches.length > 0) {
228
- // Lower confidence if multiple matches exist (more ambiguous)
229
- const confidence = fuzzyMatches.length === 1 ? 0.5 : 0.3;
230
- return { nodeId: fuzzyMatches[0].nodeId, confidence, reason: 'fuzzy-global' };
212
+ // Strategy A: Check if any definition of calledName is in an imported file
213
+ // Reversed: instead of iterating all imports and checking each, get all definitions
214
+ // and check if any is imported. O(definitions) instead of O(imports).
215
+ const allDefs = symbolTable.lookupFuzzy(calledName);
216
+ if (allDefs.length > 0) {
217
+ const importedFiles = importMap.get(currentFile);
218
+ if (importedFiles) {
219
+ for (const def of allDefs) {
220
+ if (importedFiles.has(def.filePath)) {
221
+ return { nodeId: def.nodeId, confidence: 0.9, reason: 'import-resolved' };
222
+ }
223
+ }
224
+ }
225
+ // Strategy C: Fuzzy global (no import match found)
226
+ const confidence = allDefs.length === 1 ? 0.5 : 0.3;
227
+ return { nodeId: allDefs[0].nodeId, confidence, reason: 'fuzzy-global' };
231
228
  }
232
229
  return null;
233
230
  };
@@ -267,3 +264,44 @@ const isBuiltInOrNoise = (name) => {
267
264
  ]);
268
265
  return builtIns.has(name);
269
266
  };
267
+ /**
268
+ * Fast path: resolve pre-extracted call sites from workers.
269
+ * No AST parsing — workers already extracted calledName + sourceId.
270
+ * This function only does symbol table lookups + graph mutations.
271
+ */
272
+ export const processCallsFromExtracted = async (graph, extractedCalls, symbolTable, importMap, onProgress) => {
273
+ // Group by file for progress reporting
274
+ const byFile = new Map();
275
+ for (const call of extractedCalls) {
276
+ let list = byFile.get(call.filePath);
277
+ if (!list) {
278
+ list = [];
279
+ byFile.set(call.filePath, list);
280
+ }
281
+ list.push(call);
282
+ }
283
+ const totalFiles = byFile.size;
284
+ let filesProcessed = 0;
285
+ for (const [_filePath, calls] of byFile) {
286
+ filesProcessed++;
287
+ if (filesProcessed % 100 === 0) {
288
+ onProgress?.(filesProcessed, totalFiles);
289
+ await yieldToEventLoop();
290
+ }
291
+ for (const call of calls) {
292
+ const resolved = resolveCallTarget(call.calledName, call.filePath, symbolTable, importMap);
293
+ if (!resolved)
294
+ continue;
295
+ const relId = generateId('CALLS', `${call.sourceId}:${call.calledName}->${resolved.nodeId}`);
296
+ graph.addRelationship({
297
+ id: relId,
298
+ sourceId: call.sourceId,
299
+ targetId: resolved.nodeId,
300
+ type: 'CALLS',
301
+ confidence: resolved.confidence,
302
+ reason: resolved.reason,
303
+ });
304
+ }
305
+ }
306
+ onProgress?.(totalFiles, totalFiles);
307
+ };
@@ -103,9 +103,19 @@ const buildGraphologyGraph = (knowledgeGraph) => {
103
103
  const graph = new Graph({ type: 'undirected', allowSelfLoops: false });
104
104
  // Symbol types that should be clustered
105
105
  const symbolTypes = new Set(['Function', 'Class', 'Method', 'Interface']);
106
- // Add symbol nodes
106
+ // First pass: collect which nodes participate in clustering edges
107
+ const clusteringRelTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);
108
+ const connectedNodes = new Set();
109
+ knowledgeGraph.relationships.forEach(rel => {
110
+ if (clusteringRelTypes.has(rel.type) && rel.sourceId !== rel.targetId) {
111
+ connectedNodes.add(rel.sourceId);
112
+ connectedNodes.add(rel.targetId);
113
+ }
114
+ });
115
+ // Only add nodes that have at least one clustering edge
116
+ // Isolated nodes would just become singletons (skipped anyway)
107
117
  knowledgeGraph.nodes.forEach(node => {
108
- if (symbolTypes.has(node.label)) {
118
+ if (symbolTypes.has(node.label) && connectedNodes.has(node.id)) {
109
119
  graph.addNode(node.id, {
110
120
  name: node.properties.name,
111
121
  filePath: node.properties.filePath,
@@ -113,15 +123,10 @@ const buildGraphologyGraph = (knowledgeGraph) => {
113
123
  });
114
124
  }
115
125
  });
116
- // Add CALLS edges (primary clustering signal)
117
- // We can also include EXTENDS/IMPLEMENTS for OOP clustering
118
- const clusteringRelTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);
126
+ // Add edges
119
127
  knowledgeGraph.relationships.forEach(rel => {
120
128
  if (clusteringRelTypes.has(rel.type)) {
121
- // Only add edge if both nodes exist in our symbol graph
122
- // Also skip self-loops (recursive calls) - not allowed in undirected graph
123
129
  if (graph.hasNode(rel.sourceId) && graph.hasNode(rel.targetId) && rel.sourceId !== rel.targetId) {
124
- // Avoid duplicate edges
125
130
  if (!graph.hasEdge(rel.sourceId, rel.targetId)) {
126
131
  graph.addEdge(rel.sourceId, rel.targetId);
127
132
  }
@@ -241,29 +246,32 @@ const findCommonPrefix = (strings) => {
241
246
  // HELPER: Calculate community cohesion
242
247
  // ============================================================================
243
248
  /**
244
- * Calculate cohesion score (0-1) based on internal edge density
245
- * Higher cohesion = more internal connections relative to size
249
+ * Estimate cohesion score (0-1) based on internal edge density.
250
+ * Uses sampling for large communities to avoid O(N^2) cost.
246
251
  */
247
252
  const calculateCohesion = (memberIds, graph) => {
248
253
  if (memberIds.length <= 1)
249
254
  return 1.0;
250
255
  const memberSet = new Set(memberIds);
256
+ // Sample up to 50 members for large communities
257
+ const SAMPLE_SIZE = 50;
258
+ const sample = memberIds.length <= SAMPLE_SIZE
259
+ ? memberIds
260
+ : memberIds.slice(0, SAMPLE_SIZE);
251
261
  let internalEdges = 0;
252
- // Count edges within the community
253
- memberIds.forEach(nodeId => {
254
- if (graph.hasNode(nodeId)) {
255
- graph.forEachNeighbor(nodeId, neighbor => {
256
- if (memberSet.has(neighbor)) {
257
- internalEdges++;
258
- }
259
- });
260
- }
261
- });
262
- // Each edge is counted twice (once from each end), so divide by 2
263
- internalEdges = internalEdges / 2;
264
- // Maximum possible internal edges for n nodes: n*(n-1)/2
265
- const maxPossibleEdges = (memberIds.length * (memberIds.length - 1)) / 2;
266
- if (maxPossibleEdges === 0)
262
+ let totalEdges = 0;
263
+ for (const nodeId of sample) {
264
+ if (!graph.hasNode(nodeId))
265
+ continue;
266
+ graph.forEachNeighbor(nodeId, (neighbor) => {
267
+ totalEdges++;
268
+ if (memberSet.has(neighbor)) {
269
+ internalEdges++;
270
+ }
271
+ });
272
+ }
273
+ // Cohesion = fraction of edges that stay internal
274
+ if (totalEdges === 0)
267
275
  return 1.0;
268
- return Math.min(1.0, internalEdges / maxPossibleEdges);
276
+ return Math.min(1.0, internalEdges / totalEdges);
269
277
  };
@@ -2,6 +2,7 @@ import fs from 'fs/promises';
2
2
  import path from 'path';
3
3
  import { glob } from 'glob';
4
4
  import { shouldIgnorePath } from '../../config/ignore-service.js';
5
+ const READ_CONCURRENCY = 32;
5
6
  export const walkRepository = async (repoPath, onProgress) => {
6
7
  const files = await glob('**/*', {
7
8
  cwd: repoPath,
@@ -10,16 +11,20 @@ export const walkRepository = async (repoPath, onProgress) => {
10
11
  });
11
12
  const filtered = files.filter(file => !shouldIgnorePath(file));
12
13
  const entries = [];
13
- for (let i = 0; i < filtered.length; i++) {
14
- const relativePath = filtered[i];
15
- const fullPath = path.join(repoPath, relativePath);
16
- try {
17
- const content = await fs.readFile(fullPath, 'utf-8');
18
- entries.push({ path: relativePath.replace(/\\/g, '/'), content });
19
- onProgress?.(i + 1, filtered.length, relativePath);
20
- }
21
- catch {
22
- onProgress?.(i + 1, filtered.length, relativePath);
14
+ let processed = 0;
15
+ for (let start = 0; start < filtered.length; start += READ_CONCURRENCY) {
16
+ const batch = filtered.slice(start, start + READ_CONCURRENCY);
17
+ const results = await Promise.allSettled(batch.map(relativePath => fs.readFile(path.join(repoPath, relativePath), 'utf-8')
18
+ .then(content => ({ path: relativePath.replace(/\\/g, '/'), content }))));
19
+ for (const result of results) {
20
+ processed++;
21
+ if (result.status === 'fulfilled') {
22
+ entries.push(result.value);
23
+ onProgress?.(processed, filtered.length, result.value.path);
24
+ }
25
+ else {
26
+ onProgress?.(processed, filtered.length, batch[results.indexOf(result)]);
27
+ }
23
28
  }
24
29
  }
25
30
  return entries;
@@ -8,7 +8,13 @@
8
8
  import { KnowledgeGraph } from '../graph/types.js';
9
9
  import { ASTCache } from './ast-cache.js';
10
10
  import { SymbolTable } from './symbol-table.js';
11
+ import type { ExtractedHeritage } from './workers/parse-worker.js';
11
12
  export declare const processHeritage: (graph: KnowledgeGraph, files: {
12
13
  path: string;
13
14
  content: string;
14
15
  }[], astCache: ASTCache, symbolTable: SymbolTable, onProgress?: (current: number, total: number) => void) => Promise<void>;
16
+ /**
17
+ * Fast path: resolve pre-extracted heritage from workers.
18
+ * No AST parsing — workers already extracted className + parentName + kind.
19
+ */
20
+ export declare const processHeritageFromExtracted: (graph: KnowledgeGraph, extractedHeritage: ExtractedHeritage[], symbolTable: SymbolTable, onProgress?: (current: number, total: number) => void) => Promise<void>;