npm - gitnexus - Versions diffs - 1.2.8 → 1.3.0 - Mend

gitnexus 1.2.8 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59)

package/README.md +194 -186
package/dist/cli/ai-context.js +71 -71
package/dist/cli/analyze.js +69 -28
package/dist/cli/index.js +20 -0
package/dist/cli/setup.js +8 -1
package/dist/cli/view.d.ts +13 -0
package/dist/cli/view.js +59 -0
package/dist/core/augmentation/engine.js +20 -20
package/dist/core/embeddings/embedding-pipeline.js +26 -26
package/dist/core/graph/graph.js +5 -0
package/dist/core/graph/html-graph-viewer.d.ts +15 -0
package/dist/core/graph/html-graph-viewer.js +542 -0
package/dist/core/graph/html-graph-viewer.test.d.ts +1 -0
package/dist/core/graph/html-graph-viewer.test.js +67 -0
package/dist/core/graph/types.d.ts +12 -1
package/dist/core/ingestion/call-processor.js +52 -32
package/dist/core/ingestion/cluster-enricher.js +16 -16
package/dist/core/ingestion/community-processor.js +75 -40
package/dist/core/ingestion/filesystem-walker.d.ts +23 -0
package/dist/core/ingestion/filesystem-walker.js +38 -3
package/dist/core/ingestion/import-processor.d.ts +11 -3
package/dist/core/ingestion/import-processor.js +27 -11
package/dist/core/ingestion/parsing-processor.js +2 -4
package/dist/core/ingestion/pipeline.js +142 -135
package/dist/core/ingestion/process-processor.js +12 -11
package/dist/core/ingestion/workers/parse-worker.js +67 -6
package/dist/core/ingestion/workers/worker-pool.d.ts +3 -9
package/dist/core/ingestion/workers/worker-pool.js +39 -18
package/dist/core/kuzu/csv-generator.d.ts +15 -8
package/dist/core/kuzu/csv-generator.js +258 -196
package/dist/core/kuzu/kuzu-adapter.d.ts +1 -4
package/dist/core/kuzu/kuzu-adapter.js +84 -72
package/dist/core/kuzu/schema.d.ts +1 -1
package/dist/core/kuzu/schema.js +266 -256
package/dist/core/search/bm25-index.js +5 -5
package/dist/core/search/hybrid-search.js +3 -3
package/dist/core/wiki/graph-queries.js +52 -52
package/dist/core/wiki/html-viewer.js +192 -192
package/dist/core/wiki/prompts.js +82 -82
package/dist/mcp/core/embedder.js +8 -4
package/dist/mcp/local/local-backend.d.ts +6 -0
package/dist/mcp/local/local-backend.js +224 -117
package/dist/mcp/resources.js +42 -42
package/dist/mcp/server.js +16 -16
package/dist/mcp/tools.js +86 -77
package/dist/server/api.d.ts +4 -2
package/dist/server/api.js +253 -83
package/dist/types/pipeline.d.ts +6 -2
package/dist/types/pipeline.js +6 -4
package/hooks/claude/gitnexus-hook.cjs +135 -135
package/hooks/claude/pre-tool-use.sh +78 -78
package/hooks/claude/session-start.sh +42 -42
package/package.json +82 -82
package/skills/debugging.md +85 -85
package/skills/exploring.md +75 -75
package/skills/impact-analysis.md +94 -94
package/skills/refactoring.md +113 -113
package/vendor/leiden/index.cjs +355 -355
package/vendor/leiden/utils.cjs +392 -392

package/dist/core/ingestion/pipeline.js CHANGED Viewed

@@ -1,34 +1,41 @@
 import { createKnowledgeGraph } from '../graph/graph.js';
 import { processStructure } from './structure-processor.js';
 import { processParsing } from './parsing-processor.js';
-import { processImports, processImportsFromExtracted, createImportMap } from './import-processor.js';
+import { processImports, processImportsFromExtracted, createImportMap, buildImportResolutionContext } from './import-processor.js';
 import { processCalls, processCallsFromExtracted } from './call-processor.js';
 import { processHeritage, processHeritageFromExtracted } from './heritage-processor.js';
 import { processCommunities } from './community-processor.js';
 import { processProcesses } from './process-processor.js';
 import { createSymbolTable } from './symbol-table.js';
 import { createASTCache } from './ast-cache.js';
-import { walkRepository } from './filesystem-walker.js';
+import { walkRepositoryPaths, readFileContents } from './filesystem-walker.js';
+import { getLanguageFromFilename } from './utils.js';
 import { createWorkerPool } from './workers/worker-pool.js';
 const isDev = process.env.NODE_ENV === 'development';
+/** Max bytes of source content to load per parse chunk. Each chunk's source +
+ *  parsed ASTs + extracted records + worker serialization overhead all live in
+ *  memory simultaneously, so this must be conservative. 20MB source ≈ 200-400MB
+ *  peak working memory per chunk after parse expansion. */
+const CHUNK_BYTE_BUDGET = 20 * 1024 * 1024; // 20MB
+/** Max AST trees to keep in LRU cache */
+const AST_CACHE_CAP = 50;
 export const runPipelineFromRepo = async (repoPath, onProgress) => {
     const graph = createKnowledgeGraph();
-    const fileContents = new Map();
     const symbolTable = createSymbolTable();
-    // AST cache sized after file scan — start with a placeholder, resize after we know file count
-    let astCache = createASTCache(50);
+    let astCache = createASTCache(AST_CACHE_CAP);
     const importMap = createImportMap();
     const cleanup = () => {
         astCache.clear();
         symbolTable.clear();
     };
     try {
+        // ── Phase 1: Scan paths only (no content read) ─────────────────────
         onProgress({
             phase: 'extracting',
             percent: 0,
             message: 'Scanning repository...',
         });
-        const files = await walkRepository(repoPath, (current, total, filePath) => {
+        const scannedFiles = await walkRepositoryPaths(repoPath, (current, total, filePath) => {
             const scanProgress = Math.round((current / total) * 15);
             onProgress({
                 phase: 'extracting',
@@ -38,167 +45,165 @@ export const runPipelineFromRepo = async (repoPath, onProgress) => {
                 stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
             });
         });
-        files.forEach(f => fileContents.set(f.path, f.content));
-        // Resize AST cache to fit all files — avoids re-parsing in import/call/heritage phases
-        astCache = createASTCache(files.length);
+        const totalFiles = scannedFiles.length;
         onProgress({
             phase: 'extracting',
             percent: 15,
             message: 'Repository scanned successfully',
-            stats: { filesProcessed: files.length, totalFiles: files.length, nodesCreated: graph.nodeCount },
+            stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
         });
+        // ── Phase 2: Structure (paths only — no content needed) ────────────
         onProgress({
             phase: 'structure',
             percent: 15,
             message: 'Analyzing project structure...',
-            stats: { filesProcessed: 0, totalFiles: files.length, nodesCreated: graph.nodeCount },
+            stats: { filesProcessed: 0, totalFiles, nodesCreated: graph.nodeCount },
         });
-        const filePaths = files.map(f => f.path);
-        processStructure(graph, filePaths);
+        const allPaths = scannedFiles.map(f => f.path);
+        processStructure(graph, allPaths);
         onProgress({
             phase: 'structure',
-            percent: 30,
+            percent: 20,
             message: 'Project structure analyzed',
-            stats: { filesProcessed: files.length, totalFiles: files.length, nodesCreated: graph.nodeCount },
+            stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
         });
+        // ── Phase 3+4: Chunked read + parse ────────────────────────────────
+        // Group parseable files into byte-budget chunks so only ~20MB of source
+        // is in memory at a time. Each chunk is: read → parse → extract → free.
+        const parseableScanned = scannedFiles.filter(f => getLanguageFromFilename(f.path));
+        const totalParseable = parseableScanned.length;
+        // Build byte-budget chunks
+        const chunks = [];
+        let currentChunk = [];
+        let currentBytes = 0;
+        for (const file of parseableScanned) {
+            if (currentChunk.length > 0 && currentBytes + file.size > CHUNK_BYTE_BUDGET) {
+                chunks.push(currentChunk);
+                currentChunk = [];
+                currentBytes = 0;
+            }
+            currentChunk.push(file.path);
+            currentBytes += file.size;
+        }
+        if (currentChunk.length > 0)
+            chunks.push(currentChunk);
+        const numChunks = chunks.length;
+        if (isDev) {
+            const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024);
+            console.log(`📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${CHUNK_BYTE_BUDGET / (1024 * 1024)}MB budget`);
+        }
         onProgress({
             phase: 'parsing',
-            percent: 30,
-            message: 'Parsing code definitions...',
-            stats: { filesProcessed: 0, totalFiles: files.length, nodesCreated: graph.nodeCount },
+            percent: 20,
+            message: `Parsing ${totalParseable} files in ${numChunks} chunk${numChunks !== 1 ? 's' : ''}...`,
+            stats: { filesProcessed: 0, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
         });
-        // Create worker pool for parallel parsing, with graceful fallback
+        // Create worker pool once, reuse across chunks
         let workerPool;
         try {
             const workerUrl = new URL('./workers/parse-worker.js', import.meta.url);
             workerPool = createWorkerPool(workerUrl);
         }
         catch (err) {
-            // Worker pool creation failed (e.g., single core) — sequential fallback
+            // Worker pool creation failed — sequential fallback
         }
-        let workerData = null;
+        let filesParsedSoFar = 0;
+        // AST cache sized for one chunk (sequential fallback uses it for import/call/heritage)
+        const maxChunkFiles = chunks.reduce((max, c) => Math.max(max, c.length), 0);
+        astCache = createASTCache(maxChunkFiles);
+        // Build import resolution context once — suffix index, file lists, resolve cache.
+        // Reused across all chunks to avoid rebuilding O(files × path_depth) structures.
+        const importCtx = buildImportResolutionContext(allPaths);
+        const allPathObjects = allPaths.map(p => ({ path: p }));
+        // Single-pass: parse + resolve imports/calls/heritage per chunk.
+        // Calls/heritage use the symbol table built so far (symbols from earlier chunks
+        // are already registered). This trades ~5% cross-chunk resolution accuracy for
+        // 200-400MB less memory — critical for Linux-kernel-scale repos.
+        const sequentialChunkPaths = [];
         try {
-            workerData = await processParsing(graph, files, symbolTable, astCache, (current, total, filePath) => {
-                const parsingProgress = 30 + ((current / total) * 40);
-                onProgress({
-                    phase: 'parsing',
-                    percent: Math.round(parsingProgress),
-                    message: 'Parsing code definitions...',
-                    detail: filePath,
-                    stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
-                });
-            }, workerPool);
+            for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
+                const chunkPaths = chunks[chunkIdx];
+                // Read content for this chunk only
+                const chunkContents = await readFileContents(repoPath, chunkPaths);
+                const chunkFiles = chunkPaths
+                    .filter(p => chunkContents.has(p))
+                    .map(p => ({ path: p, content: chunkContents.get(p) }));
+                // Parse this chunk (workers or sequential fallback)
+                const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, (current, _total, filePath) => {
+                    const globalCurrent = filesParsedSoFar + current;
+                    const parsingProgress = 20 + ((globalCurrent / totalParseable) * 62);
+                    onProgress({
+                        phase: 'parsing',
+                        percent: Math.round(parsingProgress),
+                        message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
+                        detail: filePath,
+                        stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
+                    });
+                }, workerPool);
+                if (chunkWorkerData) {
+                    // Imports
+                    await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, importMap, undefined, repoPath, importCtx);
+                    // Calls — resolve immediately, then free the array
+                    if (chunkWorkerData.calls.length > 0) {
+                        await processCallsFromExtracted(graph, chunkWorkerData.calls, symbolTable, importMap);
+                    }
+                    // Heritage — resolve immediately, then free
+                    if (chunkWorkerData.heritage.length > 0) {
+                        await processHeritageFromExtracted(graph, chunkWorkerData.heritage, symbolTable);
+                    }
+                }
+                else {
+                    await processImports(graph, chunkFiles, astCache, importMap, undefined, repoPath, allPaths);
+                    sequentialChunkPaths.push(chunkPaths);
+                }
+                filesParsedSoFar += chunkFiles.length;
+                // Clear AST cache between chunks to free memory
+                astCache.clear();
+                // chunkContents + chunkFiles + chunkWorkerData go out of scope → GC reclaims
+            }
         }
         finally {
             await workerPool?.terminate();
         }
-        onProgress({
-            phase: 'imports',
-            percent: 70,
-            message: 'Resolving imports...',
-            stats: { filesProcessed: 0, totalFiles: files.length, nodesCreated: graph.nodeCount },
-        });
-        if (workerData) {
-            // Fast path: imports already extracted by workers, just resolve paths
-            await processImportsFromExtracted(graph, files, workerData.imports, importMap, (current, total) => {
-                const importProgress = 70 + ((current / total) * 12);
-                onProgress({
-                    phase: 'imports',
-                    percent: Math.round(importProgress),
-                    message: 'Resolving imports...',
-                    stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
-                });
-            }, repoPath);
-        }
-        else {
-            // Fallback: full parse + resolve (sequential path)
-            await processImports(graph, files, astCache, importMap, (current, total) => {
-                const importProgress = 70 + ((current / total) * 12);
-                onProgress({
-                    phase: 'imports',
-                    percent: Math.round(importProgress),
-                    message: 'Resolving imports...',
-                    stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
-                });
-            }, repoPath);
+        // Sequential fallback chunks: re-read source for call/heritage resolution
+        for (const chunkPaths of sequentialChunkPaths) {
+            const chunkContents = await readFileContents(repoPath, chunkPaths);
+            const chunkFiles = chunkPaths
+                .filter(p => chunkContents.has(p))
+                .map(p => ({ path: p, content: chunkContents.get(p) }));
+            astCache = createASTCache(chunkFiles.length);
+            await processCalls(graph, chunkFiles, astCache, symbolTable, importMap);
+            await processHeritage(graph, chunkFiles, astCache, symbolTable);
+            astCache.clear();
         }
+        // Free import resolution context — suffix index + resolve cache no longer needed
+        // (allPathObjects and importCtx hold ~94MB+ for large repos)
+        allPathObjects.length = 0;
+        importCtx.resolveCache.clear();
+        importCtx.suffixIndex = null;
+        importCtx.normalizedFileList = null;
         if (isDev) {
-            const importsCount = graph.relationships.filter(r => r.type === 'IMPORTS').length;
-            console.log(`📊 Pipeline: After import phase, graph has ${importsCount} IMPORTS relationships (total: ${graph.relationshipCount})`);
-        }
-        onProgress({
-            phase: 'calls',
-            percent: 82,
-            message: 'Tracing function calls...',
-            stats: { filesProcessed: 0, totalFiles: files.length, nodesCreated: graph.nodeCount },
-        });
-        if (workerData) {
-            // Fast path: calls already extracted by workers, just resolve targets
-            await processCallsFromExtracted(graph, workerData.calls, symbolTable, importMap, (current, total) => {
-                const callProgress = 82 + ((current / total) * 10);
-                onProgress({
-                    phase: 'calls',
-                    percent: Math.round(callProgress),
-                    message: 'Tracing function calls...',
-                    stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
-                });
-            });
-        }
-        else {
-            // Fallback: full parse + resolve (sequential path)
-            await processCalls(graph, files, astCache, symbolTable, importMap, (current, total) => {
-                const callProgress = 82 + ((current / total) * 10);
-                onProgress({
-                    phase: 'calls',
-                    percent: Math.round(callProgress),
-                    message: 'Tracing function calls...',
-                    stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
-                });
-            });
-        }
-        onProgress({
-            phase: 'heritage',
-            percent: 92,
-            message: 'Extracting class inheritance...',
-            stats: { filesProcessed: 0, totalFiles: files.length, nodesCreated: graph.nodeCount },
-        });
-        if (workerData) {
-            // Fast path: heritage already extracted by workers, just resolve symbols
-            await processHeritageFromExtracted(graph, workerData.heritage, symbolTable, (current, total) => {
-                const heritageProgress = 88 + ((current / total) * 4);
-                onProgress({
-                    phase: 'heritage',
-                    percent: Math.round(heritageProgress),
-                    message: 'Extracting class inheritance...',
-                    stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
-                });
-            });
-        }
-        else {
-            // Fallback: full parse + resolve (sequential path)
-            await processHeritage(graph, files, astCache, symbolTable, (current, total) => {
-                const heritageProgress = 88 + ((current / total) * 4);
-                onProgress({
-                    phase: 'heritage',
-                    percent: Math.round(heritageProgress),
-                    message: 'Extracting class inheritance...',
-                    stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
-                });
-            });
+            let importsCount = 0;
+            for (const r of graph.iterRelationships()) {
+                if (r.type === 'IMPORTS')
+                    importsCount++;
+            }
+            console.log(`📊 Pipeline: graph has ${importsCount} IMPORTS, ${graph.relationshipCount} total relationships`);
         }
+        // ── Phase 5: Communities ───────────────────────────────────────────
         onProgress({
             phase: 'communities',
-            percent: 92,
+            percent: 82,
             message: 'Detecting code communities...',
-            stats: { filesProcessed: files.length, totalFiles: files.length, nodesCreated: graph.nodeCount },
+            stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
         });
         const communityResult = await processCommunities(graph, (message, progress) => {
-            const communityProgress = 92 + (progress * 0.06);
+            const communityProgress = 82 + (progress * 0.10);
             onProgress({
                 phase: 'communities',
                 percent: Math.round(communityProgress),
                 message,
-                stats: { filesProcessed: files.length, totalFiles: files.length, nodesCreated: graph.nodeCount },
+                stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
             });
         });
         if (isDev) {
@@ -227,22 +232,24 @@ export const runPipelineFromRepo = async (repoPath, onProgress) => {
                 reason: 'leiden-algorithm',
             });
         });
+        // ── Phase 6: Processes ─────────────────────────────────────────────
         onProgress({
             phase: 'processes',
-            percent: 98,
+            percent: 94,
             message: 'Detecting execution flows...',
-            stats: { filesProcessed: files.length, totalFiles: files.length, nodesCreated: graph.nodeCount },
+            stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
         });
-        // Dynamic process cap based on codebase size
-        const symbolCount = graph.nodes.filter(n => n.label !== 'File').length;
+        let symbolCount = 0;
+        graph.forEachNode(n => { if (n.label !== 'File')
+            symbolCount++; });
         const dynamicMaxProcesses = Math.max(20, Math.min(300, Math.round(symbolCount / 10)));
         const processResult = await processProcesses(graph, communityResult.memberships, (message, progress) => {
-            const processProgress = 98 + (progress * 0.01);
+            const processProgress = 94 + (progress * 0.05);
             onProgress({
                 phase: 'processes',
                 percent: Math.round(processProgress),
                 message,
-                stats: { filesProcessed: files.length, totalFiles: files.length, nodesCreated: graph.nodeCount },
+                stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
             });
         }, { maxProcesses: dynamicMaxProcesses, minSteps: 3 });
         if (isDev) {
@@ -280,13 +287,13 @@ export const runPipelineFromRepo = async (repoPath, onProgress) => {
             percent: 100,
             message: `Graph complete! ${communityResult.stats.totalCommunities} communities, ${processResult.stats.totalProcesses} processes detected.`,
             stats: {
-                filesProcessed: files.length,
-                totalFiles: files.length,
+                filesProcessed: totalFiles,
+                totalFiles,
                 nodesCreated: graph.nodeCount
             },
         });
         astCache.clear();
-        return { graph, fileContents, communityResult, processResult };
+        return { graph, repoPath, totalFileCount: totalFiles, communityResult, processResult };
     }
     catch (error) {
         cleanup();

package/dist/core/ingestion/process-processor.js CHANGED Viewed

@@ -34,7 +34,8 @@ export const processProcesses = async (knowledgeGraph, memberships, onProgress,
     const callsEdges = buildCallsGraph(knowledgeGraph);
     const reverseCallsEdges = buildReverseCallsGraph(knowledgeGraph);
     const nodeMap = new Map();
-    knowledgeGraph.nodes.forEach(n => nodeMap.set(n.id, n));
+    for (const n of knowledgeGraph.iterNodes())
+        nodeMap.set(n.id, n);
     // Step 1: Find entry points (functions that call others but have few callers)
     const entryPoints = findEntryPoints(knowledgeGraph, reverseCallsEdges, callsEdges);
     onProgress?.(`Found ${entryPoints.length} entry points, tracing flows...`, 20);
@@ -129,26 +130,26 @@ export const processProcesses = async (knowledgeGraph, memberships, onProgress,
 const MIN_TRACE_CONFIDENCE = 0.5;
 const buildCallsGraph = (graph) => {
     const adj = new Map();
-    graph.relationships.forEach(rel => {
+    for (const rel of graph.iterRelationships()) {
         if (rel.type === 'CALLS' && rel.confidence >= MIN_TRACE_CONFIDENCE) {
             if (!adj.has(rel.sourceId)) {
                 adj.set(rel.sourceId, []);
             }
             adj.get(rel.sourceId).push(rel.targetId);
         }
-    });
+    }
     return adj;
 };
 const buildReverseCallsGraph = (graph) => {
     const adj = new Map();
-    graph.relationships.forEach(rel => {
+    for (const rel of graph.iterRelationships()) {
         if (rel.type === 'CALLS' && rel.confidence >= MIN_TRACE_CONFIDENCE) {
             if (!adj.has(rel.targetId)) {
                 adj.set(rel.targetId, []);
             }
             adj.get(rel.targetId).push(rel.sourceId);
         }
-    });
+    }
     return adj;
 };
 /**
@@ -164,32 +165,32 @@ const buildReverseCallsGraph = (graph) => {
 const findEntryPoints = (graph, reverseCallsEdges, callsEdges) => {
     const symbolTypes = new Set(['Function', 'Method']);
     const entryPointCandidates = [];
-    graph.nodes.forEach(node => {
+    for (const node of graph.iterNodes()) {
         if (!symbolTypes.has(node.label))
-            return;
+            continue;
         const filePath = node.properties.filePath || '';
         // Skip test files entirely
         if (isTestFile(filePath))
-            return;
+            continue;
         const callers = reverseCallsEdges.get(node.id) || [];
         const callees = callsEdges.get(node.id) || [];
         // Must have at least 1 outgoing call to trace forward
         if (callees.length === 0)
-            return;
+            continue;
         // Calculate entry point score using new scoring system
         const { score, reasons } = calculateEntryPointScore(node.properties.name, node.properties.language || 'javascript', node.properties.isExported ?? false, callers.length, callees.length, filePath // Pass filePath for framework detection
         );
         if (score > 0) {
             entryPointCandidates.push({ id: node.id, score, reasons });
         }
-    });
+    }
     // Sort by score descending and return top candidates
     const sorted = entryPointCandidates.sort((a, b) => b.score - a.score);
     // DEBUG: Log top candidates with new scoring details
     if (sorted.length > 0 && isDev) {
         console.log(`[Process] Top 10 entry point candidates (new scoring):`);
         sorted.slice(0, 10).forEach((c, i) => {
-            const node = graph.nodes.find(n => n.id === c.id);
+            const node = graph.getNode(c.id);
             const exported = node?.properties.isExported ? '✓' : '✗';
             const shortPath = node?.properties.filePath?.split('/').slice(-2).join('/') || '';
             console.log(`  ${i + 1}. ${node?.properties.name} [exported:${exported}] (${shortPath})`);

package/dist/core/ingestion/workers/parse-worker.js CHANGED Viewed

@@ -171,6 +171,7 @@ const findEnclosingFunctionId = (node, filePath) => {
     return null;
 };
 const BUILT_INS = new Set([
+    // JavaScript/TypeScript
     'console', 'log', 'warn', 'error', 'info', 'debug',
     'setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
     'parseInt', 'parseFloat', 'isNaN', 'isFinite',
@@ -189,10 +190,32 @@ const BUILT_INS = new Set([
     'push', 'pop', 'shift', 'unshift', 'sort', 'reverse',
     'keys', 'values', 'entries', 'assign', 'freeze', 'seal',
     'hasOwnProperty', 'toString', 'valueOf',
+    // Python
     'print', 'len', 'range', 'str', 'int', 'float', 'list', 'dict', 'set', 'tuple',
     'open', 'read', 'write', 'close', 'append', 'extend', 'update',
     'super', 'type', 'isinstance', 'issubclass', 'getattr', 'setattr', 'hasattr',
     'enumerate', 'zip', 'sorted', 'reversed', 'min', 'max', 'sum', 'abs',
+    // C/C++ standard library
+    'printf', 'fprintf', 'sprintf', 'snprintf', 'vprintf', 'vfprintf', 'vsprintf', 'vsnprintf',
+    'scanf', 'fscanf', 'sscanf',
+    'malloc', 'calloc', 'realloc', 'free', 'memcpy', 'memmove', 'memset', 'memcmp',
+    'strlen', 'strcpy', 'strncpy', 'strcat', 'strncat', 'strcmp', 'strncmp', 'strstr', 'strchr', 'strrchr',
+    'atoi', 'atol', 'atof', 'strtol', 'strtoul', 'strtoll', 'strtoull', 'strtod',
+    'sizeof', 'offsetof', 'typeof',
+    'assert', 'abort', 'exit', '_exit',
+    'fopen', 'fclose', 'fread', 'fwrite', 'fseek', 'ftell', 'rewind', 'fflush', 'fgets', 'fputs',
+    // Linux kernel common macros/helpers (not real call targets)
+    'likely', 'unlikely', 'BUG', 'BUG_ON', 'WARN', 'WARN_ON', 'WARN_ONCE',
+    'IS_ERR', 'PTR_ERR', 'ERR_PTR', 'IS_ERR_OR_NULL',
+    'ARRAY_SIZE', 'container_of', 'list_for_each_entry', 'list_for_each_entry_safe',
+    'min', 'max', 'clamp', 'abs', 'swap',
+    'pr_info', 'pr_warn', 'pr_err', 'pr_debug', 'pr_notice', 'pr_crit', 'pr_emerg',
+    'printk', 'dev_info', 'dev_warn', 'dev_err', 'dev_dbg',
+    'GFP_KERNEL', 'GFP_ATOMIC',
+    'spin_lock', 'spin_unlock', 'spin_lock_irqsave', 'spin_unlock_irqrestore',
+    'mutex_lock', 'mutex_unlock', 'mutex_init',
+    'kfree', 'kmalloc', 'kzalloc', 'kcalloc', 'krealloc', 'kvmalloc', 'kvfree',
+    'get', 'put',
 ]);
 // ============================================================================
 // Label detection from capture map
@@ -444,14 +467,52 @@ const processFileGroup = (files, language, queryString, result, onFileProcessed)
     }
 };
 // ============================================================================
-// Worker message handler
+// Worker message handler — supports sub-batch streaming
 // ============================================================================
-parentPort.on('message', (files) => {
+/** Accumulated result across sub-batches */
+let accumulated = {
+    nodes: [], relationships: [], symbols: [],
+    imports: [], calls: [], heritage: [], fileCount: 0,
+};
+let cumulativeProcessed = 0;
+const mergeResult = (target, src) => {
+    target.nodes.push(...src.nodes);
+    target.relationships.push(...src.relationships);
+    target.symbols.push(...src.symbols);
+    target.imports.push(...src.imports);
+    target.calls.push(...src.calls);
+    target.heritage.push(...src.heritage);
+    target.fileCount += src.fileCount;
+};
+parentPort.on('message', (msg) => {
     try {
-        const result = processBatch(files, (filesProcessed) => {
-            parentPort.postMessage({ type: 'progress', filesProcessed });
-        });
-        parentPort.postMessage({ type: 'result', data: result });
+        // Sub-batch mode: { type: 'sub-batch', files: [...] }
+        if (msg && msg.type === 'sub-batch') {
+            const result = processBatch(msg.files, (filesProcessed) => {
+                parentPort.postMessage({ type: 'progress', filesProcessed: cumulativeProcessed + filesProcessed });
+            });
+            cumulativeProcessed += result.fileCount;
+            mergeResult(accumulated, result);
+            // Signal ready for next sub-batch
+            parentPort.postMessage({ type: 'sub-batch-done' });
+            return;
+        }
+        // Flush: send accumulated results
+        if (msg && msg.type === 'flush') {
+            parentPort.postMessage({ type: 'result', data: accumulated });
+            // Reset for potential reuse
+            accumulated = { nodes: [], relationships: [], symbols: [], imports: [], calls: [], heritage: [], fileCount: 0 };
+            cumulativeProcessed = 0;
+            return;
+        }
+        // Legacy single-message mode (backward compat): array of files
+        if (Array.isArray(msg)) {
+            const result = processBatch(msg, (filesProcessed) => {
+                parentPort.postMessage({ type: 'progress', filesProcessed });
+            });
+            parentPort.postMessage({ type: 'result', data: result });
+            return;
+        }
     }
     catch (err) {
         const message = err instanceof Error ? err.message : String(err);

package/dist/core/ingestion/workers/worker-pool.d.ts CHANGED Viewed

@@ -1,22 +1,16 @@
 export interface WorkerPool {
     /**
      * Dispatch items across workers. Items are split into chunks (one per worker),
-     * each worker processes its chunk, and results are concatenated back in order.
-     *
-     * @param onProgress - Called with cumulative files processed across all workers
+     * each worker processes its chunk via sub-batches to limit peak memory,
+     * and results are concatenated back in order.
      */
     dispatch<TInput, TResult>(items: TInput[], onProgress?: (filesProcessed: number) => void): Promise<TResult[]>;
-    /**
-     * Terminate all workers. Must be called when done.
-     */
+    /** Terminate all workers. Must be called when done. */
     terminate(): Promise<void>;
     /** Number of workers in the pool */
     readonly size: number;
 }
 /**
  * Create a pool of worker threads.
- *
- * @param workerUrl - URL to the worker script (use `new URL('./parse-worker.js', import.meta.url)`)
- * @param poolSize - Number of workers (defaults to cpus - 1, minimum 1)
  */
 export declare const createWorkerPool: (workerUrl: URL, poolSize?: number) => WorkerPool;