gitnexus 1.2.8 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/README.md +194 -186
  2. package/dist/cli/ai-context.js +71 -71
  3. package/dist/cli/analyze.js +69 -28
  4. package/dist/cli/index.js +20 -0
  5. package/dist/cli/setup.js +8 -1
  6. package/dist/cli/view.d.ts +13 -0
  7. package/dist/cli/view.js +59 -0
  8. package/dist/core/augmentation/engine.js +20 -20
  9. package/dist/core/embeddings/embedding-pipeline.js +26 -26
  10. package/dist/core/graph/graph.js +5 -0
  11. package/dist/core/graph/html-graph-viewer.d.ts +15 -0
  12. package/dist/core/graph/html-graph-viewer.js +542 -0
  13. package/dist/core/graph/html-graph-viewer.test.d.ts +1 -0
  14. package/dist/core/graph/html-graph-viewer.test.js +67 -0
  15. package/dist/core/graph/types.d.ts +12 -1
  16. package/dist/core/ingestion/call-processor.js +52 -32
  17. package/dist/core/ingestion/cluster-enricher.js +16 -16
  18. package/dist/core/ingestion/community-processor.js +75 -40
  19. package/dist/core/ingestion/filesystem-walker.d.ts +23 -0
  20. package/dist/core/ingestion/filesystem-walker.js +38 -3
  21. package/dist/core/ingestion/import-processor.d.ts +11 -3
  22. package/dist/core/ingestion/import-processor.js +27 -11
  23. package/dist/core/ingestion/parsing-processor.js +2 -4
  24. package/dist/core/ingestion/pipeline.js +142 -135
  25. package/dist/core/ingestion/process-processor.js +12 -11
  26. package/dist/core/ingestion/workers/parse-worker.js +67 -6
  27. package/dist/core/ingestion/workers/worker-pool.d.ts +3 -9
  28. package/dist/core/ingestion/workers/worker-pool.js +39 -18
  29. package/dist/core/kuzu/csv-generator.d.ts +15 -8
  30. package/dist/core/kuzu/csv-generator.js +258 -196
  31. package/dist/core/kuzu/kuzu-adapter.d.ts +1 -4
  32. package/dist/core/kuzu/kuzu-adapter.js +84 -72
  33. package/dist/core/kuzu/schema.d.ts +1 -1
  34. package/dist/core/kuzu/schema.js +266 -256
  35. package/dist/core/search/bm25-index.js +5 -5
  36. package/dist/core/search/hybrid-search.js +3 -3
  37. package/dist/core/wiki/graph-queries.js +52 -52
  38. package/dist/core/wiki/html-viewer.js +192 -192
  39. package/dist/core/wiki/prompts.js +82 -82
  40. package/dist/mcp/core/embedder.js +8 -4
  41. package/dist/mcp/local/local-backend.d.ts +6 -0
  42. package/dist/mcp/local/local-backend.js +224 -117
  43. package/dist/mcp/resources.js +42 -42
  44. package/dist/mcp/server.js +16 -16
  45. package/dist/mcp/tools.js +86 -77
  46. package/dist/server/api.d.ts +4 -2
  47. package/dist/server/api.js +253 -83
  48. package/dist/types/pipeline.d.ts +6 -2
  49. package/dist/types/pipeline.js +6 -4
  50. package/hooks/claude/gitnexus-hook.cjs +135 -135
  51. package/hooks/claude/pre-tool-use.sh +78 -78
  52. package/hooks/claude/session-start.sh +42 -42
  53. package/package.json +82 -82
  54. package/skills/debugging.md +85 -85
  55. package/skills/exploring.md +75 -75
  56. package/skills/impact-analysis.md +94 -94
  57. package/skills/refactoring.md +113 -113
  58. package/vendor/leiden/index.cjs +355 -355
  59. package/vendor/leiden/utils.cjs +392 -392
@@ -232,38 +232,58 @@ const resolveCallTarget = (calledName, currentFile, symbolTable, importMap) => {
232
232
  * Filter out common built-in functions and noise
233
233
  * that shouldn't be tracked as calls
234
234
  */
235
- const isBuiltInOrNoise = (name) => {
236
- const builtIns = new Set([
237
- // JavaScript/TypeScript built-ins
238
- 'console', 'log', 'warn', 'error', 'info', 'debug',
239
- 'setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
240
- 'parseInt', 'parseFloat', 'isNaN', 'isFinite',
241
- 'encodeURI', 'decodeURI', 'encodeURIComponent', 'decodeURIComponent',
242
- 'JSON', 'parse', 'stringify',
243
- 'Object', 'Array', 'String', 'Number', 'Boolean', 'Symbol', 'BigInt',
244
- 'Map', 'Set', 'WeakMap', 'WeakSet',
245
- 'Promise', 'resolve', 'reject', 'then', 'catch', 'finally',
246
- 'Math', 'Date', 'RegExp', 'Error',
247
- 'require', 'import', 'export',
248
- 'fetch', 'Response', 'Request',
249
- // React hooks and common functions
250
- 'useState', 'useEffect', 'useCallback', 'useMemo', 'useRef', 'useContext',
251
- 'useReducer', 'useLayoutEffect', 'useImperativeHandle', 'useDebugValue',
252
- 'createElement', 'createContext', 'createRef', 'forwardRef', 'memo', 'lazy',
253
- // Common array/object methods
254
- 'map', 'filter', 'reduce', 'forEach', 'find', 'findIndex', 'some', 'every',
255
- 'includes', 'indexOf', 'slice', 'splice', 'concat', 'join', 'split',
256
- 'push', 'pop', 'shift', 'unshift', 'sort', 'reverse',
257
- 'keys', 'values', 'entries', 'assign', 'freeze', 'seal',
258
- 'hasOwnProperty', 'toString', 'valueOf',
259
- // Python built-ins
260
- 'print', 'len', 'range', 'str', 'int', 'float', 'list', 'dict', 'set', 'tuple',
261
- 'open', 'read', 'write', 'close', 'append', 'extend', 'update',
262
- 'super', 'type', 'isinstance', 'issubclass', 'getattr', 'setattr', 'hasattr',
263
- 'enumerate', 'zip', 'sorted', 'reversed', 'min', 'max', 'sum', 'abs',
264
- ]);
265
- return builtIns.has(name);
266
- };
235
+ /** Pre-built set (module-level singleton) to avoid re-creating per call */
236
+ const BUILT_IN_NAMES = new Set([
237
+ // JavaScript/TypeScript built-ins
238
+ 'console', 'log', 'warn', 'error', 'info', 'debug',
239
+ 'setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
240
+ 'parseInt', 'parseFloat', 'isNaN', 'isFinite',
241
+ 'encodeURI', 'decodeURI', 'encodeURIComponent', 'decodeURIComponent',
242
+ 'JSON', 'parse', 'stringify',
243
+ 'Object', 'Array', 'String', 'Number', 'Boolean', 'Symbol', 'BigInt',
244
+ 'Map', 'Set', 'WeakMap', 'WeakSet',
245
+ 'Promise', 'resolve', 'reject', 'then', 'catch', 'finally',
246
+ 'Math', 'Date', 'RegExp', 'Error',
247
+ 'require', 'import', 'export',
248
+ 'fetch', 'Response', 'Request',
249
+ // React hooks and common functions
250
+ 'useState', 'useEffect', 'useCallback', 'useMemo', 'useRef', 'useContext',
251
+ 'useReducer', 'useLayoutEffect', 'useImperativeHandle', 'useDebugValue',
252
+ 'createElement', 'createContext', 'createRef', 'forwardRef', 'memo', 'lazy',
253
+ // Common array/object methods
254
+ 'map', 'filter', 'reduce', 'forEach', 'find', 'findIndex', 'some', 'every',
255
+ 'includes', 'indexOf', 'slice', 'splice', 'concat', 'join', 'split',
256
+ 'push', 'pop', 'shift', 'unshift', 'sort', 'reverse',
257
+ 'keys', 'values', 'entries', 'assign', 'freeze', 'seal',
258
+ 'hasOwnProperty', 'toString', 'valueOf',
259
+ // Python built-ins
260
+ 'print', 'len', 'range', 'str', 'int', 'float', 'list', 'dict', 'set', 'tuple',
261
+ 'open', 'read', 'write', 'close', 'append', 'extend', 'update',
262
+ 'super', 'type', 'isinstance', 'issubclass', 'getattr', 'setattr', 'hasattr',
263
+ 'enumerate', 'zip', 'sorted', 'reversed', 'min', 'max', 'sum', 'abs',
264
+ // C/C++ standard library and common kernel helpers
265
+ 'printf', 'fprintf', 'sprintf', 'snprintf', 'vprintf', 'vfprintf', 'vsprintf', 'vsnprintf',
266
+ 'scanf', 'fscanf', 'sscanf',
267
+ 'malloc', 'calloc', 'realloc', 'free', 'memcpy', 'memmove', 'memset', 'memcmp',
268
+ 'strlen', 'strcpy', 'strncpy', 'strcat', 'strncat', 'strcmp', 'strncmp', 'strstr', 'strchr', 'strrchr',
269
+ 'atoi', 'atol', 'atof', 'strtol', 'strtoul', 'strtoll', 'strtoull', 'strtod',
270
+ 'sizeof', 'offsetof', 'typeof',
271
+ 'assert', 'abort', 'exit', '_exit',
272
+ 'fopen', 'fclose', 'fread', 'fwrite', 'fseek', 'ftell', 'rewind', 'fflush', 'fgets', 'fputs',
273
+ // Linux kernel common macros/helpers (not real call targets)
274
+ 'likely', 'unlikely', 'BUG', 'BUG_ON', 'WARN', 'WARN_ON', 'WARN_ONCE',
275
+ 'IS_ERR', 'PTR_ERR', 'ERR_PTR', 'IS_ERR_OR_NULL',
276
+ 'ARRAY_SIZE', 'container_of', 'list_for_each_entry', 'list_for_each_entry_safe',
277
+ 'min', 'max', 'clamp', 'abs', 'swap',
278
+ 'pr_info', 'pr_warn', 'pr_err', 'pr_debug', 'pr_notice', 'pr_crit', 'pr_emerg',
279
+ 'printk', 'dev_info', 'dev_warn', 'dev_err', 'dev_dbg',
280
+ 'GFP_KERNEL', 'GFP_ATOMIC',
281
+ 'spin_lock', 'spin_unlock', 'spin_lock_irqsave', 'spin_unlock_irqrestore',
282
+ 'mutex_lock', 'mutex_unlock', 'mutex_init',
283
+ 'kfree', 'kmalloc', 'kzalloc', 'kcalloc', 'krealloc', 'kvmalloc', 'kvfree',
284
+ 'get', 'put',
285
+ ]);
286
+ const isBuiltInOrNoise = (name) => BUILT_IN_NAMES.has(name);
267
287
  /**
268
288
  * Fast path: resolve pre-extracted call sites from workers.
269
289
  * No AST parsing — workers already extracted calledName + sourceId.
@@ -13,12 +13,12 @@ const buildEnrichmentPrompt = (members, heuristicLabel) => {
13
13
  const memberList = limitedMembers
14
14
  .map(m => `${m.name} (${m.type})`)
15
15
  .join(', ');
16
- return `Analyze this code cluster and provide a semantic name and short description.
17
-
18
- Heuristic: "${heuristicLabel}"
19
- Members: ${memberList}${members.length > 20 ? ` (+${members.length - 20} more)` : ''}
20
-
21
- Reply with JSON only:
16
+ return `Analyze this code cluster and provide a semantic name and short description.
17
+
18
+ Heuristic: "${heuristicLabel}"
19
+ Members: ${memberList}${members.length > 20 ? ` (+${members.length - 20} more)` : ''}
20
+
21
+ Reply with JSON only:
22
22
  {"name": "2-4 word semantic name", "description": "One sentence describing purpose"}`;
23
23
  };
24
24
  // ============================================================================
@@ -115,18 +115,18 @@ export const enrichClustersBatch = async (communities, memberMap, llmClient, bat
115
115
  const memberList = limitedMembers
116
116
  .map(m => `${m.name} (${m.type})`)
117
117
  .join(', ');
118
- return `Cluster ${idx + 1} (id: ${community.id}):
119
- Heuristic: "${community.heuristicLabel}"
118
+ return `Cluster ${idx + 1} (id: ${community.id}):
119
+ Heuristic: "${community.heuristicLabel}"
120
120
  Members: ${memberList}`;
121
121
  }).join('\n\n');
122
- const prompt = `Analyze these code clusters and generate semantic names, keywords, and descriptions.
123
-
124
- ${batchPrompt}
125
-
126
- Output JSON array:
127
- [
128
- {"id": "comm_X", "name": "...", "keywords": [...], "description": "..."},
129
- ...
122
+ const prompt = `Analyze these code clusters and generate semantic names, keywords, and descriptions.
123
+
124
+ ${batchPrompt}
125
+
126
+ Output JSON array:
127
+ [
128
+ {"id": "comm_X", "name": "...", "keywords": [...], "description": "..."},
129
+ ...
130
130
  ]`;
131
131
  try {
132
132
  const response = await llmClient.generate(prompt);
@@ -51,23 +51,51 @@ export const getCommunityColor = (communityIndex) => {
51
51
  */
52
52
  export const processCommunities = async (knowledgeGraph, onProgress) => {
53
53
  onProgress?.('Building graph for community detection...', 0);
54
- // Step 1: Build a graphology graph from the knowledge graph
55
- // We only include symbol nodes (Function, Class, Method) and CALLS edges
56
- const graph = buildGraphologyGraph(knowledgeGraph);
54
+ // Pre-check total symbol count to determine large-graph mode before building
55
+ let symbolCount = 0;
56
+ knowledgeGraph.forEachNode(node => {
57
+ if (node.label === 'Function' || node.label === 'Class' || node.label === 'Method' || node.label === 'Interface') {
58
+ symbolCount++;
59
+ }
60
+ });
61
+ const isLarge = symbolCount > 10_000;
62
+ const graph = buildGraphologyGraph(knowledgeGraph, isLarge);
57
63
  if (graph.order === 0) {
58
- // No nodes to cluster
59
64
  return {
60
65
  communities: [],
61
66
  memberships: [],
62
67
  stats: { totalCommunities: 0, modularity: 0, nodesProcessed: 0 }
63
68
  };
64
69
  }
65
- onProgress?.(`Running Leiden algorithm on ${graph.order} nodes...`, 30);
66
- // Step 2: Run Leiden algorithm for community detection
67
- const details = leiden.detailed(graph, {
68
- resolution: 1.0, // Default resolution, can be tuned
69
- randomWalk: true,
70
- });
70
+ const nodeCount = graph.order;
71
+ const edgeCount = graph.size;
72
+ onProgress?.(`Running Leiden on ${nodeCount} nodes, ${edgeCount} edges${isLarge ? ` (filtered from ${symbolCount} symbols)` : ''}...`, 30);
73
+ // Large graphs: higher resolution + capped iterations (matching Python leidenalg default of 2).
74
+ // The first 2 iterations capture ~95%+ of modularity; additional iterations have diminishing returns.
75
+ // Timeout: abort after 60s for pathological graph structures.
76
+ const LEIDEN_TIMEOUT_MS = 60_000;
77
+ let details;
78
+ try {
79
+ details = await Promise.race([
80
+ Promise.resolve(leiden.detailed(graph, {
81
+ resolution: isLarge ? 2.0 : 1.0,
82
+ maxIterations: isLarge ? 3 : 0,
83
+ })),
84
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Leiden timeout')), LEIDEN_TIMEOUT_MS)),
85
+ ]);
86
+ }
87
+ catch (e) {
88
+ if (e.message === 'Leiden timeout') {
89
+ onProgress?.('Community detection timed out, using fallback...', 60);
90
+ // Fallback: assign all nodes to community 0
91
+ const communities = {};
92
+ graph.forEachNode((node) => { communities[node] = 0; });
93
+ details = { communities, count: 1, modularity: 0 };
94
+ }
95
+ else {
96
+ throw e;
97
+ }
98
+ }
71
99
  onProgress?.(`Found ${details.count} communities...`, 60);
72
100
  // Step 3: Create community nodes with heuristic labels
73
101
  const communityNodes = createCommunityNodes(details.communities, details.count, graph, knowledgeGraph);
@@ -95,41 +123,48 @@ export const processCommunities = async (knowledgeGraph, onProgress) => {
95
123
  // HELPER: Build graphology graph from knowledge graph
96
124
  // ============================================================================
97
125
  /**
98
- * Build a graphology graph containing only symbol nodes and CALLS edges
99
- * This is what the Leiden algorithm will cluster
126
+ * Build a graphology graph containing only symbol nodes and clustering edges.
127
+ * For large graphs (>10K symbols), filter out low-confidence fuzzy-global edges
128
+ * and degree-1 nodes that add noise and massively increase Leiden runtime.
100
129
  */
101
- const buildGraphologyGraph = (knowledgeGraph) => {
102
- // Use undirected graph for Leiden - it looks at edge density, not direction
130
+ const MIN_CONFIDENCE_LARGE = 0.5;
131
+ const buildGraphologyGraph = (knowledgeGraph, isLarge) => {
103
132
  const graph = new Graph({ type: 'undirected', allowSelfLoops: false });
104
- // Symbol types that should be clustered
105
133
  const symbolTypes = new Set(['Function', 'Class', 'Method', 'Interface']);
106
- // First pass: collect which nodes participate in clustering edges
107
134
  const clusteringRelTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);
108
135
  const connectedNodes = new Set();
109
- knowledgeGraph.relationships.forEach(rel => {
110
- if (clusteringRelTypes.has(rel.type) && rel.sourceId !== rel.targetId) {
111
- connectedNodes.add(rel.sourceId);
112
- connectedNodes.add(rel.targetId);
113
- }
136
+ const nodeDegree = new Map();
137
+ knowledgeGraph.forEachRelationship(rel => {
138
+ if (!clusteringRelTypes.has(rel.type) || rel.sourceId === rel.targetId)
139
+ return;
140
+ if (isLarge && rel.confidence < MIN_CONFIDENCE_LARGE)
141
+ return;
142
+ connectedNodes.add(rel.sourceId);
143
+ connectedNodes.add(rel.targetId);
144
+ nodeDegree.set(rel.sourceId, (nodeDegree.get(rel.sourceId) || 0) + 1);
145
+ nodeDegree.set(rel.targetId, (nodeDegree.get(rel.targetId) || 0) + 1);
114
146
  });
115
- // Only add nodes that have at least one clustering edge
116
- // Isolated nodes would just become singletons (skipped anyway)
117
- knowledgeGraph.nodes.forEach(node => {
118
- if (symbolTypes.has(node.label) && connectedNodes.has(node.id)) {
119
- graph.addNode(node.id, {
120
- name: node.properties.name,
121
- filePath: node.properties.filePath,
122
- type: node.label,
123
- });
124
- }
147
+ knowledgeGraph.forEachNode(node => {
148
+ if (!symbolTypes.has(node.label) || !connectedNodes.has(node.id))
149
+ return;
150
+ // For large graphs, skip degree-1 nodes — they just become singletons or
151
+ // get absorbed into their single neighbor's community, but cost iteration time.
152
+ if (isLarge && (nodeDegree.get(node.id) || 0) < 2)
153
+ return;
154
+ graph.addNode(node.id, {
155
+ name: node.properties.name,
156
+ filePath: node.properties.filePath,
157
+ type: node.label,
158
+ });
125
159
  });
126
- // Add edges
127
- knowledgeGraph.relationships.forEach(rel => {
128
- if (clusteringRelTypes.has(rel.type)) {
129
- if (graph.hasNode(rel.sourceId) && graph.hasNode(rel.targetId) && rel.sourceId !== rel.targetId) {
130
- if (!graph.hasEdge(rel.sourceId, rel.targetId)) {
131
- graph.addEdge(rel.sourceId, rel.targetId);
132
- }
160
+ knowledgeGraph.forEachRelationship(rel => {
161
+ if (!clusteringRelTypes.has(rel.type))
162
+ return;
163
+ if (isLarge && rel.confidence < MIN_CONFIDENCE_LARGE)
164
+ return;
165
+ if (graph.hasNode(rel.sourceId) && graph.hasNode(rel.targetId) && rel.sourceId !== rel.targetId) {
166
+ if (!graph.hasEdge(rel.sourceId, rel.targetId)) {
167
+ graph.addEdge(rel.sourceId, rel.targetId);
133
168
  }
134
169
  }
135
170
  });
@@ -152,11 +187,11 @@ const createCommunityNodes = (communities, communityCount, graph, knowledgeGraph
152
187
  });
153
188
  // Build node lookup for file paths
154
189
  const nodePathMap = new Map();
155
- knowledgeGraph.nodes.forEach(node => {
190
+ for (const node of knowledgeGraph.iterNodes()) {
156
191
  if (node.properties.filePath) {
157
192
  nodePathMap.set(node.id, node.properties.filePath);
158
193
  }
159
- });
194
+ }
160
195
  // Create community nodes - SKIP SINGLETONS (isolated nodes)
161
196
  const communityNodes = [];
162
197
  communityMembers.forEach((memberIds, commNum) => {
@@ -2,4 +2,27 @@ export interface FileEntry {
2
2
  path: string;
3
3
  content: string;
4
4
  }
5
+ /** Lightweight entry — path + size from stat, no content in memory */
6
+ export interface ScannedFile {
7
+ path: string;
8
+ size: number;
9
+ }
10
+ /** Path-only reference (for type signatures) */
11
+ export interface FilePath {
12
+ path: string;
13
+ }
14
+ /**
15
+ * Phase 1: Scan repository — stat files to get paths + sizes, no content loaded.
16
+ * Memory: ~10MB for 100K files vs ~1GB+ with content.
17
+ */
18
+ export declare const walkRepositoryPaths: (repoPath: string, onProgress?: (current: number, total: number, filePath: string) => void) => Promise<ScannedFile[]>;
19
+ /**
20
+ * Phase 2: Read file contents for a specific set of relative paths.
21
+ * Returns a Map for O(1) lookup. Silently skips files that fail to read.
22
+ */
23
+ export declare const readFileContents: (repoPath: string, relativePaths: string[]) => Promise<Map<string, string>>;
24
+ /**
25
+ * Legacy API — scans and reads everything into memory.
26
+ * Used by sequential fallback path only.
27
+ */
5
28
  export declare const walkRepository: (repoPath: string, onProgress?: (current: number, total: number, filePath: string) => void) => Promise<FileEntry[]>;
@@ -5,7 +5,11 @@ import { shouldIgnorePath } from '../../config/ignore-service.js';
5
5
  const READ_CONCURRENCY = 32;
6
6
  /** Skip files larger than 512KB — they're usually generated/vendored and crash tree-sitter */
7
7
  const MAX_FILE_SIZE = 512 * 1024;
8
- export const walkRepository = async (repoPath, onProgress) => {
8
+ /**
9
+ * Phase 1: Scan repository — stat files to get paths + sizes, no content loaded.
10
+ * Memory: ~10MB for 100K files vs ~1GB+ with content.
11
+ */
12
+ export const walkRepositoryPaths = async (repoPath, onProgress) => {
9
13
  const files = await glob('**/*', {
10
14
  cwd: repoPath,
11
15
  nodir: true,
@@ -24,8 +28,7 @@ export const walkRepository = async (repoPath, onProgress) => {
24
28
  skippedLarge++;
25
29
  return null;
26
30
  }
27
- const content = await fs.readFile(fullPath, 'utf-8');
28
- return { path: relativePath.replace(/\\/g, '/'), content };
31
+ return { path: relativePath.replace(/\\/g, '/'), size: stat.size };
29
32
  }));
30
33
  for (const result of results) {
31
34
  processed++;
@@ -43,3 +46,35 @@ export const walkRepository = async (repoPath, onProgress) => {
43
46
  }
44
47
  return entries;
45
48
  };
49
+ /**
50
+ * Phase 2: Read file contents for a specific set of relative paths.
51
+ * Returns a Map for O(1) lookup. Silently skips files that fail to read.
52
+ */
53
+ export const readFileContents = async (repoPath, relativePaths) => {
54
+ const contents = new Map();
55
+ for (let start = 0; start < relativePaths.length; start += READ_CONCURRENCY) {
56
+ const batch = relativePaths.slice(start, start + READ_CONCURRENCY);
57
+ const results = await Promise.allSettled(batch.map(async (relativePath) => {
58
+ const fullPath = path.join(repoPath, relativePath);
59
+ const content = await fs.readFile(fullPath, 'utf-8');
60
+ return { path: relativePath, content };
61
+ }));
62
+ for (const result of results) {
63
+ if (result.status === 'fulfilled') {
64
+ contents.set(result.value.path, result.value.content);
65
+ }
66
+ }
67
+ }
68
+ return contents;
69
+ };
70
+ /**
71
+ * Legacy API — scans and reads everything into memory.
72
+ * Used by sequential fallback path only.
73
+ */
74
+ export const walkRepository = async (repoPath, onProgress) => {
75
+ const scanned = await walkRepositoryPaths(repoPath, onProgress);
76
+ const contents = await readFileContents(repoPath, scanned.map(f => f.path));
77
+ return scanned
78
+ .filter(f => contents.has(f.path))
79
+ .map(f => ({ path: f.path, content: contents.get(f.path) }));
80
+ };
@@ -3,6 +3,15 @@ import { ASTCache } from './ast-cache.js';
3
3
  import type { ExtractedImport } from './workers/parse-worker.js';
4
4
  export type ImportMap = Map<string, Set<string>>;
5
5
  export declare const createImportMap: () => ImportMap;
6
+ /** Pre-built lookup structures for import resolution. Build once, reuse across chunks. */
7
+ export interface ImportResolutionContext {
8
+ allFilePaths: Set<string>;
9
+ allFileList: string[];
10
+ normalizedFileList: string[];
11
+ suffixIndex: SuffixIndex;
12
+ resolveCache: Map<string, string | null>;
13
+ }
14
+ export declare function buildImportResolutionContext(allPaths: string[]): ImportResolutionContext;
6
15
  /**
7
16
  * Build a suffix index for O(1) endsWith lookups.
8
17
  * Maps every possible path suffix to its original file path.
@@ -23,8 +32,7 @@ export interface SuffixIndex {
23
32
  export declare const processImports: (graph: KnowledgeGraph, files: {
24
33
  path: string;
25
34
  content: string;
26
- }[], astCache: ASTCache, importMap: ImportMap, onProgress?: (current: number, total: number) => void, repoRoot?: string) => Promise<void>;
35
+ }[], astCache: ASTCache, importMap: ImportMap, onProgress?: (current: number, total: number) => void, repoRoot?: string, allPaths?: string[]) => Promise<void>;
27
36
  export declare const processImportsFromExtracted: (graph: KnowledgeGraph, files: {
28
37
  path: string;
29
- content: string;
30
- }[], extractedImports: ExtractedImport[], importMap: ImportMap, onProgress?: (current: number, total: number) => void, repoRoot?: string) => Promise<void>;
38
+ }[], extractedImports: ExtractedImport[], importMap: ImportMap, onProgress?: (current: number, total: number) => void, repoRoot?: string, prebuiltCtx?: ImportResolutionContext) => Promise<void>;
@@ -8,6 +8,16 @@ import { getLanguageFromFilename, yieldToEventLoop } from './utils.js';
8
8
  import { SupportedLanguages } from '../../config/supported-languages.js';
9
9
  const isDev = process.env.NODE_ENV === 'development';
10
10
  export const createImportMap = () => new Map();
11
+ /** Max entries in the resolve cache. Beyond this, the cache is cleared to bound memory.
12
+ * 100K entries ≈ 15MB — covers the most common import patterns. */
13
+ const RESOLVE_CACHE_CAP = 100_000;
14
+ export function buildImportResolutionContext(allPaths) {
15
+ const allFileList = allPaths;
16
+ const normalizedFileList = allFileList.map(p => p.replace(/\\/g, '/'));
17
+ const allFilePaths = new Set(allFileList);
18
+ const suffixIndex = buildSuffixIndex(normalizedFileList, allFileList);
19
+ return { allFilePaths, allFileList, normalizedFileList, suffixIndex, resolveCache: new Map() };
20
+ }
11
21
  /**
12
22
  * Parse tsconfig.json to extract path aliases.
13
23
  * Tries tsconfig.json, tsconfig.app.json, tsconfig.base.json in order.
@@ -196,6 +206,16 @@ const resolveImportPath = (currentFile, importPath, allFiles, allFileList, norma
196
206
  if (resolveCache.has(cacheKey))
197
207
  return resolveCache.get(cacheKey) ?? null;
198
208
  const cache = (result) => {
209
+ // Evict oldest 20% when cap is reached instead of clearing all
210
+ if (resolveCache.size >= RESOLVE_CACHE_CAP) {
211
+ const evictCount = Math.floor(RESOLVE_CACHE_CAP * 0.2);
212
+ const iter = resolveCache.keys();
213
+ for (let i = 0; i < evictCount; i++) {
214
+ const key = iter.next().value;
215
+ if (key !== undefined)
216
+ resolveCache.delete(key);
217
+ }
218
+ }
199
219
  resolveCache.set(cacheKey, result);
200
220
  return result;
201
221
  };
@@ -429,12 +449,12 @@ function resolveGoPackage(importPath, goModule, normalizedFileList, allFileList)
429
449
  // ============================================================================
430
450
  // MAIN IMPORT PROCESSOR
431
451
  // ============================================================================
432
- export const processImports = async (graph, files, astCache, importMap, onProgress, repoRoot) => {
433
- // Create a Set of all file paths for fast lookup during resolution
434
- const allFilePaths = new Set(files.map(f => f.path));
452
+ export const processImports = async (graph, files, astCache, importMap, onProgress, repoRoot, allPaths) => {
453
+ // Use allPaths (full repo) when available for cross-chunk resolution, else fall back to chunk files
454
+ const allFileList = allPaths ?? files.map(f => f.path);
455
+ const allFilePaths = new Set(allFileList);
435
456
  const parser = await loadParser();
436
457
  const resolveCache = new Map();
437
- const allFileList = files.map(f => f.path);
438
458
  // Pre-compute normalized file list once (forward slashes)
439
459
  const normalizedFileList = allFileList.map(p => p.replace(/\\/g, '/'));
440
460
  // Build suffix index for O(1) lookups
@@ -573,13 +593,9 @@ export const processImports = async (graph, files, astCache, importMap, onProgre
573
593
  // ============================================================================
574
594
  // FAST PATH: Resolve pre-extracted imports (no parsing needed)
575
595
  // ============================================================================
576
- export const processImportsFromExtracted = async (graph, files, extractedImports, importMap, onProgress, repoRoot) => {
577
- const allFilePaths = new Set(files.map(f => f.path));
578
- const resolveCache = new Map();
579
- const allFileList = files.map(f => f.path);
580
- const normalizedFileList = allFileList.map(p => p.replace(/\\/g, '/'));
581
- // Build suffix index for O(1) lookups
582
- const index = buildSuffixIndex(normalizedFileList, allFileList);
596
+ export const processImportsFromExtracted = async (graph, files, extractedImports, importMap, onProgress, repoRoot, prebuiltCtx) => {
597
+ const ctx = prebuiltCtx ?? buildImportResolutionContext(files.map(f => f.path));
598
+ const { allFilePaths, allFileList, normalizedFileList, suffixIndex: index, resolveCache } = ctx;
583
599
  let totalImportsFound = 0;
584
600
  let totalImportsResolved = 0;
585
601
  const effectiveRoot = repoRoot || '';
@@ -106,15 +106,13 @@ const processParsingWithWorkers = async (graph, files, symbolTable, astCache, wo
106
106
  const parseableFiles = [];
107
107
  for (const file of files) {
108
108
  const lang = getLanguageFromFilename(file.path);
109
- if (lang) {
109
+ if (lang)
110
110
  parseableFiles.push({ path: file.path, content: file.content });
111
- }
112
111
  }
113
112
  if (parseableFiles.length === 0)
114
113
  return { imports: [], calls: [], heritage: [] };
115
114
  const total = files.length;
116
- // Dispatch to worker pool — pool handles splitting into chunks
117
- // Workers send progress messages during parsing so the bar updates smoothly
115
+ // Dispatch to worker pool — pool handles splitting into chunks and sub-batching
118
116
  const chunkResults = await workerPool.dispatch(parseableFiles, (filesProcessed) => {
119
117
  onFileProgress?.(Math.min(filesProcessed, total), total, 'Parsing...');
120
118
  });