@veewo/gitnexus 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. package/README.md +234 -0
  2. package/dist/benchmark/agent-context/evaluators.d.ts +9 -0
  3. package/dist/benchmark/agent-context/evaluators.js +196 -0
  4. package/dist/benchmark/agent-context/evaluators.test.d.ts +1 -0
  5. package/dist/benchmark/agent-context/evaluators.test.js +39 -0
  6. package/dist/benchmark/agent-context/io.d.ts +2 -0
  7. package/dist/benchmark/agent-context/io.js +23 -0
  8. package/dist/benchmark/agent-context/io.test.d.ts +1 -0
  9. package/dist/benchmark/agent-context/io.test.js +19 -0
  10. package/dist/benchmark/agent-context/report.d.ts +2 -0
  11. package/dist/benchmark/agent-context/report.js +59 -0
  12. package/dist/benchmark/agent-context/report.test.d.ts +1 -0
  13. package/dist/benchmark/agent-context/report.test.js +85 -0
  14. package/dist/benchmark/agent-context/runner.d.ts +46 -0
  15. package/dist/benchmark/agent-context/runner.js +111 -0
  16. package/dist/benchmark/agent-context/runner.test.d.ts +1 -0
  17. package/dist/benchmark/agent-context/runner.test.js +79 -0
  18. package/dist/benchmark/agent-context/tool-runner.d.ts +7 -0
  19. package/dist/benchmark/agent-context/tool-runner.js +18 -0
  20. package/dist/benchmark/agent-context/tool-runner.test.d.ts +1 -0
  21. package/dist/benchmark/agent-context/tool-runner.test.js +11 -0
  22. package/dist/benchmark/agent-context/types.d.ts +40 -0
  23. package/dist/benchmark/agent-context/types.js +1 -0
  24. package/dist/benchmark/analyze-runner.d.ts +16 -0
  25. package/dist/benchmark/analyze-runner.js +51 -0
  26. package/dist/benchmark/analyze-runner.test.d.ts +1 -0
  27. package/dist/benchmark/analyze-runner.test.js +37 -0
  28. package/dist/benchmark/evaluators.d.ts +6 -0
  29. package/dist/benchmark/evaluators.js +10 -0
  30. package/dist/benchmark/evaluators.test.d.ts +1 -0
  31. package/dist/benchmark/evaluators.test.js +12 -0
  32. package/dist/benchmark/io.d.ts +7 -0
  33. package/dist/benchmark/io.js +25 -0
  34. package/dist/benchmark/io.test.d.ts +1 -0
  35. package/dist/benchmark/io.test.js +35 -0
  36. package/dist/benchmark/neonspark-candidates.d.ts +19 -0
  37. package/dist/benchmark/neonspark-candidates.js +94 -0
  38. package/dist/benchmark/neonspark-candidates.test.d.ts +1 -0
  39. package/dist/benchmark/neonspark-candidates.test.js +43 -0
  40. package/dist/benchmark/neonspark-materialize.d.ts +19 -0
  41. package/dist/benchmark/neonspark-materialize.js +111 -0
  42. package/dist/benchmark/neonspark-materialize.test.d.ts +1 -0
  43. package/dist/benchmark/neonspark-materialize.test.js +124 -0
  44. package/dist/benchmark/neonspark-sync.d.ts +3 -0
  45. package/dist/benchmark/neonspark-sync.js +53 -0
  46. package/dist/benchmark/neonspark-sync.test.d.ts +1 -0
  47. package/dist/benchmark/neonspark-sync.test.js +20 -0
  48. package/dist/benchmark/report.d.ts +1 -0
  49. package/dist/benchmark/report.js +7 -0
  50. package/dist/benchmark/runner.d.ts +48 -0
  51. package/dist/benchmark/runner.js +302 -0
  52. package/dist/benchmark/runner.test.d.ts +1 -0
  53. package/dist/benchmark/runner.test.js +50 -0
  54. package/dist/benchmark/scoring.d.ts +16 -0
  55. package/dist/benchmark/scoring.js +27 -0
  56. package/dist/benchmark/scoring.test.d.ts +1 -0
  57. package/dist/benchmark/scoring.test.js +24 -0
  58. package/dist/benchmark/tool-runner.d.ts +6 -0
  59. package/dist/benchmark/tool-runner.js +17 -0
  60. package/dist/benchmark/types.d.ts +36 -0
  61. package/dist/benchmark/types.js +1 -0
  62. package/dist/cli/ai-context.d.ts +22 -0
  63. package/dist/cli/ai-context.js +184 -0
  64. package/dist/cli/ai-context.test.d.ts +1 -0
  65. package/dist/cli/ai-context.test.js +30 -0
  66. package/dist/cli/analyze-multi-scope-regression.test.d.ts +1 -0
  67. package/dist/cli/analyze-multi-scope-regression.test.js +22 -0
  68. package/dist/cli/analyze-options.d.ts +7 -0
  69. package/dist/cli/analyze-options.js +56 -0
  70. package/dist/cli/analyze-options.test.d.ts +1 -0
  71. package/dist/cli/analyze-options.test.js +36 -0
  72. package/dist/cli/analyze.d.ts +14 -0
  73. package/dist/cli/analyze.js +384 -0
  74. package/dist/cli/augment.d.ts +13 -0
  75. package/dist/cli/augment.js +33 -0
  76. package/dist/cli/benchmark-agent-context.d.ts +29 -0
  77. package/dist/cli/benchmark-agent-context.js +61 -0
  78. package/dist/cli/benchmark-agent-context.test.d.ts +1 -0
  79. package/dist/cli/benchmark-agent-context.test.js +80 -0
  80. package/dist/cli/benchmark-unity.d.ts +15 -0
  81. package/dist/cli/benchmark-unity.js +31 -0
  82. package/dist/cli/benchmark-unity.test.d.ts +1 -0
  83. package/dist/cli/benchmark-unity.test.js +18 -0
  84. package/dist/cli/claude-hooks.d.ts +22 -0
  85. package/dist/cli/claude-hooks.js +97 -0
  86. package/dist/cli/clean.d.ts +10 -0
  87. package/dist/cli/clean.js +60 -0
  88. package/dist/cli/eval-server.d.ts +30 -0
  89. package/dist/cli/eval-server.js +372 -0
  90. package/dist/cli/index.d.ts +2 -0
  91. package/dist/cli/index.js +182 -0
  92. package/dist/cli/list.d.ts +6 -0
  93. package/dist/cli/list.js +33 -0
  94. package/dist/cli/mcp.d.ts +8 -0
  95. package/dist/cli/mcp.js +34 -0
  96. package/dist/cli/repo-manager-alias.test.d.ts +1 -0
  97. package/dist/cli/repo-manager-alias.test.js +40 -0
  98. package/dist/cli/scope-filter.test.d.ts +1 -0
  99. package/dist/cli/scope-filter.test.js +49 -0
  100. package/dist/cli/serve.d.ts +4 -0
  101. package/dist/cli/serve.js +6 -0
  102. package/dist/cli/setup.d.ts +8 -0
  103. package/dist/cli/setup.js +311 -0
  104. package/dist/cli/setup.test.d.ts +1 -0
  105. package/dist/cli/setup.test.js +31 -0
  106. package/dist/cli/status.d.ts +6 -0
  107. package/dist/cli/status.js +27 -0
  108. package/dist/cli/tool.d.ts +40 -0
  109. package/dist/cli/tool.js +94 -0
  110. package/dist/cli/version.test.d.ts +1 -0
  111. package/dist/cli/version.test.js +19 -0
  112. package/dist/cli/wiki.d.ts +15 -0
  113. package/dist/cli/wiki.js +361 -0
  114. package/dist/config/ignore-service.d.ts +1 -0
  115. package/dist/config/ignore-service.js +210 -0
  116. package/dist/config/supported-languages.d.ts +12 -0
  117. package/dist/config/supported-languages.js +15 -0
  118. package/dist/core/augmentation/engine.d.ts +26 -0
  119. package/dist/core/augmentation/engine.js +213 -0
  120. package/dist/core/embeddings/embedder.d.ts +60 -0
  121. package/dist/core/embeddings/embedder.js +251 -0
  122. package/dist/core/embeddings/embedding-pipeline.d.ts +51 -0
  123. package/dist/core/embeddings/embedding-pipeline.js +329 -0
  124. package/dist/core/embeddings/index.d.ts +9 -0
  125. package/dist/core/embeddings/index.js +9 -0
  126. package/dist/core/embeddings/text-generator.d.ts +24 -0
  127. package/dist/core/embeddings/text-generator.js +182 -0
  128. package/dist/core/embeddings/types.d.ts +87 -0
  129. package/dist/core/embeddings/types.js +32 -0
  130. package/dist/core/graph/graph.d.ts +2 -0
  131. package/dist/core/graph/graph.js +66 -0
  132. package/dist/core/graph/types.d.ts +61 -0
  133. package/dist/core/graph/types.js +1 -0
  134. package/dist/core/ingestion/ast-cache.d.ts +11 -0
  135. package/dist/core/ingestion/ast-cache.js +34 -0
  136. package/dist/core/ingestion/call-processor.d.ts +15 -0
  137. package/dist/core/ingestion/call-processor.js +327 -0
  138. package/dist/core/ingestion/cluster-enricher.d.ts +38 -0
  139. package/dist/core/ingestion/cluster-enricher.js +170 -0
  140. package/dist/core/ingestion/community-processor.d.ts +39 -0
  141. package/dist/core/ingestion/community-processor.js +312 -0
  142. package/dist/core/ingestion/entry-point-scoring.d.ts +39 -0
  143. package/dist/core/ingestion/entry-point-scoring.js +260 -0
  144. package/dist/core/ingestion/filesystem-walker.d.ts +28 -0
  145. package/dist/core/ingestion/filesystem-walker.js +80 -0
  146. package/dist/core/ingestion/framework-detection.d.ts +39 -0
  147. package/dist/core/ingestion/framework-detection.js +235 -0
  148. package/dist/core/ingestion/heritage-processor.d.ts +20 -0
  149. package/dist/core/ingestion/heritage-processor.js +197 -0
  150. package/dist/core/ingestion/import-processor.d.ts +38 -0
  151. package/dist/core/ingestion/import-processor.js +778 -0
  152. package/dist/core/ingestion/parsing-processor.d.ts +15 -0
  153. package/dist/core/ingestion/parsing-processor.js +291 -0
  154. package/dist/core/ingestion/pipeline.d.ts +5 -0
  155. package/dist/core/ingestion/pipeline.js +323 -0
  156. package/dist/core/ingestion/process-processor.d.ts +51 -0
  157. package/dist/core/ingestion/process-processor.js +309 -0
  158. package/dist/core/ingestion/scope-filter.d.ts +25 -0
  159. package/dist/core/ingestion/scope-filter.js +100 -0
  160. package/dist/core/ingestion/structure-processor.d.ts +2 -0
  161. package/dist/core/ingestion/structure-processor.js +36 -0
  162. package/dist/core/ingestion/symbol-table.d.ts +33 -0
  163. package/dist/core/ingestion/symbol-table.js +38 -0
  164. package/dist/core/ingestion/tree-sitter-queries.d.ts +12 -0
  165. package/dist/core/ingestion/tree-sitter-queries.js +398 -0
  166. package/dist/core/ingestion/utils.d.ts +10 -0
  167. package/dist/core/ingestion/utils.js +50 -0
  168. package/dist/core/ingestion/workers/parse-worker.d.ts +59 -0
  169. package/dist/core/ingestion/workers/parse-worker.js +672 -0
  170. package/dist/core/ingestion/workers/worker-pool.d.ts +16 -0
  171. package/dist/core/ingestion/workers/worker-pool.js +120 -0
  172. package/dist/core/kuzu/csv-generator.d.ts +29 -0
  173. package/dist/core/kuzu/csv-generator.js +336 -0
  174. package/dist/core/kuzu/kuzu-adapter.d.ts +101 -0
  175. package/dist/core/kuzu/kuzu-adapter.js +753 -0
  176. package/dist/core/kuzu/schema.d.ts +53 -0
  177. package/dist/core/kuzu/schema.js +407 -0
  178. package/dist/core/search/bm25-index.d.ts +23 -0
  179. package/dist/core/search/bm25-index.js +95 -0
  180. package/dist/core/search/hybrid-search.d.ts +49 -0
  181. package/dist/core/search/hybrid-search.js +118 -0
  182. package/dist/core/tree-sitter/parser-loader.d.ts +4 -0
  183. package/dist/core/tree-sitter/parser-loader.js +44 -0
  184. package/dist/core/wiki/generator.d.ts +110 -0
  185. package/dist/core/wiki/generator.js +786 -0
  186. package/dist/core/wiki/graph-queries.d.ts +80 -0
  187. package/dist/core/wiki/graph-queries.js +238 -0
  188. package/dist/core/wiki/html-viewer.d.ts +10 -0
  189. package/dist/core/wiki/html-viewer.js +297 -0
  190. package/dist/core/wiki/llm-client.d.ts +40 -0
  191. package/dist/core/wiki/llm-client.js +162 -0
  192. package/dist/core/wiki/prompts.d.ts +53 -0
  193. package/dist/core/wiki/prompts.js +174 -0
  194. package/dist/lib/utils.d.ts +1 -0
  195. package/dist/lib/utils.js +3 -0
  196. package/dist/mcp/core/embedder.d.ts +27 -0
  197. package/dist/mcp/core/embedder.js +108 -0
  198. package/dist/mcp/core/kuzu-adapter.d.ts +34 -0
  199. package/dist/mcp/core/kuzu-adapter.js +231 -0
  200. package/dist/mcp/local/local-backend.d.ts +160 -0
  201. package/dist/mcp/local/local-backend.js +1646 -0
  202. package/dist/mcp/resources.d.ts +31 -0
  203. package/dist/mcp/resources.js +407 -0
  204. package/dist/mcp/server.d.ts +23 -0
  205. package/dist/mcp/server.js +251 -0
  206. package/dist/mcp/staleness.d.ts +15 -0
  207. package/dist/mcp/staleness.js +29 -0
  208. package/dist/mcp/tools.d.ts +24 -0
  209. package/dist/mcp/tools.js +195 -0
  210. package/dist/server/api.d.ts +10 -0
  211. package/dist/server/api.js +344 -0
  212. package/dist/server/mcp-http.d.ts +13 -0
  213. package/dist/server/mcp-http.js +100 -0
  214. package/dist/storage/git.d.ts +6 -0
  215. package/dist/storage/git.js +32 -0
  216. package/dist/storage/repo-manager.d.ts +125 -0
  217. package/dist/storage/repo-manager.js +257 -0
  218. package/dist/types/pipeline.d.ts +34 -0
  219. package/dist/types/pipeline.js +18 -0
  220. package/hooks/claude/gitnexus-hook.cjs +135 -0
  221. package/hooks/claude/pre-tool-use.sh +78 -0
  222. package/hooks/claude/session-start.sh +42 -0
  223. package/package.json +92 -0
  224. package/skills/gitnexus-cli.md +82 -0
  225. package/skills/gitnexus-debugging.md +89 -0
  226. package/skills/gitnexus-exploring.md +78 -0
  227. package/skills/gitnexus-guide.md +64 -0
  228. package/skills/gitnexus-impact-analysis.md +97 -0
  229. package/skills/gitnexus-refactoring.md +121 -0
  230. package/vendor/leiden/index.cjs +355 -0
  231. package/vendor/leiden/utils.cjs +392 -0
@@ -0,0 +1,15 @@
1
import { KnowledgeGraph } from '../graph/types.js';
import { SymbolTable } from './symbol-table.js';
import { ASTCache } from './ast-cache.js';
import { WorkerPool } from './workers/worker-pool.js';
import type { ExtractedImport, ExtractedCall, ExtractedHeritage } from './workers/parse-worker.js';
/** Per-file progress callback: (current, total, filePath). */
export type FileProgressCallback = (current: number, total: number, filePath: string) => void;
/** Raw import/call/heritage records extracted by parse workers, to be resolved by later pipeline stages. */
export interface WorkerExtractedData {
    imports: ExtractedImport[];
    calls: ExtractedCall[];
    heritage: ExtractedHeritage[];
}
/**
 * Parse the given files into the knowledge graph and symbol table.
 * When a worker pool is supplied and succeeds, resolves to the workers'
 * pre-extracted records; resolves to null when the sequential fallback
 * ran (it produces no pre-extracted data).
 */
export declare const processParsing: (graph: KnowledgeGraph, files: {
    path: string;
    content: string;
}[], symbolTable: SymbolTable, astCache: ASTCache, onFileProgress?: FileProgressCallback, workerPool?: WorkerPool) => Promise<WorkerExtractedData | null>;
@@ -0,0 +1,291 @@
1
+ import Parser from 'tree-sitter';
2
+ import { loadParser, loadLanguage } from '../tree-sitter/parser-loader.js';
3
+ import { LANGUAGE_QUERIES } from './tree-sitter-queries.js';
4
+ import { generateId } from '../../lib/utils.js';
5
+ import { getLanguageFromFilename, yieldToEventLoop } from './utils.js';
6
// ============================================================================
// EXPORT DETECTION - Language-specific visibility detection
// ============================================================================
/**
 * Check if a symbol (function, class, etc.) is exported/public.
 * Handles all 9 supported languages with explicit logic.
 *
 * @param node - The AST node for the symbol name
 * @param name - The symbol name
 * @param language - The programming language
 * @returns true if the symbol is exported/public
 */
const isNodeExported = (node, name, language) => {
    // Tree-sitter attaches modifier nodes as SIBLINGS of the name node
    // (children of the declaration), never as its ancestors. Scan every
    // child of `parent` for a node of one of `types` whose text contains
    // `keyword`.
    const hasModifierChild = (parent, types, keyword) => {
        for (let i = 0; i < parent.childCount; i++) {
            const child = parent.child(i);
            if (child && types.includes(child.type) && child.text?.includes(keyword)) {
                return true;
            }
        }
        return false;
    };
    let current = node;
    switch (language) {
        // JavaScript/TypeScript: look for an export keyword in the ancestors.
        case 'javascript':
        case 'typescript':
            while (current) {
                const type = current.type;
                if (type === 'export_statement' ||
                    type === 'export_specifier' ||
                    (type === 'lexical_declaration' && current.parent?.type === 'export_statement')) {
                    return true;
                }
                // Fallback: the ancestor's source text starts with 'export '.
                if (current.text?.startsWith('export ')) {
                    return true;
                }
                current = current.parent;
            }
            return false;
        // Python: public unless the name has a leading underscore (convention).
        case 'python':
            return !name.startsWith('_');
        // Java: modifiers are siblings of the name node, not parents — check
        // each ancestor's children for a 'modifiers' node containing 'public'.
        case 'java':
            while (current) {
                const parent = current.parent;
                if (parent) {
                    if (hasModifierChild(parent, ['modifiers'], 'public')) {
                        return true;
                    }
                    // Fallback: the declaration's text literally starts with 'public'.
                    if ((parent.type === 'method_declaration' || parent.type === 'constructor_declaration') &&
                        parent.text?.trimStart().startsWith('public')) {
                        return true;
                    }
                }
                current = parent;
            }
            return false;
        // C#: 'modifier' nodes are also siblings of the name (children of the
        // declaration), so check each ancestor's children — walking the
        // ancestor chain alone would never encounter a modifier node.
        case 'csharp':
            while (current) {
                const parent = current.parent;
                if (parent && hasModifierChild(parent, ['modifier', 'modifiers'], 'public')) {
                    return true;
                }
                current = parent;
            }
            return false;
        // Go: exported iff the first character is an uppercase letter.
        case 'go': {
            if (name.length === 0) {
                return false;
            }
            const first = name[0];
            // Must be a cased letter in uppercase form (not a digit or symbol).
            return first === first.toUpperCase() && first !== first.toLowerCase();
        }
        // Rust: 'visibility_modifier' is a sibling of the name inside the item
        // node, so inspect each ancestor's children for it.
        case 'rust':
            while (current) {
                const parent = current.parent;
                if (parent && hasModifierChild(parent, ['visibility_modifier'], 'pub')) {
                    return true;
                }
                current = parent;
            }
            return false;
        // C/C++: no native export concept at language level.
        // Entry points will be detected via name patterns (main, etc.).
        case 'c':
        case 'cpp':
            return false;
        default:
            return false;
    }
};
101
// ============================================================================
// Worker-based parallel parsing
// ============================================================================
/**
 * Parse files through the worker pool. The pool owns chunking and
 * sub-batching; this merges every worker's nodes/relationships/symbols into
 * the graph and symbol table and returns the raw import/call/heritage
 * records for later resolution.
 */
const processParsingWithWorkers = async (graph, files, symbolTable, astCache, workerPool, onFileProgress) => {
    // Keep only files whose name maps to a supported language.
    const candidates = files
        .filter((f) => getLanguageFromFilename(f.path))
        .map((f) => ({ path: f.path, content: f.content }));
    if (candidates.length === 0) {
        return { imports: [], calls: [], heritage: [] };
    }
    const totalCount = files.length;
    // Forward progress ticks from the pool, clamped to the file total.
    const results = await workerPool.dispatch(candidates, (filesDone) => {
        onFileProgress?.(Math.min(filesDone, totalCount), totalCount, 'Parsing...');
    });
    // Fold every worker result into the graph/symbol table and accumulate
    // the pre-extracted records.
    const merged = { imports: [], calls: [], heritage: [] };
    for (const result of results) {
        result.nodes.forEach((n) => graph.addNode({
            id: n.id,
            label: n.label,
            properties: n.properties,
        }));
        result.relationships.forEach((rel) => graph.addRelationship(rel));
        result.symbols.forEach((s) => symbolTable.add(s.filePath, s.name, s.nodeId, s.type));
        merged.imports.push(...result.imports);
        merged.calls.push(...result.calls);
        merged.heritage.push(...result.heritage);
    }
    // Final progress tick.
    onFileProgress?.(totalCount, totalCount, 'done');
    return merged;
};
145
// ============================================================================
// Sequential fallback (original implementation)
// ============================================================================
/** Ordered capture-name → graph-label mapping; first match wins. */
const CAPTURE_LABELS = [
    ['definition.function', 'Function'],
    ['definition.class', 'Class'],
    ['definition.interface', 'Interface'],
    ['definition.method', 'Method'],
    ['definition.struct', 'Struct'],
    ['definition.enum', 'Enum'],
    ['definition.namespace', 'Namespace'],
    ['definition.module', 'Module'],
    ['definition.trait', 'Trait'],
    ['definition.impl', 'Impl'],
    ['definition.type', 'TypeAlias'],
    ['definition.const', 'Const'],
    ['definition.static', 'Static'],
    ['definition.typedef', 'Typedef'],
    ['definition.macro', 'Macro'],
    ['definition.union', 'Union'],
    ['definition.property', 'Property'],
    ['definition.record', 'Record'],
    ['definition.delegate', 'Delegate'],
    ['definition.annotation', 'Annotation'],
    ['definition.constructor', 'Constructor'],
    ['definition.template', 'Template'],
];
/**
 * Parse files one-by-one on the main thread (fallback when no worker pool
 * is available). For each parseable file: parse it, cache the AST, run the
 * language's definition query, and add a graph node + DEFINES relationship
 * per captured symbol. Imports and calls are skipped here — they are
 * resolved by later pipeline stages from the cached ASTs.
 */
const processParsingSequential = async (graph, files, symbolTable, astCache, onFileProgress) => {
    const parser = await loadParser();
    const total = files.length;
    for (let i = 0; i < files.length; i++) {
        const file = files[i];
        onFileProgress?.(i + 1, total, file.path);
        // Yield periodically so long parse runs don't starve the event loop.
        if (i % 20 === 0)
            await yieldToEventLoop();
        const language = getLanguageFromFilename(file.path);
        if (!language)
            continue;
        // Skip very large files — they can crash tree-sitter or cause OOM.
        if (file.content.length > 512 * 1024)
            continue;
        await loadLanguage(language, file.path);
        let tree;
        try {
            tree = parser.parse(file.content, undefined, { bufferSize: 1024 * 256 });
        }
        catch (parseError) {
            console.warn(`Skipping unparseable file: ${file.path}`);
            continue;
        }
        astCache.set(file.path, tree);
        const queryString = LANGUAGE_QUERIES[language];
        if (!queryString) {
            continue;
        }
        let matches;
        try {
            // `grammar` is the tree-sitter language object (previously a
            // confusing shadow of the filename-derived `language` string).
            const grammar = parser.getLanguage();
            const query = new Parser.Query(grammar, queryString);
            matches = query.matches(tree.rootNode);
        }
        catch (queryError) {
            console.warn(`Query error for ${file.path}:`, queryError);
            continue;
        }
        matches.forEach(match => {
            const captureMap = {};
            match.captures.forEach(c => {
                captureMap[c.name] = c.node;
            });
            // Imports and calls are handled by later processors.
            if (captureMap['import'] || captureMap['call']) {
                return;
            }
            const nameNode = captureMap['name'];
            if (!nameNode)
                return;
            const nodeName = nameNode.text;
            // First matching definition capture decides the node label.
            const hit = CAPTURE_LABELS.find(([capture]) => captureMap[capture]);
            const nodeLabel = hit ? hit[1] : 'CodeElement';
            const nodeId = generateId(nodeLabel, `${file.path}:${nodeName}`);
            const node = {
                id: nodeId,
                label: nodeLabel,
                properties: {
                    name: nodeName,
                    filePath: file.path,
                    startLine: nameNode.startPosition.row + 1,
                    endLine: nameNode.endPosition.row + 1,
                    language: language,
                    isExported: isNodeExported(nameNode, nodeName, language),
                }
            };
            graph.addNode(node);
            symbolTable.add(file.path, nodeName, nodeId, nodeLabel);
            // Link the file to the symbol it defines.
            const fileId = generateId('File', file.path);
            const relId = generateId('DEFINES', `${fileId}->${nodeId}`);
            const relationship = {
                id: relId,
                sourceId: fileId,
                targetId: nodeId,
                type: 'DEFINES',
                confidence: 1.0,
                reason: '',
            };
            graph.addRelationship(relationship);
        });
    }
};
276
// ============================================================================
// Public API
// ============================================================================
/**
 * Parse a set of files into the graph, preferring the worker pool and
 * degrading to sequential parsing when no pool is given or the pool fails.
 * Resolves to the workers' extracted records, or null after the sequential
 * path (which produces no pre-extracted data).
 */
export const processParsing = async (graph, files, symbolTable, astCache, onFileProgress, workerPool) => {
    if (workerPool) {
        try {
            const extracted = await processParsingWithWorkers(graph, files, symbolTable, astCache, workerPool, onFileProgress);
            return extracted;
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : err;
            console.warn('Worker pool parsing failed, falling back to sequential:', reason);
        }
    }
    // Fallback: sequential parsing (no pre-extracted data)
    await processParsingSequential(graph, files, symbolTable, astCache, onFileProgress);
    return null;
};
@@ -0,0 +1,5 @@
1
import { PipelineProgress, PipelineResult } from '../../types/pipeline.js';
/**
 * Run the full ingestion pipeline over a repository on disk
 * (scan → structure → parse → resolve), reporting progress via `onProgress`.
 *
 * Options:
 * - `includeExtensions`: restrict processing to files with these extensions.
 * - `scopeRules`: restrict processing to files matched by scope-filter rules.
 */
export declare const runPipelineFromRepo: (repoPath: string, onProgress: (progress: PipelineProgress) => void, options?: {
    includeExtensions?: string[];
    scopeRules?: string[];
}) => Promise<PipelineResult>;
@@ -0,0 +1,323 @@
1
+ import { createKnowledgeGraph } from '../graph/graph.js';
2
+ import { processStructure } from './structure-processor.js';
3
+ import { processParsing } from './parsing-processor.js';
4
+ import { processImports, processImportsFromExtracted, createImportMap, buildImportResolutionContext } from './import-processor.js';
5
+ import { processCalls, processCallsFromExtracted } from './call-processor.js';
6
+ import { processHeritage, processHeritageFromExtracted } from './heritage-processor.js';
7
+ import { processCommunities } from './community-processor.js';
8
+ import { processProcesses } from './process-processor.js';
9
+ import { createSymbolTable } from './symbol-table.js';
10
+ import { createASTCache } from './ast-cache.js';
11
+ import { walkRepositoryPaths, readFileContents } from './filesystem-walker.js';
12
+ import { getLanguageFromFilename } from './utils.js';
13
+ import { createWorkerPool } from './workers/worker-pool.js';
14
+ import { selectEntriesByScopeRules } from './scope-filter.js';
15
+ import path from 'path';
16
// Extra diagnostic logging is gated on NODE_ENV=development.
const isDev = process.env.NODE_ENV === 'development';
/** Max bytes of source content to load per parse chunk. Each chunk's source +
 * parsed ASTs + extracted records + worker serialization overhead all live in
 * memory simultaneously, so this must be conservative. 20MB source ≈ 200-400MB
 * peak working memory per chunk after parse expansion. */
const CHUNK_BYTE_BUDGET = 20 * 1024 * 1024; // 20MB
/** Initial max AST trees to keep in the LRU cache (the pipeline later
 * re-creates the cache sized to the largest chunk). */
const AST_CACHE_CAP = 50;
24
+ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
25
+ const graph = createKnowledgeGraph();
26
+ const symbolTable = createSymbolTable();
27
+ let astCache = createASTCache(AST_CACHE_CAP);
28
+ const importMap = createImportMap();
29
+ const cleanup = () => {
30
+ astCache.clear();
31
+ symbolTable.clear();
32
+ };
33
+ try {
34
+ // ── Phase 1: Scan paths only (no content read) ─────────────────────
35
+ onProgress({
36
+ phase: 'extracting',
37
+ percent: 0,
38
+ message: 'Scanning repository...',
39
+ });
40
+ const scannedFiles = await walkRepositoryPaths(repoPath, (current, total, filePath) => {
41
+ const scanProgress = Math.round((current / total) * 15);
42
+ onProgress({
43
+ phase: 'extracting',
44
+ percent: scanProgress,
45
+ message: 'Scanning repository...',
46
+ detail: filePath,
47
+ stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
48
+ });
49
+ });
50
+ const scopeSelection = selectEntriesByScopeRules(scannedFiles, options?.scopeRules || []);
51
+ const scopedFiles = scopeSelection.selected;
52
+ if (scopeSelection.diagnostics.appliedRuleCount > 0 && scopedFiles.length === 0) {
53
+ throw new Error('Scope filters matched zero files. Check --scope-manifest/--scope-prefix.');
54
+ }
55
+ const includeExtensions = new Set((options?.includeExtensions || [])
56
+ .map(ext => ext.trim().toLowerCase())
57
+ .filter(Boolean)
58
+ .map(ext => (ext.startsWith('.') ? ext : `.${ext}`)));
59
+ const extensionFiltered = includeExtensions.size > 0
60
+ ? scopedFiles.filter(f => includeExtensions.has(path.extname(f.path).toLowerCase()))
61
+ : scopedFiles;
62
+ const totalFiles = extensionFiltered.length;
63
+ onProgress({
64
+ phase: 'extracting',
65
+ percent: 15,
66
+ message: 'Repository scanned successfully',
67
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
68
+ });
69
+ // ── Phase 2: Structure (paths only — no content needed) ────────────
70
+ onProgress({
71
+ phase: 'structure',
72
+ percent: 15,
73
+ message: 'Analyzing project structure...',
74
+ stats: { filesProcessed: 0, totalFiles, nodesCreated: graph.nodeCount },
75
+ });
76
+ const allPaths = extensionFiltered.map(f => f.path);
77
+ processStructure(graph, allPaths);
78
+ onProgress({
79
+ phase: 'structure',
80
+ percent: 20,
81
+ message: 'Project structure analyzed',
82
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
83
+ });
84
+ // ── Phase 3+4: Chunked read + parse ────────────────────────────────
85
+ // Group parseable files into byte-budget chunks so only ~20MB of source
86
+ // is in memory at a time. Each chunk is: read → parse → extract → free.
87
+ const parseableScanned = extensionFiltered.filter(f => getLanguageFromFilename(f.path));
88
+ const totalParseable = parseableScanned.length;
89
+ // Build byte-budget chunks
90
+ const chunks = [];
91
+ let currentChunk = [];
92
+ let currentBytes = 0;
93
+ for (const file of parseableScanned) {
94
+ if (currentChunk.length > 0 && currentBytes + file.size > CHUNK_BYTE_BUDGET) {
95
+ chunks.push(currentChunk);
96
+ currentChunk = [];
97
+ currentBytes = 0;
98
+ }
99
+ currentChunk.push(file.path);
100
+ currentBytes += file.size;
101
+ }
102
+ if (currentChunk.length > 0)
103
+ chunks.push(currentChunk);
104
+ const numChunks = chunks.length;
105
+ if (isDev) {
106
+ const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024);
107
+ console.log(`📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${CHUNK_BYTE_BUDGET / (1024 * 1024)}MB budget`);
108
+ }
109
+ onProgress({
110
+ phase: 'parsing',
111
+ percent: 20,
112
+ message: `Parsing ${totalParseable} files in ${numChunks} chunk${numChunks !== 1 ? 's' : ''}...`,
113
+ stats: { filesProcessed: 0, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
114
+ });
115
+ // Create worker pool once, reuse across chunks
116
+ let workerPool;
117
+ try {
118
+ const workerUrl = new URL('./workers/parse-worker.js', import.meta.url);
119
+ workerPool = createWorkerPool(workerUrl);
120
+ }
121
+ catch (err) {
122
+ // Worker pool creation failed — sequential fallback
123
+ }
124
+ let filesParsedSoFar = 0;
125
+ // AST cache sized for one chunk (sequential fallback uses it for import/call/heritage)
126
+ const maxChunkFiles = chunks.reduce((max, c) => Math.max(max, c.length), 0);
127
+ astCache = createASTCache(maxChunkFiles);
128
+ // Build import resolution context once — suffix index, file lists, resolve cache.
129
+ // Reused across all chunks to avoid rebuilding O(files × path_depth) structures.
130
+ const importCtx = buildImportResolutionContext(allPaths);
131
+ const allPathObjects = allPaths.map(p => ({ path: p }));
132
+ // Single-pass: parse + resolve imports/calls/heritage per chunk.
133
+ // Calls/heritage use the symbol table built so far (symbols from earlier chunks
134
+ // are already registered). This trades ~5% cross-chunk resolution accuracy for
135
+ // 200-400MB less memory — critical for Linux-kernel-scale repos.
136
+ const sequentialChunkPaths = [];
137
+ try {
138
+ for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
139
+ const chunkPaths = chunks[chunkIdx];
140
+ // Read content for this chunk only
141
+ const chunkContents = await readFileContents(repoPath, chunkPaths);
142
+ const chunkFiles = chunkPaths
143
+ .filter(p => chunkContents.has(p))
144
+ .map(p => ({ path: p, content: chunkContents.get(p) }));
145
+ // Parse this chunk (workers or sequential fallback)
146
+ const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, (current, _total, filePath) => {
147
+ const globalCurrent = filesParsedSoFar + current;
148
+ const parsingProgress = 20 + ((globalCurrent / totalParseable) * 62);
149
+ onProgress({
150
+ phase: 'parsing',
151
+ percent: Math.round(parsingProgress),
152
+ message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
153
+ detail: filePath,
154
+ stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
155
+ });
156
+ }, workerPool);
157
+ if (chunkWorkerData) {
158
+ // Imports
159
+ await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, importMap, undefined, repoPath, importCtx);
160
+ // Calls — resolve immediately, then free the array
161
+ if (chunkWorkerData.calls.length > 0) {
162
+ await processCallsFromExtracted(graph, chunkWorkerData.calls, symbolTable, importMap);
163
+ }
164
+ // Heritage — resolve immediately, then free
165
+ if (chunkWorkerData.heritage.length > 0) {
166
+ await processHeritageFromExtracted(graph, chunkWorkerData.heritage, symbolTable);
167
+ }
168
+ }
169
+ else {
170
+ await processImports(graph, chunkFiles, astCache, importMap, undefined, repoPath, allPaths);
171
+ sequentialChunkPaths.push(chunkPaths);
172
+ }
173
+ filesParsedSoFar += chunkFiles.length;
174
+ // Clear AST cache between chunks to free memory
175
+ astCache.clear();
176
+ // chunkContents + chunkFiles + chunkWorkerData go out of scope → GC reclaims
177
+ }
178
+ }
179
+ finally {
180
+ await workerPool?.terminate();
181
+ }
182
+ // Sequential fallback chunks: re-read source for call/heritage resolution
183
+ for (const chunkPaths of sequentialChunkPaths) {
184
+ const chunkContents = await readFileContents(repoPath, chunkPaths);
185
+ const chunkFiles = chunkPaths
186
+ .filter(p => chunkContents.has(p))
187
+ .map(p => ({ path: p, content: chunkContents.get(p) }));
188
+ astCache = createASTCache(chunkFiles.length);
189
+ await processCalls(graph, chunkFiles, astCache, symbolTable, importMap);
190
+ await processHeritage(graph, chunkFiles, astCache, symbolTable);
191
+ astCache.clear();
192
+ }
193
+ // Free import resolution context — suffix index + resolve cache no longer needed
194
+ // (allPathObjects and importCtx hold ~94MB+ for large repos)
195
+ allPathObjects.length = 0;
196
+ importCtx.resolveCache.clear();
197
+ importCtx.suffixIndex = null;
198
+ importCtx.normalizedFileList = null;
199
+ if (isDev) {
200
+ let importsCount = 0;
201
+ for (const r of graph.iterRelationships()) {
202
+ if (r.type === 'IMPORTS')
203
+ importsCount++;
204
+ }
205
+ console.log(`📊 Pipeline: graph has ${importsCount} IMPORTS, ${graph.relationshipCount} total relationships`);
206
+ }
207
+ // ── Phase 5: Communities ───────────────────────────────────────────
208
+ onProgress({
209
+ phase: 'communities',
210
+ percent: 82,
211
+ message: 'Detecting code communities...',
212
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
213
+ });
214
+ const communityResult = await processCommunities(graph, (message, progress) => {
215
+ const communityProgress = 82 + (progress * 0.10);
216
+ onProgress({
217
+ phase: 'communities',
218
+ percent: Math.round(communityProgress),
219
+ message,
220
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
221
+ });
222
+ });
223
+ if (isDev) {
224
+ console.log(`🏘️ Community detection: ${communityResult.stats.totalCommunities} communities found (modularity: ${communityResult.stats.modularity.toFixed(3)})`);
225
+ }
226
+ communityResult.communities.forEach(comm => {
227
+ graph.addNode({
228
+ id: comm.id,
229
+ label: 'Community',
230
+ properties: {
231
+ name: comm.label,
232
+ filePath: '',
233
+ heuristicLabel: comm.heuristicLabel,
234
+ cohesion: comm.cohesion,
235
+ symbolCount: comm.symbolCount,
236
+ }
237
+ });
238
+ });
239
+ communityResult.memberships.forEach(membership => {
240
+ graph.addRelationship({
241
+ id: `${membership.nodeId}_member_of_${membership.communityId}`,
242
+ type: 'MEMBER_OF',
243
+ sourceId: membership.nodeId,
244
+ targetId: membership.communityId,
245
+ confidence: 1.0,
246
+ reason: 'leiden-algorithm',
247
+ });
248
+ });
249
+ // ── Phase 6: Processes ─────────────────────────────────────────────
250
+ onProgress({
251
+ phase: 'processes',
252
+ percent: 94,
253
+ message: 'Detecting execution flows...',
254
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
255
+ });
256
+ let symbolCount = 0;
257
+ graph.forEachNode(n => { if (n.label !== 'File')
258
+ symbolCount++; });
259
+ const dynamicMaxProcesses = Math.max(20, Math.min(300, Math.round(symbolCount / 10)));
260
+ const processResult = await processProcesses(graph, communityResult.memberships, (message, progress) => {
261
+ const processProgress = 94 + (progress * 0.05);
262
+ onProgress({
263
+ phase: 'processes',
264
+ percent: Math.round(processProgress),
265
+ message,
266
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
267
+ });
268
+ }, { maxProcesses: dynamicMaxProcesses, minSteps: 3 });
269
+ if (isDev) {
270
+ console.log(`🔄 Process detection: ${processResult.stats.totalProcesses} processes found (${processResult.stats.crossCommunityCount} cross-community)`);
271
+ }
272
+ processResult.processes.forEach(proc => {
273
+ graph.addNode({
274
+ id: proc.id,
275
+ label: 'Process',
276
+ properties: {
277
+ name: proc.label,
278
+ filePath: '',
279
+ heuristicLabel: proc.heuristicLabel,
280
+ processType: proc.processType,
281
+ stepCount: proc.stepCount,
282
+ communities: proc.communities,
283
+ entryPointId: proc.entryPointId,
284
+ terminalId: proc.terminalId,
285
+ }
286
+ });
287
+ });
288
+ processResult.steps.forEach(step => {
289
+ graph.addRelationship({
290
+ id: `${step.nodeId}_step_${step.step}_${step.processId}`,
291
+ type: 'STEP_IN_PROCESS',
292
+ sourceId: step.nodeId,
293
+ targetId: step.processId,
294
+ confidence: 1.0,
295
+ reason: 'trace-detection',
296
+ step: step.step,
297
+ });
298
+ });
299
+ onProgress({
300
+ phase: 'complete',
301
+ percent: 100,
302
+ message: `Graph complete! ${communityResult.stats.totalCommunities} communities, ${processResult.stats.totalProcesses} processes detected.`,
303
+ stats: {
304
+ filesProcessed: totalFiles,
305
+ totalFiles,
306
+ nodesCreated: graph.nodeCount
307
+ },
308
+ });
309
+ astCache.clear();
310
+ return {
311
+ graph,
312
+ repoPath,
313
+ totalFileCount: totalFiles,
314
+ communityResult,
315
+ processResult,
316
+ scopeDiagnostics: scopeSelection.diagnostics,
317
+ };
318
+ }
319
+ catch (error) {
320
+ cleanup();
321
+ throw error;
322
+ }
323
+ };