code-graph-context 2.0.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/README.md +156 -2
  2. package/dist/constants.js +167 -0
  3. package/dist/core/config/fairsquare-framework-schema.js +9 -7
  4. package/dist/core/config/schema.js +41 -2
  5. package/dist/core/embeddings/natural-language-to-cypher.service.js +166 -110
  6. package/dist/core/parsers/typescript-parser.js +1039 -742
  7. package/dist/core/parsers/workspace-parser.js +175 -193
  8. package/dist/core/utils/code-normalizer.js +299 -0
  9. package/dist/core/utils/file-change-detection.js +17 -2
  10. package/dist/core/utils/file-utils.js +40 -5
  11. package/dist/core/utils/graph-factory.js +161 -0
  12. package/dist/core/utils/shared-utils.js +79 -0
  13. package/dist/core/workspace/workspace-detector.js +59 -5
  14. package/dist/mcp/constants.js +141 -8
  15. package/dist/mcp/handlers/graph-generator.handler.js +1 -0
  16. package/dist/mcp/handlers/incremental-parse.handler.js +3 -6
  17. package/dist/mcp/handlers/parallel-import.handler.js +136 -0
  18. package/dist/mcp/handlers/streaming-import.handler.js +14 -59
  19. package/dist/mcp/mcp.server.js +1 -1
  20. package/dist/mcp/services/job-manager.js +5 -8
  21. package/dist/mcp/services/watch-manager.js +7 -18
  22. package/dist/mcp/tools/detect-dead-code.tool.js +413 -0
  23. package/dist/mcp/tools/detect-duplicate-code.tool.js +450 -0
  24. package/dist/mcp/tools/impact-analysis.tool.js +20 -4
  25. package/dist/mcp/tools/index.js +4 -0
  26. package/dist/mcp/tools/parse-typescript-project.tool.js +15 -14
  27. package/dist/mcp/workers/chunk-worker-pool.js +196 -0
  28. package/dist/mcp/workers/chunk-worker.types.js +4 -0
  29. package/dist/mcp/workers/chunk.worker.js +89 -0
  30. package/dist/mcp/workers/parse-coordinator.js +183 -0
  31. package/dist/mcp/workers/worker.pool.js +54 -0
  32. package/dist/storage/neo4j/neo4j.service.js +190 -10
  33. package/package.json +1 -1
package/dist/mcp/tools/detect-duplicate-code.tool.js
@@ -0,0 +1,450 @@
+ /**
+  * Detect Duplicate Code Tool
+  * Identifies duplicate code using structural (AST hash) and semantic (embedding similarity) analysis
+  */
+ import { z } from 'zod';
+ import { toNumber, isUIComponent, getMonorepoAppName, getShortPath, truncateSourceCode, } from '../../core/utils/shared-utils.js';
+ import { Neo4jService, QUERIES } from '../../storage/neo4j/neo4j.service.js';
+ import { TOOL_NAMES, TOOL_METADATA } from '../constants.js';
+ import { createErrorResponse, createSuccessResponse, debugLog, resolveProjectIdOrError } from '../utils.js';
+ /**
+  * Determine confidence based on duplicate characteristics.
+  */
+ const determineConfidence = (type, similarity, itemCount) => {
+     if (type === 'structural') {
+         // Structural duplicates with identical hash are high confidence
+         return 'HIGH';
+     }
+     // Semantic duplicates: confidence based on similarity and item count
+     if (similarity >= 0.9 && itemCount >= 2) {
+         return 'HIGH';
+     }
+     if (similarity >= 0.85) {
+         return 'MEDIUM';
+     }
+     return 'LOW';
+ };
+ /**
+  * Check if items are in different monorepo apps.
+  */
+ const areInDifferentApps = (items) => {
+     const apps = new Set(items.map((i) => getMonorepoAppName(i.filePath)).filter(Boolean));
+     return apps.size > 1;
+ };
+ /**
+  * Analyze duplicates and generate category + recommendation.
+  */
+ const analyzeAndRecommend = (type, items) => {
+     const names = [...new Set(items.map((i) => i.name))].slice(0, 3).join(', ');
+     const filesAffected = new Set(items.map((i) => i.filePath)).size;
+     // Check for UI component patterns
+     const allUIComponents = items.every((i) => isUIComponent(i.filePath));
+     if (allUIComponents) {
+         return {
+             category: 'ui-component',
+             recommendation: `UI components ${names} have similar structure - likely intentional co-location`,
+         };
+     }
+     // Check for monorepo cross-app duplicates
+     if (areInDifferentApps(items)) {
+         const apps = [...new Set(items.map((i) => getMonorepoAppName(i.filePath)).filter(Boolean))];
+         return {
+             category: 'cross-app',
+             recommendation: `Code duplicated across apps (${apps.slice(0, 3).join(', ')}) - consider shared package if unifying`,
+         };
+     }
+     // Same file duplicates
+     if (filesAffected === 1) {
+         return {
+             category: 'same-file',
+             recommendation: type === 'structural'
+                 ? `Consider extracting shared logic from ${names} into a single method`
+                 : `Review ${names} for potential consolidation`,
+         };
+     }
+     // Cross-file duplicates (default)
+     return {
+         category: 'cross-file',
+         recommendation: type === 'structural'
+             ? `Consider extracting ${names} into a shared utility function`
+             : `Semantically similar code in ${names} - consider unifying the approach`,
+     };
+ };
+ /**
+  * Map scope to core types for filtering.
+  */
+ const getScopeFilter = (scope) => {
+     switch (scope) {
+         case 'methods':
+             return ['MethodDeclaration'];
+         case 'functions':
+             return ['FunctionDeclaration'];
+         case 'classes':
+             return ['ClassDeclaration'];
+         case 'all':
+         default:
+             return ['MethodDeclaration', 'FunctionDeclaration', 'ClassDeclaration'];
+     }
+ };
+ export const createDetectDuplicateCodeTool = (server) => {
+     server.registerTool(TOOL_NAMES.detectDuplicateCode, {
+         title: TOOL_METADATA[TOOL_NAMES.detectDuplicateCode].title,
+         description: TOOL_METADATA[TOOL_NAMES.detectDuplicateCode].description,
+         inputSchema: {
+             projectId: z.string().describe('Project ID, name, or path (e.g., "backend" or "proj_a1b2c3d4e5f6")'),
+             type: z
+                 .enum(['structural', 'semantic', 'all'])
+                 .optional()
+                 .describe('Detection approach: structural (AST hash), semantic (embeddings), or all (default: all)')
+                 .default('all'),
+             minSimilarity: z
+                 .number()
+                 .min(0.5)
+                 .max(1.0)
+                 .optional()
+                 .describe('Minimum similarity for semantic duplicates (0.5-1.0, default: 0.80)')
+                 .default(0.8),
+             includeCode: z
+                 .boolean()
+                 .optional()
+                 .describe('Include source code snippets in results (default: false)')
+                 .default(false),
+             maxResults: z
+                 .number()
+                 .int()
+                 .min(1)
+                 .max(100)
+                 .optional()
+                 .describe('Maximum number of duplicate groups to return (default: 20)')
+                 .default(20),
+             scope: z
+                 .enum(['methods', 'functions', 'classes', 'all'])
+                 .optional()
+                 .describe('Node types to analyze (default: all)')
+                 .default('all'),
+             summaryOnly: z
+                 .boolean()
+                 .optional()
+                 .describe('Return only summary statistics without full duplicates list (default: false)')
+                 .default(false),
+             offset: z
+                 .number()
+                 .int()
+                 .min(0)
+                 .optional()
+                 .describe('Number of groups to skip for pagination (default: 0)')
+                 .default(0),
+             vectorNeighbors: z
+                 .number()
+                 .int()
+                 .min(10)
+                 .max(200)
+                 .optional()
+                 .describe('Number of vector neighbors to search per node for semantic duplicates (default: 50, higher = more thorough)')
+                 .default(50),
+         },
+     }, async ({ projectId, type = 'all', minSimilarity = 0.8, includeCode = false, maxResults = 20, scope = 'all', summaryOnly = false, offset = 0, vectorNeighbors = 50, }) => {
+         const neo4jService = new Neo4jService();
+         try {
+             // Resolve project ID
+             const projectResult = await resolveProjectIdOrError(projectId, neo4jService);
+             if (!projectResult.success)
+                 return projectResult.error;
+             const resolvedProjectId = projectResult.projectId;
+             await debugLog('Duplicate code detection started', {
+                 projectId: resolvedProjectId,
+                 type,
+                 minSimilarity,
+                 scope,
+             });
+             const coreTypes = getScopeFilter(scope);
+             const duplicateGroups = [];
+             let groupCounter = 1;
+             const includeStructuralInOutput = type === 'structural' || type === 'all';
+             // 1. Find structural duplicates (always run for filtering, only include in output if requested)
+             // This ensures semantic-only mode filters out exact copy pairs
+             const structuralPairs = new Set(); // Pairs of nodeIds that are exact copies
+             {
+                 const structuralResult = (await neo4jService.run(QUERIES.FIND_STRUCTURAL_DUPLICATES, {
+                     projectId: resolvedProjectId,
+                     coreTypes,
+                     limit: Math.floor(maxResults * 10), // Get extra for grouping (each group has multiple items)
+                 }));
+                 // Group by normalizedHash
+                 const hashGroups = new Map();
+                 for (const item of structuralResult) {
+                     const hash = item.normalizedHash;
+                     if (!hash)
+                         continue;
+                     const duplicateItem = {
+                         nodeId: item.nodeId,
+                         name: item.name,
+                         coreType: item.coreType,
+                         semanticType: item.semanticType ?? null,
+                         filePath: item.filePath,
+                         lineNumber: toNumber(item.lineNumber),
+                     };
+                     if (includeCode) {
+                         duplicateItem.sourceCode = truncateSourceCode(item.sourceCode);
+                     }
+                     if (!hashGroups.has(hash)) {
+                         hashGroups.set(hash, []);
+                     }
+                     hashGroups.get(hash).push(duplicateItem);
+                 }
+                 // Convert to duplicate groups (only groups with 2+ items are duplicates)
+                 for (const [, items] of hashGroups) {
+                     if (items.length >= 2) {
+                         // Track all pairs within this group for semantic filtering
+                         // This ensures we only filter pairs that are EXACT copies of each other
+                         for (let i = 0; i < items.length; i++) {
+                             for (let j = i + 1; j < items.length; j++) {
+                                 const pairKey = [items[i].nodeId, items[j].nodeId].sort().join('::');
+                                 structuralPairs.add(pairKey);
+                             }
+                         }
+                         // Only add to output if structural was requested
+                         if (includeStructuralInOutput) {
+                             const { category, recommendation } = analyzeAndRecommend('structural', items);
+                             duplicateGroups.push({
+                                 groupId: `dup_${groupCounter++}`,
+                                 type: 'structural',
+                                 similarity: 1.0,
+                                 confidence: determineConfidence('structural', 1.0, items.length),
+                                 category,
+                                 items,
+                                 recommendation,
+                             });
+                         }
+                     }
+                 }
+             }
+             // 2. Find semantic duplicates (embedding similarity)
+             // Diagnostic counters to debug filtering
+             let semanticQueryResults = 0;
+             let filteredAsSameFile = 0;
+             let filteredAsSeenPair = 0;
+             let filteredAsStructural = 0;
+             let filteredAsUsedInGroup = 0;
+             let semanticQueryError = null;
+             if (type === 'semantic' || type === 'all') {
+                 let semanticResult = [];
+                 try {
+                     semanticResult = (await neo4jService.run(QUERIES.FIND_SEMANTIC_DUPLICATES, {
+                         projectId: resolvedProjectId,
+                         coreTypes,
+                         minSimilarity,
+                         vectorNeighbors,
+                         limit: Math.floor(maxResults * 2), // Get extra for filtering (ensure integer)
+                     }));
+                 }
+                 catch (error) {
+                     const errorMessage = error instanceof Error ? error.message : String(error);
+                     // Check for vector index errors
+                     if (errorMessage.includes('vector') ||
+                         errorMessage.includes('index') ||
+                         errorMessage.includes('embedding')) {
+                         semanticQueryError =
+                             'Semantic duplicate detection requires embeddings. ' +
+                                 'Re-parse the project with embeddings enabled (useEmbeddings: true) to enable this feature.';
+                         await debugLog('Semantic query skipped - vector index not available', { error: errorMessage });
+                     }
+                     else {
+                         // Re-throw non-vector-index errors
+                         throw error;
+                     }
+                 }
+                 // Process semantic pairs
+                 const seenPairs = new Set();
+                 const usedInSemanticGroup = new Set();
+                 for (const pair of semanticResult) {
+                     semanticQueryResults++;
+                     const nodeId1 = pair.nodeId1;
+                     const nodeId2 = pair.nodeId2;
+                     const similarity = toNumber(pair.similarity);
+                     // Skip if same file (same-file similarity is expected)
+                     if (pair.filePath1 === pair.filePath2) {
+                         filteredAsSameFile++;
+                         continue;
+                     }
+                     // Skip if already seen this pair
+                     const pairKey = [nodeId1, nodeId2].sort().join('::');
+                     if (seenPairs.has(pairKey)) {
+                         filteredAsSeenPair++;
+                         continue;
+                     }
+                     seenPairs.add(pairKey);
+                     // Skip if this specific pair is already a structural duplicate (exact copies of each other)
+                     if (structuralPairs.has(pairKey)) {
+                         filteredAsStructural++;
+                         continue;
+                     }
+                     // Skip if either node is already in a semantic duplicate group (first match wins)
+                     if (usedInSemanticGroup.has(nodeId1) || usedInSemanticGroup.has(nodeId2)) {
+                         filteredAsUsedInGroup++;
+                         continue;
+                     }
+                     const items = [
+                         {
+                             nodeId: nodeId1,
+                             name: pair.name1,
+                             coreType: pair.coreType1,
+                             semanticType: pair.semanticType1 ?? null,
+                             filePath: pair.filePath1,
+                             lineNumber: toNumber(pair.lineNumber1),
+                         },
+                         {
+                             nodeId: nodeId2,
+                             name: pair.name2,
+                             coreType: pair.coreType2,
+                             semanticType: pair.semanticType2 ?? null,
+                             filePath: pair.filePath2,
+                             lineNumber: toNumber(pair.lineNumber2),
+                         },
+                     ];
+                     if (includeCode) {
+                         items[0].sourceCode = truncateSourceCode(pair.sourceCode1);
+                         items[1].sourceCode = truncateSourceCode(pair.sourceCode2);
+                     }
+                     const { category, recommendation } = analyzeAndRecommend('semantic', items);
+                     duplicateGroups.push({
+                         groupId: `dup_${groupCounter++}`,
+                         type: 'semantic',
+                         similarity: Math.round(similarity * 1000) / 1000,
+                         confidence: determineConfidence('semantic', similarity, 2),
+                         category,
+                         items,
+                         recommendation,
+                     });
+                     // Mark both nodes as used to prevent appearing in multiple groups
+                     usedInSemanticGroup.add(nodeId1);
+                     usedInSemanticGroup.add(nodeId2);
+                 }
+                 // Log semantic query diagnostics
+                 await debugLog('Semantic query diagnostics', {
+                     rawResults: semanticQueryResults,
+                     filteredAsSameFile,
+                     filteredAsSeenPair,
+                     filteredAsStructural,
+                     filteredAsUsedInGroup,
+                     structuralPairsCount: structuralPairs.size,
+                     finalSemanticGroups: duplicateGroups.filter((g) => g.type === 'semantic').length,
+                 });
+             }
+             // Sort by similarity (descending)
+             duplicateGroups.sort((a, b) => b.similarity - a.similarity);
+             // Calculate statistics on ALL groups before pagination
+             const allStructuralGroups = duplicateGroups.filter((g) => g.type === 'structural');
+             const allSemanticGroups = duplicateGroups.filter((g) => g.type === 'semantic');
+             const totalGroups = duplicateGroups.length;
+             const totalDuplicates = duplicateGroups.reduce((sum, g) => sum + g.items.length, 0);
+             const affectedFiles = [...new Set(duplicateGroups.flatMap((g) => g.items.map((i) => i.filePath)))].sort();
+             const byType = {
+                 structural: {
+                     groups: allStructuralGroups.length,
+                     items: allStructuralGroups.reduce((sum, g) => sum + g.items.length, 0),
+                 },
+                 semantic: {
+                     groups: allSemanticGroups.length,
+                     items: allSemanticGroups.reduce((sum, g) => sum + g.items.length, 0),
+                 },
+             };
+             // Check embedding count for diagnostic (do this before building summary)
+             let embeddingCount = 0;
+             let semanticDiagnostic = null;
+             if ((type === 'semantic' || type === 'all') && allSemanticGroups.length === 0) {
+                 const embeddingCountResult = await neo4jService.run(`MATCH (n:Embedded) WHERE n.projectId = $projectId RETURN count(n) AS count`, { projectId: resolvedProjectId });
+                 embeddingCount = toNumber(embeddingCountResult[0]?.count);
+                 if (embeddingCount === 0) {
+                     semanticDiagnostic = {
+                         nodesWithEmbeddings: 0,
+                         message: 'No nodes have embeddings. Re-parse with OPENAI_API_KEY set to enable semantic duplicate detection.',
+                     };
+                 }
+                 else {
+                     semanticDiagnostic = {
+                         nodesWithEmbeddings: embeddingCount,
+                         message: `${embeddingCount} nodes have embeddings but no semantic duplicates found above ${minSimilarity} similarity threshold.`,
+                     };
+                 }
+                 // Log diagnostic so user sees it in debug output
+                 await debugLog('Semantic duplicate diagnostic', semanticDiagnostic);
+             }
+             // Build summary with warning if no embeddings
+             let summary = totalGroups === 0
+                 ? 'No duplicate code found'
+                 : `Found ${totalGroups} duplicate code groups across ${affectedFiles.length} files`;
+             if (semanticQueryError) {
+                 summary += ` (Warning: ${semanticQueryError})`;
+             }
+             else if ((type === 'semantic' || type === 'all') && embeddingCount === 0 && allSemanticGroups.length === 0) {
+                 summary += ' (Warning: No embeddings for semantic detection)';
+             }
+             // Build result based on summaryOnly flag
+             let result;
+             if (summaryOnly) {
+                 // Summary mode: statistics only, no full arrays
+                 const fileDuplicateCounts = {};
+                 for (const group of duplicateGroups) {
+                     for (const item of group.items) {
+                         const shortPath = getShortPath(item.filePath);
+                         fileDuplicateCounts[shortPath] = (fileDuplicateCounts[shortPath] ?? 0) + 1;
+                     }
+                 }
+                 const topFilesByDuplicates = Object.entries(fileDuplicateCounts)
+                     .sort((a, b) => b[1] - a[1])
+                     .slice(0, 20)
+                     .map(([file, count]) => ({ file, count }));
+                 result = {
+                     summary,
+                     totalGroups,
+                     totalDuplicates,
+                     byType,
+                     affectedFiles,
+                     topFilesByDuplicates,
+                 };
+             }
+             else {
+                 // Paginated mode: apply offset/maxResults
+                 const paginatedGroups = duplicateGroups.slice(offset, offset + maxResults);
+                 const hasMore = offset + maxResults < duplicateGroups.length;
+                 result = {
+                     summary,
+                     totalGroups,
+                     totalDuplicates,
+                     byType,
+                     duplicates: paginatedGroups,
+                     pagination: {
+                         offset,
+                         limit: maxResults,
+                         returned: paginatedGroups.length,
+                         hasMore,
+                     },
+                     affectedFiles,
+                 };
+             }
+             // Add pre-computed diagnostic to result
+             if (semanticDiagnostic) {
+                 result.semanticDiagnostic = semanticDiagnostic;
+             }
+             await debugLog('Duplicate code detection complete', {
+                 projectId: resolvedProjectId,
+                 totalGroups,
+                 structuralGroups: allStructuralGroups.length,
+                 semanticGroups: allSemanticGroups.length,
+                 summaryOnly,
+                 offset,
+                 maxResults,
+             });
+             return createSuccessResponse(JSON.stringify(result, null, 2));
+         }
+         catch (error) {
+             console.error('Duplicate code detection error:', error);
+             await debugLog('Duplicate code detection error', { projectId, error });
+             return createErrorResponse(error);
+         }
+         finally {
+             await neo4jService.close();
+         }
+     });
+ };
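
The semantic filtering above hinges on one small trick: a pair of node IDs is reduced to an order-independent key before any set-membership check, so the semantic pass can cheaply skip pairs the structural pass already classified as exact copies. A minimal runnable sketch of that technique (the node IDs are made up for illustration):

    // Order-independent pair key, as used by both passes above.
    const pairKey = (a, b) => [a, b].sort().join('::');

    // Structural pass: record every pair within a group sharing one AST hash.
    const structuralPairs = new Set();
    structuralPairs.add(pairKey('node_42', 'node_7'));

    // Semantic pass: the same two nodes may arrive in the opposite order,
    // but they produce the same key, so the pair is skipped
    // (and counted via filteredAsStructural).
    console.log(structuralPairs.has(pairKey('node_7', 'node_42'))); // true
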
package/dist/mcp/tools/impact-analysis.tool.js
@@ -7,19 +7,35 @@ import { z } from 'zod';
  import { Neo4jService, QUERIES } from '../../storage/neo4j/neo4j.service.js';
  import { TOOL_NAMES, TOOL_METADATA } from '../constants.js';
  import { createErrorResponse, createSuccessResponse, debugLog, resolveProjectIdOrError } from '../utils.js';
- // Default relationship weights for core AST relationships
+ /**
+  * Default relationship weights for impact/risk analysis.
+  *
+  * NOTE: These weights are intentionally different from CoreEdge.relationshipWeight
+  * in the core schema. They serve different purposes:
+  *
+  * - Core schema weights (traversalWeight): "What relationships help me understand the code?"
+  *   → CALLS is high (0.85) because following execution flow aids comprehension
+  *
+  * - Impact analysis weights: "What breaks if I modify this node?"
+  *   → EXTENDS/IMPLEMENTS are highest (0.95) because changing a base class/interface
+  *     breaks ALL subclasses/implementers - inheritance is a hard contract
+  *
+  * Example: A class with 50 callers and 10 subclasses
+  * - For traversal: follow the 50 CALLS to understand usage patterns
+  * - For impact: the 10 subclasses are CRITICAL - they inherit the contract
+  */
  const DEFAULT_RELATIONSHIP_WEIGHTS = {
-     // Critical - inheritance/interface contracts
+     // Critical - inheritance/interface contracts (changing base breaks ALL children)
      EXTENDS: 0.95,
      IMPLEMENTS: 0.95,
-     // High - direct code dependencies
+     // High - direct code dependencies (callers may break but often handle changes)
      CALLS: 0.75,
      HAS_MEMBER: 0.65,
      TYPED_AS: 0.6,
      // Medium - module dependencies
      IMPORTS: 0.5,
      EXPORTS: 0.5,
-     // Lower - structural
+     // Lower - structural (container doesn't break if child changes)
      CONTAINS: 0.3,
      HAS_PARAMETER: 0.3,
      DECORATED_WITH: 0.4,
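
To make the distinction in that comment concrete, here is a hypothetical one-hop scoring sketch. The actual propagation formula is not part of this hunk (only the weight table is), so treat the function below as an illustration rather than the tool's real logic:

    // Hypothetical illustration: risk decays by the weight of the traversed
    // edge, so one EXTENDS hop transmits more risk than one CALLS hop.
    const WEIGHTS = { EXTENDS: 0.95, IMPLEMENTS: 0.95, CALLS: 0.75, CONTAINS: 0.3 };
    const hopRisk = (relType, incomingRisk = 1.0) => incomingRisk * (WEIGHTS[relType] ?? 0.3);

    console.log(hopRisk('EXTENDS')); // 0.95 - subclasses inherit the contract
    console.log(hopRisk('CALLS'));   // 0.75 - callers can often absorb the change
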
package/dist/mcp/tools/index.js
@@ -3,6 +3,8 @@
   * Centralized tool creation and registration
   */
  import { createCheckParseStatusTool } from './check-parse-status.tool.js';
+ import { createDetectDeadCodeTool } from './detect-dead-code.tool.js';
+ import { createDetectDuplicateCodeTool } from './detect-duplicate-code.tool.js';
  import { createHelloTool } from './hello.tool.js';
  import { createImpactAnalysisTool } from './impact-analysis.tool.js';
  import { createListProjectsTool } from './list-projects.tool.js';
@@ -26,6 +28,8 @@ export const registerAllTools = (server) => {
      createTraverseFromNodeTool(server);
      createNaturalLanguageToCypherTool(server);
      createImpactAnalysisTool(server);
+     createDetectDeadCodeTool(server);
+     createDetectDuplicateCodeTool(server);
      // Register project parsing tools
      createParseTypescriptProjectTool(server);
      createCheckParseStatusTool(server);
package/dist/mcp/tools/parse-typescript-project.tool.js
@@ -12,20 +12,16 @@ import { z } from 'zod';
  import { CORE_TYPESCRIPT_SCHEMA } from '../../core/config/schema.js';
  import { EmbeddingsService } from '../../core/embeddings/embeddings.service.js';
  import { ParserFactory } from '../../core/parsers/parser-factory.js';
+ import { detectChangedFiles } from '../../core/utils/file-change-detection.js';
  import { resolveProjectId, getProjectName, UPSERT_PROJECT_QUERY, UPDATE_PROJECT_STATUS_QUERY, } from '../../core/utils/project-id.js';
  import { Neo4jService, QUERIES } from '../../storage/neo4j/neo4j.service.js';
- import { TOOL_NAMES, TOOL_METADATA, DEFAULTS, FILE_PATHS, LOG_CONFIG } from '../constants.js';
+ import { TOOL_NAMES, TOOL_METADATA, DEFAULTS, FILE_PATHS, LOG_CONFIG, PARSING } from '../constants.js';
  import { deleteSourceFileSubgraphs, loadExistingNodesForEdgeDetection, getCrossFileEdges, } from '../handlers/cross-file-edge.helpers.js';
- import { detectChangedFiles } from '../../core/utils/file-change-detection.js';
  import { GraphGeneratorHandler } from '../handlers/graph-generator.handler.js';
  import { StreamingImportHandler } from '../handlers/streaming-import.handler.js';
  import { jobManager } from '../services/job-manager.js';
  import { watchManager } from '../services/watch-manager.js';
  import { createErrorResponse, createSuccessResponse, formatParseSuccess, formatParsePartialSuccess, debugLog, } from '../utils.js';
- // Threshold for using streaming import (files)
- const STREAMING_THRESHOLD = 100;
- // Worker thread timeout (30 minutes)
- const WORKER_TIMEOUT_MS = 30 * 60 * 1000;
  /**
   * Validates that a path exists and is accessible
   * @throws Error if path doesn't exist or isn't accessible
@@ -87,7 +83,7 @@ export const createParseTypescriptProjectTool = (server) => {
              chunkSize: z
                  .number()
                  .optional()
-                 .default(50)
+                 .default(100)
                  .describe('Files per chunk for streaming import (default: 50). Set to 0 to disable streaming.'),
              useStreaming: z
                  .enum(['auto', 'always', 'never'])
@@ -139,7 +135,7 @@ export const createParseTypescriptProjectTool = (server) => {
      // Get path to worker script
      const __filename = fileURLToPath(import.meta.url);
      const __dirname = dirname(__filename);
-     const workerPath = join(__dirname, '..', 'workers', 'parse-worker.js');
+     const workerPath = join(__dirname, '..', 'workers', 'parse-coordinator.js');
      // Create Worker thread to run parsing without blocking MCP server
      const worker = new Worker(workerPath, {
          workerData: {
@@ -168,10 +164,10 @@ export const createParseTypescriptProjectTool = (server) => {
      const timeoutId = setTimeout(async () => {
          const job = jobManager.getJob(jobId);
          if (job && job.status === 'running') {
-             jobManager.failJob(jobId, `Worker timed out after ${WORKER_TIMEOUT_MS / 60000} minutes`);
+             jobManager.failJob(jobId, `Worker timed out after ${PARSING.workerTimeoutMs / 60000} minutes`);
              await terminateWorker('timeout');
          }
-     }, WORKER_TIMEOUT_MS);
+     }, PARSING.workerTimeoutMs);
      // Handle progress messages from worker
      worker.on('message', (msg) => {
          if (msg.type === 'progress') {
@@ -216,16 +212,19 @@ export const createParseTypescriptProjectTool = (server) => {
      const embeddingsService = new EmbeddingsService();
      const graphGeneratorHandler = new GraphGeneratorHandler(neo4jService, embeddingsService);
      // Determine if we should use streaming import
+     // Use lazyLoad = true for consistent glob-based file discovery (matches incremental parse)
      const parser = projectType === 'auto'
-         ? await ParserFactory.createParserWithAutoDetection(projectPath, tsconfigPath, resolvedProjectId)
+         ? await ParserFactory.createParserWithAutoDetection(projectPath, tsconfigPath, resolvedProjectId, true)
          : ParserFactory.createParser({
              workspacePath: projectPath,
              tsConfigPath: tsconfigPath,
              projectType: projectType,
              projectId: resolvedProjectId,
+             lazyLoad: true,
          });
-     const totalFiles = parser.getSourceFilePaths().length;
-     const shouldUseStreaming = useStreaming === 'always' || (useStreaming === 'auto' && totalFiles > STREAMING_THRESHOLD && chunkSize > 0);
+     const discoveredFiles = await parser.discoverSourceFiles();
+     const totalFiles = discoveredFiles.length;
+     const shouldUseStreaming = useStreaming === 'always' || (useStreaming === 'auto' && totalFiles > PARSING.streamingThreshold && chunkSize > 0);
      console.log(`📊 Project has ${totalFiles} files. Streaming: ${shouldUseStreaming ? 'enabled' : 'disabled'}`);
      if (shouldUseStreaming && clearExisting !== false) {
          // Use streaming import for large projects
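
The local STREAMING_THRESHOLD and WORKER_TIMEOUT_MS constants deleted above are replaced by a shared PARSING object from ../constants.js (dist/mcp/constants.js gains 141 lines in this release). Its exact shape is not shown in this diff; a plausible sketch, assuming the centralized values match the old local defaults:

    // Sketch only: field names come from the call sites above, values from
    // the deleted local constants (100 files, 30 minutes).
    export const PARSING = {
        streamingThreshold: 100,
        workerTimeoutMs: 30 * 60 * 1000,
    };
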
@@ -385,13 +384,15 @@ const parseProject = async (options) => {
      const { neo4jService, tsconfigPath, projectPath, projectId, clearExisting = true, projectType = 'auto' } = options;
      // Resolve projectId early - needed for incremental queries before parser is created
      const resolvedId = resolveProjectId(projectPath, projectId);
+     // Use lazyLoad = true for consistent glob-based file discovery (matches incremental parse)
      const parser = projectType === 'auto'
-         ? await ParserFactory.createParserWithAutoDetection(projectPath, tsconfigPath, resolvedId)
+         ? await ParserFactory.createParserWithAutoDetection(projectPath, tsconfigPath, resolvedId, true)
          : ParserFactory.createParser({
              workspacePath: projectPath,
              tsConfigPath: tsconfigPath,
              projectType: projectType,
              projectId: resolvedId,
+             lazyLoad: true,
          });
      let incrementalStats;
      let savedCrossFileEdges = [];
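
Both parse paths now construct the parser with lazyLoad: true and count files via discoverSourceFiles() before choosing an import strategy. Condensed from the two hunks above into one fragment (same identifiers as the diff, no new API assumed; surrounding variables come from the tool handler):

    // Glob-based discovery runs up front; file contents load lazily per chunk.
    const parser = ParserFactory.createParser({
        workspacePath: projectPath,
        tsConfigPath: tsconfigPath,
        projectType: projectType,
        projectId: resolvedId,
        lazyLoad: true,
    });
    const totalFiles = (await parser.discoverSourceFiles()).length;
    const shouldUseStreaming = useStreaming === 'always'
        || (useStreaming === 'auto' && totalFiles > PARSING.streamingThreshold && chunkSize > 0);
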