code-graph-context 2.0.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/README.md +156 -2
  2. package/dist/constants.js +167 -0
  3. package/dist/core/config/fairsquare-framework-schema.js +9 -7
  4. package/dist/core/config/schema.js +41 -2
  5. package/dist/core/embeddings/natural-language-to-cypher.service.js +166 -110
  6. package/dist/core/parsers/typescript-parser.js +1039 -742
  7. package/dist/core/parsers/workspace-parser.js +175 -193
  8. package/dist/core/utils/code-normalizer.js +299 -0
  9. package/dist/core/utils/file-change-detection.js +17 -2
  10. package/dist/core/utils/file-utils.js +40 -5
  11. package/dist/core/utils/graph-factory.js +161 -0
  12. package/dist/core/utils/shared-utils.js +79 -0
  13. package/dist/core/workspace/workspace-detector.js +59 -5
  14. package/dist/mcp/constants.js +141 -8
  15. package/dist/mcp/handlers/graph-generator.handler.js +1 -0
  16. package/dist/mcp/handlers/incremental-parse.handler.js +3 -6
  17. package/dist/mcp/handlers/parallel-import.handler.js +136 -0
  18. package/dist/mcp/handlers/streaming-import.handler.js +14 -59
  19. package/dist/mcp/mcp.server.js +1 -1
  20. package/dist/mcp/services/job-manager.js +5 -8
  21. package/dist/mcp/services/watch-manager.js +7 -18
  22. package/dist/mcp/tools/detect-dead-code.tool.js +413 -0
  23. package/dist/mcp/tools/detect-duplicate-code.tool.js +450 -0
  24. package/dist/mcp/tools/impact-analysis.tool.js +20 -4
  25. package/dist/mcp/tools/index.js +4 -0
  26. package/dist/mcp/tools/parse-typescript-project.tool.js +15 -14
  27. package/dist/mcp/workers/chunk-worker-pool.js +196 -0
  28. package/dist/mcp/workers/chunk-worker.types.js +4 -0
  29. package/dist/mcp/workers/chunk.worker.js +89 -0
  30. package/dist/mcp/workers/parse-coordinator.js +183 -0
  31. package/dist/mcp/workers/worker.pool.js +54 -0
  32. package/dist/storage/neo4j/neo4j.service.js +190 -10
  33. package/package.json +1 -1
package/dist/mcp/tools/detect-duplicate-code.tool.js
@@ -0,0 +1,450 @@
+ /**
+  * Detect Duplicate Code Tool
+  * Identifies duplicate code using structural (AST hash) and semantic (embedding similarity) analysis
+  */
+ import { z } from 'zod';
+ import { toNumber, isUIComponent, getMonorepoAppName, getShortPath, truncateSourceCode, } from '../../core/utils/shared-utils.js';
+ import { Neo4jService, QUERIES } from '../../storage/neo4j/neo4j.service.js';
+ import { TOOL_NAMES, TOOL_METADATA } from '../constants.js';
+ import { createErrorResponse, createSuccessResponse, debugLog, resolveProjectIdOrError } from '../utils.js';
+ /**
+  * Determine confidence based on duplicate characteristics.
+  */
+ const determineConfidence = (type, similarity, itemCount) => {
+     if (type === 'structural') {
+         // Structural duplicates with identical hash are high confidence
+         return 'HIGH';
+     }
+     // Semantic duplicates: confidence based on similarity and item count
+     if (similarity >= 0.9 && itemCount >= 2) {
+         return 'HIGH';
+     }
+     if (similarity >= 0.85) {
+         return 'MEDIUM';
+     }
+     return 'LOW';
+ };
+ /**
+  * Check if items are in different monorepo apps.
+  */
+ const areInDifferentApps = (items) => {
+     const apps = new Set(items.map((i) => getMonorepoAppName(i.filePath)).filter(Boolean));
+     return apps.size > 1;
+ };
+ /**
+  * Analyze duplicates and generate category + recommendation.
+  */
+ const analyzeAndRecommend = (type, items) => {
+     const names = [...new Set(items.map((i) => i.name))].slice(0, 3).join(', ');
+     const filesAffected = new Set(items.map((i) => i.filePath)).size;
+     // Check for UI component patterns
+     const allUIComponents = items.every((i) => isUIComponent(i.filePath));
+     if (allUIComponents) {
+         return {
+             category: 'ui-component',
+             recommendation: `UI components ${names} have similar structure - likely intentional co-location`,
+         };
+     }
+     // Check for monorepo cross-app duplicates
+     if (areInDifferentApps(items)) {
+         const apps = [...new Set(items.map((i) => getMonorepoAppName(i.filePath)).filter(Boolean))];
+         return {
+             category: 'cross-app',
+             recommendation: `Code duplicated across apps (${apps.slice(0, 3).join(', ')}) - consider shared package if unifying`,
+         };
+     }
+     // Same file duplicates
+     if (filesAffected === 1) {
+         return {
+             category: 'same-file',
+             recommendation: type === 'structural'
+                 ? `Consider extracting shared logic from ${names} into a single method`
+                 : `Review ${names} for potential consolidation`,
+         };
+     }
+     // Cross-file duplicates (default)
+     return {
+         category: 'cross-file',
+         recommendation: type === 'structural'
+             ? `Consider extracting ${names} into a shared utility function`
+             : `Semantically similar code in ${names} - consider unifying the approach`,
+     };
+ };
+ /**
+  * Map scope to core types for filtering.
+  */
+ const getScopeFilter = (scope) => {
+     switch (scope) {
+         case 'methods':
+             return ['MethodDeclaration'];
+         case 'functions':
+             return ['FunctionDeclaration'];
+         case 'classes':
+             return ['ClassDeclaration'];
+         case 'all':
+         default:
+             return ['MethodDeclaration', 'FunctionDeclaration', 'ClassDeclaration'];
+     }
+ };
+ export const createDetectDuplicateCodeTool = (server) => {
+     server.registerTool(TOOL_NAMES.detectDuplicateCode, {
+         title: TOOL_METADATA[TOOL_NAMES.detectDuplicateCode].title,
+         description: TOOL_METADATA[TOOL_NAMES.detectDuplicateCode].description,
+         inputSchema: {
+             projectId: z.string().describe('Project ID, name, or path (e.g., "backend" or "proj_a1b2c3d4e5f6")'),
+             type: z
+                 .enum(['structural', 'semantic', 'all'])
+                 .optional()
+                 .describe('Detection approach: structural (AST hash), semantic (embeddings), or all (default: all)')
+                 .default('all'),
+             minSimilarity: z
+                 .number()
+                 .min(0.5)
+                 .max(1.0)
+                 .optional()
+                 .describe('Minimum similarity for semantic duplicates (0.5-1.0, default: 0.80)')
+                 .default(0.8),
+             includeCode: z
+                 .boolean()
+                 .optional()
+                 .describe('Include source code snippets in results (default: false)')
+                 .default(false),
+             maxResults: z
+                 .number()
+                 .int()
+                 .min(1)
+                 .max(100)
+                 .optional()
+                 .describe('Maximum number of duplicate groups to return (default: 20)')
+                 .default(20),
+             scope: z
+                 .enum(['methods', 'functions', 'classes', 'all'])
+                 .optional()
+                 .describe('Node types to analyze (default: all)')
+                 .default('all'),
+             summaryOnly: z
+                 .boolean()
+                 .optional()
+                 .describe('Return only summary statistics without full duplicates list (default: false)')
+                 .default(false),
+             offset: z
+                 .number()
+                 .int()
+                 .min(0)
+                 .optional()
+                 .describe('Number of groups to skip for pagination (default: 0)')
+                 .default(0),
+             vectorNeighbors: z
+                 .number()
+                 .int()
+                 .min(10)
+                 .max(200)
+                 .optional()
+                 .describe('Number of vector neighbors to search per node for semantic duplicates (default: 50, higher = more thorough)')
+                 .default(50),
+         },
+     }, async ({ projectId, type = 'all', minSimilarity = 0.8, includeCode = false, maxResults = 20, scope = 'all', summaryOnly = false, offset = 0, vectorNeighbors = 50, }) => {
+         const neo4jService = new Neo4jService();
+         try {
+             // Resolve project ID
+             const projectResult = await resolveProjectIdOrError(projectId, neo4jService);
+             if (!projectResult.success)
+                 return projectResult.error;
+             const resolvedProjectId = projectResult.projectId;
+             await debugLog('Duplicate code detection started', {
+                 projectId: resolvedProjectId,
+                 type,
+                 minSimilarity,
+                 scope,
+             });
+             const coreTypes = getScopeFilter(scope);
+             const duplicateGroups = [];
+             let groupCounter = 1;
+             const includeStructuralInOutput = type === 'structural' || type === 'all';
+             // 1. Find structural duplicates (always run for filtering, only include in output if requested)
+             // This ensures semantic-only mode filters out exact copy pairs
+             const structuralPairs = new Set(); // Pairs of nodeIds that are exact copies
+             {
+                 const structuralResult = (await neo4jService.run(QUERIES.FIND_STRUCTURAL_DUPLICATES, {
+                     projectId: resolvedProjectId,
+                     coreTypes,
+                     limit: Math.floor(maxResults * 10), // Get extra for grouping (each group has multiple items)
+                 }));
+                 // Group by normalizedHash
+                 const hashGroups = new Map();
+                 for (const item of structuralResult) {
+                     const hash = item.normalizedHash;
+                     if (!hash)
+                         continue;
+                     const duplicateItem = {
+                         nodeId: item.nodeId,
+                         name: item.name,
+                         coreType: item.coreType,
+                         semanticType: item.semanticType ?? null,
+                         filePath: item.filePath,
+                         lineNumber: toNumber(item.lineNumber),
+                     };
+                     if (includeCode) {
+                         duplicateItem.sourceCode = truncateSourceCode(item.sourceCode);
+                     }
+                     if (!hashGroups.has(hash)) {
+                         hashGroups.set(hash, []);
+                     }
+                     hashGroups.get(hash).push(duplicateItem);
+                 }
+                 // Convert to duplicate groups (only groups with 2+ items are duplicates)
+                 for (const [, items] of hashGroups) {
+                     if (items.length >= 2) {
+                         // Track all pairs within this group for semantic filtering
+                         // This ensures we only filter pairs that are EXACT copies of each other
+                         for (let i = 0; i < items.length; i++) {
+                             for (let j = i + 1; j < items.length; j++) {
+                                 const pairKey = [items[i].nodeId, items[j].nodeId].sort().join('::');
+                                 structuralPairs.add(pairKey);
+                             }
+                         }
+                         // Only add to output if structural was requested
+                         if (includeStructuralInOutput) {
+                             const { category, recommendation } = analyzeAndRecommend('structural', items);
+                             duplicateGroups.push({
+                                 groupId: `dup_${groupCounter++}`,
+                                 type: 'structural',
+                                 similarity: 1.0,
+                                 confidence: determineConfidence('structural', 1.0, items.length),
+                                 category,
+                                 items,
+                                 recommendation,
+                             });
+                         }
+                     }
+                 }
+             }
+             // 2. Find semantic duplicates (embedding similarity)
+             // Diagnostic counters to debug filtering
+             let semanticQueryResults = 0;
+             let filteredAsSameFile = 0;
+             let filteredAsSeenPair = 0;
+             let filteredAsStructural = 0;
+             let filteredAsUsedInGroup = 0;
+             let semanticQueryError = null;
+             if (type === 'semantic' || type === 'all') {
+                 let semanticResult = [];
+                 try {
+                     semanticResult = (await neo4jService.run(QUERIES.FIND_SEMANTIC_DUPLICATES, {
+                         projectId: resolvedProjectId,
+                         coreTypes,
+                         minSimilarity,
+                         vectorNeighbors,
+                         limit: Math.floor(maxResults * 2), // Get extra for filtering (ensure integer)
+                     }));
+                 }
+                 catch (error) {
+                     const errorMessage = error instanceof Error ? error.message : String(error);
+                     // Check for vector index errors
+                     if (errorMessage.includes('vector') ||
+                         errorMessage.includes('index') ||
+                         errorMessage.includes('embedding')) {
+                         semanticQueryError =
+                             'Semantic duplicate detection requires embeddings. ' +
+                                 'Re-parse the project with embeddings enabled (useEmbeddings: true) to enable this feature.';
+                         await debugLog('Semantic query skipped - vector index not available', { error: errorMessage });
+                     }
+                     else {
+                         // Re-throw non-vector-index errors
+                         throw error;
+                     }
+                 }
+                 // Process semantic pairs
+                 const seenPairs = new Set();
+                 const usedInSemanticGroup = new Set();
+                 for (const pair of semanticResult) {
+                     semanticQueryResults++;
+                     const nodeId1 = pair.nodeId1;
+                     const nodeId2 = pair.nodeId2;
+                     const similarity = toNumber(pair.similarity);
+                     // Skip if same file (same-file similarity is expected)
+                     if (pair.filePath1 === pair.filePath2) {
+                         filteredAsSameFile++;
+                         continue;
+                     }
+                     // Skip if already seen this pair
+                     const pairKey = [nodeId1, nodeId2].sort().join('::');
+                     if (seenPairs.has(pairKey)) {
+                         filteredAsSeenPair++;
+                         continue;
+                     }
+                     seenPairs.add(pairKey);
+                     // Skip if this specific pair is already a structural duplicate (exact copies of each other)
+                     if (structuralPairs.has(pairKey)) {
+                         filteredAsStructural++;
+                         continue;
+                     }
+                     // Skip if either node is already in a semantic duplicate group (first match wins)
+                     if (usedInSemanticGroup.has(nodeId1) || usedInSemanticGroup.has(nodeId2)) {
+                         filteredAsUsedInGroup++;
+                         continue;
+                     }
+                     const items = [
+                         {
+                             nodeId: nodeId1,
+                             name: pair.name1,
+                             coreType: pair.coreType1,
+                             semanticType: pair.semanticType1 ?? null,
+                             filePath: pair.filePath1,
+                             lineNumber: toNumber(pair.lineNumber1),
+                         },
+                         {
+                             nodeId: nodeId2,
+                             name: pair.name2,
+                             coreType: pair.coreType2,
+                             semanticType: pair.semanticType2 ?? null,
+                             filePath: pair.filePath2,
+                             lineNumber: toNumber(pair.lineNumber2),
+                         },
+                     ];
+                     if (includeCode) {
+                         items[0].sourceCode = truncateSourceCode(pair.sourceCode1);
+                         items[1].sourceCode = truncateSourceCode(pair.sourceCode2);
+                     }
+                     const { category, recommendation } = analyzeAndRecommend('semantic', items);
+                     duplicateGroups.push({
+                         groupId: `dup_${groupCounter++}`,
+                         type: 'semantic',
+                         similarity: Math.round(similarity * 1000) / 1000,
+                         confidence: determineConfidence('semantic', similarity, 2),
+                         category,
+                         items,
+                         recommendation,
+                     });
+                     // Mark both nodes as used to prevent appearing in multiple groups
+                     usedInSemanticGroup.add(nodeId1);
+                     usedInSemanticGroup.add(nodeId2);
+                 }
+                 // Log semantic query diagnostics
+                 await debugLog('Semantic query diagnostics', {
+                     rawResults: semanticQueryResults,
+                     filteredAsSameFile,
+                     filteredAsSeenPair,
+                     filteredAsStructural,
+                     filteredAsUsedInGroup,
+                     structuralPairsCount: structuralPairs.size,
+                     finalSemanticGroups: duplicateGroups.filter((g) => g.type === 'semantic').length,
+                 });
+             }
+             // Sort by similarity (descending)
+             duplicateGroups.sort((a, b) => b.similarity - a.similarity);
+             // Calculate statistics on ALL groups before pagination
+             const allStructuralGroups = duplicateGroups.filter((g) => g.type === 'structural');
+             const allSemanticGroups = duplicateGroups.filter((g) => g.type === 'semantic');
+             const totalGroups = duplicateGroups.length;
+             const totalDuplicates = duplicateGroups.reduce((sum, g) => sum + g.items.length, 0);
+             const affectedFiles = [...new Set(duplicateGroups.flatMap((g) => g.items.map((i) => i.filePath)))].sort();
+             const byType = {
+                 structural: {
+                     groups: allStructuralGroups.length,
+                     items: allStructuralGroups.reduce((sum, g) => sum + g.items.length, 0),
+                 },
+                 semantic: {
+                     groups: allSemanticGroups.length,
+                     items: allSemanticGroups.reduce((sum, g) => sum + g.items.length, 0),
+                 },
+             };
+             // Check embedding count for diagnostic (do this before building summary)
+             let embeddingCount = 0;
+             let semanticDiagnostic = null;
+             if ((type === 'semantic' || type === 'all') && allSemanticGroups.length === 0) {
+                 const embeddingCountResult = await neo4jService.run(`MATCH (n:Embedded) WHERE n.projectId = $projectId RETURN count(n) AS count`, { projectId: resolvedProjectId });
+                 embeddingCount = toNumber(embeddingCountResult[0]?.count);
+                 if (embeddingCount === 0) {
+                     semanticDiagnostic = {
+                         nodesWithEmbeddings: 0,
+                         message: 'No nodes have embeddings. Re-parse with OPENAI_API_KEY set to enable semantic duplicate detection.',
+                     };
+                 }
+                 else {
+                     semanticDiagnostic = {
+                         nodesWithEmbeddings: embeddingCount,
+                         message: `${embeddingCount} nodes have embeddings but no semantic duplicates found above ${minSimilarity} similarity threshold.`,
+                     };
+                 }
+                 // Log diagnostic so user sees it in debug output
+                 await debugLog('Semantic duplicate diagnostic', semanticDiagnostic);
+             }
+             // Build summary with warning if no embeddings
+             let summary = totalGroups === 0
+                 ? 'No duplicate code found'
+                 : `Found ${totalGroups} duplicate code groups across ${affectedFiles.length} files`;
+             if (semanticQueryError) {
+                 summary += ` (Warning: ${semanticQueryError})`;
+             }
+             else if ((type === 'semantic' || type === 'all') && embeddingCount === 0 && allSemanticGroups.length === 0) {
+                 summary += ' (Warning: No embeddings for semantic detection)';
+             }
+             // Build result based on summaryOnly flag
+             let result;
+             if (summaryOnly) {
+                 // Summary mode: statistics only, no full arrays
+                 const fileDuplicateCounts = {};
+                 for (const group of duplicateGroups) {
+                     for (const item of group.items) {
+                         const shortPath = getShortPath(item.filePath);
+                         fileDuplicateCounts[shortPath] = (fileDuplicateCounts[shortPath] ?? 0) + 1;
+                     }
+                 }
+                 const topFilesByDuplicates = Object.entries(fileDuplicateCounts)
+                     .sort((a, b) => b[1] - a[1])
+                     .slice(0, 20)
+                     .map(([file, count]) => ({ file, count }));
+                 result = {
+                     summary,
+                     totalGroups,
+                     totalDuplicates,
+                     byType,
+                     affectedFiles,
+                     topFilesByDuplicates,
+                 };
+             }
+             else {
+                 // Paginated mode: apply offset/maxResults
+                 const paginatedGroups = duplicateGroups.slice(offset, offset + maxResults);
+                 const hasMore = offset + maxResults < duplicateGroups.length;
+                 result = {
+                     summary,
+                     totalGroups,
+                     totalDuplicates,
+                     byType,
+                     duplicates: paginatedGroups,
+                     pagination: {
+                         offset,
+                         limit: maxResults,
+                         returned: paginatedGroups.length,
+                         hasMore,
+                     },
+                     affectedFiles,
+                 };
+             }
+             // Add pre-computed diagnostic to result
+             if (semanticDiagnostic) {
+                 result.semanticDiagnostic = semanticDiagnostic;
+             }
+             await debugLog('Duplicate code detection complete', {
+                 projectId: resolvedProjectId,
+                 totalGroups,
+                 structuralGroups: allStructuralGroups.length,
+                 semanticGroups: allSemanticGroups.length,
+                 summaryOnly,
+                 offset,
+                 maxResults,
+             });
+             return createSuccessResponse(JSON.stringify(result, null, 2));
+         }
+         catch (error) {
+             console.error('Duplicate code detection error:', error);
+             await debugLog('Duplicate code detection error', { projectId, error });
+             return createErrorResponse(error);
+         }
+         finally {
+             await neo4jService.close();
+         }
+     });
+ };
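
The semantic filtering above hinges on one small trick: a pair of node IDs is reduced to an order-independent key before any set-membership check, so the semantic pass can cheaply skip pairs the structural pass already classified as exact copies. A minimal runnable sketch of that technique (the node IDs are made up for illustration):

    // Order-independent pair key, as used by both passes above.
    const pairKey = (a, b) => [a, b].sort().join('::');

    // Structural pass: record every pair within a group sharing one AST hash.
    const structuralPairs = new Set();
    structuralPairs.add(pairKey('node_42', 'node_7'));

    // Semantic pass: the same two nodes may arrive in the opposite order,
    // but they produce the same key, so the pair is skipped
    // (and counted via filteredAsStructural).
    console.log(structuralPairs.has(pairKey('node_7', 'node_42'))); // true
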
package/dist/mcp/tools/impact-analysis.tool.js
@@ -7,19 +7,35 @@ import { z } from 'zod';
  import { Neo4jService, QUERIES } from '../../storage/neo4j/neo4j.service.js';
  import { TOOL_NAMES, TOOL_METADATA } from '../constants.js';
  import { createErrorResponse, createSuccessResponse, debugLog, resolveProjectIdOrError } from '../utils.js';
- // Default relationship weights for core AST relationships
+ /**
+  * Default relationship weights for impact/risk analysis.
+  *
+  * NOTE: These weights are intentionally different from CoreEdge.relationshipWeight
+  * in the core schema. They serve different purposes:
+  *
+  * - Core schema weights (traversalWeight): "What relationships help me understand the code?"
+  *   → CALLS is high (0.85) because following execution flow aids comprehension
+  *
+  * - Impact analysis weights: "What breaks if I modify this node?"
+  *   → EXTENDS/IMPLEMENTS are highest (0.95) because changing a base class/interface
+  *     breaks ALL subclasses/implementers - inheritance is a hard contract
+  *
+  * Example: A class with 50 callers and 10 subclasses
+  * - For traversal: follow the 50 CALLS to understand usage patterns
+  * - For impact: the 10 subclasses are CRITICAL - they inherit the contract
+  */
  const DEFAULT_RELATIONSHIP_WEIGHTS = {
-     // Critical - inheritance/interface contracts
+     // Critical - inheritance/interface contracts (changing base breaks ALL children)
      EXTENDS: 0.95,
      IMPLEMENTS: 0.95,
-     // High - direct code dependencies
+     // High - direct code dependencies (callers may break but often handle changes)
      CALLS: 0.75,
      HAS_MEMBER: 0.65,
      TYPED_AS: 0.6,
      // Medium - module dependencies
      IMPORTS: 0.5,
      EXPORTS: 0.5,
-     // Lower - structural
+     // Lower - structural (container doesn't break if child changes)
      CONTAINS: 0.3,
      HAS_PARAMETER: 0.3,
      DECORATED_WITH: 0.4,
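
To make the distinction in that comment concrete, here is a hypothetical one-hop scoring sketch. The actual propagation formula is not part of this hunk (only the weight table is), so treat the function below as an illustration rather than the tool's real logic:

    // Hypothetical illustration: risk decays by the weight of the traversed
    // edge, so one EXTENDS hop transmits more risk than one CALLS hop.
    const WEIGHTS = { EXTENDS: 0.95, IMPLEMENTS: 0.95, CALLS: 0.75, CONTAINS: 0.3 };
    const hopRisk = (relType, incomingRisk = 1.0) => incomingRisk * (WEIGHTS[relType] ?? 0.3);

    console.log(hopRisk('EXTENDS')); // 0.95 - subclasses inherit the contract
    console.log(hopRisk('CALLS'));   // 0.75 - callers can often absorb the change
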
package/dist/mcp/tools/index.js
@@ -3,6 +3,8 @@
   * Centralized tool creation and registration
   */
  import { createCheckParseStatusTool } from './check-parse-status.tool.js';
+ import { createDetectDeadCodeTool } from './detect-dead-code.tool.js';
+ import { createDetectDuplicateCodeTool } from './detect-duplicate-code.tool.js';
  import { createHelloTool } from './hello.tool.js';
  import { createImpactAnalysisTool } from './impact-analysis.tool.js';
  import { createListProjectsTool } from './list-projects.tool.js';
@@ -26,6 +28,8 @@ export const registerAllTools = (server) => {
      createTraverseFromNodeTool(server);
      createNaturalLanguageToCypherTool(server);
      createImpactAnalysisTool(server);
+     createDetectDeadCodeTool(server);
+     createDetectDuplicateCodeTool(server);
      // Register project parsing tools
      createParseTypescriptProjectTool(server);
      createCheckParseStatusTool(server);
package/dist/mcp/tools/parse-typescript-project.tool.js
@@ -12,20 +12,16 @@ import { z } from 'zod';
  import { CORE_TYPESCRIPT_SCHEMA } from '../../core/config/schema.js';
  import { EmbeddingsService } from '../../core/embeddings/embeddings.service.js';
  import { ParserFactory } from '../../core/parsers/parser-factory.js';
+ import { detectChangedFiles } from '../../core/utils/file-change-detection.js';
  import { resolveProjectId, getProjectName, UPSERT_PROJECT_QUERY, UPDATE_PROJECT_STATUS_QUERY, } from '../../core/utils/project-id.js';
  import { Neo4jService, QUERIES } from '../../storage/neo4j/neo4j.service.js';
- import { TOOL_NAMES, TOOL_METADATA, DEFAULTS, FILE_PATHS, LOG_CONFIG } from '../constants.js';
+ import { TOOL_NAMES, TOOL_METADATA, DEFAULTS, FILE_PATHS, LOG_CONFIG, PARSING } from '../constants.js';
  import { deleteSourceFileSubgraphs, loadExistingNodesForEdgeDetection, getCrossFileEdges, } from '../handlers/cross-file-edge.helpers.js';
- import { detectChangedFiles } from '../../core/utils/file-change-detection.js';
  import { GraphGeneratorHandler } from '../handlers/graph-generator.handler.js';
  import { StreamingImportHandler } from '../handlers/streaming-import.handler.js';
  import { jobManager } from '../services/job-manager.js';
  import { watchManager } from '../services/watch-manager.js';
  import { createErrorResponse, createSuccessResponse, formatParseSuccess, formatParsePartialSuccess, debugLog, } from '../utils.js';
- // Threshold for using streaming import (files)
- const STREAMING_THRESHOLD = 100;
- // Worker thread timeout (30 minutes)
- const WORKER_TIMEOUT_MS = 30 * 60 * 1000;
  /**
   * Validates that a path exists and is accessible
   * @throws Error if path doesn't exist or isn't accessible
@@ -87,7 +83,7 @@ export const createParseTypescriptProjectTool = (server) => {
              chunkSize: z
                  .number()
                  .optional()
-                 .default(50)
+                 .default(100)
                  .describe('Files per chunk for streaming import (default: 50). Set to 0 to disable streaming.'),
              useStreaming: z
                  .enum(['auto', 'always', 'never'])
@@ -139,7 +135,7 @@ export const createParseTypescriptProjectTool = (server) => {
      // Get path to worker script
      const __filename = fileURLToPath(import.meta.url);
      const __dirname = dirname(__filename);
-     const workerPath = join(__dirname, '..', 'workers', 'parse-worker.js');
+     const workerPath = join(__dirname, '..', 'workers', 'parse-coordinator.js');
      // Create Worker thread to run parsing without blocking MCP server
      const worker = new Worker(workerPath, {
          workerData: {
@@ -168,10 +164,10 @@ export const createParseTypescriptProjectTool = (server) => {
      const timeoutId = setTimeout(async () => {
          const job = jobManager.getJob(jobId);
          if (job && job.status === 'running') {
-             jobManager.failJob(jobId, `Worker timed out after ${WORKER_TIMEOUT_MS / 60000} minutes`);
+             jobManager.failJob(jobId, `Worker timed out after ${PARSING.workerTimeoutMs / 60000} minutes`);
              await terminateWorker('timeout');
          }
-     }, WORKER_TIMEOUT_MS);
+     }, PARSING.workerTimeoutMs);
      // Handle progress messages from worker
      worker.on('message', (msg) => {
          if (msg.type === 'progress') {
@@ -216,16 +212,19 @@ export const createParseTypescriptProjectTool = (server) => {
      const embeddingsService = new EmbeddingsService();
      const graphGeneratorHandler = new GraphGeneratorHandler(neo4jService, embeddingsService);
      // Determine if we should use streaming import
+     // Use lazyLoad = true for consistent glob-based file discovery (matches incremental parse)
      const parser = projectType === 'auto'
-         ? await ParserFactory.createParserWithAutoDetection(projectPath, tsconfigPath, resolvedProjectId)
+         ? await ParserFactory.createParserWithAutoDetection(projectPath, tsconfigPath, resolvedProjectId, true)
          : ParserFactory.createParser({
              workspacePath: projectPath,
              tsConfigPath: tsconfigPath,
              projectType: projectType,
              projectId: resolvedProjectId,
+             lazyLoad: true,
          });
-     const totalFiles = parser.getSourceFilePaths().length;
-     const shouldUseStreaming = useStreaming === 'always' || (useStreaming === 'auto' && totalFiles > STREAMING_THRESHOLD && chunkSize > 0);
+     const discoveredFiles = await parser.discoverSourceFiles();
+     const totalFiles = discoveredFiles.length;
+     const shouldUseStreaming = useStreaming === 'always' || (useStreaming === 'auto' && totalFiles > PARSING.streamingThreshold && chunkSize > 0);
      console.log(`📊 Project has ${totalFiles} files. Streaming: ${shouldUseStreaming ? 'enabled' : 'disabled'}`);
      if (shouldUseStreaming && clearExisting !== false) {
          // Use streaming import for large projects
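
The local STREAMING_THRESHOLD and WORKER_TIMEOUT_MS constants deleted above are replaced by a shared PARSING object from ../constants.js (dist/mcp/constants.js gains 141 lines in this release). Its exact shape is not shown in this diff; a plausible sketch, assuming the centralized values match the old local defaults:

    // Sketch only: field names come from the call sites above, values from
    // the deleted local constants (100 files, 30 minutes).
    export const PARSING = {
        streamingThreshold: 100,
        workerTimeoutMs: 30 * 60 * 1000,
    };
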
@@ -385,13 +384,15 @@ const parseProject = async (options) => {
      const { neo4jService, tsconfigPath, projectPath, projectId, clearExisting = true, projectType = 'auto' } = options;
      // Resolve projectId early - needed for incremental queries before parser is created
      const resolvedId = resolveProjectId(projectPath, projectId);
+     // Use lazyLoad = true for consistent glob-based file discovery (matches incremental parse)
      const parser = projectType === 'auto'
-         ? await ParserFactory.createParserWithAutoDetection(projectPath, tsconfigPath, resolvedId)
+         ? await ParserFactory.createParserWithAutoDetection(projectPath, tsconfigPath, resolvedId, true)
          : ParserFactory.createParser({
              workspacePath: projectPath,
              tsConfigPath: tsconfigPath,
              projectType: projectType,
              projectId: resolvedId,
+             lazyLoad: true,
          });
      let incrementalStats;
      let savedCrossFileEdges = [];
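
Both parse paths now construct the parser with lazyLoad: true and count files via discoverSourceFiles() before choosing an import strategy. Condensed from the two hunks above into one fragment (same identifiers as the diff, no new API assumed; surrounding variables come from the tool handler):

    // Glob-based discovery runs up front; file contents load lazily per chunk.
    const parser = ParserFactory.createParser({
        workspacePath: projectPath,
        tsConfigPath: tsconfigPath,
        projectType: projectType,
        projectId: resolvedId,
        lazyLoad: true,
    });
    const totalFiles = (await parser.discoverSourceFiles()).length;
    const shouldUseStreaming = useStreaming === 'always'
        || (useStreaming === 'auto' && totalFiles > PARSING.streamingThreshold && chunkSize > 0);
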