code-graph-context 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/README.md +156 -2
  2. package/dist/constants.js +167 -0
  3. package/dist/core/config/fairsquare-framework-schema.js +9 -7
  4. package/dist/core/config/nestjs-framework-schema.js +60 -43
  5. package/dist/core/config/schema.js +41 -2
  6. package/dist/core/embeddings/natural-language-to-cypher.service.js +166 -110
  7. package/dist/core/parsers/typescript-parser.js +1043 -747
  8. package/dist/core/parsers/workspace-parser.js +177 -194
  9. package/dist/core/utils/code-normalizer.js +299 -0
  10. package/dist/core/utils/file-change-detection.js +17 -2
  11. package/dist/core/utils/file-utils.js +40 -5
  12. package/dist/core/utils/graph-factory.js +161 -0
  13. package/dist/core/utils/shared-utils.js +79 -0
  14. package/dist/core/workspace/workspace-detector.js +59 -5
  15. package/dist/mcp/constants.js +141 -8
  16. package/dist/mcp/handlers/graph-generator.handler.js +1 -0
  17. package/dist/mcp/handlers/incremental-parse.handler.js +3 -6
  18. package/dist/mcp/handlers/parallel-import.handler.js +136 -0
  19. package/dist/mcp/handlers/streaming-import.handler.js +14 -59
  20. package/dist/mcp/mcp.server.js +1 -1
  21. package/dist/mcp/services/job-manager.js +5 -8
  22. package/dist/mcp/services/watch-manager.js +7 -18
  23. package/dist/mcp/tools/detect-dead-code.tool.js +413 -0
  24. package/dist/mcp/tools/detect-duplicate-code.tool.js +450 -0
  25. package/dist/mcp/tools/impact-analysis.tool.js +20 -4
  26. package/dist/mcp/tools/index.js +4 -0
  27. package/dist/mcp/tools/parse-typescript-project.tool.js +15 -14
  28. package/dist/mcp/workers/chunk-worker-pool.js +196 -0
  29. package/dist/mcp/workers/chunk-worker.types.js +4 -0
  30. package/dist/mcp/workers/chunk.worker.js +89 -0
  31. package/dist/mcp/workers/parse-coordinator.js +183 -0
  32. package/dist/mcp/workers/worker.pool.js +54 -0
  33. package/dist/storage/neo4j/neo4j.service.js +190 -10
  34. package/package.json +1 -1
package/dist/core/workspace/workspace-detector.js
@@ -44,6 +44,13 @@ export class WorkspaceDetector {
  if (hasTurboJson) {
  return 'turborepo';
  }
+ // Check for Nx (has nx.json) - check before pnpm/npm since Nx can coexist with them
+ const nxJsonPath = path.join(rootPath, 'nx.json');
+ const hasNxJson = await this.fileExists(nxJsonPath);
+ await debugLog('Checking for nx.json', { path: nxJsonPath, exists: hasNxJson });
+ if (hasNxJson) {
+ return 'nx';
+ }
  // Check for pnpm workspaces (has pnpm-workspace.yaml)
  const pnpmWorkspacePath = path.join(rootPath, 'pnpm-workspace.yaml');
  const hasPnpmWorkspace = await this.fileExists(pnpmWorkspacePath);
@@ -99,6 +106,43 @@ export class WorkspaceDetector {
  // Turborepo default patterns
  return ['apps/*', 'packages/*'];
  }
+ case 'nx': {
+ // For Nx, scan for all project.json files to find all projects
+ // This is more reliable than workspaces since Nx projects may not be in package.json workspaces
+ const projectJsonFiles = await glob('**/project.json', {
+ cwd: rootPath,
+ ignore: ['**/node_modules/**', '**/dist/**', '**/build/**'],
+ absolute: false,
+ });
+ // Extract unique parent directories (the project roots)
+ const projectDirs = new Set();
+ for (const projectJsonPath of projectJsonFiles) {
+ const projectDir = path.dirname(projectJsonPath);
+ if (projectDir !== '.') {
+ projectDirs.add(projectDir);
+ }
+ }
+ if (projectDirs.size > 0) {
+ await debugLog('Found Nx projects via project.json scan', { count: projectDirs.size });
+ return Array.from(projectDirs);
+ }
+ // Fallback to package.json workspaces if no project.json files found
+ const packageJsonPath = path.join(rootPath, 'package.json');
+ try {
+ const packageJson = JSON.parse(await fs.readFile(packageJsonPath, 'utf-8'));
+ if (Array.isArray(packageJson.workspaces)) {
+ return packageJson.workspaces;
+ }
+ if (packageJson.workspaces?.packages) {
+ return packageJson.workspaces.packages;
+ }
+ }
+ catch {
+ // Fall through to defaults
+ }
+ // Nx default patterns
+ return ['apps/*', 'libs/*', 'packages/*', 'src/*/*'];
+ }
  case 'yarn':
  case 'npm': {
  // Read from package.json workspaces
@@ -151,16 +195,26 @@ export class WorkspaceDetector {
  if (seenPaths.has(packagePath))
  continue;
  seenPaths.add(packagePath);
- // Check if this is a valid package (has package.json)
+ // Check if this is a valid package (has package.json) or Nx project (has project.json)
  const packageJsonPath = path.join(packagePath, 'package.json');
- if (!(await this.fileExists(packageJsonPath))) {
+ const projectJsonPath = path.join(packagePath, 'project.json');
+ const hasPackageJson = await this.fileExists(packageJsonPath);
+ const hasProjectJson = await this.fileExists(projectJsonPath);
+ if (!hasPackageJson && !hasProjectJson) {
  continue;
  }
- // Read package name
+ // Read package/project name
  let packageName;
  try {
- const packageJson = JSON.parse(await fs.readFile(packageJsonPath, 'utf-8'));
- packageName = packageJson.name ?? path.basename(packagePath);
+ if (hasPackageJson) {
+ const packageJson = JSON.parse(await fs.readFile(packageJsonPath, 'utf-8'));
+ packageName = packageJson.name ?? path.basename(packagePath);
+ }
+ else {
+ // Nx project.json - try to read name from it
+ const projectJson = JSON.parse(await fs.readFile(projectJsonPath, 'utf-8'));
+ packageName = projectJson.name ?? path.basename(packagePath);
+ }
  }
  catch {
  packageName = path.basename(packagePath);
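Taken together, these three hunks slot Nx between the Turborepo and pnpm checks. A condensed sketch of the resulting detection order, assuming the class's existing fileExists helper and the turbo.json marker behind the earlier hasTurboJson check (the yarn/npm split is elided):

```js
// Sketch of the detection order implied by the hunks above, not the full class.
async function detectWorkspaceType(rootPath) {
    if (await fileExists(path.join(rootPath, 'turbo.json'))) return 'turborepo';
    // nx.json is checked before pnpm/npm because Nx can coexist with both
    if (await fileExists(path.join(rootPath, 'nx.json'))) return 'nx';
    if (await fileExists(path.join(rootPath, 'pnpm-workspace.yaml'))) return 'pnpm';
    return 'npm'; // yarn/npm both fall back to package.json "workspaces"
}
```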
package/dist/mcp/constants.js
@@ -2,6 +2,8 @@
  * MCP Server Constants
  * All constants used throughout the MCP server implementation
  */
+ // Re-export shared constants
+ export { LOG_CONFIG } from '../constants.js';
  // Server Configuration
  export const MCP_SERVER_CONFIG = {
  name: 'codebase-graph',
@@ -27,6 +29,8 @@ export const TOOL_NAMES = {
  startWatchProject: 'start_watch_project',
  stopWatchProject: 'stop_watch_project',
  listWatchers: 'list_watchers',
+ detectDeadCode: 'detect_dead_code',
+ detectDuplicateCode: 'detect_duplicate_code',
  };
  // Tool Metadata
  export const TOOL_METADATA = {
@@ -93,10 +97,28 @@ Use list_projects to see available projects and get the project name.
  
  **Tips:**
  - Import nodes store file paths, not module names (use 'path containing X')
- - Node types: SourceFile, ClassDeclaration, FunctionDeclaration, MethodDeclaration, InterfaceDeclaration
- - Relationships: CONTAINS, IMPORTS, HAS_PARAMETER, IMPLEMENTS, EXTENDS, HAS_MEMBER
+ - Node types: SourceFile, Class, Function, Method, Interface, Property, Parameter, Constructor, Import, Export, Decorator, Enum, Variable, TypeAlias
+ - Relationships: CONTAINS, IMPORTS, EXTENDS, IMPLEMENTS, HAS_MEMBER, HAS_PARAMETER, TYPED_AS, CALLS, DECORATED_WITH
  - For NestJS, use semanticType property instead of decorators (e.g., semanticType = 'NestController')
  
+ **Relationships (Core):**
+ - CONTAINS: File/class contains members
+ - HAS_MEMBER: Class/interface has methods/properties
+ - HAS_PARAMETER: Method/function has parameters
+ - IMPORTS: SourceFile imports another
+ - EXPORTS: SourceFile exports items
+ - EXTENDS: Class/interface extends another
+ - IMPLEMENTS: Class implements interface(s)
+ - CALLS: Method/function calls another
+ - TYPED_AS: Parameter/property has type annotation
+ - DECORATED_WITH: Node has decorators
+
+ **Relationships (NestJS/Framework):**
+ - INJECTS: Service/controller injects dependency
+ - EXPOSES: Controller exposes HTTP endpoints
+ - MODULE_IMPORTS, MODULE_PROVIDES, MODULE_EXPORTS: NestJS module system
+ - GUARDED_BY, TRANSFORMED_BY, INTERCEPTED_BY: Security/middleware
+
  **Query Phrasing:**
  Phrase queries using properties known to exist (filePath, name) rather than abstract concepts:
  - Use "in account folder" or "filePath contains /account/" instead of "in account module"
@@ -252,6 +274,86 @@ Returns information about each watcher:
  
  Use stop_watch_project to stop a watcher.`,
  },
+ [TOOL_NAMES.detectDeadCode]: {
+ title: 'Detect Dead Code',
+ description: `Identify potentially unused code in the codebase including exports never imported, private methods never called, and orphan interfaces.
+
+ **Before analyzing:**
+ Use list_projects to see available projects and get the project name.
+
+ Returns:
+ - Risk level (LOW/MEDIUM/HIGH/CRITICAL) based on dead code count
+ - Dead code items with confidence levels (HIGH/MEDIUM/LOW) and categories
+ - Grouped by type (methods, classes, interfaces, etc.)
+ - Grouped by category (library-export, ui-component, internal-unused)
+ - Affected files list
+ - Excluded entry points for audit (controllers, modules, etc.)
+
+ Parameters:
+ - projectId: Project name, path, or ID (required)
+ - excludePatterns: Additional file patterns to exclude (e.g., ["*.config.ts", "*.seed.ts"])
+ - excludeSemanticTypes: Additional semantic types to exclude (e.g., ["EntityClass", "DTOClass"])
+ - excludeLibraryExports: Exclude all items from packages/* directories (default: false)
+ - excludeCoreTypes: Exclude specific AST types (e.g., ["InterfaceDeclaration", "EnumDeclaration"])
+ - includeEntryPoints: Include excluded entry points in audit section (default: true)
+ - minConfidence: Minimum confidence to include (LOW/MEDIUM/HIGH, default: LOW)
+ - filterCategory: Filter by category (library-export, ui-component, internal-unused, all) (default: all)
+ - summaryOnly: Return only statistics without full dead code list (default: false)
+ - limit: Maximum items per page (default: 100, max: 500)
+ - offset: Number of items to skip for pagination (default: 0)
+
+ **Categories:**
+ - library-export: Exports from packages/* directories (may be used by external consumers)
+ - ui-component: Exports from components/ui/* (component library, intentionally broad API)
+ - internal-unused: Regular internal code that appears unused
+
+ **Auto-excluded entry points:**
+ - Semantic types: NestController, NestModule, NestGuard, NestPipe, NestInterceptor, NestFilter, NestProvider, NestService, HttpEndpoint
+ - File patterns: main.ts, *.module.ts, *.controller.ts, index.ts
+
+ **Confidence levels:**
+ - HIGH: Exported but never imported or referenced
+ - MEDIUM: Private with no internal calls
+ - LOW: Could be used dynamically
+
+ Use filterCategory=internal-unused for actionable dead code cleanup.`,
+ },
+ [TOOL_NAMES.detectDuplicateCode]: {
+ title: 'Detect Duplicate Code',
+ description: `Find duplicate code patterns using structural (AST hash) and semantic (embedding similarity) analysis.
+
+ **Before analyzing:**
+ Use list_projects to see available projects and get the project name.
+
+ Returns:
+ - Duplicate groups with similarity scores
+ - Confidence levels (HIGH/MEDIUM/LOW)
+ - Grouped by detection type (structural, semantic)
+ - Recommendations for each duplicate group
+ - Affected files list
+
+ Parameters:
+ - projectId: Project name, path, or ID (required)
+ - type: Detection approach - "structural", "semantic", or "all" (default: all)
+ - minSimilarity: Minimum similarity for semantic duplicates (0.5-1.0, default: 0.80)
+ - includeCode: Include source code snippets (default: false)
+ - maxResults: Maximum duplicate groups per page (default: 20, max: 100)
+ - scope: Node types to analyze - "methods", "functions", "classes", or "all" (default: all)
+ - summaryOnly: Return only statistics without full duplicates list (default: false)
+ - offset: Number of groups to skip for pagination (default: 0)
+
+ **Detection Types:**
+ - structural: Finds exact duplicates by normalized code hash (ignores formatting, variable names, literals)
+ - semantic: Finds similar code using embedding similarity (catches different implementations of same logic)
+ - all: Runs both detection types
+
+ **Similarity Thresholds:**
+ - 0.90+: Very high similarity, almost certainly duplicates
+ - 0.85-0.90: High similarity, likely duplicates with minor variations
+ - 0.80-0.85: Moderate similarity, worth reviewing
+
+ Use this to identify refactoring opportunities and reduce code duplication.`,
+ },
  };
  // Default Values
  export const DEFAULTS = {
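Since both tools register through the standard MCP tool surface, they are invoked with an ordinary tools/call request. A minimal sketch using the @modelcontextprotocol/sdk client (client and transport setup omitted; 'my-project' is a placeholder projectId):

```js
// Calling the new dead-code tool over MCP; arguments mirror the
// parameter list documented above.
const deadCode = await client.callTool({
    name: 'detect_dead_code',
    arguments: { projectId: 'my-project', filterCategory: 'internal-unused', minConfidence: 'HIGH' },
});
// And the duplicate detector, structural pass only:
const duplicates = await client.callTool({
    name: 'detect_duplicate_code',
    arguments: { projectId: 'my-project', type: 'structural', summaryOnly: true },
});
```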
@@ -263,6 +365,43 @@ export const DEFAULTS = {
  chainSnippetLength: 700,
  maxEmbeddingChars: 30000, // ~7500 tokens, under 8192 limit for text-embedding-3-large
  };
+ // Parsing Configuration
+ export const PARSING = {
+ /** File count threshold to trigger parallel parsing with worker pool */
+ parallelThreshold: 500,
+ /** File count threshold to trigger streaming import */
+ streamingThreshold: 100,
+ /** Default number of files per chunk */
+ defaultChunkSize: 100,
+ /** Worker timeout in milliseconds (30 minutes) */
+ workerTimeoutMs: 30 * 60 * 1000,
+ };
+ // Job Management
+ export const JOBS = {
+ /** Interval for cleaning up completed/stale jobs (5 minutes) */
+ cleanupIntervalMs: 5 * 60 * 1000,
+ /** Maximum number of jobs to keep in memory */
+ maxJobs: 100,
+ };
+ // Watch Mode Configuration
+ export const WATCH = {
+ /** Default debounce delay before processing file changes */
+ defaultDebounceMs: 1000,
+ /** Maximum concurrent file watchers */
+ maxWatchers: 10,
+ /** Maximum pending file change events before dropping */
+ maxPendingEvents: 1000,
+ /** Default exclude patterns for file watching */
+ excludePatterns: [
+ '**/node_modules/**',
+ '**/dist/**',
+ '**/build/**',
+ '**/.git/**',
+ '**/*.d.ts',
+ '**/*.js',
+ '**/*.map',
+ ],
+ };
  // Messages
  export const MESSAGES = {
  errors: {
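Read together, the PARSING constants define a three-tier import strategy: projects under streamingThreshold import in a single pass, mid-sized projects stream in chunks, and large ones go to the worker pool (see parallel-import.handler.js below). The dispatch site itself is not in this diff; a sketch of the implied selection:

```js
// Illustrative only - the real dispatch lives outside the hunks shown here.
import { PARSING } from './constants.js';

function chooseImportStrategy(fileCount) {
    if (fileCount >= PARSING.parallelThreshold) return 'parallel';   // 500+ files
    if (fileCount >= PARSING.streamingThreshold) return 'streaming'; // 100-499 files
    return 'single-pass';                                            // small projects
}
```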
@@ -298,9 +437,3 @@ export const MESSAGES = {
  startingServer: 'Starting MCP server...',
  },
  };
- // Logging Configuration
- export const LOG_CONFIG = {
- timestampFormat: 'iso',
- logSeparator: '---',
- jsonIndentation: 2,
- };
package/dist/mcp/handlers/graph-generator.handler.js
@@ -73,6 +73,7 @@ export class GraphGeneratorHandler {
  await this.neo4jService.run(QUERIES.CREATE_PROJECT_INDEX_SOURCEFILE);
  await this.neo4jService.run(QUERIES.CREATE_PROJECT_ID_INDEX_EMBEDDED);
  await this.neo4jService.run(QUERIES.CREATE_PROJECT_ID_INDEX_SOURCEFILE);
+ await this.neo4jService.run(QUERIES.CREATE_NORMALIZED_HASH_INDEX);
  await debugLog('Project indexes created');
  }
  async importNodes(nodes, batchSize) {
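This one-line addition wires in the index behind the new structural duplicate detection (code-normalizer.js computes a hash over normalized code, and detect-duplicate-code.tool.js groups by it). The Cypher behind CREATE_NORMALIZED_HASH_INDEX is not shown in this diff; a plausible shape, with the label and property names as assumptions:

```js
// Assumed shape only - the real QUERIES constant may use different names.
const CREATE_NORMALIZED_HASH_INDEX = `
  CREATE INDEX normalized_hash_index IF NOT EXISTS
  FOR (n:Method) ON (n.normalizedHash)
`;
```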
package/dist/mcp/handlers/incremental-parse.handler.js
@@ -23,7 +23,7 @@ export const performIncrementalParse = async (projectPath, projectId, tsconfigPa
  const embeddingsService = new EmbeddingsService();
  const graphHandler = new GraphGeneratorHandler(neo4jService, embeddingsService);
  try {
- await debugLog('Incremental parse started (watch)', { projectPath, projectId });
+ await debugLog('Incremental parse started', { projectPath, projectId });
  // Resolve project ID
  const resolvedId = resolveProjectId(projectPath, projectId);
  const projectName = await getProjectName(projectPath);
@@ -31,13 +31,12 @@ export const performIncrementalParse = async (projectPath, projectId, tsconfigPa
  const parser = await ParserFactory.createParserWithAutoDetection(projectPath, tsconfigPath, resolvedId, true);
  // Detect changed files
  const { filesToReparse, filesToDelete } = await detectChangedFiles(projectPath, neo4jService, resolvedId);
- await debugLog('Watch incremental change detection', {
+ await debugLog('Incremental change detection', {
  filesToReparse: filesToReparse.length,
  filesToDelete: filesToDelete.length,
  });
  // If no changes, return early
  if (filesToReparse.length === 0 && filesToDelete.length === 0) {
- await debugLog('Watch incremental: no changes detected');
  return {
  nodesUpdated: 0,
  edgesUpdated: 0,
@@ -50,7 +49,6 @@ export const performIncrementalParse = async (projectPath, projectId, tsconfigPa
  if (filesToRemoveFromGraph.length > 0) {
  // Save cross-file edges before deletion
  savedCrossFileEdges = await getCrossFileEdges(neo4jService, filesToRemoveFromGraph, resolvedId);
- await debugLog('Watch: saved cross-file edges', { count: savedCrossFileEdges.length });
  // Delete old subgraphs
  await deleteSourceFileSubgraphs(neo4jService, filesToRemoveFromGraph, resolvedId);
  }
@@ -124,10 +122,9 @@ export const performIncrementalParse = async (projectPath, projectId, tsconfigPa
  ? Number(firstResult.recreatedCount) || 0
  : 0;
  edgesImported += recreatedCount;
- await debugLog('Watch: cross-file edges recreated', { recreatedCount });
  }
  }
- await debugLog('Watch incremental parse completed', {
+ await debugLog('Incremental parse completed', {
  nodesImported,
  edgesImported,
  filesReparsed: filesToReparse.length,
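These hunks only drop the watch-specific wording from the log messages (presumably because the handler is no longer watch-only); the flow itself is unchanged. Condensed from the surrounding code, using only names visible in the hunks above:

```js
// Outline of performIncrementalParse's edge-preserving update.
const { filesToReparse, filesToDelete } = await detectChangedFiles(projectPath, neo4jService, resolvedId);
if (filesToReparse.length === 0 && filesToDelete.length === 0)
    return { nodesUpdated: 0, edgesUpdated: 0 }; // nothing to do
// 1. Preserve edges that cross into the files about to be replaced
const savedCrossFileEdges = await getCrossFileEdges(neo4jService, filesToRemoveFromGraph, resolvedId);
// 2. Delete the stale per-file subgraphs
await deleteSourceFileSubgraphs(neo4jService, filesToRemoveFromGraph, resolvedId);
// 3. Re-parse the changed files, import them, then recreate the saved edges
```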
package/dist/mcp/handlers/parallel-import.handler.js (new file)
@@ -0,0 +1,136 @@
+ /**
+ * Parallel Import Handler
+ * Orchestrates parallel chunk parsing using a worker pool with pipelined import.
+ * Used for large codebases (>= PARSING.parallelThreshold files).
+ */
+ import { join } from 'path';
+ import { ProgressReporter } from '../../core/utils/progress-reporter.js';
+ import { debugLog } from '../utils.js';
+ import { ChunkWorkerPool } from '../workers/chunk-worker-pool.js';
+ export class ParallelImportHandler {
+ graphGeneratorHandler;
+ progressReporter;
+ constructor(graphGeneratorHandler) {
+ this.graphGeneratorHandler = graphGeneratorHandler;
+ this.progressReporter = new ProgressReporter();
+ }
+ /**
+ * Import a project using parallel worker pool with pipelined import.
+ * Chunks are distributed to workers, and imports happen as chunks complete.
+ */
+ async importProjectParallel(parser, sourceFiles, config) {
+ const startTime = Date.now();
+ if (config.onProgress) {
+ this.progressReporter.setCallback(config.onProgress);
+ }
+ const totalFiles = sourceFiles.length;
+ let totalNodesImported = 0;
+ let totalEdgesImported = 0;
+ const chunks = this.createChunks(sourceFiles, config.chunkSize);
+ this.progressReporter.report({
+ phase: 'parsing',
+ current: 0,
+ total: totalFiles,
+ message: `Starting parallel parse of ${totalFiles} files in ${chunks.length} chunks`,
+ details: { chunkIndex: 0, totalChunks: chunks.length },
+ });
+ await debugLog('Using parallel chunk workers', { totalFiles, chunkCount: chunks.length });
+ const pool = new ChunkWorkerPool({
+ projectPath: config.projectPath,
+ tsconfigPath: config.tsconfigPath,
+ projectId: config.projectId,
+ projectType: config.projectType,
+ });
+ // Pipelined: import starts as soon as each chunk completes parsing
+ const poolResult = await pool.processChunks(chunks, async (result, stats) => {
+ await this.importToNeo4j(result.nodes, result.edges);
+ totalNodesImported += result.nodes.length;
+ totalEdgesImported += result.edges.length;
+ // Accumulate nodes for cross-chunk edge resolution
+ parser.addParsedNodesFromChunk(result.nodes);
+ // Merge shared context from workers for enabling cross-chunk references
+ if (result.sharedContext && result.sharedContext.length > 0) {
+ parser.mergeSerializedSharedContext(result.sharedContext);
+ }
+ // Collect deferred edges for resolution after all chunks complete
+ if (result.deferredEdges && result.deferredEdges.length > 0) {
+ parser.mergeDeferredEdges(result.deferredEdges);
+ }
+ this.progressReporter.report({
+ phase: 'parsing',
+ current: stats.chunksCompleted * config.chunkSize,
+ total: totalFiles,
+ message: `Chunk ${stats.chunksCompleted}/${stats.totalChunks}: ${totalNodesImported} nodes, ${totalEdgesImported} edges`,
+ details: {
+ nodesCreated: totalNodesImported,
+ edgesCreated: totalEdgesImported,
+ chunkIndex: stats.chunksCompleted,
+ totalChunks: stats.totalChunks,
+ },
+ });
+ debugLog(`Chunk ${result.chunkIndex + 1}/${stats.totalChunks}: ${result.nodes.length} nodes, ${result.edges.length} edges (imported)`);
+ });
+ debugLog(`Parallel parse+import complete: ${poolResult.totalNodes} nodes, ${poolResult.totalEdges} edges in ${poolResult.elapsedMs}ms`);
+ this.progressReporter.report({
+ phase: 'resolving',
+ current: totalFiles,
+ total: totalFiles,
+ message: 'Resolving cross-chunk edges',
+ details: {
+ nodesCreated: totalNodesImported,
+ edgesCreated: totalEdgesImported,
+ chunkIndex: chunks.length,
+ totalChunks: chunks.length,
+ },
+ });
+ const resolvedEdges = await parser.resolveDeferredEdges();
+ if (resolvedEdges.length > 0) {
+ await this.importToNeo4j([], resolvedEdges);
+ totalEdgesImported += resolvedEdges.length;
+ await debugLog(`Resolved ${resolvedEdges.length} cross-chunk edges`);
+ }
+ parser.loadFrameworkSchemasForType(config.projectType);
+ const enhancedEdges = await parser.applyEdgeEnhancementsManually();
+ if (enhancedEdges.length > 0) {
+ await this.importToNeo4j([], enhancedEdges);
+ totalEdgesImported += enhancedEdges.length;
+ await debugLog(`Created ${enhancedEdges.length} edges from enhancements`);
+ }
+ parser.clearParsedData();
+ const elapsedMs = Date.now() - startTime;
+ return {
+ nodesImported: totalNodesImported,
+ edgesImported: totalEdgesImported,
+ filesProcessed: totalFiles,
+ chunksProcessed: chunks.length,
+ elapsedMs,
+ };
+ }
+ createChunks(files, chunkSize) {
+ const chunks = [];
+ for (let i = 0; i < files.length; i += chunkSize) {
+ chunks.push(files.slice(i, i + chunkSize));
+ }
+ return chunks;
+ }
+ async importToNeo4j(nodes, edges) {
+ if (nodes.length === 0 && edges.length === 0)
+ return;
+ const fs = await import('fs/promises');
+ const { randomBytes } = await import('crypto');
+ const { tmpdir } = await import('os');
+ const tempPath = join(tmpdir(), `chunk-${Date.now()}-${randomBytes(8).toString('hex')}.json`);
+ try {
+ await fs.writeFile(tempPath, JSON.stringify({ nodes, edges, metadata: { parallel: true } }));
+ await this.graphGeneratorHandler.generateGraph(tempPath, 100, false);
+ }
+ finally {
+ try {
+ await fs.unlink(tempPath);
+ }
+ catch {
+ // Ignore cleanup errors
+ }
+ }
+ }
+ }
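How this handler is wired up is not part of the diff; a hedged usage sketch based on its constructor and the config fields it reads (projectPath, tsconfigPath, projectId, projectType, chunkSize, onProgress):

```js
// Hypothetical call site - the real dispatch lives elsewhere in the package.
const handler = new ParallelImportHandler(graphGeneratorHandler);
const result = await handler.importProjectParallel(parser, sourceFiles, {
    projectPath: '/repo',                 // placeholder paths
    tsconfigPath: '/repo/tsconfig.json',
    projectId: 'my-project',
    projectType: 'nestjs',                // assumption: feeds loadFrameworkSchemasForType
    chunkSize: 100,                       // PARSING.defaultChunkSize
    onProgress: (p) => debugLog(`[${p.phase}] ${p.current}/${p.total}: ${p.message}`),
});
// result: { nodesImported, edgesImported, filesProcessed, chunksProcessed, elapsedMs }
```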
package/dist/mcp/handlers/streaming-import.handler.js
@@ -33,36 +33,33 @@ export class StreamingImportHandler {
  if (config.onProgress) {
  this.progressReporter.setCallback(config.onProgress);
  }
- // Set project ID on graph generator
- this.graphGeneratorHandler.setProjectId(config.projectId);
- // Phase 1: Get discovered files (already discovered by worker, this returns cached result)
  const allFilePaths = await parser.discoverSourceFiles();
- console.log(`📁 Found ${allFilePaths.length} files to parse`);
  await debugLog('Streaming import started', {
  totalFiles: allFilePaths.length,
  chunkSize: config.chunkSize,
  });
- // Create chunks
+ this.progressReporter.report({
+ phase: 'parsing',
+ current: 0,
+ total: allFilePaths.length,
+ message: `Starting streaming import of ${allFilePaths.length} files in chunks of ~${config.chunkSize}`,
+ });
  const chunks = [];
  for (let i = 0; i < allFilePaths.length; i += config.chunkSize) {
  chunks.push(allFilePaths.slice(i, i + config.chunkSize));
  }
- console.log(`📦 Split into ${chunks.length} chunks of ~${config.chunkSize} files each`);
  let totalNodesImported = 0;
  let totalEdgesImported = 0;
- // Phase 2: Parse and import chunks
  for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) {
  const chunk = chunks[chunkIndex];
  const filesProcessed = chunkIndex * config.chunkSize + chunk.length;
- console.log(`\n🔄 Processing chunk ${chunkIndex + 1}/${chunks.length} (${chunk.length} files)`);
  try {
- // Parse the chunk (skip edge resolution for streaming)
+ // Skip edge resolution during chunk parsing - resolve after all chunks complete
  const { nodes, edges } = await parser.parseChunk(chunk, true);
- // Add parsed nodes to existing nodes for cross-chunk edge resolution
+ // Accumulate nodes for cross-chunk edge resolution
  parser.addExistingNodesFromChunk(nodes);
- // Import to Neo4j if we have data
  if (nodes.length > 0 || edges.length > 0) {
- await debugLog('Importing chunk - generating embeddings', {
+ await debugLog('Importing chunk', {
  chunkIndex: chunkIndex + 1,
  totalChunks: chunks.length,
  nodeCount: nodes.length,
@@ -72,14 +69,12 @@ export class StreamingImportHandler {
  totalEdgesImported += edges.length;
  }
  else {
- console.warn(`⚠️ Chunk ${chunkIndex + 1} produced 0 nodes/edges from ${chunk.length} files`);
  await debugLog('Empty chunk result', {
  chunkIndex: chunkIndex + 1,
  fileCount: chunk.length,
  sampleFiles: chunk.slice(0, 3),
  });
  }
- // Report progress with all relevant data
  await this.progressReporter.report({
  phase: 'importing',
  current: filesProcessed,
@@ -94,10 +89,8 @@ export class StreamingImportHandler {
  totalChunks: chunks.length,
  },
  });
- console.log(`✅ Chunk ${chunkIndex + 1}: ${nodes.length} nodes, ${edges.length} edges imported`);
  }
  catch (chunkError) {
- console.error(`❌ Error processing chunk ${chunkIndex + 1}:`, chunkError);
  await debugLog('Chunk processing error', {
  chunkIndex: chunkIndex + 1,
  fileCount: chunk.length,
@@ -105,40 +98,24 @@ export class StreamingImportHandler {
  error: chunkError instanceof Error ? chunkError.message : String(chunkError),
  stack: chunkError instanceof Error ? chunkError.stack : undefined,
  });
- // Re-throw to fail the entire import - don't silently continue
  throw chunkError;
  }
- // Note: Don't clear parsed data during streaming - we need accumulated nodes for cross-chunk edge resolution
- // Memory usage is bounded because we only keep Neo4jNode references (not full AST)
  }
- // Phase 3: Resolve cross-chunk deferred edges
  await this.progressReporter.reportResolving(0, totalEdgesImported);
- console.log('\n🔗 Resolving cross-chunk edges...');
- const resolvedEdges = await parser.resolveDeferredEdgesManually();
+ const resolvedEdges = await parser.resolveDeferredEdges();
  if (resolvedEdges.length > 0) {
  await this.importEdgesToNeo4j(resolvedEdges);
  totalEdgesImported += resolvedEdges.length;
- console.log(`✅ Resolved ${resolvedEdges.length} cross-chunk edges`);
- }
- else {
- console.log('ℹ️ No cross-chunk edges to resolve');
+ await debugLog(`Resolved ${resolvedEdges.length} cross-chunk edges`);
  }
- // Phase 3b: Apply edge enhancements on all accumulated nodes
- // This catches context-dependent edges (like INTERNAL_API_CALL) that span chunks
- console.log('\n🔗 Applying edge enhancements on all nodes...');
  const enhancedEdges = await parser.applyEdgeEnhancementsManually();
  if (enhancedEdges.length > 0) {
  await this.importEdgesToNeo4j(enhancedEdges);
  totalEdgesImported += enhancedEdges.length;
- console.log(`✅ Created ${enhancedEdges.length} edges from edge enhancements`);
+ await debugLog(`Created ${enhancedEdges.length} edges from edge enhancements`);
  }
- else {
- console.log('ℹ️ No edges from edge enhancements');
- }
- // Clear accumulated data now that edge resolution is complete
  parser.clearParsedData();
  await this.progressReporter.reportResolving(resolvedEdges.length, resolvedEdges.length);
- // Phase 4: Complete
  const elapsedMs = Date.now() - startTime;
  await this.progressReporter.reportComplete(totalNodesImported, totalEdgesImported);
  const result = {
@@ -148,32 +125,17 @@ export class StreamingImportHandler {
  chunksProcessed: chunks.length,
  elapsedMs,
  };
- console.log(`\n🎉 Streaming import complete!`);
- console.log(` Files: ${allFilePaths.length}`);
- console.log(` Nodes: ${totalNodesImported}`);
- console.log(` Edges: ${totalEdgesImported}`);
- console.log(` Time: ${(elapsedMs / 1000).toFixed(2)}s`);
  await debugLog('Streaming import completed', result);
  return result;
  }
- /**
- * Import a chunk of nodes and edges to Neo4j using the graph generator handler
- */
  async importChunkToNeo4j(nodes, edges) {
- // Write to temporary JSON and use existing import mechanism
- // This reuses the batched embedding and import logic
  const tempPath = generateTempPath('chunk');
  const fs = await import('fs/promises');
  try {
- await fs.writeFile(tempPath, JSON.stringify({
- nodes,
- edges,
- metadata: { chunked: true },
- }));
+ await fs.writeFile(tempPath, JSON.stringify({ nodes, edges, metadata: { chunked: true } }));
  await this.graphGeneratorHandler.generateGraph(tempPath, DEFAULTS.batchSize, false);
  }
  finally {
- // Clean up temp file
  try {
  await fs.unlink(tempPath);
  }
@@ -182,20 +144,13 @@ export class StreamingImportHandler {
  }
  }
  }
- /**
- * Import resolved edges to Neo4j
- */
  async importEdgesToNeo4j(edges) {
  if (edges.length === 0)
  return;
  const tempPath = generateTempPath('edges');
  const fs = await import('fs/promises');
  try {
- await fs.writeFile(tempPath, JSON.stringify({
- nodes: [],
- edges,
- metadata: { edgesOnly: true },
- }));
+ await fs.writeFile(tempPath, JSON.stringify({ nodes: [], edges, metadata: { edgesOnly: true } }));
  await this.graphGeneratorHandler.generateGraph(tempPath, DEFAULTS.batchSize, false);
  }
  finally {
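Stripped of the old console noise, the parser contract the streaming path relies on is easier to see; condensed from the hunks above:

```js
// The streaming loop's parser contract, condensed from this file.
for (const chunk of chunks) {
    // true = skip edge resolution while parsing; edges that span chunks wait
    const { nodes, edges } = await parser.parseChunk(chunk, true);
    parser.addExistingNodesFromChunk(nodes); // keep nodes visible to later chunks
    await this.importChunkToNeo4j(nodes, edges);
}
const resolved = await parser.resolveDeferredEdges();          // cross-chunk edges
const enhanced = await parser.applyEdgeEnhancementsManually(); // context-dependent edges
parser.clearParsedData(); // safe only after resolution completes
```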
package/dist/mcp/mcp.server.js
@@ -18,8 +18,8 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
  import { MCP_SERVER_CONFIG, MESSAGES } from './constants.js';
  import { performIncrementalParse } from './handlers/incremental-parse.handler.js';
- import { watchManager } from './services/watch-manager.js';
  import { initializeServices } from './service-init.js';
+ import { watchManager } from './services/watch-manager.js';
  import { registerAllTools } from './tools/index.js';
  import { debugLog } from './utils.js';
  /**