code-graph-context 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +156 -2
- package/dist/constants.js +167 -0
- package/dist/core/config/fairsquare-framework-schema.js +9 -7
- package/dist/core/config/nestjs-framework-schema.js +60 -43
- package/dist/core/config/schema.js +41 -2
- package/dist/core/embeddings/natural-language-to-cypher.service.js +166 -110
- package/dist/core/parsers/typescript-parser.js +1043 -747
- package/dist/core/parsers/workspace-parser.js +177 -194
- package/dist/core/utils/code-normalizer.js +299 -0
- package/dist/core/utils/file-change-detection.js +17 -2
- package/dist/core/utils/file-utils.js +40 -5
- package/dist/core/utils/graph-factory.js +161 -0
- package/dist/core/utils/shared-utils.js +79 -0
- package/dist/core/workspace/workspace-detector.js +59 -5
- package/dist/mcp/constants.js +141 -8
- package/dist/mcp/handlers/graph-generator.handler.js +1 -0
- package/dist/mcp/handlers/incremental-parse.handler.js +3 -6
- package/dist/mcp/handlers/parallel-import.handler.js +136 -0
- package/dist/mcp/handlers/streaming-import.handler.js +14 -59
- package/dist/mcp/mcp.server.js +1 -1
- package/dist/mcp/services/job-manager.js +5 -8
- package/dist/mcp/services/watch-manager.js +7 -18
- package/dist/mcp/tools/detect-dead-code.tool.js +413 -0
- package/dist/mcp/tools/detect-duplicate-code.tool.js +450 -0
- package/dist/mcp/tools/impact-analysis.tool.js +20 -4
- package/dist/mcp/tools/index.js +4 -0
- package/dist/mcp/tools/parse-typescript-project.tool.js +15 -14
- package/dist/mcp/workers/chunk-worker-pool.js +196 -0
- package/dist/mcp/workers/chunk-worker.types.js +4 -0
- package/dist/mcp/workers/chunk.worker.js +89 -0
- package/dist/mcp/workers/parse-coordinator.js +183 -0
- package/dist/mcp/workers/worker.pool.js +54 -0
- package/dist/storage/neo4j/neo4j.service.js +190 -10
- package/package.json +1 -1
package/dist/core/workspace/workspace-detector.js
CHANGED

@@ -44,6 +44,13 @@ export class WorkspaceDetector {
         if (hasTurboJson) {
             return 'turborepo';
         }
+        // Check for Nx (has nx.json) - check before pnpm/npm since Nx can coexist with them
+        const nxJsonPath = path.join(rootPath, 'nx.json');
+        const hasNxJson = await this.fileExists(nxJsonPath);
+        await debugLog('Checking for nx.json', { path: nxJsonPath, exists: hasNxJson });
+        if (hasNxJson) {
+            return 'nx';
+        }
         // Check for pnpm workspaces (has pnpm-workspace.yaml)
         const pnpmWorkspacePath = path.join(rootPath, 'pnpm-workspace.yaml');
         const hasPnpmWorkspace = await this.fileExists(pnpmWorkspacePath);
@@ -99,6 +106,43 @@ export class WorkspaceDetector {
                 // Turborepo default patterns
                 return ['apps/*', 'packages/*'];
             }
+            case 'nx': {
+                // For Nx, scan for all project.json files to find all projects
+                // This is more reliable than workspaces since Nx projects may not be in package.json workspaces
+                const projectJsonFiles = await glob('**/project.json', {
+                    cwd: rootPath,
+                    ignore: ['**/node_modules/**', '**/dist/**', '**/build/**'],
+                    absolute: false,
+                });
+                // Extract unique parent directories (the project roots)
+                const projectDirs = new Set();
+                for (const projectJsonPath of projectJsonFiles) {
+                    const projectDir = path.dirname(projectJsonPath);
+                    if (projectDir !== '.') {
+                        projectDirs.add(projectDir);
+                    }
+                }
+                if (projectDirs.size > 0) {
+                    await debugLog('Found Nx projects via project.json scan', { count: projectDirs.size });
+                    return Array.from(projectDirs);
+                }
+                // Fallback to package.json workspaces if no project.json files found
+                const packageJsonPath = path.join(rootPath, 'package.json');
+                try {
+                    const packageJson = JSON.parse(await fs.readFile(packageJsonPath, 'utf-8'));
+                    if (Array.isArray(packageJson.workspaces)) {
+                        return packageJson.workspaces;
+                    }
+                    if (packageJson.workspaces?.packages) {
+                        return packageJson.workspaces.packages;
+                    }
+                }
+                catch {
+                    // Fall through to defaults
+                }
+                // Nx default patterns
+                return ['apps/*', 'libs/*', 'packages/*', 'src/*/*'];
+            }
             case 'yarn':
             case 'npm': {
                 // Read from package.json workspaces
@@ -151,16 +195,26 @@ export class WorkspaceDetector {
             if (seenPaths.has(packagePath))
                 continue;
             seenPaths.add(packagePath);
-            // Check if this is a valid package (has package.json)
+            // Check if this is a valid package (has package.json) or Nx project (has project.json)
             const packageJsonPath = path.join(packagePath, 'package.json');
-
+            const projectJsonPath = path.join(packagePath, 'project.json');
+            const hasPackageJson = await this.fileExists(packageJsonPath);
+            const hasProjectJson = await this.fileExists(projectJsonPath);
+            if (!hasPackageJson && !hasProjectJson) {
                 continue;
             }
-            // Read package name
+            // Read package/project name
             let packageName;
             try {
-
-
+                if (hasPackageJson) {
+                    const packageJson = JSON.parse(await fs.readFile(packageJsonPath, 'utf-8'));
+                    packageName = packageJson.name ?? path.basename(packagePath);
+                }
+                else {
+                    // Nx project.json - try to read name from it
+                    const projectJson = JSON.parse(await fs.readFile(projectJsonPath, 'utf-8'));
+                    packageName = projectJson.name ?? path.basename(packagePath);
+                }
             }
             catch {
                 packageName = path.basename(packagePath);
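For context, the detection order introduced above is: turbo.json first, then nx.json, then pnpm-workspace.yaml. A minimal standalone sketch of that precedence follows; the function name and the fallback return value are hypothetical, and the real WorkspaceDetector also handles the yarn/npm cases and uses its own fileExists helper.

```js
// Hypothetical sketch of the detection precedence added in 2.2.0; not the package's actual API.
import { access } from 'fs/promises';
import path from 'path';

const exists = (p) => access(p).then(() => true, () => false);

async function detectWorkspaceType(rootPath) {
    if (await exists(path.join(rootPath, 'turbo.json')))
        return 'turborepo';
    // New: nx.json is checked before the pnpm/npm checks because Nx can coexist with them
    if (await exists(path.join(rootPath, 'nx.json')))
        return 'nx';
    if (await exists(path.join(rootPath, 'pnpm-workspace.yaml')))
        return 'pnpm';
    return 'unknown'; // the real detector continues with yarn/npm checks here
}
```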
package/dist/mcp/constants.js
CHANGED

@@ -2,6 +2,8 @@
  * MCP Server Constants
  * All constants used throughout the MCP server implementation
  */
+// Re-export shared constants
+export { LOG_CONFIG } from '../constants.js';
 // Server Configuration
 export const MCP_SERVER_CONFIG = {
     name: 'codebase-graph',
@@ -27,6 +29,8 @@ export const TOOL_NAMES = {
     startWatchProject: 'start_watch_project',
     stopWatchProject: 'stop_watch_project',
     listWatchers: 'list_watchers',
+    detectDeadCode: 'detect_dead_code',
+    detectDuplicateCode: 'detect_duplicate_code',
 };
 // Tool Metadata
 export const TOOL_METADATA = {
@@ -93,10 +97,28 @@ Use list_projects to see available projects and get the project name.
 
 **Tips:**
 - Import nodes store file paths, not module names (use 'path containing X')
-- Node types: SourceFile,
-- Relationships: CONTAINS, IMPORTS,
+- Node types: SourceFile, Class, Function, Method, Interface, Property, Parameter, Constructor, Import, Export, Decorator, Enum, Variable, TypeAlias
+- Relationships: CONTAINS, IMPORTS, EXTENDS, IMPLEMENTS, HAS_MEMBER, HAS_PARAMETER, TYPED_AS, CALLS, DECORATED_WITH
 - For NestJS, use semanticType property instead of decorators (e.g., semanticType = 'NestController')
 
+**Relationships (Core):**
+- CONTAINS: File/class contains members
+- HAS_MEMBER: Class/interface has methods/properties
+- HAS_PARAMETER: Method/function has parameters
+- IMPORTS: SourceFile imports another
+- EXPORTS: SourceFile exports items
+- EXTENDS: Class/interface extends another
+- IMPLEMENTS: Class implements interface(s)
+- CALLS: Method/function calls another
+- TYPED_AS: Parameter/property has type annotation
+- DECORATED_WITH: Node has decorators
+
+**Relationships (NestJS/Framework):**
+- INJECTS: Service/controller injects dependency
+- EXPOSES: Controller exposes HTTP endpoints
+- MODULE_IMPORTS, MODULE_PROVIDES, MODULE_EXPORTS: NestJS module system
+- GUARDED_BY, TRANSFORMED_BY, INTERCEPTED_BY: Security/middleware
+
 **Query Phrasing:**
 Phrase queries using properties known to exist (filePath, name) rather than abstract concepts:
 - Use "in account folder" or "filePath contains /account/" instead of "in account module"
@@ -252,6 +274,86 @@ Returns information about each watcher:
 
 Use stop_watch_project to stop a watcher.`,
     },
+    [TOOL_NAMES.detectDeadCode]: {
+        title: 'Detect Dead Code',
+        description: `Identify potentially unused code in the codebase including exports never imported, private methods never called, and orphan interfaces.
+
+**Before analyzing:**
+Use list_projects to see available projects and get the project name.
+
+Returns:
+- Risk level (LOW/MEDIUM/HIGH/CRITICAL) based on dead code count
+- Dead code items with confidence levels (HIGH/MEDIUM/LOW) and categories
+- Grouped by type (methods, classes, interfaces, etc.)
+- Grouped by category (library-export, ui-component, internal-unused)
+- Affected files list
+- Excluded entry points for audit (controllers, modules, etc.)
+
+Parameters:
+- projectId: Project name, path, or ID (required)
+- excludePatterns: Additional file patterns to exclude (e.g., ["*.config.ts", "*.seed.ts"])
+- excludeSemanticTypes: Additional semantic types to exclude (e.g., ["EntityClass", "DTOClass"])
+- excludeLibraryExports: Exclude all items from packages/* directories (default: false)
+- excludeCoreTypes: Exclude specific AST types (e.g., ["InterfaceDeclaration", "EnumDeclaration"])
+- includeEntryPoints: Include excluded entry points in audit section (default: true)
+- minConfidence: Minimum confidence to include (LOW/MEDIUM/HIGH, default: LOW)
+- filterCategory: Filter by category (library-export, ui-component, internal-unused, all) (default: all)
+- summaryOnly: Return only statistics without full dead code list (default: false)
+- limit: Maximum items per page (default: 100, max: 500)
+- offset: Number of items to skip for pagination (default: 0)
+
+**Categories:**
+- library-export: Exports from packages/* directories (may be used by external consumers)
+- ui-component: Exports from components/ui/* (component library, intentionally broad API)
+- internal-unused: Regular internal code that appears unused
+
+**Auto-excluded entry points:**
+- Semantic types: NestController, NestModule, NestGuard, NestPipe, NestInterceptor, NestFilter, NestProvider, NestService, HttpEndpoint
+- File patterns: main.ts, *.module.ts, *.controller.ts, index.ts
+
+**Confidence levels:**
+- HIGH: Exported but never imported or referenced
+- MEDIUM: Private with no internal calls
+- LOW: Could be used dynamically
+
+Use filterCategory=internal-unused for actionable dead code cleanup.`,
+    },
+    [TOOL_NAMES.detectDuplicateCode]: {
+        title: 'Detect Duplicate Code',
+        description: `Find duplicate code patterns using structural (AST hash) and semantic (embedding similarity) analysis.
+
+**Before analyzing:**
+Use list_projects to see available projects and get the project name.
+
+Returns:
+- Duplicate groups with similarity scores
+- Confidence levels (HIGH/MEDIUM/LOW)
+- Grouped by detection type (structural, semantic)
+- Recommendations for each duplicate group
+- Affected files list
+
+Parameters:
+- projectId: Project name, path, or ID (required)
+- type: Detection approach - "structural", "semantic", or "all" (default: all)
+- minSimilarity: Minimum similarity for semantic duplicates (0.5-1.0, default: 0.80)
+- includeCode: Include source code snippets (default: false)
+- maxResults: Maximum duplicate groups per page (default: 20, max: 100)
+- scope: Node types to analyze - "methods", "functions", "classes", or "all" (default: all)
+- summaryOnly: Return only statistics without full duplicates list (default: false)
+- offset: Number of groups to skip for pagination (default: 0)
+
+**Detection Types:**
+- structural: Finds exact duplicates by normalized code hash (ignores formatting, variable names, literals)
+- semantic: Finds similar code using embedding similarity (catches different implementations of same logic)
+- all: Runs both detection types
+
+**Similarity Thresholds:**
+- 0.90+: Very high similarity, almost certainly duplicates
+- 0.85-0.90: High similarity, likely duplicates with minor variations
+- 0.80-0.85: Moderate similarity, worth reviewing
+
+Use this to identify refactoring opportunities and reduce code duplication.`,
+    },
 };
 // Default Values
 export const DEFAULTS = {
@@ -263,6 +365,43 @@ export const DEFAULTS = {
     chainSnippetLength: 700,
     maxEmbeddingChars: 30000, // ~7500 tokens, under 8192 limit for text-embedding-3-large
 };
+// Parsing Configuration
+export const PARSING = {
+    /** File count threshold to trigger parallel parsing with worker pool */
+    parallelThreshold: 500,
+    /** File count threshold to trigger streaming import */
+    streamingThreshold: 100,
+    /** Default number of files per chunk */
+    defaultChunkSize: 100,
+    /** Worker timeout in milliseconds (30 minutes) */
+    workerTimeoutMs: 30 * 60 * 1000,
+};
+// Job Management
+export const JOBS = {
+    /** Interval for cleaning up completed/stale jobs (5 minutes) */
+    cleanupIntervalMs: 5 * 60 * 1000,
+    /** Maximum number of jobs to keep in memory */
+    maxJobs: 100,
+};
+// Watch Mode Configuration
+export const WATCH = {
+    /** Default debounce delay before processing file changes */
+    defaultDebounceMs: 1000,
+    /** Maximum concurrent file watchers */
+    maxWatchers: 10,
+    /** Maximum pending file change events before dropping */
+    maxPendingEvents: 1000,
+    /** Default exclude patterns for file watching */
+    excludePatterns: [
+        '**/node_modules/**',
+        '**/dist/**',
+        '**/build/**',
+        '**/.git/**',
+        '**/*.d.ts',
+        '**/*.js',
+        '**/*.map',
+    ],
+};
 // Messages
 export const MESSAGES = {
     errors: {
@@ -298,9 +437,3 @@ export const MESSAGES = {
         startingServer: 'Starting MCP server...',
     },
 };
-// Logging Configuration
-export const LOG_CONFIG = {
-    timestampFormat: 'iso',
-    logSeparator: '---',
-    jsonIndentation: 2,
-};
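The two new tools are reached through the standard MCP tools/call request. A sketch of what such calls could look like, using a placeholder project name; the parameter names and defaults come from the metadata above.

```js
// Hypothetical MCP tools/call payloads; "my-app" is a placeholder projectId.
const deadCodeCall = {
    method: 'tools/call',
    params: {
        name: 'detect_dead_code',
        arguments: {
            projectId: 'my-app',
            minConfidence: 'HIGH',             // only exports never imported or referenced
            filterCategory: 'internal-unused', // the actionable category per the description
            limit: 100,
            offset: 0,
        },
    },
};

const duplicateCodeCall = {
    method: 'tools/call',
    params: {
        name: 'detect_duplicate_code',
        arguments: {
            projectId: 'my-app',
            type: 'all',         // structural (normalized hash) plus semantic (embeddings)
            minSimilarity: 0.85, // "high similarity, likely duplicates with minor variations"
            maxResults: 20,
        },
    },
};
```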
package/dist/mcp/handlers/graph-generator.handler.js
CHANGED

@@ -73,6 +73,7 @@ export class GraphGeneratorHandler {
         await this.neo4jService.run(QUERIES.CREATE_PROJECT_INDEX_SOURCEFILE);
         await this.neo4jService.run(QUERIES.CREATE_PROJECT_ID_INDEX_EMBEDDED);
         await this.neo4jService.run(QUERIES.CREATE_PROJECT_ID_INDEX_SOURCEFILE);
+        await this.neo4jService.run(QUERIES.CREATE_NORMALIZED_HASH_INDEX);
         await debugLog('Project indexes created');
     }
     async importNodes(nodes, batchSize) {
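The new CREATE_NORMALIZED_HASH_INDEX query backs the structural duplicate detection described in the detect_duplicate_code metadata (see also the new dist/core/utils/code-normalizer.js in the file list): functions that hash to the same normalized form can be matched via an index lookup instead of pairwise comparison. The sketch below is illustrative only; the package's actual normalizer is not shown in this diff and, per the tool description, also abstracts identifiers, which this toy version does not.

```js
// Toy normalized-hash sketch (assumption); the real code-normalizer.js may differ substantially.
import { createHash } from 'crypto';

function normalizedHash(source) {
    const normalized = source
        .replace(/\/\/.*$/gm, '')           // strip line comments
        .replace(/\/\*[\s\S]*?\*\//g, '')   // strip block comments
        .replace(/(['"`]).*?\1/g, 'LIT')    // collapse string literals
        .replace(/\b\d+(\.\d+)?\b/g, 'NUM') // collapse numeric literals
        .replace(/\s+/g, ' ')               // ignore formatting
        .trim();
    return createHash('sha256').update(normalized).digest('hex');
}

// Snippets that differ only in literals and whitespace collide on purpose:
console.log(normalizedHash('return total + 1;') === normalizedHash('return  total + 42;')); // true
```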
package/dist/mcp/handlers/incremental-parse.handler.js
CHANGED

@@ -23,7 +23,7 @@ export const performIncrementalParse = async (projectPath, projectId, tsconfigPa
     const embeddingsService = new EmbeddingsService();
     const graphHandler = new GraphGeneratorHandler(neo4jService, embeddingsService);
     try {
-        await debugLog('Incremental parse started
+        await debugLog('Incremental parse started', { projectPath, projectId });
         // Resolve project ID
         const resolvedId = resolveProjectId(projectPath, projectId);
         const projectName = await getProjectName(projectPath);
@@ -31,13 +31,12 @@ export const performIncrementalParse = async (projectPath, projectId, tsconfigPa
         const parser = await ParserFactory.createParserWithAutoDetection(projectPath, tsconfigPath, resolvedId, true);
         // Detect changed files
         const { filesToReparse, filesToDelete } = await detectChangedFiles(projectPath, neo4jService, resolvedId);
-        await debugLog('
+        await debugLog('Incremental change detection', {
             filesToReparse: filesToReparse.length,
             filesToDelete: filesToDelete.length,
         });
         // If no changes, return early
         if (filesToReparse.length === 0 && filesToDelete.length === 0) {
-            await debugLog('Watch incremental: no changes detected');
             return {
                 nodesUpdated: 0,
                 edgesUpdated: 0,
@@ -50,7 +49,6 @@ export const performIncrementalParse = async (projectPath, projectId, tsconfigPa
         if (filesToRemoveFromGraph.length > 0) {
             // Save cross-file edges before deletion
             savedCrossFileEdges = await getCrossFileEdges(neo4jService, filesToRemoveFromGraph, resolvedId);
-            await debugLog('Watch: saved cross-file edges', { count: savedCrossFileEdges.length });
             // Delete old subgraphs
             await deleteSourceFileSubgraphs(neo4jService, filesToRemoveFromGraph, resolvedId);
         }
@@ -124,10 +122,9 @@ export const performIncrementalParse = async (projectPath, projectId, tsconfigPa
                     ? Number(firstResult.recreatedCount) || 0
                     : 0;
                 edgesImported += recreatedCount;
-                await debugLog('Watch: cross-file edges recreated', { recreatedCount });
             }
         }
-        await debugLog('
+        await debugLog('Incremental parse completed', {
            nodesImported,
            edgesImported,
            filesReparsed: filesToReparse.length,
package/dist/mcp/handlers/parallel-import.handler.js
ADDED

@@ -0,0 +1,136 @@
+/**
+ * Parallel Import Handler
+ * Orchestrates parallel chunk parsing using a worker pool with pipelined import.
+ * Used for large codebases (>= PARSING.parallelThreshold files).
+ */
+import { join } from 'path';
+import { ProgressReporter } from '../../core/utils/progress-reporter.js';
+import { debugLog } from '../utils.js';
+import { ChunkWorkerPool } from '../workers/chunk-worker-pool.js';
+export class ParallelImportHandler {
+    graphGeneratorHandler;
+    progressReporter;
+    constructor(graphGeneratorHandler) {
+        this.graphGeneratorHandler = graphGeneratorHandler;
+        this.progressReporter = new ProgressReporter();
+    }
+    /**
+     * Import a project using parallel worker pool with pipelined import.
+     * Chunks are distributed to workers, and imports happen as chunks complete.
+     */
+    async importProjectParallel(parser, sourceFiles, config) {
+        const startTime = Date.now();
+        if (config.onProgress) {
+            this.progressReporter.setCallback(config.onProgress);
+        }
+        const totalFiles = sourceFiles.length;
+        let totalNodesImported = 0;
+        let totalEdgesImported = 0;
+        const chunks = this.createChunks(sourceFiles, config.chunkSize);
+        this.progressReporter.report({
+            phase: 'parsing',
+            current: 0,
+            total: totalFiles,
+            message: `Starting parallel parse of ${totalFiles} files in ${chunks.length} chunks`,
+            details: { chunkIndex: 0, totalChunks: chunks.length },
+        });
+        await debugLog('Using parallel chunk workers', { totalFiles, chunkCount: chunks.length });
+        const pool = new ChunkWorkerPool({
+            projectPath: config.projectPath,
+            tsconfigPath: config.tsconfigPath,
+            projectId: config.projectId,
+            projectType: config.projectType,
+        });
+        // Pipelined: import starts as soon as each chunk completes parsing
+        const poolResult = await pool.processChunks(chunks, async (result, stats) => {
+            await this.importToNeo4j(result.nodes, result.edges);
+            totalNodesImported += result.nodes.length;
+            totalEdgesImported += result.edges.length;
+            // Accumulate nodes for cross-chunk edge resolution
+            parser.addParsedNodesFromChunk(result.nodes);
+            // Merge shared context from workers for enabling cross-chunk references
+            if (result.sharedContext && result.sharedContext.length > 0) {
+                parser.mergeSerializedSharedContext(result.sharedContext);
+            }
+            // Collect deferred edges for resolution after all chunks complete
+            if (result.deferredEdges && result.deferredEdges.length > 0) {
+                parser.mergeDeferredEdges(result.deferredEdges);
+            }
+            this.progressReporter.report({
+                phase: 'parsing',
+                current: stats.chunksCompleted * config.chunkSize,
+                total: totalFiles,
+                message: `Chunk ${stats.chunksCompleted}/${stats.totalChunks}: ${totalNodesImported} nodes, ${totalEdgesImported} edges`,
+                details: {
+                    nodesCreated: totalNodesImported,
+                    edgesCreated: totalEdgesImported,
+                    chunkIndex: stats.chunksCompleted,
+                    totalChunks: stats.totalChunks,
+                },
+            });
+            debugLog(`Chunk ${result.chunkIndex + 1}/${stats.totalChunks}: ${result.nodes.length} nodes, ${result.edges.length} edges (imported)`);
+        });
+        debugLog(`Parallel parse+import complete: ${poolResult.totalNodes} nodes, ${poolResult.totalEdges} edges in ${poolResult.elapsedMs}ms`);
+        this.progressReporter.report({
+            phase: 'resolving',
+            current: totalFiles,
+            total: totalFiles,
+            message: 'Resolving cross-chunk edges',
+            details: {
+                nodesCreated: totalNodesImported,
+                edgesCreated: totalEdgesImported,
+                chunkIndex: chunks.length,
+                totalChunks: chunks.length,
+            },
+        });
+        const resolvedEdges = await parser.resolveDeferredEdges();
+        if (resolvedEdges.length > 0) {
+            await this.importToNeo4j([], resolvedEdges);
+            totalEdgesImported += resolvedEdges.length;
+            await debugLog(`Resolved ${resolvedEdges.length} cross-chunk edges`);
+        }
+        parser.loadFrameworkSchemasForType(config.projectType);
+        const enhancedEdges = await parser.applyEdgeEnhancementsManually();
+        if (enhancedEdges.length > 0) {
+            await this.importToNeo4j([], enhancedEdges);
+            totalEdgesImported += enhancedEdges.length;
+            await debugLog(`Created ${enhancedEdges.length} edges from enhancements`);
+        }
+        parser.clearParsedData();
+        const elapsedMs = Date.now() - startTime;
+        return {
+            nodesImported: totalNodesImported,
+            edgesImported: totalEdgesImported,
+            filesProcessed: totalFiles,
+            chunksProcessed: chunks.length,
+            elapsedMs,
+        };
+    }
+    createChunks(files, chunkSize) {
+        const chunks = [];
+        for (let i = 0; i < files.length; i += chunkSize) {
+            chunks.push(files.slice(i, i + chunkSize));
+        }
+        return chunks;
+    }
+    async importToNeo4j(nodes, edges) {
+        if (nodes.length === 0 && edges.length === 0)
+            return;
+        const fs = await import('fs/promises');
+        const { randomBytes } = await import('crypto');
+        const { tmpdir } = await import('os');
+        const tempPath = join(tmpdir(), `chunk-${Date.now()}-${randomBytes(8).toString('hex')}.json`);
+        try {
+            await fs.writeFile(tempPath, JSON.stringify({ nodes, edges, metadata: { parallel: true } }));
+            await this.graphGeneratorHandler.generateGraph(tempPath, 100, false);
+        }
+        finally {
+            try {
+                await fs.unlink(tempPath);
+            }
+            catch {
+                // Ignore cleanup errors
+            }
+        }
+    }
+}
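A sketch of how a caller might drive the new handler, based only on the constructors and method signatures visible in this diff and in incremental-parse.handler.js; the wrapper function and its options object are invented for illustration, and construction of the parser and services is assumed to happen elsewhere.

```js
// Illustrative wiring only; importLargeProject and its options bag are hypothetical.
import { PARSING } from '../constants.js';
import { GraphGeneratorHandler } from './graph-generator.handler.js';
import { ParallelImportHandler } from './parallel-import.handler.js';

export async function importLargeProject({ parser, neo4jService, embeddingsService, projectPath, tsconfigPath, projectId, projectType }) {
    const graphHandler = new GraphGeneratorHandler(neo4jService, embeddingsService);
    const importer = new ParallelImportHandler(graphHandler);
    const sourceFiles = await parser.discoverSourceFiles();
    // The worker-pool path is intended for large codebases (>= PARSING.parallelThreshold files).
    if (sourceFiles.length < PARSING.parallelThreshold) {
        return null; // a real caller would fall back to the streaming or single-pass import
    }
    return importer.importProjectParallel(parser, sourceFiles, {
        projectPath,
        tsconfigPath,
        projectId,
        projectType, // forwarded to ChunkWorkerPool and loadFrameworkSchemasForType
        chunkSize: PARSING.defaultChunkSize,
        onProgress: (p) => process.stderr.write(`[${p.phase}] ${p.current}/${p.total} ${p.message}\n`),
    });
}
```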
package/dist/mcp/handlers/streaming-import.handler.js
CHANGED

@@ -33,36 +33,33 @@ export class StreamingImportHandler {
         if (config.onProgress) {
             this.progressReporter.setCallback(config.onProgress);
         }
-        // Set project ID on graph generator
-        this.graphGeneratorHandler.setProjectId(config.projectId);
-        // Phase 1: Get discovered files (already discovered by worker, this returns cached result)
         const allFilePaths = await parser.discoverSourceFiles();
-        console.log(`📁 Found ${allFilePaths.length} files to parse`);
         await debugLog('Streaming import started', {
             totalFiles: allFilePaths.length,
             chunkSize: config.chunkSize,
         });
-
+        this.progressReporter.report({
+            phase: 'parsing',
+            current: 0,
+            total: allFilePaths.length,
+            message: `Starting streaming import of ${allFilePaths.length} files in chunks of ~${config.chunkSize}`,
+        });
         const chunks = [];
         for (let i = 0; i < allFilePaths.length; i += config.chunkSize) {
             chunks.push(allFilePaths.slice(i, i + config.chunkSize));
         }
-        console.log(`📦 Split into ${chunks.length} chunks of ~${config.chunkSize} files each`);
         let totalNodesImported = 0;
         let totalEdgesImported = 0;
-        // Phase 2: Parse and import chunks
         for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) {
             const chunk = chunks[chunkIndex];
             const filesProcessed = chunkIndex * config.chunkSize + chunk.length;
-            console.log(`\n🔄 Processing chunk ${chunkIndex + 1}/${chunks.length} (${chunk.length} files)`);
             try {
-                //
+                // Skip edge resolution during chunk parsing - resolve after all chunks complete
                 const { nodes, edges } = await parser.parseChunk(chunk, true);
-                //
+                // Accumulate nodes for cross-chunk edge resolution
                 parser.addExistingNodesFromChunk(nodes);
-                // Import to Neo4j if we have data
                 if (nodes.length > 0 || edges.length > 0) {
-                    await debugLog('Importing chunk
+                    await debugLog('Importing chunk', {
                         chunkIndex: chunkIndex + 1,
                         totalChunks: chunks.length,
                         nodeCount: nodes.length,
@@ -72,14 +69,12 @@ export class StreamingImportHandler {
                     totalEdgesImported += edges.length;
                 }
                 else {
-                    console.warn(`⚠️ Chunk ${chunkIndex + 1} produced 0 nodes/edges from ${chunk.length} files`);
                     await debugLog('Empty chunk result', {
                         chunkIndex: chunkIndex + 1,
                         fileCount: chunk.length,
                         sampleFiles: chunk.slice(0, 3),
                     });
                 }
-                // Report progress with all relevant data
                 await this.progressReporter.report({
                     phase: 'importing',
                     current: filesProcessed,
@@ -94,10 +89,8 @@ export class StreamingImportHandler {
                         totalChunks: chunks.length,
                     },
                 });
-                console.log(`✅ Chunk ${chunkIndex + 1}: ${nodes.length} nodes, ${edges.length} edges imported`);
             }
             catch (chunkError) {
-                console.error(`❌ Error processing chunk ${chunkIndex + 1}:`, chunkError);
                 await debugLog('Chunk processing error', {
                     chunkIndex: chunkIndex + 1,
                     fileCount: chunk.length,
@@ -105,40 +98,24 @@ export class StreamingImportHandler {
                     error: chunkError instanceof Error ? chunkError.message : String(chunkError),
                     stack: chunkError instanceof Error ? chunkError.stack : undefined,
                 });
-                // Re-throw to fail the entire import - don't silently continue
                 throw chunkError;
             }
-            // Note: Don't clear parsed data during streaming - we need accumulated nodes for cross-chunk edge resolution
-            // Memory usage is bounded because we only keep Neo4jNode references (not full AST)
         }
-        // Phase 3: Resolve cross-chunk deferred edges
         await this.progressReporter.reportResolving(0, totalEdgesImported);
-
-        const resolvedEdges = await parser.resolveDeferredEdgesManually();
+        const resolvedEdges = await parser.resolveDeferredEdges();
         if (resolvedEdges.length > 0) {
             await this.importEdgesToNeo4j(resolvedEdges);
             totalEdgesImported += resolvedEdges.length;
-
-        }
-        else {
-            console.log('ℹ️ No cross-chunk edges to resolve');
+            await debugLog(`Resolved ${resolvedEdges.length} cross-chunk edges`);
         }
-        // Phase 3b: Apply edge enhancements on all accumulated nodes
-        // This catches context-dependent edges (like INTERNAL_API_CALL) that span chunks
-        console.log('\n🔗 Applying edge enhancements on all nodes...');
         const enhancedEdges = await parser.applyEdgeEnhancementsManually();
         if (enhancedEdges.length > 0) {
             await this.importEdgesToNeo4j(enhancedEdges);
             totalEdgesImported += enhancedEdges.length;
-
+            await debugLog(`Created ${enhancedEdges.length} edges from edge enhancements`);
         }
-        else {
-            console.log('ℹ️ No edges from edge enhancements');
-        }
-        // Clear accumulated data now that edge resolution is complete
         parser.clearParsedData();
         await this.progressReporter.reportResolving(resolvedEdges.length, resolvedEdges.length);
-        // Phase 4: Complete
         const elapsedMs = Date.now() - startTime;
         await this.progressReporter.reportComplete(totalNodesImported, totalEdgesImported);
         const result = {
@@ -148,32 +125,17 @@ export class StreamingImportHandler {
             chunksProcessed: chunks.length,
             elapsedMs,
         };
-        console.log(`\n🎉 Streaming import complete!`);
-        console.log(` Files: ${allFilePaths.length}`);
-        console.log(` Nodes: ${totalNodesImported}`);
-        console.log(` Edges: ${totalEdgesImported}`);
-        console.log(` Time: ${(elapsedMs / 1000).toFixed(2)}s`);
         await debugLog('Streaming import completed', result);
         return result;
     }
-    /**
-     * Import a chunk of nodes and edges to Neo4j using the graph generator handler
-     */
     async importChunkToNeo4j(nodes, edges) {
-        // Write to temporary JSON and use existing import mechanism
-        // This reuses the batched embedding and import logic
         const tempPath = generateTempPath('chunk');
         const fs = await import('fs/promises');
         try {
-            await fs.writeFile(tempPath, JSON.stringify({
-                nodes,
-                edges,
-                metadata: { chunked: true },
-            }));
+            await fs.writeFile(tempPath, JSON.stringify({ nodes, edges, metadata: { chunked: true } }));
             await this.graphGeneratorHandler.generateGraph(tempPath, DEFAULTS.batchSize, false);
         }
         finally {
-            // Clean up temp file
             try {
                 await fs.unlink(tempPath);
             }
@@ -182,20 +144,13 @@ export class StreamingImportHandler {
             }
         }
     }
-    /**
-     * Import resolved edges to Neo4j
-     */
    async importEdgesToNeo4j(edges) {
        if (edges.length === 0)
            return;
        const tempPath = generateTempPath('edges');
        const fs = await import('fs/promises');
        try {
-            await fs.writeFile(tempPath, JSON.stringify({
-                nodes: [],
-                edges,
-                metadata: { edgesOnly: true },
-            }));
+            await fs.writeFile(tempPath, JSON.stringify({ nodes: [], edges, metadata: { edgesOnly: true } }));
            await this.graphGeneratorHandler.generateGraph(tempPath, DEFAULTS.batchSize, false);
        }
        finally {
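The console.log/console.warn/console.error calls are removed in favor of debugLog and the ProgressReporter, presumably because an MCP server running over stdio must keep stdout free for JSON-RPC traffic. A sketch of a consumer-side onProgress callback matching the payload fields used by the report() calls in this handler and in the parallel handler (phase, current, total, message, and an optional details object):

```js
// Illustrative onProgress consumer; the details keys mirror the report() payloads above.
function onProgress(update) {
    const { phase, current, total, message, details } = update;
    const pct = total > 0 ? Math.round((current / total) * 100) : 0;
    let line = `[${phase}] ${pct}% (${current}/${total}) ${message ?? ''}`;
    if (details?.totalChunks) {
        line += ` (chunk ${details.chunkIndex}/${details.totalChunks})`;
    }
    process.stderr.write(line + '\n'); // stderr, so MCP stdio output on stdout stays clean
}
```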
package/dist/mcp/mcp.server.js
CHANGED

@@ -18,8 +18,8 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
 import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
 import { MCP_SERVER_CONFIG, MESSAGES } from './constants.js';
 import { performIncrementalParse } from './handlers/incremental-parse.handler.js';
-import { watchManager } from './services/watch-manager.js';
 import { initializeServices } from './service-init.js';
+import { watchManager } from './services/watch-manager.js';
 import { registerAllTools } from './tools/index.js';
 import { debugLog } from './utils.js';
 /**
|