code-graph-context 2.0.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +221 -2
- package/dist/constants.js +167 -0
- package/dist/core/config/fairsquare-framework-schema.js +9 -7
- package/dist/core/config/schema.js +41 -2
- package/dist/core/embeddings/natural-language-to-cypher.service.js +166 -110
- package/dist/core/parsers/typescript-parser.js +1039 -742
- package/dist/core/parsers/workspace-parser.js +175 -193
- package/dist/core/utils/code-normalizer.js +299 -0
- package/dist/core/utils/file-change-detection.js +17 -2
- package/dist/core/utils/file-utils.js +40 -5
- package/dist/core/utils/graph-factory.js +161 -0
- package/dist/core/utils/shared-utils.js +79 -0
- package/dist/core/workspace/workspace-detector.js +59 -5
- package/dist/mcp/constants.js +261 -8
- package/dist/mcp/handlers/graph-generator.handler.js +1 -0
- package/dist/mcp/handlers/incremental-parse.handler.js +22 -6
- package/dist/mcp/handlers/parallel-import.handler.js +136 -0
- package/dist/mcp/handlers/streaming-import.handler.js +14 -59
- package/dist/mcp/mcp.server.js +77 -2
- package/dist/mcp/services/job-manager.js +5 -8
- package/dist/mcp/services/watch-manager.js +64 -25
- package/dist/mcp/tools/detect-dead-code.tool.js +413 -0
- package/dist/mcp/tools/detect-duplicate-code.tool.js +450 -0
- package/dist/mcp/tools/hello.tool.js +16 -2
- package/dist/mcp/tools/impact-analysis.tool.js +20 -4
- package/dist/mcp/tools/index.js +37 -0
- package/dist/mcp/tools/parse-typescript-project.tool.js +15 -14
- package/dist/mcp/tools/swarm-cleanup.tool.js +157 -0
- package/dist/mcp/tools/swarm-constants.js +35 -0
- package/dist/mcp/tools/swarm-pheromone.tool.js +196 -0
- package/dist/mcp/tools/swarm-sense.tool.js +212 -0
- package/dist/mcp/workers/chunk-worker-pool.js +196 -0
- package/dist/mcp/workers/chunk-worker.types.js +4 -0
- package/dist/mcp/workers/chunk.worker.js +89 -0
- package/dist/mcp/workers/parse-coordinator.js +183 -0
- package/dist/mcp/workers/worker.pool.js +54 -0
- package/dist/storage/neo4j/neo4j.service.js +198 -14
- package/package.json +1 -1
package/dist/core/workspace/workspace-detector.js
CHANGED
@@ -44,6 +44,13 @@ export class WorkspaceDetector {
         if (hasTurboJson) {
             return 'turborepo';
         }
+        // Check for Nx (has nx.json) - check before pnpm/npm since Nx can coexist with them
+        const nxJsonPath = path.join(rootPath, 'nx.json');
+        const hasNxJson = await this.fileExists(nxJsonPath);
+        await debugLog('Checking for nx.json', { path: nxJsonPath, exists: hasNxJson });
+        if (hasNxJson) {
+            return 'nx';
+        }
         // Check for pnpm workspaces (has pnpm-workspace.yaml)
         const pnpmWorkspacePath = path.join(rootPath, 'pnpm-workspace.yaml');
         const hasPnpmWorkspace = await this.fileExists(pnpmWorkspacePath);
@@ -99,6 +106,43 @@ export class WorkspaceDetector {
             // Turborepo default patterns
             return ['apps/*', 'packages/*'];
         }
+        case 'nx': {
+            // For Nx, scan for all project.json files to find all projects
+            // This is more reliable than workspaces since Nx projects may not be in package.json workspaces
+            const projectJsonFiles = await glob('**/project.json', {
+                cwd: rootPath,
+                ignore: ['**/node_modules/**', '**/dist/**', '**/build/**'],
+                absolute: false,
+            });
+            // Extract unique parent directories (the project roots)
+            const projectDirs = new Set();
+            for (const projectJsonPath of projectJsonFiles) {
+                const projectDir = path.dirname(projectJsonPath);
+                if (projectDir !== '.') {
+                    projectDirs.add(projectDir);
+                }
+            }
+            if (projectDirs.size > 0) {
+                await debugLog('Found Nx projects via project.json scan', { count: projectDirs.size });
+                return Array.from(projectDirs);
+            }
+            // Fallback to package.json workspaces if no project.json files found
+            const packageJsonPath = path.join(rootPath, 'package.json');
+            try {
+                const packageJson = JSON.parse(await fs.readFile(packageJsonPath, 'utf-8'));
+                if (Array.isArray(packageJson.workspaces)) {
+                    return packageJson.workspaces;
+                }
+                if (packageJson.workspaces?.packages) {
+                    return packageJson.workspaces.packages;
+                }
+            }
+            catch {
+                // Fall through to defaults
+            }
+            // Nx default patterns
+            return ['apps/*', 'libs/*', 'packages/*', 'src/*/*'];
+        }
         case 'yarn':
         case 'npm': {
             // Read from package.json workspaces
@@ -151,16 +195,26 @@ export class WorkspaceDetector {
             if (seenPaths.has(packagePath))
                 continue;
             seenPaths.add(packagePath);
-            // Check if this is a valid package (has package.json)
+            // Check if this is a valid package (has package.json) or Nx project (has project.json)
             const packageJsonPath = path.join(packagePath, 'package.json');
-
+            const projectJsonPath = path.join(packagePath, 'project.json');
+            const hasPackageJson = await this.fileExists(packageJsonPath);
+            const hasProjectJson = await this.fileExists(projectJsonPath);
+            if (!hasPackageJson && !hasProjectJson) {
                 continue;
             }
-            // Read package name
+            // Read package/project name
             let packageName;
             try {
-
-
+                if (hasPackageJson) {
+                    const packageJson = JSON.parse(await fs.readFile(packageJsonPath, 'utf-8'));
+                    packageName = packageJson.name ?? path.basename(packagePath);
+                }
+                else {
+                    // Nx project.json - try to read name from it
+                    const projectJson = JSON.parse(await fs.readFile(projectJsonPath, 'utf-8'));
+                    packageName = projectJson.name ?? path.basename(packagePath);
+                }
             }
             catch {
                 packageName = path.basename(packagePath);
package/dist/mcp/constants.js
CHANGED
@@ -2,6 +2,8 @@
  * MCP Server Constants
  * All constants used throughout the MCP server implementation
  */
+// Re-export shared constants
+export { LOG_CONFIG } from '../constants.js';
 // Server Configuration
 export const MCP_SERVER_CONFIG = {
     name: 'codebase-graph',
@@ -27,6 +29,11 @@ export const TOOL_NAMES = {
     startWatchProject: 'start_watch_project',
     stopWatchProject: 'stop_watch_project',
     listWatchers: 'list_watchers',
+    detectDeadCode: 'detect_dead_code',
+    detectDuplicateCode: 'detect_duplicate_code',
+    swarmPheromone: 'swarm_pheromone',
+    swarmSense: 'swarm_sense',
+    swarmCleanup: 'swarm_cleanup',
 };
 // Tool Metadata
 export const TOOL_METADATA = {
@@ -93,10 +100,28 @@ Use list_projects to see available projects and get the project name.
 
 **Tips:**
 - Import nodes store file paths, not module names (use 'path containing X')
-- Node types: SourceFile,
-- Relationships: CONTAINS, IMPORTS,
+- Node types: SourceFile, Class, Function, Method, Interface, Property, Parameter, Constructor, Import, Export, Decorator, Enum, Variable, TypeAlias
+- Relationships: CONTAINS, IMPORTS, EXTENDS, IMPLEMENTS, HAS_MEMBER, HAS_PARAMETER, TYPED_AS, CALLS, DECORATED_WITH
 - For NestJS, use semanticType property instead of decorators (e.g., semanticType = 'NestController')
 
+**Relationships (Core):**
+- CONTAINS: File/class contains members
+- HAS_MEMBER: Class/interface has methods/properties
+- HAS_PARAMETER: Method/function has parameters
+- IMPORTS: SourceFile imports another
+- EXPORTS: SourceFile exports items
+- EXTENDS: Class/interface extends another
+- IMPLEMENTS: Class implements interface(s)
+- CALLS: Method/function calls another
+- TYPED_AS: Parameter/property has type annotation
+- DECORATED_WITH: Node has decorators
+
+**Relationships (NestJS/Framework):**
+- INJECTS: Service/controller injects dependency
+- EXPOSES: Controller exposes HTTP endpoints
+- MODULE_IMPORTS, MODULE_PROVIDES, MODULE_EXPORTS: NestJS module system
+- GUARDED_BY, TRANSFORMED_BY, INTERCEPTED_BY: Security/middleware
+
 **Query Phrasing:**
 Phrase queries using properties known to exist (filePath, name) rather than abstract concepts:
 - Use "in account folder" or "filePath contains /account/" instead of "in account module"
@@ -252,6 +277,203 @@ Returns information about each watcher:
 
 Use stop_watch_project to stop a watcher.`,
     },
+    [TOOL_NAMES.detectDeadCode]: {
+        title: 'Detect Dead Code',
+        description: `Identify potentially unused code in the codebase including exports never imported, private methods never called, and orphan interfaces.
+
+**Before analyzing:**
+Use list_projects to see available projects and get the project name.
+
+Returns:
+- Risk level (LOW/MEDIUM/HIGH/CRITICAL) based on dead code count
+- Dead code items with confidence levels (HIGH/MEDIUM/LOW) and categories
+- Grouped by type (methods, classes, interfaces, etc.)
+- Grouped by category (library-export, ui-component, internal-unused)
+- Affected files list
+- Excluded entry points for audit (controllers, modules, etc.)
+
+Parameters:
+- projectId: Project name, path, or ID (required)
+- excludePatterns: Additional file patterns to exclude (e.g., ["*.config.ts", "*.seed.ts"])
+- excludeSemanticTypes: Additional semantic types to exclude (e.g., ["EntityClass", "DTOClass"])
+- excludeLibraryExports: Exclude all items from packages/* directories (default: false)
+- excludeCoreTypes: Exclude specific AST types (e.g., ["InterfaceDeclaration", "EnumDeclaration"])
+- includeEntryPoints: Include excluded entry points in audit section (default: true)
+- minConfidence: Minimum confidence to include (LOW/MEDIUM/HIGH, default: LOW)
+- filterCategory: Filter by category (library-export, ui-component, internal-unused, all) (default: all)
+- summaryOnly: Return only statistics without full dead code list (default: false)
+- limit: Maximum items per page (default: 100, max: 500)
+- offset: Number of items to skip for pagination (default: 0)
+
+**Categories:**
+- library-export: Exports from packages/* directories (may be used by external consumers)
+- ui-component: Exports from components/ui/* (component library, intentionally broad API)
+- internal-unused: Regular internal code that appears unused
+
+**Auto-excluded entry points:**
+- Semantic types: NestController, NestModule, NestGuard, NestPipe, NestInterceptor, NestFilter, NestProvider, NestService, HttpEndpoint
+- File patterns: main.ts, *.module.ts, *.controller.ts, index.ts
+
+**Confidence levels:**
+- HIGH: Exported but never imported or referenced
+- MEDIUM: Private with no internal calls
+- LOW: Could be used dynamically
+
+Use filterCategory=internal-unused for actionable dead code cleanup.`,
+    },
+    [TOOL_NAMES.detectDuplicateCode]: {
+        title: 'Detect Duplicate Code',
+        description: `Find duplicate code patterns using structural (AST hash) and semantic (embedding similarity) analysis.
+
+**Before analyzing:**
+Use list_projects to see available projects and get the project name.
+
+Returns:
+- Duplicate groups with similarity scores
+- Confidence levels (HIGH/MEDIUM/LOW)
+- Grouped by detection type (structural, semantic)
+- Recommendations for each duplicate group
+- Affected files list
+
+Parameters:
+- projectId: Project name, path, or ID (required)
+- type: Detection approach - "structural", "semantic", or "all" (default: all)
+- minSimilarity: Minimum similarity for semantic duplicates (0.5-1.0, default: 0.80)
+- includeCode: Include source code snippets (default: false)
+- maxResults: Maximum duplicate groups per page (default: 20, max: 100)
+- scope: Node types to analyze - "methods", "functions", "classes", or "all" (default: all)
+- summaryOnly: Return only statistics without full duplicates list (default: false)
+- offset: Number of groups to skip for pagination (default: 0)
+
+**Detection Types:**
+- structural: Finds exact duplicates by normalized code hash (ignores formatting, variable names, literals)
+- semantic: Finds similar code using embedding similarity (catches different implementations of same logic)
+- all: Runs both detection types
+
+**Similarity Thresholds:**
+- 0.90+: Very high similarity, almost certainly duplicates
+- 0.85-0.90: High similarity, likely duplicates with minor variations
+- 0.80-0.85: Moderate similarity, worth reviewing
+
+Use this to identify refactoring opportunities and reduce code duplication.`,
+    },
+    [TOOL_NAMES.swarmPheromone]: {
+        title: 'Swarm Pheromone',
+        description: `Leave a pheromone marker on a code node for stigmergic coordination between agents.
+
+**What is Stigmergy?**
+Agents coordinate indirectly by leaving markers (pheromones) on code nodes. Other agents sense these markers and adapt their behavior. No direct messaging needed.
+
+**Pheromone Types:**
+- exploring: "I'm looking at this" (2 min half-life)
+- modifying: "I'm actively working on this" (10 min half-life)
+- claiming: "This is my territory" (1 hour half-life)
+- completed: "I finished work here" (24 hour half-life)
+- warning: "Danger - don't touch" (never decays)
+- blocked: "I'm stuck on this" (5 min half-life)
+- proposal: "Proposed artifact awaiting approval" (1 hour half-life)
+- needs_review: "Someone should check this" (30 min half-life)
+
+**Parameters:**
+- nodeId: The code node ID to mark
+- type: Type of pheromone (see above)
+- agentId: Your unique agent identifier
+- swarmId: Swarm ID from orchestrator (for bulk cleanup)
+- intensity: 0.0-1.0, how strong the signal (default: 1.0)
+- data: Optional metadata (summary, reason, etc.)
+- remove: Set true to remove the pheromone
+
+**Workflow states** (exploring, claiming, modifying, completed, blocked) are mutually exclusive per agent+node. Setting one automatically removes others.
+
+**Usage Pattern:**
+1. Before starting work: swarm_sense to check what's claimed
+2. Claim your target: swarm_pheromone({ nodeId, type: "claiming", agentId, swarmId })
+3. Refresh periodically if working long
+4. Mark complete: swarm_pheromone({ nodeId, type: "completed", agentId, swarmId, data: { summary: "..." } })
+
+**Decay:**
+Pheromones automatically fade over time. If an agent dies, its markers decay and work becomes available again.`,
+    },
+    [TOOL_NAMES.swarmSense]: {
+        title: 'Swarm Sense',
+        description: `Query pheromones in the code graph to sense what other agents are doing.
+
+**What This Does:**
+Returns active pheromones with their current intensity (after decay). Use this to:
+- See what nodes are being worked on
+- Avoid conflicts with other agents
+- Find unclaimed work
+- Check if your dependencies are being modified
+
+**Parameters:**
+- swarmId: Filter by swarm ID (see only this swarm's pheromones)
+- types: Filter by pheromone types (e.g., ["modifying", "claiming"])
+- nodeIds: Check specific nodes
+- agentIds: Filter by specific agents
+- excludeAgentId: Exclude your own pheromones (see what OTHERS are doing)
+- minIntensity: Minimum intensity after decay (default: 0.3)
+- limit: Max results (default: 50)
+- includeStats: Get summary statistics by type
+- cleanup: Remove fully decayed pheromones (intensity < 0.01)
+
+**Usage Pattern:**
+\`\`\`
+// Before starting work, check what's taken
+swarm_sense({
+  types: ["modifying", "claiming"],
+  minIntensity: 0.3
+})
+
+// Check a specific node before modifying
+swarm_sense({
+  nodeIds: ["proj_xxx:Service:UserService"],
+  types: ["modifying", "warning"]
+})
+
+// See what other agents are doing (exclude self)
+swarm_sense({
+  excludeAgentId: "my-agent-id",
+  types: ["exploring", "modifying"]
+})
+\`\`\`
+
+**Decay:**
+Intensity decreases over time (exponential decay). A pheromone with intensity 0.25 is almost gone. Below minIntensity threshold, it's not returned.`,
+    },
+    [TOOL_NAMES.swarmCleanup]: {
+        title: 'Swarm Cleanup',
+        description: `Bulk delete pheromones after a swarm completes.
+
+**When to use:**
+Call this when a swarm finishes to clean up all its pheromones. Prevents pollution for future swarms.
+
+**Parameters:**
+- projectId: Required - the project
+- swarmId: Delete all pheromones from this swarm
+- agentId: Delete all pheromones from this specific agent
+- all: Set true to delete ALL pheromones in project (use with caution)
+- keepTypes: Pheromone types to preserve (default: ["warning"])
+- dryRun: Preview what would be deleted without deleting
+
+**Must specify one of:** swarmId, agentId, or all=true
+
+**Examples:**
+\`\`\`
+// Clean up after a swarm completes
+swarm_cleanup({ projectId: "backend", swarmId: "swarm_abc123" })
+
+// Preview what would be deleted
+swarm_cleanup({ projectId: "backend", swarmId: "swarm_abc123", dryRun: true })
+
+// Clean up a specific agent's pheromones
+swarm_cleanup({ projectId: "backend", agentId: "swarm_abc123_auth" })
+
+// Nuclear option: delete all (except warnings)
+swarm_cleanup({ projectId: "backend", all: true })
+\`\`\`
+
+**Note:** \`warning\` pheromones are preserved by default. Pass \`keepTypes: []\` to delete everything.`,
+    },
 };
 // Default Values
 export const DEFAULTS = {
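The half-lives listed for swarm_pheromone and the swarm_sense note that intensity 0.25 is "almost gone" both point at half-life decay. A minimal sketch of that arithmetic, assuming exponential decay (the decay implementation itself is not part of this diff):

```js
// Half-life decay: intensity halves every halfLifeMs; null means never decays.
function decayedIntensity(initial, halfLifeMs, elapsedMs) {
    if (halfLifeMs === null) return initial; // e.g. "warning" pheromones
    return initial * Math.pow(0.5, elapsedMs / halfLifeMs);
}

// A "modifying" marker (10 min half-life) written at 1.0 drops below the
// default minIntensity of 0.3 after roughly 17-18 minutes:
decayedIntensity(1.0, 10 * 60 * 1000, 17 * 60 * 1000); // ~0.31
decayedIntensity(1.0, 10 * 60 * 1000, 18 * 60 * 1000); // ~0.29
```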
@@ -263,6 +485,43 @@ export const DEFAULTS = {
     chainSnippetLength: 700,
     maxEmbeddingChars: 30000, // ~7500 tokens, under 8192 limit for text-embedding-3-large
 };
+// Parsing Configuration
+export const PARSING = {
+    /** File count threshold to trigger parallel parsing with worker pool */
+    parallelThreshold: 500,
+    /** File count threshold to trigger streaming import */
+    streamingThreshold: 100,
+    /** Default number of files per chunk */
+    defaultChunkSize: 100,
+    /** Worker timeout in milliseconds (30 minutes) */
+    workerTimeoutMs: 30 * 60 * 1000,
+};
+// Job Management
+export const JOBS = {
+    /** Interval for cleaning up completed/stale jobs (5 minutes) */
+    cleanupIntervalMs: 5 * 60 * 1000,
+    /** Maximum number of jobs to keep in memory */
+    maxJobs: 100,
+};
+// Watch Mode Configuration
+export const WATCH = {
+    /** Default debounce delay before processing file changes */
+    defaultDebounceMs: 1000,
+    /** Maximum concurrent file watchers */
+    maxWatchers: 10,
+    /** Maximum pending file change events before dropping */
+    maxPendingEvents: 1000,
+    /** Default exclude patterns for file watching */
+    excludePatterns: [
+        '**/node_modules/**',
+        '**/dist/**',
+        '**/build/**',
+        '**/.git/**',
+        '**/*.d.ts',
+        '**/*.js',
+        '**/*.map',
+    ],
+};
 // Messages
 export const MESSAGES = {
     errors: {
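Given these defaults, the file-count routing implied elsewhere in this diff is simple threshold dispatch. A sketch under that assumption (strategy names are illustrative; the real dispatch lives in the handlers):

```js
// 1,200 files -> 'parallel' (and ceil(1200 / 100) = 12 chunks of work)
// 250 files   -> 'streaming'
// 50 files    -> 'direct' single-pass import
function pickImportStrategy(fileCount, { parallelThreshold, streamingThreshold }) {
    if (fileCount >= parallelThreshold) return 'parallel';
    if (fileCount >= streamingThreshold) return 'streaming';
    return 'direct';
}
```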
@@ -298,9 +557,3 @@ export const MESSAGES = {
         startingServer: 'Starting MCP server...',
     },
 };
-// Logging Configuration
-export const LOG_CONFIG = {
-    timestampFormat: 'iso',
-    logSeparator: '---',
-    jsonIndentation: 2,
-};
package/dist/mcp/handlers/graph-generator.handler.js
CHANGED
@@ -73,6 +73,7 @@ export class GraphGeneratorHandler {
         await this.neo4jService.run(QUERIES.CREATE_PROJECT_INDEX_SOURCEFILE);
         await this.neo4jService.run(QUERIES.CREATE_PROJECT_ID_INDEX_EMBEDDED);
         await this.neo4jService.run(QUERIES.CREATE_PROJECT_ID_INDEX_SOURCEFILE);
+        await this.neo4jService.run(QUERIES.CREATE_NORMALIZED_HASH_INDEX);
         await debugLog('Project indexes created');
     }
     async importNodes(nodes, batchSize) {
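CREATE_NORMALIZED_HASH_INDEX itself is not shown in this diff, but given the new code-normalizer.js and the structural duplicate detection keyed on a normalized code hash, it is plausibly a standard Neo4j range index. A hedged sketch only; the index name, label, and property are assumptions:

```js
// Hypothetical shape of the new query constant - the real one lives in
// QUERIES and may target a different label or property.
const CREATE_NORMALIZED_HASH_INDEX = `
    CREATE INDEX normalized_hash_index IF NOT EXISTS
    FOR (n:Method) ON (n.normalizedHash)
`;
```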
package/dist/mcp/handlers/incremental-parse.handler.js
CHANGED
@@ -23,7 +23,7 @@ export const performIncrementalParse = async (projectPath, projectId, tsconfigPa
     const embeddingsService = new EmbeddingsService();
     const graphHandler = new GraphGeneratorHandler(neo4jService, embeddingsService);
     try {
-        await debugLog('Incremental parse started
+        await debugLog('Incremental parse started', { projectPath, projectId });
         // Resolve project ID
         const resolvedId = resolveProjectId(projectPath, projectId);
         const projectName = await getProjectName(projectPath);
@@ -31,13 +31,13 @@ export const performIncrementalParse = async (projectPath, projectId, tsconfigPa
         const parser = await ParserFactory.createParserWithAutoDetection(projectPath, tsconfigPath, resolvedId, true);
         // Detect changed files
         const { filesToReparse, filesToDelete } = await detectChangedFiles(projectPath, neo4jService, resolvedId);
-        await debugLog('
+        await debugLog('Incremental change detection', {
            filesToReparse: filesToReparse.length,
            filesToDelete: filesToDelete.length,
         });
         // If no changes, return early
         if (filesToReparse.length === 0 && filesToDelete.length === 0) {
-            await debugLog('
+            await debugLog('Incremental parse: no changes, returning early', {});
             return {
                 nodesUpdated: 0,
                 edgesUpdated: 0,
@@ -45,25 +45,35 @@ export const performIncrementalParse = async (projectPath, projectId, tsconfigPa
                 filesDeleted: filesToDelete.length,
             };
         }
+        await debugLog('Incremental parse: changes detected, continuing', { filesToReparse: filesToReparse.length });
         let savedCrossFileEdges = [];
         const filesToRemoveFromGraph = [...filesToDelete, ...filesToReparse];
         if (filesToRemoveFromGraph.length > 0) {
+            await debugLog('Incremental parse: getting cross-file edges', { count: filesToRemoveFromGraph.length });
             // Save cross-file edges before deletion
             savedCrossFileEdges = await getCrossFileEdges(neo4jService, filesToRemoveFromGraph, resolvedId);
-            await debugLog('
+            await debugLog('Incremental parse: got cross-file edges', { savedCount: savedCrossFileEdges.length });
+            await debugLog('Incremental parse: deleting old subgraphs', {});
             // Delete old subgraphs
             await deleteSourceFileSubgraphs(neo4jService, filesToRemoveFromGraph, resolvedId);
+            await debugLog('Incremental parse: deleted old subgraphs', {});
         }
         let nodesImported = 0;
         let edgesImported = 0;
         if (filesToReparse.length > 0) {
+            await debugLog('Incremental parse: loading existing nodes', {});
             // Load existing nodes for edge detection
             const existingNodes = await loadExistingNodesForEdgeDetection(neo4jService, filesToRemoveFromGraph, resolvedId);
+            await debugLog('Incremental parse: loaded existing nodes', { count: existingNodes.length });
             parser.setExistingNodes(existingNodes);
+            await debugLog('Incremental parse: parsing workspace', { fileCount: filesToReparse.length });
             // Parse only changed files
             await parser.parseWorkspace(filesToReparse);
+            await debugLog('Incremental parse: parsed workspace', {});
             // Export graph data
+            await debugLog('Incremental parse: exporting to JSON', {});
             const { nodes, edges } = parser.exportToJson();
+            await debugLog('Incremental parse: exported to JSON', { nodeCount: nodes.length, edgeCount: edges.length });
             // Get framework schemas if available (use unknown as intermediate to access private property)
             // eslint-disable-next-line @typescript-eslint/no-explicit-any
             const parserAny = parser;
@@ -82,26 +92,33 @@ export const performIncrementalParse = async (projectPath, projectId, tsconfigPa
                 },
             };
             // Write to JSON file (required by GraphGeneratorHandler)
+            await debugLog('Incremental parse: writing JSON file', {});
             const outputPath = join(projectPath, FILE_PATHS.graphOutput);
             writeFileSync(outputPath, JSON.stringify(graphData, null, LOG_CONFIG.jsonIndentation));
+            await debugLog('Incremental parse: wrote JSON file', { outputPath });
             // Update Project node
+            await debugLog('Incremental parse: updating project node', {});
             await neo4jService.run(UPSERT_PROJECT_QUERY, {
                 projectId: resolvedId,
                 path: projectPath,
                 name: projectName,
                 status: 'complete',
             });
+            await debugLog('Incremental parse: updated project node', {});
             // Import nodes and edges (clearExisting = false for incremental)
+            await debugLog('Incremental parse: starting graph import', {});
             graphHandler.setProjectId(resolvedId);
             try {
                 const result = await graphHandler.generateGraph(outputPath, DEFAULTS.batchSize, false);
                 nodesImported = result.nodesImported;
                 edgesImported = result.edgesImported;
+                await debugLog('Incremental parse: graph import completed', { nodesImported, edgesImported });
             }
             finally {
                 // Clean up temporary graph.json file
                 try {
                     unlinkSync(outputPath);
+                    await debugLog('Incremental parse: cleaned up temp file', {});
                 }
                 catch {
                     // Ignore cleanup errors - file may not exist or be inaccessible
@@ -124,10 +141,9 @@ export const performIncrementalParse = async (projectPath, projectId, tsconfigPa
                     ? Number(firstResult.recreatedCount) || 0
                     : 0;
                 edgesImported += recreatedCount;
-                await debugLog('Watch: cross-file edges recreated', { recreatedCount });
             }
         }
-        await debugLog('
+        await debugLog('Incremental parse completed', {
             nodesImported,
             edgesImported,
             filesReparsed: filesToReparse.length,
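Stripped of the logging, the instrumented code above follows one fixed pipeline. A condensed sketch; the helper names come from the diff, but the wrapper function and `deps` object are illustrative:

```js
// Condensed view of performIncrementalParse as traced by the new debugLog calls.
async function incrementalFlow(deps, projectPath, resolvedId) {
    const { filesToReparse, filesToDelete } = await deps.detectChangedFiles(projectPath);
    if (filesToReparse.length === 0 && filesToDelete.length === 0) return; // early exit
    const stale = [...filesToDelete, ...filesToReparse];
    const saved = await deps.getCrossFileEdges(stale, resolvedId);   // preserve cross-file links
    await deps.deleteSourceFileSubgraphs(stale, resolvedId);         // drop stale subgraphs
    await deps.parseAndImport(filesToReparse);                       // reparse changed files only
    await deps.recreateCrossFileEdges(saved);                        // restore the saved links
}
```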
package/dist/mcp/handlers/parallel-import.handler.js
ADDED
@@ -0,0 +1,136 @@
+/**
+ * Parallel Import Handler
+ * Orchestrates parallel chunk parsing using a worker pool with pipelined import.
+ * Used for large codebases (>= PARSING.parallelThreshold files).
+ */
+import { join } from 'path';
+import { ProgressReporter } from '../../core/utils/progress-reporter.js';
+import { debugLog } from '../utils.js';
+import { ChunkWorkerPool } from '../workers/chunk-worker-pool.js';
+export class ParallelImportHandler {
+    graphGeneratorHandler;
+    progressReporter;
+    constructor(graphGeneratorHandler) {
+        this.graphGeneratorHandler = graphGeneratorHandler;
+        this.progressReporter = new ProgressReporter();
+    }
+    /**
+     * Import a project using parallel worker pool with pipelined import.
+     * Chunks are distributed to workers, and imports happen as chunks complete.
+     */
+    async importProjectParallel(parser, sourceFiles, config) {
+        const startTime = Date.now();
+        if (config.onProgress) {
+            this.progressReporter.setCallback(config.onProgress);
+        }
+        const totalFiles = sourceFiles.length;
+        let totalNodesImported = 0;
+        let totalEdgesImported = 0;
+        const chunks = this.createChunks(sourceFiles, config.chunkSize);
+        this.progressReporter.report({
+            phase: 'parsing',
+            current: 0,
+            total: totalFiles,
+            message: `Starting parallel parse of ${totalFiles} files in ${chunks.length} chunks`,
+            details: { chunkIndex: 0, totalChunks: chunks.length },
+        });
+        await debugLog('Using parallel chunk workers', { totalFiles, chunkCount: chunks.length });
+        const pool = new ChunkWorkerPool({
+            projectPath: config.projectPath,
+            tsconfigPath: config.tsconfigPath,
+            projectId: config.projectId,
+            projectType: config.projectType,
+        });
+        // Pipelined: import starts as soon as each chunk completes parsing
+        const poolResult = await pool.processChunks(chunks, async (result, stats) => {
+            await this.importToNeo4j(result.nodes, result.edges);
+            totalNodesImported += result.nodes.length;
+            totalEdgesImported += result.edges.length;
+            // Accumulate nodes for cross-chunk edge resolution
+            parser.addParsedNodesFromChunk(result.nodes);
+            // Merge shared context from workers for enabling cross-chunk references
+            if (result.sharedContext && result.sharedContext.length > 0) {
+                parser.mergeSerializedSharedContext(result.sharedContext);
+            }
+            // Collect deferred edges for resolution after all chunks complete
+            if (result.deferredEdges && result.deferredEdges.length > 0) {
+                parser.mergeDeferredEdges(result.deferredEdges);
+            }
+            this.progressReporter.report({
+                phase: 'parsing',
+                current: stats.chunksCompleted * config.chunkSize,
+                total: totalFiles,
+                message: `Chunk ${stats.chunksCompleted}/${stats.totalChunks}: ${totalNodesImported} nodes, ${totalEdgesImported} edges`,
+                details: {
+                    nodesCreated: totalNodesImported,
+                    edgesCreated: totalEdgesImported,
+                    chunkIndex: stats.chunksCompleted,
+                    totalChunks: stats.totalChunks,
+                },
+            });
+            debugLog(`Chunk ${result.chunkIndex + 1}/${stats.totalChunks}: ${result.nodes.length} nodes, ${result.edges.length} edges (imported)`);
+        });
+        debugLog(`Parallel parse+import complete: ${poolResult.totalNodes} nodes, ${poolResult.totalEdges} edges in ${poolResult.elapsedMs}ms`);
+        this.progressReporter.report({
+            phase: 'resolving',
+            current: totalFiles,
+            total: totalFiles,
+            message: 'Resolving cross-chunk edges',
+            details: {
+                nodesCreated: totalNodesImported,
+                edgesCreated: totalEdgesImported,
+                chunkIndex: chunks.length,
+                totalChunks: chunks.length,
+            },
+        });
+        const resolvedEdges = await parser.resolveDeferredEdges();
+        if (resolvedEdges.length > 0) {
+            await this.importToNeo4j([], resolvedEdges);
+            totalEdgesImported += resolvedEdges.length;
+            await debugLog(`Resolved ${resolvedEdges.length} cross-chunk edges`);
+        }
+        parser.loadFrameworkSchemasForType(config.projectType);
+        const enhancedEdges = await parser.applyEdgeEnhancementsManually();
+        if (enhancedEdges.length > 0) {
+            await this.importToNeo4j([], enhancedEdges);
+            totalEdgesImported += enhancedEdges.length;
+            await debugLog(`Created ${enhancedEdges.length} edges from enhancements`);
+        }
+        parser.clearParsedData();
+        const elapsedMs = Date.now() - startTime;
+        return {
+            nodesImported: totalNodesImported,
+            edgesImported: totalEdgesImported,
+            filesProcessed: totalFiles,
+            chunksProcessed: chunks.length,
+            elapsedMs,
+        };
+    }
+    createChunks(files, chunkSize) {
+        const chunks = [];
+        for (let i = 0; i < files.length; i += chunkSize) {
+            chunks.push(files.slice(i, i + chunkSize));
+        }
+        return chunks;
+    }
+    async importToNeo4j(nodes, edges) {
+        if (nodes.length === 0 && edges.length === 0)
+            return;
+        const fs = await import('fs/promises');
+        const { randomBytes } = await import('crypto');
+        const { tmpdir } = await import('os');
+        const tempPath = join(tmpdir(), `chunk-${Date.now()}-${randomBytes(8).toString('hex')}.json`);
+        try {
+            await fs.writeFile(tempPath, JSON.stringify({ nodes, edges, metadata: { parallel: true } }));
+            await this.graphGeneratorHandler.generateGraph(tempPath, 100, false);
+        }
+        finally {
+            try {
+                await fs.unlink(tempPath);
+            }
+            catch {
+                // Ignore cleanup errors
+            }
+        }
+    }
+}