code-graph-context 2.0.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +221 -2
- package/dist/constants.js +167 -0
- package/dist/core/config/fairsquare-framework-schema.js +9 -7
- package/dist/core/config/schema.js +41 -2
- package/dist/core/embeddings/natural-language-to-cypher.service.js +166 -110
- package/dist/core/parsers/typescript-parser.js +1039 -742
- package/dist/core/parsers/workspace-parser.js +175 -193
- package/dist/core/utils/code-normalizer.js +299 -0
- package/dist/core/utils/file-change-detection.js +17 -2
- package/dist/core/utils/file-utils.js +40 -5
- package/dist/core/utils/graph-factory.js +161 -0
- package/dist/core/utils/shared-utils.js +79 -0
- package/dist/core/workspace/workspace-detector.js +59 -5
- package/dist/mcp/constants.js +261 -8
- package/dist/mcp/handlers/graph-generator.handler.js +1 -0
- package/dist/mcp/handlers/incremental-parse.handler.js +22 -6
- package/dist/mcp/handlers/parallel-import.handler.js +136 -0
- package/dist/mcp/handlers/streaming-import.handler.js +14 -59
- package/dist/mcp/mcp.server.js +77 -2
- package/dist/mcp/services/job-manager.js +5 -8
- package/dist/mcp/services/watch-manager.js +64 -25
- package/dist/mcp/tools/detect-dead-code.tool.js +413 -0
- package/dist/mcp/tools/detect-duplicate-code.tool.js +450 -0
- package/dist/mcp/tools/hello.tool.js +16 -2
- package/dist/mcp/tools/impact-analysis.tool.js +20 -4
- package/dist/mcp/tools/index.js +37 -0
- package/dist/mcp/tools/parse-typescript-project.tool.js +15 -14
- package/dist/mcp/tools/swarm-cleanup.tool.js +157 -0
- package/dist/mcp/tools/swarm-constants.js +35 -0
- package/dist/mcp/tools/swarm-pheromone.tool.js +196 -0
- package/dist/mcp/tools/swarm-sense.tool.js +212 -0
- package/dist/mcp/workers/chunk-worker-pool.js +196 -0
- package/dist/mcp/workers/chunk-worker.types.js +4 -0
- package/dist/mcp/workers/chunk.worker.js +89 -0
- package/dist/mcp/workers/parse-coordinator.js +183 -0
- package/dist/mcp/workers/worker.pool.js +54 -0
- package/dist/storage/neo4j/neo4j.service.js +198 -14
- package/package.json +1 -1
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Swarm Sense Tool
|
|
3
|
+
* Query pheromones in the code graph for stigmergic coordination
|
|
4
|
+
*/
|
|
5
|
+
import { z } from 'zod';
|
|
6
|
+
import { Neo4jService } from '../../storage/neo4j/neo4j.service.js';
|
|
7
|
+
import { TOOL_NAMES, TOOL_METADATA } from '../constants.js';
|
|
8
|
+
import { createErrorResponse, createSuccessResponse, resolveProjectIdOrError, debugLog } from '../utils.js';
|
|
9
|
+
import { PHEROMONE_TYPES } from './swarm-constants.js';
|
|
10
|
+
/**
 * Neo4j query to sense pheromones with decay calculation.
 *
 * Effective intensity decays exponentially with a half-life:
 *   intensity * exp(-ln(2) * ageMs / halfLifeMs)   (0.693147 ~= ln 2)
 * so a pheromone loses half its intensity every `halfLife` milliseconds.
 * A null or non-positive halfLife means the pheromone never decays.
 *
 * Uses nodeId-based matching (self-healing) instead of a [:MARKS] relationship:
 * this survives graph rebuilds since nodeIds are deterministic.
 *
 * Parameters: $projectId (required), $types / $nodeIds / $agentIds (list
 * filters, skipped when null or empty), $swarmId / $excludeAgentId (skipped
 * when null), $minIntensity (post-decay threshold), $limit (row cap).
 */
const SENSE_PHEROMONES_QUERY = `
// Match pheromones scoped to project, optionally filtering by type
MATCH (p:Pheromone)
WHERE p.projectId = $projectId
AND ($types IS NULL OR size($types) = 0 OR p.type IN $types)
AND ($nodeIds IS NULL OR size($nodeIds) = 0 OR p.nodeId IN $nodeIds)
AND ($agentIds IS NULL OR size($agentIds) = 0 OR p.agentId IN $agentIds)
AND ($swarmId IS NULL OR p.swarmId = $swarmId)
AND ($excludeAgentId IS NULL OR p.agentId <> $excludeAgentId)

// Calculate current intensity with exponential decay
WITH p,
CASE
WHEN p.halfLife IS NULL OR p.halfLife <= 0 THEN p.intensity
ELSE p.intensity * exp(-0.693147 * (timestamp() - p.timestamp) / p.halfLife)
END AS currentIntensity

// Filter by minimum intensity
WHERE currentIntensity >= $minIntensity

// Find target by nodeId (self-healing - survives graph rebuilds)
OPTIONAL MATCH (target)
WHERE target.id = p.nodeId AND target.projectId = p.projectId

// Return pheromone data
RETURN
p.id AS id,
p.projectId AS projectId,
p.nodeId AS nodeId,
p.type AS type,
p.intensity AS originalIntensity,
currentIntensity,
p.agentId AS agentId,
p.swarmId AS swarmId,
p.timestamp AS timestamp,
p.data AS data,
p.halfLife AS halfLifeMs,
CASE WHEN target IS NOT NULL THEN labels(target)[0] ELSE null END AS targetType,
CASE WHEN target IS NOT NULL THEN target.name ELSE null END AS targetName,
CASE WHEN target IS NOT NULL THEN target.filePath ELSE null END AS targetFilePath

ORDER BY currentIntensity DESC, p.timestamp DESC
LIMIT toInteger($limit)
`;
|
|
59
|
+
/**
 * Neo4j query to get pheromone summary statistics.
 *
 * Groups project pheromones by type and returns count, average effective
 * intensity, and the distinct contributing agent IDs per type. Applies the
 * same exponential half-life decay formula as the sense query
 * (exp(-ln(2) * age / halfLife)) before filtering by $minIntensity.
 *
 * Parameters: $projectId, $minIntensity.
 */
const PHEROMONE_STATS_QUERY = `
MATCH (p:Pheromone)
WHERE p.projectId = $projectId
WITH p,
CASE
WHEN p.halfLife IS NULL OR p.halfLife <= 0 THEN p.intensity
ELSE p.intensity * exp(-0.693147 * (timestamp() - p.timestamp) / p.halfLife)
END AS currentIntensity
WHERE currentIntensity >= $minIntensity

RETURN
p.type AS type,
count(p) AS count,
avg(currentIntensity) AS avgIntensity,
collect(DISTINCT p.agentId) AS agents
ORDER BY count DESC
`;
|
|
79
|
+
/**
 * Neo4j query to clean up fully decayed pheromones for a project.
 *
 * Deletes every pheromone whose decayed intensity has dropped below 0.01.
 * Only pheromones with a positive halfLife are eligible — pheromones without
 * a halfLife never decay and are therefore never cleaned up here.
 * Returns the number of deleted rows as `cleaned`.
 *
 * Parameters: $projectId.
 */
const CLEANUP_DECAYED_QUERY = `
MATCH (p:Pheromone)
WHERE p.projectId = $projectId
AND p.halfLife IS NOT NULL
AND p.halfLife > 0
AND p.intensity * exp(-0.693147 * (timestamp() - p.timestamp) / p.halfLife) < 0.01
DETACH DELETE p
RETURN count(p) AS cleaned
`;
|
|
91
|
+
/**
 * Registers the swarm-sense MCP tool on the given server.
 *
 * The tool queries Pheromone nodes in the code graph for stigmergic
 * (environment-mediated) agent coordination: filters by type / node / agent /
 * swarm, applies server-side exponential decay, optionally returns per-type
 * statistics, and can trigger cleanup of fully decayed pheromones.
 *
 * Fix over previous version: stored pheromone payloads (`p.data`) are parsed
 * defensively — one corrupt JSON string no longer fails the whole response.
 *
 * @param server - MCP server instance exposing registerTool().
 */
export const createSwarmSenseTool = (server) => {
    server.registerTool(TOOL_NAMES.swarmSense, {
        title: TOOL_METADATA[TOOL_NAMES.swarmSense].title,
        description: TOOL_METADATA[TOOL_NAMES.swarmSense].description,
        inputSchema: {
            projectId: z.string().describe('Project ID, name, or path (e.g., "backend" or "proj_a1b2c3d4e5f6")'),
            types: z
                .array(z.enum(PHEROMONE_TYPES))
                .optional()
                .describe('Filter by pheromone types. If empty, returns all types. Options: exploring, modifying, claiming, completed, warning, blocked, proposal, needs_review'),
            nodeIds: z.array(z.string()).optional().describe('Filter by specific node IDs. If empty, searches all nodes.'),
            agentIds: z
                .array(z.string())
                .optional()
                .describe('Filter by specific agent IDs. If empty, returns pheromones from all agents.'),
            swarmId: z.string().optional().describe('Filter by swarm ID. If empty, returns pheromones from all swarms.'),
            excludeAgentId: z
                .string()
                .optional()
                .describe('Exclude pheromones from this agent ID (useful for seeing what OTHER agents are doing)'),
            minIntensity: z
                .number()
                .min(0)
                .max(1)
                .optional()
                .default(0.3)
                .describe('Minimum effective intensity after decay (0.0-1.0, default: 0.3)'),
            limit: z
                .number()
                .int()
                .min(1)
                .max(500)
                .optional()
                .default(50)
                .describe('Maximum number of pheromones to return (default: 50, max: 500)'),
            includeStats: z.boolean().optional().default(false).describe('Include summary statistics by pheromone type'),
            cleanup: z
                .boolean()
                .optional()
                .default(false)
                .describe('Run cleanup of fully decayed pheromones (intensity < 0.01)'),
        },
    }, async ({ projectId, types, nodeIds, agentIds, swarmId, excludeAgentId, minIntensity = 0.3, limit = 50, includeStats = false, cleanup = false, }) => {
        const neo4jService = new Neo4jService();
        // Resolve project ID (accepts an id, a project name, or a path)
        const projectResult = await resolveProjectIdOrError(projectId, neo4jService);
        if (!projectResult.success) {
            await neo4jService.close();
            return projectResult.error;
        }
        const resolvedProjectId = projectResult.projectId;
        try {
            // Defensive JSON parse: a single malformed stored payload must not
            // turn the whole sense query into an error response.
            const safeParseJson = (raw) => {
                if (!raw)
                    return null;
                try {
                    return JSON.parse(raw);
                }
                catch {
                    return null;
                }
            };
            const result = {
                pheromones: [],
                projectId: resolvedProjectId,
                query: {
                    types: types ?? null,
                    minIntensity,
                    limit,
                },
            };
            // Run cleanup of fully decayed pheromones if requested
            if (cleanup) {
                const cleanupResult = await neo4jService.run(CLEANUP_DECAYED_QUERY, { projectId: resolvedProjectId });
                result.cleaned = cleanupResult[0]?.cleaned ?? 0;
            }
            // Query pheromones (ensure limit is integer for Neo4j LIMIT clause)
            const pheromones = await neo4jService.run(SENSE_PHEROMONES_QUERY, {
                projectId: resolvedProjectId,
                types: types ?? null,
                nodeIds: nodeIds ?? null,
                agentIds: agentIds ?? null,
                swarmId: swarmId ?? null,
                excludeAgentId: excludeAgentId ?? null,
                minIntensity,
                limit: Math.floor(limit),
            });
            result.pheromones = pheromones.map((p) => {
                // Convert Neo4j Integer to a plain JS number where needed
                const ts = typeof p.timestamp === 'object' && p.timestamp?.toNumber ? p.timestamp.toNumber() : p.timestamp;
                return {
                    id: p.id,
                    projectId: p.projectId,
                    nodeId: p.nodeId,
                    type: p.type,
                    intensity: Math.round(p.currentIntensity * 1000) / 1000, // Round to 3 decimals
                    originalIntensity: p.originalIntensity,
                    agentId: p.agentId,
                    swarmId: p.swarmId,
                    timestamp: ts,
                    age: ts ? `${Math.round((Date.now() - ts) / 1000)}s ago` : null,
                    data: safeParseJson(p.data),
                    target: p.targetType
                        ? {
                            type: p.targetType,
                            name: p.targetName,
                            filePath: p.targetFilePath,
                        }
                        : null,
                };
            });
            // Include per-type summary statistics if requested
            if (includeStats) {
                const stats = await neo4jService.run(PHEROMONE_STATS_QUERY, { projectId: resolvedProjectId, minIntensity });
                result.stats = stats.map((s) => ({
                    type: s.type,
                    count: typeof s.count === 'object' ? s.count.toNumber() : s.count,
                    avgIntensity: Math.round(s.avgIntensity * 1000) / 1000,
                    activeAgents: s.agents,
                }));
            }
            return createSuccessResponse(JSON.stringify(result, null, 2));
        }
        catch (error) {
            await debugLog('Swarm sense error', { error: String(error) });
            return createErrorResponse(error instanceof Error ? error : String(error));
        }
        finally {
            // Always release the driver connection, success or failure
            await neo4jService.close();
        }
    });
};
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunk Worker Pool
|
|
3
|
+
* Manages a pool of chunk workers for parallel parsing.
|
|
4
|
+
* Uses message passing (pull model): workers signal ready, coordinator sends chunks.
|
|
5
|
+
* Streams results as they complete for pipelined importing.
|
|
6
|
+
*/
|
|
7
|
+
import { cpus } from 'os';
|
|
8
|
+
import { dirname, join } from 'path';
|
|
9
|
+
import { fileURLToPath } from 'url';
|
|
10
|
+
import { Worker } from 'worker_threads';
|
|
11
|
+
import { debugLog } from '../utils.js';
|
|
12
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
13
|
+
const __dirname = dirname(__filename);
|
|
14
|
+
/**
 * Chunk Worker Pool
 * Manages a pool of chunk workers for parallel parsing.
 * Uses message passing (pull model): workers signal ready, coordinator sends chunks.
 * Streams results as they complete for pipelined importing.
 *
 * A pool instance is single-use: once shutdown() has run it is not reusable.
 */
export class ChunkWorkerPool {
    config;
    workers = [];
    chunkQueue = [];
    totalChunks = 0;
    completedChunks = 0;
    totalNodes = 0;
    totalEdges = 0;
    totalFiles = 0;
    startTime = 0;
    resolve = null;
    reject = null;
    onChunkComplete = null;
    pendingCallbacks = [];
    isShuttingDown = false;
    constructor(config) {
        this.config = config;
        // Safety net so worker threads never outlive the parent process.
        // NOTE(review): this registers one process-level 'exit' listener per
        // pool instance; creating many pools would accumulate listeners.
        process.on('exit', () => {
            this.forceTerminateAll();
        });
    }
    /**
     * Process chunks in parallel using worker pool.
     * Calls onChunkComplete for EACH result as it arrives (for pipelined importing).
     * Returns final stats when all chunks AND all callbacks are complete.
     *
     * @param chunks - array of file lists; each list is parsed as one worker task
     * @param onChunkComplete - optional async callback fired per finished chunk
     * @returns aggregate stats (nodes, edges, files, chunk counts, elapsed ms)
     */
    async processChunks(chunks, onChunkComplete) {
        this.startTime = Date.now();
        this.totalChunks = chunks.length;
        this.completedChunks = 0;
        this.totalNodes = 0;
        this.totalEdges = 0;
        this.totalFiles = 0;
        this.onChunkComplete = onChunkComplete;
        this.pendingCallbacks = [];
        // Bug fix: with zero chunks no worker ever posts a result, so the
        // completion check in handleResult never fires and the promise below
        // would hang forever. Resolve immediately instead.
        if (chunks.length === 0) {
            return this.getStats();
        }
        this.chunkQueue = chunks.map((files, index) => ({
            type: 'chunk',
            chunkIndex: index,
            totalChunks: chunks.length,
            files,
        }));
        // Default to 75% of the CPUs; clamp to at least one worker so a
        // misconfigured numWorkers (e.g. 0) cannot stall the pool the same way.
        const numWorkers = Math.max(1, this.config.numWorkers ?? Math.floor(cpus().length * 0.75));
        const actualWorkers = Math.min(numWorkers, chunks.length);
        debugLog(`Spawning ${actualWorkers} chunk workers for ${chunks.length} chunks`);
        return new Promise((resolve, reject) => {
            this.resolve = resolve;
            this.reject = reject;
            for (let i = 0; i < actualWorkers; i++) {
                this.spawnWorker();
            }
        });
    }
    /** Spawn one worker thread and wire up its message/error/exit handlers. */
    spawnWorker() {
        const workerPath = join(__dirname, 'chunk.worker.js');
        const workerConfig = {
            projectPath: this.config.projectPath,
            tsconfigPath: this.config.tsconfigPath,
            projectId: this.config.projectId,
            projectType: this.config.projectType,
        };
        const worker = new Worker(workerPath, {
            workerData: workerConfig,
            // Cap per-worker heap so a runaway parse cannot exhaust host memory
            resourceLimits: {
                maxOldGenerationSizeMb: 2048,
                maxYoungGenerationSizeMb: 512,
            },
        });
        const state = { worker, busy: false };
        this.workers.push(state);
        worker.on('message', (msg) => {
            this.handleWorkerMessage(state, msg);
        });
        worker.on('error', (error) => {
            debugLog('Worker error', { error: error.message });
            this.reject?.(error);
            void this.shutdown();
        });
        worker.on('exit', (code) => {
            // Only treat a non-zero exit as fatal while work is still pending
            if (code !== 0 && this.completedChunks < this.totalChunks) {
                this.reject?.(new Error(`Worker exited with code ${code}`));
                void this.shutdown();
            }
        });
    }
    /** Route a worker message: 'ready' pulls the next chunk, 'result'/'error' finish one. */
    handleWorkerMessage(state, msg) {
        switch (msg.type) {
            case 'ready':
                state.busy = false;
                this.dispatchNextChunk(state);
                break;
            case 'result':
                this.handleResult(msg);
                break;
            case 'error':
                debugLog(`Chunk ${msg.chunkIndex} failed`, { error: msg.error });
                this.reject?.(new Error(`Chunk ${msg.chunkIndex} failed: ${msg.error}`));
                void this.shutdown();
                break;
        }
    }
    /** Accumulate stats for one finished chunk and fire the streaming callback. */
    handleResult(msg) {
        this.completedChunks++;
        this.totalNodes += msg.nodes.length;
        this.totalEdges += msg.edges.length;
        this.totalFiles += msg.filesProcessed;
        const result = {
            chunkIndex: msg.chunkIndex,
            nodes: msg.nodes,
            edges: msg.edges,
            filesProcessed: msg.filesProcessed,
            sharedContext: msg.sharedContext,
            deferredEdges: msg.deferredEdges,
        };
        const stats = this.getStats();
        // Fire callback immediately - enables pipelined importing
        if (this.onChunkComplete) {
            const callbackPromise = this.onChunkComplete(result, stats).catch((err) => {
                debugLog(`Import callback failed for chunk ${msg.chunkIndex}`, {
                    error: err instanceof Error ? err.message : String(err),
                });
                // Rethrow so completeWhenCallbacksDone() rejects the pool promise
                throw err;
            });
            this.pendingCallbacks.push(callbackPromise);
        }
        // Check if all parsing is done
        if (this.completedChunks === this.totalChunks) {
            this.completeWhenCallbacksDone();
        }
    }
    /** Wait for all streamed import callbacks, then shut down and settle the pool promise. */
    async completeWhenCallbacksDone() {
        try {
            await Promise.all(this.pendingCallbacks);
        }
        catch (error) {
            this.reject?.(error instanceof Error ? error : new Error(String(error)));
            await this.shutdown();
            return;
        }
        await this.shutdown();
        this.resolve?.(this.getStats());
    }
    /** Snapshot of aggregate progress/throughput counters. */
    getStats() {
        return {
            totalNodes: this.totalNodes,
            totalEdges: this.totalEdges,
            totalFiles: this.totalFiles,
            chunksCompleted: this.completedChunks,
            totalChunks: this.totalChunks,
            elapsedMs: Date.now() - this.startTime,
        };
    }
    /** Pull model: hand the next queued chunk to an idle worker (no-op when drained). */
    dispatchNextChunk(state) {
        if (this.chunkQueue.length === 0) {
            return;
        }
        const chunk = this.chunkQueue.shift();
        state.busy = true;
        state.worker.postMessage(chunk);
    }
    /**
     * Graceful shutdown - lets workers finish cleanup
     * Call this on normal completion
     */
    async shutdown() {
        if (this.isShuttingDown)
            return;
        this.isShuttingDown = true;
        const exitPromises = this.workers.map(({ worker }) => {
            return new Promise((resolve) => {
                worker.on('exit', () => resolve());
                worker.postMessage({ type: 'terminate' });
            });
        });
        // Give workers up to 15s to exit cleanly, then force-terminate stragglers
        await Promise.race([Promise.all(exitPromises), new Promise((resolve) => setTimeout(resolve, 15000))]);
        this.forceTerminateAll();
    }
    /** Hard-kill every remaining worker (used by shutdown timeout and process exit). */
    forceTerminateAll() {
        for (const { worker } of this.workers) {
            worker.terminate();
        }
        this.workers = [];
    }
}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunk Worker
|
|
3
|
+
* Receives file chunks from coordinator, parses them, returns nodes/edges.
|
|
4
|
+
* Each worker creates its own parser with lazyLoad=true for memory efficiency.
|
|
5
|
+
*/
|
|
6
|
+
import { dirname, join } from 'path';
|
|
7
|
+
import { fileURLToPath } from 'url';
|
|
8
|
+
import { parentPort, workerData } from 'worker_threads';
|
|
9
|
+
// Load environment variables in worker thread
|
|
10
|
+
import dotenv from 'dotenv';
|
|
11
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
12
|
+
const __dirname = dirname(__filename);
|
|
13
|
+
dotenv.config({ path: join(__dirname, '..', '..', '..', '.env') });
|
|
14
|
+
import { ParserFactory } from '../../core/parsers/parser-factory.js';
|
|
15
|
+
// Per-worker configuration handed over by the coordinator via workerData
// (projectPath, tsconfigPath, projectId, projectType).
const config = workerData;
// Lazily-created parser, reused across chunks within this worker.
let parser = null;
// Tell the coordinator this worker is idle and can accept the next chunk.
const sendReady = () => {
    const msg = { type: 'ready' };
    parentPort?.postMessage(msg);
};
// Post a parsed chunk's result (nodes, edges, shared context, deferred edges).
const sendResult = (result) => {
    const msg = { type: 'result', ...result };
    parentPort?.postMessage(msg);
};
// Report a chunk-level failure with message and stack for coordinator logging.
const sendError = (chunkIndex, error) => {
    const msg = {
        type: 'error',
        chunkIndex,
        error: error.message,
        stack: error.stack,
    };
    parentPort?.postMessage(msg);
};
/**
 * Initialize parser lazily on first chunk.
 * Uses lazyLoad=true so parser only loads files we give it.
 * projectType is already resolved by coordinator (no auto-detection here).
 *
 * @returns the cached parser instance (created once per worker).
 */
const initParser = () => {
    if (parser)
        return parser;
    parser = ParserFactory.createParser({
        workspacePath: config.projectPath,
        tsConfigPath: config.tsconfigPath,
        projectType: config.projectType,
        projectId: config.projectId,
        lazyLoad: true, // Critical: only load files we're given
    });
    // Defer edge enhancements - coordinator will handle after all chunks complete
    parser.setDeferEdgeEnhancements(true);
    return parser;
};
/**
 * Parse one chunk of files and post the result (or an error) back to the
 * coordinator. Never throws: all failures are converted to 'error' messages.
 *
 * @param files - file paths for this chunk
 * @param chunkIndex - position of this chunk in the overall job
 */
const processChunk = async (files, chunkIndex) => {
    try {
        const p = initParser();
        // Clear any accumulated data from previous chunks
        p.clearParsedData();
        // Parse chunk - skip deferred edge resolution (coordinator handles that)
        const { nodes, edges } = await p.parseChunk(files, true);
        // Get serialized shared context for merging in coordinator
        const sharedContext = p.getSerializedSharedContext();
        // Get deferred edges for cross-chunk resolution
        const deferredEdges = p.getDeferredEdges();
        sendResult({
            chunkIndex,
            nodes,
            edges,
            filesProcessed: files.length,
            sharedContext,
            deferredEdges,
        });
    }
    catch (error) {
        sendError(chunkIndex, error instanceof Error ? error : new Error(String(error)));
    }
};
// Message loop (pull model): parse each 'chunk' then re-announce readiness;
// 'terminate' releases parser memory and exits this worker thread.
parentPort?.on('message', async (msg) => {
    switch (msg.type) {
        case 'chunk':
            await processChunk(msg.files, msg.chunkIndex);
            sendReady();
            break;
        case 'terminate':
            parser?.clearParsedData();
            process.exit(0);
            break;
    }
});
// Initial handshake: signal readiness so the coordinator dispatches work.
sendReady();
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parse Coordinator
|
|
3
|
+
* Runs TypeScript parsing in a separate thread to avoid blocking the MCP server.
|
|
4
|
+
* For large projects, spawns a worker pool for parallel chunk parsing.
|
|
5
|
+
*/
|
|
6
|
+
import { dirname, join } from 'path';
|
|
7
|
+
import { fileURLToPath } from 'url';
|
|
8
|
+
import { parentPort, workerData } from 'worker_threads';
|
|
9
|
+
// Load environment variables in worker thread
|
|
10
|
+
import dotenv from 'dotenv';
|
|
11
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
12
|
+
const __dirname = dirname(__filename);
|
|
13
|
+
dotenv.config({ path: join(__dirname, '..', '..', '..', '.env') });
|
|
14
|
+
import { EmbeddingsService } from '../../core/embeddings/embeddings.service.js';
|
|
15
|
+
import { ParserFactory } from '../../core/parsers/parser-factory.js';
|
|
16
|
+
import { WorkspaceParser } from '../../core/parsers/workspace-parser.js';
|
|
17
|
+
import { debugLog } from '../../core/utils/file-utils.js';
|
|
18
|
+
import { getProjectName, UPSERT_PROJECT_QUERY, UPDATE_PROJECT_STATUS_QUERY } from '../../core/utils/project-id.js';
|
|
19
|
+
import { WorkspaceDetector } from '../../core/workspace/index.js';
|
|
20
|
+
import { Neo4jService, QUERIES } from '../../storage/neo4j/neo4j.service.js';
|
|
21
|
+
import { PARSING } from '../constants.js';
|
|
22
|
+
import { GraphGeneratorHandler } from '../handlers/graph-generator.handler.js';
|
|
23
|
+
import { ParallelImportHandler } from '../handlers/parallel-import.handler.js';
|
|
24
|
+
import { StreamingImportHandler } from '../handlers/streaming-import.handler.js';
|
|
25
|
+
/** Post a message to the parent thread; a no-op when run on the main thread. */
const sendMessage = (msg) => parentPort?.postMessage(msg);
/**
 * Emit a structured progress update to the parent thread.
 *
 * @param phase - current pipeline phase (e.g. 'discovery')
 * @param filesProcessed / filesTotal - file-level progress counters
 * @param nodesImported / edgesImported - graph import counters
 * @param currentChunk / totalChunks - chunk-level progress counters
 */
const sendProgress = (phase, filesProcessed, filesTotal, nodesImported, edgesImported, currentChunk, totalChunks) => {
    const data = { phase, filesProcessed, filesTotal, nodesImported, edgesImported, currentChunk, totalChunks };
    sendMessage({ type: 'progress', data });
};
|
|
34
|
+
/**
 * Main coordinator routine: detects the workspace layout, builds the right
 * parser, clears and re-creates the project graph in Neo4j, then imports the
 * parsed nodes/edges either in parallel (worker pool) or sequentially
 * (streaming), reporting progress to the parent thread throughout.
 * On failure the project is marked 'failed' and an 'error' message is posted.
 */
const runParser = async () => {
    const config = workerData;
    const startTime = Date.now();
    // Falls back to the configured id; replaced once a parser resolves the real one
    let resolvedProjectId = config.projectId;
    let neo4jService = null;
    try {
        sendProgress('discovery', 0, 0, 0, 0, 0, 0);
        neo4jService = new Neo4jService();
        const embeddingsService = new EmbeddingsService();
        const graphGeneratorHandler = new GraphGeneratorHandler(neo4jService, embeddingsService);
        const lazyLoad = true;
        const workspaceDetector = new WorkspaceDetector();
        await debugLog('Detecting workspace', { projectPath: config.projectPath });
        const workspaceConfig = await workspaceDetector.detect(config.projectPath);
        await debugLog('Workspace detection result', {
            type: workspaceConfig.type,
            rootPath: workspaceConfig.rootPath,
            packageCount: workspaceConfig.packages.length,
        });
        // Resolve 'auto' to a concrete project type before choosing a parser
        let detectedProjectType;
        if (config.projectType === 'auto') {
            detectedProjectType = await ParserFactory.detectProjectType(config.projectPath);
            await debugLog('Auto-detected project type', { projectType: detectedProjectType });
        }
        else {
            detectedProjectType = config.projectType;
        }
        // Multi-package workspaces get the WorkspaceParser; everything else
        // uses the single-project parser from the factory.
        let parser;
        if (workspaceConfig.type !== 'single' && workspaceConfig.packages.length > 1) {
            await debugLog('Using WorkspaceParser', {
                type: workspaceConfig.type,
                packageCount: workspaceConfig.packages.length,
            });
            parser = new WorkspaceParser(workspaceConfig, config.projectId, lazyLoad, detectedProjectType);
            resolvedProjectId = parser.getProjectId();
        }
        else {
            await debugLog('Using single project mode');
            parser = ParserFactory.createParser({
                workspacePath: config.projectPath,
                tsConfigPath: config.tsconfigPath,
                projectType: detectedProjectType,
                projectId: config.projectId,
                lazyLoad,
            });
            resolvedProjectId = parser.getProjectId();
        }
        const sourceFiles = await parser.discoverSourceFiles();
        const totalFiles = sourceFiles.length;
        // Non-positive configured chunk sizes fall back to the default
        const chunkSize = config.chunkSize > 0 ? config.chunkSize : PARSING.defaultChunkSize;
        graphGeneratorHandler.setProjectId(resolvedProjectId);
        // Full rebuild: wipe any previous graph for this project first
        await neo4jService.run(QUERIES.CLEAR_PROJECT, { projectId: resolvedProjectId });
        const projectName = await getProjectName(config.projectPath);
        await neo4jService.run(UPSERT_PROJECT_QUERY, {
            projectId: resolvedProjectId,
            name: projectName,
            path: config.projectPath,
            status: 'parsing',
        });
        await debugLog('Project node created', { projectId: resolvedProjectId, name: projectName });
        let totalNodesImported = 0;
        let totalEdgesImported = 0;
        // Bridge handler progress events to parent-thread progress messages
        const onProgress = async (progress) => {
            sendProgress(progress.phase, progress.current, progress.total, progress.details?.nodesCreated ?? 0, progress.details?.edgesCreated ?? 0, progress.details?.chunkIndex ?? 0, progress.details?.totalChunks ?? 0);
        };
        // Large projects go through the parallel worker-pool importer
        const useParallel = totalFiles >= PARSING.parallelThreshold;
        if (useParallel) {
            await debugLog('Using parallel parsing', { totalFiles });
            const parallelHandler = new ParallelImportHandler(graphGeneratorHandler);
            const result = await parallelHandler.importProjectParallel(parser, sourceFiles, {
                chunkSize,
                projectId: resolvedProjectId,
                projectPath: config.projectPath,
                tsconfigPath: config.tsconfigPath,
                projectType: detectedProjectType,
                onProgress,
            });
            totalNodesImported = result.nodesImported;
            totalEdgesImported = result.edgesImported;
        }
        else {
            await debugLog('Using sequential parsing', { totalFiles });
            const streamingHandler = new StreamingImportHandler(graphGeneratorHandler);
            const result = await streamingHandler.importProjectStreaming(parser, {
                chunkSize,
                projectId: resolvedProjectId,
                onProgress,
            });
            totalNodesImported = result.nodesImported;
            totalEdgesImported = result.edgesImported;
        }
        // Mark the project graph as complete with final counts
        await neo4jService.run(UPDATE_PROJECT_STATUS_QUERY, {
            projectId: resolvedProjectId,
            status: 'complete',
            nodeCount: totalNodesImported,
            edgeCount: totalEdgesImported,
        });
        await debugLog('Project node updated', {
            projectId: resolvedProjectId,
            status: 'complete',
            nodeCount: totalNodesImported,
            edgeCount: totalEdgesImported,
        });
        sendMessage({
            type: 'complete',
            data: {
                nodesImported: totalNodesImported,
                edgesImported: totalEdgesImported,
                elapsedMs: Date.now() - startTime,
            },
        });
    }
    catch (error) {
        // Best-effort: record the failure on the Project node so the UI does
        // not show a permanently 'parsing' project.
        try {
            const serviceForUpdate = neo4jService ?? new Neo4jService();
            await serviceForUpdate.run(UPDATE_PROJECT_STATUS_QUERY, {
                projectId: resolvedProjectId,
                status: 'failed',
                nodeCount: 0,
                edgeCount: 0,
            });
            // Only close a service we created here; the outer one is closed in finally
            if (!neo4jService) {
                await serviceForUpdate.close();
            }
        }
        catch {
            // Ignore errors updating project status on failure
        }
        sendMessage({
            type: 'error',
            error: error.message ?? String(error),
        });
    }
    finally {
        if (neo4jService) {
            try {
                await neo4jService.close();
            }
            catch {
                // Ignore cleanup errors
            }
        }
    }
};
// Entry point: any rejection escaping runParser is still reported to the parent
runParser().catch((err) => {
    sendMessage({
        type: 'error',
        error: err.message ?? String(err),
    });
});
|