code-graph-context 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +221 -101
- package/dist/core/config/fairsquare-framework-schema.js +47 -60
- package/dist/core/config/nestjs-framework-schema.js +11 -1
- package/dist/core/config/schema.js +1 -1
- package/dist/core/config/timeouts.js +27 -0
- package/dist/core/embeddings/embeddings.service.js +122 -2
- package/dist/core/embeddings/natural-language-to-cypher.service.js +428 -30
- package/dist/core/parsers/parser-factory.js +6 -6
- package/dist/core/parsers/typescript-parser.js +639 -44
- package/dist/core/parsers/workspace-parser.js +553 -0
- package/dist/core/utils/edge-factory.js +37 -0
- package/dist/core/utils/file-change-detection.js +105 -0
- package/dist/core/utils/file-utils.js +20 -0
- package/dist/core/utils/index.js +3 -0
- package/dist/core/utils/path-utils.js +75 -0
- package/dist/core/utils/progress-reporter.js +112 -0
- package/dist/core/utils/project-id.js +176 -0
- package/dist/core/utils/retry.js +41 -0
- package/dist/core/workspace/index.js +4 -0
- package/dist/core/workspace/workspace-detector.js +221 -0
- package/dist/mcp/constants.js +172 -7
- package/dist/mcp/handlers/cross-file-edge.helpers.js +19 -0
- package/dist/mcp/handlers/file-change-detection.js +105 -0
- package/dist/mcp/handlers/graph-generator.handler.js +97 -32
- package/dist/mcp/handlers/incremental-parse.handler.js +146 -0
- package/dist/mcp/handlers/streaming-import.handler.js +210 -0
- package/dist/mcp/handlers/traversal.handler.js +130 -71
- package/dist/mcp/mcp.server.js +46 -7
- package/dist/mcp/service-init.js +79 -0
- package/dist/mcp/services/job-manager.js +165 -0
- package/dist/mcp/services/watch-manager.js +376 -0
- package/dist/mcp/services.js +48 -127
- package/dist/mcp/tools/check-parse-status.tool.js +64 -0
- package/dist/mcp/tools/impact-analysis.tool.js +319 -0
- package/dist/mcp/tools/index.js +15 -1
- package/dist/mcp/tools/list-projects.tool.js +62 -0
- package/dist/mcp/tools/list-watchers.tool.js +51 -0
- package/dist/mcp/tools/natural-language-to-cypher.tool.js +34 -8
- package/dist/mcp/tools/parse-typescript-project.tool.js +325 -60
- package/dist/mcp/tools/search-codebase.tool.js +57 -23
- package/dist/mcp/tools/start-watch-project.tool.js +100 -0
- package/dist/mcp/tools/stop-watch-project.tool.js +49 -0
- package/dist/mcp/tools/traverse-from-node.tool.js +68 -9
- package/dist/mcp/utils.js +35 -12
- package/dist/mcp/workers/parse-worker.js +198 -0
- package/dist/storage/neo4j/neo4j.service.js +273 -34
- package/package.json +4 -2
package/dist/mcp/handlers/graph-generator.handler.js

@@ -3,6 +3,7 @@
  * Handles importing parsed graph data into Neo4j with embeddings
  */
 import fs from 'fs/promises';
+import { EMBEDDING_BATCH_CONFIG } from '../../core/embeddings/embeddings.service.js';
 import { QUERIES } from '../../storage/neo4j/neo4j.service.js';
 import { DEFAULTS } from '../constants.js';
 import { debugLog } from '../utils.js';

@@ -10,13 +11,20 @@ export class GraphGeneratorHandler {
     neo4jService;
     embeddingsService;
     static EMBEDDED_LABEL = 'Embedded';
+    projectId = null;
     constructor(neo4jService, embeddingsService) {
         this.neo4jService = neo4jService;
         this.embeddingsService = embeddingsService;
     }
+    /**
+     * Set the projectId for project-scoped operations
+     */
+    setProjectId(projectId) {
+        this.projectId = projectId;
+    }
     async generateGraph(graphJsonPath, batchSize = DEFAULTS.batchSize, clearExisting = true) {
         console.log(`Generating graph from JSON file: ${graphJsonPath}`);
-        await debugLog('Starting graph generation', { graphJsonPath, batchSize, clearExisting });
+        await debugLog('Starting graph generation', { graphJsonPath, batchSize, clearExisting, projectId: this.projectId });
         try {
             const graphData = await this.loadGraphData(graphJsonPath);
             const { nodes, edges, metadata } = graphData;

@@ -25,6 +33,7 @@ export class GraphGeneratorHandler {
         if (clearExisting) {
             await this.clearExistingData();
         }
+        await this.createProjectIndexes();
         await this.importNodes(nodes, batchSize);
         await this.importEdges(edges, batchSize);
         await this.createVectorIndexes();

@@ -47,9 +56,24 @@
         return JSON.parse(fileContent);
     }
     async clearExistingData() {
-
-
-
+        if (this.projectId) {
+            console.log(`Clearing existing graph data for project: ${this.projectId}...`);
+            await this.neo4jService.run(QUERIES.CLEAR_PROJECT, { projectId: this.projectId });
+            await debugLog('Existing project graph data cleared', { projectId: this.projectId });
+        }
+        else {
+            console.log('Clearing ALL existing graph data (no projectId set)...');
+            await this.neo4jService.run(QUERIES.CLEAR_DATABASE);
+            await debugLog('Existing graph data cleared');
+        }
+    }
+    async createProjectIndexes() {
+        console.log('Creating project indexes...');
+        await this.neo4jService.run(QUERIES.CREATE_PROJECT_INDEX_EMBEDDED);
+        await this.neo4jService.run(QUERIES.CREATE_PROJECT_INDEX_SOURCEFILE);
+        await this.neo4jService.run(QUERIES.CREATE_PROJECT_ID_INDEX_EMBEDDED);
+        await this.neo4jService.run(QUERIES.CREATE_PROJECT_ID_INDEX_SOURCEFILE);
+        await debugLog('Project indexes created');
     }
     async importNodes(nodes, batchSize) {
         console.log(`Importing ${nodes.length} nodes with embeddings...`);

@@ -65,18 +89,72 @@
             });
         }
     }
+    /**
+     * Process a batch of nodes with batched embedding calls.
+     * Collects all texts needing embedding, makes a single batched API call,
+     * then maps embeddings back to their respective nodes.
+     */
     async processNodeBatch(nodes) {
-
-
-
-
-
-
-
-
-
-
-
+        // Separate nodes that need embedding from those that don't
+        const nodesNeedingEmbedding = [];
+        const nodeResults = new Array(nodes.length);
+        // First pass: identify nodes needing embedding and prepare texts
+        nodes.forEach((node, index) => {
+            if (node.properties?.sourceCode && !node.skipEmbedding) {
+                // Truncate to stay under embedding model's 8192 token limit (~4 chars/token)
+                const truncatedCode = node.properties.sourceCode.slice(0, DEFAULTS.maxEmbeddingChars);
+                // Include node name and type in embedding for better search matching
+                // e.g., "ProfileService ClassDeclaration" helps "profile service" queries match
+                const metadata = `${node.properties.name ?? ''} ${node.labels?.join(' ') ?? ''}`.trim();
+                const embeddingText = metadata ? `${metadata}\n${truncatedCode}` : truncatedCode;
+                nodesNeedingEmbedding.push({
+                    node,
+                    index,
+                    text: embeddingText,
+                });
+            }
+            else {
+                // Node doesn't need embedding - prepare it immediately
+                nodeResults[index] = {
+                    ...node,
+                    labels: node.labels,
+                    properties: {
+                        ...this.flattenProperties(node.properties),
+                        embedding: null,
+                    },
+                };
+            }
+        });
+        // Batch embed all texts that need it
+        if (nodesNeedingEmbedding.length > 0) {
+            const texts = nodesNeedingEmbedding.map((n) => n.text);
+            try {
+                const embeddings = await this.embeddingsService.embedTextsInBatches(texts, EMBEDDING_BATCH_CONFIG.maxBatchSize);
+                // Map embeddings back to their nodes
+                nodesNeedingEmbedding.forEach((item, i) => {
+                    const embedding = embeddings[i];
+                    nodeResults[item.index] = {
+                        ...item.node,
+                        labels: embedding ? [...item.node.labels, GraphGeneratorHandler.EMBEDDED_LABEL] : item.node.labels,
+                        properties: {
+                            ...this.flattenProperties(item.node.properties),
+                            embedding,
+                        },
+                    };
+                });
+                await debugLog('Batch embedding completed', {
+                    totalNodes: nodes.length,
+                    nodesEmbedded: nodesNeedingEmbedding.length,
+                    batchesUsed: Math.ceil(texts.length / EMBEDDING_BATCH_CONFIG.maxBatchSize),
+                });
+            }
+            catch (error) {
+                // DON'T silently continue - propagate the error so user knows what's wrong
+                await debugLog('Embedding failed', { error: error instanceof Error ? error.message : String(error) });
+                throw error;
+            }
+        }
+        return nodeResults;
     }
     async importEdges(edges, batchSize) {
         console.log(`Importing ${edges.length} edges using APOC...`);

@@ -85,7 +163,10 @@
             ...edge,
             properties: this.flattenProperties(edge.properties),
         }));
-            const result = await this.neo4jService.run(QUERIES.CREATE_RELATIONSHIP, {
+            const result = await this.neo4jService.run(QUERIES.CREATE_RELATIONSHIP, {
+                edges: batch,
+                projectId: this.projectId,
+            });
             const batchEnd = Math.min(i + batchSize, edges.length);
             console.log(`Created ${result[0].created} edges in batch ${i + 1}-${batchEnd}`);
             await debugLog('Edge batch imported', {

@@ -100,22 +181,6 @@
         await this.neo4jService.run(QUERIES.CREATE_EMBEDDED_VECTOR_INDEX);
         await debugLog('Vector indexes created');
     }
-    async embedNodeSourceCode(node) {
-        if (!node.properties?.sourceCode || node.skipEmbedding) {
-            return null;
-        }
-        try {
-            const sourceCode = node.properties.sourceCode;
-            const embedding = await this.embeddingsService.embedText(sourceCode);
-            await debugLog('Node embedded', { nodeId: node.id, codeLength: sourceCode.length });
-            return embedding;
-        }
-        catch (error) {
-            console.warn(`Failed to embed node ${node.id}:`, error);
-            await debugLog('Embedding failed', { nodeId: node.id, error });
-            return null;
-        }
-    }
     flattenProperties(properties) {
         const flattened = {};
         for (const [key, value] of Object.entries(properties)) {
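
The hunks above cover the main 2.0.0 change to the import path: GraphGeneratorHandler gains a projectId, project-scoped clearing (CLEAR_PROJECT vs CLEAR_DATABASE), project indexes, and batched embeddings via embedTextsInBatches in place of the removed per-node embedNodeSourceCode. A minimal usage sketch, assuming the dist/ paths from the file list above are importable (the package's public entry points are not shown in this diff):

    // Sketch only — wiring taken from the hunks above; import paths are assumptions.
    import { Neo4jService } from 'code-graph-context/dist/storage/neo4j/neo4j.service.js';
    import { EmbeddingsService } from 'code-graph-context/dist/core/embeddings/embeddings.service.js';
    import { GraphGeneratorHandler } from 'code-graph-context/dist/mcp/handlers/graph-generator.handler.js';

    const neo4j = new Neo4jService();
    const handler = new GraphGeneratorHandler(neo4j, new EmbeddingsService());
    handler.setProjectId('my-project');              // scope clearing and edge creation to one project
    await handler.generateGraph('./graph.json');     // defaults: DEFAULTS.batchSize, clearExisting = true
    await neo4j.close();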
package/dist/mcp/handlers/incremental-parse.handler.js (new file)

@@ -0,0 +1,146 @@
+/**
+ * Incremental Parse Handler
+ * Handles incremental graph updates triggered by file watchers
+ */
+import { writeFileSync, unlinkSync } from 'fs';
+import { join } from 'path';
+import { CORE_TYPESCRIPT_SCHEMA } from '../../core/config/schema.js';
+import { EmbeddingsService } from '../../core/embeddings/embeddings.service.js';
+import { ParserFactory } from '../../core/parsers/parser-factory.js';
+import { detectChangedFiles } from '../../core/utils/file-change-detection.js';
+import { resolveProjectId, getProjectName, UPSERT_PROJECT_QUERY } from '../../core/utils/project-id.js';
+import { Neo4jService, QUERIES } from '../../storage/neo4j/neo4j.service.js';
+import { DEFAULTS, FILE_PATHS, LOG_CONFIG } from '../constants.js';
+import { debugLog } from '../utils.js';
+import { deleteSourceFileSubgraphs, loadExistingNodesForEdgeDetection, getCrossFileEdges, } from './cross-file-edge.helpers.js';
+import { GraphGeneratorHandler } from './graph-generator.handler.js';
+/**
+ * Performs incremental parsing for a project
+ * This is used by the WatchManager when files change
+ */
+export const performIncrementalParse = async (projectPath, projectId, tsconfigPath) => {
+    const neo4jService = new Neo4jService();
+    const embeddingsService = new EmbeddingsService();
+    const graphHandler = new GraphGeneratorHandler(neo4jService, embeddingsService);
+    try {
+        await debugLog('Incremental parse started (watch)', { projectPath, projectId });
+        // Resolve project ID
+        const resolvedId = resolveProjectId(projectPath, projectId);
+        const projectName = await getProjectName(projectPath);
+        // Create parser with auto-detection and lazy loading enabled for memory efficiency
+        const parser = await ParserFactory.createParserWithAutoDetection(projectPath, tsconfigPath, resolvedId, true);
+        // Detect changed files
+        const { filesToReparse, filesToDelete } = await detectChangedFiles(projectPath, neo4jService, resolvedId);
+        await debugLog('Watch incremental change detection', {
+            filesToReparse: filesToReparse.length,
+            filesToDelete: filesToDelete.length,
+        });
+        // If no changes, return early
+        if (filesToReparse.length === 0 && filesToDelete.length === 0) {
+            await debugLog('Watch incremental: no changes detected');
+            return {
+                nodesUpdated: 0,
+                edgesUpdated: 0,
+                filesReparsed: 0,
+                filesDeleted: filesToDelete.length,
+            };
+        }
+        let savedCrossFileEdges = [];
+        const filesToRemoveFromGraph = [...filesToDelete, ...filesToReparse];
+        if (filesToRemoveFromGraph.length > 0) {
+            // Save cross-file edges before deletion
+            savedCrossFileEdges = await getCrossFileEdges(neo4jService, filesToRemoveFromGraph, resolvedId);
+            await debugLog('Watch: saved cross-file edges', { count: savedCrossFileEdges.length });
+            // Delete old subgraphs
+            await deleteSourceFileSubgraphs(neo4jService, filesToRemoveFromGraph, resolvedId);
+        }
+        let nodesImported = 0;
+        let edgesImported = 0;
+        if (filesToReparse.length > 0) {
+            // Load existing nodes for edge detection
+            const existingNodes = await loadExistingNodesForEdgeDetection(neo4jService, filesToRemoveFromGraph, resolvedId);
+            parser.setExistingNodes(existingNodes);
+            // Parse only changed files
+            await parser.parseWorkspace(filesToReparse);
+            // Export graph data
+            const { nodes, edges } = parser.exportToJson();
+            // Get framework schemas if available (use unknown as intermediate to access private property)
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            const parserAny = parser;
+            const frameworkSchemas = Array.isArray(parserAny.frameworkSchemas)
+                ? parserAny.frameworkSchemas.map((s) => s.name)
+                : ['Auto-detected'];
+            const graphData = {
+                nodes,
+                edges,
+                metadata: {
+                    coreSchema: CORE_TYPESCRIPT_SCHEMA.name,
+                    frameworkSchemas,
+                    projectType: 'auto',
+                    projectId: resolvedId,
+                    generated: new Date().toISOString(),
+                },
+            };
+            // Write to JSON file (required by GraphGeneratorHandler)
+            const outputPath = join(projectPath, FILE_PATHS.graphOutput);
+            writeFileSync(outputPath, JSON.stringify(graphData, null, LOG_CONFIG.jsonIndentation));
+            // Update Project node
+            await neo4jService.run(UPSERT_PROJECT_QUERY, {
+                projectId: resolvedId,
+                path: projectPath,
+                name: projectName,
+                status: 'complete',
+            });
+            // Import nodes and edges (clearExisting = false for incremental)
+            graphHandler.setProjectId(resolvedId);
+            try {
+                const result = await graphHandler.generateGraph(outputPath, DEFAULTS.batchSize, false);
+                nodesImported = result.nodesImported;
+                edgesImported = result.edgesImported;
+            }
+            finally {
+                // Clean up temporary graph.json file
+                try {
+                    unlinkSync(outputPath);
+                }
+                catch {
+                    // Ignore cleanup errors - file may not exist or be inaccessible
+                }
+            }
+            // Recreate cross-file edges
+            if (savedCrossFileEdges.length > 0) {
+                const recreateResult = await neo4jService.run(QUERIES.RECREATE_CROSS_FILE_EDGES, {
+                    projectId: resolvedId,
+                    edges: savedCrossFileEdges.map((e) => ({
+                        startNodeId: e.startNodeId,
+                        endNodeId: e.endNodeId,
+                        edgeType: e.edgeType,
+                        edgeProperties: e.edgeProperties,
+                    })),
+                });
+                // Safely extract recreatedCount with runtime validation
+                const firstResult = recreateResult[0];
+                const recreatedCount = firstResult && typeof firstResult === 'object' && 'recreatedCount' in firstResult
+                    ? Number(firstResult.recreatedCount) || 0
+                    : 0;
+                edgesImported += recreatedCount;
+                await debugLog('Watch: cross-file edges recreated', { recreatedCount });
+            }
+        }
+        await debugLog('Watch incremental parse completed', {
+            nodesImported,
+            edgesImported,
+            filesReparsed: filesToReparse.length,
+            filesDeleted: filesToDelete.length,
+        });
+        return {
+            nodesUpdated: nodesImported,
+            edgesUpdated: edgesImported,
+            filesReparsed: filesToReparse.length,
+            filesDeleted: filesToDelete.length,
+        };
+    }
+    finally {
+        await neo4jService.close();
+    }
+};
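
performIncrementalParse is the entry point the new WatchManager calls when watched files change: it resolves the project id, detects changed and deleted files, saves and later recreates cross-file edges, and re-imports only the affected subgraph with clearExisting = false. A hedged invocation sketch (signature and return shape as shown above; the dist/ import path and the literal arguments are assumptions):

    // Sketch only — the call shape comes from the new-file hunk above.
    import { performIncrementalParse } from 'code-graph-context/dist/mcp/handlers/incremental-parse.handler.js';

    const result = await performIncrementalParse(
        '/path/to/project',                 // projectPath
        'my-project',                       // projectId
        '/path/to/project/tsconfig.json',   // tsconfigPath
    );
    // { nodesUpdated, edgesUpdated, filesReparsed, filesDeleted }
    console.log(result);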
package/dist/mcp/handlers/streaming-import.handler.js (new file)

@@ -0,0 +1,210 @@
+/**
+ * Streaming Import Handler
+ * Orchestrates chunked parsing and import for large codebases
+ */
+import { randomBytes } from 'crypto';
+import { tmpdir } from 'os';
+import { join } from 'path';
+import { ProgressReporter } from '../../core/utils/progress-reporter.js';
+import { DEFAULTS } from '../constants.js';
+import { debugLog } from '../utils.js';
+/**
+ * Generate a secure temporary file path using crypto random bytes
+ * to avoid race conditions and predictable filenames
+ */
+const generateTempPath = (prefix) => {
+    const randomSuffix = randomBytes(16).toString('hex');
+    return join(tmpdir(), `${prefix}-${Date.now()}-${randomSuffix}.json`);
+};
+export class StreamingImportHandler {
+    graphGeneratorHandler;
+    progressReporter;
+    constructor(graphGeneratorHandler) {
+        this.graphGeneratorHandler = graphGeneratorHandler;
+        this.progressReporter = new ProgressReporter();
+    }
+    /**
+     * Import a project using chunked parsing to reduce memory usage.
+     * Files are parsed and imported in chunks, with progress reporting.
+     * Supports both TypeScriptParser (single project) and WorkspaceParser (monorepo).
+     */
+    async importProjectStreaming(parser, config) {
+        const startTime = Date.now();
+        if (config.onProgress) {
+            this.progressReporter.setCallback(config.onProgress);
+        }
+        // Set project ID on graph generator
+        this.graphGeneratorHandler.setProjectId(config.projectId);
+        // Phase 1: Get discovered files (already discovered by worker, this returns cached result)
+        const allFilePaths = await parser.discoverSourceFiles();
+        console.log(`📁 Found ${allFilePaths.length} files to parse`);
+        await debugLog('Streaming import started', {
+            totalFiles: allFilePaths.length,
+            chunkSize: config.chunkSize,
+        });
+        // Create chunks
+        const chunks = [];
+        for (let i = 0; i < allFilePaths.length; i += config.chunkSize) {
+            chunks.push(allFilePaths.slice(i, i + config.chunkSize));
+        }
+        console.log(`📦 Split into ${chunks.length} chunks of ~${config.chunkSize} files each`);
+        let totalNodesImported = 0;
+        let totalEdgesImported = 0;
+        // Phase 2: Parse and import chunks
+        for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) {
+            const chunk = chunks[chunkIndex];
+            const filesProcessed = chunkIndex * config.chunkSize + chunk.length;
+            console.log(`\n🔄 Processing chunk ${chunkIndex + 1}/${chunks.length} (${chunk.length} files)`);
+            try {
+                // Parse the chunk (skip edge resolution for streaming)
+                const { nodes, edges } = await parser.parseChunk(chunk, true);
+                // Add parsed nodes to existing nodes for cross-chunk edge resolution
+                parser.addExistingNodesFromChunk(nodes);
+                // Import to Neo4j if we have data
+                if (nodes.length > 0 || edges.length > 0) {
+                    await debugLog('Importing chunk - generating embeddings', {
+                        chunkIndex: chunkIndex + 1,
+                        totalChunks: chunks.length,
+                        nodeCount: nodes.length,
+                    });
+                    await this.importChunkToNeo4j(nodes, edges);
+                    totalNodesImported += nodes.length;
+                    totalEdgesImported += edges.length;
+                }
+                else {
+                    console.warn(`⚠️ Chunk ${chunkIndex + 1} produced 0 nodes/edges from ${chunk.length} files`);
+                    await debugLog('Empty chunk result', {
+                        chunkIndex: chunkIndex + 1,
+                        fileCount: chunk.length,
+                        sampleFiles: chunk.slice(0, 3),
+                    });
+                }
+                // Report progress with all relevant data
+                await this.progressReporter.report({
+                    phase: 'importing',
+                    current: filesProcessed,
+                    total: allFilePaths.length,
+                    message: `Processed chunk ${chunkIndex + 1}/${chunks.length}: ${totalNodesImported} nodes, ${totalEdgesImported} edges`,
+                    details: {
+                        filesProcessed,
+                        nodesCreated: totalNodesImported,
+                        edgesCreated: totalEdgesImported,
+                        currentFile: chunk[0],
+                        chunkIndex: chunkIndex + 1,
+                        totalChunks: chunks.length,
+                    },
+                });
+                console.log(`✅ Chunk ${chunkIndex + 1}: ${nodes.length} nodes, ${edges.length} edges imported`);
+            }
+            catch (chunkError) {
+                console.error(`❌ Error processing chunk ${chunkIndex + 1}:`, chunkError);
+                await debugLog('Chunk processing error', {
+                    chunkIndex: chunkIndex + 1,
+                    fileCount: chunk.length,
+                    sampleFiles: chunk.slice(0, 3),
+                    error: chunkError instanceof Error ? chunkError.message : String(chunkError),
+                    stack: chunkError instanceof Error ? chunkError.stack : undefined,
+                });
+                // Re-throw to fail the entire import - don't silently continue
+                throw chunkError;
+            }
+            // Note: Don't clear parsed data during streaming - we need accumulated nodes for cross-chunk edge resolution
+            // Memory usage is bounded because we only keep Neo4jNode references (not full AST)
+        }
+        // Phase 3: Resolve cross-chunk deferred edges
+        await this.progressReporter.reportResolving(0, totalEdgesImported);
+        console.log('\n🔗 Resolving cross-chunk edges...');
+        const resolvedEdges = await parser.resolveDeferredEdgesManually();
+        if (resolvedEdges.length > 0) {
+            await this.importEdgesToNeo4j(resolvedEdges);
+            totalEdgesImported += resolvedEdges.length;
+            console.log(`✅ Resolved ${resolvedEdges.length} cross-chunk edges`);
+        }
+        else {
+            console.log('ℹ️ No cross-chunk edges to resolve');
+        }
+        // Phase 3b: Apply edge enhancements on all accumulated nodes
+        // This catches context-dependent edges (like INTERNAL_API_CALL) that span chunks
+        console.log('\n🔗 Applying edge enhancements on all nodes...');
+        const enhancedEdges = await parser.applyEdgeEnhancementsManually();
+        if (enhancedEdges.length > 0) {
+            await this.importEdgesToNeo4j(enhancedEdges);
+            totalEdgesImported += enhancedEdges.length;
+            console.log(`✅ Created ${enhancedEdges.length} edges from edge enhancements`);
+        }
+        else {
+            console.log('ℹ️ No edges from edge enhancements');
+        }
+        // Clear accumulated data now that edge resolution is complete
+        parser.clearParsedData();
+        await this.progressReporter.reportResolving(resolvedEdges.length, resolvedEdges.length);
+        // Phase 4: Complete
+        const elapsedMs = Date.now() - startTime;
+        await this.progressReporter.reportComplete(totalNodesImported, totalEdgesImported);
+        const result = {
+            nodesImported: totalNodesImported,
+            edgesImported: totalEdgesImported,
+            filesProcessed: allFilePaths.length,
+            chunksProcessed: chunks.length,
+            elapsedMs,
+        };
+        console.log(`\n🎉 Streaming import complete!`);
+        console.log(`   Files: ${allFilePaths.length}`);
+        console.log(`   Nodes: ${totalNodesImported}`);
+        console.log(`   Edges: ${totalEdgesImported}`);
+        console.log(`   Time: ${(elapsedMs / 1000).toFixed(2)}s`);
+        await debugLog('Streaming import completed', result);
+        return result;
+    }
+    /**
+     * Import a chunk of nodes and edges to Neo4j using the graph generator handler
+     */
+    async importChunkToNeo4j(nodes, edges) {
+        // Write to temporary JSON and use existing import mechanism
+        // This reuses the batched embedding and import logic
+        const tempPath = generateTempPath('chunk');
+        const fs = await import('fs/promises');
+        try {
+            await fs.writeFile(tempPath, JSON.stringify({
+                nodes,
+                edges,
+                metadata: { chunked: true },
+            }));
+            await this.graphGeneratorHandler.generateGraph(tempPath, DEFAULTS.batchSize, false);
+        }
+        finally {
+            // Clean up temp file
+            try {
+                await fs.unlink(tempPath);
+            }
+            catch {
+                // Ignore cleanup errors
+            }
+        }
+    }
+    /**
+     * Import resolved edges to Neo4j
+     */
+    async importEdgesToNeo4j(edges) {
+        if (edges.length === 0)
+            return;
+        const tempPath = generateTempPath('edges');
+        const fs = await import('fs/promises');
+        try {
+            await fs.writeFile(tempPath, JSON.stringify({
+                nodes: [],
+                edges,
+                metadata: { edgesOnly: true },
+            }));
+            await this.graphGeneratorHandler.generateGraph(tempPath, DEFAULTS.batchSize, false);
+        }
+        finally {
+            try {
+                await fs.unlink(tempPath);
+            }
+            catch {
+                // Ignore cleanup errors
+            }
+        }
+    }
+}
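
StreamingImportHandler drives the chunked flow for large codebases: discover files once, parse and import fixed-size chunks while accumulating nodes for cross-chunk edge resolution, then resolve deferred edges and edge enhancements in a final pass. A sketch of how it might be driven, assuming the parser comes from ParserFactory as in the incremental-parse hunk and that a GraphGeneratorHandler (graphHandler, as in the first sketch) is already wired up; the import paths, chunkSize value, and onProgress handling are assumptions:

    // Sketch only — composition inferred from the hunks in this diff; paths and values assumed.
    import { ParserFactory } from 'code-graph-context/dist/core/parsers/parser-factory.js';
    import { StreamingImportHandler } from 'code-graph-context/dist/mcp/handlers/streaming-import.handler.js';

    const projectPath = '/path/to/project';
    const parser = await ParserFactory.createParserWithAutoDetection(projectPath, undefined, 'my-project', true);
    const streaming = new StreamingImportHandler(graphHandler);   // graphHandler: GraphGeneratorHandler instance

    const summary = await streaming.importProjectStreaming(parser, {
        projectId: 'my-project',
        chunkSize: 200,                              // assumed value; the package default is not shown here
        onProgress: (p) => console.log(p.message),   // receives the report() payload shown above
    });
    console.log(summary);   // { nodesImported, edgesImported, filesProcessed, chunksProcessed, elapsedMs }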