code-graph-context 1.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/README.md +221 -101
  2. package/dist/core/config/fairsquare-framework-schema.js +47 -60
  3. package/dist/core/config/nestjs-framework-schema.js +11 -1
  4. package/dist/core/config/schema.js +1 -1
  5. package/dist/core/config/timeouts.js +27 -0
  6. package/dist/core/embeddings/embeddings.service.js +122 -2
  7. package/dist/core/embeddings/natural-language-to-cypher.service.js +416 -17
  8. package/dist/core/parsers/parser-factory.js +5 -3
  9. package/dist/core/parsers/typescript-parser.js +614 -45
  10. package/dist/core/parsers/workspace-parser.js +553 -0
  11. package/dist/core/utils/edge-factory.js +37 -0
  12. package/dist/core/utils/file-change-detection.js +105 -0
  13. package/dist/core/utils/file-utils.js +20 -0
  14. package/dist/core/utils/index.js +3 -0
  15. package/dist/core/utils/path-utils.js +75 -0
  16. package/dist/core/utils/progress-reporter.js +112 -0
  17. package/dist/core/utils/project-id.js +176 -0
  18. package/dist/core/utils/retry.js +41 -0
  19. package/dist/core/workspace/index.js +4 -0
  20. package/dist/core/workspace/workspace-detector.js +221 -0
  21. package/dist/mcp/constants.js +153 -5
  22. package/dist/mcp/handlers/cross-file-edge.helpers.js +19 -0
  23. package/dist/mcp/handlers/file-change-detection.js +105 -0
  24. package/dist/mcp/handlers/graph-generator.handler.js +97 -32
  25. package/dist/mcp/handlers/incremental-parse.handler.js +146 -0
  26. package/dist/mcp/handlers/streaming-import.handler.js +210 -0
  27. package/dist/mcp/handlers/traversal.handler.js +130 -71
  28. package/dist/mcp/mcp.server.js +45 -6
  29. package/dist/mcp/service-init.js +79 -0
  30. package/dist/mcp/services/job-manager.js +165 -0
  31. package/dist/mcp/services/watch-manager.js +376 -0
  32. package/dist/mcp/services.js +2 -2
  33. package/dist/mcp/tools/check-parse-status.tool.js +64 -0
  34. package/dist/mcp/tools/impact-analysis.tool.js +84 -18
  35. package/dist/mcp/tools/index.js +13 -1
  36. package/dist/mcp/tools/list-projects.tool.js +62 -0
  37. package/dist/mcp/tools/list-watchers.tool.js +51 -0
  38. package/dist/mcp/tools/natural-language-to-cypher.tool.js +34 -8
  39. package/dist/mcp/tools/parse-typescript-project.tool.js +318 -58
  40. package/dist/mcp/tools/search-codebase.tool.js +56 -16
  41. package/dist/mcp/tools/start-watch-project.tool.js +100 -0
  42. package/dist/mcp/tools/stop-watch-project.tool.js +49 -0
  43. package/dist/mcp/tools/traverse-from-node.tool.js +68 -9
  44. package/dist/mcp/utils.js +35 -13
  45. package/dist/mcp/workers/parse-worker.js +198 -0
  46. package/dist/storage/neo4j/neo4j.service.js +147 -48
  47. package/package.json +4 -2
package/dist/mcp/handlers/graph-generator.handler.js
@@ -3,6 +3,7 @@
  * Handles importing parsed graph data into Neo4j with embeddings
  */
 import fs from 'fs/promises';
+import { EMBEDDING_BATCH_CONFIG } from '../../core/embeddings/embeddings.service.js';
 import { QUERIES } from '../../storage/neo4j/neo4j.service.js';
 import { DEFAULTS } from '../constants.js';
 import { debugLog } from '../utils.js';
@@ -10,13 +11,20 @@ export class GraphGeneratorHandler {
     neo4jService;
     embeddingsService;
     static EMBEDDED_LABEL = 'Embedded';
+    projectId = null;
     constructor(neo4jService, embeddingsService) {
         this.neo4jService = neo4jService;
         this.embeddingsService = embeddingsService;
     }
+    /**
+     * Set the projectId for project-scoped operations
+     */
+    setProjectId(projectId) {
+        this.projectId = projectId;
+    }
     async generateGraph(graphJsonPath, batchSize = DEFAULTS.batchSize, clearExisting = true) {
         console.log(`Generating graph from JSON file: ${graphJsonPath}`);
-        await debugLog('Starting graph generation', { graphJsonPath, batchSize, clearExisting });
+        await debugLog('Starting graph generation', { graphJsonPath, batchSize, clearExisting, projectId: this.projectId });
         try {
             const graphData = await this.loadGraphData(graphJsonPath);
             const { nodes, edges, metadata } = graphData;
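
Note: callers are expected to set the project scope before importing. The new incremental-parse handler later in this diff does exactly this:

    const graphHandler = new GraphGeneratorHandler(neo4jService, embeddingsService);
    graphHandler.setProjectId(resolvedId);
    // clearExisting = false leaves other projects' data intact during incremental updates
    await graphHandler.generateGraph(outputPath, DEFAULTS.batchSize, false);
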
@@ -25,6 +33,7 @@ export class GraphGeneratorHandler {
         if (clearExisting) {
             await this.clearExistingData();
         }
+        await this.createProjectIndexes();
         await this.importNodes(nodes, batchSize);
         await this.importEdges(edges, batchSize);
         await this.createVectorIndexes();
@@ -47,9 +56,24 @@
         return JSON.parse(fileContent);
     }
     async clearExistingData() {
-        console.log('Clearing existing graph data...');
-        await this.neo4jService.run(QUERIES.CLEAR_DATABASE);
-        await debugLog('Existing graph data cleared');
+        if (this.projectId) {
+            console.log(`Clearing existing graph data for project: ${this.projectId}...`);
+            await this.neo4jService.run(QUERIES.CLEAR_PROJECT, { projectId: this.projectId });
+            await debugLog('Existing project graph data cleared', { projectId: this.projectId });
+        }
+        else {
+            console.log('Clearing ALL existing graph data (no projectId set)...');
+            await this.neo4jService.run(QUERIES.CLEAR_DATABASE);
+            await debugLog('Existing graph data cleared');
+        }
+    }
+    async createProjectIndexes() {
+        console.log('Creating project indexes...');
+        await this.neo4jService.run(QUERIES.CREATE_PROJECT_INDEX_EMBEDDED);
+        await this.neo4jService.run(QUERIES.CREATE_PROJECT_INDEX_SOURCEFILE);
+        await this.neo4jService.run(QUERIES.CREATE_PROJECT_ID_INDEX_EMBEDDED);
+        await this.neo4jService.run(QUERIES.CREATE_PROJECT_ID_INDEX_SOURCEFILE);
+        await debugLog('Project indexes created');
     }
     async importNodes(nodes, batchSize) {
         console.log(`Importing ${nodes.length} nodes with embeddings...`);
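
Note: the text of QUERIES.CLEAR_PROJECT and the four new index queries lives in neo4j.service.js (+147 -48 in this release) and is not shown in this hunk. A minimal sketch of what a project-scoped clear could look like, assuming nodes carry the projectId property passed as a parameter above; the actual query text is an assumption:

    // Hypothetical equivalent of QUERIES.CLEAR_PROJECT; the package's real query may differ.
    const CLEAR_PROJECT = `
        MATCH (n { projectId: $projectId })
        DETACH DELETE n
    `;
    await neo4jService.run(CLEAR_PROJECT, { projectId: this.projectId });
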
@@ -65,18 +89,72 @@
             });
         }
     }
+    /**
+     * Process a batch of nodes with batched embedding calls.
+     * Collects all texts needing embedding, makes a single batched API call,
+     * then maps embeddings back to their respective nodes.
+     */
     async processNodeBatch(nodes) {
-        return Promise.all(nodes.map(async (node) => {
-            const embedding = await this.embedNodeSourceCode(node);
-            return {
-                ...node,
-                labels: embedding ? [...node.labels, GraphGeneratorHandler.EMBEDDED_LABEL] : node.labels,
-                properties: {
-                    ...this.flattenProperties(node.properties),
-                    embedding,
-                },
-            };
-        }));
+        // Separate nodes that need embedding from those that don't
+        const nodesNeedingEmbedding = [];
+        const nodeResults = new Array(nodes.length);
+        // First pass: identify nodes needing embedding and prepare texts
+        nodes.forEach((node, index) => {
+            if (node.properties?.sourceCode && !node.skipEmbedding) {
+                // Truncate to stay under embedding model's 8192 token limit (~4 chars/token)
+                const truncatedCode = node.properties.sourceCode.slice(0, DEFAULTS.maxEmbeddingChars);
+                // Include node name and type in embedding for better search matching
+                // e.g., "ProfileService ClassDeclaration" helps "profile service" queries match
+                const metadata = `${node.properties.name ?? ''} ${node.labels?.join(' ') ?? ''}`.trim();
+                const embeddingText = metadata ? `${metadata}\n${truncatedCode}` : truncatedCode;
+                nodesNeedingEmbedding.push({
+                    node,
+                    index,
+                    text: embeddingText,
+                });
+            }
+            else {
+                // Node doesn't need embedding - prepare it immediately
+                nodeResults[index] = {
+                    ...node,
+                    labels: node.labels,
+                    properties: {
+                        ...this.flattenProperties(node.properties),
+                        embedding: null,
+                    },
+                };
+            }
+        });
+        // Batch embed all texts that need it
+        if (nodesNeedingEmbedding.length > 0) {
+            const texts = nodesNeedingEmbedding.map((n) => n.text);
+            try {
+                const embeddings = await this.embeddingsService.embedTextsInBatches(texts, EMBEDDING_BATCH_CONFIG.maxBatchSize);
+                // Map embeddings back to their nodes
+                nodesNeedingEmbedding.forEach((item, i) => {
+                    const embedding = embeddings[i];
+                    nodeResults[item.index] = {
+                        ...item.node,
+                        labels: embedding ? [...item.node.labels, GraphGeneratorHandler.EMBEDDED_LABEL] : item.node.labels,
+                        properties: {
+                            ...this.flattenProperties(item.node.properties),
+                            embedding,
+                        },
+                    };
+                });
+                await debugLog('Batch embedding completed', {
+                    totalNodes: nodes.length,
+                    nodesEmbedded: nodesNeedingEmbedding.length,
+                    batchesUsed: Math.ceil(texts.length / EMBEDDING_BATCH_CONFIG.maxBatchSize),
+                });
+            }
+            catch (error) {
+                // DON'T silently continue - propagate the error so user knows what's wrong
+                await debugLog('Embedding failed', { error: error instanceof Error ? error.message : String(error) });
+                throw error;
+            }
+        }
+        return nodeResults;
     }
     async importEdges(edges, batchSize) {
         console.log(`Importing ${edges.length} edges using APOC...`);
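
Note: embedTextsInBatches is defined in embeddings.service.js (+122 -2 in this release) and is not shown here. A minimal sketch of the batching contract the handler relies on, assuming the service returns one embedding per input text, in order; embedBatch is a hypothetical provider call:

    // Hypothetical shape of embedTextsInBatches; the real implementation may differ.
    async function embedTextsInBatches(texts, maxBatchSize) {
        const results = [];
        for (let i = 0; i < texts.length; i += maxBatchSize) {
            const batch = texts.slice(i, i + maxBatchSize);
            // One embedding API call per batch instead of one per node
            results.push(...(await embedBatch(batch)));
        }
        return results; // results[i] corresponds to texts[i]
    }
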
@@ -85,7 +163,10 @@
                 ...edge,
                 properties: this.flattenProperties(edge.properties),
             }));
-            const result = await this.neo4jService.run(QUERIES.CREATE_RELATIONSHIP, { edges: batch });
+            const result = await this.neo4jService.run(QUERIES.CREATE_RELATIONSHIP, {
+                edges: batch,
+                projectId: this.projectId,
+            });
             const batchEnd = Math.min(i + batchSize, edges.length);
             console.log(`Created ${result[0].created} edges in batch ${i + 1}-${batchEnd}`);
             await debugLog('Edge batch imported', {
@@ -100,22 +181,6 @@
         await this.neo4jService.run(QUERIES.CREATE_EMBEDDED_VECTOR_INDEX);
         await debugLog('Vector indexes created');
     }
-    async embedNodeSourceCode(node) {
-        if (!node.properties?.sourceCode || node.skipEmbedding) {
-            return null;
-        }
-        try {
-            const sourceCode = node.properties.sourceCode;
-            const embedding = await this.embeddingsService.embedText(sourceCode);
-            await debugLog('Node embedded', { nodeId: node.id, codeLength: sourceCode.length });
-            return embedding;
-        }
-        catch (error) {
-            console.warn(`Failed to embed node ${node.id}:`, error);
-            await debugLog('Embedding failed', { nodeId: node.id, error });
-            return null;
-        }
-    }
     flattenProperties(properties) {
         const flattened = {};
         for (const [key, value] of Object.entries(properties)) {
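
Note: flattenProperties (truncated above) exists because Neo4j property values must be primitives or arrays of primitives, never nested maps. A minimal sketch of the kind of flattening involved, assuming nested objects are JSON-stringified; the package's actual strategy is not visible in this diff:

    // Illustrative only; the real flattenProperties body is cut off in the hunk above.
    function flattenProperties(properties) {
        const flattened = {};
        for (const [key, value] of Object.entries(properties)) {
            flattened[key] = value !== null && typeof value === 'object' && !Array.isArray(value)
                ? JSON.stringify(value) // Neo4j cannot store nested maps as properties
                : value;
        }
        return flattened;
    }
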
package/dist/mcp/handlers/incremental-parse.handler.js
@@ -0,0 +1,146 @@
+/**
+ * Incremental Parse Handler
+ * Handles incremental graph updates triggered by file watchers
+ */
+import { writeFileSync, unlinkSync } from 'fs';
+import { join } from 'path';
+import { CORE_TYPESCRIPT_SCHEMA } from '../../core/config/schema.js';
+import { EmbeddingsService } from '../../core/embeddings/embeddings.service.js';
+import { ParserFactory } from '../../core/parsers/parser-factory.js';
+import { detectChangedFiles } from '../../core/utils/file-change-detection.js';
+import { resolveProjectId, getProjectName, UPSERT_PROJECT_QUERY } from '../../core/utils/project-id.js';
+import { Neo4jService, QUERIES } from '../../storage/neo4j/neo4j.service.js';
+import { DEFAULTS, FILE_PATHS, LOG_CONFIG } from '../constants.js';
+import { debugLog } from '../utils.js';
+import { deleteSourceFileSubgraphs, loadExistingNodesForEdgeDetection, getCrossFileEdges, } from './cross-file-edge.helpers.js';
+import { GraphGeneratorHandler } from './graph-generator.handler.js';
+/**
+ * Performs incremental parsing for a project
+ * This is used by the WatchManager when files change
+ */
+export const performIncrementalParse = async (projectPath, projectId, tsconfigPath) => {
+    const neo4jService = new Neo4jService();
+    const embeddingsService = new EmbeddingsService();
+    const graphHandler = new GraphGeneratorHandler(neo4jService, embeddingsService);
+    try {
+        await debugLog('Incremental parse started (watch)', { projectPath, projectId });
+        // Resolve project ID
+        const resolvedId = resolveProjectId(projectPath, projectId);
+        const projectName = await getProjectName(projectPath);
+        // Create parser with auto-detection and lazy loading enabled for memory efficiency
+        const parser = await ParserFactory.createParserWithAutoDetection(projectPath, tsconfigPath, resolvedId, true);
+        // Detect changed files
+        const { filesToReparse, filesToDelete } = await detectChangedFiles(projectPath, neo4jService, resolvedId);
+        await debugLog('Watch incremental change detection', {
+            filesToReparse: filesToReparse.length,
+            filesToDelete: filesToDelete.length,
+        });
+        // If no changes, return early
+        if (filesToReparse.length === 0 && filesToDelete.length === 0) {
+            await debugLog('Watch incremental: no changes detected');
+            return {
+                nodesUpdated: 0,
+                edgesUpdated: 0,
+                filesReparsed: 0,
+                filesDeleted: filesToDelete.length,
+            };
+        }
+        let savedCrossFileEdges = [];
+        const filesToRemoveFromGraph = [...filesToDelete, ...filesToReparse];
+        if (filesToRemoveFromGraph.length > 0) {
+            // Save cross-file edges before deletion
+            savedCrossFileEdges = await getCrossFileEdges(neo4jService, filesToRemoveFromGraph, resolvedId);
+            await debugLog('Watch: saved cross-file edges', { count: savedCrossFileEdges.length });
+            // Delete old subgraphs
+            await deleteSourceFileSubgraphs(neo4jService, filesToRemoveFromGraph, resolvedId);
+        }
+        let nodesImported = 0;
+        let edgesImported = 0;
+        if (filesToReparse.length > 0) {
+            // Load existing nodes for edge detection
+            const existingNodes = await loadExistingNodesForEdgeDetection(neo4jService, filesToRemoveFromGraph, resolvedId);
+            parser.setExistingNodes(existingNodes);
+            // Parse only changed files
+            await parser.parseWorkspace(filesToReparse);
+            // Export graph data
+            const { nodes, edges } = parser.exportToJson();
+            // Get framework schemas if available (use unknown as intermediate to access private property)
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            const parserAny = parser;
+            const frameworkSchemas = Array.isArray(parserAny.frameworkSchemas)
+                ? parserAny.frameworkSchemas.map((s) => s.name)
+                : ['Auto-detected'];
+            const graphData = {
+                nodes,
+                edges,
+                metadata: {
+                    coreSchema: CORE_TYPESCRIPT_SCHEMA.name,
+                    frameworkSchemas,
+                    projectType: 'auto',
+                    projectId: resolvedId,
+                    generated: new Date().toISOString(),
+                },
+            };
+            // Write to JSON file (required by GraphGeneratorHandler)
+            const outputPath = join(projectPath, FILE_PATHS.graphOutput);
+            writeFileSync(outputPath, JSON.stringify(graphData, null, LOG_CONFIG.jsonIndentation));
+            // Update Project node
+            await neo4jService.run(UPSERT_PROJECT_QUERY, {
+                projectId: resolvedId,
+                path: projectPath,
+                name: projectName,
+                status: 'complete',
+            });
+            // Import nodes and edges (clearExisting = false for incremental)
+            graphHandler.setProjectId(resolvedId);
+            try {
+                const result = await graphHandler.generateGraph(outputPath, DEFAULTS.batchSize, false);
+                nodesImported = result.nodesImported;
+                edgesImported = result.edgesImported;
+            }
+            finally {
+                // Clean up temporary graph.json file
+                try {
+                    unlinkSync(outputPath);
+                }
+                catch {
+                    // Ignore cleanup errors - file may not exist or be inaccessible
+                }
+            }
+            // Recreate cross-file edges
+            if (savedCrossFileEdges.length > 0) {
+                const recreateResult = await neo4jService.run(QUERIES.RECREATE_CROSS_FILE_EDGES, {
+                    projectId: resolvedId,
+                    edges: savedCrossFileEdges.map((e) => ({
+                        startNodeId: e.startNodeId,
+                        endNodeId: e.endNodeId,
+                        edgeType: e.edgeType,
+                        edgeProperties: e.edgeProperties,
+                    })),
+                });
+                // Safely extract recreatedCount with runtime validation
+                const firstResult = recreateResult[0];
+                const recreatedCount = firstResult && typeof firstResult === 'object' && 'recreatedCount' in firstResult
+                    ? Number(firstResult.recreatedCount) || 0
+                    : 0;
+                edgesImported += recreatedCount;
+                await debugLog('Watch: cross-file edges recreated', { recreatedCount });
+            }
+        }
+        await debugLog('Watch incremental parse completed', {
+            nodesImported,
+            edgesImported,
+            filesReparsed: filesToReparse.length,
+            filesDeleted: filesToDelete.length,
+        });
+        return {
+            nodesUpdated: nodesImported,
+            edgesUpdated: edgesImported,
+            filesReparsed: filesToReparse.length,
+            filesDeleted: filesToDelete.length,
+        };
+    }
+    finally {
+        await neo4jService.close();
+    }
+};
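
Note: per the JSDoc above, this function is driven by the new WatchManager (dist/mcp/services/watch-manager.js, +376 lines in this release). A minimal sketch of a caller; the watcher wiring is an assumption, not WatchManager's actual code:

    import { performIncrementalParse } from './handlers/incremental-parse.handler.js';

    // Hypothetical wiring: re-parse on change, relying on detectChangedFiles to no-op when nothing changed.
    watcher.on('change', async () => {
        const result = await performIncrementalParse('/path/to/project');
        console.log(`Re-parsed ${result.filesReparsed} files: ${result.nodesUpdated} nodes, ` +
            `${result.edgesUpdated} edges updated, ${result.filesDeleted} files deleted`);
    });
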
package/dist/mcp/handlers/streaming-import.handler.js
@@ -0,0 +1,210 @@
+/**
+ * Streaming Import Handler
+ * Orchestrates chunked parsing and import for large codebases
+ */
+import { randomBytes } from 'crypto';
+import { tmpdir } from 'os';
+import { join } from 'path';
+import { ProgressReporter } from '../../core/utils/progress-reporter.js';
+import { DEFAULTS } from '../constants.js';
+import { debugLog } from '../utils.js';
+/**
+ * Generate a secure temporary file path using crypto random bytes
+ * to avoid race conditions and predictable filenames
+ */
+const generateTempPath = (prefix) => {
+    const randomSuffix = randomBytes(16).toString('hex');
+    return join(tmpdir(), `${prefix}-${Date.now()}-${randomSuffix}.json`);
+};
+export class StreamingImportHandler {
+    graphGeneratorHandler;
+    progressReporter;
+    constructor(graphGeneratorHandler) {
+        this.graphGeneratorHandler = graphGeneratorHandler;
+        this.progressReporter = new ProgressReporter();
+    }
+    /**
+     * Import a project using chunked parsing to reduce memory usage.
+     * Files are parsed and imported in chunks, with progress reporting.
+     * Supports both TypeScriptParser (single project) and WorkspaceParser (monorepo).
+     */
+    async importProjectStreaming(parser, config) {
+        const startTime = Date.now();
+        if (config.onProgress) {
+            this.progressReporter.setCallback(config.onProgress);
+        }
+        // Set project ID on graph generator
+        this.graphGeneratorHandler.setProjectId(config.projectId);
+        // Phase 1: Get discovered files (already discovered by worker, this returns cached result)
+        const allFilePaths = await parser.discoverSourceFiles();
+        console.log(`📁 Found ${allFilePaths.length} files to parse`);
+        await debugLog('Streaming import started', {
+            totalFiles: allFilePaths.length,
+            chunkSize: config.chunkSize,
+        });
+        // Create chunks
+        const chunks = [];
+        for (let i = 0; i < allFilePaths.length; i += config.chunkSize) {
+            chunks.push(allFilePaths.slice(i, i + config.chunkSize));
+        }
+        console.log(`📦 Split into ${chunks.length} chunks of ~${config.chunkSize} files each`);
+        let totalNodesImported = 0;
+        let totalEdgesImported = 0;
+        // Phase 2: Parse and import chunks
+        for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) {
+            const chunk = chunks[chunkIndex];
+            const filesProcessed = chunkIndex * config.chunkSize + chunk.length;
+            console.log(`\n🔄 Processing chunk ${chunkIndex + 1}/${chunks.length} (${chunk.length} files)`);
+            try {
+                // Parse the chunk (skip edge resolution for streaming)
+                const { nodes, edges } = await parser.parseChunk(chunk, true);
+                // Add parsed nodes to existing nodes for cross-chunk edge resolution
+                parser.addExistingNodesFromChunk(nodes);
+                // Import to Neo4j if we have data
+                if (nodes.length > 0 || edges.length > 0) {
+                    await debugLog('Importing chunk - generating embeddings', {
+                        chunkIndex: chunkIndex + 1,
+                        totalChunks: chunks.length,
+                        nodeCount: nodes.length,
+                    });
+                    await this.importChunkToNeo4j(nodes, edges);
+                    totalNodesImported += nodes.length;
+                    totalEdgesImported += edges.length;
+                }
+                else {
+                    console.warn(`⚠️ Chunk ${chunkIndex + 1} produced 0 nodes/edges from ${chunk.length} files`);
+                    await debugLog('Empty chunk result', {
+                        chunkIndex: chunkIndex + 1,
+                        fileCount: chunk.length,
+                        sampleFiles: chunk.slice(0, 3),
+                    });
+                }
+                // Report progress with all relevant data
+                await this.progressReporter.report({
+                    phase: 'importing',
+                    current: filesProcessed,
+                    total: allFilePaths.length,
+                    message: `Processed chunk ${chunkIndex + 1}/${chunks.length}: ${totalNodesImported} nodes, ${totalEdgesImported} edges`,
+                    details: {
+                        filesProcessed,
+                        nodesCreated: totalNodesImported,
+                        edgesCreated: totalEdgesImported,
+                        currentFile: chunk[0],
+                        chunkIndex: chunkIndex + 1,
+                        totalChunks: chunks.length,
+                    },
+                });
+                console.log(`✅ Chunk ${chunkIndex + 1}: ${nodes.length} nodes, ${edges.length} edges imported`);
+            }
+            catch (chunkError) {
+                console.error(`❌ Error processing chunk ${chunkIndex + 1}:`, chunkError);
+                await debugLog('Chunk processing error', {
+                    chunkIndex: chunkIndex + 1,
+                    fileCount: chunk.length,
+                    sampleFiles: chunk.slice(0, 3),
+                    error: chunkError instanceof Error ? chunkError.message : String(chunkError),
+                    stack: chunkError instanceof Error ? chunkError.stack : undefined,
+                });
+                // Re-throw to fail the entire import - don't silently continue
+                throw chunkError;
+            }
+            // Note: Don't clear parsed data during streaming - we need accumulated nodes for cross-chunk edge resolution
+            // Memory usage is bounded because we only keep Neo4jNode references (not full AST)
+        }
+        // Phase 3: Resolve cross-chunk deferred edges
+        await this.progressReporter.reportResolving(0, totalEdgesImported);
+        console.log('\n🔗 Resolving cross-chunk edges...');
+        const resolvedEdges = await parser.resolveDeferredEdgesManually();
+        if (resolvedEdges.length > 0) {
+            await this.importEdgesToNeo4j(resolvedEdges);
+            totalEdgesImported += resolvedEdges.length;
+            console.log(`✅ Resolved ${resolvedEdges.length} cross-chunk edges`);
+        }
+        else {
+            console.log('ℹ️ No cross-chunk edges to resolve');
+        }
+        // Phase 3b: Apply edge enhancements on all accumulated nodes
+        // This catches context-dependent edges (like INTERNAL_API_CALL) that span chunks
+        console.log('\n🔗 Applying edge enhancements on all nodes...');
+        const enhancedEdges = await parser.applyEdgeEnhancementsManually();
+        if (enhancedEdges.length > 0) {
+            await this.importEdgesToNeo4j(enhancedEdges);
+            totalEdgesImported += enhancedEdges.length;
+            console.log(`✅ Created ${enhancedEdges.length} edges from edge enhancements`);
+        }
+        else {
+            console.log('ℹ️ No edges from edge enhancements');
+        }
+        // Clear accumulated data now that edge resolution is complete
+        parser.clearParsedData();
+        await this.progressReporter.reportResolving(resolvedEdges.length, resolvedEdges.length);
+        // Phase 4: Complete
+        const elapsedMs = Date.now() - startTime;
+        await this.progressReporter.reportComplete(totalNodesImported, totalEdgesImported);
+        const result = {
+            nodesImported: totalNodesImported,
+            edgesImported: totalEdgesImported,
+            filesProcessed: allFilePaths.length,
+            chunksProcessed: chunks.length,
+            elapsedMs,
+        };
+        console.log(`\n🎉 Streaming import complete!`);
+        console.log(` Files: ${allFilePaths.length}`);
+        console.log(` Nodes: ${totalNodesImported}`);
+        console.log(` Edges: ${totalEdgesImported}`);
+        console.log(` Time: ${(elapsedMs / 1000).toFixed(2)}s`);
+        await debugLog('Streaming import completed', result);
+        return result;
+    }
+    /**
+     * Import a chunk of nodes and edges to Neo4j using the graph generator handler
+     */
+    async importChunkToNeo4j(nodes, edges) {
+        // Write to temporary JSON and use existing import mechanism
+        // This reuses the batched embedding and import logic
+        const tempPath = generateTempPath('chunk');
+        const fs = await import('fs/promises');
+        try {
+            await fs.writeFile(tempPath, JSON.stringify({
+                nodes,
+                edges,
+                metadata: { chunked: true },
+            }));
+            await this.graphGeneratorHandler.generateGraph(tempPath, DEFAULTS.batchSize, false);
+        }
+        finally {
+            // Clean up temp file
+            try {
+                await fs.unlink(tempPath);
+            }
+            catch {
+                // Ignore cleanup errors
+            }
+        }
+    }
+    /**
+     * Import resolved edges to Neo4j
+     */
+    async importEdgesToNeo4j(edges) {
+        if (edges.length === 0)
+            return;
+        const tempPath = generateTempPath('edges');
+        const fs = await import('fs/promises');
+        try {
+            await fs.writeFile(tempPath, JSON.stringify({
+                nodes: [],
+                edges,
+                metadata: { edgesOnly: true },
+            }));
+            await this.graphGeneratorHandler.generateGraph(tempPath, DEFAULTS.batchSize, false);
+        }
+        finally {
+            try {
+                await fs.unlink(tempPath);
+            }
+            catch {
+                // Ignore cleanup errors
+            }
+        }
+    }
+}
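
Note: a minimal usage sketch for the streaming handler. The config fields (projectId, chunkSize, onProgress) are taken from the reads above; the parser setup and the chunk size value are assumptions:

    // Hypothetical usage; the parser comes from ParserFactory elsewhere in this release.
    const handler = new StreamingImportHandler(graphGeneratorHandler);
    const result = await handler.importProjectStreaming(parser, {
        projectId: 'my-project',
        chunkSize: 100, // files per chunk: larger chunks use more memory but fewer import round-trips
        onProgress: (p) => console.log(`${p.phase}: ${p.current}/${p.total}`),
    });
    console.log(`Imported ${result.nodesImported} nodes and ${result.edgesImported} edges ` +
        `from ${result.filesProcessed} files in ${result.chunksProcessed} chunks`);
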