code-graph-context 1.0.0 → 2.0.0

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/README.md +221 -101
  2. package/dist/core/config/fairsquare-framework-schema.js +47 -60
  3. package/dist/core/config/nestjs-framework-schema.js +11 -1
  4. package/dist/core/config/schema.js +1 -1
  5. package/dist/core/config/timeouts.js +27 -0
  6. package/dist/core/embeddings/embeddings.service.js +122 -2
  7. package/dist/core/embeddings/natural-language-to-cypher.service.js +428 -30
  8. package/dist/core/parsers/parser-factory.js +6 -6
  9. package/dist/core/parsers/typescript-parser.js +639 -44
  10. package/dist/core/parsers/workspace-parser.js +553 -0
  11. package/dist/core/utils/edge-factory.js +37 -0
  12. package/dist/core/utils/file-change-detection.js +105 -0
  13. package/dist/core/utils/file-utils.js +20 -0
  14. package/dist/core/utils/index.js +3 -0
  15. package/dist/core/utils/path-utils.js +75 -0
  16. package/dist/core/utils/progress-reporter.js +112 -0
  17. package/dist/core/utils/project-id.js +176 -0
  18. package/dist/core/utils/retry.js +41 -0
  19. package/dist/core/workspace/index.js +4 -0
  20. package/dist/core/workspace/workspace-detector.js +221 -0
  21. package/dist/mcp/constants.js +172 -7
  22. package/dist/mcp/handlers/cross-file-edge.helpers.js +19 -0
  23. package/dist/mcp/handlers/file-change-detection.js +105 -0
  24. package/dist/mcp/handlers/graph-generator.handler.js +97 -32
  25. package/dist/mcp/handlers/incremental-parse.handler.js +146 -0
  26. package/dist/mcp/handlers/streaming-import.handler.js +210 -0
  27. package/dist/mcp/handlers/traversal.handler.js +130 -71
  28. package/dist/mcp/mcp.server.js +46 -7
  29. package/dist/mcp/service-init.js +79 -0
  30. package/dist/mcp/services/job-manager.js +165 -0
  31. package/dist/mcp/services/watch-manager.js +376 -0
  32. package/dist/mcp/services.js +48 -127
  33. package/dist/mcp/tools/check-parse-status.tool.js +64 -0
  34. package/dist/mcp/tools/impact-analysis.tool.js +319 -0
  35. package/dist/mcp/tools/index.js +15 -1
  36. package/dist/mcp/tools/list-projects.tool.js +62 -0
  37. package/dist/mcp/tools/list-watchers.tool.js +51 -0
  38. package/dist/mcp/tools/natural-language-to-cypher.tool.js +34 -8
  39. package/dist/mcp/tools/parse-typescript-project.tool.js +325 -60
  40. package/dist/mcp/tools/search-codebase.tool.js +57 -23
  41. package/dist/mcp/tools/start-watch-project.tool.js +100 -0
  42. package/dist/mcp/tools/stop-watch-project.tool.js +49 -0
  43. package/dist/mcp/tools/traverse-from-node.tool.js +68 -9
  44. package/dist/mcp/utils.js +35 -12
  45. package/dist/mcp/workers/parse-worker.js +198 -0
  46. package/dist/storage/neo4j/neo4j.service.js +273 -34
  47. package/package.json +4 -2
@@ -2,25 +2,31 @@
2
2
  import crypto from 'crypto';
3
3
  import fs from 'fs/promises';
4
4
  import path from 'node:path';
5
+ import { glob } from 'glob';
5
6
  import { minimatch } from 'minimatch';
6
7
  import { Project, Node } from 'ts-morph';
8
+ import { createFrameworkEdgeData } from '../utils/edge-factory.js';
7
9
  /**
8
10
  * Generate a deterministic node ID based on stable properties.
9
11
  * This ensures the same node gets the same ID across reparses.
10
12
  *
11
- * Identity is based on: coreType + filePath + name (+ parentId for nested nodes)
13
+ * Identity is based on: projectId + coreType + filePath + name (+ parentId for nested nodes)
12
14
  * This is stable because when it matters (one side of edge not reparsed),
13
15
  * names are guaranteed unchanged (or imports would break, triggering reparse).
16
+ *
17
+ * Including projectId ensures nodes from different projects have unique IDs
18
+ * even if they have identical file paths and names.
14
19
  */
15
- const generateDeterministicId = (coreType, filePath, name, parentId) => {
16
- const parts = parentId ? [coreType, filePath, parentId, name] : [coreType, filePath, name];
20
+ const generateDeterministicId = (projectId, coreType, filePath, name, parentId) => {
21
+ const parts = parentId ? [projectId, coreType, filePath, parentId, name] : [projectId, coreType, filePath, name];
17
22
  const identity = parts.join('::');
18
23
  const hash = crypto.createHash('sha256').update(identity).digest('hex').substring(0, 16);
19
- return `${coreType}:${hash}`;
24
+ return `${projectId}:${coreType}:${hash}`;
20
25
  };
21
- import { hashFile } from '../../utils/file-utils.js';
26
+ import { debugLog, hashFile } from '../utils/file-utils.js';
22
27
  import { NESTJS_FRAMEWORK_SCHEMA } from '../config/nestjs-framework-schema.js';
23
28
  import { CoreNodeType, CORE_TYPESCRIPT_SCHEMA, DEFAULT_PARSE_OPTIONS, CoreEdgeType, } from '../config/schema.js';
29
+ import { resolveProjectId } from '../utils/project-id.js';
24
30
  export class TypeScriptParser {
25
31
  workspacePath;
26
32
  tsConfigPath;
@@ -33,24 +39,63 @@ export class TypeScriptParser {
33
39
  existingNodes = new Map(); // Nodes from Neo4j for edge target matching
34
40
  deferredEdges = [];
35
41
  sharedContext = new Map(); // Shared context for custom data
36
- constructor(workspacePath, tsConfigPath = 'tsconfig.json', coreSchema = CORE_TYPESCRIPT_SCHEMA, frameworkSchemas = [NESTJS_FRAMEWORK_SCHEMA], parseConfig = DEFAULT_PARSE_OPTIONS) {
42
+ projectId; // Project identifier for multi-project isolation
43
+ lazyLoad; // Whether to use lazy file loading for large projects
44
+ discoveredFiles = null; // Cached file discovery results
45
+ deferEdgeEnhancements = false; // When true, skip edge enhancements (parent will handle)
46
+ constructor(workspacePath, tsConfigPath = 'tsconfig.json', coreSchema = CORE_TYPESCRIPT_SCHEMA, frameworkSchemas = [NESTJS_FRAMEWORK_SCHEMA], parseConfig = DEFAULT_PARSE_OPTIONS, projectId, // Optional - derived from workspacePath if not provided
47
+ lazyLoad = false) {
37
48
  this.workspacePath = workspacePath;
38
49
  this.tsConfigPath = tsConfigPath;
39
50
  this.coreSchema = coreSchema;
40
51
  this.frameworkSchemas = frameworkSchemas;
41
52
  this.parseConfig = parseConfig;
42
- this.project = new Project({
43
- tsConfigFilePath: tsConfigPath,
44
- skipAddingFilesFromTsConfig: false,
45
- compilerOptions: {
46
- experimentalDecorators: true,
47
- emitDecoratorMetadata: true,
48
- target: 7,
49
- module: 1,
50
- esModuleInterop: true,
51
- },
52
- });
53
- this.project.addSourceFilesAtPaths(path.join(workspacePath, '**/*.ts'));
53
+ this.projectId = resolveProjectId(workspacePath, projectId);
54
+ this.lazyLoad = lazyLoad;
55
+ console.log(`🆔 Project ID: ${this.projectId}`);
56
+ console.log(`📂 Lazy loading: ${lazyLoad ? 'enabled' : 'disabled'}`);
57
+ if (lazyLoad) {
58
+ // Lazy mode: create Project without loading any files
59
+ // Files will be added just-in-time during parseChunk()
60
+ this.project = new Project({
61
+ tsConfigFilePath: tsConfigPath,
62
+ skipAddingFilesFromTsConfig: true, // Don't load files from tsconfig
63
+ skipFileDependencyResolution: true, // Don't load node_modules types
64
+ compilerOptions: {
65
+ experimentalDecorators: true,
66
+ emitDecoratorMetadata: true,
67
+ target: 7,
68
+ module: 1,
69
+ esModuleInterop: true,
70
+ skipLibCheck: true,
71
+ },
72
+ });
73
+ }
74
+ else {
75
+ // Eager mode: load all files upfront (original behavior for small projects)
76
+ this.project = new Project({
77
+ tsConfigFilePath: tsConfigPath,
78
+ skipAddingFilesFromTsConfig: false,
79
+ skipFileDependencyResolution: true,
80
+ compilerOptions: {
81
+ experimentalDecorators: true,
82
+ emitDecoratorMetadata: true,
83
+ target: 7,
84
+ module: 1,
85
+ esModuleInterop: true,
86
+ skipLibCheck: true,
87
+ },
88
+ });
89
+ // Include both .ts and .tsx files
90
+ this.project.addSourceFilesAtPaths(path.join(workspacePath, '**/*.{ts,tsx}'));
91
+ }
92
+ }
93
+ /**
94
+ * Get the projectId for this parser instance.
95
+ * This is used by tools to pass projectId to Neo4j queries.
96
+ */
97
+ getProjectId() {
98
+ return this.projectId;
54
99
  }
55
100
  /**
56
101
  * Set existing nodes from Neo4j for edge target matching during incremental parsing.
@@ -67,6 +112,7 @@ export class TypeScriptParser {
67
112
  labels: node.labels,
68
113
  properties: {
69
114
  id: node.id,
115
+ projectId: this.projectId,
70
116
  name: node.name,
71
117
  coreType: node.coreType,
72
118
  filePath: node.filePath,
@@ -81,8 +127,20 @@ export class TypeScriptParser {
81
127
  async parseWorkspace(filesToParse) {
82
128
  let sourceFiles;
83
129
  if (filesToParse && filesToParse.length > 0) {
130
+ // In lazy mode, files may not be loaded yet - add them if needed
84
131
  sourceFiles = filesToParse
85
- .map((filePath) => this.project.getSourceFile(filePath))
132
+ .map((filePath) => {
133
+ const existing = this.project.getSourceFile(filePath);
134
+ if (existing)
135
+ return existing;
136
+ // Add file to project if not already loaded (lazy mode)
137
+ try {
138
+ return this.project.addSourceFileAtPath(filePath);
139
+ }
140
+ catch {
141
+ return undefined;
142
+ }
143
+ })
86
144
  .filter((sf) => sf !== undefined);
87
145
  }
88
146
  else {
@@ -93,7 +151,7 @@ export class TypeScriptParser {
93
151
  continue;
94
152
  await this.parseCoreTypeScriptV2(sourceFile);
95
153
  }
96
- this.resolveDeferredEdges();
154
+ await this.resolveDeferredEdges();
97
155
  await this.applyContextExtractors();
98
156
  if (this.frameworkSchemas.length > 0) {
99
157
  await this.applyFrameworkEnhancements();
@@ -124,13 +182,45 @@ export class TypeScriptParser {
124
182
  const filePath = sourceFile.getFilePath();
125
183
  const stats = await fs.stat(filePath);
126
184
  const fileTrackingProperties = {
127
- size: stats.size,
128
- mtime: stats.mtimeMs,
185
+ size: Number(stats.size),
186
+ mtime: Number(stats.mtimeMs),
129
187
  contentHash: await hashFile(filePath),
130
188
  };
131
189
  const sourceFileNode = this.createCoreNode(sourceFile, CoreNodeType.SOURCE_FILE, fileTrackingProperties);
132
190
  this.addNode(sourceFileNode);
133
191
  await this.parseChildNodes(this.coreSchema.nodeTypes[CoreNodeType.SOURCE_FILE], sourceFileNode, sourceFile);
192
+ // Queue IMPORTS edges for deferred resolution
193
+ // Note: ImportDeclaration nodes are already created by parseChildNodes via the schema
194
+ // This adds SourceFile → SourceFile IMPORTS edges for cross-file dependency tracking
195
+ for (const importDecl of sourceFile.getImportDeclarations()) {
196
+ const moduleSpecifier = importDecl.getModuleSpecifierValue();
197
+ // Skip external modules (node_modules) - only process relative and scoped imports
198
+ if (!moduleSpecifier.startsWith('.') && !moduleSpecifier.startsWith('@')) {
199
+ continue;
200
+ }
201
+ // Use ts-morph's module resolution to get the actual file path
202
+ // This correctly resolves relative imports like './auth.controller' to absolute paths
203
+ try {
204
+ const targetSourceFile = importDecl.getModuleSpecifierSourceFile();
205
+ if (targetSourceFile) {
206
+ this.deferredEdges.push({
207
+ edgeType: CoreEdgeType.IMPORTS,
208
+ sourceNodeId: sourceFileNode.id,
209
+ targetName: targetSourceFile.getFilePath(), // Store resolved absolute path
210
+ targetType: CoreNodeType.SOURCE_FILE,
211
+ });
212
+ }
213
+ }
214
+ catch {
215
+ // If resolution fails, fall back to raw module specifier
216
+ this.deferredEdges.push({
217
+ edgeType: CoreEdgeType.IMPORTS,
218
+ sourceNodeId: sourceFileNode.id,
219
+ targetName: moduleSpecifier,
220
+ targetType: CoreNodeType.SOURCE_FILE,
221
+ });
222
+ }
223
+ }
134
224
  if (this.shouldParseVariables(sourceFile.getFilePath())) {
135
225
  for (const varStatement of sourceFile.getVariableStatements()) {
136
226
  for (const varDecl of varStatement.getDeclarations()) {
@@ -171,6 +261,14 @@ export class TypeScriptParser {
171
261
  this.addNode(coreNode);
172
262
  const coreEdge = this.createCoreEdge(edgeType, parentNode.id, coreNode.id);
173
263
  this.addEdge(coreEdge);
264
+ const SKELETONIZE_TYPES = new Set([
265
+ CoreNodeType.METHOD_DECLARATION,
266
+ CoreNodeType.FUNCTION_DECLARATION,
267
+ CoreNodeType.PROPERTY_DECLARATION,
268
+ ]);
269
+ if (SKELETONIZE_TYPES.has(type)) {
270
+ this.skeletonizeChildInParent(parentNode, coreNode);
271
+ }
174
272
  const childNodeConfig = this.coreSchema.nodeTypes[type];
175
273
  if (childNodeConfig) {
176
274
  this.queueRelationshipNodes(childNodeConfig, coreNode, child);
@@ -179,6 +277,15 @@ export class TypeScriptParser {
179
277
  }
180
278
  }
181
279
  }
280
+ skeletonizeChildInParent(parent, child) {
281
+ const childText = child.properties.sourceCode;
282
+ const bodyStart = childText.indexOf('{');
283
+ if (bodyStart > -1) {
284
+ const signature = childText.substring(0, bodyStart).trim();
285
+ const placeholder = `${signature} { /* NodeID: ${child.id} */ }`;
286
+ parent.properties.sourceCode = parent.properties.sourceCode.replace(childText, placeholder);
287
+ }
288
+ }
182
289
  /**
183
290
  * Queue relationship edges for deferred processing
184
291
  * These are resolved after all nodes are parsed since the target may not exist yet
@@ -201,15 +308,50 @@ export class TypeScriptParser {
201
308
  const targetName = this.extractRelationshipTargetName(target);
202
309
  if (!targetName)
203
310
  continue;
311
+ // For EXTENDS/IMPLEMENTS, try to get the file path from the resolved declaration
312
+ let targetFilePath;
313
+ if (edgeType === CoreEdgeType.EXTENDS || edgeType === CoreEdgeType.IMPLEMENTS) {
314
+ targetFilePath = this.extractTargetFilePath(target);
315
+ }
204
316
  this.deferredEdges.push({
205
317
  edgeType: edgeType,
206
318
  sourceNodeId: parsedNode.id,
207
319
  targetName,
208
320
  targetType: targetNodeType,
321
+ targetFilePath,
209
322
  });
210
323
  }
211
324
  }
212
325
  }
326
+ /**
327
+ * Extract the file path from a resolved target declaration.
328
+ * Used for EXTENDS/IMPLEMENTS to enable precise matching.
329
+ */
330
+ extractTargetFilePath(target) {
331
+ try {
332
+ // If target is already a ClassDeclaration or InterfaceDeclaration, get its source file
333
+ if (Node.isClassDeclaration(target) || Node.isInterfaceDeclaration(target)) {
334
+ return target.getSourceFile().getFilePath();
335
+ }
336
+ // If target is ExpressionWithTypeArguments (e.g., extends Foo<T>), resolve the type
337
+ if (Node.isExpressionWithTypeArguments(target)) {
338
+ const expression = target.getExpression();
339
+ if (Node.isIdentifier(expression)) {
340
+ // Try to get the definition of the type
341
+ const definitions = expression.getDefinitionNodes();
342
+ for (const def of definitions) {
343
+ if (Node.isClassDeclaration(def) || Node.isInterfaceDeclaration(def)) {
344
+ return def.getSourceFile().getFilePath();
345
+ }
346
+ }
347
+ }
348
+ }
349
+ }
350
+ catch {
351
+ // If resolution fails (e.g., external type), return undefined
352
+ }
353
+ return undefined;
354
+ }
213
355
  /**
214
356
  * Extract the target name from an AST node returned by relationship methods
215
357
  */
@@ -218,15 +360,103 @@ export class TypeScriptParser {
218
360
  return target.getName();
219
361
  if (Node.isInterfaceDeclaration(target))
220
362
  return target.getName();
221
- if (Node.isExpressionWithTypeArguments(target))
222
- return target.getExpression().getText();
363
+ if (Node.isExpressionWithTypeArguments(target)) {
364
+ const expression = target.getExpression();
365
+ const text = expression.getText();
366
+ const genericIndex = text.indexOf('<');
367
+ return genericIndex > 0 ? text.substring(0, genericIndex) : text;
368
+ }
223
369
  return undefined;
224
370
  }
225
371
  /**
226
372
  * Find a parsed node by name and core type
373
+ * For SourceFiles, implements smart import resolution:
374
+ * - Direct file path match
375
+ * - Relative import resolution (./foo, ../bar)
376
+ * - Scoped package imports (@workspace/ui, @ui/core)
377
+ *
378
+ * For ClassDeclaration/InterfaceDeclaration with filePath, uses precise matching.
227
379
  */
228
- findNodeByNameAndType(name, coreType) {
229
- for (const node of this.parsedNodes.values()) {
380
+ findNodeByNameAndType(name, coreType, filePath) {
381
+ // Combine both node collections for searching
382
+ const allNodes = [...this.parsedNodes.values(), ...this.existingNodes.values()];
383
+ // If we have a file path and it's not a SOURCE_FILE, use precise matching first
384
+ if (filePath && coreType !== CoreNodeType.SOURCE_FILE) {
385
+ for (const node of allNodes) {
386
+ if (node.coreType === coreType && node.properties.name === name && node.properties.filePath === filePath) {
387
+ return node;
388
+ }
389
+ }
390
+ // If precise match fails, fall through to name-only matching below
391
+ }
392
+ // For SOURCE_FILE with import specifier, try multiple matching strategies
393
+ if (coreType === CoreNodeType.SOURCE_FILE) {
394
+ // Strategy 1: Direct file path match
395
+ for (const node of allNodes) {
396
+ if (node.coreType === coreType && node.properties.filePath === name) {
397
+ return node;
398
+ }
399
+ }
400
+ // Strategy 2: Resolve relative imports (./foo, ../bar, ../../baz)
401
+ if (name.startsWith('.')) {
402
+ // Normalize: remove all leading ./ or ../ segments (handles ../../foo, ./bar, etc.)
403
+ const normalizedPath = name.replace(/^(\.\.?\/)+/, '');
404
+ // Try matching with common extensions
405
+ const extensions = ['', '.ts', '.tsx', '/index.ts', '/index.tsx'];
406
+ for (const ext of extensions) {
407
+ const searchPath = normalizedPath + ext;
408
+ for (const node of allNodes) {
409
+ if (node.coreType === coreType) {
410
+ // Match if filePath ends with the normalized path
411
+ if (node.properties.filePath.endsWith(searchPath) ||
412
+ node.properties.filePath.endsWith('/' + searchPath)) {
413
+ return node;
414
+ }
415
+ }
416
+ }
417
+ }
418
+ }
419
+ // Strategy 3: Workspace package imports (@workspace/ui, @ui/core)
420
+ if (name.startsWith('@')) {
421
+ const parts = name.split('/');
422
+ const packageName = parts.slice(0, 2).join('/'); // @scope/package
423
+ const subPath = parts.slice(2).join('/'); // rest of path after package name
424
+ // First, try to find an exact match with subpath
425
+ if (subPath) {
426
+ const extensions = ['', '.ts', '.tsx', '/index.ts', '/index.tsx'];
427
+ for (const ext of extensions) {
428
+ const searchPath = subPath + ext;
429
+ for (const node of allNodes) {
430
+ if (node.coreType === coreType && node.properties.packageName === packageName) {
431
+ if (node.properties.filePath.endsWith(searchPath) ||
432
+ node.properties.filePath.endsWith('/' + searchPath)) {
433
+ return node;
434
+ }
435
+ }
436
+ }
437
+ }
438
+ }
439
+ // For bare package imports (@workspace/ui), look for index files
440
+ if (!subPath) {
441
+ for (const node of allNodes) {
442
+ if (node.coreType === coreType && node.properties.packageName === packageName) {
443
+ const fileName = node.properties.name;
444
+ if (fileName === 'index.ts' || fileName === 'index.tsx') {
445
+ return node;
446
+ }
447
+ }
448
+ }
449
+ // If no index file, return any file from the package as a fallback
450
+ for (const node of allNodes) {
451
+ if (node.coreType === coreType && node.properties.packageName === packageName) {
452
+ return node;
453
+ }
454
+ }
455
+ }
456
+ }
457
+ }
458
+ // Default: exact name match (for non-SourceFile types like classes, interfaces)
459
+ for (const node of allNodes) {
230
460
  if (node.coreType === coreType && node.properties.name === name) {
231
461
  return node;
232
462
  }
@@ -236,14 +466,68 @@ export class TypeScriptParser {
236
466
  /**
237
467
  * Resolve deferred edges after all nodes have been parsed
238
468
  */
239
- resolveDeferredEdges() {
469
+ async resolveDeferredEdges() {
470
+ // Count edges by type for logging
471
+ const importsCount = this.deferredEdges.filter((e) => e.edgeType === CoreEdgeType.IMPORTS).length;
472
+ const extendsCount = this.deferredEdges.filter((e) => e.edgeType === CoreEdgeType.EXTENDS).length;
473
+ const implementsCount = this.deferredEdges.filter((e) => e.edgeType === CoreEdgeType.IMPLEMENTS).length;
474
+ let importsResolved = 0;
475
+ let extendsResolved = 0;
476
+ let implementsResolved = 0;
477
+ const unresolvedImports = [];
478
+ const unresolvedExtends = [];
479
+ const unresolvedImplements = [];
240
480
  for (const deferred of this.deferredEdges) {
241
- const targetNode = this.findNodeByNameAndType(deferred.targetName, deferred.targetType);
481
+ // Pass filePath for precise matching (especially important for EXTENDS/IMPLEMENTS)
482
+ const targetNode = this.findNodeByNameAndType(deferred.targetName, deferred.targetType, deferred.targetFilePath);
242
483
  if (targetNode) {
243
484
  const edge = this.createCoreEdge(deferred.edgeType, deferred.sourceNodeId, targetNode.id);
244
485
  this.addEdge(edge);
486
+ // Track resolution by type
487
+ if (deferred.edgeType === CoreEdgeType.IMPORTS) {
488
+ importsResolved++;
489
+ }
490
+ else if (deferred.edgeType === CoreEdgeType.EXTENDS) {
491
+ extendsResolved++;
492
+ }
493
+ else if (deferred.edgeType === CoreEdgeType.IMPLEMENTS) {
494
+ implementsResolved++;
495
+ }
496
+ }
497
+ else {
498
+ // Track unresolved by type
499
+ if (deferred.edgeType === CoreEdgeType.IMPORTS) {
500
+ unresolvedImports.push(deferred.targetName);
501
+ }
502
+ else if (deferred.edgeType === CoreEdgeType.EXTENDS) {
503
+ unresolvedExtends.push(deferred.targetName);
504
+ }
505
+ else if (deferred.edgeType === CoreEdgeType.IMPLEMENTS) {
506
+ unresolvedImplements.push(deferred.targetName);
507
+ }
245
508
  }
246
- // If not found, it's likely an external type (from node_modules) - skip silently
509
+ }
510
+ // Log import resolution stats
511
+ if (importsCount > 0) {
512
+ await debugLog('Import edge resolution', {
513
+ totalImports: importsCount,
514
+ resolved: importsResolved,
515
+ unresolvedCount: unresolvedImports.length,
516
+ unresolvedSample: unresolvedImports.slice(0, 10),
517
+ });
518
+ }
519
+ // Log inheritance (EXTENDS/IMPLEMENTS) resolution stats
520
+ if (extendsCount > 0 || implementsCount > 0) {
521
+ await debugLog('Inheritance edge resolution', {
522
+ extendsQueued: extendsCount,
523
+ extendsResolved,
524
+ extendsUnresolved: unresolvedExtends.length,
525
+ unresolvedExtendsSample: unresolvedExtends.slice(0, 10),
526
+ implementsQueued: implementsCount,
527
+ implementsResolved,
528
+ implementsUnresolved: unresolvedImplements.length,
529
+ unresolvedImplementsSample: unresolvedImplements.slice(0, 10),
530
+ });
247
531
  }
248
532
  this.deferredEdges = [];
249
533
  }
@@ -347,6 +631,23 @@ export class TypeScriptParser {
347
631
  // File contains import relationship
348
632
  const containsEdge = this.createCoreEdge(CoreEdgeType.CONTAINS, sourceFileNode.id, importNode.id);
349
633
  this.addEdge(containsEdge);
634
+ // Try to resolve import to create SourceFile -> SourceFile IMPORTS edge
635
+ try {
636
+ const targetSourceFile = importDecl.getModuleSpecifierSourceFile();
637
+ if (targetSourceFile) {
638
+ const targetFilePath = targetSourceFile.getFilePath();
639
+ // Queue deferred edge - will be resolved after all files are parsed
640
+ this.deferredEdges.push({
641
+ edgeType: CoreEdgeType.IMPORTS,
642
+ sourceNodeId: sourceFileNode.id,
643
+ targetName: targetFilePath, // Use file path as "name" for SourceFiles
644
+ targetType: CoreNodeType.SOURCE_FILE,
645
+ });
646
+ }
647
+ }
648
+ catch {
649
+ // Module resolution failed - external dependency, skip
650
+ }
350
651
  }
351
652
  // Parse variable declarations if framework schema specifies this file should have them parsed
352
653
  if (this.shouldParseVariables(sourceFile.getFilePath())) {
@@ -368,10 +669,11 @@ export class TypeScriptParser {
368
669
  createCoreNode(astNode, coreType, baseProperties = {}, parentId) {
369
670
  const name = this.extractNodeName(astNode, coreType);
370
671
  const filePath = astNode.getSourceFile().getFilePath();
371
- const nodeId = generateDeterministicId(coreType, filePath, name, parentId);
672
+ const nodeId = generateDeterministicId(this.projectId, coreType, filePath, name, parentId);
372
673
  // Extract base properties using schema
373
674
  const properties = {
374
675
  id: nodeId,
676
+ projectId: this.projectId,
375
677
  name,
376
678
  coreType,
377
679
  filePath,
@@ -449,6 +751,7 @@ export class TypeScriptParser {
449
751
  targetNodeId,
450
752
  properties: {
451
753
  coreType: relationshipType,
754
+ projectId: this.projectId,
452
755
  source: 'ast',
453
756
  confidence: 1.0,
454
757
  relationshipWeight,
@@ -600,22 +903,16 @@ export class TypeScriptParser {
600
903
  }
601
904
  }
602
905
  createFrameworkEdge(semanticType, relationshipType, sourceNodeId, targetNodeId, context = {}, relationshipWeight = 0.5) {
603
- // Generate deterministic edge ID based on type + source + target
604
- const edgeIdentity = `${semanticType}::${sourceNodeId}::${targetNodeId}`;
605
- const edgeHash = crypto.createHash('sha256').update(edgeIdentity).digest('hex').substring(0, 16);
606
- const edgeId = `${semanticType}:${edgeHash}`;
607
- const properties = {
608
- coreType: semanticType, // This might need adjustment based on schema
906
+ const { id, properties } = createFrameworkEdgeData({
609
907
  semanticType,
610
- source: 'pattern',
611
- confidence: 0.8,
612
- relationshipWeight,
613
- filePath: '',
614
- createdAt: new Date().toISOString(),
908
+ sourceNodeId,
909
+ targetNodeId,
910
+ projectId: this.projectId,
615
911
  context,
616
- };
912
+ relationshipWeight,
913
+ });
617
914
  return {
618
- id: edgeId,
915
+ id,
619
916
  relationshipType,
620
917
  sourceNodeId,
621
918
  targetNodeId,
@@ -712,11 +1009,29 @@ export class TypeScriptParser {
712
1009
  const excludedNodeTypes = this.parseConfig.excludedNodeTypes ?? [];
713
1010
  return excludedNodeTypes.includes(node.getKindName());
714
1011
  }
1012
+ /**
1013
+ * Safely test if a file path matches a pattern (string or regex).
1014
+ * Falls back to literal string matching if the pattern is an invalid regex.
1015
+ */
1016
+ matchesPattern(filePath, pattern) {
1017
+ // First try literal string match (always safe)
1018
+ if (filePath.includes(pattern)) {
1019
+ return true;
1020
+ }
1021
+ // Then try regex match with error handling
1022
+ try {
1023
+ return new RegExp(pattern).test(filePath);
1024
+ }
1025
+ catch {
1026
+ // Invalid regex pattern - already checked via includes() above
1027
+ return false;
1028
+ }
1029
+ }
715
1030
  shouldSkipFile(sourceFile) {
716
1031
  const filePath = sourceFile.getFilePath();
717
1032
  const excludedPatterns = this.parseConfig.excludePatterns ?? [];
718
1033
  for (const pattern of excludedPatterns) {
719
- if (filePath.includes(pattern) || filePath.match(new RegExp(pattern))) {
1034
+ if (this.matchesPattern(filePath, pattern)) {
720
1035
  return true;
721
1036
  }
722
1037
  }
@@ -778,4 +1093,284 @@ export class TypeScriptParser {
778
1093
  }));
779
1094
  return { nodes, edges };
780
1095
  }
1096
+ // ============================================
1097
+ // CHUNK-AWARE PARSING METHODS
1098
+ // For streaming/chunked parsing of large codebases
1099
+ // ============================================
1100
+ /**
1101
+ * Export current chunk results without clearing internal state.
1102
+ * Use this when importing chunks incrementally.
1103
+ */
1104
+ exportChunkResults() {
1105
+ const nodes = Array.from(this.parsedNodes.values()).map(this.toNeo4jNode);
1106
+ const edges = Array.from(this.parsedEdges.values()).map(this.toNeo4jEdge);
1107
+ return {
1108
+ nodes,
1109
+ edges,
1110
+ deferredEdges: [...this.deferredEdges],
1111
+ };
1112
+ }
1113
+ /**
1114
+ * Clear all parsed data (nodes, edges, deferred edges).
1115
+ * Call this after importing a chunk to free memory.
1116
+ */
1117
+ clearParsedData() {
1118
+ this.parsedNodes.clear();
1119
+ this.parsedEdges.clear();
1120
+ this.deferredEdges = [];
1121
+ }
1122
+ /**
1123
+ * Get count of currently parsed nodes and edges.
1124
+ * Useful for progress reporting.
1125
+ */
1126
+ getCurrentCounts() {
1127
+ return {
1128
+ nodes: this.parsedNodes.size,
1129
+ edges: this.parsedEdges.size,
1130
+ deferredEdges: this.deferredEdges.length,
1131
+ };
1132
+ }
1133
+ /**
1134
+ * Set the shared context for this parser.
1135
+ * Use this to share context across multiple parsers (e.g., in WorkspaceParser).
1136
+ * @param context The shared context map to use
1137
+ */
1138
+ setSharedContext(context) {
1139
+ this.sharedContext = context;
1140
+ }
1141
+ /**
1142
+ * Get the shared context from this parser.
1143
+ * Useful for aggregating context across multiple parsers.
1144
+ */
1145
+ getSharedContext() {
1146
+ return this.sharedContext;
1147
+ }
1148
+ /**
1149
+ * Get all parsed nodes (for cross-parser edge resolution).
1150
+ * Returns the internal Map of ParsedNodes.
1151
+ */
1152
+ getParsedNodes() {
1153
+ return this.parsedNodes;
1154
+ }
1155
+ /**
1156
+ * Get the framework schemas used by this parser.
1157
+ * Useful for WorkspaceParser to apply cross-package edge enhancements.
1158
+ */
1159
+ getFrameworkSchemas() {
1160
+ return this.frameworkSchemas;
1161
+ }
1162
+ /**
1163
+ * Defer edge enhancements to a parent parser (e.g., WorkspaceParser).
1164
+ * When true, parseChunk() will skip applyEdgeEnhancements().
1165
+ * The parent is responsible for calling applyEdgeEnhancementsManually() at the end.
1166
+ */
1167
+ setDeferEdgeEnhancements(defer) {
1168
+ this.deferEdgeEnhancements = defer;
1169
+ }
1170
+ /**
1171
+ * Get list of source files in the project.
1172
+ * In lazy mode, uses glob to discover files without loading them into memory.
1173
+ * Useful for determining total work and creating chunks.
1174
+ */
1175
+ async discoverSourceFiles() {
1176
+ if (this.discoveredFiles !== null) {
1177
+ return this.discoveredFiles;
1178
+ }
1179
+ if (this.lazyLoad) {
1180
+ // Use glob to find files without loading them into ts-morph
1181
+ // Include both .ts and .tsx files
1182
+ const pattern = path.join(this.workspacePath, '**/*.{ts,tsx}');
1183
+ const allFiles = await glob(pattern, {
1184
+ ignore: ['**/node_modules/**', '**/*.d.ts'],
1185
+ absolute: true,
1186
+ });
1187
+ // Apply exclude patterns from parseConfig
1188
+ const excludedPatterns = this.parseConfig.excludePatterns ?? [];
1189
+ this.discoveredFiles = allFiles.filter((filePath) => {
1190
+ for (const excludePattern of excludedPatterns) {
1191
+ if (this.matchesPattern(filePath, excludePattern)) {
1192
+ return false;
1193
+ }
1194
+ }
1195
+ return true;
1196
+ });
1197
+ console.log(`🔍 Discovered ${this.discoveredFiles.length} TypeScript files (lazy mode)`);
1198
+ return this.discoveredFiles;
1199
+ }
1200
+ else {
1201
+ // Eager mode - files are already loaded
1202
+ this.discoveredFiles = this.project
1203
+ .getSourceFiles()
1204
+ .filter((sf) => !this.shouldSkipFile(sf))
1205
+ .map((sf) => sf.getFilePath());
1206
+ return this.discoveredFiles;
1207
+ }
1208
+ }
1209
+ /**
1210
+ * @deprecated Use discoverSourceFiles() instead for async file discovery
1211
+ */
1212
+ getSourceFilePaths() {
1213
+ if (this.lazyLoad) {
1214
+ throw new Error('getSourceFilePaths() is not supported in lazy mode. Use discoverSourceFiles() instead.');
1215
+ }
1216
+ return this.project
1217
+ .getSourceFiles()
1218
+ .filter((sf) => !this.shouldSkipFile(sf))
1219
+ .map((sf) => sf.getFilePath());
1220
+ }
1221
+ /**
1222
+ * Parse a chunk of files without resolving deferred edges.
1223
+ * Use this for streaming parsing where edges are resolved after all chunks.
1224
+ * In lazy mode, files are added to the project just-in-time and removed after parsing.
1225
+ * @param filePaths Specific file paths to parse
1226
+ * @param skipEdgeResolution If true, deferred edges are not resolved (default: false)
1227
+ */
1228
+ async parseChunk(filePaths, skipEdgeResolution = false) {
1229
+ // Declare sourceFiles outside try so it's available in finally
1230
+ const sourceFiles = [];
1231
+ try {
1232
+ if (this.lazyLoad) {
1233
+ // Lazy mode: add files to project just-in-time
1234
+ for (const filePath of filePaths) {
1235
+ try {
1236
+ // Check if file already exists in project (shouldn't happen in lazy mode)
1237
+ // Add the file to the project if not already present
1238
+ const sourceFile = this.project.getSourceFile(filePath) ?? this.project.addSourceFileAtPath(filePath);
1239
+ sourceFiles.push(sourceFile);
1240
+ }
1241
+ catch (error) {
1242
+ console.warn(`Failed to add source file ${filePath}:`, error);
1243
+ }
1244
+ }
1245
+ }
1246
+ else {
1247
+ // Eager mode: files are already loaded
1248
+ const loadedFiles = filePaths
1249
+ .map((filePath) => this.project.getSourceFile(filePath))
1250
+ .filter((sf) => sf !== undefined);
1251
+ sourceFiles.push(...loadedFiles);
1252
+ }
1253
+ for (const sourceFile of sourceFiles) {
1254
+ if (this.shouldSkipFile(sourceFile))
1255
+ continue;
1256
+ await this.parseCoreTypeScriptV2(sourceFile);
1257
+ }
1258
+ // Only resolve edges if not skipping
1259
+ if (!skipEdgeResolution) {
1260
+ await this.resolveDeferredEdges();
1261
+ }
1262
+ await this.applyContextExtractors();
1263
+ if (this.frameworkSchemas.length > 0) {
1264
+ await this.applyFrameworkEnhancements();
1265
+ }
1266
+ // Apply edge enhancements unless deferred to parent (e.g., WorkspaceParser)
1267
+ // When deferred, parent will call applyEdgeEnhancementsManually() at the end
1268
+ // with all accumulated nodes for cross-package edge detection
1269
+ if (!this.deferEdgeEnhancements) {
1270
+ await this.applyEdgeEnhancements();
1271
+ }
1272
+ const neo4jNodes = Array.from(this.parsedNodes.values()).map(this.toNeo4jNode);
1273
+ const neo4jEdges = Array.from(this.parsedEdges.values()).map(this.toNeo4jEdge);
1274
+ return { nodes: neo4jNodes, edges: neo4jEdges };
1275
+ }
1276
+ finally {
1277
+ // Always clean up in lazy mode to prevent memory leaks
1278
+ if (this.lazyLoad) {
1279
+ for (const sourceFile of sourceFiles) {
1280
+ try {
1281
+ this.project.removeSourceFile(sourceFile);
1282
+ }
1283
+ catch {
1284
+ // Ignore errors when removing files
1285
+ }
1286
+ }
1287
+ }
1288
+ }
1289
+ }
1290
+ /**
1291
+ * Resolve deferred edges against both parsed nodes and existing nodes.
1292
+ * Call this after all chunks have been parsed.
1293
+ * @returns Resolved edges
1294
+ */
1295
+ async resolveDeferredEdgesManually() {
1296
+ const resolvedEdges = [];
1297
+ // Count edges by type for logging
1298
+ const extendsCount = this.deferredEdges.filter((e) => e.edgeType === CoreEdgeType.EXTENDS).length;
1299
+ const implementsCount = this.deferredEdges.filter((e) => e.edgeType === CoreEdgeType.IMPLEMENTS).length;
1300
+ let extendsResolved = 0;
1301
+ let implementsResolved = 0;
1302
+ const unresolvedExtends = [];
1303
+ const unresolvedImplements = [];
1304
+ for (const deferred of this.deferredEdges) {
1305
+ // Pass filePath for precise matching (especially important for EXTENDS/IMPLEMENTS)
1306
+ const targetNode = this.findNodeByNameAndType(deferred.targetName, deferred.targetType, deferred.targetFilePath);
1307
+ if (targetNode) {
1308
+ const edge = this.createCoreEdge(deferred.edgeType, deferred.sourceNodeId, targetNode.id);
1309
+ resolvedEdges.push(edge);
1310
+ this.addEdge(edge);
1311
+ if (deferred.edgeType === CoreEdgeType.EXTENDS) {
1312
+ extendsResolved++;
1313
+ }
1314
+ else if (deferred.edgeType === CoreEdgeType.IMPLEMENTS) {
1315
+ implementsResolved++;
1316
+ }
1317
+ }
1318
+ else {
1319
+ if (deferred.edgeType === CoreEdgeType.EXTENDS) {
1320
+ unresolvedExtends.push(deferred.targetName);
1321
+ }
1322
+ else if (deferred.edgeType === CoreEdgeType.IMPLEMENTS) {
1323
+ unresolvedImplements.push(deferred.targetName);
1324
+ }
1325
+ }
1326
+ }
1327
+ // Log inheritance resolution stats
1328
+ if (extendsCount > 0 || implementsCount > 0) {
1329
+ await debugLog('Inheritance edge resolution (manual)', {
1330
+ extendsQueued: extendsCount,
1331
+ extendsResolved,
1332
+ extendsUnresolved: unresolvedExtends.length,
1333
+ unresolvedExtendsSample: unresolvedExtends.slice(0, 10),
1334
+ implementsQueued: implementsCount,
1335
+ implementsResolved,
1336
+ implementsUnresolved: unresolvedImplements.length,
1337
+ unresolvedImplementsSample: unresolvedImplements.slice(0, 10),
1338
+ });
1339
+ }
1340
+ this.deferredEdges = [];
1341
+ return resolvedEdges.map(this.toNeo4jEdge);
1342
+ }
1343
+ /**
1344
+ * Apply edge enhancements on all accumulated nodes.
1345
+ * Call this after all chunks have been parsed for streaming mode.
1346
+ * This allows context-dependent edges (like INTERNAL_API_CALL) to be detected
1347
+ * after all nodes and their context have been collected.
1348
+ * @returns New edges created by edge enhancements
1349
+ */
1350
+ async applyEdgeEnhancementsManually() {
1351
+ const edgeCountBefore = this.parsedEdges.size;
1352
+ console.log(`🔗 Applying edge enhancements on ${this.parsedNodes.size} accumulated nodes...`);
1353
+ await this.applyEdgeEnhancements();
1354
+ const newEdgeCount = this.parsedEdges.size - edgeCountBefore;
1355
+ console.log(` ✅ Created ${newEdgeCount} edges from edge enhancements`);
1356
+ // Return only the new edges (those created by edge enhancements)
1357
+ const allEdges = Array.from(this.parsedEdges.values()).map(this.toNeo4jEdge);
1358
+ return allEdges.slice(edgeCountBefore);
1359
+ }
1360
+ /**
1361
+ * Add nodes to the existing nodes map for cross-chunk edge resolution.
1362
+ * These nodes are considered as potential edge targets but won't be exported.
1363
+ */
1364
+ addExistingNodesFromChunk(nodes) {
1365
+ for (const node of nodes) {
1366
+ const parsedNode = {
1367
+ id: node.id,
1368
+ coreType: node.properties.coreType,
1369
+ semanticType: node.properties.semanticType,
1370
+ labels: node.labels,
1371
+ properties: node.properties,
1372
+ };
1373
+ this.existingNodes.set(node.id, parsedNode);
1374
+ }
1375
+ }
781
1376
  }