code-graph-context 1.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/README.md +221 -101
  2. package/dist/core/config/fairsquare-framework-schema.js +47 -60
  3. package/dist/core/config/nestjs-framework-schema.js +11 -1
  4. package/dist/core/config/schema.js +1 -1
  5. package/dist/core/config/timeouts.js +27 -0
  6. package/dist/core/embeddings/embeddings.service.js +122 -2
  7. package/dist/core/embeddings/natural-language-to-cypher.service.js +416 -17
  8. package/dist/core/parsers/parser-factory.js +5 -3
  9. package/dist/core/parsers/typescript-parser.js +614 -45
  10. package/dist/core/parsers/workspace-parser.js +553 -0
  11. package/dist/core/utils/edge-factory.js +37 -0
  12. package/dist/core/utils/file-change-detection.js +105 -0
  13. package/dist/core/utils/file-utils.js +20 -0
  14. package/dist/core/utils/index.js +3 -0
  15. package/dist/core/utils/path-utils.js +75 -0
  16. package/dist/core/utils/progress-reporter.js +112 -0
  17. package/dist/core/utils/project-id.js +176 -0
  18. package/dist/core/utils/retry.js +41 -0
  19. package/dist/core/workspace/index.js +4 -0
  20. package/dist/core/workspace/workspace-detector.js +221 -0
  21. package/dist/mcp/constants.js +153 -5
  22. package/dist/mcp/handlers/cross-file-edge.helpers.js +19 -0
  23. package/dist/mcp/handlers/file-change-detection.js +105 -0
  24. package/dist/mcp/handlers/graph-generator.handler.js +97 -32
  25. package/dist/mcp/handlers/incremental-parse.handler.js +146 -0
  26. package/dist/mcp/handlers/streaming-import.handler.js +210 -0
  27. package/dist/mcp/handlers/traversal.handler.js +130 -71
  28. package/dist/mcp/mcp.server.js +45 -6
  29. package/dist/mcp/service-init.js +79 -0
  30. package/dist/mcp/services/job-manager.js +165 -0
  31. package/dist/mcp/services/watch-manager.js +376 -0
  32. package/dist/mcp/services.js +2 -2
  33. package/dist/mcp/tools/check-parse-status.tool.js +64 -0
  34. package/dist/mcp/tools/impact-analysis.tool.js +84 -18
  35. package/dist/mcp/tools/index.js +13 -1
  36. package/dist/mcp/tools/list-projects.tool.js +62 -0
  37. package/dist/mcp/tools/list-watchers.tool.js +51 -0
  38. package/dist/mcp/tools/natural-language-to-cypher.tool.js +34 -8
  39. package/dist/mcp/tools/parse-typescript-project.tool.js +318 -58
  40. package/dist/mcp/tools/search-codebase.tool.js +56 -16
  41. package/dist/mcp/tools/start-watch-project.tool.js +100 -0
  42. package/dist/mcp/tools/stop-watch-project.tool.js +49 -0
  43. package/dist/mcp/tools/traverse-from-node.tool.js +68 -9
  44. package/dist/mcp/utils.js +35 -13
  45. package/dist/mcp/workers/parse-worker.js +198 -0
  46. package/dist/storage/neo4j/neo4j.service.js +147 -48
  47. package/package.json +4 -2
@@ -2,25 +2,31 @@
2
2
  import crypto from 'crypto';
3
3
  import fs from 'fs/promises';
4
4
  import path from 'node:path';
5
+ import { glob } from 'glob';
5
6
  import { minimatch } from 'minimatch';
6
7
  import { Project, Node } from 'ts-morph';
8
+ import { createFrameworkEdgeData } from '../utils/edge-factory.js';
7
9
  /**
8
10
  * Generate a deterministic node ID based on stable properties.
9
11
  * This ensures the same node gets the same ID across reparses.
10
12
  *
11
- * Identity is based on: coreType + filePath + name (+ parentId for nested nodes)
13
+ * Identity is based on: projectId + coreType + filePath + name (+ parentId for nested nodes)
12
14
  * This is stable because when it matters (one side of edge not reparsed),
13
15
  * names are guaranteed unchanged (or imports would break, triggering reparse).
16
+ *
17
+ * Including projectId ensures nodes from different projects have unique IDs
18
+ * even if they have identical file paths and names.
14
19
  */
15
- const generateDeterministicId = (coreType, filePath, name, parentId) => {
16
- const parts = parentId ? [coreType, filePath, parentId, name] : [coreType, filePath, name];
20
+ const generateDeterministicId = (projectId, coreType, filePath, name, parentId) => {
21
+ const parts = parentId ? [projectId, coreType, filePath, parentId, name] : [projectId, coreType, filePath, name];
17
22
  const identity = parts.join('::');
18
23
  const hash = crypto.createHash('sha256').update(identity).digest('hex').substring(0, 16);
19
- return `${coreType}:${hash}`;
24
+ return `${projectId}:${coreType}:${hash}`;
20
25
  };
21
- import { hashFile } from '../../utils/file-utils.js';
26
+ import { debugLog, hashFile } from '../utils/file-utils.js';
22
27
  import { NESTJS_FRAMEWORK_SCHEMA } from '../config/nestjs-framework-schema.js';
23
28
  import { CoreNodeType, CORE_TYPESCRIPT_SCHEMA, DEFAULT_PARSE_OPTIONS, CoreEdgeType, } from '../config/schema.js';
29
+ import { resolveProjectId } from '../utils/project-id.js';
24
30
  export class TypeScriptParser {
25
31
  workspacePath;
26
32
  tsConfigPath;
@@ -33,24 +39,63 @@ export class TypeScriptParser {
33
39
  existingNodes = new Map(); // Nodes from Neo4j for edge target matching
34
40
  deferredEdges = [];
35
41
  sharedContext = new Map(); // Shared context for custom data
36
- constructor(workspacePath, tsConfigPath = 'tsconfig.json', coreSchema = CORE_TYPESCRIPT_SCHEMA, frameworkSchemas = [NESTJS_FRAMEWORK_SCHEMA], parseConfig = DEFAULT_PARSE_OPTIONS) {
42
+ projectId; // Project identifier for multi-project isolation
43
+ lazyLoad; // Whether to use lazy file loading for large projects
44
+ discoveredFiles = null; // Cached file discovery results
45
+ deferEdgeEnhancements = false; // When true, skip edge enhancements (parent will handle)
46
+ constructor(workspacePath, tsConfigPath = 'tsconfig.json', coreSchema = CORE_TYPESCRIPT_SCHEMA, frameworkSchemas = [NESTJS_FRAMEWORK_SCHEMA], parseConfig = DEFAULT_PARSE_OPTIONS, projectId, // Optional - derived from workspacePath if not provided
47
+ lazyLoad = false) {
37
48
  this.workspacePath = workspacePath;
38
49
  this.tsConfigPath = tsConfigPath;
39
50
  this.coreSchema = coreSchema;
40
51
  this.frameworkSchemas = frameworkSchemas;
41
52
  this.parseConfig = parseConfig;
42
- this.project = new Project({
43
- tsConfigFilePath: tsConfigPath,
44
- skipAddingFilesFromTsConfig: false,
45
- compilerOptions: {
46
- experimentalDecorators: true,
47
- emitDecoratorMetadata: true,
48
- target: 7,
49
- module: 1,
50
- esModuleInterop: true,
51
- },
52
- });
53
- this.project.addSourceFilesAtPaths(path.join(workspacePath, '**/*.ts'));
53
+ this.projectId = resolveProjectId(workspacePath, projectId);
54
+ this.lazyLoad = lazyLoad;
55
+ console.log(`🆔 Project ID: ${this.projectId}`);
56
+ console.log(`📂 Lazy loading: ${lazyLoad ? 'enabled' : 'disabled'}`);
57
+ if (lazyLoad) {
58
+ // Lazy mode: create Project without loading any files
59
+ // Files will be added just-in-time during parseChunk()
60
+ this.project = new Project({
61
+ tsConfigFilePath: tsConfigPath,
62
+ skipAddingFilesFromTsConfig: true, // Don't load files from tsconfig
63
+ skipFileDependencyResolution: true, // Don't load node_modules types
64
+ compilerOptions: {
65
+ experimentalDecorators: true,
66
+ emitDecoratorMetadata: true,
67
+ target: 7,
68
+ module: 1,
69
+ esModuleInterop: true,
70
+ skipLibCheck: true,
71
+ },
72
+ });
73
+ }
74
+ else {
75
+ // Eager mode: load all files upfront (original behavior for small projects)
76
+ this.project = new Project({
77
+ tsConfigFilePath: tsConfigPath,
78
+ skipAddingFilesFromTsConfig: false,
79
+ skipFileDependencyResolution: true,
80
+ compilerOptions: {
81
+ experimentalDecorators: true,
82
+ emitDecoratorMetadata: true,
83
+ target: 7,
84
+ module: 1,
85
+ esModuleInterop: true,
86
+ skipLibCheck: true,
87
+ },
88
+ });
89
+ // Include both .ts and .tsx files
90
+ this.project.addSourceFilesAtPaths(path.join(workspacePath, '**/*.{ts,tsx}'));
91
+ }
92
+ }
93
+ /**
94
+ * Get the projectId for this parser instance.
95
+ * This is used by tools to pass projectId to Neo4j queries.
96
+ */
97
+ getProjectId() {
98
+ return this.projectId;
54
99
  }
55
100
  /**
56
101
  * Set existing nodes from Neo4j for edge target matching during incremental parsing.
@@ -67,6 +112,7 @@ export class TypeScriptParser {
67
112
  labels: node.labels,
68
113
  properties: {
69
114
  id: node.id,
115
+ projectId: this.projectId,
70
116
  name: node.name,
71
117
  coreType: node.coreType,
72
118
  filePath: node.filePath,
@@ -81,8 +127,20 @@ export class TypeScriptParser {
81
127
  async parseWorkspace(filesToParse) {
82
128
  let sourceFiles;
83
129
  if (filesToParse && filesToParse.length > 0) {
130
+ // In lazy mode, files may not be loaded yet - add them if needed
84
131
  sourceFiles = filesToParse
85
- .map((filePath) => this.project.getSourceFile(filePath))
132
+ .map((filePath) => {
133
+ const existing = this.project.getSourceFile(filePath);
134
+ if (existing)
135
+ return existing;
136
+ // Add file to project if not already loaded (lazy mode)
137
+ try {
138
+ return this.project.addSourceFileAtPath(filePath);
139
+ }
140
+ catch {
141
+ return undefined;
142
+ }
143
+ })
86
144
  .filter((sf) => sf !== undefined);
87
145
  }
88
146
  else {
@@ -93,7 +151,7 @@ export class TypeScriptParser {
93
151
  continue;
94
152
  await this.parseCoreTypeScriptV2(sourceFile);
95
153
  }
96
- this.resolveDeferredEdges();
154
+ await this.resolveDeferredEdges();
97
155
  await this.applyContextExtractors();
98
156
  if (this.frameworkSchemas.length > 0) {
99
157
  await this.applyFrameworkEnhancements();
@@ -124,13 +182,45 @@ export class TypeScriptParser {
124
182
  const filePath = sourceFile.getFilePath();
125
183
  const stats = await fs.stat(filePath);
126
184
  const fileTrackingProperties = {
127
- size: stats.size,
128
- mtime: stats.mtimeMs,
185
+ size: Number(stats.size),
186
+ mtime: Number(stats.mtimeMs),
129
187
  contentHash: await hashFile(filePath),
130
188
  };
131
189
  const sourceFileNode = this.createCoreNode(sourceFile, CoreNodeType.SOURCE_FILE, fileTrackingProperties);
132
190
  this.addNode(sourceFileNode);
133
191
  await this.parseChildNodes(this.coreSchema.nodeTypes[CoreNodeType.SOURCE_FILE], sourceFileNode, sourceFile);
192
+ // Queue IMPORTS edges for deferred resolution
193
+ // Note: ImportDeclaration nodes are already created by parseChildNodes via the schema
194
+ // This adds SourceFile → SourceFile IMPORTS edges for cross-file dependency tracking
195
+ for (const importDecl of sourceFile.getImportDeclarations()) {
196
+ const moduleSpecifier = importDecl.getModuleSpecifierValue();
197
+ // Skip external modules (node_modules) - only process relative and scoped imports
198
+ if (!moduleSpecifier.startsWith('.') && !moduleSpecifier.startsWith('@')) {
199
+ continue;
200
+ }
201
+ // Use ts-morph's module resolution to get the actual file path
202
+ // This correctly resolves relative imports like './auth.controller' to absolute paths
203
+ try {
204
+ const targetSourceFile = importDecl.getModuleSpecifierSourceFile();
205
+ if (targetSourceFile) {
206
+ this.deferredEdges.push({
207
+ edgeType: CoreEdgeType.IMPORTS,
208
+ sourceNodeId: sourceFileNode.id,
209
+ targetName: targetSourceFile.getFilePath(), // Store resolved absolute path
210
+ targetType: CoreNodeType.SOURCE_FILE,
211
+ });
212
+ }
213
+ }
214
+ catch {
215
+ // If resolution fails, fall back to raw module specifier
216
+ this.deferredEdges.push({
217
+ edgeType: CoreEdgeType.IMPORTS,
218
+ sourceNodeId: sourceFileNode.id,
219
+ targetName: moduleSpecifier,
220
+ targetType: CoreNodeType.SOURCE_FILE,
221
+ });
222
+ }
223
+ }
134
224
  if (this.shouldParseVariables(sourceFile.getFilePath())) {
135
225
  for (const varStatement of sourceFile.getVariableStatements()) {
136
226
  for (const varDecl of varStatement.getDeclarations()) {
@@ -218,15 +308,50 @@ export class TypeScriptParser {
218
308
  const targetName = this.extractRelationshipTargetName(target);
219
309
  if (!targetName)
220
310
  continue;
311
+ // For EXTENDS/IMPLEMENTS, try to get the file path from the resolved declaration
312
+ let targetFilePath;
313
+ if (edgeType === CoreEdgeType.EXTENDS || edgeType === CoreEdgeType.IMPLEMENTS) {
314
+ targetFilePath = this.extractTargetFilePath(target);
315
+ }
221
316
  this.deferredEdges.push({
222
317
  edgeType: edgeType,
223
318
  sourceNodeId: parsedNode.id,
224
319
  targetName,
225
320
  targetType: targetNodeType,
321
+ targetFilePath,
226
322
  });
227
323
  }
228
324
  }
229
325
  }
326
+ /**
327
+ * Extract the file path from a resolved target declaration.
328
+ * Used for EXTENDS/IMPLEMENTS to enable precise matching.
329
+ */
330
+ extractTargetFilePath(target) {
331
+ try {
332
+ // If target is already a ClassDeclaration or InterfaceDeclaration, get its source file
333
+ if (Node.isClassDeclaration(target) || Node.isInterfaceDeclaration(target)) {
334
+ return target.getSourceFile().getFilePath();
335
+ }
336
+ // If target is ExpressionWithTypeArguments (e.g., extends Foo<T>), resolve the type
337
+ if (Node.isExpressionWithTypeArguments(target)) {
338
+ const expression = target.getExpression();
339
+ if (Node.isIdentifier(expression)) {
340
+ // Try to get the definition of the type
341
+ const definitions = expression.getDefinitionNodes();
342
+ for (const def of definitions) {
343
+ if (Node.isClassDeclaration(def) || Node.isInterfaceDeclaration(def)) {
344
+ return def.getSourceFile().getFilePath();
345
+ }
346
+ }
347
+ }
348
+ }
349
+ }
350
+ catch {
351
+ // If resolution fails (e.g., external type), return undefined
352
+ }
353
+ return undefined;
354
+ }
230
355
  /**
231
356
  * Extract the target name from an AST node returned by relationship methods
232
357
  */
@@ -245,14 +370,93 @@ export class TypeScriptParser {
245
370
  }
246
371
  /**
247
372
  * Find a parsed node by name and core type
373
+ * For SourceFiles, implements smart import resolution:
374
+ * - Direct file path match
375
+ * - Relative import resolution (./foo, ../bar)
376
+ * - Scoped package imports (@workspace/ui, @ui/core)
377
+ *
378
+ * For ClassDeclaration/InterfaceDeclaration with filePath, uses precise matching.
248
379
  */
249
- findNodeByNameAndType(name, coreType) {
250
- for (const node of this.parsedNodes.values()) {
251
- if (node.coreType === coreType && node.properties.name === name) {
252
- return node;
380
+ findNodeByNameAndType(name, coreType, filePath) {
381
+ // Combine both node collections for searching
382
+ const allNodes = [...this.parsedNodes.values(), ...this.existingNodes.values()];
383
+ // If we have a file path and it's not a SOURCE_FILE, use precise matching first
384
+ if (filePath && coreType !== CoreNodeType.SOURCE_FILE) {
385
+ for (const node of allNodes) {
386
+ if (node.coreType === coreType && node.properties.name === name && node.properties.filePath === filePath) {
387
+ return node;
388
+ }
253
389
  }
390
+ // If precise match fails, fall through to name-only matching below
254
391
  }
255
- for (const node of this.existingNodes.values()) {
392
+ // For SOURCE_FILE with import specifier, try multiple matching strategies
393
+ if (coreType === CoreNodeType.SOURCE_FILE) {
394
+ // Strategy 1: Direct file path match
395
+ for (const node of allNodes) {
396
+ if (node.coreType === coreType && node.properties.filePath === name) {
397
+ return node;
398
+ }
399
+ }
400
+ // Strategy 2: Resolve relative imports (./foo, ../bar, ../../baz)
401
+ if (name.startsWith('.')) {
402
+ // Normalize: remove all leading ./ or ../ segments (handles ../../foo, ./bar, etc.)
403
+ const normalizedPath = name.replace(/^(\.\.?\/)+/, '');
404
+ // Try matching with common extensions
405
+ const extensions = ['', '.ts', '.tsx', '/index.ts', '/index.tsx'];
406
+ for (const ext of extensions) {
407
+ const searchPath = normalizedPath + ext;
408
+ for (const node of allNodes) {
409
+ if (node.coreType === coreType) {
410
+ // Match if filePath ends with the normalized path
411
+ if (node.properties.filePath.endsWith(searchPath) ||
412
+ node.properties.filePath.endsWith('/' + searchPath)) {
413
+ return node;
414
+ }
415
+ }
416
+ }
417
+ }
418
+ }
419
+ // Strategy 3: Workspace package imports (@workspace/ui, @ui/core)
420
+ if (name.startsWith('@')) {
421
+ const parts = name.split('/');
422
+ const packageName = parts.slice(0, 2).join('/'); // @scope/package
423
+ const subPath = parts.slice(2).join('/'); // rest of path after package name
424
+ // First, try to find an exact match with subpath
425
+ if (subPath) {
426
+ const extensions = ['', '.ts', '.tsx', '/index.ts', '/index.tsx'];
427
+ for (const ext of extensions) {
428
+ const searchPath = subPath + ext;
429
+ for (const node of allNodes) {
430
+ if (node.coreType === coreType && node.properties.packageName === packageName) {
431
+ if (node.properties.filePath.endsWith(searchPath) ||
432
+ node.properties.filePath.endsWith('/' + searchPath)) {
433
+ return node;
434
+ }
435
+ }
436
+ }
437
+ }
438
+ }
439
+ // For bare package imports (@workspace/ui), look for index files
440
+ if (!subPath) {
441
+ for (const node of allNodes) {
442
+ if (node.coreType === coreType && node.properties.packageName === packageName) {
443
+ const fileName = node.properties.name;
444
+ if (fileName === 'index.ts' || fileName === 'index.tsx') {
445
+ return node;
446
+ }
447
+ }
448
+ }
449
+ // If no index file, return any file from the package as a fallback
450
+ for (const node of allNodes) {
451
+ if (node.coreType === coreType && node.properties.packageName === packageName) {
452
+ return node;
453
+ }
454
+ }
455
+ }
456
+ }
457
+ }
458
+ // Default: exact name match (for non-SourceFile types like classes, interfaces)
459
+ for (const node of allNodes) {
256
460
  if (node.coreType === coreType && node.properties.name === name) {
257
461
  return node;
258
462
  }
@@ -262,14 +466,68 @@ export class TypeScriptParser {
262
466
  /**
263
467
  * Resolve deferred edges after all nodes have been parsed
264
468
  */
265
- resolveDeferredEdges() {
469
+ async resolveDeferredEdges() {
470
+ // Count edges by type for logging
471
+ const importsCount = this.deferredEdges.filter((e) => e.edgeType === CoreEdgeType.IMPORTS).length;
472
+ const extendsCount = this.deferredEdges.filter((e) => e.edgeType === CoreEdgeType.EXTENDS).length;
473
+ const implementsCount = this.deferredEdges.filter((e) => e.edgeType === CoreEdgeType.IMPLEMENTS).length;
474
+ let importsResolved = 0;
475
+ let extendsResolved = 0;
476
+ let implementsResolved = 0;
477
+ const unresolvedImports = [];
478
+ const unresolvedExtends = [];
479
+ const unresolvedImplements = [];
266
480
  for (const deferred of this.deferredEdges) {
267
- const targetNode = this.findNodeByNameAndType(deferred.targetName, deferred.targetType);
481
+ // Pass filePath for precise matching (especially important for EXTENDS/IMPLEMENTS)
482
+ const targetNode = this.findNodeByNameAndType(deferred.targetName, deferred.targetType, deferred.targetFilePath);
268
483
  if (targetNode) {
269
484
  const edge = this.createCoreEdge(deferred.edgeType, deferred.sourceNodeId, targetNode.id);
270
485
  this.addEdge(edge);
486
+ // Track resolution by type
487
+ if (deferred.edgeType === CoreEdgeType.IMPORTS) {
488
+ importsResolved++;
489
+ }
490
+ else if (deferred.edgeType === CoreEdgeType.EXTENDS) {
491
+ extendsResolved++;
492
+ }
493
+ else if (deferred.edgeType === CoreEdgeType.IMPLEMENTS) {
494
+ implementsResolved++;
495
+ }
496
+ }
497
+ else {
498
+ // Track unresolved by type
499
+ if (deferred.edgeType === CoreEdgeType.IMPORTS) {
500
+ unresolvedImports.push(deferred.targetName);
501
+ }
502
+ else if (deferred.edgeType === CoreEdgeType.EXTENDS) {
503
+ unresolvedExtends.push(deferred.targetName);
504
+ }
505
+ else if (deferred.edgeType === CoreEdgeType.IMPLEMENTS) {
506
+ unresolvedImplements.push(deferred.targetName);
507
+ }
271
508
  }
272
- // If not found, it's likely an external type (from node_modules) - skip silently
509
+ }
510
+ // Log import resolution stats
511
+ if (importsCount > 0) {
512
+ await debugLog('Import edge resolution', {
513
+ totalImports: importsCount,
514
+ resolved: importsResolved,
515
+ unresolvedCount: unresolvedImports.length,
516
+ unresolvedSample: unresolvedImports.slice(0, 10),
517
+ });
518
+ }
519
+ // Log inheritance (EXTENDS/IMPLEMENTS) resolution stats
520
+ if (extendsCount > 0 || implementsCount > 0) {
521
+ await debugLog('Inheritance edge resolution', {
522
+ extendsQueued: extendsCount,
523
+ extendsResolved,
524
+ extendsUnresolved: unresolvedExtends.length,
525
+ unresolvedExtendsSample: unresolvedExtends.slice(0, 10),
526
+ implementsQueued: implementsCount,
527
+ implementsResolved,
528
+ implementsUnresolved: unresolvedImplements.length,
529
+ unresolvedImplementsSample: unresolvedImplements.slice(0, 10),
530
+ });
273
531
  }
274
532
  this.deferredEdges = [];
275
533
  }
@@ -373,6 +631,23 @@ export class TypeScriptParser {
373
631
  // File contains import relationship
374
632
  const containsEdge = this.createCoreEdge(CoreEdgeType.CONTAINS, sourceFileNode.id, importNode.id);
375
633
  this.addEdge(containsEdge);
634
+ // Try to resolve import to create SourceFile -> SourceFile IMPORTS edge
635
+ try {
636
+ const targetSourceFile = importDecl.getModuleSpecifierSourceFile();
637
+ if (targetSourceFile) {
638
+ const targetFilePath = targetSourceFile.getFilePath();
639
+ // Queue deferred edge - will be resolved after all files are parsed
640
+ this.deferredEdges.push({
641
+ edgeType: CoreEdgeType.IMPORTS,
642
+ sourceNodeId: sourceFileNode.id,
643
+ targetName: targetFilePath, // Use file path as "name" for SourceFiles
644
+ targetType: CoreNodeType.SOURCE_FILE,
645
+ });
646
+ }
647
+ }
648
+ catch {
649
+ // Module resolution failed - external dependency, skip
650
+ }
376
651
  }
377
652
  // Parse variable declarations if framework schema specifies this file should have them parsed
378
653
  if (this.shouldParseVariables(sourceFile.getFilePath())) {
@@ -394,10 +669,11 @@ export class TypeScriptParser {
394
669
  createCoreNode(astNode, coreType, baseProperties = {}, parentId) {
395
670
  const name = this.extractNodeName(astNode, coreType);
396
671
  const filePath = astNode.getSourceFile().getFilePath();
397
- const nodeId = generateDeterministicId(coreType, filePath, name, parentId);
672
+ const nodeId = generateDeterministicId(this.projectId, coreType, filePath, name, parentId);
398
673
  // Extract base properties using schema
399
674
  const properties = {
400
675
  id: nodeId,
676
+ projectId: this.projectId,
401
677
  name,
402
678
  coreType,
403
679
  filePath,
@@ -475,6 +751,7 @@ export class TypeScriptParser {
475
751
  targetNodeId,
476
752
  properties: {
477
753
  coreType: relationshipType,
754
+ projectId: this.projectId,
478
755
  source: 'ast',
479
756
  confidence: 1.0,
480
757
  relationshipWeight,
@@ -626,22 +903,16 @@ export class TypeScriptParser {
626
903
  }
627
904
  }
628
905
  createFrameworkEdge(semanticType, relationshipType, sourceNodeId, targetNodeId, context = {}, relationshipWeight = 0.5) {
629
- // Generate deterministic edge ID based on type + source + target
630
- const edgeIdentity = `${semanticType}::${sourceNodeId}::${targetNodeId}`;
631
- const edgeHash = crypto.createHash('sha256').update(edgeIdentity).digest('hex').substring(0, 16);
632
- const edgeId = `${semanticType}:${edgeHash}`;
633
- const properties = {
634
- coreType: semanticType, // This might need adjustment based on schema
906
+ const { id, properties } = createFrameworkEdgeData({
635
907
  semanticType,
636
- source: 'pattern',
637
- confidence: 0.8,
638
- relationshipWeight,
639
- filePath: '',
640
- createdAt: new Date().toISOString(),
908
+ sourceNodeId,
909
+ targetNodeId,
910
+ projectId: this.projectId,
641
911
  context,
642
- };
912
+ relationshipWeight,
913
+ });
643
914
  return {
644
- id: edgeId,
915
+ id,
645
916
  relationshipType,
646
917
  sourceNodeId,
647
918
  targetNodeId,
@@ -738,11 +1009,29 @@ export class TypeScriptParser {
738
1009
  const excludedNodeTypes = this.parseConfig.excludedNodeTypes ?? [];
739
1010
  return excludedNodeTypes.includes(node.getKindName());
740
1011
  }
1012
+ /**
1013
+ * Safely test if a file path matches a pattern (string or regex).
1014
+ * Falls back to literal string matching if the pattern is an invalid regex.
1015
+ */
1016
+ matchesPattern(filePath, pattern) {
1017
+ // First try literal string match (always safe)
1018
+ if (filePath.includes(pattern)) {
1019
+ return true;
1020
+ }
1021
+ // Then try regex match with error handling
1022
+ try {
1023
+ return new RegExp(pattern).test(filePath);
1024
+ }
1025
+ catch {
1026
+ // Invalid regex pattern - already checked via includes() above
1027
+ return false;
1028
+ }
1029
+ }
741
1030
  shouldSkipFile(sourceFile) {
742
1031
  const filePath = sourceFile.getFilePath();
743
1032
  const excludedPatterns = this.parseConfig.excludePatterns ?? [];
744
1033
  for (const pattern of excludedPatterns) {
745
- if (filePath.includes(pattern) || filePath.match(new RegExp(pattern))) {
1034
+ if (this.matchesPattern(filePath, pattern)) {
746
1035
  return true;
747
1036
  }
748
1037
  }
@@ -804,4 +1093,284 @@ export class TypeScriptParser {
804
1093
  }));
805
1094
  return { nodes, edges };
806
1095
  }
1096
+ // ============================================
1097
+ // CHUNK-AWARE PARSING METHODS
1098
+ // For streaming/chunked parsing of large codebases
1099
+ // ============================================
1100
+ /**
1101
+ * Export current chunk results without clearing internal state.
1102
+ * Use this when importing chunks incrementally.
1103
+ */
1104
+ exportChunkResults() {
1105
+ const nodes = Array.from(this.parsedNodes.values()).map(this.toNeo4jNode);
1106
+ const edges = Array.from(this.parsedEdges.values()).map(this.toNeo4jEdge);
1107
+ return {
1108
+ nodes,
1109
+ edges,
1110
+ deferredEdges: [...this.deferredEdges],
1111
+ };
1112
+ }
1113
+ /**
1114
+ * Clear all parsed data (nodes, edges, deferred edges).
1115
+ * Call this after importing a chunk to free memory.
1116
+ */
1117
+ clearParsedData() {
1118
+ this.parsedNodes.clear();
1119
+ this.parsedEdges.clear();
1120
+ this.deferredEdges = [];
1121
+ }
1122
+ /**
1123
+ * Get count of currently parsed nodes and edges.
1124
+ * Useful for progress reporting.
1125
+ */
1126
+ getCurrentCounts() {
1127
+ return {
1128
+ nodes: this.parsedNodes.size,
1129
+ edges: this.parsedEdges.size,
1130
+ deferredEdges: this.deferredEdges.length,
1131
+ };
1132
+ }
1133
+ /**
1134
+ * Set the shared context for this parser.
1135
+ * Use this to share context across multiple parsers (e.g., in WorkspaceParser).
1136
+ * @param context The shared context map to use
1137
+ */
1138
+ setSharedContext(context) {
1139
+ this.sharedContext = context;
1140
+ }
1141
+ /**
1142
+ * Get the shared context from this parser.
1143
+ * Useful for aggregating context across multiple parsers.
1144
+ */
1145
+ getSharedContext() {
1146
+ return this.sharedContext;
1147
+ }
1148
+ /**
1149
+ * Get all parsed nodes (for cross-parser edge resolution).
1150
+ * Returns the internal Map of ParsedNodes.
1151
+ */
1152
+ getParsedNodes() {
1153
+ return this.parsedNodes;
1154
+ }
1155
+ /**
1156
+ * Get the framework schemas used by this parser.
1157
+ * Useful for WorkspaceParser to apply cross-package edge enhancements.
1158
+ */
1159
+ getFrameworkSchemas() {
1160
+ return this.frameworkSchemas;
1161
+ }
1162
+ /**
1163
+ * Defer edge enhancements to a parent parser (e.g., WorkspaceParser).
1164
+ * When true, parseChunk() will skip applyEdgeEnhancements().
1165
+ * The parent is responsible for calling applyEdgeEnhancementsManually() at the end.
1166
+ */
1167
+ setDeferEdgeEnhancements(defer) {
1168
+ this.deferEdgeEnhancements = defer;
1169
+ }
1170
+ /**
1171
+ * Get list of source files in the project.
1172
+ * In lazy mode, uses glob to discover files without loading them into memory.
1173
+ * Useful for determining total work and creating chunks.
1174
+ */
1175
+ async discoverSourceFiles() {
1176
+ if (this.discoveredFiles !== null) {
1177
+ return this.discoveredFiles;
1178
+ }
1179
+ if (this.lazyLoad) {
1180
+ // Use glob to find files without loading them into ts-morph
1181
+ // Include both .ts and .tsx files
1182
+ const pattern = path.join(this.workspacePath, '**/*.{ts,tsx}');
1183
+ const allFiles = await glob(pattern, {
1184
+ ignore: ['**/node_modules/**', '**/*.d.ts'],
1185
+ absolute: true,
1186
+ });
1187
+ // Apply exclude patterns from parseConfig
1188
+ const excludedPatterns = this.parseConfig.excludePatterns ?? [];
1189
+ this.discoveredFiles = allFiles.filter((filePath) => {
1190
+ for (const excludePattern of excludedPatterns) {
1191
+ if (this.matchesPattern(filePath, excludePattern)) {
1192
+ return false;
1193
+ }
1194
+ }
1195
+ return true;
1196
+ });
1197
+ console.log(`🔍 Discovered ${this.discoveredFiles.length} TypeScript files (lazy mode)`);
1198
+ return this.discoveredFiles;
1199
+ }
1200
+ else {
1201
+ // Eager mode - files are already loaded
1202
+ this.discoveredFiles = this.project
1203
+ .getSourceFiles()
1204
+ .filter((sf) => !this.shouldSkipFile(sf))
1205
+ .map((sf) => sf.getFilePath());
1206
+ return this.discoveredFiles;
1207
+ }
1208
+ }
1209
+ /**
1210
+ * @deprecated Use discoverSourceFiles() instead for async file discovery
1211
+ */
1212
+ getSourceFilePaths() {
1213
+ if (this.lazyLoad) {
1214
+ throw new Error('getSourceFilePaths() is not supported in lazy mode. Use discoverSourceFiles() instead.');
1215
+ }
1216
+ return this.project
1217
+ .getSourceFiles()
1218
+ .filter((sf) => !this.shouldSkipFile(sf))
1219
+ .map((sf) => sf.getFilePath());
1220
+ }
1221
+ /**
1222
+ * Parse a chunk of files without resolving deferred edges.
1223
+ * Use this for streaming parsing where edges are resolved after all chunks.
1224
+ * In lazy mode, files are added to the project just-in-time and removed after parsing.
1225
+ * @param filePaths Specific file paths to parse
1226
+ * @param skipEdgeResolution If true, deferred edges are not resolved (default: false)
1227
+ */
1228
+ async parseChunk(filePaths, skipEdgeResolution = false) {
1229
+ // Declare sourceFiles outside try so it's available in finally
1230
+ const sourceFiles = [];
1231
+ try {
1232
+ if (this.lazyLoad) {
1233
+ // Lazy mode: add files to project just-in-time
1234
+ for (const filePath of filePaths) {
1235
+ try {
1236
+ // Check if file already exists in project (shouldn't happen in lazy mode)
1237
+ // Add the file to the project if not already present
1238
+ const sourceFile = this.project.getSourceFile(filePath) ?? this.project.addSourceFileAtPath(filePath);
1239
+ sourceFiles.push(sourceFile);
1240
+ }
1241
+ catch (error) {
1242
+ console.warn(`Failed to add source file ${filePath}:`, error);
1243
+ }
1244
+ }
1245
+ }
1246
+ else {
1247
+ // Eager mode: files are already loaded
1248
+ const loadedFiles = filePaths
1249
+ .map((filePath) => this.project.getSourceFile(filePath))
1250
+ .filter((sf) => sf !== undefined);
1251
+ sourceFiles.push(...loadedFiles);
1252
+ }
1253
+ for (const sourceFile of sourceFiles) {
1254
+ if (this.shouldSkipFile(sourceFile))
1255
+ continue;
1256
+ await this.parseCoreTypeScriptV2(sourceFile);
1257
+ }
1258
+ // Only resolve edges if not skipping
1259
+ if (!skipEdgeResolution) {
1260
+ await this.resolveDeferredEdges();
1261
+ }
1262
+ await this.applyContextExtractors();
1263
+ if (this.frameworkSchemas.length > 0) {
1264
+ await this.applyFrameworkEnhancements();
1265
+ }
1266
+ // Apply edge enhancements unless deferred to parent (e.g., WorkspaceParser)
1267
+ // When deferred, parent will call applyEdgeEnhancementsManually() at the end
1268
+ // with all accumulated nodes for cross-package edge detection
1269
+ if (!this.deferEdgeEnhancements) {
1270
+ await this.applyEdgeEnhancements();
1271
+ }
1272
+ const neo4jNodes = Array.from(this.parsedNodes.values()).map(this.toNeo4jNode);
1273
+ const neo4jEdges = Array.from(this.parsedEdges.values()).map(this.toNeo4jEdge);
1274
+ return { nodes: neo4jNodes, edges: neo4jEdges };
1275
+ }
1276
+ finally {
1277
+ // Always clean up in lazy mode to prevent memory leaks
1278
+ if (this.lazyLoad) {
1279
+ for (const sourceFile of sourceFiles) {
1280
+ try {
1281
+ this.project.removeSourceFile(sourceFile);
1282
+ }
1283
+ catch {
1284
+ // Ignore errors when removing files
1285
+ }
1286
+ }
1287
+ }
1288
+ }
1289
+ }
1290
+ /**
1291
+ * Resolve deferred edges against both parsed nodes and existing nodes.
1292
+ * Call this after all chunks have been parsed.
1293
+ * @returns Resolved edges
1294
+ */
1295
+ async resolveDeferredEdgesManually() {
1296
+ const resolvedEdges = [];
1297
+ // Count edges by type for logging
1298
+ const extendsCount = this.deferredEdges.filter((e) => e.edgeType === CoreEdgeType.EXTENDS).length;
1299
+ const implementsCount = this.deferredEdges.filter((e) => e.edgeType === CoreEdgeType.IMPLEMENTS).length;
1300
+ let extendsResolved = 0;
1301
+ let implementsResolved = 0;
1302
+ const unresolvedExtends = [];
1303
+ const unresolvedImplements = [];
1304
+ for (const deferred of this.deferredEdges) {
1305
+ // Pass filePath for precise matching (especially important for EXTENDS/IMPLEMENTS)
1306
+ const targetNode = this.findNodeByNameAndType(deferred.targetName, deferred.targetType, deferred.targetFilePath);
1307
+ if (targetNode) {
1308
+ const edge = this.createCoreEdge(deferred.edgeType, deferred.sourceNodeId, targetNode.id);
1309
+ resolvedEdges.push(edge);
1310
+ this.addEdge(edge);
1311
+ if (deferred.edgeType === CoreEdgeType.EXTENDS) {
1312
+ extendsResolved++;
1313
+ }
1314
+ else if (deferred.edgeType === CoreEdgeType.IMPLEMENTS) {
1315
+ implementsResolved++;
1316
+ }
1317
+ }
1318
+ else {
1319
+ if (deferred.edgeType === CoreEdgeType.EXTENDS) {
1320
+ unresolvedExtends.push(deferred.targetName);
1321
+ }
1322
+ else if (deferred.edgeType === CoreEdgeType.IMPLEMENTS) {
1323
+ unresolvedImplements.push(deferred.targetName);
1324
+ }
1325
+ }
1326
+ }
1327
+ // Log inheritance resolution stats
1328
+ if (extendsCount > 0 || implementsCount > 0) {
1329
+ await debugLog('Inheritance edge resolution (manual)', {
1330
+ extendsQueued: extendsCount,
1331
+ extendsResolved,
1332
+ extendsUnresolved: unresolvedExtends.length,
1333
+ unresolvedExtendsSample: unresolvedExtends.slice(0, 10),
1334
+ implementsQueued: implementsCount,
1335
+ implementsResolved,
1336
+ implementsUnresolved: unresolvedImplements.length,
1337
+ unresolvedImplementsSample: unresolvedImplements.slice(0, 10),
1338
+ });
1339
+ }
1340
+ this.deferredEdges = [];
1341
+ return resolvedEdges.map(this.toNeo4jEdge);
1342
+ }
1343
+ /**
1344
+ * Apply edge enhancements on all accumulated nodes.
1345
+ * Call this after all chunks have been parsed for streaming mode.
1346
+ * This allows context-dependent edges (like INTERNAL_API_CALL) to be detected
1347
+ * after all nodes and their context have been collected.
1348
+ * @returns New edges created by edge enhancements
1349
+ */
1350
+ async applyEdgeEnhancementsManually() {
1351
+ const edgeCountBefore = this.parsedEdges.size;
1352
+ console.log(`🔗 Applying edge enhancements on ${this.parsedNodes.size} accumulated nodes...`);
1353
+ await this.applyEdgeEnhancements();
1354
+ const newEdgeCount = this.parsedEdges.size - edgeCountBefore;
1355
+ console.log(` ✅ Created ${newEdgeCount} edges from edge enhancements`);
1356
+ // Return only the new edges (those created by edge enhancements)
1357
+ const allEdges = Array.from(this.parsedEdges.values()).map(this.toNeo4jEdge);
1358
+ return allEdges.slice(edgeCountBefore);
1359
+ }
1360
+ /**
1361
+ * Add nodes to the existing nodes map for cross-chunk edge resolution.
1362
+ * These nodes are considered as potential edge targets but won't be exported.
1363
+ */
1364
+ addExistingNodesFromChunk(nodes) {
1365
+ for (const node of nodes) {
1366
+ const parsedNode = {
1367
+ id: node.id,
1368
+ coreType: node.properties.coreType,
1369
+ semanticType: node.properties.semanticType,
1370
+ labels: node.labels,
1371
+ properties: node.properties,
1372
+ };
1373
+ this.existingNodes.set(node.id, parsedNode);
1374
+ }
1375
+ }
807
1376
  }