code-graph-context 2.0.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/README.md +221 -2
  2. package/dist/constants.js +167 -0
  3. package/dist/core/config/fairsquare-framework-schema.js +9 -7
  4. package/dist/core/config/schema.js +41 -2
  5. package/dist/core/embeddings/natural-language-to-cypher.service.js +166 -110
  6. package/dist/core/parsers/typescript-parser.js +1039 -742
  7. package/dist/core/parsers/workspace-parser.js +175 -193
  8. package/dist/core/utils/code-normalizer.js +299 -0
  9. package/dist/core/utils/file-change-detection.js +17 -2
  10. package/dist/core/utils/file-utils.js +40 -5
  11. package/dist/core/utils/graph-factory.js +161 -0
  12. package/dist/core/utils/shared-utils.js +79 -0
  13. package/dist/core/workspace/workspace-detector.js +59 -5
  14. package/dist/mcp/constants.js +261 -8
  15. package/dist/mcp/handlers/graph-generator.handler.js +1 -0
  16. package/dist/mcp/handlers/incremental-parse.handler.js +22 -6
  17. package/dist/mcp/handlers/parallel-import.handler.js +136 -0
  18. package/dist/mcp/handlers/streaming-import.handler.js +14 -59
  19. package/dist/mcp/mcp.server.js +77 -2
  20. package/dist/mcp/services/job-manager.js +5 -8
  21. package/dist/mcp/services/watch-manager.js +64 -25
  22. package/dist/mcp/tools/detect-dead-code.tool.js +413 -0
  23. package/dist/mcp/tools/detect-duplicate-code.tool.js +450 -0
  24. package/dist/mcp/tools/hello.tool.js +16 -2
  25. package/dist/mcp/tools/impact-analysis.tool.js +20 -4
  26. package/dist/mcp/tools/index.js +37 -0
  27. package/dist/mcp/tools/parse-typescript-project.tool.js +15 -14
  28. package/dist/mcp/tools/swarm-cleanup.tool.js +157 -0
  29. package/dist/mcp/tools/swarm-constants.js +35 -0
  30. package/dist/mcp/tools/swarm-pheromone.tool.js +196 -0
  31. package/dist/mcp/tools/swarm-sense.tool.js +212 -0
  32. package/dist/mcp/workers/chunk-worker-pool.js +196 -0
  33. package/dist/mcp/workers/chunk-worker.types.js +4 -0
  34. package/dist/mcp/workers/chunk.worker.js +89 -0
  35. package/dist/mcp/workers/parse-coordinator.js +183 -0
  36. package/dist/mcp/workers/worker.pool.js +54 -0
  37. package/dist/storage/neo4j/neo4j.service.js +198 -14
  38. package/package.json +1 -1
@@ -0,0 +1,54 @@
1
+ import { Worker } from 'worker_threads';
2
/**
 * Fans a list of items out to a fixed number of worker threads and merges
 * what they send back.
 *
 * Every worker is started from the same script (`workerPath`) and receives,
 * via `workerData`, the full `items` array, a 4-byte `SharedArrayBuffer`,
 * the item count (`total`) and its own `workerId`.
 * NOTE(review): the shared buffer is presumably used by the worker script as
 * a shared "next index" cursor; the worker script is not visible here, so
 * confirm against it.
 *
 * Each worker is expected to post exactly one message shaped like
 * `{ results, count }`, where `results` is an iterable of `[index, result]`
 * pairs and `count` is the number of tasks that worker handled.
 */
export class ParallelPool {
  workerPath;
  numWorkers;

  /**
   * @param {string|URL} workerPath - Script handed to the `Worker` constructor.
   * @param {number} [numWorkers=2] - Number of worker threads to start per `run`.
   */
  constructor(workerPath, numWorkers = 2) {
    this.workerPath = workerPath;
    this.numWorkers = numWorkers;
  }

  /**
   * Processes `items` with `numWorkers` workers and collects the results.
   *
   * @param {Array} items - Work items; passed to every worker unchanged.
   * @returns {Promise<{results: Array, stats: {workerTaskCounts: number[], totalTasks: number, totalTimeMs: number}}>}
   *   `results[i]` holds whatever a worker reported for index `i`.
   * @throws {Error} If any worker errors, exits with a non-zero code, or exits
   *   without posting a result. Remaining workers are terminated first.
   */
  async run(items) {
    const start = Date.now();
    // A single 32-bit slot shared by all workers of this run.
    const indexBuffer = new SharedArrayBuffer(4);
    // Every worker started for this run, so a failure can stop the siblings.
    const spawned = [];

    let workerResults;
    try {
      workerResults = await Promise.all(
        Array.from({ length: this.numWorkers }, (_, id) => this.spawnWorker(id, items, indexBuffer, spawned)),
      );
    } catch (err) {
      // Promise.all rejects on the first failure; without this the other
      // workers would keep running in the background.
      for (const worker of spawned) {
        void worker.terminate();
      }
      throw err;
    }

    const results = [];
    const workerTaskCounts = [];
    for (const { results: pairs, count } of workerResults) {
      workerTaskCounts.push(count);
      for (const [i, result] of pairs) {
        results[i] = result;
      }
    }

    return {
      results,
      stats: {
        workerTaskCounts,
        totalTasks: items.length,
        totalTimeMs: Date.now() - start,
      },
    };
  }

  /**
   * Starts one worker and resolves with the first message it posts.
   *
   * @param {number} workerId - Identifier forwarded to the worker and used in error messages.
   * @param {Array} items - Work items forwarded through `workerData`.
   * @param {SharedArrayBuffer} indexBuffer - Buffer shared by all workers of one run.
   * @param {Array} [spawned] - Optional list that receives the created `Worker`
   *   so the caller can terminate it later.
   * @returns {Promise<*>} The worker's first message.
   */
  spawnWorker(workerId, items, indexBuffer, spawned) {
    return new Promise((resolve, reject) => {
      const worker = new Worker(this.workerPath, {
        workerData: {
          items,
          indexBuffer,
          total: items.length,
          workerId,
        },
      });
      if (spawned) {
        spawned.push(worker);
      }

      // Guards against settling twice: terminate() after a message triggers
      // an 'exit' with a non-zero code, and 'error' is also followed by 'exit'.
      let settled = false;
      const fail = (err) => {
        if (settled) {
          return;
        }
        settled = true;
        reject(err);
      };

      worker.on('message', (result) => {
        if (settled) {
          return;
        }
        settled = true;
        void worker.terminate();
        resolve(result);
      });
      worker.on('error', fail);
      worker.on('exit', (code) => {
        // A clean exit (code 0) with no message used to leave this promise
        // pending forever; report it instead of hanging the caller.
        const reason = code !== 0
          ? `Worker ${workerId} exited with code ${code}`
          : `Worker ${workerId} exited without posting a result`;
        fail(new Error(reason));
      });
    });
  }
}
@@ -114,6 +114,8 @@ export const QUERIES = {
114
114
  // Create composite indexes on projectId + id for efficient lookups
115
115
  CREATE_PROJECT_ID_INDEX_EMBEDDED: 'CREATE INDEX project_id_embedded_idx IF NOT EXISTS FOR (n:Embedded) ON (n.projectId, n.id)',
116
116
  CREATE_PROJECT_ID_INDEX_SOURCEFILE: 'CREATE INDEX project_id_sourcefile_idx IF NOT EXISTS FOR (n:SourceFile) ON (n.projectId, n.id)',
117
+ // Create index on normalizedHash for efficient structural duplicate detection
118
+ CREATE_NORMALIZED_HASH_INDEX: 'CREATE INDEX normalized_hash_idx IF NOT EXISTS FOR (n:Embedded) ON (n.normalizedHash)',
117
119
  CREATE_NODE: `
118
120
  UNWIND $nodes AS nodeData
119
121
  CALL apoc.create.node(nodeData.labels, nodeData.properties) YIELD node
@@ -175,13 +177,13 @@ export const QUERIES = {
175
177
  `,
176
178
  // Get cross-file edges before deletion (edges where one endpoint is outside the subgraph)
177
179
  // These will be recreated after import using deterministic IDs
180
+ // Uses filePath matching instead of relationship traversal to avoid following INJECTS/IMPORTS
178
181
  GET_CROSS_FILE_EDGES: `
179
- MATCH (sf:SourceFile)
180
- WHERE sf.filePath IN $filePaths AND sf.projectId = $projectId
181
- OPTIONAL MATCH (sf)-[*]->(child)
182
- WITH collect(DISTINCT sf) + collect(DISTINCT child) AS nodesToDelete
183
- UNWIND nodesToDelete AS n
184
- MATCH (n)-[r]-(other)
182
+ MATCH (n)
183
+ WHERE n.filePath IN $filePaths AND n.projectId = $projectId
184
+ WITH collect(DISTINCT n) AS nodesToDelete
185
+ UNWIND nodesToDelete AS node
186
+ MATCH (node)-[r]-(other)
185
187
  WHERE NOT other IN nodesToDelete AND other.projectId = $projectId
186
188
  RETURN DISTINCT
187
189
  startNode(r).id AS startNodeId,
@@ -190,11 +192,12 @@ export const QUERIES = {
190
192
  properties(r) AS edgeProperties
191
193
  `,
192
194
  // Delete source file subgraphs (nodes and all their edges)
195
+ // Uses filePath matching to delete only nodes belonging to the specified files
196
+ // Avoids following INJECTS/IMPORTS edges which would delete nodes from other files
193
197
  DELETE_SOURCE_FILE_SUBGRAPHS: `
194
- MATCH (sf:SourceFile)
195
- WHERE sf.filePath IN $filePaths AND sf.projectId = $projectId
196
- OPTIONAL MATCH (sf)-[*]->(child)
197
- DETACH DELETE sf, child
198
+ MATCH (n)
199
+ WHERE n.filePath IN $filePaths AND n.projectId = $projectId
200
+ DETACH DELETE n
198
201
  `,
199
202
  // Recreate cross-file edges after import (uses deterministic IDs)
200
203
  RECREATE_CROSS_FILE_EDGES: `
@@ -212,15 +215,19 @@ export const QUERIES = {
212
215
  // The previous query (WHERE startNode(r) IS NULL OR endNode(r) IS NULL) could never match anything
213
216
  // Get existing nodes (excluding files being reparsed) for edge target matching
214
217
  // Returns minimal info needed for edge detection: id, name, coreType, semanticType
218
+ // NOTE: Using property-based query instead of path traversal to avoid Cartesian explosion
219
+ // The old query `MATCH (sf:SourceFile)-[*]->(n)` caused OOM with large graphs
215
220
  GET_EXISTING_NODES_FOR_EDGE_DETECTION: `
216
- MATCH (sf:SourceFile)-[*]->(n)
217
- WHERE NOT sf.filePath IN $excludeFilePaths AND sf.projectId = $projectId
218
- RETURN n.id AS id,
221
+ MATCH (n)
222
+ WHERE n.projectId = $projectId
223
+ AND n.filePath IS NOT NULL
224
+ AND NOT n.filePath IN $excludeFilePaths
225
+ RETURN DISTINCT n.id AS id,
219
226
  n.name AS name,
220
227
  n.coreType AS coreType,
221
228
  n.semanticType AS semanticType,
222
229
  labels(n) AS labels,
223
- sf.filePath AS filePath
230
+ n.filePath AS filePath
224
231
  `,
225
232
  EXPLORE_ALL_CONNECTIONS: (maxDepth = MAX_TRAVERSAL_DEPTH, direction = 'BOTH', relationshipTypes) => {
226
233
  const safeMaxDepth = Math.min(Math.max(maxDepth, 1), MAX_TRAVERSAL_DEPTH);
@@ -568,4 +575,181 @@ export const QUERIES = {
568
575
  relationshipPath
569
576
  ORDER BY depth ASC
570
577
  `,
578
+ // ============================================
579
+ // DEAD CODE DETECTION QUERIES
580
+ // ============================================
581
+ /**
582
+ * Find exported classes/functions/interfaces with no incoming references from other files.
583
+ * These are potentially dead code - exported but never imported or used.
584
+ */
585
+ FIND_UNREFERENCED_EXPORTS: `
586
+ MATCH (n)
587
+ WHERE n.projectId = $projectId
588
+ AND n.isExported = true
589
+ AND n.coreType IN ['ClassDeclaration', 'FunctionDeclaration', 'InterfaceDeclaration']
590
+ WITH n
591
+ OPTIONAL MATCH (other)-[r]->(n)
592
+ WHERE other.projectId = $projectId
593
+ AND other.filePath <> n.filePath
594
+ AND type(r) IN ['IMPORTS', 'EXTENDS', 'IMPLEMENTS', 'TYPED_AS', 'INJECTS', 'CALLS']
595
+ WITH n, count(other) AS incomingCount
596
+ WHERE incomingCount = 0
597
+ RETURN n.id AS nodeId,
598
+ n.name AS name,
599
+ n.coreType AS coreType,
600
+ n.semanticType AS semanticType,
601
+ n.filePath AS filePath,
602
+ n.startLine AS lineNumber,
603
+ n.isExported AS isExported,
604
+ 'Exported but never imported or referenced' AS reason
605
+ ORDER BY n.filePath, n.startLine
606
+ `,
607
+ /**
608
+ * Find private methods with no incoming CALLS edges.
609
+ * Private methods that are never called are likely dead code.
610
+ */
611
+ FIND_UNCALLED_PRIVATE_METHODS: `
612
+ MATCH (n)
613
+ WHERE n.projectId = $projectId
614
+ AND n.coreType = 'MethodDeclaration'
615
+ AND n.visibility = 'private'
616
+ WITH n
617
+ OPTIONAL MATCH (caller)-[r:CALLS]->(n)
618
+ WHERE caller.projectId = $projectId
619
+ WITH n, count(caller) AS callCount
620
+ WHERE callCount = 0
621
+ RETURN n.id AS nodeId,
622
+ n.name AS name,
623
+ n.coreType AS coreType,
624
+ n.semanticType AS semanticType,
625
+ n.filePath AS filePath,
626
+ n.startLine AS lineNumber,
627
+ n.visibility AS visibility,
628
+ 'Private method never called' AS reason
629
+ ORDER BY n.filePath, n.startLine
630
+ `,
631
+ /**
632
+ * Find interfaces that are never implemented or referenced.
633
+ * Interfaces without implementations may be dead code.
634
+ */
635
+ FIND_UNREFERENCED_INTERFACES: `
636
+ MATCH (n)
637
+ WHERE n.projectId = $projectId
638
+ AND n.coreType = 'InterfaceDeclaration'
639
+ AND n.isExported = true
640
+ WITH n
641
+ OPTIONAL MATCH (other)-[r]->(n)
642
+ WHERE other.projectId = $projectId
643
+ AND type(r) IN ['IMPLEMENTS', 'EXTENDS', 'TYPED_AS', 'IMPORTS']
644
+ WITH n, count(other) AS refCount
645
+ WHERE refCount = 0
646
+ RETURN n.id AS nodeId,
647
+ n.name AS name,
648
+ n.coreType AS coreType,
649
+ n.semanticType AS semanticType,
650
+ n.filePath AS filePath,
651
+ n.startLine AS lineNumber,
652
+ 'Interface never implemented or referenced' AS reason
653
+ ORDER BY n.filePath, n.startLine
654
+ `,
655
+ /**
656
+ * Get all distinct semantic types for a project.
657
+ * Used to dynamically determine framework entry points for dead code detection.
658
+ */
659
+ GET_PROJECT_SEMANTIC_TYPES: `
660
+ MATCH (n)
661
+ WHERE n.projectId = $projectId
662
+ AND n.semanticType IS NOT NULL
663
+ AND n.coreType IN ['ClassDeclaration', 'FunctionDeclaration', 'InterfaceDeclaration', 'MethodDeclaration']
664
+ RETURN DISTINCT n.semanticType AS semanticType
665
+ `,
666
+ /**
667
+ * Get framework entry points that should be excluded from dead code analysis.
668
+ * These are nodes that may appear unused but are actually framework-managed.
669
+ * Filters by coreType to exclude ImportDeclarations and only return actual classes/functions/interfaces.
670
+ * Accepts $semanticTypes parameter for dynamic, per-project framework detection.
671
+ */
672
+ GET_FRAMEWORK_ENTRY_POINTS: `
673
+ MATCH (n)
674
+ WHERE n.projectId = $projectId
675
+ AND n.coreType IN ['ClassDeclaration', 'FunctionDeclaration', 'InterfaceDeclaration']
676
+ AND (
677
+ n.semanticType IN $semanticTypes
678
+ OR n.filePath ENDS WITH 'main.ts'
679
+ OR n.filePath ENDS WITH '.module.ts'
680
+ OR n.filePath ENDS WITH '.controller.ts'
681
+ OR n.filePath ENDS WITH 'index.ts'
682
+ )
683
+ RETURN n.id AS nodeId,
684
+ n.name AS name,
685
+ n.coreType AS coreType,
686
+ n.semanticType AS semanticType,
687
+ n.filePath AS filePath
688
+ ORDER BY n.semanticType, n.name
689
+ `,
690
+ // ============================================================================
691
+ // DUPLICATE CODE DETECTION QUERIES
692
+ // ============================================================================
693
+ /**
694
+ * Find structural duplicates - nodes with identical normalizedHash.
695
+ * Returns all nodes that share the same normalized code hash.
696
+ * Limited to prevent memory issues on large codebases.
697
+ */
698
+ FIND_STRUCTURAL_DUPLICATES: `
699
+ MATCH (n)
700
+ WHERE n.projectId = $projectId
701
+ AND n.coreType IN $coreTypes
702
+ AND n.normalizedHash IS NOT NULL
703
+ AND n.normalizedHash <> ''
704
+ WITH n.normalizedHash AS hash, collect(n) AS nodes
705
+ WHERE size(nodes) >= 2
706
+ UNWIND nodes AS n
707
+ RETURN n.id AS nodeId,
708
+ n.name AS name,
709
+ n.coreType AS coreType,
710
+ n.semanticType AS semanticType,
711
+ n.filePath AS filePath,
712
+ n.startLine AS lineNumber,
713
+ n.normalizedHash AS normalizedHash,
714
+ n.sourceCode AS sourceCode
715
+ ORDER BY n.normalizedHash, n.filePath, n.startLine
716
+ LIMIT toInteger($limit)
717
+ `,
718
+ /**
719
+ * Find semantic duplicates - nodes with similar embeddings.
720
+ * Uses vector similarity search to find semantically similar code.
721
+ * Note: Requires the vector index 'embedded_nodes_idx' to exist.
722
+ */
723
+ FIND_SEMANTIC_DUPLICATES: `
724
+ MATCH (n1)
725
+ WHERE n1.projectId = $projectId
726
+ AND n1.coreType IN $coreTypes
727
+ AND n1.embedding IS NOT NULL
728
+ WITH n1
729
+ CALL db.index.vector.queryNodes('embedded_nodes_idx', toInteger($vectorNeighbors), n1.embedding)
730
+ YIELD node AS n2, score AS similarity
731
+ WHERE n2.projectId = $projectId
732
+ AND n2.coreType IN $coreTypes
733
+ AND n1.id < n2.id
734
+ AND similarity >= $minSimilarity
735
+ AND n1.filePath <> n2.filePath
736
+ AND (n1.normalizedHash IS NULL OR n2.normalizedHash IS NULL OR n1.normalizedHash <> n2.normalizedHash)
737
+ RETURN n1.id AS nodeId1,
738
+ n1.name AS name1,
739
+ n1.coreType AS coreType1,
740
+ n1.semanticType AS semanticType1,
741
+ n1.filePath AS filePath1,
742
+ n1.startLine AS lineNumber1,
743
+ n1.sourceCode AS sourceCode1,
744
+ n2.id AS nodeId2,
745
+ n2.name AS name2,
746
+ n2.coreType AS coreType2,
747
+ n2.semanticType AS semanticType2,
748
+ n2.filePath AS filePath2,
749
+ n2.startLine AS lineNumber2,
750
+ n2.sourceCode AS sourceCode2,
751
+ similarity
752
+ ORDER BY similarity DESC
753
+ LIMIT toInteger($limit)
754
+ `,
571
755
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "code-graph-context",
3
- "version": "2.0.1",
3
+ "version": "2.3.0",
4
4
  "description": "MCP server that builds code graphs to provide rich context to LLMs",
5
5
  "type": "module",
6
6
  "homepage": "https://github.com/drewdrewH/code-graph-context#readme",