code-graph-context 2.0.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/README.md +156 -2
  2. package/dist/constants.js +167 -0
  3. package/dist/core/config/fairsquare-framework-schema.js +9 -7
  4. package/dist/core/config/schema.js +41 -2
  5. package/dist/core/embeddings/natural-language-to-cypher.service.js +166 -110
  6. package/dist/core/parsers/typescript-parser.js +1039 -742
  7. package/dist/core/parsers/workspace-parser.js +175 -193
  8. package/dist/core/utils/code-normalizer.js +299 -0
  9. package/dist/core/utils/file-change-detection.js +17 -2
  10. package/dist/core/utils/file-utils.js +40 -5
  11. package/dist/core/utils/graph-factory.js +161 -0
  12. package/dist/core/utils/shared-utils.js +79 -0
  13. package/dist/core/workspace/workspace-detector.js +59 -5
  14. package/dist/mcp/constants.js +141 -8
  15. package/dist/mcp/handlers/graph-generator.handler.js +1 -0
  16. package/dist/mcp/handlers/incremental-parse.handler.js +3 -6
  17. package/dist/mcp/handlers/parallel-import.handler.js +136 -0
  18. package/dist/mcp/handlers/streaming-import.handler.js +14 -59
  19. package/dist/mcp/mcp.server.js +1 -1
  20. package/dist/mcp/services/job-manager.js +5 -8
  21. package/dist/mcp/services/watch-manager.js +7 -18
  22. package/dist/mcp/tools/detect-dead-code.tool.js +413 -0
  23. package/dist/mcp/tools/detect-duplicate-code.tool.js +450 -0
  24. package/dist/mcp/tools/impact-analysis.tool.js +20 -4
  25. package/dist/mcp/tools/index.js +4 -0
  26. package/dist/mcp/tools/parse-typescript-project.tool.js +15 -14
  27. package/dist/mcp/workers/chunk-worker-pool.js +196 -0
  28. package/dist/mcp/workers/chunk-worker.types.js +4 -0
  29. package/dist/mcp/workers/chunk.worker.js +89 -0
  30. package/dist/mcp/workers/parse-coordinator.js +183 -0
  31. package/dist/mcp/workers/worker.pool.js +54 -0
  32. package/dist/storage/neo4j/neo4j.service.js +190 -10
  33. package/package.json +1 -1
@@ -114,6 +114,8 @@ export const QUERIES = {
114
114
  // Create composite indexes on projectId + id for efficient lookups
115
115
  CREATE_PROJECT_ID_INDEX_EMBEDDED: 'CREATE INDEX project_id_embedded_idx IF NOT EXISTS FOR (n:Embedded) ON (n.projectId, n.id)',
116
116
  CREATE_PROJECT_ID_INDEX_SOURCEFILE: 'CREATE INDEX project_id_sourcefile_idx IF NOT EXISTS FOR (n:SourceFile) ON (n.projectId, n.id)',
117
+ // Create index on normalizedHash for efficient structural duplicate detection
118
+ CREATE_NORMALIZED_HASH_INDEX: 'CREATE INDEX normalized_hash_idx IF NOT EXISTS FOR (n:Embedded) ON (n.normalizedHash)',
117
119
  CREATE_NODE: `
118
120
  UNWIND $nodes AS nodeData
119
121
  CALL apoc.create.node(nodeData.labels, nodeData.properties) YIELD node
@@ -175,13 +177,13 @@ export const QUERIES = {
175
177
  `,
176
178
  // Get cross-file edges before deletion (edges where one endpoint is outside the subgraph)
177
179
  // These will be recreated after import using deterministic IDs
180
+ // Uses filePath matching instead of relationship traversal to avoid following INJECTS/IMPORTS edges into other files
178
181
  GET_CROSS_FILE_EDGES: `
179
- MATCH (sf:SourceFile)
180
- WHERE sf.filePath IN $filePaths AND sf.projectId = $projectId
181
- OPTIONAL MATCH (sf)-[*]->(child)
182
- WITH collect(DISTINCT sf) + collect(DISTINCT child) AS nodesToDelete
183
- UNWIND nodesToDelete AS n
184
- MATCH (n)-[r]-(other)
182
+ MATCH (n)
183
+ WHERE n.filePath IN $filePaths AND n.projectId = $projectId
184
+ WITH collect(DISTINCT n) AS nodesToDelete
185
+ UNWIND nodesToDelete AS node
186
+ MATCH (node)-[r]-(other)
185
187
  WHERE NOT other IN nodesToDelete AND other.projectId = $projectId
186
188
  RETURN DISTINCT
187
189
  startNode(r).id AS startNodeId,
@@ -190,11 +192,12 @@ export const QUERIES = {
190
192
  properties(r) AS edgeProperties
191
193
  `,
192
194
  // Delete source file subgraphs (nodes and all their edges)
195
+ // Uses filePath matching to delete only nodes belonging to the specified files
196
+ // Avoids following INJECTS/IMPORTS edges which would delete nodes from other files
193
197
  DELETE_SOURCE_FILE_SUBGRAPHS: `
194
- MATCH (sf:SourceFile)
195
- WHERE sf.filePath IN $filePaths AND sf.projectId = $projectId
196
- OPTIONAL MATCH (sf)-[*]->(child)
197
- DETACH DELETE sf, child
198
+ MATCH (n)
199
+ WHERE n.filePath IN $filePaths AND n.projectId = $projectId
200
+ DETACH DELETE n
198
201
  `,
199
202
  // Recreate cross-file edges after import (uses deterministic IDs)
200
203
  RECREATE_CROSS_FILE_EDGES: `
@@ -568,4 +571,181 @@ export const QUERIES = {
568
571
  relationshipPath
569
572
  ORDER BY depth ASC
570
573
  `,
574
+ // ============================================
575
+ // DEAD CODE DETECTION QUERIES
576
+ // ============================================
577
+ /**
578
+ * Find exported classes/functions/interfaces with no incoming references from other files.
579
+ * These are potentially dead code - exported but never imported or used.
580
+ */
581
+ FIND_UNREFERENCED_EXPORTS: `
582
+ MATCH (n)
583
+ WHERE n.projectId = $projectId
584
+ AND n.isExported = true
585
+ AND n.coreType IN ['ClassDeclaration', 'FunctionDeclaration', 'InterfaceDeclaration']
586
+ WITH n
587
+ OPTIONAL MATCH (other)-[r]->(n)
588
+ WHERE other.projectId = $projectId
589
+ AND other.filePath <> n.filePath
590
+ AND type(r) IN ['IMPORTS', 'EXTENDS', 'IMPLEMENTS', 'TYPED_AS', 'INJECTS', 'CALLS']
591
+ WITH n, count(other) AS incomingCount
592
+ WHERE incomingCount = 0
593
+ RETURN n.id AS nodeId,
594
+ n.name AS name,
595
+ n.coreType AS coreType,
596
+ n.semanticType AS semanticType,
597
+ n.filePath AS filePath,
598
+ n.startLine AS lineNumber,
599
+ n.isExported AS isExported,
600
+ 'Exported but never imported or referenced' AS reason
601
+ ORDER BY n.filePath, n.startLine
602
+ `,
603
+ /**
604
+ * Find private methods with no incoming CALLS edges.
605
+ * Private methods that are never called are likely dead code.
606
+ */
607
+ FIND_UNCALLED_PRIVATE_METHODS: `
608
+ MATCH (n)
609
+ WHERE n.projectId = $projectId
610
+ AND n.coreType = 'MethodDeclaration'
611
+ AND n.visibility = 'private'
612
+ WITH n
613
+ OPTIONAL MATCH (caller)-[r:CALLS]->(n)
614
+ WHERE caller.projectId = $projectId
615
+ WITH n, count(caller) AS callCount
616
+ WHERE callCount = 0
617
+ RETURN n.id AS nodeId,
618
+ n.name AS name,
619
+ n.coreType AS coreType,
620
+ n.semanticType AS semanticType,
621
+ n.filePath AS filePath,
622
+ n.startLine AS lineNumber,
623
+ n.visibility AS visibility,
624
+ 'Private method never called' AS reason
625
+ ORDER BY n.filePath, n.startLine
626
+ `,
627
+ /**
628
+ * Find exported interfaces with no incoming IMPLEMENTS, EXTENDS, TYPED_AS, or IMPORTS edges.
629
+ * Exported interfaces that nothing implements, extends, imports, or uses as a type may be dead code.
630
+ */
631
+ FIND_UNREFERENCED_INTERFACES: `
632
+ MATCH (n)
633
+ WHERE n.projectId = $projectId
634
+ AND n.coreType = 'InterfaceDeclaration'
635
+ AND n.isExported = true
636
+ WITH n
637
+ OPTIONAL MATCH (other)-[r]->(n)
638
+ WHERE other.projectId = $projectId
639
+ AND type(r) IN ['IMPLEMENTS', 'EXTENDS', 'TYPED_AS', 'IMPORTS']
640
+ WITH n, count(other) AS refCount
641
+ WHERE refCount = 0
642
+ RETURN n.id AS nodeId,
643
+ n.name AS name,
644
+ n.coreType AS coreType,
645
+ n.semanticType AS semanticType,
646
+ n.filePath AS filePath,
647
+ n.startLine AS lineNumber,
648
+ 'Interface never implemented or referenced' AS reason
649
+ ORDER BY n.filePath, n.startLine
650
+ `,
651
+ /**
652
+ * Get all distinct semantic types for a project.
653
+ * Used to dynamically determine framework entry points for dead code detection.
654
+ */
655
+ GET_PROJECT_SEMANTIC_TYPES: `
656
+ MATCH (n)
657
+ WHERE n.projectId = $projectId
658
+ AND n.semanticType IS NOT NULL
659
+ AND n.coreType IN ['ClassDeclaration', 'FunctionDeclaration', 'InterfaceDeclaration', 'MethodDeclaration']
660
+ RETURN DISTINCT n.semanticType AS semanticType
661
+ `,
662
+ /**
663
+ * Get framework entry points that should be excluded from dead code analysis.
664
+ * These are nodes that may appear unused but are actually framework-managed.
665
+ * Filters by coreType to exclude ImportDeclarations and only return actual classes/functions/interfaces.
666
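+ * Accepts $semanticTypes parameter for dynamic, per-project framework detection; nodes whose filePath ends with 'main.ts', '.module.ts', '.controller.ts', or 'index.ts' are also treated as entry points.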
667
+ */
668
+ GET_FRAMEWORK_ENTRY_POINTS: `
669
+ MATCH (n)
670
+ WHERE n.projectId = $projectId
671
+ AND n.coreType IN ['ClassDeclaration', 'FunctionDeclaration', 'InterfaceDeclaration']
672
+ AND (
673
+ n.semanticType IN $semanticTypes
674
+ OR n.filePath ENDS WITH 'main.ts'
675
+ OR n.filePath ENDS WITH '.module.ts'
676
+ OR n.filePath ENDS WITH '.controller.ts'
677
+ OR n.filePath ENDS WITH 'index.ts'
678
+ )
679
+ RETURN n.id AS nodeId,
680
+ n.name AS name,
681
+ n.coreType AS coreType,
682
+ n.semanticType AS semanticType,
683
+ n.filePath AS filePath
684
+ ORDER BY n.semanticType, n.name
685
+ `,
686
+ // ============================================================================
687
+ // DUPLICATE CODE DETECTION QUERIES
688
+ // ============================================================================
689
+ /**
690
+ * Find structural duplicates - nodes with identical normalizedHash.
691
+ * Returns all nodes that share the same normalized code hash.
692
+ * The number of returned rows is capped by $limit to prevent memory issues on large codebases.
693
+ */
694
+ FIND_STRUCTURAL_DUPLICATES: `
695
+ MATCH (n)
696
+ WHERE n.projectId = $projectId
697
+ AND n.coreType IN $coreTypes
698
+ AND n.normalizedHash IS NOT NULL
699
+ AND n.normalizedHash <> ''
700
+ WITH n.normalizedHash AS hash, collect(n) AS nodes
701
+ WHERE size(nodes) >= 2
702
+ UNWIND nodes AS n
703
+ RETURN n.id AS nodeId,
704
+ n.name AS name,
705
+ n.coreType AS coreType,
706
+ n.semanticType AS semanticType,
707
+ n.filePath AS filePath,
708
+ n.startLine AS lineNumber,
709
+ n.normalizedHash AS normalizedHash,
710
+ n.sourceCode AS sourceCode
711
+ ORDER BY n.normalizedHash, n.filePath, n.startLine
712
+ LIMIT toInteger($limit)
713
+ `,
714
+ /**
715
+ * Find semantic duplicates - nodes with similar embeddings.
716
+ * Uses vector similarity search to find semantically similar code in different files, skipping pairs that already share the same normalizedHash.
717
+ * Note: Requires the vector index 'embedded_nodes_idx' to exist.
718
+ */
719
+ FIND_SEMANTIC_DUPLICATES: `
720
+ MATCH (n1)
721
+ WHERE n1.projectId = $projectId
722
+ AND n1.coreType IN $coreTypes
723
+ AND n1.embedding IS NOT NULL
724
+ WITH n1
725
+ CALL db.index.vector.queryNodes('embedded_nodes_idx', toInteger($vectorNeighbors), n1.embedding)
726
+ YIELD node AS n2, score AS similarity
727
+ WHERE n2.projectId = $projectId
728
+ AND n2.coreType IN $coreTypes
729
+ AND n1.id < n2.id
730
+ AND similarity >= $minSimilarity
731
+ AND n1.filePath <> n2.filePath
732
+ AND (n1.normalizedHash IS NULL OR n2.normalizedHash IS NULL OR n1.normalizedHash <> n2.normalizedHash)
733
+ RETURN n1.id AS nodeId1,
734
+ n1.name AS name1,
735
+ n1.coreType AS coreType1,
736
+ n1.semanticType AS semanticType1,
737
+ n1.filePath AS filePath1,
738
+ n1.startLine AS lineNumber1,
739
+ n1.sourceCode AS sourceCode1,
740
+ n2.id AS nodeId2,
741
+ n2.name AS name2,
742
+ n2.coreType AS coreType2,
743
+ n2.semanticType AS semanticType2,
744
+ n2.filePath AS filePath2,
745
+ n2.startLine AS lineNumber2,
746
+ n2.sourceCode AS sourceCode2,
747
+ similarity
748
+ ORDER BY similarity DESC
749
+ LIMIT toInteger($limit)
750
+ `,
571
751
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "code-graph-context",
3
- "version": "2.0.1",
3
+ "version": "2.2.0",
4
4
  "description": "MCP server that builds code graphs to provide rich context to LLMs",
5
5
  "type": "module",
6
6
  "homepage": "https://github.com/drewdrewH/code-graph-context#readme",