code-graph-context 2.0.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +221 -2
- package/dist/constants.js +167 -0
- package/dist/core/config/fairsquare-framework-schema.js +9 -7
- package/dist/core/config/schema.js +41 -2
- package/dist/core/embeddings/natural-language-to-cypher.service.js +166 -110
- package/dist/core/parsers/typescript-parser.js +1039 -742
- package/dist/core/parsers/workspace-parser.js +175 -193
- package/dist/core/utils/code-normalizer.js +299 -0
- package/dist/core/utils/file-change-detection.js +17 -2
- package/dist/core/utils/file-utils.js +40 -5
- package/dist/core/utils/graph-factory.js +161 -0
- package/dist/core/utils/shared-utils.js +79 -0
- package/dist/core/workspace/workspace-detector.js +59 -5
- package/dist/mcp/constants.js +261 -8
- package/dist/mcp/handlers/graph-generator.handler.js +1 -0
- package/dist/mcp/handlers/incremental-parse.handler.js +22 -6
- package/dist/mcp/handlers/parallel-import.handler.js +136 -0
- package/dist/mcp/handlers/streaming-import.handler.js +14 -59
- package/dist/mcp/mcp.server.js +77 -2
- package/dist/mcp/services/job-manager.js +5 -8
- package/dist/mcp/services/watch-manager.js +64 -25
- package/dist/mcp/tools/detect-dead-code.tool.js +413 -0
- package/dist/mcp/tools/detect-duplicate-code.tool.js +450 -0
- package/dist/mcp/tools/hello.tool.js +16 -2
- package/dist/mcp/tools/impact-analysis.tool.js +20 -4
- package/dist/mcp/tools/index.js +37 -0
- package/dist/mcp/tools/parse-typescript-project.tool.js +15 -14
- package/dist/mcp/tools/swarm-cleanup.tool.js +157 -0
- package/dist/mcp/tools/swarm-constants.js +35 -0
- package/dist/mcp/tools/swarm-pheromone.tool.js +196 -0
- package/dist/mcp/tools/swarm-sense.tool.js +212 -0
- package/dist/mcp/workers/chunk-worker-pool.js +196 -0
- package/dist/mcp/workers/chunk-worker.types.js +4 -0
- package/dist/mcp/workers/chunk.worker.js +89 -0
- package/dist/mcp/workers/parse-coordinator.js +183 -0
- package/dist/mcp/workers/worker.pool.js +54 -0
- package/dist/storage/neo4j/neo4j.service.js +198 -14
- package/package.json +1 -1
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { Worker } from 'worker_threads';
|
|
2
|
+
/**
 * Fans a list of items out to a fixed number of worker threads and merges
 * what they report back into a single, index-ordered result array.
 *
 * Contract with the worker script (as consumed by `run()`):
 *   - it receives `{ items, indexBuffer, total, workerId }` as `workerData`;
 *   - it posts exactly one message shaped `{ results, count }`, where
 *     `results` is an iterable of `[itemIndex, value]` pairs and `count`
 *     is the number of items that worker handled.
 *
 * NOTE(review): `indexBuffer` is presumably used by the worker script as a
 * shared cursor into `items` (via Atomics) — confirm against the worker.
 */
export class ParallelPool {
    workerPath;
    numWorkers;
    /**
     * @param {string|URL} workerPath - Script passed straight to `new Worker()`.
     * @param {number} [numWorkers=2] - Number of worker threads started per `run()` call.
     */
    constructor(workerPath, numWorkers = 2) {
        this.workerPath = workerPath;
        this.numWorkers = numWorkers;
    }
    /**
     * Processes `items` with `numWorkers` threads and collects their output.
     *
     * @param {Array} items - Work items; every worker receives the full list.
     * @returns {Promise<{results: Array, stats: {workerTaskCounts: number[], totalTasks: number, totalTimeMs: number}}>}
     *   `results[i]` is the value some worker reported for index `i`.
     * @throws {Error} Rejects if any worker errors or exits without reporting.
     */
    async run(items) {
        const start = Date.now();
        // One 32-bit slot shared by all workers; a SharedArrayBuffer is
        // zero-filled on creation, so no explicit initialisation is needed.
        const indexBuffer = new SharedArrayBuffer(4);
        const workerPromises = Array.from({ length: this.numWorkers }, (_, id) => this.spawnWorker(id, items, indexBuffer));
        const workerResults = await Promise.all(workerPromises);
        const results = [];
        const workerTaskCounts = [];
        for (const { results: map, count } of workerResults) {
            workerTaskCounts.push(count);
            // Place each value at its original item index so the merged
            // output order does not depend on which worker handled it.
            for (const [i, result] of map) {
                results[i] = result;
            }
        }
        return {
            results,
            stats: {
                workerTaskCounts,
                totalTasks: items.length,
                totalTimeMs: Date.now() - start,
            },
        };
    }
    /**
     * Starts one worker and resolves with the first message it posts.
     *
     * @param {number} workerId - Identifier forwarded to the worker and used in error text.
     * @param {Array} items - Work items forwarded as `workerData.items`.
     * @param {SharedArrayBuffer} indexBuffer - Buffer forwarded as `workerData.indexBuffer`.
     * @returns {Promise<*>} The worker's message payload.
     * @throws {Error} Rejects on a worker `error` event, or when the worker
     *   exits (with any code) before posting a message.
     */
    spawnWorker(workerId, items, indexBuffer) {
        return new Promise((resolve, reject) => {
            const worker = new Worker(this.workerPath, {
                workerData: {
                    items,
                    indexBuffer,
                    total: items.length,
                    workerId,
                },
            });
            let hasReported = false;
            worker.on('message', (result) => {
                hasReported = true;
                // The worker is single-use; shut it down once it has reported.
                void worker.terminate();
                resolve(result);
            });
            worker.on('error', reject);
            worker.on('exit', (code) => {
                if (hasReported) {
                    // terminate() above produces a non-zero exit code; that is expected.
                    return;
                }
                // Exiting without a message — even cleanly with code 0 — must
                // reject, otherwise run() would wait on this worker forever.
                const detail = code === 0 ? ' before posting a result' : '';
                reject(new Error(`Worker ${workerId} exited with code ${code}${detail}`));
            });
        });
    }
}
|
|
@@ -114,6 +114,8 @@ export const QUERIES = {
|
|
|
114
114
|
// Create composite indexes on projectId + id for efficient lookups
|
|
115
115
|
CREATE_PROJECT_ID_INDEX_EMBEDDED: 'CREATE INDEX project_id_embedded_idx IF NOT EXISTS FOR (n:Embedded) ON (n.projectId, n.id)',
|
|
116
116
|
CREATE_PROJECT_ID_INDEX_SOURCEFILE: 'CREATE INDEX project_id_sourcefile_idx IF NOT EXISTS FOR (n:SourceFile) ON (n.projectId, n.id)',
|
|
117
|
+
// Create index on normalizedHash for efficient structural duplicate detection
|
|
118
|
+
CREATE_NORMALIZED_HASH_INDEX: 'CREATE INDEX normalized_hash_idx IF NOT EXISTS FOR (n:Embedded) ON (n.normalizedHash)',
|
|
117
119
|
CREATE_NODE: `
|
|
118
120
|
UNWIND $nodes AS nodeData
|
|
119
121
|
CALL apoc.create.node(nodeData.labels, nodeData.properties) YIELD node
|
|
@@ -175,13 +177,13 @@ export const QUERIES = {
|
|
|
175
177
|
`,
|
|
176
178
|
// Get cross-file edges before deletion (edges where one endpoint is outside the subgraph)
|
|
177
179
|
// These will be recreated after import using deterministic IDs
|
|
180
|
+
// Uses filePath matching instead of relationship traversal to avoid following INJECTS/IMPORTS
|
|
178
181
|
GET_CROSS_FILE_EDGES: `
|
|
179
|
-
MATCH (
|
|
180
|
-
WHERE
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
MATCH (n)-[r]-(other)
|
|
182
|
+
MATCH (n)
|
|
183
|
+
WHERE n.filePath IN $filePaths AND n.projectId = $projectId
|
|
184
|
+
WITH collect(DISTINCT n) AS nodesToDelete
|
|
185
|
+
UNWIND nodesToDelete AS node
|
|
186
|
+
MATCH (node)-[r]-(other)
|
|
185
187
|
WHERE NOT other IN nodesToDelete AND other.projectId = $projectId
|
|
186
188
|
RETURN DISTINCT
|
|
187
189
|
startNode(r).id AS startNodeId,
|
|
@@ -190,11 +192,12 @@ export const QUERIES = {
|
|
|
190
192
|
properties(r) AS edgeProperties
|
|
191
193
|
`,
|
|
192
194
|
// Delete source file subgraphs (nodes and all their edges)
|
|
195
|
+
// Uses filePath matching to delete only nodes belonging to the specified files
|
|
196
|
+
// Avoids following INJECTS/IMPORTS edges which would delete nodes from other files
|
|
193
197
|
DELETE_SOURCE_FILE_SUBGRAPHS: `
|
|
194
|
-
MATCH (
|
|
195
|
-
WHERE
|
|
196
|
-
|
|
197
|
-
DETACH DELETE sf, child
|
|
198
|
+
MATCH (n)
|
|
199
|
+
WHERE n.filePath IN $filePaths AND n.projectId = $projectId
|
|
200
|
+
DETACH DELETE n
|
|
198
201
|
`,
|
|
199
202
|
// Recreate cross-file edges after import (uses deterministic IDs)
|
|
200
203
|
RECREATE_CROSS_FILE_EDGES: `
|
|
@@ -212,15 +215,19 @@ export const QUERIES = {
|
|
|
212
215
|
// The previous query (WHERE startNode(r) IS NULL OR endNode(r) IS NULL) could never match anything
|
|
213
216
|
// Get existing nodes (excluding files being reparsed) for edge target matching
|
|
214
217
|
// Returns minimal info needed for edge detection: id, name, coreType, semanticType
|
|
218
|
+
// NOTE: Using property-based query instead of path traversal to avoid Cartesian explosion
|
|
219
|
+
// The old query `MATCH (sf:SourceFile)-[*]->(n)` caused OOM with large graphs
|
|
215
220
|
GET_EXISTING_NODES_FOR_EDGE_DETECTION: `
|
|
216
|
-
MATCH (
|
|
217
|
-
WHERE
|
|
218
|
-
|
|
221
|
+
MATCH (n)
|
|
222
|
+
WHERE n.projectId = $projectId
|
|
223
|
+
AND n.filePath IS NOT NULL
|
|
224
|
+
AND NOT n.filePath IN $excludeFilePaths
|
|
225
|
+
RETURN DISTINCT n.id AS id,
|
|
219
226
|
n.name AS name,
|
|
220
227
|
n.coreType AS coreType,
|
|
221
228
|
n.semanticType AS semanticType,
|
|
222
229
|
labels(n) AS labels,
|
|
223
|
-
|
|
230
|
+
n.filePath AS filePath
|
|
224
231
|
`,
|
|
225
232
|
EXPLORE_ALL_CONNECTIONS: (maxDepth = MAX_TRAVERSAL_DEPTH, direction = 'BOTH', relationshipTypes) => {
|
|
226
233
|
const safeMaxDepth = Math.min(Math.max(maxDepth, 1), MAX_TRAVERSAL_DEPTH);
|
|
@@ -568,4 +575,181 @@ export const QUERIES = {
|
|
|
568
575
|
relationshipPath
|
|
569
576
|
ORDER BY depth ASC
|
|
570
577
|
`,
|
|
578
|
+
// ============================================
|
|
579
|
+
// DEAD CODE DETECTION QUERIES
|
|
580
|
+
// ============================================
|
|
581
|
+
/**
|
|
582
|
+
* Find exported classes/functions/interfaces with no incoming references from other files.
|
|
583
|
+
* These are potentially dead code - exported but never imported or used.
|
|
584
|
+
*/
|
|
585
|
+
FIND_UNREFERENCED_EXPORTS: `
|
|
586
|
+
MATCH (n)
|
|
587
|
+
WHERE n.projectId = $projectId
|
|
588
|
+
AND n.isExported = true
|
|
589
|
+
AND n.coreType IN ['ClassDeclaration', 'FunctionDeclaration', 'InterfaceDeclaration']
|
|
590
|
+
WITH n
|
|
591
|
+
OPTIONAL MATCH (other)-[r]->(n)
|
|
592
|
+
WHERE other.projectId = $projectId
|
|
593
|
+
AND other.filePath <> n.filePath
|
|
594
|
+
AND type(r) IN ['IMPORTS', 'EXTENDS', 'IMPLEMENTS', 'TYPED_AS', 'INJECTS', 'CALLS']
|
|
595
|
+
WITH n, count(other) AS incomingCount
|
|
596
|
+
WHERE incomingCount = 0
|
|
597
|
+
RETURN n.id AS nodeId,
|
|
598
|
+
n.name AS name,
|
|
599
|
+
n.coreType AS coreType,
|
|
600
|
+
n.semanticType AS semanticType,
|
|
601
|
+
n.filePath AS filePath,
|
|
602
|
+
n.startLine AS lineNumber,
|
|
603
|
+
n.isExported AS isExported,
|
|
604
|
+
'Exported but never imported or referenced' AS reason
|
|
605
|
+
ORDER BY n.filePath, n.startLine
|
|
606
|
+
`,
|
|
607
|
+
/**
|
|
608
|
+
* Find private methods with no incoming CALLS edges.
|
|
609
|
+
* Private methods that are never called are likely dead code.
|
|
610
|
+
*/
|
|
611
|
+
FIND_UNCALLED_PRIVATE_METHODS: `
|
|
612
|
+
MATCH (n)
|
|
613
|
+
WHERE n.projectId = $projectId
|
|
614
|
+
AND n.coreType = 'MethodDeclaration'
|
|
615
|
+
AND n.visibility = 'private'
|
|
616
|
+
WITH n
|
|
617
|
+
OPTIONAL MATCH (caller)-[r:CALLS]->(n)
|
|
618
|
+
WHERE caller.projectId = $projectId
|
|
619
|
+
WITH n, count(caller) AS callCount
|
|
620
|
+
WHERE callCount = 0
|
|
621
|
+
RETURN n.id AS nodeId,
|
|
622
|
+
n.name AS name,
|
|
623
|
+
n.coreType AS coreType,
|
|
624
|
+
n.semanticType AS semanticType,
|
|
625
|
+
n.filePath AS filePath,
|
|
626
|
+
n.startLine AS lineNumber,
|
|
627
|
+
n.visibility AS visibility,
|
|
628
|
+
'Private method never called' AS reason
|
|
629
|
+
ORDER BY n.filePath, n.startLine
|
|
630
|
+
`,
|
|
631
|
+
/**
|
|
632
|
+
* Find interfaces that are never implemented or referenced.
|
|
633
|
+
* Interfaces without implementations may be dead code.
|
|
634
|
+
*/
|
|
635
|
+
FIND_UNREFERENCED_INTERFACES: `
|
|
636
|
+
MATCH (n)
|
|
637
|
+
WHERE n.projectId = $projectId
|
|
638
|
+
AND n.coreType = 'InterfaceDeclaration'
|
|
639
|
+
AND n.isExported = true
|
|
640
|
+
WITH n
|
|
641
|
+
OPTIONAL MATCH (other)-[r]->(n)
|
|
642
|
+
WHERE other.projectId = $projectId
|
|
643
|
+
AND type(r) IN ['IMPLEMENTS', 'EXTENDS', 'TYPED_AS', 'IMPORTS']
|
|
644
|
+
WITH n, count(other) AS refCount
|
|
645
|
+
WHERE refCount = 0
|
|
646
|
+
RETURN n.id AS nodeId,
|
|
647
|
+
n.name AS name,
|
|
648
|
+
n.coreType AS coreType,
|
|
649
|
+
n.semanticType AS semanticType,
|
|
650
|
+
n.filePath AS filePath,
|
|
651
|
+
n.startLine AS lineNumber,
|
|
652
|
+
'Interface never implemented or referenced' AS reason
|
|
653
|
+
ORDER BY n.filePath, n.startLine
|
|
654
|
+
`,
|
|
655
|
+
/**
|
|
656
|
+
* Get all distinct semantic types for a project.
|
|
657
|
+
* Used to dynamically determine framework entry points for dead code detection.
|
|
658
|
+
*/
|
|
659
|
+
GET_PROJECT_SEMANTIC_TYPES: `
|
|
660
|
+
MATCH (n)
|
|
661
|
+
WHERE n.projectId = $projectId
|
|
662
|
+
AND n.semanticType IS NOT NULL
|
|
663
|
+
AND n.coreType IN ['ClassDeclaration', 'FunctionDeclaration', 'InterfaceDeclaration', 'MethodDeclaration']
|
|
664
|
+
RETURN DISTINCT n.semanticType AS semanticType
|
|
665
|
+
`,
|
|
666
|
+
/**
|
|
667
|
+
* Get framework entry points that should be excluded from dead code analysis.
|
|
668
|
+
* These are nodes that may appear unused but are actually framework-managed.
|
|
669
|
+
* Filters by coreType to exclude ImportDeclarations and only return actual classes/functions/interfaces.
|
|
670
|
+
* Accepts $semanticTypes parameter for dynamic, per-project framework detection.
|
|
671
|
+
*/
|
|
672
|
+
GET_FRAMEWORK_ENTRY_POINTS: `
|
|
673
|
+
MATCH (n)
|
|
674
|
+
WHERE n.projectId = $projectId
|
|
675
|
+
AND n.coreType IN ['ClassDeclaration', 'FunctionDeclaration', 'InterfaceDeclaration']
|
|
676
|
+
AND (
|
|
677
|
+
n.semanticType IN $semanticTypes
|
|
678
|
+
OR n.filePath ENDS WITH 'main.ts'
|
|
679
|
+
OR n.filePath ENDS WITH '.module.ts'
|
|
680
|
+
OR n.filePath ENDS WITH '.controller.ts'
|
|
681
|
+
OR n.filePath ENDS WITH 'index.ts'
|
|
682
|
+
)
|
|
683
|
+
RETURN n.id AS nodeId,
|
|
684
|
+
n.name AS name,
|
|
685
|
+
n.coreType AS coreType,
|
|
686
|
+
n.semanticType AS semanticType,
|
|
687
|
+
n.filePath AS filePath
|
|
688
|
+
ORDER BY n.semanticType, n.name
|
|
689
|
+
`,
|
|
690
|
+
// ============================================================================
|
|
691
|
+
// DUPLICATE CODE DETECTION QUERIES
|
|
692
|
+
// ============================================================================
|
|
693
|
+
/**
|
|
694
|
+
* Find structural duplicates - nodes with identical normalizedHash.
|
|
695
|
+
* Returns all nodes that share the same normalized code hash.
|
|
696
|
+
* Limited to prevent memory issues on large codebases.
|
|
697
|
+
*/
|
|
698
|
+
FIND_STRUCTURAL_DUPLICATES: `
|
|
699
|
+
MATCH (n)
|
|
700
|
+
WHERE n.projectId = $projectId
|
|
701
|
+
AND n.coreType IN $coreTypes
|
|
702
|
+
AND n.normalizedHash IS NOT NULL
|
|
703
|
+
AND n.normalizedHash <> ''
|
|
704
|
+
WITH n.normalizedHash AS hash, collect(n) AS nodes
|
|
705
|
+
WHERE size(nodes) >= 2
|
|
706
|
+
UNWIND nodes AS n
|
|
707
|
+
RETURN n.id AS nodeId,
|
|
708
|
+
n.name AS name,
|
|
709
|
+
n.coreType AS coreType,
|
|
710
|
+
n.semanticType AS semanticType,
|
|
711
|
+
n.filePath AS filePath,
|
|
712
|
+
n.startLine AS lineNumber,
|
|
713
|
+
n.normalizedHash AS normalizedHash,
|
|
714
|
+
n.sourceCode AS sourceCode
|
|
715
|
+
ORDER BY n.normalizedHash, n.filePath, n.startLine
|
|
716
|
+
LIMIT toInteger($limit)
|
|
717
|
+
`,
|
|
718
|
+
/**
|
|
719
|
+
* Find semantic duplicates - nodes with similar embeddings.
|
|
720
|
+
* Uses vector similarity search to find semantically similar code.
|
|
721
|
+
* Note: Requires the vector index 'embedded_nodes_idx' to exist.
|
|
722
|
+
*/
|
|
723
|
+
FIND_SEMANTIC_DUPLICATES: `
|
|
724
|
+
MATCH (n1)
|
|
725
|
+
WHERE n1.projectId = $projectId
|
|
726
|
+
AND n1.coreType IN $coreTypes
|
|
727
|
+
AND n1.embedding IS NOT NULL
|
|
728
|
+
WITH n1
|
|
729
|
+
CALL db.index.vector.queryNodes('embedded_nodes_idx', toInteger($vectorNeighbors), n1.embedding)
|
|
730
|
+
YIELD node AS n2, score AS similarity
|
|
731
|
+
WHERE n2.projectId = $projectId
|
|
732
|
+
AND n2.coreType IN $coreTypes
|
|
733
|
+
AND n1.id < n2.id
|
|
734
|
+
AND similarity >= $minSimilarity
|
|
735
|
+
AND n1.filePath <> n2.filePath
|
|
736
|
+
AND (n1.normalizedHash IS NULL OR n2.normalizedHash IS NULL OR n1.normalizedHash <> n2.normalizedHash)
|
|
737
|
+
RETURN n1.id AS nodeId1,
|
|
738
|
+
n1.name AS name1,
|
|
739
|
+
n1.coreType AS coreType1,
|
|
740
|
+
n1.semanticType AS semanticType1,
|
|
741
|
+
n1.filePath AS filePath1,
|
|
742
|
+
n1.startLine AS lineNumber1,
|
|
743
|
+
n1.sourceCode AS sourceCode1,
|
|
744
|
+
n2.id AS nodeId2,
|
|
745
|
+
n2.name AS name2,
|
|
746
|
+
n2.coreType AS coreType2,
|
|
747
|
+
n2.semanticType AS semanticType2,
|
|
748
|
+
n2.filePath AS filePath2,
|
|
749
|
+
n2.startLine AS lineNumber2,
|
|
750
|
+
n2.sourceCode AS sourceCode2,
|
|
751
|
+
similarity
|
|
752
|
+
ORDER BY similarity DESC
|
|
753
|
+
LIMIT toInteger($limit)
|
|
754
|
+
`,
|
|
571
755
|
};
|
package/package.json
CHANGED