npm - code-graph-context - Versions diffs - 2.0.1 → 2.3.0 - Mend

code-graph-context 2.0.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

package/README.md +221 -2
package/dist/constants.js +167 -0
package/dist/core/config/fairsquare-framework-schema.js +9 -7
package/dist/core/config/schema.js +41 -2
package/dist/core/embeddings/natural-language-to-cypher.service.js +166 -110
package/dist/core/parsers/typescript-parser.js +1039 -742
package/dist/core/parsers/workspace-parser.js +175 -193
package/dist/core/utils/code-normalizer.js +299 -0
package/dist/core/utils/file-change-detection.js +17 -2
package/dist/core/utils/file-utils.js +40 -5
package/dist/core/utils/graph-factory.js +161 -0
package/dist/core/utils/shared-utils.js +79 -0
package/dist/core/workspace/workspace-detector.js +59 -5
package/dist/mcp/constants.js +261 -8
package/dist/mcp/handlers/graph-generator.handler.js +1 -0
package/dist/mcp/handlers/incremental-parse.handler.js +22 -6
package/dist/mcp/handlers/parallel-import.handler.js +136 -0
package/dist/mcp/handlers/streaming-import.handler.js +14 -59
package/dist/mcp/mcp.server.js +77 -2
package/dist/mcp/services/job-manager.js +5 -8
package/dist/mcp/services/watch-manager.js +64 -25
package/dist/mcp/tools/detect-dead-code.tool.js +413 -0
package/dist/mcp/tools/detect-duplicate-code.tool.js +450 -0
package/dist/mcp/tools/hello.tool.js +16 -2
package/dist/mcp/tools/impact-analysis.tool.js +20 -4
package/dist/mcp/tools/index.js +37 -0
package/dist/mcp/tools/parse-typescript-project.tool.js +15 -14
package/dist/mcp/tools/swarm-cleanup.tool.js +157 -0
package/dist/mcp/tools/swarm-constants.js +35 -0
package/dist/mcp/tools/swarm-pheromone.tool.js +196 -0
package/dist/mcp/tools/swarm-sense.tool.js +212 -0
package/dist/mcp/workers/chunk-worker-pool.js +196 -0
package/dist/mcp/workers/chunk-worker.types.js +4 -0
package/dist/mcp/workers/chunk.worker.js +89 -0
package/dist/mcp/workers/parse-coordinator.js +183 -0
package/dist/mcp/workers/worker.pool.js +54 -0
package/dist/storage/neo4j/neo4j.service.js +198 -14
package/package.json +1 -1

package/dist/mcp/workers/worker.pool.js ADDED Viewed

@@ -0,0 +1,54 @@
+import { Worker } from 'worker_threads';
+export class ParallelPool {
+    workerPath;
+    numWorkers;
+    constructor(workerPath, numWorkers = 2) {
+        this.workerPath = workerPath;
+        this.numWorkers = numWorkers;
+    }
+    async run(items) {
+        const start = Date.now();
+        const indexBuffer = new SharedArrayBuffer(4);
+        const sharedIndex = new Int32Array(indexBuffer);
+        const workerPromises = Array.from({ length: this.numWorkers }, (_, id) => this.spawnWorker(id, items, indexBuffer));
+        const workerResults = await Promise.all(workerPromises);
+        const results = [];
+        const workerTaskCounts = [];
+        for (const { results: map, count } of workerResults) {
+            workerTaskCounts.push(count);
+            for (const [i, result] of map) {
+                results[i] = result;
+            }
+        }
+        return {
+            results,
+            stats: {
+                workerTaskCounts,
+                totalTasks: items.length,
+                totalTimeMs: Date.now() - start,
+            },
+        };
+    }
+    spawnWorker(workerId, items, indexBuffer) {
+        return new Promise((resolve, reject) => {
+            const worker = new Worker(this.workerPath, {
+                workerData: {
+                    items,
+                    indexBuffer,
+                    total: items.length,
+                    workerId,
+                },
+            });
+            worker.on('message', (result) => {
+                worker.terminate();
+                resolve(result);
+            });
+            worker.on('error', reject);
+            worker.on('exit', (code) => {
+                if (code !== 0) {
+                    reject(new Error(`Worker ${workerId} exited with code ${code}`));
+                }
+            });
+        });
+    }
+}

package/dist/storage/neo4j/neo4j.service.js CHANGED Viewed

@@ -114,6 +114,8 @@ export const QUERIES = {
     // Create composite indexes on projectId + id for efficient lookups
     CREATE_PROJECT_ID_INDEX_EMBEDDED: 'CREATE INDEX project_id_embedded_idx IF NOT EXISTS FOR (n:Embedded) ON (n.projectId, n.id)',
     CREATE_PROJECT_ID_INDEX_SOURCEFILE: 'CREATE INDEX project_id_sourcefile_idx IF NOT EXISTS FOR (n:SourceFile) ON (n.projectId, n.id)',
+    // Create index on normalizedHash for efficient structural duplicate detection
+    CREATE_NORMALIZED_HASH_INDEX: 'CREATE INDEX normalized_hash_idx IF NOT EXISTS FOR (n:Embedded) ON (n.normalizedHash)',
     CREATE_NODE: `
     UNWIND $nodes AS nodeData
     CALL apoc.create.node(nodeData.labels, nodeData.properties) YIELD node
@@ -175,13 +177,13 @@ export const QUERIES = {
   `,
     // Get cross-file edges before deletion (edges where one endpoint is outside the subgraph)
     // These will be recreated after import using deterministic IDs
+    // Uses filePath matching instead of relationship traversal to avoid following INJECTS/IMPORTS
     GET_CROSS_FILE_EDGES: `
-    MATCH (sf:SourceFile)
-    WHERE sf.filePath IN $filePaths AND sf.projectId = $projectId
-    OPTIONAL MATCH (sf)-[*]->(child)
-    WITH collect(DISTINCT sf) + collect(DISTINCT child) AS nodesToDelete
-    UNWIND nodesToDelete AS n
-    MATCH (n)-[r]-(other)
+    MATCH (n)
+    WHERE n.filePath IN $filePaths AND n.projectId = $projectId
+    WITH collect(DISTINCT n) AS nodesToDelete
+    UNWIND nodesToDelete AS node
+    MATCH (node)-[r]-(other)
     WHERE NOT other IN nodesToDelete AND other.projectId = $projectId
     RETURN DISTINCT
       startNode(r).id AS startNodeId,
@@ -190,11 +192,12 @@ export const QUERIES = {
       properties(r) AS edgeProperties
   `,
     // Delete source file subgraphs (nodes and all their edges)
+    // Uses filePath matching to delete only nodes belonging to the specified files
+    // Avoids following INJECTS/IMPORTS edges which would delete nodes from other files
     DELETE_SOURCE_FILE_SUBGRAPHS: `
-    MATCH (sf:SourceFile)
-    WHERE sf.filePath IN $filePaths AND sf.projectId = $projectId
-    OPTIONAL MATCH (sf)-[*]->(child)
-    DETACH DELETE sf, child
+    MATCH (n)
+    WHERE n.filePath IN $filePaths AND n.projectId = $projectId
+    DETACH DELETE n
   `,
     // Recreate cross-file edges after import (uses deterministic IDs)
     RECREATE_CROSS_FILE_EDGES: `
@@ -212,15 +215,19 @@ export const QUERIES = {
     // The previous query (WHERE startNode(r) IS NULL OR endNode(r) IS NULL) could never match anything
     // Get existing nodes (excluding files being reparsed) for edge target matching
     // Returns minimal info needed for edge detection: id, name, coreType, semanticType
+    // NOTE: Using property-based query instead of path traversal to avoid Cartesian explosion
+    // The old query `MATCH (sf:SourceFile)-[*]->(n)` caused OOM with large graphs
     GET_EXISTING_NODES_FOR_EDGE_DETECTION: `
-    MATCH (sf:SourceFile)-[*]->(n)
-    WHERE NOT sf.filePath IN $excludeFilePaths AND sf.projectId = $projectId
-    RETURN n.id AS id,
+    MATCH (n)
+    WHERE n.projectId = $projectId
+      AND n.filePath IS NOT NULL
+      AND NOT n.filePath IN $excludeFilePaths
+    RETURN DISTINCT n.id AS id,
            n.name AS name,
            n.coreType AS coreType,
            n.semanticType AS semanticType,
            labels(n) AS labels,
-           sf.filePath AS filePath
+           n.filePath AS filePath
   `,
     EXPLORE_ALL_CONNECTIONS: (maxDepth = MAX_TRAVERSAL_DEPTH, direction = 'BOTH', relationshipTypes) => {
         const safeMaxDepth = Math.min(Math.max(maxDepth, 1), MAX_TRAVERSAL_DEPTH);
@@ -568,4 +575,181 @@ export const QUERIES = {
       relationshipPath
     ORDER BY depth ASC
   `,
+    // ============================================
+    // DEAD CODE DETECTION QUERIES
+    // ============================================
+    /**
+     * Find exported classes/functions/interfaces with no incoming references from other files.
+     * These are potentially dead code - exported but never imported or used.
+     */
+    FIND_UNREFERENCED_EXPORTS: `
+    MATCH (n)
+    WHERE n.projectId = $projectId
+      AND n.isExported = true
+      AND n.coreType IN ['ClassDeclaration', 'FunctionDeclaration', 'InterfaceDeclaration']
+    WITH n
+    OPTIONAL MATCH (other)-[r]->(n)
+    WHERE other.projectId = $projectId
+      AND other.filePath <> n.filePath
+      AND type(r) IN ['IMPORTS', 'EXTENDS', 'IMPLEMENTS', 'TYPED_AS', 'INJECTS', 'CALLS']
+    WITH n, count(other) AS incomingCount
+    WHERE incomingCount = 0
+    RETURN n.id AS nodeId,
+           n.name AS name,
+           n.coreType AS coreType,
+           n.semanticType AS semanticType,
+           n.filePath AS filePath,
+           n.startLine AS lineNumber,
+           n.isExported AS isExported,
+           'Exported but never imported or referenced' AS reason
+    ORDER BY n.filePath, n.startLine
+  `,
+    /**
+     * Find private methods with no incoming CALLS edges.
+     * Private methods that are never called are likely dead code.
+     */
+    FIND_UNCALLED_PRIVATE_METHODS: `
+    MATCH (n)
+    WHERE n.projectId = $projectId
+      AND n.coreType = 'MethodDeclaration'
+      AND n.visibility = 'private'
+    WITH n
+    OPTIONAL MATCH (caller)-[r:CALLS]->(n)
+    WHERE caller.projectId = $projectId
+    WITH n, count(caller) AS callCount
+    WHERE callCount = 0
+    RETURN n.id AS nodeId,
+           n.name AS name,
+           n.coreType AS coreType,
+           n.semanticType AS semanticType,
+           n.filePath AS filePath,
+           n.startLine AS lineNumber,
+           n.visibility AS visibility,
+           'Private method never called' AS reason
+    ORDER BY n.filePath, n.startLine
+  `,
+    /**
+     * Find interfaces that are never implemented or referenced.
+     * Interfaces without implementations may be dead code.
+     */
+    FIND_UNREFERENCED_INTERFACES: `
+    MATCH (n)
+    WHERE n.projectId = $projectId
+      AND n.coreType = 'InterfaceDeclaration'
+      AND n.isExported = true
+    WITH n
+    OPTIONAL MATCH (other)-[r]->(n)
+    WHERE other.projectId = $projectId
+      AND type(r) IN ['IMPLEMENTS', 'EXTENDS', 'TYPED_AS', 'IMPORTS']
+    WITH n, count(other) AS refCount
+    WHERE refCount = 0
+    RETURN n.id AS nodeId,
+           n.name AS name,
+           n.coreType AS coreType,
+           n.semanticType AS semanticType,
+           n.filePath AS filePath,
+           n.startLine AS lineNumber,
+           'Interface never implemented or referenced' AS reason
+    ORDER BY n.filePath, n.startLine
+  `,
+    /**
+     * Get all distinct semantic types for a project.
+     * Used to dynamically determine framework entry points for dead code detection.
+     */
+    GET_PROJECT_SEMANTIC_TYPES: `
+    MATCH (n)
+    WHERE n.projectId = $projectId
+      AND n.semanticType IS NOT NULL
+      AND n.coreType IN ['ClassDeclaration', 'FunctionDeclaration', 'InterfaceDeclaration', 'MethodDeclaration']
+    RETURN DISTINCT n.semanticType AS semanticType
+  `,
+    /**
+     * Get framework entry points that should be excluded from dead code analysis.
+     * These are nodes that may appear unused but are actually framework-managed.
+     * Filters by coreType to exclude ImportDeclarations and only return actual classes/functions/interfaces.
+     * Accepts $semanticTypes parameter for dynamic, per-project framework detection.
+     */
+    GET_FRAMEWORK_ENTRY_POINTS: `
+    MATCH (n)
+    WHERE n.projectId = $projectId
+      AND n.coreType IN ['ClassDeclaration', 'FunctionDeclaration', 'InterfaceDeclaration']
+      AND (
+        n.semanticType IN $semanticTypes
+        OR n.filePath ENDS WITH 'main.ts'
+        OR n.filePath ENDS WITH '.module.ts'
+        OR n.filePath ENDS WITH '.controller.ts'
+        OR n.filePath ENDS WITH 'index.ts'
+      )
+    RETURN n.id AS nodeId,
+           n.name AS name,
+           n.coreType AS coreType,
+           n.semanticType AS semanticType,
+           n.filePath AS filePath
+    ORDER BY n.semanticType, n.name
+  `,
+    // ============================================================================
+    // DUPLICATE CODE DETECTION QUERIES
+    // ============================================================================
+    /**
+     * Find structural duplicates - nodes with identical normalizedHash.
+     * Returns all nodes that share the same normalized code hash.
+     * Limited to prevent memory issues on large codebases.
+     */
+    FIND_STRUCTURAL_DUPLICATES: `
+    MATCH (n)
+    WHERE n.projectId = $projectId
+      AND n.coreType IN $coreTypes
+      AND n.normalizedHash IS NOT NULL
+      AND n.normalizedHash <> ''
+    WITH n.normalizedHash AS hash, collect(n) AS nodes
+    WHERE size(nodes) >= 2
+    UNWIND nodes AS n
+    RETURN n.id AS nodeId,
+           n.name AS name,
+           n.coreType AS coreType,
+           n.semanticType AS semanticType,
+           n.filePath AS filePath,
+           n.startLine AS lineNumber,
+           n.normalizedHash AS normalizedHash,
+           n.sourceCode AS sourceCode
+    ORDER BY n.normalizedHash, n.filePath, n.startLine
+    LIMIT toInteger($limit)
+  `,
+    /**
+     * Find semantic duplicates - nodes with similar embeddings.
+     * Uses vector similarity search to find semantically similar code.
+     * Note: Requires the vector index 'embedded_nodes_idx' to exist.
+     */
+    FIND_SEMANTIC_DUPLICATES: `
+    MATCH (n1)
+    WHERE n1.projectId = $projectId
+      AND n1.coreType IN $coreTypes
+      AND n1.embedding IS NOT NULL
+    WITH n1
+    CALL db.index.vector.queryNodes('embedded_nodes_idx', toInteger($vectorNeighbors), n1.embedding)
+    YIELD node AS n2, score AS similarity
+    WHERE n2.projectId = $projectId
+      AND n2.coreType IN $coreTypes
+      AND n1.id < n2.id
+      AND similarity >= $minSimilarity
+      AND n1.filePath <> n2.filePath
+      AND (n1.normalizedHash IS NULL OR n2.normalizedHash IS NULL OR n1.normalizedHash <> n2.normalizedHash)
+    RETURN n1.id AS nodeId1,
+           n1.name AS name1,
+           n1.coreType AS coreType1,
+           n1.semanticType AS semanticType1,
+           n1.filePath AS filePath1,
+           n1.startLine AS lineNumber1,
+           n1.sourceCode AS sourceCode1,
+           n2.id AS nodeId2,
+           n2.name AS name2,
+           n2.coreType AS coreType2,
+           n2.semanticType AS semanticType2,
+           n2.filePath AS filePath2,
+           n2.startLine AS lineNumber2,
+           n2.sourceCode AS sourceCode2,
+           similarity
+    ORDER BY similarity DESC
+    LIMIT toInteger($limit)
+  `,
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "code-graph-context",
-  "version": "2.0.1",
+  "version": "2.3.0",
   "description": "MCP server that builds code graphs to provide rich context to LLMs",
   "type": "module",
   "homepage": "https://github.com/drewdrewH/code-graph-context#readme",