code-graph-context 2.0.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +221 -2
- package/dist/constants.js +167 -0
- package/dist/core/config/fairsquare-framework-schema.js +9 -7
- package/dist/core/config/schema.js +41 -2
- package/dist/core/embeddings/natural-language-to-cypher.service.js +166 -110
- package/dist/core/parsers/typescript-parser.js +1039 -742
- package/dist/core/parsers/workspace-parser.js +175 -193
- package/dist/core/utils/code-normalizer.js +299 -0
- package/dist/core/utils/file-change-detection.js +17 -2
- package/dist/core/utils/file-utils.js +40 -5
- package/dist/core/utils/graph-factory.js +161 -0
- package/dist/core/utils/shared-utils.js +79 -0
- package/dist/core/workspace/workspace-detector.js +59 -5
- package/dist/mcp/constants.js +261 -8
- package/dist/mcp/handlers/graph-generator.handler.js +1 -0
- package/dist/mcp/handlers/incremental-parse.handler.js +22 -6
- package/dist/mcp/handlers/parallel-import.handler.js +136 -0
- package/dist/mcp/handlers/streaming-import.handler.js +14 -59
- package/dist/mcp/mcp.server.js +77 -2
- package/dist/mcp/services/job-manager.js +5 -8
- package/dist/mcp/services/watch-manager.js +64 -25
- package/dist/mcp/tools/detect-dead-code.tool.js +413 -0
- package/dist/mcp/tools/detect-duplicate-code.tool.js +450 -0
- package/dist/mcp/tools/hello.tool.js +16 -2
- package/dist/mcp/tools/impact-analysis.tool.js +20 -4
- package/dist/mcp/tools/index.js +37 -0
- package/dist/mcp/tools/parse-typescript-project.tool.js +15 -14
- package/dist/mcp/tools/swarm-cleanup.tool.js +157 -0
- package/dist/mcp/tools/swarm-constants.js +35 -0
- package/dist/mcp/tools/swarm-pheromone.tool.js +196 -0
- package/dist/mcp/tools/swarm-sense.tool.js +212 -0
- package/dist/mcp/workers/chunk-worker-pool.js +196 -0
- package/dist/mcp/workers/chunk-worker.types.js +4 -0
- package/dist/mcp/workers/chunk.worker.js +89 -0
- package/dist/mcp/workers/parse-coordinator.js +183 -0
- package/dist/mcp/workers/worker.pool.js +54 -0
- package/dist/storage/neo4j/neo4j.service.js +198 -14
- package/package.json +1 -1
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code Normalizer Utility
|
|
3
|
+
* Normalizes code for structural duplicate detection by:
|
|
4
|
+
* - Stripping comments and whitespace
|
|
5
|
+
* - Replacing variable names with positional placeholders
|
|
6
|
+
* - Replacing literals with type placeholders
|
|
7
|
+
* - Computing SHA256 hash for comparison
|
|
8
|
+
*/
|
|
9
|
+
import * as crypto from 'crypto';
|
|
10
|
+
// Identifiers that must survive normalization untouched: language keywords,
// built-in type names, common globals, and the literal placeholders emitted by
// earlier normalization steps. Each group is a space-separated list that is
// flattened into a single Set for constant-time membership checks.
const RESERVED_KEYWORDS = new Set([
    // Keywords
    'async await break case catch class const continue debugger default delete do else enum',
    'export extends false finally for function if implements import in instanceof interface',
    'let new null of private protected public readonly return static super switch this throw',
    'true try typeof undefined var void while with yield',
    // TypeScript-specific keywords
    'abstract as asserts constructor declare get set infer is keyof module namespace require',
    'type satisfies using',
    // Built-in types
    'any boolean never number object string symbol unknown',
    // Common built-ins
    'Array Object String Number Boolean Promise Map Set WeakMap WeakSet Date Error console',
    'JSON Math BigInt Symbol Proxy Reflect',
    // Our placeholders
    '$STR $NUM',
].flatMap((group) => group.split(' ')));
|
|
112
|
+
/**
 * Reduce a code snippet to a formatting-independent form for structural comparison.
 *
 * The text passes through the normalization stages in a fixed order: string
 * literals are replaced first (so comment removal cannot be confused by string
 * contents), then comments are stripped, whitespace is collapsed, numeric
 * literals are replaced, and finally identifiers are renamed positionally.
 * Metrics are computed from the ORIGINAL text, not the normalized one.
 *
 * @param {string} code - Source text to normalize.
 * @returns {{normalizedCode: string, normalizedHash: string, metrics: object}}
 *   For empty/blank input every string is '' and every metric is 0.
 */
export const normalizeCode = (code) => {
    const isBlank = !code || code.trim().length === 0;
    if (isBlank) {
        const emptyMetrics = {
            parameterCount: 0,
            statementCount: 0,
            controlFlowDepth: 0,
            lineCount: 0,
            tokenCount: 0,
        };
        return { normalizedCode: '', normalizedHash: '', metrics: emptyMetrics };
    }
    // Stage order matters: each stage consumes the output of the previous one.
    const stages = [
        replaceStringLiterals,
        removeComments,
        normalizeWhitespace,
        replaceNumericLiterals,
        replaceVariableNames,
    ];
    const normalizedCode = stages.reduce((text, stage) => stage(text), code);
    const metrics = calculateMetrics(code);
    const normalizedHash = computeHash(normalizedCode);
    return { normalizedCode, normalizedHash, metrics };
};
|
|
151
|
+
/**
 * Remove block comments and line comments from code.
 *
 * Both comment forms are matched in a single left-to-right pass so that
 * whichever opener appears first wins. Removing block comments in a separate
 * first pass would let a stray "/*" inside a line comment swallow real code up
 * to the next block-comment terminator.
 *
 * This function does not understand string or regex literals; normalizeCode
 * runs it after string literals have been replaced by placeholders.
 *
 * @param {string} code - Source text.
 * @returns {string} The text with comment bodies removed (line breaks are kept).
 */
const removeComments = (code) => {
    // Alternation order is irrelevant here: the two openers differ at their
    // second character, so at most one branch can match at any position.
    return code.replace(/\/\*[\s\S]*?\*\/|\/\/.*$/gm, '');
};
|
|
161
|
+
/**
 * Collapse every run of whitespace (spaces, tabs, line breaks) into a single
 * space and strip leading/trailing whitespace, so the result is a single line
 * of space-separated tokens.
 *
 * @param {string} code - Source text.
 * @returns {string} Single-line, single-spaced text ('' for blank input).
 */
const normalizeWhitespace = (code) => {
    const singleSpaced = code.replace(/\s+/g, ' ');
    return singleSpaced.trim();
};
|
|
173
|
+
/**
 * Replace string literals with the $STR placeholder.
 * Handles single quotes, double quotes, and simple template literals.
 *
 * Comments and strings are tokenized together in one left-to-right pass:
 * - comments are matched but returned unchanged, so an apostrophe or quote
 *   inside a comment (e.g. "// don't") cannot open a bogus string that
 *   swallows the code following it;
 * - a string is matched from its opening quote, so "//" or "/*" inside a
 *   string never looks like a comment;
 * - backslash escapes (including escaped backticks) are honored.
 *
 * Limitations: template literals whose ${...} expressions themselves contain
 * backticks, and regex literals containing quotes, are not recognized.
 *
 * @param {string} code - Source text (comments may still be present).
 * @returns {string} The text with each string literal replaced by $STR.
 */
const replaceStringLiterals = (code) => {
    const commentOrString = /\/\*[\s\S]*?\*\/|\/\/[^\n]*|`(?:[^`\\]|\\[\s\S])*`|"(?:[^"\\\n]|\\[\s\S])*"|'(?:[^'\\\n]|\\[\s\S])*'/g;
    // A replacer function is used so the placeholder is inserted verbatim.
    return code.replace(commentOrString, (token) => (token.startsWith('/') ? token : '$STR'));
};
|
|
186
|
+
/**
 * Replace numeric literals with the $NUM placeholder.
 *
 * The patterns are applied in order: hex (0x), binary (0b), octal (0o), then
 * plain decimal numbers (integers, floats, scientific notation). Underscore
 * separators and the BigInt `n` suffix are accepted by all of them. Digits that
 * are part of an identifier (e.g. the "1" in $VAR_1 or a1) are left alone.
 *
 * @param {string} code - Source text.
 * @returns {string} The text with numeric literals replaced by $NUM.
 */
const replaceNumericLiterals = (code) => {
    const literalPatterns = [
        // Hex literals (0xFF, 0XAB)
        /\b0[xX][0-9a-fA-F_]+n?\b/g,
        // Binary literals (0b1010)
        /\b0[bB][01_]+n?\b/g,
        // Octal literals (0o777)
        /\b0[oO][0-7_]+n?\b/g,
        // Decimal integers, floats, scientific notation, BigInt
        /(?<![a-zA-Z_$])\b\d[\d_]*(\.\d[\d_]*)?([eE][+-]?\d[\d_]*)?n?\b/g,
    ];
    return literalPatterns.reduce((text, pattern) => text.replace(pattern, '$NUM'), code);
};
|
|
203
|
+
/**
 * Replace variable and parameter names with positional placeholders.
 * Preserves keywords, built-in types, and the $STR / $NUM literal placeholders.
 *
 * Each distinct non-reserved identifier is mapped to $VAR_1, $VAR_2, ... in
 * order of first appearance; repeated occurrences reuse the same placeholder.
 *
 * Identifier boundaries are detected with a lookbehind instead of `\b`:
 * `\b` is defined in terms of word characters, and `$` is not one, so `\b`
 * can never match immediately before a leading `$`. With `\b`, "$STR" was
 * scanned as "STR", missed the reserved-word lookup, and was rewritten to
 * "$$VAR_n" - erasing the distinction between string literals, numeric
 * literals and `$`-prefixed variables.
 *
 * @param {string} code - Source text after literal replacement.
 * @returns {string} The text with identifiers replaced by positional placeholders.
 */
const replaceVariableNames = (code) => {
    // Original identifier -> assigned placeholder.
    const variableMap = new Map();
    let varCounter = 1;
    // An identifier starts where the previous character is not an identifier
    // character; the greedy tail then consumes the whole identifier.
    const identifierPattern = /(?<![a-zA-Z0-9_$])[a-zA-Z_$][a-zA-Z0-9_$]*/g;
    return code.replace(identifierPattern, (match) => {
        // Skip keywords, built-ins and placeholders (uses module-level constant)
        if (RESERVED_KEYWORDS.has(match)) {
            return match;
        }
        const known = variableMap.get(match);
        if (known !== undefined) {
            return known;
        }
        const placeholder = `$VAR_${varCounter++}`;
        variableMap.set(match, placeholder);
        return placeholder;
    });
};
|
|
229
|
+
/**
 * Derive rough structural metrics from raw (un-normalized) code.
 *
 * All figures are text-based approximations:
 * - parameterCount: comma-separated, non-blank entries summed over every
 *   "(...)" group in the text (call arguments and conditions count too);
 * - statementCount: half the number of ';', '{' and '}' characters, rounded;
 * - controlFlowDepth: the deepest '{' nesting reached (unbalanced '}' never
 *   takes the depth below zero);
 * - lineCount: number of non-blank lines;
 * - tokenCount: number of whitespace-separated chunks.
 *
 * @param {string} code - Original source text.
 * @returns {{parameterCount: number, statementCount: number, controlFlowDepth: number, lineCount: number, tokenCount: number}}
 */
const calculateMetrics = (code) => {
    // Parameters: every parenthesized group contributes its non-blank entries.
    const parenGroups = code.match(/\([^)]*\)/g) ?? [];
    const parameterCount = parenGroups.reduce((total, group) => {
        const inner = group.slice(1, -1).trim();
        if (inner.length === 0) {
            return total;
        }
        return total + inner.split(',').filter((piece) => piece.trim().length > 0).length;
    }, 0);
    // Statements: terminators and block delimiters, halved.
    const delimiterCount = code.match(/[;{}]/g)?.length ?? 0;
    // Depth: track brace nesting character by character.
    let openBraces = 0;
    let deepest = 0;
    for (const ch of code) {
        if (ch === '{') {
            openBraces += 1;
            if (openBraces > deepest) {
                deepest = openBraces;
            }
        }
        else if (ch === '}') {
            openBraces = openBraces > 0 ? openBraces - 1 : 0;
        }
    }
    const nonBlankLines = code.split('\n').filter((line) => line.trim().length > 0);
    const chunks = code.split(/\s+/).filter((chunk) => chunk.length > 0);
    return {
        parameterCount,
        statementCount: Math.round(delimiterCount / 2),
        controlFlowDepth: deepest,
        lineCount: nonBlankLines.length,
        tokenCount: chunks.length,
    };
};
|
|
271
|
+
/**
 * Hash normalized code with SHA-256.
 *
 * @param {string} normalizedCode - Output of the normalization pipeline.
 * @returns {string} Lowercase hex digest, or '' when the input is empty.
 */
const computeHash = (normalizedCode) => {
    const isEmpty = normalizedCode.length === 0;
    if (isEmpty) {
        return '';
    }
    const hasher = crypto.createHash('sha256');
    hasher.update(normalizedCode);
    return hasher.digest('hex');
};
|
|
280
|
+
/**
 * Decide whether two metric sets describe structurally similar code.
 * Used for near-duplicate detection when hashes don't match.
 *
 * Each metric yields a similarity of 1 - |a - b| / max(a, b, 1). The four
 * similarities are combined with fixed weights (parameters 0.15, statements
 * 0.35, control-flow depth 0.15, lines 0.35) and compared to the threshold.
 *
 * @param {object} metrics1 - First metric set.
 * @param {object} metrics2 - Second metric set.
 * @param {number} [threshold=0.8] - Minimum weighted similarity to count as similar.
 * @returns {boolean} True when the weighted similarity is at least `threshold`.
 */
export const areMetricsSimilar = (metrics1, metrics2, threshold = 0.8) => {
    // Relative closeness of one named metric across both inputs.
    const closeness = (key) => {
        const left = metrics1[key];
        const right = metrics2[key];
        return 1 - Math.abs(left - right) / Math.max(left, right, 1);
    };
    // Statement count and line count carry the most weight.
    const weighted = closeness('parameterCount') * 0.15 +
        closeness('statementCount') * 0.35 +
        closeness('controlFlowDepth') * 0.15 +
        closeness('lineCount') * 0.35;
    return weighted >= threshold;
};
|
|
@@ -5,9 +5,21 @@
|
|
|
5
5
|
import { stat, realpath } from 'fs/promises';
|
|
6
6
|
import { resolve, sep } from 'path';
|
|
7
7
|
import { glob } from 'glob';
|
|
8
|
-
import { EXCLUDE_PATTERNS_GLOB } from '../../constants.js';
|
|
8
|
+
import { EXCLUDE_PATTERNS_GLOB, EXCLUDE_PATTERNS_REGEX } from '../../constants.js';
|
|
9
9
|
import { QUERIES } from '../../storage/neo4j/neo4j.service.js';
|
|
10
10
|
import { hashFile } from './file-utils.js';
|
|
11
|
+
/**
 * Check if a file path matches any of the exclude patterns.
 * Uses the same patterns as the TypeScript parser.
 *
 * A pattern matches when it occurs literally in the path or, interpreted as a
 * regular expression, matches the path. A pattern that is not a valid regular
 * expression is treated as literal-only instead of throwing, so one malformed
 * entry cannot abort change detection.
 *
 * @param {string} filePath - Path of the candidate file.
 * @returns {boolean} True when the file should be skipped.
 */
const shouldExcludeFile = (filePath) => {
    for (const pattern of EXCLUDE_PATTERNS_REGEX) {
        // Literal containment is always safe to evaluate.
        if (filePath.includes(pattern)) {
            return true;
        }
        try {
            if (new RegExp(pattern).test(filePath)) {
                return true;
            }
        }
        catch {
            // Invalid regex source - the literal check above already covered it.
        }
    }
    return false;
};
|
|
11
23
|
/**
|
|
12
24
|
* Detect which files have changed and need reparsing.
|
|
13
25
|
* Compares current files on disk with indexed files in Neo4j.
|
|
@@ -59,7 +71,10 @@ export const detectChangedFiles = async (projectPath, neo4jService, projectId, o
|
|
|
59
71
|
for (const filePath of currentFiles) {
|
|
60
72
|
const indexed = indexedMap.get(filePath);
|
|
61
73
|
if (!indexed) {
|
|
62
|
-
// New file -
|
|
74
|
+
// New file - check if it should be excluded (same rules as parser)
|
|
75
|
+
if (shouldExcludeFile(filePath)) {
|
|
76
|
+
continue; // Skip excluded files
|
|
77
|
+
}
|
|
63
78
|
filesToReparse.push(filePath);
|
|
64
79
|
continue;
|
|
65
80
|
}
|
|
@@ -1,20 +1,55 @@
|
|
|
1
1
|
import * as crypto from 'crypto';
|
|
2
2
|
import * as fs from 'fs/promises';
|
|
3
3
|
import * as path from 'path';
|
|
4
|
-
|
|
5
|
-
const LOG_SEPARATOR = '---';
|
|
6
|
-
const JSON_INDENT = 2;
|
|
4
|
+
import { LOG_CONFIG } from '../../constants.js';
|
|
7
5
|
/**
 * Compute the SHA-256 digest of a file's contents.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<string>} Lowercase hex digest of the file's bytes.
 */
export const hashFile = async (filePath) => {
    const bytes = await fs.readFile(filePath);
    const hasher = crypto.createHash('sha256');
    hasher.update(bytes);
    return hasher.digest('hex');
};
|
|
11
9
|
/**
 * Append a timestamped entry to the debug log file in the current working
 * directory. Logging is best-effort: any failure is reported on stderr and
 * never propagates to the caller.
 *
 * Serialization happens inside the try block because JSON.stringify throws on
 * circular structures and BigInt values; a debug logger must not reject
 * because of the data it was asked to record.
 *
 * @param {string} message - Headline of the log entry.
 * @param {*} [data] - Optional payload, pretty-printed as JSON when truthy.
 * @returns {Promise<void>}
 */
export const debugLog = async (message, data) => {
    const timestamp = new Date().toISOString();
    try {
        const serialized = data ? JSON.stringify(data, null, LOG_CONFIG.jsonIndent) : '';
        const logEntry = `[${timestamp}] ${message}\n${serialized}\n${LOG_CONFIG.separator}\n`;
        await fs.appendFile(path.join(process.cwd(), LOG_CONFIG.debugLogFile), logEntry);
    }
    catch (error) {
        console.error('Failed to write debug log:', error);
    }
};
|
|
19
|
+
/**
 * Test whether a file path matches a pattern given as plain text or as
 * regular-expression source.
 *
 * The pattern is tried as a literal substring first; only if that fails is it
 * compiled as a regular expression. A pattern that does not compile simply
 * yields false, since the literal comparison has already been made.
 *
 * @param {string} filePath - Path to test.
 * @param {string} pattern - Literal text or regex source.
 * @returns {boolean} True when the path matches.
 */
export const matchesPattern = (filePath, pattern) => {
    const literalHit = filePath.includes(pattern);
    if (literalHit) {
        return true;
    }
    let compiled;
    try {
        compiled = new RegExp(pattern);
    }
    catch {
        // Not valid regex source; the literal check above was the only option.
        return false;
    }
    return compiled.test(filePath);
};
|
|
37
|
+
/**
 * Clean up a TypeScript type name by removing import prefixes, generic
 * arguments and array suffixes.
 * Examples:
 *   import("./foo").ClassName -> ClassName
 *   ClassName<T>              -> ClassName
 *   ClassName[]               -> ClassName
 *   ClassName[][]             -> ClassName
 *
 * @param {string} typeName - Type text to clean.
 * @returns {string} The bare type name.
 */
export const cleanTypeName = (typeName) => {
    // Remove import paths: import("...").ClassName -> ClassName
    let cleaned = typeName.replace(/import\([^)]+\)\./g, '');
    // Remove generics: everything from the first '<' onwards
    const genericIndex = cleaned.indexOf('<');
    if (genericIndex > 0) {
        cleaned = cleaned.substring(0, genericIndex);
    }
    // Trim first so trailing whitespace cannot hide the array suffix, then
    // strip every "[]" (multi-dimensional arrays carry more than one).
    return cleaned.trim().replace(/(?:\s*\[\])+$/, '');
};
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Graph Factory
|
|
3
|
+
* Shared utilities for creating/converting graph nodes and edges
|
|
4
|
+
*/
|
|
5
|
+
import crypto from 'crypto';
|
|
6
|
+
import { CoreEdgeType, CORE_TYPESCRIPT_SCHEMA } from '../config/schema.js';
|
|
7
|
+
// ============================================
|
|
8
|
+
// Node ID Generation
|
|
9
|
+
// ============================================
|
|
10
|
+
/**
 * Build a node ID that is stable across reparses.
 *
 * The identity string is projectId, coreType, filePath, (parentId when
 * truthy) and name joined with "::". Its SHA-256 hex digest, truncated to 16
 * characters, is embedded in the returned ID together with the project and
 * core type, so equal inputs always produce the same ID and nodes of
 * different projects never share one.
 *
 * @param {string} projectId - Owning project.
 * @param {string} coreType - Core node type.
 * @param {string} filePath - File containing the node.
 * @param {string} name - Node name.
 * @param {string} [parentId] - ID of the enclosing node, for nested nodes.
 * @returns {string} ID of the form `projectId:coreType:hash16`.
 */
export const generateDeterministicId = (projectId, coreType, filePath, name, parentId) => {
    // The parent, when given, sits between the file path and the name.
    const parentPart = parentId ? [parentId] : [];
    const identity = [projectId, coreType, filePath, ...parentPart, name].join('::');
    const digest = crypto.createHash('sha256').update(identity).digest('hex');
    const shortHash = digest.substring(0, 16);
    return `${projectId}:${coreType}:${shortHash}`;
};
|
|
27
|
+
/**
 * Build a deterministic ID for a framework (semantic) edge.
 *
 * The semantic type, source node ID and target node ID are combined into an
 * identity string whose SHA-256 hex digest is truncated to 16 characters.
 *
 * @param {string} semanticType - Semantic edge type.
 * @param {string} sourceNodeId - ID of the start node.
 * @param {string} targetNodeId - ID of the end node.
 * @returns {string} ID of the form `semanticType:hash16`.
 */
export const generateFrameworkEdgeId = (semanticType, sourceNodeId, targetNodeId) => {
    const hasher = crypto.createHash('sha256');
    hasher.update(`${semanticType}::${sourceNodeId}::${targetNodeId}`);
    const shortHash = hasher.digest('hex').substring(0, 16);
    return `${semanticType}:${shortHash}`;
};
|
|
36
|
+
/**
 * Assemble the ID and property bag shared by framework edges.
 * The result can be used to construct either a ParsedEdge or a Neo4jEdge.
 *
 * @param params - Edge parameters: semanticType, sourceNodeId, targetNodeId,
 *   projectId, and optionally context (default {}) and relationshipWeight
 *   (default 0.5).
 * @returns Object with the deterministic edge `id` and its `properties`
 *   (source 'pattern', confidence 0.8, empty filePath, creation timestamp).
 */
export const createFrameworkEdgeData = (params) => {
    const { semanticType, sourceNodeId, targetNodeId, projectId } = params;
    // Optional fields fall back only when they are undefined.
    const context = params.context === undefined ? {} : params.context;
    const relationshipWeight = params.relationshipWeight === undefined ? 0.5 : params.relationshipWeight;
    const id = generateFrameworkEdgeId(semanticType, sourceNodeId, targetNodeId);
    return {
        id,
        properties: {
            coreType: semanticType,
            projectId,
            semanticType,
            source: 'pattern',
            confidence: 0.8,
            relationshipWeight,
            filePath: '',
            createdAt: new Date().toISOString(),
            context,
        },
    };
};
|
|
59
|
+
/**
 * Build a deterministic ID for a core edge.
 *
 * The edge type, source node ID and target node ID are combined into an
 * identity string whose SHA-256 hex digest is truncated to 16 characters.
 *
 * @param {string} edgeType - Core edge type.
 * @param {string} sourceNodeId - ID of the start node.
 * @param {string} targetNodeId - ID of the end node.
 * @returns {string} ID of the form `edgeType:hash16`.
 */
export const generateCoreEdgeId = (edgeType, sourceNodeId, targetNodeId) => {
    const hasher = crypto.createHash('sha256');
    hasher.update(`${edgeType}::${sourceNodeId}::${targetNodeId}`);
    const shortHash = hasher.digest('hex').substring(0, 16);
    return `${edgeType}:${shortHash}`;
};
|
|
67
|
+
/**
 * Create a core edge (CONTAINS, IMPORTS, EXTENDS, IMPLEMENTS, etc.)
 *
 * The relationship weight is read from the core schema entry for the edge
 * type, falling back to 0.5 when the schema has no weight for it. Core edges
 * are marked as AST-derived with confidence 1.0.
 *
 * @param params - edgeType, sourceNodeId, targetNodeId, projectId and an
 *   optional filePath (default '').
 * @returns Edge object with id, type, startNodeId, endNodeId and properties.
 */
export const createCoreEdge = (params) => {
    const { edgeType, sourceNodeId, targetNodeId, projectId, filePath = '' } = params;
    const schemaEntry = CORE_TYPESCRIPT_SCHEMA.edgeTypes[edgeType];
    const properties = {
        coreType: edgeType,
        projectId,
        source: 'ast',
        confidence: 1.0,
        relationshipWeight: schemaEntry?.relationshipWeight ?? 0.5,
        filePath,
        createdAt: new Date().toISOString(),
    };
    return {
        id: generateCoreEdgeId(edgeType, sourceNodeId, targetNodeId),
        type: edgeType,
        startNodeId: sourceNodeId,
        endNodeId: targetNodeId,
        properties,
    };
};
|
|
91
|
+
/**
 * Create a CALLS edge with call-specific context.
 *
 * - Weight comes from the core schema's CALLS entry (fallback 0.85).
 * - Confidence is 0.9 when the receiver type was resolved, otherwise 0.7.
 * - The ID hashes type + source + target + line number (0 when unknown), so
 *   calls to the same target from different lines get distinct edges.
 *
 * @param params - sourceNodeId, targetNodeId, projectId, optional callContext
 *   ({ lineNumber, isAsync, argumentCount, receiverType }) and optional
 *   filePath (default ''), mirroring createCoreEdge so callers can record the
 *   originating file.
 * @returns Edge object with id, type, startNodeId, endNodeId and properties.
 */
export const createCallsEdge = (params) => {
    const { sourceNodeId, targetNodeId, projectId, callContext, filePath = '' } = params;
    const coreEdgeSchema = CORE_TYPESCRIPT_SCHEMA.edgeTypes[CoreEdgeType.CALLS];
    const relationshipWeight = coreEdgeSchema?.relationshipWeight ?? 0.85;
    // Confidence: higher if we resolved the receiver type
    const confidence = callContext?.receiverType ? 0.9 : 0.7;
    // Deterministic edge ID based on type + source + target + line
    const lineNum = callContext?.lineNumber ?? 0;
    const edgeIdentity = `CALLS::${sourceNodeId}::${targetNodeId}::${lineNum}`;
    const edgeHash = crypto.createHash('sha256').update(edgeIdentity).digest('hex').substring(0, 16);
    return {
        id: `CALLS:${edgeHash}`,
        type: 'CALLS',
        startNodeId: sourceNodeId,
        endNodeId: targetNodeId,
        properties: {
            coreType: CoreEdgeType.CALLS,
            projectId,
            source: 'ast',
            confidence,
            relationshipWeight,
            filePath,
            createdAt: new Date().toISOString(),
            lineNumber: callContext?.lineNumber,
            // Only the call-shape fields are copied into the stored context.
            context: callContext
                ? {
                    isAsync: callContext.isAsync,
                    argumentCount: callContext.argumentCount,
                    receiverType: callContext.receiverType,
                }
                : undefined,
        },
    };
};
|
|
129
|
+
// ============================================
|
|
130
|
+
// Node/Edge Conversion Functions
|
|
131
|
+
// Convert internal parsed types to Neo4j types
|
|
132
|
+
// ============================================
|
|
133
|
+
/**
 * Convert a ParsedNode to Neo4jNode format for storage/export.
 * Copies id, labels and properties as-is; skipEmbedding defaults to false
 * when the parsed node does not set it.
 */
export const toNeo4jNode = (parsedNode) => {
    const { id, labels, properties, skipEmbedding } = parsedNode;
    return {
        id,
        labels,
        properties,
        skipEmbedding: skipEmbedding ?? false,
    };
};
|
|
142
|
+
/**
 * Convert a ParsedEdge to Neo4jEdge format for storage/export.
 * Renames relationshipType/sourceNodeId/targetNodeId to
 * type/startNodeId/endNodeId; id and properties are passed through.
 */
export const toNeo4jEdge = (parsedEdge) => {
    const { id, relationshipType, sourceNodeId, targetNodeId, properties } = parsedEdge;
    return {
        id,
        type: relationshipType,
        startNodeId: sourceNodeId,
        endNodeId: targetNodeId,
        properties,
    };
};
|
|
152
|
+
/**
 * Convert a Neo4jEdge to ParsedEdge format for internal use.
 * Renames type/startNodeId/endNodeId to
 * relationshipType/sourceNodeId/targetNodeId; id and properties are passed through.
 */
export const toParsedEdge = (neo4jEdge) => {
    const { id, type, startNodeId, endNodeId, properties } = neo4jEdge;
    return {
        id,
        relationshipType: type,
        sourceNodeId: startNodeId,
        targetNodeId: endNodeId,
        properties,
    };
};
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared Utilities
|
|
3
|
+
* Common interfaces, types, and helper functions used across dead code and duplicate code detection tools.
|
|
4
|
+
*/
|
|
5
|
+
import path from 'path';
|
|
6
|
+
// ============================================================================
|
|
7
|
+
// Helper Functions
|
|
8
|
+
// ============================================================================
|
|
9
|
+
/**
 * Convert a Neo4j value to a JavaScript number.
 *
 * Handles plain numbers, BigInt values, and objects exposing a callable
 * toNumber() method (such as Neo4j Integer objects). Anything else -
 * including null and undefined - yields 0.
 *
 * @param {*} value - Value read from a query result.
 * @returns {number} Numeric value, or 0 when it cannot be converted.
 */
export const toNumber = (value) => {
    if (value === null || value === undefined) {
        return 0;
    }
    if (typeof value === 'number') {
        return value;
    }
    if (typeof value === 'bigint') {
        // Values beyond Number.MAX_SAFE_INTEGER lose precision here.
        return Number(value);
    }
    // Require an actual method: a non-callable "toNumber" property would throw.
    if (typeof value === 'object' && typeof value.toNumber === 'function') {
        return value.toNumber();
    }
    return 0;
};
|
|
25
|
+
/**
 * Tell whether a path points at a UI component file.
 * The file must have a .tsx, .jsx or .vue extension AND live under a
 * components/ui or ui/components directory.
 * Cross-platform: both / and \ are accepted as path separators.
 */
export const isUIComponent = (filePath) => {
    const hasFrontendExtension = /\.(tsx|jsx|vue)$/.test(filePath);
    if (!hasFrontendExtension) {
        return false;
    }
    return /[/\\](components[/\\]ui|ui[/\\]components)[/\\]/.test(filePath);
};
|
|
35
|
+
/**
 * Tell whether a path lies inside a monorepo package, i.e. contains a
 * "packages/<name>/" segment.
 * Cross-platform: both / and \ are accepted as path separators.
 */
export const isPackageExport = (filePath) => {
    const packageDirPattern = /[/\\]packages[/\\][^/\\]+[/\\]/;
    return packageDirPattern.exec(filePath) !== null;
};
|
|
42
|
+
/**
 * Extract the app or package name from a monorepo path: the directory name
 * that directly follows an "apps" or "packages" segment.
 * Cross-platform: both / and \ are accepted as path separators.
 * Returns null when the path has no such segment.
 */
export const getMonorepoAppName = (filePath) => {
    const found = filePath.match(/[/\\](apps|packages)[/\\]([^/\\]+)[/\\]/);
    if (found === null) {
        return null;
    }
    // Group 1 is the container ("apps"/"packages"); group 2 is the name.
    const [, , appName] = found;
    return appName;
};
|
|
50
|
+
/**
 * Tell whether a file path is excluded by any of the given patterns.
 * Each pattern is compared against the END of the path; a single leading "*"
 * (simple glob such as "*.test.ts") is dropped before the comparison.
 */
export const isExcludedByPattern = (filePath, patterns) => {
    return patterns.some((pattern) => {
        const suffix = pattern.startsWith('*') ? pattern.substring(1) : pattern;
        return filePath.endsWith(suffix);
    });
};
|
|
62
|
+
/**
 * Limit source code to its first `maxLength` characters (default 500).
 * Useful for limiting response sizes.
 * Returns undefined when no source code is given (including the empty string).
 */
export const truncateSourceCode = (sourceCode, maxLength = 500) => {
    return sourceCode ? sourceCode.substring(0, maxLength) : undefined;
};
|
|
71
|
+
/**
 * Get shortened file path (last N segments).
 * Useful for compact display.
 *
 * Cross-platform: segments are split on both / and \ regardless of the host
 * OS, so a Windows-style path is shortened correctly on POSIX and a
 * forward-slash path is shortened correctly on Windows. The separators of the
 * input are preserved in the result.
 *
 * @param {string} filePath - Path to shorten.
 * @param {number} [segments=2] - Number of trailing segments to keep.
 * @returns {string} The trailing part of `filePath`.
 */
export const getShortPath = (filePath, segments = 2) => {
    const parts = filePath.split(/[/\\]/);
    const kept = parts.slice(-segments);
    // Length of the kept segments plus the separators between them; slicing
    // the original string keeps whatever separators the caller used.
    const tailLength = kept.reduce((sum, part) => sum + part.length, 0) + Math.max(kept.length - 1, 0);
    return filePath.slice(filePath.length - tailLength);
};
|