@mrxkun/mcfast-mcp 2.2.0 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mrxkun/mcfast-mcp",
3
- "version": "2.2.0",
3
+ "version": "2.2.2",
4
4
  "description": "Ultra-fast code editing with fuzzy patching, auto-rollback, and 5 unified tools.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/index.js CHANGED
@@ -23,6 +23,11 @@ import {
23
23
  inlineVariable,
24
24
  applyASTTransformation
25
25
  } from './strategies/ast-detector.js';
26
+ import {
27
+ detectCrossFileEdit,
28
+ batchRenameSymbol,
29
+ updateImportPaths
30
+ } from './strategies/multi-file-coordinator.js';
26
31
  import { safeEdit } from './utils/backup.js';
27
32
  import { formatError } from './utils/error-formatter.js';
28
33
 
@@ -500,6 +505,61 @@ async function handleReapply({ instruction, files, errorContext = "", attempt =
500
505
  * Auto-detects best strategy based on input
501
506
  */
502
507
  async function handleEdit({ instruction, files, code_edit, dryRun = false }) {
508
+ // Check for multi-file edits first
509
+ const multiFileEdit = detectCrossFileEdit(instruction, files);
510
+
511
+ if (multiFileEdit && Object.keys(files).length > 1) {
512
+ console.error(`${colors.cyan}[MULTI-FILE EDIT]${colors.reset} ${multiFileEdit.type} - ${multiFileEdit.reason}`);
513
+
514
+ try {
515
+ let result;
516
+
517
+ if (multiFileEdit.type === 'symbol_rename') {
518
+ result = await batchRenameSymbol(files, multiFileEdit.symbol, multiFileEdit.newSymbol);
519
+ } else if (multiFileEdit.type === 'dependency') {
520
+ // Extract old and new paths from instruction
521
+ const pathMatch = instruction.match(/from\s+['"]([^'"]+)['"]\s+to\s+['"]([^'"]+)['"]/i);
522
+ if (pathMatch) {
523
+ result = await updateImportPaths(files, pathMatch[1], pathMatch[2]);
524
+ } else {
525
+ throw new Error('Could not extract import paths from instruction');
526
+ }
527
+ } else {
528
+ throw new Error(`Unsupported multi-file edit type: ${multiFileEdit.type}`);
529
+ }
530
+
531
+ if (!result.success) {
532
+ return {
533
+ content: [{
534
+ type: "text",
535
+ text: formatError('generic', {
536
+ error: result.message || result.error
537
+ })
538
+ }],
539
+ isError: true
540
+ };
541
+ }
542
+
543
+ return {
544
+ content: [{
545
+ type: "text",
546
+ text: `✅ Multi-File Edit Successful\n\nType: ${multiFileEdit.type}\nFiles Modified: ${result.files.length}\n\nBackups created for all files.`
547
+ }]
548
+ };
549
+
550
+ } catch (error) {
551
+ return {
552
+ content: [{
553
+ type: "text",
554
+ text: formatError('generic', {
555
+ error: `Multi-file edit failed: ${error.message}`
556
+ })
557
+ }],
558
+ isError: true
559
+ };
560
+ }
561
+ }
562
+
503
563
  const strategy = detectEditStrategy({ instruction, code_edit, files });
504
564
 
505
565
  console.error(`${colors.cyan}[EDIT STRATEGY]${colors.reset} ${strategy}`);
@@ -1,8 +1,16 @@
1
1
  /**
2
- * Fuzzy Patching Engine for mcfast v2.1
3
- * Applies code changes with tolerance for whitespace and minor formatting differences
2
+ * Fuzzy Patch Strategy for mcfast v2.1+
3
+ * Applies unified diffs with whitespace tolerance and semantic similarity
4
4
  */
5
5
 
6
+ import { parsePatch } from './patch-parser.js';
7
+ import {
8
+ tokenSimilarity,
9
+ calculateConfidence,
10
+ contextAwareMatch,
11
+ isSemanticMatchingEnabled
12
+ } from './semantic-similarity.js';
13
+
6
14
  /**
7
15
  * Calculate Levenshtein distance between two strings
8
16
  * Used for fuzzy matching to find best location for patch
@@ -122,59 +130,102 @@ export function parseDiff(diffText) {
122
130
 
123
131
/**
 * Find the best location in a file for a block of expected lines.
 *
 * The hinted position is tried first with a strict line-by-line comparison.
 * If that fails, every window of `targetLines.length` consecutive file lines
 * is scored by combining per-line Levenshtein distance, token similarity and
 * context-aware matching; the window with the lowest effective distance wins
 * (the first one on ties).
 *
 * @param {string[]} targetLines - Lines expected to be found in the file.
 * @param {string[]} fileLines - Lines of the file being searched.
 * @param {number} [startHint=0] - Zero-based index where the block is expected.
 * @returns {{index: number, distance: number, confidence: number,
 *   tokenSimilarity?: number, contextMatch?: number} | null}
 *   The best match, or null when the inputs are not arrays or no window could
 *   be scored. An exact match at the hint carries only index, distance and
 *   confidence.
 */
export function findBestMatch(targetLines, fileLines, startHint = 0) {
    // Invalid input cannot match anything; report "no match" instead of throwing.
    if (!Array.isArray(targetLines) || !Array.isArray(fileLines)) {
        return null;
    }

    let bestMatch = null;
    let bestScore = Infinity;
    const maxIterations = 10000;
    let iterations = 0;

    const useSemanticMatching = isSemanticMatchingEnabled();

    if (useSemanticMatching) {
        console.error('[FUZZY] Semantic matching enabled');
    }

    // Try exact match first at hint location
    if (startHint >= 0 && startHint + targetLines.length <= fileLines.length) {
        const exactMatch = targetLines.every((line, i) =>
            fileLines[startHint + i] === line
        );
        if (exactMatch) {
            return { index: startHint, distance: 0, confidence: 1.0 };
        }
    }

    // An empty pattern has no lines to score (the averages below would be
    // 0/0), so no window can ever be selected; skip the scan.
    if (targetLines.length === 0) {
        return null;
    }

    // Fuzzy search over every candidate window
    for (let i = 0; i <= fileLines.length - targetLines.length; i++) {
        iterations++;
        if (iterations > maxIterations) {
            console.error(`[FUZZY] Max iterations (${maxIterations}) reached`);
            break;
        }

        let totalDistance = 0;
        let tokenSimilaritySum = 0;
        let contextMatchSum = 0;

        for (let j = 0; j < targetLines.length; j++) {
            const targetLine = targetLines[j];
            const fileLine = fileLines[i + j];

            // Levenshtein distance
            totalDistance += levenshteinDistance(targetLine, fileLine);

            // Token similarity (always available)
            tokenSimilaritySum += tokenSimilarity(targetLine, fileLine);

            // Context-aware matching uses the file lines directly above and
            // below; missing or empty neighbours are dropped.
            const surroundingLines = [
                fileLines[i + j - 1],
                fileLines[i + j + 1]
            ].filter(Boolean);

            contextMatchSum += contextAwareMatch(targetLine, fileLine, surroundingLines);
        }

        const avgTokenSim = tokenSimilaritySum / targetLines.length;
        const avgContextMatch = contextMatchSum / targetLines.length;

        // Calculate confidence using enhanced scoring
        const confidence = calculateConfidence(null, {
            levenshteinDistance: totalDistance,
            maxDistance: targetLines.length * 100,
            tokenSimilarity: avgTokenSim,
            structuralSimilarity: 0, // Would need AST parsing
            lineNumberMatch: i === startHint,
            surroundingContextMatch: avgContextMatch
        });

        // Confidence reduces the effective distance; lower score is better.
        const score = totalDistance * (1 - confidence * 0.5);

        if (score < bestScore) {
            bestScore = score;
            bestMatch = {
                index: i,
                distance: totalDistance,
                confidence,
                tokenSimilarity: avgTokenSim,
                contextMatch: avgContextMatch
            };

            // Early termination. A zero total distance cannot be beaten (later
            // windows can only tie, and ties keep the first window), so stop.
            // The confidence test alone never fires here because the
            // structural signal is fixed at 0.
            if (totalDistance === 0 || confidence > 0.99) {
                console.error(`[FUZZY] Early termination at ${confidence.toFixed(2)} confidence`);
                break;
            }
        }
    }

    if (bestMatch) {
        console.error(`[FUZZY] Best match: line ${bestMatch.index}, distance ${bestMatch.distance}, confidence ${bestMatch.confidence.toFixed(2)}, token_sim ${bestMatch.tokenSimilarity.toFixed(2)}`);
    }

    return bestMatch;
}
180
231
 
@@ -0,0 +1,298 @@
1
+ /**
2
+ * Multi-File Coordination for mcfast v2.2
3
+ * Enables cross-file edits with atomic rollback
4
+ */
5
+
6
+ import fs from 'fs/promises';
7
+ import path from 'path';
8
+ import { parseCode, renameIdentifier, findIdentifierUsages } from './ast-detector.js';
9
+ import { createBackup, restoreFromBackup, validateFile } from '../utils/backup.js';
10
+ import _generate from '@babel/generator';
11
+
12
+ // Handle default export from Babel (CommonJS compatibility)
13
+ const generate = _generate.default || _generate;
14
+
15
/**
 * Decide whether an edit instruction describes a cross-file operation.
 *
 * Checks are applied in order and the first hit wins:
 *   1. explicit wording such as "in all files" / "everywhere" -> 'broadcast'
 *   2. import/export wording ("update import", ...)            -> 'dependency'
 *   3. "rename X to Y" where X is used in more than one file   -> 'symbol_rename'
 *
 * @param {string} instruction - Natural-language edit instruction.
 * @param {Object<string, string>} files - Map of file path to file content.
 * @returns {{type: string, files: string[], reason: string,
 *   symbol?: string, newSymbol?: string} | null}
 *   A description of the cross-file edit, or null when the instruction is not
 *   a cross-file edit or the inputs are unusable.
 */
export function detectCrossFileEdit(instruction, files) {
    // Callers invoke this before validating their input, so malformed
    // arguments mean "not a multi-file edit" rather than a TypeError.
    if (typeof instruction !== 'string' || files === null || typeof files !== 'object') {
        return null;
    }

    const fileKeys = Object.keys(files);

    // Pattern 1: Explicit multi-file mention
    if (/in\s+all\s+files|across\s+files|in\s+every|everywhere/i.test(instruction)) {
        return {
            type: 'broadcast',
            files: fileKeys,
            reason: 'Explicit multi-file instruction'
        };
    }

    // Pattern 2: Import/Export changes
    if (/change\s+import|update\s+import|fix\s+import|update\s+export/i.test(instruction)) {
        return {
            type: 'dependency',
            files: fileKeys,
            reason: 'Import/export modification'
        };
    }

    // Pattern 3: Symbol rename (only when the symbol is used in several files)
    const renameMatch = instruction.match(/rename\s+(?:variable|function|class|const|let)?\s*["`']?(\w+)["`']?\s+to\s+["`']?(\w+)["`']?/i);
    if (renameMatch && fileKeys.length > 1) {
        const [, oldName, newName] = renameMatch;
        const filesWithSymbol = findFilesUsingSymbol(files, oldName);

        if (filesWithSymbol.length > 1) {
            return {
                type: 'symbol_rename',
                symbol: oldName,
                newSymbol: newName,
                files: filesWithSymbol,
                reason: `Symbol "${oldName}" found in ${filesWithSymbol.length} files`
            };
        }
    }

    return null;
}
58
+
59
/**
 * List the files that use a given symbol.
 *
 * A cheap substring test filters out files that cannot contain the symbol.
 * Remaining files are parsed and searched for identifier usages; when parsing
 * (or the usage search) throws, a whole-identifier text search is used instead.
 *
 * @param {Object<string, string>} files - Map of file path to file content.
 * @param {string} symbolName - Identifier to look for.
 * @returns {string[]} Paths of the files that use the symbol, in input order.
 */
export function findFilesUsingSymbol(files, symbolName) {
    if (typeof symbolName !== 'string' || symbolName.length === 0) {
        return [];
    }

    // Escape the symbol so characters such as "$" are matched literally, and
    // use identifier-aware boundaries: "\b" never matches next to "$", which
    // is a legal identifier character.
    const escapedSymbol = symbolName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const wholeIdentifier = new RegExp(`(?<![\\w$])${escapedSymbol}(?![\\w$])`);

    const matchingPaths = [];

    for (const [filePath, content] of Object.entries(files)) {
        // Quick text search first (performance optimization); entries without
        // string content cannot contain the symbol.
        if (typeof content !== 'string' || !content.includes(symbolName)) continue;

        let isUsed;
        try {
            const ast = parseCode(content, filePath);
            isUsed = findIdentifierUsages(ast, symbolName).length > 0;
        } catch {
            // If AST parsing fails, fall back to text search
            isUsed = wholeIdentifier.test(content);
        }

        if (isUsed) {
            matchingPaths.push(filePath);
        }
    }

    return matchingPaths;
}
97
+
98
/**
 * Find the files that import from a specific file.
 *
 * Matching is by module basename: a file counts as a dependent when it has a
 * `from '...'`, `require('...')` or `import('...')` whose specifier ends in
 * the target's basename, optionally preceded by a directory path and
 * optionally followed by a JS/TS extension or the target's own extension.
 *
 * @param {Object<string, string>} files - Map of file path to file content.
 * @param {string} targetFile - Path of the file whose dependents are wanted.
 * @returns {string[]} Paths of the dependent files, in input order.
 */
export function findDependentFiles(files, targetFile) {
    const escapeForRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

    const targetExt = path.extname(targetFile);
    const targetBasename = path.basename(targetFile, targetExt);

    // Extensions a specifier may carry: JS/TS family plus the target's own.
    const extensionAlternatives = ['[cm]?[jt]sx?'];
    if (targetExt.length > 1) {
        extensionAlternatives.push(escapeForRegExp(targetExt.slice(1)));
    }

    // quote, optional directory part ending in "/", basename, optional
    // extension, then the same quote again.
    const specifier =
        `(['"])(?:[^'"\\n]*\\/)?${escapeForRegExp(targetBasename)}` +
        `(?:\\.(?:${extensionAlternatives.join('|')}))?\\1`;

    // Built once and without the "g" flag, so .test() carries no state
    // between files.
    const importPattern = new RegExp(`(?:from\\s+|require\\(\\s*|import\\(\\s*)${specifier}`);

    const dependents = [];

    for (const [filePath, content] of Object.entries(files)) {
        if (filePath === targetFile) continue;

        if (typeof content === 'string' && importPattern.test(content)) {
            dependents.push(filePath);
        }
    }

    return dependents;
}
123
+
124
/**
 * Apply a set of file edits atomically, rolling every file back on failure.
 *
 * Runs in three phases: back up every file, run every edit function in order,
 * then validate every edited file. If any phase throws, each file that was
 * backed up is restored from its backup and a failure result is returned
 * instead of an exception.
 *
 * @param {Array<{filePath: string, editFn: function(): Promise<*>}>} fileEdits
 *   Edits to apply; each `editFn` performs the write for its own file.
 * @returns {Promise<
 *   {success: true, files: string[], results: Array<{filePath: string, success: true, result: *}>, backups: Array<[string, string]>} |
 *   {success: false, error: string, rolledBack: string[], rollbackFailures: Array<{filePath: string, error: string}>}
 * >} On failure, `rolledBack` lists only the files whose restore succeeded and
 *   `rollbackFailures` lists the ones that could not be restored.
 */
export async function safeMultiFileEdit(fileEdits) {
    const backups = new Map();
    const completed = [];
    const results = [];

    try {
        // Phase 1: Backup all files
        console.error(`[MULTI-FILE] Backing up ${fileEdits.length} files...`);
        for (const { filePath } of fileEdits) {
            const backupPath = await createBackup(filePath);
            backups.set(filePath, backupPath);
        }

        // Phase 2: Apply all edits
        console.error(`[MULTI-FILE] Applying edits...`);
        for (const { filePath, editFn } of fileEdits) {
            let result;
            try {
                result = await editFn();
            } catch (error) {
                // Name the failing file but keep the original error reachable.
                throw new Error(`Edit failed for ${filePath}: ${error.message}`, { cause: error });
            }
            completed.push(filePath);
            results.push({ filePath, success: true, result });
        }

        // Phase 3: Validate all files
        console.error(`[MULTI-FILE] Validating ${completed.length} files...`);
        for (const filePath of completed) {
            const validation = await validateFile(filePath);
            if (!validation.valid) {
                throw new Error(`Validation failed for ${filePath}: ${validation.error}`);
            }
        }

        console.error(`[MULTI-FILE] ✅ All ${completed.length} files edited successfully`);

        return {
            success: true,
            files: completed,
            results,
            backups: Array.from(backups.entries())
        };

    } catch (error) {
        // Rollback ALL files that were backed up
        console.error(`[MULTI-FILE] ❌ Error: ${error.message}`);
        console.error(`[MULTI-FILE] Rolling back ${backups.size} files...`);

        const rolledBack = [];
        const rollbackFailures = [];

        for (const [filePath, backupPath] of backups) {
            try {
                await restoreFromBackup(filePath, backupPath);
                rolledBack.push(filePath);
            } catch (restoreError) {
                // Keep restoring the remaining files, but report this one as
                // NOT rolled back so the caller knows it may be modified.
                console.error(`[MULTI-FILE] Failed to restore ${filePath}: ${restoreError.message}`);
                rollbackFailures.push({ filePath, error: restoreError.message });
            }
        }

        return {
            success: false,
            error: error.message,
            rolledBack,
            rollbackFailures
        };
    }
}
190
+
191
/**
 * Rename a symbol in every file that mentions it, as one atomic operation.
 *
 * Each file whose content contains `oldName` gets an edit that parses the
 * content, renames the identifier, regenerates the source and writes it back.
 * The edits are then run through `safeMultiFileEdit`.
 *
 * @param {Object<string, string>} files - Map of file path to file content.
 * @param {string} oldName - Identifier to rename.
 * @param {string} newName - Replacement identifier.
 * @returns {Promise<Object>} The result of `safeMultiFileEdit`, or
 *   `{ success: false, message }` when `newName` is not a usable identifier
 *   or no file mentions `oldName`.
 */
export async function batchRenameSymbol(files, oldName, newName) {
    // Reject names that can never be identifiers before anything is written,
    // rather than writing broken code and relying on the later validation
    // phase to undo it. (Reserved words are not checked here.)
    const identifierPattern = /^[\p{ID_Start}_$][\p{ID_Continue}$\u200c\u200d]*$/u;
    if (typeof newName !== 'string' || !identifierPattern.test(newName)) {
        return {
            success: false,
            message: `"${newName}" is not a valid identifier`
        };
    }

    const fileEdits = [];

    for (const [filePath, content] of Object.entries(files)) {
        // Skip if symbol not in file (or the entry has no text content)
        if (typeof content !== 'string' || !content.includes(oldName)) continue;

        fileEdits.push({
            filePath,
            editFn: async () => {
                const ast = parseCode(content, filePath);
                const result = renameIdentifier(ast, oldName, newName);

                if (!result.success) {
                    throw new Error(result.message);
                }

                const output = generate(result.ast, {
                    retainLines: true,
                    comments: true
                });

                await fs.writeFile(filePath, output.code, 'utf8');

                return {
                    count: result.count,
                    locations: result.locations
                };
            }
        });
    }

    if (fileEdits.length === 0) {
        return {
            success: false,
            message: `Symbol "${oldName}" not found in any files`
        };
    }

    return await safeMultiFileEdit(fileEdits);
}
235
+
236
/**
 * Rewrite import specifiers that point at one module so they point at another,
 * across multiple files, as one atomic operation.
 *
 * Only the module basename is replaced. A specifier is rewritten when it
 * appears in `from '...'`, `require('...')` or `import('...')`, consists of
 * zero or more `./` / `../` segments followed by the old basename, and
 * optionally ends in a JS/TS extension. The relative prefix is preserved in
 * full; a specifier with no prefix is given `./`. A specifier that had an
 * extension gets the new path's extension (or keeps its own when the new path
 * has none).
 *
 * @param {Object<string, string>} files - Map of file path to file content.
 * @param {string} oldPath - Path of the module currently imported.
 * @param {string} newPath - Path of the module to import instead.
 * @returns {Promise<Object>} The result of `safeMultiFileEdit`, or
 *   `{ success: false, message }` when no file imports the old module.
 */
export async function updateImportPaths(files, oldPath, newPath) {
    const fileEdits = [];
    const oldBasename = path.basename(oldPath, path.extname(oldPath));
    const newExtension = path.extname(newPath);
    const newBasename = path.basename(newPath, newExtension);

    // Groups: 1 = opener, 2 = quote, 3 = the WHOLE relative prefix, 4 = extension.
    // The prefix segments are repeated inside one capturing group; capturing
    // the repeated segment itself would keep only the last "../".
    const escapedOldBasename = oldBasename.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const importSpecifier = new RegExp(
        `(from\\s+|require\\(\\s*|import\\(\\s*)(['"])((?:\\.{1,2}\\/)*)${escapedOldBasename}(\\.[cm]?[jt]sx?)?\\2`,
        'g'
    );

    for (const [filePath, content] of Object.entries(files)) {
        if (typeof content !== 'string' || !content.includes(oldBasename)) continue;

        // Compute the rewritten content up front so that a file which merely
        // mentions the basename (comment, unrelated identifier) is skipped
        // instead of failing inside the atomic edit and rolling back the batch.
        let changeCount = 0;
        const newContent = content.replace(importSpecifier, (match, opener, quote, prefix, extension) => {
            changeCount += 1;
            const rewrittenExtension = extension ? (newExtension || extension) : '';
            return `${opener}${quote}${prefix || './'}${newBasename}${rewrittenExtension}${quote}`;
        });

        if (changeCount === 0) continue;

        fileEdits.push({
            filePath,
            editFn: async () => {
                await fs.writeFile(filePath, newContent, 'utf8');

                return { changeCount };
            }
        });
    }

    if (fileEdits.length === 0) {
        return {
            success: false,
            message: `No files import from "${oldPath}"`
        };
    }

    return await safeMultiFileEdit(fileEdits);
}
@@ -0,0 +1,292 @@
1
+ /**
2
+ * Semantic Similarity for mcfast v2.2
3
+ * Optional embedding-based code similarity (controlled by MCFAST_SEMANTIC_MATCHING env var)
4
+ */
5
+
6
+ import crypto from 'crypto';
7
+
8
/**
 * Report whether semantic matching has been switched on.
 *
 * @returns {boolean} True only when the MCFAST_SEMANTIC_MATCHING environment
 *   variable is exactly 'true' or '1'.
 */
export function isSemanticMatchingEnabled() {
    const flag = process.env.MCFAST_SEMANTIC_MATCHING;
    return ['true', '1'].includes(flag);
}
15
+
16
/**
 * Token-level similarity between two code snippets (no dependencies).
 *
 * Each snippet is lower-cased, whitespace-normalized and split into tokens,
 * with the punctuation characters `{ } ( ) ; ,` treated as separate tokens.
 * The result is the Jaccard similarity of the two token sets.
 *
 * @param {string} code1 - First snippet.
 * @param {string} code2 - Second snippet.
 * @returns {number} Similarity in [0, 1]. Two snippets that both contain no
 *   tokens (for example two blank lines) are identical and score 1.
 */
export function tokenSimilarity(code1, code2) {
    // Normalize whitespace and tokenize; null/undefined count as empty text.
    const tokenize = (code) => String(code ?? '')
        .replace(/\s+/g, ' ')           // Normalize whitespace
        .replace(/[{}();,]/g, ' $& ')   // Separate punctuation
        .trim()
        .toLowerCase()
        .split(/\s+/)
        .filter((token) => token.length > 0);

    const set1 = new Set(tokenize(code1));
    const set2 = new Set(tokenize(code2));

    let sharedCount = 0;
    for (const token of set1) {
        if (set2.has(token)) sharedCount += 1;
    }
    const unionCount = set1.size + set2.size - sharedCount;

    // Without this guard two empty snippets give 0/0 = NaN, which propagates
    // through every average and score built on this value.
    if (unionCount === 0) {
        return 1;
    }

    // Jaccard similarity
    return sharedCount / unionCount;
}
44
+
45
/**
 * Compare two ASTs by shape rather than by content.
 *
 * Every object reachable from each root (ignoring `loc` and `range`
 * properties) is visited; the walk records the number of objects, the
 * greatest nesting depth, and how often each `type` value occurs. The result
 * is a weighted blend: 20% depth similarity, 30% object-count similarity and
 * 50% overlap of the per-type counts.
 *
 * @param {*} ast1 - Root of the first tree.
 * @param {*} ast2 - Root of the second tree.
 * @returns {number} Similarity score between 0 and 1.
 */
export function structuralSimilarity(ast1, ast2) {
    // Walk a tree with an explicit stack and summarize its shape.
    const summarize = (root) => {
        const typeCounts = new Map();
        let deepest = 0;
        let visited = 0;
        const pending = [[root, 0]];

        while (pending.length > 0) {
            const [node, level] = pending.pop();
            if (!node || typeof node !== 'object') continue;

            visited += 1;
            if (level > deepest) deepest = level;

            if (node.type) {
                typeCounts.set(node.type, (typeCounts.get(node.type) || 0) + 1);
            }

            for (const key in node) {
                if (key === 'loc' || key === 'range') continue;
                const child = node[key];
                if (Array.isArray(child)) {
                    for (const element of child) {
                        pending.push([element, level + 1]);
                    }
                } else if (child && typeof child === 'object') {
                    pending.push([child, level + 1]);
                }
            }
        }

        return { nodeTypes: typeCounts, depth: deepest, totalNodes: visited };
    };

    const first = summarize(ast1);
    const second = summarize(ast2);

    // Relative difference in nesting depth
    const depthGap = Math.abs(first.depth - second.depth);
    const depthSim = 1 - depthGap / Math.max(first.depth, second.depth, 1);

    // Relative difference in object count
    const countGap = Math.abs(first.totalNodes - second.totalNodes);
    const countSim = 1 - countGap / Math.max(first.totalNodes, second.totalNodes, 1);

    // Overlap of the per-type histograms: sum of minima over sum of maxima
    let overlap = 0;
    let ceiling = 0;
    const everyType = new Set([...first.nodeTypes.keys(), ...second.nodeTypes.keys()]);
    everyType.forEach((type) => {
        const inFirst = first.nodeTypes.get(type) || 0;
        const inSecond = second.nodeTypes.get(type) || 0;
        overlap += Math.min(inFirst, inSecond);
        ceiling += Math.max(inFirst, inSecond);
    });
    const typeSim = ceiling > 0 ? overlap / ceiling : 0;

    // Weighted average
    return (depthSim * 0.2) + (countSim * 0.3) + (typeSim * 0.5);
}
113
+
114
/**
 * Embedding-based similarity between two code snippets, with a local fallback.
 *
 * Token similarity is returned whenever semantic matching is disabled, no API
 * key is available, or the embedding request fails. Otherwise both snippets
 * are embedded concurrently and the cosine similarity of the embeddings is
 * returned.
 *
 * @param {string} code1 - First snippet.
 * @param {string} code2 - Second snippet.
 * @param {{apiKey?: string, model?: string}} [options] - `apiKey` defaults to
 *   the OPENAI_API_KEY environment variable, `model` to 'text-embedding-3-small'.
 * @returns {Promise<number>} Similarity score.
 */
export async function semanticSimilarity(code1, code2, options = {}) {
    const viaTokens = () => tokenSimilarity(code1, code2);

    if (!isSemanticMatchingEnabled()) {
        // Feature switched off: stay local.
        return viaTokens();
    }

    const {
        apiKey: key = process.env.OPENAI_API_KEY,
        model: embeddingModel = 'text-embedding-3-small'
    } = options;

    if (!key) {
        console.warn('[SEMANTIC] OpenAI API key not found, falling back to token similarity');
        return viaTokens();
    }

    try {
        // Request both embeddings concurrently, then compare them.
        const pendingFirst = getEmbedding(code1, key, embeddingModel);
        const pendingSecond = getEmbedding(code2, key, embeddingModel);
        const [first, second] = await Promise.all([pendingFirst, pendingSecond]);

        return cosineSimilarity(first, second);
    } catch (error) {
        console.warn(`[SEMANTIC] Embedding failed: ${error.message}, falling back to token similarity`);
        return viaTokens();
    }
}
145
+
146
/**
 * Request an embedding vector for a piece of text from the OpenAI API.
 *
 * @param {string} text - Text to embed.
 * @param {string} apiKey - API key sent as a Bearer token.
 * @param {string} model - Embedding model name.
 * @returns {Promise<*>} The `embedding` field of the first item in the
 *   response's `data` array.
 * @throws {Error} When the HTTP response is not OK.
 */
async function getEmbedding(text, apiKey, model) {
    const requestInit = {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'Authorization': `Bearer ${apiKey}`
        },
        body: JSON.stringify({ input: text, model })
    };

    const response = await fetch('https://api.openai.com/v1/embeddings', requestInit);

    if (response.ok) {
        const payload = await response.json();
        const [firstItem] = [payload.data[0]];
        return firstItem.embedding;
    }

    throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`);
}
169
+
170
/**
 * Cosine similarity between two numeric vectors.
 *
 * @param {number[]} vec1 - First vector.
 * @param {number[]} vec2 - Second vector, same length as the first.
 * @returns {number} Cosine of the angle between the vectors, or 0 when either
 *   vector has zero magnitude (the angle is undefined there and the plain
 *   formula would yield NaN).
 * @throws {Error} When the vectors differ in length.
 */
function cosineSimilarity(vec1, vec2) {
    if (vec1.length !== vec2.length) {
        throw new Error('Vectors must have same length');
    }

    let dotProduct = 0;
    let norm1 = 0;
    let norm2 = 0;

    for (let i = 0; i < vec1.length; i++) {
        dotProduct += vec1[i] * vec2[i];
        norm1 += vec1[i] * vec1[i];
        norm2 += vec2[i] * vec2[i];
    }

    const magnitude = Math.sqrt(norm1) * Math.sqrt(norm2);

    // Zero-length or all-zero vectors: avoid 0/0.
    if (magnitude === 0) {
        return 0;
    }

    return dotProduct / magnitude;
}
190
+
191
/**
 * Score how well `actual` matches `expected`, trying cheap checks first.
 *
 * Order of checks: identical strings (score 1.0, 'exact'); identical after
 * collapsing whitespace (0.95, 'normalized'); token similarity at or above
 * the threshold ('token'); then, if enabled, semantic similarity ('semantic').
 * When nothing else applies, the below-threshold token score is returned.
 *
 * NOTE: the semantic branch returns a Promise while every other branch
 * returns a plain object, so callers that may reach it must await the result.
 *
 * @param {string} expected - Text that was expected.
 * @param {string} actual - Text that was found.
 * @param {{useSemanticSimilarity?: boolean, threshold?: number}} [options]
 *   `useSemanticSimilarity` defaults to `isSemanticMatchingEnabled()`,
 *   `threshold` to 0.7.
 * @returns {{score: number, method: string} | Promise<{score: number, method: string}>}
 */
export function enhancedFuzzyMatch(expected, actual, options = {}) {
    const {
        useSemanticSimilarity: wantsSemantic = isSemanticMatchingEnabled(),
        threshold: minimumTokenScore = 0.7
    } = options;

    // 1. Exact match
    if (expected === actual) {
        return { score: 1.0, method: 'exact' };
    }

    // 2. Normalized match (whitespace-insensitive)
    const squash = (text) => text.replace(/\s+/g, ' ').trim();
    if (squash(expected) === squash(actual)) {
        return { score: 0.95, method: 'normalized' };
    }

    // 3. Token similarity (fast, no API calls)
    const tokenScore = tokenSimilarity(expected, actual);
    const tokenResult = { score: tokenScore, method: 'token' };

    if (tokenScore < minimumTokenScore && wantsSemantic) {
        // 4. Semantic similarity (optional, requires API) - asynchronous
        return semanticSimilarity(expected, actual).then((score) => {
            return { score, method: 'semantic' };
        });
    }

    return tokenResult;
}
232
+
233
/**
 * Combine several match signals into one confidence value for a fuzzy patch.
 *
 * Weights: 30% edit distance, 25% token similarity, 20% structural
 * similarity, 15% surrounding-context match, plus a flat 0.1 bonus when the
 * match is at the expected line. Each signal is limited to [0, 1] before
 * weighting, so no single signal can contribute more (or less) than its
 * weight, and non-finite signals count as 0.
 *
 * @param {*} match - Unused; kept for interface compatibility.
 * @param {Object} [context] - Signals for the candidate match.
 * @param {number} [context.levenshteinDistance=0] - Edit distance of the candidate.
 * @param {number} [context.maxDistance=100] - Distance treated as "no similarity".
 * @param {number} [context.tokenSimilarity=0] - Token similarity in [0, 1].
 * @param {number} [context.structuralSimilarity=0] - Structural similarity in [0, 1].
 * @param {boolean} [context.lineNumberMatch=false] - Whether the candidate is at the hinted line.
 * @param {number} [context.surroundingContextMatch=0] - Context score in [0, 1].
 * @returns {number} Confidence in [0, 1].
 */
export function calculateConfidence(match, context = {}) {
    // Local aliases avoid shadowing the module's similarity functions.
    const {
        levenshteinDistance: editDistance = 0,
        maxDistance = 100,
        tokenSimilarity: tokenScore = 0,
        structuralSimilarity: structuralScore = 0,
        lineNumberMatch = false,
        surroundingContextMatch: contextScore = 0
    } = context;

    // Limit a signal to [0, 1]; NaN/Infinity count as "no evidence".
    const toUnit = (value) => (Number.isFinite(value) ? Math.min(1, Math.max(0, value)) : 0);

    // Base score from Levenshtein distance. Without a positive maxDistance the
    // ratio is undefined: only a zero distance counts as a full match.
    const distanceScore = maxDistance > 0
        ? toUnit(1 - (editDistance / maxDistance))
        : (editDistance === 0 ? 1 : 0);

    // Weighted combination
    const confidence =
        (distanceScore * 0.3) +               // 30% from edit distance
        (toUnit(tokenScore) * 0.25) +         // 25% from token similarity
        (toUnit(structuralScore) * 0.2) +     // 20% from structure
        (toUnit(contextScore) * 0.15) +       // 15% from context
        (lineNumberMatch ? 0.1 : 0);          // 10% bonus for line match

    // Clamp to [0, 1]
    return Math.max(0, Math.min(1, confidence));
}
261
+
262
/**
 * Score a candidate line against a target line, taking nearby lines into account.
 *
 * The base score is the token similarity of the two lines. When at least one
 * non-blank surrounding line is supplied, the result is 70% line score plus
 * 30% context score. The context score is currently a fixed placeholder (0.5):
 * the surrounding lines are not yet compared against anything.
 *
 * @param {string} targetLine - Line being searched for.
 * @param {string} candidateLine - Line found in the file.
 * @param {string[]} [surroundingLines=[]] - File lines adjacent to the candidate.
 * @returns {number} The combined score; just the line score when there is no
 *   usable (non-blank string) context line.
 */
export function contextAwareMatch(targetLine, candidateLine, surroundingLines = []) {
    // Match the target line
    const lineScore = tokenSimilarity(targetLine, candidateLine);

    // Blank or non-string entries carry no context. Treat "only blank context"
    // the same as "no context" so the line score is not scaled down to 70%
    // merely because the neighbouring lines are whitespace.
    const hasUsableContext = Array.isArray(surroundingLines) && surroundingLines.some(
        (line) => typeof line === 'string' && line.trim().length > 0
    );

    if (!hasUsableContext) {
        return lineScore;
    }

    // Placeholder - would need actual implementation (e.g. comparing against
    // the target's own neighbouring lines, which are not available here).
    const contextScore = 0.5;

    // Combine line score with context score
    return (lineScore * 0.7) + (contextScore * 0.3);
}