@mrxkun/mcfast-mcp 2.2.1 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mrxkun/mcfast-mcp",
3
- "version": "2.2.1",
3
+ "version": "2.2.2",
4
4
  "description": "Ultra-fast code editing with fuzzy patching, auto-rollback, and 5 unified tools.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,8 +1,16 @@
1
1
  /**
2
- * Fuzzy Patching Engine for mcfast v2.1
3
- * Applies code changes with tolerance for whitespace and minor formatting differences
2
+ * Fuzzy Patch Strategy for mcfast v2.1+
3
+ * Applies unified diffs with whitespace tolerance and semantic similarity
4
4
  */
5
5
 
6
+ import { parsePatch } from './patch-parser.js';
7
+ import {
8
+ tokenSimilarity,
9
+ calculateConfidence,
10
+ contextAwareMatch,
11
+ isSemanticMatchingEnabled
12
+ } from './semantic-similarity.js';
13
+
6
14
  /**
7
15
  * Calculate Levenshtein distance between two strings
8
16
  * Used for fuzzy matching to find best location for patch
@@ -122,59 +130,102 @@ export function parseDiff(diffText) {
122
130
 
123
131
/**
 * Find the best match location for a block of patch lines inside a file.
 *
 * An exact line-by-line comparison is tried at `startHint` first. If that
 * fails, a window of `targetLines.length` lines slides over `fileLines` from
 * the top, and the window with the lowest confidence-adjusted edit distance
 * wins (the first such window on ties).
 *
 * There is no minimum-quality threshold: whenever at least one window can be
 * scored, the best one is returned no matter how poor it is. Callers should
 * inspect `distance` / `confidence` before trusting the result.
 *
 * @param {string[]} targetLines - Lines to locate (the pattern).
 * @param {string[]} fileLines - Lines of the file being searched.
 * @param {number} [startHint=0] - Zero-based line index where the match is expected.
 * @returns {{index: number, distance: number, confidence: number,
 *            tokenSimilarity?: number, contextMatch?: number} | null}
 *   The exact-match result carries only index/distance/confidence. Returns
 *   null when the inputs are not arrays or when no window could be scored.
 */
export function findBestMatch(targetLines, fileLines, startHint = 0) {
    // Non-array inputs cannot be scanned; report "no match" instead of throwing.
    if (!Array.isArray(targetLines) || !Array.isArray(fileLines)) {
        return null;
    }

    const patternLength = targetLines.length;
    const maxIterations = 10000;

    if (isSemanticMatchingEnabled()) {
        console.error('[FUZZY] Semantic matching enabled');
    }

    // Try exact match first at hint location
    if (startHint >= 0 && startHint + patternLength <= fileLines.length) {
        const exactMatch = targetLines.every((line, offset) => fileLines[startHint + offset] === line);
        if (exactMatch) {
            return { index: startHint, distance: 0, confidence: 1.0 };
        }
    }

    // An empty pattern has no lines to score, so the fuzzy scan below could
    // never select a window for it.
    if (patternLength === 0) {
        return null;
    }

    // A similarity helper yields NaN when it divides 0 by 0 (two lines that
    // both contain no tokens, i.e. blank lines). Such lines are identical for
    // matching purposes, so count them as a perfect score; otherwise the NaN
    // would spread into `score` and `NaN < bestScore` would reject the window.
    const nanAsPerfect = (value) => (Number.isNaN(value) ? 1 : value);

    let bestMatch = null;
    let bestScore = Infinity;
    const lastStart = fileLines.length - patternLength;

    // Fuzzy search with token and context similarity
    for (let i = 0; i <= lastStart; i++) {
        // Hard cap on the number of windows examined.
        if (i >= maxIterations) {
            console.error(`[FUZZY] Max iterations (${maxIterations}) reached`);
            break;
        }

        let totalDistance = 0;
        let tokenSimilaritySum = 0;
        let contextMatchSum = 0;

        for (let j = 0; j < patternLength; j++) {
            const targetLine = targetLines[j];
            const fileLine = fileLines[i + j];

            // Levenshtein distance
            totalDistance += levenshteinDistance(targetLine, fileLine);

            // Token similarity (always available)
            tokenSimilaritySum += nanAsPerfect(tokenSimilarity(targetLine, fileLine));

            // Context-aware matching: pass the file lines directly above and
            // below; missing neighbours and empty strings are dropped.
            const surroundingLines = [
                fileLines[i + j - 1],
                fileLines[i + j + 1]
            ].filter(Boolean);

            contextMatchSum += nanAsPerfect(contextAwareMatch(targetLine, fileLine, surroundingLines));
        }

        const avgTokenSim = tokenSimilaritySum / patternLength;
        const avgContextMatch = contextMatchSum / patternLength;

        // Calculate confidence using enhanced scoring
        const confidence = calculateConfidence(null, {
            levenshteinDistance: totalDistance,
            maxDistance: patternLength * 100,
            tokenSimilarity: avgTokenSim,
            structuralSimilarity: 0, // Would need AST parsing
            lineNumberMatch: i === startHint,
            surroundingContextMatch: avgContextMatch
        });

        // Lower is better: confidence discounts the raw distance by up to 50%.
        const score = totalDistance * (1 - confidence * 0.5);

        if (score < bestScore) {
            bestScore = score;
            bestMatch = {
                index: i,
                distance: totalDistance,
                confidence,
                tokenSimilarity: avgTokenSim,
                contextMatch: avgContextMatch
            };

            // Early termination if we find a very good match
            if (confidence > 0.99) {
                console.error(`[FUZZY] Early termination at ${confidence.toFixed(2)} confidence`);
                break;
            }

            // A zero-distance window scores 0, and no later window can score
            // strictly lower, so continuing the scan cannot change the result.
            if (totalDistance === 0) {
                break;
            }
        }
    }

    if (bestMatch) {
        console.error(`[FUZZY] Best match: line ${bestMatch.index}, distance ${bestMatch.distance}, confidence ${bestMatch.confidence.toFixed(2)}, token_sim ${bestMatch.tokenSimilarity.toFixed(2)}`);
    }

    return bestMatch;
}
180
231
 
@@ -0,0 +1,292 @@
1
+ /**
2
+ * Semantic Similarity for mcfast v2.2
3
+ * Optional embedding-based code similarity (controlled by MCFAST_SEMANTIC_MATCHING env var)
4
+ */
5
+
6
+ import crypto from 'crypto';
7
+
8
/**
 * Report whether semantic matching has been switched on.
 *
 * @returns {boolean} true only when the MCFAST_SEMANTIC_MATCHING environment
 *   variable is exactly 'true' or '1'.
 */
export function isSemanticMatchingEnabled() {
    const flag = process.env.MCFAST_SEMANTIC_MATCHING;
    return flag === 'true' || flag === '1';
}
15
+
16
/**
 * Token-level similarity between two code snippets (no dependencies).
 *
 * Each snippet is lower-cased and split on whitespace, with the punctuation
 * characters `{ } ( ) ; ,` treated as standalone tokens. The result is the
 * Jaccard index of the two sets of distinct tokens.
 *
 * @param {string} code1 - First snippet; null/undefined is treated as empty.
 * @param {string} code2 - Second snippet; null/undefined is treated as empty.
 * @returns {number} Similarity in [0, 1]. Two snippets that both contain no
 *   tokens (for example two blank lines) are identical and score 1.
 */
export function tokenSimilarity(code1, code2) {
    const tokenize = (code) =>
        String(code ?? '')
            .replace(/\s+/g, ' ')          // Normalize whitespace
            .replace(/[{}();,]/g, ' $& ')  // Separate punctuation
            .trim()
            .toLowerCase()
            .split(/\s+/)
            .filter((token) => token.length > 0);

    const set1 = new Set(tokenize(code1));
    const set2 = new Set(tokenize(code2));

    let shared = 0;
    for (const token of set1) {
        if (set2.has(token)) {
            shared++;
        }
    }

    // |A ∪ B| = |A| + |B| - |A ∩ B|
    const unionSize = set1.size + set2.size - shared;

    // Without this guard 0 / 0 yields NaN, which poisons every average and
    // comparison made with the result.
    if (unionSize === 0) {
        return 1;
    }

    return shared / unionSize;
}
44
+
45
/**
 * Structural similarity of two AST-like object graphs.
 *
 * Each graph is summarised by its maximum nesting depth, the number of object
 * nodes visited, and a histogram of `type` property values. The score is a
 * weighted blend: 20% depth similarity, 30% node-count similarity and 50%
 * overlap of the type histograms. Properties named `loc` and `range` are
 * ignored.
 *
 * @param {object} ast1 - Root of the first tree.
 * @param {object} ast2 - Root of the second tree.
 * @returns {number} Similarity in [0, 1].
 */
export function structuralSimilarity(ast1, ast2) {
    const summarize = (ast) => {
        const summary = {
            nodeTypes: new Map(),
            depth: 0,
            totalNodes: 0
        };

        // Objects on the current root-to-node path. A node that reappears on
        // its own path is a back-reference (for example a `parent` pointer)
        // and is skipped, so cyclic graphs terminate instead of overflowing
        // the stack. Nodes shared between sibling branches are still counted
        // once per occurrence.
        const activePath = new Set();

        const visit = (node, depth) => {
            if (!node || typeof node !== 'object' || activePath.has(node)) {
                return;
            }

            activePath.add(node);
            summary.totalNodes++;
            summary.depth = Math.max(summary.depth, depth);

            if (node.type) {
                summary.nodeTypes.set(
                    node.type,
                    (summary.nodeTypes.get(node.type) || 0) + 1
                );
            }

            for (const key in node) {
                if (key === 'loc' || key === 'range') continue;
                const value = node[key];
                if (Array.isArray(value)) {
                    value.forEach((child) => visit(child, depth + 1));
                } else if (value && typeof value === 'object') {
                    visit(value, depth + 1);
                }
            }

            activePath.delete(node);
        };

        visit(ast, 0);
        return summary;
    };

    const s1 = summarize(ast1);
    const s2 = summarize(ast2);

    // Compare depth similarity (the trailing 1 avoids division by zero)
    const depthSim = 1 - Math.abs(s1.depth - s2.depth) / Math.max(s1.depth, s2.depth, 1);

    // Compare node count similarity
    const countSim = 1 - Math.abs(s1.totalNodes - s2.totalNodes) / Math.max(s1.totalNodes, s2.totalNodes, 1);

    // Compare node type distribution: sum of per-type minimum counts over sum
    // of per-type maximum counts.
    const allTypes = new Set([...s1.nodeTypes.keys(), ...s2.nodeTypes.keys()]);
    let typeMatchScore = 0;
    let typeTotal = 0;

    for (const type of allTypes) {
        const count1 = s1.nodeTypes.get(type) || 0;
        const count2 = s2.nodeTypes.get(type) || 0;
        typeMatchScore += Math.min(count1, count2);
        typeTotal += Math.max(count1, count2);
    }

    const typeSim = typeTotal > 0 ? typeMatchScore / typeTotal : 0;

    // Weighted average
    return (depthSim * 0.2) + (countSim * 0.3) + (typeSim * 0.5);
}
113
+
114
/**
 * Embedding-based similarity of two code snippets, with a token-based fallback.
 *
 * The embedding path runs only when semantic matching is enabled and an API
 * key is available. In every other case, including any error raised while
 * fetching or comparing embeddings, the result of `tokenSimilarity` is
 * returned instead.
 *
 * @param {string} code1 - First snippet.
 * @param {string} code2 - Second snippet.
 * @param {{apiKey?: string, model?: string}} [options] - `apiKey` defaults to
 *   the OPENAI_API_KEY environment variable, `model` to 'text-embedding-3-small'.
 * @returns {Promise<number>} Cosine similarity of the two embeddings, or the
 *   token similarity when falling back.
 */
export async function semanticSimilarity(code1, code2, options = {}) {
    const lexicalFallback = () => tokenSimilarity(code1, code2);

    if (!isSemanticMatchingEnabled()) {
        // Feature switched off: stay local.
        return lexicalFallback();
    }

    const apiKey = options.apiKey === undefined ? process.env.OPENAI_API_KEY : options.apiKey;
    const model = options.model === undefined ? 'text-embedding-3-small' : options.model;

    if (!apiKey) {
        console.warn('[SEMANTIC] OpenAI API key not found, falling back to token similarity');
        return lexicalFallback();
    }

    try {
        // Both embedding requests are started before either is awaited.
        const vectors = await Promise.all(
            [code1, code2].map((snippet) => getEmbedding(snippet, apiKey, model))
        );

        return cosineSimilarity(vectors[0], vectors[1]);
    } catch (failure) {
        console.warn(`[SEMANTIC] Embedding failed: ${failure.message}, falling back to token similarity`);
        return lexicalFallback();
    }
}
145
+
146
/**
 * Request an embedding vector for `text` from the OpenAI embeddings endpoint.
 *
 * @param {string} text - Input to embed.
 * @param {string} apiKey - Sent as a Bearer token in the Authorization header.
 * @param {string} model - Embedding model name.
 * @returns {Promise<number[]>} The `embedding` array of the first item in the
 *   response's `data` list.
 * @throws {Error} When the HTTP status is not OK, or when the response body
 *   does not contain an embedding array at `data[0].embedding`.
 */
async function getEmbedding(text, apiKey, model) {
    const response = await fetch('https://api.openai.com/v1/embeddings', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'Authorization': `Bearer ${apiKey}`
        },
        body: JSON.stringify({
            input: text,
            model: model
        })
    });

    if (!response.ok) {
        throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`);
    }

    const payload = await response.json();
    const embedding = payload?.data?.[0]?.embedding;

    // An OK status with an unexpected body would otherwise surface as an
    // opaque "Cannot read properties of undefined" TypeError.
    if (!Array.isArray(embedding)) {
        throw new Error('OpenAI API error: response did not contain an embedding vector');
    }

    return embedding;
}
169
+
170
/**
 * Cosine similarity between two numeric vectors.
 *
 * @param {number[]} vec1 - First vector.
 * @param {number[]} vec2 - Second vector, same length as `vec1`.
 * @returns {number} Dot product divided by the product of the magnitudes.
 *   Returns 0 when either vector has zero magnitude (including empty
 *   vectors), because the angle is undefined there and the division would
 *   otherwise produce NaN.
 * @throws {Error} When the vectors differ in length.
 */
function cosineSimilarity(vec1, vec2) {
    if (vec1.length !== vec2.length) {
        throw new Error('Vectors must have same length');
    }

    let dotProduct = 0;
    let norm1 = 0;
    let norm2 = 0;

    for (let i = 0; i < vec1.length; i++) {
        dotProduct += vec1[i] * vec2[i];
        norm1 += vec1[i] * vec1[i];
        norm2 += vec2[i] * vec2[i];
    }

    const magnitude = Math.sqrt(norm1) * Math.sqrt(norm2);

    // Guard the 0 / 0 case so callers never receive NaN.
    if (magnitude === 0) {
        return 0;
    }

    return dotProduct / magnitude;
}
190
+
191
/**
 * Tiered comparison of an expected and an actual code string.
 *
 * Checks are attempted from cheapest to most expensive:
 *   1. identical strings                     -> score 1.0,  method 'exact'
 *   2. identical after collapsing whitespace -> score 0.95, method 'normalized'
 *   3. token similarity                      -> method 'token'
 *   4. semantic similarity                   -> method 'semantic'
 *
 * Tier 4 is reached only when the token score is below `threshold` and
 * `useSemanticSimilarity` is truthy.
 *
 * NOTE: tiers 1-3 return a plain object synchronously, whereas tier 4 returns
 * a Promise of that object. Callers that may reach tier 4 must await the result.
 *
 * @param {string} expected - Reference text.
 * @param {string} actual - Candidate text.
 * @param {{useSemanticSimilarity?: boolean, threshold?: number}} [options] -
 *   `useSemanticSimilarity` defaults to `isSemanticMatchingEnabled()`,
 *   `threshold` to 0.7.
 * @returns {{score: number, method: string} | Promise<{score: number, method: string}>}
 */
export function enhancedFuzzyMatch(expected, actual, options = {}) {
    const { useSemanticSimilarity = isSemanticMatchingEnabled(), threshold = 0.7 } = options;

    // Tier 1: byte-for-byte equality.
    if (expected === actual) {
        return { score: 1.0, method: 'exact' };
    }

    // Tier 2: equality once runs of whitespace are collapsed and ends trimmed.
    const collapseWhitespace = (text) => text.replace(/\s+/g, ' ').trim();
    if (collapseWhitespace(expected) === collapseWhitespace(actual)) {
        return { score: 0.95, method: 'normalized' };
    }

    // Tier 3: token overlap (fast, no API calls). Also the final answer when
    // the semantic tier is not in use.
    const tokenResult = { score: tokenSimilarity(expected, actual), method: 'token' };
    if (tokenResult.score >= threshold || !useSemanticSimilarity) {
        return tokenResult;
    }

    // Tier 4: asynchronous semantic comparison.
    return semanticSimilarity(expected, actual).then((score) => ({
        score,
        method: 'semantic'
    }));
}
232
+
233
/**
 * Combine several match signals into a single confidence value.
 *
 * Weights: 30% edit distance (1 - distance / maxDistance), 25% token
 * similarity, 20% structural similarity, 15% surrounding-context match, plus
 * a flat 0.1 bonus when the line number matched. The sum is clamped to [0, 1].
 *
 * A signal whose contribution evaluates to NaN is counted as 0, so the result
 * is always a number in [0, 1]. A zero edit distance always earns the full
 * distance share, even when `maxDistance` is 0.
 *
 * @param {*} match - Currently unused; kept for interface compatibility.
 * @param {object} [context] - Signal values.
 * @param {number} [context.levenshteinDistance=0] - Total edit distance.
 * @param {number} [context.maxDistance=100] - Distance that maps to a distance score of 0.
 * @param {number} [context.tokenSimilarity=0]
 * @param {number} [context.structuralSimilarity=0]
 * @param {boolean} [context.lineNumberMatch=false]
 * @param {number} [context.surroundingContextMatch=0]
 * @returns {number} Confidence in [0, 1].
 */
export function calculateConfidence(match, context = {}) {
    // Aliased so the locals do not shadow the module-level functions that
    // share these names.
    const {
        levenshteinDistance: editDistance = 0,
        maxDistance = 100,
        tokenSimilarity: tokenScore = 0,
        structuralSimilarity: structureScore = 0,
        lineNumberMatch = false,
        surroundingContextMatch: contextScore = 0
    } = context;

    // `Math.min` / `Math.max` pass NaN straight through, so each contribution
    // is sanitised before it is added.
    const nanAsZero = (value) => (Number.isNaN(value) ? 0 : value);

    // Base score from Levenshtein distance. The zero-distance shortcut avoids
    // 0 / 0 when maxDistance is 0.
    const distanceScore = editDistance === 0 ? 1 : 1 - (editDistance / maxDistance);

    // Weighted combination
    let confidence = 0;
    confidence += nanAsZero(distanceScore * 0.3);   // 30% from edit distance
    confidence += nanAsZero(tokenScore * 0.25);     // 25% from token similarity
    confidence += nanAsZero(structureScore * 0.2);  // 20% from structure
    confidence += nanAsZero(contextScore * 0.15);   // 15% from context
    confidence += (lineNumberMatch ? 0.1 : 0);      // 10% bonus for line match

    // Opposite infinities can still add up to NaN; treat that as no confidence.
    if (Number.isNaN(confidence)) {
        return 0;
    }

    // Clamp to [0, 1]
    return Math.max(0, Math.min(1, confidence));
}
261
+
262
/**
 * Score a candidate line against a target line, blended with a context term.
 *
 * With no surrounding lines the plain token similarity of the two lines is
 * returned. Otherwise the result is 70% line similarity plus 30% context
 * score. The context score is currently a placeholder: it is 0.5 when at
 * least one surrounding line is non-blank and 0 when all of them are blank.
 * The content of the surrounding lines is not compared against anything.
 *
 * @param {string} targetLine - Line being searched for.
 * @param {string} candidateLine - Line it is compared with.
 * @param {string[]} [surroundingLines=[]] - Neighbouring lines of the candidate.
 * @returns {number} Combined score.
 */
export function contextAwareMatch(targetLine, candidateLine, surroundingLines = []) {
    const lineScore = tokenSimilarity(targetLine, candidateLine);

    // No context supplied: the line score stands alone.
    if (surroundingLines.length === 0) {
        return lineScore;
    }

    // Count the surrounding lines that contain something other than whitespace.
    let usableNeighbours = 0;
    for (const neighbour of surroundingLines) {
        if (neighbour && neighbour.trim().length > 0) {
            usableNeighbours += 1;
        }
    }

    // Placeholder scoring: every usable neighbour is worth a fixed 0.5, so
    // their average is 0.5 whenever there is at least one.
    const contextScore = usableNeighbours > 0 ? 0.5 : 0;

    // Combine line score with context score
    return (lineScore * 0.7) + (contextScore * 0.3);
}