@mrxkun/mcfast-mcp 2.2.1 → 2.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
@@ -1,8 +1,16 @@
 /**
- * Fuzzy
- * Applies
+ * Fuzzy Patch Strategy for mcfast v2.1+
+ * Applies unified diffs with whitespace tolerance and semantic similarity
  */
 
+import { parsePatch } from './patch-parser.js';
+import {
+  tokenSimilarity,
+  calculateConfidence,
+  contextAwareMatch,
+  isSemanticMatchingEnabled
+} from './semantic-similarity.js';
+
 /**
  * Calculate Levenshtein distance between two strings
  * Used for fuzzy matching to find best location for patch
@@ -122,59 +130,102 @@ export function parseDiff(diffText) {
 
 /**
  * Find best match location for a pattern in target text
- * Returns { index,
+ * Returns { index, distance, confidence } or null if no good match
  */
-export function findBestMatch(targetLines,
-
-
-
-
+export function findBestMatch(targetLines, fileLines, startHint = 0) {
+  let bestMatch = null;
+  let bestScore = Infinity;
+  const maxIterations = 10000;
+  let iterations = 0;
 
-
-
+  const useSemanticMatching = isSemanticMatchingEnabled();
+
+  if (useSemanticMatching) {
+    console.error('[FUZZY] Semantic matching enabled');
   }
 
-  //
-  if (
-
+  // Try exact match first at hint location
+  if (startHint >= 0 && startHint + targetLines.length <= fileLines.length) {
+    const exactMatch = targetLines.every((line, i) =>
+      fileLines[startHint + i] === line
+    );
+    if (exactMatch) {
+      return { index: startHint, distance: 0, confidence: 1.0 };
+    }
   }
 
-
-  let
-
+  // Fuzzy search with semantic similarity
+  for (let i = 0; i <= fileLines.length - targetLines.length; i++) {
+    iterations++;
+    if (iterations > maxIterations) {
+      console.error(`[FUZZY] Max iterations (${maxIterations}) reached`);
+      break;
+    }
 
-
-
-
-    const windowText = window.join('\n');
-    const patternText = patternLines.join('\n');
+    let totalDistance = 0;
+    let tokenSimilaritySum = 0;
+    let contextMatchSum = 0;
 
-
-
-
+    for (let j = 0; j < targetLines.length; j++) {
+      const targetLine = targetLines[j];
+      const fileLine = fileLines[i + j];
 
-
+      // Levenshtein distance
+      const distance = levenshteinDistance(targetLine, fileLine);
+      totalDistance += distance;
 
-
-
+      // Token similarity (always available)
+      const tokSim = tokenSimilarity(targetLine, fileLine);
+      tokenSimilaritySum += tokSim;
 
-
+      // Context-aware matching (use surrounding lines)
+      const surroundingLines = [
+        fileLines[i + j - 1],
+        fileLines[i + j + 1]
+      ].filter(Boolean);
 
-
+      const contextScore = contextAwareMatch(targetLine, fileLine, surroundingLines);
+      contextMatchSum += contextScore;
+    }
+
+    const avgTokenSim = tokenSimilaritySum / targetLines.length;
+    const avgContextMatch = contextMatchSum / targetLines.length;
+
+    // Calculate confidence using enhanced scoring
+    const confidence = calculateConfidence(null, {
+      levenshteinDistance: totalDistance,
+      maxDistance: targetLines.length * 100,
+      tokenSimilarity: avgTokenSim,
+      structuralSimilarity: 0, // Would need AST parsing
+      lineNumberMatch: i === startHint,
+      surroundingContextMatch: avgContextMatch
+    });
+
+    // Prefer matches with higher confidence
+    const score = totalDistance * (1 - confidence * 0.5); // Confidence reduces effective distance
+
+    if (score < bestScore) {
       bestScore = score;
       bestMatch = {
         index: i,
-
-
+        distance: totalDistance,
+        confidence,
+        tokenSimilarity: avgTokenSim,
+        contextMatch: avgContextMatch
       };
 
-      // Early
-      if (
+      // Early termination if we find a very good match
+      if (confidence > 0.99) {
+        console.error(`[FUZZY] Early termination at ${confidence.toFixed(2)} confidence`);
        break;
      }
    }
  }
 
+  if (bestMatch) {
+    console.error(`[FUZZY] Best match: line ${bestMatch.index}, distance ${bestMatch.distance}, confidence ${bestMatch.confidence.toFixed(2)}, token_sim ${bestMatch.tokenSimilarity.toFixed(2)}`);
+  }
+
   return bestMatch;
 }
 
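The reworked findBestMatch above scores every candidate window with a blend of Levenshtein distance, token similarity and surrounding-context matching, folds those signals into a confidence value, and ranks windows by totalDistance * (1 - confidence * 0.5), so higher confidence shrinks the effective distance. A minimal usage sketch follows; the './fuzzy-patch.js' path is an assumption for illustration (this diff does not show the file's name), and the inputs are invented.

// Sketch only: the module path below is assumed, not taken from the diff.
import { findBestMatch } from './fuzzy-patch.js';

const fileLines = [
  'function add(a, b) {',
  '  return a + b;',
  '}'
];
const targetLines = [
  'function add(a,b) {',
  '  return a + b;'
];

// startHint = 0 tries the exact-match fast path first; when that fails, the
// fuzzy scan returns { index, distance, confidence, tokenSimilarity, contextMatch }.
const match = findBestMatch(targetLines, fileLines, 0);
if (match) {
  console.log(match.index, match.confidence.toFixed(2));
}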
@@ -0,0 +1,292 @@
+/**
+ * Semantic Similarity for mcfast v2.2
+ * Optional embedding-based code similarity (controlled by MCFAST_SEMANTIC_MATCHING env var)
+ */
+
+import crypto from 'crypto';
+
+/**
+ * Check if semantic matching is enabled
+ */
+export function isSemanticMatchingEnabled() {
+  return process.env.MCFAST_SEMANTIC_MATCHING === 'true' ||
+         process.env.MCFAST_SEMANTIC_MATCHING === '1';
+}
+
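The toggle is a plain string comparison on the MCFAST_SEMANTIC_MATCHING environment variable. A minimal sketch of flipping it from code, assuming a Node.js ES module and the module path as it appears inside the package source (setting the variable in the shell or an .env file works just as well):

// Hedged example: enable the optional semantic matching for this process.
// Both 'true' and '1' are accepted.
process.env.MCFAST_SEMANTIC_MATCHING = 'true';
const { isSemanticMatchingEnabled } = await import('./semantic-similarity.js');
console.log(isSemanticMatchingEnabled()); // true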
+/**
+ * Simple hash-based similarity (always available, no dependencies)
+ * Uses token-level comparison for code similarity
+ */
+export function tokenSimilarity(code1, code2) {
+  // Normalize whitespace and tokenize
+  const normalize = (code) => {
+    return code
+      .replace(/\s+/g, ' ')          // Normalize whitespace
+      .replace(/[{}();,]/g, ' $& ')  // Separate punctuation
+      .trim()
+      .toLowerCase()
+      .split(/\s+/)
+      .filter(t => t.length > 0);
+  };
+
+  const tokens1 = normalize(code1);
+  const tokens2 = normalize(code2);
+
+  // Jaccard similarity
+  const set1 = new Set(tokens1);
+  const set2 = new Set(tokens2);
+
+  const intersection = new Set([...set1].filter(x => set2.has(x)));
+  const union = new Set([...set1, ...set2]);
+
+  return intersection.size / union.size;
+}
+
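tokenSimilarity is a Jaccard score over normalized, lower-cased tokens with punctuation split out, so it rewards lines that differ only in small edits. A worked example (the numbers follow directly from the tokenization above; the relative import path is the one used inside the package source):

import { tokenSimilarity } from './semantic-similarity.js';

// Tokens: ['const','x','=','getvalue','(',')',';'] vs ['let','x','=','getvalue','(',')',';']
// intersection = 6 tokens, union = 8 tokens, so the score is 6 / 8 = 0.75.
console.log(tokenSimilarity('const x = getValue();', 'let x = getValue();'));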
+/**
+ * Structural similarity based on AST depth and node types
+ * Lightweight alternative to full embeddings
+ */
+export function structuralSimilarity(ast1, ast2) {
+  const getStructure = (ast) => {
+    const structure = {
+      nodeTypes: new Map(),
+      depth: 0,
+      totalNodes: 0
+    };
+
+    const traverse = (node, depth = 0) => {
+      if (!node || typeof node !== 'object') return;
+
+      structure.totalNodes++;
+      structure.depth = Math.max(structure.depth, depth);
+
+      if (node.type) {
+        structure.nodeTypes.set(
+          node.type,
+          (structure.nodeTypes.get(node.type) || 0) + 1
+        );
+      }
+
+      for (const key in node) {
+        if (key === 'loc' || key === 'range') continue;
+        const value = node[key];
+        if (Array.isArray(value)) {
+          value.forEach(child => traverse(child, depth + 1));
+        } else if (value && typeof value === 'object') {
+          traverse(value, depth + 1);
+        }
+      }
+    };
+
+    traverse(ast);
+    return structure;
+  };
+
+  const s1 = getStructure(ast1);
+  const s2 = getStructure(ast2);
+
+  // Compare depth similarity
+  const depthSim = 1 - Math.abs(s1.depth - s2.depth) / Math.max(s1.depth, s2.depth, 1);
+
+  // Compare node count similarity
+  const countSim = 1 - Math.abs(s1.totalNodes - s2.totalNodes) / Math.max(s1.totalNodes, s2.totalNodes, 1);
+
+  // Compare node type distribution
+  const allTypes = new Set([...s1.nodeTypes.keys(), ...s2.nodeTypes.keys()]);
+  let typeMatchScore = 0;
+  let typeTotal = 0;
+
+  for (const type of allTypes) {
+    const count1 = s1.nodeTypes.get(type) || 0;
+    const count2 = s2.nodeTypes.get(type) || 0;
+    const maxCount = Math.max(count1, count2);
+    const minCount = Math.min(count1, count2);
+    typeMatchScore += minCount;
+    typeTotal += maxCount;
+  }
+
+  const typeSim = typeTotal > 0 ? typeMatchScore / typeTotal : 0;
+
+  // Weighted average
+  return (depthSim * 0.2) + (countSim * 0.3) + (typeSim * 0.5);
+}
+
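structuralSimilarity weights tree depth (0.2), node count (0.3) and node-type overlap (0.5). A small sketch with hand-written AST-like objects, not the output of any particular parser, just to show the arithmetic:

import { structuralSimilarity } from './semantic-similarity.js';

const astA = { type: 'Program', body: [{ type: 'ReturnStatement', argument: { type: 'Identifier' } }] };
const astB = { type: 'Program', body: [{ type: 'ReturnStatement', argument: { type: 'Literal' } }] };

// Same depth and node count (depthSim = countSim = 1); type overlap is 2/4,
// so the result is 0.2 * 1 + 0.3 * 1 + 0.5 * 0.5 = 0.75.
console.log(structuralSimilarity(astA, astB));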
+/**
+ * Semantic code similarity (optional, requires OpenAI API)
+ * Only used when MCFAST_SEMANTIC_MATCHING=true
+ */
+export async function semanticSimilarity(code1, code2, options = {}) {
+  if (!isSemanticMatchingEnabled()) {
+    // Fallback to token similarity
+    return tokenSimilarity(code1, code2);
+  }
+
+  const { apiKey = process.env.OPENAI_API_KEY, model = 'text-embedding-3-small' } = options;
+
+  if (!apiKey) {
+    console.warn('[SEMANTIC] OpenAI API key not found, falling back to token similarity');
+    return tokenSimilarity(code1, code2);
+  }
+
+  try {
+    // Get embeddings for both code snippets
+    const [embedding1, embedding2] = await Promise.all([
+      getEmbedding(code1, apiKey, model),
+      getEmbedding(code2, apiKey, model)
+    ]);
+
+    // Cosine similarity
+    return cosineSimilarity(embedding1, embedding2);
+  } catch (error) {
+    console.warn(`[SEMANTIC] Embedding failed: ${error.message}, falling back to token similarity`);
+    return tokenSimilarity(code1, code2);
+  }
+}
+
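semanticSimilarity only calls the OpenAI embeddings API when the environment flag and an API key are both present; in every other case (flag off, key missing, request failure) it degrades to tokenSimilarity, so awaiting it is always safe. A usage sketch, assuming a Node.js ES module and the package-internal module path:

import { semanticSimilarity } from './semantic-similarity.js';

// Embedding-based cosine score when enabled, token Jaccard score otherwise;
// either way the value lands in [0, 1].
const score = await semanticSimilarity(
  'items.filter(i => i.active)',
  'items.filter(item => item.active === true)'
);
console.log(score.toFixed(2));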
+/**
+ * Get embedding from OpenAI API
+ */
+async function getEmbedding(text, apiKey, model) {
+  const response = await fetch('https://api.openai.com/v1/embeddings', {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'Authorization': `Bearer ${apiKey}`
+    },
+    body: JSON.stringify({
+      input: text,
+      model: model
+    })
+  });
+
+  if (!response.ok) {
+    throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`);
+  }
+
+  const data = await response.json();
+  return data.data[0].embedding;
+}
+
+/**
+ * Cosine similarity between two vectors
+ */
+function cosineSimilarity(vec1, vec2) {
+  if (vec1.length !== vec2.length) {
+    throw new Error('Vectors must have same length');
+  }
+
+  let dotProduct = 0;
+  let norm1 = 0;
+  let norm2 = 0;
+
+  for (let i = 0; i < vec1.length; i++) {
+    dotProduct += vec1[i] * vec2[i];
+    norm1 += vec1[i] * vec1[i];
+    norm2 += vec2[i] * vec2[i];
+  }
+
+  return dotProduct / (Math.sqrt(norm1) * Math.sqrt(norm2));
+}
+
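As a quick sanity check of cosineSimilarity (numbers invented for illustration): for vec1 = [1, 2, 3] and vec2 = [2, 4, 6] the dot product is 2 + 8 + 18 = 28 and the norms are sqrt(14) and sqrt(56), so the score is 28 / (sqrt(14) * sqrt(56)) = 28 / 28 = 1. Parallel embeddings score 1, orthogonal ones score 0.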
+/**
+ * Enhanced fuzzy matching with semantic similarity
+ * Combines Levenshtein distance with semantic understanding
+ */
+export function enhancedFuzzyMatch(expected, actual, options = {}) {
+  const {
+    useSemanticSimilarity = isSemanticMatchingEnabled(),
+    threshold = 0.7
+  } = options;
+
+  // 1. Exact match
+  if (expected === actual) {
+    return { score: 1.0, method: 'exact' };
+  }
+
+  // 2. Normalized match (whitespace-insensitive)
+  const normalizedExpected = expected.replace(/\s+/g, ' ').trim();
+  const normalizedActual = actual.replace(/\s+/g, ' ').trim();
+
+  if (normalizedExpected === normalizedActual) {
+    return { score: 0.95, method: 'normalized' };
+  }
+
+  // 3. Token similarity (fast, no API calls)
+  const tokenScore = tokenSimilarity(expected, actual);
+
+  if (tokenScore >= threshold) {
+    return { score: tokenScore, method: 'token' };
+  }
+
+  // 4. Semantic similarity (optional, requires API)
+  if (useSemanticSimilarity) {
+    // Note: This is async, caller must await
+    return semanticSimilarity(expected, actual).then(score => ({
+      score,
+      method: 'semantic'
+    }));
+  }
+
+  return { score: tokenScore, method: 'token' };
+}
+
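enhancedFuzzyMatch walks a ladder of checks: exact, whitespace-normalized, token Jaccard against the threshold, and only then the optional semantic path. Note the asymmetry: the first three branches return a plain object while the semantic branch returns a Promise, so callers that may hit either should wrap the result in Promise.resolve(). A sketch, using the package-internal module path:

import { enhancedFuzzyMatch } from './semantic-similarity.js';

// Only the whitespace differs, so the normalized branch wins with score 0.95.
const result = await Promise.resolve(
  enhancedFuzzyMatch('return a + b;', 'return  a +  b;', { threshold: 0.7 })
);
console.log(result); // { score: 0.95, method: 'normalized' }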
+/**
+ * Improved confidence scoring for fuzzy patches
+ * Combines multiple signals for better accuracy
+ */
+export function calculateConfidence(match, context = {}) {
+  const {
+    levenshteinDistance = 0,
+    maxDistance = 100,
+    tokenSimilarity = 0,
+    structuralSimilarity = 0,
+    lineNumberMatch = false,
+    surroundingContextMatch = 0
+  } = context;
+
+  // Base score from Levenshtein distance
+  const distanceScore = 1 - (levenshteinDistance / maxDistance);
+
+  // Weighted combination
+  let confidence = 0;
+  confidence += distanceScore * 0.3;            // 30% from edit distance
+  confidence += tokenSimilarity * 0.25;         // 25% from token similarity
+  confidence += structuralSimilarity * 0.2;     // 20% from structure
+  confidence += surroundingContextMatch * 0.15; // 15% from context
+  confidence += (lineNumberMatch ? 0.1 : 0);    // 10% bonus for line match
+
+  // Clamp to [0, 1]
+  return Math.max(0, Math.min(1, confidence));
+}
+
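The weights above sum to 1.0 (0.3 + 0.25 + 0.2 + 0.15 + 0.1) and the result is clamped to [0, 1]. A worked call with illustrative numbers (the first argument is unused by the shown code; passing null mirrors the call site in findBestMatch):

import { calculateConfidence } from './semantic-similarity.js';

const confidence = calculateConfidence(null, {
  levenshteinDistance: 10,      // distanceScore = 1 - 10/100 = 0.9 -> contributes 0.27
  maxDistance: 100,
  tokenSimilarity: 0.8,         // -> 0.20
  structuralSimilarity: 0,      // -> 0.00
  lineNumberMatch: true,        // -> 0.10
  surroundingContextMatch: 0.5  // -> 0.075
});
console.log(confidence); // ≈ 0.645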
+/**
+ * Context-aware code matching
+ * Uses surrounding lines to improve match accuracy
+ */
+export function contextAwareMatch(targetLine, candidateLine, surroundingLines = []) {
+  // Match the target line
+  const lineScore = tokenSimilarity(targetLine, candidateLine);
+
+  if (surroundingLines.length === 0) {
+    return lineScore;
+  }
+
+  // Check if surrounding context also matches
+  let contextScore = 0;
+  let contextCount = 0;
+
+  for (const contextLine of surroundingLines) {
+    if (contextLine && contextLine.trim().length > 0) {
+      // Simple check: does context appear nearby?
+      contextScore += 0.5; // Placeholder - would need actual implementation
+      contextCount++;
+    }
+  }
+
+  if (contextCount > 0) {
+    contextScore /= contextCount;
+  }
+
+  // Combine line score with context score
+  return (lineScore * 0.7) + (contextScore * 0.3);
+}
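contextAwareMatch blends the target line's own token score (weight 0.7) with a context score (weight 0.3); as the inline comment admits, the context part is still a placeholder that credits a flat 0.5 per non-empty surrounding line. A sketch of what that yields in the code as shipped:

import { contextAwareMatch } from './semantic-similarity.js';

// Identical tokens on the target line (score 1.0) plus two non-empty
// neighbours (placeholder context score 0.5): 1.0 * 0.7 + 0.5 * 0.3 ≈ 0.85.
const score = contextAwareMatch(
  'return total;',
  'return total ;',
  ['const total = a + b;', '}']
);
console.log(score);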