@mrxkun/mcfast-mcp 2.2.1 → 2.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,8 +1,15 @@
 /**
- * Fuzzy
- * Applies
+ * Fuzzy Patch Strategy for mcfast v2.1+
+ * Applies unified diffs with whitespace tolerance and semantic similarity
  */
 
+import {
+  tokenSimilarity,
+  calculateConfidence,
+  contextAwareMatch,
+  isSemanticMatchingEnabled
+} from './semantic-similarity.js';
+
 /**
  * Calculate Levenshtein distance between two strings
  * Used for fuzzy matching to find best location for patch
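
Note: the helpers imported above are gated behind the MCFAST_SEMANTIC_MATCHING environment variable (see the new module added at the end of this diff). A minimal sketch of the opt-in, assuming only the import surface shown in this hunk:

// Sketch, not part of the package: exercises the imports added above.
import { isSemanticMatchingEnabled, tokenSimilarity } from './semantic-similarity.js';

process.env.MCFAST_SEMANTIC_MATCHING = 'true'; // '1' also enables it
console.log(isSemanticMatchingEnabled());      // true
console.log(tokenSimilarity('const a = 1;', 'const  a = 1;')); // 1 (token sets are identical)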
@@ -122,59 +129,102 @@ export function parseDiff(diffText) {
 
 /**
  * Find best match location for a pattern in target text
- * Returns { index,
+ * Returns { index, distance, confidence } or null if no good match
  */
-export function findBestMatch(targetLines,
-
-
-
-
+export function findBestMatch(targetLines, fileLines, startHint = 0) {
+  let bestMatch = null;
+  let bestScore = Infinity;
+  const maxIterations = 10000;
+  let iterations = 0;
 
-
-
+  const useSemanticMatching = isSemanticMatchingEnabled();
+
+  if (useSemanticMatching) {
+    console.error('[FUZZY] Semantic matching enabled');
   }
 
-  //
-  if (
-
+  // Try exact match first at hint location
+  if (startHint >= 0 && startHint + targetLines.length <= fileLines.length) {
+    const exactMatch = targetLines.every((line, i) =>
+      fileLines[startHint + i] === line
+    );
+    if (exactMatch) {
+      return { index: startHint, distance: 0, confidence: 1.0 };
+    }
   }
 
-
-  let
-
+  // Fuzzy search with semantic similarity
+  for (let i = 0; i <= fileLines.length - targetLines.length; i++) {
+    iterations++;
+    if (iterations > maxIterations) {
+      console.error(`[FUZZY] Max iterations (${maxIterations}) reached`);
+      break;
+    }
 
-
-
-
-    const windowText = window.join('\n');
-    const patternText = patternLines.join('\n');
+    let totalDistance = 0;
+    let tokenSimilaritySum = 0;
+    let contextMatchSum = 0;
 
-
-
-
+    for (let j = 0; j < targetLines.length; j++) {
+      const targetLine = targetLines[j];
+      const fileLine = fileLines[i + j];
 
-
+      // Levenshtein distance
+      const distance = levenshteinDistance(targetLine, fileLine);
+      totalDistance += distance;
 
-
-
+      // Token similarity (always available)
+      const tokSim = tokenSimilarity(targetLine, fileLine);
+      tokenSimilaritySum += tokSim;
 
-
+      // Context-aware matching (use surrounding lines)
+      const surroundingLines = [
+        fileLines[i + j - 1],
+        fileLines[i + j + 1]
+      ].filter(Boolean);
 
-
+      const contextScore = contextAwareMatch(targetLine, fileLine, surroundingLines);
+      contextMatchSum += contextScore;
+    }
+
+    const avgTokenSim = tokenSimilaritySum / targetLines.length;
+    const avgContextMatch = contextMatchSum / targetLines.length;
+
+    // Calculate confidence using enhanced scoring
+    const confidence = calculateConfidence(null, {
+      levenshteinDistance: totalDistance,
+      maxDistance: targetLines.length * 100,
+      tokenSimilarity: avgTokenSim,
+      structuralSimilarity: 0, // Would need AST parsing
+      lineNumberMatch: i === startHint,
+      surroundingContextMatch: avgContextMatch
+    });
+
+    // Prefer matches with higher confidence
+    const score = totalDistance * (1 - confidence * 0.5); // Confidence reduces effective distance
+
+    if (score < bestScore) {
       bestScore = score;
       bestMatch = {
         index: i,
-
-
+        distance: totalDistance,
+        confidence,
+        tokenSimilarity: avgTokenSim,
+        contextMatch: avgContextMatch
       };
 
-      // Early
-      if (
+      // Early termination if we find a very good match
+      if (confidence > 0.99) {
+        console.error(`[FUZZY] Early termination at ${confidence.toFixed(2)} confidence`);
        break;
      }
    }
  }
 
+  if (bestMatch) {
+    console.error(`[FUZZY] Best match: line ${bestMatch.index}, distance ${bestMatch.distance}, confidence ${bestMatch.confidence.toFixed(2)}, token_sim ${bestMatch.tokenSimilarity.toFixed(2)}`);
+  }
+
   return bestMatch;
 }
 
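
For reference, a minimal usage sketch of the reworked findBestMatch, based only on the signature and return shape shown in the hunk above; the import path is hypothetical, since the file name is not visible in this diff:

// Sketch, not part of the package: the module path below is a guess.
import { findBestMatch } from './fuzzy-patch.js'; // hypothetical path

const fileLines = ['function add(a, b) {', '  return a + b;', '}'];
const targetLines = ['function add(a,b) {', '  return a + b;'];

const match = findBestMatch(targetLines, fileLines, 0);
if (match) {
  // match = { index, distance, confidence, tokenSimilarity, contextMatch }
  console.log(`patch window starts at line ${match.index}, confidence ${match.confidence.toFixed(2)}`);
}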
@@ -0,0 +1,292 @@
+/**
+ * Semantic Similarity for mcfast v2.2
+ * Optional embedding-based code similarity (controlled by MCFAST_SEMANTIC_MATCHING env var)
+ */
+
+import crypto from 'crypto';
+
+/**
+ * Check if semantic matching is enabled
+ */
+export function isSemanticMatchingEnabled() {
+  return process.env.MCFAST_SEMANTIC_MATCHING === 'true' ||
+         process.env.MCFAST_SEMANTIC_MATCHING === '1';
+}
+
+/**
+ * Simple hash-based similarity (always available, no dependencies)
+ * Uses token-level comparison for code similarity
+ */
+export function tokenSimilarity(code1, code2) {
+  // Normalize whitespace and tokenize
+  const normalize = (code) => {
+    return code
+      .replace(/\s+/g, ' ') // Normalize whitespace
+      .replace(/[{}();,]/g, ' $& ') // Separate punctuation
+      .trim()
+      .toLowerCase()
+      .split(/\s+/)
+      .filter(t => t.length > 0);
+  };
+
+  const tokens1 = normalize(code1);
+  const tokens2 = normalize(code2);
+
+  // Jaccard similarity
+  const set1 = new Set(tokens1);
+  const set2 = new Set(tokens2);
+
+  const intersection = new Set([...set1].filter(x => set2.has(x)));
+  const union = new Set([...set1, ...set2]);
+
+  return intersection.size / union.size;
+}
+
+/**
+ * Structural similarity based on AST depth and node types
+ * Lightweight alternative to full embeddings
+ */
+export function structuralSimilarity(ast1, ast2) {
+  const getStructure = (ast) => {
+    const structure = {
+      nodeTypes: new Map(),
+      depth: 0,
+      totalNodes: 0
+    };
+
+    const traverse = (node, depth = 0) => {
+      if (!node || typeof node !== 'object') return;
+
+      structure.totalNodes++;
+      structure.depth = Math.max(structure.depth, depth);
+
+      if (node.type) {
+        structure.nodeTypes.set(
+          node.type,
+          (structure.nodeTypes.get(node.type) || 0) + 1
+        );
+      }
+
+      for (const key in node) {
+        if (key === 'loc' || key === 'range') continue;
+        const value = node[key];
+        if (Array.isArray(value)) {
+          value.forEach(child => traverse(child, depth + 1));
+        } else if (value && typeof value === 'object') {
+          traverse(value, depth + 1);
+        }
+      }
+    };
+
+    traverse(ast);
+    return structure;
+  };
+
+  const s1 = getStructure(ast1);
+  const s2 = getStructure(ast2);
+
+  // Compare depth similarity
+  const depthSim = 1 - Math.abs(s1.depth - s2.depth) / Math.max(s1.depth, s2.depth, 1);
+
+  // Compare node count similarity
+  const countSim = 1 - Math.abs(s1.totalNodes - s2.totalNodes) / Math.max(s1.totalNodes, s2.totalNodes, 1);
+
+  // Compare node type distribution
+  const allTypes = new Set([...s1.nodeTypes.keys(), ...s2.nodeTypes.keys()]);
+  let typeMatchScore = 0;
+  let typeTotal = 0;
+
+  for (const type of allTypes) {
+    const count1 = s1.nodeTypes.get(type) || 0;
+    const count2 = s2.nodeTypes.get(type) || 0;
+    const maxCount = Math.max(count1, count2);
+    const minCount = Math.min(count1, count2);
+    typeMatchScore += minCount;
+    typeTotal += maxCount;
+  }
+
+  const typeSim = typeTotal > 0 ? typeMatchScore / typeTotal : 0;
+
+  // Weighted average
+  return (depthSim * 0.2) + (countSim * 0.3) + (typeSim * 0.5);
+}
+
+/**
+ * Semantic code similarity (optional, requires OpenAI API)
+ * Only used when MCFAST_SEMANTIC_MATCHING=true
+ */
+export async function semanticSimilarity(code1, code2, options = {}) {
+  if (!isSemanticMatchingEnabled()) {
+    // Fallback to token similarity
+    return tokenSimilarity(code1, code2);
+  }
+
+  const { apiKey = process.env.OPENAI_API_KEY, model = 'text-embedding-3-small' } = options;
+
+  if (!apiKey) {
+    console.warn('[SEMANTIC] OpenAI API key not found, falling back to token similarity');
+    return tokenSimilarity(code1, code2);
+  }
+
+  try {
+    // Get embeddings for both code snippets
+    const [embedding1, embedding2] = await Promise.all([
+      getEmbedding(code1, apiKey, model),
+      getEmbedding(code2, apiKey, model)
+    ]);
+
+    // Cosine similarity
+    return cosineSimilarity(embedding1, embedding2);
+  } catch (error) {
+    console.warn(`[SEMANTIC] Embedding failed: ${error.message}, falling back to token similarity`);
+    return tokenSimilarity(code1, code2);
+  }
+}
+
+/**
+ * Get embedding from OpenAI API
+ */
+async function getEmbedding(text, apiKey, model) {
+  const response = await fetch('https://api.openai.com/v1/embeddings', {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'Authorization': `Bearer ${apiKey}`
+    },
+    body: JSON.stringify({
+      input: text,
+      model: model
+    })
+  });
+
+  if (!response.ok) {
+    throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`);
+  }
+
+  const data = await response.json();
+  return data.data[0].embedding;
+}
+
+/**
+ * Cosine similarity between two vectors
+ */
+function cosineSimilarity(vec1, vec2) {
+  if (vec1.length !== vec2.length) {
+    throw new Error('Vectors must have same length');
+  }
+
+  let dotProduct = 0;
+  let norm1 = 0;
+  let norm2 = 0;
+
+  for (let i = 0; i < vec1.length; i++) {
+    dotProduct += vec1[i] * vec2[i];
+    norm1 += vec1[i] * vec1[i];
+    norm2 += vec2[i] * vec2[i];
+  }
+
+  return dotProduct / (Math.sqrt(norm1) * Math.sqrt(norm2));
+}
+
+/**
+ * Enhanced fuzzy matching with semantic similarity
+ * Combines Levenshtein distance with semantic understanding
+ */
+export function enhancedFuzzyMatch(expected, actual, options = {}) {
+  const {
+    useSemanticSimilarity = isSemanticMatchingEnabled(),
+    threshold = 0.7
+  } = options;
+
+  // 1. Exact match
+  if (expected === actual) {
+    return { score: 1.0, method: 'exact' };
+  }
+
+  // 2. Normalized match (whitespace-insensitive)
+  const normalizedExpected = expected.replace(/\s+/g, ' ').trim();
+  const normalizedActual = actual.replace(/\s+/g, ' ').trim();
+
+  if (normalizedExpected === normalizedActual) {
+    return { score: 0.95, method: 'normalized' };
+  }
+
+  // 3. Token similarity (fast, no API calls)
+  const tokenScore = tokenSimilarity(expected, actual);
+
+  if (tokenScore >= threshold) {
+    return { score: tokenScore, method: 'token' };
+  }
+
+  // 4. Semantic similarity (optional, requires API)
+  if (useSemanticSimilarity) {
+    // Note: This is async, caller must await
+    return semanticSimilarity(expected, actual).then(score => ({
+      score,
+      method: 'semantic'
+    }));
+  }
+
+  return { score: tokenScore, method: 'token' };
+}
+
+/**
+ * Improved confidence scoring for fuzzy patches
+ * Combines multiple signals for better accuracy
+ */
+export function calculateConfidence(match, context = {}) {
+  const {
+    levenshteinDistance = 0,
+    maxDistance = 100,
+    tokenSimilarity = 0,
+    structuralSimilarity = 0,
+    lineNumberMatch = false,
+    surroundingContextMatch = 0
+  } = context;
+
+  // Base score from Levenshtein distance
+  const distanceScore = 1 - (levenshteinDistance / maxDistance);
+
+  // Weighted combination
+  let confidence = 0;
+  confidence += distanceScore * 0.3; // 30% from edit distance
+  confidence += tokenSimilarity * 0.25; // 25% from token similarity
+  confidence += structuralSimilarity * 0.2; // 20% from structure
+  confidence += surroundingContextMatch * 0.15; // 15% from context
+  confidence += (lineNumberMatch ? 0.1 : 0); // 10% bonus for line match
+
+  // Clamp to [0, 1]
+  return Math.max(0, Math.min(1, confidence));
+}
+
+/**
+ * Context-aware code matching
+ * Uses surrounding lines to improve match accuracy
+ */
+export function contextAwareMatch(targetLine, candidateLine, surroundingLines = []) {
+  // Match the target line
+  const lineScore = tokenSimilarity(targetLine, candidateLine);
+
+  if (surroundingLines.length === 0) {
+    return lineScore;
+  }
+
+  // Check if surrounding context also matches
+  let contextScore = 0;
+  let contextCount = 0;
+
+  for (const contextLine of surroundingLines) {
+    if (contextLine && contextLine.trim().length > 0) {
+      // Simple check: does context appear nearby?
+      contextScore += 0.5; // Placeholder - would need actual implementation
+      contextCount++;
+    }
+  }
+
+  if (contextCount > 0) {
+    contextScore /= contextCount;
+  }
+
+  // Combine line score with context score
+  return (lineScore * 0.7) + (contextScore * 0.3);
+}
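
A short usage sketch of the tiered matching exposed by this new module (the module path comes from the import added in the first hunk; the numeric comments follow the Jaccard and weighted-confidence formulas above, and everything else is an assumption):

// Sketch, not part of the package.
import {
  enhancedFuzzyMatch,
  tokenSimilarity,
  calculateConfidence
} from './semantic-similarity.js';

// Jaccard over tokens: 6 shared tokens out of 8 distinct => 0.75
console.log(tokenSimilarity('const total = a + b;', 'const sum = a + b;')); // 0.75

// With MCFAST_SEMANTIC_MATCHING unset, 0.75 >= 0.7 resolves at the token tier
console.log(enhancedFuzzyMatch('const total = a + b;', 'const sum = a + b;'));
// => { score: 0.75, method: 'token' }

// Weighted confidence: 0.9*0.3 + 0.8*0.25 + 0*0.2 + 0.5*0.15 + 0.1 ≈ 0.645
console.log(calculateConfidence(null, {
  levenshteinDistance: 10,
  maxDistance: 100,
  tokenSimilarity: 0.8,
  surroundingContextMatch: 0.5,
  lineNumberMatch: true
}));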