@mrxkun/mcfast-mcp 2.2.1 → 2.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,8 +1,15 @@
 /**
- * Fuzzy
- * Applies
+ * Fuzzy Patch Strategy for mcfast v2.1+
+ * Applies unified diffs with whitespace tolerance and semantic similarity
  */
 
+import {
+  tokenSimilarity,
+  calculateConfidence,
+  contextAwareMatch,
+  isSemanticMatchingEnabled
+} from './semantic-similarity.js';
+
 /**
  * Calculate Levenshtein distance between two strings
  * Used for fuzzy matching to find best location for patch
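
Note: the helpers imported above are gated behind the MCFAST_SEMANTIC_MATCHING environment variable (see the new module added at the end of this diff). A minimal sketch of the opt-in, assuming only the import surface shown in this hunk:

// Sketch, not part of the package: exercises the imports added above.
import { isSemanticMatchingEnabled, tokenSimilarity } from './semantic-similarity.js';

process.env.MCFAST_SEMANTIC_MATCHING = 'true'; // '1' also enables it
console.log(isSemanticMatchingEnabled());      // true
console.log(tokenSimilarity('const a = 1;', 'const  a = 1;')); // 1 (token sets are identical)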
@@ -122,59 +129,102 @@ export function parseDiff(diffText) {
 
 /**
  * Find best match location for a pattern in target text
- * Returns { index,
+ * Returns { index, distance, confidence } or null if no good match
  */
-export function findBestMatch(targetLines,
-
-
-
-
+export function findBestMatch(targetLines, fileLines, startHint = 0) {
+  let bestMatch = null;
+  let bestScore = Infinity;
+  const maxIterations = 10000;
+  let iterations = 0;
 
-
-
+  const useSemanticMatching = isSemanticMatchingEnabled();
+
+  if (useSemanticMatching) {
+    console.error('[FUZZY] Semantic matching enabled');
   }
 
-  //
-  if (
-
+  // Try exact match first at hint location
+  if (startHint >= 0 && startHint + targetLines.length <= fileLines.length) {
+    const exactMatch = targetLines.every((line, i) =>
+      fileLines[startHint + i] === line
+    );
+    if (exactMatch) {
+      return { index: startHint, distance: 0, confidence: 1.0 };
+    }
   }
 
-
-  let
-
+  // Fuzzy search with semantic similarity
+  for (let i = 0; i <= fileLines.length - targetLines.length; i++) {
+    iterations++;
+    if (iterations > maxIterations) {
+      console.error(`[FUZZY] Max iterations (${maxIterations}) reached`);
+      break;
+    }
 
-
-
-
-    const windowText = window.join('\n');
-    const patternText = patternLines.join('\n');
+    let totalDistance = 0;
+    let tokenSimilaritySum = 0;
+    let contextMatchSum = 0;
 
-
-
-
+    for (let j = 0; j < targetLines.length; j++) {
+      const targetLine = targetLines[j];
+      const fileLine = fileLines[i + j];
 
-
+      // Levenshtein distance
+      const distance = levenshteinDistance(targetLine, fileLine);
+      totalDistance += distance;
 
-
-
+      // Token similarity (always available)
+      const tokSim = tokenSimilarity(targetLine, fileLine);
+      tokenSimilaritySum += tokSim;
 
-
+      // Context-aware matching (use surrounding lines)
+      const surroundingLines = [
+        fileLines[i + j - 1],
+        fileLines[i + j + 1]
+      ].filter(Boolean);
 
-
+      const contextScore = contextAwareMatch(targetLine, fileLine, surroundingLines);
+      contextMatchSum += contextScore;
+    }
+
+    const avgTokenSim = tokenSimilaritySum / targetLines.length;
+    const avgContextMatch = contextMatchSum / targetLines.length;
+
+    // Calculate confidence using enhanced scoring
+    const confidence = calculateConfidence(null, {
+      levenshteinDistance: totalDistance,
+      maxDistance: targetLines.length * 100,
+      tokenSimilarity: avgTokenSim,
+      structuralSimilarity: 0, // Would need AST parsing
+      lineNumberMatch: i === startHint,
+      surroundingContextMatch: avgContextMatch
+    });
+
+    // Prefer matches with higher confidence
+    const score = totalDistance * (1 - confidence * 0.5); // Confidence reduces effective distance
+
+    if (score < bestScore) {
       bestScore = score;
       bestMatch = {
         index: i,
-
-
+        distance: totalDistance,
+        confidence,
+        tokenSimilarity: avgTokenSim,
+        contextMatch: avgContextMatch
       };
 
-      // Early
-      if (
+      // Early termination if we find a very good match
+      if (confidence > 0.99) {
+        console.error(`[FUZZY] Early termination at ${confidence.toFixed(2)} confidence`);
        break;
      }
    }
  }
 
+  if (bestMatch) {
+    console.error(`[FUZZY] Best match: line ${bestMatch.index}, distance ${bestMatch.distance}, confidence ${bestMatch.confidence.toFixed(2)}, token_sim ${bestMatch.tokenSimilarity.toFixed(2)}`);
+  }
+
   return bestMatch;
 }
 
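
For reference, a minimal usage sketch of the reworked findBestMatch, based only on the signature and return shape shown in the hunk above; the import path is hypothetical, since the file name is not visible in this diff:

// Sketch, not part of the package: the module path below is a guess.
import { findBestMatch } from './fuzzy-patch.js'; // hypothetical path

const fileLines = ['function add(a, b) {', '  return a + b;', '}'];
const targetLines = ['function add(a,b) {', '  return a + b;'];

const match = findBestMatch(targetLines, fileLines, 0);
if (match) {
  // match = { index, distance, confidence, tokenSimilarity, contextMatch }
  console.log(`patch window starts at line ${match.index}, confidence ${match.confidence.toFixed(2)}`);
}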
@@ -0,0 +1,292 @@
+/**
+ * Semantic Similarity for mcfast v2.2
+ * Optional embedding-based code similarity (controlled by MCFAST_SEMANTIC_MATCHING env var)
+ */
+
+import crypto from 'crypto';
+
+/**
+ * Check if semantic matching is enabled
+ */
+export function isSemanticMatchingEnabled() {
+  return process.env.MCFAST_SEMANTIC_MATCHING === 'true' ||
+         process.env.MCFAST_SEMANTIC_MATCHING === '1';
+}
+
+/**
+ * Simple hash-based similarity (always available, no dependencies)
+ * Uses token-level comparison for code similarity
+ */
+export function tokenSimilarity(code1, code2) {
+  // Normalize whitespace and tokenize
+  const normalize = (code) => {
+    return code
+      .replace(/\s+/g, ' ') // Normalize whitespace
+      .replace(/[{}();,]/g, ' $& ') // Separate punctuation
+      .trim()
+      .toLowerCase()
+      .split(/\s+/)
+      .filter(t => t.length > 0);
+  };
+
+  const tokens1 = normalize(code1);
+  const tokens2 = normalize(code2);
+
+  // Jaccard similarity
+  const set1 = new Set(tokens1);
+  const set2 = new Set(tokens2);
+
+  const intersection = new Set([...set1].filter(x => set2.has(x)));
+  const union = new Set([...set1, ...set2]);
+
+  return intersection.size / union.size;
+}
+
+/**
+ * Structural similarity based on AST depth and node types
+ * Lightweight alternative to full embeddings
+ */
+export function structuralSimilarity(ast1, ast2) {
+  const getStructure = (ast) => {
+    const structure = {
+      nodeTypes: new Map(),
+      depth: 0,
+      totalNodes: 0
+    };
+
+    const traverse = (node, depth = 0) => {
+      if (!node || typeof node !== 'object') return;
+
+      structure.totalNodes++;
+      structure.depth = Math.max(structure.depth, depth);
+
+      if (node.type) {
+        structure.nodeTypes.set(
+          node.type,
+          (structure.nodeTypes.get(node.type) || 0) + 1
+        );
+      }
+
+      for (const key in node) {
+        if (key === 'loc' || key === 'range') continue;
+        const value = node[key];
+        if (Array.isArray(value)) {
+          value.forEach(child => traverse(child, depth + 1));
+        } else if (value && typeof value === 'object') {
+          traverse(value, depth + 1);
+        }
+      }
+    };
+
+    traverse(ast);
+    return structure;
+  };
+
+  const s1 = getStructure(ast1);
+  const s2 = getStructure(ast2);
+
+  // Compare depth similarity
+  const depthSim = 1 - Math.abs(s1.depth - s2.depth) / Math.max(s1.depth, s2.depth, 1);
+
+  // Compare node count similarity
+  const countSim = 1 - Math.abs(s1.totalNodes - s2.totalNodes) / Math.max(s1.totalNodes, s2.totalNodes, 1);
+
+  // Compare node type distribution
+  const allTypes = new Set([...s1.nodeTypes.keys(), ...s2.nodeTypes.keys()]);
+  let typeMatchScore = 0;
+  let typeTotal = 0;
+
+  for (const type of allTypes) {
+    const count1 = s1.nodeTypes.get(type) || 0;
+    const count2 = s2.nodeTypes.get(type) || 0;
+    const maxCount = Math.max(count1, count2);
+    const minCount = Math.min(count1, count2);
+    typeMatchScore += minCount;
+    typeTotal += maxCount;
+  }
+
+  const typeSim = typeTotal > 0 ? typeMatchScore / typeTotal : 0;
+
+  // Weighted average
+  return (depthSim * 0.2) + (countSim * 0.3) + (typeSim * 0.5);
+}
+
+/**
+ * Semantic code similarity (optional, requires OpenAI API)
+ * Only used when MCFAST_SEMANTIC_MATCHING=true
+ */
+export async function semanticSimilarity(code1, code2, options = {}) {
+  if (!isSemanticMatchingEnabled()) {
+    // Fallback to token similarity
+    return tokenSimilarity(code1, code2);
+  }
+
+  const { apiKey = process.env.OPENAI_API_KEY, model = 'text-embedding-3-small' } = options;
+
+  if (!apiKey) {
+    console.warn('[SEMANTIC] OpenAI API key not found, falling back to token similarity');
+    return tokenSimilarity(code1, code2);
+  }
+
+  try {
+    // Get embeddings for both code snippets
+    const [embedding1, embedding2] = await Promise.all([
+      getEmbedding(code1, apiKey, model),
+      getEmbedding(code2, apiKey, model)
+    ]);
+
+    // Cosine similarity
+    return cosineSimilarity(embedding1, embedding2);
+  } catch (error) {
+    console.warn(`[SEMANTIC] Embedding failed: ${error.message}, falling back to token similarity`);
+    return tokenSimilarity(code1, code2);
+  }
+}
+
+/**
+ * Get embedding from OpenAI API
+ */
+async function getEmbedding(text, apiKey, model) {
+  const response = await fetch('https://api.openai.com/v1/embeddings', {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'Authorization': `Bearer ${apiKey}`
+    },
+    body: JSON.stringify({
+      input: text,
+      model: model
+    })
+  });
+
+  if (!response.ok) {
+    throw new Error(`OpenAI API error: ${response.status} ${response.statusText}`);
+  }
+
+  const data = await response.json();
+  return data.data[0].embedding;
+}
+
+/**
+ * Cosine similarity between two vectors
+ */
+function cosineSimilarity(vec1, vec2) {
+  if (vec1.length !== vec2.length) {
+    throw new Error('Vectors must have same length');
+  }
+
+  let dotProduct = 0;
+  let norm1 = 0;
+  let norm2 = 0;
+
+  for (let i = 0; i < vec1.length; i++) {
+    dotProduct += vec1[i] * vec2[i];
+    norm1 += vec1[i] * vec1[i];
+    norm2 += vec2[i] * vec2[i];
+  }
+
+  return dotProduct / (Math.sqrt(norm1) * Math.sqrt(norm2));
+}
+
+/**
+ * Enhanced fuzzy matching with semantic similarity
+ * Combines Levenshtein distance with semantic understanding
+ */
+export function enhancedFuzzyMatch(expected, actual, options = {}) {
+  const {
+    useSemanticSimilarity = isSemanticMatchingEnabled(),
+    threshold = 0.7
+  } = options;
+
+  // 1. Exact match
+  if (expected === actual) {
+    return { score: 1.0, method: 'exact' };
+  }
+
+  // 2. Normalized match (whitespace-insensitive)
+  const normalizedExpected = expected.replace(/\s+/g, ' ').trim();
+  const normalizedActual = actual.replace(/\s+/g, ' ').trim();
+
+  if (normalizedExpected === normalizedActual) {
+    return { score: 0.95, method: 'normalized' };
+  }
+
+  // 3. Token similarity (fast, no API calls)
+  const tokenScore = tokenSimilarity(expected, actual);
+
+  if (tokenScore >= threshold) {
+    return { score: tokenScore, method: 'token' };
+  }
+
+  // 4. Semantic similarity (optional, requires API)
+  if (useSemanticSimilarity) {
+    // Note: This is async, caller must await
+    return semanticSimilarity(expected, actual).then(score => ({
+      score,
+      method: 'semantic'
+    }));
+  }
+
+  return { score: tokenScore, method: 'token' };
+}
+
+/**
+ * Improved confidence scoring for fuzzy patches
+ * Combines multiple signals for better accuracy
+ */
+export function calculateConfidence(match, context = {}) {
+  const {
+    levenshteinDistance = 0,
+    maxDistance = 100,
+    tokenSimilarity = 0,
+    structuralSimilarity = 0,
+    lineNumberMatch = false,
+    surroundingContextMatch = 0
+  } = context;
+
+  // Base score from Levenshtein distance
+  const distanceScore = 1 - (levenshteinDistance / maxDistance);
+
+  // Weighted combination
+  let confidence = 0;
+  confidence += distanceScore * 0.3; // 30% from edit distance
+  confidence += tokenSimilarity * 0.25; // 25% from token similarity
+  confidence += structuralSimilarity * 0.2; // 20% from structure
+  confidence += surroundingContextMatch * 0.15; // 15% from context
+  confidence += (lineNumberMatch ? 0.1 : 0); // 10% bonus for line match
+
+  // Clamp to [0, 1]
+  return Math.max(0, Math.min(1, confidence));
+}
+
+/**
+ * Context-aware code matching
+ * Uses surrounding lines to improve match accuracy
+ */
+export function contextAwareMatch(targetLine, candidateLine, surroundingLines = []) {
+  // Match the target line
+  const lineScore = tokenSimilarity(targetLine, candidateLine);
+
+  if (surroundingLines.length === 0) {
+    return lineScore;
+  }
+
+  // Check if surrounding context also matches
+  let contextScore = 0;
+  let contextCount = 0;
+
+  for (const contextLine of surroundingLines) {
+    if (contextLine && contextLine.trim().length > 0) {
+      // Simple check: does context appear nearby?
+      contextScore += 0.5; // Placeholder - would need actual implementation
+      contextCount++;
+    }
+  }
+
+  if (contextCount > 0) {
+    contextScore /= contextCount;
+  }
+
+  // Combine line score with context score
+  return (lineScore * 0.7) + (contextScore * 0.3);
+}
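
A short usage sketch of the tiered matching exposed by this new module (the module path comes from the import added in the first hunk; the numeric comments follow the Jaccard and weighted-confidence formulas above, and everything else is an assumption):

// Sketch, not part of the package.
import {
  enhancedFuzzyMatch,
  tokenSimilarity,
  calculateConfidence
} from './semantic-similarity.js';

// Jaccard over tokens: 6 shared tokens out of 8 distinct => 0.75
console.log(tokenSimilarity('const total = a + b;', 'const sum = a + b;')); // 0.75

// With MCFAST_SEMANTIC_MATCHING unset, 0.75 >= 0.7 resolves at the token tier
console.log(enhancedFuzzyMatch('const total = a + b;', 'const sum = a + b;'));
// => { score: 0.75, method: 'token' }

// Weighted confidence: 0.9*0.3 + 0.8*0.25 + 0*0.2 + 0.5*0.15 + 0.1 ≈ 0.645
console.log(calculateConfidence(null, {
  levenshteinDistance: 10,
  maxDistance: 100,
  tokenSimilarity: 0.8,
  surroundingContextMatch: 0.5,
  lineNumberMatch: true
}));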