@mrxkun/mcfast-mcp 3.3.4 → 3.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.js +271 -153
- package/src/strategies/fuzzy-patch.js +143 -125
- package/src/strategies/tree-sitter/languages.js +40 -21
package/package.json
CHANGED
package/src/index.js
CHANGED
@@ -305,28 +305,53 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
   };
 });
 
-// Helper for recursive file listing
-async function getFiles(dir, depth = 5
-
+// Helper for recursive file listing (v4.0 optimized with fast-glob)
+async function getFiles(dir, depth = 5) {
+  const patterns = [];
 
-
-
+  for (let i = 1; i <= depth; i++) {
+    patterns.push('*'.repeat(i));
+  }
 
-
-
+  const gitignorePath = path.join(dir, '.gitignore');
+  let gitignoreContent = null;
+  try {
+    gitignoreContent = await fs.readFile(gitignorePath, 'utf8');
+  } catch {
+    // .gitignore not found, continue without it
+  }
 
-
-
-
+  const fastGlobOptions = {
+    cwd: dir,
+    onlyFiles: true,
+    onlyDirectories: false,
+    deep: depth,
+    ignore: [
+      'node_modules',
+      '.git',
+      'dist',
+      'build',
+      '.next',
+      'coverage',
+      '.cache',
+      '__pycache__',
+      '.venv',
+      'venv',
+      'node_modules/**',
+      '.git/**'
+    ],
+    absolute: false
+  };
 
-
-
-
-
-
-
+  if (gitignoreContent) {
+    const ignorePatterns = gitignoreContent
+      .split('\n')
+      .map(l => l.trim())
+      .filter(l => l && !l.startsWith('#'));
+    fastGlobOptions.ignore.push(...ignorePatterns);
   }
-
+
+  return await fg(patterns, fastGlobOptions);
 }
 
 /**
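For illustration only (not part of the package diff): the rewritten getFiles replaces a hand-rolled recursive walk with fast-glob. A minimal standalone sketch of the same pattern, assuming fast-glob is installed and imported as fg as index.js does; cwd, deep, onlyFiles, ignore and absolute are real fast-glob options:

// Sketch: depth-bounded listing with fast-glob plus .gitignore merging.
import fg from 'fast-glob';
import fs from 'fs/promises';
import path from 'path';

async function listFilesSketch(dir, depth = 5) {
  // One pattern per depth level ('*', '**', ...); 'deep' caps traversal depth.
  const patterns = Array.from({ length: depth }, (_, i) => '*'.repeat(i + 1));

  const ignore = ['node_modules', '.git', 'dist', 'build'];
  try {
    // Fold non-comment .gitignore entries into fast-glob's ignore list.
    const gitignore = await fs.readFile(path.join(dir, '.gitignore'), 'utf8');
    ignore.push(...gitignore.split('\n').map(l => l.trim()).filter(l => l && !l.startsWith('#')));
  } catch {
    // No .gitignore; defaults only.
  }

  return fg(patterns, { cwd: dir, onlyFiles: true, deep: depth, ignore, absolute: false });
}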
@@ -877,96 +902,120 @@ async function reportAudit(params) {
   }
 }
 
-// Unified Search Implementation
+// Unified Search Implementation (v4.0 - Early Termination with Stream)
 async function handleSearchFilesystem({ query, path: searchPath = process.cwd(), include = "**/*", exclude = [], isRegex = false, caseSensitive = false }) {
   const start = Date.now();
+  const MAX_RESULTS = 100;
+  const results = [];
+  let strategy = 'unknown';
+
   try {
-
-
+    const { spawn } = await import('child_process');
+    const { promisify } = await import('util');
+    const sleep = promisify(setTimeout);
 
-
+    const escapedQuery = query.replace(/"/g, '\\"');
+    const caseFlag = caseSensitive ? '' : '-i';
+    const regexFlag = isRegex ? '-e' : '-F';
+
+    // Try ripgrep first with streaming and early termination
     try {
-      const flags = [
-        "--json",
-        caseSensitive ? "-s" : "-i",
-        isRegex ? "-e" : "-F"
-      ].join(" ");
-      // This is a simplified call; parsing JSON output from rg is best for structured data
-      // For now, we'll rely on a simpler text output for the LLM
-      const simpleFlags = [
-        "-n",
-        "--no-heading",
-        "--with-filename",
-        caseSensitive ? "-s" : "-i",
-        isRegex ? "-e" : "-F"
-      ].join(" ");
-
-      const command = `rg ${simpleFlags} "${query.replace(/"/g, '\\"')}" ${searchPath}`;
-      const { stdout } = await execAsync(command, { maxBuffer: 10 * 1024 * 1024 });
-      results = stdout.trim().split('\n').filter(Boolean);
       strategy = 'ripgrep';
+      const rgProcess = spawn('rg', [
+        '-n', '--no-heading', '--with-filename',
+        caseFlag, regexFlag,
+        escapedQuery,
+        searchPath
+      ], {
+        stdio: ['ignore', 'pipe', 'pipe']
+      });
+
+      const readline = (await import('readline')).createInterface({
+        input: rgProcess.stdout,
+        crlfDelay: Infinity
+      });
+
+      for await (const line of readline) {
+        if (results.length >= MAX_RESULTS) {
+          rgProcess.kill();
+          break;
+        }
+        results.push(line);
+      }
+
+      rgProcess.stderr.on('data', () => { });
+      await new Promise(resolve => rgProcess.on('close', resolve));
+
+      if (results.length > 0 || rgProcess.exitCode === 0) {
+        return formatSearchResults(query, strategy, results, start, MAX_RESULTS);
+      }
     } catch (rgErr) {
-      //
+      // Try git grep
       try {
-        const flags = [
-          "-n",
-          "-I",
-          caseSensitive ? "" : "-i",
-          isRegex ? "-E" : "-F"
-        ].filter(Boolean).join(" ");
-        const command = `git grep ${flags} "${query.replace(/"/g, '\\"')}" ${searchPath}`;
-        const { stdout } = await execAsync(command, { cwd: searchPath, maxBuffer: 10 * 1024 * 1024 });
-        results = stdout.trim().split('\n').filter(Boolean);
        strategy = 'git_grep';
-
-
-
-
-
-
-
-        ]
-
-
-
-
-
-        }
-
-
-
-
+        const gitProcess = spawn('git', [
+          'grep', '-n', '-I',
+          caseFlag ? '' : '-i',
+          regexFlag ? '-E' : '-F',
+          escapedQuery
+        ], {
+          cwd: searchPath,
+          stdio: ['ignore', 'pipe', 'pipe']
+        });
+
+        const readline = (await import('readline')).createInterface({
+          input: gitProcess.stdout,
+          crlfDelay: Infinity
+        });
+
+        for await (const line of readline) {
+          if (results.length >= MAX_RESULTS) {
+            gitProcess.kill();
+            break;
+          }
+          results.push(line);
        }
-      }
-    }
 
-
-
-      output += "No matches found.";
-    } else {
-      const limitedResults = results.slice(0, 100);
-      output += limitedResults.join('\n');
-      if (results.length > 100) output += `\n... and ${results.length - 100} more matches.`;
-    }
+        gitProcess.stderr.on('data', () => { });
+        await new Promise(resolve => gitProcess.on('close', resolve));
 
-
-
-
-
+        return formatSearchResults(query, strategy, results, start, MAX_RESULTS);
+      } catch (gitErr) {
+        // Fallback to native grep
+        strategy = 'native_grep';
+        const grepProcess = spawn('grep', [
+          '-r', '-n', '-I',
+          caseFlag ? '' : '-i',
+          regexFlag ? '-E' : '-F',
+          '--exclude-dir=node_modules', '--exclude-dir=.git',
+          '--exclude-dir=.next', '--exclude-dir=dist', '--exclude-dir=build',
+          escapedQuery,
+          searchPath
+        ], {
+          stdio: ['ignore', 'pipe', 'pipe']
+        });
 
-
-
-
-
-        status: 'success',
-        latency_ms: Date.now() - start,
-        files_count: 0,
-        input_tokens: Math.ceil(query.length / 4), // Minimal input tokens for filesystem search
-        output_tokens: estimatedOutputTokens,
-        result_summary: JSON.stringify(results.slice(0, 100))
-      });
+        const readline = (await import('readline')).createInterface({
+          input: grepProcess.stdout,
+          crlfDelay: Infinity
+        });
 
-
+        for await (const line of readline) {
+          if (results.length >= MAX_RESULTS) {
+            grepProcess.kill();
+            break;
+          }
+          results.push(line);
+        }
+
+        grepProcess.stderr.on('data', () => { });
+        await new Promise(resolve => grepProcess.on('close', resolve));
+
+        return formatSearchResults(query, strategy, results, start, MAX_RESULTS);
+      }
+    }
+
+    return formatSearchResults(query, strategy, results, start, MAX_RESULTS);
 
   } catch (error) {
     reportAudit({
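For illustration only (not part of the package diff): all three strategies above repeat one pattern — spawn the search tool, stream stdout line by line, and kill the process once MAX_RESULTS lines have arrived. A hypothetical helper (not in the package) distilling that control flow:

// Sketch: early-terminating line collector around a spawned search process.
import { spawn } from 'child_process';
import { createInterface } from 'readline';

async function collectLines(cmd, args, maxResults, opts = {}) {
  const proc = spawn(cmd, args, { stdio: ['ignore', 'pipe', 'pipe'], ...opts });
  const rl = createInterface({ input: proc.stdout, crlfDelay: Infinity });
  const results = [];

  for await (const line of rl) {
    if (results.length >= maxResults) {
      proc.kill(); // stop the search early; remaining output is discarded
      break;
    }
    results.push(line);
  }

  proc.stderr.on('data', () => {}); // drain stderr so the process can exit
  await new Promise(resolve => proc.on('close', resolve));
  return results;
}

// e.g. const hits = await collectLines('rg', ['-n', '-F', 'TODO', '.'], 100);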
@@ -985,6 +1034,35 @@ async function handleSearchFilesystem({ query, path: searchPath = process.cwd(),
   }
 }
 
+function formatSearchResults(query, strategy, results, start, maxResults) {
+  let output = `⚡ search_filesystem (${strategy}) found ${results.length} results for "${query}"\n\n`;
+
+  if (results.length === 0) {
+    output += "No matches found.";
+  } else {
+    output += results.join('\n');
+    if (results.length >= maxResults) {
+      output += `\n... and more matches (early termination at ${maxResults}).`;
+    }
+  }
+
+  const estimatedOutputTokens = Math.ceil(output.length / 4);
+
+  reportAudit({
+    tool: 'search_filesystem',
+    instruction: query,
+    strategy,
+    status: 'success',
+    latency_ms: Date.now() - start,
+    files_count: 0,
+    input_tokens: Math.ceil(query.length / 4),
+    output_tokens: estimatedOutputTokens,
+    result_summary: JSON.stringify(results.slice(0, maxResults))
+  });
+
+  return { content: [{ type: "text", text: output }] };
+}
+
 // Native high-performance search
 async function handleWarpgrep({ query, include = ".", isRegex = false, caseSensitive = false }) {
   const start = Date.now();
@@ -1073,8 +1151,23 @@ async function handleSearchCode({ query, files, regex = false, caseSensitive = f
   try {
     const results = [];
     let totalInputChars = 0;
+    let lastYield = Date.now();
+    const YIELD_INTERVAL_MS = 10;
+    const YIELD_LINES = 1000;
+
+    const shouldYield = () => {
+      const now = Date.now();
+      if (now - lastYield > YIELD_INTERVAL_MS) {
+        lastYield = now;
+        return true;
+      }
+      return false;
+    };
+
+    const yieldEventLoop = async () => {
+      return new Promise(resolve => setImmediate(resolve));
+    };
 
-    // If regex mode, use original regex logic
     if (regex) {
       const flags = caseSensitive ? 'm' : 'im';
       const pattern = new RegExp(query, flags);
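For illustration only (not part of the package diff): shouldYield/yieldEventLoop above implement time-sliced cooperative yielding — after roughly YIELD_INTERVAL_MS of synchronous matching, control returns to the Node event loop via setImmediate so the MCP server can keep servicing other requests during a long scan. A self-contained sketch of the same mechanism (makeYielder is a hypothetical name):

// Sketch: yield to the event loop every ~intervalMs of synchronous work.
function makeYielder(intervalMs = 10) {
  let last = Date.now();
  const shouldYield = () => {
    const now = Date.now();
    if (now - last > intervalMs) {
      last = now;
      return true;
    }
    return false;
  };
  // setImmediate schedules the continuation after pending I/O callbacks.
  const yieldEventLoop = () => new Promise(resolve => setImmediate(resolve));
  return { shouldYield, yieldEventLoop };
}

// Usage, mirroring the loops in handleSearchCode:
// const { shouldYield, yieldEventLoop } = makeYielder();
// for (let i = 0; i < lines.length; i++) {
//   if (shouldYield()) await yieldEventLoop(); // cheap sync check, rare await
//   // ... match lines[i] ...
// }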
@@ -1084,47 +1177,44 @@ async function handleSearchCode({ query, files, regex = false, caseSensitive = f
         totalInputChars += content.length;
 
         const lines = content.split('\n');
-        lines.
-        if (
+        for (let i = 0; i < lines.length; i++) {
+          if (shouldYield()) await yieldEventLoop();
+
+          if (pattern.test(lines[i])) {
            pattern.lastIndex = 0;
-            const startLine = Math.max(0,
-            const endLine = Math.min(lines.length - 1,
+            const startLine = Math.max(0, i - contextLines);
+            const endLine = Math.min(lines.length - 1, i + contextLines);
 
            const contextSnippet = lines
              .slice(startLine, endLine + 1)
-              .map((l,
-                lineNumber: startLine +
+              .map((l, idx) => ({
+                lineNumber: startLine + idx + 1,
                content: l,
-                isMatch: startLine +
+                isMatch: startLine + idx === i
              }));
 
            results.push({
              file: filePath,
-              lineNumber:
-              matchedLine:
+              lineNumber: i + 1,
+              matchedLine: lines[i].trim(),
              context: contextSnippet,
              matchType: 'regex'
            });
          }
-        }
+        }
      }
    } else {
-      // Semantic search with stop words filtering
      const queryLower = query.toLowerCase();
-
-      // Common English stop words to filter out
      const stopWords = new Set([
        'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from', 'has', 'he',
        'in', 'is', 'it', 'its', 'of', 'on', 'that', 'the', 'to', 'was', 'will', 'with',
        'how', 'what', 'when', 'where', 'who', 'why', 'does', 'do', 'this', 'these', 'those'
      ]);
 
-      // Extract significant words (3+ chars, not stop words)
      const words = queryLower
        .split(/\W+/)
        .filter(w => w.length >= 3 && !stopWords.has(w));
 
-      // If no significant words, fall back to whole query
      const searchTerms = words.length > 0 ? words : [queryLower];
 
      for (const [filePath, content] of Object.entries(files)) {
@@ -1132,45 +1222,40 @@ async function handleSearchCode({ query, files, regex = false, caseSensitive = f
        totalInputChars += content.length;
 
        const lines = content.split('\n');
-        lines.
+        for (let i = 0; i < lines.length; i++) {
+          if (shouldYield()) await yieldEventLoop();
+
          const lineLower = caseSensitive ? line : line.toLowerCase();
          const searchQuery = caseSensitive ? query : queryLower;
-
-          // Check 1: Exact phrase match (highest priority)
          const exactMatch = lineLower.includes(searchQuery);
-
-          // Check 2: All significant words present (semantic match)
          const allWordsMatch = searchTerms.every(term => lineLower.includes(term));
-
-          // Check 3: At least half of significant words present (fuzzy match)
          const matchCount = searchTerms.filter(term => lineLower.includes(term)).length;
          const fuzzyMatch = matchCount >= Math.ceil(searchTerms.length / 2);
 
          if (exactMatch || allWordsMatch || (searchTerms.length > 1 && fuzzyMatch)) {
-            const startLine = Math.max(0,
-            const endLine = Math.min(lines.length - 1,
+            const startLine = Math.max(0, i - contextLines);
+            const endLine = Math.min(lines.length - 1, i + contextLines);
 
            const contextSnippet = lines
              .slice(startLine, endLine + 1)
-              .map((l,
-                lineNumber: startLine +
+              .map((l, idx) => ({
+                lineNumber: startLine + idx + 1,
                content: l,
-                isMatch: startLine +
+                isMatch: startLine + idx === i
              }));
 
            results.push({
              file: filePath,
-              lineNumber:
+              lineNumber: i + 1,
              matchedLine: line.trim(),
              context: contextSnippet,
              matchType: exactMatch ? 'exact' : allWordsMatch ? 'semantic' : 'fuzzy',
              matchScore: exactMatch ? 100 : allWordsMatch ? 80 : matchCount * 10
            });
          }
-        }
+        }
      }
 
-      // Sort results: by score (highest first), then by file
      results.sort((a, b) => {
        if (a.matchScore !== b.matchScore) {
          return b.matchScore - a.matchScore;
@@ -1226,18 +1311,16 @@ async function handleListFiles({ path: dirPath = process.cwd(), depth = 5 }) {
   const start = Date.now();
   try {
     const files = await getFiles(dirPath, depth);
-    // Return relative paths to save tokens
-    const relativeFiles = files.map(f => path.relative(dirPath, f));
 
-    const output = `📁 Files in ${dirPath}:\n\n${
+    const output = `📁 Files in ${dirPath}:\n\n${files.join('\n')}`;
 
     reportAudit({
       tool: 'list_files_fast',
       instruction: dirPath,
       status: 'success',
       latency_ms: Date.now() - start,
-      files_count:
-      result_summary: JSON.stringify(
+      files_count: files.length,
+      result_summary: JSON.stringify(files.slice(0, 500)),
       input_tokens: Math.ceil(dirPath.length / 4),
       output_tokens: Math.ceil(output.length / 4)
     });
@@ -1302,41 +1385,76 @@ async function handleEditFile({ path: filePath, content, instruction = "" }) {
 async function handleReadFile({ path: filePath, start_line, end_line }) {
   const start = Date.now();
   try {
-    // Resolve absolute path
     const absolutePath = path.resolve(filePath);
-
-    // Check if file exists and is a file
     const stats = await fs.stat(absolutePath);
+
     if (!stats.isFile()) {
       throw new Error(`Path is not a file: ${absolutePath}`);
     }
 
-    //
-    const
+    const STREAM_THRESHOLD = 1024 * 1024; // 1MB - files larger than this use streaming
+    const LINE_RANGE_THRESHOLD = 50000; // If requesting specific lines and file is large, stream
 
-
-
+    let startLine = start_line ? parseInt(start_line) : 1;
+    let endLine = end_line ? parseInt(end_line) : -1;
+    let outputContent;
+    let totalLines;
 
-
-
+    if ((stats.size > STREAM_THRESHOLD && (start_line || end_line)) || stats.size > 10 * 1024 * 1024) {
+      const { Readable } = await import('stream');
+      const { createInterface } = await import('readline');
 
-
-
+      let currentLine = 0;
+      const lines = [];
 
-
-
-
-
-
-
+      const stream = (await import('fs')).createReadStream(absolutePath, { encoding: 'utf8' });
+      const rl = createInterface({ input: stream, crlfDelay: Infinity });
+
+      for await (const line of rl) {
+        currentLine++;
+        if (startLine && endLine) {
+          if (currentLine >= startLine && currentLine <= endLine) {
+            lines.push(line);
+          }
+          if (currentLine >= endLine) break;
+        } else if (startLine && currentLine >= startLine) {
+          lines.push(line);
+        } else if (lines.length < 2000) {
+          lines.push(line);
+        } else {
+          break;
+        }
+      }
 
-
-
-
-
-
-
-
+      stream.destroy();
+      outputContent = lines.join('\n');
+      totalLines = currentLine;
+
+      if (startLine && endLine) {
+        lineRangeInfo = `(Lines ${startLine}-${endLine} of ${totalLines})`;
+      } else if (startLine) {
+        lineRangeInfo = `(Lines ${startLine}-${currentLine} of ${totalLines})`;
+      } else {
+        lineRangeInfo = `(Lines 1-${lines.length} of ${totalLines} - truncated)`;
+      }
+    } else {
+      const content = await fs.readFile(absolutePath, 'utf8');
+      const lines = content.split('\n');
+      totalLines = lines.length;
+
+      if (startLine < 1) startLine = 1;
+      if (endLine < 1 || endLine > totalLines) endLine = totalLines;
+      if (startLine > endLine) {
+        throw new Error(`Invalid line range: start_line (${startLine}) > end_line (${endLine})`);
+      }
+
+      if (start_line || end_line) {
+        outputContent = lines.slice(startLine - 1, endLine).join('\n');
+        lineRangeInfo = `(Lines ${startLine}-${endLine} of ${totalLines})`;
+      } else {
+        outputContent = content;
+        lineRangeInfo = `(Total ${totalLines} lines)`;
+      }
     }
 
     const output = `📄 File: ${filePath} ${lineRangeInfo}\n----------------------------------------\n${outputContent}`;
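For illustration only (not part of the package diff): the streaming branch above avoids loading multi-megabyte files into memory when only a line range is requested. The core of that path as a standalone sketch (readLineRange is a hypothetical name):

// Sketch: collect only lines [startLine, endLine] (1-based), stop reading early.
import { createReadStream } from 'fs';
import { createInterface } from 'readline';

async function readLineRange(file, startLine, endLine) {
  const stream = createReadStream(file, { encoding: 'utf8' });
  const rl = createInterface({ input: stream, crlfDelay: Infinity });
  const lines = [];
  let n = 0;
  for await (const line of rl) {
    n++;
    if (n >= startLine && n <= endLine) lines.push(line);
    if (n >= endLine) break; // early exit: the rest of the file is never read
  }
  stream.destroy(); // release the file handle immediately
  return lines.join('\n');
}

// e.g. const chunk = await readLineRange('big.log', 1000, 1050);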
package/src/strategies/fuzzy-patch.js
CHANGED
@@ -1,6 +1,14 @@
 /**
- * Fuzzy Patch Strategy for mcfast
- *
+ * Fuzzy Patch Strategy for mcfast v4.0+
+ *
+ * Performance Optimizations (v4.0):
+ * 1. Parse file into lines ONCE, not per hunk
+ * 2. Use Hash Maps for exact match before fuzzy search
+ * 3. Process all hunks in single pass with offset adjustment
+ * 4. Early termination when good match found
+ * 5. Space-optimized Levenshtein with early exit
+ *
+ * Complexity: O(Hunk * FileSize) → O(FileSize + Hunk * SearchWindow)
  */
 
 import {
@@ -10,26 +18,22 @@ import {
   isSemanticMatchingEnabled
 } from './semantic-similarity.js';
 
-
-
-
-
- */
+// =============================================================================
+// OPTIMIZED LEVENSHTEIN (space-optimized with early termination)
+// =============================================================================
+
 function levenshteinDistance(str1, str2, maxDistance = Infinity) {
   const len1 = str1.length;
   const len2 = str2.length;
 
-  // Quick optimization: if length difference exceeds maxDistance, return early
   if (Math.abs(len1 - len2) > maxDistance) {
     return maxDistance + 1;
   }
 
-  // Optimization: swap to ensure str1 is shorter (reduces memory)
   if (len1 > len2) {
     return levenshteinDistance(str2, str1, maxDistance);
   }
 
-  // Use single array instead of matrix (space optimization)
   let prevRow = Array(len2 + 1).fill(0).map((_, i) => i);
 
   for (let i = 1; i <= len1; i++) {
@@ -39,15 +43,14 @@ function levenshteinDistance(str1, str2, maxDistance = Infinity) {
     for (let j = 1; j <= len2; j++) {
       const cost = str1[i - 1] === str2[j - 1] ? 0 : 1;
       const val = Math.min(
-        prevRow[j] + 1,
-        currentRow[j - 1] + 1,
-        prevRow[j - 1] + cost
+        prevRow[j] + 1,
+        currentRow[j - 1] + 1,
+        prevRow[j - 1] + cost
       );
       currentRow.push(val);
       minInRow = Math.min(minInRow, val);
     }
 
-    // Early termination: if minimum in row exceeds maxDistance, abort
     if (minInRow > maxDistance) {
       return maxDistance + 1;
     }
@@ -58,10 +61,6 @@ function levenshteinDistance(str1, str2, maxDistance = Infinity) {
   return prevRow[len2];
 }
 
-/**
- * Normalize whitespace for comparison
- * Converts tabs to spaces, trims lines, removes trailing whitespace
- */
 function normalizeWhitespace(text) {
   return text
     .split('\n')
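For illustration only (not part of the package diff): the levenshteinDistance changes above are spread over three hunks; assembled, the two-row variant with the early-exit band reads roughly as follows — a sketch, since lines the viewer elided may differ:

// Sketch: two-row Levenshtein with length pre-check and per-row early abort.
function levenshtein(str1, str2, maxDistance = Infinity) {
  const len1 = str1.length, len2 = str2.length;
  if (Math.abs(len1 - len2) > maxDistance) return maxDistance + 1;
  if (len1 > len2) return levenshtein(str2, str1, maxDistance);

  let prevRow = Array.from({ length: len2 + 1 }, (_, i) => i);
  for (let i = 1; i <= len1; i++) {
    const currentRow = [i];
    let minInRow = i;
    for (let j = 1; j <= len2; j++) {
      const cost = str1[i - 1] === str2[j - 1] ? 0 : 1;
      const val = Math.min(prevRow[j] + 1, currentRow[j - 1] + 1, prevRow[j - 1] + cost);
      currentRow.push(val);
      minInRow = Math.min(minInRow, val);
    }
    if (minInRow > maxDistance) return maxDistance + 1; // no cell can recover
    prevRow = currentRow;
  }
  return prevRow[len2];
}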
@@ -70,25 +69,18 @@ function normalizeWhitespace(text) {
     .trim();
 }
 
-/**
- * Calculate similarity score between two strings (0-1)
- * 1.0 = identical, 0.0 = completely different
- */
 function similarityScore(str1, str2) {
   const normalized1 = normalizeWhitespace(str1);
   const normalized2 = normalizeWhitespace(str2);
-
   const maxLen = Math.max(normalized1.length, normalized2.length);
   if (maxLen === 0) return 1.0;
-
-  const distance = levenshteinDistance(normalized1, normalized2);
-  return 1.0 - (distance / maxLen);
+  return 1.0 - (levenshteinDistance(normalized1, normalized2) / maxLen);
 }
 
-
-
-
-
+// =============================================================================
+// OPTIMIZED DIFF PARSING
+// =============================================================================
+
 export function parseDiff(diffText) {
   const hunks = [];
   const lines = diffText.split('\n');
@@ -97,7 +89,6 @@ export function parseDiff(diffText) {
   for (let i = 0; i < lines.length; i++) {
     const line = lines[i];
 
-    // Parse hunk header: @@ -1,3 +1,4 @@
     if (line.startsWith('@@')) {
       if (currentHunk) hunks.push(currentHunk);
 
@@ -112,7 +103,6 @@ export function parseDiff(diffText) {
       };
     }
   } else if (currentHunk) {
-    // Parse diff lines
     if (line.startsWith('-')) {
       currentHunk.lines.push({ type: 'remove', content: line.slice(1) });
     } else if (line.startsWith('+')) {
@@ -127,27 +117,66 @@ export function parseDiff(diffText) {
   return hunks;
 }
 
-
-
-
-
+// =============================================================================
+// OPTIMIZED HASH-MAP BASED EXACT MATCH (v4.0 NEW)
+// =============================================================================
+
+function buildLineIndex(lines, windowSize = 3) {
+  const index = new Map();
+
+  for (let i = 0; i <= lines.length - windowSize; i++) {
+    const key = lines.slice(i, i + windowSize).join('|');
+    if (!index.has(key)) {
+      index.set(key, []);
+    }
+    index.get(key).push(i);
+  }
+
+  return index;
+}
+
+function findExactMatchHashMap(targetLines, fileLines, lineIndex, windowSize = 3) {
+  if (targetLines.length < windowSize) return null;
+
+  const targetKey = targetLines.slice(0, windowSize).join('|');
+  const candidates = lineIndex.get(targetKey);
+
+  if (!candidates) return null;
+
+  for (const startPos of candidates) {
+    let match = true;
+    for (let j = 0; j < targetLines.length; j++) {
+      if (fileLines[startPos + j] !== targetLines[j]) {
+        match = false;
+        break;
+      }
+    }
+
+    if (match) {
+      return { index: startPos, distance: 0, confidence: 1.0 };
+    }
+  }
+
+  return null;
+}
+
+// =============================================================================
+// OPTIMIZED FUZZY SEARCH (v4.0)
+// =============================================================================
+
 export function findBestMatch(targetLines, fileLines, startHint = 0) {
-
-  let bestScore = Infinity;
-  const maxIterations = 50000; // Increased from 10k to 50k
+  const maxIterations = 50000;
   let iterations = 0;
-
   const useSemanticMatching = isSemanticMatchingEnabled();
 
   if (useSemanticMatching) {
     console.error('[FUZZY] Semantic matching enabled');
   }
 
-  // optimization: pre-normalize lines to handle indentation/whitespace
   const normTargetLines = targetLines.map(l => normalizeWhitespace(l));
   const normFileLines = fileLines.map(l => normalizeWhitespace(l));
 
-  // Try exact match
+  // OPTIMIZATION 1: Try exact match at hint location first
   if (startHint >= 0 && startHint + targetLines.length <= fileLines.length) {
     const exactMatch = targetLines.every((line, i) =>
       fileLines[startHint + i] === line
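For illustration only (not part of the package diff): a small worked example of the hash-map exact match added above, using the two module-internal functions as defined. Indexing every windowSize-line run keys candidate start positions, so full verification only runs where the first window already matches:

// Worked example for buildLineIndex / findExactMatchHashMap (defined above).
const fileLines = ['a', 'b', 'c', 'd', 'b', 'c', 'd', 'e'];
const target = ['b', 'c', 'd', 'e'];

const index = buildLineIndex(fileLines, 3);
// index: 'a|b|c' -> [0], 'b|c|d' -> [1, 4], 'c|d|b' -> [2], ...

const hit = findExactMatchHashMap(target, fileLines, index, 3);
// Candidates for 'b|c|d' are [1, 4]; position 1 fails full verification
// (fileLines[4] is 'b', not 'e'), position 4 matches all four lines:
// hit -> { index: 4, distance: 0, confidence: 1.0 }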
@@ -157,27 +186,35 @@ export function findBestMatch(targetLines, fileLines, startHint = 0) {
     }
   }
 
-  //
-
+  // OPTIMIZATION 2: Build hash index for faster exact lookups
+  const lineIndex = buildLineIndex(fileLines, Math.min(3, targetLines.length));
+  const exactResult = findExactMatchHashMap(targetLines, fileLines, lineIndex, Math.min(3, targetLines.length));
+
+  if (exactResult) {
+    console.error(`[FUZZY] Exact match found at line ${exactResult.index}`);
+    return exactResult;
+  }
+
+  // OPTIMIZATION 3: Sampled fuzzy search with larger skip
+  let bestMatch = null;
+  let bestScore = Infinity;
+  const sampleStep = Math.max(1, Math.floor(fileLines.length / 5000)); // Skip positions for large files
+
+  for (let i = 0; i <= fileLines.length - targetLines.length; i += sampleStep) {
     iterations++;
     if (iterations > maxIterations) {
       console.error(`[FUZZY] Max iterations (${maxIterations}) reached`);
       break;
     }
 
-    //
-    // Check first, middle, and last line. If they are very different, skip block.
+    // Sampled check for first, middle, last lines
     if (targetLines.length > 5) {
       const indices = [0, Math.floor(targetLines.length / 2), targetLines.length - 1];
       let sampleDist = 0;
       for (const idx of indices) {
-
-        sampleDist += levenshteinDistance(normTargetLines[idx], normFileLines[i + idx], 20); // strict limit
-      }
-      // If average distance per sample line is high (> 10 chars), skip
-      if (sampleDist > indices.length * 10) {
-        continue;
+        sampleDist += levenshteinDistance(normTargetLines[idx], normFileLines[i + idx], 20);
       }
+      if (sampleDist > indices.length * 10) continue;
     }
 
     let totalDistance = 0;
@@ -185,46 +222,29 @@ export function findBestMatch(targetLines, fileLines, startHint = 0) {
     let contextMatchSum = 0;
 
     for (let j = 0; j < targetLines.length; j++) {
-      const targetLine = targetLines[j];
-      const fileLine = fileLines[i + j];
-
-      // Use NORMALIZED lines for distance to ignore indentation differences
       const nTarget = normTargetLines[j];
       const nFile = normFileLines[i + j];
 
-
-
-      totalDistance += distance;
-
-      // Token similarity (always available)
-      const tokSim = tokenSimilarity(targetLine, fileLine);
-      tokenSimilaritySum += tokSim;
-
-      // Context-aware matching (use surrounding lines)
-      const surroundingLines = [
-        fileLines[i + j - 1],
-        fileLines[i + j + 1]
-      ].filter(Boolean);
+      totalDistance += levenshteinDistance(nTarget, nFile);
+      tokenSimilaritySum += tokenSimilarity(targetLines[j], fileLines[i + j]);
 
-      const
-      contextMatchSum +=
+      const surroundingLines = [fileLines[i + j - 1], fileLines[i + j + 1]].filter(Boolean);
+      contextMatchSum += contextAwareMatch(targetLines[j], fileLines[i + j], surroundingLines);
     }
 
     const avgTokenSim = tokenSimilaritySum / targetLines.length;
     const avgContextMatch = contextMatchSum / targetLines.length;
 
-    // Calculate confidence using enhanced scoring
     const confidence = calculateConfidence(null, {
       levenshteinDistance: totalDistance,
       maxDistance: targetLines.length * 100,
       tokenSimilarity: avgTokenSim,
-      structuralSimilarity: 0,
+      structuralSimilarity: 0,
       lineNumberMatch: i === startHint,
       surroundingContextMatch: avgContextMatch
     });
 
-
-    const score = totalDistance * (1 - confidence * 0.5); // Confidence reduces effective distance
+    const score = totalDistance * (1 - confidence * 0.5);
 
     if (score < bestScore) {
       bestScore = score;
@@ -236,7 +256,6 @@ export function findBestMatch(targetLines, fileLines, startHint = 0) {
         contextMatch: avgContextMatch
       };
 
-      // Early termination if we find a very good match
       if (confidence > 0.99) {
         console.error(`[FUZZY] Early termination at ${confidence.toFixed(2)} confidence`);
         break;
@@ -245,25 +264,22 @@ export function findBestMatch(targetLines, fileLines, startHint = 0) {
   }
 
   if (bestMatch) {
-    console.error(`[FUZZY] Best match: line ${bestMatch.index}, distance ${bestMatch.distance}, confidence ${bestMatch.confidence.toFixed(2)}
+    console.error(`[FUZZY] Best match: line ${bestMatch.index}, distance ${bestMatch.distance}, confidence ${bestMatch.confidence.toFixed(2)}`);
   }
 
   return bestMatch;
 }
 
-
-
-
-
+// =============================================================================
+// OPTIMIZED SINGLE-PASS MULTI-HUNK APPLICATION (v4.0 NEW)
+// =============================================================================
+
 export function applyHunk(fileContent, hunk, threshold = 0.8) {
   const lines = fileContent.split('\n');
-
-  // Extract context lines (lines that should match)
   const contextLines = hunk.lines
     .filter(l => l.type === 'context' || l.type === 'remove')
     .map(l => l.content);
 
-  // Find best match location
   const match = findBestMatch(lines, contextLines, threshold);
 
   if (!match) {
@@ -275,20 +291,17 @@ export function applyHunk(fileContent, hunk, threshold = 0.8) {
     };
   }
 
-  // Apply changes at matched location
   const newLines = [...lines];
   let offset = match.index;
 
   for (const diffLine of hunk.lines) {
     if (diffLine.type === 'remove' || diffLine.type === 'context') {
-      // Remove or skip context line
       if (diffLine.type === 'remove') {
         newLines.splice(offset, 1);
       } else {
        offset++;
      }
    } else if (diffLine.type === 'add') {
-      // Insert new line
      newLines.splice(offset, 0, diffLine.content);
      offset++;
    }
@@ -297,90 +310,95 @@ export function applyHunk(fileContent, hunk, threshold = 0.8) {
   return {
     success: true,
     content: newLines.join('\n'),
-    confidence: match.
-    message: `Applied at line ${match.
+    confidence: match.confidence,
+    message: `Applied at line ${match.index + 1} (confidence: ${(match.confidence * 100).toFixed(1)}%)`
   };
 }
 
 /**
- * Apply complete diff with fuzzy matching
- *
+ * Apply complete diff with fuzzy matching - OPTIMIZED v4.0
+ *
+ * Key optimization: Parse file ONCE, apply all hunks with offset tracking
+ * instead of re-parsing for each hunk.
  */
 export function applyFuzzyPatch(fileContent, diffText, options = {}) {
-  // Input validation
   if (!fileContent || typeof fileContent !== 'string') {
-    return {
-      success: false,
-      content: fileContent || '',
-      message: 'Invalid file content provided'
-    };
+    return { success: false, content: fileContent || '', message: 'Invalid file content provided' };
   }
 
   if (!diffText || typeof diffText !== 'string') {
-    return {
-      success: false,
-      content: fileContent,
-      message: 'Invalid diff text provided'
-    };
+    return { success: false, content: fileContent, message: 'Invalid diff text provided' };
   }
 
   const threshold = options.threshold || parseFloat(process.env.MCFAST_FUZZY_THRESHOLD || '0.8');
 
-  // Validate threshold range
   if (threshold < 0 || threshold > 1) {
-    return {
-      success: false,
-      content: fileContent,
-      message: 'Threshold must be between 0 and 1'
-    };
+    return { success: false, content: fileContent, message: 'Threshold must be between 0 and 1' };
   }
 
   const hunks = parseDiff(diffText);
 
   if (hunks.length === 0) {
-    return {
-      success: false,
-      content: fileContent,
-      message: 'No valid hunks found in diff'
-    };
+    return { success: false, content: fileContent, message: 'No valid hunks found in diff' };
   }
 
-
+  // OPTIMIZATION: Parse file into lines ONCE at the start
+  let currentLines = fileContent.split('\n');
   const results = [];
 
   for (const hunk of hunks) {
-
-
+    // Extract context lines from hunk
+    const contextLines = hunk.lines
+      .filter(l => l.type === 'context' || l.type === 'remove')
+      .map(l => l.content);
+
+    // Find match in current (already modified) lines
+    const match = findBestMatch(currentLines, contextLines, 0);
 
-    if (!
+    if (!match) {
       return {
         success: false,
         content: fileContent,
-        message: `Failed to apply hunk: ${
+        message: `Failed to apply hunk at line ${hunk.oldStart}: ${results[results.length - 1]?.message || 'match not found'}`,
        partialResults: results
      };
    }
 
-
+    // Apply changes at matched location
+    let offset = match.index;
+
+    for (const diffLine of hunk.lines) {
+      if (diffLine.type === 'remove' || diffLine.type === 'context') {
+        if (diffLine.type === 'remove') {
+          currentLines.splice(offset, 1);
+        } else {
+          offset++;
+        }
+      } else if (diffLine.type === 'add') {
+        currentLines.splice(offset, 0, diffLine.content);
+        offset++;
+      }
+    }
+
+    results.push({
+      success: true,
+      confidence: match.confidence,
+      line: match.index + 1
+    });
   }
 
   const avgConfidence = results.reduce((sum, r) => sum + r.confidence, 0) / results.length;
 
   return {
     success: true,
-    content:
+    content: currentLines.join('\n'),
     confidence: avgConfidence,
     message: `Applied ${hunks.length} hunk(s) successfully`,
     details: results
   };
 }
 
-/**
- * Detect if instruction is a diff-based edit
- * Returns true if instruction contains unified diff markers
- */
 export function isDiffBasedEdit(instruction) {
   if (!instruction) return false;
-  // Check for unified diff hunk markers (@@)
   return instruction.includes('@@');
 }
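For illustration only (not part of the package diff): a usage sketch for the single-pass applyFuzzyPatch above — one split('\n') up front, every hunk applied to the same evolving line array. The input values here are invented:

import { applyFuzzyPatch } from './strategies/fuzzy-patch.js';

const original = 'function greet() {\n  console.log("hi");\n}';
const diff = [
  '@@ -1,3 +1,3 @@',
  ' function greet() {',
  '-  console.log("hi");',
  '+  console.log("hello");',
  ' }'
].join('\n');

const result = applyFuzzyPatch(original, diff, { threshold: 0.8 });
// Expected shape: { success, content, confidence, message, details },
// where details lists, per hunk, the confidence and the line it was applied at.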
package/src/strategies/tree-sitter/languages.js
CHANGED
@@ -1,5 +1,9 @@
 /**
- * Tree-sitter Language Loader
+ * Tree-sitter Language Loader (v4.0+)
+ *
+ * Performance Optimization:
+ * - Cache Parser instances per language to avoid WASM re-initialization overhead
+ * - Reusing parsers significantly reduces latency for repeated operations
  */
 import path from 'path';
 import { createRequire } from 'module';
@@ -12,36 +16,31 @@ const require = createRequire(import.meta.url);
 const _Parser = require('web-tree-sitter');
 
 let Parser = _Parser;
-// If default export exists, use it
 if (Parser.default) {
   Parser = Parser.default;
 }
-// If Parser property exists and Parser itself isn't the constructor
 if (typeof Parser !== 'function' && Parser.Parser) {
   Parser = Parser.Parser;
 }
 
 let isInitialized = false;
 const languageCache = new Map();
+const parserCache = new Map();
 
-// Map internal language IDs to WASM filenames
 const WASM_MAP = {
   'go': 'tree-sitter-go.wasm',
   'rust': 'tree-sitter-rust.wasm',
   'java': 'tree-sitter-java.wasm',
   'javascript': 'tree-sitter-javascript.wasm',
-  'typescript': 'tree-sitter-javascript.wasm',
+  'typescript': 'tree-sitter-javascript.wasm',
   'python': 'tree-sitter-python.wasm',
   'cpp': 'tree-sitter-cpp.wasm',
-  'c': 'tree-sitter-cpp.wasm',
+  'c': 'tree-sitter-cpp.wasm',
   'csharp': 'tree-sitter-c-sharp.wasm',
   'php': 'tree-sitter-php.wasm',
   'ruby': 'tree-sitter-ruby.wasm',
 };
 
-/**
- * Initialize web-tree-sitter
- */
 async function init() {
   if (isInitialized) return;
   try {
@@ -52,7 +51,6 @@ async function init() {
     isInitialized = true;
   } catch (e) {
     console.error('Failed to initialize web-tree-sitter:', e);
-    // Fallback: try without explicit path (may work if in same dir)
     try {
       await Parser.init();
       isInitialized = true;
@@ -62,10 +60,6 @@ async function init() {
   }
 }
 
-/**
- * Load language parser
- * @param {string} language - 'go', 'rust', 'java', etc.
- */
 export async function loadLanguage(language) {
   if (!isInitialized) await init();
 
@@ -78,12 +72,7 @@ export async function loadLanguage(language) {
     throw new Error(`Unsupported tree-sitter language: ${language}`);
   }
 
-  // Resolve path to wasm file
-  // Assuming this file is at packages/mcp-client/src/strategies/tree-sitter/languages.js
-  // and wasm files are at packages/mcp-client/src/strategies/tree-sitter/wasm/
   const wasmPath = path.resolve(__dirname, 'wasm', wasmFile);
-
-  // Resolve Language class
   let Language = Parser.Language;
   if (!Language && _Parser.Language) {
     Language = _Parser.Language;
@@ -94,20 +83,50 @@ export async function loadLanguage(language) {
     languageCache.set(language, lang);
     return lang;
   } catch (e) {
-    // Fallback for different environments where __dirname might behave differently
-    // Try relative path if absolute fails, or check common locations
     console.error(`Failed to load language ${language} from ${wasmPath}`, e);
     throw e;
   }
 }
 
+/**
+ * Get cached Parser instance for language (v4.0 optimization)
+ * Reuses Parser instances instead of creating new ones each time
+ */
 export async function getParser(language) {
+  if (!isInitialized) await init();
+
+  if (parserCache.has(language)) {
+    return parserCache.get(language);
+  }
+
   const lang = await loadLanguage(language);
   const parser = new Parser();
   parser.setLanguage(lang);
+  parserCache.set(language, parser);
   return parser;
 }
 
+/**
+ * Clear parser cache (useful for testing or memory management)
+ */
+export function clearParserCache() {
+  for (const parser of parserCache.values()) {
+    parser.delete?.();
+  }
+  parserCache.clear();
+}
+
+/**
+ * Get cache statistics
+ */
+export function getCacheStats() {
+  return {
+    languagesLoaded: languageCache.size,
+    parsersCached: parserCache.size,
+    supportedLanguages: Object.keys(WASM_MAP)
+  };
+}
+
 /**
  * Get compiled query for language
 */