@probelabs/probe 0.6.0-rc253 → 0.6.0-rc255
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +166 -3
- package/bin/binaries/probe-v0.6.0-rc255-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc255-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc255-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc255-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc255-x86_64-unknown-linux-musl.tar.gz +0 -0
- package/build/agent/ProbeAgent.d.ts +1 -1
- package/build/agent/ProbeAgent.js +51 -16
- package/build/agent/acp/tools.js +2 -1
- package/build/agent/acp/tools.test.js +2 -1
- package/build/agent/dsl/environment.js +19 -0
- package/build/agent/index.js +1512 -413
- package/build/agent/schemaUtils.js +91 -2
- package/build/agent/tools.js +0 -28
- package/build/delegate.js +3 -0
- package/build/index.js +2 -0
- package/build/tools/common.js +6 -5
- package/build/tools/edit.js +457 -65
- package/build/tools/executePlan.js +3 -1
- package/build/tools/fileTracker.js +318 -0
- package/build/tools/fuzzyMatch.js +271 -0
- package/build/tools/hashline.js +131 -0
- package/build/tools/lineEditHeuristics.js +138 -0
- package/build/tools/symbolEdit.js +119 -0
- package/build/tools/vercel.js +40 -9
- package/cjs/agent/ProbeAgent.cjs +1615 -517
- package/cjs/index.cjs +1643 -543
- package/index.d.ts +189 -1
- package/package.json +1 -1
- package/src/agent/ProbeAgent.d.ts +1 -1
- package/src/agent/ProbeAgent.js +51 -16
- package/src/agent/acp/tools.js +2 -1
- package/src/agent/acp/tools.test.js +2 -1
- package/src/agent/dsl/environment.js +19 -0
- package/src/agent/index.js +14 -3
- package/src/agent/schemaUtils.js +91 -2
- package/src/agent/tools.js +0 -28
- package/src/delegate.js +3 -0
- package/src/index.js +2 -0
- package/src/tools/common.js +6 -5
- package/src/tools/edit.js +457 -65
- package/src/tools/executePlan.js +3 -1
- package/src/tools/fileTracker.js +318 -0
- package/src/tools/fuzzyMatch.js +271 -0
- package/src/tools/hashline.js +131 -0
- package/src/tools/lineEditHeuristics.js +138 -0
- package/src/tools/symbolEdit.js +119 -0
- package/src/tools/vercel.js +40 -9
- package/bin/binaries/probe-v0.6.0-rc253-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc253-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc253-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc253-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc253-x86_64-unknown-linux-musl.tar.gz +0 -0
package/src/tools/executePlan.js
CHANGED
|
@@ -436,6 +436,7 @@ ${lastError}
|
|
|
436
436
|
|
|
437
437
|
RULES REMINDER:
|
|
438
438
|
- search(query) is KEYWORD SEARCH — pass a search query, NOT a filename. Use extract(filepath) to read file contents.
|
|
439
|
+
- search(query, path) — the path argument must be a STRING, not an object. Use field.file_path, not field.
|
|
439
440
|
- search() returns up to 20K tokens by default. Use search(query, path, {maxTokens: null}) for unlimited, or searchAll(query) to auto-paginate ALL results.
|
|
440
441
|
- search(), searchAll(), query(), extract(), listFiles(), bash() all return STRINGS, not arrays.
|
|
441
442
|
- Use chunk(stringData) to split a string into an array of chunks.
|
|
@@ -444,7 +445,8 @@ RULES REMINDER:
|
|
|
444
445
|
- Do NOT define helper functions that call tools — write logic inline.
|
|
445
446
|
- Do NOT use async/await, template literals, or shorthand properties.
|
|
446
447
|
- Do NOT use regex literals (/pattern/) — use String methods like indexOf, includes, startsWith instead.
|
|
447
|
-
- String concatenation with +, not template literals
|
|
448
|
+
- String concatenation with +, not template literals.
|
|
449
|
+
- IMPORTANT: If a tool returns "ERROR: ...", do NOT pass that error string to LLM() — handle or skip it.`;
|
|
448
450
|
|
|
449
451
|
const fixedCode = await llmCallFn(fixPrompt, '', { maxTokens: 4000, temperature: 0.2 });
|
|
450
452
|
// Strip markdown fences and XML tags the LLM might add
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FileTracker — per-session content-aware file state tracking for safe multi-edit workflows
|
|
3
|
+
*
|
|
4
|
+
* Two-tier tracking:
|
|
5
|
+
* 1. _seenFiles (Set) — which files the LLM has "seen" via search/extract. Guards against blind edits.
|
|
6
|
+
* 2. _contentRecords (Map) — per-symbol content hashes from extract #symbol targets. Detects stale edits.
|
|
7
|
+
*
|
|
8
|
+
* Key benefit: edits proceed when the target symbol hasn't changed, even if other parts of the file changed.
|
|
9
|
+
* Uses SHA-256 content hashing instead of mtime/size for precise change detection.
|
|
10
|
+
*
|
|
11
|
+
* @module tools/fileTracker
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { createHash } from 'crypto';
|
|
15
|
+
import { resolve, isAbsolute } from 'path';
|
|
16
|
+
import { findSymbol } from './symbolEdit.js';
|
|
17
|
+
|
|
18
|
+
/**
 * Produce a short, stable fingerprint for a piece of source code.
 *
 * Trailing whitespace is removed from every line before hashing, so
 * editor-induced formatting noise does not change the fingerprint.
 * A falsy `content` (null, undefined, '') is hashed as the empty string.
 *
 * @param {string} content - The code content to fingerprint
 * @returns {string} First 16 hex characters of the SHA-256 digest
 */
export function computeContentHash(content) {
  const cleanedLines = [];
  for (const rawLine of (content || '').split('\n')) {
    cleanedLines.push(rawLine.trimEnd());
  }

  const hasher = createHash('sha256');
  hasher.update(cleanedLines.join('\n'));
  return hasher.digest('hex').slice(0, 16);
}
|
|
28
|
+
|
|
29
|
+
/**
 * Reduce an extract target to its file path.
 *
 * A "#Symbol" suffix takes precedence: everything from the first '#' on is
 * dropped. Otherwise a trailing ":line" or ":start-end" reference is dropped.
 * Only the LAST colon is examined, so a Windows drive-letter colon is left
 * alone unless it is itself followed by a line reference.
 *
 * @param {string} target - Extract target (e.g. "file.js#fn", "file.js:10-20")
 * @returns {string} The file path portion of the target
 */
function extractFilePath(target) {
  const symbolAt = target.indexOf('#');
  if (symbolAt >= 0) {
    return target.substring(0, symbolAt);
  }

  const lastColon = target.lastIndexOf(':');
  const suffix = lastColon >= 0 ? target.slice(lastColon + 1) : null;
  // Strip only when the text after the colon is digits, optionally "-digits".
  const isLineReference = suffix !== null && /^\d+(-\d+)?$/.test(suffix);

  return isLineReference ? target.slice(0, lastColon) : target;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Pull the symbol name out of an extract target.
 *
 * The symbol is whatever follows the first '#'. A target without '#', or
 * with nothing after it, has no symbol.
 *
 * @param {string} target - Extract target (e.g. "file.js#fn")
 * @returns {string|null} Symbol name, or null when the target names no symbol
 */
function extractSymbolName(target) {
  const marker = target.indexOf('#');
  if (marker < 0) {
    return null;
  }
  const name = target.substring(marker + 1);
  return name === '' ? null : name;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Collect file paths mentioned in probe search/extract output.
 *
 * Recognizes lines of the form "File: path" and "--- path ---" (the trailing
 * "---" is optional). A captured value is kept only when it does not start
 * with "Results" or "Page" and contains at least one of '/', '.', or '\'.
 *
 * @param {string} output - Probe output text
 * @returns {string[]} File paths in the order they appear (duplicates kept)
 */
function parseFilePathsFromOutput(output) {
  const headerPattern = /^(?:File:\s+|---\s+)([^\s].*?)(?:\s+---)?$/gm;
  const found = [];

  for (const hit of String(output).matchAll(headerPattern)) {
    const candidate = hit[1].trim();
    if (candidate === '') continue;

    // Skip things that look like metadata rather than file paths.
    if (candidate.startsWith('Results') || candidate.startsWith('Page')) continue;

    const looksLikePath = ['/', '.', '\\'].some((ch) => candidate.includes(ch));
    if (looksLikePath) {
      found.push(candidate);
    }
  }

  return found;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Per-session registry of what has been read, used to gate edits.
 *
 * Holds two pieces of state:
 *   - `_seenFiles`: absolute paths that were marked as seen.
 *   - `_contentRecords`: content hashes keyed by "<path>#<symbol>", used to
 *     tell whether a symbol changed since it was last recorded.
 */
export class FileTracker {
  /**
   * @param {Object} [options]
   * @param {boolean} [options.debug=false] - When truthy, log tracking events to stderr
   */
  constructor(options = {}) {
    this.debug = options.debug || false;
    /** @type {Set<string>} Files seen via search/extract */
    this._seenFiles = new Set();
    /** @type {Map<string, {contentHash: string, startLine: number, endLine: number, symbolName: string|null, source: string, timestamp: number}>} */
    this._contentRecords = new Map();
  }

  /**
   * Record that a file's content has been read.
   * @param {string} resolvedPath - Absolute path to the file
   */
  markFileSeen(resolvedPath) {
    this._seenFiles.add(resolvedPath);
    if (!this.debug) {
      return;
    }
    console.error(`[FileTracker] Marked as seen: ${resolvedPath}`);
  }

  /**
   * Report whether a file was marked as seen.
   * @param {string} resolvedPath - Absolute path to the file
   * @returns {boolean}
   */
  isFileSeen(resolvedPath) {
    return this._seenFiles.has(resolvedPath);
  }

  /**
   * Store (or overwrite) the content hash for one symbol of a file.
   * @param {string} resolvedPath - Absolute path to the file
   * @param {string} symbolName - Symbol name (e.g. "calculateTotal")
   * @param {string} code - The symbol's source code
   * @param {number} startLine - 1-indexed start line
   * @param {number} endLine - 1-indexed end line
   * @param {string} [source='extract'] - How the content was obtained
   */
  trackSymbolContent(resolvedPath, symbolName, code, startLine, endLine, source = 'extract') {
    const recordKey = `${resolvedPath}#${symbolName}`;
    const record = {
      contentHash: computeContentHash(code),
      startLine,
      endLine,
      symbolName,
      source,
      timestamp: Date.now(),
    };
    this._contentRecords.set(recordKey, record);

    if (this.debug) {
      console.error(`[FileTracker] Tracked symbol ${recordKey} (hash: ${record.contentHash}, lines ${startLine}-${endLine})`);
    }
  }

  /**
   * Fetch the stored record for a symbol.
   * @param {string} resolvedPath - Absolute path to the file
   * @param {string} symbolName - Symbol name
   * @returns {Object|null} The stored record, or null when none exists
   */
  getSymbolRecord(resolvedPath, symbolName) {
    const stored = this._contentRecords.get(`${resolvedPath}#${symbolName}`);
    return stored || null;
  }

  /**
   * Compare a symbol's current code against the hash recorded earlier.
   * A symbol with no record is accepted.
   * @param {string} resolvedPath - Absolute path to the file
   * @param {string} symbolName - Symbol name
   * @param {string} currentCode - The symbol's current source code
   * @returns {{ok: boolean, reason?: string, message?: string}}
   */
  checkSymbolContent(resolvedPath, symbolName, currentCode) {
    const stored = this._contentRecords.get(`${resolvedPath}#${symbolName}`);
    if (!stored) {
      // Nothing recorded for this symbol, so there is nothing to be stale against.
      return { ok: true };
    }

    const liveHash = computeContentHash(currentCode);
    if (liveHash !== stored.contentHash) {
      return {
        ok: false,
        reason: 'stale',
        message: `Symbol "${symbolName}" has changed since you last read it (hash: ${stored.contentHash} → ${liveHash}).`,
      };
    }
    return { ok: true };
  }

  /**
   * Process a list of extract targets: mark each file as seen and, for
   * "#symbol" targets, look the symbol up via findSymbol and record its hash.
   * Symbol lookup failures are not propagated; they are logged in debug mode.
   * @param {string[]} targets - Extract targets (e.g. ["file.js#fn", "file.js:10-20"])
   * @param {string} cwd - Working directory for resolving relative paths
   */
  async trackFilesFromExtract(targets, cwd) {
    const alreadyMarked = new Set();
    const pendingLookups = [];

    for (const target of targets) {
      const rawPath = extractFilePath(target);
      const absolutePath = isAbsolute(rawPath) ? rawPath : resolve(cwd, rawPath);

      // Mark each distinct file only once per call.
      if (!alreadyMarked.has(absolutePath)) {
        alreadyMarked.add(absolutePath);
        this.markFileSeen(absolutePath);
      }

      const symbolName = extractSymbolName(target);
      if (!symbolName) {
        continue;
      }

      const recordSymbol = (symbolInfo) => {
        if (!symbolInfo) return;
        this.trackSymbolContent(
          absolutePath,
          symbolName,
          symbolInfo.code,
          symbolInfo.startLine,
          symbolInfo.endLine,
          'extract'
        );
      };
      const reportFailure = (err) => {
        if (this.debug) {
          console.error(`[FileTracker] Failed to track symbol "${symbolName}" in ${absolutePath}: ${err.message}`);
        }
      };

      pendingLookups.push(
        findSymbol(absolutePath, symbolName, cwd).then(recordSymbol).catch(reportFailure)
      );
    }

    if (pendingLookups.length > 0) {
      await Promise.all(pendingLookups);
    }
  }

  /**
   * Mark every file path found in probe output as seen.
   * @param {string} output - Probe output text
   * @param {string} cwd - Working directory for resolving relative paths
   */
  async trackFilesFromOutput(output, cwd) {
    for (const listedPath of parseFilePathsFromOutput(output)) {
      const absolutePath = isAbsolute(listedPath) ? listedPath : resolve(cwd, listedPath);
      this.markFileSeen(absolutePath);
    }
  }

  /**
   * Gate an edit on the file having been seen. Only the seen-set is checked
   * here; no content comparison is performed by this method.
   * @param {string} resolvedPath - Absolute path to the file
   * @returns {{ok: boolean, reason?: string, message?: string}}
   */
  checkBeforeEdit(resolvedPath) {
    if (this._seenFiles.has(resolvedPath)) {
      return { ok: true };
    }
    return {
      ok: false,
      reason: 'untracked',
      message: 'This file has not been read yet in this session. Use extract or search to read the file first.',
    };
  }

  /**
   * Bookkeeping after a successful write: the file counts as seen, and all
   * of its symbol records are dropped because its content changed.
   * @param {string} resolvedPath - Absolute path to the file
   */
  async trackFileAfterWrite(resolvedPath) {
    this.markFileSeen(resolvedPath);
    this.invalidateFileRecords(resolvedPath);
  }

  /**
   * Re-record a symbol's hash after it was written, tagging the record with
   * source 'edit'.
   * @param {string} resolvedPath - Absolute path to the file
   * @param {string} symbolName - Symbol name
   * @param {string} code - The symbol's new source code
   * @param {number} startLine - 1-indexed start line (new position)
   * @param {number} endLine - 1-indexed end line (new position)
   */
  trackSymbolAfterWrite(resolvedPath, symbolName, code, startLine, endLine) {
    this.trackSymbolContent(resolvedPath, symbolName, code, startLine, endLine, 'edit');
  }

  /**
   * Drop every symbol record whose key starts with "<resolvedPath>#".
   * @param {string} resolvedPath - Absolute path to the file
   */
  invalidateFileRecords(resolvedPath) {
    const keyPrefix = resolvedPath + '#';
    const staleKeys = Array.from(this._contentRecords.keys()).filter((key) => key.startsWith(keyPrefix));
    for (const key of staleKeys) {
      this._contentRecords.delete(key);
    }

    if (this.debug) {
      console.error(`[FileTracker] Invalidated content records for ${resolvedPath}`);
    }
  }

  /**
   * Alias for isFileSeen.
   * @param {string} resolvedPath - Absolute path to the file
   * @returns {boolean}
   */
  isTracked(resolvedPath) {
    return this.isFileSeen(resolvedPath);
  }

  /**
   * Forget all seen files and all symbol records.
   */
  clear() {
    this._seenFiles.clear();
    this._contentRecords.clear();
  }
}
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Progressive fuzzy string matching for the edit tool.
|
|
3
|
+
* Strategies are tried in order:
|
|
4
|
+
* exact (handled by caller) → line-trimmed → whitespace-normalized → indent-flexible
|
|
5
|
+
*
|
|
6
|
+
* All functions are PURE — no file I/O, no side effects.
|
|
7
|
+
* Each match function returns the ACTUAL text from the file content (not the search string),
|
|
8
|
+
* so the caller can do content.replace(matchedText, newString).
|
|
9
|
+
*
|
|
10
|
+
* @module tools/fuzzyMatch
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Run the fuzzy strategies in a fixed order and return the first hit.
 *
 * Order: line-trimmed, then whitespace-normalized, then indent-flexible.
 * Both inputs have "\r\n" converted to "\n" before any strategy runs, so the
 * returned text is taken from the newline-normalized content.
 *
 * @param {string} content - Full file content
 * @param {string} searchString - String to find
 * @returns {{ matchedText: string, strategy: string, count: number } | null}
 *   The strategy's result tagged with its name, or null when the search
 *   string is empty/whitespace-only or no strategy matches.
 */
export function findFuzzyMatch(content, searchString) {
  // Nothing meaningful to look for.
  if (!searchString || searchString.trim().length === 0) {
    return null;
  }

  const haystack = content.replace(/\r\n/g, '\n');
  const needle = searchString.replace(/\r\n/g, '\n');
  const haystackLines = haystack.split('\n');
  const needleLines = needle.split('\n');

  // Each entry is evaluated lazily so later strategies run only on a miss.
  const strategies = [
    ['line-trimmed', () => lineTrimmedMatch(haystackLines, needleLines)],
    ['whitespace-normalized', () => whitespaceNormalizedMatch(haystack, needle)],
    ['indent-flexible', () => indentFlexibleMatch(haystackLines, needleLines)],
  ];

  for (const [strategy, attempt] of strategies) {
    const hit = attempt();
    if (hit) {
      return { ...hit, strategy };
    }
  }

  return null;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Line-trimmed matching: compare lines after trimming both sides.
 *
 * A window as tall as the search is slid over the content one line at a
 * time. Every window whose trimmed lines equal the trimmed search lines
 * counts as a match (overlapping windows included); the text returned is the
 * untrimmed content of the FIRST matching window.
 *
 * @param {string[]} contentLines - Lines of the full file content
 * @param {string[]} searchLines - Lines of the search string
 * @returns {{ matchedText: string, count: number } | null}
 *   null when the search is empty, blank after trimming, or not found
 */
export function lineTrimmedMatch(contentLines, searchLines) {
  const span = searchLines.length;
  if (span === 0) {
    return null;
  }

  const wanted = searchLines.map((line) => line.trim());
  // A search made only of blank lines would match almost anywhere.
  if (!wanted.some((line) => line !== '')) {
    return null;
  }

  let firstHit = null;
  let hits = 0;
  const lastStart = contentLines.length - span;

  for (let start = 0; start <= lastStart; start += 1) {
    const aligned = wanted.every(
      (expected, offset) => contentLines[start + offset].trim() === expected
    );
    if (!aligned) {
      continue;
    }
    if (hits === 0) {
      firstHit = contentLines.slice(start, start + span).join('\n');
    }
    hits += 1;
  }

  return hits === 0 ? null : { matchedText: firstHit, count: hits };
}
|
|
88
|
+
|
|
89
|
+
/**
 * Whitespace-normalized matching: collapses every run of horizontal
 * whitespace (spaces, tabs) to a single space in both content and search,
 * then looks for the normalized search inside the normalized content.
 * Newlines are NOT collapsed; they remain significant.
 *
 * The returned text is the ORIGINAL content substring covering the match.
 * Whitespace runs touched by the match are included in full: leading and
 * interior runs, and the trailing run when the search ends in whitespace.
 * Earlier code meant to extend over the trailing run but never did (its
 * extension loop always exited on the first iteration), so a search ending
 * in whitespace captured only the first character of that run.
 *
 * Overlapping occurrences are counted; `matchedText` is the first one.
 *
 * @param {string} content - Full file content
 * @param {string} search - Search string
 * @returns {{ matchedText: string, count: number } | null}
 *   null when the search is empty/whitespace-only or not found
 */
export function whitespaceNormalizedMatch(content, search) {
  if (!search || search.trim().length === 0) return null;

  const { normalized: normContent, indexMap: contentMap } = buildNormalizedMap(content);
  const { normalized: normSearch } = buildNormalizedMap(search);

  if (normSearch.length === 0) return null;

  const matches = [];
  let searchStart = 0;

  while (searchStart <= normContent.length - normSearch.length) {
    const idx = normContent.indexOf(normSearch, searchStart);
    if (idx === -1) break;

    // Map normalized positions back to positions in the original content.
    const originalStart = contentMap[idx];
    const originalEnd = contentMap[idx + normSearch.length - 1];

    // originalEnd is the index of the last matched character, or of the
    // FIRST character of a collapsed run when the match ends in whitespace.
    let actualEnd = originalEnd + 1;
    const lastChar = content[originalEnd];
    if (lastChar === ' ' || lastChar === '\t') {
      // Consume the rest of the collapsed run so the whole run is replaced.
      while (
        actualEnd < content.length &&
        (content[actualEnd] === ' ' || content[actualEnd] === '\t')
      ) {
        actualEnd++;
      }
    }

    matches.push(content.substring(originalStart, actualEnd));

    // Advance by one normalized character so overlapping matches are counted.
    searchStart = idx + 1;
  }

  if (matches.length === 0) return null;

  return {
    matchedText: matches[0],
    count: matches.length,
  };
}

/**
 * Build a normalized string plus a map from each normalized character index
 * to the index of the corresponding character in the original string.
 * Each run of spaces/tabs becomes one space, mapped to the run's first
 * character. All other characters, newlines included, are copied unchanged.
 *
 * @param {string} str - Original string
 * @returns {{ normalized: string, indexMap: number[] }}
 */
function buildNormalizedMap(str) {
  const normalized = [];
  const indexMap = [];
  let i = 0;

  while (i < str.length) {
    const ch = str[i];

    if (ch === ' ' || ch === '\t') {
      // Start of a whitespace run: emit a single space for the whole run.
      normalized.push(' ');
      indexMap.push(i);
      while (i < str.length && (str[i] === ' ' || str[i] === '\t')) {
        i++;
      }
    } else {
      normalized.push(ch);
      indexMap.push(i);
      i++;
    }
  }

  return {
    normalized: normalized.join(''),
    indexMap,
  };
}
|
|
186
|
+
|
|
187
|
+
/**
 * Indentation-flexible matching: compare blocks after removing each block's
 * own minimum indentation.
 *
 * The search lines are de-indented once. Each content window of the same
 * height is de-indented by its own minimum indent and compared line by line.
 * Every matching window is counted (overlaps included); the returned text is
 * the original, still-indented content of the FIRST matching window.
 *
 * @param {string[]} contentLines - Lines of the full file content
 * @param {string[]} searchLines - Lines of the search string
 * @returns {{ matchedText: string, count: number } | null}
 *   null when the search is empty, entirely blank, or not found
 */
export function indentFlexibleMatch(contentLines, searchLines) {
  const span = searchLines.length;
  if (span === 0) {
    return null;
  }

  // A search made only of blank lines carries no information to match on.
  if (!searchLines.some((line) => line.trim() !== '')) {
    return null;
  }

  const searchIndent = getMinIndent(searchLines);
  const wanted = searchLines.map((line) => stripIndent(line, searchIndent));

  const hits = [];
  for (let start = 0; start + span <= contentLines.length; start += 1) {
    const candidate = contentLines.slice(start, start + span);
    const candidateIndent = getMinIndent(candidate);

    const sameShape = candidate.every(
      (line, offset) => stripIndent(line, candidateIndent) === wanted[offset]
    );
    if (sameShape) {
      hits.push(candidate.join('\n'));
    }
  }

  if (hits.length === 0) {
    return null;
  }
  return { matchedText: hits[0], count: hits.length };
}
|
|
234
|
+
|
|
235
|
+
/**
 * Find the smallest indentation among the given lines.
 *
 * Indentation is the count of leading space/tab characters. Lines that are
 * empty or whitespace-only are ignored. When no line qualifies the result
 * is 0.
 *
 * @param {string[]} lines
 * @returns {number} Minimum count of leading space/tab characters
 */
function getMinIndent(lines) {
  const smallest = lines.reduce((best, line) => {
    if (line.trim() === '') {
      return best;
    }
    const leading = line.match(/^([ \t]*)/)[1].length;
    return leading < best ? leading : best;
  }, Infinity);

  return smallest === Infinity ? 0 : smallest;
}
|
|
257
|
+
|
|
258
|
+
/**
 * Remove a fixed number of leading characters from a line.
 *
 * Blank or whitespace-only lines collapse to the empty string so that blank
 * lines compare equal regardless of stray indentation. A non-positive
 * `amount` leaves the line untouched; an `amount` larger than the line
 * yields the empty string.
 *
 * @param {string} line
 * @param {number} amount - Number of leading characters to strip
 * @returns {string}
 */
function stripIndent(line, amount) {
  if (line.trim() === '') {
    return '';
  }
  return amount > 0 ? line.slice(Math.min(amount, line.length)) : line;
}
|