search-replace-py 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- search_replace/__init__.py +38 -0
- search_replace/apply.py +436 -0
- search_replace/errors.py +30 -0
- search_replace/fuzzy.py +80 -0
- search_replace/parser.py +220 -0
- search_replace/prompts.py +242 -0
- search_replace/types.py +22 -0
- search_replace_py-0.0.1.dist-info/METADATA +234 -0
- search_replace_py-0.0.1.dist-info/RECORD +11 -0
- search_replace_py-0.0.1.dist-info/WHEEL +4 -0
- search_replace_py-0.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from .apply import apply_diff, apply_edits
|
|
2
|
+
from .errors import (
|
|
3
|
+
ApplyError,
|
|
4
|
+
MissingFilenameError,
|
|
5
|
+
ParseError,
|
|
6
|
+
PathEscapeError,
|
|
7
|
+
SearchReplaceError,
|
|
8
|
+
)
|
|
9
|
+
from .parser import all_fences, find_original_update_blocks, parse_edit_blocks
|
|
10
|
+
from .prompts import (
|
|
11
|
+
EditBlockFencedPrompts,
|
|
12
|
+
FewShotExampleMessages,
|
|
13
|
+
get_example_messages,
|
|
14
|
+
render_system_prompt,
|
|
15
|
+
)
|
|
16
|
+
from .types import ApplyResult, DEFAULT_FENCE, EditBlock, Fence, ParseResult
|
|
17
|
+
|
|
18
|
+
# Public API of the package: every name listed here is imported above and is
# what `from search_replace import *` exposes.
__all__ = [
    "ApplyError",
    "ApplyResult",
    "apply_diff",
    "apply_edits",
    "all_fences",
    "DEFAULT_FENCE",
    "EditBlock",
    "EditBlockFencedPrompts",
    "Fence",
    "FewShotExampleMessages",
    "find_original_update_blocks",
    "get_example_messages",
    "MissingFilenameError",
    "parse_edit_blocks",
    "ParseError",
    "PathEscapeError",
    "ParseResult",
    "render_system_prompt",
    "SearchReplaceError",
]
|
search_replace/apply.py
ADDED
|
@@ -0,0 +1,436 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Sequence
|
|
4
|
+
|
|
5
|
+
from .errors import ApplyError, ParseError, PathEscapeError
|
|
6
|
+
from .fuzzy import find_similar_lines, replace_closest_edit_distance
|
|
7
|
+
from .parser import parse_edit_blocks
|
|
8
|
+
from .types import DEFAULT_FENCE, ApplyResult, EditBlock, Fence
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def prep(content: str) -> tuple[str, list[str]]:
    """Newline-terminate *content* and split it into lines.

    Non-empty text that lacks a trailing newline gets one appended; empty text
    is left alone. Returns the normalised text together with its lines, each
    line keeping its own line ending.
    """
    missing_newline = bool(content) and content[-1:] != "\n"
    normalised = f"{content}\n" if missing_newline else content
    return normalised, normalised.splitlines(True)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def perfect_or_whitespace(
    whole_lines: list[str],
    part_lines: list[str],
    replace_lines: list[str],
) -> str | None:
    """Replace `part_lines` inside `whole_lines`, exactly or modulo indentation.

    An exact line-for-line match is tried first, then a match that tolerates a
    uniform leading-whitespace difference. The first strategy that yields a
    non-empty string wins; an empty result is treated the same as no match.

    Returns the updated text, or ``None`` if neither strategy produced one.
    """
    strategies = (perfect_replace, replace_part_with_missing_leading_whitespace)
    for attempt in strategies:
        outcome = attempt(whole_lines, part_lines, replace_lines)
        if outcome:
            return outcome
    return None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def perfect_replace(
|
|
39
|
+
whole_lines: list[str], part_lines: list[str], replace_lines: list[str]
|
|
40
|
+
) -> str | None:
|
|
41
|
+
part_tup = tuple(part_lines)
|
|
42
|
+
part_len = len(part_lines)
|
|
43
|
+
|
|
44
|
+
for index in range(len(whole_lines) - part_len + 1):
|
|
45
|
+
whole_tup = tuple(whole_lines[index : index + part_len])
|
|
46
|
+
if part_tup == whole_tup:
|
|
47
|
+
result = (
|
|
48
|
+
whole_lines[:index] + replace_lines + whole_lines[index + part_len :]
|
|
49
|
+
)
|
|
50
|
+
return "".join(result)
|
|
51
|
+
|
|
52
|
+
return None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def replace_most_similar_chunk(whole: str, part: str, replace: str) -> str | None:
    """Best efforts to find `part` lines in `whole` and replace them with `replace`.

    All three texts are newline-terminated first, then these strategies are
    tried in order; the first one that yields non-empty text wins:

    1. exact match, then a match tolerating a uniform indentation offset;
    2. the same two matches with a spurious leading blank SEARCH line dropped;
    3. ``...`` elision handling via ``try_dotdotdots``.

    Returns the updated text, or ``None`` when nothing matched.
    """
    whole, whole_lines = prep(whole)
    part, part_lines = prep(part)
    replace, replace_lines = prep(replace)

    result = perfect_or_whitespace(whole_lines, part_lines, replace_lines)
    if result:
        return result

    # Drop leading empty line, GPT sometimes adds them spuriously (issue #25).
    if len(part_lines) > 2 and not part_lines[0].strip():
        result = perfect_or_whitespace(whole_lines, part_lines[1:], replace_lines)
        if result:
            return result

    # Try to handle when it elides code with ...
    try:
        result = try_dotdotdots(whole, part, replace)
    except ValueError:
        # Unpaired, mismatched or ambiguous "..." chunks simply mean "no match".
        result = None

    # The edit-distance fuzzy fallback is not attempted: the code that called
    # it sat after an unconditional return and could never run, so it has been
    # removed rather than silently enabled.
    return result or None
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def try_dotdotdots(whole: str, part: str, replace: str) -> str | None:
|
|
92
|
+
"""
|
|
93
|
+
See if the edit block has ... lines.
|
|
94
|
+
If not, return none.
|
|
95
|
+
|
|
96
|
+
If yes, try and do a perfect edit with the ... chunks.
|
|
97
|
+
If there's a mismatch or otherwise imperfect edit, raise ValueError.
|
|
98
|
+
|
|
99
|
+
If perfect edit succeeds, return the updated whole.
|
|
100
|
+
"""
|
|
101
|
+
dots_re = re.compile(r"(^\s*\.\.\.\n)", re.MULTILINE | re.DOTALL)
|
|
102
|
+
|
|
103
|
+
part_pieces = re.split(dots_re, part)
|
|
104
|
+
replace_pieces = re.split(dots_re, replace)
|
|
105
|
+
|
|
106
|
+
if len(part_pieces) != len(replace_pieces):
|
|
107
|
+
raise ValueError("Unpaired ... in SEARCH/REPLACE block")
|
|
108
|
+
|
|
109
|
+
if len(part_pieces) == 1:
|
|
110
|
+
# No dots in this edit block, just return None.
|
|
111
|
+
return None
|
|
112
|
+
|
|
113
|
+
# Compare odd strings in part_pieces and replace_pieces.
|
|
114
|
+
all_dots_match = all(
|
|
115
|
+
part_pieces[i] == replace_pieces[i] for i in range(1, len(part_pieces), 2)
|
|
116
|
+
)
|
|
117
|
+
if not all_dots_match:
|
|
118
|
+
raise ValueError("Unmatched ... in SEARCH/REPLACE block")
|
|
119
|
+
|
|
120
|
+
part_pieces = [part_pieces[i] for i in range(0, len(part_pieces), 2)]
|
|
121
|
+
replace_pieces = [replace_pieces[i] for i in range(0, len(replace_pieces), 2)]
|
|
122
|
+
|
|
123
|
+
pairs = zip(part_pieces, replace_pieces)
|
|
124
|
+
for part_piece, replace_piece in pairs:
|
|
125
|
+
if not part_piece and not replace_piece:
|
|
126
|
+
continue
|
|
127
|
+
|
|
128
|
+
if not part_piece and replace_piece:
|
|
129
|
+
if not whole.endswith("\n"):
|
|
130
|
+
whole += "\n"
|
|
131
|
+
whole += replace_piece
|
|
132
|
+
continue
|
|
133
|
+
|
|
134
|
+
if whole.count(part_piece) == 0:
|
|
135
|
+
raise ValueError
|
|
136
|
+
if whole.count(part_piece) > 1:
|
|
137
|
+
raise ValueError
|
|
138
|
+
|
|
139
|
+
whole = whole.replace(part_piece, replace_piece, 1)
|
|
140
|
+
|
|
141
|
+
return whole
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def replace_part_with_missing_leading_whitespace(
    whole_lines: list[str],
    part_lines: list[str],
    replace_lines: list[str],
) -> str | None:
    """Replace `part_lines` in `whole_lines`, tolerating a uniform indent offset.

    The SEARCH and REPLACE lines are first outdented by the largest amount of
    leading whitespace shared by all of their non-blank lines. Each window of
    `whole_lines` is then compared with the outdented SEARCH lines; at the
    first window that matches apart from a constant leading prefix, the
    REPLACE lines are re-indented with that prefix and spliced in.

    Returns the updated text, or ``None`` when no window matches.
    """

    def indent_width(text: str) -> int:
        return len(text) - len(text.lstrip())

    # Blank lines carry no indentation information, so they are ignored here.
    widths = [
        indent_width(text) for text in (*part_lines, *replace_lines) if text.strip()
    ]
    shared = min(widths, default=0)
    if shared:
        part_lines = [text[shared:] if text.strip() else text for text in part_lines]
        replace_lines = [
            text[shared:] if text.strip() else text for text in replace_lines
        ]

    window = len(part_lines)
    for start in range(len(whole_lines) - window + 1):
        stop = start + window
        prefix = match_but_for_leading_whitespace(whole_lines[start:stop], part_lines)
        if prefix is not None:
            indented = [
                prefix + text if text.strip() else text for text in replace_lines
            ]
            return "".join(whole_lines[:start] + indented + whole_lines[stop:])

    return None
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def match_but_for_leading_whitespace(
|
|
187
|
+
whole_lines: list[str], part_lines: list[str]
|
|
188
|
+
) -> str | None:
|
|
189
|
+
num = len(whole_lines)
|
|
190
|
+
|
|
191
|
+
# Does the non-whitespace all agree?
|
|
192
|
+
if not all(whole_lines[i].lstrip() == part_lines[i].lstrip() for i in range(num)):
|
|
193
|
+
return None
|
|
194
|
+
|
|
195
|
+
# Are they all offset the same?
|
|
196
|
+
add = set(
|
|
197
|
+
whole_lines[i][: len(whole_lines[i]) - len(part_lines[i])]
|
|
198
|
+
for i in range(num)
|
|
199
|
+
if whole_lines[i].strip()
|
|
200
|
+
)
|
|
201
|
+
if len(add) != 1:
|
|
202
|
+
return None
|
|
203
|
+
|
|
204
|
+
return add.pop()
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def strip_quoted_wrapping(
    res: str, fname: str | None = None, fence: Fence = DEFAULT_FENCE
) -> str:
    """Remove a leading filename line and surrounding fence lines from `res`.

    If `fname` is given and the first line ends with its base name, that line
    is dropped. If the remaining first line starts with the opening fence and
    the last line starts with the closing fence, both are dropped as well.
    Non-empty output always ends with a newline; empty input is returned
    unchanged.
    """
    if not res:
        return res

    body = res.splitlines()

    if fname:
        basename = Path(fname).name
        if body[0].strip().endswith(basename):
            body = body[1:]

    is_fenced = (
        bool(body) and body[0].startswith(fence[0]) and body[-1].startswith(fence[1])
    )
    if is_fenced:
        body = body[1:-1]

    text = "\n".join(body)
    if text and not text.endswith("\n"):
        text = text + "\n"
    return text
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def do_replace(
    fname: str | Path,
    content: str | None,
    before_text: str,
    after_text: str,
    fence: Fence | None = None,
) -> str | None:
    """Compute the new content of `fname` after one SEARCH/REPLACE edit.

    Filename and fence wrapping is stripped from both texts first. When the
    SEARCH text is blank, the REPLACE text is appended to `content`; if the
    file does not exist yet it is created empty on disk and treated as empty
    content. Otherwise the SEARCH text is located in `content` and replaced.

    Returns the new content, or ``None`` when `content` is ``None`` (and no
    new file was started) or when the SEARCH text could not be matched.
    """
    active_fence = fence if fence else DEFAULT_FENCE
    target_name = str(fname)
    search_text = strip_quoted_wrapping(before_text, target_name, active_fence)
    replacement = strip_quoted_wrapping(after_text, target_name, active_fence)
    search_is_blank = not search_text.strip()

    # Does it want to make a new file?
    target = Path(fname)
    if not target.exists() and search_is_blank:
        target.touch()
        content = ""

    if content is None:
        return None

    if search_is_blank:
        # Append to existing file, or start a new file.
        return content + replacement

    return replace_most_similar_chunk(content, search_text, replacement)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def apply_edits(
    edits: Sequence[EditBlock],
    root: str | Path,
    chat_files: Sequence[str | Path] | None = None,
    fence: Fence = DEFAULT_FENCE,
    dry_run: bool = False,
) -> ApplyResult:
    """Apply SEARCH/REPLACE edits to files below `root`.

    Each edit is tried against its own path first. If that fails and the
    SEARCH text is non-blank, the edit is tried against every file in
    `chat_files`, and the first file that matches is edited instead. An edit
    with a blank SEARCH text on a missing path creates a new file (parent
    directories included).

    Args:
        edits: The edit blocks to apply, in order.
        root: Directory that every edited path must resolve inside.
        chat_files: Optional fallback files for edits that do not match
            their own path.
        fence: Fence pair used to strip wrapping and to format the error
            report.
        dry_run: When true, compute the outcome without writing, creating or
            touching any file.

    Returns:
        An ``ApplyResult`` carrying the edits with their final paths, when
        every edit applied.

    Raises:
        ApplyError: If at least one edit did not apply. Its message is a
            report describing each failed block.
        PathEscapeError: If an edit path or chat file resolves outside `root`.
    """
    failed: list[EditBlock] = []
    passed: list[EditBlock] = []
    updated_edits: list[EditBlock] = []

    root_path = Path(root)
    fallback_files = _resolve_chat_files(root_path, chat_files)

    for edit in edits:
        path = edit.path
        original = edit.original
        updated = edit.updated

        full_path = _resolve_path(root_path, path)
        new_content: str | None = None

        if full_path.exists():
            content = full_path.read_text(encoding="utf-8")
            new_content = do_replace(full_path, content, original, updated, fence)
        elif not original.strip():
            if dry_run:
                # do_replace() would touch() the new file. A dry run must not
                # write, so compute what the new file would contain directly.
                new_content = strip_quoted_wrapping(
                    updated, str(full_path), fence or DEFAULT_FENCE
                )
            else:
                # The new file may live in a directory that does not exist yet.
                full_path.parent.mkdir(parents=True, exist_ok=True)
                new_content = do_replace(full_path, None, original, updated, fence)

        # If the edit failed, and this is not a "create a new file" with an empty original...
        # https://github.com/Aider-AI/aider/issues/2258
        if not new_content and original.strip():
            # Try patching any of the other files in the chat.
            for candidate_file in fallback_files:
                content = candidate_file.read_text(encoding="utf-8")
                new_content = do_replace(
                    candidate_file, content, original, updated, fence
                )
                if new_content:
                    path = _make_relative(candidate_file, root_path)
                    full_path = candidate_file
                    break

        updated_edit = EditBlock(path=path, original=original, updated=updated)
        updated_edits.append(updated_edit)

        if new_content:
            if not dry_run:
                full_path.write_text(new_content, encoding="utf-8")
            passed.append(edit)
        else:
            failed.append(edit)

    if not failed:
        return ApplyResult(updated_edits=updated_edits)

    blocks = "block" if len(failed) == 1 else "blocks"
    result = f"# {len(failed)} SEARCH/REPLACE {blocks} failed to match!\n"
    for edit in failed:
        path = edit.path
        original = edit.original
        updated = edit.updated

        full_path = _resolve_path(root_path, path)
        # A failed edit may point at a file that does not exist. Report it
        # through ApplyError rather than crashing with FileNotFoundError.
        content = full_path.read_text(encoding="utf-8") if full_path.is_file() else ""

        result += f"""
## SearchReplaceNoExactMatch: This SEARCH block failed to exactly match lines in {path}
<<<<<<< SEARCH
{original}=======
{updated}>>>>>>> REPLACE

"""
        did_you_mean = find_similar_lines(original, content)
        if did_you_mean:
            result += f"""Did you mean to match some of these actual lines from {path}?

{fence[0]}
{did_you_mean}
{fence[1]}

"""

        if updated in content and updated:
            result += f"""Are you sure you need this SEARCH/REPLACE block?
The REPLACE lines are already in {path}!

"""

    result += (
        "The SEARCH section must exactly match an existing block of lines including all white"
        " space, comments, indentation, docstrings, etc\n"
    )
    if passed:
        passed_blocks = "block" if len(passed) == 1 else "blocks"
        if dry_run:
            result += f"""
# The other {len(passed)} SEARCH/REPLACE {passed_blocks} would apply successfully.
"""
        else:
            result += f"""
# The other {len(passed)} SEARCH/REPLACE {passed_blocks} were applied successfully.
Don't re-send them.
Just reply with fixed versions of the {blocks} above that failed to match.
"""

    raise ApplyError(
        message=result, failed=failed, passed=passed, updated_edits=updated_edits
    )
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def _resolve_path(root_path: Path, path: str | Path) -> Path:
|
|
383
|
+
resolved_root = root_path.resolve()
|
|
384
|
+
file_path = Path(path)
|
|
385
|
+
if file_path.is_absolute():
|
|
386
|
+
resolved_path = file_path.resolve()
|
|
387
|
+
else:
|
|
388
|
+
resolved_path = (resolved_root / file_path).resolve()
|
|
389
|
+
|
|
390
|
+
try:
|
|
391
|
+
resolved_path.relative_to(resolved_root)
|
|
392
|
+
except ValueError as exc:
|
|
393
|
+
raise PathEscapeError(
|
|
394
|
+
f"Refusing to edit path '{path}' because it resolves outside root "
|
|
395
|
+
f"'{resolved_root}'."
|
|
396
|
+
) from exc
|
|
397
|
+
|
|
398
|
+
return resolved_path
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _resolve_chat_files(
|
|
402
|
+
root_path: Path,
|
|
403
|
+
chat_files: Sequence[str | Path] | None,
|
|
404
|
+
) -> list[Path]:
|
|
405
|
+
if chat_files is None:
|
|
406
|
+
return []
|
|
407
|
+
|
|
408
|
+
resolved_paths: list[Path] = []
|
|
409
|
+
for chat_file in chat_files:
|
|
410
|
+
resolved_paths.append(_resolve_path(root_path, chat_file))
|
|
411
|
+
return resolved_paths
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def _make_relative(path: Path, root_path: Path) -> str:
    """Return `path` as a string relative to `root_path` when possible.

    If `path` is not located under `root_path`, it is returned unchanged as a
    string.
    """
    try:
        relative = path.relative_to(root_path)
    except ValueError:
        return str(path)
    return str(relative)
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def apply_diff(
    llm_response: str,
    root: str | Path,
    chat_files: Sequence[str | Path] | None = None,
    fence: Fence = DEFAULT_FENCE,
    dry_run: bool = False,
) -> ApplyResult:
    """Parse SEARCH/REPLACE blocks from an LLM response and apply them to disk.

    Convenience wrapper around ``parse_edit_blocks`` + ``apply_edits``.

    Args:
        llm_response: Raw model output containing SEARCH/REPLACE blocks.
        root: Directory the edited paths are resolved against.
        chat_files: Optional fallback files, forwarded to ``apply_edits``.
        fence: Fence pair used for both parsing and applying.
        dry_run: Forwarded to ``apply_edits``. Defaults to ``False``, which
            keeps the previous behaviour.

    Returns:
        The ``ApplyResult`` produced by ``apply_edits``.

    Raises:
        ParseError: If the response contains no valid blocks or has
            malformed syntax.
        ApplyError: If one or more blocks fail to match.
    """
    result = parse_edit_blocks(llm_response, fence=fence)
    if not result.edits:
        raise ParseError("No SEARCH/REPLACE blocks found in the LLM response.")
    return apply_edits(
        result.edits,
        root=root,
        chat_files=chat_files,
        fence=fence,
        dry_run=dry_run,
    )
|
search_replace/errors.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from .types import EditBlock
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SearchReplaceError(ValueError):
    """Common base class for this module's error types; a ``ValueError`` subclass."""

    pass
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ParseError(SearchReplaceError):
    """Error type for SEARCH/REPLACE text that cannot be parsed."""

    pass
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class MissingFilenameError(ParseError):
    """``ParseError`` subtype for a block that has no usable filename."""

    pass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class PathEscapeError(SearchReplaceError):
    """Error type for a path that resolves outside the permitted root directory."""

    pass
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(slots=True)
class ApplyError(SearchReplaceError):
    """Error carrying the outcome of an edit run in which some blocks failed.

    ``str(error)`` returns ``message``.

    NOTE(review): the dataclass-generated ``__init__`` does not call
    ``Exception.__init__``, so ``error.args`` presumably stays empty; confirm
    before relying on ``args`` or on pickling this exception.
    """

    # Human-readable report; returned verbatim by __str__.
    message: str
    # Edit blocks that could not be applied.
    failed: list[EditBlock]
    # Edit blocks that applied.
    passed: list[EditBlock]
    # All processed edit blocks; presumably with their final paths. TODO confirm
    updated_edits: list[EditBlock]

    def __str__(self) -> str:
        return self.message
|
search_replace/fuzzy.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import math
|
|
2
|
+
from difflib import SequenceMatcher
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def replace_closest_edit_distance(
|
|
6
|
+
whole_lines: list[str],
|
|
7
|
+
part: str,
|
|
8
|
+
part_lines: list[str],
|
|
9
|
+
replace_lines: list[str],
|
|
10
|
+
) -> str | None:
|
|
11
|
+
similarity_thresh = 0.8
|
|
12
|
+
|
|
13
|
+
max_similarity = 0.0
|
|
14
|
+
most_similar_chunk_start = -1
|
|
15
|
+
most_similar_chunk_end = -1
|
|
16
|
+
|
|
17
|
+
scale = 0.1
|
|
18
|
+
min_len = math.floor(len(part_lines) * (1 - scale))
|
|
19
|
+
max_len = math.ceil(len(part_lines) * (1 + scale))
|
|
20
|
+
|
|
21
|
+
for length in range(min_len, max_len):
|
|
22
|
+
for i in range(len(whole_lines) - length + 1):
|
|
23
|
+
chunk_lines = whole_lines[i : i + length]
|
|
24
|
+
chunk = "".join(chunk_lines)
|
|
25
|
+
|
|
26
|
+
similarity = SequenceMatcher(None, chunk, part).ratio()
|
|
27
|
+
|
|
28
|
+
if similarity > max_similarity and similarity:
|
|
29
|
+
max_similarity = similarity
|
|
30
|
+
most_similar_chunk_start = i
|
|
31
|
+
most_similar_chunk_end = i + length
|
|
32
|
+
|
|
33
|
+
if max_similarity < similarity_thresh:
|
|
34
|
+
return None
|
|
35
|
+
|
|
36
|
+
modified_whole = (
|
|
37
|
+
whole_lines[:most_similar_chunk_start]
|
|
38
|
+
+ replace_lines
|
|
39
|
+
+ whole_lines[most_similar_chunk_end:]
|
|
40
|
+
)
|
|
41
|
+
return "".join(modified_whole)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def find_similar_lines(
    search_lines: str, content_lines: str, threshold: float = 0.6
) -> str:
    """Find the lines of `content_lines` that best resemble `search_lines`.

    Windows with as many lines as `search_lines` are scored line by line with
    ``SequenceMatcher.ratio()``; the first window to reach the highest score
    is the best match.

    Returns:
        ``""`` when no window scores above zero or the best score is below
        `threshold`. Otherwise the best window itself when its first and last
        lines equal those of the search text, or else that window widened by
        up to five lines of context on each side. Lines are joined with
        ``"\\n"``.
    """
    wanted = search_lines.splitlines()
    haystack = content_lines.splitlines()
    span = len(wanted)

    top_ratio = 0.0
    top_chunk: list[str] | None = None
    top_start = -1

    for start in range(len(haystack) - span + 1):
        window = haystack[start : start + span]
        score = SequenceMatcher(None, wanted, window).ratio()
        if score > top_ratio:
            top_ratio, top_chunk, top_start = score, window, start

    if top_chunk is None or top_ratio < threshold:
        return ""

    same_edges = (
        bool(top_chunk)
        and bool(wanted)
        and top_chunk[0] == wanted[0]
        and top_chunk[-1] == wanted[-1]
    )
    if same_edges:
        return "\n".join(top_chunk)

    padding = 5
    first = max(0, top_start - padding)
    last = min(len(haystack), top_start + span + padding)
    return "\n".join(haystack[first:last])
|
search_replace/parser.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
import difflib
|
|
2
|
+
import re
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Iterator, Sequence, TypeAlias
|
|
5
|
+
|
|
6
|
+
from .errors import MissingFilenameError, ParseError
|
|
7
|
+
from .types import DEFAULT_FENCE, EditBlock, Fence, ParseResult
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def wrap_fence(name: str) -> Fence:
    """Build an XML-style fence pair for `name`: ``<name>`` and ``</name>``."""
    opening = "<{}>".format(name)
    closing = "</{}>".format(name)
    return opening, closing
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Candidate (opening, closing) fence pairs: two backtick fences followed by
# XML-style tag pairs built with wrap_fence().
all_fences = [
    ("`" * 3, "`" * 3),
    ("`" * 4, "`" * 4),  # LLMs ignore and revert to triple-backtick, causing #2879
    wrap_fence("source"),
    wrap_fence("code"),
    wrap_fence("pre"),
    wrap_fence("codeblock"),
    wrap_fence("sourcecode"),
]


# Regex sources for the three marker lines of a block. Each accepts 5 to 9
# marker characters and optional trailing whitespace.
HEAD = r"^<{5,9} SEARCH>?\s*$"
DIVIDER = r"^={5,9}\s*$"
UPDATED = r"^>{5,9} REPLACE\s*$"

# Canonical spellings of the markers, used in error messages.
HEAD_ERR = "<<<<<<< SEARCH"
DIVIDER_ERR = "======="
UPDATED_ERR = ">>>>>>> REPLACE"

# Alternation of the three marker patterns, compiled into one splitter.
# NOTE(review): split_re is not referenced by the code visible in this diff.
separators = "|".join([HEAD, DIVIDER, UPDATED])
split_re = re.compile(r"^((?:" + separators + r")[ ]*\n)", re.MULTILINE | re.DOTALL)

# Template for the missing-filename error; rendered with .format(fence=...).
missing_filename_err = (
    "Bad/missing filename. The filename must be alone on the line before the opening fence"
    " {fence[0]}"
)

# Always be willing to treat triple-backticks as a fence when searching for filenames.
triple_backticks = "`" * 3

# One parsed block as a (filename, original text, updated text) tuple.
ParsedEditBlock: TypeAlias = tuple[str, str, str]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def strip_filename(filename: str, fence: Fence) -> str | None:
    """Extract a candidate filename from one line of text.

    A line that is just ``...`` yields ``None``. A line starting with the
    opening fence (or with triple backticks) yields whatever follows the
    fence, but only if that remainder contains a ``.`` or a ``/``; otherwise
    ``None``. Any other line is returned with a trailing ``:``, leading ``#``,
    surrounding whitespace, backticks and asterisks stripped, which may leave
    an empty string.
    """
    text = filename.strip()

    if text == "...":
        return None

    for opener in (fence[0], triple_backticks):
        if text.startswith(opener):
            remainder = text[len(opener) :]
            looks_like_path = bool(remainder) and (
                "." in remainder or "/" in remainder
            )
            return remainder if looks_like_path else None

    # Escaped underscores ("\_") are left untouched; see
    # https://github.com/Aider-AI/aider/issues/1158
    return text.rstrip(":").lstrip("#").strip().strip("`").strip("*")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def find_filename(
    lines: list[str], fence: Fence, valid_fnames: Sequence[str] | None
) -> str | None:
    """
    Search backwards through `lines` for the filename of an edit block.

    Deepseek Coder v2 has been doing this:

     ```python
    word_count.py
    ```
    ```python
    <<<<<<< SEARCH
    ...

    This is a more flexible search back for filenames.

    At most the last three lines are inspected, nearest first, and the walk
    stops at the first line that is not a fence line. Candidates are then
    resolved in this order: exact match in `valid_fnames`, base-name match,
    close fuzzy match, first candidate containing a ``.``, first candidate.

    Returns the chosen filename, or ``None`` if no candidate was found.
    `lines` is not modified.
    """
    if valid_fnames is None:
        valid_fnames = []

    # Go back through the 3 preceding lines, nearest first. Work on a reversed
    # copy so that the caller's list is left untouched.
    recent = lines[::-1][:3]

    filenames: list[str] = []
    for line in recent:
        filename = strip_filename(line, fence)
        if filename:
            filenames.append(filename)

        # Only continue as long as we keep seeing fences.
        if not line.startswith(fence[0]) and not line.startswith(triple_backticks):
            break

    if not filenames:
        return None

    # Check for exact match first.
    for fname in filenames:
        if fname in valid_fnames:
            return fname

    # Check for partial match (basename match).
    for fname in filenames:
        for valid_name in valid_fnames:
            if fname == Path(valid_name).name:
                return valid_name

    # Perform fuzzy matching with valid_fnames.
    for fname in filenames:
        close_matches = difflib.get_close_matches(fname, valid_fnames, n=1, cutoff=0.8)
        if len(close_matches) == 1:
            return close_matches[0]

    # If no fuzzy match, look for a file w/extension.
    for fname in filenames:
        if "." in fname:
            return fname

    # `filenames` is known to be non-empty here.
    return filenames[0]
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def find_original_update_blocks(
    content: str,
    fence: Fence = DEFAULT_FENCE,
    valid_fnames: Sequence[str] | None = None,
) -> Iterator[ParsedEditBlock]:
    """Yield ``(filename, original, updated)`` for each SEARCH/REPLACE block.

    `content` is scanned line by line. At every SEARCH marker the filename is
    looked up in the (up to) three preceding lines, falling back to the
    filename of the previous block. The lines up to the divider form the
    original text and the lines up to the REPLACE marker (or another divider)
    form the updated text.

    Raises:
        MissingFilenameError: If a block has no filename and no earlier block
            supplied one.
        ParseError: If the divider or the closing marker is missing.

    Every error message is prefixed with the input processed so far. The
    specific ``ParseError`` subclass is preserved when the message is
    rewritten, so callers can still catch ``MissingFilenameError``.
    """
    lines = content.splitlines(keepends=True)
    i = 0
    current_filename: str | None = None

    head_pattern = re.compile(HEAD)
    divider_pattern = re.compile(DIVIDER)
    updated_pattern = re.compile(UPDATED)

    while i < len(lines):
        line = lines[i]

        if head_pattern.match(line.strip()):
            try:
                # If next line after HEAD exists and is DIVIDER, it's a new file.
                if i + 1 < len(lines) and divider_pattern.match(lines[i + 1].strip()):
                    filename = find_filename(lines[max(0, i - 3) : i], fence, None)
                else:
                    filename = find_filename(
                        lines[max(0, i - 3) : i], fence, valid_fnames
                    )

                if not filename:
                    if current_filename:
                        filename = current_filename
                    else:
                        raise MissingFilenameError(
                            missing_filename_err.format(fence=fence)
                        )

                current_filename = filename

                original_text: list[str] = []
                i += 1
                while i < len(lines) and not divider_pattern.match(lines[i].strip()):
                    original_text.append(lines[i])
                    i += 1

                if i >= len(lines) or not divider_pattern.match(lines[i].strip()):
                    raise ParseError(f"Expected `{DIVIDER_ERR}`")

                updated_text: list[str] = []
                i += 1
                while i < len(lines) and not (
                    updated_pattern.match(lines[i].strip())
                    or divider_pattern.match(lines[i].strip())
                ):
                    updated_text.append(lines[i])
                    i += 1

                if i >= len(lines) or not (
                    updated_pattern.match(lines[i].strip())
                    or divider_pattern.match(lines[i].strip())
                ):
                    raise ParseError(f"Expected `{UPDATED_ERR}` or `{DIVIDER_ERR}`")

                yield filename, "".join(original_text), "".join(updated_text)
            except ValueError as exc:
                processed = "".join(lines[: i + 1])
                # Guard against exceptions raised without arguments.
                err = exc.args[0] if exc.args else str(exc)
                # ParseError and its subclasses are ValueErrors too. Re-raise
                # with the same class so MissingFilenameError is not downgraded
                # to a plain ParseError.
                error_type = type(exc) if isinstance(exc, ParseError) else ParseError
                raise error_type(f"{processed}\n^^^ {err}") from exc

        i += 1
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def parse_edit_blocks(
    content: str,
    fence: Fence = DEFAULT_FENCE,
    valid_fnames: Sequence[str] | None = None,
) -> ParseResult:
    """Parse every SEARCH/REPLACE block in `content` into ``EditBlock`` objects.

    Delegates to ``find_original_update_blocks`` and wraps each yielded
    ``(path, original, updated)`` tuple. Errors raised by the parser propagate
    unchanged.
    """
    parsed: list[EditBlock] = []
    blocks = find_original_update_blocks(
        content, fence=fence, valid_fnames=valid_fnames
    )
    for path, original, updated in blocks:
        parsed.append(EditBlock(path=path, original=original, updated=updated))
    return ParseResult(edits=parsed)
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
# flake8: noqa: E501
|
|
2
|
+
|
|
3
|
+
from typing import NamedTuple
|
|
4
|
+
|
|
5
|
+
from .types import DEFAULT_FENCE, Fence
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class FewShotExampleMessages(NamedTuple):
    """Two-turn few-shot examples that demonstrate the SEARCH/REPLACE format.

    Include these at the *beginning* of the conversation history (before the real
    user request) to improve format reliability.  Models that see worked examples
    in context produce significantly better-structured diffs.

    Fields are plain strings — the caller maps them into whatever message type
    their framework expects (e.g. Pydantic AI ``ModelRequest``/``ModelResponse``,
    OpenAI ``{"role": "user", ...}`` dicts, etc.).
    """

    # Fields are declared in conversation order: user, assistant, user, assistant.
    first_user_message: str
    first_assistant_message: str
    second_user_message: str
    second_assistant_message: str
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class EditBlockFencedPrompts:
    # Container for the prompt templates of the fenced SEARCH/REPLACE format.
    # ``main_system`` and ``system_reminder`` are ``str.format`` templates;
    # between them they use the placeholders ``{fence[0]}``, ``{fence[1]}``,
    # ``{final_reminders}`` and ``{quad_backtick_reminder}``.
    # ``example_messages`` is a list of role/content dicts (user, assistant,
    # user, assistant) whose assistant contents carry ``{fence[0]}`` /
    # ``{fence[1]}`` placeholders.
|
|
27
|
+
main_system = """Act as an expert software developer.
|
|
28
|
+
Always use best practices when coding.
|
|
29
|
+
Respect and use existing conventions, libraries, etc that are already present in the code base.
|
|
30
|
+
{final_reminders}
|
|
31
|
+
Take requests for changes to the supplied code.
|
|
32
|
+
If the request is ambiguous, ask questions.
|
|
33
|
+
|
|
34
|
+
Once you understand the request you MUST:
|
|
35
|
+
|
|
36
|
+
1. Decide if you need to propose *SEARCH/REPLACE* edits to any files that haven't been added to the chat. You can create new files without asking!
|
|
37
|
+
|
|
38
|
+
But if you need to propose edits to existing files not already added to the chat, you *MUST* tell the user their full path names and ask them to *add the files to the chat*.
|
|
39
|
+
End your reply and wait for their approval.
|
|
40
|
+
You can keep asking if you then decide you need to edit more files.
|
|
41
|
+
|
|
42
|
+
2. Think step-by-step and explain the needed changes in a few short sentences.
|
|
43
|
+
|
|
44
|
+
3. Describe each change with a *SEARCH/REPLACE block* per the examples below.
|
|
45
|
+
|
|
46
|
+
All changes to files must use this *SEARCH/REPLACE block* format.
|
|
47
|
+
ONLY EVER RETURN CODE IN A *SEARCH/REPLACE BLOCK*!
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
system_reminder = """
|
|
51
|
+
# *SEARCH/REPLACE block* Rules:
|
|
52
|
+
|
|
53
|
+
Every *SEARCH/REPLACE block* must use this format:
|
|
54
|
+
1. The opening fence and code language, eg: {fence[0]}python
|
|
55
|
+
2. The *FULL* file path alone on a line, verbatim. No bold asterisks, no quotes around it, no escaping of characters, etc.
|
|
56
|
+
3. The start of search block: <<<<<<< SEARCH
|
|
57
|
+
4. A contiguous chunk of lines to search for in the existing source code
|
|
58
|
+
5. The dividing line: =======
|
|
59
|
+
6. The lines to replace into the source code
|
|
60
|
+
7. The end of the replace block: >>>>>>> REPLACE
|
|
61
|
+
8. The closing fence: {fence[1]}
|
|
62
|
+
|
|
63
|
+
Use the *FULL* file path, as shown to you by the user.
|
|
64
|
+
{quad_backtick_reminder}
|
|
65
|
+
Every *SEARCH* section must *EXACTLY MATCH* the existing file content, character for character, including all comments, docstrings, etc.
|
|
66
|
+
If the file contains code or other data wrapped/escaped in json/xml/quotes or other containers, you need to propose edits to the literal contents of the file, including the container markup.
|
|
67
|
+
|
|
68
|
+
*SEARCH/REPLACE* blocks will *only* replace the first match occurrence.
|
|
69
|
+
Including multiple unique *SEARCH/REPLACE* blocks if needed.
|
|
70
|
+
Include enough lines in each SEARCH section to uniquely match each set of lines that need to change.
|
|
71
|
+
|
|
72
|
+
Keep *SEARCH/REPLACE* blocks concise.
|
|
73
|
+
Break large *SEARCH/REPLACE* blocks into a series of smaller blocks that each change a small portion of the file.
|
|
74
|
+
Include just the changing lines, and a few surrounding lines if needed for uniqueness.
|
|
75
|
+
Do not include long runs of unchanging lines in *SEARCH/REPLACE* blocks.
|
|
76
|
+
|
|
77
|
+
Only create *SEARCH/REPLACE* blocks for files that the user has added to the chat!
|
|
78
|
+
|
|
79
|
+
To move code within a file, use 2 *SEARCH/REPLACE* blocks: 1 to delete it from its current location, 1 to insert it in the new location.
|
|
80
|
+
|
|
81
|
+
Pay attention to which filenames the user wants you to edit, especially if they are asking you to create a new file.
|
|
82
|
+
|
|
83
|
+
If you want to put code in a new file, use a *SEARCH/REPLACE block* with:
|
|
84
|
+
- A new file path, including dir name if needed
|
|
85
|
+
- An empty `SEARCH` section
|
|
86
|
+
- The new file's contents in the `REPLACE` section
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
If the user just says something like "ok" or "go ahead" or "do that" they probably want you to make SEARCH/REPLACE blocks for the code changes you just proposed.
|
|
90
|
+
The user will say when they've applied your edits. If they haven't explicitly confirmed the edits have been applied, they probably want proper SEARCH/REPLACE blocks.
|
|
91
|
+
|
|
92
|
+
{final_reminders}
|
|
93
|
+
ONLY EVER RETURN CODE IN A *SEARCH/REPLACE BLOCK*!
|
|
94
|
+
"""
|
|
95
|
+
|
|
96
|
+
example_messages = [
|
|
97
|
+
dict(
|
|
98
|
+
role="user",
|
|
99
|
+
content="Change get_factorial() to use math.factorial",
|
|
100
|
+
),
|
|
101
|
+
dict(
|
|
102
|
+
role="assistant",
|
|
103
|
+
content="""To make this change we need to modify `mathweb/flask/app.py` to:
|
|
104
|
+
|
|
105
|
+
1. Import the math package.
|
|
106
|
+
2. Remove the existing factorial() function.
|
|
107
|
+
3. Update get_factorial() to call math.factorial instead.
|
|
108
|
+
|
|
109
|
+
Here are the *SEARCH/REPLACE* blocks:
|
|
110
|
+
|
|
111
|
+
{fence[0]}python
|
|
112
|
+
mathweb/flask/app.py
|
|
113
|
+
<<<<<<< SEARCH
|
|
114
|
+
from flask import Flask
|
|
115
|
+
=======
|
|
116
|
+
import math
|
|
117
|
+
from flask import Flask
|
|
118
|
+
>>>>>>> REPLACE
|
|
119
|
+
{fence[1]}
|
|
120
|
+
|
|
121
|
+
{fence[0]}python
|
|
122
|
+
mathweb/flask/app.py
|
|
123
|
+
<<<<<<< SEARCH
|
|
124
|
+
def factorial(n):
|
|
125
|
+
"compute factorial"
|
|
126
|
+
|
|
127
|
+
if n == 0:
|
|
128
|
+
return 1
|
|
129
|
+
else:
|
|
130
|
+
return n * factorial(n-1)
|
|
131
|
+
|
|
132
|
+
=======
|
|
133
|
+
>>>>>>> REPLACE
|
|
134
|
+
{fence[1]}
|
|
135
|
+
|
|
136
|
+
{fence[0]}python
|
|
137
|
+
mathweb/flask/app.py
|
|
138
|
+
<<<<<<< SEARCH
|
|
139
|
+
return str(factorial(n))
|
|
140
|
+
=======
|
|
141
|
+
return str(math.factorial(n))
|
|
142
|
+
>>>>>>> REPLACE
|
|
143
|
+
{fence[1]}
|
|
144
|
+
""",
|
|
145
|
+
),
|
|
146
|
+
dict(
|
|
147
|
+
role="user",
|
|
148
|
+
content="Refactor hello() into its own file.",
|
|
149
|
+
),
|
|
150
|
+
dict(
|
|
151
|
+
role="assistant",
|
|
152
|
+
content="""To make this change we need to modify `main.py` and make a new file `hello.py`:
|
|
153
|
+
|
|
154
|
+
1. Make a new hello.py file with hello() in it.
|
|
155
|
+
2. Remove hello() from main.py and replace it with an import.
|
|
156
|
+
|
|
157
|
+
Here are the *SEARCH/REPLACE* blocks:
|
|
158
|
+
|
|
159
|
+
{fence[0]}python
|
|
160
|
+
hello.py
|
|
161
|
+
<<<<<<< SEARCH
|
|
162
|
+
=======
|
|
163
|
+
def hello():
|
|
164
|
+
"print a greeting"
|
|
165
|
+
|
|
166
|
+
print("hello")
|
|
167
|
+
>>>>>>> REPLACE
|
|
168
|
+
{fence[1]}
|
|
169
|
+
|
|
170
|
+
{fence[0]}python
|
|
171
|
+
main.py
|
|
172
|
+
<<<<<<< SEARCH
|
|
173
|
+
def hello():
|
|
174
|
+
"print a greeting"
|
|
175
|
+
|
|
176
|
+
print("hello")
|
|
177
|
+
=======
|
|
178
|
+
from hello import hello
|
|
179
|
+
>>>>>>> REPLACE
|
|
180
|
+
{fence[1]}
|
|
181
|
+
""",
|
|
182
|
+
),
|
|
183
|
+
]
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def render_prompt(
    template: str,
    *,
    fence: Fence = DEFAULT_FENCE,
    final_reminders: str = "",
    quad_backtick_reminder: str = "",
) -> str:
    """Fill in the placeholders of a single prompt template.

    ``template`` is expanded with ``str.format`` using three named values:
    ``fence`` (so ``{fence[0]}`` / ``{fence[1]}`` resolve to the opening and
    closing fence), ``final_reminders`` and ``quad_backtick_reminder``.

    Returns the expanded string. Errors raised by ``str.format`` (for example
    for a placeholder that is not one of the three names) propagate.
    """
    substitutions = {
        "fence": fence,
        "final_reminders": final_reminders,
        "quad_backtick_reminder": quad_backtick_reminder,
    }
    return template.format(**substitutions)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def render_system_prompt(
    *,
    fence: Fence = DEFAULT_FENCE,
    final_reminders: str = "",
    quad_backtick_reminder: str = "",
) -> str:
    """Build the complete system prompt for the fenced editblock format.

    ``EditBlockFencedPrompts.main_system`` and
    ``EditBlockFencedPrompts.system_reminder`` are each rendered with the same
    ``fence``, ``final_reminders`` and ``quad_backtick_reminder`` values, and
    the two results are concatenated in that order.

    The result is plain text; the caller chooses how to attach it to their
    message list (for instance as the ``content`` of a
    ``{"role": "system", ...}`` dict, or as the ``instructions`` argument of a
    Pydantic AI agent).
    """
    shared = {
        "fence": fence,
        "final_reminders": final_reminders,
        "quad_backtick_reminder": quad_backtick_reminder,
    }
    head = render_prompt(EditBlockFencedPrompts.main_system, **shared)
    tail = render_prompt(EditBlockFencedPrompts.system_reminder, **shared)
    return "".join((head, tail))
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def get_example_messages(*, fence: Fence = DEFAULT_FENCE) -> FewShotExampleMessages:
    """Build the two-exchange few-shot examples with fence placeholders filled in.

    The ``content`` of every entry in
    ``EditBlockFencedPrompts.example_messages`` is expanded with
    ``str.format(fence=fence)``; the first four results become, in order, the
    fields of the returned ``FewShotExampleMessages``.

    Put these ahead of the real user request in the conversation history to
    improve format reliability.
    """
    rendered = []
    for message in EditBlockFencedPrompts.example_messages:
        rendered.append(message["content"].format(fence=fence))

    return FewShotExampleMessages(
        first_user_message=rendered[0],
        first_assistant_message=rendered[1],
        second_user_message=rendered[2],
        second_assistant_message=rendered[3],
    )
|
search_replace/types.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import TypeAlias
|
|
3
|
+
|
|
4
|
+
Fence: TypeAlias = tuple[str, str]
|
|
5
|
+
DEFAULT_FENCE: Fence = ("`" * 3, "`" * 3)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(frozen=True, slots=True)
|
|
9
|
+
class EditBlock:
|
|
10
|
+
path: str
|
|
11
|
+
original: str
|
|
12
|
+
updated: str
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True, slots=True)
class ParseResult:
    """Outcome of parsing: the edit blocks that were found, in order."""

    # Parsed edit blocks. The dataclass is frozen, but the list itself is a
    # regular mutable list.
    edits: list[EditBlock]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(frozen=True, slots=True)
class ApplyResult:
    """Outcome of applying edits, carried as a list of edit blocks."""

    # NOTE(review): presumably the edit blocks as they were actually applied;
    # confirm against the code that builds this result.
    updated_edits: list[EditBlock]
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: search-replace-py
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Parse and apply Aider-style SEARCH/REPLACE patch blocks to files
|
|
5
|
+
Project-URL: Homepage, https://github.com/marcius-llmus/search-replace-py
|
|
6
|
+
Project-URL: Repository, https://github.com/marcius-llmus/search-replace-py
|
|
7
|
+
Project-URL: Issues, https://github.com/marcius-llmus/search-replace-py/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/marcius-llmus/search-replace-py/releases
|
|
9
|
+
Author: marcin
|
|
10
|
+
License: MIT License
|
|
11
|
+
|
|
12
|
+
Copyright (c) 2026 marcin
|
|
13
|
+
|
|
14
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
15
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
16
|
+
in the Software without restriction, including without limitation the rights
|
|
17
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
18
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
19
|
+
furnished to do so, subject to the following conditions:
|
|
20
|
+
|
|
21
|
+
The above copyright notice and this permission notice shall be included in all
|
|
22
|
+
copies or substantial portions of the Software.
|
|
23
|
+
|
|
24
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
25
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
26
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
27
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
28
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
29
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
30
|
+
SOFTWARE.
|
|
31
|
+
License-File: LICENSE
|
|
32
|
+
Keywords: aider,diff,editblock,llm,patch,replace,search,tooling
|
|
33
|
+
Classifier: Development Status :: 3 - Alpha
|
|
34
|
+
Classifier: Intended Audience :: Developers
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Programming Language :: Python :: 3
|
|
37
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
39
|
+
Classifier: Topic :: Software Development :: Build Tools
|
|
40
|
+
Classifier: Topic :: Software Development :: Version Control
|
|
41
|
+
Classifier: Typing :: Typed
|
|
42
|
+
Requires-Python: >=3.14
|
|
43
|
+
Description-Content-Type: text/markdown
|
|
44
|
+
|
|
45
|
+
# search-replace-py
|
|
46
|
+
|
|
47
|
+
A standalone Python library for parsing and applying SEARCH/REPLACE patch blocks, extracted from [Aider's](https://github.com/Aider-AI/aider) editblock engine.
|
|
48
|
+
|
|
49
|
+
Use it to give any LLM the ability to propose and apply precise code changes using the battle-tested editblock format.
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## How it works
|
|
54
|
+
|
|
55
|
+
The editblock format is Aider's primary mechanism for LLM-driven code editing. The LLM is prompted to output changes as structured `SEARCH/REPLACE` blocks:
|
|
56
|
+
|
|
57
|
+
````
|
|
58
|
+
```python
|
|
59
|
+
mathweb/flask/app.py
|
|
60
|
+
<<<<<<< SEARCH
|
|
61
|
+
from flask import Flask
|
|
62
|
+
=======
|
|
63
|
+
import math
|
|
64
|
+
from flask import Flask
|
|
65
|
+
>>>>>>> REPLACE
|
|
66
|
+
```
|
|
67
|
+
````
|
|
68
|
+
|
|
69
|
+
This library provides:
|
|
70
|
+
|
|
71
|
+
1. **`render_system_prompt()`** — returns the rendered system prompt string to instruct the LLM.
|
|
72
|
+
2. **`get_example_messages()`** — returns a `FewShotExampleMessages` named tuple with four plain strings (two user + two assistant turns) to prepend to the conversation history.
|
|
73
|
+
3. **`apply_diff(llm_response, root)`** — parses the LLM's response and applies all blocks to disk in one call.
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## What is included
|
|
78
|
+
|
|
79
|
+
- Block parsing (`<<<<<<< SEARCH`, `=======`, `>>>>>>> REPLACE`) with filename discovery and fuzzy filename resolution.
|
|
80
|
+
- Three replacement strategies:
|
|
81
|
+
- exact match
|
|
82
|
+
- leading-whitespace-tolerant match
|
|
83
|
+
- dotdotdot (`...`) segmented replacement
|
|
84
|
+
- Typed errors (`ParseError`, `ApplyError`) for clean error handling in retry loops.
|
|
85
|
+
|
|
86
|
+
---
|
|
87
|
+
|
|
88
|
+
## Installation
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
pip install search-replace-py
|
|
92
|
+
# or with uv
|
|
93
|
+
uv add search-replace-py
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Quick start
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from pathlib import Path
|
|
102
|
+
from search_replace import render_system_prompt, get_example_messages, apply_diff
|
|
103
|
+
|
|
104
|
+
# 1. Build the system prompt — plain string, append your own context if needed
|
|
105
|
+
system_prompt = render_system_prompt()
|
|
106
|
+
|
|
107
|
+
# 2. Build the messages list; prepend few-shot examples before the real request
|
|
108
|
+
ex = get_example_messages()
|
|
109
|
+
messages = [{"role": "system", "content": system_prompt}]
|
|
110
|
+
messages += [
|
|
111
|
+
{"role": "user", "content": ex.first_user_message},
|
|
112
|
+
{"role": "assistant", "content": ex.first_assistant_message},
|
|
113
|
+
{"role": "user", "content": ex.second_user_message},
|
|
114
|
+
{"role": "assistant", "content": ex.second_assistant_message},
|
|
115
|
+
]
|
|
116
|
+
messages.append({"role": "user", "content": "Add a docstring to the greet() function in hello.py"})
|
|
117
|
+
|
|
118
|
+
# 3. Send to your LLM and get a response string
|
|
119
|
+
llm_response = "..."
|
|
120
|
+
|
|
121
|
+
# 4. Parse and apply in one call
|
|
122
|
+
apply_diff(llm_response, root=Path("."))
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## Integration with Pydantic AI
|
|
128
|
+
|
|
129
|
+
[Pydantic AI](https://ai.pydantic.dev) accepts a string for `instructions` and a list of `ModelMessage` objects for `message_history`.
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
from pathlib import Path
|
|
133
|
+
|
|
134
|
+
from pydantic_ai import Agent, ModelRequest, ModelResponse, TextPart, UserPromptPart
|
|
135
|
+
from search_replace import render_system_prompt, get_example_messages, apply_diff
|
|
136
|
+
|
|
137
|
+
ex = get_example_messages()
|
|
138
|
+
|
|
139
|
+
few_shot = [
|
|
140
|
+
ModelRequest(parts=[UserPromptPart(content=ex.first_user_message)]),
|
|
141
|
+
ModelResponse(parts=[TextPart(content=ex.first_assistant_message)]),
|
|
142
|
+
ModelRequest(parts=[UserPromptPart(content=ex.second_user_message)]),
|
|
143
|
+
ModelResponse(parts=[TextPart(content=ex.second_assistant_message)]),
|
|
144
|
+
]
|
|
145
|
+
|
|
146
|
+
agent = Agent("openai:gpt-5.2", instructions=render_system_prompt())
|
|
147
|
+
|
|
148
|
+
auth_py = Path("auth.py").read_text()
|
|
149
|
+
result = agent.run_sync(
|
|
150
|
+
f"Refactor the login function in auth.py to use bcrypt.\n\nauth.py\n```python\n{auth_py}\n```",
|
|
151
|
+
message_history=few_shot,
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
apply_diff(result.output, root=Path("."))
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### With dry-run validation before writing
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
from search_replace import parse_edit_blocks, apply_edits
|
|
161
|
+
from search_replace.errors import ApplyError
|
|
162
|
+
|
|
163
|
+
blocks = parse_edit_blocks(result.output)
|
|
164
|
+
|
|
165
|
+
# Validate all blocks match before touching disk.
|
|
166
|
+
# Without dry_run, blocks that match are written immediately — a later failure
|
|
167
|
+
# would leave files partially patched with no rollback.
|
|
168
|
+
try:
|
|
169
|
+
apply_edits(blocks.edits, root=Path("."), dry_run=True)
|
|
170
|
+
except ApplyError as e:
|
|
171
|
+
# feed the error back to the LLM for a retry
|
|
172
|
+
print(f"Patch would not apply: {e}")
|
|
173
|
+
else:
|
|
174
|
+
apply_edits(blocks.edits, root=Path("."))
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## Public API
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
from search_replace import (
|
|
183
|
+
# Prompt
|
|
184
|
+
render_system_prompt,
|
|
185
|
+
get_example_messages, # returns FewShotExampleMessages
|
|
186
|
+
FewShotExampleMessages, # NamedTuple: first/second_user/assistant_message
|
|
187
|
+
# render_prompt (renders a single template string) is not re-exported here;
# import it with: from search_replace.prompts import render_prompt
|
|
188
|
+
EditBlockFencedPrompts, # raw class with main_system, system_reminder, example_messages
|
|
189
|
+
|
|
190
|
+
# Parse + apply (convenience)
|
|
191
|
+
apply_diff,
|
|
192
|
+
|
|
193
|
+
# Parsing
|
|
194
|
+
parse_edit_blocks,
|
|
195
|
+
find_original_update_blocks,
|
|
196
|
+
EditBlock,
|
|
197
|
+
|
|
198
|
+
# Applying
|
|
199
|
+
apply_edits, # pass dry_run=True to validate without writing
|
|
200
|
+
|
|
201
|
+
# Errors
|
|
202
|
+
ParseError,
|
|
203
|
+
ApplyError,
|
|
204
|
+
MissingFilenameError,
|
|
205
|
+
)
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
210
|
+
## Tests and validation
|
|
211
|
+
|
|
212
|
+
- `tests/test_parser.py` — block parsing, filename resolution, edge cases
|
|
213
|
+
- `tests/test_apply.py` — replacement strategies, whitespace tolerance, new-file creation
|
|
214
|
+
- `tests/test_prompts.py` — `render_system_prompt` and `get_example_messages` output
|
|
215
|
+
- `tests/test_parity_harness.py` — byte-for-byte comparison against Aider's reference output on the real 100K-line `chat-history.md` fixture
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
uv run python -m pytest tests/
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## Credits
|
|
224
|
+
|
|
225
|
+
The parsing engine, replacement strategies, and prompt templates in this library are derived from [Aider](https://github.com/Aider-AI/aider), created by [Paul Gauthier](https://github.com/paul-gauthier). Aider is an outstanding AI pair-programming tool — this library simply extracts and packages its editblock mechanism so it can be reused in other applications. All credit for the original design and implementation goes to him.
|
|
226
|
+
|
|
227
|
+
---
|
|
228
|
+
|
|
229
|
+
## Extraction notes
|
|
230
|
+
|
|
231
|
+
- Extracted from `aider/coders/editblock_coder.py` and the fenced editblock prompt module.
|
|
232
|
+
- Runtime coupling to Aider's coder/model lifecycle is fully removed.
|
|
233
|
+
- Error message contracts for malformed blocks and failed apply paths are preserved to maintain LLM retry-loop compatibility.
|
|
234
|
+
- `replace_closest_edit_distance()` remains defined but inactive, preserving the original behaviour of the early return in `replace_most_similar_chunk()`.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
search_replace/__init__.py,sha256=Jyzs5GZMCtYMVcdqJkoR07EIxGXJxf14wKZDlOneYQ0,907
|
|
2
|
+
search_replace/apply.py,sha256=V04eEtTGRlJ4_gHGsvUJ9UDO7taOgR_sLNLU8jh3tcQ,13363
|
|
3
|
+
search_replace/errors.py,sha256=aR3QrSi9wNFX7auxZ-3b3ehqp6qwng4CAjURldhLztA,497
|
|
4
|
+
search_replace/fuzzy.py,sha256=YsEVhstYnM_dIBF-_UWWinlxyC1kuJrWA2ok6Rel4Vs,2347
|
|
5
|
+
search_replace/parser.py,sha256=88zS_clAgpLl7zIfrKxs6pTiwzJPe67FzwoULNInG1c,6542
|
|
6
|
+
search_replace/prompts.py,sha256=IHcxq-EVp54JgO1DIqHfc8RfWxf90-CV4ypp4x4q8-I,7934
|
|
7
|
+
search_replace/types.py,sha256=aHff-EA8lPg7h3AEpXZdVHP9KTQPwFu1qU7BApb4Xdw,421
|
|
8
|
+
search_replace_py-0.0.1.dist-info/METADATA,sha256=0cElG_DIB_Qu2JIO3l4_RmyZmvicQlqyU4bRo2re9UA,8494
|
|
9
|
+
search_replace_py-0.0.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
10
|
+
search_replace_py-0.0.1.dist-info/licenses/LICENSE,sha256=cOD3IuAXuzQsvTmvINNjkW2NCRMgkov1vBliZYjg5gk,1063
|
|
11
|
+
search_replace_py-0.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 marcin
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|