deepy-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. deepy/__init__.py +9 -0
  2. deepy/__main__.py +7 -0
  3. deepy/cli.py +413 -0
  4. deepy/config/__init__.py +21 -0
  5. deepy/config/settings.py +237 -0
  6. deepy/data/__init__.py +1 -0
  7. deepy/data/tools/AskUserQuestion.md +10 -0
  8. deepy/data/tools/WebFetch.md +9 -0
  9. deepy/data/tools/WebSearch.md +9 -0
  10. deepy/data/tools/__init__.py +1 -0
  11. deepy/data/tools/bash.md +7 -0
  12. deepy/data/tools/edit.md +13 -0
  13. deepy/data/tools/modify.md +17 -0
  14. deepy/data/tools/read.md +8 -0
  15. deepy/data/tools/write.md +12 -0
  16. deepy/errors.py +63 -0
  17. deepy/llm/__init__.py +13 -0
  18. deepy/llm/agent.py +31 -0
  19. deepy/llm/context.py +109 -0
  20. deepy/llm/events.py +187 -0
  21. deepy/llm/model_capabilities.py +7 -0
  22. deepy/llm/provider.py +81 -0
  23. deepy/llm/replay.py +120 -0
  24. deepy/llm/runner.py +412 -0
  25. deepy/llm/thinking.py +30 -0
  26. deepy/prompts/__init__.py +6 -0
  27. deepy/prompts/compact.py +100 -0
  28. deepy/prompts/rules.py +24 -0
  29. deepy/prompts/runtime_context.py +98 -0
  30. deepy/prompts/system.py +72 -0
  31. deepy/prompts/tool_docs.py +21 -0
  32. deepy/sessions/__init__.py +17 -0
  33. deepy/sessions/jsonl.py +306 -0
  34. deepy/sessions/manager.py +202 -0
  35. deepy/skills.py +202 -0
  36. deepy/status.py +65 -0
  37. deepy/tools/__init__.py +6 -0
  38. deepy/tools/agents.py +343 -0
  39. deepy/tools/builtin.py +2113 -0
  40. deepy/tools/file_state.py +85 -0
  41. deepy/tools/result.py +54 -0
  42. deepy/tools/shell_utils.py +83 -0
  43. deepy/ui/__init__.py +5 -0
  44. deepy/ui/app.py +118 -0
  45. deepy/ui/ask_user_question.py +182 -0
  46. deepy/ui/exit_summary.py +142 -0
  47. deepy/ui/loading_text.py +87 -0
  48. deepy/ui/markdown.py +152 -0
  49. deepy/ui/message_view.py +546 -0
  50. deepy/ui/prompt_buffer.py +176 -0
  51. deepy/ui/prompt_input.py +286 -0
  52. deepy/ui/session_list.py +140 -0
  53. deepy/ui/session_picker.py +179 -0
  54. deepy/ui/slash_commands.py +67 -0
  55. deepy/ui/styles.py +21 -0
  56. deepy/ui/terminal.py +959 -0
  57. deepy/ui/thinking_state.py +29 -0
  58. deepy/ui/welcome.py +195 -0
  59. deepy/update_check.py +195 -0
  60. deepy/usage.py +192 -0
  61. deepy/utils/__init__.py +15 -0
  62. deepy/utils/debug_logger.py +62 -0
  63. deepy/utils/error_logger.py +107 -0
  64. deepy/utils/json.py +29 -0
  65. deepy/utils/notify.py +66 -0
  66. deepy_cli-0.1.1.dist-info/METADATA +205 -0
  67. deepy_cli-0.1.1.dist-info/RECORD +69 -0
  68. deepy_cli-0.1.1.dist-info/WHEEL +4 -0
  69. deepy_cli-0.1.1.dist-info/entry_points.txt +3 -0
deepy/tools/builtin.py ADDED
@@ -0,0 +1,2113 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import math
5
+ import os
6
+ import re
7
+ import signal
8
+ import shlex
9
+ import subprocess
10
+ import tempfile
11
+ import time
12
+ import urllib.parse
13
+ import urllib.request
14
+ import uuid
15
+ from dataclasses import dataclass, field
16
+ from difflib import unified_diff
17
+ from fnmatch import fnmatch
18
+ from html.parser import HTMLParser
19
+ from pathlib import Path
20
+
21
+ from deepy.config import Settings
22
+ from deepy.utils import json as json_utils
23
+
24
+ from .file_state import FileSnippet, FileState
25
+ from .result import ToolResult
26
+ from .shell_utils import build_disable_extglob_command
27
+ from .shell_utils import build_shell_init_command
28
+ from .shell_utils import rewrite_windows_null_redirect
29
+
30
+
31
# Limits applied to text handling (units are implied by each name).
DEFAULT_LINE_LIMIT = 2000
MAX_LINE_LENGTH = 2000

# Shell output limits: characters shown vs. characters captured (10 MiB).
MAX_BASH_OUTPUT_CHARS = 30000
MAX_BASH_CAPTURE_CHARS = 10 * 1024 ** 2

# Web fetch limits: raw bytes read (2 MiB) and characters returned.
MAX_WEB_FETCH_BYTES = 2 * 1024 ** 2
MAX_WEB_FETCH_OUTPUT_CHARS = 30000

# Web search endpoint and the number of results rendered per query.
DEFAULT_WEB_SEARCH_URL = "https://html.duckduckgo.com/html/"
DEFAULT_WEB_SEARCH_RESULTS = 8

# PDF page thresholds (presumably used by the PDF reader; not visible here).
PDF_LARGE_PAGE_THRESHOLD = 10
PDF_MAX_PAGE_RANGE = 20

# Edit matching: maximum candidates reported and minimum fuzzy similarity.
MAX_CANDIDATE_COUNT = 5
MIN_FUZZY_SCORE = 0.45

# Directory entry names treated as ignorable (VCS, caches, build output).
IGNORED_DIRECTORY_ENTRIES = {
    ".git",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    ".venv",
    "__pycache__",
    "build",
    "dist",
    "node_modules",
    "wheels",
}
55
+
56
+
57
+ def _resolve_in_cwd(cwd: Path, path: str) -> Path:
58
+ candidate = Path(path).expanduser()
59
+ if not candidate.is_absolute():
60
+ candidate = cwd / candidate
61
+ return candidate.resolve()
62
+
63
+
64
def _resolve_read_target(cwd: Path, path: str) -> tuple[Path | None, str | None]:
    """Resolve a read request to ``(target, error)``.

    Exactly one element of the returned pair is meaningful: either a path to
    read (which may not exist) and ``None``, or ``None`` and an error message.

    A path that exists, or that was given as absolute, is returned directly.
    A missing relative path that starts with ``..`` is rejected. Otherwise the
    path is looked up through ``_find_suffix_matches``: a single match is used,
    several matches are reported as ambiguous, and no match falls back to the
    plain resolved path.
    """
    requested = Path(path).expanduser()
    resolved = _resolve_in_cwd(cwd, path)
    if requested.is_absolute() or resolved.exists():
        return resolved, None

    if requested.parts and requested.parts[0] == "..":
        return None, "Relative read paths must stay within the current project."

    suffix = _normalize_relative_suffix(path)
    if not suffix:
        return resolved, None

    matches = _find_suffix_matches(cwd, suffix)
    match_count = len(matches)
    if match_count == 1:
        return matches[0], None
    if match_count > 1:
        # Show at most three candidates and summarize the remainder.
        listing = "\n".join(str(match) for match in matches[:3])
        overflow = f"\n...and {match_count - 3} more." if match_count > 3 else ""
        message = "File path is ambiguous and may refer to multiple files:\n" + listing + overflow
        return None, message
    return resolved, None
86
+
87
+
88
+ def _snippet_metadata(snippet: FileSnippet) -> dict[str, object]:
89
+ return {
90
+ "id": snippet.id,
91
+ "filePath": str(snippet.path),
92
+ "file_path": str(snippet.path),
93
+ "startLine": snippet.start_line,
94
+ "endLine": snippet.end_line,
95
+ "start_line": snippet.start_line,
96
+ "end_line": snippet.end_line,
97
+ }
98
+
99
+
100
+ def _edit_scope(text: str, snippet: FileSnippet | None) -> tuple[int, int]:
101
+ if snippet is None:
102
+ return 0, len(text)
103
+ return _line_scope_offsets(text, snippet.start_line, snippet.end_line)
104
+
105
+
106
+ def _line_scope_offsets(text: str, start_line: int, end_line: int) -> tuple[int, int]:
107
+ lines = text.splitlines(keepends=True)
108
+ if not lines:
109
+ return 0, 0
110
+ start_idx = min(max(start_line - 1, 0), len(lines))
111
+ end_idx = min(max(end_line, start_idx), len(lines))
112
+ start = sum(len(line) for line in lines[:start_idx])
113
+ end = sum(len(line) for line in lines[:end_idx])
114
+ return start, end
115
+
116
+
117
@dataclass(frozen=True)
class MatchOccurrence:
    """Location of one match, as character offsets and 1-based lines."""

    # Character offsets into the full text; the end offset is exclusive.
    start_offset: int
    end_offset: int
    # Lines containing the first and last matched characters.
    start_line: int
    end_line: int
123
+
124
+
125
@dataclass(frozen=True)
class ClosestMatch:
    """Best near-miss found for an edit string that did not match exactly."""

    # The file text that was matched.
    text: str
    # 1-based inclusive line range of that text.
    start_line: int
    end_line: int
    # Similarity to the requested string.
    score: float
    # Name of the matching strategy that produced this candidate.
    strategy: str
132
+
133
+
134
@dataclass(frozen=True)
class TextFileMetadata:
    """Text file content together with its encoding and line-ending labels."""

    content: str
    encoding: str
    line_endings: str
139
+
140
+
141
@dataclass(frozen=True)
class WebSearchPreparation:
    """Outcome of preparing a web search query before it is sent."""

    # The query as supplied by the caller.
    original_query: str
    # The query to actually search for.
    resolved_query: str
    # Language code chosen for the search.
    dominant_language: str
    # Explanation for the language choice.
    language_reason: str
    # Whether ``resolved_query`` is a translation of the original.
    translated: bool = False

    def metadata(self) -> dict[str, object]:
        """Return the preparation as a dict with camelCase keys.

        ``query`` and ``resolvedQuery`` both carry the resolved query.
        """
        resolved = self.resolved_query
        info: dict[str, object] = {"query": resolved}
        info["originalQuery"] = self.original_query
        info["resolvedQuery"] = resolved
        info["translated"] = self.translated
        info["dominantLanguage"] = self.dominant_language
        info["languageReason"] = self.language_reason
        return info
158
+
159
+
160
@dataclass(frozen=True)
class WebSearchResult:
    """One web search hit: title, URL, and an optional snippet."""

    title: str
    url: str
    # Empty when no snippet text is available for the result.
    snippet: str = ""
165
+
166
+
167
+ def _find_occurrences(text: str, needle: str, scope: tuple[int, int]) -> list[MatchOccurrence]:
168
+ matches: list[MatchOccurrence] = []
169
+ scoped_text = text[scope[0] : scope[1]]
170
+ search_index = 0
171
+ while True:
172
+ found = scoped_text.find(needle, search_index)
173
+ if found == -1:
174
+ return matches
175
+ start_offset = scope[0] + found
176
+ end_offset = start_offset + len(needle)
177
+ matches.append(
178
+ MatchOccurrence(
179
+ start_offset=start_offset,
180
+ end_offset=end_offset,
181
+ start_line=_offset_to_line(text, start_offset),
182
+ end_line=_offset_to_line(text, max(start_offset, end_offset - 1)),
183
+ )
184
+ )
185
+ search_index = found + len(needle)
186
+
187
+
188
+ def _offset_to_line(text: str, offset: int) -> int:
189
+ if offset <= 0:
190
+ return 1
191
+ return text.count("\n", 0, min(offset, len(text))) + 1
192
+
193
+
194
def _build_candidate_metadata(
    file_state: FileState,
    path: Path,
    text: str,
    matches: list[MatchOccurrence],
) -> list[dict[str, object]]:
    """Describe the first ``MAX_CANDIDATE_COUNT`` matches as candidate dicts.

    A snippet is registered with *file_state* for each reported match, and its
    id is included so the candidate can be referred to later.
    """
    described: list[dict[str, object]] = []
    for occurrence in matches[:MAX_CANDIDATE_COUNT]:
        first, last = occurrence.start_line, occurrence.end_line
        preview = _build_candidate_preview(text, first, last)
        snippet = file_state.create_snippet(
            path,
            start_line=first,
            end_line=last,
            text=preview,
        )
        entry: dict[str, object] = {"snippet_id": snippet.id}
        entry["start_line"] = first
        entry["end_line"] = last
        entry["preview"] = preview
        described.append(entry)
    return described
218
+
219
+
220
+ def _build_candidate_preview(text: str, start_line: int, end_line: int) -> str:
221
+ lines = text.splitlines()
222
+ selected = lines[start_line - 1 : end_line]
223
+ return "\n".join(
224
+ f"{str(start_line + index).rjust(6)}\t{line}"
225
+ for index, line in enumerate(selected)
226
+ )
227
+
228
+
229
def _build_closest_match_metadata(
    file_state: FileState,
    path: Path,
    closest_match: ClosestMatch,
) -> dict[str, object]:
    """Describe *closest_match* as a metadata dict and register its snippet.

    The preview covers at most the first ten lines of the matched text,
    numbered from the match's real starting line in the file.
    """
    first, last = closest_match.start_line, closest_match.end_line
    preview = _build_candidate_preview(text=closest_match.text, start_line=1, end_line=10)
    if preview:
        # The preview was numbered from 1; shift it to the file's numbering.
        preview = _renumber_preview(preview, first)
    snippet = file_state.create_snippet(
        path,
        start_line=first,
        end_line=last,
        text=preview,
    )
    details: dict[str, object] = {"snippet_id": snippet.id}
    details["start_line"] = first
    details["end_line"] = last
    details["similarity"] = round(closest_match.score, 3)
    details["strategy"] = closest_match.strategy
    details["preview"] = preview
    return details
251
+
252
+
253
+ def _renumber_preview(preview: str, start_line: int) -> str:
254
+ lines = [line.split("\t", 1)[1] if "\t" in line else line for line in preview.splitlines()]
255
+ return "\n".join(
256
+ f"{str(start_line + index).rjust(6)}\t{line}"
257
+ for index, line in enumerate(lines)
258
+ )
259
+
260
+
261
def _format_scope_metadata(
    path: Path,
    snippet: FileSnippet | None,
    scope: tuple[int, int],
    text: str,
) -> dict[str, object]:
    """Describe the edit scope as a metadata dict.

    Without a snippet the scope is reported as ``"full"``, starting at line 1
    and ending on the line of the last character in *scope*. With a snippet
    the snippet's own metadata is reported, tagged as ``"snippet"``.
    """
    if snippet is None:
        path_text = str(path)
        last_line = _offset_to_line(text, max(scope[0], scope[1] - 1))
        return {
            "type": "full",
            "filePath": path_text,
            "file_path": path_text,
            "startLine": 1,
            "endLine": last_line,
            "start_line": 1,
            "end_line": last_line,
            "snippet_id": None,
        }

    described = dict(_snippet_metadata(snippet))
    described["type"] = "snippet"
    described["snippet_id"] = snippet.id
    return described
283
+
284
+
285
+ def _apply_replacements(
286
+ text: str,
287
+ matches: list[MatchOccurrence],
288
+ replacement: str,
289
+ replace_all: bool,
290
+ ) -> str:
291
+ selected_matches = matches if replace_all else matches[:1]
292
+ result = []
293
+ cursor = 0
294
+ for match in selected_matches:
295
+ result.append(text[cursor : match.start_offset])
296
+ result.append(replacement)
297
+ cursor = match.end_offset
298
+ result.append(text[cursor:])
299
+ return "".join(result)
300
+
301
+
302
def _find_loose_escape_occurrences(
    text: str,
    needle: str,
    scope: tuple[int, int],
) -> list[tuple[MatchOccurrence, float, str]]:
    """Find occurrences of *needle* that differ only in backslash escaping.

    Returns ``(occurrence, similarity, matched_text)`` triples for every hit
    of the loose-escape pattern inside *scope*, or an empty list when no
    pattern can be built from *needle*.
    """
    pattern = _build_loose_escape_pattern(needle)
    if pattern is None:
        return []

    scope_start, scope_end = scope
    window = text[scope_start:scope_end]
    target = _normalize_loose_text(needle)
    hits: list[tuple[MatchOccurrence, float, str]] = []
    for hit in pattern.finditer(window):
        begin = scope_start + hit.start()
        finish = scope_start + hit.end()
        fragment = hit.group(0)
        occurrence = MatchOccurrence(
            start_offset=begin,
            end_offset=finish,
            start_line=_offset_to_line(text, begin),
            end_line=_offset_to_line(text, max(begin, finish - 1)),
        )
        similarity = _similarity_score(target, _normalize_loose_text(fragment))
        hits.append((occurrence, similarity, fragment))
    return hits
330
+
331
+
332
+ def _build_loose_escape_pattern(source: str) -> re.Pattern[str] | None:
333
+ if not source:
334
+ return None
335
+ pattern = []
336
+ index = 0
337
+ while index < len(source):
338
+ if source[index] == "\\":
339
+ slash_end = index
340
+ while slash_end < len(source) and source[slash_end] == "\\":
341
+ slash_end += 1
342
+ if slash_end < len(source) and source[slash_end] in "\"'`\\":
343
+ pattern.append(r"\\*")
344
+ pattern.append(re.escape(source[slash_end]))
345
+ index = slash_end + 1
346
+ continue
347
+ pattern.append(re.escape(source[index:slash_end]))
348
+ index = slash_end
349
+ continue
350
+ pattern.append(re.escape(source[index]))
351
+ index += 1
352
+ return re.compile("".join(pattern))
353
+
354
+
355
def _find_closest_match(
    text: str,
    needle: str,
    scope: tuple[int, int],
) -> ClosestMatch | None:
    """Find the text inside *scope* that most resembles *needle*.

    Two strategies are tried in order:

    1. ``loose_escape`` - occurrences that differ from *needle* only in
       backslash escaping. If any exist, the best-scoring one is returned.
    2. ``fuzzy_window`` - line windows of roughly the needle's height are
       scored by similarity; the best window scoring at least
       ``MIN_FUZZY_SCORE`` is returned.

    Ties keep the earliest candidate. Returns ``None`` when nothing qualifies.
    """
    best_loose: ClosestMatch | None = None
    for occurrence, score, matched_text in _find_loose_escape_occurrences(text, needle, scope):
        if best_loose is None or score > best_loose.score:
            best_loose = ClosestMatch(
                text=matched_text,
                start_line=occurrence.start_line,
                end_line=occurrence.end_line,
                score=score,
                strategy="loose_escape",
            )
    if best_loose is not None:
        return best_loose

    normalized_target = _normalize_loose_text(needle)
    target_line_count = max(1, len(needle.splitlines()))
    # Try windows one line shorter, equal to, and one line taller than the needle.
    window_sizes = sorted({max(1, target_line_count - 1), target_line_count, target_line_count + 1})
    first_line = _offset_to_line(text, scope[0])
    last_line = _offset_to_line(text, max(scope[0], scope[1] - 1))

    # Split once up front; re-splitting the whole text for every window made
    # the scan quadratic in the size of the file.
    lines = text.splitlines(keepends=True)

    best_match: ClosestMatch | None = None
    for line in range(first_line, last_line + 1):
        for window_size in window_sizes:
            candidate_end = line + window_size - 1
            if candidate_end > last_line:
                continue
            candidate_text = "".join(lines[line - 1 : candidate_end])
            score = _similarity_score(normalized_target, _normalize_loose_text(candidate_text))
            if score < MIN_FUZZY_SCORE:
                continue
            if best_match is None or score > best_match.score:
                best_match = ClosestMatch(
                    text=candidate_text,
                    start_line=line,
                    end_line=candidate_end,
                    score=score,
                    strategy="fuzzy_window",
                )
    return best_match
400
+
401
+
402
+ def _correct_escaped_strings_with_llm(
403
+ settings: Settings,
404
+ *,
405
+ snippet_text: str,
406
+ old: str,
407
+ new: str,
408
+ matched_text: str,
409
+ ) -> tuple[str, str] | None:
410
+ if not settings.model.api_key or not settings.model.base_url or not settings.model.name:
411
+ return None
412
+ try:
413
+ content = _edit_correction_chat(settings, snippet_text, old, new, matched_text)
414
+ parsed = _parse_corrected_edit_strings(content)
415
+ if parsed is None:
416
+ return None
417
+ corrected_old, corrected_new = parsed
418
+ if _normalize_loose_text(corrected_old) != _normalize_loose_text(old):
419
+ return None
420
+ if _normalize_loose_text(corrected_new) != _normalize_loose_text(new):
421
+ return None
422
+ if corrected_old == corrected_new:
423
+ return None
424
+ return corrected_old, corrected_new
425
+ except Exception:
426
+ return None
427
+
428
+
429
def _edit_correction_chat(
    settings: Settings,
    snippet_text: str,
    old: str,
    new: str,
    matched_text: str,
) -> str:
    """Send the escape-correction request to the model and return its reply.

    The request embeds the snippet, both edit strings, and the loosely
    matched text in CDATA sections, and asks for an XML response. Returns the
    stripped reply text, or ``""`` when the reply content is not a string.
    """
    # Imported lazily so the OpenAI client is only required when used.
    from openai import OpenAI

    system_prompt = (
        "You correct file-edit strings when the only problem is escaping. "
        "Return XML only using <response><corrected_old_string>...</corrected_old_string>"
        "<corrected_new_string>...</corrected_new_string></response>. "
        "Do not change semantics; only fix quoting or escaping so corrected_old_string "
        "matches the snippet exactly."
    )
    user_prompt = (
        "<request>\n"
        f" <snippet_text><![CDATA[{snippet_text}]]></snippet_text>\n"
        f" <old_string><![CDATA[{old}]]></old_string>\n"
        f" <new_string><![CDATA[{new}]]></new_string>\n"
        f" <matched_text><![CDATA[{matched_text}]]></matched_text>\n"
        "</request>\n"
        "<output_format>\n"
        " <response>\n"
        " <corrected_old_string><![CDATA[...]]></corrected_old_string>\n"
        " <corrected_new_string><![CDATA[...]]></corrected_new_string>\n"
        " </response>\n"
        "</output_format>"
    )
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    client = OpenAI(api_key=settings.model.api_key, base_url=settings.model.base_url)
    response = client.chat.completions.create(
        model=settings.model.name,
        messages=conversation,
    )
    reply = response.choices[0].message.content
    if not isinstance(reply, str):
        return ""
    return reply.strip()
473
+
474
+
475
def _parse_corrected_edit_strings(content: str) -> tuple[str, str] | None:
    """Extract the corrected old/new strings from the model's XML reply.

    Each of ``<corrected_old_string>`` and ``<corrected_new_string>`` may hold
    its value either inside a CDATA section or as plain element text. Tag
    names are matched case-insensitively, and a surrounding Markdown code
    fence is removed first.

    Returns:
        ``(corrected_old, corrected_new)``, or ``None`` when the reply is
        empty or either element is missing. Empty element values are valid
        and returned as ``""``.
    """
    normalized = _strip_code_fence(content).strip()
    if not normalized:
        return None

    values: list[str] = []
    for tag in ("corrected_old_string", "corrected_new_string"):
        found = re.search(
            rf"<{tag}>(?:<!\[CDATA\[([\s\S]*?)\]\]>|([\s\S]*?))</{tag}>",
            normalized,
            flags=re.IGNORECASE,
        )
        if found is None:
            return None
        cdata_value, plain_value = found.groups()
        # Test against None, not truthiness: an empty CDATA section is a real
        # (empty) value, and the plain-text group is None whenever the CDATA
        # branch matched.
        values.append(cdata_value if cdata_value is not None else plain_value)

    corrected_old, corrected_new = values
    return corrected_old, corrected_new
494
+
495
+
496
+ def _slice_lines(text: str, start_line: int, end_line: int) -> str:
497
+ lines = text.splitlines(keepends=True)
498
+ return "".join(lines[start_line - 1 : end_line])
499
+
500
+
501
+ def _normalize_loose_text(value: str) -> str:
502
+ normalized = value.replace("\r\n", "\n").replace("\r", "\n")
503
+ normalized = re.sub(r"\\+(?=[\"'`\\])", "", normalized)
504
+ normalized = re.sub(r"[ \t]+", " ", normalized)
505
+ return normalized.strip()
506
+
507
+
508
def _similarity_score(left: str, right: str) -> float:
    """Return a 0.0-1.0 bigram similarity score for two strings.

    Equal strings score 1.0 and an empty string against a non-empty one
    scores 0.0. Otherwise the score is twice the number of shared bigrams
    (counted with multiplicity) over the total number of bigrams.
    """
    if left == right:
        return 1.0
    if not (left and right):
        return 0.0

    left_grams = _to_bigrams(left)
    right_grams = _to_bigrams(right)
    if not left_grams or not right_grams:
        return 1.0 if left == right else 0.0

    # Multiset of the right-hand bigrams still available for pairing.
    available: dict[str, int] = {}
    for gram in right_grams:
        available[gram] = available.get(gram, 0) + 1

    shared = 0
    for gram in left_grams:
        remaining = available.get(gram, 0)
        if remaining > 0:
            shared += 1
            available[gram] = remaining - 1

    return (2 * shared) / (len(left_grams) + len(right_grams))
527
+
528
+
529
+ def _to_bigrams(value: str) -> list[str]:
530
+ if len(value) < 2:
531
+ return [value]
532
+ return [value[index : index + 2] for index in range(len(value) - 1)]
533
+
534
+
535
def _prepare_web_search_query(query: str) -> WebSearchPreparation:
    """Prepare *query* for searching without consulting a model.

    Whitespace is collapsed, and the language is ``"zh"`` when the query
    contains a Chinese character and ``"en"`` otherwise. The query is never
    translated.
    """
    collapsed = " ".join(query.split())
    if _contains_chinese_char(collapsed):
        language = "zh"
        reason = "The query contains Chinese characters."
    else:
        language = "en"
        reason = "The query does not contain Chinese characters."
    return WebSearchPreparation(
        original_query=query,
        resolved_query=collapsed,
        dominant_language=language,
        language_reason=reason,
    )
551
+
552
+
553
def _prepare_web_search_query_with_llm(
    query: str,
    settings: Settings,
) -> tuple[WebSearchPreparation, str | None]:
    """Prepare *query* for searching, letting the model pick the language.

    Returns ``(preparation, warning)``. The model decides whether English or
    Chinese is the better search language; if the query is not already in
    that language it is translated. When the model is not configured or any
    step fails, the model-free preparation is returned together with a
    warning message.
    """
    collapsed = " ".join(query.split())
    model = settings.model
    if not (model.api_key and model.base_url and model.name):
        fallback = _prepare_web_search_query(query)
        return fallback, "WebSearch default mode requires a valid LLM configuration."

    try:
        decision = _decide_search_language_with_llm(collapsed, settings)
        language = decision["dominant_language"]
        reason = decision["reason"]
        has_chinese = _contains_chinese_char(collapsed)

        # Translate only when the query is not yet in the chosen language.
        target_language = None
        if language == "en" and has_chinese:
            target_language = "English"
        elif language == "zh" and not has_chinese:
            target_language = "Chinese"

        if target_language is not None:
            translated = _translate_search_query_with_llm(collapsed, target_language, settings)
            if translated:
                prepared = WebSearchPreparation(
                    original_query=query,
                    resolved_query=translated,
                    dominant_language=language,
                    language_reason=reason,
                    translated=True,
                )
                return prepared, None

        # No translation needed, or the translation came back empty.
        untranslated = WebSearchPreparation(
            original_query=query,
            resolved_query=collapsed,
            dominant_language=language,
            language_reason=reason,
        )
        return untranslated, None
    except Exception as exc:
        return _prepare_web_search_query(query), str(exc)
603
+
604
+
605
def _decide_search_language_with_llm(query: str, settings: Settings) -> dict[str, str]:
    """Ask the model whether *query* is better searched in English or Chinese.

    Returns a dict with ``dominant_language`` (``"en"`` or ``"zh"``) and
    ``reason`` (empty when the model gave no string reason).

    Raises:
        ValueError: If the reply names any other language or is not a JSON
            object.
    """
    instructions = (
        "Decide whether the topic below has more useful online material in English or Chinese.\n\n"
        "Topic:\n"
        "```text\n"
        f"{query}\n"
        "```\n\n"
        "Return strict JSON:\n"
        '{"dominant_language":"en"|"zh","reason":"one short sentence"}\n'
        "Do not include markdown or any extra text."
    )
    reply = _parse_json_response(_web_search_chat(settings, instructions))

    dominant_language = reply.get("dominant_language")
    if dominant_language not in {"en", "zh"}:
        raise ValueError(f"Unexpected dominant language: {dominant_language}")

    reason = reply.get("reason")
    if not isinstance(reason, str):
        reason = ""
    return {"dominant_language": dominant_language, "reason": reason}
625
+
626
+
627
def _translate_search_query_with_llm(query: str, target_language: str, settings: Settings) -> str:
    """Ask the model to translate *query* into *target_language*.

    Returns the reply with any surrounding code fence, whitespace, and quote
    characters removed.
    """
    instructions = (
        f"Translate the query text below into {target_language}.\n\n"
        "Requirements:\n"
        "- Preserve product names, library names, API names, versions, and abbreviations when appropriate.\n"
        "- Return only the translated query, without quotes or explanation.\n\n"
        "Query:\n"
        "```text\n"
        f"{query}\n"
        "```"
    )
    reply = _web_search_chat(settings, instructions)
    unfenced = _strip_code_fence(reply).strip()
    return unfenced.strip("\"'")
639
+
640
+
641
def _web_search_chat(settings: Settings, prompt: str) -> str:
    """Send *prompt* as a single user message and return the reply text.

    String content is returned stripped. List content is reduced to the
    ``text`` of each part (dict key or attribute), joined by newlines. Any
    other content yields ``""``.
    """
    # Imported lazily so the OpenAI client is only required when used.
    from openai import OpenAI

    client = OpenAI(api_key=settings.model.api_key, base_url=settings.model.base_url)
    response = client.chat.completions.create(
        model=settings.model.name,
        messages=[{"role": "user", "content": prompt}],
    )
    reply = response.choices[0].message.content
    if isinstance(reply, str):
        return reply.strip()
    if not isinstance(reply, list):
        return ""

    fragments = []
    for piece in reply:
        if isinstance(piece, dict):
            fragment = piece.get("text")
        else:
            fragment = getattr(piece, "text", "")
        if isinstance(fragment, str):
            fragments.append(fragment)
    return "\n".join(fragments).strip()
660
+
661
+
662
def _parse_json_response(text: str) -> dict[str, object]:
    """Parse a model reply as a JSON object.

    A surrounding code fence is removed first. If the whole reply does not
    parse, the span from the first ``{`` to the last ``}`` is tried instead.

    Raises:
        ValueError: If no brace-delimited span exists or the parsed value is
            not a dict.
    """
    payload = _strip_code_fence(text).strip()
    try:
        decoded = json_utils.loads(payload)
    except json_utils.JSONDecodeError:
        # Retry on the outermost {...} span, skipping any chatter around it.
        opening = payload.find("{")
        closing = payload.rfind("}")
        if not (0 <= opening < closing):
            raise ValueError(f"Failed to parse JSON response: {payload or '<empty>'}")
        decoded = json_utils.loads(payload[opening : closing + 1])

    if isinstance(decoded, dict):
        return decoded
    raise ValueError("JSON response must be an object.")
675
+
676
+
677
+ def _strip_code_fence(text: str) -> str:
678
+ trimmed = text.strip()
679
+ match = re.match(r"^```(?:[\w-]+)?\n([\s\S]*?)\n```$", trimmed)
680
+ return match.group(1) if match else trimmed
681
+
682
+
683
+ def _contains_chinese_char(text: str) -> bool:
684
+ return any("\u4e00" <= char <= "\u9fff" for char in text)
685
+
686
+
687
+ def _format_web_search_activity_label(query: str) -> str:
688
+ normalized = " ".join(query.split())
689
+ if len(normalized) > 180:
690
+ normalized = normalized[:177] + "..."
691
+ return f"WebSearch: {normalized}"
692
+
693
+
694
class _SearchResultParser(HTMLParser):
    """Collect search results from result-page HTML.

    Anchors with the ``result__a`` class supply a result's title and URL.
    Elements with the ``result__snippet`` class supply the snippet for the
    most recently collected result. Results accumulate in ``results``.
    """

    def __init__(self) -> None:
        super().__init__(convert_charrefs=True)
        self.results: list[WebSearchResult] = []
        # Title text chunks while inside a result anchor, else None.
        self._current_title: list[str] | None = None
        self._current_url: str = ""
        # Index of the result receiving snippet text, else None.
        self._snippet_index: int | None = None
        self._snippet_chunks: list[str] = []

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Start collecting a title or a snippet based on the tag's classes."""
        attributes = dict(attrs)
        class_names = set((attributes.get("class") or "").split())
        if tag == "a" and "result__a" in class_names:
            self._current_title = []
            self._current_url = _decode_search_result_url(attributes.get("href") or "")
            return
        # A snippet attaches to the latest result, so one must already exist.
        if "result__snippet" in class_names and self.results:
            self._snippet_index = len(self.results) - 1
            self._snippet_chunks = []

    def handle_data(self, data: str) -> None:
        """Route text to the title or snippet currently being collected."""
        if self._current_title is not None:
            self._current_title.append(data)
            return
        if self._snippet_index is not None:
            self._snippet_chunks.append(data)

    def handle_endtag(self, tag: str) -> None:
        """Finish the title or snippet that this closing tag ends."""
        if tag == "a" and self._current_title is not None:
            title = " ".join("".join(self._current_title).split())
            # Results missing either a title or a URL are dropped.
            if title and self._current_url:
                self.results.append(WebSearchResult(title=title, url=self._current_url))
            self._current_title = None
            self._current_url = ""
            return

        if self._snippet_index is None or tag not in {"a", "div", "td"}:
            return
        snippet = " ".join("".join(self._snippet_chunks).split())
        if snippet:
            # WebSearchResult is frozen, so swap in a copy with the snippet.
            current = self.results[self._snippet_index]
            self.results[self._snippet_index] = WebSearchResult(
                title=current.title,
                url=current.url,
                snippet=snippet,
            )
        self._snippet_index = None
        self._snippet_chunks = []
739
+
740
+
741
+ def _decode_search_result_url(href: str) -> str:
742
+ parsed = urllib.parse.urlparse(href)
743
+ query = urllib.parse.parse_qs(parsed.query)
744
+ target = query.get("uddg", [""])[0]
745
+ if target:
746
+ return target
747
+ if parsed.scheme and parsed.netloc:
748
+ return href
749
+ return urllib.parse.urljoin("https://duckduckgo.com", href)
750
+
751
+
752
def _parse_search_results(html: str) -> list[WebSearchResult]:
    """Parse search results from *html*, keeping the first result per URL."""
    parser = _SearchResultParser()
    parser.feed(html)
    # Dicts keep insertion order, so the first result for each URL wins and
    # the original ordering is preserved.
    by_url: dict[str, WebSearchResult] = {}
    for result in parser.results:
        by_url.setdefault(result.url, result)
    return list(by_url.values())
763
+
764
+
765
def _format_search_results(query: str, results: list[WebSearchResult]) -> str:
    """Render up to ``DEFAULT_WEB_SEARCH_RESULTS`` results as numbered text.

    Each entry shows its title, URL, and snippet (when present), and entries
    are separated by blank lines.
    """
    rendered = [f"Web search results for: {query}", ""]
    for position, item in enumerate(results[:DEFAULT_WEB_SEARCH_RESULTS], start=1):
        entry = [f"{position}. {item.title}", f" {item.url}"]
        if item.snippet:
            entry.append(f" {item.snippet}")
        entry.append("")
        rendered.extend(entry)
    return "\n".join(rendered).strip()
774
+
775
+
776
+ class _ReadableHtmlParser(HTMLParser):
777
+ BLOCK_TAGS = {
778
+ "address",
779
+ "article",
780
+ "aside",
781
+ "blockquote",
782
+ "br",
783
+ "dd",
784
+ "div",
785
+ "dl",
786
+ "dt",
787
+ "figcaption",
788
+ "figure",
789
+ "footer",
790
+ "h1",
791
+ "h2",
792
+ "h3",
793
+ "h4",
794
+ "h5",
795
+ "h6",
796
+ "header",
797
+ "hr",
798
+ "li",
799
+ "main",
800
+ "nav",
801
+ "ol",
802
+ "p",
803
+ "pre",
804
+ "section",
805
+ "table",
806
+ "tbody",
807
+ "td",
808
+ "tfoot",
809
+ "th",
810
+ "thead",
811
+ "tr",
812
+ "ul",
813
+ }
814
+ SKIP_TAGS = {"script", "style", "noscript", "svg"}
815
+
816
+ def __init__(self) -> None:
817
+ super().__init__(convert_charrefs=True)
818
+ self.title_parts: list[str] = []
819
+ self.text_parts: list[str] = []
820
+ self._in_title = False
821
+ self._skip_depth = 0
822
+
823
+ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
824
+ del attrs
825
+ normalized = tag.lower()
826
+ if normalized in self.SKIP_TAGS:
827
+ self._skip_depth += 1
828
+ return
829
+ if normalized == "title":
830
+ self._in_title = True
831
+ return
832
+ if normalized in self.BLOCK_TAGS:
833
+ self._append_newline()
834
+
835
+ def handle_endtag(self, tag: str) -> None:
836
+ normalized = tag.lower()
837
+ if normalized in self.SKIP_TAGS and self._skip_depth > 0:
838
+ self._skip_depth -= 1
839
+ return
840
+ if normalized == "title":
841
+ self._in_title = False
842
+ return
843
+ if normalized in self.BLOCK_TAGS:
844
+ self._append_newline()
845
+
846
+ def handle_data(self, data: str) -> None:
847
+ text = " ".join(data.split())
848
+ if not text:
849
+ return
850
+ if self._in_title:
851
+ self.title_parts.append(text)
852
+ return
853
+ if self._skip_depth:
854
+ return
855
+ self.text_parts.append(text)
856
+
857
+ def _append_newline(self) -> None:
858
+ if self.text_parts and self.text_parts[-1] != "\n":
859
+ self.text_parts.append("\n")
860
+
861
+ @property
862
+ def title(self) -> str:
863
+ return " ".join(self.title_parts).strip()
864
+
865
+ @property
866
+ def readable_text(self) -> str:
867
+ raw = " ".join(self.text_parts)
868
+ raw = re.sub(r"[ \t]*\n[ \t]*", "\n", raw)
869
+ raw = re.sub(r"\n{3,}", "\n\n", raw)
870
+ return "\n".join(line.strip() for line in raw.splitlines()).strip()
871
+
872
+
873
+ def _validate_web_fetch_url(url: str) -> tuple[str | None, str | None]:
874
+ stripped = url.strip()
875
+ parsed = urllib.parse.urlparse(stripped)
876
+ if parsed.scheme not in {"http", "https"} or not parsed.netloc:
877
+ return None, "WebFetch requires a complete http or https URL."
878
+ return stripped, None
879
+
880
+
881
+ def _charset_from_content_type(content_type: str) -> str:
882
+ match = re.search(r"charset=([^\s;]+)", content_type, flags=re.IGNORECASE)
883
+ return match.group(1).strip("\"'") if match else "utf-8"
884
+
885
+
886
+ def _is_html_response(content_type: str, text: str) -> bool:
887
+ lowered = content_type.lower()
888
+ if "html" in lowered:
889
+ return True
890
+ prefix = text[:500].lower()
891
+ return "<html" in prefix or "<!doctype html" in prefix
892
+
893
+
894
def _extract_readable_html(html: str) -> tuple[str, str]:
    """Return ``(title, readable_text)`` extracted from *html*."""
    extractor = _ReadableHtmlParser()
    extractor.feed(html)
    # close() makes the parser process any text it is still buffering.
    extractor.close()
    title = extractor.title
    body = extractor.readable_text
    return title, body
899
+
900
+
901
+ def _format_web_fetch_output(
902
+ *,
903
+ url: str,
904
+ final_url: str,
905
+ content_type: str,
906
+ title: str,
907
+ text: str,
908
+ bytes_truncated: bool,
909
+ ) -> str:
910
+ lines = [
911
+ f"URL: {url}",
912
+ f"Final URL: {final_url}",
913
+ ]
914
+ if title:
915
+ lines.append(f"Title: {title}")
916
+ if content_type:
917
+ lines.append(f"Content-Type: {content_type}")
918
+ if bytes_truncated:
919
+ lines.append(f"Note: response body was truncated at {MAX_WEB_FETCH_BYTES:,} bytes.")
920
+ lines.append("")
921
+ lines.append(text.strip() if text.strip() else "[No readable text extracted.]")
922
+ return "\n".join(lines).strip()
923
+
924
+
925
+ @dataclass
926
+ class ToolRuntime:
927
+ cwd: Path
928
+ settings: Settings
929
+ file_state: FileState = field(default_factory=FileState)
930
+ running_processes: dict[str, dict[str, str]] = field(default_factory=dict)
931
+
932
def read(
    self,
    path: str,
    start_line: int = 1,
    limit: int | None = None,
    pages: str | None = None,
) -> str:
    """Read a path and return a JSON tool result.

    Directories are listed, notebooks and PDFs are delegated to their
    formatters, images are returned as a follow-up message, and any other
    file is returned as numbered text lines starting at ``start_line``.
    Only a complete, untruncated read from line 1 marks the file as fully
    read; a partial read registers a snippet instead.
    """
    name = "read"
    target, error = _resolve_read_target(self.cwd, path)
    if error is not None:
        return ToolResult.error_result(name, error).to_json()
    if target is None or not target.exists():
        return ToolResult.error_result(name, f"File does not exist: {path}").to_json()

    if target.is_dir():
        listing, visible_count, ignored_count = _format_directory_entries(target, self.cwd)
        directory_metadata = {
            "path": str(target),
            "kind": "directory",
            "entryCount": sum(1 for _ in target.iterdir()),
            "visibleEntryCount": visible_count,
            "ignoredEntryCount": ignored_count,
        }
        return ToolResult.ok_result(name, listing, metadata=directory_metadata).to_json()

    suffix = target.suffix.lower()
    if suffix == ".ipynb":
        rendered, error = _format_notebook(target)
        if error is not None:
            return ToolResult.error_result(name, error, metadata={"path": str(target)}).to_json()
        notebook_metadata = {
            "path": str(target),
            "kind": "notebook",
            "trackedForWrite": False,
        }
        return ToolResult.ok_result(name, rendered, metadata=notebook_metadata).to_json()

    if suffix == ".pdf":
        return _read_pdf(target, pages)

    mime = _image_mime_type(suffix)
    if mime is not None:
        payload = target.read_bytes()
        image_result = ToolResult(
            ok=True,
            name=name,
            output="File loaded.",
            metadata={"path": str(target), "mime": mime, "bytes": len(payload)},
            followUpMessages=[_build_image_follow_up_message(target, mime, payload)],
        )
        return image_result.to_json()

    text_metadata = _read_text_metadata(target)
    all_lines = text_metadata.content.splitlines()
    start = max(start_line, 1) - 1  # zero-based index of the first requested line
    if limit and limit > 0:
        effective_limit = limit
    else:
        effective_limit = DEFAULT_LINE_LIMIT
    window = all_lines[start : start + effective_limit]
    display_lines = [_truncate_line(line) for line in window]
    has_more_lines = start + len(window) < len(all_lines)
    truncated = has_more_lines or any(len(line) > MAX_LINE_LENGTH for line in window)
    full_file_read = start == 0 and not truncated
    numbered = "\n".join(
        f"{number}: {line}" for number, line in enumerate(display_lines, start=start + 1)
    )

    snippet_metadata = None
    if full_file_read:
        self.file_state.mark_read(target)
    elif window:
        snippet = self.file_state.create_snippet(
            target,
            start_line=start + 1,
            end_line=start + len(window),
            text="\n".join(window),
        )
        self.file_state.mark_read(target, full=False)
        snippet_metadata = _snippet_metadata(snippet)

    metadata = {
        "path": str(target),
        "kind": "file",
        "startLine": start + 1,
        "lineCount": len(window),
        "lineLimit": effective_limit,
        "totalLines": len(all_lines),
        "truncated": truncated,
        "trackedForWrite": full_file_read,
        "encoding": text_metadata.encoding,
    }
    if snippet_metadata is not None:
        metadata["snippet"] = snippet_metadata
    return ToolResult.ok_result(name, numbered, metadata=metadata).to_json()
1031
+
1032
def modify(
    self,
    path: str | None,
    *,
    content: object | None = None,
    old: str | None = None,
    new: str | None = None,
    replace_all: bool = False,
    snippet_id: str | None = None,
) -> str:
    """Dispatch to ``write`` (new file) or ``edit`` (existing file).

    ``content`` creates a file that must not exist yet; ``old``/``new``
    replace text in an existing file. Supplying both forms, or neither
    complete form, yields an error result.
    """
    wants_new_file = content is not None
    wants_replacement = not (old is None and new is None)
    if wants_new_file and wants_replacement:
        return ToolResult.error_result(
            "modify",
            "Use either content for a new file or old_string/new_string for an existing file, not both.",
        ).to_json()

    if not wants_new_file:
        if old is None or new is None:
            return ToolResult.error_result(
                "modify",
                "Provide content for a new file, or both old_string and new_string for an existing file.",
            ).to_json()
        return self.edit(path, old, new, replace_all=replace_all, snippet_id=snippet_id)

    if not path:
        return ToolResult.error_result("modify", "file_path is required for new files.").to_json()
    target = _resolve_in_cwd(self.cwd, path)
    if target.exists():
        return ToolResult.error_result(
            "modify",
            "File already exists. Read it and use old_string/new_string with modify instead of content.",
            metadata={"path": str(target)},
        ).to_json()
    return self.write(path, content)
1066
+
1067
def write(self, path: str, content: object) -> str:
    """Write *content* to *path* and return a JSON tool result with a diff.

    An existing file keeps its detected encoding and line endings; a new
    file is written as ``utf8`` with line endings detected from the new
    content. Parent directories are created as needed.
    """
    name = "write"
    target = _resolve_in_cwd(self.cwd, path)
    writable, reason = self.file_state.check_writable(target, require_read=True)
    if not writable:
        return ToolResult.error_result(name, reason or "File is not writable.").to_json()

    text_content, repair_metadata, content_error = _coerce_write_content(target, content)
    if content_error is not None:
        return ToolResult.error_result(name, content_error).to_json()

    if target.exists():
        existing = _read_text_metadata(target)
        old_content = existing.content
        encoding = existing.encoding
    else:
        old_content = ""
        encoding = "utf8"

    # An empty existing file falls back to the new content for detection.
    line_endings = _detect_line_endings(old_content or text_content)
    normalized_content = _normalize_line_endings(text_content, line_endings)
    target.parent.mkdir(parents=True, exist_ok=True)
    _write_text_with_encoding(target, normalized_content, encoding)
    self.file_state.mark_written(target)

    diff = _unified_diff(old_content, normalized_content, path=str(target))
    metadata = {
        "path": str(target),
        "encoding": encoding,
        "line_endings": line_endings,
        **repair_metadata,
        "diff": diff,
        "diff_preview": diff,
    }
    return ToolResult.ok_result(name, f"Wrote {target}", metadata=metadata).to_json()
1097
+
1098
def edit(
    self,
    path: str | None,
    old: str,
    new: str,
    replace_all: bool = False,
    snippet_id: str | None = None,
) -> str:
    """Replace ``old`` with ``new`` in a file and return a JSON tool result.

    The target comes from ``snippet_id`` (optionally cross-checked against
    ``path``) or from ``path`` alone. Matching is tried exactly first, then
    through a single perfect loose-escape match, optionally refined by the
    LLM escape corrector. Ambiguous matches are rejected unless
    ``replace_all`` is set.
    """
    name = "edit"

    def fail(message: str, **extra: object) -> str:
        return ToolResult.error_result(name, message, **extra).to_json()

    if not old:
        return fail("old text must not be empty.")

    snippet = None
    if snippet_id:
        snippet = self.file_state.get_snippet(snippet_id)
        if snippet is None:
            return fail(f"Unknown snippet_id: {snippet_id}")
        target = snippet.path
        if path and _resolve_in_cwd(self.cwd, path) != target:
            return fail("snippet_id does not belong to the provided file path.")
    elif path:
        target = _resolve_in_cwd(self.cwd, path)
    else:
        return fail("path is required unless snippet_id is provided.")

    if not target.exists():
        return fail(f"File does not exist: {target}")
    writable, reason = self.file_state.check_writable(
        target,
        require_read=True,
        allow_partial=snippet is not None,
    )
    if not writable:
        return fail(reason or "File is not writable.")

    text_metadata = _read_text_metadata(target)
    text = text_metadata.content
    scope = _edit_scope(text, snippet)
    matches = _find_occurrences(text, old, scope)
    matched_via = "exact"
    replacement_new = new

    if not matches:
        loose_matches = _find_loose_escape_occurrences(text, old, scope)
        if len(loose_matches) == 1 and loose_matches[0][1] == 1.0:
            best = loose_matches[0]
            corrected = _correct_escaped_strings_with_llm(
                self.settings,
                snippet_text=text[scope[0] : scope[1]],
                old=old,
                new=new,
                matched_text=best[2],
            )
            if corrected is not None:
                corrected_old, corrected_new = corrected
                corrected_matches = _find_occurrences(text, corrected_old, scope)
                if corrected_matches:
                    matches = corrected_matches
                    replacement_new = corrected_new
                    matched_via = "llm_escape_correction"
            # NOTE(review): nesting reconstructed from a flattened listing; the
            # fallback is assumed to apply whenever the correction did not
            # produce matches -- confirm against the original file.
            if not matches:
                matches = [best[0]]
                matched_via = "loose_escape"

    if not matches:
        closest_match = _find_closest_match(text, old, scope)
        metadata = {"scope": _format_scope_metadata(target, snippet, scope, text)}
        if closest_match is not None:
            metadata["closest_match"] = _build_closest_match_metadata(
                self.file_state,
                target,
                closest_match,
            )
        return fail("old_string not found in file.", metadata=metadata)

    occurrences = len(matches)
    if occurrences > 1 and not replace_all:
        return fail(
            "old_string is not unique; use snippet_id, replace_all, or provide more context.",
            metadata={
                "occurrences": occurrences,
                "match_count": occurrences,
                "scope": _format_scope_metadata(target, snippet, scope, text),
                "candidates": _build_candidate_metadata(
                    self.file_state,
                    target,
                    text,
                    matches,
                ),
            },
        )

    line_endings = text_metadata.line_endings
    normalized_new = _normalize_line_endings(replacement_new, line_endings)
    updated = _apply_replacements(text, matches, normalized_new, replace_all)
    _write_text_with_encoding(target, updated, text_metadata.encoding)
    self.file_state.mark_written(target)

    diff = _unified_diff(text, updated, path=str(target))
    target_str = str(target)
    metadata = {
        "path": target_str,
        "file_path": target_str,
        "occurrences": occurrences if replace_all else 1,
        "matched_via": matched_via,
        "encoding": text_metadata.encoding,
        "line_endings": line_endings,
        "read_scope_type": "full" if snippet is None else "snippet",
        "diff": diff,
        "diff_preview": diff,
    }
    if snippet is not None:
        metadata["scope"] = _format_scope_metadata(target, snippet, scope, text)
    return ToolResult.ok_result(name, f"Edited {target}", metadata=metadata).to_json()
1215
+
1216
def bash(self, command: str, timeout_ms: int = 120_000) -> str:
    """Run *command* through the configured shell and return a JSON tool result.

    Output is captured through temporary files. The process is registered
    in ``running_processes`` while it runs and terminated when the timeout
    elapses. The sentinel extracted from stdout may update ``self.cwd`` and
    supply the exit code.
    """
    name = "bash"
    timeout_seconds = max(timeout_ms, 1) / 1000
    marker = f"__DEEPY_CWD_{uuid.uuid4().hex}__"
    shell_path, shell_args = _build_shell_command(command, marker)
    process: subprocess.Popen[str] | None = None
    process_id: str | None = None
    timed_out = False
    try:
        with (
            tempfile.TemporaryFile(mode="w+", encoding="utf-8", errors="replace") as stdout_file,
            tempfile.TemporaryFile(mode="w+", encoding="utf-8", errors="replace") as stderr_file,
        ):
            process = subprocess.Popen(
                [shell_path, *shell_args],
                cwd=self.cwd,
                text=True,
                stdout=stdout_file,
                stderr=stderr_file,
                stdin=subprocess.DEVNULL,
                start_new_session=os.name != "nt",
            )
            process_id = str(process.pid)
            self.running_processes[process_id] = {
                "startTime": _now_iso(),
                "command": command,
            }
            try:
                process.wait(timeout=timeout_seconds)
            except subprocess.TimeoutExpired:
                timed_out = True
                _terminate_process(process)
                process.wait()
            # The capture files must be read before the ``with`` block closes them.
            stdout, stdout_capture_truncated = _read_captured_output(stdout_file)
            stderr, stderr_capture_truncated = _read_captured_output(stderr_file)
    finally:
        if process_id is not None:
            self.running_processes.pop(process_id, None)

    capture_truncated = stdout_capture_truncated or stderr_capture_truncated
    if timed_out:
        output, output_truncated = _truncate_output((stdout or "") + (stderr or ""))
        return ToolResult.error_result(
            name,
            f"Command timed out after {timeout_ms}ms.",
            output=output,
            metadata={
                "cwd": str(self.cwd),
                "timeoutMs": timeout_ms,
                "processId": process_id,
                "shellPath": shell_path,
                "interrupted": True,
                "outputTruncated": output_truncated,
                "captureTruncated": capture_truncated,
            },
        ).to_json()

    stdout, final_cwd, exit_code = _extract_bash_sentinel(stdout or "", marker)
    if final_cwd is not None and final_cwd.is_dir():
        self.cwd = final_cwd
    returncode = process.returncode if exit_code is None else exit_code
    output, output_truncated = _truncate_output(stdout + (stderr or ""))
    metadata = {
        "cwd": str(self.cwd),
        "exitCode": returncode,
        "processId": process_id,
        "shellPath": shell_path,
        "outputTruncated": output_truncated,
        "captureTruncated": capture_truncated,
    }
    if returncode == 0:
        return ToolResult.ok_result(name, output, metadata=metadata).to_json()
    return ToolResult.error_result(
        name,
        f"Command exited with code {returncode}.",
        output=output,
        metadata=metadata,
    ).to_json()
1303
+
1304
def ask_user_question(self, questions: object) -> str:
    """Validate *questions* and return a result that awaits the user's reply."""
    tool_name = "AskUserQuestion"
    parsed_questions, error = _parse_ask_user_questions(questions)
    if error is not None:
        return ToolResult.error_result(tool_name, error).to_json()
    pending = ToolResult(
        ok=True,
        name=tool_name,
        output=_build_question_summary(parsed_questions),
        metadata={"kind": "ask_user_question", "questions": parsed_questions},
        awaitUserResponse=True,
    )
    return pending.to_json()
1315
+
1316
def web_search(self, query: str) -> str:
    """Run a web search through the first configured backend.

    Precedence: a configured command, then a configured API URL, then the
    built-in search. A blank query is rejected up front.
    """
    if not query.strip():
        return ToolResult.error_result("WebSearch", 'Missing required "query" string.').to_json()
    command = self.settings.tools.web_search.command
    if command:
        return self._web_search_command(query, command)
    api_url = self.settings.tools.web_search.api_url
    if not api_url:
        return self._web_search_builtin(query)
    return self._web_search_api(query, api_url)
1327
+
1328
def web_fetch(self, url: str) -> str:
    """Fetch *url* over HTTP(S) and return its readable text as a JSON tool result.

    At most ``MAX_WEB_FETCH_BYTES`` of the body are kept. HTML responses
    are reduced to title plus readable text; anything else is returned as
    decoded text. The request is listed in ``running_processes`` while it
    is in flight.
    """
    name = "WebFetch"
    target_url, validation_error = _validate_web_fetch_url(url)
    if target_url is None or validation_error is not None:
        message = validation_error or 'Missing required "url" string.'
        return ToolResult.error_result(name, message).to_json()

    activity_label = f"WebFetch: {target_url}"
    activity_id = f"web-fetch-{uuid.uuid4().hex}"
    self.running_processes[activity_id] = {
        "startTime": _now_iso(),
        "command": activity_label,
    }
    request_headers = {
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Deepy/0.1"
        ),
        "Accept": "text/html,application/xhtml+xml,text/plain;q=0.9,*/*;q=0.8",
    }
    request = urllib.request.Request(target_url, headers=request_headers, method="GET")
    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            final_url = response.geturl()
            content_type = response.headers.get("Content-Type", "")
            # Read one byte past the cap so truncation can be detected.
            body = response.read(MAX_WEB_FETCH_BYTES + 1)
    except Exception as exc:
        return ToolResult.error_result(
            name,
            f"WebFetch request failed: {exc}",
            metadata={
                "url": target_url,
                "activityLabel": activity_label,
            },
        ).to_json()
    finally:
        self.running_processes.pop(activity_id, None)

    bytes_truncated = len(body) > MAX_WEB_FETCH_BYTES
    body = body[:MAX_WEB_FETCH_BYTES]
    charset = _charset_from_content_type(content_type)
    decoded = body.decode(charset, errors="replace")
    if _is_html_response(content_type, decoded):
        title, readable_text = _extract_readable_html(decoded)
    else:
        title, readable_text = "", decoded.strip()

    formatted = _format_web_fetch_output(
        url=target_url,
        final_url=final_url,
        content_type=content_type,
        title=title,
        text=readable_text,
        bytes_truncated=bytes_truncated,
    )
    output, output_truncated = _truncate_output(formatted, MAX_WEB_FETCH_OUTPUT_CHARS)
    metadata = {
        "url": target_url,
        "finalUrl": final_url,
        "contentType": content_type,
        "charset": charset,
        "byteCount": len(body),
        "bodyTruncated": bytes_truncated,
        "outputTruncated": output_truncated,
        "activityLabel": activity_label,
    }
    return ToolResult.ok_result(name, output, metadata=metadata).to_json()
1400
+
1401
def _web_search_command(self, query: str, command: str) -> str:
    """Run the configured search command with the query as its last argument.

    The prepared query is shell-quoted and appended to *command*. The
    process is listed in ``running_processes`` while it runs and is
    terminated after 60 seconds.

    zsh is used when ``/bin/zsh`` exists (the previous hard-coded choice);
    otherwise the platform's default shell is used so the tool still works
    on hosts without zsh. A failure to start the shell is reported as an
    error result rather than raised.
    """
    name = "WebSearch"
    prepared = _prepare_web_search_query(query)
    activity_label = _format_web_search_activity_label(query)
    # ``executable=None`` lets subprocess pick the default shell for shell=True.
    shell_executable = "/bin/zsh" if os.path.exists("/bin/zsh") else None
    try:
        process = subprocess.Popen(
            f"{command} {shlex.quote(prepared.resolved_query)}",
            shell=True,
            cwd=self.cwd,
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            stdin=subprocess.DEVNULL,
            executable=shell_executable,
        )
    except OSError as exc:
        return ToolResult.error_result(
            name,
            f"WebSearch command failed to start: {exc}",
            metadata={
                **prepared.metadata(),
                "activityLabel": activity_label,
            },
        ).to_json()

    process_id = str(process.pid)
    self.running_processes[process_id] = {
        "startTime": _now_iso(),
        "command": activity_label,
    }
    try:
        stdout, stderr = process.communicate(timeout=60)
    except subprocess.TimeoutExpired:
        _terminate_process(process)
        # Collect whatever was produced before the process was stopped.
        stdout, stderr = process.communicate()
        output, output_truncated = _truncate_output((stdout or "") + (stderr or ""))
        return ToolResult.error_result(
            name,
            "WebSearch command timed out after 60000ms.",
            output=output,
            metadata={
                **prepared.metadata(),
                "activityLabel": activity_label,
                "outputTruncated": output_truncated,
                "interrupted": True,
            },
        ).to_json()
    finally:
        self.running_processes.pop(process_id, None)

    output, output_truncated = _truncate_output((stdout or "") + (stderr or ""))
    metadata = {
        **prepared.metadata(),
        "exitCode": process.returncode,
        "activityLabel": activity_label,
        "outputTruncated": output_truncated,
    }
    if process.returncode != 0:
        return ToolResult.error_result(
            name,
            f"WebSearch command exited with code {process.returncode}.",
            output=output,
            metadata=metadata,
        ).to_json()
    return ToolResult.ok_result(name, output, metadata=metadata).to_json()
1467
+
1468
def _web_search_builtin(self, query: str) -> str:
    """Search via ``DEFAULT_WEB_SEARCH_URL`` and return parsed results as JSON.

    A query-preparation error does not abort the search; it is attached to
    the result metadata as ``queryPreparationWarning``.
    """
    name = "WebSearch"
    prepared, prepare_error = _prepare_web_search_query_with_llm(query, self.settings)
    query_string = urllib.parse.urlencode({"q": prepared.resolved_query}, doseq=False)
    search_url = DEFAULT_WEB_SEARCH_URL + "?" + query_string
    activity_label = _format_web_search_activity_label(prepared.resolved_query)

    def build_metadata(extra: dict[str, object] | None = None) -> dict[str, object]:
        # Shared metadata layout; the warning key always comes last.
        warning = {"queryPreparationWarning": prepare_error} if prepare_error else {}
        return {
            **prepared.metadata(),
            "backend": "duckduckgo_html",
            "searchUrl": search_url,
            "activityLabel": activity_label,
            **(extra or {}),
            **warning,
        }

    activity_id = f"web-search-{uuid.uuid4().hex}"
    self.running_processes[activity_id] = {
        "startTime": _now_iso(),
        "command": activity_label,
    }
    request_headers = {
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Deepy/0.1"
        )
    }
    request = urllib.request.Request(search_url, headers=request_headers, method="GET")
    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            body = response.read().decode("utf-8", errors="replace")
    except Exception as exc:
        return ToolResult.error_result(
            name,
            f"WebSearch request failed: {exc}",
            metadata=build_metadata(),
        ).to_json()
    finally:
        self.running_processes.pop(activity_id, None)

    results = _parse_search_results(body)
    if not results:
        return ToolResult.error_result(
            name,
            "WebSearch returned no parseable results.",
            metadata=build_metadata(),
        ).to_json()
    result_count = min(len(results), DEFAULT_WEB_SEARCH_RESULTS)
    return ToolResult.ok_result(
        name,
        _format_search_results(prepared.resolved_query, results),
        metadata=build_metadata({"resultCount": result_count}),
    ).to_json()
1535
+
1536
def _web_search_api(self, query: str, api_url: str) -> str:
    """POST the prepared query to a custom search API and return its answer.

    Requires ``machine_id`` in the web-search settings; it is sent in the
    ``Token`` header. When the response is a JSON object with a non-blank
    string ``result``, that string is the output; otherwise the raw
    response text is used.
    """
    name = "WebSearch"
    prepared, prepare_error = _prepare_web_search_query_with_llm(query, self.settings)
    if prepare_error is not None:
        return ToolResult.error_result(
            name,
            f"WebSearch custom API mode failed: {prepare_error}",
            metadata={"query": query, "apiUrl": api_url},
        ).to_json()

    machine_id = self.settings.tools.web_search.machine_id
    if not machine_id:
        return ToolResult.error_result(
            name,
            "WebSearch custom API mode requires machine_id in the TOML tools.web_search config.",
            metadata={**prepared.metadata(), "apiUrl": api_url},
        ).to_json()

    payload_bytes = json_utils.dumps({"query": prepared.resolved_query}).encode("utf-8")
    request = urllib.request.Request(
        api_url,
        data=payload_bytes,
        headers={
            "Content-Type": "application/json",
            "Token": machine_id,
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            response_text = response.read().decode("utf-8", errors="replace")
    except Exception as exc:
        return ToolResult.error_result(
            name,
            f"WebSearch API request failed: {exc}",
            metadata={**prepared.metadata(), "apiUrl": api_url},
        ).to_json()

    output = response_text.strip()
    try:
        payload = json_utils.loads(response_text)
    except json_utils.JSONDecodeError:
        payload = None
    if isinstance(payload, dict):
        result = payload.get("result")
        if isinstance(result, str):
            trimmed_result = result.strip()
            if trimmed_result:
                output = trimmed_result

    if not output:
        return ToolResult.error_result(
            name,
            "WebSearch custom API mode failed: The web search response was empty.",
            metadata={**prepared.metadata(), "apiUrl": api_url},
        ).to_json()
    success_metadata = {
        **prepared.metadata(),
        "apiUrl": api_url,
        "usedMachineId": bool(machine_id),
    }
    return ToolResult.ok_result(name, output, metadata=success_metadata).to_json()
1591
+
1592
+
1593
+ def _unified_diff(old: str, new: str, *, path: str) -> str:
1594
+ return "".join(
1595
+ unified_diff(
1596
+ old.splitlines(keepends=True),
1597
+ new.splitlines(keepends=True),
1598
+ fromfile=f"a/{path}",
1599
+ tofile=f"b/{path}",
1600
+ )
1601
+ )
1602
+
1603
+
1604
def _read_text_preserving_newlines(path: Path) -> str:
    """Return only the decoded content from ``_read_text_metadata(path)``."""
    metadata = _read_text_metadata(path)
    return metadata.content
1606
+
1607
+
1608
def _read_text_metadata(path: Path) -> TextFileMetadata:
    """Read *path* as bytes and return its text, encoding label and line endings.

    Undecodable bytes are replaced rather than raising.
    """
    raw_bytes = path.read_bytes()
    encoding = _detect_text_encoding(raw_bytes)
    if encoding == "utf16le":
        codec = "utf-16"
    else:
        codec = "utf-8"
    decoded = raw_bytes.decode(codec, errors="replace")
    line_endings = _detect_line_endings(decoded)
    return TextFileMetadata(content=decoded, encoding=encoding, line_endings=line_endings)
1618
+
1619
+
1620
+ def _detect_text_encoding(data: bytes) -> str:
1621
+ if len(data) >= 2 and data[0] == 0xFF and data[1] == 0xFE:
1622
+ return "utf16le"
1623
+ return "utf8"
1624
+
1625
+
1626
+ def _write_text_with_encoding(path: Path, content: str, encoding: str) -> None:
1627
+ python_encoding = "utf-16" if encoding == "utf16le" else "utf-8"
1628
+ path.write_text(content, encoding=python_encoding)
1629
+
1630
+
1631
+ def _coerce_write_content(path: Path, content: object) -> tuple[str, dict[str, object], str | None]:
1632
+ if isinstance(content, str):
1633
+ return content, {}, None
1634
+ if path.suffix.lower() == ".json" and content is not None and not isinstance(content, bytes):
1635
+ try:
1636
+ return (
1637
+ json_utils.dumps_pretty(content),
1638
+ {"input_repaired": True, "repair_kind": "json-stringify-content"},
1639
+ None,
1640
+ )
1641
+ except TypeError as exc:
1642
+ return "", {}, f"JSON content is not serializable: {exc}"
1643
+ return "", {}, "content must be a string."
1644
+
1645
+
1646
def _format_notebook(path: Path) -> tuple[str, str | None]:
    """Render a notebook file as numbered lines, returning ``(output, error)``.

    Each dict-shaped cell contributes a ``# Cell N (type)`` header and its
    source, followed by ``# Output N (type)`` headers with the formatted
    outputs. Empty files and notebooks without renderable cells produce a
    warning string instead of an error.
    """
    no_cells_warning = "WARNING: Notebook has no cells."
    raw = _read_text_preserving_newlines(path)
    if not raw:
        return "WARNING: File is empty.", None
    try:
        parsed = json_utils.loads(raw)
    except json_utils.JSONDecodeError as exc:
        return "", f"Failed to parse notebook JSON: {exc}"
    if not isinstance(parsed, dict):
        return no_cells_warning, None

    cells = parsed.get("cells")
    rendered: list[str] = []
    # Numbering uses the position in the raw list, so skipped entries still consume a number.
    for cell_number, cell in enumerate(cells if isinstance(cells, list) else [], start=1):
        if not isinstance(cell, dict):
            continue
        raw_cell_type = cell.get("cell_type")
        cell_type = raw_cell_type if isinstance(raw_cell_type, str) else "unknown"
        rendered.append(f"# Cell {cell_number} ({cell_type})")
        rendered.extend(_normalize_notebook_field(cell.get("source")))

        outputs = cell.get("outputs")
        if isinstance(outputs, list):
            for output_number, output in enumerate(outputs, start=1):
                if not isinstance(output, dict):
                    continue
                raw_output_type = output.get("output_type")
                output_type = raw_output_type if isinstance(raw_output_type, str) else "output"
                rendered.append(f"# Output {output_number} ({output_type})")
                rendered.extend(_format_notebook_output(output))

    if not rendered:
        return no_cells_warning, None
    numbered = "\n".join(f"{number}: {line}" for number, line in enumerate(rendered, start=1))
    return numbered, None
1684
+
1685
+
1686
+ def _normalize_notebook_field(value: object) -> list[str]:
1687
+ if isinstance(value, list):
1688
+ return [str(item).removesuffix("\n").removesuffix("\r") for item in value]
1689
+ if isinstance(value, str):
1690
+ return value.splitlines()
1691
+ return []
1692
+
1693
+
1694
def _format_notebook_output(output: dict[str, object]) -> list[str]:
    """Flatten one notebook output dict into display lines.

    Collects, in order: the ``text`` field, ``data["text/plain"]``,
    size placeholders for PNG/JPEG payloads, and traceback entries.
    Returns a single placeholder line when nothing was collected.
    """
    collected = _normalize_notebook_field(output.get("text"))

    data = output.get("data")
    if isinstance(data, dict):
        collected.extend(_normalize_notebook_field(data.get("text/plain")))
        png_payload = data.get("image/png")
        if isinstance(png_payload, str):
            collected.append(f"[image/png {len(png_payload)} chars]")
        jpeg_payload = data.get("image/jpeg")
        if isinstance(jpeg_payload, str):
            collected.append(f"[image/jpeg {len(jpeg_payload)} chars]")

    traceback = output.get("traceback")
    if isinstance(traceback, list):
        for frame in traceback:
            collected.append(str(frame).removesuffix("\n").removesuffix("\r"))

    if collected:
        return collected
    return ["[output omitted]"]
1709
+
1710
+
1711
@dataclass(frozen=True)
class PageRange:
    """Immutable page span with inclusive ``start`` and ``end``."""

    start: int
    end: int

    @property
    def count(self) -> int:
        """Number of pages in the span, counting both endpoints."""
        return 1 + (self.end - self.start)

    def label(self) -> str:
        """Return the span formatted as ``start-end``."""
        return "{}-{}".format(self.start, self.end)
1722
+
1723
+
1724
def _read_pdf(path: Path, pages: str | None) -> str:
    """Return the whole PDF as a base64 data URL in a JSON tool result.

    ``pages`` is validated only: it must parse, span at most
    ``PDF_MAX_PAGE_RANGE`` pages, and end within the detected page count
    when one is known. Without ``pages``, PDFs above
    ``PDF_LARGE_PAGE_THRESHOLD`` pages are rejected.
    """
    tool_name = "read"
    data = path.read_bytes()
    page_count = _count_pdf_pages(data)
    page_range, range_error = _parse_page_range(pages)
    if range_error is not None:
        return ToolResult.error_result(
            tool_name, range_error, metadata={"path": str(path)}
        ).to_json()

    def reject(message: str) -> str:
        return ToolResult.error_result(
            tool_name,
            message,
            metadata={"path": str(path), "pageCount": page_count},
        ).to_json()

    if page_range is None:
        if page_count is not None and page_count > PDF_LARGE_PAGE_THRESHOLD:
            return reject(f'PDF has {page_count} pages; provide "pages" to read a range.')
    else:
        if page_range.count > PDF_MAX_PAGE_RANGE:
            return reject(f"PDF page range exceeds {PDF_MAX_PAGE_RANGE} pages.")
        if page_count is not None and page_range.end > page_count:
            return reject(f"PDF page range exceeds total page count ({page_count}).")

    encoded = base64.b64encode(data).decode("ascii")
    metadata = {
        "path": str(path),
        "mime": "application/pdf",
        "encoding": "base64",
        "bytes": len(data),
        "pageCount": page_count,
        "pages": None if page_range is None else page_range.label(),
    }
    return ToolResult.ok_result(
        tool_name,
        f"data:application/pdf;base64,{encoded}",
        metadata=metadata,
    ).to_json()
1763
+
1764
+
1765
+ def _count_pdf_pages(data: bytes) -> int | None:
1766
+ try:
1767
+ text = data.decode("latin1", errors="ignore")
1768
+ except Exception:
1769
+ return None
1770
+ return len(re.findall(r"/Type\s*/Page\b(?!s)", text))
1771
+
1772
+
1773
def _parse_page_range(value: str | None) -> tuple[PageRange | None, str | None]:
    """Parse ``"N"`` or ``"N-M"`` into a ``PageRange``, returning ``(range, error)``.

    ``None`` or a blank string means "no range requested" and yields
    ``(None, None)``. Comma lists and strings with more than one dash are
    rejected.
    """
    if value is None:
        return None, None
    trimmed = value.strip()
    if not trimmed:
        return None, None

    malformed = 'pages must be a single range like "1-5" or "3".'
    if "," in trimmed:
        return None, malformed
    pieces = [piece.strip() for piece in trimmed.split("-")]
    if len(pieces) > 2:
        return None, malformed

    first, first_error = _parse_positive_int(pieces[0], "pages")
    if first_error is not None:
        return None, first_error
    if len(pieces) == 1:
        return PageRange(first, first), None

    last, last_error = _parse_positive_int(pieces[1], "pages")
    if last_error is not None:
        return None, last_error
    if last < first:
        return None, "pages range end must be >= start."
    return PageRange(first, last), None
1794
+
1795
+
1796
+ def _parse_positive_int(value: str, label: str) -> tuple[int, str | None]:
1797
+ try:
1798
+ numeric = float(value)
1799
+ except ValueError:
1800
+ return 0, f"{label} must be a number."
1801
+ if not math.isfinite(numeric):
1802
+ return 0, f"{label} must be a number."
1803
+ integer = int(numeric)
1804
+ if integer < 1:
1805
+ return 0, f"{label} must be >= 1."
1806
+ return integer, None
1807
+
1808
+
1809
+ IMAGE_MIME_TYPES = {
1810
+ ".png": "image/png",
1811
+ ".jpg": "image/jpeg",
1812
+ ".jpeg": "image/jpeg",
1813
+ ".gif": "image/gif",
1814
+ ".webp": "image/webp",
1815
+ ".bmp": "image/bmp",
1816
+ ".tif": "image/tiff",
1817
+ ".tiff": "image/tiff",
1818
+ ".svg": "image/svg+xml",
1819
+ ".ico": "image/x-icon",
1820
+ ".avif": "image/avif",
1821
+ }
1822
+
1823
+
1824
+ def _image_mime_type(suffix: str) -> str | None:
1825
+ return IMAGE_MIME_TYPES.get(suffix)
1826
+
1827
+
1828
+ def _build_image_follow_up_message(path: Path, mime: str, data: bytes) -> dict[str, object]:
1829
+ encoded = base64.b64encode(data).decode("ascii")
1830
+ return {
1831
+ "role": "system",
1832
+ "content": [
1833
+ {
1834
+ "type": "input_text",
1835
+ "text": (
1836
+ f"The read tool has loaded `{path.name}`. "
1837
+ "Use the attached image content to answer the original request."
1838
+ ),
1839
+ },
1840
+ {
1841
+ "type": "input_image",
1842
+ "image_url": f"data:{mime};base64,{encoded}",
1843
+ },
1844
+ ],
1845
+ }
1846
+
1847
+
1848
+ def _detect_line_endings(text: str) -> str:
1849
+ return "CRLF" if "\r\n" in text else "LF"
1850
+
1851
+
1852
+ def _normalize_line_endings(text: str, line_endings: str) -> str:
1853
+ normalized = text.replace("\r\n", "\n").replace("\r", "\n")
1854
+ return normalized.replace("\n", "\r\n") if line_endings == "CRLF" else normalized
1855
+
1856
+
1857
def _truncate_line(line: str) -> str:
    """Cut ``line`` to ``MAX_LINE_LENGTH`` characters, appending a marker if cut."""
    if len(line) > MAX_LINE_LENGTH:
        return f"{line[:MAX_LINE_LENGTH]}... [truncated]"
    return line
1861
+
1862
+
1863
def _truncate_output(output: str, max_chars: int = MAX_BASH_OUTPUT_CHARS) -> tuple[str, bool]:
    """Limit ``output`` to ``max_chars`` characters.

    Returns:
        ``(text, truncated)``. When text was cut, a trailing line states how
        many characters were omitted and ``truncated`` is ``True``.
    """
    overflow = len(output) - max_chars
    if overflow <= 0:
        return output, False
    return f"{output[:max_chars]}\n... [truncated {overflow} chars]", True
1868
+
1869
+
1870
def _read_captured_output(stream) -> tuple[str, bool]:
    """Read back captured output from the start of ``stream``, with a size cap.

    The stream is flushed and rewound, then at most
    ``MAX_BASH_CAPTURE_CHARS + 1`` units are read; the extra unit only serves
    to detect that more data was available.

    Returns:
        ``(text, overflowed)`` where ``text`` holds at most
        ``MAX_BASH_CAPTURE_CHARS`` units.
    """
    stream.flush()
    stream.seek(0)
    captured = stream.read(MAX_BASH_CAPTURE_CHARS + 1)
    overflowed = len(captured) > MAX_BASH_CAPTURE_CHARS
    if overflowed:
        captured = captured[:MAX_BASH_CAPTURE_CHARS]
    return captured, overflowed
1877
+
1878
+
1879
def _build_shell_command(command: str, marker: str) -> tuple[str, list[str]]:
    """Wrap ``command`` in a script that reports its final cwd and exit status.

    The script runs the optional shell init and extglob-disable snippets, then
    the command, then prints two sentinel lines (``{marker}CWD=...`` and
    ``{marker}EXIT=...``) before exiting with the command's status. The whole
    group reads stdin from ``/dev/null``.

    Args:
        command: Shell command text to run.
        marker: Prefix used for the sentinel lines.

    Returns:
        ``(shell_path, argv_tail)`` where ``argv_tail`` is ``["-c", script]``.
    """
    shell_path = _resolve_shell_path()
    normalized_command = rewrite_windows_null_redirect(command)
    parts = [
        part
        for part in (
            build_shell_init_command(shell_path),
            build_disable_extglob_command(shell_path),
            normalized_command,
            "__deepy_exit=$?",
            f"printf '\\n{marker}CWD=%s\\n{marker}EXIT=%s\\n' \"$PWD\" \"$__deepy_exit\"",
            "exit $__deepy_exit",
        )
        if part
    ]
    # Statements are separated by newlines rather than "; ". With "; " a
    # command ending in a "#" comment swallows the rest of the script, a
    # here-document terminator never matches because text is appended to its
    # line, and a trailing "&" yields the invalid sequence "&;".
    return shell_path, ["-c", "{ " + "\n".join(parts) + "\n} < /dev/null"]
1895
+
1896
+
1897
+ def _resolve_shell_path() -> str:
1898
+ shell_path = os.environ.get("SHELL")
1899
+ if shell_path:
1900
+ return shell_path
1901
+ return "/bin/zsh" if Path("/bin/zsh").exists() else "/bin/sh"
1902
+
1903
+
1904
+ def _now_iso() -> str:
1905
+ return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
1906
+
1907
+
1908
def _terminate_process(process: subprocess.Popen[str]) -> None:
    """Forcefully stop ``process``, ignoring OS-level failures.

    On Windows (``os.name == "nt"``) the process itself is killed. Elsewhere
    ``SIGKILL`` is sent to the process group whose id equals ``process.pid``.
    """
    on_windows = os.name == "nt"
    try:
        if on_windows:
            process.kill()
        else:
            # NOTE(review): presumably the child was started as the leader of
            # its own process group/session — confirm at the spawn site.
            os.killpg(process.pid, signal.SIGKILL)
    except OSError:
        # OS errors are deliberately swallowed here.
        pass
1916
+
1917
+
1918
def _format_directory_entries(path: Path, project_root: Path) -> tuple[str, int, int]:
    """Render a listing of the entries directly inside ``path``.

    Directories are listed before other entries, each group ordered by
    lower-cased name. Entries rejected by ``_is_ignored_entry`` are left out
    and counted separately.

    Returns:
        ``(listing, shown_count, ignored_count)``. ``listing`` has one
        ``name<TAB>size`` line per shown entry, with ``/`` appended to
        directory names and ``0`` as the size when ``stat`` fails.
    """
    matcher = _load_gitignore_matcher(project_root)

    def sort_key(item: Path) -> tuple[bool, str]:
        # False sorts before True, so directories come first.
        return (not item.is_dir(), item.name.lower())

    rows: list[str] = []
    skipped = 0
    for child in sorted(path.iterdir(), key=sort_key):
        if _is_ignored_entry(child, project_root, matcher):
            skipped += 1
            continue
        trailer = "/" if child.is_dir() else ""
        try:
            size = child.stat().st_size
        except OSError:
            size = 0
        rows.append(f"{child.name}{trailer}\t{size}")
    return "\n".join(rows), len(rows), skipped
1933
+
1934
+
1935
+ def _normalize_relative_suffix(path: str) -> str:
1936
+ suffix = path.replace("\\", "/").strip("/")
1937
+ parts = [part for part in suffix.split("/") if part and part != "."]
1938
+ return "/".join(parts)
1939
+
1940
+
1941
def _find_suffix_matches(root: Path, suffix: str) -> list[Path]:
    """Collect files under ``root`` whose relative POSIX path ends with ``suffix``.

    Ignored directories are pruned from the walk and ignored files are
    skipped, both via ``_is_ignored_entry``.

    Returns:
        Resolved paths of all matching files, in walk order.
    """
    matcher = _load_gitignore_matcher(root)
    found: list[Path] = []
    for current, dirnames, filenames in os.walk(root):
        here = Path(current)
        # Assigning to the slice prunes the walk in place.
        kept_dirs = []
        for dirname in dirnames:
            if not _is_ignored_entry(here / dirname, root, matcher):
                kept_dirs.append(dirname)
        dirnames[:] = kept_dirs

        for filename in filenames:
            candidate = here / filename
            if _is_ignored_entry(candidate, root, matcher):
                continue
            try:
                relative = candidate.relative_to(root).as_posix()
            except ValueError:
                continue
            # NOTE(review): plain string test, so suffix "b.py" also matches
            # "ab.py" — confirm whether a path-component boundary is intended.
            if not relative.endswith(suffix):
                continue
            found.append(candidate.resolve())
    return found
1962
+
1963
+
1964
def _is_ignored_entry(
    path: Path,
    project_root: Path,
    gitignore: "GitignoreMatcher",
) -> bool:
    """Decide whether ``path`` should be hidden from listings and searches.

    An entry is ignored when its name is in ``IGNORED_DIRECTORY_ENTRIES`` or
    when ``gitignore`` matches its path relative to ``project_root``. Paths
    that are not under ``project_root`` are never matched by ``gitignore``.
    """
    if path.name in IGNORED_DIRECTORY_ENTRIES:
        return True
    try:
        inside_project = path.relative_to(project_root)
    except ValueError:
        return False
    return gitignore.ignores(inside_project.as_posix(), path.is_dir())
1976
+
1977
+
1978
@dataclass(frozen=True)
class GitignorePattern:
    """One rule from a ``.gitignore`` file."""

    # Pattern text with any leading "!" already removed.
    pattern: str
    # True when the rule re-includes paths instead of ignoring them.
    negated: bool = False
1982
+
1983
+
1984
@dataclass(frozen=True)
class GitignoreMatcher:
    """Ordered collection of gitignore rules with last-match-wins semantics."""

    # Rules in the order they were supplied; later rules override earlier ones.
    patterns: tuple[GitignorePattern, ...]

    def ignores(self, relative_path: str, is_dir: bool) -> bool:
        """Return whether ``relative_path`` is ignored by the rule set.

        Args:
            relative_path: Path using ``/`` separators; leading and trailing
                slashes are stripped before matching.
            is_dir: Whether the path refers to a directory.

        Returns:
            ``False`` for an empty path or when no rule matches; otherwise the
            verdict of the last matching rule (a negated rule un-ignores).
        """
        candidate = relative_path.strip("/")
        if not candidate:
            return False
        # The last matching rule decides, so scan from the end and stop at the
        # first hit.
        for rule in reversed(self.patterns):
            if _gitignore_pattern_matches(rule.pattern, candidate, is_dir):
                return not rule.negated
        return False
1997
+
1998
+
1999
def _load_gitignore_matcher(project_root: Path) -> GitignoreMatcher:
    """Build a matcher from ``project_root/.gitignore``.

    Blank lines and lines starting with ``#`` are skipped. A leading ``!``
    marks a negated rule. Backslashes in patterns are converted to ``/``.
    When the file does not exist, an empty matcher is returned.
    """
    source = project_root / ".gitignore"
    if not source.is_file():
        return GitignoreMatcher(())

    content = source.read_text(encoding="utf-8", errors="replace")
    rules: list[GitignorePattern] = []
    for entry in map(str.strip, content.splitlines()):
        if entry == "" or entry[0] == "#":
            continue
        is_negation = entry[0] == "!"
        body = entry[1:].strip() if is_negation else entry
        if not body:
            # A bare "!" carries no pattern.
            continue
        rules.append(GitignorePattern(body.replace("\\", "/"), is_negation))
    return GitignoreMatcher(tuple(rules))
2014
+
2015
+
2016
+ def _gitignore_pattern_matches(pattern: str, relative_path: str, is_dir: bool) -> bool:
2017
+ directory_only = pattern.endswith("/")
2018
+ normalized_pattern = pattern.strip("/")
2019
+ if not normalized_pattern:
2020
+ return False
2021
+ if directory_only and not is_dir:
2022
+ return relative_path.startswith(normalized_pattern + "/")
2023
+ if "/" in normalized_pattern:
2024
+ return fnmatch(relative_path, normalized_pattern) or relative_path.startswith(
2025
+ normalized_pattern + "/"
2026
+ )
2027
+ parts = relative_path.split("/")
2028
+ return any(fnmatch(part, normalized_pattern) for part in parts)
2029
+
2030
+
2031
+ def _parse_ask_user_questions(value: object) -> tuple[list[dict[str, object]], str | None]:
2032
+ if not isinstance(value, list) or not value:
2033
+ return [], '"questions" must be a non-empty array.'
2034
+
2035
+ questions: list[dict[str, object]] = []
2036
+ for index, item in enumerate(value):
2037
+ if not isinstance(item, dict):
2038
+ return [], f"Question at index {index} must be an object."
2039
+
2040
+ question = _trimmed_string(item.get("question"))
2041
+ if not question:
2042
+ return [], f'Question at index {index} is missing a non-empty "question" string.'
2043
+
2044
+ raw_options = item.get("options")
2045
+ if not isinstance(raw_options, list) or not raw_options:
2046
+ return [], f'Question at index {index} must include a non-empty "options" array.'
2047
+
2048
+ options: list[dict[str, str]] = []
2049
+ for option_index, option in enumerate(raw_options):
2050
+ if not isinstance(option, dict):
2051
+ return [], f"Option {option_index} for question {index} must be an object."
2052
+
2053
+ label = _trimmed_string(option.get("label"))
2054
+ if not label:
2055
+ return (
2056
+ [],
2057
+ f'Option {option_index} for question {index} is missing a non-empty "label" string.',
2058
+ )
2059
+
2060
+ parsed_option = {"label": label}
2061
+ description = _trimmed_string(option.get("description"))
2062
+ if description:
2063
+ parsed_option["description"] = description
2064
+ options.append(parsed_option)
2065
+
2066
+ parsed_question: dict[str, object] = {
2067
+ "question": question,
2068
+ "options": options,
2069
+ }
2070
+ multi_select = item.get("multiSelect")
2071
+ if isinstance(multi_select, bool):
2072
+ parsed_question["multiSelect"] = multi_select
2073
+ questions.append(parsed_question)
2074
+
2075
+ return questions, None
2076
+
2077
+
2078
+ def _build_question_summary(questions: list[dict[str, object]]) -> str:
2079
+ lines = ["Waiting for user input."]
2080
+ for index, item in enumerate(questions):
2081
+ lines.append("")
2082
+ lines.append(f"{index + 1}. {item['question']}")
2083
+ lines.append(f" Mode: {'multi-select' if item.get('multiSelect') else 'single-select'}")
2084
+ for option in item["options"]:
2085
+ if not isinstance(option, dict):
2086
+ continue
2087
+ lines.append(f" - {option['label']}")
2088
+ if option.get("description"):
2089
+ lines.append(f" {option['description']}")
2090
+ lines.append(" - Other")
2091
+ return "\n".join(lines)
2092
+
2093
+
2094
+ def _trimmed_string(value: object) -> str:
2095
+ return value.strip() if isinstance(value, str) else ""
2096
+
2097
+
2098
+ def _extract_bash_sentinel(stdout: str, marker: str) -> tuple[str, Path | None, int | None]:
2099
+ start = stdout.rfind(f"\n{marker}CWD=")
2100
+ if start == -1:
2101
+ return stdout, None, None
2102
+ visible = stdout[:start]
2103
+ tail = stdout[start + 1 :].splitlines()
2104
+ cwd: Path | None = None
2105
+ exit_code: int | None = None
2106
+ for line in tail:
2107
+ if line.startswith(f"{marker}CWD="):
2108
+ cwd = Path(line.removeprefix(f"{marker}CWD=")).resolve()
2109
+ elif line.startswith(f"{marker}EXIT="):
2110
+ raw = line.removeprefix(f"{marker}EXIT=")
2111
+ if raw.isdigit():
2112
+ exit_code = int(raw)
2113
+ return visible, cwd, exit_code