mycode-sdk 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mycode/tools.py ADDED
@@ -0,0 +1,1275 @@
1
+ """Tool execution runtime.
2
+
3
+ Runtime exposes four built-in tools: ``read``, ``write``, ``edit``, ``bash``.
4
+ External callers register custom tools in two ways:
5
+
6
+ - Build a :class:`ToolSpec` directly (full control over JSON schema).
7
+ - Use :func:`tool` to wrap a plain Python function; the schema is inferred
8
+ from type hints.
9
+
10
+ Built-in and custom tools share one execution path: ``ToolExecutor.execute``.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import asyncio
16
+ import inspect
17
+ import json
18
+ import os
19
+ import queue
20
+ import shlex
21
+ import signal
22
+ import subprocess
23
+ import threading
24
+ import time
25
+ import typing
26
+ from base64 import b64encode
27
+ from collections import deque
28
+ from collections.abc import Callable, Sequence
29
+ from dataclasses import dataclass
30
+ from difflib import SequenceMatcher
31
+ from mimetypes import guess_type
32
+ from pathlib import Path
33
+ from typing import Any, Literal, TextIO, cast, get_args, get_origin, overload
34
+
35
+ from mycode.messages import image_block, text_block
36
+
37
+ # ---------------------------------------------------------------------------
38
+ # Limits (keep token usage low)
39
+ # ---------------------------------------------------------------------------
40
+ DEFAULT_MAX_LINES = 2000
41
+ DEFAULT_MAX_BYTES = 50 * 1024
42
+ READ_MAX_LINE_CHARS = 2000
43
+
44
+ BASH_TIMEOUT_SECONDS = 120
45
+ _BASH_MAX_IN_MEMORY_BYTES = 5_000_000
46
+
47
+
48
+ ToolOutputCallback = Callable[[str], None]
49
+
50
+
51
+ @dataclass(frozen=True)
52
+ class ToolExecutionResult:
53
+ """Structured tool result used by the runtime.
54
+
55
+ ``model_text`` is appended to session history for future provider replay.
56
+ ``display_text`` is shown to the user.
57
+ """
58
+
59
+ model_text: str
60
+ display_text: str
61
+ is_error: bool = False
62
+ content: list[dict[str, Any]] | None = None
63
+
64
+
65
+ ToolRunner = Callable[["ToolContext", dict[str, Any]], ToolExecutionResult]
66
+
67
+
68
+ @dataclass(frozen=True)
69
+ class ToolSpec:
70
+ """One tool the agent can call.
71
+
72
+ ``runner`` receives a :class:`ToolContext` and the raw argument dict from
73
+ the model. Tools that emit incremental output (currently only ``bash``)
74
+ set ``streams_output=True`` and write lines via ``ctx.emit``.
75
+ """
76
+
77
+ name: str
78
+ description: str
79
+ input_schema: dict[str, Any]
80
+ runner: ToolRunner
81
+ streams_output: bool = False
82
+
83
+
84
+ # ---------------------------------------------------------------------------
85
+ # Utilities
86
+ # ---------------------------------------------------------------------------
87
+
88
+
89
+ @dataclass(frozen=True)
90
+ class Truncation:
91
+ truncated: bool
92
+ truncated_by: str | None
93
+ output_lines: int
94
+ output_bytes: int
95
+
96
+
97
+ def truncate_text(
98
+ text: str,
99
+ *,
100
+ max_lines: int = DEFAULT_MAX_LINES,
101
+ max_bytes: int = DEFAULT_MAX_BYTES,
102
+ tail: bool = False,
103
+ ) -> tuple[str, Truncation]:
104
+ """Truncate text by both line and byte limits.
105
+
106
+ Returns (content, truncation).
107
+ """
108
+
109
+ lines = text.splitlines()
110
+ total_bytes = len(text.encode("utf-8"))
111
+ out_lines: list[str] = []
112
+ out_bytes = 0
113
+
114
+ source = reversed(lines) if tail else lines
115
+
116
+ for line in source:
117
+ if len(out_lines) >= max_lines:
118
+ break
119
+ b = len(line.encode("utf-8")) + 1 # +1 for newline
120
+ if out_bytes + b > max_bytes:
121
+ break
122
+ out_lines.append(line)
123
+ out_bytes += b
124
+
125
+ if tail:
126
+ out_lines.reverse()
127
+
128
+ # Edge case: a single line exceeds max_bytes — take the tail/head slice
129
+ if not out_lines and lines:
130
+ target = lines[-1] if tail else lines[0]
131
+ encoded = target.encode("utf-8")
132
+ sliced = encoded[-max_bytes:] if tail else encoded[:max_bytes]
133
+ content = sliced.decode("utf-8", errors="ignore")
134
+ return content, Truncation(
135
+ truncated=True,
136
+ truncated_by="bytes",
137
+ output_lines=1,
138
+ output_bytes=len(sliced),
139
+ )
140
+
141
+ content = "\n".join(out_lines)
142
+ truncated = len(out_lines) < len(lines) or out_bytes < total_bytes
143
+
144
+ truncated_by: str | None = None
145
+ if truncated:
146
+ if len(out_lines) < len(lines):
147
+ truncated_by = "lines" if len(out_lines) == max_lines else "bytes"
148
+ else:
149
+ truncated_by = "bytes"
150
+
151
+ trunc = Truncation(
152
+ truncated=truncated,
153
+ truncated_by=truncated_by,
154
+ output_lines=len(out_lines),
155
+ output_bytes=out_bytes,
156
+ )
157
+ return content, trunc
158
+
159
+
160
+ def resolve_path(path: str, *, cwd: str) -> str:
161
+ """Resolve path relative to cwd (without changing global process cwd)."""
162
+
163
+ p = Path(path).expanduser()
164
+ if not p.is_absolute():
165
+ p = Path(cwd) / p
166
+ return str(p.resolve(strict=False))
167
+
168
+
169
+ def _atomic_write_text(path: Path, content: str, *, newline: str | None = None) -> None:
170
+ tmp = path.with_suffix(path.suffix + ".tmp")
171
+ if newline is None:
172
+ tmp.write_text(content, encoding="utf-8")
173
+ else:
174
+ normalized = content.replace("\r\n", "\n")
175
+ if newline == "\r\n":
176
+ normalized = normalized.replace("\n", "\r\n")
177
+ with tmp.open("w", encoding="utf-8", newline="") as file:
178
+ file.write(normalized)
179
+ tmp.replace(path)
180
+
181
+
182
+ def detect_image_mime_type(path: Path) -> str | None:
183
+ try:
184
+ with path.open("rb") as file:
185
+ header = file.read(16)
186
+ except OSError:
187
+ return None
188
+
189
+ if header.startswith(b"\x89PNG\r\n\x1a\n"):
190
+ return "image/png"
191
+ if header.startswith(b"\xff\xd8\xff"):
192
+ return "image/jpeg"
193
+ if header.startswith((b"GIF87a", b"GIF89a")):
194
+ return "image/gif"
195
+ if header.startswith(b"RIFF") and header[8:12] == b"WEBP":
196
+ return "image/webp"
197
+ guessed, _ = guess_type(path.name)
198
+ if guessed in {"image/png", "image/jpeg", "image/gif", "image/webp"}:
199
+ return guessed
200
+ return None
201
+
202
+
203
+ def detect_document_mime_type(path: Path) -> str | None:
204
+ try:
205
+ with path.open("rb") as file:
206
+ if file.read(5).startswith(b"%PDF-"):
207
+ return "application/pdf"
208
+ except OSError:
209
+ pass
210
+ guessed, _ = guess_type(path.name)
211
+ return "application/pdf" if guessed == "application/pdf" else None
212
+
213
+
214
+ # ---------------------------------------------------------------------------
215
+ # Subprocess tracking for cancellation
216
+ # ---------------------------------------------------------------------------
217
+
218
+
219
+ _ACTIVE_PROCS: set[subprocess.Popen[str]] = set()
220
+ _ACTIVE_PROCS_LOCK = threading.Lock()
221
+
222
+
223
+ def _kill_proc_tree(proc: subprocess.Popen[str]) -> None:
224
+ try:
225
+ if os.name == "posix":
226
+ os.killpg(proc.pid, signal.SIGKILL)
227
+ else:
228
+ proc.kill()
229
+ except Exception:
230
+ try:
231
+ proc.kill()
232
+ except Exception:
233
+ pass
234
+
235
+
236
+ def cancel_all_tools() -> None:
237
+ """Terminate all running bash subprocesses across every executor."""
238
+
239
+ with _ACTIVE_PROCS_LOCK:
240
+ procs = list(_ACTIVE_PROCS)
241
+ _ACTIVE_PROCS.clear()
242
+
243
+ for proc in procs:
244
+ _kill_proc_tree(proc)
245
+
246
+
247
+ # ---------------------------------------------------------------------------
248
+ # Tool execution context
249
+ # ---------------------------------------------------------------------------
250
+
251
+
252
+ class ToolContext:
253
+ """Runtime context passed to a tool's ``runner``.
254
+
255
+ Exposes executor configuration, the executor itself (so custom tools can
256
+ invoke other registered tools via :meth:`call`), and streaming helpers for
257
+ tools declared with ``streams_output=True``.
258
+ """
259
+
260
+ def __init__(
261
+ self,
262
+ *,
263
+ executor: ToolExecutor,
264
+ tool_call_id: str | None = None,
265
+ emit: ToolOutputCallback | None = None,
266
+ ):
267
+ self.executor = executor
268
+ self.tool_call_id = tool_call_id
269
+ self.emit = emit
270
+
271
+ @property
272
+ def cwd(self) -> str:
273
+ return self.executor.cwd
274
+
275
+ @property
276
+ def session_dir(self) -> Path:
277
+ return self.executor.session_dir
278
+
279
+ @property
280
+ def tool_output_dir(self) -> Path:
281
+ return self.executor.tool_output_dir
282
+
283
+ @property
284
+ def supports_image_input(self) -> bool:
285
+ return self.executor.supports_image_input
286
+
287
+ def call(self, name: str, args: dict[str, Any]) -> ToolExecutionResult:
288
+ """Invoke another registered tool from inside this one.
289
+
290
+ ``tool_call_id`` and ``emit`` from the current context are forwarded,
291
+ so a streaming tool that delegates to ``bash`` keeps producing
292
+ ``tool_output`` events upstream.
293
+ """
294
+
295
+ return self.executor.execute(
296
+ name,
297
+ args,
298
+ tool_call_id=self.tool_call_id,
299
+ on_output=self.emit,
300
+ )
301
+
302
+
303
+ # ---------------------------------------------------------------------------
304
+ # Executor
305
+ # ---------------------------------------------------------------------------
306
+
307
+
308
+ class ToolExecutor:
309
+ """Execute tool calls for a single session."""
310
+
311
+ def __init__(
312
+ self,
313
+ *,
314
+ cwd: str,
315
+ session_dir: Path,
316
+ tools: Sequence[ToolSpec] | None = None,
317
+ supports_image_input: bool = False,
318
+ ):
319
+ self.cwd = str(Path(cwd).resolve(strict=False))
320
+ self.session_dir = session_dir
321
+ self.supports_image_input = supports_image_input
322
+ self.session_dir.mkdir(parents=True, exist_ok=True)
323
+ self.tool_output_dir = self.session_dir / "tool-output"
324
+ self.tool_output_dir.mkdir(parents=True, exist_ok=True)
325
+ self._active_procs: set[subprocess.Popen[str]] = set()
326
+ self._active_procs_lock = threading.Lock()
327
+
328
+ specs = tuple(tools if tools is not None else DEFAULT_TOOL_SPECS)
329
+ self.tool_specs: tuple[ToolSpec, ...] = specs
330
+ self._tools_by_name: dict[str, ToolSpec] = {}
331
+ for spec in specs:
332
+ if spec.name in self._tools_by_name:
333
+ raise ValueError(f"duplicate tool name: {spec.name}")
334
+ self._tools_by_name[spec.name] = spec
335
+
336
+ @property
337
+ def definitions(self) -> list[dict[str, Any]]:
338
+ """Return provider-facing tool definitions."""
339
+
340
+ return [
341
+ {
342
+ "name": spec.name,
343
+ "description": spec.description,
344
+ "input_schema": spec.input_schema,
345
+ }
346
+ for spec in self.tool_specs
347
+ ]
348
+
349
+ def get(self, name: str) -> ToolSpec | None:
350
+ """Return the registered spec for a tool name."""
351
+
352
+ return self._tools_by_name.get(name)
353
+
354
+ def execute(
355
+ self,
356
+ name: str,
357
+ args: dict[str, Any],
358
+ *,
359
+ tool_call_id: str | None = None,
360
+ on_output: ToolOutputCallback | None = None,
361
+ ) -> ToolExecutionResult:
362
+ """Execute one registered tool by name.
363
+
364
+ ``on_output`` is forwarded to the runner as ``ctx.emit`` for tools that
365
+ stream incremental output.
366
+ """
367
+
368
+ spec = self._tools_by_name.get(name)
369
+ if spec is None:
370
+ return ToolExecutionResult(
371
+ model_text=f"error: unknown tool: {name}",
372
+ display_text=f"Unknown tool: {name}",
373
+ is_error=True,
374
+ )
375
+ ctx = ToolContext(executor=self, tool_call_id=tool_call_id, emit=on_output)
376
+ return spec.runner(ctx, args)
377
+
378
+ def cancel_active(self) -> None:
379
+ """Terminate bash subprocesses started by this executor."""
380
+
381
+ with self._active_procs_lock:
382
+ procs = list(self._active_procs)
383
+ self._active_procs.clear()
384
+
385
+ for proc in procs:
386
+ with _ACTIVE_PROCS_LOCK:
387
+ _ACTIVE_PROCS.discard(proc)
388
+ _kill_proc_tree(proc)
389
+
390
+ def track_proc(self, proc: subprocess.Popen[str]) -> None:
391
+ """Register a subprocess so ``cancel_active`` and ``cancel_all_tools``
392
+ can terminate it if the agent turn is cancelled."""
393
+
394
+ with self._active_procs_lock:
395
+ self._active_procs.add(proc)
396
+ with _ACTIVE_PROCS_LOCK:
397
+ _ACTIVE_PROCS.add(proc)
398
+
399
+ def untrack_proc(self, proc: subprocess.Popen[str]) -> None:
400
+ """Remove a subprocess from the cancellation registry once it exits."""
401
+
402
+ with self._active_procs_lock:
403
+ self._active_procs.discard(proc)
404
+ with _ACTIVE_PROCS_LOCK:
405
+ _ACTIVE_PROCS.discard(proc)
406
+
407
+
408
+ # ---------------------------------------------------------------------------
409
+ # Built-in tool runners
410
+ # ---------------------------------------------------------------------------
411
+
412
+
413
+ def _run_read(ctx: ToolContext, args: dict[str, Any]) -> ToolExecutionResult:
414
+ """Read a text file or supported image file.
415
+
416
+ ``offset`` is 1-indexed. ``limit`` is the number of lines.
417
+ """
418
+
419
+ path = str(args.get("path") or "")
420
+ offset = args.get("offset")
421
+ limit = args.get("limit")
422
+ file_path = Path(resolve_path(path, cwd=ctx.cwd))
423
+
424
+ image_mime_type = detect_image_mime_type(file_path)
425
+ if image_mime_type:
426
+ if not ctx.supports_image_input:
427
+ return ToolExecutionResult(
428
+ model_text="error: image input is not supported by the current model",
429
+ display_text="Current model does not support image input",
430
+ is_error=True,
431
+ )
432
+ summary = f"Read image file [{image_mime_type}]"
433
+ try:
434
+ image_data = b64encode(file_path.read_bytes()).decode("utf-8")
435
+ except FileNotFoundError:
436
+ return ToolExecutionResult(
437
+ model_text=f"error: file not found: {path}",
438
+ display_text=f"File not found: {path}",
439
+ is_error=True,
440
+ )
441
+ except Exception as exc:
442
+ return ToolExecutionResult(
443
+ model_text=f"error: failed to read file: {exc}",
444
+ display_text=f"Failed to read file: {path}",
445
+ is_error=True,
446
+ )
447
+ return ToolExecutionResult(
448
+ model_text=summary,
449
+ display_text=summary,
450
+ content=[
451
+ text_block(summary),
452
+ image_block(image_data, mime_type=image_mime_type, name=file_path.name),
453
+ ],
454
+ )
455
+
456
+ start_line = offset if isinstance(offset, int) and offset > 0 else 1
457
+ line_limit = limit if isinstance(limit, int) and limit > 0 else DEFAULT_MAX_LINES
458
+ lines: list[str] = []
459
+ total_lines = 0
460
+ next_offset: int | None = None
461
+ first_shortened_line: int | None = None
462
+ shortened_lines = 0
463
+
464
+ try:
465
+ with file_path.open("r", encoding="utf-8") as f:
466
+ for total_lines, raw_line in enumerate(f, start=1):
467
+ if total_lines < start_line:
468
+ continue
469
+ if len(lines) >= line_limit:
470
+ next_offset = total_lines
471
+ break
472
+
473
+ line = raw_line.rstrip("\r\n")
474
+ if len(line) > READ_MAX_LINE_CHARS:
475
+ if first_shortened_line is None:
476
+ first_shortened_line = total_lines
477
+ shortened_lines += 1
478
+ line = line[:READ_MAX_LINE_CHARS] + " ... [line truncated]"
479
+ lines.append(line)
480
+ except FileNotFoundError:
481
+ return ToolExecutionResult(
482
+ model_text=f"error: file not found: {path}",
483
+ display_text=f"File not found: {path}",
484
+ is_error=True,
485
+ )
486
+ except IsADirectoryError:
487
+ return ToolExecutionResult(
488
+ model_text=f"error: not a file: {path}",
489
+ display_text=f"Not a file: {path}",
490
+ is_error=True,
491
+ )
492
+ except UnicodeDecodeError:
493
+ return ToolExecutionResult(
494
+ model_text=f"error: file is not valid utf-8 text: {path}",
495
+ display_text=f"File is not valid UTF-8 text: {path}",
496
+ is_error=True,
497
+ )
498
+ except Exception as exc:
499
+ return ToolExecutionResult(
500
+ model_text=f"error: failed to read file: {exc}",
501
+ display_text=f"Failed to read file: {path}",
502
+ is_error=True,
503
+ )
504
+
505
+ if total_lines < start_line and not (total_lines == 0 and start_line == 1):
506
+ return ToolExecutionResult(
507
+ model_text=f"error: offset {offset} beyond end of file ({total_lines} lines)",
508
+ display_text=f"Offset {offset} beyond end of file: {path}",
509
+ is_error=True,
510
+ )
511
+
512
+ parts: list[str] = []
513
+ content = "\n".join(lines)
514
+ if content:
515
+ parts.append(content)
516
+
517
+ if next_offset is not None:
518
+ parts.append(f"[Showing lines {start_line}-{next_offset - 1}. Use offset={next_offset} to continue.]")
519
+
520
+ if first_shortened_line is not None:
521
+ quoted = shlex.quote(str(file_path))
522
+ prefix = f"[Line {first_shortened_line} was shortened to {READ_MAX_LINE_CHARS} chars."
523
+ if shortened_lines > 1:
524
+ prefix = (
525
+ f"[{shortened_lines} lines were shortened to {READ_MAX_LINE_CHARS} chars. "
526
+ f"First shortened line: {first_shortened_line}."
527
+ )
528
+ parts.append(
529
+ f"{prefix}\n"
530
+ + "Use bash to inspect it in bytes:\n"
531
+ + f"sed -n '{first_shortened_line}p' {quoted} | head -c 2000\n"
532
+ + f"sed -n '{first_shortened_line}p' {quoted} | tail -c +2001 | head -c 2000]"
533
+ )
534
+
535
+ joined = "\n\n".join(parts) if parts else ""
536
+ return ToolExecutionResult(model_text=joined, display_text=joined)
537
+
538
+
539
+ def _run_write(ctx: ToolContext, args: dict[str, Any]) -> ToolExecutionResult:
540
+ path = str(args.get("path") or "")
541
+ content = str(args.get("content") or "")
542
+ file_path = Path(resolve_path(path, cwd=ctx.cwd))
543
+ try:
544
+ file_path.parent.mkdir(parents=True, exist_ok=True)
545
+ _atomic_write_text(file_path, content)
546
+ except Exception as exc:
547
+ return ToolExecutionResult(
548
+ model_text=f"error: failed to write file: {exc}",
549
+ display_text=f"Failed to write file: {path}",
550
+ is_error=True,
551
+ )
552
+ return ToolExecutionResult(model_text="ok", display_text=f"Wrote {path}")
553
+
554
+
555
+ def _run_edit(ctx: ToolContext, args: dict[str, Any]) -> ToolExecutionResult:
556
+ """Replace one or more unique snippets in a file.
557
+
558
+ All edits are matched against the original file content (not
559
+ incrementally). Exact match is tried first; if that fails, a conservative
560
+ fuzzy match tolerates line-ending and trailing-whitespace differences
561
+ while only replacing the matched region in the original text.
562
+ """
563
+
564
+ path = str(args.get("path") or "")
565
+ raw_edits = args.get("edits")
566
+ if not isinstance(raw_edits, list):
567
+ return ToolExecutionResult(
568
+ model_text="error: edits must be a list",
569
+ display_text="Edits must be a list",
570
+ is_error=True,
571
+ )
572
+ edits = cast(list[dict[str, str]], raw_edits)
573
+ file_path = Path(resolve_path(path, cwd=ctx.cwd))
574
+ if not edits:
575
+ return ToolExecutionResult(
576
+ model_text="error: edits must not be empty",
577
+ display_text="Edits list is empty",
578
+ is_error=True,
579
+ )
580
+
581
+ multi = len(edits) > 1
582
+ for i, entry in enumerate(edits):
583
+ old_text = entry.get("oldText", "")
584
+ new_text = entry.get("newText", "")
585
+ pfx = f"edits[{i}]: " if multi else ""
586
+ if not old_text:
587
+ return ToolExecutionResult(
588
+ model_text=f"error: {pfx}oldText must not be empty",
589
+ display_text="Edit target must not be empty",
590
+ is_error=True,
591
+ )
592
+ if old_text == new_text:
593
+ return ToolExecutionResult(
594
+ model_text=f"error: {pfx}oldText and newText are identical",
595
+ display_text="Edit would not change the file",
596
+ is_error=True,
597
+ )
598
+
599
+ try:
600
+ read_mtime_ns = file_path.stat().st_mtime_ns
601
+ with file_path.open("r", encoding="utf-8", newline="") as file:
602
+ text = file.read()
603
+ except FileNotFoundError:
604
+ return ToolExecutionResult(
605
+ model_text=f"error: file not found: {path}",
606
+ display_text=f"File not found: {path}",
607
+ is_error=True,
608
+ )
609
+ except IsADirectoryError:
610
+ return ToolExecutionResult(
611
+ model_text=f"error: not a file: {path}",
612
+ display_text=f"Not a file: {path}",
613
+ is_error=True,
614
+ )
615
+ except Exception as exc:
616
+ return ToolExecutionResult(
617
+ model_text=f"error: failed to read file: {exc}",
618
+ display_text=f"Failed to read file: {path}",
619
+ is_error=True,
620
+ )
621
+
622
+ newline = "\r\n" if "\r\n" in text else None
623
+
624
+ matches: list[tuple[int, int, str, int]] = []
625
+ norm_text: str | None = None
626
+ norm_imap: list[int] | None = None
627
+
628
+ for i, entry in enumerate(edits):
629
+ old_text = entry["oldText"]
630
+ new_text = entry["newText"]
631
+ pfx = f"edits[{i}]: " if multi else ""
632
+
633
+ exact_count = text.count(old_text)
634
+ if exact_count == 1:
635
+ pos = text.index(old_text)
636
+ matches.append((pos, pos + len(old_text), new_text, i))
637
+ continue
638
+ if exact_count > 1:
639
+ return ToolExecutionResult(
640
+ model_text=f"error: {pfx}oldText occurs {exact_count} times; provide a more specific oldText",
641
+ display_text="Edit target is ambiguous",
642
+ is_error=True,
643
+ )
644
+
645
+ # Fuzzy fallback: normalize both sides, find in normalized space,
646
+ # but map the span back to the original text for replacement.
647
+ if norm_text is None:
648
+ norm_text, norm_imap = _normalize_text(text)
649
+ norm_old, _ = _normalize_text(old_text)
650
+
651
+ norm_count = norm_text.count(norm_old)
652
+ if norm_count == 0:
653
+ hint = _closest_line_hint(text, old_text)
654
+ msg = f"error: {pfx}oldText not found"
655
+ if hint:
656
+ msg += f". closest line: {hint}"
657
+ return ToolExecutionResult(
658
+ model_text=msg,
659
+ display_text="Edit target not found",
660
+ is_error=True,
661
+ )
662
+ if norm_count > 1:
663
+ return ToolExecutionResult(
664
+ model_text=(
665
+ f"error: {pfx}oldText occurs {norm_count} times after normalization; "
666
+ "provide a more specific oldText"
667
+ ),
668
+ display_text="Edit target is ambiguous after normalization",
669
+ is_error=True,
670
+ )
671
+
672
+ idx = norm_text.find(norm_old)
673
+ assert norm_imap is not None # set together with norm_text
674
+ orig_start = norm_imap[idx]
675
+ end_idx = idx + len(norm_old)
676
+ orig_end = norm_imap[end_idx] if end_idx < len(norm_imap) else len(text)
677
+ matches.append((orig_start, orig_end, new_text, i))
678
+
679
+ # Sort by position and reject overlapping edits.
680
+ matches.sort(key=lambda m: m[0])
681
+ for j in range(1, len(matches)):
682
+ _, prev_end, _, prev_i = matches[j - 1]
683
+ curr_start, _, _, curr_i = matches[j]
684
+ if prev_end > curr_start:
685
+ return ToolExecutionResult(
686
+ model_text=f"error: edits[{prev_i}] and edits[{curr_i}] overlap",
687
+ display_text="Edit regions overlap",
688
+ is_error=True,
689
+ )
690
+
691
+ # Apply replacements back-to-front so earlier offsets stay valid.
692
+ updated = text
693
+ for start, end, new_text, _ in reversed(matches):
694
+ updated = updated[:start] + new_text + updated[end:]
695
+
696
+ if updated == text:
697
+ return ToolExecutionResult(
698
+ model_text="error: edits produced no changes",
699
+ display_text="Edits would not change the file",
700
+ is_error=True,
701
+ )
702
+
703
+ try:
704
+ if file_path.stat().st_mtime_ns != read_mtime_ns:
705
+ return ToolExecutionResult(
706
+ model_text="error: file changed while editing; read it again and retry",
707
+ display_text="File changed while editing",
708
+ is_error=True,
709
+ )
710
+ _atomic_write_text(file_path, updated, newline=newline)
711
+ except Exception as exc:
712
+ return ToolExecutionResult(
713
+ model_text=f"error: failed to write file: {exc}",
714
+ display_text=f"Failed to write file: {path}",
715
+ is_error=True,
716
+ )
717
+
718
+ # Build per-edit metadata for the web UI diff view.
719
+ # Matches are sorted by original position; track cumulative character
720
+ # shift so we can compute correct line numbers in the updated text.
721
+ updated_lines = updated.splitlines()
722
+ edit_metas: list[dict[str, Any]] = []
723
+ char_shift = 0
724
+ context_lines = 3
725
+
726
+ for start, end, new_text, _ in matches:
727
+ old_snippet = text[start:end]
728
+ new_start = start + char_shift
729
+ start_line = updated[:new_start].count("\n") + 1
730
+ old_lc = len(old_snippet.splitlines()) or 1
731
+ new_lc = len(new_text.splitlines()) or 1
732
+
733
+ si = start_line - 1
734
+ before = updated_lines[max(0, si - context_lines) : si]
735
+ after = updated_lines[si + new_lc : si + new_lc + context_lines]
736
+
737
+ edit_metas.append(
738
+ {
739
+ "start_line": start_line,
740
+ "old_line_count": old_lc,
741
+ "new_line_count": new_lc,
742
+ "context_before": before,
743
+ "context_after": after,
744
+ }
745
+ )
746
+ char_shift += len(new_text) - (end - start)
747
+
748
+ n = len(edits)
749
+ display = f"Updated {path}" if n == 1 else f"Updated {path} ({n} edits)"
750
+ return ToolExecutionResult(
751
+ model_text=json.dumps({"status": "ok", "edits": edit_metas}),
752
+ display_text=display,
753
+ )
754
+
755
+
756
+ def _run_bash(ctx: ToolContext, args: dict[str, Any]) -> ToolExecutionResult:
757
+ """Run a shell command and return combined stdout/stderr text.
758
+
759
+ Output is streamed line-by-line through ``ctx.emit`` when present.
760
+
761
+ Truncation has two layers:
762
+ 1. Memory protection: when total output exceeds ``_BASH_MAX_IN_MEMORY_BYTES``,
763
+ further output is written to a log file and only a bounded tail
764
+ (``deque(maxlen=DEFAULT_MAX_LINES)``) is kept in memory.
765
+ 2. Display truncation: the final text is truncated to
766
+ ``DEFAULT_MAX_LINES`` / ``DEFAULT_MAX_BYTES`` via ``truncate_text``.
767
+ """
768
+
769
+ command = str(args.get("command") or "")
770
+ timeout = args.get("timeout")
771
+
772
+ timeout_seconds = int(timeout) if isinstance(timeout, int) and timeout > 0 else BASH_TIMEOUT_SECONDS
773
+
774
+ proc: subprocess.Popen[str] | None = None
775
+ log_path = ctx.tool_output_dir / f"bash-{ctx.tool_call_id or 'call'}.log"
776
+ # Streaming phase: accumulate in memory until _BASH_MAX_IN_MEMORY_BYTES,
777
+ # then spill to log file and keep only a bounded tail via deque.
778
+ kept_lines: list[str] = []
779
+ kept_bytes = 0
780
+ total_line_count = 0
781
+ tail_lines: deque[str] = deque(maxlen=DEFAULT_MAX_LINES)
782
+ log_file: TextIO | None = None
783
+ saved_output_path: Path | None = None
784
+
785
+ try:
786
+ proc = subprocess.Popen(
787
+ command,
788
+ shell=True,
789
+ cwd=ctx.cwd,
790
+ stdout=subprocess.PIPE,
791
+ stderr=subprocess.STDOUT,
792
+ text=True,
793
+ bufsize=1,
794
+ universal_newlines=True,
795
+ start_new_session=os.name == "posix",
796
+ )
797
+ ctx.executor.track_proc(proc)
798
+
799
+ stdout = cast(TextIO, proc.stdout)
800
+ output_queue: queue.Queue[str | None] = queue.Queue()
801
+ reader_errors: list[Exception] = []
802
+
803
+ def read_stdout() -> None:
804
+ try:
805
+ for line in stdout:
806
+ output_queue.put(line)
807
+ except Exception as exc: # pragma: no cover - defensive
808
+ reader_errors.append(exc)
809
+ finally:
810
+ output_queue.put(None)
811
+
812
+ reader = threading.Thread(target=read_stdout, daemon=True)
813
+ reader.start()
814
+ deadline = time.monotonic() + timeout_seconds
815
+
816
+ while True:
817
+ remaining = deadline - time.monotonic()
818
+ if remaining <= 0:
819
+ _kill_proc_tree(proc)
820
+ return ToolExecutionResult(
821
+ model_text=f"error: timeout after {timeout_seconds}s",
822
+ display_text=f"Command timed out after {timeout_seconds}s",
823
+ is_error=True,
824
+ )
825
+
826
+ try:
827
+ line = output_queue.get(timeout=min(0.1, remaining))
828
+ except queue.Empty:
829
+ continue
830
+
831
+ if line is None:
832
+ break
833
+
834
+ line = line.rstrip("\n")
835
+ total_line_count += 1
836
+ kept_bytes += len(line.encode("utf-8")) + 1
837
+
838
+ if log_file is None:
839
+ kept_lines.append(line)
840
+ if kept_bytes > _BASH_MAX_IN_MEMORY_BYTES:
841
+ log_file = log_path.open("w", encoding="utf-8")
842
+ saved_output_path = log_path
843
+ if kept_lines:
844
+ log_file.write("\n".join(kept_lines))
845
+ log_file.write("\n")
846
+ tail_lines.extend(kept_lines)
847
+ kept_lines = []
848
+ else:
849
+ tail_lines.append(line)
850
+ log_file.write(line)
851
+ log_file.write("\n")
852
+
853
+ if ctx.emit is not None:
854
+ ctx.emit(line)
855
+
856
+ if reader_errors:
857
+ message = str(reader_errors[0])
858
+ return ToolExecutionResult(
859
+ model_text=f"error: {message}",
860
+ display_text=message,
861
+ is_error=True,
862
+ )
863
+
864
+ try:
865
+ remaining = max(0.1, deadline - time.monotonic())
866
+ proc.wait(timeout=remaining)
867
+ except subprocess.TimeoutExpired:
868
+ _kill_proc_tree(proc)
869
+ return ToolExecutionResult(
870
+ model_text=f"error: timeout after {timeout_seconds}s",
871
+ display_text=f"Command timed out after {timeout_seconds}s",
872
+ is_error=True,
873
+ )
874
+
875
+ exit_code = proc.returncode
876
+
877
+ raw_output = "\n".join(list(tail_lines) if log_file is not None else kept_lines)
878
+ output = raw_output.strip() or "(empty)"
879
+ content, trunc = truncate_text(output, tail=True)
880
+
881
+ # Save full output to log file when truncated but not already on disk
882
+ if log_file is None and trunc.truncated:
883
+ try:
884
+ log_path.write_text(raw_output, encoding="utf-8")
885
+ saved_output_path = log_path
886
+ except Exception:
887
+ saved_output_path = None
888
+
889
+ result = content
890
+
891
+ # Append truncation notice if any output was dropped.
892
+ shown_lines = trunc.output_lines
893
+ was_truncated = log_file is not None or trunc.truncated
894
+ if was_truncated:
895
+ if trunc.truncated_by == "bytes":
896
+ if total_line_count <= 1:
897
+ notice = (
898
+ f"[Truncated: showing last {DEFAULT_MAX_BYTES // 1024}KB of output "
899
+ f"({DEFAULT_MAX_BYTES // 1024}KB limit)."
900
+ )
901
+ else:
902
+ notice = f"[Truncated: showing tail output ({DEFAULT_MAX_BYTES // 1024}KB limit)."
903
+ else:
904
+ notice = f"[Truncated: last {shown_lines} of {total_line_count} lines."
905
+ if saved_output_path is not None:
906
+ notice += f" Full output: {saved_output_path}]"
907
+ else:
908
+ notice += "]"
909
+ result += "\n\n" + notice
910
+
911
+ if exit_code:
912
+ result += f"\n\n[exit code: {exit_code}]"
913
+
914
+ return ToolExecutionResult(model_text=result, display_text=result)
915
+
916
+ except Exception as exc:
917
+ message = str(exc)
918
+ return ToolExecutionResult(
919
+ model_text=f"error: {message}",
920
+ display_text=message,
921
+ is_error=True,
922
+ )
923
+ finally:
924
+ if log_file is not None:
925
+ try:
926
+ log_file.close()
927
+ except Exception:
928
+ pass
929
+ if proc is not None:
930
+ ctx.executor.untrack_proc(proc)
931
+ if proc.poll() is None:
932
+ _kill_proc_tree(proc)
933
+
934
+
935
# The four built-in tools every session starts with. Each ToolSpec pairs the
# provider-facing JSON schema with the local runner that executes the call;
# custom tools registered via ToolSpec/@tool flow through the same path.
DEFAULT_TOOL_SPECS: tuple[ToolSpec, ...] = (
    # read: text (paged via offset/limit) or image files; limits match the
    # DEFAULT_MAX_LINES / READ_MAX_LINE_CHARS constants at the top of the file.
    ToolSpec(
        name="read",
        description=(
            "Read a UTF-8 text file or supported image file. Returns up to 2000 lines for text files. "
            "Use offset/limit for large files. Very long lines are shortened."
        ),
        input_schema={
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "File path (relative or absolute)."},
                "offset": {"type": "integer", "description": "Line number to start from (1-indexed)."},
                "limit": {"type": "integer", "description": "Maximum number of lines to return."},
            },
            "required": ["path"],
            "additionalProperties": False,
        },
        runner=_run_read,
    ),
    # write: full-file create/overwrite (no partial writes).
    ToolSpec(
        name="write",
        description="Write a file (create or overwrite).",
        input_schema={
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "File path (relative or absolute)."},
                "content": {"type": "string", "description": "File content."},
            },
            "required": ["path", "content"],
            "additionalProperties": False,
        },
        runner=_run_write,
    ),
    # edit: exact-snippet replacement; all oldText anchors are matched against
    # the original file content, not against intermediate edit results.
    ToolSpec(
        name="edit",
        description=(
            "Edit a file by replacing text snippets. "
            "Each edits[].oldText must match uniquely in the original file. "
            "For multiple disjoint changes in one file, use one call with multiple edits."
        ),
        input_schema={
            "type": "object",
            "properties": {
                "path": {"type": "string", "description": "File path (relative or absolute)."},
                "edits": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "oldText": {
                                "type": "string",
                                "description": "Exact text to find (must be unique in the file).",
                            },
                            "newText": {
                                "type": "string",
                                "description": "Replacement text.",
                            },
                        },
                        "required": ["oldText", "newText"],
                        "additionalProperties": False,
                    },
                    "description": "Replacements to apply. All matched against the original file, not incrementally.",
                },
            },
            "required": ["path", "edits"],
            "additionalProperties": False,
        },
        runner=_run_edit,
    ),
    # bash: shell execution; the only built-in that streams output while running.
    ToolSpec(
        name="bash",
        description=(
            "Run a shell command in the session working directory. "
            "Large output returns the tail and saves the full log to a file."
        ),
        input_schema={
            "type": "object",
            "properties": {
                "command": {"type": "string", "description": "Shell command."},
                "timeout": {"type": "integer", "description": "Timeout in seconds (optional)."},
            },
            "required": ["command"],
            "additionalProperties": False,
        },
        runner=_run_bash,
        streams_output=True,
    ),
)
1023
+
1024
+
1025
+ # ---------------------------------------------------------------------------
1026
+ # Internal helpers
1027
+ # ---------------------------------------------------------------------------
1028
+
1029
+
1030
+ def _closest_line_hint(text: str, needle: str) -> str | None:
1031
+ needle_clean = needle.strip()
1032
+ if not needle_clean:
1033
+ return None
1034
+
1035
+ best_ratio = 0.0
1036
+ best_line = ""
1037
+ for line in text.splitlines():
1038
+ candidate = line.strip()
1039
+ if not candidate:
1040
+ continue
1041
+ ratio = SequenceMatcher(None, needle_clean, candidate).ratio()
1042
+ if ratio > best_ratio:
1043
+ best_ratio = ratio
1044
+ best_line = candidate
1045
+ if ratio >= 1.0:
1046
+ break
1047
+
1048
+ if best_ratio < 0.6 or not best_line:
1049
+ return None
1050
+
1051
+ if len(best_line) > 120:
1052
+ return best_line[:117] + "..."
1053
+ return best_line
1054
+
1055
+
1056
+ def _normalize_text(text: str) -> tuple[str, list[int]]:
1057
+ """Normalize for fuzzy edit matching: strip trailing whitespace per line, CRLF→LF.
1058
+
1059
+ Returns (normalized, index_map) where ``index_map[i]`` is the position of
1060
+ normalized char *i* in the original text. This lets callers find a match
1061
+ in the normalized string and map the span back to exact original byte
1062
+ offsets, so untouched regions of the file are never altered.
1063
+ """
1064
+
1065
+ chars: list[str] = []
1066
+ imap: list[int] = []
1067
+ pos = 0
1068
+ for line in text.splitlines(keepends=True):
1069
+ content = line.rstrip("\r\n")
1070
+ trimmed = content.rstrip(" \t")
1071
+ chars.extend(trimmed)
1072
+ imap.extend(range(pos, pos + len(trimmed)))
1073
+ eol = line[len(content) :]
1074
+ if eol:
1075
+ chars.append("\n")
1076
+ imap.append(pos + len(content))
1077
+ pos += len(line)
1078
+
1079
+ return "".join(chars), imap
1080
+
1081
+
1082
+ # ---------------------------------------------------------------------------
1083
+ # @tool decorator
1084
+ # ---------------------------------------------------------------------------
1085
+
1086
+
1087
@overload
def tool(
    function: Callable[..., Any],
    *,
    name: str | None = None,
    description: str | None = None,
    streams_output: bool = False,
) -> ToolSpec: ...


@overload
def tool(
    function: None = None,
    *,
    name: str | None = None,
    description: str | None = None,
    streams_output: bool = False,
) -> Callable[[Callable[..., Any]], ToolSpec]: ...


def tool(
    function: Callable[..., Any] | None = None,
    *,
    name: str | None = None,
    description: str | None = None,
    streams_output: bool = False,
) -> ToolSpec | Callable[[Callable[..., Any]], ToolSpec]:
    """Wrap a plain Python function as a :class:`ToolSpec`.

    Works with both sync and async functions. When the first parameter is
    annotated :class:`ToolContext`, the context is injected automatically and
    the remaining parameters drive the JSON schema sent to the provider.

    The function may return a :class:`ToolExecutionResult` or any
    JSON-serializable value; non-result values are wrapped as plain text.
    """

    def decorate(fn: Callable[..., Any]) -> ToolSpec:
        sig_params = list(inspect.signature(fn).parameters.values())
        try:
            hints = typing.get_type_hints(fn)
        except Exception:
            # Unresolvable forward references: fall back to no hints.
            hints = {}

        # A leading ToolContext parameter is injected, not model-supplied.
        takes_context = bool(sig_params) and hints.get(sig_params[0].name) is ToolContext
        schema_params = sig_params[1:] if takes_context else sig_params
        accepted = {p.name for p in schema_params}
        schema, coercers = _build_input_schema(schema_params, hints)

        doc = description or inspect.getdoc(fn)
        if not doc:
            raise ValueError(f"tool {(name or fn.__name__)!r} requires a docstring or explicit description")

        run_async = inspect.iscoroutinefunction(fn)

        def invoke(context: ToolContext, args: dict[str, Any]) -> ToolExecutionResult:
            kwargs: dict[str, Any] = {}
            for field, raw in args.items():
                if field not in accepted:
                    continue  # ignore unexpected keys the model invents
                convert = coercers.get(field)
                kwargs[field] = raw if (convert is None or raw is None) else convert(raw)
            positional = (context,) if takes_context else ()
            if run_async:
                # The executor itself runs on a worker thread (see Agent loop),
                # so spinning a fresh event loop here is safe.
                outcome = asyncio.run(fn(*positional, **kwargs))
            else:
                outcome = fn(*positional, **kwargs)
            return _coerce_tool_result(outcome)

        return ToolSpec(
            name=name or fn.__name__,
            description=doc,
            input_schema=schema,
            runner=invoke,
            streams_output=streams_output,
        )

    return decorate if function is None else decorate(function)
1167
+
1168
+
1169
def _build_input_schema(
    parameters: list[inspect.Parameter],
    resolved_hints: dict[str, Any],
) -> tuple[dict[str, Any], dict[str, Callable[[Any], Any]]]:
    """Build the JSON schema for ``parameters`` and a per-name coercion map.

    The coercion map carries the post-JSON conversions needed when an
    annotation has no native JSON type (currently only ``Path``).
    """

    props: dict[str, Any] = {}
    mandatory: list[str] = []
    converters: dict[str, Callable[[Any], Any]] = {}

    allowed_kinds = {inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.KEYWORD_ONLY}
    for param in parameters:
        if param.kind not in allowed_kinds:
            raise ValueError(f"unsupported tool parameter kind: {param.name}")

        hint = resolved_hints.get(param.name, param.annotation)
        props[param.name] = _annotation_to_schema(hint, param.name)
        converter = _coercion_for_annotation(hint)
        if converter is not None:
            converters[param.name] = converter

        if param.default is inspect.Signature.empty:
            mandatory.append(param.name)
        else:
            # Only advertise defaults that survive JSON serialization.
            try:
                json.dumps(param.default)
            except TypeError:
                pass
            else:
                props[param.name]["default"] = param.default

    return (
        {
            "type": "object",
            "properties": props,
            "required": mandatory,
            "additionalProperties": False,
        },
        converters,
    )
1209
+
1210
+
1211
+ def _coercion_for_annotation(annotation: Any) -> Callable[[Any], Any] | None:
1212
+ """Return a value coercion to apply after JSON parsing, or None.
1213
+
1214
+ Path is the only non-JSON-native type that ``_annotation_to_schema``
1215
+ accepts; the runner must rebuild a Path from the raw string the model
1216
+ sends.
1217
+ """
1218
+
1219
+ if annotation is Path:
1220
+ return Path
1221
+
1222
+ args = get_args(annotation)
1223
+ if args and type(None) in args:
1224
+ non_none = [arg for arg in args if arg is not type(None)]
1225
+ if len(non_none) == 1:
1226
+ return _coercion_for_annotation(non_none[0])
1227
+
1228
+ return None
1229
+
1230
+
1231
+ def _annotation_to_schema(annotation: Any, parameter_name: str) -> dict[str, Any]:
1232
+ if annotation is inspect.Signature.empty:
1233
+ raise TypeError(f"tool parameter {parameter_name!r} requires a type annotation")
1234
+ if annotation in {str, Path}:
1235
+ return {"type": "string"}
1236
+ if annotation is bool:
1237
+ return {"type": "boolean"}
1238
+ if annotation is int:
1239
+ return {"type": "integer"}
1240
+ if annotation is float:
1241
+ return {"type": "number"}
1242
+
1243
+ origin = get_origin(annotation)
1244
+ args = get_args(annotation)
1245
+
1246
+ if origin in {list, tuple, set}:
1247
+ items = _annotation_to_schema(args[0], parameter_name) if args else {"type": "string"}
1248
+ return {"type": "array", "items": items}
1249
+
1250
+ if origin is Literal:
1251
+ values = list(args)
1252
+ if not values:
1253
+ raise TypeError(f"Literal annotation on {parameter_name!r} must list at least one value")
1254
+ schema = _annotation_to_schema(type(values[0]), parameter_name)
1255
+ schema["enum"] = values
1256
+ return schema
1257
+
1258
+ if args and type(None) in args:
1259
+ non_none = [arg for arg in args if arg is not type(None)]
1260
+ if len(non_none) == 1:
1261
+ return _annotation_to_schema(non_none[0], parameter_name)
1262
+
1263
+ raise TypeError(f"unsupported tool parameter type for {parameter_name!r}: {annotation!r}")
1264
+
1265
+
1266
def _coerce_tool_result(value: Any) -> ToolExecutionResult:
    """Normalize a tool function's return value into a :class:`ToolExecutionResult`.

    Ready-made results and plain strings pass through unchanged; anything else
    is JSON-encoded when possible and stringified otherwise.
    """
    if isinstance(value, ToolExecutionResult):
        return value
    if isinstance(value, str):
        return ToolExecutionResult(model_text=value, display_text=value)
    try:
        text = json.dumps(value, ensure_ascii=False)
    except (TypeError, ValueError):
        # TypeError: value is not JSON-serializable. ValueError: json.dumps
        # raises it for circular references; without catching it, a circular
        # structure would escape the intended str() fallback and crash here.
        text = str(value)
    return ToolExecutionResult(model_text=text, display_text=text)