sliceagent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. sliceagent/__init__.py +3 -0
  2. sliceagent/__main__.py +6 -0
  3. sliceagent/access.py +93 -0
  4. sliceagent/agents.py +173 -0
  5. sliceagent/background_review.py +146 -0
  6. sliceagent/binsniff.py +89 -0
  7. sliceagent/cli.py +890 -0
  8. sliceagent/clock.py +32 -0
  9. sliceagent/code_grep.py +329 -0
  10. sliceagent/code_index.py +417 -0
  11. sliceagent/config.py +240 -0
  12. sliceagent/context_overflow.py +227 -0
  13. sliceagent/envspec.py +129 -0
  14. sliceagent/errors.py +167 -0
  15. sliceagent/events.py +96 -0
  16. sliceagent/finding_types.py +70 -0
  17. sliceagent/flags.py +63 -0
  18. sliceagent/fuzzy.py +135 -0
  19. sliceagent/guardrails.py +438 -0
  20. sliceagent/guidance.py +69 -0
  21. sliceagent/hippocampus.py +581 -0
  22. sliceagent/hooks.py +334 -0
  23. sliceagent/interfaces.py +144 -0
  24. sliceagent/llm.py +695 -0
  25. sliceagent/loop.py +548 -0
  26. sliceagent/mcp_client.py +255 -0
  27. sliceagent/mcp_security.py +77 -0
  28. sliceagent/memory.py +428 -0
  29. sliceagent/metrics.py +103 -0
  30. sliceagent/model_catalog.py +124 -0
  31. sliceagent/monitor.py +615 -0
  32. sliceagent/neocortex.py +436 -0
  33. sliceagent/onboarding.py +323 -0
  34. sliceagent/oracle.py +36 -0
  35. sliceagent/pagetable.py +255 -0
  36. sliceagent/pfc.py +449 -0
  37. sliceagent/plugins.py +127 -0
  38. sliceagent/policy.py +234 -0
  39. sliceagent/procman.py +187 -0
  40. sliceagent/prompt.py +239 -0
  41. sliceagent/records.py +108 -0
  42. sliceagent/recovery.py +119 -0
  43. sliceagent/regions.py +678 -0
  44. sliceagent/registry.py +128 -0
  45. sliceagent/retriever.py +19 -0
  46. sliceagent/safety.py +332 -0
  47. sliceagent/sandbox.py +143 -0
  48. sliceagent/scheduler.py +92 -0
  49. sliceagent/search_index.py +289 -0
  50. sliceagent/seed.py +465 -0
  51. sliceagent/sensory_cortex.py +500 -0
  52. sliceagent/session.py +222 -0
  53. sliceagent/skill_provenance.py +71 -0
  54. sliceagent/skill_usage.py +123 -0
  55. sliceagent/skills.py +209 -0
  56. sliceagent/subagent.py +332 -0
  57. sliceagent/subdir_hints.py +222 -0
  58. sliceagent/swap.py +182 -0
  59. sliceagent/taskstate.py +57 -0
  60. sliceagent/telemetry.py +59 -0
  61. sliceagent/terminal.py +240 -0
  62. sliceagent/text_utils.py +56 -0
  63. sliceagent/tool_summary.py +93 -0
  64. sliceagent/tools.py +1194 -0
  65. sliceagent/tui.py +1377 -0
  66. sliceagent/web.py +354 -0
  67. sliceagent-0.1.0.dist-info/METADATA +262 -0
  68. sliceagent-0.1.0.dist-info/RECORD +71 -0
  69. sliceagent-0.1.0.dist-info/WHEEL +4 -0
  70. sliceagent-0.1.0.dist-info/entry_points.txt +2 -0
  71. sliceagent-0.1.0.dist-info/licenses/LICENSE +21 -0
sliceagent/tools.py ADDED
@@ -0,0 +1,1194 @@
1
+ """LocalToolHost — the default ToolHost.
2
+
3
+ Safe execution lives here: file ops are confined to the workspace root (no path
4
+ traversal out of it), and shell runs through a Sandbox backend (sandbox.py) — so
5
+ swapping in a container later never touches the loop. Authorization (which calls
6
+ are allowed at all) is separate: policy.py via the PermissionHook.
7
+
8
+ Note: Python's str.replace is literal, so str_replace has no $-pattern footgun
9
+ (unlike JS).
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import os
14
+ import re
15
+ import shlex
16
+ import tempfile
17
+
18
+ from .access import AllAccess, FileAccess
19
+ from .binsniff import looks_binary
20
+ from .fuzzy import fuzzy_find_unique
21
+ from .procman import ProcManager
22
+ from .registry import ToolEntry, ToolRegistry, ToolText
23
+ from .sandbox import LocalSandbox
24
+ from .sensory_cortex import _is_ignored
25
+ from .terminal import SessionManager
26
+
27
+ # I1 PROVENANCE — host SELF-INFLICTED error sentinels. These name failures caused by the HOST's own
28
+ # guard rails (file-tool confinement, permission denial), NOT by a real bug in the user's code. Lesson
29
+ # mining filters pitfalls whose signature contains one of these so a turn whose only error was the
30
+ # agent hitting its OWN sandbox mines nothing (D2). Lower-cased substrings, matched task-agnostically;
31
+ # defined HERE (the source of these strings) so the denylist tracks the actual error messages.
32
+ HOST_ERROR_SENTINELS = (
33
+ "path escapes the boundary",
34
+ "file tools are confined",
35
+ "permission denied",
36
+ "operation not permitted",
37
+ )
38
+
39
+ # Prepended to every execute_code script: the in-sandbox tool helpers (code-as-action); see _CODE_PRELUDE below.
40
+ # No imports needed by the model. The workspace is cwd and on sys.path (so workspace modules import cleanly).
41
+ # Strip a leading "cat -n" line-number prefix (" 123\t") from a str_replace snippet pasted back from the
42
+ # numbered OPEN FILES render. Only fires when EVERY non-blank line has one (clearly cat -n output, not real
43
+ # source), so a genuine match is never altered; used as a fallback in _t_str_replace.
44
+ _LINENO_PREFIX = re.compile(r"^[ \t]*\d+\t")
45
+
46
+
47
+ def _strip_line_numbers(text: str) -> str:
48
+ lines = text.split("\n")
49
+ nonblank = [ln for ln in lines if ln.strip()]
50
+ if not nonblank or not all(_LINENO_PREFIX.match(ln) for ln in nonblank):
51
+ return text
52
+ return "\n".join(_LINENO_PREFIX.sub("", ln) if ln.strip() else ln for ln in lines)
53
+
54
+
55
+ def _number_lines(lines, start: int = 1) -> str:
56
+ """cat -n number a LIST of lines from `start` (1-based) — ABSOLUTE numbers so a windowed read still
57
+ gives correct file:line evidence."""
58
+ return "\n".join(f"{i:>6}\t{ln}" for i, ln in enumerate(lines, start))
59
+
60
+
61
+ def _numbered(text: str) -> str:
62
+ """cat -n line numbers for read_file's RETURN, so the model gets file:line evidence IMMEDIATELY in-turn
63
+ (same format as the OPEN FILES render). The number is a display prefix, NOT file content — str_replace
64
+ strips a pasted prefix via _strip_line_numbers, so editing from a numbered read still matches."""
65
+ return _number_lines(text.splitlines(), 1)
66
+
67
+
68
+ _READ_MAX_LINES = 1500 # default in-slice VIEW cap for read_file; the full file ALWAYS stays on disk (bound the view, not the file)
69
+
70
+
71
+ def _coerce_int(v):
72
+ """Tolerant int() for model-supplied args (str/float/None) — never raises."""
73
+ try:
74
+ return int(v) if v is not None else None
75
+ except (TypeError, ValueError):
76
+ return None
77
+
78
+
79
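+ # Prepended to every execute_code script: the in-sandbox tool helpers (code-as-action). No imports are needed by
+ # the model. The workspace is cwd and on sys.path, so `import <workspace_module>` works for testing freshly-written code.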
80
+ _CODE_PRELUDE = '''\
81
+ import os as _os, sys as _sys, subprocess as _sp
82
+ _sys.path.insert(0, _os.getcwd())
83
+
84
+ def _confine(path):
85
+ # Confine code-as-action file helpers to the workspace (cwd = workspace root in the sandbox). Without
86
+ # this, an absolute path or ../ escape let execute_code read/write outside allowed_roots, bypassing the
87
+ # file-tool boundary. Shell (run_command) stays unconfined by design; these in-code helpers do not.
88
+ _p = _os.path.realpath(path)
89
+ _root = _os.path.realpath(_os.getcwd())
90
+ if _p != _root and not _p.startswith(_root + _os.sep):
91
+ raise PermissionError(f"path escapes the boundary: {path} (use run_command for paths outside it)")
92
+ return path
93
+
94
+ def read_file(path):
95
+ with open(_confine(path), encoding="utf-8") as _f: return _f.read()
96
+
97
+ def write_file(path, content):
98
+ path = _confine(path)
99
+ _d = _os.path.dirname(path)
100
+ if _d: _os.makedirs(_d, exist_ok=True)
101
+ with open(path, "w", encoding="utf-8", newline="") as _f: _f.write(content)
102
+ if content[:2] == "#!": # a shebang script should be runnable (parity with the edit_file tool)
103
+ try: _os.chmod(path, _os.stat(path).st_mode | 0o111)
104
+ except OSError: pass
105
+ return f"wrote {len(content)} bytes to {path}"
106
+
107
+ def append_file(path, content):
108
+ path = _confine(path)
109
+ _d = _os.path.dirname(path)
110
+ if _d: _os.makedirs(_d, exist_ok=True)
111
+ with open(path, "a", encoding="utf-8", newline="") as _f: _f.write(content)
112
+ return f"appended {len(content)} bytes to {path}"
113
+
114
+ def str_replace(path, old, new):
115
+ path = _confine(path)
116
+ with open(path, encoding="utf-8", newline="") as _f: _cur = _f.read()
117
+ _n = _cur.count(old)
118
+ if _n != 1: return (f"error: old_string occurs {_n}x in {path} (need exactly 1) — "
119
+ f"add surrounding lines to make it unique, or write_file the whole file")
120
+ with open(path, "w", encoding="utf-8", newline="") as _f: _f.write(_cur.replace(old, new, 1))
121
+ return f"replaced 1 occurrence in {path}"
122
+
123
+ def list_files(path="."):
124
+ return sorted(_os.listdir(_confine(path)))
125
+
126
+ def run(cmd, timeout=60):
127
+ _r = _sp.run(cmd, shell=True, capture_output=True, text=True, timeout=timeout)
128
+ _o = (_r.stdout or "") + (_r.stderr or "")
129
+ return _o if _r.returncode == 0 else f"[exit {_r.returncode}]\\n{_o}"
130
+ '''
131
+
132
+
133
+ def _fn(name: str, desc: str, props: dict, req: list[str]) -> dict:
134
+ return {
135
+ "type": "function",
136
+ "function": {"name": name, "description": desc,
137
+ "parameters": {"type": "object", "properties": props, "required": req}},
138
+ }
139
+
140
+
141
+ # The FINDINGS-capture seam. Every tool call carries a 'note' — the model's distilled conclusion
142
+ # for this turn. It rides on the call the model is ALREADY making (no extra round-trip, unlike a
143
+ # dedicated note tool) and is folded into the slice's FINDINGS tier. This is how a Markov/slice
144
+ # agent gives a REASONING model its own prior conclusions back: the slice has no transcript, so
145
+ # without it the model re-derives the situation each turn (big reasoning bursts → slow). Reasoning
146
+ # models (e.g. deepseek) emit empty message content while tool-calling, so a tool ARG — not message
147
+ # text — is the only reliable capture point.
148
# The schema fragment for the optional 'note' argument that with_note() merges into tool schemas.
NOTE_PROP = {
    "note": {
        "type": "string",
        "description": ("Optional — usually leave EMPTY. Fill ONLY when this call established a NEW durable FACT "
                        "(root cause, a confirmed fix, a ruled-out hypothesis, or 'task done'), in <=15 words — a "
                        "conclusion, NOT the action you're taking. Saved across turns so you never re-derive it; "
                        "routine reads/edits need no note."),
    }
}


def with_note(schema: dict) -> dict:
    """Return a copy of a tool schema with the optional 'note' arg listed FIRST in its properties.

    The input is not mutated: new dicts are built for the schema, its "function" and its
    "parameters". A missing/empty "parameters" falls back to an empty object schema. 'note' is
    removed from "required" so it always stays optional; a 'note' property the tool already
    declares keeps its own definition (it overrides NOTE_PROP's).
    """
    function = schema.get("function") or {}
    parameters = function.get("parameters") or {"type": "object", "properties": {}, "required": []}

    # NOTE_PROP goes in first so 'note' leads the property order; the tool's own properties follow.
    merged_props = dict(NOTE_PROP)
    merged_props.update(parameters.get("properties") or {})

    required = []
    for arg_name in parameters.get("required") or []:
        if arg_name != "note":
            required.append(arg_name)

    new_parameters = dict(parameters)
    new_parameters["properties"] = merged_props
    new_parameters["required"] = required

    new_function = dict(function)
    new_function["parameters"] = new_parameters

    result = dict(schema)
    result["function"] = new_function
    return result
170
+
171
+
172
+ # _IGNORE_NAMES/_IGNORE_SUFFIX/_is_ignored (the ignore-aware directory-walk primitive shared with
173
+ # repo_map) now live in sensory_cortex.py — "ignore-aware walking" is itself a SENSORY CORTEX concern
174
+ # (perception of the live filesystem). Imported at the top of this file for _t_list_files's own use below.
175
+ _LIST_CAP = 600 # bound recursive output so a huge tree can't flood the slice
176
+
177
+ # Tool-output PAGE-OUT (#74): a single tool result larger than this is written to a blob under
178
+ # .sliceagent/blobs and replaced inline by a BOUNDED head+tail view + a read_file reference — L1→L2 paging,
179
+ # NOT a cut (the full output is preserved on disk and recall-on-demand). Keeps one huge run_command /
180
+ # execute_code / terminal_read result from flooding the within-turn transcript and forcing coarse overflow.
181
+ _OUTPUT_INLINE_CAP = 16000
182
+ _OUTPUT_HEAD = 10000
183
+ _OUTPUT_TAIL = 4000
184
+
185
+ # Drop C0/C1 control bytes (keep \t \n \r) + DEL from a paged-out output, so (a) the blob is PLAIN TEXT
186
+ # and read_file's binary gate won't hexdump it on page-back, and (b) a stray NUL can't break the API call
187
+ # when the bounded head+tail rides the transcript. Only applied on the paged path (large outputs).
188
+ _CONTROL_DROP = {c: None for c in range(0x20) if c not in (0x09, 0x0a, 0x0d)}
189
+ _CONTROL_DROP[0x7f] = None
190
+
191
+
192
+ def _strip_control(s: str) -> str:
193
+ return s.translate(_CONTROL_DROP)
194
+ # Credential/secret dirs the shell-path auto-grant (#31) must never widen file-tool reach into.
195
+ _SECRET_DIRS = {".ssh", ".aws", ".gnupg", ".gpg", ".kube", ".docker", ".config", "keyrings", ".password-store"}
196
+
197
+
198
+ TOOL_SCHEMAS = [
199
+ _fn("read_file",
200
+ "Read a file's contents with cat -n line numbers for reference (the leading number is NOT part of the "
201
+ "file, so don't include it in a str_replace old_string). A large file returns a bounded window with a "
202
+ "<system> footer giving the total line count and how to page; pass `offset` (1-based start line) and/or "
203
+ "`limit` (max lines) to read a specific range. To list a directory use list_files; to SEARCH file "
204
+ "contents use the `grep` tool (ripgrep-backed) — not bash grep. "
205
+ "Arg `path` is workspace-relative or absolute but confined to the workspace — for outside paths use "
206
+ "run_command. A binary file returns a hexdump preview, not editable text.",
207
+ {"path": {"type": "string"},
208
+ "offset": {"type": "integer", "description": "1-based first line to read (optional)"},
209
+ "limit": {"type": "integer", "description": "max number of lines to return (optional)"}},
210
+ ["path"]),
211
+ _fn("list_files",
212
+ "List directory entries (ignore-aware: skips .git/.venv/caches/build/node_modules noise). Use to "
213
+ "discover what exists; use read_file for a file's CONTENTS and the `grep` tool (ripgrep-backed) to "
214
+ "SEARCH text. Pass recursive=true to map a whole subtree in ONE call (flat file paths, capped at 600 — "
215
+ "pass a subdir to narrow) — PREFER this over shell `find` for a clean cache-free map.",
216
+ {"path": {"type": "string"}, "recursive": {"type": "boolean"}}, []),
217
+ _fn("edit_file",
218
+ "Create a new file, or OVERWRITE an existing file's ENTIRE contents with `content` (the complete text); "
219
+ "parent dirs are auto-created and a leading `#!` shebang makes it executable. To change PART of an "
220
+ "existing file use str_replace; to add to its end use append_to_file. Do NOT use edit_file to tweak a "
221
+ "file — it discards all current content.",
222
+ {"path": {"type": "string"}, "content": {"type": "string"}}, ["path", "content"]),
223
+ _fn("append_to_file",
224
+ "Append `content` verbatim to the END of a file (creates it + parent dirs if missing) — the only writer "
225
+ "that ADDS without touching existing content. Use str_replace to modify text already in the file, "
226
+ "edit_file to replace the whole file. No newline is added — include a leading '\\n' yourself if needed.",
227
+ {"path": {"type": "string"}, "content": {"type": "string"}}, ["path", "content"]),
228
+ _fn("code_review",
229
+ "Review code changes: returns the `git diff` for the workspace (default vs HEAD; pass `ref` for a "
230
+ "branch / commit / range like 'main', 'HEAD~3', or 'main...HEAD') so you can audit the changes for "
231
+ "correctness, security, and edge cases — cite file:line for each issue you find. Read-only; needs a "
232
+ "git repo. Prefer this over piecing a review together from many read_file calls.",
233
+ {"ref": {"type": "string"}}, []),
234
+ _fn("str_replace",
235
+ "Make a SURGICAL edit to an EXISTING file — replace one snippet, leave the rest. The default for "
236
+ "changing a file you've read. `old_string` should be the SMALLEST unique snippet — usually 2-4 adjacent "
237
+ "lines, not 10+. It must identify exactly ONE place: more than one occurrence is rejected (add "
238
+ "surrounding context, or pass replace_all=true to change EVERY occurrence); an exact match is used, "
239
+ "else a unique whitespace-tolerant fuzzy match. If old_string isn't found the file may be STALE — "
240
+ "re-read it and copy the current text rather than retrying the same edit; for a bigger change use edit_file.",
241
+ {"path": {"type": "string"}, "old_string": {"type": "string"}, "new_string": {"type": "string"},
242
+ "replace_all": {"type": "boolean", "description": "replace ALL occurrences (default false: a >1 match is rejected)"}},
243
+ ["path", "old_string", "new_string"]),
244
+ _fn("run_command",
245
+ "Run a shell command (blocking, cwd=workspace root); returns combined stdout+stderr (exit code on "
246
+ "failure). Pass timeout (seconds, default 30, max 600) for slow builds. Use for one-shot commands that "
247
+ "finish; for a process that must STAY alive use proc_start, for an interactive REPL use terminal_open, "
248
+ "to chain several edits + a test in one turn use execute_code. No cwd arg — prepend `cd DIR &&`. The "
249
+ "shell is unconfined (can reach outside the workspace, unlike the file tools). If a command could "
250
+ "emit a LARGE dump (disassembly, a long log, a dataset), FILTER it in the command itself — pipe "
251
+ "through grep/head/tail/sed -n or target a range — so only the relevant slice returns.",
252
+ {"command": {"type": "string"}, "timeout": {"type": "number"}}, ["command"]),
253
+ _fn("execute_code",
254
+ "Run a Python script that does SEVERAL file/shell steps in ONE turn (e.g. multiple edits + a test). Use "
255
+ "over run_command when you'd chain many calls; over proc_start when it's one-shot (blocking, ~30s). "
256
+ "Helpers (no imports): read_file(path), write_file(path, content), append_file(path, content), "
257
+ "str_replace(path, old, new), list_files(path='.'), run(shell_cmd). Workspace is cwd + on sys.path. ONLY "
258
+ "what you print() is returned. The file helpers are workspace-confined — use run() (shell) for outside paths.",
259
+ {"code": {"type": "string"}}, ["code"]),
260
+ _fn("ask_user",
261
+ "Ask the user a concise follow-up question and WAIT for their answer (returned to you). Use this "
262
+ "whenever you are UNSURE or the request is AMBIGUOUS, or when you have FAILED / been blocked and don't "
263
+ "know how to proceed — instead of guessing or repeating a failing action; prefer just answering in text "
264
+ "when you can infer intent. Give a few short 'options' for multiple-choice, or omit for open-ended. In "
265
+ "headless/eval runs there is no interactive user — it returns a fallback telling you to proceed with a "
266
+ "stated assumption, so never loop waiting on it.",
267
+ {"question": {"type": "string"},
268
+ "options": {"type": "array", "items": {"type": "string"}}}, ["question"]),
269
+ _fn("proc_start",
270
+ "Start a LONG-RUNNING / background process (a server, a watcher, a multi-minute build) and return a "
271
+ "handle (p1, p2, …) immediately; it keeps running across turns. Use over run_command when the process "
272
+ "must outlive the turn, over terminal_open when you only launch-and-probe (it gets no stdin). It does "
273
+ "NOT confirm the process started — one that instantly dies still returns a handle — so "
274
+ "proc_poll/proc_tail to check status and proc_kill to stop.",
275
+ {"command": {"type": "string"}}, ["command"]),
276
+ _fn("proc_poll", "Check a background process by handle: 'running' or 'exited <code>'.",
277
+ {"handle": {"type": "string"}}, ["handle"]),
278
+ _fn("proc_tail", "Read recent output (stdout+stderr) of a background process.",
279
+ {"handle": {"type": "string"}, "lines": {"type": "number"}}, ["handle"]),
280
+ _fn("proc_wait",
281
+ "Wait up to timeout seconds for a background process to exit; returns its status + recent output.",
282
+ {"handle": {"type": "string"}, "timeout": {"type": "number"}}, ["handle"]),
283
+ _fn("proc_kill", "Terminate a background process and its child group.",
284
+ {"handle": {"type": "string"}}, ["handle"]),
285
+ _fn("terminal_open",
286
+ "Open a persistent interactive PTY session for anything needing a LIVE terminal across turns: a "
287
+ "REPL/text-game/TUI, answering successive prompts, or holding shell state (cd/export/venv). Unlike "
288
+ "proc_start (no stdin) or run_command (one-shot), you drive it with terminal_send/terminal_wait/"
289
+ "terminal_read and end with terminal_close. Omit command for a shell, or pass one (e.g. 'python3 -i -q'); "
290
+ "'session' names it (default 'main'). Don't reopen an already-open session name — close it first.",
291
+ {"session": {"type": "string"}, "command": {"type": "string"}}, []),
292
+ _fn("terminal_send",
293
+ "Send input to a terminal session. By default a newline is appended (sends a line). Set "
294
+ "enter=false to send raw keys without a newline (e.g. a control char like '\\u0003' for Ctrl-C, "
295
+ "or an escape sequence). Returns the immediate echo/output.",
296
+ {"session": {"type": "string"}, "input": {"type": "string"}, "enter": {"type": "boolean"}},
297
+ ["input"]),
298
+ _fn("terminal_read", "Read the output a terminal session has produced (drains the live stream).",
299
+ {"session": {"type": "string"}, "timeout": {"type": "number"}}, []),
300
+ _fn("terminal_wait",
301
+ "Wait until a regex pattern appears in a terminal session's output (or timeout) — the reliable "
302
+ "way to sync: send a command, then wait for its prompt/result before sending the next.",
303
+ {"session": {"type": "string"}, "until": {"type": "string"}, "timeout": {"type": "number"}},
304
+ ["until"]),
305
+ _fn("terminal_close", "Close a terminal session and kill its process group.",
306
+ {"session": {"type": "string"}}, []),
307
+ _fn("world_set",
308
+ "Save DURABLE task state to your WORLD MODEL under a key (overwrites that key). Use it to maintain "
309
+ "non-code state across turns: an explored maze map, a game's rooms+inventory, a system "
310
+ "inventory, a running plan. It appears in the WORLD MODEL section of your context from your NEXT "
311
+ "turn on; within THIS turn, re-read a value from your own world_set call above. value may be multiline.",
312
+ {"key": {"type": "string"}, "value": {"type": "string"}}, ["key", "value"]),
313
+ _fn("world_clear", "Remove a key from your WORLD MODEL (omit key to clear all of it).",
314
+ {"key": {"type": "string"}}, []),
315
+ _fn("require",
316
+ "Record a STANDING REQUIREMENT that must HOLD when the task is done — an exact name/signature, an "
317
+ "output format, a stated rule, or a constraint the user adds. It joins your STANDING REQUIREMENTS "
318
+ "contract (shown every turn from your next turn on, and the bar for 'done'). Record only DURABLE "
319
+ "constraints, never transient sub-steps or chit-chat; re-recording the same one is a no-op.",
320
+ {"text": {"type": "string"}}, ["text"]),
321
+ _fn("requirement_done",
322
+ "Mark a STANDING REQUIREMENT satisfied (after verifying it against the real end-state). It stays "
323
+ "shown as '[x] done' so it is not re-flagged but not forgotten. `text` must match the requirement.",
324
+ {"text": {"type": "string"}}, ["text"]),
325
+ _fn("drop_requirement",
326
+ "Remove a STANDING REQUIREMENT the user RETRACTED or that no longer applies. `text` must match.",
327
+ {"text": {"type": "string"}}, ["text"]),
328
+ _fn("update_plan",
329
+ "Maintain an ordered PLAN (a TODO list) for a multi-step task. Pass the COMPLETE list of steps "
330
+ "every time — it REPLACES the previous plan. Keep exactly ONE step 'in_progress'; mark each 'done' "
331
+ "as you finish it. The plan shows in your PLAN section across turns so progress survives and the "
332
+ "user can follow along. Use it for non-trivial multi-step work; skip it for a single action.",
333
+ {"steps": {"type": "array", "description": "the full ordered step list (replaces the prior plan)",
334
+ "items": {"type": "object", "properties": {
335
+ "step": {"type": "string", "description": "one concrete step, imperative"},
336
+ "status": {"type": "string", "enum": ["pending", "in_progress", "done"]}},
337
+ "required": ["step", "status"]}}},
338
+ ["steps"]),
339
+ _fn("set_mission",
340
+ "Set your MISSION — the overarching NORTH-STAR objective for a long multi-step task (the 'why'), "
341
+ "shown at the top of your context every turn so you stay oriented across many steps. Set it once at "
342
+ "the start of a substantial task; it is ABOVE the literal task and your step plan. Re-setting "
343
+ "replaces it. Skip it for quick one-off requests.",
344
+ {"text": {"type": "string"}}, ["text"]),
345
+ _fn("mission_done", "Clear your MISSION once the overarching objective is achieved (it stops showing).",
346
+ {}, []),
347
+ ]
348
+
349
+
350
+ def _default_ask_user(question: str, options) -> str:
351
+ """Fallback when no interactive user is wired (headless/eval) — never hangs."""
352
+ return ("(no interactive user is available to answer; proceed with your best assumption and "
353
+ "STATE it explicitly, or stop with a clear summary of what you need)")
354
+
355
+
356
+ def _sniff_image_mime(raw: bytes) -> str | None:
357
+ """Identify an image by MAGIC BYTES (not extension). Returns the MIME type or None if not an image."""
358
+ if raw[:8] == b"\x89PNG\r\n\x1a\n":
359
+ return "image/png"
360
+ if raw[:3] == b"\xff\xd8\xff":
361
+ return "image/jpeg"
362
+ if raw[:6] in (b"GIF87a", b"GIF89a"):
363
+ return "image/gif"
364
+ if raw[:4] == b"RIFF" and raw[8:12] == b"WEBP":
365
+ return "image/webp"
366
+ if raw[:2] == b"BM":
367
+ return "image/bmp"
368
+ return None
369
+
370
+
371
+ def _numbered_window(text: str, start_line: int, end_line: int, *, ctx: int = 4, cap: int = 40) -> str:
372
+ """A cat -n numbered snippet of `text` around [start_line..end_line] (0-based), ±ctx lines, capped at
373
+ `cap`. Edit tools echo this POST-EDIT region back in their result so the model sees the file's CURRENT
374
+ state in-transcript — the within-turn analog of the OPEN FILES tier (the seed is frozen mid-turn, so the
375
+ live view must ride the tool results). Bounded by construction; never the whole file."""
376
+ lines = text.replace("\r\n", "\n").split("\n")
377
+ if lines and lines[-1] == "":
378
+ lines = lines[:-1] # drop the trailing empty from a final newline
379
+ a = max(0, start_line - ctx)
380
+ b = min(len(lines), max(end_line + 1 + ctx, a + 1))
381
+ b = min(b, a + cap)
382
+ snippet = "\n".join(f"{i:>6}\t{ln}" for i, ln in enumerate(lines[a:b], a + 1)) # cat -n, absolute line nums
383
+ if b < len(lines):
384
+ snippet += f"\n … (+{len(lines) - b} more lines)"
385
+ return snippet
386
+
387
+
388
+ class LocalToolHost:
389
def __init__(self, root: str | None = None, *, sandbox=None, timeout: int = 30,
             registry: ToolRegistry | None = None):
    """Set up the host's state and register the builtin tools.

    Args:
        root: workspace root to pin; None means "the current working directory, resolved per call".
        sandbox: shell backend; a LocalSandbox is created when none (or a falsy one) is given.
        timeout: stored on the host as `self.timeout`.
        registry: tool registry to register into; a fresh ToolRegistry is created when omitted.

    Side effect: registers `self.cleanup` with atexit.
    """
    # root=None → confine to the *current* working directory, resolved per call
    # (so the eval runner, which chdirs into a temp workdir after construction,
    # is confined to that workdir). Pass an explicit root to pin it.
    self._root = root
    self.timeout = timeout
    self.sandbox = sandbox or LocalSandbox()
    # Background/long-running processes — the live-handle registry the one-shot sandbox can't
    # express (servers, multi-minute builds). Scrubs secrets like the sandbox; cleanup() at exit.
    # A sandbox without a `scrub_secrets` attribute is treated as scrubbing (default True).
    _scrub = getattr(self.sandbox, "scrub_secrets", True)
    self.procs = ProcManager(scrub_secrets=_scrub)
    # Interactive PTY sessions — drive REPLs/TUIs/games, hold shell+env across turns.
    self.terminals = SessionManager(scrub_secrets=_scrub)
    # I2 — RE-OBSERVATION REACH = ACTION REACH. File tools and shell must reach the
    # SAME places, or the agent writes (via shell, unconfined) files its file tools can
    # never read back, and OPEN FILES lies "(not created yet)" about real on-disk files.
    # `_extra_roots` holds dirs the goal/user EXPLICITLY targets (added via add_root):
    # _resolve accepts a path under the workspace root OR any extra root. Explicit and
    # bounded — never a blanket '/'; the workspace stays the default and only the launch
    # dir is implicit. Task-agnostic (we don't parse the goal) and safe (opt-in).
    self._extra_roots: list[str] = []
    self._focus: str | None = None  # most-recently-worked EXTERNAL dir → the active focus (slice-surfaced)
    # ask_user (the "come back and ask" capability): a host callback that prompts the real user and
    # returns their answer. Defaults to a non-interactive fallback so headless/eval never hangs; the
    # CLI overrides it with a TUI/plain prompt. Injected (not a core dependency) — task/LLM-agnostic.
    self.on_ask_user = _default_ask_user
    self._edit_journal: list = []  # (rel, full, prev_bytes|None) per write — powers /undo
    self.pending_images: list = []  # images @-attached for the NEXT seed build (vision models only)
    # The registry is the single source of tools; MCP/plugin/skill tools register
    # into this same object later (Step ③). The host just projects from it.
    self.registry = registry or ToolRegistry()
    # Must run after self.registry is assigned: _register_builtins registers into it.
    self._register_builtins()
    import atexit
    atexit.register(self.cleanup)  # leaked background procs / PTYs must not survive exit/abort/crash
424
+
425
def cleanup(self) -> None:
    """Best-effort teardown of the process manager and the terminal manager.

    Each manager's own cleanup() is called if the attribute exists and is not None; any Exception
    it raises is deliberately swallowed so one failing manager cannot stop the other from being
    cleaned up, and so this method never raises.
    """
    # Look both managers up first (getattr with a default tolerates a partially-built host).
    managers = [getattr(self, attr, None) for attr in ("procs", "terminals")]
    for manager in managers:
        if manager is None:
            continue
        try:
            manager.cleanup()
        except Exception:  # noqa: BLE001
            pass
434
+
435
def _register_builtins(self) -> None:
    """Register one ToolEntry per schema in TOOL_SCHEMAS into self.registry.

    Each entry pairs the schema with its bound handler method (looked up by tool name — a schema
    without a handler raises KeyError) and an `accesses` callable that forwards to
    self._builtin_accesses(name, args). Every entry is tagged source="builtin".
    """
    handler_by_tool = {
        "read_file": self._t_read_file,
        "list_files": self._t_list_files,
        "edit_file": self._t_edit_file,
        "append_to_file": self._t_append,
        "str_replace": self._t_str_replace,
        "run_command": self._t_run_command,
        "execute_code": self._t_execute_code,
        "ask_user": self._t_ask_user,
        "proc_start": self._t_proc_start,
        "proc_poll": self._t_proc_poll,
        "proc_tail": self._t_proc_tail,
        "proc_wait": self._t_proc_wait,
        "proc_kill": self._t_proc_kill,
        "terminal_open": self._t_terminal_open,
        "terminal_send": self._t_terminal_send,
        "terminal_read": self._t_terminal_read,
        "terminal_wait": self._t_terminal_wait,
        "terminal_close": self._t_terminal_close,
        "world_set": self._t_world_set,
        "world_clear": self._t_world_clear,
        "require": self._t_require,
        "requirement_done": self._t_requirement_done,
        "drop_requirement": self._t_drop_requirement,
        "update_plan": self._t_update_plan,
        "set_mission": self._t_set_mission,
        "mission_done": self._t_mission_done,
        "code_review": self._t_code_review,
    }
    for tool_schema in TOOL_SCHEMAS:
        tool_name = tool_schema["function"]["name"]
        entry = ToolEntry(
            name=tool_name,
            schema=tool_schema,
            handler=handler_by_tool[tool_name],
            # n=tool_name freezes the name per entry; a plain closure would see only the last one.
            accesses=(lambda args, n=tool_name: self._builtin_accesses(n, args)),
            source="builtin",
        )
        self.registry.register(entry)
460
+
461
def root(self) -> str:
    """Return the workspace root as a real (symlink-resolved, absolute) path.

    Uses the pinned root when one was given; otherwise the process's current working directory
    at the time of the call.
    """
    base = self._root
    if not base:
        base = os.getcwd()
    return os.path.realpath(base)
463
+
464
def add_root(self, path: str) -> str | None:
    """Mark a directory the goal/user EXPLICITLY targets as in-reach for file tools.

    `path` is user-expanded (`~`) and resolved to its real path, then appended to
    `self._extra_roots` unless it is already there or equals the workspace root (idempotent).

    Refused (returns None, nothing recorded), so the workspace boundary is never erased:
      * an empty/None path;
      * ANY filesystem root — '/' and also a Windows drive root such as 'C:\\' — detected
        portably as "a path that is its own parent";
      * the home directory, or any ancestor of it (e.g. '/home'), since granting an ancestor
        grants the whole home directory with it.

    Returns:
        The real path that is now in reach, or None if the path was refused.
    """
    if not path:
        return None
    full = os.path.realpath(os.path.expanduser(path))
    # never widen reach to a whole filesystem: a root directory is its own dirname
    if os.path.dirname(full) == full:
        return None
    # ... nor to the bare home dir, directly or via a directory that contains it
    home = os.path.realpath(os.path.expanduser("~"))
    if full == home or home.startswith(full + os.sep):
        return None
    if full == self.root() or full in self._extra_roots:
        return full
    self._extra_roots.append(full)
    return full
483
+
484
def allowed_roots(self) -> list[str]:
    """Return the directories the file tools may reach: workspace root first, then extra roots.

    Extra roots keep their insertion order; duplicates and a repeat of the workspace root
    are dropped. A fresh list is returned on every call.
    """
    workspace = self.root()
    extras = [r for r in dict.fromkeys(self._extra_roots) if r != workspace]
    return [workspace, *extras]
492
+
493
+ def focus(self) -> tuple[str | None, list[str]]:
494
+ """The active focus (most-recently-worked EXTERNAL dir) + every extra root the file tools reach
495
+ beyond the workspace. Surfaced in the slice so the model KNOWS its file tools reach there: the
496
+ auto-granted reach was invisible, so the agent defaulted to the workspace frame and lost the
497
+ thread across turns (the hunter 'index.ts' miss). Delegated by SubagentHost via __getattr__."""
498
+ return self._focus, list(self._extra_roots)
499
+
500
def resolution_base(self) -> str:
    """Return the directory a bare relative path is joined to.

    That is the active focus when one is set AND it is one of `allowed_roots()`;
    otherwise the workspace root. Only the anchor moves — this never adds reach.
    """
    workspace = self.root()
    candidate = self._focus or workspace
    # defensive: an anchor that is not an authorized root falls back to the workspace
    if candidate in self.allowed_roots():
        return candidate
    return workspace
509
+
510
def locate(self, path: str) -> str:
    """Resolve `path` for re-reading, independent of the current resolution base.

    * Absolute paths (after '~' expansion) are delegated to `_resolve`, which enforces the boundary.
    * A relative path is tried under every allowed root in order; the first candidate that
      stays inside its root AND exists on disk is returned.
    * If nothing exists, the workspace-root resolution is returned when it stays inside the
      workspace; otherwise `_resolve` is called so an escaping path raises exactly as the
      file tools do.
    """
    relative = os.path.expanduser(path)
    if os.path.isabs(relative):
        return self._resolve(path)

    def _within(candidate: str, anchor: str) -> bool:
        # confinement test: the anchor itself, or strictly below it
        return candidate == anchor or candidate.startswith(anchor + os.sep)

    for anchor in self.allowed_roots():
        hit = os.path.realpath(os.path.join(anchor, relative))
        if _within(hit, anchor) and os.path.exists(hit):
            return hit

    # nothing exists anywhere → hand back a boundary-safe "not there yet" path
    boundary = self.root()
    missing = os.path.realpath(os.path.join(boundary, relative))
    if not _within(missing, boundary):
        return self._resolve(path)
    return missing
530
+
531
def _grant_shell_paths(self, text: str) -> None:
    """Grant file-tool reach to directories a shell command acted on OUTSIDE the workspace.

    Scans `text` for quoted paths containing '/' and for bare '/'- or '~'-rooted tokens.
    For each one, the directory (the path itself when it is a directory, else its parent)
    is added via `add_root` and becomes the active focus, provided that it:

    * exists,
    * lies strictly inside the user's HOME (never HOME itself),
    * is neither the workspace root nor an ancestor of it,
    * is not already inside the workspace (it is reachable there, and making it the focus
      would silently re-anchor relative paths onto a workspace subdirectory),
    * has no path component listed in `_SECRET_DIRS` (compared case-insensitively).

    Pure path detection: the command is not parsed. Does nothing for empty `text`.
    """
    if not text:
        return
    home = os.path.realpath(os.path.expanduser("~"))
    root = self.root()
    # quoted paths (may contain spaces) OR bare ~/-rooted tokens up to a shell metachar/space
    for q, uq in re.findall(
            r"""['"]([^'"]*/[^'"]*)['"]|(?<![\w'"])((?:~|/)[^\s'"|&;<>()]+)""", text):
        cand = (q or uq).strip()
        if not cand.startswith(("/", "~")):
            continue
        full = os.path.realpath(os.path.expanduser(cand))
        d = full if os.path.isdir(full) else os.path.dirname(full)
        if not d or not os.path.isdir(d):
            continue
        if not d.startswith(home + os.sep):  # only the user's own subtree (excludes HOME itself)
            continue
        if d == root or root.startswith(d + os.sep):  # never an ancestor of the workspace
            continue
        if d.startswith(root + os.sep):
            # already inside the workspace: no reach to grant, and it is not an EXTERNAL
            # dir, so it must not become the focus / relative-path anchor
            continue
        # never auto-widen file-tool reach into credential/secret dirs, even inside HOME — a path
        # merely MENTIONED in a shell command must not make such a directory readable by the tools
        if any(part.lower() in _SECRET_DIRS for part in d.split(os.sep)):
            continue
        self.add_root(d)
        self._focus = d  # the most-recent external dir the shell worked on → the active focus
562
+
563
def resolve_read(self, path: str) -> str:
    """Pick the file a read (or an edit of an existing file) should target.

    Order of preference:
    1. the `_resolve` result, when it exists on disk;
    2. the `locate` result, when that exists;
    3. the `_resolve` result anyway (so a not-yet-created file lands at the current base).

    If `_resolve` raises ValueError or PermissionError, the `locate` result is returned
    directly (and `locate` may itself raise).
    """
    try:
        primary = self._resolve(path)
    except (ValueError, PermissionError):
        return self.locate(path)
    if not os.path.exists(primary):
        elsewhere = self.locate(path)
        if os.path.exists(elsewhere):
            return elsewhere
    return primary
577
+
578
def _resolve(self, path: str) -> str:
    """Resolve a tool path to a real path inside one of the allowed roots.

    '~' is expanded first so it behaves as in the shell. A relative path is joined to
    `resolution_base()`; the result is symlink-resolved and must equal, or sit below,
    one of `allowed_roots()`.

    Raises:
        ValueError: `path` is empty.
        PermissionError: the resolved path is outside every allowed root. The message
            names the boundary and the alternatives so the caller can recover.
    """
    if not path:
        raise ValueError("empty path")
    path = os.path.expanduser(path)
    roots = self.allowed_roots()
    # the anchor for relative paths may move between authorized dirs; reach does not
    base = self.resolution_base()
    joined = path if os.path.isabs(path) else os.path.join(base, path)
    full = os.path.realpath(joined)
    if any(full == r or full.startswith(r + os.sep) for r in roots):
        return full
    raise PermissionError(
        f"path escapes the boundary ({base}): {path} — File tools are confined to your "
        "authorized directories (the boundary). To act on paths outside it, use "
        "run_command/execute_code (shell is unconfined), or re-run sliceagent rooted at that directory.")
601
+
602
+ def _resolve_for_access(self, path: str) -> str | None:
603
+ """Canonical PHYSICAL path for SCHEDULING conflict detection only — NOT a security check (the real
604
+ _resolve enforces the boundary at run time). Mirrors _resolve's expanduser + base-join + realpath
605
+ so 'foo.py', './foo.py', and the absolute spelling collapse to ONE key, and the scheduler then
606
+ serializes concurrent writes to the same inode (otherwise a parallel edit_file + str_replace via
607
+ different spellings race → lost update). Returns None on empty/bad input → caller falls back."""
608
+ if not path:
609
+ return None
610
+ try:
611
+ p = os.path.expanduser(path)
612
+ base = self.resolution_base()
613
+ full = p if os.path.isabs(p) else os.path.join(base, p)
614
+ return os.path.realpath(full)
615
+ except Exception: # noqa: BLE001 — access declaration must never fail the call
616
+ return None
617
+
618
+ # --- ToolHost projection: everything comes from the registry now ---
619
def schemas(self) -> list[dict]:
    """Return the registry's tool schemas, each passed through `with_note`.

    Per the original comment, `with_note` injects a 'note' argument into every tool so the
    model's per-turn conclusion rides on the call it already makes.
    """
    return list(map(with_note, self.registry.schemas()))
623
+
624
def accesses(self, name: str, args: dict) -> list:
    """Return the access declarations the registry reports for calling tool `name` with `args`."""
    registry = self.registry
    return registry.accesses(name, args)
626
+
627
def run(self, name: str, args: dict) -> str:
    """Execute tool `name` with `args` through the registry and return its result."""
    registry = self.registry  # per the original comment, the registry wraps the handler in try/except
    return registry.run(name, args)
629
+
630
def read_text(self, path: str, *, lossy: bool = True) -> str:
    """Read a file inside the boundary as UTF-8 text, refusing binary content.

    The file is read as bytes so the binary check (`looks_binary` on the first 8192 bytes,
    decoded leniently) runs before the content is trusted as text.

    Args:
        path: tool path; resolved (and confined) via `_resolve`.
        lossy: True → invalid bytes become U+FFFD (display callers). False → strict
            decoding, so a read-modify-write caller aborts instead of writing back a
            mangled file.

    Raises:
        ValueError: the content looks binary.
        UnicodeDecodeError: `lossy=False` and the file holds invalid UTF-8.
    """
    target = self._resolve(path)
    with open(target, "rb") as handle:
        payload = handle.read()
    head = payload[:8192].decode("utf-8", errors="replace")
    if looks_binary(path, head):
        raise ValueError(f"{path} appears to be binary; not shown")
    policy = "replace" if lossy else "strict"
    return payload.decode("utf-8", errors=policy)
647
+
648
def _builtin_accesses(self, name: str, args: dict) -> list:
    """Declare what a builtin call touches so the scheduler can decide what may run in parallel.

    * read_file → one "read" access on the physical path (nothing when it can't be resolved).
    * list_files → one recursive "search" access on the directory (default ".").
    * edit_file / append_to_file / str_replace → one "readwrite" access on the physical
      path, or AllAccess when the path can't be resolved.
    * every other tool → AllAccess (globally exclusive).
    """
    target = args.get("path")
    # physical paths, so two spellings of one file conflict (and serialize) correctly
    if name == "read_file":
        physical = self._resolve_for_access(target)
        return [FileAccess("read", physical)] if physical else []
    if name == "list_files":
        directory = target or "."
        return [FileAccess("search", self._resolve_for_access(directory) or directory, recursive=True)]
    if name in ("edit_file", "append_to_file", "str_replace"):
        physical = self._resolve_for_access(target)
        if not physical:
            return [AllAccess()]
        return [FileAccess("readwrite", physical)]
    # run_command / execute_code / proc_* / terminal_* are arbitrary or stateful execution,
    # and the remaining tools get the same treatment: globally exclusive
    return [AllAccess()]
666
+
667
+ # --- builtin tool handlers (args) -> str (the registry catches exceptions) ---
668
def _page_out(self, text: str, *, label: str = "output") -> str:
    """Bound a large tool output: keep the full text in a blob file, inline only head + tail.

    Output at or under `_OUTPUT_INLINE_CAP` characters (before or after control-character
    stripping) is returned inline, stripped. Larger output is written once to
    `.sliceagent/blobs/<label>-<sha1 prefix>.txt` and the returned view holds the first
    `_OUTPUT_HEAD` and last `_OUTPUT_TAIL` characters around a banner naming the
    `read_file(...)` call that pages the whole thing back. Any failure while writing the
    blob is swallowed: the view is still bounded and the banner says the rest is unavailable.
    """
    if not text or len(text) <= _OUTPUT_INLINE_CAP:
        # small path is stripped too, so control characters never reach the caller
        return _strip_control(text)
    cleaned = _strip_control(text)
    if len(cleaned) <= _OUTPUT_INLINE_CAP:
        # stripping can shrink the text below the cap → return it whole, with no paging banner
        return cleaned
    try:
        import hashlib
        digest = hashlib.sha1(cleaned.encode("utf-8", "replace")).hexdigest()[:12]
        rel = os.path.join(".sliceagent", "blobs", f"{label.replace(' ', '-')}-{digest}.txt")
        blob = self._resolve(rel)
        self._mkparent(blob)
        if not os.path.exists(blob):  # content-addressed name → an existing blob is reused
            self._atomic_write(blob, cleaned)
        ref = f"read_file('{rel}')"
    except Exception:  # noqa: BLE001 — a paging failure must never fail the tool itself
        ref = None
    if ref:
        how = f"page the full {label} back with {ref}"
    else:
        how = f"the elided {label} is unavailable (blob write failed)"
    elided = len(cleaned) - _OUTPUT_HEAD - _OUTPUT_TAIL
    head, tail = cleaned[:_OUTPUT_HEAD], cleaned[-_OUTPUT_TAIL:]
    return (f"{head}\n\n"
            f"[… {elided} of {len(cleaned)} chars paged out — {how} …]\n\n"
            f"{tail}")
698
+
699
def _t_read_file(self, args: dict) -> str:
    """read_file tool: return a numbered view of a text file, or a hexdump summary of a binary one.

    Args (in `args`): `path` (required); optional `offset` (1-based first line) and `limit`
    (line count), both passed through `_coerce_int`.

    Without a window, at most `_READ_MAX_LINES` lines are shown — except for files under
    `.sliceagent/blobs/`, which are returned whole. A complete, un-windowed read returns just
    the numbered body; any partial or windowed read appends a `<system>` footer with the line
    range and, when lines remain, the offset to continue from.
    """
    path = args["path"]
    full = self.resolve_read(path)  # existing match across the allowed roots
    with open(full, "rb") as fh:
        raw = fh.read()
    if looks_binary(path, raw[:8192].decode("utf-8", errors="replace")):
        return self._binary_view(path, raw)

    lines = raw.decode("utf-8", errors="replace").splitlines()
    total = len(lines)
    offset = _coerce_int(args.get("offset"))
    limit = _coerce_int(args.get("limit"))
    windowed = not (offset is None and limit is None)
    # a paged-out blob is the "give me the full output back" channel → never capped by default
    blob_marker = ".sliceagent/blobs/"
    is_blob = blob_marker in path.replace("\\", "/") or blob_marker in str(full).replace("\\", "/")

    if windowed:
        start = min(max(1, offset or 1), total + 1)
        end = total if limit is None else min(total, start - 1 + max(1, limit))
    else:
        start = 1
        end = total if (is_blob or total <= _READ_MAX_LINES) else _READ_MAX_LINES

    body = _number_lines(lines[start - 1:end], start)
    if not windowed and end >= total:
        return body  # complete read → no footer
    if end < total:
        more = f" · +{total - end} more — read_file(path, offset={end + 1}) to continue"
    else:
        more = ""
    return f"{body}\n<system>read_file {path}: lines {start}-{end} of {total}{more}</system>"
733
+
734
@staticmethod
def _binary_view(path: str, raw: bytes, head_bytes: int = 256) -> str:
    """Describe a binary file: total size, magic (first 8 bytes as hex) and a hexdump of its head.

    Args:
        path: name shown in the summary line.
        raw: the complete file content.
        head_bytes: how many leading bytes to dump, 16 per row.
    """
    head = raw[:head_bytes]

    def _row(offset: int) -> str:
        chunk = head[offset:offset + 16]
        hex_cols = " ".join(format(byte, "02x") for byte in chunk)
        # printable ASCII as-is, everything else as '.'
        text_cols = "".join(chr(byte) if 32 <= byte < 127 else "." for byte in chunk)
        return f"{offset:08x} {hex_cols:<47} {text_cols}"

    dump = "\n".join(_row(offset) for offset in range(0, len(head), 16))
    return (f"{path}: binary file, {len(raw)} bytes — text tools can't edit it; inspect/convert "
            f"it with run_command/execute_code (the right CLI).\n"
            f"magic: {head[:8].hex()}\n"
            f"hexdump (first {len(head)} bytes):\n" + dump)
747
+
748
@staticmethod
def _detect_crlf(full: str) -> bool:
    """Report whether the first 64 KiB of the file at `full` contain a CRLF pair.

    Callers use this to keep a file's Windows line endings on edit. Returns False when
    the file cannot be opened or read.
    """
    try:
        with open(full, "rb") as fh:
            head = fh.read(65536)
    except OSError:
        return False
    return b"\r\n" in head
758
+
759
@staticmethod
def _preserve_eol(text: str, crlf: bool) -> str:
    """Return `text` with CRLF line endings when `crlf` is true; unchanged otherwise.

    Existing CRLF pairs are normalized to LF first, so already-CRLF or mixed input
    never ends up with doubled '\\r'.
    """
    if not crlf:
        return text
    unix = text.replace("\r\n", "\n")
    return unix.replace("\n", "\r\n")
764
+
765
def _t_list_files(self, args: dict) -> str:
    """list_files tool: list one directory, or (with `recursive`) a bounded map of the tree below it.

    Non-recursive: sorted entries, directories suffixed with '/', ignored names left out but
    counted and the first six named. Recursive: relative file paths with ignored directories
    pruned and ignored files skipped, stopping once `_LIST_CAP` paths are collected (a trailer
    then says the list was capped). Either mode prints "(empty)" when nothing is shown.
    """
    base = self._resolve(args.get("path") or ".")

    if not args.get("recursive"):
        shown: list[str] = []
        hidden: list[str] = []
        for entry in sorted(os.listdir(base)):
            if _is_ignored(entry):
                hidden.append(entry)
            elif os.path.isdir(os.path.join(base, entry)):
                shown.append(entry + "/")
            else:
                shown.append(entry)
        body = "\n".join(shown) or "(empty)"
        if hidden:  # name a few so the reader knows they exist, without flooding
            body += f"\n(+{len(hidden)} ignored: {', '.join(hidden[:6])})"
        return body

    found: list[str] = []
    capped = False
    for dirpath, dirnames, filenames in os.walk(base):
        # prune in place so os.walk never descends into ignored directories
        dirnames[:] = sorted(d for d in dirnames if not _is_ignored(d))
        prefix = os.path.relpath(dirpath, base)
        for fname in sorted(filenames):
            if _is_ignored(fname):
                continue
            found.append(fname if prefix == "." else os.path.join(prefix, fname))
            if len(found) >= _LIST_CAP:
                capped = True
                break
        if capped:
            break
    body = "\n".join(sorted(found)) or "(empty)"
    if capped:
        body += f"\n(+more — capped at {_LIST_CAP}; pass a subdirectory path to narrow)"
    return body
795
+
796
def _t_edit_file(self, args: dict) -> str:
    """edit_file tool: write `args["content"]` as the whole content of `args["path"]`.

    The target comes from `resolve_read`, so an existing file is overwritten where reads find
    it. When the file already exists its CRLF line endings are preserved. The pre-image is
    journaled before the atomic write, and content starting with '#!' is made executable.
    Returns a "Wrote …" message, followed by a numbered head of the content when that echo
    can be produced.
    """
    rel = args["path"]
    full = self.resolve_read(rel)
    self._mkparent(full)
    content = args["content"]
    if os.path.exists(full):
        # keep the file's existing line endings
        content = self._preserve_eol(content, self._detect_crlf(full))
    self._journal(rel, full)
    self._atomic_write(full, content)
    if content.startswith("#!"):
        self._make_executable(full)
    msg = f"Wrote {len(content)} bytes to {rel}"
    try:  # the echo is best-effort and must never fail the write
        if content.strip():
            n = content.replace("\r\n", "\n").rstrip("\n").count("\n") + 1
        else:
            n = 0
        return f"{msg} ({n} lines). Head:\n" + _numbered_window(content, 0, 15, ctx=0, cap=16)
    except Exception:  # noqa: BLE001
        return msg
812
+
813
def _make_executable(self, full: str) -> None:
    """Add the user/group/other execute bits to `full` (chmod +x), keeping its other mode bits.

    Best-effort: any OSError (missing file, no permission) is ignored.
    """
    try:
        import stat as _stat
        exec_bits = _stat.S_IXUSR | _stat.S_IXGRP | _stat.S_IXOTH
        current = os.stat(full).st_mode
        os.chmod(full, current | exec_bits)
    except OSError:
        pass
821
+
822
def _t_append(self, args: dict) -> str:
    """append_to_file tool: append `args["content"]` (UTF-8 encoded) to `args["path"]`.

    The target comes from `resolve_read`, so an existing file is appended where reads find it.
    The pre-image is journaled first. The file is opened in binary append mode so the bytes
    land exactly as encoded (text mode would translate newlines).

    Returns "Appended N bytes to <path>", where N is the number of bytes actually written
    (the UTF-8 length, not the character count), followed by a numbered tail of the file
    when that echo can be produced.
    """
    full = self.resolve_read(args["path"])
    self._mkparent(full)
    self._journal(args["path"], full)
    data = args["content"].encode("utf-8")
    with open(full, "ab") as f:
        f.write(data)
    # report what was written: len(str) under-counts any non-ASCII content
    msg = f"Appended {len(data)} bytes to {args['path']}"
    try:  # echo the file tail so the appended content is visible in context
        with open(full, encoding="utf-8", errors="replace") as _f:
            whole = _f.read()
        total = whole.replace("\r\n", "\n").rstrip("\n").count("\n") + 1
        app = args["content"].replace("\r\n", "\n").rstrip("\n").count("\n") + 1
        return f"{msg}. File tail:\n" + _numbered_window(whole, max(0, total - app), total - 1, ctx=2)
    except Exception:  # noqa: BLE001 — the echo must never fail the append
        return msg
837
+
838
def _edit_result(self, path: str, before: str, after: str, change_offset: int, new_text: str,
                 *, fuzzy: bool = False) -> str:
    """Build the str_replace result: a size-delta message plus a numbered window of the edited region.

    Args:
        path: name shown in the message.
        before / after: file text before and after the edit.
        change_offset: index in `before` where the replaced span starts.
        new_text: the replacement text (its newline count sets the window height).
        fuzzy: adds a "(normalized/fuzzy match)" tag to the message.

    If building the window fails for any reason, only the plain message is returned.
    """
    tag = " (normalized/fuzzy match)" if fuzzy else ""
    msg = f"Replaced 1 occurrence{tag} in {path} ({len(before)} → {len(after)} bytes)"
    try:
        # the prefix before the change is untouched, so this 0-based line index holds in `after` too
        first = before.count("\n", 0, change_offset)
        last = first + new_text.replace("\r\n", "\n").count("\n")
        window = _numbered_window(after, first, last)
        return f"{msg}. Updated region (lines {first + 1}-{last + 1}):\n" + window
    except Exception:  # noqa: BLE001 — the echo must never fail the edit
        return msg
850
+
851
def _t_str_replace(self, args: dict) -> str:
    """str_replace tool: replace `old_string` with `new_string` inside an existing text file.

    Matching order:
    1. exact match of `old_string`, then of `old_string` with line-number prefixes stripped
       (only tried when stripping changed it). One hit is replaced; several hits are replaced
       only when `replace_all` is set, otherwise an error is returned.
    2. a unique whitespace-tolerant match (`fuzzy_find_unique`), raw first, then stripped.

    The file is read strictly as UTF-8: invalid bytes produce an error result and the file is
    left untouched. Every write keeps the file's CRLF line endings and is journaled first.
    Failures are reported as `ToolText(..., ok=False)`.
    """
    rel = args["path"]
    full = self.resolve_read(rel)  # edit the same file read_file shows
    try:
        cur = self.read_text(full, lossy=False)
    except UnicodeDecodeError as ex:
        return ToolText(f"Error: {args['path']} contains a non-UTF-8 byte ({ex}); str_replace can't safely "
                        "edit it (a whole-file write-back would corrupt the other bytes). Use edit_file to "
                        "rewrite the file, or fix its encoding first.", ok=False)
    crlf = self._detect_crlf(full)
    old = args["old_string"]
    new = args["new_string"]

    # raw snippet first; the de-numbered variant is only a fallback, so a real match is never altered
    candidates = [old]
    denumbered = _strip_line_numbers(old)
    if denumbered != old:
        candidates.append(denumbered)
    replace_all = bool(args.get("replace_all"))

    def _commit(new_body: str, offset: int, *, fuzzy: bool = False) -> str:
        # shared write path: restore line endings → journal the pre-image → atomic write → echo
        updated = self._preserve_eol(new_body, crlf)
        self._journal(rel, full)
        self._atomic_write(full, updated)
        return self._edit_result(rel, cur, updated, offset, new, fuzzy=fuzzy)

    for cand in candidates:
        hits = cur.count(cand)
        if hits == 0:
            continue
        if hits > 1 and not replace_all:
            return ToolText(f"Error: old_string occurs {hits} times in {args['path']}; add context to make it "
                            "unique, or pass replace_all=true to change them all", ok=False)
        return _commit(cur.replace(cand, new, hits if replace_all else 1), cur.index(cand))

    # fuzzy_find_unique yields None for zero or several candidates, so an ambiguous span is never replaced
    for cand in candidates:
        span = fuzzy_find_unique(cur, cand)
        if span is None:
            continue
        return _commit(cur[:span[0]] + new + cur[span[1]:], span[0], fuzzy=True)

    return ToolText(f"Error: old_string not found in {args['path']} — your snippet does not match "
                    f"the file. Copy the EXACT text from OPEN FILES (the live content, WITHOUT the line-number "
                    f"prefix), or rewrite the whole file with edit_file. Do NOT retry the same str_replace.", ok=False)
897
+
898
+ # --- edit journal (powers /undo) -----------------------------------------
899
def _journal(self, rel: str, full: str) -> None:
    """Record `(rel, full, pre-image)` just before a write so `undo_last` can revert it.

    The pre-image is the file's current bytes, or None when the file does not exist or
    cannot be read. Only the 50 most recent entries are kept.
    """
    snapshot = None
    try:
        if os.path.exists(full):
            with open(full, "rb") as fh:
                snapshot = fh.read()
    except OSError:
        snapshot = None
    log = self._edit_journal
    log.append((rel, full, snapshot))
    if len(log) > 50:
        del log[:-50]  # bounded ring: drop the oldest entries in place
913
+
914
def undo_last(self) -> str:
    """Revert the most recent journaled edit and return a human-readable status line.

    An entry with a recorded pre-image has those bytes written back; an entry without one
    (the file did not exist before the edit) has the file removed if it is still there.
    The entry is consumed even when the revert fails with an OSError.
    """
    if not self._edit_journal:
        return "Nothing to undo."
    rel, full, prev = self._edit_journal.pop()
    try:
        if prev is not None:
            with open(full, "wb") as fh:
                fh.write(prev)
            return f"Undid the last edit to {rel} ({len(prev)} bytes restored)."
        if os.path.exists(full):
            os.remove(full)
        return f"Undid: removed {rel} (it did not exist before that edit)."
    except OSError as e:
        return f"Undo failed for {rel}: {e}"
929
+
930
def attach_image(self, path: str) -> str:
    """Stash an image (base64 + sniffed MIME type) in `pending_images` and return a status line.

    The path is confined via `_resolve`, like reads. The MIME type comes from the file's
    magic bytes (`_sniff_image_mime`), not its extension, so a renamed non-image is rejected.
    Files over 8 MB are refused.

    Always returns a string: any failure to resolve or read the path — including the
    ValueError `_resolve` raises for an empty path — is reported as an "Error: …" line
    rather than raised, since the caller shows the result directly.
    """
    import base64
    try:
        full = self._resolve(path)
        with open(full, "rb") as _f:
            raw = _f.read()
    except (OSError, ValueError) as e:
        # OSError covers missing/unreadable files and the boundary PermissionError;
        # ValueError covers an empty path
        return f"Error: cannot read image {path}: {e}"
    if len(raw) > 8 * 1024 * 1024:
        return f"Error: image {path} is {len(raw)} bytes (cap 8MB) — too large to attach"
    mime = _sniff_image_mime(raw)
    if mime is None:
        return f"Error: {path} is not a recognized image (png/jpeg/gif/webp/bmp) — not attached"
    self.pending_images.append({"path": path, "b64": base64.b64encode(raw).decode("ascii"), "mime": mime})
    # cost-awareness: a base64 image is large and billed as image tokens
    return f"attached image {path} ({len(raw) // 1024} KB, {mime}) — vision turn, costs more than a text turn"
950
+
951
def _t_code_review(self, args: dict) -> str:
    """code_review tool: return `git diff <ref>` for the workspace root so it can be reviewed.

    Args (in `args`): `ref` — a git ref or range; defaults to "HEAD".

    Returns the diff (paged out via `_page_out` when large) under a short review
    instruction, a "No changes" line for an empty diff, or `ToolText(..., ok=False)` when
    the ref is option-shaped, git is missing / cannot be launched, or git exits non-zero.
    """
    import subprocess
    # coerce first: a non-string ref (e.g. a number) must not crash on .strip()
    ref = str(args.get("ref") or "HEAD").strip() or "HEAD"
    # SECURITY: `ref` is model-controlled. An option-shaped ref (e.g. --output=/path, -O, --ext-diff)
    # would be parsed by git as a FLAG. Reject leading-dash refs (a real ref/range never starts with
    # '-') and pass `--` so the ref can never be read as an option. Ranges (main...HEAD, HEAD~3) still work.
    if ref.startswith("-"):
        return ToolText(f"Error: invalid ref {ref!r} (a ref must not start with '-').", ok=False)
    try:
        # decode as UTF-8 with replacement: a diff touching a non-UTF-8 file must still come back
        # as a diff instead of raising UnicodeDecodeError from the locale's strict codec
        p = subprocess.run(["git", "-C", self.root(), "diff", ref, "--"],
                           capture_output=True, text=True, encoding="utf-8", errors="replace",
                           timeout=30)
    except FileNotFoundError:
        return ToolText("Error: git is not installed.", ok=False)
    except (subprocess.SubprocessError, OSError) as e:  # timeout, or git could not be launched
        return ToolText(f"Error: git diff failed ({type(e).__name__}: {e}).", ok=False)
    if p.returncode != 0:
        return ToolText(f"Error: `git diff {ref}` failed — {p.stderr.strip()[:300]} "
                        "(is this a git repo? is the ref valid?)", ok=False)
    diff = p.stdout
    if not diff.strip():
        return f"No changes vs {ref} — the working tree matches it. Nothing to review."
    # page a large diff out (the full diff stays reachable) rather than truncating its tail
    body = self._page_out(diff, label=f"git-diff-{ref}")
    return (f"git diff {ref} ({len(diff)} chars). Review for correctness, security, and edge cases; "
            f"cite file:line per issue.\n\n{body}")
979
+
980
def _t_ask_user(self, args: dict) -> str:
    """ask_user tool: put a question (with optional choices) to the user and return the answer.

    `question` must be non-empty after stripping, else an error result is returned.
    `options` is forwarded as a list of strings only when it is a non-empty list; otherwise
    None is passed. The `on_ask_user` callback is used when set, `_default_ask_user`
    otherwise. EOF or Ctrl-C while asking yields "(no answer)".
    """
    question = (args.get("question") or "").strip()
    if not question:
        return ToolText("Error: ask_user requires a non-empty 'question'.", ok=False)
    raw_options = args.get("options")
    choices = None
    if isinstance(raw_options, list) and raw_options:
        choices = [str(option) for option in raw_options]
    try:
        answer = (self.on_ask_user or _default_ask_user)(question, choices)
    except (EOFError, KeyboardInterrupt):
        answer = "(no answer)"
    return f"User answered: {str(answer).strip()}"
991
+
992
def _t_run_command(self, args: dict) -> str:
    """run_command tool: run `args["command"]` in the sandbox from the workspace root.

    `timeout` (optional) overrides `self.timeout`; either way it is clamped to 1–600 seconds,
    and an unparsable value falls back to `self.timeout`. After the run, paths mentioned in
    the command are offered to `_grant_shell_paths`. Output is stripped and passed through
    `_page_out`. A non-zero exit code is returned as `ToolText(..., ok=False)`.
    """
    try:
        budget = float(args.get("timeout") or self.timeout)
    except (TypeError, ValueError):
        budget = float(self.timeout)
    budget = max(1.0, min(budget, 600.0))

    code, out = self.sandbox.run(args["command"], cwd=self.root(), timeout=budget)
    self._grant_shell_paths(args.get("command", ""))  # reach follows the dirs the shell touched
    out = out.strip()

    if code != 0:
        shown = self._page_out(out, label="command output") or "(no output)"
        return ToolText(f"Exit code {code}\n{shown}", ok=False)
    if not out:
        return "(command produced no output)"
    return self._page_out(out, label="command output")
1006
+
1007
+ # --- background / long-running processes (procman) ---
1008
def _host_only_note(self) -> str:
    """Return a warning line when the configured sandbox is not a `LocalSandbox`, else "".

    Per the original comment, background processes and PTY sessions run on the host rather
    than through `self.sandbox`, so under another sandbox type the isolation does not apply
    to them; the warning makes that visible instead of silent.
    """
    sandbox_kind = type(self.sandbox).__name__
    if sandbox_kind == "LocalSandbox":
        return ""
    return ("[warning: this runs on the HOST, NOT inside the configured sandbox — "
            f"{sandbox_kind} isolation does not apply]\n")
1014
+
1015
def _t_proc_start(self, args: dict) -> str:
    """proc_start tool: start `args["command"]` as a background process in the workspace root.

    Returns the host-only warning (if any), the new handle, and a hint naming the proc_*
    tools that accept that handle.
    """
    command = args["command"]
    handle = self.procs.start(command, cwd=self.root())
    return (f"{self._host_only_note()}Started background process {handle}: {args['command']}\n"
            f"Use proc_tail/proc_poll/proc_wait/proc_kill with handle {handle}.")
1019
+
1020
def _t_proc_poll(self, args: dict) -> str:
    """proc_poll tool: return the process manager's poll result for `args["handle"]`."""
    handle = args["handle"]
    return self.procs.poll(handle)
1022
+
1023
+ def _t_proc_tail(self, args: dict) -> str:
1024
+ # #26: cap requested lines so a huge `lines` can't dump a chatty server's whole log into the slice.
1025
+ try:
1026
+ n = int(args.get("lines") or 40)
1027
+ except (TypeError, ValueError):
1028
+ n = 40 # a non-numeric `lines` arg must not crash the tool
1029
+ return self.procs.tail(args["handle"], max(1, min(n, 2000)))
1030
+
1031
+ def _t_proc_wait(self, args: dict) -> str:
1032
+ try:
1033
+ t = float(args.get("timeout") or 30.0)
1034
+ except (TypeError, ValueError):
1035
+ t = 30.0
1036
+ # proc_wait is a poll-with-timeout — allow sub-second waits (unlike run_command's 1s floor).
1037
+ return self.procs.wait(args["handle"], max(0.05, min(t, 600.0)))
1038
+
1039
+ def _t_proc_kill(self, args: dict) -> str:
1040
+ return self.procs.kill(args["handle"])
1041
+
1042
+ # --- interactive PTY sessions (terminal) ---
1043
+ def _t_terminal_open(self, args: dict) -> str:
1044
+ name = args.get("session") or "main"
1045
+ self.terminals.open(name, cwd=self.root(), command=args.get("command") or None)
1046
+ banner = self.terminals.peek(name, timeout=0.6) # peek, not read — don't eat the first prompt
1047
+ return f"{self._host_only_note()}Opened terminal session {name!r}.\n{banner}"
1048
+
1049
+ def _t_terminal_send(self, args: dict) -> str:
1050
+ name = args.get("session") or "main"
1051
+ enter = args.get("enter")
1052
+ enter = True if enter is None else bool(enter)
1053
+ return self.terminals.send(name, args["input"], enter=enter)
1054
+
1055
+ def _t_terminal_read(self, args: dict) -> str:
1056
+ name = args.get("session") or "main"
1057
+ try:
1058
+ t = float(args.get("timeout") or 1.0)
1059
+ except (TypeError, ValueError):
1060
+ t = 1.0
1061
+ return self._page_out(self.terminals.read(name, timeout=max(0.05, min(t, 120.0))), label="terminal output")
1062
+
1063
+ def _t_terminal_wait(self, args: dict) -> str:
1064
+ name = args.get("session") or "main"
1065
+ try:
1066
+ t = float(args.get("timeout") or 10.0)
1067
+ except (TypeError, ValueError):
1068
+ t = 10.0
1069
+ return self.terminals.wait(name, args["until"], timeout=max(0.1, min(t, 600.0)))
1070
+
1071
+ def _t_terminal_close(self, args: dict) -> str:
1072
+ return self.terminals.close(args.get("session") or "main")
1073
+
1074
+ # --- world model (durable agent scratchpad; state lives in the Slice, folded by slice_sink) ---
1075
+ def _t_world_set(self, args: dict) -> str:
1076
+ k = (args.get("key") or "").strip()
1077
+ if not k:
1078
+ return ToolText("Error: world_set requires a non-empty 'key'.", ok=False)
1079
+ v = " ".join(str(args.get("value", "")).split()) # one-line echo so the value is readable THIS turn
1080
+ if len(v) > 200:
1081
+ v = v[:200] + "…"
1082
+ return (f"WORLD MODEL: saved {k!r} = {v} (in your WORLD MODEL section from your NEXT turn; "
1083
+ f"this turn, re-read it from this call).")
1084
+
1085
+ def _t_world_clear(self, args: dict) -> str:
1086
+ k = (args.get("key") or "").strip()
1087
+ return f"WORLD MODEL: cleared {repr(k) if k else '(all keys)'}."
1088
+
1089
+ # --- standing requirements (the durable contract; state lives in the Slice, folded by slice_sink) ---
1090
+ def _t_require(self, args: dict) -> str:
1091
+ t = " ".join((args.get("text") or "").split())
1092
+ if not t:
1093
+ return ToolText("Error: require needs a non-empty 'text'.", ok=False)
1094
+ return f"REQUIREMENT recorded: {t} (in your STANDING REQUIREMENTS from your next turn until done/dropped)."
1095
+
1096
+ def _t_requirement_done(self, args: dict) -> str:
1097
+ t = " ".join((args.get("text") or "").split())
1098
+ if not t:
1099
+ return ToolText("Error: requirement_done needs the requirement 'text'.", ok=False)
1100
+ return f"REQUIREMENT marked done: {t} (stays shown as [x], no longer flagged outstanding)."
1101
+
1102
+ def _t_drop_requirement(self, args: dict) -> str:
1103
+ t = " ".join((args.get("text") or "").split())
1104
+ if not t:
1105
+ return ToolText("Error: drop_requirement needs the requirement 'text'.", ok=False)
1106
+ return f"REQUIREMENT dropped: {t}."
1107
+
1108
+ def _t_update_plan(self, args: dict) -> str:
1109
+ # The STATE lives in the slice's PLAN tier (folded by slice_sink from this event); the handler
1110
+ # only validates + confirms (the world_set/require pattern).
1111
+ steps = args.get("steps")
1112
+ if not isinstance(steps, list) or not steps:
1113
+ return ToolText("Error: update_plan requires a non-empty 'steps' list "
1114
+ "(each {step, status: pending|in_progress|done}).", ok=False)
1115
+ n = len(steps)
1116
+ done = sum(1 for s in steps if isinstance(s, dict) and s.get("status") == "done")
1117
+ doing = sum(1 for s in steps if isinstance(s, dict) and s.get("status") == "in_progress")
1118
+ return f"PLAN updated: {n} steps ({done} done, {doing} in progress) — shown in your PLAN section."
1119
+
1120
+ def _t_set_mission(self, args: dict) -> str:
1121
+ t = " ".join((args.get("text") or "").split())
1122
+ if not t:
1123
+ return ToolText("Error: set_mission needs a non-empty 'text'.", ok=False)
1124
+ return f"MISSION set: {t} (shown at the top of your context until you call mission_done)."
1125
+
1126
+ def _t_mission_done(self, args: dict) -> str:
1127
+ return "MISSION cleared (achieved — no longer shown)."
1128
+
1129
+ def _t_execute_code(self, args: dict) -> str:
1130
+ out = self._execute_code(args["code"])
1131
+ self._grant_shell_paths(args.get("code", "")) # I2 reach=action: dirs code-as-action touched
1132
+ return out
1133
+
1134
def _execute_code(self, code: str) -> str:
    """Code-as-action: run the model's script (prelude + code) in the sandbox, cwd=workspace.

    The script is written INSIDE the workspace as a hidden temp file (so it's
    mounted/available in every backend) and deleted right after, whatever the
    outcome. It is invoked by basename with the workspace as cwd, using
    ``sandbox.python_cmd`` so the call stays backend-portable.

    Returns the stripped output passed through ``self._page_out``, a placeholder
    when there was none, or a ``ToolText`` flagged ``ok=False`` starting with
    ``Exit code N`` when the run exits non-zero.
    """
    source = _CODE_PRELUDE + "\n# --- agent code ---\n" + code
    workspace = self.root()
    handle, script_path = tempfile.mkstemp(suffix=".py", prefix=".sliceagent-exec-", dir=workspace)
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as script_file:
            script_file.write(source)

        interpreter = shlex.quote(self.sandbox.python_cmd)
        script_name = shlex.quote(os.path.basename(script_path))
        status, output = self.sandbox.run(f"{interpreter} {script_name}", cwd=workspace, timeout=self.timeout)
        output = output.strip()

        if status == 0:
            if not output:
                return "(execute_code produced no output)"
            return self._page_out(output, label="execute_code output")
        return ToolText(f"Exit code {status}\n{self._page_out(output, label='execute_code output') or '(no output)'}", ok=False)
    finally:
        # Best-effort cleanup: a script that is already gone is not an error.
        try:
            os.unlink(script_path)
        except OSError:
            pass
1156
+
1157
+ @staticmethod
1158
+ def _mkparent(path: str) -> None:
1159
+ parent = os.path.dirname(os.path.abspath(path))
1160
+ os.makedirs(parent, exist_ok=True)
1161
+
1162
+ @staticmethod
1163
+ def _atomic_write(full: str, content: str) -> None:
1164
+ """Write `content` to `full` atomically: write a temp file in the SAME directory,
1165
+ then os.replace() it over the target. A crash/error mid-write leaves the original
1166
+ intact (the rename is atomic on POSIX); the temp is unlinked on any failure. The
1167
+ temp must share the target's filesystem for os.replace to be atomic, hence
1168
+ dir=os.path.dirname(full) (full is already _resolve()'d)."""
1169
+ import stat as _stat
1170
+ d = os.path.dirname(full)
1171
+ # preserve the target's permission bits across the replace — else a str_replace/edit_file on an
1172
+ # existing 0755 script silently resets it to the mkstemp 0600 (drops the executable + group/other bits).
1173
+ # ONE stat in a try (no exists()+stat() TOCTOU): if the file is absent or concurrently removed, write
1174
+ # fresh with default perms rather than raising an unhandled FileNotFoundError.
1175
+ try:
1176
+ mode = _stat.S_IMODE(os.stat(full).st_mode)
1177
+ except OSError:
1178
+ mode = None
1179
+ fd, tmp = tempfile.mkstemp(prefix=".sliceagent-tmp-", dir=d)
1180
+ try:
1181
+ # newline="" disables the platform newline translation: _preserve_eol already normalized the
1182
+ # content's line endings (LF or CRLF) to match the target, so text-mode translation on Windows
1183
+ # would double-convert \n→\r\n inside an already-CRLF string (\r\r\n) and corrupt the file.
1184
+ with os.fdopen(fd, "w", encoding="utf-8", newline="") as f:
1185
+ f.write(content)
1186
+ if mode is not None:
1187
+ os.chmod(tmp, mode)
1188
+ os.replace(tmp, full)
1189
+ except BaseException:
1190
+ try:
1191
+ os.unlink(tmp)
1192
+ except OSError:
1193
+ pass
1194
+ raise