sliceagent 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sliceagent/__init__.py +3 -0
- sliceagent/__main__.py +6 -0
- sliceagent/access.py +93 -0
- sliceagent/agents.py +173 -0
- sliceagent/background_review.py +146 -0
- sliceagent/binsniff.py +89 -0
- sliceagent/cli.py +890 -0
- sliceagent/clock.py +32 -0
- sliceagent/code_grep.py +329 -0
- sliceagent/code_index.py +417 -0
- sliceagent/config.py +240 -0
- sliceagent/context_overflow.py +227 -0
- sliceagent/envspec.py +129 -0
- sliceagent/errors.py +167 -0
- sliceagent/events.py +96 -0
- sliceagent/finding_types.py +70 -0
- sliceagent/flags.py +63 -0
- sliceagent/fuzzy.py +135 -0
- sliceagent/guardrails.py +438 -0
- sliceagent/guidance.py +69 -0
- sliceagent/hippocampus.py +581 -0
- sliceagent/hooks.py +334 -0
- sliceagent/interfaces.py +144 -0
- sliceagent/llm.py +695 -0
- sliceagent/loop.py +548 -0
- sliceagent/mcp_client.py +255 -0
- sliceagent/mcp_security.py +77 -0
- sliceagent/memory.py +428 -0
- sliceagent/metrics.py +103 -0
- sliceagent/model_catalog.py +124 -0
- sliceagent/monitor.py +615 -0
- sliceagent/neocortex.py +436 -0
- sliceagent/onboarding.py +323 -0
- sliceagent/oracle.py +36 -0
- sliceagent/pagetable.py +255 -0
- sliceagent/pfc.py +449 -0
- sliceagent/plugins.py +127 -0
- sliceagent/policy.py +234 -0
- sliceagent/procman.py +187 -0
- sliceagent/prompt.py +239 -0
- sliceagent/records.py +108 -0
- sliceagent/recovery.py +119 -0
- sliceagent/regions.py +678 -0
- sliceagent/registry.py +128 -0
- sliceagent/retriever.py +19 -0
- sliceagent/safety.py +332 -0
- sliceagent/sandbox.py +143 -0
- sliceagent/scheduler.py +92 -0
- sliceagent/search_index.py +289 -0
- sliceagent/seed.py +465 -0
- sliceagent/sensory_cortex.py +500 -0
- sliceagent/session.py +222 -0
- sliceagent/skill_provenance.py +71 -0
- sliceagent/skill_usage.py +123 -0
- sliceagent/skills.py +209 -0
- sliceagent/subagent.py +332 -0
- sliceagent/subdir_hints.py +222 -0
- sliceagent/swap.py +182 -0
- sliceagent/taskstate.py +57 -0
- sliceagent/telemetry.py +59 -0
- sliceagent/terminal.py +240 -0
- sliceagent/text_utils.py +56 -0
- sliceagent/tool_summary.py +93 -0
- sliceagent/tools.py +1194 -0
- sliceagent/tui.py +1377 -0
- sliceagent/web.py +354 -0
- sliceagent-0.1.0.dist-info/METADATA +262 -0
- sliceagent-0.1.0.dist-info/RECORD +71 -0
- sliceagent-0.1.0.dist-info/WHEEL +4 -0
- sliceagent-0.1.0.dist-info/entry_points.txt +2 -0
- sliceagent-0.1.0.dist-info/licenses/LICENSE +21 -0
sliceagent/tools.py
ADDED
|
@@ -0,0 +1,1194 @@
|
|
|
1
|
+
"""LocalToolHost — the default ToolHost.
|
|
2
|
+
|
|
3
|
+
Safe execution lives here: file ops are confined to the workspace root (no path
|
|
4
|
+
traversal out of it), and shell runs through a Sandbox backend (sandbox.py) — so
|
|
5
|
+
swapping in a container later never touches the loop. Authorization (which calls
|
|
6
|
+
are allowed at all) is separate: policy.py via the PermissionHook.
|
|
7
|
+
|
|
8
|
+
Note: Python's str.replace is literal, so str_replace has no $-pattern footgun
|
|
9
|
+
(unlike JS).
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
import re
|
|
15
|
+
import shlex
|
|
16
|
+
import tempfile
|
|
17
|
+
|
|
18
|
+
from .access import AllAccess, FileAccess
|
|
19
|
+
from .binsniff import looks_binary
|
|
20
|
+
from .fuzzy import fuzzy_find_unique
|
|
21
|
+
from .procman import ProcManager
|
|
22
|
+
from .registry import ToolEntry, ToolRegistry, ToolText
|
|
23
|
+
from .sandbox import LocalSandbox
|
|
24
|
+
from .sensory_cortex import _is_ignored
|
|
25
|
+
from .terminal import SessionManager
|
|
26
|
+
|
|
27
|
+
# I1 PROVENANCE — host SELF-INFLICTED error sentinels. These name failures caused by the HOST's own
|
|
28
|
+
# guard rails (file-tool confinement, permission denial), NOT by a real bug in the user's code. Lesson
|
|
29
|
+
# mining filters pitfalls whose signature contains one of these so a turn whose only error was the
|
|
30
|
+
# agent hitting its OWN sandbox mines nothing (D2). Lower-cased substrings, matched task-agnostically;
|
|
31
|
+
# defined HERE (the source of these strings) so the denylist tracks the actual error messages.
|
|
32
|
+
HOST_ERROR_SENTINELS = (
|
|
33
|
+
"path escapes the boundary",
|
|
34
|
+
"file tools are confined",
|
|
35
|
+
"permission denied",
|
|
36
|
+
"operation not permitted",
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
# Prepended to every execute_code script: the in-sandbox tool helpers (code-as-action).
|
|
40
|
+
# No imports needed by the model. The workspace is cwd and on sys.path (the prelude itself is _CODE_PRELUDE, defined below).
|
|
41
|
+
# Strip a leading "cat -n" line-number prefix (" 123\t") from a str_replace snippet pasted back from the
|
|
42
|
+
# numbered OPEN FILES render. Only fires when EVERY non-blank line has one (clearly cat -n output, not real
|
|
43
|
+
# source), so a genuine match is never altered; used as a fallback in _t_str_replace.
|
|
44
|
+
_LINENO_PREFIX = re.compile(r"^[ \t]*\d+\t")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _strip_line_numbers(text: str) -> str:
|
|
48
|
+
lines = text.split("\n")
|
|
49
|
+
nonblank = [ln for ln in lines if ln.strip()]
|
|
50
|
+
if not nonblank or not all(_LINENO_PREFIX.match(ln) for ln in nonblank):
|
|
51
|
+
return text
|
|
52
|
+
return "\n".join(_LINENO_PREFIX.sub("", ln) if ln.strip() else ln for ln in lines)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _number_lines(lines, start: int = 1) -> str:
|
|
56
|
+
"""cat -n number a LIST of lines from `start` (1-based) — ABSOLUTE numbers so a windowed read still
|
|
57
|
+
gives correct file:line evidence."""
|
|
58
|
+
return "\n".join(f"{i:>6}\t{ln}" for i, ln in enumerate(lines, start))
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _numbered(text: str) -> str:
    """Return `text` with cat -n style line numbers, counting from 1.

    The text is split with str.splitlines() and handed to _number_lines. The
    numbers are a display prefix only, not file content; _strip_line_numbers
    exists to remove such a prefix when a numbered snippet is pasted back.
    """
    rows = text.splitlines()
    return _number_lines(rows, start=1)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
_READ_MAX_LINES = 1500 # default in-slice VIEW cap for read_file; the full file ALWAYS stays on disk (bound the view, not the file)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _coerce_int(v):
|
|
72
|
+
"""Tolerant int() for model-supplied args (str/float/None) — never raises."""
|
|
73
|
+
try:
|
|
74
|
+
return int(v) if v is not None else None
|
|
75
|
+
except (TypeError, ValueError):
|
|
76
|
+
return None
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# Prepended to every execute_code script: the in-sandbox tool helpers (code-as-action), so the model's
# script needs no imports of its own. The prelude puts the current working directory on sys.path so
# `import <workspace_module>` works for testing freshly-written code.
# NOTE: everything between the triple quotes is runtime text (including its comments) — edit with care.
_CODE_PRELUDE = '''\
import os as _os, sys as _sys, subprocess as _sp
_sys.path.insert(0, _os.getcwd())

def _confine(path):
    # Confine code-as-action file helpers to the workspace (cwd = workspace root in the sandbox). Without
    # this, an absolute path or ../ escape let execute_code read/write outside allowed_roots, bypassing the
    # file-tool boundary. Shell (run_command) stays unconfined by design; these in-code helpers do not.
    _p = _os.path.realpath(path)
    _root = _os.path.realpath(_os.getcwd())
    if _p != _root and not _p.startswith(_root + _os.sep):
        raise PermissionError(f"path escapes the boundary: {path} (use run_command for paths outside it)")
    return path

def read_file(path):
    with open(_confine(path), encoding="utf-8") as _f: return _f.read()

def write_file(path, content):
    path = _confine(path)
    _d = _os.path.dirname(path)
    if _d: _os.makedirs(_d, exist_ok=True)
    with open(path, "w", encoding="utf-8", newline="") as _f: _f.write(content)
    if content[:2] == "#!":  # a shebang script should be runnable (parity with the edit_file tool)
        try: _os.chmod(path, _os.stat(path).st_mode | 0o111)
        except OSError: pass
    return f"wrote {len(content)} bytes to {path}"

def append_file(path, content):
    path = _confine(path)
    _d = _os.path.dirname(path)
    if _d: _os.makedirs(_d, exist_ok=True)
    with open(path, "a", encoding="utf-8", newline="") as _f: _f.write(content)
    return f"appended {len(content)} bytes to {path}"

def str_replace(path, old, new):
    path = _confine(path)
    with open(path, encoding="utf-8", newline="") as _f: _cur = _f.read()
    _n = _cur.count(old)
    if _n != 1: return (f"error: old_string occurs {_n}x in {path} (need exactly 1) — "
                        f"add surrounding lines to make it unique, or write_file the whole file")
    with open(path, "w", encoding="utf-8", newline="") as _f: _f.write(_cur.replace(old, new, 1))
    return f"replaced 1 occurrence in {path}"

def list_files(path="."):
    return sorted(_os.listdir(_confine(path)))

def run(cmd, timeout=60):
    _r = _sp.run(cmd, shell=True, capture_output=True, text=True, timeout=timeout)
    _o = (_r.stdout or "") + (_r.stderr or "")
    return _o if _r.returncode == 0 else f"[exit {_r.returncode}]\\n{_o}"
'''
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _fn(name: str, desc: str, props: dict, req: list[str]) -> dict:
|
|
134
|
+
return {
|
|
135
|
+
"type": "function",
|
|
136
|
+
"function": {"name": name, "description": desc,
|
|
137
|
+
"parameters": {"type": "object", "properties": props, "required": req}},
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# The FINDINGS-capture seam. Every tool call carries a 'note' — the model's distilled conclusion
|
|
142
|
+
# for this turn. It rides on the call the model is ALREADY making (no extra round-trip, unlike a
|
|
143
|
+
# dedicated note tool) and is folded into the slice's FINDINGS tier. This is how a Markov/slice
|
|
144
|
+
# agent gives a REASONING model its own prior conclusions back: the slice has no transcript, so
|
|
145
|
+
# without it the model re-derives the situation each turn (big reasoning bursts → slow). Reasoning
|
|
146
|
+
# models (e.g. deepseek) emit empty message content while tool-calling, so a tool ARG — not message
|
|
147
|
+
# text — is the only reliable capture point.
|
|
148
|
+
NOTE_PROP = {
    "note": {
        "type": "string",
        "description": ("Optional — usually leave EMPTY. Fill ONLY when this call established a NEW durable FACT "
                        "(root cause, a confirmed fix, a ruled-out hypothesis, or 'task done'), in <=15 words — a "
                        "conclusion, NOT the action you're taking. Saved across turns so you never re-derive it; "
                        "routine reads/edits need no note."),
    }
}


def with_note(schema: dict) -> dict:
    """Return a copy of a tool schema with the optional 'note' property added first.

    The input is not mutated: the top-level dict, its "function" dict and its
    "parameters" dict are each rebuilt. A missing or empty "parameters" block is
    replaced by an empty object schema. A property named "note" already present
    in the schema keeps its own definition, and "note" is removed from
    "required" so the argument always stays optional.
    """
    function = schema.get("function") or {}
    parameters = function.get("parameters")
    if not parameters:
        parameters = {"type": "object", "properties": {}, "required": []}

    # 'note' goes in first; the schema's own properties follow (and win on a name clash).
    properties = dict(NOTE_PROP)
    properties.update(parameters.get("properties") or {})

    required = []
    for arg in parameters.get("required") or []:
        if arg != "note":
            required.append(arg)

    new_parameters = dict(parameters)
    new_parameters["properties"] = properties
    new_parameters["required"] = required

    new_function = dict(function)
    new_function["parameters"] = new_parameters

    result = dict(schema)
    result["function"] = new_function
    return result
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# _IGNORE_NAMES/_IGNORE_SUFFIX/_is_ignored (the ignore-aware directory-walk primitive shared with
|
|
173
|
+
# repo_map) now live in sensory_cortex.py — "ignore-aware walking" is itself a SENSORY CORTEX concern
|
|
174
|
+
# (perception of the live filesystem). Imported at the top of this file for _t_list_files's own use below.
|
|
175
|
+
_LIST_CAP = 600 # bound recursive output so a huge tree can't flood the slice
|
|
176
|
+
|
|
177
|
+
# Tool-output PAGE-OUT (#74): a single tool result larger than this is written to a blob under
|
|
178
|
+
# .sliceagent/blobs and replaced inline by a BOUNDED head+tail view + a read_file reference — L1→L2 paging,
|
|
179
|
+
# NOT a cut (the full output is preserved on disk and recall-on-demand). Keeps one huge run_command /
|
|
180
|
+
# execute_code / terminal_read result from flooding the within-turn transcript and forcing coarse overflow.
|
|
181
|
+
_OUTPUT_INLINE_CAP = 16000
|
|
182
|
+
_OUTPUT_HEAD = 10000
|
|
183
|
+
_OUTPUT_TAIL = 4000
|
|
184
|
+
|
|
185
|
+
# Drop C0 control characters (keep \t \n \r) + DEL from a paged-out output, so (a) the blob is PLAIN TEXT
|
|
186
|
+
# and read_file's binary gate won't hexdump it on page-back, and (b) a stray NUL can't break the API call
|
|
187
|
+
# when the bounded head+tail rides the transcript. Only applied on the paged path (large outputs).
|
|
188
|
+
_CONTROL_DROP = {c: None for c in range(0x20) if c not in (0x09, 0x0a, 0x0d)}
|
|
189
|
+
_CONTROL_DROP[0x7f] = None
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _strip_control(s: str) -> str:
|
|
193
|
+
return s.translate(_CONTROL_DROP)
|
|
194
|
+
# Credential/secret dirs the shell-path auto-grant (#31) must never widen file-tool reach into.
|
|
195
|
+
_SECRET_DIRS = {".ssh", ".aws", ".gnupg", ".gpg", ".kube", ".docker", ".config", "keyrings", ".password-store"}
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
# Schemas for the builtin tools, each built by _fn(name, description, properties, required).
# LocalToolHost._register_builtins walks this list and looks up a handler by each schema's name, so
# every entry here needs a matching key in that method's `handlers` dict.
# The description strings are what the model reads — they are runtime text, not comments.
TOOL_SCHEMAS = [
    # --- files -------------------------------------------------------------------------------
    _fn("read_file",
        "Read a file's contents with cat -n line numbers for reference (the leading number is NOT part of the "
        "file, so don't include it in a str_replace old_string). A large file returns a bounded window with a "
        "<system> footer giving the total line count and how to page; pass `offset` (1-based start line) and/or "
        "`limit` (max lines) to read a specific range. To list a directory use list_files; to SEARCH file "
        "contents use the `grep` tool (ripgrep-backed) — not bash grep. "
        "Arg `path` is workspace-relative or absolute but confined to the workspace — for outside paths use "
        "run_command. A binary file returns a hexdump preview, not editable text.",
        {"path": {"type": "string"},
         "offset": {"type": "integer", "description": "1-based first line to read (optional)"},
         "limit": {"type": "integer", "description": "max number of lines to return (optional)"}},
        ["path"]),
    _fn("list_files",
        "List directory entries (ignore-aware: skips .git/.venv/caches/build/node_modules noise). Use to "
        "discover what exists; use read_file for a file's CONTENTS and the `grep` tool (ripgrep-backed) to "
        "SEARCH text. Pass recursive=true to map a whole subtree in ONE call (flat file paths, capped at 600 — "
        "pass a subdir to narrow) — PREFER this over shell `find` for a clean cache-free map.",
        {"path": {"type": "string"}, "recursive": {"type": "boolean"}}, []),
    _fn("edit_file",
        "Create a new file, or OVERWRITE an existing file's ENTIRE contents with `content` (the complete text); "
        "parent dirs are auto-created and a leading `#!` shebang makes it executable. To change PART of an "
        "existing file use str_replace; to add to its end use append_to_file. Do NOT use edit_file to tweak a "
        "file — it discards all current content.",
        {"path": {"type": "string"}, "content": {"type": "string"}}, ["path", "content"]),
    _fn("append_to_file",
        "Append `content` verbatim to the END of a file (creates it + parent dirs if missing) — the only writer "
        "that ADDS without touching existing content. Use str_replace to modify text already in the file, "
        "edit_file to replace the whole file. No newline is added — include a leading '\\n' yourself if needed.",
        {"path": {"type": "string"}, "content": {"type": "string"}}, ["path", "content"]),
    _fn("code_review",
        "Review code changes: returns the `git diff` for the workspace (default vs HEAD; pass `ref` for a "
        "branch / commit / range like 'main', 'HEAD~3', or 'main...HEAD') so you can audit the changes for "
        "correctness, security, and edge cases — cite file:line for each issue you find. Read-only; needs a "
        "git repo. Prefer this over piecing a review together from many read_file calls.",
        {"ref": {"type": "string"}}, []),
    _fn("str_replace",
        "Make a SURGICAL edit to an EXISTING file — replace one snippet, leave the rest. The default for "
        "changing a file you've read. `old_string` should be the SMALLEST unique snippet — usually 2-4 adjacent "
        "lines, not 10+. It must identify exactly ONE place: more than one occurrence is rejected (add "
        "surrounding context, or pass replace_all=true to change EVERY occurrence); an exact match is used, "
        "else a unique whitespace-tolerant fuzzy match. If old_string isn't found the file may be STALE — "
        "re-read it and copy the current text rather than retrying the same edit; for a bigger change use edit_file.",
        {"path": {"type": "string"}, "old_string": {"type": "string"}, "new_string": {"type": "string"},
         "replace_all": {"type": "boolean", "description": "replace ALL occurrences (default false: a >1 match is rejected)"}},
        ["path", "old_string", "new_string"]),
    # --- one-shot execution ------------------------------------------------------------------
    _fn("run_command",
        "Run a shell command (blocking, cwd=workspace root); returns combined stdout+stderr (exit code on "
        "failure). Pass timeout (seconds, default 30, max 600) for slow builds. Use for one-shot commands that "
        "finish; for a process that must STAY alive use proc_start, for an interactive REPL use terminal_open, "
        "to chain several edits + a test in one turn use execute_code. No cwd arg — prepend `cd DIR &&`. The "
        "shell is unconfined (can reach outside the workspace, unlike the file tools). If a command could "
        "emit a LARGE dump (disassembly, a long log, a dataset), FILTER it in the command itself — pipe "
        "through grep/head/tail/sed -n or target a range — so only the relevant slice returns.",
        {"command": {"type": "string"}, "timeout": {"type": "number"}}, ["command"]),
    _fn("execute_code",
        "Run a Python script that does SEVERAL file/shell steps in ONE turn (e.g. multiple edits + a test). Use "
        "over run_command when you'd chain many calls; over proc_start when it's one-shot (blocking, ~30s). "
        "Helpers (no imports): read_file(path), write_file(path, content), append_file(path, content), "
        "str_replace(path, old, new), list_files(path='.'), run(shell_cmd). Workspace is cwd + on sys.path. ONLY "
        "what you print() is returned. The file helpers are workspace-confined — use run() (shell) for outside paths.",
        {"code": {"type": "string"}}, ["code"]),
    # --- user interaction --------------------------------------------------------------------
    _fn("ask_user",
        "Ask the user a concise follow-up question and WAIT for their answer (returned to you). Use this "
        "whenever you are UNSURE or the request is AMBIGUOUS, or when you have FAILED / been blocked and don't "
        "know how to proceed — instead of guessing or repeating a failing action; prefer just answering in text "
        "when you can infer intent. Give a few short 'options' for multiple-choice, or omit for open-ended. In "
        "headless/eval runs there is no interactive user — it returns a fallback telling you to proceed with a "
        "stated assumption, so never loop waiting on it.",
        {"question": {"type": "string"},
         "options": {"type": "array", "items": {"type": "string"}}}, ["question"]),
    # --- background processes ----------------------------------------------------------------
    _fn("proc_start",
        "Start a LONG-RUNNING / background process (a server, a watcher, a multi-minute build) and return a "
        "handle (p1, p2, …) immediately; it keeps running across turns. Use over run_command when the process "
        "must outlive the turn, over terminal_open when you only launch-and-probe (it gets no stdin). It does "
        "NOT confirm the process started — one that instantly dies still returns a handle — so "
        "proc_poll/proc_tail to check status and proc_kill to stop.",
        {"command": {"type": "string"}}, ["command"]),
    _fn("proc_poll", "Check a background process by handle: 'running' or 'exited <code>'.",
        {"handle": {"type": "string"}}, ["handle"]),
    _fn("proc_tail", "Read recent output (stdout+stderr) of a background process.",
        {"handle": {"type": "string"}, "lines": {"type": "number"}}, ["handle"]),
    _fn("proc_wait",
        "Wait up to timeout seconds for a background process to exit; returns its status + recent output.",
        {"handle": {"type": "string"}, "timeout": {"type": "number"}}, ["handle"]),
    _fn("proc_kill", "Terminate a background process and its child group.",
        {"handle": {"type": "string"}}, ["handle"]),
    # --- interactive PTY sessions ------------------------------------------------------------
    _fn("terminal_open",
        "Open a persistent interactive PTY session for anything needing a LIVE terminal across turns: a "
        "REPL/text-game/TUI, answering successive prompts, or holding shell state (cd/export/venv). Unlike "
        "proc_start (no stdin) or run_command (one-shot), you drive it with terminal_send/terminal_wait/"
        "terminal_read and end with terminal_close. Omit command for a shell, or pass one (e.g. 'python3 -i -q'); "
        "'session' names it (default 'main'). Don't reopen an already-open session name — close it first.",
        {"session": {"type": "string"}, "command": {"type": "string"}}, []),
    _fn("terminal_send",
        "Send input to a terminal session. By default a newline is appended (sends a line). Set "
        "enter=false to send raw keys without a newline (e.g. a control char like '\\u0003' for Ctrl-C, "
        "or an escape sequence). Returns the immediate echo/output.",
        {"session": {"type": "string"}, "input": {"type": "string"}, "enter": {"type": "boolean"}},
        ["input"]),
    _fn("terminal_read", "Read the output a terminal session has produced (drains the live stream).",
        {"session": {"type": "string"}, "timeout": {"type": "number"}}, []),
    _fn("terminal_wait",
        "Wait until a regex pattern appears in a terminal session's output (or timeout) — the reliable "
        "way to sync: send a command, then wait for its prompt/result before sending the next.",
        {"session": {"type": "string"}, "until": {"type": "string"}, "timeout": {"type": "number"}},
        ["until"]),
    _fn("terminal_close", "Close a terminal session and kill its process group.",
        {"session": {"type": "string"}}, []),
    # --- durable task state: world model, requirements, plan, mission ---------------------------
    _fn("world_set",
        "Save DURABLE task state to your WORLD MODEL under a key (overwrites that key). Use it to maintain "
        "non-code state across turns: an explored maze map, a game's rooms+inventory, a system "
        "inventory, a running plan. It appears in the WORLD MODEL section of your context from your NEXT "
        "turn on; within THIS turn, re-read a value from your own world_set call above. value may be multiline.",
        {"key": {"type": "string"}, "value": {"type": "string"}}, ["key", "value"]),
    _fn("world_clear", "Remove a key from your WORLD MODEL (omit key to clear all of it).",
        {"key": {"type": "string"}}, []),
    _fn("require",
        "Record a STANDING REQUIREMENT that must HOLD when the task is done — an exact name/signature, an "
        "output format, a stated rule, or a constraint the user adds. It joins your STANDING REQUIREMENTS "
        "contract (shown every turn from your next turn on, and the bar for 'done'). Record only DURABLE "
        "constraints, never transient sub-steps or chit-chat; re-recording the same one is a no-op.",
        {"text": {"type": "string"}}, ["text"]),
    _fn("requirement_done",
        "Mark a STANDING REQUIREMENT satisfied (after verifying it against the real end-state). It stays "
        "shown as '[x] done' so it is not re-flagged but not forgotten. `text` must match the requirement.",
        {"text": {"type": "string"}}, ["text"]),
    _fn("drop_requirement",
        "Remove a STANDING REQUIREMENT the user RETRACTED or that no longer applies. `text` must match.",
        {"text": {"type": "string"}}, ["text"]),
    _fn("update_plan",
        "Maintain an ordered PLAN (a TODO list) for a multi-step task. Pass the COMPLETE list of steps "
        "every time — it REPLACES the previous plan. Keep exactly ONE step 'in_progress'; mark each 'done' "
        "as you finish it. The plan shows in your PLAN section across turns so progress survives and the "
        "user can follow along. Use it for non-trivial multi-step work; skip it for a single action.",
        {"steps": {"type": "array", "description": "the full ordered step list (replaces the prior plan)",
                   "items": {"type": "object", "properties": {
                       "step": {"type": "string", "description": "one concrete step, imperative"},
                       "status": {"type": "string", "enum": ["pending", "in_progress", "done"]}},
                       "required": ["step", "status"]}}},
        ["steps"]),
    _fn("set_mission",
        "Set your MISSION — the overarching NORTH-STAR objective for a long multi-step task (the 'why'), "
        "shown at the top of your context every turn so you stay oriented across many steps. Set it once at "
        "the start of a substantial task; it is ABOVE the literal task and your step plan. Re-setting "
        "replaces it. Skip it for quick one-off requests.",
        {"text": {"type": "string"}}, ["text"]),
    _fn("mission_done", "Clear your MISSION once the overarching objective is achieved (it stops showing).",
        {}, []),
]
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def _default_ask_user(question: str, options) -> str:
|
|
351
|
+
"""Fallback when no interactive user is wired (headless/eval) — never hangs."""
|
|
352
|
+
return ("(no interactive user is available to answer; proceed with your best assumption and "
|
|
353
|
+
"STATE it explicitly, or stop with a clear summary of what you need)")
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def _sniff_image_mime(raw: bytes) -> str | None:
|
|
357
|
+
"""Identify an image by MAGIC BYTES (not extension). Returns the MIME type or None if not an image."""
|
|
358
|
+
if raw[:8] == b"\x89PNG\r\n\x1a\n":
|
|
359
|
+
return "image/png"
|
|
360
|
+
if raw[:3] == b"\xff\xd8\xff":
|
|
361
|
+
return "image/jpeg"
|
|
362
|
+
if raw[:6] in (b"GIF87a", b"GIF89a"):
|
|
363
|
+
return "image/gif"
|
|
364
|
+
if raw[:4] == b"RIFF" and raw[8:12] == b"WEBP":
|
|
365
|
+
return "image/webp"
|
|
366
|
+
if raw[:2] == b"BM":
|
|
367
|
+
return "image/bmp"
|
|
368
|
+
return None
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _numbered_window(text: str, start_line: int, end_line: int, *, ctx: int = 4, cap: int = 40) -> str:
|
|
372
|
+
"""A cat -n numbered snippet of `text` around [start_line..end_line] (0-based), ±ctx lines, capped at
|
|
373
|
+
`cap`. Edit tools echo this POST-EDIT region back in their result so the model sees the file's CURRENT
|
|
374
|
+
state in-transcript — the within-turn analog of the OPEN FILES tier (the seed is frozen mid-turn, so the
|
|
375
|
+
live view must ride the tool results). Bounded by construction; never the whole file."""
|
|
376
|
+
lines = text.replace("\r\n", "\n").split("\n")
|
|
377
|
+
if lines and lines[-1] == "":
|
|
378
|
+
lines = lines[:-1] # drop the trailing empty from a final newline
|
|
379
|
+
a = max(0, start_line - ctx)
|
|
380
|
+
b = min(len(lines), max(end_line + 1 + ctx, a + 1))
|
|
381
|
+
b = min(b, a + cap)
|
|
382
|
+
snippet = "\n".join(f"{i:>6}\t{ln}" for i, ln in enumerate(lines[a:b], a + 1)) # cat -n, absolute line nums
|
|
383
|
+
if b < len(lines):
|
|
384
|
+
snippet += f"\n … (+{len(lines) - b} more lines)"
|
|
385
|
+
return snippet
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
class LocalToolHost:
|
|
389
|
+
def __init__(self, root: str | None = None, *, sandbox=None, timeout: int = 30,
             registry: ToolRegistry | None = None):
    """Set up the host: sandbox, process/PTY managers, per-session state and the builtin tools.

    root: workspace root to pin. None leaves it unpinned; the intent is that confinement then
        follows the *current* working directory, resolved per call (so a runner that chdirs into
        a temp workdir after construction is confined to that workdir). The resolution itself is
        not done in this method.
    sandbox: shell backend; a LocalSandbox is created when none (or a falsy value) is passed.
    timeout: stored as self.timeout.
    registry: tool registry to populate; a fresh ToolRegistry is created when none (or a falsy
        value) is passed.
    """
    import atexit

    self._root = root
    self.timeout = timeout
    self.sandbox = sandbox if sandbox else LocalSandbox()

    # Background processes and interactive PTY sessions both follow the sandbox's secret-scrubbing
    # setting (default on when the sandbox does not expose one).
    scrub = getattr(self.sandbox, "scrub_secrets", True)
    # Long-running processes (servers, multi-minute builds) that a one-shot sandbox call can't hold.
    self.procs = ProcManager(scrub_secrets=scrub)
    # PTY sessions: REPLs/TUIs/games, and shell + env state kept across turns.
    self.terminals = SessionManager(scrub_secrets=scrub)

    # I2 — re-observation reach = action reach. File tools must be able to read back what the
    # (unconfined) shell wrote, so directories the goal/user EXPLICITLY targets are collected here
    # (added via add_root) and accepted alongside the workspace root. Opt-in and bounded — never a
    # blanket '/'.
    self._extra_roots: list[str] = []
    # Most-recently-worked EXTERNAL dir → the active focus (slice-surfaced).
    self._focus: str | None = None

    # ask_user callback: prompts the real user and returns the answer. The default is a
    # non-interactive fallback so headless/eval runs never hang; the CLI replaces it.
    self.on_ask_user = _default_ask_user

    # (rel, full, prev_bytes|None) per write — powers /undo.
    self._edit_journal: list = []
    # Images @-attached for the NEXT seed build (vision models only).
    self.pending_images: list = []

    # The registry is the single source of tools; MCP/plugin/skill tools register into the same
    # object later. The host just projects from it.
    self.registry = registry if registry else ToolRegistry()
    self._register_builtins()

    # Leaked background procs / PTYs must not survive exit/abort/crash.
    atexit.register(self.cleanup)
|
|
424
|
+
|
|
425
|
+
def cleanup(self) -> None:
    """Shut down the background-process manager and the PTY-session manager.

    Looks up ``self.procs`` and ``self.terminals`` (either may be missing, e.g. when
    construction failed part-way) and calls ``cleanup()`` on each one that exists. Any
    ``Exception`` raised by a manager is swallowed so that one failing teardown never
    prevents the other, and so the method is safe to use as an ``atexit`` hook.
    """
    managers = [getattr(self, attr, None) for attr in ("procs", "terminals")]
    for manager in managers:
        if manager is None:
            continue
        try:
            manager.cleanup()
        except Exception:  # noqa: BLE001 — teardown is best-effort by design
            continue
|
|
434
|
+
|
|
435
|
+
def _register_builtins(self) -> None:
    """Register every built-in tool described by ``TOOL_SCHEMAS`` in ``self.registry``.

    Each schema's ``["function"]["name"]`` is looked up in the name → bound-handler table
    below; a schema naming a tool without a handler raises ``KeyError``. Every entry is
    registered with ``source="builtin"`` and an ``accesses`` callback that forwards to
    ``_builtin_accesses`` with the tool name bound at registration time.
    """
    handlers = {
        # file tools
        "read_file": self._t_read_file,
        "list_files": self._t_list_files,
        "edit_file": self._t_edit_file,
        "append_to_file": self._t_append,
        "str_replace": self._t_str_replace,
        # one-shot execution + asking the user
        "run_command": self._t_run_command,
        "execute_code": self._t_execute_code,
        "ask_user": self._t_ask_user,
        # background processes
        "proc_start": self._t_proc_start,
        "proc_poll": self._t_proc_poll,
        "proc_tail": self._t_proc_tail,
        "proc_wait": self._t_proc_wait,
        "proc_kill": self._t_proc_kill,
        # interactive terminals
        "terminal_open": self._t_terminal_open,
        "terminal_send": self._t_terminal_send,
        "terminal_read": self._t_terminal_read,
        "terminal_wait": self._t_terminal_wait,
        "terminal_close": self._t_terminal_close,
        # world / requirements / plan / mission bookkeeping
        "world_set": self._t_world_set,
        "world_clear": self._t_world_clear,
        "require": self._t_require,
        "requirement_done": self._t_requirement_done,
        "drop_requirement": self._t_drop_requirement,
        "update_plan": self._t_update_plan,
        "set_mission": self._t_set_mission,
        "mission_done": self._t_mission_done,
        "code_review": self._t_code_review,
    }
    for schema in TOOL_SCHEMAS:
        tool_name = schema["function"]["name"]
        entry = ToolEntry(
            name=tool_name,
            schema=schema,
            handler=handlers[tool_name],
            # `n=tool_name` freezes the name per entry (avoids the late-binding closure trap)
            accesses=(lambda args, n=tool_name: self._builtin_accesses(n, args)),
            source="builtin",
        )
        self.registry.register(entry)
|
|
460
|
+
|
|
461
|
+
def root(self) -> str:
    """Return the workspace (boundary) root as a symlink-resolved absolute path.

    Uses ``self._root`` when it is set to a non-empty value, otherwise the process's
    current working directory at the time of the call.
    """
    configured = self._root
    if not configured:
        configured = os.getcwd()
    return os.path.realpath(configured)
|
|
463
|
+
|
|
464
|
+
def add_root(self, path: str) -> str | None:
    """Add an explicitly targeted directory to the set the file tools may reach.

    The path is ``~``-expanded and symlink-resolved before anything else. The filesystem
    root and the bare home directory are refused so the workspace boundary can never be
    widened to "everything". Adding the workspace root itself, or a directory that was
    already added, changes nothing.

    Args:
        path: Directory to grant. An empty value is rejected.

    Returns:
        The resolved path that is now (or already was) reachable, or ``None`` when the
        request was rejected.
    """
    if not path:
        return None
    target = os.path.realpath(os.path.expanduser(path))
    home = os.path.realpath(os.path.expanduser("~"))
    # never widen reach to the whole filesystem or the bare home dir
    if target in (os.sep, home):
        return None
    already_reachable = target == self.root() or target in self._extra_roots
    if not already_reachable:
        self._extra_roots.append(target)
    return target
|
|
483
|
+
|
|
484
|
+
def allowed_roots(self) -> list[str]:
    """Return every directory the file tools may reach, without duplicates.

    The workspace root always comes first, followed by the entries of
    ``self._extra_roots`` in insertion order. A fresh list is returned on each call.
    """
    # dict.fromkeys keeps first-seen order, which gives an order-preserving de-duplication
    ordered = dict.fromkeys([self.root(), *self._extra_roots])
    return list(ordered)
|
|
492
|
+
|
|
493
|
+
def focus(self) -> tuple[str | None, list[str]]:
    """Report the active focus directory together with all extra roots.

    Returns:
        A pair ``(focus, extra_roots)``: ``self._focus`` (``None`` when no focus is set)
        and a copy of ``self._extra_roots``, so callers cannot mutate the host's own list.
    """
    current = self._focus
    granted = [*self._extra_roots]
    return current, granted
|
|
499
|
+
|
|
500
|
+
def resolution_base(self) -> str:
    """Return the directory that bare relative tool paths are joined onto.

    That is the active focus when one is set, otherwise the workspace root. This only
    chooses the anchor for relative paths; it grants no reach by itself. As a safety net, a
    focus that is not one of ``allowed_roots()`` is ignored and the workspace root is used.
    """
    anchor = self._focus
    if not anchor:
        anchor = self.root()
    if anchor in self.allowed_roots():
        return anchor
    # defensive: a focus that is not an authorized root must never become the base
    return self.root()
|
|
509
|
+
|
|
510
|
+
def locate(self, path: str) -> str:
    """Find where a working-set path lives, independent of the current focus.

    * An absolute path (after ``~`` expansion) is handed straight to ``_resolve``, which
      enforces the boundary.
    * A relative path is tried under every allowed root in order (workspace root first);
      the first candidate that stays inside its root AND exists on disk wins.
    * If nothing exists, the path joined onto the workspace root is returned as long as it
      stays inside that root, so callers get a truthful "does not exist" location.
    * Otherwise the path escapes the boundary and ``_resolve`` is left to raise.

    Args:
        path: Tool path, absolute or relative.

    Returns:
        A symlink-resolved absolute path (or whatever ``_resolve`` returns).
    """
    def _within(child: str, parent: str) -> bool:
        return child == parent or child.startswith(parent + os.sep)

    rel = os.path.expanduser(path)
    if not os.path.isabs(rel):
        for allowed in self.allowed_roots():
            hit = os.path.realpath(os.path.join(allowed, rel))
            if _within(hit, allowed) and os.path.exists(hit):
                return hit
        # nothing exists under any root → fall back to a boundary-safe location; realpath +
        # confinement keeps a relative '../x' from naming a real file outside the boundary
        boundary = self.root()
        missing = os.path.realpath(os.path.join(boundary, rel))
        if _within(missing, boundary):
            return missing
    # absolute path, or a relative one that escapes → same enforcement as the file tools
    return self._resolve(path)
|
|
530
|
+
|
|
531
|
+
def _grant_shell_paths(self, text: str) -> None:
    """Extend file-tool reach to EXTERNAL directories that a shell command text mentions.

    Scans ``text`` for quoted strings containing a ``/`` and for bare tokens starting with
    ``~`` or ``/``. For each hit, the directory it names (or the parent directory of a
    file) is passed to ``add_root`` and becomes ``self._focus`` — but only when it:

    * exists on disk,
    * lies strictly inside the user's home directory (never HOME itself),
    * is not the workspace root, not an ancestor of it, and not a directory INSIDE it
      (the workspace is already reachable, so only genuinely external directories may
      become extra roots or the focus),
    * has no path component listed in ``_SECRET_DIRS`` (compared lower-cased).

    The focus is only moved when ``add_root`` actually accepted the directory, so it
    always names a granted root. This is plain path detection on the text — the command
    itself is not parsed.

    Args:
        text: Shell command text to scan; an empty value is a no-op.
    """
    if not text:
        return
    home = os.path.realpath(os.path.expanduser("~"))
    root = self.root()
    # quoted paths (may contain spaces) OR bare ~/-rooted tokens up to a shell metachar/space
    pattern = r"""['"]([^'"]*/[^'"]*)['"]|(?<![\w'"])((?:~|/)[^\s'"|&;<>()]+)"""
    for quoted, bare in re.findall(pattern, text):
        cand = (quoted or bare).strip()
        if not cand.startswith(("/", "~")):
            continue
        full = os.path.realpath(os.path.expanduser(cand))
        d = full if os.path.isdir(full) else os.path.dirname(full)
        if not d or not os.path.isdir(d):
            continue
        if not d.startswith(home + os.sep):  # only the user's own subtree (excludes HOME itself)
            continue
        if d == root or root.startswith(d + os.sep):  # never the workspace or an ancestor of it
            continue
        if d.startswith(root + os.sep):
            # FIX: a directory inside the workspace is not "external" — it is already in reach.
            # Granting it would re-anchor relative paths to a workspace subdirectory.
            continue
        # never auto-widen file-tool reach into credential/secret dirs, even inside HOME — a path
        # merely MENTIONED in a shell command must not make ~/.ssh etc. readable by the tools.
        # Lower-cased so ~/.SSH == ~/.ssh on a case-insensitive filesystem.
        if any(part.lower() in _SECRET_DIRS for part in d.split(os.sep)):
            continue
        granted = self.add_root(d)
        if granted:
            # the most-recent external dir the shell worked on → the active focus
            self._focus = granted
|
|
562
|
+
|
|
563
|
+
def resolve_read(self, path: str) -> str:
    """Pick the on-disk location a tool path refers to, preferring the current focus.

    First the focus-relative resolution (``_resolve``) is tried. If that is rejected
    (``ValueError`` / ``PermissionError``) the focus-independent ``locate`` decides, and
    may itself raise. If ``_resolve`` succeeds but the file does not exist there,
    ``locate`` is consulted and its answer is used only when it names an existing path;
    otherwise the focus-relative location is returned (which is where a new file lands).

    Args:
        path: Tool path, absolute or relative.

    Returns:
        The absolute path to read from or write to.
    """
    try:
        primary = self._resolve(path)
    except (ValueError, PermissionError):
        return self.locate(path)
    if not os.path.exists(primary):
        elsewhere = self.locate(path)
        if os.path.exists(elsewhere):
            return elsewhere
    return primary
|
|
577
|
+
|
|
578
|
+
def _resolve(self, path: str) -> str:
    """Turn a tool path into an absolute path and enforce the file-tool boundary.

    ``~`` is expanded first (so it behaves like the shell instead of creating a literal
    ``~`` directory), a relative path is joined onto ``resolution_base()``, and the result
    is symlink-resolved. The final path must be one of ``allowed_roots()`` or lie beneath
    one of them.

    Args:
        path: Tool path, absolute or relative.

    Returns:
        The resolved absolute path.

    Raises:
        ValueError: ``path`` is empty.
        PermissionError: the resolved path is outside every allowed root. The message names
            the boundary and the escape hatch so the model can recover.
    """
    if not path:
        raise ValueError("empty path")
    expanded = os.path.expanduser(path)
    permitted = self.allowed_roots()
    # relative paths follow the current project; reach is still checked against `permitted`
    anchor = self.resolution_base()
    joined = expanded if os.path.isabs(expanded) else os.path.join(anchor, expanded)
    target = os.path.realpath(joined)
    if any(target == allowed or target.startswith(allowed + os.sep) for allowed in permitted):
        return target
    raise PermissionError(
        f"path escapes the boundary ({anchor}): {expanded} — File tools are confined to your "
        "authorized directories (the boundary). To act on paths outside it, use "
        "run_command/execute_code (shell is unconfined), or re-run sliceagent rooted at that directory.")
|
|
601
|
+
|
|
602
|
+
def _resolve_for_access(self, path: str) -> str | None:
    """Compute the canonical physical path used as a scheduling key for a tool path.

    This is NOT a boundary check. It repeats the expand-``~`` / join-onto-base / realpath
    steps so that different spellings of one file ('foo.py', './foo.py', the absolute
    path) collapse to the same key and conflicting accesses can be serialized.

    Args:
        path: Tool path as supplied in the call arguments.

    Returns:
        The symlink-resolved absolute path, or ``None`` for empty input or when anything
        goes wrong while resolving (declaring accesses must never fail the call).
    """
    if not path:
        return None
    try:
        expanded = os.path.expanduser(path)
        anchor = self.resolution_base()
        if not os.path.isabs(expanded):
            expanded = os.path.join(anchor, expanded)
        return os.path.realpath(expanded)
    except Exception:  # noqa: BLE001 — access declaration must never fail the call
        return None
|
|
617
|
+
|
|
618
|
+
# --- ToolHost projection: everything comes from the registry now ---
|
|
619
|
+
def schemas(self) -> list[dict]:
    """Return the registry's tool schemas, each passed through ``with_note``.

    ``with_note`` injects the 'note' argument into a schema, so every tool exposed to the
    model accepts a per-call note alongside its own arguments.
    """
    annotated = []
    for schema in self.registry.schemas():
        annotated.append(with_note(schema))
    return annotated
|
|
623
|
+
|
|
624
|
+
def accesses(self, name: str, args: dict) -> list:
    """Return the access declarations for one tool call, as reported by the registry.

    Args:
        name: Tool name.
        args: The call's arguments.
    """
    declared = self.registry.accesses(name, args)
    return declared
|
|
626
|
+
|
|
627
|
+
def run(self, name: str, args: dict) -> str:
    """Execute one tool call through the registry and return its result.

    Args:
        name: Tool name.
        args: The call's arguments.
    """
    # the registry wraps the handler in try/except, so no error handling is needed here
    outcome = self.registry.run(name, args)
    return outcome
|
|
629
|
+
|
|
630
|
+
def read_text(self, path: str, *, lossy: bool = True) -> str:
    """Read a file under the allowed roots and return it decoded as UTF-8 text.

    The raw bytes are read first; the first 8192 bytes (decoded leniently) are handed to
    ``looks_binary`` so that a binary file is rejected BEFORE it is treated as text.

    Args:
        path: Tool path; resolved and boundary-checked by ``_resolve``.
        lossy: When True (display callers), invalid UTF-8 bytes decode to U+FFFD so a stray
            bad byte cannot crash the read. When False (read-modify-write callers), the
            decode is strict, so the caller aborts instead of writing replacement
            characters back over bytes it never meant to touch.

    Returns:
        The file content as ``str``.

    Raises:
        ValueError: ``looks_binary`` flagged the file.
        UnicodeDecodeError: ``lossy`` is False and the file contains invalid UTF-8.
    """
    location = self._resolve(path)
    with open(location, "rb") as handle:
        data = handle.read()
    head_text = data[:8192].decode("utf-8", errors="replace")
    if looks_binary(path, head_text):
        raise ValueError(f"{path} appears to be binary; not shown")
    decode_mode = "replace" if lossy else "strict"
    return data.decode("utf-8", errors=decode_mode)
|
|
647
|
+
|
|
648
|
+
def _builtin_accesses(self, name: str, args: dict) -> list:
    """Declare what a built-in tool call touches, for the scheduler's conflict detection.

    Paths are canonicalized with ``_resolve_for_access`` so two spellings of one file map
    to the same key.

    * ``read_file`` → one ``"read"`` access, or no access at all if the path can't be keyed.
    * ``list_files`` → one recursive ``"search"`` access on the directory (default ``"."``),
      falling back to the raw argument when it can't be canonicalized.
    * ``edit_file`` / ``append_to_file`` / ``str_replace`` → one ``"readwrite"`` access, or
      ``AllAccess`` when the path can't be keyed.
    * everything else (command/code execution, background processes, terminals, and any
      other built-in) → ``AllAccess``.
    """
    target = args.get("path")
    if name == "read_file":
        physical = self._resolve_for_access(target)
        if physical:
            return [FileAccess("read", physical)]
        return []
    if name == "list_files":
        directory = target or "."
        return [FileAccess("search", self._resolve_for_access(directory) or directory, recursive=True)]
    if name in ("edit_file", "append_to_file", "str_replace"):
        physical = self._resolve_for_access(target)
        if physical:
            return [FileAccess("readwrite", physical)]
        return [AllAccess()]
    # run_command, execute_code, proc_*, terminal_* are arbitrary / stateful execution and
    # must run globally exclusive; every remaining built-in gets the same conservative answer
    return [AllAccess()]
|
|
666
|
+
|
|
667
|
+
# --- builtin tool handlers (args) -> str (the registry catches exceptions) ---
|
|
668
|
+
def _page_out(self, text: str, *, label: str = "output") -> str:
    """Bound a large tool output: keep a head+tail view inline and park the rest in a blob.

    The text is always passed through ``_strip_control``. If it is empty, or no longer than
    ``_OUTPUT_INLINE_CAP`` either before or after stripping, the stripped text is returned
    as-is. Otherwise the stripped text is written (once — the file name contains a SHA-1
    prefix of the content) to ``.sliceagent/blobs/<label>-<digest>.txt`` and the returned
    view is: the first ``_OUTPUT_HEAD`` chars, a banner saying how much was elided and how
    to read it back, and the last ``_OUTPUT_TAIL`` chars.

    Writing the blob is best-effort: on any failure the same bounded view is returned with
    a banner stating that the elided part is unavailable.

    Args:
        text: Raw tool output.
        label: Short name used in the blob file name (spaces become '-') and the banner.
    """
    oversized = bool(text) and len(text) > _OUTPUT_INLINE_CAP
    cleaned = _strip_control(text)
    # control-heavy output can shrink below the cap once stripped → return it inline rather
    # than printing a misleading "paged out" banner around duplicated head/tail content
    if not oversized or len(cleaned) <= _OUTPUT_INLINE_CAP:
        return cleaned
    recall = None
    try:
        import hashlib
        fingerprint = hashlib.sha1(cleaned.encode("utf-8", "replace")).hexdigest()[:12]
        blob_name = f"{label.replace(' ', '-')}-{fingerprint}.txt"
        rel = os.path.join(".sliceagent", "blobs", blob_name)
        target = self._resolve(rel)
        self._mkparent(target)
        if not os.path.exists(target):
            self._atomic_write(target, cleaned)
        recall = f"read_file('{rel}')"
    except Exception:  # noqa: BLE001 — a paging failure must never fail the tool itself
        recall = None
    total = len(cleaned)
    elided = total - _OUTPUT_HEAD - _OUTPUT_TAIL
    if recall:
        how = f"page the full {label} back with {recall}"
    else:
        how = f"the elided {label} is unavailable (blob write failed)"
    banner = f"[… {elided} of {total} chars paged out — {how} …]"
    return f"{cleaned[:_OUTPUT_HEAD]}\n\n{banner}\n\n{cleaned[-_OUTPUT_TAIL:]}"
|
|
698
|
+
|
|
699
|
+
def _t_read_file(self, args: dict) -> str:
    """Tool handler: show a file as numbered text, or as a hexdump when it is binary.

    The path is located with ``resolve_read``. If ``looks_binary`` flags the first 8192
    bytes, ``_binary_view`` is returned instead of a refusal. Text is decoded leniently,
    split into lines and rendered by ``_number_lines``.

    The view is bounded, never the file:

    * with neither ``offset`` nor ``limit``, at most ``_READ_MAX_LINES`` lines are shown —
      except for paths under ``.sliceagent/blobs/``, which are always shown in full;
    * with ``offset`` and/or ``limit``, that 1-based line window is shown.

    A complete, un-windowed read returns just the numbered body. Any partial or windowed
    read appends a ``<system>`` footer with the shown range and, if lines remain, the
    offset to continue from.

    Args:
        args: ``path`` (required), optional ``offset`` and ``limit``.
    """
    path = args["path"]
    location = self.resolve_read(path)  # focus copy if present, else search all roots
    with open(location, "rb") as handle:
        data = handle.read()
    if looks_binary(path, data[:8192].decode("utf-8", errors="replace")):
        return self._binary_view(path, data)
    lines = data.decode("utf-8", errors="replace").splitlines()
    total = len(lines)
    offset = _coerce_int(args.get("offset"))
    limit = _coerce_int(args.get("limit"))
    windowed = not (offset is None and limit is None)
    # a paged-out blob is the "give me the FULL output back" channel → its default view is uncapped
    blob_marker = ".sliceagent/blobs/"
    is_blob = blob_marker in path.replace("\\", "/") or blob_marker in str(location).replace("\\", "/")
    if windowed:
        first = min(max(1, offset or 1), total + 1)
        if limit is None:
            last = total
        else:
            last = min(total, first - 1 + max(1, limit))
    else:
        first = 1
        last = total if (is_blob or total <= _READ_MAX_LINES) else _READ_MAX_LINES
    body = _number_lines(lines[first - 1:last], first)
    if not windowed and last >= total:
        return body  # complete read → no footer
    if last < total:
        more = f" · +{total - last} more — read_file(path, offset={last + 1}) to continue"
    else:
        more = ""
    return f"{body}\n<system>read_file {path}: lines {first}-{last} of {total}{more}</system>"
|
|
733
|
+
|
|
734
|
+
@staticmethod
def _binary_view(path: str, raw: bytes, head_bytes: int = 256) -> str:
    """Describe a binary file: its size, its first 8 bytes in hex, and a hexdump of the head.

    Args:
        path: Path shown in the message (not opened here).
        raw: The file's full content.
        head_bytes: How many leading bytes to dump, 16 per row.

    Returns:
        A multi-line string. Each dump row holds the 8-digit hex offset, the hex bytes
        (padded to 47 columns) and the printable-ASCII rendering with '.' for other bytes.
    """
    head = raw[:head_bytes]

    def _row(offset: int) -> str:
        piece = head[offset:offset + 16]
        as_hex = " ".join(f"{byte:02x}" for byte in piece)
        as_text = "".join(chr(byte) if 32 <= byte < 127 else "." for byte in piece)
        return f"{offset:08x} {as_hex:<47} {as_text}"

    dump = "\n".join(_row(offset) for offset in range(0, len(head), 16))
    summary = (f"{path}: binary file, {len(raw)} bytes — text tools can't edit it; inspect/convert "
               f"it with run_command/execute_code (the right CLI).\n")
    return summary + f"magic: {head[:8].hex()}\n" + f"hexdump (first {len(head)} bytes):\n" + dump
|
|
747
|
+
|
|
748
|
+
@staticmethod
def _detect_crlf(full: str) -> bool:
    """Report whether a file's first 64 KiB contain a CRLF ("\\r\\n") sequence.

    Callers use this to keep a CRLF file's line endings when writing model-produced
    text (which uses "\\n") back to it.

    Args:
        full: Path of the file to sample.

    Returns:
        True if a CRLF was found in the sample; False otherwise, including when the file
        cannot be opened or read.
    """
    try:
        with open(full, "rb") as handle:
            head = handle.read(65536)
    except OSError:
        return False
    return head.find(b"\r\n") != -1
|
|
758
|
+
|
|
759
|
+
@staticmethod
def _preserve_eol(text: str, crlf: bool) -> str:
    """Return ``text`` with CRLF line endings when the target file uses them.

    Args:
        text: Content about to be written.
        crlf: Whether the target file uses CRLF.

    Returns:
        ``text`` unchanged when ``crlf`` is False. Otherwise every "\\r\\n" is first
        normalized to "\\n" and then every "\\n" becomes "\\r\\n", so already-CRLF or
        mixed input is not doubled.
    """
    if not crlf:
        return text
    unix = text.replace("\r\n", "\n")
    return "\r\n".join(unix.split("\n"))
|
|
764
|
+
|
|
765
|
+
def _t_list_files(self, args: dict) -> str:
    """Tool handler: list a directory, either one level deep or as a recursive file map.

    Non-recursive: sorted entries of the directory, directories suffixed with '/'. Entries
    for which ``_is_ignored`` is true are left out but summarized on a trailing line (count
    plus up to six names), so the model knows they exist.

    Recursive (``args["recursive"]`` truthy): walks the tree with ``os.walk``, never
    descending into ignored directories and skipping ignored files, and returns the sorted
    relative file paths. Collection stops once ``_LIST_CAP`` paths were gathered, in which
    case a hint to narrow the listing is appended.

    Args:
        args: optional ``path`` (default "."), optional ``recursive``.

    Returns:
        The listing, or "(empty)" when nothing is shown.
    """
    base = self._resolve(args.get("path") or ".")
    if not args.get("recursive"):
        visible: list[str] = []
        ignored: list[str] = []
        for entry in sorted(os.listdir(base)):
            if _is_ignored(entry):
                ignored.append(entry)
            elif os.path.isdir(os.path.join(base, entry)):
                visible.append(entry + "/")
            else:
                visible.append(entry)
        listing = "\n".join(visible) or "(empty)"
        if ignored:  # name them so the model KNOWS they exist, without flooding
            listing += f"\n(+{len(ignored)} ignored: {', '.join(ignored[:6])})"
        return listing
    # recursive: a clean, ignore-pruned, bounded repo map — the native alternative to `find`
    collected: list[str] = []
    capped = False
    for dirpath, dirnames, filenames in os.walk(base):  # symlinked dirs are not followed
        dirnames[:] = sorted(d for d in dirnames if not _is_ignored(d))  # prune in place
        prefix = os.path.relpath(dirpath, base)
        for fname in sorted(filenames):
            if _is_ignored(fname):
                continue
            collected.append(fname if prefix == "." else os.path.join(prefix, fname))
            if len(collected) >= _LIST_CAP:
                break
        else:
            continue  # this directory finished without hitting the cap
        capped = True
        break
    listing = "\n".join(sorted(collected)) or "(empty)"
    if capped:
        listing += f"\n(+more — capped at {_LIST_CAP}; pass a subdirectory path to narrow)"
    return listing
|
|
795
|
+
|
|
796
|
+
def _t_edit_file(self, args: dict) -> str:
    """Tool handler: create or overwrite a file with the given content.

    The target is located with ``resolve_read`` so an existing file is the SAME one
    ``read_file`` shows; a new file lands at the focus-relative location. When the file
    already exists its line-ending style is kept (``_detect_crlf`` + ``_preserve_eol``).
    The previous content is journaled for undo before the write, and content starting with
    a shebang ("#!") is made executable afterwards.

    Args:
        args: ``path`` and ``content`` (both required).

    Returns:
        "Wrote N bytes to <path>", followed by a line count and a numbered preview of the
        head when the preview can be rendered. N is the UTF-8 encoded size of what was
        written (assumes ``_atomic_write`` stores the text as UTF-8, like the other file
        tools here — confirm), not the number of characters.
    """
    full = self.resolve_read(args["path"])
    self._mkparent(full)
    content = args["content"]
    if os.path.exists(full):  # preserve the file's existing line endings (CRLF)
        content = self._preserve_eol(content, self._detect_crlf(full))
    self._journal(args["path"], full)
    self._atomic_write(full, content)
    if content[:2] == "#!":  # a shebang script should be runnable
        self._make_executable(full)
    # FIX: len(content) counts code points; report the real encoded size as "bytes"
    size = len(content.encode("utf-8", errors="replace"))
    msg = f"Wrote {size} bytes to {args['path']}"
    try:  # echo the head so the model sees what landed (post-EOL-normalization)
        n = content.replace("\r\n", "\n").rstrip("\n").count("\n") + 1 if content.strip() else 0
        return f"{msg} ({n} lines). Head:\n" + _numbered_window(content, 0, 15, ctx=0, cap=16)
    except Exception:  # noqa: BLE001 — the echo must never fail the write
        return msg
|
|
812
|
+
|
|
813
|
+
def _make_executable(self, full: str) -> None:
    """Add the execute permission for user, group and others to a file (like ``chmod +x``).

    Used after writing a script that starts with a shebang so it can be run without a
    separate chmod. Best-effort: an ``OSError`` (missing file, no permission, …) is ignored
    so the preceding write is never reported as failed because of it.

    Args:
        full: Absolute path of the file.
    """
    try:
        import stat as _stat
        exec_bits = _stat.S_IXUSR | _stat.S_IXGRP | _stat.S_IXOTH
        current_mode = os.stat(full).st_mode
        os.chmod(full, current_mode | exec_bits)
    except OSError:
        return
|
|
821
|
+
|
|
822
|
+
def _t_append(self, args: dict) -> str:
    """Tool handler: append content to a file, creating it if necessary.

    The target is located with ``resolve_read`` so the append goes to the SAME file
    ``read_file`` shows; a new file lands at the focus-relative location. The previous
    content is journaled for undo first. The content is written as UTF-8 in binary mode so
    no newline translation can alter existing CRLF conventions.

    Args:
        args: ``path`` and ``content`` (both required).

    Returns:
        "Appended N bytes to <path>", followed by a numbered view of the file's tail when
        it can be rendered. N is the number of bytes actually written, not the number of
        characters.
    """
    full = self.resolve_read(args["path"])
    self._mkparent(full)
    self._journal(args["path"], full)
    payload = args["content"].encode("utf-8")
    with open(full, "ab") as f:  # byte-exact — text mode would translate newlines
        f.write(payload)
    # FIX: report the encoded size; len(str) under-counts for any non-ASCII content
    msg = f"Appended {len(payload)} bytes to {args['path']}"
    try:  # echo the file tail so the model sees the appended content in context
        with open(full, encoding="utf-8", errors="replace") as _f:
            whole = _f.read()
        total = whole.replace("\r\n", "\n").rstrip("\n").count("\n") + 1
        app = args["content"].replace("\r\n", "\n").rstrip("\n").count("\n") + 1
        return f"{msg}. File tail:\n" + _numbered_window(whole, max(0, total - app), total - 1, ctx=2)
    except Exception:  # noqa: BLE001 — the echo must never fail the append
        return msg
|
|
837
|
+
|
|
838
|
+
def _edit_result(self, path: str, before: str, after: str, change_offset: int, new_text: str,
                 *, fuzzy: bool = False) -> str:
    """Build the result message for a successful ``str_replace``.

    Args:
        path: Path as the caller spelled it (used in the message only).
        before: File content before the edit.
        after: File content after the edit.
        change_offset: Character offset in ``before`` where the replaced text started.
        new_text: The replacement text; its line count sets the size of the echoed region.
        fuzzy: Whether the match came from the whitespace-tolerant fallback.

    Returns:
        A line with the size change in UTF-8 bytes, followed by a numbered window of the
        edited region when it can be rendered. Falls back to the size line alone if the
        window cannot be produced.
    """
    tag = " (normalized/fuzzy match)" if fuzzy else ""
    # FIX: the message says "bytes", so measure the encoded size rather than code points
    before_size = len(before.encode("utf-8", errors="replace"))
    after_size = len(after.encode("utf-8", errors="replace"))
    msg = f"Replaced 1 occurrence{tag} in {path} ({before_size} → {after_size} bytes)"
    try:
        s0 = before[:change_offset].count("\n")  # 0-based start line (prefix is unchanged in `after`)
        e0 = s0 + new_text.replace("\r\n", "\n").count("\n")
        return f"{msg}. Updated region (lines {s0 + 1}-{e0 + 1}):\n" + _numbered_window(after, s0, e0)
    except Exception:  # noqa: BLE001 — the echo must never fail the edit
        return msg
|
|
850
|
+
|
|
851
|
+
def _t_str_replace(self, args: dict) -> str:
    """Tool handler: replace ``old_string`` with ``new_string`` inside one file.

    The file is located with ``resolve_read`` and read STRICTLY as UTF-8, so a file with
    invalid bytes is refused rather than written back mangled. Two spellings of
    ``old_string`` are tried, in this order: the raw text, then (only if it differs) the
    text with line-number prefixes removed by ``_strip_line_numbers``.

    1. Exact match. The first spelling that occurs at all decides: one occurrence (or any
       number with ``replace_all``) is replaced; several without ``replace_all`` is an
       error asking for more context.
    2. Fallback. ``fuzzy_find_unique`` is asked for a single whitespace-tolerant span; the
       first spelling that yields one is replaced.

    Every successful edit keeps the file's CRLF/LF style, is journaled for undo, written
    with ``_atomic_write`` and reported through ``_edit_result``.

    Args:
        args: ``path``, ``old_string``, ``new_string`` (required); optional ``replace_all``.

    Returns:
        The ``_edit_result`` message on success, otherwise a ``ToolText`` with ``ok=False``.
    """
    shown_path = args["path"]
    target = self.resolve_read(shown_path)  # edit the SAME file read_file shows
    try:
        current = self.read_text(target, lossy=False)
    except UnicodeDecodeError as ex:
        # actionable error (not an opaque codec traceback): name the cause and the fallback
        return ToolText(f"Error: {shown_path} contains a non-UTF-8 byte ({ex}); str_replace can't safely "
                        "edit it (a whole-file write-back would corrupt the other bytes). Use edit_file to "
                        "rewrite the file, or fix its encoding first.", ok=False)
    uses_crlf = self._detect_crlf(target)
    needle = args["old_string"]
    replacement = args["new_string"]

    def _commit(raw_result: str, offset: int, *, fuzzy: bool) -> str:
        # shared write path: keep line endings, journal the pre-image, write, then report
        final = self._preserve_eol(raw_result, uses_crlf)
        self._journal(shown_path, target)
        self._atomic_write(target, final)
        return self._edit_result(shown_path, current, final, offset, replacement, fuzzy=fuzzy)

    # the de-numbered spelling is only a fallback, so a genuine raw match is never altered
    attempts = [needle]
    denumbered = _strip_line_numbers(needle)
    if denumbered != needle:
        attempts.append(denumbered)

    everywhere = bool(args.get("replace_all"))
    for probe in attempts:
        hits = current.count(probe)
        if hits == 0:
            continue
        if hits > 1 and not everywhere:
            return ToolText(f"Error: old_string occurs {hits} times in {shown_path}; add context to make it "
                            "unique, or pass replace_all=true to change them all", ok=False)
        # here either hits == 1 or replace_all is set, so replacing `hits` occurrences is exact
        return _commit(current.replace(probe, replacement, hits), current.index(probe), fuzzy=False)

    # fuzzy_find_unique yields None for zero or several candidates → never an ambiguous replace
    for probe in attempts:
        span = fuzzy_find_unique(current, probe)
        if span is None:
            continue
        lo, hi = span[0], span[1]
        return _commit(current[:lo] + replacement + current[hi:], lo, fuzzy=True)

    return ToolText(f"Error: old_string not found in {shown_path} — your snippet does not match "
                    "the file. Copy the EXACT text from OPEN FILES (the live content, WITHOUT the line-number "
                    "prefix), or rewrite the whole file with edit_file. Do NOT retry the same str_replace.", ok=False)
|
|
897
|
+
|
|
898
|
+
# --- edit journal (powers /undo) -----------------------------------------
|
|
899
|
+
def _journal(self, rel: str, full: str) -> None:
    """Remember a file's content just before it is written, so the edit can be undone.

    Appends ``(rel, full, previous_bytes)`` to ``self._edit_journal``;
    ``previous_bytes`` is ``None`` when the file does not exist yet or cannot be read.
    Only the 50 most recent entries are kept.

    Args:
        rel: Path as the caller spelled it (used for messages).
        full: Absolute path of the file about to be written.
    """
    snapshot = None
    try:
        if os.path.exists(full):
            with open(full, "rb") as handle:
                snapshot = handle.read()
    except OSError:
        snapshot = None
    journal = self._edit_journal
    journal.append((rel, full, snapshot))
    overflow = len(journal) - 50
    if overflow > 0:
        del journal[:overflow]  # bounded ring: drop the oldest entries
|
|
913
|
+
|
|
914
|
+
def undo_last(self) -> str:
    """Revert the most recent journaled edit and describe what happened.

    Pops the newest ``(rel, full, previous_bytes)`` entry from ``self._edit_journal``. If
    the file did not exist before that edit it is removed (when still present); otherwise
    its previous bytes are written back.

    Returns:
        A human-readable status line: "Nothing to undo." for an empty journal, a success
        message, or "Undo failed …" when the filesystem operation raised ``OSError``.
    """
    if not self._edit_journal:
        return "Nothing to undo."
    rel, full, prev = self._edit_journal.pop()
    try:
        if prev is not None:
            with open(full, "wb") as handle:
                handle.write(prev)
            return f"Undid the last edit to {rel} ({len(prev)} bytes restored)."
        # the edit created the file → undoing it means deleting it again
        if os.path.exists(full):
            os.remove(full)
        return f"Undid: removed {rel} (it did not exist before that edit)."
    except OSError as e:
        return f"Undo failed for {rel}: {e}"
|
|
929
|
+
|
|
930
|
+
def attach_image(self, path: str) -> str:
    """Stash a workspace image for the NEXT seed build as a vision content part. Returns a status line.

    Gated by the caller (only called for a vision-capable model). The path goes through
    ``self._resolve`` like reads do. The MIME type is sniffed from MAGIC BYTES (not the extension),
    so a spoofed extension can't smuggle a non-image through as image/png.

    On success a ``{"path", "b64", "mime"}`` dict is appended to ``self.pending_images``; every
    failure (unreadable, over the 8MB cap, not a recognized image) returns an ``Error: ...`` line
    and appends nothing.
    """
    import base64
    cap = 8 * 1024 * 1024
    try:
        full = self._resolve(path)
        with open(full, "rb") as _f:
            # Check the on-disk size BEFORE reading, so an oversized file is rejected without
            # first pulling the whole thing into memory.
            size = os.fstat(_f.fileno()).st_size
            if size > cap:
                return f"Error: image {path} is {size} bytes (cap 8MB) — too large to attach"
            raw = _f.read()
    except OSError as e:
        return f"Error: cannot read image {path}: {e}"
    # Re-check after the read: st_size can under-report (file still growing, non-regular file).
    if len(raw) > cap:
        return f"Error: image {path} is {len(raw)} bytes (cap 8MB) — too large to attach"
    mime = _sniff_image_mime(raw)
    if mime is None:
        return f"Error: {path} is not a recognized image (png/jpeg/gif/webp/bmp) — not attached"
    self.pending_images.append({"path": path, "b64": base64.b64encode(raw).decode("ascii"), "mime": mime})
    # cost-awareness: a base64 image is large + billed as image tokens → this turn costs more than text.
    return f"attached image {path} ({len(raw) // 1024} KB, {mime}) — vision turn, costs more than a text turn"
|
|
950
|
+
|
|
951
|
+
def _t_code_review(self, args: dict) -> str:
    """Produce `git diff <ref>` for the workspace so the model can review it (read-only; task-agnostic).

    ``args["ref"]`` falls back to ``HEAD`` when missing or blank. A dash-leading ref, a missing git
    binary, a subprocess failure, or a non-zero git exit each come back as a ``ToolText`` with
    ``ok=False``. An empty diff yields a "nothing to review" line.
    """
    import subprocess
    ref = (args.get("ref") or "HEAD").strip()
    if not ref:
        ref = "HEAD"
    # SECURITY: `ref` is model-controlled. An option-shaped ref (e.g. --output=/path, -O, --ext-diff)
    # would be parsed by git as a FLAG → arbitrary out-of-workspace file write / command exec, bypassing
    # the file-tool confinement. Reject leading-dash refs (a real ref/range never starts with '-') and
    # pass `--` so the ref can never be read as an option. Valid ranges (main...HEAD, HEAD~3) still work.
    if ref[:1] == "-":
        return ToolText(f"Error: invalid ref {ref!r} (a ref must not start with '-').", ok=False)
    try:
        argv = ["git", "-C", self.root(), "diff", ref, "--"]
        proc = subprocess.run(argv, capture_output=True, text=True, timeout=30)
    except FileNotFoundError:
        return ToolText("Error: git is not installed.", ok=False)
    except subprocess.SubprocessError as e:
        return ToolText(f"Error: git diff failed ({type(e).__name__}: {e}).", ok=False)
    if proc.returncode != 0:
        reason = proc.stderr.strip()[:300]
        return ToolText(f"Error: `git diff {ref}` failed — {reason} "
                        "(is this a git repo? is the ref valid?)", ok=False)
    diff = proc.stdout
    if not diff.strip():
        return f"No changes vs {ref} — the working tree matches it. Nothing to review."
    # PAGE a large diff out (full diff preserved on disk, reachable via read_file) instead of a hard
    # truncation that silently discarded the tail — a review/security task must not miss bugs past the cut.
    paged = self._page_out(diff, label=f"git-diff-{ref}")
    return (f"git diff {ref} ({len(diff)} chars). Review for correctness, security, and edge cases; "
            f"cite file:line per issue.\n\n{paged}")
|
|
979
|
+
|
|
980
|
+
def _t_ask_user(self, args: dict) -> str:
    """Put a question to the user and return their answer as ``User answered: ...``.

    ``args["question"]`` is required (blank → error ``ToolText``). ``args["options"]`` is passed on
    as a list of strings only when it is a non-empty list, otherwise as ``None``. The question goes
    to ``self.on_ask_user`` when set, else to the module default. EOF or Ctrl-C while asking is
    reported as "(no answer)".
    """
    question = (args.get("question") or "").strip()
    if not question:
        return ToolText("Error: ask_user requires a non-empty 'question'.", ok=False)
    raw_options = args.get("options")
    if isinstance(raw_options, list) and raw_options:
        choices = [str(item) for item in raw_options]
    else:
        choices = None
    try:
        ask = self.on_ask_user or _default_ask_user
        answer = ask(question, choices)
    except (EOFError, KeyboardInterrupt):
        answer = "(no answer)"
    return f"User answered: {str(answer).strip()}"
|
|
991
|
+
|
|
992
|
+
def _t_run_command(self, args: dict) -> str:
    """Run ``args["command"]`` through the sandbox with cwd = workspace root and return its output.

    A non-zero exit returns a ``ToolText`` (``ok=False``) carrying the exit code and the output.
    A zero exit returns the output via ``_page_out``, or a placeholder when there is none.
    """
    # Optional per-call timeout (default self.timeout, hard ceiling 600s) so slow builds don't
    # die at the 30s default and come back as exit 124. Long-lived processes use proc_start.
    try:
        t = float(args.get("timeout") or self.timeout)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: a huge JSON integer (e.g. 10**400) cannot be converted to float. Treat it
        # like any other unusable timeout instead of crashing the tool.
        t = float(self.timeout)
    t = max(1.0, min(t, 600.0))
    code, out = self.sandbox.run(args["command"], cwd=self.root(), timeout=t)
    self._grant_shell_paths(args.get("command", ""))  # I2 reach=action: dirs the shell touched
    out = out.strip()
    if code != 0:
        return ToolText(f"Exit code {code}\n{self._page_out(out, label='command output') or '(no output)'}", ok=False)
    return self._page_out(out, label="command output") if out else "(command produced no output)"
|
|
1006
|
+
|
|
1007
|
+
# --- background / long-running processes (procman) ---
|
|
1008
|
+
def _host_only_note(self) -> str:
    """Return a one-line host-execution warning, or ``""`` when the sandbox class is ``LocalSandbox``."""
    # #4: background procs + PTY sessions run on the HOST, not through self.sandbox. Under a non-local
    # sandbox (e.g. docker) that defeats container isolation — surface it instead of silently bypassing.
    backend = type(self.sandbox).__name__
    if backend == "LocalSandbox":
        return ""
    return ("[warning: this runs on the HOST, NOT inside the configured sandbox — "
            f"{backend} isolation does not apply]\n")
|
|
1014
|
+
|
|
1015
|
+
def _t_proc_start(self, args: dict) -> str:
    """Start ``args["command"]`` as a background process (cwd = workspace root) and report its handle.

    The reply is prefixed with the host-execution warning from ``_host_only_note`` when one applies.
    """
    command = args["command"]
    handle = self.procs.start(command, cwd=self.root())
    note = self._host_only_note()
    return (f"{note}Started background process {handle}: {command}\n"
            f"Use proc_tail/proc_poll/proc_wait/proc_kill with handle {handle}.")
|
|
1019
|
+
|
|
1020
|
+
def _t_proc_poll(self, args: dict) -> str:
    """Return whatever ``self.procs.poll`` reports for the process named by ``args["handle"]``."""
    handle = args["handle"]
    return self.procs.poll(handle)
|
|
1022
|
+
|
|
1023
|
+
def _t_proc_tail(self, args: dict) -> str:
    """Return the last ``args["lines"]`` lines (default 40, clamped to 1..2000) of a background process.

    The process is identified by ``args["handle"]``; the text comes from ``self.procs.tail``.
    """
    # #26: cap requested lines so a huge `lines` can't dump a chatty server's whole log into the slice.
    try:
        n = int(args.get("lines") or 40)
    except (TypeError, ValueError, OverflowError):
        # a non-numeric `lines` arg must not crash the tool. OverflowError covers int(float('inf')),
        # which the json module can produce from the non-standard literal `Infinity`.
        n = 40
    return self.procs.tail(args["handle"], max(1, min(n, 2000)))
|
|
1030
|
+
|
|
1031
|
+
def _t_proc_wait(self, args: dict) -> str:
    """Wait on the background process ``args["handle"]`` for up to ``args["timeout"]`` seconds.

    The timeout defaults to 30s and is clamped to 0.05..600; the result comes from ``self.procs.wait``.
    """
    try:
        t = float(args.get("timeout") or 30.0)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an integer too large for float (e.g. 10**400) falls back to the default
        # like any other unusable value, instead of crashing the tool.
        t = 30.0
    # proc_wait is a poll-with-timeout — allow sub-second waits (unlike run_command's 1s floor).
    return self.procs.wait(args["handle"], max(0.05, min(t, 600.0)))
|
|
1038
|
+
|
|
1039
|
+
def _t_proc_kill(self, args: dict) -> str:
    """Return whatever ``self.procs.kill`` reports for the process named by ``args["handle"]``."""
    handle = args["handle"]
    return self.procs.kill(handle)
|
|
1041
|
+
|
|
1042
|
+
# --- interactive PTY sessions (terminal) ---
|
|
1043
|
+
def _t_terminal_open(self, args: dict) -> str:
    """Open the terminal session ``args["session"]`` (default "main") in the workspace root.

    ``args["command"]`` is forwarded when non-empty, else ``None``. The reply carries the
    host-execution warning (if any) plus whatever the session shows right after opening.
    """
    session = args.get("session") or "main"
    startup_cmd = args.get("command") or None
    self.terminals.open(session, cwd=self.root(), command=startup_cmd)
    # peek, not read — don't eat the first prompt
    banner = self.terminals.peek(session, timeout=0.6)
    note = self._host_only_note()
    return f"{note}Opened terminal session {session!r}.\n{banner}"
|
|
1048
|
+
|
|
1049
|
+
def _t_terminal_send(self, args: dict) -> str:
    """Send ``args["input"]`` to a terminal session (default "main").

    ``args["enter"]`` defaults to ``True`` when absent or ``None``; any other value is coerced
    with ``bool``.
    """
    session = args.get("session") or "main"
    flag = args.get("enter")
    press_enter = bool(flag) if flag is not None else True
    return self.terminals.send(session, args["input"], enter=press_enter)
|
|
1054
|
+
|
|
1055
|
+
def _t_terminal_read(self, args: dict) -> str:
    """Read pending output from a terminal session (default "main") and return it via ``_page_out``.

    ``args["timeout"]`` defaults to 1s and is clamped to 0.05..120.
    """
    name = args.get("session") or "main"
    try:
        t = float(args.get("timeout") or 1.0)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an integer too large for float (e.g. 10**400) falls back to the default
        # like any other unusable value, instead of crashing the tool.
        t = 1.0
    return self._page_out(self.terminals.read(name, timeout=max(0.05, min(t, 120.0))), label="terminal output")
|
|
1062
|
+
|
|
1063
|
+
def _t_terminal_wait(self, args: dict) -> str:
    """Wait on a terminal session (default "main"), passing ``args["until"]`` to ``self.terminals.wait``.

    ``args["timeout"]`` defaults to 10s and is clamped to 0.1..600.
    """
    name = args.get("session") or "main"
    try:
        t = float(args.get("timeout") or 10.0)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: an integer too large for float (e.g. 10**400) falls back to the default
        # like any other unusable value, instead of crashing the tool.
        t = 10.0
    return self.terminals.wait(name, args["until"], timeout=max(0.1, min(t, 600.0)))
|
|
1070
|
+
|
|
1071
|
+
def _t_terminal_close(self, args: dict) -> str:
    """Close the terminal session ``args["session"]`` (default "main") and return the manager's reply."""
    session = args.get("session") or "main"
    return self.terminals.close(session)
|
|
1073
|
+
|
|
1074
|
+
# --- world model (durable agent scratchpad; state lives in the Slice, folded by slice_sink) ---
|
|
1075
|
+
def _t_world_set(self, args: dict) -> str:
    """Validate a world-model write and return a confirmation that echoes the value.

    This handler stores nothing itself. A blank ``key`` returns an error ``ToolText``. The echoed
    value has its whitespace collapsed to single spaces and is cut to 200 characters plus "…".
    """
    key = (args.get("key") or "").strip()
    if not key:
        return ToolText("Error: world_set requires a non-empty 'key'.", ok=False)
    # one-line echo so the value is readable THIS turn
    flattened = " ".join(str(args.get("value", "")).split())
    shown = flattened if len(flattened) <= 200 else flattened[:200] + "…"
    return (f"WORLD MODEL: saved {key!r} = {shown} (in your WORLD MODEL section from your NEXT turn; "
            f"this turn, re-read it from this call).")
|
|
1084
|
+
|
|
1085
|
+
def _t_world_clear(self, args: dict) -> str:
    """Confirm a world-model clear: names the given key, or "(all keys)" when no key is supplied."""
    key = (args.get("key") or "").strip()
    target = repr(key) if key else "(all keys)"
    return f"WORLD MODEL: cleared {target}."
|
|
1088
|
+
|
|
1089
|
+
# --- standing requirements (the durable contract; state lives in the Slice, folded by slice_sink) ---
|
|
1090
|
+
def _t_require(self, args: dict) -> str:
    """Validate a new standing requirement and return a confirmation line.

    ``args["text"]`` has its whitespace collapsed to single spaces; blank text returns an error
    ``ToolText``.
    """
    requirement = " ".join((args.get("text") or "").split())
    if requirement:
        return f"REQUIREMENT recorded: {requirement} (in your STANDING REQUIREMENTS from your next turn until done/dropped)."
    return ToolText("Error: require needs a non-empty 'text'.", ok=False)
|
|
1095
|
+
|
|
1096
|
+
def _t_requirement_done(self, args: dict) -> str:
    """Validate a "requirement done" call and return a confirmation line.

    ``args["text"]`` has its whitespace collapsed to single spaces; blank text returns an error
    ``ToolText``.
    """
    requirement = " ".join((args.get("text") or "").split())
    if requirement:
        return f"REQUIREMENT marked done: {requirement} (stays shown as [x], no longer flagged outstanding)."
    return ToolText("Error: requirement_done needs the requirement 'text'.", ok=False)
|
|
1101
|
+
|
|
1102
|
+
def _t_drop_requirement(self, args: dict) -> str:
    """Validate a "drop requirement" call and return a confirmation line.

    ``args["text"]`` has its whitespace collapsed to single spaces; blank text returns an error
    ``ToolText``.
    """
    requirement = " ".join((args.get("text") or "").split())
    if requirement:
        return f"REQUIREMENT dropped: {requirement}."
    return ToolText("Error: drop_requirement needs the requirement 'text'.", ok=False)
|
|
1107
|
+
|
|
1108
|
+
def _t_update_plan(self, args: dict) -> str:
    """Validate a plan update and return a summary of its step counts.

    ``args["steps"]`` must be a non-empty list, otherwise an error ``ToolText`` is returned.
    Only dict entries are inspected for ``status``; every entry counts toward the total.
    """
    # The STATE lives in the slice's PLAN tier (folded by slice_sink from this event); the handler
    # only validates + confirms (the world_set/require pattern).
    steps = args.get("steps")
    if not (isinstance(steps, list) and steps):
        return ToolText("Error: update_plan requires a non-empty 'steps' list "
                        "(each {step, status: pending|in_progress|done}).", ok=False)
    done = 0
    doing = 0
    for entry in steps:
        if not isinstance(entry, dict):
            continue
        status = entry.get("status")
        if status == "done":
            done += 1
        if status == "in_progress":
            doing += 1
    return f"PLAN updated: {len(steps)} steps ({done} done, {doing} in progress) — shown in your PLAN section."
|
|
1119
|
+
|
|
1120
|
+
def _t_set_mission(self, args: dict) -> str:
    """Validate a mission statement and return a confirmation line.

    ``args["text"]`` has its whitespace collapsed to single spaces; blank text returns an error
    ``ToolText``.
    """
    mission = " ".join((args.get("text") or "").split())
    if mission:
        return f"MISSION set: {mission} (shown at the top of your context until you call mission_done)."
    return ToolText("Error: set_mission needs a non-empty 'text'.", ok=False)
|
|
1125
|
+
|
|
1126
|
+
def _t_mission_done(self, args: dict) -> str:
    """Return the fixed confirmation that the mission was cleared; ``args`` is not read."""
    confirmation = "MISSION cleared (achieved — no longer shown)."
    return confirmation
|
|
1128
|
+
|
|
1129
|
+
def _t_execute_code(self, args: dict) -> str:
    """Run ``args["code"]`` via ``_execute_code``, then hand the same code to ``_grant_shell_paths``."""
    result = self._execute_code(args["code"])
    # I2 reach=action: dirs code-as-action touched
    self._grant_shell_paths(args.get("code", ""))
    return result
|
|
1133
|
+
|
|
1134
|
+
def _execute_code(self, code: str) -> str:
    """Code-as-action: run prelude + the model's code as a script in the sandbox, cwd = workspace.

    The script is written INSIDE the workspace as a hidden temp file (so it's mounted/available in
    every backend) and deleted right after the run, whatever the outcome. The interpreter comes
    from ``sandbox.python_cmd`` to stay backend-portable. A non-zero exit returns a ``ToolText``
    (``ok=False``) with the exit code and output; otherwise the output (or a placeholder) is returned.
    """
    source = _CODE_PRELUDE + "\n# --- agent code ---\n" + code
    workspace = self.root()
    handle, script_path = tempfile.mkstemp(suffix=".py", prefix=".sliceagent-exec-", dir=workspace)
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as sink:
            sink.write(source)
        interpreter = shlex.quote(self.sandbox.python_cmd)
        # Run by basename: the script sits directly in the cwd the sandbox is started in.
        script_name = shlex.quote(os.path.basename(script_path))
        status, output = self.sandbox.run(f"{interpreter} {script_name}", cwd=workspace, timeout=self.timeout)
        output = output.strip()
        if status == 0:
            if not output:
                return "(execute_code produced no output)"
            return self._page_out(output, label="execute_code output")
        paged = self._page_out(output, label='execute_code output') or '(no output)'
        return ToolText(f"Exit code {status}\n{paged}", ok=False)
    finally:
        # Best-effort cleanup: a temp script that is already gone is not an error.
        try:
            os.unlink(script_path)
        except OSError:
            pass
|
|
1156
|
+
|
|
1157
|
+
@staticmethod
def _mkparent(path: str) -> None:
    """Create the directory that would contain ``path`` (and any missing ancestors); no-op if present."""
    os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
|
|
1161
|
+
|
|
1162
|
+
@staticmethod
def _atomic_write(full: str, content: str) -> None:
    """Atomically replace ``full`` with ``content`` (UTF-8, line endings written verbatim).

    The text goes to a temp file in the SAME directory as the target, which is then moved over
    the target with ``os.replace``. A crash/error mid-write therefore leaves the original intact
    (the rename is atomic on POSIX), and the temp file is unlinked on any failure before the
    exception is re-raised. The temp must share the target's filesystem for ``os.replace`` to be
    atomic, hence ``dir=os.path.dirname(full)`` (``full`` is already ``_resolve()``'d).
    """
    import stat as _stat
    target_dir = os.path.dirname(full)
    # preserve the target's permission bits across the replace — else a str_replace/edit_file on an
    # existing 0755 script silently resets it to the mkstemp 0600 (drops the executable + group/other bits).
    # ONE stat in a try (no exists()+stat() TOCTOU): if the file is absent or concurrently removed, write
    # fresh with default perms rather than raising an unhandled FileNotFoundError.
    perms = None
    try:
        perms = _stat.S_IMODE(os.stat(full).st_mode)
    except OSError:
        pass
    handle, scratch = tempfile.mkstemp(prefix=".sliceagent-tmp-", dir=target_dir)
    try:
        # newline="" disables the platform newline translation: _preserve_eol already normalized the
        # content's line endings (LF or CRLF) to match the target, so text-mode translation on Windows
        # would double-convert \n→\r\n inside an already-CRLF string (\r\r\n) and corrupt the file.
        with os.fdopen(handle, "w", encoding="utf-8", newline="") as sink:
            sink.write(content)
        if perms is not None:
            os.chmod(scratch, perms)
        os.replace(scratch, full)
    except BaseException:
        # Also covers KeyboardInterrupt/SystemExit: never leave a stray temp file behind.
        try:
            os.unlink(scratch)
        except OSError:
            pass
        raise
|