sliceagent 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sliceagent/__init__.py +3 -0
- sliceagent/__main__.py +6 -0
- sliceagent/access.py +93 -0
- sliceagent/agents.py +173 -0
- sliceagent/background_review.py +146 -0
- sliceagent/binsniff.py +89 -0
- sliceagent/cli.py +890 -0
- sliceagent/clock.py +32 -0
- sliceagent/code_grep.py +329 -0
- sliceagent/code_index.py +417 -0
- sliceagent/config.py +240 -0
- sliceagent/context_overflow.py +227 -0
- sliceagent/envspec.py +129 -0
- sliceagent/errors.py +167 -0
- sliceagent/events.py +96 -0
- sliceagent/finding_types.py +70 -0
- sliceagent/flags.py +63 -0
- sliceagent/fuzzy.py +135 -0
- sliceagent/guardrails.py +438 -0
- sliceagent/guidance.py +69 -0
- sliceagent/hippocampus.py +581 -0
- sliceagent/hooks.py +334 -0
- sliceagent/interfaces.py +144 -0
- sliceagent/llm.py +695 -0
- sliceagent/loop.py +548 -0
- sliceagent/mcp_client.py +255 -0
- sliceagent/mcp_security.py +77 -0
- sliceagent/memory.py +428 -0
- sliceagent/metrics.py +103 -0
- sliceagent/model_catalog.py +124 -0
- sliceagent/monitor.py +615 -0
- sliceagent/neocortex.py +436 -0
- sliceagent/onboarding.py +323 -0
- sliceagent/oracle.py +36 -0
- sliceagent/pagetable.py +255 -0
- sliceagent/pfc.py +449 -0
- sliceagent/plugins.py +127 -0
- sliceagent/policy.py +234 -0
- sliceagent/procman.py +187 -0
- sliceagent/prompt.py +239 -0
- sliceagent/records.py +108 -0
- sliceagent/recovery.py +119 -0
- sliceagent/regions.py +678 -0
- sliceagent/registry.py +128 -0
- sliceagent/retriever.py +19 -0
- sliceagent/safety.py +332 -0
- sliceagent/sandbox.py +143 -0
- sliceagent/scheduler.py +92 -0
- sliceagent/search_index.py +289 -0
- sliceagent/seed.py +465 -0
- sliceagent/sensory_cortex.py +500 -0
- sliceagent/session.py +222 -0
- sliceagent/skill_provenance.py +71 -0
- sliceagent/skill_usage.py +123 -0
- sliceagent/skills.py +209 -0
- sliceagent/subagent.py +332 -0
- sliceagent/subdir_hints.py +222 -0
- sliceagent/swap.py +182 -0
- sliceagent/taskstate.py +57 -0
- sliceagent/telemetry.py +59 -0
- sliceagent/terminal.py +240 -0
- sliceagent/text_utils.py +56 -0
- sliceagent/tool_summary.py +93 -0
- sliceagent/tools.py +1194 -0
- sliceagent/tui.py +1377 -0
- sliceagent/web.py +354 -0
- sliceagent-0.1.0.dist-info/METADATA +262 -0
- sliceagent-0.1.0.dist-info/RECORD +71 -0
- sliceagent-0.1.0.dist-info/WHEEL +4 -0
- sliceagent-0.1.0.dist-info/entry_points.txt +2 -0
- sliceagent-0.1.0.dist-info/licenses/LICENSE +21 -0
sliceagent/hooks.py
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
"""Hooks: the policy seam. The loop calls these; the host supplies them.
|
|
2
|
+
|
|
3
|
+
This is how policy stays OUT of the moat: the Oracle, permission gate, and token
|
|
4
|
+
budget are all hooks, not hardcoded loop logic.
|
|
5
|
+
|
|
6
|
+
Hook return conventions (all optional, return None to no-op):
|
|
7
|
+
before_step(step) -> {"block": bool, "reason": str} | None
|
|
8
|
+
record_step_usage(usage) -> {"stop_turn": bool} | None
|
|
9
|
+
after_step(step, usage, stop_reason) -> {"stop_turn": bool} | None
|
|
10
|
+
should_continue_after_stop(stop) -> {"continue": bool} | None
|
|
11
|
+
authorize_tool(name, args) -> ToolDecision
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
|
|
18
|
+
from .guardrails import ToolCallGuardrail
|
|
19
|
+
from .guidance import DENIAL_NO_PROMPT, DENIAL_USER
|
|
20
|
+
|
|
21
|
+
# Commands a wide AGENT_AUTO_APPROVE glob (e.g. "git *") must NEVER silently approve: destructive ops that
|
|
22
|
+
# are not catastrophic (so the policy floor lets them through to ASK) yet discard work/data. These always
|
|
23
|
+
# fall through to a confirmation even when a glob matches. The catastrophic floor is screened too (below).
|
|
24
|
+
# Work-discarding command patterns that an auto-approve glob must never silently approve.
# Every pattern is compiled case-insensitively and applied with .search() over the whole command
# string, so a destructive segment anywhere in a chained command is caught.
_DESTRUCTIVE_AUTO = [
    # git subcommands that rewrite or discard local work
    re.compile(r"\bgit\b[^\n]*\b(reset|clean|checkout|restore|rebase|filter-branch)\b", re.I),
    # branch deletion; because of re.I this also matches the lowercase `-d` form
    re.compile(r"\bgit\b[^\n]*\bbranch\b[^\n]*\s-D\b", re.I),
    re.compile(r"\bgit\b[^\n]*\bstash\b[^\n]*\b(drop|clear)\b", re.I),
    re.compile(r"\bgit\b[^\n]*\bpush\b[^\n]*(--force|--force-with-lease|\s-f\b)", re.I),
    # any recursive rm within the same shell segment: a short flag cluster containing r/R
    # (-r, -rf, -fR, ...) or the long --recursive option (previously not recognised)
    re.compile(r"\brm\b(?=[^|;&\n]*\s(?:-[a-z]*r|--recursive\b))", re.I),
    re.compile(r"\b(shred|mkfs|wipefs)\b", re.I),
]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _is_destructive_command(name: str, cmd: str) -> bool:
    """Report whether `cmd` must be confirmed rather than silently auto-approved.

    A command counts as destructive when either
    * `policy.no_dangerous_commands(name, {"command": cmd})` returns anything other than None, or
    * any pattern in `_DESTRUCTIVE_AUTO` is found anywhere in `cmd`.
    """
    # Imported lazily: policy imports hooks, so a module-level import would create a cycle at load.
    from . import policy

    floor_verdict = policy.no_dangerous_commands(name, {"command": cmd})
    if floor_verdict is not None:
        return True
    for pattern in _DESTRUCTIVE_AUTO:
        if pattern.search(cmd):
            return True
    return False
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
class ToolDecision:
    """Verdict returned by `authorize_tool` (and by permission policies).

    allow            -- whether the tool call may run.
    reason           -- explanation attached to the verdict.
    ask              -- the policy abstains to an interactive prompt (resolved by PermissionHook).
    counts_as_stuck  -- whether a block counts toward the per-turn STUCK floor
                        (loop.py STUCK_BLOCK_BUDGET).
    """

    allow: bool
    reason: str = ""
    ask: bool = False
    # True for a genuine SPIN (a repeated failing call, a policy denial the model keeps retrying).
    # False for a harmless dedup (re-reading the same file -> the guard just skips it), so a long,
    # legitimate exploration that re-reads a file a few times is not killed as "stuck".
    counts_as_stuck: bool = True


# Shared plain "allowed" verdict. The dataclass is not frozen, so treat this instance as read-only.
ALLOW = ToolDecision(True)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class Hooks:
    """No-op base for the hook surface the loop calls; hosts override what they need.

    Every method has a neutral default: the dict-returning hooks return None (no-op) and
    `authorize_tool` returns ALLOW.
    """

    def before_step(self, step: int):
        """Return {"block": bool, "reason": str} to block the step, or None to no-op."""

    def record_step_usage(self, usage: dict):
        """Observe a step's usage; return {"stop_turn": bool} or None to no-op."""

    def after_step(self, step: int, usage: dict, stop_reason: str):
        """Called after a step; return {"stop_turn": bool} or None to no-op."""

    def should_continue_after_stop(self, stop_reason: str):
        """Return {"continue": bool} to force another round, or None to no-op."""

    def authorize_tool(self, name: str, args: dict) -> ToolDecision:
        """Decide whether tool `name` may run with `args`; the default allows everything."""
        return ALLOW

    def reset_for_turn(self):
        """Clear per-turn state at the start of a user task.

        Fires ONCE per turn, not per step. The guardrail uses it to clear cross-step loop
        counters so they do not bleed across tasks. No-op by default.
        """

    # --- mutating seams (events can't mutate; these can) ---
    def prepare_messages(self, messages: list[dict]):
        """Last chance to transform the model-visible messages before the LLM call
        (e.g. inject context). Return the new messages, or None to leave them unchanged."""

    def transform_tool_result(self, name: str, args: dict, output: str):
        """Rewrite a tool result before it enters the slice (e.g. redaction, formatting).
        Return the new output, or None to leave it unchanged."""
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class CompositeHooks(Hooks):
    """Present several hooks as one: first deny / any stop / any continue.

    Hooks are consulted in the order they were passed to the constructor.
    """

    def __init__(self, *hooks: Hooks):
        self.hooks = hooks

    def before_step(self, step):
        """Return the first blocking verdict; later hooks are not consulted after a block."""
        verdicts = (hook.before_step(step) for hook in self.hooks)
        return next((v for v in verdicts if v and v.get("block")), None)

    def record_step_usage(self, usage):
        """Let EVERY hook observe the usage, then stop the turn if any of them asked to.

        These callbacks have side effects (e.g. BudgetHook.spent +=), so the loop must not
        short-circuit on the first stop_turn — trailing hooks would miss the observation.
        """
        stop = False
        for hook in self.hooks:
            outcome = hook.record_step_usage(usage) or {}
            if outcome.get("stop_turn"):
                stop = True
        return {"stop_turn": True} if stop else None

    def after_step(self, step, usage, stop_reason):
        """Run every hook's after_step; stop the turn if any of them asked to."""
        stop = False
        for hook in self.hooks:
            outcome = hook.after_step(step, usage, stop_reason) or {}
            if outcome.get("stop_turn"):
                stop = True
        return {"stop_turn": True} if stop else None

    def should_continue_after_stop(self, stop_reason):
        """Return the first verdict that asks to continue; later hooks are then skipped."""
        verdicts = (hook.should_continue_after_stop(stop_reason) for hook in self.hooks)
        return next((v for v in verdicts if v and v.get("continue")), None)

    def authorize_tool(self, name, args):
        """Return the first non-allowing decision, or ALLOW when every hook allows."""
        decisions = (hook.authorize_tool(name, args) for hook in self.hooks)
        return next((d for d in decisions if not d.allow), ALLOW)

    def prepare_messages(self, messages):
        """Chain the hooks: each sees the previous hook's result. None if nothing changed."""
        current = messages
        replaced = False
        for hook in self.hooks:
            candidate = hook.prepare_messages(current)
            if candidate is None:
                continue
            current = candidate
            replaced = True
        return current if replaced else None

    def transform_tool_result(self, name, args, output):
        """Chain the hooks over the tool output. None if no hook rewrote it."""
        current = output
        replaced = False
        for hook in self.hooks:
            candidate = hook.transform_tool_result(name, args, current)
            if candidate is None:
                continue
            current = candidate
            replaced = True
        return current if replaced else None

    def reset_for_turn(self):
        """Forward the per-turn reset to every hook."""
        for hook in self.hooks:
            hook.reset_for_turn()
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# --- concrete hooks ---
|
|
150
|
+
|
|
151
|
+
class OracleHook(Hooks):
    """Verification gate: when the model declares done, run an oracle (tests/lint).

    On failure the oracle output is recorded through `on_feedback` and another round is forced.
    """

    def __init__(self, oracle, on_feedback):
        self.oracle = oracle
        # callable(output: str) -> records the failure into the slice
        self.on_feedback = on_feedback

    def should_continue_after_stop(self, stop_reason):
        """Run the oracle on "end_turn"; return a continue verdict carrying the failure output."""
        if stop_reason != "end_turn":
            return None
        try:
            passed, report = self.oracle.verify()
        except Exception as exc:  # noqa: BLE001 — a verify ERROR must FORCE another turn, never silently pass the done-gate
            passed = False
            report = f"verification could not run: {type(exc).__name__}: {exc}"
        if passed:
            return None
        # Also record into the slice (for the NEXT turn's seed / durable cache).
        self.on_feedback(report)
        # CRITICAL: the failure detail must ride the MESSAGE channel. Under the accumulate loop the
        # seed is built once and never re-rendered mid-turn, so a slice mutation (last_error) is
        # invisible to THIS turn's retry. `feedback` is what the loop appends as the model's next input.
        message = f"Verification failed — fix this, then finish:\n{report}"
        return {"continue": True, "feedback": message}
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# Feedback message SelfCheckHook sends when the model declares done: a preamble, three numbered
# steps and a closing line, joined by single newlines.
_SELF_CHECK = "\n".join((
    # preamble
    "STOP — definition-of-done check (required). Before you finish, verify your work against "
    "the task's REAL acceptance criteria:",
    # step 1: enumerate the requirements
    "1) List EVERY concrete requirement: start from your STANDING REQUIREMENTS contract if you have one "
    "(each open '[ ]' item is binding), and ALSO re-read the task for anything not yet recorded — the exact "
    "output file path(s), required fields/values/format, each distinct sub-task, and any 'do not change X'.",
    # step 2: confirm each one against the real end-state
    "2) For EACH requirement, CONFIRM it against the ACTUAL end-state right now — run a command or read "
    "the real file (do NOT trust your memory, a note, or a schema-shape check): the required output "
    "exists at the EXACT path, its contents/values are correct, every sub-task is done, and you changed "
    "nothing you were told to leave alone. Call requirement_done(...) on each contract item you confirm.",
    # step 3: fix and re-check
    "3) If anything is unmet or unverified, fix it and re-check. When a value must match something that "
    "already exists (a file, a git object, expected output), COPY it exactly — do not retype it.",
    # closing
    "Finish only when ALL requirements are confirmed against the real end-state. If everything already "
    "checks out, just say so and finish — do not make changes for their own sake.",
))
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class SelfCheckHook(Hooks):
    """Grounded definition-of-done gate for AUTONOMOUS runs (no human to catch a premature 'done').

    When the model declares done, the hook forces a verification round by returning the
    `_SELF_CHECK` message as feedback. 'Done' is accepted only once the model has done verification
    WORK (a step ending in "tool_use") since the gate last fired; a bare re-assertion of 'done'
    re-fires the gate. The gate fires at most `max_fires` times per turn (overridable through the
    AGENT_SELFCHECK_MAX environment variable), so it can never loop forever.

    It is the no-oracle cousin of OracleHook: the agent self-sources its acceptance check instead
    of declaring done blind.
    """

    def __init__(self, max_fires: int = 3):
        import os

        override = os.environ.get("AGENT_SELFCHECK_MAX")
        try:
            limit = int(override or max_fires)
        except (TypeError, ValueError):
            # a non-numeric env value must not crash hook construction
            limit = max_fires
        self._max = max(1, limit)
        self._fires = 0
        # did the model run a tool since the gate last fired?
        self._acted = False

    def reset_for_turn(self):
        """Start each user task with a fresh fire count and no recorded tool activity."""
        self._fires = 0
        self._acted = False

    def after_step(self, step: int, usage: dict, stop_reason: str):
        """Note that the model ran verification/fix tools this round."""
        if stop_reason == "tool_use":
            self._acted = True
        return None

    def should_continue_after_stop(self, stop_reason):
        """Fire the self-check on "end_turn" unless the model already verified or the cap is hit."""
        if stop_reason != "end_turn":
            return None
        verified_by_doing = self._fires > 0 and self._acted  # honest done after a nudge → accept
        cap_reached = self._fires >= self._max  # bounded → never loop
        if verified_by_doing or cap_reached:
            return None
        self._fires += 1
        self._acted = False
        return {"continue": True, "feedback": _SELF_CHECK}
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class PermissionHook(Hooks):
    """Gate tool execution. `policy(name, args) -> ToolDecision`.

    A policy decision without `ask` is returned unchanged. When the policy returns `ask`, it is
    resolved interactively via `on_ask(name, args, reason) -> 'yes'|'no'|'always'` (the host
    supplies a TTY prompt). Non-interactive hosts (on_ask=None) deny an `ask` — safe by default.
    Any verdict other than 'yes'/'always' (including an empty or non-string answer) is a denial.

    'always' memorizes a session approval — keyed by the CALL, not the bare tool name. Approving
    one shell command must NOT bless every shell command: the tools in `_CMD_TOOLS` are remembered
    by their exact command/code/input text; other tools (already gated by policy) are remembered
    by name. `auto_approve` pre-seeds fnmatch globs matched against the command text
    (e.g. ["git status*", "ls *"]) so safe read-only commands never prompt.
    """

    _CMD_TOOLS = ("run_command", "execute_code", "proc_start", "terminal_open", "terminal_send")

    def __init__(self, policy, on_ask=None, auto_approve=None):
        self.policy = policy
        self.on_ask = on_ask
        self._approved: set[str] = set()  # exact approval keys (call patterns, not bare tool names)
        self._rules: list[str] = list(auto_approve or [])  # pre-seeded fnmatch globs over the command

    @staticmethod
    def _command_text(args: dict) -> str:
        """Return the stripped command/code/input text of a call ('' when none is present).

        Tool arguments are supplied by the model, so the value is not guaranteed to be a string;
        a non-string value is stringified instead of crashing the gate on `.strip()`.
        """
        raw = args.get("command") or args.get("code") or args.get("input") or ""
        if not isinstance(raw, str):
            raw = str(raw)
        return raw.strip()

    @classmethod
    def _key(cls, name: str, args: dict) -> str:
        """Build the approval key for a call."""
        # command-SPECIFIC for the dangerous tools — approving `npm test` must not auto-allow `rm -rf`.
        if name in cls._CMD_TOOLS:
            return f"{name}:{cls._command_text(args)}"
        return name  # name-level for the rest (policy already gates them)

    def _pre_allowed(self, name: str, args: dict, key: str) -> bool:
        """True when the call was approved earlier ('always') or matches an auto-approve glob."""
        if key in self._approved:
            return True
        cmd = self._command_text(args)
        if cmd and self._rules:
            import fnmatch
            if any(fnmatch.fnmatch(cmd, rule) for rule in self._rules):
                # A broad glob must NOT silently green-light a destructive command — fall through to ask.
                return not _is_destructive_command(name, cmd)
        return False

    def authorize_tool(self, name, args):
        """Apply the policy, then resolve an `ask` via remembered approvals, globs, or the prompt."""
        d = self.policy(name, args)
        if not d.ask:
            return d
        key = self._key(name, args)
        if self._pre_allowed(name, args, key):
            return ALLOW
        if self.on_ask is None:
            return ToolDecision(False, DENIAL_NO_PROMPT)
        answer = self.on_ask(name, args, d.reason)
        # Normalise the answer: tolerate surrounding whitespace (e.g. "yes\n" from a TTY read) and
        # treat a missing or non-string answer as a denial rather than raising.
        verdict = answer.strip().lower() if isinstance(answer, str) else "no"
        if verdict == "always":
            self._approved.add(key)  # remember THIS call pattern, not the whole tool
            return ALLOW
        return ALLOW if verdict == "yes" else ToolDecision(False, DENIAL_USER)
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
class BudgetHook(Hooks):
    """Stop the turn once cumulative tokens cross a ceiling.

    `max` is the ceiling; `spent` is the running tally of prompt + completion tokens for the
    current turn.
    """

    def __init__(self, max_total_tokens: int):
        self.max = max_total_tokens
        self.spent = 0

    def reset_for_turn(self):
        """Zero the tally at the start of each user task."""
        # PER-TURN budget: without this reset the cap silently becomes a whole-SESSION budget across
        # the REPL. A true session-wide cap, if ever wanted, should be a separate named hook.
        self.spent = 0

    def record_step_usage(self, usage):
        """Add this step's tokens to the tally; return {"stop_turn": True} once the ceiling is hit.

        A missing usage dict, a missing key, or a key whose value is None all count as 0 tokens —
        `int(None)` would otherwise raise TypeError when the usage report is absent or partial.
        """
        usage = usage or {}
        prompt = int(usage.get("prompt_tokens") or 0)
        completion = int(usage.get("completion_tokens") or 0)
        self.spent += prompt + completion
        return {"stop_turn": True} if self.spent >= self.max else None
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
class GuardrailHook(Hooks):
    """Cross-step loop guard backed by a ToolCallGuardrail.

    Blocks a tool call that repeats an identical failing call, or an idempotent call that keeps
    making no progress. State is per-turn (cleared by `reset_for_turn`), so counters never bleed
    across user tasks.
    """

    def __init__(self, config=None):
        self.guard = ToolCallGuardrail(config)

    def reset_for_turn(self):
        """Clear the guardrail's per-turn counters."""
        self.guard.reset_for_turn()

    def authorize_tool(self, name, args):
        """Ask the guardrail before a call; translate a block into a denying ToolDecision."""
        verdict = self.guard.before_call(name, args)
        if verdict.block:
            # Only a HARD spin counts toward STUCK: a repeated FAILING call, or no-edit-progress
            # (failing edits). A deduped idempotent/no-progress read is harmless — block (skip) it
            # but DON'T kill the turn, so a long exploration that re-reads a file isn't flagged stuck.
            is_hard_spin = verdict.code in ("repeated_exact_failure", "no_edit_progress")
            return ToolDecision(False, verdict.message, counts_as_stuck=is_hard_spin)
        return ALLOW

    def transform_tool_result(self, name, args, output):
        """Feed real tool results to the guardrail counters; never rewrites the output."""
        # NEVER feed a guardrail/policy BLOCK back into the counters: a blocked call never ran, so
        # counting its synthetic "Error: blocked by policy:" result as a real failure would advance
        # the failing / no-edit-progress axes and escalate a harmless soft-block into a 'stuck' kill.
        was_blocked = isinstance(output, str) and output.startswith("Error: blocked by policy:")
        if not was_blocked:
            self.guard.after_call(name, args, output)
        return None
|
sliceagent/interfaces.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""The contracts the core depends on — never the implementations.
|
|
2
|
+
|
|
3
|
+
The moat (loop + tiers) talks only to these. Everything commodity (LLM I/O,
|
|
4
|
+
retrieval, tool execution/sandbox, verification) lives behind them and is swappable.
|
|
5
|
+
Policy (Oracle/permissions/budget) is supplied via hooks.py.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Protocol, runtime_checkable
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class ToolCall:
    """One tool invocation requested by the model: a call id, the tool name and its arguments."""

    id: str
    name: str
    args: dict
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class AssistantMessage:
    """A completion returned by an LLMClient: optional text plus any requested tool calls."""

    content: str | None
    tool_calls: list[ToolCall] = field(default_factory=list)
    # {"prompt_tokens": int, "completion_tokens": int}
    usage: dict | None = None
    # provider's raw finish reason → normalized by the loop
    finish_reason: str | None = None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class Snippet:
    """A retrieved piece of text with the path it came from and a relevance score."""

    path: str
    text: str
    score: float = 0.0
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class PageRef:
    """A bounded reference to one PAGE the PageTable can surface into the slice.

    This is the unified shape every read/retrieval backend (code map, project-notes,
    cross-session episodes) returns from PageTable.lookup().

    handle     -- locates the page (a repo-map marker, a subtree path, a session·turn locator).
    kind       -- the page kind.
    preview    -- RAW text; the renderer fences it (wrap_untrusted) so injection-fencing stays
                  at ONE layer.
    score      -- relevance score.
    untrusted  -- flags re-injected external content (default True).
    """

    handle: str
    kind: str
    preview: str
    score: float = 0.0
    untrusted: bool = True
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class TaskRef:
    """A bounded index row for the OTHER OPEN THREADS tier (Step 3)."""

    task_id: str
    title: str
    # one of: active | parked | done | abandoned
    status: str
    updated: str = ""
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass
class TaskState:
    """Resumable, distilled state for one task = the serializable Slice fields.

    Stores REFS (file paths + anchors), never file contents — ground truth is re-read from disk
    on resume. Transient tiers (recent, action_log, active_skills) are intentionally NOT
    serialized.
    """

    task_id: str
    session_id: str = ""
    title: str = ""
    status: str = "active"
    goal: str = ""
    findings: list[str] = field(default_factory=list)
    # finding -> provenance tier (carried; else resume upgrades 'claim'→'tool-note')
    finding_source: dict[str, str] = field(default_factory=dict)
    # STANDING REQUIREMENTS contract (carried)
    requirements: list[dict] = field(default_factory=list)
    # PLAN / TodoWrite steps + status (carried)
    plan: list[dict] = field(default_factory=list)
    # MISSION north-star objective (carried)
    mission: str = ""
    # OPEN USER REPORT blocker (carried; the "it's broken" push-back must survive resume)
    open_report: str = ""
    # agent WORLD MODEL (carried; was dropped on resume)
    world: dict = field(default_factory=dict)
    active_files: list[str] = field(default_factory=list)
    # list on the wire; a set in the Slice
    edited_files: list[str] = field(default_factory=list)
    edit_anchor: dict[str, str] = field(default_factory=dict)
    last_error: str = ""
    since_edit: int = 0
    # task-graph edges (Step 3)
    links: list[str] = field(default_factory=list)
    # comma-joined (matches remember()/_tags)
    tags: str = ""
    resolution: str = ""
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@runtime_checkable
class LLMClient(Protocol):
    """Provider-agnostic completion + tool-calling. (implemented over an official LLM SDK)

    Implementations may optionally expose `is_retryable(error) -> bool` for the retry policy.
    """

    def complete(self, messages: list[dict], tools: list[dict]) -> AssistantMessage:
        """Produce one assistant message for `messages`, given the available `tools` schemas."""
        ...
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@runtime_checkable
class ToolHost(Protocol):
    """Executes tools, ideally behind a sandbox. (backed by a container sandbox + MCP tools)"""

    def schemas(self) -> list[dict]:
        """Return the tool schemas this host offers."""
        ...

    def run(self, name: str, args: dict) -> str:
        """Execute tool `name` with `args` and return its output as text."""
        ...

    def read_text(self, path: str) -> str:
        """Read `path` to reconstruct the artifacts tier (raises if missing)."""
        ...

    def accesses(self, name: str, args: dict) -> list:
        """Return the resource accesses of a call, for the scheduler."""
        ...
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@runtime_checkable
class Retriever(Protocol):
    """Code discovery for the RELATED CODE tier (repo search). (build: ripgrep + tree-sitter)"""

    def retrieve(self, query: str, k: int = 6) -> list[Snippet]:
        """Return snippets for `query`; `k` (default 6) is the requested result count."""
        ...
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@runtime_checkable
class Memory(Protocol):
    """Cross-session memory + the durable STATE VAULT (episodic cache, task-state, lessons).

    Distinct from Retriever (memem indexes a curated vault, NOT source code). `is_durable` is the
    structural no-op marker: NullMemory sets it False so hosts skip cache/checkpoint wiring (keeps
    evals deterministic). The full surface is frozen here; implementations land incrementally.

    NOTE: @runtime_checkable isinstance() verifies member-NAME presence only — not signatures or
    return types; behavioral fidelity is enforced by the round-trip tests.
    """

    is_durable: bool

    # --- long-term lessons (exists) ---
    def recall(self, query: str, k: int = 6, paths: list[str] | None = None) -> list[Snippet]:
        """Recall stored lessons for `query` as snippets."""
        ...

    def remember(self, content: str, *, title: str = "", scope: str = "default", tags: str = "",
                 paths: list[str] | None = None) -> None:
        """Store a lesson; `tags` is a comma-joined string."""
        ...

    # --- episodic cache (lossless; never recalled into the LLM context) ---
    def append_episode(self, session_id: str, task_id: str, turn: int, record: dict) -> None:
        """Append one turn record to the episodic cache."""
        ...

    def read_episodes(self, session_id: str, *, limit: int | None = None) -> list[dict]:
        """Read side: the model's on-demand valve into the cold cache (recall_history tool).

        Returns raw line dicts ({v,session_id,task_id,turn,ts,record}); the host renders and
        bounds them.
        """
        ...

    def search_episodes(self, query: str, *, limit: int = 5, exclude_session: str | None = None,
                        only_session: str | None = None) -> list[dict]:
        """Cross-session FTS5 discovery over the durable episode index (item 12; additive).

        Returns bounded hit dicts; [] when the index is unavailable. Single-session reads use
        read_episodes; this is the ACROSS-sessions counterpart.
        """
        ...

    # --- task state / resume ---
    def checkpoint_task(self, task: TaskState) -> None:
        """Persist the resumable state of one task."""
        ...

    def load_task(self, task_id: str) -> TaskState | None:
        """Load a checkpointed task by id, or None."""
        ...

    def list_session_tasks(self, session_id: str) -> list[TaskRef]:
        """List index rows for the tasks of a session."""
        ...

    # --- consolidation / retrieval-feedback (declared now; implemented in later steps) ---
    def mark_used(self, memory_id: str) -> None:
        """Retrieval feedback: mark a memory as used."""
        ...

    def consolidate(self, session_id: str, *, llm=None, mode: str = "deterministic") -> dict:
        """Consolidate a session.

        `llm` is the abstract LLMClient contract (llm-agnostic — never a concrete provider type).
        Returns a stats dict {lessons, skills, skills_rejected, errors} so callers report the
        truth, not a blind success.
        """
        ...
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@runtime_checkable
class Oracle(Protocol):
    """Ground-truth verification independent of retrieval. (backed by the project's test/lint runners)"""

    def verify(self) -> tuple[bool, str]:
        """Run the verification and return a (bool, str) pair."""
        ...
|