sliceagent 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sliceagent/__init__.py +3 -0
- sliceagent/__main__.py +6 -0
- sliceagent/access.py +93 -0
- sliceagent/agents.py +173 -0
- sliceagent/background_review.py +146 -0
- sliceagent/binsniff.py +89 -0
- sliceagent/cli.py +890 -0
- sliceagent/clock.py +32 -0
- sliceagent/code_grep.py +329 -0
- sliceagent/code_index.py +417 -0
- sliceagent/config.py +240 -0
- sliceagent/context_overflow.py +227 -0
- sliceagent/envspec.py +129 -0
- sliceagent/errors.py +167 -0
- sliceagent/events.py +96 -0
- sliceagent/finding_types.py +70 -0
- sliceagent/flags.py +63 -0
- sliceagent/fuzzy.py +135 -0
- sliceagent/guardrails.py +438 -0
- sliceagent/guidance.py +69 -0
- sliceagent/hippocampus.py +581 -0
- sliceagent/hooks.py +334 -0
- sliceagent/interfaces.py +144 -0
- sliceagent/llm.py +695 -0
- sliceagent/loop.py +548 -0
- sliceagent/mcp_client.py +255 -0
- sliceagent/mcp_security.py +77 -0
- sliceagent/memory.py +428 -0
- sliceagent/metrics.py +103 -0
- sliceagent/model_catalog.py +124 -0
- sliceagent/monitor.py +615 -0
- sliceagent/neocortex.py +436 -0
- sliceagent/onboarding.py +323 -0
- sliceagent/oracle.py +36 -0
- sliceagent/pagetable.py +255 -0
- sliceagent/pfc.py +449 -0
- sliceagent/plugins.py +127 -0
- sliceagent/policy.py +234 -0
- sliceagent/procman.py +187 -0
- sliceagent/prompt.py +239 -0
- sliceagent/records.py +108 -0
- sliceagent/recovery.py +119 -0
- sliceagent/regions.py +678 -0
- sliceagent/registry.py +128 -0
- sliceagent/retriever.py +19 -0
- sliceagent/safety.py +332 -0
- sliceagent/sandbox.py +143 -0
- sliceagent/scheduler.py +92 -0
- sliceagent/search_index.py +289 -0
- sliceagent/seed.py +465 -0
- sliceagent/sensory_cortex.py +500 -0
- sliceagent/session.py +222 -0
- sliceagent/skill_provenance.py +71 -0
- sliceagent/skill_usage.py +123 -0
- sliceagent/skills.py +209 -0
- sliceagent/subagent.py +332 -0
- sliceagent/subdir_hints.py +222 -0
- sliceagent/swap.py +182 -0
- sliceagent/taskstate.py +57 -0
- sliceagent/telemetry.py +59 -0
- sliceagent/terminal.py +240 -0
- sliceagent/text_utils.py +56 -0
- sliceagent/tool_summary.py +93 -0
- sliceagent/tools.py +1194 -0
- sliceagent/tui.py +1377 -0
- sliceagent/web.py +354 -0
- sliceagent-0.1.0.dist-info/METADATA +262 -0
- sliceagent-0.1.0.dist-info/RECORD +71 -0
- sliceagent-0.1.0.dist-info/WHEEL +4 -0
- sliceagent-0.1.0.dist-info/entry_points.txt +2 -0
- sliceagent-0.1.0.dist-info/licenses/LICENSE +21 -0
sliceagent/guardrails.py
ADDED
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
"""Per-turn tool-call loop guardrail — the slice's anti-loop defense.
|
|
2
|
+
|
|
3
|
+
The ToolCallSignature / ToolCallGuardrailController design, adapted to sliceagent's
|
|
4
|
+
no-transcript invariant.
|
|
5
|
+
|
|
6
|
+
WHY THIS IS MOAT-CRITICAL
|
|
7
|
+
-------------------------
|
|
8
|
+
The active memory slice ERASES the model's memory of prior identical failed calls:
|
|
9
|
+
each turn is reconstructed fresh, so the model cannot "remember" that it already ran
|
|
10
|
+
the exact same failing command three steps ago. The slice's REPEATED/FAILING tier
|
|
11
|
+
mitigates this softly (it tells the model in prose), but a model can still ignore it.
|
|
12
|
+
This controller is the HARD floor: it counts, per turn, every (tool, canonical-args)
|
|
13
|
+
signature and BLOCKS a call once it has failed N times unchanged, or once a read-only
|
|
14
|
+
call has returned the same result N times with no progress. Because the model has no
|
|
15
|
+
transcript memory of the failure, the blocked-call message is ACTION-ORIENTED: it tells
|
|
16
|
+
the model what to do INSTEAD (the failure context the slice can't carry for it).
|
|
17
|
+
|
|
18
|
+
NO-TRANSCRIPT INVARIANT
|
|
19
|
+
-----------------------
|
|
20
|
+
State lives ONLY in this controller for the duration of ONE turn (reset_for_turn at the
|
|
21
|
+
top of run_turn). It feeds NO durable store and assumes NO growing message history. The
|
|
22
|
+
block decision becomes a synthetic tool RESULT (which the slice folds into its tiers like
|
|
23
|
+
any other result) — never a message appended to a transcript.
|
|
24
|
+
|
|
25
|
+
SLICEAGENT CONVENTIONS
|
|
26
|
+
--------------------
|
|
27
|
+
- "failing" is sliceagent's existing convention (loop.py / regions.record_action /
|
|
28
|
+
neocortex.py): out.startswith("Error") or out.startswith("Exit code"). We do NOT use a
|
|
29
|
+
JSON exit-code classifier (sliceagent has no safe_json_loads and a different result
|
|
30
|
+
shape). Callers may pass `failed=` explicitly; otherwise we classify here.
|
|
31
|
+
- The idempotent / mutating tool sets are sliceagent's actual builtins.
|
|
32
|
+
"""
|
|
33
|
+
from __future__ import annotations
|
|
34
|
+
|
|
35
|
+
import hashlib
|
|
36
|
+
import json
|
|
37
|
+
from dataclasses import dataclass, field
|
|
38
|
+
from typing import Any, Mapping
|
|
39
|
+
|
|
40
|
+
# Read-only (idempotent) builtins. Calling one again with the SAME args and getting the
# SAME result back is treated as "no progress" by the guardrail's idempotent axis.
IDEMPOTENT_TOOL_NAMES = frozenset(("read_file", "list_files", "recall_history"))

# Mutating builtins (plus topic/skill routing). A tool listed here is never treated as
# idempotent: a repeated identical RESULT from it is not an idempotent no-progress
# signal — only its repeated identical FAILURE is.
MUTATING_TOOL_NAMES = frozenset((
    # file edits
    "edit_file", "append_to_file", "str_replace",
    # one-shot execution
    "run_command", "execute_code",
    # live-process exec tools also change state, so they count toward the no-edit floor
    "terminal_open", "terminal_send", "proc_start",
    # topic / skill routing
    "new_topic", "switch_topic", "skill",
))

# Tools that the result_no_progress and call_budget axes never block: these are the
# edit/ask actions the block messages tell the model to take, so blocking them would make
# the axis an inescapable trap. Deliberately narrower than MUTATING_TOOL_NAMES — a
# run_command/execute_code loop is still a no-progress loop and stays blockable.
_RESULT_AXIS_ESCAPE = frozenset(
    ("edit_file", "append_to_file", "str_replace", "write_file", "ask_user")
)

# Known NON-mutating (read/search/ask) tools. The no-edit streak treats every tool that is
# NOT in this set as a potential mutator (pessimistic), so unknown plugin/MCP tools and
# any mutating builtin absent from MUTATING_TOOL_NAMES still drive loop detection.
_NON_MUTATORS = IDEMPOTENT_TOOL_NAMES.union(("grep", "glob", "ask_user"))

# Same-step exact-call dedup eligibility: pure read-only QUERY tools whose identical
# re-execution within ONE LLM step yields the same side-effect-free result, so a duplicate
# can reuse the first call's output. ask_user is excluded (a real interaction even when
# the prompt repeats), as is every mutating/unknown/plugin/MCP tool — a repeat of those may
# be intentional or carry side effects.
DEDUP_SAFE_TOOL_NAMES = IDEMPOTENT_TOOL_NAMES.union(("grep", "glob"))
|
|
70
|
+
|
|
71
|
+
# sliceagent's failing convention for plain-string results — the single source of truth.
# Structured tool results (ToolText) carry `.ok`; guardrail accounting must respect
# that before prose matching so it agrees with loop.run_tool_batch.
_FAIL_PREFIXES = ("Error", "Exit code")


def is_failing_output(output: str | None) -> bool:
    """Return whether a tool result failed.

    A structured ``.ok`` attribute, when present and not ``None``, decides the answer
    (``ok`` falsy means failed). Otherwise the result is treated as a plain string and
    counts as failing when it is non-empty and starts with one of ``_FAIL_PREFIXES``.

    Args:
        output: The tool result; ``None`` and the empty string are never failing.

    Returns:
        ``True`` when the result represents a failure, else ``False``.
    """
    ok = getattr(output, "ok", None)
    if ok is not None:
        return not bool(ok)
    if not output:
        return False
    # str.startswith accepts a tuple, so the prefixes live only in _FAIL_PREFIXES
    # instead of being re-typed here where they could drift from the constant.
    return output.startswith(_FAIL_PREFIXES)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass(frozen=True)
class ToolCallGuardrailConfig:
    """Immutable thresholds for the per-turn (= per-episode) loop detector.

    Hard-blocking is the default posture rather than warn-only: the slice erases the
    model's memory of earlier failures, so the block floor has to be active out of the
    box. The thresholds are intentionally low — once the same exact call has failed
    `exact_failure_block_after` times, the model is in a loop the slice cannot break on
    its own.

    I3 — RESULT axis. The exact-(tool,args) axis misses the live failure mode where the
    agent re-inspects the same state through DISTINCT command text (`ls X`, `ls -la X`,
    an `execute_code` listing): every call has a unique arg signature at count 1, so
    nothing blocks (GR1/2/3). The RESULT axis is tool-AGNOSTIC: it keys on the OUTPUT
    hash across ALL tools (incl. run_command/execute_code), so semantically-redundant
    calls with different text collapse to one progress signature. A repeated identical
    RESULT — even from a 'mutating' tool — means observable state is not changing.

    Note: the two `*_warn*` fields keep their historical names, but the decisions that
    ToolCallGuardrail.before_call builds from them carry block=True.
    """

    # Signature axis: the same (tool, args) FAILED this many times -> block.
    exact_failure_block_after: int = 3
    # Idempotent axis: a read-only call returned the SAME result this many times -> block.
    no_progress_block_after: int = 3
    # I3 RESULT axis (tool-agnostic, keyed on the OUTPUT not the args): the SAME
    # result hash recurring this many times across ANY tools -> block.
    result_repeat_block_after: int = 4
    # This many failing mutating attempts with no successful one in between -> block
    # the next call to a tool in `mutating_tools`.
    no_edit_mutations_before_warn: int = 6
    # This many non-progress tool calls in a row -> budget-floor block.
    call_budget_warn_after: int = 18
    # Size of the bounded per-episode ring of (op_kind, result_hash) signatures.
    trajectory_ring_cap: int = 20
    # Tool-name sets; the factories read the module-level defaults at instantiation.
    idempotent_tools: frozenset[str] = field(default_factory=lambda: IDEMPOTENT_TOOL_NAMES)
    mutating_tools: frozenset[str] = field(default_factory=lambda: MUTATING_TOOL_NAMES)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@dataclass(frozen=True)
class ToolCallSignature:
    """Hashable identity of one tool call: the tool name plus a digest of its canonical
    arguments. Only the digest is kept, never the raw argument values."""

    tool_name: str
    args_hash: str

    @classmethod
    def from_call(cls, tool_name: str, args: Mapping[str, Any] | None) -> "ToolCallSignature":
        """Build the signature for `tool_name` called with `args`.

        Falsy `args` (None or an empty mapping) are canonicalised as an empty mapping.
        """
        canonical = canonical_tool_args(args if args else {})
        return cls(tool_name=tool_name, args_hash=_sha256(canonical))
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@dataclass(frozen=True)
class GuardrailDecision:
    """Outcome of the controller's check for a single call.

    `block` is the only field the hook acts on: when it is True, authorize_tool denies
    the call and surfaces `message` to the model. The remaining fields describe why.
    """

    # True -> deny the call; the default (False) lets it through.
    block: bool = False
    # Reason code. ToolCallGuardrail.before_call emits one of:
    #   allow | repeated_exact_failure | idempotent_no_progress | no_edit_progress
    #   | result_no_progress | call_budget
    code: str = "allow"
    # Action-oriented text shown to the model when the call is blocked.
    message: str = ""
    # Name of the tool the decision is about.
    tool_name: str = ""
    # The counter value that triggered the decision (0 for a plain allow).
    count: int = 0
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# I3 — COARSE op_kind. The trajectory ring stores a tool-AGNOSTIC operation class per step (not the
|
|
146
|
+
# raw tool name), so different tools doing the same kind of thing share a signature. Task-agnostic:
|
|
147
|
+
# only the builtin tool TAXONOMY, never command/argument parsing.
|
|
148
|
+
def op_kind(tool_name: str) -> str:
    """Classify a tool name into the coarse operation class stored in the trajectory ring.

    Returns one of "edit", "read", "list", "exec" or "meta" for the builtin tools listed
    below, and "other" for every name that is not listed.
    """
    taxonomy = (
        ("edit", ("edit_file", "append_to_file", "str_replace")),
        ("read", ("read_file",)),
        ("list", ("list_files",)),
        ("exec", ("run_command", "execute_code")),
        ("meta", ("new_topic", "switch_topic", "skill", "recall_history")),
    )
    for label, members in taxonomy:
        if tool_name in members:
            return label
    return "other"
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def canonical_tool_args(args: Mapping[str, Any]) -> str:
    """Serialise tool arguments to sorted, compact JSON, leaving out the 'note' key.

    'note' is the model's per-turn distilled conclusion (tools.with_note). It rides on
    every call and changes turn-to-turn, so keeping it would make every signature unique
    and HIDE loops; the canonical identity is the real action (path/command/code), not the
    commentary attached to it. Values JSON cannot encode natively are rendered via `str`.

    Raises:
        TypeError: if `args` is not a mapping.
    """
    if not isinstance(args, Mapping):
        raise TypeError(f"tool args must be a mapping, got {type(args).__name__}")
    payload = {}
    for key, value in args.items():
        if key != "note":
            payload[key] = value
    return json.dumps(
        payload,
        ensure_ascii=False,
        sort_keys=True,
        separators=(",", ":"),
        default=str,
    )
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
class ToolCallGuardrail:
    """Per-turn loop-detection controller.

    Lifecycle: `reset_for_turn()` at the top of each turn; `before_call()` in
    authorize_tool (returns a block decision); `after_call()` in transform_tool_result
    (counts the result). Side-effect free except for its own per-turn counters.
    """

    def __init__(self, config: ToolCallGuardrailConfig | None = None):
        """Store `config` (defaults to ToolCallGuardrailConfig()) and start with empty counters."""
        self.config = config or ToolCallGuardrailConfig()
        self.reset_for_turn()

    def reset_for_turn(self) -> None:
        """Drop all per-turn (= per-EPISODE) counters.

        MUST be called at the start of every turn (run_turn), so a fresh user task never
        inherits the prior task's loop counts. Every field here is BOUNDED — the
        result-axis ring is capped at trajectory_ring_cap, never a transcript.
        """
        # signature -> number of consecutive failures of that exact call
        self._exact_failure_counts: dict[ToolCallSignature, int] = {}
        # signature -> (result_hash, repeat_count) for idempotent no-progress detection
        self._no_progress: dict[ToolCallSignature, tuple[str, int]] = {}
        # I3 RESULT axis — a BOUNDED per-episode ring of progress signatures
        # (op_kind, result_hash), last `trajectory_ring_cap` steps. NOT the transcript:
        # fixed-length, op-class + hash only (no args, no output text).
        self._trajectory: list[tuple[str, str]] = []
        # result_hash -> count, derived from the ring (so bounded by its distinct entries).
        self._result_counts: dict[str, int] = {}
        # failing mutating attempts since the last successful one.
        self._mutations_since_edit: int = 0
        # non-progress tool calls since the last progress — the coarse budget floor.
        self._calls_since_edit: int = 0

    def _is_idempotent(self, tool_name: str) -> bool:
        """Return True only for a configured idempotent tool that is not also configured as mutating."""
        if tool_name in self.config.mutating_tools:
            return False
        return tool_name in self.config.idempotent_tools

    def before_call(self, tool_name: str, args: Mapping[str, Any] | None) -> GuardrailDecision:
        """Decide whether to BLOCK this call, based on counts from prior calls THIS turn.

        Pure read of the counters — does not mutate them (after_call does the counting).
        The axes are checked in a fixed order and the first one that trips wins:
        repeated_exact_failure, idempotent_no_progress, no_edit_progress,
        result_no_progress, call_budget. Non-mapping `args` are treated as empty.

        Returns:
            A GuardrailDecision; `block` is True when the call must be denied, otherwise
            a default "allow" decision carrying only `tool_name`.
        """
        signature = ToolCallSignature.from_call(tool_name, _coerce_args(args))

        exact_count = self._exact_failure_counts.get(signature, 0)
        if exact_count >= self.config.exact_failure_block_after:
            return GuardrailDecision(
                block=True,
                code="repeated_exact_failure",
                message=(
                    f"Loop blocked: '{tool_name}' has already failed {exact_count} times this "
                    f"turn with these EXACT arguments — they are in the transcript above. Do NOT "
                    f"retry it unchanged. Read CURRENT ERROR and OPEN FILES, then either fix the "
                    f"root cause with a DIFFERENT call (different args/path/command, or a different "
                    f"tool), or, if the work is already complete, write the final summary and "
                    f"make NO tool call."
                ),
                tool_name=tool_name,
                count=exact_count,
            )

        if self._is_idempotent(tool_name):
            record = self._no_progress.get(signature)
            if record is not None and record[1] >= self.config.no_progress_block_after:
                return GuardrailDecision(
                    block=True,
                    code="idempotent_no_progress",
                    message=(
                        f"Loop blocked: this read-only '{tool_name}' call has returned the SAME "
                        f"result {record[1]} times this turn. Repeating it cannot reveal anything "
                        f"new. Use the result already shown in OPEN FILES or the transcript above, "
                        f"or change the query/path. If you have what you need, act on it or write "
                        f"the final summary."
                    ),
                    tool_name=tool_name,
                    count=record[1],
                )

        # I3 — no-edit-progress axis. `no_edit_mutations_before_warn` FAILING mutating
        # attempts have accumulated with no successful one in between (a successful
        # mutation resets the streak, so productive shell work is never penalized). Only
        # fires on the next call to a configured mutating tool, so a read/answer is never
        # blocked by this axis.
        if (self._mutations_since_edit >= self.config.no_edit_mutations_before_warn
                and tool_name in self.config.mutating_tools):
            return GuardrailDecision(
                block=True,
                code="no_edit_progress",
                message=(
                    f"Loop blocked: your last {self._mutations_since_edit} mutating attempts this turn "
                    f"all FAILED with nothing landing — you are spinning. Re-read OPEN FILES to base "
                    f"the next change on the ACTUAL current contents (a str_replace must match the file "
                    f"verbatim), then make ONE precise edit with DIFFERENT arguments. If the work is "
                    f"already complete, write the final summary and make NO tool call."
                ),
                tool_name=tool_name,
                count=self._mutations_since_edit,
            )

        # I3 RESULT axis (tool-AGNOSTIC) — the same OUTPUT has recurred >= K times in the
        # ring across ANY tools: the agent is re-observing state that isn't changing, a
        # loop the exact-(tool,args) axis cannot see. The EDIT/ask escapes are exempt:
        # blocked calls never advance the ring, so without the exemption this axis would
        # be a permanent per-turn trap that blocks the very action it recommends.
        _, top_count = self._hottest_result()
        if (top_count >= self.config.result_repeat_block_after
                and tool_name not in _RESULT_AXIS_ESCAPE):
            return GuardrailDecision(
                block=True,
                code="result_no_progress",
                message=(
                    f"Loop blocked: you have already seen this EXACT output {top_count} times this "
                    f"turn (across possibly different commands/tools) — the repeats are in the "
                    f"transcript above. Re-observing it cannot reveal anything new. Act on the "
                    f"result already in OPEN FILES with a DIFFERENT step, or — if the work is "
                    f"complete — write the final summary and make NO tool call."
                ),
                tool_name=tool_name,
                count=top_count,
            )

        # Coarse per-turn BUDGET floor: this many non-progress calls in a row means the
        # agent is exploring in circles. Same escape exemption as result_no_progress — a
        # blocked call never runs, so it could never reset the floor.
        if self._calls_since_edit >= self.config.call_budget_warn_after and tool_name not in _RESULT_AXIS_ESCAPE:
            return GuardrailDecision(
                block=True,
                code="call_budget",
                message=(
                    f"Loop blocked: {self._calls_since_edit} tool calls this turn with NO successful "
                    f"change landing — you are exploring in circles (the whole sequence is in the "
                    f"transcript above). Stop calling tools: act on what OPEN FILES already shows, "
                    f"or write your final summary/answer and make NO tool call."
                ),
                tool_name=tool_name,
                count=self._calls_since_edit,
            )

        return GuardrailDecision(tool_name=tool_name)

    def after_call(
        self,
        tool_name: str,
        args: Mapping[str, Any] | None,
        result: str | None,
        *,
        failed: bool | None = None,
    ) -> None:
        """Record one observed result into the per-turn counters. Called AFTER the tool ran.

        Counting only — blocking is before_call's job the next time a signature recurs.

        Args:
            tool_name: Name of the tool that ran.
            args: Its arguments; non-mapping values are treated as empty.
            result: The tool's output; None is hashed as the empty string.
            failed: Explicit failure flag. When None, is_failing_output(result) decides.
        """
        args = _coerce_args(args)
        signature = ToolCallSignature.from_call(tool_name, args)
        if failed is None:
            failed = is_failing_output(result)

        # I3 RESULT axis — record the observed result into the bounded ring regardless of
        # tool or success, so a loop of distinct commands returning the same output (or
        # failing the SAME way) is visible.
        kind = op_kind(tool_name)
        result_hash = _sha256(result or "")
        # An INFORMATION-FREE result (empty, or a "(… produced no output)" / "(no output)"
        # / "(empty)" sentinel) carries no observable state: identical empties from
        # DISTINCT silent commands (mkdir; touch; git add …) are NOT a re-observation
        # loop, so they are kept out of the ring. A genuine loop is still caught by the
        # signature axis and the call-budget floor.
        stripped = (result or "").strip()
        info_free = (not stripped) or stripped.endswith("produced no output)") or stripped in ("(no output)", "(empty)")
        if not info_free:
            self._push_trajectory(kind, result_hash)
        # Budget floor counts NON-PROGRESS calls only. A successful call whose result is
        # not already repeated in the ring is new information and resets the floor; a
        # failed call or a re-read of the SAME output advances it.
        if (not failed) and self._result_counts.get(result_hash, 0) <= 1:
            self._calls_since_edit = 0  # new information landed → reset the floor
        else:
            self._calls_since_edit += 1

        # I3 no-edit axis — a SUCCEEDING potential mutator is progress and resets both
        # the streak and the budget floor; a FAILING one advances the streak.
        if tool_name not in _NON_MUTATORS:  # pessimistic: unknown/plugin tools count as mutators too
            if not failed:
                self._mutations_since_edit = 0
                self._calls_since_edit = 0  # a change landed → the budget floor resets
            else:
                self._mutations_since_edit += 1

        if failed:
            self._exact_failure_counts[signature] = self._exact_failure_counts.get(signature, 0) + 1
            self._no_progress.pop(signature, None)
            return

        # success clears the exact-failure streak for this signature
        self._exact_failure_counts.pop(signature, None)

        if not self._is_idempotent(tool_name):
            self._no_progress.pop(signature, None)
            return

        # Idempotent success: extend the streak if the result is unchanged, else restart at 1.
        previous = self._no_progress.get(signature)
        repeat = previous[1] + 1 if (previous is not None and previous[0] == result_hash) else 1
        self._no_progress[signature] = (result_hash, repeat)

    def _push_trajectory(self, kind: str, result_hash: str) -> None:
        """Append one (op_kind, result_hash) signature to the bounded ring and recount.

        The ring keeps at most `trajectory_ring_cap` most-recent entries; counts are
        rebuilt from the live ring, so a result that scrolls out of the window stops
        counting. A cap of zero or less keeps the ring empty.
        """
        cap = max(self.config.trajectory_ring_cap, 0)
        self._trajectory.append((kind, result_hash))
        # Trim by explicit overflow rather than `[:-cap]`: with cap == 0 that slice is
        # `[:0]` and deletes nothing, which would let the ring grow without bound.
        overflow = len(self._trajectory) - cap
        if overflow > 0:
            del self._trajectory[:overflow]
        counts: dict[str, int] = {}
        for _, h in self._trajectory:
            counts[h] = counts.get(h, 0) + 1
        self._result_counts = counts

    def _hottest_result(self) -> tuple[str, int]:
        """The most-repeated result_hash in the current ring and its count (('', 0) when empty)."""
        if not self._result_counts:
            return ("", 0)
        h = max(self._result_counts, key=self._result_counts.get)
        return (h, self._result_counts[h])
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def guardrail_blocked_result(decision: GuardrailDecision) -> str:
    """Render the synthetic tool result that stands in for a blocked call.

    The text deliberately begins with 'Error' so the slice's existing failing-detection
    (slice.record_action) tallies the block as a failure, keeping it visible in the
    REPEATED/FAILING tier as well.
    """
    return "Error: {}".format(decision.message)
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def _coerce_args(args: Mapping[str, Any] | None) -> Mapping[str, Any]:
    """Pass a mapping through unchanged; replace anything else (including None) with an empty dict."""
    if isinstance(args, Mapping):
        return args
    return {}
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def _sha256(value: str) -> str:
    """Return the hexadecimal SHA-256 digest of `value` encoded as UTF-8."""
    digest = hashlib.sha256()
    digest.update(value.encode("utf-8"))
    return digest.hexdigest()
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
__all__ = [
|
|
427
|
+
"ToolCallGuardrailConfig",
|
|
428
|
+
"ToolCallSignature",
|
|
429
|
+
"GuardrailDecision",
|
|
430
|
+
"ToolCallGuardrail",
|
|
431
|
+
"canonical_tool_args",
|
|
432
|
+
"is_failing_output",
|
|
433
|
+
"guardrail_blocked_result",
|
|
434
|
+
"op_kind",
|
|
435
|
+
"IDEMPOTENT_TOOL_NAMES",
|
|
436
|
+
"MUTATING_TOOL_NAMES",
|
|
437
|
+
"DEDUP_SAFE_TOOL_NAMES",
|
|
438
|
+
]
|
sliceagent/guidance.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Action-oriented guidance strings injected at denial / ceiling boundaries.
|
|
2
|
+
|
|
3
|
+
Strings and pure functions only — no state, no imports. The wording ("re-read before
|
|
4
|
+
retrying; don't repeat a stale call; after repeated failure switch approach"; "default
|
|
5
|
+
to taking action with tools"; "determine your next action") is tuned to be ACTIONABLE so
|
|
6
|
+
the model changes approach instead of spinning on the identical call.
|
|
7
|
+
|
|
8
|
+
These land in a DURABLE tier, not a transcript:
|
|
9
|
+
- DENIAL_NO_PROMPT / DENIAL_USER flow through
|
|
10
|
+
`loop.run_tool_batch` -> `Error: blocked by policy: <reason>` -> `ToolResult`
|
|
11
|
+
-> `slice.record_action` -> `s.last_error` (CURRENT ERROR), re-derivable each
|
|
12
|
+
turn from the durable action record.
|
|
13
|
+
- BUDGET_EXHAUSTED(kind) is the message carried on a `TurnInterrupted` event.
|
|
14
|
+
|
|
15
|
+
No per-session/per-turn computation here: every value is a module-level constant or
|
|
16
|
+
a pure function of its argument, so the system prefix stays byte-stable (cache-warm).
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
# A call needed permission, but there is no interactive channel through which to ask
# the user. Action-oriented: do not re-issue the same blocked call; pick a different,
# permitted route or surface what is needed from the user.
DENIAL_NO_PROMPT: str = " ".join((
    "This call needs permission but no approval channel is available, so it was blocked.",
    "Do NOT retry the identical call — it will be blocked again.",
    "Instead, either accomplish the goal with a tool that does not require approval, or "
    "stop and tell the user exactly which action you need them to authorize and why.",
))

# The user explicitly declined the call. Action-oriented: treat the denial as final for
# this exact call; change approach or ask what the user would prefer instead of
# re-issuing it.
DENIAL_USER: str = " ".join((
    "The user declined this action.",
    "Do NOT retry the identical call — the answer is no.",
    "Instead, take a different approach that respects that decision, or ask the user "
    "how they would like to proceed before trying anything similar.",
))
|
|
37
|
+
|
|
38
|
+
# Budget kind -> wording for the concrete ceiling that was hit, so the message can name
# the right limit. Kinds not listed here get a generic "work budget" wording.
_BUDGET_CEILINGS = dict(
    max_steps="the maximum number of steps for this turn",
    token_budget="the token budget for this turn",
)


# The anti-spin floor: the turn was stopped after repeated loop-blocks (the agent kept
# hitting the same wall instead of asking). Carried on a TurnInterrupted("stuck") event and
# shown to the user, who regains control. The proactive path is ask_user; this is the
# backstop when the model won't self-stop.
STUCK: str = " ".join((
    "Stopped: this turn hit the loop guard repeatedly without making progress.",
    "When you are unsure or blocked, call ask_user with a concise question instead of "
    "retrying — asking is better than spinning.",
    "Control is back with the user; clarify or rephrase the task to continue.",
))


def BUDGET_EXHAUSTED(kind: str) -> str:
    """Build the guidance text for a hard ceiling hit.

    `kind` is expected to be "max_steps" or "token_budget"; any other value falls back
    to a generic "work budget" wording. The message names the ceiling that was reached,
    then asks the model to wrap up usefully instead of silently looping: summarize
    progress and state the single most useful next action. The result depends only on
    `kind`, so it is stable for a given value.
    """
    ceiling = _BUDGET_CEILINGS.get(kind)
    if ceiling is None:
        ceiling = "the work budget for this turn"
    sentences = (
        f"You have reached {ceiling} and cannot continue this turn.",
        "Do not silently retry or keep working past the limit.",
        "Instead, summarize the progress you have made and state the single most "
        "useful next action so the work can resume cleanly.",
    )
    return " ".join(sentences)
|