sliceagent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. sliceagent/__init__.py +3 -0
  2. sliceagent/__main__.py +6 -0
  3. sliceagent/access.py +93 -0
  4. sliceagent/agents.py +173 -0
  5. sliceagent/background_review.py +146 -0
  6. sliceagent/binsniff.py +89 -0
  7. sliceagent/cli.py +890 -0
  8. sliceagent/clock.py +32 -0
  9. sliceagent/code_grep.py +329 -0
  10. sliceagent/code_index.py +417 -0
  11. sliceagent/config.py +240 -0
  12. sliceagent/context_overflow.py +227 -0
  13. sliceagent/envspec.py +129 -0
  14. sliceagent/errors.py +167 -0
  15. sliceagent/events.py +96 -0
  16. sliceagent/finding_types.py +70 -0
  17. sliceagent/flags.py +63 -0
  18. sliceagent/fuzzy.py +135 -0
  19. sliceagent/guardrails.py +438 -0
  20. sliceagent/guidance.py +69 -0
  21. sliceagent/hippocampus.py +581 -0
  22. sliceagent/hooks.py +334 -0
  23. sliceagent/interfaces.py +144 -0
  24. sliceagent/llm.py +695 -0
  25. sliceagent/loop.py +548 -0
  26. sliceagent/mcp_client.py +255 -0
  27. sliceagent/mcp_security.py +77 -0
  28. sliceagent/memory.py +428 -0
  29. sliceagent/metrics.py +103 -0
  30. sliceagent/model_catalog.py +124 -0
  31. sliceagent/monitor.py +615 -0
  32. sliceagent/neocortex.py +436 -0
  33. sliceagent/onboarding.py +323 -0
  34. sliceagent/oracle.py +36 -0
  35. sliceagent/pagetable.py +255 -0
  36. sliceagent/pfc.py +449 -0
  37. sliceagent/plugins.py +127 -0
  38. sliceagent/policy.py +234 -0
  39. sliceagent/procman.py +187 -0
  40. sliceagent/prompt.py +239 -0
  41. sliceagent/records.py +108 -0
  42. sliceagent/recovery.py +119 -0
  43. sliceagent/regions.py +678 -0
  44. sliceagent/registry.py +128 -0
  45. sliceagent/retriever.py +19 -0
  46. sliceagent/safety.py +332 -0
  47. sliceagent/sandbox.py +143 -0
  48. sliceagent/scheduler.py +92 -0
  49. sliceagent/search_index.py +289 -0
  50. sliceagent/seed.py +465 -0
  51. sliceagent/sensory_cortex.py +500 -0
  52. sliceagent/session.py +222 -0
  53. sliceagent/skill_provenance.py +71 -0
  54. sliceagent/skill_usage.py +123 -0
  55. sliceagent/skills.py +209 -0
  56. sliceagent/subagent.py +332 -0
  57. sliceagent/subdir_hints.py +222 -0
  58. sliceagent/swap.py +182 -0
  59. sliceagent/taskstate.py +57 -0
  60. sliceagent/telemetry.py +59 -0
  61. sliceagent/terminal.py +240 -0
  62. sliceagent/text_utils.py +56 -0
  63. sliceagent/tool_summary.py +93 -0
  64. sliceagent/tools.py +1194 -0
  65. sliceagent/tui.py +1377 -0
  66. sliceagent/web.py +354 -0
  67. sliceagent-0.1.0.dist-info/METADATA +262 -0
  68. sliceagent-0.1.0.dist-info/RECORD +71 -0
  69. sliceagent-0.1.0.dist-info/WHEEL +4 -0
  70. sliceagent-0.1.0.dist-info/entry_points.txt +2 -0
  71. sliceagent-0.1.0.dist-info/licenses/LICENSE +21 -0
sliceagent/hooks.py ADDED
@@ -0,0 +1,334 @@
1
+ """Hooks: the policy seam. The loop calls these; the host supplies them.
2
+
3
+ This is how policy stays OUT of the moat: the Oracle, permission gate, and token
4
+ budget are all hooks, not hardcoded loop logic.
5
+
6
+ Hook return conventions (all optional, return None to no-op):
7
+ before_step(step) -> {"block": bool, "reason": str} | None
8
+ record_step_usage(usage) -> {"stop_turn": bool} | None
9
+ after_step(step, usage, stop_reason) -> {"stop_turn": bool} | None
10
+ should_continue_after_stop(stop) -> {"continue": bool} | None
11
+ authorize_tool(name, args) -> ToolDecision
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import re
16
+ from dataclasses import dataclass
17
+
18
+ from .guardrails import ToolCallGuardrail
19
+ from .guidance import DENIAL_NO_PROMPT, DENIAL_USER
20
+
21
# Commands a wide AGENT_AUTO_APPROVE glob (e.g. "git *") must NEVER silently approve: destructive ops that
# are not catastrophic (so the policy floor lets them through to ASK) yet discard work/data. These always
# fall through to a confirmation even when a glob matches. The catastrophic floor is screened too (below).
# Every pattern is compiled case-insensitively, so `-D` also matches `-d` and `-r` also matches `-R`;
# that over-matching only ever causes an extra confirmation, never a silent approval.
_DESTRUCTIVE_AUTO = [
    # git subcommands that rewrite history or throw away working-tree / index state
    re.compile(r"\bgit\b[^\n]*\b(reset|clean|checkout|restore|rebase|filter-branch)\b", re.I),
    # forced branch deletion
    re.compile(r"\bgit\b[^\n]*\bbranch\b[^\n]*\s-D\b", re.I),
    # discarding stashed work
    re.compile(r"\bgit\b[^\n]*\bstash\b[^\n]*\b(drop|clear)\b", re.I),
    # force pushes (long and short spellings)
    re.compile(r"\bgit\b[^\n]*\bpush\b[^\n]*(--force|--force-with-lease|\s-f\b)", re.I),
    # any recursive rm within the same simple command (lookahead stops at | ; & or newline):
    # a short-flag cluster containing r (e.g. -r, -rf, -fR) OR the long spelling --recursive,
    # which the short-flag alternative alone cannot match because of the second dash.
    re.compile(r"\brm\b(?=[^|;&\n]*\s(?:-[a-z]*r|--recursive\b))", re.I),
    # disk/file wipers
    re.compile(r"\b(shred|mkfs|wipefs)\b", re.I),
]
32
+
33
+
34
def _is_destructive_command(name: str, cmd: str) -> bool:
    """Report whether `cmd` is barred from silent auto-approval.

    A command is barred when the policy floor (`policy.no_dangerous_commands`) returns
    anything other than None for it, or when any `_DESTRUCTIVE_AUTO` pattern matches it.

    Args:
        name: tool name, forwarded to the policy check.
        cmd: the command text to screen.

    Returns:
        True if the command must go through a confirmation, False otherwise.
    """
    from . import policy  # deferred: policy imports hooks, so import here to avoid a cycle at load

    floor_verdict = policy.no_dangerous_commands(name, {"command": cmd})
    if floor_verdict is None:
        # Not catastrophic per the policy floor; still screen for work-discarding commands.
        for pattern in _DESTRUCTIVE_AUTO:
            if pattern.search(cmd):
                return True
        return False
    return True
40
+
41
+
42
@dataclass
class ToolDecision:
    """Result of authorizing one tool call (what `authorize_tool` returns)."""
    allow: bool  # True lets the call run; CompositeHooks returns the first decision where this is False
    reason: str = ""  # explanation text; PermissionHook forwards it to `on_ask` and uses it for denial messages
    ask: bool = False  # policy abstains to an interactive prompt (resolved by PermissionHook)
    # Does this block count toward the per-turn STUCK floor (loop.py STUCK_BLOCK_BUDGET)? True for a genuine
    # SPIN (a repeated failing call, a policy denial the model keeps retrying); FALSE for a harmless dedup
    # (re-reading the same file → the guard just skips it). So a long, legit exploration that re-reads a file
    # a few times is NOT killed as "stuck" — only real spinning is.
    counts_as_stuck: bool = True
52
+
53
+
54
# Shared "allowed" decision returned by the hooks in this module when nothing objects.
# NOTE(review): ToolDecision is not a frozen dataclass, so this shared instance should be treated as read-only.
ALLOW = ToolDecision(True)
55
+
56
+
57
class Hooks:
    """No-op base for the hook surface; subclasses override only the seams they need.

    Every method returns None ("no opinion / leave unchanged") except `authorize_tool`,
    which returns the shared ALLOW decision.
    """

    def before_step(self, step: int):
        """Base implementation: no opinion (a blocking hook returns {"block": bool, "reason": str})."""

    def record_step_usage(self, usage: dict):
        """Base implementation: no opinion (a stopping hook returns {"stop_turn": bool})."""

    def after_step(self, step: int, usage: dict, stop_reason: str):
        """Base implementation: no opinion (a stopping hook returns {"stop_turn": bool})."""

    def should_continue_after_stop(self, stop_reason: str):
        """Base implementation: no opinion (a hook forcing another round returns {"continue": bool})."""

    def authorize_tool(self, name: str, args: dict) -> ToolDecision:
        """Base implementation: every tool call is allowed."""
        return ALLOW

    def reset_for_turn(self):
        """Clear per-turn state when a user task begins (fires ONCE per turn, not per step).

        The guardrail uses this to wipe cross-step loop counters so they do not leak
        from one task into the next. The base implementation does nothing.
        """

    # --- mutating seams (events can't mutate; these can) ---
    def prepare_messages(self, messages: list[dict]):
        """Final opportunity to rewrite the model-visible messages before the LLM call
        (e.g. to inject context). Return the new messages, or None to keep them as-is."""

    def transform_tool_result(self, name: str, args: dict, output: str):
        """Rewrite a tool result before it enters the slice (e.g. redaction, formatting).
        Return the new output, or None to keep it as-is."""
89
+
90
+
91
class CompositeHooks(Hooks):
    """Present several hooks as one.

    Combination rules: the first blocking `before_step` wins; any `stop_turn` stops the turn;
    the first `continue` wins; the first non-allowing tool decision wins; message and
    tool-result transforms are chained in hook order.
    """

    def __init__(self, *hooks: Hooks):
        self.hooks = hooks

    def before_step(self, step):
        # Lazy generator: hooks are consulted in order and iteration stops at the first block.
        verdicts = (hook.before_step(step) for hook in self.hooks)
        return next((v for v in verdicts if v and v.get("block")), None)

    def record_step_usage(self, usage):
        # Every hook must observe the usage (these callbacks have side effects, e.g. a budget
        # tally), so never stop early once one of them asks to stop.
        stop = False
        for hook in self.hooks:
            result = hook.record_step_usage(usage)
            if result and result.get("stop_turn"):
                stop = True
        return {"stop_turn": True} if stop else None

    def after_step(self, step, usage, stop_reason):
        # Same rule as record_step_usage: call all hooks, then combine.
        stop = False
        for hook in self.hooks:
            result = hook.after_step(step, usage, stop_reason)
            if result and result.get("stop_turn"):
                stop = True
        return {"stop_turn": True} if stop else None

    def should_continue_after_stop(self, stop_reason):
        # Lazy generator: the first hook that asks to continue short-circuits the rest.
        answers = (hook.should_continue_after_stop(stop_reason) for hook in self.hooks)
        return next((a for a in answers if a and a.get("continue")), None)

    def authorize_tool(self, name, args):
        for hook in self.hooks:
            decision = hook.authorize_tool(name, args)
            if decision.allow:
                continue
            return decision  # first non-allowing decision wins
        return ALLOW

    def prepare_messages(self, messages):
        # Each hook sees the output of the previous one; None overall means "nobody changed anything".
        latest = None
        for hook in self.hooks:
            replacement = hook.prepare_messages(messages if latest is None else latest)
            if replacement is not None:
                latest = replacement
        return latest

    def transform_tool_result(self, name, args, output):
        # Same chaining as prepare_messages, applied to the tool output.
        latest = None
        for hook in self.hooks:
            replacement = hook.transform_tool_result(name, args, output if latest is None else latest)
            if replacement is not None:
                latest = replacement
        return latest

    def reset_for_turn(self):
        for hook in self.hooks:
            hook.reset_for_turn()
147
+
148
+
149
+ # --- concrete hooks ---
150
+
151
class OracleHook(Hooks):
    """Done-gate backed by an oracle (tests/lint): when the model ends its turn, run
    `oracle.verify()`; on failure, report the output and force another round."""

    def __init__(self, oracle, on_feedback):
        self.on_feedback = on_feedback  # callable(output:str) -> records into the slice
        self.oracle = oracle

    def should_continue_after_stop(self, stop_reason):
        if stop_reason == "end_turn":
            try:
                passed, report = self.oracle.verify()
            except Exception as exc:  # noqa: BLE001 — a verify ERROR must FORCE another turn, never silently pass the done-gate
                passed = False
                report = f"verification could not run: {type(exc).__name__}: {exc}"
            if not passed:
                # Record into the slice as well (for the NEXT turn's seed / durable cache).
                self.on_feedback(report)
                # CRITICAL: the failure detail must travel on the MESSAGE channel. Under the accumulate
                # loop the seed is built once and never re-rendered mid-turn, so a slice mutation
                # (last_error) is invisible to THIS turn's retry; `feedback` is what the loop appends
                # as the model's next input.
                return {"continue": True, "feedback": f"Verification failed — fix this, then finish:\n{report}"}
        return None
173
+
174
+
175
# Feedback message SelfCheckHook returns when the model declares done: a three-step
# definition-of-done checklist (list requirements, confirm each against the real end-state, fix gaps).
_SELF_CHECK = (
    "STOP — definition-of-done check (required). Before you finish, verify your work against "
    "the task's REAL acceptance criteria:\n"
    "1) List EVERY concrete requirement: start from your STANDING REQUIREMENTS contract if you have one "
    "(each open '[ ]' item is binding), and ALSO re-read the task for anything not yet recorded — the exact "
    "output file path(s), required fields/values/format, each distinct sub-task, and any 'do not change X'.\n"
    "2) For EACH requirement, CONFIRM it against the ACTUAL end-state right now — run a command or read "
    "the real file (do NOT trust your memory, a note, or a schema-shape check): the required output "
    "exists at the EXACT path, its contents/values are correct, every sub-task is done, and you changed "
    "nothing you were told to leave alone. Call requirement_done(...) on each contract item you confirm.\n"
    "3) If anything is unmet or unverified, fix it and re-check. When a value must match something that "
    "already exists (a file, a git object, expected output), COPY it exactly — do not retype it.\n"
    "Finish only when ALL requirements are confirmed against the real end-state. If everything already "
    "checks out, just say so and finish — do not make changes for their own sake."
)
190
+
191
+
192
class SelfCheckHook(Hooks):
    """Self-sourced definition-of-done gate for AUTONOMOUS runs (nobody is there to catch a
    premature 'done'). It is the no-oracle cousin of OracleHook.

    When the model ends its turn, the hook answers with the `_SELF_CHECK` feedback message,
    forcing a verification round. A later 'done' is accepted only if a tool step happened since
    the gate last fired; a bare re-assertion re-fires the gate. The number of fires per turn is
    capped (`max_fires`, overridable via env AGENT_SELFCHECK_MAX, minimum 1), so it cannot loop
    forever. The hook only appends a message and observes tool activity; the agent does the
    actual verification work.
    """

    def __init__(self, max_fires: int = 3):
        import os

        configured = os.environ.get("AGENT_SELFCHECK_MAX") or max_fires
        try:
            ceiling = int(configured)
        except (TypeError, ValueError):
            ceiling = max_fires  # a non-numeric env value must not crash hook construction
        self._max = max(1, ceiling)
        self._fires = 0
        self._acted = False  # did the model run a tool since the gate last fired?

    def reset_for_turn(self):
        self._fires, self._acted = 0, False

    def after_step(self, step: int, usage: dict, stop_reason: str):
        # A "tool_use" stop means the model actually ran verification/fix tools this round.
        if stop_reason == "tool_use":
            self._acted = True
        return None

    def should_continue_after_stop(self, stop_reason):
        verified_since_nudge = self._fires > 0 and self._acted  # honest done after a nudge → accept
        exhausted = self._fires >= self._max  # bounded → never loop
        if stop_reason != "end_turn" or verified_since_nudge or exhausted:
            return None
        self._fires += 1
        self._acted = False
        return {"continue": True, "feedback": _SELF_CHECK}
231
+
232
+
233
class PermissionHook(Hooks):
    """Gate tool execution. `policy(name, args) -> ToolDecision`.

    When a policy returns `ask`, resolve it interactively via `on_ask(name, args, reason)
    -> 'yes'|'no'|'always'` (the host supplies a TTY prompt). Non-interactive hosts
    (on_ask=None) deny an `ask` — safe by default. The verdict is compared after trimming
    whitespace and lowercasing; anything other than 'yes'/'always' is a denial.

    'always' memorizes a session approval — but keyed by the CALL, not the bare tool name
    (rule patterns). Approving one shell command must NOT bless every shell command:
    run_command/execute_code are remembered by their exact command/code; other tools (already
    gated by policy) are remembered by name. `auto_approve` pre-seeds fnmatch rules matched
    against the command (e.g. ["git status*", "ls *"]) so safe read-only commands never prompt.

    Tool arguments are model-produced, so the command-like argument is coerced to text before
    it is used as an approval key or matched against a rule (a list or number must not crash
    authorization).
    """

    # Tools whose approval is remembered per exact command text rather than per tool name.
    _CMD_TOOLS = ("run_command", "execute_code", "proc_start", "terminal_open", "terminal_send")

    def __init__(self, policy, on_ask=None, auto_approve=None):
        self.policy = policy
        self.on_ask = on_ask
        self._approved: set[str] = set()  # exact approval keys (call patterns, not bare tool names)
        self._rules: list[str] = list(auto_approve or [])  # pre-seeded fnmatch globs over the command

    @staticmethod
    def _command_text(args: dict) -> str:
        """Return the call's command-like argument (command, code, or input) as stripped text.

        Returns "" when none of the three is present or truthy. Non-string values are
        converted with `str()` instead of raising on `.strip()`.
        """
        raw = args.get("command") or args.get("code") or args.get("input") or ""
        if not isinstance(raw, str):
            raw = str(raw)
        return raw.strip()

    @classmethod
    def _key(cls, name: str, args: dict) -> str:
        """Build the approval key under which an 'always' verdict is remembered."""
        # command-SPECIFIC for the dangerous tools — approving `npm test` must not auto-allow `rm -rf`.
        if name in cls._CMD_TOOLS:
            return f"{name}:{cls._command_text(args)}"
        return name  # name-level for the rest (policy already gates them)

    def _pre_allowed(self, name: str, args: dict, key: str) -> bool:
        """True if the call needs no prompt: remembered earlier, or matched by a safe auto-approve rule."""
        if key in self._approved:
            return True
        cmd = self._command_text(args)
        if cmd and self._rules:
            import fnmatch
            # NOTE(review): a glob's `*` also spans shell operators, so "ls *" matches
            # "ls x && other-cmd"; only the destructive screen below stands between such a
            # chained command and silent approval. Consider rejecting control operators here.
            if any(fnmatch.fnmatch(cmd, rule) for rule in self._rules):
                # A broad glob must NOT silently green-light a destructive command — fall through to ask.
                return not _is_destructive_command(name, cmd)
        return False

    def authorize_tool(self, name, args):
        """Run the policy; resolve an `ask` via memory, rules, or the interactive prompt."""
        d = self.policy(name, args)
        if not d.ask:
            return d  # a definite allow/deny from policy is final
        key = self._key(name, args)
        if self._pre_allowed(name, args, key):
            return ALLOW
        if self.on_ask is None:
            return ToolDecision(False, DENIAL_NO_PROMPT)
        # Normalize defensively: a prompt may hand back trailing whitespace or a non-string.
        verdict = str(self.on_ask(name, args, d.reason) or "no").strip().lower()
        if verdict == "always":
            self._approved.add(key)  # remember THIS call pattern, not the whole tool
            return ALLOW
        return ALLOW if verdict == "yes" else ToolDecision(False, DENIAL_USER)
286
+
287
+
288
class BudgetHook(Hooks):
    """Stop the turn once cumulative tokens cross a ceiling.

    `max_total_tokens` is the ceiling; `spent` is the running prompt+completion total for
    the current turn.
    """

    def __init__(self, max_total_tokens: int):
        self.max = max_total_tokens
        self.spent = 0

    def reset_for_turn(self):
        # PER-TURN budget: reset the tally at the start of each user task (run_turn calls this). Without
        # this, the cap silently became a whole-SESSION budget across the REPL. A true
        # session-wide cap, if ever wanted, should be a separate named hook — not this one.
        self.spent = 0

    def record_step_usage(self, usage):
        """Add this step's tokens to the tally; return {"stop_turn": True} once the ceiling is reached.

        A missing usage report (None) or a token field that is absent or None counts as 0,
        so a provider that omits usage data cannot crash the budget check with int(None).
        """
        usage = usage or {}
        prompt = int(usage.get("prompt_tokens") or 0)
        completion = int(usage.get("completion_tokens") or 0)
        self.spent += prompt + completion
        return {"stop_turn": True} if self.spent >= self.max else None
304
+
305
+
306
class GuardrailHook(Hooks):
    """Cross-step loop guard built on ToolCallGuardrail: blocks a tool call that repeats an
    identical failing call, or an idempotent call that keeps making no progress. Its state is
    per-turn (`reset_for_turn` clears it), so counters never carry over between user tasks."""

    def __init__(self, config=None):
        self.guard = ToolCallGuardrail(config)

    def reset_for_turn(self):
        self.guard.reset_for_turn()

    def authorize_tool(self, name, args):
        verdict = self.guard.before_call(name, args)
        if verdict.block:
            # Only a HARD spin counts toward STUCK: a repeated FAILING call, or no-edit-progress
            # (failing edits). A deduped idempotent/no-progress read is harmless — it is blocked
            # (skipped) without killing the turn, so a long exploration that re-reads a file is
            # not falsely flagged as stuck.
            is_hard_spin = verdict.code in ("repeated_exact_failure", "no_edit_progress")
            return ToolDecision(False, verdict.message, counts_as_stuck=is_hard_spin)
        return ALLOW

    def transform_tool_result(self, name, args, output):
        # NEVER feed a guardrail/policy BLOCK back into the counters: a blocked call never ran, so
        # counting its synthetic "Error: blocked by policy:" result as a real failure would advance
        # the failing / no-edit-progress axes and falsely escalate a harmless soft-block into a
        # hard 'stuck' turn-kill.
        was_blocked = isinstance(output, str) and output.startswith("Error: blocked by policy:")
        if not was_blocked:
            self.guard.after_call(name, args, output)
        return None  # observe only; the output itself is never rewritten
@@ -0,0 +1,144 @@
1
+ """The contracts the core depends on — never the implementations.
2
+
3
+ The moat (loop + tiers) talks only to these. Everything commodity (LLM I/O,
4
+ retrieval, tool execution/sandbox, verification) lives behind them and is swappable.
5
+ Policy (Oracle/permissions/budget) is supplied via hooks.py.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass, field
10
+ from typing import Protocol, runtime_checkable
11
+
12
+
13
@dataclass
class ToolCall:
    """One tool invocation, as carried in `AssistantMessage.tool_calls`."""
    id: str  # call identifier — presumably provider-assigned; confirm against the LLM client
    name: str  # tool name (same `name` ToolHost.run receives)
    args: dict  # tool arguments (same `args` ToolHost.run receives)
18
+
19
+
20
@dataclass
class AssistantMessage:
    """What `LLMClient.complete` returns: optional text plus any requested tool calls."""
    content: str | None  # assistant text; may be None
    tool_calls: list[ToolCall] = field(default_factory=list)  # empty list when no tools were requested
    usage: dict | None = None  # {"prompt_tokens": int, "completion_tokens": int}
    finish_reason: str | None = None  # provider's raw finish reason → normalized by the loop
26
+
27
+
28
@dataclass
class Snippet:
    """A scored piece of text with its path; the element type returned by
    `Retriever.retrieve` and `Memory.recall`."""
    path: str  # where the text came from
    text: str  # the snippet body
    score: float = 0.0  # ranking score — scale and direction are backend-defined (TODO confirm)
33
+
34
+
35
@dataclass
class PageRef:
    """A bounded reference to one PAGE the PageTable can surface into the slice — the unified shape
    every read/retrieval backend (code map, project-notes, cross-session episodes) returns from
    PageTable.lookup(). Carries RAW text (`preview`); the renderer fences it (wrap_untrusted) so
    injection-fencing stays at ONE layer. `handle` locates the page (a repo-map marker, a subtree
    path, a session·turn locator); `untrusted` flags re-injected external content (default True)."""
    handle: str  # locator for the page
    kind: str  # page category — the set of valid values is not defined in this module
    preview: str  # RAW, unfenced text; fencing is the renderer's job
    score: float = 0.0  # ranking score — scale is backend-defined (TODO confirm)
    untrusted: bool = True  # treated as external content unless a backend says otherwise
47
+
48
+
49
@dataclass
class TaskRef:
    """A bounded index row for the OTHER OPEN THREADS tier (Step 3).

    This is the element type returned by `Memory.list_session_tasks`."""
    task_id: str
    title: str
    status: str  # active | parked | done | abandoned
    updated: str = ""  # last-update marker as text — format not defined here (TODO confirm)
56
+
57
+
58
@dataclass
class TaskState:
    """Resumable, distilled state for one task = the serializable Slice fields. Stores REFS
    (file paths + anchors), never file contents — ground truth is re-read from disk on resume.
    Transient tiers (recent, action_log, active_skills) are intentionally NOT serialized.

    This is the type exchanged with `Memory.checkpoint_task` / `Memory.load_task`."""
    task_id: str
    session_id: str = ""
    title: str = ""
    status: str = "active"  # presumably the same vocabulary as TaskRef.status — confirm
    goal: str = ""
    findings: list[str] = field(default_factory=list)
    finding_source: dict[str, str] = field(default_factory=dict)  # finding -> provenance tier (carried; else resume upgrades 'claim'→'tool-note')
    requirements: list[dict] = field(default_factory=list)  # STANDING REQUIREMENTS contract (carried)
    plan: list[dict] = field(default_factory=list)  # PLAN / TodoWrite steps + status (carried)
    mission: str = ""  # MISSION north-star objective (carried)
    open_report: str = ""  # OPEN USER REPORT blocker (carried; the "it's broken" push-back must survive resume)
    world: dict = field(default_factory=dict)  # agent WORLD MODEL (carried; was dropped on resume)
    active_files: list[str] = field(default_factory=list)
    edited_files: list[str] = field(default_factory=list)  # list on the wire; a set in the Slice
    edit_anchor: dict[str, str] = field(default_factory=dict)
    last_error: str = ""
    since_edit: int = 0
    links: list[str] = field(default_factory=list)  # task-graph edges (Step 3)
    tags: str = ""  # comma-joined (matches remember()/_tags)
    resolution: str = ""
83
+
84
+
85
@runtime_checkable
class LLMClient(Protocol):
    """Provider-agnostic completion + tool-calling. (implemented over an official LLM SDK)
    May optionally expose `is_retryable(error) -> bool` for the retry policy."""
    # One completion round: takes the message list and tool schemas, returns an AssistantMessage.
    def complete(self, messages: list[dict], tools: list[dict]) -> AssistantMessage: ...
90
+
91
+
92
@runtime_checkable
class ToolHost(Protocol):
    """Executes tools, ideally behind a sandbox. (backed by a container sandbox + MCP tools)"""
    # Tool schemas — presumably what is passed as `tools` to LLMClient.complete (confirm in the loop).
    def schemas(self) -> list[dict]: ...
    # Execute one tool by name with its arguments; the result comes back as text.
    def run(self, name: str, args: dict) -> str: ...
    def read_text(self, path: str) -> str: ...  # reconstruct the artifacts tier (raises if missing)
    def accesses(self, name: str, args: dict) -> list: ...  # resource accesses for the scheduler
99
+
100
+
101
@runtime_checkable
class Retriever(Protocol):
    """Code discovery for the RELATED CODE tier (repo search). (build: ripgrep + tree-sitter)"""
    # Returns Snippets for `query`; `k` (default 6) is presumably the maximum result count — confirm.
    def retrieve(self, query: str, k: int = 6) -> list[Snippet]: ...
105
+
106
+
107
@runtime_checkable
class Memory(Protocol):
    """Cross-session memory + the durable STATE VAULT (episodic cache, task-state, lessons).
    Distinct from Retriever (memem indexes a curated vault, NOT source code). `is_durable` is the
    structural no-op marker: NullMemory sets it False so hosts skip cache/checkpoint wiring (keeps
    evals deterministic). The full surface is frozen here; implementations land incrementally.
    NOTE: @runtime_checkable isinstance() verifies method-NAME presence only — not signatures or
    return types; behavioral fidelity is enforced by the round-trip tests."""
    is_durable: bool  # False marks a no-op implementation (see class docstring)
    # --- long-term lessons (exists) ---
    def recall(self, query: str, k: int = 6, paths: list[str] | None = None) -> list[Snippet]: ...
    def remember(self, content: str, *, title: str = "", scope: str = "default", tags: str = "",
                 paths: list[str] | None = None) -> None: ...
    # --- episodic cache (lossless; never recalled into the LLM context) ---
    def append_episode(self, session_id: str, task_id: str, turn: int, record: dict) -> None: ...
    # read side: the model's on-demand valve into the cold cache (recall_history tool). Returns
    # raw line dicts ({v,session_id,task_id,turn,ts,record}); the host renders/bounds them.
    def read_episodes(self, session_id: str, *, limit: int | None = None) -> list[dict]: ...
    # cross-session FTS5 discovery over the durable episode index (item 12; additive).
    # Returns bounded hit dicts; [] when the index is unavailable. Single-session reads use
    # read_episodes; this is the ACROSS-sessions counterpart.
    def search_episodes(self, query: str, *, limit: int = 5, exclude_session: str | None = None,
                        only_session: str | None = None) -> list[dict]: ...
    # --- task state / resume ---
    # checkpoint_task stores a TaskState; load_task returns it, or None (presumably when the id is unknown — confirm).
    def checkpoint_task(self, task: TaskState) -> None: ...
    def load_task(self, task_id: str) -> TaskState | None: ...
    def list_session_tasks(self, session_id: str) -> list[TaskRef]: ...
    # --- consolidation / retrieval-feedback (declared now; implemented in later steps) ---
    def mark_used(self, memory_id: str) -> None: ...
    # llm = the abstract LLMClient contract (llm-agnostic — never a concrete provider type); returns a
    # stats dict {lessons, skills, skills_rejected, errors} so callers report the truth, not a blind success.
    def consolidate(self, session_id: str, *, llm=None, mode: str = "deterministic") -> dict: ...
139
+
140
+
141
@runtime_checkable
class Oracle(Protocol):
    """Ground-truth verification independent of retrieval. (backed by the project's test/lint runners)"""
    # Returns (ok, output): `ok` is the pass/fail flag, `output` is the text OracleHook relays as feedback on failure.
    def verify(self) -> tuple[bool, str]: ...