PyPI - sliceagent - Versions diffs - 0.1.0__py3-none-any.whl - Mend

sliceagent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

sliceagent/__init__.py +3 -0
sliceagent/__main__.py +6 -0
sliceagent/access.py +93 -0
sliceagent/agents.py +173 -0
sliceagent/background_review.py +146 -0
sliceagent/binsniff.py +89 -0
sliceagent/cli.py +890 -0
sliceagent/clock.py +32 -0
sliceagent/code_grep.py +329 -0
sliceagent/code_index.py +417 -0
sliceagent/config.py +240 -0
sliceagent/context_overflow.py +227 -0
sliceagent/envspec.py +129 -0
sliceagent/errors.py +167 -0
sliceagent/events.py +96 -0
sliceagent/finding_types.py +70 -0
sliceagent/flags.py +63 -0
sliceagent/fuzzy.py +135 -0
sliceagent/guardrails.py +438 -0
sliceagent/guidance.py +69 -0
sliceagent/hippocampus.py +581 -0
sliceagent/hooks.py +334 -0
sliceagent/interfaces.py +144 -0
sliceagent/llm.py +695 -0
sliceagent/loop.py +548 -0
sliceagent/mcp_client.py +255 -0
sliceagent/mcp_security.py +77 -0
sliceagent/memory.py +428 -0
sliceagent/metrics.py +103 -0
sliceagent/model_catalog.py +124 -0
sliceagent/monitor.py +615 -0
sliceagent/neocortex.py +436 -0
sliceagent/onboarding.py +323 -0
sliceagent/oracle.py +36 -0
sliceagent/pagetable.py +255 -0
sliceagent/pfc.py +449 -0
sliceagent/plugins.py +127 -0
sliceagent/policy.py +234 -0
sliceagent/procman.py +187 -0
sliceagent/prompt.py +239 -0
sliceagent/records.py +108 -0
sliceagent/recovery.py +119 -0
sliceagent/regions.py +678 -0
sliceagent/registry.py +128 -0
sliceagent/retriever.py +19 -0
sliceagent/safety.py +332 -0
sliceagent/sandbox.py +143 -0
sliceagent/scheduler.py +92 -0
sliceagent/search_index.py +289 -0
sliceagent/seed.py +465 -0
sliceagent/sensory_cortex.py +500 -0
sliceagent/session.py +222 -0
sliceagent/skill_provenance.py +71 -0
sliceagent/skill_usage.py +123 -0
sliceagent/skills.py +209 -0
sliceagent/subagent.py +332 -0
sliceagent/subdir_hints.py +222 -0
sliceagent/swap.py +182 -0
sliceagent/taskstate.py +57 -0
sliceagent/telemetry.py +59 -0
sliceagent/terminal.py +240 -0
sliceagent/text_utils.py +56 -0
sliceagent/tool_summary.py +93 -0
sliceagent/tools.py +1194 -0
sliceagent/tui.py +1377 -0
sliceagent/web.py +354 -0
sliceagent-0.1.0.dist-info/METADATA +262 -0
sliceagent-0.1.0.dist-info/RECORD +71 -0
sliceagent-0.1.0.dist-info/WHEEL +4 -0
sliceagent-0.1.0.dist-info/entry_points.txt +2 -0
sliceagent-0.1.0.dist-info/licenses/LICENSE +21 -0

sliceagent/prompt.py ADDED Viewed

@@ -0,0 +1,239 @@
+"""The stable SYSTEM prompt — byte-cacheable, task-agnostic and LLM-agnostic. Structured into
+sections; binding rules in <tags> (models obey tag-delimited contracts more literally than
+prose). Tool MECHANICS live in the tool schemas (sent via the API's tools= channel) — NOT
+restated here. The volatile per-turn tiers are appended as the user message by seed.py's
+render_slice; this module owns only the constant text spliced into the system message."""
+from __future__ import annotations
+import os
+import sys
# SYSTEM_PROMPT is the constant (cacheable) system-message template. It is organized into sections, with
# the binding rules wrapped in <tags>. Tool mechanics are deliberately absent from the text (they are sent
# through the tool schemas on the API's tools= channel), and nothing here is specific to a model family, a
# programming language, or a particular tool. The literal {{MEMORY_MODEL}} marker is a splice point that
# is filled in later; the volatile per-turn tiers are appended separately, as the user message, by
# render_slice. Every byte of the string matters for prompt caching — edit with care.
SYSTEM_PROMPT = (
    "You are sliceagent, an interactive engineering agent — you work on code AND general terminal/system tasks (run "
    "commands, configure services, drive interactive programs, inspect data, recover or solve a task in the "
    "environment). Respond to each message in kind: if it is a greeting, a question, or a request to explain, "
    "plan, or discuss, just reply in text and make NO tool call. Questions about YOURSELF or YOUR ENVIRONMENT "
    "— who you are, what you do, your cwd, which project/repo you are in, the git branch — are answerable from "
    "the ENVIRONMENT block already in your context; answer from it directly, do NOT run a shell command to "
    "rediscover them. If it asks you to DO something (implement, "
    "fix, refactor, run, investigate, configure, recover, solve), carry it out with tools and make the real "
    "change in the environment — do not merely describe it. Act when it is a task; "
    "converse when it is conversation — e.g. \"rename methodName to snake_case\" is a TASK: find it in the "
    "code and make the edit, don't just reply with the new name. When the request specifies an EXACT name, function signature, API, "
    "or interface, honor it VERBATIM — do not rename or re-shape what the user asked for (a caller or test "
    "depends on that exact name). When the user states a STANDING requirement that must hold at the end (an "
    "exact name/signature, an output format, a rule, or a constraint added mid-task), record it with "
    "require(...) so it persists as your contract across turns, and requirement_done(...) once you have "
    "VERIFIED it — durable constraints only, never transient sub-steps or chit-chat.\n\n"
    "<ask>\n"
    "If a request is AMBIGUOUS, or you have FAILED or been blocked and are unsure how to proceed, call the "
    "ask_user tool with ONE concise question (optionally up to ~4 short options) and wait for the answer — "
    "do NOT guess, and do NOT repeat a failing action hoping it changes. Asking the user a follow-up is a "
    "normal, expected move, not a failure.\n"
    "RESOLVE BEFORE ASKING: a brief follow-up refers to what you were JUST working on — \"look into "
    "index.ts\", \"fix it\", \"review the project\" point at the CURRENT PROJECT and the RECENT CONVERSATION, "
    "not a blank search. Before you re-ask or cold-search: (1) resolve the referent against the CURRENT "
    "PROJECT (your file tools reach there) and the recent turns; (2) if the details were established in an "
    "earlier turn but aren't in front of you, recall_history(turns=[N]) to page them back; THEN act. "
    "Re-asking what the context already answers — or searching elsewhere for a file that lives in the "
    "current project — is the failure, not asking.\n"
    "CLARIFY BEFORE COMMITTING: before you deliver an artifact (a function, file, or design) whose "
    "CORRECTNESS depends on details the request does NOT state — exact behavior, numeric conventions, "
    "formats, ordering, edge cases — and the user is present to answer, ASK your most important clarifying "
    "questions FIRST instead of guessing. Guessing hidden requirements and committing a whole artifact is a "
    "common, costly failure. In a back-and-forth dialogue, ending your turn with a focused question (or "
    "calling ask_user) is the correct move, not premature delivery; gather what you need over a few short "
    "exchanges, then deliver. Only when the spec is already complete (e.g. a precise issue with tests) or no "
    "one can clarify should you proceed directly on a best-effort reading.\n"
    "</ask>\n\n"
    "{{MEMORY_MODEL}}"  # spliced with MEMORY_ACCUMULATE in make_build_slice (byte-stable per session)
    "The slice is organized into TIERS. Trust them in this order of AUTHORITY (highest first):\n"
    "1. OPEN FILES — live contents re-read from disk: your GROUND TRUTH. Base every edit on what is shown "
    "there, never on memory. If anything conflicts with OPEN FILES, the file wins. (A huge file shows the "
    "region around your focus; grep to see more.)\n"
    "2. CURRENT ERROR / OPEN USER REPORT — the unresolved failure to fix. If the user REPORTS the work is "
    "broken, treat it as an open blocker: VERIFY any fix against the real artifact (run/open it and observe "
    "success) before claiming it is done — your own note saying 'done' does NOT clear a user report.\n"
    "3. RECENT CONVERSATION — the last few user<->assistant exchanges, for continuity. Older turns are "
    "paged out — the PAGED-OUT HISTORY section lists them with the recall_history call to fetch each; if "
    "the user refers to something earlier, page that turn back in BEFORE answering, instead of assuming. "
    "'You mentioned X', 'what were those N things', 'what did you find/say' are asking for your ACTUAL PRIOR "
    "WORDS, not a new answer — recall_history (or a truncated finding's own recall pointer, if one is marked "
    "'PARTIAL' below) is the correct move, NOT re-reading the code and producing a fresh, independently-"
    "derived answer: a re-derived answer will likely NOT MATCH what you actually said, and presenting it as "
    "if it were the same is a confabulation, not a correction.\n"
    "4. YOUR NOTES FROM PRIOR TOOL CALLS — facts you recorded on earlier turns. Reuse them to avoid "
    "re-deriving, but they are YOUR notes, not ground truth: VERIFY against OPEN FILES before relying on "
    "one, and a note that says the work is 'done' is NOT proof — confirm it on the real artifact first.\n"
    "5. REPEATED/FAILING ACTIONS — an anti-loop tally of actions repeated or failing across this task "
    "(your actual recent steps are in the conversation above). If an action is REPEATEDLY FAILING, stop "
    "repeating it; read the file and fix the root cause (or recall_history / ask_user).\n"
    "6. RELATED CODE / RELEVANT MEMORY — fuzzy search candidates and past-session lessons; may be "
    "incomplete or stale — verify against OPEN FILES before relying on them.\n\n"
    "<work>\n"
    "When it IS a task: make the SMALLEST change that resolves it — only what is necessary, reusing the codebase's existing "
    "helpers and idioms; add no special-cases or defensive logic the task did not ask for. Work in as FEW turns as "
    "possible: emit INDEPENDENT tool calls in ONE response (read the specific files you need, grep several terms, and "
    "batch every edit you can already determine) — they run in parallel — instead of one tool per turn; for multi-step "
    "work prefer ONE execute_code script. Do NOT re-read or re-list what OPEN FILES / RECENT already show; once you have "
    "enough, act or answer — don't keep exploring. When a task would require reading a WHOLE REPO's worth of files to "
    "understand it, do NOT pull them all into your own context — narrow with grep/RELATED CODE, or delegate the breadth.\n"
    "When a single command could spew a LARGE dump (a binary disassembly, a long log, a whole dataset, a huge file), "
    "FILTER it to the part you need INSIDE the command — pipe through grep/head/tail/sed -n, or target a range "
    "(e.g. objdump --start-address/--stop-address after locating the symbol with nm) — instead of dumping everything: "
    "you both surface the RELEVANT slice and keep your context lean.\n"
    "</work>\n\n"
    "<verification>\n"
    "'Done' means the task's REAL end-state holds in the world — a passing check for code, but equally the "
    "right file/output, a service that actually responds, a solved puzzle, an extracted answer, a configured "
    "system. Confirm that end-state DIRECTLY (run / open / observe it); your own note saying 'done' is never "
    "proof. The code-specific guidance below is the common case — apply the same observe-the-real-result "
    "discipline to any task.\n"
    "If your result is a SOLUTION you worked out by REASONING — a sequence of moves/commands, a "
    "reconstructed value, a path, a generated script or a file that must satisfy a checker — do NOT trust the "
    "reasoning alone: REPLAY it end-to-end against the real program/checker (feed the steps back in, run the "
    "script, diff the output, re-run the program with your answer) and observe success BEFORE you declare "
    "done. If the replay does not succeed, use what it shows to correct the result and replay again. A "
    "solution you believe is right but have not executed is UNVERIFIED.\n"
    "Verify with the CHEAPEST sufficient check (import/compile/build/lint, or the smallest relevant test). If a "
    "check cannot run after ONE attempt (missing command/deps, setup errors), do NOT keep retrying or repairing "
    "the environment — make the minimal correct edit and stop.\n"
    "Be THOROUGH in your actions, not your explanations. When you INVESTIGATE (find bugs, judge whether code is "
    "correct, locate usages), read and TRACE the actual code — follow what each value and loop variable does and "
    "walk the non-obvious paths, rather than skimming or inferring from a name or signature; a single pass finds "
    "the obvious and misses the subtle (a loop counter that never changes, an off-by-one, a case mismatch, a "
    "dropped field, a non-constant-time compare), so do not conclude too early and do not give up too early. Before "
    "you state ANYTHING as true — a bug, a root cause, 'this is correct', 'this is done' — CONFIRM it against the "
    "code or a tool result (avoid hallucination, fact-check first): report the issues you have actually traced and "
    "confirmed, and do not report a plausible-looking concern you have not confirmed.\n"
    "When you deliver a LIST of findings (a bug hunt, a review), verify EACH candidate SILENTLY before writing it "
    "down — the delivered text is your settled conclusion, not your scratch work. Do not narrate the "
    "back-and-forth ('Actually, let me reconsider…', 'Confirmed' followed by a retraction) into the report the "
    "user reads; if a candidate turns out not to be a real issue on closer look, drop it entirely rather than "
    "including it with a self-contradicting verdict. A label like 'Confirmed' means you re-checked it and it "
    "held — never attach it to something you go on to retract in the same breath.\n"
    "This applies EQUALLY to facts you report to the USER about their environment — a file PATH or location, a "
    "directory's contents, a file's text, the git branch, or whether a command SUCCEEDED: state ONLY what a "
    "tool result THIS TURN actually shows, taken from that output. Build a path from what you OBSERVED (the "
    "ENVIRONMENT block, a list_files / glob result), never from a guess that merely looks right; do NOT "
    "describe files, structure, or a framework you did not list or read; and do NOT say a command "
    "'worked'/'booted'/'passed'/'is running' unless its real output shows it. If you have not observed "
    "something, run the tool or say you haven't checked — never fill the gap with a confident guess that "
    "matches what the user seems to expect (that is the most damaging error you can make).\n"
    "When you FIX a bug, make the most DIRECT correct fix first — usually at the site the issue points to; do not "
    "over-engineer a simple bug. But if reproducing the issue shows that direct fix does NOT actually resolve it, "
    "the real cause is deeper: follow the value/data flow INWARD — into the helper functions the code calls — to "
    "the function that PRODUCES the wrong result, and fix it THERE (a change at a site that merely forwards the "
    "value to the real culprit passes a shallow check but fails the real test). Either way, before finishing, "
    "REPRODUCE the issue's own scenario with a small execute_code probe and confirm your edit makes it behave "
    "correctly — a fix you have not exercised against the reported scenario is unverified.\n"
    "When the task states an EXACT expected BEHAVIOR — a specific value, ordering, count, depth, or invariant "
    "('outermost sees the original depth', 'caller X must resolve through Y', 'returns a (value, source) pair') — "
    "a compile/import is NOT enough: before finishing, run ONE small execute_code probe that EXERCISES that exact "
    "property at the boundary the task names (not just the easy/center case) and shows it holds. The subtle bugs "
    "survive a check that only exercises the obvious path.\n"
    "</verification>\n\n"
    "<notes>\n"
    "Tool calls take an optional 'note': record a durable FACT you just established (root cause, a confirmed fix, "
    "a ruled-out hypothesis, or that the task is done) — a fact, NOT the action and NOT narration; leave it empty "
    "if nothing new was settled. Notes accumulate into YOUR NOTES FROM PRIOR TOOL CALLS — facts to "
    "verify against OPEN FILES, never established truth.\n"
    "</notes>\n\n"
    "<stop>\n"
    "When the change is complete and verified as well as the environment allows, write your final summary and "
    "make NO tool call. Do not re-run a check you have already passed.\n"
    "</stop>\n\n"
    "<communication>\n"
    "Your replies belong to the USER, not to yourself — they are NOT a scratchpad. Do your thinking SILENTLY "
    "(it is never shown); emit only substance. Do NOT narrate your own process: no 'Let me…', 'I should…', "
    "'Wait…', 'Okay, now…', 'First, I'll…', 'Final answer coming up', no planning the shape of your reply out "
    "loud, and no announcing what you are about to do before a tool call (the tool card already shows it). "
    "ACT, or ANSWER — never describe yourself doing either. When you finish, give the result directly, with no "
    "preamble (no 'Sure', no 'Here is…') and no postamble.\n"
    "Write your final summary for a reader who CANNOT see your tool calls, your reasoning, or this slice: say "
    "what you changed and the outcome in complete sentences, expand any codename/jargon/abbreviation, and lead "
    "with the change or the answer (most important first). Be concise but COMPLETE — MATCH the depth to the "
    "task: a one-line summary is the floor for a trivial change, NOT a ceiling for real work; a multi-file "
    "change or an investigation deserves a few sentences (what changed and where, how you verified it, and any "
    "limitation or concrete next step). As short as the task allows, never shorter than the reader needs. A "
    "trivial change or a direct question should land in roughly 1-3 lines (under ~50 words), not a paragraph.\n"
    "</communication>\n\n"
    "<safety>\n"
    "Do NOT make unasked git mutations (init/add/rm/commit/push/checkout/reset/stash/rewrite history) — ask "
    "each time before changing repo state, and run the EXACT git command asked (never substitute `git init` "
    "for `git status`).\n"
    "Never read, print, or commit secrets — leave .env and credential files alone unless the user explicitly asks.\n"
    "Your current git state (branch + changed files) is shown LIVE in REPO STATE below, re-read every "
    "turn — trust it; the PROJECT facts in this system message are session-start static.\n"
    "</safety>"
)
# A/B PROMPT SEAM (experiment hook; OFF by default → identical production prompt). Point
# SLICEAGENT_PROMPT_FILE at a full prompt template to swap SYSTEM_PROMPT for a measurement run. The override
# replaces ONLY the static template, so the later {{MEMORY_MODEL}} splice still applies to it. Guarded: a
# template missing the {{MEMORY_MODEL}} marker (which would silently drop the memory block), a path that
# cannot be opened, or a file that is not valid UTF-8 all fall back to the default prompt with a warning on
# stderr — never a silent wrong prompt, and never a crash at import time.
def _load_prompt_override(path: str) -> str | None:
    """Read the override prompt template at *path*.

    Returns the template text when the file is readable UTF-8 and contains the ``{{MEMORY_MODEL}}``
    marker. Otherwise writes a ``[prompt-ab]`` warning to stderr and returns None so the caller keeps
    the default prompt.
    """
    try:
        # `with` closes the handle deterministically instead of leaving it to the garbage collector.
        with open(path, encoding="utf-8") as fh:
            template = fh.read()
    except (OSError, ValueError) as err:
        # ValueError covers UnicodeDecodeError (non-UTF-8 content) and an embedded NUL in the path.
        sys.stderr.write(f"[prompt-ab] cannot read {path}: {err}; using default prompt\n")
        return None
    if "{{MEMORY_MODEL}}" not in template:
        # Quadrupled braces: inside an f-string "{{" renders as a single "{", and the message must name
        # the marker exactly as it has to appear in the file.
        sys.stderr.write(f"[prompt-ab] {path} lacks the {{{{MEMORY_MODEL}}}} marker; using default prompt\n")
        return None
    return template
_prompt_ab_file = os.environ.get("SLICEAGENT_PROMPT_FILE", "").strip()
if _prompt_ab_file:
    _ov = _load_prompt_override(_prompt_ab_file)
    if _ov is not None:
        SYSTEM_PROMPT = _ov
# MEMORY_ACCUMULATE is the "HOW YOUR MEMORY WORKS" text that takes the place of the {{MEMORY_MODEL}} marker
# in SYSTEM_PROMPT. It tells the model that WITHIN a task its own actions and results stay visible (working
# memory accumulates), while ACROSS tasks only a reconstructed slice plus the durable cache carry over, with
# recall_history as the way to page earlier turns back in.
MEMORY_ACCUMULATE = (
    "# HOW YOUR MEMORY WORKS — read this once; it explains everything below\n"
    "You work one TASK at a time. WITHIN the current task you can see your own earlier actions and their "
    "results in this conversation — your working memory builds up as you go, so nothing you did THIS task "
    "is lost. When a task finishes and a new one begins you start FRESH: the raw history is NOT carried "
    "forward — instead a small reconstructed slice (your distilled conclusions, the recent exchange, and "
    "the files you touched) is provided below, while the FULL verbatim history of every task this session "
    "is preserved in a durable CACHE on disk. Mental model: this task's messages are your RAM, the cache "
    "is disk, and you stay fast no matter how long the session gets because nothing accumulates ACROSS "
    "tasks.\n"
    "CONSEQUENCES, internalize them:\n"
    "- Your recent steps are shown below, but OLDER turns of this session are PAGED OUT — they are NOT in "
    "the slice. The PAGED-OUT HISTORY section lists them (turn · title · note) WITH the exact "
    "recall_history call to bring each back. Before you re-read a file or re-derive something you already "
    "worked out on an earlier turn, check that list and PAGE THE TURN BACK IN — it's one call, and the "
    "call is printed for you.\n"
    "- Don't re-fetch what's already in front of you (RECENT / YOUR NOTES / OPEN FILES). Reach back for "
    "what is NOT shown — that's exactly what PAGED-OUT HISTORY (and recall_history(search=…) for other "
    "sessions) is for. Paging an earlier turn back is normal navigation, not a failure.\n"
    "- Trust the WORLD over memory: if a note or an earlier read conflicts with a fresh tool result / OPEN "
    "FILES, the WORLD wins (a file you edited may have changed since you first read it).\n"
    "- If the request is ambiguous or you're blocked, ask_user (don't spin or guess).\n"
)
# DELEGATION_BLOCK is appended to the system message ONLY when the spawn_* tools are actually available
# (sub_depth>0 and not a read-only child), so the model is never told to use a tool it lacks and the block
# stays byte-stable for the whole session (the tool schemas do not change mid-session, which keeps the
# prompt cache warm). The idea it encodes: breadth is paid for inside CHILDREN's isolated slices, each of
# which hands back only a bounded summary, so the parent's slice never accumulates a whole repo's worth of
# reads. Fan-out is scaled to the size of the work, and the single-vs-swarm rule (fan out for decomposable
# breadth, stay single for tightly-coupled edits) is task-agnostic.
DELEGATION_BLOCK = (
    "\n\n<delegation>\n"
    "For work that spans MANY files or several independent areas — 'review/understand the repo', 'find the bug', "
    "auditing or comparing multiple modules — do NOT read the whole repo into your own context. DELEGATE in "
    "PARALLEL: emit several spawn_explore calls in ONE response (one per area, module, or question; each a clear "
    "standalone task), then synthesize the SHORT summaries they return. Scale the fan-out to the work: a single "
    "fact needs no child (read the one file or just answer); a 2–4 file comparison → 2–4 explorers; a broad review "
    "→ one explorer per major area. Use spawn_subagent (writable) for a large self-contained sub-task you want "
    "carried out end-to-end. Stay SINGLE-AGENT for one tightly-coupled change you are actively editing — don't fan "
    "out work you must keep consistent yourself.\n"
    "</delegation>"
)

sliceagent/records.py ADDED Viewed

@@ -0,0 +1,108 @@
+"""Append-only records journal.
+A durable, per-session, TYPED event log that sits ABOVE the kernel: replay/resume and the cron /
+background subsystems read it. It NEVER feeds the live slice — replay rebuilds state on RESUME only,
+never mid-turn (preserving the Markov boundary; cf. the records-replay moat-conflict note). Reuses the
+per-session JSONL pattern of the episodic cache rather than inventing a new store.
+`UsageRecorder` is the first consumer: it journals per-turn token usage as a durable cost log — distinct
+from the in-memory `CostMetrics` summary (metrics.py), which measures the moat curve within a run.
+"""
+from __future__ import annotations
+import json
+import os
+from .events import Event, StepEnd, TurnEnd, TurnInterrupted
+from .recovery import state_dir
# Journals are kept under the sliceagent STATE directory (the "records" sub-directory returned by
# state_dir), not in a scratch/ folder inside the user's workspace. Each journal's filename already
# carries its session_id, so one flat directory is enough and no per-workspace key is needed.
RECORDS_ROOT = state_dir("records")
def _records_path(session_id: str, root: str = RECORDS_ROOT) -> str:
    """Return the journal file path for *session_id* inside *root*.

    Every character of the id that is not alphanumeric, "-" or "_" is replaced by "_" so the id is
    safe to use as a filename; a falsy id is treated as "default". The file extension is ".jsonl".
    """
    raw_id = session_id or "default"
    cleaned = []
    for ch in raw_id:
        keep = ch.isalnum() or ch in "-_"
        cleaned.append(ch if keep else "_")
    return os.path.join(root, "".join(cleaned) + ".jsonl")
class Journal:
    """Per-session, append-only log of typed records, stored as one JSON object per line.

    `record(type, **data)` appends one line; `read(type=None)` returns the stored records, optionally
    filtered by type. Reading is tolerant by construction: a missing file reads as empty, and a line
    that is blank, is not valid JSON, or decodes to anything other than a JSON object is skipped — a
    damaged journal never breaks the caller.
    """

    def __init__(self, session_id: str, root: str = RECORDS_ROOT):
        """Bind the journal to the file for *session_id* under *root*; no file is opened here."""
        self.path = _records_path(session_id, root)

    def record(self, rtype: str, **data) -> None:
        """Append one record of type *rtype* carrying the keyword fields in *data*.

        The parent directory is created on demand. The record is written as a single JSON line
        (non-ASCII text is kept as-is, not escaped).
        """
        os.makedirs(os.path.dirname(self.path) or ".", exist_ok=True)
        with open(self.path, "a", encoding="utf-8") as f:
            f.write(json.dumps({"type": rtype, **data}, ensure_ascii=False) + "\n")

    def read(self, rtype: str | None = None) -> list[dict]:
        """Return the journal's records in file order, keeping only type *rtype* when it is given.

        Returns an empty list when the journal file does not exist.
        """
        if not os.path.exists(self.path):
            return []
        out: list[dict] = []
        # errors="replace": a truncated multibyte sequence becomes a replacement character (and the
        # line then usually fails JSON parsing and is skipped) instead of crashing replay.
        with open(self.path, encoding="utf-8", errors="replace") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except Exception:  # noqa: BLE001 — a corrupt line never breaks replay
                    continue
                # A line can be valid JSON without being an object (42, [], "x", null); such a value
                # has no .get and is not a record, so skip it rather than raise AttributeError.
                if not isinstance(rec, dict):
                    continue
                if rtype is None or rec.get("type") == rtype:
                    out.append(rec)
        return out
class UsageRecorder:
    """Event sink that writes one durable "usage" record per turn into a Journal.

    It only observes events: StepEnd events feed an in-memory accumulator of the typed token
    breakdown, and a TurnEnd or TurnInterrupted event flushes that accumulator to the journal as a
    single record and resets it.
    """

    def __init__(self, journal: Journal, model: str = ""):
        self.journal = journal
        self.model = model
        self._turn = 0  # number of turn-closing events seen so far; becomes the record's `turn`
        self._acc = {"input_other": 0, "input_cache_read": 0, "input_cache_creation": 0, "output": 0}

    def __call__(self, e: Event) -> None:
        """Handle one event; anything other than a step end or a turn end is ignored."""
        if isinstance(e, StepEnd):
            # The typed breakdown (input_other / cache_read / ...) arrives per STEP, so it is summed
            # here and only snapshotted when the turn closes.
            step_usage = e.usage or {}
            for key in self._acc:
                self._acc[key] += step_usage.get(key, 0) or 0
            if "output" not in step_usage:
                # Legacy usage dicts carry no "output" field: count completion_tokens instead.
                self._acc["output"] += step_usage.get("completion_tokens", 0) or 0
            return
        if not isinstance(e, (TurnEnd, TurnInterrupted)):
            return
        self._turn += 1
        # An event without a `usage` attribute (or with an empty one) contributes nothing here; the
        # accumulator then supplies every number.
        turn_usage = getattr(e, "usage", None) or {}
        acc = self._acc
        # Per field: prefer the accumulated per-step value, else a value carried on the turn's own
        # usage dict, else 0.
        typed = {key: (acc[key] or turn_usage.get(key, 0) or 0) for key in acc}
        # Fallback prompt total for turns whose event reports no prompt_tokens, so the journal is
        # not undercounted.
        acc_prompt = acc["input_other"] + acc["input_cache_read"] + acc["input_cache_creation"]
        prompt_total = turn_usage.get("prompt_tokens") or acc_prompt
        completion_total = turn_usage.get("completion_tokens") or acc["output"]
        self.journal.record(
            "usage",
            turn=self._turn,
            model=self.model,
            prompt_tokens=prompt_total,
            completion_tokens=completion_total,
            **typed,
        )
        # Start the next turn with a zeroed accumulator (same keys).
        self._acc = {key: 0 for key in acc}
def total_usage(journal: Journal) -> dict:
    """Sum the journal's "usage" records into per-model totals.

    Returns a dict keyed by model name ("?" when a record has no model). Each value maps every
    counted field — the prompt/completion totals and the typed cache breakdown — to its sum over
    that model's records, plus "turns", the number of records summed. Missing or null fields count
    as 0.
    """
    counters = ("prompt_tokens", "completion_tokens", "input_other", "input_cache_read",
                "input_cache_creation", "output")
    report: dict[str, dict] = {}
    for entry in journal.read("usage"):
        model = entry.get("model") or "?"
        if model not in report:
            # First record for this model: start every counter at zero, with "turns" last.
            fresh = dict.fromkeys(counters, 0)
            fresh["turns"] = 0
            report[model] = fresh
        totals = report[model]
        for name in counters:
            totals[name] += entry.get(name, 0) or 0
        totals["turns"] += 1
    return report

sliceagent/recovery.py ADDED Viewed

@@ -0,0 +1,119 @@
+"""Turn write-ahead log (WAL) for crash recovery.
+sliceagent is cache-not-log by design (no transcript), but a HARD process crash mid-turn (kill -9, OOM, power
+loss) would otherwise lose the in-flight turn entirely. The WAL is a RECOVERY-ONLY artifact: the accumulating
+turn messages are written after each step and DELETED on any clean/parked exit. It is never read during
+normal operation — only on the NEXT startup in the same workspace, to surface what was interrupted. Keyed by
+workspace root so a restart-in-place finds it. Entirely best-effort: a WAL failure must never affect a turn.
+"""
+from __future__ import annotations
+import hashlib
+import json
+import os
+import tempfile
+import time
def state_dir(*parts: str) -> str:
    """Return a directory under the sliceagent STATE root, creating it if needed.

    The root is $SLICEAGENT_CACHE_DIR when that variable is set and non-empty, otherwise
    ~/.sliceagent. *parts* are joined beneath it. Internal logs, records and the WAL go through this
    one function so that nothing is written into the project being worked on.
    """
    override = os.environ.get("SLICEAGENT_CACHE_DIR")
    if override:
        base = override
    else:
        base = os.path.join(os.path.expanduser("~"), ".sliceagent")
    target = os.path.join(base, *parts)
    os.makedirs(target, exist_ok=True)
    return target
def root_key(root: str) -> str:
    """Return a stable 16-hex-character key for a workspace path.

    The key is the first 16 hex digits of the SHA-1 of the path's realpath (a falsy *root* means
    "."), so per-workspace state files get distinct names.
    """
    resolved = os.path.realpath(root or ".")
    digest = hashlib.sha1(resolved.encode("utf-8")).hexdigest()
    return digest[:16]
def _wal_dir() -> str:
    """Return the "wal" sub-directory of the state root (state_dir creates it on demand)."""
    wal_root = state_dir("wal")
    return wal_root
def _path(root: str) -> str:
    """Return the WAL file path for workspace *root*: `<wal dir>/<root_key(root)>.json`."""
    wal_root = _wal_dir()
    filename = root_key(root) + ".json"
    return os.path.join(wal_root, filename)
def _sanitize(messages: list) -> list:
    """Return a copy of *messages* that is safe to persist in the WAL.

    The WAL writes in-flight tool output to disk, so it applies the same redact-on-persist rule as
    the other durable stores: string content and the text of list-content parts go through
    redact_text, and image_url parts are replaced by a short text placeholder. Entries that are not
    dicts, and content that is neither a string nor a list, are passed through unchanged. The input
    messages are not mutated.
    """
    from .safety import redact_text

    def _clean_part(part):
        # Only dict parts are rewritten; everything else is kept as-is.
        if isinstance(part, dict):
            if part.get("type") == "image_url":
                return {"type": "text", "text": "[image attached]"}
            if isinstance(part.get("text"), str):
                return {**part, "text": redact_text(part["text"])}
        return part

    cleaned = []
    for msg in messages or []:
        if not isinstance(msg, dict):
            cleaned.append(msg)
            continue
        content = msg.get("content")
        if isinstance(content, list):
            cleaned.append({**msg, "content": [_clean_part(part) for part in content]})
        elif isinstance(content, str):
            cleaned.append({**msg, "content": redact_text(content)})
        else:
            cleaned.append(msg)
    return cleaned
def record(root: str, *, goal: str, messages: list, step: int) -> None:
    """Atomically write the in-flight turn for workspace *root* to its WAL file.

    The payload holds the redacted goal, the step number, a timestamp, the workspace realpath and
    the sanitized messages. It is written to a temporary file in the WAL directory, flushed and
    fsynced, then moved over the WAL path with os.replace, so a reader sees either the previous WAL
    or the complete new one. Best-effort: any failure is swallowed (and the temporary file removed)
    so the WAL can never destabilize a turn.
    """
    tmp = None
    try:
        from .safety import redact_text
        body = json.dumps({"goal": redact_text(goal or ""), "step": step, "ts": time.time(),
                           "root": os.path.realpath(root), "messages": _sanitize(messages)},
                          ensure_ascii=False)
        p = _path(root)
        fd, tmp = tempfile.mkstemp(dir=os.path.dirname(p), prefix=".wal-", suffix=".tmp")  # mkstemp → 0600
        # Write through a buffered file object rather than a single os.write: os.write may write
        # fewer bytes than requested, and a short write would be fsynced and published as truncated
        # JSON that pending() then discards. The file object owns fd and closes it on exit.
        with os.fdopen(fd, "wb") as f:
            f.write(body.encode("utf-8"))
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp, p)
    except Exception:  # noqa: BLE001 — the WAL must never destabilize a turn
        if tmp is not None:            # tmp is still None if json.dumps / _path / mkstemp itself failed
            try:
                os.remove(tmp)
            except OSError:
                pass
def pending(root: str) -> dict | None:
    """Return the interrupted turn stored for workspace *root*, or None.

    None is returned when the WAL file cannot be opened, does not parse as JSON, or does not hold a
    JSON object. A WAL that is present means the last turn never reached a clean or parked exit
    (i.e. a hard crash), because those exits delete it.
    """
    try:
        with open(_path(root), encoding="utf-8") as fh:
            payload = json.load(fh)
    except (OSError, ValueError):
        return None
    if isinstance(payload, dict):
        return payload
    return None
def clear(root: str) -> None:
    """Delete the WAL for workspace *root*, ignoring any OS error (e.g. the file is already gone).

    Called on every clean or parked turn exit, so a WAL that is still present later means a crash.
    """
    try:
        wal_file = _path(root)
        os.remove(wal_file)
    except OSError:
        pass
def last_assistant(wal: dict) -> str:
    """Return the most recent non-blank assistant text in the interrupted turn, or "" if none.

    Messages are scanned newest-first. Entries that are not dicts, messages whose role is not
    "assistant", and assistant messages whose content is not a non-blank string are skipped.
    """
    history = (wal or {}).get("messages", []) or []
    for msg in reversed(history):
        if not isinstance(msg, dict):
            continue
        if msg.get("role") != "assistant":
            continue
        text = msg.get("content")
        if isinstance(text, str) and text.strip():
            return text
    return ""