PyPI - sliceagent - Versions diffs - 0.1.0__py3-none-any.whl - Mend

sliceagent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

sliceagent/__init__.py +3 -0
sliceagent/__main__.py +6 -0
sliceagent/access.py +93 -0
sliceagent/agents.py +173 -0
sliceagent/background_review.py +146 -0
sliceagent/binsniff.py +89 -0
sliceagent/cli.py +890 -0
sliceagent/clock.py +32 -0
sliceagent/code_grep.py +329 -0
sliceagent/code_index.py +417 -0
sliceagent/config.py +240 -0
sliceagent/context_overflow.py +227 -0
sliceagent/envspec.py +129 -0
sliceagent/errors.py +167 -0
sliceagent/events.py +96 -0
sliceagent/finding_types.py +70 -0
sliceagent/flags.py +63 -0
sliceagent/fuzzy.py +135 -0
sliceagent/guardrails.py +438 -0
sliceagent/guidance.py +69 -0
sliceagent/hippocampus.py +581 -0
sliceagent/hooks.py +334 -0
sliceagent/interfaces.py +144 -0
sliceagent/llm.py +695 -0
sliceagent/loop.py +548 -0
sliceagent/mcp_client.py +255 -0
sliceagent/mcp_security.py +77 -0
sliceagent/memory.py +428 -0
sliceagent/metrics.py +103 -0
sliceagent/model_catalog.py +124 -0
sliceagent/monitor.py +615 -0
sliceagent/neocortex.py +436 -0
sliceagent/onboarding.py +323 -0
sliceagent/oracle.py +36 -0
sliceagent/pagetable.py +255 -0
sliceagent/pfc.py +449 -0
sliceagent/plugins.py +127 -0
sliceagent/policy.py +234 -0
sliceagent/procman.py +187 -0
sliceagent/prompt.py +239 -0
sliceagent/records.py +108 -0
sliceagent/recovery.py +119 -0
sliceagent/regions.py +678 -0
sliceagent/registry.py +128 -0
sliceagent/retriever.py +19 -0
sliceagent/safety.py +332 -0
sliceagent/sandbox.py +143 -0
sliceagent/scheduler.py +92 -0
sliceagent/search_index.py +289 -0
sliceagent/seed.py +465 -0
sliceagent/sensory_cortex.py +500 -0
sliceagent/session.py +222 -0
sliceagent/skill_provenance.py +71 -0
sliceagent/skill_usage.py +123 -0
sliceagent/skills.py +209 -0
sliceagent/subagent.py +332 -0
sliceagent/subdir_hints.py +222 -0
sliceagent/swap.py +182 -0
sliceagent/taskstate.py +57 -0
sliceagent/telemetry.py +59 -0
sliceagent/terminal.py +240 -0
sliceagent/text_utils.py +56 -0
sliceagent/tool_summary.py +93 -0
sliceagent/tools.py +1194 -0
sliceagent/tui.py +1377 -0
sliceagent/web.py +354 -0
sliceagent-0.1.0.dist-info/METADATA +262 -0
sliceagent-0.1.0.dist-info/RECORD +71 -0
sliceagent-0.1.0.dist-info/WHEEL +4 -0
sliceagent-0.1.0.dist-info/entry_points.txt +2 -0
sliceagent-0.1.0.dist-info/licenses/LICENSE +21 -0

sliceagent/memory.py ADDED Viewed

@@ -0,0 +1,428 @@
+"""Memory implementations — the state VAULT (task resumability) that MememMemory/NullMemory share,
+plus the two brain-region MIXINS that give MememMemory its HIPPOCAMPUS (hippocampus.py) and
+NEOCORTEX (neocortex.py) behavior. This file owns only what's left once those two concerns are
+factored out: the skill-writer utilities (shared by /learn and consolidation), the task-state
+markdown (de)serialization, and `checkpoint_task`/`load_task`/`list_session_tasks` — task resume is
+neither episodic recall nor a distilled lesson, so it stays here rather than forcing it into either
+mixin.
+memem is the plug for cross-session lessons (via NeocortexMixin): its in-process hybrid retrieval
+feeds the RELEVANT MEMORY tier and `memory_save` stores lessons. memem stays behind the `Memory`
+interface — the moat never imports it — and we degrade to NullMemory when memem/its vault is absent.
+`is_durable` is the structural marker: NullMemory sets it False, so hosts skip cache/checkpoint
+wiring and evals stay deterministic. The vault root is decoupled from memem's STATE dir
+(`MEMEM_DIR` = db/logs) — the cache is sliceagent-owned (`SLICEAGENT_VAULT`).
+"""
+from __future__ import annotations
+import json
+import os
+import re
+import tempfile
+import threading
+from .hippocampus import HippocampusMixin
+from .interfaces import Snippet, TaskRef, TaskState
+from .neocortex import NeocortexMixin
+from .safety import redact_text, scan_for_threats   # persist-guards: block-on-write + redact-on-persist
+from .text_utils import now_iso as _now_iso
+def _write_atomic(path: str, text: str) -> None:
+    """#39: replace `path` with `text` without ever exposing a half-written file. The text goes to a
+    uniquely named temp file created in the target's own directory and is then moved over the target
+    with os.replace (atomic on POSIX), so an interrupted write leaves the previous content intact.
+    On any failure the temp file is removed (best effort) and the original exception propagates."""
+    parent = os.path.dirname(path) or "."
+    handle, scratch = tempfile.mkstemp(prefix=".tmp-", dir=parent)
+    try:
+        with os.fdopen(handle, "w", encoding="utf-8") as out:
+            out.write(text)
+        os.replace(scratch, path)
+    except BaseException:
+        try:
+            os.unlink(scratch)
+        except OSError:
+            pass   # temp already gone or unremovable — the original error is what matters
+        raise
+def _safe_vault_id(x: str) -> str | None:
+    """Validate a task_id / session_id before it is joined into a vault path. These ids are model- and
+    user-controllable (switch_topic, /resume), so anything that could traverse out of the vault is
+    refused: only letters, digits, '.', '_' and '-' are accepted, and '..' is never allowed.
+    Returns the whitespace-stripped id, or None when it is empty or unsafe."""
+    candidate = (x or "").strip()
+    if not candidate:
+        return None
+    if ".." in candidate:
+        return None
+    if re.fullmatch(r"[A-Za-z0-9._-]+", candidate) is None:
+        return None
+    return candidate
+def _vault_root() -> str:
+    """Resolve the sliceagent-owned vault root. The first non-empty environment variable wins, checked
+    in order: the dedicated sliceagent vars, then the document-vault vars. MEMEM_DIR is deliberately
+    NOT consulted (that is memem's state/db dir, not a vault). With nothing set, the root is
+    ~/.sliceagent/vault. A leading '~' in a configured value is expanded."""
+    candidates = ("SLICEAGENT_VAULT", "SLICEAGENT_CACHE_DIR",
+                  "MEMEM_OBSIDIAN_VAULT", "CORTEX_OBSIDIAN_VAULT", "MEMEM_VAULT", "CORTEX_VAULT")
+    configured = next((val for val in map(os.environ.get, candidates) if val), None)
+    if configured:
+        return os.path.expanduser(configured)
+    home = os.path.expanduser("~")
+    return os.path.join(home, ".sliceagent", "vault")
+def _skills_dir() -> str:
+    """Directory that receives promoted-procedure SKILL.md packs from consolidation — a dir the
+    SkillManager scans, so skills are discovered next session. A non-empty SLICEAGENT_SKILLS_DIR
+    overrides the default ~/.sliceagent/skills; '~' is expanded either way."""
+    override = os.environ.get("SLICEAGENT_SKILLS_DIR")
+    location = override if override else os.path.join("~", ".sliceagent", "skills")
+    return os.path.expanduser(location)
+def write_skill_file(name: str, body: str, *, skills_dir: str | None = None) -> str | None:
+    """The single guarded writer for ONE SKILL.md, shared by auto-consolidation and the foreground
+    /learn tool. Steps, in order: slug the name into a safe directory name; require frontmatter;
+    BLOCK when the strict threat scan flags the body (a poisoned skill re-injects unscanned every
+    session); REDACT secrets; write atomically to <skills_dir>/<slug>/SKILL.md.
+    Returns the written path, or None when the skill is rejected or anything fails. Never raises."""
+    try:
+        slug = re.sub(r"[^a-z0-9._-]+", "-", (name or "").strip().lower())
+        slug = slug.strip("-").strip(".")[:64]           # strip(".") rejects '.'/'..' dir escape
+        if not slug:
+            slug = "skill"
+        has_fence = body.lstrip().startswith("---")
+        if not (has_fence and "name:" in body[:200]):
+            return None                                  # not a valid SKILL.md (frontmatter required)
+        if scan_for_threats(body, scope="strict"):       # (a) BLOCK on write — poisoned skill
+            return None
+        target_dir = os.path.join(skills_dir or _skills_dir(), slug)
+        os.makedirs(target_dir, exist_ok=True)
+        target = os.path.join(target_dir, "SKILL.md")
+        # _write_atomic stages through a per-writer mkstemp temp (not a fixed `path + ".tmp"`), so two
+        # concurrent skill writes cannot clobber each other's temp file — each rename is isolated.
+        _write_atomic(target, redact_text(body))         # (c) redact any secret before persisting
+        return target
+    except Exception:  # noqa: BLE001 — a skill-write failure must never break the caller
+        return None
+def make_write_skill_tool():
+    """Build the FOREGROUND skill writer (the tool /learn drives) — the agent-callable writer.
+    The agent supplies name/description/body; WE own the frontmatter (provenance: user — never
+    auto-pruned) and the guarded write (validate + threat-scan + redact + atomic, via
+    write_skill_file), so a model can't forge AUTO provenance or smuggle an unscanned skill onto disk.
+    Returns a ToolEntry named "write_skill". Its handler takes the tool-call args dict and always
+    returns a human-readable status string: a non-string or missing argument is treated as empty and
+    reported, never raised."""
+    from .registry import ToolEntry
+    from .skill_provenance import USER, frontmatter_line
+    def _text(args: dict, key: str) -> str:
+        # Tool-call args are model-supplied: a number/list/None must not reach str methods.
+        value = args.get(key)
+        return value.strip() if isinstance(value, str) else ""
+    def handler(args: dict) -> str:
+        args = args if isinstance(args, dict) else {}
+        name = re.sub(r"[^a-z0-9._-]+", "-", _text(args, "name").lower()).strip("-").strip(".")[:64]  # strip(".") rejects '.'/'..' dir escape
+        # The description must stay on ONE frontmatter line: flatten every line boundary (\n, \r, ...),
+        # not just "\n". NOTE(review): the schema text advertises <=60 chars while this cap is 120 —
+        # confirm which limit is intended.
+        desc = " ".join(_text(args, "description").splitlines())[:120]
+        body = _text(args, "body")
+        if not name or not desc or not body:
+            return "write_skill: need a name, a description, and a body."
+        md = f"---\nname: {name}\ndescription: {desc}\n{frontmatter_line(USER)}\n---\n\n{body}\n"
+        path = write_skill_file(name, md)
+        if not path:
+            return "write_skill: rejected (invalid frontmatter, empty, or flagged by the security scan)."
+        return f"Skill saved to {path} (provenance: user — it will load next session)."
+    schema = {"type": "function", "function": {
+        "name": "write_skill",
+        "description": ("Save a REUSABLE skill (SKILL.md) authored by you, so a FUTURE session can load and "
+                        "reuse it. Provide a lowercase-hyphenated `name`, a <=60-char `description` of the "
+                        "capability, and the markdown `body` (## When to use / ## Process / ## Pitfalls / "
+                        "## Verification). This is how /learn turns what you just did into a durable skill."),
+        "parameters": {"type": "object", "properties": {
+            "name": {"type": "string", "description": "lowercase-hyphenated skill name (no spaces)"},
+            "description": {"type": "string", "description": "one sentence, <=60 chars, the capability"},
+            "body": {"type": "string", "description": "the skill body markdown (sections as above)"},
+        }, "required": ["name", "description", "body"]}}}
+    return ToolEntry(name="write_skill", schema=schema, handler=handler, source="builtin")
+# --- task-state markdown (de)serialization — pure module fns (no memem) -------------------
+def _split_frontmatter(text: str) -> tuple[dict, str]:
+    """Split `text` into (frontmatter dict, body). Frontmatter is a leading `---` ... `\\n---` block of
+    flat `key: value` scalars: each line is cut at its first ':', both sides are trimmed and the value
+    loses surrounding double quotes; lines without ':' are ignored. When the text has no opening
+    fence, or no closing one, the result is ({}, text) unchanged."""
+    if not text.startswith("---"):
+        return {}, text
+    close = text.find("\n---", 3)
+    if close == -1:
+        return {}, text
+    meta: dict = {}
+    for line in text[3:close].strip("\n").splitlines():
+        key, sep, value = line.partition(":")
+        if sep:
+            meta[key.strip()] = value.strip().strip('"')
+    remainder = text[close + 4:]          # skip the 4 characters of the closing "\n---"
+    return meta, remainder.lstrip("\n")
+# INVISIBLE SEPARATOR (U+2063), spelled as an explicit escape so the sentinel cannot be silently lost
+# or altered by editors, copy/paste or diff viewers. It prefixes a VERBATIM line that begins with '## '
+# so _read_sections doesn't mistake it for a section header (model-written markdown in
+# goal/mission/last_error/resolution otherwise truncates/misroutes on resume).
+_BODY_HDR_ESC = "\u2063"
+def _esc_body(t: str) -> str:
+    """Escape free text for a task-state section: every line that starts with '## ' OR already starts
+    with the sentinel gets one sentinel prepended (the second case lets verbatim content that natively
+    begins with the sentinel round-trip exactly — _unesc_body peels exactly one layer).
+    Falsy input, and text containing neither '## ' nor the sentinel, is returned unchanged."""
+    if not t or ("## " not in t and _BODY_HDR_ESC not in t):
+        return t
+    guarded = ("## ", _BODY_HDR_ESC)
+    return "\n".join(_BODY_HDR_ESC + ln if ln.startswith(guarded) else ln for ln in t.split("\n"))
+def _unesc_body(t: str) -> str:
+    """Inverse of _esc_body: remove ONE leading sentinel from each line that starts with it.
+    Falsy input, and text without the sentinel, is returned unchanged."""
+    if not t or _BODY_HDR_ESC not in t:
+        return t
+    width = len(_BODY_HDR_ESC)   # peel by the sentinel's length rather than a hard-coded 1
+    return "\n".join(ln[width:] if ln.startswith(_BODY_HDR_ESC) else ln for ln in t.split("\n"))
+def _safe_int(v, default: int = 0) -> int:
+    """Convert `v` with int(); when that raises TypeError or ValueError (None, non-numeric text, ...)
+    hand back `default` instead, so one corrupt counter cannot abort a whole load."""
+    try:
+        parsed = int(v)
+    except (TypeError, ValueError):
+        parsed = default
+    return parsed
+def _read_sections(body: str) -> dict:
+    """Group the lines of `body` under their '## ' headers. Keys are the header text, trimmed and
+    lower-cased; values are the section's lines joined verbatim with surrounding newlines removed
+    (multi-line text is preserved). Lines before the first header are ignored, and when a header
+    repeats, the last occurrence wins."""
+    chunks: dict[str, list[str]] = {}
+    current = None
+    for line in body.splitlines():
+        if line.startswith("## "):
+            current = line[3:].strip().lower()
+            chunks[current] = []          # a repeated header starts over
+        elif current is not None:
+            chunks[current].append(line)
+    return {name: "\n".join(lines).strip("\n") for name, lines in chunks.items()}
+def _bullets(text: str) -> list[str]:
+    """Collect the '- ' bullet items of `text`: each line is trimmed, kept only if it then starts with
+    '- ', and returned without that marker (trimmed again). Falsy input yields an empty list."""
+    trimmed = (raw.strip() for raw in (text or "").splitlines())
+    return [item[2:].strip() for item in trimmed if item.startswith("- ")]
+def _render_task_md(task: TaskState, *, created: str, updated: str) -> str:
+    """Render `task` as task-state markdown: a flat `key: value` frontmatter block, then one '## '
+    section per carried field, in the layout _parse_task_md reads back.
+    Every value that must occupy exactly ONE line (frontmatter scalars, plain '- ' bullets, anchors)
+    is flattened on every boundary str.splitlines() recognises, because the readers split with
+    splitlines() too: a raw line break inside a finding/path/anchor would drop the rest of the value,
+    and a continuation line beginning with '## ' would be read as a section header and could overwrite
+    another section on resume. Free-text sections (goal, mission, open report, status, resolution)
+    stay verbatim via _esc_body. `created` / `updated` are written as given."""
+    def _fm(v):
+        # #37: one physical line per scalar — a line break in a value would spill onto a line the
+        # parser drops (truncating the value).
+        return " ".join(str(v).splitlines())
+    def _plain(items):
+        return "\n".join(f"- {_fm(i)}" for i in items)
+    def _dump(obj):
+        # json.dumps(ensure_ascii=False) escapes control characters below 0x20 but leaves NEL / LS / PS
+        # raw, and splitlines() breaks on those. They can only occur inside JSON strings, so swapping
+        # them for their \u escapes is lossless (json.loads decodes them back).
+        raw = json.dumps(obj, ensure_ascii=False)
+        return raw.replace("\x85", "\\u0085").replace("\u2028", "\\u2028").replace("\u2029", "\\u2029")
+    def _jsonl(items):
+        return "\n".join(f"- {_dump(i)}" for i in items)
+    fm = [
+        "---", "type: task-state", "v: 1",
+        f"session_id: {_fm(task.session_id)}", f"task_id: {_fm(task.task_id)}",
+        f"title: {_fm(task.title)}", f"status: {_fm(task.status)}",
+        f"created: {created}", f"updated: {updated}",
+        f"since_edit: {task.since_edit}",
+        f"links: {_fm(','.join(task.links))}", f"tags: {_fm(task.tags)}", "---",
+    ]
+    body = [
+        "## Goal", _esc_body(task.goal),
+        "## Findings", _plain(task.findings),
+        # provenance per finding (JSON bullet, like World) — else cross-session resume drops it and a
+        # 'claim'-tier finding silently reads back at the higher 'tool-note' trust tier.
+        "## Finding sources", _jsonl([k, v] for k, v in task.finding_source.items()),
+        # carried slice tiers — JSON-per-bullet so dict items round-trip EXACTLY (no markdown-escape
+        # hazard). Without these, resuming a task silently dropped the standing contract / todo / north-
+        # star / world model (data loss). Mission is a single verbatim section like Status.
+        "## Requirements", _jsonl(task.requirements),
+        "## Plan", _jsonl(task.plan),
+        "## Mission", _esc_body(task.mission),
+        "## Open report", _esc_body(getattr(task, "open_report", "")),
+        "## World", _jsonl([k, v] for k, v in task.world.items()),
+        "## Working set", _plain(task.active_files),
+        "## Edited", _plain(sorted(task.edited_files)),
+        # anchor is TAB-separated (a path never contains TAB; anchors may contain ' :: ' etc.)
+        "## Anchors", "\n".join(f"- {_fm(p)}\t{_fm(a)}" for p, a in task.edit_anchor.items()),
+        "## Status", _esc_body(task.last_error),   # verbatim, may be empty/multi-line
+        "## Resolution", _esc_body(task.resolution),
+    ]
+    return "\n".join(fm) + "\n" + "\n".join(body) + "\n"
+def _parse_task_md(path: str) -> TaskState | None:
+    """Read the task-state markdown at `path` and rebuild a TaskState from it.
+    Frontmatter supplies the scalar fields (ids, title, status, since_edit, links, tags); the '## '
+    sections supply the rest. Every field has a fallback, so a missing key or section yields an
+    empty/default value rather than an error. Malformed bullets are skipped individually.
+    As written this always returns a TaskState; errors from open()/read() propagate to the caller."""
+    with open(path, encoding="utf-8") as f:
+        fm, body = _split_frontmatter(f.read())
+    sec = _read_sections(body)
+    # anchors: '<path>\t<anchor>' bullets; a bullet without a TAB is ignored
+    anchors: dict = {}
+    for b in _bullets(sec.get("anchors", "")):
+        if "\t" in b:
+            p, a = b.split("\t", 1)
+            anchors[p.strip()] = a
+    def _json_bullets(key):
+        # decode each bullet of section `key` as JSON, dropping blank or undecodable ones
+        out = []
+        for b in _bullets(sec.get(key, "")):
+            b = b.strip()
+            if not b:
+                continue
+            try:
+                out.append(json.loads(b))
+            except Exception:  # a corrupt line must not break resume
+                pass
+        return out
+    # world: '[key, value]' JSON pairs; anything that is not a 2-item list with a str key is skipped
+    world = {}
+    for kv in _json_bullets("world"):
+        if isinstance(kv, list) and len(kv) == 2 and isinstance(kv[0], str):   # non-str key is unhashable → skip the bullet, not the whole task
+            world[kv[0]] = kv[1]
+    return TaskState(
+        task_id=fm.get("task_id", ""), session_id=fm.get("session_id", ""),
+        title=fm.get("title", ""), status=fm.get("status", "active"),
+        goal=_unesc_body(sec.get("goal", "")),
+        findings=_bullets(sec.get("findings", "")),
+        # same shape rule as world: only [str, value] pairs are accepted
+        finding_source={kv[0]: kv[1] for kv in _json_bullets("finding sources")
+                        if isinstance(kv, list) and len(kv) == 2 and isinstance(kv[0], str)},
+        # requirements / plan: only JSON objects are kept
+        requirements=[r for r in _json_bullets("requirements") if isinstance(r, dict)],
+        plan=[p for p in _json_bullets("plan") if isinstance(p, dict)],
+        mission=_unesc_body(sec.get("mission", "")),
+        open_report=_unesc_body(sec.get("open report", "")),
+        world=world,
+        active_files=_bullets(sec.get("working set", "")),
+        edited_files=_bullets(sec.get("edited", "")),
+        edit_anchor=anchors,
+        last_error=_unesc_body(sec.get("status", "")),   # the '## Status' section carries last_error
+        since_edit=_safe_int(fm.get("since_edit"), 0),   # corrupt counter → 0, don't abort the whole load
+        links=[x for x in fm.get("links", "").split(",") if x],
+        tags=fm.get("tags", ""),
+        resolution=_unesc_body(sec.get("resolution", "")),
+    )
+def _upsert_session_index(vault: str, task: TaskState, updated: str) -> None:
+    """Maintain ONE bounded index file per session (so list_session_tasks reads it, not a glob).
+    Loads the existing <vault>/sessions/<session_id>.md rows (if any), replaces or adds the row for
+    `task`, and rewrites the file atomically. Row format: `task_id · status · updated[ · title]`.
+    The session id is joined into a filesystem path, so it is validated with _safe_vault_id first —
+    the same guard the read path (list_session_tasks) applies. An empty or unsafe id writes nothing
+    (such an index could never be read back anyway) instead of writing outside the vault."""
+    sid = _safe_vault_id(task.session_id)
+    if sid is None:
+        return
+    d = os.path.join(vault, "sessions")
+    os.makedirs(d, exist_ok=True)
+    path = os.path.join(d, f"{sid}.md")
+    rows: dict = {}  # task_id -> row text (without leading "- ")
+    if os.path.exists(path):
+        with open(path, encoding="utf-8") as f:
+            _, body = _split_frontmatter(f.read())
+        for b in _bullets(_read_sections(body).get("tasks", "")):
+            rows[b.split(" · ", 1)[0].strip()] = b
+    title = redact_text((task.title or "").replace("\n", " "))   # model-derived → redact before persisting
+    # title LAST — but OMIT the trailing " · title" when empty, else _bullets strips the trailing field
+    # and the row parses to 3 parts and is silently dropped from the session index.
+    rows[task.task_id] = f"{task.task_id} · {task.status} · {updated}" + (f" · {title}" if title else "")
+    lines = ["---", "type: session", f"session_id: {sid}", "---", "## Tasks"]
+    lines += [f"- {r}" for r in rows.values()]
+    _write_atomic(path, "\n".join(lines) + "\n")
+def _parse_session_index(path: str) -> list[TaskRef]:
+    """Read a session index file and return one TaskRef per well-formed row of its '## Tasks' section.
+    A row is `task_id · status · updated[ · title]`; the title is optional and may itself contain
+    ' · ' (only the first three separators split). Rows with fewer than three fields are skipped."""
+    with open(path, encoding="utf-8") as f:
+        _, body = _split_frontmatter(f.read())
+    refs: list[TaskRef] = []
+    for row in _bullets(_read_sections(body).get("tasks", "")):
+        fields = row.split(" · ", 3)
+        if len(fields) < 3:
+            continue
+        tid, status, updated, *rest = fields
+        title = rest[0] if rest else ""
+        refs.append(TaskRef(task_id=tid.strip(), title=title.strip(),
+                            status=status.strip(), updated=updated.strip()))
+    return refs
+# --- implementations ----------------------------------------------------------------------
+class NullMemory:
+    """The no-durable-memory implementation (the default until a vault is configured). Every method
+    is a TRUE no-op — no I/O, no clock — returning an empty/zero result, so the eval path stays
+    deterministic and nothing is added to the slice. `is_durable` is False."""
+    is_durable = False
+    def recall(self, query: str, k: int = 6, paths: list[str] | None = None) -> list[Snippet]:
+        """Nothing is stored, so nothing is recalled."""
+        return []
+    def remember(self, content: str, *, title: str = "", scope: str = "default", tags: str = "",
+                 paths: list[str] | None = None) -> None:
+        """Discard the lesson."""
+        return
+    def append_episode(self, session_id: str, task_id: str, turn: int, record: dict) -> None:
+        """Discard the episode record."""
+        return
+    def read_episodes(self, session_id: str, *, limit: int | None = None) -> list[dict]:
+        """No episodes exist."""
+        return []
+    def episode_manifest(self, session_id: str, k: int) -> tuple[list[dict], int]:
+        """Empty manifest with a zero count."""
+        return [], 0
+    def search_episodes(self, query: str, *, limit: int = 5,
+                        exclude_session: str | None = None,
+                        only_session: str | None = None) -> list[dict]:
+        """No episodes to search."""
+        return []
+    def checkpoint_task(self, task: TaskState) -> None:
+        """Discard the checkpoint."""
+        return
+    def load_task(self, task_id: str) -> TaskState | None:
+        """No task can be resumed."""
+        return None
+    def list_session_tasks(self, session_id: str) -> list[TaskRef]:
+        """No tasks are indexed."""
+        return []
+    def mark_used(self, memory_id: str) -> None:
+        """Nothing to mark."""
+        return
+    def consolidate(self, session_id: str, *, llm=None, mode: str = "deterministic") -> dict:
+        """Report an all-zero consolidation result."""
+        return {"lessons": 0, "skills": 0, "skills_rejected": 0, "errors": 0}
+    def close(self) -> None:
+        """Nothing to release."""
+        return
+class MememMemory(HippocampusMixin, NeocortexMixin):
+    """Adapter over memem (lessons, via NeocortexMixin) + the on-disk episodic cache (via
+    HippocampusMixin) + the state vault (task resume, below). Construction fails fast if memem
+    isn't importable. The vault is sliceagent-owned (_vault_root), decoupled from memem's state dir.
+    Task/session ids are model- and user-controllable and become file names, so BOTH the write path
+    (checkpoint_task) and the read paths (load_task, list_session_tasks) validate them with
+    _safe_vault_id before touching the filesystem."""
+    is_durable = True
+    def __init__(self) -> None:
+        import memem.retrieve  # noqa: F401  — fail fast if memem is absent
+        self._vault = _vault_root()
+        self._scope = os.path.basename(os.getcwd()) or "default"   # same-project soft bonus on recall
+        self._idx_lock = threading.Lock()   # serialize the lazy FTS-index open across parallel explorers
+    # --- task state / resume ---
+    def checkpoint_task(self, task: TaskState) -> None:
+        """Persist `task` to <vault>/tasks/<task_id>.md and refresh its session index. Best effort:
+        any failure is swallowed so checkpointing can never break the caller. A task whose id is empty
+        or unsafe as a file name is not written at all (load_task could never read it back, and the raw
+        id could otherwise escape the vault); the session index is skipped likewise for an unsafe
+        session id."""
+        try:
+            tid = _safe_vault_id(task.task_id)
+            if tid is None:
+                return   # reject path-traversal in a model/user-controlled id (mirrors load_task)
+            d = os.path.join(self._vault, "tasks")
+            os.makedirs(d, exist_ok=True)
+            path = os.path.join(d, f"{tid}.md")
+            created = _now_iso()
+            if os.path.exists(path):  # preserve the original created on update
+                with open(path, encoding="utf-8") as f:
+                    fm, _ = _split_frontmatter(f.read())
+                created = fm.get("created") or created
+            updated = _now_iso()
+            # redact the WHOLE rendered task state before it lands on disk — title/goal/findings/last_error/
+            # resolution/mission/world are all model/tool-derived and may carry secrets (mirrors the episodic
+            # cache redaction). Redact-the-output is future-proof: new fields are covered automatically.
+            _write_atomic(path, redact_text(_render_task_md(task, created=created, updated=updated)))
+            if _safe_vault_id(task.session_id) is not None:
+                _upsert_session_index(self._vault, task, updated)
+        except Exception:
+            pass   # deliberate best-effort: a failed checkpoint must not interrupt the turn
+    def load_task(self, task_id: str) -> TaskState | None:
+        """Return the checkpointed TaskState for `task_id`, or None when the id is unsafe, the file is
+        missing, or it cannot be read/parsed."""
+        tid = _safe_vault_id(task_id)
+        if tid is None:
+            return None   # reject path-traversal in a model/user-controlled id
+        try:
+            path = os.path.join(self._vault, "tasks", f"{tid}.md")
+            return _parse_task_md(path) if os.path.exists(path) else None
+        except Exception:
+            return None
+    def list_session_tasks(self, session_id: str) -> list[TaskRef]:
+        """Return the TaskRefs recorded in the session's index file; an empty list when the id is
+        unsafe, the index is missing, or it cannot be read/parsed."""
+        sid = _safe_vault_id(session_id)
+        if sid is None:
+            return []
+        try:
+            path = os.path.join(self._vault, "sessions", f"{sid}.md")
+            return _parse_session_index(path) if os.path.exists(path) else []
+        except Exception:
+            return []
+def make_memory(prefer_memem: bool = True):
+    """Factory for the memory backend: a MememMemory when `prefer_memem` is set and constructing one
+    succeeds, otherwise a NullMemory (graceful degradation — construction errors are not raised)."""
+    if not prefer_memem:
+        return NullMemory()
+    try:
+        return MememMemory()
+    except Exception:
+        return NullMemory()

sliceagent/metrics.py ADDED Viewed

@@ -0,0 +1,103 @@
+"""Cost + reliability metrics — the moat-MEASURING observer, expressed for the slice thesis.
+The project's whole bet is that per-turn cost stays FLAT as the conversation grows (the slice rebuilds a
+bounded seed each turn) while a transcript agent's climbs linearly. That bet is only credible if it's a
+NUMBER. This sink makes it one: the headline signal is `per_turn_fresh` — the FRESH (non-cache-read) input
+tokens per turn — which should stay flat for sliceagent and climb for a log-based agent.
+Pure OBSERVER, like its sibling `Telemetry`: consumes the loop's events, accumulates counters, emits nothing,
+mutates no slice — completely off the moat. It reads the TYPED usage breakdown the llm adapter now produces
+(`input_other`/`input_cache_read`/`input_cache_creation`/`output`, from llm._usage_dict). Per-step usage is
+accumulated from StepEnd; TurnEnd snapshots the per-turn fresh-input total and resets — so no double-counting
+with TurnEnd's cumulative `total`. Wire it into a dispatcher alongside slice_sink/telemetry and read
+`.summary()` afterward; `record_error(kind)` folds in the llm error buckets from errors.classify().
+"""
+from __future__ import annotations
+from .events import (ApiRetry, Event, SliceTightened, StepEnd, ToolResult, TurnEnd,
+                     TurnInterrupted)
+class CostMetrics:
+    """Callable event sink that accumulates cost + reliability counters; read `.summary()` after a
+    run. Feed it loop events by calling the instance; feed llm error kinds through `record_error`."""
+    def __init__(self) -> None:
+        self.turns = 0
+        self.steps = 0
+        self.input_other = 0          # FRESH (non-cache-read) input tokens — the real cost driver
+        self.input_cache_read = 0     # input served from the provider prompt cache (~0.1x price)
+        self.input_cache_creation = 0
+        self.output = 0
+        self.per_turn_fresh: list[int] = []   # input_other per turn end — THE moat curve (flat vs climbing)
+        self.tool_calls = 0
+        self.tool_failures = 0
+        self.retries = 0
+        self.overflows = 0
+        self.errors: dict[str, int] = {}      # error kind -> count
+        self._turn_fresh = 0                  # fresh-input accumulator for the in-progress turn
+    def __call__(self, e: Event) -> None:
+        """Fold one loop event into the counters; event types not listed here are ignored."""
+        if isinstance(e, StepEnd):
+            self.steps += 1
+            self._add(e.usage)
+            return
+        if isinstance(e, (TurnEnd, TurnInterrupted)):
+            # #56: snapshot + reset on BOTH clean and PARKED turn ends. Handling only TurnEnd dropped a
+            # parked turn's fresh tokens from the moat curve AND let its accumulator bleed into the
+            # next turn (double-count), undercounting turns/per_turn_fresh on every interruption.
+            self.turns += 1
+            self.per_turn_fresh.append(self._turn_fresh)
+            self._turn_fresh = 0
+            if isinstance(e, TurnInterrupted):
+                self.record_error(f"park:{e.reason}")
+            return
+        if isinstance(e, ToolResult):
+            self.tool_calls += 1
+            self.tool_failures += 1 if e.failing else 0
+            return
+        if isinstance(e, ApiRetry):
+            self.retries += 1
+            return
+        if isinstance(e, SliceTightened):
+            self.overflows += 1
+    def _add(self, usage: dict | None) -> None:
+        """Add one step's typed usage dict to the totals; a missing/None/zero entry counts as 0."""
+        if not usage:
+            return
+        fresh = usage.get("input_other", 0) or 0
+        cached = usage.get("input_cache_read", 0) or 0
+        created = usage.get("input_cache_creation", 0) or 0
+        # output: prefer the typed key, fall back to the legacy one (older usage dicts)
+        legacy_out = usage.get("completion_tokens", 0)
+        produced = usage.get("output", legacy_out) or 0
+        self.input_other += fresh
+        self._turn_fresh += fresh
+        self.input_cache_read += cached
+        self.input_cache_creation += created
+        self.output += produced
+    def record_error(self, kind: str) -> None:
+        """Count one occurrence of error bucket `kind` (errors.classify()['kind']) in the failure
+        histogram; an empty kind is ignored. Called by the host's retry/closeout path — the loop itself
+        stays observer-only."""
+        if not kind:
+            return
+        self.errors[kind] = self.errors.get(kind, 0) + 1
+    def summary(self) -> dict:
+        """Return a snapshot dict of every counter plus the derived figures: cache_hit_rate
+        (cache-read / total input, 3 decimals), avg_turn_fresh (1 decimal) and peak_turn_fresh.
+        Lists and dicts in the result are copies."""
+        curve = self.per_turn_fresh
+        total_in = self.input_other + self.input_cache_read + self.input_cache_creation
+        hit_rate = round(self.input_cache_read / total_in, 3) if total_in else 0.0
+        average = round(sum(curve) / len(curve), 1) if curve else 0.0
+        peak = max(curve) if curve else 0
+        return {
+            "turns": self.turns,
+            "steps": self.steps,
+            "input_other": self.input_other,
+            "input_cache_read": self.input_cache_read,
+            "input_cache_creation": self.input_cache_creation,
+            "output": self.output,
+            "cache_hit_rate": hit_rate,                             # cache-read / total input
+            "per_turn_fresh": list(curve),                          # the moat curve
+            "avg_turn_fresh": average,
+            "peak_turn_fresh": peak,
+            "tool_calls": self.tool_calls,
+            "tool_failures": self.tool_failures,
+            "retries": self.retries,
+            "overflows": self.overflows,
+            "errors": dict(self.errors),
+        }
+def make_metrics_sink() -> CostMetrics:
+    """Create a fresh CostMetrics. The instance is itself the sink (it is callable) and also holds
+    the counters to read once the run is over."""
+    sink = CostMetrics()
+    return sink

sliceagent/model_catalog.py ADDED Viewed

@@ -0,0 +1,124 @@
+"""Model capability catalog.
+Maps a model name (+ base URL) to its capabilities and wire quirks so provider-specific knowledge lives
+in ONE place instead of scattered `startswith` checks. Pattern-matched with a safe UNKNOWN default. Pure
+data + lookup; the llm adapter consults it (it is the source of truth for the tokens-param rename and the
+reasoning_effort capability — previously duplicated inline in llm.py).
+context_window is left 0 (unknown) unless genuinely known — sliceagent's overflow is reactive, so no caller
+relies on a fabricated number; the field is informational for any future context-window-aware feature.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+@dataclass(frozen=True)
+class ModelCapability:
+    """Immutable record of one model's capabilities and wire quirks. Every field has a default, so a bare
+    ModelCapability() describes an unknown model. `family` is the first field and capability() passes it
+    positionally — keep the field order stable."""
+    family: str = "unknown"          # family label filled in by capability(); "unknown" when no rule matched
+    # OpenAI gpt-5 / o-series renamed the completion cap to `max_completion_tokens` and REJECT `max_tokens`.
+    tokens_param: str = "max_tokens"
+    # accepts the OpenAI `reasoning_effort` param (gpt-5 / o-series). NOT deepseek (uses extra_body.thinking)
+    # nor moonshot/anthropic — those map "fast" to their own knobs in llm._reasoning_kwargs.
+    supports_reasoning_effort: bool = False
+    supports_tools: bool = True      # default True; capability() in this module never overrides it
+    supports_stream_options: bool = True   # OpenAI stream_options={include_usage}; set False if a provider 400s
+    supports_vision: bool = False    # accepts image content parts (multimodal); gates @image attachment
+    context_window: int = 0          # 0 = unknown (no fabricated values)
+# All-defaults record: family "unknown", `max_tokens`, no reasoning_effort, no vision, window unknown.
+_UNKNOWN = ModelCapability()
+# USD per 1M tokens as (input_fresh, input_cached, output). SINGLE SOURCE for the cost meter. Keys are
+# name/family substrings and the FIRST match in insertion order wins, so keep the order deliberate. Update
+# HERE when a provider changes pricing. (Context windows stay 0/unknown by design: sliceagent's overflow is
+# reactive, so nothing fabricates a window — see ModelCapability.)
+_PRICES = {
+    "gpt-5": (1.25, 0.125, 10.0),
+    "gpt-4": (2.50, 1.25, 10.0),
+    "o3": (2.0, 0.5, 8.0),
+    "deepseek": (0.27, 0.07, 1.10),
+    "kimi": (0.60, 0.15, 2.50),
+    "moonshot": (0.60, 0.15, 2.50),
+    "claude": (3.0, 0.30, 15.0),
+}
+def pricing(model: str, base_url: str = "") -> "tuple | None":
+    """Look up USD/1M (input, cached_input, output) for a model; None when nothing matches. The cost meter's
+    single source. Keys are searched case-insensitively in the model name AND the base URL (None is treated
+    as empty); the first _PRICES key found wins."""
+    haystack = f"{(model or '').lower()} {(base_url or '').lower()}"
+    return next((rates for key, rates in _PRICES.items() if key in haystack), None)
+# Vision is keyed off the MODEL name (not the family): within one family, kimi-k2.7-code is text-only while
+# moonshot-*-vision is multimodal. gpt-4o/gpt-5/claude-3+/gemini/`*-vl`/anything containing 'vision' counts
+# as multimodal. Conservative allowlist; capability() tests each hint as a substring of the lowercased model
+# name. ("qwen-vl" is already covered by "-vl"; it is listed for readability only.)
+_VISION_HINTS = ("vision", "gpt-4o", "gpt-4.1", "gpt-5", "gpt-6", "claude-3", "claude-4",
+                 "claude-opus", "claude-sonnet", "gemini", "-vl", "qwen-vl")
+def _is_openai_endpoint(base_url: str) -> bool:
+    """Report whether `base_url` is OpenAI's real API: either unset (the SDK then uses its own default) or
+    an explicit api.openai.com. Matching is case-insensitive and ignores surrounding whitespace.
+    Why gate on the endpoint: reasoning_effort and the /v1/responses route are OpenAI-ONLY wire features. A
+    model merely NAMED "gpt-5.5"/"o3" but served by a DIFFERENT endpoint (DeepSeek, Moonshot, a local proxy —
+    /model only switches the model string, never the endpoint) does NOT speak that protocol. Routing to
+    /v1/responses there 404s (openai.NotFoundError — the route doesn't exist on that server), which used to
+    surface as a cryptic 'internal error ended the turn'. Checking the endpoint keeps such models on the
+    universal chat/completions path — degrade gracefully, never assume a wire feature from the name alone."""
+    normalized = (base_url or "").strip().lower()
+    if not normalized:
+        return True
+    return "api.openai.com" in normalized
+# name substrings -> the ONE provider that actually serves that model. `/model` only switches the model
+# STRING, never the endpoint (that's `config --use`), so this is the general "will this even resolve"
+# check — broader than capability()'s narrower reasoning-effort gate.
+# Each row is (tuple-of-substrings, home); rows are scanned in order and the first row with a hit wins.
+_NAME_HOME = (
+    (("o1", "o2", "o3", "o4", "o5", "o6", "gpt-3", "gpt-4", "gpt-5", "gpt-6"), "openai"),
+    (("deepseek",), "deepseek"),
+    (("kimi", "moonshot"), "moonshot"),
+    (("claude",), "anthropic"),
+)
+# base_url substring -> the ONE provider that endpoint actually is. An UNMATCHED base_url (custom domain,
+# a local proxy/router) is deliberately left unresolved — such a proxy can legitimately re-route ANY model
+# name to any backend, so warning there would be a false positive (same safe-UNKNOWN posture as capability()).
+# Moonshot publishes two first-party hosts (api.moonshot.cn and the international api.moonshot.ai); both are
+# listed so a foreign model name sent to either one is recognised as a mismatch.
+_ENDPOINT_HOME = (
+    (("api.openai.com",), "openai"),
+    (("deepseek.com",), "deepseek"),
+    (("moonshot.cn", "moonshot.ai"), "moonshot"),
+    (("anthropic.com",), "anthropic"),
+)
+def _home(s: str, table: tuple) -> "str | None":
+    """Return the home of the first `table` row that has any of its substrings occurring in `s`, or None when
+    no row matches."""
+    # Rows must be (tuple-of-substrings, home). A bare string in place of the tuple would be iterated one
+    # CHARACTER at a time and match almost anything (a test caught deepseek.com false-matching "openai").
+    return next((home for needles, home in table if any(needle in s for needle in needles)), None)
+def likely_endpoint_mismatch(model: str, base_url: str) -> "str | None":
+    """Return the model's own home provider when a well-known model name (gpt-*/deepseek/kimi/claude) is
+    about to be sent to a DIFFERENT well-known endpoint — e.g. 'gpt-5.5' while still connected to DeepSeek.
+    Returns None (never warn) when the homes agree or when either side is unrecognized: a custom/proxy
+    endpoint may legitimately serve any name, so a false-positive warning there is worse than a missed one.
+    An unset base_url counts as OpenAI's endpoint."""
+    name = (model or "").lower()
+    endpoint = (base_url or "").strip().lower()
+    model_home = _home(name, _NAME_HOME)
+    if _is_openai_endpoint(endpoint):
+        endpoint_home = "openai"
+    else:
+        endpoint_home = _home(endpoint, _ENDPOINT_HOME)
+    if not model_home or not endpoint_home:
+        return None
+    return None if model_home == endpoint_home else model_home
+def capability(model: str, base_url: str = "") -> ModelCapability:
+    """Resolve the capability record for a model (first matching rule wins; specific before general).
+    `model` and `base_url` are matched case-insensitively; None is treated as empty. Vision support is
+    decided from the model name alone (_VISION_HINTS) and attached to whichever rule matches. When no rule
+    matches, an all-default ("unknown" family) record is returned."""
+    m = (model or "").lower()
+    b = (base_url or "").lower()
+    vis = any(h in m for h in _VISION_HINTS)
+    # OpenAI-only wire features need BOTH an o-series/gpt-5+ name AND OpenAI's real endpoint; the same name on
+    # any other endpoint falls through to the rules below.
+    if m.startswith(("o1", "o3", "o4", "o5", "o6", "gpt-5", "gpt-6")) and _is_openai_endpoint(b):
+        return ModelCapability("openai-reasoning", tokens_param="max_completion_tokens",
+                               supports_reasoning_effort=True, supports_vision=vis)
+    if "deepseek" in m or "deepseek" in b:
+        return ModelCapability("deepseek", supports_vision=vis)   # reasoning via extra_body.thinking
+    # "moonshot-*" model ids are the same family as "kimi-*" (as _NAME_HOME and _PRICES already treat them),
+    # so the NAME is matched for both — not only the base URL.
+    if "kimi" in m or "moonshot" in m or "moonshot" in b:
+        return ModelCapability("moonshot", supports_vision=vis)
+    if "claude" in m or "anthropic" in b:
+        return ModelCapability("anthropic", supports_vision=vis)
+    if m.startswith("gpt-") or "openai" in b:
+        return ModelCapability("openai", supports_vision=vis)
+    return ModelCapability(supports_vision=vis)