sliceagent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. sliceagent/__init__.py +3 -0
  2. sliceagent/__main__.py +6 -0
  3. sliceagent/access.py +93 -0
  4. sliceagent/agents.py +173 -0
  5. sliceagent/background_review.py +146 -0
  6. sliceagent/binsniff.py +89 -0
  7. sliceagent/cli.py +890 -0
  8. sliceagent/clock.py +32 -0
  9. sliceagent/code_grep.py +329 -0
  10. sliceagent/code_index.py +417 -0
  11. sliceagent/config.py +240 -0
  12. sliceagent/context_overflow.py +227 -0
  13. sliceagent/envspec.py +129 -0
  14. sliceagent/errors.py +167 -0
  15. sliceagent/events.py +96 -0
  16. sliceagent/finding_types.py +70 -0
  17. sliceagent/flags.py +63 -0
  18. sliceagent/fuzzy.py +135 -0
  19. sliceagent/guardrails.py +438 -0
  20. sliceagent/guidance.py +69 -0
  21. sliceagent/hippocampus.py +581 -0
  22. sliceagent/hooks.py +334 -0
  23. sliceagent/interfaces.py +144 -0
  24. sliceagent/llm.py +695 -0
  25. sliceagent/loop.py +548 -0
  26. sliceagent/mcp_client.py +255 -0
  27. sliceagent/mcp_security.py +77 -0
  28. sliceagent/memory.py +428 -0
  29. sliceagent/metrics.py +103 -0
  30. sliceagent/model_catalog.py +124 -0
  31. sliceagent/monitor.py +615 -0
  32. sliceagent/neocortex.py +436 -0
  33. sliceagent/onboarding.py +323 -0
  34. sliceagent/oracle.py +36 -0
  35. sliceagent/pagetable.py +255 -0
  36. sliceagent/pfc.py +449 -0
  37. sliceagent/plugins.py +127 -0
  38. sliceagent/policy.py +234 -0
  39. sliceagent/procman.py +187 -0
  40. sliceagent/prompt.py +239 -0
  41. sliceagent/records.py +108 -0
  42. sliceagent/recovery.py +119 -0
  43. sliceagent/regions.py +678 -0
  44. sliceagent/registry.py +128 -0
  45. sliceagent/retriever.py +19 -0
  46. sliceagent/safety.py +332 -0
  47. sliceagent/sandbox.py +143 -0
  48. sliceagent/scheduler.py +92 -0
  49. sliceagent/search_index.py +289 -0
  50. sliceagent/seed.py +465 -0
  51. sliceagent/sensory_cortex.py +500 -0
  52. sliceagent/session.py +222 -0
  53. sliceagent/skill_provenance.py +71 -0
  54. sliceagent/skill_usage.py +123 -0
  55. sliceagent/skills.py +209 -0
  56. sliceagent/subagent.py +332 -0
  57. sliceagent/subdir_hints.py +222 -0
  58. sliceagent/swap.py +182 -0
  59. sliceagent/taskstate.py +57 -0
  60. sliceagent/telemetry.py +59 -0
  61. sliceagent/terminal.py +240 -0
  62. sliceagent/text_utils.py +56 -0
  63. sliceagent/tool_summary.py +93 -0
  64. sliceagent/tools.py +1194 -0
  65. sliceagent/tui.py +1377 -0
  66. sliceagent/web.py +354 -0
  67. sliceagent-0.1.0.dist-info/METADATA +262 -0
  68. sliceagent-0.1.0.dist-info/RECORD +71 -0
  69. sliceagent-0.1.0.dist-info/WHEEL +4 -0
  70. sliceagent-0.1.0.dist-info/entry_points.txt +2 -0
  71. sliceagent-0.1.0.dist-info/licenses/LICENSE +21 -0
sliceagent/memory.py ADDED
@@ -0,0 +1,428 @@
1
+ """Memory implementations — the state VAULT (task resumability) that MememMemory/NullMemory share,
2
+ plus the two brain-region MIXINS that give MememMemory its HIPPOCAMPUS (hippocampus.py) and
3
+ NEOCORTEX (neocortex.py) behavior. This file owns only what's left once those two concerns are
4
+ factored out: the skill-writer utilities (shared by /learn and consolidation), the task-state
5
+ markdown (de)serialization, and `checkpoint_task`/`load_task`/`list_session_tasks` — task resume is
6
+ neither episodic recall nor a distilled lesson, so it stays here rather than forcing it into either
7
+ mixin.
8
+
9
+ memem is the plug for cross-session lessons (via NeocortexMixin): its in-process hybrid retrieval
10
+ feeds the RELEVANT MEMORY tier and `memory_save` stores lessons. memem stays behind the `Memory`
11
+ interface — the moat never imports it — and we degrade to NullMemory when memem/its vault is absent.
12
+
13
+ `is_durable` is the structural marker: NullMemory sets it False, so hosts skip cache/checkpoint
14
+ wiring and evals stay deterministic. The vault root is decoupled from memem's STATE dir
15
+ (`MEMEM_DIR` = db/logs) — the cache is sliceagent-owned (`SLICEAGENT_VAULT`).
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import json
20
+ import os
21
+ import re
22
+ import tempfile
23
+ import threading
24
+
25
+ from .hippocampus import HippocampusMixin
26
+ from .interfaces import Snippet, TaskRef, TaskState
27
+ from .neocortex import NeocortexMixin
28
+ from .safety import redact_text, scan_for_threats # persist-guards: block-on-write + redact-on-persist
29
+ from .text_utils import now_iso as _now_iso
30
+
31
+
32
+ def _write_atomic(path: str, text: str) -> None:
33
+ """#39: write text atomically (temp in the same dir + os.replace) so a crash mid-write can't corrupt
34
+ a task file or the session index — the original stays intact and the rename is atomic on POSIX."""
35
+ d = os.path.dirname(path) or "."
36
+ fd, tmp = tempfile.mkstemp(prefix=".tmp-", dir=d)
37
+ try:
38
+ with os.fdopen(fd, "w", encoding="utf-8") as f:
39
+ f.write(text)
40
+ os.replace(tmp, path)
41
+ except BaseException:
42
+ try:
43
+ os.unlink(tmp)
44
+ except OSError:
45
+ pass
46
+ raise
47
+
48
+
49
+ def _safe_vault_id(x: str) -> str | None:
50
+ """A task_id / session_id is model- and user-controllable (switch_topic, /resume) and is joined into a
51
+ vault path, so reject anything that could traverse out (`..`, separators, nul). Returns the id or None."""
52
+ x = (x or "").strip()
53
+ if not x or not re.fullmatch(r"[A-Za-z0-9._-]+", x) or ".." in x:
54
+ return None
55
+ return x
56
+
57
+
58
+ def _vault_root() -> str:
59
+ """sliceagent-owned vault root. Prefers a dedicated var; then a document-vault var; NEVER
60
+ MEMEM_DIR (that is memem's state/db dir, not a vault). Falls back to ~/.sliceagent/vault."""
61
+ for k in ("SLICEAGENT_VAULT", "SLICEAGENT_CACHE_DIR",
62
+ "MEMEM_OBSIDIAN_VAULT", "CORTEX_OBSIDIAN_VAULT", "MEMEM_VAULT", "CORTEX_VAULT"):
63
+ v = os.environ.get(k)
64
+ if v:
65
+ return os.path.expanduser(v)
66
+ return os.path.join(os.path.expanduser("~"), ".sliceagent", "vault")
67
+
68
+
69
+ def _skills_dir() -> str:
70
+ """Where consolidation writes promoted-procedure SKILL.md packs — a dir the SkillManager scans
71
+ (default ~/.sliceagent/skills, so skills are discovered next session). SLICEAGENT_SKILLS_DIR overrides."""
72
+ return os.path.expanduser(os.environ.get("SLICEAGENT_SKILLS_DIR")
73
+ or os.path.join("~", ".sliceagent", "skills"))
74
+
75
+
76
def write_skill_file(name: str, body: str, *, skills_dir: str | None = None) -> str | None:
    """Persist ONE SKILL.md — the single guarded writer shared by auto-consolidation and /learn.

    Steps, in order: sanitize `name` into a directory slug, require frontmatter in `body`, BLOCK when
    the strict threat scan flags the body (a poisoned skill would re-inject unscanned every session),
    then write the REDACTED body atomically to `<skills_dir>/<slug>/SKILL.md`.

    Returns the path written, or None when the skill is rejected or anything fails. Never raises.
    """
    try:
        slug = re.sub(r"[^a-z0-9._-]+", "-", (name or "").strip().lower())
        slug = slug.strip("-").strip(".")[:64]  # strip(".") rejects a '.'/'..' dir escape
        if not slug:
            slug = "skill"

        has_frontmatter = body.lstrip().startswith("---") and "name:" in body[:200]
        if not has_frontmatter:
            return None  # not a valid SKILL.md (frontmatter required)
        if scan_for_threats(body, scope="strict"):
            return None  # (a) BLOCK on write — poisoned skill

        root = skills_dir or _skills_dir()
        pack_dir = os.path.join(root, slug)
        os.makedirs(pack_dir, exist_ok=True)
        target = os.path.join(pack_dir, "SKILL.md")
        # _write_atomic uses a per-writer mkstemp temp (not a fixed `path + ".tmp"`), so two
        # concurrent skill writes can't clobber each other's temp file.
        _write_atomic(target, redact_text(body))  # (c) redact any secret before persisting
        return target
    except Exception:  # noqa: BLE001 — a skill-write failure must never break the caller
        return None
96
+
97
+
98
def make_write_skill_tool():
    """Build the FOREGROUND skill writer — the agent-callable `write_skill` tool that /learn drives.

    The agent supplies name/description/body; WE own the frontmatter (provenance: user) and the
    guarded write performed by write_skill_file, so a model can't forge AUTO provenance or smuggle
    an unscanned skill onto disk.

    Returns a ToolEntry whose handler takes the tool-call `args` dict and returns a status string.
    """
    from .registry import ToolEntry
    from .skill_provenance import USER, frontmatter_line

    def handler(args: dict) -> str:
        name = re.sub(r"[^a-z0-9._-]+", "-", (args.get("name") or "").strip().lower())
        name = name.strip("-").strip(".")[:64]  # strip(".") rejects a '.'/'..' dir escape
        # The description lands on ONE frontmatter line that we own. Flatten EVERY character that
        # str.splitlines() treats as a line boundary (not just '\n'): a '\r' or U+2028 would
        # otherwise start a new line inside the frontmatter block for any line-based reader.
        desc = re.sub(r"[\n\r\x0b\x0c\x1c\x1d\x1e\x85\u2028\u2029]", " ",
                      (args.get("description") or "").strip())[:120]
        body = (args.get("body") or "").strip()
        if not name or not desc or not body:
            return "write_skill: need a name, a description, and a body."
        md = f"---\nname: {name}\ndescription: {desc}\n{frontmatter_line(USER)}\n---\n\n{body}\n"
        path = write_skill_file(name, md)
        if not path:
            return "write_skill: rejected (invalid frontmatter, empty, or flagged by the security scan)."
        return f"Skill saved to {path} (provenance: user — it will load next session)."

    schema = {"type": "function", "function": {
        "name": "write_skill",
        "description": ("Save a REUSABLE skill (SKILL.md) authored by you, so a FUTURE session can load and "
                        "reuse it. Provide a lowercase-hyphenated `name`, a <=60-char `description` of the "
                        "capability, and the markdown `body` (## When to use / ## Process / ## Pitfalls / "
                        "## Verification). This is how /learn turns what you just did into a durable skill."),
        "parameters": {"type": "object", "properties": {
            "name": {"type": "string", "description": "lowercase-hyphenated skill name (no spaces)"},
            "description": {"type": "string", "description": "one sentence, <=60 chars, the capability"},
            "body": {"type": "string", "description": "the skill body markdown (sections as above)"},
        }, "required": ["name", "description", "body"]}}}
    return ToolEntry(name="write_skill", schema=schema, handler=handler, source="builtin")
130
+
131
+
132
+ # --- task-state markdown (de)serialization — pure module fns (no memem) -------------------
133
+
134
+ def _split_frontmatter(text: str) -> tuple[dict, str]:
135
+ """Parse a leading `---\\n...\\n---` block of flat `key: value` scalars; return (fm, body)."""
136
+ fm: dict = {}
137
+ if text.startswith("---"):
138
+ end = text.find("\n---", 3)
139
+ if end != -1:
140
+ for ln in text[3:end].strip("\n").splitlines():
141
+ if ":" in ln:
142
+ k, v = ln.split(":", 1)
143
+ fm[k.strip()] = v.strip().strip('"')
144
+ return fm, text[end + 4:].lstrip("\n")
145
+ return fm, text
146
+
147
+
148
+ _BODY_HDR_ESC = "⁣" # invisible separator: prefix a VERBATIM line that begins with '## ' so
149
+ # _read_sections doesn't mistake it for a section header (model-written markdown
150
+ # in goal/mission/last_error/resolution otherwise truncates/misroutes on resume).
151
+
152
+
153
+ def _esc_body(t: str) -> str:
154
+ # escape a line that starts with '## ' OR already starts with the sentinel (so verbatim content that
155
+ # natively begins with the sentinel round-trips exactly — _unesc peels exactly one layer).
156
+ if not t or ("## " not in t and _BODY_HDR_ESC not in t):
157
+ return t
158
+ return "\n".join(_BODY_HDR_ESC + ln if (ln.startswith("## ") or ln.startswith(_BODY_HDR_ESC)) else ln
159
+ for ln in t.split("\n"))
160
+
161
+
162
+ def _unesc_body(t: str) -> str:
163
+ if not t or _BODY_HDR_ESC not in t:
164
+ return t
165
+ return "\n".join(ln[1:] if ln.startswith(_BODY_HDR_ESC) else ln for ln in t.split("\n"))
166
+
167
+
168
+ def _safe_int(v, default: int = 0) -> int:
169
+ try:
170
+ return int(v)
171
+ except (TypeError, ValueError):
172
+ return default
173
+
174
+
175
+ def _read_sections(body: str) -> dict:
176
+ """Split a body into {lower-header: verbatim text} by '## ' headers (preserves multi-line)."""
177
+ out, cur, buf = {}, None, []
178
+ for ln in body.splitlines():
179
+ if ln.startswith("## "):
180
+ if cur is not None:
181
+ out[cur] = "\n".join(buf).strip("\n")
182
+ cur, buf = ln[3:].strip().lower(), []
183
+ elif cur is not None:
184
+ buf.append(ln)
185
+ if cur is not None:
186
+ out[cur] = "\n".join(buf).strip("\n")
187
+ return out
188
+
189
+
190
+ def _bullets(text: str) -> list[str]:
191
+ out = []
192
+ for ln in (text or "").splitlines():
193
+ s = ln.strip()
194
+ if s.startswith("- "):
195
+ out.append(s[2:].strip())
196
+ return out
197
+
198
+
199
def _render_task_md(task: TaskState, *, created: str, updated: str) -> str:
    """Serialize a task's state to the vault markdown format (frontmatter + '## ' sections).

    The frontmatter is one flat `key: value` per line; the body is a fixed sequence of sections that
    the reader picks apart line by line. `created` / `updated` are the timestamps to stamp into the
    frontmatter. Returns the full file text, newline-terminated.
    """
    def _fm(v) -> str:
        # #37: a line break inside a scalar would spill onto a line the frontmatter parser drops
        # (truncating the value). The parser splits with str.splitlines(), so flatten EVERY
        # boundary it recognizes — and do it for every scalar, ids and links included.
        return re.sub(r"[\n\r\x0b\x0c\x1c\x1d\x1e\x85\u2028\u2029]", " ", str(v))

    def _json_bullets(items) -> str:
        # JSON-per-bullet so structured items round-trip EXACTLY. ensure_ascii=False leaves
        # U+0085/U+2028/U+2029 raw, and the line-based reader would split the bullet there (the
        # fragment then fails json.loads and the item is dropped) — emit them as \uXXXX escapes.
        rendered = []
        for item in items:
            encoded = json.dumps(item, ensure_ascii=False)
            for ch in ("\x85", "\u2028", "\u2029"):
                encoded = encoded.replace(ch, f"\\u{ord(ch):04x}")
            rendered.append(f"- {encoded}")
        return "\n".join(rendered)

    fm = [
        "---", "type: task-state", "v: 1",
        f"session_id: {_fm(task.session_id)}", f"task_id: {_fm(task.task_id)}",
        f"title: {_fm(task.title)}", f"status: {_fm(task.status)}",
        f"created: {_fm(created)}", f"updated: {_fm(updated)}",
        f"since_edit: {_fm(task.since_edit)}",
        f"links: {_fm(','.join(task.links))}", f"tags: {_fm(task.tags)}", "---",
    ]
    body = [
        "## Goal", _esc_body(task.goal),
        # NOTE: findings / file lists are plain one-line bullets; a multi-line entry is not
        # preserved by the line-based reader.
        "## Findings", "\n".join(f"- {f}" for f in task.findings),
        # provenance per finding (JSON bullet, like World) — else cross-session resume drops it and a
        # 'claim'-tier finding silently reads back at the higher 'tool-note' trust tier.
        "## Finding sources", _json_bullets([k, v] for k, v in task.finding_source.items()),
        # carried slice tiers — without these, resuming a task silently dropped the standing
        # contract / todo / north-star / world model (data loss).
        "## Requirements", _json_bullets(task.requirements),
        "## Plan", _json_bullets(task.plan),
        "## Mission", _esc_body(task.mission),
        "## Open report", _esc_body(getattr(task, "open_report", "")),
        "## World", _json_bullets([k, v] for k, v in task.world.items()),
        "## Working set", "\n".join(f"- {p}" for p in task.active_files),
        "## Edited", "\n".join(f"- {p}" for p in sorted(task.edited_files)),
        # anchor is TAB-separated (a path never contains TAB; anchors may contain ' :: ' etc.)
        "## Anchors", "\n".join(f"- {p}\t{a}" for p, a in task.edit_anchor.items()),
        "## Status", _esc_body(task.last_error),  # verbatim, may be empty/multi-line
        "## Resolution", _esc_body(task.resolution),
    ]
    return "\n".join(fm) + "\n" + "\n".join(body) + "\n"
235
+
236
+
237
def _parse_task_md(path: str) -> TaskState | None:
    """Read a task-state markdown file and rebuild the TaskState it describes.

    Missing sections/keys fall back to empty values (status defaults to "active"); a corrupt JSON
    bullet or a corrupt `since_edit` counter is skipped/zeroed rather than aborting the load.
    """
    with open(path, encoding="utf-8") as handle:
        fm, body = _split_frontmatter(handle.read())
    sec = _read_sections(body)

    def _text(section: str) -> str:
        return _unesc_body(sec.get(section, ""))

    def _decoded(section: str) -> list:
        items = []
        for raw in _bullets(sec.get(section, "")):
            raw = raw.strip()
            if not raw:
                continue
            try:
                items.append(json.loads(raw))
            except Exception:  # a corrupt line must not break resume
                continue
        return items

    def _pairs(section: str) -> dict:
        # keep only well-formed [str_key, value] bullets — a non-str key is unhashable/invalid, so
        # skip that bullet, not the whole task
        return {kv[0]: kv[1] for kv in _decoded(section)
                if isinstance(kv, list) and len(kv) == 2 and isinstance(kv[0], str)}

    anchors: dict = {}
    for entry in _bullets(sec.get("anchors", "")):
        file_part, tab, anchor = entry.partition("\t")
        if tab:
            anchors[file_part.strip()] = anchor

    world = _pairs("world")
    return TaskState(
        task_id=fm.get("task_id", ""), session_id=fm.get("session_id", ""),
        title=fm.get("title", ""), status=fm.get("status", "active"),
        goal=_text("goal"),
        findings=_bullets(sec.get("findings", "")),
        finding_source=_pairs("finding sources"),
        requirements=[r for r in _decoded("requirements") if isinstance(r, dict)],
        plan=[p for p in _decoded("plan") if isinstance(p, dict)],
        mission=_text("mission"),
        open_report=_text("open report"),
        world=world,
        active_files=_bullets(sec.get("working set", "")),
        edited_files=_bullets(sec.get("edited", "")),
        edit_anchor=anchors,
        last_error=_text("status"),
        since_edit=_safe_int(fm.get("since_edit"), 0),  # corrupt counter → 0, don't abort the load
        links=[x for x in fm.get("links", "").split(",") if x],
        tags=fm.get("tags", ""),
        resolution=_text("resolution"),
    )
283
+
284
+
285
def _upsert_session_index(vault: str, task: TaskState, updated: str) -> None:
    """Insert or replace this task's row in the per-session index file.

    There is ONE index file per session (`<vault>/sessions/<session_id>.md`), so list_session_tasks
    reads it instead of globbing. Existing rows are kept; the row for `task.task_id` is rewritten
    as `task_id · status · updated[ · title]`, and the file is written atomically.
    """
    sessions_dir = os.path.join(vault, "sessions")
    os.makedirs(sessions_dir, exist_ok=True)
    index_path = os.path.join(sessions_dir, f"{task.session_id}.md")

    by_task: dict = {}  # task_id -> row text (without the leading "- ")
    if os.path.exists(index_path):
        with open(index_path, encoding="utf-8") as handle:
            _, existing = _split_frontmatter(handle.read())
        for row in _bullets(_read_sections(existing).get("tasks", "")):
            by_task[row.split(" · ", 1)[0].strip()] = row

    # model-derived → redact before persisting
    title = redact_text((task.title or "").replace("\n", " "))
    # title goes LAST — and the trailing " · title" is OMITTED when empty, else _bullets strips the
    # trailing field, the row parses to 3 parts and is silently dropped from the session index.
    entry = f"{task.task_id} · {task.status} · {updated}"
    if title:
        entry += f" · {title}"
    by_task[task.task_id] = entry

    header = ["---", "type: session", f"session_id: {task.session_id}", "---", "## Tasks"]
    _write_atomic(index_path, "\n".join(header + [f"- {row}" for row in by_task.values()]) + "\n")
303
+
304
+
305
def _parse_session_index(path: str) -> list[TaskRef]:
    """Read a session index file and return one TaskRef per well-formed row of its Tasks section.

    A row is `task_id · status · updated[ · title]`; the title is optional and may itself contain
    ' · '. Rows with fewer than three fields are skipped.
    """
    with open(path, encoding="utf-8") as handle:
        _, body = _split_frontmatter(handle.read())
    refs: list[TaskRef] = []
    for row in _bullets(_read_sections(body).get("tasks", "")):
        fields = row.split(" · ", 3)  # at most 4 fields; everything after the 3rd ' · ' is title
        if len(fields) < 3:
            continue
        tid, status, updated, *rest = fields
        title = rest[0] if rest else ""
        refs.append(TaskRef(task_id=tid.strip(), title=title.strip(),
                            status=status.strip(), updated=updated.strip()))
    return refs
317
+
318
+
319
+ # --- implementations ----------------------------------------------------------------------
320
+
321
class NullMemory:
    """The no-durable-memory implementation (the default until a vault is configured).

    A TRUE no-op: every method is inert (no I/O, no clock) and returns an empty result, so the eval
    path is deterministic and adds nothing to the slice.
    """

    is_durable = False  # structural marker: hosts skip cache/checkpoint wiring when False

    def recall(self, query: str, k: int = 6, paths: list[str] | None = None) -> list[Snippet]:
        """Nothing is stored, so nothing is recalled."""
        return []

    def remember(self, content: str, *, title: str = "", scope: str = "default", tags: str = "",
                 paths: list[str] | None = None) -> None:
        """Discard the lesson."""

    def append_episode(self, session_id: str, task_id: str, turn: int, record: dict) -> None:
        """Discard the episode record."""

    def read_episodes(self, session_id: str, *, limit: int | None = None) -> list[dict]:
        """No episodes exist."""
        return []

    def episode_manifest(self, session_id: str, k: int) -> tuple[list[dict], int]:
        """Empty manifest with a zero count."""
        return ([], 0)

    def search_episodes(self, query: str, *, limit: int = 5,
                        exclude_session: str | None = None,
                        only_session: str | None = None) -> list[dict]:
        """No episodes to search."""
        return []

    def checkpoint_task(self, task: TaskState) -> None:
        """Discard the checkpoint."""

    def load_task(self, task_id: str) -> TaskState | None:
        """No task can be resumed."""
        return None

    def list_session_tasks(self, session_id: str) -> list[TaskRef]:
        """No tasks are recorded."""
        return []

    def mark_used(self, memory_id: str) -> None:
        """Nothing to mark."""

    def consolidate(self, session_id: str, *, llm=None, mode: str = "deterministic") -> dict:
        """Report an all-zero consolidation result."""
        return dict.fromkeys(("lessons", "skills", "skills_rejected", "errors"), 0)

    def close(self) -> None:
        """Nothing to release."""
365
+
366
+
367
class MememMemory(HippocampusMixin, NeocortexMixin):
    """Adapter over memem (lessons, via NeocortexMixin) + the on-disk episodic cache (via
    HippocampusMixin) + the state vault (task resume, below). Construction fails fast if memem
    isn't importable. The vault is sliceagent-owned (_vault_root), decoupled from memem's state dir."""

    is_durable = True

    def __init__(self) -> None:
        import memem.retrieve  # noqa: F401 — fail fast if memem is absent
        self._vault = _vault_root()
        self._scope = os.path.basename(os.getcwd()) or "default"  # same-project soft bonus on recall
        self._idx_lock = threading.Lock()  # serialize the lazy FTS-index open across parallel explorers

    # --- task state / resume ---
    def checkpoint_task(self, task: TaskState) -> None:
        """Persist `task` to `<vault>/tasks/<task_id>.md` and refresh its session index row.

        Best-effort: any failure is swallowed so checkpointing can never break the caller. Ids are
        validated with the SAME guard the read side uses (_safe_vault_id) before they are joined
        into a path — they are model/user-controllable, and an unsafe id could otherwise write
        outside the vault (and would produce a file load_task/list_session_tasks can never read).
        """
        try:
            tid = _safe_vault_id(task.task_id)
            if tid is None:
                return  # unsafe/empty task id: nothing load_task could ever read back
            d = os.path.join(self._vault, "tasks")
            os.makedirs(d, exist_ok=True)
            path = os.path.join(d, f"{tid}.md")
            created = _now_iso()
            if os.path.exists(path):  # preserve the original created on update
                with open(path, encoding="utf-8") as f:
                    fm, _ = _split_frontmatter(f.read())
                created = fm.get("created") or created
            updated = _now_iso()
            # redact the WHOLE rendered task state before it lands on disk — title/goal/findings/
            # last_error/resolution/mission/world are all model/tool-derived and may carry secrets.
            # Redact-the-output is future-proof: new fields are covered automatically.
            _write_atomic(path, redact_text(_render_task_md(task, created=created, updated=updated)))
            # the index path is built from the raw session_id, so only touch it when that id is
            # already in its safe, canonical form (exactly what list_session_tasks would open)
            if _safe_vault_id(task.session_id) == task.session_id:
                _upsert_session_index(self._vault, task, updated)
        except Exception:  # noqa: BLE001 — deliberate best-effort; a checkpoint failure is non-fatal
            pass

    def load_task(self, task_id: str) -> TaskState | None:
        """Load a checkpointed task by id; None when the id is unsafe, unknown, or unreadable."""
        tid = _safe_vault_id(task_id)
        if tid is None:
            return None  # reject path-traversal in a model/user-controlled id
        try:
            path = os.path.join(self._vault, "tasks", f"{tid}.md")
            return _parse_task_md(path) if os.path.exists(path) else None
        except Exception:  # noqa: BLE001 — a corrupt task file must not break resume
            return None

    def list_session_tasks(self, session_id: str) -> list[TaskRef]:
        """List the tasks recorded in a session's index; [] when unsafe, unknown, or unreadable."""
        sid = _safe_vault_id(session_id)
        if sid is None:
            return []
        try:
            path = os.path.join(self._vault, "sessions", f"{sid}.md")
            return _parse_session_index(path) if os.path.exists(path) else []
        except Exception:  # noqa: BLE001 — a corrupt index must not break listing
            return []
419
+
420
+
421
def make_memory(prefer_memem: bool = True):
    """Build the memory backend: MememMemory when preferred and constructible, else NullMemory.

    Any failure while constructing MememMemory (e.g. memem not importable) degrades gracefully to
    the inert NullMemory.
    """
    if not prefer_memem:
        return NullMemory()
    try:
        return MememMemory()
    except Exception:  # noqa: BLE001 — graceful degrade is the contract
        return NullMemory()
sliceagent/metrics.py ADDED
@@ -0,0 +1,103 @@
1
+ """Cost + reliability metrics — the moat-MEASURING observer, expressed for the slice thesis.
2
+
3
+ The project's whole bet is that per-turn cost stays FLAT as the conversation grows (the slice rebuilds a
4
+ bounded seed each turn) while a transcript agent's climbs linearly. That bet is only credible if it's a
5
+ NUMBER. This sink makes it one: the headline signal is `per_turn_fresh` — the FRESH (non-cache-read) input
6
+ tokens per turn — which should stay flat for sliceagent and climb for a log-based agent.
7
+
8
+ Pure OBSERVER, like its sibling `Telemetry`: consumes the loop's events, accumulates counters, emits nothing,
9
+ mutates no slice — completely off the moat. It reads the TYPED usage breakdown the llm adapter now produces
10
+ (`input_other`/`input_cache_read`/`input_cache_creation`/`output`, from llm._usage_dict). Per-step usage is
11
+ accumulated from StepEnd; TurnEnd snapshots the per-turn fresh-input total and resets — so no double-counting
12
+ with TurnEnd's cumulative `total`. Wire it into a dispatcher alongside slice_sink/telemetry and read
13
+ `.summary()` afterward; `record_error(kind)` folds in the llm error buckets from errors.classify().
14
+ """
15
+ from __future__ import annotations
16
+
17
+ from .events import (ApiRetry, Event, SliceTightened, StepEnd, ToolResult, TurnEnd,
18
+ TurnInterrupted)
19
+
20
+
21
class CostMetrics:
    """Callable event sink accumulating cost + reliability metrics. Read `.summary()` after a run.

    Pass an instance wherever an event sink is expected: each event updates the counters and
    nothing is emitted. Token counters come from the typed usage dict carried by StepEnd events.
    """

    def __init__(self) -> None:
        self.turns = 0
        self.steps = 0
        self.input_other = 0  # FRESH (non-cache-read) input tokens — the real cost driver
        self.input_cache_read = 0  # input served from the provider prompt cache (~0.1x price)
        self.input_cache_creation = 0
        self.output = 0
        self.per_turn_fresh: list[int] = []  # input_other per turn end — THE moat curve (flat vs climbing)
        self.tool_calls = 0
        self.tool_failures = 0
        self.retries = 0
        self.overflows = 0
        self.errors: dict[str, int] = {}  # classify() kind -> count
        self._turn_fresh = 0  # accumulator for the in-progress turn

    def __call__(self, e: Event) -> None:
        """Fold one loop event into the counters; unrecognized event types are ignored."""
        if isinstance(e, StepEnd):
            self.steps += 1
            self._add(e.usage)
        elif isinstance(e, (TurnEnd, TurnInterrupted)):
            # #56: snapshot + reset on BOTH clean and PARKED turn-ends. Without TurnInterrupted, a
            # parked turn's fresh tokens were dropped from the moat curve AND its accumulator bled
            # into the next turn; turns/per_turn_fresh undercounted on every interruption.
            self.turns += 1
            self.per_turn_fresh.append(self._turn_fresh)
            self._turn_fresh = 0
            if isinstance(e, TurnInterrupted):
                self.errors[f"park:{e.reason}"] = self.errors.get(f"park:{e.reason}", 0) + 1
        elif isinstance(e, ToolResult):
            self.tool_calls += 1
            if e.failing:
                self.tool_failures += 1
        elif isinstance(e, ApiRetry):
            self.retries += 1
        elif isinstance(e, SliceTightened):
            self.overflows += 1

    def _add(self, usage: dict | None) -> None:
        """Accumulate one step's usage dict; a missing/None/zero field counts as 0."""
        if not usage:
            return
        fresh = usage.get("input_other", 0) or 0
        self.input_other += fresh
        self._turn_fresh += fresh
        self.input_cache_read += usage.get("input_cache_read", 0) or 0
        self.input_cache_creation += usage.get("input_cache_creation", 0) or 0
        # output: prefer the typed key, fall back to the legacy one (older usage dicts). The
        # fallback must also apply when the typed key is PRESENT but empty (None/0) — a plain
        # .get(key, default) only falls back when the key is absent.
        self.output += usage.get("output") or usage.get("completion_tokens") or 0

    def record_error(self, kind: str) -> None:
        """Fold an llm error bucket (errors.classify()['kind']) into the failure histogram. Called by
        the host's retry/closeout path; the loop itself stays observer-only. An empty kind is ignored."""
        if kind:
            self.errors[kind] = self.errors.get(kind, 0) + 1

    def summary(self) -> dict:
        """Return a snapshot dict of all counters plus derived rates (copies, safe to mutate)."""
        input_total = self.input_other + self.input_cache_read + self.input_cache_creation
        hit = round(self.input_cache_read / input_total, 3) if input_total else 0.0
        ptf = self.per_turn_fresh
        return {
            "turns": self.turns,
            "steps": self.steps,
            "input_other": self.input_other,
            "input_cache_read": self.input_cache_read,
            "input_cache_creation": self.input_cache_creation,
            "output": self.output,
            "cache_hit_rate": hit,  # cache-read / total input
            "per_turn_fresh": list(ptf),  # the moat curve
            "avg_turn_fresh": round(sum(ptf) / len(ptf), 1) if ptf else 0.0,
            "peak_turn_fresh": max(ptf) if ptf else 0,
            "tool_calls": self.tool_calls,
            "tool_failures": self.tool_failures,
            "retries": self.retries,
            "overflows": self.overflows,
            "errors": dict(self.errors),
        }
99
+
100
+
101
def make_metrics_sink() -> CostMetrics:
    """Create a fresh CostMetrics: the instance IS the (callable) sink AND holds the counters to
    read afterward."""
    sink = CostMetrics()
    return sink
@@ -0,0 +1,124 @@
1
+ """Model capability catalog.
2
+
3
+ Maps a model name (+ base URL) to its capabilities and wire quirks so provider-specific knowledge lives
4
+ in ONE place instead of scattered `startswith` checks. Pattern-matched with a safe UNKNOWN default. Pure
5
+ data + lookup; the llm adapter consults it (it is the source of truth for the tokens-param rename and the
6
+ reasoning_effort capability — previously duplicated inline in llm.py).
7
+
8
+ context_window is left 0 (unknown) unless genuinely known — sliceagent's overflow is reactive, so no caller
9
+ relies on a fabricated number; the field is informational for any future context-window-aware feature.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ from dataclasses import dataclass
14
+
15
+
16
@dataclass(frozen=True)
class ModelCapability:
    # Immutable capability record for one model; the defaults describe an UNKNOWN model.
    family: str = "unknown"
    # Name of the request parameter that caps completion length. OpenAI gpt-5 / o-series renamed
    # it to `max_completion_tokens` and REJECT `max_tokens`.
    tokens_param: str = "max_tokens"
    # accepts the OpenAI `reasoning_effort` param (gpt-5 / o-series). NOT deepseek (uses extra_body.thinking)
    # nor moonshot/anthropic — those map "fast" to their own knobs in llm._reasoning_kwargs.
    supports_reasoning_effort: bool = False
    supports_tools: bool = True
    supports_stream_options: bool = True  # OpenAI stream_options={include_usage}; set False if a provider 400s
    supports_vision: bool = False  # accepts image content parts (multimodal); gates @image attachment
    context_window: int = 0  # 0 = unknown (no fabricated values)


# The all-defaults record, i.e. the capabilities assumed for an unrecognized model.
_UNKNOWN = ModelCapability()
31
+
32
+ # USD per 1M tokens: (input_fresh, input_cached, output). SINGLE SOURCE for the cost meter — keyed by a
33
+ # name/family substring, first match wins. Update HERE when a provider changes pricing. (Context windows stay
34
+ # 0/unknown by design: sliceagent's overflow is reactive, so nothing fabricates a window — see ModelCapability.)
35
+ _PRICES = {
36
+ "gpt-5": (1.25, 0.125, 10.0), "gpt-4": (2.50, 1.25, 10.0), "o3": (2.0, 0.5, 8.0),
37
+ "deepseek": (0.27, 0.07, 1.10), "kimi": (0.60, 0.15, 2.50), "moonshot": (0.60, 0.15, 2.50),
38
+ "claude": (3.0, 0.30, 15.0),
39
+ }
40
+
41
+
42
+ def pricing(model: str, base_url: str = "") -> "tuple | None":
43
+ """USD/1M (input, cached_input, output) for a model, or None if unknown. The cost meter's single source."""
44
+ s = (model or "").lower() + " " + (base_url or "").lower()
45
+ for k, v in _PRICES.items():
46
+ if k in s:
47
+ return v
48
+ return None
49
+
50
# Vision is keyed off the MODEL name, not the family: within one family some models are text-only
# (e.g. kimi-k2.7-code) while others are multimodal (e.g. moonshot-*-vision). A model name containing
# any of these substrings is treated as accepting image input. Conservative allowlist.
_VISION_HINTS = ("vision", "gpt-4o", "gpt-4.1", "gpt-5", "gpt-6", "claude-3", "claude-4",
                 "claude-opus", "claude-sonnet", "gemini", "-vl", "qwen-vl")
54
+
55
+
56
+ def _is_openai_endpoint(base_url: str) -> bool:
57
+ """True only when `base_url` is OpenAI's real API — the default (unset → the SDK's own default) or an
58
+ explicit api.openai.com. reasoning_effort + the /v1/responses route are OpenAI-ONLY wire features; a
59
+ model literally NAMED "gpt-5.5"/"o3" served by a DIFFERENT endpoint (DeepSeek, Moonshot, a local proxy —
60
+ /model only switches the model string, never the endpoint) does NOT speak that protocol. Routing to
61
+ /v1/responses there 404s (openai.NotFoundError — the route doesn't exist on that server), which used to
62
+ surface as a cryptic 'internal error ended the turn'; gating on the endpoint keeps it on the universal
63
+ chat/completions path instead — degrade gracefully, never assume a wire feature from the name alone."""
64
+ b = (base_url or "").strip().lower()
65
+ return b == "" or "api.openai.com" in b
66
+
67
+
68
+ # name substrings -> the ONE provider that actually serves that model. `/model` only switches the model
69
+ # STRING, never the endpoint (that's `config --use`), so this is the general "will this even resolve"
70
+ # check — broader than capability()'s narrower reasoning-effort gate.
71
+ _NAME_HOME = (
72
+ (("o1", "o2", "o3", "o4", "o5", "o6", "gpt-3", "gpt-4", "gpt-5", "gpt-6"), "openai"),
73
+ (("deepseek",), "deepseek"),
74
+ (("kimi", "moonshot"), "moonshot"),
75
+ (("claude",), "anthropic"),
76
+ )
77
+ # base_url substring -> the ONE provider that endpoint actually is. An UNMATCHED base_url (custom domain,
78
+ # a local proxy/router) is deliberately left unresolved — such a proxy can legitimately re-route ANY model
79
+ # name to any backend, so warning there would be a false positive (same safe-UNKNOWN posture as capability()).
80
+ _ENDPOINT_HOME = (
81
+ (("api.openai.com",), "openai"),
82
+ (("deepseek.com",), "deepseek"),
83
+ (("moonshot.cn",), "moonshot"),
84
+ (("anthropic.com",), "anthropic"),
85
+ )
86
+
87
+
88
+ def _home(s: str, table: tuple) -> "str | None":
89
+ # each entry is (tuple-of-substrings, home) — NOT a single bare string, else `for k in keys` iterates
90
+ # individual CHARACTERS and matches almost anything (caught by a test: deepseek.com false-matched "openai").
91
+ for keys, home in table:
92
+ if any(k in s for k in keys):
93
+ return home
94
+ return None
95
+
96
+
97
def likely_endpoint_mismatch(model: str, base_url: str) -> "str | None":
    """Detect a well-known model name about to be sent to a DIFFERENT well-known endpoint.

    Returns the model's own home provider in that case — e.g. "openai" for 'gpt-5.5' while still
    connected to DeepSeek. Returns None (never warn) when the homes agree or when either side is
    unrecognized: a custom/proxy endpoint may legitimately serve any name, so a false-positive
    warning there is worse than a missed one.
    """
    name = (model or "").lower()
    url = (base_url or "").strip().lower()
    model_home = _home(name, _NAME_HOME)
    if _is_openai_endpoint(url):
        endpoint_home = "openai"
    else:
        endpoint_home = _home(url, _ENDPOINT_HOME)
    if not model_home or not endpoint_home:
        return None
    return model_home if model_home != endpoint_home else None
106
+
107
+
108
def capability(model: str, base_url: str = "") -> ModelCapability:
    """Resolve the capability record for a model (first matching rule wins; specific before general).

    `model` and `base_url` are matched case-insensitively; None/empty is allowed for either.
    Vision support is decided from the model name alone (_VISION_HINTS) and applied to whichever
    family matches. An unrecognized model gets the default (unknown-family) record.
    """
    m = (model or "").lower()
    b = (base_url or "").lower()
    vis = any(h in m for h in _VISION_HINTS)
    # OpenAI reasoning wire features require BOTH the name and OpenAI's real endpoint.
    if m.startswith(("o1", "o3", "o4", "o5", "o6", "gpt-5", "gpt-6")) and _is_openai_endpoint(b):
        return ModelCapability("openai-reasoning", tokens_param="max_completion_tokens",
                               supports_reasoning_effort=True, supports_vision=vis)
    if "deepseek" in m or "deepseek" in b:
        return ModelCapability("deepseek", supports_vision=vis)  # reasoning via extra_body.thinking
    # Moonshot serves both `kimi-*` and `moonshot-*` model names; check the name for either marker
    # so a `moonshot-v1-*` model behind a proxy/unset URL still resolves to its family.
    if "kimi" in m or "moonshot" in m or "moonshot" in b:
        return ModelCapability("moonshot", supports_vision=vis)
    if "claude" in m or "anthropic" in b:
        return ModelCapability("anthropic", supports_vision=vis)
    if m.startswith("gpt-") or "openai" in b:
        return ModelCapability("openai", supports_vision=vis)
    return ModelCapability(supports_vision=vis)