sliceagent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. sliceagent/__init__.py +3 -0
  2. sliceagent/__main__.py +6 -0
  3. sliceagent/access.py +93 -0
  4. sliceagent/agents.py +173 -0
  5. sliceagent/background_review.py +146 -0
  6. sliceagent/binsniff.py +89 -0
  7. sliceagent/cli.py +890 -0
  8. sliceagent/clock.py +32 -0
  9. sliceagent/code_grep.py +329 -0
  10. sliceagent/code_index.py +417 -0
  11. sliceagent/config.py +240 -0
  12. sliceagent/context_overflow.py +227 -0
  13. sliceagent/envspec.py +129 -0
  14. sliceagent/errors.py +167 -0
  15. sliceagent/events.py +96 -0
  16. sliceagent/finding_types.py +70 -0
  17. sliceagent/flags.py +63 -0
  18. sliceagent/fuzzy.py +135 -0
  19. sliceagent/guardrails.py +438 -0
  20. sliceagent/guidance.py +69 -0
  21. sliceagent/hippocampus.py +581 -0
  22. sliceagent/hooks.py +334 -0
  23. sliceagent/interfaces.py +144 -0
  24. sliceagent/llm.py +695 -0
  25. sliceagent/loop.py +548 -0
  26. sliceagent/mcp_client.py +255 -0
  27. sliceagent/mcp_security.py +77 -0
  28. sliceagent/memory.py +428 -0
  29. sliceagent/metrics.py +103 -0
  30. sliceagent/model_catalog.py +124 -0
  31. sliceagent/monitor.py +615 -0
  32. sliceagent/neocortex.py +436 -0
  33. sliceagent/onboarding.py +323 -0
  34. sliceagent/oracle.py +36 -0
  35. sliceagent/pagetable.py +255 -0
  36. sliceagent/pfc.py +449 -0
  37. sliceagent/plugins.py +127 -0
  38. sliceagent/policy.py +234 -0
  39. sliceagent/procman.py +187 -0
  40. sliceagent/prompt.py +239 -0
  41. sliceagent/records.py +108 -0
  42. sliceagent/recovery.py +119 -0
  43. sliceagent/regions.py +678 -0
  44. sliceagent/registry.py +128 -0
  45. sliceagent/retriever.py +19 -0
  46. sliceagent/safety.py +332 -0
  47. sliceagent/sandbox.py +143 -0
  48. sliceagent/scheduler.py +92 -0
  49. sliceagent/search_index.py +289 -0
  50. sliceagent/seed.py +465 -0
  51. sliceagent/sensory_cortex.py +500 -0
  52. sliceagent/session.py +222 -0
  53. sliceagent/skill_provenance.py +71 -0
  54. sliceagent/skill_usage.py +123 -0
  55. sliceagent/skills.py +209 -0
  56. sliceagent/subagent.py +332 -0
  57. sliceagent/subdir_hints.py +222 -0
  58. sliceagent/swap.py +182 -0
  59. sliceagent/taskstate.py +57 -0
  60. sliceagent/telemetry.py +59 -0
  61. sliceagent/terminal.py +240 -0
  62. sliceagent/text_utils.py +56 -0
  63. sliceagent/tool_summary.py +93 -0
  64. sliceagent/tools.py +1194 -0
  65. sliceagent/tui.py +1377 -0
  66. sliceagent/web.py +354 -0
  67. sliceagent-0.1.0.dist-info/METADATA +262 -0
  68. sliceagent-0.1.0.dist-info/RECORD +71 -0
  69. sliceagent-0.1.0.dist-info/WHEEL +4 -0
  70. sliceagent-0.1.0.dist-info/entry_points.txt +2 -0
  71. sliceagent-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,323 @@
1
+ """First-run onboarding + config discovery — the `sliceagent init` / `config` / `help` / `version` subcommands.
2
+
3
+ Turns the cold start from "copy .env.example, learn 28 env vars, hand-edit a TOML" into "run `sliceagent init`":
4
+ pick a provider, paste a key, we test it, and write ~/.sliceagent/config.toml so the next bare `sliceagent`
5
+ just works. `sliceagent config --list` makes every knob discoverable; `sliceagent help` shows the surface.
6
+
7
+ All entry points take injectable input/getpass/llm/home so the wizard is testable without a tty or a key.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import os
12
+
13
+ from .envspec import GROUPS, REGISTRY, current_value
14
+
15
# Provider presets offered by the `init` menu:
#   menu choice → (provider id, label, base_url, default_model).
# The provider id is the key used for the [providers.<id>] config table. An empty base_url means
# "use the provider default endpoint"; 'custom' prompts the user for the base_url (and has no
# default model, so one must be typed).
PROVIDERS = {
    "1": ("moonshot", "Moonshot (Kimi)", "https://api.moonshot.cn/v1", "kimi-k2.7-code"),
    "2": ("openai", "OpenAI", "", "gpt-5.5"),
    "3": ("deepseek", "DeepSeek", "https://api.deepseek.com/v1", "deepseek-chat"),
    "4": ("custom", "Custom OpenAI-compatible endpoint", "", ""),
}
22
+
23
+
24
+ def _version() -> str:
25
+ try:
26
+ from . import __version__
27
+ return __version__
28
+ except Exception: # noqa: BLE001
29
+ return "0.0.0"
30
+
31
+
32
+ def _toml_str(v: str) -> str:
33
+ s = ((v or "").replace("\\", "\\\\").replace('"', '\\"')
34
+ .replace("\n", "\\n").replace("\r", "\\r").replace("\t", "\\t"))
35
+ return '"' + s + '"'
36
+
37
+
38
+ def _toml_key(k: str) -> str:
39
+ """A TOML table-header key: bare if it's a simple identifier, else a quoted key (so a provider id with a
40
+ dot/space/quote — e.g. 'my.host' — round-trips as one key instead of a nested table or a parse error)."""
41
+ if k and all(c.isalnum() or c in "-_" for c in k):
42
+ return k
43
+ return '"' + k.replace("\\", "\\\\").replace('"', '\\"') + '"'
44
+
45
+
46
+ def _toml_val(v) -> str:
47
+ if isinstance(v, bool):
48
+ return "true" if v else "false"
49
+ if isinstance(v, (int, float)):
50
+ return str(v)
51
+ if isinstance(v, list):
52
+ return "[" + ", ".join(_toml_val(x) for x in v) + "]"
53
+ return _toml_str(str(v))
54
+
55
+
56
+ def _emit_section(prefix: str, d: dict, lines: list) -> None:
57
+ """Emit one TOML table RECURSIVELY. Scalars first (TOML requires them before any sub-table header),
58
+ then each nested dict as a sub-table at ANY depth — so a nested dict like mcp_servers.<id>.env
59
+ becomes a proper [mcp_servers.<id>.env] sub-table instead of being stringified into a corrupt value
60
+ (the old code only handled two levels). A pure container of sub-tables (e.g. `providers`) gets no
61
+ header of its own; the sub-table headers imply it."""
62
+ scalars = [(k, v) for k, v in d.items() if not isinstance(v, dict)]
63
+ subtables = [(k, v) for k, v in d.items() if isinstance(v, dict)]
64
+ if prefix and (scalars or not subtables):
65
+ lines.append(f"\n[{prefix}]")
66
+ for k, v in scalars:
67
+ lines.append(f"{_toml_key(k)} = {_toml_val(v)}")
68
+ for k, v in subtables:
69
+ child = f"{prefix}.{_toml_key(k)}" if prefix else _toml_key(k)
70
+ _emit_section(child, v, lines)
71
+
72
+
73
def _emit_toml(data: dict) -> str:
    """Serialize a config dict to TOML text.

    A deliberately small emitter covering sliceagent's config shape: top-level scalars, sections,
    table-of-tables such as ``[providers.<id>]`` and deeper sub-tables such as
    ``[mcp_servers.<id>.env]``. It is fed the dict ``tomllib`` parsed from the existing file, so
    updating one provider/server re-emits the others. The output starts with a one-line header
    comment and always ends with a newline.
    """
    out_lines = [
        "# sliceagent config — managed by `sliceagent init` / `config`. ENV overrides any value here."
    ]
    _emit_section("", data, out_lines)
    out_lines.append("")  # joining with "\n" turns this into the trailing newline
    return "\n".join(out_lines)
80
+
81
+
82
+ def _config_path(home: str | None = None) -> str:
83
+ home = home or os.path.expanduser("~")
84
+ return os.path.join(home, ".sliceagent", "config.toml")
85
+
86
+
87
+ def _read_config(path: str) -> dict:
88
+ import tomllib
89
+ try:
90
+ with open(path, "rb") as f:
91
+ return tomllib.load(f)
92
+ except (OSError, tomllib.TOMLDecodeError):
93
+ return {}
94
+
95
+
96
+ def _atomic_write(path: str, body: str) -> None:
97
+ """ATOMIC 0600 write (the file holds an API key — never leave it half-written): write a temp in the same
98
+ dir, fsync, then os.replace(); on ANY failure remove the temp so no key-bearing fragment is left behind."""
99
+ os.makedirs(os.path.dirname(path), exist_ok=True)
100
+ import tempfile
101
+ fd, tmp = tempfile.mkstemp(dir=os.path.dirname(path) or ".", prefix=".sliceagent-cfg-", suffix=".tmp")
102
+ ok = False
103
+ try:
104
+ os.fchmod(fd, 0o600)
105
+ os.write(fd, body.encode("utf-8"))
106
+ os.fsync(fd)
107
+ os.close(fd)
108
+ os.replace(tmp, path)
109
+ ok = True
110
+ finally:
111
+ if not ok:
112
+ try:
113
+ os.close(fd)
114
+ except OSError:
115
+ pass
116
+ try:
117
+ os.remove(tmp)
118
+ except OSError:
119
+ pass
120
+
121
+
122
def _save_provider(path: str, *, pid: str, model: str, api_key: str, base_url: str) -> None:
    """Merge one provider into the config file at *path* and make it the default.

    Reads the current config, replaces/creates ``[providers.<pid>]`` with the given key and model
    (plus ``base_url`` when non-empty), points ``agent.default_provider`` at it, mirrors the model
    into ``agent.model`` (kept in sync for back-compat), and atomically rewrites the file. All other
    content of the config is carried over.
    """
    config = _read_config(path)

    def _table(name: str) -> dict:
        # Fetch (or create) a top-level table; a corrupt non-dict value is replaced by a fresh
        # table so the item assignments below cannot crash.
        current = config.setdefault(name, {})
        if isinstance(current, dict):
            return current
        fresh = config[name] = {}
        return fresh

    providers = _table("providers")
    record = {"api_key": api_key, "model": model}
    if base_url:
        record["base_url"] = base_url
    providers[pid] = record

    agent_table = _table("agent")
    agent_table["default_provider"] = pid
    agent_table["model"] = model

    _atomic_write(path, _emit_toml(config))
138
+
139
+
140
+ def _test_key(model: str, api_key: str, base_url: str, llm_factory) -> tuple[bool, str]:
141
+ """One cheap completion to confirm the key/endpoint work. Returns (ok, message)."""
142
+ prev = {k: os.environ.get(k) for k in ("LLM_API_KEY", "LLM_BASE_URL", "OPENAI_BASE_URL")}
143
+ try:
144
+ os.environ["LLM_API_KEY"] = api_key
145
+ if base_url:
146
+ os.environ["LLM_BASE_URL"] = base_url
147
+ else:
148
+ # empty preset base_url ⇒ provider default; clear BOTH aliases (OpenAILLM also resolves
149
+ # OPENAI_BASE_URL) so a stale exported endpoint can't hijack the key-test probe.
150
+ os.environ.pop("LLM_BASE_URL", None)
151
+ os.environ.pop("OPENAI_BASE_URL", None)
152
+ llm = llm_factory(model)
153
+ resp = llm.complete([{"role": "user", "content": "Reply with the single word: ok"}], [])
154
+ txt = (getattr(resp, "content", "") or "").strip()
155
+ return (True, txt[:40] or "(empty reply, but the call succeeded)")
156
+ except Exception as e: # noqa: BLE001
157
+ return (False, f"{type(e).__name__}: {e}")
158
+ finally:
159
+ for k, v in prev.items():
160
+ if v is None:
161
+ os.environ.pop(k, None)
162
+ else:
163
+ os.environ[k] = v
164
+
165
+
166
def run_init(*, inp=input, getpw=None, llm_factory=None, home=None) -> int:
    """Interactive setup wizard: pick a provider, enter key/model, test, and save the config.

    All interaction is injectable so the wizard can run without a tty or a real key:

    * ``inp`` — prompt function for visible input (defaults to ``input``).
    * ``getpw`` — prompt function for the hidden API key (defaults to ``getpass.getpass``).
    * ``llm_factory`` — ``model -> client`` used for the one-request key test (defaults to
      constructing ``OpenAILLM``).
    * ``home`` — home directory under which ``.sliceagent/config.toml`` lives.

    Returns a process exit code: 0 when the provider was saved or the user chose to leave an
    existing config untouched; 1 when the wizard was cancelled (EOF / Ctrl-C), no key or model was
    given, or the key test failed and the user declined to save anyway.
    """
    import getpass
    getpw = getpw or getpass.getpass
    if llm_factory is None:
        def llm_factory(model):
            from .llm import OpenAILLM
            return OpenAILLM(model=model)
    out = print
    path = _config_path(home)
    out("\n sliceagent setup\n ─────────────")
    if os.path.exists(path):
        try:
            ans = inp(f" A config already exists at {path}. Add/update a provider in it? [Y/n] ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            out("\n cancelled.")
            return 1
        if ans in ("n", "no"):
            out(" Leaving the existing config unchanged. Run `sliceagent` to start.")
            return 0

    out("\n Choose a provider:")
    for k, (_id, label, base, model) in PROVIDERS.items():
        out(f" {k}. {label}" + (f" ({model})" if model else ""))
    try:
        # Blank or unrecognized input falls back to menu entry "1".
        choice = inp(" > ").strip() or "1"
        pid, label, base_url, model = PROVIDERS.get(choice, PROVIDERS["1"])
        # RE-CONFIGURING an ALREADY-SAVED provider (re-running `sliceagent init` to update the model,
        # or just re-confirming) must not force a blind retype: pressing Enter keeps the existing
        # key / model / custom base URL. A BRAND-NEW provider has no existing entry, so blank still
        # means "nothing entered" and falls through to the aborts below.
        existing = _read_config(path).get("providers")
        existing = existing.get(pid) if isinstance(existing, dict) else None
        existing = existing if isinstance(existing, dict) else {}
        if pid == "custom":
            saved_url = existing.get("base_url")
            saved_url = saved_url if isinstance(saved_url, str) else ""
            url_prompt = (f" Base URL (OpenAI-compatible, Enter to keep {saved_url}): " if saved_url
                          else " Base URL (OpenAI-compatible, e.g. https://host/v1): ")
            # Keep the saved endpoint on Enter: otherwise the saved key would be probed against the
            # default endpoint and the entry re-saved without its base_url.
            base_url = inp(url_prompt).strip() or saved_url
        existing_key = existing.get("api_key") or ""
        key_prompt = " API key (hidden, Enter to keep existing): " if existing_key else " API key (hidden): "
        key = getpw(key_prompt).strip() or existing_key
        model = (inp(f" Model [{existing.get('model') or model or 'required'}]: ").strip()
                 or existing.get("model") or model)
    except (EOFError, KeyboardInterrupt):
        out("\n cancelled.")
        return 1
    if not key:
        out(" No API key entered — aborting.")
        return 1
    if not model:
        out(" No model specified — aborting.")
        return 1

    out("\n Testing the key with one request…")
    ok, msg = _test_key(model, key, base_url, llm_factory)
    out(f" {'✓ works' if ok else '✗ failed'}: {msg}")
    if not ok:
        try:
            cont = inp(" Save the config anyway? [y/N] ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            cont = "n"  # an interrupted confirmation counts as "no"
        if cont not in ("y", "yes"):
            out(" Not saved. Re-run `sliceagent init` to try again.")
            return 1

    _save_provider(path, pid=pid, model=model, api_key=key, base_url=base_url)
    out(f"\n Saved provider '{pid}' (model {model}) → {path} (0600)")
    out(" Ready. Run: sliceagent\n")
    return 0
227
+
228
+
229
def run_config(argv=None, *, home=None, env=None) -> int:
    """Implement `sliceagent config`; returns a process exit code.

    Modes, selected by *argv* (the arguments after ``config``):

    * ``--path`` (anywhere in argv) — print the config file path.
    * ``--use <id>`` (as the first argument) — make a configured provider the default, mirroring
      its model into ``agent.model`` when it has one; returns 1 with a usage line if ``<id>`` is
      missing or not configured.
    * ``--list`` — print every registered environment variable grouped by section, with its
      current value or default, its choices, and its description.
    * no flag — print the version, the config path, the configured providers (default marked
      with ``*``) and every setting that currently has a value.

    ``home`` overrides the home directory used to locate the config; ``env`` overrides the
    environment mapping consulted for current values (defaults to ``os.environ``).
    """
    argv = argv or []
    env = env if env is not None else os.environ
    out = print
    path = _config_path(home)
    if "--path" in argv:
        out(path)
        return 0
    if argv and argv[0] == "--use":
        pid = argv[1] if len(argv) > 1 else ""
        data = _read_config(path)
        provs = data.get("providers", {}) if isinstance(data.get("providers"), dict) else {}
        if not pid or pid not in provs:
            out(f" usage: sliceagent config --use <provider> "
                f"(configured: {', '.join(provs) or 'none — run `sliceagent init`'})")
            return 1
        agent = data.setdefault("agent", {})
        if not isinstance(agent, dict):  # a corrupt non-dict [agent] must not crash on item-assign
            agent = data["agent"] = {}
        agent["default_provider"] = pid
        if isinstance(provs[pid], dict) and provs[pid].get("model"):
            agent["model"] = provs[pid]["model"]
        _atomic_write(path, _emit_toml(data))
        out(f" default provider → {pid} (model {provs[pid].get('model', '?') if isinstance(provs[pid], dict) else '?'})")
        return 0
    if "--list" in argv:
        out("\n sliceagent environment variables (ENV overrides config file):")
        for g in GROUPS:
            out(f"\n [{g}]")
            for e in [e for e in REGISTRY if e.group == g]:
                cur = current_value(e.name, env)
                shown = f" = {cur}" if cur else (f" (default: {e.default})" if e.default else "")
                choices = f" {{{', '.join(e.choices)}}}" if e.choices else ""
                out(f" {e.name}{shown}{choices}")
                out(f" {e.desc}")
        out("")
        return 0
    out(f"\n sliceagent {_version()}")
    out(f" config file: {path} ({'exists' if os.path.exists(path) else 'not created — run `sliceagent init`'})")
    data = _read_config(path)
    provs = data.get("providers", {}) if isinstance(data.get("providers"), dict) else {}
    if provs:
        _agent = data.get("agent")
        default = _agent.get("default_provider", "") if isinstance(_agent, dict) else ""
        out(" providers (* = default · `config --use <id>` to switch):")
        for pid, p in provs.items():
            mark = "*" if pid == default else " "
            # A corrupt (non-table) provider entry shows '?' instead of crashing the listing —
            # the same isinstance guard the --use branch applies.
            model = p.get("model", "?") if isinstance(p, dict) else "?"
            out(f" {mark} {pid} ({model})")
    out(" set values:")
    any_set = False
    for e in REGISTRY:
        cur = current_value(e.name, env)
        if cur:
            out(f" {e.name} = {cur}")
            any_set = True
    if not any_set:
        out(" (none — all defaults)")
    out("\n `sliceagent config --list` for all knobs · `sliceagent init` to (re)configure\n")
    return 0
288
+
289
+
290
def print_usage() -> int:
    """Print the command-line usage summary (headed by the current version) and return exit code 0."""
    out = print
    out(f"""
sliceagent {_version()} — a memory-native coding agent (the slice/cache-not-log kernel)

usage:
sliceagent start the interactive agent (inline UI; AGENT_TUI=live|off to switch)
sliceagent init interactive first-run setup (provider, key, model) → ~/.sliceagent/config.toml
sliceagent config show resolved settings, providers, and config path
sliceagent config --list list every environment variable, default, and current value
sliceagent config --use <id> switch the default provider
sliceagent help show this help
sliceagent version show the version

first run: sliceagent init then sliceagent
docs: README.md · QUICKSTART.md
""")
    return 0
308
+
309
+
310
def dispatch(argv) -> int:
    """Run the subcommand named by ``argv[0]`` and return its exit code.

    Recognized: version (``version`` / ``--version`` / ``-V``), help (``help`` / ``--help`` /
    ``-h``), ``init`` and ``config`` (which receives the remaining arguments). Anything else —
    including an empty *argv* — prints an "unknown command" line plus the usage text and returns 1.
    cli.main() calls this before any key gate.
    """
    command = argv[0] if argv else ""

    if command in ("--version", "-V", "version"):
        print(f"sliceagent {_version()}")
        return 0
    if command in ("help", "--help", "-h"):
        return print_usage()
    if command == "init":
        return run_init()
    if command == "config":
        rest = argv[1:]
        return run_config(rest)

    print(f"unknown command: {command!r}\n")
    print_usage()
    # non-zero so shell scripts see the failure
    return 1
sliceagent/oracle.py ADDED
@@ -0,0 +1,36 @@
1
+ """Oracle implementations — ground-truth verification, independent of retrieval accuracy.
2
+
3
+ The loop can gate "done" on this so a retrieval miss can't masquerade as completion.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import subprocess
8
+
9
+
10
class CommandOracle:
    """Runs a verification command (e.g. the project's test suite). Pass/fail by exit code.

    ``verify()`` never lets a problem with the command escape as an exception: a timeout, output
    that cannot be decoded, or a command that cannot even be started are all reported as a FAILED
    verification. An exception propagating out of the oracle would bypass the caller's done-gate.
    """

    def __init__(self, cmd: str, timeout: int = 120):
        # cmd is a shell command line (run with shell=True); timeout is in seconds.
        self.cmd = cmd
        self.timeout = timeout

    def verify(self) -> tuple[bool, str]:
        """Run the command; return ``(passed, output)``.

        ``passed`` is True only when the command exits with status 0. ``output`` is stdout followed
        by stderr, stripped; on timeout it is whatever was captured plus a timeout marker.
        """
        try:
            # errors="replace": undecodable bytes in the output become U+FFFD instead of raising
            # UnicodeDecodeError out of the oracle.
            r = subprocess.run(self.cmd, shell=True, capture_output=True, text=True,
                               errors="replace", timeout=self.timeout)
        except subprocess.TimeoutExpired as e:
            # A timed-out verification is a FAILURE, not a thrown exception (a hung test must not
            # mark the task complete). On timeout, .stdout/.stderr may each be bytes OR str OR None
            # (version/stream dependent) — coerce EACH operand before concatenating, else a
            # bytes+str mix raises TypeError.
            def _s(x):
                return x.decode("utf-8", "replace") if isinstance(x, bytes) else (x or "")
            out = _s(e.stdout) + _s(e.stderr)
            return False, (out + f"\n[verification timed out after {self.timeout}s]").strip()
        except (OSError, ValueError) as e:
            # The command could not be started at all (e.g. no shell available, embedded NUL
            # byte): that cannot count as a pass either.
            return False, f"[verification could not run: {type(e).__name__}: {e}]"
        out = ((r.stdout or "") + (r.stderr or "")).strip()
        return r.returncode == 0, out
32
+
33
+
34
class NullOracle:
    """Oracle that performs no verification: it always reports a pass with empty output."""

    def verify(self) -> tuple[bool, str]:
        """Return ``(True, "")`` unconditionally."""
        passed, output = True, ""
        return passed, output
@@ -0,0 +1,255 @@
1
+ """PageTable — the SINGLE read/retrieval entry point for the slice.
2
+
3
+ Three scattered retrieval sources used to be wired independently into the slice build:
4
+ the code-discovery map (Retriever.retrieve/graph_map), the per-subtree project notes
5
+ (SubdirHints.hints_for), and cross-session episode search (Memory.search_episodes).
6
+ PageTable unifies them behind ONE call — ``lookup(focus, *, kind, k) -> list[PageRef]`` —
7
+ so the slice has a single place that decides WHAT to page in.
8
+
9
+ The PageTable owns the SubdirHints instance (constructed once per build closure, same
10
+ lifetime as before, preserving per-task subtree dedup). It is otherwise stateless: each
11
+ backend is a thin adapter over its source. Backends emit RAW text in the PageRef.preview;
12
+ fencing (wrap_untrusted) stays at ONE layer — the renderer in seed.py — so there is no
13
+ double-wrap.
14
+
15
+ NO-TRANSCRIPT MOAT: lookup() reads from durable/derived sources each turn; it never
16
+ accumulates state across turns (the only per-instance state is SubdirHints' per-task
17
+ surfaced-subtree set, which is a bounded durable store, not a transcript).
18
+
19
+ BRAIN-ANALOGY LEGEND (used in this file's section comments — a naming aid, not a new mechanism):
20
+ SENSORY CORTEX — code / project-notes: re-computed live from the filesystem, never persisted;
21
+ perception of the present, not memory of the past.
22
+ NEOCORTEX — memory-lessons: distilled, cross-session, auto-surfaced (like consolidated
23
+ semantic memory recalled associatively, with no explicit search).
24
+ HIPPOCAMPUS — episode-*: the lossless per-turn log, reached only by an explicit, cue-dependent
25
+ recall (recall_history) — like real hippocampal recall, prone to confabulation if
26
+ the cue is weak, which the visible-manifest/recall-marker work exists to prevent.
27
+ The Slice's own carried state (findings, conversation ring, plan, mission — see pfc.py) is the
28
+ fourth piece: PREFRONTAL CORTEX / working memory — bounded, actively maintained, free (no lookup()
29
+ call at all), and lost when the task resets. Only 4 of PageTable's 6 kinds fire per turn inside
30
+ build(); the other 2 (episode-xsession, episode-search-thissession) are reached only via the
31
+ recall_history tool (hippocampus.py) — the model's own hippocampal-recall lever.
32
+
33
+ DEFERRED (next backends to fold in here):
34
+ - per-file code refs (fan-out of the repo map) — kept as the single '(repo map)' page.
35
+ """
36
+ from __future__ import annotations
37
+
38
+ import os
39
+
40
+ from .interfaces import PageRef
41
+ from .text_utils import format_ts, normalize_ws
42
+
43
+ # Per-code-map preview cap — a generous PHYSICAL backstop only. The real bound is BREADTH, applied ONCE
44
+ # in graph_map (top-N ranked files, each shown complete). This must not re-cut the breadth-bounded map.
45
+ CODE_PREVIEW_CHARS = 12000
46
+
47
+
48
class PageTable:
    """Single entry for the slice's read/retrieval. ``lookup`` dispatches by ``kind`` to one
    backend; ``k`` is the per-kind budget (k<=0 SKIPS that backend, returning []).

    Every backend degrades to ``[]`` when its source (retriever / memory / subdir_hints) is absent.
    Numeric fields read back from stored episode rows are coerced defensively, so one malformed
    record cannot crash a lookup.
    """

    def __init__(self, retriever=None, memory=None, subdir_hints=None,
                 *, session_id: str | None = None):
        self.retriever = retriever
        self.memory = memory
        self.subdir_hints = subdir_hints  # OWNED here (per-task subtree dedup lives on it)
        # ONE concept — the CURRENT session: cross-session reads EXCLUDE it, within-session reads
        # filter ONLY to it. (Was the overloaded `exclude_session`, used as both exclude AND only —
        # a leak waiting to happen the moment a caller left it None.)
        self.session_id = session_id

    @staticmethod
    def _as_float(value) -> float:
        """Coerce a stored score/turn value to float; missing or non-numeric values become 0.0."""
        try:
            return float(value or 0.0)
        except (TypeError, ValueError):
            return 0.0

    # ------------------------------------------------------------------ public
    def lookup(self, focus, *, kind: str, k: int = 6, paths=None) -> list[PageRef]:
        """Page in references relevant to ``focus`` from the ``kind`` backend.

        ``focus`` is the per-kind locator: a discovery QUERY (code, memory-lessons), the
        active-file WORKING SET (project-notes), a search QUERY (episode-xsession,
        episode-search-thissession), or the SESSION ID whose manifest to list
        (episode-thissession). ``k`` bounds the backend (k<=0 => skip, [] — honors tighten's
        discovery_k=0 floor). ``paths`` is forwarded to the memory-lessons backend only. An
        unknown ``kind`` returns []. Returns PageRefs carrying RAW text; the caller fences them
        at render time."""
        if k <= 0:
            return []
        # — SENSORY CORTEX (derived views): re-computed from the LIVE filesystem/code every call,
        # never persisted — there is nothing to "remember" here, only to look at again, more carefully.
        if kind == "code":
            return self._code(focus, k)
        if kind == "project-notes":
            return self._project_notes(focus)
        # — NEOCORTEX (long-term memory): distilled, cross-session, auto-surfaced — like a consolidated
        # semantic memory that comes to mind associatively, without an effortful, cue-driven search.
        if kind == "memory-lessons":
            return self._lessons(focus, k, paths)
        # — HIPPOCAMPUS (episodic memory): the lossless per-turn log. Retrieval is cue-dependent and
        # EXPLICIT (recall_history) — like real hippocampal recall, it can fail or confabulate if the
        # retrieval cue is weak, which is exactly the class of bug the cache-manifest/recall-marker work
        # in seed.py and regions.py exists to prevent.
        if kind == "episode-xsession":
            return self._episodes(focus, k)
        if kind == "episode-thissession":
            return self._episodes_thissession(focus, k)
        if kind == "episode-search-thissession":
            return self._episodes_search_thissession(focus, k)
        return []

    # ----------------------------------------------------------------- backends
    def _code(self, query: str, k: int) -> list[PageRef]:
        """RELATED CODE: the retriever's relevance-ranked repo MAP. KEEP the single '(repo map)'
        shape (one PageRef wrapping the whole map text) — per-file fan-out is a deferred follow-up."""
        if self.retriever is None:
            return []
        snippets = self.retriever.retrieve(query, k=k)
        if not snippets:
            return []
        # The Retriever contract yields a single Snippet(path='(repo map)') today (see
        # code_index.RipgrepCodeIndex.retrieve). Carry its map text as ONE page, RAW; only the
        # first snippet is used.
        sn = snippets[0]
        return [PageRef(handle=sn.path, kind="code", preview=sn.text[:CODE_PREVIEW_CHARS],
                        score=sn.score, untrusted=True)]

    def _project_notes(self, active_files) -> list[PageRef]:
        """SUBDIRECTORY CONTEXT: convention files for any subtree in the working set not yet
        surfaced this task. SubdirHints is owned here; its per-task dedup is preserved."""
        if self.subdir_hints is None:
            return []
        text = self.subdir_hints.hints_for(active_files)
        if not text:
            return []
        return [PageRef(handle="(project notes)", kind="project-notes", preview=text,
                        score=0.0, untrusted=True)]

    def _lessons(self, query: str, k: int, paths=None) -> list[PageRef]:
        """RELEVANT MEMORY: distilled cross-session LESSONS (memem's relevance-gated retrieve), the
        always-on per-turn recall — NEOCORTEX in brain terms: consolidated, generalized, auto-surfaced.
        Distinct from `_episodes` (raw FTS5 episode text) — HIPPOCAMPUS: the lossless, per-session log,
        reached only by an explicit, cue-dependent recall_history call. Each Snippet -> one PageRef
        (preview carries the lesson text RAW; the renderer fences it). memory absent / no hits -> []."""
        if self.memory is None:
            return []
        snippets = self.memory.recall(query, k=k, paths=paths)  # R1: file-context bonus at topic-start
        return [PageRef(handle=sn.path, kind="memory-lessons", preview=sn.text,
                        score=sn.score, untrusted=True) for sn in snippets]

    def _episodes(self, query: str, k: int) -> list[PageRef]:
        """CROSS-SESSION RECALL: FTS5 episode hits from PAST sessions (the one cross-session read
        path). Each hit row -> one PageRef: `handle` is the session·turn locator, `preview` packs
        ts/title/note/match for the listing. Empty/unavailable index -> []."""
        if self.memory is None or not isinstance(query, str) or not query.strip():
            return []
        hits = self.memory.search_episodes(query.strip(), limit=k,
                                           exclude_session=self.session_id)  # cross-session: drop my lineage
        return [_episode_pageref(h) for h in hits]

    def _episodes_search_thissession(self, query: str, k: int) -> list[PageRef]:
        """WITHIN-SESSION content recall: FTS5 over the CURRENT session's episodes (the long-tail past
        the manifest/index window). Closes the gap where an old turn was reachable only by a turn number
        nobody knew. Each hit -> a PageRef whose handle is the TURN NUMBER, so the model pages the full
        turn with recall_history(turns=[N]) — search by content, fetch by the number it just learned."""
        if self.memory is None or not isinstance(query, str) or not query.strip() or not self.session_id:
            return []  # FAIL CLOSED: no current session → no within-session search
        hits = self.memory.search_episodes(query.strip(), limit=k, only_session=self.session_id)
        return [PageRef(handle=str(h.get("turn")), kind="episode-search-thissession",
                        preview=_pack_episode_preview(h), score=self._as_float(h.get("score")),
                        untrusted=False) for h in hits]

    def _episodes_thissession(self, session_id: str, k: int) -> list[PageRef]:
        """PAGED-OUT HISTORY manifest: locator-only PageRefs for the last ``k`` turns of THIS session —
        the TRIGGER that makes recall_history get called (the model cannot reach for a cache it cannot
        see; pin/view died because their payoff was invisible). The single this-session episodic READ
        entry (mirrors ``_episodes`` for cross-session) so the slice has ONE retrieval seam. Locators
        only — turn/title/breadcrumb, NEVER step bodies; content pages in solely when the model calls
        recall_history(turns=[N]). Bounded to ``k``; a trailing '…older' ref flags that more exist.
        Rows that are not dicts are skipped, and a non-numeric turn scores 0.0, so a corrupt
        on-disk record cannot crash the manifest build."""
        if not session_id:
            return []
        # Use the TAIL-only manifest read (O(k)/turn) when available, so a long session doesn't re-parse the
        # whole JSONL every slice build — that was O(n²)/session, eroding the flat-per-turn-cost moat.
        manifest = getattr(self.memory, "episode_manifest", None)
        if manifest is not None:
            shown, total = manifest(session_id, k)
            older = max(0, total - len(shown))
        else:
            read = getattr(self.memory, "read_episodes", None)
            if read is None:
                return []
            lines = read(session_id)  # fallback: whole-session read
            shown = lines[-k:]
            older = len(lines) - len(shown)
        if not shown:
            return []
        refs = [PageRef(handle=str(ln.get("turn")), kind="episode-thissession",
                        preview=_pack_thissession_preview(ln),
                        score=self._as_float(ln.get("turn")), untrusted=False)
                for ln in shown if isinstance(ln, dict)]
        if older:
            refs.append(PageRef(handle="…older", kind="episode-thissession",
                                preview=(f"{older} earlier turn(s) not shown — recall_history() for the full "
                                         f"index, or recall_history(search=\"keywords\") to find an older turn "
                                         f"of THIS session by content (also matches past sessions)"),
                                score=0.0, untrusted=False))
        return refs
188
+
189
+
190
def _episode_pageref(h: dict) -> PageRef:
    """Convert one cross-session episode hit into a PageRef.

    The handle is the locator "<first 14 chars of session id> · turn <N>"; the preview packs
    ts/title/note/match for the listing; a missing score counts as 0.0. The ref is marked
    untrusted.
    """
    session_prefix = (h.get("session_id") or "")[:14]
    locator = f"{session_prefix} · turn {h.get('turn')}"
    listing = _pack_episode_preview(h)
    relevance = float(h.get("score") or 0.0)
    return PageRef(
        handle=locator,
        kind="episode-xsession",
        preview=listing,
        score=relevance,
        untrusted=True,
    )
197
+
198
+
199
def _pack_episode_preview(h: dict) -> str:
    """Build the multi-line listing text for one episode search hit.

    First line: "<formatted ts> · <title, max 60 chars>" (title defaults to "(no title)").
    Then, only when present: a "note:" line (max 160 chars) and a "match:" line holding the
    whitespace-normalized snippet (max 200 chars).
    """
    stamp = format_ts(h.get("ts"))  # "06-16 12:30"
    heading = (h.get("title") or "(no title)")[:60]
    remark = (h.get("note") or "").strip()
    matched = normalize_ws(h.get("snippet"))

    rows = [f"{stamp} · {heading}"]
    if remark:
        rows.append(f" note: {remark[:160]}")
    if matched:
        rows.append(f" match: {matched[:200]}")
    return "\n".join(rows)
210
+
211
+
212
def _pack_thissession_preview(ln: dict) -> str:
    """Format one locator line of the PAGED-OUT HISTORY manifest.

    Shape: ``turn N · "<title>"`` followed by `` · FAIL`` when the turn's meta marks it failing, and
    by `` · <breadcrumb>`` when a payoff breadcrumb (what the turn HOLDS) is available. Locators
    only — never step bodies/observations; those page in on demand via recall_history. ``ln`` is a
    raw line parsed from the on-disk episodic JSONL, so a malformed ``record`` / ``meta`` (older
    schema, hand-edited file) is treated as empty rather than crashing the manifest build.
    """
    record = ln.get("record")
    if not isinstance(record, dict):
        record = {}
    meta = record.get("meta")
    if not isinstance(meta, dict):
        meta = {}

    title = normalize_ws(record.get("title") or "(untitled)")[:52]
    failed = bool(meta.get("failing"))
    crumb = _thissession_breadcrumb(record, meta)

    pieces = [f"turn {ln.get('turn')} · \"{title}\""]
    if failed:
        pieces.append(" · FAIL")
    if crumb:
        pieces.append(f" · {crumb}")
    return "".join(pieces)
226
+
227
+
228
def _thissession_breadcrumb(rec: dict, meta: dict) -> str:
    """Return the payoff breadcrumb (≤60 chars) for one turn of the history manifest.

    An empty breadcrumb was the pin/view killer — a locator with no visible payoff never gets
    called — so the hint is derived from the record's own content, first match wins:

    1. ``note: …``   — the model's own note, if it left one;
    2. ``edited: …`` — basenames of the turn's edited files (``meta["files"]``);
    3. ``did: …``    — up to three distinct actions, each "<tool name> <basename of its
       path/query/command argument>".

    Returns "" only when none of these exist. Uses only data already in the record (no extra read,
    no LLM). ``rec``/``meta`` are dict-guarded by the caller; ``steps``/``action`` entries and the
    action ``name`` are guarded here because they come from the same untrusted on-disk record.
    """
    note = normalize_ws(rec.get("note"))
    if note:
        return ("note: " + note)[:60]
    files = meta.get("files") or []
    files = files if isinstance(files, list) else []
    if files:
        return ("edited: " + ", ".join(os.path.basename(str(f)) for f in files))[:60]
    acts: list[str] = []
    for st in rec.get("steps", []) or []:
        if not isinstance(st, dict):
            continue
        for a in st.get("action", []) or []:
            if not isinstance(a, dict):
                continue
            # Force the name to str: a non-string name would otherwise reach ", ".join() below
            # and raise TypeError, crashing the manifest build on a corrupt record.
            name = str(a.get("name") or "")
            args = a.get("args", {}) if isinstance(a.get("args"), dict) else {}
            arg = args.get("path") or args.get("query") or args.get("command") or ""
            sig = (f"{name} {os.path.basename(str(arg))}").strip() if arg else name
            if sig and sig not in acts:
                acts.append(sig)
    return ("did: " + ", ".join(acts[:3]))[:60] if acts else ""