sliceagent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. sliceagent/__init__.py +3 -0
  2. sliceagent/__main__.py +6 -0
  3. sliceagent/access.py +93 -0
  4. sliceagent/agents.py +173 -0
  5. sliceagent/background_review.py +146 -0
  6. sliceagent/binsniff.py +89 -0
  7. sliceagent/cli.py +890 -0
  8. sliceagent/clock.py +32 -0
  9. sliceagent/code_grep.py +329 -0
  10. sliceagent/code_index.py +417 -0
  11. sliceagent/config.py +240 -0
  12. sliceagent/context_overflow.py +227 -0
  13. sliceagent/envspec.py +129 -0
  14. sliceagent/errors.py +167 -0
  15. sliceagent/events.py +96 -0
  16. sliceagent/finding_types.py +70 -0
  17. sliceagent/flags.py +63 -0
  18. sliceagent/fuzzy.py +135 -0
  19. sliceagent/guardrails.py +438 -0
  20. sliceagent/guidance.py +69 -0
  21. sliceagent/hippocampus.py +581 -0
  22. sliceagent/hooks.py +334 -0
  23. sliceagent/interfaces.py +144 -0
  24. sliceagent/llm.py +695 -0
  25. sliceagent/loop.py +548 -0
  26. sliceagent/mcp_client.py +255 -0
  27. sliceagent/mcp_security.py +77 -0
  28. sliceagent/memory.py +428 -0
  29. sliceagent/metrics.py +103 -0
  30. sliceagent/model_catalog.py +124 -0
  31. sliceagent/monitor.py +615 -0
  32. sliceagent/neocortex.py +436 -0
  33. sliceagent/onboarding.py +323 -0
  34. sliceagent/oracle.py +36 -0
  35. sliceagent/pagetable.py +255 -0
  36. sliceagent/pfc.py +449 -0
  37. sliceagent/plugins.py +127 -0
  38. sliceagent/policy.py +234 -0
  39. sliceagent/procman.py +187 -0
  40. sliceagent/prompt.py +239 -0
  41. sliceagent/records.py +108 -0
  42. sliceagent/recovery.py +119 -0
  43. sliceagent/regions.py +678 -0
  44. sliceagent/registry.py +128 -0
  45. sliceagent/retriever.py +19 -0
  46. sliceagent/safety.py +332 -0
  47. sliceagent/sandbox.py +143 -0
  48. sliceagent/scheduler.py +92 -0
  49. sliceagent/search_index.py +289 -0
  50. sliceagent/seed.py +465 -0
  51. sliceagent/sensory_cortex.py +500 -0
  52. sliceagent/session.py +222 -0
  53. sliceagent/skill_provenance.py +71 -0
  54. sliceagent/skill_usage.py +123 -0
  55. sliceagent/skills.py +209 -0
  56. sliceagent/subagent.py +332 -0
  57. sliceagent/subdir_hints.py +222 -0
  58. sliceagent/swap.py +182 -0
  59. sliceagent/taskstate.py +57 -0
  60. sliceagent/telemetry.py +59 -0
  61. sliceagent/terminal.py +240 -0
  62. sliceagent/text_utils.py +56 -0
  63. sliceagent/tool_summary.py +93 -0
  64. sliceagent/tools.py +1194 -0
  65. sliceagent/tui.py +1377 -0
  66. sliceagent/web.py +354 -0
  67. sliceagent-0.1.0.dist-info/METADATA +262 -0
  68. sliceagent-0.1.0.dist-info/RECORD +71 -0
  69. sliceagent-0.1.0.dist-info/WHEEL +4 -0
  70. sliceagent-0.1.0.dist-info/entry_points.txt +2 -0
  71. sliceagent-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,417 @@
1
+ """CodeIndex — the RELATED CODE tier (behind the Retriever interface).
2
+
3
+ Fills the slice's discovery tier from a real repository: given the task (and the
4
+ agent's live focus, e.g. the current error), surface the most relevant *existing*
5
+ code as ranked snippets so the model doesn't have to blind-grep. Deterministic, no
6
+ embeddings, no network — just ripgrep over the working tree (respects .gitignore).
7
+
8
+ Distinct from the Memory tier: memem indexes a curated LESSON vault; this indexes
9
+ SOURCE CODE. Two tiers, two interfaces (see interfaces.py).
10
+
11
+ v1 is ripgrep symbol/term search + a regex repo-map skeleton. tree-sitter is the
12
+ precision upgrade for definition extraction — it slots in at `_defs_in()` without
13
+ touching the Retriever contract or any caller.
14
+ """
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ import os
19
+ import shutil
20
+ import subprocess
21
+ import threading
22
+
23
+ from .interfaces import Snippet
24
+
25
+ # tokens too common to discriminate on (task prose is full of them)
26
+ _STOP = frozenset((
27
+ "the and for with that this from into your you use using make build adds add create "
28
+ "creates function functions method module modules file files code test tests should "
29
+ "when then return returns value values given must each all any not new run runs fix "
30
+ "fixes bug bugs def class import only also like one two get set has have its them they "
31
+ "such only via per out off via are was were will can may want need needs implement"
32
+ ).split())
33
+
34
+ # identifier-ish tokens worth searching: snake_case, camelCase, dotted names, ≥3 chars
35
+ import re as _re
36
+ _TOKEN = _re.compile(r"[A-Za-z_][A-Za-z0-9_]{2,}")
37
+
38
+ # language-ish definition lines for the repo-map skeleton (tree-sitter upgrades this)
39
+ _DEF_RE = _re.compile(
40
+ r"^\s*(?:export\s+|default\s+|public\s+|private\s+|protected\s+|static\s+|abstract\s+)*"
41
+ r"(?:async\s+)?"
42
+ r"(?:def|class|func|function|fn|type|interface|struct|enum|impl|trait|module|const)\b"
43
+ )
44
+ _CODE_EXT = frozenset((
45
+ ".py .js .jsx .ts .tsx .go .rs .java .rb .c .h .cc .cpp .hpp .cs .php .swift .kt "
46
+ ".scala .sh .lua .m .mm .ex .exs .clj .hs .ml .r .jl"
47
+ ).split())
48
+
49
+ # the NAME of a definition (for the symbol graph); tree-sitter upgrades this at _scan_file()
50
+ _NAME_RE = _re.compile(
51
+ r"\b(?:def|class|func|function|fn|type|interface|struct|enum|trait|module|const)\s+"
52
+ r"([A-Za-z_][A-Za-z0-9_]*)")
53
+
54
+
55
def _terms(query: str, limit: int = 12) -> list[str]:
    """Pull distinct, non-stop-word identifier tokens out of a free-text query.

    Tokens are compared case-insensitively (against `_STOP` and against each other) but
    returned with the casing of their first occurrence, in order of appearance. Collection
    stops as soon as `limit` tokens have been gathered. A falsy `query` yields `[]`.
    """
    picked: dict[str, str] = {}  # lower-cased key -> first-seen spelling (insertion-ordered)
    for candidate in _TOKEN.findall(query or ""):
        key = candidate.lower()
        if key in _STOP or key in picked:
            continue
        picked[key] = candidate
        if len(picked) >= limit:
            break
    return list(picked.values())
68
+
69
+
70
+ _TS = {"tried": False, "parser": None}
71
+ _TS_LOCK = threading.Lock()
72
+
73
+
74
+ def _ts_python():
75
+ """Lazily build a tree-sitter Python parser; None if tree-sitter isn't installed (→ regex)."""
76
+ if _TS["tried"]:
77
+ return _TS["parser"]
78
+ with _TS_LOCK: # parallel explorers can hit first-use concurrently — build under a lock so a second
79
+ if _TS["tried"]: # thread can't read parser=None in the window between tried=True and parser=<...>
80
+ return _TS["parser"]
81
+ parser = None
82
+ try: # convenience bundle (prebuilt grammars)
83
+ from tree_sitter_languages import get_parser
84
+ parser = get_parser("python")
85
+ except Exception: # noqa: BLE001 — fall back to the split packages
86
+ try:
87
+ import tree_sitter_python as _tspy
88
+ from tree_sitter import Language, Parser
89
+ parser = Parser(Language(_tspy.language()))
90
+ except Exception: # noqa: BLE001 — not installed → regex path
91
+ parser = None
92
+ _TS["parser"] = parser
93
+ _TS["tried"] = True # set tried AFTER parser is populated (no torn read)
94
+ return _TS["parser"]
95
+
96
+
97
+ def _ts_def_names(path: str, src: str):
98
+ """Definition names via tree-sitter (Python only, precise: real function/class nodes, no
99
+ comment/string false-positives). Returns None to signal 'use the regex' — non-Python file or
100
+ tree-sitter not installed. The Retriever contract and every caller are unchanged either way."""
101
+ if not path.endswith(".py"):
102
+ return None
103
+ parser = _ts_python()
104
+ if parser is None:
105
+ return None
106
+ try:
107
+ data = src.encode("utf-8", "replace") # tree-sitter offsets are BYTE offsets — slice the bytes, not the str
108
+ tree = parser.parse(data)
109
+ names, stack = set(), [tree.root_node]
110
+ while stack:
111
+ node = stack.pop()
112
+ if node.type in ("function_definition", "class_definition"):
113
+ nm = node.child_by_field_name("name")
114
+ if nm is not None:
115
+ names.add(data[nm.start_byte:nm.end_byte].decode("utf-8", "replace"))
116
+ stack.extend(node.children)
117
+ return names
118
+ except Exception: # noqa: BLE001 — any TS hiccup → regex
119
+ return None
120
+
121
+
122
class RipgrepCodeIndex:
    """Retriever over a working tree using ripgrep. No index to build — queries run live.

    Robust by design: any ripgrep failure (missing binary, bad path, timeout, undecodable
    output) degrades to an empty result, so the discovery tier simply goes quiet rather than
    breaking the loop — same contract as NullRetriever, just populated when there's code to find.
    """

    def __init__(self, root: str = ".", *, rg: str = "rg",
                 max_filesize: str = "300K", timeout: float = 6.0,
                 ctx: int = 4, max_chars: int = 1400):
        """Store the search settings; `root` is made absolute. Nothing is scanned yet."""
        self.root = os.path.abspath(root)
        self.rg = rg                      # ripgrep executable (name or path)
        self.max_filesize = max_filesize  # passed to `rg --max-filesize` for content search
        self.timeout = timeout            # seconds allowed per ripgrep invocation
        self.ctx = ctx
        self.max_chars = max_chars
        self._graph_cache: dict | None = None   # query-independent def/ref graph (see _graph)
        self._graph_builds = 0                  # rebuild counter (observability + tests)
        self._graph_lock = threading.Lock()     # parallel explorers share this index → serialize rebuilds

    # --- Retriever contract -------------------------------------------------
    def retrieve(self, query: str, k: int = 6) -> list[Snippet]:
        """The RELATED CODE tier: a relevance-RANKED repo MAP, returned as a single Snippet.

        The map lists which files matter for the query as definition SIGNATURES rather than
        code excerpts, ranked structurally (personalized PageRank over the def/ref graph,
        seeded by the lexical matches — see graph_map), so a relevant file can surface even
        with zero query-word overlap when a matched file references it.

        Returns [] when the map is empty or when the query produced no lexical seed in the
        graph. The gate is the real seed count, not the rendered "(matches:" count: a seeded
        query whose matched file has no def-skeleton still expands structurally to related
        files, and that map must not be dropped. `k` is accepted for the Retriever contract
        but is not used here (at most one Snippet is returned).
        """
        text, seeded = self.graph_map(query, _return_seeds=True)
        if not text or not seeded:
            return []
        matches = text.count("(matches:")
        return [Snippet(path="(repo map)", text=text, score=float(matches or seeded))]

    def deps(self, path: str, limit: int = 6) -> list[str]:
        """Files structurally COUPLED to `path`, from the def/ref graph.

        Reverse deps (files that reference `path` — its CALLERS) come first, strongest edge
        first, followed by forward deps (files `path` references). Callers are ranked ahead of
        contracts so that truncation at `limit` drops contracts (re-readable on demand), never
        the call-sites that must change in lockstep with a rename/signature change.

        Returns [] when `path` isn't in the graph or the graph cannot be built. The call is
        query-independent: it issues no content search, though `_graph` still lists the tree
        (`rg --files`) and stats the files to validate its cache.
        """
        try:
            g = self._graph(400)
        except Exception:  # noqa: BLE001 — best-effort: callers degrade gracefully
            return []
        edges = g["edges"]
        if path not in g["fileset"]:
            return []
        fwd = edges.get(path, {})                                   # files `path` references (contracts)
        rev = {f: e[path] for f, e in edges.items() if path in e}   # files that reference `path` (callers)
        ranked = sorted(rev, key=lambda f: -rev[f]) + sorted(fwd, key=lambda f: -fwd[f])
        seen, out = set(), []
        for f in ranked:
            if f != path and f not in seen:
                seen.add(f)
                out.append(f)
        return out[:limit]

    def def_names(self, path: str) -> set:
        """The symbol NAMES `path` defines (from the cached graph).

        Used to detect what an edit REMOVED (pre-edit defs minus current defs). Returns an
        empty set when the file is unknown or the graph cannot be built.
        """
        try:
            return set(self._graph(400).get("defs", {}).get(path) or ())
        except Exception:  # noqa: BLE001 — best-effort
            return set()

    def ref_tokens(self, path: str) -> set:
        """The identifier tokens that appear in `path` (from the cached graph).

        A file whose tokens still contain a name an edit removed/moved is a dangling
        call-site. Returns an empty set when the file is unknown or the graph cannot be built.
        """
        try:
            return set(self._graph(400).get("tokens", {}).get(path) or ())
        except Exception:  # noqa: BLE001 — best-effort
            return set()

    # --- structural map: rank by personalized PageRank over the def/ref graph ---
    def graph_map(self, query: str, max_files: int = 400, max_shown: int = 20, *,
                  _return_seeds: bool = False):
        """Repo map ranked by PERSONALIZED PAGERANK over the symbol def/ref graph.

        Seeds are the files that match the query lexically (weight = number of distinct
        matched terms). Rank flows along reference edges, so a file can surface with ZERO
        query-word overlap when a matched file references it. With no seeds the ranking
        falls back to uniform personalization. Output is bounded by BREADTH — the top
        `max_shown` ranked files that have a skeleton, each shown COMPLETE — not by a
        character cut.

        The query-independent graph is cached (see _graph), so per-call work is the file
        listing + stat fingerprint, one lexical search, and PageRank.

        Returns the rendered text, or `(text, n_seeds)` when `_return_seeds` is true.
        """
        g = self._graph(max_files)
        files = list(g["files"])
        if not files:
            return ("", 0) if _return_seeds else ""
        terms = _terms(query)
        matched: dict[str, set] = {}
        if terms:
            for path, info in self._search(terms).items():
                matched[os.path.relpath(path, self.root)] = info["terms"]
        seeds = {rel: float(len(t)) for rel, t in matched.items() if rel in g["fileset"]}
        n_seeds = len(seeds)  # real seed signal handed back to retrieve()'s gate
        pr = self._pagerank(files, g["edges"], seeds)
        # Rank by structural score, then lexical strength. The sort is stable (also with
        # reverse=True), so ties keep the path-sorted order produced by _code_files.
        files.sort(key=lambda rel: (pr.get(rel, 0.0), len(matched.get(rel, ()))), reverse=True)
        blocks: list[str] = []
        for rel in files:
            dlines = g["skeleton"].get(rel)
            if not dlines:
                continue  # nothing to show for a file without definition lines
            hit = matched.get(rel)
            head = rel + (f" (matches: {', '.join(sorted(hit))})" if hit else "")
            blocks.append(head + "\n" + "\n".join("  " + d for d in dlines))
            if len(blocks) >= max_shown:  # BREADTH bound: top-N ranked files, each shown COMPLETE
                break
        text = "\n".join(blocks)
        return (text, n_seeds) if _return_seeds else text

    def _graph(self, max_files: int) -> dict:
        """Build (or reuse) the query-INDEPENDENT def/ref graph.

        The graph is cached on this instance and keyed by a fingerprint of the code files
        (path + mtime + size), so it is rebuilt only when the listed tree changes. Each file
        is read and scanned once per rebuild (defs + skeleton + tokens in one pass).
        """
        files = self._code_files(max_files)
        sig = self._fingerprint(files)
        c = self._graph_cache
        if c is not None and c["sig"] == sig:  # lock-free fast path
            return c
        with self._graph_lock:  # serialize rebuilds so parallel explorers don't double-build
            c = self._graph_cache
            if c is not None and c["sig"] == sig:
                return c
            defs: dict[str, set] = {}
            sym2file: dict[str, set] = {}
            skeleton: dict[str, list] = {}
            tokens: dict[str, set] = {}
            for rel in files:
                names, lines, toks = self._scan_file(rel)
                if lines:
                    skeleton[rel] = lines
                tokens[rel] = toks
                if names:
                    defs[rel] = names
                    for n in names:
                        sym2file.setdefault(n, set()).add(rel)
            edges = self._edges_from_tokens(files, defs, sym2file, tokens)
            self._graph_builds += 1
            self._graph_cache = {"sig": sig, "files": files, "fileset": set(files),
                                 "skeleton": skeleton, "edges": edges, "defs": defs,
                                 "tokens": tokens}
            return self._graph_cache

    def _fingerprint(self, files: list[str]) -> tuple:
        """Cheap staleness key: one (rel, mtime_ns, size) entry per file.

        Stat-only — no reads. A file that cannot be stat'ed contributes (rel, 0, 0).
        """
        out = []
        for rel in files:
            try:
                st = os.stat(os.path.join(self.root, rel))
                out.append((rel, st.st_mtime_ns, st.st_size))
            except OSError:
                out.append((rel, 0, 0))
        return tuple(out)

    def _scan_file(self, rel: str):
        """One read per file → (def names, skeleton lines, tokens).

        Names come from tree-sitter when available, else from `_NAME_RE`; either way names
        shorter than 4 characters are dropped. Skeleton lines are the first 12 lines matching
        `_DEF_RE`, stripped and clipped to 120 characters. An unreadable file yields empty
        results.
        """
        full = os.path.join(self.root, rel)
        try:
            # utf-8 is pinned so a non-utf-8 locale cannot mis-decode the def/ref graph.
            with open(full, "r", encoding="utf-8", errors="replace") as fh:
                src = fh.read()
        except OSError:
            return set(), [], set()
        ts = _ts_def_names(full, src)
        if ts is not None:
            names = {n for n in ts if len(n) >= 4}
        else:
            names = {m.group(1) for m in _NAME_RE.finditer(src) if len(m.group(1)) >= 4}
        lines = [ln.strip()[:120] for ln in src.splitlines() if _DEF_RE.match(ln)][:12]
        tokens = set(_TOKEN.findall(src))
        return names, lines, tokens

    @staticmethod
    def _edges_from_tokens(files: list[str], defs: dict, sym2file: dict, tokens: dict) -> dict:
        """Directed, weighted edges file → file it references.

        A references B if A contains a token that is a symbol DEFINED in B (and not defined
        in A itself). Symbols defined in more than 4 files are ignored as noisy. The edge
        weight is the number of distinct such symbols.
        """
        usable = {s: fs for s, fs in sym2file.items() if len(fs) <= 4}
        edges: dict[str, dict] = {}
        for rel in files:
            own = defs.get(rel, set())
            out: dict[str, int] = {}
            for t in tokens.get(rel, ()):
                if t in own:
                    continue
                for tgt in usable.get(t, ()):
                    if tgt != rel:
                        out[tgt] = out.get(tgt, 0) + 1
            if out:
                edges[rel] = out
        return edges

    @staticmethod
    def _pagerank(nodes: list[str], edges: dict, seeds: dict, d: float = 0.85,
                  iters: int = 40) -> dict:
        """Personalized PageRank by fixed-count power iteration.

        The personalization vector is the normalized `seeds` weights, or uniform when the
        seed total is not positive. Nodes without outgoing weight hand their damped rank
        back through the personalization vector. Returns {node: score}; {} for no nodes.
        """
        n = len(nodes)
        if n == 0:
            return {}
        total = sum(seeds.values())
        p = ({x: seeds.get(x, 0.0) / total for x in nodes} if total > 0
             else {x: 1.0 / n for x in nodes})
        r = dict(p)
        outsum = {u: sum(w.values()) for u, w in edges.items()}
        nodeset = set(nodes)
        for _ in range(iters):
            nr = {x: (1 - d) * p[x] for x in nodes}
            dangling = 0.0
            for u in nodes:
                ru = r[u]
                s = outsum.get(u, 0)
                if s > 0:
                    for v, w in edges[u].items():
                        if v in nodeset:
                            nr[v] += d * ru * (w / s)
                else:
                    dangling += d * ru
            if dangling:
                for x in nodes:
                    nr[x] += dangling * p[x]
            r = nr
        return r

    # --- internals ----------------------------------------------------------
    def _search(self, terms: list[str]) -> dict[str, dict]:
        """One case-insensitive ripgrep pass over all terms; matches grouped by file.

        Returns {path: {"terms": set of lower-cased matched texts, "lines": up to 60 line
        numbers, "count": number of matching lines}}. Any failure to run ripgrep returns {}.
        """
        cmd = [self.rg, "--json", "-i", "--max-filesize", self.max_filesize,
               "--max-columns", "400"]
        for t in terms:
            cmd += ["-e", t]
        cmd.append(self.root)
        try:
            # Pin utf-8 and replace undecodable bytes: with the locale default a stray
            # byte raises UnicodeDecodeError, which the handler below does not catch.
            proc = subprocess.run(cmd, capture_output=True, text=True,
                                  encoding="utf-8", errors="replace",
                                  timeout=self.timeout)
        except (OSError, subprocess.SubprocessError):
            return {}
        files: dict[str, dict] = {}
        for raw in proc.stdout.splitlines():
            if not raw or '"type":"match"' not in raw:  # cheap pre-filter before JSON parsing
                continue
            try:
                obj = json.loads(raw)
            except ValueError:
                continue
            d = obj.get("data", {})
            path = (d.get("path") or {}).get("text")
            ln = d.get("line_number")
            if not path or ln is None:
                continue
            f = files.setdefault(path, {"terms": set(), "lines": [], "count": 0})
            f["count"] += 1
            if len(f["lines"]) < 60:
                f["lines"].append(ln)
            for sm in d.get("submatches", []):
                mt = ((sm.get("match") or {}).get("text") or "").lower()
                if mt:
                    f["terms"].add(mt)
        return files

    def _code_files(self, max_files: int) -> list[str]:
        """List source files under `root` via `rg --files`, as sorted root-relative paths.

        Only files whose extension (compared case-insensitively, so e.g. `.R` and `.PY`
        count) is in `_CODE_EXT` are kept; the sorted list is cut to `max_files`. Any
        failure to run ripgrep returns [].
        """
        try:
            proc = subprocess.run([self.rg, "--files", self.root],
                                  capture_output=True, text=True,
                                  encoding="utf-8", errors="replace",
                                  timeout=self.timeout)
        except (OSError, subprocess.SubprocessError):
            return []
        rels = sorted(
            os.path.relpath(p, self.root)
            for p in proc.stdout.splitlines()
            if os.path.splitext(p)[1].lower() in _CODE_EXT
        )
        return rels[:max_files]
409
+
410
+
411
def make_code_index(root: str = ".", *, prefer_ripgrep: bool = True):
    """Factory mirroring make_memory(): pick the code retriever for this host.

    Returns a RipgrepCodeIndex rooted at `root` when `prefer_ripgrep` is true and an `rg`
    executable is found on PATH; otherwise a NullRetriever, so the loop runs unchanged.
    """
    if not (prefer_ripgrep and shutil.which("rg")):
        from .retriever import NullRetriever
        return NullRetriever()
    return RipgrepCodeIndex(root=root)
sliceagent/config.py ADDED
@@ -0,0 +1,240 @@
1
+ """Config — layered settings from sliceagent.toml (Step ③.2).
2
+
3
+ A layered config file (user then project, project overriding)
4
+ that declares persistent settings AND extension surfaces (skills dirs, MCP servers,
5
+ plugin dirs). Precedence is ENV > project file > user file > default, so a quick
6
+ `AGENT_POLICY=allow sliceagent ...` still overrides the file and ALL prior env-driven
7
+ behavior is preserved (the file just makes settings persistent).
8
+
9
+ Read-only TOML via stdlib tomllib (Python 3.11+ — no new dependency).
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import os
14
+ import tomllib
15
+
16
+
17
+ def _read_toml(path: str) -> dict:
18
+ try:
19
+ with open(path, "rb") as f:
20
+ return tomllib.load(f)
21
+ except (OSError, tomllib.TOMLDecodeError, UnicodeDecodeError, ValueError):
22
+ return {} # a corrupt / non-UTF-8 config must degrade to defaults, not crash startup
23
+
24
+
25
+ def _config_files() -> list[str]:
26
+ # user first, then project (project overrides user)
27
+ home = os.path.expanduser("~")
28
+ cwd = os.getcwd()
29
+ return [
30
+ os.path.join(home, ".sliceagent", "config.toml"),
31
+ os.path.join(cwd, "sliceagent.toml"),
32
+ os.path.join(cwd, ".sliceagent", "config.toml"),
33
+ ]
34
+
35
+
36
+ # ── runtime preferences (the /model switch persists here) ───────────────────────────────────────
37
+ # A tiny JSON sidecar, NOT config.toml: stdlib has no TOML WRITER (tomllib is read-only), so writing
38
+ # back to config.toml would need a new dep or a fragile hand-rolled serializer. JSON is safe + atomic.
39
+ # Precedence (resolved in cli): explicit env (AGENT_MODEL/AGENT_REASONING) > prefs > config.toml > default.
40
def _prefs_path() -> str:
    """Return the path of the JSON preferences sidecar: ~/.sliceagent/prefs.json."""
    return os.path.join(os.path.expanduser("~"), ".sliceagent", "prefs.json")


def load_prefs() -> dict:
    """The user's last /model + /reasoning choice (or {} if none/unreadable).

    Always returns a dict: a missing or corrupt file, and also a file whose top-level JSON
    value is not an object (e.g. a list or a number), all yield {}. Never raises.
    """
    try:
        import json
        with open(_prefs_path(), encoding="utf-8") as f:
            data = json.load(f)
    except Exception:  # noqa: BLE001 — missing/corrupt prefs must never break startup
        return {}
    # A valid-JSON but non-object file would otherwise leak a list/str to callers that
    # expect a mapping, and would make every later save_prefs() fail on `.update`.
    return data if isinstance(data, dict) else {}


def save_prefs(updates: dict) -> None:
    """Merge the truthy entries of `updates` into the prefs sidecar. Best-effort; never raises.

    The merged mapping is written to `<path>.tmp` and moved over the real file with
    os.replace, so readers never see a half-written file. If anything fails, the temp file
    is removed (when it was created) and the previous prefs are left untouched.
    """
    tmp = None
    try:
        import json
        path = _prefs_path()
        os.makedirs(os.path.dirname(path), exist_ok=True)
        cur = load_prefs()
        if not isinstance(cur, dict):  # defensive: start fresh rather than fail on .update
            cur = {}
        cur.update({k: v for k, v in updates.items() if v})
        tmp = path + ".tmp"
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(cur, f, indent=2)
        os.replace(tmp, path)
    except Exception:  # noqa: BLE001 — persistence is a nicety, not a hard requirement
        if tmp is not None:
            try:
                os.remove(tmp)  # don't leave a stale partial file behind
            except OSError:
                pass
68
+
69
+
70
+ def _deep_merge(a: dict, b: dict) -> dict:
71
+ out = dict(a)
72
+ for k, v in b.items():
73
+ out[k] = _deep_merge(out[k], v) if isinstance(v, dict) and isinstance(out.get(k), dict) else v
74
+ return out
75
+
76
+
77
+ def _truthy(v) -> bool:
78
+ if isinstance(v, bool):
79
+ return v
80
+ return str(v).strip().lower() in ("1", "true", "yes", "on")
81
+
82
+
83
class Config:
    """Resolved settings. Each accessor checks ENV first, then the merged TOML, then a default."""

    def __init__(self, data: dict | None = None):
        """Wrap an already-merged settings mapping (None or empty → no file settings)."""
        self.data = data or {}

    @classmethod
    def load(cls) -> "Config":
        """Merge every existing config file (user first, project last) into one Config."""
        merged: dict = {}
        for f in _config_files():
            if os.path.isfile(f):
                merged = _deep_merge(merged, _read_toml(f))
        return cls(merged)

    def _get(self, section: str, key: str, env: str | None, default):
        """Resolve one setting: env var `env` (if given and set) > [section].key > default.

        NOTE: an env var that is set to the empty string still counts as set here and is
        returned as "". The api_key/base_url/model accessors deliberately do their own
        env lookup so that an empty value falls through instead.
        """
        if env and os.environ.get(env) is not None:
            return os.environ[env]
        sec = self.data.get(section, {})
        if isinstance(sec, dict) and key in sec:
            return sec[key]
        return default

    # --- provider (multi-provider; written by `sliceagent init`; ENV always wins) ---
    # Resolution order for api_key/base_url/model: ENV → the DEFAULT provider's [providers.<id>] table →
    # the legacy flat [provider]/[agent].model → default. So multiple named providers can coexist and
    # `sliceagent config --use <id>` switches between them, while old flat configs + env keep working.
    @property
    def default_provider(self) -> str:
        """Id of the active provider: AGENT_PROVIDER, else [agent].default_provider, else ""."""
        return self._get("agent", "default_provider", "AGENT_PROVIDER", "")

    def providers(self) -> dict:
        """All declared providers: {id: {api_key, base_url, model}}.

        Entries under [providers] that are not tables are ignored; a non-table
        `providers` value yields {}.
        """
        v = self.data.get("providers", {})
        return {k: val for k, val in v.items() if isinstance(val, dict)} if isinstance(v, dict) else {}

    def _provider_table(self) -> dict:
        """The active provider's table: the configured default, or the sole provider if exactly one exists."""
        provs = self.providers()
        pid = self.default_provider
        if pid and pid in provs:
            return provs[pid]
        if len(provs) == 1:
            return next(iter(provs.values()))
        return {}

    @property
    def api_key(self) -> str:
        """API key: LLM_API_KEY (if non-empty) > active provider table > [provider].api_key > ""."""
        env = os.environ.get("LLM_API_KEY")
        if env:  # empty string ("" exported) means UNSET → fall through to config, don't return ""
            return env
        return self._provider_table().get("api_key") or self._get("provider", "api_key", None, "")

    @property
    def base_url(self) -> str:
        """Base URL: LLM_BASE_URL (if non-empty) > active provider table > [provider].base_url > ""."""
        env = os.environ.get("LLM_BASE_URL")
        if env:  # empty string → unset (use provider default), not a literal empty base_url
            return env
        return self._provider_table().get("base_url") or self._get("provider", "base_url", None, "")

    # --- agent ---
    @property
    def model(self) -> str:
        """Model id: AGENT_MODEL (if non-empty) > active provider table > [agent].model > "".

        There is no built-in default model — the user chooses one.
        """
        env = os.environ.get("AGENT_MODEL")
        if env:  # empty string → unset → fall through to config/default model, not ""
            return env
        return self._provider_table().get("model") or self._get("agent", "model", None, "")

    @property
    def policy(self) -> str:
        """Policy name: AGENT_POLICY > [agent].policy > "teenager"."""
        return self._get("agent", "policy", "AGENT_POLICY", "teenager")

    @property
    def mine(self) -> str:
        """Mining mode: AGENT_MINE > [agent].mine > "deterministic"."""
        return self._get("agent", "mine", "AGENT_MINE", "deterministic")

    @property
    def subagent_depth(self) -> int:
        """Sub-agent depth (0 = off), clamped to >= 0; an unparsable value falls back to 1."""
        v = self._get("agent", "subagent_depth", "AGENT_SUBAGENT_DEPTH", 1)
        try:
            return max(0, int(v))
        except (TypeError, ValueError, OverflowError):  # OverflowError: TOML `inf`
            return 1

    @property
    def show_slice(self) -> bool:
        """Whether to show the slice: SHOW_SLICE > [agent].show_slice > False."""
        return _truthy(self._get("agent", "show_slice", "SHOW_SLICE", False))

    # --- sandbox ---
    @property
    def sandbox_backend(self) -> str:
        """Sandbox backend (local | docker): AGENT_SANDBOX > [sandbox].backend > "local"."""
        return self._get("sandbox", "backend", "AGENT_SANDBOX", "local")

    @property
    def sandbox_image(self) -> str:
        """Sandbox image: [sandbox].image, default "python:3.12-slim" (no env override)."""
        return self._get("sandbox", "image", None, "python:3.12-slim")

    @property
    def sandbox_network(self) -> str:
        """Sandbox network setting: [sandbox].network, default "none" (no env override)."""
        return self._get("sandbox", "network", None, "none")

    # --- oracle / budget ---
    @property
    def verify_cmd(self) -> str | None:
        """Verification command: AGENT_VERIFY_CMD > [oracle].verify_cmd > None."""
        return self._get("oracle", "verify_cmd", "AGENT_VERIFY_CMD", None)

    @property
    def max_tokens(self) -> int | None:
        """Token budget as a positive int, or None for "no budget".

        None is returned when the setting is absent, cannot be converted to an int
        (including a TOML `inf`/`nan`), or is <= 0 — a garbage budget must not crash startup.
        """
        v = self._get("budget", "max_tokens", "AGENT_MAX_TOKENS", None)
        try:
            n = int(v) if v is not None else None
        except (TypeError, ValueError, OverflowError):  # OverflowError: int(float("inf"))
            return None
        return n if (n is not None and n > 0) else None  # discard a nonsensical <=0 budget

    @property
    def max_steps(self) -> int:
        """Per-turn step ceiling (runaway backstop); defaults to 60.

        The default also applies when the value is absent, empty, unparsable (including a
        TOML `inf`/`nan`), or < 1 — consistently for both the env string and TOML values.
        """
        v = self._get("budget", "max_steps", "AGENT_MAX_STEPS", None)
        try:
            n = int(v) if v not in (None, "") else None
        except (TypeError, ValueError, OverflowError):  # OverflowError: int(float("inf"))
            return 60
        return n if (n is not None and n >= 1) else 60

    # --- extension surfaces ---
    @property
    def skills_roots(self) -> list[str] | None:
        """Skill directories from [skills].dirs with `~` expanded, or None if none are usable.

        A scalar string is treated as a single entry; non-string list entries are skipped.
        """
        sec = self.data.get("skills", {})
        dirs = sec.get("dirs") if isinstance(sec, dict) else None
        if isinstance(dirs, str):  # a scalar `dirs = "..."` must not iterate char-by-char
            dirs = [dirs]
        if not isinstance(dirs, list):
            return None
        roots = [os.path.expanduser(d) for d in dirs if isinstance(d, str)]
        return roots or None

    @property
    def mcp_servers(self) -> dict:
        """Declared MCP servers, e.g. [mcp_servers.github]; {} when absent or not a table."""
        v = self.data.get("mcp_servers", {})
        return v if isinstance(v, dict) else {}

    @property
    def plugin_dirs(self) -> list[str]:
        """Extra plugin directories from [plugins].dirs with `~` expanded ([] if none).

        A scalar string is treated as a single entry; non-string list entries are skipped.
        """
        sec = self.data.get("plugins", {})
        dirs = sec.get("dirs", []) if isinstance(sec, dict) else []
        if isinstance(dirs, str):  # scalar `dirs = "..."` → single entry, not char iteration
            dirs = [dirs]
        if not isinstance(dirs, list):
            return []
        return [os.path.expanduser(d) for d in dirs if isinstance(d, str)]
237
+
238
+
239
def load_config() -> Config:
    """Module-level convenience: build a Config from the layered config files."""
    resolved = Config.load()
    return resolved