sliceagent 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sliceagent/__init__.py +3 -0
- sliceagent/__main__.py +6 -0
- sliceagent/access.py +93 -0
- sliceagent/agents.py +173 -0
- sliceagent/background_review.py +146 -0
- sliceagent/binsniff.py +89 -0
- sliceagent/cli.py +890 -0
- sliceagent/clock.py +32 -0
- sliceagent/code_grep.py +329 -0
- sliceagent/code_index.py +417 -0
- sliceagent/config.py +240 -0
- sliceagent/context_overflow.py +227 -0
- sliceagent/envspec.py +129 -0
- sliceagent/errors.py +167 -0
- sliceagent/events.py +96 -0
- sliceagent/finding_types.py +70 -0
- sliceagent/flags.py +63 -0
- sliceagent/fuzzy.py +135 -0
- sliceagent/guardrails.py +438 -0
- sliceagent/guidance.py +69 -0
- sliceagent/hippocampus.py +581 -0
- sliceagent/hooks.py +334 -0
- sliceagent/interfaces.py +144 -0
- sliceagent/llm.py +695 -0
- sliceagent/loop.py +548 -0
- sliceagent/mcp_client.py +255 -0
- sliceagent/mcp_security.py +77 -0
- sliceagent/memory.py +428 -0
- sliceagent/metrics.py +103 -0
- sliceagent/model_catalog.py +124 -0
- sliceagent/monitor.py +615 -0
- sliceagent/neocortex.py +436 -0
- sliceagent/onboarding.py +323 -0
- sliceagent/oracle.py +36 -0
- sliceagent/pagetable.py +255 -0
- sliceagent/pfc.py +449 -0
- sliceagent/plugins.py +127 -0
- sliceagent/policy.py +234 -0
- sliceagent/procman.py +187 -0
- sliceagent/prompt.py +239 -0
- sliceagent/records.py +108 -0
- sliceagent/recovery.py +119 -0
- sliceagent/regions.py +678 -0
- sliceagent/registry.py +128 -0
- sliceagent/retriever.py +19 -0
- sliceagent/safety.py +332 -0
- sliceagent/sandbox.py +143 -0
- sliceagent/scheduler.py +92 -0
- sliceagent/search_index.py +289 -0
- sliceagent/seed.py +465 -0
- sliceagent/sensory_cortex.py +500 -0
- sliceagent/session.py +222 -0
- sliceagent/skill_provenance.py +71 -0
- sliceagent/skill_usage.py +123 -0
- sliceagent/skills.py +209 -0
- sliceagent/subagent.py +332 -0
- sliceagent/subdir_hints.py +222 -0
- sliceagent/swap.py +182 -0
- sliceagent/taskstate.py +57 -0
- sliceagent/telemetry.py +59 -0
- sliceagent/terminal.py +240 -0
- sliceagent/text_utils.py +56 -0
- sliceagent/tool_summary.py +93 -0
- sliceagent/tools.py +1194 -0
- sliceagent/tui.py +1377 -0
- sliceagent/web.py +354 -0
- sliceagent-0.1.0.dist-info/METADATA +262 -0
- sliceagent-0.1.0.dist-info/RECORD +71 -0
- sliceagent-0.1.0.dist-info/WHEEL +4 -0
- sliceagent-0.1.0.dist-info/entry_points.txt +2 -0
- sliceagent-0.1.0.dist-info/licenses/LICENSE +21 -0
sliceagent/code_index.py
ADDED
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
"""CodeIndex — the RELATED CODE tier (behind the Retriever interface).
|
|
2
|
+
|
|
3
|
+
Fills the slice's discovery tier from a real repository: given the task (and the
|
|
4
|
+
agent's live focus, e.g. the current error), surface the most relevant *existing*
|
|
5
|
+
code as ranked snippets so the model doesn't have to blind-grep. Deterministic, no
|
|
6
|
+
embeddings, no network — just ripgrep over the working tree (respects .gitignore).
|
|
7
|
+
|
|
8
|
+
Distinct from the Memory tier: memem indexes a curated LESSON vault; this indexes
|
|
9
|
+
SOURCE CODE. Two tiers, two interfaces (see interfaces.py).
|
|
10
|
+
|
|
11
|
+
v1 is ripgrep symbol/term search + a regex repo-map skeleton. tree-sitter is the
|
|
12
|
+
precision upgrade for definition extraction — it slots in at `_defs_in()` without
|
|
13
|
+
touching the Retriever contract or any caller.
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
import os
|
|
19
|
+
import shutil
|
|
20
|
+
import subprocess
|
|
21
|
+
import threading
|
|
22
|
+
|
|
23
|
+
from .interfaces import Snippet
|
|
24
|
+
|
|
25
|
+
# tokens too common to discriminate on (task prose is full of them)
|
|
26
|
+
_STOP = frozenset((
|
|
27
|
+
"the and for with that this from into your you use using make build adds add create "
|
|
28
|
+
"creates function functions method module modules file files code test tests should "
|
|
29
|
+
"when then return returns value values given must each all any not new run runs fix "
|
|
30
|
+
"fixes bug bugs def class import only also like one two get set has have its them they "
|
|
31
|
+
"such only via per out off via are was were will can may want need needs implement"
|
|
32
|
+
).split())
|
|
33
|
+
|
|
34
|
+
# identifier-ish tokens worth searching: snake_case, camelCase, dotted names, ≥3 chars
|
|
35
|
+
import re as _re
|
|
36
|
+
_TOKEN = _re.compile(r"[A-Za-z_][A-Za-z0-9_]{2,}")
|
|
37
|
+
|
|
38
|
+
# language-ish definition lines for the repo-map skeleton (tree-sitter upgrades this)
|
|
39
|
+
_DEF_RE = _re.compile(
|
|
40
|
+
r"^\s*(?:export\s+|default\s+|public\s+|private\s+|protected\s+|static\s+|abstract\s+)*"
|
|
41
|
+
r"(?:async\s+)?"
|
|
42
|
+
r"(?:def|class|func|function|fn|type|interface|struct|enum|impl|trait|module|const)\b"
|
|
43
|
+
)
|
|
44
|
+
_CODE_EXT = frozenset((
|
|
45
|
+
".py .js .jsx .ts .tsx .go .rs .java .rb .c .h .cc .cpp .hpp .cs .php .swift .kt "
|
|
46
|
+
".scala .sh .lua .m .mm .ex .exs .clj .hs .ml .r .jl"
|
|
47
|
+
).split())
|
|
48
|
+
|
|
49
|
+
# the NAME of a definition (for the symbol graph); tree-sitter upgrades this at _scan_file()
|
|
50
|
+
_NAME_RE = _re.compile(
|
|
51
|
+
r"\b(?:def|class|func|function|fn|type|interface|struct|enum|trait|module|const)\s+"
|
|
52
|
+
r"([A-Za-z_][A-Za-z0-9_]*)")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _terms(query: str, limit: int = 12) -> list[str]:
    """Pull the distinct, discriminating identifier tokens out of a free-text query.

    Tokens are found with ``_TOKEN``; stop words (``_STOP``) and case-insensitive
    repeats are dropped. The first-seen spelling of each token is kept, in order of
    appearance, and scanning stops once ``limit`` tokens have been collected.
    A falsy ``query`` yields an empty list.
    """
    # dict keyed by the lowercase form: gives de-duplication and insertion order at once
    chosen: dict[str, str] = {}
    for candidate in _TOKEN.findall(query or ""):
        key = candidate.lower()
        if key in _STOP or key in chosen:
            continue
        chosen[key] = candidate
        # the cap is checked only AFTER an insert, exactly like the original loop
        if len(chosen) >= limit:
            break
    return list(chosen.values())
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
_TS = {"tried": False, "parser": None}
|
|
71
|
+
_TS_LOCK = threading.Lock()
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _ts_python():
|
|
75
|
+
"""Lazily build a tree-sitter Python parser; None if tree-sitter isn't installed (→ regex)."""
|
|
76
|
+
if _TS["tried"]:
|
|
77
|
+
return _TS["parser"]
|
|
78
|
+
with _TS_LOCK: # parallel explorers can hit first-use concurrently — build under a lock so a second
|
|
79
|
+
if _TS["tried"]: # thread can't read parser=None in the window between tried=True and parser=<...>
|
|
80
|
+
return _TS["parser"]
|
|
81
|
+
parser = None
|
|
82
|
+
try: # convenience bundle (prebuilt grammars)
|
|
83
|
+
from tree_sitter_languages import get_parser
|
|
84
|
+
parser = get_parser("python")
|
|
85
|
+
except Exception: # noqa: BLE001 — fall back to the split packages
|
|
86
|
+
try:
|
|
87
|
+
import tree_sitter_python as _tspy
|
|
88
|
+
from tree_sitter import Language, Parser
|
|
89
|
+
parser = Parser(Language(_tspy.language()))
|
|
90
|
+
except Exception: # noqa: BLE001 — not installed → regex path
|
|
91
|
+
parser = None
|
|
92
|
+
_TS["parser"] = parser
|
|
93
|
+
_TS["tried"] = True # set tried AFTER parser is populated (no torn read)
|
|
94
|
+
return _TS["parser"]
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _ts_def_names(path: str, src: str):
|
|
98
|
+
"""Definition names via tree-sitter (Python only, precise: real function/class nodes, no
|
|
99
|
+
comment/string false-positives). Returns None to signal 'use the regex' — non-Python file or
|
|
100
|
+
tree-sitter not installed. The Retriever contract and every caller are unchanged either way."""
|
|
101
|
+
if not path.endswith(".py"):
|
|
102
|
+
return None
|
|
103
|
+
parser = _ts_python()
|
|
104
|
+
if parser is None:
|
|
105
|
+
return None
|
|
106
|
+
try:
|
|
107
|
+
data = src.encode("utf-8", "replace") # tree-sitter offsets are BYTE offsets — slice the bytes, not the str
|
|
108
|
+
tree = parser.parse(data)
|
|
109
|
+
names, stack = set(), [tree.root_node]
|
|
110
|
+
while stack:
|
|
111
|
+
node = stack.pop()
|
|
112
|
+
if node.type in ("function_definition", "class_definition"):
|
|
113
|
+
nm = node.child_by_field_name("name")
|
|
114
|
+
if nm is not None:
|
|
115
|
+
names.add(data[nm.start_byte:nm.end_byte].decode("utf-8", "replace"))
|
|
116
|
+
stack.extend(node.children)
|
|
117
|
+
return names
|
|
118
|
+
except Exception: # noqa: BLE001 — any TS hiccup → regex
|
|
119
|
+
return None
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class RipgrepCodeIndex:
|
|
123
|
+
"""Retriever over a working tree using ripgrep. No index to build — queries run live.
|
|
124
|
+
|
|
125
|
+
Robust by design: any ripgrep failure (missing binary, bad path, timeout) degrades
|
|
126
|
+
to an empty result, so the discovery tier simply goes quiet rather than breaking the
|
|
127
|
+
loop — same contract as NullRetriever, just populated when there's code to find.
|
|
128
|
+
"""
|
|
129
|
+
|
|
130
|
+
def __init__(self, root: str = ".", *, rg: str = "rg",
|
|
131
|
+
max_filesize: str = "300K", timeout: float = 6.0,
|
|
132
|
+
ctx: int = 4, max_chars: int = 1400):
|
|
133
|
+
self.root = os.path.abspath(root)
|
|
134
|
+
self.rg = rg
|
|
135
|
+
self.max_filesize = max_filesize
|
|
136
|
+
self.timeout = timeout
|
|
137
|
+
self.ctx = ctx
|
|
138
|
+
self.max_chars = max_chars
|
|
139
|
+
self._graph_cache: dict | None = None # query-independent def/ref graph (see _graph)
|
|
140
|
+
self._graph_builds = 0 # rebuild counter (observability + tests)
|
|
141
|
+
self._graph_lock = threading.Lock() # parallel explorers share this index → serialize rebuilds
|
|
142
|
+
|
|
143
|
+
# --- Retriever contract -------------------------------------------------
|
|
144
|
+
def retrieve(self, query: str, k: int = 6) -> list[Snippet]:
|
|
145
|
+
"""The RELATED CODE tier: a relevance-RANKED repo MAP — which files matter for this query,
|
|
146
|
+
shown as definition SIGNATURES, not code excerpts. Why a map and not snippets: an A/B on a
|
|
147
|
+
lexical-trap task (bug in a neutral-vocabulary file the term search never ranks) showed the
|
|
148
|
+
map ties-or-beats injected snippets AND stays robust when the lexical signal points at the
|
|
149
|
+
WRONG file — the model reads real code on demand instead of anchoring on a guessed excerpt.
|
|
150
|
+
Ranking is STRUCTURAL (personalized PageRank over the def/ref graph, seeded by the lexical
|
|
151
|
+
matches), so a relevant file surfaces even with zero query-word overlap when a matched file
|
|
152
|
+
calls it — the case a purely-lexical ranking truncates on a large repo (see graph_map)."""
|
|
153
|
+
text, seeded = self.graph_map(query, _return_seeds=True)
|
|
154
|
+
if not text or not seeded:
|
|
155
|
+
# Gate on the REAL seed signal (≥1 lexical match in the graph), not the rendered "(matches:"
|
|
156
|
+
# count: a legitimately-seeded query whose matched file has NO def-skeleton (a config/constants
|
|
157
|
+
# file) still expands structurally to related files, and that map must NOT be dropped. With zero
|
|
158
|
+
# seeds we render NOTHING (the "hi" -> map noise case stays fixed).
|
|
159
|
+
return []
|
|
160
|
+
matches = text.count("(matches:")
|
|
161
|
+
return [Snippet(path="(repo map)", text=text, score=float(matches or seeded))]
|
|
162
|
+
|
|
163
|
+
def deps(self, path: str, limit: int = 6) -> list[str]:
|
|
164
|
+
"""Files structurally COUPLED to `path`, from the cached def/ref graph: reverse deps (files
|
|
165
|
+
that reference `path` — its CALLERS, ranked FIRST because they break on a rename/signature
|
|
166
|
+
change), then forward deps (the contracts `path` references). Used to keep an edited file's
|
|
167
|
+
callers + contracts co-resident so a coordinated edit reaches every site that must change in
|
|
168
|
+
lockstep. Returns [] when `path` isn't in the graph, so callers degrade gracefully.
|
|
169
|
+
Query-INDEPENDENT (reuses the cached graph; no per-call ripgrep)."""
|
|
170
|
+
try:
|
|
171
|
+
g = self._graph(400)
|
|
172
|
+
except Exception:
|
|
173
|
+
return []
|
|
174
|
+
edges = g["edges"]
|
|
175
|
+
if path not in g["fileset"]:
|
|
176
|
+
return []
|
|
177
|
+
fwd = edges.get(path, {}) # files `path` references (contracts)
|
|
178
|
+
rev = {f: e[path] for f, e in edges.items() if path in e} # files that reference `path` (callers)
|
|
179
|
+
# CALLER-FIRST: the hazard in a coordinated edit is the REVERSE-dependents (callers/importers
|
|
180
|
+
# that break on a rename/signature change), not the forward contracts the file calls. Rank
|
|
181
|
+
# callers BEFORE contracts so truncation at `limit` drops contracts (re-readable on demand),
|
|
182
|
+
# never the call-sites that must change in lockstep (re-observation-reach >= action-reach).
|
|
183
|
+
ranked = sorted(rev, key=lambda f: -rev[f]) + sorted(fwd, key=lambda f: -fwd[f])
|
|
184
|
+
seen, out = set(), []
|
|
185
|
+
for f in ranked:
|
|
186
|
+
if f != path and f not in seen:
|
|
187
|
+
seen.add(f)
|
|
188
|
+
out.append(f)
|
|
189
|
+
return out[:limit]
|
|
190
|
+
|
|
191
|
+
def def_names(self, path: str) -> set:
|
|
192
|
+
"""The symbol NAMES `path` defines (from the cached graph). Used to detect what an edit REMOVED
|
|
193
|
+
(pre-edit defs minus current defs) so a coordinated change can flag dangling references. Empty
|
|
194
|
+
on a no-graph host."""
|
|
195
|
+
try:
|
|
196
|
+
return set(self._graph(400).get("defs", {}).get(path) or ())
|
|
197
|
+
except Exception:
|
|
198
|
+
return set()
|
|
199
|
+
|
|
200
|
+
def ref_tokens(self, path: str) -> set:
|
|
201
|
+
"""The identifier tokens `path` REFERENCES (from the cached graph). A file whose current tokens
|
|
202
|
+
still contain a name an edit removed/moved is a dangling call-site. Empty on a no-graph host."""
|
|
203
|
+
try:
|
|
204
|
+
return set(self._graph(400).get("tokens", {}).get(path) or ())
|
|
205
|
+
except Exception:
|
|
206
|
+
return set()
|
|
207
|
+
|
|
208
|
+
# --- structural map: rank by personalized PageRank over the def/ref graph ---
|
|
209
|
+
def graph_map(self, query: str, max_files: int = 400, max_shown: int = 20, *, _return_seeds: bool = False):
|
|
210
|
+
"""Repo map ranked by PERSONALIZED PAGERANK over the symbol def/ref graph, seeded on the
|
|
211
|
+
files that match the query lexically. Rank flows along call/import edges, so a relevant file
|
|
212
|
+
surfaces even with ZERO query-word overlap when a matched file references it — exactly the
|
|
213
|
+
neutral-vocabulary target a purely-lexical ranking truncates on a large repo. Degrades to
|
|
214
|
+
lexical order when there is no graph signal. Bounded by BREADTH — the top `max_shown` ranked
|
|
215
|
+
files, each shown COMPLETE — NOT a char cut (a char cut dropped lower-ranked files mid-list,
|
|
216
|
+
the 'where is function X?' miss, and could render a file half-shown; breadth is deterministic).
|
|
217
|
+
|
|
218
|
+
The query-INDEPENDENT graph (defs/edges/skeletons) is cached on this instance and rebuilt
|
|
219
|
+
only when the tree changes (see _graph), so per-turn cost is just lexical search + PageRank,
|
|
220
|
+
not re-reading every file — the cost stays flat across a multi-turn session until an edit."""
|
|
221
|
+
g = self._graph(max_files)
|
|
222
|
+
files = list(g["files"])
|
|
223
|
+
if not files:
|
|
224
|
+
return ("", 0) if _return_seeds else ""
|
|
225
|
+
terms = _terms(query)
|
|
226
|
+
matched: dict[str, set] = {}
|
|
227
|
+
if terms:
|
|
228
|
+
for path, info in self._search(terms).items():
|
|
229
|
+
matched[os.path.relpath(path, self.root)] = info["terms"]
|
|
230
|
+
seeds = {rel: float(len(t)) for rel, t in matched.items() if rel in g["fileset"]}
|
|
231
|
+
n_seeds = len(seeds) # real seed signal returned to retrieve()'s gate (no shared read-back race)
|
|
232
|
+
pr = self._pagerank(files, g["edges"], seeds)
|
|
233
|
+
# rank: structural score, then lexical strength, then path (deterministic ties)
|
|
234
|
+
files.sort(key=lambda rel: (pr.get(rel, 0.0), len(matched.get(rel, ()))), reverse=True)
|
|
235
|
+
blocks: list[str] = []
|
|
236
|
+
for rel in files:
|
|
237
|
+
dlines = g["skeleton"].get(rel)
|
|
238
|
+
if not dlines:
|
|
239
|
+
continue
|
|
240
|
+
hit = matched.get(rel)
|
|
241
|
+
head = rel + (f" (matches: {', '.join(sorted(hit))})" if hit else "")
|
|
242
|
+
blocks.append(head + "\n" + "\n".join(" " + d for d in dlines))
|
|
243
|
+
if len(blocks) >= max_shown: # BREADTH bound: top-N ranked files, each shown COMPLETE
|
|
244
|
+
break
|
|
245
|
+
text = "\n".join(blocks)
|
|
246
|
+
return (text, n_seeds) if _return_seeds else text
|
|
247
|
+
|
|
248
|
+
def _graph(self, max_files: int) -> dict:
|
|
249
|
+
"""Build (or reuse) the query-INDEPENDENT def/ref graph. Cached on this instance and
|
|
250
|
+
invalidated by a fingerprint of the code files (path + mtime + size), so it rebuilds ONLY
|
|
251
|
+
when the tree actually changes (e.g. the agent edits a file) — not every turn. Reads and
|
|
252
|
+
parses each file ONCE per rebuild (defs + skeleton + ref tokens in one pass)."""
|
|
253
|
+
files = self._code_files(max_files)
|
|
254
|
+
sig = self._fingerprint(files)
|
|
255
|
+
c = self._graph_cache
|
|
256
|
+
if c is not None and c["sig"] == sig: # lock-free fast path (reference read is atomic in CPython)
|
|
257
|
+
return c
|
|
258
|
+
with self._graph_lock: # serialize rebuilds so parallel explorers don't double-build / tear a read
|
|
259
|
+
c = self._graph_cache
|
|
260
|
+
if c is not None and c["sig"] == sig:
|
|
261
|
+
return c
|
|
262
|
+
defs: dict[str, set] = {}
|
|
263
|
+
sym2file: dict[str, set] = {}
|
|
264
|
+
skeleton: dict[str, list] = {}
|
|
265
|
+
tokens: dict[str, set] = {}
|
|
266
|
+
for rel in files:
|
|
267
|
+
names, lines, toks = self._scan_file(rel)
|
|
268
|
+
if lines:
|
|
269
|
+
skeleton[rel] = lines
|
|
270
|
+
tokens[rel] = toks
|
|
271
|
+
if names:
|
|
272
|
+
defs[rel] = names
|
|
273
|
+
for n in names:
|
|
274
|
+
sym2file.setdefault(n, set()).add(rel)
|
|
275
|
+
edges = self._edges_from_tokens(files, defs, sym2file, tokens)
|
|
276
|
+
self._graph_builds += 1
|
|
277
|
+
self._graph_cache = {"sig": sig, "files": files, "fileset": set(files),
|
|
278
|
+
"skeleton": skeleton, "edges": edges, "defs": defs, "tokens": tokens}
|
|
279
|
+
return self._graph_cache
|
|
280
|
+
|
|
281
|
+
def _fingerprint(self, files: list[str]) -> tuple:
|
|
282
|
+
"""Cheap staleness key: (rel, mtime_ns, size) per file. Stat-only — no reads — so computing
|
|
283
|
+
it each turn is far cheaper than the rebuild it guards."""
|
|
284
|
+
out = []
|
|
285
|
+
for rel in files:
|
|
286
|
+
try:
|
|
287
|
+
st = os.stat(os.path.join(self.root, rel))
|
|
288
|
+
out.append((rel, st.st_mtime_ns, st.st_size))
|
|
289
|
+
except OSError:
|
|
290
|
+
out.append((rel, 0, 0))
|
|
291
|
+
return tuple(out)
|
|
292
|
+
|
|
293
|
+
def _scan_file(self, rel: str):
|
|
294
|
+
"""One read per file → (def names, skeleton lines, ref tokens). Names use tree-sitter when
|
|
295
|
+
available (precise), else a regex; skeleton lines and tokens are regex (display + refs)."""
|
|
296
|
+
try:
|
|
297
|
+
with open(os.path.join(self.root, rel), "r", encoding="utf-8", errors="replace") as fh:
|
|
298
|
+
src = fh.read() # pin utf-8 (like every other read): a non-utf-8 locale would mis-decode the def/ref graph
|
|
299
|
+
except OSError:
|
|
300
|
+
return set(), [], set()
|
|
301
|
+
ts = _ts_def_names(os.path.join(self.root, rel), src)
|
|
302
|
+
if ts is not None:
|
|
303
|
+
names = {n for n in ts if len(n) >= 4}
|
|
304
|
+
else:
|
|
305
|
+
names = {m.group(1) for m in _NAME_RE.finditer(src) if len(m.group(1)) >= 4}
|
|
306
|
+
lines = [ln.strip()[:120] for ln in src.splitlines() if _DEF_RE.match(ln)][:12]
|
|
307
|
+
tokens = set(_TOKEN.findall(src))
|
|
308
|
+
return names, lines, tokens
|
|
309
|
+
|
|
310
|
+
@staticmethod
|
|
311
|
+
def _edges_from_tokens(files: list[str], defs: dict, sym2file: dict, tokens: dict) -> dict:
|
|
312
|
+
"""Directed edges file → file it references, from the cached ref tokens. A references B if A
|
|
313
|
+
mentions a symbol DEFINED in B; symbols defined in many files are skipped (noisy names)."""
|
|
314
|
+
usable = {s: fs for s, fs in sym2file.items() if len(fs) <= 4}
|
|
315
|
+
edges: dict[str, dict] = {}
|
|
316
|
+
for rel in files:
|
|
317
|
+
own = defs.get(rel, set())
|
|
318
|
+
out: dict[str, int] = {}
|
|
319
|
+
for t in tokens.get(rel, ()):
|
|
320
|
+
if t in own:
|
|
321
|
+
continue
|
|
322
|
+
for tgt in usable.get(t, ()):
|
|
323
|
+
if tgt != rel:
|
|
324
|
+
out[tgt] = out.get(tgt, 0) + 1
|
|
325
|
+
if out:
|
|
326
|
+
edges[rel] = out
|
|
327
|
+
return edges
|
|
328
|
+
|
|
329
|
+
@staticmethod
|
|
330
|
+
def _pagerank(nodes: list[str], edges: dict, seeds: dict, d: float = 0.85,
|
|
331
|
+
iters: int = 40) -> dict:
|
|
332
|
+
"""Personalized PageRank. Personalization mass sits on the seed files (the lexical matches);
|
|
333
|
+
with no seeds it's uniform (→ plain centrality). Dangling nodes redistribute to the seeds."""
|
|
334
|
+
n = len(nodes)
|
|
335
|
+
if n == 0:
|
|
336
|
+
return {}
|
|
337
|
+
total = sum(seeds.values())
|
|
338
|
+
p = ({x: seeds.get(x, 0.0) / total for x in nodes} if total > 0
|
|
339
|
+
else {x: 1.0 / n for x in nodes})
|
|
340
|
+
r = dict(p)
|
|
341
|
+
outsum = {u: sum(w.values()) for u, w in edges.items()}
|
|
342
|
+
nodeset = set(nodes)
|
|
343
|
+
for _ in range(iters):
|
|
344
|
+
nr = {x: (1 - d) * p[x] for x in nodes}
|
|
345
|
+
dangling = 0.0
|
|
346
|
+
for u in nodes:
|
|
347
|
+
ru = r[u]
|
|
348
|
+
s = outsum.get(u, 0)
|
|
349
|
+
if s > 0:
|
|
350
|
+
for v, w in edges[u].items():
|
|
351
|
+
if v in nodeset:
|
|
352
|
+
nr[v] += d * ru * (w / s)
|
|
353
|
+
else:
|
|
354
|
+
dangling += d * ru
|
|
355
|
+
if dangling:
|
|
356
|
+
for x in nodes:
|
|
357
|
+
nr[x] += dangling * p[x]
|
|
358
|
+
r = nr
|
|
359
|
+
return r
|
|
360
|
+
|
|
361
|
+
# --- internals ----------------------------------------------------------
|
|
362
|
+
def _search(self, terms: list[str]) -> dict[str, dict]:
|
|
363
|
+
"""One ripgrep pass over all terms; group matches by file."""
|
|
364
|
+
cmd = [self.rg, "--json", "-i", "--max-filesize", self.max_filesize,
|
|
365
|
+
"--max-columns", "400"]
|
|
366
|
+
for t in terms:
|
|
367
|
+
cmd += ["-e", t]
|
|
368
|
+
cmd.append(self.root)
|
|
369
|
+
try:
|
|
370
|
+
proc = subprocess.run(cmd, capture_output=True, text=True,
|
|
371
|
+
timeout=self.timeout)
|
|
372
|
+
except (OSError, subprocess.SubprocessError):
|
|
373
|
+
return {}
|
|
374
|
+
files: dict[str, dict] = {}
|
|
375
|
+
for raw in proc.stdout.splitlines():
|
|
376
|
+
if not raw or '"type":"match"' not in raw:
|
|
377
|
+
continue
|
|
378
|
+
try:
|
|
379
|
+
obj = json.loads(raw)
|
|
380
|
+
except ValueError:
|
|
381
|
+
continue
|
|
382
|
+
d = obj.get("data", {})
|
|
383
|
+
path = (d.get("path") or {}).get("text")
|
|
384
|
+
ln = d.get("line_number")
|
|
385
|
+
if not path or ln is None:
|
|
386
|
+
continue
|
|
387
|
+
f = files.setdefault(path, {"terms": set(), "lines": [], "count": 0})
|
|
388
|
+
f["count"] += 1
|
|
389
|
+
if len(f["lines"]) < 60:
|
|
390
|
+
f["lines"].append(ln)
|
|
391
|
+
for sm in d.get("submatches", []):
|
|
392
|
+
mt = ((sm.get("match") or {}).get("text") or "").lower()
|
|
393
|
+
if mt:
|
|
394
|
+
f["terms"].add(mt)
|
|
395
|
+
return files
|
|
396
|
+
|
|
397
|
+
def _code_files(self, max_files: int) -> list[str]:
|
|
398
|
+
try:
|
|
399
|
+
proc = subprocess.run([self.rg, "--files", self.root],
|
|
400
|
+
capture_output=True, text=True, timeout=self.timeout)
|
|
401
|
+
except (OSError, subprocess.SubprocessError):
|
|
402
|
+
return []
|
|
403
|
+
rels: list[str] = []
|
|
404
|
+
for p in proc.stdout.splitlines():
|
|
405
|
+
if os.path.splitext(p)[1] in _CODE_EXT:
|
|
406
|
+
rels.append(os.path.relpath(p, self.root))
|
|
407
|
+
rels.sort()
|
|
408
|
+
return rels[:max_files]
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def make_code_index(root: str = ".", *, prefer_ripgrep: bool = True):
    """Build the code retriever for ``root``.

    Returns a ``RipgrepCodeIndex`` when ``prefer_ripgrep`` is true and an ``rg``
    executable is found on PATH; otherwise returns a ``NullRetriever`` (imported
    lazily from the sibling ``retriever`` module).
    """
    ripgrep_usable = prefer_ripgrep and shutil.which("rg")
    if not ripgrep_usable:
        from .retriever import NullRetriever
        return NullRetriever()
    return RipgrepCodeIndex(root=root)
|
sliceagent/config.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
"""Config — layered settings from sliceagent.toml (Step ③.2).
|
|
2
|
+
|
|
3
|
+
A layered config file (user then project, project overriding)
|
|
4
|
+
that declares persistent settings AND extension surfaces (skills dirs, MCP servers,
|
|
5
|
+
plugin dirs). Precedence is ENV > project file > user file > default, so a quick
|
|
6
|
+
`AGENT_POLICY=allow sliceagent ...` still overrides the file and ALL prior env-driven
|
|
7
|
+
behavior is preserved (the file just makes settings persistent).
|
|
8
|
+
|
|
9
|
+
Read-only TOML via stdlib tomllib (Python 3.11+ — no new dependency).
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
import tomllib
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _read_toml(path: str) -> dict:
|
|
18
|
+
try:
|
|
19
|
+
with open(path, "rb") as f:
|
|
20
|
+
return tomllib.load(f)
|
|
21
|
+
except (OSError, tomllib.TOMLDecodeError, UnicodeDecodeError, ValueError):
|
|
22
|
+
return {} # a corrupt / non-UTF-8 config must degrade to defaults, not crash startup
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _config_files() -> list[str]:
|
|
26
|
+
# user first, then project (project overrides user)
|
|
27
|
+
home = os.path.expanduser("~")
|
|
28
|
+
cwd = os.getcwd()
|
|
29
|
+
return [
|
|
30
|
+
os.path.join(home, ".sliceagent", "config.toml"),
|
|
31
|
+
os.path.join(cwd, "sliceagent.toml"),
|
|
32
|
+
os.path.join(cwd, ".sliceagent", "config.toml"),
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# ── runtime preferences (the /model switch persists here) ───────────────────────────────────────
|
|
37
|
+
# A tiny JSON sidecar, NOT config.toml: stdlib has no TOML WRITER (tomllib is read-only), so writing
|
|
38
|
+
# back to config.toml would need a new dep or a fragile hand-rolled serializer. JSON is safe + atomic.
|
|
39
|
+
# Precedence (resolved in cli): explicit env (AGENT_MODEL/AGENT_REASONING) > prefs > config.toml > default.
|
|
40
|
+
def _prefs_path() -> str:
    """Path of the JSON prefs sidecar: ``<home>/.sliceagent/prefs.json``."""
    return os.path.join(os.path.expanduser("~"), ".sliceagent", "prefs.json")


def load_prefs() -> dict:
    """Return the saved preferences, or {} if none can be read.

    Always returns a dict: a missing, unreadable or corrupt file yields {}, and so
    does a file whose top-level JSON value is not an object (a list, string or
    number would otherwise leak out to callers that expect ``.get``/``.update``).
    """
    try:
        import json
        with open(_prefs_path(), encoding="utf-8") as f:
            data = json.load(f)
    except Exception:  # noqa: BLE001 — missing/corrupt prefs must never break startup
        return {}
    return data if isinstance(data, dict) else {}


def save_prefs(updates: dict) -> None:
    """Merge the truthy entries of ``updates`` into the prefs sidecar. Never raises.

    Entries whose value is falsy (empty string, None, 0, ...) are ignored, so they
    neither overwrite nor remove an existing preference. The file is written to a
    temporary sibling and moved into place with ``os.replace``; if anything fails the
    existing prefs file is left untouched and the temporary file is cleaned up.
    """
    tmp = None
    try:
        import json
        path = _prefs_path()
        os.makedirs(os.path.dirname(path), exist_ok=True)
        cur = load_prefs()
        cur.update({k: v for k, v in updates.items() if v})
        tmp = path + ".tmp"
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(cur, f, indent=2)
        os.replace(tmp, path)
    except Exception:  # noqa: BLE001 — persistence is a nicety, not a hard requirement
        # don't leave a half-written temp file behind (e.g. a non-serialisable value)
        if tmp is not None:
            try:
                os.remove(tmp)
            except OSError:
                pass
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _deep_merge(a: dict, b: dict) -> dict:
|
|
71
|
+
out = dict(a)
|
|
72
|
+
for k, v in b.items():
|
|
73
|
+
out[k] = _deep_merge(out[k], v) if isinstance(v, dict) and isinstance(out.get(k), dict) else v
|
|
74
|
+
return out
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _truthy(v) -> bool:
|
|
78
|
+
if isinstance(v, bool):
|
|
79
|
+
return v
|
|
80
|
+
return str(v).strip().lower() in ("1", "true", "yes", "on")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class Config:
|
|
84
|
+
"""Resolved settings. Each accessor checks ENV first, then the merged TOML, then a default."""
|
|
85
|
+
|
|
86
|
+
def __init__(self, data: dict | None = None):
|
|
87
|
+
self.data = data or {}
|
|
88
|
+
|
|
89
|
+
@classmethod
|
|
90
|
+
def load(cls) -> "Config":
|
|
91
|
+
merged: dict = {}
|
|
92
|
+
for f in _config_files():
|
|
93
|
+
if os.path.isfile(f):
|
|
94
|
+
merged = _deep_merge(merged, _read_toml(f))
|
|
95
|
+
return cls(merged)
|
|
96
|
+
|
|
97
|
+
def _get(self, section: str, key: str, env: str | None, default):
|
|
98
|
+
if env and os.environ.get(env) is not None:
|
|
99
|
+
return os.environ[env]
|
|
100
|
+
sec = self.data.get(section, {})
|
|
101
|
+
if isinstance(sec, dict) and key in sec:
|
|
102
|
+
return sec[key]
|
|
103
|
+
return default
|
|
104
|
+
|
|
105
|
+
# --- provider (multi-provider; written by `sliceagent init`; ENV always wins) ---
|
|
106
|
+
# Resolution order for api_key/base_url/model: ENV → the DEFAULT provider's [providers.<id>] table →
|
|
107
|
+
# the legacy flat [provider]/[agent].model → default. So multiple named providers can coexist and
|
|
108
|
+
# `sliceagent config --use <id>` switches between them, while old flat configs + env keep working.
|
|
109
|
+
@property
|
|
110
|
+
def default_provider(self) -> str:
|
|
111
|
+
return self._get("agent", "default_provider", "AGENT_PROVIDER", "")
|
|
112
|
+
|
|
113
|
+
def providers(self) -> dict:
|
|
114
|
+
"""All declared providers: {id: {api_key, base_url, model}}."""
|
|
115
|
+
v = self.data.get("providers", {})
|
|
116
|
+
return {k: val for k, val in v.items() if isinstance(val, dict)} if isinstance(v, dict) else {}
|
|
117
|
+
|
|
118
|
+
def _provider_table(self) -> dict:
|
|
119
|
+
"""The active provider's table: the configured default, or the sole provider if exactly one exists."""
|
|
120
|
+
provs = self.providers()
|
|
121
|
+
pid = self.default_provider
|
|
122
|
+
if pid and pid in provs:
|
|
123
|
+
return provs[pid]
|
|
124
|
+
if len(provs) == 1:
|
|
125
|
+
return next(iter(provs.values()))
|
|
126
|
+
return {}
|
|
127
|
+
|
|
128
|
+
@property
|
|
129
|
+
def api_key(self) -> str:
|
|
130
|
+
env = os.environ.get("LLM_API_KEY")
|
|
131
|
+
if env: # empty string ("" exported) means UNSET → fall through to config, don't return ""
|
|
132
|
+
return env
|
|
133
|
+
return self._provider_table().get("api_key") or self._get("provider", "api_key", None, "")
|
|
134
|
+
|
|
135
|
+
@property
|
|
136
|
+
def base_url(self) -> str:
|
|
137
|
+
env = os.environ.get("LLM_BASE_URL")
|
|
138
|
+
if env: # empty string → unset (use provider default), not a literal empty base_url
|
|
139
|
+
return env
|
|
140
|
+
return self._provider_table().get("base_url") or self._get("provider", "base_url", None, "")
|
|
141
|
+
|
|
142
|
+
# --- agent ---
|
|
143
|
+
@property
|
|
144
|
+
def model(self) -> str:
|
|
145
|
+
env = os.environ.get("AGENT_MODEL")
|
|
146
|
+
if env: # empty string → unset → fall through to config/default model, not ""
|
|
147
|
+
return env
|
|
148
|
+
# No built-in default model — the user chooses one (sliceagent init / AGENT_MODEL / config.toml).
|
|
149
|
+
return self._provider_table().get("model") or self._get("agent", "model", None, "")
|
|
150
|
+
|
|
151
|
+
@property
|
|
152
|
+
def policy(self) -> str:
|
|
153
|
+
return self._get("agent", "policy", "AGENT_POLICY", "teenager")
|
|
154
|
+
|
|
155
|
+
@property
|
|
156
|
+
def mine(self) -> str:
|
|
157
|
+
return self._get("agent", "mine", "AGENT_MINE", "deterministic")
|
|
158
|
+
|
|
159
|
+
@property
|
|
160
|
+
def subagent_depth(self) -> int:
|
|
161
|
+
v = self._get("agent", "subagent_depth", "AGENT_SUBAGENT_DEPTH", 1)
|
|
162
|
+
try:
|
|
163
|
+
return max(0, int(v)) # 0 = off; a malformed value falls back to the default
|
|
164
|
+
except (TypeError, ValueError):
|
|
165
|
+
return 1
|
|
166
|
+
|
|
167
|
+
@property
|
|
168
|
+
def show_slice(self) -> bool:
|
|
169
|
+
return _truthy(self._get("agent", "show_slice", "SHOW_SLICE", False))
|
|
170
|
+
|
|
171
|
+
# --- sandbox ---
|
|
172
|
+
@property
|
|
173
|
+
def sandbox_backend(self) -> str:
|
|
174
|
+
return self._get("sandbox", "backend", "AGENT_SANDBOX", "local") # local | docker
|
|
175
|
+
|
|
176
|
+
@property
|
|
177
|
+
def sandbox_image(self) -> str:
|
|
178
|
+
return self._get("sandbox", "image", None, "python:3.12-slim")
|
|
179
|
+
|
|
180
|
+
@property
|
|
181
|
+
def sandbox_network(self) -> str:
|
|
182
|
+
return self._get("sandbox", "network", None, "none")
|
|
183
|
+
|
|
184
|
+
# --- oracle / budget ---
|
|
185
|
+
@property
|
|
186
|
+
def verify_cmd(self) -> str | None:
|
|
187
|
+
return self._get("oracle", "verify_cmd", "AGENT_VERIFY_CMD", None)
|
|
188
|
+
|
|
189
|
+
@property
|
|
190
|
+
def max_tokens(self) -> int | None:
|
|
191
|
+
v = self._get("budget", "max_tokens", "AGENT_MAX_TOKENS", None)
|
|
192
|
+
try:
|
|
193
|
+
n = int(v) if v is not None else None
|
|
194
|
+
except (TypeError, ValueError):
|
|
195
|
+
return None # garbage budget → no budget (don't crash startup)
|
|
196
|
+
return n if (n is not None and n > 0) else None # discard a nonsensical <=0 budget
|
|
197
|
+
|
|
198
|
+
@property
|
|
199
|
+
def max_steps(self) -> int:
|
|
200
|
+
# Per-turn step ceiling (runaway backstop). Default raised above the old hard 40 so deep
|
|
201
|
+
# analysis/review turns aren't guillotined; overridable for heavier work.
|
|
202
|
+
v = self._get("budget", "max_steps", "AGENT_MAX_STEPS", None)
|
|
203
|
+
try:
|
|
204
|
+
n = int(v) if v not in (None, "") else None
|
|
205
|
+
except (TypeError, ValueError):
|
|
206
|
+
return 60
|
|
207
|
+
return n if (n is not None and n >= 1) else 60 # <=0 (incl. the env STRING "0") → default, consistent across env/TOML
|
|
208
|
+
|
|
209
|
+
# --- extension surfaces ---
|
|
210
|
+
@property
|
|
211
|
+
def skills_roots(self) -> list[str] | None:
|
|
212
|
+
sec = self.data.get("skills", {})
|
|
213
|
+
dirs = sec.get("dirs") if isinstance(sec, dict) else None
|
|
214
|
+
if isinstance(dirs, str): # a scalar `dirs = "..."` must not iterate char-by-char
|
|
215
|
+
dirs = [dirs]
|
|
216
|
+
if not isinstance(dirs, list):
|
|
217
|
+
return None
|
|
218
|
+
roots = [os.path.expanduser(d) for d in dirs if isinstance(d, str)] # skip non-str entries (don't crash startup)
|
|
219
|
+
return roots or None
|
|
220
|
+
|
|
221
|
+
@property
|
|
222
|
+
def mcp_servers(self) -> dict:
|
|
223
|
+
"""Declared MCP servers (consumed in ③.3). e.g. [mcp_servers.github] ..."""
|
|
224
|
+
v = self.data.get("mcp_servers", {})
|
|
225
|
+
return v if isinstance(v, dict) else {}
|
|
226
|
+
|
|
227
|
+
@property
|
|
228
|
+
def plugin_dirs(self) -> list[str]:
|
|
229
|
+
"""Extra plugin directories (consumed in ③.4)."""
|
|
230
|
+
sec = self.data.get("plugins", {})
|
|
231
|
+
dirs = sec.get("dirs", []) if isinstance(sec, dict) else []
|
|
232
|
+
if isinstance(dirs, str): # scalar `dirs = "..."` → single entry, not char iteration
|
|
233
|
+
dirs = [dirs]
|
|
234
|
+
if not isinstance(dirs, list):
|
|
235
|
+
return []
|
|
236
|
+
return [os.path.expanduser(d) for d in dirs if isinstance(d, str)] # skip non-str entries (don't crash startup)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def load_config() -> "Config":
    """Module-level convenience wrapper: return ``Config.load()``."""
    resolved = Config.load()
    return resolved
|