sliceagent 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sliceagent/__init__.py +3 -0
- sliceagent/__main__.py +6 -0
- sliceagent/access.py +93 -0
- sliceagent/agents.py +173 -0
- sliceagent/background_review.py +146 -0
- sliceagent/binsniff.py +89 -0
- sliceagent/cli.py +890 -0
- sliceagent/clock.py +32 -0
- sliceagent/code_grep.py +329 -0
- sliceagent/code_index.py +417 -0
- sliceagent/config.py +240 -0
- sliceagent/context_overflow.py +227 -0
- sliceagent/envspec.py +129 -0
- sliceagent/errors.py +167 -0
- sliceagent/events.py +96 -0
- sliceagent/finding_types.py +70 -0
- sliceagent/flags.py +63 -0
- sliceagent/fuzzy.py +135 -0
- sliceagent/guardrails.py +438 -0
- sliceagent/guidance.py +69 -0
- sliceagent/hippocampus.py +581 -0
- sliceagent/hooks.py +334 -0
- sliceagent/interfaces.py +144 -0
- sliceagent/llm.py +695 -0
- sliceagent/loop.py +548 -0
- sliceagent/mcp_client.py +255 -0
- sliceagent/mcp_security.py +77 -0
- sliceagent/memory.py +428 -0
- sliceagent/metrics.py +103 -0
- sliceagent/model_catalog.py +124 -0
- sliceagent/monitor.py +615 -0
- sliceagent/neocortex.py +436 -0
- sliceagent/onboarding.py +323 -0
- sliceagent/oracle.py +36 -0
- sliceagent/pagetable.py +255 -0
- sliceagent/pfc.py +449 -0
- sliceagent/plugins.py +127 -0
- sliceagent/policy.py +234 -0
- sliceagent/procman.py +187 -0
- sliceagent/prompt.py +239 -0
- sliceagent/records.py +108 -0
- sliceagent/recovery.py +119 -0
- sliceagent/regions.py +678 -0
- sliceagent/registry.py +128 -0
- sliceagent/retriever.py +19 -0
- sliceagent/safety.py +332 -0
- sliceagent/sandbox.py +143 -0
- sliceagent/scheduler.py +92 -0
- sliceagent/search_index.py +289 -0
- sliceagent/seed.py +465 -0
- sliceagent/sensory_cortex.py +500 -0
- sliceagent/session.py +222 -0
- sliceagent/skill_provenance.py +71 -0
- sliceagent/skill_usage.py +123 -0
- sliceagent/skills.py +209 -0
- sliceagent/subagent.py +332 -0
- sliceagent/subdir_hints.py +222 -0
- sliceagent/swap.py +182 -0
- sliceagent/taskstate.py +57 -0
- sliceagent/telemetry.py +59 -0
- sliceagent/terminal.py +240 -0
- sliceagent/text_utils.py +56 -0
- sliceagent/tool_summary.py +93 -0
- sliceagent/tools.py +1194 -0
- sliceagent/tui.py +1377 -0
- sliceagent/web.py +354 -0
- sliceagent-0.1.0.dist-info/METADATA +262 -0
- sliceagent-0.1.0.dist-info/RECORD +71 -0
- sliceagent-0.1.0.dist-info/WHEEL +4 -0
- sliceagent-0.1.0.dist-info/entry_points.txt +2 -0
- sliceagent-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,500 @@
|
|
|
1
|
+
"""SENSORY CORTEX — perception of the live world, not memory of the past. Everything in this
|
|
2
|
+
module is a DERIVED VIEW: recomputed live from the filesystem/git each time it's called, never
|
|
3
|
+
persisted between turns, nothing here is ever "recalled" — only re-observed. Holds the workspace
|
|
4
|
+
snapshot (git branch/status, project facts, conventions) AND the repo map / directory-walk
|
|
5
|
+
primitives (``repo_map``, ``_is_ignored``) — both are the same kind of thing: a fresh read of
|
|
6
|
+
the world, memoized at most for the session, never a durable store.
|
|
7
|
+
|
|
8
|
+
Workspace snapshot for the system prompt — a one-shot, cache-stable probe.
|
|
9
|
+
|
|
10
|
+
The point is to hand the model its *verify loop* and current git posture up
|
|
11
|
+
front — which branch, how dirty the tree is, the exact test/lint/build commands —
|
|
12
|
+
instead of making it rediscover them every session.
|
|
13
|
+
|
|
14
|
+
MOAT / cache safety
|
|
15
|
+
-------------------
|
|
16
|
+
``build_workspace_snapshot`` is called **once per session** and its output is baked
|
|
17
|
+
into the *stable* (cacheable) system-prompt tier — never re-probed per turn (that
|
|
18
|
+
would shatter the prompt cache). Branch and dirty state drift mid-session, so the
|
|
19
|
+
caller's brief tells the model to re-check with ``git`` before acting on it. The
|
|
20
|
+
function is therefore deterministic per ``cwd`` within a session, never raises, and
|
|
21
|
+
returns ``""`` outside a workspace (no repo / no marker / git missing / empty cwd).
|
|
22
|
+
|
|
23
|
+
SCOPE: git **branch** + short **status counts**
|
|
24
|
+
(staged/modified/untracked) + detected **verify command(s)**. Deliberately dropped:
|
|
25
|
+
ahead/behind tracking, worktree detection, and the recent-commit log.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import json
|
|
31
|
+
import os
|
|
32
|
+
import re
|
|
33
|
+
import subprocess
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
from typing import Optional
|
|
36
|
+
|
|
37
|
+
# Filenames whose presence marks a directory as a code workspace even when it
# is not (yet) a git repository. Only existence is checked here — nothing is
# parsed. Order matters: it is the display order of detected manifests.
_PROJECT_MARKERS = (
    # Python
    "pyproject.toml",
    "setup.py",
    "setup.cfg",
    "requirements.txt",
    # JavaScript / TypeScript
    "package.json",
    "tsconfig.json",
    "deno.json",
    # Rust / Go / JVM
    "Cargo.toml",
    "go.mod",
    "pom.xml",
    "build.gradle",
    "build.gradle.kts",
    # Ruby / PHP / Elixir / Dart
    "Gemfile",
    "composer.json",
    "mix.exs",
    "pubspec.yaml",
    # Build tooling
    "CMakeLists.txt",
    "Makefile",
    "Dockerfile",
    # Agent-instruction files
    "AGENTS.md",
    "CLAUDE.md",
    ".cursorrules",
)

# Agent-instruction files; reported on their own line rather than as manifests.
_CONTEXT_FILES = ("AGENTS.md", "CLAUDE.md", ".cursorrules")

# (lockfile, package manager) pairs, listed in priority order.
_PY_LOCKFILES = (
    ("uv.lock", "uv"),
    ("poetry.lock", "poetry"),
    ("Pipfile.lock", "pipenv"),
)
_JS_LOCKFILES = (
    ("pnpm-lock.yaml", "pnpm"),
    ("bun.lockb", "bun"),
    ("bun.lock", "bun"),
    ("yarn.lock", "yarn"),
    ("package-lock.json", "npm"),
)

# package.json script names / Makefile targets worth surfacing as verify commands.
_VERIFY_TARGETS = (
    "test",
    "tests",
    "lint",
    "typecheck",
    "check",
    "build",
    "fmt",
    "format",
)
_MAX_VERIFY_COMMANDS = 8  # cap on commands listed on the "Verify" line
_MAX_FACT_FILE_BYTES = 256 * 1024  # files larger than this are never read

_GIT_TIMEOUT = 2.5  # seconds allowed for each git subprocess
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# ── cwd / root resolution ────────────────────────────────────────────────────
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _resolve_cwd(cwd: Optional[str]) -> Optional[Path]:
    """Turn ``cwd`` into a ``Path``, falling back to the process cwd when it is empty.

    A non-empty ``cwd`` has ``~`` expanded; ``None`` or ``""`` means "use
    ``os.getcwd()``". Returns ``None`` instead of raising when the directory
    cannot be determined (OSError / RuntimeError / ValueError).
    """
    try:
        candidate = Path(cwd).expanduser() if cwd else Path(os.getcwd())
    except (OSError, RuntimeError, ValueError):
        return None
    return candidate
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _git_root(cwd: Path) -> Optional[Path]:
    """Return the nearest directory at or above ``cwd`` that contains ``.git``.

    ``cwd`` is resolved first, then it and each of its parents are checked in
    order. ``.git`` may be a directory or a file; only existence is tested.
    Returns ``None`` when nothing matches or when resolution/probing fails.
    """
    try:
        start = cwd.resolve()
        return next(
            (candidate for candidate in (start, *start.parents)
             if (candidate / ".git").exists()),
            None,
        )
    except (OSError, RuntimeError, ValueError):
        return None
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _home() -> Optional[Path]:
    """Return the resolved home directory, or ``None`` if it cannot be determined."""
    try:
        home_dir = Path.home()
        return home_dir.resolve()
    except (OSError, RuntimeError):
        return None
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _marker_root(cwd: Path) -> Optional[Path]:
    """Find the closest directory that looks like a project root, or ``None``.

    The search covers ``cwd`` itself plus at most six ancestors and returns the
    first one holding any ``_PROJECT_MARKERS`` file. The home directory is
    never accepted: a Makefile or AGENTS.md there is global user config, not a
    project signal. Never raises.
    """
    try:
        start = cwd.resolve()
    except (OSError, RuntimeError, ValueError):
        return None
    home_dir = _home()
    # Seven candidates in total: depth 0 (cwd) through depth 6.
    for directory in (start, *start.parents)[:7]:
        if directory == home_dir:
            continue
        try:
            if any((directory / marker).exists() for marker in _PROJECT_MARKERS):
                return directory
        except (OSError, ValueError):
            # An unreadable level is skipped; keep climbing.
            continue
    return None
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# ── git/workspace probe ──────────────────────────────────────────────────────
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _git(cwd: Path, *args: str) -> str:
    """Run ``git -C cwd <args>`` and return stripped stdout, or ``""``. Never raises.

    ``""`` is returned when git cannot be started, the call times out after
    ``_GIT_TIMEOUT`` seconds, an argument is invalid, or git exits non-zero.
    """
    # These probes run in the background (some on every prompt build). With
    # optional locks enabled, `git status` takes the index lock to write back a
    # refreshed index, which can collide with git commands the user or the
    # agent is running in the same repository. GIT_OPTIONAL_LOCKS=0 disables it.
    env = {**os.environ, "GIT_OPTIONAL_LOCKS": "0"}
    try:
        out = subprocess.run(
            ["git", "-C", str(cwd), *args],
            capture_output=True,
            text=True,
            errors="replace",  # a non-UTF-8 commit subject (%s) must not raise UnicodeDecodeError
            timeout=_GIT_TIMEOUT,
            env=env,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # ValueError: subprocess rejects arguments containing an embedded NUL.
        return ""
    return out.stdout.strip() if out.returncode == 0 else ""
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _parse_status(porcelain: str) -> tuple[str, dict[str, int]]:
    """Summarise ``git status --porcelain=2 --branch`` output as (branch_head, counts).

    ``branch_head`` is the value of the ``# branch.head`` header (``""`` when
    absent). ``counts`` has the keys ``staged``, ``modified``, ``untracked``
    and ``conflicts``. An ordinary/rename entry counts as staged when its index
    column is not ``.`` and as modified when its worktree column is not ``.``,
    so one file can count in both. Upstream / ahead-behind headers are ignored.
    """
    branch_head = ""
    tally = {"staged": 0, "modified": 0, "untracked": 0, "conflicts": 0}
    for entry in porcelain.splitlines():
        if entry.startswith("# branch.head"):
            branch_head = entry.split(maxsplit=2)[-1]
            continue
        if entry.startswith("u "):
            tally["conflicts"] += 1
            continue
        if entry.startswith("? "):
            tally["untracked"] += 1
            continue
        if not entry.startswith(("1 ", "2 ")):
            continue
        # Ordinary ("1") or rename/copy ("2") entry: second field is the XY code.
        fields = entry.split(maxsplit=2)
        if len(fields) < 2 or len(fields[1]) < 2:
            continue
        index_state, worktree_state = fields[1][0], fields[1][1]
        if index_state != ".":
            tally["staged"] += 1
        if worktree_state != ".":
            tally["modified"] += 1
    return branch_head, tally
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _dirty_phrases(counts: dict[str, int]) -> list[str]:
    """Render the non-zero status counts as ``"<n> <label>"`` phrases, in display order.

    The order is staged, modified, untracked, conflicts. An empty list means a
    clean tree; callers join with ", " and substitute "clean" themselves. Both
    the one-line summary and the multi-line snapshot use this helper so their
    wording stays identical.
    """
    phrases: list[str] = []
    for label in ("staged", "modified", "untracked", "conflicts"):
        amount = counts[label]
        if amount:
            phrases.append(f"{amount} {label}")
    return phrases
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _read_small(path: Path) -> str:
    """Return the text of ``path`` if it is a regular file within the size cap, else ``""``.

    Files larger than ``_MAX_FACT_FILE_BYTES`` are never read. Content is
    decoded as UTF-8 with undecodable bytes replaced. Any ``OSError`` yields
    ``""``.
    """
    try:
        if path.is_file() and path.stat().st_size <= _MAX_FACT_FILE_BYTES:
            return path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        pass
    return ""
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _project_facts(root: Path) -> list[str]:
    """Collect the project-fact lines for ``root``.

    Up to three lines are produced, each only when it has content:

    * ``- Project:`` the manifest files present (first six), plus the package
      manager(s) implied by any lockfile.
    * ``- Verify:`` detected test/lint/build commands, de-duplicated and
      capped at ``_MAX_VERIFY_COMMANDS``.
    * ``- Context files:`` the agent-instruction files present.

    Only stat calls and a few size-capped reads are performed. Deterministic
    for a given tree; never raises.
    """
    facts: list[str] = []

    try:
        manifests = [
            m for m in _PROJECT_MARKERS
            if m not in _CONTEXT_FILES and (root / m).is_file()
        ]
        package_managers = [
            pm for lock, pm in (*_PY_LOCKFILES, *_JS_LOCKFILES) if (root / lock).is_file()
        ]
    except OSError:
        manifests, package_managers = [], []
    if manifests:
        line = f"- Project: {', '.join(manifests[:6])}"
        if package_managers:
            # dict.fromkeys de-duplicates while keeping priority order.
            line += f" ({'/'.join(dict.fromkeys(package_managers))})"
        facts.append(line)

    verify: list[str] = []
    try:
        if (root / "scripts" / "run_tests.sh").is_file():
            verify.append("scripts/run_tests.sh")
        if (root / "package.json").is_file():
            try:
                manifest = json.loads(_read_small(root / "package.json") or "{}")
            except (ValueError, RecursionError):
                # Malformed JSON (JSONDecodeError is a ValueError) or nesting
                # too deep to parse: treat as "no scripts".
                manifest = {}
            scripts = manifest.get("scripts") if isinstance(manifest, dict) else None
            if not isinstance(scripts, dict):
                # A non-mapping "scripts" is invalid. Without this guard a
                # number raises TypeError on `in`, and a string would
                # substring-match target names.
                scripts = {}
            js_pm = next((pm for lock, pm in _JS_LOCKFILES if (root / lock).is_file()), "npm")
            verify.extend(f"{js_pm} run {name}" for name in _VERIFY_TARGETS if name in scripts)
        if (root / "pytest.ini").is_file() or "[tool.pytest" in _read_small(root / "pyproject.toml"):
            verify.append("pytest")
        makefile = _read_small(root / "Makefile")
        if makefile:
            # Match "<name>:" at line start as a rule, but not the variable
            # assignments "<name> := ...", "::=" or ":::=".
            verify.extend(
                f"make {name}" for name in _VERIFY_TARGETS
                if re.search(rf"^{re.escape(name)}\s*:(?!:?:?=)", makefile, re.MULTILINE)
            )
    except OSError:
        pass
    if verify:
        deduped = list(dict.fromkeys(verify))[:_MAX_VERIFY_COMMANDS]
        facts.append(f"- Verify: {'; '.join(deduped)}")

    try:
        context_files = [c for c in _CONTEXT_FILES if (root / c).is_file()]
    except OSError:
        context_files = []
    if context_files:
        facts.append(f"- Context files: {', '.join(context_files)}")

    return facts
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def git_branch_status(cwd: str) -> str:
    """One-line ``branch (status)`` summary of the repository containing ``cwd``.

    Examples: ``main (3 modified, 1 untracked)`` or ``main (clean)``. When the
    HEAD commit can be read, `` · HEAD: <short-hash> <subject>`` is appended,
    whitespace-collapsed and cut to 72 characters. Returns ``""`` outside a
    repository or when the status probe yields no branch head. Never raises.
    Intended to be computed once per session for the cache-stable system tier.
    """
    base_dir = _resolve_cwd(cwd)
    repo = None if base_dir is None else _git_root(base_dir)
    if repo is None:
        return ""
    head, counts = _parse_status(_git(repo, "status", "--porcelain=2", "--branch"))
    if not head:
        return ""
    label = head if head != "(detached)" else "(detached HEAD)"
    state = ", ".join(_dirty_phrases(counts)) or "clean"
    summary = f"{label} ({state})"
    # HEAD commit for orientation: collapse whitespace, then bound the length.
    raw_subject = _git(repo, "log", "-1", "--format=%h %s")
    headline = " ".join(raw_subject.split())[:72]
    if headline:
        return f"{summary} · HEAD: {headline}"
    return summary
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def build_workspace_snapshot(cwd: str) -> str:
    """Workspace snapshot body for the system prompt (``""`` outside a workspace).

    When ``cwd`` is inside a git repository and the status probe succeeds, the
    snapshot starts with ``- Branch:`` and ``- Status:`` lines. The detected
    project facts (manifest, package manager, verify commands, context files)
    follow, so marker-only (non-git) projects still get a snapshot.

    If a ``.git`` entry exists but git could not be queried (git missing,
    timeout, non-zero exit), the git lines are omitted rather than reporting a
    fabricated "clean" status.

    Contract: never raises; intended to be called once per session, with the
    caller baking the result into the cacheable system-prompt tier and
    supplying its own header. No ahead/behind, worktree or commit-log info. No
    leading "Root:" line either — the caller's header already frames the
    workspace, and a second absolute path tends to make the model run commands
    in the wrong directory.
    """
    resolved = _resolve_cwd(cwd)
    if resolved is None:
        return ""
    git_root = _git_root(resolved)
    root = git_root or _marker_root(resolved)
    if root is None:
        return ""

    lines: list[str] = []

    if git_root is not None:
        head, counts = _parse_status(_git(root, "status", "--porcelain=2", "--branch"))
        # `--branch` always emits a "# branch.head" header, so an empty head
        # means the probe itself failed and the zeroed counts carry no meaning.
        if head:
            branch = "(detached HEAD)" if head == "(detached)" else head
            lines.append(f"- Branch: {branch}")
            dirty = _dirty_phrases(counts)
            lines.append(f"- Status: {', '.join(dirty) if dirty else 'clean'}")

    lines.extend(_project_facts(root))
    return "\n".join(lines)
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
# ── LIVE world-state (SENSORY CORTEX — the derived-view, recomputed-each-build region) ────────────
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def project_root(cwd: str) -> Optional[str]:
    """Return the project root for ``cwd`` as a string, or ``None`` outside any project.

    The git root wins; otherwise the nearest ancestor holding a project marker
    (pyproject.toml, package.json, …) is used. This is the "are we in a project
    at all?" decision that gates repo-derived content (repo map, facts,
    conventions, subdir hints), so launching in a bare home directory does not
    walk the whole home tree.
    """
    start = _resolve_cwd(cwd)
    if start is None:
        return None
    found = _git_root(start)
    if found is None:
        found = _marker_root(start)
    return None if not found else str(found)
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def workspace_facts(cwd: str) -> str:
    """Static project facts for the cache-stable system tier.

    This is the git-independent part of ``build_workspace_snapshot``: manifest,
    package manager, verify commands and context files, one per line. Live git
    state is deliberately excluded (see ``git_worktree_state``) so the system
    message stays byte-stable. Returns ``""`` outside a project; never raises.
    """
    start = _resolve_cwd(cwd)
    if start is None:
        return ""
    root = _git_root(start)
    if root is None:
        root = _marker_root(start)
    if root is None:
        return ""
    fact_lines = _project_facts(root)
    return "\n".join(fact_lines)
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def project_conventions(cwd: str, *, max_chars: int = 4000) -> str:
    """Return the content of the project's agent-convention file, labelled with its name.

    The first of AGENTS.md / CLAUDE.md / .cursorrules at the project root with
    non-blank content is used. The text goes through
    ``subdir_hints._neutralize_injection``, is stripped, and is cut to
    ``max_chars`` with a ``[...truncated]`` marker when longer. Returns ``""``
    when there is no such file or ``cwd`` is outside a project.

    Intended for the cacheable system tier so conventions survive eviction of
    the bounded slice. Only one file is ever included. Treat the result as
    data: the live conversation overrides it on conflict.
    """
    from .subdir_hints import _neutralize_injection

    start = _resolve_cwd(cwd)
    if start is None:
        return ""
    root = _git_root(start) or _marker_root(start)
    if root is None:
        return ""
    for name in _CONTEXT_FILES:
        raw = _read_small(root / name)
        if not raw.strip():
            continue  # missing, oversized or blank: try the next candidate
        body = _neutralize_injection(raw).strip()
        if len(body) > max_chars:
            body = f"{body[:max_chars]}\n[...truncated]"
        return f"{name}:\n{body}"
    return ""
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def git_worktree_state(cwd: str, *, max_files: int = 20) -> str:
    """Live working-tree report: current branch plus the set of changed files.

    Unlike the one-shot session snapshot, this is meant to be re-probed on
    every build so the model always sees current git state. The first line
    names the branch and the number of changed files (or says the tree is
    clean). Then come up to ``max_files`` lines tagged staged / modified /
    conflict / untracked, and an "…and N more" line when the list was cut.
    An entry with any index change is tagged staged, even if it also has
    worktree changes. Returns ``""`` outside a repository or when the probe
    yields no branch head; never raises.
    """
    start = _resolve_cwd(cwd)
    repo = None if start is None else _git_root(start)
    if repo is None:
        return ""
    porcelain = _git(repo, "status", "--porcelain=2", "--branch")
    head = _parse_status(porcelain)[0]
    if not head:
        return ""
    branch = head if head != "(detached)" else "(detached HEAD)"

    entries: list[tuple[str, str]] = []
    for record in porcelain.splitlines():
        kind = record[:2]
        if kind in ("1 ", "2 "):
            # A type-2 (rename/copy) record has one extra <X><score> field and
            # ends with "<path>\t<origpath>". Split one field further for it
            # and drop the tab-joined original path.
            split_count = 8 if kind == "1 " else 9
            fields = record.split(maxsplit=split_count)
            if len(fields) < 2 or len(fields[1]) < 2:
                continue
            label = "modified" if fields[1][0] == "." else "staged"
            entries.append((label, fields[-1].split("\t", 1)[0]))
        elif kind == "u ":
            entries.append(("conflict", record.split(maxsplit=10)[-1]))
        elif kind == "? ":
            # Slice, don't strip: leading/trailing spaces are part of the path.
            entries.append(("untracked", record[2:]))

    if not entries:
        return f"branch {branch} · working tree clean"
    report = [f"branch {branch} · {len(entries)} changed file(s)"]
    report.extend(f" {label}: {path}" for label, path in entries[:max_files])
    overflow = len(entries) - max_files
    if overflow > 0:
        report.append(f" …and {overflow} more")
    return "\n".join(report)
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
# Directory and file names that are pure noise in a listing and would flood the
# context on a real repository (thousands of .venv / cache paths). Both
# list_files (tools.py) and repo_map prune them, so the model gets a clean map
# instead of falling back to raw `find`. This is a task-agnostic denylist of
# the universal offenders, not a .gitignore parser; ripgrep (the grep tool) is
# already .gitignore-aware natively.
_IGNORE_NAMES = frozenset(
    # Version control, virtualenvs, Python/tooling caches, editor state
    ".git .hg .svn .venv venv env .env node_modules __pycache__ .ruff_cache "
    ".pytest_cache .mypy_cache .tox .idea .vscode .cache dist build .eggs htmlcov "
    ".DS_Store "
    # JS/TS build + framework caches: huge generated trees that slow the map,
    # listing and retrieval, and are never source the agent should read
    ".next .turbo .parcel-cache .nuxt .svelte-kit .output .angular .vite coverage "
    ".gradle".split()
)
# Name endings that are ignored regardless of the stem.
_IGNORE_SUFFIX = (".egg-info", ".pyc")
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def _is_ignored(name: str) -> bool:
    """Return True when ``name`` is on the ignore denylist or ends with an ignored suffix.

    ``name`` is a single path component (a directory or file name), not a path.
    """
    if name in _IGNORE_NAMES:
        return True
    return name.endswith(_IGNORE_SUFFIX)
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
# Asset, binary and log files crowd out source in a structural map, so repo_map
# leaves them out (list_files still shows them). Generic, not task-specific.
_MAP_SKIP_SUFFIX = tuple(
    ".png .jpg .jpeg .gif .svg .ico .pdf .log .lock .bin "
    ".so .dylib .o .class .woff .woff2 .ttf .mp4 .mov .zip "
    ".tar .gz .whl .pyc .jsonl .csv .parquet".split()
)
# Source-code extensions. Used only to rank directories by how much code they
# hold, so the real source tree is shown first; the same signal applies to
# every kind of task.
_CODE_SUFFIX = tuple(
    ".py .js .ts .jsx .tsx .go .rs .java .rb .c .h .cc "
    ".cpp .hpp .cs .php .swift .kt .scala .sh .lua .ml .ex "
    ".exs .clj .r .jl .vue .sql".split()
)
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def repo_map(root: str, *, max_entries: int = 300, max_per_dir: int = 25, max_chars: int = 12000) -> str:
    """Build a compact, ignore-aware structural map of the tree under ``root``.

    Each output line is one directory followed by its files. Ignored names
    (VCS, virtualenvs, caches) are pruned from the walk and asset/binary/log
    files are left out. Directories are ranked by how many source files they
    contain, ties broken by path, so the real code tree comes first.

    Args:
        root: Directory to map.
        max_entries: Budget of listed files; once reached, later directories
            collapse to ``(<n> files)``.
        max_per_dir: Most files listed for one directory; the rest are
            summarised as ``(+<n> more)``.
        max_chars: Hard ceiling on the length of the returned string. When the
            map is longer, the ranked tail is dropped and replaced by a single
            overflow note; the note is counted against the ceiling.

    Returns:
        The map text, or ``""`` when ``root`` is unusable or nothing is left
        to show. Never raises.
    """
    if not root or not os.path.isdir(root):
        return ""
    rows: list[tuple[str, list[str], int, int]] = []  # (rel, files, total, code_count)
    try:
        for dirpath, dirnames, filenames in os.walk(root):  # symlinks not followed
            # Prune in place so os.walk never descends into ignored directories.
            dirnames[:] = sorted(d for d in dirnames if not _is_ignored(d))
            files = sorted(f for f in filenames
                           if not _is_ignored(f) and not f.endswith(_MAP_SKIP_SUFFIX))
            if not files:
                continue
            rel = os.path.relpath(dirpath, root)
            code_count = sum(1 for f in files if f.endswith(_CODE_SUFFIX))
            rows.append((rel, files, len(files), code_count))
    except OSError:
        return ""
    if not rows:
        return ""
    # Rank source-dense dirs first (so src/ beats docs/ assets); path breaks ties for stability.
    rows.sort(key=lambda r: (-r[3], r[0]))
    lines, shown = [], 0
    for rel, files, total, _code in rows:
        prefix = "./" if rel == "." else rel + "/"
        if shown < max_entries:  # detailed: list files (per-dir capped)
            take = files[:max_per_dir]
            shown += len(take)
            extra = f" (+{total - len(take)} more)" if total > len(take) else ""
            lines.append(f"{prefix} — {', '.join(take)}{extra}")
        else:  # over budget: keep the dir, collapse to a count
            lines.append(f"{prefix} — ({total} files)")
    out = "\n".join(lines)
    if len(out) <= max_chars:
        return out

    def overflow_note(dropped: int) -> str:
        return (f"… (+{dropped} more director{'y' if dropped == 1 else 'ies'} — over map budget; "
                "use list_files to drill in)")

    # Hard ceiling: rows are ranked source-dense-first, so keep the prefix that
    # fits and collapse the tail into one note. Room for the note (at its
    # longest possible wording) and its joining newline is reserved up front;
    # otherwise the appended note would push the result past max_chars.
    budget = max_chars - len(overflow_note(len(lines))) - 1
    kept: list[str] = []
    used = 0
    for ln in lines:
        cost = len(ln) + (1 if kept else 0)  # +1 for the newline joining it to the previous line
        if used + cost > budget:
            break
        kept.append(ln)
        used += cost
    kept.append(overflow_note(len(lines) - len(kept)))
    # The slice only matters when max_chars is smaller than the note itself.
    return "\n".join(kept)[:max(max_chars, 0)]
|