sliceagent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. sliceagent/__init__.py +3 -0
  2. sliceagent/__main__.py +6 -0
  3. sliceagent/access.py +93 -0
  4. sliceagent/agents.py +173 -0
  5. sliceagent/background_review.py +146 -0
  6. sliceagent/binsniff.py +89 -0
  7. sliceagent/cli.py +890 -0
  8. sliceagent/clock.py +32 -0
  9. sliceagent/code_grep.py +329 -0
  10. sliceagent/code_index.py +417 -0
  11. sliceagent/config.py +240 -0
  12. sliceagent/context_overflow.py +227 -0
  13. sliceagent/envspec.py +129 -0
  14. sliceagent/errors.py +167 -0
  15. sliceagent/events.py +96 -0
  16. sliceagent/finding_types.py +70 -0
  17. sliceagent/flags.py +63 -0
  18. sliceagent/fuzzy.py +135 -0
  19. sliceagent/guardrails.py +438 -0
  20. sliceagent/guidance.py +69 -0
  21. sliceagent/hippocampus.py +581 -0
  22. sliceagent/hooks.py +334 -0
  23. sliceagent/interfaces.py +144 -0
  24. sliceagent/llm.py +695 -0
  25. sliceagent/loop.py +548 -0
  26. sliceagent/mcp_client.py +255 -0
  27. sliceagent/mcp_security.py +77 -0
  28. sliceagent/memory.py +428 -0
  29. sliceagent/metrics.py +103 -0
  30. sliceagent/model_catalog.py +124 -0
  31. sliceagent/monitor.py +615 -0
  32. sliceagent/neocortex.py +436 -0
  33. sliceagent/onboarding.py +323 -0
  34. sliceagent/oracle.py +36 -0
  35. sliceagent/pagetable.py +255 -0
  36. sliceagent/pfc.py +449 -0
  37. sliceagent/plugins.py +127 -0
  38. sliceagent/policy.py +234 -0
  39. sliceagent/procman.py +187 -0
  40. sliceagent/prompt.py +239 -0
  41. sliceagent/records.py +108 -0
  42. sliceagent/recovery.py +119 -0
  43. sliceagent/regions.py +678 -0
  44. sliceagent/registry.py +128 -0
  45. sliceagent/retriever.py +19 -0
  46. sliceagent/safety.py +332 -0
  47. sliceagent/sandbox.py +143 -0
  48. sliceagent/scheduler.py +92 -0
  49. sliceagent/search_index.py +289 -0
  50. sliceagent/seed.py +465 -0
  51. sliceagent/sensory_cortex.py +500 -0
  52. sliceagent/session.py +222 -0
  53. sliceagent/skill_provenance.py +71 -0
  54. sliceagent/skill_usage.py +123 -0
  55. sliceagent/skills.py +209 -0
  56. sliceagent/subagent.py +332 -0
  57. sliceagent/subdir_hints.py +222 -0
  58. sliceagent/swap.py +182 -0
  59. sliceagent/taskstate.py +57 -0
  60. sliceagent/telemetry.py +59 -0
  61. sliceagent/terminal.py +240 -0
  62. sliceagent/text_utils.py +56 -0
  63. sliceagent/tool_summary.py +93 -0
  64. sliceagent/tools.py +1194 -0
  65. sliceagent/tui.py +1377 -0
  66. sliceagent/web.py +354 -0
  67. sliceagent-0.1.0.dist-info/METADATA +262 -0
  68. sliceagent-0.1.0.dist-info/RECORD +71 -0
  69. sliceagent-0.1.0.dist-info/WHEEL +4 -0
  70. sliceagent-0.1.0.dist-info/entry_points.txt +2 -0
  71. sliceagent-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,500 @@
1
+ """SENSORY CORTEX — perception of the live world, not memory of the past. Everything in this
2
+ module is a DERIVED VIEW: recomputed live from the filesystem/git each time it's called, never
3
+ persisted between turns, nothing here is ever "recalled" — only re-observed. Holds the workspace
4
+ snapshot (git branch/status, project facts, conventions) AND the repo map / directory-walk
5
+ primitives (``repo_map``, ``_is_ignored``) — both are the same kind of thing: a fresh read of
6
+ the world, memoized at most for the session, never a durable store.
7
+
8
+ Workspace snapshot for the system prompt — a one-shot, cache-stable probe.
9
+
10
+ The point is to hand the model its *verify loop* and current git posture up
11
+ front — which branch, how dirty the tree is, the exact test/lint/build commands —
12
+ instead of making it rediscover them every session.
13
+
14
+ MOAT / cache safety
15
+ -------------------
16
+ ``build_workspace_snapshot`` is called **once per session** and its output is baked
17
+ into the *stable* (cacheable) system-prompt tier — never re-probed per turn (that
18
+ would shatter the prompt cache). Branch and dirty state drift mid-session, so the
19
+ caller's brief tells the model to re-check with ``git`` before acting on it. The
20
+ function is therefore deterministic per ``cwd`` within a session, never raises, and
21
+ returns ``""`` outside a workspace (no repo / no marker / git missing / empty cwd).
22
+
23
+ SCOPE: git **branch** + short **status counts**
24
+ (staged/modified/untracked) + detected **verify command(s)**. Deliberately dropped:
25
+ ahead/behind tracking, worktree detection, and the recent-commit log.
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import json
31
+ import os
32
+ import re
33
+ import subprocess
34
+ from pathlib import Path
35
+ from typing import Optional
36
+
37
# Filenames whose presence marks a directory as a code workspace, even when the
# directory is not (yet) a git repository. Only existence is checked — no parsing.
_PROJECT_MARKERS = (
    # Python
    "pyproject.toml",
    "setup.py",
    "setup.cfg",
    "requirements.txt",
    # JavaScript / TypeScript
    "package.json",
    "tsconfig.json",
    "deno.json",
    # Other ecosystems
    "Cargo.toml",
    "go.mod",
    "pom.xml",
    "build.gradle",
    "build.gradle.kts",
    "Gemfile",
    "composer.json",
    "mix.exs",
    "pubspec.yaml",
    # Build / container files
    "CMakeLists.txt",
    "Makefile",
    "Dockerfile",
    # Agent-instruction files
    "AGENTS.md",
    "CLAUDE.md",
    ".cursorrules",
)

# Agent-instruction files; the snapshot lists these separately from the manifests.
_CONTEXT_FILES = ("AGENTS.md", "CLAUDE.md", ".cursorrules")

# (lockfile, package manager) pairs, checked in priority order.
_PY_LOCKFILES = (
    ("uv.lock", "uv"),
    ("poetry.lock", "poetry"),
    ("Pipfile.lock", "pipenv"),
)
_JS_LOCKFILES = (
    ("pnpm-lock.yaml", "pnpm"),
    ("bun.lockb", "bun"),
    ("bun.lock", "bun"),
    ("yarn.lock", "yarn"),
    ("package-lock.json", "npm"),
)

# package.json script names / Makefile target names surfaced as verify commands.
_VERIFY_TARGETS = (
    "test",
    "tests",
    "lint",
    "typecheck",
    "check",
    "build",
    "fmt",
    "format",
)
_MAX_VERIFY_COMMANDS = 8  # cap on the number of verify commands listed
_MAX_FACT_FILE_BYTES = 256 * 1024  # files larger than this are never read

_GIT_TIMEOUT = 2.5  # seconds allowed for each git subprocess
64
+
65
+
66
+ # ── cwd / root resolution ────────────────────────────────────────────────────
67
+
68
+
69
def _resolve_cwd(cwd: Optional[str]) -> Optional[Path]:
    """Turn ``cwd`` into a ``Path``; fall back to the process cwd when it is empty.

    A non-empty ``cwd`` is returned with ``~`` expanded (it is not resolved or
    checked for existence). Returns ``None`` instead of raising when the path
    cannot be produced.
    """
    try:
        return Path(cwd).expanduser() if cwd else Path(os.getcwd())
    except (OSError, RuntimeError, ValueError):
        return None
77
+
78
+
79
def _git_root(cwd: Path) -> Optional[Path]:
    """Return the nearest directory at or above ``cwd`` that contains a ``.git`` entry.

    ``cwd`` is resolved first, then it and each of its parents are checked in
    order. Returns ``None`` when no such directory exists or when the
    filesystem lookups fail; never raises.
    """
    try:
        start = cwd.resolve()
        # The generator is consumed inside the ``try`` so that errors raised by
        # ``exists()`` are swallowed as well.
        return next(
            (candidate for candidate in (start, *start.parents) if (candidate / ".git").exists()),
            None,
        )
    except (OSError, RuntimeError, ValueError):
        return None
88
+
89
+
90
def _home() -> Optional[Path]:
    """Return the resolved home directory, or ``None`` if it cannot be determined."""
    try:
        home_dir = Path.home()
        return home_dir.resolve()
    except (OSError, RuntimeError):
        return None
95
+
96
+
97
def _marker_root(cwd: Path) -> Optional[Path]:
    """Find the closest directory that contains one of ``_PROJECT_MARKERS``.

    The resolved ``cwd`` and up to six of its ancestors are examined, nearest
    first. The home directory is never accepted as a root: a Makefile or
    AGENTS.md there is global user config, not a project signal. Returns
    ``None`` when nothing matches; never raises.
    """
    try:
        start = cwd.resolve()
    except (OSError, RuntimeError, ValueError):
        return None

    home_dir = _home()
    # Index 0 is cwd itself, indices 1..6 are its ancestors.
    for candidate in (start, *start.parents)[:7]:
        if candidate == home_dir:
            continue
        try:
            if any((candidate / marker).exists() for marker in _PROJECT_MARKERS):
                return candidate
        except (OSError, ValueError):
            # An unreadable level is skipped; the search continues upward.
            continue
    return None
120
+
121
+
122
+ # ── git/workspace probe ──────────────────────────────────────────────────────
123
+
124
+
125
def _git(cwd: Path, *args: str) -> str:
    """Run ``git -C cwd <args>`` and return its stripped stdout.

    Returns ``""`` when git cannot be started, times out (``_GIT_TIMEOUT``
    seconds), is handed an unusable argument, or exits non-zero. Never raises.

    The child runs with ``GIT_OPTIONAL_LOCKS=0`` so that read-only probes such
    as ``git status`` do not take the optional index lock and therefore cannot
    collide with a git command the user is running at the same moment.
    """
    try:
        proc = subprocess.run(
            ["git", "-C", str(cwd), *args],
            capture_output=True,
            text=True,
            # A non-UTF-8 commit subject must not surface as UnicodeDecodeError.
            errors="replace",
            timeout=_GIT_TIMEOUT,
            # Set through the environment rather than ``--no-optional-locks`` so a
            # git too old to know the setting simply ignores it.
            env={**os.environ, "GIT_OPTIONAL_LOCKS": "0"},
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # ValueError: subprocess rejects arguments containing an embedded NUL byte.
        return ""
    if proc.returncode != 0:
        return ""
    return proc.stdout.strip()
138
+
139
+
140
def _parse_status(porcelain: str) -> tuple[str, dict[str, int]]:
    """Reduce ``git status --porcelain=2 --branch`` output to ``(head, counts)``.

    ``head`` is the text after ``# branch.head`` (``""`` when that line is
    absent). ``counts`` holds the number of staged, modified, untracked and
    conflicted entries. Upstream and ahead/behind headers are ignored.
    """
    branch = ""
    tally = dict.fromkeys(("staged", "modified", "untracked", "conflicts"), 0)

    for entry in porcelain.splitlines():
        if entry.startswith("# branch.head"):
            branch = entry.split(maxsplit=2)[-1]
            continue

        kind = entry[:2]
        if kind in ("1 ", "2 "):
            # Ordinary (1) and rename/copy (2) records: field two is the XY code,
            # X for the index side and Y for the working-tree side.
            fields = entry.split(maxsplit=2)
            xy = fields[1] if len(fields) >= 2 else ""
            if len(xy) >= 2:
                if xy[0] != ".":
                    tally["staged"] += 1
                if xy[1] != ".":
                    tally["modified"] += 1
        elif kind == "u ":
            tally["conflicts"] += 1
        elif kind == "? ":
            tally["untracked"] += 1

    return branch, tally
166
+
167
+
168
def _dirty_phrases(counts: dict[str, int]) -> list[str]:
    """Render the non-zero status counts as ``"<n> <label>"`` strings, in display order.

    An empty result means the tree is clean; callers join the phrases with
    ``", "`` and substitute ``"clean"`` themselves. Both the one-line branch
    summary and the multi-line snapshot use this, so their wording stays aligned.
    """
    display_order = ("staged", "modified", "untracked", "conflicts")
    # Read every key up front so a missing key fails the same way regardless of values.
    pairs = [(label, counts[label]) for label in display_order]
    return [f"{amount} {label}" for label, amount in pairs if amount]
182
+
183
+
184
def _read_small(path: Path) -> str:
    """Return the text of ``path`` when it is a regular file no larger than the size cap.

    Anything else (missing, not a file, bigger than ``_MAX_FACT_FILE_BYTES``,
    unreadable) yields ``""``. Bytes that are not valid UTF-8 are replaced
    rather than raising.
    """
    try:
        if path.is_file() and path.stat().st_size <= _MAX_FACT_FILE_BYTES:
            return path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        pass
    return ""
192
+
193
+
194
def _project_facts(root: Path) -> list[str]:
    """Collect the bullet lines describing the project rooted at ``root``.

    Up to three lines are produced, in this order:

    * ``- Project: ...`` — manifest files from ``_PROJECT_MARKERS`` that exist
      (agent-instruction files excluded, first six shown), followed in
      parentheses by the package managers implied by any lockfiles present.
    * ``- Verify: ...`` — candidate verify commands: ``scripts/run_tests.sh``,
      ``package.json`` scripts and Makefile targets named in
      ``_VERIFY_TARGETS``, and ``pytest`` when a pytest config is detected;
      de-duplicated and capped at ``_MAX_VERIFY_COMMANDS``.
    * ``- Context files: ...`` — whichever of ``_CONTEXT_FILES`` exist.

    Only stat calls and a few bounded reads are performed. Never raises: a
    malformed ``package.json`` (invalid JSON, or a ``scripts`` value that is
    not an object) contributes no commands instead of propagating an error.
    """
    facts: list[str] = []

    # --- manifests + package manager -----------------------------------------
    try:
        manifests = [
            m for m in _PROJECT_MARKERS
            if m not in _CONTEXT_FILES and (root / m).is_file()
        ]
        package_managers = [
            pm for lock, pm in (*_PY_LOCKFILES, *_JS_LOCKFILES) if (root / lock).is_file()
        ]
    except OSError:
        manifests, package_managers = [], []
    if manifests:
        line = f"- Project: {', '.join(manifests[:6])}"
        if package_managers:
            # dict.fromkeys de-duplicates while keeping priority order.
            line += f" ({'/'.join(dict.fromkeys(package_managers))})"
        facts.append(line)

    # --- verify commands -----------------------------------------------------
    verify: list[str] = []
    try:
        if (root / "scripts" / "run_tests.sh").is_file():
            verify.append("scripts/run_tests.sh")

        if (root / "package.json").is_file():
            try:
                manifest = json.loads(_read_small(root / "package.json") or "{}")
            except (ValueError, RecursionError):
                # ValueError covers JSONDecodeError and the int-digit-limit error;
                # RecursionError covers pathologically nested documents.
                manifest = {}
            scripts = manifest.get("scripts") if isinstance(manifest, dict) else None
            if not isinstance(scripts, dict):
                # A number here would make ``name in scripts`` raise TypeError and a
                # string would match by substring; only an object is meaningful.
                scripts = {}
            js_pm = next((pm for lock, pm in _JS_LOCKFILES if (root / lock).is_file()), "npm")
            verify.extend(f"{js_pm} run {name}" for name in _VERIFY_TARGETS if name in scripts)

        if (root / "pytest.ini").is_file() or "[tool.pytest" in _read_small(root / "pyproject.toml"):
            verify.append("pytest")

        makefile = _read_small(root / "Makefile")
        if makefile:
            # ``name:`` at line start is a rule; ``(?!=)`` rejects ``name := value``
            # variable assignments, which are not runnable targets.
            verify.extend(
                f"make {name}" for name in _VERIFY_TARGETS
                if re.search(rf"^{re.escape(name)}\s*:(?!=)", makefile, re.MULTILINE)
            )
    except OSError:
        pass
    if verify:
        deduped = list(dict.fromkeys(verify))[:_MAX_VERIFY_COMMANDS]
        facts.append(f"- Verify: {'; '.join(deduped)}")

    # --- agent-instruction files ---------------------------------------------
    try:
        context_files = [c for c in _CONTEXT_FILES if (root / c).is_file()]
    except OSError:
        context_files = []
    if context_files:
        facts.append(f"- Context files: {', '.join(context_files)}")

    return facts
250
+
251
+
252
def git_branch_status(cwd: str) -> str:
    """Summarise the repository containing ``cwd`` on a single line.

    The line has the form ``"<branch> (<status>)"``, e.g.
    ``"main (3 modified, 1 untracked)"`` or ``"main (clean)"``. When the HEAD
    commit can be read, ``" · HEAD: <short-hash> <subject>"`` is appended; the
    hash-and-subject text is whitespace-normalised and cut to 72 characters.
    A detached head is shown as ``(detached HEAD)``.

    Returns ``""`` when ``cwd`` is unusable, is not inside a git repository,
    or the status probe yields no branch head. Never raises.
    """
    resolved = _resolve_cwd(cwd)
    git_root = _git_root(resolved) if resolved is not None else None
    if git_root is None:
        return ""

    head, counts = _parse_status(_git(git_root, "status", "--porcelain=2", "--branch"))
    if not head:
        return ""

    branch = head if head != "(detached)" else "(detached HEAD)"
    state = ", ".join(_dirty_phrases(counts)) or "clean"
    summary = f"{branch} ({state})"

    # HEAD commit, for orientation: collapse whitespace, then bound the length.
    raw_commit = _git(git_root, "log", "-1", "--format=%h %s")
    last = " ".join(raw_commit.split())[:72]
    if last:
        return f"{summary} · HEAD: {last}"
    return summary
274
+
275
+
276
def build_workspace_snapshot(cwd: str) -> str:
    """Build the workspace snapshot body for the system prompt.

    The result is a newline-joined list of bullet lines:

    * ``- Branch: ...`` and ``- Status: ...`` when ``cwd`` is inside a git
      repository AND the status probe succeeded;
    * the project-fact lines from ``_project_facts`` (manifest, package
      manager, verify commands, context files), so marker-only (non-git)
      projects still get a snapshot.

    Returns ``""`` when ``cwd`` is unusable or is neither in a git repository
    nor under a project marker. Never raises. No "Root:" line is emitted; the
    caller supplies its own header.

    If the git probe fails (git missing, timeout, non-zero exit) the git lines
    are omitted entirely rather than reporting ``Status: clean``: an unknown
    tree state must not be presented as a clean one. This matches
    ``git_branch_status`` and ``git_worktree_state``, which also treat an
    empty branch head as "no git information".
    """
    resolved = _resolve_cwd(cwd)
    if resolved is None:
        return ""
    git_root = _git_root(resolved)
    root = git_root or _marker_root(resolved)
    if root is None:
        return ""

    lines: list[str] = []

    if git_root is not None:
        head, counts = _parse_status(_git(root, "status", "--porcelain=2", "--branch"))
        # ``_git`` returns "" on any failure, which parses to an empty head and
        # all-zero counts; only a non-empty head proves the probe actually ran.
        if head:
            branch = "(detached HEAD)" if head == "(detached)" else head
            lines.append(f"- Branch: {branch}")
            dirty = _dirty_phrases(counts)
            lines.append(f"- Status: {', '.join(dirty) if dirty else 'clean'}")

    lines.extend(_project_facts(root))
    return "\n".join(lines)
313
+
314
+
315
+ # ── LIVE world-state (SENSORY CORTEX — the derived-view, recomputed-each-build region) ────────────
316
+
317
+
318
def project_root(cwd: str) -> Optional[str]:
    """Return the project root for ``cwd`` as a string, or ``None`` outside any project.

    The git root wins; otherwise the nearest ancestor holding a project marker
    (pyproject/package.json/...) is used. Callers use the ``None`` case to skip
    repo-derived content, so that launching in a bare HOME directory does not
    walk the whole home tree.
    """
    resolved = _resolve_cwd(cwd)
    if resolved is None:
        return None
    found = _git_root(resolved) or _marker_root(resolved)
    if not found:
        return None
    return str(found)
328
+
329
+
330
def workspace_facts(cwd: str) -> str:
    """Return the git-independent project facts for ``cwd`` as newline-joined lines.

    This is the static part of ``build_workspace_snapshot`` (manifest, package
    manager, verify commands, context files); no git state is included, which
    keeps the text stable for the cacheable system tier. Returns ``""`` when
    ``cwd`` is unusable or outside a project. Never raises.
    """
    resolved = _resolve_cwd(cwd)
    root = None
    if resolved is not None:
        root = _git_root(resolved) or _marker_root(resolved)
    if root is None:
        return ""
    fact_lines = _project_facts(root)
    return "\n".join(fact_lines)
342
+
343
+
344
def project_conventions(cwd: str, *, max_chars: int = 4000) -> str:
    """Return the content of the project's agent-convention file, labelled with its name.

    The files in ``_CONTEXT_FILES`` (AGENTS.md / CLAUDE.md / .cursorrules) are
    tried in order at the project root, and the first one with non-blank
    content is used. Its text is passed through
    ``subdir_hints._neutralize_injection``, stripped, and cut to ``max_chars``
    characters (with a ``[...truncated]`` marker appended when cut). The result
    is ``"<name>:\\n<body>"``.

    Returns ``""`` when ``cwd`` is unusable, outside a project, or no
    convention file has content. Only one file is ever returned. The text is
    meant to be treated as data: the live conversation overrides it on conflict.
    """
    from .subdir_hints import _neutralize_injection

    resolved = _resolve_cwd(cwd)
    if resolved is None:
        return ""
    root = _git_root(resolved) or _marker_root(resolved)
    if root is None:
        return ""

    for name in _CONTEXT_FILES:
        raw = _read_small(root / name)
        if not raw.strip():
            continue  # missing, oversized, unreadable or blank — try the next candidate
        body = _neutralize_injection(raw).strip()
        if len(body) > max_chars:
            body = body[:max_chars] + "\n[...truncated]"
        return f"{name}:\n{body}"
    return ""
368
+
369
+
370
def git_worktree_state(cwd: str, *, max_files: int = 20) -> str:
    """Describe the live working tree: the current branch plus the set of changed files.

    Unlike the one-shot session-start snapshot, this is meant to be re-probed on
    every build so the model always sees the current git state. The first line
    is either ``branch <name> · working tree clean`` or
    ``branch <name> · <n> changed file(s)``; in the latter case one
    ``<tag>: <path>`` line follows per change (tags: staged, modified,
    conflict, untracked), limited to ``max_files`` with an ``…and N more``
    line for the remainder.

    Returns ``""`` when ``cwd`` is unusable, outside a git repository, or the
    status probe yields no branch head. Never raises.
    """
    resolved = _resolve_cwd(cwd)
    if resolved is None:
        return ""
    git_root = _git_root(resolved)
    if git_root is None:
        return ""

    porcelain = _git(git_root, "status", "--porcelain=2", "--branch")
    head, _unused_counts = _parse_status(porcelain)
    if not head:
        return ""
    branch = head if head != "(detached)" else "(detached HEAD)"

    changed: list[tuple[str, str]] = []
    for line in porcelain.splitlines():
        kind = line[:2]
        if kind in ("1 ", "2 "):
            # Rename/copy records ("2 ") carry one extra <X><score> field and join the
            # path as "<path>\t<origpath>", so they need one more split than ordinary
            # records; the tab-joined original path is then dropped.
            fields = line.split(maxsplit=9 if kind == "2 " else 8)
            if len(fields) < 2 or len(fields[1]) < 2:
                continue
            tag = "modified" if fields[1][0] == "." else "staged"
            path = fields[-1].split("\t", 1)[0]
            changed.append((tag, path))
        elif kind == "u ":
            changed.append(("conflict", line.split(maxsplit=10)[-1]))
        elif kind == "? ":
            # Keep the path exactly as printed; stripping would eat significant spaces.
            changed.append(("untracked", line[2:]))

    if not changed:
        return f"branch {branch} · working tree clean"

    report = [f"branch {branch} · {len(changed)} changed file(s)"]
    report.extend(f" {tag}: {path}" for tag, path in changed[:max_files])
    overflow = len(changed) - max_files
    if len(changed) > max_files:
        report.append(f" …and {overflow} more")
    return "\n".join(report)
411
+
412
+
413
# Build / VCS / cache names that are pure noise to list and that flood the context
# on a real repository (thousands of .venv / .ruff_cache paths). Both list_files
# (tools.py) and repo_map prune them so the model gets a clean map and has no reason
# to fall back to raw `find`. This is a task-agnostic denylist of the universal
# offenders, not a .gitignore parser.
_IGNORE_NAMES = frozenset({
    # version control
    ".git",
    ".hg",
    ".svn",
    # virtualenvs / dependency trees
    ".venv",
    "venv",
    "env",
    ".env",
    "node_modules",
    # Python caches and build output
    "__pycache__",
    ".ruff_cache",
    ".pytest_cache",
    ".mypy_cache",
    ".tox",
    # editors / generic caches / packaging output
    ".idea",
    ".vscode",
    ".cache",
    "dist",
    "build",
    ".eggs",
    "htmlcov",
    ".DS_Store",
    # JS/TS build + framework caches: huge generated trees that slow down
    # repo_map / listing / retrieval and are never source the agent should read.
    ".next",
    ".turbo",
    ".parcel-cache",
    ".nuxt",
    ".svelte-kit",
    ".output",
    ".angular",
    ".vite",
    "coverage",
    ".gradle",
})
_IGNORE_SUFFIX = (".egg-info", ".pyc")


def _is_ignored(name: str) -> bool:
    """Return True when ``name`` is on the denylist or ends with an ignored suffix."""
    if name in _IGNORE_NAMES:
        return True
    return name.endswith(_IGNORE_SUFFIX)
432
+
433
+
434
# Asset / binary / log suffixes. Such files crowd out source in a structural map,
# so repo_map skips them (list_files still shows them). Generic, not task-specific.
_MAP_SKIP_SUFFIX = (
    # images and documents
    ".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico", ".pdf",
    # logs, lockfiles, compiled artefacts
    ".log", ".lock", ".bin", ".so", ".dylib", ".o", ".class",
    # fonts and video
    ".woff", ".woff2", ".ttf", ".mp4", ".mov",
    # archives and packages
    ".zip", ".tar", ".gz", ".whl", ".pyc",
    # bulk data
    ".jsonl", ".csv", ".parquet",
)

# Source-code suffixes. Used only to rank directories by source density so the map
# shows the real source tree first; the same signal applies to every task type.
_CODE_SUFFIX = (
    ".py", ".js", ".ts", ".jsx", ".tsx",
    ".go", ".rs", ".java", ".rb",
    ".c", ".h", ".cc", ".cpp", ".hpp",
    ".cs", ".php", ".swift", ".kt", ".scala",
    ".sh", ".lua", ".ml", ".ex", ".exs",
    ".clj", ".r", ".jl", ".vue", ".sql",
)
444
+
445
+
446
def repo_map(root: str, *, max_entries: int = 300, max_per_dir: int = 25, max_chars: int = 12000) -> str:
    """Build a compact, ignore-aware structural map of the tree under ``root``.

    Each output line is one directory: ``<dir>/ — file1, file2 (+N more)``.
    Ignored names (``_is_ignored``) are pruned from the walk, and files with a
    ``_MAP_SKIP_SUFFIX`` suffix are left out. Directories are ordered by how
    many source files (``_CODE_SUFFIX``) they hold, most first, ties broken by
    path, so the real code tree is shown before asset or doc directories.

    Args:
        root: Directory to map.
        max_entries: Soft budget of listed files. Once it is reached, the
            remaining directories are collapsed to ``<dir>/ — (N files)``.
        max_per_dir: Maximum number of files listed for a single directory.
        max_chars: Hard ceiling on the length of the returned string. When the
            full map is longer, the lowest-ranked directories are dropped and a
            trailer line states how many were omitted. Room for that trailer is
            reserved, so the result never exceeds ``max_chars``.

    Returns:
        The map text, or ``""`` when ``root`` is empty, not a directory,
        contains no mappable files, or the walk fails. Never raises.
    """
    if not root or not os.path.isdir(root):
        return ""

    rows: list[tuple[str, list[str], int, int]] = []  # (rel, files, total, code_count)
    try:
        for dirpath, dirnames, filenames in os.walk(root):  # symlinks not followed
            # In-place assignment prunes the walk itself, not just the output.
            dirnames[:] = sorted(d for d in dirnames if not _is_ignored(d))
            files = sorted(
                f for f in filenames
                if not _is_ignored(f) and not f.endswith(_MAP_SKIP_SUFFIX)
            )
            if not files:
                continue
            rel = os.path.relpath(dirpath, root)
            code_count = sum(1 for f in files if f.endswith(_CODE_SUFFIX))
            rows.append((rel, files, len(files), code_count))
    except OSError:
        return ""
    if not rows:
        return ""

    # Source-dense directories first; path as tie-break keeps the order stable.
    rows.sort(key=lambda r: (-r[3], r[0]))

    lines: list[str] = []
    shown = 0
    for rel, files, total, _code in rows:
        prefix = "./" if rel == "." else rel + "/"
        if shown < max_entries:  # detailed: list files (per-dir capped)
            take = files[:max_per_dir]
            shown += len(take)
            extra = f" (+{total - len(take)} more)" if total > len(take) else ""
            lines.append(f"{prefix} — {', '.join(take)}{extra}")
        else:  # over budget: keep the dir, collapse to a count
            lines.append(f"{prefix} — ({total} files)")

    out = "\n".join(lines)
    if len(out) <= max_chars:
        return out

    def _trailer(dropped: int) -> str:
        # Final line announcing how many ranked-last directories were cut.
        return (f"… (+{dropped} more director{'y' if dropped == 1 else 'ies'} — over map budget; "
                "use list_files to drill in)")

    # Hard character ceiling: keep the best-ranked prefix that fits.
    kept: list[str] = []
    used = 0  # characters consumed by kept lines, each counted with its newline
    for ln in lines:
        if used + len(ln) + 1 > max_chars:
            break
        kept.append(ln)
        used += len(ln) + 1
    dropped = len(lines) - len(kept)

    # The trailer counts against the ceiling too: give back lines until it fits.
    while kept and used + len(_trailer(dropped)) > max_chars:
        used -= len(kept.pop()) + 1
        dropped += 1

    result = "\n".join([*kept, _trailer(dropped)])
    # Degenerate budgets (smaller than the trailer itself) are still honoured.
    return result[:max(max_chars, 0)]