loom-code 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loom_code/__init__.py +22 -0
- loom_code/_post_commit.py +119 -0
- loom_code/agent.py +544 -0
- loom_code/approval.py +616 -0
- loom_code/browse/__init__.py +291 -0
- loom_code/browse/act.py +467 -0
- loom_code/browse/observe.py +249 -0
- loom_code/browse/session.py +96 -0
- loom_code/browse/verify.py +194 -0
- loom_code/checkpoint.py +283 -0
- loom_code/cli.py +495 -0
- loom_code/code_index.py +703 -0
- loom_code/compact.py +143 -0
- loom_code/consent.py +47 -0
- loom_code/credentials.py +527 -0
- loom_code/edit_tool.py +635 -0
- loom_code/extensions.py +522 -0
- loom_code/file_history.py +322 -0
- loom_code/file_tools.py +93 -0
- loom_code/git_hook.py +200 -0
- loom_code/grep_tool.py +430 -0
- loom_code/hooks.py +297 -0
- loom_code/loominit/__init__.py +23 -0
- loom_code/loominit/_ast_walk.py +429 -0
- loom_code/loominit/_files.py +284 -0
- loom_code/loominit/_graph.py +141 -0
- loom_code/loominit/_resolve.py +392 -0
- loom_code/loominit/_tests_map.py +108 -0
- loom_code/loominit/extractor.py +332 -0
- loom_code/loominit/repomap.py +225 -0
- loom_code/loominit/schema.py +242 -0
- loom_code/lsp_tools.py +396 -0
- loom_code/mcp_host.py +79 -0
- loom_code/operator.py +449 -0
- loom_code/paste.py +97 -0
- loom_code/paths.py +52 -0
- loom_code/permissions.py +177 -0
- loom_code/project.py +104 -0
- loom_code/prompts.py +451 -0
- loom_code/render.py +783 -0
- loom_code/repl.py +4080 -0
- loom_code/rules.py +267 -0
- loom_code/sandboxed_bash.py +176 -0
- loom_code/scribe.py +88 -0
- loom_code/skills/__init__.py +16 -0
- loom_code/skills/graphify/SKILL.md +97 -0
- loom_code/skills/graphify/tools.py +570 -0
- loom_code/trust.py +216 -0
- loom_code/turn.py +169 -0
- loom_code/web_fetch.py +370 -0
- loom_code/workers.py +758 -0
- loom_code/worktree.py +134 -0
- loom_code-0.1.1.dist-info/METADATA +224 -0
- loom_code-0.1.1.dist-info/RECORD +58 -0
- loom_code-0.1.1.dist-info/WHEEL +5 -0
- loom_code-0.1.1.dist-info/entry_points.txt +2 -0
- loom_code-0.1.1.dist-info/licenses/LICENSE +21 -0
- loom_code-0.1.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
"""Top-level orchestrator — ``build_index(repo_root) -> LoomIndex``.
|
|
2
|
+
|
|
3
|
+
This is the only public entry point for slice 1. It composes every
|
|
4
|
+
private helper in the package into one deterministic pipeline:
|
|
5
|
+
|
|
6
|
+
1. :func:`_files.discover_files` — walk the repo, hash every file,
|
|
7
|
+
capture mtime + git heat + ``is_test`` flag.
|
|
8
|
+
2. :func:`_ast_walk.walk_python_file` — per-file AST extraction
|
|
9
|
+
producing ``(_RawSymbol, _RawImport, _RawDecorator)`` triples.
|
|
10
|
+
3. :func:`_resolve.build_module_index` + :func:`resolve_imports` —
|
|
11
|
+
turn dotted module names into rel-path edges.
|
|
12
|
+
4. :func:`_resolve.detect_api_surface` — flag files reachable from
|
|
13
|
+
``__init__.py`` re-exports / ``__all__``.
|
|
14
|
+
5. :func:`_graph.pagerank_file_graph` — file-level centrality from
|
|
15
|
+
the resolved import graph.
|
|
16
|
+
6. :func:`_tests_map.build_test_map` — grep test files for each
|
|
17
|
+
symbol's bare name, emit citations.
|
|
18
|
+
7. :func:`_resolve.extract_entry_points` — pyproject scripts,
|
|
19
|
+
``__main__`` blocks, landmark-decorated callables.
|
|
20
|
+
8. :func:`_graph.cluster_by_path_prefix` — group files into
|
|
21
|
+
subsystem clusters with hash buckets for diff-aware refresh.
|
|
22
|
+
|
|
23
|
+
The result is a :class:`schema.LoomIndex`, consumed in-memory by
|
|
24
|
+
:mod:`repomap` to render the per-turn repo map. No LLM calls anywhere
|
|
25
|
+
here — the structural pass is deterministic by design.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import hashlib
|
|
31
|
+
from collections.abc import Iterable
|
|
32
|
+
from datetime import UTC, datetime
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
|
|
35
|
+
from ._ast_walk import _RawDecorator, _RawSymbol, walk_python_file
|
|
36
|
+
from ._files import DiscoveredFile, discover_files, is_git_repo
|
|
37
|
+
from ._graph import cluster_by_path_prefix, pagerank_file_graph
|
|
38
|
+
from ._resolve import (
|
|
39
|
+
build_module_index,
|
|
40
|
+
detect_api_surface,
|
|
41
|
+
extract_entry_points,
|
|
42
|
+
resolve_imports,
|
|
43
|
+
)
|
|
44
|
+
from ._tests_map import build_test_map
|
|
45
|
+
from .schema import (
|
|
46
|
+
CallEdge,
|
|
47
|
+
Cluster,
|
|
48
|
+
DecoratorLandmark,
|
|
49
|
+
FileEntry,
|
|
50
|
+
LoomIndex,
|
|
51
|
+
SymbolEntry,
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def build_index(repo_root: Path) -> LoomIndex:
    """Run the structural extraction pipeline and return a :class:`LoomIndex`.

    The result is built purely in memory and handed back to the caller.
    This function's own code writes nothing; it does read the tree
    (through :func:`discover_files` and :func:`_walk_all`) and may invoke
    ``git`` (through :func:`_current_git_commit`).

    Args:
        repo_root: Root directory of the repository to index.

    Returns:
        The populated index. ``calls`` is always an empty list here.
    """
    discovered = discover_files(repo_root)
    python_sources = [entry for entry in discovered if entry.lang == "python"]

    # ---- 1. Walk every Python file ------------------------------------
    parsed = _walk_all(python_sources)

    # ---- 2. Module-name resolution / API surface ----------------------
    module_index = build_module_index(discovered)
    api_surface = detect_api_surface(discovered, module_index)

    # ---- 3. Import edges + file-level PageRank ------------------------
    raw_imports_by_file: dict[str, list[tuple[str, int, int]]] = {}
    for rel_path, (_symbols, imports, _decorators) in parsed.items():
        raw_imports_by_file[rel_path] = [
            (imp.to_module, imp.line, imp.level) for imp in imports
        ]
    import_edges = resolve_imports(raw_imports_by_file, module_index)

    # Only resolved, in-repo edges take part in the ranking.
    ranking_edges = _pagerank_edges(
        import_edges, python_sources, module_index, raw_imports_by_file
    )
    file_scores = pagerank_file_graph(
        files=[entry.rel_path for entry in python_sources],
        edges=ranking_edges,
    )

    # File-level stand-ins for n_callers / n_callees.
    in_degree, out_degree = _degree_maps(ranking_edges)

    # ---- 4. Test→symbol map -------------------------------------------
    public_names: set[str] = set()
    for _rel_path, (symbols, _imports, _decorators) in parsed.items():
        for symbol in symbols:
            if symbol.is_public:
                public_names.add(symbol.name)
    test_map = build_test_map(files=discovered, symbol_names=public_names)

    # ---- 5. Build schema.FileEntry list -------------------------------
    file_entries: list[FileEntry] = []
    for entry in discovered:
        file_entries.append(
            FileEntry(
                path=entry.rel_path,
                lang=entry.lang,
                size_bytes=entry.size_bytes,
                lines=entry.lines,
                sha256=entry.sha256,
                mtime=entry.mtime,
                git_changes_90d=entry.git_changes_90d,
                is_test=entry.is_test,
                in_api_surface=entry.rel_path in api_surface,
            )
        )

    # ---- 6. Build schema.SymbolEntry list -----------------------------
    symbol_entries: list[SymbolEntry] = []
    for rel_path, (symbols, _imports, _decorators) in parsed.items():
        # Every symbol in a file inherits that file's rank and degrees;
        # symbol-level values would need a call graph.
        rank = file_scores.get(rel_path, 0.0)
        callers = in_degree.get(rel_path, 0)
        callees = out_degree.get(rel_path, 0)
        for symbol in symbols:
            symbol_entries.append(
                SymbolEntry(
                    id=f"{rel_path}:{symbol.qualified_name}",
                    name=symbol.name,
                    qualified_name=symbol.qualified_name,
                    kind=symbol.kind,
                    path=rel_path,
                    line=symbol.line,
                    end_line=symbol.end_line,
                    signature=symbol.signature,
                    docstring_first_line=symbol.docstring_first_line,
                    decorators=list(symbol.decorators),
                    is_public=symbol.is_public,
                    in_api_surface=(rel_path in api_surface) and symbol.is_public,
                    pagerank=rank,
                    n_callers=callers,
                    n_callees=callees,
                    tests=test_map.get(symbol.name, []) if symbol.is_public else [],
                )
            )

    # ---- 7. Decorator landmarks (also collected for step 8) -----------
    decorator_entries: list[DecoratorLandmark] = []
    decorator_path_lookup: dict[_RawDecorator, str] = {}
    all_decorators: list[_RawDecorator] = []
    for rel_path, (_symbols, _imports, decorators) in parsed.items():
        for decorator in decorators:
            decorator_path_lookup[decorator] = rel_path
            all_decorators.append(decorator)
            decorator_entries.append(
                DecoratorLandmark(
                    decorator=decorator.decorator,
                    target=f"{rel_path}:{decorator.target_qualname}",
                    path=rel_path,
                    line=decorator.line,
                )
            )

    # ---- 8. Entry points ---------------------------------------------
    entry_points = extract_entry_points(
        repo_root=repo_root,
        files=discovered,
        decorators=all_decorators,
        decorator_path_lookup=decorator_path_lookup,
    )

    # ---- 9. Clusters --------------------------------------------------
    cluster_entries = _build_clusters(file_entries, python_sources)

    head_commit = _current_git_commit(repo_root)

    # The call graph is left empty; the schema field is still populated
    # so adding call extraction later needs no schema change.
    no_calls: list[CallEdge] = []

    return LoomIndex(
        generated_at=datetime.now(UTC),
        repo_root=str(repo_root.resolve()),
        git_commit=head_commit,
        files=file_entries,
        symbols=symbol_entries,
        imports=import_edges,
        calls=no_calls,
        decorators=decorator_entries,
        entry_points=entry_points,
        clusters=cluster_entries,
    )
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
# ---------------------------------------------------------------------------
|
|
198
|
+
# Internals
|
|
199
|
+
# ---------------------------------------------------------------------------
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _walk_all(
|
|
203
|
+
py_files: list[DiscoveredFile],
|
|
204
|
+
) -> dict[
|
|
205
|
+
str,
|
|
206
|
+
tuple[
|
|
207
|
+
list[_RawSymbol],
|
|
208
|
+
list, # _RawImport
|
|
209
|
+
list[_RawDecorator],
|
|
210
|
+
],
|
|
211
|
+
]:
|
|
212
|
+
"""Parse every Python file in parallel-friendly form (sequential
|
|
213
|
+
for now; can wrap in ``anyio.to_thread.run_sync`` later if
|
|
214
|
+
profiling shows it matters)."""
|
|
215
|
+
out = {}
|
|
216
|
+
for f in py_files:
|
|
217
|
+
try:
|
|
218
|
+
text = f.abs_path.read_text(encoding="utf-8")
|
|
219
|
+
except OSError:
|
|
220
|
+
continue
|
|
221
|
+
syms, imps, decs = walk_python_file(text, f.rel_path)
|
|
222
|
+
out[f.rel_path] = (syms, imps, decs)
|
|
223
|
+
return out
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _pagerank_edges(
    edges: Iterable,  # ImportEdge
    py_files: list[DiscoveredFile],
    module_index,
    raw_imports_by_file: dict[str, list[tuple[str, int, int]]],
) -> list[tuple[str, str]]:
    """Produce ``(from_path, to_path)`` pairs for the file-level PageRank.

    Each raw import is resolved again through
    :func:`_resolve.resolve_import`, and a pair is kept only when both
    ends are Python files from ``py_files``. One pair is emitted per raw
    import, so repeated imports of the same module yield repeated pairs.

    Args:
        edges: Accepted for interface compatibility; not read by this
            function.
        py_files: Python files that may appear as either end of a pair.
        module_index: Passed through to ``resolve_import``.
        raw_imports_by_file: ``rel_path`` -> ``(to_module, line, level)``
            tuples for that file.

    Returns:
        The resolved in-repo pairs, in iteration order.
    """
    from ._resolve import resolve_import

    known_paths = {source.rel_path for source in py_files}
    resolved_pairs: list[tuple[str, str]] = []
    for importer, raw_imports in raw_imports_by_file.items():
        if importer in known_paths:
            for module_name, _lineno, rel_level in raw_imports:
                imported = resolve_import(
                    from_file=importer,
                    to_module=module_name,
                    level=rel_level,
                    module_index=module_index,
                )
                # Drop unresolved imports and targets outside the
                # Python file set.
                if imported is None or imported not in known_paths:
                    continue
                resolved_pairs.append((importer, imported))
    return resolved_pairs
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _degree_maps(
|
|
260
|
+
edges: list[tuple[str, str]],
|
|
261
|
+
) -> tuple[dict[str, int], dict[str, int]]:
|
|
262
|
+
"""``in_degree[file]`` = number of files importing it.
|
|
263
|
+
``out_degree[file]`` = number of files it imports.
|
|
264
|
+
|
|
265
|
+
Both are used for the SymbolEntry's ``n_callers`` / ``n_callees``
|
|
266
|
+
fields as file-level approximations until call-graph extraction
|
|
267
|
+
lands."""
|
|
268
|
+
in_deg: dict[str, int] = {}
|
|
269
|
+
out_deg: dict[str, int] = {}
|
|
270
|
+
for src, dst in edges:
|
|
271
|
+
out_deg[src] = out_deg.get(src, 0) + 1
|
|
272
|
+
in_deg[dst] = in_deg.get(dst, 0) + 1
|
|
273
|
+
return in_deg, out_deg
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _build_clusters(
    files: list[FileEntry], py_files: list[DiscoveredFile]
) -> list[Cluster]:
    """Group Python files into clusters by path prefix.

    Each cluster gets a ``hash_bucket``: the first 16 hex characters of
    the SHA-256 over the sorted, newline-joined content hashes of its
    member files. ``centroid_symbols`` and ``centrality`` are emitted as
    empty / zero placeholders here.

    Args:
        files: File entries used to look up each member's ``sha256``.
        py_files: Python files whose ``rel_path`` values are clustered.

    Returns:
        Clusters sorted by ``id``.
    """
    groups = cluster_by_path_prefix([f.rel_path for f in py_files])

    # Map path -> entry for the hash lookup.
    by_path = {f.path: f for f in files}

    clusters: list[Cluster] = []
    for cluster_id, paths in groups.items():
        # Sorting makes the bucket independent of member order.
        hashes = sorted(by_path[p].sha256 for p in paths if p in by_path)
        bucket = hashlib.sha256(
            "\n".join(hashes).encode("utf-8")
        ).hexdigest()[:16]
        clusters.append(
            Cluster(
                id=cluster_id.replace("/", ".") or "root",
                # An empty prefix (or one made only of slashes) is shown
                # as "root".
                title=cluster_id.rstrip("/") or "root",
                paths=list(paths),
                centroid_symbols=[],  # populated in annotator pass
                centrality=0.0,  # populated alongside centroids
                hash_bucket=bucket,
            )
        )
    return sorted(clusters, key=lambda c: c.id)
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def _current_git_commit(repo_root: Path) -> str | None:
    """Return the abbreviated (12-character) hash of ``HEAD``, if any.

    Runs ``git rev-parse --short=12 HEAD`` inside ``repo_root`` with a
    five-second timeout. Useful for diff-aware refresh: "the index was
    built against commit X; you're on commit Y".

    Args:
        repo_root: Directory to run git in.

    Returns:
        The stripped command output, or ``None`` when ``repo_root`` is
        not a git repository, git cannot be started, the command times
        out, exits non-zero, or prints nothing.
    """
    if not is_git_repo(repo_root):
        return None
    import subprocess

    try:
        proc = subprocess.run(
            ["git", "rev-parse", "--short=12", "HEAD"],
            cwd=str(repo_root),
            capture_output=True,
            text=True,
            check=False,
            timeout=5,
        )
    except (subprocess.TimeoutExpired, OSError):
        # OSError covers a missing git binary (FileNotFoundError) as well
        # as other spawn failures such as PermissionError; the commit
        # hash is optional metadata, so none of them should propagate.
        return None
    if proc.returncode != 0:
        return None
    return proc.stdout.strip() or None
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
"""Deterministic, LLM-free repo map — an Aider-style ranked symbol
|
|
2
|
+
overview.
|
|
3
|
+
|
|
4
|
+
Why this exists: the older LOOM.md path injected BM25-ranked *LLM
|
|
5
|
+
narrative* sections, which (a) drifted from the code the moment the
|
|
6
|
+
agent edited it and (b) was a lossy paraphrase, not the code. This
|
|
7
|
+
renderer instead consumes the **structural** index (``build_index`` —
|
|
8
|
+
AST walk, no model calls) and emits the most structurally-important
|
|
9
|
+
symbols — real signatures + locations — within a token budget. It is:
|
|
10
|
+
|
|
11
|
+
* **deterministic** — same index in, same map out (stable tiebreaks);
|
|
12
|
+
* **fresh-by-construction** — re-running ``build_index`` reflects the
|
|
13
|
+
current tree, with zero LLM cost (unlike re-annotation);
|
|
14
|
+
* **cache-friendly** — the map is a stable global overview, so it
|
|
15
|
+
doesn't churn the system prompt per turn the way BM25 retrieval did.
|
|
16
|
+
|
|
17
|
+
Ranking heuristic (Aider's core insight: "a symbol referenced by many
|
|
18
|
+
others is more valuable context than a private helper called once"):
|
|
19
|
+
``n_callers`` (how widely the symbol's file is imported — already
|
|
20
|
+
computed by the extractor) plus bonuses for entry points, public
|
|
21
|
+
surface, and classes (architecture lands first).
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
from .schema import LoomIndex, SymbolEntry
|
|
29
|
+
|
|
30
|
+
# Rough chars→tokens ratio for budgeting (English/code ≈ 4 chars/token).
|
|
31
|
+
_CHARS_PER_TOKEN = 4
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
_DEF_PREFIXES = ("class ", "def ", "async def ")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _score(sym: SymbolEntry, entry_point_paths: set[str]) -> float:
|
|
38
|
+
"""Structural importance of a symbol. Higher = surfaced sooner."""
|
|
39
|
+
score = float(sym.n_callers) * 2.0 # import popularity (the core signal)
|
|
40
|
+
if not sym.name.startswith("_"):
|
|
41
|
+
score += 1.0 # public surface beats private helpers
|
|
42
|
+
if sym.signature.startswith("class "):
|
|
43
|
+
score += 2.0 # classes carry architecture
|
|
44
|
+
elif not sym.signature.startswith(_DEF_PREFIXES):
|
|
45
|
+
score -= 3.0 # bare module constants are noise in an overview
|
|
46
|
+
if sym.path in entry_point_paths:
|
|
47
|
+
score += 6.0 # CLI / route / main entry points orient fast
|
|
48
|
+
return score
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def build_repo_map(
    index: LoomIndex,
    *,
    max_tokens: int = 1500,
    max_per_file: int = 8,
) -> str:
    """Render the highest-scoring symbols as a markdown repo map.

    Symbols are grouped by file. Files are ordered by the score of their
    best symbol (ties broken by path); symbols within a file are ordered
    by score, then line. At most ``max_per_file`` symbols are listed per
    file. Output is capped at ``max_tokens * _CHARS_PER_TOKEN``
    characters, a best-effort estimate. A file section is emitted only
    when at least one of its symbols fits.

    Args:
        index: Structural index providing ``symbols`` and ``entry_points``.
        max_tokens: Approximate token budget for the whole map.
        max_per_file: Maximum number of symbols listed under one file.

    Returns:
        The rendered map; it always starts with the title line.
    """
    char_budget = _CHARS_PER_TOKEN * max_tokens
    entry_paths = {ep.path for ep in index.entry_points if ep.path}

    grouped: dict[str, list[SymbolEntry]] = {}
    for symbol in index.symbols:
        grouped.setdefault(symbol.path, []).append(symbol)

    def importance(symbol: SymbolEntry) -> float:
        return _score(symbol, entry_paths)

    # After the per-file sort the first symbol carries the file's top
    # score, so the file rank can be read off without a second scan.
    ranked_files: list[tuple[float, str]] = []
    for file_path, members in grouped.items():
        members.sort(key=lambda s: (-importance(s), s.line))
        ranked_files.append((-importance(members[0]), file_path))
    ranked_files.sort()

    title = "# Repo map — top symbols by structural importance\n"
    pieces: list[str] = [title]
    spent = len(title)
    for _neg_rank, file_path in ranked_files:
        heading = f"\n## {file_path}\n"
        if spent + len(heading) > char_budget:
            break
        lines: list[str] = []
        section_len = len(heading)
        for symbol in grouped[file_path][:max_per_file]:
            summary = symbol.docstring_first_line
            suffix = f" — {summary}" if summary else ""
            line = f"- `{symbol.signature}` ({symbol.path}:{symbol.line}){suffix}\n"
            if spent + section_len + len(line) > char_budget:
                break
            lines.append(line)
            section_len += len(line)
        # A heading with nothing under it is not worth the characters.
        if lines:
            pieces.append(heading)
            pieces.extend(lines)
            spent += section_len
    return "".join(pieces)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def repo_map_for_root(
    root: Path | str, *, max_tokens: int = 1500
) -> str | None:
    """Build the structural index for ``root`` and render its repo map.

    No caching happens here; callers choose their own freshness policy.

    Args:
        root: Repository root, as a path or string.
        max_tokens: Budget forwarded to :func:`build_repo_map`.

    Returns:
        The rendered map, or ``None`` when the index contains no symbols.
    """
    from .extractor import build_index

    structural_index = build_index(Path(root))
    if structural_index.symbols:
        return build_repo_map(structural_index, max_tokens=max_tokens)
    return None
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# Dirs the freshness signature ignores — build_index skips them too, so
|
|
117
|
+
# walking them would only add cost + false cache misses.
|
|
118
|
+
_SKIP_DIRS = frozenset(
|
|
119
|
+
{
|
|
120
|
+
".git",
|
|
121
|
+
".loom",
|
|
122
|
+
".loom-code",
|
|
123
|
+
"node_modules",
|
|
124
|
+
".venv",
|
|
125
|
+
"venv",
|
|
126
|
+
"__pycache__",
|
|
127
|
+
".mypy_cache",
|
|
128
|
+
".ruff_cache",
|
|
129
|
+
"dist",
|
|
130
|
+
"build",
|
|
131
|
+
".tox",
|
|
132
|
+
}
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
# Cache: root -> (signature, rendered map). The signature is cheap to
|
|
136
|
+
# recompute; the expensive AST walk only re-runs when it changes.
|
|
137
|
+
_REPO_MAP_CACHE: dict[str, tuple[tuple[float, int], str | None]] = {}
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _tree_signature(root: Path) -> tuple[float, int]:
|
|
141
|
+
"""A cheap freshness key: (newest .py mtime, file count). Either
|
|
142
|
+
moving means the tree changed → rebuild. Count catches add/delete
|
|
143
|
+
that don't bump the newest mtime."""
|
|
144
|
+
newest = 0.0
|
|
145
|
+
count = 0
|
|
146
|
+
for p in root.rglob("*.py"):
|
|
147
|
+
if any(part in _SKIP_DIRS for part in p.parts):
|
|
148
|
+
continue
|
|
149
|
+
try:
|
|
150
|
+
newest = max(newest, p.stat().st_mtime)
|
|
151
|
+
count += 1
|
|
152
|
+
except OSError:
|
|
153
|
+
continue
|
|
154
|
+
return (newest, count)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _repo_map_cached(
    root: Path | str, max_tokens: int
) -> tuple[str | None, bool]:
    """Return ``(map, rebuilt)`` for ``root``, rebuilding only when stale.

    A cached map is reused when the tree signature (newest ``.py`` mtime
    plus file count) is unchanged for the same resolved root and the same
    ``max_tokens``. The budget is part of the cache key because the
    rendered text depends on it; without that, a call with a different
    budget would get a map sized for the previous one.

    On every rebuild the map is also passed to
    :func:`_write_repomap_file` for inspection.

    Args:
        root: Repository root; resolved to an absolute path.
        max_tokens: Budget forwarded to :func:`repo_map_for_root`.

    Returns:
        ``(rendered, rebuilt)``. ``rendered`` may be ``None``;
        ``rebuilt`` is ``False`` on a cache hit and ``True`` otherwise.
    """
    root_p = Path(root).resolve()
    key = f"{root_p}::{max_tokens}"
    sig = _tree_signature(root_p)
    cached = _REPO_MAP_CACHE.get(key)
    if cached is not None and cached[0] == sig:
        return cached[1], False
    rendered = repo_map_for_root(root_p, max_tokens=max_tokens)
    _REPO_MAP_CACHE[key] = (sig, rendered)
    _write_repomap_file(root_p, rendered)
    return rendered, True
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def repo_map_for_root_cached(
    root: Path | str, *, max_tokens: int = 1500
) -> str | None:
    """Return the cached repo map for ``root``.

    Delegates to :func:`_repo_map_cached` and discards its ``rebuilt``
    flag, leaving only the rendered map (or ``None``).
    """
    rendered, _was_rebuilt = _repo_map_cached(root, max_tokens)
    return rendered
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def repo_map_meta_for_root_cached(
    root: Path | str, *, max_tokens: int = 1500
) -> dict[str, object]:
    """Return the cached repo map together with display metadata.

    Keys of the result:

    * ``map`` — the rendered text, or ``""`` when there is no map;
    * ``rebuilt`` — the flag reported by :func:`_repo_map_cached`;
    * ``symbols`` — number of list entries, counted as occurrences of a
      newline followed by ``- `` and a backtick;
    * ``chars`` — length of ``map``.
    """
    rendered, was_rebuilt = _repo_map_cached(root, max_tokens)
    map_text = rendered if rendered else ""
    entry_count = map_text.count("\n- `")
    return {
        "map": map_text,
        "rebuilt": was_rebuilt,
        "symbols": entry_count,
        "chars": len(map_text),
    }
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _write_repomap_file(root: Path, rendered: str | None) -> None:
|
|
205
|
+
"""Mirror the injected repo map to ``<root>/.loom/repomap.md`` so a
|
|
206
|
+
human can see what the agent receives. Overwritten on every rebuild;
|
|
207
|
+
best-effort (a write failure never affects the returned map). The
|
|
208
|
+
agent reads the map from its memory block, not this file — it's
|
|
209
|
+
inspection-only. ``.loom/`` is gitignored, so this isn't committed."""
|
|
210
|
+
if not rendered:
|
|
211
|
+
return
|
|
212
|
+
try:
|
|
213
|
+
loom_dir = root / ".loom"
|
|
214
|
+
loom_dir.mkdir(parents=True, exist_ok=True)
|
|
215
|
+
note = (
|
|
216
|
+
"<!-- Auto-generated by loom-code. Rebuilt whenever the "
|
|
217
|
+
"source tree changes; this is the repo map injected into the "
|
|
218
|
+
"agent's system prompt each turn (the `loom_index` block). "
|
|
219
|
+
"Inspection-only — do not edit, it is overwritten. -->\n\n"
|
|
220
|
+
)
|
|
221
|
+
(loom_dir / "repomap.md").write_text(
|
|
222
|
+
note + rendered, encoding="utf-8"
|
|
223
|
+
)
|
|
224
|
+
except OSError:
|
|
225
|
+
pass
|