loom-code 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. loom_code/__init__.py +22 -0
  2. loom_code/_post_commit.py +119 -0
  3. loom_code/agent.py +544 -0
  4. loom_code/approval.py +616 -0
  5. loom_code/browse/__init__.py +291 -0
  6. loom_code/browse/act.py +467 -0
  7. loom_code/browse/observe.py +249 -0
  8. loom_code/browse/session.py +96 -0
  9. loom_code/browse/verify.py +194 -0
  10. loom_code/checkpoint.py +283 -0
  11. loom_code/cli.py +495 -0
  12. loom_code/code_index.py +703 -0
  13. loom_code/compact.py +143 -0
  14. loom_code/consent.py +47 -0
  15. loom_code/credentials.py +527 -0
  16. loom_code/edit_tool.py +635 -0
  17. loom_code/extensions.py +522 -0
  18. loom_code/file_history.py +322 -0
  19. loom_code/file_tools.py +93 -0
  20. loom_code/git_hook.py +200 -0
  21. loom_code/grep_tool.py +430 -0
  22. loom_code/hooks.py +297 -0
  23. loom_code/loominit/__init__.py +23 -0
  24. loom_code/loominit/_ast_walk.py +429 -0
  25. loom_code/loominit/_files.py +284 -0
  26. loom_code/loominit/_graph.py +141 -0
  27. loom_code/loominit/_resolve.py +392 -0
  28. loom_code/loominit/_tests_map.py +108 -0
  29. loom_code/loominit/extractor.py +332 -0
  30. loom_code/loominit/repomap.py +225 -0
  31. loom_code/loominit/schema.py +242 -0
  32. loom_code/lsp_tools.py +396 -0
  33. loom_code/mcp_host.py +79 -0
  34. loom_code/operator.py +449 -0
  35. loom_code/paste.py +97 -0
  36. loom_code/paths.py +52 -0
  37. loom_code/permissions.py +177 -0
  38. loom_code/project.py +104 -0
  39. loom_code/prompts.py +451 -0
  40. loom_code/render.py +783 -0
  41. loom_code/repl.py +4080 -0
  42. loom_code/rules.py +267 -0
  43. loom_code/sandboxed_bash.py +176 -0
  44. loom_code/scribe.py +88 -0
  45. loom_code/skills/__init__.py +16 -0
  46. loom_code/skills/graphify/SKILL.md +97 -0
  47. loom_code/skills/graphify/tools.py +570 -0
  48. loom_code/trust.py +216 -0
  49. loom_code/turn.py +169 -0
  50. loom_code/web_fetch.py +370 -0
  51. loom_code/workers.py +758 -0
  52. loom_code/worktree.py +134 -0
  53. loom_code-0.1.1.dist-info/METADATA +224 -0
  54. loom_code-0.1.1.dist-info/RECORD +58 -0
  55. loom_code-0.1.1.dist-info/WHEEL +5 -0
  56. loom_code-0.1.1.dist-info/entry_points.txt +2 -0
  57. loom_code-0.1.1.dist-info/licenses/LICENSE +21 -0
  58. loom_code-0.1.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,284 @@
1
+ """File discovery + per-file metadata (hash, lang, git heat).
2
+
3
+ The structural extractor calls :func:`discover_files` once at the
4
+ start of indexing; everything downstream uses the returned
5
+ :class:`DiscoveredFile` list as the canonical "what files exist".
6
+
7
+ Discovery strategy:
8
+
9
+ * **In a git repo** — use ``git ls-files --cached --others
10
+ --exclude-standard``. This respects ``.gitignore`` for free, which
11
+ is the *only* reliable way to skip a project's actual ignore set
12
+ (venvs, build outputs, generated code). The alternative — re-
13
+ implementing gitignore semantics — is a tar pit.
14
+ * **No git** — walk the tree, skip a hard-coded set of well-known
15
+ noise directories (``.venv``, ``node_modules``, ``__pycache__``,
16
+ etc.). Less accurate but covers the "loose folder" case.
17
+
18
+ Git heat (commits touching a file in the last 90 days) comes from
19
+ ``git log --since=90.days --name-only`` parsed once at discovery
20
+ time. If the repo is huge, this is the most expensive step in
21
+ discovery — still O(seconds). Cached in the returned dataclass so
22
+ no other module needs to re-run git.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import hashlib
28
+ import subprocess
29
+ from dataclasses import dataclass
30
+ from datetime import datetime
31
+ from pathlib import Path
32
+ from typing import Literal
33
+
34
+ # Hard-coded noise directories for the non-git walker. Add only
35
+ # things that are universally noise — when in doubt, leave it in,
36
+ # the user is in a git repo 99% of the time and these matter only
37
+ # for the fallback path.
38
+ _NOISE_DIRS: frozenset[str] = frozenset(
39
+ {
40
+ ".git",
41
+ ".hg",
42
+ ".svn",
43
+ ".venv",
44
+ "venv",
45
+ "env",
46
+ "__pycache__",
47
+ "node_modules",
48
+ ".pytest_cache",
49
+ ".mypy_cache",
50
+ ".ruff_cache",
51
+ ".tox",
52
+ ".nox",
53
+ "dist",
54
+ "build",
55
+ ".eggs",
56
+ ".loom", # our own output dir — never re-index ourselves
57
+ ".idea",
58
+ ".vscode",
59
+ }
60
+ )
61
+
62
+ # File extensions we recognize. Anything else is skipped — the index
63
+ # is for code understanding, not asset cataloguing. Markdown is kept
64
+ # because docs often capture architecture decisions that supplement
65
+ # the LLM-generated narrative.
66
+ _LANG_BY_EXT: dict[
67
+ str, Literal["python", "markdown", "toml", "yaml", "json"]
68
+ ] = {
69
+ ".py": "python",
70
+ ".pyi": "python",
71
+ ".md": "markdown",
72
+ ".markdown": "markdown",
73
+ ".toml": "toml",
74
+ ".yaml": "yaml",
75
+ ".yml": "yaml",
76
+ ".json": "json",
77
+ }
78
+
79
+
80
@dataclass(frozen=True)
class DiscoveredFile:
    """One file the extractor will inspect, with everything it needs
    to know up front.

    Every field is populated eagerly at discovery time — including
    ``sha256``, which is a required field computed from the file's
    bytes — so downstream code never has to touch the filesystem or
    git again just to learn these basics. Instances are immutable.
    """

    # Repo-relative path in POSIX form (forward slashes).
    rel_path: str
    # ``root / rel_path`` as handed to discovery.
    abs_path: Path
    # Language tag derived from the file suffix.
    lang: Literal["python", "markdown", "toml", "yaml", "json", "other"]
    # Size of the raw file contents in bytes.
    size_bytes: int
    # Number of lines; a final line without a trailing newline counts.
    lines: int
    # Hex SHA-256 digest of the raw file bytes.
    sha256: str
    # Last-modification time as a timezone-aware datetime.
    mtime: datetime
    # Commits touching the file in the last 90 days; ``None`` when no
    # heat data is available for this path.
    git_changes_90d: int | None
    # True when the path looks like a test file.
    is_test: bool
98
+
99
+
100
def is_git_repo(root: Path) -> bool:
    """Return True when ``root`` lies inside a git work tree.

    Asks ``git rev-parse --is-inside-work-tree`` rather than looking
    for a ``.git/`` directory, which keeps the answer correct for
    submodules and worktrees, where ``.git`` is a file.

    Returns False — never raises — when git cannot be launched at
    all: no git binary, ``root`` missing, ``root`` not a directory,
    or ``root`` not accessible. By our operational definition a tree
    we cannot run git in is not a git repo (we would need git to
    enumerate it).
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--is-inside-work-tree"],
            cwd=str(root),
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError:
        # Covers FileNotFoundError (no git binary / missing cwd) as
        # well as NotADirectoryError and PermissionError on ``cwd``.
        return False
    return result.returncode == 0 and result.stdout.strip() == "true"
118
+
119
+
120
def discover_files(root: Path) -> list[DiscoveredFile]:
    """Enumerate every indexable file under ``root``.

    Order is deterministic (sorted by ``rel_path``, each path at most
    once) so the resulting index is byte-stable across runs that see
    the same tree — important for diff-aware refresh.

    Only files whose suffix appears in :data:`_LANG_BY_EXT` are
    returned; everything else is skipped *before* being read, so
    large binary assets cost nothing. Files that disappear or cannot
    be read while discovery is running are skipped as well.

    Returns an empty list when ``root`` doesn't exist.
    """
    if not root.exists():
        return []

    rel_paths = _list_paths(root)
    git_heat = _git_heat(root) if is_git_repo(root) else {}

    out: list[DiscoveredFile] = []
    # ``set``: ``git ls-files`` lists an unmerged path once per index
    # stage, which would otherwise yield duplicate entries.
    for rel in sorted(set(rel_paths)):
        abs_path = root / rel
        # Filter on the suffix first — unrecognised files (binary
        # blobs, assets, shell / docker files for now) are kept out
        # of the index, so there is no reason to read them.
        lang = _LANG_BY_EXT.get(abs_path.suffix.lower())
        if lang is None:
            continue
        if not abs_path.is_file():
            # Listed by git but absent from the working tree (or not
            # a regular file).
            continue
        try:
            data = abs_path.read_bytes()
            mtime_ts = abs_path.stat().st_mtime
        except OSError:
            continue
        # Count lines on the raw bytes: a 0x0A byte is always a
        # newline in UTF-8, so this equals the count on the decoded
        # text without paying for a decode. A final line lacking a
        # trailing newline still counts as a line.
        n_lines = data.count(b"\n") + (
            1 if data and not data.endswith(b"\n") else 0
        )
        out.append(
            DiscoveredFile(
                rel_path=rel,
                abs_path=abs_path,
                lang=lang,
                size_bytes=len(data),
                lines=n_lines,
                sha256=hashlib.sha256(data).hexdigest(),
                mtime=datetime.fromtimestamp(mtime_ts).astimezone(),
                git_changes_90d=git_heat.get(rel),
                is_test=_is_test_path(rel),
            )
        )
    return out
176
+
177
+
178
+ # ---------------------------------------------------------------------------
179
+ # Internals
180
+ # ---------------------------------------------------------------------------
181
+
182
+
183
def _list_paths(root: Path) -> list[str]:
    """Return the POSIX-relative paths of candidate files under ``root``.

    Inside a git work tree the listing comes from git (which applies
    ``.gitignore`` for free); anywhere else it comes from the manual
    walker that filters noise directories.
    """
    lister = _git_list if is_git_repo(root) else _walk_list
    return lister(root)
192
+
193
+
194
def _git_list(root: Path) -> list[str]:
    """List tracked + untracked-but-not-ignored files via
    ``git ls-files -z --cached --others --exclude-standard``.

    ``-z`` makes git emit NUL-terminated, *unquoted* paths. Without
    it git C-quotes any path containing non-ASCII or special
    characters (``"caf\\303\\251.py"``), and such entries would never
    match a real file on disk. Splitting on NUL also preserves
    leading / trailing whitespace in file names.

    Submodule contents are not listed (no recursion by default),
    which is what we want; submodule code belongs to a different
    repo's index.

    Falls back to the manual walker when git cannot be launched or
    exits non-zero — better partial coverage than zero.
    """
    try:
        proc = subprocess.run(
            [
                "git",
                "ls-files",
                "-z",
                "--cached",
                "--others",
                "--exclude-standard",
            ],
            cwd=str(root),
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError:
        return _walk_list(root)
    if proc.returncode != 0:
        return _walk_list(root)
    # Output is NUL-terminated, so the final split element is empty.
    return [path for path in proc.stdout.split("\0") if path]
220
+
221
+
222
+ def _walk_list(root: Path) -> list[str]:
223
+ """Manual walk: skip directories in :data:`_NOISE_DIRS`. POSIX
224
+ paths only — Windows users get the same shape via PurePosixPath
225
+ conversion in the caller (loom-code runs on macOS / Linux today
226
+ but the contract should not be tripped by OS quirks)."""
227
+ out: list[str] = []
228
+ for path in root.rglob("*"):
229
+ if not path.is_file():
230
+ continue
231
+ # Skip if any part is a noise dir.
232
+ if any(part in _NOISE_DIRS for part in path.relative_to(root).parts):
233
+ continue
234
+ out.append(path.relative_to(root).as_posix())
235
+ return out
236
+
237
+
238
+ def _git_heat(root: Path) -> dict[str, int]:
239
+ """Return ``{rel_path: n_commits_in_last_90d}``.
240
+
241
+ Uses ``git log --since=90.days --name-only --pretty=`` — outputs
242
+ one path per line per commit, with blank lines between commits.
243
+ Counting occurrences gives us the heat score directly.
244
+
245
+ Returns ``{}`` on any subprocess error — heat is a hint, not a
246
+ correctness guarantee."""
247
+ try:
248
+ proc = subprocess.run(
249
+ [
250
+ "git",
251
+ "log",
252
+ "--since=90.days",
253
+ "--name-only",
254
+ "--pretty=format:",
255
+ "--no-merges",
256
+ ],
257
+ cwd=str(root),
258
+ capture_output=True,
259
+ text=True,
260
+ check=False,
261
+ timeout=30,
262
+ )
263
+ except (subprocess.TimeoutExpired, FileNotFoundError):
264
+ return {}
265
+ if proc.returncode != 0:
266
+ return {}
267
+ counts: dict[str, int] = {}
268
+ for line in proc.stdout.splitlines():
269
+ line = line.strip()
270
+ if not line:
271
+ continue
272
+ counts[line] = counts.get(line, 0) + 1
273
+ return counts
274
+
275
+
276
+ def _is_test_path(rel: str) -> bool:
277
+ """Best-effort detection: anything under ``tests/`` or named
278
+ ``test_*.py`` / ``*_test.py``. Same heuristic pytest uses, which
279
+ matches the vast majority of Python projects."""
280
+ parts = rel.split("/")
281
+ if "tests" in parts or "test" in parts:
282
+ return True
283
+ name = parts[-1]
284
+ return name.startswith("test_") or name.endswith("_test.py")
@@ -0,0 +1,141 @@
1
+ """PageRank over the file-level import graph.
2
+
3
+ Hand-rolled power iteration — networkx would be a more familiar
4
+ implementation but it's a 5 MB transitive-dependency chain to do
5
+ roughly twenty lines of arithmetic. The loomflow design rule "no
6
+ SDK at module top" applies here too: cheap things should be self-
7
+ contained.
8
+
9
+ Math: standard PageRank with a damping factor (0.85). For a graph
10
+ where node ``i`` has out-edges to neighbours ``N(i)``::
11
+
12
+ pr(j) = (1 - d) / N + d * sum( pr(i) / |N(i)| for i in inbound(j) )
13
+
14
+ We iterate until L1 change drops below tolerance or 100 iterations
15
+ elapse. Dangling nodes (no out-edges) distribute their score
16
+ uniformly across all nodes, the textbook fix.
17
+
18
+ The result is per-FILE — file-level centrality, which Aider also
19
+ uses. Per-symbol PageRank requires a call graph, which we don't
20
+ extract in v1 (see :mod:`_ast_walk` design note). Each symbol
21
+ inherits its file's PageRank score in :mod:`extractor`'s aggregation
22
+ step.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ from collections import defaultdict
28
+
29
+ _DAMPING = 0.85
30
+ _TOLERANCE = 1e-6
31
+ _MAX_ITERATIONS = 100
32
+
33
+
34
def pagerank_file_graph(
    *,
    files: list[str],
    edges: list[tuple[str, str]],
    damping: float | None = None,
    tolerance: float | None = None,
    max_iterations: int | None = None,
) -> dict[str, float]:
    """Compute PageRank for a directed graph of files.

    ``files`` is the full node set (every indexed file); duplicate
    entries are collapsed so the scores still sum to 1. ``edges`` is
    ``[(from_path, to_path), ...]`` — only RESOLVED imports, so
    third-party / stdlib edges don't dominate. Edges with an endpoint
    outside ``files`` and self-loops are ignored; a repeated edge
    counts once per occurrence.

    ``damping``, ``tolerance`` and ``max_iterations`` default to the
    module-level ``_DAMPING``, ``_TOLERANCE`` and ``_MAX_ITERATIONS``.
    Iteration stops when the L1 change between rounds drops below
    ``tolerance`` or after ``max_iterations`` rounds.

    Returns ``{rel_path: score}`` for every distinct file in
    ``files``, or ``{}`` if ``files`` is empty.
    """
    d = _DAMPING if damping is None else damping
    tol = _TOLERANCE if tolerance is None else tolerance
    rounds = _MAX_ITERATIONS if max_iterations is None else max_iterations

    # Order-preserving de-duplication: a repeated path would inflate
    # ``n`` and leak probability mass.
    nodes = list(dict.fromkeys(files))
    n = len(nodes)
    if n == 0:
        return {}

    # Out-degree and inbound adjacency. Edges whose endpoints aren't
    # in the node set are dropped — defensive: the caller should
    # already have filtered to resolved edges, but never trust that.
    out_degree: dict[str, int] = dict.fromkeys(nodes, 0)
    in_adj: dict[str, list[str]] = {f: [] for f in nodes}
    for src, dst in edges:
        if src in out_degree and dst in out_degree and src != dst:
            out_degree[src] += 1
            in_adj[dst].append(src)

    # The graph is fixed, so the dangling set is loop-invariant.
    dangling_nodes = [f for f in nodes if out_degree[f] == 0]

    # Initialise uniform.
    pr = dict.fromkeys(nodes, 1.0 / n)
    base = (1.0 - d) / n

    for _ in range(rounds):
        # Dangling mass: score sitting on nodes with no out-edges,
        # redistributed uniformly so the system stays stochastic.
        dangling = sum(pr[f] for f in dangling_nodes)
        dangling_share = d * dangling / n

        new_pr: dict[str, float] = {}
        for f in nodes:
            inbound_mass = sum(pr[src] / out_degree[src] for src in in_adj[f])
            new_pr[f] = base + dangling_share + d * inbound_mass

        # L1 convergence check.
        delta = sum(abs(new_pr[f] - pr[f]) for f in nodes)
        pr = new_pr
        if delta < tol:
            break

    return pr
88
+
89
+
90
def cluster_by_path_prefix(
    files: list[str], *, max_files_per_cluster: int = 50
) -> dict[str, list[str]]:
    """Group files by leading directory, splitting oversized groups
    one directory level at a time.

    Grouping starts at the top-level directory. A group holding more
    than ``max_files_per_cluster`` files is re-grouped by one more
    path component, down to a maximum prefix depth of 3 — beyond
    that, clusters get too fine-grained to be useful.

    Returns ``{cluster_id: [paths]}`` with each path list sorted.
    ``cluster_id`` is the directory prefix; a file with no directory
    component beyond the current prefix depth forms a cluster of its
    own, keyed by its full path (the bare filename at the repo root).

    Example for the loomflow tree::

        loomflow/agent/*.py          → cluster "loomflow/agent"
        loomflow/architecture/*.py   → cluster "loomflow/architecture"
        loomflow/memory/ (>50 files) → split into
                                       "loomflow/memory/postgres",
                                       "loomflow/memory/chroma", ...

    Deliberately a simple heuristic — most well-organized codebases
    already group by directory by convention. Import-graph community
    detection would do better on tangled codebases but adds
    complexity for marginal gain on the typical case.
    """
    clusters = _cluster(files, 1, max_files_per_cluster)
    return clusters
115
+
116
+
117
+ def _cluster(
118
+ files: list[str], depth: int, max_files: int
119
+ ) -> dict[str, list[str]]:
120
+ """Cluster by the first ``depth`` directory components. If any
121
+ resulting cluster exceeds ``max_files``, recurse on it with
122
+ ``depth+1``."""
123
+ groups: dict[str, list[str]] = defaultdict(list)
124
+ for path in files:
125
+ parts = path.split("/")
126
+ if len(parts) <= depth:
127
+ # Top-level file (e.g. ``cli.py``) — it's its own cluster.
128
+ groups[path].append(path)
129
+ else:
130
+ key = "/".join(parts[:depth])
131
+ groups[key].append(path)
132
+
133
+ out: dict[str, list[str]] = {}
134
+ for key, paths in groups.items():
135
+ if len(paths) <= max_files or depth >= 3:
136
+ out[key] = sorted(paths)
137
+ continue
138
+ # Recurse one level deeper for the oversized cluster.
139
+ sub = _cluster(paths, depth=depth + 1, max_files=max_files)
140
+ out.update(sub)
141
+ return out