codebase-index 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. codebase_index/__init__.py +7 -0
  2. codebase_index/__main__.py +3 -0
  3. codebase_index/cli.py +916 -0
  4. codebase_index/config.py +110 -0
  5. codebase_index/discovery/__init__.py +10 -0
  6. codebase_index/discovery/classify.py +151 -0
  7. codebase_index/discovery/ignore.py +58 -0
  8. codebase_index/discovery/walker.py +75 -0
  9. codebase_index/doctor.py +138 -0
  10. codebase_index/embeddings/__init__.py +2 -0
  11. codebase_index/embeddings/backend.py +67 -0
  12. codebase_index/embeddings/external.py +56 -0
  13. codebase_index/embeddings/local.py +41 -0
  14. codebase_index/embeddings/noop.py +15 -0
  15. codebase_index/graph/__init__.py +8 -0
  16. codebase_index/graph/analysis.py +468 -0
  17. codebase_index/graph/builder.py +160 -0
  18. codebase_index/graph/expand.py +136 -0
  19. codebase_index/graph/export.py +381 -0
  20. codebase_index/graph/navigate.py +201 -0
  21. codebase_index/indexer/__init__.py +8 -0
  22. codebase_index/indexer/doc_chunks.py +202 -0
  23. codebase_index/indexer/freshness.py +109 -0
  24. codebase_index/indexer/pipeline.py +423 -0
  25. codebase_index/mcp/__init__.py +2 -0
  26. codebase_index/mcp/server.py +354 -0
  27. codebase_index/models.py +145 -0
  28. codebase_index/output/__init__.py +6 -0
  29. codebase_index/output/json.py +13 -0
  30. codebase_index/output/markdown.py +316 -0
  31. codebase_index/output/redact.py +31 -0
  32. codebase_index/parsers/__init__.py +9 -0
  33. codebase_index/parsers/base.py +47 -0
  34. codebase_index/parsers/languages.py +290 -0
  35. codebase_index/parsers/line_chunker.py +39 -0
  36. codebase_index/parsers/symbol_chunks.py +62 -0
  37. codebase_index/parsers/treesitter.py +439 -0
  38. codebase_index/retrieval/__init__.py +9 -0
  39. codebase_index/retrieval/budget.py +82 -0
  40. codebase_index/retrieval/fusion.py +62 -0
  41. codebase_index/retrieval/intent.py +56 -0
  42. codebase_index/retrieval/pipeline.py +207 -0
  43. codebase_index/retrieval/rerank.py +69 -0
  44. codebase_index/retrieval/searchers.py +291 -0
  45. codebase_index/retrieval/skeleton.py +251 -0
  46. codebase_index/retrieval/types.py +79 -0
  47. codebase_index/scaffold.py +399 -0
  48. codebase_index/service.py +158 -0
  49. codebase_index/skill_template/SKILL.md +198 -0
  50. codebase_index/skill_template/examples/hooks/settings.json +16 -0
  51. codebase_index/skill_template/scripts/cbx +25 -0
  52. codebase_index/skill_template/scripts/cbx.ps1 +25 -0
  53. codebase_index/skill_update.py +150 -0
  54. codebase_index/storage/__init__.py +8 -0
  55. codebase_index/storage/db.py +116 -0
  56. codebase_index/storage/repo.py +701 -0
  57. codebase_index/storage/schema.sql +125 -0
  58. codebase_index/watch/__init__.py +5 -0
  59. codebase_index/watch/watcher.py +93 -0
  60. codebase_index-1.6.0.dist-info/METADATA +748 -0
  61. codebase_index-1.6.0.dist-info/RECORD +64 -0
  62. codebase_index-1.6.0.dist-info/WHEEL +4 -0
  63. codebase_index-1.6.0.dist-info/entry_points.txt +4 -0
  64. codebase_index-1.6.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,201 @@
1
+ """Graph navigation: shortest path between two nodes, and a node "card".
2
+
3
+ graphify ships `path A B` (how are two things connected?) and `explain Symbol`
4
+ (what is this node?). codebase-index already uses `explain` for how-it-works
5
+ retrieval, so the node card lives under `describe` to avoid colliding with it.
6
+
7
+ Both walk the *resolved* edge graph and carry the Phase-1 confidence trail, so a
8
+ path through an `inferred`/`ambiguous` edge is visibly less certain than one
9
+ through `extracted` edges.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import sqlite3
15
+ from collections import deque
16
+ from typing import Optional
17
+
18
+ from ..storage import repo
19
+
20
# BFS safety valve: stop exploring after this many nodes so `path` stays cheap on
# very large graphs (the shortest path, if short, is found long before this).
_MAX_VISITS = 20000

# Graph node key used throughout this module: (kind, row id), where kind is
# "file" or "symbol".
Node = tuple[str, int]
25
+
26
+
27
def _freshness(conn: sqlite3.Connection) -> dict:
    """Build the `index` block attached to every navigation payload.

    `exists` and `stale` are fixed to True/False here; `built_at` and
    `head_commit` are read through `repo.get_meta`.
    """
    block: dict = {"exists": True, "stale": False}
    for meta_key in ("built_at", "head_commit"):
        block[meta_key] = repo.get_meta(conn, meta_key)
    return block
34
+
35
+
36
def _resolve_targets(conn: sqlite3.Connection, token: str) -> list[Node]:
    """Map a path/symbol token onto zero or more graph nodes.

    Lookups are tried in order: an exact file path, an exact symbol name (every
    match is returned), then a path-suffix match that is accepted only when it
    is unambiguous. An empty list means nothing matched.
    """
    file_row = repo.file_by_path(conn, token)
    if file_row is not None:
        return [("file", int(file_row["id"]))]

    matches = repo.symbols_by_name(conn, token, exact=True)
    if matches:
        return [("symbol", int(match["id"])) for match in matches]

    candidates = repo.files_with_suffix(conn, token)
    if len(candidates) != 1:
        return []
    return [("file", int(candidates[0]["id"]))]
48
+
49
+
50
def _node_ref(conn: sqlite3.Connection, kind: str, node_id: int) -> Optional[dict]:
    """Return a small display record for a graph node, or None if the row is gone.

    File nodes yield kind/name/path/line_start (line_start is always None and
    name is the last path component). Any other kind is looked up as a symbol
    and yields kind/name/symbol_kind/path/line_start.
    """
    if kind != "file":
        sym = conn.execute(
            "SELECT s.name AS name, s.kind AS kind, s.line_start AS line_start, f.path AS path "
            "FROM symbols s JOIN files f ON f.id = s.file_id WHERE s.id = ?",
            (node_id,),
        ).fetchone()
        if sym is None:
            return None
        return {
            "kind": "symbol",
            "name": sym["name"],
            "symbol_kind": sym["kind"],
            "path": sym["path"],
            "line_start": sym["line_start"],
        }

    file_row = conn.execute("SELECT path FROM files WHERE id = ?", (node_id,)).fetchone()
    if file_row is None:
        return None
    full_path = file_row["path"]
    # rpartition leaves the whole string in slot 2 when there is no "/".
    return {
        "kind": "file",
        "name": full_path.rpartition("/")[2],
        "path": full_path,
        "line_start": None,
    }
66
+
67
+
68
def _undirected_neighbors(conn: sqlite3.Connection, kind: str, node_id: int):
    """Yield (next_kind, next_id, edge_type, confidence, direction) for a node.

    Edge direction is ignored for traversal — `path` answers "how are these
    connected", not "who calls whom" — but is still reported as "in" or "out".
    Incoming edges are yielded first; outgoing edges with no destination id
    are skipped.
    """
    yield from (
        (edge["src_kind"], int(edge["src_id"]), edge["edge_type"], edge["confidence"], "in")
        for edge in repo.incoming_edges(conn, kind, node_id)
    )
    yield from (
        (edge["dst_kind"], int(edge["dst_id"]), edge["edge_type"], edge["confidence"], "out")
        for edge in repo.outgoing_edges(conn, kind, node_id)
        if edge["dst_id"] is not None
    )
76
+
77
+
78
+ # ---------------------------------------------------------------------------
79
+ # path A B
80
+ # ---------------------------------------------------------------------------
81
+
82
def path_payload(conn: sqlite3.Connection, src: str, dst: str) -> dict:
    """Shortest undirected path between two nodes, with the edge audit trail.

    Args:
        conn: SQLite connection handed through to the storage helpers.
        src: Path or symbol token for the start of the path.
        dst: Path or symbol token for the end of the path.

    Returns:
        A dict that always carries `src`, `dst`, `index`, `nodes`, `steps` and
        `found`. On success it also has `hops`. On failure it has a `reason`
        string that distinguishes an unresolvable token, a graph with no
        connection, and a search cut short by `_MAX_VISITS`.
    """
    src_seeds = _resolve_targets(conn, src)
    dst_seeds = set(_resolve_targets(conn, dst))
    base = {"src": src, "dst": dst, "index": _freshness(conn), "nodes": [], "steps": []}
    if not src_seeds or not dst_seeds:
        missing = src if not src_seeds else dst
        return {**base, "found": False, "reason": f"Could not resolve `{missing}` to an indexed node."}

    # Multi-source BFS from every src node; stop at the first dst node reached.
    parent: dict[Node, Optional[Node]] = {seed: None for seed in src_seeds}
    via: dict[Node, tuple] = {}
    queue: deque[Node] = deque(src_seeds)
    found: Optional[Node] = None
    visits = 0
    while queue and visits < _MAX_VISITS:
        node = queue.popleft()
        visits += 1
        if node in dst_seeds:
            found = node
            break
        for nk, nid, etype, conf, direction in _undirected_neighbors(conn, *node):
            nxt = (nk, nid)
            if nxt not in parent:
                parent[nxt] = node
                via[nxt] = (etype, conf, direction)
                queue.append(nxt)

    if found is None:
        if queue:
            # The loop ended on the visit cap with unexplored nodes left, so
            # the absence of a path has not actually been established.
            reason = (f"Search stopped after visiting {_MAX_VISITS} nodes without reaching "
                      "the target; a longer path may still exist.")
        else:
            reason = "No path found between the two nodes in the resolved graph."
        return {**base, "found": False, "reason": reason}

    # Reconstruct from `found` back to a src seed (seeds have parent None).
    chain: list[Node] = []
    cur: Optional[Node] = found
    while cur is not None:
        chain.append(cur)
        cur = parent[cur]
    chain.reverse()

    # Nodes whose rows can no longer be loaded are dropped from the listing.
    nodes = [ref for n in chain if (ref := _node_ref(conn, *n)) is not None]
    steps = []
    for prev, nxt in zip(chain, chain[1:]):
        etype, conf, direction = via[nxt]
        a, b = _node_ref(conn, *prev), _node_ref(conn, *nxt)
        if a and b:
            steps.append({"from": a, "to": b, "edge_type": etype,
                          "confidence": conf, "direction": direction})
    return {**base, "found": True, "hops": len(steps), "nodes": nodes, "steps": steps}
131
+
132
+
133
+ # ---------------------------------------------------------------------------
134
+ # describe <symbol>
135
+ # ---------------------------------------------------------------------------
136
+
137
def describe_payload(conn: sqlite3.Connection, query: str) -> dict:
    """A node card: definition(s), callers, callees, centrality, module, god status.

    Every exact-name match is listed under `definitions`; the match with the
    highest combined in/out degree becomes `primary`, and callees and god rank
    are computed for that one. Callers are looked up by name.
    """
    base = {"query": query, "index": _freshness(conn)}
    matches = repo.symbols_by_name(conn, query, exact=True)
    if not matches:
        return {**base, "found": False,
                "reason": f"No symbol named `{query}` is indexed. Try `search` or `symbol`."}

    copied_fields = ("name", "qualified", "kind", "path", "line_start", "line_end", "signature")
    definitions = []
    for row in matches:
        card = {field: row[field] for field in copied_fields}
        card["in_degree"] = int(row["in_degree"])
        card["out_degree"] = int(row["out_degree"])
        definitions.append(card)

    def _connectivity(row) -> int:
        return int(row["in_degree"]) + int(row["out_degree"])

    # Primary = most-connected definition (the one worth describing in depth).
    primary_row = max(matches, key=_connectivity)
    primary_id = int(primary_row["id"])

    callers = []
    for hit in repo.refs_for_name(conn, query):
        callers.append({"path": hit["path"], "line": hit["line"], "confidence": hit["confidence"]})

    callees = []
    for edge in repo.outgoing_edges(conn, "symbol", primary_id):
        if edge["dst_id"] is not None:
            target = _node_ref(conn, edge["dst_kind"], int(edge["dst_id"]))
            if target is not None:
                callees.append({**target, "edge_type": edge["edge_type"],
                                "confidence": edge["confidence"]})

    # Directory part of the path, or "(root)" for a file with no "/" in its path.
    parent_dir, slash, _ = primary_row["path"].rpartition("/")
    module = parent_dir if slash else "(root)"
    god = _god_rank(conn, primary_row["name"], primary_row["path"])

    primary = {
        "name": primary_row["name"],
        "path": primary_row["path"],
        "module": module,
        "god_rank": god,
        "in_degree": int(primary_row["in_degree"]),
        "out_degree": int(primary_row["out_degree"]),
    }
    return {
        **base,
        "found": True,
        "definitions": definitions,
        "primary": primary,
        "callers": callers,
        "callees": callees,
    }
189
+
190
+
191
def _god_rank(conn: sqlite3.Connection, name: str, path: str) -> Optional[int]:
    """1-based rank of this symbol among the cached god nodes, or None.

    None is returned both when no analysis summary is available and when the
    (name, path) pair is not in its `god_nodes` list.
    """
    from . import analysis

    summary = analysis.load_analysis(conn)
    if not summary:
        return None
    ranked = enumerate(summary.get("god_nodes", []), start=1)
    return next(
        (rank for rank, entry in ranked
         if entry.get("name") == name and entry.get("path") == path),
        None,
    )
@@ -0,0 +1,8 @@
1
+ """Indexing orchestration.
2
+
3
+ pipeline.py : full + incremental build = discovery -> parse -> store chunks/symbols ->
4
+ graph build -> summaries -> FTS sync -> (optional) embeddings.
5
+ incremental.py : decide which files to (re)process from sha256 + mtime_ns + git status; handle
6
+ deletions (cascade) and config_hash changes (rebuild affected rows).
7
+ summarize.py : file/module/package summaries (heuristic/extractive by default; pluggable later).
8
+ """
@@ -0,0 +1,202 @@
1
+ """Extract document-style chunks from non-code content for FTS5 indexing.
2
+
3
+ Produces chunks of kind="doc" from:
4
+ - Markdown headings (# Heading)
5
+ - README sections (first 200 chars under each heading)
6
+ - Test function names (test_* in Python)
7
+ - Function/class docstrings
8
+ - Exception messages (raise X("message"))
9
+ - Config keys (.codeindex.json, pyproject.toml)
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import re
16
+ from typing import Optional
17
+
18
+ from ..parsers.base import Chunk
19
+
20
# Markdown heading line: 1-6 '#' characters, whitespace, then the heading text.
_MD_HEADING_RE = re.compile(r'^(#{1,6})\s+(.+)$', re.MULTILINE)
# `def test_*(` definitions; group 1 is the function name.
_TEST_FUNC_RE = re.compile(r'def\s+(test_\w+)\s*\(', re.MULTILINE)
# A def/class name followed, lazily across any text, by the next
# triple-double-quoted string; group 1 is that string including its quotes.
_DOCSTRING_RE = re.compile(r'(?:def|class)\s+\w+[\s\S]*?("""[\s\S]*?""")')
# `raise Name("message"` / `raise Name('message'`; group 1 is the message text.
_EXCEPTION_RE = re.compile(r'raise\s+\w+\s*\(\s*["\'](.+?)["\']', re.MULTILINE)
24
+
25
+
26
def extract_doc_chunks(text: str, rel_path: str, lang: Optional[str]) -> list[Chunk]:
    """Extract all doc-style chunks from a file.

    The extractor set is chosen from `lang`; a file whose language was not
    recognised but whose path ends in ".py" is treated as Python. Anything
    else produces no chunks.
    """
    if lang == "markdown":
        extractors = (_extract_md_headings, _extract_readme_sections)
    elif lang in ("json", "toml"):
        return list(_extract_config_keys(text, lang))
    elif lang == "python" or rel_path.endswith(".py"):
        extractors = (_extract_test_names, _extract_docstrings, _extract_exception_messages)
    else:
        return []

    collected: list[Chunk] = []
    for extract in extractors:
        collected.extend(extract(text))
    return collected
45
+
46
+
47
def _extract_md_headings(text: str) -> list[Chunk]:
    """Turn every markdown heading line into a one-line "doc" chunk."""

    def _as_chunk(hit) -> Chunk:
        # 1-based line number = newlines before the match, plus one.
        lineno = text.count('\n', 0, hit.start()) + 1
        title = hit.group(0).strip()
        return Chunk(
            line_start=lineno,
            line_end=lineno,
            content=title,
            token_est=max(1, len(title) // 4),
            kind="doc",
        )

    return [_as_chunk(hit) for hit in _MD_HEADING_RE.finditer(text)]
62
+
63
+
64
def _extract_readme_sections(text: str) -> list[Chunk]:
    """Extract first 200 chars under each markdown heading.

    Each chunk's content is "<heading>: <body excerpt>". It spans from the
    heading line to the line holding the last character of the excerpt.
    Headings with no body text are skipped.
    """
    chunks = []
    headings = list(_MD_HEADING_RE.finditer(text))

    for i, match in enumerate(headings):
        heading_text = match.group(0).strip()
        start = match.end()
        end = headings[i + 1].start() if i + 1 < len(headings) else len(text)
        raw_body = text[start:end]
        section_body = raw_body.strip()[:200]
        if not section_body:
            continue

        # The excerpt starts after the whitespace that strip() removed (the
        # newline ending the heading plus any blank lines). That offset must
        # be added back, otherwise line_end is under-reported.
        body_start = start + (len(raw_body) - len(raw_body.lstrip()))
        line_start = text.count('\n', 0, match.start()) + 1
        line_end = text.count('\n', 0, body_start + len(section_body)) + 1
        content = f"{heading_text}: {section_body}"
        token_est = max(1, len(content) // 4)
        chunks.append(Chunk(
            line_start=line_start,
            line_end=line_end,
            content=content,
            token_est=token_est,
            kind="doc",
        ))

    return chunks
89
+
90
+
91
def _extract_test_names(text: str) -> list[Chunk]:
    """Emit one "doc" chunk per `def test_*(` definition, keyed by its name."""
    found = []
    for hit in _TEST_FUNC_RE.finditer(text):
        lineno = text.count('\n', 0, hit.start()) + 1
        test_name = hit.group(1)
        found.append(Chunk(
            line_start=lineno,
            line_end=lineno,
            content=f"test function: {test_name}",
            # The estimate is based on the bare name, not the prefixed content.
            token_est=max(1, len(test_name) // 4),
            kind="doc",
        ))
    return found
106
+
107
+
108
def _extract_docstrings(text: str) -> list[Chunk]:
    """Extract function/class docstrings as searchable chunks.

    Matching is regex-based: each def/class is paired with the next
    triple-double-quoted string after it. Docstrings of 10 characters or fewer
    are ignored; stored content is capped at 500 characters.
    """
    chunks = []
    for match in _DOCSTRING_RE.finditer(text):
        line_start = text.count('\n', 0, match.start()) + 1
        # Group 1 always starts and ends with the triple-quote delimiters, so
        # slice exactly those off. str.strip('"') would also eat quote
        # characters that belong to the docstring text itself.
        docstring = match.group(1)[3:-3].strip()
        if docstring and len(docstring) > 10:
            line_end = text.count('\n', 0, match.end()) + 1
            content = docstring[:500]
            # Estimate tokens for what is actually stored, not the full text.
            token_est = max(1, len(content) // 4)
            chunks.append(Chunk(
                line_start=line_start,
                line_end=line_end,
                content=content,
                token_est=token_est,
                kind="doc",
            ))
    return chunks
125
+
126
+
127
def _extract_exception_messages(text: str) -> list[Chunk]:
    """Emit one "doc" chunk per `raise Name("message")` site, holding the message."""

    def _as_chunk(hit) -> Chunk:
        lineno = text.count('\n', 0, hit.start()) + 1
        message = hit.group(1)
        return Chunk(
            line_start=lineno,
            line_end=lineno,
            content=f"exception: {message}",
            # The estimate is based on the message alone, not the prefixed content.
            token_est=max(1, len(message) // 4),
            kind="doc",
        )

    return [_as_chunk(hit) for hit in _EXCEPTION_RE.finditer(text)]
142
+
143
+
144
def _extract_config_keys(text: str, lang: str) -> list[Chunk]:
    """Extract config keys from JSON/TOML files.

    JSON is parsed and flattened into dot-notation key paths with a truncated
    value; unparseable JSON yields no chunks. TOML is scanned line by line for
    `key =` assignments that start at column 0. Any other `lang` returns an
    empty list.
    """
    chunks = []
    if lang == "json":
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            # Best effort: a malformed config file contributes nothing.
            return chunks
        for key_path, value in _flatten_json_keys(data):
            # Source positions are not tracked for JSON keys; all map to line 1.
            line_est = 1
            content = f"config key: {key_path} = {_truncate_value(value)}"
            token_est = max(1, len(content) // 4)
            chunks.append(Chunk(
                line_start=line_est,
                line_end=line_est,
                content=content,
                token_est=token_est,
                kind="doc",
            ))
    elif lang == "toml":
        # TOML bare keys may contain '-' as well as letters, digits and '_'
        # (e.g. `requires-python`, `build-backend`), so '-' must be allowed.
        for match in re.finditer(r'^([\w.-]+)\s*=', text, re.MULTILINE):
            line_num = text.count('\n', 0, match.start()) + 1
            key = match.group(1)
            content = f"config key: {key}"
            token_est = max(1, len(content) // 4)
            chunks.append(Chunk(
                line_start=line_num,
                line_end=line_num,
                content=content,
                token_est=token_est,
                kind="doc",
            ))
    return chunks
178
+
179
+
180
def _flatten_json_keys(data, prefix: str = "") -> list[tuple[str, str]]:
    """Flatten nested JSON into (dot-notation key path, leaf value) pairs.

    Dict keys are joined with "." and list positions are appended as "[i]".
    Only non-container leaves are emitted, so empty dicts/lists and a scalar
    at the top level produce nothing.
    """
    if isinstance(data, dict):
        children = ((f"{prefix}.{key}" if prefix else key, val) for key, val in data.items())
    elif isinstance(data, list):
        children = ((f"{prefix}[{pos}]", val) for pos, val in enumerate(data))
    else:
        return []

    flat = []
    for child_path, val in children:
        if isinstance(val, (dict, list)):
            flat += _flatten_json_keys(val, child_path)
        else:
            flat.append((child_path, val))
    return flat
198
+
199
+
200
def _truncate_value(value, max_len: int = 100) -> str:
    """Render `value` with str(), cutting it to `max_len` chars plus "..." if longer."""
    rendered = str(value)
    if len(rendered) > max_len:
        return rendered[:max_len] + "..."
    return rendered
@@ -0,0 +1,109 @@
1
+ """Compute index freshness for the `index` block of every response.
2
+
3
+ Contract (consumed by SKILL.md step 2):
4
+ exists -> a build has happened (meta.built_at present).
5
+ stale -> the working tree differs from what was indexed.
6
+ files_changed_since_build -> how many indexable files differ.
7
+
8
+ Strategy:
9
+ * Git fast-path: if the repo is a clean git tree AT the indexed commit, nothing
10
+ changed -> not stale (cheap; no walk).
11
+ * Accurate fallback (dirty tree, different commit, or no git): walk the current
12
+ indexable set and diff (path, mtime_ns) against the `files` table. This reuses
13
+ the discovery gates, so ignored/secret/binary files never count as changes.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import hashlib
19
+ import subprocess
20
+ from pathlib import Path
21
+
22
+ from ..config import Config
23
+ from ..discovery.walker import walk
24
+ from ..models import IndexFreshness
25
+ from ..storage import repo
26
+
27
+
28
def compute_freshness(conn, root: Path, config: Config) -> IndexFreshness:
    """Report whether an index exists and whether the working tree has drifted.

    With no `built_at` meta value the index is reported as non-existent. When
    git shows a clean tree at the indexed commit the change count is 0;
    otherwise the indexable files are walked and compared against the index.
    """
    built_at = repo.get_meta(conn, "built_at")
    if built_at is None:
        return IndexFreshness(exists=False, stale=False)

    indexed_commit = repo.get_meta(conn, "head_commit")
    root = Path(root)

    # Cheap git check first; only fall back to the full walk when it fails.
    drift = 0 if _git_clean_at(root, indexed_commit) else _changed_count(conn, root, config)

    return IndexFreshness(
        exists=True,
        stale=drift > 0,
        files_changed_since_build=drift,
        built_at=built_at,
        head_commit=indexed_commit,
    )
48
+
49
+
50
def _changed_count(conn, root: Path, config: Config) -> int:
    """Added + removed + content-modified indexable files vs. the index.

    Mirrors the incremental update's decision (indexer/pipeline.py): a file is
    unchanged when (mtime, size) match, and even when they differ it only
    counts as changed if its sha256 differs. A bare `touch` that rewrites mtime
    without changing bytes is a no-op for update_index, so it must not register
    as stale here either. Files that cannot be stat'ed are skipped entirely; a
    file that cannot be hashed counts as changed.
    """
    known = repo.fingerprints(conn)  # path -> (mtime_ns, size_bytes, sha256)
    visited: set[str] = set()
    differing = 0
    for cand in walk(root, config):
        try:
            stat_result = cand.path.stat()
        except OSError:
            continue
        rel = cand.rel_path
        visited.add(rel)
        fingerprint = known.get(rel)
        if fingerprint is not None:
            if fingerprint[0] == stat_result.st_mtime_ns and fingerprint[1] == cand.size_bytes:
                continue
            try:
                same_bytes = fingerprint[2] == _sha256_file(cand.path)
            except OSError:
                same_bytes = False
            if same_bytes:
                continue
        differing += 1
    # Indexed paths the walk never produced have been removed (or are now excluded).
    vanished = [indexed_path for indexed_path in known if indexed_path not in visited]
    return differing + len(vanished)
82
+
83
+
84
def _sha256_file(path: Path) -> str:
    """Return the hex sha256 of a file's bytes, read in 64 KiB blocks."""
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while block := stream.read(65536):
            digest.update(block)
    return digest.hexdigest()
90
+
91
+
92
def _git_clean_at(root: Path, indexed_head: "str | None") -> bool:
    """True iff git is available, HEAD == indexed_head, and the tree has no changes.

    Any failure to run git (missing binary, timeout, non-zero exit) is treated
    as "not clean" so the caller falls back to the full comparison.
    """
    if indexed_head is None or not (root / ".git").exists():
        return False

    def _git(*args: str):
        return subprocess.run(
            ["git", "-C", str(root), *args],
            capture_output=True, text=True, timeout=5, check=False,
        )

    try:
        head = _git("rev-parse", "HEAD")
        if head.returncode != 0 or head.stdout.strip() != indexed_head:
            return False
        status = _git("status", "--porcelain")
    except (OSError, subprocess.SubprocessError):
        return False
    # Empty porcelain output means no modified, staged or untracked files.
    return status.returncode == 0 and not status.stdout.strip()