codebase-index 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. codebase_index/__init__.py +7 -0
  2. codebase_index/__main__.py +3 -0
  3. codebase_index/cli.py +916 -0
  4. codebase_index/config.py +110 -0
  5. codebase_index/discovery/__init__.py +10 -0
  6. codebase_index/discovery/classify.py +151 -0
  7. codebase_index/discovery/ignore.py +58 -0
  8. codebase_index/discovery/walker.py +75 -0
  9. codebase_index/doctor.py +138 -0
  10. codebase_index/embeddings/__init__.py +2 -0
  11. codebase_index/embeddings/backend.py +67 -0
  12. codebase_index/embeddings/external.py +56 -0
  13. codebase_index/embeddings/local.py +41 -0
  14. codebase_index/embeddings/noop.py +15 -0
  15. codebase_index/graph/__init__.py +8 -0
  16. codebase_index/graph/analysis.py +468 -0
  17. codebase_index/graph/builder.py +160 -0
  18. codebase_index/graph/expand.py +136 -0
  19. codebase_index/graph/export.py +381 -0
  20. codebase_index/graph/navigate.py +201 -0
  21. codebase_index/indexer/__init__.py +8 -0
  22. codebase_index/indexer/doc_chunks.py +202 -0
  23. codebase_index/indexer/freshness.py +109 -0
  24. codebase_index/indexer/pipeline.py +423 -0
  25. codebase_index/mcp/__init__.py +2 -0
  26. codebase_index/mcp/server.py +354 -0
  27. codebase_index/models.py +145 -0
  28. codebase_index/output/__init__.py +6 -0
  29. codebase_index/output/json.py +13 -0
  30. codebase_index/output/markdown.py +316 -0
  31. codebase_index/output/redact.py +31 -0
  32. codebase_index/parsers/__init__.py +9 -0
  33. codebase_index/parsers/base.py +47 -0
  34. codebase_index/parsers/languages.py +290 -0
  35. codebase_index/parsers/line_chunker.py +39 -0
  36. codebase_index/parsers/symbol_chunks.py +62 -0
  37. codebase_index/parsers/treesitter.py +439 -0
  38. codebase_index/retrieval/__init__.py +9 -0
  39. codebase_index/retrieval/budget.py +82 -0
  40. codebase_index/retrieval/fusion.py +62 -0
  41. codebase_index/retrieval/intent.py +56 -0
  42. codebase_index/retrieval/pipeline.py +207 -0
  43. codebase_index/retrieval/rerank.py +69 -0
  44. codebase_index/retrieval/searchers.py +291 -0
  45. codebase_index/retrieval/skeleton.py +251 -0
  46. codebase_index/retrieval/types.py +79 -0
  47. codebase_index/scaffold.py +399 -0
  48. codebase_index/service.py +158 -0
  49. codebase_index/skill_template/SKILL.md +198 -0
  50. codebase_index/skill_template/examples/hooks/settings.json +16 -0
  51. codebase_index/skill_template/scripts/cbx +25 -0
  52. codebase_index/skill_template/scripts/cbx.ps1 +25 -0
  53. codebase_index/skill_update.py +150 -0
  54. codebase_index/storage/__init__.py +8 -0
  55. codebase_index/storage/db.py +116 -0
  56. codebase_index/storage/repo.py +701 -0
  57. codebase_index/storage/schema.sql +125 -0
  58. codebase_index/watch/__init__.py +5 -0
  59. codebase_index/watch/watcher.py +93 -0
  60. codebase_index-1.6.0.dist-info/METADATA +748 -0
  61. codebase_index-1.6.0.dist-info/RECORD +64 -0
  62. codebase_index-1.6.0.dist-info/WHEEL +4 -0
  63. codebase_index-1.6.0.dist-info/entry_points.txt +4 -0
  64. codebase_index-1.6.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,41 @@
1
+ # src/codebase_index/embeddings/local.py
2
+ """On-device embedding via sentence-transformers. No network at query time.
3
+
4
+ The model is an OPTIONAL dependency (`pip install codebase-index[embeddings-local]`);
5
+ it is imported lazily so the base install never pulls it in. The model loads once
6
+ on first embed and is cached on the instance.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from .backend import EmbeddingError
12
+
13
+
14
class LocalBackend:
    """Embedding backend that runs a sentence-transformers model on-device.

    The model object is created the first time it is needed and then kept on
    the instance, so constructing a backend is cheap.
    """

    enabled = True
    # Stays 0 until the model has been loaded and reports its vector size.
    dim: int = 0

    def __init__(self, model_name: str = "all-MiniLM-L6-v2") -> None:
        self.model_name = model_name
        self.name = f"local:{model_name}"
        # Filled in lazily by _load().
        self._model = None

    def _load(self):
        """Return the cached model, importing and building it on first use.

        Also records the model's embedding dimension in ``self.dim``.

        Raises:
            EmbeddingError: if the optional sentence-transformers package
                cannot be imported.
        """
        if self._model is not None:
            return self._model
        try:
            from sentence_transformers import SentenceTransformer  # type: ignore[import-not-found]
        except ImportError as exc:
            raise EmbeddingError(
                "Local embeddings need the optional extra: "
                "pip install codebase-index[embeddings-local]"
            ) from exc
        self._model = SentenceTransformer(self.model_name)
        self.dim = int(self._model.get_sentence_embedding_dimension())
        return self._model

    def embed(self, texts: list[str]) -> list[list[float]]:
        """Encode ``texts`` and return one list of Python floats per text.

        The model is asked for normalized embeddings with the progress bar
        switched off.
        """
        encoder = self._load()
        matrix = encoder.encode(
            list(texts),
            convert_to_numpy=True,
            normalize_embeddings=True,
            show_progress_bar=False,
        )
        return [list(map(float, row)) for row in matrix]
@@ -0,0 +1,15 @@
1
+ # src/codebase_index/embeddings/noop.py
2
+ """The disabled default backend. Present so callers never branch on None."""
3
+
4
+ from __future__ import annotations
5
+
6
+ from .backend import EmbeddingError
7
+
8
+
9
class NoopBackend:
    """Stand-in backend used while embeddings are switched off."""

    name = "noop"
    enabled = False
    dim = 0

    def embed(self, texts: list[str]) -> list[list[float]]:
        """Refuse to embed: this backend has no model behind it.

        Raises:
            EmbeddingError: unconditionally.
        """
        message = "Embeddings are disabled (embeddings.enabled = false)."
        raise EmbeddingError(message)
@@ -0,0 +1,8 @@
1
+ """Dependency / call / reference graph.
2
+
3
+ builder.py : extract import|call|reference|extends|implements|depends edges from AST; resolve
4
+ targets to symbol/file ids where possible (unresolved kept as dst_name); update
5
+ in_degree/out_degree on symbols.
6
+ expand.py : bounded graph walks for retrieval. impact() walks UP (callers/importers) for blast
7
+ radius; how_it_works walks DOWN (callees); find_refs reads direct reverse edges.
8
+ """
@@ -0,0 +1,468 @@
1
+ """Architecture analytics over the resolved edge graph — zero external deps.
2
+
3
+ This is the codebase-index take on graphify's community detection / god nodes /
4
+ surprising connections, implemented in pure, deterministic Python so the core
5
+ install stays dependency-free and the results are stable across runs (which
6
+ matters for the golden-snapshot tests and CI).
7
+
8
+ What it computes from the in-memory adjacency of resolved edges:
9
+
10
+ * communities - greedy modularity moves (the Louvain local-moving phase)
11
+ group tightly-connected nodes into "modules". Deterministic:
12
+ nodes are visited in a fixed key order and ties break to the
13
+ smallest label, so the same graph always yields the same partition.
14
+ * god nodes - the most-connected nodes (weighted degree). These are the
15
+ symbols/files most of the codebase leans on.
16
+ * surprising - edges that bridge two otherwise weakly-connected communities.
17
+ The cross-module links you would not think to look for.
18
+ * questions - template-generated starting questions seeded from the god
19
+ nodes and the bridges, mirroring graphify's GRAPH_REPORT.
20
+
21
+ The summary is cached in meta['graph_analysis'] by refresh_analysis() at build
22
+ time; the `architecture` command and HTML export read it back instantly.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import json
28
+ import sqlite3
29
+ from collections import Counter, defaultdict
30
+ from typing import Any, Optional
31
+
32
+ from ..storage import repo
33
+
34
+ # How many items to keep in the cached summary. Bounded so the meta JSON stays
35
+ # small even on very large repos.
36
+ MAX_GOD_NODES = 20
37
+ MAX_SURPRISING = 12
38
+ MAX_QUESTIONS = 8
39
+ TOP_NODES_PER_COMMUNITY = 5
40
+ MAX_COMMUNITIES_IN_SUMMARY = 40
41
+ # A community smaller than this is noise for reporting (isolated/leaf nodes).
42
+ MIN_REPORTED_COMMUNITY = 2
43
+ # A pair of communities joined by at most this many edges is a "bridge".
44
+ BRIDGE_MAX_EDGES = 2
45
+ # Cap on local-move passes; the partition almost always settles in 2-4.
46
+ _LOCAL_MOVE_PASSES = 20
47
+
48
+ ANALYSIS_META_KEY = "graph_analysis"
49
+
50
+ Node = tuple[str, int] # (kind, id)
51
+
52
+
53
+ # ---------------------------------------------------------------------------
54
+ # Graph construction
55
+ # ---------------------------------------------------------------------------
56
+
57
def build_adjacency(
    edges: list[sqlite3.Row],
    key_fn=None,
) -> tuple[dict[Any, Counter], dict[tuple[Any, Any], int]]:
    """Build an undirected weighted adjacency and per-pair edge multiplicity.

    Each edge row must expose ``src_kind``, ``src_id``, ``dst_kind`` and
    ``dst_id``. Every row adds 1 to the weight between its two endpoints, in
    both directions, and 1 to the multiplicity of the canonical (ordered)
    endpoint pair.

    ``key_fn(kind, id) -> hashable | None`` turns an endpoint into a node key.
    An edge is dropped when either key is None or when both keys are equal
    (a self-loop). When ``key_fn`` is omitted the node key is the
    ``(kind, id)`` tuple. analyze() supplies a content-derived key
    (``kind::path[::name::line]``) so results do not depend on database ids.

    Returns:
        ``(adjacency, edge_weight)`` where ``adjacency[a][b]`` is the weight
        between ``a`` and ``b`` and ``edge_weight[(a, b)]`` is keyed by the
        canonical pair.
    """
    if key_fn is None:
        def resolve(kind: str, nid: int):
            return (kind, nid)
    else:
        resolve = key_fn

    neighbours: dict[Any, Counter] = defaultdict(Counter)
    multiplicity: dict[tuple[Any, Any], int] = defaultdict(int)
    for row in edges:
        # Resolve both endpoints before filtering: key_fn may record the
        # nodes it sees as a side effect.
        head = resolve(row["src_kind"], int(row["src_id"]))
        tail = resolve(row["dst_kind"], int(row["dst_id"]))
        if head is None or tail is None or head == tail:
            continue
        neighbours[head][tail] += 1
        neighbours[tail][head] += 1
        multiplicity[_canonical_pair(head, tail)] += 1
    return neighbours, multiplicity
85
+
86
+
87
+ def _canonical_pair(a: Any, b: Any) -> tuple[Any, Any]:
88
+ return (a, b) if a <= b else (b, a)
89
+
90
+
91
# The graph algorithms below are generic over the node-key type: analyze()
# keys nodes by stable content strings, build_adjacency()'s default keying
# yields (kind, id) tuples, and the HTML/interop export is noted as another
# string-keyed caller. Typing the key as Any keeps every call site valid.
def weighted_degree(adj: dict[Any, Counter]) -> dict[Any, int]:
    """Map every node in ``adj`` to the total weight of its incident edges."""
    totals: dict[Any, int] = {}
    for node, neighbors in adj.items():
        totals[node] = sum(neighbors.values())
    return totals
96
+
97
+
98
+ # ---------------------------------------------------------------------------
99
+ # Community detection — deterministic greedy modularity (Louvain local moving)
100
+ # ---------------------------------------------------------------------------
101
+
102
def detect_communities(adj: dict[Any, Counter]) -> dict[Any, int]:
    """Partition nodes into communities by greedy modularity. Returns {node: id}.

    This is the local-moving phase of the Louvain method, made deterministic.
    Every node starts in its own community. Nodes are then visited in sorted
    key order, and each one moves to the neighbouring community with the
    largest modularity gain. A move only happens when the gain beats staying
    put by more than 1e-12; among equally good alternatives the smallest
    community id wins. Passes repeat until a full pass moves nothing, or the
    pass cap is reached. The raw labels are finally renumbered by
    ``_renumber_by_size``.
    """
    ordered = sorted(adj)
    if not ordered:
        return {}

    degree = weighted_degree(adj)
    total_degree = sum(degree.values())  # twice the total edge weight
    assignment: dict[Any, int] = {node: idx for idx, node in enumerate(ordered)}
    if total_degree == 0:
        # No edges at all: every node stays a singleton.
        return _renumber_by_size(assignment)

    # Summed weighted degree of the current members of each community.
    community_degree: dict[int, int] = {
        assignment[node]: degree[node] for node in ordered
    }

    for _ in range(_LOCAL_MOVE_PASSES):
        changed = False
        for node in ordered:
            k_i = degree[node]
            home = assignment[node]
            # Take the node out of its community before scoring candidates.
            community_degree[home] -= k_i

            # Edge weight from this node into each adjacent community.
            weight_to: Counter = Counter()
            for other, w in adj[node].items():
                if other != node:
                    weight_to[assignment[other]] += w

            # Score = w_in - Σ_tot * k_i / (2m). The baseline is rejoining
            # the community the node was just detached from.
            target = home
            top_gain = weight_to.get(home, 0) - community_degree[home] * k_i / total_degree
            for candidate in sorted(weight_to):
                w_in = weight_to[candidate]
                gain = w_in - community_degree[candidate] * k_i / total_degree
                if gain > top_gain + 1e-12:
                    top_gain, target = gain, candidate

            assignment[node] = target
            community_degree[target] += k_i
            if target != home:
                changed = True
        if not changed:
            break

    return _renumber_by_size(assignment)
157
+
158
+
159
+ def _renumber_by_size(label: dict[Any, int]) -> dict[Any, int]:
160
+ """Renumber raw labels to dense ids ordered by community size (desc), then by
161
+ smallest member key — so the mapping is stable run to run."""
162
+ members: dict[int, list[Any]] = defaultdict(list)
163
+ for node, lbl in label.items():
164
+ members[lbl].append(node)
165
+ order = sorted(members, key=lambda lbl: (-len(members[lbl]), min(members[lbl])))
166
+ remap = {old: new for new, old in enumerate(order)}
167
+ return {node: remap[lbl] for node, lbl in label.items()}
168
+
169
+
170
def modularity(adj: dict[Any, Counter], communities: dict[Any, int]) -> float:
    """Newman modularity Q of the partition — a quality score in roughly [-0.5, 1].

    Higher means the communities capture more edge density than chance.
    Reported so the user can judge how meaningful the module split is.

    Computed per community as ``in_c / 2m - (tot_c / 2m) ** 2``, where
    ``in_c`` is the adjacency weight between members of ``c`` (each internal
    edge is seen from both ends), ``tot_c`` is the summed weighted degree of
    its members and ``2m`` is the summed degree of the whole graph. The
    expected-weight term has to cover every pair of nodes in a community,
    not only adjacent pairs. Otherwise Q is overstated: one community
    spanning the whole graph must score 0.

    Args:
        adj: undirected weighted adjacency, ``adj[a][b] == adj[b][a]``.
        communities: community id for every node that appears in ``adj``.

    Returns:
        Q rounded to 4 decimals, or 0.0 for a graph without edges.
    """
    deg = {node: sum(neighbors.values()) for node, neighbors in adj.items()}
    m2 = sum(deg.values())  # = 2 * total edge weight
    if m2 == 0:
        return 0.0

    internal: dict[int, int] = defaultdict(int)  # in_c, counted from both ends
    total: dict[int, int] = defaultdict(int)  # tot_c
    for node, neighbors in adj.items():
        ci = communities[node]
        total[ci] += deg[node]
        for neighbor, weight in neighbors.items():
            if communities[neighbor] == ci:
                internal[ci] += weight

    # Sum in sorted community order so the float result is reproducible.
    q = sum(internal[c] / m2 - (total[c] / m2) ** 2 for c in sorted(total))
    return round(q, 4)
187
+
188
+
189
+ # ---------------------------------------------------------------------------
190
+ # Node labelling
191
+ # ---------------------------------------------------------------------------
192
+
193
def _node_index(conn: sqlite3.Connection) -> dict[Node, dict]:
    """Build a ``(kind, id) -> display metadata`` lookup for every graph node.

    File entries carry ``kind``, ``name`` (the last path segment) and
    ``path``. Symbol entries carry ``kind``, ``name``, ``symbol_kind``,
    ``path``, ``line_start``, ``in_degree`` and ``out_degree``.
    """
    nodes = repo.all_graph_nodes(conn)
    lookup: dict[Node, dict] = {}

    for file_row in nodes["file"]:
        lookup[("file", int(file_row["id"]))] = {
            "kind": "file",
            "name": file_row["path"].rsplit("/", 1)[-1],
            "path": file_row["path"],
        }

    for symbol_row in nodes["symbol"]:
        entry = {
            "kind": "symbol",
            "name": symbol_row["name"],
            "symbol_kind": symbol_row["kind"],
            "path": symbol_row["path"],
            "line_start": symbol_row["line_start"],
            "in_degree": int(symbol_row["in_degree"]),
            "out_degree": int(symbol_row["out_degree"]),
        }
        lookup[("symbol", int(symbol_row["id"]))] = entry

    return lookup
214
+
215
+
216
+ def _stable_key(meta: dict) -> str:
217
+ """A platform-stable node key from content, not from the volatile symbol id.
218
+
219
+ Symbol ids are assigned in file-walk order, which differs across OSes; keying
220
+ the graph by path/name/line keeps communities and god-node ranking identical
221
+ everywhere (so the golden snapshots hold on Linux/macOS/Windows alike).
222
+ """
223
+ if meta["kind"] == "file":
224
+ return f"file::{meta['path']}"
225
+ return f"symbol::{meta['path']}::{meta['name']}::{meta.get('line_start', '')}"
226
+
227
+
228
+ def _dir_of(path: str) -> str:
229
+ return path.rsplit("/", 1)[0] if "/" in path else "(root)"
230
+
231
+
232
+ def _is_test_path(path: str) -> bool:
233
+ """Test files cluster with the code they exercise; don't let them name the module."""
234
+ lower = path.lower()
235
+ parts = lower.split("/")
236
+ if any(p in ("test", "tests", "__tests__", "spec", "specs") for p in parts):
237
+ return True
238
+ base = parts[-1]
239
+ return base.startswith("test_") or base.startswith("test.") or "_test." in base or ".test." in base
240
+
241
+
242
def label_community(members: list[Any], node_index: dict[Any, dict]) -> str:
    """Name a community after the directory most of its nodes live in.

    Directories are tallied twice: once over all members with a path, once
    over the non-test members only. The non-test tally decides the name
    whenever it is non-empty, so test files cannot outvote the code they
    exercise; a community made only of tests falls back to the full tally.
    Ties go to the shortest directory, then the lexicographically smallest.
    Returns ``"module"`` when no member has a path.
    """
    production: Counter = Counter()
    everything: Counter = Counter()
    for node in members:
        meta = node_index.get(node)
        path = meta.get("path") if meta else None
        if not path:
            continue
        folder = _dir_of(path)
        everything[folder] += 1
        if not _is_test_path(path):
            production[folder] += 1

    tally = production if production else everything
    if not tally:
        return "module"
    # Highest count first; ties -> shortest name, then alphabetical (stable).
    ranked = sorted(tally, key=lambda folder: (-tally[folder], len(folder), folder))
    return ranked[0]
267
+
268
+
269
+ # ---------------------------------------------------------------------------
270
+ # God nodes / surprising connections / questions
271
+ # ---------------------------------------------------------------------------
272
+
273
def god_nodes(
    adj: dict[Any, Counter],
    communities: dict[Any, int],
    node_index: dict[Any, dict],
    *,
    limit: int = MAX_GOD_NODES,
) -> list[dict]:
    """Most-connected nodes by weighted degree (the load-bearing ones).

    Nodes are ranked by descending degree, ties broken by ``str(node)``.
    Nodes with no entry in ``node_index`` are skipped without using up the
    ``limit``, so the result holds up to ``limit`` describable nodes whenever
    that many exist.

    Args:
        adj: undirected weighted adjacency.
        communities: node -> community id; nodes missing from it report -1.
        node_index: node -> display metadata (``kind``, ``name``, ``path``).
        limit: maximum number of entries; a non-positive value yields ``[]``.

    Returns:
        One dict per node with ``kind``, ``name``, ``path``, ``degree`` and
        ``community``.
    """
    deg = weighted_degree(adj)
    ranked = sorted(deg, key=lambda n: (-deg[n], str(n)))
    out: list[dict] = []
    for node in ranked:
        if len(out) >= limit:
            break
        meta = node_index.get(node)
        if meta is None:
            continue
        out.append(
            {
                "kind": meta["kind"],
                "name": meta["name"],
                "path": meta.get("path"),
                "degree": deg[node],
                "community": communities.get(node, -1),
            }
        )
    return out
298
+
299
+
300
def surprising_connections(
    edge_weight: dict[tuple[Any, Any], int],
    communities: dict[Any, int],
    node_index: dict[Any, dict],
    *,
    limit: int = MAX_SURPRISING,
) -> list[dict]:
    """Edges that bridge two communities barely connected to each other.

    For each unordered community pair we collect the distinct node pairs that
    cross between them; a community pair joined by at most
    ``BRIDGE_MAX_EDGES`` such node pairs is a bridge. Bridges are ordered
    rarest first, then by community-pair id, and each is represented by its
    smallest endpoint pair.

    In every entry ``from`` lies in ``from_community`` and ``to`` in
    ``to_community``. Endpoint pairs are stored in node-key order, which need
    not match the community order, so they are flipped where necessary.
    Bridges whose endpoints have no entry in ``node_index`` are skipped
    without using up the ``limit``.

    Args:
        edge_weight: canonical node pair -> multiplicity (only keys are used).
        communities: node -> community id; edges touching unknown nodes are
            ignored.
        node_index: node -> display metadata (``kind``, ``name``, ``path``).
        limit: maximum number of entries; a non-positive value yields ``[]``.

    Returns:
        One dict per bridge with ``from``, ``to``, ``from_community``,
        ``to_community`` and ``edge_count``.
    """
    pair_edges: dict[tuple[int, int], list[tuple[Any, Any]]] = defaultdict(list)
    for a, b in edge_weight:
        ca, cb = communities.get(a, -1), communities.get(b, -1)
        if ca == cb or ca < 0 or cb < 0:
            continue
        key = (ca, cb) if ca < cb else (cb, ca)
        pair_edges[key].append((a, b))

    # Rarest bridges first (a single edge between modules is the most
    # surprising), then by community-pair id for stability.
    bridges = sorted(
        (item for item in pair_edges.items() if len(item[1]) <= BRIDGE_MAX_EDGES),
        key=lambda item: (len(item[1]), item[0]),
    )

    out: list[dict] = []
    for (ca, cb), endpoints in bridges:
        if len(out) >= limit:
            break
        a, b = min(endpoints)
        if communities[a] != ca:
            # The node-key order put the to_community endpoint first.
            a, b = b, a
        ma, mb = node_index.get(a), node_index.get(b)
        if ma is None or mb is None:
            continue
        out.append(
            {
                "from": {"kind": ma["kind"], "name": ma["name"], "path": ma.get("path")},
                "to": {"kind": mb["kind"], "name": mb["name"], "path": mb.get("path")},
                "from_community": ca,
                "to_community": cb,
                "edge_count": len(endpoints),
            }
        )
    return out
346
+
347
+
348
def suggest_questions(
    gods: list[dict],
    surprising: list[dict],
    community_labels: dict[int, str],
    *,
    limit: int = MAX_QUESTIONS,
) -> list[str]:
    """Generate starter questions from the top god nodes and bridges.

    The first three god nodes contribute two questions each when they are
    symbols and one otherwise. The first three bridges contribute one
    question each, unless both sides resolve to the same label. Duplicates
    are removed keeping first occurrence, then the list is cut to ``limit``.
    """
    prompts: list[str] = []

    for god in gods[:3]:
        if god["kind"] != "symbol":
            prompts.append(f"What is the role of `{god['name']}` in the architecture?")
            continue
        prompts.append(f"How does `{god['name']}` work?")
        prompts.append(f"What breaks if `{god['name']}` changes?")

    for bridge in surprising[:3]:
        src, dst = bridge["from_community"], bridge["to_community"]
        la = community_labels.get(src, f"community {src}")
        lb = community_labels.get(dst, f"community {dst}")
        if la != lb:
            prompts.append(f"How is `{la}` connected to `{lb}`?")

    # dict keys keep insertion order, which gives an order-preserving de-dup.
    return list(dict.fromkeys(prompts))[:limit]
376
+
377
+
378
+ # ---------------------------------------------------------------------------
379
+ # Top-level entry points
380
+ # ---------------------------------------------------------------------------
381
+
382
def analyze(conn: sqlite3.Connection) -> dict:
    """Compute the full architecture-analytics summary (does not persist it).

    Loads the resolved edges and node metadata, builds the graph keyed by
    stable content keys, detects communities, and derives the per-community
    summaries, god nodes, surprising connections and starter questions.

    Returns:
        A dict with ``node_count``, ``edge_count``, ``community_count``,
        ``modularity``, ``communities``, ``god_nodes``, ``surprising`` and
        ``questions``.
    """
    resolved = repo.all_resolved_edges(conn)
    by_id = _node_index(conn)  # (kind, id) -> meta

    # Filled in by key_fn as edges are scanned: stable key -> display
    # metadata. Keying by content instead of database id keeps the result
    # independent of id assignment order.
    node_index: dict[str, dict] = {}

    def key_fn(kind: str, nid: int):
        meta = by_id.get((kind, nid))
        if meta is None:
            return None
        stable = _stable_key(meta)
        node_index.setdefault(stable, meta)
        return stable

    adj, edge_weight = build_adjacency(resolved, key_fn)
    communities = detect_communities(adj)

    members: dict[int, list[str]] = {}
    for node, cid in communities.items():
        members.setdefault(cid, []).append(node)

    community_labels = {
        cid: label_community(group, node_index) for cid, group in members.items()
    }
    deg = weighted_degree(adj)

    def describe(node: str) -> dict:
        meta = node_index[node]
        return {
            "kind": meta["kind"],
            "name": meta["name"],
            "path": meta.get("path"),
            "degree": deg.get(node, 0),
        }

    community_summaries: list[dict] = []
    # Largest communities first, community id as the tie-break.
    for cid in sorted(members, key=lambda c: (-len(members[c]), c)):
        group = members[cid]
        if len(group) < MIN_REPORTED_COMMUNITY:
            continue
        busiest = sorted(group, key=lambda n: (-deg.get(n, 0), str(n)))
        community_summaries.append(
            {
                "id": cid,
                "label": community_labels[cid],
                "size": len(group),
                "top_nodes": [
                    describe(n)
                    for n in busiest[:TOP_NODES_PER_COMMUNITY]
                    if n in node_index
                ],
            }
        )
        if len(community_summaries) >= MAX_COMMUNITIES_IN_SUMMARY:
            break

    gods = god_nodes(adj, communities, node_index)
    surprising = surprising_connections(edge_weight, communities, node_index)
    questions = suggest_questions(gods, surprising, community_labels)

    # Counts every community big enough to report, even beyond the summary cap.
    reportable = sum(
        1 for group in members.values() if len(group) >= MIN_REPORTED_COMMUNITY
    )

    return {
        "node_count": len(adj),
        "edge_count": sum(edge_weight.values()),
        "community_count": reportable,
        "modularity": modularity(adj, communities),
        "communities": community_summaries,
        "god_nodes": gods,
        "surprising": surprising,
        "questions": questions,
    }
451
+
452
+
453
def refresh_analysis(conn: sqlite3.Connection) -> dict:
    """Run analyze() and store its JSON form under meta['graph_analysis'].

    Returns the freshly computed summary.
    """
    summary = analyze(conn)
    payload = json.dumps(summary, ensure_ascii=False)
    repo.set_meta(conn, ANALYSIS_META_KEY, payload)
    return summary
458
+
459
+
460
def load_analysis(conn: sqlite3.Connection) -> Optional[dict]:
    """Return the cached analysis summary.

    Gives None when nothing is stored under the analysis meta key, or when
    the stored value cannot be decoded as JSON.
    """
    cached = repo.get_meta(conn, ANALYSIS_META_KEY)
    if not cached:
        return None
    try:
        summary = json.loads(cached)
    except (ValueError, TypeError):
        # Treat an unreadable cache entry the same as a missing one.
        return None
    return summary