polycodegraph 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. codegraph/__init__.py +10 -0
  2. codegraph/analysis/__init__.py +30 -0
  3. codegraph/analysis/_common.py +125 -0
  4. codegraph/analysis/blast_radius.py +63 -0
  5. codegraph/analysis/cycles.py +79 -0
  6. codegraph/analysis/dataflow.py +861 -0
  7. codegraph/analysis/dead_code.py +165 -0
  8. codegraph/analysis/hotspots.py +68 -0
  9. codegraph/analysis/infrastructure.py +439 -0
  10. codegraph/analysis/metrics.py +52 -0
  11. codegraph/analysis/report.py +222 -0
  12. codegraph/analysis/roles.py +323 -0
  13. codegraph/analysis/untested.py +79 -0
  14. codegraph/cli.py +1506 -0
  15. codegraph/config.py +64 -0
  16. codegraph/embed/__init__.py +35 -0
  17. codegraph/embed/chunker.py +120 -0
  18. codegraph/embed/embedder.py +113 -0
  19. codegraph/embed/query.py +181 -0
  20. codegraph/embed/store.py +360 -0
  21. codegraph/graph/__init__.py +0 -0
  22. codegraph/graph/builder.py +212 -0
  23. codegraph/graph/schema.py +69 -0
  24. codegraph/graph/store_networkx.py +55 -0
  25. codegraph/graph/store_sqlite.py +249 -0
  26. codegraph/mcp_server/__init__.py +6 -0
  27. codegraph/mcp_server/server.py +933 -0
  28. codegraph/parsers/__init__.py +0 -0
  29. codegraph/parsers/base.py +70 -0
  30. codegraph/parsers/go.py +570 -0
  31. codegraph/parsers/python.py +1707 -0
  32. codegraph/parsers/typescript.py +1397 -0
  33. codegraph/py.typed +0 -0
  34. codegraph/resolve/__init__.py +4 -0
  35. codegraph/resolve/calls.py +480 -0
  36. codegraph/review/__init__.py +31 -0
  37. codegraph/review/baseline.py +32 -0
  38. codegraph/review/differ.py +211 -0
  39. codegraph/review/hook.py +70 -0
  40. codegraph/review/risk.py +219 -0
  41. codegraph/review/rules.py +342 -0
  42. codegraph/viz/__init__.py +17 -0
  43. codegraph/viz/_style.py +45 -0
  44. codegraph/viz/dashboard.py +740 -0
  45. codegraph/viz/diagrams.py +370 -0
  46. codegraph/viz/explore.py +453 -0
  47. codegraph/viz/hld.py +683 -0
  48. codegraph/viz/html.py +115 -0
  49. codegraph/viz/mermaid.py +111 -0
  50. codegraph/viz/svg.py +77 -0
  51. codegraph/web/__init__.py +4 -0
  52. codegraph/web/server.py +165 -0
  53. codegraph/web/static/app.css +664 -0
  54. codegraph/web/static/app.js +919 -0
  55. codegraph/web/static/index.html +112 -0
  56. codegraph/web/static/views/architecture.js +1671 -0
  57. codegraph/web/static/views/graph3d.css +564 -0
  58. codegraph/web/static/views/graph3d.js +999 -0
  59. codegraph/web/static/views/graph3d_transform.js +984 -0
  60. codegraph/workspace/__init__.py +34 -0
  61. codegraph/workspace/config.py +110 -0
  62. codegraph/workspace/operations.py +294 -0
  63. polycodegraph-0.1.0.dist-info/METADATA +687 -0
  64. polycodegraph-0.1.0.dist-info/RECORD +67 -0
  65. polycodegraph-0.1.0.dist-info/WHEEL +4 -0
  66. polycodegraph-0.1.0.dist-info/entry_points.txt +2 -0
  67. polycodegraph-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,370 @@
1
+ """Diagram-style visualizations: matrix, treemap, sankey, flowcharts.
2
+
3
+ These complement the node-link views in ``viz/explore.py`` with views that
4
+ actually *tell a story* about the codebase — call volume between modules
5
+ (matrix + sankey), file-size landscape (treemap), and call chains for top
6
+ entry points (Mermaid flowcharts).
7
+
8
+ All renderers in this module are pure-Python and produce small JSON blobs
9
+ that the dashboard HTML page consumes via D3 / Mermaid loaded from CDN.
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import re
15
+ from collections import Counter, defaultdict
16
+ from dataclasses import dataclass
17
+ from typing import Any, cast
18
+
19
+ import networkx as nx
20
+
21
+ from codegraph.analysis import find_hotspots
22
+ from codegraph.viz._style import kind_str
23
+
24
# Node kinds treated as callables when scanning for high fan-out entry points.
_CALLABLE_KINDS: frozenset[str] = frozenset({"FUNCTION", "METHOD"})
25
+
26
+
27
def _is_test_node(attrs: dict[str, Any]) -> bool:
    """Tell whether a node's ``metadata`` carries a truthy ``is_test`` flag.

    A missing, ``None`` or empty ``metadata`` entry counts as "not a test".
    """
    metadata = attrs.get("metadata")
    if not metadata:
        return False
    return bool(metadata.get("is_test"))
29
+
30
# Captures everything before the first "." of a qualname (its leading segment).
_PACKAGE_RE = re.compile(r"^([^.]+)")
31
+
32
+
33
+ # ----------------------------- module helpers -----------------------------
34
+
35
+
36
def _module_index(graph: nx.MultiDiGraph) -> tuple[
    dict[str, str], dict[str, dict[str, Any]]
]:
    """Map every node to its owning module and summarise each module.

    A *module* is any node whose kind is ``MODULE``. Every other node is
    attached to the module whose ``file`` attribute equals its own; nodes
    without a matching file are left out of the mapping.

    Returns:
        A pair ``(node_to_module, module_info)``. ``node_to_module`` maps a
        node id to a module id (modules map to themselves). ``module_info``
        maps a module id to a dict with the keys ``id``, ``qualname``,
        ``name``, ``file``, ``package``, ``language``, ``is_test``, ``loc``
        and ``symbols``.
    """
    module_by_file: dict[str, str] = {}
    module_info: dict[str, dict[str, Any]] = {}

    # Pass 1: register every MODULE node. This must finish before symbols are
    # attached, because a symbol may be visited before its module.
    for node_id, node_attrs in graph.nodes(data=True):
        if kind_str(node_attrs.get("kind")) != "MODULE":
            continue
        path = node_attrs.get("file")
        if isinstance(path, str):
            module_by_file[path] = node_id
        qualname = str(node_attrs.get("qualname") or "")
        package_match = _PACKAGE_RE.match(qualname) if qualname else None
        metadata = node_attrs.get("metadata") or {}
        module_info[node_id] = {
            "id": node_id,
            "qualname": qualname,
            "name": node_attrs.get("name") or qualname or node_id[:8],
            "file": path or "",
            "package": package_match.group(1) if package_match else "",
            "language": str(node_attrs.get("language") or ""),
            "is_test": bool(metadata.get("is_test")),
            "loc": 0,
            "symbols": 0,
        }

    # Pass 2: attach each node to its module and, for classes and callables,
    # fold the node into the module's size statistics.
    node_to_module: dict[str, str] = {}
    for node_id, node_attrs in graph.nodes(data=True):
        kind = kind_str(node_attrs.get("kind"))
        if kind == "MODULE":
            node_to_module[node_id] = node_id
            continue
        path = node_attrs.get("file")
        if not isinstance(path, str) or path not in module_by_file:
            continue
        owner = module_by_file[path]
        node_to_module[node_id] = owner

        if kind not in ("FUNCTION", "METHOD", "CLASS") or owner not in module_info:
            continue
        raw_end = node_attrs.get("line_end") or node_attrs.get("line_start") or 0
        try:
            end_line = int(raw_end)
        except (TypeError, ValueError):
            end_line = 0
        summary = module_info[owner]
        # Approximate LOC: the furthest line reached by any contained symbol.
        summary["loc"] = max(summary["loc"], end_line)
        summary["symbols"] += 1

    return node_to_module, module_info
95
+
96
+
97
+ # ---------------------------- dependency matrix ---------------------------
98
+
99
+
100
@dataclass
class MatrixData:
    """Module-by-module call-count matrix, as produced by ``build_matrix``."""

    # Per-module info dicts; entry i labels row i and column i of ``counts``.
    modules: list[dict[str, Any]]
    counts: list[list[int]]  # counts[i][j] = calls from modules[i] to modules[j]
    # Largest single cell in ``counts`` (0 when the matrix is empty).
    max_count: int
105
+
106
+
107
def build_matrix(
    graph: nx.MultiDiGraph, *, top_n: int = 40
) -> MatrixData:
    """Build a module-by-module matrix of cross-module CALLS counts.

    Only edges whose kind is ``CALLS`` and whose two endpoints resolve to
    different modules are counted. The ``top_n`` modules with the highest
    combined incoming + outgoing call volume are kept and then ordered by
    ``(package, qualname)``.

    Returns:
        A ``MatrixData`` where ``counts[i][j]`` is the number of calls from
        ``modules[i]`` to ``modules[j]`` and ``max_count`` is the largest
        cell (0 when there are no cross-module calls).
    """
    owner_of, module_info = _module_index(graph)

    calls_between: dict[tuple[str, str], int] = defaultdict(int)
    for caller, callee, edge in graph.edges(data=True):
        if kind_str(edge.get("kind")) != "CALLS":
            continue
        caller_mod = owner_of.get(caller)
        callee_mod = owner_of.get(callee)
        if caller_mod and callee_mod and caller_mod != callee_mod:
            calls_between[(caller_mod, callee_mod)] += 1

    # Rank modules by total call volume, counting both directions.
    volume: Counter[str] = Counter()
    for (caller_mod, callee_mod), n_calls in calls_between.items():
        volume[caller_mod] += n_calls
        volume[callee_mod] += n_calls

    def _display_order(module_id: str) -> tuple[Any, Any]:
        info = module_info[module_id]
        return info["package"], info["qualname"]

    selected = sorted(
        (module_id for module_id, _volume in volume.most_common(top_n)),
        key=_display_order,
    )

    counts: list[list[int]] = []
    for row_mod in selected:
        counts.append(
            [calls_between.get((row_mod, col_mod), 0) for col_mod in selected]
        )
    max_count = max((cell for row in counts for cell in row), default=0)

    return MatrixData(
        modules=[module_info[module_id] for module_id in selected],
        counts=counts,
        max_count=max_count,
    )
141
+
142
+
143
+ # --------------------------------- sankey ---------------------------------
144
+
145
+
146
def build_sankey(
    graph: nx.MultiDiGraph, *, max_links: int = 60
) -> dict[str, Any]:
    """Produce sankey-ready data for the heaviest cross-module call flows.

    Cross-module ``CALLS`` edges are tallied per (caller module, callee
    module) pair and the ``max_links`` largest pairs are kept.

    Returns:
        ``{"nodes": [...], "links": [...]}``. Each node carries ``name``,
        ``qualname`` and ``package``; nodes are ordered by qualname. Each
        link carries ``source`` and ``target`` (indices into ``nodes``) and
        ``value`` (the call count).
    """
    owner_of, module_info = _module_index(graph)

    flows: dict[tuple[str, str], int] = defaultdict(int)
    for caller, callee, edge in graph.edges(data=True):
        if kind_str(edge.get("kind")) != "CALLS":
            continue
        caller_mod = owner_of.get(caller)
        callee_mod = owner_of.get(callee)
        if caller_mod and callee_mod and caller_mod != callee_mod:
            flows[(caller_mod, callee_mod)] += 1

    heaviest = sorted(flows.items(), key=lambda item: item[1], reverse=True)
    heaviest = heaviest[:max_links]

    # Only modules that take part in a retained link become sankey nodes.
    endpoints: set[str] = set()
    for pair, _weight in heaviest:
        endpoints.update(pair)
    ordered = sorted(endpoints, key=lambda m: module_info[m]["qualname"])
    position = {module_id: i for i, module_id in enumerate(ordered)}

    node_rows: list[dict[str, Any]] = []
    for module_id in ordered:
        info = module_info[module_id]
        node_rows.append(
            {
                "name": info["name"],
                "qualname": info["qualname"],
                "package": info["package"],
            }
        )
    link_rows = [
        {"source": position[src_mod], "target": position[dst_mod], "value": weight}
        for (src_mod, dst_mod), weight in heaviest
    ]
    return {"nodes": node_rows, "links": link_rows}
182
+
183
+
184
+ # ------------------------------- treemap ----------------------------------
185
+
186
+
187
def build_treemap(
    graph: nx.MultiDiGraph,
    *,
    hotspot_scores: dict[str, int] | None = None,
) -> dict[str, Any]:
    """Build a ``repo -> package -> module`` hierarchy for a treemap.

    Modules whose approximate LOC is zero are skipped. Each leaf carries
    ``name``, ``value`` (LOC), ``symbols``, ``score``, ``file`` and
    ``is_test``. ``score`` is looked up in ``hotspot_scores`` by the
    module's file and defaults to 0. Packages are sorted by name; leaves
    within a package are sorted by descending ``value``.
    """
    _unused_mapping, module_info = _module_index(graph)
    scores = hotspot_scores or {}

    leaves_by_package: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for info in module_info.values():
        if not info["loc"]:
            continue
        package = info["package"] or "(root)"
        leaf = {
            "name": info["qualname"] or info["name"],
            "value": max(info["loc"], 1),
            "symbols": info["symbols"],
            "score": scores.get(info["file"], 0),
            "file": info["file"],
            "is_test": info["is_test"],
        }
        leaves_by_package[package].append(leaf)

    def _size(leaf: dict[str, Any]) -> int:
        raw = leaf.get("value", 0)
        if isinstance(raw, (int, float)):
            return int(raw)
        return 0

    # A reversed sort is still stable, so equal-sized leaves keep their order.
    children: list[dict[str, Any]] = [
        {
            "name": package,
            "children": sorted(leaves_by_package[package], key=_size, reverse=True),
        }
        for package in sorted(leaves_by_package)
    ]
    return {"name": "repo", "children": children}
220
+
221
+
222
+ # ---------------------------- flow diagrams -------------------------------
223
+
224
+
225
def _trace_outgoing(
    graph: nx.MultiDiGraph,
    start: str,
    *,
    depth: int = 4,
    max_nodes: int = 30,
) -> nx.DiGraph:
    """Collect the CALLS neighbourhood reachable from ``start``.

    Walks breadth-first along outgoing ``CALLS`` edges. Nodes that are
    ``depth`` hops away are included but not expanded, and the walk stops
    once ``max_nodes`` distinct nodes have been discovered.

    Returns:
        A ``DiGraph`` whose nodes carry copies of the original node
        attributes and whose edges are the traversed caller -> callee pairs.

    Raises:
        KeyError: if ``start`` is not a node of ``graph``.
    """
    traced: nx.DiGraph = nx.DiGraph()
    traced.add_node(start, **dict(graph.nodes[start]))

    visited: set[str] = {start}
    queue: list[tuple[str, int]] = [(start, 0)]
    head = 0  # read cursor into ``queue``; entries before it are consumed
    while head < len(queue) and len(visited) < max_nodes:
        current, hops = queue[head]
        head += 1
        if hops >= depth:
            continue
        for _caller, callee, edge in graph.out_edges(current, data=True):
            if kind_str(edge.get("kind")) != "CALLS":
                continue
            if callee not in visited:
                visited.add(callee)
                if callee in graph.nodes:
                    traced.add_node(callee, **dict(graph.nodes[callee]))
                queue.append((callee, hops + 1))
            traced.add_edge(current, callee)
            if len(visited) >= max_nodes:
                break
    return traced
253
+
254
+
255
def _mermaid_id(qualname: str, idx: int) -> str:
    """Derive a Mermaid-safe node identifier from a qualname.

    Every character outside ``[a-zA-Z0-9]`` becomes ``_`` and the result is
    cut to 40 characters (``"n"`` if nothing remains). ``idx`` is embedded
    in the prefix.
    """
    sanitized = re.sub(r"[^a-zA-Z0-9]", "_", qualname)
    stem = sanitized[:40] if sanitized else "n"
    return f"n{idx}_{stem}"
258
+
259
+
260
def _mermaid_label(attrs: dict[str, Any]) -> str:
    """Build the display label for a flowchart node.

    Starts from ``name`` (falling back to ``qualname``, then ``"?"``). When
    a dotted qualname differs from that, its last two segments are used
    instead. Double quotes become single quotes and the label is capped at
    48 characters.
    """
    label = str(attrs.get("name") or attrs.get("qualname") or "?")
    qualname = str(attrs.get("qualname") or "")
    if qualname and qualname != label:
        segments = qualname.split(".")
        if len(segments) > 1:
            # Keep the last two qualname segments for context.
            label = ".".join(segments[-2:])
    sanitized = label.replace('"', "'")
    return sanitized[:48]
268
+
269
+
270
def render_flow_diagram(graph: nx.MultiDiGraph, start: str) -> str:
    """Render a Mermaid flowchart of the CALLS originating from ``start``.

    Args:
        graph: The code graph to trace through.
        start: Id of the node the flow begins at.

    Returns:
        Mermaid ``flowchart LR`` source with one node line per traced node,
        one ``-->`` line per traced call, and a ``style`` line highlighting
        the entry node. Returns ``""`` when there is nothing to draw: either
        ``start`` is not in ``graph`` or it has no outgoing calls.
    """
    # An unknown id would otherwise raise KeyError inside the trace; treat it
    # like any other "no diagram" case so callers can test for "".
    if start not in graph.nodes:
        return ""

    sub = _trace_outgoing(graph, start)
    if sub.number_of_nodes() <= 1:
        return ""

    # The enumeration index keeps ids unique even if two qualnames sanitize
    # to the same text.
    ids: dict[str, str] = {
        n: _mermaid_id(str(graph.nodes[n].get("qualname") or n), i)
        for i, n in enumerate(sub.nodes())
    }

    lines: list[str] = ["flowchart LR"]
    for n in sub.nodes():
        attrs = dict(graph.nodes[n])
        label = _mermaid_label(attrs)
        kind = kind_str(attrs.get("kind"))
        # The node shape is chosen by symbol kind.
        if kind == "METHOD":
            lines.append(f' {ids[n]}(["{label}"])')
        elif kind == "CLASS":
            lines.append(f' {ids[n]}[["{label}"]]')
        elif kind == "MODULE":
            lines.append(f' {ids[n]}[/"{label}"/]')
        else:
            lines.append(f' {ids[n]}("{label}")')
    lines.extend(f" {ids[src]} --> {ids[dst]}" for src, dst in sub.edges())
    # Highlight the entry node.
    lines.append(f" style {ids[start]} fill:#6366f1,stroke:#a5b4fc,color:#fff")
    return "\n".join(lines)
297
+
298
+
299
def pick_flow_entry_points(
    graph: nx.MultiDiGraph, *, limit: int = 8
) -> list[dict[str, Any]]:
    """Choose flow-diagram starting points: top hotspots plus high fan-out.

    Candidates come from two sources, and test nodes are skipped in both:

    1. The first ``limit * 2`` results of ``find_hotspots``, scored
       ``fan_in * 3 + fan_out``.
    2. FUNCTION / METHOD nodes with at least three outgoing ``CALLS``
       edges, scored ``out_calls * 2 + in_calls``. A node already present
       from step 1 keeps its entry and takes the higher of the two scores.

    Returns:
        Up to ``limit`` dicts with ``id``, ``qualname``, ``file``,
        ``reason`` and ``score``, ordered by descending score.
    """

    def _count_calls(edges: Any) -> int:
        return sum(
            1 for _s, _d, data in edges
            if kind_str(data.get("kind")) == "CALLS"
        )

    candidates: dict[str, dict[str, Any]] = {}

    # 1. Top hotspots (skip tests).
    for hotspot in find_hotspots(graph, limit=limit * 2):
        node_id = hotspot.id
        if node_id not in graph.nodes or _is_test_node(dict(graph.nodes[node_id])):
            continue
        candidates[node_id] = {
            "id": node_id,
            "qualname": hotspot.qualname,
            "file": hotspot.file,
            "reason": f"hotspot, fan-in {hotspot.fan_in}",
            "score": hotspot.fan_in * 3 + hotspot.fan_out,
        }

    # 2. High fan-out callables (likely entry points / orchestrators).
    for node_id, attrs in graph.nodes(data=True):
        if kind_str(attrs.get("kind")) not in _CALLABLE_KINDS:
            continue
        if _is_test_node(dict(attrs)):
            continue
        out_calls = _count_calls(graph.out_edges(node_id, data=True))
        if out_calls < 3:
            continue
        in_calls = _count_calls(graph.in_edges(node_id, data=True))
        fan_score = out_calls * 2 + in_calls

        existing = candidates.get(node_id)
        if existing is not None:
            existing["score"] = max(cast(int, existing["score"]), fan_score)
        else:
            candidates[node_id] = {
                "id": node_id,
                "qualname": str(attrs.get("qualname") or attrs.get("name") or node_id),
                "file": str(attrs.get("file") or ""),
                "reason": f"fan-out {out_calls}",
                "score": fan_score,
            }

    by_score = sorted(
        candidates.values(),
        key=lambda entry: cast(int, entry["score"]),
        reverse=True,
    )
    return by_score[:limit]
353
+
354
+
355
+ # ---------------------------- json packaging -----------------------------
356
+
357
+
358
def to_json(obj: Any) -> str:
    """Serialize ``obj`` to compact JSON, leaving non-ASCII characters as-is.

    Separators carry no padding, so the output has no whitespace between
    items or after keys.
    """
    compact = (",", ":")
    return json.dumps(obj, ensure_ascii=False, separators=compact)
360
+
361
+
362
# Public API of this module; underscore-prefixed helpers are not listed.
__all__ = [
    "MatrixData",
    "build_matrix",
    "build_sankey",
    "build_treemap",
    "pick_flow_entry_points",
    "render_flow_diagram",
    "to_json",
]