polycodegraph 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph/__init__.py +10 -0
- codegraph/analysis/__init__.py +30 -0
- codegraph/analysis/_common.py +125 -0
- codegraph/analysis/blast_radius.py +63 -0
- codegraph/analysis/cycles.py +79 -0
- codegraph/analysis/dataflow.py +861 -0
- codegraph/analysis/dead_code.py +165 -0
- codegraph/analysis/hotspots.py +68 -0
- codegraph/analysis/infrastructure.py +439 -0
- codegraph/analysis/metrics.py +52 -0
- codegraph/analysis/report.py +222 -0
- codegraph/analysis/roles.py +323 -0
- codegraph/analysis/untested.py +79 -0
- codegraph/cli.py +1506 -0
- codegraph/config.py +64 -0
- codegraph/embed/__init__.py +35 -0
- codegraph/embed/chunker.py +120 -0
- codegraph/embed/embedder.py +113 -0
- codegraph/embed/query.py +181 -0
- codegraph/embed/store.py +360 -0
- codegraph/graph/__init__.py +0 -0
- codegraph/graph/builder.py +212 -0
- codegraph/graph/schema.py +69 -0
- codegraph/graph/store_networkx.py +55 -0
- codegraph/graph/store_sqlite.py +249 -0
- codegraph/mcp_server/__init__.py +6 -0
- codegraph/mcp_server/server.py +933 -0
- codegraph/parsers/__init__.py +0 -0
- codegraph/parsers/base.py +70 -0
- codegraph/parsers/go.py +570 -0
- codegraph/parsers/python.py +1707 -0
- codegraph/parsers/typescript.py +1397 -0
- codegraph/py.typed +0 -0
- codegraph/resolve/__init__.py +4 -0
- codegraph/resolve/calls.py +480 -0
- codegraph/review/__init__.py +31 -0
- codegraph/review/baseline.py +32 -0
- codegraph/review/differ.py +211 -0
- codegraph/review/hook.py +70 -0
- codegraph/review/risk.py +219 -0
- codegraph/review/rules.py +342 -0
- codegraph/viz/__init__.py +17 -0
- codegraph/viz/_style.py +45 -0
- codegraph/viz/dashboard.py +740 -0
- codegraph/viz/diagrams.py +370 -0
- codegraph/viz/explore.py +453 -0
- codegraph/viz/hld.py +683 -0
- codegraph/viz/html.py +115 -0
- codegraph/viz/mermaid.py +111 -0
- codegraph/viz/svg.py +77 -0
- codegraph/web/__init__.py +4 -0
- codegraph/web/server.py +165 -0
- codegraph/web/static/app.css +664 -0
- codegraph/web/static/app.js +919 -0
- codegraph/web/static/index.html +112 -0
- codegraph/web/static/views/architecture.js +1671 -0
- codegraph/web/static/views/graph3d.css +564 -0
- codegraph/web/static/views/graph3d.js +999 -0
- codegraph/web/static/views/graph3d_transform.js +984 -0
- codegraph/workspace/__init__.py +34 -0
- codegraph/workspace/config.py +110 -0
- codegraph/workspace/operations.py +294 -0
- polycodegraph-0.1.0.dist-info/METADATA +687 -0
- polycodegraph-0.1.0.dist-info/RECORD +67 -0
- polycodegraph-0.1.0.dist-info/WHEEL +4 -0
- polycodegraph-0.1.0.dist-info/entry_points.txt +2 -0
- polycodegraph-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
"""Diagram-style visualizations: matrix, treemap, sankey, flowcharts.
|
|
2
|
+
|
|
3
|
+
These complement the node-link views in ``viz/explore.py`` with views that
|
|
4
|
+
actually *tell a story* about the codebase — call volume between modules
|
|
5
|
+
(matrix + sankey), file-size landscape (treemap), and call chains for top
|
|
6
|
+
entry points (Mermaid flowcharts).
|
|
7
|
+
|
|
8
|
+
All renderers in this module are pure-Python and produce small JSON blobs
|
|
9
|
+
that the dashboard HTML page consumes via D3 / Mermaid loaded from CDN.
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import re
|
|
15
|
+
from collections import Counter, defaultdict
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from typing import Any, cast
|
|
18
|
+
|
|
19
|
+
import networkx as nx
|
|
20
|
+
|
|
21
|
+
from codegraph.analysis import find_hotspots
|
|
22
|
+
from codegraph.viz._style import kind_str
|
|
23
|
+
|
|
24
|
+
# Node kinds treated as callables when scanning the graph for high fan-out
# flow entry points.
_CALLABLE_KINDS: frozenset[str] = frozenset({"FUNCTION", "METHOD"})
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _is_test_node(attrs: dict[str, Any]) -> bool:
|
|
28
|
+
return bool((attrs.get("metadata") or {}).get("is_test"))
|
|
29
|
+
|
|
30
|
+
# Captures the leading run of non-dot characters, i.e. the first dotted
# segment of a qualname; used to derive a module's top-level package.
_PACKAGE_RE = re.compile(r"^([^.]+)")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# ----------------------------- module helpers -----------------------------
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _module_index(graph: nx.MultiDiGraph) -> tuple[
    dict[str, str], dict[str, dict[str, Any]]
]:
    """Index the graph's MODULE nodes and map every node to its module.

    Returns a pair ``(node_to_module, module_info)``:

    * ``node_to_module`` maps a node id to the id of the MODULE node whose
      ``file`` attribute equals the node's own ``file``. MODULE nodes map to
      themselves; nodes whose file matches no module are left out.
    * ``module_info`` maps each MODULE node id to a summary dict with the
      keys ``id``, ``qualname``, ``name``, ``file``, ``package``,
      ``language``, ``is_test``, ``loc`` and ``symbols``.

    ``loc`` is an approximation: the largest ``line_end`` (falling back to
    ``line_start``) of any FUNCTION / METHOD / CLASS node in the module.
    ``symbols`` counts those same nodes.
    """
    module_for_file: dict[str, str] = {}
    module_info: dict[str, dict[str, Any]] = {}

    # Pass 1: register every MODULE node and remember which file it owns.
    for node_id, attrs in graph.nodes(data=True):
        if kind_str(attrs.get("kind")) != "MODULE":
            continue
        path = attrs.get("file")
        if isinstance(path, str):
            # A later module with the same file replaces an earlier one.
            module_for_file[path] = node_id
        qualname = str(attrs.get("qualname") or "")
        package = ""
        if qualname:
            found = _PACKAGE_RE.match(qualname)
            if found:
                package = found.group(1)
        metadata = attrs.get("metadata") or {}
        module_info[node_id] = {
            "id": node_id,
            "qualname": qualname,
            "name": attrs.get("name") or qualname or node_id[:8],
            "file": path or "",
            "package": package,
            "language": str(attrs.get("language") or ""),
            "is_test": bool(metadata.get("is_test")),
            "loc": 0,
            "symbols": 0,
        }

    # Pass 2: attach each node to its module and fold symbol extents into the
    # owning module's approximate line count.
    symbol_kinds = ("FUNCTION", "METHOD", "CLASS")
    node_to_module: dict[str, str] = {}
    for node_id, attrs in graph.nodes(data=True):
        kind = kind_str(attrs.get("kind"))
        if kind == "MODULE":
            node_to_module[node_id] = node_id
            continue
        path = attrs.get("file")
        if not isinstance(path, str) or path not in module_for_file:
            continue
        owner = module_for_file[path]
        node_to_module[node_id] = owner
        if kind not in symbol_kinds or owner not in module_info:
            continue
        raw_end = attrs.get("line_end") or attrs.get("line_start") or 0
        try:
            last_line = int(raw_end)
        except (TypeError, ValueError):
            # Unparseable line numbers contribute nothing to the estimate.
            last_line = 0
        summary = module_info[owner]
        if last_line > summary["loc"]:
            summary["loc"] = last_line
        summary["symbols"] += 1

    return node_to_module, module_info
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# ---------------------------- dependency matrix ---------------------------
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@dataclass
class MatrixData:
    """Module-by-module call-count matrix as produced by ``build_matrix``."""

    # One summary dict per selected module; position i labels both row i and
    # column i of ``counts``.
    modules: list[dict[str, Any]]
    counts: list[list[int]]  # counts[i][j] = calls from modules[i] to modules[j]
    # Largest single cell of ``counts``; 0 when no modules were selected.
    max_count: int
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def build_matrix(
    graph: nx.MultiDiGraph, *, top_n: int = 40
) -> MatrixData:
    """Build a module x module call-count matrix from cross-module CALLS edges.

    Only edges whose kind is ``CALLS`` and whose two endpoints resolve to two
    *different* modules are counted, so the diagonal is always zero.

    Args:
        graph: Code graph to summarise.
        top_n: Maximum number of modules to keep, chosen by total incoming
            plus outgoing cross-module call volume.

    Returns:
        A ``MatrixData`` whose ``modules`` are ordered by (package, qualname)
        and whose ``counts[i][j]`` is the number of calls from ``modules[i]``
        to ``modules[j]``.
    """
    node_to_module, module_info = _module_index(graph)

    pair_counts: dict[tuple[str, str], int] = defaultdict(int)
    for src, dst, data in graph.edges(data=True):
        if kind_str(data.get("kind")) != "CALLS":
            continue
        caller = node_to_module.get(src)
        callee = node_to_module.get(dst)
        if caller and callee and caller != callee:
            pair_counts[(caller, callee)] += 1

    # Rank modules by total in+out call volume and keep the busiest ones.
    activity: Counter[str] = Counter()
    for (caller, callee), calls in pair_counts.items():
        activity[caller] += calls
        activity[callee] += calls

    # Order the selection by package then qualname so related modules end up
    # in adjacent rows/columns.
    chosen = sorted(
        (module for module, _volume in activity.most_common(top_n)),
        key=lambda m: (module_info[m]["package"], module_info[m]["qualname"]),
    )

    # ``.get`` keeps the defaultdict from growing an entry for every cell.
    counts = [
        [pair_counts.get((row, col), 0) for col in chosen]
        for row in chosen
    ]
    max_count = max((max(row) for row in counts), default=0)
    return MatrixData(
        modules=[module_info[m] for m in chosen],
        counts=counts,
        max_count=max_count,
    )
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
# --------------------------------- sankey ---------------------------------
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def build_sankey(
    graph: nx.MultiDiGraph, *, max_links: int = 60
) -> dict[str, Any]:
    """Return Sankey-ready nodes and links for the heaviest cross-module calls.

    Counts ``CALLS`` edges between distinct modules, keeps the ``max_links``
    pairs with the highest counts, and returns::

        {"nodes": [{"name", "qualname", "package"}, ...],
         "links": [{"source": i, "target": j, "value": count}, ...]}

    where ``source`` / ``target`` are indexes into ``nodes`` and ``nodes`` is
    sorted by qualname.
    """
    node_to_module, module_info = _module_index(graph)

    flows: dict[tuple[str, str], int] = defaultdict(int)
    for src, dst, data in graph.edges(data=True):
        if kind_str(data.get("kind")) == "CALLS":
            caller = node_to_module.get(src)
            callee = node_to_module.get(dst)
            if caller and callee and caller != callee:
                flows[(caller, callee)] += 1

    # Heaviest flows first; equal counts keep their first-seen order.
    ranked = sorted(flows.items(), key=lambda item: item[1], reverse=True)
    heaviest = ranked[:max_links]

    endpoints: set[str] = set()
    for (caller, callee), _count in heaviest:
        endpoints.add(caller)
        endpoints.add(callee)

    ordered = sorted(endpoints, key=lambda m: module_info[m]["qualname"])
    position = {module: index for index, module in enumerate(ordered)}

    node_rows: list[dict[str, Any]] = []
    for module in ordered:
        info = module_info[module]
        node_rows.append(
            {
                "name": info["name"],
                "qualname": info["qualname"],
                "package": info["package"],
            }
        )
    link_rows = [
        {"source": position[caller], "target": position[callee], "value": count}
        for (caller, callee), count in heaviest
    ]
    return {"nodes": node_rows, "links": link_rows}
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# ------------------------------- treemap ----------------------------------
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def build_treemap(
    graph: nx.MultiDiGraph,
    *,
    hotspot_scores: dict[str, int] | None = None,
) -> dict[str, Any]:
    """Return a ``repo -> package -> module`` hierarchy for a treemap.

    Each module leaf carries ``name``, ``value`` (approximate line count),
    ``symbols``, ``score``, ``file`` and ``is_test``. Modules with a zero
    line count are omitted. ``score`` is looked up in ``hotspot_scores`` by
    the module's file and defaults to 0.

    Packages are listed alphabetically; modules without a package go under
    ``"(root)"``. Within a package, modules are ordered largest first.
    """
    _unused_index, module_info = _module_index(graph)
    scores = hotspot_scores or {}

    grouped: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for info in module_info.values():
        loc = info["loc"]
        if not loc:
            continue
        leaf = {
            "name": info["qualname"] or info["name"],
            "value": max(loc, 1),
            "symbols": info["symbols"],
            "score": scores.get(info["file"], 0),
            "file": info["file"],
            "is_test": info["is_test"],
        }
        grouped[info["package"] or "(root)"].append(leaf)

    def _leaf_size(leaf: dict[str, Any]) -> int:
        # Non-numeric sizes sort as 0.
        size = leaf.get("value", 0)
        if isinstance(size, (int, float)):
            return int(size)
        return 0

    packages = [
        {
            "name": package,
            # Stable descending sort: equal sizes keep insertion order.
            "children": sorted(grouped[package], key=_leaf_size, reverse=True),
        }
        for package in sorted(grouped)
    ]
    return {"name": "repo", "children": packages}
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# ---------------------------- flow diagrams -------------------------------
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def _trace_outgoing(
    graph: nx.MultiDiGraph,
    start: str,
    *,
    depth: int = 4,
    max_nodes: int = 30,
) -> nx.DiGraph:
    """Collect the CALLS neighbourhood reachable from ``start``, breadth-first.

    Walks outgoing ``CALLS`` edges up to ``depth`` hops from ``start`` and
    stops discovering once ``max_nodes`` nodes have been seen. Node
    attributes are copied from ``graph``. An edge to an already-seen node is
    still recorded, so the result can contain cycles.
    """
    reached: nx.DiGraph = nx.DiGraph()
    reached.add_node(start, **dict(graph.nodes[start]))

    visited: set[str] = {start}
    # FIFO queue of (node, hops from start), read through a moving cursor.
    queue: list[tuple[str, int]] = [(start, 0)]
    cursor = 0
    while cursor < len(queue) and len(visited) < max_nodes:
        current, hops = queue[cursor]
        cursor += 1
        if hops >= depth:
            continue
        for _origin, callee, edge in graph.out_edges(current, data=True):
            if kind_str(edge.get("kind")) != "CALLS":
                continue
            if callee not in visited:
                visited.add(callee)
                if callee in graph.nodes:
                    reached.add_node(callee, **dict(graph.nodes[callee]))
                queue.append((callee, hops + 1))
            reached.add_edge(current, callee)
            if len(visited) >= max_nodes:
                # Node budget exhausted: stop expanding this node as well.
                break
    return reached
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _mermaid_id(qualname: str, idx: int) -> str:
|
|
256
|
+
safe = re.sub(r"[^a-zA-Z0-9]", "_", qualname)[:40] or "n"
|
|
257
|
+
return f"n{idx}_{safe}"
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def _mermaid_label(attrs: dict[str, Any]) -> str:
|
|
261
|
+
name = str(attrs.get("name") or attrs.get("qualname") or "?")
|
|
262
|
+
qn = str(attrs.get("qualname") or "")
|
|
263
|
+
if qn and qn != name:
|
|
264
|
+
# Show last two qualname segments for context.
|
|
265
|
+
parts = qn.split(".")
|
|
266
|
+
name = ".".join(parts[-2:]) if len(parts) > 1 else name
|
|
267
|
+
return name.replace('"', "'")[:48]
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def render_flow_diagram(graph: nx.MultiDiGraph, start: str) -> str:
    """Render the CALLS flow starting at ``start`` as Mermaid flowchart text.

    Returns an empty string when the trace from ``start`` contains no node
    other than ``start`` itself. Otherwise the output is a left-to-right
    flowchart with one declaration per node (its shape depends on the node
    kind), one arrow per traced edge, and a style line for the entry node.
    """
    sub = _trace_outgoing(graph, start)
    if sub.number_of_nodes() <= 1:
        return ""

    ids: dict[str, str] = {
        node: _mermaid_id(str(graph.nodes[node].get("qualname") or node), position)
        for position, node in enumerate(sub.nodes())
    }

    # Opening / closing delimiters around the quoted label, per node kind.
    shapes = {
        "METHOD": ('(["', '"])'),
        "CLASS": ('[["', '"]]'),
        "MODULE": ('[/"', '"/]'),
    }
    fallback_shape = ('("', '")')

    lines: list[str] = ["flowchart LR"]
    for node in sub.nodes():
        attrs = dict(graph.nodes[node])
        label = _mermaid_label(attrs)
        opener, closer = shapes.get(kind_str(attrs.get("kind")), fallback_shape)
        lines.append(f" {ids[node]}{opener}{label}{closer}")
    lines.extend(f" {ids[src]} --> {ids[dst]}" for src, dst in sub.edges())
    # Highlight the entry node.
    lines.append(f" style {ids[start]} fill:#6366f1,stroke:#a5b4fc,color:#fff")
    return "\n".join(lines)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def pick_flow_entry_points(
    graph: nx.MultiDiGraph, *, limit: int = 8
) -> list[dict[str, Any]]:
    """Choose up to ``limit`` starting nodes for flow diagrams.

    Candidates come from two sources:

    1. the top ``limit * 2`` hotspots from ``find_hotspots``, scored
       ``fan_in * 3 + fan_out``;
    2. non-test FUNCTION / METHOD nodes with at least three outgoing CALLS
       edges, scored ``out_calls * 2 + in_calls``.

    Test nodes are skipped in both. A node found by both keeps its hotspot
    entry, with the score raised to the larger of the two. Each result has
    the keys ``id``, ``qualname``, ``file``, ``reason`` and ``score``;
    results are ordered by descending score.
    """

    def _count_calls(edges: Any) -> int:
        # Number of edges in ``edges`` whose kind is CALLS.
        return sum(
            1 for _s, _d, data in edges
            if kind_str(data.get("kind")) == "CALLS"
        )

    picked: dict[str, dict[str, Any]] = {}

    # 1. Top hotspots (skip tests).
    for hotspot in find_hotspots(graph, limit=limit * 2):
        node_id = hotspot.id
        if node_id not in graph.nodes:
            continue
        if _is_test_node(dict(graph.nodes[node_id])):
            continue
        picked[node_id] = {
            "id": node_id,
            "qualname": hotspot.qualname,
            "file": hotspot.file,
            "reason": f"hotspot, fan-in {hotspot.fan_in}",
            "score": hotspot.fan_in * 3 + hotspot.fan_out,
        }

    # 2. High fan-out callables (likely entry points / orchestrators).
    for node_id, attrs in graph.nodes(data=True):
        if kind_str(attrs.get("kind")) not in _CALLABLE_KINDS:
            continue
        if _is_test_node(dict(attrs)):
            continue
        out_calls = _count_calls(graph.out_edges(node_id, data=True))
        if out_calls < 3:
            continue
        in_calls = _count_calls(graph.in_edges(node_id, data=True))
        fan_score = out_calls * 2 + in_calls

        already = picked.get(node_id)
        if already is not None:
            # Already a hotspot: keep that entry, only raise the score.
            already["score"] = max(cast(int, already["score"]), fan_score)
        else:
            picked[node_id] = {
                "id": node_id,
                "qualname": str(attrs.get("qualname") or attrs.get("name") or node_id),
                "file": str(attrs.get("file") or ""),
                "reason": f"fan-out {out_calls}",
                "score": fan_score,
            }

    by_score = sorted(
        picked.values(), key=lambda entry: cast(int, entry["score"]), reverse=True
    )
    return by_score[:limit]
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
# ---------------------------- json packaging -----------------------------
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def to_json(obj: Any) -> str:
    """Serialize ``obj`` to compact JSON text.

    Uses separators without whitespace and leaves non-ASCII characters
    unescaped.
    """
    compact_separators = (",", ":")
    return json.dumps(obj, ensure_ascii=False, separators=compact_separators)
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
# Names exported by ``from <this module> import *``; the underscore-prefixed
# helpers are deliberately left out.
__all__ = [
    "MatrixData",
    "build_matrix",
    "build_sankey",
    "build_treemap",
    "pick_flow_entry_points",
    "render_flow_diagram",
    "to_json",
]
|