polycodegraph 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph/__init__.py +10 -0
- codegraph/analysis/__init__.py +30 -0
- codegraph/analysis/_common.py +125 -0
- codegraph/analysis/blast_radius.py +63 -0
- codegraph/analysis/cycles.py +79 -0
- codegraph/analysis/dataflow.py +861 -0
- codegraph/analysis/dead_code.py +165 -0
- codegraph/analysis/hotspots.py +68 -0
- codegraph/analysis/infrastructure.py +439 -0
- codegraph/analysis/metrics.py +52 -0
- codegraph/analysis/report.py +222 -0
- codegraph/analysis/roles.py +323 -0
- codegraph/analysis/untested.py +79 -0
- codegraph/cli.py +1506 -0
- codegraph/config.py +64 -0
- codegraph/embed/__init__.py +35 -0
- codegraph/embed/chunker.py +120 -0
- codegraph/embed/embedder.py +113 -0
- codegraph/embed/query.py +181 -0
- codegraph/embed/store.py +360 -0
- codegraph/graph/__init__.py +0 -0
- codegraph/graph/builder.py +212 -0
- codegraph/graph/schema.py +69 -0
- codegraph/graph/store_networkx.py +55 -0
- codegraph/graph/store_sqlite.py +249 -0
- codegraph/mcp_server/__init__.py +6 -0
- codegraph/mcp_server/server.py +933 -0
- codegraph/parsers/__init__.py +0 -0
- codegraph/parsers/base.py +70 -0
- codegraph/parsers/go.py +570 -0
- codegraph/parsers/python.py +1707 -0
- codegraph/parsers/typescript.py +1397 -0
- codegraph/py.typed +0 -0
- codegraph/resolve/__init__.py +4 -0
- codegraph/resolve/calls.py +480 -0
- codegraph/review/__init__.py +31 -0
- codegraph/review/baseline.py +32 -0
- codegraph/review/differ.py +211 -0
- codegraph/review/hook.py +70 -0
- codegraph/review/risk.py +219 -0
- codegraph/review/rules.py +342 -0
- codegraph/viz/__init__.py +17 -0
- codegraph/viz/_style.py +45 -0
- codegraph/viz/dashboard.py +740 -0
- codegraph/viz/diagrams.py +370 -0
- codegraph/viz/explore.py +453 -0
- codegraph/viz/hld.py +683 -0
- codegraph/viz/html.py +115 -0
- codegraph/viz/mermaid.py +111 -0
- codegraph/viz/svg.py +77 -0
- codegraph/web/__init__.py +4 -0
- codegraph/web/server.py +165 -0
- codegraph/web/static/app.css +664 -0
- codegraph/web/static/app.js +919 -0
- codegraph/web/static/index.html +112 -0
- codegraph/web/static/views/architecture.js +1671 -0
- codegraph/web/static/views/graph3d.css +564 -0
- codegraph/web/static/views/graph3d.js +999 -0
- codegraph/web/static/views/graph3d_transform.js +984 -0
- codegraph/workspace/__init__.py +34 -0
- codegraph/workspace/config.py +110 -0
- codegraph/workspace/operations.py +294 -0
- polycodegraph-0.1.0.dist-info/METADATA +687 -0
- polycodegraph-0.1.0.dist-info/RECORD +67 -0
- polycodegraph-0.1.0.dist-info/WHEEL +4 -0
- polycodegraph-0.1.0.dist-info/entry_points.txt +2 -0
- polycodegraph-0.1.0.dist-info/licenses/LICENSE +21 -0
codegraph/viz/hld.py
ADDED
|
@@ -0,0 +1,683 @@
|
|
|
1
|
+
"""Generic High-Level-Design view derived from any repo's package structure.
|
|
2
|
+
|
|
3
|
+
Classification strategy:
|
|
4
|
+
|
|
5
|
+
1. Compute the longest common qualname prefix of all MODULE nodes (the
|
|
6
|
+
"root" package). Strip it.
|
|
7
|
+
2. For each module, walk its qualname segments from rightmost to leftmost
|
|
8
|
+
(also splitting snake_case tokens like ``store_sqlite``). The first
|
|
9
|
+
token that matches a layer pattern in :data:`LAYER_CATALOG` wins.
|
|
10
|
+
3. If nothing matches, the module's first non-root segment becomes its own
|
|
11
|
+
ad-hoc layer. This keeps the diagram useful even for domain-specific
|
|
12
|
+
package names (``users``, ``billing``, ...).
|
|
13
|
+
|
|
14
|
+
The catalog patterns intentionally cover the most common architectural
|
|
15
|
+
concepts (cli/api, pipeline, parsers, resolve, domain, storage, analysis,
|
|
16
|
+
visualisation, infra). Unknown subpackages get a neutral grey ad-hoc layer named after their first segment.
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import re
|
|
21
|
+
from collections import defaultdict
|
|
22
|
+
from dataclasses import dataclass, field
|
|
23
|
+
from typing import Any, cast
|
|
24
|
+
|
|
25
|
+
import networkx as nx
|
|
26
|
+
|
|
27
|
+
from codegraph.viz._style import kind_str
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
class Layer:
    """Immutable description of one layer as it is rendered in the HLD."""

    # Identifier used as the key into the per-layer component mapping.
    id: str
    # Heading shown for the layer.
    title: str
    # Secondary caption shown next to the title.
    subtitle: str
    # Colour string used for fills and strokes of the layer.
    color: str
    # Colour string for text; defaults to a dark tone.
    text_color: str = "#0b1220"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass(frozen=True)
class LayerSpec:
    """Catalog entry: a layer plus the name tokens that map modules onto it."""

    # Vertical position; larger numbers sit lower in the flowchart.
    tier: int
    # Layer identifier (shared with ``Layer.id``).
    id: str
    # Heading shown for the layer.
    title: str
    # Secondary caption shown next to the title.
    subtitle: str
    # Colour string used when rendering the layer.
    color: str
    # Lower-case tokens that classify a qualname segment into this layer.
    patterns: tuple[str, ...] = ()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Generic catalog. Tiers grow downwards: a larger ``tier`` places the layer
# lower in the rendered top-to-bottom flowchart. When a token appears in more
# than one entry, the entry listed first wins during classification.
LAYER_CATALOG: list[LayerSpec] = [
    LayerSpec(
        tier=1,
        id="cli",
        title="CLI / API",
        subtitle="user-facing entrypoints",
        color="#a78bfa",
        patterns=(
            "cli", "api", "routes", "handlers", "controllers", "app", "main",
            "entry", "entrypoint", "server", "rpc",
        ),
    ),
    LayerSpec(
        tier=2,
        id="pipeline",
        title="Pipeline",
        subtitle="build / orchestration",
        color="#fcd34d",
        patterns=(
            "pipeline", "builder", "build", "orchestrator", "scheduler",
            "jobs", "tasks", "worker", "queue", "runner",
        ),
    ),
    LayerSpec(
        tier=3,
        id="parsers",
        title="Ingestion",
        subtitle="extractors / parsers",
        color="#fb923c",
        patterns=(
            "parser", "parsers", "extract", "extractor", "extractors",
            "ingest", "reader", "loader", "scraper", "scrape", "import",
        ),
    ),
    LayerSpec(
        tier=4,
        id="resolve",
        title="Resolution",
        subtitle="linking / binding",
        color="#f472b6",
        patterns=(
            "resolve", "resolver", "resolvers", "link", "linker", "binding",
        ),
    ),
    LayerSpec(
        tier=5,
        id="domain",
        title="Domain",
        subtitle="core business logic",
        color="#c084fc",
        patterns=(
            "service", "services", "domain", "logic", "core", "engine",
            "usecase", "usecases",
        ),
    ),
    LayerSpec(
        tier=6,
        id="storage",
        title="Storage",
        subtitle="data + persistence",
        color="#60a5fa",
        patterns=(
            "store", "storage", "db", "database", "repo", "repository",
            "persistence", "schema", "sqlite", "postgres", "mysql",
            "mongo", "redis", "model", "models", "entities",
        ),
    ),
    LayerSpec(
        tier=7,
        id="analysis",
        title="Analysis",
        subtitle="metrics / checks",
        color="#34d399",
        patterns=(
            "analysis", "analyze", "analyzer", "metric", "metrics",
            "check", "checks", "lint", "quality", "insight", "insights",
            "stats", "report", "reporting",
        ),
    ),
    LayerSpec(
        tier=8,
        id="viz",
        title="Visualisation",
        subtitle="render / dashboards",
        color="#22d3ee",
        patterns=(
            "viz", "visual", "visualization", "visualisation", "render",
            "renderer", "dashboard", "ui", "frontend", "web", "html",
        ),
    ),
    LayerSpec(
        tier=9,
        id="infra",
        title="Infra / utils",
        subtitle="shared helpers",
        color="#94a3b8",
        patterns=(
            "util", "utils", "helper", "helpers", "common", "internal",
            "tools", "misc", "support", "config", "settings", "constants",
        ),
    ),
]
|
|
90
|
+
|
|
91
|
+
# Backward-compat: the catalog used to be exposed as ``LAYERS``; keep a plain
# ``Layer`` view of every catalog entry, in catalog order.
LAYERS: list[Layer] = [
    Layer(id=spec.id, title=spec.title, subtitle=spec.subtitle, color=spec.color)
    for spec in LAYER_CATALOG
]

# Tier and colour given to ad-hoc layers that have no catalog entry.
_FALLBACK_TIER = 5
_FALLBACK_COLOR = "#94a3b8"
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _split_token(seg: str) -> list[str]:
|
|
101
|
+
return [p for p in re.split(r"[_\-]", seg.lower()) if p]
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _classify_segments(segments: list[str]) -> str | None:
    """Return the catalog layer id for the rightmost matching segment.

    Segments are examined from last to first. For each one the whole
    lower-cased segment is tried first, then its ``_`` / ``-`` separated
    parts in left-to-right order. ``None`` is returned when nothing matches.
    """
    # token -> layer id; the first catalog entry listing a token keeps it.
    lookup: dict[str, str] = {}
    for spec in LAYER_CATALOG:
        for pat in spec.patterns:
            if pat not in lookup:
                lookup[pat] = spec.id

    for seg in reversed(segments):
        whole = seg.lower()
        if whole in lookup:
            return lookup[whole]
        hit = next((lookup[p] for p in _split_token(seg) if p in lookup), None)
        if hit is not None:
            return hit
    return None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _common_root(qualnames: list[str]) -> str:
|
|
121
|
+
if not qualnames:
|
|
122
|
+
return ""
|
|
123
|
+
split = [qn.split(".") for qn in qualnames if qn]
|
|
124
|
+
if not split:
|
|
125
|
+
return ""
|
|
126
|
+
common: list[str] = []
|
|
127
|
+
for segs in zip(*split, strict=False):
|
|
128
|
+
if len(set(segs)) == 1:
|
|
129
|
+
common.append(segs[0])
|
|
130
|
+
else:
|
|
131
|
+
break
|
|
132
|
+
# If the only common segment IS each module's full qualname (i.e. flat
|
|
133
|
+
# one-segment package set), don't strip — we'd have nothing left.
|
|
134
|
+
if len(common) >= max(len(s) for s in split):
|
|
135
|
+
common = common[:-1]
|
|
136
|
+
return ".".join(common)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _is_skippable_module(qn: str, file: str) -> bool:
    """Tell whether a module should be left out of the HLD.

    True for test modules (per ``_is_test_module``) and for files named
    ``__init__.py``, compared case-insensitively with ``\\`` treated as ``/``.
    """
    if _is_test_module(qn, file):
        return True
    normalized = (file or "").replace("\\", "/").lower()
    return normalized == "__init__.py" or normalized.endswith("/__init__.py")
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _is_test_module(qn: str, file: str) -> bool:
|
|
148
|
+
if not qn:
|
|
149
|
+
return False
|
|
150
|
+
f = (file or "").replace("\\", "/").lower()
|
|
151
|
+
if "/tests/" in f or "/test/" in f or f.startswith("tests/") or f.startswith("test/"):
|
|
152
|
+
return True
|
|
153
|
+
segs = qn.split(".")
|
|
154
|
+
return any(s == "tests" or s == "test" or s.startswith("test_") for s in segs)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _file_path_to_module_qualname(graph: nx.MultiDiGraph) -> dict[str, str]:
    """Map each kept MODULE node's ``file`` attribute to its ``qualname``.

    Only MODULE nodes whose ``file`` and ``qualname`` are strings (qualname
    non-empty) and that are not skippable are included. If several nodes
    share a file, the one visited last wins.
    """
    mapping: dict[str, str] = {}
    for _nid, attrs in graph.nodes(data=True):
        if kind_str(attrs.get("kind")) == "MODULE":
            path = attrs.get("file")
            qualname = attrs.get("qualname")
            usable = (
                isinstance(path, str)
                and isinstance(qualname, str)
                and bool(qualname)
            )
            if usable and not _is_skippable_module(qualname, path):
                mapping[path] = qualname
    return mapping
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _module_qualnames(graph: nx.MultiDiGraph) -> list[str]:
    """List the qualnames of all MODULE nodes that are kept in the HLD.

    Nodes without a qualname and skippable modules (tests, ``__init__``
    shells) are left out. Order follows graph node iteration order.
    """
    names: list[str] = []
    for _nid, attrs in graph.nodes(data=True):
        if kind_str(attrs.get("kind")) != "MODULE":
            continue
        qualname = attrs.get("qualname")
        if not qualname:
            continue
        if _is_skippable_module(str(qualname or ""), str(attrs.get("file") or "")):
            continue
        names.append(str(qualname))
    return names
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _layer_id_for(qn: str, root: str) -> str:
    """Return the layer id for module qualname *qn* after stripping *root*.

    The catalog classification is used when a segment matches; otherwise the
    first remaining segment (lower-cased) becomes an ad-hoc layer id. A
    qualname with nothing left after stripping maps to ``"main"``.
    """
    if not root:
        rest = qn
    elif qn == root:
        rest = ""
    elif qn.startswith(root + "."):
        rest = qn[len(root) + 1:]
    else:
        # Qualname lives outside the root package: classify it unstripped.
        rest = qn

    segments = [s for s in rest.split(".") if s]
    if not segments:
        return "main"
    return _classify_segments(segments) or segments[0].lower()
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def derive_layers(graph: nx.MultiDiGraph) -> tuple[list[Layer], str]:
    """Return the layers actually used by *graph* plus the stripped root.

    Every kept module is assigned a layer id. Ids found in the catalog take
    the catalog's tier and styling; any other id becomes an ad-hoc layer on
    the fallback tier. The result is ordered by ``(tier, layer id)``.
    """
    qns = _module_qualnames(graph)
    root = _common_root(qns)
    layer_ids = {_layer_id_for(qn, root) for qn in qns}

    specs = {spec.id: spec for spec in LAYER_CATALOG}
    ranked: list[tuple[int, str, Layer]] = []
    for lid in layer_ids:
        spec = specs.get(lid)
        if spec is None:
            # Ad-hoc layer named after the package segment.
            ranked.append((
                _FALLBACK_TIER,
                lid,
                Layer(lid, lid.title(), "module group", _FALLBACK_COLOR),
            ))
        else:
            ranked.append((
                spec.tier,
                lid,
                Layer(spec.id, spec.title, spec.subtitle, spec.color),
            ))
    ranked.sort(key=lambda item: item[:2])
    return [layer for _tier, _lid, layer in ranked], root
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _node_to_layer(
    graph: nx.MultiDiGraph, root: str
) -> tuple[dict[str, str], dict[str, str]]:
    """Return (node_id -> layer_id, node_id -> module_qualname).

    MODULE nodes are classified from their own qualname and dropped when
    they are skippable (test modules, ``__init__`` shells).

    Every other node is attributed to its owning module: the module whose
    ``file`` matches the node's, or, failing that, the node's qualname with
    its last segment removed. The skip test is applied to that *owner*, not
    to the symbol's own qualname, so a production symbol whose name merely
    starts with ``test_`` (e.g. ``pkg.db.test_connection``) stays in the HLD
    while symbols that live in test modules are still excluded.
    """
    # Only contains files of modules that are NOT skippable.
    file_to_module_qn = _file_path_to_module_qualname(graph)
    node_to_layer: dict[str, str] = {}
    node_to_module_qn: dict[str, str] = {}

    # Many symbols share one module; classify each module qualname once.
    layer_cache: dict[str, str] = {}

    def layer_of(module_qn: str) -> str:
        lid = layer_cache.get(module_qn)
        if lid is None:
            lid = layer_cache[module_qn] = _layer_id_for(module_qn, root)
        return lid

    for nid, attrs in graph.nodes(data=True):
        kind = kind_str(attrs.get("kind"))
        qn = str(attrs.get("qualname") or "")
        f = attrs.get("file")
        file_str = str(f or "")

        if kind == "MODULE" and qn:
            if _is_skippable_module(qn, file_str):
                continue
            node_to_layer[nid] = layer_of(qn)
            node_to_module_qn[nid] = qn
            continue

        module_qn = file_to_module_qn.get(f) if isinstance(f, str) else None
        if not module_qn:
            if not qn:
                continue
            # No kept module owns this file: fall back to the parent
            # qualname and make sure that parent is not test/__init__ code.
            module_qn = qn.rsplit(".", 1)[0] if "." in qn else qn
            if _is_skippable_module(module_qn, file_str):
                continue
        node_to_layer[nid] = layer_of(module_qn)
        node_to_module_qn[nid] = module_qn
    return node_to_layer, node_to_module_qn
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _short_name(qn: str) -> str:
|
|
250
|
+
return qn.rsplit(".", 1)[-1] if qn else qn
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
@dataclass
class HldPayload:
    """Container for everything ``build_hld`` produces for one graph."""

    # One dict per used layer: id, title, subtitle, color.
    layers: list[dict[str, Any]]
    # Aggregated cross-layer edges: source, target, kind, weight.
    edges: list[dict[str, Any]]
    # layer id -> module summaries (qualname, name, file, symbols).
    components: dict[str, list[dict[str, Any]]]
    # module qualname -> drill-down record including its symbols.
    modules: dict[str, dict[str, Any]]
    # Mermaid source of the layered flowchart.
    mermaid_layered: str
    # Mermaid source of the context diagram.
    mermaid_context: str
    # Summary counters (layers, components, cross-layer edges/calls).
    metrics: dict[str, int]
    # Common qualname prefix that was stripped before classification.
    root: str = ""
    # v0.2 cross-stack data-flow surfaces (default empty so older payloads
    # remain backwards-compatible). DF1 fills routes/sql; DF2 fills fetches.
    routes: list[dict[str, Any]] = field(default_factory=list)
    sql_io: list[dict[str, Any]] = field(default_factory=list)
    fetches: list[dict[str, Any]] = field(default_factory=list)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def serialize_route_edges(
    graph: nx.MultiDiGraph,
    *,
    include_dataflow: bool = True,
) -> list[dict[str, Any]]:
    """Serialize ROUTE edges into the HLD payload's ``routes`` array.

    Each ROUTE edge yields one entry holding the qualname of the edge's
    source node (``handler_qn``) and the ``method`` / ``path`` /
    ``framework`` values from the edge metadata; missing values become
    ``""``. Entries are ordered by ``(path, method, handler_qn)`` for stable
    rendering.

    With ``include_dataflow`` left at True, every entry additionally gets a
    ``role`` (from the handler node's metadata, else ``None``) and a
    ``dataflow`` field computed by
    :func:`codegraph.analysis.dataflow.shape_hops_for_handler`, so the
    architecture dashboard can render a per-handler trace without an extra
    API round-trip. Pass False for the legacy shape (e.g. existing
    snapshots, bandwidth-sensitive clients).
    """
    # Imported here rather than at module level so the analysis package does
    # not depend on the viz layer's import order, and to avoid cycles.
    from codegraph.analysis.dataflow import shape_hops_for_handler

    routes: list[dict[str, Any]] = []
    for handler_id, _target, edge in graph.edges(data=True):
        if kind_str(edge.get("kind")) != "ROUTE":
            continue

        raw_md = edge.get("metadata") or {}
        edge_md = raw_md if isinstance(raw_md, dict) else {}
        handler = graph.nodes[handler_id]

        record: dict[str, Any] = {
            "handler_qn": str(handler.get("qualname") or ""),
            "method": str(edge_md.get("method") or ""),
            "path": str(edge_md.get("path") or ""),
            "framework": str(edge_md.get("framework") or ""),
        }

        if include_dataflow:
            # Role is whatever DF1.5 stored on the handler node, if anything.
            handler_md = handler.get("metadata") or {}
            role_val = (
                handler_md.get("role") if isinstance(handler_md, dict) else None
            )
            record["role"] = str(role_val) if role_val else None
            record["dataflow"] = shape_hops_for_handler(
                graph,
                record["handler_qn"],
                method=record["method"],
                path=record["path"],
            )
        routes.append(record)

    routes.sort(key=lambda r: (r["path"], r["method"], r["handler_qn"]))
    return routes
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def serialize_sql_io_edges(graph: nx.MultiDiGraph) -> list[dict[str, Any]]:
    """Serialize READS_FROM / WRITES_TO edges into the ``sql_io`` array.

    Each kept edge contributes ``function_qn`` (source node qualname),
    ``model_qn`` (target node qualname), the ``operation`` and ``via``
    values from the edge metadata, and the edge ``kind``. Edges whose target
    has no qualname are dropped. The result is ordered by
    ``(function_qn, model_qn, operation)``.
    """
    rows: list[dict[str, Any]] = []
    for src, dst, edge in graph.edges(data=True):
        edge_kind = kind_str(edge.get("kind"))
        if edge_kind != "READS_FROM" and edge_kind != "WRITES_TO":
            continue

        raw_md = edge.get("metadata") or {}
        edge_md = raw_md if isinstance(raw_md, dict) else {}

        function_qn = str(graph.nodes[src].get("qualname") or "")
        model_qn = str(graph.nodes[dst].get("qualname") or "")
        if model_qn:
            rows.append({
                "function_qn": function_qn,
                "model_qn": model_qn,
                "operation": str(edge_md.get("operation") or ""),
                "via": str(edge_md.get("via") or ""),
                "kind": edge_kind,
            })

    return sorted(
        rows, key=lambda r: (r["function_qn"], r["model_qn"], r["operation"])
    )
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def serialize_fetch_edges(graph: nx.MultiDiGraph) -> list[dict[str, Any]]:
    """Serialize FETCH_CALL edges into the HLD payload's ``fetches`` array.

    Each entry carries the source node's qualname (``caller_qn``) plus the
    ``method``, ``url``, ``library`` and ``body_keys`` values from the edge
    metadata; ``url_kind`` is copied only when the metadata has that key.
    Edges whose source has no qualname are skipped. Output is ordered by
    ``(caller_qn, method, url)`` for snapshot-friendly results.
    """
    fetches: list[dict[str, Any]] = []
    for src, _dst, edge in graph.edges(data=True):
        if kind_str(edge.get("kind")) != "FETCH_CALL":
            continue

        caller_qn = str(graph.nodes[src].get("qualname") or "")
        if not caller_qn:
            continue

        raw_md = edge.get("metadata") or {}
        edge_md = raw_md if isinstance(raw_md, dict) else {}

        record: dict[str, Any] = {
            "caller_qn": caller_qn,
            "method": str(edge_md.get("method") or ""),
            "url": str(edge_md.get("url") or ""),
            "library": str(edge_md.get("library") or ""),
            "body_keys": list(edge_md.get("body_keys") or []),
        }
        if "url_kind" in edge_md:
            record["url_kind"] = str(edge_md.get("url_kind") or "")
        fetches.append(record)

    return sorted(
        fetches, key=lambda e: (e["caller_qn"], e["method"], e["url"])
    )
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def _build_modules_drilldown(
    graph: nx.MultiDiGraph,
    node_to_layer: dict[str, str],
    node_to_module: dict[str, str],
) -> dict[str, dict[str, Any]]:
    """Build the per-module drill-down records keyed by module qualname.

    Each record holds the module's qualname, short name, layer, file and a
    ``symbols`` list. Symbols are the FUNCTION / METHOD / CLASS nodes
    attributed to that module, with call fan-in/fan-out, up to 14 distinct
    callers and callees, optional ``params`` / ``returns`` / ``role`` taken
    from node metadata, and a ``callee_args`` list aligned with ``callees``.
    """
    # --- Pass 1: one record per MODULE node that has a layer --------------
    modules: dict[str, dict[str, Any]] = {}
    for nid, attrs in graph.nodes(data=True):
        if kind_str(attrs.get("kind")) != "MODULE":
            continue
        qn = str(attrs.get("qualname") or "")
        lid = node_to_layer.get(nid)
        if qn and lid and qn not in modules:
            modules[qn] = {
                "qualname": qn, "name": _short_name(qn), "layer": lid,
                "file": str(attrs.get("file") or ""), "symbols": [],
            }

    # --- Pass 2: index CALLS edges by caller and by callee ----------------
    callees_of: dict[str, list[str]] = defaultdict(list)
    callers_of: dict[str, list[str]] = defaultdict(list)
    # (src_node, dst_qualname) -> edge metadata for callee_args alignment.
    # A later edge for the same pair overwrites an earlier one.
    args_by_call: dict[tuple[str, str], dict[str, Any]] = {}
    for src, dst, edge in graph.edges(data=True):
        if kind_str(edge.get("kind")) != "CALLS":
            continue
        caller_qn = graph.nodes[src].get("qualname")
        callee_qn = graph.nodes[dst].get("qualname")
        if callee_qn:
            callee_name = str(callee_qn)
            callees_of[src].append(callee_name)
            edge_md = edge.get("metadata") or {}
            if isinstance(edge_md, dict) and (
                "args" in edge_md or "kwargs" in edge_md
            ):
                args_by_call[(src, callee_name)] = {
                    "args": list(edge_md.get("args") or []),
                    "kwargs": dict(edge_md.get("kwargs") or {}),
                }
        if caller_qn:
            callers_of[dst].append(str(caller_qn))

    # --- Pass 3: describe every symbol that belongs to a known module -----
    symbols_by_module: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for nid, attrs in graph.nodes(data=True):
        kind = kind_str(attrs.get("kind"))
        if kind not in ("FUNCTION", "METHOD", "CLASS"):
            continue
        mqn = node_to_module.get(nid)
        if not mqn or mqn not in modules:
            continue

        sym_qn = str(attrs.get("qualname") or "")
        raw_line = attrs.get("line_start") or 0
        try:
            line_no = int(raw_line)
        except (TypeError, ValueError):
            line_no = 0

        # ``.get`` keeps the defaultdicts from growing on lookups.
        outgoing = callees_of.get(nid, [])
        incoming = callers_of.get(nid, [])
        callees = sorted(set(outgoing))[:14]

        sym: dict[str, Any] = {
            "qualname": sym_qn,
            "name": _short_name(sym_qn) or str(attrs.get("name") or ""),
            "kind": kind,
            "line": line_no,
            "fan_in": len(incoming),
            "fan_out": len(outgoing),
            "callers": sorted(set(incoming))[:14],
            "callees": callees,
        }

        # DF0/DF1.5 metadata surfacing — omit when absent on the node.
        node_md = attrs.get("metadata") or {}
        if isinstance(node_md, dict):
            for key in ("params", "returns"):
                if key in node_md:
                    sym[key] = node_md[key]
            if node_md.get("role") is not None:
                sym["role"] = node_md["role"]

        # callee_args parallels ``callees``; only present when there are any.
        if callees:
            sym["callee_args"] = [
                args_by_call[(nid, cqn)]
                if (nid, cqn) in args_by_call
                else {"args": [], "kwargs": {}}
                for cqn in callees
            ]

        symbols_by_module[mqn].append(sym)

    # Classes first, then by descending fan-in, then by name.
    for mqn, syms in symbols_by_module.items():
        syms.sort(key=lambda s: (
            s["kind"] != "CLASS", -int(s["fan_in"]), s["name"]
        ))
        modules[mqn]["symbols"] = syms
    return modules
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def build_hld(graph: nx.MultiDiGraph) -> HldPayload:
    """Assemble the complete High-Level-Design payload for *graph*.

    Derives the layers in use, groups modules into per-layer components,
    aggregates CALLS / IMPORTS edges that cross layers, renders both Mermaid
    diagrams and attaches the module drill-down plus the route, SQL and
    fetch serializations.
    """
    layers_used, root = derive_layers(graph)
    node_to_layer, node_to_module = _node_to_layer(graph, root)

    # Number of FUNCTION / METHOD / CLASS nodes attributed to each module.
    symbol_counts: dict[str, int] = defaultdict(int)
    for nid, attrs in graph.nodes(data=True):
        if kind_str(attrs.get("kind")) not in ("FUNCTION", "METHOD", "CLASS"):
            continue
        owner = node_to_module.get(nid)
        if owner:
            symbol_counts[owner] += 1

    # One component per distinct module qualname, grouped by layer.
    components: dict[str, list[dict[str, Any]]] = defaultdict(list)
    seen_modules: set[str] = set()
    for nid, attrs in graph.nodes(data=True):
        if kind_str(attrs.get("kind")) != "MODULE":
            continue
        qn = str(attrs.get("qualname") or "")
        lid = node_to_layer.get(nid)
        if not lid or qn in seen_modules:
            continue
        seen_modules.add(qn)
        components[lid].append({
            "qualname": qn,
            "name": _short_name(qn),
            "file": str(attrs.get("file") or ""),
            "symbols": symbol_counts.get(qn, 0),
        })
    for comps in components.values():
        comps.sort(key=lambda c: (-int(c["symbols"]), c["qualname"]))

    # Count edges between different layers, per (source, target, kind).
    weights: dict[tuple[str, str, str], int] = defaultdict(int)
    for src, dst, data in graph.edges(data=True):
        ek = kind_str(data.get("kind"))
        if ek != "CALLS" and ek != "IMPORTS":
            continue
        sl = node_to_layer.get(src)
        dl = node_to_layer.get(dst)
        if sl and dl and sl != dl:
            weights[(sl, dl, ek)] += 1
    heaviest_first = sorted(weights.items(), key=lambda kv: -kv[1])
    edges = [
        {"source": s, "target": d, "kind": k, "weight": w}
        for (s, d, k), w in heaviest_first
    ]

    mermaid_layered = _render_layered_mermaid(layers_used, components, edges)
    mermaid_context = _render_context_mermaid(root)

    cross_layer_calls = sum(
        int(cast(int, e["weight"])) for e in edges if e["kind"] == "CALLS"
    )
    metrics = {
        # ``.get`` avoids creating empty entries in the defaultdict.
        "layers": sum(1 for lay in layers_used if components.get(lay.id)),
        "components": sum(len(v) for v in components.values()),
        "cross_layer_edges": len(edges),
        "total_cross_layer_calls": cross_layer_calls,
    }

    layer_dicts = [
        {
            "id": lay.id,
            "title": lay.title,
            "subtitle": lay.subtitle,
            "color": lay.color,
        }
        for lay in layers_used
    ]
    return HldPayload(
        layers=layer_dicts,
        edges=edges,
        components=dict(components),
        modules=_build_modules_drilldown(graph, node_to_layer, node_to_module),
        mermaid_layered=mermaid_layered,
        mermaid_context=mermaid_context,
        metrics=metrics,
        root=root,
        routes=serialize_route_edges(graph),
        sql_io=serialize_sql_io_edges(graph),
        fetches=serialize_fetch_edges(graph),
    )
|
|
556
|
+
|
|
557
|
+
|
|
558
|
+
_SAFE_RE = re.compile(r"[^a-zA-Z0-9]")
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
def _safe_id(qn: str) -> str:
|
|
562
|
+
return "n_" + _SAFE_RE.sub("_", qn)[:60]
|
|
563
|
+
|
|
564
|
+
|
|
565
|
+
def _layer_safe(lid: str) -> str:
    """Return the subgraph id for layer *lid*: ``L_`` plus the sanitized id."""
    return "L_" + _SAFE_RE.sub("_", lid)
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
def _render_layered_mermaid(
    layers_used: list[Layer],
    components: dict[str, list[dict[str, Any]]],
    edges: list[dict[str, Any]],
    *,
    max_per_layer: int = 8,
) -> str:
    """Render the layered flowchart as Mermaid source.

    Output order: one subgraph per non-empty layer (at most
    ``max_per_layer`` components each, ranked by symbol count, plus a
    "+N more" node when some are hidden), one labelled link per ordered
    layer pair, then class/style statements and finally ``linkStyle``
    statements whose stroke width scales with the link's weight.
    """
    out: list[str] = ["flowchart TB"]

    # --- Subgraphs --------------------------------------------------------
    for lay in layers_used:
        comps = components.get(lay.id, [])
        if not comps:
            continue
        out.append(f' subgraph {_layer_safe(lay.id)}["<b>{lay.title}</b>"]')
        out.append(" direction LR")
        ranked = sorted(comps, key=lambda c: -int(c.get("symbols") or 0))
        shown = ranked[:max_per_layer]
        hidden = len(ranked) - len(shown)
        for comp in shown:
            comp_qn = comp["qualname"]
            caption = _short_name(comp_qn)
            badge = f" · {comp['symbols']}" if comp["symbols"] else ""
            out.append(f' {_safe_id(comp_qn)}["{caption}{badge}"]')
        if hidden > 0:
            more_id = _safe_id(lay.id + "_more")
            out.append(f' {more_id}(["+{hidden} more"])')
        out.append(" end")

    # --- Links: merge CALLS and non-CALLS weights per layer pair ----------
    totals: dict[tuple[str, str], dict[str, int]] = {}
    for e in edges:
        bucket = totals.setdefault(
            (e["source"], e["target"]), {"calls": 0, "imports": 0}
        )
        counter = "calls" if e["kind"] == "CALLS" else "imports"
        bucket[counter] += int(e["weight"])

    # The enumerate index is the position of the link in the diagram, which
    # is what ``linkStyle`` refers to below; no other links are emitted.
    link_weights: list[tuple[int, int]] = []
    for link_idx, ((src_layer, dst_layer), bucket) in enumerate(
        sorted(totals.items())
    ):
        calls = bucket["calls"]
        imports = bucket["imports"]
        parts = [
            text
            for count, text in (
                (calls, f"{calls} calls"),
                (imports, f"{imports} imports"),
            )
            if count
        ]
        label = " / ".join(parts) or "uses"
        out.append(
            f' {_layer_safe(src_layer)} --"{label}"--> {_layer_safe(dst_layer)}'
        )
        link_weights.append((link_idx, calls + imports))

    # --- Styling ----------------------------------------------------------
    out.append("")
    for lay in layers_used:
        comps = components.get(lay.id)
        if not comps:
            continue
        class_name = f"{_SAFE_RE.sub('_', lay.id)}_node"
        out.append(
            f" classDef {class_name} "
            f"fill:{lay.color},stroke:{lay.color},color:#0b1220,rx:8,ry:8"
        )
        # NOTE: a class statement is emitted for every component of the
        # layer, including those beyond ``max_per_layer`` that have no node.
        for comp in comps:
            out.append(f" class {_safe_id(comp['qualname'])} {class_name}")
        out.append(
            f" style {_layer_safe(lay.id)} fill:transparent,"
            f"stroke:{lay.color},stroke-width:2px,stroke-dasharray:0"
        )

    if link_weights:
        # ``or 1`` guards the division when every link has weight 0.
        max_w = max(w for _, w in link_weights) or 1
        for link_idx, w in link_weights:
            thickness = 1 + round((w / max_w) * 4)
            out.append(
                f" linkStyle {link_idx} stroke:#94a3b8,"
                f"stroke-width:{thickness}px,fill:none"
            )

    return "\n".join(out)
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
def _render_context_mermaid(root: str = "") -> str:
|
|
650
|
+
proj = root or "your repo"
|
|
651
|
+
return "\n".join([
|
|
652
|
+
"flowchart LR",
|
|
653
|
+
' user(["<b>Developer</b><br>runs codegraph"])',
|
|
654
|
+
f' repo[("<b>{proj}</b><br>source repository")]',
|
|
655
|
+
' cli{{"<b>codegraph CLI</b><br>build · analyze · viz · serve"}}',
|
|
656
|
+
' db[("<b>.codegraph/graph.db</b><br>SQLite store")]',
|
|
657
|
+
' out[/"<b>.codegraph/explore/</b><br>HTML dashboard"/]',
|
|
658
|
+
" user -- commands --> cli",
|
|
659
|
+
" cli -- reads --> repo",
|
|
660
|
+
" cli -- writes --> db",
|
|
661
|
+
" cli -- writes --> out",
|
|
662
|
+
" user -- opens --> out",
|
|
663
|
+
"",
|
|
664
|
+
" classDef person fill:#a78bfa,stroke:#1e293b,color:#0b1220,rx:8,ry:8",
|
|
665
|
+
" classDef system fill:#22d3ee,stroke:#1e293b,color:#0b1220,rx:8,ry:8",
|
|
666
|
+
" classDef ext fill:#fcd34d,stroke:#1e293b,color:#0b1220,rx:6,ry:6",
|
|
667
|
+
" classDef store fill:#60a5fa,stroke:#1e293b,color:#0b1220,rx:6,ry:6",
|
|
668
|
+
" class user person",
|
|
669
|
+
" class cli system",
|
|
670
|
+
" class repo,out ext",
|
|
671
|
+
" class db store",
|
|
672
|
+
])
|
|
673
|
+
|
|
674
|
+
|
|
675
|
+
__all__ = [
|
|
676
|
+
"LAYERS",
|
|
677
|
+
"LAYER_CATALOG",
|
|
678
|
+
"HldPayload",
|
|
679
|
+
"Layer",
|
|
680
|
+
"LayerSpec",
|
|
681
|
+
"build_hld",
|
|
682
|
+
"derive_layers",
|
|
683
|
+
]
|