polycodegraph 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. codegraph/__init__.py +10 -0
  2. codegraph/analysis/__init__.py +30 -0
  3. codegraph/analysis/_common.py +125 -0
  4. codegraph/analysis/blast_radius.py +63 -0
  5. codegraph/analysis/cycles.py +79 -0
  6. codegraph/analysis/dataflow.py +861 -0
  7. codegraph/analysis/dead_code.py +165 -0
  8. codegraph/analysis/hotspots.py +68 -0
  9. codegraph/analysis/infrastructure.py +439 -0
  10. codegraph/analysis/metrics.py +52 -0
  11. codegraph/analysis/report.py +222 -0
  12. codegraph/analysis/roles.py +323 -0
  13. codegraph/analysis/untested.py +79 -0
  14. codegraph/cli.py +1506 -0
  15. codegraph/config.py +64 -0
  16. codegraph/embed/__init__.py +35 -0
  17. codegraph/embed/chunker.py +120 -0
  18. codegraph/embed/embedder.py +113 -0
  19. codegraph/embed/query.py +181 -0
  20. codegraph/embed/store.py +360 -0
  21. codegraph/graph/__init__.py +0 -0
  22. codegraph/graph/builder.py +212 -0
  23. codegraph/graph/schema.py +69 -0
  24. codegraph/graph/store_networkx.py +55 -0
  25. codegraph/graph/store_sqlite.py +249 -0
  26. codegraph/mcp_server/__init__.py +6 -0
  27. codegraph/mcp_server/server.py +933 -0
  28. codegraph/parsers/__init__.py +0 -0
  29. codegraph/parsers/base.py +70 -0
  30. codegraph/parsers/go.py +570 -0
  31. codegraph/parsers/python.py +1707 -0
  32. codegraph/parsers/typescript.py +1397 -0
  33. codegraph/py.typed +0 -0
  34. codegraph/resolve/__init__.py +4 -0
  35. codegraph/resolve/calls.py +480 -0
  36. codegraph/review/__init__.py +31 -0
  37. codegraph/review/baseline.py +32 -0
  38. codegraph/review/differ.py +211 -0
  39. codegraph/review/hook.py +70 -0
  40. codegraph/review/risk.py +219 -0
  41. codegraph/review/rules.py +342 -0
  42. codegraph/viz/__init__.py +17 -0
  43. codegraph/viz/_style.py +45 -0
  44. codegraph/viz/dashboard.py +740 -0
  45. codegraph/viz/diagrams.py +370 -0
  46. codegraph/viz/explore.py +453 -0
  47. codegraph/viz/hld.py +683 -0
  48. codegraph/viz/html.py +115 -0
  49. codegraph/viz/mermaid.py +111 -0
  50. codegraph/viz/svg.py +77 -0
  51. codegraph/web/__init__.py +4 -0
  52. codegraph/web/server.py +165 -0
  53. codegraph/web/static/app.css +664 -0
  54. codegraph/web/static/app.js +919 -0
  55. codegraph/web/static/index.html +112 -0
  56. codegraph/web/static/views/architecture.js +1671 -0
  57. codegraph/web/static/views/graph3d.css +564 -0
  58. codegraph/web/static/views/graph3d.js +999 -0
  59. codegraph/web/static/views/graph3d_transform.js +984 -0
  60. codegraph/workspace/__init__.py +34 -0
  61. codegraph/workspace/config.py +110 -0
  62. codegraph/workspace/operations.py +294 -0
  63. polycodegraph-0.1.0.dist-info/METADATA +687 -0
  64. polycodegraph-0.1.0.dist-info/RECORD +67 -0
  65. polycodegraph-0.1.0.dist-info/WHEEL +4 -0
  66. polycodegraph-0.1.0.dist-info/entry_points.txt +2 -0
  67. polycodegraph-0.1.0.dist-info/licenses/LICENSE +21 -0
codegraph/viz/hld.py ADDED
@@ -0,0 +1,683 @@
1
+ """Generic High-Level-Design view derived from any repo's package structure.
2
+
3
+ Classification strategy:
4
+
5
+ 1. Compute the longest common qualname prefix of all MODULE nodes (the
6
+ "root" package). Strip it.
7
+ 2. For each module, walk its qualname segments from rightmost to leftmost
8
+ (also splitting snake_case tokens like ``store_sqlite``). The first
9
+ token that matches a layer pattern in :data:`LAYER_CATALOG` wins.
10
+ 3. If nothing matches, the module's first non-root segment becomes its own
11
+ ad-hoc layer. This keeps the diagram useful even for domain-specific
12
+ package names (``users``, ``billing``, ...).
13
+
14
+ The catalog patterns intentionally cover the most common architectural
15
+ concepts (cli/api, pipeline, parsers, resolve, domain, storage, analysis,
16
+ visualisation, infra). Unknown subpackages become a neutral grey ad-hoc layer named after their first package segment.
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import re
21
+ from collections import defaultdict
22
+ from dataclasses import dataclass, field
23
+ from typing import Any, cast
24
+
25
+ import networkx as nx
26
+
27
+ from codegraph.viz._style import kind_str
28
+
29
+
30
@dataclass(frozen=True)
class Layer:
    """Immutable description of a single layer shown in the HLD view.

    Frozen, so instances are hashable and cannot be mutated once built.
    """

    # Identifier of the layer; components and edges are keyed by it.
    id: str
    # Heading displayed for the layer.
    title: str
    # Short secondary description of the layer.
    subtitle: str
    # Colour string applied to the layer in the rendered diagrams.
    color: str
    # Colour of text drawn on top of ``color``.
    text_color: str = "#0b1220"
37
+
38
+
39
@dataclass(frozen=True)
class LayerSpec:
    """Immutable catalog entry describing a well-known architectural layer.

    Besides the display fields it carries the ordering ``tier`` and the
    name tokens that classify a module into this layer.
    """

    # Ordering key: larger tiers are sorted later (lower in the flowchart).
    tier: int
    # Identifier of the layer this spec produces.
    id: str
    # Heading displayed for the layer.
    title: str
    # Short secondary description of the layer.
    subtitle: str
    # Colour string applied to the layer in the rendered diagrams.
    color: str
    # Lower-case tokens that, when found in a qualname, select this layer.
    patterns: tuple[str, ...] = field(default_factory=tuple)
47
+
48
+
49
# Generic catalog of well-known layers. A larger ``tier`` places the layer
# lower in the rendered top-to-bottom flowchart. When a token appears in more
# than one entry, the entry listed first wins during classification.
LAYER_CATALOG: list[LayerSpec] = [
    LayerSpec(
        tier=1,
        id="cli",
        title="CLI / API",
        subtitle="user-facing entrypoints",
        color="#a78bfa",
        patterns=(
            "cli", "api", "routes", "handlers", "controllers", "app", "main",
            "entry", "entrypoint", "server", "rpc",
        ),
    ),
    LayerSpec(
        tier=2,
        id="pipeline",
        title="Pipeline",
        subtitle="build / orchestration",
        color="#fcd34d",
        patterns=(
            "pipeline", "builder", "build", "orchestrator", "scheduler",
            "jobs", "tasks", "worker", "queue", "runner",
        ),
    ),
    LayerSpec(
        tier=3,
        id="parsers",
        title="Ingestion",
        subtitle="extractors / parsers",
        color="#fb923c",
        patterns=(
            "parser", "parsers", "extract", "extractor", "extractors",
            "ingest", "reader", "loader", "scraper", "scrape", "import",
        ),
    ),
    LayerSpec(
        tier=4,
        id="resolve",
        title="Resolution",
        subtitle="linking / binding",
        color="#f472b6",
        patterns=(
            "resolve", "resolver", "resolvers", "link", "linker", "binding",
        ),
    ),
    LayerSpec(
        tier=5,
        id="domain",
        title="Domain",
        subtitle="core business logic",
        color="#c084fc",
        patterns=(
            "service", "services", "domain", "logic", "core", "engine",
            "usecase", "usecases",
        ),
    ),
    LayerSpec(
        tier=6,
        id="storage",
        title="Storage",
        subtitle="data + persistence",
        color="#60a5fa",
        patterns=(
            "store", "storage", "db", "database", "repo", "repository",
            "persistence", "schema", "sqlite", "postgres", "mysql",
            "mongo", "redis", "model", "models", "entities",
        ),
    ),
    LayerSpec(
        tier=7,
        id="analysis",
        title="Analysis",
        subtitle="metrics / checks",
        color="#34d399",
        patterns=(
            "analysis", "analyze", "analyzer", "metric", "metrics",
            "check", "checks", "lint", "quality", "insight", "insights",
            "stats", "report", "reporting",
        ),
    ),
    LayerSpec(
        tier=8,
        id="viz",
        title="Visualisation",
        subtitle="render / dashboards",
        color="#22d3ee",
        patterns=(
            "viz", "visual", "visualization", "visualisation", "render",
            "renderer", "dashboard", "ui", "frontend", "web", "html",
        ),
    ),
    LayerSpec(
        tier=9,
        id="infra",
        title="Infra / utils",
        subtitle="shared helpers",
        color="#94a3b8",
        patterns=(
            "util", "utils", "helper", "helpers", "common", "internal",
            "tools", "misc", "support", "config", "settings", "constants",
        ),
    ),
]
90
+
91
# Backward-compat alias: the catalog used to be exposed as ``LAYERS``. Each
# entry is the display-only projection of the matching ``LayerSpec``.
LAYERS: list[Layer] = [
    Layer(id=spec.id, title=spec.title, subtitle=spec.subtitle, color=spec.color)
    for spec in LAYER_CATALOG
]

# Tier and colour given to ad-hoc layers that match no catalog entry.
_FALLBACK_TIER = 5
_FALLBACK_COLOR = "#94a3b8"
98
+
99
+
100
+ def _split_token(seg: str) -> list[str]:
101
+ return [p for p in re.split(r"[_\-]", seg.lower()) if p]
102
+
103
+
104
def _classify_segments(segments: list[str]) -> str | None:
    """Return the catalog layer id for *segments*, or ``None`` if none match.

    Segments are examined from the last to the first. For each one the whole
    lower-cased segment is tried first, then its ``_``/``-`` separated parts
    in left-to-right order; the first token found in the catalog decides.
    """
    # Token -> layer id. Earlier catalog entries keep ownership of a token
    # that is listed by more than one spec.
    lookup: dict[str, str] = {}
    for spec in LAYER_CATALOG:
        for pattern in spec.patterns:
            if pattern not in lookup:
                lookup[pattern] = spec.id

    for segment in segments[::-1]:
        for candidate in (segment.lower(), *_split_token(segment)):
            if candidate in lookup:
                return lookup[candidate]
    return None
118
+
119
+
120
+ def _common_root(qualnames: list[str]) -> str:
121
+ if not qualnames:
122
+ return ""
123
+ split = [qn.split(".") for qn in qualnames if qn]
124
+ if not split:
125
+ return ""
126
+ common: list[str] = []
127
+ for segs in zip(*split, strict=False):
128
+ if len(set(segs)) == 1:
129
+ common.append(segs[0])
130
+ else:
131
+ break
132
+ # If the only common segment IS each module's full qualname (i.e. flat
133
+ # one-segment package set), don't strip — we'd have nothing left.
134
+ if len(common) >= max(len(s) for s in split):
135
+ common = common[:-1]
136
+ return ".".join(common)
137
+
138
+
139
def _is_skippable_module(qn: str, file: str) -> bool:
    """Tell whether a node should be left out of the HLD.

    True for anything recognised as a test module and for files named
    ``__init__.py`` (compared case-insensitively, with ``\\`` treated as ``/``).
    """
    if _is_test_module(qn, file):
        return True
    normalized = (file or "").replace("\\", "/").lower()
    return normalized == "__init__.py" or normalized.endswith("/__init__.py")
145
+
146
+
147
+ def _is_test_module(qn: str, file: str) -> bool:
148
+ if not qn:
149
+ return False
150
+ f = (file or "").replace("\\", "/").lower()
151
+ if "/tests/" in f or "/test/" in f or f.startswith("tests/") or f.startswith("test/"):
152
+ return True
153
+ segs = qn.split(".")
154
+ return any(s == "tests" or s == "test" or s.startswith("test_") for s in segs)
155
+
156
+
157
def _file_path_to_module_qualname(graph: nx.MultiDiGraph) -> dict[str, str]:
    """Map the file path of each non-skippable MODULE node to its qualname.

    Nodes whose ``file`` or ``qualname`` is not a string, or whose qualname is
    empty, are ignored. If two modules share a file path the later one wins.
    """
    mapping: dict[str, str] = {}
    for _node_id, node in graph.nodes(data=True):
        if kind_str(node.get("kind")) != "MODULE":
            continue
        path = node.get("file")
        qualname = node.get("qualname")
        if not (isinstance(path, str) and isinstance(qualname, str) and qualname):
            continue
        if not _is_skippable_module(qualname, path):
            mapping[path] = qualname
    return mapping
169
+
170
+
171
def _module_qualnames(graph: nx.MultiDiGraph) -> list[str]:
    """List the qualnames of all MODULE nodes that are not skippable.

    The result follows graph node order and may contain duplicates.
    """
    names: list[str] = []
    for _node_id, node in graph.nodes(data=True):
        if kind_str(node.get("kind")) != "MODULE":
            continue
        raw = node.get("qualname")
        if not raw:
            continue
        if _is_skippable_module(str(raw or ""), str(node.get("file") or "")):
            continue
        names.append(str(raw))
    return names
181
+
182
+
183
def _layer_id_for(qn: str, root: str) -> str:
    """Pick the layer id for module qualname *qn* relative to *root*.

    The root prefix is removed when *qn* sits under it. The remaining
    segments are classified against the catalog; if nothing matches, the
    first remaining segment (lower-cased) becomes an ad-hoc layer id. A
    qualname with nothing left after stripping maps to ``"main"``.
    """
    if root and qn == root:
        remainder = ""
    elif root and qn.startswith(root + "."):
        remainder = qn[len(root) + 1:]
    else:
        remainder = qn

    parts = [piece for piece in remainder.split(".") if piece]
    if not parts:
        return "main"
    return _classify_segments(parts) or parts[0].lower()
196
+
197
+
198
def derive_layers(graph: nx.MultiDiGraph) -> tuple[list[Layer], str]:
    """Work out which layers the graph's modules occupy.

    Returns ``(layers, root)``: the layers in use, ordered by tier and then by
    layer id, and the common qualname prefix that was stripped before
    classification. Layer ids missing from the catalog become ad-hoc layers
    titled after the id, placed at the fallback tier with the fallback colour.
    """
    qualnames = _module_qualnames(graph)
    root = _common_root(qualnames)

    # Module count per layer id; only the set of keys is used below.
    used: dict[str, int] = defaultdict(int)
    for qualname in qualnames:
        used[_layer_id_for(qualname, root)] += 1

    specs = {spec.id: spec for spec in LAYER_CATALOG}
    ranked: list[tuple[int, str, Layer]] = []
    for layer_id in used:
        spec = specs.get(layer_id)
        if spec is None:
            # Ad-hoc layer named after the package segment.
            tier = _FALLBACK_TIER
            layer = Layer(layer_id, layer_id.title(), "module group", _FALLBACK_COLOR)
        else:
            tier = spec.tier
            layer = Layer(spec.id, spec.title, spec.subtitle, spec.color)
        ranked.append((tier, layer_id, layer))

    ranked.sort(key=lambda item: (item[0], item[1]))
    return [layer for _tier, _layer_id, layer in ranked], root
221
+
222
+
223
def _node_to_layer(
    graph: nx.MultiDiGraph, root: str
) -> tuple[dict[str, str], dict[str, str]]:
    """Assign every eligible node to a layer and to an owning module.

    Returns ``(node_id -> layer_id, node_id -> module_qualname)``. Skippable
    nodes (tests, ``__init__.py``) get no entry. A MODULE node owns itself;
    any other node is owned by the module registered for its file, falling
    back to its own qualname minus the last dotted segment.
    """
    modules_by_file = _file_path_to_module_qualname(graph)
    layer_of: dict[str, str] = {}
    module_of: dict[str, str] = {}

    for node_id, node in graph.nodes(data=True):
        kind = kind_str(node.get("kind"))
        qualname = str(node.get("qualname") or "")
        path = node.get("file")
        if _is_skippable_module(qualname, str(path or "")):
            continue

        if kind == "MODULE" and qualname:
            owner = qualname
        else:
            owner = modules_by_file.get(path) if isinstance(path, str) else None
            if not owner and qualname:
                # Without a dot, rsplit returns the qualname itself.
                owner = qualname.rsplit(".", 1)[0]

        if owner:
            layer_of[node_id] = _layer_id_for(owner, root)
            module_of[node_id] = owner
    return layer_of, module_of
247
+
248
+
249
+ def _short_name(qn: str) -> str:
250
+ return qn.rsplit(".", 1)[-1] if qn else qn
251
+
252
+
253
@dataclass
class HldPayload:
    """Everything ``build_hld`` produces for the High-Level-Design view."""

    # One dict per layer in use: id, title, subtitle, color.
    layers: list[dict[str, Any]]
    # Aggregated cross-layer edges: source, target, kind, weight.
    edges: list[dict[str, Any]]
    # Layer id -> module summaries belonging to that layer.
    components: dict[str, list[dict[str, Any]]]
    # Module qualname -> drill-down record including its symbols.
    modules: dict[str, dict[str, Any]]
    # Mermaid source of the layered flowchart.
    mermaid_layered: str
    # Mermaid source of the context diagram.
    mermaid_context: str
    # Summary counters for the payload.
    metrics: dict[str, int]
    # Common qualname prefix stripped before classification.
    root: str = ""
    # v0.2 cross-stack data-flow surfaces (default empty so older payloads
    # remain backwards-compatible). DF1 fills routes/sql; DF2 fills fetches.
    routes: list[dict[str, Any]] = field(default_factory=list)
    sql_io: list[dict[str, Any]] = field(default_factory=list)
    fetches: list[dict[str, Any]] = field(default_factory=list)
268
+
269
+
270
def serialize_route_edges(
    graph: nx.MultiDiGraph,
    *,
    include_dataflow: bool = True,
) -> list[dict[str, Any]]:
    """Serialize ROUTE edges into the HLD payload's ``routes`` array.

    Produces one entry per ROUTE edge holding the source node's qualname
    (``handler_qn``) plus the ``method``, ``path`` and ``framework`` values
    read from the edge metadata. Missing values become empty strings. The
    result is sorted by ``(path, method, handler_qn)``.

    When ``include_dataflow`` is True (the default), every entry also gets a
    ``role`` field (the handler node's metadata role, or ``None``) and a
    ``dataflow`` field built by
    :func:`codegraph.analysis.dataflow.shape_hops_for_handler`. Pass False to
    get entries without those two fields.
    """
    # Local import: keeps the analysis package independent of the viz
    # layer's import order and avoids cycles.
    from codegraph.analysis.dataflow import shape_hops_for_handler

    routes: list[dict[str, Any]] = []
    for handler_id, _target, edge in graph.edges(data=True):
        if kind_str(edge.get("kind")) != "ROUTE":
            continue

        meta = edge.get("metadata") or {}
        if not isinstance(meta, dict):
            meta = {}

        handler = graph.nodes[handler_id]
        handler_qn = str(handler.get("qualname") or "")
        method = str(meta.get("method") or "")
        path = str(meta.get("path") or "")
        record: dict[str, Any] = {
            "handler_qn": handler_qn,
            "method": method,
            "path": path,
            "framework": str(meta.get("framework") or ""),
        }

        if include_dataflow:
            # Pull the role off the handler node, if one has been assigned.
            handler_meta = handler.get("metadata") or {}
            role_value = (
                handler_meta.get("role") if isinstance(handler_meta, dict) else None
            )
            record["role"] = str(role_value) if role_value else None
            record["dataflow"] = shape_hops_for_handler(
                graph, handler_qn, method=method, path=path,
            )
        routes.append(record)

    routes.sort(key=lambda row: (row["path"], row["method"], row["handler_qn"]))
    return routes
324
+
325
+
326
def serialize_sql_io_edges(graph: nx.MultiDiGraph) -> list[dict[str, Any]]:
    """Serialize READS_FROM / WRITES_TO edges into the ``sql_io`` array.

    Each entry carries ``function_qn`` (qualname of the edge source),
    ``model_qn`` (qualname of the edge target), the ``operation`` and ``via``
    values from the edge metadata, and the edge ``kind``. Edges whose target
    has no qualname are skipped. Sorted by
    ``(function_qn, model_qn, operation)``.
    """
    wanted = ("READS_FROM", "WRITES_TO")
    rows: list[dict[str, Any]] = []
    for caller_id, model_id, edge in graph.edges(data=True):
        kind = kind_str(edge.get("kind"))
        if kind not in wanted:
            continue

        model_qn = str(graph.nodes[model_id].get("qualname") or "")
        if not model_qn:
            continue

        meta = edge.get("metadata") or {}
        if not isinstance(meta, dict):
            meta = {}

        rows.append({
            "function_qn": str(graph.nodes[caller_id].get("qualname") or ""),
            "model_qn": model_qn,
            "operation": str(meta.get("operation") or ""),
            "via": str(meta.get("via") or ""),
            "kind": kind,
        })

    rows.sort(key=lambda row: (row["function_qn"], row["model_qn"], row["operation"]))
    return rows
354
+
355
+
356
def serialize_fetch_edges(graph: nx.MultiDiGraph) -> list[dict[str, Any]]:
    """Serialize FETCH_CALL edges into the HLD payload's `fetches` array.

    Each entry holds the calling node's qualname (``caller_qn``) together
    with the ``method``, ``url``, ``library`` and ``body_keys`` values from
    the edge metadata; ``url_kind`` is included only when the metadata has
    that key. Edges whose source has no qualname are skipped. Sorted by
    ``(caller_qn, method, url)`` for stable output.
    """
    fetches: list[dict[str, Any]] = []
    for caller_id, _target, edge in graph.edges(data=True):
        if kind_str(edge.get("kind")) != "FETCH_CALL":
            continue

        caller_qn = str(graph.nodes[caller_id].get("qualname") or "")
        if not caller_qn:
            continue

        meta = edge.get("metadata") or {}
        if not isinstance(meta, dict):
            meta = {}

        record: dict[str, Any] = {
            "caller_qn": caller_qn,
            "method": str(meta.get("method") or ""),
            "url": str(meta.get("url") or ""),
            "library": str(meta.get("library") or ""),
            "body_keys": list(meta.get("body_keys") or []),
        }
        if "url_kind" in meta:
            record["url_kind"] = str(meta.get("url_kind") or "")
        fetches.append(record)

    fetches.sort(key=lambda row: (row["caller_qn"], row["method"], row["url"]))
    return fetches
386
+
387
+
388
def _build_modules_drilldown(
    graph: nx.MultiDiGraph,
    node_to_layer: dict[str, str],
    node_to_module: dict[str, str],
) -> dict[str, dict[str, Any]]:
    """Build the per-module drill-down records of the HLD payload.

    Returns ``module qualname -> record``. Each record names the module, its
    layer and file, and lists its FUNCTION / METHOD / CLASS symbols. A symbol
    reports its line, CALLS fan-in / fan-out, up to 14 distinct callers and
    callees (sorted), and — when present in node metadata — ``params``,
    ``returns`` and ``role``. ``callee_args`` runs parallel to ``callees``.
    Symbols are ordered classes first, then by descending fan-in, then name.
    """
    # Only MODULE nodes that received a layer get a record; first one wins.
    modules: dict[str, dict[str, Any]] = {}
    for node_id, node in graph.nodes(data=True):
        if kind_str(node.get("kind")) != "MODULE":
            continue
        qualname = str(node.get("qualname") or "")
        layer_id = node_to_layer.get(node_id)
        if qualname and layer_id and qualname not in modules:
            modules[qualname] = {
                "qualname": qualname, "name": _short_name(qualname),
                "layer": layer_id,
                "file": str(node.get("file") or ""), "symbols": [],
            }

    # Index CALLS edges in both directions, keyed by node id.
    out_calls: dict[str, list[str]] = defaultdict(list)
    in_calls: dict[str, list[str]] = defaultdict(list)
    # Map (src_node, dst_qualname) -> edge metadata for callee_args alignment.
    call_edge_meta: dict[tuple[str, str], dict[str, Any]] = {}
    for caller_id, callee_id, edge in graph.edges(data=True):
        if kind_str(edge.get("kind")) != "CALLS":
            continue
        caller_qn = graph.nodes[caller_id].get("qualname")
        callee_qn = graph.nodes[callee_id].get("qualname")
        if callee_qn:
            callee_name = str(callee_qn)
            out_calls[caller_id].append(callee_name)
            edge_meta = edge.get("metadata") or {}
            if isinstance(edge_meta, dict) and (
                "args" in edge_meta or "kwargs" in edge_meta
            ):
                call_edge_meta[(caller_id, callee_name)] = {
                    "args": list(edge_meta.get("args") or []),
                    "kwargs": dict(edge_meta.get("kwargs") or {}),
                }
        if caller_qn:
            in_calls[callee_id].append(str(caller_qn))

    symbols_by_module: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for node_id, node in graph.nodes(data=True):
        kind = kind_str(node.get("kind"))
        if kind not in ("FUNCTION", "METHOD", "CLASS"):
            continue
        owner = node_to_module.get(node_id)
        if not owner or owner not in modules:
            continue

        symbol_qn = str(node.get("qualname") or "")
        try:
            line_no = int(node.get("line_start") or 0)
        except (TypeError, ValueError):
            line_no = 0

        # .get() keeps the defaultdicts from growing entries for leaf nodes.
        outgoing = out_calls.get(node_id, [])
        incoming = in_calls.get(node_id, [])
        callees = sorted(set(outgoing))[:14]
        symbol: dict[str, Any] = {
            "qualname": symbol_qn,
            "name": _short_name(symbol_qn) or str(node.get("name") or ""),
            "kind": kind,
            "line": line_no,
            "fan_in": len(incoming),
            "fan_out": len(outgoing),
            "callers": sorted(set(incoming))[:14],
            "callees": callees,
        }

        # Metadata surfacing — each key is omitted when absent on the node.
        node_meta = node.get("metadata") or {}
        if isinstance(node_meta, dict):
            for key in ("params", "returns"):
                if key in node_meta:
                    symbol[key] = node_meta[key]
            if "role" in node_meta and node_meta["role"] is not None:
                symbol["role"] = node_meta["role"]

        # callee_args parallel array, only when there ARE callees.
        if callees:
            symbol["callee_args"] = [
                call_edge_meta.get((node_id, callee), {"args": [], "kwargs": {}})
                for callee in callees
            ]

        symbols_by_module[owner].append(symbol)

    for owner, symbols in symbols_by_module.items():
        symbols.sort(key=lambda s: (
            0 if s["kind"] == "CLASS" else 1, -int(s["fan_in"]), s["name"]
        ))
        modules[owner]["symbols"] = symbols
    return modules
481
+
482
+
483
def build_hld(graph: nx.MultiDiGraph) -> HldPayload:
    """Assemble the full High-Level-Design payload for *graph*.

    Derives the layers in use, groups modules into per-layer components
    (largest symbol count first), aggregates CALLS / IMPORTS edges that cross
    layers into weighted layer-to-layer edges (heaviest first), renders both
    Mermaid diagrams and attaches the drill-down and data-flow arrays.
    """
    layers_used, root = derive_layers(graph)
    layer_order = [layer.id for layer in layers_used]
    node_to_layer, node_to_module = _node_to_layer(graph, root)

    # Number of FUNCTION / METHOD / CLASS symbols owned by each module.
    symbol_counts: dict[str, int] = defaultdict(int)
    for node_id, node in graph.nodes(data=True):
        if kind_str(node.get("kind")) not in ("FUNCTION", "METHOD", "CLASS"):
            continue
        owner = node_to_module.get(node_id)
        if owner:
            symbol_counts[owner] += 1

    # One component per distinct module qualname, grouped by layer.
    components: dict[str, list[dict[str, Any]]] = defaultdict(list)
    seen_modules: set[str] = set()
    for node_id, node in graph.nodes(data=True):
        if kind_str(node.get("kind")) != "MODULE":
            continue
        qualname = str(node.get("qualname") or "")
        layer_id = node_to_layer.get(node_id)
        if not layer_id or qualname in seen_modules:
            continue
        seen_modules.add(qualname)
        components[layer_id].append({
            "qualname": qualname,
            "name": _short_name(qualname),
            "file": str(node.get("file") or ""),
            "symbols": symbol_counts.get(qualname, 0),
        })
    for members in components.values():
        members.sort(key=lambda c: (-int(c["symbols"]), c["qualname"]))

    # Count CALLS / IMPORTS edges whose endpoints sit in different layers.
    pair_weights: dict[tuple[str, str, str], int] = defaultdict(int)
    for src, dst, edge in graph.edges(data=True):
        edge_kind = kind_str(edge.get("kind"))
        if edge_kind not in ("CALLS", "IMPORTS"):
            continue
        src_layer = node_to_layer.get(src)
        dst_layer = node_to_layer.get(dst)
        if not src_layer or not dst_layer or src_layer == dst_layer:
            continue
        pair_weights[(src_layer, dst_layer, edge_kind)] += 1

    heaviest_first = sorted(pair_weights.items(), key=lambda item: -item[1])
    edges = [
        {"source": source, "target": target, "kind": kind, "weight": weight}
        for (source, target, kind), weight in heaviest_first
    ]

    mermaid_layered = _render_layered_mermaid(layers_used, components, edges)
    mermaid_context = _render_context_mermaid(root)

    populated_layers = [lid for lid in layer_order if components.get(lid)]
    call_weights = [
        int(cast(int, e["weight"])) for e in edges if e["kind"] == "CALLS"
    ]
    metrics = {
        "layers": len(populated_layers),
        "components": sum(len(members) for members in components.values()),
        "cross_layer_edges": len(edges),
        "total_cross_layer_calls": sum(call_weights),
    }

    layer_dicts = [
        {"id": layer.id, "title": layer.title, "subtitle": layer.subtitle,
         "color": layer.color}
        for layer in layers_used
    ]
    return HldPayload(
        layers=layer_dicts,
        edges=edges,
        components=dict(components),
        modules=_build_modules_drilldown(graph, node_to_layer, node_to_module),
        mermaid_layered=mermaid_layered,
        mermaid_context=mermaid_context,
        metrics=metrics,
        root=root,
        routes=serialize_route_edges(graph),
        sql_io=serialize_sql_io_edges(graph),
        fetches=serialize_fetch_edges(graph),
    )
556
+
557
+
558
+ _SAFE_RE = re.compile(r"[^a-zA-Z0-9]")
559
+
560
+
561
+ def _safe_id(qn: str) -> str:
562
+ return "n_" + _SAFE_RE.sub("_", qn)[:60]
563
+
564
+
565
+ def _layer_safe(lid: str) -> str:
566
+ return f"L_{_SAFE_RE.sub('_', lid)}"
567
+
568
+
569
def _render_layered_mermaid(
    layers_used: list[Layer],
    components: dict[str, list[dict[str, Any]]],
    edges: list[dict[str, Any]],
    *,
    max_per_layer: int = 8,
) -> str:
    """Render the layered architecture as Mermaid ``flowchart TB`` source.

    Each layer with at least one component becomes a subgraph showing up to
    ``max_per_layer`` components (largest symbol count first) plus a
    "+N more" placeholder for the rest. Edges are merged per (source layer,
    target layer) pair, labelled with their call / import counts, and given a
    stroke width of 1-5px proportional to their weight.

    Args:
        layers_used: Layers in display order.
        components: Layer id -> component dicts (``qualname``, ``symbols``).
        edges: Cross-layer edge dicts (``source``, ``target``, ``kind``,
            ``weight``).
        max_per_layer: Maximum number of component nodes drawn per layer.

    Returns:
        The Mermaid diagram source as a single newline-joined string.
    """
    lines: list[str] = ["flowchart TB"]
    # Components actually drawn per layer; only these may be styled later,
    # because hidden components are never declared as nodes.
    drawn: dict[str, list[dict[str, Any]]] = {}
    for lay in layers_used:
        comps = components.get(lay.id, [])
        if not comps:
            continue
        lines.append(f' subgraph {_layer_safe(lay.id)}["<b>{lay.title}</b>"]')
        lines.append(" direction LR")
        ranked = sorted(comps, key=lambda c: -int(c.get("symbols") or 0))
        shown = ranked[:max_per_layer]
        drawn[lay.id] = shown
        hidden = len(ranked) - len(shown)
        for c in shown:
            qn = c["qualname"]
            badge = f" · {c['symbols']}" if c["symbols"] else ""
            lines.append(f' {_safe_id(qn)}["{_short_name(qn)}{badge}"]')
        if hidden > 0:
            lines.append(
                f' {_safe_id(lay.id + "_more")}(["+{hidden} more"])'
            )
        lines.append(" end")

    # Merge CALLS and IMPORTS counts per (source layer, target layer) pair.
    layer_pair: dict[tuple[str, str], dict[str, int]] = defaultdict(
        lambda: {"calls": 0, "imports": 0}
    )
    for e in edges:
        bucket = layer_pair[(e["source"], e["target"])]
        counter = "calls" if e["kind"] == "CALLS" else "imports"
        bucket[counter] += int(e["weight"])

    # linkStyle addresses links by declaration order, so remember each index.
    edge_styles: list[tuple[int, int]] = []
    for edge_idx, ((s, d), buckets) in enumerate(sorted(layer_pair.items())):
        calls = buckets["calls"]
        imports = buckets["imports"]
        bits = []
        if calls:
            bits.append(f"{calls} calls")
        if imports:
            bits.append(f"{imports} imports")
        label = " / ".join(bits) or "uses"
        lines.append(f" {_layer_safe(s)} --\"{label}\"--> {_layer_safe(d)}")
        edge_styles.append((edge_idx, calls + imports))

    lines.append("")
    for lay in layers_used:
        if lay.id not in drawn:
            continue
        class_name = f"{_SAFE_RE.sub('_', lay.id)}_node"
        # Honour the layer's own text colour rather than a fixed value.
        lines.append(
            f" classDef {class_name} "
            f"fill:{lay.color},stroke:{lay.color},color:{lay.text_color},rx:8,ry:8"
        )
        for c in drawn[lay.id]:
            lines.append(f" class {_safe_id(c['qualname'])} {class_name}")
        lines.append(
            f" style {_layer_safe(lay.id)} fill:transparent,"
            f"stroke:{lay.color},stroke-width:2px,stroke-dasharray:0"
        )

    if edge_styles:
        max_w = max(w for _, w in edge_styles) or 1
        for idx, w in edge_styles:
            thickness = 1 + round((w / max_w) * 4)
            lines.append(
                f" linkStyle {idx} stroke:#94a3b8,stroke-width:{thickness}px,"
                "fill:none"
            )

    return "\n".join(lines)
647
+
648
+
649
+ def _render_context_mermaid(root: str = "") -> str:
650
+ proj = root or "your repo"
651
+ return "\n".join([
652
+ "flowchart LR",
653
+ ' user(["<b>Developer</b><br>runs codegraph"])',
654
+ f' repo[("<b>{proj}</b><br>source repository")]',
655
+ ' cli{{"<b>codegraph CLI</b><br>build · analyze · viz · serve"}}',
656
+ ' db[("<b>.codegraph/graph.db</b><br>SQLite store")]',
657
+ ' out[/"<b>.codegraph/explore/</b><br>HTML dashboard"/]',
658
+ " user -- commands --> cli",
659
+ " cli -- reads --> repo",
660
+ " cli -- writes --> db",
661
+ " cli -- writes --> out",
662
+ " user -- opens --> out",
663
+ "",
664
+ " classDef person fill:#a78bfa,stroke:#1e293b,color:#0b1220,rx:8,ry:8",
665
+ " classDef system fill:#22d3ee,stroke:#1e293b,color:#0b1220,rx:8,ry:8",
666
+ " classDef ext fill:#fcd34d,stroke:#1e293b,color:#0b1220,rx:6,ry:6",
667
+ " classDef store fill:#60a5fa,stroke:#1e293b,color:#0b1220,rx:6,ry:6",
668
+ " class user person",
669
+ " class cli system",
670
+ " class repo,out ext",
671
+ " class db store",
672
+ ])
673
+
674
+
675
# Names exported by ``from codegraph.viz.hld import *``.
__all__ = [
    "LAYERS",
    "LAYER_CATALOG",
    "HldPayload",
    "Layer",
    "LayerSpec",
    "build_hld",
    "derive_layers",
]