polycodegraph 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. codegraph/__init__.py +10 -0
  2. codegraph/analysis/__init__.py +30 -0
  3. codegraph/analysis/_common.py +125 -0
  4. codegraph/analysis/blast_radius.py +63 -0
  5. codegraph/analysis/cycles.py +79 -0
  6. codegraph/analysis/dataflow.py +861 -0
  7. codegraph/analysis/dead_code.py +165 -0
  8. codegraph/analysis/hotspots.py +68 -0
  9. codegraph/analysis/infrastructure.py +439 -0
  10. codegraph/analysis/metrics.py +52 -0
  11. codegraph/analysis/report.py +222 -0
  12. codegraph/analysis/roles.py +323 -0
  13. codegraph/analysis/untested.py +79 -0
  14. codegraph/cli.py +1506 -0
  15. codegraph/config.py +64 -0
  16. codegraph/embed/__init__.py +35 -0
  17. codegraph/embed/chunker.py +120 -0
  18. codegraph/embed/embedder.py +113 -0
  19. codegraph/embed/query.py +181 -0
  20. codegraph/embed/store.py +360 -0
  21. codegraph/graph/__init__.py +0 -0
  22. codegraph/graph/builder.py +212 -0
  23. codegraph/graph/schema.py +69 -0
  24. codegraph/graph/store_networkx.py +55 -0
  25. codegraph/graph/store_sqlite.py +249 -0
  26. codegraph/mcp_server/__init__.py +6 -0
  27. codegraph/mcp_server/server.py +933 -0
  28. codegraph/parsers/__init__.py +0 -0
  29. codegraph/parsers/base.py +70 -0
  30. codegraph/parsers/go.py +570 -0
  31. codegraph/parsers/python.py +1707 -0
  32. codegraph/parsers/typescript.py +1397 -0
  33. codegraph/py.typed +0 -0
  34. codegraph/resolve/__init__.py +4 -0
  35. codegraph/resolve/calls.py +480 -0
  36. codegraph/review/__init__.py +31 -0
  37. codegraph/review/baseline.py +32 -0
  38. codegraph/review/differ.py +211 -0
  39. codegraph/review/hook.py +70 -0
  40. codegraph/review/risk.py +219 -0
  41. codegraph/review/rules.py +342 -0
  42. codegraph/viz/__init__.py +17 -0
  43. codegraph/viz/_style.py +45 -0
  44. codegraph/viz/dashboard.py +740 -0
  45. codegraph/viz/diagrams.py +370 -0
  46. codegraph/viz/explore.py +453 -0
  47. codegraph/viz/hld.py +683 -0
  48. codegraph/viz/html.py +115 -0
  49. codegraph/viz/mermaid.py +111 -0
  50. codegraph/viz/svg.py +77 -0
  51. codegraph/web/__init__.py +4 -0
  52. codegraph/web/server.py +165 -0
  53. codegraph/web/static/app.css +664 -0
  54. codegraph/web/static/app.js +919 -0
  55. codegraph/web/static/index.html +112 -0
  56. codegraph/web/static/views/architecture.js +1671 -0
  57. codegraph/web/static/views/graph3d.css +564 -0
  58. codegraph/web/static/views/graph3d.js +999 -0
  59. codegraph/web/static/views/graph3d_transform.js +984 -0
  60. codegraph/workspace/__init__.py +34 -0
  61. codegraph/workspace/config.py +110 -0
  62. codegraph/workspace/operations.py +294 -0
  63. polycodegraph-0.1.0.dist-info/METADATA +687 -0
  64. polycodegraph-0.1.0.dist-info/RECORD +67 -0
  65. polycodegraph-0.1.0.dist-info/WHEEL +4 -0
  66. polycodegraph-0.1.0.dist-info/entry_points.txt +2 -0
  67. polycodegraph-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,861 @@
1
+ """End-to-end data-flow tracing across the structural / behavioural / dataflow
2
+ graph layers.
3
+
4
+ This module exposes two complementary functions:
5
+
6
+ * :func:`match_route` — given a frontend ``FETCH_CALL`` URL + method, find the
7
+ qualname of the backend handler whose ``ROUTE`` edge matches.
8
+
9
+ * :func:`trace` — given an entry symbol (function qualname or ``url:METHOD path``
10
+ shape), walk the call graph + cross-layer edges to produce an ordered
11
+ :class:`DataFlow`. Implemented by DF4.
12
+
13
+ The dataclasses are a stable contract — never modify the shapes here without
14
+ coordination.
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import re
19
+ from dataclasses import dataclass, field
20
+ from typing import Any
21
+
22
+ import networkx as nx
23
+
24
+ from codegraph.graph.schema import EdgeKind, NodeKind
25
+
26
+
27
@dataclass
class FlowHop:
    """A single step of a traced data-flow.

    ``layer`` is ``frontend``, ``backend`` or ``db`` so that consumers can
    group hops into lanes. ``confidence`` is the match quality of this hop:
    ``trace`` uses 1.0 for hops reached over call-graph or data edges and the
    ``match_route`` score for hops reached through a URL match.
    """

    layer: str  # "frontend" | "backend" | "db"
    qualname: str
    file: str = ""
    line: int = 0
    method: str | None = None  # HTTP verb, when applicable
    path: str | None = None  # URL path, when applicable
    args: list[str] = field(default_factory=list)
    kwargs: dict[str, str] = field(default_factory=dict)
    role: str | None = None  # HANDLER / SERVICE / COMPONENT / REPO if known
    confidence: float = 1.0

    # pragma: codegraph-public-api
    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the hop.

        ``args`` and ``kwargs`` are copied so the caller cannot mutate the
        hop through the result. ``method``, ``path`` and ``role`` are only
        included when they are not ``None``.
        """
        payload: dict[str, Any] = {
            "layer": self.layer,
            "qualname": self.qualname,
            "file": self.file,
            "line": self.line,
            "args": list(self.args),
            "kwargs": dict(self.kwargs),
            "confidence": self.confidence,
        }
        # Optional fields are appended in a fixed order after the mandatory ones.
        for optional_key in ("method", "path", "role"):
            value = getattr(self, optional_key)
            if value is not None:
                payload[optional_key] = value
        return payload
65
+
66
+
67
@dataclass
class DataFlow:
    """An ordered list of :class:`FlowHop` objects making up one trace.

    ``confidence`` describes the whole chain. ``trace`` fills it with the
    minimum per-hop confidence, so the chain is only as strong as its
    weakest match.
    """

    entry: str
    hops: list[FlowHop] = field(default_factory=list)
    confidence: float = 1.0

    # pragma: codegraph-public-api
    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view with every hop serialised via ``to_dict``."""
        serialised_hops = [hop.to_dict() for hop in self.hops]
        return {
            "entry": self.entry,
            "hops": serialised_hops,
            "confidence": self.confidence,
        }
86
+
87
+
88
+ _PLACEHOLDER_RE = re.compile(r"^(\{[^}]*\}|\$\{[^}]*\}|:[A-Za-z_][A-Za-z0-9_]*|-?\d+)$")
89
+
90
+
91
+ def _strip_query_fragment(path: str) -> str:
92
+ """Drop ``?query`` and ``#fragment``; collapse trailing slash."""
93
+ for sep in ("?", "#"):
94
+ if sep in path:
95
+ path = path.split(sep, 1)[0]
96
+ if len(path) > 1 and path.endswith("/"):
97
+ path = path.rstrip("/")
98
+ return path
99
+
100
+
101
def _segments(path: str) -> list[str]:
    """Split a path into its non-empty segments.

    ``/api/users/{id}`` becomes ``['api', 'users', '{id}']``. The query
    string, the fragment and trailing slashes are removed first.
    """
    pieces = _strip_query_fragment(path).split("/")
    return [piece for piece in pieces if piece]
104
+
105
+
106
def _is_placeholder(seg: str) -> bool:
    """Tell whether a path segment stands for a variable value.

    That is the case for a purely numeric segment, a segment wrapped in
    ``{...}`` or ``${...}``, and an Express-style ``:name`` segment.
    """
    return _PLACEHOLDER_RE.match(seg) is not None
110
+
111
+
112
def _normalise_path(path: str) -> list[str]:
    """Return the path's segments with every placeholder replaced by ``{*}``.

    Two paths that differ only in placeholder syntax therefore compare equal
    segment by segment.
    """
    normalised: list[str] = []
    for segment in _segments(path):
        normalised.append("{*}" if _is_placeholder(segment) else segment)
    return normalised
117
+
118
+
119
+ def _path_specificity(segs: list[str]) -> int:
120
+ """How "concrete" a path is — more literal segments means more specific.
121
+ Used to break ties when two routes match the same fetch."""
122
+ return sum(1 for s in segs if s != "{*}")
123
+
124
+
125
def _route_candidates(graph: nx.MultiDiGraph) -> list[tuple[str, str, str]]:
    """Return ``(handler_qualname, method, path)`` for every usable ROUTE edge.

    The handler is the edge's source node; its ``qualname`` attribute is used
    when present, otherwise the node id itself. The method (upper-cased) and
    the path are read from the edge's ``metadata`` dict. Edges whose metadata
    is not a dict, or that lack a method or a path, are skipped. The edge
    target is not inspected.
    """
    route_key = EdgeKind.ROUTE.value
    candidates: list[tuple[str, str, str]] = []
    for source, _target, edge_key, edge_data in graph.edges(keys=True, data=True):
        if edge_key != route_key:
            continue
        metadata = edge_data.get("metadata") or {}
        if not isinstance(metadata, dict):
            continue
        verb = str(metadata.get("method") or "").upper()
        route_path = str(metadata.get("path") or "")
        if not (verb and route_path):
            continue
        source_attrs = graph.nodes.get(source) or {}
        handler = str(source_attrs.get("qualname") or source)
        candidates.append((handler, verb, route_path))
    return candidates
147
+
148
+
149
def _handler_param_names(graph: nx.MultiDiGraph, handler_qn: str) -> list[str]:
    """Return the handler's parameter names for body-key overlap scoring.

    The first FUNCTION or METHOD node whose ``qualname`` equals
    ``handler_qn`` is used and its ``metadata.params`` entries are read.
    Leading ``*`` characters are stripped from each name, and ``self`` /
    ``cls`` are left out. An empty list is returned when no such node
    exists, or when ``params`` is missing or is not a list/tuple. The type
    check mirrors ``_handler_param_names_for_arg_flow`` and keeps malformed
    metadata from raising.
    """
    callable_kinds = (NodeKind.FUNCTION.value, NodeKind.METHOD.value)
    for _node_id, attrs in graph.nodes(data=True):
        if str(attrs.get("qualname") or "") != handler_qn:
            continue
        if str(attrs.get("kind") or "") not in callable_kinds:
            continue
        metadata = attrs.get("metadata") or {}
        params = metadata.get("params") if isinstance(metadata, dict) else None
        if not isinstance(params, (list, tuple)):
            # Missing or malformed parameter metadata: nothing to score against.
            return []
        names: list[str] = []
        for param in params:
            if not isinstance(param, dict):
                continue
            name = str(param.get("name") or "").lstrip("*")
            if name and name not in ("self", "cls"):
                names.append(name)
        return names
    return []
169
+
170
+
171
def match_route(
    graph: nx.MultiDiGraph,
    fetch_url: str,
    fetch_method: str = "GET",
    *,
    body_keys: list[str] | None = None,
) -> tuple[str, float] | None:
    """Find the backend ROUTE that best matches a frontend fetch.

    Returns ``(handler_qualname, confidence)`` or ``None`` when no route with
    the same HTTP method matches.

    Scoring:
      * **1.0** — the normalised paths are equal and neither side contains a
        placeholder
      * **0.9** — the normalised paths are equal but a placeholder is involved
      * **0.5** — the route path is only a non-empty *prefix* of the fetch path
      * when ``body_keys`` shares at least one name with the handler's
        parameters, 0.05 is added, capped at 1.0 / 0.95 / 0.7 for the three
        tiers above respectively

    Query strings, fragments and trailing slashes are ignored, and the method
    comparison is case-insensitive (an empty method counts as ``GET``).

    Among routes with the same score the one with more literal segments wins;
    on a complete tie the first candidate is kept.
    """
    wanted_method = (fetch_method or "GET").upper()
    fetch_shape = _normalise_path(fetch_url)
    fetch_is_literal = not any(_is_placeholder(s) for s in _segments(fetch_url))

    winner: tuple[str, float, int] | None = None  # (qualname, score, specificity)

    for handler_qn, route_method, route_path in _route_candidates(graph):
        if route_method != wanted_method:
            continue
        route_shape = _normalise_path(route_path)

        if fetch_shape == route_shape:
            route_is_literal = not any(
                _is_placeholder(s) for s in _segments(route_path)
            )
            score = 1.0 if (fetch_is_literal and route_is_literal) else 0.9
        elif route_shape and fetch_shape[: len(route_shape)] == route_shape:
            # The slice can only equal the route when the fetch is at least
            # as long, so no separate length check is needed.
            score = 0.5
        else:
            continue
        specificity = _path_specificity(route_shape)

        # Body-key bonus: any overlap with the handler's params nudges the
        # score up within its tier.
        if body_keys:
            shared = set(body_keys) & set(_handler_param_names(graph, handler_qn))
            if shared:
                if score >= 1.0:
                    ceiling = 1.0
                elif score >= 0.9:
                    ceiling = 0.95
                else:
                    ceiling = 0.7
                score = min(ceiling, score + 0.05)

        if winner is None or (score, specificity) > (winner[1], winner[2]):
            winner = (handler_qn, score, specificity)

    if winner is None:
        return None
    return (winner[0], winner[1])
238
+
239
+
240
+ _FRONTEND_EXTS = (".tsx", ".jsx")
241
+ _FETCH_ENTRY_RE = re.compile(
242
+ r"^\s*(?:url:\s*)?(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS|TRACE|WEBSOCKET)\s+(\S+)\s*$",
243
+ re.IGNORECASE,
244
+ )
245
+
246
+
247
def _resolve_entry_node(
    graph: nx.MultiDiGraph, entry: str
) -> str | None:
    """Return the id of the node whose qualname matches ``entry``.

    Surrounding whitespace in ``entry`` is ignored. An exact qualname match
    anywhere in the graph wins; otherwise the first node (in iteration order)
    whose qualname matches case-insensitively is returned. ``None`` means no
    node matched.
    """
    wanted = entry.strip()
    folded = wanted.lower()
    fallback: str | None = None
    for node_id, attrs in graph.nodes(data=True):
        qualname = str(attrs.get("qualname") or "")
        if qualname == wanted:
            return str(node_id)
        # Remember only the first case-insensitive hit; an exact match found
        # later still takes precedence.
        if fallback is None and qualname.lower() == folded:
            fallback = str(node_id)
    return fallback
263
+
264
+
265
def _layer_for(attrs: dict[str, Any]) -> str:
    """Derive the layer label for a node from its attributes.

    The ``metadata.role`` decides first: ``REPO`` maps to ``db`` and
    ``COMPONENT`` to ``frontend``. Otherwise a ``file`` ending in one of the
    frontend suffixes (compared case-insensitively) yields ``frontend``, and
    everything else is ``backend``.
    """
    metadata = attrs.get("metadata") or {}
    role = str(metadata.get("role") or "") if isinstance(metadata, dict) else ""

    layer_by_role = {"REPO": "db", "COMPONENT": "frontend"}
    if role in layer_by_role:
        return layer_by_role[role]

    file_path = str(attrs.get("file") or "").lower()
    return "frontend" if file_path.endswith(_FRONTEND_EXTS) else "backend"
280
+
281
+
282
def _hop_from_node(
    graph: nx.MultiDiGraph,
    node_id: str,
    *,
    args: list[str] | None = None,
    kwargs: dict[str, str] | None = None,
    confidence: float = 1.0,
) -> FlowHop:
    """Build a :class:`FlowHop` describing ``node_id``.

    Qualname, file and start line come from the node's attributes, with the
    node id, ``""`` and ``0`` as fallbacks. The role is taken from
    ``metadata.role`` when it is set. ``args`` and ``kwargs`` are copied into
    the hop and ``confidence`` is stored unchanged.
    """
    attrs = graph.nodes.get(node_id) or {}
    metadata = attrs.get("metadata") or {}

    role: str | None = None
    if isinstance(metadata, dict) and metadata.get("role"):
        role = str(metadata.get("role"))

    qualname = str(attrs.get("qualname") or node_id)
    source_file = str(attrs.get("file") or "")
    start_line = int(attrs.get("line_start") or 0)
    return FlowHop(
        layer=_layer_for(attrs),
        qualname=qualname,
        file=source_file,
        line=start_line,
        args=list(args or []),
        kwargs=dict(kwargs or {}),
        role=role,
        confidence=confidence,
    )
307
+
308
+
309
def _outgoing_calls(
    graph: nx.MultiDiGraph, node_id: str
) -> list[tuple[str, dict[str, Any]]]:
    """Return ``[(target_id, edge_metadata)]`` for the node's CALLS edges.

    Targets that resolve to real nodes come first, followed by unresolved
    sentinel targets (id or qualname starting with ``unresolved::``); the
    original edge order is kept within each group. ``trace`` follows the
    first unvisited callee, so this ordering makes real next-hops win over
    unresolved ones. Metadata that is not a dict is replaced by ``{}``.
    """
    calls_key = EdgeKind.CALLS.value

    def _is_unresolved(target_id: str) -> bool:
        target_attrs = graph.nodes.get(target_id) or {}
        qualname = str(target_attrs.get("qualname") or target_id)
        return qualname.startswith("unresolved::") or target_id.startswith(
            "unresolved::"
        )

    resolved: list[tuple[str, dict[str, Any]]] = []
    unresolved: list[tuple[str, dict[str, Any]]] = []
    for _source, target, edge_key, edge_data in graph.out_edges(
        node_id, keys=True, data=True
    ):
        if edge_key != calls_key:
            continue
        metadata = edge_data.get("metadata") or {}
        item = (str(target), metadata if isinstance(metadata, dict) else {})
        bucket = unresolved if _is_unresolved(item[0]) else resolved
        bucket.append(item)
    return resolved + unresolved
332
+
333
+
334
def _outgoing_data_edges(
    graph: nx.MultiDiGraph, node_id: str
) -> list[tuple[str, str]]:
    """Return ``[(target_id, edge_kind)]`` for READS_FROM / WRITES_TO edges
    leaving ``node_id``, in the graph's edge order."""
    data_keys = (EdgeKind.READS_FROM.value, EdgeKind.WRITES_TO.value)
    return [
        (str(target), str(edge_key))
        for _source, target, edge_key in graph.out_edges(node_id, keys=True)
        if edge_key in data_keys
    ]
343
+
344
+
345
def _outgoing_fetches(
    graph: nx.MultiDiGraph, node_id: str
) -> list[dict[str, Any]]:
    """Return the metadata dict of every FETCH_CALL edge leaving ``node_id``.

    Missing metadata is treated as ``{}``; metadata that is not a dict is
    dropped.
    """
    fetch_key = EdgeKind.FETCH_CALL.value
    metadata_values = (
        edge_data.get("metadata") or {}
        for _source, _target, edge_key, edge_data in graph.out_edges(
            node_id, keys=True, data=True
        )
        if edge_key == fetch_key
    )
    return [metadata for metadata in metadata_values if isinstance(metadata, dict)]
356
+
357
+
358
+ def _edge_args(meta: dict[str, Any]) -> tuple[list[str], dict[str, str]]:
359
+ args_raw = meta.get("args") or []
360
+ kwargs_raw = meta.get("kwargs") or {}
361
+ args = [str(a) for a in args_raw] if isinstance(args_raw, list) else []
362
+ kwargs: dict[str, str] = {}
363
+ if isinstance(kwargs_raw, dict):
364
+ kwargs = {str(k): str(v) for k, v in kwargs_raw.items()}
365
+ return args, kwargs
366
+
367
+
368
def trace(
    graph: nx.MultiDiGraph,
    entry: str,
    *,
    max_depth: int = 6,
) -> DataFlow | None:
    """Trace a data-flow starting from ``entry``.

    ``entry`` is either

    * a symbol qualname — the walk starts at the node with that qualname, or
    * ``"METHOD /path"`` (optionally prefixed ``"url:"``) — the start node is
      the handler chosen by :func:`match_route`.

    Returns ``None`` when no start node can be found. A ``METHOD /path``
    entry for which no route matches yields an empty :class:`DataFlow` with
    confidence ``0.0`` instead.

    How the chain is built:

    * the start node is the first hop; for URL entries it carries the method
      and path and the route-match confidence
    * at every step an outgoing FETCH_CALL whose route resolves to an
      unvisited handler is followed first (one fetch per step); that hop
      carries the fetch method, URL and match confidence
    * otherwise the first unvisited CALLS target is followed; its args and
      kwargs come from the CALLS edge that led to it
    * when neither exists, every unvisited READS_FROM / WRITES_TO target is
      appended as a ``layer="db"`` hop (``kwargs["op"]`` holds the edge kind)
      and the walk stops
    * nodes already visited are never revisited, and at most ``max_depth``
      fetch/call transitions are followed

    The flow's confidence is the minimum confidence over all hops.
    """
    # ---- Resolve the starting node --------------------------------------
    entry_match = _FETCH_ENTRY_RE.match(entry)
    first_confidence = 1.0
    first_method: str | None = None
    first_path: str | None = None

    if entry_match is None:
        start_node = _resolve_entry_node(graph, entry)
    else:
        first_method = entry_match.group(1).upper()
        first_path = entry_match.group(2)
        routed = match_route(graph, first_path, first_method)
        if routed is None:
            return DataFlow(entry=entry, hops=[], confidence=0.0)
        handler_qn, first_confidence = routed
        start_node = _resolve_entry_node(graph, handler_qn)

    if start_node is None:
        return None

    # ---- Walk the graph -------------------------------------------------
    entry_hop = _hop_from_node(
        graph,
        start_node,
        args=[],
        kwargs={},
        confidence=first_confidence,
    )
    if first_method is not None:
        entry_hop.method = first_method
    if first_path is not None:
        entry_hop.path = first_path

    hops: list[FlowHop] = [entry_hop]
    visited: set[str] = {start_node}
    confidences: list[float] = [first_confidence]
    current = start_node
    steps_taken = 0

    while steps_taken < max_depth:
        # 1. Cross-layer fetch transition (if any).
        followed_fetch = False
        for fetch_meta in _outgoing_fetches(graph, current):
            url = str(fetch_meta.get("url") or "")
            verb = str(fetch_meta.get("method") or "GET")
            raw_body_keys = fetch_meta.get("body_keys") or []
            if isinstance(raw_body_keys, list):
                body_keys = [str(k) for k in raw_body_keys]
            else:
                body_keys = None
            routed = match_route(graph, url, verb, body_keys=body_keys)
            if routed is None:
                continue
            handler_qn, match_conf = routed
            handler_node = _resolve_entry_node(graph, handler_qn)
            if handler_node is None or handler_node in visited:
                continue
            fetch_hop = _hop_from_node(
                graph,
                handler_node,
                args=[],
                kwargs={},
                confidence=match_conf,
            )
            fetch_hop.method = verb
            fetch_hop.path = url
            hops.append(fetch_hop)
            visited.add(handler_node)
            confidences.append(match_conf)
            current = handler_node
            followed_fetch = True
            break  # follow one fetch per hop
        if followed_fetch:
            steps_taken += 1
            continue

        # 2. Standard CALLS traversal — take the first unvisited callee.
        next_call = next(
            (
                (target, call_meta)
                for target, call_meta in _outgoing_calls(graph, current)
                if target not in visited
            ),
            None,
        )
        if next_call is not None:
            callee, call_meta = next_call
            call_args, call_kwargs = _edge_args(call_meta)
            hops.append(
                _hop_from_node(
                    graph,
                    callee,
                    args=call_args,
                    kwargs=call_kwargs,
                    confidence=1.0,
                )
            )
            visited.add(callee)
            confidences.append(1.0)
            current = callee
            steps_taken += 1
            continue

        # 3. Terminal data edges (READS_FROM / WRITES_TO) — emit and stop.
        for target, edge_kind in _outgoing_data_edges(graph, current):
            if target in visited:
                continue
            target_attrs = graph.nodes.get(target) or {}
            hops.append(
                FlowHop(
                    layer="db",
                    qualname=str(target_attrs.get("qualname") or target),
                    file=str(target_attrs.get("file") or ""),
                    line=int(target_attrs.get("line_start") or 0),
                    kwargs={"op": edge_kind},
                    role="REPO",
                    confidence=1.0,
                )
            )
            visited.add(target)
            confidences.append(1.0)
        # Nothing further can be followed from here, so the walk ends.
        break

    final_conf = min(confidences) if confidences else 0.0
    return DataFlow(entry=entry, hops=hops, confidence=final_conf)
529
+
530
+
531
def shape_hops_for_handler(
    graph: nx.MultiDiGraph,
    handler_qn: str,
    *,
    method: str = "",
    path: str = "",
    max_depth: int = 6,
) -> dict[str, Any]:
    """Build the per-handler ``dataflow`` payload for the HLD view.

    Result shape::

        {
            "hops": [
                {"kind": "FETCH_CALL"|"ROUTE"|"CALL"|"READS_FROM"|"WRITES_TO",
                 "qualname": str, "file": str, "line": int,
                 "args": [str, ...], "role": str | None,
                 "arg_flow": {starting_key: local_name | None, ...},
                 "method": str, "path": str,        # ROUTE, when supplied
                 "body_keys": [str, ...],           # FETCH_CALL only
                 "method": str, "url": str          # FETCH_CALL only
                },
                ...
            ],
            "confidence": float,
        }

    Per-hop confidence is not reported; only the chain confidence from
    :func:`trace` is. An empty ``handler_qn``, an unresolvable handler or an
    empty trace yields ``{"hops": [], "confidence": 0.0}``.

    Steps:

    * walk forward from ``handler_qn`` with :func:`trace`
    * put the frontend FETCH_CALL callers whose route resolves to this
      handler in front of the chain
    * drop hops whose qualname starts with ``unresolved::``
    * label db-layer hops ``READS_FROM`` / ``WRITES_TO``, the first remaining
      non-db hop ``ROUTE`` and the others ``CALL``
    * stamp ``method`` / ``path`` on the ROUTE hop and, if it has no args,
      seed them from the handler's own parameter names
    * attach an ``arg_flow`` mapping to every hop
    """
    if not handler_qn:
        return {"hops": [], "confidence": 0.0}

    flow = trace(graph, handler_qn, max_depth=max_depth)
    if flow is None or not flow.hops:
        return {"hops": [], "confidence": 0.0}

    # Frontend FETCH_CALL caller(s) that resolve to this handler go first.
    shaped: list[dict[str, Any]] = list(
        _frontend_fetch_hops_for_handler(graph, handler_qn)
    )

    kept = 0  # number of traced hops kept so far; drives ROUTE vs CALL
    for hop in flow.hops:
        # Unresolved sentinel nodes (e.g. decorator call sites the resolver
        # never bound) are not useful in the trace UI.
        if hop.qualname.startswith("unresolved::"):
            continue
        kind = _classify_hop_kind(hop, kept)
        kept += 1
        if kind in ("READS_FROM", "WRITES_TO"):
            # trace() hard-codes role="REPO" on db hops; report the node's
            # real role instead so unannotated nodes surface as None.
            role: str | None = _node_role(graph, hop.qualname)
        else:
            role = hop.role
        shaped.append(
            {
                "kind": kind,
                "qualname": hop.qualname,
                "file": hop.file,
                "line": hop.line,
                "args": list(hop.args),
                "role": role,
            }
        )

    # Stamp method/path on the ROUTE hop (not necessarily index 0 when
    # FETCH_CALL hops were prepended). trace() cannot supply args for the
    # entry hop because no CALLS edge leads into it, so the handler's own
    # parameter names are used as a fallback.
    handler_params = _handler_param_names_for_arg_flow(graph, handler_qn)
    route_hop = next((h for h in shaped if h.get("kind") == "ROUTE"), None)
    if route_hop is not None:
        if method:
            route_hop["method"] = method
        if path:
            route_hop["path"] = path
        if not route_hop.get("args") and handler_params:
            route_hop["args"] = list(handler_params)

    # Every hop maps the same starting-key set to its local arg name (or None).
    starting_keys = _starting_keys_from_hops(shaped)
    for shaped_hop in shaped:
        raw_args = shaped_hop.get("args") or []
        local_args = [str(a) for a in raw_args] if isinstance(raw_args, list) else []
        shaped_hop["arg_flow"] = _compute_arg_flow(starting_keys, local_args)

    return {"hops": shaped, "confidence": float(flow.confidence)}
635
+
636
+
637
def _node_role(graph: nx.MultiDiGraph, qualname: str) -> str | None:
    """Return ``metadata.role`` of the first node with this qualname.

    ``None`` is returned for an empty qualname, when no node matches, or when
    the matching node has no (truthy) role.
    """
    if not qualname:
        return None
    for _node_id, attrs in graph.nodes(data=True):
        if str(attrs.get("qualname") or "") == qualname:
            metadata = attrs.get("metadata") or {}
            role = metadata.get("role") if isinstance(metadata, dict) else None
            # Only the first matching node is consulted.
            return str(role) if role else None
    return None
651
+
652
+
653
def _handler_param_names_for_arg_flow(
    graph: nx.MultiDiGraph, qualname: str
) -> list[str]:
    """Return the handler's parameter names for seeding ``arg_flow``.

    The names are read from ``metadata.params`` of the first node whose
    qualname matches. ``trace`` has no incoming CALLS edge to take args from
    at the entry hop, so the node's own recorded signature is used when the
    ROUTE hop is shaped. ``self`` / ``cls`` and empty names are skipped. An
    empty list is returned when the node is missing or its metadata / params
    do not have the expected dict / list shape.
    """
    if not qualname:
        return []
    for _node_id, attrs in graph.nodes(data=True):
        if str(attrs.get("qualname") or "") != qualname:
            continue
        metadata = attrs.get("metadata") or {}
        params = (metadata.get("params") or []) if isinstance(metadata, dict) else []
        if not isinstance(params, list):
            return []
        candidate_names = (
            str(param.get("name") or "") for param in params if isinstance(param, dict)
        )
        return [
            name for name in candidate_names if name and name not in ("self", "cls")
        ]
    return []
684
+
685
+
686
+ _CAMEL_BOUNDARY_RE = re.compile(r"(?<=[a-z0-9])(?=[A-Z])")
687
+
688
+
689
+ def _normalise_arg_name(name: str) -> str:
690
+ """Normalise an argument name for cross-hop matching.
691
+
692
+ Rules (in order):
693
+
694
+ 1. strip surrounding whitespace and quotes
695
+ 2. lowercase
696
+ 3. strip leading and trailing underscores
697
+ 4. split on underscores and on camelCase boundaries
698
+ 5. concatenate the resulting tokens
699
+
700
+ Examples::
701
+
702
+ userId -> "userid"
703
+ user_id -> "userid"
704
+ _user_id -> "userid"
705
+ UserID -> "userid"
706
+ userid -> "userid"
707
+ """
708
+ if not name:
709
+ return ""
710
+ cleaned = name.strip().strip("'\"`")
711
+ if not cleaned:
712
+ return ""
713
+ cleaned = cleaned.strip("_")
714
+ if not cleaned:
715
+ return ""
716
+ # Split camelCase boundaries first, then split on underscores.
717
+ camel_split = _CAMEL_BOUNDARY_RE.sub("_", cleaned)
718
+ tokens = [t for t in camel_split.split("_") if t]
719
+ return "".join(tokens).lower()
720
+
721
+
722
def _compute_arg_flow(
    starting_keys: list[str], hop_args: list[str]
) -> dict[str, str | None]:
    """Map every starting key to the first hop arg with the same normalised name.

    The result has one entry per starting key, spelled as given and in the
    given order. The value is the hop's own spelling of the matching arg, or
    ``None`` when nothing matches or the key normalises to ``""``. Since the
    key set does not depend on the hop, every hop of a chain gets the same
    keys. An empty ``starting_keys`` yields ``{}``.
    """
    if not starting_keys:
        return {}

    # First original spelling seen for each non-empty normalised arg name.
    first_by_normalised: dict[str, str] = {}
    for arg in hop_args:
        normalised = _normalise_arg_name(arg)
        if normalised:
            first_by_normalised.setdefault(normalised, arg)

    flow: dict[str, str | None] = {}
    for key in starting_keys:
        # "" is never stored above, so an empty normalised key maps to None.
        flow[key] = first_by_normalised.get(_normalise_arg_name(key))
    return flow
748
+
749
+
750
+ def _starting_keys_from_hops(hops: list[dict[str, Any]]) -> list[str]:
751
+ """Determine starting keys for arg-flow propagation.
752
+
753
+ Per the v0.3 stretch contract:
754
+
755
+ * If there is a FETCH_CALL hop, starting keys = its ``body_keys`` plus its
756
+ positional ``args`` (after stripping quotes), de-duplicated, preserving
757
+ first-seen order.
758
+ * Otherwise, starting keys = the first hop's (ROUTE) ``args``.
759
+ * If neither yields anything, return ``[]`` and ``arg_flow`` becomes
760
+ ``{}`` for every hop.
761
+ """
762
+ if not hops:
763
+ return []
764
+ fetch_hop: dict[str, Any] | None = None
765
+ for h in hops:
766
+ if h.get("kind") == "FETCH_CALL":
767
+ fetch_hop = h
768
+ break
769
+
770
+ raw: list[str] = []
771
+ if fetch_hop is not None:
772
+ body_keys = fetch_hop.get("body_keys") or []
773
+ if isinstance(body_keys, list):
774
+ raw.extend(str(k) for k in body_keys)
775
+ fetch_args = fetch_hop.get("args") or []
776
+ if isinstance(fetch_args, list):
777
+ raw.extend(str(a) for a in fetch_args)
778
+ else:
779
+ first_args = hops[0].get("args") or []
780
+ if isinstance(first_args, list):
781
+ raw.extend(str(a) for a in first_args)
782
+
783
+ seen: set[str] = set()
784
+ out: list[str] = []
785
+ for k in raw:
786
+ cleaned = k.strip().strip("'\"`")
787
+ if not cleaned or cleaned in seen:
788
+ continue
789
+ seen.add(cleaned)
790
+ out.append(cleaned)
791
+ return out
792
+
793
+
794
def _classify_hop_kind(hop: FlowHop, index: int) -> str:
    """Translate a :class:`FlowHop` into one of the contract ``kind`` strings.

    Hops outside the db layer are ``ROUTE`` at index 0 and ``CALL``
    otherwise. Db-layer hops are ``WRITES_TO`` when their ``kwargs["op"]``
    names the WRITES_TO edge kind and ``READS_FROM`` in every other case.
    """
    if hop.layer != "db":
        return "ROUTE" if index == 0 else "CALL"
    operation = hop.kwargs.get("op") if hop.kwargs else ""
    is_write = operation in (EdgeKind.WRITES_TO.value, "WRITES_TO")
    return "WRITES_TO" if is_write else "READS_FROM"
804
+
805
+
806
def _frontend_fetch_hops_for_handler(
    graph: nx.MultiDiGraph, handler_qn: str
) -> list[dict[str, Any]]:
    """Return FETCH_CALL hop dicts for callers whose fetch resolves to ``handler_qn``.

    Every FETCH_CALL edge in the graph is matched with :func:`match_route`;
    edges whose best route is another handler are ignored. Each calling node
    contributes at most one hop (its first matching edge), and callers
    without a qualname are skipped. The hops are sorted by caller qualname
    and then URL.
    """
    fetch_key = EdgeKind.FETCH_CALL.value
    found: list[dict[str, Any]] = []
    emitted_callers: set[str] = set()

    for source, _target, edge_key, edge_data in graph.edges(keys=True, data=True):
        if edge_key != fetch_key:
            continue
        metadata = edge_data.get("metadata") or {}
        if not isinstance(metadata, dict):
            continue

        # Cheap caller checks first; they do not depend on the route match.
        source_attrs = graph.nodes.get(source) or {}
        caller_qn = str(source_attrs.get("qualname") or "")
        if not caller_qn or caller_qn in emitted_callers:
            continue

        url = str(metadata.get("url") or "")
        verb = str(metadata.get("method") or "GET")
        raw_body_keys = metadata.get("body_keys") or []
        body_keys = (
            [str(k) for k in raw_body_keys] if isinstance(raw_body_keys, list) else []
        )
        routed = match_route(graph, url, verb, body_keys=body_keys or None)
        if routed is None or routed[0] != handler_qn:
            continue
        emitted_callers.add(caller_qn)

        caller_meta = source_attrs.get("metadata") or {}
        caller_role: str | None = None
        if isinstance(caller_meta, dict) and caller_meta.get("role"):
            caller_role = str(caller_meta.get("role"))

        found.append(
            {
                "kind": "FETCH_CALL",
                "qualname": caller_qn,
                "file": str(source_attrs.get("file") or ""),
                "line": int(source_attrs.get("line_start") or 0),
                "args": [],
                "role": caller_role,
                "body_keys": body_keys,
                "method": verb,
                "url": url,
            }
        )

    return sorted(
        found, key=lambda hop: (str(hop["qualname"]), str(hop.get("url") or ""))
    )
853
+
854
+
855
+ __all__ = [
856
+ "DataFlow",
857
+ "FlowHop",
858
+ "match_route",
859
+ "shape_hops_for_handler",
860
+ "trace",
861
+ ]