polycodegraph 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. codegraph/__init__.py +10 -0
  2. codegraph/analysis/__init__.py +30 -0
  3. codegraph/analysis/_common.py +125 -0
  4. codegraph/analysis/blast_radius.py +63 -0
  5. codegraph/analysis/cycles.py +79 -0
  6. codegraph/analysis/dataflow.py +861 -0
  7. codegraph/analysis/dead_code.py +165 -0
  8. codegraph/analysis/hotspots.py +68 -0
  9. codegraph/analysis/infrastructure.py +439 -0
  10. codegraph/analysis/metrics.py +52 -0
  11. codegraph/analysis/report.py +222 -0
  12. codegraph/analysis/roles.py +323 -0
  13. codegraph/analysis/untested.py +79 -0
  14. codegraph/cli.py +1506 -0
  15. codegraph/config.py +64 -0
  16. codegraph/embed/__init__.py +35 -0
  17. codegraph/embed/chunker.py +120 -0
  18. codegraph/embed/embedder.py +113 -0
  19. codegraph/embed/query.py +181 -0
  20. codegraph/embed/store.py +360 -0
  21. codegraph/graph/__init__.py +0 -0
  22. codegraph/graph/builder.py +212 -0
  23. codegraph/graph/schema.py +69 -0
  24. codegraph/graph/store_networkx.py +55 -0
  25. codegraph/graph/store_sqlite.py +249 -0
  26. codegraph/mcp_server/__init__.py +6 -0
  27. codegraph/mcp_server/server.py +933 -0
  28. codegraph/parsers/__init__.py +0 -0
  29. codegraph/parsers/base.py +70 -0
  30. codegraph/parsers/go.py +570 -0
  31. codegraph/parsers/python.py +1707 -0
  32. codegraph/parsers/typescript.py +1397 -0
  33. codegraph/py.typed +0 -0
  34. codegraph/resolve/__init__.py +4 -0
  35. codegraph/resolve/calls.py +480 -0
  36. codegraph/review/__init__.py +31 -0
  37. codegraph/review/baseline.py +32 -0
  38. codegraph/review/differ.py +211 -0
  39. codegraph/review/hook.py +70 -0
  40. codegraph/review/risk.py +219 -0
  41. codegraph/review/rules.py +342 -0
  42. codegraph/viz/__init__.py +17 -0
  43. codegraph/viz/_style.py +45 -0
  44. codegraph/viz/dashboard.py +740 -0
  45. codegraph/viz/diagrams.py +370 -0
  46. codegraph/viz/explore.py +453 -0
  47. codegraph/viz/hld.py +683 -0
  48. codegraph/viz/html.py +115 -0
  49. codegraph/viz/mermaid.py +111 -0
  50. codegraph/viz/svg.py +77 -0
  51. codegraph/web/__init__.py +4 -0
  52. codegraph/web/server.py +165 -0
  53. codegraph/web/static/app.css +664 -0
  54. codegraph/web/static/app.js +919 -0
  55. codegraph/web/static/index.html +112 -0
  56. codegraph/web/static/views/architecture.js +1671 -0
  57. codegraph/web/static/views/graph3d.css +564 -0
  58. codegraph/web/static/views/graph3d.js +999 -0
  59. codegraph/web/static/views/graph3d_transform.js +984 -0
  60. codegraph/workspace/__init__.py +34 -0
  61. codegraph/workspace/config.py +110 -0
  62. codegraph/workspace/operations.py +294 -0
  63. polycodegraph-0.1.0.dist-info/METADATA +687 -0
  64. polycodegraph-0.1.0.dist-info/RECORD +67 -0
  65. polycodegraph-0.1.0.dist-info/WHEEL +4 -0
  66. polycodegraph-0.1.0.dist-info/entry_points.txt +2 -0
  67. polycodegraph-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,165 @@
1
+ """Dead code detection: definitions with no incoming references."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass
5
+
6
+ import networkx as nx
7
+
8
+ from codegraph.analysis._common import (
9
+ REFERENCE_EDGE_KINDS,
10
+ _kind_str,
11
+ in_protocol_class,
12
+ in_test_module,
13
+ is_excluded_path,
14
+ is_protocol_class,
15
+ )
16
+ from codegraph.graph.schema import EdgeKind, NodeKind
17
+
18
# Node kinds that dead-code detection considers at all; every other kind is
# skipped before any further checks run.
_CANDIDATE_KINDS: frozenset[str] = frozenset(
    member.value
    for member in (NodeKind.FUNCTION, NodeKind.METHOD, NodeKind.CLASS)
)
# Names that are treated as program entry points and never reported.
_ENTRYPOINT_NAMES: frozenset[str] = frozenset(("main", "__main__"))
22
+
23
+ _PROPERTY_DECORATORS: tuple[str, ...] = (
24
+ "@property", "@cached_property", "functools.cached_property",
25
+ )
26
+
27
+
28
+ def _has_property_decorator(metadata: dict[str, object]) -> bool:
29
+ decorators = metadata.get("decorators") or []
30
+ if not isinstance(decorators, list):
31
+ return False
32
+ for raw in decorators:
33
+ text = str(raw).strip()
34
+ for marker in _PROPERTY_DECORATORS:
35
+ if marker in text:
36
+ return True
37
+ return False
38
+
39
+
40
def _class_has_inherits(graph: nx.MultiDiGraph, class_id: str) -> bool:
    """Return True when ``class_id`` has an outgoing edge keyed INHERITS."""
    inherits_key = EdgeKind.INHERITS.value
    for _src, _dst, edge_key in graph.out_edges(class_id, keys=True):
        if edge_key == inherits_key:
            return True
    return False
45
+
46
+
47
def _is_polymorphic_override(graph: nx.MultiDiGraph, method_id: str) -> bool:
    """Report whether the method is defined in a class that inherits.

    Follows the method's outgoing DEFINED_IN edges; if any target is a CLASS
    node with at least one outgoing INHERITS edge, the method is treated as a
    likely override that is reached through base-class dispatch and therefore
    has no static incoming CALL edge.
    """
    owners = [
        dst
        for _src, dst, edge_key in graph.out_edges(method_id, keys=True)
        if edge_key == EdgeKind.DEFINED_IN.value
    ]
    for owner in owners:
        owner_attrs = graph.nodes.get(owner) or {}
        if _kind_str(owner_attrs.get("kind")) != NodeKind.CLASS.value:
            continue
        if _class_has_inherits(graph, owner):
            return True
    return False
63
+
64
+
65
@dataclass
class DeadNode:
    """One definition reported by ``find_dead_code``."""

    # Graph node id of the definition.
    id: str
    # Node "name" attribute (empty string when the attribute is missing).
    name: str
    # Node "qualname" attribute; ``find_dead_code`` falls back to ``name``.
    qualname: str
    # Node kind string (one of the candidate kinds).
    kind: str
    # Node "file" attribute (empty string when missing).
    file: str
    # Node "line_start" attribute (0 when missing).
    line_start: int
    # Human-readable explanation of why the node was reported.
    reason: str = "no incoming references"
74
+
75
+
76
+ def _is_dunder(name: str) -> bool:
77
+ return name.startswith("__") and name.endswith("__")
78
+
79
+
80
+ def _is_test_function(name: str) -> bool:
81
+ return name.startswith("test_") or name.startswith("test")
82
+
83
+
84
def find_dead_code(
    graph: nx.MultiDiGraph,
    *,
    include_tests: bool = False,
) -> list[DeadNode]:
    """Return candidate definitions that have no incoming reference edge.

    Only FUNCTION, METHOD and CLASS nodes are considered. A candidate is
    reported when none of its incoming edges has a key in
    ``REFERENCE_EDGE_KINDS`` and none of the exclusions below applies.

    Always excluded:
      * names in ``_ENTRYPOINT_NAMES`` (``main`` / ``__main__``), dunder
        names, and names starting with ``test``
      * nodes whose metadata has a truthy ``entry_point`` or ``public_api``
      * nodes carrying a property-style decorator
      * nodes whose file is rejected by ``is_excluded_path``
      * Protocol classes and methods defined inside them
      * methods whose owning class inherits from another class (treated as
        polymorphic overrides)

    Excluded unless ``include_tests`` is true:
      * nodes living in test modules

    Args:
        graph: The code graph to scan.
        include_tests: When true, nodes in test modules are also examined.

    Returns:
        ``DeadNode`` records sorted by ``(file, line_start, qualname)``.
    """
    dead: list[DeadNode] = []
    for nid, attrs in graph.nodes(data=True):
        kind = _kind_str(attrs.get("kind"))
        if kind not in _CANDIDATE_KINDS:
            continue
        name = str(attrs.get("name") or "")
        if name in _ENTRYPOINT_NAMES or _is_dunder(name) or _is_test_function(name):
            continue
        if not include_tests and in_test_module(graph, nid):
            continue
        # Metadata is only meaningful as a mapping; anything else is treated
        # as "no metadata" instead of failing on ``.get``.
        metadata = attrs.get("metadata") or {}
        if not isinstance(metadata, dict):
            metadata = {}
        # Decorator/entry-point-aware skip: framework hooks are invoked
        # dynamically and have no static incoming edge. They are recognised
        # here through metadata["entry_point"].
        if metadata.get("entry_point"):
            continue
        # @property / @cached_property are accessed as attributes, not calls.
        if _has_property_decorator(metadata):
            continue
        # Pragma-marked public-API symbols are intentionally exposed for
        # library consumers; the codebase itself may not call them.
        if metadata.get("public_api"):
            continue
        # Generated/static assets and fixtures don't have traceable call
        # graphs, so they are left out of dead-code detection.
        if is_excluded_path(str(attrs.get("file") or "")):
            continue
        # Protocol classes and their methods exist for static typing and have
        # no incoming call-graph edges by design.
        if kind == NodeKind.CLASS.value and is_protocol_class(graph, nid):
            continue
        if kind == NodeKind.METHOD.value and (
            in_protocol_class(graph, nid)
            # Overrides on inheriting classes are dispatched via the base
            # class, so no static incoming CALL edge exists.
            or _is_polymorphic_override(graph, nid)
        ):
            continue

        if any(
            key in REFERENCE_EDGE_KINDS
            for _src, _dst, key in graph.in_edges(nid, keys=True)
        ):
            continue

        dead.append(
            DeadNode(
                id=nid,
                name=name,
                qualname=str(attrs.get("qualname") or name),
                kind=kind,
                file=str(attrs.get("file") or ""),
                line_start=int(attrs.get("line_start") or 0),
            )
        )
    dead.sort(key=lambda d: (d.file, d.line_start, d.qualname))
    return dead
@@ -0,0 +1,68 @@
1
+ """Hotspot detection: top-N nodes by fan-in / fan-out / size."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass
5
+
6
+ import networkx as nx
7
+
8
+ from codegraph.analysis._common import _kind_str
9
+ from codegraph.graph.schema import EdgeKind, NodeKind
10
+
11
# Default node kinds ranked by ``find_hotspots``: functions and methods.
_CALLABLE_KINDS: frozenset[str] = frozenset(
    (NodeKind.FUNCTION.value, NodeKind.METHOD.value)
)
14
+
15
+
16
@dataclass
class Hotspot:
    """A graph node together with its call fan-in, fan-out and size."""

    id: str
    name: str
    qualname: str
    kind: str
    file: str
    fan_in: int
    fan_out: int
    loc: int

    @property
    def score(self) -> int:
        """Combined ranking score.

        Fan-in counts double, fan-out counts once, and every full 50 lines of
        code adds one point.
        """
        weighted_fan_in = self.fan_in * 2
        size_points = self.loc // 50
        return weighted_fan_in + self.fan_out + size_points
30
+
31
+
32
def find_hotspots(
    graph: nx.MultiDiGraph,
    *,
    limit: int = 20,
    kinds: frozenset[str] = _CALLABLE_KINDS,
) -> list[Hotspot]:
    """Return the top ``limit`` hotspots ranked by fan-in, fan-out and size.

    Args:
        graph: The code graph to scan.
        limit: Maximum number of rows to return. Zero or a negative value
            yields an empty list (a negative value is not interpreted as a
            Python negative slice bound).
        kinds: Node kind strings to include; defaults to functions and
            methods.

    Returns:
        ``Hotspot`` rows sorted by descending ``score``, then descending
        ``fan_in``, then ``qualname``.
    """
    if limit <= 0:
        return []

    calls_key = EdgeKind.CALLS.value
    rows: list[Hotspot] = []
    for nid, attrs in graph.nodes(data=True):
        kind = _kind_str(attrs.get("kind"))
        if kind not in kinds:
            continue
        # Only edges keyed CALLS contribute to fan-in / fan-out.
        fan_in = sum(
            1
            for _src, _dst, key in graph.in_edges(nid, keys=True)
            if key == calls_key
        )
        fan_out = sum(
            1
            for _src, _dst, key in graph.out_edges(nid, keys=True)
            if key == calls_key
        )
        line_start = int(attrs.get("line_start") or 0)
        line_end = int(attrs.get("line_end") or 0)
        # Without an end line the size is unknown and counted as 0.
        loc = max(0, line_end - line_start + 1) if line_end else 0
        rows.append(
            Hotspot(
                id=nid,
                name=str(attrs.get("name") or ""),
                qualname=str(attrs.get("qualname") or ""),
                kind=kind,
                file=str(attrs.get("file") or ""),
                fan_in=fan_in,
                fan_out=fan_out,
                loc=loc,
            )
        )
    rows.sort(key=lambda h: (-h.score, -h.fan_in, h.qualname))
    return rows[:limit]
@@ -0,0 +1,439 @@
1
+ """Infrastructure-component detection.
2
+
3
+ Scans IMPORTS edges in the graph to identify external services the project
4
+ talks to (Redis, BullMQ, Postgres, S3, Express, etc.) and aggregates them
5
+ into an architecture-level topology — one node per detected component plus
6
+ the source files / handlers that use it.
7
+
8
+ Pure, read-only pass: walks the in-memory graph, returns a payload dict.
9
+ No DB writes, no schema changes. Output is consumed by
10
+ ``build_dashboard_payload`` to populate the dashboard's Architecture view.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ from collections import defaultdict
15
+ from typing import Any, Final
16
+
17
+ import networkx as nx
18
+
19
+ from codegraph.analysis._common import _kind_str
20
+ from codegraph.graph.schema import EdgeKind, NodeKind
21
+
22
# Component category label. Values used by the catalog in this module:
# "CACHE" | "QUEUE" | "DB" | "BROKER" | "OBJECT_STORE" | "WEB_SERVER" |
# "HTTP_CLIENT" | "ORM" | "MESSAGING" | "SEARCH" | "EXTERNAL_API"
ComponentKind = str
23
+
24
+
25
+ _CATALOG: Final[dict[str, dict[str, str]]] = {
26
+ "redis": {"kind": "CACHE", "label": "Redis", "color": "#ef4444"},
27
+ "ioredis": {"kind": "CACHE", "label": "Redis (ioredis)", "color": "#ef4444"},
28
+ "redis-py": {"kind": "CACHE", "label": "Redis", "color": "#ef4444"},
29
+ "aioredis": {"kind": "CACHE", "label": "Redis (async)", "color": "#ef4444"},
30
+ "memcached": {"kind": "CACHE", "label": "Memcached", "color": "#fb923c"},
31
+ "pymemcache": {"kind": "CACHE", "label": "Memcached", "color": "#fb923c"},
32
+
33
+ "bullmq": {"kind": "QUEUE", "label": "BullMQ", "color": "#f59e0b"},
34
+ "bull": {"kind": "QUEUE", "label": "Bull", "color": "#f59e0b"},
35
+ "celery": {"kind": "QUEUE", "label": "Celery", "color": "#f59e0b"},
36
+ "rq": {"kind": "QUEUE", "label": "RQ", "color": "#f59e0b"},
37
+ "amqplib": {"kind": "BROKER", "label": "RabbitMQ", "color": "#fb7185"},
38
+ "kombu": {"kind": "BROKER", "label": "RabbitMQ", "color": "#fb7185"},
39
+ "pika": {"kind": "BROKER", "label": "RabbitMQ", "color": "#fb7185"},
40
+ "kafkajs": {"kind": "BROKER", "label": "Kafka", "color": "#fb7185"},
41
+ "kafka-python": {"kind": "BROKER", "label": "Kafka", "color": "#fb7185"},
42
+ "confluent-kafka": {"kind": "BROKER", "label": "Kafka", "color": "#fb7185"},
43
+
44
+ "pg": {"kind": "DB", "label": "PostgreSQL", "color": "#3b82f6"},
45
+ "postgres": {"kind": "DB", "label": "PostgreSQL", "color": "#3b82f6"},
46
+ "psycopg2": {"kind": "DB", "label": "PostgreSQL", "color": "#3b82f6"},
47
+ "psycopg": {"kind": "DB", "label": "PostgreSQL", "color": "#3b82f6"},
48
+ "asyncpg": {"kind": "DB", "label": "PostgreSQL", "color": "#3b82f6"},
49
+ "mysql2": {"kind": "DB", "label": "MySQL", "color": "#06b6d4"},
50
+ "mysql": {"kind": "DB", "label": "MySQL", "color": "#06b6d4"},
51
+ "pymysql": {"kind": "DB", "label": "MySQL", "color": "#06b6d4"},
52
+ "sqlite3": {"kind": "DB", "label": "SQLite", "color": "#0ea5e9"},
53
+ "better-sqlite3": {"kind": "DB", "label": "SQLite", "color": "#0ea5e9"},
54
+ "mongodb": {"kind": "DB", "label": "MongoDB", "color": "#22c55e"},
55
+ "mongoose": {"kind": "ORM", "label": "Mongoose", "color": "#22c55e"},
56
+ "pymongo": {"kind": "DB", "label": "MongoDB", "color": "#22c55e"},
57
+ "motor": {"kind": "DB", "label": "MongoDB (async)","color": "#22c55e"},
58
+ "sqlalchemy": {"kind": "ORM", "label": "SQLAlchemy", "color": "#6366f1"},
59
+ "prisma": {"kind": "ORM", "label": "Prisma", "color": "#6366f1"},
60
+ "@prisma/client": {"kind": "ORM", "label": "Prisma", "color": "#6366f1"},
61
+ "typeorm": {"kind": "ORM", "label": "TypeORM", "color": "#6366f1"},
62
+ "sequelize": {"kind": "ORM", "label": "Sequelize", "color": "#6366f1"},
63
+ "drizzle-orm": {"kind": "ORM", "label": "Drizzle", "color": "#6366f1"},
64
+ "knex": {"kind": "ORM", "label": "Knex", "color": "#6366f1"},
65
+
66
+ "express": {"kind": "WEB_SERVER", "label": "Express", "color": "#a78bfa"},
67
+ "fastify": {"kind": "WEB_SERVER", "label": "Fastify", "color": "#a78bfa"},
68
+ "koa": {"kind": "WEB_SERVER", "label": "Koa", "color": "#a78bfa"},
69
+ "@nestjs/core": {"kind": "WEB_SERVER", "label": "NestJS", "color": "#a78bfa"},
70
+ "@nestjs/common": {"kind": "WEB_SERVER", "label": "NestJS", "color": "#a78bfa"},
71
+ "next": {"kind": "WEB_SERVER", "label": "Next.js", "color": "#a78bfa"},
72
+ "fastapi": {"kind": "WEB_SERVER", "label": "FastAPI", "color": "#a78bfa"},
73
+ "flask": {"kind": "WEB_SERVER", "label": "Flask", "color": "#a78bfa"},
74
+ "django": {"kind": "WEB_SERVER", "label": "Django", "color": "#a78bfa"},
75
+ "starlette": {"kind": "WEB_SERVER", "label": "Starlette", "color": "#a78bfa"},
76
+ "tornado": {"kind": "WEB_SERVER", "label": "Tornado", "color": "#a78bfa"},
77
+
78
+ "axios": {"kind": "HTTP_CLIENT", "label": "axios", "color": "#14b8a6"},
79
+ "got": {"kind": "HTTP_CLIENT", "label": "got", "color": "#14b8a6"},
80
+ "node-fetch": {"kind": "HTTP_CLIENT", "label": "node-fetch", "color": "#14b8a6"},
81
+ "undici": {"kind": "HTTP_CLIENT", "label": "undici", "color": "#14b8a6"},
82
+ "requests": {"kind": "HTTP_CLIENT", "label": "requests", "color": "#14b8a6"},
83
+ "httpx": {"kind": "HTTP_CLIENT", "label": "httpx", "color": "#14b8a6"},
84
+ "aiohttp": {"kind": "HTTP_CLIENT", "label": "aiohttp", "color": "#14b8a6"},
85
+
86
+ "aws-sdk": {"kind": "OBJECT_STORE", "label": "AWS SDK", "color": "#f59e0b"},
87
+ "@aws-sdk/client-s3": {"kind": "OBJECT_STORE","label": "AWS S3", "color": "#f59e0b"},
88
+ "boto3": {"kind": "OBJECT_STORE", "label": "AWS (boto3)", "color": "#f59e0b"},
89
+ "@google-cloud/storage": {"kind": "OBJECT_STORE","label": "GCS", "color": "#3b82f6"},
90
+ "minio": {"kind": "OBJECT_STORE", "label": "MinIO", "color": "#f59e0b"},
91
+
92
+ "elasticsearch": {"kind": "SEARCH", "label": "Elasticsearch", "color": "#fbbf24"},
93
+ "@elastic/elasticsearch": {"kind": "SEARCH", "label": "Elasticsearch", "color": "#fbbf24"},
94
+ "meilisearch": {"kind": "SEARCH", "label": "Meilisearch", "color": "#fbbf24"},
95
+ "algoliasearch": {"kind": "SEARCH", "label": "Algolia", "color": "#fbbf24"},
96
+
97
+ "socket.io": {"kind": "MESSAGING", "label": "Socket.IO", "color": "#ec4899"},
98
+ "ws": {"kind": "MESSAGING", "label": "WebSocket", "color": "#ec4899"},
99
+ "graphql": {"kind": "WEB_SERVER", "label": "GraphQL", "color": "#ec4899"},
100
+ "@apollo/server": {"kind": "WEB_SERVER", "label": "Apollo Server", "color": "#ec4899"},
101
+
102
+ "stripe": {"kind": "EXTERNAL_API", "label": "Stripe", "color": "#8b5cf6"},
103
+ "twilio": {"kind": "EXTERNAL_API", "label": "Twilio", "color": "#8b5cf6"},
104
+ "sendgrid": {"kind": "EXTERNAL_API", "label": "SendGrid", "color": "#8b5cf6"},
105
+ "@sendgrid/mail": {"kind": "EXTERNAL_API", "label": "SendGrid", "color": "#8b5cf6"},
106
+ "nodemailer": {"kind": "EXTERNAL_API", "label": "Email (SMTP)", "color": "#8b5cf6"},
107
+ "firebase-admin": {"kind": "EXTERNAL_API", "label": "Firebase", "color": "#8b5cf6"},
108
+ }
109
+
110
# Catalog keys ordered longest-first, so prefix matching tries the most
# specific key (e.g. "@prisma/client") before a shorter one ("prisma").
_CATALOG_KEYS_LONGEST_FIRST: Final[list[str]] = sorted(
    _CATALOG, key=len, reverse=True
)
114
+
115
+
116
+ def _root_package(target: str) -> str:
117
+ """Return the leading package name from an import target_name.
118
+
119
+ Python: ``redis.Redis`` -> ``redis``; ``aws.s3.client`` -> ``aws``.
120
+ TS: ``ioredis.default`` -> ``ioredis``;
121
+ ``@aws-sdk/client-s3.S3Client`` -> ``@aws-sdk/client-s3``.
122
+ """
123
+ if not target:
124
+ return ""
125
+ # Scoped npm packages: keep the @scope/pkg slug intact.
126
+ if target.startswith("@"):
127
+ # Split into ["@scope/pkg", "rest", ...] by finding the first "." that
128
+ # comes AFTER the slash separating scope from pkg.
129
+ slash = target.find("/")
130
+ if slash > 0:
131
+ dot = target.find(".", slash)
132
+ return target[:dot] if dot > 0 else target
133
+ return target.split(".", 1)[0]
134
+
135
+
136
def _classify(target: str) -> dict[str, str] | None:
    """Look up the catalog entry for an import target.

    First tries the target's root package as an exact catalog key. If that
    fails, walks the catalog keys longest-first and accepts a key that equals
    the target or is followed in the target by ``/`` or ``.``.

    Returns the catalog entry, or ``None`` for an empty or unknown target.
    """
    if not target:
        return None
    exact = _CATALOG.get(_root_package(target))
    if exact is not None:
        return exact
    for key in _CATALOG_KEYS_LONGEST_FIRST:
        if target == key or target.startswith((key + "/", key + ".")):
            return _CATALOG[key]
    return None
148
+
149
+
150
+ def _file_of(graph: nx.MultiDiGraph, node_id: str) -> str:
151
+ attrs = graph.nodes.get(node_id) or {}
152
+ return str(attrs.get("file") or "")
153
+
154
+
155
+ def _component_id(kind: str, label: str) -> str:
156
+ return f"infra:{kind}:{label}".lower().replace(" ", "_")
157
+
158
+
159
def _collect_handlers(graph: nx.MultiDiGraph) -> list[dict[str, Any]]:
    """Collect FUNCTION/METHOD nodes whose metadata role is ``HANDLER``.

    Nodes whose metadata is not a dict are ignored. For each handler the
    HTTP method and path are parsed from ``metadata["decorators"]``; both may
    be empty strings when nothing could be parsed.

    Returns one dict per handler (``id``, ``name``, ``qualname``, ``file``,
    ``line``, ``method``, ``path``), sorted by ``(file, line)``.
    """
    callable_kinds = (NodeKind.FUNCTION.value, NodeKind.METHOD.value)
    found: list[dict[str, Any]] = []
    for nid, attrs in graph.nodes(data=True):
        meta = attrs.get("metadata") or {}
        if not isinstance(meta, dict) or meta.get("role") != "HANDLER":
            continue
        if _kind_str(attrs.get("kind")) not in callable_kinds:
            continue
        method, path = _parse_route_from_decorators(meta.get("decorators") or [])
        entry: dict[str, Any] = {
            "id": nid,
            "name": str(attrs.get("name") or ""),
            "qualname": str(attrs.get("qualname") or ""),
            "file": str(attrs.get("file") or ""),
            "line": int(attrs.get("line_start") or 0),
            "method": method,
            "path": path,
        }
        found.append(entry)
    return sorted(found, key=lambda h: (h["file"], h["line"]))
183
+
184
+
185
# NOTE(review): bound to None and annotated Final; nothing in this module
# assigns or reads it afterwards, so it appears to be an unused placeholder.
_EXPRESS_VERB_RE: Final = None
# Lower-case Express-style route method names. Not referenced elsewhere in
# this module.
_EXPRESS_VERBS: Final[frozenset[str]] = frozenset({
    "get", "post", "put", "delete", "patch", "head", "options", "all",
})
189
+
190
+
191
def _resolve_handler_by_name(
    graph: nx.MultiDiGraph,
    name: str,
    near_file: str,
) -> str | None:
    """Find a FUNCTION/METHOD node called ``name``, preferring ``near_file``.

    Args:
        graph: The code graph to search.
        name: Handler name to look up. Must be non-empty and, after removing
            ``_`` and ``$``, purely alphanumeric; otherwise no lookup is done.
        near_file: File whose nodes take priority.

    Returns:
        The id (as ``str``) of the first matching node located in
        ``near_file``; if there is none, the id of the first matching node in
        any file; ``None`` when nothing matches or ``name`` is rejected.
    """
    if not name or not name.replace("_", "").replace("$", "").isalnum():
        return None
    callable_kinds = (NodeKind.FUNCTION.value, NodeKind.METHOD.value)
    fallback: str | None = None
    for nid, attrs in graph.nodes(data=True):
        if _kind_str(attrs.get("kind")) not in callable_kinds:
            continue
        if str(attrs.get("name") or "") != name:
            continue
        nid_str = str(nid)
        if str(attrs.get("file") or "") == near_file:
            # A same-file match wins immediately.
            return nid_str
        # Remember only the first match from another file.
        fallback = fallback or nid_str
    return fallback
214
+
215
+
216
def _collect_express_handlers(graph: nx.MultiDiGraph) -> list[dict[str, Any]]:
    """Extract Express/Koa-style endpoints from MODULE node metadata.

    Reads ``metadata["express_routes"]`` on each MODULE node (presumably
    written by the TypeScript parser for registrations such as
    ``app.get('/x', fn)`` — that producer is not part of this module). Each
    route dict is expected to carry ``method``, ``path``, and optionally
    ``handler_name`` and ``line``. Routes without a method or path, non-dict
    routes, and modules whose metadata is not a dict are skipped.

    When ``handler_name`` resolves to a FUNCTION/METHOD node, that node's id
    and qualname are used; otherwise a synthetic ``express:...`` id is
    generated. The extra ``_bfs_from`` key names the node from which
    reachability should start (the resolved handler, else the module node).

    Returns the entries sorted by ``(file, line)``.
    """
    out: list[dict[str, Any]] = []
    for nid, attrs in graph.nodes(data=True):
        if _kind_str(attrs.get("kind")) != NodeKind.MODULE.value:
            continue
        meta = attrs.get("metadata") or {}
        # Same guard as _collect_handlers: ignore metadata that is not a
        # mapping instead of failing on ``.get``.
        if not isinstance(meta, dict):
            continue
        routes = meta.get("express_routes") or []
        if not isinstance(routes, list) or not routes:
            continue
        module_file = str(attrs.get("file") or "")
        for r in routes:
            if not isinstance(r, dict):
                continue
            method = str(r.get("method") or "").upper()
            path = str(r.get("path") or "")
            handler_name = str(r.get("handler_name") or "")
            line = int(r.get("line") or 0)
            if not method or not path:
                continue
            handler_id = (
                _resolve_handler_by_name(graph, handler_name, module_file)
                if handler_name
                else None
            )
            synth_id = f"express:{method}:{path}:{module_file}:{line}"
            out.append({
                "id": handler_id or synth_id,
                "name": handler_name or path,
                "qualname": (
                    str(graph.nodes[handler_id].get("qualname") or "")
                    if handler_id
                    else f"{module_file}:{line}"
                ),
                "file": module_file,
                "line": line,
                "method": method,
                "path": path,
                "_bfs_from": handler_id or nid,
            })
    out.sort(key=lambda h: (h["file"], h["line"]))
    return out
265
+
266
+
267
+ _HTTP_VERBS: Final[tuple[str, ...]] = (
268
+ "get", "post", "put", "delete", "patch", "head", "options",
269
+ )
270
+
271
+
272
+ def _parse_route_from_decorators(decorators: list[Any]) -> tuple[str, str]:
273
+ """Extract (METHOD, path) from a list of decorator-text strings.
274
+
275
+ Best-effort regex over the captured decorator text. Returns ("", "") if
276
+ nothing matched (the handler still appears in the list, just unlabeled).
277
+ """
278
+ import re
279
+ method = ""
280
+ path = ""
281
+ for dec in decorators:
282
+ text = str(dec)
283
+ m = re.search(
284
+ r"@\w[\w\.]*\.(get|post|put|delete|patch|head|options|route|websocket)\b",
285
+ text,
286
+ re.IGNORECASE,
287
+ )
288
+ if m:
289
+ verb = m.group(1).lower()
290
+ if verb in _HTTP_VERBS:
291
+ method = verb.upper()
292
+ elif verb == "route":
293
+ method = "ANY"
294
+ m2 = re.search(r"""["']([^"']+)["']""", text)
295
+ if m2 and not path:
296
+ path = m2.group(1)
297
+ if method and path:
298
+ break
299
+ return method, path
300
+
301
+
302
def _bfs_infra_for_handler(
    graph: nx.MultiDiGraph,
    handler_id: str,
    file_to_components: dict[str, set[str]],
    max_depth: int = 6,
) -> list[str]:
    """Walk forward through CALLS edges from a handler, collecting infra ids.

    Breadth-first traversal starting at ``handler_id``. For every visited
    node, each component registered for that node's file in
    ``file_to_components`` is recorded once. Nodes at ``max_depth`` are still
    inspected but not expanded. Edge targets that are not strings or that
    start with ``unresolved::`` are not followed.

    Args:
        graph: The code graph to traverse.
        handler_id: Node id where the traversal starts.
        file_to_components: Maps a file path to the component ids imported
            in that file.
        max_depth: Maximum number of CALLS hops to follow.

    Returns:
        Component ids in the order they were first encountered.
    """
    from collections import deque

    seen: set[str] = {handler_id}
    # deque gives O(1) pops from the front; list.pop(0) is O(n).
    queue: deque[tuple[str, int]] = deque([(handler_id, 0)])
    hits: list[str] = []
    seen_components: set[str] = set()
    while queue:
        nid, depth = queue.popleft()
        node_file = _file_of(graph, nid)
        # Any component imported in the file containing this node counts as
        # a hit, since the handler/service touches that file's symbols.
        for cid in file_to_components.get(node_file, ()):
            if cid not in seen_components:
                seen_components.add(cid)
                hits.append(cid)
        if depth >= max_depth:
            continue
        for _src, dst, data in graph.out_edges(nid, data=True):
            if _kind_str(data.get("kind")) != EdgeKind.CALLS.value:
                continue
            if dst in seen:
                continue
            if not isinstance(dst, str):
                continue
            if dst.startswith("unresolved::"):
                continue
            seen.add(dst)
            queue.append((dst, depth + 1))
    return hits
336
+
337
+
338
def detect_infrastructure(graph: nx.MultiDiGraph) -> dict[str, Any]:
    """Build the architecture-view payload from the graph.

    Returns a dict with four top-level keys:

    * ``components`` - one entry per detected external service, carrying
      ``id``, ``kind``, ``label``, ``color``, ``count`` (import sites),
      ``files`` (paths that import it) and up to six ``evidence`` strings.
      Sorted by descending ``count``, then ``label``.
    * ``handlers`` - decorator-style and Express-style handlers, deduplicated
      on ``(method, path, file, line)``, each with a ``components`` list of
      the component ids reachable from it.
    * ``edges`` - one ``{source_file, target, count}`` entry per
      (importing file, component) pair, sorted by descending ``count``.
    * ``metrics`` - summary counts (``components``, ``handlers``,
      ``import_sites``, ``by_kind``).
    """
    components: dict[str, dict[str, Any]] = {}
    file_to_components: dict[str, set[str]] = defaultdict(set)
    edges_pair: dict[tuple[str, str], int] = defaultdict(int)
    imports_kind = EdgeKind.IMPORTS.value

    # Pass 1: classify every IMPORTS edge and accumulate per-component data.
    for src, _dst, data in graph.edges(data=True):
        if _kind_str(data.get("kind")) != imports_kind:
            continue
        meta = data.get("metadata") or {}
        if not isinstance(meta, dict):
            continue
        # `source` is preferred over `target_name` when both are present
        # (it is the cleaner slug for scoped packages).
        target_str = str(meta.get("source") or meta.get("target_name") or "")
        if not target_str:
            continue
        info = _classify(target_str)
        if info is None:
            continue

        cid = _component_id(info["kind"], info["label"])
        comp = components.get(cid)
        if comp is None:
            comp = {
                "id": cid,
                "kind": info["kind"],
                "label": info["label"],
                "color": info["color"],
                "count": 0,
                "files": [],
                "evidence": [],
            }
            components[cid] = comp
        comp["count"] += 1

        importer_file = _file_of(graph, src)
        if importer_file:
            if importer_file not in comp["files"]:
                comp["files"].append(importer_file)
            file_to_components[importer_file].add(cid)
            edges_pair[(importer_file, cid)] += 1

        ev = f"{importer_file}:{data.get('line') or '?'} -> {target_str}"
        evidence = comp["evidence"]
        if len(evidence) < 6 and ev not in evidence:
            evidence.append(ev)

    # Pass 2: gather both handler flavours, drop duplicates, and attach the
    # components reachable from each remaining handler.
    candidates = _collect_handlers(graph) + _collect_express_handlers(graph)
    seen_handler_keys: set[tuple[Any, ...]] = set()
    handlers: list[dict[str, Any]] = []
    for h in candidates:
        key = (h["method"], h["path"], h["file"], h["line"])
        if key in seen_handler_keys:
            continue
        seen_handler_keys.add(key)
        bfs_root = h.pop("_bfs_from", None) or h["id"]
        h["components"] = _bfs_infra_for_handler(
            graph, bfs_root, file_to_components,
        )
        handlers.append(h)

    # Pass 3: edges shaped for rendering (importer file -> component).
    ranked_pairs = sorted(edges_pair.items(), key=lambda kv: -kv[1])
    edges = [
        {"source_file": fp, "target": cid, "count": n}
        for (fp, cid), n in ranked_pairs
    ]

    by_kind: dict[str, int] = defaultdict(int)
    for c in components.values():
        by_kind[c["kind"]] += 1

    ordered_components = sorted(
        components.values(), key=lambda c: (-int(c["count"]), c["label"]),
    )
    metrics = {
        "components": len(components),
        "handlers": len(handlers),
        "import_sites": sum(c["count"] for c in components.values()),
        "by_kind": dict(by_kind),
    }
    return {
        "components": ordered_components,
        "handlers": handlers,
        "edges": edges,
        "metrics": metrics,
    }
437
+
438
+
439
+ __all__ = ["detect_infrastructure"]