polycodegraph 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph/__init__.py +10 -0
- codegraph/analysis/__init__.py +30 -0
- codegraph/analysis/_common.py +125 -0
- codegraph/analysis/blast_radius.py +63 -0
- codegraph/analysis/cycles.py +79 -0
- codegraph/analysis/dataflow.py +861 -0
- codegraph/analysis/dead_code.py +165 -0
- codegraph/analysis/hotspots.py +68 -0
- codegraph/analysis/infrastructure.py +439 -0
- codegraph/analysis/metrics.py +52 -0
- codegraph/analysis/report.py +222 -0
- codegraph/analysis/roles.py +323 -0
- codegraph/analysis/untested.py +79 -0
- codegraph/cli.py +1506 -0
- codegraph/config.py +64 -0
- codegraph/embed/__init__.py +35 -0
- codegraph/embed/chunker.py +120 -0
- codegraph/embed/embedder.py +113 -0
- codegraph/embed/query.py +181 -0
- codegraph/embed/store.py +360 -0
- codegraph/graph/__init__.py +0 -0
- codegraph/graph/builder.py +212 -0
- codegraph/graph/schema.py +69 -0
- codegraph/graph/store_networkx.py +55 -0
- codegraph/graph/store_sqlite.py +249 -0
- codegraph/mcp_server/__init__.py +6 -0
- codegraph/mcp_server/server.py +933 -0
- codegraph/parsers/__init__.py +0 -0
- codegraph/parsers/base.py +70 -0
- codegraph/parsers/go.py +570 -0
- codegraph/parsers/python.py +1707 -0
- codegraph/parsers/typescript.py +1397 -0
- codegraph/py.typed +0 -0
- codegraph/resolve/__init__.py +4 -0
- codegraph/resolve/calls.py +480 -0
- codegraph/review/__init__.py +31 -0
- codegraph/review/baseline.py +32 -0
- codegraph/review/differ.py +211 -0
- codegraph/review/hook.py +70 -0
- codegraph/review/risk.py +219 -0
- codegraph/review/rules.py +342 -0
- codegraph/viz/__init__.py +17 -0
- codegraph/viz/_style.py +45 -0
- codegraph/viz/dashboard.py +740 -0
- codegraph/viz/diagrams.py +370 -0
- codegraph/viz/explore.py +453 -0
- codegraph/viz/hld.py +683 -0
- codegraph/viz/html.py +115 -0
- codegraph/viz/mermaid.py +111 -0
- codegraph/viz/svg.py +77 -0
- codegraph/web/__init__.py +4 -0
- codegraph/web/server.py +165 -0
- codegraph/web/static/app.css +664 -0
- codegraph/web/static/app.js +919 -0
- codegraph/web/static/index.html +112 -0
- codegraph/web/static/views/architecture.js +1671 -0
- codegraph/web/static/views/graph3d.css +564 -0
- codegraph/web/static/views/graph3d.js +999 -0
- codegraph/web/static/views/graph3d_transform.js +984 -0
- codegraph/workspace/__init__.py +34 -0
- codegraph/workspace/config.py +110 -0
- codegraph/workspace/operations.py +294 -0
- polycodegraph-0.1.0.dist-info/METADATA +687 -0
- polycodegraph-0.1.0.dist-info/RECORD +67 -0
- polycodegraph-0.1.0.dist-info/WHEEL +4 -0
- polycodegraph-0.1.0.dist-info/entry_points.txt +2 -0
- polycodegraph-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""Dead code detection: definitions with no incoming references."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
import networkx as nx
|
|
7
|
+
|
|
8
|
+
from codegraph.analysis._common import (
|
|
9
|
+
REFERENCE_EDGE_KINDS,
|
|
10
|
+
_kind_str,
|
|
11
|
+
in_protocol_class,
|
|
12
|
+
in_test_module,
|
|
13
|
+
is_excluded_path,
|
|
14
|
+
is_protocol_class,
|
|
15
|
+
)
|
|
16
|
+
from codegraph.graph.schema import EdgeKind, NodeKind
|
|
17
|
+
|
|
18
|
+
# Node kinds that are eligible to be reported as dead code.
_CANDIDATE_KINDS: frozenset[str] = frozenset(
    kind.value for kind in (NodeKind.FUNCTION, NodeKind.METHOD, NodeKind.CLASS)
)
# Names treated as program entry points; they are never reported.
_ENTRYPOINT_NAMES: frozenset[str] = frozenset(("main", "__main__"))

# Substrings that mark a decorator as a property-style accessor.
_PROPERTY_DECORATORS: tuple[str, ...] = (
    "@property",
    "@cached_property",
    "functools.cached_property",
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _has_property_decorator(metadata: dict[str, object]) -> bool:
    """Report whether the node metadata lists a property-style decorator.

    Reads ``metadata["decorators"]``; a value that is not a list counts as
    "no decorators". Every entry is stringified and tested for any of the
    ``_PROPERTY_DECORATORS`` markers as a substring.
    """
    listed = metadata.get("decorators") or []
    if not isinstance(listed, list):
        return False
    texts = (str(entry).strip() for entry in listed)
    return any(
        marker in text
        for text in texts
        for marker in _PROPERTY_DECORATORS
    )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _class_has_inherits(graph: nx.MultiDiGraph, class_id: str) -> bool:
    """Return True when ``class_id`` has at least one outgoing INHERITS edge."""
    inherits_key = EdgeKind.INHERITS.value
    for _source, _target, edge_key in graph.out_edges(class_id, keys=True):
        if edge_key == inherits_key:
            return True
    return False
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _is_polymorphic_override(graph: nx.MultiDiGraph, method_id: str) -> bool:
    """Tell whether the method belongs to a class that itself inherits.

    Follows the method's outgoing DEFINED_IN edges; if any of them leads to a
    CLASS node that has an INHERITS edge, the method is treated as a likely
    override that is reached through base-class dispatch and therefore has no
    static incoming CALL edge.
    """
    for _source, owner_id, edge_key in graph.out_edges(method_id, keys=True):
        if edge_key == EdgeKind.DEFINED_IN.value:
            owner_attrs = graph.nodes.get(owner_id) or {}
            owner_is_class = (
                _kind_str(owner_attrs.get("kind")) == NodeKind.CLASS.value
            )
            if owner_is_class and _class_has_inherits(graph, owner_id):
                return True
    return False
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass
class DeadNode:
    """A definition reported by ``find_dead_code`` as having no references."""

    id: str  # graph node identifier
    name: str  # short name of the definition
    qualname: str  # qualified name; find_dead_code falls back to ``name``
    kind: str  # node kind string (function / method / class)
    file: str  # source file path, "" when the node carries none
    line_start: int  # first line of the definition, 0 when unknown
    reason: str = "no incoming references"  # why the node was reported
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _is_dunder(name: str) -> bool:
|
|
77
|
+
return name.startswith("__") and name.endswith("__")
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _is_test_function(name: str) -> bool:
|
|
81
|
+
return name.startswith("test_") or name.startswith("test")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def find_dead_code(
    graph: nx.MultiDiGraph,
    *,
    include_tests: bool = False,
) -> list[DeadNode]:
    """Return function, method and class nodes with no incoming reference edges.

    A candidate is reported when none of its incoming edges has a key listed
    in ``REFERENCE_EDGE_KINDS``. The following candidates are never reported:

    * ``main`` / ``__main__`` entry points and dunder names
    * names recognised by ``_is_test_function``
    * nodes living in test modules (unless ``include_tests`` is true)
    * nodes whose metadata carries ``entry_point`` or ``public_api``
    * ``@property`` / ``@cached_property`` accessors
    * nodes whose file is matched by ``is_excluded_path``
    * protocol classes and the methods defined in them
    * methods whose owning class inherits from another class (likely
      overrides reached through base-class dispatch)

    Args:
        graph: The code graph to inspect.
        include_tests: When true, nodes inside test modules are considered
            too. Names matching ``_is_test_function`` are skipped regardless.

    Returns:
        The dead nodes sorted by ``(file, line_start, qualname)``.
    """
    dead: list[DeadNode] = []
    for nid, attrs in graph.nodes(data=True):
        kind = _kind_str(attrs.get("kind"))
        if kind not in _CANDIDATE_KINDS:
            continue
        name = str(attrs.get("name") or "")
        if name in _ENTRYPOINT_NAMES:
            continue
        if _is_dunder(name):
            continue
        if _is_test_function(name):
            continue
        if not include_tests and in_test_module(graph, nid):
            continue
        # Decorator/entry-point-aware skip: framework hooks (Typer commands,
        # FastAPI routes, pytest fixtures, abstract methods, Celery tasks,
        # etc.) are invoked dynamically and have no static incoming edge.
        # The Python parser tags them with metadata["entry_point"] = True.
        metadata = attrs.get("metadata")
        if not isinstance(metadata, dict):
            # Tolerate missing or malformed metadata instead of crashing on
            # ``.get``; such a node simply carries no exemption flags.
            metadata = {}
        if metadata.get("entry_point"):
            continue
        # @property / @cached_property are accessed as attributes, not calls.
        if _has_property_decorator(metadata):
            continue
        # Pragma-marked public-API symbols are intentionally exposed for
        # library consumers; the codebase itself may not call them, but
        # they are not dead. See `# pragma: codegraph-public-api` /
        # `# codegraph: public-api` (or `// ...` for TS) in source.
        if metadata.get("public_api"):
            continue
        # Generated/static frontend assets and test fixtures don't have
        # traceable call graphs — exclude them from dead-code detection.
        file = str(attrs.get("file") or "")
        if is_excluded_path(file):
            continue
        # Skip ``typing.Protocol`` classes and their methods. Protocols define
        # structural types for static type checking; they have no runtime
        # call-graph incoming edges by design.
        if kind == NodeKind.CLASS.value and is_protocol_class(graph, nid):
            continue
        if kind == NodeKind.METHOD.value and in_protocol_class(graph, nid):
            continue
        # Polymorphic overrides on classes that inherit have no static
        # incoming CALL edge (dispatch is via the base class).
        if kind == NodeKind.METHOD.value and _is_polymorphic_override(graph, nid):
            continue

        if any(
            key in REFERENCE_EDGE_KINDS
            for _src, _dst, key in graph.in_edges(nid, keys=True)
        ):
            continue

        dead.append(
            DeadNode(
                id=nid,
                name=name,
                qualname=str(attrs.get("qualname") or name),
                kind=kind,
                file=file,
                line_start=int(attrs.get("line_start") or 0),
            )
        )
    dead.sort(key=lambda d: (d.file, d.line_start, d.qualname))
    return dead
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Hotspot detection: top-N nodes by fan-in / fan-out / size."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
import networkx as nx
|
|
7
|
+
|
|
8
|
+
from codegraph.analysis._common import _kind_str
|
|
9
|
+
from codegraph.graph.schema import EdgeKind, NodeKind
|
|
10
|
+
|
|
11
|
+
# Node kinds ranked by default: plain functions and methods.
_CALLABLE_KINDS: frozenset[str] = frozenset(
    kind.value for kind in (NodeKind.FUNCTION, NodeKind.METHOD)
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class Hotspot:
    """One ranked node together with its call-graph and size measures."""

    id: str
    name: str
    qualname: str
    kind: str
    file: str
    fan_in: int
    fan_out: int
    loc: int

    @property
    def score(self) -> int:
        """Combined rank: fan-in counts double, every full 50 lines adds one."""
        size_bonus = self.loc // 50
        call_weight = 2 * self.fan_in + self.fan_out
        return call_weight + size_bonus
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def find_hotspots(
    graph: nx.MultiDiGraph,
    *,
    limit: int = 20,
    kinds: frozenset[str] = _CALLABLE_KINDS,
) -> list[Hotspot]:
    """Rank nodes of the requested kinds and return the first ``limit`` of them.

    For every matching node the incoming and outgoing edges keyed as CALLS are
    counted (fan-in / fan-out) and the line span is taken from ``line_start``
    and ``line_end`` (0 when ``line_end`` is missing). Results are ordered by
    descending score, then descending fan-in, then qualified name.
    """
    calls_key = EdgeKind.CALLS.value
    ranked: list[Hotspot] = []
    for node_id, node_attrs in graph.nodes(data=True):
        node_kind = _kind_str(node_attrs.get("kind"))
        if node_kind not in kinds:
            continue
        callers = sum(
            1
            for _s, _d, edge_key in graph.in_edges(node_id, keys=True)
            if edge_key == calls_key
        )
        callees = sum(
            1
            for _s, _d, edge_key in graph.out_edges(node_id, keys=True)
            if edge_key == calls_key
        )
        first_line = int(node_attrs.get("line_start") or 0)
        last_line = int(node_attrs.get("line_end") or 0)
        if last_line:
            span = max(0, last_line - first_line + 1)
        else:
            span = 0
        ranked.append(
            Hotspot(
                id=node_id,
                name=str(node_attrs.get("name") or ""),
                qualname=str(node_attrs.get("qualname") or ""),
                kind=node_kind,
                file=str(node_attrs.get("file") or ""),
                fan_in=callers,
                fan_out=callees,
                loc=span,
            )
        )
    ranked.sort(key=lambda spot: (-spot.score, -spot.fan_in, spot.qualname))
    return ranked[:limit]
|
|
@@ -0,0 +1,439 @@
|
|
|
1
|
+
"""Infrastructure-component detection.
|
|
2
|
+
|
|
3
|
+
Scans IMPORTS edges in the graph to identify external services the project
|
|
4
|
+
talks to (Redis, BullMQ, Postgres, S3, Express, etc.) and aggregates them
|
|
5
|
+
into an architecture-level topology — one node per detected component plus
|
|
6
|
+
the source files / handlers that use it.
|
|
7
|
+
|
|
8
|
+
Pure, read-only pass: walks the in-memory graph, returns a payload dict.
|
|
9
|
+
No DB writes, no schema changes. Output is consumed by
|
|
10
|
+
``build_dashboard_payload`` to populate the dashboard's Architecture view.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from collections import defaultdict
|
|
15
|
+
from typing import Any, Final
|
|
16
|
+
|
|
17
|
+
import networkx as nx
|
|
18
|
+
|
|
19
|
+
from codegraph.analysis._common import _kind_str
|
|
20
|
+
from codegraph.graph.schema import EdgeKind, NodeKind
|
|
21
|
+
|
|
22
|
+
ComponentKind = str # "CACHE" | "QUEUE" | "DB" | "BROKER" | "OBJECT_STORE" | "WEB_SERVER" | "HTTP_CLIENT" | "ORM" | "MESSAGING" | "SEARCH"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
_CATALOG: Final[dict[str, dict[str, str]]] = {
|
|
26
|
+
"redis": {"kind": "CACHE", "label": "Redis", "color": "#ef4444"},
|
|
27
|
+
"ioredis": {"kind": "CACHE", "label": "Redis (ioredis)", "color": "#ef4444"},
|
|
28
|
+
"redis-py": {"kind": "CACHE", "label": "Redis", "color": "#ef4444"},
|
|
29
|
+
"aioredis": {"kind": "CACHE", "label": "Redis (async)", "color": "#ef4444"},
|
|
30
|
+
"memcached": {"kind": "CACHE", "label": "Memcached", "color": "#fb923c"},
|
|
31
|
+
"pymemcache": {"kind": "CACHE", "label": "Memcached", "color": "#fb923c"},
|
|
32
|
+
|
|
33
|
+
"bullmq": {"kind": "QUEUE", "label": "BullMQ", "color": "#f59e0b"},
|
|
34
|
+
"bull": {"kind": "QUEUE", "label": "Bull", "color": "#f59e0b"},
|
|
35
|
+
"celery": {"kind": "QUEUE", "label": "Celery", "color": "#f59e0b"},
|
|
36
|
+
"rq": {"kind": "QUEUE", "label": "RQ", "color": "#f59e0b"},
|
|
37
|
+
"amqplib": {"kind": "BROKER", "label": "RabbitMQ", "color": "#fb7185"},
|
|
38
|
+
"kombu": {"kind": "BROKER", "label": "RabbitMQ", "color": "#fb7185"},
|
|
39
|
+
"pika": {"kind": "BROKER", "label": "RabbitMQ", "color": "#fb7185"},
|
|
40
|
+
"kafkajs": {"kind": "BROKER", "label": "Kafka", "color": "#fb7185"},
|
|
41
|
+
"kafka-python": {"kind": "BROKER", "label": "Kafka", "color": "#fb7185"},
|
|
42
|
+
"confluent-kafka": {"kind": "BROKER", "label": "Kafka", "color": "#fb7185"},
|
|
43
|
+
|
|
44
|
+
"pg": {"kind": "DB", "label": "PostgreSQL", "color": "#3b82f6"},
|
|
45
|
+
"postgres": {"kind": "DB", "label": "PostgreSQL", "color": "#3b82f6"},
|
|
46
|
+
"psycopg2": {"kind": "DB", "label": "PostgreSQL", "color": "#3b82f6"},
|
|
47
|
+
"psycopg": {"kind": "DB", "label": "PostgreSQL", "color": "#3b82f6"},
|
|
48
|
+
"asyncpg": {"kind": "DB", "label": "PostgreSQL", "color": "#3b82f6"},
|
|
49
|
+
"mysql2": {"kind": "DB", "label": "MySQL", "color": "#06b6d4"},
|
|
50
|
+
"mysql": {"kind": "DB", "label": "MySQL", "color": "#06b6d4"},
|
|
51
|
+
"pymysql": {"kind": "DB", "label": "MySQL", "color": "#06b6d4"},
|
|
52
|
+
"sqlite3": {"kind": "DB", "label": "SQLite", "color": "#0ea5e9"},
|
|
53
|
+
"better-sqlite3": {"kind": "DB", "label": "SQLite", "color": "#0ea5e9"},
|
|
54
|
+
"mongodb": {"kind": "DB", "label": "MongoDB", "color": "#22c55e"},
|
|
55
|
+
"mongoose": {"kind": "ORM", "label": "Mongoose", "color": "#22c55e"},
|
|
56
|
+
"pymongo": {"kind": "DB", "label": "MongoDB", "color": "#22c55e"},
|
|
57
|
+
"motor": {"kind": "DB", "label": "MongoDB (async)","color": "#22c55e"},
|
|
58
|
+
"sqlalchemy": {"kind": "ORM", "label": "SQLAlchemy", "color": "#6366f1"},
|
|
59
|
+
"prisma": {"kind": "ORM", "label": "Prisma", "color": "#6366f1"},
|
|
60
|
+
"@prisma/client": {"kind": "ORM", "label": "Prisma", "color": "#6366f1"},
|
|
61
|
+
"typeorm": {"kind": "ORM", "label": "TypeORM", "color": "#6366f1"},
|
|
62
|
+
"sequelize": {"kind": "ORM", "label": "Sequelize", "color": "#6366f1"},
|
|
63
|
+
"drizzle-orm": {"kind": "ORM", "label": "Drizzle", "color": "#6366f1"},
|
|
64
|
+
"knex": {"kind": "ORM", "label": "Knex", "color": "#6366f1"},
|
|
65
|
+
|
|
66
|
+
"express": {"kind": "WEB_SERVER", "label": "Express", "color": "#a78bfa"},
|
|
67
|
+
"fastify": {"kind": "WEB_SERVER", "label": "Fastify", "color": "#a78bfa"},
|
|
68
|
+
"koa": {"kind": "WEB_SERVER", "label": "Koa", "color": "#a78bfa"},
|
|
69
|
+
"@nestjs/core": {"kind": "WEB_SERVER", "label": "NestJS", "color": "#a78bfa"},
|
|
70
|
+
"@nestjs/common": {"kind": "WEB_SERVER", "label": "NestJS", "color": "#a78bfa"},
|
|
71
|
+
"next": {"kind": "WEB_SERVER", "label": "Next.js", "color": "#a78bfa"},
|
|
72
|
+
"fastapi": {"kind": "WEB_SERVER", "label": "FastAPI", "color": "#a78bfa"},
|
|
73
|
+
"flask": {"kind": "WEB_SERVER", "label": "Flask", "color": "#a78bfa"},
|
|
74
|
+
"django": {"kind": "WEB_SERVER", "label": "Django", "color": "#a78bfa"},
|
|
75
|
+
"starlette": {"kind": "WEB_SERVER", "label": "Starlette", "color": "#a78bfa"},
|
|
76
|
+
"tornado": {"kind": "WEB_SERVER", "label": "Tornado", "color": "#a78bfa"},
|
|
77
|
+
|
|
78
|
+
"axios": {"kind": "HTTP_CLIENT", "label": "axios", "color": "#14b8a6"},
|
|
79
|
+
"got": {"kind": "HTTP_CLIENT", "label": "got", "color": "#14b8a6"},
|
|
80
|
+
"node-fetch": {"kind": "HTTP_CLIENT", "label": "node-fetch", "color": "#14b8a6"},
|
|
81
|
+
"undici": {"kind": "HTTP_CLIENT", "label": "undici", "color": "#14b8a6"},
|
|
82
|
+
"requests": {"kind": "HTTP_CLIENT", "label": "requests", "color": "#14b8a6"},
|
|
83
|
+
"httpx": {"kind": "HTTP_CLIENT", "label": "httpx", "color": "#14b8a6"},
|
|
84
|
+
"aiohttp": {"kind": "HTTP_CLIENT", "label": "aiohttp", "color": "#14b8a6"},
|
|
85
|
+
|
|
86
|
+
"aws-sdk": {"kind": "OBJECT_STORE", "label": "AWS SDK", "color": "#f59e0b"},
|
|
87
|
+
"@aws-sdk/client-s3": {"kind": "OBJECT_STORE","label": "AWS S3", "color": "#f59e0b"},
|
|
88
|
+
"boto3": {"kind": "OBJECT_STORE", "label": "AWS (boto3)", "color": "#f59e0b"},
|
|
89
|
+
"@google-cloud/storage": {"kind": "OBJECT_STORE","label": "GCS", "color": "#3b82f6"},
|
|
90
|
+
"minio": {"kind": "OBJECT_STORE", "label": "MinIO", "color": "#f59e0b"},
|
|
91
|
+
|
|
92
|
+
"elasticsearch": {"kind": "SEARCH", "label": "Elasticsearch", "color": "#fbbf24"},
|
|
93
|
+
"@elastic/elasticsearch": {"kind": "SEARCH", "label": "Elasticsearch", "color": "#fbbf24"},
|
|
94
|
+
"meilisearch": {"kind": "SEARCH", "label": "Meilisearch", "color": "#fbbf24"},
|
|
95
|
+
"algoliasearch": {"kind": "SEARCH", "label": "Algolia", "color": "#fbbf24"},
|
|
96
|
+
|
|
97
|
+
"socket.io": {"kind": "MESSAGING", "label": "Socket.IO", "color": "#ec4899"},
|
|
98
|
+
"ws": {"kind": "MESSAGING", "label": "WebSocket", "color": "#ec4899"},
|
|
99
|
+
"graphql": {"kind": "WEB_SERVER", "label": "GraphQL", "color": "#ec4899"},
|
|
100
|
+
"@apollo/server": {"kind": "WEB_SERVER", "label": "Apollo Server", "color": "#ec4899"},
|
|
101
|
+
|
|
102
|
+
"stripe": {"kind": "EXTERNAL_API", "label": "Stripe", "color": "#8b5cf6"},
|
|
103
|
+
"twilio": {"kind": "EXTERNAL_API", "label": "Twilio", "color": "#8b5cf6"},
|
|
104
|
+
"sendgrid": {"kind": "EXTERNAL_API", "label": "SendGrid", "color": "#8b5cf6"},
|
|
105
|
+
"@sendgrid/mail": {"kind": "EXTERNAL_API", "label": "SendGrid", "color": "#8b5cf6"},
|
|
106
|
+
"nodemailer": {"kind": "EXTERNAL_API", "label": "Email (SMTP)", "color": "#8b5cf6"},
|
|
107
|
+
"firebase-admin": {"kind": "EXTERNAL_API", "label": "Firebase", "color": "#8b5cf6"},
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
# Sort longer keys first so "@prisma/client" wins over "prisma" prefix tests.
|
|
111
|
+
_CATALOG_KEYS_LONGEST_FIRST: Final[list[str]] = sorted(
|
|
112
|
+
_CATALOG.keys(), key=len, reverse=True,
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _root_package(target: str) -> str:
|
|
117
|
+
"""Return the leading package name from an import target_name.
|
|
118
|
+
|
|
119
|
+
Python: ``redis.Redis`` -> ``redis``; ``aws.s3.client`` -> ``aws``.
|
|
120
|
+
TS: ``ioredis.default`` -> ``ioredis``;
|
|
121
|
+
``@aws-sdk/client-s3.S3Client`` -> ``@aws-sdk/client-s3``.
|
|
122
|
+
"""
|
|
123
|
+
if not target:
|
|
124
|
+
return ""
|
|
125
|
+
# Scoped npm packages: keep the @scope/pkg slug intact.
|
|
126
|
+
if target.startswith("@"):
|
|
127
|
+
# Split into ["@scope/pkg", "rest", ...] by finding the first "." that
|
|
128
|
+
# comes AFTER the slash separating scope from pkg.
|
|
129
|
+
slash = target.find("/")
|
|
130
|
+
if slash > 0:
|
|
131
|
+
dot = target.find(".", slash)
|
|
132
|
+
return target[:dot] if dot > 0 else target
|
|
133
|
+
return target.split(".", 1)[0]
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _classify(target: str) -> dict[str, str] | None:
    """Look up the catalog entry for an import target, or None if unknown.

    The root package is tried first as an exact catalog key. Failing that,
    catalog keys are tested longest-first and the first one that equals the
    target, or prefixes it followed by ``/`` or ``.``, wins.
    """
    if not target:
        return None
    exact = _CATALOG.get(_root_package(target))
    if exact is not None:
        return exact
    for candidate in _CATALOG_KEYS_LONGEST_FIRST:
        if target == candidate or target.startswith(
            (candidate + "/", candidate + ".")
        ):
            return _CATALOG[candidate]
    return None
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _file_of(graph: nx.MultiDiGraph, node_id: str) -> str:
    """Return the ``file`` attribute of a node, or "" when it is absent."""
    node_attrs = graph.nodes.get(node_id)
    if not node_attrs:
        return ""
    return str(node_attrs.get("file") or "")
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _component_id(kind: str, label: str) -> str:
|
|
156
|
+
return f"infra:{kind}:{label}".lower().replace(" ", "_")
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _collect_handlers(graph: nx.MultiDiGraph) -> list[dict[str, Any]]:
    """List every function/method node whose metadata role is ``HANDLER``.

    Each entry carries the node id, name, qualified name, file, start line
    and the (method, path) pair parsed from the node's decorator texts.
    Entries are ordered by file and then line.
    """
    callable_kinds = (NodeKind.FUNCTION.value, NodeKind.METHOD.value)
    found: list[dict[str, Any]] = []
    for node_id, node_attrs in graph.nodes(data=True):
        meta = node_attrs.get("metadata") or {}
        is_handler = isinstance(meta, dict) and meta.get("role") == "HANDLER"
        if not is_handler:
            continue
        if _kind_str(node_attrs.get("kind")) not in callable_kinds:
            continue
        method, path = _parse_route_from_decorators(meta.get("decorators") or [])
        entry: dict[str, Any] = {
            "id": node_id,
            "name": str(node_attrs.get("name") or ""),
            "qualname": str(node_attrs.get("qualname") or ""),
            "file": str(node_attrs.get("file") or ""),
            "line": int(node_attrs.get("line_start") or 0),
            "method": method,
            "path": path,
        }
        found.append(entry)
    return sorted(found, key=lambda item: (item["file"], item["line"]))
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
# NOTE(review): placeholder that stays ``None``; nothing in the visible part of
# this module ever assigns a compiled pattern to it — confirm it is still needed.
_EXPRESS_VERB_RE: Final = None
# Lower-case verb names, presumably the method names recognised on Express-style
# route registrations (``all`` in addition to the usual HTTP verbs).
# NOTE(review): not referenced in the visible part of this module — verify usage.
_EXPRESS_VERBS: Final[frozenset[str]] = frozenset({
    "get", "post", "put", "delete", "patch", "head", "options", "all",
})
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _resolve_handler_by_name(
    graph: nx.MultiDiGraph,
    name: str,
    near_file: str,
) -> str | None:
    """Resolve a handler name to the id of a FUNCTION/METHOD node.

    Names that are empty, or that contain anything besides alphanumerics,
    ``_`` and ``$``, are rejected. A node in ``near_file`` is returned as
    soon as it is seen; otherwise the first match from any other file is
    used. Returns None when nothing matches.
    """
    if not name:
        return None
    if not name.replace("_", "").replace("$", "").isalnum():
        return None
    callable_kinds = (NodeKind.FUNCTION.value, NodeKind.METHOD.value)
    fallback: str | None = None
    for node_id, node_attrs in graph.nodes(data=True):
        if _kind_str(node_attrs.get("kind")) not in callable_kinds:
            continue
        if str(node_attrs.get("name") or "") != name:
            continue
        candidate = str(node_id)
        if str(node_attrs.get("file") or "") == near_file:
            return candidate
        # Remember the first match found outside ``near_file``.
        fallback = fallback or candidate
    return fallback
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _collect_express_handlers(graph: nx.MultiDiGraph) -> list[dict[str, Any]]:
    """Extract Express/Koa-style endpoints from MODULE node metadata.

    The TS parser walks each file and stores route registrations
    (``app.get('/x', fn)``, ``router.post(...)`` etc.) under
    ``metadata.express_routes`` on its MODULE node. We read those here and
    resolve handler names to FUNCTION/METHOD nodes so reachability BFS is
    accurate.

    Modules whose metadata is not a dict, or whose ``express_routes`` is not
    a non-empty list, are skipped, as are route records that are not dicts
    or that lack a method or path. Each returned entry has the same keys as
    a decorator-style handler plus ``_bfs_from``: the resolved handler node
    id, or the module node id when the handler name could not be resolved.
    Entries are ordered by file and then line.
    """
    out: list[dict[str, Any]] = []
    for nid, attrs in graph.nodes(data=True):
        if _kind_str(attrs.get("kind")) != NodeKind.MODULE.value:
            continue
        meta = attrs.get("metadata")
        if not isinstance(meta, dict):
            # Same guard as the decorator-style collector: malformed metadata
            # means "no routes", not a crash.
            continue
        routes = meta.get("express_routes")
        if not isinstance(routes, list) or not routes:
            continue
        module_file = str(attrs.get("file") or "")
        for r in routes:
            if not isinstance(r, dict):
                continue
            method = str(r.get("method") or "").upper()
            path = str(r.get("path") or "")
            handler_name = str(r.get("handler_name") or "")
            line = int(r.get("line") or 0)
            if not method or not path:
                continue
            handler_id = (
                _resolve_handler_by_name(graph, handler_name, module_file)
                if handler_name
                else None
            )
            # Synthetic id for routes whose handler is inline or unresolved.
            synth_id = f"express:{method}:{path}:{module_file}:{line}"
            out.append({
                "id": handler_id or synth_id,
                "name": handler_name or path,
                "qualname": (
                    str(graph.nodes[handler_id].get("qualname") or "")
                    if handler_id
                    else f"{module_file}:{line}"
                ),
                "file": module_file,
                "line": line,
                "method": method,
                "path": path,
                "_bfs_from": handler_id or nid,
            })
    out.sort(key=lambda h: (h["file"], h["line"]))
    return out
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
_HTTP_VERBS: Final[tuple[str, ...]] = (
|
|
268
|
+
"get", "post", "put", "delete", "patch", "head", "options",
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _parse_route_from_decorators(decorators: list[Any]) -> tuple[str, str]:
|
|
273
|
+
"""Extract (METHOD, path) from a list of decorator-text strings.
|
|
274
|
+
|
|
275
|
+
Best-effort regex over the captured decorator text. Returns ("", "") if
|
|
276
|
+
nothing matched (the handler still appears in the list, just unlabeled).
|
|
277
|
+
"""
|
|
278
|
+
import re
|
|
279
|
+
method = ""
|
|
280
|
+
path = ""
|
|
281
|
+
for dec in decorators:
|
|
282
|
+
text = str(dec)
|
|
283
|
+
m = re.search(
|
|
284
|
+
r"@\w[\w\.]*\.(get|post|put|delete|patch|head|options|route|websocket)\b",
|
|
285
|
+
text,
|
|
286
|
+
re.IGNORECASE,
|
|
287
|
+
)
|
|
288
|
+
if m:
|
|
289
|
+
verb = m.group(1).lower()
|
|
290
|
+
if verb in _HTTP_VERBS:
|
|
291
|
+
method = verb.upper()
|
|
292
|
+
elif verb == "route":
|
|
293
|
+
method = "ANY"
|
|
294
|
+
m2 = re.search(r"""["']([^"']+)["']""", text)
|
|
295
|
+
if m2 and not path:
|
|
296
|
+
path = m2.group(1)
|
|
297
|
+
if method and path:
|
|
298
|
+
break
|
|
299
|
+
return method, path
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def _bfs_infra_for_handler(
    graph: nx.MultiDiGraph,
    handler_id: str,
    file_to_components: dict[str, set[str]],
    max_depth: int = 6,
) -> list[str]:
    """Walk forward through CALLS edges from a handler, collect infra IDs hit.

    Args:
        graph: The code graph to traverse.
        handler_id: Node id the breadth-first walk starts from.
        file_to_components: Maps a file path to the ids of the components
            imported in that file.
        max_depth: Nodes at this depth are still inspected but their
            outgoing edges are not followed.

    Returns:
        Component ids in the order they were first encountered, without
        duplicates.
    """
    from collections import deque

    seen: set[str] = {handler_id}
    # deque gives O(1) pops from the left; list.pop(0) shifts the whole list.
    queue: deque[tuple[str, int]] = deque([(handler_id, 0)])
    hits: list[str] = []
    seen_components: set[str] = set()
    while queue:
        nid, depth = queue.popleft()
        node_file = _file_of(graph, nid)
        # Any component imported in the file containing this node counts as
        # a hit, since the handler/service touches that file's symbols.
        for cid in file_to_components.get(node_file, ()):
            if cid not in seen_components:
                seen_components.add(cid)
                hits.append(cid)
        if depth >= max_depth:
            continue
        for _src, dst, data in graph.out_edges(nid, data=True):
            if _kind_str(data.get("kind")) != EdgeKind.CALLS.value:
                continue
            if dst in seen:
                continue
            if not isinstance(dst, str):
                continue
            # Call targets that could not be resolved are not followed.
            if dst.startswith("unresolved::"):
                continue
            seen.add(dst)
            queue.append((dst, depth + 1))
    return hits
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def detect_infrastructure(graph: nx.MultiDiGraph) -> dict[str, Any]:
    """Assemble the architecture-view payload from the graph.

    The returned dict has four top-level keys:

    * ``components`` - one entry per detected external service with ``id``,
      ``kind``, ``label``, ``color``, ``count`` (import sites), ``files``
      (paths that import it) and ``evidence`` (up to six distinct
      ``file:line -> target`` strings); sorted by descending count, then
      label.
    * ``handlers`` - decorator-style and Express-style handlers,
      de-duplicated on (method, path, file, line), each with the ids of the
      ``components`` reachable from it.
    * ``edges`` - one ``source_file`` / ``target`` / ``count`` record per
      importing file and component, sorted by descending count.
    * ``metrics`` - summary counts (components, handlers, import sites and
      components per kind).
    """
    # Pass 1: classify every IMPORTS edge and gather per-component evidence.
    imports_kind = EdgeKind.IMPORTS.value
    components: dict[str, dict[str, Any]] = {}
    file_to_components: dict[str, set[str]] = defaultdict(set)
    usage_counts: dict[tuple[str, str], int] = defaultdict(int)

    for importer_id, _imported, edge_data in graph.edges(data=True):
        if _kind_str(edge_data.get("kind")) != imports_kind:
            continue
        edge_meta = edge_data.get("metadata") or {}
        if not isinstance(edge_meta, dict):
            continue
        # ``source`` (set on TS edges) is preferred over ``target_name``
        # because it is the cleaner package slug for scoped packages.
        target_str = str(
            edge_meta.get("source") or edge_meta.get("target_name") or ""
        )
        if not target_str:
            continue
        catalog_entry = _classify(target_str)
        if catalog_entry is None:
            continue

        cid = _component_id(catalog_entry["kind"], catalog_entry["label"])
        comp = components.get(cid)
        if comp is None:
            comp = components[cid] = {
                "id": cid,
                "kind": catalog_entry["kind"],
                "label": catalog_entry["label"],
                "color": catalog_entry["color"],
                "count": 0,
                "files": [],
                "evidence": [],
            }
        comp["count"] += 1

        importer_file = _file_of(graph, importer_id)
        if not importer_file:
            continue
        known_files = comp["files"]
        if importer_file not in known_files:
            known_files.append(importer_file)
        file_to_components[importer_file].add(cid)
        usage_counts[(importer_file, cid)] += 1
        line_label = edge_data.get("line") or "?"
        evidence_line = f"{importer_file}:{line_label} -> {target_str}"
        evidence = comp["evidence"]
        if len(evidence) < 6 and evidence_line not in evidence:
            evidence.append(evidence_line)

    # Pass 2: merge both handler sources, drop duplicates, attach components.
    handlers: list[dict[str, Any]] = []
    seen_keys: set[tuple[Any, ...]] = set()
    for entry in _collect_handlers(graph) + _collect_express_handlers(graph):
        identity = (entry["method"], entry["path"], entry["file"], entry["line"])
        if identity in seen_keys:
            continue
        seen_keys.add(identity)
        bfs_root = entry.pop("_bfs_from", None) or entry["id"]
        entry["components"] = _bfs_infra_for_handler(
            graph, bfs_root, file_to_components,
        )
        handlers.append(entry)

    # Pass 3: render-ready edges (importer file -> component), busiest first.
    ordered_pairs = sorted(usage_counts.items(), key=lambda item: -item[1])
    edges = [
        {"source_file": source_file, "target": component_id, "count": total}
        for (source_file, component_id), total in ordered_pairs
    ]

    by_kind: dict[str, int] = {}
    for comp in components.values():
        by_kind[comp["kind"]] = by_kind.get(comp["kind"], 0) + 1

    ranked_components = sorted(
        components.values(), key=lambda c: (-int(c["count"]), c["label"]),
    )
    metrics = {
        "components": len(components),
        "handlers": len(handlers),
        "import_sites": sum(c["count"] for c in components.values()),
        "by_kind": by_kind,
    }
    return {
        "components": ranked_components,
        "handlers": handlers,
        "edges": edges,
        "metrics": metrics,
    }
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
# Public API of this module: only the top-level detection entry point.
__all__ = ["detect_infrastructure"]
|