polycodegraph 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph/__init__.py +10 -0
- codegraph/analysis/__init__.py +30 -0
- codegraph/analysis/_common.py +125 -0
- codegraph/analysis/blast_radius.py +63 -0
- codegraph/analysis/cycles.py +79 -0
- codegraph/analysis/dataflow.py +861 -0
- codegraph/analysis/dead_code.py +165 -0
- codegraph/analysis/hotspots.py +68 -0
- codegraph/analysis/infrastructure.py +439 -0
- codegraph/analysis/metrics.py +52 -0
- codegraph/analysis/report.py +222 -0
- codegraph/analysis/roles.py +323 -0
- codegraph/analysis/untested.py +79 -0
- codegraph/cli.py +1506 -0
- codegraph/config.py +64 -0
- codegraph/embed/__init__.py +35 -0
- codegraph/embed/chunker.py +120 -0
- codegraph/embed/embedder.py +113 -0
- codegraph/embed/query.py +181 -0
- codegraph/embed/store.py +360 -0
- codegraph/graph/__init__.py +0 -0
- codegraph/graph/builder.py +212 -0
- codegraph/graph/schema.py +69 -0
- codegraph/graph/store_networkx.py +55 -0
- codegraph/graph/store_sqlite.py +249 -0
- codegraph/mcp_server/__init__.py +6 -0
- codegraph/mcp_server/server.py +933 -0
- codegraph/parsers/__init__.py +0 -0
- codegraph/parsers/base.py +70 -0
- codegraph/parsers/go.py +570 -0
- codegraph/parsers/python.py +1707 -0
- codegraph/parsers/typescript.py +1397 -0
- codegraph/py.typed +0 -0
- codegraph/resolve/__init__.py +4 -0
- codegraph/resolve/calls.py +480 -0
- codegraph/review/__init__.py +31 -0
- codegraph/review/baseline.py +32 -0
- codegraph/review/differ.py +211 -0
- codegraph/review/hook.py +70 -0
- codegraph/review/risk.py +219 -0
- codegraph/review/rules.py +342 -0
- codegraph/viz/__init__.py +17 -0
- codegraph/viz/_style.py +45 -0
- codegraph/viz/dashboard.py +740 -0
- codegraph/viz/diagrams.py +370 -0
- codegraph/viz/explore.py +453 -0
- codegraph/viz/hld.py +683 -0
- codegraph/viz/html.py +115 -0
- codegraph/viz/mermaid.py +111 -0
- codegraph/viz/svg.py +77 -0
- codegraph/web/__init__.py +4 -0
- codegraph/web/server.py +165 -0
- codegraph/web/static/app.css +664 -0
- codegraph/web/static/app.js +919 -0
- codegraph/web/static/index.html +112 -0
- codegraph/web/static/views/architecture.js +1671 -0
- codegraph/web/static/views/graph3d.css +564 -0
- codegraph/web/static/views/graph3d.js +999 -0
- codegraph/web/static/views/graph3d_transform.js +984 -0
- codegraph/workspace/__init__.py +34 -0
- codegraph/workspace/config.py +110 -0
- codegraph/workspace/operations.py +294 -0
- polycodegraph-0.1.0.dist-info/METADATA +687 -0
- polycodegraph-0.1.0.dist-info/RECORD +67 -0
- polycodegraph-0.1.0.dist-info/WHEEL +4 -0
- polycodegraph-0.1.0.dist-info/entry_points.txt +2 -0
- polycodegraph-0.1.0.dist-info/licenses/LICENSE +21 -0
codegraph/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""codegraph — language-agnostic code graph for analysis, PR review, and AI assistants."""
|
|
2
|
+
from importlib.metadata import PackageNotFoundError as _PackageNotFoundError
|
|
3
|
+
from importlib.metadata import version as _version
|
|
4
|
+
|
|
5
|
+
# Expose the installed distribution's version as ``codegraph.__version__``.
try:
    __version__ = _version("polycodegraph")
except _PackageNotFoundError:
    # No installed "polycodegraph" distribution was found. This is the rare
    # bare source checkout case: `pip install -e .` registers the package
    # and avoids this branch in normal dev setups.
    __version__ = "0.0.0+local"
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Whole-project analyses operating on the codegraph store."""
|
|
2
|
+
from codegraph.analysis.blast_radius import BlastRadiusResult, blast_radius
|
|
3
|
+
from codegraph.analysis.cycles import Cycle, CycleReport, find_cycles
|
|
4
|
+
from codegraph.analysis.dataflow import DataFlow, FlowHop, match_route, trace
|
|
5
|
+
from codegraph.analysis.dead_code import DeadNode, find_dead_code
|
|
6
|
+
from codegraph.analysis.hotspots import Hotspot, find_hotspots
|
|
7
|
+
from codegraph.analysis.metrics import GraphMetrics, compute_metrics
|
|
8
|
+
from codegraph.analysis.roles import classify_roles
|
|
9
|
+
from codegraph.analysis.untested import UntestedNode, find_untested
|
|
10
|
+
|
|
11
|
+
# Public API of ``codegraph.analysis``: every name listed here is imported
# at the top of this module from one of the analysis submodules.
__all__ = [
    # CapWords exports (e.g. the BlastRadiusResult / CycleReport records).
    "BlastRadiusResult",
    "Cycle",
    "CycleReport",
    "DataFlow",
    "DeadNode",
    "FlowHop",
    "GraphMetrics",
    "Hotspot",
    "UntestedNode",
    # snake_case exports (e.g. the blast_radius / find_cycles entry points).
    "blast_radius",
    "classify_roles",
    "compute_metrics",
    "find_cycles",
    "find_dead_code",
    "find_hotspots",
    "find_untested",
    "match_route",
    "trace",
]
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""Shared helpers for analysis modules."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from collections.abc import Iterable
|
|
5
|
+
|
|
6
|
+
import networkx as nx
|
|
7
|
+
|
|
8
|
+
from codegraph.graph.schema import EdgeKind
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _kind_str(value: object) -> str:
|
|
12
|
+
"""Return the canonical string form of a NodeKind/EdgeKind/str."""
|
|
13
|
+
return str(getattr(value, "value", value) or "")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Edge kinds that count as one symbol referencing another, stored as the
# ``.value`` of each EdgeKind member so they can be compared with edge keys.
REFERENCE_EDGE_KINDS: frozenset[str] = frozenset(
    kind.value
    for kind in (
        EdgeKind.CALLS,
        EdgeKind.IMPORTS,
        EdgeKind.INHERITS,
        EdgeKind.IMPLEMENTS,
    )
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
EXCLUDED_PATH_FRAGMENTS: tuple[str, ...] = (
    "tests/fixtures/",
    "tests\\fixtures\\",
    "/static/",
    "\\static\\",
    # Demo / example repos are documentation, not call-graph-traceable code.
    # Their entry points are framework-driven (FastAPI decorators, React JSX
    # mount points) so the call-graph analyser would otherwise flag every
    # showcase symbol as "dead" or "untested".
    "examples/",
    "examples\\",
)


def _as_segment_marker(fragment: str) -> str:
    """Convert an exclusion fragment into a ``/dir/`` style segment marker.

    Backslashes become forward slashes and the fragment is wrapped in
    exactly one leading and one trailing ``/`` so that it can only match
    whole directory names.
    """
    return "/" + fragment.replace("\\", "/").strip("/") + "/"


def is_excluded_path(file_path: str) -> bool:
    """True iff the file path is under a directory excluded from analysis.

    Test fixtures, static frontend assets and example projects don't have
    traceable call graphs and should not be analysed for dead-code or
    untested-symbol detection.

    Matching is done on whole path segments after normalising ``\\`` to
    ``/``: ``examples/app.py`` and ``pkg/static/app.js`` are excluded, while
    look-alike directories such as ``counterexamples/`` are not. Paths are
    treated the same whether they are absolute or relative to the project
    root.

    Args:
        file_path: Path of the file, using either separator style.

    Returns:
        ``True`` when any entry of ``EXCLUDED_PATH_FRAGMENTS`` names a
        directory on the path; ``False`` otherwise, including for an empty
        path.
    """
    if not file_path:
        return False
    # A single leading "/" lets a root-relative first segment ("static/x")
    # match the same "/static/" marker as a nested one ("web/static/x").
    normalised = "/" + file_path.replace("\\", "/").lstrip("/")
    # Markers are derived at call time so the function keeps following the
    # module-level constant if it is replaced.
    return any(
        _as_segment_marker(fragment) in normalised
        for fragment in EXCLUDED_PATH_FRAGMENTS
    )
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def is_protocol_class(graph: nx.MultiDiGraph, class_id: str) -> bool:
    """Report whether ``class_id`` has a direct parent named ``Protocol``.

    Every outgoing edge keyed as INHERITS is inspected. The parent's name is
    read from the edge's ``metadata["target_name"]`` when available;
    otherwise it falls back to the destination node's ``name`` or
    ``qualname`` attribute, and finally to the destination ID itself with
    any ``unresolved::`` prefix removed. A parent matches when the name is
    exactly ``Protocol`` or ends in ``.Protocol`` (e.g. ``typing.Protocol``).
    """
    inherits_key = EdgeKind.INHERITS.value
    for _origin, parent_id, edge_key, edge_data in graph.out_edges(
        class_id, keys=True, data=True
    ):
        if edge_key != inherits_key:
            continue

        edge_meta = edge_data.get("metadata") or {}
        if isinstance(edge_meta, dict):
            parent_name = str(edge_meta.get("target_name") or "")
        else:
            parent_name = ""

        if not parent_name:
            parent_attrs = graph.nodes.get(parent_id) or {}
            parent_name = str(
                parent_attrs.get("name")
                or parent_attrs.get("qualname")
                or parent_id
            )
            # The raw destination ID may carry the parser's "unresolved::"
            # marker; drop it so only the symbol name is compared.
            if parent_name.startswith("unresolved::"):
                parent_name = parent_name.split("::", 1)[1]

        # Accept the bare name or any dotted path whose last part is Protocol.
        if parent_name == "Protocol" or parent_name.endswith(".Protocol"):
            return True
    return False
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def in_protocol_class(graph: nx.MultiDiGraph, method_id: str) -> bool:
    """Report whether ``method_id`` is defined in a ``Protocol`` class.

    Follows the node's outgoing DEFINED_IN edges and returns ``True`` as
    soon as one of them leads to a node of kind ``CLASS`` for which
    :func:`is_protocol_class` holds.
    """
    defined_in_key = EdgeKind.DEFINED_IN.value
    return any(
        edge_key == defined_in_key
        and _kind_str((graph.nodes.get(owner_id) or {}).get("kind")) == "CLASS"
        and is_protocol_class(graph, owner_id)
        for _origin, owner_id, edge_key in graph.out_edges(method_id, keys=True)
    )
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def in_test_module(graph: nx.MultiDiGraph, node_id: str) -> bool:
    """Decide whether ``node_id`` belongs to test code.

    The checks run in this order and the first hit wins:

    1. the node's own ``metadata["is_test"]`` flag;
    2. the node's ``file`` path lies under a ``tests/`` directory (after
       normalising backslashes) -- this catches non-Python test files such
       as ``node --test`` JS files that carry no ``is_test`` metadata;
    3. some MODULE node with the same ``file`` is flagged ``is_test``.

    A node with no ``file`` attribute and no flag of its own is not
    considered test code.
    """
    node_attrs = graph.nodes.get(node_id) or {}
    if (node_attrs.get("metadata") or {}).get("is_test"):
        return True

    file_path = node_attrs.get("file")
    if not file_path:
        return False

    posix_path = str(file_path).replace("\\", "/")
    if posix_path.startswith("tests/") or "/tests/" in posix_path:
        return True

    # Last resort: look for the file's MODULE node and use its flag.
    return any(
        candidate.get("file") == file_path
        and _kind_str(candidate.get("kind")) == "MODULE"
        and (candidate.get("metadata") or {}).get("is_test")
        for _candidate_id, candidate in graph.nodes(data=True)
    )
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def filter_kinds(
    graph: nx.MultiDiGraph, allowed: Iterable[str]
) -> nx.MultiDiGraph:
    """Build a new graph with all nodes but only edges of the given kinds.

    Every node of ``graph`` is added to the result with its attributes. An
    edge is carried over, with the same key and attributes, only when its
    ``kind`` attribute is a member of ``allowed``. The input graph is not
    modified.
    """
    wanted = set(allowed)
    filtered: nx.MultiDiGraph = nx.MultiDiGraph()

    for node_id, node_attrs in graph.nodes(data=True):
        filtered.add_node(node_id, **node_attrs)

    for origin, target, edge_key, edge_attrs in graph.edges(keys=True, data=True):
        if edge_attrs.get("kind") not in wanted:
            continue
        filtered.add_edge(origin, target, key=edge_key, **edge_attrs)

    return filtered
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Blast radius: reverse-reachable set of a node via reference edges."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
|
|
6
|
+
import networkx as nx
|
|
7
|
+
|
|
8
|
+
from codegraph.analysis._common import REFERENCE_EDGE_KINDS, in_test_module
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class BlastRadiusResult:
    """Outcome of a blast-radius query for a single target node."""

    # ID of the node the query was run for.
    target: str
    # IDs of the nodes found to reference the target (target excluded).
    nodes: list[str] = field(default_factory=list)
    # Distinct ``file`` attribute values of the nodes in ``nodes``.
    files: set[str] = field(default_factory=set)
    # Subset of ``nodes`` that was classified as test code.
    test_nodes: list[str] = field(default_factory=list)
    # Hop limit the query was run with; ``None`` means no limit.
    depth: int | None = None

    @property
    def size(self) -> int:
        """Number of node IDs recorded in ``nodes``."""
        node_count = len(self.nodes)
        return node_count
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def blast_radius(
    graph: nx.MultiDiGraph,
    node_id: str,
    depth: int | None = None,
) -> BlastRadiusResult:
    """Return the set of nodes that transitively reference ``node_id``.

    A node ``B`` is in the blast radius of ``A`` iff there is a path from
    ``B`` to ``A`` along CALLS / IMPORTS / INHERITS / IMPLEMENTS edges. The
    target itself is excluded from ``nodes``.

    Args:
        graph: Code graph whose edge keys are edge-kind strings.
        node_id: ID of the target node.
        depth: Maximum number of reverse hops to follow. ``None`` follows
            references until no new node is found; ``0`` or a negative
            value yields an empty result.

    Returns:
        A ``BlastRadiusResult`` whose ``nodes`` and ``test_nodes`` are both
        in sorted node-ID order. If ``node_id`` is not in ``graph`` the
        result is empty.
    """
    result = BlastRadiusResult(target=node_id, depth=depth)
    if node_id not in graph:
        return result

    # Breadth-first walk over incoming reference edges, one hop per round.
    visited: set[str] = {node_id}
    frontier: set[str] = {node_id}
    hops = 0
    while frontier and (depth is None or hops < depth):
        next_frontier: set[str] = {
            src
            for current in frontier
            for src, _dst, key in graph.in_edges(current, keys=True)
            if key in REFERENCE_EDGE_KINDS and src not in visited
        }
        visited |= next_frontier
        frontier = next_frontier
        hops += 1

    visited.discard(node_id)
    result.nodes = sorted(visited)
    # Iterate the sorted list, not the set: set order for strings varies
    # between interpreter runs, which made ``test_nodes`` order unstable.
    for nid in result.nodes:
        attrs = graph.nodes.get(nid) or {}
        if attrs.get("file"):
            result.files.add(str(attrs["file"]))
        if in_test_module(graph, nid):
            result.test_nodes.append(nid)
    return result
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Cycle detection over import + call subgraphs."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
|
|
6
|
+
import networkx as nx
|
|
7
|
+
|
|
8
|
+
from codegraph.analysis._common import filter_kinds
|
|
9
|
+
from codegraph.graph.schema import EdgeKind
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True)
class Cycle:
    """One detected cycle, described by two index-aligned lists.

    ``node_ids`` holds the machine identifiers of the participating nodes
    and ``qualnames`` the human-readable label used in reports for each of
    them. Both lists have the same length and ordering, so
    ``qualnames[i]`` describes ``node_ids[i]``.
    """

    # Canonical node identifiers.
    node_ids: list[str]
    # Display names, parallel to ``node_ids``.
    qualnames: list[str]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class CycleReport:
    """Cycles found in a graph, grouped by the edge kind they were found on."""

    # Cycles detected among IMPORTS edges.
    import_cycles: list[Cycle] = field(default_factory=list)
    # Cycles detected among CALLS edges.
    call_cycles: list[Cycle] = field(default_factory=list)

    @property
    def total(self) -> int:
        """Combined number of import cycles and call cycles."""
        return sum(len(group) for group in (self.import_cycles, self.call_cycles))
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _qualname_for(graph: nx.MultiDiGraph, node_id: str) -> str:
    """Pick a display label for ``node_id``.

    Prefers the node's ``qualname`` attribute, then its ``name`` attribute;
    each is used only if it is a non-empty string. Falls back to the node
    ID itself when neither qualifies or the node has no attributes.
    """
    attrs = graph.nodes.get(node_id, {})
    for attr_key in ("qualname", "name"):
        label = attrs.get(attr_key)
        if isinstance(label, str) and label:
            return label
    return node_id
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _scc_cycles(graph: nx.MultiDiGraph) -> list[Cycle]:
    """Collect one ``Cycle`` per cyclic strongly-connected component.

    Parallel edges are collapsed by converting to a plain ``DiGraph``
    first. A component with several nodes is always reported; a
    single-node component is reported only when that node has a self-loop.
    Members of each cycle are listed in sorted ID order, and the cycles
    themselves are ordered largest first, ties broken by their ID lists.
    """
    collapsed = nx.DiGraph(graph)
    found: list[Cycle] = []
    for component in nx.strongly_connected_components(collapsed):
        members = sorted(component)
        # A lone node without an edge to itself is not a cycle.
        if len(members) == 1 and not collapsed.has_edge(members[0], members[0]):
            continue
        labels = [_qualname_for(graph, member) for member in members]
        found.append(Cycle(node_ids=members, qualnames=labels))
    found.sort(key=lambda cycle: (-len(cycle.node_ids), cycle.node_ids))
    return found
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def find_cycles(graph: nx.MultiDiGraph) -> CycleReport:
    """Find cyclic strongly-connected components among imports and calls.

    The graph is reduced once to its IMPORTS edges and once to its CALLS
    edges; each reduction is searched separately, so a cycle that needs
    both edge kinds to close is not reported. Each reported ``Cycle`` is a
    whole strongly-connected component rather than one simple cycle.
    """
    report = CycleReport()
    report.import_cycles = _scc_cycles(filter_kinds(graph, {EdgeKind.IMPORTS.value}))
    report.call_cycles = _scc_cycles(filter_kinds(graph, {EdgeKind.CALLS.value}))
    return report
|