polycodegraph 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. codegraph/__init__.py +10 -0
  2. codegraph/analysis/__init__.py +30 -0
  3. codegraph/analysis/_common.py +125 -0
  4. codegraph/analysis/blast_radius.py +63 -0
  5. codegraph/analysis/cycles.py +79 -0
  6. codegraph/analysis/dataflow.py +861 -0
  7. codegraph/analysis/dead_code.py +165 -0
  8. codegraph/analysis/hotspots.py +68 -0
  9. codegraph/analysis/infrastructure.py +439 -0
  10. codegraph/analysis/metrics.py +52 -0
  11. codegraph/analysis/report.py +222 -0
  12. codegraph/analysis/roles.py +323 -0
  13. codegraph/analysis/untested.py +79 -0
  14. codegraph/cli.py +1506 -0
  15. codegraph/config.py +64 -0
  16. codegraph/embed/__init__.py +35 -0
  17. codegraph/embed/chunker.py +120 -0
  18. codegraph/embed/embedder.py +113 -0
  19. codegraph/embed/query.py +181 -0
  20. codegraph/embed/store.py +360 -0
  21. codegraph/graph/__init__.py +0 -0
  22. codegraph/graph/builder.py +212 -0
  23. codegraph/graph/schema.py +69 -0
  24. codegraph/graph/store_networkx.py +55 -0
  25. codegraph/graph/store_sqlite.py +249 -0
  26. codegraph/mcp_server/__init__.py +6 -0
  27. codegraph/mcp_server/server.py +933 -0
  28. codegraph/parsers/__init__.py +0 -0
  29. codegraph/parsers/base.py +70 -0
  30. codegraph/parsers/go.py +570 -0
  31. codegraph/parsers/python.py +1707 -0
  32. codegraph/parsers/typescript.py +1397 -0
  33. codegraph/py.typed +0 -0
  34. codegraph/resolve/__init__.py +4 -0
  35. codegraph/resolve/calls.py +480 -0
  36. codegraph/review/__init__.py +31 -0
  37. codegraph/review/baseline.py +32 -0
  38. codegraph/review/differ.py +211 -0
  39. codegraph/review/hook.py +70 -0
  40. codegraph/review/risk.py +219 -0
  41. codegraph/review/rules.py +342 -0
  42. codegraph/viz/__init__.py +17 -0
  43. codegraph/viz/_style.py +45 -0
  44. codegraph/viz/dashboard.py +740 -0
  45. codegraph/viz/diagrams.py +370 -0
  46. codegraph/viz/explore.py +453 -0
  47. codegraph/viz/hld.py +683 -0
  48. codegraph/viz/html.py +115 -0
  49. codegraph/viz/mermaid.py +111 -0
  50. codegraph/viz/svg.py +77 -0
  51. codegraph/web/__init__.py +4 -0
  52. codegraph/web/server.py +165 -0
  53. codegraph/web/static/app.css +664 -0
  54. codegraph/web/static/app.js +919 -0
  55. codegraph/web/static/index.html +112 -0
  56. codegraph/web/static/views/architecture.js +1671 -0
  57. codegraph/web/static/views/graph3d.css +564 -0
  58. codegraph/web/static/views/graph3d.js +999 -0
  59. codegraph/web/static/views/graph3d_transform.js +984 -0
  60. codegraph/workspace/__init__.py +34 -0
  61. codegraph/workspace/config.py +110 -0
  62. codegraph/workspace/operations.py +294 -0
  63. polycodegraph-0.1.0.dist-info/METADATA +687 -0
  64. polycodegraph-0.1.0.dist-info/RECORD +67 -0
  65. polycodegraph-0.1.0.dist-info/WHEEL +4 -0
  66. polycodegraph-0.1.0.dist-info/entry_points.txt +2 -0
  67. polycodegraph-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,211 @@
1
+ """Graph diffing: compare two graphs by (qualname, kind) identity."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass, field
5
+ from typing import Any
6
+
7
+ import networkx as nx
8
+
9
+
10
@dataclass
class NodeChange:
    """A single node-level entry of a graph diff.

    The descriptive fields mirror the node payload of the graph the entry
    was taken from.
    """

    # Identity of the node within a graph.
    qualname: str
    kind: str
    # Where the symbol lives and what it looks like.
    file: str
    line_start: int
    signature: str
    # One of "added", "removed" or "modified".
    change_kind: str
    # Extra per-change information; empty unless the producer fills it in.
    details: dict[str, Any] = field(default_factory=dict)
19
+
20
+
21
@dataclass
class EdgeChange:
    """A single edge-level entry of a graph diff."""

    # Endpoints, identified by qualified name rather than by node id.
    src_qualname: str
    dst_qualname: str
    # Edge kind as a plain string.
    kind: str
    # Either "added" or "removed".
    change_kind: str
27
+
28
+
29
@dataclass
class GraphDiff:
    """Container for everything that differs between two graphs."""

    # Node-level changes, one list per change kind.
    added_nodes: list[NodeChange] = field(default_factory=list)
    removed_nodes: list[NodeChange] = field(default_factory=list)
    modified_nodes: list[NodeChange] = field(default_factory=list)
    # Edge-level changes.
    added_edges: list[EdgeChange] = field(default_factory=list)
    removed_edges: list[EdgeChange] = field(default_factory=list)

    @property
    def total(self) -> int:
        """Number of recorded changes across all five lists."""
        buckets = (
            self.added_nodes,
            self.removed_nodes,
            self.modified_nodes,
            self.added_edges,
            self.removed_edges,
        )
        return sum(len(bucket) for bucket in buckets)
46
+
47
+
48
+ _NodeKey = tuple[str, str]
49
+
50
+
51
+ def _kind_str(value: object) -> str:
52
+ return str(getattr(value, "value", value) or "")
53
+
54
+
55
+ def _node_key(attrs: dict[str, Any]) -> _NodeKey | None:
56
+ qualname = str(attrs.get("qualname") or "")
57
+ kind = _kind_str(attrs.get("kind"))
58
+ if not qualname or not kind:
59
+ return None
60
+ return (qualname, kind)
61
+
62
+
63
def _node_payload(attrs: dict[str, Any]) -> dict[str, Any]:
    """Project raw node attributes onto the fields the differ compares.

    Missing or falsy values are normalised to ``""`` (text fields) or ``0``
    (``line_start``).
    """

    def text(name: str) -> str:
        return str(attrs.get(name) or "")

    return {
        "qualname": text("qualname"),
        "kind": _kind_str(attrs.get("kind")),
        "file": text("file"),
        "line_start": int(attrs.get("line_start") or 0),
        "signature": text("signature"),
    }
71
+
72
+
73
def _build_node_index(graph: nx.MultiDiGraph) -> dict[_NodeKey, dict[str, Any]]:
    """Index the graph's nodes by their ``(qualname, kind)`` identity.

    Nodes without a usable identity are skipped. Each stored payload also
    carries the graph's own node id under ``"_id"``.
    """
    by_key: dict[_NodeKey, dict[str, Any]] = {}
    for node_id, attrs in graph.nodes(data=True):
        key = _node_key(attrs)
        # First occurrence wins: when two nodes share an identity the one
        # met first in iteration order is kept, so the result is
        # deterministic for a given graph.
        if key is None or key in by_key:
            continue
        by_key[key] = {**_node_payload(attrs), "_id": node_id}
    return by_key
87
+
88
+
89
+ def _id_to_qualname(graph: nx.MultiDiGraph) -> dict[str, str]:
90
+ return {
91
+ nid: str(attrs.get("qualname") or nid)
92
+ for nid, attrs in graph.nodes(data=True)
93
+ }
94
+
95
+
96
def _edge_keys(
    graph: nx.MultiDiGraph, id_map: dict[str, str]
) -> set[tuple[str, str, str]]:
    """Collect the graph's edges as ``(src_qualname, dst_qualname, kind)``.

    Endpoints are translated through ``id_map``; an id missing from the map
    is used unchanged. Parallel edges with the same kind collapse into one
    entry because the result is a set.
    """
    return {
        (
            id_map.get(src, src),
            id_map.get(dst, dst),
            _kind_str(data.get("kind")),
        )
        for src, dst, data in graph.edges(data=True)
    }
106
+
107
+
108
def _node_change_from_payload(
    payload: dict[str, Any],
    change_kind: str,
    details: dict[str, Any] | None = None,
) -> NodeChange:
    """Build a ``NodeChange`` from an indexed node payload."""
    return NodeChange(
        qualname=payload["qualname"],
        kind=payload["kind"],
        file=payload["file"],
        line_start=payload["line_start"],
        signature=payload["signature"],
        change_kind=change_kind,
        details={} if details is None else details,
    )


def _modification_details(
    old_payload: dict[str, Any], new_payload: dict[str, Any]
) -> dict[str, Any]:
    """Return ``{field: {"old": ..., "new": ...}}`` for a changed node.

    Only ``file`` and ``signature`` can make a node count as modified.
    ``line_start`` drift is attached for diagnostics, but only when one of
    those two fields already differs, so it never triggers a change alone.
    """
    details: dict[str, Any] = {
        name: {"old": old_payload[name], "new": new_payload[name]}
        for name in ("file", "signature")
        if old_payload[name] != new_payload[name]
    }
    if details and old_payload["line_start"] != new_payload["line_start"]:
        details["line_start"] = {
            "old": old_payload["line_start"],
            "new": new_payload["line_start"],
        }
    return details


def diff_graphs(old: nx.MultiDiGraph, new: nx.MultiDiGraph) -> GraphDiff:
    """Diff two graphs by ``(qualname, kind)`` node identity.

    A node is *modified* when the same identity exists in both graphs but
    its ``file`` or ``signature`` changed.

    ``line_start`` is intentionally NOT a modification trigger: when a PR
    edits the top of a file, every symbol below the edit shifts down by N
    lines and would otherwise show up as "modified" even though their
    actual signatures are identical. Pure line-shift noise was producing
    50+ false-positive ``modified-signature`` findings on PRs that touched
    high-traffic files (``app.js``, ``typescript.py``).

    The ``line_start`` value is still captured on each ``NodeChange`` for
    rendering; it just does not trigger the change.

    Edges are compared as ``(src_qualname, dst_qualname, kind)`` triples
    and reported in sorted order. Node changes follow the iteration order
    of the respective graph.
    """
    diff = GraphDiff()

    old_idx = _build_node_index(old)
    new_idx = _build_node_index(new)

    # Added and modified nodes: walk the new graph's identities.
    for key, new_payload in new_idx.items():
        old_payload = old_idx.get(key)
        if old_payload is None:
            diff.added_nodes.append(
                _node_change_from_payload(new_payload, "added")
            )
            continue
        details = _modification_details(old_payload, new_payload)
        if details:
            diff.modified_nodes.append(
                _node_change_from_payload(new_payload, "modified", details)
            )

    # Removed nodes: identities only the old graph knows about.
    diff.removed_nodes.extend(
        _node_change_from_payload(old_payload, "removed")
        for key, old_payload in old_idx.items()
        if key not in new_idx
    )

    old_edges = _edge_keys(old, _id_to_qualname(old))
    new_edges = _edge_keys(new, _id_to_qualname(new))

    diff.added_edges.extend(
        EdgeChange(
            src_qualname=src_qn,
            dst_qualname=dst_qn,
            kind=kind,
            change_kind="added",
        )
        for src_qn, dst_qn, kind in sorted(new_edges - old_edges)
    )
    diff.removed_edges.extend(
        EdgeChange(
            src_qualname=src_qn,
            dst_qualname=dst_qn,
            kind=kind,
            change_kind="removed",
        )
        for src_qn, dst_qn, kind in sorted(old_edges - new_edges)
    )

    return diff
@@ -0,0 +1,70 @@
1
+ """Git hook installation for ``codegraph review`` integration."""
2
+ from __future__ import annotations
3
+
4
+ import stat
5
+ from pathlib import Path
6
+
7
+ HOOK_MARKER = "# codegraph-managed-hook"
8
+
9
+ DEFAULT_HOOK_NAME = "pre-push"
10
+
11
+
12
+ def _hook_script(target: str = "main") -> str:
13
+ return f"""#!/usr/bin/env bash
14
+ {HOOK_MARKER}
15
+ # Runs codegraph review against the configured baseline.
16
+ set -e
17
+ if ! command -v codegraph >/dev/null 2>&1; then
18
+ echo "codegraph: skipping (CLI not on PATH)"
19
+ exit 0
20
+ fi
21
+ codegraph review --target {target} --fail-on high || exit $?
22
+ """
23
+
24
+
25
+ def _hooks_dir(repo_root: Path) -> Path:
26
+ return repo_root / ".git" / "hooks"
27
+
28
+
29
def install_hook(
    repo_root: Path,
    hook: str = DEFAULT_HOOK_NAME,
    target: str = "main",
    force: bool = False,
) -> Path:
    """Install a codegraph-managed git hook in ``repo_root``.

    Args:
        repo_root: Working-tree root; its ``.git`` entry must exist.
        hook: Hook file name inside ``.git/hooks``.
        target: Value passed to ``codegraph review --target`` by the hook.
        force: Overwrite an existing hook even if it is not managed by
            codegraph.

    Returns:
        The path of the installed hook.

    Raises:
        FileNotFoundError: ``repo_root`` has no ``.git`` entry.
        FileExistsError: a foreign (non-codegraph) hook is already present
            and ``force`` is False.
    """
    hooks_dir = _hooks_dir(repo_root)
    if not hooks_dir.parent.exists():
        raise FileNotFoundError(f"not a git repository: {repo_root}")
    hooks_dir.mkdir(parents=True, exist_ok=True)
    hook_path = hooks_dir / hook
    if hook_path.exists() and not force:
        # Decode leniently: a foreign hook may not be valid text, and that
        # must surface as the documented FileExistsError rather than a
        # UnicodeDecodeError.
        existing = hook_path.read_text(encoding="utf-8", errors="replace")
        if HOOK_MARKER not in existing:
            raise FileExistsError(
                f"refusing to overwrite existing {hook} hook (use --force)"
            )
    # Write bytes so the "\n" line endings are never translated to the
    # platform separator; a "\r" after the shebang breaks the script.
    hook_path.write_bytes(_hook_script(target=target).encode("utf-8"))
    # Add the execute bits for user, group and others on top of the
    # current mode.
    mode = hook_path.stat().st_mode
    hook_path.chmod(mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
    return hook_path
55
+
56
+
57
def uninstall_hook(
    repo_root: Path, hook: str = DEFAULT_HOOK_NAME
) -> bool:
    """Remove a codegraph-managed git hook.

    Returns True if the hook was removed. Returns False when no such hook
    exists or when the existing hook does not carry the codegraph marker
    (foreign hooks are never deleted).
    """
    hook_path = _hooks_dir(repo_root) / hook
    if not hook_path.exists():
        return False
    # Decode leniently so a non-text foreign hook is reported as
    # "not ours" instead of raising UnicodeDecodeError.
    text = hook_path.read_text(encoding="utf-8", errors="replace")
    if HOOK_MARKER not in text:
        return False
    hook_path.unlink()
    return True
69
+
70
+
@@ -0,0 +1,219 @@
1
+ """Risk scoring for diff entries."""
2
+ from __future__ import annotations
3
+
4
+ import re
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+
8
+ import networkx as nx
9
+
10
+ from codegraph.analysis.cycles import find_cycles
11
+ from codegraph.analysis.hotspots import find_hotspots
12
+ from codegraph.graph.schema import EdgeKind
13
+ from codegraph.review.differ import EdgeChange, NodeChange
14
+
15
+
16
@dataclass
class Risk:
    """Outcome of scoring one diff entry."""

    # Numeric risk, 0-100.
    score: int
    # Bucket for the score: "low", "med", "high" or "critical".
    level: str
    # Human-readable explanations for the points that were awarded.
    reasons: list[str] = field(default_factory=list)
21
+
22
+
23
+ def _level(score: int) -> str:
24
+ if score >= 81:
25
+ return "critical"
26
+ if score >= 51:
27
+ return "high"
28
+ if score >= 21:
29
+ return "med"
30
+ return "low"
31
+
32
+
33
+ def _kind_str(value: object) -> str:
34
+ return str(getattr(value, "value", value) or "")
35
+
36
+
37
def _find_node_id(
    qualname: str, kind: str, graph: nx.MultiDiGraph
) -> str | None:
    """Return the id of the first node matching ``(qualname, kind)``.

    This is a linear scan over the graph's nodes. Returns ``None`` when no
    node matches.
    """
    matches = (
        str(node_id)
        for node_id, attrs in graph.nodes(data=True)
        if str(attrs.get("qualname") or "") == qualname
        and _kind_str(attrs.get("kind")) == kind
    )
    return next(matches, None)
47
+
48
+
49
def _count_callers(node_id: str, graph: nx.MultiDiGraph) -> int:
    """Count the incoming edges of ``node_id`` whose key is the CALLS kind.

    NOTE(review): this compares the multigraph edge *key* with
    ``EdgeKind.CALLS.value``, so it relies on edges being keyed by their
    kind. Confirm against the graph builder.
    """
    return sum(
        1
        for _src, _dst, edge_key in graph.in_edges(node_id, keys=True)
        if edge_key == EdgeKind.CALLS.value
    )
55
+
56
+
57
+ def _has_callers_in_new(
58
+ old_node_id: str, old_graph: nx.MultiDiGraph, new_graph: nx.MultiDiGraph
59
+ ) -> bool:
60
+ """Return True if any caller of ``old_node_id`` (in old) still exists in new."""
61
+ new_ids = set(new_graph.nodes())
62
+ return any(
63
+ src in new_ids
64
+ for src, _dst, _data in old_graph.in_edges(old_node_id, data=True)
65
+ )
66
+
67
+
68
def _hotspot_files(graph: nx.MultiDiGraph) -> frozenset[str]:
    """Return the non-empty ``file`` values of up to ten hotspots in ``graph``."""
    files: set[str] = set()
    for hotspot in find_hotspots(graph, limit=10):
        if hotspot.file:
            files.add(hotspot.file)
    return frozenset(files)
70
+
71
+
72
+ def _is_hotspot_file(
73
+ file: str,
74
+ graph: nx.MultiDiGraph,
75
+ cache: dict[str, frozenset[str]] | None = None,
76
+ ) -> bool:
77
+ if not file:
78
+ return False
79
+ if cache is not None and "files" in cache:
80
+ return file in cache["files"]
81
+ return file in _hotspot_files(graph)
82
+
83
+
84
+ def _is_public_api(qualname: str) -> bool:
85
+ if not qualname:
86
+ return False
87
+ parts = qualname.rsplit(".", 1)
88
+ name = parts[-1]
89
+ return not name.startswith("_")
90
+
91
+
92
+ _SIG_PARAM_RE = re.compile(r"\(([^)]*)\)")
93
+
94
+
95
+ def _param_count(signature: str) -> int:
96
+ if not signature:
97
+ return -1
98
+ m = _SIG_PARAM_RE.search(signature)
99
+ if not m:
100
+ return -1
101
+ inside = m.group(1).strip()
102
+ if not inside:
103
+ return 0
104
+ # Naive split on commas at depth 0 - good enough for python signatures.
105
+ depth = 0
106
+ parts: list[str] = []
107
+ buf: list[str] = []
108
+ for ch in inside:
109
+ if ch in "([{":
110
+ depth += 1
111
+ elif ch in ")]}":
112
+ depth -= 1
113
+ if ch == "," and depth == 0:
114
+ parts.append("".join(buf).strip())
115
+ buf = []
116
+ else:
117
+ buf.append(ch)
118
+ if buf:
119
+ parts.append("".join(buf).strip())
120
+ return len([p for p in parts if p])
121
+
122
+
123
def _param_count_changed(old_sig: str, new_sig: str) -> bool:
    """Tell whether two signatures differ in their number of parameters.

    If either signature cannot be parsed (its count is negative), the
    answer falls back to a plain string comparison of the two signatures.
    """
    before, after = _param_count(old_sig), _param_count(new_sig)
    if min(before, after) < 0:
        return old_sig != new_sig
    return before != after
129
+
130
+
131
+ def _cycle_total(
132
+ graph: nx.MultiDiGraph,
133
+ cache: dict[str, int] | None = None,
134
+ label: str = "",
135
+ ) -> int:
136
+ if cache is not None and label in cache:
137
+ return cache[label]
138
+ return find_cycles(graph).total
139
+
140
+
141
def _introduces_cycle(
    new_graph: nx.MultiDiGraph,
    old_graph: nx.MultiDiGraph,
    cache: dict[str, int] | None = None,
) -> bool:
    """Return True when the new graph has a larger cycle total than the old.

    Totals are looked up in ``cache`` under the labels ``"new"`` and
    ``"old"`` when available.
    """
    return _cycle_total(new_graph, cache, "new") > _cycle_total(
        old_graph, cache, "old"
    )
149
+
150
+
151
# pragma: codegraph-public-api
def score_change(
    change: NodeChange | EdgeChange,
    *,
    new_graph: nx.MultiDiGraph,
    old_graph: nx.MultiDiGraph,
    extra: dict[str, Any] | None = None,
) -> Risk:
    """Score a single diff entry against the new + old graphs.

    Points are additive and the sum is clamped to 100:

    * +40  node has 10 or more callers in the new graph
    * +50  removed node with an incoming-edge source that still exists in
      the new graph
    * +20  node lives in a hotspot file (old graph for removals, new graph
      otherwise)
    * +10  added, non-public node without callers
    * +20  modified node whose parameter count changed
    * +30  the new graph has more cycles than the old one, or
      ``extra["introduces_cycle"]`` is truthy (applies to node and edge
      changes alike)

    Args:
        change: The diff entry to score. Only ``NodeChange`` entries get
            the node-specific rules.
        new_graph: Graph after the change.
        old_graph: Graph before the change.
        extra: Optional hints: ``"hotspot_cache"`` and ``"cycle_cache"``
            (dicts of precomputed values) and ``"introduces_cycle"``.

    Returns:
        A ``Risk`` with the clamped score, its level and the reasons.
    """
    score = 0
    reasons: list[str] = []
    extra = extra or {}
    # Caches are only honoured when they really are dicts.
    raw_hotspot = extra.get("hotspot_cache")
    hotspot_cache: dict[str, frozenset[str]] | None = (
        raw_hotspot if isinstance(raw_hotspot, dict) else None
    )
    raw_cycle = extra.get("cycle_cache")
    cycle_cache: dict[str, int] | None = (
        raw_cycle if isinstance(raw_cycle, dict) else None
    )

    if isinstance(change, NodeChange):
        new_id = _find_node_id(change.qualname, change.kind, new_graph)
        # Count callers once; both the blast-radius and the reachability
        # rule below use the same number.
        fan_in = _count_callers(new_id, new_graph) if new_id is not None else 0

        if fan_in >= 10:
            score += 40
            reasons.append(f"high blast radius ({fan_in} callers)")

        if change.change_kind == "removed" and new_id is None:
            # The old-graph lookup is a linear scan, so it is done only
            # for the one rule that needs it.
            old_id = _find_node_id(change.qualname, change.kind, old_graph)
            if old_id is not None and _has_callers_in_new(
                old_id, old_graph, new_graph
            ):
                score += 50
                reasons.append("removed symbol still referenced")

        hotspot_graph = (
            old_graph if change.change_kind == "removed" else new_graph
        )
        if _is_hotspot_file(change.file, hotspot_graph, hotspot_cache):
            score += 20
            reasons.append("in hotspot file")

        if (
            change.change_kind == "added"
            and new_id is not None
            and fan_in == 0
            and not _is_public_api(change.qualname)
        ):
            score += 10
            reasons.append("potentially unreachable")

        if change.change_kind == "modified":
            sig_details = change.details.get("signature") or {}
            old_sig = str(sig_details.get("old") or "")
            new_sig = str(sig_details.get("new") or "")
            if old_sig and new_sig and _param_count_changed(old_sig, new_sig):
                score += 20
                reasons.append("signature change")

    if extra.get("introduces_cycle") or _introduces_cycle(
        new_graph, old_graph, cycle_cache
    ):
        score += 30
        reasons.append("introduces import/call cycle")

    score = min(100, score)
    return Risk(score=score, level=_level(score), reasons=reasons)