polycodegraph 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. codegraph/__init__.py +10 -0
  2. codegraph/analysis/__init__.py +30 -0
  3. codegraph/analysis/_common.py +125 -0
  4. codegraph/analysis/blast_radius.py +63 -0
  5. codegraph/analysis/cycles.py +79 -0
  6. codegraph/analysis/dataflow.py +861 -0
  7. codegraph/analysis/dead_code.py +165 -0
  8. codegraph/analysis/hotspots.py +68 -0
  9. codegraph/analysis/infrastructure.py +439 -0
  10. codegraph/analysis/metrics.py +52 -0
  11. codegraph/analysis/report.py +222 -0
  12. codegraph/analysis/roles.py +323 -0
  13. codegraph/analysis/untested.py +79 -0
  14. codegraph/cli.py +1506 -0
  15. codegraph/config.py +64 -0
  16. codegraph/embed/__init__.py +35 -0
  17. codegraph/embed/chunker.py +120 -0
  18. codegraph/embed/embedder.py +113 -0
  19. codegraph/embed/query.py +181 -0
  20. codegraph/embed/store.py +360 -0
  21. codegraph/graph/__init__.py +0 -0
  22. codegraph/graph/builder.py +212 -0
  23. codegraph/graph/schema.py +69 -0
  24. codegraph/graph/store_networkx.py +55 -0
  25. codegraph/graph/store_sqlite.py +249 -0
  26. codegraph/mcp_server/__init__.py +6 -0
  27. codegraph/mcp_server/server.py +933 -0
  28. codegraph/parsers/__init__.py +0 -0
  29. codegraph/parsers/base.py +70 -0
  30. codegraph/parsers/go.py +570 -0
  31. codegraph/parsers/python.py +1707 -0
  32. codegraph/parsers/typescript.py +1397 -0
  33. codegraph/py.typed +0 -0
  34. codegraph/resolve/__init__.py +4 -0
  35. codegraph/resolve/calls.py +480 -0
  36. codegraph/review/__init__.py +31 -0
  37. codegraph/review/baseline.py +32 -0
  38. codegraph/review/differ.py +211 -0
  39. codegraph/review/hook.py +70 -0
  40. codegraph/review/risk.py +219 -0
  41. codegraph/review/rules.py +342 -0
  42. codegraph/viz/__init__.py +17 -0
  43. codegraph/viz/_style.py +45 -0
  44. codegraph/viz/dashboard.py +740 -0
  45. codegraph/viz/diagrams.py +370 -0
  46. codegraph/viz/explore.py +453 -0
  47. codegraph/viz/hld.py +683 -0
  48. codegraph/viz/html.py +115 -0
  49. codegraph/viz/mermaid.py +111 -0
  50. codegraph/viz/svg.py +77 -0
  51. codegraph/web/__init__.py +4 -0
  52. codegraph/web/server.py +165 -0
  53. codegraph/web/static/app.css +664 -0
  54. codegraph/web/static/app.js +919 -0
  55. codegraph/web/static/index.html +112 -0
  56. codegraph/web/static/views/architecture.js +1671 -0
  57. codegraph/web/static/views/graph3d.css +564 -0
  58. codegraph/web/static/views/graph3d.js +999 -0
  59. codegraph/web/static/views/graph3d_transform.js +984 -0
  60. codegraph/workspace/__init__.py +34 -0
  61. codegraph/workspace/config.py +110 -0
  62. codegraph/workspace/operations.py +294 -0
  63. polycodegraph-0.1.0.dist-info/METADATA +687 -0
  64. polycodegraph-0.1.0.dist-info/RECORD +67 -0
  65. polycodegraph-0.1.0.dist-info/WHEEL +4 -0
  66. polycodegraph-0.1.0.dist-info/entry_points.txt +2 -0
  67. polycodegraph-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,34 @@
1
+ """Cross-repo workspace mode for codegraph.
2
+
3
+ A *workspace* is a user-level registration of N independent repositories that
4
+ codegraph should treat as a single mental unit for cross-repo queries. Each
5
+ registered repo still keeps its own ``.codegraph/graph.db``; workspace
6
+ operations open them one after another and union/aggregate the results.
7
+
8
+ Public API:
9
+ - :class:`codegraph.workspace.config.WorkspaceConfig`
10
+ - :func:`codegraph.workspace.config.load_workspace`
11
+ - :func:`codegraph.workspace.config.save_workspace`
12
+ - :func:`codegraph.workspace.operations.workspace_state`
13
+ - :func:`codegraph.workspace.operations.workspace_diff_since`
14
+ - :func:`codegraph.workspace.operations.workspace_blast_radius`
15
+ """
16
+ from __future__ import annotations
17
+
18
+ from codegraph.workspace.config import (
19
+ USER_WORKSPACE_FILE,
20
+ WorkspaceConfig,
21
+ WorkspaceRepo,
22
+ load_workspace,
23
+ resolve_workspace_path,
24
+ save_workspace,
25
+ )
26
+
27
+ __all__ = [
28
+ "USER_WORKSPACE_FILE",
29
+ "WorkspaceConfig",
30
+ "WorkspaceRepo",
31
+ "load_workspace",
32
+ "resolve_workspace_path",
33
+ "save_workspace",
34
+ ]
@@ -0,0 +1,110 @@
1
+ """Workspace configuration model + load/save helpers.
2
+
3
+ The workspace file lives at ``~/.codegraph/workspace.yml`` by default.
4
+ Override via the ``CODEGRAPH_WORKSPACE_FILE`` environment variable — useful
5
+ for tests and for users who want to isolate workspaces per shell.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ from pathlib import Path
11
+
12
+ import yaml
13
+ from pydantic import BaseModel, Field
14
+
15
USER_WORKSPACE_FILE = Path.home().joinpath(".codegraph", "workspace.yml")
"""Default location of the per-user workspace file."""
17
+
18
+
19
class WorkspaceRepo(BaseModel):
    """One repository entry in a workspace.

    Attributes:
        path: Location of the repository root (intended to be absolute).
        name: Optional short label. When unset or empty, ``display_name``
            falls back to the final component of ``path``.
    """

    path: str
    name: str | None = None

    @property
    def display_name(self) -> str:
        """Return ``name`` when it is set, otherwise the last component of ``path``."""
        if self.name:
            return self.name
        return Path(self.path).name
33
+
34
+
35
def _canonical_repo_path(repo_path: str | Path) -> str:
    """Return *repo_path* with ``~`` expanded and resolved to an absolute string."""
    return str(Path(repo_path).expanduser().resolve())


class WorkspaceConfig(BaseModel):
    """User-level workspace configuration.

    Stored as YAML at ``~/.codegraph/workspace.yml``::

        version: 1
        repos:
          - path: /Users/me/Documents/projects/foo
            name: foo
          - path: /Users/me/Documents/projects/bar
    """

    version: int = 1
    repos: list[WorkspaceRepo] = Field(default_factory=list)

    def has_repo(self, repo_path: str | Path) -> bool:
        """Return True if a registered repo resolves to the same path as *repo_path*.

        Both the queried path and every stored path are ``~``-expanded and
        resolved before comparison, so an entry stored as ``~/foo`` matches a
        query for its absolute form (and vice versa).
        """
        target = _canonical_repo_path(repo_path)
        return any(_canonical_repo_path(r.path) == target for r in self.repos)

    def remove_repo(self, repo_path: str | Path) -> bool:
        """Drop every repo whose canonical path matches *repo_path*.

        Paths are compared after ``~`` expansion and resolution on both sides.

        Returns:
            True if at least one entry was removed.
        """
        target = _canonical_repo_path(repo_path)
        before = len(self.repos)
        self.repos = [
            r for r in self.repos if _canonical_repo_path(r.path) != target
        ]
        return len(self.repos) != before
65
+
66
+
67
def resolve_workspace_path() -> Path:
    """Return the workspace file location.

    A non-empty ``CODEGRAPH_WORKSPACE_FILE`` environment variable wins (with
    ``~`` expanded); otherwise ``USER_WORKSPACE_FILE`` is returned.
    """
    env_value = os.environ.get("CODEGRAPH_WORKSPACE_FILE")
    if not env_value:
        return USER_WORKSPACE_FILE
    return Path(env_value).expanduser()
73
+
74
+
75
def load_workspace(path: Path | None = None) -> WorkspaceConfig:
    """Load the workspace from *path* (or the resolved default).

    Args:
        path: Workspace file to read. When omitted, the location comes from
            ``resolve_workspace_path()``.

    Returns:
        The parsed :class:`WorkspaceConfig`, or an empty one if the file is
        missing (including when it disappears between the existence check and
        the read).

    Raises:
        ValueError: If the file cannot be read or decoded as UTF-8, contains
            invalid YAML, or its top level is not a mapping. Schema errors are
            raised by ``WorkspaceConfig.model_validate`` (a ``ValueError``
            subclass under pydantic v2).
    """
    cfg_path = path or resolve_workspace_path()
    if not cfg_path.exists():
        return WorkspaceConfig()
    try:
        # YAML is UTF-8 by convention; do not depend on the platform locale.
        raw = cfg_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        # Removed after the exists() check: still "missing", so stay silent.
        return WorkspaceConfig()
    except (OSError, UnicodeDecodeError) as exc:
        raise ValueError(f"Cannot read workspace file at {cfg_path}: {exc}") from exc
    try:
        # An empty document parses to None; treat it as an empty mapping.
        data = yaml.safe_load(raw) or {}
    except yaml.YAMLError as exc:
        raise ValueError(f"Invalid YAML in {cfg_path}: {exc}") from exc
    if not isinstance(data, dict):
        raise ValueError(
            f"Workspace file {cfg_path} must be a mapping, got {type(data).__name__}"
        )
    return WorkspaceConfig.model_validate(data)
97
+
98
+
99
def save_workspace(cfg: WorkspaceConfig, path: Path | None = None) -> Path:
    """Write *cfg* as YAML to *path* (or the resolved default location).

    Missing parent directories are created first. The path that was written
    is returned.
    """
    destination = path or resolve_workspace_path()
    destination.parent.mkdir(parents=True, exist_ok=True)
    document = yaml.safe_dump(
        cfg.model_dump(mode="json"),
        sort_keys=False,
        default_flow_style=False,
    )
    destination.write_text(document)
    return destination
@@ -0,0 +1,294 @@
1
+ """Pure functions for workspace operations — reused by CLI and MCP server.
2
+
3
+ All functions take a :class:`WorkspaceConfig` (or a list of
4
+ :class:`WorkspaceRepo`) and return JSON-serializable dicts. The MCP layer
5
+ just JSON-dumps the return values; the CLI renders them via Rich.
6
+
7
+ These functions are intentionally side-effect-free except for opening
8
+ SQLite connections to per-repo graph DBs (which they close before returning).
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import subprocess
13
+ from dataclasses import dataclass
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+ from codegraph.workspace.config import WorkspaceConfig, WorkspaceRepo
18
+
19
+ # ---------------------------------------------------------------------------
20
+ # Repo health / state
21
+ # ---------------------------------------------------------------------------
22
+
23
+
24
@dataclass
class RepoStatus:
    """Filesystem and git state recorded for one registered repository."""

    name: str
    path: str
    exists: bool
    is_git: bool
    has_graph: bool
    branch: str | None = None
    dirty_files: int = 0
    last_commit: str | None = None
    last_commit_at: str | None = None
    error: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain dict, keyed in declaration order."""
        keys = (
            "name",
            "path",
            "exists",
            "is_git",
            "has_graph",
            "branch",
            "dirty_files",
            "last_commit",
            "last_commit_at",
            "error",
        )
        return {key: getattr(self, key) for key in keys}
52
+
53
+
54
def _git(repo_path: Path, *args: str, timeout: int = 10) -> str:
    """Execute ``git <args>`` with *repo_path* as the working directory.

    Output is captured as text and the stripped stdout is returned.

    Raises ``subprocess.CalledProcessError`` on non-zero exit.
    """
    command = ["git", *args]
    completed = subprocess.run(
        command,
        cwd=repo_path,
        capture_output=True,
        text=True,
        timeout=timeout,
        check=True,
    )
    return completed.stdout.strip()
68
+
69
+
70
def repo_status(repo: WorkspaceRepo) -> RepoStatus:
    """Compute the current status of a single registered repo.

    Checks that the directory exists, whether it contains ``.git`` and
    ``.codegraph/graph.db``, and then queries git for the branch, the number
    of dirty files, and the last commit's summary and ISO commit date.

    Failures while running git are captured into the ``error`` field rather
    than raised, so workspace-wide operations can still show results for
    healthy repos. This includes any ``OSError`` from launching git (not only
    a missing binary).
    """
    path = Path(repo.path).expanduser()

    status = RepoStatus(
        name=repo.display_name,
        path=str(path),
        exists=path.exists(),
        is_git=False,
        has_graph=False,
    )

    if not status.exists:
        status.error = "directory not found"
        return status

    status.is_git = (path / ".git").exists()
    status.has_graph = (path / ".codegraph" / "graph.db").exists()

    if not status.is_git:
        status.error = "not a git repository"
        return status

    try:
        status.branch = _git(path, "rev-parse", "--abbrev-ref", "HEAD") or None
        porcelain = _git(path, "status", "--porcelain")
        # One non-blank porcelain line per changed or untracked path.
        status.dirty_files = sum(
            1 for line in porcelain.splitlines() if line.strip()
        )
        status.last_commit = _git(path, "log", "-1", "--pretty=%h %s")
        status.last_commit_at = _git(path, "log", "-1", "--pretty=%cI")
    except subprocess.CalledProcessError as exc:
        stderr = (exc.stderr or "").strip()[:200]
        status.error = (
            f"git command failed: {' '.join(exc.cmd)} "
            f"(exit {exc.returncode}): {stderr}"
        )
    except subprocess.TimeoutExpired:
        status.error = "git command timed out"
    except FileNotFoundError:
        status.error = "git binary not found on PATH"
    except OSError as exc:
        # e.g. PermissionError on the working directory; keep the
        # "errors go into the error field" contract instead of propagating.
        status.error = f"could not run git: {exc}"

    return status
119
+
120
+
121
def workspace_state(cfg: WorkspaceConfig) -> dict[str, Any]:
    """Collect the ``repo_status`` dict of each repo registered in *cfg*."""
    snapshots = []
    for entry in cfg.repos:
        snapshots.append(repo_status(entry).to_dict())
    return {"workspace_size": len(snapshots), "repos": snapshots}
128
+
129
+
130
+ # ---------------------------------------------------------------------------
131
+ # Cross-repo diff
132
+ # ---------------------------------------------------------------------------
133
+
134
+
135
def _diff_files(repo_path: Path, ref: str) -> list[str]:
    """List files changed in repo_path since *ref* (committed + working tree).

    Combines ``git diff --name-only <ref>...HEAD`` (committed changes since the
    merge-base with *ref*) with ``git diff --name-only HEAD`` (uncommitted).

    A git invocation that exits non-zero (for example because *ref* does not
    exist in this repo) contributes no files. A *ref* beginning with ``-`` is
    never passed to git, because git would parse it as a command-line option
    rather than a revision; only the working-tree diff is collected then.

    Returns:
        The sorted, de-duplicated list of changed paths.
    """
    changed: set[str] = set()
    if ref.startswith("-"):
        # Option-injection guard: e.g. "--output=..." must not reach argv.
        revisions = ["HEAD"]
    else:
        revisions = [f"{ref}...HEAD", "HEAD"]
    for revision in revisions:
        try:
            output = _git(repo_path, "diff", "--name-only", revision)
        except subprocess.CalledProcessError:
            # Deliberate best-effort: an unknown ref or failed diff is skipped.
            continue
        changed.update(line for line in output.splitlines() if line.strip())
    return sorted(changed)
156
+
157
+
158
def repo_diff_since(repo: WorkspaceRepo, ref: str) -> dict[str, Any]:
    """List changed files in one repo since *ref*.

    Returns:
        On success, a dict with ``name``, ``path``, ``ref``, ``files`` and
        ``files_changed``. On failure (missing directory, no ``.git``, git
        binary missing, or git timing out), a dict with ``name``, ``path``,
        ``error`` and an empty ``files`` list, so one bad repo does not abort
        a workspace-wide diff.
    """
    path = Path(repo.path).expanduser()
    identity: dict[str, Any] = {"name": repo.display_name, "path": str(path)}

    def _failure(message: str) -> dict[str, Any]:
        # Shared shape for every error result.
        return {**identity, "error": message, "files": []}

    if not path.exists():
        return _failure("directory not found")
    if not (path / ".git").exists():
        return _failure("not a git repository")
    try:
        files = _diff_files(path, ref)
    except FileNotFoundError:
        return _failure("git binary not found on PATH")
    except subprocess.TimeoutExpired:
        # Report a slow repo as an error instead of propagating the exception.
        return _failure("git command timed out")
    return {
        **identity,
        "ref": ref,
        "files": files,
        "files_changed": len(files),
    }
183
+
184
+
185
def workspace_diff_since(cfg: WorkspaceConfig, ref: str = "main") -> dict[str, Any]:
    """Run ``repo_diff_since`` for each repo in *cfg* and total the changed files."""
    per_repo = [repo_diff_since(entry, ref) for entry in cfg.repos]
    changed_total = 0
    for report in per_repo:
        # Error entries carry no "files_changed" key and count as zero.
        changed_total += int(report.get("files_changed", 0))
    return {
        "ref": ref,
        "workspace_size": len(per_repo),
        "total_files_changed": changed_total,
        "repos": per_repo,
    }
195
+
196
+
197
+ # ---------------------------------------------------------------------------
198
+ # Cross-repo blast radius
199
+ # ---------------------------------------------------------------------------
200
+
201
+
202
def _open_repo_graph(repo: WorkspaceRepo) -> Any | None:
    """Return the repo's graph built by ``to_digraph``, or None without a graph DB.

    The store opened on ``<repo>/.codegraph/graph.db`` is closed before
    returning, whether or not the conversion succeeds.
    """
    repo_root = Path(repo.path).expanduser()
    db_path = repo_root / ".codegraph" / "graph.db"
    if not db_path.exists():
        return None

    # Imported here rather than at module level so CLI startup stays fast and
    # tests that never open a graph do not load these modules.
    from codegraph.graph.store_networkx import to_digraph
    from codegraph.graph.store_sqlite import SQLiteGraphStore

    store = SQLiteGraphStore(db_path)
    try:
        graph = to_digraph(store)
    finally:
        store.close()
    return graph
216
+
217
+
218
def repo_blast_radius(
    repo: WorkspaceRepo, symbol: str, depth: int | None = None
) -> dict[str, Any]:
    """Compute blast radius for *symbol* in one repo, returning a JSON-safe dict.

    *symbol* is resolved against each node's ``qualname`` attribute (or the
    node ID when that is missing), in this priority order:

    1. exact, case-sensitive match;
    2. exact, case-insensitive match;
    3. case-insensitive substring match.

    Within a tier the first node in graph iteration order wins.

    Returns:
        A dict that always has ``name``, ``path``, ``found``, ``nodes`` and
        ``files``. When the repo has no graph DB an ``error`` key is included;
        when the symbol is found, ``target``, ``target_qualname``,
        ``node_count``, ``file_count`` and ``test_nodes`` are added.
    """
    name = repo.display_name
    repo_path = str(Path(repo.path).expanduser())

    graph = _open_repo_graph(repo)
    if graph is None:
        return {
            "name": name,
            "path": repo_path,
            "error": "no .codegraph/graph.db (run `codegraph build` first)",
            "found": False,
            "nodes": [],
            "files": [],
        }

    # Single pass over the nodes, remembering the best fallback per tier so an
    # exact-case hit is never shadowed by an earlier case-insensitive one.
    wanted = symbol.lower()
    target_id: str | None = None
    case_insensitive_id: str | None = None
    substring_id: str | None = None
    for nid, attrs in graph.nodes(data=True):
        qualname = str(attrs.get("qualname") or nid)
        if qualname == symbol:
            target_id = nid
            break
        if case_insensitive_id is not None:
            # A tier-2 match already outranks any further tier-2/3 candidate.
            continue
        lowered = qualname.lower()
        if lowered == wanted:
            case_insensitive_id = nid
        elif substring_id is None and wanted in lowered:
            substring_id = nid
    if target_id is None:
        target_id = (
            case_insensitive_id if case_insensitive_id is not None else substring_id
        )

    if target_id is None:
        return {
            "name": name,
            "path": repo_path,
            "found": False,
            "nodes": [],
            "files": [],
        }

    # Lazy import, matching how the graph store modules are loaded.
    from codegraph.analysis.blast_radius import blast_radius as _blast

    result = _blast(graph, target_id, depth=depth)
    return {
        "name": name,
        "path": repo_path,
        "found": True,
        "target": str(target_id),
        "target_qualname": str(
            graph.nodes[target_id].get("qualname") or target_id
        ),
        "nodes": list(result.nodes),
        "node_count": len(result.nodes),
        "files": sorted(result.files),
        "file_count": len(result.files),
        "test_nodes": list(result.test_nodes),
    }
276
+
277
+
278
def workspace_blast_radius(
    cfg: WorkspaceConfig, symbol: str, depth: int | None = None
) -> dict[str, Any]:
    """Run ``repo_blast_radius`` for *symbol* in each repo of *cfg* and total the results."""
    reports = []
    for entry in cfg.repos:
        reports.append(repo_blast_radius(entry, symbol, depth=depth))

    matched = 0
    node_total = 0
    file_total = 0
    for report in reports:
        if report.get("found"):
            matched += 1
        # Reports without a match lack the count keys and contribute zero.
        node_total += int(report.get("node_count", 0))
        file_total += int(report.get("file_count", 0))

    return {
        "symbol": symbol,
        "depth": depth,
        "workspace_size": len(reports),
        "repos_with_match": matched,
        "total_nodes": node_total,
        "total_files": file_total,
        "repos": reports,
    }