repolens-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- repolens/__init__.py +1 -0
- repolens/ai_client.py +230 -0
- repolens/analyzer.py +242 -0
- repolens/cli.py +117 -0
- repolens/fetcher.py +198 -0
- repolens/graph.py +126 -0
- repolens/models.py +52 -0
- repolens/scanner.py +69 -0
- repolens/tui/__init__.py +0 -0
- repolens/tui/app.py +951 -0
- repolens_cli-0.1.0.dist-info/METADATA +88 -0
- repolens_cli-0.1.0.dist-info/RECORD +15 -0
- repolens_cli-0.1.0.dist-info/WHEEL +4 -0
- repolens_cli-0.1.0.dist-info/entry_points.txt +2 -0
- repolens_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
repolens/fetcher.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""GitHub API client — fetches file trees and content without cloning."""
|
|
2
|
+
import base64
|
|
3
|
+
import re
|
|
4
|
+
import time
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import requests
|
|
8
|
+
|
|
9
|
+
from .models import FileNode
|
|
10
|
+
|
|
11
|
+
# Base URL that every REST endpoint in this module is built from.
GITHUB_API = "https://api.github.com"

# Lower-case file extension (with leading dot) -> language label.
# Paths whose extension is not listed here get an empty language string in
# fetch_file_tree().
# NOTE(review): scanner.py keeps its own SUPPORTED_EXTENSIONS table that has no
# ".json"/".yaml"/".yml"/".toml" entries — confirm the difference is intended.
SUPPORTED_EXTENSIONS = {
    ".py": "python",
    ".js": "javascript",
    ".jsx": "javascript",
    ".ts": "typescript",
    ".tsx": "typescript",
    ".go": "go",
    ".java": "java",
    ".rb": "ruby",
    ".rs": "rust",
    ".cpp": "cpp",
    ".c": "c",
    ".h": "c",
    ".cs": "csharp",
    ".php": "php",
    ".swift": "swift",
    ".kt": "kotlin",
    ".scala": "scala",
    ".md": "markdown",
    ".json": "json",
    ".yaml": "yaml",
    ".yml": "yaml",
    ".toml": "toml",
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def parse_github_url(url: str) -> tuple[str, str, Optional[str]]:
    """Parse a GitHub repository reference into ``(owner, repo, branch_or_None)``.

    Accepted forms:
    - https://github.com/owner/repo
    - https://github.com/owner/repo/tree/branch
    - github.com/owner/repo (optionally prefixed with ``www.``)
    - git@github.com:owner/repo.git
    - owner/repo

    A trailing ``/``, a trailing ``.git`` on the repository name, and any
    query string or fragment are ignored. Everything after ``/tree/`` is
    returned as the branch, so a URL that also contains a sub-directory
    (``/tree/main/src``) yields ``"main/src"``; the two cannot be told apart
    from the URL alone.

    Raises:
        ValueError: if a non-empty owner and repository name cannot be
            extracted from *url*.
    """
    original = url
    # Query string and fragment never carry owner/repo/branch information.
    cleaned = re.split(r"[?#]", url.strip(), maxsplit=1)[0].rstrip("/")
    # Strip the scheme (or the SSH user prefix), then the host.
    cleaned = re.sub(r"^(?:https?://|git@)", "", cleaned)
    cleaned = re.sub(r"^(?:www\.)?github\.com[/:]", "", cleaned)

    parts = cleaned.split("/")
    owner = parts[0]
    repo = parts[1].removesuffix(".git") if len(parts) >= 2 else ""
    if not owner or not repo:
        raise ValueError(
            f"Cannot parse GitHub URL: {original!r}. Expected owner/repo format."
        )

    branch = None
    if len(parts) >= 4 and parts[2] == "tree":
        # Branch names may themselves contain slashes (e.g. "feature/x").
        branch = "/".join(parts[3:])

    return owner, repo, branch
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _headers(token: Optional[str]) -> dict:
|
|
68
|
+
h = {"Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28"}
|
|
69
|
+
if token:
|
|
70
|
+
h["Authorization"] = f"Bearer {token}"
|
|
71
|
+
return h
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _get(url: str, token: Optional[str], params: dict | None = None) -> dict:
    """GET *url* from the GitHub API and return the decoded JSON body.

    Args:
        url: absolute API URL.
        token: optional GitHub token, forwarded to ``_headers``.
        params: optional query-string parameters.

    Returns:
        The parsed JSON payload. Some endpoints (for example the contents
        endpoint on a directory) return a JSON array, in which case a list is
        returned despite the annotation.

    Raises:
        RuntimeError: the request was rejected because of rate limiting.
        ValueError: the resource does not exist (HTTP 404).
        requests.HTTPError: any other non-success status.
    """
    resp = requests.get(url, headers=_headers(token), params=params, timeout=30)

    # GitHub reports rate limiting with either 403 or 429.
    rate_limited = resp.status_code == 429 or (
        resp.status_code == 403 and "rate limit" in resp.text.lower()
    )
    if rate_limited:
        try:
            retry_after = resp.headers.get("Retry-After")
            if retry_after is not None:
                # Secondary limits state the wait directly, in seconds.
                wait = max(0, int(retry_after))
            else:
                reset = int(resp.headers.get("X-RateLimit-Reset", time.time() + 60))
                wait = max(0, reset - int(time.time())) + 1
        except ValueError:
            # Header present but not an integer: fall back to a safe guess.
            wait = 60
        raise RuntimeError(
            f"GitHub rate limit hit. Resets in {wait}s. Set GITHUB_TOKEN to increase limits."
        )

    if resp.status_code == 404:
        raise ValueError(f"Not found: {url}")
    resp.raise_for_status()
    return resp.json()
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def get_repo_info(owner: str, repo: str, token: Optional[str] = None) -> dict:
    """Return the JSON document of ``GET /repos/{owner}/{repo}``."""
    endpoint = f"{GITHUB_API}/repos/{owner}/{repo}"
    return _get(endpoint, token)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def get_default_branch(owner: str, repo: str, token: Optional[str] = None) -> str:
    """Return the ``default_branch`` value of the repository's info document."""
    return get_repo_info(owner, repo, token)["default_branch"]
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def fetch_file_tree(
    owner: str,
    repo: str,
    branch: str,
    token: Optional[str] = None,
    max_files: int = 500,
) -> list[FileNode]:
    """Fetch the file tree of *branch* via the GitHub Trees API (no cloning).

    The branch is resolved to its head commit, the commit to its root tree,
    and that tree is then listed recursively. Only blob entries (files) are
    returned.

    Args:
        owner: repository owner.
        repo: repository name.
        branch: branch to list.
        token: optional GitHub token.
        max_files: maximum number of nodes to return; entries beyond this
            count are dropped.

    Returns:
        At most *max_files* ``FileNode`` objects in the order GitHub lists
        them. ``language`` is ``""`` for unrecognised extensions.
    """
    repo_url = f"{GITHUB_API}/repos/{owner}/{repo}"

    # Branch -> head commit SHA.
    branch_data = _get(f"{repo_url}/branches/{branch}", token)
    commit_sha = branch_data["commit"]["sha"]

    # Commit -> root tree SHA.
    commit_data = _get(f"{repo_url}/git/commits/{commit_sha}", token)
    tree_sha = commit_data["tree"]["sha"]

    # Root tree -> full recursive listing.
    tree_data = _get(
        f"{repo_url}/git/trees/{tree_sha}",
        token,
        params={"recursive": "1"},
    )

    if tree_data.get("truncated"):
        # Truncation is decided by GitHub's own response limits, not by max_files.
        print(
            " Warning: GitHub truncated the recursive tree listing "
            "(repository too large); some files may be missing."
        )

    # Passing an unknown keyword to a dataclass raises TypeError, so the blob
    # SHA is only forwarded when FileNode actually declares a ``sha`` field.
    supports_sha = "sha" in getattr(FileNode, "__dataclass_fields__", {})

    nodes: list[FileNode] = []
    for item in tree_data.get("tree", []):
        if len(nodes) >= max_files:
            break  # checked before appending so max_files=0 yields nothing
        if item["type"] != "blob":
            continue
        path = item["path"]
        # Take the extension from the file name only, so a dot in a directory
        # name ("pkg.v2/Makefile") is not mistaken for one.
        filename = path.rsplit("/", 1)[-1]
        ext = "." + filename.rsplit(".", 1)[-1] if "." in filename else ""
        fields = {
            "path": path,
            "size": item.get("size", 0),
            "language": SUPPORTED_EXTENSIONS.get(ext.lower(), ""),
        }
        if supports_sha:
            fields["sha"] = item["sha"]
        nodes.append(FileNode(**fields))

    return nodes
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def fetch_file_content(
    owner: str,
    repo: str,
    path: str,
    branch: str,
    token: Optional[str] = None,
) -> Optional[str]:
    """Fetch and decode the content of a single file.

    Args:
        owner: repository owner.
        repo: repository name.
        path: repo-relative file path.
        branch: ref to read the file from.
        token: optional GitHub token.

    Returns:
        The file content decoded as UTF-8 (undecodable bytes are replaced),
        or ``None`` when the file is missing or the request fails with an
        HTTP error, *path* is a directory, the payload is not base64-encoded,
        the file is larger than 500 KB, or the payload cannot be decoded.

    Raises:
        RuntimeError: propagated from ``_get`` when the rate limit is hit.
    """
    # Local import: keeps the block self-contained without touching the
    # module's import section.
    from urllib.parse import quote

    try:
        data = _get(
            # Escape the path so characters such as "#", "?" or "%" in a file
            # name are not read as URL syntax; "/" stays a separator.
            f"{GITHUB_API}/repos/{owner}/{repo}/contents/{quote(path)}",
            token,
            params={"ref": branch},
        )
    except (ValueError, requests.HTTPError):
        return None

    if isinstance(data, list):
        return None  # It's a directory

    if data.get("encoding") != "base64":
        return None

    size = data.get("size", 0)
    if size > 500_000:  # skip files > 500KB
        return None

    raw = data.get("content", "")
    try:
        decoded = base64.b64decode(raw)
    except (ValueError, TypeError):
        # Malformed base64 (binascii.Error is a ValueError) or a null payload.
        return None
    return decoded.decode("utf-8", errors="replace")
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def fetch_source_files(
    owner: str,
    repo: str,
    branch: str,
    files: list[FileNode],
    token: Optional[str] = None,
    languages: Optional[set[str]] = None,
    max_fetch: int = 200,
) -> None:
    """Download content for source files, storing it on each node in place.

    A node is selected when it has a language, that language is not one of
    markdown/json/yaml/toml, and (if *languages* is given) the language is in
    that set. Only the first *max_fetch* selected nodes are fetched. Progress
    is printed to stdout.
    """
    non_code = ("markdown", "json", "yaml", "toml")

    selected = []
    for node in files:
        if not node.language or node.language in non_code:
            continue
        if languages is not None and node.language not in languages:
            continue
        selected.append(node)
    selected = selected[:max_fetch]

    total = len(selected)
    print(f" Fetching content for {total} source files...")
    for position, node in enumerate(selected, 1):
        if position % 20 == 0:
            print(f" {position}/{total}...")
        node.content = fetch_file_content(owner, repo, node.path, branch, token)
|
repolens/graph.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""Build and analyse dependency and call graphs from FileAnalysis data."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from .models import FileAnalysis, FunctionNode, GraphStats
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def build_graph(analyses: dict[str, FileAnalysis]) -> GraphStats:
    """Build import-graph and call-graph statistics from per-file analyses.

    Args:
        analyses: mapping of repo-relative path -> ``FileAnalysis``.

    Returns:
        A ``GraphStats`` with import edges, in-degrees, circular
        dependencies, entry points, the ten most-imported files, and an
        index of functions whose ``calls``/``callers`` hold function ids of
        the form ``"path::name"``.

    Call resolution is name-based: a call is linked to every known function
    whose name equals the last dotted component of the call, regardless of
    the file it lives in.
    """
    stats = GraphStats()

    # Import graph: every analysed file starts at in-degree 0; import targets
    # outside `analyses` are counted as well.
    in_degree: dict[str, int] = {p: 0 for p in analyses}
    for path, fa in analyses.items():
        unique_imports = list(dict.fromkeys(fa.resolved_imports))  # deduplicate, preserve order
        stats.import_edges[path] = unique_imports
        for dep in unique_imports:
            in_degree[dep] = in_degree.get(dep, 0) + 1
    stats.in_degree = in_degree

    stats.circular_deps = _find_cycles(stats.import_edges)

    # Entry points: analysed files that nothing imports.
    stats.entry_points = [p for p in analyses if in_degree.get(p, 0) == 0]

    # Hub files: the ten most-imported analysed files.
    stats.hub_files = sorted(
        [(p, in_degree.get(p, 0)) for p in analyses],
        key=lambda x: x[1],
        reverse=True,
    )[:10]

    # Function index: name -> list of function ids, used to resolve calls.
    name_index: dict[str, list[str]] = {}
    for path, fa in analyses.items():
        for fn in fa.functions:
            fid = f"{path}::{fn.name}"
            first_seen = fid not in stats.functions
            # Work on a copy so the caller's FunctionNode objects are not
            # mutated; the docstring is carried over with the other fields.
            stats.functions[fid] = FunctionNode(
                name=fn.name,
                file_path=fn.file_path,
                line_start=fn.line_start,
                line_end=fn.line_end,
                calls=list(fn.calls),
                docstring=fn.docstring,
            )
            # Same-named functions in one file share an id (the last one
            # wins); index the id once so it is not resolved twice.
            if first_seen:
                name_index.setdefault(fn.name, []).append(fid)

    # Resolve raw call names to function ids and back-populate callers.
    for fid, fn in stats.functions.items():
        resolved: dict[str, None] = {}  # insertion-ordered set
        for call in fn.calls:
            base_name = call.rsplit(".", 1)[-1]  # handle obj.method style
            for candidate in name_index.get(base_name, ()):
                resolved[candidate] = None
        fn.calls = list(resolved)
        # Register the caller once per callee, even if it calls it repeatedly.
        for candidate in fn.calls:
            stats.functions[candidate].callers.append(fid)

    return stats
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _find_cycles(edges: dict[str, list[str]]) -> list[list[str]]:
|
|
69
|
+
"""Detect all simple cycles using iterative DFS (Johnson-lite)."""
|
|
70
|
+
WHITE, GRAY, BLACK = 0, 1, 2
|
|
71
|
+
color: dict[str, int] = {n: WHITE for n in edges}
|
|
72
|
+
# include all targets too
|
|
73
|
+
for deps in edges.values():
|
|
74
|
+
for d in deps:
|
|
75
|
+
color.setdefault(d, WHITE)
|
|
76
|
+
|
|
77
|
+
cycles: list[list[str]] = []
|
|
78
|
+
parent: dict[str, str | None] = {}
|
|
79
|
+
|
|
80
|
+
for start in list(color):
|
|
81
|
+
if color[start] != WHITE:
|
|
82
|
+
continue
|
|
83
|
+
stack = [(start, iter(edges.get(start, [])))]
|
|
84
|
+
color[start] = GRAY
|
|
85
|
+
path = [start]
|
|
86
|
+
parent[start] = None
|
|
87
|
+
|
|
88
|
+
while stack:
|
|
89
|
+
node, children = stack[-1]
|
|
90
|
+
try:
|
|
91
|
+
child = next(children)
|
|
92
|
+
if color.get(child, WHITE) == GRAY:
|
|
93
|
+
# Found a back-edge → extract cycle
|
|
94
|
+
idx = path.index(child)
|
|
95
|
+
cycle = path[idx:]
|
|
96
|
+
# Deduplicate: only add if we haven't seen this cycle (by sorted set)
|
|
97
|
+
cycle_key = frozenset(cycle)
|
|
98
|
+
if not any(frozenset(c) == cycle_key for c in cycles):
|
|
99
|
+
cycles.append(cycle)
|
|
100
|
+
elif color.get(child, WHITE) == WHITE:
|
|
101
|
+
color[child] = GRAY
|
|
102
|
+
parent[child] = node
|
|
103
|
+
path.append(child)
|
|
104
|
+
stack.append((child, iter(edges.get(child, []))))
|
|
105
|
+
except StopIteration:
|
|
106
|
+
color[node] = BLACK
|
|
107
|
+
stack.pop()
|
|
108
|
+
if path and path[-1] == node:
|
|
109
|
+
path.pop()
|
|
110
|
+
|
|
111
|
+
return cycles
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def importers_of(path: str, stats: GraphStats) -> list[str]:
    """Return the files whose import list contains *path*."""
    importing: list[str] = []
    for source, targets in stats.import_edges.items():
        if path in targets:
            importing.append(source)
    return importing
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def callers_of(function_id: str, stats: GraphStats) -> list[str]:
    """Return the ``callers`` list of *function_id*, or ``[]`` if it is unknown."""
    fn = stats.functions.get(function_id)
    if not fn:
        return []
    return fn.callers
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def callees_of(function_id: str, stats: GraphStats) -> list[str]:
    """Return the ``calls`` list of *function_id*, or ``[]`` if it is unknown."""
    fn = stats.functions.get(function_id)
    if not fn:
        return []
    return fn.calls
|
repolens/models.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
class FileNode:
    """One file of a repository: its path, size, language and optional content."""

    path: str  # repo-relative path
    size: int = 0
    language: str = ""
    content: Optional[str] = None
    # Blob SHA as reported by the GitHub Trees API. fetcher.fetch_file_tree
    # constructs nodes with ``sha=...``; without this field that call raises
    # TypeError. Declared last, with a default, so existing positional and
    # keyword construction keeps working.
    sha: str = ""
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class FunctionNode:
    """Record describing one function: name, file, line span and call relations."""

    name: str
    file_path: str
    line_start: int
    line_end: int
    # In the copies stored by graph.build_graph this list is replaced with
    # resolved function ids ("path::name").
    calls: list[str] = field(default_factory=list)  # raw call names found in body
    callers: list[str] = field(default_factory=list)  # populated by graph builder
    docstring: Optional[str] = None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class FileAnalysis:
    """Per-file analysis result: imports, functions and class names of one file."""

    path: str
    language: str
    raw_imports: list[str] = field(default_factory=list)
    # Read by graph.build_graph to create the import edges of this file.
    resolved_imports: list[str] = field(default_factory=list)  # repo-relative paths
    functions: list[FunctionNode] = field(default_factory=list)
    classes: list[str] = field(default_factory=list)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class GraphStats:
    """Import-graph and call-graph results, filled in by graph.build_graph."""

    # file_path -> list of files it imports (within repo)
    import_edges: dict[str, list[str]] = field(default_factory=dict)
    # file_path -> in-degree (how many files import it)
    in_degree: dict[str, int] = field(default_factory=dict)
    # Each entry is one cycle, given as the list of file paths on it.
    circular_deps: list[list[str]] = field(default_factory=list)
    entry_points: list[str] = field(default_factory=list)  # nothing imports these
    hub_files: list[tuple[str, int]] = field(default_factory=list)  # (path, in_degree) top-10
    # function_id "file::name" -> FunctionNode
    functions: dict[str, FunctionNode] = field(default_factory=dict)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class RepoAnalysis:
    """Container bundling a repository's files, per-file analyses and graph stats."""

    root: str  # absolute path to scanned directory
    files: list[FileNode] = field(default_factory=list)
    # presumably keyed by repo-relative path, like the mapping graph.build_graph
    # takes — TODO confirm against the code that fills it
    file_analyses: dict[str, FileAnalysis] = field(default_factory=dict)
    stats: GraphStats = field(default_factory=GraphStats)
|
repolens/scanner.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from .models import FileNode
|
|
5
|
+
|
|
6
|
+
# Lower-case file suffix (with leading dot) -> language label. scan() skips
# every file whose suffix is not listed here.
# NOTE(review): fetcher.py keeps its own SUPPORTED_EXTENSIONS table that also
# maps ".json"/".yaml"/".yml"/".toml" — confirm the difference is intended.
SUPPORTED_EXTENSIONS: dict[str, str] = {
    ".py": "python",
    ".js": "javascript",
    ".jsx": "javascript",
    ".ts": "typescript",
    ".tsx": "typescript",
    ".go": "go",
    ".rs": "rust",
    ".java": "java",
    ".rb": "ruby",
    ".cpp": "cpp",
    ".c": "c",
    ".h": "c",
    ".cs": "csharp",
    ".php": "php",
    ".swift": "swift",
    ".kt": "kotlin",
    ".scala": "scala",
    ".md": "markdown",
}

# Directory names that scan() never descends into. scan() additionally skips
# every directory whose name starts with ".", so the dotted entries here are
# covered twice.
SKIP_DIRS = {
    ".git", ".hg", ".svn",
    "node_modules", "__pycache__", ".mypy_cache", ".ruff_cache",
    ".venv", "venv", "env", ".env",
    "dist", "build", "out", "target",
    ".idea", ".vscode",
    "vendor", "third_party",
}

# Files larger than this many bytes are listed by scan() without their content.
MAX_FILE_SIZE = 500_000  # 500 KB
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def scan(root: str, max_files: int = 2000) -> list[FileNode]:
    """Walk *root* and return a ``FileNode`` for every supported source file.

    Directories named in ``SKIP_DIRS`` and directories starting with ``.``
    are not entered. Files whose suffix is not in ``SUPPORTED_EXTENSIONS``
    are ignored. Files larger than ``MAX_FILE_SIZE`` are listed with
    ``content=None``, as are files that cannot be read. Files that cannot be
    stat'ed (for example broken symlinks) are skipped.

    Args:
        root: directory to scan.
        max_files: maximum number of nodes to collect; the walk stops as
            soon as the cap is reached.

    Returns:
        The collected nodes sorted by repo-relative POSIX path.
    """
    root_path = Path(root).resolve()
    nodes: list[FileNode] = []

    for dirpath, dirnames, filenames in os.walk(root_path):
        if len(nodes) >= max_files:
            break  # cap reached: no point walking the rest of the tree

        # Prune ignored directories in place; sorting makes the walk order,
        # and therefore which files survive the cap, deterministic.
        dirnames[:] = sorted(
            d for d in dirnames if d not in SKIP_DIRS and not d.startswith(".")
        )

        for filename in sorted(filenames):
            if len(nodes) >= max_files:
                break
            full_path = Path(dirpath) / filename
            language = SUPPORTED_EXTENSIONS.get(full_path.suffix.lower())
            if not language:
                continue

            try:
                size = full_path.stat().st_size
            except OSError:
                # Broken symlink, permission problem or file removed mid-scan:
                # skip this entry instead of aborting the whole scan.
                continue

            content: str | None = None
            if size <= MAX_FILE_SIZE:
                try:
                    content = full_path.read_text(encoding="utf-8", errors="replace")
                except OSError:
                    pass  # keep the node, just without content

            rel_path = full_path.relative_to(root_path).as_posix()
            nodes.append(FileNode(path=rel_path, size=size, language=language, content=content))

    return sorted(nodes, key=lambda n: n.path)
|
repolens/tui/__init__.py
ADDED
|
File without changes
|