repr-cli 0.1.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- repr/__init__.py +10 -0
- repr/analyzer.py +915 -0
- repr/api.py +263 -0
- repr/auth.py +300 -0
- repr/cli.py +858 -0
- repr/config.py +392 -0
- repr/discovery.py +472 -0
- repr/extractor.py +388 -0
- repr/highlights.py +712 -0
- repr/openai_analysis.py +597 -0
- repr/tools.py +446 -0
- repr/ui.py +430 -0
- repr_cli-0.1.0.dist-info/METADATA +326 -0
- repr_cli-0.1.0.dist-info/RECORD +18 -0
- repr_cli-0.1.0.dist-info/WHEEL +5 -0
- repr_cli-0.1.0.dist-info/entry_points.txt +2 -0
- repr_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- repr_cli-0.1.0.dist-info/top_level.txt +1 -0
repr/discovery.py
ADDED
@@ -0,0 +1,472 @@
"""
Git repository discovery - find repos recursively in directories.
"""

import hashlib
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path

from git import Repo, InvalidGitRepositoryError
from git.exc import GitCommandError

from .config import get_skip_patterns, get_repo_hash, set_repo_hash


@dataclass
class RepoInfo:
    """Information about a discovered repository."""

    path: Path
    name: str
    commit_count: int
    user_commit_count: int  # User's own commits
    first_commit_date: datetime | None
    last_commit_date: datetime | None
    primary_language: str | None
    languages: dict[str, int] | None  # Language -> percentage mapping
    contributors: int
    is_fork: bool
    remote_url: str | None
    description: str | None  # From README first line or repo
    frameworks: list[str] | None  # Detected frameworks/libraries
    has_tests: bool  # Has test directory or test files
    has_ci: bool  # Has CI/CD config (.github/workflows, .gitlab-ci, etc.)

    @property
    def age_months(self) -> int:
        """Calculate repository age in months."""
        if not self.first_commit_date:
            return 0

        now = datetime.now()
        delta = now - self.first_commit_date
        return int(delta.days / 30)

    @property
    def age_display(self) -> str:
        """Human-readable age string."""
        months = self.age_months
        if months < 1:
            return "< 1 mo"
        elif months < 12:
            return f"{months} mo"
        else:
            years = months // 12
            return f"{years}+ yr"

    def compute_hash(self) -> str:
        """Compute a hash representing the current state of the repo."""
        try:
            repo = Repo(self.path)
            head_sha = repo.head.commit.hexsha
            commit_count = str(self.commit_count)
            hash_input = f"{head_sha}:{commit_count}".encode()
            return hashlib.sha256(hash_input).hexdigest()[:16]
        except Exception:
            return ""

    def to_dict(self) -> dict:
        """Convert to dictionary for serialization."""
        return {
            "path": str(self.path),
            "name": self.name,
            "commit_count": self.commit_count,
            "first_commit": self.first_commit_date.isoformat() if self.first_commit_date else None,
            "last_commit": self.last_commit_date.isoformat() if self.last_commit_date else None,
            "languages": self.languages or {},
            "contributors": self.contributors,
            "is_fork": self.is_fork,
            "remote_url": self.remote_url,
            "age_months": self.age_months,
        }


def should_skip_directory(path: Path, skip_patterns: list[str]) -> bool:
    """Check if a directory should be skipped."""
    name = path.name

    # Always skip hidden directories (except .git check happens elsewhere)
    if name.startswith(".") and name != ".git":
        return True

    # Check against skip patterns
    for pattern in skip_patterns:
        if name == pattern or name.lower() == pattern.lower():
            return True

    return False


def discover_repos(
    root_paths: list[Path],
    skip_patterns: list[str] | None = None,
    min_commits: int = 10,
    use_cache: bool = True,
) -> list[RepoInfo]:
    """
    Discover git repositories recursively.

    Args:
        root_paths: List of directories to search
        skip_patterns: Patterns to skip (default from config)
        min_commits: Minimum commits to include repo
        use_cache: Whether to use cached repo hashes

    Returns:
        List of discovered repositories
    """
    if skip_patterns is None:
        skip_patterns = get_skip_patterns()

    repos: list[RepoInfo] = []
    visited_paths: set[Path] = set()

    for root_path in root_paths:
        root = Path(root_path).expanduser().resolve()
        if not root.exists():
            continue

        # Search for .git directories
        for git_dir in _find_git_dirs(root, skip_patterns, visited_paths):
            repo_path = git_dir.parent

            try:
                repo_info = analyze_repo(repo_path)

                # Skip repos with too few commits
                if repo_info.commit_count < min_commits:
                    continue

                # Check cache if enabled
                if use_cache:
                    cached_hash = get_repo_hash(str(repo_path))
                    current_hash = repo_info.compute_hash()
                    if cached_hash == current_hash:
                        repo_info._cached = True  # type: ignore
                    else:
                        set_repo_hash(str(repo_path), current_hash)

                repos.append(repo_info)

            except (InvalidGitRepositoryError, GitCommandError, Exception):
                # Skip invalid or problematic repos
                continue

    return repos


def _find_git_dirs(
    root: Path,
    skip_patterns: list[str],
    visited: set[Path],
) -> list[Path]:
    """Find all .git directories under root."""
    git_dirs: list[Path] = []

    def search(path: Path, depth: int = 0) -> None:
        if depth > 10:  # Limit recursion depth
            return

        if path in visited:
            return
        visited.add(path)

        try:
            for item in path.iterdir():
                if not item.is_dir():
                    continue

                if item.name == ".git":
                    git_dirs.append(item)
                    # Don't recurse into repo subdirectories
                    return

                if should_skip_directory(item, skip_patterns):
                    continue

                search(item, depth + 1)
        except PermissionError:
            pass

    search(root)
    return git_dirs


def analyze_repo(path: Path) -> RepoInfo:
    """
    Analyze a single repository.

    Args:
        path: Path to repository root

    Returns:
        RepoInfo with repository metadata
    """
    repo = Repo(path)

    # Get user's git config for identifying their commits
    user_email = None
    user_name = None
    try:
        user_email = repo.config_reader().get_value("user", "email", default=None)
        user_name = repo.config_reader().get_value("user", "name", default=None)
    except Exception:
        pass

    # Get commit counts (total and user's own)
    commit_count = 0
    user_commit_count = 0
    try:
        for commit in repo.iter_commits():
            commit_count += 1
            # Check if commit is by user
            if user_email and commit.author.email == user_email:
                user_commit_count += 1
            elif user_name and commit.author.name == user_name:
                user_commit_count += 1
    except Exception:
        pass

    # Get date range
    first_commit_date = None
    last_commit_date = None

    try:
        commits = list(repo.iter_commits())
        if commits:
            last_commit_date = datetime.fromtimestamp(commits[0].committed_date)
            first_commit_date = datetime.fromtimestamp(commits[-1].committed_date)
    except Exception:
        pass

    # Get contributors
    contributors = set()
    try:
        for commit in repo.iter_commits():
            contributors.add(commit.author.email)
    except Exception:
        pass

    # Get remote URL
    remote_url = None
    is_fork = False
    try:
        if repo.remotes:
            remote = repo.remotes.origin
            remote_url = remote.url
            # Simple fork detection - could be improved
            is_fork = "fork" in remote_url.lower() if remote_url else False
    except Exception:
        pass

    # Get description from README
    description = _get_repo_description(path)

    # Detect frameworks
    frameworks = _detect_frameworks(path)

    # Check for tests
    has_tests = _has_tests(path)

    # Check for CI/CD
    has_ci = _has_ci(path)

    # Primary language and languages will be detected by extractor
    primary_language = None
    languages = None

    return RepoInfo(
        path=path,
        name=path.name,
        commit_count=commit_count,
        user_commit_count=user_commit_count,
        first_commit_date=first_commit_date,
        last_commit_date=last_commit_date,
        primary_language=primary_language,
        languages=languages,
        contributors=len(contributors),
        is_fork=is_fork,
        remote_url=remote_url,
        description=description,
        frameworks=frameworks,
        has_tests=has_tests,
        has_ci=has_ci,
    )


def _get_repo_description(path: Path) -> str | None:
    """Extract description from README file."""
    readme_names = ["README.md", "README.rst", "README.txt", "README"]
    for name in readme_names:
        readme_path = path / name
        if readme_path.exists():
            try:
                content = readme_path.read_text(errors='ignore')
                lines = content.strip().split('\n')
                # Skip title (usually starts with #) and get first paragraph
                for line in lines:
                    line = line.strip()
                    if line and not line.startswith('#') and not line.startswith('!'):
                        # Truncate to reasonable length
                        return line[:200] if len(line) > 200 else line
            except Exception:
                pass
    return None


def _detect_frameworks(path: Path) -> list[str]:
    """Detect frameworks and major libraries used."""
    frameworks = []

    # Python frameworks
    requirements_files = ["requirements.txt", "requirements.in", "pyproject.toml", "setup.py"]
    python_frameworks = {
        "fastapi": "FastAPI", "django": "Django", "flask": "Flask",
        "pytorch": "PyTorch", "torch": "PyTorch", "tensorflow": "TensorFlow",
        "pandas": "Pandas", "numpy": "NumPy", "scikit-learn": "scikit-learn",
        "celery": "Celery", "sqlalchemy": "SQLAlchemy", "pydantic": "Pydantic",
    }

    for req_file in requirements_files:
        req_path = path / req_file
        if req_path.exists():
            try:
                content = req_path.read_text(errors='ignore').lower()
                for key, name in python_frameworks.items():
                    if key in content and name not in frameworks:
                        frameworks.append(name)
            except Exception:
                pass

    # JavaScript/TypeScript frameworks
    package_json = path / "package.json"
    if package_json.exists():
        try:
            import json
            data = json.loads(package_json.read_text())
            deps = {**data.get("dependencies", {}), **data.get("devDependencies", {})}
            js_frameworks = {
                "react": "React", "next": "Next.js", "vue": "Vue",
                "angular": "Angular", "svelte": "Svelte", "express": "Express",
                "nestjs": "NestJS", "@nestjs/core": "NestJS",
                "tailwindcss": "Tailwind", "typescript": "TypeScript",
            }
            for key, name in js_frameworks.items():
                if key in deps and name not in frameworks:
                    frameworks.append(name)
        except Exception:
            pass

    # Rust frameworks
    cargo_toml = path / "Cargo.toml"
    if cargo_toml.exists():
        try:
            content = cargo_toml.read_text(errors='ignore').lower()
            rust_frameworks = {
                "actix": "Actix", "axum": "Axum", "tokio": "Tokio",
                "rocket": "Rocket", "warp": "Warp",
            }
            for key, name in rust_frameworks.items():
                if key in content and name not in frameworks:
                    frameworks.append(name)
        except Exception:
            pass

    # Go frameworks
    go_mod = path / "go.mod"
    if go_mod.exists():
        try:
            content = go_mod.read_text(errors='ignore').lower()
            go_frameworks = {
                "gin-gonic": "Gin", "echo": "Echo", "fiber": "Fiber",
            }
            for key, name in go_frameworks.items():
                if key in content and name not in frameworks:
                    frameworks.append(name)
        except Exception:
            pass

    return frameworks if frameworks else None


def _has_tests(path: Path) -> bool:
    """Check if repository has tests."""
    test_indicators = [
        "tests", "test", "__tests__", "spec", "specs",
        "pytest.ini", "jest.config.js", "jest.config.ts",
        ".pytest_cache", "conftest.py",
    ]
    for indicator in test_indicators:
        if (path / indicator).exists():
            return True

    # Check for test files in src
    for pattern in ["**/test_*.py", "**/*_test.py", "**/*.test.ts", "**/*.spec.ts"]:
        if list(path.glob(pattern)):
            return True

    return False


def _has_ci(path: Path) -> bool:
    """Check if repository has CI/CD configuration."""
    ci_paths = [
        ".github/workflows",
        ".gitlab-ci.yml",
        ".circleci",
        "Jenkinsfile",
        ".travis.yml",
        "azure-pipelines.yml",
        ".drone.yml",
        "bitbucket-pipelines.yml",
    ]
    for ci_path in ci_paths:
        if (path / ci_path).exists():
            return True
    return False


def is_config_only_repo(path: Path) -> bool:
    """
    Check if a repository only contains config files (dotfiles, etc).

    Args:
        path: Path to repository root

    Returns:
        True if repo appears to be config-only
    """
    config_indicators = {
        "dotfiles",
        ".dotfiles",
        "config",
        ".config",
    }

    # Check repo name
    if path.name.lower() in config_indicators:
        return True

    # Check file types
    code_extensions = {
        ".py", ".js", ".ts", ".tsx", ".jsx",
        ".go", ".rs", ".java", ".kt", ".swift",
        ".c", ".cpp", ".h", ".hpp",
        ".rb", ".php", ".cs", ".scala",
    }

    has_code = False
    try:
        for file in path.rglob("*"):
            if file.is_file() and file.suffix in code_extensions:
                # Check it's not in a hidden directory
                parts = file.relative_to(path).parts
                if not any(p.startswith(".") for p in parts[:-1]):
                    has_code = True
                    break
    except Exception:
        pass

    return not has_code
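Usage sketch for the module's public helpers (an illustration, not part of the packaged file; it assumes the wheel is installed so the package imports as `repr`, and that `~/projects` contains git checkouts):

from pathlib import Path

from repr.discovery import discover_repos, is_config_only_repo

# Recursively find repos with at least 5 commits; skip the hash cache for a one-off scan.
repos = discover_repos([Path("~/projects")], min_commits=5, use_cache=False)

for info in repos:
    # Ignore dotfiles-style repositories that contain no recognized code files.
    if is_config_only_repo(info.path):
        continue
    print(f"{info.name}: {info.commit_count} commits over {info.age_display}")
    print(f"  frameworks: {info.frameworks}, tests: {info.has_tests}, CI: {info.has_ci}")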