repr-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- repr/__init__.py +10 -0
- repr/analyzer.py +915 -0
- repr/api.py +263 -0
- repr/auth.py +300 -0
- repr/cli.py +858 -0
- repr/config.py +392 -0
- repr/discovery.py +472 -0
- repr/extractor.py +388 -0
- repr/highlights.py +712 -0
- repr/openai_analysis.py +597 -0
- repr/tools.py +446 -0
- repr/ui.py +430 -0
- repr_cli-0.1.0.dist-info/METADATA +326 -0
- repr_cli-0.1.0.dist-info/RECORD +18 -0
- repr_cli-0.1.0.dist-info/WHEEL +5 -0
- repr_cli-0.1.0.dist-info/entry_points.txt +2 -0
- repr_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- repr_cli-0.1.0.dist-info/top_level.txt +1 -0
repr/extractor.py
ADDED
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Extract signals from repositories - languages, dependencies, etc.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from collections import Counter
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from pygments.lexers import get_lexer_for_filename, ClassNotFound
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Language detection by file extension.
#
# Maps a file suffix (including the leading dot) to the display name of the
# language it is counted under. Several suffixes may share one language
# (for example ".ts" and ".tsx" are both "TypeScript"); detect_languages()
# adds their counts together.
#
# NOTE: detect_languages() lowercases the suffix before the lookup, so the
# upper-case ".R" key is never matched by that function; ".r" covers it.
LANGUAGE_EXTENSIONS = {
    ".py": "Python",
    ".js": "JavaScript",
    ".ts": "TypeScript",
    ".tsx": "TypeScript",
    ".jsx": "JavaScript",
    ".go": "Go",
    ".rs": "Rust",
    ".java": "Java",
    ".kt": "Kotlin",
    ".swift": "Swift",
    ".c": "C",
    ".cpp": "C++",
    ".h": "C",
    ".hpp": "C++",
    ".rb": "Ruby",
    ".php": "PHP",
    ".cs": "C#",
    ".scala": "Scala",
    ".clj": "Clojure",
    ".ex": "Elixir",
    ".exs": "Elixir",
    ".erl": "Erlang",
    ".hs": "Haskell",
    ".lua": "Lua",
    ".r": "R",
    ".R": "R",
    ".jl": "Julia",
    ".dart": "Dart",
    ".vue": "Vue",
    ".svelte": "Svelte",
    ".sql": "SQL",
    ".sh": "Shell",
    ".bash": "Shell",
    ".zsh": "Shell",
}
|
|
50
|
+
|
|
51
|
+
# Files to skip when detecting languages.
#
# Names of directories (or files) that are excluded from analysis. Matching
# is by exact name, not by substring or glob: detect_languages() skips a file
# when any component of its repository-relative path equals one of these
# names, and the file-tree helpers skip a directory entry whose own name is
# in this set.
SKIP_PATTERNS = {
    "node_modules",
    "venv",
    ".venv",
    "vendor",
    "__pycache__",
    ".git",
    "dist",
    "build",
    ".next",
    "target",
    "coverage",
}
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def detect_languages(repo_path: Path) -> dict[str, float]:
    """
    Estimate the language mix of a repository from its file extensions.

    Every regular file below ``repo_path`` whose lower-cased suffix appears in
    ``LANGUAGE_EXTENSIONS`` counts once towards its language. Files with a
    path component listed in ``SKIP_PATTERNS`` are ignored. Traversal errors
    are swallowed, so the result may be based on a partial walk.

    Args:
        repo_path: Path to repository

    Returns:
        Dictionary of language -> percentage of recognised files (rounded to
        one decimal), most common first. Languages below 1% are omitted. An
        empty dictionary is returned when no recognised file is found.
    """
    per_language: Counter[str] = Counter()

    try:
        for candidate in repo_path.rglob("*"):
            if not candidate.is_file():
                continue

            # Ignore anything that lives under (or is named like) a skipped entry
            components = candidate.relative_to(repo_path).parts
            if not SKIP_PATTERNS.isdisjoint(components):
                continue

            language = LANGUAGE_EXTENSIONS.get(candidate.suffix.lower())
            if language is not None:
                per_language[language] += 1
    except Exception:
        # Best effort: keep whatever was counted before the failure
        pass

    recognised = sum(per_language.values())
    if not recognised:
        return {}

    shares = (
        (language, hits / recognised * 100)
        for language, hits in per_language.most_common()
    )
    # Only report languages that make up at least 1% of recognised files
    return {language: round(share, 1) for language, share in shares if share >= 1}
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def get_primary_language(repo_path: Path) -> str | None:
    """
    Return the language with the largest share in a repository.

    Args:
        repo_path: Path to repository

    Returns:
        Name of the language with the highest percentage reported by
        detect_languages(), or None when no language was detected
    """
    shares = detect_languages(repo_path)
    if not shares:
        return None
    # On a tie, max() keeps the first language in detect_languages() order
    return max(shares, key=lambda name: shares[name])
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def detect_dependencies(repo_path: Path) -> dict[str, list[str]]:
    """
    Detect dependencies from package files.

    Looks for well-known manifest files directly inside ``repo_path`` (not in
    subdirectories) and parses each one that exists. Manifests that belong to
    the same ecosystem (requirements.txt and pyproject.toml) are merged.

    Args:
        repo_path: Path to repository

    Returns:
        Dictionary of ecosystem -> list of dependency names. An ecosystem is
        only present when at least one dependency was found for it. Each list
        is free of duplicates and keeps the order in which names were first
        seen, so the result is deterministic for a given repository.
    """
    # (ecosystem, manifest file name, parser) in the order they are applied
    manifests = (
        ("python", "requirements.txt", _parse_requirements_txt),
        ("python", "pyproject.toml", _parse_pyproject_toml),
        ("nodejs", "package.json", _parse_package_json),
        ("go", "go.mod", _parse_go_mod),
        ("rust", "Cargo.toml", _parse_cargo_toml),
        ("ruby", "Gemfile", _parse_gemfile),
    )

    dependencies: dict[str, list[str]] = {}
    for ecosystem, filename, parser in manifests:
        manifest = repo_path / filename
        if not manifest.exists():
            continue

        found = parser(manifest)
        if not found:
            continue

        merged = dependencies.get(ecosystem, []) + list(found)
        # dict.fromkeys de-duplicates while preserving first-seen order;
        # a plain set() would make the output order vary between runs.
        dependencies[ecosystem] = list(dict.fromkeys(merged))

    return dependencies
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _parse_requirements_txt(path: Path) -> list[str]:
    """
    Parse a Python requirements.txt file.

    Blank lines, comments, and option lines (anything starting with ``-``,
    such as ``-r other.txt`` or ``-e .``) are ignored. Lines that do not start
    with a package name, for example bare URLs or ``git+https://...``
    references, are skipped rather than yielding a bogus name.

    Args:
        path: Path to the requirements file

    Returns:
        Lower-cased package names in file order. Empty if the file cannot be
        read.
    """
    try:
        # utf-8-sig tolerates a leading BOM; undecodable bytes are replaced
        # because only the ASCII package names matter here.
        content = path.read_text(encoding="utf-8-sig", errors="replace")
    except OSError:
        return []

    # A distribution name (dots allowed, e.g. "zope.interface") that must be
    # followed by the end of the line or by the start of extras, a version
    # specifier, an environment marker, a direct reference, or a comment.
    requirement = re.compile(
        r"^([A-Za-z0-9][A-Za-z0-9._-]*)\s*(?:$|[\[(<>=!~;@#])"
    )

    deps = []
    for raw in content.splitlines():
        line = raw.strip()
        if not line or line.startswith(("#", "-")):
            continue
        match = requirement.match(line)
        if match:
            deps.append(match.group(1).lower())
    return deps
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _parse_pyproject_toml(path: Path) -> list[str]:
    """
    Parse dependency arrays from a Python pyproject.toml file.

    This is a lightweight line scanner, not a TOML parser. It reads every
    array assigned to a key ending in ``dependencies`` (for example
    ``dependencies = [...]`` or ``dev-dependencies = [...]``), whether the
    array is written on one line or spread over several. Table-style
    declarations (such as Poetry's ``[tool.poetry.dependencies]``) are not
    recognised.

    Args:
        path: Path to the pyproject.toml file

    Returns:
        Lower-cased package names in file order. Empty if the file cannot be
        read.
    """
    deps: list[str] = []
    try:
        content = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return deps

    # The key itself must end in "dependencies" and the value must be an
    # array, so lines like `dynamic = ["dependencies"]` do not trigger parsing.
    array_start = re.compile(r"^[\w.-]*dependencies\s*=\s*\[(.*)$")
    # A quoted string (either quote style) or a trailing comment.
    token = re.compile(r'"([^"]*)"|\'([^\']*)\'|#.*')
    package_name = re.compile(r"[A-Za-z0-9][A-Za-z0-9._-]*")

    def consume(text: str) -> bool:
        """Collect names from *text*; return True while the array stays open."""
        for found in token.finditer(text):
            requirement = found.group(1)
            if requirement is None:
                requirement = found.group(2)
            if requirement is None:
                continue  # comment token
            name = package_name.match(requirement.strip())
            if name:
                deps.append(name.group(0).lower())
        # Look for the closing bracket outside of strings and comments so
        # that extras such as "pkg[extra]" do not end the array early.
        return "]" not in token.sub("", text)

    in_deps = False
    for raw in content.splitlines():
        line = raw.strip()
        if in_deps:
            in_deps = consume(line)
            continue
        opened = array_start.match(line)
        if opened:
            in_deps = consume(opened.group(1))
    return deps
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _parse_package_json(path: Path) -> list[str]:
    """
    Parse Node.js package.json dependencies.

    Collects the package names listed under ``dependencies`` and
    ``devDependencies``. A section that is missing or is not a JSON object is
    ignored without affecting the other section.

    Args:
        path: Path to the package.json file

    Returns:
        Package names, runtime dependencies first, then development
        dependencies. Empty if the file cannot be read or is not a JSON
        object.
    """
    try:
        # Passing bytes lets json detect UTF-8/16/32 (with or without BOM)
        # instead of relying on the platform's locale encoding.
        manifest = json.loads(path.read_bytes())
    except (OSError, ValueError, RecursionError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError
        return []

    if not isinstance(manifest, dict):
        return []

    deps = []
    for key in ("dependencies", "devDependencies"):
        section = manifest.get(key)
        if isinstance(section, dict):
            deps.extend(section)
    return deps
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _parse_go_mod(path: Path) -> list[str]:
    """
    Parse Go go.mod dependencies.

    Handles both single-line ``require module version`` directives and
    ``require ( ... )`` blocks. ``//`` comments, whether on their own line or
    trailing a directive (such as ``// indirect``), are ignored.

    Args:
        path: Path to the go.mod file

    Returns:
        Module paths in file order. Empty if the file cannot be read.
    """
    try:
        content = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return []

    deps = []
    in_require = False
    for raw in content.splitlines():
        # Strip comments first so a comment-only line inside a require block
        # is not mistaken for a module path.
        line = raw.split("//", 1)[0].strip()
        if not line:
            continue

        if line.startswith("require ("):
            in_require = True
            continue

        if in_require:
            if line == ")":
                in_require = False
                continue
            # First field is the module path, second is the version
            deps.append(line.split()[0])
        elif line.startswith("require "):
            fields = line.split()
            if len(fields) >= 2:
                deps.append(fields[1])
    return deps
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _parse_cargo_toml(path: Path) -> list[str]:
    """
    Parse Rust Cargo.toml dependencies.

    This is a lightweight line scanner, not a TOML parser. It recognises
    dependencies declared as keys inside ``[dependencies]`` and
    ``[dev-dependencies]`` as well as dependencies declared as their own
    table, e.g. ``[dependencies.serde]``.

    Args:
        path: Path to the Cargo.toml file

    Returns:
        Crate names in file order. Empty if the file cannot be read.
    """
    try:
        content = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return []

    # Matches "[dependencies]" / "[dev-dependencies]" and captures NAME from
    # the table form "[dependencies.NAME]".
    section = re.compile(r"^\[(?:dev-)?dependencies(?:\.([^\]]+))?\]")

    deps = []
    in_deps = False
    for raw in content.splitlines():
        line = raw.strip()

        if line.startswith("["):
            header = section.match(line)
            table_name = header.group(1) if header else None
            if table_name:
                # The keys of a per-crate table (version, features, ...) are
                # settings, not dependencies, so do not enter key mode.
                deps.append(table_name.strip().strip("'\""))
            in_deps = header is not None and table_name is None
            continue

        if in_deps and "=" in line:
            name = line.split("=")[0].strip()
            if name and not name.startswith("#"):
                deps.append(name.strip("'\""))
    return deps
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _parse_gemfile(path: Path) -> list[str]:
    """Collect gem names from the ``gem`` statements of a Ruby Gemfile."""
    gem_names = []
    try:
        for raw in path.read_text().splitlines():
            statement = raw.strip()
            if not statement.startswith("gem "):
                continue
            # The first quoted argument after `gem` is the gem's name
            found = re.search(r"gem ['\"]([^'\"]+)['\"]", statement)
            if found is not None:
                gem_names.append(found.group(1))
    except Exception:
        # Best effort: an unreadable Gemfile yields no dependencies
        pass
    return gem_names
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def get_file_tree(repo_path: Path, max_depth: int = 3) -> dict:
    """
    Build a nested dictionary describing a repository's layout.

    Hidden entries (names starting with ".") and names listed in
    ``SKIP_PATTERNS`` are left out, as are directories that end up empty.
    Directories that cannot be listed due to missing permissions are treated
    as empty.

    Args:
        repo_path: Path to repository
        max_depth: Maximum depth to traverse

    Returns:
        Nested dictionary representing file tree. Files map to None;
        directories are keyed with a trailing "/" and map to their own
        subtree, or to "..." once they lie deeper than ``max_depth``.
    """
    def build_tree(node: Path, level: int = 0) -> dict | str:
        # Beyond the depth limit only a placeholder is reported
        if level > max_depth:
            return "..."

        if node.is_file():
            return node.name

        tree = {}
        try:
            for entry in sorted(node.iterdir()):
                name = entry.name
                if name.startswith(".") or name in SKIP_PATTERNS:
                    continue

                if not entry.is_dir():
                    tree[name] = None
                    continue

                children = build_tree(entry, level + 1)
                if children:  # Drop directories with nothing to show
                    tree[name + "/"] = children
        except PermissionError:
            pass

        return tree

    return build_tree(repo_path)
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def get_file_tree_flat(repo_path: Path, max_depth: int = 3) -> list[str]:
    """
    List the repository's entries as paths relative to its root.

    Entries are visited depth-first in sorted order. Hidden entries (names
    starting with ".") and names listed in ``SKIP_PATTERNS`` are left out.
    Directories that cannot be listed due to missing permissions contribute
    no children.

    Args:
        repo_path: Path to repository
        max_depth: Maximum depth to traverse

    Returns:
        List of relative file paths; directory entries carry a trailing "/"
        and are followed by their contents while within ``max_depth``
    """
    collected: list[str] = []

    def visit(directory: Path, level: int) -> None:
        if level > max_depth:
            return

        try:
            for entry in sorted(directory.iterdir()):
                if entry.name in SKIP_PATTERNS or entry.name.startswith("."):
                    continue

                relative = str(entry.relative_to(repo_path))
                if not entry.is_dir():
                    collected.append(relative)
                    continue

                # Record the directory itself before descending into it
                collected.append(relative + "/")
                visit(entry, level + 1)
        except PermissionError:
            pass

    visit(repo_path, 0)
    return collected
|
|
388
|
+
|