repr-cli 0.1.0-py3-none-any.whl → 0.2.2-py3-none-any.whl

This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
repr/analyzer.py DELETED
@@ -1,915 +0,0 @@
- """
- Deep code analysis module.
-
- Performs sophisticated analysis of repositories beyond basic stats:
- - Code complexity metrics
- - Architecture pattern detection
- - Framework/library detection from imports
- - Code quality indicators
- - Test coverage indicators
- - Documentation analysis
- - API surface detection
- """
-
- import ast
- import json
- import re
- from collections import Counter, defaultdict
- from dataclasses import dataclass, field
- from pathlib import Path
- from typing import Any
-
-
- # ============================================================================
- # Data Classes
- # ============================================================================
-
- @dataclass
- class CodeMetrics:
-     """Metrics for a single file."""
-     path: str
-     language: str
-     lines_total: int = 0
-     lines_code: int = 0
-     lines_comment: int = 0
-     lines_blank: int = 0
-     functions: int = 0
-     classes: int = 0
-     imports: list[str] = field(default_factory=list)
-     complexity: int = 0  # Cyclomatic complexity estimate
-     avg_function_length: float = 0.0
-     has_tests: bool = False
-     has_docstrings: bool = False
-
-
- @dataclass
- class RepoAnalysis:
-     """Complete analysis of a repository."""
-     # Basic metrics
-     total_files: int = 0
-     total_lines: int = 0
-     code_lines: int = 0
-     comment_lines: int = 0
-
-     # Languages
-     languages: dict[str, float] = field(default_factory=dict)
-
-     # Architecture
-     architecture_patterns: list[str] = field(default_factory=list)
-     project_type: str = ""  # web-app, cli, library, api, etc.
-
-     # Frameworks & libraries
-     frameworks: list[str] = field(default_factory=list)
-     notable_libraries: list[str] = field(default_factory=list)
-
-     # Code quality
-     avg_file_size: float = 0.0
-     avg_function_length: float = 0.0
-     avg_complexity: float = 0.0
-     docstring_coverage: float = 0.0
-
-     # Testing
-     has_tests: bool = False
-     test_files: int = 0
-     test_ratio: float = 0.0  # test files / source files
-     test_frameworks: list[str] = field(default_factory=list)
-
-     # Documentation
-     has_readme: bool = False
-     readme_quality: str = ""  # minimal, basic, good, excellent
-     has_docs_folder: bool = False
-     has_changelog: bool = False
-     has_contributing: bool = False
-
-     # API/Routes
-     api_endpoints: list[dict] = field(default_factory=list)
-
-     # Key files
-     key_files: list[str] = field(default_factory=list)
-     entry_points: list[str] = field(default_factory=list)
-
-     # Imports analysis
-     most_used_imports: list[tuple[str, int]] = field(default_factory=list)
-
-     # File metrics (detailed)
-     file_metrics: list[CodeMetrics] = field(default_factory=list)
-
-
- # ============================================================================
- # Framework Detection
- # ============================================================================
-
- FRAMEWORK_PATTERNS = {
-     # Python Web
-     "Django": [r"from django", r"import django", r"INSTALLED_APPS", r"urlpatterns"],
-     "Flask": [r"from flask", r"import flask", r"Flask\(__name__\)"],
-     "FastAPI": [r"from fastapi", r"import fastapi", r"FastAPI\(\)"],
-     "Starlette": [r"from starlette", r"import starlette"],
-     "Tornado": [r"from tornado", r"import tornado"],
-
-     # Python Data/ML
-     "NumPy": [r"import numpy", r"from numpy"],
-     "Pandas": [r"import pandas", r"from pandas"],
-     "PyTorch": [r"import torch", r"from torch"],
-     "TensorFlow": [r"import tensorflow", r"from tensorflow"],
-     "Scikit-learn": [r"from sklearn", r"import sklearn"],
-     "Keras": [r"from keras", r"import keras"],
-
-     # Python Testing
-     "pytest": [r"import pytest", r"from pytest"],
-     "unittest": [r"import unittest", r"from unittest"],
-
-     # JavaScript/TypeScript
-     "React": [r"from ['\"]react['\"]", r"import React", r"useState", r"useEffect"],
-     "Vue": [r"from ['\"]vue['\"]", r"createApp", r"<template>"],
-     "Angular": [r"@angular/core", r"@Component", r"@Injectable"],
-     "Svelte": [r"<script.*>", r"\.svelte$"],
-     "Next.js": [r"from ['\"]next", r"getServerSideProps", r"getStaticProps"],
-     "Express": [r"from ['\"]express['\"]", r"require\(['\"]express['\"]"],
-     "NestJS": [r"@nestjs/", r"@Controller", r"@Injectable"],
-
-     # JavaScript Testing
-     "Jest": [r"from ['\"]jest['\"]", r"describe\(", r"it\(", r"expect\("],
-     "Mocha": [r"from ['\"]mocha['\"]", r"describe\(", r"it\("],
-     "Vitest": [r"from ['\"]vitest['\"]"],
-
-     # Go
-     "Gin": [r"github.com/gin-gonic/gin"],
-     "Echo": [r"github.com/labstack/echo"],
-     "Fiber": [r"github.com/gofiber/fiber"],
-
-     # Rust
-     "Actix": [r"actix-web", r"actix_web"],
-     "Rocket": [r"rocket::"],
-     "Axum": [r"axum::"],
-
-     # Ruby
-     "Rails": [r"Rails\.", r"ActiveRecord", r"ActionController"],
-     "Sinatra": [r"require ['\"]sinatra['\"]", r"Sinatra::"],
-
-     # Database
-     "SQLAlchemy": [r"from sqlalchemy", r"import sqlalchemy"],
-     "Prisma": [r"@prisma/client", r"prisma\."],
-     "TypeORM": [r"from ['\"]typeorm['\"]"],
-     "Sequelize": [r"from ['\"]sequelize['\"]"],
-     "Mongoose": [r"from ['\"]mongoose['\"]"],
- }
-
- PROJECT_TYPE_INDICATORS = {
-     "web-app": ["src/pages", "src/views", "src/components", "templates/", "static/"],
-     "api": ["routes/", "endpoints/", "api/", "controllers/", "handlers/"],
-     "cli": ["cli.py", "main.py", "__main__.py", "bin/", "cmd/"],
-     "library": ["src/lib/", "lib/", "setup.py", "pyproject.toml", "package.json"],
-     "mobile": ["ios/", "android/", "App.tsx", "App.js"],
-     "ml-project": ["notebooks/", "models/", "data/", "train.py", "model.py"],
-     "monorepo": ["packages/", "apps/", "lerna.json", "pnpm-workspace.yaml"],
- }
-
- ARCHITECTURE_PATTERNS = {
-     "mvc": ["models/", "views/", "controllers/"],
-     "clean-architecture": ["domain/", "usecases/", "infrastructure/", "presentation/"],
-     "hexagonal": ["adapters/", "ports/", "core/"],
-     "layered": ["services/", "repositories/", "controllers/"],
-     "microservices": ["docker-compose", "kubernetes", "k8s/"],
-     "serverless": ["serverless.yml", "lambda/", "functions/"],
- }
-
-
- # ============================================================================
- # Analysis Functions
- # ============================================================================
-
- def analyze_repository(repo_path: Path) -> RepoAnalysis:
-     """
-     Perform deep analysis of a repository.
-
-     Args:
-         repo_path: Path to the repository
-
-     Returns:
-         RepoAnalysis with comprehensive metrics
-     """
-     analysis = RepoAnalysis()
-
-     # Collect all source files
-     source_files = _collect_source_files(repo_path)
-
-     # Analyze each file
-     all_imports: Counter[str] = Counter()
-     total_functions = 0
-     total_complexity = 0
-     files_with_docstrings = 0
-
-     for file_path in source_files:
-         metrics = _analyze_file(repo_path, file_path)
-         if metrics:
-             analysis.file_metrics.append(metrics)
-             analysis.total_files += 1
-             analysis.total_lines += metrics.lines_total
-             analysis.code_lines += metrics.lines_code
-             analysis.comment_lines += metrics.lines_comment
-
-             for imp in metrics.imports:
-                 all_imports[imp] += 1
-
-             total_functions += metrics.functions
-             total_complexity += metrics.complexity
-
-             if metrics.has_docstrings:
-                 files_with_docstrings += 1
-
-             if metrics.has_tests:
-                 analysis.test_files += 1
-
-     # Calculate averages
-     if analysis.total_files > 0:
-         analysis.avg_file_size = analysis.code_lines / analysis.total_files
-         analysis.docstring_coverage = (files_with_docstrings / analysis.total_files) * 100
-
-     if total_functions > 0:
-         analysis.avg_complexity = total_complexity / total_functions
-
-     # Language breakdown
-     analysis.languages = _calculate_language_breakdown(analysis.file_metrics)
-
-     # Detect frameworks
-     analysis.frameworks = _detect_frameworks(repo_path, all_imports)
-     analysis.notable_libraries = _detect_notable_libraries(all_imports)
-
-     # Detect project type and architecture
-     analysis.project_type = _detect_project_type(repo_path)
-     analysis.architecture_patterns = _detect_architecture(repo_path)
-
-     # Testing analysis
-     analysis.has_tests = analysis.test_files > 0
-     source_files_count = analysis.total_files - analysis.test_files
-     if source_files_count > 0:
-         analysis.test_ratio = analysis.test_files / source_files_count
-     analysis.test_frameworks = _detect_test_frameworks(analysis.frameworks, all_imports)
-
-     # Documentation analysis
-     analysis.has_readme = _check_readme(repo_path)
-     analysis.readme_quality = _assess_readme_quality(repo_path)
-     analysis.has_docs_folder = (repo_path / "docs").is_dir()
-     analysis.has_changelog = any(
-         (repo_path / name).exists()
-         for name in ["CHANGELOG.md", "CHANGELOG", "HISTORY.md", "CHANGES.md"]
-     )
-     analysis.has_contributing = (repo_path / "CONTRIBUTING.md").exists()
-
-     # API endpoints
-     analysis.api_endpoints = _detect_api_endpoints(repo_path, analysis.frameworks)
-
-     # Key files
-     analysis.key_files = _identify_key_files(repo_path)
-     analysis.entry_points = _identify_entry_points(repo_path)
-
-     # Most used imports
-     analysis.most_used_imports = all_imports.most_common(20)
-
-     return analysis
-
-
- def _collect_source_files(repo_path: Path) -> list[Path]:
-     """Collect all source code files."""
-     skip_dirs = {
-         ".git", "node_modules", "venv", ".venv", "__pycache__",
-         "dist", "build", ".next", "target", "coverage", ".tox",
-         "eggs", "*.egg-info", ".mypy_cache", ".pytest_cache",
-     }
-
-     source_extensions = {
-         ".py", ".js", ".ts", ".tsx", ".jsx", ".go", ".rs",
-         ".java", ".kt", ".swift", ".c", ".cpp", ".h", ".hpp",
-         ".rb", ".php", ".cs", ".scala", ".ex", ".exs",
-     }
-
-     files = []
-
-     for file_path in repo_path.rglob("*"):
-         if not file_path.is_file():
-             continue
-
-         # Skip directories
-         parts = file_path.relative_to(repo_path).parts
-         if any(skip in parts for skip in skip_dirs):
-             continue
-
-         if file_path.suffix.lower() in source_extensions:
-             files.append(file_path)
-
-     return files
-
-
- def _analyze_file(repo_path: Path, file_path: Path) -> CodeMetrics | None:
-     """Analyze a single source file."""
-     try:
-         content = file_path.read_text(errors="ignore")
-     except Exception:
-         return None
-
-     rel_path = str(file_path.relative_to(repo_path))
-     language = _get_language(file_path)
-
-     lines = content.split("\n")
-     metrics = CodeMetrics(
-         path=rel_path,
-         language=language,
-         lines_total=len(lines),
-     )
-
-     # Count line types
-     in_multiline_comment = False
-     in_multiline_string = False
-
-     for line in lines:
-         stripped = line.strip()
-
-         if not stripped:
-             metrics.lines_blank += 1
-         elif _is_comment_line(stripped, language, in_multiline_comment):
-             metrics.lines_comment += 1
-         else:
-             metrics.lines_code += 1
-
-     # Language-specific analysis
-     if language == "Python":
-         metrics = _analyze_python_file(content, metrics)
-     elif language in ("JavaScript", "TypeScript"):
-         metrics = _analyze_js_file(content, metrics)
-     elif language == "Go":
-         metrics = _analyze_go_file(content, metrics)
-
-     # Check if it's a test file
-     metrics.has_tests = _is_test_file(rel_path, content)
-
-     return metrics
-
-
- def _analyze_python_file(content: str, metrics: CodeMetrics) -> CodeMetrics:
-     """Deep analysis of Python file using AST."""
-     try:
-         tree = ast.parse(content)
-     except SyntaxError:
-         return metrics
-
-     function_lengths = []
-
-     for node in ast.walk(tree):
-         # Count functions
-         if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
-             metrics.functions += 1
-             # Estimate function length
-             if hasattr(node, "end_lineno") and hasattr(node, "lineno"):
-                 length = node.end_lineno - node.lineno + 1
-                 function_lengths.append(length)
-
-             # Check for docstring
-             if (node.body and isinstance(node.body[0], ast.Expr) and
-                     isinstance(node.body[0].value, ast.Constant) and
-                     isinstance(node.body[0].value.value, str)):
-                 metrics.has_docstrings = True
-
-             # Estimate complexity (simplified)
-             for child in ast.walk(node):
-                 if isinstance(child, (ast.If, ast.For, ast.While, ast.ExceptHandler,
-                                       ast.With, ast.comprehension)):
-                     metrics.complexity += 1
-
-         # Count classes
-         if isinstance(node, ast.ClassDef):
-             metrics.classes += 1
-             # Check for class docstring
-             if (node.body and isinstance(node.body[0], ast.Expr) and
-                     isinstance(node.body[0].value, ast.Constant) and
-                     isinstance(node.body[0].value.value, str)):
-                 metrics.has_docstrings = True
-
-         # Extract imports
-         if isinstance(node, ast.Import):
-             for alias in node.names:
-                 metrics.imports.append(alias.name.split(".")[0])
-         elif isinstance(node, ast.ImportFrom):
-             if node.module:
-                 metrics.imports.append(node.module.split(".")[0])
-
-     if function_lengths:
-         metrics.avg_function_length = sum(function_lengths) / len(function_lengths)
-
-     return metrics
-
-
- def _analyze_js_file(content: str, metrics: CodeMetrics) -> CodeMetrics:
-     """Analyze JavaScript/TypeScript file using regex."""
-     # Count functions (approximate)
-     function_patterns = [
-         r"function\s+\w+\s*\(",
-         r"const\s+\w+\s*=\s*(?:async\s*)?\(",
-         r"(?:async\s+)?function\s*\(",
-         r"\w+\s*:\s*(?:async\s*)?\(",
-         r"=>\s*{",
-     ]
-     for pattern in function_patterns:
-         metrics.functions += len(re.findall(pattern, content))
-
-     # Count classes
-     metrics.classes = len(re.findall(r"class\s+\w+", content))
-
-     # Extract imports
-     import_patterns = [
-         r"import\s+.*?\s+from\s+['\"]([^'\"]+)['\"]",
-         r"require\(['\"]([^'\"]+)['\"]\)",
-     ]
-     for pattern in import_patterns:
-         for match in re.findall(pattern, content):
-             # Clean up import path
-             module = match.split("/")[0].replace("@", "")
-             if module and not module.startswith("."):
-                 metrics.imports.append(module)
-
-     # Estimate complexity (if/for/while/switch)
-     complexity_patterns = [r"\bif\s*\(", r"\bfor\s*\(", r"\bwhile\s*\(", r"\bswitch\s*\("]
-     for pattern in complexity_patterns:
-         metrics.complexity += len(re.findall(pattern, content))
-
-     return metrics
-
-
- def _analyze_go_file(content: str, metrics: CodeMetrics) -> CodeMetrics:
-     """Analyze Go file using regex."""
-     # Count functions
-     metrics.functions = len(re.findall(r"func\s+(?:\([^)]+\)\s*)?\w+\s*\(", content))
-
-     # Count structs (Go's classes)
-     metrics.classes = len(re.findall(r"type\s+\w+\s+struct", content))
-
-     # Extract imports
-     import_match = re.search(r"import\s*\((.*?)\)", content, re.DOTALL)
-     if import_match:
-         for line in import_match.group(1).split("\n"):
-             match = re.search(r"\"([^\"]+)\"", line)
-             if match:
-                 pkg = match.group(1).split("/")[-1]
-                 metrics.imports.append(pkg)
-
-     # Single imports
-     for match in re.findall(r"import\s+\"([^\"]+)\"", content):
-         pkg = match.split("/")[-1]
-         metrics.imports.append(pkg)
-
-     # Complexity
-     metrics.complexity = len(re.findall(r"\bif\s+", content))
-     metrics.complexity += len(re.findall(r"\bfor\s+", content))
-     metrics.complexity += len(re.findall(r"\bswitch\s+", content))
-
-     return metrics
-
-
- def _get_language(file_path: Path) -> str:
-     """Get language from file extension."""
-     ext_map = {
-         ".py": "Python",
-         ".js": "JavaScript",
-         ".ts": "TypeScript",
-         ".tsx": "TypeScript",
-         ".jsx": "JavaScript",
-         ".go": "Go",
-         ".rs": "Rust",
-         ".java": "Java",
-         ".kt": "Kotlin",
-         ".swift": "Swift",
-         ".c": "C",
-         ".cpp": "C++",
-         ".h": "C",
-         ".hpp": "C++",
-         ".rb": "Ruby",
-         ".php": "PHP",
-         ".cs": "C#",
-     }
-     return ext_map.get(file_path.suffix.lower(), "Unknown")
-
-
- def _is_comment_line(line: str, language: str, in_multiline: bool) -> bool:
-     """Check if a line is a comment."""
-     if language == "Python":
-         return line.startswith("#") or line.startswith('"""') or line.startswith("'''")
-     elif language in ("JavaScript", "TypeScript", "Java", "Go", "Rust", "C", "C++"):
-         return line.startswith("//") or line.startswith("/*") or line.startswith("*")
-     elif language == "Ruby":
-         return line.startswith("#")
-     return False
-
-
- def _is_test_file(path: str, content: str) -> bool:
-     """Check if file is a test file."""
-     path_lower = path.lower()
-
-     # Check path patterns
-     test_patterns = ["test_", "_test.", ".test.", "spec.", "_spec.", "/tests/", "/test/", "/__tests__/"]
-     if any(p in path_lower for p in test_patterns):
-         return True
-
-     # Check content patterns
-     test_content_patterns = [
-         r"def test_\w+",
-         r"@pytest",
-         r"unittest\.TestCase",
-         r"describe\(['\"]",
-         r"it\(['\"]",
-         r"expect\(",
-         r"assert\s+",
-     ]
-     for pattern in test_content_patterns:
-         if re.search(pattern, content):
-             return True
-
-     return False
-
-
- def _calculate_language_breakdown(metrics: list[CodeMetrics]) -> dict[str, float]:
-     """Calculate language percentage breakdown."""
-     lang_lines: Counter[str] = Counter()
-     total_lines = 0
-
-     for m in metrics:
-         if m.language != "Unknown":
-             lang_lines[m.language] += m.lines_code
-             total_lines += m.lines_code
-
-     if total_lines == 0:
-         return {}
-
-     return {
-         lang: round((lines / total_lines) * 100, 1)
-         for lang, lines in lang_lines.most_common()
-         if (lines / total_lines) >= 0.01  # At least 1%
-     }
-
-
- def _detect_frameworks(repo_path: Path, all_imports: Counter) -> list[str]:
-     """Detect frameworks used in the repository."""
-     detected = []
-
-     # Check file contents for framework patterns
-     source_files = list(repo_path.rglob("*.py")) + list(repo_path.rglob("*.js")) + \
-                    list(repo_path.rglob("*.ts")) + list(repo_path.rglob("*.tsx"))
-
-     sample_content = ""
-     for f in source_files[:50]:  # Sample first 50 files
-         try:
-             sample_content += f.read_text(errors="ignore")
-         except Exception:
-             pass
-
-     # Also check package files
-     for pf in ["package.json", "requirements.txt", "pyproject.toml", "Cargo.toml", "go.mod"]:
-         pkg_file = repo_path / pf
-         if pkg_file.exists():
-             try:
-                 sample_content += pkg_file.read_text()
-             except Exception:
-                 pass
-
-     for framework, patterns in FRAMEWORK_PATTERNS.items():
-         for pattern in patterns:
-             if re.search(pattern, sample_content, re.IGNORECASE):
-                 if framework not in detected:
-                     detected.append(framework)
-                 break
-
-     return detected
-
-
- def _detect_notable_libraries(all_imports: Counter) -> list[str]:
-     """Detect notable libraries from imports."""
-     notable = {
-         # Python
-         "requests": "HTTP client",
-         "aiohttp": "Async HTTP",
-         "celery": "Task queue",
-         "redis": "Redis client",
-         "pydantic": "Data validation",
-         "sqlalchemy": "ORM",
-         "alembic": "Migrations",
-         "boto3": "AWS SDK",
-         "opencv": "Computer vision",
-         "pillow": "Image processing",
-         "matplotlib": "Plotting",
-         "seaborn": "Statistical viz",
-         # JavaScript
-         "axios": "HTTP client",
-         "lodash": "Utilities",
-         "moment": "Date handling",
-         "dayjs": "Date handling",
-         "rxjs": "Reactive programming",
-         "socket.io": "WebSockets",
-         "graphql": "GraphQL",
-         "apollo": "GraphQL client",
-         "redux": "State management",
-         "zustand": "State management",
-         "tanstack": "Data fetching",
-         "tailwindcss": "CSS framework",
-     }
-
-     detected = []
-     for imp, count in all_imports.items():
-         imp_lower = imp.lower()
-         for lib, desc in notable.items():
-             if lib in imp_lower and lib not in detected:
-                 detected.append(lib)
-
-     return detected[:15]  # Top 15
-
-
- def _detect_project_type(repo_path: Path) -> str:
-     """Detect the type of project."""
-     files_and_dirs = set()
-
-     for p in repo_path.iterdir():
-         files_and_dirs.add(p.name)
-
-     for p in repo_path.rglob("*"):
-         try:
-             rel = str(p.relative_to(repo_path))
-             files_and_dirs.add(rel)
-         except Exception:
-             pass
-
-     scores: Counter[str] = Counter()
-
-     for project_type, indicators in PROJECT_TYPE_INDICATORS.items():
-         for indicator in indicators:
-             for item in files_and_dirs:
-                 if indicator.lower() in item.lower():
-                     scores[project_type] += 1
-
-     if scores:
-         return scores.most_common(1)[0][0]
-     return "unknown"
-
-
- def _detect_architecture(repo_path: Path) -> list[str]:
-     """Detect architecture patterns."""
-     detected = []
-
-     dirs = set()
-     for p in repo_path.rglob("*"):
-         if p.is_dir():
-             dirs.add(p.name.lower() + "/")
-
-     for pattern_name, indicators in ARCHITECTURE_PATTERNS.items():
-         matches = sum(1 for ind in indicators if any(ind.lower() in d for d in dirs))
-         if matches >= len(indicators) * 0.5:  # At least 50% match
-             detected.append(pattern_name)
-
-     return detected
-
-
- def _detect_test_frameworks(frameworks: list[str], imports: Counter) -> list[str]:
-     """Detect testing frameworks."""
-     test_frameworks = ["pytest", "unittest", "Jest", "Mocha", "Vitest"]
-     return [f for f in test_frameworks if f in frameworks]
-
-
- def _check_readme(repo_path: Path) -> bool:
-     """Check if repository has a README."""
-     readme_names = ["README.md", "README.rst", "README.txt", "README"]
-     return any((repo_path / name).exists() for name in readme_names)
-
-
- def _assess_readme_quality(repo_path: Path) -> str:
-     """Assess README quality."""
-     readme_names = ["README.md", "README.rst", "README.txt", "README"]
-
-     for name in readme_names:
-         readme_path = repo_path / name
-         if readme_path.exists():
-             try:
-                 content = readme_path.read_text()
-                 lines = len(content.split("\n"))
-
-                 # Check for common sections
-                 has_install = bool(re.search(r"(?i)install|setup|getting started", content))
-                 has_usage = bool(re.search(r"(?i)usage|example|how to", content))
-                 has_api = bool(re.search(r"(?i)api|reference|documentation", content))
-                 has_license = bool(re.search(r"(?i)license|licence", content))
-                 has_badges = bool(re.search(r"\[!\[", content))  # Markdown badges
-
-                 score = sum([has_install, has_usage, has_api, has_license, has_badges])
-
-                 if lines < 20:
-                     return "minimal"
-                 elif score >= 4 and lines > 100:
-                     return "excellent"
-                 elif score >= 2 and lines > 50:
-                     return "good"
-                 else:
-                     return "basic"
-             except Exception:
-                 pass
-
-     return "none"
-
-
- def _detect_api_endpoints(repo_path: Path, frameworks: list[str]) -> list[dict]:
-     """Detect API endpoints/routes."""
-     endpoints = []
-
-     # FastAPI patterns
-     fastapi_patterns = [
-         (r"@(?:app|router)\.(get|post|put|patch|delete)\(['\"]([^'\"]+)['\"]", "FastAPI"),
-     ]
-
-     # Flask patterns
-     flask_patterns = [
-         (r"@(?:app|blueprint)\.(route|get|post|put|delete)\(['\"]([^'\"]+)['\"]", "Flask"),
-     ]
-
-     # Express patterns
-     express_patterns = [
-         (r"(?:app|router)\.(get|post|put|patch|delete)\(['\"]([^'\"]+)['\"]", "Express"),
-     ]
-
-     patterns = []
-     if "FastAPI" in frameworks:
-         patterns.extend(fastapi_patterns)
-     if "Flask" in frameworks:
-         patterns.extend(flask_patterns)
-     if "Express" in frameworks or "NestJS" in frameworks:
-         patterns.extend(express_patterns)
-
-     # If no framework detected, try all patterns
-     if not patterns:
-         patterns = fastapi_patterns + flask_patterns + express_patterns
-
-     for file_path in list(repo_path.rglob("*.py")) + list(repo_path.rglob("*.js")) + list(repo_path.rglob("*.ts")):
-         try:
-             content = file_path.read_text(errors="ignore")
-             for pattern, framework in patterns:
-                 for match in re.findall(pattern, content, re.IGNORECASE):
-                     if isinstance(match, tuple):
-                         method, path = match
-                     else:
-                         method, path = "GET", match
-
-                     endpoints.append({
-                         "method": method.upper(),
-                         "path": path,
-                         "file": str(file_path.relative_to(repo_path)),
-                     })
-         except Exception:
-             pass
-
-     # Deduplicate and limit
-     seen = set()
-     unique_endpoints = []
-     for ep in endpoints:
-         key = (ep["method"], ep["path"])
-         if key not in seen:
-             seen.add(key)
-             unique_endpoints.append(ep)
-
-     return unique_endpoints[:50]  # Limit to 50
-
-
- def _identify_key_files(repo_path: Path) -> list[str]:
-     """Identify key files in the repository."""
-     key_patterns = [
-         "main.py", "app.py", "index.py", "server.py", "cli.py",
-         "main.js", "index.js", "app.js", "server.js",
-         "main.ts", "index.ts", "app.ts", "server.ts",
-         "main.go", "main.rs",
-         "Dockerfile", "docker-compose.yml", "docker-compose.yaml",
-         "Makefile", "justfile",
-         ".env.example", ".env.sample",
-         "setup.py", "pyproject.toml", "package.json", "Cargo.toml", "go.mod",
-     ]
-
-     found = []
-     for pattern in key_patterns:
-         for file_path in repo_path.rglob(pattern):
-             rel_path = str(file_path.relative_to(repo_path))
-             if rel_path not in found:
-                 found.append(rel_path)
-
-     return found[:20]
-
-
- def _identify_entry_points(repo_path: Path) -> list[str]:
-     """Identify likely entry points."""
-     entry_patterns = [
-         "main.py", "__main__.py", "app.py", "cli.py", "server.py", "run.py",
-         "index.js", "main.js", "app.js", "server.js",
-         "index.ts", "main.ts", "app.ts", "server.ts",
-         "main.go", "cmd/main.go",
-         "main.rs", "src/main.rs",
-     ]
-
-     found = []
-     for pattern in entry_patterns:
-         for file_path in repo_path.rglob(pattern):
-             rel_path = str(file_path.relative_to(repo_path))
-             # Prefer root-level entry points
-             if "/" not in rel_path or rel_path.startswith("src/") or rel_path.startswith("cmd/"):
-                 found.append(rel_path)
-
-     return found[:5]
-
-
- # ============================================================================
- # Public API
- # ============================================================================
-
- def analyze_repo(repo_path: Path) -> dict[str, Any]:
-     """
-     Analyze a repository and return results as a dictionary.
-
-     This is the main entry point for the analysis tool.
-     """
-     analysis = analyze_repository(repo_path)
-
-     return {
-         "summary": {
-             "total_files": analysis.total_files,
-             "total_lines": analysis.total_lines,
-             "code_lines": analysis.code_lines,
-             "comment_lines": analysis.comment_lines,
-             "project_type": analysis.project_type,
-         },
-         "languages": analysis.languages,
-         "frameworks": analysis.frameworks,
-         "notable_libraries": analysis.notable_libraries,
-         "architecture": analysis.architecture_patterns,
-         "quality": {
-             "avg_file_size_lines": round(analysis.avg_file_size, 1),
-             "avg_complexity": round(analysis.avg_complexity, 2),
-             "docstring_coverage_pct": round(analysis.docstring_coverage, 1),
-         },
-         "testing": {
-             "has_tests": analysis.has_tests,
-             "test_files": analysis.test_files,
-             "test_ratio": round(analysis.test_ratio, 2),
-             "test_frameworks": analysis.test_frameworks,
-         },
-         "documentation": {
-             "has_readme": analysis.has_readme,
-             "readme_quality": analysis.readme_quality,
-             "has_docs_folder": analysis.has_docs_folder,
-             "has_changelog": analysis.has_changelog,
-             "has_contributing": analysis.has_contributing,
-         },
-         "api_endpoints": analysis.api_endpoints[:20],  # Limit for readability
-         "key_files": analysis.key_files,
-         "entry_points": analysis.entry_points,
-         "most_used_imports": [
-             {"name": name, "count": count}
-             for name, count in analysis.most_used_imports[:15]
-         ],
-     }
-
-
- def get_code_snippets(
-     repo_path: Path,
-     max_snippets: int = 5,
-     max_lines: int = 50,
- ) -> list[dict[str, Any]]:
-     """
-     Extract interesting code snippets from a repository.
-
-     Looks for:
-     - Main entry points
-     - Key functions/classes
-     - Interesting patterns
-     """
-     snippets = []
-
-     # Find interesting files
-     interesting_files = [
-         "main.py", "app.py", "cli.py", "server.py",
-         "models.py", "schema.py", "routes.py", "handlers.py",
-         "index.ts", "App.tsx", "main.ts",
-     ]
-
-     for pattern in interesting_files:
-         for file_path in repo_path.rglob(pattern):
-             if len(snippets) >= max_snippets:
-                 break
-
-             try:
-                 content = file_path.read_text(errors="ignore")
-                 lines = content.split("\n")
-
-                 # Take first N lines or find an interesting section
-                 snippet_lines = lines[:max_lines]
-
-                 snippets.append({
-                     "path": str(file_path.relative_to(repo_path)),
-                     "content": "\n".join(snippet_lines),
-                     "total_lines": len(lines),
-                     "truncated": len(lines) > max_lines,
-                 })
-             except Exception:
-                 pass
-
-     return snippets
-
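Note on the removal: the deleted analyzer.py documented analyze_repo() as "the main entry point for the analysis tool", so 0.1.0 consumers importing repr.analyzer will break on upgrade to 0.2.2 unless the functionality was relocated elsewhere in the package. A minimal sketch of the call pattern that disappears with this diff; the key names come from the deleted source above, while the script itself is illustrative and not part of either wheel:

    from pathlib import Path
    import json

    # Both names existed only in repr/analyzer.py, removed in 0.2.2.
    from repr.analyzer import analyze_repo, get_code_snippets

    # analyze_repo returns a plain dict with "summary", "languages",
    # "frameworks", "testing", "documentation", etc.
    report = analyze_repo(Path("."))
    print(json.dumps(report["summary"], indent=2))

    # get_code_snippets returns dicts with "path", "content",
    # "total_lines", and "truncated" keys.
    for snippet in get_code_snippets(Path("."), max_snippets=3):
        print(snippet["path"], snippet["total_lines"], snippet["truncated"])

Code depending on these imports should be audited before upgrading to 0.2.2.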