repr-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- repr/__init__.py +10 -0
- repr/analyzer.py +915 -0
- repr/api.py +263 -0
- repr/auth.py +300 -0
- repr/cli.py +858 -0
- repr/config.py +392 -0
- repr/discovery.py +472 -0
- repr/extractor.py +388 -0
- repr/highlights.py +712 -0
- repr/openai_analysis.py +597 -0
- repr/tools.py +446 -0
- repr/ui.py +430 -0
- repr_cli-0.1.0.dist-info/METADATA +326 -0
- repr_cli-0.1.0.dist-info/RECORD +18 -0
- repr_cli-0.1.0.dist-info/WHEEL +5 -0
- repr_cli-0.1.0.dist-info/entry_points.txt +2 -0
- repr_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- repr_cli-0.1.0.dist-info/top_level.txt +1 -0
repr/analyzer.py
ADDED
|
@@ -0,0 +1,915 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Deep code analysis module.
|
|
3
|
+
|
|
4
|
+
Performs sophisticated analysis of repositories beyond basic stats:
|
|
5
|
+
- Code complexity metrics
|
|
6
|
+
- Architecture pattern detection
|
|
7
|
+
- Framework/library detection from imports
|
|
8
|
+
- Code quality indicators
|
|
9
|
+
- Test coverage indicators
|
|
10
|
+
- Documentation analysis
|
|
11
|
+
- API surface detection
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import ast
|
|
15
|
+
import json
|
|
16
|
+
import re
|
|
17
|
+
from collections import Counter, defaultdict
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# ============================================================================
|
|
24
|
+
# Data Classes
|
|
25
|
+
# ============================================================================
|
|
26
|
+
|
|
27
|
+
@dataclass
class CodeMetrics:
    """Metrics for a single file.

    Produced by _analyze_file(): line counts come from a simple per-line
    scan, while functions/classes/imports/complexity come from the
    language-specific analyzers (AST for Python, regex for JS/TS and Go).
    """
    path: str  # path relative to the repository root
    language: str  # display name from _get_language(), e.g. "Python"
    lines_total: int = 0  # all lines, including blank and comment lines
    lines_code: int = 0  # lines that are neither blank nor comment-only
    lines_comment: int = 0  # comment-only lines
    lines_blank: int = 0  # blank / whitespace-only lines
    functions: int = 0  # function/method definitions found
    classes: int = 0  # class (or Go struct) definitions found
    imports: list[str] = field(default_factory=list)  # top-level imported module names
    complexity: int = 0  # Cyclomatic complexity estimate
    avg_function_length: float = 0.0  # mean function length in lines (set by the Python analyzer only)
    has_tests: bool = False  # file matches test-path or test-content heuristics
    has_docstrings: bool = False  # at least one function/class carries a docstring
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class RepoAnalysis:
    """Complete analysis of a repository.

    Aggregated by analyze_repository() from per-file CodeMetrics plus
    repository-level scans (README/docs, frameworks, API routes, key files).
    """
    # Basic metrics
    total_files: int = 0  # source files successfully analyzed
    total_lines: int = 0  # sum of all lines across analyzed files
    code_lines: int = 0  # non-blank, non-comment lines
    comment_lines: int = 0  # comment-only lines

    # Languages
    languages: dict[str, float] = field(default_factory=dict)  # language -> % of code lines

    # Architecture
    architecture_patterns: list[str] = field(default_factory=list)  # keys of ARCHITECTURE_PATTERNS that matched
    project_type: str = ""  # web-app, cli, library, api, etc.

    # Frameworks & libraries
    frameworks: list[str] = field(default_factory=list)  # keys of FRAMEWORK_PATTERNS that matched
    notable_libraries: list[str] = field(default_factory=list)  # well-known libraries seen in imports

    # Code quality
    avg_file_size: float = 0.0  # mean code lines per analyzed file
    avg_function_length: float = 0.0  # mean function length in lines
    avg_complexity: float = 0.0  # total complexity / total functions
    docstring_coverage: float = 0.0  # % of files containing at least one docstring

    # Testing
    has_tests: bool = False  # True when at least one test file was found
    has_tests = False if False else has_tests  # (placeholder removed)
    test_files: int = 0  # number of files flagged by _is_test_file()
    test_ratio: float = 0.0  # test files / source files
    test_frameworks: list[str] = field(default_factory=list)  # e.g. "pytest", "Jest"

    # Documentation
    has_readme: bool = False
    readme_quality: str = ""  # "none", "minimal", "basic", "good" or "excellent" (see _assess_readme_quality)
    has_docs_folder: bool = False  # a "docs" directory exists at the repo root
    has_changelog: bool = False  # CHANGELOG/HISTORY/CHANGES file at the repo root
    has_contributing: bool = False  # CONTRIBUTING.md at the repo root

    # API/Routes
    api_endpoints: list[dict] = field(default_factory=list)  # {"method", "path", "file"} dicts

    # Key files
    key_files: list[str] = field(default_factory=list)  # notable config/entry files (relative paths)
    entry_points: list[str] = field(default_factory=list)  # likely executable entry points

    # Imports analysis
    most_used_imports: list[tuple[str, int]] = field(default_factory=list)  # top 20 (name, count)

    # File metrics (detailed)
    file_metrics: list[CodeMetrics] = field(default_factory=list)  # per-file breakdown
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# ============================================================================
|
|
99
|
+
# Framework Detection
|
|
100
|
+
# ============================================================================
|
|
101
|
+
|
|
102
|
+
# Maps framework name -> regex patterns indicating its use.  A framework is
# reported when ANY single pattern matches the sampled file contents; the
# match is case-insensitive (see _detect_frameworks()).
FRAMEWORK_PATTERNS = {
    # Python Web
    "Django": [r"from django", r"import django", r"INSTALLED_APPS", r"urlpatterns"],
    "Flask": [r"from flask", r"import flask", r"Flask\(__name__\)"],
    "FastAPI": [r"from fastapi", r"import fastapi", r"FastAPI\(\)"],
    "Starlette": [r"from starlette", r"import starlette"],
    "Tornado": [r"from tornado", r"import tornado"],

    # Python Data/ML
    "NumPy": [r"import numpy", r"from numpy"],
    "Pandas": [r"import pandas", r"from pandas"],
    "PyTorch": [r"import torch", r"from torch"],
    "TensorFlow": [r"import tensorflow", r"from tensorflow"],
    "Scikit-learn": [r"from sklearn", r"import sklearn"],
    "Keras": [r"from keras", r"import keras"],

    # Python Testing
    "pytest": [r"import pytest", r"from pytest"],
    "unittest": [r"import unittest", r"from unittest"],

    # JavaScript/TypeScript
    "React": [r"from ['\"]react['\"]", r"import React", r"useState", r"useEffect"],
    "Vue": [r"from ['\"]vue['\"]", r"createApp", r"<template>"],
    "Angular": [r"@angular/core", r"@Component", r"@Injectable"],
    "Svelte": [r"<script.*>", r"\.svelte$"],
    "Next.js": [r"from ['\"]next", r"getServerSideProps", r"getStaticProps"],
    "Express": [r"from ['\"]express['\"]", r"require\(['\"]express['\"]"],
    "NestJS": [r"@nestjs/", r"@Controller", r"@Injectable"],

    # JavaScript Testing
    "Jest": [r"from ['\"]jest['\"]", r"describe\(", r"it\(", r"expect\("],
    "Mocha": [r"from ['\"]mocha['\"]", r"describe\(", r"it\("],
    "Vitest": [r"from ['\"]vitest['\"]"],

    # Go
    "Gin": [r"github.com/gin-gonic/gin"],
    "Echo": [r"github.com/labstack/echo"],
    "Fiber": [r"github.com/gofiber/fiber"],

    # Rust
    "Actix": [r"actix-web", r"actix_web"],
    "Rocket": [r"rocket::"],
    "Axum": [r"axum::"],

    # Ruby
    "Rails": [r"Rails\.", r"ActiveRecord", r"ActionController"],
    "Sinatra": [r"require ['\"]sinatra['\"]", r"Sinatra::"],

    # Database
    "SQLAlchemy": [r"from sqlalchemy", r"import sqlalchemy"],
    "Prisma": [r"@prisma/client", r"prisma\."],
    "TypeORM": [r"from ['\"]typeorm['\"]"],
    "Sequelize": [r"from ['\"]sequelize['\"]"],
    "Mongoose": [r"from ['\"]mongoose['\"]"],
}
|
|
157
|
+
|
|
158
|
+
# Maps project type -> path substrings hinting at it.  Each repository path
# containing an indicator adds one point to that type; the highest-scoring
# type wins (see _detect_project_type()).
PROJECT_TYPE_INDICATORS = {
    "web-app": ["src/pages", "src/views", "src/components", "templates/", "static/"],
    "api": ["routes/", "endpoints/", "api/", "controllers/", "handlers/"],
    "cli": ["cli.py", "main.py", "__main__.py", "bin/", "cmd/"],
    "library": ["src/lib/", "lib/", "setup.py", "pyproject.toml", "package.json"],
    "mobile": ["ios/", "android/", "App.tsx", "App.js"],
    "ml-project": ["notebooks/", "models/", "data/", "train.py", "model.py"],
    "monorepo": ["packages/", "apps/", "lerna.json", "pnpm-workspace.yaml"],
}
|
|
167
|
+
|
|
168
|
+
# Maps architecture pattern -> directory-name indicators.  A pattern is
# reported when at least half of its indicators match some directory name
# in the repository (see _detect_architecture()).
ARCHITECTURE_PATTERNS = {
    "mvc": ["models/", "views/", "controllers/"],
    "clean-architecture": ["domain/", "usecases/", "infrastructure/", "presentation/"],
    "hexagonal": ["adapters/", "ports/", "core/"],
    "layered": ["services/", "repositories/", "controllers/"],
    "microservices": ["docker-compose", "kubernetes", "k8s/"],
    "serverless": ["serverless.yml", "lambda/", "functions/"],
}
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
# ============================================================================
|
|
179
|
+
# Analysis Functions
|
|
180
|
+
# ============================================================================
|
|
181
|
+
|
|
182
|
+
def analyze_repository(repo_path: Path) -> RepoAnalysis:
    """
    Perform deep analysis of a repository.

    Walks the repository once collecting per-file CodeMetrics, then derives
    repo-level aggregates: language mix, frameworks/libraries, project type
    and architecture, testing and documentation indicators, API surface,
    and key/entry files.

    Args:
        repo_path: Path to the repository

    Returns:
        RepoAnalysis with comprehensive metrics
    """
    analysis = RepoAnalysis()

    # Collect all source files
    source_files = _collect_source_files(repo_path)

    # Analyze each file, accumulating repo-wide totals as we go.
    all_imports: Counter[str] = Counter()
    total_functions = 0
    total_complexity = 0
    files_with_docstrings = 0

    for file_path in source_files:
        metrics = _analyze_file(repo_path, file_path)
        if metrics:
            analysis.file_metrics.append(metrics)
            analysis.total_files += 1
            analysis.total_lines += metrics.lines_total
            analysis.code_lines += metrics.lines_code
            analysis.comment_lines += metrics.lines_comment

            for imp in metrics.imports:
                all_imports[imp] += 1

            total_functions += metrics.functions
            total_complexity += metrics.complexity

            if metrics.has_docstrings:
                files_with_docstrings += 1

            if metrics.has_tests:
                analysis.test_files += 1

    # Calculate averages (guard against an empty repository)
    if analysis.total_files > 0:
        analysis.avg_file_size = analysis.code_lines / analysis.total_files
        analysis.docstring_coverage = (files_with_docstrings / analysis.total_files) * 100

    if total_functions > 0:
        analysis.avg_complexity = total_complexity / total_functions
        # FIX: avg_function_length is declared on RepoAnalysis but was never
        # populated.  Weight each file's mean by its function count; files
        # whose analyzer does not measure lengths (JS/TS, Go) contribute 0,
        # so this skews low for mixed-language repos.
        analysis.avg_function_length = sum(
            m.avg_function_length * m.functions for m in analysis.file_metrics
        ) / total_functions

    # Language breakdown
    analysis.languages = _calculate_language_breakdown(analysis.file_metrics)

    # Detect frameworks
    analysis.frameworks = _detect_frameworks(repo_path, all_imports)
    analysis.notable_libraries = _detect_notable_libraries(all_imports)

    # Detect project type and architecture
    analysis.project_type = _detect_project_type(repo_path)
    analysis.architecture_patterns = _detect_architecture(repo_path)

    # Testing analysis
    analysis.has_tests = analysis.test_files > 0
    source_files_count = analysis.total_files - analysis.test_files
    if source_files_count > 0:
        analysis.test_ratio = analysis.test_files / source_files_count
    analysis.test_frameworks = _detect_test_frameworks(analysis.frameworks, all_imports)

    # Documentation analysis
    analysis.has_readme = _check_readme(repo_path)
    analysis.readme_quality = _assess_readme_quality(repo_path)
    analysis.has_docs_folder = (repo_path / "docs").is_dir()
    analysis.has_changelog = any(
        (repo_path / name).exists()
        for name in ["CHANGELOG.md", "CHANGELOG", "HISTORY.md", "CHANGES.md"]
    )
    analysis.has_contributing = (repo_path / "CONTRIBUTING.md").exists()

    # API endpoints
    analysis.api_endpoints = _detect_api_endpoints(repo_path, analysis.frameworks)

    # Key files
    analysis.key_files = _identify_key_files(repo_path)
    analysis.entry_points = _identify_entry_points(repo_path)

    # Most used imports
    analysis.most_used_imports = all_imports.most_common(20)

    return analysis
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _collect_source_files(repo_path: Path) -> list[Path]:
|
|
274
|
+
"""Collect all source code files."""
|
|
275
|
+
skip_dirs = {
|
|
276
|
+
".git", "node_modules", "venv", ".venv", "__pycache__",
|
|
277
|
+
"dist", "build", ".next", "target", "coverage", ".tox",
|
|
278
|
+
"eggs", "*.egg-info", ".mypy_cache", ".pytest_cache",
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
source_extensions = {
|
|
282
|
+
".py", ".js", ".ts", ".tsx", ".jsx", ".go", ".rs",
|
|
283
|
+
".java", ".kt", ".swift", ".c", ".cpp", ".h", ".hpp",
|
|
284
|
+
".rb", ".php", ".cs", ".scala", ".ex", ".exs",
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
files = []
|
|
288
|
+
|
|
289
|
+
for file_path in repo_path.rglob("*"):
|
|
290
|
+
if not file_path.is_file():
|
|
291
|
+
continue
|
|
292
|
+
|
|
293
|
+
# Skip directories
|
|
294
|
+
parts = file_path.relative_to(repo_path).parts
|
|
295
|
+
if any(skip in parts for skip in skip_dirs):
|
|
296
|
+
continue
|
|
297
|
+
|
|
298
|
+
if file_path.suffix.lower() in source_extensions:
|
|
299
|
+
files.append(file_path)
|
|
300
|
+
|
|
301
|
+
return files
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def _analyze_file(repo_path: Path, file_path: Path) -> CodeMetrics | None:
    """Analyze a single source file.

    Counts blank/comment/code lines, delegates to a language-specific
    analyzer (Python, JS/TS, Go) for functions/classes/imports/complexity,
    then flags the file as a test file when it matches test heuristics.

    Args:
        repo_path: Repository root (used for the relative path).
        file_path: File to analyze.

    Returns:
        Populated CodeMetrics, or None when the file cannot be read.
    """
    try:
        content = file_path.read_text(errors="ignore")
    except Exception:
        # Unreadable file (permissions, broken symlink, ...) -- skip it.
        return None

    rel_path = str(file_path.relative_to(repo_path))
    language = _get_language(file_path)

    lines = content.split("\n")
    metrics = CodeMetrics(
        path=rel_path,
        language=language,
        lines_total=len(lines),
    )

    # Count line types.
    # NOTE: block-comment/docstring state is not tracked across lines (the
    # flag below is never flipped; the unused in_multiline_string local was
    # removed), so interior lines of multi-line comments count as code
    # unless they look like a comment on their own.
    in_multiline_comment = False

    for line in lines:
        stripped = line.strip()

        if not stripped:
            metrics.lines_blank += 1
        elif _is_comment_line(stripped, language, in_multiline_comment):
            metrics.lines_comment += 1
        else:
            metrics.lines_code += 1

    # Language-specific analysis
    if language == "Python":
        metrics = _analyze_python_file(content, metrics)
    elif language in ("JavaScript", "TypeScript"):
        metrics = _analyze_js_file(content, metrics)
    elif language == "Go":
        metrics = _analyze_go_file(content, metrics)

    # Check if it's a test file
    metrics.has_tests = _is_test_file(rel_path, content)

    return metrics
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def _analyze_python_file(content: str, metrics: CodeMetrics) -> CodeMetrics:
|
|
350
|
+
"""Deep analysis of Python file using AST."""
|
|
351
|
+
try:
|
|
352
|
+
tree = ast.parse(content)
|
|
353
|
+
except SyntaxError:
|
|
354
|
+
return metrics
|
|
355
|
+
|
|
356
|
+
function_lengths = []
|
|
357
|
+
|
|
358
|
+
for node in ast.walk(tree):
|
|
359
|
+
# Count functions
|
|
360
|
+
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
361
|
+
metrics.functions += 1
|
|
362
|
+
# Estimate function length
|
|
363
|
+
if hasattr(node, "end_lineno") and hasattr(node, "lineno"):
|
|
364
|
+
length = node.end_lineno - node.lineno + 1
|
|
365
|
+
function_lengths.append(length)
|
|
366
|
+
|
|
367
|
+
# Check for docstring
|
|
368
|
+
if (node.body and isinstance(node.body[0], ast.Expr) and
|
|
369
|
+
isinstance(node.body[0].value, ast.Constant) and
|
|
370
|
+
isinstance(node.body[0].value.value, str)):
|
|
371
|
+
metrics.has_docstrings = True
|
|
372
|
+
|
|
373
|
+
# Estimate complexity (simplified)
|
|
374
|
+
for child in ast.walk(node):
|
|
375
|
+
if isinstance(child, (ast.If, ast.For, ast.While, ast.ExceptHandler,
|
|
376
|
+
ast.With, ast.comprehension)):
|
|
377
|
+
metrics.complexity += 1
|
|
378
|
+
|
|
379
|
+
# Count classes
|
|
380
|
+
if isinstance(node, ast.ClassDef):
|
|
381
|
+
metrics.classes += 1
|
|
382
|
+
# Check for class docstring
|
|
383
|
+
if (node.body and isinstance(node.body[0], ast.Expr) and
|
|
384
|
+
isinstance(node.body[0].value, ast.Constant) and
|
|
385
|
+
isinstance(node.body[0].value.value, str)):
|
|
386
|
+
metrics.has_docstrings = True
|
|
387
|
+
|
|
388
|
+
# Extract imports
|
|
389
|
+
if isinstance(node, ast.Import):
|
|
390
|
+
for alias in node.names:
|
|
391
|
+
metrics.imports.append(alias.name.split(".")[0])
|
|
392
|
+
elif isinstance(node, ast.ImportFrom):
|
|
393
|
+
if node.module:
|
|
394
|
+
metrics.imports.append(node.module.split(".")[0])
|
|
395
|
+
|
|
396
|
+
if function_lengths:
|
|
397
|
+
metrics.avg_function_length = sum(function_lengths) / len(function_lengths)
|
|
398
|
+
|
|
399
|
+
return metrics
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def _analyze_js_file(content: str, metrics: CodeMetrics) -> CodeMetrics:
|
|
403
|
+
"""Analyze JavaScript/TypeScript file using regex."""
|
|
404
|
+
# Count functions (approximate)
|
|
405
|
+
function_patterns = [
|
|
406
|
+
r"function\s+\w+\s*\(",
|
|
407
|
+
r"const\s+\w+\s*=\s*(?:async\s*)?\(",
|
|
408
|
+
r"(?:async\s+)?function\s*\(",
|
|
409
|
+
r"\w+\s*:\s*(?:async\s*)?\(",
|
|
410
|
+
r"=>\s*{",
|
|
411
|
+
]
|
|
412
|
+
for pattern in function_patterns:
|
|
413
|
+
metrics.functions += len(re.findall(pattern, content))
|
|
414
|
+
|
|
415
|
+
# Count classes
|
|
416
|
+
metrics.classes = len(re.findall(r"class\s+\w+", content))
|
|
417
|
+
|
|
418
|
+
# Extract imports
|
|
419
|
+
import_patterns = [
|
|
420
|
+
r"import\s+.*?\s+from\s+['\"]([^'\"]+)['\"]",
|
|
421
|
+
r"require\(['\"]([^'\"]+)['\"]\)",
|
|
422
|
+
]
|
|
423
|
+
for pattern in import_patterns:
|
|
424
|
+
for match in re.findall(pattern, content):
|
|
425
|
+
# Clean up import path
|
|
426
|
+
module = match.split("/")[0].replace("@", "")
|
|
427
|
+
if module and not module.startswith("."):
|
|
428
|
+
metrics.imports.append(module)
|
|
429
|
+
|
|
430
|
+
# Estimate complexity (if/for/while/switch)
|
|
431
|
+
complexity_patterns = [r"\bif\s*\(", r"\bfor\s*\(", r"\bwhile\s*\(", r"\bswitch\s*\("]
|
|
432
|
+
for pattern in complexity_patterns:
|
|
433
|
+
metrics.complexity += len(re.findall(pattern, content))
|
|
434
|
+
|
|
435
|
+
return metrics
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def _analyze_go_file(content: str, metrics: CodeMetrics) -> CodeMetrics:
|
|
439
|
+
"""Analyze Go file using regex."""
|
|
440
|
+
# Count functions
|
|
441
|
+
metrics.functions = len(re.findall(r"func\s+(?:\([^)]+\)\s*)?\w+\s*\(", content))
|
|
442
|
+
|
|
443
|
+
# Count structs (Go's classes)
|
|
444
|
+
metrics.classes = len(re.findall(r"type\s+\w+\s+struct", content))
|
|
445
|
+
|
|
446
|
+
# Extract imports
|
|
447
|
+
import_match = re.search(r"import\s*\((.*?)\)", content, re.DOTALL)
|
|
448
|
+
if import_match:
|
|
449
|
+
for line in import_match.group(1).split("\n"):
|
|
450
|
+
match = re.search(r"\"([^\"]+)\"", line)
|
|
451
|
+
if match:
|
|
452
|
+
pkg = match.group(1).split("/")[-1]
|
|
453
|
+
metrics.imports.append(pkg)
|
|
454
|
+
|
|
455
|
+
# Single imports
|
|
456
|
+
for match in re.findall(r"import\s+\"([^\"]+)\"", content):
|
|
457
|
+
pkg = match.split("/")[-1]
|
|
458
|
+
metrics.imports.append(pkg)
|
|
459
|
+
|
|
460
|
+
# Complexity
|
|
461
|
+
metrics.complexity = len(re.findall(r"\bif\s+", content))
|
|
462
|
+
metrics.complexity += len(re.findall(r"\bfor\s+", content))
|
|
463
|
+
metrics.complexity += len(re.findall(r"\bswitch\s+", content))
|
|
464
|
+
|
|
465
|
+
return metrics
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def _get_language(file_path: Path) -> str:
|
|
469
|
+
"""Get language from file extension."""
|
|
470
|
+
ext_map = {
|
|
471
|
+
".py": "Python",
|
|
472
|
+
".js": "JavaScript",
|
|
473
|
+
".ts": "TypeScript",
|
|
474
|
+
".tsx": "TypeScript",
|
|
475
|
+
".jsx": "JavaScript",
|
|
476
|
+
".go": "Go",
|
|
477
|
+
".rs": "Rust",
|
|
478
|
+
".java": "Java",
|
|
479
|
+
".kt": "Kotlin",
|
|
480
|
+
".swift": "Swift",
|
|
481
|
+
".c": "C",
|
|
482
|
+
".cpp": "C++",
|
|
483
|
+
".h": "C",
|
|
484
|
+
".hpp": "C++",
|
|
485
|
+
".rb": "Ruby",
|
|
486
|
+
".php": "PHP",
|
|
487
|
+
".cs": "C#",
|
|
488
|
+
}
|
|
489
|
+
return ext_map.get(file_path.suffix.lower(), "Unknown")
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
def _is_comment_line(line: str, language: str, in_multiline: bool) -> bool:
|
|
493
|
+
"""Check if a line is a comment."""
|
|
494
|
+
if language == "Python":
|
|
495
|
+
return line.startswith("#") or line.startswith('"""') or line.startswith("'''")
|
|
496
|
+
elif language in ("JavaScript", "TypeScript", "Java", "Go", "Rust", "C", "C++"):
|
|
497
|
+
return line.startswith("//") or line.startswith("/*") or line.startswith("*")
|
|
498
|
+
elif language == "Ruby":
|
|
499
|
+
return line.startswith("#")
|
|
500
|
+
return False
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def _is_test_file(path: str, content: str) -> bool:
|
|
504
|
+
"""Check if file is a test file."""
|
|
505
|
+
path_lower = path.lower()
|
|
506
|
+
|
|
507
|
+
# Check path patterns
|
|
508
|
+
test_patterns = ["test_", "_test.", ".test.", "spec.", "_spec.", "/tests/", "/test/", "/__tests__/"]
|
|
509
|
+
if any(p in path_lower for p in test_patterns):
|
|
510
|
+
return True
|
|
511
|
+
|
|
512
|
+
# Check content patterns
|
|
513
|
+
test_content_patterns = [
|
|
514
|
+
r"def test_\w+",
|
|
515
|
+
r"@pytest",
|
|
516
|
+
r"unittest\.TestCase",
|
|
517
|
+
r"describe\(['\"]",
|
|
518
|
+
r"it\(['\"]",
|
|
519
|
+
r"expect\(",
|
|
520
|
+
r"assert\s+",
|
|
521
|
+
]
|
|
522
|
+
for pattern in test_content_patterns:
|
|
523
|
+
if re.search(pattern, content):
|
|
524
|
+
return True
|
|
525
|
+
|
|
526
|
+
return False
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
def _calculate_language_breakdown(metrics: list[CodeMetrics]) -> dict[str, float]:
|
|
530
|
+
"""Calculate language percentage breakdown."""
|
|
531
|
+
lang_lines: Counter[str] = Counter()
|
|
532
|
+
total_lines = 0
|
|
533
|
+
|
|
534
|
+
for m in metrics:
|
|
535
|
+
if m.language != "Unknown":
|
|
536
|
+
lang_lines[m.language] += m.lines_code
|
|
537
|
+
total_lines += m.lines_code
|
|
538
|
+
|
|
539
|
+
if total_lines == 0:
|
|
540
|
+
return {}
|
|
541
|
+
|
|
542
|
+
return {
|
|
543
|
+
lang: round((lines / total_lines) * 100, 1)
|
|
544
|
+
for lang, lines in lang_lines.most_common()
|
|
545
|
+
if (lines / total_lines) >= 0.01 # At least 1%
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def _detect_frameworks(repo_path: Path, all_imports: Counter) -> list[str]:
    """Detect frameworks used in the repository.

    Concatenates a bounded sample of Python/JS/TS sources plus the common
    dependency manifests, then matches FRAMEWORK_PATTERNS regexes against
    it case-insensitively.  *all_imports* is accepted for interface
    stability but is not consulted here.
    """
    candidates = (
        list(repo_path.rglob("*.py"))
        + list(repo_path.rglob("*.js"))
        + list(repo_path.rglob("*.ts"))
        + list(repo_path.rglob("*.tsx"))
    )

    # Build one big haystack from a sample of sources...
    haystack_parts: list[str] = []
    for source in candidates[:50]:  # cap the work on huge repos
        try:
            haystack_parts.append(source.read_text(errors="ignore"))
        except Exception:
            pass

    # ...plus the dependency manifests, which often name frameworks directly.
    for manifest_name in ("package.json", "requirements.txt", "pyproject.toml",
                          "Cargo.toml", "go.mod"):
        manifest = repo_path / manifest_name
        if manifest.exists():
            try:
                haystack_parts.append(manifest.read_text())
            except Exception:
                pass

    haystack = "".join(haystack_parts)

    detected: list[str] = []
    for framework, patterns in FRAMEWORK_PATTERNS.items():
        # Any single matching pattern is enough to report the framework.
        if any(re.search(pattern, haystack, re.IGNORECASE) for pattern in patterns):
            if framework not in detected:
                detected.append(framework)

    return detected
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
def _detect_notable_libraries(all_imports: Counter) -> list[str]:
|
|
584
|
+
"""Detect notable libraries from imports."""
|
|
585
|
+
notable = {
|
|
586
|
+
# Python
|
|
587
|
+
"requests": "HTTP client",
|
|
588
|
+
"aiohttp": "Async HTTP",
|
|
589
|
+
"celery": "Task queue",
|
|
590
|
+
"redis": "Redis client",
|
|
591
|
+
"pydantic": "Data validation",
|
|
592
|
+
"sqlalchemy": "ORM",
|
|
593
|
+
"alembic": "Migrations",
|
|
594
|
+
"boto3": "AWS SDK",
|
|
595
|
+
"opencv": "Computer vision",
|
|
596
|
+
"pillow": "Image processing",
|
|
597
|
+
"matplotlib": "Plotting",
|
|
598
|
+
"seaborn": "Statistical viz",
|
|
599
|
+
# JavaScript
|
|
600
|
+
"axios": "HTTP client",
|
|
601
|
+
"lodash": "Utilities",
|
|
602
|
+
"moment": "Date handling",
|
|
603
|
+
"dayjs": "Date handling",
|
|
604
|
+
"rxjs": "Reactive programming",
|
|
605
|
+
"socket.io": "WebSockets",
|
|
606
|
+
"graphql": "GraphQL",
|
|
607
|
+
"apollo": "GraphQL client",
|
|
608
|
+
"redux": "State management",
|
|
609
|
+
"zustand": "State management",
|
|
610
|
+
"tanstack": "Data fetching",
|
|
611
|
+
"tailwindcss": "CSS framework",
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
detected = []
|
|
615
|
+
for imp, count in all_imports.items():
|
|
616
|
+
imp_lower = imp.lower()
|
|
617
|
+
for lib, desc in notable.items():
|
|
618
|
+
if lib in imp_lower and lib not in detected:
|
|
619
|
+
detected.append(lib)
|
|
620
|
+
|
|
621
|
+
return detected[:15] # Top 15
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
def _detect_project_type(repo_path: Path) -> str:
    """Detect the type of project.

    Scores each PROJECT_TYPE_INDICATORS entry by how many repository paths
    contain one of its indicator substrings (case-insensitive), and returns
    the highest-scoring type, or "unknown" when nothing matches.
    """
    # Collect every relative path once.
    # FIX: the original also added top-level names via iterdir(), but those
    # are identical to their relative paths from rglob(), so that extra
    # loop was fully redundant and has been removed.
    relative_paths: set[str] = set()
    for entry in repo_path.rglob("*"):
        try:
            relative_paths.add(str(entry.relative_to(repo_path)))
        except Exception:
            pass

    scores: Counter[str] = Counter()

    for project_type, indicators in PROJECT_TYPE_INDICATORS.items():
        for indicator in indicators:
            for candidate in relative_paths:
                if indicator.lower() in candidate.lower():
                    scores[project_type] += 1

    if scores:
        return scores.most_common(1)[0][0]
    return "unknown"
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
def _detect_architecture(repo_path: Path) -> list[str]:
    """Detect architecture patterns from directory names.

    A pattern is reported when at least half of its ARCHITECTURE_PATTERNS
    indicator strings match some directory name (lowercased, with a
    trailing '/' appended) anywhere in the repository.
    """
    directory_names = {
        entry.name.lower() + "/"
        for entry in repo_path.rglob("*")
        if entry.is_dir()
    }

    detected = []
    for pattern_name, indicators in ARCHITECTURE_PATTERNS.items():
        hits = sum(
            1 for indicator in indicators
            if any(indicator.lower() in name for name in directory_names)
        )
        if hits >= len(indicators) * 0.5:  # at least 50% of indicators present
            detected.append(pattern_name)

    return detected
|
|
666
|
+
|
|
667
|
+
|
|
668
|
+
def _detect_test_frameworks(frameworks: list[str], imports: Counter) -> list[str]:
|
|
669
|
+
"""Detect testing frameworks."""
|
|
670
|
+
test_frameworks = ["pytest", "unittest", "Jest", "Mocha", "Vitest"]
|
|
671
|
+
return [f for f in test_frameworks if f in frameworks]
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
def _check_readme(repo_path: Path) -> bool:
|
|
675
|
+
"""Check if repository has a README."""
|
|
676
|
+
readme_names = ["README.md", "README.rst", "README.txt", "README"]
|
|
677
|
+
return any((repo_path / name).exists() for name in readme_names)
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
def _assess_readme_quality(repo_path: Path) -> str:
|
|
681
|
+
"""Assess README quality."""
|
|
682
|
+
readme_names = ["README.md", "README.rst", "README.txt", "README"]
|
|
683
|
+
|
|
684
|
+
for name in readme_names:
|
|
685
|
+
readme_path = repo_path / name
|
|
686
|
+
if readme_path.exists():
|
|
687
|
+
try:
|
|
688
|
+
content = readme_path.read_text()
|
|
689
|
+
lines = len(content.split("\n"))
|
|
690
|
+
|
|
691
|
+
# Check for common sections
|
|
692
|
+
has_install = bool(re.search(r"(?i)install|setup|getting started", content))
|
|
693
|
+
has_usage = bool(re.search(r"(?i)usage|example|how to", content))
|
|
694
|
+
has_api = bool(re.search(r"(?i)api|reference|documentation", content))
|
|
695
|
+
has_license = bool(re.search(r"(?i)license|licence", content))
|
|
696
|
+
has_badges = bool(re.search(r"\[!\[", content)) # Markdown badges
|
|
697
|
+
|
|
698
|
+
score = sum([has_install, has_usage, has_api, has_license, has_badges])
|
|
699
|
+
|
|
700
|
+
if lines < 20:
|
|
701
|
+
return "minimal"
|
|
702
|
+
elif score >= 4 and lines > 100:
|
|
703
|
+
return "excellent"
|
|
704
|
+
elif score >= 2 and lines > 50:
|
|
705
|
+
return "good"
|
|
706
|
+
else:
|
|
707
|
+
return "basic"
|
|
708
|
+
except Exception:
|
|
709
|
+
pass
|
|
710
|
+
|
|
711
|
+
return "none"
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
def _detect_api_endpoints(repo_path: Path, frameworks: list[str]) -> list[dict]:
|
|
715
|
+
"""Detect API endpoints/routes."""
|
|
716
|
+
endpoints = []
|
|
717
|
+
|
|
718
|
+
# FastAPI patterns
|
|
719
|
+
fastapi_patterns = [
|
|
720
|
+
(r"@(?:app|router)\.(get|post|put|patch|delete)\(['\"]([^'\"]+)['\"]", "FastAPI"),
|
|
721
|
+
]
|
|
722
|
+
|
|
723
|
+
# Flask patterns
|
|
724
|
+
flask_patterns = [
|
|
725
|
+
(r"@(?:app|blueprint)\.(route|get|post|put|delete)\(['\"]([^'\"]+)['\"]", "Flask"),
|
|
726
|
+
]
|
|
727
|
+
|
|
728
|
+
# Express patterns
|
|
729
|
+
express_patterns = [
|
|
730
|
+
(r"(?:app|router)\.(get|post|put|patch|delete)\(['\"]([^'\"]+)['\"]", "Express"),
|
|
731
|
+
]
|
|
732
|
+
|
|
733
|
+
patterns = []
|
|
734
|
+
if "FastAPI" in frameworks:
|
|
735
|
+
patterns.extend(fastapi_patterns)
|
|
736
|
+
if "Flask" in frameworks:
|
|
737
|
+
patterns.extend(flask_patterns)
|
|
738
|
+
if "Express" in frameworks or "NestJS" in frameworks:
|
|
739
|
+
patterns.extend(express_patterns)
|
|
740
|
+
|
|
741
|
+
# If no framework detected, try all patterns
|
|
742
|
+
if not patterns:
|
|
743
|
+
patterns = fastapi_patterns + flask_patterns + express_patterns
|
|
744
|
+
|
|
745
|
+
for file_path in list(repo_path.rglob("*.py")) + list(repo_path.rglob("*.js")) + list(repo_path.rglob("*.ts")):
|
|
746
|
+
try:
|
|
747
|
+
content = file_path.read_text(errors="ignore")
|
|
748
|
+
for pattern, framework in patterns:
|
|
749
|
+
for match in re.findall(pattern, content, re.IGNORECASE):
|
|
750
|
+
if isinstance(match, tuple):
|
|
751
|
+
method, path = match
|
|
752
|
+
else:
|
|
753
|
+
method, path = "GET", match
|
|
754
|
+
|
|
755
|
+
endpoints.append({
|
|
756
|
+
"method": method.upper(),
|
|
757
|
+
"path": path,
|
|
758
|
+
"file": str(file_path.relative_to(repo_path)),
|
|
759
|
+
})
|
|
760
|
+
except Exception:
|
|
761
|
+
pass
|
|
762
|
+
|
|
763
|
+
# Deduplicate and limit
|
|
764
|
+
seen = set()
|
|
765
|
+
unique_endpoints = []
|
|
766
|
+
for ep in endpoints:
|
|
767
|
+
key = (ep["method"], ep["path"])
|
|
768
|
+
if key not in seen:
|
|
769
|
+
seen.add(key)
|
|
770
|
+
unique_endpoints.append(ep)
|
|
771
|
+
|
|
772
|
+
return unique_endpoints[:50] # Limit to 50
|
|
773
|
+
|
|
774
|
+
|
|
775
|
+
def _identify_key_files(repo_path: Path) -> list[str]:
|
|
776
|
+
"""Identify key files in the repository."""
|
|
777
|
+
key_patterns = [
|
|
778
|
+
"main.py", "app.py", "index.py", "server.py", "cli.py",
|
|
779
|
+
"main.js", "index.js", "app.js", "server.js",
|
|
780
|
+
"main.ts", "index.ts", "app.ts", "server.ts",
|
|
781
|
+
"main.go", "main.rs",
|
|
782
|
+
"Dockerfile", "docker-compose.yml", "docker-compose.yaml",
|
|
783
|
+
"Makefile", "justfile",
|
|
784
|
+
".env.example", ".env.sample",
|
|
785
|
+
"setup.py", "pyproject.toml", "package.json", "Cargo.toml", "go.mod",
|
|
786
|
+
]
|
|
787
|
+
|
|
788
|
+
found = []
|
|
789
|
+
for pattern in key_patterns:
|
|
790
|
+
for file_path in repo_path.rglob(pattern):
|
|
791
|
+
rel_path = str(file_path.relative_to(repo_path))
|
|
792
|
+
if rel_path not in found:
|
|
793
|
+
found.append(rel_path)
|
|
794
|
+
|
|
795
|
+
return found[:20]
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
def _identify_entry_points(repo_path: Path) -> list[str]:
|
|
799
|
+
"""Identify likely entry points."""
|
|
800
|
+
entry_patterns = [
|
|
801
|
+
"main.py", "__main__.py", "app.py", "cli.py", "server.py", "run.py",
|
|
802
|
+
"index.js", "main.js", "app.js", "server.js",
|
|
803
|
+
"index.ts", "main.ts", "app.ts", "server.ts",
|
|
804
|
+
"main.go", "cmd/main.go",
|
|
805
|
+
"main.rs", "src/main.rs",
|
|
806
|
+
]
|
|
807
|
+
|
|
808
|
+
found = []
|
|
809
|
+
for pattern in entry_patterns:
|
|
810
|
+
for file_path in repo_path.rglob(pattern):
|
|
811
|
+
rel_path = str(file_path.relative_to(repo_path))
|
|
812
|
+
# Prefer root-level entry points
|
|
813
|
+
if "/" not in rel_path or rel_path.startswith("src/") or rel_path.startswith("cmd/"):
|
|
814
|
+
found.append(rel_path)
|
|
815
|
+
|
|
816
|
+
return found[:5]
|
|
817
|
+
|
|
818
|
+
|
|
819
|
+
# ============================================================================
|
|
820
|
+
# Public API
|
|
821
|
+
# ============================================================================
|
|
822
|
+
|
|
823
|
+
def analyze_repo(repo_path: Path) -> dict[str, Any]:
    """
    Analyze a repository and return results as a dictionary.

    This is the main entry point for the analysis tool.
    """
    result = analyze_repository(repo_path)

    # Assemble each named section first, then compose the final payload.
    summary = {
        "total_files": result.total_files,
        "total_lines": result.total_lines,
        "code_lines": result.code_lines,
        "comment_lines": result.comment_lines,
        "project_type": result.project_type,
    }
    quality = {
        "avg_file_size_lines": round(result.avg_file_size, 1),
        "avg_complexity": round(result.avg_complexity, 2),
        "docstring_coverage_pct": round(result.docstring_coverage, 1),
    }
    testing = {
        "has_tests": result.has_tests,
        "test_files": result.test_files,
        "test_ratio": round(result.test_ratio, 2),
        "test_frameworks": result.test_frameworks,
    }
    documentation = {
        "has_readme": result.has_readme,
        "readme_quality": result.readme_quality,
        "has_docs_folder": result.has_docs_folder,
        "has_changelog": result.has_changelog,
        "has_contributing": result.has_contributing,
    }
    top_imports = [
        {"name": name, "count": count}
        for name, count in result.most_used_imports[:15]
    ]

    return {
        "summary": summary,
        "languages": result.languages,
        "frameworks": result.frameworks,
        "notable_libraries": result.notable_libraries,
        "architecture": result.architecture_patterns,
        "quality": quality,
        "testing": testing,
        "documentation": documentation,
        "api_endpoints": result.api_endpoints[:20],  # Limit for readability
        "key_files": result.key_files,
        "entry_points": result.entry_points,
        "most_used_imports": top_imports,
    }
|
|
869
|
+
|
|
870
|
+
|
|
871
|
+
def get_code_snippets(
    repo_path: Path,
    max_snippets: int = 5,
    max_lines: int = 50,
) -> list[dict[str, Any]]:
    """
    Extract interesting code snippets from a repository.

    Looks for:
    - Main entry points
    - Key functions/classes
    - Interesting patterns

    Args:
        repo_path: Repository root to scan.
        max_snippets: Maximum number of snippets to return.
        max_lines: Maximum lines kept per snippet; longer files are cut.

    Returns:
        List of dicts with ``path``, ``content``, ``total_lines`` and
        ``truncated`` keys.
    """
    snippets: list[dict[str, Any]] = []

    # Conventionally important filenames, most significant first.
    interesting_files = [
        "main.py", "app.py", "cli.py", "server.py",
        "models.py", "schema.py", "routes.py", "handlers.py",
        "index.ts", "App.tsx", "main.ts",
    ]

    for pattern in interesting_files:
        # Stop before starting another full-tree rglob once the quota is
        # filled; the original only broke out of the inner loop, so every
        # remaining pattern still rescanned the whole tree.
        if len(snippets) >= max_snippets:
            break
        for file_path in repo_path.rglob(pattern):
            if len(snippets) >= max_snippets:
                break
            try:
                content = file_path.read_text(errors="ignore")
            except Exception:
                # Unreadable file (permissions, filesystem races) -- skip it.
                continue

            lines = content.split("\n")
            # Take the first max_lines lines and flag when the file was cut.
            snippets.append({
                "path": str(file_path.relative_to(repo_path)),
                "content": "\n".join(lines[:max_lines]),
                "total_lines": len(lines),
                "truncated": len(lines) > max_lines,
            })

    return snippets
|
|
915
|
+
|