codebase-intel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebase_intel/__init__.py +3 -0
- codebase_intel/analytics/__init__.py +1 -0
- codebase_intel/analytics/benchmark.py +406 -0
- codebase_intel/analytics/feedback.py +496 -0
- codebase_intel/analytics/tracker.py +439 -0
- codebase_intel/cli/__init__.py +1 -0
- codebase_intel/cli/main.py +740 -0
- codebase_intel/contracts/__init__.py +1 -0
- codebase_intel/contracts/auto_generator.py +438 -0
- codebase_intel/contracts/evaluator.py +531 -0
- codebase_intel/contracts/models.py +433 -0
- codebase_intel/contracts/registry.py +225 -0
- codebase_intel/core/__init__.py +1 -0
- codebase_intel/core/config.py +248 -0
- codebase_intel/core/exceptions.py +454 -0
- codebase_intel/core/types.py +375 -0
- codebase_intel/decisions/__init__.py +1 -0
- codebase_intel/decisions/miner.py +297 -0
- codebase_intel/decisions/models.py +302 -0
- codebase_intel/decisions/store.py +411 -0
- codebase_intel/drift/__init__.py +1 -0
- codebase_intel/drift/detector.py +443 -0
- codebase_intel/graph/__init__.py +1 -0
- codebase_intel/graph/builder.py +391 -0
- codebase_intel/graph/parser.py +1232 -0
- codebase_intel/graph/query.py +377 -0
- codebase_intel/graph/storage.py +736 -0
- codebase_intel/mcp/__init__.py +1 -0
- codebase_intel/mcp/server.py +710 -0
- codebase_intel/orchestrator/__init__.py +1 -0
- codebase_intel/orchestrator/assembler.py +649 -0
- codebase_intel-0.1.0.dist-info/METADATA +361 -0
- codebase_intel-0.1.0.dist-info/RECORD +36 -0
- codebase_intel-0.1.0.dist-info/WHEEL +4 -0
- codebase_intel-0.1.0.dist-info/entry_points.txt +2 -0
- codebase_intel-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Quality Contracts — executable specifications for code quality enforcement."""
|
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
"""Auto-contract generator — discovers patterns in your codebase and generates rules.
|
|
2
|
+
|
|
3
|
+
THIS IS WHAT NOBODY ELSE DOES.
|
|
4
|
+
|
|
5
|
+
Instead of manually writing contracts, this module analyzes your existing code
|
|
6
|
+
and detects the patterns your team already follows:
|
|
7
|
+
|
|
8
|
+
- "Every API endpoint uses async def" → generates an async-enforcement rule
|
|
9
|
+
- "No file imports directly from the database layer except repositories" → layer rule
|
|
10
|
+
- "All service classes follow the naming pattern XxxService" → naming rule
|
|
11
|
+
- "Error handling always uses custom exception classes, never bare except" → pattern rule
|
|
12
|
+
- "Every test file has a corresponding source file" → coverage rule
|
|
13
|
+
|
|
14
|
+
The generated contracts are DRAFTS — human reviews and activates them.
|
|
15
|
+
This is intentional: auto-generated rules shouldn't silently enforce.
|
|
16
|
+
|
|
17
|
+
Why this matters:
|
|
18
|
+
- New team members learn conventions instantly
|
|
19
|
+
- AI agents follow patterns without being told
|
|
20
|
+
- Patterns that exist in practice become documented and enforced
|
|
21
|
+
- Zero manual contract writing needed to get started
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import logging
|
|
27
|
+
import re
|
|
28
|
+
from collections import Counter
|
|
29
|
+
from dataclasses import dataclass, field
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
from typing import TYPE_CHECKING, Any
|
|
32
|
+
|
|
33
|
+
from codebase_intel.contracts.models import (
|
|
34
|
+
ContractRule,
|
|
35
|
+
PatternExample,
|
|
36
|
+
QualityContract,
|
|
37
|
+
RuleKind,
|
|
38
|
+
ScopeFilter,
|
|
39
|
+
)
|
|
40
|
+
from codebase_intel.core.types import ContractSeverity, Language
|
|
41
|
+
|
|
42
|
+
if TYPE_CHECKING:
|
|
43
|
+
from codebase_intel.graph.storage import GraphStorage
|
|
44
|
+
|
|
45
|
+
logger = logging.getLogger(__name__)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class DetectedPattern:
    """A convention detected in the codebase, with evidence and a draft rule.

    Produced by the detectors in AutoContractGenerator. Only patterns with
    confidence >= 0.7 are surfaced by ``analyze()``; the attached
    ``suggested_rule`` is a draft that a human reviews before activation.
    """

    name: str  # Short human-readable pattern name
    description: str  # One-line summary including supporting counts
    kind: RuleKind  # Rule category this pattern maps to
    confidence: float  # 0.0-1.0: how consistently the pattern is followed
    occurrences: int  # How many files follow this pattern
    violations: int  # How many files break this pattern
    examples: list[str] = field(default_factory=list)  # File paths as examples
    counter_examples: list[str] = field(default_factory=list)  # Paths that break the pattern
    suggested_rule: ContractRule | None = None  # Draft rule derived from the pattern, if any
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class AutoContractGenerator:
    """Analyzes a codebase and generates quality contracts from detected patterns.

    Typical flow: ``analyze()`` scans source files for conventions the team
    already follows; ``generate_contract()`` bundles the surviving patterns'
    suggested rules into a draft contract for human review and activation.
    """

    def __init__(self, project_root: Path) -> None:
        # Directory tree scanned by _collect_source_files().
        self._project_root = project_root
|
|
68
|
+
|
|
69
|
+
async def analyze(
    self,
    storage: GraphStorage | None = None,
) -> list[DetectedPattern]:
    """Run every pattern detector and return the high-confidence patterns.

    Detectors each cover one convention family: async usage in handlers,
    import layering, naming, error handling, file organization, and
    docstring coverage. Patterns below 0.7 confidence are discarded.
    """
    source_files = self._collect_source_files()
    if not source_files:
        return []

    detectors = (
        self._detect_async_patterns,
        self._detect_import_layer_patterns,
        self._detect_naming_conventions,
        self._detect_error_handling_patterns,
        self._detect_file_organization,
        self._detect_docstring_patterns,
    )
    discovered: list[DetectedPattern] = []
    for detector in detectors:
        discovered.extend(detector(source_files))

    # Only surface conventions the codebase follows consistently.
    return [pattern for pattern in discovered if pattern.confidence >= 0.7]
|
|
98
|
+
|
|
99
|
+
def generate_contract(
    self,
    patterns: list[DetectedPattern],
    contract_id: str = "auto-detected",
) -> QualityContract:
    """Bundle the suggested rules from *patterns* into one draft contract.

    Patterns that produced no concrete rule are skipped. The contract is
    tagged "auto-generated" and given a lower priority than hand-written
    contracts so manual rules take precedence.
    """
    collected_rules = [p.suggested_rule for p in patterns if p.suggested_rule]

    summary = (
        f"Quality rules auto-generated from analyzing {len(patterns)} "
        "patterns in this codebase. Review and activate rules as needed."
    )
    return QualityContract(
        id=contract_id,
        name="Auto-Detected Project Conventions",
        description=summary,
        priority=150,  # Lower than manual contracts
        scope=ScopeFilter(exclude_tests=True, exclude_generated=True),
        rules=collected_rules,
        tags=["auto-generated"],
    )
|
|
122
|
+
|
|
123
|
+
# -------------------------------------------------------------------
|
|
124
|
+
# Pattern detectors
|
|
125
|
+
# -------------------------------------------------------------------
|
|
126
|
+
|
|
127
|
+
def _detect_async_patterns(
    self, files: dict[Path, str]
) -> list[DetectedPattern]:
    """Check whether handler files in this project consistently use async."""
    found: list[DetectedPattern] = []

    # Markers that make a file look like a route/endpoint handler module.
    handler_markers = ("@router.", "@app.", "async def get", "async def post", "async def create")

    async_handler_files: list[str] = []
    sync_handler_files: list[str] = []

    for path, text in files.items():
        if path.suffix != ".py":
            continue
        # Only handler-looking files participate in the tally.
        if not any(marker in text for marker in handler_markers):
            continue

        uses_async = re.search(r"async\s+def\s+\w+", text) is not None
        uses_sync = re.search(r"(?<!async\s)def\s+\w+", text) is not None

        if uses_async:
            async_handler_files.append(str(path.relative_to(self._project_root)))
        if uses_sync and not uses_async:
            sync_handler_files.append(str(path.relative_to(self._project_root)))

    n_async = len(async_handler_files)
    n_sync = len(sync_handler_files)
    total = n_async + n_sync
    # Need at least 3 handler files and an async majority to call it a pattern.
    if total < 3 or n_async <= n_sync:
        return found

    confidence = n_async / total
    found.append(DetectedPattern(
        name="Async handlers",
        description=f"This project uses async handlers ({n_async}/{total} handler files are async)",
        kind=RuleKind.PATTERN,
        confidence=confidence,
        occurrences=n_async,
        violations=n_sync,
        examples=async_handler_files[:3],
        counter_examples=sync_handler_files[:3],
        suggested_rule=ContractRule(
            id="auto-async-handlers",
            name="Use async for all handler functions",
            description=f"Detected pattern: {confidence:.0%} of handler files use async. Keep it consistent.",
            kind=RuleKind.PATTERN,
            severity=ContractSeverity.WARNING,
            pattern=r"(?<!async\s)def\s+(get_|post_|put_|delete_|patch_|create_|update_|list_)",
            fix_suggestion="Use `async def` for handler functions to match project convention.",
        ),
    ))

    return found
|
|
180
|
+
|
|
181
|
+
def _detect_import_layer_patterns(
    self, files: dict[Path, str]
) -> list[DetectedPattern]:
    """Detect layer violation patterns in imports.

    Looks for the common convention that route/API files do not import
    database/ORM modules directly (they should go through a service layer).
    """
    found: list[DetectedPattern] = []

    db_import_re = re.compile(r"from\s+\w*(models?|db|database|orm|tortoise|sqlalchemy)\w*\s+import")
    route_markers = ("route", "router", "api", "endpoint", "view")

    dirty: list[str] = []  # route files importing DB modules directly
    tidy: list[str] = []   # route files without direct DB imports

    for path, text in files.items():
        rel = str(path.relative_to(self._project_root))
        # Only files whose path suggests they are route/API modules count.
        if not any(marker in rel.lower() for marker in route_markers):
            continue
        (dirty if db_import_re.search(text) else tidy).append(rel)

    total = len(dirty) + len(tidy)
    # Need at least 3 route files and a clean majority to call it a pattern.
    if total < 3 or len(tidy) <= len(dirty):
        return found

    confidence = len(tidy) / total
    found.append(DetectedPattern(
        name="Layer separation (routes ↛ DB)",
        description=f"{confidence:.0%} of route files don't import DB directly — enforce this pattern",
        kind=RuleKind.ARCHITECTURAL,
        confidence=confidence,
        occurrences=len(tidy),
        violations=len(dirty),
        examples=tidy[:3],
        counter_examples=dirty[:3],
        suggested_rule=ContractRule(
            id="auto-no-db-in-routes",
            name="No direct DB imports in route handlers",
            description="Route files should not import from database/model modules directly. Use a service layer.",
            kind=RuleKind.ARCHITECTURAL,
            severity=ContractSeverity.WARNING,
            pattern=r"from\s+\w*(models?|db|database)\w*\s+import",
            fix_suggestion="Import from a service module instead. Routes → Services → Repositories → DB.",
        ),
    ))

    return found
|
|
236
|
+
|
|
237
|
+
def _detect_naming_conventions(
    self, files: dict[Path, str]
) -> list[DetectedPattern]:
    """Detect consistent naming conventions for classes.

    Currently detects the ``XxxService`` class-naming convention: when at
    least three distinct ``*Service`` classes exist, an INFO-level naming
    rule is suggested. Confidence is fixed at 0.9 and violations at 0
    because no counter-pattern is searched for here.
    """
    patterns: list[DetectedPattern] = []

    # Compile once; only file *contents* are needed, so iterate values()
    # instead of items() with an unused key (the original discarded `fp`).
    service_class_re = re.compile(r"class\s+(\w+Service)\b")
    service_names: Counter[str] = Counter()
    for content in files.values():
        for match in service_class_re.finditer(content):
            service_names[match.group(1)] += 1

    if len(service_names) >= 3:
        patterns.append(DetectedPattern(
            name="Service class naming",
            description=f"Found {len(service_names)} service classes following XxxService pattern",
            kind=RuleKind.PATTERN,
            confidence=0.9,
            occurrences=len(service_names),
            violations=0,
            examples=list(service_names.keys())[:5],
            suggested_rule=ContractRule(
                id="auto-service-naming",
                name="Service classes must be named XxxService",
                description="Detected convention: all service classes follow the XxxService naming pattern.",
                kind=RuleKind.PATTERN,
                severity=ContractSeverity.INFO,
                fix_suggestion="Name service classes with the Service suffix: UserService, OrderService, etc.",
            ),
        ))

    return patterns
|
|
269
|
+
|
|
270
|
+
def _detect_error_handling_patterns(
    self, files: dict[Path, str]
) -> list[DetectedPattern]:
    """Detect error handling conventions (custom exceptions vs bare except)."""
    found: list[DetectedPattern] = []

    raises_custom_re = re.compile(r"raise\s+\w+Error\(|raise\s+\w+Exception\(")
    bare_except_re = re.compile(r"except\s*:")

    custom_files: list[str] = []
    bare_files: list[str] = []

    for path, text in files.items():
        if path.suffix != ".py":
            continue

        rel = str(path.relative_to(self._project_root))
        # A file may land in both buckets: it can raise custom exceptions
        # *and* contain a bare except somewhere else.
        if raises_custom_re.search(text):
            custom_files.append(rel)
        if bare_except_re.search(text):
            bare_files.append(rel)

    n_custom = len(custom_files)
    n_bare = len(bare_files)
    # Need at least 3 files using custom exceptions to call it a convention.
    if n_custom < 3:
        return found

    confidence = n_custom / max(n_custom + n_bare, 1)
    found.append(DetectedPattern(
        name="Custom exception handling",
        description=f"{n_custom} files use custom exceptions. {n_bare} use bare except.",
        kind=RuleKind.PATTERN,
        confidence=confidence,
        occurrences=n_custom,
        violations=n_bare,
        examples=custom_files[:3],
        counter_examples=bare_files[:3],
        suggested_rule=ContractRule(
            id="auto-no-bare-except",
            name="No bare except clauses",
            description="This project uses custom exception classes. Avoid bare `except:` clauses.",
            kind=RuleKind.PATTERN,
            severity=ContractSeverity.WARNING,
            pattern=r"except\s*:",
            fix_suggestion="Catch specific exceptions: `except ValueError:` or `except CustomError:`",
        ),
    ))

    return found
|
|
321
|
+
|
|
322
|
+
def _detect_file_organization(
    self, files: dict[Path, str]
) -> list[DetectedPattern]:
    """Detect whether test files mirror the source module structure."""
    found: list[DetectedPattern] = []

    test_dir_names = {"tests", "test", "__tests__"}
    source_modules: set[str] = set()
    test_modules: set[str] = set()

    for path in files:
        rel_parts = path.relative_to(self._project_root).parts
        if any(part in test_dir_names for part in rel_parts):
            # Normalize test_foo.py / foo_test.py down to "foo".
            test_modules.add(path.stem.replace("test_", "").replace("_test", ""))
        elif path.suffix == ".py" and not path.stem.startswith("_"):
            source_modules.add(path.stem)

    if not (source_modules and test_modules):
        return found

    covered = source_modules & test_modules
    coverage_pct = len(covered) / max(len(source_modules), 1)
    # Below 30% coverage this is not a convention worth enforcing.
    if coverage_pct < 0.3:
        return found

    found.append(DetectedPattern(
        name="Test coverage structure",
        description=f"{len(covered)}/{len(source_modules)} source modules have matching test files ({coverage_pct:.0%})",
        kind=RuleKind.ARCHITECTURAL,
        confidence=coverage_pct,
        occurrences=len(covered),
        violations=len(source_modules) - len(covered),
        examples=list(covered)[:5],
        suggested_rule=ContractRule(
            id="auto-test-coverage",
            name="Every source module should have a test file",
            description=f"Detected: {coverage_pct:.0%} of modules have tests. Maintain this coverage.",
            kind=RuleKind.ARCHITECTURAL,
            severity=ContractSeverity.INFO,
            fix_suggestion="Create test_<module>.py for new modules.",
        ),
    ))

    return found
|
|
365
|
+
|
|
366
|
+
def _detect_docstring_patterns(
    self, files: dict[Path, str]
) -> list[DetectedPattern]:
    """Detect docstring conventions for public functions.

    Counts public functions across Python files and how many are
    immediately followed by a docstring. When at least 10 functions exist
    and >= 60% carry docstrings, suggests an INFO-level rule to keep the
    convention going.
    """
    patterns: list[DetectedPattern] = []

    # Matches a function header and captures whether the next indented line
    # opens a docstring. Recognizes both triple-quote styles — the previous
    # version only counted \"\"\" and undercounted '''-quoted docstrings.
    # The [a-z] first-character class already excludes _private names, so no
    # separate underscore check is needed. NOTE(review): with re.DOTALL the
    # header match is heuristic (first ')' then first ':'), kept as-is.
    func_re = re.compile(
        r'def\s+([a-z]\w+)\s*\(.*?\).*?:\s*\n(\s+(?:"""|\'\'\'))?',
        re.DOTALL,
    )

    with_docstrings = 0
    without_docstrings = 0

    for fp, content in files.items():
        if fp.suffix != ".py":
            continue

        for _name, docstring in func_re.findall(content):
            if docstring:
                with_docstrings += 1
            else:
                without_docstrings += 1

    total = with_docstrings + without_docstrings
    if total >= 10:
        confidence = with_docstrings / total
        if confidence >= 0.6:
            patterns.append(DetectedPattern(
                name="Docstring convention",
                description=f"{confidence:.0%} of public functions have docstrings",
                kind=RuleKind.PATTERN,
                confidence=confidence,
                occurrences=with_docstrings,
                violations=without_docstrings,
                suggested_rule=ContractRule(
                    id="auto-docstrings",
                    name="Public functions should have docstrings",
                    description=f"Detected: {confidence:.0%} of public functions have docstrings. Maintain this.",
                    kind=RuleKind.PATTERN,
                    severity=ContractSeverity.INFO,
                    fix_suggestion="Add a one-line docstring explaining what the function does.",
                ),
            ))

    return patterns
|
|
411
|
+
|
|
412
|
+
# -------------------------------------------------------------------
|
|
413
|
+
# Helpers
|
|
414
|
+
# -------------------------------------------------------------------
|
|
415
|
+
|
|
416
|
+
def _collect_source_files(self) -> dict[Path, str]:
    """Collect Python/JS/TS source files under the project root.

    Returns a mapping of file path -> file content. Hidden directories,
    common build/vendor directories, directory symlinks, and files of
    500 KB or more are skipped. Unreadable entries are silently ignored
    (best-effort collection).
    """
    files: dict[Path, str] = {}
    skip_dirs = {"node_modules", ".git", "__pycache__", ".venv", "venv", "dist", "build", ".tox"}
    wanted_suffixes = (".py", ".ts", ".tsx", ".js", ".jsx")

    def _walk(directory: Path) -> None:
        try:
            entries = sorted(directory.iterdir())
        except OSError:
            # Unreadable/vanished directory (permissions, races) — skip it
            # without aborting the rest of the walk. The previous version
            # only caught PermissionError and dropped remaining siblings.
            return
        for entry in entries:
            # is_dir() follows symlinks, so a self-referencing link would
            # recurse forever — skip directory symlinks entirely.
            if entry.is_dir() and not entry.is_symlink():
                if entry.name not in skip_dirs and not entry.name.startswith("."):
                    _walk(entry)
            elif entry.is_file() and entry.suffix in wanted_suffixes:
                try:
                    content = entry.read_text(encoding="utf-8", errors="ignore")
                except OSError:
                    continue
                if len(content) < 500_000:  # Skip huge (likely generated) files
                    files[entry] = content

    _walk(self._project_root)
    return files
|