codebase-intel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. codebase_intel/__init__.py +3 -0
  2. codebase_intel/analytics/__init__.py +1 -0
  3. codebase_intel/analytics/benchmark.py +406 -0
  4. codebase_intel/analytics/feedback.py +496 -0
  5. codebase_intel/analytics/tracker.py +439 -0
  6. codebase_intel/cli/__init__.py +1 -0
  7. codebase_intel/cli/main.py +740 -0
  8. codebase_intel/contracts/__init__.py +1 -0
  9. codebase_intel/contracts/auto_generator.py +438 -0
  10. codebase_intel/contracts/evaluator.py +531 -0
  11. codebase_intel/contracts/models.py +433 -0
  12. codebase_intel/contracts/registry.py +225 -0
  13. codebase_intel/core/__init__.py +1 -0
  14. codebase_intel/core/config.py +248 -0
  15. codebase_intel/core/exceptions.py +454 -0
  16. codebase_intel/core/types.py +375 -0
  17. codebase_intel/decisions/__init__.py +1 -0
  18. codebase_intel/decisions/miner.py +297 -0
  19. codebase_intel/decisions/models.py +302 -0
  20. codebase_intel/decisions/store.py +411 -0
  21. codebase_intel/drift/__init__.py +1 -0
  22. codebase_intel/drift/detector.py +443 -0
  23. codebase_intel/graph/__init__.py +1 -0
  24. codebase_intel/graph/builder.py +391 -0
  25. codebase_intel/graph/parser.py +1232 -0
  26. codebase_intel/graph/query.py +377 -0
  27. codebase_intel/graph/storage.py +736 -0
  28. codebase_intel/mcp/__init__.py +1 -0
  29. codebase_intel/mcp/server.py +710 -0
  30. codebase_intel/orchestrator/__init__.py +1 -0
  31. codebase_intel/orchestrator/assembler.py +649 -0
  32. codebase_intel-0.1.0.dist-info/METADATA +361 -0
  33. codebase_intel-0.1.0.dist-info/RECORD +36 -0
  34. codebase_intel-0.1.0.dist-info/WHEEL +4 -0
  35. codebase_intel-0.1.0.dist-info/entry_points.txt +2 -0
  36. codebase_intel-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1 @@
1
+ """Quality Contracts — executable specifications for code quality enforcement."""
@@ -0,0 +1,438 @@
1
+ """Auto-contract generator — discovers patterns in your codebase and generates rules.
2
+
3
+ THIS IS WHAT NOBODY ELSE DOES.
4
+
5
+ Instead of manually writing contracts, this module analyzes your existing code
6
+ and detects the patterns your team already follows:
7
+
8
+ - "Every API endpoint uses async def" → generates an async-enforcement rule
9
+ - "No file imports directly from the database layer except repositories" → layer rule
10
+ - "All service classes follow the naming pattern XxxService" → naming rule
11
+ - "Error handling always uses custom exception classes, never bare except" → pattern rule
12
+ - "Every test file has a corresponding source file" → coverage rule
13
+
14
+ The generated contracts are DRAFTS — human reviews and activates them.
15
+ This is intentional: auto-generated rules shouldn't silently enforce.
16
+
17
+ Why this matters:
18
+ - New team members learn conventions instantly
19
+ - AI agents follow patterns without being told
20
+ - Patterns that exist in practice become documented and enforced
21
+ - Zero manual contract writing needed to get started
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import logging
27
+ import re
28
+ from collections import Counter
29
+ from dataclasses import dataclass, field
30
+ from pathlib import Path
31
+ from typing import TYPE_CHECKING, Any
32
+
33
+ from codebase_intel.contracts.models import (
34
+ ContractRule,
35
+ PatternExample,
36
+ QualityContract,
37
+ RuleKind,
38
+ ScopeFilter,
39
+ )
40
+ from codebase_intel.core.types import ContractSeverity, Language
41
+
42
+ if TYPE_CHECKING:
43
+ from codebase_intel.graph.storage import GraphStorage
44
+
45
+ logger = logging.getLogger(__name__)
46
+
47
+
48
+ @dataclass
49
+ class DetectedPattern:
50
+ """A pattern detected in the codebase."""
51
+
52
+ name: str
53
+ description: str
54
+ kind: RuleKind
55
+ confidence: float # 0.0-1.0: how consistently the pattern is followed
56
+ occurrences: int # How many files follow this pattern
57
+ violations: int # How many files break this pattern
58
+ examples: list[str] = field(default_factory=list) # File paths as examples
59
+ counter_examples: list[str] = field(default_factory=list)
60
+ suggested_rule: ContractRule | None = None
61
+
62
+
63
+ class AutoContractGenerator:
64
+ """Analyzes a codebase and generates quality contracts from detected patterns."""
65
+
66
+ def __init__(self, project_root: Path) -> None:
67
+ self._project_root = project_root
68
+
69
+ async def analyze(
70
+ self,
71
+ storage: GraphStorage | None = None,
72
+ ) -> list[DetectedPattern]:
73
+ """Run all pattern detectors and return discovered patterns.
74
+
75
+ Each detector focuses on a specific type of pattern:
76
+ - Async patterns (all handlers async vs mixed)
77
+ - Import patterns (layer violations, circular imports)
78
+ - Naming conventions
79
+ - Error handling patterns
80
+ - File organization patterns
81
+ - Test coverage patterns
82
+ """
83
+ patterns: list[DetectedPattern] = []
84
+
85
+ source_files = self._collect_source_files()
86
+ if not source_files:
87
+ return patterns
88
+
89
+ patterns.extend(self._detect_async_patterns(source_files))
90
+ patterns.extend(self._detect_import_layer_patterns(source_files))
91
+ patterns.extend(self._detect_naming_conventions(source_files))
92
+ patterns.extend(self._detect_error_handling_patterns(source_files))
93
+ patterns.extend(self._detect_file_organization(source_files))
94
+ patterns.extend(self._detect_docstring_patterns(source_files))
95
+
96
+ # Filter to patterns with high confidence
97
+ return [p for p in patterns if p.confidence >= 0.7]
98
+
99
+ def generate_contract(
100
+ self,
101
+ patterns: list[DetectedPattern],
102
+ contract_id: str = "auto-detected",
103
+ ) -> QualityContract:
104
+ """Convert detected patterns into a quality contract."""
105
+ rules = []
106
+ for pattern in patterns:
107
+ if pattern.suggested_rule:
108
+ rules.append(pattern.suggested_rule)
109
+
110
+ return QualityContract(
111
+ id=contract_id,
112
+ name="Auto-Detected Project Conventions",
113
+ description=(
114
+ f"Quality rules auto-generated from analyzing {len(patterns)} "
115
+ f"patterns in this codebase. Review and activate rules as needed."
116
+ ),
117
+ priority=150, # Lower than manual contracts
118
+ scope=ScopeFilter(exclude_tests=True, exclude_generated=True),
119
+ rules=rules,
120
+ tags=["auto-generated"],
121
+ )
122
+
123
+ # -------------------------------------------------------------------
124
+ # Pattern detectors
125
+ # -------------------------------------------------------------------
126
+
127
+ def _detect_async_patterns(
128
+ self, files: dict[Path, str]
129
+ ) -> list[DetectedPattern]:
130
+ """Detect if the project consistently uses async patterns."""
131
+ patterns: list[DetectedPattern] = []
132
+
133
+ async_defs = 0
134
+ sync_defs = 0
135
+ async_files: list[str] = []
136
+ sync_files: list[str] = []
137
+
138
+ for fp, content in files.items():
139
+ if fp.suffix != ".py":
140
+ continue
141
+
142
+ has_async = bool(re.search(r"async\s+def\s+\w+", content))
143
+ has_sync = bool(re.search(r"(?<!async\s)def\s+\w+", content))
144
+
145
+ # Only count if file has route/endpoint patterns
146
+ is_handler = any(kw in content for kw in ("@router.", "@app.", "async def get", "async def post", "async def create"))
147
+
148
+ if is_handler:
149
+ if has_async:
150
+ async_defs += 1
151
+ async_files.append(str(fp.relative_to(self._project_root)))
152
+ if has_sync and not has_async:
153
+ sync_defs += 1
154
+ sync_files.append(str(fp.relative_to(self._project_root)))
155
+
156
+ total = async_defs + sync_defs
157
+ if total >= 3 and async_defs > sync_defs:
158
+ confidence = async_defs / total
159
+ patterns.append(DetectedPattern(
160
+ name="Async handlers",
161
+ description=f"This project uses async handlers ({async_defs}/{total} handler files are async)",
162
+ kind=RuleKind.PATTERN,
163
+ confidence=confidence,
164
+ occurrences=async_defs,
165
+ violations=sync_defs,
166
+ examples=async_files[:3],
167
+ counter_examples=sync_files[:3],
168
+ suggested_rule=ContractRule(
169
+ id="auto-async-handlers",
170
+ name="Use async for all handler functions",
171
+ description=f"Detected pattern: {confidence:.0%} of handler files use async. Keep it consistent.",
172
+ kind=RuleKind.PATTERN,
173
+ severity=ContractSeverity.WARNING,
174
+ pattern=r"(?<!async\s)def\s+(get_|post_|put_|delete_|patch_|create_|update_|list_)",
175
+ fix_suggestion="Use `async def` for handler functions to match project convention.",
176
+ ),
177
+ ))
178
+
179
+ return patterns
180
+
181
+ def _detect_import_layer_patterns(
182
+ self, files: dict[Path, str]
183
+ ) -> list[DetectedPattern]:
184
+ """Detect layer violation patterns in imports.
185
+
186
+ Common pattern: routes/api files should not import database/ORM directly.
187
+ """
188
+ patterns: list[DetectedPattern] = []
189
+
190
+ # Detect if routes import from db/models directly
191
+ route_files_importing_db = 0
192
+ route_files_clean = 0
193
+ violating: list[str] = []
194
+ clean: list[str] = []
195
+
196
+ db_patterns = re.compile(r"from\s+\w*(models?|db|database|orm|tortoise|sqlalchemy)\w*\s+import")
197
+
198
+ for fp, content in files.items():
199
+ rel = str(fp.relative_to(self._project_root))
200
+
201
+ is_route = any(kw in rel.lower() for kw in ("route", "router", "api", "endpoint", "view"))
202
+ if not is_route:
203
+ continue
204
+
205
+ if db_patterns.search(content):
206
+ route_files_importing_db += 1
207
+ violating.append(rel)
208
+ else:
209
+ route_files_clean += 1
210
+ clean.append(rel)
211
+
212
+ total = route_files_importing_db + route_files_clean
213
+ if total >= 3 and route_files_clean > route_files_importing_db:
214
+ confidence = route_files_clean / total
215
+ patterns.append(DetectedPattern(
216
+ name="Layer separation (routes ↛ DB)",
217
+ description=f"{confidence:.0%} of route files don't import DB directly — enforce this pattern",
218
+ kind=RuleKind.ARCHITECTURAL,
219
+ confidence=confidence,
220
+ occurrences=route_files_clean,
221
+ violations=route_files_importing_db,
222
+ examples=clean[:3],
223
+ counter_examples=violating[:3],
224
+ suggested_rule=ContractRule(
225
+ id="auto-no-db-in-routes",
226
+ name="No direct DB imports in route handlers",
227
+ description="Route files should not import from database/model modules directly. Use a service layer.",
228
+ kind=RuleKind.ARCHITECTURAL,
229
+ severity=ContractSeverity.WARNING,
230
+ pattern=r"from\s+\w*(models?|db|database)\w*\s+import",
231
+ fix_suggestion="Import from a service module instead. Routes → Services → Repositories → DB.",
232
+ ),
233
+ ))
234
+
235
+ return patterns
236
+
237
+ def _detect_naming_conventions(
238
+ self, files: dict[Path, str]
239
+ ) -> list[DetectedPattern]:
240
+ """Detect consistent naming conventions for classes and functions."""
241
+ patterns: list[DetectedPattern] = []
242
+
243
+ # Detect service class naming: FooService, BarService
244
+ service_names = Counter()
245
+ for fp, content in files.items():
246
+ for match in re.finditer(r"class\s+(\w+Service)\b", content):
247
+ service_names[match.group(1)] += 1
248
+
249
+ if len(service_names) >= 3:
250
+ patterns.append(DetectedPattern(
251
+ name="Service class naming",
252
+ description=f"Found {len(service_names)} service classes following XxxService pattern",
253
+ kind=RuleKind.PATTERN,
254
+ confidence=0.9,
255
+ occurrences=len(service_names),
256
+ violations=0,
257
+ examples=list(service_names.keys())[:5],
258
+ suggested_rule=ContractRule(
259
+ id="auto-service-naming",
260
+ name="Service classes must be named XxxService",
261
+ description="Detected convention: all service classes follow the XxxService naming pattern.",
262
+ kind=RuleKind.PATTERN,
263
+ severity=ContractSeverity.INFO,
264
+ fix_suggestion="Name service classes with the Service suffix: UserService, OrderService, etc.",
265
+ ),
266
+ ))
267
+
268
+ return patterns
269
+
270
+ def _detect_error_handling_patterns(
271
+ self, files: dict[Path, str]
272
+ ) -> list[DetectedPattern]:
273
+ """Detect error handling conventions."""
274
+ patterns: list[DetectedPattern] = []
275
+
276
+ custom_exception_files = 0
277
+ bare_except_files = 0
278
+ custom_examples: list[str] = []
279
+ bare_examples: list[str] = []
280
+
281
+ for fp, content in files.items():
282
+ if fp.suffix != ".py":
283
+ continue
284
+
285
+ rel = str(fp.relative_to(self._project_root))
286
+ has_custom = bool(re.search(r"raise\s+\w+Error\(|raise\s+\w+Exception\(", content))
287
+ has_bare = bool(re.search(r"except\s*:", content))
288
+
289
+ if has_custom:
290
+ custom_exception_files += 1
291
+ custom_examples.append(rel)
292
+ if has_bare:
293
+ bare_except_files += 1
294
+ bare_examples.append(rel)
295
+
296
+ if custom_exception_files >= 3:
297
+ total = custom_exception_files + bare_except_files
298
+ confidence = custom_exception_files / max(total, 1)
299
+
300
+ patterns.append(DetectedPattern(
301
+ name="Custom exception handling",
302
+ description=f"{custom_exception_files} files use custom exceptions. {bare_except_files} use bare except.",
303
+ kind=RuleKind.PATTERN,
304
+ confidence=confidence,
305
+ occurrences=custom_exception_files,
306
+ violations=bare_except_files,
307
+ examples=custom_examples[:3],
308
+ counter_examples=bare_examples[:3],
309
+ suggested_rule=ContractRule(
310
+ id="auto-no-bare-except",
311
+ name="No bare except clauses",
312
+ description="This project uses custom exception classes. Avoid bare `except:` clauses.",
313
+ kind=RuleKind.PATTERN,
314
+ severity=ContractSeverity.WARNING,
315
+ pattern=r"except\s*:",
316
+ fix_suggestion="Catch specific exceptions: `except ValueError:` or `except CustomError:`",
317
+ ),
318
+ ))
319
+
320
+ return patterns
321
+
322
+ def _detect_file_organization(
323
+ self, files: dict[Path, str]
324
+ ) -> list[DetectedPattern]:
325
+ """Detect file organization patterns."""
326
+ patterns: list[DetectedPattern] = []
327
+
328
+ # Detect if tests mirror source structure
329
+ source_modules = set()
330
+ test_modules = set()
331
+
332
+ for fp in files:
333
+ rel = fp.relative_to(self._project_root)
334
+ parts = rel.parts
335
+
336
+ if any(p in ("tests", "test", "__tests__") for p in parts):
337
+ test_modules.add(fp.stem.replace("test_", "").replace("_test", ""))
338
+ elif fp.suffix == ".py" and not fp.stem.startswith("_"):
339
+ source_modules.add(fp.stem)
340
+
341
+ if source_modules and test_modules:
342
+ covered = source_modules & test_modules
343
+ coverage_pct = len(covered) / max(len(source_modules), 1)
344
+
345
+ if coverage_pct >= 0.3:
346
+ patterns.append(DetectedPattern(
347
+ name="Test coverage structure",
348
+ description=f"{len(covered)}/{len(source_modules)} source modules have matching test files ({coverage_pct:.0%})",
349
+ kind=RuleKind.ARCHITECTURAL,
350
+ confidence=coverage_pct,
351
+ occurrences=len(covered),
352
+ violations=len(source_modules) - len(covered),
353
+ examples=list(covered)[:5],
354
+ suggested_rule=ContractRule(
355
+ id="auto-test-coverage",
356
+ name="Every source module should have a test file",
357
+ description=f"Detected: {coverage_pct:.0%} of modules have tests. Maintain this coverage.",
358
+ kind=RuleKind.ARCHITECTURAL,
359
+ severity=ContractSeverity.INFO,
360
+ fix_suggestion="Create test_<module>.py for new modules.",
361
+ ),
362
+ ))
363
+
364
+ return patterns
365
+
366
+ def _detect_docstring_patterns(
367
+ self, files: dict[Path, str]
368
+ ) -> list[DetectedPattern]:
369
+ """Detect docstring conventions."""
370
+ patterns: list[DetectedPattern] = []
371
+
372
+ with_docstrings = 0
373
+ without_docstrings = 0
374
+
375
+ for fp, content in files.items():
376
+ if fp.suffix != ".py":
377
+ continue
378
+
379
+ # Count public functions with/without docstrings
380
+ funcs = re.findall(r'def\s+([a-z]\w+)\s*\(.*?\).*?:\s*\n(\s+""")?', content, re.DOTALL)
381
+ for name, docstring in funcs:
382
+ if name.startswith("_"):
383
+ continue
384
+ if docstring:
385
+ with_docstrings += 1
386
+ else:
387
+ without_docstrings += 1
388
+
389
+ total = with_docstrings + without_docstrings
390
+ if total >= 10:
391
+ confidence = with_docstrings / total
392
+ if confidence >= 0.6:
393
+ patterns.append(DetectedPattern(
394
+ name="Docstring convention",
395
+ description=f"{confidence:.0%} of public functions have docstrings",
396
+ kind=RuleKind.PATTERN,
397
+ confidence=confidence,
398
+ occurrences=with_docstrings,
399
+ violations=without_docstrings,
400
+ suggested_rule=ContractRule(
401
+ id="auto-docstrings",
402
+ name="Public functions should have docstrings",
403
+ description=f"Detected: {confidence:.0%} of public functions have docstrings. Maintain this.",
404
+ kind=RuleKind.PATTERN,
405
+ severity=ContractSeverity.INFO,
406
+ fix_suggestion="Add a one-line docstring explaining what the function does.",
407
+ ),
408
+ ))
409
+
410
+ return patterns
411
+
412
+ # -------------------------------------------------------------------
413
+ # Helpers
414
+ # -------------------------------------------------------------------
415
+
416
+ def _collect_source_files(self) -> dict[Path, str]:
417
+ """Collect all source files and their content."""
418
+ files: dict[Path, str] = {}
419
+ skip_dirs = {"node_modules", ".git", "__pycache__", ".venv", "venv", "dist", "build", ".tox"}
420
+
421
+ def _walk(directory: Path) -> None:
422
+ try:
423
+ for entry in sorted(directory.iterdir()):
424
+ if entry.is_dir():
425
+ if entry.name not in skip_dirs and not entry.name.startswith("."):
426
+ _walk(entry)
427
+ elif entry.is_file() and entry.suffix in (".py", ".ts", ".tsx", ".js", ".jsx"):
428
+ try:
429
+ content = entry.read_text(encoding="utf-8", errors="ignore")
430
+ if len(content) < 500_000: # Skip huge files
431
+ files[entry] = content
432
+ except OSError:
433
+ pass
434
+ except PermissionError:
435
+ pass
436
+
437
+ _walk(self._project_root)
438
+ return files