codebase-intel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. codebase_intel/__init__.py +3 -0
  2. codebase_intel/analytics/__init__.py +1 -0
  3. codebase_intel/analytics/benchmark.py +406 -0
  4. codebase_intel/analytics/feedback.py +496 -0
  5. codebase_intel/analytics/tracker.py +439 -0
  6. codebase_intel/cli/__init__.py +1 -0
  7. codebase_intel/cli/main.py +740 -0
  8. codebase_intel/contracts/__init__.py +1 -0
  9. codebase_intel/contracts/auto_generator.py +438 -0
  10. codebase_intel/contracts/evaluator.py +531 -0
  11. codebase_intel/contracts/models.py +433 -0
  12. codebase_intel/contracts/registry.py +225 -0
  13. codebase_intel/core/__init__.py +1 -0
  14. codebase_intel/core/config.py +248 -0
  15. codebase_intel/core/exceptions.py +454 -0
  16. codebase_intel/core/types.py +375 -0
  17. codebase_intel/decisions/__init__.py +1 -0
  18. codebase_intel/decisions/miner.py +297 -0
  19. codebase_intel/decisions/models.py +302 -0
  20. codebase_intel/decisions/store.py +411 -0
  21. codebase_intel/drift/__init__.py +1 -0
  22. codebase_intel/drift/detector.py +443 -0
  23. codebase_intel/graph/__init__.py +1 -0
  24. codebase_intel/graph/builder.py +391 -0
  25. codebase_intel/graph/parser.py +1232 -0
  26. codebase_intel/graph/query.py +377 -0
  27. codebase_intel/graph/storage.py +736 -0
  28. codebase_intel/mcp/__init__.py +1 -0
  29. codebase_intel/mcp/server.py +710 -0
  30. codebase_intel/orchestrator/__init__.py +1 -0
  31. codebase_intel/orchestrator/assembler.py +649 -0
  32. codebase_intel-0.1.0.dist-info/METADATA +361 -0
  33. codebase_intel-0.1.0.dist-info/RECORD +36 -0
  34. codebase_intel-0.1.0.dist-info/WHEEL +4 -0
  35. codebase_intel-0.1.0.dist-info/entry_points.txt +2 -0
  36. codebase_intel-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,443 @@
1
+ """Drift detector — identifies staleness, pattern violations, and knowledge decay.
2
+
3
+ Drift is the gradual divergence between what the system "knows" (decisions,
4
+ contracts, graph state) and what the code actually is. Drift is inevitable —
5
+ code changes constantly. The detector's job is to surface drift before it
6
+ causes problems (like an agent acting on outdated context).
7
+
8
+ Types of drift detected:
9
+ 1. Decision drift: code changed but decision anchors still point to old locations
10
+ 2. Contract drift: code violates contracts that it previously satisfied
11
+ 3. Graph drift: graph is stale (files changed since last index)
12
+ 4. Context rot: >30% of records are stale (systemic problem, not individual)
13
+
14
+ Edge cases:
15
+ - False positive from refactor: file moved but logic is the same → content hash
16
+ matching prevents false positives (same hash at new path = rename, not violation)
17
+ - Intentional drift: team decided to violate a contract temporarily → migration
18
+ deadlines in contracts handle this gracefully
19
+ - Large PR with many changes: drift check shouldn't block the workflow. Run async
20
+ and report non-blocking warnings.
21
+ - New developer joins: lots of "new" code the system hasn't seen → initial noise,
22
+ settles after first re-index
23
+ - Gradual drift vs sudden drift: gradual (1 file/week drifts) vs sudden (major
24
+ refactor invalidates 40% of records). Different responses needed.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import logging
30
+ from dataclasses import dataclass, field
31
+ from datetime import UTC, datetime
32
+ from pathlib import Path
33
+ from typing import TYPE_CHECKING
34
+
35
+ from codebase_intel.core.exceptions import ContextRotError, ErrorContext
36
+ from codebase_intel.core.types import DriftLevel
37
+
38
+ if TYPE_CHECKING:
39
+ from codebase_intel.core.config import DriftConfig, ProjectConfig
40
+ from codebase_intel.decisions.store import DecisionStore
41
+ from codebase_intel.graph.storage import GraphStorage
42
+
43
+
44
+ logger = logging.getLogger(__name__)
45
+
46
+
47
@dataclass
class DriftItem:
    """A single instance of detected drift.

    Produced by DriftDetector checks and aggregated into a DriftReport.
    """

    component: str  # Which subsystem flagged it: "decision", "contract", "graph" (or "system" for rot)
    level: DriftLevel  # Severity of this individual item
    description: str  # Human-readable explanation of what drifted
    file_path: Path | None = None  # File the drift concerns, when applicable
    record_id: str | None = None  # ID of the affected decision/contract record, when applicable
    remediation: str = ""  # Suggested fix; empty string when no action is suggested
57
+
58
+
59
+ @dataclass
60
+ class DriftReport:
61
+ """Complete drift analysis report."""
62
+
63
+ items: list[DriftItem] = field(default_factory=list)
64
+ checked_at: datetime = field(default_factory=lambda: datetime.now(UTC))
65
+ graph_stale_files: int = 0
66
+ decision_stale_count: int = 0
67
+ decision_orphaned_count: int = 0
68
+ decision_total: int = 0
69
+ rot_detected: bool = False
70
+ rot_percentage: float = 0.0
71
+
72
+ @property
73
+ def overall_level(self) -> DriftLevel:
74
+ """The most severe drift level across all items."""
75
+ if not self.items:
76
+ return DriftLevel.NONE
77
+ levels = [i.level for i in self.items]
78
+ severity = [DriftLevel.NONE, DriftLevel.LOW, DriftLevel.MEDIUM, DriftLevel.HIGH, DriftLevel.CRITICAL]
79
+ return max(levels, key=lambda l: severity.index(l))
80
+
81
+ @property
82
+ def summary(self) -> str:
83
+ """One-line summary for CLI/MCP output."""
84
+ if not self.items:
85
+ return "No drift detected. All records are current."
86
+ counts = {}
87
+ for item in self.items:
88
+ counts[item.level.value] = counts.get(item.level.value, 0) + 1
89
+ parts = [f"{v} {k}" for k, v in sorted(counts.items())]
90
+ return f"Drift detected: {', '.join(parts)}"
91
+
92
+ def to_context_string(self) -> str:
93
+ """Serialize for inclusion in agent context."""
94
+ if not self.items:
95
+ return ""
96
+
97
+ lines = [
98
+ "## Drift Warnings",
99
+ f"Overall status: {self.overall_level.value}",
100
+ "",
101
+ ]
102
+
103
+ # Group by component
104
+ by_component: dict[str, list[DriftItem]] = {}
105
+ for item in self.items:
106
+ by_component.setdefault(item.component, []).append(item)
107
+
108
+ for component, items in by_component.items():
109
+ lines.append(f"### {component.title()}")
110
+ for item in items[:10]: # Cap per component
111
+ lines.append(f"- [{item.level.value}] {item.description}")
112
+ if item.remediation:
113
+ lines.append(f" → {item.remediation}")
114
+ if len(items) > 10:
115
+ lines.append(f" ... and {len(items) - 10} more")
116
+ lines.append("")
117
+
118
+ if self.rot_detected:
119
+ lines.append(
120
+ f"**CONTEXT ROT ALERT**: {self.rot_percentage:.0%} of decision records "
121
+ f"are stale. Consider running `codebase-intel refresh`."
122
+ )
123
+
124
+ return "\n".join(lines)
125
+
126
+
127
class DriftDetector:
    """Detects drift between recorded knowledge and actual code state.

    Each component (graph, decisions) is optional: when its storage backend
    is not supplied, that component's checks are silently skipped.
    """

    def __init__(
        self,
        config: DriftConfig,
        project_root: Path,
        graph_storage: GraphStorage | None = None,
        decision_store: DecisionStore | None = None,
    ) -> None:
        self._config = config
        self._project_root = project_root
        self._graph = graph_storage
        self._decisions = decision_store

    async def full_check(self) -> DriftReport:
        """Run a comprehensive drift check across all components.

        This is the main entry point — called by CLI and post-commit hook.

        Returns:
            A DriftReport aggregating graph drift, decision drift, and any
            systemic context-rot alert.
        """
        report = DriftReport()

        # Check each component independently
        if self._graph:
            await self._check_graph_drift(report)

        if self._decisions:
            await self._check_decision_drift(report)

        # Check for context rot (systemic staleness) — relies on the decision
        # counters populated above, so it runs last.
        self._check_context_rot(report)

        logger.info(
            "Drift check complete: %d items, overall=%s",
            len(report.items),
            report.overall_level.value,
        )

        return report

    async def check_files(self, changed_files: list[Path]) -> DriftReport:
        """Quick drift check focused on specific changed files.

        Used by git hooks — only checks drift related to the changed files,
        not the entire project. Much faster than full_check.

        Edge cases:
        - Changed file has no decisions: no decision drift (valid)
        - Changed file is not in graph: graph drift (needs re-index)
        - Changed file was deleted: orphan check on decisions anchored to it
        """
        report = DriftReport()

        if self._graph:
            await self._check_graph_drift_for_files(report, changed_files)

        if self._decisions:
            await self._check_decision_drift_for_files(report, changed_files)

        return report

    # -------------------------------------------------------------------
    # Graph drift
    # -------------------------------------------------------------------

    async def _check_graph_drift(self, report: DriftReport) -> None:
        """Check if the code graph is stale.

        Compares stored fingerprints against current file state.
        Files that changed since last index are flagged.

        Edge case: new files (not in graph at all) are not drift —
        they're additions. Only flag files whose content changed.
        """
        from codebase_intel.graph.parser import compute_file_hash

        stale_count = 0
        cursor = await self._graph._db.execute(  # type: ignore[union-attr]
            "SELECT file_path, content_hash FROM file_fingerprints"
        )
        for row in await cursor.fetchall():
            stored_path = row[0]
            stored_hash = row[1]
            full_path = self._graph._from_stored_path(stored_path)  # type: ignore[union-attr]

            # A fingerprint whose file no longer exists means the graph still
            # describes deleted code — the strongest form of graph staleness.
            if not full_path.exists():
                report.items.append(DriftItem(
                    component="graph",
                    level=DriftLevel.HIGH,
                    description=f"File {stored_path} was deleted but still in graph",
                    file_path=full_path,
                    remediation="Run `codebase-intel analyze` to update the graph",
                ))
                stale_count += 1
                continue

            try:
                current_hash = compute_file_hash(full_path.read_bytes())
            except OSError:
                # File vanished or became unreadable between exists() and read
                # (TOCTOU) — skip rather than crash the whole check.
                continue

            if current_hash != stored_hash:
                report.items.append(DriftItem(
                    component="graph",
                    level=DriftLevel.MEDIUM,
                    description=f"File {stored_path} changed since last index",
                    file_path=full_path,
                    remediation="Run `codebase-intel analyze --incremental`",
                ))
                stale_count += 1

        report.graph_stale_files = stale_count

    async def _check_graph_drift_for_files(
        self, report: DriftReport, files: list[Path]
    ) -> None:
        """Quick graph drift check for specific files.

        Changed-but-indexed files only bump the stale counter (no DriftItem);
        unindexed files get a LOW-severity informational item.
        """
        from codebase_intel.graph.parser import compute_file_hash

        for fp in files:
            if not fp.exists():
                continue

            stored_hash = await self._graph.get_fingerprint(fp)  # type: ignore[union-attr]
            if stored_hash is None:
                # New file — not drift, just needs indexing
                report.items.append(DriftItem(
                    component="graph",
                    level=DriftLevel.LOW,
                    description=f"New file {fp.name} not yet in graph",
                    file_path=fp,
                    remediation="Will be indexed on next build",
                ))
                continue

            try:
                current_hash = compute_file_hash(fp.read_bytes())
            except OSError:
                # Mirrors the full check: the file can disappear between the
                # exists() test and the read — treat as skippable, not fatal.
                continue
            if current_hash != stored_hash:
                report.graph_stale_files += 1

    # -------------------------------------------------------------------
    # Decision drift
    # -------------------------------------------------------------------

    async def _check_decision_drift(self, report: DriftReport) -> None:
        """Check all decisions for staleness and orphaning.

        Three types of decision drift:
        1. Stale: past review_by date
        2. Orphaned: code anchors point to deleted files
        3. Content drift: anchored code changed significantly (hash mismatch)

        Edge case: decision with no code anchors is never orphaned
        (it's an org-level decision, always somewhat relevant).

        Edge case: decision that's both stale AND orphaned → report as
        CRITICAL drift (double signal that this decision needs attention).
        """
        from codebase_intel.graph.parser import compute_file_hash

        all_decisions = await self._decisions.load_all()  # type: ignore[union-attr]
        report.decision_total = len(all_decisions)
        stale_count = 0
        orphaned_count = 0

        for record in all_decisions:
            if record.status != "active":
                continue

            # Check staleness
            if record.is_stale:
                level = DriftLevel.MEDIUM
                stale_count += 1
                report.items.append(DriftItem(
                    component="decision",
                    level=level,
                    description=(
                        f"Decision {record.id} ('{record.title}') is past "
                        f"its review date ({record.review_by})"
                    ),
                    record_id=record.id,
                    remediation="Review and update the decision, or extend the review date",
                ))

            # Check for expired constraints
            if record.is_expired:
                report.items.append(DriftItem(
                    component="decision",
                    level=DriftLevel.HIGH,
                    description=(
                        f"Decision {record.id} has expired constraints — "
                        f"the original rationale may no longer apply"
                    ),
                    record_id=record.id,
                    remediation="Re-evaluate whether this decision still holds",
                ))

            # Check for orphaned anchors
            if record.has_orphaned_anchors:
                orphaned_count += 1
                orphaned_paths = [
                    str(a.file_path)
                    for a in record.code_anchors
                    if not a.file_path.exists()
                ]
                # Stale AND orphaned is a double signal — escalate to CRITICAL
                # per the contract documented above; orphaned alone is MEDIUM.
                level = DriftLevel.CRITICAL if record.is_stale else DriftLevel.MEDIUM

                report.items.append(DriftItem(
                    component="decision",
                    level=level,
                    description=(
                        f"Decision {record.id} anchored to deleted files: "
                        f"{', '.join(orphaned_paths[:3])}"
                    ),
                    record_id=record.id,
                    remediation="Update code anchors to new file locations or supersede the decision",
                ))

            # Check content hash drift on remaining anchors
            for anchor in record.code_anchors:
                if not anchor.file_path.exists() or not anchor.content_hash:
                    continue

                try:
                    content = anchor.file_path.read_text(encoding="utf-8")
                    if anchor.line_range:
                        # Hash only the anchored region (line_range is 1-based,
                        # inclusive) so unrelated edits elsewhere don't flag.
                        lines = content.split("\n")
                        region = "\n".join(
                            lines[anchor.line_range.start - 1 : anchor.line_range.end]
                        )
                    else:
                        region = content

                    current_hash = compute_file_hash(region.encode())

                    if current_hash != anchor.content_hash:
                        report.items.append(DriftItem(
                            component="decision",
                            level=DriftLevel.MEDIUM,
                            description=(
                                f"Code anchored by {record.id} at "
                                f"{anchor.file_path.name} has changed"
                            ),
                            file_path=anchor.file_path,
                            record_id=record.id,
                            remediation="Verify the decision still applies to the changed code",
                        ))
                except OSError:
                    # Best-effort: an unreadable anchor file is not itself
                    # drift — the orphan check above covers deletion.
                    pass

        report.decision_stale_count = stale_count
        report.decision_orphaned_count = orphaned_count

    async def _check_decision_drift_for_files(
        self, report: DriftReport, files: list[Path]
    ) -> None:
        """Quick decision drift check for specific changed files.

        Flags every active decision anchored to any of the changed files —
        a LOW-severity nudge to re-verify the decision, not a violation.
        """
        file_set = {fp.resolve() for fp in files}
        all_decisions = await self._decisions.load_all()  # type: ignore[union-attr]

        for record in all_decisions:
            if record.status != "active":
                continue

            for anchor in record.code_anchors:
                if anchor.file_path.resolve() in file_set:
                    report.items.append(DriftItem(
                        component="decision",
                        level=DriftLevel.LOW,
                        description=(
                            f"Changed file {anchor.file_path.name} is anchored "
                            f"by decision {record.id}"
                        ),
                        file_path=anchor.file_path,
                        record_id=record.id,
                        remediation="Verify the decision still applies after your changes",
                    ))

    # -------------------------------------------------------------------
    # Context rot detection
    # -------------------------------------------------------------------

    def _check_context_rot(self, report: DriftReport) -> None:
        """Detect systemic context rot — when too many records are stale.

        Context rot is qualitatively different from individual drift items.
        It means the knowledge base as a whole is unreliable and needs a
        bulk review, not item-by-item fixing.

        Edge case: project with only 2 decisions, 1 is stale → 50% rot.
        But that's not really "systemic." We require a minimum of 5 decisions
        before triggering the rot alert.

        Edge case: rot threshold is configurable. Default 30% — adjustable
        for projects that tolerate more staleness.
        """
        total = report.decision_total
        stale = report.decision_stale_count + report.decision_orphaned_count

        if total < 5:
            return  # Too few decisions to assess rot

        rot_pct = stale / total
        report.rot_percentage = rot_pct

        if rot_pct >= self._config.rot_threshold_pct:
            report.rot_detected = True
            report.items.append(DriftItem(
                component="system",
                level=DriftLevel.CRITICAL,
                description=(
                    f"Context rot: {rot_pct:.0%} of decision records are stale or orphaned "
                    f"({stale}/{total})"
                ),
                remediation=(
                    "Run `codebase-intel refresh` to identify and update stale records. "
                    "Consider a team review session for bulk cleanup."
                ),
            ))
@@ -0,0 +1 @@
1
+ """Semantic Code Graph — AST parsing, dependency mapping, impact analysis."""