codebase-intel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. codebase_intel/__init__.py +3 -0
  2. codebase_intel/analytics/__init__.py +1 -0
  3. codebase_intel/analytics/benchmark.py +406 -0
  4. codebase_intel/analytics/feedback.py +496 -0
  5. codebase_intel/analytics/tracker.py +439 -0
  6. codebase_intel/cli/__init__.py +1 -0
  7. codebase_intel/cli/main.py +740 -0
  8. codebase_intel/contracts/__init__.py +1 -0
  9. codebase_intel/contracts/auto_generator.py +438 -0
  10. codebase_intel/contracts/evaluator.py +531 -0
  11. codebase_intel/contracts/models.py +433 -0
  12. codebase_intel/contracts/registry.py +225 -0
  13. codebase_intel/core/__init__.py +1 -0
  14. codebase_intel/core/config.py +248 -0
  15. codebase_intel/core/exceptions.py +454 -0
  16. codebase_intel/core/types.py +375 -0
  17. codebase_intel/decisions/__init__.py +1 -0
  18. codebase_intel/decisions/miner.py +297 -0
  19. codebase_intel/decisions/models.py +302 -0
  20. codebase_intel/decisions/store.py +411 -0
  21. codebase_intel/drift/__init__.py +1 -0
  22. codebase_intel/drift/detector.py +443 -0
  23. codebase_intel/graph/__init__.py +1 -0
  24. codebase_intel/graph/builder.py +391 -0
  25. codebase_intel/graph/parser.py +1232 -0
  26. codebase_intel/graph/query.py +377 -0
  27. codebase_intel/graph/storage.py +736 -0
  28. codebase_intel/mcp/__init__.py +1 -0
  29. codebase_intel/mcp/server.py +710 -0
  30. codebase_intel/orchestrator/__init__.py +1 -0
  31. codebase_intel/orchestrator/assembler.py +649 -0
  32. codebase_intel-0.1.0.dist-info/METADATA +361 -0
  33. codebase_intel-0.1.0.dist-info/RECORD +36 -0
  34. codebase_intel-0.1.0.dist-info/WHEEL +4 -0
  35. codebase_intel-0.1.0.dist-info/entry_points.txt +2 -0
  36. codebase_intel-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,411 @@
1
+ """Decision store — YAML-based persistence with query capabilities.
2
+
3
+ Decisions are stored as individual YAML files in the decisions directory.
4
+ This is intentional over SQLite because:
5
+ 1. YAML files are human-editable (developers can create/edit decisions directly)
6
+ 2. YAML files are git-friendly (diff, blame, merge work naturally)
7
+ 3. Individual files prevent merge conflicts (parallel decision creation)
8
+ 4. Files can be reviewed in PRs alongside the code they reference
9
+
10
+ Edge cases:
11
+ - YAML syntax error in a decision file: skip that file, report error, continue
12
+ - Multiple files with same decision ID: conflict — report and use the newer one
13
+ - Decision file modified externally while MCP server is running: detect via mtime
14
+ - Decision references a superseded decision that doesn't exist: orphaned chain
15
+ - Bulk operations (100+ decisions): batch loading with lazy parsing
16
+ - Unicode in decision content: YAML handles this natively
17
+ - Large decision files (someone put a design doc in the description): warn but allow
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import logging
23
+ from datetime import UTC, datetime
24
+ from pathlib import Path
25
+ from typing import TYPE_CHECKING
26
+
27
+ import yaml
28
+
29
+ from codebase_intel.core.exceptions import (
30
+ ContractParseError,
31
+ DecisionConflictError,
32
+ ErrorContext,
33
+ OrphanedDecisionError,
34
+ StaleDecisionError,
35
+ )
36
+ from codebase_intel.core.types import CodeAnchor, DecisionStatus, LineRange
37
+ from codebase_intel.decisions.models import (
38
+ AlternativeConsidered,
39
+ Constraint,
40
+ DecisionRecord,
41
+ )
42
+
43
+ if TYPE_CHECKING:
44
+ from codebase_intel.core.config import DecisionConfig
45
+
46
+ logger = logging.getLogger(__name__)
47
+
48
+
49
+ class DecisionStore:
50
+ """Manages decision records stored as YAML files."""
51
+
52
def __init__(self, config: DecisionConfig, project_root: Path) -> None:
    """Bind the store to a decision config and a project root.

    Both arguments are stored unchanged. The record cache and the
    per-ID mtime table start out empty; nothing is read from disk here.
    """
    # Keyed by decision ID: the cached record and the file mtime that
    # was recorded when the record was cached.
    self._cache_mtimes: dict[str, float] = {}
    self._cache: dict[str, DecisionRecord] = {}
    self._project_root = project_root
    self._config = config
57
+
58
@property
def decisions_dir(self) -> Path:
    """Directory that holds the decision YAML files, as set in the config."""
    config = self._config
    return config.decisions_dir
61
+
62
async def load_all(self) -> list[DecisionRecord]:
    """Load every decision record found in the decisions directory.

    Scans ``*.yaml`` files in sorted filename order, parses each one with
    ``_load_file`` and refreshes the in-memory cache (record and file
    mtime) for every decision ID that was loaded.

    Edge cases:
    - Directory doesn't exist: return empty list (not initialized yet)
    - A file fails to load or stat for any reason: skip it, log a warning
      with the filename, continue with the remaining files
    - Empty file (``_load_file`` returns None): skipped
    - Duplicate IDs: the file with the most recent mtime wins (on an
      mtime tie the file that sorts later wins); the warning names both
      files that actually carried the ID

    Returns:
        One record per distinct decision ID, ordered by the first file
        in which each ID was seen.
    """
    if not self.decisions_dir.exists():
        return []

    # decision ID -> (record, file it came from, mtime of that file)
    loaded: dict[str, tuple[DecisionRecord, Path, float]] = {}
    errors: list[str] = []

    for yaml_file in sorted(self.decisions_dir.glob("*.yaml")):
        try:
            record = self._load_file(yaml_file)
            # stat() is inside the try so that a file which disappears
            # mid-scan is reported like any other load failure instead
            # of aborting the whole scan.
            mtime = yaml_file.stat().st_mtime
        except Exception as exc:
            errors.append(f"{yaml_file.name}: {exc}")
            logger.warning("Failed to load decision file %s: %s", yaml_file, exc)
            continue

        if record is None:
            continue

        # Handle duplicate IDs
        previous = loaded.get(record.id)
        if previous is not None:
            _, previous_file, previous_mtime = previous
            logger.warning(
                "Duplicate decision ID '%s' in %s and %s — keeping newer",
                record.id,
                previous_file,
                yaml_file,
            )
            if previous_mtime > mtime:
                # The file seen earlier was modified more recently.
                continue

        loaded[record.id] = (record, yaml_file, mtime)

    if errors:
        logger.warning(
            "%d decision files had errors: %s",
            len(errors),
            "; ".join(errors[:5]),
        )

    for decision_id, (record, _source, mtime) in loaded.items():
        self._cache[decision_id] = record
        self._cache_mtimes[decision_id] = mtime

    return [record for record, _source, _mtime in loaded.values()]
110
+
111
def _load_file(self, yaml_file: Path) -> DecisionRecord | None:
    """Load a single decision YAML file.

    The file is read as UTF-8 and parsed with ``yaml.safe_load``. The
    ``code_anchors``, ``alternatives`` and ``constraints`` entries are
    converted to model objects before the record is built.

    Edge cases:
    - File is empty or whitespace-only: returns None.
    - Invalid YAML, or a top-level value that is not a mapping: raises
      ``ContractParseError``.
    - A list key present with no value (``code_anchors:`` parses to
      None): treated as an empty list.
    - A list key holding a single string or mapping: treated as a
      one-item list rather than being iterated per character / per key.
    - Missing required fields: whatever ``DecisionRecord(**data)``
      raises propagates to the caller.
    """
    content = yaml_file.read_text(encoding="utf-8")
    if not content.strip():
        return None

    try:
        data = yaml.safe_load(content)
    except yaml.YAMLError as exc:
        raise ContractParseError(
            f"Invalid YAML in {yaml_file.name}: {exc}",
            ErrorContext(file_path=yaml_file, operation="parse_decision"),
        ) from exc

    if not isinstance(data, dict):
        raise ContractParseError(
            f"Decision file {yaml_file.name} must contain a YAML mapping, got {type(data).__name__}",
            ErrorContext(file_path=yaml_file),
        )

    def _entries(key: str) -> list:
        """Return the value under *key* normalized to something list-like."""
        value = data.get(key)
        if value is None:
            # Key absent, or written with an empty value in hand-edited YAML.
            return []
        if isinstance(value, (str, dict)):
            return [value]
        return value

    # Parse code anchors from simplified YAML format
    anchors = []
    for anchor_data in _entries("code_anchors"):
        if isinstance(anchor_data, str):
            # Shorthand: "src/middleware/rate_limiter.py:15-82"
            anchors.append(self._parse_anchor_shorthand(anchor_data))
        elif isinstance(anchor_data, dict):
            anchors.append(CodeAnchor(
                file_path=Path(anchor_data["file_path"]),
                line_range=LineRange(**anchor_data["line_range"]) if "line_range" in anchor_data else None,
                symbol_name=anchor_data.get("symbol_name"),
                content_hash=anchor_data.get("content_hash"),
            ))
    data["code_anchors"] = anchors

    # Parse alternatives (entries that are not mappings are dropped)
    data["alternatives"] = [
        AlternativeConsidered(**alt_data)
        for alt_data in _entries("alternatives")
        if isinstance(alt_data, dict)
    ]

    # Parse constraints; a bare string is a description with unknown source
    constraints = []
    for c_data in _entries("constraints"):
        if isinstance(c_data, str):
            constraints.append(Constraint(description=c_data, source="unknown"))
        elif isinstance(c_data, dict):
            constraints.append(Constraint(**c_data))
    data["constraints"] = constraints

    return DecisionRecord(**data)
167
+
168
def _parse_anchor_shorthand(self, shorthand: str) -> CodeAnchor:
    """Parse shorthand anchor format: 'path/to/file.py:15-82' or 'path/to/file.py'.

    Only the text after the LAST colon is considered as a line spec.

    Edge cases:
    - No colon: anchor applies to the entire file
    - Single line: "file.py:42" → LineRange(start=42, end=42)
    - Line range: "file.py:15-82" → LineRange(start=15, end=82)
    - Malformed line spec made only of digits and dashes ("file.py:15-"):
      the spec is dropped and the anchor covers the whole file
    - Anything else after the last colon ("C:/src/my-file.py"): the colon
      is part of the path, so the full string is used as the path
    """
    path_part, colon, line_part = shorthand.rpartition(":")
    if not colon:
        return CodeAnchor(file_path=Path(shorthand))

    line_range = None
    if "-" in line_part:
        start, end = line_part.split("-", 1)
        # isdecimal() rather than isdigit(): isdigit() also accepts
        # characters such as "²" that int() cannot convert.
        if start.isdecimal() and end.isdecimal():
            line_range = LineRange(start=int(start), end=int(end))
        elif line_part.strip("0123456789-"):
            # Something other than digits/dashes follows the colon, so it
            # is not a line spec at all: keep the whole string as the path
            # instead of truncating it at the colon.
            path_part = shorthand
    elif line_part.isdecimal():
        line_no = int(line_part)
        line_range = LineRange(start=line_no, end=line_no)
    else:
        path_part = shorthand
    return CodeAnchor(file_path=Path(path_part), line_range=line_range)
192
+
193
async def get(self, decision_id: str) -> DecisionRecord | None:
    """Get a single decision by ID.

    Cache miss: ``load_all()`` is run (it fills the cache as a side
    effect) and the cache is consulted again; None is returned if the
    ID is still unknown.

    Cache hit: the cached record might be stale if the file was edited
    externally, so the mtime of the conventionally named file
    (``_find_file_for_id``) is compared with the mtime recorded at cache
    time. A newer file is reloaded and re-cached. If no such file is
    found, or the reload yields nothing, the cached record is returned.

    Raises:
        Whatever ``_load_file`` raises when a modified file fails to
        parse.
    """
    if decision_id not in self._cache:
        # Not in cache — scan directory. Only the side effect on
        # self._cache matters here, so the returned list is not kept.
        await self.load_all()
        return self._cache.get(decision_id)

    # Check if file was modified since cache
    file_path = self._find_file_for_id(decision_id)
    if file_path and file_path.exists():
        current_mtime = file_path.stat().st_mtime
        if current_mtime > self._cache_mtimes.get(decision_id, 0):
            record = self._load_file(file_path)
            if record:
                self._cache[decision_id] = record
                self._cache_mtimes[decision_id] = current_mtime
                return record

    return self._cache[decision_id]
218
+
219
async def save(self, record: DecisionRecord) -> Path:
    """Write a decision record to its YAML file and cache it.

    The file lives in the decisions directory (created on demand) and is
    named after the record ID via ``_id_to_filename``, e.g. DEC-042.yaml.
    An existing file with that name is overwritten.

    Returns:
        Path of the file that was written.
    """
    target_dir = self.decisions_dir
    target_dir.mkdir(parents=True, exist_ok=True)
    destination = target_dir / self._id_to_filename(record.id)

    # Block style, original key order and raw unicode keep the file
    # readable for people editing it by hand.
    serialized = yaml.dump(
        self._record_to_yaml_dict(record),
        default_flow_style=False,
        sort_keys=False,
        allow_unicode=True,
        width=100,
    )
    destination.write_text(serialized, encoding="utf-8")

    # Remember the mtime of what was just written so a later get() only
    # reloads when the file changes again.
    self._cache[record.id] = record
    self._cache_mtimes[record.id] = destination.stat().st_mtime

    logger.info("Saved decision %s to %s", record.id, destination)
    return destination
248
+
249
async def query_by_files(
    self,
    file_paths: set[Path],
    min_relevance: float = 0.1,
) -> list[tuple[DecisionRecord, float]]:
    """Score all loaded decisions against a set of files.

    Decisions whose status is SUPERSEDED or EXPIRED are left out. Every
    other decision is scored with ``record.relevance_score(file_paths)``
    and kept when the score is at least ``min_relevance``.

    Returns:
        ``(record, score)`` pairs, highest score first; pairs with equal
        scores keep their load order. Empty when nothing qualifies.
    """
    matches: list[tuple[DecisionRecord, float]] = []

    for candidate in await self.load_all():
        if candidate.status not in (DecisionStatus.SUPERSEDED, DecisionStatus.EXPIRED):
            relevance = candidate.relevance_score(file_paths)
            if relevance >= min_relevance:
                matches.append((candidate, relevance))

    matches.sort(key=lambda pair: pair[1], reverse=True)
    return matches
276
+
277
async def query_by_tags(self, tags: list[str]) -> list[DecisionRecord]:
    """Return the loaded decisions that carry at least one of *tags*.

    Results keep the order in which ``load_all()`` returned them. An
    empty *tags* list matches nothing.
    """
    candidates = await self.load_all()
    wanted = set(tags)

    matched = []
    for decision in candidates:
        if wanted.intersection(decision.tags):
            matched.append(decision)
    return matched
282
+
283
async def find_conflicts(self) -> list[tuple[DecisionRecord, DecisionRecord, str]]:
    """Detect conflicting active decisions.

    Only decisions whose status is ACTIVE are considered. Two heuristics
    are applied, in this order:
    1. Supersedes chain: a decision and a decision that supersedes it are
       both active. Every active superseder is reported, not just the
       first one found.
    2. Overlapping code anchors between any two active decisions, as
       judged by ``_check_anchor_overlap``.

    The same pair can appear twice if it trips both heuristics. These are
    potential conflicts only; decisions touching the same code may address
    different concerns, so resolution is left to humans/agents.

    Returns: list of (decision_a, decision_b, conflict_reason) tuples.
    For heuristic 1, decision_a is the superseded decision and decision_b
    the superseder.
    """
    all_decisions = await self.load_all()
    active = [d for d in all_decisions if d.status == DecisionStatus.ACTIVE]
    conflicts: list[tuple[DecisionRecord, DecisionRecord, str]] = []

    # Index active superseders by the ID they supersede so the chain
    # check is a single pass instead of a rescan per decision.
    superseders: dict[str, list[DecisionRecord]] = {}
    for candidate in active:
        if candidate.supersedes:
            superseders.setdefault(candidate.supersedes, []).append(candidate)

    # Check for supersedes chain conflicts
    for decision in active:
        for superseder in superseders.get(decision.id, ()):
            conflicts.append((
                decision,
                superseder,
                f"{superseder.id} supersedes {decision.id} but both are active",
            ))

    # Check for overlapping code anchors (each unordered pair once)
    for i, d1 in enumerate(active):
        for d2 in active[i + 1 :]:
            overlap = self._check_anchor_overlap(d1, d2)
            if overlap:
                conflicts.append((d1, d2, overlap))

    return conflicts
323
+
324
def _check_anchor_overlap(
    self, d1: DecisionRecord, d2: DecisionRecord
) -> str | None:
    """Check if two decisions have overlapping code anchors.

    Anchors are compared pairwise, d1's anchors in the outer loop. Two
    anchors refer to the same file when their ``file_path.resolve()``
    results are equal. For a same-file pair:
    - if either anchor has no line range, it covers the whole file and
      the pair is reported as a potential overlap;
    - otherwise the pair is reported only if the inclusive line ranges
      actually intersect, not just because they share a file.

    Returns:
        A human-readable reason for the first overlapping pair found, or
        None when no pair overlaps (including when either decision has
        no anchors).
    """
    if not d1.code_anchors or not d2.code_anchors:
        return None

    # resolve() touches the filesystem, so do it once per anchor rather
    # than once per (a1, a2) pair.
    resolved_d2 = [(a2, a2.file_path.resolve()) for a2 in d2.code_anchors]

    for a1 in d1.code_anchors:
        a1_path = a1.file_path.resolve()
        for a2, a2_path in resolved_d2:
            if a1_path != a2_path:
                continue

            # Same file — check line ranges
            if a1.line_range is None or a2.line_range is None:
                # At least one anchors the whole file — potential overlap
                return (
                    f"Both anchor to {a1.file_path.name} "
                    f"(check if they address different concerns)"
                )

            # Inclusive interval intersection test
            if (
                a1.line_range.start <= a2.line_range.end
                and a2.line_range.start <= a1.line_range.end
            ):
                return (
                    f"Overlapping code regions in {a1.file_path.name}: "
                    f"lines {a1.line_range.start}-{a1.line_range.end} "
                    f"and {a2.line_range.start}-{a2.line_range.end}"
                )

    return None
357
+
358
async def next_id(self) -> str:
    """Produce the ID that follows the highest existing ``DEC-<number>`` ID.

    Format: DEC-NNN, zero-padded to at least 3 digits. IDs that do not
    start with "DEC-" or whose remainder is not an integer are ignored.
    Gaps are never refilled: the result is always highest + 1, and
    DEC-001 when no numbered decision exists.
    """
    highest = 0
    for existing in await self.load_all():
        prefix, dash, suffix = existing.id.partition("-")
        if prefix != "DEC" or not dash:
            continue
        try:
            number = int(suffix)
        except (ValueError, IndexError):
            continue
        if number > highest:
            highest = number
    return f"DEC-{highest + 1:03d}"
375
+
376
+ # -------------------------------------------------------------------
377
+ # Private helpers
378
+ # -------------------------------------------------------------------
379
+
380
def _id_to_filename(self, decision_id: str) -> str:
    """Map a decision ID to its YAML filename.

    Forward slashes, backslashes and spaces each become an underscore;
    every other character is kept, and ".yaml" is appended.
    """
    separators_to_underscore = str.maketrans({"/": "_", "\\": "_", " ": "_"})
    return decision_id.translate(separators_to_underscore) + ".yaml"
384
+
385
def _find_file_for_id(self, decision_id: str) -> Path | None:
    """Locate the conventionally named YAML file for a decision ID.

    Only the name produced by ``_id_to_filename`` is checked; files that
    hold the ID under a different name are not found. Returns None when
    that file does not exist.
    """
    candidate = self.decisions_dir / self._id_to_filename(decision_id)
    if not candidate.exists():
        return None
    return candidate
390
+
391
def _record_to_yaml_dict(self, record: DecisionRecord) -> dict:
    """Convert a DecisionRecord to a YAML-friendly dict.

    Starts from ``record.model_dump(mode="json")`` and rewrites the
    ``code_anchors`` list for readability:
    - anchor with a line range and nothing else: "path:start-end"
    - anchor with no line range and nothing else: "path"
    - anchor carrying a ``symbol_name`` or ``content_hash``: kept as a
      mapping with ``file_path``, ``line_range`` (start/end) and those
      fields, because the shorthand string cannot represent them and
      collapsing would silently drop them on save

    NOTE(review): paths are emitted exactly as the dump provides them;
    nothing here makes them relative to the project root.
    """
    data = record.model_dump(mode="json")

    if data.get("code_anchors"):
        anchors = []
        for anchor in data["code_anchors"]:
            fp = anchor["file_path"]
            lr = anchor.get("line_range")
            extras = {
                key: anchor[key]
                for key in ("symbol_name", "content_hash")
                if anchor.get(key) is not None
            }
            if extras:
                # Mapping form, limited to the keys the loader reads back.
                entry = {"file_path": fp}
                if lr:
                    entry["line_range"] = {"start": lr["start"], "end": lr["end"]}
                entry.update(extras)
                anchors.append(entry)
            elif lr:
                anchors.append(f"{fp}:{lr['start']}-{lr['end']}")
            else:
                anchors.append(fp)
        data["code_anchors"] = anchors

    return data
@@ -0,0 +1 @@
1
+ """Drift detection — monitors staleness across all components."""