codebase-intel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. codebase_intel/__init__.py +3 -0
  2. codebase_intel/analytics/__init__.py +1 -0
  3. codebase_intel/analytics/benchmark.py +406 -0
  4. codebase_intel/analytics/feedback.py +496 -0
  5. codebase_intel/analytics/tracker.py +439 -0
  6. codebase_intel/cli/__init__.py +1 -0
  7. codebase_intel/cli/main.py +740 -0
  8. codebase_intel/contracts/__init__.py +1 -0
  9. codebase_intel/contracts/auto_generator.py +438 -0
  10. codebase_intel/contracts/evaluator.py +531 -0
  11. codebase_intel/contracts/models.py +433 -0
  12. codebase_intel/contracts/registry.py +225 -0
  13. codebase_intel/core/__init__.py +1 -0
  14. codebase_intel/core/config.py +248 -0
  15. codebase_intel/core/exceptions.py +454 -0
  16. codebase_intel/core/types.py +375 -0
  17. codebase_intel/decisions/__init__.py +1 -0
  18. codebase_intel/decisions/miner.py +297 -0
  19. codebase_intel/decisions/models.py +302 -0
  20. codebase_intel/decisions/store.py +411 -0
  21. codebase_intel/drift/__init__.py +1 -0
  22. codebase_intel/drift/detector.py +443 -0
  23. codebase_intel/graph/__init__.py +1 -0
  24. codebase_intel/graph/builder.py +391 -0
  25. codebase_intel/graph/parser.py +1232 -0
  26. codebase_intel/graph/query.py +377 -0
  27. codebase_intel/graph/storage.py +736 -0
  28. codebase_intel/mcp/__init__.py +1 -0
  29. codebase_intel/mcp/server.py +710 -0
  30. codebase_intel/orchestrator/__init__.py +1 -0
  31. codebase_intel/orchestrator/assembler.py +649 -0
  32. codebase_intel-0.1.0.dist-info/METADATA +361 -0
  33. codebase_intel-0.1.0.dist-info/RECORD +36 -0
  34. codebase_intel-0.1.0.dist-info/WHEEL +4 -0
  35. codebase_intel-0.1.0.dist-info/entry_points.txt +2 -0
  36. codebase_intel-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,302 @@
1
+ """Decision record models — the schema for capturing "why" in a machine-queryable format.
2
+
3
+ A decision record captures:
4
+ - What was decided
5
+ - Why (context, constraints, alternatives rejected)
6
+ - Where in the code it applies (code anchors)
7
+ - When it was made and when it should be reviewed
8
+ - What other decisions it relates to
9
+
10
+ Edge cases in schema design:
11
+ - Decision without code anchors: valid for org-level decisions ("we use Python 3.11+")
12
+ - Decision with anchors to deleted code: orphaned but possibly still relevant
13
+ - Decision that supersedes another: chain must be traversable
14
+ - Decision with conflicting constraints: e.g., "fast response" vs "complete validation"
15
+ - Decision with expiry date: temporary decisions ("use workaround until library v2")
16
+ - Cross-repo decisions: link format must support external repos
17
+ - Decision authored by an AI agent: must be flagged for human review
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ from datetime import UTC, datetime
23
+ from enum import Enum
24
+ from pathlib import Path
25
+ from typing import Any
26
+
27
+ from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
28
+
29
+ from codebase_intel.core.types import CodeAnchor, DecisionStatus
30
+
31
+
32
class AlternativeConsidered(BaseModel):
    """A candidate option that was weighed for a decision and then turned down.

    Keeping rejected options on record stops later contributors (people
    and AI agents alike) from re-proposing an approach that has already
    been assessed and dismissed. Instances are immutable (``frozen=True``).
    """

    model_config = ConfigDict(frozen=True)

    # Short label identifying the rejected option.
    name: str
    # What the option would have involved.
    description: str
    # Why the option was not chosen.
    rejection_reason: str
    # True when the option was actually tried out rather than rejected on
    # paper only — a stronger signal for future readers.
    was_prototyped: bool = Field(default=False)
46
+
47
+
48
class Constraint(BaseModel):
    """One force (rule, limit or preference) that shaped a decision.

    Illustrative cases:
    - Compliance: "GDPR requires data encryption at rest"
      → source=legal, is_hard=True
    - Performance: "p99 < 200ms for this endpoint"
      → source=sla, is_hard=True
    - Preference: "team prefers explicit over magic"
      → source=team, is_hard=False
    - Temporary: "budget freeze until Q3"
      → carries an expiry_date

    Instances are immutable (``frozen=True``).
    """

    model_config = ConfigDict(frozen=True)

    # Free-text statement of the constraint itself.
    description: str
    # Required: no default is supplied, so callers must state the origin.
    source: str = Field(
        description="Where this constraint comes from: legal, sla, team, business, technical"
    )
    is_hard: bool = Field(
        default=True,
        description="Hard constraints can't be violated. Soft constraints are preferences.",
    )
    expiry_date: datetime | None = Field(
        default=None,
        description="If set, constraint is temporary and should be re-evaluated after this date",
    )

    @field_validator("expiry_date")
    @classmethod
    def ensure_utc(cls, v: datetime | None) -> datetime | None:
        """Normalize ``expiry_date`` to a timezone-aware UTC datetime.

        ``None`` passes through untouched. A value without ``tzinfo`` is
        taken to already be in UTC and is simply tagged as such; a value
        with ``tzinfo`` is converted to UTC.
        """
        if v is None:
            return v
        # Tag naive values as UTC; convert aware values to UTC.
        return v.astimezone(UTC) if v.tzinfo is not None else v.replace(tzinfo=UTC)
85
+
86
+
87
class DecisionRecord(BaseModel):
    """A single architectural or business decision.

    This is the core data structure of the Decision Journal.
    It's designed to be:
    - Human-writable (YAML-friendly)
    - Machine-queryable (structured fields)
    - Code-linked (anchored to specific locations)
    - Temporal (has a lifecycle with review dates)

    Instances are immutable (``frozen=True``). All datetime fields are
    normalized to timezone-aware UTC on validation.
    """

    model_config = ConfigDict(frozen=True)

    # --- Identity ---
    id: str = Field(description="Unique ID, e.g., 'DEC-042'")
    title: str = Field(description="Short summary, e.g., 'Use token bucket for rate limiting'")
    status: DecisionStatus = DecisionStatus.ACTIVE

    # --- Context ---
    context: str = Field(
        description="The situation that prompted this decision — what problem were we solving?"
    )
    decision: str = Field(
        description="What was decided — the actual choice made"
    )
    consequences: list[str] = Field(
        default_factory=list,
        description="Known consequences (positive and negative) of this decision",
    )
    alternatives: list[AlternativeConsidered] = Field(
        default_factory=list,
        description="Options that were evaluated and rejected",
    )
    constraints: list[Constraint] = Field(
        default_factory=list,
        description="Constraints that shaped this decision",
    )

    # --- Code linking ---
    code_anchors: list[CodeAnchor] = Field(
        default_factory=list,
        description="Specific code locations this decision applies to",
    )

    # --- Relationships ---
    supersedes: str | None = Field(
        default=None,
        description="ID of the decision this one replaces",
    )
    related_decisions: list[str] = Field(
        default_factory=list,
        description="IDs of related (but not superseded) decisions",
    )
    tags: list[str] = Field(
        default_factory=list,
        description="Categorization tags: 'architecture', 'security', 'performance', etc.",
    )

    # --- Temporal ---
    created_at: datetime = Field(default_factory=lambda: datetime.now(UTC))
    review_by: datetime | None = Field(
        default=None,
        description="Date by which this decision should be reviewed for continued relevance",
    )
    last_validated: datetime | None = Field(
        default=None,
        description="Last time someone confirmed this decision still applies",
    )

    # --- Provenance ---
    author: str = Field(
        default="unknown",
        description="Who made this decision (person or team name)",
    )
    source: str = Field(
        default="manual",
        description="How this record was created: manual, git-mined, ai-suggested",
    )
    source_ref: str | None = Field(
        default=None,
        description="Reference to source: PR URL, commit hash, meeting notes link",
    )
    confidence: float = Field(
        default=1.0,
        ge=0.0,
        le=1.0,
        description="How confident we are in this record: 1.0=verified, <1.0=auto-extracted",
    )

    # --- Validation ---

    @field_validator("created_at", "review_by", "last_validated")
    @classmethod
    def ensure_utc(cls, v: datetime | None) -> datetime | None:
        """Normalize a datetime field to timezone-aware UTC.

        ``None`` passes through. A value without ``tzinfo`` is taken to
        already be in UTC and is tagged as such; a value with ``tzinfo``
        is converted to UTC.
        """
        if v is None:
            return None
        if v.tzinfo is None:
            return v.replace(tzinfo=UTC)
        return v.astimezone(UTC)

    @model_validator(mode="after")
    def validate_supersedes_chain(self) -> DecisionRecord:
        """Reject a record that names itself as the decision it supersedes.

        Only the self-reference is checked here; longer cycles across
        several records cannot be detected from a single record.

        Raises:
            ValueError: if ``supersedes`` equals this record's own ``id``.
        """
        if self.supersedes == self.id:
            msg = f"Decision {self.id} cannot supersede itself"
            raise ValueError(msg)
        return self

    @property
    def is_stale(self) -> bool:
        """Check if this decision is past its review date.

        Edge case: review_by not set → not stale (no review requested).
        """
        if self.review_by is None:
            return False
        return datetime.now(UTC) > self.review_by

    @property
    def is_expired(self) -> bool:
        """Check if any hard constraint has passed its expiry date.

        Soft constraints and constraints without an expiry date are ignored.

        Edge case: expired constraint doesn't automatically invalidate the decision
        (the decision may still be valid for other reasons). It flags for review.
        """
        # Read the clock once so every constraint is judged against the same instant.
        now = datetime.now(UTC)
        return any(
            constraint.is_hard
            and constraint.expiry_date is not None
            and now > constraint.expiry_date
            for constraint in self.constraints
        )

    @property
    def has_orphaned_anchors(self) -> bool:
        """Check if any code anchors reference files that don't exist.

        This touches the filesystem on every access. Relative anchor
        paths are checked against the current working directory.
        """
        return any(
            not anchor.file_path.exists()
            for anchor in self.code_anchors
        )

    def relevance_score(self, file_paths: set[Path]) -> float:
        """Score how relevant this decision is to a set of files.

        Scoring (the best match across all anchor/file pairs wins):
        - Direct anchor match: 1.0
        - Same directory as an anchor: 0.5
        - Same leading three path components as an anchor: 0.2
        - No match: 0.0

        Edge case: decision with no anchors (org-level) gets a baseline
        score of 0.1 — always slightly relevant.

        Args:
            file_paths: Files to score against. Both these and the anchor
                paths are resolved to absolute paths before comparison.

        Returns:
            One of 1.0, 0.5, 0.2, 0.1 or 0.0 as described above.
        """
        if not self.code_anchors:
            return 0.1  # Org-level decision, always slightly relevant

        anchor_paths = {a.file_path.resolve() for a in self.code_anchors}
        resolved_files = {p.resolve() for p in file_paths}

        # Each tier is a set intersection, so cost is linear in the number
        # of paths instead of comparing every anchor against every file.
        if not anchor_paths.isdisjoint(resolved_files):
            return 1.0

        anchor_dirs = {p.parent for p in anchor_paths}
        file_dirs = {p.parent for p in resolved_files}
        if not anchor_dirs.isdisjoint(file_dirs):
            return 0.5

        # NOTE(review): the paths are absolute at this point, so the first
        # three components include the filesystem root (e.g. '/', 'home',
        # 'user' on POSIX). Confirm this is the intended granularity for
        # "same top-level package".
        anchor_prefixes = {p.parts[:3] for p in anchor_paths if len(p.parts) >= 3}
        file_prefixes = {p.parts[:3] for p in resolved_files if len(p.parts) >= 3}
        if not anchor_prefixes.isdisjoint(file_prefixes):
            return 0.2

        return 0.0

    def to_context_string(self, verbose: bool = False) -> str:
        """Serialize for inclusion in agent context.

        Two modes:
        - Compact: title, status, context, decision and constraints
          (fits in tight budgets)
        - Verbose: additionally lists alternatives and consequences

        In both modes a warning line is appended when the decision is
        past its review date.

        Args:
            verbose: Include alternatives and consequences when True.

        Returns:
            A Markdown-formatted, newline-joined string.
        """
        lines = [
            f"## Decision: {self.title} [{self.id}]",
            f"Status: {self.status.value}",
            "",
            f"**Context:** {self.context}",
            f"**Decision:** {self.decision}",
        ]

        if self.constraints:
            lines.append("")
            lines.append("**Constraints:**")
            for constraint in self.constraints:
                hard = "MUST" if constraint.is_hard else "SHOULD"
                lines.append(
                    f"- [{hard}] {constraint.description} (source: {constraint.source})"
                )

        if verbose:
            if self.alternatives:
                lines.append("")
                lines.append("**Alternatives considered:**")
                lines.extend(
                    f"- {alt.name}: {alt.rejection_reason}" for alt in self.alternatives
                )

            if self.consequences:
                lines.append("")
                lines.append("**Consequences:**")
                lines.extend(f"- {consequence}" for consequence in self.consequences)

        if self.is_stale:
            lines.append("")
            lines.append(f"**WARNING: This decision is past its review date ({self.review_by})**")

        return "\n".join(lines)