codebase-intel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. codebase_intel/__init__.py +3 -0
  2. codebase_intel/analytics/__init__.py +1 -0
  3. codebase_intel/analytics/benchmark.py +406 -0
  4. codebase_intel/analytics/feedback.py +496 -0
  5. codebase_intel/analytics/tracker.py +439 -0
  6. codebase_intel/cli/__init__.py +1 -0
  7. codebase_intel/cli/main.py +740 -0
  8. codebase_intel/contracts/__init__.py +1 -0
  9. codebase_intel/contracts/auto_generator.py +438 -0
  10. codebase_intel/contracts/evaluator.py +531 -0
  11. codebase_intel/contracts/models.py +433 -0
  12. codebase_intel/contracts/registry.py +225 -0
  13. codebase_intel/core/__init__.py +1 -0
  14. codebase_intel/core/config.py +248 -0
  15. codebase_intel/core/exceptions.py +454 -0
  16. codebase_intel/core/types.py +375 -0
  17. codebase_intel/decisions/__init__.py +1 -0
  18. codebase_intel/decisions/miner.py +297 -0
  19. codebase_intel/decisions/models.py +302 -0
  20. codebase_intel/decisions/store.py +411 -0
  21. codebase_intel/drift/__init__.py +1 -0
  22. codebase_intel/drift/detector.py +443 -0
  23. codebase_intel/graph/__init__.py +1 -0
  24. codebase_intel/graph/builder.py +391 -0
  25. codebase_intel/graph/parser.py +1232 -0
  26. codebase_intel/graph/query.py +377 -0
  27. codebase_intel/graph/storage.py +736 -0
  28. codebase_intel/mcp/__init__.py +1 -0
  29. codebase_intel/mcp/server.py +710 -0
  30. codebase_intel/orchestrator/__init__.py +1 -0
  31. codebase_intel/orchestrator/assembler.py +649 -0
  32. codebase_intel-0.1.0.dist-info/METADATA +361 -0
  33. codebase_intel-0.1.0.dist-info/RECORD +36 -0
  34. codebase_intel-0.1.0.dist-info/WHEEL +4 -0
  35. codebase_intel-0.1.0.dist-info/entry_points.txt +2 -0
  36. codebase_intel-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,3 @@
1
+ """Codebase Intelligence Platform — structured context for AI coding agents."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1 @@
1
+ """Analytics — tracks efficiency metrics, proves value over time."""
@@ -0,0 +1,406 @@
1
+ """Benchmark system — measures codebase-intel efficiency against real projects.
2
+
3
+ Runs three scenarios per test case:
4
+ 1. NAIVE: Read all files in the scope (what an agent without context does)
5
+ 2. GRAPH: Use code graph to find relevant files only
6
+ 3. FULL: Graph + decisions + contracts (the complete pipeline)
7
+
8
+ For each scenario, measures:
9
+ - Token count (via tiktoken)
10
+ - Number of files included
11
+ - Number of decisions/contracts surfaced
12
+ - Assembly time
13
+
14
+ Produces a reproducible report: run `codebase-intel benchmark` and get the same
15
+ numbers every time for the same repo state.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ import time
22
+ from dataclasses import dataclass, field
23
+ from pathlib import Path
24
+ from typing import Any
25
+
26
+ from codebase_intel.analytics.tracker import AnalyticsTracker
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
@dataclass
class BenchmarkScenario:
    """One benchmark test case: a task description plus the files it touches."""

    name: str
    task_description: str
    # Paths of the files the task "edits", relative to the project root.
    target_files: list[str]
    # Rough breadth of the change: "narrow" (1-5 files), "medium" (5-20), "wide" (20+).
    expected_scope: str
39
+
40
+
41
@dataclass
class ScenarioResult:
    """Measurements for one scenario executed in naive, graph, and full modes."""

    name: str
    task_description: str
    target_files: int

    # Naive mode: every code file around the targets is read wholesale.
    naive_tokens: int = 0
    naive_files: int = 0

    # Graph mode: only the files the code graph deems relevant.
    graph_tokens: int = 0
    graph_files: int = 0

    # Full mode: graph plus decisions and contracts.
    full_tokens: int = 0
    full_files: int = 0
    decisions_surfaced: int = 0
    contracts_applied: int = 0
    drift_warnings: int = 0

    # Assembly timings, in milliseconds.
    graph_assembly_ms: float = 0.0
    full_assembly_ms: float = 0.0

    @property
    def naive_vs_graph_reduction(self) -> float:
        """Percent of naive tokens saved by graph mode (0.0 when naive is empty)."""
        naive = self.naive_tokens
        return 0.0 if naive == 0 else (1 - self.graph_tokens / naive) * 100

    @property
    def naive_vs_full_reduction(self) -> float:
        """Percent of naive tokens saved by full mode (0.0 when naive is empty)."""
        naive = self.naive_tokens
        return 0.0 if naive == 0 else (1 - self.full_tokens / naive) * 100

    @property
    def multiplier(self) -> float:
        """How many times more tokens naive mode used than full mode."""
        full = self.full_tokens
        return 0.0 if full == 0 else self.naive_tokens / full

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict; task text is truncated to 80 characters."""
        return dict(
            name=self.name,
            task=self.task_description[:80],
            target_files=self.target_files,
            naive_tokens=self.naive_tokens,
            naive_files=self.naive_files,
            graph_tokens=self.graph_tokens,
            graph_files=self.graph_files,
            full_tokens=self.full_tokens,
            full_files=self.full_files,
            decisions_surfaced=self.decisions_surfaced,
            contracts_applied=self.contracts_applied,
            reduction_pct=round(self.naive_vs_full_reduction, 1),
            multiplier=round(self.multiplier, 1),
            graph_assembly_ms=round(self.graph_assembly_ms, 1),
            full_assembly_ms=round(self.full_assembly_ms, 1),
        )
104
+
105
+
106
@dataclass
class BenchmarkReport:
    """Complete benchmark report for a project.

    Aggregates graph-build stats and the per-scenario results, and can
    render itself as a markdown table via ``format_table``.
    """

    repo_name: str
    repo_path: str
    total_files: int = 0
    total_nodes: int = 0
    total_edges: int = 0
    build_time_ms: float = 0.0
    scenarios: list[ScenarioResult] = field(default_factory=list)

    @property
    def avg_reduction_pct(self) -> float:
        """Mean naive→full token reduction over scenarios with naive tokens."""
        reductions = [s.naive_vs_full_reduction for s in self.scenarios if s.naive_tokens > 0]
        # max(..., 1) avoids ZeroDivisionError when no scenario qualifies.
        return sum(reductions) / max(len(reductions), 1)

    @property
    def avg_multiplier(self) -> float:
        """Mean naive/full token multiplier over scenarios with full tokens."""
        mults = [s.multiplier for s in self.scenarios if s.full_tokens > 0]
        return sum(mults) / max(len(mults), 1)

    @property
    def total_decisions_surfaced(self) -> int:
        """Decisions surfaced, summed over all scenarios."""
        return sum(s.decisions_surfaced for s in self.scenarios)

    @property
    def total_contracts_applied(self) -> int:
        """Contract rules applied, summed over all scenarios."""
        return sum(s.contracts_applied for s in self.scenarios)

    def format_table(self) -> str:
        """Format as a markdown table for README/reports.

        Returns:
            Markdown text: a heading, graph stats, one row per scenario,
            and a bolded averages row.
        """
        # Fix: constant rows were needlessly marked as f-strings (ruff F541);
        # only rows that interpolate values keep the f prefix.
        lines = [
            f"## Benchmark: {self.repo_name}",
            "",
            f"**Graph:** {self.total_files} files → {self.total_nodes} nodes, {self.total_edges} edges (built in {self.build_time_ms:.0f}ms)",
            "",
            "| Scenario | Naive Tokens | Graph Tokens | Full Tokens | Reduction | Multiplier | Decisions | Contracts |",
            "|---|---:|---:|---:|---:|---:|---:|---:|",
        ]

        for s in self.scenarios:
            lines.append(
                f"| {s.name} | {s.naive_tokens:,} | {s.graph_tokens:,} | "
                f"{s.full_tokens:,} | {s.naive_vs_full_reduction:.0f}% | "
                f"{s.multiplier:.1f}x | {s.decisions_surfaced} | {s.contracts_applied} |"
            )

        lines.append(
            f"| **Average** | | | | **{self.avg_reduction_pct:.0f}%** | "
            f"**{self.avg_multiplier:.1f}x** | **{self.total_decisions_surfaced}** | "
            f"**{self.total_contracts_applied}** |"
        )

        return "\n".join(lines)
161
+
162
+
163
class BenchmarkRunner:
    """Runs benchmarks against a project.

    Builds the code graph for the project root, then executes each
    scenario in three modes (naive / graph / full) and collects the
    measurements into a BenchmarkReport.
    """

    def __init__(self, project_root: Path) -> None:
        # Root directory of the repository being benchmarked.
        self._project_root = project_root

    async def run(
        self,
        scenarios: list[BenchmarkScenario] | None = None,
        tracker: AnalyticsTracker | None = None,
    ) -> BenchmarkReport:
        """Run full benchmark suite against the project.

        If no scenarios provided, auto-generates them by picking files
        at different depths and dependency counts.

        Args:
            scenarios: Test cases to run; auto-generated when ``None``.
            tracker: Optional analytics tracker; when given, the finished
                report is recorded via ``tracker.record_benchmark``.

        Returns:
            The populated ``BenchmarkReport``.
        """
        # Local imports — presumably to avoid import cycles or heavy
        # module-load cost; confirm against the package layout.
        from codebase_intel.core.config import ProjectConfig
        from codebase_intel.graph.builder import GraphBuilder
        from codebase_intel.graph.query import GraphQueryEngine
        from codebase_intel.graph.storage import GraphStorage

        config = ProjectConfig(project_root=self._project_root)

        report = BenchmarkReport(
            repo_name=self._project_root.name,
            repo_path=str(self._project_root),
        )

        # Build graph; build_time_ms covers the full build, not the scenarios.
        start = time.monotonic()
        async with GraphStorage.open(config.graph, self._project_root) as storage:
            builder = GraphBuilder(config, storage)
            build_result = await builder.full_build()

            report.total_files = build_result.processed
            report.total_nodes = build_result.nodes_created
            report.total_edges = build_result.edges_created
            report.build_time_ms = (time.monotonic() - start) * 1000

            engine = GraphQueryEngine(storage)

            # Auto-generate scenarios if none provided
            if scenarios is None:
                scenarios = await self._auto_scenarios(storage)

            # Run each scenario while the storage context is still open.
            for scenario in scenarios:
                result = await self._run_scenario(
                    scenario, config, storage, engine
                )
                report.scenarios.append(result)

        # Record in tracker if provided
        if tracker:
            tracker.record_benchmark(
                repo_name=report.repo_name,
                repo_path=report.repo_path,
                total_files=report.total_files,
                total_nodes=report.total_nodes,
                total_edges=report.total_edges,
                scenarios=[s.to_dict() for s in report.scenarios],
                build_time_ms=report.build_time_ms,
            )

        return report

    async def _run_scenario(
        self,
        scenario: BenchmarkScenario,
        config: Any,
        storage: Any,
        engine: Any,
    ) -> ScenarioResult:
        """Run a single scenario in naive, graph, and full modes.

        NOTE(review): the ``storage`` parameter is accepted but never used
        in this body — verify whether it can be dropped or is kept for
        interface symmetry with callers.
        """
        from codebase_intel.orchestrator.assembler import estimate_tokens

        # Resolve targets against the project root; silently skip missing files.
        target_paths = [self._project_root / f for f in scenario.target_files]
        existing_paths = [p for p in target_paths if p.exists()]

        if not existing_paths:
            # Nothing to measure: return an all-zero result for this scenario.
            return ScenarioResult(
                name=scenario.name,
                task_description=scenario.task_description,
                target_files=0,
            )

        result = ScenarioResult(
            name=scenario.name,
            task_description=scenario.task_description,
            target_files=len(existing_paths),
        )

        # --- NAIVE: what an agent without context tools typically reads ---
        # Agents usually read the target file + parent dir + any file they
        # can grep for imports. We simulate: all files in parent dir + parent's
        # parent dir (2 levels up from each target file).
        naive_content = ""
        naive_files: set[Path] = set()
        code_extensions = {".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".rs", ".java", ".rb"}
        for fp in existing_paths:
            for ancestor_dir in [fp.parent, fp.parent.parent]:
                if not ancestor_dir.exists():
                    continue
                try:
                    for child in ancestor_dir.iterdir():
                        # Dedup via the set so shared parents aren't read twice.
                        if child.is_file() and child.suffix in code_extensions and child not in naive_files:
                            naive_files.add(child)
                            try:
                                naive_content += child.read_text(encoding="utf-8", errors="ignore")
                            except OSError:
                                # Best-effort read: unreadable files just don't count tokens.
                                pass
                except OSError:
                    # Directory vanished or is unreadable — skip it.
                    pass

        result.naive_tokens = estimate_tokens(naive_content)
        result.naive_files = len(naive_files)

        # --- GRAPH: use code graph for relevant files ---
        start = time.monotonic()
        graph_result = await engine.query_by_files(existing_paths, include_depth=2)
        result.graph_assembly_ms = (time.monotonic() - start) * 1000

        graph_content = ""
        graph_files_set: set[Path] = set()
        for node in graph_result.nodes:
            if node.file_path not in graph_files_set and node.file_path.exists():
                graph_files_set.add(node.file_path)
                try:
                    graph_content += node.file_path.read_text(encoding="utf-8", errors="ignore")
                except OSError:
                    pass

        result.graph_tokens = estimate_tokens(graph_content)
        result.graph_files = len(graph_files_set)

        # --- FULL: graph + decisions + contracts ---
        start = time.monotonic()

        from codebase_intel.contracts.registry import ContractRegistry
        from codebase_intel.decisions.store import DecisionStore
        from codebase_intel.orchestrator.assembler import ContextAssembler
        from codebase_intel.core.types import TokenBudget

        decision_store = DecisionStore(config.decisions, self._project_root)
        contract_registry = ContractRegistry(config.contracts, self._project_root)
        contract_registry.load()

        assembler = ContextAssembler(
            config=config.orchestrator,
            graph_engine=engine,
            decision_store=decision_store,
            contract_registry=contract_registry,
        )

        assembled = await assembler.assemble(
            task_description=scenario.task_description,
            file_paths=existing_paths,
            budget=TokenBudget(total=8_000),  # Realistic agent budget — shows how well we prioritize
        )

        result.full_assembly_ms = (time.monotonic() - start) * 1000
        result.full_tokens = assembled.total_tokens
        # Count distinct files among assembled items; items without a
        # file_path in metadata (e.g. decisions) are excluded from the count.
        result.full_files = len({
            item.metadata.get("file_path")
            for item in assembled.items
            if item.metadata.get("file_path")
        })
        result.decisions_surfaced = sum(
            1 for item in assembled.items if item.item_type == "decision"
        )
        result.contracts_applied = sum(
            1 for item in assembled.items if item.item_type == "contract_rule"
        )
        result.drift_warnings = len(assembled.warnings)

        return result

    async def _auto_scenarios(self, storage: Any) -> list[BenchmarkScenario]:
        """Auto-generate benchmark scenarios from the project structure.

        Picks files at different levels:
        1. A deep leaf file (few dependents) — narrow scope
        2. A mid-level file (some dependents) — medium scope
        3. A core/shared file (many dependents) — wide scope
        """
        # Get files sorted by number of nodes (proxy for complexity).
        # NOTE(review): reaches into storage._db (private attribute) —
        # consider a public query method on GraphStorage instead.
        cursor = await storage._db.execute(
            """
            SELECT file_path, COUNT(*) as node_count
            FROM nodes
            WHERE is_test = 0 AND is_generated = 0
            GROUP BY file_path
            ORDER BY node_count DESC
            """
        )
        files = await cursor.fetchall()

        if not files:
            return []

        scenarios: list[BenchmarkScenario] = []

        # Core file (most nodes — likely a models or utils file)
        if len(files) >= 1:
            core_file = files[0][0]
            scenarios.append(BenchmarkScenario(
                name="Core module change",
                task_description=f"Refactor the core module at {Path(core_file).name}",
                target_files=[core_file],
                expected_scope="wide",
            ))

        # Mid-level file (roughly one third down the complexity ranking)
        mid_idx = len(files) // 3
        if len(files) > mid_idx:
            mid_file = files[mid_idx][0]
            scenarios.append(BenchmarkScenario(
                name="Feature module change",
                task_description=f"Add a new endpoint to {Path(mid_file).name}",
                target_files=[mid_file],
                expected_scope="medium",
            ))

        # Leaf file (fewest nodes)
        if len(files) >= 3:
            leaf_file = files[-1][0]
            scenarios.append(BenchmarkScenario(
                name="Leaf file change",
                task_description=f"Fix a bug in {Path(leaf_file).name}",
                target_files=[leaf_file],
                expected_scope="narrow",
            ))

        # Multi-file change (pairs the most complex file with a mid-ranked one)
        if len(files) >= 5:
            multi_files = [files[0][0], files[len(files) // 2][0]]
            scenarios.append(BenchmarkScenario(
                name="Cross-module refactor",
                task_description="Refactor shared types used across multiple modules",
                target_files=multi_files,
                expected_scope="wide",
            ))

        return scenarios