codebase-intel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebase_intel/__init__.py +3 -0
- codebase_intel/analytics/__init__.py +1 -0
- codebase_intel/analytics/benchmark.py +406 -0
- codebase_intel/analytics/feedback.py +496 -0
- codebase_intel/analytics/tracker.py +439 -0
- codebase_intel/cli/__init__.py +1 -0
- codebase_intel/cli/main.py +740 -0
- codebase_intel/contracts/__init__.py +1 -0
- codebase_intel/contracts/auto_generator.py +438 -0
- codebase_intel/contracts/evaluator.py +531 -0
- codebase_intel/contracts/models.py +433 -0
- codebase_intel/contracts/registry.py +225 -0
- codebase_intel/core/__init__.py +1 -0
- codebase_intel/core/config.py +248 -0
- codebase_intel/core/exceptions.py +454 -0
- codebase_intel/core/types.py +375 -0
- codebase_intel/decisions/__init__.py +1 -0
- codebase_intel/decisions/miner.py +297 -0
- codebase_intel/decisions/models.py +302 -0
- codebase_intel/decisions/store.py +411 -0
- codebase_intel/drift/__init__.py +1 -0
- codebase_intel/drift/detector.py +443 -0
- codebase_intel/graph/__init__.py +1 -0
- codebase_intel/graph/builder.py +391 -0
- codebase_intel/graph/parser.py +1232 -0
- codebase_intel/graph/query.py +377 -0
- codebase_intel/graph/storage.py +736 -0
- codebase_intel/mcp/__init__.py +1 -0
- codebase_intel/mcp/server.py +710 -0
- codebase_intel/orchestrator/__init__.py +1 -0
- codebase_intel/orchestrator/assembler.py +649 -0
- codebase_intel-0.1.0.dist-info/METADATA +361 -0
- codebase_intel-0.1.0.dist-info/RECORD +36 -0
- codebase_intel-0.1.0.dist-info/WHEEL +4 -0
- codebase_intel-0.1.0.dist-info/entry_points.txt +2 -0
- codebase_intel-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Analytics — tracks efficiency metrics, proves value over time."""
|
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
"""Benchmark system — measures codebase-intel efficiency against real projects.
|
|
2
|
+
|
|
3
|
+
Runs three scenarios per test case:
|
|
4
|
+
1. NAIVE: Read all files in the scope (what an agent without context does)
|
|
5
|
+
2. GRAPH: Use code graph to find relevant files only
|
|
6
|
+
3. FULL: Graph + decisions + contracts (the complete pipeline)
|
|
7
|
+
|
|
8
|
+
For each scenario, measures:
|
|
9
|
+
- Token count (via tiktoken)
|
|
10
|
+
- Number of files included
|
|
11
|
+
- Number of decisions/contracts surfaced
|
|
12
|
+
- Assembly time
|
|
13
|
+
|
|
14
|
+
Produces a reproducible report: run `codebase-intel benchmark` and get the same
|
|
15
|
+
numbers every time for the same repo state.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import logging
|
|
21
|
+
import time
|
|
22
|
+
from dataclasses import dataclass, field
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import Any
|
|
25
|
+
|
|
26
|
+
from codebase_intel.analytics.tracker import AnalyticsTracker
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class BenchmarkScenario:
    """One benchmark test case: a task prompt plus its expected blast radius."""

    # Human-readable label for this scenario, shown in the report table.
    name: str
    # Task prompt handed to the context assembler (the simulated agent's goal).
    task_description: str
    # Relative paths (from the project root) of the files being "edited".
    target_files: list[str]
    # Expected scope of the change: "narrow" (1-5 files),
    # "medium" (5-20 files), or "wide" (20+ files).
    expected_scope: str
|
+
@dataclass
class ScenarioResult:
    """Result of running one scenario in all three modes.

    Captures token counts, file counts, and assembly timings for each of
    the naive, graph, and full context-assembly strategies, plus how many
    decisions/contracts/drift warnings the full pipeline surfaced.
    """

    name: str
    task_description: str
    # Number of target files that actually existed on disk when the
    # scenario ran; 0 means the scenario was skipped entirely.
    target_files: int

    # Naive: read all files in the target directory
    naive_tokens: int = 0
    naive_files: int = 0

    # Graph: use code graph for relevant files
    graph_tokens: int = 0
    graph_files: int = 0

    # Full: graph + decisions + contracts
    full_tokens: int = 0
    full_files: int = 0
    decisions_surfaced: int = 0
    contracts_applied: int = 0
    drift_warnings: int = 0

    # Timings (milliseconds)
    graph_assembly_ms: float = 0.0
    full_assembly_ms: float = 0.0

    @property
    def naive_vs_graph_reduction(self) -> float:
        """Percent token reduction of graph mode relative to naive mode."""
        if self.naive_tokens == 0:
            return 0.0
        return (1 - self.graph_tokens / self.naive_tokens) * 100

    @property
    def naive_vs_full_reduction(self) -> float:
        """Percent token reduction of full mode relative to naive mode."""
        if self.naive_tokens == 0:
            return 0.0
        return (1 - self.full_tokens / self.naive_tokens) * 100

    @property
    def multiplier(self) -> float:
        """How many times more tokens naive mode used than full mode.

        Returns 0.0 when full mode produced no tokens (avoids division
        by zero; also signals "no data" to report averaging).
        """
        if self.full_tokens == 0:
            return 0.0
        return self.naive_tokens / self.full_tokens

    def to_dict(self) -> dict[str, Any]:
        """Serialize this result for analytics storage and JSON reports.

        The task description is truncated to 80 characters to keep stored
        records compact. Fix: ``drift_warnings`` is now included — it was
        tracked on the dataclass but silently dropped during serialization.
        """
        return {
            "name": self.name,
            "task": self.task_description[:80],
            "target_files": self.target_files,
            "naive_tokens": self.naive_tokens,
            "naive_files": self.naive_files,
            "graph_tokens": self.graph_tokens,
            "graph_files": self.graph_files,
            "full_tokens": self.full_tokens,
            "full_files": self.full_files,
            "decisions_surfaced": self.decisions_surfaced,
            "contracts_applied": self.contracts_applied,
            "drift_warnings": self.drift_warnings,
            "reduction_pct": round(self.naive_vs_full_reduction, 1),
            "multiplier": round(self.multiplier, 1),
            "graph_assembly_ms": round(self.graph_assembly_ms, 1),
            "full_assembly_ms": round(self.full_assembly_ms, 1),
        }
|
|
106
|
+
@dataclass
class BenchmarkReport:
    """Complete benchmark report for a project.

    Aggregates per-scenario results plus graph-build statistics, and can
    render itself as a markdown table for READMEs and reports.
    """

    repo_name: str
    repo_path: str
    total_files: int = 0
    total_nodes: int = 0
    total_edges: int = 0
    build_time_ms: float = 0.0
    scenarios: list[ScenarioResult] = field(default_factory=list)

    @property
    def avg_reduction_pct(self) -> float:
        """Mean naive→full token reduction over scenarios that have data.

        Scenarios with ``naive_tokens == 0`` (skipped/empty) are excluded;
        ``max(len, 1)`` avoids division by zero when none qualify.
        """
        reductions = [s.naive_vs_full_reduction for s in self.scenarios if s.naive_tokens > 0]
        return sum(reductions) / max(len(reductions), 1)

    @property
    def avg_multiplier(self) -> float:
        """Mean naive/full token multiplier over scenarios with full-mode data."""
        mults = [s.multiplier for s in self.scenarios if s.full_tokens > 0]
        return sum(mults) / max(len(mults), 1)

    @property
    def total_decisions_surfaced(self) -> int:
        """Total decisions surfaced across all scenarios."""
        return sum(s.decisions_surfaced for s in self.scenarios)

    @property
    def total_contracts_applied(self) -> int:
        """Total contract rules applied across all scenarios."""
        return sum(s.contracts_applied for s in self.scenarios)

    def format_table(self) -> str:
        """Format as a markdown table for README/reports.

        Fix: constant strings no longer carry a pointless ``f`` prefix
        (ruff F541); the rendered output is byte-identical.
        """
        lines = [
            f"## Benchmark: {self.repo_name}",
            "",
            f"**Graph:** {self.total_files} files → {self.total_nodes} nodes, {self.total_edges} edges (built in {self.build_time_ms:.0f}ms)",
            "",
            "| Scenario | Naive Tokens | Graph Tokens | Full Tokens | Reduction | Multiplier | Decisions | Contracts |",
            "|---|---:|---:|---:|---:|---:|---:|---:|",
        ]

        for s in self.scenarios:
            lines.append(
                f"| {s.name} | {s.naive_tokens:,} | {s.graph_tokens:,} | "
                f"{s.full_tokens:,} | {s.naive_vs_full_reduction:.0f}% | "
                f"{s.multiplier:.1f}x | {s.decisions_surfaced} | {s.contracts_applied} |"
            )

        lines.append(
            f"| **Average** | | | | **{self.avg_reduction_pct:.0f}%** | "
            f"**{self.avg_multiplier:.1f}x** | **{self.total_decisions_surfaced}** | "
            f"**{self.total_contracts_applied}** |"
        )

        return "\n".join(lines)
+
|
|
163
|
+
class BenchmarkRunner:
    """Runs benchmarks against a project.

    Builds the code graph for the project, then runs each scenario in
    naive / graph / full modes and collects the results into a
    ``BenchmarkReport``. All heavy project dependencies are imported
    lazily inside methods.
    """

    def __init__(self, project_root: Path) -> None:
        # Root directory of the repository being benchmarked.
        self._project_root = project_root

    async def run(
        self,
        scenarios: list[BenchmarkScenario] | None = None,
        tracker: AnalyticsTracker | None = None,
    ) -> BenchmarkReport:
        """Run full benchmark suite against the project.

        If no scenarios provided, auto-generates them by picking files
        at different depths and dependency counts.

        Args:
            scenarios: Explicit test cases to run; ``None`` triggers
                auto-generation via ``_auto_scenarios``.
            tracker: Optional analytics tracker; when given, the finished
                report is recorded via ``tracker.record_benchmark``.

        Returns:
            A populated ``BenchmarkReport`` with graph stats and one
            ``ScenarioResult`` per scenario.
        """
        # Lazy imports: keeps module import cheap and avoids import cycles
        # with the graph/config packages.
        from codebase_intel.core.config import ProjectConfig
        from codebase_intel.graph.builder import GraphBuilder
        from codebase_intel.graph.query import GraphQueryEngine
        from codebase_intel.graph.storage import GraphStorage

        config = ProjectConfig(project_root=self._project_root)

        report = BenchmarkReport(
            repo_name=self._project_root.name,
            repo_path=str(self._project_root),
        )

        # Build graph — the full build is timed and reported as
        # build_time_ms (monotonic clock, immune to wall-clock jumps).
        start = time.monotonic()
        async with GraphStorage.open(config.graph, self._project_root) as storage:
            builder = GraphBuilder(config, storage)
            build_result = await builder.full_build()

            report.total_files = build_result.processed
            report.total_nodes = build_result.nodes_created
            report.total_edges = build_result.edges_created
            report.build_time_ms = (time.monotonic() - start) * 1000

            engine = GraphQueryEngine(storage)

            # Auto-generate scenarios if none provided
            if scenarios is None:
                scenarios = await self._auto_scenarios(storage)

            # Run each scenario (sequentially — scenarios share the open
            # storage handle).
            for scenario in scenarios:
                result = await self._run_scenario(
                    scenario, config, storage, engine
                )
                report.scenarios.append(result)

        # Record in tracker if provided
        if tracker:
            tracker.record_benchmark(
                repo_name=report.repo_name,
                repo_path=report.repo_path,
                total_files=report.total_files,
                total_nodes=report.total_nodes,
                total_edges=report.total_edges,
                scenarios=[s.to_dict() for s in report.scenarios],
                build_time_ms=report.build_time_ms,
            )

        return report

    async def _run_scenario(
        self,
        scenario: BenchmarkScenario,
        config: Any,
        storage: Any,
        engine: Any,
    ) -> ScenarioResult:
        """Run a single scenario in naive, graph, and full modes."""
        # estimate_tokens: per the module docstring, token counts come via
        # tiktoken — presumably this helper wraps it; confirm in assembler.
        from codebase_intel.orchestrator.assembler import estimate_tokens

        target_paths = [self._project_root / f for f in scenario.target_files]
        existing_paths = [p for p in target_paths if p.exists()]

        # No target file exists on disk → return an empty (skipped) result.
        if not existing_paths:
            return ScenarioResult(
                name=scenario.name,
                task_description=scenario.task_description,
                target_files=0,
            )

        result = ScenarioResult(
            name=scenario.name,
            task_description=scenario.task_description,
            target_files=len(existing_paths),
        )

        # --- NAIVE: what an agent without context tools typically reads ---
        # Agents usually read the target file + parent dir + any file they
        # can grep for imports. We simulate: all files in parent dir + parent's
        # parent dir (2 levels up from each target file).
        naive_content = ""
        naive_files: set[Path] = set()
        code_extensions = {".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".rs", ".java", ".rb"}
        for fp in existing_paths:
            for ancestor_dir in [fp.parent, fp.parent.parent]:
                if not ancestor_dir.exists():
                    continue
                try:
                    for child in ancestor_dir.iterdir():
                        # De-dup via the set so overlapping ancestor dirs
                        # don't double-count a file's tokens.
                        if child.is_file() and child.suffix in code_extensions and child not in naive_files:
                            naive_files.add(child)
                            try:
                                naive_content += child.read_text(encoding="utf-8", errors="ignore")
                            except OSError:
                                # Unreadable file: skip, best-effort scan.
                                pass
                except OSError:
                    # Directory vanished or unreadable: skip it.
                    pass

        result.naive_tokens = estimate_tokens(naive_content)
        result.naive_files = len(naive_files)

        # --- GRAPH: use code graph for relevant files ---
        start = time.monotonic()
        graph_result = await engine.query_by_files(existing_paths, include_depth=2)
        result.graph_assembly_ms = (time.monotonic() - start) * 1000

        graph_content = ""
        graph_files_set: set[Path] = set()
        for node in graph_result.nodes:
            # node.file_path appears to be an absolute Path (it is used
            # with .exists()/.read_text directly) — confirm in graph.query.
            if node.file_path not in graph_files_set and node.file_path.exists():
                graph_files_set.add(node.file_path)
                try:
                    graph_content += node.file_path.read_text(encoding="utf-8", errors="ignore")
                except OSError:
                    pass

        result.graph_tokens = estimate_tokens(graph_content)
        result.graph_files = len(graph_files_set)

        # --- FULL: graph + decisions + contracts ---
        start = time.monotonic()

        from codebase_intel.contracts.registry import ContractRegistry
        from codebase_intel.decisions.store import DecisionStore
        from codebase_intel.orchestrator.assembler import ContextAssembler
        from codebase_intel.core.types import TokenBudget

        decision_store = DecisionStore(config.decisions, self._project_root)
        contract_registry = ContractRegistry(config.contracts, self._project_root)
        contract_registry.load()

        assembler = ContextAssembler(
            config=config.orchestrator,
            graph_engine=engine,
            decision_store=decision_store,
            contract_registry=contract_registry,
        )

        assembled = await assembler.assemble(
            task_description=scenario.task_description,
            file_paths=existing_paths,
            budget=TokenBudget(total=8_000),  # Realistic agent budget — shows how well we prioritize
        )

        result.full_assembly_ms = (time.monotonic() - start) * 1000
        result.full_tokens = assembled.total_tokens
        # Distinct source files represented in the assembled context
        # (items without a file_path metadata entry are ignored).
        result.full_files = len({
            item.metadata.get("file_path")
            for item in assembled.items
            if item.metadata.get("file_path")
        })
        result.decisions_surfaced = sum(
            1 for item in assembled.items if item.item_type == "decision"
        )
        result.contracts_applied = sum(
            1 for item in assembled.items if item.item_type == "contract_rule"
        )
        result.drift_warnings = len(assembled.warnings)

        return result

    async def _auto_scenarios(self, storage: Any) -> list[BenchmarkScenario]:
        """Auto-generate benchmark scenarios from the project structure.

        Picks files at different levels:
        1. A deep leaf file (few dependents) — narrow scope
        2. A mid-level file (some dependents) — medium scope
        3. A core/shared file (many dependents) — wide scope
        """
        # Get files sorted by number of nodes (proxy for complexity).
        # NOTE(review): reaches into storage._db (private attribute) — an
        # encapsulation break; consider a public query method on storage.
        cursor = await storage._db.execute(
            """
            SELECT file_path, COUNT(*) as node_count
            FROM nodes
            WHERE is_test = 0 AND is_generated = 0
            GROUP BY file_path
            ORDER BY node_count DESC
            """
        )
        files = await cursor.fetchall()

        # Empty graph → nothing to benchmark.
        if not files:
            return []

        scenarios: list[BenchmarkScenario] = []

        # Core file (most nodes — likely a models or utils file)
        if len(files) >= 1:
            core_file = files[0][0]
            scenarios.append(BenchmarkScenario(
                name="Core module change",
                task_description=f"Refactor the core module at {Path(core_file).name}",
                target_files=[core_file],
                expected_scope="wide",
            ))

        # Mid-level file (roughly the 33rd percentile by node count)
        mid_idx = len(files) // 3
        if len(files) > mid_idx:
            mid_file = files[mid_idx][0]
            scenarios.append(BenchmarkScenario(
                name="Feature module change",
                task_description=f"Add a new endpoint to {Path(mid_file).name}",
                target_files=[mid_file],
                expected_scope="medium",
            ))

        # Leaf file (fewest nodes)
        if len(files) >= 3:
            leaf_file = files[-1][0]
            scenarios.append(BenchmarkScenario(
                name="Leaf file change",
                task_description=f"Fix a bug in {Path(leaf_file).name}",
                target_files=[leaf_file],
                expected_scope="narrow",
            ))

        # Multi-file change (top file + median file together)
        if len(files) >= 5:
            multi_files = [files[0][0], files[len(files) // 2][0]]
            scenarios.append(BenchmarkScenario(
                name="Cross-module refactor",
                task_description="Refactor shared types used across multiple modules",
                target_files=multi_files,
                expected_scope="wide",
            ))

        return scenarios