codebase-intel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. codebase_intel/__init__.py +3 -0
  2. codebase_intel/analytics/__init__.py +1 -0
  3. codebase_intel/analytics/benchmark.py +406 -0
  4. codebase_intel/analytics/feedback.py +496 -0
  5. codebase_intel/analytics/tracker.py +439 -0
  6. codebase_intel/cli/__init__.py +1 -0
  7. codebase_intel/cli/main.py +740 -0
  8. codebase_intel/contracts/__init__.py +1 -0
  9. codebase_intel/contracts/auto_generator.py +438 -0
  10. codebase_intel/contracts/evaluator.py +531 -0
  11. codebase_intel/contracts/models.py +433 -0
  12. codebase_intel/contracts/registry.py +225 -0
  13. codebase_intel/core/__init__.py +1 -0
  14. codebase_intel/core/config.py +248 -0
  15. codebase_intel/core/exceptions.py +454 -0
  16. codebase_intel/core/types.py +375 -0
  17. codebase_intel/decisions/__init__.py +1 -0
  18. codebase_intel/decisions/miner.py +297 -0
  19. codebase_intel/decisions/models.py +302 -0
  20. codebase_intel/decisions/store.py +411 -0
  21. codebase_intel/drift/__init__.py +1 -0
  22. codebase_intel/drift/detector.py +443 -0
  23. codebase_intel/graph/__init__.py +1 -0
  24. codebase_intel/graph/builder.py +391 -0
  25. codebase_intel/graph/parser.py +1232 -0
  26. codebase_intel/graph/query.py +377 -0
  27. codebase_intel/graph/storage.py +736 -0
  28. codebase_intel/mcp/__init__.py +1 -0
  29. codebase_intel/mcp/server.py +710 -0
  30. codebase_intel/orchestrator/__init__.py +1 -0
  31. codebase_intel/orchestrator/assembler.py +649 -0
  32. codebase_intel-0.1.0.dist-info/METADATA +361 -0
  33. codebase_intel-0.1.0.dist-info/RECORD +36 -0
  34. codebase_intel-0.1.0.dist-info/WHEEL +4 -0
  35. codebase_intel-0.1.0.dist-info/entry_points.txt +2 -0
  36. codebase_intel-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,439 @@
1
+ """Analytics tracker — records every context assembly and measures efficiency.
2
+
3
+ This is the "proof" layer. Every time an agent asks for context, we record:
4
+ - How many tokens the naive approach would use (read all files)
5
+ - How many tokens the graph-targeted approach used
6
+ - How many tokens the full pipeline used (graph + decisions + contracts)
7
+ - How many decisions were surfaced
8
+ - How many contract violations were caught
9
+ - How many drift warnings were included
10
+
11
+ Over time, this builds an undeniable case: "codebase-intel saved you X tokens,
12
+ caught Y violations, and surfaced Z decisions you would have missed."
13
+
14
+ Storage: SQLite in .codebase-intel/analytics.db — lightweight, portable, queryable.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import json
20
+ import logging
21
+ import sqlite3
22
+ from datetime import UTC, datetime
23
+ from pathlib import Path
24
+ from typing import Any
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ ANALYTICS_SCHEMA = """
29
+ CREATE TABLE IF NOT EXISTS context_events (
30
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
31
+ timestamp TEXT NOT NULL,
32
+ task_description TEXT NOT NULL,
33
+ files_requested INTEGER NOT NULL DEFAULT 0,
34
+
35
+ -- Token metrics (the core efficiency proof)
36
+ naive_tokens INTEGER NOT NULL DEFAULT 0,
37
+ graph_tokens INTEGER NOT NULL DEFAULT 0,
38
+ full_tokens INTEGER NOT NULL DEFAULT 0,
39
+ budget_tokens INTEGER NOT NULL DEFAULT 0,
40
+
41
+ -- Context composition
42
+ items_included INTEGER NOT NULL DEFAULT 0,
43
+ items_dropped INTEGER NOT NULL DEFAULT 0,
44
+ decisions_surfaced INTEGER NOT NULL DEFAULT 0,
45
+ contracts_applied INTEGER NOT NULL DEFAULT 0,
46
+ drift_warnings INTEGER NOT NULL DEFAULT 0,
47
+ conflicts_detected INTEGER NOT NULL DEFAULT 0,
48
+
49
+ -- Quality signals
50
+ truncated INTEGER NOT NULL DEFAULT 0,
51
+ assembly_time_ms REAL NOT NULL DEFAULT 0,
52
+
53
+ -- Metadata
54
+ metadata_json TEXT DEFAULT '{}'
55
+ );
56
+
57
+ CREATE TABLE IF NOT EXISTS daily_summary (
58
+ date TEXT PRIMARY KEY,
59
+ total_requests INTEGER NOT NULL DEFAULT 0,
60
+ total_naive_tokens INTEGER NOT NULL DEFAULT 0,
61
+ total_graph_tokens INTEGER NOT NULL DEFAULT 0,
62
+ total_full_tokens INTEGER NOT NULL DEFAULT 0,
63
+ total_decisions_surfaced INTEGER NOT NULL DEFAULT 0,
64
+ total_contracts_applied INTEGER NOT NULL DEFAULT 0,
65
+ total_drift_warnings INTEGER NOT NULL DEFAULT 0,
66
+ total_violations_caught INTEGER NOT NULL DEFAULT 0,
67
+ avg_token_reduction_pct REAL NOT NULL DEFAULT 0,
68
+ avg_assembly_time_ms REAL NOT NULL DEFAULT 0
69
+ );
70
+
71
+ CREATE TABLE IF NOT EXISTS benchmark_runs (
72
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
73
+ timestamp TEXT NOT NULL,
74
+ repo_name TEXT NOT NULL,
75
+ repo_path TEXT NOT NULL,
76
+ total_files INTEGER NOT NULL DEFAULT 0,
77
+ total_nodes INTEGER NOT NULL DEFAULT 0,
78
+ total_edges INTEGER NOT NULL DEFAULT 0,
79
+
80
+ -- Benchmark scenarios
81
+ scenarios_json TEXT NOT NULL DEFAULT '[]',
82
+
83
+ -- Aggregate results
84
+ avg_naive_tokens INTEGER NOT NULL DEFAULT 0,
85
+ avg_graph_tokens INTEGER NOT NULL DEFAULT 0,
86
+ avg_full_tokens INTEGER NOT NULL DEFAULT 0,
87
+ avg_token_reduction_pct REAL NOT NULL DEFAULT 0,
88
+ decisions_available INTEGER NOT NULL DEFAULT 0,
89
+ contracts_available INTEGER NOT NULL DEFAULT 0,
90
+
91
+ build_time_ms REAL NOT NULL DEFAULT 0,
92
+ metadata_json TEXT DEFAULT '{}'
93
+ );
94
+ """
95
+
96
+
97
+ class AnalyticsTracker:
98
+ """Records and queries efficiency metrics."""
99
+
100
+ def __init__(self, db_path: Path) -> None:
101
+ self._db_path = db_path
102
+ self._db_path.parent.mkdir(parents=True, exist_ok=True)
103
+ self._conn: sqlite3.Connection | None = None
104
+
105
+ def _get_conn(self) -> sqlite3.Connection:
106
+ if self._conn is None:
107
+ self._conn = sqlite3.connect(str(self._db_path))
108
+ self._conn.row_factory = sqlite3.Row
109
+ self._conn.executescript(ANALYTICS_SCHEMA)
110
+ return self._conn
111
+
112
+ def close(self) -> None:
113
+ if self._conn:
114
+ self._conn.close()
115
+ self._conn = None
116
+
117
+ # -------------------------------------------------------------------
118
+ # Record events
119
+ # -------------------------------------------------------------------
120
+
121
+ def record_context_event(
122
+ self,
123
+ task_description: str,
124
+ files_requested: int,
125
+ naive_tokens: int,
126
+ graph_tokens: int,
127
+ full_tokens: int,
128
+ budget_tokens: int,
129
+ items_included: int = 0,
130
+ items_dropped: int = 0,
131
+ decisions_surfaced: int = 0,
132
+ contracts_applied: int = 0,
133
+ drift_warnings: int = 0,
134
+ conflicts_detected: int = 0,
135
+ truncated: bool = False,
136
+ assembly_time_ms: float = 0.0,
137
+ metadata: dict[str, Any] | None = None,
138
+ ) -> int:
139
+ """Record a single context assembly event.
140
+
141
+ Returns the event ID.
142
+ """
143
+ conn = self._get_conn()
144
+ cursor = conn.execute(
145
+ """
146
+ INSERT INTO context_events (
147
+ timestamp, task_description, files_requested,
148
+ naive_tokens, graph_tokens, full_tokens, budget_tokens,
149
+ items_included, items_dropped, decisions_surfaced,
150
+ contracts_applied, drift_warnings, conflicts_detected,
151
+ truncated, assembly_time_ms, metadata_json
152
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
153
+ """,
154
+ (
155
+ datetime.now(UTC).isoformat(),
156
+ task_description[:500],
157
+ files_requested,
158
+ naive_tokens,
159
+ graph_tokens,
160
+ full_tokens,
161
+ budget_tokens,
162
+ items_included,
163
+ items_dropped,
164
+ decisions_surfaced,
165
+ contracts_applied,
166
+ drift_warnings,
167
+ conflicts_detected,
168
+ int(truncated),
169
+ assembly_time_ms,
170
+ json.dumps(metadata or {}),
171
+ ),
172
+ )
173
+ conn.commit()
174
+ self._update_daily_summary()
175
+ return cursor.lastrowid or 0
176
+
177
+ def record_benchmark(
178
+ self,
179
+ repo_name: str,
180
+ repo_path: str,
181
+ total_files: int,
182
+ total_nodes: int,
183
+ total_edges: int,
184
+ scenarios: list[dict[str, Any]],
185
+ decisions_available: int = 0,
186
+ contracts_available: int = 0,
187
+ build_time_ms: float = 0.0,
188
+ metadata: dict[str, Any] | None = None,
189
+ ) -> int:
190
+ """Record a benchmark run result."""
191
+ conn = self._get_conn()
192
+
193
+ # Compute aggregates from scenarios
194
+ naive_tokens = [s.get("naive_tokens", 0) for s in scenarios]
195
+ graph_tokens = [s.get("graph_tokens", 0) for s in scenarios]
196
+ full_tokens = [s.get("full_tokens", 0) for s in scenarios]
197
+
198
+ avg_naive = sum(naive_tokens) // max(len(naive_tokens), 1)
199
+ avg_graph = sum(graph_tokens) // max(len(graph_tokens), 1)
200
+ avg_full = sum(full_tokens) // max(len(full_tokens), 1)
201
+
202
+ reductions = []
203
+ for n, f in zip(naive_tokens, full_tokens):
204
+ if n > 0:
205
+ reductions.append((1 - f / n) * 100)
206
+ avg_reduction = sum(reductions) / max(len(reductions), 1)
207
+
208
+ cursor = conn.execute(
209
+ """
210
+ INSERT INTO benchmark_runs (
211
+ timestamp, repo_name, repo_path,
212
+ total_files, total_nodes, total_edges,
213
+ scenarios_json,
214
+ avg_naive_tokens, avg_graph_tokens, avg_full_tokens,
215
+ avg_token_reduction_pct,
216
+ decisions_available, contracts_available,
217
+ build_time_ms, metadata_json
218
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
219
+ """,
220
+ (
221
+ datetime.now(UTC).isoformat(),
222
+ repo_name,
223
+ repo_path,
224
+ total_files,
225
+ total_nodes,
226
+ total_edges,
227
+ json.dumps(scenarios),
228
+ avg_naive,
229
+ avg_graph,
230
+ avg_full,
231
+ round(avg_reduction, 1),
232
+ decisions_available,
233
+ contracts_available,
234
+ build_time_ms,
235
+ json.dumps(metadata or {}),
236
+ ),
237
+ )
238
+ conn.commit()
239
+ return cursor.lastrowid or 0
240
+
241
+ # -------------------------------------------------------------------
242
+ # Query metrics
243
+ # -------------------------------------------------------------------
244
+
245
+ def get_lifetime_stats(self) -> dict[str, Any]:
246
+ """Get all-time efficiency statistics."""
247
+ conn = self._get_conn()
248
+ row = conn.execute(
249
+ """
250
+ SELECT
251
+ COUNT(*) as total_requests,
252
+ COALESCE(SUM(naive_tokens), 0) as total_naive,
253
+ COALESCE(SUM(full_tokens), 0) as total_full,
254
+ COALESCE(SUM(graph_tokens), 0) as total_graph,
255
+ COALESCE(SUM(decisions_surfaced), 0) as total_decisions,
256
+ COALESCE(SUM(contracts_applied), 0) as total_contracts,
257
+ COALESCE(SUM(drift_warnings), 0) as total_drift,
258
+ COALESCE(AVG(assembly_time_ms), 0) as avg_assembly_ms,
259
+ COALESCE(SUM(conflicts_detected), 0) as total_conflicts,
260
+ COALESCE(SUM(items_dropped), 0) as total_dropped
261
+ FROM context_events
262
+ """
263
+ ).fetchone()
264
+
265
+ total_naive = row["total_naive"]
266
+ total_full = row["total_full"]
267
+ tokens_saved = total_naive - total_full
268
+ reduction_pct = (tokens_saved / total_naive * 100) if total_naive > 0 else 0
269
+
270
+ return {
271
+ "total_requests": row["total_requests"],
272
+ "tokens": {
273
+ "total_naive": total_naive,
274
+ "total_graph": row["total_graph"],
275
+ "total_full": total_full,
276
+ "total_saved": tokens_saved,
277
+ "reduction_pct": round(reduction_pct, 1),
278
+ },
279
+ "context_quality": {
280
+ "decisions_surfaced": row["total_decisions"],
281
+ "contracts_applied": row["total_contracts"],
282
+ "drift_warnings": row["total_drift"],
283
+ "conflicts_detected": row["total_conflicts"],
284
+ },
285
+ "performance": {
286
+ "avg_assembly_ms": round(row["avg_assembly_ms"], 1),
287
+ "items_dropped": row["total_dropped"],
288
+ },
289
+ }
290
+
291
+ def get_daily_trend(self, days: int = 30) -> list[dict[str, Any]]:
292
+ """Get daily metrics for the last N days."""
293
+ conn = self._get_conn()
294
+ rows = conn.execute(
295
+ """
296
+ SELECT * FROM daily_summary
297
+ ORDER BY date DESC
298
+ LIMIT ?
299
+ """,
300
+ (days,),
301
+ ).fetchall()
302
+ return [dict(row) for row in reversed(rows)]
303
+
304
+ def get_recent_events(self, limit: int = 20) -> list[dict[str, Any]]:
305
+ """Get most recent context events."""
306
+ conn = self._get_conn()
307
+ rows = conn.execute(
308
+ """
309
+ SELECT * FROM context_events
310
+ ORDER BY id DESC
311
+ LIMIT ?
312
+ """,
313
+ (limit,),
314
+ ).fetchall()
315
+ return [dict(row) for row in rows]
316
+
317
+ def get_benchmark_results(self) -> list[dict[str, Any]]:
318
+ """Get all benchmark run results."""
319
+ conn = self._get_conn()
320
+ rows = conn.execute(
321
+ """
322
+ SELECT * FROM benchmark_runs
323
+ ORDER BY timestamp DESC
324
+ """,
325
+ ).fetchall()
326
+ return [dict(row) for row in rows]
327
+
328
+ def get_before_after_comparison(self) -> dict[str, Any]:
329
+ """Generate a before/after comparison summary.
330
+
331
+ "Before" = naive approach (read all requested files)
332
+ "After" = codebase-intel (graph + decisions + contracts)
333
+
334
+ This is the money chart for the README and dashboard.
335
+ """
336
+ stats = self.get_lifetime_stats()
337
+ total_requests = stats["total_requests"]
338
+
339
+ if total_requests == 0:
340
+ return {
341
+ "has_data": False,
342
+ "message": "No context events recorded yet. Use the MCP server to start tracking.",
343
+ }
344
+
345
+ tokens = stats["tokens"]
346
+ quality = stats["context_quality"]
347
+
348
+ return {
349
+ "has_data": True,
350
+ "requests_analyzed": total_requests,
351
+ "before": {
352
+ "label": "Without codebase-intel",
353
+ "tokens_per_request": tokens["total_naive"] // max(total_requests, 1),
354
+ "decisions_available": 0,
355
+ "contract_checks": 0,
356
+ "drift_awareness": False,
357
+ "knows_why": False,
358
+ },
359
+ "after": {
360
+ "label": "With codebase-intel",
361
+ "tokens_per_request": tokens["total_full"] // max(total_requests, 1),
362
+ "decisions_available": quality["decisions_surfaced"],
363
+ "contract_checks": quality["contracts_applied"],
364
+ "drift_awareness": True,
365
+ "knows_why": True,
366
+ },
367
+ "improvement": {
368
+ "token_reduction_pct": tokens["reduction_pct"],
369
+ "tokens_saved_total": tokens["total_saved"],
370
+ "multiplier": round(
371
+ tokens["total_naive"] / max(tokens["total_full"], 1), 1
372
+ ),
373
+ "decisions_that_prevented_mistakes": quality["decisions_surfaced"],
374
+ "violations_caught_before_generation": quality["contracts_applied"],
375
+ },
376
+ }
377
+
378
+ # -------------------------------------------------------------------
379
+ # Internal
380
+ # -------------------------------------------------------------------
381
+
382
+ def _update_daily_summary(self) -> None:
383
+ """Refresh today's daily summary row."""
384
+ conn = self._get_conn()
385
+ today = datetime.now(UTC).strftime("%Y-%m-%d")
386
+
387
+ row = conn.execute(
388
+ """
389
+ SELECT
390
+ COUNT(*) as total_requests,
391
+ COALESCE(SUM(naive_tokens), 0) as total_naive,
392
+ COALESCE(SUM(graph_tokens), 0) as total_graph,
393
+ COALESCE(SUM(full_tokens), 0) as total_full,
394
+ COALESCE(SUM(decisions_surfaced), 0) as total_decisions,
395
+ COALESCE(SUM(contracts_applied), 0) as total_contracts,
396
+ COALESCE(SUM(drift_warnings), 0) as total_drift,
397
+ COALESCE(AVG(assembly_time_ms), 0) as avg_assembly_ms
398
+ FROM context_events
399
+ WHERE timestamp LIKE ?
400
+ """,
401
+ (f"{today}%",),
402
+ ).fetchone()
403
+
404
+ total_naive = row["total_naive"]
405
+ total_full = row["total_full"]
406
+ reduction = (1 - total_full / total_naive) * 100 if total_naive > 0 else 0
407
+
408
+ conn.execute(
409
+ """
410
+ INSERT INTO daily_summary (
411
+ date, total_requests, total_naive_tokens, total_graph_tokens,
412
+ total_full_tokens, total_decisions_surfaced, total_contracts_applied,
413
+ total_drift_warnings, avg_token_reduction_pct, avg_assembly_time_ms
414
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
415
+ ON CONFLICT(date) DO UPDATE SET
416
+ total_requests=excluded.total_requests,
417
+ total_naive_tokens=excluded.total_naive_tokens,
418
+ total_graph_tokens=excluded.total_graph_tokens,
419
+ total_full_tokens=excluded.total_full_tokens,
420
+ total_decisions_surfaced=excluded.total_decisions_surfaced,
421
+ total_contracts_applied=excluded.total_contracts_applied,
422
+ total_drift_warnings=excluded.total_drift_warnings,
423
+ avg_token_reduction_pct=excluded.avg_token_reduction_pct,
424
+ avg_assembly_time_ms=excluded.avg_assembly_time_ms
425
+ """,
426
+ (
427
+ today,
428
+ row["total_requests"],
429
+ total_naive,
430
+ row["total_graph"],
431
+ total_full,
432
+ row["total_decisions"],
433
+ row["total_contracts"],
434
+ row["total_drift"],
435
+ round(reduction, 1),
436
+ round(row["avg_assembly_ms"], 1),
437
+ ),
438
+ )
439
+ conn.commit()
@@ -0,0 +1 @@
1
+ """CLI interface for codebase-intel."""