ctrlcode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. ctrlcode/__init__.py +8 -0
  2. ctrlcode/agents/__init__.py +29 -0
  3. ctrlcode/agents/cleanup.py +388 -0
  4. ctrlcode/agents/communication.py +439 -0
  5. ctrlcode/agents/observability.py +421 -0
  6. ctrlcode/agents/react_loop.py +297 -0
  7. ctrlcode/agents/registry.py +211 -0
  8. ctrlcode/agents/result_parser.py +242 -0
  9. ctrlcode/agents/workflow.py +723 -0
  10. ctrlcode/analysis/__init__.py +28 -0
  11. ctrlcode/analysis/ast_diff.py +163 -0
  12. ctrlcode/analysis/bug_detector.py +149 -0
  13. ctrlcode/analysis/code_graphs.py +329 -0
  14. ctrlcode/analysis/semantic.py +205 -0
  15. ctrlcode/analysis/static.py +183 -0
  16. ctrlcode/analysis/synthesizer.py +281 -0
  17. ctrlcode/analysis/tests.py +189 -0
  18. ctrlcode/cleanup/__init__.py +16 -0
  19. ctrlcode/cleanup/auto_merge.py +350 -0
  20. ctrlcode/cleanup/doc_gardening.py +388 -0
  21. ctrlcode/cleanup/pr_automation.py +330 -0
  22. ctrlcode/cleanup/scheduler.py +356 -0
  23. ctrlcode/config.py +380 -0
  24. ctrlcode/embeddings/__init__.py +6 -0
  25. ctrlcode/embeddings/embedder.py +192 -0
  26. ctrlcode/embeddings/vector_store.py +213 -0
  27. ctrlcode/fuzzing/__init__.py +24 -0
  28. ctrlcode/fuzzing/analyzer.py +280 -0
  29. ctrlcode/fuzzing/budget.py +112 -0
  30. ctrlcode/fuzzing/context.py +665 -0
  31. ctrlcode/fuzzing/context_fuzzer.py +506 -0
  32. ctrlcode/fuzzing/derived_orchestrator.py +732 -0
  33. ctrlcode/fuzzing/oracle_adapter.py +135 -0
  34. ctrlcode/linters/__init__.py +11 -0
  35. ctrlcode/linters/hand_rolled_utils.py +221 -0
  36. ctrlcode/linters/yolo_parsing.py +217 -0
  37. ctrlcode/metrics/__init__.py +6 -0
  38. ctrlcode/metrics/dashboard.py +283 -0
  39. ctrlcode/metrics/tech_debt.py +663 -0
  40. ctrlcode/paths.py +68 -0
  41. ctrlcode/permissions.py +179 -0
  42. ctrlcode/providers/__init__.py +15 -0
  43. ctrlcode/providers/anthropic.py +138 -0
  44. ctrlcode/providers/base.py +77 -0
  45. ctrlcode/providers/openai.py +197 -0
  46. ctrlcode/providers/parallel.py +104 -0
  47. ctrlcode/server.py +871 -0
  48. ctrlcode/session/__init__.py +6 -0
  49. ctrlcode/session/baseline.py +57 -0
  50. ctrlcode/session/manager.py +967 -0
  51. ctrlcode/skills/__init__.py +10 -0
  52. ctrlcode/skills/builtin/commit.toml +29 -0
  53. ctrlcode/skills/builtin/docs.toml +25 -0
  54. ctrlcode/skills/builtin/refactor.toml +33 -0
  55. ctrlcode/skills/builtin/review.toml +28 -0
  56. ctrlcode/skills/builtin/test.toml +28 -0
  57. ctrlcode/skills/loader.py +111 -0
  58. ctrlcode/skills/registry.py +139 -0
  59. ctrlcode/storage/__init__.py +19 -0
  60. ctrlcode/storage/history_db.py +708 -0
  61. ctrlcode/tools/__init__.py +220 -0
  62. ctrlcode/tools/bash.py +112 -0
  63. ctrlcode/tools/browser.py +352 -0
  64. ctrlcode/tools/executor.py +153 -0
  65. ctrlcode/tools/explore.py +486 -0
  66. ctrlcode/tools/mcp.py +108 -0
  67. ctrlcode/tools/observability.py +561 -0
  68. ctrlcode/tools/registry.py +193 -0
  69. ctrlcode/tools/todo.py +291 -0
  70. ctrlcode/tools/update.py +266 -0
  71. ctrlcode/tools/webfetch.py +147 -0
  72. ctrlcode-0.1.0.dist-info/METADATA +93 -0
  73. ctrlcode-0.1.0.dist-info/RECORD +75 -0
  74. ctrlcode-0.1.0.dist-info/WHEEL +4 -0
  75. ctrlcode-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,708 @@
1
+ """SQLite-based historical knowledge database for fuzzing sessions."""
2
+
3
+ import json
4
+ import logging
5
+ import sqlite3
6
+ from dataclasses import dataclass
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+ from typing import Optional
10
+
11
+ import numpy as np
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ @dataclass
17
+ class FuzzingSession:
18
+ """Record of a complete fuzzing session."""
19
+
20
+ session_id: str
21
+ user_request: str
22
+ generated_code: str
23
+ oracle: str
24
+ timestamp: datetime
25
+ num_tests: int
26
+ num_failures: int
27
+ oracle_reused: bool = False
28
+ reused_from: Optional[str] = None
29
+ quality_score: Optional[float] = None
30
+
31
+
32
@dataclass
class CodeRecord:
    """A stored code snippet together with its embedding vector."""

    code_id: str
    session_id: str  # session this snippet is associated with
    code: str
    embedding: np.ndarray  # vector representation of ``code``
    timestamp: datetime
41
+
42
+
43
+ @dataclass
44
+ class OracleRecord:
45
+ """Oracle with embedding and versioning."""
46
+
47
+ oracle_id: str
48
+ session_id: str
49
+ oracle: str
50
+ embedding: np.ndarray
51
+ quality_score: float
52
+ timestamp: datetime
53
+ oracle_version: int = 1
54
+ parent_oracle_id: Optional[str] = None
55
+ reuse_count: int = 0
56
+
57
+
58
@dataclass
class BugPattern:
    """A recorded bug: its description, code snippet, embedding and severity."""

    bug_id: str
    session_id: str
    bug_description: str
    code_snippet: str
    embedding: np.ndarray  # vector stored alongside the bug
    severity: str  # free-form string; no fixed set of levels is defined here
    timestamp: datetime
69
+
70
+
71
@dataclass
class StoredTest:
    """A stored test case with its embedding and pass/fail outcome."""

    test_id: str
    session_id: str
    test_code: str
    embedding: np.ndarray  # vector representation of ``test_code``
    passed: bool
    timestamp: datetime
81
+
82
+
83
class HistoryDB:
    """SQLite database for persistent fuzzing history and knowledge base.

    Schema:
    - fuzzing_sessions: High-level session metadata
    - code_embeddings: Code snippets with embeddings
    - oracle_embeddings: Oracles with embeddings, quality scores and
      versioning columns (oracle_version, parent_oracle_id, reuse_count)
    - bug_patterns: Bug patterns for pattern matching
    - test_cases: Test cases with pass/fail status

    Embeddings are written as raw float32 bytes and decoded with
    ``np.frombuffer(..., dtype=np.float32)``. No shape is stored, so every
    embedding is read back as a one-dimensional array.

    The tables declare foreign keys, but this class never issues
    ``PRAGMA foreign_keys = ON``, so SQLite does not enforce them.

    The class is a context manager; leaving the ``with`` block closes the
    connection.
    """

    # Versioning columns of oracle_embeddings. Database files created before
    # these existed get them added via ALTER TABLE in _initialize_db.
    _ORACLE_VERSIONING_COLUMNS = (
        ("oracle_version", "INTEGER NOT NULL DEFAULT 1"),
        ("parent_oracle_id", "TEXT"),
        ("reuse_count", "INTEGER NOT NULL DEFAULT 0"),
    )

    def __init__(self, db_path: str | Path = ":memory:"):
        """Open (and if necessary create) the database.

        Args:
            db_path: Path to SQLite database file (or :memory: for in-memory)
        """
        self.db_path = Path(db_path) if db_path != ":memory:" else db_path
        self.conn: Optional[sqlite3.Connection] = None
        self._initialize_db()

    def _initialize_db(self) -> None:
        """Connect, then create tables, apply migrations and create indexes."""
        target = str(self.db_path) if self.db_path != ":memory:" else ":memory:"
        # check_same_thread=False disables sqlite3's same-thread check; this
        # class adds no locking of its own.
        self.conn = sqlite3.connect(target, check_same_thread=False)
        # Rows are addressed by column name throughout this class.
        self.conn.row_factory = sqlite3.Row

        cursor = self.conn.cursor()

        # Fuzzing sessions table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS fuzzing_sessions (
                session_id TEXT PRIMARY KEY,
                user_request TEXT NOT NULL,
                generated_code TEXT NOT NULL,
                oracle TEXT NOT NULL,
                timestamp TEXT NOT NULL,
                num_tests INTEGER NOT NULL,
                num_failures INTEGER NOT NULL,
                oracle_reused INTEGER NOT NULL DEFAULT 0,
                reused_from TEXT,
                quality_score REAL
            )
        """)

        # Code embeddings table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS code_embeddings (
                code_id TEXT PRIMARY KEY,
                session_id TEXT NOT NULL,
                code TEXT NOT NULL,
                embedding BLOB NOT NULL,
                timestamp TEXT NOT NULL,
                FOREIGN KEY (session_id) REFERENCES fuzzing_sessions(session_id)
            )
        """)

        # Oracle embeddings table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS oracle_embeddings (
                oracle_id TEXT PRIMARY KEY,
                session_id TEXT NOT NULL,
                oracle TEXT NOT NULL,
                embedding BLOB NOT NULL,
                quality_score REAL NOT NULL,
                timestamp TEXT NOT NULL,
                oracle_version INTEGER NOT NULL DEFAULT 1,
                parent_oracle_id TEXT,
                reuse_count INTEGER NOT NULL DEFAULT 0,
                FOREIGN KEY (session_id) REFERENCES fuzzing_sessions(session_id),
                FOREIGN KEY (parent_oracle_id) REFERENCES oracle_embeddings(oracle_id)
            )
        """)

        # Migration: add versioning columns to an oracle_embeddings table
        # that already existed without them (CREATE TABLE IF NOT EXISTS
        # above leaves such a table untouched).
        cursor.execute("PRAGMA table_info(oracle_embeddings)")
        existing_columns = {info[1] for info in cursor.fetchall()}
        for column, definition in self._ORACLE_VERSIONING_COLUMNS:
            if column in existing_columns:
                continue
            cursor.execute(
                f"ALTER TABLE oracle_embeddings ADD COLUMN {column} {definition}"
            )
            logger.info("Added %s column to oracle_embeddings table", column)

        # Bug patterns table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS bug_patterns (
                bug_id TEXT PRIMARY KEY,
                session_id TEXT NOT NULL,
                bug_description TEXT NOT NULL,
                code_snippet TEXT NOT NULL,
                embedding BLOB NOT NULL,
                severity TEXT NOT NULL,
                timestamp TEXT NOT NULL,
                FOREIGN KEY (session_id) REFERENCES fuzzing_sessions(session_id)
            )
        """)

        # Test cases table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS test_cases (
                test_id TEXT PRIMARY KEY,
                session_id TEXT NOT NULL,
                test_code TEXT NOT NULL,
                embedding BLOB NOT NULL,
                passed INTEGER NOT NULL,
                timestamp TEXT NOT NULL,
                FOREIGN KEY (session_id) REFERENCES fuzzing_sessions(session_id)
            )
        """)

        # Create indexes for faster queries
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_sessions_timestamp
            ON fuzzing_sessions(timestamp)
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_code_session
            ON code_embeddings(session_id)
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_oracle_quality
            ON oracle_embeddings(quality_score DESC)
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_bugs_severity
            ON bug_patterns(severity)
        """)

        self.conn.commit()
        logger.debug("Initialized history database at %s", self.db_path)

    @staticmethod
    def _embedding_blob(embedding) -> bytes:
        """Serialise an embedding as float32 bytes.

        Readers always decode with dtype float32, so the value is cast here;
        writing e.g. float64 bytes unchanged would be read back as a garbage
        vector of twice the length.
        """
        return np.asarray(embedding, dtype=np.float32).tobytes()

    def _fetch_rows(self, query: str, limit: Optional[int]) -> list:
        """Run ``query`` and return all rows, optionally capped by ``limit``.

        ``None`` means no cap. Any other value (including 0) is bound as a
        ``LIMIT ?`` parameter rather than formatted into the SQL text.
        """
        cursor = self.conn.cursor()
        if limit is None:
            return cursor.execute(query).fetchall()
        return cursor.execute(f"{query} LIMIT ?", (limit,)).fetchall()

    def store_session(self, session: FuzzingSession) -> None:
        """Insert a fuzzing session, replacing any row with the same ID.

        Args:
            session: Fuzzing session to store
        """
        values = (
            session.session_id,
            session.user_request,
            session.generated_code,
            session.oracle,
            session.timestamp.isoformat(),
            session.num_tests,
            session.num_failures,
            1 if session.oracle_reused else 0,  # stored as INTEGER 0/1
            session.reused_from,
            session.quality_score,
        )
        self.conn.cursor().execute(
            """
            INSERT OR REPLACE INTO fuzzing_sessions
            (session_id, user_request, generated_code, oracle, timestamp,
            num_tests, num_failures, oracle_reused, reused_from, quality_score)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            values,
        )
        self.conn.commit()
        logger.debug("Stored session %s", session.session_id)

    def store_code(self, record: CodeRecord) -> None:
        """Insert a code record, replacing any row with the same code_id.

        Args:
            record: Code record to store
        """
        self.conn.cursor().execute(
            """
            INSERT OR REPLACE INTO code_embeddings
            (code_id, session_id, code, embedding, timestamp)
            VALUES (?, ?, ?, ?, ?)
            """,
            (
                record.code_id,
                record.session_id,
                record.code,
                self._embedding_blob(record.embedding),
                record.timestamp.isoformat(),
            ),
        )
        self.conn.commit()

    def store_oracle(self, record: OracleRecord) -> None:
        """Insert an oracle record, replacing any row with the same oracle_id.

        Args:
            record: Oracle record to store
        """
        self.conn.cursor().execute(
            """
            INSERT OR REPLACE INTO oracle_embeddings
            (oracle_id, session_id, oracle, embedding, quality_score, timestamp,
            oracle_version, parent_oracle_id, reuse_count)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                record.oracle_id,
                record.session_id,
                record.oracle,
                self._embedding_blob(record.embedding),
                record.quality_score,
                record.timestamp.isoformat(),
                record.oracle_version,
                record.parent_oracle_id,
                record.reuse_count,
            ),
        )
        self.conn.commit()

    def store_bug(self, record: BugPattern) -> None:
        """Insert a bug pattern, replacing any row with the same bug_id.

        Args:
            record: Bug pattern to store
        """
        self.conn.cursor().execute(
            """
            INSERT OR REPLACE INTO bug_patterns
            (bug_id, session_id, bug_description, code_snippet, embedding, severity, timestamp)
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """,
            (
                record.bug_id,
                record.session_id,
                record.bug_description,
                record.code_snippet,
                self._embedding_blob(record.embedding),
                record.severity,
                record.timestamp.isoformat(),
            ),
        )
        self.conn.commit()

    def store_test(self, record: StoredTest) -> None:
        """Insert a test case, replacing any row with the same test_id.

        Args:
            record: Test case to store
        """
        self.conn.cursor().execute(
            """
            INSERT OR REPLACE INTO test_cases
            (test_id, session_id, test_code, embedding, passed, timestamp)
            VALUES (?, ?, ?, ?, ?, ?)
            """,
            (
                record.test_id,
                record.session_id,
                record.test_code,
                self._embedding_blob(record.embedding),
                1 if record.passed else 0,  # stored as INTEGER 0/1
                record.timestamp.isoformat(),
            ),
        )
        self.conn.commit()

    def get_all_code_embeddings(self, limit: Optional[int] = None) -> list[CodeRecord]:
        """Retrieve code records, newest first.

        Args:
            limit: Maximum number of records to retrieve (None for all)

        Returns:
            List of code records
        """
        rows = self._fetch_rows(
            "SELECT * FROM code_embeddings ORDER BY timestamp DESC", limit
        )
        return [self._row_to_code_record(row) for row in rows]

    def get_all_oracle_embeddings(self, limit: Optional[int] = None) -> list[OracleRecord]:
        """Retrieve oracle records, highest quality score first.

        Args:
            limit: Maximum number of records (None for all)

        Returns:
            List of oracle records
        """
        rows = self._fetch_rows(
            "SELECT * FROM oracle_embeddings ORDER BY quality_score DESC", limit
        )
        return [self._row_to_oracle_record(row) for row in rows]

    def get_all_bug_embeddings(self, limit: Optional[int] = None) -> list[BugPattern]:
        """Retrieve bug patterns, newest first.

        Args:
            limit: Maximum number of records (None for all)

        Returns:
            List of bug patterns
        """
        rows = self._fetch_rows(
            "SELECT * FROM bug_patterns ORDER BY timestamp DESC", limit
        )
        return [self._row_to_bug_record(row) for row in rows]

    def get_all_test_embeddings(self, limit: Optional[int] = None) -> list[StoredTest]:
        """Retrieve test cases, newest first.

        Args:
            limit: Maximum number of records (None for all)

        Returns:
            List of test cases
        """
        rows = self._fetch_rows(
            "SELECT * FROM test_cases ORDER BY timestamp DESC", limit
        )
        return [self._row_to_test_record(row) for row in rows]

    def get_session(self, session_id: str) -> Optional[FuzzingSession]:
        """Retrieve session by ID.

        Args:
            session_id: Session identifier

        Returns:
            Fuzzing session, or None when no row has that ID
        """
        row = self.conn.cursor().execute(
            "SELECT * FROM fuzzing_sessions WHERE session_id = ?", (session_id,)
        ).fetchone()
        if not row:
            return None

        return FuzzingSession(
            session_id=row["session_id"],
            user_request=row["user_request"],
            generated_code=row["generated_code"],
            oracle=row["oracle"],
            timestamp=datetime.fromisoformat(row["timestamp"]),
            num_tests=row["num_tests"],
            num_failures=row["num_failures"],
            oracle_reused=bool(row["oracle_reused"]),
            reused_from=row["reused_from"],
            quality_score=row["quality_score"],
        )

    def get_stats(self) -> dict:
        """Get database statistics.

        Returns:
            Dictionary with the keys total_sessions, oracle_reuse_count,
            oracle_reuse_rate, total_bugs, total_tests, test_pass_rate,
            avg_oracle_quality, total_code_embeddings and
            total_oracle_embeddings. Rates and the average are 0.0 when
            there is nothing to divide by or average over.
        """
        cursor = self.conn.cursor()

        def scalar(sql: str):
            # First column of the first row of a single-value query.
            return cursor.execute(sql).fetchone()[0]

        stats = {}

        total_sessions = scalar("SELECT COUNT(*) FROM fuzzing_sessions")
        stats["total_sessions"] = total_sessions

        reused = scalar("SELECT COUNT(*) FROM fuzzing_sessions WHERE oracle_reused = 1")
        stats["oracle_reuse_count"] = reused
        stats["oracle_reuse_rate"] = reused / total_sessions if total_sessions > 0 else 0.0

        stats["total_bugs"] = scalar("SELECT COUNT(*) FROM bug_patterns")

        total_tests = scalar("SELECT COUNT(*) FROM test_cases")
        stats["total_tests"] = total_tests

        passed = scalar("SELECT COUNT(*) FROM test_cases WHERE passed = 1")
        stats["test_pass_rate"] = passed / total_tests if total_tests > 0 else 0.0

        # AVG() yields NULL (None) on an empty table; report 0.0 instead.
        avg_quality = scalar("SELECT AVG(quality_score) FROM oracle_embeddings")
        stats["avg_oracle_quality"] = avg_quality if avg_quality else 0.0

        stats["total_code_embeddings"] = scalar("SELECT COUNT(*) FROM code_embeddings")
        stats["total_oracle_embeddings"] = scalar("SELECT COUNT(*) FROM oracle_embeddings")

        return stats

    def clear(self) -> None:
        """Delete every row from all five tables (the schema is kept)."""
        cursor = self.conn.cursor()
        # Tables that reference fuzzing_sessions are emptied before it.
        for table in (
            "test_cases",
            "bug_patterns",
            "oracle_embeddings",
            "code_embeddings",
            "fuzzing_sessions",
        ):
            cursor.execute(f"DELETE FROM {table}")
        self.conn.commit()
        logger.info("Cleared all data from history database")

    def close(self) -> None:
        """Close database connection; safe to call more than once."""
        if self.conn:
            self.conn.close()
            self.conn = None

    def _row_to_code_record(self, row: sqlite3.Row) -> CodeRecord:
        """Convert database row to CodeRecord."""
        return CodeRecord(
            code_id=row["code_id"],
            session_id=row["session_id"],
            code=row["code"],
            embedding=np.frombuffer(row["embedding"], dtype=np.float32),
            timestamp=datetime.fromisoformat(row["timestamp"]),
        )

    def _row_to_oracle_record(self, row: sqlite3.Row) -> OracleRecord:
        """Convert database row to OracleRecord.

        The three versioning columns fall back to their defaults when the
        row does not contain them.
        """

        def optional(column: str, default):
            # sqlite3.Row has no .get(); a missing column raises on lookup.
            try:
                return row[column]
            except (KeyError, IndexError):
                return default

        return OracleRecord(
            oracle_id=row["oracle_id"],
            session_id=row["session_id"],
            oracle=row["oracle"],
            embedding=np.frombuffer(row["embedding"], dtype=np.float32),
            quality_score=row["quality_score"],
            timestamp=datetime.fromisoformat(row["timestamp"]),
            oracle_version=optional("oracle_version", 1),
            parent_oracle_id=optional("parent_oracle_id", None),
            reuse_count=optional("reuse_count", 0),
        )

    def _row_to_bug_record(self, row: sqlite3.Row) -> BugPattern:
        """Convert database row to BugPattern."""
        return BugPattern(
            bug_id=row["bug_id"],
            session_id=row["session_id"],
            bug_description=row["bug_description"],
            code_snippet=row["code_snippet"],
            embedding=np.frombuffer(row["embedding"], dtype=np.float32),
            severity=row["severity"],
            timestamp=datetime.fromisoformat(row["timestamp"]),
        )

    def _row_to_test_record(self, row: sqlite3.Row) -> StoredTest:
        """Convert database row to StoredTest."""
        return StoredTest(
            test_id=row["test_id"],
            session_id=row["session_id"],
            test_code=row["test_code"],
            embedding=np.frombuffer(row["embedding"], dtype=np.float32),
            passed=bool(row["passed"]),
            timestamp=datetime.fromisoformat(row["timestamp"]),
        )

    def increment_oracle_reuse(self, oracle_id: str) -> None:
        """Increment reuse count for an oracle.

        An unknown ``oracle_id`` updates no rows and raises nothing.

        Args:
            oracle_id: Oracle ID to increment
        """
        self.conn.cursor().execute(
            """
            UPDATE oracle_embeddings
            SET reuse_count = reuse_count + 1
            WHERE oracle_id = ?
            """,
            (oracle_id,),
        )
        self.conn.commit()
        logger.debug("Incremented reuse count for oracle %s", oracle_id)

    def get_golden_oracles(
        self, min_quality: float = 0.8, min_reuse: int = 3, limit: int = 10
    ) -> list[OracleRecord]:
        """Get high-quality, frequently reused oracles ("golden oracles").

        Args:
            min_quality: Minimum quality score (default: 0.8)
            min_reuse: Minimum reuse count (default: 3)
            limit: Maximum results (default: 10)

        Returns:
            Oracle records meeting both thresholds, sorted by
            quality_score * reuse_count, highest first
        """
        rows = self.conn.cursor().execute(
            """
            SELECT *
            FROM oracle_embeddings
            WHERE quality_score >= ? AND reuse_count >= ?
            ORDER BY (quality_score * reuse_count) DESC
            LIMIT ?
            """,
            (min_quality, min_reuse, limit),
        ).fetchall()

        return [self._row_to_oracle_record(row) for row in rows]

    def get_oracle_lineage(self, oracle_id: str) -> list[OracleRecord]:
        """Get oracle lineage (parent chain).

        Args:
            oracle_id: Oracle ID to trace

        Returns:
            List of oracles from newest to oldest (child to ancestor),
            starting with ``oracle_id`` itself; empty if it is unknown
        """
        lineage = []
        seen = set()  # guards against a cycle in parent_oracle_id links
        current_id = oracle_id

        while current_id and current_id not in seen:
            seen.add(current_id)
            row = self.conn.cursor().execute(
                "SELECT * FROM oracle_embeddings WHERE oracle_id = ?",
                (current_id,),
            ).fetchone()
            if not row:
                break

            record = self._row_to_oracle_record(row)
            lineage.append(record)
            current_id = record.parent_oracle_id  # step up to the parent

        return lineage

    def get_oracle_descendants(self, oracle_id: str) -> list[OracleRecord]:
        """Get the oracles directly derived from this oracle.

        Only direct children (rows whose parent_oracle_id equals
        ``oracle_id``) are returned; grandchildren are not followed.

        Args:
            oracle_id: Parent oracle ID

        Returns:
            List of child oracles, oldest first
        """
        rows = self.conn.cursor().execute(
            """
            SELECT * FROM oracle_embeddings
            WHERE parent_oracle_id = ?
            ORDER BY timestamp ASC
            """,
            (oracle_id,),
        ).fetchall()

        return [self._row_to_oracle_record(row) for row in rows]

    def __enter__(self):
        """Context manager entry; returns this instance."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit; closes the connection, never suppresses."""
        self.close()