ctrlcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ctrlcode/__init__.py +8 -0
- ctrlcode/agents/__init__.py +29 -0
- ctrlcode/agents/cleanup.py +388 -0
- ctrlcode/agents/communication.py +439 -0
- ctrlcode/agents/observability.py +421 -0
- ctrlcode/agents/react_loop.py +297 -0
- ctrlcode/agents/registry.py +211 -0
- ctrlcode/agents/result_parser.py +242 -0
- ctrlcode/agents/workflow.py +723 -0
- ctrlcode/analysis/__init__.py +28 -0
- ctrlcode/analysis/ast_diff.py +163 -0
- ctrlcode/analysis/bug_detector.py +149 -0
- ctrlcode/analysis/code_graphs.py +329 -0
- ctrlcode/analysis/semantic.py +205 -0
- ctrlcode/analysis/static.py +183 -0
- ctrlcode/analysis/synthesizer.py +281 -0
- ctrlcode/analysis/tests.py +189 -0
- ctrlcode/cleanup/__init__.py +16 -0
- ctrlcode/cleanup/auto_merge.py +350 -0
- ctrlcode/cleanup/doc_gardening.py +388 -0
- ctrlcode/cleanup/pr_automation.py +330 -0
- ctrlcode/cleanup/scheduler.py +356 -0
- ctrlcode/config.py +380 -0
- ctrlcode/embeddings/__init__.py +6 -0
- ctrlcode/embeddings/embedder.py +192 -0
- ctrlcode/embeddings/vector_store.py +213 -0
- ctrlcode/fuzzing/__init__.py +24 -0
- ctrlcode/fuzzing/analyzer.py +280 -0
- ctrlcode/fuzzing/budget.py +112 -0
- ctrlcode/fuzzing/context.py +665 -0
- ctrlcode/fuzzing/context_fuzzer.py +506 -0
- ctrlcode/fuzzing/derived_orchestrator.py +732 -0
- ctrlcode/fuzzing/oracle_adapter.py +135 -0
- ctrlcode/linters/__init__.py +11 -0
- ctrlcode/linters/hand_rolled_utils.py +221 -0
- ctrlcode/linters/yolo_parsing.py +217 -0
- ctrlcode/metrics/__init__.py +6 -0
- ctrlcode/metrics/dashboard.py +283 -0
- ctrlcode/metrics/tech_debt.py +663 -0
- ctrlcode/paths.py +68 -0
- ctrlcode/permissions.py +179 -0
- ctrlcode/providers/__init__.py +15 -0
- ctrlcode/providers/anthropic.py +138 -0
- ctrlcode/providers/base.py +77 -0
- ctrlcode/providers/openai.py +197 -0
- ctrlcode/providers/parallel.py +104 -0
- ctrlcode/server.py +871 -0
- ctrlcode/session/__init__.py +6 -0
- ctrlcode/session/baseline.py +57 -0
- ctrlcode/session/manager.py +967 -0
- ctrlcode/skills/__init__.py +10 -0
- ctrlcode/skills/builtin/commit.toml +29 -0
- ctrlcode/skills/builtin/docs.toml +25 -0
- ctrlcode/skills/builtin/refactor.toml +33 -0
- ctrlcode/skills/builtin/review.toml +28 -0
- ctrlcode/skills/builtin/test.toml +28 -0
- ctrlcode/skills/loader.py +111 -0
- ctrlcode/skills/registry.py +139 -0
- ctrlcode/storage/__init__.py +19 -0
- ctrlcode/storage/history_db.py +708 -0
- ctrlcode/tools/__init__.py +220 -0
- ctrlcode/tools/bash.py +112 -0
- ctrlcode/tools/browser.py +352 -0
- ctrlcode/tools/executor.py +153 -0
- ctrlcode/tools/explore.py +486 -0
- ctrlcode/tools/mcp.py +108 -0
- ctrlcode/tools/observability.py +561 -0
- ctrlcode/tools/registry.py +193 -0
- ctrlcode/tools/todo.py +291 -0
- ctrlcode/tools/update.py +266 -0
- ctrlcode/tools/webfetch.py +147 -0
- ctrlcode-0.1.0.dist-info/METADATA +93 -0
- ctrlcode-0.1.0.dist-info/RECORD +75 -0
- ctrlcode-0.1.0.dist-info/WHEEL +4 -0
- ctrlcode-0.1.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,708 @@
|
|
|
1
|
+
"""SQLite-based historical knowledge database for fuzzing sessions."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import sqlite3
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
|
|
13
|
+
# Module-level logger; its name is this module's ``__name__``.
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
|
|
17
|
+
class FuzzingSession:
|
|
18
|
+
"""Record of a complete fuzzing session."""
|
|
19
|
+
|
|
20
|
+
session_id: str
|
|
21
|
+
user_request: str
|
|
22
|
+
generated_code: str
|
|
23
|
+
oracle: str
|
|
24
|
+
timestamp: datetime
|
|
25
|
+
num_tests: int
|
|
26
|
+
num_failures: int
|
|
27
|
+
oracle_reused: bool = False
|
|
28
|
+
reused_from: Optional[str] = None
|
|
29
|
+
quality_score: Optional[float] = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class CodeRecord:
    """A code snippet together with its embedding vector.

    HistoryDB persists these in the ``code_embeddings`` table.

    Attributes:
        code_id: Unique key; storing a record under an existing ID replaces
            the earlier row.
        session_id: ID of the fuzzing session the snippet belongs to.
        code: The snippet text.
        embedding: Vector for the snippet; HistoryDB writes it as raw bytes
            and decodes stored rows as float32.
        timestamp: Written with ``isoformat()`` and read back with
            ``datetime.fromisoformat``.
    """

    code_id: str
    session_id: str
    code: str
    embedding: np.ndarray
    timestamp: datetime
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
|
|
44
|
+
class OracleRecord:
|
|
45
|
+
"""Oracle with embedding and versioning."""
|
|
46
|
+
|
|
47
|
+
oracle_id: str
|
|
48
|
+
session_id: str
|
|
49
|
+
oracle: str
|
|
50
|
+
embedding: np.ndarray
|
|
51
|
+
quality_score: float
|
|
52
|
+
timestamp: datetime
|
|
53
|
+
oracle_version: int = 1
|
|
54
|
+
parent_oracle_id: Optional[str] = None
|
|
55
|
+
reuse_count: int = 0
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass
class BugPattern:
    """A bug description with the offending snippet and its embedding.

    HistoryDB persists these in the ``bug_patterns`` table.

    Attributes:
        bug_id: Unique key; storing a record under an existing ID replaces
            the earlier row.
        session_id: ID of the fuzzing session the bug belongs to.
        bug_description: Text describing the bug.
        code_snippet: Code associated with the bug.
        embedding: Vector for the pattern; HistoryDB writes it as raw bytes
            and decodes stored rows as float32.
        severity: Free-form severity label (the table has an index on it).
        timestamp: Written with ``isoformat()`` and read back with
            ``datetime.fromisoformat``.
    """

    bug_id: str
    session_id: str
    bug_description: str
    code_snippet: str
    embedding: np.ndarray
    severity: str
    timestamp: datetime
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class StoredTest:
    """A test case with its embedding and pass/fail outcome.

    HistoryDB persists these in the ``test_cases`` table.

    Attributes:
        test_id: Unique key; storing a record under an existing ID replaces
            the earlier row.
        session_id: ID of the fuzzing session the test belongs to.
        test_code: The test source text.
        embedding: Vector for the test; HistoryDB writes it as raw bytes and
            decodes stored rows as float32.
        passed: Outcome flag, persisted as 0/1; ``HistoryDB.get_stats`` uses
            it for the test pass rate.
        timestamp: Written with ``isoformat()`` and read back with
            ``datetime.fromisoformat``.
    """

    test_id: str
    session_id: str
    test_code: str
    embedding: np.ndarray
    passed: bool
    timestamp: datetime
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class HistoryDB:
    """SQLite database for persistent fuzzing history and knowledge base.

    Schema:
    - fuzzing_sessions: High-level session metadata
    - code_embeddings: Code snippets with embeddings
    - oracle_embeddings: Oracles with embeddings and quality scores
    - bug_patterns: Bug patterns for pattern matching
    - test_cases: Test cases with pass/fail status

    Storage conventions: embeddings are written as raw float32 bytes and
    decoded with the same dtype, timestamps are ISO-8601 text, and booleans
    are stored as 0/1.

    NOTE: the schema declares FOREIGN KEY clauses, but this class never runs
    ``PRAGMA foreign_keys = ON``, so SQLite does not enforce them on this
    connection.

    The instance can be used as a context manager; leaving the ``with``
    block closes the connection.
    """

    # Single dtype shared by every embedding BLOB: writers cast to it and
    # readers decode with it, so a round trip cannot reinterpret the bytes.
    _EMBEDDING_DTYPE = np.float32

    # Table DDL, executed in order. Every statement is idempotent.
    _SCHEMA_STATEMENTS = (
        """
        CREATE TABLE IF NOT EXISTS fuzzing_sessions (
            session_id TEXT PRIMARY KEY,
            user_request TEXT NOT NULL,
            generated_code TEXT NOT NULL,
            oracle TEXT NOT NULL,
            timestamp TEXT NOT NULL,
            num_tests INTEGER NOT NULL,
            num_failures INTEGER NOT NULL,
            oracle_reused INTEGER NOT NULL DEFAULT 0,
            reused_from TEXT,
            quality_score REAL
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS code_embeddings (
            code_id TEXT PRIMARY KEY,
            session_id TEXT NOT NULL,
            code TEXT NOT NULL,
            embedding BLOB NOT NULL,
            timestamp TEXT NOT NULL,
            FOREIGN KEY (session_id) REFERENCES fuzzing_sessions(session_id)
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS oracle_embeddings (
            oracle_id TEXT PRIMARY KEY,
            session_id TEXT NOT NULL,
            oracle TEXT NOT NULL,
            embedding BLOB NOT NULL,
            quality_score REAL NOT NULL,
            timestamp TEXT NOT NULL,
            oracle_version INTEGER NOT NULL DEFAULT 1,
            parent_oracle_id TEXT,
            reuse_count INTEGER NOT NULL DEFAULT 0,
            FOREIGN KEY (session_id) REFERENCES fuzzing_sessions(session_id),
            FOREIGN KEY (parent_oracle_id) REFERENCES oracle_embeddings(oracle_id)
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS bug_patterns (
            bug_id TEXT PRIMARY KEY,
            session_id TEXT NOT NULL,
            bug_description TEXT NOT NULL,
            code_snippet TEXT NOT NULL,
            embedding BLOB NOT NULL,
            severity TEXT NOT NULL,
            timestamp TEXT NOT NULL,
            FOREIGN KEY (session_id) REFERENCES fuzzing_sessions(session_id)
        )
        """,
        """
        CREATE TABLE IF NOT EXISTS test_cases (
            test_id TEXT PRIMARY KEY,
            session_id TEXT NOT NULL,
            test_code TEXT NOT NULL,
            embedding BLOB NOT NULL,
            passed INTEGER NOT NULL,
            timestamp TEXT NOT NULL,
            FOREIGN KEY (session_id) REFERENCES fuzzing_sessions(session_id)
        )
        """,
    )

    # Indexes for faster queries; created after the tables exist.
    _INDEX_STATEMENTS = (
        """
        CREATE INDEX IF NOT EXISTS idx_sessions_timestamp
        ON fuzzing_sessions(timestamp)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_code_session
        ON code_embeddings(session_id)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_oracle_quality
        ON oracle_embeddings(quality_score DESC)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_bugs_severity
        ON bug_patterns(severity)
        """,
    )

    # Versioning columns added to oracle_embeddings after its first release:
    # (column name, column definition). Applied only when the column is absent.
    _ORACLE_MIGRATIONS = (
        ("oracle_version", "INTEGER NOT NULL DEFAULT 1"),
        ("parent_oracle_id", "TEXT"),
        ("reuse_count", "INTEGER NOT NULL DEFAULT 0"),
    )

    # Tables emptied by clear(), referencing tables before fuzzing_sessions.
    _CLEAR_ORDER = (
        "test_cases",
        "bug_patterns",
        "oracle_embeddings",
        "code_embeddings",
        "fuzzing_sessions",
    )

    def __init__(self, db_path: str | Path = ":memory:"):
        """Initialize database connection.

        Args:
            db_path: Path to SQLite database file (or :memory: for in-memory)
        """
        self.db_path = Path(db_path) if db_path != ":memory:" else db_path
        self.conn: Optional[sqlite3.Connection] = None
        self._initialize_db()

    def _initialize_db(self) -> None:
        """Open the connection and create or migrate the schema if needed."""
        in_memory = self.db_path == ":memory:"
        if not in_memory:
            # sqlite3 creates the database file but not missing directories.
            try:
                Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)
            except OSError as exc:
                # Not fatal here: sqlite3.connect() below reports an unusable
                # path with its usual error type.
                logger.debug("Could not create directory for %s: %s", self.db_path, exc)

        # check_same_thread=False lets other threads use this connection;
        # this class adds no locking of its own.
        self.conn = sqlite3.connect(
            ":memory:" if in_memory else str(self.db_path),
            check_same_thread=False,
        )
        self.conn.row_factory = sqlite3.Row

        cursor = self.conn.cursor()
        for ddl in self._SCHEMA_STATEMENTS:
            cursor.execute(ddl)

        # Migration: add versioning columns to a pre-existing oracle_embeddings table.
        cursor.execute("PRAGMA table_info(oracle_embeddings)")
        existing_columns = {row["name"] for row in cursor.fetchall()}
        for column, definition in self._ORACLE_MIGRATIONS:
            if column in existing_columns:
                continue
            # Identifiers come from the class constant above, never from input.
            cursor.execute(f"ALTER TABLE oracle_embeddings ADD COLUMN {column} {definition}")
            logger.info("Added %s column to oracle_embeddings table", column)

        for ddl in self._INDEX_STATEMENTS:
            cursor.execute(ddl)

        self.conn.commit()
        logger.debug("Initialized history database at %s", self.db_path)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _connection(self) -> sqlite3.Connection:
        """Return the open connection.

        Raises:
            sqlite3.ProgrammingError: If close() has already been called.
        """
        if self.conn is None:
            raise sqlite3.ProgrammingError("HistoryDB connection is closed")
        return self.conn

    def _write(self, sql: str, params: tuple = ()) -> None:
        """Run one statement in its own transaction.

        The connection context manager commits on success and rolls back if
        the statement raises.
        """
        with self._connection() as conn:
            conn.execute(sql, params)

    def _fetch_rows(self, table: str, order_by: str, limit: Optional[int]) -> list:
        """Select every column of ``table``, ordered, optionally limited.

        ``table`` and ``order_by`` must be literals from this class (they are
        spliced into the SQL text); ``limit`` is passed as a bound parameter.
        """
        query = f"SELECT * FROM {table} ORDER BY {order_by}"
        params: tuple = ()
        if limit is not None:
            query += " LIMIT ?"
            params = (limit,)
        return self._connection().execute(query, params).fetchall()

    @classmethod
    def _encode_embedding(cls, embedding) -> bytes:
        """Serialize an embedding as float32 bytes, casting if necessary."""
        return np.asarray(embedding, dtype=cls._EMBEDDING_DTYPE).tobytes()

    @classmethod
    def _decode_embedding(cls, blob: bytes) -> np.ndarray:
        """Decode a stored BLOB into a writable one-dimensional float32 array."""
        # frombuffer yields a read-only view over the bytes; copy so callers
        # may modify the result in place.
        return np.frombuffer(blob, dtype=cls._EMBEDDING_DTYPE).copy()

    @staticmethod
    def _column_or(row: sqlite3.Row, name: str, default):
        """Return ``row[name]``, or ``default`` when the row has no such column.

        sqlite3.Row has no ``.get()``, so membership is checked via ``keys()``.
        """
        return row[name] if name in row.keys() else default

    # ------------------------------------------------------------------
    # Writers
    # ------------------------------------------------------------------

    def store_session(self, session: FuzzingSession) -> None:
        """Store fuzzing session record.

        An existing row with the same ``session_id`` is replaced.

        Args:
            session: Fuzzing session to store
        """
        self._write(
            """
            INSERT OR REPLACE INTO fuzzing_sessions
            (session_id, user_request, generated_code, oracle, timestamp,
             num_tests, num_failures, oracle_reused, reused_from, quality_score)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                session.session_id,
                session.user_request,
                session.generated_code,
                session.oracle,
                session.timestamp.isoformat(),
                session.num_tests,
                session.num_failures,
                1 if session.oracle_reused else 0,
                session.reused_from,
                session.quality_score,
            ),
        )
        logger.debug("Stored session %s", session.session_id)

    def store_code(self, record: CodeRecord) -> None:
        """Store code with embedding.

        An existing row with the same ``code_id`` is replaced.

        Args:
            record: Code record to store
        """
        self._write(
            """
            INSERT OR REPLACE INTO code_embeddings
            (code_id, session_id, code, embedding, timestamp)
            VALUES (?, ?, ?, ?, ?)
            """,
            (
                record.code_id,
                record.session_id,
                record.code,
                self._encode_embedding(record.embedding),
                record.timestamp.isoformat(),
            ),
        )

    def store_oracle(self, record: OracleRecord) -> None:
        """Store oracle with embedding and versioning info.

        An existing row with the same ``oracle_id`` is replaced, including
        its ``reuse_count``.

        Args:
            record: Oracle record to store
        """
        self._write(
            """
            INSERT OR REPLACE INTO oracle_embeddings
            (oracle_id, session_id, oracle, embedding, quality_score, timestamp,
             oracle_version, parent_oracle_id, reuse_count)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                record.oracle_id,
                record.session_id,
                record.oracle,
                self._encode_embedding(record.embedding),
                record.quality_score,
                record.timestamp.isoformat(),
                record.oracle_version,
                record.parent_oracle_id,
                record.reuse_count,
            ),
        )

    def store_bug(self, record: BugPattern) -> None:
        """Store bug pattern with embedding.

        An existing row with the same ``bug_id`` is replaced.

        Args:
            record: Bug pattern to store
        """
        self._write(
            """
            INSERT OR REPLACE INTO bug_patterns
            (bug_id, session_id, bug_description, code_snippet, embedding, severity, timestamp)
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """,
            (
                record.bug_id,
                record.session_id,
                record.bug_description,
                record.code_snippet,
                self._encode_embedding(record.embedding),
                record.severity,
                record.timestamp.isoformat(),
            ),
        )

    def store_test(self, record: StoredTest) -> None:
        """Store test case with embedding.

        An existing row with the same ``test_id`` is replaced.

        Args:
            record: Test case to store
        """
        self._write(
            """
            INSERT OR REPLACE INTO test_cases
            (test_id, session_id, test_code, embedding, passed, timestamp)
            VALUES (?, ?, ?, ?, ?, ?)
            """,
            (
                record.test_id,
                record.session_id,
                record.test_code,
                self._encode_embedding(record.embedding),
                1 if record.passed else 0,
                record.timestamp.isoformat(),
            ),
        )

    # ------------------------------------------------------------------
    # Readers
    # ------------------------------------------------------------------

    def get_all_code_embeddings(self, limit: Optional[int] = None) -> list[CodeRecord]:
        """Retrieve all code embeddings for vector store initialization.

        Args:
            limit: Maximum number of records to retrieve; ``None`` means no
                limit and ``0`` returns an empty list.

        Returns:
            List of code records, newest timestamp first
        """
        rows = self._fetch_rows("code_embeddings", "timestamp DESC", limit)
        return [self._row_to_code_record(row) for row in rows]

    def get_all_oracle_embeddings(self, limit: Optional[int] = None) -> list[OracleRecord]:
        """Retrieve all oracle embeddings.

        Args:
            limit: Maximum number of records; ``None`` means no limit and
                ``0`` returns an empty list.

        Returns:
            List of oracle records, highest quality score first
        """
        rows = self._fetch_rows("oracle_embeddings", "quality_score DESC", limit)
        return [self._row_to_oracle_record(row) for row in rows]

    def get_all_bug_embeddings(self, limit: Optional[int] = None) -> list[BugPattern]:
        """Retrieve all bug pattern embeddings.

        Args:
            limit: Maximum number of records; ``None`` means no limit and
                ``0`` returns an empty list.

        Returns:
            List of bug patterns, newest timestamp first
        """
        rows = self._fetch_rows("bug_patterns", "timestamp DESC", limit)
        return [self._row_to_bug_record(row) for row in rows]

    def get_all_test_embeddings(self, limit: Optional[int] = None) -> list[StoredTest]:
        """Retrieve all test case embeddings.

        Args:
            limit: Maximum number of records; ``None`` means no limit and
                ``0`` returns an empty list.

        Returns:
            List of test cases, newest timestamp first
        """
        rows = self._fetch_rows("test_cases", "timestamp DESC", limit)
        return [self._row_to_test_record(row) for row in rows]

    def get_session(self, session_id: str) -> Optional[FuzzingSession]:
        """Retrieve session by ID.

        Args:
            session_id: Session identifier

        Returns:
            Fuzzing session or None
        """
        row = self._connection().execute(
            "SELECT * FROM fuzzing_sessions WHERE session_id = ?", (session_id,)
        ).fetchone()
        if row is None:
            return None

        return FuzzingSession(
            session_id=row["session_id"],
            user_request=row["user_request"],
            generated_code=row["generated_code"],
            oracle=row["oracle"],
            timestamp=datetime.fromisoformat(row["timestamp"]),
            num_tests=row["num_tests"],
            num_failures=row["num_failures"],
            oracle_reused=bool(row["oracle_reused"]),
            reused_from=row["reused_from"],
            quality_score=row["quality_score"],
        )

    def get_stats(self) -> dict:
        """Get database statistics.

        Returns:
            Dictionary with row counts per table, the oracle reuse count and
            rate, the test pass rate and the average oracle quality score.
            Rates and the average are 0.0 when there are no rows to divide by.
        """
        cursor = self._connection().cursor()

        def scalar(sql: str):
            # Every statistic below is a single-row, single-column aggregate.
            return cursor.execute(sql).fetchone()[0]

        total_sessions = scalar("SELECT COUNT(*) FROM fuzzing_sessions")
        reused = scalar("SELECT COUNT(*) FROM fuzzing_sessions WHERE oracle_reused = 1")
        total_bugs = scalar("SELECT COUNT(*) FROM bug_patterns")
        total_tests = scalar("SELECT COUNT(*) FROM test_cases")
        passed = scalar("SELECT COUNT(*) FROM test_cases WHERE passed = 1")
        # AVG() yields NULL (None) on an empty table.
        avg_quality = scalar("SELECT AVG(quality_score) FROM oracle_embeddings")

        return {
            "total_sessions": total_sessions,
            "oracle_reuse_count": reused,
            "oracle_reuse_rate": reused / total_sessions if total_sessions > 0 else 0.0,
            "total_bugs": total_bugs,
            "total_tests": total_tests,
            "test_pass_rate": passed / total_tests if total_tests > 0 else 0.0,
            "avg_oracle_quality": avg_quality if avg_quality is not None else 0.0,
            "total_code_embeddings": scalar("SELECT COUNT(*) FROM code_embeddings"),
            "total_oracle_embeddings": scalar("SELECT COUNT(*) FROM oracle_embeddings"),
        }

    # ------------------------------------------------------------------
    # Maintenance
    # ------------------------------------------------------------------

    def clear(self) -> None:
        """Clear all data from database.

        All tables are emptied in a single transaction, so a failure leaves
        the data untouched. The schema itself is kept.
        """
        with self._connection() as conn:
            for table in self._CLEAR_ORDER:
                conn.execute(f"DELETE FROM {table}")
        logger.info("Cleared all data from history database")

    def close(self) -> None:
        """Close database connection. Calling it again is a no-op."""
        if self.conn:
            self.conn.close()
            self.conn = None

    # ------------------------------------------------------------------
    # Row conversion
    # ------------------------------------------------------------------

    def _row_to_code_record(self, row: sqlite3.Row) -> CodeRecord:
        """Convert database row to CodeRecord."""
        return CodeRecord(
            code_id=row["code_id"],
            session_id=row["session_id"],
            code=row["code"],
            embedding=self._decode_embedding(row["embedding"]),
            timestamp=datetime.fromisoformat(row["timestamp"]),
        )

    def _row_to_oracle_record(self, row: sqlite3.Row) -> OracleRecord:
        """Convert database row to OracleRecord.

        The versioning columns fall back to their defaults when the row does
        not carry them.
        """
        return OracleRecord(
            oracle_id=row["oracle_id"],
            session_id=row["session_id"],
            oracle=row["oracle"],
            embedding=self._decode_embedding(row["embedding"]),
            quality_score=row["quality_score"],
            timestamp=datetime.fromisoformat(row["timestamp"]),
            oracle_version=self._column_or(row, "oracle_version", 1),
            parent_oracle_id=self._column_or(row, "parent_oracle_id", None),
            reuse_count=self._column_or(row, "reuse_count", 0),
        )

    def _row_to_bug_record(self, row: sqlite3.Row) -> BugPattern:
        """Convert database row to BugPattern."""
        return BugPattern(
            bug_id=row["bug_id"],
            session_id=row["session_id"],
            bug_description=row["bug_description"],
            code_snippet=row["code_snippet"],
            embedding=self._decode_embedding(row["embedding"]),
            severity=row["severity"],
            timestamp=datetime.fromisoformat(row["timestamp"]),
        )

    def _row_to_test_record(self, row: sqlite3.Row) -> StoredTest:
        """Convert database row to StoredTest."""
        return StoredTest(
            test_id=row["test_id"],
            session_id=row["session_id"],
            test_code=row["test_code"],
            embedding=self._decode_embedding(row["embedding"]),
            passed=bool(row["passed"]),
            timestamp=datetime.fromisoformat(row["timestamp"]),
        )

    # ------------------------------------------------------------------
    # Oracle reuse and lineage
    # ------------------------------------------------------------------

    def increment_oracle_reuse(self, oracle_id: str) -> None:
        """Increment reuse count for an oracle.

        An unknown ``oracle_id`` updates no rows and is not an error.

        Args:
            oracle_id: Oracle ID to increment
        """
        self._write(
            """
            UPDATE oracle_embeddings
            SET reuse_count = reuse_count + 1
            WHERE oracle_id = ?
            """,
            (oracle_id,),
        )
        logger.debug("Incremented reuse count for oracle %s", oracle_id)

    def get_golden_oracles(
        self, min_quality: float = 0.8, min_reuse: int = 3, limit: int = 10
    ) -> list[OracleRecord]:
        """Get high-quality, frequently reused oracles.

        These are "golden oracles" that work well and are reused often.

        Args:
            min_quality: Minimum quality score (default: 0.8)
            min_reuse: Minimum reuse count (default: 3)
            limit: Maximum results (default: 10)

        Returns:
            List of golden oracle records, sorted by quality * reuse_count
        """
        rows = self._connection().execute(
            """
            SELECT *
            FROM oracle_embeddings
            WHERE quality_score >= ? AND reuse_count >= ?
            ORDER BY (quality_score * reuse_count) DESC
            LIMIT ?
            """,
            (min_quality, min_reuse, limit),
        ).fetchall()
        return [self._row_to_oracle_record(row) for row in rows]

    def get_oracle_lineage(self, oracle_id: str) -> list[OracleRecord]:
        """Get oracle lineage (parent chain).

        Args:
            oracle_id: Oracle ID to trace

        Returns:
            List of oracles from newest to oldest (child to ancestor). The
            walk stops at the first missing oracle or the first repeated ID.
        """
        cursor = self._connection().cursor()
        lineage = []
        visited = set()
        current_id = oracle_id

        # `visited` guards against a cycle in parent_oracle_id links.
        while current_id and current_id not in visited:
            visited.add(current_id)
            row = cursor.execute(
                "SELECT * FROM oracle_embeddings WHERE oracle_id = ?",
                (current_id,),
            ).fetchone()
            if row is None:
                break

            record = self._row_to_oracle_record(row)
            lineage.append(record)
            current_id = record.parent_oracle_id  # move to parent

        return lineage

    def get_oracle_descendants(self, oracle_id: str) -> list[OracleRecord]:
        """Get the oracles derived directly from this oracle.

        Only direct children (rows whose ``parent_oracle_id`` equals
        ``oracle_id``) are returned; deeper generations are not followed.

        Args:
            oracle_id: Parent oracle ID

        Returns:
            List of child oracles, oldest timestamp first
        """
        rows = self._connection().execute(
            """
            SELECT * FROM oracle_embeddings
            WHERE parent_oracle_id = ?
            ORDER BY timestamp ASC
            """,
            (oracle_id,),
        ).fetchall()
        return [self._row_to_oracle_record(row) for row in rows]

    # ------------------------------------------------------------------
    # Context manager protocol
    # ------------------------------------------------------------------

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: close the connection."""
        self.close()
|