ctrlcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ctrlcode/__init__.py +8 -0
- ctrlcode/agents/__init__.py +29 -0
- ctrlcode/agents/cleanup.py +388 -0
- ctrlcode/agents/communication.py +439 -0
- ctrlcode/agents/observability.py +421 -0
- ctrlcode/agents/react_loop.py +297 -0
- ctrlcode/agents/registry.py +211 -0
- ctrlcode/agents/result_parser.py +242 -0
- ctrlcode/agents/workflow.py +723 -0
- ctrlcode/analysis/__init__.py +28 -0
- ctrlcode/analysis/ast_diff.py +163 -0
- ctrlcode/analysis/bug_detector.py +149 -0
- ctrlcode/analysis/code_graphs.py +329 -0
- ctrlcode/analysis/semantic.py +205 -0
- ctrlcode/analysis/static.py +183 -0
- ctrlcode/analysis/synthesizer.py +281 -0
- ctrlcode/analysis/tests.py +189 -0
- ctrlcode/cleanup/__init__.py +16 -0
- ctrlcode/cleanup/auto_merge.py +350 -0
- ctrlcode/cleanup/doc_gardening.py +388 -0
- ctrlcode/cleanup/pr_automation.py +330 -0
- ctrlcode/cleanup/scheduler.py +356 -0
- ctrlcode/config.py +380 -0
- ctrlcode/embeddings/__init__.py +6 -0
- ctrlcode/embeddings/embedder.py +192 -0
- ctrlcode/embeddings/vector_store.py +213 -0
- ctrlcode/fuzzing/__init__.py +24 -0
- ctrlcode/fuzzing/analyzer.py +280 -0
- ctrlcode/fuzzing/budget.py +112 -0
- ctrlcode/fuzzing/context.py +665 -0
- ctrlcode/fuzzing/context_fuzzer.py +506 -0
- ctrlcode/fuzzing/derived_orchestrator.py +732 -0
- ctrlcode/fuzzing/oracle_adapter.py +135 -0
- ctrlcode/linters/__init__.py +11 -0
- ctrlcode/linters/hand_rolled_utils.py +221 -0
- ctrlcode/linters/yolo_parsing.py +217 -0
- ctrlcode/metrics/__init__.py +6 -0
- ctrlcode/metrics/dashboard.py +283 -0
- ctrlcode/metrics/tech_debt.py +663 -0
- ctrlcode/paths.py +68 -0
- ctrlcode/permissions.py +179 -0
- ctrlcode/providers/__init__.py +15 -0
- ctrlcode/providers/anthropic.py +138 -0
- ctrlcode/providers/base.py +77 -0
- ctrlcode/providers/openai.py +197 -0
- ctrlcode/providers/parallel.py +104 -0
- ctrlcode/server.py +871 -0
- ctrlcode/session/__init__.py +6 -0
- ctrlcode/session/baseline.py +57 -0
- ctrlcode/session/manager.py +967 -0
- ctrlcode/skills/__init__.py +10 -0
- ctrlcode/skills/builtin/commit.toml +29 -0
- ctrlcode/skills/builtin/docs.toml +25 -0
- ctrlcode/skills/builtin/refactor.toml +33 -0
- ctrlcode/skills/builtin/review.toml +28 -0
- ctrlcode/skills/builtin/test.toml +28 -0
- ctrlcode/skills/loader.py +111 -0
- ctrlcode/skills/registry.py +139 -0
- ctrlcode/storage/__init__.py +19 -0
- ctrlcode/storage/history_db.py +708 -0
- ctrlcode/tools/__init__.py +220 -0
- ctrlcode/tools/bash.py +112 -0
- ctrlcode/tools/browser.py +352 -0
- ctrlcode/tools/executor.py +153 -0
- ctrlcode/tools/explore.py +486 -0
- ctrlcode/tools/mcp.py +108 -0
- ctrlcode/tools/observability.py +561 -0
- ctrlcode/tools/registry.py +193 -0
- ctrlcode/tools/todo.py +291 -0
- ctrlcode/tools/update.py +266 -0
- ctrlcode/tools/webfetch.py +147 -0
- ctrlcode-0.1.0.dist-info/METADATA +93 -0
- ctrlcode-0.1.0.dist-info/RECORD +75 -0
- ctrlcode-0.1.0.dist-info/WHEEL +4 -0
- ctrlcode-0.1.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,732 @@
|
|
|
1
|
+
"""Derived fuzzing orchestrator - coordinates context-aware differential fuzzing."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import time
|
|
5
|
+
import uuid
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import AsyncIterator, Optional
|
|
10
|
+
|
|
11
|
+
from ..analysis.bug_detector import BugPatternDetector
|
|
12
|
+
from ..embeddings.embedder import CodeEmbedder
|
|
13
|
+
from ..embeddings.vector_store import VectorStore
|
|
14
|
+
from ..providers.base import Provider, StreamEvent
|
|
15
|
+
from ..storage.history_db import BugPattern, FuzzingSession, HistoryDB, StoredTest
|
|
16
|
+
from .analyzer import DiagnosedDivergence, DerivedOracleAnalyzer
|
|
17
|
+
from .budget import BudgetConfig, BudgetManager
|
|
18
|
+
from .context import ContextDerivation, ContextDerivationEngine
|
|
19
|
+
from .context_fuzzer import ContextAwareFuzzer, FuzzTestCase
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class FuzzingResult:
    """Result of derived fuzzing process.

    Terminal value yielded by DerivedFuzzingOrchestrator.fuzz after the
    fuzzing loop completes (or aborts during context derivation).
    """

    # Final (possibly patched) code after all MODEL_BUG fixes were applied.
    final_output: str
    # Number of fuzzing-loop iterations actually executed.
    iterations: int
    # Total test cases generated across all iterations.
    total_tests: int
    # Count of observed actual-vs-expected divergences.
    divergences_found: int
    # Count of divergences resolved by patching the code (MODEL_BUG fixes).
    divergences_fixed: int
    # Count of divergences resolved by correcting the derived oracle.
    oracle_corrections: int
    # The (possibly corrected) derived context; None-ish only on early failure.
    context_derivation: ContextDerivation
    # 0.0..1.0 score: fraction of tests not left with an unfixed divergence.
    quality_score: float
    # Budget consumption summary from BudgetManager.summary().
    budget_used: dict[str, float]
    # Human-readable markdown report produced by _generate_report.
    analysis_report: str

    # Phase 4: Historical learning metrics
    # True when the oracle/context was retrieved from a prior session.
    oracle_reused: bool = False
    # Session ID the oracle was reused from, if any.
    reused_from_session: Optional[str] = None
    # Number of similar historical bug patterns flagged before fuzzing.
    bug_patterns_detected: int = 0
    # Test counts around deduplication (before-count is not yet tracked).
    tests_before_deduplication: int = 0
    tests_after_deduplication: int = 0
    # Unique ID of this fuzzing session (uuid4 string).
    session_id: Optional[str] = None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class DerivedFuzzingOrchestrator:
    """
    Main coordinator for derived context fuzzing.

    Pipeline:
    1. Derive context from code (once)
    2. Loop (max iterations):
       a. Generate test batch (fuzzer)
       b. Execute tests with mocks
       c. Check divergences
       d. Analyze divergences
       e. Apply fixes (code patches OR oracle corrections)
       f. Update fuzzer history
    3. Return patched code + analysis + corrected context

    Progress is reported by yielding StreamEvent objects from fuzz();
    the final FuzzingResult is yielded last.
    """
|
|
63
|
+
|
|
64
|
+
    def __init__(
        self,
        providers: list[Provider],
        config: dict,
        embeddings_config: Optional[dict] = None,
        enable_history: bool = True,
        history_db_path: Optional[Path] = None,
    ):
        """
        Initialize derived fuzzing orchestrator.

        Args:
            providers: List of LLM providers to use (the first is primary;
                an empty list will raise IndexError)
            config: Fuzzing configuration
            embeddings_config: Embeddings endpoint config (api_key, base_url, model)
            enable_history: Enable historical learning (default: True)
            history_db_path: Path to history database (default: ~/.ctrlcode/history.db)
        """
        self.providers = providers
        self.config = config
        self.provider = providers[0]  # Primary provider
        self.enable_history = enable_history
        self.embeddings_config = embeddings_config

        # Phase 4: Initialize historical learning components.
        # All three stay None unless history (and, for the last two,
        # an embeddings config) is available.
        self.history_db: Optional[HistoryDB] = None
        self.embedder: Optional[CodeEmbedder] = None
        self.bug_detector: Optional[BugPatternDetector] = None

        if enable_history:
            # Default history DB path (creates ~/.ctrlcode on first use).
            if history_db_path is None:
                history_dir = Path.home() / ".ctrlcode"
                history_dir.mkdir(parents=True, exist_ok=True)
                history_db_path = history_dir / "history.db"

            logger.info(f"Initializing history database at {history_db_path}")
            self.history_db = HistoryDB(str(history_db_path))

            # Initialize embedder if config provided.
            # NOTE(review): the bug detector is constructed only when an
            # embeddings config exists — confirm it is not meant to work
            # without an embedder.
            if embeddings_config:
                self.embedder = CodeEmbedder(
                    api_key=embeddings_config["api_key"],
                    base_url=embeddings_config["base_url"],
                    model_name=embeddings_config.get("model", "text-embedding-3-small")
                )
                self.bug_detector = BugPatternDetector(
                    self.history_db,
                    embedder=self.embedder,
                    similarity_threshold=0.75,
                )

        # Initialize components (with history integration if enabled).
        # These accept None for history_db/embedder when history is off.
        self.context_engine = ContextDerivationEngine(
            self.provider,
            history_db=self.history_db,
            embedder=self.embedder,
        )
        self.fuzzer = ContextAwareFuzzer(
            self.provider,
            history_db=self.history_db,
            embedder=self.embedder,
            bug_detector=self.bug_detector,
        )
        self.analyzer = DerivedOracleAnalyzer(self.provider)
|
|
129
|
+
|
|
130
|
+
    async def fuzz(
        self,
        user_request: str,
        generated_code: str,
        context_messages: list[dict],
        max_iterations: int = 10,
    ) -> AsyncIterator[StreamEvent | FuzzingResult]:
        """
        Run derived fuzzing pipeline.

        Args:
            user_request: Original user specification
            generated_code: Generated code to test
            context_messages: Conversation context (currently unused here;
                reserved for extracting surrounding files)
            max_iterations: Maximum fuzzing iterations

        Yields:
            StreamEvent for progress updates, FuzzingResult at end
        """
        # Generate session ID
        session_id = str(uuid.uuid4())

        # Initialize budget
        budget = BudgetManager(
            BudgetConfig(
                max_tokens=self.config.get("budget_tokens", 100000),
                max_seconds=self.config.get("budget_seconds", 30),
                max_iterations=max_iterations,
            )
        )

        # Phase 4: Check for similar bug patterns before fuzzing
        bug_patterns_detected = 0
        if self.enable_history and self.bug_detector:
            yield StreamEvent(
                type="fuzzing_progress",
                data={"stage": "bug_detection", "message": "Checking for similar bug patterns..."},
            )

            try:
                similar_bugs = self.bug_detector.check_patterns(generated_code)
                bug_patterns_detected = len(similar_bugs)

                if similar_bugs:
                    warnings = self.bug_detector.format_warnings(similar_bugs)
                    yield StreamEvent(
                        type="workflow_bug_pattern_detected",
                        data={
                            "count": len(similar_bugs),
                            "warnings": warnings,
                            "patterns": [
                                {
                                    "description": bug.description,
                                    "severity": bug.severity,
                                    "confidence": bug.confidence,
                                    "similarity": bug.similarity,
                                }
                                for bug in similar_bugs[:3]  # Top 3
                            ],
                        },
                    )
                    logger.warning(f"Detected {len(similar_bugs)} similar bug patterns")
            except Exception as e:
                # Bug detection is best-effort; a failure must not stop fuzzing.
                logger.error(f"Bug pattern detection failed: {e}")

        # Stage 1: Context Derivation
        yield StreamEvent(
            type="fuzzing_progress",
            data={"stage": "context_derivation", "message": "Deriving operational context..."},
        )

        try:
            context = await self.context_engine.derive(
                user_request=user_request,
                generated_code=generated_code,
                surrounding_files=None,  # Could extract from context_messages
                session_id=session_id,
            )

            # Track if oracle was reused (retrieved_from is the source session
            # when the context engine found a reusable prior oracle).
            oracle_reused = context.retrieved_from is not None
            reused_from_session = context.retrieved_from

            # Emit oracle reuse event
            if oracle_reused:
                yield StreamEvent(
                    type="workflow_oracle_reused",
                    data={
                        "reused_from_session": reused_from_session,
                        "token_savings": 2500,  # Estimated tokens saved per reuse
                        "time_savings_seconds": 25.0,  # Estimated time saved per reuse
                    },
                )

        except Exception as e:
            # Context derivation is mandatory: without it no tests can be
            # generated, so emit an error event plus a minimal result and stop.
            yield StreamEvent(
                type="fuzzing_error",
                data={"stage": "context_derivation", "error": str(e)},
            )
            # Return with minimal result
            yield FuzzingResult(
                final_output=generated_code,
                iterations=0,
                total_tests=0,
                divergences_found=0,
                divergences_fixed=0,
                oracle_corrections=0,
                context_derivation=None,  # type: ignore
                quality_score=0.0,
                budget_used=budget.summary(),
                analysis_report=f"Context derivation failed: {e}",
            )
            return

        yield StreamEvent(
            type="context_derived",
            data={
                "system_type": context.system_placement.system_type,
                "invariants_count": len(context.behavioral_invariants),
                "edge_cases_count": len(context.edge_case_surface),
            },
        )

        # Pipeline state
        current_code = generated_code
        current_context = context
        all_analyses: list[DiagnosedDivergence] = []
        test_history: list[dict] = []
        iteration = 0
        total_tests = 0
        divergences_found = 0
        divergences_fixed = 0
        oracle_corrections = 0

        # Stage 2-6: Fuzzing Loop
        while not budget.exhausted() and iteration < max_iterations:
            iteration += 1

            yield StreamEvent(
                type="fuzzing_progress",
                data={
                    "stage": "iteration_start",
                    "iteration": iteration,
                    "message": f"Fuzzing iteration {iteration}...",
                },
            )

            # Stage 3: Generate test batch
            try:
                batch_size = 20  # Could be configurable
                test_cases = await self.fuzzer.generate_tests(
                    spec=user_request,
                    code=current_code,
                    context=current_context,
                    previous_results=test_history[-10:] if test_history else None,  # Last 10 results
                    batch_size=batch_size,
                )

                total_tests += len(test_cases)

                yield StreamEvent(
                    type="fuzzing_batch_generated",
                    data={
                        "iteration": iteration,
                        "test_count": len(test_cases),
                        "input_tests": sum(1 for t in test_cases if t.type == "input"),
                        "environment_tests": sum(1 for t in test_cases if t.type == "environment"),
                        "combined_tests": sum(1 for t in test_cases if t.type == "combined"),
                    },
                )

            except Exception as e:
                # Without a test batch this iteration cannot proceed; abort
                # the loop and fall through to report generation.
                yield StreamEvent(
                    type="fuzzing_error",
                    data={"stage": "test_generation", "iteration": iteration, "error": str(e)},
                )
                break

            # Stage 4: Execute tests and check for divergences
            yield StreamEvent(
                type="fuzzing_progress",
                data={"stage": "executing_tests", "iteration": iteration, "message": "Executing tests..."},
            )

            iteration_divergences = []
            for test_case in test_cases:
                # Execute test (simplified - would need actual execution)
                # For now, we'll simulate by checking if expected behavior makes sense
                try:
                    actual_output, expected_output, has_divergence = await self._execute_test(
                        code=current_code,
                        test_case=test_case,
                        context=current_context,
                    )

                    # Record test result
                    test_history.append(
                        {
                            "test_id": test_case.id,
                            "iteration": iteration,
                            "type": test_case.type,
                            "divergence": has_divergence,
                        }
                    )

                    if has_divergence:
                        divergences_found += 1
                        iteration_divergences.append((test_case, actual_output, expected_output))

                except Exception as e:
                    # Test execution failed — record the error and continue
                    # with the remaining tests in the batch.
                    test_history.append(
                        {
                            "test_id": test_case.id,
                            "iteration": iteration,
                            "error": str(e),
                        }
                    )

            if iteration_divergences:
                yield StreamEvent(
                    type="divergence_found",
                    data={
                        "iteration": iteration,
                        "count": len(iteration_divergences),
                    },
                )

            # Stage 5: Analyze divergences
            if iteration_divergences:
                yield StreamEvent(
                    type="fuzzing_progress",
                    data={
                        "stage": "analyzing_divergences",
                        "iteration": iteration,
                        "message": f"Analyzing {len(iteration_divergences)} divergences...",
                    },
                )

                for test_case, actual_output, expected_output in iteration_divergences:
                    try:
                        analysis = await self.analyzer.analyze_divergence(
                            spec=user_request,
                            code=current_code,
                            context=current_context,
                            test_case=test_case,
                            actual_output=actual_output,
                            expected_output=expected_output,
                            previous_analyses=all_analyses,
                        )

                        all_analyses.append(analysis)

                        yield StreamEvent(
                            type="analysis_complete",
                            data={
                                "iteration": iteration,
                                "test_id": test_case.id,
                                "source": analysis.source,
                                "confidence": analysis.confidence,
                            },
                        )

                        # Stage 6: Apply fix based on source
                        if analysis.source == "MODEL_BUG":
                            # Patch the code
                            current_code = await self._apply_code_patch(current_code, analysis.fix)
                            divergences_fixed += 1

                            yield StreamEvent(
                                type="code_patched",
                                data={
                                    "iteration": iteration,
                                    "test_id": test_case.id,
                                    "fix_type": "model_bug",
                                },
                            )

                        elif analysis.source == "ORACLE_BUG":
                            # Correct the oracle
                            current_context = await self._correct_oracle(current_context, analysis.fix)
                            oracle_corrections += 1

                            yield StreamEvent(
                                type="oracle_corrected",
                                data={
                                    "iteration": iteration,
                                    "test_id": test_case.id,
                                    "corrected_invariants": analysis.fix.get("corrected_invariants", []),
                                },
                            )

                        elif analysis.source == "SPEC_GAP":
                            # Flag for user clarification (no automatic fix).
                            yield StreamEvent(
                                type="spec_gap_found",
                                data={
                                    "iteration": iteration,
                                    "test_id": test_case.id,
                                    "question": analysis.fix.get("clarification_question", ""),
                                },
                            )

                        elif analysis.source == "ENVIRONMENT_MISMATCH":
                            # Re-derive context with corrected assumptions
                            # For now, we'll update the context in place
                            current_context = await self._correct_environment(current_context, analysis.fix)

                    except Exception as e:
                        yield StreamEvent(
                            type="fuzzing_error",
                            data={
                                "stage": "analysis",
                                "iteration": iteration,
                                "test_id": test_case.id,
                                "error": str(e),
                            },
                        )

            # Check if we should continue
            if not iteration_divergences:
                # No divergences found, we're stable
                break

            # Update budget.
            # NOTE(review): token cost is a hard-coded 1000 per iteration and
            # this assumes BudgetManager exposes start_time — confirm against
            # budget.py.
            budget.consume(tokens=1000, elapsed_time=time.time() - budget.start_time)

        # Generate analysis report
        analysis_report = self._generate_report(
            iterations=iteration,
            total_tests=total_tests,
            divergences_found=divergences_found,
            divergences_fixed=divergences_fixed,
            oracle_corrections=oracle_corrections,
            analyses=all_analyses,
            context=current_context,
        )

        # Calculate quality score (simplified): penalize unfixed divergences
        # per test, clamped to [0, 1].
        quality_score = 1.0 - (divergences_found - divergences_fixed) / max(total_tests, 1)
        quality_score = max(0.0, min(1.0, quality_score))

        # Phase 4: Store fuzzing session in history DB
        if self.enable_history and self.history_db:
            try:
                await self._store_fuzzing_session(
                    session_id=session_id,
                    user_request=user_request,
                    generated_code=generated_code,
                    context=current_context,
                    total_tests=total_tests,
                    divergences=divergences_found,
                    oracle_reused=oracle_reused,
                    reused_from=reused_from_session,
                    quality_score=quality_score,
                    all_analyses=all_analyses,
                    # 'test_cases' is unbound when the first batch generation
                    # failed, hence the locals() guard; only the final batch
                    # is stored.
                    test_cases=test_cases if 'test_cases' in locals() else [],
                )
                logger.info(f"Stored fuzzing session {session_id} in history database")

                # Emit history updated event
                yield StreamEvent(
                    type="workflow_history_updated",
                    data={
                        "session_id": session_id,
                        "oracle_reused": oracle_reused,
                        "bug_patterns_detected": bug_patterns_detected,
                        "total_tests": total_tests,
                    },
                )
            except Exception as e:
                # History storage is best-effort; never fail the run for it.
                logger.error(f"Failed to store fuzzing session in history: {e}")

        # Return final result
        yield FuzzingResult(
            final_output=current_code,
            iterations=iteration,
            total_tests=total_tests,
            divergences_found=divergences_found,
            divergences_fixed=divergences_fixed,
            oracle_corrections=oracle_corrections,
            context_derivation=current_context,
            quality_score=quality_score,
            budget_used=budget.summary(),
            analysis_report=analysis_report,
            # Phase 4: Historical learning metrics
            oracle_reused=oracle_reused,
            reused_from_session=reused_from_session,
            bug_patterns_detected=bug_patterns_detected,
            tests_before_deduplication=0,  # Would be tracked in fuzzer
            tests_after_deduplication=total_tests,
            session_id=session_id,
        )
|
|
523
|
+
|
|
524
|
+
async def _execute_test(
|
|
525
|
+
self,
|
|
526
|
+
code: str,
|
|
527
|
+
test_case: FuzzTestCase,
|
|
528
|
+
context: ContextDerivation,
|
|
529
|
+
) -> tuple[dict, dict, bool]:
|
|
530
|
+
"""
|
|
531
|
+
Execute a test case and check for divergences.
|
|
532
|
+
|
|
533
|
+
Args:
|
|
534
|
+
code: Code to test
|
|
535
|
+
test_case: Test case to execute
|
|
536
|
+
context: Context derivation
|
|
537
|
+
|
|
538
|
+
Returns:
|
|
539
|
+
(actual_output, expected_output, has_divergence)
|
|
540
|
+
"""
|
|
541
|
+
# This is a simplified implementation
|
|
542
|
+
# In reality, would need to:
|
|
543
|
+
# 1. Set up mocks based on test_case.environment.mock_setup
|
|
544
|
+
# 2. Execute the code with test_case.input
|
|
545
|
+
# 3. Compare actual vs expected from test_case.environment.expected_behavior
|
|
546
|
+
|
|
547
|
+
# For now, we'll simulate by saying 20% of tests have divergences
|
|
548
|
+
import random
|
|
549
|
+
|
|
550
|
+
has_divergence = random.random() < 0.2
|
|
551
|
+
|
|
552
|
+
actual_output = {"result": "simulated_output", "divergence": has_divergence}
|
|
553
|
+
expected_output = (
|
|
554
|
+
test_case.environment.expected_behavior
|
|
555
|
+
if test_case.environment
|
|
556
|
+
else {"result": "expected"}
|
|
557
|
+
)
|
|
558
|
+
|
|
559
|
+
return actual_output, expected_output, has_divergence
|
|
560
|
+
|
|
561
|
+
async def _apply_code_patch(self, code: str, fix: dict) -> str:
|
|
562
|
+
"""Apply a code patch from MODEL_BUG fix."""
|
|
563
|
+
# In reality, would apply the actual patch
|
|
564
|
+
# For now, just return the code with a comment
|
|
565
|
+
patch = fix.get("patch", "# Patch applied")
|
|
566
|
+
return f"{code}\n\n{patch}"
|
|
567
|
+
|
|
568
|
+
async def _correct_oracle(self, context: ContextDerivation, fix: dict) -> ContextDerivation:
|
|
569
|
+
"""Correct the derived oracle based on ORACLE_BUG fix."""
|
|
570
|
+
# In reality, would update specific invariants
|
|
571
|
+
# For now, return the context as-is
|
|
572
|
+
_corrected_invariants = fix.get("corrected_invariants", [])
|
|
573
|
+
# Update behavioral_invariants with corrections
|
|
574
|
+
return context
|
|
575
|
+
|
|
576
|
+
async def _correct_environment(self, context: ContextDerivation, fix: dict) -> ContextDerivation:
|
|
577
|
+
"""Correct environmental assumptions based on ENVIRONMENT_MISMATCH fix."""
|
|
578
|
+
# In reality, would update environmental_constraints
|
|
579
|
+
# For now, return the context as-is
|
|
580
|
+
_corrected_assumptions = fix.get("corrected_assumptions", {})
|
|
581
|
+
# Update environmental_constraints with corrections
|
|
582
|
+
return context
|
|
583
|
+
|
|
584
|
+
def _generate_report(
|
|
585
|
+
self,
|
|
586
|
+
iterations: int,
|
|
587
|
+
total_tests: int,
|
|
588
|
+
divergences_found: int,
|
|
589
|
+
divergences_fixed: int,
|
|
590
|
+
oracle_corrections: int,
|
|
591
|
+
analyses: list[DiagnosedDivergence],
|
|
592
|
+
context: ContextDerivation,
|
|
593
|
+
) -> str:
|
|
594
|
+
"""Generate analysis report."""
|
|
595
|
+
report = f"""# Derived Fuzzing Analysis Report
|
|
596
|
+
|
|
597
|
+
## Summary
|
|
598
|
+
- Iterations: {iterations}
|
|
599
|
+
- Total tests executed: {total_tests}
|
|
600
|
+
- Divergences found: {divergences_found}
|
|
601
|
+
- Divergences fixed: {divergences_fixed}
|
|
602
|
+
- Oracle corrections: {oracle_corrections}
|
|
603
|
+
|
|
604
|
+
## Context Derivation
|
|
605
|
+
- System type: {context.system_placement.system_type}
|
|
606
|
+
- Layer: {context.system_placement.layer}
|
|
607
|
+
- Behavioral invariants: {len(context.behavioral_invariants)}
|
|
608
|
+
- Integration contracts: {len(context.integration_contracts)}
|
|
609
|
+
- Edge cases identified: {len(context.edge_case_surface)}
|
|
610
|
+
|
|
611
|
+
## Divergence Breakdown
|
|
612
|
+
"""
|
|
613
|
+
|
|
614
|
+
# Count by source
|
|
615
|
+
source_counts = {}
|
|
616
|
+
for analysis in analyses:
|
|
617
|
+
source_counts[analysis.source] = source_counts.get(analysis.source, 0) + 1
|
|
618
|
+
|
|
619
|
+
for source, count in source_counts.items():
|
|
620
|
+
report += f"- {source}: {count}\n"
|
|
621
|
+
|
|
622
|
+
if analyses:
|
|
623
|
+
report += "\n## Key Findings\n"
|
|
624
|
+
for i, analysis in enumerate(analyses[:5], 1): # Top 5
|
|
625
|
+
report += f"\n### {i}. {analysis.diagnosis}\n"
|
|
626
|
+
report += f"- Source: {analysis.source}\n"
|
|
627
|
+
report += f"- Confidence: {analysis.confidence:.2f}\n"
|
|
628
|
+
report += f"- Impact: {analysis.impact}\n"
|
|
629
|
+
|
|
630
|
+
return report
|
|
631
|
+
|
|
632
|
+
async def _store_fuzzing_session(
|
|
633
|
+
self,
|
|
634
|
+
session_id: str,
|
|
635
|
+
user_request: str,
|
|
636
|
+
generated_code: str,
|
|
637
|
+
context: ContextDerivation,
|
|
638
|
+
total_tests: int,
|
|
639
|
+
divergences: int,
|
|
640
|
+
oracle_reused: bool,
|
|
641
|
+
reused_from: Optional[str],
|
|
642
|
+
quality_score: float,
|
|
643
|
+
all_analyses: list[DiagnosedDivergence],
|
|
644
|
+
test_cases: list[FuzzTestCase],
|
|
645
|
+
) -> None:
|
|
646
|
+
"""Store fuzzing session results in history database.
|
|
647
|
+
|
|
648
|
+
Args:
|
|
649
|
+
session_id: Session identifier
|
|
650
|
+
user_request: User specification
|
|
651
|
+
generated_code: Generated code
|
|
652
|
+
context: Derived context
|
|
653
|
+
total_tests: Number of tests executed
|
|
654
|
+
divergences: Number of divergences found
|
|
655
|
+
oracle_reused: Whether oracle was reused
|
|
656
|
+
reused_from: Source session ID if reused
|
|
657
|
+
quality_score: Fuzzing quality score
|
|
658
|
+
all_analyses: List of divergence analyses
|
|
659
|
+
test_cases: List of test cases
|
|
660
|
+
"""
|
|
661
|
+
if not self.history_db or not self.embedder:
|
|
662
|
+
return
|
|
663
|
+
|
|
664
|
+
# Store session metadata
|
|
665
|
+
session = FuzzingSession(
|
|
666
|
+
session_id=session_id,
|
|
667
|
+
user_request=user_request,
|
|
668
|
+
generated_code=generated_code,
|
|
669
|
+
oracle=context.to_json(),
|
|
670
|
+
timestamp=datetime.now(),
|
|
671
|
+
num_tests=total_tests,
|
|
672
|
+
num_failures=divergences,
|
|
673
|
+
oracle_reused=oracle_reused,
|
|
674
|
+
reused_from=reused_from,
|
|
675
|
+
quality_score=quality_score,
|
|
676
|
+
)
|
|
677
|
+
self.history_db.store_session(session)
|
|
678
|
+
|
|
679
|
+
# Store bug patterns from divergence analyses (batch embed)
|
|
680
|
+
model_bug_analyses = [
|
|
681
|
+
(i, analysis) for i, analysis in enumerate(all_analyses)
|
|
682
|
+
if analysis.source == "MODEL_BUG"
|
|
683
|
+
]
|
|
684
|
+
|
|
685
|
+
if model_bug_analyses:
|
|
686
|
+
# Batch embed all bugs (same code for all, so just embed once)
|
|
687
|
+
bug_embedding = self.embedder.embed_code(generated_code)
|
|
688
|
+
|
|
689
|
+
for i, analysis in model_bug_analyses:
|
|
690
|
+
bug_pattern = BugPattern(
|
|
691
|
+
bug_id=f"{session_id}_bug_{i}",
|
|
692
|
+
session_id=session_id,
|
|
693
|
+
bug_description=analysis.diagnosis,
|
|
694
|
+
code_snippet=generated_code[:500], # First 500 chars
|
|
695
|
+
embedding=bug_embedding,
|
|
696
|
+
severity="high" if analysis.confidence > 0.8 else "medium",
|
|
697
|
+
timestamp=datetime.now(),
|
|
698
|
+
)
|
|
699
|
+
self.history_db.store_bug(bug_pattern)
|
|
700
|
+
|
|
701
|
+
# Store test cases with embeddings (batch embed)
|
|
702
|
+
test_subset = test_cases[:50] # Store up to 50 tests
|
|
703
|
+
if test_subset:
|
|
704
|
+
# Prepare test strings and batch embed
|
|
705
|
+
test_strs = [f"{tc.type}: {tc.rationale}" for tc in test_subset]
|
|
706
|
+
test_embeddings = self.embedder.embed_batch(test_strs)
|
|
707
|
+
|
|
708
|
+
for i, (test_case, test_str, test_embedding) in enumerate(
|
|
709
|
+
zip(test_subset, test_strs, test_embeddings)
|
|
710
|
+
):
|
|
711
|
+
stored_test = StoredTest(
|
|
712
|
+
test_id=f"{session_id}_test_{i}",
|
|
713
|
+
session_id=session_id,
|
|
714
|
+
test_code=test_str,
|
|
715
|
+
embedding=test_embedding,
|
|
716
|
+
passed=True, # Would need actual execution results
|
|
717
|
+
timestamp=datetime.now(),
|
|
718
|
+
)
|
|
719
|
+
self.history_db.store_test(stored_test)
|
|
720
|
+
|
|
721
|
+
def get_history_stats(self) -> dict:
|
|
722
|
+
"""Get statistics from history database.
|
|
723
|
+
|
|
724
|
+
Returns:
|
|
725
|
+
Dictionary of statistics
|
|
726
|
+
"""
|
|
727
|
+
if not self.history_db:
|
|
728
|
+
return {"enabled": False}
|
|
729
|
+
|
|
730
|
+
stats = self.history_db.get_stats()
|
|
731
|
+
stats["enabled"] = True
|
|
732
|
+
return stats
|