ctrlcode-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ctrlcode/__init__.py +8 -0
- ctrlcode/agents/__init__.py +29 -0
- ctrlcode/agents/cleanup.py +388 -0
- ctrlcode/agents/communication.py +439 -0
- ctrlcode/agents/observability.py +421 -0
- ctrlcode/agents/react_loop.py +297 -0
- ctrlcode/agents/registry.py +211 -0
- ctrlcode/agents/result_parser.py +242 -0
- ctrlcode/agents/workflow.py +723 -0
- ctrlcode/analysis/__init__.py +28 -0
- ctrlcode/analysis/ast_diff.py +163 -0
- ctrlcode/analysis/bug_detector.py +149 -0
- ctrlcode/analysis/code_graphs.py +329 -0
- ctrlcode/analysis/semantic.py +205 -0
- ctrlcode/analysis/static.py +183 -0
- ctrlcode/analysis/synthesizer.py +281 -0
- ctrlcode/analysis/tests.py +189 -0
- ctrlcode/cleanup/__init__.py +16 -0
- ctrlcode/cleanup/auto_merge.py +350 -0
- ctrlcode/cleanup/doc_gardening.py +388 -0
- ctrlcode/cleanup/pr_automation.py +330 -0
- ctrlcode/cleanup/scheduler.py +356 -0
- ctrlcode/config.py +380 -0
- ctrlcode/embeddings/__init__.py +6 -0
- ctrlcode/embeddings/embedder.py +192 -0
- ctrlcode/embeddings/vector_store.py +213 -0
- ctrlcode/fuzzing/__init__.py +24 -0
- ctrlcode/fuzzing/analyzer.py +280 -0
- ctrlcode/fuzzing/budget.py +112 -0
- ctrlcode/fuzzing/context.py +665 -0
- ctrlcode/fuzzing/context_fuzzer.py +506 -0
- ctrlcode/fuzzing/derived_orchestrator.py +732 -0
- ctrlcode/fuzzing/oracle_adapter.py +135 -0
- ctrlcode/linters/__init__.py +11 -0
- ctrlcode/linters/hand_rolled_utils.py +221 -0
- ctrlcode/linters/yolo_parsing.py +217 -0
- ctrlcode/metrics/__init__.py +6 -0
- ctrlcode/metrics/dashboard.py +283 -0
- ctrlcode/metrics/tech_debt.py +663 -0
- ctrlcode/paths.py +68 -0
- ctrlcode/permissions.py +179 -0
- ctrlcode/providers/__init__.py +15 -0
- ctrlcode/providers/anthropic.py +138 -0
- ctrlcode/providers/base.py +77 -0
- ctrlcode/providers/openai.py +197 -0
- ctrlcode/providers/parallel.py +104 -0
- ctrlcode/server.py +871 -0
- ctrlcode/session/__init__.py +6 -0
- ctrlcode/session/baseline.py +57 -0
- ctrlcode/session/manager.py +967 -0
- ctrlcode/skills/__init__.py +10 -0
- ctrlcode/skills/builtin/commit.toml +29 -0
- ctrlcode/skills/builtin/docs.toml +25 -0
- ctrlcode/skills/builtin/refactor.toml +33 -0
- ctrlcode/skills/builtin/review.toml +28 -0
- ctrlcode/skills/builtin/test.toml +28 -0
- ctrlcode/skills/loader.py +111 -0
- ctrlcode/skills/registry.py +139 -0
- ctrlcode/storage/__init__.py +19 -0
- ctrlcode/storage/history_db.py +708 -0
- ctrlcode/tools/__init__.py +220 -0
- ctrlcode/tools/bash.py +112 -0
- ctrlcode/tools/browser.py +352 -0
- ctrlcode/tools/executor.py +153 -0
- ctrlcode/tools/explore.py +486 -0
- ctrlcode/tools/mcp.py +108 -0
- ctrlcode/tools/observability.py +561 -0
- ctrlcode/tools/registry.py +193 -0
- ctrlcode/tools/todo.py +291 -0
- ctrlcode/tools/update.py +266 -0
- ctrlcode/tools/webfetch.py +147 -0
- ctrlcode-0.1.0.dist-info/METADATA +93 -0
- ctrlcode-0.1.0.dist-info/RECORD +75 -0
- ctrlcode-0.1.0.dist-info/WHEEL +4 -0
- ctrlcode-0.1.0.dist-info/entry_points.txt +3 -0
ctrlcode/embeddings/vector_store.py
@@ -0,0 +1,213 @@
+"""FAISS-based vector storage for fast similarity search."""
+
+import logging
+from pathlib import Path
+from typing import Optional
+
+import faiss
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+
+class VectorStore:
+    """Fast approximate nearest neighbor search using FAISS HNSW index.
+
+    Supports efficient similarity search over thousands of embeddings.
+    Uses HNSW (Hierarchical Navigable Small World) algorithm for sub-millisecond queries.
+    """
+
+    def __init__(
+        self,
+        dimension: int = 384,  # Default for all-MiniLM-L6-v2
+        index_type: str = "hnsw",
+        M: int = 32,  # HNSW construction parameter (connectivity)
+        ef_construction: int = 200,  # HNSW build quality
+        ef_search: int = 64,  # HNSW search quality
+    ):
+        """Initialize vector store with FAISS index.
+
+        Args:
+            dimension: Embedding vector dimension
+            index_type: Index type (hnsw or flat)
+            M: HNSW parameter - higher = better recall, more memory
+            ef_construction: HNSW build-time search depth
+            ef_search: HNSW query-time search depth
+        """
+        self.dimension = dimension
+        self.index_type = index_type
+        self.M = M
+        self.ef_construction = ef_construction
+        self.ef_search = ef_search
+
+        # Initialize index
+        # Note: For cosine similarity with normalized vectors:
+        # - Flat index uses inner product (IP) directly
+        # - HNSW uses L2 distance (converted to cosine in search results)
+        if index_type == "hnsw":
+            # IndexHNSWFlat uses L2 distance only
+            self.index = faiss.IndexHNSWFlat(dimension, M)
+            self.index.hnsw.efConstruction = ef_construction
+            self.index.hnsw.efSearch = ef_search
+        elif index_type == "flat":
+            # Exact search for small datasets with inner product
+            self.index = faiss.IndexFlatIP(dimension)  # Inner product (cosine for normalized)
+        else:
+            raise ValueError(f"Unknown index type: {index_type}")
+
+        # Track IDs separately (FAISS uses integer IDs)
+        self.id_map: list[str] = []
+
+    def add(
+        self,
+        embeddings: np.ndarray | list[np.ndarray],
+        ids: list[str],
+    ) -> None:
+        """Add embeddings to the index.
+
+        Args:
+            embeddings: Single embedding or array of embeddings
+            ids: Corresponding string IDs
+        """
+        # Convert single embedding to batch
+        if isinstance(embeddings, list):
+            embeddings = np.array(embeddings)
+
+        if embeddings.ndim == 1:
+            embeddings = embeddings.reshape(1, -1)
+
+        if embeddings.shape[0] != len(ids):
+            raise ValueError(f"Mismatch: {embeddings.shape[0]} embeddings, {len(ids)} IDs")
+
+        if embeddings.shape[1] != self.dimension:
+            raise ValueError(f"Wrong dimension: {embeddings.shape[1]}, expected {self.dimension}")
+
+        # Ensure float32 for FAISS
+        embeddings = embeddings.astype(np.float32)
+
+        # Add to index
+        self.index.add(embeddings)
+        self.id_map.extend(ids)
+
+        logger.debug(f"Added {len(ids)} embeddings to index (total: {len(self.id_map)})")
+
+    def search(
+        self,
+        query_embedding: np.ndarray,
+        k: int = 5,
+        min_similarity: Optional[float] = None,
+    ) -> list[tuple[str, float]]:
+        """Search for k most similar embeddings.
+
+        Args:
+            query_embedding: Query vector
+            k: Number of results to return
+            min_similarity: Minimum similarity threshold (filter results)
+
+        Returns:
+            List of (id, similarity_score) tuples, sorted by similarity (descending)
+        """
+        if len(self.id_map) == 0:
+            return []
+
+        # Reshape query
+        if query_embedding.ndim == 1:
+            query_embedding = query_embedding.reshape(1, -1)
+
+        query_embedding = query_embedding.astype(np.float32)
+
+        # Limit k to available items
+        k = min(k, len(self.id_map))
+
+        # Search
+        distances, indices = self.index.search(query_embedding, k)
+
+        # Convert to (id, score) tuples
+        results = []
+        for idx, dist in zip(indices[0], distances[0]):
+            if idx < 0 or idx >= len(self.id_map):
+                continue  # Invalid index
+
+            # Convert distance to similarity based on index type
+            if self.index_type == "hnsw":
+                # For L2 distance on normalized vectors: cosine_sim = 1 - (L2²/2)
+                # FAISS returns squared L2 distance
+                similarity = float(1.0 - (dist / 2.0))
+            else:
+                # For inner product, distance IS similarity
+                similarity = float(dist)
+
+            # Filter by threshold
+            if min_similarity is not None and similarity < min_similarity:
+                continue
+
+            results.append((self.id_map[idx], similarity))
+
+        return results
+
+    def save(self, path: Path) -> None:
+        """Save index and ID map to disk.
+
+        Args:
+            path: Directory to save to
+        """
+        path = Path(path)
+        path.mkdir(parents=True, exist_ok=True)
+
+        # Save FAISS index
+        index_file = path / "faiss.index"
+        faiss.write_index(self.index, str(index_file))
+
+        # Save ID map
+        id_map_file = path / "id_map.txt"
+        with open(id_map_file, "w") as f:
+            f.write("\n".join(self.id_map))
+
+        logger.info(f"Saved vector store to {path} ({len(self.id_map)} embeddings)")
+
+    def load(self, path: Path) -> None:
+        """Load index and ID map from disk.
+
+        Args:
+            path: Directory to load from
+        """
+        path = Path(path)
+
+        # Load FAISS index
+        index_file = path / "faiss.index"
+        if not index_file.exists():
+            raise FileNotFoundError(f"Index file not found: {index_file}")
+
+        self.index = faiss.read_index(str(index_file))
+
+        # Update ef_search for HNSW
+        if self.index_type == "hnsw" and hasattr(self.index, "hnsw"):
+            self.index.hnsw.efSearch = self.ef_search
+
+        # Load ID map
+        id_map_file = path / "id_map.txt"
+        if not id_map_file.exists():
+            raise FileNotFoundError(f"ID map not found: {id_map_file}")
+
+        with open(id_map_file, "r") as f:
+            self.id_map = [line.strip() for line in f if line.strip()]
+
+        logger.info(f"Loaded vector store from {path} ({len(self.id_map)} embeddings)")
+
+    @property
+    def size(self) -> int:
+        """Number of embeddings in the index."""
+        return len(self.id_map)
+
+    def clear(self) -> None:
+        """Clear all embeddings from the index."""
+        # Reinitialize index
+        if self.index_type == "hnsw":
+            self.index = faiss.IndexHNSWFlat(self.dimension, self.M)
+            self.index.hnsw.efConstruction = self.ef_construction
+            self.index.hnsw.efSearch = self.ef_search
+        else:
+            self.index = faiss.IndexFlatIP(self.dimension)
+
+        self.id_map = []
+        logger.debug("Cleared vector store")
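The `1.0 - (dist / 2.0)` conversion in `search` assumes unit-normalized vectors: for unit vectors a and b, ||a - b||^2 = 2 - 2(a · b), so cosine similarity equals 1 - d/2, where d is the squared L2 distance FAISS reports. A minimal usage sketch of the class above, with made-up vectors and IDs; normalization is done by hand here since the embedder module's API isn't part of this excerpt:

import numpy as np
from ctrlcode.embeddings.vector_store import VectorStore

# Exact inner-product search; "hnsw" would be the choice at larger scale.
store = VectorStore(dimension=384, index_type="flat")

# Inputs must be L2-normalized for the cosine-similarity math to hold.
vecs = np.random.rand(3, 384).astype(np.float32)
vecs /= np.linalg.norm(vecs, axis=1, keepdims=True)
store.add(vecs, ids=["doc-a", "doc-b", "doc-c"])

# Querying with a stored vector should return it first with similarity ~1.0.
for doc_id, score in store.search(vecs[0], k=2, min_similarity=0.3):
    print(doc_id, round(score, 3))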
ctrlcode/fuzzing/__init__.py
@@ -0,0 +1,24 @@
+"""Differential fuzzing system for ctrl-code (derived context architecture)."""
+
+from .derived_orchestrator import DerivedFuzzingOrchestrator, FuzzingResult
+from .context import ContextDerivationEngine, ContextDerivation, SystemPlacement, IntegrationContract, ImplicitAssumption
+from .context_fuzzer import ContextAwareFuzzer, FuzzTestCase, EnvironmentScenario
+from .analyzer import DerivedOracleAnalyzer, DiagnosedDivergence
+from .budget import BudgetManager, BudgetConfig
+
+__all__ = [
+    "DerivedFuzzingOrchestrator",
+    "FuzzingResult",
+    "ContextDerivationEngine",
+    "ContextDerivation",
+    "SystemPlacement",
+    "IntegrationContract",
+    "ImplicitAssumption",
+    "ContextAwareFuzzer",
+    "FuzzTestCase",
+    "EnvironmentScenario",
+    "DerivedOracleAnalyzer",
+    "DiagnosedDivergence",
+    "BudgetManager",
+    "BudgetConfig",
+]
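Given these re-exports, callers can import the fuzzing API from the subpackage root rather than from individual modules, e.g.:

from ctrlcode.fuzzing import BudgetConfig, BudgetManager, DerivedOracleAnalyzer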
ctrlcode/fuzzing/analyzer.py
@@ -0,0 +1,280 @@
+"""Enhanced analyzer with oracle bug detection."""
+
+import json
+from dataclasses import dataclass, asdict
+from typing import Literal, Any
+
+from ..providers.base import Provider
+from .context import ContextDerivation
+from .context_fuzzer import FuzzTestCase
+
+
+@dataclass
+class DiagnosedDivergence:
+    """Analyzed divergence with root cause and fix."""
+
+    divergence_id: str
+    diagnosis: str
+    root_cause: str
+    source: Literal["MODEL_BUG", "ORACLE_BUG", "SPEC_GAP", "ENVIRONMENT_MISMATCH"]
+    confidence: float  # 0-1
+    fix: dict[str, Any]  # Patch, corrected invariant, or clarification question
+    regression_test: str
+    follow_up_guidance: list[str]
+    impact: str
+    blast_radius: str
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary."""
+        return asdict(self)
+
+
+class DerivedOracleAnalyzer:
+    """
+    Analyzes divergences in derived context testing.
+
+    Key innovation: Detects when the ORACLE is wrong, not just the code.
+    """
+
+    def __init__(self, provider: Provider):
+        """
+        Initialize analyzer.
+
+        Args:
+            provider: LLM provider for analysis
+        """
+        self.provider = provider
+
+    async def analyze_divergence(
+        self,
+        spec: str,
+        code: str,
+        context: ContextDerivation,
+        test_case: FuzzTestCase,
+        actual_output: dict,
+        expected_output: dict,
+        previous_analyses: list[DiagnosedDivergence] = [],
+    ) -> DiagnosedDivergence:
+        """
+        Analyze a divergence between actual and expected behavior.
+
+        Args:
+            spec: Original specification
+            code: Generated code
+            context: Derived context
+            test_case: The test case that revealed the divergence
+            actual_output: What the code actually produced
+            expected_output: What was expected (from derived oracle)
+            previous_analyses: Previous divergence analyses (for learning)
+
+        Returns:
+            DiagnosedDivergence with root cause and fix
+
+        This determines whether the divergence is due to:
+        - MODEL_BUG: Code is wrong
+        - ORACLE_BUG: Derived expectation was wrong
+        - SPEC_GAP: Specification is ambiguous
+        - ENVIRONMENT_MISMATCH: Wrong environmental assumptions
+        """
+        # Build system prompt (from DIFFFUZZTEST.md lines 396-445)
+        system_prompt = """You are a senior software engineer performing root cause analysis. You operate
+in a pipeline where code is tested against DERIVED behavioral expectations
+rather than a running system.
+
+This means divergences could come from four sources:
+1. The MODEL'S CODE is wrong (most common)
+2. The DERIVED EXPECTATION is wrong (the context analysis misjudged what the
+   code should do)
+3. The SPECIFICATION is ambiguous or incomplete
+4. The ENVIRONMENTAL ASSUMPTIONS don't match reality
+
+For each divergence:
+
+## 1. DIAGNOSE
+Identify the root cause. Point to specific code, specific invariant, or
+specific spec clause.
+
+## 2. DETERMINE SOURCE
+- MODEL_BUG: The code doesn't match what the spec and context require.
+- ORACLE_BUG: The derived expectation was wrong — the code is actually fine,
+  but the context derivation made an incorrect inference about what the
+  behavior should be. (This is important — flag it so the context derivation
+  improves over time.)
+- SPEC_GAP: The specification doesn't address this case. Neither the code
+  nor the derived oracle can be judged correct.
+- ENVIRONMENT_MISMATCH: The environmental assumptions in the context report
+  don't match reality (e.g., assumed async but code is sync).
+
+## 3. PROPOSE FIX
+- MODEL_BUG → Code patch (minimal diff)
+- ORACLE_BUG → Corrected invariant/expectation + note to adjust context derivation
+- SPEC_GAP → Clarification question for the user + both possible interpretations
+- ENVIRONMENT_MISMATCH → Corrected context assumptions + re-derive affected invariants
+
+## 4. CONFIDENCE & IMPACT
+- Confidence (0-1): How certain are you about the diagnosis?
+- Impact: What else could break if this bug exists in production?
+  (e.g., "If retries aren't bounded, a single failing upstream could
+  consume all connection pool slots and cascade-fail the entire service")
+- Blast radius: Just this function? Its callers? The whole service?
+
+## 5. REGRESSION TEST
+Concrete test code covering this case.
+
+## 6. FOLLOW-UP FUZZING GUIDANCE
+- Inputs/scenarios the fuzzer should try next
+- If ORACLE_BUG: note which invariants to re-derive
+
+Output as JSON."""
+
+        # Build user message
+        previous_section = (
+            json.dumps([a.to_dict() for a in previous_analyses], indent=2)
+            if previous_analyses
+            else "No previous analyses."
+        )
+
+        user_message = f"""## Specification
+{spec}
+
+## Generated Code
+```
+{code}
+```
+
+## Derived Context
+{context.to_json()}
+
+## Test Case
+{json.dumps(test_case.to_dict(), indent=2)}
+
+## Actual Output
+{json.dumps(actual_output, indent=2)}
+
+## Expected Output
+{json.dumps(expected_output, indent=2)}
+
+## Previous Analyses
+{previous_section}
+
+Analyze this divergence and provide a diagnosis with fix."""
+
+        # Call LLM
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_message},
+        ]
+
+        response = await self.provider.generate(messages)
+        response_text = response.get("text", "").strip()
+
+        # Parse JSON response
+        try:
+            # Extract JSON from markdown code blocks if present
+            if "```json" in response_text:
+                start = response_text.find("```json") + 7
+                end = response_text.find("```", start)
+                response_text = response_text[start:end].strip()
+            elif "```" in response_text:
+                start = response_text.find("```") + 3
+                end = response_text.find("```", start)
+                response_text = response_text[start:end].strip()
+
+            data = json.loads(response_text)
+
+            return DiagnosedDivergence(
+                divergence_id=test_case.id,
+                diagnosis=data["diagnosis"],
+                root_cause=data["root_cause"],
+                source=data["source"],
+                confidence=data["confidence"],
+                fix=data["fix"],
+                regression_test=data["regression_test"],
+                follow_up_guidance=data["follow_up_guidance"],
+                impact=data.get("impact", "Unknown impact"),
+                blast_radius=data.get("blast_radius", "Unknown blast radius"),
+            )
+
+        except (json.JSONDecodeError, KeyError) as e:
+            raise ValueError(f"Failed to parse analyzer response: {e}\nResponse: {response_text}")
+
+
+    async def check_invariants(
+        self,
+        output: dict,
+        invariants: list[str],
+        context: ContextDerivation,
+    ) -> dict[str, Any]:
+        """
+        Check if output satisfies behavioral invariants.
+
+        Used in MODE 2 (oracle-from-invariants) when no system is available.
+
+        Args:
+            output: Function output to check
+            invariants: List of invariants that must hold
+            context: Full context derivation
+
+        Returns:
+            Dict with invariant_results, overall status, and violations
+        """
+        system_prompt = """You are a behavioral oracle. You will receive:
+1. A function's output for a given input
+2. A set of behavioral invariants that MUST hold
+3. The system context
+
+For each invariant, determine: HOLDS or VIOLATED.
+
+Be precise. An invariant is only VIOLATED if the output definitively
+contradicts it. If the output is consistent with the invariant but you
+can't fully verify (e.g., you can't observe internal timing), mark it
+as UNVERIFIABLE rather than assuming it holds.
+
+Respond as:
+{
+  "invariant_results": [
+    {
+      "invariant": "Total retry delay must be bounded",
+      "result": "HOLDS | VIOLATED | UNVERIFIABLE",
+      "evidence": "..."
+    }
+  ],
+  "overall": "PASS | FAIL | PARTIAL",
+  "violations": ["list of violated invariant names"]
+}"""
+
+        user_message = f"""## Output
+{json.dumps(output, indent=2)}
+
+## Invariants to Check
+{json.dumps(invariants, indent=2)}
+
+## System Context
+{context.to_json()}
+
+Check each invariant and respond with results."""
+
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_message},
+        ]
+
+        response = await self.provider.generate(messages)
+        response_text = response.get("text", "").strip()
+
+        # Parse JSON response
+        try:
+            # Extract JSON from markdown code blocks if present
+            if "```json" in response_text:
+                start = response_text.find("```json") + 7
+                end = response_text.find("```", start)
+                response_text = response_text[start:end].strip()
+            elif "```" in response_text:
+                start = response_text.find("```") + 3
+                end = response_text.find("```", start)
+                response_text = response_text[start:end].strip()
+
+            return json.loads(response_text)
+
+        except (json.JSONDecodeError, KeyError) as e:
+            raise ValueError(f"Failed to parse invariant check response: {e}\nResponse: {response_text}")
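The markdown-fence stripping above appears verbatim in both `analyze_divergence` and `check_invariants`; a sketch of how it could be factored into a single helper (the name `_extract_json` is hypothetical, not part of the package):

import json
from typing import Any

def _extract_json(response_text: str) -> Any:
    """Parse a JSON body, stripping an optional markdown code fence first."""
    text = response_text.strip()
    if "```json" in text:
        start = text.find("```json") + 7  # skip past the opening fence tag
        text = text[start:text.find("```", start)].strip()
    elif "```" in text:
        start = text.find("```") + 3
        text = text[start:text.find("```", start)].strip()
    return json.loads(text)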
ctrlcode/fuzzing/budget.py
@@ -0,0 +1,112 @@
+"""Budget management for fuzzing iterations."""
+
+import time
+from dataclasses import dataclass
+
+
+@dataclass
+class BudgetConfig:
+    """Budget configuration."""
+
+    max_tokens: int
+    max_seconds: float
+    max_iterations: int
+
+
+class BudgetManager:
+    """Manages time and token budgets for fuzzing."""
+
+    def __init__(self, config: BudgetConfig):
+        """
+        Initialize budget manager.
+
+        Args:
+            config: Budget configuration
+        """
+        self.config = config
+        self.start_time = time.time()
+        self.tokens_used = 0
+        self.iterations = 0
+
+    def consume(self, tokens: int, elapsed_time: float) -> None:
+        """
+        Consume budget resources.
+
+        Args:
+            tokens: Number of tokens used
+            elapsed_time: Time elapsed in seconds
+        """
+        self.tokens_used += tokens
+        self.iterations += 1
+
+    def exhausted(self) -> bool:
+        """
+        Check if budget is exhausted.
+
+        Returns:
+            True if any budget limit is reached
+        """
+        # Check iteration limit
+        if self.iterations >= self.config.max_iterations:
+            return True
+
+        # Check token limit
+        if self.tokens_used >= self.config.max_tokens:
+            return True
+
+        # Check time limit
+        elapsed = time.time() - self.start_time
+        if elapsed >= self.config.max_seconds:
+            return True
+
+        return False
+
+    def remaining_tokens(self) -> int:
+        """Get remaining token budget."""
+        return max(0, self.config.max_tokens - self.tokens_used)
+
+    def remaining_time(self) -> float:
+        """Get remaining time budget in seconds."""
+        elapsed = time.time() - self.start_time
+        return max(0.0, self.config.max_seconds - elapsed)
+
+    def remaining_iterations(self) -> int:
+        """Get remaining iteration budget."""
+        return max(0, self.config.max_iterations - self.iterations)
+
+    def progress(self) -> dict[str, float]:
+        """
+        Get budget progress.
+
+        Returns:
+            Dict with progress percentages for each budget type
+        """
+        elapsed = time.time() - self.start_time
+
+        return {
+            "tokens": self.tokens_used / self.config.max_tokens,
+            "time": elapsed / self.config.max_seconds,
+            "iterations": self.iterations / self.config.max_iterations,
+        }
+
+    def summary(self) -> dict[int | float]:
+        """
+        Get budget summary.
+
+        Returns:
+            Dict with used and remaining amounts
+        """
+        return {
+            "tokens_used": self.tokens_used,
+            "tokens_remaining": self.remaining_tokens(),
+            "time_elapsed": time.time() - self.start_time,
+            "time_remaining": self.remaining_time(),
+            "iterations_done": self.iterations,
+            "iterations_remaining": self.remaining_iterations(),
+        }
+
+    def reset(self) -> None:
+        """Reset budget counters."""
+        self.start_time = time.time()
+        self.tokens_used = 0
+        self.iterations = 0
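Note that `consume` ignores its `elapsed_time` argument: wall-clock time is measured against `start_time` inside `exhausted` and `remaining_time` instead. A minimal driver-loop sketch; the per-round token count is a placeholder for whatever the provider actually reports:

from ctrlcode.fuzzing.budget import BudgetConfig, BudgetManager

budget = BudgetManager(BudgetConfig(max_tokens=50_000, max_seconds=120.0, max_iterations=10))

while not budget.exhausted():
    # ... run one fuzzing iteration against the target here ...
    tokens_this_round = 1_200  # placeholder; real code would read the provider's usage stats
    budget.consume(tokens=tokens_this_round, elapsed_time=0.0)  # elapsed_time is unused (see note)

print(budget.summary())  # used/remaining tokens, time, and iterations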