ctrlcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ctrlcode/__init__.py +8 -0
- ctrlcode/agents/__init__.py +29 -0
- ctrlcode/agents/cleanup.py +388 -0
- ctrlcode/agents/communication.py +439 -0
- ctrlcode/agents/observability.py +421 -0
- ctrlcode/agents/react_loop.py +297 -0
- ctrlcode/agents/registry.py +211 -0
- ctrlcode/agents/result_parser.py +242 -0
- ctrlcode/agents/workflow.py +723 -0
- ctrlcode/analysis/__init__.py +28 -0
- ctrlcode/analysis/ast_diff.py +163 -0
- ctrlcode/analysis/bug_detector.py +149 -0
- ctrlcode/analysis/code_graphs.py +329 -0
- ctrlcode/analysis/semantic.py +205 -0
- ctrlcode/analysis/static.py +183 -0
- ctrlcode/analysis/synthesizer.py +281 -0
- ctrlcode/analysis/tests.py +189 -0
- ctrlcode/cleanup/__init__.py +16 -0
- ctrlcode/cleanup/auto_merge.py +350 -0
- ctrlcode/cleanup/doc_gardening.py +388 -0
- ctrlcode/cleanup/pr_automation.py +330 -0
- ctrlcode/cleanup/scheduler.py +356 -0
- ctrlcode/config.py +380 -0
- ctrlcode/embeddings/__init__.py +6 -0
- ctrlcode/embeddings/embedder.py +192 -0
- ctrlcode/embeddings/vector_store.py +213 -0
- ctrlcode/fuzzing/__init__.py +24 -0
- ctrlcode/fuzzing/analyzer.py +280 -0
- ctrlcode/fuzzing/budget.py +112 -0
- ctrlcode/fuzzing/context.py +665 -0
- ctrlcode/fuzzing/context_fuzzer.py +506 -0
- ctrlcode/fuzzing/derived_orchestrator.py +732 -0
- ctrlcode/fuzzing/oracle_adapter.py +135 -0
- ctrlcode/linters/__init__.py +11 -0
- ctrlcode/linters/hand_rolled_utils.py +221 -0
- ctrlcode/linters/yolo_parsing.py +217 -0
- ctrlcode/metrics/__init__.py +6 -0
- ctrlcode/metrics/dashboard.py +283 -0
- ctrlcode/metrics/tech_debt.py +663 -0
- ctrlcode/paths.py +68 -0
- ctrlcode/permissions.py +179 -0
- ctrlcode/providers/__init__.py +15 -0
- ctrlcode/providers/anthropic.py +138 -0
- ctrlcode/providers/base.py +77 -0
- ctrlcode/providers/openai.py +197 -0
- ctrlcode/providers/parallel.py +104 -0
- ctrlcode/server.py +871 -0
- ctrlcode/session/__init__.py +6 -0
- ctrlcode/session/baseline.py +57 -0
- ctrlcode/session/manager.py +967 -0
- ctrlcode/skills/__init__.py +10 -0
- ctrlcode/skills/builtin/commit.toml +29 -0
- ctrlcode/skills/builtin/docs.toml +25 -0
- ctrlcode/skills/builtin/refactor.toml +33 -0
- ctrlcode/skills/builtin/review.toml +28 -0
- ctrlcode/skills/builtin/test.toml +28 -0
- ctrlcode/skills/loader.py +111 -0
- ctrlcode/skills/registry.py +139 -0
- ctrlcode/storage/__init__.py +19 -0
- ctrlcode/storage/history_db.py +708 -0
- ctrlcode/tools/__init__.py +220 -0
- ctrlcode/tools/bash.py +112 -0
- ctrlcode/tools/browser.py +352 -0
- ctrlcode/tools/executor.py +153 -0
- ctrlcode/tools/explore.py +486 -0
- ctrlcode/tools/mcp.py +108 -0
- ctrlcode/tools/observability.py +561 -0
- ctrlcode/tools/registry.py +193 -0
- ctrlcode/tools/todo.py +291 -0
- ctrlcode/tools/update.py +266 -0
- ctrlcode/tools/webfetch.py +147 -0
- ctrlcode-0.1.0.dist-info/METADATA +93 -0
- ctrlcode-0.1.0.dist-info/RECORD +75 -0
- ctrlcode-0.1.0.dist-info/WHEEL +4 -0
- ctrlcode-0.1.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,665 @@
|
|
|
1
|
+
"""Context derivation engine - derives behavioral oracle from code analysis."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
import uuid
|
|
7
|
+
from collections import OrderedDict
|
|
8
|
+
from dataclasses import asdict, dataclass, field
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, Literal, Optional
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
15
|
+
from ..analysis.code_graphs import CodeGraphBuilder, CodeGraphs
|
|
16
|
+
from ..embeddings.embedder import CodeEmbedder
|
|
17
|
+
from ..providers.base import Provider
|
|
18
|
+
from ..storage.history_db import CodeRecord, HistoryDB, OracleRecord
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class LRUCache:
    """Bounded least-recently-used cache for oracle retrieval results.

    Keeps hit/miss counters so callers can inspect cache effectiveness.
    A ``max_size`` of 0 disables storage entirely (puts become no-ops).
    """

    def __init__(self, max_size: int = 100):
        """Create an empty cache.

        Args:
            max_size: Upper bound on the number of stored entries.
        """
        self.cache: OrderedDict = OrderedDict()
        self.max_size = max_size
        self.hits = 0
        self.misses = 0

    def get(self, key: str) -> Optional[tuple]:
        """Look up *key*, refreshing its recency on a hit.

        Args:
            key: Cache key

        Returns:
            The cached tuple, or None when the key is absent.
        """
        try:
            stored = self.cache[key]
        except KeyError:
            self.misses += 1
            return None
        # A successful lookup makes this entry the most recently used.
        self.cache.move_to_end(key)
        self.hits += 1
        return stored

    def put(self, key: str, value: tuple) -> None:
        """Insert or refresh an entry, evicting the oldest when full.

        Args:
            key: Cache key
            value: Value to cache
        """
        # If cache is disabled (max_size == 0), don't store anything
        if self.max_size == 0:
            return

        if key in self.cache:
            # Refresh recency of the existing entry before overwriting.
            self.cache.move_to_end(key)
        elif len(self.cache) >= self.max_size:
            # Evict the least recently used entry to make room.
            self.cache.popitem(last=False)
        self.cache[key] = value

    def clear(self) -> None:
        """Drop every entry and reset the hit/miss counters."""
        self.cache.clear()
        self.hits = 0
        self.misses = 0

    @property
    def hit_rate(self) -> float:
        """Fraction of lookups that were hits (0.0 when never queried)."""
        attempts = self.hits + self.misses
        if attempts == 0:
            return 0.0
        return self.hits / attempts

    @property
    def size(self) -> int:
        """Number of entries currently stored."""
        return len(self.cache)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@dataclass
class SystemPlacement:
    """Derived system placement information.

    Describes where in a larger system the analyzed code most likely
    lives, as inferred by the context derivation engine.
    """

    # Kind of system, e.g. "web service", "CLI tool", "library".
    system_type: str
    # Architectural layer, e.g. "HTTP handler", "business logic", "data access".
    layer: str
    # Free-text description of what likely calls this code.
    callers: str
    # Free-text description of what this code calls.
    callees: str
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@dataclass
class IntegrationContract:
    """Contract with an external system the code interacts with."""

    # Name of the external system, e.g. "External API", "Database", "Message Queue".
    system: str
    # Human-readable description of the contract.
    contract: str
    # Requirements the code relies on but that are not explicitly stated
    # (e.g. timeouts, retry semantics, ordering guarantees).
    implicit_requirements: list[str]
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass
class ImplicitAssumption:
    """Implicit assumption made by the code, with a risk rating."""

    # The assumption itself, as stated by the analysis.
    assumption: str
    # Risk classification: SAFE (reasonable, likely correct), RISKY (could be
    # wrong depending on context), DANGEROUS (likely to cause production issues).
    risk: Literal["SAFE", "RISKY", "DANGEROUS"]
    # Rationale for the rating.
    explanation: str
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@dataclass
class ContextDerivation:
    """Complete derived context for code under test.

    Bundles every section of the derivation report plus the Phase 2
    artifacts (graphs, embedding, reuse provenance).
    """

    system_placement: SystemPlacement
    environmental_constraints: dict[str, Any]
    integration_contracts: list[IntegrationContract]
    behavioral_invariants: list[str]
    edge_case_surface: list[str]
    implicit_assumptions: list[ImplicitAssumption]

    # Phase 2 additions: graphs, embeddings, historical reuse
    code_graphs: Optional[CodeGraphs] = None
    function_invariants: dict[str, list[str]] = field(default_factory=dict)
    embedding: Optional[np.ndarray] = None
    retrieved_from: Optional[str] = None  # Session ID if oracle was reused

    def to_json(self) -> str:
        """Serialize to a JSON string.

        The ``code_graphs`` and ``embedding`` fields are stripped first,
        since neither has a JSON representation.
        """
        payload = asdict(self)
        for non_serializable in ("code_graphs", "embedding"):
            payload.pop(non_serializable, None)
        return json.dumps(payload, indent=2)

    @classmethod
    def from_dict(cls, data: dict) -> "ContextDerivation":
        """Rebuild a derivation from a plain dictionary (inverse of to_json).

        ``code_graphs`` and ``embedding`` are not restored; they are never
        serialized in the first place.
        """
        placement = SystemPlacement(**data["system_placement"])
        contracts = [IntegrationContract(**entry) for entry in data["integration_contracts"]]
        assumptions = [ImplicitAssumption(**entry) for entry in data["implicit_assumptions"]]
        return cls(
            system_placement=placement,
            environmental_constraints=data["environmental_constraints"],
            integration_contracts=contracts,
            behavioral_invariants=data["behavioral_invariants"],
            edge_case_surface=data["edge_case_surface"],
            implicit_assumptions=assumptions,
            function_invariants=data.get("function_invariants", {}),
            retrieved_from=data.get("retrieved_from"),
        )
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class ContextDerivationEngine:
    """
    Analyzes generated code to derive full operational context.

    This replaces the "concrete system" in traditional differential fuzzing
    with an inferred behavioral contract.

    Phase 2: Supports historical oracle reuse via embeddings and graph analysis.
    """

    def __init__(
        self,
        provider: Provider,
        history_db: Optional[HistoryDB] = None,
        embedder: Optional[CodeEmbedder] = None,
        graph_builder: Optional[CodeGraphBuilder] = None,
        similarity_threshold: float = 0.85,
        cache_size: int = 100,
        vector_store_path: Optional[Path] = None,
        auto_save_interval: int = 10,
    ):
        """
        Initialize context derivation engine.

        Args:
            provider: LLM provider for analysis
            history_db: Optional history database for oracle reuse
            embedder: Optional code embedder (created if not provided)
            graph_builder: Optional graph builder (created if not provided)
            similarity_threshold: Minimum similarity for oracle reuse (default: 0.85)
            cache_size: Max LRU cache entries for oracle retrieval (default: 100)
            vector_store_path: Path to persistent vector store (default: ~/.ctrlcode/vector_store)
            auto_save_interval: Save vector store after N additions (default: 10)
        """
        self.provider = provider
        self.history_db = history_db
        self.embedder = embedder or CodeEmbedder()
        self.graph_builder = graph_builder or CodeGraphBuilder()
        self.similarity_threshold = similarity_threshold
        self.oracle_cache = LRUCache(max_size=cache_size)

        # Initialize persistent vector store
        if vector_store_path is None:
            vector_store_path = Path.home() / ".ctrlcode" / "vector_store"

        self.vector_store_path = Path(vector_store_path)
        self.auto_save_interval = auto_save_interval
        # Counts embeddings added since the last save; drives auto-save below.
        self._additions_since_save = 0

        # Create or load vector store.
        # NOTE(review): imported locally rather than at module top, presumably
        # to defer a heavyweight (FAISS-backed) import -- confirm before moving.
        from ..embeddings.vector_store import VectorStore

        # NOTE(review): dimension 1536 presumably matches the embedder's
        # output dimension -- confirm against CodeEmbedder.
        self.vector_store = VectorStore(dimension=1536)

        # Load existing index if available
        if (self.vector_store_path / "faiss.index").exists():
            try:
                self.vector_store.load(self.vector_store_path)
                logger.info(f"Loaded persistent vector store with {self.vector_store.size} embeddings")
            except Exception as e:
                # A corrupt or incompatible index falls back to an empty store.
                logger.warning(f"Failed to load vector store: {e}. Starting fresh.")

        # Track which code IDs are already indexed (seeded from the loaded
        # store's id_map so re-indexing is incremental across restarts).
        self._indexed_code_ids: set[str] = set(self.vector_store.id_map)

    async def derive(
        self,
        user_request: str,
        generated_code: str,
        surrounding_files: Optional[list[str]] = None,
        session_id: Optional[str] = None,
    ) -> ContextDerivation:
        """
        Derive operational context from code and specification.

        Phase 2: Searches history for similar code and reuses oracles when possible.

        Args:
            user_request: Original user specification
            generated_code: Generated code to analyze
            surrounding_files: Optional surrounding codebase files
            session_id: Optional session ID for tracking (a UUID4 is generated
                when omitted)

        Returns:
            ContextDerivation with all derived context (including graphs, embeddings)

        Raises:
            ValueError: If derivation fails or returns invalid JSON
        """
        if session_id is None:
            session_id = str(uuid.uuid4())

        # Step 1: Embed the generated code
        logger.debug("Embedding generated code")
        code_embedding = self.embedder.embed_code(generated_code)

        # Step 2: Build code graphs
        logger.debug("Building code relationship graphs")
        code_graphs = self.graph_builder.build_from_code(generated_code, "<generated>")

        # Step 3: Search history for similar code (if history DB available)
        similar_oracle = None
        retrieved_from = None

        if self.history_db:
            similar_oracle, retrieved_from = await self._search_similar_oracle(
                code_embedding, generated_code, user_request
            )

        # Step 4: Either adapt historical oracle or derive fresh
        if similar_oracle:
            logger.info(f"Reusing historical oracle from session {retrieved_from}")
            # NOTE(review): `similar_oracle` may be the very object held in the
            # LRU cache; the mutations here (retrieved_from, and the graph /
            # embedding attachments below) also mutate the cached copy --
            # confirm this aliasing is intended.
            context = similar_oracle
            context.retrieved_from = retrieved_from
        else:
            logger.debug("Deriving fresh oracle (no similar code in history)")
            context = await self._derive_fresh_oracle(
                user_request, generated_code, surrounding_files, code_graphs
            )

        # Step 5: Attach graphs and embedding
        context.code_graphs = code_graphs
        context.embedding = code_embedding

        # Step 6: Store in history DB (if available)
        if self.history_db:
            await self._store_in_history(session_id, user_request, generated_code, context)

        return context

    def _save_vector_store(self) -> None:
        """Save vector store to disk and reset the additions-since-save counter.

        Failures are logged, not raised: persistence is best-effort and must
        not interrupt derivation.
        """
        try:
            self.vector_store.save(self.vector_store_path)
            self._additions_since_save = 0
            logger.info(
                f"Saved vector store with {self.vector_store.size} embeddings "
                f"to {self.vector_store_path}"
            )
        except Exception as e:
            logger.error(f"Failed to save vector store: {e}")

    async def _search_similar_oracle(
        self,
        code_embedding: np.ndarray,
        new_code: str,
        user_request: str,
    ) -> tuple[Optional[ContextDerivation], Optional[str]]:
        """Search history for similar code and adapt oracle if found.

        Results (including negative ones) are memoized in the LRU cache keyed
        by the SHA-256 of the new code, so repeated derivations of identical
        code skip the vector search and adaptation entirely.

        Args:
            code_embedding: Embedding of new code
            new_code: New generated code
            user_request: User specification

        Returns:
            Tuple of (adapted oracle, source session ID) or (None, None)
        """
        from .oracle_adapter import OracleAdapter

        # Compute cache key from code hash
        code_hash = hashlib.sha256(new_code.encode()).hexdigest()

        # Check cache first
        cached = self.oracle_cache.get(code_hash)
        if cached is not None:
            logger.debug(f"Cache HIT for code hash {code_hash[:8]}...")
            similar_oracle, session_id = cached
            return similar_oracle, session_id

        logger.debug(f"Cache MISS for code hash {code_hash[:8]}...")

        # Sync vector store with database (incremental updates).
        # NOTE(review): loads up to 10000 records into memory on every cache
        # miss -- acceptable for now but worth revisiting at scale.
        code_records = self.history_db.get_all_code_embeddings(limit=10000)

        if not code_records:
            logger.debug("No historical code in database")
            # Cache negative result
            self.oracle_cache.put(code_hash, (None, None))
            return None, None

        # Add new embeddings incrementally
        new_embeddings = []
        new_ids = []

        for rec in code_records:
            if rec.code_id not in self._indexed_code_ids:
                new_embeddings.append(rec.embedding)
                new_ids.append(rec.code_id)
                self._indexed_code_ids.add(rec.code_id)

        if new_embeddings:
            logger.debug(f"Adding {len(new_embeddings)} new embeddings to vector store")
            self.vector_store.add(np.array(new_embeddings), new_ids)
            self._additions_since_save += len(new_embeddings)

            # Auto-save if threshold reached
            if self._additions_since_save >= self.auto_save_interval:
                self._save_vector_store()

        # Search for similar code using persistent vector store
        results = self.vector_store.search(
            code_embedding,
            k=3,
            min_similarity=self.similarity_threshold,
        )

        if not results:
            logger.debug(f"No similar code found above threshold {self.similarity_threshold}")
            # Cache negative result
            self.oracle_cache.put(code_hash, (None, None))
            return None, None

        # Try to adapt oracle from most similar code; candidates are tried in
        # the order returned by the vector search until one adapts cleanly.
        for code_id, similarity in results:
            logger.debug(f"Found similar code: {code_id} (similarity: {similarity:.2%})")

            # Get the code record
            code_record = next((r for r in code_records if r.code_id == code_id), None)
            if not code_record:
                continue

            # Get the oracle for this session
            oracle_records = self.history_db.get_all_oracle_embeddings()
            oracle_record = next(
                (o for o in oracle_records if o.session_id == code_record.session_id),
                None,
            )

            if not oracle_record:
                logger.debug(f"No oracle found for session {code_record.session_id}")
                continue

            # Reconstruct the historical oracle
            try:
                historical_oracle = ContextDerivation.from_dict(json.loads(oracle_record.oracle))
            except Exception as e:
                # Unparseable stored oracle: skip this candidate, try the next.
                logger.warning(f"Failed to parse historical oracle: {e}")
                continue

            # Attempt to adapt the oracle
            adapter = OracleAdapter(self.provider)
            adapted_oracle = await adapter.adapt_oracle(
                historical_oracle,
                code_record.code,
                new_code,
                user_request,
                similarity,
            )

            if adapted_oracle:
                # Cache the successful result
                result = (adapted_oracle, code_record.session_id)
                self.oracle_cache.put(code_hash, result)
                return result

        logger.debug("Could not adapt any historical oracles")
        # Cache the negative result to avoid repeated searches
        self.oracle_cache.put(code_hash, (None, None))
        return None, None

    async def _derive_fresh_oracle(
        self,
        user_request: str,
        generated_code: str,
        surrounding_files: Optional[list[str]],
        code_graphs: CodeGraphs,
    ) -> ContextDerivation:
        """Derive fresh oracle from scratch (original derivation logic).

        Prompts the LLM with the spec, the code, graph metadata, and any
        surrounding files, then parses its JSON reply into a derivation.

        Args:
            user_request: User specification
            generated_code: Generated code
            surrounding_files: Optional surrounding files
            code_graphs: Code relationship graphs

        Returns:
            Fresh ContextDerivation

        Raises:
            ValueError: If derivation fails
        """
        # Build graph metadata section
        graph_metadata = self._build_graph_metadata(code_graphs)

        # Build system prompt (from DIFFFUZZTEST.md lines 80-140)
        system_prompt = """You are a senior systems analyst. Your job is to examine a piece of generated
code and its originating specification, then derive the full operational
context — even when that context wasn't explicitly stated.

You will be given:
1. The user's original prompt/specification
2. The generated source code
3. (Optional) Surrounding codebase files, if available

Produce a CONTEXT DERIVATION REPORT with these sections:

## 1. System Placement
Where does this code live? Derive from clues in the code and spec:
- What kind of system is this part of? (web server, CLI tool, data pipeline,
  library, microservice, mobile app, embedded system, etc.)
- What layer? (HTTP handler, business logic, data access, infrastructure,
  utility, middleware, etc.)
- What runs around it? (What likely calls this code? What does it call?)

## 2. Environmental Constraints
What can we infer about the runtime environment?
- Language version and platform (derive from syntax, imports, idioms)
- Concurrency model (async, threaded, single-threaded, multiprocess?)
- Resource constraints (memory-sensitive? latency-sensitive? throughput?)
- Deployment context (serverless, container, long-running server, edge?)

## 3. Integration Contracts
What external systems does this code interact with (explicitly or implicitly)?
- APIs it calls or is called by (infer from HTTP clients, route handlers, etc.)
- Databases or data stores (infer from ORMs, query builders, file I/O)
- Message queues, caches, external services
- For each: what are the implicit contracts? (timeouts, retry semantics,
  idempotency requirements, ordering guarantees)

## 4. Behavioral Invariants
What MUST be true about this code's behavior, regardless of implementation?
Derive from the spec AND from the system context:
- Functional invariants (output properties, state transitions)
- Safety invariants (must never: lose data, corrupt state, leak secrets, etc.)
- Liveness invariants (must always: eventually return, release resources, etc.)
- Idempotency requirements (is it safe to call twice with the same input?)
- Ordering constraints (must things happen in a specific sequence?)

## 5. Edge Case Surface
What environmental edge cases should be tested?
- What happens when dependencies are slow, down, or returning errors?
- What happens under concurrent access?
- What happens at resource limits (memory, file descriptors, connections)?
- What happens with malformed or unexpected upstream data?
- Clock skew, timezone issues, daylight savings transitions?
- Filesystem permissions, disk full, network partitions?

## 6. Implicit Assumptions
What assumptions has the code made that are NOT stated in the spec?
Flag each as:
- SAFE: Reasonable assumption, likely correct
- RISKY: Could be wrong depending on context
- DANGEROUS: Likely to cause issues in production

## 7. Function-Level Invariants
For each function defined in the code, derive specific behavioral invariants.
Output as a dict mapping function names to lists of invariants.

Output as structured JSON with all 7 sections."""

        # Build user message
        context_section = (
            surrounding_files
            if surrounding_files
            else "No surrounding codebase provided. Derive context entirely from the code and specification."
        )

        user_message = f"""## Original Specification
{user_request}

## Generated Code
```
{generated_code}
```

{graph_metadata}

## Available Context
{context_section}"""

        # Call LLM
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ]

        response = await self.provider.generate(messages)
        response_text = response.get("text", "").strip()

        # Parse JSON response
        try:
            # Extract JSON from markdown code blocks if present
            if "```json" in response_text:
                start = response_text.find("```json") + 7
                end = response_text.find("```", start)
                response_text = response_text[start:end].strip()
            elif "```" in response_text:
                start = response_text.find("```") + 3
                end = response_text.find("```", start)
                response_text = response_text[start:end].strip()

            data = json.loads(response_text)
            return ContextDerivation.from_dict(data)

        except (json.JSONDecodeError, KeyError) as e:
            raise ValueError(f"Failed to parse context derivation response: {e}\nResponse: {response_text}")

    def _build_graph_metadata(self, graphs: CodeGraphs) -> str:
        """Build graph metadata section for prompt.

        Args:
            graphs: Code relationship graphs

        Returns:
            Formatted graph metadata string; empty when no functions were found
        """
        if graphs.function_count == 0:
            return ""

        metadata_parts = ["## Code Structure Analysis"]
        metadata_parts.append(f"- Functions defined: {graphs.function_count}")
        metadata_parts.append(f"- Classes defined: {graphs.class_count}")

        # List functions (capped at 10 names to keep the prompt short)
        if graphs.function_count > 0:
            functions = [
                name
                for name, info in graphs.export_map.items()
                if info.symbol_type == "function"
            ]
            metadata_parts.append(f"- Function names: {', '.join(functions[:10])}")

        # Call graph summary
        if graphs.call_graph.number_of_edges() > 0:
            metadata_parts.append(
                f"- Function calls detected: {graphs.call_graph.number_of_edges()} relationships"
            )

        return "\n".join(metadata_parts)

    async def _store_in_history(
        self,
        session_id: str,
        user_request: str,
        generated_code: str,
        context: ContextDerivation,
    ) -> None:
        """Store derivation results in history database.

        Best-effort: any storage failure is logged and swallowed so that a
        broken history DB never blocks returning the derived context.

        Args:
            session_id: Session identifier
            user_request: User specification
            generated_code: Generated code
            context: Derived context
        """
        try:
            # Store code with embedding
            code_record = CodeRecord(
                code_id=f"{session_id}_code",
                session_id=session_id,
                code=generated_code,
                embedding=context.embedding,
                timestamp=datetime.now(),
            )
            self.history_db.store_code(code_record)

            # Store oracle with embedding and versioning
            oracle_embedding = self.embedder.embed_oracle(context.to_json())

            # Determine version and parent
            oracle_version = 1
            parent_oracle_id = None

            if context.retrieved_from:
                # This oracle was adapted from a parent
                parent_oracle_id = f"{context.retrieved_from}_oracle"

                # Get parent oracle to determine version
                try:
                    parent_oracles = self.history_db.get_all_oracle_embeddings()
                    parent = next(
                        (o for o in parent_oracles if o.oracle_id == parent_oracle_id),
                        None
                    )
                    if parent:
                        oracle_version = parent.oracle_version + 1
                        # Increment reuse count of parent
                        self.history_db.increment_oracle_reuse(parent_oracle_id)
                        logger.debug(f"Oracle adapted from v{parent.oracle_version}, creating v{oracle_version}")
                except Exception as e:
                    # Version stays 1 when the parent lookup fails.
                    logger.warning(f"Failed to get parent oracle version: {e}")

            oracle_record = OracleRecord(
                oracle_id=f"{session_id}_oracle",
                session_id=session_id,
                oracle=context.to_json(),
                embedding=oracle_embedding,
                quality_score=1.0,  # Could be computed based on invariant count, etc.
                timestamp=datetime.now(),
                oracle_version=oracle_version,
                parent_oracle_id=parent_oracle_id,
                reuse_count=0,
            )
            self.history_db.store_oracle(oracle_record)

            logger.debug(f"Stored derivation results for session {session_id} in history DB")

        except Exception as e:
            logger.warning(f"Failed to store in history DB: {e}")
|