ctrlcode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. ctrlcode/__init__.py +8 -0
  2. ctrlcode/agents/__init__.py +29 -0
  3. ctrlcode/agents/cleanup.py +388 -0
  4. ctrlcode/agents/communication.py +439 -0
  5. ctrlcode/agents/observability.py +421 -0
  6. ctrlcode/agents/react_loop.py +297 -0
  7. ctrlcode/agents/registry.py +211 -0
  8. ctrlcode/agents/result_parser.py +242 -0
  9. ctrlcode/agents/workflow.py +723 -0
  10. ctrlcode/analysis/__init__.py +28 -0
  11. ctrlcode/analysis/ast_diff.py +163 -0
  12. ctrlcode/analysis/bug_detector.py +149 -0
  13. ctrlcode/analysis/code_graphs.py +329 -0
  14. ctrlcode/analysis/semantic.py +205 -0
  15. ctrlcode/analysis/static.py +183 -0
  16. ctrlcode/analysis/synthesizer.py +281 -0
  17. ctrlcode/analysis/tests.py +189 -0
  18. ctrlcode/cleanup/__init__.py +16 -0
  19. ctrlcode/cleanup/auto_merge.py +350 -0
  20. ctrlcode/cleanup/doc_gardening.py +388 -0
  21. ctrlcode/cleanup/pr_automation.py +330 -0
  22. ctrlcode/cleanup/scheduler.py +356 -0
  23. ctrlcode/config.py +380 -0
  24. ctrlcode/embeddings/__init__.py +6 -0
  25. ctrlcode/embeddings/embedder.py +192 -0
  26. ctrlcode/embeddings/vector_store.py +213 -0
  27. ctrlcode/fuzzing/__init__.py +24 -0
  28. ctrlcode/fuzzing/analyzer.py +280 -0
  29. ctrlcode/fuzzing/budget.py +112 -0
  30. ctrlcode/fuzzing/context.py +665 -0
  31. ctrlcode/fuzzing/context_fuzzer.py +506 -0
  32. ctrlcode/fuzzing/derived_orchestrator.py +732 -0
  33. ctrlcode/fuzzing/oracle_adapter.py +135 -0
  34. ctrlcode/linters/__init__.py +11 -0
  35. ctrlcode/linters/hand_rolled_utils.py +221 -0
  36. ctrlcode/linters/yolo_parsing.py +217 -0
  37. ctrlcode/metrics/__init__.py +6 -0
  38. ctrlcode/metrics/dashboard.py +283 -0
  39. ctrlcode/metrics/tech_debt.py +663 -0
  40. ctrlcode/paths.py +68 -0
  41. ctrlcode/permissions.py +179 -0
  42. ctrlcode/providers/__init__.py +15 -0
  43. ctrlcode/providers/anthropic.py +138 -0
  44. ctrlcode/providers/base.py +77 -0
  45. ctrlcode/providers/openai.py +197 -0
  46. ctrlcode/providers/parallel.py +104 -0
  47. ctrlcode/server.py +871 -0
  48. ctrlcode/session/__init__.py +6 -0
  49. ctrlcode/session/baseline.py +57 -0
  50. ctrlcode/session/manager.py +967 -0
  51. ctrlcode/skills/__init__.py +10 -0
  52. ctrlcode/skills/builtin/commit.toml +29 -0
  53. ctrlcode/skills/builtin/docs.toml +25 -0
  54. ctrlcode/skills/builtin/refactor.toml +33 -0
  55. ctrlcode/skills/builtin/review.toml +28 -0
  56. ctrlcode/skills/builtin/test.toml +28 -0
  57. ctrlcode/skills/loader.py +111 -0
  58. ctrlcode/skills/registry.py +139 -0
  59. ctrlcode/storage/__init__.py +19 -0
  60. ctrlcode/storage/history_db.py +708 -0
  61. ctrlcode/tools/__init__.py +220 -0
  62. ctrlcode/tools/bash.py +112 -0
  63. ctrlcode/tools/browser.py +352 -0
  64. ctrlcode/tools/executor.py +153 -0
  65. ctrlcode/tools/explore.py +486 -0
  66. ctrlcode/tools/mcp.py +108 -0
  67. ctrlcode/tools/observability.py +561 -0
  68. ctrlcode/tools/registry.py +193 -0
  69. ctrlcode/tools/todo.py +291 -0
  70. ctrlcode/tools/update.py +266 -0
  71. ctrlcode/tools/webfetch.py +147 -0
  72. ctrlcode-0.1.0.dist-info/METADATA +93 -0
  73. ctrlcode-0.1.0.dist-info/RECORD +75 -0
  74. ctrlcode-0.1.0.dist-info/WHEEL +4 -0
  75. ctrlcode-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,506 @@
1
+ """Context-aware fuzzer - generates input and environment test cases."""
2
+
3
+ import json
4
+ import logging
5
+ from dataclasses import asdict, dataclass
6
+ from datetime import datetime
7
+ from typing import Any, Literal, Optional
8
+
9
+ import networkx as nx
10
+ import numpy as np
11
+
12
+ from ..analysis.bug_detector import BugPatternDetector
13
+ from ..analysis.code_graphs import CodeGraphs
14
+ from ..embeddings.embedder import CodeEmbedder
15
+ from ..embeddings.vector_store import VectorStore
16
+ from ..providers.base import Provider
17
+ from ..storage.history_db import HistoryDB, StoredTest
18
+ from .context import ContextDerivation
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
@dataclass
class EnvironmentScenario:
    """Environment simulation scenario.

    Describes one environmental condition to simulate around the code under
    test: which dependency behaviors to mock and what outcome is expected.
    The expected outcome is the "derived oracle" — there is no real system
    to compare against, so it is derived from the behavioral invariants.
    """

    # Human-readable summary of the simulated condition.
    description: str
    mock_setup: dict[str, Any]  # Dependency behaviors to simulate
    expected_behavior: dict[str, Any]  # Derived oracle
    # Names of the behavioral invariants this scenario exercises.
    invariants_checked: list[str]
31
+
32
+
33
@dataclass
class FuzzTestCase:
    """Single fuzz test case (input and/or environment)."""

    # Unique identifier, e.g. "fuzz_001".
    id: str
    type: Literal["input", "environment", "combined"]
    input: Optional[dict[str, Any]]  # Actual input (if type=input/combined)
    environment: Optional[EnvironmentScenario]  # Environment sim (if type=environment/combined)
    # Why this case was generated; also used as a quality proxy when
    # deduplicating near-identical tests.
    rationale: str
    category: Literal["exploit", "explore", "cover", "stress", "environment"]

    def to_dict(self) -> dict:
        """Convert to dictionary (nested dataclasses are expanded by asdict)."""
        return asdict(self)
47
+
48
+
49
class ContextAwareFuzzer:
    """Generate input- and environment-fuzzing test cases.

    Traditional fuzzers vary only the inputs to a function. This fuzzer
    additionally produces *environment* scenarios — simulated dependency
    failures, timing issues, concurrency, and resource pressure — so the
    code is exercised in its derived system context.

    Phase 3 additions: graph-aware expansion of the fuzzing context,
    historical bug-pattern warnings, and similarity-based clustering /
    deduplication of generated tests.
    """

    def __init__(
        self,
        provider: Provider,
        history_db: Optional[HistoryDB] = None,
        embedder: Optional[CodeEmbedder] = None,
        bug_detector: Optional[BugPatternDetector] = None,
        graph_expansion_depth: int = 2,
        test_clustering_threshold: float = 0.8,
    ):
        """Set up the fuzzer and its optional Phase-3 collaborators.

        Args:
            provider: LLM provider used to generate test batches.
            history_db: Historical test/bug database; enables test clustering.
            embedder: Code embedder used for test similarity (a fresh
                CodeEmbedder is created when omitted).
            bug_detector: Detector for historically similar bug patterns.
            graph_expansion_depth: Max BFS depth when expanding the call
                graph (default: 2).
            test_clustering_threshold: Cosine-similarity cutoff above which
                two generated tests count as duplicates (default: 0.8).
        """
        # Core generation dependencies.
        self.provider = provider
        self.embedder = embedder or CodeEmbedder()
        # Optional Phase-3 collaborators (features degrade gracefully
        # when these are None).
        self.history_db = history_db
        self.bug_detector = bug_detector
        # Tuning knobs.
        self.graph_expansion_depth = graph_expansion_depth
        self.test_clustering_threshold = test_clustering_threshold
87
+
88
+ async def generate_tests(
89
+ self,
90
+ spec: str,
91
+ code: str,
92
+ context: ContextDerivation,
93
+ previous_results: Optional[list[dict]] = None,
94
+ batch_size: int = 20,
95
+ ) -> list[FuzzTestCase]:
96
+ """
97
+ Generate batch of test cases with derived expected behaviors.
98
+
99
+ Args:
100
+ spec: Original specification
101
+ code: Generated code
102
+ context: Derived context from ContextDerivationEngine
103
+ previous_results: Previous test results (for guided fuzzing)
104
+ batch_size: Number of tests to generate
105
+
106
+ Returns:
107
+ List of FuzzTestCase instances
108
+
109
+ Distribution:
110
+ - 40% environment scenarios (dependency failures, timing, concurrency)
111
+ - 30% input edge cases
112
+ - 20% combined (tricky input + hostile environment)
113
+ - 10% invariant-focused (directly test each behavioral invariant)
114
+ """
115
+ # Phase 3: Check for similar bug patterns before generating tests
116
+ bug_warnings = ""
117
+ if self.bug_detector:
118
+ similar_bugs = self.bug_detector.check_patterns(code)
119
+ if similar_bugs:
120
+ bug_warnings = self._build_bug_warnings_section(similar_bugs)
121
+ logger.warning(f"Detected {len(similar_bugs)} similar bug patterns")
122
+
123
+ # Phase 3: Build graph expansion context for environment scenarios
124
+ graph_context = ""
125
+ if context.code_graphs:
126
+ graph_context = self._build_graph_context_section(context.code_graphs, context)
127
+
128
+ # Build system prompt (from DIFFFUZZTEST.md lines 232-294)
129
+ system_prompt = """You are a fuzz test generator for differential testing. Unlike traditional
130
+ input fuzzing, you generate TWO kinds of test cases:
131
+
132
+ **A) Input Fuzzing** — diverse inputs to the function itself
133
+ **B) Environment Fuzzing** — simulated environmental conditions that test
134
+ how the code behaves in its derived system context
135
+
136
+ You will be given:
137
+ 1. The original specification
138
+ 2. The generated code
139
+ 3. A Context Derivation Report (system placement, constraints, invariants,
140
+ edge cases, and implicit assumptions)
141
+ 4. Previous test results and analyses (if any)
142
+
143
+ ## For Input Fuzzing:
144
+ Generate diverse inputs as before — edge cases, boundary values, adversarial
145
+ inputs. Focus on the behavioral invariants from the context report.
146
+
147
+ ## For Environment Fuzzing:
148
+ Generate SCENARIOS that simulate environmental conditions:
149
+ - Dependency failures (API returns 500, database times out, etc.)
150
+ - Resource pressure (slow responses, connection limits hit)
151
+ - Concurrency scenarios (parallel calls, race conditions)
152
+ - Configuration edge cases (missing env vars, wrong permissions)
153
+ - Temporal edge cases (clock skew, timezone boundaries)
154
+
155
+ Each environment scenario specifies:
156
+ - What mocks/stubs to set up (dependency behavior)
157
+ - What conditions to simulate (timing, ordering, resource state)
158
+ - What the EXPECTED behavior is (derived from the invariants)
159
+
160
+ Output format — JSON array:
161
+ [
162
+ {
163
+ "id": "fuzz_001",
164
+ "type": "input | environment | combined",
165
+ "input": { ... },
166
+ "environment": {
167
+ "description": "API returns 429 on first 2 calls, then 200",
168
+ "mock_setup": {
169
+ "responses": [
170
+ {"status": 429, "headers": {"Retry-After": "1"}, "delay_ms": 0},
171
+ {"status": 429, "headers": {}, "delay_ms": 0},
172
+ {"status": 200, "body": {"ok": true}, "delay_ms": 50}
173
+ ]
174
+ },
175
+ "expected_behavior": {
176
+ "should_succeed": true,
177
+ "min_total_time_ms": 1000,
178
+ "max_retries": 2,
179
+ "invariants_checked": ["backoff_increases", "respects_retry_after"]
180
+ },
181
+ "invariants_checked": ["backoff_increases", "respects_retry_after"]
182
+ },
183
+ "rationale": "Tests basic retry-then-succeed with Retry-After header",
184
+ "category": "exploit | explore | cover | stress | environment"
185
+ }
186
+ ]
187
+
188
+ CRITICAL: The `expected_behavior` field is your DERIVED ORACLE. Since there
189
+ is no real system to compare against, the expected behavior IS the test
190
+ oracle. Derive it strictly from the specification and the behavioral
191
+ invariants in the context report."""
192
+
193
+ # Build user message
194
+ history_section = (
195
+ json.dumps(previous_results, indent=2)
196
+ if previous_results
197
+ else "First iteration, no previous results."
198
+ )
199
+
200
+ user_message = f"""## Specification
201
+ {spec}
202
+
203
+ ## Generated Code
204
+ ```
205
+ {code}
206
+ ```
207
+
208
+ ## Context Derivation Report
209
+ {context.to_json()}
210
+
211
+ {graph_context}
212
+
213
+ {bug_warnings}
214
+
215
+ ## Previous Results
216
+ {history_section}
217
+
218
+ Generate a batch of {batch_size} test cases.
219
+ Distribution:
220
+ - 40% environment scenarios (dependency failures, timing, concurrency)
221
+ - 30% input edge cases
222
+ - 20% combined (tricky input + hostile environment)
223
+ - 10% invariant-focused (directly test each behavioral invariant)"""
224
+
225
+ # Call LLM
226
+ messages = [
227
+ {"role": "system", "content": system_prompt},
228
+ {"role": "user", "content": user_message},
229
+ ]
230
+
231
+ response = await self.provider.generate(messages)
232
+ response_text = response.get("text", "").strip()
233
+
234
+ # Parse JSON response
235
+ try:
236
+ # Extract JSON from markdown code blocks if present
237
+ if "```json" in response_text:
238
+ start = response_text.find("```json") + 7
239
+ end = response_text.find("```", start)
240
+ response_text = response_text[start:end].strip()
241
+ elif "```" in response_text:
242
+ start = response_text.find("```") + 3
243
+ end = response_text.find("```", start)
244
+ response_text = response_text[start:end].strip()
245
+
246
+ data = json.loads(response_text)
247
+
248
+ # Convert to FuzzTestCase instances
249
+ test_cases = []
250
+ for item in data:
251
+ # Build environment scenario if present
252
+ env_scenario = None
253
+ if item.get("environment"):
254
+ env_data = item["environment"]
255
+ env_scenario = EnvironmentScenario(
256
+ description=env_data["description"],
257
+ mock_setup=env_data["mock_setup"],
258
+ expected_behavior=env_data["expected_behavior"],
259
+ invariants_checked=env_data.get("invariants_checked", []),
260
+ )
261
+
262
+ test_case = FuzzTestCase(
263
+ id=item["id"],
264
+ type=item["type"],
265
+ input=item.get("input"),
266
+ environment=env_scenario,
267
+ rationale=item["rationale"],
268
+ category=item["category"],
269
+ )
270
+ test_cases.append(test_case)
271
+
272
+ return test_cases
273
+
274
+ except (json.JSONDecodeError, KeyError) as e:
275
+ raise ValueError(f"Failed to parse fuzzer response: {e}\nResponse: {response_text}")
276
+
277
+ # Phase 3: Cluster and deduplicate tests
278
+ if self.history_db:
279
+ test_cases = await self._cluster_and_deduplicate_tests(test_cases, context)
280
+
281
+ return test_cases
282
+
283
+ def _expand_graph_context(
284
+ self,
285
+ graphs: CodeGraphs,
286
+ start_functions: list[str],
287
+ max_depth: int = 2,
288
+ ) -> set[str]:
289
+ """Expand graph context using depth-limited BFS traversal.
290
+
291
+ Args:
292
+ graphs: Code relationship graphs
293
+ start_functions: Starting function nodes
294
+ max_depth: Maximum traversal depth (default: 2)
295
+
296
+ Returns:
297
+ Set of function names reachable within max_depth
298
+ """
299
+ if not graphs or not graphs.call_graph:
300
+ return set(start_functions)
301
+
302
+ reachable = set(start_functions)
303
+
304
+ for start_func in start_functions:
305
+ if start_func not in graphs.call_graph:
306
+ continue
307
+
308
+ # BFS traversal
309
+ visited = {start_func}
310
+ queue = [(start_func, 0)] # (node, depth)
311
+
312
+ while queue:
313
+ current, depth = queue.pop(0)
314
+
315
+ if depth >= max_depth:
316
+ continue
317
+
318
+ # Expand to callees (functions called by current)
319
+ for callee in graphs.get_callees(current):
320
+ if callee not in visited:
321
+ visited.add(callee)
322
+ reachable.add(callee)
323
+ queue.append((callee, depth + 1))
324
+
325
+ # Expand to callers (functions that call current)
326
+ for caller in graphs.get_callers(current):
327
+ if caller not in visited:
328
+ visited.add(caller)
329
+ reachable.add(caller)
330
+ queue.append((caller, depth + 1))
331
+
332
+ logger.debug(f"Graph expansion: {len(start_functions)} → {len(reachable)} functions")
333
+ return reachable
334
+
335
+ async def _cluster_and_deduplicate_tests(
336
+ self,
337
+ test_cases: list[FuzzTestCase],
338
+ context: ContextDerivation,
339
+ ) -> list[FuzzTestCase]:
340
+ """Cluster test cases and remove duplicates.
341
+
342
+ Args:
343
+ test_cases: Generated test cases
344
+ context: Context derivation with code graphs
345
+
346
+ Returns:
347
+ Deduplicated test cases (best test per cluster)
348
+ """
349
+ if len(test_cases) <= 1:
350
+ return test_cases
351
+
352
+ # Embed all test cases in batch for efficiency
353
+ test_strs = [json.dumps(test.to_dict(), sort_keys=True) for test in test_cases]
354
+ embeddings_array = self.embedder.embed_batch(test_strs)
355
+
356
+ # Cluster by similarity
357
+ clusters = self._cluster_by_similarity(
358
+ embeddings_array,
359
+ threshold=self.test_clustering_threshold,
360
+ )
361
+
362
+ # Keep best test per cluster
363
+ deduplicated = []
364
+ for cluster in clusters:
365
+ # Pick the test with most comprehensive rationale (as proxy for quality)
366
+ best_test = max(
367
+ (test_cases[i] for i in cluster),
368
+ key=lambda t: len(t.rationale),
369
+ )
370
+ deduplicated.append(best_test)
371
+
372
+ removed_count = len(test_cases) - len(deduplicated)
373
+ if removed_count > 0:
374
+ logger.info(f"Test clustering: removed {removed_count} duplicate tests")
375
+
376
+ return deduplicated
377
+
378
+ def _cluster_by_similarity(
379
+ self,
380
+ embeddings: np.ndarray,
381
+ threshold: float,
382
+ ) -> list[list[int]]:
383
+ """Cluster embeddings by cosine similarity.
384
+
385
+ Args:
386
+ embeddings: Array of embeddings (shape: [n, dim])
387
+ threshold: Similarity threshold (tests with similarity > threshold are clustered)
388
+
389
+ Returns:
390
+ List of clusters (each cluster is list of indices)
391
+ """
392
+ n = len(embeddings)
393
+ if n == 0:
394
+ return []
395
+
396
+ # Compute pairwise cosine similarities
397
+ # For normalized embeddings, cosine similarity = dot product
398
+ similarities = embeddings @ embeddings.T
399
+
400
+ # Build clusters using greedy approach
401
+ clusters = []
402
+ assigned = set()
403
+
404
+ for i in range(n):
405
+ if i in assigned:
406
+ continue
407
+
408
+ # Start new cluster with i
409
+ cluster = [i]
410
+ assigned.add(i)
411
+
412
+ # Add all similar unassigned items to this cluster
413
+ for j in range(i + 1, n):
414
+ if j in assigned:
415
+ continue
416
+
417
+ if similarities[i, j] >= threshold:
418
+ cluster.append(j)
419
+ assigned.add(j)
420
+
421
+ clusters.append(cluster)
422
+
423
+ return clusters
424
+
425
+ def _build_bug_warnings_section(self, similar_bugs: list) -> str:
426
+ """Build bug warnings section for prompt.
427
+
428
+ Args:
429
+ similar_bugs: List of DetectedPattern instances
430
+
431
+ Returns:
432
+ Formatted bug warnings section
433
+ """
434
+ if not similar_bugs:
435
+ return ""
436
+
437
+ lines = ["## ⚠️ Similar Bug Patterns Detected"]
438
+ lines.append(
439
+ "The following bug patterns from historical fuzzing sessions are similar to this code:"
440
+ )
441
+ lines.append("")
442
+
443
+ for i, bug in enumerate(similar_bugs[:5], 1): # Top 5 bugs
444
+ lines.append(
445
+ f"{i}. **{bug.description}** (Severity: {bug.severity}, Confidence: {bug.confidence})"
446
+ )
447
+ lines.append(f" Similarity: {bug.similarity:.1%}")
448
+ lines.append("")
449
+
450
+ lines.append(
451
+ "**IMPORTANT**: Generate targeted test cases that specifically check for these bug patterns."
452
+ )
453
+ return "\n".join(lines)
454
+
455
+ def _build_graph_context_section(
456
+ self,
457
+ graphs: CodeGraphs,
458
+ context: ContextDerivation,
459
+ ) -> str:
460
+ """Build graph-aware context section for environment fuzzing.
461
+
462
+ Args:
463
+ graphs: Code relationship graphs
464
+ context: Context derivation
465
+
466
+ Returns:
467
+ Formatted graph context section
468
+ """
469
+ if not graphs or graphs.function_count == 0:
470
+ return ""
471
+
472
+ lines = ["## Graph-Aware Context for Environment Fuzzing"]
473
+
474
+ # Get all functions
475
+ functions = [
476
+ name for name, info in graphs.export_map.items() if info.symbol_type == "function"
477
+ ]
478
+
479
+ if functions:
480
+ lines.append(f"Defined functions ({len(functions)}): {', '.join(functions[:10])}")
481
+
482
+ # Expand graph context for environment scenarios
483
+ expanded_functions = self._expand_graph_context(
484
+ graphs, functions, max_depth=self.graph_expansion_depth
485
+ )
486
+
487
+ if len(expanded_functions) > len(functions):
488
+ additional = len(expanded_functions) - len(functions)
489
+ lines.append(
490
+ f"Transitive dependencies ({additional} additional functions reachable via calls)"
491
+ )
492
+
493
+ # Integration contracts that might need mocking
494
+ if context.integration_contracts:
495
+ lines.append("")
496
+ lines.append("Functions that likely interact with external systems:")
497
+ for contract in context.integration_contracts[:5]:
498
+ lines.append(f" - {contract.system}: {contract.contract}")
499
+
500
+ lines.append("")
501
+ lines.append(
502
+ "Use the call graph to generate comprehensive environment scenarios "
503
+ "that test transitive dependency failures."
504
+ )
505
+
506
+ return "\n".join(lines)