ctrlcode-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. ctrlcode/__init__.py +8 -0
  2. ctrlcode/agents/__init__.py +29 -0
  3. ctrlcode/agents/cleanup.py +388 -0
  4. ctrlcode/agents/communication.py +439 -0
  5. ctrlcode/agents/observability.py +421 -0
  6. ctrlcode/agents/react_loop.py +297 -0
  7. ctrlcode/agents/registry.py +211 -0
  8. ctrlcode/agents/result_parser.py +242 -0
  9. ctrlcode/agents/workflow.py +723 -0
  10. ctrlcode/analysis/__init__.py +28 -0
  11. ctrlcode/analysis/ast_diff.py +163 -0
  12. ctrlcode/analysis/bug_detector.py +149 -0
  13. ctrlcode/analysis/code_graphs.py +329 -0
  14. ctrlcode/analysis/semantic.py +205 -0
  15. ctrlcode/analysis/static.py +183 -0
  16. ctrlcode/analysis/synthesizer.py +281 -0
  17. ctrlcode/analysis/tests.py +189 -0
  18. ctrlcode/cleanup/__init__.py +16 -0
  19. ctrlcode/cleanup/auto_merge.py +350 -0
  20. ctrlcode/cleanup/doc_gardening.py +388 -0
  21. ctrlcode/cleanup/pr_automation.py +330 -0
  22. ctrlcode/cleanup/scheduler.py +356 -0
  23. ctrlcode/config.py +380 -0
  24. ctrlcode/embeddings/__init__.py +6 -0
  25. ctrlcode/embeddings/embedder.py +192 -0
  26. ctrlcode/embeddings/vector_store.py +213 -0
  27. ctrlcode/fuzzing/__init__.py +24 -0
  28. ctrlcode/fuzzing/analyzer.py +280 -0
  29. ctrlcode/fuzzing/budget.py +112 -0
  30. ctrlcode/fuzzing/context.py +665 -0
  31. ctrlcode/fuzzing/context_fuzzer.py +506 -0
  32. ctrlcode/fuzzing/derived_orchestrator.py +732 -0
  33. ctrlcode/fuzzing/oracle_adapter.py +135 -0
  34. ctrlcode/linters/__init__.py +11 -0
  35. ctrlcode/linters/hand_rolled_utils.py +221 -0
  36. ctrlcode/linters/yolo_parsing.py +217 -0
  37. ctrlcode/metrics/__init__.py +6 -0
  38. ctrlcode/metrics/dashboard.py +283 -0
  39. ctrlcode/metrics/tech_debt.py +663 -0
  40. ctrlcode/paths.py +68 -0
  41. ctrlcode/permissions.py +179 -0
  42. ctrlcode/providers/__init__.py +15 -0
  43. ctrlcode/providers/anthropic.py +138 -0
  44. ctrlcode/providers/base.py +77 -0
  45. ctrlcode/providers/openai.py +197 -0
  46. ctrlcode/providers/parallel.py +104 -0
  47. ctrlcode/server.py +871 -0
  48. ctrlcode/session/__init__.py +6 -0
  49. ctrlcode/session/baseline.py +57 -0
  50. ctrlcode/session/manager.py +967 -0
  51. ctrlcode/skills/__init__.py +10 -0
  52. ctrlcode/skills/builtin/commit.toml +29 -0
  53. ctrlcode/skills/builtin/docs.toml +25 -0
  54. ctrlcode/skills/builtin/refactor.toml +33 -0
  55. ctrlcode/skills/builtin/review.toml +28 -0
  56. ctrlcode/skills/builtin/test.toml +28 -0
  57. ctrlcode/skills/loader.py +111 -0
  58. ctrlcode/skills/registry.py +139 -0
  59. ctrlcode/storage/__init__.py +19 -0
  60. ctrlcode/storage/history_db.py +708 -0
  61. ctrlcode/tools/__init__.py +220 -0
  62. ctrlcode/tools/bash.py +112 -0
  63. ctrlcode/tools/browser.py +352 -0
  64. ctrlcode/tools/executor.py +153 -0
  65. ctrlcode/tools/explore.py +486 -0
  66. ctrlcode/tools/mcp.py +108 -0
  67. ctrlcode/tools/observability.py +561 -0
  68. ctrlcode/tools/registry.py +193 -0
  69. ctrlcode/tools/todo.py +291 -0
  70. ctrlcode/tools/update.py +266 -0
  71. ctrlcode/tools/webfetch.py +147 -0
  72. ctrlcode-0.1.0.dist-info/METADATA +93 -0
  73. ctrlcode-0.1.0.dist-info/RECORD +75 -0
  74. ctrlcode-0.1.0.dist-info/WHEEL +4 -0
  75. ctrlcode-0.1.0.dist-info/entry_points.txt +3 -0
ctrlcode/analysis/semantic.py
@@ -0,0 +1,205 @@
+"""Semantic analysis using LLM to compare code behavior."""
+
+from dataclasses import dataclass
+
+from ..providers.base import Provider
+
+
+@dataclass
+class SemanticDiff:
+    """Result of semantic comparison."""
+
+    variant_id: str
+    agreement_score: float  # 0.0 to 1.0
+    behavioral_differences: list[str]
+    correctness_assessment: str  # "correct", "incorrect", "uncertain"
+    edge_cases: list[str]
+    improvements: list[str]
+    concerns: list[str]
+
+    def __post_init__(self):
+        if not self.behavioral_differences:
+            self.behavioral_differences = []
+        if not self.edge_cases:
+            self.edge_cases = []
+        if not self.improvements:
+            self.improvements = []
+        if not self.concerns:
+            self.concerns = []
+
+
+class SemanticAnalyzer:
+    """Analyzes code semantics using LLM."""
+
+    def __init__(self, provider: Provider):
+        """
+        Initialize semantic analyzer.
+
+        Args:
+            provider: LLM provider for analysis
+        """
+        self.provider = provider
+
+    async def compare(
+        self,
+        baseline: str,
+        variant: str,
+        variant_id: str,
+        user_request: str,
+    ) -> SemanticDiff:
+        """
+        Compare semantic behavior of baseline and variant.
+
+        Args:
+            baseline: Baseline code
+            variant: Variant code
+            variant_id: Identifier for variant
+            user_request: Original user request
+
+        Returns:
+            SemanticDiff with behavioral comparison
+        """
+        prompt = self._build_comparison_prompt(
+            baseline=baseline,
+            variant=variant,
+            user_request=user_request
+        )
+
+        # Get analysis from LLM
+        messages = [{"role": "user", "content": prompt}]
+        response = await self.provider.generate(messages, temperature=0.3)
+
+        # Parse response
+        analysis_text = response.get("text", "")
+        return self._parse_analysis(analysis_text, variant_id)
+
+    def _build_comparison_prompt(
+        self,
+        baseline: str,
+        variant: str,
+        user_request: str
+    ) -> str:
+        """Build prompt for semantic comparison."""
+        return f"""Compare these two code implementations for the request: "{user_request}"
+
+Baseline:
+```python
+{baseline}
+```
+
+Variant:
+```python
+{variant}
+```
+
+Analyze the following:
+
+1. **Behavioral Equivalence**: Do they produce the same behavior? Score 0-10.
+
+2. **Differences**: What are the semantic differences?
+   - Different approach?
+   - Different edge case handling?
+   - Different error handling?
+
+3. **Correctness**: Which implementation is more correct for the request?
+   - State "correct", "incorrect", or "uncertain"
+   - Explain why
+
+4. **Edge Cases**: What edge cases does each handle (or not handle)?
+
+5. **Improvements**: What does the variant do better than baseline?
+
+6. **Concerns**: Any issues, bugs, or problems in the variant?
+
+Format your response as:
+SCORE: <0-10>
+DIFFERENCES:
+- <difference 1>
+- <difference 2>
+CORRECTNESS: <correct|incorrect|uncertain>
+REASON: <explanation>
+EDGE_CASES:
+- <edge case 1>
+- <edge case 2>
+IMPROVEMENTS:
+- <improvement 1>
+- <improvement 2>
+CONCERNS:
+- <concern 1>
+- <concern 2>
+"""
+
+    def _parse_analysis(self, text: str, variant_id: str) -> SemanticDiff:
+        """
+        Parse LLM analysis response.
+
+        Args:
+            text: Analysis text from LLM
+            variant_id: Variant identifier
+
+        Returns:
+            SemanticDiff with parsed results
+        """
+        lines = text.strip().split("\n")
+
+        # Extract sections
+        score = 5.0  # Default
+        differences = []
+        correctness = "uncertain"
+        edge_cases = []
+        improvements = []
+        concerns = []
+
+        current_section = None
+
+        for line in lines:
+            line = line.strip()
+
+            if line.startswith("SCORE:"):
+                try:
+                    score_text = line.split(":", 1)[1].strip()
+                    score = float(score_text.split()[0]) / 10.0  # Normalize to 0-1
+                except (ValueError, IndexError):
+                    pass
+
+            elif line.startswith("DIFFERENCES:"):
+                current_section = "differences"
+
+            elif line.startswith("CORRECTNESS:"):
+                parts = line.split(":", 1)
+                if len(parts) > 1:
+                    correctness = parts[1].strip().lower()
+                current_section = None
+
+            elif line.startswith("EDGE_CASES:"):
+                current_section = "edge_cases"
+
+            elif line.startswith("IMPROVEMENTS:"):
+                current_section = "improvements"
+
+            elif line.startswith("CONCERNS:"):
+                current_section = "concerns"
+
+            elif line.startswith("REASON:"):
+                current_section = None
+
+            elif line.startswith("-") and current_section:
+                item = line.lstrip("- ").strip()
+                if current_section == "differences":
+                    differences.append(item)
+                elif current_section == "edge_cases":
+                    edge_cases.append(item)
+                elif current_section == "improvements":
+                    improvements.append(item)
+                elif current_section == "concerns":
+                    concerns.append(item)
+
+        return SemanticDiff(
+            variant_id=variant_id,
+            agreement_score=score,
+            behavioral_differences=differences,
+            correctness_assessment=correctness,
+            edge_cases=edge_cases,
+            improvements=improvements,
+            concerns=concerns,
+        )
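A minimal usage sketch for the `SemanticAnalyzer` defined above. It assumes only what this hunk shows: the injected provider exposes an async `generate(messages, temperature=...)` that returns a dict with a `"text"` key, which is what `compare` reads. `FakeProvider` and the canned response are illustrative stand-ins, not part of the package.

```python
import asyncio

from ctrlcode.analysis.semantic import SemanticAnalyzer


class FakeProvider:
    """Illustrative stand-in for a ctrlcode Provider; returns a canned analysis."""

    async def generate(self, messages, temperature=0.0):
        return {
            "text": (
                "SCORE: 8\n"
                "DIFFERENCES:\n- Variant adds type hints\n"
                "CORRECTNESS: correct\n"
                "REASON: Both satisfy the request\n"
                "EDGE_CASES:\n- Negative inputs\n"
                "IMPROVEMENTS:\n- Clearer signature\n"
                "CONCERNS:\n- None\n"
            )
        }


async def main():
    analyzer = SemanticAnalyzer(provider=FakeProvider())
    diff = await analyzer.compare(
        baseline="def add(a, b): return a + b",
        variant="def add(a: int, b: int) -> int:\n    return a + b",
        variant_id="variant-1",
        user_request="write an add function",
    )
    # SCORE 8 is normalized to an agreement_score of 0.8 by _parse_analysis
    print(diff.agreement_score, diff.correctness_assessment, diff.improvements)


asyncio.run(main())
```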
ctrlcode/analysis/static.py
@@ -0,0 +1,183 @@
+"""Static analysis for code quality metrics."""
+
+import subprocess
+import tempfile
+from dataclasses import dataclass
+from pathlib import Path
+
+
+@dataclass
+class StaticAnalysisResult:
+    """Result of static analysis."""
+
+    variant_id: str
+    type_errors: int = 0
+    lint_issues: int = 0
+    complexity_score: float = 0.0
+    maintainability_index: float = 0.0
+    quality_score: float = 0.0  # Overall 0-1 score
+
+
+class StaticAnalyzer:
+    """Performs static analysis on code."""
+
+    def analyze(self, code: str, variant_id: str) -> StaticAnalysisResult:
+        """
+        Run static analysis on code.
+
+        Args:
+            code: Code to analyze
+            variant_id: Identifier for variant
+
+        Returns:
+            StaticAnalysisResult with quality metrics
+        """
+        with tempfile.TemporaryDirectory() as tmpdir:
+            tmppath = Path(tmpdir)
+            code_file = tmppath / "code.py"
+            code_file.write_text(code)
+
+            # Run analyses
+            type_errors = self._run_mypy(code_file)
+            lint_issues = self._run_ruff(code_file)
+            complexity = self._calculate_complexity(code)
+
+            # Calculate overall quality score
+            quality = self._calculate_quality(
+                type_errors=type_errors,
+                lint_issues=lint_issues,
+                complexity=complexity,
+            )
+
+            return StaticAnalysisResult(
+                variant_id=variant_id,
+                type_errors=type_errors,
+                lint_issues=lint_issues,
+                complexity_score=complexity,
+                quality_score=quality,
+            )
+
+    def _run_mypy(self, file_path: Path) -> int:
+        """
+        Run mypy type checker.
+
+        Args:
+            file_path: Path to code file
+
+        Returns:
+            Number of type errors
+        """
+        try:
+            result = subprocess.run(
+                ["mypy", str(file_path), "--ignore-missing-imports"],
+                capture_output=True,
+                text=True,
+                timeout=10,
+            )
+
+            # Count error lines
+            errors = 0
+            for line in result.stdout.split("\n"):
+                if "error:" in line.lower():
+                    errors += 1
+
+            return errors
+
+        except (subprocess.TimeoutExpired, FileNotFoundError):
+            return 0  # If mypy unavailable, assume no errors
+
+    def _run_ruff(self, file_path: Path) -> int:
+        """
+        Run ruff linter.
+
+        Args:
+            file_path: Path to code file
+
+        Returns:
+            Number of lint issues
+        """
+        try:
+            result = subprocess.run(
+                ["ruff", "check", str(file_path)],
+                capture_output=True,
+                text=True,
+                timeout=10,
+            )
+
+            # Count issue lines
+            issues = 0
+            for line in result.stdout.split("\n"):
+                if line.strip() and not line.startswith("Found"):
+                    issues += 1
+
+            return issues
+
+        except (subprocess.TimeoutExpired, FileNotFoundError):
+            return 0  # If ruff unavailable, assume no issues
+
+    def _calculate_complexity(self, code: str) -> float:
+        """
+        Calculate code complexity.
+
+        Uses simple heuristics:
+        - Lines of code
+        - Nesting depth
+        - Number of branches
+
+        Args:
+            code: Code to analyze
+
+        Returns:
+            Complexity score (lower is better)
+        """
+        lines = [line for line in code.split("\n") if line.strip() and not line.strip().startswith("#")]
+        loc = len(lines)
+
+        # Calculate max nesting depth
+        max_depth = 0
+
+        for line in lines:
+            # Count leading spaces
+            stripped = line.lstrip()
+            if stripped:
+                indent = len(line) - len(stripped)
+                depth = indent // 4  # Assuming 4-space indents
+                max_depth = max(max_depth, depth)
+
+        # Count branches (if, for, while, try)
+        branches = sum(
+            1 for line in lines
+            if any(line.strip().startswith(kw) for kw in ["if ", "elif ", "for ", "while ", "try:", "except"])
+        )
+
+        # Complexity = weighted sum
+        complexity = (loc * 0.1) + (max_depth * 2) + (branches * 1.5)
+
+        return complexity
+
+    def _calculate_quality(
+        self,
+        type_errors: int,
+        lint_issues: int,
+        complexity: float,
+    ) -> float:
+        """
+        Calculate overall quality score.
+
+        Args:
+            type_errors: Number of type errors
+            lint_issues: Number of lint issues
+            complexity: Complexity score
+
+        Returns:
+            Quality score between 0.0 and 1.0 (higher is better)
+        """
+        # Penalties
+        type_penalty = min(type_errors * 0.1, 0.4)  # Max 40% penalty
+        lint_penalty = min(lint_issues * 0.05, 0.3)  # Max 30% penalty
+        complexity_penalty = min(complexity / 50, 0.3)  # Max 30% penalty
+
+        # Start with perfect score and subtract penalties
+        score = 1.0 - (type_penalty + lint_penalty + complexity_penalty)
+
+        return max(0.0, min(1.0, score))
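A minimal usage sketch for `StaticAnalyzer`. As the hunk shows, it shells out to `mypy` and `ruff` and falls back to zero findings if either tool is missing or times out; the snippet and sample code below are illustrative only, and non-zero counts assume both tools are on PATH.

```python
from ctrlcode.analysis.static import StaticAnalyzer

# Illustrative sample input; any Python source string works here.
CODE = """
def fib(n: int) -> int:
    if n < 2:
        return n
    return fib(n - 1) + fib(n - 2)
"""

analyzer = StaticAnalyzer()
result = analyzer.analyze(CODE, variant_id="variant-1")

# Counts come from mypy/ruff stdout; missing tools yield 0 (see _run_mypy/_run_ruff).
print(result.type_errors, result.lint_issues)
print(f"complexity={result.complexity_score:.1f} quality={result.quality_score:.2f}")
```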
ctrlcode/analysis/synthesizer.py
@@ -0,0 +1,281 @@
+"""Feedback synthesis from differential analysis."""
+
+from dataclasses import dataclass
+
+from .ast_diff import ASTDiff
+from .semantic import SemanticDiff
+from .tests import TestResult
+from .static import StaticAnalysisResult
+
+
+@dataclass
+class AnalysisResult:
+    """Complete analysis from all methods."""
+
+    structural_diffs: list[ASTDiff]
+    semantic_diffs: list[SemanticDiff]
+    test_results: list[TestResult]
+    static_results: list[StaticAnalysisResult]
+    elapsed_time: float
+
+
+@dataclass
+class Feedback:
+    """Synthesized feedback for improvement."""
+
+    improvement_prompt: str
+    quality_score: float
+    consensus: list[str]
+    discrepancies: list[str]
+    changes_made: list[str]
+
+
+class FeedbackSynthesizer:
+    """Synthesizes analysis results into actionable feedback."""
+
+    def __init__(self, weights: dict[str, float] | None = None):
+        """
+        Initialize feedback synthesizer.
+
+        Args:
+            weights: Quality scoring weights for each analysis type
+        """
+        self.weights = weights or {
+            "tests": 0.4,
+            "static": 0.3,
+            "semantic": 0.3,
+        }
+
+    def synthesize(
+        self,
+        analysis: AnalysisResult,
+        iteration: int,
+        user_request: str,
+    ) -> Feedback:
+        """
+        Generate improvement feedback from analysis.
+
+        Args:
+            analysis: Complete analysis results
+            iteration: Current iteration number
+            user_request: Original user request
+
+        Returns:
+            Feedback with improvement prompt and scores
+        """
+        # Extract consensus patterns
+        consensus = self._find_consensus(analysis)
+
+        # Extract discrepancies
+        discrepancies = self._find_discrepancies(analysis)
+
+        # Calculate quality score
+        quality_score = self._calculate_quality(analysis)
+
+        # Build improvement prompt
+        improvement_prompt = self._build_improvement_prompt(
+            consensus=consensus,
+            discrepancies=discrepancies,
+            analysis=analysis,
+            iteration=iteration,
+            user_request=user_request,
+        )
+
+        return Feedback(
+            improvement_prompt=improvement_prompt,
+            quality_score=quality_score,
+            consensus=consensus,
+            discrepancies=discrepancies,
+            changes_made=[],  # Will be populated after improvement
+        )
+
+    def _find_consensus(self, analysis: AnalysisResult) -> list[str]:
+        """
+        Find patterns agreed upon by all variants.
+
+        Args:
+            analysis: Analysis results
+
+        Returns:
+            List of consensus patterns
+        """
+        consensus = []
+
+        # Check semantic agreement
+        if analysis.semantic_diffs:
+            high_agreement = [
+                d for d in analysis.semantic_diffs
+                if d.agreement_score > 0.8
+            ]
+            if len(high_agreement) == len(analysis.semantic_diffs):
+                consensus.append("All variants show high semantic agreement")
+
+        # Check structural similarity
+        if analysis.structural_diffs:
+            similar = [
+                d for d in analysis.structural_diffs
+                if d.structural_similarity > 0.8
+            ]
+            if len(similar) == len(analysis.structural_diffs):
+                consensus.append("All variants have similar structure")
+
+        # Check test passing
+        if analysis.test_results:
+            all_passed = all(t.passed for t in analysis.test_results)
+            if all_passed:
+                consensus.append("All variants pass tests")
+
+        # Check common improvements
+        improvements: dict[str, int] = {}
+        for sem_diff in analysis.semantic_diffs:
+            for imp in sem_diff.improvements:
+                improvements[imp] = improvements.get(imp, 0) + 1
+
+        # Improvements mentioned by multiple variants
+        common_improvements = [
+            imp for imp, count in improvements.items()
+            if count >= len(analysis.semantic_diffs) // 2
+        ]
+        consensus.extend(common_improvements)
+
+        return consensus
+
+    def _find_discrepancies(self, analysis: AnalysisResult) -> list[str]:
+        """
+        Find significant differences between variants.
+
+        Args:
+            analysis: Analysis results
+
+        Returns:
+            List of discrepancy descriptions
+        """
+        discrepancies = []
+
+        # Test failures
+        if analysis.test_results:
+            failed = [t for t in analysis.test_results if not t.passed]
+            if failed:
+                discrepancies.append(
+                    f"{len(failed)}/{len(analysis.test_results)} variants failed tests"
+                )
+
+        # Semantic disagreements
+        if analysis.semantic_diffs:
+            low_agreement = [
+                d for d in analysis.semantic_diffs
+                if d.agreement_score < 0.5
+            ]
+            if low_agreement:
+                discrepancies.append(
+                    f"{len(low_agreement)} variants have low semantic agreement"
+                )
+
+        # Collect concerns
+        for sem_diff in analysis.semantic_diffs:
+            for concern in sem_diff.concerns:
+                if concern not in discrepancies:
+                    discrepancies.append(f"Concern: {concern}")
+
+        # Structural differences
+        if analysis.structural_diffs:
+            syntax_errors = [d for d in analysis.structural_diffs if d.has_syntax_error]
+            if syntax_errors:
+                discrepancies.append(f"{len(syntax_errors)} variants have syntax errors")
+
+        # Static analysis issues
+        if analysis.static_results:
+            high_errors = [s for s in analysis.static_results if s.type_errors > 3]
+            if high_errors:
+                discrepancies.append(
+                    f"{len(high_errors)} variants have multiple type errors"
+                )
+
+            high_complexity = [s for s in analysis.static_results if s.complexity_score > 20]
+            if high_complexity:
+                discrepancies.append(
+                    f"{len(high_complexity)} variants have high complexity"
+                )
+
+        return discrepancies
+
+    def _calculate_quality(self, analysis: AnalysisResult) -> float:
+        """
+        Calculate aggregate quality score.
+
+        Args:
+            analysis: Analysis results
+
+        Returns:
+            Quality score between 0.0 and 1.0
+        """
+        score = 0.0
+
+        # Test passing rate
+        if analysis.test_results:
+            passing_rate = sum(
+                1 for t in analysis.test_results if t.passed
+            ) / len(analysis.test_results)
+            score += passing_rate * self.weights["tests"]
+
+        # Static analysis score
+        if analysis.static_results:
+            avg_static = sum(
+                s.quality_score for s in analysis.static_results
+            ) / len(analysis.static_results)
+            score += avg_static * self.weights["static"]
+
+        # Semantic coherence
+        if analysis.semantic_diffs:
+            avg_semantic = sum(
+                d.agreement_score for d in analysis.semantic_diffs
+            ) / len(analysis.semantic_diffs)
+            score += avg_semantic * self.weights["semantic"]
+
+        return min(1.0, max(0.0, score))
+
+    def _build_improvement_prompt(
+        self,
+        consensus: list[str],
+        discrepancies: list[str],
+        analysis: AnalysisResult,
+        iteration: int,
+        user_request: str,
+    ) -> str:
+        """Build prompt for generating improved version."""
+        prompt = f"""Based on fuzzing iteration {iteration}, improve the code for: "{user_request}"
+
+**Consensus (keep these patterns):**
+{chr(10).join(f"- {c}" for c in consensus) if consensus else "- No strong consensus"}
+
+**Issues to resolve:**
+{chr(10).join(f"- {d}" for d in discrepancies) if discrepancies else "- No major issues"}
+
+**Quality metrics:**
+"""
+
+        # Add test results
+        if analysis.test_results:
+            pass_rate = sum(1 for t in analysis.test_results if t.passed) / len(analysis.test_results)
+            prompt += f"\n- Test pass rate: {pass_rate:.1%}"
+
+        # Add static analysis
+        if analysis.static_results:
+            avg_quality = sum(s.quality_score for s in analysis.static_results) / len(analysis.static_results)
+            prompt += f"\n- Static analysis quality: {avg_quality:.1%}"
+
+        # Add complexity
+        if analysis.static_results:
+            avg_complexity = sum(s.complexity_score for s in analysis.static_results) / len(analysis.static_results)
+            prompt += f"\n- Average complexity: {avg_complexity:.1f}"
+
+        prompt += "\n\n**Task:**\nGenerate an improved version that:\n"
+        prompt += "1. Maintains consensus elements\n"
+        prompt += "2. Resolves identified issues\n"
+        prompt += "3. Improves quality metrics\n"
+        prompt += "4. Handles edge cases properly\n"
+        prompt += "\n**IMPORTANT:** Respond with ONLY the improved output directly. "
+        prompt += "Do NOT include explanations, meta-commentary, or descriptions of changes. "
+        prompt += "Just provide the final improved result as you would respond to the original request.\n"
+
+        return prompt
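A minimal end-to-end sketch of `FeedbackSynthesizer.synthesize`. `ASTDiff` and `TestResult` live in `ctrlcode/analysis/ast_diff.py` and `ctrlcode/analysis/tests.py`, which are not shown in this diff; the synthesizer only reads `.structural_similarity`, `.has_syntax_error`, and `.passed` from them, so plain stand-in objects are used here for illustration.

```python
from types import SimpleNamespace

from ctrlcode.analysis.semantic import SemanticDiff
from ctrlcode.analysis.static import StaticAnalysisResult
from ctrlcode.analysis.synthesizer import AnalysisResult, FeedbackSynthesizer

# Stand-ins for ASTDiff and TestResult (not shown in this diff): only the
# attributes the synthesizer actually reads are provided.
analysis = AnalysisResult(
    structural_diffs=[SimpleNamespace(structural_similarity=0.9, has_syntax_error=False)],
    semantic_diffs=[
        SemanticDiff(
            variant_id="variant-1",
            agreement_score=0.85,
            behavioral_differences=[],
            correctness_assessment="correct",
            edge_cases=[],
            improvements=["Handles empty input"],
            concerns=[],
        )
    ],
    test_results=[SimpleNamespace(passed=True)],
    static_results=[
        StaticAnalysisResult(
            variant_id="variant-1",
            type_errors=0,
            lint_issues=1,
            complexity_score=6.0,
            quality_score=0.9,
        )
    ],
    elapsed_time=1.2,
)

synth = FeedbackSynthesizer()  # default weights: tests 0.4, static 0.3, semantic 0.3
feedback = synth.synthesize(analysis, iteration=1, user_request="write an add function")
print(f"quality={feedback.quality_score:.2f}")
print(feedback.improvement_prompt)
```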