codeframe-ai 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. codeframe/__init__.py +11 -0
  2. codeframe/__main__.py +20 -0
  3. codeframe/adapters/__init__.py +5 -0
  4. codeframe/adapters/e2b/__init__.py +13 -0
  5. codeframe/adapters/e2b/adapter.py +342 -0
  6. codeframe/adapters/e2b/budget.py +71 -0
  7. codeframe/adapters/e2b/credential_scanner.py +134 -0
  8. codeframe/adapters/llm/__init__.py +92 -0
  9. codeframe/adapters/llm/anthropic.py +414 -0
  10. codeframe/adapters/llm/base.py +444 -0
  11. codeframe/adapters/llm/mock.py +281 -0
  12. codeframe/adapters/llm/openai.py +483 -0
  13. codeframe/agents/__init__.py +8 -0
  14. codeframe/agents/dependency_resolver.py +714 -0
  15. codeframe/auth/__init__.py +16 -0
  16. codeframe/auth/api_key_router.py +238 -0
  17. codeframe/auth/api_keys.py +156 -0
  18. codeframe/auth/dependencies.py +358 -0
  19. codeframe/auth/manager.py +178 -0
  20. codeframe/auth/models.py +30 -0
  21. codeframe/auth/router.py +93 -0
  22. codeframe/auth/schemas.py +15 -0
  23. codeframe/auth/scopes.py +53 -0
  24. codeframe/cli/__init__.py +12 -0
  25. codeframe/cli/__main__.py +20 -0
  26. codeframe/cli/api_client.py +275 -0
  27. codeframe/cli/app.py +5688 -0
  28. codeframe/cli/auth.py +122 -0
  29. codeframe/cli/auth_commands.py +958 -0
  30. codeframe/cli/commands/__init__.py +5 -0
  31. codeframe/cli/config_commands.py +79 -0
  32. codeframe/cli/dashboard_commands.py +67 -0
  33. codeframe/cli/engines_commands.py +205 -0
  34. codeframe/cli/env_commands.py +409 -0
  35. codeframe/cli/helpers.py +56 -0
  36. codeframe/cli/hooks_commands.py +208 -0
  37. codeframe/cli/import_commands.py +129 -0
  38. codeframe/cli/pr_commands.py +549 -0
  39. codeframe/cli/proof_commands.py +415 -0
  40. codeframe/cli/stats_commands.py +311 -0
  41. codeframe/cli/telemetry_runtime.py +153 -0
  42. codeframe/cli/validators.py +123 -0
  43. codeframe/config/rate_limits.py +165 -0
  44. codeframe/core/__init__.py +15 -0
  45. codeframe/core/adapters/__init__.py +43 -0
  46. codeframe/core/adapters/agent_adapter.py +114 -0
  47. codeframe/core/adapters/builtin.py +326 -0
  48. codeframe/core/adapters/claude_code.py +62 -0
  49. codeframe/core/adapters/codex.py +393 -0
  50. codeframe/core/adapters/git_utils.py +40 -0
  51. codeframe/core/adapters/kilocode.py +126 -0
  52. codeframe/core/adapters/opencode.py +48 -0
  53. codeframe/core/adapters/streaming_chat.py +483 -0
  54. codeframe/core/adapters/subprocess_adapter.py +213 -0
  55. codeframe/core/adapters/verification_wrapper.py +269 -0
  56. codeframe/core/agent.py +2183 -0
  57. codeframe/core/agents_config.py +569 -0
  58. codeframe/core/api_key_service.py +211 -0
  59. codeframe/core/artifacts.py +428 -0
  60. codeframe/core/blocker_detection.py +218 -0
  61. codeframe/core/blockers.py +433 -0
  62. codeframe/core/checkpoints.py +481 -0
  63. codeframe/core/conductor.py +2255 -0
  64. codeframe/core/config.py +827 -0
  65. codeframe/core/config_watcher.py +268 -0
  66. codeframe/core/context.py +542 -0
  67. codeframe/core/context_packager.py +234 -0
  68. codeframe/core/credentials.py +735 -0
  69. codeframe/core/dependency_analyzer.py +229 -0
  70. codeframe/core/dependency_graph.py +290 -0
  71. codeframe/core/diagnostic_agent.py +712 -0
  72. codeframe/core/diagnostics.py +616 -0
  73. codeframe/core/editor.py +556 -0
  74. codeframe/core/engine_registry.py +256 -0
  75. codeframe/core/engine_stats.py +231 -0
  76. codeframe/core/environment.py +697 -0
  77. codeframe/core/events.py +375 -0
  78. codeframe/core/executor.py +1005 -0
  79. codeframe/core/fix_tracker.py +480 -0
  80. codeframe/core/gates.py +1322 -0
  81. codeframe/core/git.py +477 -0
  82. codeframe/core/github_connect_service.py +178 -0
  83. codeframe/core/github_integration_config.py +118 -0
  84. codeframe/core/github_issues_service.py +449 -0
  85. codeframe/core/hooks.py +184 -0
  86. codeframe/core/importers/__init__.py +1 -0
  87. codeframe/core/importers/ralph.py +540 -0
  88. codeframe/core/installer.py +650 -0
  89. codeframe/core/models.py +1026 -0
  90. codeframe/core/notifications_config.py +183 -0
  91. codeframe/core/planner.py +437 -0
  92. codeframe/core/prd.py +670 -0
  93. codeframe/core/prd_discovery.py +1118 -0
  94. codeframe/core/prd_stress_test.py +499 -0
  95. codeframe/core/progress.py +126 -0
  96. codeframe/core/proof/__init__.py +34 -0
  97. codeframe/core/proof/capture.py +79 -0
  98. codeframe/core/proof/evidence.py +56 -0
  99. codeframe/core/proof/ledger.py +574 -0
  100. codeframe/core/proof/models.py +162 -0
  101. codeframe/core/proof/obligations.py +103 -0
  102. codeframe/core/proof/runner.py +233 -0
  103. codeframe/core/proof/scope.py +81 -0
  104. codeframe/core/proof/stubs.py +156 -0
  105. codeframe/core/quick_fixes.py +558 -0
  106. codeframe/core/react_agent.py +1650 -0
  107. codeframe/core/reconciliation.py +183 -0
  108. codeframe/core/replay.py +788 -0
  109. codeframe/core/review.py +285 -0
  110. codeframe/core/runtime.py +1134 -0
  111. codeframe/core/sandbox/__init__.py +27 -0
  112. codeframe/core/sandbox/context.py +98 -0
  113. codeframe/core/sandbox/worktree.py +20 -0
  114. codeframe/core/schedule.py +396 -0
  115. codeframe/core/stall_detector.py +71 -0
  116. codeframe/core/stall_monitor.py +134 -0
  117. codeframe/core/state_machine.py +121 -0
  118. codeframe/core/streaming.py +502 -0
  119. codeframe/core/task_tree.py +400 -0
  120. codeframe/core/tasks.py +1022 -0
  121. codeframe/core/telemetry.py +232 -0
  122. codeframe/core/templates.py +221 -0
  123. codeframe/core/tools.py +942 -0
  124. codeframe/core/workspace.py +887 -0
  125. codeframe/core/worktrees.py +276 -0
  126. codeframe/git/__init__.py +5 -0
  127. codeframe/git/github_integration.py +505 -0
  128. codeframe/lib/__init__.py +0 -0
  129. codeframe/lib/audit_logger.py +248 -0
  130. codeframe/lib/metrics_tracker.py +800 -0
  131. codeframe/lib/quality/__init__.py +7 -0
  132. codeframe/lib/quality/complexity_analyzer.py +316 -0
  133. codeframe/lib/quality/owasp_patterns.py +284 -0
  134. codeframe/lib/quality/security_scanner.py +250 -0
  135. codeframe/lib/rate_limiter.py +312 -0
  136. codeframe/notifications/__init__.py +0 -0
  137. codeframe/notifications/webhook.py +380 -0
  138. codeframe/planning/__init__.py +30 -0
  139. codeframe/planning/issue_generator.py +219 -0
  140. codeframe/planning/prd_template_functions.py +137 -0
  141. codeframe/planning/prd_templates.py +975 -0
  142. codeframe/planning/task_scheduler.py +511 -0
  143. codeframe/planning/task_templates.py +533 -0
  144. codeframe/platform_store/__init__.py +5 -0
  145. codeframe/platform_store/database.py +277 -0
  146. codeframe/platform_store/repositories/__init__.py +24 -0
  147. codeframe/platform_store/repositories/api_key_repository.py +245 -0
  148. codeframe/platform_store/repositories/audit_repository.py +67 -0
  149. codeframe/platform_store/repositories/base.py +295 -0
  150. codeframe/platform_store/repositories/interactive_sessions.py +165 -0
  151. codeframe/platform_store/repositories/token_repository.py +598 -0
  152. codeframe/platform_store/repositories/workspace_registry_repository.py +175 -0
  153. codeframe/platform_store/schema_manager.py +321 -0
  154. codeframe/templates/AGENTS.md.default +94 -0
  155. codeframe/tui/__init__.py +5 -0
  156. codeframe/tui/app.py +256 -0
  157. codeframe/tui/data_service.py +103 -0
  158. codeframe/ui/__init__.py +0 -0
  159. codeframe/ui/dependencies.py +103 -0
  160. codeframe/ui/models.py +999 -0
  161. codeframe/ui/response_models.py +201 -0
  162. codeframe/ui/routers/__init__.py +5 -0
  163. codeframe/ui/routers/_helpers.py +29 -0
  164. codeframe/ui/routers/batches_v2.py +315 -0
  165. codeframe/ui/routers/blockers_v2.py +320 -0
  166. codeframe/ui/routers/checkpoints_v2.py +310 -0
  167. codeframe/ui/routers/costs_v2.py +322 -0
  168. codeframe/ui/routers/diagnose_v2.py +225 -0
  169. codeframe/ui/routers/discovery_v2.py +417 -0
  170. codeframe/ui/routers/environment_v2.py +284 -0
  171. codeframe/ui/routers/events_v2.py +75 -0
  172. codeframe/ui/routers/gates_v2.py +166 -0
  173. codeframe/ui/routers/git_v2.py +284 -0
  174. codeframe/ui/routers/github_integrations_v2.py +532 -0
  175. codeframe/ui/routers/interactive_sessions_v2.py +238 -0
  176. codeframe/ui/routers/pr_v2.py +709 -0
  177. codeframe/ui/routers/prd_v2.py +695 -0
  178. codeframe/ui/routers/proof_v2.py +755 -0
  179. codeframe/ui/routers/review_v2.py +360 -0
  180. codeframe/ui/routers/schedule_v2.py +214 -0
  181. codeframe/ui/routers/session_chat_ws.py +354 -0
  182. codeframe/ui/routers/settings_v2.py +562 -0
  183. codeframe/ui/routers/streaming_v2.py +155 -0
  184. codeframe/ui/routers/tasks_v2.py +1098 -0
  185. codeframe/ui/routers/templates_v2.py +232 -0
  186. codeframe/ui/routers/terminal_ws.py +267 -0
  187. codeframe/ui/routers/workspace_v2.py +527 -0
  188. codeframe/ui/server.py +568 -0
  189. codeframe/ui/shared.py +241 -0
  190. codeframe/workspace/__init__.py +5 -0
  191. codeframe/workspace/manager.py +249 -0
  192. codeframe_ai-0.9.0.dist-info/METADATA +517 -0
  193. codeframe_ai-0.9.0.dist-info/RECORD +197 -0
  194. codeframe_ai-0.9.0.dist-info/WHEEL +5 -0
  195. codeframe_ai-0.9.0.dist-info/entry_points.txt +3 -0
  196. codeframe_ai-0.9.0.dist-info/licenses/LICENSE +661 -0
  197. codeframe_ai-0.9.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1005 @@
1
+ """Code execution engine for CodeFRAME v2.
2
+
3
+ Executes implementation plan steps by generating and applying code changes.
4
+ Handles file operations, shell commands, and tracks changes for rollback.
5
+
6
+ This module is headless - no FastAPI or HTTP dependencies.
7
+ """
8
+
9
+ import asyncio
10
+ import re
11
+ import shlex
12
+ import subprocess
13
+ from dataclasses import dataclass, field
14
+ from datetime import datetime, timezone
15
+ from enum import Enum
16
+ from pathlib import Path
17
+ from typing import Optional, TYPE_CHECKING
18
+
19
+ from codeframe.core.planner import PlanStep, StepType, ImplementationPlan
20
+ from codeframe.core.context import TaskContext
21
+ from codeframe.adapters.llm import LLMProvider, Purpose
22
+
23
+ if TYPE_CHECKING:
24
+ from codeframe.core.streaming import EventPublisher
25
+
26
+
27
# Module-level dangerous command patterns (importable by other modules like tools.py).
# Each entry is (regex, description). is_dangerous_command() searches every regex
# with re.IGNORECASE and reports the description of the first one that matches.
DANGEROUS_PATTERNS: list[tuple[str, str]] = [
    # Recursive delete of root or home
    (r"\brm\s+(-[rf]+\s+)*[/~]", "recursive deletion of root or home"),
    (r"\brm\s+--no-preserve-root", "rm with --no-preserve-root"),
    # Writing to /dev/ devices. The pseudo-devices null/stdout/stderr are exempt:
    # "cmd > /dev/null 2>&1" is routine and harmless, and blocking it rejected
    # ordinary commands. The inner lookahead makes sure the name ends there, so
    # "/dev/nullx" or "/dev/null/.." are still treated as device writes.
    (r">\s*/dev/(?!(?:null|stdout|stderr)(?![\w./-]))", "redirect to /dev device"),
    (r"\bdd\s+.*of=/dev/", "dd writing to device"),
    # Filesystem destruction
    (r"\bmkfs\b", "filesystem format command"),
    (r"\bfdisk\b", "disk partition command"),
    # Fork bombs
    (r":\s*\(\s*\)\s*\{", "potential fork bomb"),
    (r"\bfork\s*while\s*true", "potential fork bomb"),
    # Dangerous dd operations
    (r"\bdd\s+if=/dev/", "dd reading from device"),
    # Dangerous chmod
    (r"\bchmod\s+(-[Rr]\s+)?777\s+/", "chmod 777 on root"),
    # Wget/curl piped to shell (potential malware download)
    (r"\b(wget|curl)\s+.*\|\s*(ba)?sh", "download piped to shell"),
    # Overwriting important system files
    (r">\s*/(etc|bin|usr|lib|sbin)/", "overwriting system directory"),
]
50
+
51
+
52
def is_dangerous_command(command: str) -> tuple[bool, str]:
    """Report whether a shell command matches any entry in DANGEROUS_PATTERNS.

    The command is checked in two forms: a normalized form (tokens from
    ``shlex.split`` rejoined with single spaces, which drops quoting and
    escapes) and the raw text as given. Matching is case-insensitive.

    Args:
        command: The shell command to check

    Returns:
        ``(True, description)`` for the first pattern that matches either form,
        otherwise ``(False, "")``.
    """
    try:
        normalized = " ".join(shlex.split(command))
    except ValueError:
        # shlex rejects the input (e.g. unbalanced quotes); only collapse whitespace.
        normalized = " ".join(command.split())

    # The raw command is kept as a second candidate in case normalization
    # removed something a pattern relies on.
    candidates = (normalized, command)

    for pattern, description in DANGEROUS_PATTERNS:
        if any(re.search(pattern, text, re.IGNORECASE) for text in candidates):
            return (True, description)

    return (False, "")
81
+
82
+
83
class ExecutionStatus(str, Enum):
    """Outcome of running one plan step.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    # The step ran (or was a no-op / dry run) without error.
    SUCCESS = "success"
    # The step was blocked, raised, or returned a failure.
    FAILED = "failed"
    # The step was not attempted (used when its dependencies were not satisfied).
    SKIPPED = "skipped"
89
+
90
+
91
@dataclass
class FileChange:
    """One recorded modification to a file, kept so it can be rolled back.

    Field order is part of the constructor signature:
    ``FileChange(path, operation, original_content, new_content, timestamp)``.
    """

    # Path to the file (the executor stores the step target as given).
    path: str
    # Type of operation: "create", "edit" or "delete".
    operation: str
    # Content before the change; None for newly created files.
    original_content: Optional[str]
    # Content after the change; None when the file was deleted.
    new_content: Optional[str]
    # When the change was made.
    timestamp: datetime
108
+
109
+
110
@dataclass
class StepResult:
    """Outcome of executing a single plan step.

    Only ``step`` and ``status`` are required; the remaining fields default
    to empty values.
    """

    # The step that was executed.
    step: PlanStep
    # Execution status for this step.
    status: ExecutionStatus
    # Output or result message.
    output: str = ""
    # Error message if the step failed.
    error: str = ""
    # Files modified by this step (a fresh list per instance).
    file_changes: list[FileChange] = field(default_factory=list)
    # Execution time in milliseconds.
    duration_ms: int = 0
129
+
130
+
131
@dataclass
class ExecutionResult:
    """Aggregate outcome of running an implementation plan."""

    # The plan that was executed.
    plan: ImplementationPlan
    # One result per step that was attempted or skipped, in order.
    step_results: list[StepResult]
    # Whether all steps succeeded.
    success: bool = True
    # Total execution time in milliseconds.
    total_duration_ms: int = 0

    @property
    def failed_steps(self) -> list[StepResult]:
        """Return the step results whose status is FAILED."""
        return [
            outcome
            for outcome in self.step_results
            if outcome.status == ExecutionStatus.FAILED
        ]

    @property
    def file_changes(self) -> list[FileChange]:
        """Return every file change from every step, flattened in step order."""
        return [
            change
            for outcome in self.step_results
            for change in outcome.file_changes
        ]
159
+
160
+
161
# System prompt for code generation.
# Executor._generate_file_content passes this as the `system` argument of
# llm.complete() when creating a new file. The text is sent to the model
# verbatim, so any edit here changes runtime behavior.
CODE_GENERATION_PROMPT = """You are an expert code generator. Generate clean, well-structured code based on the requirements.

## Decision-Making Autonomy

When you encounter a decision point:
1. Check project preferences (if provided in context) first
2. If no preference, use industry best practices
3. If truly ambiguous, pick the simpler approach
4. NEVER stop or ask for trivial tactical decisions

You MUST make decisions autonomously about:
- Import organization and ordering
- Variable and function naming (following existing patterns)
- Error handling strategies
- Code organization within files
- Choice between equivalent implementations

## Code Guidelines

1. Follow the existing code style in the project
2. Include appropriate error handling
3. Add brief comments for complex logic only
4. Use type hints for Python code
5. Keep code focused and minimal - don't over-engineer
6. If a file exists, build on existing patterns
7. Use existing imports and utilities where appropriate

Return ONLY the code, no explanations or markdown formatting."""
190
+
191
# System prompt for file edits.
# Executor._generate_edit_content passes this as the `system` argument of
# llm.complete(). It asks for the COMPLETE updated file because the caller
# writes the response back as the whole file. The text is sent to the model
# verbatim, so any edit here changes runtime behavior.
EDIT_GENERATION_PROMPT = """You are an expert code editor. Given the current file content and requested changes, generate the updated file content.

## Decision-Making Autonomy

When you encounter a decision point:
1. Check project preferences (if provided in context) first
2. If no preference, follow existing patterns in the file
3. If truly ambiguous, pick the approach that minimizes changes
4. NEVER stop or ask for trivial tactical decisions

You MUST make decisions autonomously about:
- Where to insert new code
- How to integrate changes with existing code
- Import organization
- Minor refactoring needed for integration

## Edit Guidelines

1. Preserve the existing code style and formatting
2. Make minimal changes to achieve the goal
3. Don't change unrelated code
4. Maintain all imports and dependencies
5. If adding new functionality, integrate naturally with existing code
6. Fix any linting issues in the modified sections

Return ONLY the complete updated file content, no explanations or markdown formatting."""
217
+
218
+
219
+ class Executor:
220
+ """Executes implementation plan steps.
221
+
222
+ Handles file operations, shell commands, and LLM-driven code generation.
223
+ Tracks all changes for potential rollback.
224
+ """
225
+
226
+ def __init__(
227
+ self,
228
+ llm_provider: LLMProvider,
229
+ repo_path: Path,
230
+ dry_run: bool = False,
231
+ command_timeout: int = 60,
232
+ event_publisher: Optional["EventPublisher"] = None,
233
+ ):
234
+ """Initialize the executor.
235
+
236
+ Args:
237
+ llm_provider: LLM provider for code generation
238
+ repo_path: Root path of the repository
239
+ dry_run: If True, don't actually make changes
240
+ command_timeout: Timeout for shell commands in seconds
241
+ event_publisher: Optional EventPublisher for streaming execution events
242
+ """
243
+ self.llm = llm_provider
244
+ self.repo_path = Path(repo_path)
245
+ self.dry_run = dry_run
246
+ self.command_timeout = command_timeout
247
+ self.changes: list[FileChange] = []
248
+ self.event_publisher = event_publisher
249
+
250
def execute_plan(
    self,
    plan: ImplementationPlan,
    context: TaskContext,
) -> ExecutionResult:
    """Run the steps of a plan in order, stopping at the first failure.

    A step whose dependencies are not satisfied is recorded as SKIPPED and
    execution moves on to the next step. A FAILED step ends the run; later
    steps get no result entry.

    Args:
        plan: Plan to execute
        context: Task context for code generation

    Returns:
        ExecutionResult holding the per-step results, the overall success
        flag, and the wall-clock duration in milliseconds
    """
    started_at = datetime.now(timezone.utc)
    outcomes: list[StepResult] = []
    all_succeeded = True

    for step in plan.steps:
        if self._dependencies_satisfied(step, outcomes):
            outcome = self.execute_step(step, context)
            outcomes.append(outcome)
            if outcome.status == ExecutionStatus.FAILED:
                all_succeeded = False
                break  # Stop on first failure
        else:
            outcomes.append(
                StepResult(
                    step=step,
                    status=ExecutionStatus.SKIPPED,
                    output="Dependencies not satisfied",
                )
            )

    elapsed = datetime.now(timezone.utc) - started_at

    return ExecutionResult(
        plan=plan,
        step_results=outcomes,
        success=all_succeeded,
        total_duration_ms=int(elapsed.total_seconds() * 1000),
    )
295
+
296
def execute_step(
    self,
    step: PlanStep,
    context: TaskContext,
) -> StepResult:
    """Dispatch one plan step to its handler and time it.

    Any exception raised while dispatching or handling the step is turned
    into a FAILED result carrying ``str(exception)``; this method itself
    does not raise for handler errors.

    Args:
        step: Step to execute
        context: Task context for code generation (used by create/edit steps)

    Returns:
        StepResult with the outcome and ``duration_ms`` filled in
    """
    began = datetime.now(timezone.utc)

    try:
        kind = step.type
        if kind == StepType.FILE_CREATE:
            outcome = self._execute_file_create(step, context)
        elif kind == StepType.FILE_EDIT:
            outcome = self._execute_file_edit(step, context)
        elif kind == StepType.FILE_DELETE:
            outcome = self._execute_file_delete(step)
        elif kind == StepType.SHELL_COMMAND:
            outcome = self._execute_shell_command(step)
        elif kind == StepType.VERIFICATION:
            outcome = self._execute_verification(step)
        else:
            outcome = StepResult(
                step=step,
                status=ExecutionStatus.FAILED,
                error=f"Unknown step type: {kind}",
            )
    except Exception as exc:
        # Boundary handler: a failing step must not abort the whole plan run.
        outcome = StepResult(
            step=step,
            status=ExecutionStatus.FAILED,
            error=str(exc),
        )

    elapsed = datetime.now(timezone.utc) - began
    outcome.duration_ms = int(elapsed.total_seconds() * 1000)
    return outcome
340
+
341
def _execute_file_create(
    self,
    step: PlanStep,
    context: TaskContext,
) -> StepResult:
    """Create the step's target file from LLM-generated content.

    When the target already exists the step degrades gracefully instead of
    failing: content is regenerated with the edit prompt (so the model sees
    the current file), and then
    - identical content (ignoring surrounding whitespace) is a no-op SUCCESS;
    - different content overwrites the file and is recorded as an "edit".

    Content is generated before the dry-run check, so a dry run still calls
    the LLM but writes nothing.
    """
    # NOTE(review): step.target is joined onto repo_path unchecked; an absolute
    # or "../" target would presumably land outside the repository - confirm
    # the planner guarantees repo-relative targets.
    destination = self.repo_path / step.target

    if not destination.exists():
        # Normal path: brand-new file.
        generated = self._generate_file_content(step, context)

        if self.dry_run:
            return StepResult(
                step=step,
                status=ExecutionStatus.SUCCESS,
                output=f"[DRY RUN] Would create: {step.target}",
            )

        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(generated, encoding="utf-8")

        created = FileChange(
            path=step.target,
            operation="create",
            original_content=None,
            new_content=generated,
            timestamp=datetime.now(timezone.utc),
        )
        self.changes.append(created)

        return StepResult(
            step=step,
            status=ExecutionStatus.SUCCESS,
            output=f"Created: {step.target}",
            file_changes=[created],
        )

    # Fallback path: the file is already there, so treat the step as an edit.
    current = destination.read_text(encoding="utf-8")
    proposed = self._generate_edit_content(step, context, current)

    if current.strip() == proposed.strip():
        return StepResult(
            step=step,
            status=ExecutionStatus.SUCCESS,
            output=f"File already exists with correct content: {step.target}",
        )

    if self.dry_run:
        return StepResult(
            step=step,
            status=ExecutionStatus.SUCCESS,
            output=f"[DRY RUN] Would edit existing file: {step.target}",
        )

    destination.write_text(proposed, encoding="utf-8")

    updated = FileChange(
        path=step.target,
        operation="edit",
        original_content=current,
        new_content=proposed,
        timestamp=datetime.now(timezone.utc),
    )
    self.changes.append(updated)

    return StepResult(
        step=step,
        status=ExecutionStatus.SUCCESS,
        output=f"Updated existing file: {step.target}",
        file_changes=[updated],
    )
427
+
428
def _execute_file_edit(
    self,
    step: PlanStep,
    context: TaskContext,
) -> StepResult:
    """Rewrite an existing file with LLM-edited content.

    Returns FAILED when the target does not exist. Otherwise the current
    content is read, edited content is generated, and (unless this is a dry
    run) the file is overwritten and the change recorded with its original
    content.
    """
    target_file = self.repo_path / step.target

    if not target_file.exists():
        return StepResult(
            step=step,
            status=ExecutionStatus.FAILED,
            error=f"File not found: {step.target}",
        )

    before = target_file.read_text(encoding="utf-8")
    # Generation happens even in dry-run mode; only the write is skipped.
    after = self._generate_edit_content(step, context, before)

    if not self.dry_run:
        target_file.write_text(after, encoding="utf-8")

        edit = FileChange(
            path=step.target,
            operation="edit",
            original_content=before,
            new_content=after,
            timestamp=datetime.now(timezone.utc),
        )
        self.changes.append(edit)

        return StepResult(
            step=step,
            status=ExecutionStatus.SUCCESS,
            output=f"Edited: {step.target}",
            file_changes=[edit],
        )

    return StepResult(
        step=step,
        status=ExecutionStatus.SUCCESS,
        output=f"[DRY RUN] Would edit: {step.target}",
    )
476
+
477
def _execute_file_delete(self, step: PlanStep) -> StepResult:
    """Delete the step's target file, keeping its content for rollback.

    A missing target counts as SUCCESS (already deleted). The file is read
    before the dry-run check, so a dry run still reads it but does not
    unlink it.
    """
    doomed = self.repo_path / step.target

    if doomed.exists():
        # NOTE(review): the content is read as UTF-8 text; a non-text file would
        # presumably raise here and fail the step - confirm that is intended.
        snapshot = doomed.read_text(encoding="utf-8")

        if self.dry_run:
            return StepResult(
                step=step,
                status=ExecutionStatus.SUCCESS,
                output=f"[DRY RUN] Would delete: {step.target}",
            )

        doomed.unlink()

        removal = FileChange(
            path=step.target,
            operation="delete",
            original_content=snapshot,
            new_content=None,
            timestamp=datetime.now(timezone.utc),
        )
        self.changes.append(removal)

        return StepResult(
            step=step,
            status=ExecutionStatus.SUCCESS,
            output=f"Deleted: {step.target}",
            file_changes=[removal],
        )

    return StepResult(
        step=step,
        status=ExecutionStatus.SUCCESS,
        output=f"File already deleted: {step.target}",
    )
516
+
517
def _is_dangerous_command(self, command: str) -> tuple[bool, str]:
    """Instance-level entry point for the dangerous-command check.

    Forwards to the module-level ``is_dangerous_command()`` and returns its
    ``(is_dangerous, description)`` tuple unchanged; no instance state is used.
    """
    verdict = is_dangerous_command(command)
    return verdict
523
+
524
def _execute_shell_command(self, step: PlanStep) -> StepResult:
    """Run the step's target as a shell command inside the repository.

    Flow:
    1. Reject an empty/blank command (previously this reached
       ``subprocess.run([])`` and surfaced as an opaque IndexError).
    2. Reject commands matching the dangerous-command patterns.
    3. In dry-run mode, report what would run and stop.
    4. Run the command with ``cwd=repo_path`` and the configured timeout:
       through the shell when it contains shell operators or cannot be
       tokenized by ``shlex``, otherwise as an argument list with
       ``shell=False``.

    Returns:
        SUCCESS with up to 2000 chars of stdout on exit code 0; FAILED with
        up to 1000 chars each of stdout/stderr otherwise, or on timeout.

    Raises:
        OSError subclasses from ``subprocess.run`` (e.g. FileNotFoundError
        for an unknown executable) propagate to the caller, as before.
    """
    command = step.target

    if not command or not command.strip():
        return StepResult(
            step=step,
            status=ExecutionStatus.FAILED,
            error="Empty shell command",
        )

    # Check for dangerous command patterns using regex
    is_dangerous, description = self._is_dangerous_command(command)
    if is_dangerous:
        return StepResult(
            step=step,
            status=ExecutionStatus.FAILED,
            error=f"Blocked dangerous command: {description}",
        )

    if self.dry_run:
        return StepResult(
            step=step,
            status=ExecutionStatus.SUCCESS,
            output=f"[DRY RUN] Would run: {command}",
        )

    # Decide the execution mode. argv stays None when the shell is required.
    shell_operators = ('|', '&&', '||', '>', '<', '>>', '<<', ';', '`', '$(')
    argv: Optional[list[str]] = None
    if not any(op in command for op in shell_operators):
        try:
            argv = shlex.split(command)
        except ValueError:
            # Malformed quoting: let the shell interpret (and report on) it.
            # Only the parse is guarded, so a ValueError raised by the
            # subprocess call itself is no longer retried through the shell.
            argv = None

    use_shell = argv is None
    try:
        result = subprocess.run(
            command if use_shell else argv,
            shell=use_shell,
            cwd=self.repo_path,
            capture_output=True,
            text=True,
            timeout=self.command_timeout,
        )
    except subprocess.TimeoutExpired:
        return StepResult(
            step=step,
            status=ExecutionStatus.FAILED,
            error=f"Command timed out after {self.command_timeout}s",
        )

    if result.returncode == 0:
        return StepResult(
            step=step,
            status=ExecutionStatus.SUCCESS,
            output=result.stdout[:2000] if result.stdout else "Command completed",
        )

    return StepResult(
        step=step,
        status=ExecutionStatus.FAILED,
        output=result.stdout[:1000] if result.stdout else "",
        error=result.stderr[:1000] if result.stderr else f"Exit code: {result.returncode}",
    )
602
+
603
+ def _execute_verification(self, step: PlanStep) -> StepResult:
604
+ """Execute a verification step (tests, linting, file checks).
605
+
606
+ Handles different verification scenarios:
607
+ - If target is a .py file: verify it exists and has valid syntax
608
+ - If target looks like a command: run it as shell command
609
+ - Otherwise: check if target file/path exists
610
+ """
611
+ target = step.target
612
+
613
+ # Check for common planner mistakes - these are step types, not valid targets
614
+ invalid_targets = ("shell_command", "file_edit", "file_create", "verification")
615
+ if target in invalid_targets:
616
+ return StepResult(
617
+ step=step,
618
+ status=ExecutionStatus.FAILED,
619
+ error=(
620
+ f"Invalid verification target '{target}'. "
621
+ f"The target should be a command to run (e.g., 'python script.py --help') "
622
+ f"or a file path to verify, not the step type name."
623
+ ),
624
+ )
625
+
626
+ # If target is a Python file, verify it exists and check syntax
627
+ if target.endswith(".py"):
628
+ file_path = self.repo_path / target
629
+ if not file_path.exists():
630
+ return StepResult(
631
+ step=step,
632
+ status=ExecutionStatus.FAILED,
633
+ error=f"File not found: {target}",
634
+ )
635
+
636
+ # Verify Python syntax
637
+ try:
638
+ import ast
639
+ content = file_path.read_text()
640
+ ast.parse(content)
641
+ return StepResult(
642
+ step=step,
643
+ status=ExecutionStatus.SUCCESS,
644
+ output=f"Verified: {target} exists and has valid Python syntax",
645
+ )
646
+ except SyntaxError as e:
647
+ return StepResult(
648
+ step=step,
649
+ status=ExecutionStatus.FAILED,
650
+ error=f"Syntax error in {target}: {e}",
651
+ )
652
+
653
+ # If target looks like a command (contains spaces or starts with known commands)
654
+ command_prefixes = ("python", "pytest", "ruff", "npm", "make", "bash", "sh")
655
+ if " " in target or target.split()[0] in command_prefixes:
656
+ return self._execute_shell_command(step)
657
+
658
+ # Otherwise just check if the path exists
659
+ target_path = self.repo_path / target
660
+ if target_path.exists():
661
+ return StepResult(
662
+ step=step,
663
+ status=ExecutionStatus.SUCCESS,
664
+ output=f"Verified: {target} exists",
665
+ )
666
+ else:
667
+ return StepResult(
668
+ step=step,
669
+ status=ExecutionStatus.FAILED,
670
+ error=f"Path not found: {target}",
671
+ )
672
+
673
def _generate_file_content(
    self,
    step: PlanStep,
    context: TaskContext,
) -> str:
    """Ask the LLM for the full content of a new file.

    The request uses CODE_GENERATION_PROMPT as the system prompt with
    ``max_tokens=4096`` and ``temperature=0.0``. The reply is stripped of
    surrounding whitespace; if it starts with a Markdown code fence, the
    opening fence line and a closing fence on the last line are removed.

    Returns:
        The cleaned file content (no surrounding whitespace).
    """
    reply = self.llm.complete(
        messages=[{"role": "user", "content": self._build_generation_prompt(step, context)}],
        purpose=Purpose.EXECUTION,
        system=CODE_GENERATION_PROMPT,
        max_tokens=4096,
        temperature=0.0,
    )

    text = reply.content.strip()
    if not text.startswith("```"):
        return text

    # Drop the opening fence line (e.g. ```python), then a trailing ``` if present.
    body = text.split("\n")[1:]
    if body and body[-1].strip() == "```":
        body.pop()
    return "\n".join(body)
701
+
702
def _generate_edit_content(
    self,
    step: PlanStep,
    context: TaskContext,
    original_content: str,
) -> str:
    """Ask the LLM for the complete updated content of an existing file.

    The request uses EDIT_GENERATION_PROMPT as the system prompt with
    ``max_tokens=8192`` and ``temperature=0.0``. The reply is stripped of
    surrounding whitespace; if it starts with a Markdown code fence, the
    opening fence line and a closing fence on the last line are removed.

    Returns:
        The cleaned replacement content for the file.
    """
    reply = self.llm.complete(
        messages=[
            {
                "role": "user",
                "content": self._build_edit_prompt(step, context, original_content),
            }
        ],
        purpose=Purpose.EXECUTION,
        system=EDIT_GENERATION_PROMPT,
        max_tokens=8192,
        temperature=0.0,
    )

    text = reply.content.strip()
    if not text.startswith("```"):
        return text

    # Drop the opening fence line, then a trailing ``` if present.
    body = text.split("\n")[1:]
    if body and body[-1].strip() == "```":
        body.pop()
    return "\n".join(body)
730
+
731
def _build_generation_prompt(
    self,
    step: PlanStep,
    context: TaskContext,
) -> str:
    """Assemble the user prompt that asks the LLM to write a new file.

    The prompt is a series of markdown sections joined by blank lines:
    task title, target file, purpose, then optional details, the first
    2000 characters of the PRD, and up to three loaded files (1500
    characters each), followed by the generation instruction.

    Args:
        step: Plan step supplying the target, description and details.
        context: Task context supplying the task, PRD and loaded files.

    Returns:
        The assembled prompt text.
    """
    parts = [
        f"## Task: {context.task.title}",
        f"## File to Create: {step.target}",
        f"## Purpose: {step.description}",
    ]

    if step.details:
        parts.append(f"## Details: {step.details}")

    if context.prd:
        parts.append(f"## Requirements:\n{context.prd.content[:2000]}")

    # Give the model a bounded sample of existing files for context.
    if context.loaded_files:
        parts.append("## Related Files:")
        for loaded in context.loaded_files[:3]:
            parts.extend(
                (
                    f"### {loaded.path}",
                    f"```\n{loaded.content[:1500]}\n```",
                )
            )

    parts.append("\nGenerate the file content:")
    return "\n\n".join(parts)
759
+
760
def _build_edit_prompt(
    self,
    step: PlanStep,
    context: TaskContext,
    original_content: str,
) -> str:
    """Assemble the user prompt that asks the LLM to rewrite a file.

    Args:
        step: Plan step supplying the target, description and details.
        context: Task context supplying the task title.
        original_content: Current file text; only the first 6000
            characters are embedded in the prompt.

    Returns:
        The markdown sections joined by blank lines.
    """
    heading = [
        f"## Task: {context.task.title}",
        f"## File to Edit: {step.target}",
        f"## Change Required: {step.description}",
    ]
    # The details section is present only when the step carries details.
    details = [f"## Details: {step.details}"] if step.details else []
    closing = [
        f"## Current File Content:\n```\n{original_content[:6000]}\n```",
        "\nGenerate the complete updated file content:",
    ]
    return "\n\n".join([*heading, *details, *closing])
781
+
782
def _dependencies_satisfied(
    self,
    step: PlanStep,
    previous_results: list[StepResult],
) -> bool:
    """Tell whether every step this one depends on has succeeded.

    Args:
        step: Step whose ``depends_on`` indices are checked.
        previous_results: Results of the steps handled so far.

    Returns:
        True when the step has no dependencies, or when each dependency
        has a result in ``previous_results`` whose status is SUCCESS.
        False when a dependency has no result yet or did not succeed.
    """
    for wanted_index in step.depends_on or ():
        # The first result recorded for the dependency decides the outcome.
        found = next(
            (res for res in previous_results if res.step.index == wanted_index),
            None,
        )
        if found is None or found.status != ExecutionStatus.SUCCESS:
            return False
    return True
806
+
807
def rollback(self) -> list[str]:
    """Undo every recorded change, newest first, then forget them all.

    Created files are deleted if they still exist, edited files get
    their original text written back, and deleted files are recreated
    along with any missing parent directories. Edits and deletions
    without recorded original content are left alone. A failure on one
    change is reported in the returned list and does not stop the rest.

    Returns:
        One message per change that was rolled back or failed to roll back.
    """
    messages = []

    for change in reversed(self.changes):
        target = self.repo_path / change.path

        try:
            operation = change.operation

            if operation == "create":
                if target.exists():
                    target.unlink()
                    messages.append(f"Deleted: {change.path}")
                continue

            if operation not in ("edit", "delete"):
                continue
            if change.original_content is None:
                # Nothing was saved, so there is nothing to restore.
                continue

            if operation == "edit":
                target.write_text(change.original_content, encoding="utf-8")
                messages.append(f"Restored: {change.path}")
            else:
                target.parent.mkdir(parents=True, exist_ok=True)
                target.write_text(change.original_content, encoding="utf-8")
                messages.append(f"Recreated: {change.path}")

        except Exception as exc:
            # Best effort: record the failure and keep rolling back.
            messages.append(f"Failed to rollback {change.path}: {exc}")

    self.changes.clear()
    return messages
844
+
845
+ # ========================================================================
846
+ # Async Methods with Event Publishing
847
+ # ========================================================================
848
+
849
async def _publish_event(self, task_id: str, event) -> None:
    """Forward an event to the configured publisher, if there is one.

    Args:
        task_id: Task ID the event belongs to.
        event: Event object handed to the publisher unchanged.
    """
    publisher = self.event_publisher
    if publisher is None:
        # No publisher configured: publishing is a no-op.
        return
    await publisher.publish(task_id, event)
858
+
859
async def execute_step_async(
    self,
    step: PlanStep,
    context: TaskContext,
    task_id: str,
) -> StepResult:
    """Run one plan step off the event loop and publish its outcome.

    The synchronous ``execute_step`` runs in the loop's default
    executor. A successful step with output publishes an OutputEvent
    (truncated to SSE_OUTPUT_MAX_CHARS); a failed step publishes an
    ErrorEvent.

    Args:
        step: Step to execute.
        context: Task context for code generation.
        task_id: Task ID used for event publishing.

    Returns:
        The StepResult produced by ``execute_step``.
    """
    from codeframe.core.models import OutputEvent, ErrorEvent

    # No ProgressEvent here: execute_plan_async() emits it before calling
    # this method, and emitting another would duplicate it with an
    # incorrect total_steps.

    result = await asyncio.get_running_loop().run_in_executor(
        None,
        lambda: self.execute_step(step, context),
    )

    if result.status == ExecutionStatus.SUCCESS and result.output:
        from codeframe.core.streaming import SSE_OUTPUT_MAX_CHARS

        text = result.output
        if len(text) > SSE_OUTPUT_MAX_CHARS:
            full_length = len(result.output)
            text = text[:SSE_OUTPUT_MAX_CHARS] + f"\n... (truncated, {full_length} total chars)"

        stdout_event = OutputEvent(
            task_id=task_id,
            stream="stdout",
            line=text,
        )
        await self._publish_event(task_id, stdout_event)

    if result.status == ExecutionStatus.FAILED:
        failure_event = ErrorEvent(
            task_id=task_id,
            error_type="step_failed",
            error=result.error or f"Step {step.index} execution failed",
        )
        await self._publish_event(task_id, failure_event)

    return result
917
+
918
async def execute_plan_async(
    self,
    plan: ImplementationPlan,
    context: TaskContext,
) -> ExecutionResult:
    """Execute all steps in a plan asynchronously with event publishing.

    A ProgressEvent is published before each step. Steps whose
    dependencies are not satisfied are recorded as SKIPPED and do not
    affect the overall success flag. Execution stops at the first FAILED
    step. Afterwards either a CompletionEvent (success) or a
    ``plan_failed`` ErrorEvent (failure) is published.

    Args:
        plan: Plan to execute; its ``task_id`` is used for all events.
        context: Task context for code generation.

    Returns:
        ExecutionResult with the results of every step that was reached,
        the success flag, and the elapsed time in milliseconds.
    """
    from codeframe.core.models import ProgressEvent, CompletionEvent, ErrorEvent

    task_id = plan.task_id
    results: list[StepResult] = []
    success = True
    start_time = datetime.now(timezone.utc)
    total_steps = len(plan.steps)

    for i, step in enumerate(plan.steps, 1):
        # Publish progress for each step (including ones skipped below).
        await self._publish_event(
            task_id,
            ProgressEvent(
                task_id=task_id,
                phase="execution",
                step=i,
                total_steps=total_steps,
                message=f"Step {i}/{total_steps}: {step.description}",
            ),
        )

        # Check dependencies
        if not self._dependencies_satisfied(step, results):
            results.append(StepResult(
                step=step,
                status=ExecutionStatus.SKIPPED,
                output="Dependencies not satisfied",
            ))
            continue

        # Execute the step
        result = await self.execute_step_async(step, context, task_id)
        results.append(result)

        if result.status == ExecutionStatus.FAILED:
            success = False
            break  # Stop on first failure

    end_time = datetime.now(timezone.utc)
    duration_seconds = (end_time - start_time).total_seconds()
    duration_ms = int(duration_seconds * 1000)

    # Publish completion event; files come from the executor's change log.
    files_modified = [c.path for c in self.changes]
    if success:
        await self._publish_event(
            task_id,
            CompletionEvent(
                task_id=task_id,
                status="completed",
                duration_seconds=duration_seconds,
                files_modified=files_modified,
            ),
        )
    else:
        # The loop breaks right after appending the failed result, so the
        # last entry is the step that failed.
        failed_step = results[-1] if results else None
        if failed_step is not None:
            # Fall back to a generic reason so the message never reads
            # "Step N failed: None" when the step recorded no error text.
            reason = failed_step.error or "execution failed"
            error_msg = f"Step {failed_step.step.index} failed: {reason}"
        else:
            error_msg = "Plan execution failed"
        await self._publish_event(
            task_id,
            ErrorEvent(
                task_id=task_id,
                error_type="plan_failed",
                error=error_msg,
            ),
        )

    return ExecutionResult(
        plan=plan,
        step_results=results,
        success=success,
        total_duration_ms=duration_ms,
    )