codeframe-ai 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeframe/__init__.py +11 -0
- codeframe/__main__.py +20 -0
- codeframe/adapters/__init__.py +5 -0
- codeframe/adapters/e2b/__init__.py +13 -0
- codeframe/adapters/e2b/adapter.py +342 -0
- codeframe/adapters/e2b/budget.py +71 -0
- codeframe/adapters/e2b/credential_scanner.py +134 -0
- codeframe/adapters/llm/__init__.py +92 -0
- codeframe/adapters/llm/anthropic.py +414 -0
- codeframe/adapters/llm/base.py +444 -0
- codeframe/adapters/llm/mock.py +281 -0
- codeframe/adapters/llm/openai.py +483 -0
- codeframe/agents/__init__.py +8 -0
- codeframe/agents/dependency_resolver.py +714 -0
- codeframe/auth/__init__.py +16 -0
- codeframe/auth/api_key_router.py +238 -0
- codeframe/auth/api_keys.py +156 -0
- codeframe/auth/dependencies.py +358 -0
- codeframe/auth/manager.py +178 -0
- codeframe/auth/models.py +30 -0
- codeframe/auth/router.py +93 -0
- codeframe/auth/schemas.py +15 -0
- codeframe/auth/scopes.py +53 -0
- codeframe/cli/__init__.py +12 -0
- codeframe/cli/__main__.py +20 -0
- codeframe/cli/api_client.py +275 -0
- codeframe/cli/app.py +5688 -0
- codeframe/cli/auth.py +122 -0
- codeframe/cli/auth_commands.py +958 -0
- codeframe/cli/commands/__init__.py +5 -0
- codeframe/cli/config_commands.py +79 -0
- codeframe/cli/dashboard_commands.py +67 -0
- codeframe/cli/engines_commands.py +205 -0
- codeframe/cli/env_commands.py +409 -0
- codeframe/cli/helpers.py +56 -0
- codeframe/cli/hooks_commands.py +208 -0
- codeframe/cli/import_commands.py +129 -0
- codeframe/cli/pr_commands.py +549 -0
- codeframe/cli/proof_commands.py +415 -0
- codeframe/cli/stats_commands.py +311 -0
- codeframe/cli/telemetry_runtime.py +153 -0
- codeframe/cli/validators.py +123 -0
- codeframe/config/rate_limits.py +165 -0
- codeframe/core/__init__.py +15 -0
- codeframe/core/adapters/__init__.py +43 -0
- codeframe/core/adapters/agent_adapter.py +114 -0
- codeframe/core/adapters/builtin.py +326 -0
- codeframe/core/adapters/claude_code.py +62 -0
- codeframe/core/adapters/codex.py +393 -0
- codeframe/core/adapters/git_utils.py +40 -0
- codeframe/core/adapters/kilocode.py +126 -0
- codeframe/core/adapters/opencode.py +48 -0
- codeframe/core/adapters/streaming_chat.py +483 -0
- codeframe/core/adapters/subprocess_adapter.py +213 -0
- codeframe/core/adapters/verification_wrapper.py +269 -0
- codeframe/core/agent.py +2183 -0
- codeframe/core/agents_config.py +569 -0
- codeframe/core/api_key_service.py +211 -0
- codeframe/core/artifacts.py +428 -0
- codeframe/core/blocker_detection.py +218 -0
- codeframe/core/blockers.py +433 -0
- codeframe/core/checkpoints.py +481 -0
- codeframe/core/conductor.py +2255 -0
- codeframe/core/config.py +827 -0
- codeframe/core/config_watcher.py +268 -0
- codeframe/core/context.py +542 -0
- codeframe/core/context_packager.py +234 -0
- codeframe/core/credentials.py +735 -0
- codeframe/core/dependency_analyzer.py +229 -0
- codeframe/core/dependency_graph.py +290 -0
- codeframe/core/diagnostic_agent.py +712 -0
- codeframe/core/diagnostics.py +616 -0
- codeframe/core/editor.py +556 -0
- codeframe/core/engine_registry.py +256 -0
- codeframe/core/engine_stats.py +231 -0
- codeframe/core/environment.py +697 -0
- codeframe/core/events.py +375 -0
- codeframe/core/executor.py +1005 -0
- codeframe/core/fix_tracker.py +480 -0
- codeframe/core/gates.py +1322 -0
- codeframe/core/git.py +477 -0
- codeframe/core/github_connect_service.py +178 -0
- codeframe/core/github_integration_config.py +118 -0
- codeframe/core/github_issues_service.py +449 -0
- codeframe/core/hooks.py +184 -0
- codeframe/core/importers/__init__.py +1 -0
- codeframe/core/importers/ralph.py +540 -0
- codeframe/core/installer.py +650 -0
- codeframe/core/models.py +1026 -0
- codeframe/core/notifications_config.py +183 -0
- codeframe/core/planner.py +437 -0
- codeframe/core/prd.py +670 -0
- codeframe/core/prd_discovery.py +1118 -0
- codeframe/core/prd_stress_test.py +499 -0
- codeframe/core/progress.py +126 -0
- codeframe/core/proof/__init__.py +34 -0
- codeframe/core/proof/capture.py +79 -0
- codeframe/core/proof/evidence.py +56 -0
- codeframe/core/proof/ledger.py +574 -0
- codeframe/core/proof/models.py +162 -0
- codeframe/core/proof/obligations.py +103 -0
- codeframe/core/proof/runner.py +233 -0
- codeframe/core/proof/scope.py +81 -0
- codeframe/core/proof/stubs.py +156 -0
- codeframe/core/quick_fixes.py +558 -0
- codeframe/core/react_agent.py +1650 -0
- codeframe/core/reconciliation.py +183 -0
- codeframe/core/replay.py +788 -0
- codeframe/core/review.py +285 -0
- codeframe/core/runtime.py +1134 -0
- codeframe/core/sandbox/__init__.py +27 -0
- codeframe/core/sandbox/context.py +98 -0
- codeframe/core/sandbox/worktree.py +20 -0
- codeframe/core/schedule.py +396 -0
- codeframe/core/stall_detector.py +71 -0
- codeframe/core/stall_monitor.py +134 -0
- codeframe/core/state_machine.py +121 -0
- codeframe/core/streaming.py +502 -0
- codeframe/core/task_tree.py +400 -0
- codeframe/core/tasks.py +1022 -0
- codeframe/core/telemetry.py +232 -0
- codeframe/core/templates.py +221 -0
- codeframe/core/tools.py +942 -0
- codeframe/core/workspace.py +887 -0
- codeframe/core/worktrees.py +276 -0
- codeframe/git/__init__.py +5 -0
- codeframe/git/github_integration.py +505 -0
- codeframe/lib/__init__.py +0 -0
- codeframe/lib/audit_logger.py +248 -0
- codeframe/lib/metrics_tracker.py +800 -0
- codeframe/lib/quality/__init__.py +7 -0
- codeframe/lib/quality/complexity_analyzer.py +316 -0
- codeframe/lib/quality/owasp_patterns.py +284 -0
- codeframe/lib/quality/security_scanner.py +250 -0
- codeframe/lib/rate_limiter.py +312 -0
- codeframe/notifications/__init__.py +0 -0
- codeframe/notifications/webhook.py +380 -0
- codeframe/planning/__init__.py +30 -0
- codeframe/planning/issue_generator.py +219 -0
- codeframe/planning/prd_template_functions.py +137 -0
- codeframe/planning/prd_templates.py +975 -0
- codeframe/planning/task_scheduler.py +511 -0
- codeframe/planning/task_templates.py +533 -0
- codeframe/platform_store/__init__.py +5 -0
- codeframe/platform_store/database.py +277 -0
- codeframe/platform_store/repositories/__init__.py +24 -0
- codeframe/platform_store/repositories/api_key_repository.py +245 -0
- codeframe/platform_store/repositories/audit_repository.py +67 -0
- codeframe/platform_store/repositories/base.py +295 -0
- codeframe/platform_store/repositories/interactive_sessions.py +165 -0
- codeframe/platform_store/repositories/token_repository.py +598 -0
- codeframe/platform_store/repositories/workspace_registry_repository.py +175 -0
- codeframe/platform_store/schema_manager.py +321 -0
- codeframe/templates/AGENTS.md.default +94 -0
- codeframe/tui/__init__.py +5 -0
- codeframe/tui/app.py +256 -0
- codeframe/tui/data_service.py +103 -0
- codeframe/ui/__init__.py +0 -0
- codeframe/ui/dependencies.py +103 -0
- codeframe/ui/models.py +999 -0
- codeframe/ui/response_models.py +201 -0
- codeframe/ui/routers/__init__.py +5 -0
- codeframe/ui/routers/_helpers.py +29 -0
- codeframe/ui/routers/batches_v2.py +315 -0
- codeframe/ui/routers/blockers_v2.py +320 -0
- codeframe/ui/routers/checkpoints_v2.py +310 -0
- codeframe/ui/routers/costs_v2.py +322 -0
- codeframe/ui/routers/diagnose_v2.py +225 -0
- codeframe/ui/routers/discovery_v2.py +417 -0
- codeframe/ui/routers/environment_v2.py +284 -0
- codeframe/ui/routers/events_v2.py +75 -0
- codeframe/ui/routers/gates_v2.py +166 -0
- codeframe/ui/routers/git_v2.py +284 -0
- codeframe/ui/routers/github_integrations_v2.py +532 -0
- codeframe/ui/routers/interactive_sessions_v2.py +238 -0
- codeframe/ui/routers/pr_v2.py +709 -0
- codeframe/ui/routers/prd_v2.py +695 -0
- codeframe/ui/routers/proof_v2.py +755 -0
- codeframe/ui/routers/review_v2.py +360 -0
- codeframe/ui/routers/schedule_v2.py +214 -0
- codeframe/ui/routers/session_chat_ws.py +354 -0
- codeframe/ui/routers/settings_v2.py +562 -0
- codeframe/ui/routers/streaming_v2.py +155 -0
- codeframe/ui/routers/tasks_v2.py +1098 -0
- codeframe/ui/routers/templates_v2.py +232 -0
- codeframe/ui/routers/terminal_ws.py +267 -0
- codeframe/ui/routers/workspace_v2.py +527 -0
- codeframe/ui/server.py +568 -0
- codeframe/ui/shared.py +241 -0
- codeframe/workspace/__init__.py +5 -0
- codeframe/workspace/manager.py +249 -0
- codeframe_ai-0.9.0.dist-info/METADATA +517 -0
- codeframe_ai-0.9.0.dist-info/RECORD +197 -0
- codeframe_ai-0.9.0.dist-info/WHEEL +5 -0
- codeframe_ai-0.9.0.dist-info/entry_points.txt +3 -0
- codeframe_ai-0.9.0.dist-info/licenses/LICENSE +661 -0
- codeframe_ai-0.9.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1005 @@
|
|
|
1
|
+
"""Code execution engine for CodeFRAME v2.
|
|
2
|
+
|
|
3
|
+
Executes implementation plan steps by generating and applying code changes.
|
|
4
|
+
Handles file operations, shell commands, and tracks changes for rollback.
|
|
5
|
+
|
|
6
|
+
This module is headless - no FastAPI or HTTP dependencies.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import re
|
|
11
|
+
import shlex
|
|
12
|
+
import subprocess
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from enum import Enum
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Optional, TYPE_CHECKING
|
|
18
|
+
|
|
19
|
+
from codeframe.core.planner import PlanStep, StepType, ImplementationPlan
|
|
20
|
+
from codeframe.core.context import TaskContext
|
|
21
|
+
from codeframe.adapters.llm import LLMProvider, Purpose
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from codeframe.core.streaming import EventPublisher
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Module-level dangerous command patterns (importable by other modules like tools.py)
|
|
28
|
+
DANGEROUS_PATTERNS: list[tuple[str, str]] = [
    # Each entry is (regex, description). The regexes are applied with
    # re.search(..., re.IGNORECASE) by is_dangerous_command(); the description
    # is what gets reported when a command is blocked.
    #
    # Recursive delete of root or home
    (r"\brm\s+(-[rf]+\s+)*[/~]", "recursive deletion of root or home"),
    (r"\brm\s+--no-preserve-root", "rm with --no-preserve-root"),
    # Writing to /dev/ devices. The standard sinks /dev/null, /dev/stdout and
    # /dev/stderr are exempted by the negative lookahead: redirecting to them
    # is harmless and extremely common (e.g. "cmd > /dev/null 2>&1"), so
    # flagging them would block ordinary build/test commands.
    (r">\s*/dev/(?!(?:null|stdout|stderr)\b)", "redirect to /dev device"),
    (r"\bdd\s+.*of=/dev/", "dd writing to device"),
    # Filesystem destruction
    (r"\bmkfs\b", "filesystem format command"),
    (r"\bfdisk\b", "disk partition command"),
    # Fork bombs
    (r":\s*\(\s*\)\s*\{", "potential fork bomb"),
    (r"\bfork\s*while\s*true", "potential fork bomb"),
    # Dangerous dd operations
    (r"\bdd\s+if=/dev/", "dd reading from device"),
    # Dangerous chmod
    (r"\bchmod\s+(-[Rr]\s+)?777\s+/", "chmod 777 on root"),
    # Wget/curl piped to shell (potential malware download)
    (r"\b(wget|curl)\s+.*\|\s*(ba)?sh", "download piped to shell"),
    # Overwriting important system files
    (r">\s*/(etc|bin|usr|lib|sbin)/", "overwriting system directory"),
]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def is_dangerous_command(command: str) -> tuple[bool, str]:
    """Report whether a shell command matches any entry in DANGEROUS_PATTERNS.

    The command is tested in two forms: a normalized form (tokenized with
    shlex so quoting/escapes are resolved, then re-joined with single spaces)
    and the raw text as given. A case-insensitive regex hit on either form
    counts as a match.

    Args:
        command: The shell command to check

    Returns:
        ``(True, description)`` for the first matching pattern, where the
        description comes from that DANGEROUS_PATTERNS entry; otherwise
        ``(False, "")``.
    """
    try:
        normalized = " ".join(shlex.split(command))
    except ValueError:
        # shlex rejects malformed input (e.g. an unclosed quote); settle for
        # collapsing whitespace runs instead.
        normalized = " ".join(command.split())

    # The raw command is kept as a second candidate in case tokenizing
    # removed something a pattern relies on.
    candidates = (normalized, command)

    for pattern, description in DANGEROUS_PATTERNS:
        regex = re.compile(pattern, re.IGNORECASE)
        if any(regex.search(text) for text in candidates):
            return (True, description)

    return (False, "")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class ExecutionStatus(str, Enum):
    """Outcome of running one plan step.

    The enum mixes in ``str``, so each member compares equal to its
    lowercase string value.
    """

    # The step ran to completion.
    SUCCESS = "success"
    # The step raised, returned a non-zero exit code, or was rejected.
    FAILED = "failed"
    # The step was not run (e.g. its dependencies were not satisfied).
    SKIPPED = "skipped"
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@dataclass
class FileChange:
    """Snapshot of one file modification, kept so it can be rolled back.

    Attributes:
        path: Path to the file, as given in the plan step target
        operation: Kind of change: "create", "edit" or "delete"
        original_content: File text before the change (None for new files)
        new_content: File text after the change (None for deletions)
        timestamp: When the change was made
    """

    # Location and kind of change
    path: str
    operation: str

    # Before/after text; either side may be absent
    original_content: Optional[str]
    new_content: Optional[str]

    # Moment the change was recorded
    timestamp: datetime
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@dataclass
class StepResult:
    """Outcome of running one plan step.

    Attributes:
        step: The step that was executed
        status: Execution status
        output: Output or result message (empty when there is none)
        error: Error message if the step failed (empty otherwise)
        file_changes: Files modified by this step
        duration_ms: Execution time in milliseconds
    """

    # Required: which step ran and how it ended
    step: PlanStep
    status: ExecutionStatus

    # Optional details, all defaulting to "nothing to report"
    output: str = ""
    error: str = ""
    # default_factory gives every instance its own list
    file_changes: list[FileChange] = field(default_factory=list)
    duration_ms: int = 0
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@dataclass
class ExecutionResult:
    """Aggregate outcome of running a whole implementation plan.

    Attributes:
        plan: The plan that was executed
        step_results: One result per step that was attempted or skipped
        success: Whether the run completed without a failed step
        total_duration_ms: Total execution time in milliseconds
    """

    plan: ImplementationPlan
    step_results: list[StepResult]
    success: bool = True
    total_duration_ms: int = 0

    @property
    def failed_steps(self) -> list[StepResult]:
        """Return the step results whose status is FAILED."""
        failures = []
        for outcome in self.step_results:
            if outcome.status == ExecutionStatus.FAILED:
                failures.append(outcome)
        return failures

    @property
    def file_changes(self) -> list[FileChange]:
        """Return every file change from every step, in step order."""
        return [
            change
            for outcome in self.step_results
            for change in outcome.file_changes
        ]
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# System prompt for code generation
|
|
162
|
+
CODE_GENERATION_PROMPT = """You are an expert code generator. Generate clean, well-structured code based on the requirements.
|
|
163
|
+
|
|
164
|
+
## Decision-Making Autonomy
|
|
165
|
+
|
|
166
|
+
When you encounter a decision point:
|
|
167
|
+
1. Check project preferences (if provided in context) first
|
|
168
|
+
2. If no preference, use industry best practices
|
|
169
|
+
3. If truly ambiguous, pick the simpler approach
|
|
170
|
+
4. NEVER stop or ask for trivial tactical decisions
|
|
171
|
+
|
|
172
|
+
You MUST make decisions autonomously about:
|
|
173
|
+
- Import organization and ordering
|
|
174
|
+
- Variable and function naming (following existing patterns)
|
|
175
|
+
- Error handling strategies
|
|
176
|
+
- Code organization within files
|
|
177
|
+
- Choice between equivalent implementations
|
|
178
|
+
|
|
179
|
+
## Code Guidelines
|
|
180
|
+
|
|
181
|
+
1. Follow the existing code style in the project
|
|
182
|
+
2. Include appropriate error handling
|
|
183
|
+
3. Add brief comments for complex logic only
|
|
184
|
+
4. Use type hints for Python code
|
|
185
|
+
5. Keep code focused and minimal - don't over-engineer
|
|
186
|
+
6. If a file exists, build on existing patterns
|
|
187
|
+
7. Use existing imports and utilities where appropriate
|
|
188
|
+
|
|
189
|
+
Return ONLY the code, no explanations or markdown formatting."""
|
|
190
|
+
|
|
191
|
+
EDIT_GENERATION_PROMPT = """You are an expert code editor. Given the current file content and requested changes, generate the updated file content.
|
|
192
|
+
|
|
193
|
+
## Decision-Making Autonomy
|
|
194
|
+
|
|
195
|
+
When you encounter a decision point:
|
|
196
|
+
1. Check project preferences (if provided in context) first
|
|
197
|
+
2. If no preference, follow existing patterns in the file
|
|
198
|
+
3. If truly ambiguous, pick the approach that minimizes changes
|
|
199
|
+
4. NEVER stop or ask for trivial tactical decisions
|
|
200
|
+
|
|
201
|
+
You MUST make decisions autonomously about:
|
|
202
|
+
- Where to insert new code
|
|
203
|
+
- How to integrate changes with existing code
|
|
204
|
+
- Import organization
|
|
205
|
+
- Minor refactoring needed for integration
|
|
206
|
+
|
|
207
|
+
## Edit Guidelines
|
|
208
|
+
|
|
209
|
+
1. Preserve the existing code style and formatting
|
|
210
|
+
2. Make minimal changes to achieve the goal
|
|
211
|
+
3. Don't change unrelated code
|
|
212
|
+
4. Maintain all imports and dependencies
|
|
213
|
+
5. If adding new functionality, integrate naturally with existing code
|
|
214
|
+
6. Fix any linting issues in the modified sections
|
|
215
|
+
|
|
216
|
+
Return ONLY the complete updated file content, no explanations or markdown formatting."""
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
class Executor:
|
|
220
|
+
"""Executes implementation plan steps.
|
|
221
|
+
|
|
222
|
+
Handles file operations, shell commands, and LLM-driven code generation.
|
|
223
|
+
Tracks all changes for potential rollback.
|
|
224
|
+
"""
|
|
225
|
+
|
|
226
|
+
def __init__(
    self,
    llm_provider: LLMProvider,
    repo_path: Path,
    dry_run: bool = False,
    command_timeout: int = 60,
    event_publisher: Optional["EventPublisher"] = None,
):
    """Set up an executor bound to one repository.

    Args:
        llm_provider: LLM provider for code generation
        repo_path: Root path of the repository; coerced to ``Path``
        dry_run: If True, don't actually make changes
        command_timeout: Timeout for shell commands in seconds
        event_publisher: Optional EventPublisher for streaming execution events
    """
    # Collaborators supplied by the caller
    self.llm = llm_provider
    self.event_publisher = event_publisher

    # Execution settings
    self.repo_path = Path(repo_path)
    self.dry_run = dry_run
    self.command_timeout = command_timeout

    # Running log of every file change made through this executor
    self.changes: list[FileChange] = []
|
|
249
|
+
|
|
250
|
+
def execute_plan(
    self,
    plan: ImplementationPlan,
    context: TaskContext,
) -> ExecutionResult:
    """Run the steps of a plan in order, stopping at the first failure.

    A step whose dependencies are not satisfied is recorded as SKIPPED and
    execution moves on to the next step; a FAILED step ends the run.

    Args:
        plan: Plan to execute
        context: Task context for code generation

    Returns:
        ExecutionResult with the results collected so far, the overall
        success flag and the wall-clock duration in milliseconds
    """
    step_results = []
    all_ok = True
    started_at = datetime.now(timezone.utc)

    for step in plan.steps:
        if self._dependencies_satisfied(step, step_results):
            outcome = self.execute_step(step, context)
        else:
            outcome = StepResult(
                step=step,
                status=ExecutionStatus.SKIPPED,
                output="Dependencies not satisfied",
            )
        step_results.append(outcome)

        # Only a FAILED result aborts the run; SKIPPED steps do not.
        if outcome.status == ExecutionStatus.FAILED:
            all_ok = False
            break

    elapsed = datetime.now(timezone.utc) - started_at

    return ExecutionResult(
        plan=plan,
        step_results=step_results,
        success=all_ok,
        total_duration_ms=int(elapsed.total_seconds() * 1000),
    )
|
|
295
|
+
|
|
296
|
+
def execute_step(
    self,
    step: PlanStep,
    context: TaskContext,
) -> StepResult:
    """Run one plan step via the handler for its type and time it.

    Any exception raised by a handler is converted into a FAILED result
    carrying ``str(exception)`` as the error, so this method does not
    propagate handler errors.

    Args:
        step: Step to execute
        context: Task context for code generation

    Returns:
        StepResult with execution outcome and ``duration_ms`` filled in
    """
    started_at = datetime.now(timezone.utc)

    def _dispatch() -> StepResult:
        # Route by step type; only the file create/edit handlers use context.
        if step.type == StepType.FILE_CREATE:
            return self._execute_file_create(step, context)
        if step.type == StepType.FILE_EDIT:
            return self._execute_file_edit(step, context)
        if step.type == StepType.FILE_DELETE:
            return self._execute_file_delete(step)
        if step.type == StepType.SHELL_COMMAND:
            return self._execute_shell_command(step)
        if step.type == StepType.VERIFICATION:
            return self._execute_verification(step)
        return StepResult(
            step=step,
            status=ExecutionStatus.FAILED,
            error=f"Unknown step type: {step.type}",
        )

    try:
        outcome = _dispatch()
    except Exception as exc:
        outcome = StepResult(
            step=step,
            status=ExecutionStatus.FAILED,
            error=str(exc),
        )

    elapsed = datetime.now(timezone.utc) - started_at
    outcome.duration_ms = int(elapsed.total_seconds() * 1000)

    return outcome
|
|
340
|
+
|
|
341
|
+
def _execute_file_create(
    self,
    step: PlanStep,
    context: TaskContext,
) -> StepResult:
    """Create the step's target file from LLM-generated content.

    When the target already exists the step degrades to an edit:
    - content generated with the edit prompt equals the current content
      (ignoring leading/trailing whitespace): SUCCESS, nothing written
    - otherwise: the file is overwritten and recorded as an "edit"

    In dry-run mode content is still generated, but nothing is written
    and no change is recorded.

    Args:
        step: Step whose ``target`` is the path, relative to the repo root
        context: Task context for code generation

    Returns:
        StepResult describing what was (or would be) done
    """
    # NOTE(review): step.target is joined to repo_path without any check
    # that the result stays inside the repository -- confirm the planner
    # guarantees that.
    destination = self.repo_path / step.target

    if not destination.exists():
        # Normal path: brand-new file.
        generated = self._generate_file_content(step, context)

        if self.dry_run:
            return StepResult(
                step=step,
                status=ExecutionStatus.SUCCESS,
                output=f"[DRY RUN] Would create: {step.target}",
            )

        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(generated, encoding="utf-8")

        created = FileChange(
            path=step.target,
            operation="create",
            original_content=None,
            new_content=generated,
            timestamp=datetime.now(timezone.utc),
        )
        self.changes.append(created)

        return StepResult(
            step=step,
            status=ExecutionStatus.SUCCESS,
            output=f"Created: {step.target}",
            file_changes=[created],
        )

    # Fallback path: the file is already there. Use the edit prompt so the
    # LLM sees what exists.
    current_text = destination.read_text(encoding="utf-8")
    proposed_text = self._generate_edit_content(step, context, current_text)

    if current_text.strip() == proposed_text.strip():
        return StepResult(
            step=step,
            status=ExecutionStatus.SUCCESS,
            output=f"File already exists with correct content: {step.target}",
        )

    if self.dry_run:
        return StepResult(
            step=step,
            status=ExecutionStatus.SUCCESS,
            output=f"[DRY RUN] Would edit existing file: {step.target}",
        )

    destination.write_text(proposed_text, encoding="utf-8")

    updated = FileChange(
        path=step.target,
        operation="edit",
        original_content=current_text,
        new_content=proposed_text,
        timestamp=datetime.now(timezone.utc),
    )
    self.changes.append(updated)

    return StepResult(
        step=step,
        status=ExecutionStatus.SUCCESS,
        output=f"Updated existing file: {step.target}",
        file_changes=[updated],
    )
|
|
427
|
+
|
|
428
|
+
def _execute_file_edit(
    self,
    step: PlanStep,
    context: TaskContext,
) -> StepResult:
    """Rewrite an existing file with LLM-edited content.

    Args:
        step: Step whose ``target`` is the path, relative to the repo root
        context: Task context for code generation

    Returns:
        FAILED if the target does not exist; otherwise SUCCESS, with the
        recorded change attached unless running in dry-run mode
    """
    target_file = self.repo_path / step.target

    # Editing requires something to edit.
    if not target_file.exists():
        return StepResult(
            step=step,
            status=ExecutionStatus.FAILED,
            error=f"File not found: {step.target}",
        )

    before = target_file.read_text(encoding="utf-8")
    after = self._generate_edit_content(step, context, before)

    # Dry run: content was generated above, but nothing is written.
    if self.dry_run:
        return StepResult(
            step=step,
            status=ExecutionStatus.SUCCESS,
            output=f"[DRY RUN] Would edit: {step.target}",
        )

    target_file.write_text(after, encoding="utf-8")

    # Keep both versions so the edit can be rolled back.
    edit_record = FileChange(
        path=step.target,
        operation="edit",
        original_content=before,
        new_content=after,
        timestamp=datetime.now(timezone.utc),
    )
    self.changes.append(edit_record)

    return StepResult(
        step=step,
        status=ExecutionStatus.SUCCESS,
        output=f"Edited: {step.target}",
        file_changes=[edit_record],
    )
|
|
476
|
+
|
|
477
|
+
def _execute_file_delete(self, step: PlanStep) -> StepResult:
    """Delete the step's target file, keeping its content for rollback.

    A target that is already absent counts as SUCCESS. The file is read as
    UTF-8 text before removal so the deletion can be undone.

    Args:
        step: Step whose ``target`` is the path, relative to the repo root

    Returns:
        StepResult with SUCCESS status; carries the recorded change when a
        file was actually removed
    """
    doomed = self.repo_path / step.target

    # Nothing to do if the file is already gone.
    if not doomed.exists():
        return StepResult(
            step=step,
            status=ExecutionStatus.SUCCESS,
            output=f"File already deleted: {step.target}",
        )

    # Capture the content before removal.
    saved_text = doomed.read_text(encoding="utf-8")

    if self.dry_run:
        return StepResult(
            step=step,
            status=ExecutionStatus.SUCCESS,
            output=f"[DRY RUN] Would delete: {step.target}",
        )

    doomed.unlink()

    removal = FileChange(
        path=step.target,
        operation="delete",
        original_content=saved_text,
        new_content=None,
        timestamp=datetime.now(timezone.utc),
    )
    self.changes.append(removal)

    return StepResult(
        step=step,
        status=ExecutionStatus.SUCCESS,
        output=f"Deleted: {step.target}",
        file_changes=[removal],
    )
|
|
516
|
+
|
|
517
|
+
def _is_dangerous_command(self, command: str) -> tuple[bool, str]:
    """Instance-level wrapper around the module-level is_dangerous_command().

    Args:
        command: The shell command to check

    Returns:
        The ``(is_dangerous, description)`` pair produced by
        is_dangerous_command(), unchanged
    """
    verdict = is_dangerous_command(command)
    return verdict
|
|
523
|
+
|
|
524
|
+
def _execute_shell_command(self, step: PlanStep) -> StepResult:
    """Run the step's target as a shell command inside the repository.

    The command is first screened against the dangerous-command patterns.
    Commands containing shell operators (pipes, redirects, ``&&``, ``;``,
    command substitution) are run through the shell; all others are
    tokenized with shlex and run without a shell. If tokenizing fails on
    malformed quoting, the command falls back to shell execution.

    Args:
        step: Step whose ``target`` is the command line to run

    Returns:
        FAILED if the command is blocked, empty, times out or exits
        non-zero; SUCCESS otherwise. stdout is truncated to 2000 characters
        on success; stdout and stderr are truncated to 1000 on failure.
    """
    command = step.target

    # Check for dangerous command patterns using regex
    is_dangerous, description = self._is_dangerous_command(command)
    if is_dangerous:
        return StepResult(
            step=step,
            status=ExecutionStatus.FAILED,
            error=f"Blocked dangerous command: {description}",
        )

    if self.dry_run:
        return StepResult(
            step=step,
            status=ExecutionStatus.SUCCESS,
            output=f"[DRY RUN] Would run: {command}",
        )

    # An empty or whitespace-only command tokenizes to [], which makes
    # subprocess.run fail with an opaque IndexError. Report it explicitly.
    if not command.strip():
        return StepResult(
            step=step,
            status=ExecutionStatus.FAILED,
            error="Empty shell command",
        )

    # Substring checks, so '>' also covers '>>', '<' covers '<<' and '|'
    # covers '||'.
    shell_operators = ('|', '&&', '>', '<', ';', '`', '$(')
    use_shell = any(op in command for op in shell_operators)

    args = command
    if not use_shell:
        try:
            # No shell features needed: pass a parsed argv with shell=False.
            args = shlex.split(command)
        except ValueError:
            # Malformed quoting; hand the raw string to the shell instead.
            # Only shlex.split is guarded here, so a ValueError raised by
            # subprocess.run itself is not mistaken for a parse failure.
            use_shell = True

    try:
        result = subprocess.run(
            args,
            shell=use_shell,
            cwd=self.repo_path,
            capture_output=True,
            text=True,
            timeout=self.command_timeout,
        )
    except subprocess.TimeoutExpired:
        return StepResult(
            step=step,
            status=ExecutionStatus.FAILED,
            error=f"Command timed out after {self.command_timeout}s",
        )

    if result.returncode == 0:
        return StepResult(
            step=step,
            status=ExecutionStatus.SUCCESS,
            output=result.stdout[:2000] if result.stdout else "Command completed",
        )

    return StepResult(
        step=step,
        status=ExecutionStatus.FAILED,
        output=result.stdout[:1000] if result.stdout else "",
        error=result.stderr[:1000] if result.stderr else f"Exit code: {result.returncode}",
    )
|
|
602
|
+
|
|
603
|
+
def _execute_verification(self, step: PlanStep) -> StepResult:
    """Execute a verification step (tests, linting, file checks).

    The target is classified in this order:

    1. A step-type name (a known planner mistake) or an empty target fails
       immediately.
    2. A target ending in ``.py`` that exists under ``self.repo_path`` is
       parsed with ``ast`` to confirm it has valid Python syntax.
    3. A target that looks like a command (contains a space, or its first
       word is a known tool name) is handed to ``_execute_shell_command``.
       This includes commands that merely *end* in ``.py`` such as
       ``pytest tests/test_x.py``.
    4. Anything else is treated as a path that must exist.

    Args:
        step: Verification step; only ``step.target`` is read here.

    Returns:
        StepResult describing the outcome of the check, or whatever
        ``_execute_shell_command`` returns for command targets.
    """
    target = step.target

    # Check for common planner mistakes - these are step types, not valid targets
    invalid_targets = ("shell_command", "file_edit", "file_create", "verification")
    if target in invalid_targets:
        return StepResult(
            step=step,
            status=ExecutionStatus.FAILED,
            error=(
                f"Invalid verification target '{target}'. "
                f"The target should be a command to run (e.g., 'python script.py --help') "
                f"or a file path to verify, not the step type name."
            ),
        )

    # An empty or whitespace-only target has no first word; fail cleanly
    # instead of letting split()[0] raise IndexError further down.
    tokens = target.split() if target else []
    if not tokens:
        return StepResult(
            step=step,
            status=ExecutionStatus.FAILED,
            error="Verification target is empty",
        )

    # Looks like a command: contains spaces or starts with a known tool name
    command_prefixes = ("python", "pytest", "ruff", "npm", "make", "bash", "sh")
    looks_like_command = " " in target or tokens[0] in command_prefixes

    # If target is a Python file, verify it exists and check syntax
    if target.endswith(".py"):
        file_path = self.repo_path / target
        if file_path.exists():
            try:
                import ast

                # Parse the raw bytes so the source's own encoding
                # declaration is honoured rather than the locale default.
                ast.parse(file_path.read_bytes())
            except (SyntaxError, ValueError) as e:
                # ValueError covers sources containing null bytes on
                # interpreters that do not report them as SyntaxError.
                return StepResult(
                    step=step,
                    status=ExecutionStatus.FAILED,
                    error=f"Syntax error in {target}: {e}",
                )
            except OSError as e:
                return StepResult(
                    step=step,
                    status=ExecutionStatus.FAILED,
                    error=f"Could not read {target}: {e}",
                )
            return StepResult(
                step=step,
                status=ExecutionStatus.SUCCESS,
                output=f"Verified: {target} exists and has valid Python syntax",
            )

        # No such file. Only report it as missing when the target cannot be
        # a command; "pytest tests/test_x.py" must still be executed.
        if not looks_like_command:
            return StepResult(
                step=step,
                status=ExecutionStatus.FAILED,
                error=f"File not found: {target}",
            )

    if looks_like_command:
        return self._execute_shell_command(step)

    # Otherwise just check if the path exists
    target_path = self.repo_path / target
    if target_path.exists():
        return StepResult(
            step=step,
            status=ExecutionStatus.SUCCESS,
            output=f"Verified: {target} exists",
        )
    return StepResult(
        step=step,
        status=ExecutionStatus.FAILED,
        error=f"Path not found: {target}",
    )
|
|
672
|
+
|
|
673
|
+
def _generate_file_content(
    self,
    step: PlanStep,
    context: TaskContext,
) -> str:
    """Ask the LLM for the full text of a new file.

    The prompt comes from ``_build_generation_prompt``; the completion is
    requested with ``CODE_GENERATION_PROMPT`` as the system prompt, a
    4096-token cap and temperature 0.0.

    Returns:
        The stripped reply. When the reply opens with a Markdown fence, the
        opening fence line is removed, and so is the last line if it is a
        bare closing fence.
    """
    user_prompt = self._build_generation_prompt(step, context)

    reply = self.llm.complete(
        messages=[{"role": "user", "content": user_prompt}],
        purpose=Purpose.EXECUTION,
        system=CODE_GENERATION_PROMPT,
        max_tokens=4096,
        temperature=0.0,
    )

    text = reply.content.strip()
    if not text.startswith("```"):
        # Nothing to unwrap
        return text

    # Drop the opening fence line (which may carry a language tag) ...
    body = text.split("\n")[1:]
    # ... and the closing fence, but only when it stands alone on the last line
    if body and body[-1].strip() == "```":
        body.pop()
    return "\n".join(body)
|
|
701
|
+
|
|
702
|
+
def _generate_edit_content(
    self,
    step: PlanStep,
    context: TaskContext,
    original_content: str,
) -> str:
    """Ask the LLM for the complete, edited text of an existing file.

    The prompt comes from ``_build_edit_prompt``; the completion is
    requested with ``EDIT_GENERATION_PROMPT`` as the system prompt, an
    8192-token cap and temperature 0.0.

    Returns:
        The stripped reply. When the reply opens with a Markdown fence, the
        opening fence line is removed, and so is the last line if it is a
        bare closing fence.
    """
    user_prompt = self._build_edit_prompt(step, context, original_content)

    reply = self.llm.complete(
        messages=[{"role": "user", "content": user_prompt}],
        purpose=Purpose.EXECUTION,
        system=EDIT_GENERATION_PROMPT,
        max_tokens=8192,
        temperature=0.0,
    )

    text = reply.content.strip()
    if not text.startswith("```"):
        # Nothing to unwrap
        return text

    # Drop the opening fence line (which may carry a language tag) ...
    body = text.split("\n")[1:]
    # ... and the closing fence, but only when it stands alone on the last line
    if body and body[-1].strip() == "```":
        body.pop()
    return "\n".join(body)
|
|
730
|
+
|
|
731
|
+
def _build_generation_prompt(
    self,
    step: PlanStep,
    context: TaskContext,
) -> str:
    """Assemble the user prompt for creating a new file.

    The prompt is a sequence of Markdown sections separated by blank lines:
    task title, file to create and purpose always; step details, the first
    2000 characters of the PRD, and up to three loaded files (first 1500
    characters each) only when present.

    Returns:
        The joined prompt text, ending with the generation instruction.
    """
    parts = [
        f"## Task: {context.task.title}",
        f"## File to Create: {step.target}",
        f"## Purpose: {step.description}",
    ]

    if step.details:
        parts.append(f"## Details: {step.details}")

    if context.prd:
        parts.append(f"## Requirements:\n{context.prd.content[:2000]}")

    # Include relevant existing files for context (capped at three)
    if context.loaded_files:
        parts.append("## Related Files:")
        for loaded in context.loaded_files[:3]:
            parts.extend(
                (
                    f"### {loaded.path}",
                    f"```\n{loaded.content[:1500]}\n```",
                )
            )

    parts.append("\nGenerate the file content:")
    return "\n\n".join(parts)
|
|
759
|
+
|
|
760
|
+
def _build_edit_prompt(
    self,
    step: PlanStep,
    context: TaskContext,
    original_content: str,
) -> str:
    """Assemble the user prompt for editing an existing file.

    The prompt is a sequence of Markdown sections separated by blank lines:
    task title, file to edit, the required change, optional step details,
    and the current file content inside a code fence. Only the first 6000
    characters of ``original_content`` are included.

    Returns:
        The joined prompt text, ending with the generation instruction.
    """
    parts = [
        f"## Task: {context.task.title}",
        f"## File to Edit: {step.target}",
        f"## Change Required: {step.description}",
    ]

    if step.details:
        parts.append(f"## Details: {step.details}")

    parts.extend(
        (
            f"## Current File Content:\n```\n{original_content[:6000]}\n```",
            "\nGenerate the complete updated file content:",
        )
    )
    return "\n\n".join(parts)
|
|
781
|
+
|
|
782
|
+
def _dependencies_satisfied(
    self,
    step: PlanStep,
    previous_results: list[StepResult],
) -> bool:
    """Report whether every step this one depends on has succeeded.

    Args:
        step: Step whose ``depends_on`` indices are checked.
        previous_results: Results recorded so far; for each index the first
            result whose ``step.index`` matches is the one consulted.

    Returns:
        True when ``depends_on`` is empty, or when each listed index has a
        recorded result with status ``ExecutionStatus.SUCCESS``; False when
        a dependency has no result yet or did not succeed.
    """
    if not step.depends_on:
        return True

    for wanted_index in step.depends_on:
        # First recorded result for this dependency, if any
        found = next(
            (prior for prior in previous_results if prior.step.index == wanted_index),
            None,
        )
        if found is None:
            return False  # Dependency not executed yet
        if found.status != ExecutionStatus.SUCCESS:
            return False  # Dependency failed

    return True
|
|
806
|
+
|
|
807
|
+
def rollback(self) -> list[str]:
    """Undo every change recorded in ``self.changes``, newest first.

    Created files are deleted if they still exist; edited files get their
    original content written back; deleted files are recreated (parent
    directories included). Edits and deletions with no stored original
    content are left alone. ``self.changes`` is emptied afterwards, whether
    or not every undo succeeded.

    Returns:
        One message per action taken ("Deleted: ...", "Restored: ...",
        "Recreated: ..."), plus a "Failed to rollback ..." message for each
        change whose undo raised.
    """
    report = []

    # Process changes in reverse order
    for change in reversed(self.changes):
        target = self.repo_path / change.path

        try:
            operation = change.operation

            if operation == "create":
                # Delete the created file
                if target.exists():
                    target.unlink()
                    report.append(f"Deleted: {change.path}")
                continue

            if operation not in ("edit", "delete"):
                continue

            saved_text = change.original_content
            if saved_text is None:
                continue

            if operation == "edit":
                # Restore original content
                target.write_text(saved_text, encoding="utf-8")
                report.append(f"Restored: {change.path}")
            else:
                # Recreate the deleted file
                target.parent.mkdir(parents=True, exist_ok=True)
                target.write_text(saved_text, encoding="utf-8")
                report.append(f"Recreated: {change.path}")

        except Exception as e:
            report.append(f"Failed to rollback {change.path}: {e}")

    self.changes.clear()
    return report
|
|
844
|
+
|
|
845
|
+
# ========================================================================
|
|
846
|
+
# Async Methods with Event Publishing
|
|
847
|
+
# ========================================================================
|
|
848
|
+
|
|
849
|
+
async def _publish_event(self, task_id: str, event) -> None:
    """Forward an event to ``self.event_publisher`` when one is set.

    Does nothing when ``self.event_publisher`` is None.

    Args:
        task_id: Task ID passed through to the publisher
        event: Event object passed through to the publisher
    """
    publisher = self.event_publisher
    if publisher is None:
        return
    await publisher.publish(task_id, event)
|
|
858
|
+
|
|
859
|
+
async def execute_step_async(
    self,
    step: PlanStep,
    context: TaskContext,
    task_id: str,
) -> StepResult:
    """Run one plan step off the event loop and publish its outcome.

    The synchronous ``execute_step`` is run through the running loop's
    default executor. Afterwards an ``OutputEvent`` is published for a
    successful step that produced output (cut to ``SSE_OUTPUT_MAX_CHARS``
    with a truncation note when longer), and an ``ErrorEvent`` is published
    for a failed step.

    Args:
        step: Step to execute
        context: Task context for code generation
        task_id: Task ID for event publishing

    Returns:
        The StepResult produced by ``execute_step``
    """
    from codeframe.core.models import OutputEvent, ErrorEvent

    # Note: ProgressEvent is emitted by execute_plan_async() before calling this method,
    # so we don't emit one here to avoid duplicates with incorrect total_steps

    def _run_blocking():
        # Sync operation, executed in the thread pool
        return self.execute_step(step, context)

    event_loop = asyncio.get_running_loop()
    outcome = await event_loop.run_in_executor(None, _run_blocking)

    # Publish output event for successful commands
    if outcome.status == ExecutionStatus.SUCCESS and outcome.output:
        from codeframe.core.streaming import SSE_OUTPUT_MAX_CHARS

        text = outcome.output
        full_length = len(text)
        if full_length > SSE_OUTPUT_MAX_CHARS:
            text = text[:SSE_OUTPUT_MAX_CHARS] + f"\n... (truncated, {full_length} total chars)"

        stdout_event = OutputEvent(
            task_id=task_id,
            stream="stdout",
            line=text,
        )
        await self._publish_event(task_id, stdout_event)

    # Publish error event for failures
    if outcome.status == ExecutionStatus.FAILED:
        failure_event = ErrorEvent(
            task_id=task_id,
            error_type="step_failed",
            error=outcome.error or f"Step {step.index} execution failed",
        )
        await self._publish_event(task_id, failure_event)

    return outcome
|
|
917
|
+
|
|
918
|
+
async def execute_plan_async(
    self,
    plan: ImplementationPlan,
    context: TaskContext,
) -> ExecutionResult:
    """Run a plan's steps in order, publishing events along the way.

    For each step a ``ProgressEvent`` is published first. A step whose
    dependencies are not satisfied is recorded as SKIPPED and execution
    moves on; the first FAILED step stops the loop. When the loop ends, a
    ``CompletionEvent`` is published if no step failed, otherwise an
    ``ErrorEvent`` describing the last recorded step.

    Args:
        plan: Plan to execute
        context: Task context for code generation

    Returns:
        ExecutionResult with all step results, the overall success flag and
        the elapsed time in milliseconds
    """
    from codeframe.core.models import ProgressEvent, CompletionEvent, ErrorEvent

    task_id = plan.task_id
    step_results = []
    succeeded = True
    started_at = datetime.now(timezone.utc)
    step_count = len(plan.steps)

    for position, step in enumerate(plan.steps, 1):
        # Publish progress for each step
        progress = ProgressEvent(
            task_id=task_id,
            phase="execution",
            step=position,
            total_steps=step_count,
            message=f"Step {position}/{step_count}: {step.description}",
        )
        await self._publish_event(task_id, progress)

        # Check dependencies
        if not self._dependencies_satisfied(step, step_results):
            skipped = StepResult(
                step=step,
                status=ExecutionStatus.SKIPPED,
                output="Dependencies not satisfied",
            )
            step_results.append(skipped)
            continue

        # Execute the step
        outcome = await self.execute_step_async(step, context, task_id)
        step_results.append(outcome)

        if outcome.status == ExecutionStatus.FAILED:
            succeeded = False
            break  # Stop on first failure

    finished_at = datetime.now(timezone.utc)
    elapsed_seconds = (finished_at - started_at).total_seconds()
    elapsed_ms = int(elapsed_seconds * 1000)

    # Publish completion event
    touched_paths = [change.path for change in self.changes]
    if succeeded:
        closing_event = CompletionEvent(
            task_id=task_id,
            status="completed",
            duration_seconds=elapsed_seconds,
            files_modified=touched_paths,
        )
    else:
        # The loop breaks right after appending the failing result, so the
        # last entry is the step that failed.
        last_result = step_results[-1] if step_results else None
        if last_result:
            message = f"Step {last_result.step.index} failed: {last_result.error}"
        else:
            message = "Plan execution failed"
        closing_event = ErrorEvent(
            task_id=task_id,
            error_type="plan_failed",
            error=message,
        )
    await self._publish_event(task_id, closing_event)

    return ExecutionResult(
        plan=plan,
        step_results=step_results,
        success=succeeded,
        total_duration_ms=elapsed_ms,
    )
|