codeframe-ai 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. codeframe/__init__.py +11 -0
  2. codeframe/__main__.py +20 -0
  3. codeframe/adapters/__init__.py +5 -0
  4. codeframe/adapters/e2b/__init__.py +13 -0
  5. codeframe/adapters/e2b/adapter.py +342 -0
  6. codeframe/adapters/e2b/budget.py +71 -0
  7. codeframe/adapters/e2b/credential_scanner.py +134 -0
  8. codeframe/adapters/llm/__init__.py +92 -0
  9. codeframe/adapters/llm/anthropic.py +414 -0
  10. codeframe/adapters/llm/base.py +444 -0
  11. codeframe/adapters/llm/mock.py +281 -0
  12. codeframe/adapters/llm/openai.py +483 -0
  13. codeframe/agents/__init__.py +8 -0
  14. codeframe/agents/dependency_resolver.py +714 -0
  15. codeframe/auth/__init__.py +16 -0
  16. codeframe/auth/api_key_router.py +238 -0
  17. codeframe/auth/api_keys.py +156 -0
  18. codeframe/auth/dependencies.py +358 -0
  19. codeframe/auth/manager.py +178 -0
  20. codeframe/auth/models.py +30 -0
  21. codeframe/auth/router.py +93 -0
  22. codeframe/auth/schemas.py +15 -0
  23. codeframe/auth/scopes.py +53 -0
  24. codeframe/cli/__init__.py +12 -0
  25. codeframe/cli/__main__.py +20 -0
  26. codeframe/cli/api_client.py +275 -0
  27. codeframe/cli/app.py +5688 -0
  28. codeframe/cli/auth.py +122 -0
  29. codeframe/cli/auth_commands.py +958 -0
  30. codeframe/cli/commands/__init__.py +5 -0
  31. codeframe/cli/config_commands.py +79 -0
  32. codeframe/cli/dashboard_commands.py +67 -0
  33. codeframe/cli/engines_commands.py +205 -0
  34. codeframe/cli/env_commands.py +409 -0
  35. codeframe/cli/helpers.py +56 -0
  36. codeframe/cli/hooks_commands.py +208 -0
  37. codeframe/cli/import_commands.py +129 -0
  38. codeframe/cli/pr_commands.py +549 -0
  39. codeframe/cli/proof_commands.py +415 -0
  40. codeframe/cli/stats_commands.py +311 -0
  41. codeframe/cli/telemetry_runtime.py +153 -0
  42. codeframe/cli/validators.py +123 -0
  43. codeframe/config/rate_limits.py +165 -0
  44. codeframe/core/__init__.py +15 -0
  45. codeframe/core/adapters/__init__.py +43 -0
  46. codeframe/core/adapters/agent_adapter.py +114 -0
  47. codeframe/core/adapters/builtin.py +326 -0
  48. codeframe/core/adapters/claude_code.py +62 -0
  49. codeframe/core/adapters/codex.py +393 -0
  50. codeframe/core/adapters/git_utils.py +40 -0
  51. codeframe/core/adapters/kilocode.py +126 -0
  52. codeframe/core/adapters/opencode.py +48 -0
  53. codeframe/core/adapters/streaming_chat.py +483 -0
  54. codeframe/core/adapters/subprocess_adapter.py +213 -0
  55. codeframe/core/adapters/verification_wrapper.py +269 -0
  56. codeframe/core/agent.py +2183 -0
  57. codeframe/core/agents_config.py +569 -0
  58. codeframe/core/api_key_service.py +211 -0
  59. codeframe/core/artifacts.py +428 -0
  60. codeframe/core/blocker_detection.py +218 -0
  61. codeframe/core/blockers.py +433 -0
  62. codeframe/core/checkpoints.py +481 -0
  63. codeframe/core/conductor.py +2255 -0
  64. codeframe/core/config.py +827 -0
  65. codeframe/core/config_watcher.py +268 -0
  66. codeframe/core/context.py +542 -0
  67. codeframe/core/context_packager.py +234 -0
  68. codeframe/core/credentials.py +735 -0
  69. codeframe/core/dependency_analyzer.py +229 -0
  70. codeframe/core/dependency_graph.py +290 -0
  71. codeframe/core/diagnostic_agent.py +712 -0
  72. codeframe/core/diagnostics.py +616 -0
  73. codeframe/core/editor.py +556 -0
  74. codeframe/core/engine_registry.py +256 -0
  75. codeframe/core/engine_stats.py +231 -0
  76. codeframe/core/environment.py +697 -0
  77. codeframe/core/events.py +375 -0
  78. codeframe/core/executor.py +1005 -0
  79. codeframe/core/fix_tracker.py +480 -0
  80. codeframe/core/gates.py +1322 -0
  81. codeframe/core/git.py +477 -0
  82. codeframe/core/github_connect_service.py +178 -0
  83. codeframe/core/github_integration_config.py +118 -0
  84. codeframe/core/github_issues_service.py +449 -0
  85. codeframe/core/hooks.py +184 -0
  86. codeframe/core/importers/__init__.py +1 -0
  87. codeframe/core/importers/ralph.py +540 -0
  88. codeframe/core/installer.py +650 -0
  89. codeframe/core/models.py +1026 -0
  90. codeframe/core/notifications_config.py +183 -0
  91. codeframe/core/planner.py +437 -0
  92. codeframe/core/prd.py +670 -0
  93. codeframe/core/prd_discovery.py +1118 -0
  94. codeframe/core/prd_stress_test.py +499 -0
  95. codeframe/core/progress.py +126 -0
  96. codeframe/core/proof/__init__.py +34 -0
  97. codeframe/core/proof/capture.py +79 -0
  98. codeframe/core/proof/evidence.py +56 -0
  99. codeframe/core/proof/ledger.py +574 -0
  100. codeframe/core/proof/models.py +162 -0
  101. codeframe/core/proof/obligations.py +103 -0
  102. codeframe/core/proof/runner.py +233 -0
  103. codeframe/core/proof/scope.py +81 -0
  104. codeframe/core/proof/stubs.py +156 -0
  105. codeframe/core/quick_fixes.py +558 -0
  106. codeframe/core/react_agent.py +1650 -0
  107. codeframe/core/reconciliation.py +183 -0
  108. codeframe/core/replay.py +788 -0
  109. codeframe/core/review.py +285 -0
  110. codeframe/core/runtime.py +1134 -0
  111. codeframe/core/sandbox/__init__.py +27 -0
  112. codeframe/core/sandbox/context.py +98 -0
  113. codeframe/core/sandbox/worktree.py +20 -0
  114. codeframe/core/schedule.py +396 -0
  115. codeframe/core/stall_detector.py +71 -0
  116. codeframe/core/stall_monitor.py +134 -0
  117. codeframe/core/state_machine.py +121 -0
  118. codeframe/core/streaming.py +502 -0
  119. codeframe/core/task_tree.py +400 -0
  120. codeframe/core/tasks.py +1022 -0
  121. codeframe/core/telemetry.py +232 -0
  122. codeframe/core/templates.py +221 -0
  123. codeframe/core/tools.py +942 -0
  124. codeframe/core/workspace.py +887 -0
  125. codeframe/core/worktrees.py +276 -0
  126. codeframe/git/__init__.py +5 -0
  127. codeframe/git/github_integration.py +505 -0
  128. codeframe/lib/__init__.py +0 -0
  129. codeframe/lib/audit_logger.py +248 -0
  130. codeframe/lib/metrics_tracker.py +800 -0
  131. codeframe/lib/quality/__init__.py +7 -0
  132. codeframe/lib/quality/complexity_analyzer.py +316 -0
  133. codeframe/lib/quality/owasp_patterns.py +284 -0
  134. codeframe/lib/quality/security_scanner.py +250 -0
  135. codeframe/lib/rate_limiter.py +312 -0
  136. codeframe/notifications/__init__.py +0 -0
  137. codeframe/notifications/webhook.py +380 -0
  138. codeframe/planning/__init__.py +30 -0
  139. codeframe/planning/issue_generator.py +219 -0
  140. codeframe/planning/prd_template_functions.py +137 -0
  141. codeframe/planning/prd_templates.py +975 -0
  142. codeframe/planning/task_scheduler.py +511 -0
  143. codeframe/planning/task_templates.py +533 -0
  144. codeframe/platform_store/__init__.py +5 -0
  145. codeframe/platform_store/database.py +277 -0
  146. codeframe/platform_store/repositories/__init__.py +24 -0
  147. codeframe/platform_store/repositories/api_key_repository.py +245 -0
  148. codeframe/platform_store/repositories/audit_repository.py +67 -0
  149. codeframe/platform_store/repositories/base.py +295 -0
  150. codeframe/platform_store/repositories/interactive_sessions.py +165 -0
  151. codeframe/platform_store/repositories/token_repository.py +598 -0
  152. codeframe/platform_store/repositories/workspace_registry_repository.py +175 -0
  153. codeframe/platform_store/schema_manager.py +321 -0
  154. codeframe/templates/AGENTS.md.default +94 -0
  155. codeframe/tui/__init__.py +5 -0
  156. codeframe/tui/app.py +256 -0
  157. codeframe/tui/data_service.py +103 -0
  158. codeframe/ui/__init__.py +0 -0
  159. codeframe/ui/dependencies.py +103 -0
  160. codeframe/ui/models.py +999 -0
  161. codeframe/ui/response_models.py +201 -0
  162. codeframe/ui/routers/__init__.py +5 -0
  163. codeframe/ui/routers/_helpers.py +29 -0
  164. codeframe/ui/routers/batches_v2.py +315 -0
  165. codeframe/ui/routers/blockers_v2.py +320 -0
  166. codeframe/ui/routers/checkpoints_v2.py +310 -0
  167. codeframe/ui/routers/costs_v2.py +322 -0
  168. codeframe/ui/routers/diagnose_v2.py +225 -0
  169. codeframe/ui/routers/discovery_v2.py +417 -0
  170. codeframe/ui/routers/environment_v2.py +284 -0
  171. codeframe/ui/routers/events_v2.py +75 -0
  172. codeframe/ui/routers/gates_v2.py +166 -0
  173. codeframe/ui/routers/git_v2.py +284 -0
  174. codeframe/ui/routers/github_integrations_v2.py +532 -0
  175. codeframe/ui/routers/interactive_sessions_v2.py +238 -0
  176. codeframe/ui/routers/pr_v2.py +709 -0
  177. codeframe/ui/routers/prd_v2.py +695 -0
  178. codeframe/ui/routers/proof_v2.py +755 -0
  179. codeframe/ui/routers/review_v2.py +360 -0
  180. codeframe/ui/routers/schedule_v2.py +214 -0
  181. codeframe/ui/routers/session_chat_ws.py +354 -0
  182. codeframe/ui/routers/settings_v2.py +562 -0
  183. codeframe/ui/routers/streaming_v2.py +155 -0
  184. codeframe/ui/routers/tasks_v2.py +1098 -0
  185. codeframe/ui/routers/templates_v2.py +232 -0
  186. codeframe/ui/routers/terminal_ws.py +267 -0
  187. codeframe/ui/routers/workspace_v2.py +527 -0
  188. codeframe/ui/server.py +568 -0
  189. codeframe/ui/shared.py +241 -0
  190. codeframe/workspace/__init__.py +5 -0
  191. codeframe/workspace/manager.py +249 -0
  192. codeframe_ai-0.9.0.dist-info/METADATA +517 -0
  193. codeframe_ai-0.9.0.dist-info/RECORD +197 -0
  194. codeframe_ai-0.9.0.dist-info/WHEEL +5 -0
  195. codeframe_ai-0.9.0.dist-info/entry_points.txt +3 -0
  196. codeframe_ai-0.9.0.dist-info/licenses/LICENSE +661 -0
  197. codeframe_ai-0.9.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2183 @@
1
+ """Agent orchestrator for CodeFRAME v2.
2
+
3
+ Coordinates the full agent execution loop:
4
+ 1. Load context for task
5
+ 2. Generate implementation plan
6
+ 3. Execute plan steps
7
+ 4. Detect blockers when stuck
8
+ 5. Run verification gates
9
+ 6. Emit events throughout
10
+
11
+ This module is headless - no FastAPI or HTTP dependencies.
12
+ """
13
+
14
+ import re
15
+ import shlex
16
+ import subprocess
17
+ from dataclasses import dataclass, field
18
+ from datetime import datetime, timezone
19
+ from enum import Enum
20
+ from pathlib import Path
21
+ from typing import TYPE_CHECKING, Callable, Optional
22
+
23
+ from codeframe.adapters.llm import LLMProvider, Purpose
24
+ from codeframe.core import blockers, events
25
+ from codeframe.core.context import ContextLoader, TaskContext
26
+ from codeframe.core.events import EventType
27
+ from codeframe.core.executor import Executor, ExecutionStatus, StepResult
28
+ from codeframe.core.fix_tracker import EscalationDecision, FixAttemptTracker, FixOutcome
29
+ from codeframe.core.gates import run as run_gates, GateResult, GateStatus
30
+ from codeframe.core.planner import ImplementationPlan, Planner, PlanStep, StepType
31
+ from codeframe.core.quick_fixes import apply_quick_fix, find_quick_fix
32
+ from codeframe.core.workspace import Workspace
33
+
34
+ if TYPE_CHECKING:
35
+ from codeframe.core.conductor import GlobalFixCoordinator
36
+ from codeframe.core.streaming import EventPublisher, RunOutputLogger
37
+
38
# Executables that may be launched directly from a parsed argv list.
# _parse_command_safely() compares the base name of a command against this
# set to decide whether the command can run without shell interpretation.
SAFE_SHELL_COMMANDS = frozenset(
    (
        # Python tooling
        "python",
        "python3",
        "pytest",
        "ruff",
        "black",
        "mypy",
        "pip",
        "uv",
        # Node tooling
        "npm",
        "node",
        "npx",
        "yarn",
        "pnpm",
        # Basic system utilities
        "ls",
        "cat",
        "head",
        "tail",
        "grep",
        "find",
        "mkdir",
        "touch",
        "cp",
        "mv",
        # Version control
        "git",
        # Test runners / build tools
        "jest",
        "vitest",
        "cargo",
    )
)
51
+
52
+
53
+ def _extract_file_from_command(command: str) -> Optional[str]:
54
+ """Extract a file path from a verification command.
55
+
56
+ Examples:
57
+ "python task_tracker.py --help" -> "task_tracker.py"
58
+ "pytest tests/test_foo.py" -> "tests/test_foo.py"
59
+ "ruff check main.py" -> "main.py"
60
+ "python -m mymodule" -> None
61
+
62
+ Args:
63
+ command: The shell command to parse
64
+
65
+ Returns:
66
+ The file path if found, None otherwise
67
+ """
68
+ if not command:
69
+ return None
70
+
71
+ # Common patterns for Python file references
72
+ # Match .py files in the command
73
+ py_match = re.search(r'(\S+\.py)', command)
74
+ if py_match:
75
+ return py_match.group(1)
76
+
77
+ # No file found
78
+ return None
79
+
80
+
81
+ def _is_path_safe(file_path: Path, workspace_path: Path) -> tuple[bool, str]:
82
+ """Check if a file path is safely within the workspace.
83
+
84
+ Prevents path traversal attacks via '..' components.
85
+
86
+ Args:
87
+ file_path: The file path to check
88
+ workspace_path: The workspace root path
89
+
90
+ Returns:
91
+ Tuple of (is_safe, reason) where reason explains any rejection
92
+ """
93
+ try:
94
+ # Resolve both paths to handle symlinks and relative paths
95
+ resolved_file = file_path.resolve()
96
+ resolved_workspace = workspace_path.resolve()
97
+
98
+ # Check if the file is within the workspace
99
+ try:
100
+ resolved_file.relative_to(resolved_workspace)
101
+ return (True, "")
102
+ except ValueError:
103
+ return (False, f"Path escapes workspace: {file_path}")
104
+ except Exception as e:
105
+ return (False, f"Path resolution error: {e}")
106
+
107
+
108
+ def _parse_command_safely(command: str) -> tuple[list[str], bool, str]:
109
+ """Parse a shell command into an argument list for safe execution.
110
+
111
+ Args:
112
+ command: The shell command string
113
+
114
+ Returns:
115
+ Tuple of (argv_list, requires_shell, warning) where:
116
+ - argv_list: Parsed command arguments
117
+ - requires_shell: True if command needs shell interpretation
118
+ - warning: Non-empty if there are safety concerns
119
+ """
120
+ # Check for shell operators that require shell=True
121
+ shell_operators = ['|', '&&', '||', '>', '<', '>>', '<<', ';', '$', '`', '$(']
122
+ has_shell_operators = any(op in command for op in shell_operators)
123
+
124
+ if has_shell_operators:
125
+ return ([], True, "Command contains shell operators")
126
+
127
+ try:
128
+ # Parse command into argv list
129
+ argv = shlex.split(command)
130
+ if not argv:
131
+ return ([], True, "Empty command")
132
+
133
+ # Check if the base command is in our safe list
134
+ base_cmd = Path(argv[0]).name # Handle paths like /usr/bin/python
135
+ if base_cmd not in SAFE_SHELL_COMMANDS:
136
+ return (argv, True, f"Command '{base_cmd}' not in safe list")
137
+
138
+ return (argv, False, "")
139
+ except ValueError as e:
140
+ # shlex.split failed (e.g., unclosed quotes)
141
+ return ([], True, f"Command parse error: {e}")
142
+
143
+
144
class AgentStatus(str, Enum):
    """Phase an agent run is currently in.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    # Initial value of a freshly created AgentState
    IDLE = "idle"
    # Set while the implementation plan is being generated
    PLANNING = "planning"
    # Set while plan steps are being executed
    EXECUTING = "executing"
    # Set when a blocker stops execution
    BLOCKED = "blocked"
    VERIFYING = "verifying"
    COMPLETED = "completed"
    # Set when the run raised an exception
    FAILED = "failed"
154
+
155
+
156
class FixScope(str, Enum):
    """How far-reaching a proposed fix is, which sets its coordination needs.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    # Agent can execute autonomously (files it created, its own tests)
    LOCAL = "local"
    # Requires Conductor coordination (config files, installs, shared code)
    GLOBAL = "global"
165
+
166
+
167
# File names whose modification requires global coordination.
GLOBAL_SCOPE_FILES = {
    # Python packaging / dependencies
    "pyproject.toml",
    "requirements.txt",
    "setup.py",
    "setup.cfg",
    # JavaScript / TypeScript
    "package.json",
    "tsconfig.json",
    # Rust and Go manifests
    "Cargo.toml",
    "go.mod",
    # Environment configuration
    ".env",
    ".env.example",
    # Containers and build entry points
    "Dockerfile",
    "docker-compose.yml",
    "Makefile",
}
183
+
184
+
185
@dataclass
class BlockerInfo:
    """Describes a blocker the agent has run into.

    ``reason`` and ``question`` are required; the remaining fields are
    optional extras.
    """

    # Why the agent is blocked
    reason: str
    # Question to ask the user
    question: str
    # Additional context about the blocker (empty when there is none)
    context: str = ""
    # Which step caused the blocker, or None when not tied to a step
    step_index: Optional[int] = None
200
+
201
+
202
@dataclass
class AgentState:
    """Mutable snapshot of a single agent run.

    A fresh instance is idle, has no task, plan or blocker, empty result
    lists and allows three attempts.
    """

    # Current agent status
    status: AgentStatus = AgentStatus.IDLE
    # Task being executed
    task_id: str = ""
    # Generated implementation plan (None until planning has produced one)
    plan: Optional[ImplementationPlan] = None
    # Current step index (0-based)
    current_step: int = 0
    # Results of executed steps
    step_results: list[StepResult] = field(default_factory=list)
    # Current blocker (if any)
    blocker: Optional[BlockerInfo] = None
    # Results of verification gates
    gate_results: list[GateResult] = field(default_factory=list)
    # Number of execution attempts
    attempt_count: int = 0
    # Maximum attempts before giving up
    max_attempts: int = 3

    def to_dict(self) -> dict:
        """Convert to dictionary for persistence.

        NOTE(review): ``gate_results``, ``max_attempts`` and the blocker's
        ``step_index`` are not part of the produced dictionary — confirm
        that whatever reloads the state does not need them.
        """
        # Keys are inserted in a fixed order so the output layout is stable
        serialized: dict = {
            "status": self.status.value,
            "task_id": self.task_id,
        }
        serialized["plan"] = self.plan.to_dict() if self.plan else None
        serialized["current_step"] = self.current_step

        # Each step result is reduced to its index, status and captured text
        step_entries = []
        for outcome in self.step_results:
            step_entries.append(
                {
                    "step_index": outcome.step.index,
                    "status": outcome.status.value,
                    "output": outcome.output,
                    "error": outcome.error,
                }
            )
        serialized["step_results"] = step_entries

        if self.blocker:
            serialized["blocker"] = {
                "reason": self.blocker.reason,
                "question": self.blocker.question,
                "context": self.blocker.context,
            }
        else:
            serialized["blocker"] = None

        serialized["attempt_count"] = self.attempt_count
        return serialized
251
+
252
+
253
# Blocker detection thresholds

# Consecutive failures tolerated before a blocker is created from the failure
MAX_CONSECUTIVE_FAILURES = 3
# Consecutive unfixable verification failures before execution is aborted
MAX_CONSECUTIVE_VERIFICATION_FAILURES = 3

# Upper bound on self-correction rounds attempted for one failing step
MAX_SELF_CORRECTION_ATTEMPTS = 2
# NOTE(review): presumably a per-step retry cap; its use is not visible in
# this part of the module — confirm.
MAX_STEP_RETRIES = 2
258
+
259
+ # Pattern constants live in blocker_detection.py (authoritative location).
260
+ # Only import what Agent code actually uses.
261
+ from codeframe.core.blocker_detection import ( # noqa: E402
262
+ HUMAN_INPUT_PATTERNS,
263
+ TACTICAL_DECISION_PATTERNS,
264
+ TECHNICAL_ERROR_PATTERNS,
265
+ )
266
+
267
+
268
+ class Agent:
269
+ """Orchestrates task execution through the full agent loop.
270
+
271
+ The agent coordinates:
272
+ - Context loading and planning
273
+ - Step-by-step execution
274
+ - Blocker detection and creation
275
+ - Verification gate integration
276
+ - State management for pause/resume
277
+ """
278
+
279
def __init__(
    self,
    workspace: Workspace,
    llm_provider: LLMProvider,
    max_context_tokens: int = 100_000,
    dry_run: bool = False,
    on_event: Optional[Callable[[str, dict], None]] = None,
    debug: bool = False,
    verbose: bool = False,
    fix_coordinator: Optional["GlobalFixCoordinator"] = None,
    output_logger: Optional["RunOutputLogger"] = None,
    event_publisher: Optional["EventPublisher"] = None,
):
    """Store the agent's collaborators and reset its run state.

    Args:
        workspace: Target workspace
        llm_provider: LLM provider for planning and code generation
        max_context_tokens: Maximum tokens for context loading
        dry_run: If True, don't make actual changes
        on_event: Optional callback for agent events
        debug: If True, write detailed debug log to workspace
        verbose: If True, print detailed progress to stdout
        fix_coordinator: Optional coordinator for global fixes (for parallel execution)
        output_logger: Optional logger for streaming output to file (for cf work follow)
        event_publisher: Optional EventPublisher for SSE streaming (for web clients)
    """
    # Collaborators handed in by the caller
    self.workspace = workspace
    self.llm = llm_provider
    self.fix_coordinator = fix_coordinator
    self.output_logger = output_logger
    self.event_publisher = event_publisher
    self.on_event = on_event

    # Behaviour switches
    self.max_context_tokens = max_context_tokens
    self.dry_run = dry_run
    self.debug = debug
    self.verbose = verbose

    # Per-run state; context and executor are filled in once a run starts
    self.state = AgentState()
    self.context: Optional[TaskContext] = None
    self.executor: Optional[Executor] = None

    # Fix attempt tracking for loop prevention and escalation
    self.fix_tracker = FixAttemptTracker()

    # Debug logging is prepared last, after every attribute above exists
    self._debug_log_path: Optional[Path] = None
    self._failure_count = 0  # Track failures for verbose logging
    if debug:
        self._setup_debug_log()
329
+
330
+ def _verbose_print(self, message: str) -> None:
331
+ """Print message to stdout (if verbose) and to output log file.
332
+
333
+ The output log file is always written to (if logger provided) to enable
334
+ streaming via `cf work follow`, even when verbose=False.
335
+
336
+ Args:
337
+ message: Message to print/log
338
+ """
339
+ # Print to stdout if verbose mode is enabled
340
+ if self.verbose:
341
+ print(message)
342
+
343
+ # Always write to output log if logger is provided (for cf work follow)
344
+ if self.output_logger:
345
+ self.output_logger.write(message + "\n")
346
+
347
def run(self, task_id: str) -> AgentState:
    """Run the full agent loop for a task and return the resulting state.

    Sequence: reset state, load context, stop early if the task already
    has open blockers, plan, execute the plan, and run final verification
    when execution left the status at EXECUTING. Events are emitted at each
    stage ("agent_started", "loading_context", "planning_started",
    "planning_completed", "agent_finished").

    Args:
        task_id: Task to execute

    Returns:
        Final AgentState

    Raises:
        Exception: any error raised during the run is re-raised after the
            status is set to FAILED and "agent_failed" is emitted.
    """
    self.state = AgentState(task_id=task_id, status=AgentStatus.IDLE)
    self._emit_event("agent_started", {"task_id": task_id})

    try:
        self._emit_event("loading_context", {"task_id": task_id})
        self.context = self._load_context(task_id)

        # Pre-existing blockers take priority; this path returns without
        # emitting "agent_finished".
        if self.context.open_blockers:
            self._handle_existing_blockers()
            return self.state

        # Planning phase
        self.state.status = AgentStatus.PLANNING
        self._emit_event("planning_started", {})
        plan = self._create_plan()
        self.state.plan = plan
        plan_summary = {
            "steps": plan.total_steps,
            "complexity": plan.estimated_complexity.value,
        }
        self._emit_event("planning_completed", plan_summary)

        # Execution phase
        self.state.status = AgentStatus.EXECUTING
        self._execute_plan()

        # A status still at EXECUTING means no step blocked or aborted the run
        if self.state.status == AgentStatus.EXECUTING:
            self._run_final_verification()

    except Exception as e:
        self.state.status = AgentStatus.FAILED
        self._emit_event("agent_failed", {"error": str(e)})
        raise

    self._emit_event("agent_finished", {"status": self.state.status.value})
    return self.state
400
+
401
def resume(self, task_id: str, state: AgentState) -> AgentState:
    """Continue a run from a previously saved state.

    The saved state replaces the agent's current one and the task context
    is reloaded. A BLOCKED state continues executing the plan only when the
    reloaded context has no open blockers; otherwise the state is returned
    unchanged.

    NOTE(review): unlike run(), failures here are not converted into a
    FAILED status / "agent_failed" event, and a saved state that is not
    BLOCKED skips _execute_plan() entirely — confirm both are intended.

    Args:
        task_id: Task to resume
        state: Previous agent state

    Returns:
        Final AgentState
    """
    self.state = state
    self._emit_event("agent_resumed", {"task_id": task_id, "step": state.current_step})

    # Context is reloaded so that blocker status reflects the present
    self.context = self._load_context(task_id)

    if self.state.status == AgentStatus.BLOCKED:
        if self.context.open_blockers:
            # Still blocked: hand the state back untouched
            return self.state

        # Blockers resolved, continue execution
        self.state.status = AgentStatus.EXECUTING
        self.state.blocker = None
        self._execute_plan()

    # Run final verification if needed
    if self.state.status == AgentStatus.EXECUTING:
        self._run_final_verification()

    self._emit_event("agent_finished", {"status": self.state.status.value})
    return self.state
434
+
435
def _load_context(self, task_id: str) -> TaskContext:
    """Build a ContextLoader for the workspace and load ``task_id`` with it.

    The loader is capped at ``self.max_context_tokens`` tokens.
    """
    return ContextLoader(
        self.workspace,
        max_tokens=self.max_context_tokens,
    ).load(task_id)
439
+
440
def _create_plan(self) -> ImplementationPlan:
    """Ask a Planner backed by the agent's LLM to plan from ``self.context``."""
    return Planner(self.llm).create_plan(self.context)
444
+
445
+ def _execute_plan(self) -> None:
446
+ """Execute the implementation plan step by step."""
447
+ if not self.state.plan:
448
+ raise ValueError("No plan to execute")
449
+
450
+ self.executor = Executor(
451
+ llm_provider=self.llm,
452
+ repo_path=self.workspace.repo_path,
453
+ dry_run=self.dry_run,
454
+ event_publisher=self.event_publisher,
455
+ )
456
+
457
+ consecutive_failures = 0
458
+ consecutive_verification_failures = 0
459
+
460
+ self._debug_log(
461
+ f"Starting plan execution with {len(self.state.plan.steps)} steps",
462
+ level="INFO",
463
+ always=True,
464
+ )
465
+
466
+ while self.state.current_step < len(self.state.plan.steps):
467
+ step = self.state.plan.steps[self.state.current_step]
468
+
469
+ self._debug_log(
470
+ f"=== STEP {step.index} ({step.type.value}) ===",
471
+ level="INFO",
472
+ data={
473
+ "target": step.target,
474
+ "description": step.description,
475
+ "details_length": len(step.details) if step.details else 0,
476
+ "current_step_index": self.state.current_step,
477
+ "consecutive_failures": consecutive_failures,
478
+ },
479
+ always=True,
480
+ )
481
+
482
+ self._emit_event("step_started", {
483
+ "step": step.index,
484
+ "type": step.type.value,
485
+ "target": step.target,
486
+ })
487
+
488
+ # Execute the step
489
+ result = self.executor.execute_step(step, self.context)
490
+ self.state.step_results.append(result)
491
+
492
+ self._debug_log(
493
+ f"Step {step.index} execution result: {result.status.value}",
494
+ level="INFO" if result.status == ExecutionStatus.SUCCESS else "WARN",
495
+ data={
496
+ "output_preview": result.output[:200] if result.output else None,
497
+ "error": result.error if result.error else None,
498
+ },
499
+ always=True,
500
+ )
501
+
502
+ if result.status == ExecutionStatus.SUCCESS:
503
+ consecutive_failures = 0
504
+ self._emit_event("step_completed", {
505
+ "step": step.index,
506
+ "output": result.output[:200],
507
+ })
508
+
509
+ # Run incremental verification for file changes
510
+ if step.type in {StepType.FILE_CREATE, StepType.FILE_EDIT}:
511
+ gate_result = self._run_incremental_verification()
512
+ if gate_result and gate_result.passed:
513
+ consecutive_verification_failures = 0
514
+ elif gate_result and not gate_result.passed:
515
+ # Try to fix lint issues automatically (works for style, not syntax)
516
+ if not self._try_auto_fix(gate_result):
517
+ # Auto-fix failed - need to self-correct the code
518
+ # Extract detailed error info from gate result
519
+ failed_checks = [
520
+ c for c in gate_result.checks
521
+ if c.status != GateStatus.PASSED
522
+ ]
523
+ failed_check_names = [c.name for c in failed_checks]
524
+
525
+ # Build detailed error string with actual output
526
+ error_details = []
527
+ for check in failed_checks:
528
+ if check.output:
529
+ error_details.append(
530
+ f"[{check.name}] {check.output[:500]}"
531
+ )
532
+ error_detail_str = (
533
+ "\n".join(error_details)
534
+ if error_details
535
+ else "No details available"
536
+ )
537
+
538
+ self._emit_event("verification_failed", {
539
+ "step": step.index,
540
+ "error": f"Verification failed: {failed_check_names}",
541
+ "gates": failed_check_names,
542
+ "error_count": len(failed_checks),
543
+ "error_details": error_detail_str[:1000],
544
+ })
545
+
546
+ failed_result = StepResult(
547
+ step=step,
548
+ status=ExecutionStatus.FAILED,
549
+ error=(
550
+ f"Verification failed: {failed_check_names}"
551
+ f"\n{error_detail_str}"
552
+ ),
553
+ )
554
+
555
+ # Try self-correction to fix the code
556
+ self_correction_attempts = 0
557
+ current_result = failed_result
558
+ self_correction_succeeded = False
559
+
560
+ while self_correction_attempts < MAX_SELF_CORRECTION_ATTEMPTS:
561
+ self_correction_attempts += 1
562
+ corrected_result = self._attempt_self_correction(
563
+ step, current_result, self_correction_attempts
564
+ )
565
+
566
+ if corrected_result is None:
567
+ break
568
+
569
+ if corrected_result.status == ExecutionStatus.SUCCESS:
570
+ # Re-verify the corrected code
571
+ recheck = self._run_incremental_verification()
572
+ if recheck is None or recheck.passed:
573
+ self._emit_event("step_completed", {
574
+ "step": step.index,
575
+ "output": "Code fixed via self-correction",
576
+ "self_corrected": True,
577
+ })
578
+ self_correction_succeeded = True
579
+ break
580
+
581
+ # Re-verification failed — preserve error context
582
+ # so next correction attempt knows what to fix
583
+ reverify_failed = [
584
+ c for c in recheck.checks
585
+ if c.status != GateStatus.PASSED
586
+ ]
587
+ reverify_errors = []
588
+ for check in reverify_failed:
589
+ if check.output:
590
+ reverify_errors.append(f"[{check.name}] {check.output[:500]}")
591
+ reverify_msg = "\n".join(reverify_errors) if reverify_errors else "Re-verification failed"
592
+ current_result = StepResult(
593
+ step=step,
594
+ status=ExecutionStatus.FAILED,
595
+ error=f"Re-verification after correction:\n{reverify_msg}",
596
+ )
597
+ continue
598
+
599
+ current_result = corrected_result
600
+
601
+ if self_correction_succeeded:
602
+ consecutive_verification_failures = 0
603
+ else:
604
+ # Couldn't fix the verification error
605
+ consecutive_verification_failures += 1
606
+ consecutive_failures += 1
607
+ if consecutive_verification_failures >= MAX_CONSECUTIVE_VERIFICATION_FAILURES:
608
+ self._debug_log(
609
+ f"ABORTING: Too many consecutive verification failures ({consecutive_verification_failures})",
610
+ level="ERROR",
611
+ always=True,
612
+ )
613
+ self._emit_event("execution_aborted", {
614
+ "reason": f"Too many consecutive verification failures ({consecutive_verification_failures})",
615
+ "step": step.index,
616
+ })
617
+ # Force blocker creation — bypass LLM classification
618
+ # since this is a definitive abort, not a tactical decision
619
+ error_msg = current_result.error if current_result else "Repeated verification failures"
620
+ blocker = blockers.create(
621
+ workspace=self.workspace,
622
+ question=f"Agent aborted: {consecutive_verification_failures} consecutive verification failures at step {step.index} ({step.description}). Last error: {error_msg[:500]}",
623
+ task_id=self.state.task_id,
624
+ created_by="agent",
625
+ )
626
+ self.state.status = AgentStatus.BLOCKED
627
+ self.state.blocker = BlockerInfo(
628
+ reason="Too many consecutive verification failures",
629
+ question=blocker.question,
630
+ context=f"Step {step.index}: {step.description}",
631
+ )
632
+ return
633
+ if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
634
+ self._create_blocker_from_failure(step, current_result)
635
+ return
636
+ # Otherwise, continue to next step with broken file
637
+
638
+ self.state.current_step += 1
639
+
640
+ elif result.status == ExecutionStatus.FAILED:
641
+ consecutive_failures += 1
642
+ self._failure_count += 1 # Track for debug logging verbosity
643
+
644
+ self._debug_log(
645
+ f"STEP FAILED: consecutive_failures={consecutive_failures}, total_failures={self._failure_count}",
646
+ level="WARN",
647
+ data={"error": result.error},
648
+ always=True,
649
+ )
650
+
651
+ self._emit_event("step_failed", {
652
+ "step": step.index,
653
+ "error": result.error[:200],
654
+ })
655
+
656
+ # Special handling for verification step failures
657
+ # When verification fails (e.g., syntax error), we need to fix the TARGET file
658
+ # not "self-correct" the verification step itself
659
+ if step.type == StepType.VERIFICATION:
660
+ # Extract the actual file path from the verification command
661
+ # e.g., "python task_tracker.py --help" -> "task_tracker.py"
662
+ file_path = _extract_file_from_command(step.target)
663
+
664
+ if file_path:
665
+ # Create a FILE_EDIT step to fix the target file
666
+ fix_step = PlanStep(
667
+ index=step.index,
668
+ type=StepType.FILE_EDIT,
669
+ target=file_path,
670
+ description=f"Fix {file_path} - {result.error[:100]}",
671
+ details=f"The verification command '{step.target}' failed with error: {result.error}. Fix this error in {file_path}.",
672
+ depends_on=[],
673
+ )
674
+ # Replace step with the fix step for self-correction
675
+ step = fix_step
676
+ else:
677
+ # Can't determine which file to fix, create blocker
678
+ self._debug_log(
679
+ f"Cannot extract file path from verification command: {step.target}",
680
+ level="WARN",
681
+ always=True,
682
+ )
683
+ self._create_blocker_from_failure(step, result)
684
+ return
685
+
686
+ # Classify the error
687
+ error_type = self._classify_error(result.error)
688
+
689
+ # For human-input-needed errors, create blocker immediately
690
+ if error_type == "human":
691
+ self._create_blocker_from_failure(step, result)
692
+ return
693
+
694
+ # For technical errors, try self-correction first
695
+ self_correction_attempts = 0
696
+ current_result = result
697
+ self_correction_succeeded = False
698
+
699
+ while self_correction_attempts < MAX_SELF_CORRECTION_ATTEMPTS:
700
+ self_correction_attempts += 1
701
+ corrected_result = self._attempt_self_correction(
702
+ step, current_result, self_correction_attempts
703
+ )
704
+
705
+ if corrected_result is None:
706
+ # Self-correction failed to even attempt, stop trying
707
+ break
708
+
709
+ if corrected_result.status == ExecutionStatus.SUCCESS:
710
+ # Self-correction worked! Update state and continue
711
+ self.state.step_results[-1] = corrected_result # Replace failed result
712
+ consecutive_failures = 0
713
+ self._emit_event("step_completed", {
714
+ "step": step.index,
715
+ "output": corrected_result.output[:200],
716
+ "self_corrected": True,
717
+ })
718
+
719
+ # Run incremental verification for file changes
720
+ if step.type in {StepType.FILE_CREATE, StepType.FILE_EDIT}:
721
+ gate_result = self._run_incremental_verification()
722
+ if gate_result and not gate_result.passed:
723
+ if not self._try_auto_fix(gate_result):
724
+ consecutive_failures += 1
725
+
726
+ self.state.current_step += 1
727
+ self_correction_succeeded = True
728
+ break
729
+
730
+ # Self-correction didn't succeed, try again
731
+ current_result = corrected_result
732
+
733
+ # Handle case where self-correction didn't succeed
734
+ if not self_correction_succeeded:
735
+ # Check if we should create a blocker
736
+ if self._should_create_blocker(
737
+ consecutive_failures, current_result, self_correction_attempts
738
+ ):
739
+ self._create_blocker_from_failure(step, current_result)
740
+ return
741
+
742
+ # Give up on this step if too many consecutive failures
743
+ if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
744
+ self._debug_log(
745
+ f"GIVING UP: Too many consecutive failures ({consecutive_failures})",
746
+ level="ERROR",
747
+ always=True,
748
+ )
749
+ self.state.status = AgentStatus.FAILED
750
+ self._emit_event("execution_failed", {
751
+ "reason": "Too many consecutive failures after self-correction",
752
+ })
753
+ return
754
+
755
+ # Skip this step and continue to the next
756
+ self._debug_log(
757
+ f"Skipping failed step {step.index}, advancing to next step",
758
+ level="WARN",
759
+ always=True,
760
+ )
761
+ self.state.current_step += 1
762
+
763
+ elif result.status == ExecutionStatus.SKIPPED:
764
+ self._debug_log(f"Step {step.index} SKIPPED", level="INFO", always=True)
765
+ self._emit_event("step_skipped", {"step": step.index})
766
+ self.state.current_step += 1
767
+
768
+ self._debug_log(
769
+ f"Plan execution completed. Final step index: {self.state.current_step}",
770
+ level="INFO",
771
+ always=True,
772
+ )
773
+
774
+ def _run_incremental_verification(self) -> Optional[GateResult]:
775
+ """Run quick verification after file changes."""
776
+ # Only run fast checks (ruff) for incremental verification
777
+ try:
778
+ result = run_gates(
779
+ self.workspace,
780
+ gates=["ruff"],
781
+ verbose=True,
782
+ )
783
+ self.state.gate_results.append(result)
784
+ return result
785
+ except Exception:
786
+ return None
787
+
788
def _run_final_verification(self) -> None:
    """Run the full verification gates with a self-correction retry loop.

    The status is set to VERIFYING, then while
    ``state.attempt_count < state.max_attempts``:

    1. Run all gates via ``run_gates`` and record the result.
    2. If the gates pass, mark the task COMPLETED and return.
    3. Otherwise count the attempt; if retries remain, call
       ``_attempt_verification_fix`` and, when it reports that fixes were
       applied, loop back to re-run the gates.

    The loop ends when attempts are exhausted, when self-correction cannot
    apply anything, or when any exception is raised. In those cases a
    ``verification_failed`` event is emitted and the status becomes FAILED,
    unless the status was moved to BLOCKED while the loop ran.
    """
    self.state.status = AgentStatus.VERIFYING
    self._emit_event("verification_started", {})

    print(f"\n[VERIFY] Starting final verification (max {self.state.max_attempts} attempts)")
    self._debug_log(
        f"Starting final verification (max {self.state.max_attempts} attempts)",
        level="INFO",
        always=True,
    )

    while self.state.attempt_count < self.state.max_attempts:
        attempt_num = self.state.attempt_count + 1
        self._verbose_print(f"[VERIFY] Attempt {attempt_num}/{self.state.max_attempts}")
        self._debug_log(
            f"Verification attempt {attempt_num}/{self.state.max_attempts}",
            level="INFO",
        )

        try:
            result = run_gates(self.workspace, verbose=False)
            self.state.gate_results.append(result)

            if result.passed:
                self.state.status = AgentStatus.COMPLETED
                self._emit_event("verification_passed", {"attempt": attempt_num})
                self._verbose_print(f"[VERIFY] PASSED on attempt {attempt_num}")
                self._debug_log(
                    f"Verification PASSED on attempt {attempt_num}",
                    level="INFO",
                    always=True,
                )
                return  # Success!

            # Verification failed - log which checks did not pass
            failed_checks = [
                c.name for c in result.checks
                if c.status == GateStatus.FAILED
            ]
            self._verbose_print(f"[VERIFY] FAILED: {', '.join(failed_checks)}")
            self._debug_log(
                f"Verification failed: {', '.join(failed_checks)}",
                level="WARN",
                always=True,
            )

            # A failed gate run consumes one attempt
            self.state.attempt_count += 1

            # Check if we have retries left
            if self.state.attempt_count >= self.state.max_attempts:
                self._debug_log(
                    f"Max attempts ({self.state.max_attempts}) exceeded",
                    level="ERROR",
                    always=True,
                )
                break  # Exit loop, fall through to the failure handling

            # Attempt self-correction
            self._verbose_print("[VERIFY] Attempting self-correction...")
            self._emit_event("self_correction_started", {
                "attempt": attempt_num,
                "failed_checks": failed_checks,
            })

            fixed = self._attempt_verification_fix(result)
            if not fixed:
                self._verbose_print("[VERIFY] Self-correction FAILED, giving up")
                self._debug_log(
                    "Self-correction failed, giving up",
                    level="ERROR",
                    always=True,
                )
                break  # Can't fix, fall through to the failure handling

            self._verbose_print("[VERIFY] Self-correction applied, re-running verification...")
            self._debug_log(
                "Self-correction applied, re-running verification",
                level="INFO",
                always=True,
            )
            # Loop back to re-run gates

        except Exception as e:
            self._verbose_print(f"[VERIFY] Exception: {e}")
            self._emit_event("verification_error", {"error": str(e)})
            self._debug_log(
                f"Verification error: {e}",
                level="ERROR",
                always=True,
            )
            break  # Exit on exception

    # Max attempts exceeded or couldn't fix
    self._verbose_print(f"[VERIFY] Final result: FAILED after {self.state.attempt_count} attempts")
    # The status was VERIFYING on entry, so BLOCKED here can only have been
    # set while the loop ran. _attempt_verification_fix may escalate to a
    # blocker before returning False; presumably that marks the agent BLOCKED
    # (TODO confirm against _create_escalation_blocker). Overwriting it with
    # FAILED would hide the pending blocker from whoever inspects the state.
    if self.state.status != AgentStatus.BLOCKED:
        self.state.status = AgentStatus.FAILED
    self._emit_event("verification_failed", {
        "reason": "Max verification attempts exceeded or self-correction failed",
        "attempts": self.state.attempt_count,
    })
900
+
901
+ def _try_auto_fix(self, gate_result: GateResult) -> bool:
902
+ """Try to automatically fix lint issues.
903
+
904
+ Returns:
905
+ True if auto-fix was successful (returncode == 0)
906
+ """
907
+ if not self.executor or self.dry_run:
908
+ return False
909
+
910
+ import subprocess
911
+
912
+ try:
913
+ result = subprocess.run(
914
+ ["ruff", "check", "--fix", "."],
915
+ cwd=self.workspace.repo_path,
916
+ capture_output=True,
917
+ text=True,
918
+ timeout=30,
919
+ )
920
+
921
+ if result.returncode == 0:
922
+ self._debug_log("ruff --fix succeeded", level="INFO")
923
+ return True
924
+ else:
925
+ # Ruff fix failed - log the error output
926
+ stderr_preview = result.stderr[:500] if result.stderr else ""
927
+ stdout_preview = result.stdout[:500] if result.stdout else ""
928
+ self._debug_log(
929
+ f"ruff --fix failed (exit {result.returncode}): {stderr_preview or stdout_preview}",
930
+ level="WARN",
931
+ )
932
+ return False
933
+
934
+ except subprocess.TimeoutExpired:
935
+ self._debug_log("ruff --fix timed out after 30s", level="WARN")
936
+ return False
937
+ except subprocess.CalledProcessError as e:
938
+ self._debug_log(f"ruff --fix raised CalledProcessError: {e}", level="WARN")
939
+ return False
940
+ except FileNotFoundError:
941
+ self._debug_log("ruff command not found", level="WARN")
942
+ return False
943
+ except Exception as e:
944
+ self._debug_log(f"ruff --fix error: {e}", level="WARN")
945
+ return False
946
+
947
+ def _build_self_correction_context(self) -> str:
948
+ """Build rich context for intelligent self-correction.
949
+
950
+ Provides the LLM with project structure, config files, and file tree
951
+ so it can reason about local vs external packages, project layout, etc.
952
+
953
+ Returns:
954
+ Formatted context string for the self-correction prompt
955
+ """
956
+ sections = []
957
+
958
+ # Project structure overview
959
+ sections.append("## Project Structure")
960
+ if self.context and self.context.file_tree:
961
+ # Group files by directory
962
+ dirs: dict[str, list[str]] = {}
963
+ for f in self.context.file_tree[:50]: # Limit to 50 files
964
+ from pathlib import Path as P
965
+ dir_path = str(P(f.path).parent)
966
+ if dir_path not in dirs:
967
+ dirs[dir_path] = []
968
+ dirs[dir_path].append(P(f.path).name)
969
+
970
+ for dir_path in sorted(dirs.keys())[:15]:
971
+ sections.append(f" {dir_path}/")
972
+ for filename in dirs[dir_path][:8]:
973
+ sections.append(f" {filename}")
974
+ if len(dirs[dir_path]) > 8:
975
+ sections.append(f" ... ({len(dirs[dir_path]) - 8} more)")
976
+ sections.append("")
977
+
978
+ # Key config files content
979
+ config_files = ["pyproject.toml", "package.json", "Cargo.toml", "go.mod", "setup.py"]
980
+ for config_name in config_files:
981
+ config_path = self.workspace.repo_path / config_name
982
+ if config_path.exists():
983
+ try:
984
+ content = config_path.read_text()[:2000] # Limit size
985
+ sections.append(f"## {config_name}")
986
+ sections.append("```")
987
+ sections.append(content)
988
+ sections.append("```")
989
+ sections.append("")
990
+ except Exception:
991
+ pass
992
+
993
+ # Tech stack info if available
994
+ if self.context and self.context.tech_stack:
995
+ sections.append("## Tech Stack")
996
+ sections.append(self.context.tech_stack)
997
+ sections.append("")
998
+
999
+ # Files this agent created/modified in this run
1000
+ if self.state.step_results:
1001
+ modified_files = set()
1002
+ for result in self.state.step_results:
1003
+ for change in result.file_changes:
1004
+ modified_files.add(str(change.path))
1005
+ if modified_files:
1006
+ sections.append("## Files Modified by This Task")
1007
+ for f in sorted(modified_files)[:20]:
1008
+ sections.append(f" - {f}")
1009
+ sections.append("")
1010
+
1011
+ return "\n".join(sections)
1012
+
1013
def _classify_fix_scope(self, fix: dict) -> FixScope:
    """Decide whether a proposed fix is local to this task or global.

    Args:
        fix: Fix dictionary; the 'action', 'file' and 'command' keys are read.

    Returns:
        FixScope.GLOBAL for package-install shell commands, for directories
        created at the root or under ``src/``, and for files whose name is
        in GLOBAL_SCOPE_FILES. FixScope.LOCAL when the file path matches a
        file changed by a recorded step result. FixScope.GLOBAL otherwise.
    """
    from pathlib import Path as P

    kind = fix.get("action", "")
    target = fix.get("file", "")
    shell_cmd = fix.get("command", "")

    # Shell commands that install or add packages change shared project state
    if kind == "shell":
        installers = (
            "pip install", "npm install", "uv add", "cargo add",
            "go get", "yarn add", "pnpm add", "poetry add",
        )
        if any(marker in shell_cmd for marker in installers):
            return FixScope.GLOBAL

    # New directories at the project root or under src/ are global
    if kind == "create_directory" and ("/" not in target or target.startswith("src/")):
        return FixScope.GLOBAL

    # Config files are global wherever they live
    basename = P(target).name if target else ""
    if basename in GLOBAL_SCOPE_FILES:
        return FixScope.GLOBAL

    # A file already changed by a recorded step result counts as local
    if self.state.step_results:
        touched = {
            str(change.path)
            for step_result in self.state.step_results
            for change in step_result.file_changes
        }
        if target in touched:
            return FixScope.LOCAL

    # Anything unrecognised is treated as global for safety
    return FixScope.GLOBAL
1057
+
1058
+ def _attempt_verification_fix(self, gate_result: GateResult) -> bool:
1059
+ """Attempt to self-correct verification failures.
1060
+
1061
+ Strategy:
1062
+ 1. Try ruff --fix for quick lint fixes
1063
+ 2. Try pattern-based quick fixes (no LLM needed)
1064
+ 3. Collect error messages from failed checks
1065
+ 4. Check if we should escalate to blocker
1066
+ 5. Use LLM to generate a fix plan
1067
+ 6. Execute the fix plan steps
1068
+ 7. Return True if fixes were applied (caller will re-verify)
1069
+
1070
+ Args:
1071
+ gate_result: Result of failed verification gates
1072
+
1073
+ Returns:
1074
+ True if fixes were applied, False if unable to fix
1075
+ """
1076
+ self._verbose_print("[SELFCORRECT] Starting verification fix attempt")
1077
+ self._debug_log("Attempting self-correction", level="INFO", always=True)
1078
+
1079
+ # Step 1: Try ruff --fix for quick lint fixes
1080
+ self._verbose_print("[SELFCORRECT] Running ruff --fix...")
1081
+ self._try_auto_fix(gate_result)
1082
+
1083
+ # Step 2: Collect error messages from failed checks
1084
+ errors = []
1085
+ for check in gate_result.checks:
1086
+ if check.status == GateStatus.FAILED and check.output:
1087
+ errors.append(f"{check.name}: {check.output[:1000]}")
1088
+
1089
+ if not errors:
1090
+ self._verbose_print("[SELFCORRECT] No error messages to fix")
1091
+ self._debug_log("No error messages to fix", level="WARN")
1092
+ return False
1093
+
1094
+ self._verbose_print(f"[SELFCORRECT] Collected {len(errors)} error(s) to fix")
1095
+ error_summary = "\n\n".join(errors)
1096
+ self._debug_log(f"Errors to fix:\n{error_summary[:500]}...", level="INFO")
1097
+
1098
+ # Step 3: Try pattern-based quick fixes first (no LLM needed)
1099
+ quick_fix_applied = False
1100
+ for error in errors:
1101
+ quick_fix = find_quick_fix(
1102
+ error,
1103
+ repo_path=self.workspace.repo_path,
1104
+ )
1105
+ if quick_fix:
1106
+ # Check if we already tried this fix
1107
+ if self.fix_tracker.was_attempted(error, quick_fix.description):
1108
+ self._verbose_print(f"[SELFCORRECT] Skipping already-tried fix: {quick_fix.description}")
1109
+ self._debug_log(f"Skipping duplicate fix: {quick_fix.description}", level="INFO")
1110
+ continue
1111
+
1112
+ # Record the attempt
1113
+ self.fix_tracker.record_attempt(error, quick_fix.description)
1114
+
1115
+ self._verbose_print(f"[SELFCORRECT] Trying quick fix: {quick_fix.description}")
1116
+ success, msg = apply_quick_fix(quick_fix, self.workspace.repo_path, self.dry_run)
1117
+
1118
+ if success:
1119
+ self.fix_tracker.record_outcome(error, quick_fix.description, FixOutcome.SUCCESS)
1120
+ self._verbose_print(f"[SELFCORRECT] Quick fix applied: {msg}")
1121
+ self._debug_log(f"Quick fix applied: {msg}", level="INFO", always=True)
1122
+ quick_fix_applied = True
1123
+ else:
1124
+ self.fix_tracker.record_outcome(error, quick_fix.description, FixOutcome.FAILED)
1125
+ self._verbose_print(f"[SELFCORRECT] Quick fix failed: {msg}")
1126
+ self._debug_log(f"Quick fix failed: {msg}", level="WARN")
1127
+
1128
+ if quick_fix_applied:
1129
+ return True # Let caller re-verify
1130
+
1131
+ # Step 4: Check if we should escalate to blocker
1132
+ escalation = self.fix_tracker.should_escalate(error_summary)
1133
+ if escalation.should_escalate:
1134
+ self._verbose_print(f"[SELFCORRECT] Escalating to blocker: {escalation.reason}")
1135
+ self._debug_log(f"Escalating to blocker: {escalation.reason}", level="WARN", always=True)
1136
+ self._create_escalation_blocker(error_summary, escalation)
1137
+ return False # Stop trying, blocker created
1138
+
1139
+ # Step 5: Use LLM to generate a fix plan with full context
1140
+ # Build rich context so LLM can reason about project structure
1141
+ project_context = self._build_self_correction_context()
1142
+
1143
+ # Include info about already-tried fixes to avoid repetition
1144
+ attempted_fixes = self.fix_tracker.get_attempted_fixes(error_summary)
1145
+ already_tried = ""
1146
+ if attempted_fixes:
1147
+ already_tried = "\n\nALREADY TRIED (DO NOT REPEAT):\n" + "\n".join(f"- {f}" for f in attempted_fixes)
1148
+
1149
+ fix_prompt = f"""You are an intelligent agent fixing verification errors. You have access to the full project context below.
1150
+
1151
+ {project_context}
1152
+
1153
+ ## Errors to Fix
1154
+
1155
+ {error_summary}
1156
+
1157
+ ## Instructions
1158
+
1159
+ Analyze the errors and the project structure. Determine the root cause and propose fixes.
1160
+
1161
+ You can use ANY of these actions:
1162
+ - "edit": Modify existing file (requires old_code, new_code)
1163
+ - "create": Create new file (requires content)
1164
+ - "shell": Run a shell command (requires command)
1165
+
1166
+ Return a JSON object:
1167
+ {{
1168
+ "analysis": "What's the root cause? Is this a local code issue or a project configuration issue?",
1169
+ "fixes": [
1170
+ {{
1171
+ "action": "edit|create|shell",
1172
+ "scope": "local|global",
1173
+ "description": "What this fix does",
1174
+ "file": "path/to/file.py",
1175
+ "old_code": "for edits only",
1176
+ "new_code": "for edits only",
1177
+ "content": "for creates only",
1178
+ "command": "for shell only"
1179
+ }}
1180
+ ]
1181
+ }}
1182
+
1183
+ ## Scope Classification (IMPORTANT for parallel execution)
1184
+ - "local": Fixes to files YOU created in this task, your own tests, formatting fixes
1185
+ - "global": Config files (pyproject.toml, package.json), install commands, new packages, shared code
1186
+
1187
+ ## Common Patterns
1188
+ - ModuleNotFoundError for LOCAL package (src/foo exists): Use "uv pip install -e ." or fix pyproject.toml
1189
+ - ModuleNotFoundError for EXTERNAL package: Use "uv pip install <package>"
1190
+ - Import errors in your code: Edit the file to fix imports
1191
+ - Syntax errors: Edit the file to fix syntax
1192
+
1193
+ IMPORTANT:
1194
+ - Check if the module exists locally before trying to install it
1195
+ - Be precise with old_code - it must match exactly
1196
+ - Return valid JSON only{already_tried}"""
1197
+
1198
+ try:
1199
+ self._verbose_print("[SELFCORRECT] Asking LLM for fixes...")
1200
+ response = self.llm.complete(
1201
+ messages=[{"role": "user", "content": fix_prompt}],
1202
+ purpose=Purpose.EXECUTION,
1203
+ system="You are a code fixer. Return only valid JSON.",
1204
+ max_tokens=4096,
1205
+ temperature=0.0,
1206
+ )
1207
+
1208
+ # Parse the fix plan
1209
+ import json
1210
+ json_match = re.search(r"\{[\s\S]*\}", response.content)
1211
+ if not json_match:
1212
+ self._verbose_print("[SELFCORRECT] No JSON found in LLM response")
1213
+ self._debug_log("No JSON found in fix response", level="ERROR")
1214
+ return False
1215
+
1216
+ fix_plan = json.loads(json_match.group())
1217
+ fixes = fix_plan.get("fixes", [])
1218
+
1219
+ if not fixes:
1220
+ self._verbose_print("[SELFCORRECT] LLM returned empty fixes list")
1221
+ self._debug_log("No fixes generated", level="WARN")
1222
+ return False
1223
+
1224
+ analysis = fix_plan.get('analysis', 'no analysis')
1225
+ self._verbose_print(f"[SELFCORRECT] LLM generated {len(fixes)} fix(es): {analysis[:100]}...")
1226
+ self._debug_log(
1227
+ f"Generated {len(fixes)} fixes: {analysis}",
1228
+ level="INFO",
1229
+ always=True,
1230
+ )
1231
+
1232
+ # Step 6: Execute the fix plan with tracking
1233
+ applied = 0
1234
+ for fix in fixes:
1235
+ file_path = self.workspace.repo_path / fix.get("file", "")
1236
+ action = fix.get("action", "edit")
1237
+ fix_desc = fix.get("description", f"{action} {fix.get('file', 'unknown')}")
1238
+
1239
+ # Track the attempt
1240
+ self.fix_tracker.record_attempt(
1241
+ error_summary, fix_desc, file_path=str(file_path)
1242
+ )
1243
+
1244
+ try:
1245
+ fix_succeeded = False
1246
+
1247
+ if action == "create":
1248
+ # Create new file with path safety check
1249
+ content = fix.get("content", "")
1250
+ if content and not self.dry_run:
1251
+ # Verify path is safely within workspace
1252
+ is_safe, reason = _is_path_safe(file_path, self.workspace.repo_path)
1253
+ if not is_safe:
1254
+ self._debug_log(f"Create blocked: {reason}", level="WARN")
1255
+ else:
1256
+ file_path.parent.mkdir(parents=True, exist_ok=True)
1257
+ file_path.write_text(content)
1258
+ self._debug_log(f"Created {file_path}", level="INFO")
1259
+ applied += 1
1260
+ fix_succeeded = True
1261
+
1262
+ elif action == "edit":
1263
+ # Edit existing file with path safety check
1264
+ old_code = fix.get("old_code", "")
1265
+ new_code = fix.get("new_code", "")
1266
+
1267
+ # Verify path is safely within workspace before any file ops
1268
+ is_safe, reason = _is_path_safe(file_path, self.workspace.repo_path)
1269
+ if not is_safe:
1270
+ self._debug_log(f"Edit blocked: {reason}", level="WARN")
1271
+ elif not file_path.exists():
1272
+ self._debug_log(f"File not found: {file_path}", level="WARN")
1273
+ elif not old_code:
1274
+ self._debug_log(f"No old_code for {file_path}", level="WARN")
1275
+ else:
1276
+ content = file_path.read_text()
1277
+ if old_code not in content:
1278
+ self._debug_log(
1279
+ f"old_code not found in {file_path}",
1280
+ level="WARN",
1281
+ )
1282
+ elif not self.dry_run:
1283
+ new_content = content.replace(old_code, new_code, 1)
1284
+ file_path.write_text(new_content)
1285
+ self._debug_log(f"Fixed {file_path}", level="INFO")
1286
+ applied += 1
1287
+ fix_succeeded = True
1288
+
1289
+ elif action == "delete":
1290
+ # Delete file with safeguards
1291
+ if self.dry_run:
1292
+ self._debug_log(f"[DRY RUN] Would delete {file_path}", level="INFO")
1293
+ elif not file_path.exists():
1294
+ self._debug_log(f"File already deleted: {file_path}", level="INFO")
1295
+ fix_succeeded = True
1296
+ else:
1297
+ # Verify path is safely within workspace
1298
+ is_safe, reason = _is_path_safe(file_path, self.workspace.repo_path)
1299
+ if not is_safe:
1300
+ self._debug_log(f"Delete blocked: {reason}", level="WARN")
1301
+ else:
1302
+ file_path.unlink()
1303
+ self._debug_log(f"Deleted {file_path}", level="INFO")
1304
+ applied += 1
1305
+ fix_succeeded = True
1306
+
1307
+ elif action == "shell":
1308
+ # Run shell command with safe parsing
1309
+ command = fix.get("command", "")
1310
+ if command and not self.dry_run:
1311
+ scope = self._classify_fix_scope(fix)
1312
+ self._verbose_print(f"[SELFCORRECT] Running shell ({scope.value}): {command[:80]}...")
1313
+
1314
+ # Parse command for safe execution
1315
+ argv, requires_shell, parse_warning = _parse_command_safely(command)
1316
+
1317
+ # Reject commands that require shell=True (contain operators/unsafe constructs)
1318
+ if requires_shell:
1319
+ self._debug_log(
1320
+ f"Shell command rejected: {parse_warning} - command: {command[:100]}",
1321
+ level="ERROR",
1322
+ )
1323
+ self._verbose_print(
1324
+ f"[SELFCORRECT] Command rejected (requires shell): {parse_warning}"
1325
+ )
1326
+ # Mark as failed and skip execution
1327
+ self.fix_tracker.record_outcome(
1328
+ error_summary, fix_desc, FixOutcome.FAILED
1329
+ )
1330
+ continue # Skip to next fix
1331
+
1332
+ if parse_warning:
1333
+ self._debug_log(f"Shell safety: {parse_warning}", level="WARN")
1334
+
1335
+ # Helper to run the command safely (only shell=False now)
1336
+ def _run_command() -> subprocess.CompletedProcess:
1337
+ return subprocess.run(
1338
+ argv,
1339
+ shell=False,
1340
+ cwd=self.workspace.repo_path,
1341
+ capture_output=True,
1342
+ text=True,
1343
+ timeout=120,
1344
+ )
1345
+
1346
+ # Global scope commands should go through Coordinator
1347
+ if scope == FixScope.GLOBAL and self.fix_coordinator:
1348
+ status, should_execute = self.fix_coordinator.request_fix(
1349
+ error=error_summary,
1350
+ fix_type="shell",
1351
+ fix_description=fix_desc,
1352
+ command=command,
1353
+ task_id=self.state.task_id,
1354
+ )
1355
+ if status == "already_completed":
1356
+ # Another agent already fixed this
1357
+ self._verbose_print("[SELFCORRECT] Fix already done by another agent")
1358
+ applied += 1
1359
+ fix_succeeded = True
1360
+ elif status == "pending":
1361
+ # Wait for another agent to finish
1362
+ self._verbose_print("[SELFCORRECT] Waiting for another agent's fix...")
1363
+ if self.fix_coordinator.wait_for_fix(error_summary, timeout=60.0):
1364
+ applied += 1
1365
+ fix_succeeded = True
1366
+ else:
1367
+ self._debug_log("Timeout waiting for global fix", level="WARN")
1368
+ elif should_execute:
1369
+ # We are responsible for executing
1370
+ try:
1371
+ result = _run_command()
1372
+ success = result.returncode == 0
1373
+ self.fix_coordinator.report_fix_result(
1374
+ error_summary, success, result.stderr[:200] if not success else None
1375
+ )
1376
+ if success:
1377
+ self._debug_log(f"Global shell command succeeded: {command}", level="INFO")
1378
+ applied += 1
1379
+ fix_succeeded = True
1380
+ else:
1381
+ self._debug_log(f"Global shell command failed: {result.stderr[:200]}", level="WARN")
1382
+ except Exception as shell_err:
1383
+ self.fix_coordinator.report_fix_result(error_summary, False, str(shell_err))
1384
+ self._debug_log(f"Global shell error: {shell_err}", level="WARN")
1385
+ else:
1386
+ # Local scope - execute directly
1387
+ try:
1388
+ result = _run_command()
1389
+ if result.returncode == 0:
1390
+ self._debug_log(f"Shell command succeeded: {command}", level="INFO")
1391
+ applied += 1
1392
+ fix_succeeded = True
1393
+ else:
1394
+ self._debug_log(
1395
+ f"Shell command failed: {result.stderr[:200]}",
1396
+ level="WARN"
1397
+ )
1398
+ except subprocess.TimeoutExpired:
1399
+ self._debug_log(f"Shell command timed out: {command}", level="WARN")
1400
+ except Exception as shell_err:
1401
+ self._debug_log(f"Shell command error: {shell_err}", level="WARN")
1402
+
1403
+ # Record outcome
1404
+ self.fix_tracker.record_outcome(
1405
+ error_summary, fix_desc,
1406
+ FixOutcome.SUCCESS if fix_succeeded else FixOutcome.FAILED
1407
+ )
1408
+
1409
+ except Exception as e:
1410
+ self._debug_log(f"Fix failed for {file_path}: {e}", level="ERROR")
1411
+ self.fix_tracker.record_outcome(error_summary, fix_desc, FixOutcome.FAILED)
1412
+
1413
+ self._verbose_print(f"[SELFCORRECT] Applied {applied}/{len(fixes)} fixes")
1414
+ self._debug_log(
1415
+ f"Applied {applied}/{len(fixes)} fixes",
1416
+ level="INFO",
1417
+ always=True,
1418
+ )
1419
+ return applied > 0
1420
+
1421
+ except json.JSONDecodeError as e:
1422
+ self._verbose_print(f"[SELFCORRECT] JSON parse error: {e}")
1423
+ self._debug_log(f"Failed to parse fix plan JSON: {e}", level="ERROR")
1424
+ return False
1425
+ except Exception as e:
1426
+ self._verbose_print(f"[SELFCORRECT] Error: {e}")
1427
+ self._debug_log(f"Self-correction error: {e}", level="ERROR")
1428
+ return False
1429
+
1430
def _classify_error(self, error: str) -> str:
    """Label an error message as tactical, human, or technical.

    The lower-cased message is searched for the substrings in three pattern
    lists, in priority order:

    1. TACTICAL_DECISION_PATTERNS - implementation details the agent should
       decide by itself (these never lead to a blocker)
    2. HUMAN_INPUT_PATTERNS - requirements ambiguity or access issues
    3. TECHNICAL_ERROR_PATTERNS - coding errors the agent can self-correct

    Args:
        error: Error message to classify

    Returns:
        "tactical", "human" or "technical" for the first list with a match;
        "technical" when nothing matches, so the agent tries a fix first.
    """
    haystack = error.lower()

    # Order matters: tactical wins over human, human wins over technical.
    priority = (
        ("tactical", TACTICAL_DECISION_PATTERNS),
        ("human", HUMAN_INPUT_PATTERNS),
        ("technical", TECHNICAL_ERROR_PATTERNS),
    )
    for label, fragments in priority:
        if any(fragment in haystack for fragment in fragments):
            return label

    # Default to technical - agent should try to fix it first
    return "technical"
1466
+
1467
+ def _resolve_tactical_decision(self, error: str, context: "TaskContext") -> str:
1468
+ """Resolve a tactical decision using preferences and best judgment.
1469
+
1470
+ When the agent encounters a tactical question (implementation detail,
1471
+ tooling choice, file handling, etc.), this method resolves it
1472
+ autonomously instead of creating a blocker.
1473
+
1474
+ Args:
1475
+ error: The error/question that triggered this
1476
+ context: Task context with preferences
1477
+
1478
+ Returns:
1479
+ Resolution instruction for the agent to follow
1480
+ """
1481
+ self._emit_event("tactical_resolution_started", {"question": error[:200]})
1482
+
1483
+ # Build resolution prompt using preferences
1484
+ prefs = context.preferences
1485
+ pref_section = prefs.to_prompt_section() if prefs.has_preferences() else ""
1486
+
1487
+ prompt = f"""You encountered a tactical implementation decision that should be resolved autonomously.
1488
+
1489
+ ## The Question/Decision
1490
+ {error}
1491
+
1492
+ {pref_section}
1493
+
1494
+ ## Resolution Guidelines
1495
+
1496
+ As an expert software engineer, resolve this decision using:
1497
+ 1. Project preferences (above) if they apply
1498
+ 2. Industry best practices if no preference
1499
+ 3. The simpler approach when multiple options are equivalent
1500
+ 4. Common conventions for this type of project
1501
+
1502
+ IMPORTANT: This is a tactical decision you MUST resolve yourself. Do NOT ask the user.
1503
+ Do NOT say you need clarification. Make the best decision and proceed.
1504
+
1505
+ Respond with a brief, clear instruction on what to do. For example:
1506
+ - "Use pytest as the test framework"
1507
+ - "Overwrite the existing file with the new implementation"
1508
+ - "Use the latest stable version of the library"
1509
+ - "Install using uv (the project's package manager)"
1510
+
1511
+ Your decision:"""
1512
+
1513
+ try:
1514
+ response = self.llm.complete(
1515
+ messages=[{"role": "user", "content": prompt}],
1516
+ purpose=Purpose.GENERATION,
1517
+ max_tokens=256,
1518
+ temperature=0.0,
1519
+ )
1520
+
1521
+ resolution = response.strip()
1522
+ self._emit_event(
1523
+ "tactical_resolution_completed",
1524
+ {"question": error[:200], "resolution": resolution[:200]},
1525
+ )
1526
+ self._debug_log(
1527
+ f"TACTICAL DECISION RESOLVED: {resolution[:100]}",
1528
+ level="INFO",
1529
+ data={"question": error, "resolution": resolution},
1530
+ )
1531
+ return resolution
1532
+
1533
+ except Exception as e:
1534
+ # On LLM failure, use a sensible default
1535
+ self._emit_event(
1536
+ "tactical_resolution_failed", {"question": error[:200], "error": str(e)}
1537
+ )
1538
+ return "Proceed with the most common/standard approach for this situation."
1539
+
1540
def _should_create_blocker(
    self,
    consecutive_failures: int,
    result: StepResult,
    self_correction_attempts: int = 0,
) -> bool:
    """Decide whether a failed step warrants a human-facing blocker.

    The error text is classified via ``self._classify_error`` and the
    decision follows from the category:

    * ``"tactical"``  - never blocks; the agent resolves it on its own.
    * ``"human"``     - always blocks.
    * ``"technical"`` - blocks only once self-correction is exhausted.
    * anything else   - does not block.

    Args:
        consecutive_failures: Number of consecutive step failures
            (accepted for the caller's convenience; not consulted here).
        result: The failed step result whose ``error`` is classified.
        self_correction_attempts: How many self-correction attempts
            have already been made.

    Returns:
        True if a blocker should be created.
    """
    category = self._classify_error(result.error)

    if category == "tactical":
        # Tactical decisions are resolved autonomously using preferences.
        self._debug_log(
            "TACTICAL decision detected - will resolve autonomously, NOT creating blocker",
            level="INFO",
            data={"error": result.error[:200]},
        )
        return False

    if category == "human":
        return True

    # Technical errors escalate only after the self-correction budget is
    # used up; every other category lets the caller keep going.
    return (
        category == "technical"
        and self_correction_attempts >= MAX_SELF_CORRECTION_ATTEMPTS
    )
1586
+
1587
def _attempt_self_correction(
    self,
    step: PlanStep,
    result: StepResult,
    attempt: int,
) -> Optional[StepResult]:
    """Ask the LLM for a corrected version of a failed step and re-run it.

    A prompt describing the step, its error and its previous details is
    sent with ``Purpose.CORRECTION``. The stripped reply becomes the
    ``details`` of a copy of the step, which is then executed through
    ``self.executor``.

    Args:
        step: The step that failed.
        result: The failure result; its ``error`` text goes into the prompt.
        attempt: Which self-correction attempt this is (1-based).

    Returns:
        The result of executing the corrected step, or None when the LLM
        call or the re-execution raised an exception.
    """
    step_no = step.index

    self._emit_event(
        "self_correction_started",
        {"step": step_no, "attempt": attempt, "error": result.error[:200]},
    )

    self._debug_log(
        f"SELF-CORRECTION attempt {attempt} for step {step_no}",
        level="INFO",
        data={
            "step_type": step.type.value,
            "target": step.target,
            "description": step.description,
            "error": result.error,
        },
        always=True,
    )

    # Only the first 2000 characters of the failed approach are shown.
    previous_approach = step.details[:2000] if step.details else "No details"

    prompt = f"""A code execution step failed. Analyze the error and provide a corrected approach.

Step Description: {step.description}
Step Type: {step.type.value}
Target: {step.target}

Error:
{result.error}

Previous approach that failed:
{previous_approach}

Please provide a corrected version that fixes this error. Consider:
1. If it's a file path issue, find the correct path or create the file
2. If it's an import issue, add the missing import
3. If it's a syntax error, fix the syntax
4. If it's a logic error, fix the logic

Respond with ONLY the corrected code/content, no explanation."""

    # Keep the complete prompt in the debug log.
    self._debug_log_llm_interaction(
        f"Self-correction attempt {attempt} for step {step_no}",
        prompt,
    )

    try:
        # The CORRECTION purpose selects the model used for error analysis;
        # record which model that is before calling it.
        model_name = self.llm.get_model(Purpose.CORRECTION)
        self._debug_log(
            f"Using stepped-up model for self-correction: {model_name}",
            level="INFO",
            always=True,
        )

        reply = self.llm.complete(
            messages=[{"role": "user", "content": prompt}],
            purpose=Purpose.CORRECTION,
            max_tokens=4000,
            temperature=0.0,
        )
        corrected_details = reply.content.strip()

        # Keep the complete response in the debug log as well.
        self._debug_log_llm_interaction(
            f"Self-correction response {attempt} for step {step_no}",
            prompt,
            response=corrected_details,
        )
        self._debug_log(
            f"Self-correction LLM response received ({len(corrected_details)} chars)",
            level="DEBUG",
            data={"first_100_chars": corrected_details[:100]},
            always=True,
        )

        # Same step identity, new details.
        retry_step = PlanStep(
            index=step_no,
            type=step.type,
            target=step.target,
            description=f"{step.description} (self-corrected, attempt {attempt})",
            details=corrected_details,
            depends_on=step.depends_on,
        )

        self._debug_log(
            f"Executing corrected step {step_no}",
            level="DEBUG",
            always=True,
        )
        retry_result = self.executor.execute_step(retry_step, self.context)
        succeeded = retry_result.status == ExecutionStatus.SUCCESS

        self._debug_log(
            f"Corrected step result: {retry_result.status.value}",
            level="INFO",
            data={
                "success": succeeded,
                "error": retry_result.error if retry_result.error else None,
                "output": retry_result.output[:200] if retry_result.output else None,
            },
            always=True,
        )

        self._emit_event(
            "self_correction_completed",
            {"step": step_no, "attempt": attempt, "success": succeeded},
        )
        return retry_result

    except Exception as e:
        failure_text = str(e)
        self._debug_log(
            f"Self-correction EXCEPTION: {failure_text}",
            level="ERROR",
            always=True,
        )
        self._emit_event(
            "self_correction_failed",
            {"step": step_no, "attempt": attempt, "error": failure_text},
        )
        return None
1731
+
1732
def _create_blocker_from_failure(
    self,
    step: PlanStep,
    result: StepResult,
) -> None:
    """Create a blocker from a step failure, unless it can be avoided.

    The question text comes from ``_generate_blocker_question``. No blocker
    is created (and agent state is left untouched) when that text:

    * starts with ``RESOLVE_AUTONOMOUSLY:`` - emits ``tactical_resolved``;
    * starts with ``TECHNICAL_FIX:`` - emits ``technical_fix_needed``;
    * contains one of the known tactical phrases - emits
      ``tactical_resolved``.

    Otherwise a blocker is stored, the agent state becomes BLOCKED and a
    ``blocker_created`` event is emitted.

    Args:
        step: The step that failed.
        result: The failure result; its ``error`` becomes the blocker reason.
    """
    question = self._generate_blocker_question(step, result)

    # The generation prompt shows both directives wrapped in double quotes,
    # so the model may echo them back quoted. Ignore leading quotes and
    # whitespace when looking for a directive; payloads keep the raw text.
    directive = question.lstrip("\"'` \t\r\n")

    # The LLM decided this should be resolved autonomously.
    if directive.startswith("RESOLVE_AUTONOMOUSLY:"):
        self._debug_log(
            f"Auto-resolving tactical decision: {question}",
            level="INFO",
            always=True,
        )
        # Don't create a blocker - let the agent continue with self-correction
        self._emit_event("tactical_resolved", {
            "step": step.index,
            "resolution": question,
        })
        return

    # The LLM decided this is a technical fix.
    if directive.startswith("TECHNICAL_FIX:"):
        self._debug_log(
            f"Technical issue identified: {question}",
            level="INFO",
            always=True,
        )
        # Don't create a blocker - mark as needing retry
        self._emit_event("technical_fix_needed", {
            "step": step.index,
            "fix": question,
        })
        return

    # Also check for tactical patterns in the question itself
    question_lower = question.lower()
    tactical_indicators = (
        "virtual environment", "venv", "virtualenv",
        "would you like me to", "would you prefer",
        "should i create", "should i use",
        "pip install", "npm install", "uv sync",
        "break-system-packages", "pipx",
        "pytest.ini", "pyproject.toml", "asyncio_default_fixture_loop_scope",
        "fixture scope", "loop scope",
    )

    if any(indicator in question_lower for indicator in tactical_indicators):
        self._debug_log(
            f"Detected tactical question pattern, auto-resolving: {question[:100]}...",
            level="INFO",
            always=True,
        )
        self._emit_event("tactical_resolved", {
            "step": step.index,
            "resolution": "Auto-resolved tactical decision",
        })
        return

    # This is a legitimate blocker that requires human input
    blocker = blockers.create(
        workspace=self.workspace,
        question=question,
        task_id=self.state.task_id,
        created_by="agent",
    )

    blocker_context = f"Step {step.index}: {step.description}"

    self.state.status = AgentStatus.BLOCKED
    self.state.blocker = BlockerInfo(
        reason=result.error,
        question=question,
        context=blocker_context,
        step_index=step.index,
    )

    # "context" is included because the SSE mapping for "blocker_created"
    # reads data.get("context", ""); without it that field is always empty.
    self._emit_event("blocker_created", {
        "blocker_id": blocker.id,
        "question": question,
        "context": blocker_context,
    })
    # Note: task status update handled by runtime.block_run()
1817
+
1818
def _create_verification_blocker(self, gate_result: GateResult) -> None:
    """Record a verification failure as FAILED rather than BLOCKED.

    Failing gate checks (pytest, ruff, etc.) are treated as technical
    problems for the retry mechanism, not as questions for a human, so no
    blocker is created here: the agent status is set to FAILED and a
    ``verification_failed`` event listing the failed checks is emitted.

    Args:
        gate_result: Gate outcome whose ``checks`` are inspected.
    """
    failed_checks = []
    for check in gate_result.checks:
        if check.status == GateStatus.FAILED:
            failed_checks.append(check.name)

    joined_names = ", ".join(failed_checks)
    self._debug_log(
        f"Verification failed for: {joined_names}. "
        "Marking as FAILED (not BLOCKED) for retry.",
        level="WARN",
        always=True,
    )

    # FAILED (not BLOCKED): verification failures are retried, not escalated.
    self.state.status = AgentStatus.FAILED
    self._emit_event(
        "verification_failed",
        {
            "failed_checks": failed_checks,
            "reason": "Verification failed - technical issue for retry",
        },
    )
    # Note: task status update handled by runtime.fail_run()
1848
+
1849
def _create_escalation_blocker(
    self,
    error_summary: str,
    escalation: EscalationDecision,
) -> None:
    """Create a blocker once self-correction has been exhausted.

    The blocker question is a report rather than a plain request: it lists
    the error type, the problem summary, up to ten attempted fixes and the
    reason for escalating. The agent state becomes BLOCKED and an
    ``escalation_blocker_created`` event is emitted.

    Args:
        error_summary: Summary of the errors being fixed; used to look up
            context in ``self.fix_tracker``.
        escalation: EscalationDecision from FixAttemptTracker.
    """
    tracker_info = self.fix_tracker.get_blocker_context(error_summary)
    attempt_count = tracker_info.get("attempt_count", 0)
    error_type = tracker_info.get('error_type', 'Unknown error')

    # Bullet list of what was already tried (first ten entries only).
    attempted = escalation.attempted_fixes
    fixes_list = "\n".join(f" - {f}" for f in attempted[:10]) if attempted else ""

    question = f"""Task failed after multiple self-correction attempts.

**Error:** {error_type}

**Problem:** {escalation.error_summary[:300]}

**Attempted fixes ({attempt_count} total):**
{fixes_list}

**Reason for escalation:** {escalation.reason}

**How should I proceed?** Please provide guidance on:
1. What might be causing this persistent error?
2. Is there a different approach I should try?
3. Are there any missing dependencies or configuration?"""

    blocker = blockers.create(
        workspace=self.workspace,
        question=question,
        task_id=self.state.task_id,
        created_by="agent",
    )

    self.state.status = AgentStatus.BLOCKED
    self.state.blocker = BlockerInfo(
        reason=escalation.reason,
        question=question,
        context=f"Self-correction exhausted after {attempt_count} attempts",
    )

    self._emit_event(
        "escalation_blocker_created",
        {
            "blocker_id": blocker.id,
            "reason": escalation.reason,
            "attempt_count": attempt_count,
            "attempted_fixes": escalation.attempted_fixes,
        },
    )

    self._debug_log(
        f"Created escalation blocker: {blocker.id}",
        level="INFO",
        data={"reason": escalation.reason, "attempt_count": attempt_count},
        always=True,
    )
1919
+
1920
def _generate_blocker_question(
    self,
    step: PlanStep,
    result: StepResult,
) -> str:
    """Ask the LLM to phrase a blocker question for a failed step.

    The prompt tells the model to produce a question only when human input
    is required, and otherwise to answer with a ``RESOLVE_AUTONOMOUSLY:``
    or ``TECHNICAL_FIX:`` directive instead.

    Args:
        step: The failed step (description and target go into the prompt).
        result: The failure result (error text goes into the prompt).

    Returns:
        The stripped LLM reply, or a generic "How should I proceed?"
        question when the LLM call raises.
    """
    instructions = f"""A code execution step failed. Generate a clear, specific question to ask the user for help.

Step: {step.description}
Target: {step.target}
Error: {result.error}

CRITICAL INSTRUCTIONS:
1. ONLY generate a question if human input is TRULY required
2. Do NOT ask about tactical decisions - these should be resolved autonomously:
- Virtual environments (always create one)
- Package managers (use uv/pip/npm as appropriate)
- Test frameworks (use pytest/jest)
- File handling (overwrite existing files)
- Configuration options (use sensible defaults)
- Asyncio fixture scopes (use function scope)

3. DO ask about:
- Conflicting requirements in the specification
- Missing API keys or credentials
- Business logic that requires domain expertise
- Security policy clarifications

4. If the error is a tactical decision, respond with: "RESOLVE_AUTONOMOUSLY: [your decision]"
For example: "RESOLVE_AUTONOMOUSLY: Create virtual environment and install dependencies"

5. If the error is a technical issue (syntax error, import error, test failure), respond with:
"TECHNICAL_FIX: [what to fix]"

Generate a single question OR a RESOLVE_AUTONOMOUSLY/TECHNICAL_FIX directive:"""

    try:
        reply = self.llm.complete(
            messages=[{"role": "user", "content": instructions}],
            purpose=Purpose.GENERATION,
            max_tokens=300,
            temperature=0.0,
        )
        generated = reply.content.strip()
    except Exception:
        # Any failure while obtaining the reply falls back to a generic question.
        return f"Step '{step.description}' failed with error: {result.error}. How should I proceed?"
    return generated
1972
+
1973
def _handle_existing_blockers(self) -> None:
    """Mark the agent BLOCKED on the task's first already-open blocker.

    Copies the question of ``self.context.open_blockers[0]`` into the agent
    state and emits an ``existing_blocker`` event carrying its id.
    """
    self.state.status = AgentStatus.BLOCKED

    # Only the first open blocker is surfaced.
    first_open = self.context.open_blockers[0]
    self.state.blocker = BlockerInfo(
        reason="Pre-existing blocker",
        question=first_open.question,
    )

    payload = {
        "blocker_id": first_open.id,
        "question": first_open.question,
    }
    self._emit_event("existing_blocker", payload)
1988
+
1989
def _emit_event(self, event_type: str, data: dict) -> None:
    """Emit an agent event to every configured sink.

    Sinks, in order:

    1. ``self.on_event`` callback, if set (its exceptions propagate).
    2. The workspace event log, as ``EventType.WORK_STARTED`` for
       ``"agent_started"`` and ``EventType.RUN_STEP`` for everything else.
    3. The SSE publisher, when ``self.event_publisher`` and a task id exist.

    Sinks 2 and 3 are best-effort: a failure there never interrupts the
    agent, but it is recorded at debug level instead of vanishing silently.

    Args:
        event_type: Internal agent event name.
        data: Event payload; merged into the workspace event data.
    """
    import logging

    log = logging.getLogger(__name__)

    if self.on_event:
        self.on_event(event_type, data)

    # Also emit to workspace event log
    try:
        events.emit_for_workspace(
            self.workspace,
            EventType.WORK_STARTED if event_type == "agent_started" else EventType.RUN_STEP,
            data={"agent_event": event_type, **data},
            print_event=False,
        )
    except Exception:
        # Don't fail on event emission, but leave a trace for debugging.
        log.debug(
            "Workspace event emission failed for %r", event_type, exc_info=True
        )

    # Publish to SSE EventPublisher for web clients
    if self.event_publisher and self.state.task_id:
        try:
            self._publish_sse_event(event_type, data)
        except Exception:
            # Don't fail on SSE emission, but leave a trace for debugging.
            log.debug(
                "SSE event publication failed for %r", event_type, exc_info=True
            )
2011
+
2012
def _publish_sse_event(self, event_type: str, data: dict) -> None:
    """Translate an internal agent event into an SSE event and publish it.

    Handled event types: ``step_started``, ``step_completed`` (only when it
    carries output), ``step_failed``, ``verification_failed``,
    ``agent_completed`` / ``agent_finished``, ``agent_failed`` and
    ``blocker_created``. Any other event type is ignored.

    Args:
        event_type: Internal event type (step_started, step_completed, etc.)
        data: Event data
    """
    from codeframe.core.models import ProgressEvent, OutputEvent, ErrorEvent, CompletionEvent

    task_id = self.state.task_id
    publisher = self.event_publisher

    if event_type == "step_started":
        plan = self.state.plan
        step_no = data.get("step", 0)
        publisher.publish_sync(
            task_id,
            ProgressEvent(
                task_id=task_id,
                phase="execution",
                step=step_no,
                total_steps=len(plan.steps) if plan else 1,
                message=f"Step {step_no}: {data.get('target', 'unknown')}",
            ),
        )
        return

    if event_type == "step_completed":
        output = data.get("output", "")
        if output:
            # Only the first 500 characters of output are forwarded.
            publisher.publish_sync(
                task_id,
                OutputEvent(task_id=task_id, stream="stdout", line=output[:500]),
            )
        return

    if event_type == "step_failed":
        publisher.publish_sync(
            task_id,
            ErrorEvent(
                task_id=task_id,
                error_type="step_failed",
                error=data.get("error", "Step failed"),
            ),
        )
        return

    if event_type == "verification_failed":
        publisher.publish_sync(
            task_id,
            ErrorEvent(
                task_id=task_id,
                error_type="verification_failed",
                error=data.get("error", "Verification failed"),
            ),
        )
        return

    # run() emits "agent_finished"; "agent_completed" is accepted as well.
    if event_type in ("agent_completed", "agent_finished"):
        status = data.get("status", "completed")
        # Normalise either casing of the status to the lowercase SSE value;
        # unrecognised values pass through unchanged.
        sse_status = status
        for canonical in ("completed", "failed", "blocked"):
            if status in (canonical, canonical.upper()):
                sse_status = canonical
                break

        changes = self.executor.changes if self.executor else []
        publisher.publish_sync(
            task_id,
            CompletionEvent(
                task_id=task_id,
                status=sse_status,
                duration_seconds=0,  # Could track this
                files_modified=[c.path for c in changes],
            ),
        )
        publisher.complete_task_sync(task_id)
        return

    if event_type == "agent_failed":
        publisher.publish_sync(
            task_id,
            ErrorEvent(
                task_id=task_id,
                error_type="agent_failed",
                error=data.get("error", "Agent execution failed"),
            ),
        )
        publisher.complete_task_sync(task_id)
        return

    if event_type == "blocker_created":
        from codeframe.core.models import BlockerEvent

        publisher.publish_sync(
            task_id,
            BlockerEvent(
                task_id=task_id,
                blocker_id=data.get("blocker_id", ""),
                question=data.get("question", ""),
                context=data.get("context", ""),
            ),
        )
2103
+
2104
def _setup_debug_log(self) -> None:
    """Create the debug log file in the workspace repo and write its header.

    The file is named ``.codeframe_debug_<UTC timestamp>.log`` under
    ``self.workspace.repo_path``; its path is stored in
    ``self._debug_log_path`` for the other debug-log helpers.
    """
    # One clock reading feeds both the file name and the "Started" line.
    started_at = datetime.now(timezone.utc)
    timestamp = started_at.strftime("%Y%m%d_%H%M%S")
    self._debug_log_path = self.workspace.repo_path / f".codeframe_debug_{timestamp}.log"

    rule = "=" * 80

    # Explicit UTF-8 so the log does not depend on the platform's locale
    # encoding (workspace ids and paths may contain non-ASCII characters).
    with open(self._debug_log_path, "w", encoding="utf-8") as f:
        f.write(rule + "\n")
        f.write("CodeFRAME Agent Debug Log\n")
        f.write(f"Started: {started_at.isoformat()}\n")
        f.write(f"Workspace: {self.workspace.id}\n")
        f.write(f"Repo Path: {self.workspace.repo_path}\n")
        f.write(rule + "\n\n")
2117
+
2118
def _debug_log(
    self,
    message: str,
    level: str = "INFO",
    data: Optional[dict] = None,
    always: bool = False,
) -> None:
    """Append one entry to the debug log file.

    Does nothing when no debug log path has been set. ``DEBUG`` entries are
    skipped while ``self._failure_count`` is zero unless ``always`` is True.

    Args:
        message: Log message
        level: Log level (INFO, WARN, ERROR, DEBUG)
        data: Optional structured data; each value is rendered with
            ``str()`` and cut to 500 characters.
        always: If True, write DEBUG entries even before any failure.
    """
    if not self._debug_log_path:
        return

    # Only log detailed info after first failure, unless always=True
    if not always and self._failure_count == 0 and level == "DEBUG":
        return

    # Wall-clock UTC time with millisecond precision.
    timestamp = datetime.now(timezone.utc).strftime("%H:%M:%S.%f")[:-3]
    line = f"[{timestamp}] [{level}] {message}\n"

    # Explicit UTF-8: messages and data often carry LLM or tool output, and
    # the platform default encoding may be unable to represent it.
    with open(self._debug_log_path, "a", encoding="utf-8") as f:
        f.write(line)
        if data:
            for key, value in data.items():
                # Truncate long values for readability
                val_str = str(value)
                if len(val_str) > 500:
                    val_str = val_str[:500] + "... [TRUNCATED]"
                f.write(f" {key}: {val_str}\n")
            f.write("\n")
2153
+
2154
def _debug_log_llm_interaction(
    self,
    label: str,
    prompt: str,
    response: Optional[str] = None,
    error: Optional[str] = None,
) -> None:
    """Append a full LLM interaction (prompt plus outcome) to the debug log.

    Does nothing when no debug log path has been set. The prompt is always
    written untruncated; after it comes the response if one is given,
    otherwise the error if one is given.

    Args:
        label: Short title identifying the interaction.
        prompt: The complete prompt text sent to the LLM.
        response: The LLM reply, if available.
        error: Error text; written only when ``response`` is empty or None.
    """
    if not self._debug_log_path:
        return

    timestamp = datetime.now(timezone.utc).strftime("%H:%M:%S.%f")[:-3]

    # Explicit UTF-8: prompts and replies are arbitrary text and must not
    # depend on the platform's locale encoding.
    with open(self._debug_log_path, "a", encoding="utf-8") as f:
        f.write(f"\n{'='*60}\n")
        f.write(f"[{timestamp}] LLM INTERACTION: {label}\n")
        f.write(f"{'='*60}\n\n")

        f.write(f"--- PROMPT ({len(prompt)} chars) ---\n")
        f.write(prompt)
        f.write("\n\n")

        if response:
            f.write(f"--- RESPONSE ({len(response)} chars) ---\n")
            f.write(response)
            f.write("\n\n")
        elif error:
            f.write(f"--- ERROR ---\n{error}\n\n")

        f.write(f"{'='*60}\n\n")