codeframe-ai 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeframe/__init__.py +11 -0
- codeframe/__main__.py +20 -0
- codeframe/adapters/__init__.py +5 -0
- codeframe/adapters/e2b/__init__.py +13 -0
- codeframe/adapters/e2b/adapter.py +342 -0
- codeframe/adapters/e2b/budget.py +71 -0
- codeframe/adapters/e2b/credential_scanner.py +134 -0
- codeframe/adapters/llm/__init__.py +92 -0
- codeframe/adapters/llm/anthropic.py +414 -0
- codeframe/adapters/llm/base.py +444 -0
- codeframe/adapters/llm/mock.py +281 -0
- codeframe/adapters/llm/openai.py +483 -0
- codeframe/agents/__init__.py +8 -0
- codeframe/agents/dependency_resolver.py +714 -0
- codeframe/auth/__init__.py +16 -0
- codeframe/auth/api_key_router.py +238 -0
- codeframe/auth/api_keys.py +156 -0
- codeframe/auth/dependencies.py +358 -0
- codeframe/auth/manager.py +178 -0
- codeframe/auth/models.py +30 -0
- codeframe/auth/router.py +93 -0
- codeframe/auth/schemas.py +15 -0
- codeframe/auth/scopes.py +53 -0
- codeframe/cli/__init__.py +12 -0
- codeframe/cli/__main__.py +20 -0
- codeframe/cli/api_client.py +275 -0
- codeframe/cli/app.py +5688 -0
- codeframe/cli/auth.py +122 -0
- codeframe/cli/auth_commands.py +958 -0
- codeframe/cli/commands/__init__.py +5 -0
- codeframe/cli/config_commands.py +79 -0
- codeframe/cli/dashboard_commands.py +67 -0
- codeframe/cli/engines_commands.py +205 -0
- codeframe/cli/env_commands.py +409 -0
- codeframe/cli/helpers.py +56 -0
- codeframe/cli/hooks_commands.py +208 -0
- codeframe/cli/import_commands.py +129 -0
- codeframe/cli/pr_commands.py +549 -0
- codeframe/cli/proof_commands.py +415 -0
- codeframe/cli/stats_commands.py +311 -0
- codeframe/cli/telemetry_runtime.py +153 -0
- codeframe/cli/validators.py +123 -0
- codeframe/config/rate_limits.py +165 -0
- codeframe/core/__init__.py +15 -0
- codeframe/core/adapters/__init__.py +43 -0
- codeframe/core/adapters/agent_adapter.py +114 -0
- codeframe/core/adapters/builtin.py +326 -0
- codeframe/core/adapters/claude_code.py +62 -0
- codeframe/core/adapters/codex.py +393 -0
- codeframe/core/adapters/git_utils.py +40 -0
- codeframe/core/adapters/kilocode.py +126 -0
- codeframe/core/adapters/opencode.py +48 -0
- codeframe/core/adapters/streaming_chat.py +483 -0
- codeframe/core/adapters/subprocess_adapter.py +213 -0
- codeframe/core/adapters/verification_wrapper.py +269 -0
- codeframe/core/agent.py +2183 -0
- codeframe/core/agents_config.py +569 -0
- codeframe/core/api_key_service.py +211 -0
- codeframe/core/artifacts.py +428 -0
- codeframe/core/blocker_detection.py +218 -0
- codeframe/core/blockers.py +433 -0
- codeframe/core/checkpoints.py +481 -0
- codeframe/core/conductor.py +2255 -0
- codeframe/core/config.py +827 -0
- codeframe/core/config_watcher.py +268 -0
- codeframe/core/context.py +542 -0
- codeframe/core/context_packager.py +234 -0
- codeframe/core/credentials.py +735 -0
- codeframe/core/dependency_analyzer.py +229 -0
- codeframe/core/dependency_graph.py +290 -0
- codeframe/core/diagnostic_agent.py +712 -0
- codeframe/core/diagnostics.py +616 -0
- codeframe/core/editor.py +556 -0
- codeframe/core/engine_registry.py +256 -0
- codeframe/core/engine_stats.py +231 -0
- codeframe/core/environment.py +697 -0
- codeframe/core/events.py +375 -0
- codeframe/core/executor.py +1005 -0
- codeframe/core/fix_tracker.py +480 -0
- codeframe/core/gates.py +1322 -0
- codeframe/core/git.py +477 -0
- codeframe/core/github_connect_service.py +178 -0
- codeframe/core/github_integration_config.py +118 -0
- codeframe/core/github_issues_service.py +449 -0
- codeframe/core/hooks.py +184 -0
- codeframe/core/importers/__init__.py +1 -0
- codeframe/core/importers/ralph.py +540 -0
- codeframe/core/installer.py +650 -0
- codeframe/core/models.py +1026 -0
- codeframe/core/notifications_config.py +183 -0
- codeframe/core/planner.py +437 -0
- codeframe/core/prd.py +670 -0
- codeframe/core/prd_discovery.py +1118 -0
- codeframe/core/prd_stress_test.py +499 -0
- codeframe/core/progress.py +126 -0
- codeframe/core/proof/__init__.py +34 -0
- codeframe/core/proof/capture.py +79 -0
- codeframe/core/proof/evidence.py +56 -0
- codeframe/core/proof/ledger.py +574 -0
- codeframe/core/proof/models.py +162 -0
- codeframe/core/proof/obligations.py +103 -0
- codeframe/core/proof/runner.py +233 -0
- codeframe/core/proof/scope.py +81 -0
- codeframe/core/proof/stubs.py +156 -0
- codeframe/core/quick_fixes.py +558 -0
- codeframe/core/react_agent.py +1650 -0
- codeframe/core/reconciliation.py +183 -0
- codeframe/core/replay.py +788 -0
- codeframe/core/review.py +285 -0
- codeframe/core/runtime.py +1134 -0
- codeframe/core/sandbox/__init__.py +27 -0
- codeframe/core/sandbox/context.py +98 -0
- codeframe/core/sandbox/worktree.py +20 -0
- codeframe/core/schedule.py +396 -0
- codeframe/core/stall_detector.py +71 -0
- codeframe/core/stall_monitor.py +134 -0
- codeframe/core/state_machine.py +121 -0
- codeframe/core/streaming.py +502 -0
- codeframe/core/task_tree.py +400 -0
- codeframe/core/tasks.py +1022 -0
- codeframe/core/telemetry.py +232 -0
- codeframe/core/templates.py +221 -0
- codeframe/core/tools.py +942 -0
- codeframe/core/workspace.py +887 -0
- codeframe/core/worktrees.py +276 -0
- codeframe/git/__init__.py +5 -0
- codeframe/git/github_integration.py +505 -0
- codeframe/lib/__init__.py +0 -0
- codeframe/lib/audit_logger.py +248 -0
- codeframe/lib/metrics_tracker.py +800 -0
- codeframe/lib/quality/__init__.py +7 -0
- codeframe/lib/quality/complexity_analyzer.py +316 -0
- codeframe/lib/quality/owasp_patterns.py +284 -0
- codeframe/lib/quality/security_scanner.py +250 -0
- codeframe/lib/rate_limiter.py +312 -0
- codeframe/notifications/__init__.py +0 -0
- codeframe/notifications/webhook.py +380 -0
- codeframe/planning/__init__.py +30 -0
- codeframe/planning/issue_generator.py +219 -0
- codeframe/planning/prd_template_functions.py +137 -0
- codeframe/planning/prd_templates.py +975 -0
- codeframe/planning/task_scheduler.py +511 -0
- codeframe/planning/task_templates.py +533 -0
- codeframe/platform_store/__init__.py +5 -0
- codeframe/platform_store/database.py +277 -0
- codeframe/platform_store/repositories/__init__.py +24 -0
- codeframe/platform_store/repositories/api_key_repository.py +245 -0
- codeframe/platform_store/repositories/audit_repository.py +67 -0
- codeframe/platform_store/repositories/base.py +295 -0
- codeframe/platform_store/repositories/interactive_sessions.py +165 -0
- codeframe/platform_store/repositories/token_repository.py +598 -0
- codeframe/platform_store/repositories/workspace_registry_repository.py +175 -0
- codeframe/platform_store/schema_manager.py +321 -0
- codeframe/templates/AGENTS.md.default +94 -0
- codeframe/tui/__init__.py +5 -0
- codeframe/tui/app.py +256 -0
- codeframe/tui/data_service.py +103 -0
- codeframe/ui/__init__.py +0 -0
- codeframe/ui/dependencies.py +103 -0
- codeframe/ui/models.py +999 -0
- codeframe/ui/response_models.py +201 -0
- codeframe/ui/routers/__init__.py +5 -0
- codeframe/ui/routers/_helpers.py +29 -0
- codeframe/ui/routers/batches_v2.py +315 -0
- codeframe/ui/routers/blockers_v2.py +320 -0
- codeframe/ui/routers/checkpoints_v2.py +310 -0
- codeframe/ui/routers/costs_v2.py +322 -0
- codeframe/ui/routers/diagnose_v2.py +225 -0
- codeframe/ui/routers/discovery_v2.py +417 -0
- codeframe/ui/routers/environment_v2.py +284 -0
- codeframe/ui/routers/events_v2.py +75 -0
- codeframe/ui/routers/gates_v2.py +166 -0
- codeframe/ui/routers/git_v2.py +284 -0
- codeframe/ui/routers/github_integrations_v2.py +532 -0
- codeframe/ui/routers/interactive_sessions_v2.py +238 -0
- codeframe/ui/routers/pr_v2.py +709 -0
- codeframe/ui/routers/prd_v2.py +695 -0
- codeframe/ui/routers/proof_v2.py +755 -0
- codeframe/ui/routers/review_v2.py +360 -0
- codeframe/ui/routers/schedule_v2.py +214 -0
- codeframe/ui/routers/session_chat_ws.py +354 -0
- codeframe/ui/routers/settings_v2.py +562 -0
- codeframe/ui/routers/streaming_v2.py +155 -0
- codeframe/ui/routers/tasks_v2.py +1098 -0
- codeframe/ui/routers/templates_v2.py +232 -0
- codeframe/ui/routers/terminal_ws.py +267 -0
- codeframe/ui/routers/workspace_v2.py +527 -0
- codeframe/ui/server.py +568 -0
- codeframe/ui/shared.py +241 -0
- codeframe/workspace/__init__.py +5 -0
- codeframe/workspace/manager.py +249 -0
- codeframe_ai-0.9.0.dist-info/METADATA +517 -0
- codeframe_ai-0.9.0.dist-info/RECORD +197 -0
- codeframe_ai-0.9.0.dist-info/WHEEL +5 -0
- codeframe_ai-0.9.0.dist-info/entry_points.txt +3 -0
- codeframe_ai-0.9.0.dist-info/licenses/LICENSE +661 -0
- codeframe_ai-0.9.0.dist-info/top_level.txt +1 -0
codeframe/core/agent.py
ADDED
|
@@ -0,0 +1,2183 @@
|
|
|
1
|
+
"""Agent orchestrator for CodeFRAME v2.
|
|
2
|
+
|
|
3
|
+
Coordinates the full agent execution loop:
|
|
4
|
+
1. Load context for task
|
|
5
|
+
2. Generate implementation plan
|
|
6
|
+
3. Execute plan steps
|
|
7
|
+
4. Detect blockers when stuck
|
|
8
|
+
5. Run verification gates
|
|
9
|
+
6. Emit events throughout
|
|
10
|
+
|
|
11
|
+
This module is headless - no FastAPI or HTTP dependencies.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
import shlex
|
|
16
|
+
import subprocess
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from datetime import datetime, timezone
|
|
19
|
+
from enum import Enum
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import TYPE_CHECKING, Callable, Optional
|
|
22
|
+
|
|
23
|
+
from codeframe.adapters.llm import LLMProvider, Purpose
|
|
24
|
+
from codeframe.core import blockers, events
|
|
25
|
+
from codeframe.core.context import ContextLoader, TaskContext
|
|
26
|
+
from codeframe.core.events import EventType
|
|
27
|
+
from codeframe.core.executor import Executor, ExecutionStatus, StepResult
|
|
28
|
+
from codeframe.core.fix_tracker import EscalationDecision, FixAttemptTracker, FixOutcome
|
|
29
|
+
from codeframe.core.gates import run as run_gates, GateResult, GateStatus
|
|
30
|
+
from codeframe.core.planner import ImplementationPlan, Planner, PlanStep, StepType
|
|
31
|
+
from codeframe.core.quick_fixes import apply_quick_fix, find_quick_fix
|
|
32
|
+
from codeframe.core.workspace import Workspace
|
|
33
|
+
|
|
34
|
+
if TYPE_CHECKING:
|
|
35
|
+
from codeframe.core.conductor import GlobalFixCoordinator
|
|
36
|
+
from codeframe.core.streaming import EventPublisher, RunOutputLogger
|
|
37
|
+
|
|
38
|
+
# Executable names that may be launched directly from a parsed argv list,
# i.e. without handing the command string to a shell.
# _parse_command_safely compares the basename of argv[0] against this set.
SAFE_SHELL_COMMANDS = frozenset(
    (
        # Python tooling
        "python",
        "python3",
        "pytest",
        "ruff",
        "black",
        "mypy",
        "pip",
        "uv",
        # Node tooling
        "npm",
        "node",
        "npx",
        "yarn",
        "pnpm",
        # Basic system utilities
        "ls",
        "cat",
        "head",
        "tail",
        "grep",
        "find",
        "mkdir",
        "touch",
        "cp",
        "mv",
        # Version control
        "git",
        # Test runners / build tools
        "jest",
        "vitest",
        "cargo",
    )
)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _extract_file_from_command(command: str) -> Optional[str]:
|
|
54
|
+
"""Extract a file path from a verification command.
|
|
55
|
+
|
|
56
|
+
Examples:
|
|
57
|
+
"python task_tracker.py --help" -> "task_tracker.py"
|
|
58
|
+
"pytest tests/test_foo.py" -> "tests/test_foo.py"
|
|
59
|
+
"ruff check main.py" -> "main.py"
|
|
60
|
+
"python -m mymodule" -> None
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
command: The shell command to parse
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
The file path if found, None otherwise
|
|
67
|
+
"""
|
|
68
|
+
if not command:
|
|
69
|
+
return None
|
|
70
|
+
|
|
71
|
+
# Common patterns for Python file references
|
|
72
|
+
# Match .py files in the command
|
|
73
|
+
py_match = re.search(r'(\S+\.py)', command)
|
|
74
|
+
if py_match:
|
|
75
|
+
return py_match.group(1)
|
|
76
|
+
|
|
77
|
+
# No file found
|
|
78
|
+
return None
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _is_path_safe(file_path: Path, workspace_path: Path) -> tuple[bool, str]:
|
|
82
|
+
"""Check if a file path is safely within the workspace.
|
|
83
|
+
|
|
84
|
+
Prevents path traversal attacks via '..' components.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
file_path: The file path to check
|
|
88
|
+
workspace_path: The workspace root path
|
|
89
|
+
|
|
90
|
+
Returns:
|
|
91
|
+
Tuple of (is_safe, reason) where reason explains any rejection
|
|
92
|
+
"""
|
|
93
|
+
try:
|
|
94
|
+
# Resolve both paths to handle symlinks and relative paths
|
|
95
|
+
resolved_file = file_path.resolve()
|
|
96
|
+
resolved_workspace = workspace_path.resolve()
|
|
97
|
+
|
|
98
|
+
# Check if the file is within the workspace
|
|
99
|
+
try:
|
|
100
|
+
resolved_file.relative_to(resolved_workspace)
|
|
101
|
+
return (True, "")
|
|
102
|
+
except ValueError:
|
|
103
|
+
return (False, f"Path escapes workspace: {file_path}")
|
|
104
|
+
except Exception as e:
|
|
105
|
+
return (False, f"Path resolution error: {e}")
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _parse_command_safely(command: str) -> tuple[list[str], bool, str]:
|
|
109
|
+
"""Parse a shell command into an argument list for safe execution.
|
|
110
|
+
|
|
111
|
+
Args:
|
|
112
|
+
command: The shell command string
|
|
113
|
+
|
|
114
|
+
Returns:
|
|
115
|
+
Tuple of (argv_list, requires_shell, warning) where:
|
|
116
|
+
- argv_list: Parsed command arguments
|
|
117
|
+
- requires_shell: True if command needs shell interpretation
|
|
118
|
+
- warning: Non-empty if there are safety concerns
|
|
119
|
+
"""
|
|
120
|
+
# Check for shell operators that require shell=True
|
|
121
|
+
shell_operators = ['|', '&&', '||', '>', '<', '>>', '<<', ';', '$', '`', '$(']
|
|
122
|
+
has_shell_operators = any(op in command for op in shell_operators)
|
|
123
|
+
|
|
124
|
+
if has_shell_operators:
|
|
125
|
+
return ([], True, "Command contains shell operators")
|
|
126
|
+
|
|
127
|
+
try:
|
|
128
|
+
# Parse command into argv list
|
|
129
|
+
argv = shlex.split(command)
|
|
130
|
+
if not argv:
|
|
131
|
+
return ([], True, "Empty command")
|
|
132
|
+
|
|
133
|
+
# Check if the base command is in our safe list
|
|
134
|
+
base_cmd = Path(argv[0]).name # Handle paths like /usr/bin/python
|
|
135
|
+
if base_cmd not in SAFE_SHELL_COMMANDS:
|
|
136
|
+
return (argv, True, f"Command '{base_cmd}' not in safe list")
|
|
137
|
+
|
|
138
|
+
return (argv, False, "")
|
|
139
|
+
except ValueError as e:
|
|
140
|
+
# shlex.split failed (e.g., unclosed quotes)
|
|
141
|
+
return ([], True, f"Command parse error: {e}")
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class AgentStatus(str, Enum):
    """Lifecycle states of an agent run.

    The ``str`` mixin makes every member compare equal to its lowercase
    string value, so members can be serialized via ``.value`` and rebuilt
    with ``AgentStatus("...")``.
    """

    IDLE = "idle"  # default for a fresh AgentState
    PLANNING = "planning"  # the implementation plan is being generated
    EXECUTING = "executing"  # plan steps are being run
    BLOCKED = "blocked"  # paused on an open blocker
    VERIFYING = "verifying"
    COMPLETED = "completed"
    FAILED = "failed"  # the run ended with an error
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class FixScope(str, Enum):
    """How far-reaching a proposed fix is, which decides who may apply it.

    LOCAL fixes touch only what the agent owns (files it created, its own
    tests) and can be applied autonomously. GLOBAL fixes touch shared state
    (config files, installs, shared code) and need Conductor coordination.
    """

    LOCAL = "local"
    GLOBAL = "global"
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# File names whose modification requires global coordination.
GLOBAL_SCOPE_FILES = {
    # Python packaging / dependencies
    "pyproject.toml",
    "requirements.txt",
    "setup.py",
    "setup.cfg",
    # JavaScript / TypeScript
    "package.json",
    "tsconfig.json",
    # Rust / Go
    "Cargo.toml",
    "go.mod",
    # Environment files
    ".env",
    ".env.example",
    # Build / container tooling
    "Dockerfile",
    "docker-compose.yml",
    "Makefile",
}
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
@dataclass
class BlockerInfo:
    """Describes a blocker the agent has run into.

    ``reason`` and ``question`` are required; the remaining fields are
    optional detail.
    """

    # Why the agent is blocked
    reason: str
    # Question to ask the user
    question: str
    # Additional context about the blocker
    context: str = ""
    # Which step caused the blocker (None when not tied to a step)
    step_index: Optional[int] = None
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
@dataclass
class AgentState:
    """Mutable snapshot of one agent run, used for pause/resume.

    Every field has a default, so ``AgentState()`` is a valid idle state.
    """

    # Current agent status
    status: AgentStatus = AgentStatus.IDLE
    # Task being executed
    task_id: str = ""
    # Generated implementation plan (None until planning has run)
    plan: Optional[ImplementationPlan] = None
    # Current step index (0-based)
    current_step: int = 0
    # Results of executed steps, in execution order
    step_results: list[StepResult] = field(default_factory=list)
    # Current blocker (if any)
    blocker: Optional[BlockerInfo] = None
    # Results of verification gates
    gate_results: list[GateResult] = field(default_factory=list)
    # Number of execution attempts
    attempt_count: int = 0
    # Maximum attempts before giving up
    max_attempts: int = 3

    def to_dict(self) -> dict:
        """Convert to a plain dictionary for persistence.

        Only a subset of the state is written: ``gate_results``,
        ``max_attempts`` and the blocker's ``step_index`` are not included.

        Returns:
            Dict with keys ``status``, ``task_id``, ``plan``,
            ``current_step``, ``step_results``, ``blocker`` and
            ``attempt_count``.
        """
        plan_payload = self.plan.to_dict() if self.plan else None

        steps_payload = []
        for result in self.step_results:
            steps_payload.append(
                {
                    "step_index": result.step.index,
                    "status": result.status.value,
                    "output": result.output,
                    "error": result.error,
                }
            )

        blocker_payload = None
        if self.blocker:
            blocker_payload = {
                "reason": self.blocker.reason,
                "question": self.blocker.question,
                "context": self.blocker.context,
            }

        return {
            "status": self.status.value,
            "task_id": self.task_id,
            "plan": plan_payload,
            "current_step": self.current_step,
            "step_results": steps_payload,
            "blocker": blocker_payload,
            "attempt_count": self.attempt_count,
        }
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
# Blocker detection thresholds.
# NOTE(review): the code that consumes these limits is outside this excerpt;
# the per-constant notes below are inferred from the names -- confirm.
MAX_CONSECUTIVE_FAILURES = 3  # presumably failed steps in a row
MAX_STEP_RETRIES = 2  # presumably retries of a single step
MAX_SELF_CORRECTION_ATTEMPTS = 2  # presumably self-correction rounds
MAX_CONSECUTIVE_VERIFICATION_FAILURES = 3  # presumably failed verifications in a row
|
|
258
|
+
|
|
259
|
+
# Pattern constants live in blocker_detection.py (authoritative location).
|
|
260
|
+
# Only import what Agent code actually uses.
|
|
261
|
+
from codeframe.core.blocker_detection import ( # noqa: E402
|
|
262
|
+
HUMAN_INPUT_PATTERNS,
|
|
263
|
+
TACTICAL_DECISION_PATTERNS,
|
|
264
|
+
TECHNICAL_ERROR_PATTERNS,
|
|
265
|
+
)
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
class Agent:
|
|
269
|
+
"""Orchestrates task execution through the full agent loop.
|
|
270
|
+
|
|
271
|
+
The agent coordinates:
|
|
272
|
+
- Context loading and planning
|
|
273
|
+
- Step-by-step execution
|
|
274
|
+
- Blocker detection and creation
|
|
275
|
+
- Verification gate integration
|
|
276
|
+
- State management for pause/resume
|
|
277
|
+
"""
|
|
278
|
+
|
|
279
|
+
def __init__(
    self,
    workspace: Workspace,
    llm_provider: LLMProvider,
    max_context_tokens: int = 100_000,
    dry_run: bool = False,
    on_event: Optional[Callable[[str, dict], None]] = None,
    debug: bool = False,
    verbose: bool = False,
    fix_coordinator: Optional["GlobalFixCoordinator"] = None,
    output_logger: Optional["RunOutputLogger"] = None,
    event_publisher: Optional["EventPublisher"] = None,
):
    """Set up an agent bound to one workspace and one LLM provider.

    Args:
        workspace: Target workspace
        llm_provider: LLM provider for planning and code generation
            (stored as ``self.llm``)
        max_context_tokens: Maximum tokens for context loading
        dry_run: If True, don't make actual changes
        on_event: Optional callback for agent events
        debug: If True, ``_setup_debug_log()`` is called at the end of
            construction
        verbose: If True, print detailed progress to stdout
        fix_coordinator: Optional coordinator for global fixes (for parallel execution)
        output_logger: Optional logger for streaming output to file (for cf work follow)
        event_publisher: Optional EventPublisher for SSE streaming (for web clients)
    """
    # Collaborators supplied by the caller
    self.workspace = workspace
    self.llm = llm_provider
    self.on_event = on_event
    self.fix_coordinator = fix_coordinator
    self.output_logger = output_logger
    self.event_publisher = event_publisher

    # Behaviour switches
    self.max_context_tokens = max_context_tokens
    self.dry_run = dry_run
    self.debug = debug
    self.verbose = verbose

    # Per-run state; context and executor stay None until a run fills them
    self.state = AgentState()
    self.context: Optional[TaskContext] = None
    self.executor: Optional[Executor] = None

    # Fix attempt tracking for loop prevention and escalation
    self.fix_tracker = FixAttemptTracker()

    # Both attributes are assigned before _setup_debug_log() runs.
    self._failure_count = 0  # Track failures for verbose logging
    self._debug_log_path: Optional[Path] = None
    if debug:
        self._setup_debug_log()
|
|
329
|
+
|
|
330
|
+
def _verbose_print(self, message: str) -> None:
|
|
331
|
+
"""Print message to stdout (if verbose) and to output log file.
|
|
332
|
+
|
|
333
|
+
The output log file is always written to (if logger provided) to enable
|
|
334
|
+
streaming via `cf work follow`, even when verbose=False.
|
|
335
|
+
|
|
336
|
+
Args:
|
|
337
|
+
message: Message to print/log
|
|
338
|
+
"""
|
|
339
|
+
# Print to stdout if verbose mode is enabled
|
|
340
|
+
if self.verbose:
|
|
341
|
+
print(message)
|
|
342
|
+
|
|
343
|
+
# Always write to output log if logger is provided (for cf work follow)
|
|
344
|
+
if self.output_logger:
|
|
345
|
+
self.output_logger.write(message + "\n")
|
|
346
|
+
|
|
347
|
+
def run(self, task_id: str) -> AgentState:
    """Execute a task from scratch; the main entry point of the agent.

    Sequence:
    1. Reset ``self.state`` and load the task context
    2. If the context already has open blockers, hand off to
       ``_handle_existing_blockers()`` and return immediately
       (no "agent_finished" event is emitted on this path)
    3. Create the implementation plan
    4. Execute the plan
    5. Run final verification if the status is still EXECUTING

    Args:
        task_id: Task to execute

    Returns:
        Final AgentState

    Raises:
        Exception: Anything raised while loading, planning, executing or
            verifying is re-raised after the status is set to FAILED and
            an "agent_failed" event is emitted.
    """
    self.state = AgentState(task_id=task_id, status=AgentStatus.IDLE)
    self._emit_event("agent_started", {"task_id": task_id})

    try:
        self._emit_event("loading_context", {"task_id": task_id})
        self.context = self._load_context(task_id)

        # Pre-existing blockers take priority over planning.
        if self.context.open_blockers:
            self._handle_existing_blockers()
            return self.state

        self.state.status = AgentStatus.PLANNING
        self._emit_event("planning_started", {})
        plan = self._create_plan()
        self.state.plan = plan
        planning_summary = {
            "steps": plan.total_steps,
            "complexity": plan.estimated_complexity.value,
        }
        self._emit_event("planning_completed", planning_summary)

        self.state.status = AgentStatus.EXECUTING
        self._execute_plan()

        # Verify only when execution left the status at EXECUTING.
        if self.state.status == AgentStatus.EXECUTING:
            self._run_final_verification()

    except Exception as e:
        self.state.status = AgentStatus.FAILED
        self._emit_event("agent_failed", {"error": str(e)})
        raise

    else:
        self._emit_event("agent_finished", {"status": self.state.status.value})
        return self.state
|
|
400
|
+
|
|
401
|
+
def resume(self, task_id: str, state: AgentState) -> AgentState:
    """Resume execution from a saved state.

    The context is reloaded. If the saved state was BLOCKED and the
    context no longer reports open blockers, the blocker is cleared and
    plan execution continues; if blockers are still open, the state is
    returned unchanged (no "agent_finished" event on that path). Final
    verification then runs whenever the status is EXECUTING.

    Failures are handled the same way as in ``run()``: the status becomes
    FAILED, an "agent_failed" event is emitted, and the exception is
    re-raised.

    Args:
        task_id: Task to resume
        state: Previous agent state

    Returns:
        Final AgentState

    Raises:
        Exception: Anything raised while reloading context, executing or
            verifying, re-raised after the failure has been recorded.
    """
    self.state = state
    self._emit_event("agent_resumed", {"task_id": task_id, "step": state.current_step})

    try:
        # Reload context so blocker status reflects the present.
        self.context = self._load_context(task_id)

        if self.state.status == AgentStatus.BLOCKED:
            if self.context.open_blockers:
                # Still blocked
                return self.state

            # Blockers resolved, continue execution
            self.state.status = AgentStatus.EXECUTING
            self.state.blocker = None
            self._execute_plan()

        # Run final verification if needed
        if self.state.status == AgentStatus.EXECUTING:
            self._run_final_verification()

    except Exception as e:
        # Mirror run(): record the failure for observers, then propagate.
        self.state.status = AgentStatus.FAILED
        self._emit_event("agent_failed", {"error": str(e)})
        raise

    self._emit_event("agent_finished", {"status": self.state.status.value})
    return self.state
|
|
434
|
+
|
|
435
|
+
def _load_context(self, task_id: str) -> TaskContext:
    """Build a ContextLoader for this workspace and load the task's context.

    The loader is capped at ``self.max_context_tokens``.
    """
    return ContextLoader(
        self.workspace,
        max_tokens=self.max_context_tokens,
    ).load(task_id)
|
|
439
|
+
|
|
440
|
+
def _create_plan(self) -> ImplementationPlan:
    """Ask a Planner backed by ``self.llm`` to plan from ``self.context``."""
    return Planner(self.llm).create_plan(self.context)
|
|
444
|
+
|
|
445
|
+
def _execute_plan(self) -> None:
    """Execute the implementation plan step by step.

    A new Executor is created, then the steps of ``self.state.plan`` are
    walked starting at ``self.state.current_step``. Every executed step's
    result is appended to ``self.state.step_results``.

    * SUCCESS on a FILE_CREATE / FILE_EDIT step triggers incremental
      verification; a failed gate is handed to ``_try_auto_fix`` and, if
      that does not help, to up to MAX_SELF_CORRECTION_ATTEMPTS
      self-correction attempts.
    * FAILED steps are classified; "human" errors create a blocker at
      once, everything else goes through the same self-correction loop.
    * SKIPPED steps simply advance.

    The method returns early, without finishing the plan, after a blocker
    has been created or after the run has been marked FAILED.

    Raises:
        ValueError: If ``self.state.plan`` is falsy (there is no plan).
    """
    if not self.state.plan:
        raise ValueError("No plan to execute")

    self.executor = Executor(
        llm_provider=self.llm,
        repo_path=self.workspace.repo_path,
        dry_run=self.dry_run,
        event_publisher=self.event_publisher,
    )

    # Both counters are local to this call; they start at zero on every
    # invocation of _execute_plan.
    consecutive_failures = 0
    consecutive_verification_failures = 0

    self._debug_log(
        f"Starting plan execution with {len(self.state.plan.steps)} steps",
        level="INFO",
        always=True,
    )

    while self.state.current_step < len(self.state.plan.steps):
        step = self.state.plan.steps[self.state.current_step]

        self._debug_log(
            f"=== STEP {step.index} ({step.type.value}) ===",
            level="INFO",
            data={
                "target": step.target,
                "description": step.description,
                "details_length": len(step.details) if step.details else 0,
                "current_step_index": self.state.current_step,
                "consecutive_failures": consecutive_failures,
            },
            always=True,
        )

        self._emit_event("step_started", {
            "step": step.index,
            "type": step.type.value,
            "target": step.target,
        })

        # Execute the step
        result = self.executor.execute_step(step, self.context)
        self.state.step_results.append(result)

        self._debug_log(
            f"Step {step.index} execution result: {result.status.value}",
            level="INFO" if result.status == ExecutionStatus.SUCCESS else "WARN",
            data={
                "output_preview": result.output[:200] if result.output else None,
                "error": result.error if result.error else None,
            },
            always=True,
        )

        if result.status == ExecutionStatus.SUCCESS:
            consecutive_failures = 0
            # NOTE(review): unlike the debug log above, this slice is not
            # guarded against a None output - confirm output is always a str.
            self._emit_event("step_completed", {
                "step": step.index,
                "output": result.output[:200],
            })

            # Run incremental verification for file changes
            if step.type in {StepType.FILE_CREATE, StepType.FILE_EDIT}:
                gate_result = self._run_incremental_verification()
                if gate_result and gate_result.passed:
                    consecutive_verification_failures = 0
                elif gate_result and not gate_result.passed:
                    # Try to fix lint issues automatically (works for style, not syntax)
                    if not self._try_auto_fix(gate_result):
                        # Auto-fix failed - need to self-correct the code
                        # Extract detailed error info from gate result
                        failed_checks = [
                            c for c in gate_result.checks
                            if c.status != GateStatus.PASSED
                        ]
                        failed_check_names = [c.name for c in failed_checks]

                        # Build detailed error string with actual output
                        error_details = []
                        for check in failed_checks:
                            if check.output:
                                error_details.append(
                                    f"[{check.name}] {check.output[:500]}"
                                )
                        error_detail_str = (
                            "\n".join(error_details)
                            if error_details
                            else "No details available"
                        )

                        self._emit_event("verification_failed", {
                            "step": step.index,
                            "error": f"Verification failed: {failed_check_names}",
                            "gates": failed_check_names,
                            "error_count": len(failed_checks),
                            "error_details": error_detail_str[:1000],
                        })

                        # Synthetic FAILED result: the step itself succeeded,
                        # but this carries the gate output into self-correction.
                        failed_result = StepResult(
                            step=step,
                            status=ExecutionStatus.FAILED,
                            error=(
                                f"Verification failed: {failed_check_names}"
                                f"\n{error_detail_str}"
                            ),
                        )

                        # Try self-correction to fix the code
                        self_correction_attempts = 0
                        current_result = failed_result
                        self_correction_succeeded = False

                        while self_correction_attempts < MAX_SELF_CORRECTION_ATTEMPTS:
                            self_correction_attempts += 1
                            corrected_result = self._attempt_self_correction(
                                step, current_result, self_correction_attempts
                            )

                            # None means no correction was produced; stop retrying.
                            if corrected_result is None:
                                break

                            if corrected_result.status == ExecutionStatus.SUCCESS:
                                # Re-verify the corrected code
                                recheck = self._run_incremental_verification()
                                # A None recheck is accepted the same way as a pass.
                                if recheck is None or recheck.passed:
                                    self._emit_event("step_completed", {
                                        "step": step.index,
                                        "output": "Code fixed via self-correction",
                                        "self_corrected": True,
                                    })
                                    self_correction_succeeded = True
                                    break

                                # Re-verification failed — preserve error context
                                # so next correction attempt knows what to fix
                                reverify_failed = [
                                    c for c in recheck.checks
                                    if c.status != GateStatus.PASSED
                                ]
                                reverify_errors = []
                                for check in reverify_failed:
                                    if check.output:
                                        reverify_errors.append(f"[{check.name}] {check.output[:500]}")
                                reverify_msg = "\n".join(reverify_errors) if reverify_errors else "Re-verification failed"
                                current_result = StepResult(
                                    step=step,
                                    status=ExecutionStatus.FAILED,
                                    error=f"Re-verification after correction:\n{reverify_msg}",
                                )
                                continue

                            # The correction itself failed; feed its result
                            # into the next attempt.
                            current_result = corrected_result

                        if self_correction_succeeded:
                            consecutive_verification_failures = 0
                        else:
                            # Couldn't fix the verification error
                            consecutive_verification_failures += 1
                            consecutive_failures += 1
                            if consecutive_verification_failures >= MAX_CONSECUTIVE_VERIFICATION_FAILURES:
                                self._debug_log(
                                    f"ABORTING: Too many consecutive verification failures ({consecutive_verification_failures})",
                                    level="ERROR",
                                    always=True,
                                )
                                self._emit_event("execution_aborted", {
                                    "reason": f"Too many consecutive verification failures ({consecutive_verification_failures})",
                                    "step": step.index,
                                })
                                # Force blocker creation — bypass LLM classification
                                # since this is a definitive abort, not a tactical decision
                                error_msg = current_result.error if current_result else "Repeated verification failures"
                                blocker = blockers.create(
                                    workspace=self.workspace,
                                    question=f"Agent aborted: {consecutive_verification_failures} consecutive verification failures at step {step.index} ({step.description}). Last error: {error_msg[:500]}",
                                    task_id=self.state.task_id,
                                    created_by="agent",
                                )
                                self.state.status = AgentStatus.BLOCKED
                                self.state.blocker = BlockerInfo(
                                    reason="Too many consecutive verification failures",
                                    question=blocker.question,
                                    context=f"Step {step.index}: {step.description}",
                                )
                                return
                            if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
                                self._create_blocker_from_failure(step, current_result)
                                return
                            # Otherwise, continue to next step with broken file

            # The step ran successfully, so advance even when its
            # verification could not be repaired.
            self.state.current_step += 1

        elif result.status == ExecutionStatus.FAILED:
            consecutive_failures += 1
            self._failure_count += 1  # Track for debug logging verbosity

            self._debug_log(
                f"STEP FAILED: consecutive_failures={consecutive_failures}, total_failures={self._failure_count}",
                level="WARN",
                data={"error": result.error},
                always=True,
            )

            self._emit_event("step_failed", {
                "step": step.index,
                "error": result.error[:200],
            })

            # Special handling for verification step failures
            # When verification fails (e.g., syntax error), we need to fix the TARGET file
            # not "self-correct" the verification step itself
            if step.type == StepType.VERIFICATION:
                # Extract the actual file path from the verification command
                # e.g., "python task_tracker.py --help" -> "task_tracker.py"
                file_path = _extract_file_from_command(step.target)

                if file_path:
                    # Create a FILE_EDIT step to fix the target file
                    fix_step = PlanStep(
                        index=step.index,
                        type=StepType.FILE_EDIT,
                        target=file_path,
                        description=f"Fix {file_path} - {result.error[:100]}",
                        details=f"The verification command '{step.target}' failed with error: {result.error}. Fix this error in {file_path}.",
                        depends_on=[],
                    )
                    # Replace step with the fix step for self-correction
                    # (only the local name is rebound; the plan is not modified)
                    step = fix_step
                else:
                    # Can't determine which file to fix, create blocker
                    self._debug_log(
                        f"Cannot extract file path from verification command: {step.target}",
                        level="WARN",
                        always=True,
                    )
                    self._create_blocker_from_failure(step, result)
                    return

            # Classify the error
            error_type = self._classify_error(result.error)

            # For human-input-needed errors, create blocker immediately
            if error_type == "human":
                self._create_blocker_from_failure(step, result)
                return

            # For technical errors, try self-correction first
            self_correction_attempts = 0
            current_result = result
            self_correction_succeeded = False

            while self_correction_attempts < MAX_SELF_CORRECTION_ATTEMPTS:
                self_correction_attempts += 1
                corrected_result = self._attempt_self_correction(
                    step, current_result, self_correction_attempts
                )

                if corrected_result is None:
                    # Self-correction failed to even attempt, stop trying
                    break

                if corrected_result.status == ExecutionStatus.SUCCESS:
                    # Self-correction worked! Update state and continue
                    self.state.step_results[-1] = corrected_result  # Replace failed result
                    consecutive_failures = 0
                    self._emit_event("step_completed", {
                        "step": step.index,
                        "output": corrected_result.output[:200],
                        "self_corrected": True,
                    })

                    # Run incremental verification for file changes
                    if step.type in {StepType.FILE_CREATE, StepType.FILE_EDIT}:
                        gate_result = self._run_incremental_verification()
                        if gate_result and not gate_result.passed:
                            # A gate failure here only counts as one failure;
                            # no further self-correction is attempted.
                            if not self._try_auto_fix(gate_result):
                                consecutive_failures += 1

                    self.state.current_step += 1
                    self_correction_succeeded = True
                    break

                # Self-correction didn't succeed, try again
                current_result = corrected_result

            # Handle case where self-correction didn't succeed
            if not self_correction_succeeded:
                # Check if we should create a blocker
                if self._should_create_blocker(
                    consecutive_failures, current_result, self_correction_attempts
                ):
                    self._create_blocker_from_failure(step, current_result)
                    return

                # Give up on this step if too many consecutive failures
                if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
                    self._debug_log(
                        f"GIVING UP: Too many consecutive failures ({consecutive_failures})",
                        level="ERROR",
                        always=True,
                    )
                    self.state.status = AgentStatus.FAILED
                    self._emit_event("execution_failed", {
                        "reason": "Too many consecutive failures after self-correction",
                    })
                    return

                # Skip this step and continue to the next
                self._debug_log(
                    f"Skipping failed step {step.index}, advancing to next step",
                    level="WARN",
                    always=True,
                )
                self.state.current_step += 1

        elif result.status == ExecutionStatus.SKIPPED:
            self._debug_log(f"Step {step.index} SKIPPED", level="INFO", always=True)
            self._emit_event("step_skipped", {"step": step.index})
            self.state.current_step += 1

        # NOTE(review): a status other than SUCCESS / FAILED / SKIPPED leaves
        # current_step unchanged, so the same step would be executed again -
        # confirm ExecutionStatus has no other values that can reach here.

    self._debug_log(
        f"Plan execution completed. Final step index: {self.state.current_step}",
        level="INFO",
        always=True,
    )
|
|
773
|
+
|
|
774
|
+
def _run_incremental_verification(self) -> Optional[GateResult]:
    """Run quick verification after file changes.

    Only the fast ``ruff`` gate is run. A successful run is appended to
    ``self.state.gate_results`` before being returned.

    Returns:
        The GateResult of the ruff gate, or None when the gate could not
        be run (or recorded) because an exception was raised.
    """
    # Only run fast checks (ruff) for incremental verification
    try:
        result = run_gates(
            self.workspace,
            gates=["ruff"],
            verbose=True,
        )
        self.state.gate_results.append(result)
        return result
    except Exception as e:
        # Stay best-effort (callers handle None), but leave a trace: a
        # silently swallowed error here is indistinguishable from "no
        # verification result" when debugging a run.
        self._debug_log(f"Incremental verification error: {e}", level="WARN")
        return None
|
|
787
|
+
|
|
788
|
+
def _run_final_verification(self) -> None:
    """Run the full verification gates, retrying with self-correction.

    Each pass of the loop:
      1. Runs the gates and records the result in ``self.state.gate_results``.
      2. On success, sets the status to COMPLETED and returns.
      3. On failure, counts the attempt and, if attempts remain, calls
         ``_attempt_verification_fix``; when that reports fixes were
         applied, the gates are run again.

    The loop ends when ``attempt_count`` reaches ``max_attempts``, when no
    fix could be applied, or when any exception is raised inside the loop
    body. In all of those cases the status becomes FAILED and a
    ``verification_failed`` event is emitted.
    """
    self.state.status = AgentStatus.VERIFYING
    self._emit_event("verification_started", {})

    # This banner is printed unconditionally (plain print, not _verbose_print).
    print(f"\n[VERIFY] Starting final verification (max {self.state.max_attempts} attempts)")
    self._debug_log(
        f"Starting final verification (max {self.state.max_attempts} attempts)",
        level="INFO",
        always=True,
    )

    while self.state.attempt_count < self.state.max_attempts:
        attempt = self.state.attempt_count + 1
        self._verbose_print(f"[VERIFY] Attempt {attempt}/{self.state.max_attempts}")
        self._debug_log(
            f"Verification attempt {attempt}/{self.state.max_attempts}",
            level="INFO",
        )

        try:
            gates = run_gates(self.workspace, verbose=False)
            self.state.gate_results.append(gates)

            if gates.passed:
                # All gates green: this is the only successful exit.
                self.state.status = AgentStatus.COMPLETED
                self._emit_event("verification_passed", {"attempt": attempt})
                self._verbose_print(f"[VERIFY] PASSED on attempt {attempt}")
                self._debug_log(
                    f"Verification PASSED on attempt {attempt}",
                    level="INFO",
                    always=True,
                )
                return

            # Report which checks are in FAILED state.
            failed_checks = [
                check.name
                for check in gates.checks
                if check.status == GateStatus.FAILED
            ]
            failed_names = ", ".join(failed_checks)
            self._verbose_print(f"[VERIFY] FAILED: {failed_names}")
            self._debug_log(
                f"Verification failed: {failed_names}",
                level="WARN",
                always=True,
            )

            # The failed run counts as an attempt before any fix is tried.
            self.state.attempt_count += 1

            if self.state.attempt_count >= self.state.max_attempts:
                self._debug_log(
                    f"Max attempts ({self.state.max_attempts}) exceeded",
                    level="ERROR",
                    always=True,
                )
                break  # out of retries; handled as FAILED below

            self._verbose_print("[VERIFY] Attempting self-correction...")
            self._emit_event("self_correction_started", {
                "attempt": attempt,
                "failed_checks": failed_checks,
            })

            if self._attempt_verification_fix(gates):
                self._verbose_print("[VERIFY] Self-correction applied, re-running verification...")
                self._debug_log(
                    "Self-correction applied, re-running verification",
                    level="INFO",
                    always=True,
                )
                continue  # go round again and re-run the gates

            self._verbose_print("[VERIFY] Self-correction FAILED, giving up")
            self._debug_log(
                "Self-correction failed, giving up",
                level="ERROR",
                always=True,
            )
            break  # nothing could be fixed; handled as FAILED below

        except Exception as e:
            self._verbose_print(f"[VERIFY] Exception: {e}")
            self._emit_event("verification_error", {"error": str(e)})
            self._debug_log(
                f"Verification error: {e}",
                level="ERROR",
                always=True,
            )
            break  # any error ends the retry loop

    # Reached whenever the loop ends without a passing run.
    self._verbose_print(f"[VERIFY] Final result: FAILED after {self.state.attempt_count} attempts")
    self.state.status = AgentStatus.FAILED
    self._emit_event("verification_failed", {
        "reason": "Max verification attempts exceeded or self-correction failed",
        "attempts": self.state.attempt_count,
    })
|
|
900
|
+
|
|
901
|
+
def _try_auto_fix(self, gate_result: GateResult) -> bool:
|
|
902
|
+
"""Try to automatically fix lint issues.
|
|
903
|
+
|
|
904
|
+
Returns:
|
|
905
|
+
True if auto-fix was successful (returncode == 0)
|
|
906
|
+
"""
|
|
907
|
+
if not self.executor or self.dry_run:
|
|
908
|
+
return False
|
|
909
|
+
|
|
910
|
+
import subprocess
|
|
911
|
+
|
|
912
|
+
try:
|
|
913
|
+
result = subprocess.run(
|
|
914
|
+
["ruff", "check", "--fix", "."],
|
|
915
|
+
cwd=self.workspace.repo_path,
|
|
916
|
+
capture_output=True,
|
|
917
|
+
text=True,
|
|
918
|
+
timeout=30,
|
|
919
|
+
)
|
|
920
|
+
|
|
921
|
+
if result.returncode == 0:
|
|
922
|
+
self._debug_log("ruff --fix succeeded", level="INFO")
|
|
923
|
+
return True
|
|
924
|
+
else:
|
|
925
|
+
# Ruff fix failed - log the error output
|
|
926
|
+
stderr_preview = result.stderr[:500] if result.stderr else ""
|
|
927
|
+
stdout_preview = result.stdout[:500] if result.stdout else ""
|
|
928
|
+
self._debug_log(
|
|
929
|
+
f"ruff --fix failed (exit {result.returncode}): {stderr_preview or stdout_preview}",
|
|
930
|
+
level="WARN",
|
|
931
|
+
)
|
|
932
|
+
return False
|
|
933
|
+
|
|
934
|
+
except subprocess.TimeoutExpired:
|
|
935
|
+
self._debug_log("ruff --fix timed out after 30s", level="WARN")
|
|
936
|
+
return False
|
|
937
|
+
except subprocess.CalledProcessError as e:
|
|
938
|
+
self._debug_log(f"ruff --fix raised CalledProcessError: {e}", level="WARN")
|
|
939
|
+
return False
|
|
940
|
+
except FileNotFoundError:
|
|
941
|
+
self._debug_log("ruff command not found", level="WARN")
|
|
942
|
+
return False
|
|
943
|
+
except Exception as e:
|
|
944
|
+
self._debug_log(f"ruff --fix error: {e}", level="WARN")
|
|
945
|
+
return False
|
|
946
|
+
|
|
947
|
+
def _build_self_correction_context(self) -> str:
|
|
948
|
+
"""Build rich context for intelligent self-correction.
|
|
949
|
+
|
|
950
|
+
Provides the LLM with project structure, config files, and file tree
|
|
951
|
+
so it can reason about local vs external packages, project layout, etc.
|
|
952
|
+
|
|
953
|
+
Returns:
|
|
954
|
+
Formatted context string for the self-correction prompt
|
|
955
|
+
"""
|
|
956
|
+
sections = []
|
|
957
|
+
|
|
958
|
+
# Project structure overview
|
|
959
|
+
sections.append("## Project Structure")
|
|
960
|
+
if self.context and self.context.file_tree:
|
|
961
|
+
# Group files by directory
|
|
962
|
+
dirs: dict[str, list[str]] = {}
|
|
963
|
+
for f in self.context.file_tree[:50]: # Limit to 50 files
|
|
964
|
+
from pathlib import Path as P
|
|
965
|
+
dir_path = str(P(f.path).parent)
|
|
966
|
+
if dir_path not in dirs:
|
|
967
|
+
dirs[dir_path] = []
|
|
968
|
+
dirs[dir_path].append(P(f.path).name)
|
|
969
|
+
|
|
970
|
+
for dir_path in sorted(dirs.keys())[:15]:
|
|
971
|
+
sections.append(f" {dir_path}/")
|
|
972
|
+
for filename in dirs[dir_path][:8]:
|
|
973
|
+
sections.append(f" {filename}")
|
|
974
|
+
if len(dirs[dir_path]) > 8:
|
|
975
|
+
sections.append(f" ... ({len(dirs[dir_path]) - 8} more)")
|
|
976
|
+
sections.append("")
|
|
977
|
+
|
|
978
|
+
# Key config files content
|
|
979
|
+
config_files = ["pyproject.toml", "package.json", "Cargo.toml", "go.mod", "setup.py"]
|
|
980
|
+
for config_name in config_files:
|
|
981
|
+
config_path = self.workspace.repo_path / config_name
|
|
982
|
+
if config_path.exists():
|
|
983
|
+
try:
|
|
984
|
+
content = config_path.read_text()[:2000] # Limit size
|
|
985
|
+
sections.append(f"## {config_name}")
|
|
986
|
+
sections.append("```")
|
|
987
|
+
sections.append(content)
|
|
988
|
+
sections.append("```")
|
|
989
|
+
sections.append("")
|
|
990
|
+
except Exception:
|
|
991
|
+
pass
|
|
992
|
+
|
|
993
|
+
# Tech stack info if available
|
|
994
|
+
if self.context and self.context.tech_stack:
|
|
995
|
+
sections.append("## Tech Stack")
|
|
996
|
+
sections.append(self.context.tech_stack)
|
|
997
|
+
sections.append("")
|
|
998
|
+
|
|
999
|
+
# Files this agent created/modified in this run
|
|
1000
|
+
if self.state.step_results:
|
|
1001
|
+
modified_files = set()
|
|
1002
|
+
for result in self.state.step_results:
|
|
1003
|
+
for change in result.file_changes:
|
|
1004
|
+
modified_files.add(str(change.path))
|
|
1005
|
+
if modified_files:
|
|
1006
|
+
sections.append("## Files Modified by This Task")
|
|
1007
|
+
for f in sorted(modified_files)[:20]:
|
|
1008
|
+
sections.append(f" - {f}")
|
|
1009
|
+
sections.append("")
|
|
1010
|
+
|
|
1011
|
+
return "\n".join(sections)
|
|
1012
|
+
|
|
1013
|
+
def _classify_fix_scope(self, fix: dict) -> FixScope:
    """Classify whether a fix is local or global.

    A fix is GLOBAL when it is a shell command containing a known
    package-install command, when it creates a directory at the project
    root or under ``src/``, or when it targets a file whose name is in
    GLOBAL_SCOPE_FILES. It is LOCAL only when its file is one that a step
    of this run has already changed. Everything else is GLOBAL.

    Args:
        fix: Fix dictionary with 'file', 'action', 'command' keys. Missing
            keys and explicit ``None`` values are treated as empty strings.

    Returns:
        FixScope.LOCAL or FixScope.GLOBAL
    """
    from pathlib import Path

    # `or ""` rather than a .get() default: LLM-produced JSON may carry an
    # explicit null, which would break the substring checks below.
    action = fix.get("action") or ""
    file_path = fix.get("file") or ""
    command = fix.get("command") or ""

    # Shell commands that modify project state are global
    if action == "shell":
        global_commands = (
            "pip install", "npm install", "uv add", "cargo add",
            "go get", "yarn add", "pnpm add", "poetry add",
        )
        if any(gc in command for gc in global_commands):
            return FixScope.GLOBAL

    # Creating new directories at project root is global
    if action == "create_directory":
        # Root-level or src/ directories are global
        if "/" not in file_path or file_path.startswith("src/"):
            return FixScope.GLOBAL

    # Modifying config files is always global
    filename = Path(file_path).name if file_path else ""
    if filename in GLOBAL_SCOPE_FILES:
        return FixScope.GLOBAL

    # Check if file was created by this agent in this run
    for result in self.state.step_results or ():
        if any(str(change.path) == file_path for change in result.file_changes):
            return FixScope.LOCAL

    # Default to global for safety
    return FixScope.GLOBAL
|
|
1057
|
+
|
|
1058
|
+
def _attempt_verification_fix(self, gate_result: GateResult) -> bool:
|
|
1059
|
+
"""Attempt to self-correct verification failures.
|
|
1060
|
+
|
|
1061
|
+
Strategy:
|
|
1062
|
+
1. Try ruff --fix for quick lint fixes
|
|
1063
|
+
2. Try pattern-based quick fixes (no LLM needed)
|
|
1064
|
+
3. Collect error messages from failed checks
|
|
1065
|
+
4. Check if we should escalate to blocker
|
|
1066
|
+
5. Use LLM to generate a fix plan
|
|
1067
|
+
6. Execute the fix plan steps
|
|
1068
|
+
7. Return True if fixes were applied (caller will re-verify)
|
|
1069
|
+
|
|
1070
|
+
Args:
|
|
1071
|
+
gate_result: Result of failed verification gates
|
|
1072
|
+
|
|
1073
|
+
Returns:
|
|
1074
|
+
True if fixes were applied, False if unable to fix
|
|
1075
|
+
"""
|
|
1076
|
+
self._verbose_print("[SELFCORRECT] Starting verification fix attempt")
|
|
1077
|
+
self._debug_log("Attempting self-correction", level="INFO", always=True)
|
|
1078
|
+
|
|
1079
|
+
# Step 1: Try ruff --fix for quick lint fixes
|
|
1080
|
+
self._verbose_print("[SELFCORRECT] Running ruff --fix...")
|
|
1081
|
+
self._try_auto_fix(gate_result)
|
|
1082
|
+
|
|
1083
|
+
# Step 2: Collect error messages from failed checks
|
|
1084
|
+
errors = []
|
|
1085
|
+
for check in gate_result.checks:
|
|
1086
|
+
if check.status == GateStatus.FAILED and check.output:
|
|
1087
|
+
errors.append(f"{check.name}: {check.output[:1000]}")
|
|
1088
|
+
|
|
1089
|
+
if not errors:
|
|
1090
|
+
self._verbose_print("[SELFCORRECT] No error messages to fix")
|
|
1091
|
+
self._debug_log("No error messages to fix", level="WARN")
|
|
1092
|
+
return False
|
|
1093
|
+
|
|
1094
|
+
self._verbose_print(f"[SELFCORRECT] Collected {len(errors)} error(s) to fix")
|
|
1095
|
+
error_summary = "\n\n".join(errors)
|
|
1096
|
+
self._debug_log(f"Errors to fix:\n{error_summary[:500]}...", level="INFO")
|
|
1097
|
+
|
|
1098
|
+
# Step 3: Try pattern-based quick fixes first (no LLM needed)
|
|
1099
|
+
quick_fix_applied = False
|
|
1100
|
+
for error in errors:
|
|
1101
|
+
quick_fix = find_quick_fix(
|
|
1102
|
+
error,
|
|
1103
|
+
repo_path=self.workspace.repo_path,
|
|
1104
|
+
)
|
|
1105
|
+
if quick_fix:
|
|
1106
|
+
# Check if we already tried this fix
|
|
1107
|
+
if self.fix_tracker.was_attempted(error, quick_fix.description):
|
|
1108
|
+
self._verbose_print(f"[SELFCORRECT] Skipping already-tried fix: {quick_fix.description}")
|
|
1109
|
+
self._debug_log(f"Skipping duplicate fix: {quick_fix.description}", level="INFO")
|
|
1110
|
+
continue
|
|
1111
|
+
|
|
1112
|
+
# Record the attempt
|
|
1113
|
+
self.fix_tracker.record_attempt(error, quick_fix.description)
|
|
1114
|
+
|
|
1115
|
+
self._verbose_print(f"[SELFCORRECT] Trying quick fix: {quick_fix.description}")
|
|
1116
|
+
success, msg = apply_quick_fix(quick_fix, self.workspace.repo_path, self.dry_run)
|
|
1117
|
+
|
|
1118
|
+
if success:
|
|
1119
|
+
self.fix_tracker.record_outcome(error, quick_fix.description, FixOutcome.SUCCESS)
|
|
1120
|
+
self._verbose_print(f"[SELFCORRECT] Quick fix applied: {msg}")
|
|
1121
|
+
self._debug_log(f"Quick fix applied: {msg}", level="INFO", always=True)
|
|
1122
|
+
quick_fix_applied = True
|
|
1123
|
+
else:
|
|
1124
|
+
self.fix_tracker.record_outcome(error, quick_fix.description, FixOutcome.FAILED)
|
|
1125
|
+
self._verbose_print(f"[SELFCORRECT] Quick fix failed: {msg}")
|
|
1126
|
+
self._debug_log(f"Quick fix failed: {msg}", level="WARN")
|
|
1127
|
+
|
|
1128
|
+
if quick_fix_applied:
|
|
1129
|
+
return True # Let caller re-verify
|
|
1130
|
+
|
|
1131
|
+
# Step 4: Check if we should escalate to blocker
|
|
1132
|
+
escalation = self.fix_tracker.should_escalate(error_summary)
|
|
1133
|
+
if escalation.should_escalate:
|
|
1134
|
+
self._verbose_print(f"[SELFCORRECT] Escalating to blocker: {escalation.reason}")
|
|
1135
|
+
self._debug_log(f"Escalating to blocker: {escalation.reason}", level="WARN", always=True)
|
|
1136
|
+
self._create_escalation_blocker(error_summary, escalation)
|
|
1137
|
+
return False # Stop trying, blocker created
|
|
1138
|
+
|
|
1139
|
+
# Step 5: Use LLM to generate a fix plan with full context
|
|
1140
|
+
# Build rich context so LLM can reason about project structure
|
|
1141
|
+
project_context = self._build_self_correction_context()
|
|
1142
|
+
|
|
1143
|
+
# Include info about already-tried fixes to avoid repetition
|
|
1144
|
+
attempted_fixes = self.fix_tracker.get_attempted_fixes(error_summary)
|
|
1145
|
+
already_tried = ""
|
|
1146
|
+
if attempted_fixes:
|
|
1147
|
+
already_tried = "\n\nALREADY TRIED (DO NOT REPEAT):\n" + "\n".join(f"- {f}" for f in attempted_fixes)
|
|
1148
|
+
|
|
1149
|
+
fix_prompt = f"""You are an intelligent agent fixing verification errors. You have access to the full project context below.
|
|
1150
|
+
|
|
1151
|
+
{project_context}
|
|
1152
|
+
|
|
1153
|
+
## Errors to Fix
|
|
1154
|
+
|
|
1155
|
+
{error_summary}
|
|
1156
|
+
|
|
1157
|
+
## Instructions
|
|
1158
|
+
|
|
1159
|
+
Analyze the errors and the project structure. Determine the root cause and propose fixes.
|
|
1160
|
+
|
|
1161
|
+
You can use ANY of these actions:
|
|
1162
|
+
- "edit": Modify existing file (requires old_code, new_code)
|
|
1163
|
+
- "create": Create new file (requires content)
|
|
1164
|
+
- "shell": Run a shell command (requires command)
|
|
1165
|
+
|
|
1166
|
+
Return a JSON object:
|
|
1167
|
+
{{
|
|
1168
|
+
"analysis": "What's the root cause? Is this a local code issue or a project configuration issue?",
|
|
1169
|
+
"fixes": [
|
|
1170
|
+
{{
|
|
1171
|
+
"action": "edit|create|shell",
|
|
1172
|
+
"scope": "local|global",
|
|
1173
|
+
"description": "What this fix does",
|
|
1174
|
+
"file": "path/to/file.py",
|
|
1175
|
+
"old_code": "for edits only",
|
|
1176
|
+
"new_code": "for edits only",
|
|
1177
|
+
"content": "for creates only",
|
|
1178
|
+
"command": "for shell only"
|
|
1179
|
+
}}
|
|
1180
|
+
]
|
|
1181
|
+
}}
|
|
1182
|
+
|
|
1183
|
+
## Scope Classification (IMPORTANT for parallel execution)
|
|
1184
|
+
- "local": Fixes to files YOU created in this task, your own tests, formatting fixes
|
|
1185
|
+
- "global": Config files (pyproject.toml, package.json), install commands, new packages, shared code
|
|
1186
|
+
|
|
1187
|
+
## Common Patterns
|
|
1188
|
+
- ModuleNotFoundError for LOCAL package (src/foo exists): Use "uv pip install -e ." or fix pyproject.toml
|
|
1189
|
+
- ModuleNotFoundError for EXTERNAL package: Use "uv pip install <package>"
|
|
1190
|
+
- Import errors in your code: Edit the file to fix imports
|
|
1191
|
+
- Syntax errors: Edit the file to fix syntax
|
|
1192
|
+
|
|
1193
|
+
IMPORTANT:
|
|
1194
|
+
- Check if the module exists locally before trying to install it
|
|
1195
|
+
- Be precise with old_code - it must match exactly
|
|
1196
|
+
- Return valid JSON only{already_tried}"""
|
|
1197
|
+
|
|
1198
|
+
try:
|
|
1199
|
+
self._verbose_print("[SELFCORRECT] Asking LLM for fixes...")
|
|
1200
|
+
response = self.llm.complete(
|
|
1201
|
+
messages=[{"role": "user", "content": fix_prompt}],
|
|
1202
|
+
purpose=Purpose.EXECUTION,
|
|
1203
|
+
system="You are a code fixer. Return only valid JSON.",
|
|
1204
|
+
max_tokens=4096,
|
|
1205
|
+
temperature=0.0,
|
|
1206
|
+
)
|
|
1207
|
+
|
|
1208
|
+
# Parse the fix plan
|
|
1209
|
+
import json
|
|
1210
|
+
json_match = re.search(r"\{[\s\S]*\}", response.content)
|
|
1211
|
+
if not json_match:
|
|
1212
|
+
self._verbose_print("[SELFCORRECT] No JSON found in LLM response")
|
|
1213
|
+
self._debug_log("No JSON found in fix response", level="ERROR")
|
|
1214
|
+
return False
|
|
1215
|
+
|
|
1216
|
+
fix_plan = json.loads(json_match.group())
|
|
1217
|
+
fixes = fix_plan.get("fixes", [])
|
|
1218
|
+
|
|
1219
|
+
if not fixes:
|
|
1220
|
+
self._verbose_print("[SELFCORRECT] LLM returned empty fixes list")
|
|
1221
|
+
self._debug_log("No fixes generated", level="WARN")
|
|
1222
|
+
return False
|
|
1223
|
+
|
|
1224
|
+
analysis = fix_plan.get('analysis', 'no analysis')
|
|
1225
|
+
self._verbose_print(f"[SELFCORRECT] LLM generated {len(fixes)} fix(es): {analysis[:100]}...")
|
|
1226
|
+
self._debug_log(
|
|
1227
|
+
f"Generated {len(fixes)} fixes: {analysis}",
|
|
1228
|
+
level="INFO",
|
|
1229
|
+
always=True,
|
|
1230
|
+
)
|
|
1231
|
+
|
|
1232
|
+
# Step 6: Execute the fix plan with tracking
|
|
1233
|
+
applied = 0
|
|
1234
|
+
for fix in fixes:
|
|
1235
|
+
file_path = self.workspace.repo_path / fix.get("file", "")
|
|
1236
|
+
action = fix.get("action", "edit")
|
|
1237
|
+
fix_desc = fix.get("description", f"{action} {fix.get('file', 'unknown')}")
|
|
1238
|
+
|
|
1239
|
+
# Track the attempt
|
|
1240
|
+
self.fix_tracker.record_attempt(
|
|
1241
|
+
error_summary, fix_desc, file_path=str(file_path)
|
|
1242
|
+
)
|
|
1243
|
+
|
|
1244
|
+
try:
|
|
1245
|
+
fix_succeeded = False
|
|
1246
|
+
|
|
1247
|
+
if action == "create":
|
|
1248
|
+
# Create new file with path safety check
|
|
1249
|
+
content = fix.get("content", "")
|
|
1250
|
+
if content and not self.dry_run:
|
|
1251
|
+
# Verify path is safely within workspace
|
|
1252
|
+
is_safe, reason = _is_path_safe(file_path, self.workspace.repo_path)
|
|
1253
|
+
if not is_safe:
|
|
1254
|
+
self._debug_log(f"Create blocked: {reason}", level="WARN")
|
|
1255
|
+
else:
|
|
1256
|
+
file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
1257
|
+
file_path.write_text(content)
|
|
1258
|
+
self._debug_log(f"Created {file_path}", level="INFO")
|
|
1259
|
+
applied += 1
|
|
1260
|
+
fix_succeeded = True
|
|
1261
|
+
|
|
1262
|
+
elif action == "edit":
|
|
1263
|
+
# Edit existing file with path safety check
|
|
1264
|
+
old_code = fix.get("old_code", "")
|
|
1265
|
+
new_code = fix.get("new_code", "")
|
|
1266
|
+
|
|
1267
|
+
# Verify path is safely within workspace before any file ops
|
|
1268
|
+
is_safe, reason = _is_path_safe(file_path, self.workspace.repo_path)
|
|
1269
|
+
if not is_safe:
|
|
1270
|
+
self._debug_log(f"Edit blocked: {reason}", level="WARN")
|
|
1271
|
+
elif not file_path.exists():
|
|
1272
|
+
self._debug_log(f"File not found: {file_path}", level="WARN")
|
|
1273
|
+
elif not old_code:
|
|
1274
|
+
self._debug_log(f"No old_code for {file_path}", level="WARN")
|
|
1275
|
+
else:
|
|
1276
|
+
content = file_path.read_text()
|
|
1277
|
+
if old_code not in content:
|
|
1278
|
+
self._debug_log(
|
|
1279
|
+
f"old_code not found in {file_path}",
|
|
1280
|
+
level="WARN",
|
|
1281
|
+
)
|
|
1282
|
+
elif not self.dry_run:
|
|
1283
|
+
new_content = content.replace(old_code, new_code, 1)
|
|
1284
|
+
file_path.write_text(new_content)
|
|
1285
|
+
self._debug_log(f"Fixed {file_path}", level="INFO")
|
|
1286
|
+
applied += 1
|
|
1287
|
+
fix_succeeded = True
|
|
1288
|
+
|
|
1289
|
+
elif action == "delete":
|
|
1290
|
+
# Delete file with safeguards
|
|
1291
|
+
if self.dry_run:
|
|
1292
|
+
self._debug_log(f"[DRY RUN] Would delete {file_path}", level="INFO")
|
|
1293
|
+
elif not file_path.exists():
|
|
1294
|
+
self._debug_log(f"File already deleted: {file_path}", level="INFO")
|
|
1295
|
+
fix_succeeded = True
|
|
1296
|
+
else:
|
|
1297
|
+
# Verify path is safely within workspace
|
|
1298
|
+
is_safe, reason = _is_path_safe(file_path, self.workspace.repo_path)
|
|
1299
|
+
if not is_safe:
|
|
1300
|
+
self._debug_log(f"Delete blocked: {reason}", level="WARN")
|
|
1301
|
+
else:
|
|
1302
|
+
file_path.unlink()
|
|
1303
|
+
self._debug_log(f"Deleted {file_path}", level="INFO")
|
|
1304
|
+
applied += 1
|
|
1305
|
+
fix_succeeded = True
|
|
1306
|
+
|
|
1307
|
+
elif action == "shell":
|
|
1308
|
+
# Run shell command with safe parsing
|
|
1309
|
+
command = fix.get("command", "")
|
|
1310
|
+
if command and not self.dry_run:
|
|
1311
|
+
scope = self._classify_fix_scope(fix)
|
|
1312
|
+
self._verbose_print(f"[SELFCORRECT] Running shell ({scope.value}): {command[:80]}...")
|
|
1313
|
+
|
|
1314
|
+
# Parse command for safe execution
|
|
1315
|
+
argv, requires_shell, parse_warning = _parse_command_safely(command)
|
|
1316
|
+
|
|
1317
|
+
# Reject commands that require shell=True (contain operators/unsafe constructs)
|
|
1318
|
+
if requires_shell:
|
|
1319
|
+
self._debug_log(
|
|
1320
|
+
f"Shell command rejected: {parse_warning} - command: {command[:100]}",
|
|
1321
|
+
level="ERROR",
|
|
1322
|
+
)
|
|
1323
|
+
self._verbose_print(
|
|
1324
|
+
f"[SELFCORRECT] Command rejected (requires shell): {parse_warning}"
|
|
1325
|
+
)
|
|
1326
|
+
# Mark as failed and skip execution
|
|
1327
|
+
self.fix_tracker.record_outcome(
|
|
1328
|
+
error_summary, fix_desc, FixOutcome.FAILED
|
|
1329
|
+
)
|
|
1330
|
+
continue # Skip to next fix
|
|
1331
|
+
|
|
1332
|
+
if parse_warning:
|
|
1333
|
+
self._debug_log(f"Shell safety: {parse_warning}", level="WARN")
|
|
1334
|
+
|
|
1335
|
+
# Helper to run the command safely (only shell=False now)
|
|
1336
|
+
def _run_command() -> subprocess.CompletedProcess:
|
|
1337
|
+
return subprocess.run(
|
|
1338
|
+
argv,
|
|
1339
|
+
shell=False,
|
|
1340
|
+
cwd=self.workspace.repo_path,
|
|
1341
|
+
capture_output=True,
|
|
1342
|
+
text=True,
|
|
1343
|
+
timeout=120,
|
|
1344
|
+
)
|
|
1345
|
+
|
|
1346
|
+
# Global scope commands should go through Coordinator
|
|
1347
|
+
if scope == FixScope.GLOBAL and self.fix_coordinator:
|
|
1348
|
+
status, should_execute = self.fix_coordinator.request_fix(
|
|
1349
|
+
error=error_summary,
|
|
1350
|
+
fix_type="shell",
|
|
1351
|
+
fix_description=fix_desc,
|
|
1352
|
+
command=command,
|
|
1353
|
+
task_id=self.state.task_id,
|
|
1354
|
+
)
|
|
1355
|
+
if status == "already_completed":
|
|
1356
|
+
# Another agent already fixed this
|
|
1357
|
+
self._verbose_print("[SELFCORRECT] Fix already done by another agent")
|
|
1358
|
+
applied += 1
|
|
1359
|
+
fix_succeeded = True
|
|
1360
|
+
elif status == "pending":
|
|
1361
|
+
# Wait for another agent to finish
|
|
1362
|
+
self._verbose_print("[SELFCORRECT] Waiting for another agent's fix...")
|
|
1363
|
+
if self.fix_coordinator.wait_for_fix(error_summary, timeout=60.0):
|
|
1364
|
+
applied += 1
|
|
1365
|
+
fix_succeeded = True
|
|
1366
|
+
else:
|
|
1367
|
+
self._debug_log("Timeout waiting for global fix", level="WARN")
|
|
1368
|
+
elif should_execute:
|
|
1369
|
+
# We are responsible for executing
|
|
1370
|
+
try:
|
|
1371
|
+
result = _run_command()
|
|
1372
|
+
success = result.returncode == 0
|
|
1373
|
+
self.fix_coordinator.report_fix_result(
|
|
1374
|
+
error_summary, success, result.stderr[:200] if not success else None
|
|
1375
|
+
)
|
|
1376
|
+
if success:
|
|
1377
|
+
self._debug_log(f"Global shell command succeeded: {command}", level="INFO")
|
|
1378
|
+
applied += 1
|
|
1379
|
+
fix_succeeded = True
|
|
1380
|
+
else:
|
|
1381
|
+
self._debug_log(f"Global shell command failed: {result.stderr[:200]}", level="WARN")
|
|
1382
|
+
except Exception as shell_err:
|
|
1383
|
+
self.fix_coordinator.report_fix_result(error_summary, False, str(shell_err))
|
|
1384
|
+
self._debug_log(f"Global shell error: {shell_err}", level="WARN")
|
|
1385
|
+
else:
|
|
1386
|
+
# Local scope - execute directly
|
|
1387
|
+
try:
|
|
1388
|
+
result = _run_command()
|
|
1389
|
+
if result.returncode == 0:
|
|
1390
|
+
self._debug_log(f"Shell command succeeded: {command}", level="INFO")
|
|
1391
|
+
applied += 1
|
|
1392
|
+
fix_succeeded = True
|
|
1393
|
+
else:
|
|
1394
|
+
self._debug_log(
|
|
1395
|
+
f"Shell command failed: {result.stderr[:200]}",
|
|
1396
|
+
level="WARN"
|
|
1397
|
+
)
|
|
1398
|
+
except subprocess.TimeoutExpired:
|
|
1399
|
+
self._debug_log(f"Shell command timed out: {command}", level="WARN")
|
|
1400
|
+
except Exception as shell_err:
|
|
1401
|
+
self._debug_log(f"Shell command error: {shell_err}", level="WARN")
|
|
1402
|
+
|
|
1403
|
+
# Record outcome
|
|
1404
|
+
self.fix_tracker.record_outcome(
|
|
1405
|
+
error_summary, fix_desc,
|
|
1406
|
+
FixOutcome.SUCCESS if fix_succeeded else FixOutcome.FAILED
|
|
1407
|
+
)
|
|
1408
|
+
|
|
1409
|
+
except Exception as e:
|
|
1410
|
+
self._debug_log(f"Fix failed for {file_path}: {e}", level="ERROR")
|
|
1411
|
+
self.fix_tracker.record_outcome(error_summary, fix_desc, FixOutcome.FAILED)
|
|
1412
|
+
|
|
1413
|
+
self._verbose_print(f"[SELFCORRECT] Applied {applied}/{len(fixes)} fixes")
|
|
1414
|
+
self._debug_log(
|
|
1415
|
+
f"Applied {applied}/{len(fixes)} fixes",
|
|
1416
|
+
level="INFO",
|
|
1417
|
+
always=True,
|
|
1418
|
+
)
|
|
1419
|
+
return applied > 0
|
|
1420
|
+
|
|
1421
|
+
except json.JSONDecodeError as e:
|
|
1422
|
+
self._verbose_print(f"[SELFCORRECT] JSON parse error: {e}")
|
|
1423
|
+
self._debug_log(f"Failed to parse fix plan JSON: {e}", level="ERROR")
|
|
1424
|
+
return False
|
|
1425
|
+
except Exception as e:
|
|
1426
|
+
self._verbose_print(f"[SELFCORRECT] Error: {e}")
|
|
1427
|
+
self._debug_log(f"Self-correction error: {e}", level="ERROR")
|
|
1428
|
+
return False
|
|
1429
|
+
|
|
1430
|
+
def _classify_error(self, error: str) -> str:
    """Bucket an error message as tactical, human, or technical.

    The message is lower-cased and checked for substring hits against three
    pattern collections, in priority order:

    1. TACTICAL_DECISION_PATTERNS - implementation details the agent should
       settle on its own; these never become blockers
    2. HUMAN_INPUT_PATTERNS - requirements ambiguity or access issues
    3. TECHNICAL_ERROR_PATTERNS - coding errors the agent can self-correct

    Args:
        error: Error message to classify

    Returns:
        "tactical" if the agent should decide autonomously (no blocker)
        "human" if human input is genuinely needed (create blocker)
        "technical" if the agent can self-correct; this is also the
        fallback when nothing matches, so the agent tries a fix first
    """
    lowered = error.lower()

    # Order matters: the first collection with a hit decides the label.
    prioritized = (
        ("tactical", TACTICAL_DECISION_PATTERNS),
        ("human", HUMAN_INPUT_PATTERNS),
        ("technical", TECHNICAL_ERROR_PATTERNS),
    )
    for label, needles in prioritized:
        if any(needle in lowered for needle in needles):
            return label

    # Nothing matched - default to technical so self-correction runs first.
    return "technical"
|
|
1466
|
+
|
|
1467
|
+
def _resolve_tactical_decision(self, error: str, context: "TaskContext") -> str:
    """Resolve a tactical decision using preferences and best judgment.

    When the agent encounters a tactical question (implementation detail,
    tooling choice, file handling, etc.), this method asks the LLM for a
    short instruction instead of creating a blocker. Emits
    "tactical_resolution_started" up front and then either
    "tactical_resolution_completed" or "tactical_resolution_failed".

    Args:
        error: The error/question that triggered this
        context: Task context with preferences

    Returns:
        Resolution instruction for the agent to follow. If anything in the
        LLM round-trip raises, a generic "use the standard approach"
        instruction is returned instead.
    """
    self._emit_event("tactical_resolution_started", {"question": error[:200]})

    # Build resolution prompt using preferences (section omitted when none set)
    prefs = context.preferences
    pref_section = prefs.to_prompt_section() if prefs.has_preferences() else ""

    prompt = f"""You encountered a tactical implementation decision that should be resolved autonomously.

## The Question/Decision
{error}

{pref_section}

## Resolution Guidelines

As an expert software engineer, resolve this decision using:
1. Project preferences (above) if they apply
2. Industry best practices if no preference
3. The simpler approach when multiple options are equivalent
4. Common conventions for this type of project

IMPORTANT: This is a tactical decision you MUST resolve yourself. Do NOT ask the user.
Do NOT say you need clarification. Make the best decision and proceed.

Respond with a brief, clear instruction on what to do. For example:
- "Use pytest as the test framework"
- "Overwrite the existing file with the new implementation"
- "Use the latest stable version of the library"
- "Install using uv (the project's package manager)"

Your decision:"""

    try:
        response = self.llm.complete(
            messages=[{"role": "user", "content": prompt}],
            purpose=Purpose.GENERATION,
            max_tokens=256,
            temperature=0.0,
        )

        # The completion text lives on `.content` (as at every other
        # llm.complete call site in this class). Calling .strip() on the
        # response object itself raised, which the handler below swallowed,
        # so the LLM's decision was never actually used.
        resolution = response.content.strip()
        self._emit_event(
            "tactical_resolution_completed",
            {"question": error[:200], "resolution": resolution[:200]},
        )
        self._debug_log(
            f"TACTICAL DECISION RESOLVED: {resolution[:100]}",
            level="INFO",
            data={"question": error, "resolution": resolution},
        )
        return resolution

    except Exception as e:
        # On LLM failure, use a sensible default
        self._emit_event(
            "tactical_resolution_failed", {"question": error[:200], "error": str(e)}
        )
        return "Proceed with the most common/standard approach for this situation."
|
|
1539
|
+
|
|
1540
|
+
def _should_create_blocker(
    self,
    consecutive_failures: int,
    result: StepResult,
    self_correction_attempts: int = 0,
) -> bool:
    """Decide whether a failed step warrants a blocker.

    The decision is driven by how `_classify_error` labels `result.error`:

    - "human": always block.
    - "tactical": never block; the agent resolves these on its own.
    - "technical": block only once self-correction has been tried
      MAX_SELF_CORRECTION_ATTEMPTS times or more.

    Args:
        consecutive_failures: Number of consecutive step failures
            (not consulted by the current logic)
        result: The failed step result
        self_correction_attempts: How many self-correction attempts were made

    Returns:
        True if a blocker should be created
    """
    category = self._classify_error(result.error)

    if category == "human":
        # Genuine human-input-needed errors always create blockers
        return True

    if category == "tactical":
        # Tactical decisions are resolved autonomously using preferences
        self._debug_log(
            "TACTICAL decision detected - will resolve autonomously, NOT creating blocker",
            level="INFO",
            data={"error": result.error[:200]},
        )
        return False

    if category == "technical":
        # Block only after self-correction is exhausted; otherwise let the
        # caller keep trying to self-correct.
        return self_correction_attempts >= MAX_SELF_CORRECTION_ATTEMPTS

    return False
|
|
1586
|
+
|
|
1587
|
+
def _attempt_self_correction(
    self,
    step: PlanStep,
    result: StepResult,
    attempt: int,
) -> Optional[StepResult]:
    """Ask the LLM for a corrected version of a failed step and re-run it.

    A prompt describing the step, its error and its previous details is
    sent with the CORRECTION purpose. The reply becomes the `details` of a
    copy of the step, which is executed through `self.executor`.

    Args:
        step: The step that failed
        result: The failure result
        attempt: Which self-correction attempt this is (1-based)

    Returns:
        The StepResult of the re-executed step, or None if anything raised
        while requesting or executing the correction
    """
    self._emit_event(
        "self_correction_started",
        {
            "step": step.index,
            "attempt": attempt,
            "error": result.error[:200],
        },
    )

    self._debug_log(
        f"SELF-CORRECTION attempt {attempt} for step {step.index}",
        level="INFO",
        data={
            "step_type": step.type.value,
            "target": step.target,
            "description": step.description,
            "error": result.error,
        },
        always=True,
    )

    prompt = f"""A code execution step failed. Analyze the error and provide a corrected approach.

Step Description: {step.description}
Step Type: {step.type.value}
Target: {step.target}

Error:
{result.error}

Previous approach that failed:
{step.details[:2000] if step.details else "No details"}

Please provide a corrected version that fixes this error. Consider:
1. If it's a file path issue, find the correct path or create the file
2. If it's an import issue, add the missing import
3. If it's a syntax error, fix the syntax
4. If it's a logic error, fix the logic

Respond with ONLY the corrected code/content, no explanation."""

    # Record the outgoing prompt in full for debugging
    self._debug_log_llm_interaction(
        f"Self-correction attempt {attempt} for step {step.index}",
        prompt,
    )

    try:
        # The CORRECTION purpose steps up to a stronger model (Opus) for
        # better error analysis and code fixing.
        model_name = self.llm.get_model(Purpose.CORRECTION)
        self._debug_log(
            f"Using stepped-up model for self-correction: {model_name}",
            level="INFO",
            always=True,
        )

        reply = self.llm.complete(
            messages=[{"role": "user", "content": prompt}],
            purpose=Purpose.CORRECTION,
            max_tokens=4000,
            temperature=0.0,
        )
        fixed_details = reply.content.strip()

        # Record the full reply next to the prompt that produced it
        self._debug_log_llm_interaction(
            f"Self-correction response {attempt} for step {step.index}",
            prompt,
            response=fixed_details,
        )
        self._debug_log(
            f"Self-correction LLM response received ({len(fixed_details)} chars)",
            level="DEBUG",
            data={"first_100_chars": fixed_details[:100]},
            always=True,
        )

        # Same step identity, new details, description tagged with the attempt
        retry_step = PlanStep(
            index=step.index,
            type=step.type,
            target=step.target,
            description=f"{step.description} (self-corrected, attempt {attempt})",
            details=fixed_details,
            depends_on=step.depends_on,
        )

        self._debug_log(
            f"Executing corrected step {step.index}",
            level="DEBUG",
            always=True,
        )
        retry_result = self.executor.execute_step(retry_step, self.context)
        succeeded = retry_result.status == ExecutionStatus.SUCCESS

        self._debug_log(
            f"Corrected step result: {retry_result.status.value}",
            level="INFO",
            data={
                "success": succeeded,
                "error": retry_result.error or None,
                "output": retry_result.output[:200] if retry_result.output else None,
            },
            always=True,
        )

        self._emit_event(
            "self_correction_completed",
            {
                "step": step.index,
                "attempt": attempt,
                "success": succeeded,
            },
        )
        return retry_result

    except Exception as e:
        self._debug_log(
            f"Self-correction EXCEPTION: {str(e)}",
            level="ERROR",
            always=True,
        )
        self._emit_event(
            "self_correction_failed",
            {
                "step": step.index,
                "attempt": attempt,
                "error": str(e),
            },
        )
        return None
|
|
1731
|
+
|
|
1732
|
+
def _create_blocker_from_failure(
    self,
    step: PlanStep,
    result: StepResult,
) -> None:
    """Turn a step failure into a blocker, unless it can be handled without one.

    The text from `_generate_blocker_question` is inspected first. No
    blocker is created (only an event is emitted) when that text:

    - starts with "RESOLVE_AUTONOMOUSLY:" (tactical decision),
    - starts with "TECHNICAL_FIX:" (technical issue to retry), or
    - contains one of a fixed list of tactical phrases.

    Otherwise a blocker is created, the agent state is set to BLOCKED with
    the blocker details attached, and "blocker_created" is emitted.

    Args:
        step: The step that failed
        result: The failure result
    """
    question = self._generate_blocker_question(step, result)

    # The LLM decided this should be resolved autonomously
    if question.startswith("RESOLVE_AUTONOMOUSLY:"):
        self._debug_log(
            f"Auto-resolving tactical decision: {question}",
            level="INFO",
            always=True,
        )
        # No blocker - let the agent continue with self-correction
        self._emit_event(
            "tactical_resolved",
            {"step": step.index, "resolution": question},
        )
        return

    # The LLM decided this is a technical fix
    if question.startswith("TECHNICAL_FIX:"):
        self._debug_log(
            f"Technical issue identified: {question}",
            level="INFO",
            always=True,
        )
        # No blocker - mark as needing retry
        self._emit_event(
            "technical_fix_needed",
            {"step": step.index, "fix": question},
        )
        return

    # Second line of defence: tactical phrasing inside the question itself
    tactical_markers = (
        "virtual environment", "venv", "virtualenv",
        "would you like me to", "would you prefer",
        "should i create", "should i use",
        "pip install", "npm install", "uv sync",
        "break-system-packages", "pipx",
        "pytest.ini", "pyproject.toml", "asyncio_default_fixture_loop_scope",
        "fixture scope", "loop scope",
    )
    lowered = question.lower()
    if any(marker in lowered for marker in tactical_markers):
        self._debug_log(
            f"Detected tactical question pattern, auto-resolving: {question[:100]}...",
            level="INFO",
            always=True,
        )
        self._emit_event(
            "tactical_resolved",
            {"step": step.index, "resolution": "Auto-resolved tactical decision"},
        )
        return

    # A legitimate blocker that requires human input
    created = blockers.create(
        workspace=self.workspace,
        question=question,
        task_id=self.state.task_id,
        created_by="agent",
    )

    self.state.status = AgentStatus.BLOCKED
    self.state.blocker = BlockerInfo(
        reason=result.error,
        question=question,
        context=f"Step {step.index}: {step.description}",
        step_index=step.index,
    )

    self._emit_event(
        "blocker_created",
        {"blocker_id": created.id, "question": question},
    )
    # Note: task status update handled by runtime.block_run()
|
|
1817
|
+
|
|
1818
|
+
def _create_verification_blocker(self, gate_result: GateResult) -> None:
    """Record a verification failure as FAILED rather than BLOCKED.

    Verification failures (pytest, ruff, etc.) are treated as TECHNICAL
    issues, not human decision points, so despite the method name no
    blocker is created. The agent status becomes FAILED so the retry
    mechanism can handle it, and a "verification_failed" event is emitted
    with the names of the failed checks.

    This keeps tactical questions like "pytest failed, what should I do?"
    from becoming blockers that require human intervention.

    Args:
        gate_result: Gate outcome whose `checks` are scanned for failures
    """
    failed_checks = []
    for check in gate_result.checks:
        if check.status == GateStatus.FAILED:
            failed_checks.append(check.name)

    self._debug_log(
        f"Verification failed for: {', '.join(failed_checks)}. "
        "Marking as FAILED (not BLOCKED) for retry.",
        level="WARN",
        always=True,
    )

    # FAILED, not BLOCKED: these are technical issues to retry,
    # not human decision points
    self.state.status = AgentStatus.FAILED
    self._emit_event(
        "verification_failed",
        {
            "failed_checks": failed_checks,
            "reason": "Verification failed - technical issue for retry",
        },
    )
    # Note: task status update handled by runtime.fail_run()
|
|
1848
|
+
|
|
1849
|
+
def _create_escalation_blocker(
    self,
    error_summary: str,
    escalation: EscalationDecision,
) -> None:
    """Create a blocker once self-correction has been exhausted.

    Unlike regular blockers, which ask for guidance, the question built
    here spells out what was tried and why the agent is stuck: the error
    type, the problem summary, up to ten attempted fixes and the
    escalation reason. The agent state is set to BLOCKED and an
    "escalation_blocker_created" event is emitted.

    Args:
        error_summary: Summary of the errors being fixed
        escalation: EscalationDecision from FixAttemptTracker
    """
    # Tracker-side details used to build a detailed, informative question
    context = self.fix_tracker.get_blocker_context(error_summary)
    attempts_made = context.get("attempt_count", 0)

    # Bullet list of attempted fixes, capped at ten entries
    tried = escalation.attempted_fixes
    fixes_list = "\n".join(f" - {f}" for f in tried[:10]) if tried else ""

    question = f"""Task failed after multiple self-correction attempts.

**Error:** {context.get('error_type', 'Unknown error')}

**Problem:** {escalation.error_summary[:300]}

**Attempted fixes ({context.get('attempt_count', 0)} total):**
{fixes_list}

**Reason for escalation:** {escalation.reason}

**How should I proceed?** Please provide guidance on:
1. What might be causing this persistent error?
2. Is there a different approach I should try?
3. Are there any missing dependencies or configuration?"""

    # Create the blocker
    created = blockers.create(
        workspace=self.workspace,
        question=question,
        task_id=self.state.task_id,
        created_by="agent",
    )

    self.state.status = AgentStatus.BLOCKED
    self.state.blocker = BlockerInfo(
        reason=escalation.reason,
        question=question,
        context=f"Self-correction exhausted after {attempts_made} attempts",
    )

    self._emit_event(
        "escalation_blocker_created",
        {
            "blocker_id": created.id,
            "reason": escalation.reason,
            "attempt_count": attempts_made,
            "attempted_fixes": escalation.attempted_fixes,
        },
    )

    self._debug_log(
        f"Created escalation blocker: {created.id}",
        level="INFO",
        data={
            "reason": escalation.reason,
            "attempt_count": attempts_made,
        },
        always=True,
    )
|
|
1919
|
+
|
|
1920
|
+
def _generate_blocker_question(
    self,
    step: PlanStep,
    result: StepResult,
) -> str:
    """Ask the LLM to phrase a blocker question for a failed step.

    The prompt tells the model to produce a question only when human input
    is truly required, and otherwise to answer with a
    "RESOLVE_AUTONOMOUSLY: ..." or "TECHNICAL_FIX: ..." directive.

    Args:
        step: The step that failed
        result: The failure result

    Returns:
        The stripped LLM reply, or a generic "How should I proceed?"
        question built from the step description and error if the LLM
        call raises
    """
    prompt = f"""A code execution step failed. Generate a clear, specific question to ask the user for help.

Step: {step.description}
Target: {step.target}
Error: {result.error}

CRITICAL INSTRUCTIONS:
1. ONLY generate a question if human input is TRULY required
2. Do NOT ask about tactical decisions - these should be resolved autonomously:
- Virtual environments (always create one)
- Package managers (use uv/pip/npm as appropriate)
- Test frameworks (use pytest/jest)
- File handling (overwrite existing files)
- Configuration options (use sensible defaults)
- Asyncio fixture scopes (use function scope)

3. DO ask about:
- Conflicting requirements in the specification
- Missing API keys or credentials
- Business logic that requires domain expertise
- Security policy clarifications

4. If the error is a tactical decision, respond with: "RESOLVE_AUTONOMOUSLY: [your decision]"
For example: "RESOLVE_AUTONOMOUSLY: Create virtual environment and install dependencies"

5. If the error is a technical issue (syntax error, import error, test failure), respond with:
"TECHNICAL_FIX: [what to fix]"

Generate a single question OR a RESOLVE_AUTONOMOUSLY/TECHNICAL_FIX directive:"""

    try:
        reply = self.llm.complete(
            messages=[{"role": "user", "content": prompt}],
            purpose=Purpose.GENERATION,
            max_tokens=300,
            temperature=0.0,
        )
        question = reply.content.strip()
    except Exception:
        # Fallback to generic question
        question = f"Step '{step.description}' failed with error: {result.error}. How should I proceed?"
    return question
|
|
1972
|
+
|
|
1973
|
+
def _handle_existing_blockers(self) -> None:
    """Put the agent into the blocked state for a blocker that is already open.

    Sets ``self.state.status`` to ``AgentStatus.BLOCKED``, records the first
    entry of ``self.context.open_blockers`` on ``self.state.blocker``, and
    emits an ``existing_blocker`` event carrying that blocker's id and
    question.
    """
    self.state.status = AgentStatus.BLOCKED

    # Only the first open blocker is surfaced; the index lookup fails if
    # there are none.
    first_blocker = self.context.open_blockers[0]
    self.state.blocker = BlockerInfo(
        reason="Pre-existing blocker",
        question=first_blocker.question,
    )

    payload = {
        "blocker_id": first_blocker.id,
        "question": first_blocker.question,
    }
    self._emit_event("existing_blocker", payload)
|
|
1988
|
+
|
|
1989
|
+
def _emit_event(self, event_type: str, data: dict) -> None:
    """Fan an agent event out to the callback, the workspace log, and SSE.

    The ``on_event`` callback (if set) is invoked directly, so an exception
    it raises propagates to the caller. The workspace event log and the SSE
    publisher are best-effort: any exception they raise is swallowed.

    Args:
        event_type: Internal agent event name (e.g. ``"agent_started"``).
        data: Event payload; merged into the workspace log entry.
    """
    callback = self.on_event
    if callback:
        callback(event_type, data)

    # Mirror the event into the workspace event log. "agent_started" maps to
    # WORK_STARTED; every other agent event is recorded as RUN_STEP.
    try:
        if event_type == "agent_started":
            workspace_event = EventType.WORK_STARTED
        else:
            workspace_event = EventType.RUN_STEP
        payload = {"agent_event": event_type, **data}
        events.emit_for_workspace(
            self.workspace,
            workspace_event,
            data=payload,
            print_event=False,
        )
    except Exception:
        pass  # Don't fail on event emission

    # SSE publishing needs both a publisher and a task id.
    if not (self.event_publisher and self.state.task_id):
        return
    try:
        self._publish_sse_event(event_type, data)
    except Exception:
        pass  # Don't fail on SSE emission
|
|
2011
|
+
|
|
2012
|
+
def _publish_sse_event(self, event_type: str, data: dict) -> None:
    """Translate an internal agent event into an SSE event and publish it.

    Recognised event types are ``step_started``, ``step_completed``,
    ``step_failed``, ``verification_failed``, ``agent_completed`` /
    ``agent_finished``, ``agent_failed`` and ``blocker_created``. Any other
    event type is ignored. Completion and ``agent_failed`` events also
    close the task's stream via ``complete_task_sync``.

    Args:
        event_type: Internal event type (step_started, step_completed, etc.)
        data: Event data
    """
    from codeframe.core.models import ProgressEvent, OutputEvent, ErrorEvent, CompletionEvent

    task_id = self.state.task_id

    def _send(sse_event) -> None:
        # Look the publisher up at send time, exactly when an event is ready.
        self.event_publisher.publish_sync(task_id, sse_event)

    # The three error-style events differ only in their fallback message.
    error_fallbacks = {
        "step_failed": "Step failed",
        "verification_failed": "Verification failed",
        "agent_failed": "Agent execution failed",
    }

    if event_type == "step_started":
        plan = self.state.plan
        total_steps = len(plan.steps) if plan else 1
        step_number = data.get("step", 0)
        _send(
            ProgressEvent(
                task_id=task_id,
                phase="execution",
                step=step_number,
                total_steps=total_steps,
                message=f"Step {step_number}: {data.get('target', 'unknown')}",
            )
        )
        return

    if event_type == "step_completed":
        output = data.get("output", "")
        if not output:
            return  # nothing to stream for a step without output
        # Only the first 500 characters of the output are forwarded.
        _send(OutputEvent(task_id=task_id, stream="stdout", line=output[:500]))
        return

    if event_type in error_fallbacks:
        _send(
            ErrorEvent(
                task_id=task_id,
                error_type=event_type,
                error=data.get("error", error_fallbacks[event_type]),
            )
        )
        if event_type == "agent_failed":
            self.event_publisher.complete_task_sync(task_id)
        return

    if event_type in ("agent_completed", "agent_finished"):
        # Handle both "agent_completed" and "agent_finished" (run() emits "agent_finished")
        status = data.get("status", "completed")
        # Accept lower- or upper-case spellings; unknown values pass through.
        for canonical in ("completed", "failed", "blocked"):
            if status in (canonical, canonical.upper()):
                sse_status = canonical
                break
        else:
            sse_status = status

        modified = [c.path for c in self.executor.changes] if self.executor else []
        _send(
            CompletionEvent(
                task_id=task_id,
                status=sse_status,
                duration_seconds=0,  # Could track this
                files_modified=modified,
            )
        )
        self.event_publisher.complete_task_sync(task_id)
        return

    if event_type == "blocker_created":
        from codeframe.core.models import BlockerEvent

        _send(
            BlockerEvent(
                task_id=task_id,
                blocker_id=data.get("blocker_id", ""),
                question=data.get("question", ""),
                context=data.get("context", ""),
            )
        )
|
|
2103
|
+
|
|
2104
|
+
def _setup_debug_log(self) -> None:
|
|
2105
|
+
"""Set up the debug log file in workspace directory."""
|
|
2106
|
+
timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
|
|
2107
|
+
self._debug_log_path = self.workspace.repo_path / f".codeframe_debug_{timestamp}.log"
|
|
2108
|
+
|
|
2109
|
+
# Write header
|
|
2110
|
+
with open(self._debug_log_path, "w") as f:
|
|
2111
|
+
f.write("=" * 80 + "\n")
|
|
2112
|
+
f.write("CodeFRAME Agent Debug Log\n")
|
|
2113
|
+
f.write(f"Started: {datetime.now(timezone.utc).isoformat()}\n")
|
|
2114
|
+
f.write(f"Workspace: {self.workspace.id}\n")
|
|
2115
|
+
f.write(f"Repo Path: {self.workspace.repo_path}\n")
|
|
2116
|
+
f.write("=" * 80 + "\n\n")
|
|
2117
|
+
|
|
2118
|
+
def _debug_log(
|
|
2119
|
+
self,
|
|
2120
|
+
message: str,
|
|
2121
|
+
level: str = "INFO",
|
|
2122
|
+
data: Optional[dict] = None,
|
|
2123
|
+
always: bool = False,
|
|
2124
|
+
) -> None:
|
|
2125
|
+
"""Write to the debug log file.
|
|
2126
|
+
|
|
2127
|
+
Args:
|
|
2128
|
+
message: Log message
|
|
2129
|
+
level: Log level (INFO, WARN, ERROR, DEBUG)
|
|
2130
|
+
data: Optional structured data to include
|
|
2131
|
+
always: If True, log even if failure count is low
|
|
2132
|
+
"""
|
|
2133
|
+
if not self._debug_log_path:
|
|
2134
|
+
return
|
|
2135
|
+
|
|
2136
|
+
# Only log detailed info after first failure, unless always=True
|
|
2137
|
+
if not always and self._failure_count == 0 and level == "DEBUG":
|
|
2138
|
+
return
|
|
2139
|
+
|
|
2140
|
+
timestamp = datetime.now(timezone.utc).strftime("%H:%M:%S.%f")[:-3]
|
|
2141
|
+
line = f"[{timestamp}] [{level}] {message}\n"
|
|
2142
|
+
|
|
2143
|
+
with open(self._debug_log_path, "a") as f:
|
|
2144
|
+
f.write(line)
|
|
2145
|
+
if data:
|
|
2146
|
+
for key, value in data.items():
|
|
2147
|
+
# Truncate long values for readability
|
|
2148
|
+
val_str = str(value)
|
|
2149
|
+
if len(val_str) > 500:
|
|
2150
|
+
val_str = val_str[:500] + "... [TRUNCATED]"
|
|
2151
|
+
f.write(f" {key}: {val_str}\n")
|
|
2152
|
+
f.write("\n")
|
|
2153
|
+
|
|
2154
|
+
def _debug_log_llm_interaction(
|
|
2155
|
+
self,
|
|
2156
|
+
label: str,
|
|
2157
|
+
prompt: str,
|
|
2158
|
+
response: Optional[str] = None,
|
|
2159
|
+
error: Optional[str] = None,
|
|
2160
|
+
) -> None:
|
|
2161
|
+
"""Log a full LLM interaction (prompt + response) for debugging."""
|
|
2162
|
+
if not self._debug_log_path:
|
|
2163
|
+
return
|
|
2164
|
+
|
|
2165
|
+
timestamp = datetime.now(timezone.utc).strftime("%H:%M:%S.%f")[:-3]
|
|
2166
|
+
|
|
2167
|
+
with open(self._debug_log_path, "a") as f:
|
|
2168
|
+
f.write(f"\n{'='*60}\n")
|
|
2169
|
+
f.write(f"[{timestamp}] LLM INTERACTION: {label}\n")
|
|
2170
|
+
f.write(f"{'='*60}\n\n")
|
|
2171
|
+
|
|
2172
|
+
f.write(f"--- PROMPT ({len(prompt)} chars) ---\n")
|
|
2173
|
+
f.write(prompt)
|
|
2174
|
+
f.write("\n\n")
|
|
2175
|
+
|
|
2176
|
+
if response:
|
|
2177
|
+
f.write(f"--- RESPONSE ({len(response)} chars) ---\n")
|
|
2178
|
+
f.write(response)
|
|
2179
|
+
f.write("\n\n")
|
|
2180
|
+
elif error:
|
|
2181
|
+
f.write(f"--- ERROR ---\n{error}\n\n")
|
|
2182
|
+
|
|
2183
|
+
f.write(f"{'='*60}\n\n")
|