amd-gaia 0.15.0__py3-none-any.whl → 0.15.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/METADATA +222 -223
- amd_gaia-0.15.2.dist-info/RECORD +182 -0
- {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/WHEEL +1 -1
- {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/entry_points.txt +1 -0
- {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/licenses/LICENSE.md +20 -20
- gaia/__init__.py +29 -29
- gaia/agents/__init__.py +19 -19
- gaia/agents/base/__init__.py +9 -9
- gaia/agents/base/agent.py +2132 -2177
- gaia/agents/base/api_agent.py +119 -120
- gaia/agents/base/console.py +1967 -1841
- gaia/agents/base/errors.py +237 -237
- gaia/agents/base/mcp_agent.py +86 -86
- gaia/agents/base/tools.py +88 -83
- gaia/agents/blender/__init__.py +7 -0
- gaia/agents/blender/agent.py +553 -556
- gaia/agents/blender/agent_simple.py +133 -135
- gaia/agents/blender/app.py +211 -211
- gaia/agents/blender/app_simple.py +41 -41
- gaia/agents/blender/core/__init__.py +16 -16
- gaia/agents/blender/core/materials.py +506 -506
- gaia/agents/blender/core/objects.py +316 -316
- gaia/agents/blender/core/rendering.py +225 -225
- gaia/agents/blender/core/scene.py +220 -220
- gaia/agents/blender/core/view.py +146 -146
- gaia/agents/chat/__init__.py +9 -9
- gaia/agents/chat/agent.py +809 -835
- gaia/agents/chat/app.py +1065 -1058
- gaia/agents/chat/session.py +508 -508
- gaia/agents/chat/tools/__init__.py +15 -15
- gaia/agents/chat/tools/file_tools.py +96 -96
- gaia/agents/chat/tools/rag_tools.py +1744 -1729
- gaia/agents/chat/tools/shell_tools.py +437 -436
- gaia/agents/code/__init__.py +7 -7
- gaia/agents/code/agent.py +549 -549
- gaia/agents/code/cli.py +377 -0
- gaia/agents/code/models.py +135 -135
- gaia/agents/code/orchestration/__init__.py +24 -24
- gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
- gaia/agents/code/orchestration/checklist_generator.py +713 -713
- gaia/agents/code/orchestration/factories/__init__.py +9 -9
- gaia/agents/code/orchestration/factories/base.py +63 -63
- gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
- gaia/agents/code/orchestration/factories/python_factory.py +106 -106
- gaia/agents/code/orchestration/orchestrator.py +841 -841
- gaia/agents/code/orchestration/project_analyzer.py +391 -391
- gaia/agents/code/orchestration/steps/__init__.py +67 -67
- gaia/agents/code/orchestration/steps/base.py +188 -188
- gaia/agents/code/orchestration/steps/error_handler.py +314 -314
- gaia/agents/code/orchestration/steps/nextjs.py +828 -828
- gaia/agents/code/orchestration/steps/python.py +307 -307
- gaia/agents/code/orchestration/template_catalog.py +469 -469
- gaia/agents/code/orchestration/workflows/__init__.py +14 -14
- gaia/agents/code/orchestration/workflows/base.py +80 -80
- gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
- gaia/agents/code/orchestration/workflows/python.py +94 -94
- gaia/agents/code/prompts/__init__.py +11 -11
- gaia/agents/code/prompts/base_prompt.py +77 -77
- gaia/agents/code/prompts/code_patterns.py +2034 -2036
- gaia/agents/code/prompts/nextjs_prompt.py +40 -40
- gaia/agents/code/prompts/python_prompt.py +109 -109
- gaia/agents/code/schema_inference.py +365 -365
- gaia/agents/code/system_prompt.py +41 -41
- gaia/agents/code/tools/__init__.py +42 -42
- gaia/agents/code/tools/cli_tools.py +1138 -1138
- gaia/agents/code/tools/code_formatting.py +319 -319
- gaia/agents/code/tools/code_tools.py +769 -769
- gaia/agents/code/tools/error_fixing.py +1347 -1347
- gaia/agents/code/tools/external_tools.py +180 -180
- gaia/agents/code/tools/file_io.py +845 -845
- gaia/agents/code/tools/prisma_tools.py +190 -190
- gaia/agents/code/tools/project_management.py +1016 -1016
- gaia/agents/code/tools/testing.py +321 -321
- gaia/agents/code/tools/typescript_tools.py +122 -122
- gaia/agents/code/tools/validation_parsing.py +461 -461
- gaia/agents/code/tools/validation_tools.py +806 -806
- gaia/agents/code/tools/web_dev_tools.py +1758 -1758
- gaia/agents/code/validators/__init__.py +16 -16
- gaia/agents/code/validators/antipattern_checker.py +241 -241
- gaia/agents/code/validators/ast_analyzer.py +197 -197
- gaia/agents/code/validators/requirements_validator.py +145 -145
- gaia/agents/code/validators/syntax_validator.py +171 -171
- gaia/agents/docker/__init__.py +7 -7
- gaia/agents/docker/agent.py +643 -642
- gaia/agents/emr/__init__.py +8 -8
- gaia/agents/emr/agent.py +1504 -1506
- gaia/agents/emr/cli.py +1322 -1322
- gaia/agents/emr/constants.py +475 -475
- gaia/agents/emr/dashboard/__init__.py +4 -4
- gaia/agents/emr/dashboard/server.py +1972 -1974
- gaia/agents/jira/__init__.py +11 -11
- gaia/agents/jira/agent.py +894 -894
- gaia/agents/jira/jql_templates.py +299 -299
- gaia/agents/routing/__init__.py +7 -7
- gaia/agents/routing/agent.py +567 -570
- gaia/agents/routing/system_prompt.py +75 -75
- gaia/agents/summarize/__init__.py +11 -0
- gaia/agents/summarize/agent.py +885 -0
- gaia/agents/summarize/prompts.py +129 -0
- gaia/api/__init__.py +23 -23
- gaia/api/agent_registry.py +238 -238
- gaia/api/app.py +305 -305
- gaia/api/openai_server.py +575 -575
- gaia/api/schemas.py +186 -186
- gaia/api/sse_handler.py +373 -373
- gaia/apps/__init__.py +4 -4
- gaia/apps/llm/__init__.py +6 -6
- gaia/apps/llm/app.py +184 -169
- gaia/apps/summarize/app.py +116 -633
- gaia/apps/summarize/html_viewer.py +133 -133
- gaia/apps/summarize/pdf_formatter.py +284 -284
- gaia/audio/__init__.py +2 -2
- gaia/audio/audio_client.py +439 -439
- gaia/audio/audio_recorder.py +269 -269
- gaia/audio/kokoro_tts.py +599 -599
- gaia/audio/whisper_asr.py +432 -432
- gaia/chat/__init__.py +16 -16
- gaia/chat/app.py +428 -430
- gaia/chat/prompts.py +522 -522
- gaia/chat/sdk.py +1228 -1225
- gaia/cli.py +5659 -5632
- gaia/database/__init__.py +10 -10
- gaia/database/agent.py +176 -176
- gaia/database/mixin.py +290 -290
- gaia/database/testing.py +64 -64
- gaia/eval/batch_experiment.py +2332 -2332
- gaia/eval/claude.py +542 -542
- gaia/eval/config.py +37 -37
- gaia/eval/email_generator.py +512 -512
- gaia/eval/eval.py +3179 -3179
- gaia/eval/groundtruth.py +1130 -1130
- gaia/eval/transcript_generator.py +582 -582
- gaia/eval/webapp/README.md +167 -167
- gaia/eval/webapp/package-lock.json +875 -875
- gaia/eval/webapp/package.json +20 -20
- gaia/eval/webapp/public/app.js +3402 -3402
- gaia/eval/webapp/public/index.html +87 -87
- gaia/eval/webapp/public/styles.css +3661 -3661
- gaia/eval/webapp/server.js +415 -415
- gaia/eval/webapp/test-setup.js +72 -72
- gaia/installer/__init__.py +23 -0
- gaia/installer/init_command.py +1275 -0
- gaia/installer/lemonade_installer.py +619 -0
- gaia/llm/__init__.py +10 -2
- gaia/llm/base_client.py +60 -0
- gaia/llm/exceptions.py +12 -0
- gaia/llm/factory.py +70 -0
- gaia/llm/lemonade_client.py +3421 -3221
- gaia/llm/lemonade_manager.py +294 -294
- gaia/llm/providers/__init__.py +9 -0
- gaia/llm/providers/claude.py +108 -0
- gaia/llm/providers/lemonade.py +118 -0
- gaia/llm/providers/openai_provider.py +79 -0
- gaia/llm/vlm_client.py +382 -382
- gaia/logger.py +189 -189
- gaia/mcp/agent_mcp_server.py +245 -245
- gaia/mcp/blender_mcp_client.py +138 -138
- gaia/mcp/blender_mcp_server.py +648 -648
- gaia/mcp/context7_cache.py +332 -332
- gaia/mcp/external_services.py +518 -518
- gaia/mcp/mcp_bridge.py +811 -550
- gaia/mcp/servers/__init__.py +6 -6
- gaia/mcp/servers/docker_mcp.py +83 -83
- gaia/perf_analysis.py +361 -0
- gaia/rag/__init__.py +10 -10
- gaia/rag/app.py +293 -293
- gaia/rag/demo.py +304 -304
- gaia/rag/pdf_utils.py +235 -235
- gaia/rag/sdk.py +2194 -2194
- gaia/security.py +183 -163
- gaia/talk/app.py +287 -289
- gaia/talk/sdk.py +538 -538
- gaia/testing/__init__.py +87 -87
- gaia/testing/assertions.py +330 -330
- gaia/testing/fixtures.py +333 -333
- gaia/testing/mocks.py +493 -493
- gaia/util.py +46 -46
- gaia/utils/__init__.py +33 -33
- gaia/utils/file_watcher.py +675 -675
- gaia/utils/parsing.py +223 -223
- gaia/version.py +100 -100
- amd_gaia-0.15.0.dist-info/RECORD +0 -168
- gaia/agents/code/app.py +0 -266
- gaia/llm/llm_client.py +0 -723
- {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/top_level.txt +0 -0
gaia/agents/code/orchestration/orchestrator.py
@@ -1,841 +1,841 @@
-# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
-# SPDX-License-Identifier: MIT
-"""
-Orchestrator for LLM-driven workflow execution.
-
-The Orchestrator controls workflow execution using Checklist Mode:
-- LLM generates a checklist of template invocations based on user request
-- Executor runs templates deterministically with error recovery
-- Provides semantic understanding (e.g., adds checkboxes for todos)
-
-Features:
-- LLM-driven checklist generation
-- Deterministic template execution
-- Error recovery with three-tier strategy
-- Progress reporting
-"""
-
-import json
-import logging
-import os
-import re
-import subprocess
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Protocol
-
-from gaia.agents.base.console import AgentConsole
-
-from .steps.base import ToolExecutor, UserContext
-from .steps.error_handler import ErrorHandler
-
-logger = logging.getLogger(__name__)
-
-
-class ProjectDirectoryError(Exception):
-    """Raised when the project directory cannot be prepared safely."""
-
-
-def _estimate_token_count(text: str) -> int:
-    """Lightweight token estimate assuming ~4 characters per token."""
-    avg_chars_per_token = 4
-    byte_length = len(text.encode("utf-8"))
-    return max(1, (byte_length + avg_chars_per_token - 1) // avg_chars_per_token)
-
-
-class ChatSDK(Protocol):
-    """Protocol for chat SDK interface used by checklist generator."""
-
-    def send(self, message: str, timeout: int = 600, no_history: bool = False) -> Any:
-        """Send a message and get response."""
-        ...
-
-
-@dataclass
-class ExecutionResult:
-    """Result of a complete workflow execution."""
-
-    success: bool
-    phases_completed: List[str] = field(default_factory=list)
-    phases_failed: List[str] = field(default_factory=list)
-    total_steps: int = 0
-    steps_succeeded: int = 0
-    steps_failed: int = 0
-    steps_skipped: int = 0
-    errors: List[str] = field(default_factory=list)
-    outputs: Dict[str, Any] = field(default_factory=dict)
-
-    @property
-    def summary(self) -> str:
-        """Get a human-readable summary."""
-        status = "SUCCESS" if self.success else "FAILED"
-        return (
-            f"{status}: {self.steps_succeeded}/{self.total_steps} steps completed, "
-            f"{self.steps_failed} failed, {self.steps_skipped} skipped"
-        )
-
-
-CHECKPOINT_REVIEW_PROMPT = """You are the checkpoint reviewer for the GAIA web development agent.
-
-You receive:
-- The original user request
-- A summary of the latest checklist execution (including errors/warnings)
-- Logs from the validation and testing tools (run_typescript_check, validate_styles, run_tests, etc.)
-- Any previously requested fixes that are still outstanding
-
-Decide if the application is ready to ship or if additional fixes are required.
-
-Rules:
-1. If ANY validation or test log failed, status must be \"needs_fix\" with concrete guidance.
-2. Only return \"complete\" when the app works end-to-end and validations passed.
-3. When fixes are needed, suggest actionable steps that can be executed through `fix_code` (LLM-assisted repair of problematic files).
-
-Respond with concise JSON only:
-{
-  \"status\": \"complete\" | \"needs_fix\",
-  \"reasoning\": \"short justification\",
-  \"issues\": [\"list of concrete bugs or failures\"],
-  \"fix_instructions\": [\"ordered actions the next checklist should perform\"]
-}
-"""
-
-MAX_CHAT_HISTORY_TOKENS = 15000
-
-
-@dataclass
-class CheckpointAssessment:
-    """LLM-produced verdict about the current checkpoint."""
-
-    status: str
-    reasoning: str
-    issues: List[str] = field(default_factory=list)
-    fix_instructions: List[str] = field(default_factory=list)
-
-    @property
-    def needs_fix(self) -> bool:
-        """Return True when the reviewer requires another checklist."""
-        return self.status.lower() != "complete"
-
-    def to_dict(self) -> Dict[str, Any]:
-        """Serialize the assessment."""
-        return {
-            "status": self.status,
-            "reasoning": self.reasoning,
-            "issues": self.issues,
-            "fix_instructions": self.fix_instructions,
-        }
-
-
-class Orchestrator:
-    """Controls LLM-driven workflow execution with error recovery.
-
-    The orchestrator uses Checklist Mode exclusively:
-    - LLM analyzes user request and generates a checklist of templates
-    - Executor runs templates deterministically
-    - Provides semantic understanding (e.g., adds checkboxes for todos)
-    """
-
-    def __init__(
-        self,
-        tool_executor: ToolExecutor,
-        llm_client: ChatSDK,
-        llm_fixer: Optional[Callable[[str, str], Optional[str]]] = None,
-        progress_callback: Optional[Callable[[str, str, int, int], None]] = None,
-        console: Optional[AgentConsole] = None,
-        max_checklist_loops: int = 10,
-    ):
-        """Initialize orchestrator.
-
-        Args:
-            tool_executor: Function to execute tools (name, args) -> result
-            llm_client: Chat SDK for checklist generation (required)
-            llm_fixer: Optional LLM-based code fixer for escalation
-            progress_callback: Optional callback(phase, step, current, total)
-            console: Optional console for displaying output
-            max_checklist_loops: Max number of checklist iterations before giving up
-        """
-        if llm_client is None:
-            raise ValueError("llm_client is required for Orchestrator")
-
-        self.tool_executor = tool_executor
-        self.llm_client = llm_client
-        self.error_handler = ErrorHandler(
-            command_executor=self._run_command,
-            llm_fixer=llm_fixer,
-        )
-        self.progress_callback = progress_callback
-        self.console = console
-        self.max_checklist_loops = max(1, max_checklist_loops)
-
-        # Initialize checklist components
-        from .checklist_executor import ChecklistExecutor
-        from .checklist_generator import ChecklistGenerator
-
-        self.checklist_generator = ChecklistGenerator(llm_client)
-        self.checklist_executor = ChecklistExecutor(
-            tool_executor,
-            llm_client=llm_client,  # Pass LLM for per-item code generation
-            error_handler=self.error_handler,
-            progress_callback=self._checklist_progress_callback,
-            console=console,  # Pass console
-        )
-        logger.debug(
-            "Orchestrator initialized - LLM will plan execution AND generate code per item"
-        )
-
-    def execute(
-        self, context: UserContext, step_through: bool = False
-    ) -> ExecutionResult:
-        """Execute the workflow using iterative LLM-generated checklists."""
-        logger.debug("Executing workflow (LLM-driven checklist loop)")
-
-        from .project_analyzer import ProjectAnalyzer
-
-        analyzer = ProjectAnalyzer()
-        aggregated_validation_logs: List[Any] = []
-        fix_feedback: List[str] = []
-        iteration_outputs: List[Dict[str, Any]] = []
-        combined_errors: List[str] = []
-        previous_execution_errors: List[str] = []
-        previous_validation_logs: List[Any] = []
-
-        total_steps = 0
-        steps_succeeded = 0
-        steps_failed = 0
-        success = False
-
-        try:
-            context.project_dir = self._prepare_project_directory(context)
-        except ProjectDirectoryError as exc:
-            error_message = str(exc)
-            logger.error(error_message)
-            if self.console:
-                self.console.print_error(error_message)
-            return ExecutionResult(
-                success=False,
-                phases_completed=[],
-                phases_failed=["project_directory"],
-                total_steps=1,
-                steps_succeeded=0,
-                steps_failed=1,
-                steps_skipped=0,
-                errors=[error_message],
-                outputs={
-                    "iterations": [],
-                    "validation_logs": [],
-                    "fix_feedback": [],
-                    "project_dir": context.project_dir,
-                },
-            )
-
-        for iteration in range(1, self.max_checklist_loops + 1):
-            logger.debug("Starting checklist iteration %d", iteration)
-
-            if iteration > 1:
-                summary_result = self._maybe_summarize_conversation_history()
-                if summary_result and self.console:
-                    self.console.print_info(
-                        "Conversation history summarized to stay within token limits."
-                    )
-
-            project_state = analyzer.analyze(context.project_dir)
-
-            # Surface accumulated signals to the next checklist prompt
-            context.validation_reports = [
-                log.to_dict() for log in aggregated_validation_logs
-            ]
-            context.fix_feedback = fix_feedback.copy()
-
-            logger.info(
-                "Generating checklist iteration %d of %d",
-                iteration,
-                self.max_checklist_loops,
-            )
-            if self.console:
-                self.console.print_info(
-                    f"Generating checklist iteration {iteration} of {self.max_checklist_loops}"
-                )
-            if iteration == 1:
-                checklist = self.checklist_generator.generate_initial_checklist(
-                    context, project_state
-                )
-            else:
-                checklist = self.checklist_generator.generate_debug_checklist(
-                    context=context,
-                    project_state=project_state,
-                    prior_errors=previous_execution_errors,
-                    validation_logs=previous_validation_logs,
-                )
-
-            if not checklist.is_valid:
-                logger.error(
-                    "Invalid checklist (iteration %d): %s",
-                    iteration,
-                    checklist.validation_errors,
-                )
-                try:
-                    checklist_dump = json.dumps(checklist.to_dict(), indent=2)
-                except Exception:  # pylint: disable=broad-exception-caught
-                    checklist_dump = str(checklist)
-                logger.error("Invalid checklist payload: %s", checklist_dump)
-                if self.console:
-                    self.console.pretty_print_json(
-                        checklist.to_dict(), title="Invalid Checklist"
-                    )
-                combined_errors.extend(checklist.validation_errors)
-                assessment = CheckpointAssessment(
-                    status="needs_fix",
-                    reasoning="Checklist validation failed",
-                    issues=checklist.validation_errors.copy(),
-                    fix_instructions=checklist.validation_errors.copy(),
-                )
-                iteration_outputs.append(
-                    {
-                        "iteration": iteration,
-                        "checklist": checklist.to_dict(),
-                        "execution": None,
-                        "assessment": assessment.to_dict(),
-                    }
-                )
-                break
-
-            logger.debug(
-                "Generated checklist with %d items: %s",
-                len(checklist.items),
-                checklist.reasoning,
-            )
-
-            checklist_result = self.checklist_executor.execute(
-                checklist, context, step_through=step_through
-            )
-
-            total_steps += len(checklist_result.item_results)
-            steps_succeeded += checklist_result.items_succeeded
-            steps_failed += checklist_result.items_failed
-            combined_errors.extend(checklist_result.errors)
-
-            aggregated_validation_logs.extend(checklist_result.validation_logs)
-            previous_execution_errors = checklist_result.errors.copy()
-            previous_validation_logs = checklist_result.validation_logs.copy()
-
-            logger.info("Assessing application state after iteration %d", iteration)
-            if self.console:
-                self.console.print_info(
-                    f"Assessing application state after iteration {iteration}"
-                )
-            assessment = self._assess_checkpoint(
-                context=context,
-                checklist=checklist,
-                execution_result=checklist_result,
-                validation_history=aggregated_validation_logs,
-            )
-            if assessment.needs_fix:
-                logger.info(
-                    "Application not ready after iteration %d, planning another checklist: %s",
-                    iteration,
-                    assessment.reasoning or "no reasoning provided",
-                )
-                if self.console:
-                    self.console.print_info(
-                        "Application not ready; preparing another checklist."
-                    )
-            else:
-                logger.info(
-                    "Application marked complete after iteration %d: %s",
-                    iteration,
-                    assessment.reasoning or "no reasoning provided",
-                )
-                if self.console:
-                    self.console.print_success("Application marked complete.")
-
-            iteration_outputs.append(
-                {
-                    "iteration": iteration,
-                    "checklist": checklist.to_dict(),
-                    "execution": {
-                        "summary": checklist_result.summary,
-                        "success": checklist_result.success,
-                        "files": checklist_result.total_files,
-                        "errors": checklist_result.errors,
-                        "warnings": checklist_result.warnings,
-                        "item_results": [
-                            r.to_dict() for r in checklist_result.item_results
-                        ],
-                        "validation_logs": [
-                            log.to_dict() for log in checklist_result.validation_logs
-                        ],
-                    },
-                    "assessment": assessment.to_dict(),
-                }
-            )
-
-            if not assessment.needs_fix:
-                success = (
-                    checklist_result.success and assessment.status.lower() == "complete"
-                )
-                break
-
-            instructions = assessment.fix_instructions or assessment.issues
-            if not instructions and assessment.reasoning:
-                instructions = [assessment.reasoning]
-            if instructions:
-                fix_feedback.extend(instructions)
-
-        else:
-            combined_errors.append(
-                f"Reached maximum checklist iterations ({self.max_checklist_loops}) without passing validation"
-            )
-
-        latest_execution = None
-        latest_checklist = None
-        if iteration_outputs:
-            latest_entry = iteration_outputs[-1]
-            latest_execution = latest_entry.get("execution")
-            latest_checklist = latest_entry.get("checklist")
-
-        outputs = {
-            "iterations": iteration_outputs,
-            "validation_logs": [log.to_dict() for log in aggregated_validation_logs],
-            "fix_feedback": fix_feedback,
-            "project_dir": context.project_dir,
-        }
-
-        if latest_execution:
-            outputs["files"] = latest_execution.get("files", [])
-            outputs["detailed_results"] = latest_execution.get("item_results", [])
-        if latest_checklist:
-            outputs["checklist"] = latest_checklist
-
-        return ExecutionResult(
-            success=success,
-            phases_completed=["checklist"] if success else [],
-            phases_failed=[] if success else ["checklist"],
-            total_steps=total_steps,
-            steps_succeeded=steps_succeeded,
-            steps_failed=steps_failed,
-            steps_skipped=0,
-            errors=combined_errors,
-            outputs=outputs,
-        )
-
-    def _run_command(self, command: str, cwd: Optional[str] = None) -> tuple[int, str]:
-        """Run a shell command.
-
-        Args:
-            command: Command to run
-            cwd: Working directory
-
-        Returns:
-            Tuple of (exit_code, output)
-        """
-        try:
-            result = subprocess.run(
-                command,
-                shell=True,
-                cwd=cwd,
-                capture_output=True,
-                text=True,
-                timeout=1200,
-                check=False,  # We handle return codes ourselves
-            )
-            output = result.stdout + result.stderr
-            return result.returncode, output
-        except subprocess.TimeoutExpired:
-            return 1, "Command timed out"
-        except Exception as e:
-            return 1, str(e)
-
-    def _checklist_progress_callback(
-        self, description: str, current: int, total: int
-    ) -> None:
-        """Progress callback adapter for checklist execution.
-
-        Converts checklist progress format to the standard progress format.
-
-        Args:
-            description: Current item description
-            current: Current item number
-            total: Total items
-        """
-        if self.progress_callback:
-            self.progress_callback("checklist", description, current, total)
-
-    def _assess_checkpoint(
-        self,
-        context: UserContext,
-        checklist: Any,
-        execution_result: Any,
-        validation_history: List[Any],
-    ) -> CheckpointAssessment:
-        """Ask the LLM whether the workflow is complete or needs another checklist."""
-        prompt = self._build_checkpoint_prompt(
-            context=context,
-            checklist=checklist,
-            execution_result=execution_result,
-            validation_history=validation_history,
-        )
-
-        try:
-            response = self.llm_client.send(prompt, timeout=1200)
-            data = self._parse_checkpoint_response(response)
-            return CheckpointAssessment(
-                status=data.get("status", "needs_fix"),
-                reasoning=data.get("reasoning", ""),
-                issues=data.get("issues", []),
-                fix_instructions=data.get("fix_instructions", []),
-            )
-        except Exception as exc:  # pylint: disable=broad-exception-caught
-            logger.exception("Checkpoint assessment failed")
-            return CheckpointAssessment(
-                status="needs_fix",
-                reasoning="Failed to interpret checkpoint reviewer output",
-                issues=[f"Checkpoint reviewer error: {exc}"],
-                fix_instructions=[
-                    "Inspect validation logs, then fix the root cause using fix_code."
-                ],
-            )
-
-    def _build_checkpoint_prompt(
-        self,
-        context: UserContext,
-        checklist: Any,
-        execution_result: Any,
-        validation_history: List[Any],
-    ) -> str:
-        """Build the prompt for the checkpoint reviewer."""
-        validation_summary = self._format_validation_history(
-            validation_history, getattr(execution_result, "validation_logs", None)
-        )
-
-        outstanding = (
-            "\n".join(f"- {item}" for item in context.fix_feedback)
-            if context.fix_feedback
-            else "None"
-        )
-
-        errors = execution_result.errors or ["None"]
-        warnings = execution_result.warnings or []
-
-        sections = [
-            CHECKPOINT_REVIEW_PROMPT.strip(),
-            "",
-            "## User Request",
-            context.user_request,
-            "",
-            "## Latest Checklist Plan",
-            f"Reasoning: {checklist.reasoning}",
-            "",
-            "## Execution Summary",
-            execution_result.summary,
-            "",
-            "## Execution Errors",
-            "\n".join(f"- {err}" for err in errors),
-            "",
-            "## Execution Warnings",
-            "\n".join(f"- {warn}" for warn in warnings) if warnings else "None",
-            "",
-            "## Validation & Test Logs",
-            validation_summary,
-            "",
-            "## Outstanding Fix Requests",
-            outstanding,
-        ]
-
-        return "\n".join(sections)
-
-    def _maybe_summarize_conversation_history(self) -> Optional[str]:
-        """Trigger ChatSDK conversation summarization when available."""
-        chat_sdk = getattr(self, "llm_client", None)
-        if not chat_sdk or not hasattr(chat_sdk, "summarize_conversation_history"):
-            return None
-
-        try:
-            summary = chat_sdk.summarize_conversation_history(
-                max_history_tokens=MAX_CHAT_HISTORY_TOKENS
-            )
-            if summary:
-                logger.info(
-                    "Conversation history summarized to ~%d tokens",
-                    _estimate_token_count(summary),
-                )
-            return summary
-        except Exception as exc:  # pylint: disable=broad-exception-caught
-            logger.exception("Failed to summarize conversation history: %s", exc)
-            return None
-
-    def _prepare_project_directory(self, context: UserContext) -> str:
-        """
-        Ensure the project directory is ready for creation workflows.
-
-        If the provided path exists and is non-empty without an existing project,
-        pick a unique subdirectory via the LLM to avoid create-next-app failures.
-        """
-        base_path = Path(context.project_dir).expanduser()
-        if base_path.exists() and not base_path.is_dir():
-            raise ProjectDirectoryError(
-                f"Provided path is not a directory: {base_path}"
-            )
-
-        if not base_path.exists():
-            base_path.mkdir(parents=True, exist_ok=True)
-            logger.info("Created project directory: %s", base_path)
-            return str(base_path)
-
-        existing_entries = [p.name for p in base_path.iterdir()]
-        if not existing_entries:
-            return str(base_path)
-
-        if self.console:
-            self.console.print_warning(
-                f"Target directory {base_path} is not empty; selecting a new subdirectory."
-            )
-
-        suggested = self._choose_subdirectory_name(
-            base_path, existing_entries, context.user_request
-        )
-        if not suggested:
-            raise ProjectDirectoryError(
-                f"Unable to find an available project name under {base_path}. "
-                "Provide one explicitly with --path."
-            )
-
-        new_dir = base_path / suggested
-        new_dir.mkdir(parents=False, exist_ok=False)
-        logger.info("Using nested project directory: %s", new_dir)
-        # Align process cwd with the newly created project directory.
-        try:
-            os.chdir(new_dir)
-        except OSError as exc:
-            logger.warning("Failed to chdir to %s: %s", new_dir, exc)
-        if self.console:
-            self.console.print_info(f"Using project directory: {new_dir}")
-        return str(new_dir)
-
-    def _choose_subdirectory_name(
-        self, base_path: Path, existing_entries: List[str], user_request: str
-    ) -> Optional[str]:
-        """Ask the LLM for a unique subdirectory name, retrying on conflicts."""
-        existing_lower = {name.lower() for name in existing_entries}
-        prompt = self._build_directory_prompt(
-            base_path, existing_entries, user_request, None
-        )
-        last_reason = None
-
-        system_prompt = "You suggest concise folder names for new projects."
-
-        for attempt in range(1, 4):
-            try:
-                response = self._send_prompt_without_history(
-                    prompt, timeout=120, system_prompt=system_prompt
-                )
-            except Exception as exc:  # pylint: disable=broad-exception-caught
-                last_reason = f"LLM error on attempt {attempt}: {exc}"
-                logger.warning(last_reason)
-                prompt = self._build_directory_prompt(
-                    base_path, existing_entries, user_request, last_reason
-                )
-                continue
-
-            raw_response = self._extract_response_text(response)
-            candidate = self._sanitize_directory_name(raw_response)
-            if not candidate:
-                last_reason = "LLM returned an empty or invalid directory name."
-            elif candidate.lower() in existing_lower:
-                last_reason = f"Name '{candidate}' already exists in {base_path}."
-            elif "/" in candidate or "\\" in candidate or ".." in candidate:
-                last_reason = "Directory name contained path separators or traversal."
-            elif len(candidate) > 64:
-                last_reason = "Directory name exceeded 64 characters."
-            else:
-                candidate_path = base_path / candidate
-                if candidate_path.exists():
-                    last_reason = f"Directory '{candidate}' already exists."
-                else:
-                    return candidate
-
-            logger.warning(
-                "Directory name attempt %d rejected: %s", attempt, last_reason
-            )
-            prompt = self._build_directory_prompt(
-                base_path, existing_entries, user_request, last_reason
-            )
-
-        return None
-
-    @staticmethod
-    def _sanitize_directory_name(raw: str) -> str:
-        """Normalize LLM output to a filesystem-safe directory name."""
-        if not raw:
-            return ""
-        candidate = raw.strip().strip("`'\"")
-        candidate = candidate.splitlines()[0].strip()
-        candidate = re.sub(r"[^A-Za-z0-9_-]+", "-", candidate)
-        return candidate.strip("-_").lower()
-
-    def _send_prompt_without_history(
-        self, prompt: str, timeout: int = 120, system_prompt: Optional[str] = None
-    ) -> Any:
-        """
-        Send a prompt without reading from or writing to chat history.
-
-        Prefers the underlying LLM client's `generate` API when available,
-        falling back to `send(..., no_history=True)` for compatibility.
-        """
-        # If the ChatSDK exposes the underlying LLM client, use it directly with chat messages
-        # to avoid any stored history and ensure system prompts are applied cleanly.
-        llm_client = getattr(self.llm_client, "llm_client", None)
-        if llm_client and hasattr(llm_client, "generate"):
-            model = getattr(getattr(self.llm_client, "config", None), "model", None)
-            messages = []
-            if system_prompt:
-                messages.append({"role": "system", "content": system_prompt})
-            messages.append({"role": "user", "content": prompt})
-            return llm_client.generate(
-                prompt=prompt,
-                messages=messages,
-                model=model,
-                timeout=timeout,
-                endpoint="chat",
-            )
-
-        # Fallback: use send with no_history to avoid persisting messages.
-        if hasattr(self.llm_client, "send"):
-            return self.llm_client.send(
-                prompt, timeout=timeout, no_history=True, system_prompt=system_prompt
-            )
-
-        raise ValueError("LLM client does not support generate or send APIs")
-
-    @staticmethod
-    def _build_directory_prompt(
-        base_path: Path,
-        existing_entries: List[str],
-        user_request: Optional[str],
-        rejection_reason: Optional[str],
-    ) -> str:
-        """Construct the LLM prompt for picking a safe project subdirectory."""
-        entries = sorted(existing_entries)
-        max_list = 50
-        if len(entries) > max_list:
-            entries_display = "\n".join(f"- {name}" for name in entries[:max_list])
-            entries_display += f"\n- ...and {len(entries) - max_list} more"
-        else:
-            entries_display = "\n".join(f"- {name}" for name in entries)
-
-        prompt_sections = [
-            "You must choose a new folder name for a project because the target path is not empty.",
-            f"Base path: {base_path}",
-            "Existing files and folders you MUST avoid (do not reuse any of these names):",
-            entries_display or "- <empty>",
-            "User request driving this project:",
-            user_request or "<no request provided>",
-            "Rules:",
-            "- Return a single folder name only. Do NOT echo the instructions. No paths, quotes, JSON, or extra text.",
-            "- Use lowercase kebab-case or snake_case; ASCII letters, numbers, hyphens, and underscores only.",
-            "- Do not use any existing names above. Avoid dots, spaces, or slashes.",
-            "- Keep it under 40 characters.",
-        ]
-
-        if rejection_reason:
-            prompt_sections.append(
-                f"Previous suggestion was rejected: {rejection_reason}. Try a different unique name."
-            )
-
-        return "\n".join(prompt_sections)
-
-    def _format_validation_history(
-        self, validation_history: List[Any], latest_plan_logs: Optional[List[Any]]
-    ) -> str:
-        """Format validation logs, splitting latest plan from historical ones."""
-
-        if not validation_history:
-            return "No validation or test commands have been executed yet."
-
-        latest_logs = latest_plan_logs or []
-        latest_count = len(latest_logs)
-        historical_logs = (
-            validation_history[:-latest_count] if latest_count else validation_history
-        )
-
-        def normalize(entry: Any) -> Dict[str, Any]:
-            if hasattr(entry, "to_dict"):
-                return entry.to_dict()
-            if isinstance(entry, dict):
-                return entry
-            return {}
-
-        def render(entries: List[Any], limit: Optional[int] = None) -> List[str]:
-            if not entries:
-                return ["None"]
-
-            selected = entries if limit is None else entries[-limit:]
-            lines: List[str] = []
-            for entry in selected:
-                data = normalize(entry)
-                template = data.get("template", "unknown")
-                description = data.get("description", "")
-                success = data.get("success", True)
-                status = "PASS" if success else "FAIL"
-                error = data.get("error")
-                output = data.get("output", {})
-
-                lines.append(f"- [{status}] {template}: {description}")
-                if error:
-                    lines.append(f" Error: {error}")
-
-                snippet = ""
-                if isinstance(output, dict):
-                    for key in ("stdout", "stderr", "message", "log", "details"):
-                        if output.get(key):
-                            snippet = str(output[key])
-                            break
-                    if not snippet and output:
-                        snippet = json.dumps(output)[:400]
-                elif output:
-                    snippet = str(output)[:400]
-
-                snippet = snippet.strip()
-                if snippet:
-                    lines.append(f" Output: {snippet[:400]}")
-            return lines
-
-        sections: List[str] = []
-        sections.append("### Latest Plan Results")
-        sections.extend(render(list(latest_logs)))
-        sections.append("")
-        sections.append("### Previous Plan History")
-        sections.extend(render(list(historical_logs), limit=5))
-
-        return "\n".join(sections).strip()
-
-    def _parse_checkpoint_response(self, response: Any) -> Dict[str, Any]:
-        """Parse JSON output from the checkpoint reviewer."""
-        text = self._extract_response_text(response)
-        json_str = self._extract_json(text)
-        return json.loads(json_str)
-
-    @staticmethod
-    def _extract_response_text(response: Any) -> str:
-        """Normalize SDK response objects to raw text."""
-        if isinstance(response, str):
-            return response
-        if hasattr(response, "text"):
-            return response.text
-        if hasattr(response, "content"):
-            return response.content
-        if isinstance(response, dict):
-            return response.get("text", response.get("content", str(response)))
-        return str(response)
-
-    @staticmethod
-    def _extract_json(text: str) -> str:
-        """Extract JSON blob from arbitrary text (markdown-safe)."""
-        code_block = re.search(r"```(?:json)?\\s*\\n?(.*?)\\n?```", text, re.DOTALL)
-        if code_block:
-            return code_block.group(1).strip()
-
-        json_match = re.search(r"\\{.*\\}", text, re.DOTALL)
-        if json_match:
-            return json_match.group(0)
-
-        return text.strip()
1
|
+
# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: MIT
|
|
3
|
+
"""
|
|
4
|
+
Orchestrator for LLM-driven workflow execution.
|
|
5
|
+
|
|
6
|
+
The Orchestrator controls workflow execution using Checklist Mode:
|
|
7
|
+
- LLM generates a checklist of template invocations based on user request
|
|
8
|
+
- Executor runs templates deterministically with error recovery
|
|
9
|
+
- Provides semantic understanding (e.g., adds checkboxes for todos)
|
|
10
|
+
|
|
11
|
+
Features:
|
|
12
|
+
- LLM-driven checklist generation
|
|
13
|
+
- Deterministic template execution
|
|
14
|
+
- Error recovery with three-tier strategy
|
|
15
|
+
- Progress reporting
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import json
|
|
19
|
+
import logging
|
|
20
|
+
import os
|
|
21
|
+
import re
|
|
22
|
+
import subprocess
|
|
23
|
+
from dataclasses import dataclass, field
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
from typing import Any, Callable, Dict, List, Optional, Protocol
|
|
26
|
+
|
|
27
|
+
from gaia.agents.base.console import AgentConsole
|
|
28
|
+
|
|
29
|
+
from .steps.base import ToolExecutor, UserContext
|
|
30
|
+
from .steps.error_handler import ErrorHandler
|
|
31
|
+
|
|
32
|
+
logger = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ProjectDirectoryError(Exception):
|
|
36
|
+
"""Raised when the project directory cannot be prepared safely."""
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _estimate_token_count(text: str) -> int:
|
|
40
|
+
"""Lightweight token estimate assuming ~4 characters per token."""
|
|
41
|
+
avg_chars_per_token = 4
|
|
42
|
+
byte_length = len(text.encode("utf-8"))
|
|
43
|
+
return max(1, (byte_length + avg_chars_per_token - 1) // avg_chars_per_token)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class ChatSDK(Protocol):
|
|
47
|
+
"""Protocol for chat SDK interface used by checklist generator."""
|
|
48
|
+
|
|
49
|
+
def send(self, message: str, timeout: int = 600, no_history: bool = False) -> Any:
|
|
50
|
+
"""Send a message and get response."""
|
|
51
|
+
...
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
|
|
55
|
+
class ExecutionResult:
|
|
56
|
+
"""Result of a complete workflow execution."""
|
|
57
|
+
|
|
58
|
+
success: bool
|
|
59
|
+
phases_completed: List[str] = field(default_factory=list)
|
|
60
|
+
phases_failed: List[str] = field(default_factory=list)
|
|
61
|
+
total_steps: int = 0
|
|
62
|
+
steps_succeeded: int = 0
|
|
63
|
+
steps_failed: int = 0
|
|
64
|
+
steps_skipped: int = 0
|
|
65
|
+
errors: List[str] = field(default_factory=list)
|
|
66
|
+
outputs: Dict[str, Any] = field(default_factory=dict)
|
|
67
|
+
|
|
68
|
+
@property
|
|
69
|
+
def summary(self) -> str:
|
|
70
|
+
"""Get a human-readable summary."""
|
|
71
|
+
status = "SUCCESS" if self.success else "FAILED"
|
|
72
|
+
return (
|
|
73
|
+
f"{status}: {self.steps_succeeded}/{self.total_steps} steps completed, "
|
|
74
|
+
f"{self.steps_failed} failed, {self.steps_skipped} skipped"
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
CHECKPOINT_REVIEW_PROMPT = """You are the checkpoint reviewer for the GAIA web development agent.
|
|
79
|
+
|
|
80
|
+
You receive:
|
|
81
|
+
- The original user request
|
|
82
|
+
- A summary of the latest checklist execution (including errors/warnings)
|
|
83
|
+
- Logs from the validation and testing tools (run_typescript_check, validate_styles, run_tests, etc.)
|
|
84
|
+
- Any previously requested fixes that are still outstanding
|
|
85
|
+
|
|
86
|
+
Decide if the application is ready to ship or if additional fixes are required.
|
|
87
|
+
|
|
88
|
+
Rules:
|
|
89
|
+
1. If ANY validation or test log failed, status must be \"needs_fix\" with concrete guidance.
|
|
90
|
+
2. Only return \"complete\" when the app works end-to-end and validations passed.
|
|
91
|
+
3. When fixes are needed, suggest actionable steps that can be executed through `fix_code` (LLM-assisted repair of problematic files).
|
|
92
|
+
|
|
93
|
+
Respond with concise JSON only:
|
|
94
|
+
{
|
|
95
|
+
\"status\": \"complete\" | \"needs_fix\",
|
|
96
|
+
\"reasoning\": \"short justification\",
|
|
97
|
+
\"issues\": [\"list of concrete bugs or failures\"],
|
|
98
|
+
\"fix_instructions\": [\"ordered actions the next checklist should perform\"]
|
|
99
|
+
}
|
|
100
|
+
"""
|
|
101
|
+
|
|
102
|
+
MAX_CHAT_HISTORY_TOKENS = 15000
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@dataclass
|
|
106
|
+
class CheckpointAssessment:
|
|
107
|
+
"""LLM-produced verdict about the current checkpoint."""
|
|
108
|
+
|
|
109
|
+
status: str
|
|
110
|
+
reasoning: str
|
|
111
|
+
issues: List[str] = field(default_factory=list)
|
|
112
|
+
fix_instructions: List[str] = field(default_factory=list)
|
|
113
|
+
|
|
114
|
+
@property
|
|
115
|
+
def needs_fix(self) -> bool:
|
|
116
|
+
"""Return True when the reviewer requires another checklist."""
|
|
117
|
+
return self.status.lower() != "complete"
|
|
118
|
+
|
|
119
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
120
|
+
"""Serialize the assessment."""
|
|
121
|
+
return {
|
|
122
|
+
"status": self.status,
|
|
123
|
+
"reasoning": self.reasoning,
|
|
124
|
+
"issues": self.issues,
|
|
125
|
+
"fix_instructions": self.fix_instructions,
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class Orchestrator:
|
|
130
|
+
"""Controls LLM-driven workflow execution with error recovery.
|
|
131
|
+
|
|
132
|
+
The orchestrator uses Checklist Mode exclusively:
|
|
133
|
+
- LLM analyzes user request and generates a checklist of templates
|
|
134
|
+
- Executor runs templates deterministically
|
|
135
|
+
- Provides semantic understanding (e.g., adds checkboxes for todos)
|
|
136
|
+
"""
|
|
137
|
+
|
|
138
|
+
def __init__(
|
|
139
|
+
self,
|
|
140
|
+
tool_executor: ToolExecutor,
|
|
141
|
+
llm_client: ChatSDK,
|
|
142
|
+
llm_fixer: Optional[Callable[[str, str], Optional[str]]] = None,
|
|
143
|
+
progress_callback: Optional[Callable[[str, str, int, int], None]] = None,
|
|
144
|
+
console: Optional[AgentConsole] = None,
|
|
145
|
+
max_checklist_loops: int = 10,
|
|
146
|
+
):
|
|
147
|
+
"""Initialize orchestrator.
|
|
148
|
+
|
|
149
|
+
Args:
|
|
150
|
+
tool_executor: Function to execute tools (name, args) -> result
|
|
151
|
+
llm_client: Chat SDK for checklist generation (required)
|
|
152
|
+
llm_fixer: Optional LLM-based code fixer for escalation
|
|
153
|
+
progress_callback: Optional callback(phase, step, current, total)
|
|
154
|
+
console: Optional console for displaying output
|
|
155
|
+
max_checklist_loops: Max number of checklist iterations before giving up
|
|
156
|
+
"""
|
|
157
|
+
if llm_client is None:
|
|
158
|
+
raise ValueError("llm_client is required for Orchestrator")
|
|
159
|
+
|
|
160
|
+
self.tool_executor = tool_executor
|
|
161
|
+
self.llm_client = llm_client
|
|
162
|
+
self.error_handler = ErrorHandler(
|
|
163
|
+
command_executor=self._run_command,
|
|
164
|
+
llm_fixer=llm_fixer,
|
|
165
|
+
)
|
|
166
|
+
self.progress_callback = progress_callback
|
|
167
|
+
self.console = console
|
|
168
|
+
self.max_checklist_loops = max(1, max_checklist_loops)
|
|
169
|
+
|
|
170
|
+
# Initialize checklist components
|
|
171
|
+
from .checklist_executor import ChecklistExecutor
|
|
172
|
+
from .checklist_generator import ChecklistGenerator
|
|
173
|
+
|
|
174
|
+
self.checklist_generator = ChecklistGenerator(llm_client)
|
|
175
|
+
self.checklist_executor = ChecklistExecutor(
|
|
176
|
+
tool_executor,
|
|
177
|
+
llm_client=llm_client, # Pass LLM for per-item code generation
|
|
178
|
+
error_handler=self.error_handler,
|
|
179
|
+
progress_callback=self._checklist_progress_callback,
|
|
180
|
+
console=console, # Pass console
|
|
181
|
+
)
|
|
182
|
+
logger.debug(
|
|
183
|
+
"Orchestrator initialized - LLM will plan execution AND generate code per item"
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
def execute(
|
|
187
|
+
self, context: UserContext, step_through: bool = False
|
|
188
|
+
) -> ExecutionResult:
|
|
189
|
+
"""Execute the workflow using iterative LLM-generated checklists."""
|
|
190
|
+
logger.debug("Executing workflow (LLM-driven checklist loop)")
|
|
191
|
+
|
|
192
|
+
from .project_analyzer import ProjectAnalyzer
|
|
193
|
+
|
|
194
|
+
analyzer = ProjectAnalyzer()
|
|
195
|
+
aggregated_validation_logs: List[Any] = []
|
|
196
|
+
fix_feedback: List[str] = []
|
|
197
|
+
iteration_outputs: List[Dict[str, Any]] = []
|
|
198
|
+
combined_errors: List[str] = []
|
|
199
|
+
previous_execution_errors: List[str] = []
|
|
200
|
+
previous_validation_logs: List[Any] = []
|
|
201
|
+
|
|
202
|
+
total_steps = 0
|
|
203
|
+
steps_succeeded = 0
|
|
204
|
+
steps_failed = 0
|
|
205
|
+
success = False
|
|
206
|
+
|
|
207
|
+
try:
|
|
208
|
+
context.project_dir = self._prepare_project_directory(context)
|
|
209
|
+
except ProjectDirectoryError as exc:
|
|
210
|
+
error_message = str(exc)
|
|
211
|
+
logger.error(error_message)
|
|
212
|
+
if self.console:
|
|
213
|
+
self.console.print_error(error_message)
|
|
214
|
+
return ExecutionResult(
|
|
215
|
+
success=False,
|
|
216
|
+
phases_completed=[],
|
|
217
|
+
phases_failed=["project_directory"],
|
|
218
|
+
total_steps=1,
|
|
219
|
+
steps_succeeded=0,
|
|
220
|
+
steps_failed=1,
|
|
221
|
+
steps_skipped=0,
|
|
222
|
+
errors=[error_message],
|
|
223
|
+
outputs={
|
|
224
|
+
"iterations": [],
|
|
225
|
+
"validation_logs": [],
|
|
226
|
+
"fix_feedback": [],
|
|
227
|
+
"project_dir": context.project_dir,
|
|
228
|
+
},
|
|
229
|
+
)
|
|
230
|
+
|
|
231
|
+
for iteration in range(1, self.max_checklist_loops + 1):
|
|
232
|
+
logger.debug("Starting checklist iteration %d", iteration)
|
|
233
|
+
|
|
234
|
+
if iteration > 1:
|
|
235
|
+
summary_result = self._maybe_summarize_conversation_history()
|
|
236
|
+
if summary_result and self.console:
|
|
237
|
+
self.console.print_info(
|
|
238
|
+
"Conversation history summarized to stay within token limits."
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
project_state = analyzer.analyze(context.project_dir)
|
|
242
|
+
|
|
243
|
+
# Surface accumulated signals to the next checklist prompt
|
|
244
|
+
context.validation_reports = [
|
|
245
|
+
log.to_dict() for log in aggregated_validation_logs
|
|
246
|
+
]
|
|
247
|
+
context.fix_feedback = fix_feedback.copy()
|
|
248
|
+
|
|
249
|
+
logger.info(
|
|
250
|
+
"Generating checklist iteration %d of %d",
|
|
251
|
+
iteration,
|
|
252
|
+
self.max_checklist_loops,
|
|
253
|
+
)
|
|
254
|
+
            if self.console:
                self.console.print_info(
                    f"Generating checklist iteration {iteration} of {self.max_checklist_loops}"
                )
            if iteration == 1:
                checklist = self.checklist_generator.generate_initial_checklist(
                    context, project_state
                )
            else:
                checklist = self.checklist_generator.generate_debug_checklist(
                    context=context,
                    project_state=project_state,
                    prior_errors=previous_execution_errors,
                    validation_logs=previous_validation_logs,
                )

            if not checklist.is_valid:
                logger.error(
                    "Invalid checklist (iteration %d): %s",
                    iteration,
                    checklist.validation_errors,
                )
                try:
                    checklist_dump = json.dumps(checklist.to_dict(), indent=2)
                except Exception:  # pylint: disable=broad-exception-caught
                    checklist_dump = str(checklist)
                logger.error("Invalid checklist payload: %s", checklist_dump)
                if self.console:
                    self.console.pretty_print_json(
                        checklist.to_dict(), title="Invalid Checklist"
                    )
                combined_errors.extend(checklist.validation_errors)
                assessment = CheckpointAssessment(
                    status="needs_fix",
                    reasoning="Checklist validation failed",
                    issues=checklist.validation_errors.copy(),
                    fix_instructions=checklist.validation_errors.copy(),
                )
                iteration_outputs.append(
                    {
                        "iteration": iteration,
                        "checklist": checklist.to_dict(),
                        "execution": None,
                        "assessment": assessment.to_dict(),
                    }
                )
                break

            logger.debug(
                "Generated checklist with %d items: %s",
                len(checklist.items),
                checklist.reasoning,
            )

            checklist_result = self.checklist_executor.execute(
                checklist, context, step_through=step_through
            )

            total_steps += len(checklist_result.item_results)
            steps_succeeded += checklist_result.items_succeeded
            steps_failed += checklist_result.items_failed
            combined_errors.extend(checklist_result.errors)

            aggregated_validation_logs.extend(checklist_result.validation_logs)
            previous_execution_errors = checklist_result.errors.copy()
            previous_validation_logs = checklist_result.validation_logs.copy()

            logger.info("Assessing application state after iteration %d", iteration)
            if self.console:
                self.console.print_info(
                    f"Assessing application state after iteration {iteration}"
                )
            assessment = self._assess_checkpoint(
                context=context,
                checklist=checklist,
                execution_result=checklist_result,
                validation_history=aggregated_validation_logs,
            )
            if assessment.needs_fix:
                logger.info(
                    "Application not ready after iteration %d, planning another checklist: %s",
                    iteration,
                    assessment.reasoning or "no reasoning provided",
                )
                if self.console:
                    self.console.print_info(
                        "Application not ready; preparing another checklist."
                    )
            else:
                logger.info(
                    "Application marked complete after iteration %d: %s",
                    iteration,
                    assessment.reasoning or "no reasoning provided",
                )
                if self.console:
                    self.console.print_success("Application marked complete.")

            iteration_outputs.append(
                {
                    "iteration": iteration,
                    "checklist": checklist.to_dict(),
                    "execution": {
                        "summary": checklist_result.summary,
                        "success": checklist_result.success,
                        "files": checklist_result.total_files,
                        "errors": checklist_result.errors,
                        "warnings": checklist_result.warnings,
                        "item_results": [
                            r.to_dict() for r in checklist_result.item_results
                        ],
                        "validation_logs": [
                            log.to_dict() for log in checklist_result.validation_logs
                        ],
                    },
                    "assessment": assessment.to_dict(),
                }
            )

            if not assessment.needs_fix:
                success = (
                    checklist_result.success and assessment.status.lower() == "complete"
                )
                break

            instructions = assessment.fix_instructions or assessment.issues
            if not instructions and assessment.reasoning:
                instructions = [assessment.reasoning]
            if instructions:
                fix_feedback.extend(instructions)

        else:
            combined_errors.append(
                f"Reached maximum checklist iterations ({self.max_checklist_loops}) without passing validation"
            )

        latest_execution = None
        latest_checklist = None
        if iteration_outputs:
            latest_entry = iteration_outputs[-1]
            latest_execution = latest_entry.get("execution")
            latest_checklist = latest_entry.get("checklist")

        outputs = {
            "iterations": iteration_outputs,
            "validation_logs": [log.to_dict() for log in aggregated_validation_logs],
            "fix_feedback": fix_feedback,
            "project_dir": context.project_dir,
        }

        if latest_execution:
            outputs["files"] = latest_execution.get("files", [])
            outputs["detailed_results"] = latest_execution.get("item_results", [])
        if latest_checklist:
            outputs["checklist"] = latest_checklist

        return ExecutionResult(
            success=success,
            phases_completed=["checklist"] if success else [],
            phases_failed=[] if success else ["checklist"],
            total_steps=total_steps,
            steps_succeeded=steps_succeeded,
            steps_failed=steps_failed,
            steps_skipped=0,
            errors=combined_errors,
            outputs=outputs,
        )

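The `else:` that follows the loop above is Python's `for ... else` clause: it runs only when the loop exhausts `max_checklist_loops` without hitting a `break`, which is how the "reached maximum iterations" error is recorded exactly once. A minimal standalone sketch of that pattern, with `plan` and `execute` as invented stand-ins for the checklist generator, executor, and checkpoint assessment:

```python
def plan(attempt: int) -> str:
    return f"checklist-{attempt}"          # stand-in for checklist generation

def execute(checklist: str) -> bool:
    return checklist.endswith("3")         # pretend only the third plan passes

def run_with_retries(attempts: int = 3) -> bool:
    success = False
    for attempt in range(1, attempts + 1):
        if execute(plan(attempt)):         # stand-in for execute + assess
            success = True
            break                          # a break skips the else clause
    else:
        # for/else: runs only when the loop exhausts every attempt without
        # breaking -- the "reached maximum iterations" branch above.
        print(f"gave up after {attempts} attempts")
    return success

assert run_with_retries(3) is True         # third attempt breaks out early
assert run_with_retries(2) is False        # loop exhausts, else-branch fires
```
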
    def _run_command(self, command: str, cwd: Optional[str] = None) -> tuple[int, str]:
        """Run a shell command.

        Args:
            command: Command to run
            cwd: Working directory

        Returns:
            Tuple of (exit_code, output)
        """
        try:
            result = subprocess.run(
                command,
                shell=True,
                cwd=cwd,
                capture_output=True,
                text=True,
                timeout=1200,
                check=False,  # We handle return codes ourselves
            )
            output = result.stdout + result.stderr
            return result.returncode, output
        except subprocess.TimeoutExpired:
            return 1, "Command timed out"
        except Exception as e:
            return 1, str(e)

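A self-contained sketch of the same `subprocess.run` pattern: `shell=True`, captured text-mode output, an explicit timeout, and `check=False` so non-zero exit codes come back as data rather than raising. The 30-second timeout here is arbitrary for the example; the method above allows 1200 seconds.

```python
import subprocess
from typing import Optional, Tuple

def run(command: str, cwd: Optional[str] = None) -> Tuple[int, str]:
    """Run a shell command, returning (exit_code, combined output)."""
    try:
        result = subprocess.run(
            command, shell=True, cwd=cwd,
            capture_output=True, text=True, timeout=30, check=False,
        )
        return result.returncode, result.stdout + result.stderr
    except subprocess.TimeoutExpired:
        return 1, "Command timed out"

code, output = run("echo hello")
assert code == 0 and "hello" in output
```
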
    def _checklist_progress_callback(
        self, description: str, current: int, total: int
    ) -> None:
        """Progress callback adapter for checklist execution.

        Converts checklist progress format to the standard progress format.

        Args:
            description: Current item description
            current: Current item number
            total: Total items
        """
        if self.progress_callback:
            self.progress_callback("checklist", description, current, total)

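The adapter simply injects a fixed phase name into a wider callback signature. A small sketch of that wiring, with hypothetical names (nothing below is from the package):

```python
from typing import Callable

ProgressFn = Callable[[str, str, int, int], None]  # (phase, description, current, total)

def make_checklist_adapter(progress: ProgressFn) -> Callable[[str, int, int], None]:
    def adapter(description: str, current: int, total: int) -> None:
        progress("checklist", description, current, total)  # inject the phase
    return adapter

log: list[tuple] = []
adapter = make_checklist_adapter(lambda *args: log.append(args))
adapter("Create pages", 2, 5)
assert log == [("checklist", "Create pages", 2, 5)]
```
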
    def _assess_checkpoint(
        self,
        context: UserContext,
        checklist: Any,
        execution_result: Any,
        validation_history: List[Any],
    ) -> CheckpointAssessment:
        """Ask the LLM whether the workflow is complete or needs another checklist."""
        prompt = self._build_checkpoint_prompt(
            context=context,
            checklist=checklist,
            execution_result=execution_result,
            validation_history=validation_history,
        )

        try:
            response = self.llm_client.send(prompt, timeout=1200)
            data = self._parse_checkpoint_response(response)
            return CheckpointAssessment(
                status=data.get("status", "needs_fix"),
                reasoning=data.get("reasoning", ""),
                issues=data.get("issues", []),
                fix_instructions=data.get("fix_instructions", []),
            )
        except Exception as exc:  # pylint: disable=broad-exception-caught
            logger.exception("Checkpoint assessment failed")
            return CheckpointAssessment(
                status="needs_fix",
                reasoning="Failed to interpret checkpoint reviewer output",
                issues=[f"Checkpoint reviewer error: {exc}"],
                fix_instructions=[
                    "Inspect validation logs, then fix the root cause using fix_code."
                ],
            )

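The keys the code reads from the reviewer's reply are exactly the four `data.get(...)` calls above: `status`, `reasoning`, `issues`, and `fix_instructions`. A reply shaped like the one below would flow through the happy path; the field contents are invented for illustration.

```python
import json

reply = """
{
  "status": "needs_fix",
  "reasoning": "Build succeeds but one validation command still fails.",
  "issues": ["npm test exits non-zero"],
  "fix_instructions": ["Fix the failing unit test before re-running validation"]
}
"""
data = json.loads(reply)
assert data.get("status", "needs_fix") in {"complete", "needs_fix"}
assert isinstance(data.get("issues", []), list)
```
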
    def _build_checkpoint_prompt(
        self,
        context: UserContext,
        checklist: Any,
        execution_result: Any,
        validation_history: List[Any],
    ) -> str:
        """Build the prompt for the checkpoint reviewer."""
        validation_summary = self._format_validation_history(
            validation_history, getattr(execution_result, "validation_logs", None)
        )

        outstanding = (
            "\n".join(f"- {item}" for item in context.fix_feedback)
            if context.fix_feedback
            else "None"
        )

        errors = execution_result.errors or ["None"]
        warnings = execution_result.warnings or []

        sections = [
            CHECKPOINT_REVIEW_PROMPT.strip(),
            "",
            "## User Request",
            context.user_request,
            "",
            "## Latest Checklist Plan",
            f"Reasoning: {checklist.reasoning}",
            "",
            "## Execution Summary",
            execution_result.summary,
            "",
            "## Execution Errors",
            "\n".join(f"- {err}" for err in errors),
            "",
            "## Execution Warnings",
            "\n".join(f"- {warn}" for warn in warnings) if warnings else "None",
            "",
            "## Validation & Test Logs",
            validation_summary,
            "",
            "## Outstanding Fix Requests",
            outstanding,
        ]

        return "\n".join(sections)

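Structurally, the reviewer prompt is plain markdown: a fixed instruction header followed by `##` sections. A toy rendering of that layout; the real header lives in `CHECKPOINT_REVIEW_PROMPT` and is stubbed here, and the section contents are invented:

```python
header = "You are the checkpoint reviewer. Reply with JSON."  # stub header
sections = [
    header,
    "",
    "## User Request",
    "Build a todo app",
    "",
    "## Execution Errors",
    "- None",
    "",
    "## Outstanding Fix Requests",
    "None",
]
prompt = "\n".join(sections)
assert prompt.count("## ") == 3
```
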
    def _maybe_summarize_conversation_history(self) -> Optional[str]:
        """Trigger ChatSDK conversation summarization when available."""
        chat_sdk = getattr(self, "llm_client", None)
        if not chat_sdk or not hasattr(chat_sdk, "summarize_conversation_history"):
            return None

        try:
            summary = chat_sdk.summarize_conversation_history(
                max_history_tokens=MAX_CHAT_HISTORY_TOKENS
            )
            if summary:
                logger.info(
                    "Conversation history summarized to ~%d tokens",
                    _estimate_token_count(summary),
                )
            return summary
        except Exception as exc:  # pylint: disable=broad-exception-caught
            logger.exception("Failed to summarize conversation history: %s", exc)
            return None

    def _prepare_project_directory(self, context: UserContext) -> str:
        """
        Ensure the project directory is ready for creation workflows.

        If the provided path exists and is non-empty without an existing project,
        pick a unique subdirectory via the LLM to avoid create-next-app failures.
        """
        base_path = Path(context.project_dir).expanduser()
        if base_path.exists() and not base_path.is_dir():
            raise ProjectDirectoryError(
                f"Provided path is not a directory: {base_path}"
            )

        if not base_path.exists():
            base_path.mkdir(parents=True, exist_ok=True)
            logger.info("Created project directory: %s", base_path)
            return str(base_path)

        existing_entries = [p.name for p in base_path.iterdir()]
        if not existing_entries:
            return str(base_path)

        if self.console:
            self.console.print_warning(
                f"Target directory {base_path} is not empty; selecting a new subdirectory."
            )

        suggested = self._choose_subdirectory_name(
            base_path, existing_entries, context.user_request
        )
        if not suggested:
            raise ProjectDirectoryError(
                f"Unable to find an available project name under {base_path}. "
                "Provide one explicitly with --path."
            )

        new_dir = base_path / suggested
        new_dir.mkdir(parents=False, exist_ok=False)
        logger.info("Using nested project directory: %s", new_dir)
        # Align process cwd with the newly created project directory.
        try:
            os.chdir(new_dir)
        except OSError as exc:
            logger.warning("Failed to chdir to %s: %s", new_dir, exc)
        if self.console:
            self.console.print_info(f"Using project directory: {new_dir}")
        return str(new_dir)

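The method distinguishes three cases: a missing path is created, an empty directory is reused as-is, and a non-empty directory triggers nesting into a fresh subdirectory. A self-contained sketch of those cases on a throwaway temp tree; `"todo-app"` is just a stand-in for a name the LLM might suggest:

```python
import tempfile
from pathlib import Path

root = Path(tempfile.mkdtemp())
target = root / "app"

target.mkdir(parents=True, exist_ok=True)   # case 1: missing -> created
assert not any(target.iterdir())            # case 2: empty -> safe to reuse

(target / "README.md").write_text("hello")
assert any(target.iterdir())                # case 3: non-empty -> nest deeper

nested = target / "todo-app"                # hypothetical LLM-suggested name
nested.mkdir(parents=False, exist_ok=False) # fails loudly if it already exists
```
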
    def _choose_subdirectory_name(
        self, base_path: Path, existing_entries: List[str], user_request: str
    ) -> Optional[str]:
        """Ask the LLM for a unique subdirectory name, retrying on conflicts."""
        existing_lower = {name.lower() for name in existing_entries}
        prompt = self._build_directory_prompt(
            base_path, existing_entries, user_request, None
        )
        last_reason = None

        system_prompt = "You suggest concise folder names for new projects."

        for attempt in range(1, 4):
            try:
                response = self._send_prompt_without_history(
                    prompt, timeout=120, system_prompt=system_prompt
                )
            except Exception as exc:  # pylint: disable=broad-exception-caught
                last_reason = f"LLM error on attempt {attempt}: {exc}"
                logger.warning(last_reason)
                prompt = self._build_directory_prompt(
                    base_path, existing_entries, user_request, last_reason
                )
                continue

            raw_response = self._extract_response_text(response)
            candidate = self._sanitize_directory_name(raw_response)
            if not candidate:
                last_reason = "LLM returned an empty or invalid directory name."
            elif candidate.lower() in existing_lower:
                last_reason = f"Name '{candidate}' already exists in {base_path}."
            elif "/" in candidate or "\\" in candidate or ".." in candidate:
                last_reason = "Directory name contained path separators or traversal."
            elif len(candidate) > 64:
                last_reason = "Directory name exceeded 64 characters."
            else:
                candidate_path = base_path / candidate
                if candidate_path.exists():
                    last_reason = f"Directory '{candidate}' already exists."
                else:
                    return candidate

            logger.warning(
                "Directory name attempt %d rejected: %s", attempt, last_reason
            )
            prompt = self._build_directory_prompt(
                base_path, existing_entries, user_request, last_reason
            )

        return None

    @staticmethod
    def _sanitize_directory_name(raw: str) -> str:
        """Normalize LLM output to a filesystem-safe directory name."""
        if not raw:
            return ""
        candidate = raw.strip().strip("`'\"")
        lines = candidate.splitlines()
        if not lines:  # raw consisted solely of quotes/backticks/whitespace
            return ""
        candidate = lines[0].strip()
        candidate = re.sub(r"[^A-Za-z0-9_-]+", "-", candidate)
        return candidate.strip("-_").lower()

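Because the sanitizer is a pure function, its behavior is easy to exercise directly. A standalone copy (including the empty-result guard) with a few representative inputs:

```python
import re

def sanitize(raw: str) -> str:
    """Standalone copy of the sanitizer above, for illustration only."""
    if not raw:
        return ""
    candidate = raw.strip().strip("`'\"")
    lines = candidate.splitlines()
    if not lines:
        return ""
    candidate = lines[0].strip()
    candidate = re.sub(r"[^A-Za-z0-9_-]+", "-", candidate)
    return candidate.strip("-_").lower()

assert sanitize("`My Project!`") == "my-project"       # quotes stripped, kebab-cased
assert sanitize("  todo_app\nextra line") == "todo_app"  # only the first line kept
assert sanitize('""') == ""                            # all-quote input -> empty
```
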
    def _send_prompt_without_history(
        self, prompt: str, timeout: int = 120, system_prompt: Optional[str] = None
    ) -> Any:
        """
        Send a prompt without reading from or writing to chat history.

        Prefers the underlying LLM client's `generate` API when available,
        falling back to `send(..., no_history=True)` for compatibility.
        """
        # If the ChatSDK exposes the underlying LLM client, use it directly with
        # chat messages to avoid any stored history and ensure system prompts
        # are applied cleanly.
        llm_client = getattr(self.llm_client, "llm_client", None)
        if llm_client and hasattr(llm_client, "generate"):
            model = getattr(getattr(self.llm_client, "config", None), "model", None)
            messages = []
            if system_prompt:
                messages.append({"role": "system", "content": system_prompt})
            messages.append({"role": "user", "content": prompt})
            return llm_client.generate(
                prompt=prompt,
                messages=messages,
                model=model,
                timeout=timeout,
                endpoint="chat",
            )

        # Fallback: use send with no_history to avoid persisting messages.
        if hasattr(self.llm_client, "send"):
            return self.llm_client.send(
                prompt, timeout=timeout, no_history=True, system_prompt=system_prompt
            )

        raise ValueError("LLM client does not support generate or send APIs")

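The method is a capability probe: prefer the richer API when the client exposes it, fall back otherwise, and fail loudly when neither exists. A minimal sketch of that pattern; `RichClient` and `BasicClient` are hypothetical stand-ins, not ChatSDK classes:

```python
class RichClient:
    def generate(self, prompt: str, **kwargs) -> str:
        return f"generate: {prompt}"

class BasicClient:
    def send(self, prompt: str, **kwargs) -> str:
        return f"send: {prompt}"

def ask(client, prompt: str) -> str:
    # Probe for the preferred API first, then the fallback.
    if hasattr(client, "generate"):
        return client.generate(prompt)
    if hasattr(client, "send"):
        return client.send(prompt, no_history=True)
    raise ValueError("client supports neither generate nor send")

assert ask(RichClient(), "hi").startswith("generate")
assert ask(BasicClient(), "hi").startswith("send")
```
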
    @staticmethod
    def _build_directory_prompt(
        base_path: Path,
        existing_entries: List[str],
        user_request: Optional[str],
        rejection_reason: Optional[str],
    ) -> str:
        """Construct the LLM prompt for picking a safe project subdirectory."""
        entries = sorted(existing_entries)
        max_list = 50
        if len(entries) > max_list:
            entries_display = "\n".join(f"- {name}" for name in entries[:max_list])
            entries_display += f"\n- ...and {len(entries) - max_list} more"
        else:
            entries_display = "\n".join(f"- {name}" for name in entries)

        prompt_sections = [
            "You must choose a new folder name for a project because the target path is not empty.",
            f"Base path: {base_path}",
            "Existing files and folders you MUST avoid (do not reuse any of these names):",
            entries_display or "- <empty>",
            "User request driving this project:",
            user_request or "<no request provided>",
            "Rules:",
            "- Return a single folder name only. Do NOT echo the instructions. No paths, quotes, JSON, or extra text.",
            "- Use lowercase kebab-case or snake_case; ASCII letters, numbers, hyphens, and underscores only.",
            "- Do not use any existing names above. Avoid dots, spaces, or slashes.",
            "- Keep it under 40 characters.",
        ]

        if rejection_reason:
            prompt_sections.append(
                f"Previous suggestion was rejected: {rejection_reason}. Try a different unique name."
            )

        return "\n".join(prompt_sections)

    def _format_validation_history(
        self, validation_history: List[Any], latest_plan_logs: Optional[List[Any]]
    ) -> str:
        """Format validation logs, splitting latest plan from historical ones."""

        if not validation_history:
            return "No validation or test commands have been executed yet."

        latest_logs = latest_plan_logs or []
        latest_count = len(latest_logs)
        historical_logs = (
            validation_history[:-latest_count] if latest_count else validation_history
        )

        def normalize(entry: Any) -> Dict[str, Any]:
            if hasattr(entry, "to_dict"):
                return entry.to_dict()
            if isinstance(entry, dict):
                return entry
            return {}

        def render(entries: List[Any], limit: Optional[int] = None) -> List[str]:
            if not entries:
                return ["None"]

            selected = entries if limit is None else entries[-limit:]
            lines: List[str] = []
            for entry in selected:
                data = normalize(entry)
                template = data.get("template", "unknown")
                description = data.get("description", "")
                success = data.get("success", True)
                status = "PASS" if success else "FAIL"
                error = data.get("error")
                output = data.get("output", {})

                lines.append(f"- [{status}] {template}: {description}")
                if error:
                    lines.append(f"  Error: {error}")

                snippet = ""
                if isinstance(output, dict):
                    for key in ("stdout", "stderr", "message", "log", "details"):
                        if output.get(key):
                            snippet = str(output[key])
                            break
                    if not snippet and output:
                        snippet = json.dumps(output)[:400]
                elif output:
                    snippet = str(output)[:400]

                snippet = snippet.strip()
                if snippet:
                    lines.append(f"  Output: {snippet[:400]}")
            return lines

        sections: List[str] = []
        sections.append("### Latest Plan Results")
        sections.extend(render(list(latest_logs)))
        sections.append("")
        sections.append("### Previous Plan History")
        sections.extend(render(list(historical_logs), limit=5))

        return "\n".join(sections).strip()

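For dict-shaped logs carrying the keys `normalize()` expects, `render()` produces compact PASS/FAIL bullets. A standalone sketch reproducing the line format; the two log entries are invented:

```python
logs = [
    {"template": "run_validation", "description": "npm run build",
     "success": True, "output": {"stdout": "compiled"}},
    {"template": "run_tests", "description": "pytest",
     "success": False, "error": "2 failed", "output": {"stderr": "AssertionError"}},
]
for entry in logs:
    status = "PASS" if entry.get("success", True) else "FAIL"
    print(f"- [{status}] {entry['template']}: {entry['description']}")
    if entry.get("error"):
        print(f"  Error: {entry['error']}")
# Prints:
# - [PASS] run_validation: npm run build
# - [FAIL] run_tests: pytest
#   Error: 2 failed
```
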
    def _parse_checkpoint_response(self, response: Any) -> Dict[str, Any]:
        """Parse JSON output from the checkpoint reviewer."""
        text = self._extract_response_text(response)
        json_str = self._extract_json(text)
        return json.loads(json_str)

    @staticmethod
    def _extract_response_text(response: Any) -> str:
        """Normalize SDK response objects to raw text."""
        if isinstance(response, str):
            return response
        if hasattr(response, "text"):
            return response.text
        if hasattr(response, "content"):
            return response.content
        if isinstance(response, dict):
            return response.get("text", response.get("content", str(response)))
        return str(response)

    @staticmethod
    def _extract_json(text: str) -> str:
        """Extract JSON blob from arbitrary text (markdown-safe)."""
        code_block = re.search(r"```(?:json)?\s*\n?(.*?)\n?```", text, re.DOTALL)
        if code_block:
            return code_block.group(1).strip()

        json_match = re.search(r"\{.*\}", text, re.DOTALL)
        if json_match:
            return json_match.group(0)

        return text.strip()
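With single-backslash escapes, the first regex strips a fenced markdown block and the second falls back to the outermost brace pair. A runnable check of both paths; the fence characters are spelled indirectly only to keep this example readable:

```python
import re

FENCE = "`" * 3  # literal triple-backtick
pattern = FENCE + r"(?:json)?\s*\n?(.*?)\n?" + FENCE
text = f'Assessment:\n{FENCE}json\n{{"status": "complete", "issues": []}}\n{FENCE}'
match = re.search(pattern, text, re.DOTALL)
assert match and match.group(1).strip() == '{"status": "complete", "issues": []}'

# Fallback for un-fenced replies: grab the outermost brace pair.
loose = 'Result: {"status": "needs_fix"} (see logs)'
assert re.search(r"\{.*\}", loose, re.DOTALL).group(0) == '{"status": "needs_fix"}'
```
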