amd-gaia 0.14.3__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
  2. amd_gaia-0.15.1.dist-info/RECORD +178 -0
  3. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
  4. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
  5. gaia/__init__.py +29 -29
  6. gaia/agents/__init__.py +19 -19
  7. gaia/agents/base/__init__.py +9 -9
  8. gaia/agents/base/agent.py +2177 -2177
  9. gaia/agents/base/api_agent.py +120 -120
  10. gaia/agents/base/console.py +1841 -1841
  11. gaia/agents/base/errors.py +237 -237
  12. gaia/agents/base/mcp_agent.py +86 -86
  13. gaia/agents/base/tools.py +83 -83
  14. gaia/agents/blender/agent.py +556 -556
  15. gaia/agents/blender/agent_simple.py +133 -135
  16. gaia/agents/blender/app.py +211 -211
  17. gaia/agents/blender/app_simple.py +41 -41
  18. gaia/agents/blender/core/__init__.py +16 -16
  19. gaia/agents/blender/core/materials.py +506 -506
  20. gaia/agents/blender/core/objects.py +316 -316
  21. gaia/agents/blender/core/rendering.py +225 -225
  22. gaia/agents/blender/core/scene.py +220 -220
  23. gaia/agents/blender/core/view.py +146 -146
  24. gaia/agents/chat/__init__.py +9 -9
  25. gaia/agents/chat/agent.py +835 -835
  26. gaia/agents/chat/app.py +1058 -1058
  27. gaia/agents/chat/session.py +508 -508
  28. gaia/agents/chat/tools/__init__.py +15 -15
  29. gaia/agents/chat/tools/file_tools.py +96 -96
  30. gaia/agents/chat/tools/rag_tools.py +1729 -1729
  31. gaia/agents/chat/tools/shell_tools.py +436 -436
  32. gaia/agents/code/__init__.py +7 -7
  33. gaia/agents/code/agent.py +549 -549
  34. gaia/agents/code/cli.py +377 -0
  35. gaia/agents/code/models.py +135 -135
  36. gaia/agents/code/orchestration/__init__.py +24 -24
  37. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  38. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  39. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  40. gaia/agents/code/orchestration/factories/base.py +63 -63
  41. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  42. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  43. gaia/agents/code/orchestration/orchestrator.py +841 -841
  44. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  45. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  46. gaia/agents/code/orchestration/steps/base.py +188 -188
  47. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  48. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  49. gaia/agents/code/orchestration/steps/python.py +307 -307
  50. gaia/agents/code/orchestration/template_catalog.py +469 -469
  51. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  52. gaia/agents/code/orchestration/workflows/base.py +80 -80
  53. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  54. gaia/agents/code/orchestration/workflows/python.py +94 -94
  55. gaia/agents/code/prompts/__init__.py +11 -11
  56. gaia/agents/code/prompts/base_prompt.py +77 -77
  57. gaia/agents/code/prompts/code_patterns.py +2036 -2036
  58. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  59. gaia/agents/code/prompts/python_prompt.py +109 -109
  60. gaia/agents/code/schema_inference.py +365 -365
  61. gaia/agents/code/system_prompt.py +41 -41
  62. gaia/agents/code/tools/__init__.py +42 -42
  63. gaia/agents/code/tools/cli_tools.py +1138 -1138
  64. gaia/agents/code/tools/code_formatting.py +319 -319
  65. gaia/agents/code/tools/code_tools.py +769 -769
  66. gaia/agents/code/tools/error_fixing.py +1347 -1347
  67. gaia/agents/code/tools/external_tools.py +180 -180
  68. gaia/agents/code/tools/file_io.py +845 -845
  69. gaia/agents/code/tools/prisma_tools.py +190 -190
  70. gaia/agents/code/tools/project_management.py +1016 -1016
  71. gaia/agents/code/tools/testing.py +321 -321
  72. gaia/agents/code/tools/typescript_tools.py +122 -122
  73. gaia/agents/code/tools/validation_parsing.py +461 -461
  74. gaia/agents/code/tools/validation_tools.py +806 -806
  75. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  76. gaia/agents/code/validators/__init__.py +16 -16
  77. gaia/agents/code/validators/antipattern_checker.py +241 -241
  78. gaia/agents/code/validators/ast_analyzer.py +197 -197
  79. gaia/agents/code/validators/requirements_validator.py +145 -145
  80. gaia/agents/code/validators/syntax_validator.py +171 -171
  81. gaia/agents/docker/__init__.py +7 -7
  82. gaia/agents/docker/agent.py +642 -642
  83. gaia/agents/emr/__init__.py +8 -8
  84. gaia/agents/emr/agent.py +1506 -1506
  85. gaia/agents/emr/cli.py +1322 -1322
  86. gaia/agents/emr/constants.py +475 -475
  87. gaia/agents/emr/dashboard/__init__.py +4 -4
  88. gaia/agents/emr/dashboard/server.py +1974 -1974
  89. gaia/agents/jira/__init__.py +11 -11
  90. gaia/agents/jira/agent.py +894 -894
  91. gaia/agents/jira/jql_templates.py +299 -299
  92. gaia/agents/routing/__init__.py +7 -7
  93. gaia/agents/routing/agent.py +567 -570
  94. gaia/agents/routing/system_prompt.py +75 -75
  95. gaia/agents/summarize/__init__.py +11 -0
  96. gaia/agents/summarize/agent.py +885 -0
  97. gaia/agents/summarize/prompts.py +129 -0
  98. gaia/api/__init__.py +23 -23
  99. gaia/api/agent_registry.py +238 -238
  100. gaia/api/app.py +305 -305
  101. gaia/api/openai_server.py +575 -575
  102. gaia/api/schemas.py +186 -186
  103. gaia/api/sse_handler.py +373 -373
  104. gaia/apps/__init__.py +4 -4
  105. gaia/apps/llm/__init__.py +6 -6
  106. gaia/apps/llm/app.py +173 -169
  107. gaia/apps/summarize/app.py +116 -633
  108. gaia/apps/summarize/html_viewer.py +133 -133
  109. gaia/apps/summarize/pdf_formatter.py +284 -284
  110. gaia/audio/__init__.py +2 -2
  111. gaia/audio/audio_client.py +439 -439
  112. gaia/audio/audio_recorder.py +269 -269
  113. gaia/audio/kokoro_tts.py +599 -599
  114. gaia/audio/whisper_asr.py +432 -432
  115. gaia/chat/__init__.py +16 -16
  116. gaia/chat/app.py +430 -430
  117. gaia/chat/prompts.py +522 -522
  118. gaia/chat/sdk.py +1228 -1225
  119. gaia/cli.py +5481 -5621
  120. gaia/database/__init__.py +10 -10
  121. gaia/database/agent.py +176 -176
  122. gaia/database/mixin.py +290 -290
  123. gaia/database/testing.py +64 -64
  124. gaia/eval/batch_experiment.py +2332 -2332
  125. gaia/eval/claude.py +542 -542
  126. gaia/eval/config.py +37 -37
  127. gaia/eval/email_generator.py +512 -512
  128. gaia/eval/eval.py +3179 -3179
  129. gaia/eval/groundtruth.py +1130 -1130
  130. gaia/eval/transcript_generator.py +582 -582
  131. gaia/eval/webapp/README.md +167 -167
  132. gaia/eval/webapp/package-lock.json +875 -875
  133. gaia/eval/webapp/package.json +20 -20
  134. gaia/eval/webapp/public/app.js +3402 -3402
  135. gaia/eval/webapp/public/index.html +87 -87
  136. gaia/eval/webapp/public/styles.css +3661 -3661
  137. gaia/eval/webapp/server.js +415 -415
  138. gaia/eval/webapp/test-setup.js +72 -72
  139. gaia/llm/__init__.py +9 -2
  140. gaia/llm/base_client.py +60 -0
  141. gaia/llm/exceptions.py +12 -0
  142. gaia/llm/factory.py +70 -0
  143. gaia/llm/lemonade_client.py +3236 -3221
  144. gaia/llm/lemonade_manager.py +294 -294
  145. gaia/llm/providers/__init__.py +9 -0
  146. gaia/llm/providers/claude.py +108 -0
  147. gaia/llm/providers/lemonade.py +120 -0
  148. gaia/llm/providers/openai_provider.py +79 -0
  149. gaia/llm/vlm_client.py +382 -382
  150. gaia/logger.py +189 -189
  151. gaia/mcp/agent_mcp_server.py +245 -245
  152. gaia/mcp/blender_mcp_client.py +138 -138
  153. gaia/mcp/blender_mcp_server.py +648 -648
  154. gaia/mcp/context7_cache.py +332 -332
  155. gaia/mcp/external_services.py +518 -518
  156. gaia/mcp/mcp_bridge.py +811 -550
  157. gaia/mcp/servers/__init__.py +6 -6
  158. gaia/mcp/servers/docker_mcp.py +83 -83
  159. gaia/perf_analysis.py +361 -0
  160. gaia/rag/__init__.py +10 -10
  161. gaia/rag/app.py +293 -293
  162. gaia/rag/demo.py +304 -304
  163. gaia/rag/pdf_utils.py +235 -235
  164. gaia/rag/sdk.py +2194 -2194
  165. gaia/security.py +163 -163
  166. gaia/talk/app.py +289 -289
  167. gaia/talk/sdk.py +538 -538
  168. gaia/testing/__init__.py +87 -87
  169. gaia/testing/assertions.py +330 -330
  170. gaia/testing/fixtures.py +333 -333
  171. gaia/testing/mocks.py +493 -493
  172. gaia/util.py +46 -46
  173. gaia/utils/__init__.py +33 -33
  174. gaia/utils/file_watcher.py +675 -675
  175. gaia/utils/parsing.py +223 -223
  176. gaia/version.py +100 -100
  177. amd_gaia-0.14.3.dist-info/RECORD +0 -168
  178. gaia/agents/code/app.py +0 -266
  179. gaia/llm/llm_client.py +0 -729
  180. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
  181. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
@@ -1,713 +1,713 @@
1
- # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
- # SPDX-License-Identifier: MIT
3
- """Checklist Generator for LLM-Driven Code Generation.
4
-
5
- This module uses an LLM to generate a checklist of template invocations
6
- based on the user's request and the available template catalog.
7
-
8
- The generator:
9
- 1. Receives user request and project context
10
- 2. Sends prompt to LLM with template catalog
11
- 3. Parses LLM response into structured checklist
12
- 4. Validates checklist items against template definitions
13
-
14
- The resulting checklist is then executed deterministically by
15
- ChecklistExecutor.
16
- """
17
-
18
- import json
19
- import logging
20
- import re
21
- from dataclasses import dataclass, field
22
- from typing import Any, Dict, List, Optional, Protocol
23
-
24
- from .steps.base import UserContext
25
- from .template_catalog import get_catalog_prompt, validate_checklist_item
26
-
27
- logger = logging.getLogger(__name__)
28
-
29
-
30
class ChatSDK(Protocol):
    """Structural type describing the chat client the generator depends on.

    Any object that exposes a compatible ``send`` method satisfies this
    protocol; inheriting from it is not required.
    """

    def send(self, message: str, timeout: int = 600, no_history: bool = False) -> Any:
        """Deliver ``message`` to the LLM and return the SDK's response.

        Args:
            message: Prompt text to send.
            timeout: Time limit for the call, 600 by default
                (presumably seconds -- confirm against the concrete SDK).
            no_history: Defaults to False (presumably asks the SDK to ignore
                the conversation history -- confirm against the concrete SDK).

        Returns:
            Whatever the concrete SDK produces; the shape is SDK-specific.
        """
        ...
36
-
37
-
38
@dataclass
class ChecklistItem:
    """One planned template invocation from the generated checklist.

    Holds the template name, the parameters to invoke it with, and the
    LLM's explanation of why the step is part of the plan.
    """

    template: str
    params: Dict[str, Any]
    description: str

    def to_dict(self) -> Dict[str, Any]:
        """Return the item as a plain dictionary keyed by field name."""
        # getattr hands back the stored objects themselves (params is not copied).
        return {
            name: getattr(self, name)
            for name in ("template", "params", "description")
        }
57
-
58
-
59
@dataclass
class GeneratedChecklist:
    """Full plan returned by the LLM, plus bookkeeping about its validity.

    Bundles the ordered template invocations with the LLM's overall
    reasoning, the raw response text, and any validation errors recorded
    against the plan.
    """

    items: List[ChecklistItem]
    reasoning: str
    raw_response: str = ""
    validation_errors: List[str] = field(default_factory=list)

    @property
    def is_valid(self) -> bool:
        """True when no validation errors have been recorded."""
        error_count = len(self.validation_errors)
        return error_count == 0

    def to_dict(self) -> Dict[str, Any]:
        """Return a dictionary view of the checklist (raw response omitted)."""
        serialized_items = []
        for entry in self.items:
            serialized_items.append(entry.to_dict())

        return dict(
            reasoning=self.reasoning,
            checklist=serialized_items,
            is_valid=self.is_valid,
            validation_errors=self.validation_errors,
        )
85
-
86
-
87
@dataclass
class ProjectState:
    """Snapshot of what already exists in the target project."""

    exists: bool = False
    has_package_json: bool = False
    has_prisma: bool = False
    has_next_config: bool = False
    existing_models: List[str] = field(default_factory=list)
    existing_routes: List[str] = field(default_factory=list)
    existing_pages: List[str] = field(default_factory=list)

    def to_prompt(self) -> str:
        """Render the state as newline-joined text for inclusion in a prompt."""
        if not self.exists:
            return "Project does not exist yet - will be created fresh."

        report = ["Current project state:"]

        # Simple presence flags, emitted in a fixed order.
        flag_lines = (
            (self.has_package_json, "- ✓ package.json exists (Node.js project)"),
            (self.has_next_config, "- ✓ next.config.ts exists (Next.js configured)"),
        )
        report.extend(text for present, text in flag_lines if present)

        # Models are only listed when Prisma itself is configured.
        if self.has_prisma:
            report.append("- ✓ Prisma configured")
            if self.existing_models:
                report.append(f" - Models: {', '.join(self.existing_models)}")

        listings = (
            ("- Existing API routes: ", self.existing_routes),
            ("- Existing pages: ", self.existing_pages),
        )
        for label, names in listings:
            if names:
                report.append(label + ", ".join(names))

        return "\n".join(report)
122
-
123
-
124
- CHECKLIST_SYSTEM_PROMPT = """You are a code generation planner. Your task is to analyze the user's request and generate a checklist of template invocations that will create the requested application.
125
-
126
- {catalog_prompt}
127
-
128
- ## Instructions
129
-
130
- 1. Analyze the user's request carefully
131
- 2. Consider what the user ACTUALLY wants (semantic understanding)
132
- 3. Select templates that will fulfill the request
133
- 4. Add semantic enhancements based on the request type:
134
- - For "todo" apps: add checkboxes for completion status
135
- - For "blog" apps: add date fields for posts
136
- - For "e-commerce": add price, inventory fields
137
- 5. Ensure dependencies are satisfied (run setup before data, data before API, API before UI)
138
- 6. Generate a complete checklist that creates a working application
139
- 7. When follow-up fixes are requested, use `fix_code` to repair the specific files called out by validation/test logs. Extract the precise file paths and line numbers from the errors (see the Raw Validation Logs) and pass those line numbers inside the error description so the fixer knows exactly where to focus. Always reference the latest findings to decide which fixes to schedule before running validations again.
140
- 8. If the user explicitly requests an additional UI artifact (countdown display, stats badge, etc.), schedule a `generate_react_component` step with the appropriate `artifact-*` variant (e.g., `"artifact-timer"`) and a descriptive `component_name`. Keep the artifact's logic inside that client component—server components like `page.tsx` should only render the artifact and pass any required props.
141
- 9. **Route pairing requirement:** Whenever you schedule `{{"template": "generate_api_route", "params": {{"type": "collection", ...}}}}`, you MUST also include a matching `generate_api_route` item with `"type": "item"` for the same resource so detail pages can call `/api/<resource>/[id]`.
142
-
143
- This workflow repeats until all validations pass, so each checklist should either advance new functionality or explicitly repair the failures reported in the latest validation logs.
144
-
145
- ## IMPORTANT: Complete CRUD Applications
146
-
147
- For any app that manages resources (todos, posts, users, etc.), you MUST generate ALL of these UI components:
148
-
149
- 1. **Form component** (variant: "form") - Reusable form for create (generate this first so other pages can import it)
150
- 2. **Artifact components** (variant: "artifact-*") - Any additional UI artifacts requested by the user (e.g., countdown display, stats badge). Generate these before any page that consumes them.
151
- 3. **New page** (variant: "new") - Create new item at /resources/new
152
- 4. **Edit page** (variant: "detail") - Edit single item at /resources/[id] with pre-populated form
153
- 5. **List page** (variant: "list") - Main page showing all items at /resources
154
-
155
- Missing any of the required components (form, new, detail, list) will result in broken navigation! When artifacts are requested, they must also be generated or the UI will be incomplete.
156
-
157
- ## REQUIRED: Setup and Validation Commands
158
-
159
- **CRITICAL**: The following commands are REQUIRED for a valid plan:
160
-
161
- 1. **setup_app_styling** MUST be included after creating the application (after `create_next_app`). This configures app-wide styling with modern dark theme design system.
162
-
163
- 2. **setup_testing** MUST be included after `setup_app_styling`. This sets up the testing infrastructure.
164
-
165
- 3. **generate_style_tests** MUST be included after `setup_testing`. This generates CSS integrity tests that validate the design system.
166
-
167
- 4. **The final 2 commands MUST be in this exact order:**
168
- - Second-to-last: `run_typescript_check` (validates TypeScript compilation)
169
- - Last: `validate_styles` (validates CSS files and design system)
170
-
171
- These setup and validation commands are mandatory - a plan without them is INVALID.
172
-
173
- ## Output Format
174
-
175
- Respond with ONLY a JSON object (no markdown code blocks):
176
- {{
177
- "reasoning": "Brief explanation of your approach and any semantic enhancements",
178
- "checklist": [
179
- {{"template": "template_name", "params": {{}}, "description": "Why this step is needed"}}
180
- ]
181
- }}
182
-
183
- Important:
184
- - Use exact template names from the catalog
185
- - Provide all required parameters
186
- - Order items by dependency (setup first, then data, then API, then UI)
187
- - REQUIRED ordering for `generate_react_component`: emit all non-`list` variants (form, new, detail, actions, artifact-*) before the `variant: "list"` call so the list can import previously generated components
188
- - Add semantic enhancements that make the app intuitive (e.g., checkboxes for todos)
189
- - For CRUD apps, ALWAYS include all 4 UI variants: list, form, new, detail
190
- - REQUIRED: Include `setup_app_styling` after `create_next_app`
191
- - REQUIRED: Include `setup_testing` after `setup_app_styling`
192
- - REQUIRED: Include `generate_style_tests` after `setup_testing`
193
- - REQUIRED: End with `run_typescript_check`, then `validate_styles` as the last 2 commands
194
- - When converting a raw validation error into `fix_code`, copy the exact snippet (file, line, column, and message). For example:
195
-
196
- Raw Validation Logs (example):
197
- ```
198
- {{"template": "run_typescript_check", "output": {{"errors": "path/to/File.tsx(10,5): error TS1234: <error text>\\n"}}}}
199
- ```
200
-
201
- Corresponding checklist item:
202
- ```
203
- {{
204
- "template": "fix_code",
205
- "params": {{
206
- "file_path": "path/to/File.tsx",
207
- "error_description": "path/to/File.tsx(10,5): error TS1234: <error text>"
208
- }},
209
- "description": "Fix the TypeScript compiler error reported for File.tsx."
210
- }}
211
- ```
212
- Always keep the error text verbatim so the fixer knows exactly where to edit."""
213
-
214
-
215
- class ChecklistGenerator:
216
- """Generate execution checklist using LLM.
217
-
218
- The generator sends the user request, project state, and template
219
- catalog to an LLM, which returns a structured checklist of template
220
- invocations.
221
- """
222
-
223
def __init__(self, chat_sdk: ChatSDK):
    """Store the chat client used for LLM communication.

    Args:
        chat_sdk: Object satisfying the ``ChatSDK`` protocol.
    """
    self.chat = chat_sdk
230
-
231
def generate_initial_checklist(
    self,
    context: UserContext,
    project_state: Optional[ProjectState] = None,
) -> GeneratedChecklist:
    """Produce the first checklist for a request.

    Args:
        context: User context describing the request.
        project_state: Known project state; a default ``ProjectState()`` is
            used when omitted.

    Returns:
        The checklist produced by ``_generate_from_prompt``.
    """
    state = ProjectState() if project_state is None else project_state

    planner_instructions = CHECKLIST_SYSTEM_PROMPT.format(
        catalog_prompt=get_catalog_prompt()
    )
    request_section = self._build_initial_prompt(context, state)

    return self._generate_from_prompt(
        f"{planner_instructions}\n\n## User Request\n\n{request_section}"
    )
246
-
247
def generate_debug_checklist(
    self,
    context: UserContext,
    project_state: Optional[ProjectState],
    prior_errors: Optional[List[str]],
    validation_logs: Optional[List[Any]],
) -> GeneratedChecklist:
    """Produce a follow-up checklist aimed at fixing outstanding errors.

    Args:
        context: User context describing the request.
        project_state: Known project state; ``None`` means a default
            ``ProjectState()``.
        prior_errors: Error messages from the previous attempt (``None`` is
            treated as an empty list).
        validation_logs: Validation/test log entries (``None`` is treated
            as an empty list).

    Returns:
        The checklist produced by ``_generate_from_prompt``.
    """
    state = project_state if project_state is not None else ProjectState()

    # The remediation section is built before the system prompt is formatted.
    remediation_section = self._build_debug_prompt(
        context=context,
        project_state=state,
        prior_errors=prior_errors if prior_errors else [],
        validation_logs=validation_logs if validation_logs else [],
    )
    planner_instructions = CHECKLIST_SYSTEM_PROMPT.format(
        catalog_prompt=get_catalog_prompt()
    )

    return self._generate_from_prompt(
        f"{planner_instructions}\n\n## Remediation Context\n\n{remediation_section}"
    )
269
-
270
def _generate_from_prompt(self, full_prompt: str) -> GeneratedChecklist:
    """Ask the LLM for a checklist, retrying until one parses and validates.

    Each attempt sends ``full_prompt`` through ``self.chat``, extracts the
    response text, parses it and validates the result. An attempt is
    discarded (and retried) when it raises, yields no items, or fails
    validation.

    Args:
        full_prompt: Complete prompt text (system prompt plus request section).

    Returns:
        The first non-empty checklist that passes validation.

    Raises:
        RuntimeError: If none of the attempts produces a valid checklist.
            The message carries the reason the last attempt failed.
    """
    logger.debug("Generating checklist with LLM...")
    logger.debug("Checklist prompt: %s", full_prompt)

    max_attempts = 3
    last_failure_reason = "unknown error"

    for attempt in range(1, max_attempts + 1):
        try:
            response = self.chat.send(full_prompt, timeout=1200)
            response_text = self._extract_response_text(response)
            logger.debug("LLM response (attempt %d): %s", attempt, response_text)
            checklist = self._parse_checklist(response_text)
        except Exception as exc:  # pylint: disable=broad-exception-caught
            # Any SDK/parsing failure simply costs one attempt.
            last_failure_reason = str(exc)
            logger.warning(
                "Checklist generation attempt %d/%d failed: %s",
                attempt,
                max_attempts,
                exc,
            )
            continue

        if not checklist.items:
            # _parse_checklist reports malformed JSON as an *empty* checklist
            # with validation_errors set; surface that cause instead of the
            # generic "empty checklist" message.
            if checklist.validation_errors:
                last_failure_reason = "; ".join(
                    str(err) for err in checklist.validation_errors
                )
            else:
                last_failure_reason = "LLM returned an empty checklist"
            logger.warning(
                "Checklist generation attempt %d/%d returned no items (%s), retrying...",
                attempt,
                max_attempts,
                last_failure_reason,
            )
            continue

        self._validate_checklist(checklist)
        if checklist.validation_errors:
            last_failure_reason = "; ".join(checklist.validation_errors)
            logger.warning(
                "Checklist generation attempt %d/%d failed validation: %s",
                attempt,
                max_attempts,
                checklist.validation_errors,
            )
            continue

        logger.debug(
            "Generated checklist with %d items on attempt %d",
            len(checklist.items),
            attempt,
        )
        return checklist

    raise RuntimeError(
        f"Failed to generate a valid checklist after {max_attempts} attempts: "
        f"{last_failure_reason}"
    )
328
-
329
def _build_initial_prompt(
    self,
    context: UserContext,
    project_state: ProjectState,
) -> str:
    """Assemble the request section of the initial checklist prompt.

    Args:
        context: User context (request text, inferred entity/fields,
            project settings, fix feedback and validation reports).
        project_state: Current project state, rendered via ``to_prompt()``.

    Returns:
        The prompt sections joined with newlines.
    """
    sections = [f"**User Request**: {context.user_request}"]

    if context.entity_name:
        sections.append(f"\n**Inferred Entity**: {context.entity_name}")

    if context.schema_fields:
        sections.append(f"\n**Inferred Fields**: {json.dumps(context.schema_fields)}")

    sections.extend(
        [
            f"\n**Project Directory**: {context.project_dir}",
            f"\n**Language**: {context.language}",
            f"\n**Project Type**: {context.project_type}",
            f"\n{project_state.to_prompt()}",
        ]
    )

    if context.fix_feedback:
        sections.append("\n**Outstanding Fix Requests**:")
        # Only the five most recent notes are included.
        sections.extend(f"- {note}" for note in context.fix_feedback[-5:])

    if context.validation_reports:
        sections.append("\n**Recent Validation/Test Findings**:")
        # Only the five most recent reports are included.
        for report in context.validation_reports[-5:]:
            verdict = "PASS" if report.get("success", True) else "FAIL"
            step_name = report.get("template", "validation_step")
            summary = report.get("description", "")
            sections.append(f"- [{verdict}] {step_name}: {summary}")
            if report.get("error"):
                sections.append(f" Error: {report['error']}")

            payload = report.get("output", {})
            excerpt = ""
            if isinstance(payload, dict):
                # First populated well-known key wins; otherwise dump the dict.
                excerpt = next(
                    (
                        str(payload[key])[:200]
                        for key in ("stdout", "stderr", "message", "details")
                        if payload.get(key)
                    ),
                    "",
                )
                if not excerpt and payload:
                    excerpt = json.dumps(payload)[:200]
            elif payload:
                excerpt = str(payload)[:200]

            if excerpt:
                sections.append(f" Output: {excerpt}")

    sections.append("\nGenerate a checklist to fulfill this request.")

    return "\n".join(sections)
389
-
390
def _build_debug_prompt(
    self,
    context: UserContext,
    project_state: ProjectState,
    prior_errors: List[str],
    validation_logs: List[Any],
) -> str:
    """Build the remediation section of a debug checklist prompt.

    Args:
        context: User context (request text, project directory, inferred
            entity/fields and fix feedback).
        project_state: Current project state, rendered via ``to_prompt()``.
        prior_errors: Error messages from the previous attempt.
        validation_logs: Validation/test log entries. Each entry may be a
            mapping, an object exposing ``to_dict()``, or any other value
            (rendered with ``str``). Only the last ten are used.

    Returns:
        The prompt sections joined with newlines.
    """
    lines = [
        "You are a remediation planner for the GAIA web development agent. "
        "The project has already been scaffolded; focus exclusively on fixing outstanding issues."
    ]
    lines.append(f"\n**User Request**: {context.user_request}")
    lines.append(f"\n**Project Directory**: {context.project_dir}")

    if context.entity_name:
        lines.append(f"\n**Entity**: {context.entity_name}")
    if context.schema_fields:
        lines.append(f"\n**Schema Fields**: {json.dumps(context.schema_fields)}")

    lines.append(f"\n{project_state.to_prompt()}")

    if prior_errors:
        lines.append("\n**Execution Errors From Last Attempt:**")
        for err in prior_errors:
            lines.append(f"- {err}")

    if validation_logs:
        lines.append("\n**Recent Validation/Test Results:**")
        raw_entries = []
        for log in validation_logs[-10:]:
            entry = log.to_dict() if hasattr(log, "to_dict") else log

            # Entries are typed as Any: anything without a mapping-style
            # .get() is shown verbatim instead of crashing the prompt build.
            if not hasattr(entry, "get"):
                lines.append(f"- {entry}")
                raw_entries.append(entry)
                continue

            template = entry.get("template", "unknown_step")
            success = entry.get("success", True)
            desc = entry.get("description", "")
            status = "PASS" if success else "FAIL"
            lines.append(f"- [{status}] {template}: {desc}")
            if entry.get("error"):
                lines.append(f" Error: {entry['error']}")

            output = entry.get("output") or {}
            if isinstance(output, dict):
                for key in ("stdout", "stderr", "details", "message"):
                    if output.get(key):
                        snippet = str(output[key])[:200]
                        lines.append(f" Output: {snippet}")
                        break
            else:
                # Non-dict output (e.g. a plain string) is truncated the same
                # way the initial-prompt builder does it.
                lines.append(f" Output: {str(output)[:200]}")
            raw_entries.append(entry)

        if raw_entries:
            lines.append(
                "\n**Raw Validation Logs (exact text for follow-up fixes):**"
            )
            for entry in raw_entries:
                # default=str: this is a best-effort dump for the LLM, so a
                # non-JSON value should be stringified rather than abort the
                # whole remediation prompt with a TypeError.
                lines.append(json.dumps(entry, ensure_ascii=False, default=str))

    if context.fix_feedback:
        lines.append("\n**Outstanding Fix Instructions:**")
        for note in context.fix_feedback[-10:]:
            lines.append(f"- {note}")

    lines.append(
        "\nYour job: draft a concise checklist that repairs the errors above, "
        "regenerates any broken code, and re-runs critical validations."
    )
    lines.append(
        "\n**Critical Requirements for Debug Checklists:**\n"
        "1. Use `fix_code` to repair the specific files referenced in the failures above.\n"
        "2. Re-run any validations or tests that previously failed once fixes are applied.\n"
        "3. Always include `run_typescript_check` as the second-to-last command to capture current compiler errors.\n"
        "4. Always include `validate_styles` as the final command to capture CSS/design regressions."
    )

    return "\n".join(lines)
462
-
463
- def _extract_response_text(self, response: Any) -> str:
464
- """Extract text from LLM response.
465
-
466
- Handles different response formats from various SDKs.
467
-
468
- Args:
469
- response: Response from chat SDK
470
-
471
- Returns:
472
- Response text string
473
- """
474
- if isinstance(response, str):
475
- return response
476
-
477
- # Handle response objects with text attribute
478
- if hasattr(response, "text"):
479
- return response.text
480
-
481
- # Handle response objects with content attribute
482
- if hasattr(response, "content"):
483
- return response.content
484
-
485
- # Handle dict-like responses
486
- if isinstance(response, dict):
487
- return response.get("text", response.get("content", str(response)))
488
-
489
- return str(response)
490
-
491
def _parse_checklist(self, response_text: str) -> GeneratedChecklist:
    """Parse LLM response into GeneratedChecklist.

    Malformed JSON and JSON of the wrong shape (non-object top level,
    non-list ``checklist``, non-object items, non-object ``params``) are
    all reported through ``validation_errors`` on the returned checklist
    rather than raised.

    Args:
        response_text: Raw LLM response text

    Returns:
        Parsed GeneratedChecklist. A missing or null ``params`` becomes an
        empty dict; a missing or null ``checklist`` becomes an empty list.
    """

    def _failed(message: str) -> GeneratedChecklist:
        # Uniform "nothing usable" result carrying the reason.
        logger.error("Failed to parse checklist: %s", message)
        return GeneratedChecklist(
            items=[],
            reasoning="",
            raw_response=response_text,
            validation_errors=[message],
        )

    try:
        data = json.loads(self._extract_json(response_text))
    except json.JSONDecodeError as e:
        return _failed(f"Failed to parse JSON: {str(e)}")

    if not isinstance(data, dict):
        return _failed(
            f"Expected a JSON object at the top level, got {type(data).__name__}"
        )

    raw_items = data.get("checklist") or []
    if not isinstance(raw_items, list):
        return _failed(
            f"Expected 'checklist' to be a list, got {type(raw_items).__name__}"
        )

    items = []
    shape_errors = []
    for index, item_data in enumerate(raw_items):
        if not isinstance(item_data, dict):
            shape_errors.append(
                f"Checklist item {index} is not a JSON object: {item_data!r}"
            )
            continue

        params = item_data.get("params")
        if params is None:
            params = {}
        elif not isinstance(params, dict):
            shape_errors.append(
                f"Checklist item {index} has non-object 'params': {params!r}"
            )
            continue

        items.append(
            ChecklistItem(
                template=item_data.get("template", ""),
                params=params,
                description=item_data.get("description", ""),
            )
        )

    if shape_errors:
        logger.error("Malformed checklist items: %s", shape_errors)

    return GeneratedChecklist(
        items=items,
        reasoning=data.get("reasoning") or "",
        raw_response=response_text,
        validation_errors=shape_errors,
    )
530
-
531
- def _extract_json(self, text: str) -> str:
532
- """Extract JSON from text that might contain markdown or other content.
533
-
534
- Args:
535
- text: Text that may contain JSON
536
-
537
- Returns:
538
- Extracted JSON string
539
- """
540
- # Try to find JSON in markdown code block
541
- code_block_match = re.search(r"```(?:json)?\s*\n?(.*?)\n?```", text, re.DOTALL)
542
- if code_block_match:
543
- return code_block_match.group(1).strip()
544
-
545
- # Try to find raw JSON object
546
- json_match = re.search(r"\{.*\}", text, re.DOTALL)
547
- if json_match:
548
- return json_match.group(0)
549
-
550
- # Return as-is and let JSON parser handle it
551
- return text.strip()
552
-
553
def _validate_checklist(self, checklist: GeneratedChecklist) -> None:
    """Run every structural check on ``checklist`` and record the failures.

    Messages are appended to ``checklist.validation_errors`` in this order:
    per-item catalog errors, exact duplicates, styling setup, testing
    setup, the two closing validation commands, and style-test generation.
    Nothing is returned; the checklist is mutated in place.

    Args:
        checklist: Checklist to validate (modified in place)
    """
    problems = checklist.validation_errors
    entries = checklist.items

    # Catalog-level validation of each template/params pair.
    for entry in entries:
        problems.extend(validate_checklist_item(entry.template, entry.params))

    # Same template with the same params twice is flagged; the same template
    # with different params (e.g. several API routes) is allowed.
    fingerprints = set()
    for entry in entries:
        fingerprint = f"{entry.template}:{json.dumps(entry.params, sort_keys=True)}"
        if fingerprint in fingerprints:
            problems.append(
                f"Duplicate checklist item: {entry.template} with same params"
            )
        fingerprints.add(fingerprint)

    # Position of the LAST occurrence of each ordering-sensitive template.
    create_at = styling_at = testing_at = style_tests_at = None
    for position, entry in enumerate(entries):
        name = entry.template
        if name == "create_next_app":
            create_at = position
        if name == "setup_app_styling":
            styling_at = position
        if name == "setup_testing":
            testing_at = position
        if name == "generate_style_tests":
            style_tests_at = position

    # setup_app_styling is mandatory once create_next_app is planned.
    if create_at is not None:
        if styling_at is None:
            problems.append(
                "REQUIRED: 'setup_app_styling' must be included after 'create_next_app'"
            )
        elif styling_at <= create_at:
            problems.append(
                "REQUIRED: 'setup_app_styling' must come after 'create_next_app' in the checklist"
            )

    # setup_testing is mandatory once setup_app_styling is planned.
    if styling_at is not None:
        if testing_at is None:
            problems.append(
                "REQUIRED: 'setup_testing' must be included after 'setup_app_styling'"
            )
        elif testing_at <= styling_at:
            problems.append(
                "REQUIRED: 'setup_testing' must come after 'setup_app_styling' in the checklist"
            )

    # The plan has to close with run_typescript_check then validate_styles.
    if len(entries) < 2:
        problems.append(
            "REQUIRED: Checklist must end with 'run_typescript_check', "
            "'validate_styles' as the last two commands"
        )
    else:
        penultimate, final = entries[-2], entries[-1]
        if final.template != "validate_styles":
            problems.append("REQUIRED: The last command must be 'validate_styles'")
        if penultimate.template != "run_typescript_check":
            problems.append(
                "REQUIRED: The second-to-last command must be 'run_typescript_check'"
            )

    # generate_style_tests is mandatory once setup_testing is planned.
    if testing_at is not None:
        if style_tests_at is None:
            problems.append(
                "REQUIRED: 'generate_style_tests' must be included after 'setup_testing'"
            )
        elif style_tests_at <= testing_at:
            problems.append(
                "REQUIRED: 'generate_style_tests' must come after 'setup_testing'"
            )

    if problems:
        logger.warning("Checklist validation errors: %s", problems)
650
-
651
-
652
def create_checklist_from_workflow(
    workflow_phases: List[Any],
    context: UserContext,
) -> GeneratedChecklist:
    """Create a checklist from existing workflow phases (for comparison/testing).

    Walks every step of every phase, translates the step name into a
    template name, and copies the step's tool parameters (minus
    ``project_dir``) into a ``ChecklistItem``. The result is not validated.

    Args:
        workflow_phases: Objects exposing a ``steps`` iterable; each step
            must provide ``name`` and ``description`` and may provide
            ``get_tool_invocation(context)``.
        context: User context handed to ``get_tool_invocation``.

    Returns:
        GeneratedChecklist whose items mirror the workflow steps in order.
    """
    # Step name -> template name. Built once: the mapping does not depend on
    # the phase or step being processed. Unknown step names pass through.
    step_to_template = {
        "create_next_app": "create_next_app",
        "setup_styling": "setup_app_styling",
        "install_deps": "setup_prisma",
        "setup_testing": "setup_testing",
        "prisma_init": "setup_prisma",
        "setup_prisma": "setup_prisma",
        "manage_data_model": "generate_prisma_model",
        "manage_api_endpoint": "generate_api_route",
        "manage_api_endpoint_dynamic": "generate_api_route",
        "manage_react_component": "generate_react_component",
        "update_landing_page": "update_landing_page",
        "validate_typescript": "run_typescript_check",
        "validate_crud_structure": "run_typescript_check",
        "test_crud_api": "run_typescript_check",
    }

    items = []
    for phase in workflow_phases:
        for step in phase.steps:
            template_name = step_to_template.get(step.name, step.name)

            # Steps without a tool invocation contribute an empty params dict.
            params = {}
            if hasattr(step, "get_tool_invocation"):
                invocation = step.get_tool_invocation(context)
                if invocation:
                    # invocation is a (tool, params) pair; only params matter.
                    _, step_params = invocation
                    params = {
                        k: v for k, v in step_params.items() if k != "project_dir"
                    }

            items.append(
                ChecklistItem(
                    template=template_name,
                    params=params,
                    description=step.description,
                )
            )

    return GeneratedChecklist(
        items=items,
        reasoning="Converted from existing workflow",
    )
1
+ # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+ """Checklist Generator for LLM-Driven Code Generation.
4
+
5
+ This module uses an LLM to generate a checklist of template invocations
6
+ based on the user's request and the available template catalog.
7
+
8
+ The generator:
9
+ 1. Receives user request and project context
10
+ 2. Sends prompt to LLM with template catalog
11
+ 3. Parses LLM response into structured checklist
12
+ 4. Validates checklist items against template definitions
13
+
14
+ The resulting checklist is then executed deterministically by
15
+ ChecklistExecutor.
16
+ """
17
+
18
+ import json
19
+ import logging
20
+ import re
21
+ from dataclasses import dataclass, field
22
+ from typing import Any, Dict, List, Optional, Protocol
23
+
24
+ from .steps.base import UserContext
25
+ from .template_catalog import get_catalog_prompt, validate_checklist_item
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
class ChatSDK(Protocol):
    """Structural type for the chat client used by the checklist generator.

    Any object with a compatible ``send`` method satisfies this protocol.
    """

    def send(self, message: str, timeout: int = 600, no_history: bool = False) -> Any:
        """Submit ``message`` to the model and return whatever it replies.

        Args:
            message: Prompt text to deliver.
            timeout: Time budget for the call (presumably seconds - confirm
                against the concrete SDK).
            no_history: Flag passed through to the SDK; its exact effect is
                defined by the implementation.
        """
        ...
36
+
37
+
38
@dataclass
class ChecklistItem:
    """One planned template invocation.

    Carries the template name, the parameters it should be invoked with,
    and a human-readable description of why the step is in the plan.
    """

    template: str
    params: Dict[str, Any]
    description: str

    def to_dict(self) -> Dict[str, Any]:
        """Return the item as a plain dict keyed by field name.

        ``params`` is included by reference, not copied.
        """
        return dict(
            template=self.template,
            params=self.params,
            description=self.description,
        )
57
+
58
+
59
@dataclass
class GeneratedChecklist:
    """A full plan as returned by the LLM, plus validation bookkeeping.

    ``items`` holds the ordered template invocations, ``reasoning`` the
    model's explanation, ``raw_response`` the unparsed reply, and
    ``validation_errors`` any problems recorded against the plan.
    """

    items: List[ChecklistItem]
    reasoning: str
    raw_response: str = ""
    validation_errors: List[str] = field(default_factory=list)

    @property
    def is_valid(self) -> bool:
        """True when no validation error has been recorded."""
        return not self.validation_errors

    def to_dict(self) -> Dict[str, Any]:
        """Return a dict view of the plan; ``raw_response`` is not included."""
        serialized_items = [entry.to_dict() for entry in self.items]
        return {
            "reasoning": self.reasoning,
            "checklist": serialized_items,
            "is_valid": self.is_valid,
            "validation_errors": self.validation_errors,
        }
85
+
86
+
87
@dataclass
class ProjectState:
    """Snapshot of what is already present in the target project."""

    exists: bool = False
    has_package_json: bool = False
    has_prisma: bool = False
    has_next_config: bool = False
    existing_models: List[str] = field(default_factory=list)
    existing_routes: List[str] = field(default_factory=list)
    existing_pages: List[str] = field(default_factory=list)

    def to_prompt(self) -> str:
        """Render the snapshot as newline-joined text for an LLM prompt.

        A project that does not exist yields a single fixed sentence;
        otherwise a header line is followed by one bullet per known fact.
        """
        if not self.exists:
            return "Project does not exist yet - will be created fresh."

        report = ["Current project state:"]
        add = report.append

        if self.has_package_json:
            add("- ✓ package.json exists (Node.js project)")
        if self.has_next_config:
            add("- ✓ next.config.ts exists (Next.js configured)")
        if self.has_prisma:
            add("- ✓ Prisma configured")
            # Models are listed as a sub-bullet of the Prisma line.
            if self.existing_models:
                add(f" - Models: {', '.join(self.existing_models)}")

        if self.existing_routes:
            add(f"- Existing API routes: {', '.join(self.existing_routes)}")

        if self.existing_pages:
            add(f"- Existing pages: {', '.join(self.existing_pages)}")

        return "\n".join(report)
122
+
123
+
124
+ CHECKLIST_SYSTEM_PROMPT = """You are a code generation planner. Your task is to analyze the user's request and generate a checklist of template invocations that will create the requested application.
125
+
126
+ {catalog_prompt}
127
+
128
+ ## Instructions
129
+
130
+ 1. Analyze the user's request carefully
131
+ 2. Consider what the user ACTUALLY wants (semantic understanding)
132
+ 3. Select templates that will fulfill the request
133
+ 4. Add semantic enhancements based on the request type:
134
+ - For "todo" apps: add checkboxes for completion status
135
+ - For "blog" apps: add date fields for posts
136
+ - For "e-commerce": add price, inventory fields
137
+ 5. Ensure dependencies are satisfied (run setup before data, data before API, API before UI)
138
+ 6. Generate a complete checklist that creates a working application
139
+ 7. When follow-up fixes are requested, use `fix_code` to repair the specific files called out by validation/test logs. Extract the precise file paths and line numbers from the errors (see the Raw Validation Logs) and pass those line numbers inside the error description so the fixer knows exactly where to focus. Always reference the latest findings to decide which fixes to schedule before running validations again.
140
+ 8. If the user explicitly requests an additional UI artifact (countdown display, stats badge, etc.), schedule a `generate_react_component` step with the appropriate `artifact-*` variant (e.g., `"artifact-timer"`) and a descriptive `component_name`. Keep the artifact's logic inside that client component—server components like `page.tsx` should only render the artifact and pass any required props.
141
+ 9. **Route pairing requirement:** Whenever you schedule `{{"template": "generate_api_route", "params": {{"type": "collection", ...}}}}`, you MUST also include a matching `generate_api_route` item with `"type": "item"` for the same resource so detail pages can call `/api/<resource>/[id]`.
142
+
143
+ This workflow repeats until all validations pass, so each checklist should either advance new functionality or explicitly repair the failures reported in the latest validation logs.
144
+
145
+ ## IMPORTANT: Complete CRUD Applications
146
+
147
+ For any app that manages resources (todos, posts, users, etc.), you MUST generate ALL of these UI components:
148
+
149
+ 1. **Form component** (variant: "form") - Reusable form for create (generate this first so other pages can import it)
150
+ 2. **Artifact components** (variant: "artifact-*") - Any additional UI artifacts requested by the user (e.g., countdown display, stats badge). Generate these before any page that consumes them.
151
+ 3. **New page** (variant: "new") - Create new item at /resources/new
152
+ 4. **Edit page** (variant: "detail") - Edit single item at /resources/[id] with pre-populated form
153
+ 5. **List page** (variant: "list") - Main page showing all items at /resources
154
+
155
+ Missing any of the required components (form, new, detail, list) will result in broken navigation! When artifacts are requested, they must also be generated or the UI will be incomplete.
156
+
157
+ ## REQUIRED: Setup and Validation Commands
158
+
159
+ **CRITICAL**: The following commands are REQUIRED for a valid plan:
160
+
161
+ 1. **setup_app_styling** MUST be included after creating the application (after `create_next_app`). This configures app-wide styling with modern dark theme design system.
162
+
163
+ 2. **setup_testing** MUST be included after `setup_app_styling`. This sets up the testing infrastructure.
164
+
165
+ 3. **generate_style_tests** MUST be included after `setup_testing`. This generates CSS integrity tests that validate the design system.
166
+
167
+ 4. **The final 2 commands MUST be in this exact order:**
168
+ - Second-to-last: `run_typescript_check` (validates TypeScript compilation)
169
+ - Last: `validate_styles` (validates CSS files and design system)
170
+
171
+ These setup and validation commands are mandatory - a plan without them is INVALID.
172
+
173
+ ## Output Format
174
+
175
+ Respond with ONLY a JSON object (no markdown code blocks):
176
+ {{
177
+ "reasoning": "Brief explanation of your approach and any semantic enhancements",
178
+ "checklist": [
179
+ {{"template": "template_name", "params": {{}}, "description": "Why this step is needed"}}
180
+ ]
181
+ }}
182
+
183
+ Important:
184
+ - Use exact template names from the catalog
185
+ - Provide all required parameters
186
+ - Order items by dependency (setup first, then data, then API, then UI)
187
+ - REQUIRED ordering for `generate_react_component`: emit all non-`list` variants (form, new, detail, actions, artifact-*) before the `variant: "list"` call so the list can import previously generated components
188
+ - Add semantic enhancements that make the app intuitive (e.g., checkboxes for todos)
189
+ - For CRUD apps, ALWAYS include all 4 UI variants: list, form, new, detail
190
+ - REQUIRED: Include `setup_app_styling` after `create_next_app`
191
+ - REQUIRED: Include `setup_testing` after `setup_app_styling`
192
+ - REQUIRED: Include `generate_style_tests` after `setup_testing`
193
+ - REQUIRED: End with `run_typescript_check`, then `validate_styles` as the last 2 commands
194
+ - When converting a raw validation error into `fix_code`, copy the exact snippet (file, line, column, and message). For example:
195
+
196
+ Raw Validation Logs (example):
197
+ ```
198
+ {{"template": "run_typescript_check", "output": {{"errors": "path/to/File.tsx(10,5): error TS1234: <error text>\\n"}}}}
199
+ ```
200
+
201
+ Corresponding checklist item:
202
+ ```
203
+ {{
204
+ "template": "fix_code",
205
+ "params": {{
206
+ "file_path": "path/to/File.tsx",
207
+ "error_description": "path/to/File.tsx(10,5): error TS1234: <error text>"
208
+ }},
209
+ "description": "Fix the TypeScript compiler error reported for File.tsx."
210
+ }}
211
+ ```
212
+ Always keep the error text verbatim so the fixer knows exactly where to edit."""
213
+
214
+
215
class ChecklistGenerator:
    """Generate execution checklist using LLM.

    The generator sends the user request, project state, and template
    catalog to an LLM, parses the JSON reply into a ``GeneratedChecklist``
    and validates it. Generation is retried a fixed number of times before
    giving up with ``RuntimeError``.
    """

    def __init__(self, chat_sdk: ChatSDK):
        """Initialize the checklist generator.

        Args:
            chat_sdk: Chat SDK instance for LLM communication
        """
        self.chat = chat_sdk

    def generate_initial_checklist(
        self,
        context: UserContext,
        project_state: Optional[ProjectState] = None,
    ) -> GeneratedChecklist:
        """Generate the initial project-scaffolding checklist.

        Args:
            context: User context describing the request.
            project_state: Current project snapshot; a default (non-existent
                project) is used when omitted.

        Returns:
            A checklist that passed validation.

        Raises:
            RuntimeError: If no valid checklist was produced after all retries.
        """
        if project_state is None:
            project_state = ProjectState()

        user_prompt = self._build_initial_prompt(context, project_state)
        return self._generate_from_prompt(
            self._compose_prompt("User Request", user_prompt)
        )

    def generate_debug_checklist(
        self,
        context: UserContext,
        project_state: Optional[ProjectState],
        prior_errors: Optional[List[str]],
        validation_logs: Optional[List[Any]],
    ) -> GeneratedChecklist:
        """Generate a remediation checklist to fix outstanding errors.

        Args:
            context: User context describing the request.
            project_state: Current project snapshot, or None for the default.
            prior_errors: Execution errors from the previous attempt, if any.
            validation_logs: Validation/test log entries (dicts or objects
                with ``to_dict``), if any.

        Returns:
            A checklist that passed validation.

        Raises:
            RuntimeError: If no valid checklist was produced after all retries.
        """
        if project_state is None:
            project_state = ProjectState()

        debug_prompt = self._build_debug_prompt(
            context=context,
            project_state=project_state,
            prior_errors=prior_errors or [],
            validation_logs=validation_logs or [],
        )
        return self._generate_from_prompt(
            self._compose_prompt("Remediation Context", debug_prompt)
        )

    @staticmethod
    def _compose_prompt(section_heading: str, body: str) -> str:
        """Prefix ``body`` with the formatted system prompt and a section heading."""
        system_prompt = CHECKLIST_SYSTEM_PROMPT.format(
            catalog_prompt=get_catalog_prompt()
        )
        return f"{system_prompt}\n\n## {section_heading}\n\n{body}"

    def _generate_from_prompt(self, full_prompt: str) -> GeneratedChecklist:
        """Common checklist generation logic with retries.

        Sends ``full_prompt`` up to three times. An attempt is discarded when
        the call or parsing raises, when the parsed checklist has no items,
        or when validation records any error.

        Args:
            full_prompt: Complete prompt (system prompt plus request section).

        Returns:
            The first checklist that parses, is non-empty and validates.

        Raises:
            RuntimeError: After the final failed attempt; the message carries
                the reason for the last failure.
        """
        logger.debug("Generating checklist with LLM...")
        # Lazy %-args: the prompt can be large and is only rendered when
        # debug logging is actually enabled.
        logger.debug("Checklist prompt: %s", full_prompt)

        max_attempts = 3
        last_failure_reason = "unknown error"

        for attempt in range(1, max_attempts + 1):
            try:
                response = self.chat.send(full_prompt, timeout=1200)
                response_text = self._extract_response_text(response)
                logger.debug("LLM response (attempt %d): %s", attempt, response_text)
                checklist = self._parse_checklist(response_text)
            except Exception as exc:  # pylint: disable=broad-exception-caught
                last_failure_reason = str(exc)
                logger.warning(
                    "Checklist generation attempt %d/%d failed: %s",
                    attempt,
                    max_attempts,
                    exc,
                )
                continue

            if not checklist.items:
                # A parse failure also yields zero items; keep its message so
                # the final RuntimeError names the real cause instead of a
                # generic "empty checklist".
                last_failure_reason = (
                    "; ".join(checklist.validation_errors)
                    or "LLM returned an empty checklist"
                )
                logger.warning(
                    "Checklist generation attempt %d/%d returned no items (%s), retrying...",
                    attempt,
                    max_attempts,
                    last_failure_reason,
                )
                continue

            self._validate_checklist(checklist)
            if checklist.validation_errors:
                last_failure_reason = "; ".join(checklist.validation_errors)
                logger.warning(
                    "Checklist generation attempt %d/%d failed validation: %s",
                    attempt,
                    max_attempts,
                    checklist.validation_errors,
                )
                continue

            logger.debug(
                "Generated checklist with %d items on attempt %d",
                len(checklist.items),
                attempt,
            )
            return checklist

        raise RuntimeError(
            f"Failed to generate a valid checklist after {max_attempts} attempts: "
            f"{last_failure_reason}"
        )

    @staticmethod
    def _output_snippet(
        output: Any,
        keys: tuple,
        dump_fallback: bool,
        limit: int = 200,
    ) -> str:
        """Pick a short, prompt-friendly excerpt from a step's ``output``.

        Args:
            output: Step output; a dict, any other value, or something falsy.
            keys: Dict keys to try in order; the first truthy value wins.
            dump_fallback: When True and no key matched, a non-empty dict is
                summarized as its JSON dump instead of being dropped.
            limit: Maximum number of characters returned.

        Returns:
            The excerpt, or an empty string when there is nothing to show.
        """
        if isinstance(output, dict):
            for key in keys:
                if output.get(key):
                    return str(output[key])[:limit]
            if dump_fallback and output:
                return json.dumps(output)[:limit]
            return ""
        # Non-dict output (e.g. a plain string) is shown as-is, truncated.
        return str(output)[:limit] if output else ""

    def _build_initial_prompt(
        self,
        context: UserContext,
        project_state: ProjectState,
    ) -> str:
        """Build the user prompt with all context.

        Args:
            context: User context
            project_state: Current project state

        Returns:
            Formatted user prompt string
        """
        lines = [f"**User Request**: {context.user_request}"]

        if context.entity_name:
            lines.append(f"\n**Inferred Entity**: {context.entity_name}")

        if context.schema_fields:
            lines.append(f"\n**Inferred Fields**: {json.dumps(context.schema_fields)}")

        lines.append(f"\n**Project Directory**: {context.project_dir}")
        lines.append(f"\n**Language**: {context.language}")
        lines.append(f"\n**Project Type**: {context.project_type}")

        lines.append(f"\n{project_state.to_prompt()}")

        if context.fix_feedback:
            lines.append("\n**Outstanding Fix Requests**:")
            # Only the five most recent notes are included.
            lines.extend(f"- {note}" for note in context.fix_feedback[-5:])

        if context.validation_reports:
            lines.append("\n**Recent Validation/Test Findings**:")
            # Only the five most recent reports are included.
            for log in context.validation_reports[-5:]:
                status = "PASS" if log.get("success", True) else "FAIL"
                template = log.get("template", "validation_step")
                description = log.get("description", "")
                lines.append(f"- [{status}] {template}: {description}")
                if log.get("error"):
                    lines.append(f" Error: {log['error']}")

                snippet = self._output_snippet(
                    log.get("output", {}),
                    ("stdout", "stderr", "message", "details"),
                    dump_fallback=True,
                )
                if snippet:
                    lines.append(f" Output: {snippet}")

        lines.append("\nGenerate a checklist to fulfill this request.")

        return "\n".join(lines)

    def _build_debug_prompt(
        self,
        context: UserContext,
        project_state: ProjectState,
        prior_errors: List[str],
        validation_logs: List[Any],
    ) -> str:
        """Build prompt for remediation/debug checklists.

        Args:
            context: User context
            project_state: Current project state
            prior_errors: Execution errors from the last attempt
            validation_logs: Log entries; each is a dict or exposes ``to_dict``

        Returns:
            Formatted remediation prompt string
        """
        lines = [
            "You are a remediation planner for the GAIA web development agent. "
            "The project has already been scaffolded; focus exclusively on fixing outstanding issues."
        ]
        lines.append(f"\n**User Request**: {context.user_request}")
        lines.append(f"\n**Project Directory**: {context.project_dir}")

        if context.entity_name:
            lines.append(f"\n**Entity**: {context.entity_name}")
        if context.schema_fields:
            lines.append(f"\n**Schema Fields**: {json.dumps(context.schema_fields)}")

        lines.append(f"\n{project_state.to_prompt()}")

        if prior_errors:
            lines.append("\n**Execution Errors From Last Attempt:**")
            lines.extend(f"- {err}" for err in prior_errors)

        if validation_logs:
            lines.append("\n**Recent Validation/Test Results:**")
            raw_entries = []
            # Only the ten most recent log entries are included.
            for log in validation_logs[-10:]:
                entry = log.to_dict() if hasattr(log, "to_dict") else log
                template = entry.get("template", "unknown_step")
                success = entry.get("success", True)
                desc = entry.get("description", "")
                status = "PASS" if success else "FAIL"
                lines.append(f"- [{status}] {template}: {desc}")
                if entry.get("error"):
                    lines.append(f" Error: {entry['error']}")

                # The helper tolerates non-dict output (e.g. a bare string),
                # which previously raised AttributeError on ``.get``.
                snippet = self._output_snippet(
                    entry.get("output") or {},
                    ("stdout", "stderr", "details", "message"),
                    dump_fallback=False,
                )
                if snippet:
                    lines.append(f" Output: {snippet}")
                raw_entries.append(entry)

            if raw_entries:
                lines.append(
                    "\n**Raw Validation Logs (exact text for follow-up fixes):**"
                )
                for entry in raw_entries:
                    # default=str keeps prompt building alive when a log holds
                    # a value json cannot encode natively; JSON-native entries
                    # serialize exactly as before.
                    lines.append(json.dumps(entry, ensure_ascii=False, default=str))

        if context.fix_feedback:
            lines.append("\n**Outstanding Fix Instructions:**")
            # Only the ten most recent notes are included.
            lines.extend(f"- {note}" for note in context.fix_feedback[-10:])

        lines.append(
            "\nYour job: draft a concise checklist that repairs the errors above, "
            "regenerates any broken code, and re-runs critical validations."
        )
        lines.append(
            "\n**Critical Requirements for Debug Checklists:**\n"
            "1. Use `fix_code` to repair the specific files referenced in the failures above.\n"
            "2. Re-run any validations or tests that previously failed once fixes are applied.\n"
            "3. Always include `run_typescript_check` as the second-to-last command to capture current compiler errors.\n"
            "4. Always include `validate_styles` as the final command to capture CSS/design regressions."
        )

        return "\n".join(lines)

    def _extract_response_text(self, response: Any) -> str:
        """Extract text from LLM response.

        Checks, in order: a plain string, a ``text`` attribute, a ``content``
        attribute, a dict with ``text``/``content`` keys, and finally falls
        back to ``str(response)``.

        Args:
            response: Response from chat SDK

        Returns:
            Response text string
        """
        if isinstance(response, str):
            return response

        # Response objects exposing the text directly.
        for attribute in ("text", "content"):
            if hasattr(response, attribute):
                return getattr(response, attribute)

        # Dict-like responses.
        if isinstance(response, dict):
            return response.get("text", response.get("content", str(response)))

        return str(response)

    def _parse_checklist(self, response_text: str) -> GeneratedChecklist:
        """Parse LLM response into GeneratedChecklist.

        Any structural problem (invalid JSON, a non-object payload, a
        non-list ``checklist``, a non-object item, non-object ``params``)
        produces an empty checklist whose ``validation_errors`` describe the
        problem, so the caller can retry with an accurate reason.

        Args:
            response_text: Raw LLM response text

        Returns:
            Parsed GeneratedChecklist
        """

        def rejected(message: str) -> GeneratedChecklist:
            """Build the empty checklist returned for an unusable response."""
            return GeneratedChecklist(
                items=[],
                reasoning="",
                raw_response=response_text,
                validation_errors=[message],
            )

        try:
            data = json.loads(self._extract_json(response_text))
        except json.JSONDecodeError as e:
            logger.error("Failed to parse checklist JSON: %s", e)
            return rejected(f"Failed to parse JSON: {str(e)}")

        if not isinstance(data, dict):
            logger.error("Checklist response is not a JSON object: %r", data)
            return rejected(
                f"Expected a JSON object, got {type(data).__name__}"
            )

        raw_items = data.get("checklist", [])
        if raw_items is None:
            raw_items = []
        if not isinstance(raw_items, list):
            logger.error("Checklist field is not a list: %r", raw_items)
            return rejected(
                f"Expected 'checklist' to be a list, got {type(raw_items).__name__}"
            )

        items = []
        for position, item_data in enumerate(raw_items):
            if not isinstance(item_data, dict):
                logger.error("Checklist item %d is not an object: %r", position, item_data)
                return rejected(
                    f"Checklist item {position} must be an object, "
                    f"got {type(item_data).__name__}"
                )

            params = item_data.get("params", {})
            if params is None:
                # An explicit null means "no parameters".
                params = {}
            if not isinstance(params, dict):
                logger.error("Checklist item %d has non-object params: %r", position, params)
                return rejected(
                    f"Checklist item {position} 'params' must be an object, "
                    f"got {type(params).__name__}"
                )

            items.append(
                ChecklistItem(
                    template=item_data.get("template", ""),
                    params=params,
                    description=item_data.get("description", ""),
                )
            )

        return GeneratedChecklist(
            items=items,
            reasoning=data.get("reasoning", ""),
            raw_response=response_text,
        )

    def _extract_json(self, text: str) -> str:
        """Extract JSON from text that might contain markdown or other content.

        Tries, in order: the contents of the first fenced code block, the
        first substring starting at ``{`` that decodes to a JSON object, the
        span from the first ``{`` to the last ``}``, and finally the stripped
        text itself.

        Args:
            text: Text that may contain JSON

        Returns:
            Extracted JSON string (not guaranteed to be valid JSON)
        """
        # Prefer JSON wrapped in a markdown code block.
        code_block_match = re.search(r"```(?:json)?\s*\n?(.*?)\n?```", text, re.DOTALL)
        if code_block_match:
            return code_block_match.group(1).strip()

        # Scan for the first '{' that begins a decodable object. Unlike a
        # greedy brace match this survives trailing prose containing '}' and
        # leading prose containing a stray '{'.
        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                candidate, end = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                pass
            else:
                if isinstance(candidate, dict):
                    return text[start:end]
            start = text.find("{", start + 1)

        # Nothing decodable: hand the widest brace span to the JSON parser so
        # the caller still gets a meaningful decode error.
        json_match = re.search(r"\{.*\}", text, re.DOTALL)
        if json_match:
            return json_match.group(0)

        # Return as-is and let JSON parser handle it
        return text.strip()

    def _validate_checklist(self, checklist: GeneratedChecklist) -> None:
        """Validate checklist items against template definitions.

        Messages are appended to ``checklist.validation_errors`` in this
        order: per-item catalog errors, exact duplicates, styling setup,
        testing setup, the two closing validation commands, and style-test
        generation.

        Args:
            checklist: Checklist to validate (modified in place)
        """
        problems = checklist.validation_errors
        entries = checklist.items

        # Catalog-level validation of each template/params pair.
        for entry in entries:
            problems.extend(validate_checklist_item(entry.template, entry.params))

        # Same template with the same params twice is flagged; the same
        # template with different params (e.g. several API routes) is fine.
        fingerprints = set()
        for entry in entries:
            fingerprint = f"{entry.template}:{json.dumps(entry.params, sort_keys=True)}"
            if fingerprint in fingerprints:
                problems.append(
                    f"Duplicate checklist item: {entry.template} with same params"
                )
            fingerprints.add(fingerprint)

        # Position of the LAST occurrence of each ordering-sensitive template.
        create_at = styling_at = testing_at = style_tests_at = None
        for position, entry in enumerate(entries):
            name = entry.template
            if name == "create_next_app":
                create_at = position
            if name == "setup_app_styling":
                styling_at = position
            if name == "setup_testing":
                testing_at = position
            if name == "generate_style_tests":
                style_tests_at = position

        # setup_app_styling is mandatory once create_next_app is planned.
        if create_at is not None:
            if styling_at is None:
                problems.append(
                    "REQUIRED: 'setup_app_styling' must be included after 'create_next_app'"
                )
            elif styling_at <= create_at:
                problems.append(
                    "REQUIRED: 'setup_app_styling' must come after 'create_next_app' in the checklist"
                )

        # setup_testing is mandatory once setup_app_styling is planned.
        if styling_at is not None:
            if testing_at is None:
                problems.append(
                    "REQUIRED: 'setup_testing' must be included after 'setup_app_styling'"
                )
            elif testing_at <= styling_at:
                problems.append(
                    "REQUIRED: 'setup_testing' must come after 'setup_app_styling' in the checklist"
                )

        # The plan has to close with run_typescript_check then validate_styles.
        if len(entries) < 2:
            problems.append(
                "REQUIRED: Checklist must end with 'run_typescript_check', "
                "'validate_styles' as the last two commands"
            )
        else:
            penultimate, final = entries[-2], entries[-1]
            if final.template != "validate_styles":
                problems.append("REQUIRED: The last command must be 'validate_styles'")
            if penultimate.template != "run_typescript_check":
                problems.append(
                    "REQUIRED: The second-to-last command must be 'run_typescript_check'"
                )

        # generate_style_tests is mandatory once setup_testing is planned.
        if testing_at is not None:
            if style_tests_at is None:
                problems.append(
                    "REQUIRED: 'generate_style_tests' must be included after 'setup_testing'"
                )
            elif style_tests_at <= testing_at:
                problems.append(
                    "REQUIRED: 'generate_style_tests' must come after 'setup_testing'"
                )

        if problems:
            logger.warning("Checklist validation errors: %s", problems)
650
+
651
+
652
def create_checklist_from_workflow(
    workflow_phases: List[Any],
    context: UserContext,
) -> GeneratedChecklist:
    """Create a checklist from existing workflow phases (for comparison/testing).

    Walks every step of every phase, translates the step name into a
    template name, and copies the step's tool parameters (minus
    ``project_dir``) into a ``ChecklistItem``. The result is not validated.

    Args:
        workflow_phases: Objects exposing a ``steps`` iterable; each step
            must provide ``name`` and ``description`` and may provide
            ``get_tool_invocation(context)``.
        context: User context handed to ``get_tool_invocation``.

    Returns:
        GeneratedChecklist whose items mirror the workflow steps in order.
    """
    # Step name -> template name. Built once: the mapping does not depend on
    # the phase or step being processed. Unknown step names pass through.
    step_to_template = {
        "create_next_app": "create_next_app",
        "setup_styling": "setup_app_styling",
        "install_deps": "setup_prisma",
        "setup_testing": "setup_testing",
        "prisma_init": "setup_prisma",
        "setup_prisma": "setup_prisma",
        "manage_data_model": "generate_prisma_model",
        "manage_api_endpoint": "generate_api_route",
        "manage_api_endpoint_dynamic": "generate_api_route",
        "manage_react_component": "generate_react_component",
        "update_landing_page": "update_landing_page",
        "validate_typescript": "run_typescript_check",
        "validate_crud_structure": "run_typescript_check",
        "test_crud_api": "run_typescript_check",
    }

    items = []
    for phase in workflow_phases:
        for step in phase.steps:
            template_name = step_to_template.get(step.name, step.name)

            # Steps without a tool invocation contribute an empty params dict.
            params = {}
            if hasattr(step, "get_tool_invocation"):
                invocation = step.get_tool_invocation(context)
                if invocation:
                    # invocation is a (tool, params) pair; only params matter.
                    _, step_params = invocation
                    params = {
                        k: v for k, v in step_params.items() if k != "project_dir"
                    }

            items.append(
                ChecklistItem(
                    template=template_name,
                    params=params,
                    description=step.description,
                )
            )

    return GeneratedChecklist(
        items=items,
        reasoning="Converted from existing workflow",
    )