amd-gaia: amd_gaia-0.15.0-py3-none-any.whl → amd_gaia-0.15.2-py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (185)
  1. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/METADATA +222 -223
  2. amd_gaia-0.15.2.dist-info/RECORD +182 -0
  3. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/WHEEL +1 -1
  4. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/entry_points.txt +1 -0
  5. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/licenses/LICENSE.md +20 -20
  6. gaia/__init__.py +29 -29
  7. gaia/agents/__init__.py +19 -19
  8. gaia/agents/base/__init__.py +9 -9
  9. gaia/agents/base/agent.py +2132 -2177
  10. gaia/agents/base/api_agent.py +119 -120
  11. gaia/agents/base/console.py +1967 -1841
  12. gaia/agents/base/errors.py +237 -237
  13. gaia/agents/base/mcp_agent.py +86 -86
  14. gaia/agents/base/tools.py +88 -83
  15. gaia/agents/blender/__init__.py +7 -0
  16. gaia/agents/blender/agent.py +553 -556
  17. gaia/agents/blender/agent_simple.py +133 -135
  18. gaia/agents/blender/app.py +211 -211
  19. gaia/agents/blender/app_simple.py +41 -41
  20. gaia/agents/blender/core/__init__.py +16 -16
  21. gaia/agents/blender/core/materials.py +506 -506
  22. gaia/agents/blender/core/objects.py +316 -316
  23. gaia/agents/blender/core/rendering.py +225 -225
  24. gaia/agents/blender/core/scene.py +220 -220
  25. gaia/agents/blender/core/view.py +146 -146
  26. gaia/agents/chat/__init__.py +9 -9
  27. gaia/agents/chat/agent.py +809 -835
  28. gaia/agents/chat/app.py +1065 -1058
  29. gaia/agents/chat/session.py +508 -508
  30. gaia/agents/chat/tools/__init__.py +15 -15
  31. gaia/agents/chat/tools/file_tools.py +96 -96
  32. gaia/agents/chat/tools/rag_tools.py +1744 -1729
  33. gaia/agents/chat/tools/shell_tools.py +437 -436
  34. gaia/agents/code/__init__.py +7 -7
  35. gaia/agents/code/agent.py +549 -549
  36. gaia/agents/code/cli.py +377 -0
  37. gaia/agents/code/models.py +135 -135
  38. gaia/agents/code/orchestration/__init__.py +24 -24
  39. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  40. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  41. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  42. gaia/agents/code/orchestration/factories/base.py +63 -63
  43. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  44. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  45. gaia/agents/code/orchestration/orchestrator.py +841 -841
  46. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  47. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  48. gaia/agents/code/orchestration/steps/base.py +188 -188
  49. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  50. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  51. gaia/agents/code/orchestration/steps/python.py +307 -307
  52. gaia/agents/code/orchestration/template_catalog.py +469 -469
  53. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  54. gaia/agents/code/orchestration/workflows/base.py +80 -80
  55. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  56. gaia/agents/code/orchestration/workflows/python.py +94 -94
  57. gaia/agents/code/prompts/__init__.py +11 -11
  58. gaia/agents/code/prompts/base_prompt.py +77 -77
  59. gaia/agents/code/prompts/code_patterns.py +2034 -2036
  60. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  61. gaia/agents/code/prompts/python_prompt.py +109 -109
  62. gaia/agents/code/schema_inference.py +365 -365
  63. gaia/agents/code/system_prompt.py +41 -41
  64. gaia/agents/code/tools/__init__.py +42 -42
  65. gaia/agents/code/tools/cli_tools.py +1138 -1138
  66. gaia/agents/code/tools/code_formatting.py +319 -319
  67. gaia/agents/code/tools/code_tools.py +769 -769
  68. gaia/agents/code/tools/error_fixing.py +1347 -1347
  69. gaia/agents/code/tools/external_tools.py +180 -180
  70. gaia/agents/code/tools/file_io.py +845 -845
  71. gaia/agents/code/tools/prisma_tools.py +190 -190
  72. gaia/agents/code/tools/project_management.py +1016 -1016
  73. gaia/agents/code/tools/testing.py +321 -321
  74. gaia/agents/code/tools/typescript_tools.py +122 -122
  75. gaia/agents/code/tools/validation_parsing.py +461 -461
  76. gaia/agents/code/tools/validation_tools.py +806 -806
  77. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  78. gaia/agents/code/validators/__init__.py +16 -16
  79. gaia/agents/code/validators/antipattern_checker.py +241 -241
  80. gaia/agents/code/validators/ast_analyzer.py +197 -197
  81. gaia/agents/code/validators/requirements_validator.py +145 -145
  82. gaia/agents/code/validators/syntax_validator.py +171 -171
  83. gaia/agents/docker/__init__.py +7 -7
  84. gaia/agents/docker/agent.py +643 -642
  85. gaia/agents/emr/__init__.py +8 -8
  86. gaia/agents/emr/agent.py +1504 -1506
  87. gaia/agents/emr/cli.py +1322 -1322
  88. gaia/agents/emr/constants.py +475 -475
  89. gaia/agents/emr/dashboard/__init__.py +4 -4
  90. gaia/agents/emr/dashboard/server.py +1972 -1974
  91. gaia/agents/jira/__init__.py +11 -11
  92. gaia/agents/jira/agent.py +894 -894
  93. gaia/agents/jira/jql_templates.py +299 -299
  94. gaia/agents/routing/__init__.py +7 -7
  95. gaia/agents/routing/agent.py +567 -570
  96. gaia/agents/routing/system_prompt.py +75 -75
  97. gaia/agents/summarize/__init__.py +11 -0
  98. gaia/agents/summarize/agent.py +885 -0
  99. gaia/agents/summarize/prompts.py +129 -0
  100. gaia/api/__init__.py +23 -23
  101. gaia/api/agent_registry.py +238 -238
  102. gaia/api/app.py +305 -305
  103. gaia/api/openai_server.py +575 -575
  104. gaia/api/schemas.py +186 -186
  105. gaia/api/sse_handler.py +373 -373
  106. gaia/apps/__init__.py +4 -4
  107. gaia/apps/llm/__init__.py +6 -6
  108. gaia/apps/llm/app.py +184 -169
  109. gaia/apps/summarize/app.py +116 -633
  110. gaia/apps/summarize/html_viewer.py +133 -133
  111. gaia/apps/summarize/pdf_formatter.py +284 -284
  112. gaia/audio/__init__.py +2 -2
  113. gaia/audio/audio_client.py +439 -439
  114. gaia/audio/audio_recorder.py +269 -269
  115. gaia/audio/kokoro_tts.py +599 -599
  116. gaia/audio/whisper_asr.py +432 -432
  117. gaia/chat/__init__.py +16 -16
  118. gaia/chat/app.py +428 -430
  119. gaia/chat/prompts.py +522 -522
  120. gaia/chat/sdk.py +1228 -1225
  121. gaia/cli.py +5659 -5632
  122. gaia/database/__init__.py +10 -10
  123. gaia/database/agent.py +176 -176
  124. gaia/database/mixin.py +290 -290
  125. gaia/database/testing.py +64 -64
  126. gaia/eval/batch_experiment.py +2332 -2332
  127. gaia/eval/claude.py +542 -542
  128. gaia/eval/config.py +37 -37
  129. gaia/eval/email_generator.py +512 -512
  130. gaia/eval/eval.py +3179 -3179
  131. gaia/eval/groundtruth.py +1130 -1130
  132. gaia/eval/transcript_generator.py +582 -582
  133. gaia/eval/webapp/README.md +167 -167
  134. gaia/eval/webapp/package-lock.json +875 -875
  135. gaia/eval/webapp/package.json +20 -20
  136. gaia/eval/webapp/public/app.js +3402 -3402
  137. gaia/eval/webapp/public/index.html +87 -87
  138. gaia/eval/webapp/public/styles.css +3661 -3661
  139. gaia/eval/webapp/server.js +415 -415
  140. gaia/eval/webapp/test-setup.js +72 -72
  141. gaia/installer/__init__.py +23 -0
  142. gaia/installer/init_command.py +1275 -0
  143. gaia/installer/lemonade_installer.py +619 -0
  144. gaia/llm/__init__.py +10 -2
  145. gaia/llm/base_client.py +60 -0
  146. gaia/llm/exceptions.py +12 -0
  147. gaia/llm/factory.py +70 -0
  148. gaia/llm/lemonade_client.py +3421 -3221
  149. gaia/llm/lemonade_manager.py +294 -294
  150. gaia/llm/providers/__init__.py +9 -0
  151. gaia/llm/providers/claude.py +108 -0
  152. gaia/llm/providers/lemonade.py +118 -0
  153. gaia/llm/providers/openai_provider.py +79 -0
  154. gaia/llm/vlm_client.py +382 -382
  155. gaia/logger.py +189 -189
  156. gaia/mcp/agent_mcp_server.py +245 -245
  157. gaia/mcp/blender_mcp_client.py +138 -138
  158. gaia/mcp/blender_mcp_server.py +648 -648
  159. gaia/mcp/context7_cache.py +332 -332
  160. gaia/mcp/external_services.py +518 -518
  161. gaia/mcp/mcp_bridge.py +811 -550
  162. gaia/mcp/servers/__init__.py +6 -6
  163. gaia/mcp/servers/docker_mcp.py +83 -83
  164. gaia/perf_analysis.py +361 -0
  165. gaia/rag/__init__.py +10 -10
  166. gaia/rag/app.py +293 -293
  167. gaia/rag/demo.py +304 -304
  168. gaia/rag/pdf_utils.py +235 -235
  169. gaia/rag/sdk.py +2194 -2194
  170. gaia/security.py +183 -163
  171. gaia/talk/app.py +287 -289
  172. gaia/talk/sdk.py +538 -538
  173. gaia/testing/__init__.py +87 -87
  174. gaia/testing/assertions.py +330 -330
  175. gaia/testing/fixtures.py +333 -333
  176. gaia/testing/mocks.py +493 -493
  177. gaia/util.py +46 -46
  178. gaia/utils/__init__.py +33 -33
  179. gaia/utils/file_watcher.py +675 -675
  180. gaia/utils/parsing.py +223 -223
  181. gaia/version.py +100 -100
  182. amd_gaia-0.15.0.dist-info/RECORD +0 -168
  183. gaia/agents/code/app.py +0 -266
  184. gaia/llm/llm_client.py +0 -723
  185. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/top_level.txt +0 -0
gaia/agents/code/orchestration/checklist_generator.py +713 -713
@@ -1,713 +1,713 @@
1
- # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
- # SPDX-License-Identifier: MIT
3
- """Checklist Generator for LLM-Driven Code Generation.
4
-
5
- This module uses an LLM to generate a checklist of template invocations
6
- based on the user's request and the available template catalog.
7
-
8
- The generator:
9
- 1. Receives user request and project context
10
- 2. Sends prompt to LLM with template catalog
11
- 3. Parses LLM response into structured checklist
12
- 4. Validates checklist items against template definitions
13
-
14
- The resulting checklist is then executed deterministically by
15
- ChecklistExecutor.
16
- """
17
-
18
- import json
19
- import logging
20
- import re
21
- from dataclasses import dataclass, field
22
- from typing import Any, Dict, List, Optional, Protocol
23
-
24
- from .steps.base import UserContext
25
- from .template_catalog import get_catalog_prompt, validate_checklist_item
26
-
27
- logger = logging.getLogger(__name__)
28
-
29
-
30
- class ChatSDK(Protocol):
31
- """Protocol for chat SDK interface."""
32
-
33
- def send(self, message: str, timeout: int = 600, no_history: bool = False) -> Any:
34
- """Send a message and get response."""
35
- ...
36
-
37
-
38
- @dataclass
39
- class ChecklistItem:
40
- """Single item in the generated checklist.
41
-
42
- Represents a template invocation with its parameters and
43
- the LLM's reasoning for including it.
44
- """
45
-
46
- template: str
47
- params: Dict[str, Any]
48
- description: str
49
-
50
- def to_dict(self) -> Dict[str, Any]:
51
- """Convert to dictionary representation."""
52
- return {
53
- "template": self.template,
54
- "params": self.params,
55
- "description": self.description,
56
- }
57
-
58
-
59
- @dataclass
60
- class GeneratedChecklist:
61
- """Complete checklist generated by LLM.
62
-
63
- Contains the list of template invocations and the LLM's
64
- overall reasoning for the chosen approach.
65
- """
66
-
67
- items: List[ChecklistItem]
68
- reasoning: str
69
- raw_response: str = ""
70
- validation_errors: List[str] = field(default_factory=list)
71
-
72
- @property
73
- def is_valid(self) -> bool:
74
- """Check if checklist passed validation."""
75
- return len(self.validation_errors) == 0
76
-
77
- def to_dict(self) -> Dict[str, Any]:
78
- """Convert to dictionary representation."""
79
- return {
80
- "reasoning": self.reasoning,
81
- "checklist": [item.to_dict() for item in self.items],
82
- "is_valid": self.is_valid,
83
- "validation_errors": self.validation_errors,
84
- }
85
-
86
-
87
- @dataclass
88
- class ProjectState:
89
- """Current state of the project for context."""
90
-
91
- exists: bool = False
92
- has_package_json: bool = False
93
- has_prisma: bool = False
94
- has_next_config: bool = False
95
- existing_models: List[str] = field(default_factory=list)
96
- existing_routes: List[str] = field(default_factory=list)
97
- existing_pages: List[str] = field(default_factory=list)
98
-
99
- def to_prompt(self) -> str:
100
- """Generate prompt-friendly description of project state."""
101
- if not self.exists:
102
- return "Project does not exist yet - will be created fresh."
103
-
104
- lines = ["Current project state:"]
105
-
106
- if self.has_package_json:
107
- lines.append("- ✓ package.json exists (Node.js project)")
108
- if self.has_next_config:
109
- lines.append("- ✓ next.config.ts exists (Next.js configured)")
110
- if self.has_prisma:
111
- lines.append("- ✓ Prisma configured")
112
- if self.existing_models:
113
- lines.append(f" - Models: {', '.join(self.existing_models)}")
114
-
115
- if self.existing_routes:
116
- lines.append(f"- Existing API routes: {', '.join(self.existing_routes)}")
117
-
118
- if self.existing_pages:
119
- lines.append(f"- Existing pages: {', '.join(self.existing_pages)}")
120
-
121
- return "\n".join(lines)
122
-
123
-
124
- CHECKLIST_SYSTEM_PROMPT = """You are a code generation planner. Your task is to analyze the user's request and generate a checklist of template invocations that will create the requested application.
125
-
126
- {catalog_prompt}
127
-
128
- ## Instructions
129
-
130
- 1. Analyze the user's request carefully
131
- 2. Consider what the user ACTUALLY wants (semantic understanding)
132
- 3. Select templates that will fulfill the request
133
- 4. Add semantic enhancements based on the request type:
134
- - For "todo" apps: add checkboxes for completion status
135
- - For "blog" apps: add date fields for posts
136
- - For "e-commerce": add price, inventory fields
137
- 5. Ensure dependencies are satisfied (run setup before data, data before API, API before UI)
138
- 6. Generate a complete checklist that creates a working application
139
- 7. When follow-up fixes are requested, use `fix_code` to repair the specific files called out by validation/test logs. Extract the precise file paths and line numbers from the errors (see the Raw Validation Logs) and pass those line numbers inside the error description so the fixer knows exactly where to focus. Always reference the latest findings to decide which fixes to schedule before running validations again.
140
- 8. If the user explicitly requests an additional UI artifact (countdown display, stats badge, etc.), schedule a `generate_react_component` step with the appropriate `artifact-*` variant (e.g., `"artifact-timer"`) and a descriptive `component_name`. Keep the artifact's logic inside that client component—server components like `page.tsx` should only render the artifact and pass any required props.
141
- 9. **Route pairing requirement:** Whenever you schedule `{{"template": "generate_api_route", "params": {{"type": "collection", ...}}}}`, you MUST also include a matching `generate_api_route` item with `"type": "item"` for the same resource so detail pages can call `/api/<resource>/[id]`.
142
-
143
- This workflow repeats until all validations pass, so each checklist should either advance new functionality or explicitly repair the failures reported in the latest validation logs.
144
-
145
- ## IMPORTANT: Complete CRUD Applications
146
-
147
- For any app that manages resources (todos, posts, users, etc.), you MUST generate ALL of these UI components:
148
-
149
- 1. **Form component** (variant: "form") - Reusable form for create (generate this first so other pages can import it)
150
- 2. **Artifact components** (variant: "artifact-*") - Any additional UI artifacts requested by the user (e.g., countdown display, stats badge). Generate these before any page that consumes them.
151
- 3. **New page** (variant: "new") - Create new item at /resources/new
152
- 4. **Edit page** (variant: "detail") - Edit single item at /resources/[id] with pre-populated form
153
- 5. **List page** (variant: "list") - Main page showing all items at /resources
154
-
155
- Missing any of the required components (form, new, detail, list) will result in broken navigation! When artifacts are requested, they must also be generated or the UI will be incomplete.
156
-
157
- ## REQUIRED: Setup and Validation Commands
158
-
159
- **CRITICAL**: The following commands are REQUIRED for a valid plan:
160
-
161
- 1. **setup_app_styling** MUST be included after creating the application (after `create_next_app`). This configures app-wide styling with modern dark theme design system.
162
-
163
- 2. **setup_testing** MUST be included after `setup_app_styling`. This sets up the testing infrastructure.
164
-
165
- 3. **generate_style_tests** MUST be included after `setup_testing`. This generates CSS integrity tests that validate the design system.
166
-
167
- 4. **The final 2 commands MUST be in this exact order:**
168
- - Second-to-last: `run_typescript_check` (validates TypeScript compilation)
169
- - Last: `validate_styles` (validates CSS files and design system)
170
-
171
- These setup and validation commands are mandatory - a plan without them is INVALID.
172
-
173
- ## Output Format
174
-
175
- Respond with ONLY a JSON object (no markdown code blocks):
176
- {{
177
- "reasoning": "Brief explanation of your approach and any semantic enhancements",
178
- "checklist": [
179
- {{"template": "template_name", "params": {{}}, "description": "Why this step is needed"}}
180
- ]
181
- }}
182
-
183
- Important:
184
- - Use exact template names from the catalog
185
- - Provide all required parameters
186
- - Order items by dependency (setup first, then data, then API, then UI)
187
- - REQUIRED ordering for `generate_react_component`: emit all non-`list` variants (form, new, detail, actions, artifact-*) before the `variant: "list"` call so the list can import previously generated components
188
- - Add semantic enhancements that make the app intuitive (e.g., checkboxes for todos)
189
- - For CRUD apps, ALWAYS include all 4 UI variants: list, form, new, detail
190
- - REQUIRED: Include `setup_app_styling` after `create_next_app`
191
- - REQUIRED: Include `setup_testing` after `setup_app_styling`
192
- - REQUIRED: Include `generate_style_tests` after `setup_testing`
193
- - REQUIRED: End with `run_typescript_check`, then `validate_styles` as the last 2 commands
194
- - When converting a raw validation error into `fix_code`, copy the exact snippet (file, line, column, and message). For example:
195
-
196
- Raw Validation Logs (example):
197
- ```
198
- {{"template": "run_typescript_check", "output": {{"errors": "path/to/File.tsx(10,5): error TS1234: <error text>\\n"}}}}
199
- ```
200
-
201
- Corresponding checklist item:
202
- ```
203
- {{
204
- "template": "fix_code",
205
- "params": {{
206
- "file_path": "path/to/File.tsx",
207
- "error_description": "path/to/File.tsx(10,5): error TS1234: <error text>"
208
- }},
209
- "description": "Fix the TypeScript compiler error reported for File.tsx."
210
- }}
211
- ```
212
- Always keep the error text verbatim so the fixer knows exactly where to edit."""
213
-
214
-
215
- class ChecklistGenerator:
216
- """Generate execution checklist using LLM.
217
-
218
- The generator sends the user request, project state, and template
219
- catalog to an LLM, which returns a structured checklist of template
220
- invocations.
221
- """
222
-
223
- def __init__(self, chat_sdk: ChatSDK):
224
- """Initialize the checklist generator.
225
-
226
- Args:
227
- chat_sdk: Chat SDK instance for LLM communication
228
- """
229
- self.chat = chat_sdk
230
-
231
- def generate_initial_checklist(
232
- self,
233
- context: UserContext,
234
- project_state: Optional[ProjectState] = None,
235
- ) -> GeneratedChecklist:
236
- """Generate the initial project-scaffolding checklist."""
237
- if project_state is None:
238
- project_state = ProjectState()
239
-
240
- system_prompt = CHECKLIST_SYSTEM_PROMPT.format(
241
- catalog_prompt=get_catalog_prompt()
242
- )
243
- user_prompt = self._build_initial_prompt(context, project_state)
244
- full_prompt = f"{system_prompt}\n\n## User Request\n\n{user_prompt}"
245
- return self._generate_from_prompt(full_prompt)
246
-
247
- def generate_debug_checklist(
248
- self,
249
- context: UserContext,
250
- project_state: Optional[ProjectState],
251
- prior_errors: Optional[List[str]],
252
- validation_logs: Optional[List[Any]],
253
- ) -> GeneratedChecklist:
254
- """Generate a remediation checklist to fix outstanding errors."""
255
- if project_state is None:
256
- project_state = ProjectState()
257
-
258
- debug_prompt = self._build_debug_prompt(
259
- context=context,
260
- project_state=project_state,
261
- prior_errors=prior_errors or [],
262
- validation_logs=validation_logs or [],
263
- )
264
- system_prompt = CHECKLIST_SYSTEM_PROMPT.format(
265
- catalog_prompt=get_catalog_prompt()
266
- )
267
- full_prompt = f"{system_prompt}\n\n## Remediation Context\n\n{debug_prompt}"
268
- return self._generate_from_prompt(full_prompt)
269
-
270
- def _generate_from_prompt(self, full_prompt: str) -> GeneratedChecklist:
271
- """Common checklist generation logic with retries."""
272
- logger.debug("Generating checklist with LLM...")
273
- logger.debug(f"Checklist prompt: {full_prompt}")
274
-
275
- max_attempts = 3
276
- last_failure_reason = "unknown error"
277
-
278
- for attempt in range(1, max_attempts + 1):
279
- try:
280
- response = self.chat.send(full_prompt, timeout=1200)
281
-
282
- response_text = self._extract_response_text(response)
283
-
284
- logger.debug(f"LLM response (attempt {attempt}): {response_text}")
285
-
286
- checklist = self._parse_checklist(response_text)
287
- except Exception as exc: # pylint: disable=broad-exception-caught
288
- last_failure_reason = str(exc)
289
- logger.warning(
290
- "Checklist generation attempt %d/%d failed: %s",
291
- attempt,
292
- max_attempts,
293
- exc,
294
- )
295
- continue
296
-
297
- if not checklist.items:
298
- last_failure_reason = "LLM returned an empty checklist"
299
- logger.warning(
300
- "Checklist generation attempt %d/%d returned no items, retrying...",
301
- attempt,
302
- max_attempts,
303
- )
304
- continue
305
-
306
- self._validate_checklist(checklist)
307
- if checklist.validation_errors:
308
- last_failure_reason = "; ".join(checklist.validation_errors)
309
- logger.warning(
310
- "Checklist generation attempt %d/%d failed validation: %s",
311
- attempt,
312
- max_attempts,
313
- checklist.validation_errors,
314
- )
315
- continue
316
-
317
- logger.debug(
318
- "Generated checklist with %d items on attempt %d",
319
- len(checklist.items),
320
- attempt,
321
- )
322
- return checklist
323
-
324
- raise RuntimeError(
325
- f"Failed to generate a valid checklist after {max_attempts} attempts: "
326
- f"{last_failure_reason}"
327
- )
328
-
329
- def _build_initial_prompt(
330
- self,
331
- context: UserContext,
332
- project_state: ProjectState,
333
- ) -> str:
334
- """Build the user prompt with all context.
335
-
336
- Args:
337
- context: User context
338
- project_state: Current project state
339
-
340
- Returns:
341
- Formatted user prompt string
342
- """
343
- lines = [f"**User Request**: {context.user_request}"]
344
-
345
- if context.entity_name:
346
- lines.append(f"\n**Inferred Entity**: {context.entity_name}")
347
-
348
- if context.schema_fields:
349
- lines.append(f"\n**Inferred Fields**: {json.dumps(context.schema_fields)}")
350
-
351
- lines.append(f"\n**Project Directory**: {context.project_dir}")
352
- lines.append(f"\n**Language**: {context.language}")
353
- lines.append(f"\n**Project Type**: {context.project_type}")
354
-
355
- lines.append(f"\n{project_state.to_prompt()}")
356
-
357
- if context.fix_feedback:
358
- lines.append("\n**Outstanding Fix Requests**:")
359
- for note in context.fix_feedback[-5:]:
360
- lines.append(f"- {note}")
361
-
362
- if context.validation_reports:
363
- lines.append("\n**Recent Validation/Test Findings**:")
364
- for log in context.validation_reports[-5:]:
365
- status = "PASS" if log.get("success", True) else "FAIL"
366
- template = log.get("template", "validation_step")
367
- description = log.get("description", "")
368
- lines.append(f"- [{status}] {template}: {description}")
369
- if log.get("error"):
370
- lines.append(f" Error: {log['error']}")
371
-
372
- output = log.get("output", {})
373
- snippet = ""
374
- if isinstance(output, dict):
375
- for key in ("stdout", "stderr", "message", "details"):
376
- if output.get(key):
377
- snippet = str(output[key])[:200]
378
- break
379
- if not snippet and output:
380
- snippet = json.dumps(output)[:200]
381
- elif output:
382
- snippet = str(output)[:200]
383
- if snippet:
384
- lines.append(f" Output: {snippet}")
385
-
386
- lines.append("\nGenerate a checklist to fulfill this request.")
387
-
388
- return "\n".join(lines)
389
-
390
- def _build_debug_prompt(
391
- self,
392
- context: UserContext,
393
- project_state: ProjectState,
394
- prior_errors: List[str],
395
- validation_logs: List[Any],
396
- ) -> str:
397
- """Build prompt for remediation/debug checklists."""
398
- lines = [
399
- "You are a remediation planner for the GAIA web development agent. "
400
- "The project has already been scaffolded; focus exclusively on fixing outstanding issues."
401
- ]
402
- lines.append(f"\n**User Request**: {context.user_request}")
403
- lines.append(f"\n**Project Directory**: {context.project_dir}")
404
-
405
- if context.entity_name:
406
- lines.append(f"\n**Entity**: {context.entity_name}")
407
- if context.schema_fields:
408
- lines.append(f"\n**Schema Fields**: {json.dumps(context.schema_fields)}")
409
-
410
- lines.append(f"\n{project_state.to_prompt()}")
411
-
412
- if prior_errors:
413
- lines.append("\n**Execution Errors From Last Attempt:**")
414
- for err in prior_errors:
415
- lines.append(f"- {err}")
416
-
417
- if validation_logs:
418
- lines.append("\n**Recent Validation/Test Results:**")
419
- raw_entries = []
420
- for log in validation_logs[-10:]:
421
- entry = log.to_dict() if hasattr(log, "to_dict") else log
422
- template = entry.get("template", "unknown_step")
423
- success = entry.get("success", True)
424
- desc = entry.get("description", "")
425
- status = "PASS" if success else "FAIL"
426
- lines.append(f"- [{status}] {template}: {desc}")
427
- if entry.get("error"):
428
- lines.append(f" Error: {entry['error']}")
429
- output = entry.get("output") or {}
430
- for key in ("stdout", "stderr", "details", "message"):
431
- if output.get(key):
432
- snippet = str(output[key])[:200]
433
- lines.append(f" Output: {snippet}")
434
- break
435
- raw_entries.append(entry)
436
-
437
- if raw_entries:
438
- lines.append(
439
- "\n**Raw Validation Logs (exact text for follow-up fixes):**"
440
- )
441
- for entry in raw_entries:
442
- lines.append(json.dumps(entry, ensure_ascii=False))
443
-
444
- if context.fix_feedback:
445
- lines.append("\n**Outstanding Fix Instructions:**")
446
- for note in context.fix_feedback[-10:]:
447
- lines.append(f"- {note}")
448
-
449
- lines.append(
450
- "\nYour job: draft a concise checklist that repairs the errors above, "
451
- "regenerates any broken code, and re-runs critical validations."
452
- )
453
- lines.append(
454
- "\n**Critical Requirements for Debug Checklists:**\n"
455
- "1. Use `fix_code` to repair the specific files referenced in the failures above.\n"
456
- "2. Re-run any validations or tests that previously failed once fixes are applied.\n"
457
- "3. Always include `run_typescript_check` as the second-to-last command to capture current compiler errors.\n"
458
- "4. Always include `validate_styles` as the final command to capture CSS/design regressions."
459
- )
460
-
461
- return "\n".join(lines)
462
-
463
- def _extract_response_text(self, response: Any) -> str:
464
- """Extract text from LLM response.
465
-
466
- Handles different response formats from various SDKs.
467
-
468
- Args:
469
- response: Response from chat SDK
470
-
471
- Returns:
472
- Response text string
473
- """
474
- if isinstance(response, str):
475
- return response
476
-
477
- # Handle response objects with text attribute
478
- if hasattr(response, "text"):
479
- return response.text
480
-
481
- # Handle response objects with content attribute
482
- if hasattr(response, "content"):
483
- return response.content
484
-
485
- # Handle dict-like responses
486
- if isinstance(response, dict):
487
- return response.get("text", response.get("content", str(response)))
488
-
489
- return str(response)
490
-
491
- def _parse_checklist(self, response_text: str) -> GeneratedChecklist:
492
- """Parse LLM response into GeneratedChecklist.
493
-
494
- Args:
495
- response_text: Raw LLM response text
496
-
497
- Returns:
498
- Parsed GeneratedChecklist
499
- """
500
- try:
501
- # Try to extract JSON from the response
502
- json_str = self._extract_json(response_text)
503
-
504
- data = json.loads(json_str)
505
-
506
- # Parse items
507
- items = []
508
- for item_data in data.get("checklist", []):
509
- item = ChecklistItem(
510
- template=item_data.get("template", ""),
511
- params=item_data.get("params", {}),
512
- description=item_data.get("description", ""),
513
- )
514
- items.append(item)
515
-
516
- return GeneratedChecklist(
517
- items=items,
518
- reasoning=data.get("reasoning", ""),
519
- raw_response=response_text,
520
- )
521
-
522
- except json.JSONDecodeError as e:
523
- logger.error(f"Failed to parse checklist JSON: {e}")
524
- return GeneratedChecklist(
525
- items=[],
526
- reasoning="",
527
- raw_response=response_text,
528
- validation_errors=[f"Failed to parse JSON: {str(e)}"],
529
- )
530
-
531
- def _extract_json(self, text: str) -> str:
532
- """Extract JSON from text that might contain markdown or other content.
533
-
534
- Args:
535
- text: Text that may contain JSON
536
-
537
- Returns:
538
- Extracted JSON string
539
- """
540
- # Try to find JSON in markdown code block
541
- code_block_match = re.search(r"```(?:json)?\s*\n?(.*?)\n?```", text, re.DOTALL)
542
- if code_block_match:
543
- return code_block_match.group(1).strip()
544
-
545
- # Try to find raw JSON object
546
- json_match = re.search(r"\{.*\}", text, re.DOTALL)
547
- if json_match:
548
- return json_match.group(0)
549
-
550
- # Return as-is and let JSON parser handle it
551
- return text.strip()
552
-
553
def _validate_checklist(self, checklist: GeneratedChecklist) -> None:
    """Run every structural check on a checklist and record the findings.

    Problems are appended to ``checklist.validation_errors``; nothing is
    returned and nothing is raised by the checks themselves. The checks run
    in a fixed order: per-item template validation, duplicate detection,
    setup ordering, the mandatory trailing commands, and style-test ordering.

    Args:
        checklist: Checklist to validate (modified in place)
    """
    steps = checklist.items
    report = checklist.validation_errors

    def last_index_of(template_name):
        """Return the position of the last step using ``template_name``."""
        found = None
        for position, step in enumerate(steps):
            if step.template == template_name:
                found = position
        return found

    # Per-item validation against the template catalog.
    for step in steps:
        report.extend(validate_checklist_item(step.template, step.params))

    # Two items are duplicates only when template AND params both match
    # (several API routes with different params are fine).
    fingerprints = set()
    for step in steps:
        fingerprint = f"{step.template}:{json.dumps(step.params, sort_keys=True)}"
        if fingerprint in fingerprints:
            report.append(
                f"Duplicate checklist item: {step.template} with same params"
            )
        fingerprints.add(fingerprint)

    app_pos = last_index_of("create_next_app")
    styling_pos = last_index_of("setup_app_styling")
    testing_pos = last_index_of("setup_testing")

    # Styling is only mandatory when the checklist creates the app.
    if app_pos is not None and styling_pos is None:
        report.append(
            "REQUIRED: 'setup_app_styling' must be included after 'create_next_app'"
        )
    elif app_pos is not None and styling_pos <= app_pos:
        report.append(
            "REQUIRED: 'setup_app_styling' must come after 'create_next_app' in the checklist"
        )

    # Testing setup is only mandatory when styling is being set up.
    if styling_pos is not None and testing_pos is None:
        report.append(
            "REQUIRED: 'setup_testing' must be included after 'setup_app_styling'"
        )
    elif styling_pos is not None and testing_pos <= styling_pos:
        report.append(
            "REQUIRED: 'setup_testing' must come after 'setup_app_styling' in the checklist"
        )

    # The checklist must finish with run_typescript_check, validate_styles.
    if len(steps) >= 2:
        final_step, penultimate_step = steps[-1], steps[-2]
        if final_step.template != "validate_styles":
            report.append("REQUIRED: The last command must be 'validate_styles'")
        if penultimate_step.template != "run_typescript_check":
            report.append(
                "REQUIRED: The second-to-last command must be 'run_typescript_check'"
            )
    else:
        report.append(
            "REQUIRED: Checklist must end with 'run_typescript_check', "
            "'validate_styles' as the last two commands"
        )

    # Style tests are only mandatory when testing is being set up.
    style_tests_pos = last_index_of("generate_style_tests")
    if testing_pos is not None:
        if style_tests_pos is None:
            report.append(
                "REQUIRED: 'generate_style_tests' must be included after 'setup_testing'"
            )
        elif style_tests_pos <= testing_pos:
            report.append(
                "REQUIRED: 'generate_style_tests' must come after 'setup_testing'"
            )

    if checklist.validation_errors:
        logger.warning(
            f"Checklist validation errors: {checklist.validation_errors}"
        )
650
-
651
-
652
def create_checklist_from_workflow(
    workflow_phases: List[Any],
    context: UserContext,
) -> GeneratedChecklist:
    """Create a checklist from existing workflow phases (for comparison/testing).

    This converts the old step-based workflow into the new checklist format,
    useful for testing and migration.

    Args:
        workflow_phases: List of WorkflowPhase objects from factory; each
            phase is expected to expose a ``steps`` iterable
        context: User context, forwarded to ``step.get_tool_invocation``

    Returns:
        GeneratedChecklist representing the workflow
    """
    # Legacy step name -> catalog template name. Built once up front: the
    # mapping is the same for every phase and step, so rebuilding it inside
    # the loops was wasted work. Unknown step names pass through unchanged.
    template_map = {
        "create_next_app": "create_next_app",
        "setup_styling": "setup_app_styling",
        "install_deps": "setup_prisma",
        "setup_testing": "setup_testing",
        "prisma_init": "setup_prisma",
        "setup_prisma": "setup_prisma",
        "manage_data_model": "generate_prisma_model",
        "manage_api_endpoint": "generate_api_route",
        "manage_api_endpoint_dynamic": "generate_api_route",
        "manage_react_component": "generate_react_component",
        "update_landing_page": "update_landing_page",
        "validate_typescript": "run_typescript_check",
        "validate_crud_structure": "run_typescript_check",
        "test_crud_api": "run_typescript_check",
    }

    items = []
    for phase in workflow_phases:
        for step in phase.steps:
            template_name = template_map.get(step.name, step.name)

            # Extract params from the step's tool invocation, if it has one.
            params = {}
            if hasattr(step, "get_tool_invocation"):
                invocation = step.get_tool_invocation(context)
                if invocation:
                    _, step_params = invocation
                    # project_dir is dropped from the recorded params.
                    params = {
                        k: v for k, v in step_params.items() if k != "project_dir"
                    }

            items.append(
                ChecklistItem(
                    template=template_name,
                    params=params,
                    description=step.description,
                )
            )

    return GeneratedChecklist(
        items=items,
        reasoning="Converted from existing workflow",
    )
1
+ # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+ """Checklist Generator for LLM-Driven Code Generation.
4
+
5
+ This module uses an LLM to generate a checklist of template invocations
6
+ based on the user's request and the available template catalog.
7
+
8
+ The generator:
9
+ 1. Receives user request and project context
10
+ 2. Sends prompt to LLM with template catalog
11
+ 3. Parses LLM response into structured checklist
12
+ 4. Validates checklist items against template definitions
13
+
14
+ The resulting checklist is then executed deterministically by
15
+ ChecklistExecutor.
16
+ """
17
+
18
+ import json
19
+ import logging
20
+ import re
21
+ from dataclasses import dataclass, field
22
+ from typing import Any, Dict, List, Optional, Protocol
23
+
24
+ from .steps.base import UserContext
25
+ from .template_catalog import get_catalog_prompt, validate_checklist_item
26
+
27
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
28
+
29
+
30
class ChatSDK(Protocol):
    """Structural interface of the chat client used for LLM calls.

    Any object with a compatible ``send`` method satisfies this protocol.
    """

    def send(self, message: str, timeout: int = 600, no_history: bool = False) -> Any:
        """Deliver ``message`` to the model and hand back its reply."""
36
+
37
+
38
@dataclass
class ChecklistItem:
    """One step of a generated checklist.

    Pairs a template name with the parameters to invoke it with, plus a
    free-text description of why the step is there.
    """

    template: str
    params: Dict[str, Any]
    description: str

    def to_dict(self) -> Dict[str, Any]:
        """Return the item as a plain dictionary (``params`` is not copied)."""
        return dict(
            template=self.template,
            params=self.params,
            description=self.description,
        )
57
+
58
+
59
@dataclass
class GeneratedChecklist:
    """Parsed result of one checklist-generation call.

    Holds the ordered checklist items, the model's stated reasoning, the raw
    response text, and any validation errors collected afterwards.
    """

    items: List[ChecklistItem]
    reasoning: str
    raw_response: str = ""
    validation_errors: List[str] = field(default_factory=list)

    @property
    def is_valid(self) -> bool:
        """True when no validation errors have been recorded."""
        error_count = len(self.validation_errors)
        return error_count == 0

    def to_dict(self) -> Dict[str, Any]:
        """Return a dictionary view; ``raw_response`` is not included."""
        serialized_items = [entry.to_dict() for entry in self.items]
        return dict(
            reasoning=self.reasoning,
            checklist=serialized_items,
            is_valid=self.is_valid,
            validation_errors=self.validation_errors,
        )
85
+
86
+
87
@dataclass
class ProjectState:
    """Snapshot of what already exists in the target project.

    Used to give the LLM context about the project before it plans.
    """

    exists: bool = False
    has_package_json: bool = False
    has_prisma: bool = False
    has_next_config: bool = False
    existing_models: List[str] = field(default_factory=list)
    existing_routes: List[str] = field(default_factory=list)
    existing_pages: List[str] = field(default_factory=list)

    def to_prompt(self) -> str:
        """Render the state as plain text for inclusion in a prompt.

        Returns:
            A single sentence when the project does not exist, otherwise a
            header followed by one bullet per detected feature.
        """
        if not self.exists:
            return "Project does not exist yet - will be created fresh."

        summary = ["Current project state:"]
        note = summary.append

        if self.has_package_json:
            note("- ✓ package.json exists (Node.js project)")
        if self.has_next_config:
            note("- ✓ next.config.ts exists (Next.js configured)")

        # Models are only listed underneath the Prisma bullet.
        if self.has_prisma:
            note("- ✓ Prisma configured")
            if self.existing_models:
                model_names = ", ".join(self.existing_models)
                note(f" - Models: {model_names}")

        if self.existing_routes:
            route_names = ", ".join(self.existing_routes)
            note(f"- Existing API routes: {route_names}")

        if self.existing_pages:
            page_names = ", ".join(self.existing_pages)
            note(f"- Existing pages: {page_names}")

        return "\n".join(summary)
122
+
123
+
124
# System prompt template for checklist generation. It is rendered with
# str.format(): `{catalog_prompt}` is the only substitution field (callers in
# this module fill it with get_catalog_prompt()), and every doubled brace
# (`{{` / `}}`) is an escaped literal brace used by the JSON examples below.
CHECKLIST_SYSTEM_PROMPT = """You are a code generation planner. Your task is to analyze the user's request and generate a checklist of template invocations that will create the requested application.

{catalog_prompt}

## Instructions

1. Analyze the user's request carefully
2. Consider what the user ACTUALLY wants (semantic understanding)
3. Select templates that will fulfill the request
4. Add semantic enhancements based on the request type:
- For "todo" apps: add checkboxes for completion status
- For "blog" apps: add date fields for posts
- For "e-commerce": add price, inventory fields
5. Ensure dependencies are satisfied (run setup before data, data before API, API before UI)
6. Generate a complete checklist that creates a working application
7. When follow-up fixes are requested, use `fix_code` to repair the specific files called out by validation/test logs. Extract the precise file paths and line numbers from the errors (see the Raw Validation Logs) and pass those line numbers inside the error description so the fixer knows exactly where to focus. Always reference the latest findings to decide which fixes to schedule before running validations again.
8. If the user explicitly requests an additional UI artifact (countdown display, stats badge, etc.), schedule a `generate_react_component` step with the appropriate `artifact-*` variant (e.g., `"artifact-timer"`) and a descriptive `component_name`. Keep the artifact's logic inside that client component—server components like `page.tsx` should only render the artifact and pass any required props.
9. **Route pairing requirement:** Whenever you schedule `{{"template": "generate_api_route", "params": {{"type": "collection", ...}}}}`, you MUST also include a matching `generate_api_route` item with `"type": "item"` for the same resource so detail pages can call `/api/<resource>/[id]`.

This workflow repeats until all validations pass, so each checklist should either advance new functionality or explicitly repair the failures reported in the latest validation logs.

## IMPORTANT: Complete CRUD Applications

For any app that manages resources (todos, posts, users, etc.), you MUST generate ALL of these UI components:

1. **Form component** (variant: "form") - Reusable form for create (generate this first so other pages can import it)
2. **Artifact components** (variant: "artifact-*") - Any additional UI artifacts requested by the user (e.g., countdown display, stats badge). Generate these before any page that consumes them.
3. **New page** (variant: "new") - Create new item at /resources/new
4. **Edit page** (variant: "detail") - Edit single item at /resources/[id] with pre-populated form
5. **List page** (variant: "list") - Main page showing all items at /resources

Missing any of the required components (form, new, detail, list) will result in broken navigation! When artifacts are requested, they must also be generated or the UI will be incomplete.

## REQUIRED: Setup and Validation Commands

**CRITICAL**: The following commands are REQUIRED for a valid plan:

1. **setup_app_styling** MUST be included after creating the application (after `create_next_app`). This configures app-wide styling with modern dark theme design system.

2. **setup_testing** MUST be included after `setup_app_styling`. This sets up the testing infrastructure.

3. **generate_style_tests** MUST be included after `setup_testing`. This generates CSS integrity tests that validate the design system.

4. **The final 2 commands MUST be in this exact order:**
- Second-to-last: `run_typescript_check` (validates TypeScript compilation)
- Last: `validate_styles` (validates CSS files and design system)

These setup and validation commands are mandatory - a plan without them is INVALID.

## Output Format

Respond with ONLY a JSON object (no markdown code blocks):
{{
"reasoning": "Brief explanation of your approach and any semantic enhancements",
"checklist": [
{{"template": "template_name", "params": {{}}, "description": "Why this step is needed"}}
]
}}

Important:
- Use exact template names from the catalog
- Provide all required parameters
- Order items by dependency (setup first, then data, then API, then UI)
- REQUIRED ordering for `generate_react_component`: emit all non-`list` variants (form, new, detail, actions, artifact-*) before the `variant: "list"` call so the list can import previously generated components
- Add semantic enhancements that make the app intuitive (e.g., checkboxes for todos)
- For CRUD apps, ALWAYS include all 4 UI variants: list, form, new, detail
- REQUIRED: Include `setup_app_styling` after `create_next_app`
- REQUIRED: Include `setup_testing` after `setup_app_styling`
- REQUIRED: Include `generate_style_tests` after `setup_testing`
- REQUIRED: End with `run_typescript_check`, then `validate_styles` as the last 2 commands
- When converting a raw validation error into `fix_code`, copy the exact snippet (file, line, column, and message). For example:

Raw Validation Logs (example):
```
{{"template": "run_typescript_check", "output": {{"errors": "path/to/File.tsx(10,5): error TS1234: <error text>\\n"}}}}
```

Corresponding checklist item:
```
{{
"template": "fix_code",
"params": {{
"file_path": "path/to/File.tsx",
"error_description": "path/to/File.tsx(10,5): error TS1234: <error text>"
}},
"description": "Fix the TypeScript compiler error reported for File.tsx."
}}
```
Always keep the error text verbatim so the fixer knows exactly where to edit."""
213
+
214
+
215
class ChecklistGenerator:
    """Generate execution checklists by prompting an LLM.

    The generator combines the system prompt (which embeds the template
    catalog) with the user request and project state, sends the result
    through the chat SDK, parses the JSON reply into a ``GeneratedChecklist``
    and validates it. Generation is retried a fixed number of times before
    giving up with a ``RuntimeError``.
    """

    def __init__(self, chat_sdk: ChatSDK):
        """Initialize the checklist generator.

        Args:
            chat_sdk: Chat SDK instance for LLM communication
        """
        self.chat = chat_sdk

    def generate_initial_checklist(
        self,
        context: UserContext,
        project_state: Optional[ProjectState] = None,
    ) -> GeneratedChecklist:
        """Generate the initial project-scaffolding checklist.

        Args:
            context: User context describing the request
            project_state: Current project state; a default (non-existent
                project) state is used when omitted

        Returns:
            A checklist that passed validation

        Raises:
            RuntimeError: If no valid checklist is produced after all attempts
        """
        if project_state is None:
            project_state = ProjectState()

        system_prompt = CHECKLIST_SYSTEM_PROMPT.format(
            catalog_prompt=get_catalog_prompt()
        )
        user_prompt = self._build_initial_prompt(context, project_state)
        full_prompt = f"{system_prompt}\n\n## User Request\n\n{user_prompt}"
        return self._generate_from_prompt(full_prompt)

    def generate_debug_checklist(
        self,
        context: UserContext,
        project_state: Optional[ProjectState],
        prior_errors: Optional[List[str]],
        validation_logs: Optional[List[Any]],
    ) -> GeneratedChecklist:
        """Generate a remediation checklist to fix outstanding errors.

        Args:
            context: User context describing the request
            project_state: Current project state, or None for the default
            prior_errors: Execution errors from the last attempt, or None
            validation_logs: Validation/test log entries, or None

        Returns:
            A checklist that passed validation

        Raises:
            RuntimeError: If no valid checklist is produced after all attempts
        """
        if project_state is None:
            project_state = ProjectState()

        debug_prompt = self._build_debug_prompt(
            context=context,
            project_state=project_state,
            prior_errors=prior_errors or [],
            validation_logs=validation_logs or [],
        )
        system_prompt = CHECKLIST_SYSTEM_PROMPT.format(
            catalog_prompt=get_catalog_prompt()
        )
        full_prompt = f"{system_prompt}\n\n## Remediation Context\n\n{debug_prompt}"
        return self._generate_from_prompt(full_prompt)

    def _generate_from_prompt(self, full_prompt: str) -> GeneratedChecklist:
        """Send the prompt, parse and validate the reply, retrying on failure.

        Args:
            full_prompt: Complete prompt (system + user/remediation section)

        Returns:
            The first checklist that is non-empty and passes validation

        Raises:
            RuntimeError: If every attempt fails; the message carries the
                reason for the last failure
        """
        logger.debug("Generating checklist with LLM...")
        logger.debug("Checklist prompt: %s", full_prompt)

        max_attempts = 3
        last_failure_reason = "unknown error"

        for attempt in range(1, max_attempts + 1):
            try:
                response = self.chat.send(full_prompt, timeout=1200)
                response_text = self._extract_response_text(response)
                logger.debug("LLM response (attempt %d): %s", attempt, response_text)
                checklist = self._parse_checklist(response_text)
            except Exception as exc:  # pylint: disable=broad-exception-caught
                last_failure_reason = str(exc)
                logger.warning(
                    "Checklist generation attempt %d/%d failed: %s",
                    attempt,
                    max_attempts,
                    exc,
                )
                continue

            if not checklist.items:
                # A parse failure also yields an empty checklist; keep its
                # recorded reason instead of masking it as "empty".
                last_failure_reason = (
                    "; ".join(checklist.validation_errors)
                    or "LLM returned an empty checklist"
                )
                logger.warning(
                    "Checklist generation attempt %d/%d returned no items (%s), retrying...",
                    attempt,
                    max_attempts,
                    last_failure_reason,
                )
                continue

            self._validate_checklist(checklist)
            if checklist.validation_errors:
                last_failure_reason = "; ".join(checklist.validation_errors)
                logger.warning(
                    "Checklist generation attempt %d/%d failed validation: %s",
                    attempt,
                    max_attempts,
                    checklist.validation_errors,
                )
                continue

            logger.debug(
                "Generated checklist with %d items on attempt %d",
                len(checklist.items),
                attempt,
            )
            return checklist

        raise RuntimeError(
            f"Failed to generate a valid checklist after {max_attempts} attempts: "
            f"{last_failure_reason}"
        )

    def _build_initial_prompt(
        self,
        context: UserContext,
        project_state: ProjectState,
    ) -> str:
        """Build the user prompt with all context.

        Args:
            context: User context
            project_state: Current project state

        Returns:
            Formatted user prompt string
        """
        lines = [f"**User Request**: {context.user_request}"]

        if context.entity_name:
            lines.append(f"\n**Inferred Entity**: {context.entity_name}")

        if context.schema_fields:
            lines.append(f"\n**Inferred Fields**: {json.dumps(context.schema_fields)}")

        lines.append(f"\n**Project Directory**: {context.project_dir}")
        lines.append(f"\n**Language**: {context.language}")
        lines.append(f"\n**Project Type**: {context.project_type}")

        lines.append(f"\n{project_state.to_prompt()}")

        if context.fix_feedback:
            lines.append("\n**Outstanding Fix Requests**:")
            for note in context.fix_feedback[-5:]:
                lines.append(f"- {note}")

        if context.validation_reports:
            lines.append("\n**Recent Validation/Test Findings**:")
            for report in context.validation_reports[-5:]:
                # Accept log objects as well as plain dicts, matching what
                # the debug prompt builder does.
                log = report.to_dict() if hasattr(report, "to_dict") else report
                status = "PASS" if log.get("success", True) else "FAIL"
                template = log.get("template", "validation_step")
                description = log.get("description", "")
                lines.append(f"- [{status}] {template}: {description}")
                if log.get("error"):
                    lines.append(f" Error: {log['error']}")

                output = log.get("output", {})
                snippet = ""
                if isinstance(output, dict):
                    for key in ("stdout", "stderr", "message", "details"):
                        if output.get(key):
                            snippet = str(output[key])[:200]
                            break
                    if not snippet and output:
                        # default=str keeps prompt building from crashing on
                        # values json cannot serialize natively.
                        snippet = json.dumps(output, default=str)[:200]
                elif output:
                    snippet = str(output)[:200]
                if snippet:
                    lines.append(f" Output: {snippet}")

        lines.append("\nGenerate a checklist to fulfill this request.")

        return "\n".join(lines)

    def _build_debug_prompt(
        self,
        context: UserContext,
        project_state: ProjectState,
        prior_errors: List[str],
        validation_logs: List[Any],
    ) -> str:
        """Build prompt for remediation/debug checklists.

        Args:
            context: User context
            project_state: Current project state
            prior_errors: Execution errors from the last attempt
            validation_logs: Log entries; each is a dict or an object with a
                ``to_dict`` method (only the last 10 are used)

        Returns:
            Formatted remediation prompt string
        """
        lines = [
            "You are a remediation planner for the GAIA web development agent. "
            "The project has already been scaffolded; focus exclusively on fixing outstanding issues."
        ]
        lines.append(f"\n**User Request**: {context.user_request}")
        lines.append(f"\n**Project Directory**: {context.project_dir}")

        if context.entity_name:
            lines.append(f"\n**Entity**: {context.entity_name}")
        if context.schema_fields:
            lines.append(f"\n**Schema Fields**: {json.dumps(context.schema_fields)}")

        lines.append(f"\n{project_state.to_prompt()}")

        if prior_errors:
            lines.append("\n**Execution Errors From Last Attempt:**")
            for err in prior_errors:
                lines.append(f"- {err}")

        if validation_logs:
            lines.append("\n**Recent Validation/Test Results:**")
            raw_entries = []
            for log in validation_logs[-10:]:
                entry = log.to_dict() if hasattr(log, "to_dict") else log
                template = entry.get("template", "unknown_step")
                success = entry.get("success", True)
                desc = entry.get("description", "")
                status = "PASS" if success else "FAIL"
                lines.append(f"- [{status}] {template}: {desc}")
                if entry.get("error"):
                    lines.append(f" Error: {entry['error']}")

                output = entry.get("output") or {}
                snippet = ""
                if isinstance(output, dict):
                    for key in ("stdout", "stderr", "details", "message"):
                        if output.get(key):
                            snippet = str(output[key])[:200]
                            break
                else:
                    # Non-dict output (e.g. a raw string) used to crash on
                    # ``.get``; summarize it the way the initial prompt does.
                    snippet = str(output)[:200]
                if snippet:
                    lines.append(f" Output: {snippet}")
                raw_entries.append(entry)

            if raw_entries:
                lines.append(
                    "\n**Raw Validation Logs (exact text for follow-up fixes):**"
                )
                for entry in raw_entries:
                    # default=str: a non-serializable value must not abort
                    # prompt construction.
                    lines.append(json.dumps(entry, ensure_ascii=False, default=str))

        if context.fix_feedback:
            lines.append("\n**Outstanding Fix Instructions:**")
            for note in context.fix_feedback[-10:]:
                lines.append(f"- {note}")

        lines.append(
            "\nYour job: draft a concise checklist that repairs the errors above, "
            "regenerates any broken code, and re-runs critical validations."
        )
        lines.append(
            "\n**Critical Requirements for Debug Checklists:**\n"
            "1. Use `fix_code` to repair the specific files referenced in the failures above.\n"
            "2. Re-run any validations or tests that previously failed once fixes are applied.\n"
            "3. Always include `run_typescript_check` as the second-to-last command to capture current compiler errors.\n"
            "4. Always include `validate_styles` as the final command to capture CSS/design regressions."
        )

        return "\n".join(lines)

    def _extract_response_text(self, response: Any) -> str:
        """Extract text from LLM response.

        Handles different response formats from various SDKs: plain strings,
        objects exposing ``text`` or ``content``, and dicts with those keys.
        Anything else is converted with ``str``.

        Args:
            response: Response from chat SDK

        Returns:
            Response text string
        """
        if isinstance(response, str):
            return response

        # Handle response objects with text attribute
        if hasattr(response, "text"):
            return response.text

        # Handle response objects with content attribute
        if hasattr(response, "content"):
            return response.content

        # Handle dict-like responses
        if isinstance(response, dict):
            return response.get("text", response.get("content", str(response)))

        return str(response)

    def _parse_checklist(self, response_text: str) -> GeneratedChecklist:
        """Parse LLM response into GeneratedChecklist.

        Malformed replies never raise from here: invalid JSON, a non-object
        top level, a non-list ``checklist``, non-object entries or non-object
        ``params`` all produce an empty checklist whose ``validation_errors``
        explains the problem. A missing or ``null`` ``params`` becomes ``{}``.

        Args:
            response_text: Raw LLM response text

        Returns:
            Parsed GeneratedChecklist
        """

        def failed(detail: Any) -> GeneratedChecklist:
            """Log the problem and return an empty, invalid checklist."""
            logger.error("Failed to parse checklist JSON: %s", detail)
            return GeneratedChecklist(
                items=[],
                reasoning="",
                raw_response=response_text,
                validation_errors=[f"Failed to parse JSON: {detail}"],
            )

        try:
            data = json.loads(self._extract_json(response_text))
        except json.JSONDecodeError as exc:
            return failed(exc)

        if not isinstance(data, dict):
            return failed("top-level value is not a JSON object")

        entries = data.get("checklist", [])
        if not isinstance(entries, list):
            return failed("'checklist' is not a list")

        items = []
        for position, entry in enumerate(entries):
            if not isinstance(entry, dict):
                return failed(f"checklist entry {position} is not an object")

            params = entry.get("params")
            if params is None:
                params = {}
            elif not isinstance(params, dict):
                # Validation runs outside the retry try/except, so bad params
                # must be rejected here rather than crash later.
                return failed(f"'params' of checklist entry {position} is not an object")

            items.append(
                ChecklistItem(
                    template=entry.get("template", ""),
                    params=params,
                    description=entry.get("description", ""),
                )
            )

        return GeneratedChecklist(
            items=items,
            reasoning=data.get("reasoning", ""),
            raw_response=response_text,
        )

    def _extract_json(self, text: str) -> str:
        """Extract JSON from text that might contain markdown or other content.

        Strategy, in order:
        1. A fenced markdown block whose content is valid JSON is returned
           as-is.
        2. Otherwise the JSON object starting at the first ``{`` of the fenced
           content, then of the whole text, is decoded and returned; trailing
           chatter (even with braces in it) and unknown fence language tags
           are ignored.
        3. If nothing decodes, fall back to the fenced content, then to the
           span from the first ``{`` to the last ``}``, then to the stripped
           text, and let the JSON parser report the error.

        Args:
            text: Text that may contain JSON

        Returns:
            Extracted JSON string
        """
        fence = re.search(r"```(?:json)?\s*\n?(.*?)\n?```", text, re.DOTALL)
        fenced = fence.group(1).strip() if fence else None

        if fenced is not None:
            try:
                json.loads(fenced)
            except json.JSONDecodeError:
                # Not clean JSON; try to locate an object inside it below.
                pass
            else:
                return fenced

        decoder = json.JSONDecoder()
        for candidate in (fenced, text):
            if candidate is None:
                continue
            start = candidate.find("{")
            if start == -1:
                continue
            try:
                # raw_decode stops at the end of the first complete value and
                # tolerates whatever follows it.
                _, length = decoder.raw_decode(candidate[start:])
            except json.JSONDecodeError:
                continue
            return candidate[start : start + length]

        # Nothing decodable: return the best guess so the caller's JSON
        # parser produces the error message.
        if fenced is not None:
            return fenced

        brace_span = re.search(r"\{.*\}", text, re.DOTALL)
        if brace_span:
            return brace_span.group(0)

        return text.strip()

    def _validate_checklist(self, checklist: GeneratedChecklist) -> None:
        """Validate checklist items against template definitions.

        Adds validation errors to the checklist if any are found. Checks run
        in a fixed order: per-item template validation, duplicate detection,
        setup ordering, the mandatory trailing commands, and style-test
        ordering.

        Args:
            checklist: Checklist to validate (modified in place)
        """
        steps = checklist.items
        report = checklist.validation_errors

        def last_index_of(template_name):
            """Return the position of the last step using ``template_name``."""
            found = None
            for position, step in enumerate(steps):
                if step.template == template_name:
                    found = position
            return found

        for step in steps:
            report.extend(validate_checklist_item(step.template, step.params))

        # Duplicates require the same template AND the same params (several
        # API routes with different params are fine).
        fingerprints = set()
        for step in steps:
            fingerprint = f"{step.template}:{json.dumps(step.params, sort_keys=True)}"
            if fingerprint in fingerprints:
                report.append(
                    f"Duplicate checklist item: {step.template} with same params"
                )
            fingerprints.add(fingerprint)

        app_pos = last_index_of("create_next_app")
        styling_pos = last_index_of("setup_app_styling")
        testing_pos = last_index_of("setup_testing")

        # Styling is only mandatory when the checklist creates the app.
        if app_pos is not None:
            if styling_pos is None:
                report.append(
                    "REQUIRED: 'setup_app_styling' must be included after 'create_next_app'"
                )
            elif styling_pos <= app_pos:
                report.append(
                    "REQUIRED: 'setup_app_styling' must come after 'create_next_app' in the checklist"
                )

        # Testing setup is only mandatory when styling is being set up.
        if styling_pos is not None:
            if testing_pos is None:
                report.append(
                    "REQUIRED: 'setup_testing' must be included after 'setup_app_styling'"
                )
            elif testing_pos <= styling_pos:
                report.append(
                    "REQUIRED: 'setup_testing' must come after 'setup_app_styling' in the checklist"
                )

        # The checklist must finish with run_typescript_check, validate_styles.
        if len(steps) < 2:
            report.append(
                "REQUIRED: Checklist must end with 'run_typescript_check', "
                "'validate_styles' as the last two commands"
            )
        else:
            if steps[-1].template != "validate_styles":
                report.append("REQUIRED: The last command must be 'validate_styles'")
            if steps[-2].template != "run_typescript_check":
                report.append(
                    "REQUIRED: The second-to-last command must be 'run_typescript_check'"
                )

        # Style tests are only mandatory when testing is being set up.
        style_tests_pos = last_index_of("generate_style_tests")
        if testing_pos is not None:
            if style_tests_pos is None:
                report.append(
                    "REQUIRED: 'generate_style_tests' must be included after 'setup_testing'"
                )
            elif style_tests_pos <= testing_pos:
                report.append(
                    "REQUIRED: 'generate_style_tests' must come after 'setup_testing'"
                )

        if checklist.validation_errors:
            logger.warning(
                "Checklist validation errors: %s", checklist.validation_errors
            )
650
+
651
+
652
def create_checklist_from_workflow(
    workflow_phases: List[Any],
    context: UserContext,
) -> GeneratedChecklist:
    """Create a checklist from existing workflow phases (for comparison/testing).

    This converts the old step-based workflow into the new checklist format,
    useful for testing and migration.

    Args:
        workflow_phases: List of WorkflowPhase objects from factory; each
            phase is expected to expose a ``steps`` iterable
        context: User context, forwarded to ``step.get_tool_invocation``

    Returns:
        GeneratedChecklist representing the workflow
    """
    # Legacy step name -> catalog template name. Built once up front: the
    # mapping is the same for every phase and step, so rebuilding it inside
    # the loops was wasted work. Unknown step names pass through unchanged.
    template_map = {
        "create_next_app": "create_next_app",
        "setup_styling": "setup_app_styling",
        "install_deps": "setup_prisma",
        "setup_testing": "setup_testing",
        "prisma_init": "setup_prisma",
        "setup_prisma": "setup_prisma",
        "manage_data_model": "generate_prisma_model",
        "manage_api_endpoint": "generate_api_route",
        "manage_api_endpoint_dynamic": "generate_api_route",
        "manage_react_component": "generate_react_component",
        "update_landing_page": "update_landing_page",
        "validate_typescript": "run_typescript_check",
        "validate_crud_structure": "run_typescript_check",
        "test_crud_api": "run_typescript_check",
    }

    items = []
    for phase in workflow_phases:
        for step in phase.steps:
            template_name = template_map.get(step.name, step.name)

            # Extract params from the step's tool invocation, if it has one.
            params = {}
            if hasattr(step, "get_tool_invocation"):
                invocation = step.get_tool_invocation(context)
                if invocation:
                    _, step_params = invocation
                    # project_dir is dropped from the recorded params.
                    params = {
                        k: v for k, v in step_params.items() if k != "project_dir"
                    }

            items.append(
                ChecklistItem(
                    template=template_name,
                    params=params,
                    description=step.description,
                )
            )

    return GeneratedChecklist(
        items=items,
        reasoning="Converted from existing workflow",
    )