@trygentic/agentloop 0.16.0-alpha.11 → 0.18.0-alpha.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -12
- package/package.json +3 -3
- package/templates/agents/_base/proactive.bt.json +43 -0
- package/templates/agents/_base/reactive-delegation.bt.json +73 -0
- package/templates/agents/_base/reactive-message.bt.json +58 -0
- package/templates/agents/_base/reactive-task.bt.json +51 -0
- package/templates/agents/chat/chat.bt.json +70 -20
- package/templates/agents/chat/chat.md +36 -19
- package/templates/agents/engineer/engineer.bt.json +951 -346
- package/templates/agents/engineer/engineer.md +86 -33
- package/templates/agents/merge-resolver/merge-resolver.bt.json +217 -0
- package/templates/agents/merge-resolver/merge-resolver.md +297 -0
- package/templates/agents/orchestrator/orchestrator.bt.json +1 -0
- package/templates/agents/orchestrator/orchestrator.md +17 -92
- package/templates/agents/product-manager/product-manager.bt.json +215 -25
- package/templates/agents/product-manager/product-manager.md +86 -13
- package/templates/agents/qa-tester/qa-tester.bt.json +299 -88
- package/templates/agents/qa-tester/qa-tester.md +59 -12
- package/templates/agents/release/release.bt.json +219 -0
- package/templates/agents/release/release.md +164 -0
- package/templates/examples/engineer.md.example +4 -4
- package/templates/examples/example-custom-agent.md.example +4 -4
- package/templates/examples/example-plugin.js.example +1 -1
- package/templates/plugins/qa-e2e-maestro/qa-e2e-maestro.bt.json +1191 -0
- package/templates/plugins/qa-e2e-maestro/qa-e2e-maestro.md +923 -0
- package/templates/plugins/qa-e2e-scenario/qa-e2e-scenario.md +85 -0
- package/templates/non-core-templates/container.md +0 -173
- package/templates/non-core-templates/dag-planner.md +0 -96
- package/templates/non-core-templates/internal/cli-tester.md +0 -218
- package/templates/non-core-templates/internal/qa-tester.md +0 -300
- package/templates/non-core-templates/internal/tui-designer.md +0 -370
- package/templates/non-core-templates/internal/tui-tester.md +0 -125
- package/templates/non-core-templates/maestro-qa.md +0 -240
- package/templates/non-core-templates/merge-resolver.md +0 -150
- package/templates/non-core-templates/project-detection.md +0 -75
- package/templates/non-core-templates/questionnaire.md +0 -124
|
@@ -0,0 +1,1191 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "qa-e2e-maestro-continuous-agent-tree",
|
|
3
|
+
"description": "Continuous behavior tree for the QA E2E Maestro agent. Loops forever, waiting for task assignments from the orchestrator. Validates UI behavior exclusively via Maestro MCP tools against the iOS Simulator running Expo Go. Dynamically generates E2E test scenarios based on task context, classifies failures, and provides feedback to engineers. Uses forEach iteration to execute each scenario independently.",
|
|
4
|
+
"version": "4.5.0",
|
|
5
|
+
"tree": {
|
|
6
|
+
"type": "root",
|
|
7
|
+
"child": {
|
|
8
|
+
"type": "sequence",
|
|
9
|
+
"comment": "Main continuous loop - never exits unless agent is stopped",
|
|
10
|
+
"children": [
|
|
11
|
+
{
|
|
12
|
+
"type": "action",
|
|
13
|
+
"call": "WaitForTask",
|
|
14
|
+
"comment": "Block until orchestrator assigns a task via ContinuousAgentRunner.assignTask()"
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"type": "action",
|
|
18
|
+
"call": "FetchTaskContext",
|
|
19
|
+
"comment": "Load task details, comments, qa-tester results, and acceptance criteria"
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"type": "selector",
|
|
23
|
+
"comment": "Scan .agentloop/maestro-flows/ for reusable project-level Maestro test flows (non-critical: continue if none found)",
|
|
24
|
+
"children": [
|
|
25
|
+
{
|
|
26
|
+
"type": "llm-action",
|
|
27
|
+
"name": "ScanProjectMaestroFlows",
|
|
28
|
+
"prompt": "Scan the project's `.agentloop/maestro-flows/` directory for reusable Maestro YAML test flows.\n\nTask Title: {{taskTitle}}\nTask Description: {{taskDescription}}\n\n## Instructions\n1. List all YAML files in BOTH directories using Bash:\n ```\n ls -la .agentloop/maestro-flows/*.yaml .agentloop/maestro-flows/*.yaml 2>/dev/null\n ```\n2. For each YAML file found, read it to understand:\n - What it does (from comments and flow steps)\n - What tags it has (from the YAML header)\n - Whether it's relevant to the current task\n3. Pay special attention to flows tagged as 'reusable' or 'setup' \u2014 these are designed to be used as subflows\n4. The `guest-login.yaml` flow is the PREFERRED way to launch the app and navigate to the main screen as a guest user. It handles:\n - Stopping any existing Expo Go session\n - Opening the app via `exp://localhost:PORT` deep link\n - Waiting for and tapping through the splash screen (uses regex for trailing newline in accessibility text)\n - Tapping 'Explore as Guest' on the login screen\n - Waiting for and verifying the main app loads (all 5 bottom tabs)\n\nReport all found flows with their paths, descriptions, and relevance to the task.",
|
|
29
|
+
"contextKeys": [
|
|
30
|
+
"taskTitle",
|
|
31
|
+
"taskDescription"
|
|
32
|
+
],
|
|
33
|
+
"allowedTools": [
|
|
34
|
+
"Bash"
|
|
35
|
+
],
|
|
36
|
+
"subagent": "qa-e2e-maestro",
|
|
37
|
+
"maxTurns": 5,
|
|
38
|
+
"outputSchema": {
|
|
39
|
+
"type": "object",
|
|
40
|
+
"properties": {
|
|
41
|
+
"flows": {
|
|
42
|
+
"type": "array",
|
|
43
|
+
"items": {
|
|
44
|
+
"type": "object",
|
|
45
|
+
"properties": {
|
|
46
|
+
"filePath": {
|
|
47
|
+
"type": "string",
|
|
48
|
+
"description": "Relative path to the YAML flow file"
|
|
49
|
+
},
|
|
50
|
+
"description": {
|
|
51
|
+
"type": "string",
|
|
52
|
+
"description": "What this flow does"
|
|
53
|
+
},
|
|
54
|
+
"tags": {
|
|
55
|
+
"type": "array",
|
|
56
|
+
"items": {
|
|
57
|
+
"type": "string"
|
|
58
|
+
},
|
|
59
|
+
"description": "Tags from the YAML header"
|
|
60
|
+
},
|
|
61
|
+
"isRelevant": {
|
|
62
|
+
"type": "boolean",
|
|
63
|
+
"description": "Whether this flow is relevant to the current task"
|
|
64
|
+
},
|
|
65
|
+
"useAs": {
|
|
66
|
+
"type": "string",
|
|
67
|
+
"enum": [
|
|
68
|
+
"setup",
|
|
69
|
+
"test",
|
|
70
|
+
"subflow",
|
|
71
|
+
"reference"
|
|
72
|
+
],
|
|
73
|
+
"description": "How to use this flow"
|
|
74
|
+
}
|
|
75
|
+
},
|
|
76
|
+
"required": [
|
|
77
|
+
"filePath",
|
|
78
|
+
"description",
|
|
79
|
+
"isRelevant"
|
|
80
|
+
]
|
|
81
|
+
}
|
|
82
|
+
},
|
|
83
|
+
"hasGuestLoginFlow": {
|
|
84
|
+
"type": "boolean",
|
|
85
|
+
"description": "Whether a guest-login.yaml flow was found"
|
|
86
|
+
},
|
|
87
|
+
"summary": {
|
|
88
|
+
"type": "string",
|
|
89
|
+
"description": "Summary of available project flows"
|
|
90
|
+
}
|
|
91
|
+
},
|
|
92
|
+
"required": [
|
|
93
|
+
"flows",
|
|
94
|
+
"summary"
|
|
95
|
+
]
|
|
96
|
+
},
|
|
97
|
+
"outputKey": "projectMaestroFlows",
|
|
98
|
+
"temperature": 0.2
|
|
99
|
+
},
|
|
100
|
+
{
|
|
101
|
+
"type": "action",
|
|
102
|
+
"call": "NoOp",
|
|
103
|
+
"comment": "Continue if no project maestro flows found - will use ad-hoc navigation"
|
|
104
|
+
}
|
|
105
|
+
]
|
|
106
|
+
},
|
|
107
|
+
{
|
|
108
|
+
"type": "selector",
|
|
109
|
+
"comment": "Detect project type for dependency installation and test framework detection (non-fatal)",
|
|
110
|
+
"children": [
|
|
111
|
+
{
|
|
112
|
+
"type": "action",
|
|
113
|
+
"call": "DetectProjectType",
|
|
114
|
+
"comment": "Detect project type, test framework, and available test/lint commands"
|
|
115
|
+
},
|
|
116
|
+
{
|
|
117
|
+
"type": "action",
|
|
118
|
+
"call": "NoOp",
|
|
119
|
+
"comment": "Continue even if project type detection fails"
|
|
120
|
+
}
|
|
121
|
+
]
|
|
122
|
+
},
|
|
123
|
+
{
|
|
124
|
+
"type": "selector",
|
|
125
|
+
"comment": "Try to install dependencies, continue if it fails",
|
|
126
|
+
"children": [
|
|
127
|
+
{
|
|
128
|
+
"type": "action",
|
|
129
|
+
"call": "InstallDependencies",
|
|
130
|
+
"comment": "Ensure dependencies are installed for test execution"
|
|
131
|
+
},
|
|
132
|
+
{
|
|
133
|
+
"type": "action",
|
|
134
|
+
"call": "NoOp",
|
|
135
|
+
"comment": "Continue even if dependency installation fails"
|
|
136
|
+
}
|
|
137
|
+
]
|
|
138
|
+
},
|
|
139
|
+
{
|
|
140
|
+
"type": "action",
|
|
141
|
+
"call": "CreateScreenshotDirectory",
|
|
142
|
+
"comment": "Create .agentloop/pr-screenshots/<taskId>/ in the worktree for persistent screenshot storage"
|
|
143
|
+
},
|
|
144
|
+
{
|
|
145
|
+
"type": "selector",
|
|
146
|
+
"comment": "Check for incoming agent messages (non-critical: continue even if unavailable)",
|
|
147
|
+
"children": [
|
|
148
|
+
{
|
|
149
|
+
"type": "action",
|
|
150
|
+
"call": "CheckIncomingMessages",
|
|
151
|
+
"comment": "Poll for messages from qa-tester or engineers (coordination, context, known issues)"
|
|
152
|
+
},
|
|
153
|
+
{
|
|
154
|
+
"type": "action",
|
|
155
|
+
"call": "NoOp",
|
|
156
|
+
"comment": "Continue without message checking if messaging is unavailable"
|
|
157
|
+
}
|
|
158
|
+
]
|
|
159
|
+
},
|
|
160
|
+
{
|
|
161
|
+
"type": "selector",
|
|
162
|
+
"comment": "Notify other agents that E2E testing is starting (non-critical: continue even if unavailable)",
|
|
163
|
+
"children": [
|
|
164
|
+
{
|
|
165
|
+
"type": "action",
|
|
166
|
+
"call": "SendTaskStartNotification",
|
|
167
|
+
"comment": "Broadcast to other agents that E2E QA is starting review of this task"
|
|
168
|
+
},
|
|
169
|
+
{
|
|
170
|
+
"type": "action",
|
|
171
|
+
"call": "NoOp",
|
|
172
|
+
"comment": "Continue without notification if messaging is unavailable"
|
|
173
|
+
}
|
|
174
|
+
]
|
|
175
|
+
},
|
|
176
|
+
{
|
|
177
|
+
"type": "retry",
|
|
178
|
+
"attempts": 2,
|
|
179
|
+
"comment": "Boot a fresh iOS simulator for this task - retry once if boot fails",
|
|
180
|
+
"child": {
|
|
181
|
+
"type": "action",
|
|
182
|
+
"call": "BootFreshSimulator",
|
|
183
|
+
"comment": "Shutdown any existing simulators and boot a fresh one for isolated task testing"
|
|
184
|
+
}
|
|
185
|
+
},
|
|
186
|
+
{
|
|
187
|
+
"type": "succeed",
|
|
188
|
+
"comment": "CRITICAL: succeed wrapper ensures ShutdownSimulator always runs even if E2E testing fails. Without this, a sequence failure skips simulator cleanup, leaving simulators running indefinitely.",
|
|
189
|
+
"child": {
|
|
190
|
+
"type": "selector",
|
|
191
|
+
"comment": "Main E2E testing flow with failure handling",
|
|
192
|
+
"children": [
|
|
193
|
+
{
|
|
194
|
+
"type": "sequence",
|
|
195
|
+
"comment": "Main E2E test sequence: analyze -> setup env -> prerequisite navigation -> forEach scenarios -> aggregate -> report results",
|
|
196
|
+
"children": [
|
|
197
|
+
{
|
|
198
|
+
"type": "llm-action",
|
|
199
|
+
"name": "AnalyzeTaskForE2E",
|
|
200
|
+
"allowedTools": [],
|
|
201
|
+
"prompt": "You are a QA E2E testing agent analyzing a task that has already passed unit/integration tests from the qa-tester agent.\n\nTask Title: {{taskTitle}}\nTask Description: {{taskDescription}}\nTask Comments (includes qa-tester results): {{taskComments}}\nTask Files: {{taskFiles}}\n\nYou will be testing the app using Maestro MCP tools. Available tools:\n\n## Interactive MCP Tools (for ad-hoc testing)\n- mcp__maestro__tap_on - Tap on UI elements by text or ID\n- mcp__maestro__input_text - Type text into focused input fields\n- mcp__maestro__back - Press the back button\n- mcp__maestro__take_screenshot - Capture the current screen state\n- mcp__maestro__inspect_view_hierarchy - Inspect the full UI tree (use to verify elements are visible)\n- mcp__maestro__stop_app / mcp__maestro__launch_app - Reset app state between scenarios\n- mcp__maestro__list_devices / mcp__maestro__start_device - Device management\n\n## Flow Execution Tools (for reusable flows)\n- mcp__maestro__run_flow - Run a single Maestro YAML flow file (PREFERRED for common paths)\n- mcp__maestro__run_flow_files - Run multiple YAML flow files in sequence\n- mcp__maestro__check_flow_syntax - Validate YAML flow syntax\n\n## Reference Tools\n- mcp__maestro__query_docs - Query Maestro documentation\n- mcp__maestro__cheat_sheet - Get Maestro command reference\n\n## Maestro Flows (.agentloop/maestro-flows/)\nAvailable flows: {{projectMaestroFlows}}\n\nAll Maestro YAML flows (curated and agent-generated) live in `.agentloop/maestro-flows/`. When designing E2E scenarios, prefer referencing existing flows as setup steps. For example, if guest-login.yaml is available, include it as the first step for any scenario that needs guest mode access rather than defining manual navigation steps. Use run_flow to replay these INSTEAD of manually repeating the same navigation steps. 
This is faster and more reliable.\n\nNOTE: swipe, scroll, assert_visible, and wait_for are NOT available as individual MCP tools.\n- To verify elements: use inspect_view_hierarchy and check the returned tree for the expected text/element\n- To wait for elements: use a polling loop with sleep + inspect_view_hierarchy\n- To scroll/swipe: use Bash with xcrun simctl io commands\n- For complex assertions: write a Maestro YAML flow and use run_flow\n\nAnalyze the task to determine:\n1. What E2E scenarios need to be tested based on the changed files, task description, and acceptance criteria\n2. For each scenario, define specific steps \u2014 prefer run_flow for standard paths (launch, login, navigation) and MCP tools for task-specific interactions\n3. What acceptance criteria need UI-level validation\n4. Priority order for test execution\n5. Summary of what the qa-tester already validated (from task comments)\n\nCRITICAL - PREREQUISITE NAVIGATION ALREADY HANDLED:\nBefore your scenarios run, the app will already be launched in guest mode on the main screen via the NavigateToMainScreen prerequisite step. Your scenarios MUST NOT include steps to:\n- Launch or restart the app\n- Navigate through onboarding/splash screens\n- Tap 'Explore as Guest' or any login/guest-mode entry\n- Run the guest-login.yaml reusable flow\nInstead, each scenario should start its steps from the MAIN APP SCREEN with all 5 bottom tabs already visible. If a scenario needs authenticated state (not guest), it should explicitly note this as a prerequisite deviation.\n\nBe specific about which screens, buttons, and user flows should be tested. Each scenario should have concrete steps.",
|
|
202
|
+
"contextKeys": [
|
|
203
|
+
"taskTitle",
|
|
204
|
+
"taskDescription",
|
|
205
|
+
"taskComments",
|
|
206
|
+
"taskFiles",
|
|
207
|
+
"projectMaestroFlows"
|
|
208
|
+
],
|
|
209
|
+
"outputSchema": {
|
|
210
|
+
"type": "object",
|
|
211
|
+
"properties": {
|
|
212
|
+
"qaTesterSummary": {
|
|
213
|
+
"type": "string",
|
|
214
|
+
"description": "Summary of what qa-tester already validated"
|
|
215
|
+
},
|
|
216
|
+
"e2eScenarios": {
|
|
217
|
+
"type": "array",
|
|
218
|
+
"items": {
|
|
219
|
+
"type": "object",
|
|
220
|
+
"properties": {
|
|
221
|
+
"name": {
|
|
222
|
+
"type": "string",
|
|
223
|
+
"description": "Scenario name"
|
|
224
|
+
},
|
|
225
|
+
"description": {
|
|
226
|
+
"type": "string",
|
|
227
|
+
"description": "What to test and why"
|
|
228
|
+
},
|
|
229
|
+
"steps": {
|
|
230
|
+
"type": "array",
|
|
231
|
+
"items": {
|
|
232
|
+
"type": "object",
|
|
233
|
+
"properties": {
|
|
234
|
+
"action": {
|
|
235
|
+
"type": "string",
|
|
236
|
+
"description": "MCP tool to use (e.g., tap_on, input_text, take_screenshot, inspect_view_hierarchy, stop_app, launch_app) or 'bash' for xcrun simctl commands. NOTE: swipe, scroll, assert_visible, wait_for are NOT available."
|
|
237
|
+
},
|
|
238
|
+
"target": {
|
|
239
|
+
"type": [
|
|
240
|
+
"string",
|
|
241
|
+
"null"
|
|
242
|
+
],
|
|
243
|
+
"description": "Element text, ID, or direction to act on"
|
|
244
|
+
},
|
|
245
|
+
"value": {
|
|
246
|
+
"type": [
|
|
247
|
+
"string",
|
|
248
|
+
"null"
|
|
249
|
+
],
|
|
250
|
+
"description": "Value to input (for input_text) or additional parameter"
|
|
251
|
+
},
|
|
252
|
+
"description": {
|
|
253
|
+
"type": "string",
|
|
254
|
+
"description": "Human-readable description of this step"
|
|
255
|
+
}
|
|
256
|
+
},
|
|
257
|
+
"required": [
|
|
258
|
+
"action",
|
|
259
|
+
"description"
|
|
260
|
+
]
|
|
261
|
+
},
|
|
262
|
+
"description": "Ordered list of MCP tool steps for this scenario"
|
|
263
|
+
},
|
|
264
|
+
"priority": {
|
|
265
|
+
"type": "string",
|
|
266
|
+
"enum": [
|
|
267
|
+
"high",
|
|
268
|
+
"medium",
|
|
269
|
+
"low"
|
|
270
|
+
]
|
|
271
|
+
},
|
|
272
|
+
"relatedFiles": {
|
|
273
|
+
"type": "array",
|
|
274
|
+
"items": {
|
|
275
|
+
"type": "string"
|
|
276
|
+
},
|
|
277
|
+
"description": "Task files this scenario validates"
|
|
278
|
+
}
|
|
279
|
+
},
|
|
280
|
+
"required": [
|
|
281
|
+
"name",
|
|
282
|
+
"description",
|
|
283
|
+
"steps",
|
|
284
|
+
"priority"
|
|
285
|
+
]
|
|
286
|
+
},
|
|
287
|
+
"description": "E2E scenarios to execute using Maestro MCP tools"
|
|
288
|
+
},
|
|
289
|
+
"acceptanceCriteriaToValidate": {
|
|
290
|
+
"type": "array",
|
|
291
|
+
"items": {
|
|
292
|
+
"type": "string"
|
|
293
|
+
},
|
|
294
|
+
"description": "Specific acceptance criteria that need UI-level validation"
|
|
295
|
+
},
|
|
296
|
+
"testScope": {
|
|
297
|
+
"type": "string",
|
|
298
|
+
"enum": [
|
|
299
|
+
"minimal",
|
|
300
|
+
"standard",
|
|
301
|
+
"comprehensive"
|
|
302
|
+
],
|
|
303
|
+
"description": "Recommended test scope based on risk"
|
|
304
|
+
}
|
|
305
|
+
},
|
|
306
|
+
"required": [
|
|
307
|
+
"e2eScenarios",
|
|
308
|
+
"testScope"
|
|
309
|
+
]
|
|
310
|
+
},
|
|
311
|
+
"outputKey": "e2eTestPlan",
|
|
312
|
+
"temperature": 0.3
|
|
313
|
+
},
|
|
314
|
+
{
|
|
315
|
+
"type": "action",
|
|
316
|
+
"call": "ResetEnvRetryCount",
|
|
317
|
+
"comment": "Reset environment retry counter before setup"
|
|
318
|
+
},
|
|
319
|
+
{
|
|
320
|
+
"type": "retry",
|
|
321
|
+
"attempts": 3,
|
|
322
|
+
"comment": "Environment setup with retry logic - simulator boot, Expo server, app launch",
|
|
323
|
+
"child": {
|
|
324
|
+
"type": "sequence",
|
|
325
|
+
"comment": "Environment setup sequence",
|
|
326
|
+
"children": [
|
|
327
|
+
{
|
|
328
|
+
"type": "llm-action",
|
|
329
|
+
"name": "SetupTestEnvironment",
|
|
330
|
+
"prompt": "You are setting up the E2E test environment for an iOS Simulator running Expo Go.\n\nCRITICAL: Do NOT call mcp__agentloop__report_trigger_result during environment setup. Only report environment status via the output schema. The BT handles trigger reporting after test execution.\n\nCRITICAL: Do NOT run the guest-login.yaml Maestro flow or navigate through the app during setup. Your ONLY job is to ensure the infrastructure is ready: simulator booted, Expo Go installed, Metro running. App navigation happens in the NavigateToMainScreen step.\n\nCRITICAL CONTEXT: A fresh simulator has ALREADY been booted for you by the BootFreshSimulator action.\n- Your assigned simulator Device ID: {{simulatorDeviceId}}\n- Your assigned simulator Name: {{simulatorDeviceName}}\n- Your agent instance name: {{agentInstanceName}}\n\n## Metro Port Assignment\nYour assigned Metro port is: {{metroPort}}\nThis port was computed deterministically by the BootFreshSimulator action (instance 1 = 8081, instance 2 = 8082, etc.).\n\nUse port {{metroPort}} for ALL Metro, curl, and Expo URL commands. Do NOT calculate the port yourself \u2014 use exactly {{metroPort}}.\n\nCRITICAL: Multiple QA agents run in parallel, each with their OWN simulator. You MUST use your specific device UDID ({{simulatorDeviceId}}) for ALL xcrun simctl commands. NEVER use 'booted' as the device target \u2014 it picks a random booted simulator and will interfere with other agents.\n\n## CRITICAL WARNING: Always Use --offline Flag\nNEVER start Expo without the --offline flag. 
Without it, Expo attempts online authentication which fails with HTTP 500.\n\n## Step 1: Verify Simulator is Booted\n```\nxcrun simctl list devices | grep '{{simulatorDeviceId}}'\n```\nIf NOT booted, boot it: `xcrun simctl boot '{{simulatorDeviceId}}'` and wait up to 60 seconds.\n\nEnsure the Simulator.app GUI is running (required on macOS for reliable xcrun simctl operations):\n```\nopen -a Simulator\n```\nThis is idempotent \u2014 safe to run even if Simulator.app is already open.\n\n## Step 2: Clean Install Expo Go (MANDATORY - Clear Stale Data)\n```\nxcrun simctl terminate '{{simulatorDeviceId}}' host.exp.Exponent 2>/dev/null\nxcrun simctl uninstall '{{simulatorDeviceId}}' host.exp.Exponent 2>/dev/null\ncurl -L 'https://github.com/expo/expo-go-releases/releases/download/Expo-Go-54.0.6/Expo-Go-54.0.6.tar.gz' -o /tmp/ExpoGo.tar.gz\nmkdir -p '/tmp/Expo Go.app'\ncd '/tmp/Expo Go.app' && tar -xzf /tmp/ExpoGo.tar.gz\nxcrun simctl install '{{simulatorDeviceId}}' '/tmp/Expo Go.app'\n```\nVerify: `xcrun simctl listapps '{{simulatorDeviceId}}' 2>/dev/null | grep -A1 'host.exp.Exponent'`\n\n## Step 2b: Suppress Expo Go Developer Menu Welcome Screen\nIMPORTANT: These MUST be set AFTER installing Expo Go (Step 2 above), not before. The install wipes app defaults.\n```bash\nxcrun simctl spawn '{{simulatorDeviceId}}' defaults write host.exp.Exponent EXDevMenuIsOnboardingFinished -bool YES 2>/dev/null\nxcrun simctl spawn '{{simulatorDeviceId}}' defaults write host.exp.Exponent EXDevMenuDisableAutoLaunch -bool YES 2>/dev/null\n```\nVerify the defaults were actually set:\n```bash\nxcrun simctl spawn '{{simulatorDeviceId}}' defaults read host.exp.Exponent EXDevMenuIsOnboardingFinished 2>/dev/null || echo 'WARNING: Dev menu defaults may not have been set'\n```\n\n## Step 2c: Ensure frontend dependencies are installed (CRITICAL for worktrees)\nWorktrees do NOT have node_modules by default. 
If `frontend/node_modules` is missing or is a broken symlink, Metro WILL fail with 'dependencies were not installed' errors.\n```\nif [ ! -d frontend/node_modules ] || [ ! -e frontend/node_modules/.package-lock.json ]; then\n echo 'frontend/node_modules missing or incomplete - installing dependencies...'\n cd frontend && npm install --legacy-peer-deps && cd ..\nfi\n```\nThis MUST succeed before starting Metro. If npm install fails, report environmentReady=false.\n\n## Step 3: Kill Stale Metro Bundler on YOUR Port and Start Fresh\n```\nlsof -ti:{{metroPort}} | xargs kill -9 2>/dev/null\nunset CI && cd frontend && npx expo start --go --offline --port {{metroPort}} </dev/null >metro.log 2>&1 &\n```\nWait for Metro to be ready (up to 45 seconds):\n```\nfor i in $(seq 1 45); do if curl -s http://localhost:{{metroPort}}/status 2>/dev/null | grep -q 'packager-status:running'; then echo 'Metro is ready'; break; fi; sleep 1; done\n```\nIf Metro fails, check logs and retry with --clear:\n```\ncat frontend/metro.log | tail -20\nlsof -ti:{{metroPort}} | xargs kill -9 2>/dev/null\nunset CI && cd frontend && npx expo start --go --offline --port {{metroPort}} --clear </dev/null >metro.log 2>&1 &\n```\nIf 'Unable to resolve module' or 'failed to resolve module' errors persist after retry:\n```\ncd frontend && rm -rf node_modules && npm install --legacy-peer-deps && cd ..\n```\nThen retry starting Metro.\n\n## Step 3b: Start Simulator Log Capture (Background)\nStart a background process to capture simulator logs, filtered for JavaScript errors and React Native exceptions:\n```bash\n# Create log file for this test session\nLOG_FILE=\"/tmp/simulator-{{simulatorDeviceId}}-errors.log\"\nrm -f \"$LOG_FILE\"\n\n# Start log stream in background, filtering for JS errors, React Native exceptions, and Expo errors\nxcrun simctl spawn '{{simulatorDeviceId}}' log stream \\\n --level error \\\n --predicate 'subsystem == \"com.apple.JavaScriptCore\" OR subsystem == \"host.exp.Exponent\" OR 
eventMessage CONTAINS \"Error\" OR eventMessage CONTAINS \"ExceptionsManager\" OR eventMessage CONTAINS \"Unhandled JS Exception\" OR eventMessage CONTAINS \"RCTFatal\" OR eventMessage CONTAINS \"TypeError\" OR eventMessage CONTAINS \"ReferenceError\" OR eventMessage CONTAINS \"Render Error\" OR eventMessage CONTAINS \"undefined is not\"' \\\n > \"$LOG_FILE\" 2>&1 &\nLOG_PID=$!\necho \"Started log capture (PID: $LOG_PID) -> $LOG_FILE\"\n```\nThis captures React Native runtime errors, JS exceptions, and Expo Go errors without any app-side changes. The log file will be read by ExecuteSingleScenario when failures occur.\n\nNOTE: The log capture background process will be cleaned up automatically when the simulator shuts down in the ShutdownSimulator BT action.\n\n## Step 4: Verify Expo Go Can Launch (DO NOT NAVIGATE)\nLaunch Expo Go and open the project URL to trigger the JS bundle download:\n```\nxcrun simctl launch '{{simulatorDeviceId}}' host.exp.Exponent\nsleep 3\nxcrun simctl openurl '{{simulatorDeviceId}}' 'exp://localhost:{{metroPort}}'\n```\nMANDATORY: Check for 'Open in Expo Go?' system dialog within the first 5-10 seconds:\n- Use mcp__maestro__inspect_view_hierarchy to check for an 'Open' button\n- If found, tap 'Open' using mcp__maestro__tap_on with text 'Open'\n- Wait 3 seconds after tapping for the app to begin loading\nThis dialog check is NOT optional. 
The dialog blocks app loading entirely if not dismissed.\n\nMANDATORY DEV MENU CHECK: On EVERY polling attempt, also check for the Expo Developer Menu onboarding overlay:\n- Use mcp__maestro__inspect_view_hierarchy and look for text containing 'developer menu' or a 'Continue' button in a dialog/modal context\n- If found, tap 'Continue' using mcp__maestro__tap_on to dismiss the overlay\n- Wait 2 seconds after dismissing, then re-check the view hierarchy\n- This overlay can appear even when dev menu defaults were set, especially after a fresh Expo Go install\n\nPoll for app readiness (up to 8 attempts, 5 seconds apart = 40 seconds max):\n1. Wait 5 seconds initially after opening the URL\n2. On each attempt, use mcp__maestro__inspect_view_hierarchy to check the current screen\n3. Classify what you see:\n - SUCCESS: ANY of 'Tap anywhere', 'Explore as Guest', 'Login', or 'tab-Map' is visible \u2014 the app loaded\n - RENDER ERROR: 'Render Error' or 'Element type is invalid' is visible \u2014 the app DID launch (environment is working), but the app has a runtime bug. Set appLaunched=true because the environment setup succeeded.\n - STILL LOADING: Expo Go's home screen or a loading indicator \u2014 continue polling, wait 5 more seconds\n4. Take a screenshot on the final attempt for evidence\n5. If after 8 attempts the app still hasn't loaded past Expo Go's container, set appLaunched=false\n\nDo NOT tap anything or navigate beyond dismissing the 'Open in Expo Go?' dialog. 
The app just needs to be running.\n\n## Step 5: Save Initial Screenshot\n```\nxcrun simctl io '{{simulatorDeviceId}}' screenshot '{{screenshotDirectory}}/01-initial-state.png'\n```\n\n## Readiness Criteria\nSet environmentReady=true if ALL of:\n- Simulator is booted (simulatorReady=true)\n- Metro bundler is running on your port (expoServerReady=true)\n- Expo Go launched and app content is visible (appLaunched=true)\n\nSet environmentReady=false if Metro failed to start or app is stuck on Expo Go home (not the project).\n\nEnvironment retry count: {{envRetryCount}} / {{maxEnvRetries}}\nScreenshot directory: {{screenshotDirectory}}\n\nReport the device ID, your Metro port, and environment status when done.",
|
|
331
|
+
"contextKeys": [
|
|
332
|
+
"envRetryCount",
|
|
333
|
+
"maxEnvRetries",
|
|
334
|
+
"screenshotDirectory",
|
|
335
|
+
"simulatorDeviceId",
|
|
336
|
+
"simulatorDeviceName",
|
|
337
|
+
"agentInstanceName",
|
|
338
|
+
"metroPort",
|
|
339
|
+
"projectMaestroFlows"
|
|
340
|
+
],
|
|
341
|
+
"allowedTools": [
|
|
342
|
+
"Bash",
|
|
343
|
+
"mcp__maestro__list_devices",
|
|
344
|
+
"mcp__maestro__start_device",
|
|
345
|
+
"mcp__maestro__take_screenshot",
|
|
346
|
+
"mcp__maestro__inspect_view_hierarchy",
|
|
347
|
+
"mcp__maestro__tap_on",
|
|
348
|
+
"mcp__maestro__input_text",
|
|
349
|
+
"mcp__maestro__stop_app",
|
|
350
|
+
"mcp__maestro__launch_app",
|
|
351
|
+
"mcp__maestro__back",
|
|
352
|
+
"mcp__maestro__run_flow",
|
|
353
|
+
"mcp__maestro__run_flow_files",
|
|
354
|
+
"mcp__maestro__check_flow_syntax",
|
|
355
|
+
"mcp__maestro__query_docs",
|
|
356
|
+
"mcp__maestro__cheat_sheet"
|
|
357
|
+
],
|
|
358
|
+
"subagent": "qa-e2e-maestro",
|
|
359
|
+
"maxTurns": 30,
|
|
360
|
+
"outputSchema": {
|
|
361
|
+
"type": "object",
|
|
362
|
+
"properties": {
|
|
363
|
+
"deviceId": {
|
|
364
|
+
"type": "string",
|
|
365
|
+
"description": "The iOS Simulator device ID"
|
|
366
|
+
},
|
|
367
|
+
"metroPort": {
|
|
368
|
+
"type": "number",
|
|
369
|
+
"description": "The Metro bundler port assigned to this agent instance"
|
|
370
|
+
},
|
|
371
|
+
"simulatorReady": {
|
|
372
|
+
"type": "boolean",
|
|
373
|
+
"description": "Whether the simulator is booted and ready"
|
|
374
|
+
},
|
|
375
|
+
"expoServerReady": {
|
|
376
|
+
"type": "boolean",
|
|
377
|
+
"description": "Whether Metro bundler is running on the assigned port"
|
|
378
|
+
},
|
|
379
|
+
"appLaunched": {
|
|
380
|
+
"type": "boolean",
|
|
381
|
+
"description": "Whether the app launched successfully in Expo Go"
|
|
382
|
+
},
|
|
383
|
+
"environmentReady": {
|
|
384
|
+
"type": "boolean",
|
|
385
|
+
"description": "Overall environment readiness"
|
|
386
|
+
},
|
|
387
|
+
"environmentIssues": {
|
|
388
|
+
"type": "array",
|
|
389
|
+
"items": {
|
|
390
|
+
"type": "string"
|
|
391
|
+
},
|
|
392
|
+
"description": "Any issues encountered during setup"
|
|
393
|
+
},
|
|
394
|
+
"simulatorLogFile": {
|
|
395
|
+
"type": [
|
|
396
|
+
"string",
|
|
397
|
+
"null"
|
|
398
|
+
],
|
|
399
|
+
"description": "Path to the simulator error log file being captured in background"
|
|
400
|
+
}
|
|
401
|
+
},
|
|
402
|
+
"required": [
|
|
403
|
+
"deviceId",
|
|
404
|
+
"metroPort",
|
|
405
|
+
"simulatorReady",
|
|
406
|
+
"expoServerReady",
|
|
407
|
+
"appLaunched",
|
|
408
|
+
"environmentReady"
|
|
409
|
+
]
|
|
410
|
+
},
|
|
411
|
+
"outputKey": "envSetupResult",
|
|
412
|
+
"temperature": 0.2
|
|
413
|
+
},
|
|
414
|
+
{
|
|
415
|
+
"type": "selector",
|
|
416
|
+
"comment": "Check if environment is ready - if not, increment retry and fail to trigger retry",
|
|
417
|
+
"children": [
|
|
418
|
+
{
|
|
419
|
+
"type": "sequence",
|
|
420
|
+
"comment": "Environment ready - continue to test execution",
|
|
421
|
+
"children": [
|
|
422
|
+
{
|
|
423
|
+
"type": "condition",
|
|
424
|
+
"call": "IsEnvironmentReady",
|
|
425
|
+
"comment": "Deterministic check: parses envSetupResult and verifies all boolean fields (environmentReady, simulatorReady, expoServerReady, appLaunched) are true"
|
|
426
|
+
}
|
|
427
|
+
]
|
|
428
|
+
},
|
|
429
|
+
{
|
|
430
|
+
"type": "sequence",
|
|
431
|
+
"comment": "Environment not ready - increment retry and fail",
|
|
432
|
+
"children": [
|
|
433
|
+
{
|
|
434
|
+
"type": "action",
|
|
435
|
+
"call": "IncrementEnvRetryCount"
|
|
436
|
+
},
|
|
437
|
+
{
|
|
438
|
+
"type": "fail",
|
|
439
|
+
"comment": "Fail to trigger retry of environment setup",
|
|
440
|
+
"child": {
|
|
441
|
+
"type": "action",
|
|
442
|
+
"call": "NoOp"
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
]
|
|
446
|
+
}
|
|
447
|
+
]
|
|
448
|
+
}
|
|
449
|
+
]
|
|
450
|
+
}
|
|
451
|
+
},
|
|
452
|
+
{
|
|
453
|
+
"type": "selector",
|
|
454
|
+
"comment": "Check if environment setup succeeded after retries or failed permanently",
|
|
455
|
+
"children": [
|
|
456
|
+
{
|
|
457
|
+
"type": "sequence",
|
|
458
|
+
"comment": "Environment ready - proceed with E2E test execution using forEach iteration",
|
|
459
|
+
"children": [
|
|
460
|
+
{
|
|
461
|
+
"type": "condition",
|
|
462
|
+
"call": "EnvironmentSetupSucceeded",
|
|
463
|
+
"comment": "Deterministic check: parses envSetupResult and verifies all boolean fields are true (same logic as IsEnvironmentReady)"
|
|
464
|
+
},
|
|
465
|
+
{
|
|
466
|
+
"type": "action",
|
|
467
|
+
"call": "TakeSimulatorScreenshot",
|
|
468
|
+
"comment": "Capture post-setup state (deterministic, not LLM-dependent). Saved as 02-post-setup.png"
|
|
469
|
+
},
|
|
470
|
+
{
|
|
471
|
+
"type": "llm-action",
|
|
472
|
+
"name": "NavigateToMainScreen",
|
|
473
|
+
"comment": "PREREQUISITE NAVIGATION: Navigate to guest mode main screen before running any test scenarios",
|
|
474
|
+
"allowedTools": [
|
|
475
|
+
"Bash",
|
|
476
|
+
"Read",
|
|
477
|
+
"Glob",
|
|
478
|
+
"mcp__maestro__list_devices",
|
|
479
|
+
"mcp__maestro__start_device",
|
|
480
|
+
"mcp__maestro__take_screenshot",
|
|
481
|
+
"mcp__maestro__inspect_view_hierarchy",
|
|
482
|
+
"mcp__maestro__tap_on",
|
|
483
|
+
"mcp__maestro__input_text",
|
|
484
|
+
"mcp__maestro__stop_app",
|
|
485
|
+
"mcp__maestro__launch_app",
|
|
486
|
+
"mcp__maestro__back",
|
|
487
|
+
"mcp__maestro__run_flow",
|
|
488
|
+
"mcp__maestro__run_flow_files",
|
|
489
|
+
"mcp__maestro__check_flow_syntax",
|
|
490
|
+
"mcp__maestro__query_docs",
|
|
491
|
+
"mcp__maestro__cheat_sheet"
|
|
492
|
+
],
|
|
493
|
+
"prompt": "You are navigating the app to the main guest-mode screen as a prerequisite before E2E test scenarios are executed.\n\nCRITICAL: Do NOT call mcp__agentloop__report_trigger_result during navigation. Only report navigation status via the output schema. The BT handles trigger reporting after test execution.\n\n## CRITICAL: Environment Is ALREADY Set Up \u2014 DO NOT Redo Setup\nThe SetupTestEnvironment BT node has ALREADY completed ALL environment setup before you run:\n- Simulator is booted and verified\n- Expo Go is freshly installed with dev menu suppressed\n- Metro bundler is running and serving on your port\n- The app has been launched and verified ready\n\nDO NOT run ANY of the following commands \u2014 they will BREAK the working environment and waste your turns:\n- xcrun simctl terminate/uninstall/install \u2014 Expo Go is already installed\n- lsof/kill on the Metro port \u2014 Metro is already running\n- npx expo start \u2014 Metro is already started\n- xcrun simctl boot \u2014 Simulator is already booted\n\nYour ONLY job is to navigate from the current app state to the main screen with bottom tabs visible.\n\nDevice ID: {{simulatorDeviceId}}\nDevice Name: {{simulatorDeviceName}}\nAgent Instance: {{agentInstanceName}}\nEnvironment Setup: {{envSetupResult}}\n\n## Metro Port\nYour assigned Metro port is: {{metroPort}}. Use port {{metroPort}} for ALL Expo URL references. Do NOT calculate the port yourself.\n\nCRITICAL: Multiple QA agents run in parallel, each with their OWN simulator. You MUST use your specific device UDID ({{simulatorDeviceId}}) for ALL xcrun simctl commands. NEVER use 'booted' \u2014 it picks a random simulator.\n\n## Navigate to Guest Mode\n1. First, check the current app state with mcp__maestro__inspect_view_hierarchy\n2. If the app is already on the main screen (tab-Map visible), you are done\n3. 
If on splash/login, navigate manually:\n - Splash screen ('Tap anywhere'): use mcp__maestro__tap_on with point '50%,50%'\n - Login screen: use mcp__maestro__tap_on with text 'Explore as Guest'\n - Wait 15-30 seconds for main app to load, verify with inspect_view_hierarchy\n4. Alternatively, read and run guest-login.yaml:\n ```\n cat .agentloop/maestro-flows/guest-login.yaml\n ```\n If the openLink URL in that file uses port 8081, replace only the port with your assigned Metro port (localhost:8081 becomes localhost:{{metroPort}}).\n Run via mcp__maestro__run_flow with the YAML content as flow_yaml and your device_id.\n5. If the flow FAILS, try MANUAL navigation using individual MCP tools:\n a. Check current state with mcp__maestro__inspect_view_hierarchy\n b. Navigate step by step based on what you see (tap_on for buttons, wait, verify)\n c. If you see a Render Error or runtime crash, this is a TEST FAILURE \u2014 capture the error text and report navigationComplete=false with errorDetails\n d. After tapping 'Explore as Guest', wait 30 seconds and verify via inspect_view_hierarchy\n6. If the app is completely unresponsive after 2 attempts, use mcp__maestro__stop_app then mcp__maestro__launch_app to restart it (do NOT reinstall Expo Go)\n7. If still stuck after recovery, report navigationComplete=false\n\nCRITICAL YAML RULE: When using mcp__maestro__run_flow with flow_yaml, the YAML MUST ALWAYS start with an appId config section followed by '---'. 
Example:\n```yaml\nappId: host.exp.Exponent\n---\n- tapOn:\n point: 50%,50%\n```\nNEVER send flow_yaml without the appId header \u2014 Maestro will reject it with 'Config Section Required'.\n\n## Screenshot Persistence\nAfter key navigation steps, save a persistent screenshot using YOUR device UDID:\n```\nxcrun simctl io '{{simulatorDeviceId}}' screenshot '{{screenshotDirectory}}/NN-description.png'\n```\nUse sequential numbering starting from 03 (01-initial and 02-post-setup are already taken).\n\nScreenshot directory: {{screenshotDirectory}}\n\nTest credentials: username=agentloop1, password=Myp@ssw0rd!\nApp ID for dev build: com.grantreynolds.knowyourselfproject\nExpo Go bundle ID: host.exp.Exponent\n\nReport navigation status when done.",
|
|
494
|
+
"contextKeys": [
|
|
495
|
+
"simulatorDeviceId",
|
|
496
|
+
"simulatorDeviceName",
|
|
497
|
+
"agentInstanceName",
|
|
498
|
+
"envSetupResult",
|
|
499
|
+
"metroPort",
|
|
500
|
+
"screenshotDirectory",
|
|
501
|
+
"projectMaestroFlows"
|
|
502
|
+
],
|
|
503
|
+
"subagent": "qa-e2e-maestro",
|
|
504
|
+
"maxTurns": 30,
|
|
505
|
+
"outputSchema": {
|
|
506
|
+
"type": "object",
|
|
507
|
+
"properties": {
|
|
508
|
+
"navigationComplete": {
|
|
509
|
+
"type": "boolean",
|
|
510
|
+
"description": "Whether navigation to the main screen (bottom tabs visible) was successful"
|
|
511
|
+
},
|
|
512
|
+
"currentScreen": {
|
|
513
|
+
"type": "string",
|
|
514
|
+
"description": "Description of the current screen state after navigation"
|
|
515
|
+
},
|
|
516
|
+
"errorDetails": {
|
|
517
|
+
"type": [
|
|
518
|
+
"string",
|
|
519
|
+
"null"
|
|
520
|
+
],
|
|
521
|
+
"description": "Error details if navigation failed (e.g., Render Error text, crash details)"
|
|
522
|
+
}
|
|
523
|
+
},
|
|
524
|
+
"required": [
|
|
525
|
+
"navigationComplete",
|
|
526
|
+
"currentScreen"
|
|
527
|
+
]
|
|
528
|
+
},
|
|
529
|
+
"outputKey": "navigationResult",
|
|
530
|
+
"temperature": 0.2
|
|
531
|
+
},
|
|
532
|
+
{
|
|
533
|
+
"type": "action",
|
|
534
|
+
"call": "InitializeScenarioResults",
|
|
535
|
+
"comment": "Initialize the scenarioResults accumulator array on the blackboard before forEach iteration"
|
|
536
|
+
},
|
|
537
|
+
{
|
|
538
|
+
"type": "forEach",
|
|
539
|
+
"name": "IterateScenarios",
|
|
540
|
+
"comment": "Iterate over each E2E scenario from the test plan, executing one at a time with dedicated LLM turns",
|
|
541
|
+
"collection": "e2eTestPlan.e2eScenarios",
|
|
542
|
+
"itemKey": "currentScenario",
|
|
543
|
+
"indexKey": "currentScenarioIndex",
|
|
544
|
+
"continueOnFailure": true,
|
|
545
|
+
"child": {
|
|
546
|
+
"type": "sequence",
|
|
547
|
+
"comment": "Execute a single scenario, accumulate its result, and take an inter-scenario screenshot",
|
|
548
|
+
"children": [
|
|
549
|
+
{
|
|
550
|
+
"type": "llm-action",
|
|
551
|
+
"name": "ExecuteSingleScenario",
|
|
552
|
+
"comment": "Execute one E2E test scenario using Maestro MCP tools",
|
|
553
|
+
"allowedTools": [
|
|
554
|
+
"Bash",
|
|
555
|
+
"Read",
|
|
556
|
+
"Glob",
|
|
557
|
+
"mcp__maestro__list_devices",
|
|
558
|
+
"mcp__maestro__start_device",
|
|
559
|
+
"mcp__maestro__take_screenshot",
|
|
560
|
+
"mcp__maestro__inspect_view_hierarchy",
|
|
561
|
+
"mcp__maestro__tap_on",
|
|
562
|
+
"mcp__maestro__input_text",
|
|
563
|
+
"mcp__maestro__stop_app",
|
|
564
|
+
"mcp__maestro__launch_app",
|
|
565
|
+
"mcp__maestro__back",
|
|
566
|
+
"mcp__maestro__run_flow",
|
|
567
|
+
"mcp__maestro__run_flow_files",
|
|
568
|
+
"mcp__maestro__check_flow_syntax",
|
|
569
|
+
"mcp__maestro__query_docs",
|
|
570
|
+
"mcp__maestro__cheat_sheet"
|
|
571
|
+
],
|
|
572
|
+
"prompt": "## MANDATORY RULES \u2014 VIOLATION = REJECTED RESULT\n1. scenarioName output MUST be exactly: {{currentScenario.name}}\n2. You MUST actually execute the scenario steps using Maestro MCP tools BEFORE responding. Minimum 5 tool calls required.\n3. Do NOT call mcp__agentloop__report_trigger_result. Only report via output schema.\n\n## Current Scenario To Execute\n**Scenario Name:** {{currentScenario.name}}\n**Scenario Index:** {{currentScenarioIndex}}\n**Description:** {{currentScenario.description}}\n**Steps:** {{currentScenario.steps}}\n**Priority:** {{currentScenario.priority}}\n\nYour scenarioName output MUST be exactly: {{currentScenario.name}}\nExecute ONLY this scenario.\n\n## Environment State \u2014 ALREADY SET UP\nThe app is ALREADY on the main screen in guest mode with all 5 bottom tabs visible.\n- Do NOT run xcrun simctl terminate/uninstall/install/boot\n- Do NOT run lsof/kill on Metro port or npx expo start\n- Do NOT launch/restart the app or navigate splash/onboarding/login\n- Do NOT tap 'Explore as Guest' or run guest-login.yaml\n- Start testing IMMEDIATELY from the current main screen state\n\nDevice ID: {{simulatorDeviceId}}\nDevice Name: {{simulatorDeviceName}}\nMetro Port: {{metroPort}} (use for ALL Expo URL references)\nAgent Instance: {{agentInstanceName}}\nTask: {{taskDescription}}\n\nCRITICAL: Use YOUR device UDID ({{simulatorDeviceId}}) for ALL xcrun simctl commands. NEVER use 'booted'.\n\n## Required Execution Steps\nYou MUST perform ALL of these steps in order:\n1. take_screenshot \u2014 capture 'before' state, save to {{screenshotDirectory}}/S{{currentScenarioIndex}}-01-before.png\n2. inspect_view_hierarchy \u2014 verify current screen state and visible elements\n3. Execute EACH step from the scenario using tap_on, input_text, run_flow, or other Maestro tools\n4. After each interaction, inspect_view_hierarchy to verify the expected result\n5. 
take_screenshot \u2014 capture 'after' state, save to {{screenshotDirectory}}/S{{currentScenarioIndex}}-99-after.png\n6. ONLY THEN respond with results\n\nIf the scenario fails, retry up to {{maxScenarioRetries}} times before marking failed.\n\n## Available Tools\n- take_screenshot, inspect_view_hierarchy \u2014 observe state\n- tap_on, input_text, back \u2014 user interactions\n- stop_app, launch_app \u2014 app lifecycle\n- run_flow (YAML must start with appId header + '---'), run_flow_files\n- query_docs, cheat_sheet \u2014 Maestro reference\n- Bash \u2014 for xcrun simctl io screenshots and scrolling\n\nNOTE: swipe/scroll/assert_visible/wait_for are NOT individual MCP tools. Use inspect_view_hierarchy to verify, polling loops to wait, Bash xcrun for swipe/scroll, and run_flow for complex sequences.\n\n## Screenshot Persistence\nAfter EVERY take_screenshot, also save persistent copy:\n```\nxcrun simctl io '{{simulatorDeviceId}}' screenshot '{{screenshotDirectory}}/S{{currentScenarioIndex}}-NN-description.png'\n```\n\n## On App Crash (ONLY if the app crashes DURING this scenario's execution)\nIf the app crashes while you are executing scenario steps, you may attempt recovery UP TO 2 TIMES:\n1. stop_app to kill Expo Go\n2. launch_app with host.exp.Exponent\n3. Wait 10s, inspect_view_hierarchy\n4. If 'Open in Expo Go?' dialog, tap_on 'Open'\n5. Wait 30-45s for bundle reload, verify with inspect_view_hierarchy\nIf crash recovery fails after 2 attempts, mark this scenario as passed=false and explain in errorDetails that this was an environment failure (app crash, recovery unsuccessful) rather than continuing to retry. Do NOT burn remaining turns on repeated recovery attempts.\n\nYAML RULE: flow_yaml MUST start with appId config + '---':\n```yaml\nappId: host.exp.Exponent\n---\n- tapOn:\n point: 50%,50%\n```\n\nCredentials: username=agentloop1, password=Myp@ssw0rd!\nExpo Go bundle ID: host.exp.Exponent\n\n## Runtime Error Extraction\nOn React Native error screen (red overlay):\n1. inspect_view_hierarchy for full error text\n2. 
Extract: errorTitle, errorMessage, componentName, sourceFile\n3. Save in runtimeErrorsDetected, viewHierarchyOnError\n4. Screenshot as evidence\n5. Read /tmp/simulator-{{simulatorDeviceId}}-errors.log (tail -100)\n\n## Simulator Log on Failure\n```bash\nLOG_FILE=\"/tmp/simulator-{{simulatorDeviceId}}-errors.log\"\n[ -f \"$LOG_FILE\" ] && tail -200 \"$LOG_FILE\"\n```\nFocus on: Render Error, TypeError, ReferenceError, component names, file paths, ExceptionsManager.\n\nScreenshot directory: {{screenshotDirectory}}\n\nREMINDER: scenarioName MUST be exactly: {{currentScenario.name}} \u2014 Execute ALL scenario steps before responding.",
|
|
573
|
+
"contextKeys": [
|
|
574
|
+
"taskDescription",
|
|
575
|
+
"currentScenario",
|
|
576
|
+
"currentScenarioIndex",
|
|
577
|
+
"navigationResult",
|
|
578
|
+
"envSetupResult",
|
|
579
|
+
"maxScenarioRetries",
|
|
580
|
+
"screenshotDirectory",
|
|
581
|
+
"simulatorDeviceId",
|
|
582
|
+
"simulatorDeviceName",
|
|
583
|
+
"agentInstanceName",
|
|
584
|
+
"metroPort",
|
|
585
|
+
"projectMaestroFlows"
|
|
586
|
+
],
|
|
587
|
+
"subagent": "qa-e2e-scenario",
|
|
588
|
+
"maxTurns": 500,
|
|
589
|
+
"minTurns": 5,
|
|
590
|
+
"outputSchema": {
|
|
591
|
+
"type": "object",
|
|
592
|
+
"properties": {
|
|
593
|
+
"scenarioName": {
|
|
594
|
+
"type": "string",
|
|
595
|
+
"description": "MUST be the EXACT name field from the input scenario. Copy verbatim."
|
|
596
|
+
},
|
|
597
|
+
"passed": {
|
|
598
|
+
"type": "boolean"
|
|
599
|
+
},
|
|
600
|
+
"retryCount": {
|
|
601
|
+
"type": "number"
|
|
602
|
+
},
|
|
603
|
+
"stepsExecuted": {
|
|
604
|
+
"type": "array",
|
|
605
|
+
"items": {
|
|
606
|
+
"type": "string"
|
|
607
|
+
}
|
|
608
|
+
},
|
|
609
|
+
"assertionResults": {
|
|
610
|
+
"type": "array",
|
|
611
|
+
"items": {
|
|
612
|
+
"type": "string"
|
|
613
|
+
}
|
|
614
|
+
},
|
|
615
|
+
"screenshotsBefore": {
|
|
616
|
+
"type": "array",
|
|
617
|
+
"items": {
|
|
618
|
+
"type": "string"
|
|
619
|
+
}
|
|
620
|
+
},
|
|
621
|
+
"screenshotsAfter": {
|
|
622
|
+
"type": "array",
|
|
623
|
+
"items": {
|
|
624
|
+
"type": "string"
|
|
625
|
+
}
|
|
626
|
+
},
|
|
627
|
+
"errorDetails": {
|
|
628
|
+
"type": [
|
|
629
|
+
"string",
|
|
630
|
+
"null"
|
|
631
|
+
]
|
|
632
|
+
},
|
|
633
|
+
"runtimeErrorsDetected": {
|
|
634
|
+
"type": "array",
|
|
635
|
+
"items": {
|
|
636
|
+
"type": "object",
|
|
637
|
+
"properties": {
|
|
638
|
+
"errorTitle": {
|
|
639
|
+
"type": "string",
|
|
640
|
+
"description": "The error title (e.g., 'Render Error')"
|
|
641
|
+
},
|
|
642
|
+
"errorMessage": {
|
|
643
|
+
"type": "string",
|
|
644
|
+
"description": "The full error message text extracted from the view hierarchy"
|
|
645
|
+
},
|
|
646
|
+
"componentName": {
|
|
647
|
+
"type": [
|
|
648
|
+
"string",
|
|
649
|
+
"null"
|
|
650
|
+
],
|
|
651
|
+
"description": "The component name from 'Check the render method of X' if present"
|
|
652
|
+
},
|
|
653
|
+
"sourceFile": {
|
|
654
|
+
"type": [
|
|
655
|
+
"string",
|
|
656
|
+
"null"
|
|
657
|
+
],
|
|
658
|
+
"description": "Source file name if visible in the error (e.g., 'BottomTabBar.js')"
|
|
659
|
+
},
|
|
660
|
+
"screenshotPath": {
|
|
661
|
+
"type": [
|
|
662
|
+
"string",
|
|
663
|
+
"null"
|
|
664
|
+
],
|
|
665
|
+
"description": "Path to the screenshot capturing this error"
|
|
666
|
+
}
|
|
667
|
+
},
|
|
668
|
+
"required": [
|
|
669
|
+
"errorTitle",
|
|
670
|
+
"errorMessage"
|
|
671
|
+
]
|
|
672
|
+
},
|
|
673
|
+
"description": "Runtime errors detected via view hierarchy inspection during this scenario"
|
|
674
|
+
},
|
|
675
|
+
"viewHierarchyOnError": {
|
|
676
|
+
"type": [
|
|
677
|
+
"string",
|
|
678
|
+
"null"
|
|
679
|
+
],
|
|
680
|
+
"description": "Raw view hierarchy text captured when a runtime error was detected"
|
|
681
|
+
},
|
|
682
|
+
"simulatorLogExcerpt": {
|
|
683
|
+
"type": [
|
|
684
|
+
"string",
|
|
685
|
+
"null"
|
|
686
|
+
],
|
|
687
|
+
"description": "Relevant simulator log lines captured during this scenario (filtered for errors/exceptions)"
|
|
688
|
+
}
|
|
689
|
+
},
|
|
690
|
+
"required": [
|
|
691
|
+
"scenarioName",
|
|
692
|
+
"passed",
|
|
693
|
+
"stepsExecuted",
|
|
694
|
+
"assertionResults"
|
|
695
|
+
]
|
|
696
|
+
},
|
|
697
|
+
"outputKey": "currentScenarioResult",
|
|
698
|
+
"temperature": 0.3
|
|
699
|
+
},
|
|
700
|
+
{
|
|
701
|
+
"type": "action",
|
|
702
|
+
"call": "AccumulateScenarioResult",
|
|
703
|
+
"comment": "Push currentScenarioResult into the scenarioResults accumulator array"
|
|
704
|
+
},
|
|
705
|
+
{
|
|
706
|
+
"type": "action",
|
|
707
|
+
"call": "TakeSimulatorScreenshot",
|
|
708
|
+
"comment": "Capture inter-scenario screenshot for evidence between iterations"
|
|
709
|
+
}
|
|
710
|
+
]
|
|
711
|
+
}
|
|
712
|
+
},
|
|
713
|
+
{
|
|
714
|
+
"type": "action",
|
|
715
|
+
"call": "TakeSimulatorScreenshot",
|
|
716
|
+
"comment": "Capture post-test state for PR evidence (deterministic, not LLM-dependent). Shows final app state after all E2E scenarios."
|
|
717
|
+
},
|
|
718
|
+
{
|
|
719
|
+
"type": "selector",
|
|
720
|
+
"comment": "Save successful flows as reusable YAML (non-critical)",
|
|
721
|
+
"children": [
|
|
722
|
+
{
|
|
723
|
+
"type": "llm-action",
|
|
724
|
+
"name": "SaveSuccessfulFlows",
|
|
725
|
+
"allowedTools": [
|
|
726
|
+
"Bash",
|
|
727
|
+
"Read",
|
|
728
|
+
"mcp__maestro__check_flow_syntax"
|
|
729
|
+
],
|
|
730
|
+
"prompt": "Based on the E2E test execution results, generate reusable Maestro YAML flow files for any SUCCESSFUL navigation paths that were discovered.\n\nE2E Scenario Results: {{scenarioResults}}\nTask: {{taskDescription}}\nProject Root: {{projectRoot}}\n\nFor each successful test scenario, extract the sequence of UI interactions that worked and save them as Maestro YAML flow files.\n\n### Maestro YAML Format\nEach flow file starts with an appId header, then a list of commands:\n```yaml\nappId: host.exp.Exponent\n---\n# Flow description\n# Generated by qa-e2e-maestro on DATE\n\n- launchApp\n- tapOn: \"Button Text\"\n- assertVisible: \"Expected Text\"\n- takeScreenshot: \"step-name\"\n```\n\nAvailable Maestro YAML commands:\n- `launchApp` / `stopApp` - launch/stop the app\n- `tapOn: \"text\"` - tap a UI element by text\n- `inputText: \"text\"` - type text into focused field\n- `assertVisible: \"text\"` - assert element is visible\n- `swipe: { direction: \"UP\", duration: 500 }` - swipe gesture\n- `scroll` - scroll the view\n- `takeScreenshot: \"name\"` - capture screenshot\n- `waitForAnimationToEnd` - wait for animations\n- `extendedWaitUntil: { visible: \"text\", timeout: 30000 }` - wait with timeout\n\n### What to Save\n1. **ALWAYS save an `app-launch.yaml`** if the app was successfully launched (this is the most valuable reusable flow)\n2. Save a flow for each successful navigation path (e.g., `guest-mode-entry.yaml`, `login-flow.yaml`)\n3. Save task-specific flows if relevant\n\n### How to Save\nUse Bash to create the directory and write files:\n```bash\nmkdir -p .agentloop/maestro-flows\ncat > .agentloop/maestro-flows/app-launch.yaml << 'FLOW_EOF'\nappId: host.exp.Exponent\n---\n# App Launch Flow\n- launchApp\n- tapOn: \"Open\"\n- assertVisible: \"Login\"\nFLOW_EOF\n```\n\nIMPORTANT: Write the flows to the PROJECT ROOT (the working directory), not the worktree path. 
The CWD is already set to the worktree, which mirrors the project structure, so always use the relative path .agentloop/maestro-flows/ exactly as shown above rather than an absolute path.\n\n## Existing Maestro Flows\nAvailable flows in .agentloop/maestro-flows/: {{projectMaestroFlows}}\n\nDo NOT regenerate flows that already exist. Only save NEW flows for navigation paths not already covered.\n\n### Validate Flows\nAfter writing each YAML flow file, validate it using mcp__maestro__check_flow_syntax. If validation fails, fix the YAML and rewrite the file so that only syntactically valid flows remain saved.\n\nIf no scenarios succeeded, skip flow generation and report that (set skipped=true and return an empty flowsSaved array).",
|
|
731
|
+
"contextKeys": [
|
|
732
|
+
"scenarioResults",
|
|
733
|
+
"taskDescription",
|
|
734
|
+
"projectMaestroFlows"
|
|
735
|
+
],
|
|
736
|
+
"subagent": "qa-e2e-maestro",
|
|
737
|
+
"maxTurns": 10,
|
|
738
|
+
"outputSchema": {
|
|
739
|
+
"type": "object",
|
|
740
|
+
"properties": {
|
|
741
|
+
"flowsSaved": {
|
|
742
|
+
"type": "array",
|
|
743
|
+
"items": {
|
|
744
|
+
"type": "string"
|
|
745
|
+
},
|
|
746
|
+
"description": "List of YAML flow file paths saved"
|
|
747
|
+
},
|
|
748
|
+
"skipped": {
|
|
749
|
+
"type": "boolean",
|
|
750
|
+
"description": "Whether flow generation was skipped due to no successful scenarios"
|
|
751
|
+
}
|
|
752
|
+
},
|
|
753
|
+
"required": [
|
|
754
|
+
"flowsSaved"
|
|
755
|
+
]
|
|
756
|
+
},
|
|
757
|
+
"outputKey": "savedFlows",
|
|
758
|
+
"temperature": 0.2
|
|
759
|
+
},
|
|
760
|
+
{
|
|
761
|
+
"type": "action",
|
|
762
|
+
"call": "NoOp",
|
|
763
|
+
"comment": "Continue if flow saving fails - non-critical"
|
|
764
|
+
}
|
|
765
|
+
]
|
|
766
|
+
},
|
|
767
|
+
{
|
|
768
|
+
"type": "action",
|
|
769
|
+
"call": "AggregateScenarioResults",
|
|
770
|
+
"comment": "Deterministic aggregation of accumulated scenarioResults into e2eTestResults (replaces LLM-based AggregateTestResults)"
|
|
771
|
+
},
|
|
772
|
+
{
|
|
773
|
+
"type": "llm-action",
|
|
774
|
+
"name": "AnalyzeE2EResults",
|
|
775
|
+
"allowedTools": [],
|
|
776
|
+
"prompt": "Analyze the E2E test results and classify any failures.\n\nTask: {{taskDescription}}\nTask Files: {{taskFiles}}\nE2E Test Plan: {{e2eTestPlan}}\nE2E Test Results: {{e2eTestResults}}\nQA Tester Comments: {{taskComments}}\n\nFor each failed test scenario, classify it as:\n- **task-related**: The failure is caused by the engineer's code changes. The UI does not behave as expected due to bugs in the changed files.\n- **environment**: The failure is caused by simulator/infrastructure issues ONLY (see strict definition below). NOT for UI element failures.\n- **flaky**: The failure is intermittent and resolved on retry. Timing/animation issues.\n- **pre-existing**: The UI issue existed before the engineer's changes and is not related to this task.\n\n## CRITICAL: UI Interaction Failure Classification\n\nUI interaction failures (element not found, tap failed, assertion failed) should DEFAULT to **task-related**, NOT environment. The engineer's changes likely removed, renamed, or broke a UI element.\n\n### Classify as TASK-RELATED:\n- \"element not found for text 'X'\" -> task-related (engineer's changes removed/renamed the element, or the feature doesn't render correctly)\n- \"assertion failed: X is not visible\" -> task-related (the element should be visible but isn't due to a code bug)\n- \"tap failed on element X\" -> task-related (element exists but isn't tappable, wrong state, or doesn't appear as expected)\n- \"could not find element matching X\" -> task-related (UI element is missing from the rendered screen)\n- \"timeout waiting for element X\" -> task-related (the element never appeared, likely a rendering or navigation bug)\n- Any failure involving UI elements, text, buttons, navigation, or screen content -> task-related or pre-existing, NEVER environment\n\n### Classify as ENVIRONMENT (strict definition):\n- \"Simulator disconnected\" or \"Simulator failed to boot\" -> environment\n- \"Metro bundler timeout\" or \"Metro bundler crashed\" -> 
environment\n- \"Expo Go crashed on launch\" or \"Expo Go failed to install\" -> environment\n- \"Bundle compilation failed\" (before the app renders any UI) -> environment\n- Network connectivity failures unrelated to the app -> environment\n- \"White screen\" with NO rendered UI at all (app failed to load entirely) -> environment\n\nEnvironment failures are ONLY about infrastructure (simulator, Metro, Expo Go, network, bundle compilation). If the app loaded and rendered ANY UI at all, failures interacting with that UI are NOT environment issues.\n\n### When in doubt about setup/navigation failures: classify as ENVIRONMENT\nThis tie-breaker applies ONLY to failures that happen BEFORE the scenario's own steps begin. If you are unsure whether such a failure is environment or task-related, classify as **environment**. Navigation failures (can't find onboarding elements, can't reach main screen, app crash during setup) are almost always environment/timing issues, not code bugs. Only classify as task-related when the failure clearly occurs DURING actual scenario step execution on the correct screen and directly relates to the engineer's code changes; for those failures the UI interaction rules above apply (task-related by default, or pre-existing).\n\n## CRITICAL: React Runtime Error Classification\n\nWhen the view hierarchy or screenshots show ANY of these patterns, classify the failure as **task-related** (NOT environment):\n- \"Render Error\" text visible in the view hierarchy\n- \"Element type is invalid\" error message\n- \"Check the render method of\" followed by a component name\n- \"undefined is not an object\" in error overlay\n- \"TypeError\" or \"ReferenceError\" in red error screen\n- Any red error overlay showing a JavaScript stack trace with project file paths (e.g., src/pages/*, src/components/*)\n\nThese are React Native runtime crashes caused by bugs in the engineer's code (broken imports, undefined variables, type errors). 
They are NOT environment issues even though they appear inside Expo Go's error overlay.\n\nEnvironment issues are ONLY: simulator won't boot, Metro won't start, Expo Go won't install, network errors, bundle compilation failures (before the app renders).\n\nIMPORTANT:\n- Only classify as task-related if the failure clearly relates to files or features the engineer modified\n- Pre-existing failures do NOT block approval\n- Environment failures do NOT block approval (they trigger blocked status instead)\n- Flaky failures that pass on retry are passes\n\n## CRITICAL: Use Extracted Error Text for Accurate Suggested Fixes\nWhen runtime errors were detected (check e2eTestResults.runtimeErrorsDetected), use the EXACT error information for your suggestedFix:\n- If the error says 'Check the render method of `BottomTabBar`' -> suggest checking BottomTabBar.js for undefined imports\n- If the error says 'Check the render method of `BaselineMapPage`' -> suggest checking BaselineMapPage.js\n- Extract the specific component name and source file from the error, NOT from the taskFiles list\n- The suggestedFix MUST reference the actual failing component/file from the error, not generic task files\n- Include the simulator log excerpt if available (e2eTestResults.simulatorLogExcerpt) to provide stack trace context\n\nDO NOT suggest checking files from the task description if the error clearly points to a different file. 
The error text is the ground truth for what needs fixing.\n\nMANDATORY ZERO-PASS HARD FAIL RULE:\n- If ZERO test scenarios passed (passedScenarios == 0), the overallVerdict MUST be 'fail' \u2014 NO EXCEPTIONS\n- You may NOT classify all failures as 'environment issues' and return verdict 'pass'\n- You may NOT approve based on code review alone without any passing E2E tests\n- 'Environment issues prevented testing' is a FAIL, not a pass\n- A 'pass' verdict requires AT LEAST ONE successfully executed E2E test scenario\n\nCRITICAL RULE \u2014 DO NOT RECLASSIFY PASSING SCENARIOS:\n- Only analyze and classify scenarios where passed=false (FAILED scenarios)\n- Scenarios where passed=true are PASSED \u2014 do NOT reclassify them as failures\n- Do NOT count passing scenarios in taskRelatedFailures, environmentFailures, flakyFailures, or preExistingFailures\n- If you have concerns about test evidence quality on passing scenarios, note them in testReport but they are NOT failures\n- 'Insufficient assertions' or 'screenshot-only evidence' on a passing scenario is a test quality note, NOT a task-related failure\n- failureDetails array should ONLY contain entries for scenarios where passed=false\n- If all scenarios passed (passed=true), then: overallVerdict MUST be 'pass', all failure counts MUST be 0\n\nVERDICT RULES:\n- If ALL scenarios passed=true AND there are no runtime errors: overallVerdict = 'pass'\n- If ANY scenario has passed=false with task-related classification: overallVerdict = 'fail'\n- If failures are ONLY environment/flaky AND at least one scenario passed: overallVerdict = 'blocked' (if ZERO scenarios passed, the ZERO-PASS rule above takes precedence and the verdict is 'fail')\n- taskRelatedFailures count MUST equal the number of FAILED scenarios classified as 'task-related'\n- Do NOT inflate failure counts by reclassifying passing scenarios\n\nGenerate a detailed E2E test report suitable for a task comment.",
|
|
777
|
+
"contextKeys": [
|
|
778
|
+
"taskDescription",
|
|
779
|
+
"taskFiles",
|
|
780
|
+
"e2eTestPlan",
|
|
781
|
+
"e2eTestResults",
|
|
782
|
+
"taskComments"
|
|
783
|
+
],
|
|
784
|
+
"outputSchema": {
|
|
785
|
+
"type": "object",
|
|
786
|
+
"properties": {
|
|
787
|
+
"overallVerdict": {
|
|
788
|
+
"type": "string",
|
|
789
|
+
"enum": [
|
|
790
|
+
"pass",
|
|
791
|
+
"fail",
|
|
792
|
+
"blocked"
|
|
793
|
+
],
|
|
794
|
+
"description": "Overall E2E verdict"
|
|
795
|
+
},
|
|
796
|
+
"taskRelatedFailures": {
|
|
797
|
+
"type": "number",
|
|
798
|
+
"description": "Count of failures caused by engineer's changes"
|
|
799
|
+
},
|
|
800
|
+
"environmentFailures": {
|
|
801
|
+
"type": "number",
|
|
802
|
+
"description": "Count of environment-related failures"
|
|
803
|
+
},
|
|
804
|
+
"flakyFailures": {
|
|
805
|
+
"type": "number",
|
|
806
|
+
"description": "Count of flaky failures that resolved on retry"
|
|
807
|
+
},
|
|
808
|
+
"preExistingFailures": {
|
|
809
|
+
"type": "number",
|
|
810
|
+
"description": "Count of pre-existing UI issues"
|
|
811
|
+
},
|
|
812
|
+
"failureDetails": {
|
|
813
|
+
"type": "array",
|
|
814
|
+
"items": {
|
|
815
|
+
"type": "object",
|
|
816
|
+
"properties": {
|
|
817
|
+
"scenario": {
|
|
818
|
+
"type": "string"
|
|
819
|
+
},
|
|
820
|
+
"classification": {
|
|
821
|
+
"type": "string",
|
|
822
|
+
"enum": [
|
|
823
|
+
"task-related",
|
|
824
|
+
"environment",
|
|
825
|
+
"flaky",
|
|
826
|
+
"pre-existing"
|
|
827
|
+
]
|
|
828
|
+
},
|
|
829
|
+
"description": {
|
|
830
|
+
"type": "string"
|
|
831
|
+
},
|
|
832
|
+
"suggestedFix": {
|
|
833
|
+
"type": [
|
|
834
|
+
"string",
|
|
835
|
+
"null"
|
|
836
|
+
]
|
|
837
|
+
}
|
|
838
|
+
},
|
|
839
|
+
"required": [
|
|
840
|
+
"scenario",
|
|
841
|
+
"classification",
|
|
842
|
+
"description"
|
|
843
|
+
]
|
|
844
|
+
}
|
|
845
|
+
},
|
|
846
|
+
"testReport": {
|
|
847
|
+
"type": "string",
|
|
848
|
+
"description": "Detailed E2E test report for task comment"
|
|
849
|
+
},
|
|
850
|
+
"recommendedStatus": {
|
|
851
|
+
"type": "string",
|
|
852
|
+
"enum": [
|
|
853
|
+
"done",
|
|
854
|
+
"todo",
|
|
855
|
+
"blocked"
|
|
856
|
+
],
|
|
857
|
+
"description": "Recommended status based on results"
|
|
858
|
+
},
|
|
859
|
+
"statusReason": {
|
|
860
|
+
"type": "string",
|
|
861
|
+
"description": "Reason for the recommended status"
|
|
862
|
+
}
|
|
863
|
+
},
|
|
864
|
+
"required": [
|
|
865
|
+
"overallVerdict",
|
|
866
|
+
"taskRelatedFailures",
|
|
867
|
+
"testReport",
|
|
868
|
+
"recommendedStatus",
|
|
869
|
+
"statusReason"
|
|
870
|
+
]
|
|
871
|
+
},
|
|
872
|
+
"outputKey": "analyzedE2EResults",
|
|
873
|
+
"temperature": 0.3
|
|
874
|
+
},
|
|
875
|
+
{
|
|
876
|
+
"type": "selector",
|
|
877
|
+
"comment": "Status decision based on E2E analysis",
|
|
878
|
+
"children": [
|
|
879
|
+
{
|
|
880
|
+
"type": "sequence",
|
|
881
|
+
"comment": "E2E tests passed - approve",
|
|
882
|
+
"children": [
|
|
883
|
+
{
|
|
884
|
+
"type": "condition",
|
|
885
|
+
"call": "E2ETestsPassed",
|
|
886
|
+
"comment": "Deterministic check: overallVerdict=pass, taskRelatedFailures=0, passedScenarios>0"
|
|
887
|
+
},
|
|
888
|
+
{
|
|
889
|
+
"type": "llm-action",
|
|
890
|
+
"name": "WriteE2EApprovalComment",
|
|
891
|
+
"allowedTools": [],
|
|
892
|
+
"prompt": "Write a brief E2E approval comment for the task.\n\nTask: {{taskDescription}}\nE2E Test Report: {{analyzedE2EResults}}\nE2E Test Results: {{e2eTestResults}}\nScreenshot Directory: {{screenshotDirectory}}\n\nFormat the comment as:\n\n## E2E QA Maestro - APPROVED\n\n**Scenarios Tested:** X passed / Y total\n\n### E2E Scenarios\n- [scenario name]: PASS/FAIL - [brief description of what was validated]\n\n[Brief summary of what was validated]\n\n**Screenshots**: {{screenshotDirectory}}\n\nIf there were pre-existing issues, note them briefly as known issues not blocking.",
|
|
893
|
+
"contextKeys": [
|
|
894
|
+
"taskDescription",
|
|
895
|
+
"analyzedE2EResults",
|
|
896
|
+
"e2eTestResults",
|
|
897
|
+
"screenshotDirectory"
|
|
898
|
+
],
|
|
899
|
+
"outputSchema": {
|
|
900
|
+
"type": "object",
|
|
901
|
+
"properties": {
|
|
902
|
+
"comment": {
|
|
903
|
+
"type": "string"
|
|
904
|
+
}
|
|
905
|
+
},
|
|
906
|
+
"required": [
|
|
907
|
+
"comment"
|
|
908
|
+
]
|
|
909
|
+
},
|
|
910
|
+
"outputKey": "approvalComment",
|
|
911
|
+
"temperature": 0.4
|
|
912
|
+
},
|
|
913
|
+
{
|
|
914
|
+
"type": "action",
|
|
915
|
+
"call": "AddApprovalComment"
|
|
916
|
+
},
|
|
917
|
+
{
|
|
918
|
+
"type": "action",
|
|
919
|
+
"call": "ReportTriggerPass"
|
|
920
|
+
}
|
|
921
|
+
]
|
|
922
|
+
},
|
|
923
|
+
{
|
|
924
|
+
"type": "sequence",
|
|
925
|
+
"comment": "ALL failures are pre-existing and unrelated to task - escalate for fix instead of bouncing to engineer",
|
|
926
|
+
"children": [
|
|
927
|
+
{
|
|
928
|
+
"type": "condition",
|
|
929
|
+
"call": "IsPreExistingBugBlocking",
|
|
930
|
+
"comment": "Check if ALL failures are pre-existing with zero task-related failures"
|
|
931
|
+
},
|
|
932
|
+
{
|
|
933
|
+
"type": "action",
|
|
934
|
+
"call": "EscalatePreExistingBug",
|
|
935
|
+
"comment": "Create bug-fix task, add DAG dependency blocking original task, notify PM and merge-resolver"
|
|
936
|
+
},
|
|
937
|
+
{
|
|
938
|
+
"type": "action",
|
|
939
|
+
"call": "ReportTriggerFail",
|
|
940
|
+
"comment": "Move task to todo - DAG dependency on the bug-fix task prevents orchestrator from re-assigning until fix is done. NOT a rejection of engineer's work."
|
|
941
|
+
}
|
|
942
|
+
]
|
|
943
|
+
},
|
|
944
|
+
{
|
|
945
|
+
"type": "sequence",
|
|
946
|
+
"comment": "Task-related E2E failures - reject back to engineer",
|
|
947
|
+
"children": [
|
|
948
|
+
{
|
|
949
|
+
"type": "condition",
|
|
950
|
+
"call": "HasTaskRelatedE2EFailures",
|
|
951
|
+
"comment": "Deterministic check: taskRelatedFailures > 0"
|
|
952
|
+
},
|
|
953
|
+
{
|
|
954
|
+
"type": "llm-action",
|
|
955
|
+
"name": "WriteE2ERejectionComment",
|
|
956
|
+
"allowedTools": [],
|
|
957
|
+
"prompt": "Write a detailed E2E rejection comment for the task.\n\nTask: {{taskDescription}}\nE2E Analysis: {{analyzedE2EResults}}\nE2E Test Results: {{e2eTestResults}}\nScreenshot Directory: {{screenshotDirectory}}\n\nFormat the comment as:\n\n## E2E QA Maestro - REJECTED\n\n**Task-Related Failures:** X\n\n### Failed Scenarios\nFor each task-related failure:\n- **Scenario:** [name]\n- **Expected:** [expected behavior]\n- **Actual:** [actual behavior]\n- **Steps to Reproduce:** [numbered steps using MCP tool actions]\n- **Suggested Fix:** [if available]\n- **Error Details:** [exact error text from runtimeErrorsDetected if available]\n- **Simulator Logs:** [relevant log excerpt if available from e2eTestResults.simulatorLogExcerpt]\n\n### Passed Scenarios\n- [scenario name]: PASS - [brief description]\n\n**Screenshots**: {{screenshotDirectory}}\n\n[Summary of what needs fixing before re-review]",
|
|
958
|
+
"contextKeys": [
|
|
959
|
+
"taskDescription",
|
|
960
|
+
"analyzedE2EResults",
|
|
961
|
+
"e2eTestResults",
|
|
962
|
+
"screenshotDirectory"
|
|
963
|
+
],
|
|
964
|
+
"outputSchema": {
|
|
965
|
+
"type": "object",
|
|
966
|
+
"properties": {
|
|
967
|
+
"rejectionReason": {
|
|
968
|
+
"type": "string"
|
|
969
|
+
},
|
|
970
|
+
"comment": {
|
|
971
|
+
"type": "string"
|
|
972
|
+
}
|
|
973
|
+
},
|
|
974
|
+
"required": [
|
|
975
|
+
"rejectionReason",
|
|
976
|
+
"comment"
|
|
977
|
+
]
|
|
978
|
+
},
|
|
979
|
+
"outputKey": "rejectionDetails",
|
|
980
|
+
"temperature": 0.3
|
|
981
|
+
},
|
|
982
|
+
{
|
|
983
|
+
"type": "action",
|
|
984
|
+
"call": "AddRejectionComment"
|
|
985
|
+
},
|
|
986
|
+
{
|
|
987
|
+
"type": "action",
|
|
988
|
+
"call": "ReportTriggerFail"
|
|
989
|
+
}
|
|
990
|
+
]
|
|
991
|
+
},
|
|
992
|
+
{
|
|
993
|
+
"type": "sequence",
|
|
994
|
+
"comment": "Fallback - CONSERVATIVE: reject if results are ambiguous (cannot confirm pass). Better to block and have a human review than to auto-approve with uncertain results.",
|
|
995
|
+
"children": [
|
|
996
|
+
{
|
|
997
|
+
"type": "llm-action",
|
|
998
|
+
"name": "WriteFallbackComment",
|
|
999
|
+
"allowedTools": [],
|
|
1000
|
+
"prompt": "Write an E2E test comment for an ambiguous/unclear result. The test results could not be clearly classified as pass or fail.\n\nTask: {{taskDescription}}\nE2E Analysis: {{analyzedE2EResults}}\nE2E Test Results: {{e2eTestResults}}\nScreenshot Directory: {{screenshotDirectory}}\n\nThe test results were ambiguous - we could not determine a clear pass or clear task-related failure. When in doubt, we REJECT to be safe.\n\nMANDATORY: If 0 out of N scenarios passed, this is ALWAYS a failure. You may NOT classify all failures as environment issues and pass. 'Environment issues prevented testing' is a FAIL.\n\nCheck the E2E test results carefully:\n- How many scenarios were executed?\n- How many passed vs failed?\n- If 0 scenarios passed, this is definitely a failure \u2014 report it as such.\n- If results are unclear or incomplete, this is a failure.\n\nFormat as:\n\n## E2E QA Maestro - REJECTED (Inconclusive)\n\n**Reason:** Test results were ambiguous and could not confirm all scenarios pass.\n\n### E2E Scenarios\n- [scenario name]: PASS/FAIL - [brief description]\n\n### Summary\n[Document what was tested, what passed, what failed, and why results are inconclusive]\n[Explain what needs to happen for a clear pass]\n\n**Screenshots**: {{screenshotDirectory}}",
|
|
1001
|
+
"contextKeys": [
|
|
1002
|
+
"taskDescription",
|
|
1003
|
+
"analyzedE2EResults",
|
|
1004
|
+
"e2eTestResults",
|
|
1005
|
+
"screenshotDirectory"
|
|
1006
|
+
],
|
|
1007
|
+
"outputSchema": {
|
|
1008
|
+
"type": "object",
|
|
1009
|
+
"properties": {
|
|
1010
|
+
"rejectionReason": {
|
|
1011
|
+
"type": "string"
|
|
1012
|
+
},
|
|
1013
|
+
"comment": {
|
|
1014
|
+
"type": "string"
|
|
1015
|
+
}
|
|
1016
|
+
},
|
|
1017
|
+
"required": [
|
|
1018
|
+
"rejectionReason",
|
|
1019
|
+
"comment"
|
|
1020
|
+
]
|
|
1021
|
+
},
|
|
1022
|
+
"outputKey": "rejectionDetails",
|
|
1023
|
+
"temperature": 0.4
|
|
1024
|
+
},
|
|
1025
|
+
{
|
|
1026
|
+
"type": "action",
|
|
1027
|
+
"call": "AddRejectionComment"
|
|
1028
|
+
},
|
|
1029
|
+
{
|
|
1030
|
+
"type": "action",
|
|
1031
|
+
"call": "ReportTriggerFail"
|
|
1032
|
+
}
|
|
1033
|
+
]
|
|
1034
|
+
}
|
|
1035
|
+
]
|
|
1036
|
+
}
|
|
1037
|
+
]
|
|
1038
|
+
},
|
|
1039
|
+
{
|
|
1040
|
+
"type": "sequence",
|
|
1041
|
+
"comment": "Environment setup failed after all retries - block the task",
|
|
1042
|
+
"children": [
|
|
1043
|
+
{
|
|
1044
|
+
"type": "llm-action",
|
|
1045
|
+
"name": "DocumentEnvironmentBlocker",
|
|
1046
|
+
"allowedTools": [],
|
|
1047
|
+
"prompt": "Document that E2E testing is blocked due to environment issues.\n\nTask: {{taskDescription}}\nEnvironment Setup Result: {{envSetupResult}}\nRetry Count: {{envRetryCount}} / {{maxEnvRetries}}\n\nExplain that:\n1. The E2E QA agent attempted to set up the test environment (iOS Simulator + Expo Go + Metro bundler)\n2. Multiple attempts ({{envRetryCount}}) to set up the environment failed\n3. This is NOT a code issue with the engineer's changes\n4. Human intervention is needed to resolve the environment\n\nCommon environment issues and fixes:\n- HTTP 500 'Input is required...EXPO_TOKEN': Missing --offline flag or CI=1 was set. Use 'unset CI && npx expo start --go --offline </dev/null &'\n- Port occupied: stale Metro process needs killing with lsof -ti:PORT | xargs kill -9 (use the agent's assigned port)\n- Module resolution errors: frontend/node_modules needs npm install --legacy-peer-deps\n- Simulator not booting: Xcode/simulator may need updating or disk space may be low\n- Expo Go not installed or wrong version: install Expo Go 54.0.6 via xcrun simctl install\n- Check frontend/metro.log for Metro bundler startup errors\n\nBe specific about what environment issue occurred.\n\nFormat as:\n\n## E2E QA Maestro - BLOCKED (Environment)\n\n**Issue:** [specific environment problem]\n**Attempts:** {{envRetryCount}} / {{maxEnvRetries}}\n\n### Details\n[Detailed description of what went wrong]\n\n### Required Actions\n[What the human needs to do to fix the environment]",
|
|
1048
|
+
"contextKeys": [
|
|
1049
|
+
"taskDescription",
|
|
1050
|
+
"envSetupResult",
|
|
1051
|
+
"envRetryCount",
|
|
1052
|
+
"maxEnvRetries"
|
|
1053
|
+
],
|
|
1054
|
+
"outputSchema": {
|
|
1055
|
+
"type": "object",
|
|
1056
|
+
"properties": {
|
|
1057
|
+
"rejectionReason": {
|
|
1058
|
+
"type": "string"
|
|
1059
|
+
},
|
|
1060
|
+
"environmentIssue": {
|
|
1061
|
+
"type": "string"
|
|
1062
|
+
},
|
|
1063
|
+
"comment": {
|
|
1064
|
+
"type": "string"
|
|
1065
|
+
}
|
|
1066
|
+
},
|
|
1067
|
+
"required": [
|
|
1068
|
+
"rejectionReason",
|
|
1069
|
+
"environmentIssue",
|
|
1070
|
+
"comment"
|
|
1071
|
+
]
|
|
1072
|
+
},
|
|
1073
|
+
"outputKey": "rejectionDetails",
|
|
1074
|
+
"temperature": 0.3
|
|
1075
|
+
},
|
|
1076
|
+
{
|
|
1077
|
+
"type": "action",
|
|
1078
|
+
"call": "AddEnvironmentBlockerComment"
|
|
1079
|
+
},
|
|
1080
|
+
{
|
|
1081
|
+
"type": "action",
|
|
1082
|
+
"call": "ReportTriggerFail"
|
|
1083
|
+
}
|
|
1084
|
+
]
|
|
1085
|
+
}
|
|
1086
|
+
]
|
|
1087
|
+
}
|
|
1088
|
+
]
|
|
1089
|
+
},
|
|
1090
|
+
{
|
|
1091
|
+
"type": "sequence",
|
|
1092
|
+
"comment": "FAILURE HANDLER: Unexpected errors during E2E testing",
|
|
1093
|
+
"children": [
|
|
1094
|
+
{
|
|
1095
|
+
"type": "action",
|
|
1096
|
+
"call": "AddQAFailureComment"
|
|
1097
|
+
},
|
|
1098
|
+
{
|
|
1099
|
+
"type": "action",
|
|
1100
|
+
"call": "ReportTriggerFail"
|
|
1101
|
+
}
|
|
1102
|
+
]
|
|
1103
|
+
}
|
|
1104
|
+
]
|
|
1105
|
+
}
|
|
1106
|
+
},
|
|
1107
|
+
{
|
|
1108
|
+
"type": "selector",
|
|
1109
|
+
"comment": "Shutdown simulator and Metro after testing - clean up resources for next task",
|
|
1110
|
+
"children": [
|
|
1111
|
+
{
|
|
1112
|
+
"type": "action",
|
|
1113
|
+
"call": "ShutdownSimulator",
|
|
1114
|
+
"comment": "Shutdown the iOS simulator and kill Metro to free resources - fresh sim will be booted for next task"
|
|
1115
|
+
},
|
|
1116
|
+
{
|
|
1117
|
+
"type": "action",
|
|
1118
|
+
"call": "NoOp",
|
|
1119
|
+
"comment": "Continue if simulator shutdown fails - non-critical cleanup"
|
|
1120
|
+
}
|
|
1121
|
+
]
|
|
1122
|
+
},
|
|
1123
|
+
{
|
|
1124
|
+
"type": "selector",
|
|
1125
|
+
"comment": "Notify other agents that E2E review is complete (non-critical: continue even if unavailable)",
|
|
1126
|
+
"children": [
|
|
1127
|
+
{
|
|
1128
|
+
"type": "action",
|
|
1129
|
+
"call": "SendTaskCompleteNotification",
|
|
1130
|
+
"comment": "Broadcast to other agents that E2E QA has finished reviewing this task"
|
|
1131
|
+
},
|
|
1132
|
+
{
|
|
1133
|
+
"type": "action",
|
|
1134
|
+
"call": "NoOp",
|
|
1135
|
+
"comment": "Continue without notification if messaging is unavailable"
|
|
1136
|
+
}
|
|
1137
|
+
]
|
|
1138
|
+
},
|
|
1139
|
+
{
|
|
1140
|
+
"type": "action",
|
|
1141
|
+
"call": "ClearTaskContext",
|
|
1142
|
+
"comment": "Reset task-specific blackboard keys to prepare for next task assignment"
|
|
1143
|
+
},
|
|
1144
|
+
{
|
|
1145
|
+
"type": "action",
|
|
1146
|
+
"call": "Loop",
|
|
1147
|
+
"comment": "Return RUNNING to restart the BT from the root - wait for next task"
|
|
1148
|
+
}
|
|
1149
|
+
]
|
|
1150
|
+
}
|
|
1151
|
+
},
|
|
1152
|
+
"blackboardDefaults": {
|
|
1153
|
+
"currentTaskId": null,
|
|
1154
|
+
"taskAssignedAt": null,
|
|
1155
|
+
"taskDetails": null,
|
|
1156
|
+
"taskComments": null,
|
|
1157
|
+
"taskDescription": null,
|
|
1158
|
+
"taskTitle": null,
|
|
1159
|
+
"taskFiles": null,
|
|
1160
|
+
"e2eTestPlan": null,
|
|
1161
|
+
"projectMaestroFlows": null,
|
|
1162
|
+
"screenshotDirectory": ".agentloop/pr-screenshots",
|
|
1163
|
+
"envSetupResult": null,
|
|
1164
|
+
"envRetryCount": 0,
|
|
1165
|
+
"maxEnvRetries": 3,
|
|
1166
|
+
"appBundleId": "host.exp.Exponent",
|
|
1167
|
+
"e2eTestResults": null,
|
|
1168
|
+
"maxScenarioRetries": 2,
|
|
1169
|
+
"analyzedE2EResults": null,
|
|
1170
|
+
"savedFlows": null,
|
|
1171
|
+
"approvalComment": null,
|
|
1172
|
+
"rejectionDetails": null,
|
|
1173
|
+
"incomingMessages": [],
|
|
1174
|
+
"coordinationMessage": null,
|
|
1175
|
+
"loopCount": 0,
|
|
1176
|
+
"screenshotLabel": "checkpoint",
|
|
1177
|
+
"simulatorDeviceType": "iPhone 17 Pro",
|
|
1178
|
+
"simulatorDeviceId": null,
|
|
1179
|
+
"simulatorDeviceName": null,
|
|
1180
|
+
"simulatorBootedAt": null,
|
|
1181
|
+
"metroPort": null,
|
|
1182
|
+
"agentInstanceName": null,
|
|
1183
|
+
"simulatorLogFile": null,
|
|
1184
|
+
"simulatorLogs": null,
|
|
1185
|
+
"navigationResult": null,
|
|
1186
|
+
"currentScenario": null,
|
|
1187
|
+
"currentScenarioIndex": null,
|
|
1188
|
+
"currentScenarioResult": null,
|
|
1189
|
+
"scenarioResults": []
|
|
1190
|
+
}
|
|
1191
|
+
}
|