@bugzy-ai/bugzy 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36):
  1. package/README.md +10 -7
  2. package/dist/cli/index.cjs +6168 -5848
  3. package/dist/cli/index.cjs.map +1 -1
  4. package/dist/cli/index.js +6168 -5848
  5. package/dist/cli/index.js.map +1 -1
  6. package/dist/index.cjs +5563 -5302
  7. package/dist/index.cjs.map +1 -1
  8. package/dist/index.d.cts +5 -4
  9. package/dist/index.d.ts +5 -4
  10. package/dist/index.js +5560 -5300
  11. package/dist/index.js.map +1 -1
  12. package/dist/subagents/index.cjs +368 -51
  13. package/dist/subagents/index.cjs.map +1 -1
  14. package/dist/subagents/index.js +368 -51
  15. package/dist/subagents/index.js.map +1 -1
  16. package/dist/subagents/metadata.cjs +10 -2
  17. package/dist/subagents/metadata.cjs.map +1 -1
  18. package/dist/subagents/metadata.js +10 -2
  19. package/dist/subagents/metadata.js.map +1 -1
  20. package/dist/tasks/index.cjs +864 -2391
  21. package/dist/tasks/index.cjs.map +1 -1
  22. package/dist/tasks/index.d.cts +48 -5
  23. package/dist/tasks/index.d.ts +48 -5
  24. package/dist/tasks/index.js +862 -2389
  25. package/dist/tasks/index.js.map +1 -1
  26. package/dist/templates/init/.bugzy/runtime/knowledge-base.md +61 -0
  27. package/dist/templates/init/.bugzy/runtime/knowledge-maintenance-guide.md +97 -0
  28. package/dist/templates/init/.bugzy/runtime/subagent-memory-guide.md +87 -0
  29. package/dist/templates/init/.bugzy/runtime/templates/test-plan-template.md +41 -16
  30. package/dist/templates/init/.bugzy/runtime/templates/test-result-schema.md +498 -0
  31. package/dist/templates/init/.bugzy/runtime/test-execution-strategy.md +535 -0
  32. package/dist/templates/init/.bugzy/runtime/testing-best-practices.md +368 -14
  33. package/dist/templates/init/.gitignore-template +23 -2
  34. package/package.json +1 -1
  35. package/templates/init/.bugzy/runtime/templates/test-plan-template.md +41 -16
  36. package/templates/init/.env.testdata +18 -0
@@ -1,789 +1,25 @@
1
+ // src/tasks/steps/types.ts
2
+ function isInlineStep(ref) {
3
+ return typeof ref === "object" && "inline" in ref && ref.inline === true;
4
+ }
5
+ function isStepReferenceObject(ref) {
6
+ return typeof ref === "object" && "stepId" in ref;
7
+ }
8
+
1
9
  // src/tasks/constants.ts
2
10
  var TASK_SLUGS = {
3
11
  EXPLORE_APPLICATION: "explore-application",
12
+ ONBOARD_TESTING: "onboard-testing",
4
13
  GENERATE_TEST_CASES: "generate-test-cases",
5
14
  GENERATE_TEST_PLAN: "generate-test-plan",
6
15
  HANDLE_MESSAGE: "handle-message",
7
16
  PROCESS_EVENT: "process-event",
8
17
  RUN_TESTS: "run-tests",
9
- VERIFY_CHANGES: "verify-changes"
18
+ VERIFY_CHANGES: "verify-changes",
19
+ /** @deprecated Use ONBOARD_TESTING instead */
20
+ FULL_TEST_COVERAGE: "onboard-testing"
10
21
  };
11
22
 
12
- // src/tasks/templates/exploration-instructions.ts
13
- var EXPLORATION_INSTRUCTIONS = `
14
- ## Exploratory Testing Protocol
15
-
16
- Before creating or running formal tests, perform exploratory testing to validate requirements and understand actual system behavior. The depth of exploration should adapt to the clarity of requirements.
17
-
18
- ### Step {{STEP_NUMBER}}.1: Assess Requirement Clarity
19
-
20
- Determine exploration depth based on requirement quality:
21
-
22
- | Clarity | Indicators | Exploration Depth | Goal |
23
- |---------|-----------|-------------------|------|
24
- | **Clear** | Detailed acceptance criteria, screenshots/mockups, specific field names/URLs/roles, unambiguous behavior, consistent patterns | Quick (1-2 min) | Confirm feature exists, capture evidence |
25
- | **Vague** | General direction clear but specifics missing, incomplete examples, assumed details, relative terms ("fix", "better") | Moderate (3-5 min) | Document current behavior, identify ambiguities, generate clarification questions |
26
- | **Unclear** | Contradictory info, multiple interpretations, no examples/criteria, ambiguous scope ("the page"), critical details missing | Deep (5-10 min) | Systematically test scenarios, document patterns, identify all ambiguities, formulate comprehensive questions |
27
-
28
- **Examples:**
29
- - **Clear:** "Change 'Submit' button from blue (#007BFF) to green (#28A745) on /auth/login. Verify hover effect."
30
- - **Vague:** "Fix the sorting in todo list page. The items are mixed up for premium users."
31
- - **Unclear:** "Improve the dashboard performance. Users say it's slow."
32
-
33
- ### Step {{STEP_NUMBER}}.2: Quick Exploration (1-2 min)
34
-
35
- **When:** Requirements CLEAR
36
-
37
- **Steps:**
38
- 1. Navigate to feature (use provided URL), verify loads without errors
39
- 2. Verify key elements exist (buttons, fields, sections mentioned)
40
- 3. Capture screenshot of initial state
41
- 4. Document:
42
- \`\`\`markdown
43
- **Quick Exploration (1 min)**
44
- Feature: [Name] | URL: [Path]
45
- Status: \u2705 Accessible / \u274C Not found / \u26A0\uFE0F Different
46
- Screenshot: [filename]
47
- Notes: [Immediate observations]
48
- \`\`\`
49
- 5. **Decision:** \u2705 Matches \u2192 Test creation | \u274C/\u26A0\uFE0F Doesn't match \u2192 Moderate Exploration
50
-
51
- **Time Limit:** 1-2 minutes
52
-
53
- ### Step {{STEP_NUMBER}}.3: Moderate Exploration (3-5 min)
54
-
55
- **When:** Requirements VAGUE or Quick Exploration revealed discrepancies
56
-
57
- **Steps:**
58
- 1. Navigate using appropriate role(s), set up preconditions, ensure clean state
59
- 2. Test primary user flow, document steps and behavior, note unexpected behavior
60
- 3. Capture before/after screenshots, document field values/ordering/visibility
61
- 4. Compare to requirement: What matches? What differs? What's absent?
62
- 5. Identify specific ambiguities:
63
- \`\`\`markdown
64
- **Moderate Exploration (4 min)**
65
-
66
- **Explored:** Role: [Admin], Path: [Steps], Behavior: [What happened]
67
-
68
- **Current State:** [Specific observations with examples]
69
- - Example: "Admin view shows 8 sort options: By Title, By Due Date, By Priority..."
70
-
71
- **Requirement Says:** [What requirement expected]
72
-
73
- **Discrepancies:** [Specific differences]
74
- - Example: "Premium users see 5 fewer sorting options than admins"
75
-
76
- **Ambiguities:**
77
- 1. [First ambiguity with concrete example]
78
- 2. [Second if applicable]
79
-
80
- **Clarification Needed:** [Specific questions]
81
- \`\`\`
82
- 6. Assess severity using Clarification Protocol
83
- 7. **Decision:** \u{1F7E2} Minor \u2192 Proceed with assumptions | \u{1F7E1} Medium \u2192 Async clarification, proceed | \u{1F534} Critical \u2192 Stop, escalate
84
-
85
- **Time Limit:** 3-5 minutes
86
-
87
- ### Step {{STEP_NUMBER}}.4: Deep Exploration (5-10 min)
88
-
89
- **When:** Requirements UNCLEAR or critical ambiguities found
90
-
91
- **Steps:**
92
- 1. **Define Exploration Matrix:** Identify dimensions (user roles, feature states, input variations, browsers)
93
-
94
- 2. **Systematic Testing:** Test each matrix cell methodically
95
- \`\`\`
96
- Example for "Todo List Sorting":
97
- Matrix: User Roles \xD7 Feature Observations
98
-
99
- Test 1: Admin Role \u2192 Navigate, document sort options (count, names, order), screenshot
100
- Test 2: Basic User Role \u2192 Same todo list, document options, screenshot
101
- Test 3: Compare \u2192 Side-by-side table, identify missing/reordered options
102
- \`\`\`
103
-
104
- 3. **Document Patterns:** Consistent behavior? Role-based differences? What varies vs constant?
105
-
106
- 4. **Comprehensive Report:**
107
- \`\`\`markdown
108
- **Deep Exploration (8 min)**
109
-
110
- **Matrix:** [Dimensions] | **Tests:** [X combinations]
111
-
112
- **Findings:**
113
-
114
- ### Test 1: Admin
115
- - Setup: [Preconditions] | Steps: [Actions]
116
- - Observations: Sort options=8, Options=[list], Ordering=[sequence]
117
- - Screenshot: [filename-admin.png]
118
-
119
- ### Test 2: Basic User
120
- - Setup: [Preconditions] | Steps: [Actions]
121
- - Observations: Sort options=3, Missing vs Admin=[5 options], Ordering=[sequence]
122
- - Screenshot: [filename-user.png]
123
-
124
- **Comparison Table:**
125
- | Sort Option | Admin Pos | User Pos | Notes |
126
- |-------------|-----------|----------|-------|
127
- | By Title | 1 | 1 | Match |
128
- | By Priority | 3 | Not visible | Missing |
129
-
130
- **Patterns:**
131
- - Role-based feature visibility
132
- - Consistent relative ordering for visible fields
133
-
134
- **Critical Ambiguities:**
135
- 1. Option Visibility: Intentional basic users see 5 fewer sort options?
136
- 2. Sort Definition: (A) All roles see all options in same order, OR (B) Roles see permitted options in same relative order?
137
-
138
- **Clarification Questions:** [Specific, concrete based on findings]
139
- \`\`\`
140
-
141
- 5. **Next Action:** Critical ambiguities \u2192 STOP, clarify | Patterns suggest answer \u2192 Validate assumption | Behavior clear \u2192 Test creation
142
-
143
- **Time Limit:** 5-10 minutes
144
-
145
- ### Step {{STEP_NUMBER}}.5: Link Exploration to Clarification
146
-
147
- **Flow:** Requirement Analysis \u2192 Exploration \u2192 Clarification
148
-
149
- 1. Requirement analysis detects vague language \u2192 Triggers exploration
150
- 2. Exploration documents current behavior \u2192 Identifies discrepancies
151
- 3. Clarification uses findings \u2192 Asks specific questions referencing observations
152
-
153
- **Example:**
154
- \`\`\`
155
- "Fix the sorting in todo list"
156
- \u2193 Ambiguity: "sorting" = by date, priority, or completion status?
157
- \u2193 Moderate Exploration: Admin=8 sort options, User=3 sort options
158
- \u2193 Question: "Should basic users see all 8 sort options (bug) or only 3 with consistent sequence (correct)?"
159
- \`\`\`
160
-
161
- ### Step {{STEP_NUMBER}}.6: Document Exploration Results
162
-
163
- **Template:**
164
- \`\`\`markdown
165
- ## Exploration Summary
166
-
167
- **Date:** [YYYY-MM-DD] | **Explorer:** [Agent/User] | **Depth:** [Quick/Moderate/Deep] | **Duration:** [X min]
168
-
169
- ### Feature: [Name and description]
170
-
171
- ### Observations: [Key findings]
172
-
173
- ### Current Behavior: [What feature does today]
174
-
175
- ### Discrepancies: [Requirement vs observation differences]
176
-
177
- ### Assumptions Made: [If proceeding with assumptions]
178
-
179
- ### Artifacts: Screenshots: [list], Video: [if captured], Notes: [detailed]
180
- \`\`\`
181
-
182
- **Memory Storage:** Feature behavior patterns, common ambiguity types, resolution approaches
183
-
184
- ### Step {{STEP_NUMBER}}.7: Integration with Test Creation
185
-
186
- **Quick Exploration \u2192 Direct Test:**
187
- - Feature verified \u2192 Create test matching requirement \u2192 Reference screenshot
188
-
189
- **Moderate Exploration \u2192 Assumption-Based Test:**
190
- - Document behavior \u2192 Create test on best interpretation \u2192 Mark assumptions \u2192 Plan updates after clarification
191
-
192
- **Deep Exploration \u2192 Clarification-First:**
193
- - Block test creation until clarification \u2192 Use exploration as basis for questions \u2192 Create test after answer \u2192 Reference both exploration and clarification
194
-
195
- ---
196
-
197
- ## Adaptive Exploration Decision Tree
198
-
199
- \`\`\`
200
- Start: Requirement Received
201
- \u2193
202
- Are requirements clear with specifics?
203
- \u251C\u2500 YES \u2192 Quick Exploration (1-2 min)
204
- \u2502 \u2193
205
- \u2502 Does feature match description?
206
- \u2502 \u251C\u2500 YES \u2192 Proceed to Test Creation
207
- \u2502 \u2514\u2500 NO \u2192 Escalate to Moderate Exploration
208
- \u2502
209
- \u2514\u2500 NO \u2192 Is general direction clear but details missing?
210
- \u251C\u2500 YES \u2192 Moderate Exploration (3-5 min)
211
- \u2502 \u2193
212
- \u2502 Are ambiguities MEDIUM severity or lower?
213
- \u2502 \u251C\u2500 YES \u2192 Document assumptions, proceed with test creation
214
- \u2502 \u2514\u2500 NO \u2192 Escalate to Deep Exploration or Clarification
215
- \u2502
216
- \u2514\u2500 NO \u2192 Deep Exploration (5-10 min)
217
- \u2193
218
- Document comprehensive findings
219
- \u2193
220
- Assess ambiguity severity
221
- \u2193
222
- Seek clarification for CRITICAL/HIGH
223
- \`\`\`
224
-
225
- ---
226
-
227
- ## Remember:
228
-
229
- \u{1F50D} **Explore before assuming** | \u{1F4CA} **Concrete observations > abstract interpretation** | \u23F1\uFE0F **Adaptive depth: time \u221D uncertainty** | \u{1F3AF} **Exploration findings \u2192 specific clarifications** | \u{1F4DD} **Always document** | \u{1F517} **Link exploration \u2192 ambiguity \u2192 clarification**
230
- `;
231
-
232
- // src/tasks/templates/knowledge-base.ts
233
- var KNOWLEDGE_BASE_READ_INSTRUCTIONS = `
234
- ## Knowledge Base Context
235
-
236
- Before proceeding, read the curated knowledge base to inform your work:
237
-
238
- **Location:** \`.bugzy/runtime/knowledge-base.md\`
239
-
240
- **Purpose:** The knowledge base is a living collection of factual knowledge - what we currently know and believe to be true about this project, its patterns, and its context. This is NOT a historical log, but a curated snapshot that evolves as understanding improves.
241
-
242
- **How to Use:**
243
- 1. Read the knowledge base to understand:
244
- - Project-specific patterns and conventions
245
- - Known behaviors and system characteristics
246
- - Relevant context from past work
247
- - Documented decisions and approaches
248
-
249
- 2. Apply this knowledge to:
250
- - Make informed decisions aligned with project patterns
251
- - Avoid repeating past mistakes
252
- - Build on existing understanding
253
- - Maintain consistency with established practices
254
-
255
- **Note:** The knowledge base may not exist yet or may be empty. If it doesn't exist or is empty, proceed without this context and help build it as you work.
256
- `;
257
- var KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS = `
258
- ## Knowledge Base Maintenance
259
-
260
- After completing your work, update the knowledge base with new insights.
261
-
262
- **Location:** \`.bugzy/runtime/knowledge-base.md\`
263
-
264
- **Process:**
265
-
266
- 1. **Read the maintenance guide** at \`.bugzy/runtime/knowledge-maintenance-guide.md\` to understand when to ADD, UPDATE, or REMOVE entries and how to maintain a curated knowledge base (not an append-only log)
267
-
268
- 2. **Review the current knowledge base** to check for overlaps, contradictions, or opportunities to consolidate existing knowledge
269
-
270
- 3. **Update the knowledge base** following the maintenance guide principles: favor consolidation over addition, update rather than append, resolve contradictions immediately, and focus on quality over completeness
271
-
272
- **Remember:** Every entry should answer "Will this help someone working on this project in 6 months?"
273
- `;
274
-
275
- // src/tasks/library/explore-application.ts
276
- var exploreApplicationTask = {
277
- slug: TASK_SLUGS.EXPLORE_APPLICATION,
278
- name: "Explore Application",
279
- description: "Systematically explore application to discover UI elements, workflows, and behaviors",
280
- frontmatter: {
281
- description: "Systematically explore application to discover UI elements, workflows, and behaviors",
282
- "argument-hint": "--focus [area] --depth [shallow|deep] --system [system-name]"
283
- },
284
- baseContent: `# Explore Application Command
285
-
286
- ## SECURITY NOTICE
287
- **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
288
- - **Read \`.env.testdata\`** for non-secret environment variables (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
289
- - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
290
- - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
291
- - The \`.env\` file access is blocked by settings.json
292
-
293
- Systematically explore the application using the test-runner agent to discover actual UI elements, workflows, and behaviors. Updates test plan and project documentation with findings.
294
-
295
- ## Arguments
296
- Arguments: $ARGUMENTS
297
-
298
- ## Parse Arguments
299
- Extract the following from arguments:
300
- - **focus**: Specific area to explore (authentication, navigation, search, content, admin)
301
- - **depth**: Exploration depth - shallow (quick discovery) or deep (comprehensive) - defaults to deep
302
- - **system**: Which system to explore (optional for multi-system setups)
303
-
304
- ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
305
-
306
- ## Process
307
-
308
- ### Step 0: Understand Exploration Protocol
309
-
310
- This task implements the exploration protocol defined in the exploration-instructions template.
311
-
312
- **Purpose**: This task provides the infrastructure for systematic application exploration that is referenced by other tasks (generate-test-plan, generate-test-cases, verify-changes) when they need to explore features before proceeding.
313
-
314
- **Depth Alignment**: The depth levels in this task align with the exploration template:
315
- - **Shallow exploration (15-20 min)** implements the quick/moderate exploration from the template
316
- - **Deep exploration (45-60 min)** implements comprehensive deep exploration from the template
317
-
318
- The depth levels are extended for full application exploration compared to the focused feature exploration used in other tasks.
319
-
320
- **Full Exploration Protocol Reference**:
321
-
322
- ${EXPLORATION_INSTRUCTIONS}
323
-
324
- **Note**: This task extends the protocol for comprehensive application-wide exploration, while other tasks use abbreviated versions for targeted feature exploration.
325
-
326
- ### Step 1: Load Environment and Context
327
-
328
- #### 1.1 Check Environment Variables
329
- Read \`.env.testdata\` file to understand what variables are required:
330
- - TEST_BASE_URL or TEST_MOBILE_BASE_URL (base URL variable names)
331
- - [SYSTEM_NAME]_URL (if multi-system setup)
332
- - Authentication credential variable names for the selected system
333
- - Any test data variable names
334
-
335
- Note: The actual values will be read from the user's \`.env\` file at test execution time.
336
- Verify \`.env.testdata\` exists to understand variable structure. If it doesn't exist, notify user to create it based on test plan.
337
-
338
- #### 1.2 Read Current Test Plan
339
- Read \`test-plan.md\` to:
340
- - Identify sections marked with [TO BE EXPLORED]
341
- - Find features requiring discovery
342
- - Understand testing scope and priorities
343
-
344
- #### 1.3 Read Project Context
345
- Read \`.bugzy/runtime/project-context.md\` for:
346
- - System architecture understanding
347
- - Testing environment details
348
- - QA workflow requirements
349
-
350
- ### Step 2: Prepare Exploration Strategy
351
-
352
- Based on the arguments and context, prepare exploration instructions.
353
-
354
- #### 2.1 Focus Area Strategies
355
-
356
- **If focus is "authentication":**
357
- \`\`\`
358
- 1. Navigate to the application homepage
359
- 2. Locate and document all authentication entry points:
360
- - Login button/link location and selector
361
- - Registration option and flow
362
- - Social login options (Facebook, Google, etc.)
363
- 3. Test login flow:
364
- - Document form fields and validation
365
- - Test error states with invalid credentials
366
- - Verify successful login indicators
367
- 4. Test logout functionality:
368
- - Find logout option
369
- - Verify session termination
370
- - Check redirect behavior
371
- 5. Explore password recovery:
372
- - Locate forgot password link
373
- - Document recovery flow
374
- - Note email/SMS options
375
- 6. Check role-based access:
376
- - Identify user role indicators
377
- - Document permission differences
378
- - Test admin/moderator access if available
379
- 7. Test session persistence:
380
- - Check remember me functionality
381
- - Test timeout behavior
382
- - Verify multi-tab session handling
383
- \`\`\`
384
-
385
- **If focus is "navigation":**
386
- \`\`\`
387
- 1. Document main navigation structure:
388
- - Primary menu items and hierarchy
389
- - Mobile menu behavior
390
- - Footer navigation links
391
- 2. Map URL patterns:
392
- - Category URL structure
393
- - Parameter patterns
394
- - Deep linking support
395
- 3. Test breadcrumb navigation:
396
- - Availability on different pages
397
- - Clickability and accuracy
398
- - Mobile display
399
- 4. Explore category system:
400
- - Main categories and subcategories
401
- - Navigation between levels
402
- - Content organization
403
- 5. Document special sections:
404
- - User profiles
405
- - Admin areas
406
- - Help/Support sections
407
- 6. Test browser navigation:
408
- - Back/forward button behavior
409
- - History management
410
- - State preservation
411
- \`\`\`
412
-
413
- **If focus is "search":**
414
- \`\`\`
415
- 1. Locate search interfaces:
416
- - Main search bar
417
- - Advanced search options
418
- - Category-specific search
419
- 2. Document search features:
420
- - Autocomplete/suggestions
421
- - Search filters
422
- - Sort options
423
- 3. Test search functionality:
424
- - Special character handling
425
- - Empty/invalid queries
426
- 4. Analyze search results:
427
- - Result format and layout
428
- - Pagination
429
- - No results handling
430
- 5. Check search performance:
431
- - Response times
432
- - Result relevance
433
- - Load more/infinite scroll
434
- \`\`\`
435
-
436
- **If no focus specified:**
437
- Use comprehensive exploration covering all major areas.
438
-
439
- #### 2.2 Depth Configuration
440
-
441
- **Implementation Note**: These depths implement the exploration protocol defined in exploration-instructions.ts, extended for full application exploration.
442
-
443
- **Shallow exploration (--depth shallow):**
444
- - Quick discovery pass (15-20 minutes)
445
- - Focus on main features only
446
- - Basic screenshot capture
447
- - High-level findings
448
- - *Aligns with Quick/Moderate exploration from template*
449
-
450
- **Deep exploration (--depth deep or default):**
451
- - Comprehensive exploration (45-60 minutes)
452
- - Test edge cases and variations
453
- - Extensive screenshot documentation
454
- - Detailed technical findings
455
- - Performance observations
456
- - Accessibility notes
457
- - *Aligns with Deep exploration from template*
458
-
459
- ### Step 3: Execute Exploration
460
-
461
- #### 3.1 Create Exploration Test Case
462
- Generate a temporary exploration test case file at \`./test-cases/EXPLORATION-TEMP.md\`:
463
-
464
- \`\`\`markdown
465
- ---
466
- id: EXPLORATION-TEMP
467
- title: Application Exploration - [Focus Area or Comprehensive]
468
- type: exploratory
469
- priority: high
470
- ---
471
-
472
- ## Preconditions
473
- - Browser with cleared cookies and cache
474
- - Access to [system] environment
475
- - Credentials configured per .env.testdata template
476
-
477
- ## Test Steps
478
- [Generated exploration steps based on strategy]
479
-
480
- ## Expected Results
481
- Document all findings including:
482
- - UI element locations and selectors
483
- - Navigation patterns and URLs
484
- - Feature behaviors and workflows
485
- - Performance observations
486
- - Error states and edge cases
487
- - Screenshots of all key areas
488
- \`\`\`
489
-
490
- #### 3.2 Launch Test Runner Agent
491
- {{INVOKE_TEST_RUNNER}}
492
-
493
- Execute the exploration test case with special exploration instructions:
494
-
495
- \`\`\`
496
- Execute the exploration test case at ./test-cases/EXPLORATION-TEMP.md with focus on discovery and documentation.
497
-
498
- Special instructions for exploration mode:
499
- 1. Take screenshots of EVERY significant UI element and page
500
- 2. Document all clickable elements with their selectors
501
- 3. Note all URL patterns and parameters
502
- 4. Test variations and edge cases where possible
503
- 5. Document load times and performance observations
504
- 6. Create detailed findings report with structured data
505
- 7. Organize screenshots by functional area
506
- 8. Note any console errors or warnings
507
- 9. Document which features are accessible vs restricted
508
-
509
- Generate a comprehensive exploration report that can be used to update project documentation.
510
- \`\`\`
511
-
512
- ### Step 4: Process Exploration Results
513
-
514
- #### 4.1 Read Test Runner Output
515
- Read the generated test run files from \`./test-runs/[timestamp]/EXPLORATION-TEMP/\`:
516
- - \`findings.md\` - Main findings document
517
- - \`test-log.md\` - Detailed step execution
518
- - \`screenshots/\` - Visual documentation
519
- - \`summary.json\` - Execution summary
520
-
521
- #### 4.2 Parse and Structure Findings
522
- Extract and organize:
523
- - Discovered features and capabilities
524
- - UI element selectors and patterns
525
- - Navigation structure and URLs
526
- - Authentication flow details
527
- - Performance metrics
528
- - Technical observations
529
- - Areas requiring further investigation
530
-
531
- ### Step 5: Update Project Artifacts
532
-
533
- #### 5.1 Update Test Plan
534
- Read and update \`test-plan.md\`:
535
- - Replace [TO BE EXPLORED] markers with concrete findings
536
- - Add newly discovered features to test items
537
- - Update navigation patterns and URL structures
538
- - Document actual authentication methods
539
- - Update environment variables if new ones discovered
540
- - Refine pass/fail criteria based on actual behavior
541
-
542
- #### 5.2 Create Exploration Report
543
- Create \`./exploration-reports/[timestamp]-[focus]-exploration.md\`
544
-
545
- ### Step 6: Cleanup
546
-
547
- #### 6.1 Remove Temporary Files
548
- Delete the temporary exploration test case:
549
- \`\`\`bash
550
- rm ./test-cases/EXPLORATION-TEMP.md
551
- \`\`\`
552
-
553
- ### Step 7: Generate Summary Report
554
- Create a concise summary for the user
555
-
556
- ## Error Handling
557
-
558
- ### Environment Issues
559
- - If \`.env.testdata\` missing: Warn user and suggest creating it from test plan
560
- - If credentials invalid (at runtime): Document in report and continue with public areas
561
- - If system unreachable: Retry with exponential backoff, report if persistent
562
-
563
- ### Exploration Failures
564
- - If test-runner fails: Capture partial results and report
565
- - If specific area inaccessible: Note in findings and continue
566
- - If browser crashes: Attempt recovery and resume
567
- - If test-runner stops, but does not create files, inspect what it did and if it was not enough remove the test-run and start the test-runner agent again. If it has enough info, continue with what you have.
568
-
569
- ### Data Issues
570
- - If dynamic content prevents exploration: Note and try alternative approaches
571
- - If rate limited: Implement delays and retry
572
-
573
- ## Integration with Other Commands
574
-
575
- ### Feeds into /generate-test-cases
576
- - Provides actual UI elements for test steps
577
- - Documents real workflows for test scenarios
578
- - Identifies edge cases to test
579
-
580
- ### Updates from /process-event
581
- - New exploration findings can be processed as events
582
- - Discovered bugs trigger issue creation
583
- - Feature discoveries update test coverage
584
-
585
- ### Enhances /run-tests
586
- - Tests use discovered selectors
587
- - Validation based on actual behavior
588
- - More reliable test execution
589
-
590
- ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}`,
591
- optionalSubagents: [],
592
- requiredSubagents: ["test-runner"]
593
- };
594
-
595
- // src/tasks/templates/clarification-instructions.ts
596
- var CLARIFICATION_INSTRUCTIONS = `
597
- ## Clarification Protocol
598
-
599
- Before proceeding with test creation or execution, ensure requirements are clear and testable. Use this protocol to detect ambiguity, assess its severity, and determine the appropriate action.
600
-
601
- ### Step {{STEP_NUMBER}}.0: Check for Pending Clarification
602
-
603
- Before starting, check if this task is resuming from a blocked clarification:
604
-
605
- 1. **Check $ARGUMENTS for clarification data:**
606
- - If \`$ARGUMENTS.clarification\` exists, this task is resuming with a clarification response
607
- - Extract: \`clarification\` (the user's answer), \`originalArgs\` (original task parameters)
608
-
609
- 2. **If clarification is present:**
610
- - Read \`.bugzy/runtime/blocked-task-queue.md\`
611
- - Find and remove your task's entry from the queue (update the file)
612
- - Proceed using the clarification as if user just provided the answer
613
- - Skip ambiguity detection for the clarified aspect
614
-
615
- 3. **If no clarification in $ARGUMENTS:** Proceed normally with ambiguity detection below.
616
-
617
- ### Step {{STEP_NUMBER}}.1: Detect Ambiguity
618
-
619
- Scan for ambiguity signals:
620
-
621
- **Language:** Vague terms ("fix", "improve", "better", "like", "mixed up"), relative terms without reference ("faster", "more"), undefined scope ("the ordering", "the fields", "the page"), modal ambiguity ("should", "could" vs "must", "will")
622
-
623
- **Details:** Missing acceptance criteria (no clear PASS/FAIL), no examples/mockups, incomplete field/element lists, unclear role behavior differences, unspecified error scenarios
624
-
625
- **Interpretation:** Multiple valid interpretations, contradictory information (description vs comments), implied vs explicit requirements
626
-
627
- **Context:** No reference documentation, "RELEASE APPROVED" without criteria, quick ticket creation, assumes knowledge ("as you know...", "obviously...")
628
-
629
- **Quick Check:**
630
- - [ ] Success criteria explicitly defined? (PASS if X, FAIL if Y)
631
- - [ ] All affected elements specifically listed? (field names, URLs, roles)
632
- - [ ] Only ONE reasonable interpretation?
633
- - [ ] Examples, screenshots, or mockups provided?
634
- - [ ] Consistent with existing system patterns?
635
- - [ ] Can write test assertions without assumptions?
636
-
637
- ### Step {{STEP_NUMBER}}.2: Assess Severity
638
-
639
- If ambiguity is detected, assess its severity:
640
-
641
- | Severity | Characteristics | Examples | Action |
642
- |----------|----------------|----------|--------|
643
- | \u{1F534} **CRITICAL** | Expected behavior undefined/contradictory; test outcome unpredictable; core functionality unclear; success criteria missing; multiple interpretations = different strategies | "Fix the issue" (what issue?), "Improve performance" (which metrics?), "Fix sorting in todo list" (by date? priority? completion status?) | **STOP** - Seek clarification before proceeding |
644
- | \u{1F7E0} **HIGH** | Core underspecified but direction clear; affects majority of scenarios; vague success criteria; assumptions risky | "Fix ordering" (sequence OR visibility?), "Add validation" (what? messages?), "Update dashboard" (which widgets?) | **STOP** - Seek clarification before proceeding |
645
- | \u{1F7E1} **MEDIUM** | Specific details missing; general requirements clear; affects subset of cases; reasonable low-risk assumptions possible; wrong assumption = test updates not strategy overhaul | Missing field labels, unclear error message text, undefined timeouts, button placement not specified, date formats unclear | **PROCEED** - (1) Moderate exploration, (2) Document assumptions: "Assuming X because Y", (3) Proceed with creation/execution, (4) Async clarification (team-communicator), (5) Mark [ASSUMED: description] |
646
- | \u{1F7E2} **LOW** | Minor edge cases; documentation gaps don't affect execution; optional/cosmetic elements; minimal impact | Tooltip text, optional field validation, icon choice, placeholder text, tab order | **PROCEED** - (1) Mark [TO BE CLARIFIED: description], (2) Proceed, (3) Mention in report "Minor Details", (4) No blocking/async clarification |
647
-
648
- ### Step {{STEP_NUMBER}}.3: Check Memory for Similar Clarifications
649
-
650
- Before asking, check if similar question was answered:
651
-
652
- **Process:**
653
- 1. **Query team-communicator memory** - Search by feature name, ambiguity pattern, ticket keywords
654
- 2. **Review past Q&A** - Similar question asked? What was answer? Applicable now?
655
- 3. **Assess reusability:**
656
- - Directly applicable \u2192 Use answer, no re-ask
657
- - Partially applicable \u2192 Adapt and reference ("Previously for X, clarified Y. Same here?")
658
- - Not applicable \u2192 Ask as new
659
- 4. **Update memory** - Store Q&A with task type, feature, pattern tags
660
-
661
- **Example:** Query "todo sorting priority" \u2192 Found 2025-01-15: "Should completed todos appear in main list?" \u2192 Answer: "No, move to separate archive view" \u2192 Directly applicable \u2192 Use, no re-ask needed
662
-
663
- ### Step {{STEP_NUMBER}}.4: Formulate Clarification Questions
664
-
665
- If clarification needed (CRITICAL/HIGH severity), formulate specific, concrete questions:
666
-
667
- **Good Questions:** Specific and concrete, provide context, offer options, reference examples, tie to test strategy
668
-
669
- **Bad Questions:** Too vague/broad, assumptive, multiple questions in one, no context
670
-
671
- **Template:**
672
- \`\`\`
673
- **Context:** [Current understanding]
674
- **Ambiguity:** [Specific unclear aspect]
675
- **Question:** [Specific question with options]
676
- **Why Important:** [Testing strategy impact]
677
-
678
- Example:
679
- Context: TODO-456 "Fix the sorting in the todo list so items appear in the right order"
680
- Ambiguity: "sorting" = (A) by creation date, (B) by due date, (C) by priority level, or (D) custom user-defined order
681
- Question: Should todos be sorted by due date (soonest first) or priority (high to low)? Should completed items appear in the list or move to archive?
682
- Why Important: Different sort criteria require different test assertions. Current app shows 15 active todos + 8 completed in mixed order.
683
- \`\`\`
684
-
685
- ### Step {{STEP_NUMBER}}.5: Communicate Clarification Request
686
-
687
- **For Slack-Triggered Tasks:** Use team-communicator subagent:
688
- \`\`\`
689
- Ask clarification in Slack thread:
690
- Context: [From ticket/description]
691
- Ambiguity: [Describe ambiguity]
692
- Severity: [CRITICAL/HIGH]
693
- Questions:
694
- 1. [First specific question]
695
- 2. [Second if needed]
696
-
697
- Clarification needed to proceed. I'll wait for response before testing.
698
- \`\`\`
699
-
700
- **For Manual/API Triggers:** Include in task output:
701
- \`\`\`markdown
702
- ## \u26A0\uFE0F Clarification Required Before Testing
703
-
704
- **Ambiguity:** [Description]
705
- **Severity:** [CRITICAL/HIGH]
706
-
707
- ### Questions:
708
- 1. **Question:** [First question]
709
- - Context: [Provide context]
710
- - Options: [If applicable]
711
- - Impact: [Testing impact]
712
-
713
- **Action Required:** Provide clarification. Testing cannot proceed.
714
- **Current Observation:** [What exploration revealed - concrete examples]
715
- \`\`\`
716
-
717
- ### Step {{STEP_NUMBER}}.5.1: Register Blocked Task (CRITICAL/HIGH only)
718
-
719
- When asking a CRITICAL or HIGH severity question that blocks progress, register the task in the blocked queue so it can be automatically re-triggered when clarification arrives.
720
-
721
- **Update \`.bugzy/runtime/blocked-task-queue.md\`:**
722
-
723
- 1. Read the current file (create if doesn't exist)
724
- 2. Add a new row to the Queue table
725
-
726
- \`\`\`markdown
727
- # Blocked Task Queue
728
-
729
- Tasks waiting for clarification responses.
730
-
731
- | Task Slug | Question | Original Args |
732
- |-----------|----------|---------------|
733
- | generate-test-plan | Should todos be sorted by date or priority? | \`{"ticketId": "TODO-456"}\` |
734
- \`\`\`
735
-
736
- **Entry Fields:**
737
- - **Task Slug**: The task slug (e.g., \`generate-test-plan\`) - used for re-triggering
738
- - **Question**: The clarification question asked (so LLM can match responses)
739
- - **Original Args**: JSON-serialized \`$ARGUMENTS\` wrapped in backticks
740
-
741
- **Purpose**: The LLM processor reads this file and matches user responses to pending questions. When a match is found, it re-queues the task with the clarification.
742
-
743
- ### Step {{STEP_NUMBER}}.6: Wait or Proceed Based on Severity
744
-
745
- **CRITICAL/HIGH \u2192 STOP and Wait:**
746
- - Do NOT create tests, run tests, or make assumptions
747
- - Wait for clarification, resume after answer
748
- - *Rationale: Wrong assumptions = incorrect tests, false results, wasted time*
749
-
750
- **MEDIUM \u2192 Proceed with Documented Assumptions:**
751
- - Perform moderate exploration, document assumptions, proceed with creation/execution
752
- - Ask clarification async (team-communicator), mark results "based on assumptions"
753
- - Update tests after clarification received
754
- - *Rationale: Waiting blocks progress; documented assumptions allow forward movement with later corrections*
755
-
756
- **LOW \u2192 Proceed and Mark:**
757
- - Proceed with creation/execution, mark gaps [TO BE CLARIFIED] or [ASSUMED]
758
- - Mention in report but don't prioritize, no blocking
759
- - *Rationale: Details don't affect strategy/results significantly*
760
-
761
- ### Step {{STEP_NUMBER}}.7: Document Clarification in Results
762
-
763
- When reporting test results, always include an "Ambiguities" section if clarification occurred:
764
-
765
- \`\`\`markdown
766
- ## Ambiguities Encountered
767
-
768
- ### Clarification: [Topic]
769
- - **Severity:** [CRITICAL/HIGH/MEDIUM/LOW]
770
- - **Question Asked:** [What was asked]
771
- - **Response:** [Answer received, or "Awaiting response"]
772
- - **Impact:** [How this affected testing]
773
- - **Assumption Made:** [If proceeded with assumption]
774
- - **Risk:** [What could be wrong if assumption is incorrect]
775
-
776
- ### Resolution:
777
- [How the clarification was resolved and incorporated into testing]
778
- \`\`\`
779
-
780
- ---
781
-
782
- ## Remember:
783
-
784
- \u{1F6D1} **Block for CRITICAL/HIGH** | \u2705 **Ask correctly > guess poorly** | \u{1F4DD} **Document MEDIUM assumptions** | \u{1F50D} **Check memory first** | \u{1F3AF} **Specific questions \u2192 specific answers**
785
- `;
786
-
787
23
  // src/tasks/library/generate-test-cases.ts
788
24
  var generateTestCasesTask = {
789
25
  slug: TASK_SLUGS.GENERATE_TEST_CASES,
@@ -793,40 +29,39 @@ var generateTestCasesTask = {
793
29
  description: "Generate manual test case documentation AND automated Playwright test scripts from test plan",
794
30
  "argument-hint": "--type [exploratory|functional|regression|smoke] --focus [optional-feature]"
795
31
  },
796
- baseContent: `# Generate Test Cases Command
797
-
798
- ## SECURITY NOTICE
799
- **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
800
- - **Read \`.env.testdata\`** for non-secret test data (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
801
- - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
802
- - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
803
- - The \`.env\` file access is blocked by settings.json
804
-
805
- Generate comprehensive test artifacts including BOTH manual test case documentation AND automated Playwright test scripts.
806
-
807
- ## Overview
32
+ steps: [
33
+ // Step 1: Overview (inline)
34
+ {
35
+ inline: true,
36
+ title: "Generate Test Cases Overview",
37
+ content: `Generate comprehensive test artifacts including BOTH manual test case documentation AND automated Playwright test scripts.
808
38
 
809
39
  This command generates:
810
40
  1. **Manual Test Case Documentation** (in \`./test-cases/\`) - Human-readable test cases in markdown format
811
41
  2. **Automated Playwright Tests** (in \`./tests/specs/\`) - Executable TypeScript test scripts
812
42
  3. **Page Object Models** (in \`./tests/pages/\`) - Reusable page classes for automated tests
813
- 4. **Supporting Files** (fixtures, helpers, components) - As needed for test automation
814
-
815
- ## Arguments
816
- Arguments: $ARGUMENTS
43
+ 4. **Supporting Files** (fixtures, helpers, components) - As needed for test automation`
44
+ },
45
+ // Step 2: Security Notice (library)
46
+ "security-notice",
47
+ // Step 3: Arguments (inline)
48
+ {
49
+ inline: true,
50
+ title: "Arguments",
51
+ content: `Arguments: $ARGUMENTS
817
52
 
818
- ## Parse Arguments
53
+ **Parse Arguments:**
819
54
  Extract the following from arguments:
820
55
  - **type**: Test type (exploratory, functional, regression, smoke) - defaults to functional
821
- - **focus**: Optional specific feature or section to focus on
822
-
823
- ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
824
-
825
- ## Process
826
-
827
- ### Step 1: Gather Context
828
-
829
- #### 1.1 Read Test Plan
56
+ - **focus**: Optional specific feature or section to focus on`
57
+ },
58
+ // Step 4: Knowledge Base Read (library)
59
+ "read-knowledge-base",
60
+ // Step 5: Gather Context (inline)
61
+ {
62
+ inline: true,
63
+ title: "Gather Context",
64
+ content: `**1.1 Read Test Plan**
830
65
  Read the test plan from \`test-plan.md\` to understand:
831
66
  - Test items and features
832
67
  - Testing approach and automation strategy
@@ -835,35 +70,40 @@ Read the test plan from \`test-plan.md\` to understand:
835
70
  - Test environment and data requirements
836
71
  - Automation decision criteria
837
72
 
838
- #### 1.2 Check Existing Test Cases and Tests
73
+ **1.2 Check Existing Test Cases and Tests**
839
74
  - List all files in \`./test-cases/\` to understand existing manual test coverage
840
75
  - List all files in \`./tests/specs/\` to understand existing automated tests
841
76
  - Determine next test case ID (TC-XXX format)
842
77
  - Identify existing Page Objects in \`./tests/pages/\`
843
- - Avoid creating overlapping test cases or duplicate automation
844
-
845
- {{DOCUMENTATION_RESEARCHER_INSTRUCTIONS}}
846
-
847
- ### Step 1.4: Explore Features (If Needed)
848
-
849
- If documentation is insufficient or ambiguous, perform adaptive exploration to understand actual feature behavior before creating test cases.
850
-
851
- ${EXPLORATION_INSTRUCTIONS.replace(/{{STEP_NUMBER}}/g, "1.4")}
852
-
853
- ### Step 1.5: Clarify Ambiguities
854
-
855
- If exploration or documentation review reveals ambiguous requirements, use the clarification protocol to resolve them before generating test cases.
856
-
857
- ${CLARIFICATION_INSTRUCTIONS.replace(/{{STEP_NUMBER}}/g, "1.5")}
858
-
859
- **Important Notes:**
860
- - **CRITICAL/HIGH ambiguities:** STOP test case generation and seek clarification
861
- - **MEDIUM ambiguities:** Document assumptions explicitly in test case with [ASSUMED: reason]
862
- - **LOW ambiguities:** Mark with [TO BE CLARIFIED: detail] in test case notes section
863
-
864
- ### Step 1.6: Organize Test Scenarios by Area
78
+ - Avoid creating overlapping test cases or duplicate automation`
79
+ },
80
+ // Step 6: Documentation Researcher (conditional inline)
81
+ {
82
+ inline: true,
83
+ title: "Gather Product Documentation",
84
+ content: `{{INVOKE_DOCUMENTATION_RESEARCHER}} to gather comprehensive product documentation:
865
85
 
866
- Based on exploration and documentation, organize test scenarios by feature area/component:
86
+ \`\`\`
87
+ Explore all available product documentation, specifically focusing on:
88
+ - UI elements and workflows
89
+ - User interactions and navigation paths
90
+ - Form fields and validation rules
91
+ - Error messages and edge cases
92
+ - Authentication and authorization flows
93
+ - Business rules and constraints
94
+ - API endpoints for test data setup
95
+ \`\`\``,
96
+ conditionalOnSubagent: "documentation-researcher"
97
+ },
98
+ // Step 7: Exploration Protocol (from library)
99
+ "exploration-protocol",
100
+ // Step 8: Clarification Protocol (from library)
101
+ "clarification-protocol",
102
+ // Step 9: Organize Test Scenarios (inline - task-specific)
103
+ {
104
+ inline: true,
105
+ title: "Organize Test Scenarios by Area",
106
+ content: `Based on exploration and documentation, organize test scenarios by feature area/component:
867
107
 
868
108
  **Group scenarios into areas** (e.g., Authentication, Dashboard, Checkout, Profile Management):
869
109
  - Each area should be a logical feature grouping
@@ -898,19 +138,21 @@ Based on exploration and documentation, organize test scenarios by feature area/
898
138
 
899
139
  Example structure:
900
140
  - **Authentication**: TC-001 Valid login (smoke, automate), TC-002 Invalid password (automate), TC-003 Password reset (automate)
901
- - **Dashboard**: TC-004 View dashboard widgets (smoke, automate), TC-005 Filter data by date (automate), TC-006 Export data (manual - rare use)
902
-
903
- ### Step 1.7: Generate All Manual Test Case Files
904
-
905
- Generate ALL manual test case markdown files in the \`./test-cases/\` directory BEFORE invoking the test-code-generator agent.
141
+ - **Dashboard**: TC-004 View dashboard widgets (smoke, automate), TC-005 Filter data by date (automate), TC-006 Export data (manual - rare use)`
142
+ },
143
+ // Step 10: Generate Manual Test Cases (inline)
144
+ {
145
+ inline: true,
146
+ title: "Generate All Manual Test Case Files",
147
+ content: `Generate ALL manual test case markdown files in the \`./test-cases/\` directory BEFORE invoking the test-code-generator agent.
906
148
 
907
- **For each test scenario from Step 1.6:**
149
+ **For each test scenario from the previous step:**
908
150
 
909
151
  1. **Create test case file** in \`./test-cases/\` with format \`TC-XXX-feature-description.md\`
910
152
  2. **Include frontmatter** with:
911
153
  - \`id:\` TC-XXX (sequential ID)
912
154
  - \`title:\` Clear, descriptive title
913
- - \`automated:\` true/false (based on automation decision from Step 1.6)
155
+ - \`automated:\` true/false (based on automation decision)
914
156
  - \`automated_test:\` (leave empty - will be filled by subagent when automated)
915
157
  - \`type:\` exploratory/functional/regression/smoke
916
158
  - \`area:\` Feature area/component
@@ -922,30 +164,30 @@ Generate ALL manual test case markdown files in the \`./test-cases/\` directory
922
164
  - **Test Data**: Environment variables to use (e.g., \${TEST_BASE_URL}, \${TEST_OWNER_EMAIL})
923
165
  - **Notes**: Any assumptions, clarifications needed, or special considerations
924
166
 
925
- **Output**: All manual test case markdown files created in \`./test-cases/\` with automation flags set
926
-
927
- ### Step 2: Automate Test Cases Area by Area
928
-
929
- **IMPORTANT**: Process each feature area separately to enable incremental, focused test creation.
930
-
931
- **For each area from Step 1.6**, invoke the test-code-generator agent:
167
+ **Output**: All manual test case markdown files created in \`./test-cases/\` with automation flags set`
168
+ },
169
+ // Step 11: Automate Test Cases (inline - detailed instructions for test-code-generator)
170
+ {
171
+ inline: true,
172
+ title: "Automate Test Cases Area by Area",
173
+ content: `**IMPORTANT**: Process each feature area separately to enable incremental, focused test creation.
932
174
 
933
- #### Step 2.1: Prepare Area Context
175
+ **For each area**, invoke the test-code-generator agent:
934
176
 
177
+ **Prepare Area Context:**
935
178
  Before invoking the agent, identify the test cases for the current area:
936
179
  - Current area name
937
180
  - Test case files for this area (e.g., TC-001-valid-login.md, TC-002-invalid-password.md)
938
181
  - Which test cases are marked for automation (automated: true)
939
- - Test type: {type}
182
+ - Test type from arguments
940
183
  - Test plan reference: test-plan.md
941
184
  - Existing automated tests in ./tests/specs/
942
185
  - Existing Page Objects in ./tests/pages/
943
186
 
944
- #### Step 2.2: Invoke test-code-generator Agent
187
+ **Invoke test-code-generator Agent:**
945
188
 
946
189
  {{INVOKE_TEST_CODE_GENERATOR}} for the current area with the following context:
947
190
 
948
- **Agent Invocation:**
949
191
  "Automate test cases for the [AREA_NAME] area.
950
192
 
951
193
  **Context:**
@@ -966,63 +208,34 @@ Before invoking the agent, identify the test cases for the current area:
966
208
  - Create automated Playwright test in ./tests/specs/
967
209
  - Update the manual test case file to reference the automated test path
968
210
  6. Run and iterate on each test until it passes or fails with a product bug
969
- 8. Update .env.testdata with any new variables
211
+ 7. Update .env.testdata with any new variables
970
212
 
971
213
  **Focus only on the [AREA_NAME] area** - do not automate tests for other areas yet."
972
214
 
973
- #### Step 2.3: Verify Area Completion
974
-
215
+ **Verify Area Completion:**
975
216
  After the agent completes the area, verify:
976
217
  - Manual test case files updated with automated_test references
977
218
  - Automated tests created for all test cases marked automated: true
978
219
  - Tests are passing (or failing with documented product bugs)
979
220
  - Page Objects created/updated for the area
980
221
 
981
- #### Step 2.4: Repeat for Next Area
982
-
983
- Move to the next area and repeat Steps 2.1-2.3 until all areas are complete.
222
+ **Repeat for Next Area:**
223
+ Move to the next area and repeat until all areas are complete.
984
224
 
985
225
  **Benefits of area-by-area approach**:
986
226
  - Agent focuses on one feature at a time
987
227
  - POMs built incrementally as needed
988
228
  - Tests verified before moving to next area
989
229
  - Easier to manage and track progress
990
- - Can pause/resume between areas if needed
991
-
992
- ### Step 2.5: Validate Generated Artifacts
993
-
994
- After the test-code-generator completes, verify:
995
-
996
- 1. **Manual Test Cases (in \`./test-cases/\`)**:
997
- - Each has unique TC-XXX ID
998
- - Frontmatter includes \`automated: true/false\` flag
999
- - If automated, includes \`automated_test\` path reference
1000
- - Contains human-readable steps and expected results
1001
- - References environment variables for test data
1002
-
1003
- 2. **Automated Tests (in \`./tests/specs/\`)**:
1004
- - Organized by feature in subdirectories
1005
- - Each test file references manual test case ID in comments
1006
- - Uses Page Object Model pattern
1007
- - Follows role-based selector priority
1008
- - Uses environment variables for test data
1009
- - Includes proper TypeScript typing
1010
-
1011
- 3. **Page Objects (in \`./tests/pages/\`)**:
1012
- - Extend BasePage class
1013
- - Use semantic selectors (getByRole, getByLabel, getByText)
1014
- - Contain only actions, no assertions
1015
- - Properly typed with TypeScript
1016
-
1017
- 4. **Supporting Files**:
1018
- - Fixtures created for common setup (in \`./tests/fixtures/\`)
1019
- - Helper functions for data generation (in \`./tests/helpers/\`)
1020
- - Component objects for reusable UI elements (in \`./tests/components/\`)
1021
- - Types defined as needed (in \`./tests/types/\`)
1022
-
1023
- ### Step 3: Create Directories if Needed
1024
-
1025
- Ensure required directories exist:
230
+ - Can pause/resume between areas if needed`
231
+ },
232
+ // Step 12: Validate Artifacts (library)
233
+ "validate-test-artifacts",
234
+ // Step 13: Create Directories (inline)
235
+ {
236
+ inline: true,
237
+ title: "Create Directories if Needed",
238
+ content: `Ensure required directories exist:
1026
239
  \`\`\`bash
1027
240
  mkdir -p ./test-cases
1028
241
  mkdir -p ./tests/specs
@@ -1030,23 +243,53 @@ mkdir -p ./tests/pages
1030
243
  mkdir -p ./tests/components
1031
244
  mkdir -p ./tests/fixtures
1032
245
  mkdir -p ./tests/helpers
1033
- \`\`\`
1034
-
1035
- ### Step 4: Update .env.testdata (if needed)
1036
-
1037
- If new environment variables were introduced:
1038
- - Read current \`.env.testdata\`
1039
- - Add new TEST_* variables with empty values
1040
- - Group variables logically with comments
1041
- - Document what each variable is for
1042
-
1043
- ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
246
+ \`\`\``
247
+ },
248
+ // Step 14: Extract Env Variables (library)
249
+ "extract-env-variables",
250
+ // Step 15: Knowledge Base Update (library)
251
+ "update-knowledge-base",
252
+ // Step 16: Team Communication (conditional inline)
253
+ {
254
+ inline: true,
255
+ title: "Team Communication",
256
+ content: `{{INVOKE_TEAM_COMMUNICATOR}} to notify the product team about the new test cases and automated tests:
1044
257
 
1045
- {{TEAM_COMMUNICATOR_INSTRUCTIONS}}
258
+ \`\`\`
259
+ 1. Post an update about test case and automation creation
260
+ 2. Provide summary of coverage:
261
+ - Number of manual test cases created
262
+ - Number of automated tests created
263
+ - Features covered by automation
264
+ - Areas kept manual-only (and why)
265
+ 3. Highlight key automated test scenarios
266
+ 4. Share command to run automated tests: npx playwright test
267
+ 5. Ask for team review and validation
268
+ 6. Mention any areas needing exploration or clarification
269
+ 7. Use appropriate channel and threading for the update
270
+ \`\`\`
1046
271
 
1047
- ### Step 5: Final Summary
272
+ The team communication should include:
273
+ - **Test artifacts created**: Manual test cases + automated tests count
274
+ - **Automation coverage**: Which features are now automated
275
+ - **Manual-only areas**: Why some tests are kept manual (rare scenarios, exploratory)
276
+ - **Key automated scenarios**: Critical paths now covered by automation
277
+ - **Running tests**: Command to execute automated tests
278
+ - **Review request**: Ask team to validate scenarios and review test code
279
+ - **Next steps**: Plans for CI/CD integration or additional test coverage
1048
280
 
1049
- Provide a comprehensive summary showing:
281
+ **Update team communicator memory:**
282
+ - Record this communication
283
+ - Note test case and automation creation
284
+ - Track team feedback on automation approach
285
+ - Document any clarifications requested`,
286
+ conditionalOnSubagent: "team-communicator"
287
+ },
288
+ // Step 17: Final Summary (inline)
289
+ {
290
+ inline: true,
291
+ title: "Final Summary",
292
+ content: `Provide a comprehensive summary showing:
1050
293
 
1051
294
  **Manual Test Cases:**
1052
295
  - Number of manual test cases created
@@ -1070,245 +313,162 @@ Provide a comprehensive summary showing:
1070
313
  - Note about copying .env.testdata to .env
1071
314
  - Mention any exploration needed for edge cases
1072
315
 
1073
- ### Important Notes
1074
-
316
+ **Important Notes:**
1075
317
  - **Both Manual AND Automated**: Generate both artifacts - they serve different purposes
1076
318
  - **Manual Test Cases**: Documentation, reference, can be executed manually when needed
1077
319
  - **Automated Tests**: Fast, repeatable, for CI/CD and regression testing
1078
320
  - **Automation Decision**: Not all test cases need automation - rare edge cases can stay manual
1079
321
  - **Linking**: Manual test cases reference automated tests; automated tests reference manual test case IDs
1080
- - **Two-Phase Workflow**: First generate all manual test cases (Step 1.7), then automate area-by-area (Step 2)
1081
- - **Ambiguity Handling**: Use exploration (Step 1.4) and clarification (Step 1.5) protocols before generating
322
+ - **Two-Phase Workflow**: First generate all manual test cases, then automate area-by-area
323
+ - **Ambiguity Handling**: Use exploration and clarification protocols before generating
1082
324
  - **Environment Variables**: Use \`process.env.VAR_NAME\` in tests, update .env.testdata as needed
1083
- - **Test Independence**: Each test must be runnable in isolation and in parallel`,
1084
- optionalSubagents: [
1085
- {
1086
- role: "documentation-researcher",
1087
- contentBlock: `#### 1.4 Gather Product Documentation
1088
-
1089
- {{INVOKE_DOCUMENTATION_RESEARCHER}} to gather comprehensive product documentation:
1090
-
1091
- \`\`\`
1092
- Explore all available product documentation, specifically focusing on:
1093
- - UI elements and workflows
1094
- - User interactions and navigation paths
1095
- - Form fields and validation rules
1096
- - Error messages and edge cases
1097
- - Authentication and authorization flows
1098
- - Business rules and constraints
1099
- - API endpoints for test data setup
1100
- \`\`\``
1101
- },
1102
- {
1103
- role: "team-communicator",
1104
- contentBlock: `### Step 4.5: Team Communication
1105
-
1106
- {{INVOKE_TEAM_COMMUNICATOR}} to notify the product team about the new test cases and automated tests:
1107
-
1108
- \`\`\`
1109
- 1. Post an update about test case and automation creation
1110
- 2. Provide summary of coverage:
1111
- - Number of manual test cases created
1112
- - Number of automated tests created
1113
- - Features covered by automation
1114
- - Areas kept manual-only (and why)
1115
- 3. Highlight key automated test scenarios
1116
- 4. Share command to run automated tests: npx playwright test
1117
- 5. Ask for team review and validation
1118
- 6. Mention any areas needing exploration or clarification
1119
- 7. Use appropriate channel and threading for the update
1120
- \`\`\`
1121
-
1122
- The team communication should include:
1123
- - **Test artifacts created**: Manual test cases + automated tests count
1124
- - **Automation coverage**: Which features are now automated
1125
- - **Manual-only areas**: Why some tests are kept manual (rare scenarios, exploratory)
1126
- - **Key automated scenarios**: Critical paths now covered by automation
1127
- - **Running tests**: Command to execute automated tests
1128
- - **Review request**: Ask team to validate scenarios and review test code
1129
- - **Next steps**: Plans for CI/CD integration or additional test coverage
1130
-
1131
- **Update team communicator memory:**
1132
- - Record this communication
1133
- - Note test case and automation creation
1134
- - Track team feedback on automation approach
1135
- - Document any clarifications requested`
325
+ - **Test Independence**: Each test must be runnable in isolation and in parallel`
1136
326
  }
1137
327
  ],
1138
- requiredSubagents: ["test-runner", "test-code-generator"]
328
+ requiredSubagents: ["test-runner", "test-code-generator"],
329
+ optionalSubagents: ["documentation-researcher", "team-communicator"],
330
+ dependentTasks: []
1139
331
  };
1140
332
 
1141
333
  // src/tasks/library/generate-test-plan.ts
1142
334
  var generateTestPlanTask = {
1143
335
  slug: TASK_SLUGS.GENERATE_TEST_PLAN,
1144
336
  name: "Generate Test Plan",
1145
- description: "Generate a comprehensive test plan from product description",
337
+ description: "Generate a concise feature checklist test plan from product description",
1146
338
  frontmatter: {
1147
- description: "Generate a comprehensive test plan from product description",
339
+ description: "Generate a concise feature checklist test plan (~50-100 lines)",
1148
340
  "argument-hint": "<product-description>"
1149
341
  },
1150
- baseContent: `# Generate Test Plan Command
1151
-
1152
- ## SECURITY NOTICE
1153
- **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
1154
- - **Read \`.env.testdata\`** for non-secret test data (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
1155
- - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
1156
- - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
1157
- - The \`.env\` file access is blocked by settings.json
1158
-
1159
- Generate a comprehensive test plan from product description following the Brain Module specifications.
1160
-
1161
- ## Arguments
1162
- Product description: $ARGUMENTS
1163
-
1164
- ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
1165
-
1166
- ## Process
1167
-
1168
- ### Step 1: Load project context
1169
- Read \`.bugzy/runtime/project-context.md\` to understand:
1170
- - Project overview and key platform features
1171
- - SDLC methodology and sprint duration
1172
- - Testing environment and goals
1173
- - Technical stack and constraints
1174
- - QA workflow and processes
1175
-
1176
- ### Step 1.5: Process the product description
1177
- Use the product description provided directly in the arguments, enriched with project context understanding.
1178
-
1179
- ### Step 1.6: Initialize environment variables tracking
1180
- Create a list to track all TEST_ prefixed environment variables discovered throughout the process.
1181
-
1182
- {{DOCUMENTATION_RESEARCHER_INSTRUCTIONS}}
1183
-
1184
- ### Step 1.7: Explore Product (If Needed)
1185
-
1186
- If product description is vague or incomplete, perform adaptive exploration to understand actual product features and behavior.
1187
-
1188
- ${EXPLORATION_INSTRUCTIONS.replace(/{{STEP_NUMBER}}/g, "1.7")}
1189
-
1190
- ### Step 1.8: Clarify Ambiguities
1191
-
1192
- If exploration or product description reveals ambiguous requirements, use the clarification protocol before generating the test plan.
1193
-
1194
- ${CLARIFICATION_INSTRUCTIONS.replace(/{{STEP_NUMBER}}/g, "1.8")}
1195
-
1196
- **Important Notes:**
1197
- - **CRITICAL/HIGH ambiguities:** STOP test plan generation and seek clarification
1198
- - Examples: Undefined core features, unclear product scope, contradictory requirements
1199
- - **MEDIUM ambiguities:** Document assumptions in test plan with [ASSUMED: reason] and seek async clarification
1200
- - Examples: Missing field lists, unclear validation rules, vague user roles
1201
- - **LOW ambiguities:** Mark with [TO BE EXPLORED: detail] in test plan for future investigation
1202
- - Examples: Optional features, cosmetic details, non-critical edge cases
1203
-
1204
- ### Step 3: Prepare the test plan generation context
1205
-
1206
- **After ensuring requirements are clear through exploration and clarification:**
342
+ steps: [
343
+ // Step 1: Overview (inline)
344
+ {
345
+ inline: true,
346
+ title: "Generate Test Plan Overview",
347
+ content: `Generate a comprehensive test plan from product description following the Brain Module specifications.`
348
+ },
349
+ // Step 2: Security Notice (library)
350
+ "security-notice",
351
+ // Step 3: Arguments (inline)
352
+ {
353
+ inline: true,
354
+ title: "Arguments",
355
+ content: `Product description: $ARGUMENTS`
356
+ },
357
+ // Step 4: Knowledge Base Read (library)
358
+ "read-knowledge-base",
359
+ // Step 5: Load Project Context (library)
360
+ "load-project-context",
361
+ // Step 6: Process Description (inline)
362
+ {
363
+ inline: true,
364
+ title: "Process the Product Description",
365
+ content: `Use the product description provided directly in the arguments, enriched with project context understanding.`
366
+ },
367
+ // Step 7: Initialize Env Tracking (inline)
368
+ {
369
+ inline: true,
370
+ title: "Initialize Environment Variables Tracking",
371
+ content: `Create a list to track all TEST_ prefixed environment variables discovered throughout the process.`
372
+ },
373
+ // Step 8: Documentation Researcher (conditional inline)
374
+ {
375
+ inline: true,
376
+ title: "Gather Comprehensive Project Documentation",
377
+ content: `{{INVOKE_DOCUMENTATION_RESEARCHER}} to explore and gather all available project information and other documentation sources. This ensures the test plan is based on complete and current information.
1207
378
 
1208
- Based on the gathered information:
1209
- - **goal**: Extract the main purpose and objectives from all available documentation
1210
- - **knowledge**: Combine product description with discovered documentation insights
1211
- - **testPlan**: Use the standard test plan template structure, enriched with documentation findings
1212
- - **gaps**: Identify areas lacking documentation that will need exploration
1213
-
1214
- ### Step 4: Generate the test plan using the prompt template
1215
-
1216
- You are an expert QA Test Plan Writer with expertise in both manual and automated testing strategies. Using the gathered information and context from the product description provided, you will now produce a comprehensive test plan in Markdown format that includes an automation strategy.
1217
-
1218
- Writing Instructions:
1219
- - **Use Product Terminology:** Incorporate exact terms and labels from the product description for features and UI elements (to ensure the test plan uses official naming).
1220
- - **Testing Scope:** The plan covers both automated E2E testing via Playwright and exploratory manual testing. Focus on what a user can do and see in a browser.
1221
- - **Test Data - IMPORTANT:**
1222
- - DO NOT include test data values in the test plan body
1223
- - Test data goes ONLY to the \`.env.testdata\` file
1224
- - In the test plan, reference \`.env.testdata\` for test data requirements
1225
- - Define test data as environment variables prefixed with TEST_ (e.g., TEST_BASE_URL, TEST_USER_EMAIL, TEST_USER_PASSWORD)
1226
- - DO NOT GENERATE VALUES FOR THE ENV VARS, ONLY THE KEYS
1227
- - Track all TEST_ variables for extraction to .env.testdata in Step 7
1228
- - **DO NOT INCLUDE TEST SCENARIOS**
1229
- - **Incorporate All Relevant Info:** If the product description mentions specific requirements, constraints, or acceptance criteria (such as field validations, role-based access rules, important parameters), make sure these are reflected in the test plan. Do not add anything not supported by the given information.
1230
- - **Test Automation Strategy Section - REQUIRED:** Include a comprehensive "Test Automation Strategy" section with the following subsections:
1231
-
1232
- **## Test Automation Strategy**
1233
-
1234
- ### Automated Test Coverage
1235
- - Identify critical user paths to automate (login, checkout, core features)
1236
- - Define regression test scenarios for automation
1237
- - Specify API endpoints that need automated testing
1238
- - List smoke test scenarios for CI/CD pipeline
1239
-
1240
- ### Exploratory Testing Areas
1241
- - New features not yet automated
1242
- - Complex edge cases requiring human judgment
1243
- - Visual/UX validation requiring subjective assessment
1244
- - Scenarios that are not cost-effective to automate
1245
-
1246
- ### Test Data Management
1247
- - Environment variables strategy (which vars go in .env.example vs .env)
1248
- - Dynamic test data generation approach (use data generators)
1249
- - API-based test data setup (10-20x faster than UI)
1250
- - Test data isolation and cleanup strategy
1251
-
1252
- ### Automation Approach
1253
- - **Framework:** Playwright + TypeScript (already scaffolded)
1254
- - **Pattern:** Page Object Model for all pages
1255
- - **Selectors:** Prioritize role-based selectors (getByRole, getByLabel, getByText)
1256
- - **Components:** Reusable component objects for common UI elements
1257
- - **Fixtures:** Custom fixtures for authenticated sessions and common setup
1258
- - **API for Speed:** Use Playwright's request context to create test data via API
1259
- - **Best Practices:** Reference \`.bugzy/runtime/testing-best-practices.md\` for patterns
1260
-
1261
- ### Test Organization
1262
- - Automated tests location: \`./tests/specs/[feature]/\`
1263
- - Page Objects location: \`./tests/pages/\`
1264
- - Manual test cases location: \`./test-cases/\` (human-readable documentation)
1265
- - Test case naming: TC-XXX-feature-description.md
1266
- - Automated test naming: feature.spec.ts
1267
-
1268
- ### Automation Decision Criteria
1269
- Define which scenarios warrant automation:
1270
- - \u2705 Automate: Frequent execution, critical paths, regression tests, CI/CD integration
1271
- - \u274C Keep Manual: Rare edge cases, exploratory tests, visual validation, one-time checks
1272
-
1273
- ### Step 5: Create the test plan file
1274
-
1275
- Read the test plan template from \`.bugzy/runtime/templates/test-plan-template.md\` and use it as the base structure. Fill in the placeholders with information extracted from BOTH the product description AND documentation research:
1276
-
1277
- 1. Read the template file from \`.bugzy/runtime/templates/test-plan-template.md\`
1278
- 2. Replace placeholders like:
1279
- - \`[ProjectName]\` with the actual project name from the product description
1280
- - \`[Date]\` with the current date
1281
- - Feature sections with actual features identified from all documentation sources
1282
- - Test data requirements based on the product's needs and API documentation
1283
- - Risks based on the complexity, known issues, and technical constraints
1284
- 3. Add any product-specific sections that may be needed based on discovered documentation
1285
- 4. **Mark ambiguities based on severity:**
1286
- - CRITICAL/HIGH: Should be clarified before plan creation (see Step 1.8)
1287
- - MEDIUM: Mark with [ASSUMED: reason] and note assumption
1288
- - LOW: Mark with [TO BE EXPLORED: detail] for future investigation
1289
- 5. Include references to source documentation for traceability
1290
-
1291
- ### Step 6: Save the test plan
1292
-
1293
- Save the generated test plan to a file named \`test-plan.md\` in the project root with appropriate frontmatter:
379
+ \`\`\`
380
+ Explore all available project documentation related to: $ARGUMENTS
1294
381
 
1295
- \`\`\`yaml
1296
- ---
1297
- version: 1.0.0
1298
- lifecycle_phase: initial
1299
- created_at: [current date]
1300
- updated_at: [current date]
1301
- last_exploration: null
1302
- total_discoveries: 0
1303
- status: draft
1304
- author: claude
1305
- tags: [functional, security, performance]
1306
- ---
382
+ Specifically gather:
383
+ - Product specifications and requirements
384
+ - User stories and acceptance criteria
385
+ - Technical architecture documentation
386
+ - API documentation and endpoints
387
+ - User roles and permissions
388
+ - Business rules and validations
389
+ - UI/UX specifications
390
+ - Known limitations or constraints
391
+ - Existing test documentation
392
+ - Bug reports or known issues
1307
393
  \`\`\`
1308
394
 
1309
- ### Step 7: Extract and save environment variables
395
+ The agent will:
396
+ 1. Check its memory for previously discovered documentation
397
+ 2. Explore workspace for relevant pages and databases
398
+ 3. Build a comprehensive understanding of the product
399
+ 4. Return synthesized information about all discovered documentation`,
400
+ conditionalOnSubagent: "documentation-researcher"
401
+ },
402
+ // Step 9: Exploration Protocol (from library)
403
+ "exploration-protocol",
404
+ // Step 10: Clarification Protocol (from library)
405
+ "clarification-protocol",
406
+ // Step 11: Prepare Context (inline)
407
+ {
408
+ inline: true,
409
+ title: "Prepare Test Plan Generation Context",
410
+ content: `**After ensuring requirements are clear through exploration and clarification:**
1310
411
 
1311
- **CRITICAL**: Test data values must ONLY go to .env.testdata, NOT in the test plan document.
412
+ Based on the gathered information:
413
+ - **goal**: Extract the main purpose and objectives from all available documentation
414
+ - **knowledge**: Combine product description with discovered documentation insights
415
+ - **testPlan**: Use the standard test plan template structure, enriched with documentation findings
416
+ - **gaps**: Identify areas lacking documentation that will need exploration`
417
+ },
418
+ // Step 12: Generate Test Plan (inline - more detailed than library step)
419
+ {
420
+ inline: true,
421
+ title: "Generate Test Plan Using Simplified Format",
422
+ content: `You are an expert QA Test Plan Writer. Generate a **concise** test plan (~50-100 lines) that serves as a feature checklist for test case generation.
423
+
424
+ **CRITICAL - Keep it Simple:**
425
+ - The test plan is a **feature checklist**, NOT a comprehensive document
426
+ - Detailed UI elements and exploration findings go to \`./exploration-reports/\`
427
+ - Technical patterns and architecture go to \`.bugzy/runtime/knowledge-base.md\`
428
+ - Process documentation stays in \`.bugzy/runtime/project-context.md\`
429
+
430
+ **Writing Instructions:**
431
+ - **Use Product Terminology:** Use exact feature names from the product description
432
+ - **Feature Checklist Format:** Each feature is a checkbox item with brief description
433
+ - **Group by Feature Area:** Organize features into logical sections
434
+ - **NO detailed UI elements** - those belong in exploration reports
435
+ - **NO test scenarios** - those are generated in test cases
436
+ - **NO process documentation** - keep only what's needed for test generation
437
+
438
+ **Test Data Handling:**
439
+ - Test data goes ONLY to \`.env.testdata\` file
440
+ - In test plan, reference environment variable NAMES only (e.g., TEST_BASE_URL)
441
+ - DO NOT generate values for env vars, only keys
442
+ - Track all TEST_ variables for extraction to .env.testdata in the next step`
443
+ },
444
+ // Step 13: Create Test Plan File (inline)
445
+ {
446
+ inline: true,
447
+ title: "Create Test Plan File",
448
+ content: `Read the simplified template from \`.bugzy/runtime/templates/test-plan-template.md\` and fill it in:
449
+
450
+ 1. Read the template file
451
+ 2. Replace placeholders:
452
+ - \`[PROJECT_NAME]\` with the actual project name
453
+ - \`[DATE]\` with the current date
454
+ - Feature sections with actual features grouped by area
455
+ 3. Each feature is a **checkbox item** with brief description
456
+ 4. **Mark ambiguities:**
457
+ - MEDIUM: Mark with [ASSUMED: reason]
458
+ - LOW: Mark with [TO BE EXPLORED: detail]
459
+ 5. Keep total document under 100 lines`
460
+ },
461
+ // Step 14: Save Test Plan (inline)
462
+ {
463
+ inline: true,
464
+ title: "Save Test Plan",
465
+ content: `Save to \`test-plan.md\` in project root. The template already includes frontmatter - just fill in the dates.`
466
+ },
467
+ // Step 15: Extract Env Variables (inline - more detailed than library step)
468
+ {
469
+ inline: true,
470
+ title: "Extract and Save Environment Variables",
471
+ content: `**CRITICAL**: Test data values must ONLY go to .env.testdata, NOT in the test plan document.
1312
472
 
1313
473
  After saving the test plan:
1314
474
 
@@ -1345,53 +505,15 @@ After saving the test plan:
1345
505
  5. **Verify test plan references .env.testdata**:
1346
506
  - Ensure test plan DOES NOT contain test data values
1347
507
  - Ensure test plan references \`.env.testdata\` for test data requirements
1348
- - Add instruction: "Fill in actual values in .env.testdata before running tests"
1349
-
1350
- ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
1351
-
1352
- {{TEAM_COMMUNICATOR_INSTRUCTIONS}}
1353
-
1354
- ### Step 8: Final summary
1355
-
1356
- Provide a summary of:
1357
- - Test plan created successfully at \`test-plan.md\`
1358
- - Environment variables extracted to \`.env.testdata\`
1359
- - Number of TEST_ variables discovered
1360
- - Instructions for the user to fill in actual values in .env.testdata before running tests`,
1361
- optionalSubagents: [
1362
- {
1363
- role: "documentation-researcher",
1364
- contentBlock: `### Step 2: Gather comprehensive project documentation
1365
-
1366
- {{INVOKE_DOCUMENTATION_RESEARCHER}} to explore and gather all available project information and other documentation sources. This ensures the test plan is based on complete and current information.
1367
-
1368
- \`\`\`
1369
- Explore all available project documentation related to: $ARGUMENTS
1370
-
1371
- Specifically gather:
1372
- - Product specifications and requirements
1373
- - User stories and acceptance criteria
1374
- - Technical architecture documentation
1375
- - API documentation and endpoints
1376
- - User roles and permissions
1377
- - Business rules and validations
1378
- - UI/UX specifications
1379
- - Known limitations or constraints
1380
- - Existing test documentation
1381
- - Bug reports or known issues
1382
- \`\`\`
1383
-
1384
- The agent will:
1385
- 1. Check its memory for previously discovered documentation
1386
- 2. Explore workspace for relevant pages and databases
1387
- 3. Build a comprehensive understanding of the product
1388
- 4. Return synthesized information about all discovered documentation`
508
+ - Add instruction: "Fill in actual values in .env.testdata before running tests"`
1389
509
  },
510
+ // Step 16: Knowledge Base Update (library)
511
+ "update-knowledge-base",
512
+ // Step 17: Team Communication (conditional inline)
1390
513
  {
1391
- role: "team-communicator",
1392
- contentBlock: `### Step 7.5: Team Communication
1393
-
1394
- {{INVOKE_TEAM_COMMUNICATOR}} to notify the product team about the new test plan:
514
+ inline: true,
515
+ title: "Team Communication",
516
+ content: `{{INVOKE_TEAM_COMMUNICATOR}} to notify the product team about the new test plan:
1395
517
 
1396
518
  \`\`\`
1397
519
  1. Post an update about the test plan creation
@@ -1412,10 +534,23 @@ The team communication should include:
1412
534
  **Update team communicator memory:**
1413
535
  - Record this communication in the team-communicator memory
1414
536
  - Note this as a test plan creation communication
1415
- - Track team response to this type of update`
537
+ - Track team response to this type of update`,
538
+ conditionalOnSubagent: "team-communicator"
539
+ },
540
+ // Step 18: Final Summary (inline)
541
+ {
542
+ inline: true,
543
+ title: "Final Summary",
544
+ content: `Provide a summary of:
545
+ - Test plan created successfully at \`test-plan.md\`
546
+ - Environment variables extracted to \`.env.testdata\`
547
+ - Number of TEST_ variables discovered
548
+ - Instructions for the user to fill in actual values in .env.testdata before running tests`
1416
549
  }
1417
550
  ],
1418
- requiredSubagents: ["test-runner"]
551
+ requiredSubagents: ["test-runner"],
552
+ optionalSubagents: ["documentation-researcher", "team-communicator"],
553
+ dependentTasks: []
1419
554
  };
1420
555
 
1421
556
  // src/tasks/library/handle-message.ts
@@ -1427,27 +562,30 @@ var handleMessageTask = {
1427
562
  description: "Handle team responses and Slack communications, maintaining context for ongoing conversations",
1428
563
  "argument-hint": "[slack thread context or team message]"
1429
564
  },
1430
- baseContent: `# Handle Message Command
1431
-
1432
- ## SECURITY NOTICE
1433
- **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
1434
- - **Read \`.env.testdata\`** for non-secret environment variables (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
1435
- - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
1436
- - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
1437
- - The \`.env\` file access is blocked by settings.json
1438
-
1439
- Process team responses from Slack threads and handle multi-turn conversations with the product team about testing clarifications, ambiguities, and questions.
1440
-
1441
- ## Arguments
1442
- Team message/thread context: $ARGUMENTS
1443
-
1444
- ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
1445
-
1446
- ## Process
1447
-
1448
- ### Step 0: Detect Message Intent and Load Handler
565
+ steps: [
566
+ // Step 1: Overview (inline)
567
+ {
568
+ inline: true,
569
+ title: "Handle Message Overview",
570
+ content: `# Handle Message Command
1449
571
 
1450
- Before processing the message, identify the intent type to load the appropriate handler.
572
+ Process team responses from Slack threads and handle multi-turn conversations with the product team about testing clarifications, ambiguities, and questions.`
573
+ },
574
+ // Step 2: Security Notice (library)
575
+ "security-notice",
576
+ // Step 3: Arguments (inline)
577
+ {
578
+ inline: true,
579
+ title: "Arguments",
580
+ content: `Team message/thread context: $ARGUMENTS`
581
+ },
582
+ // Step 4: Knowledge Base Read (library)
583
+ "read-knowledge-base",
584
+ // Step 5: Detect Intent (inline - task-specific)
585
+ {
586
+ inline: true,
587
+ title: "Detect Message Intent and Load Handler",
588
+ content: `Before processing the message, identify the intent type to load the appropriate handler.
1451
589
 
1452
590
  #### 0.1 Extract Intent from Event Payload
1453
591
 
@@ -1490,11 +628,17 @@ The handler file contains all necessary processing logic for the detected intent
1490
628
  - Specific processing steps for that intent
1491
629
  - Context loading requirements
1492
630
  - Response guidelines
1493
- - Memory update instructions
1494
-
1495
- ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
1496
-
1497
- ## Key Principles
631
+ - Memory update instructions`
632
+ },
633
+ // Step 6: Clarification Protocol (for ambiguous intents)
634
+ "clarification-protocol",
635
+ // Step 8: Knowledge Base Update (library)
636
+ "update-knowledge-base",
637
+ // Step 9: Key Principles (inline)
638
+ {
639
+ inline: true,
640
+ title: "Key Principles",
641
+ content: `## Key Principles
1498
642
 
1499
643
  ### Context Preservation
1500
644
  - Always maintain full conversation context
@@ -1514,9 +658,13 @@ ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
1514
658
  ### Quality Communication
1515
659
  - Acknowledge team input appropriately
1516
660
  - Provide updates on actions taken
1517
- - Ask good follow-up questions when needed
1518
-
1519
- ## Important Considerations
661
+ - Ask good follow-up questions when needed`
662
+ },
663
+ // Step 10: Important Considerations (inline)
664
+ {
665
+ inline: true,
666
+ title: "Important Considerations",
667
+ content: `## Important Considerations
1520
668
 
1521
669
  ### Thread Organization
1522
670
  - Keep related discussions in same thread
@@ -1536,9 +684,12 @@ ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
1536
684
  ### Memory Maintenance
1537
685
  - Keep active conversations visible and current
1538
686
  - Archive resolved discussions appropriately
1539
- - Maintain searchable history of resolutions`,
687
+ - Maintain searchable history of resolutions`
688
+ }
689
+ ],
690
+ requiredSubagents: ["team-communicator"],
1540
691
  optionalSubagents: [],
1541
- requiredSubagents: ["team-communicator"]
692
+ dependentTasks: []
1542
693
  };
1543
694
 
1544
695
  // src/tasks/library/process-event.ts
@@ -1550,42 +701,47 @@ var processEventTask = {
1550
701
  description: "Process external system events (Jira, GitHub, Linear) using handler-defined rules to extract insights and track issues",
1551
702
  "argument-hint": "[event payload or description]"
1552
703
  },
1553
- baseContent: `# Process Event Command
1554
-
1555
- ## SECURITY NOTICE
1556
- **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
1557
- - **Read \`.env.testdata\`** for non-secret environment variables (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
1558
- - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
1559
- - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
1560
- - The \`.env\` file access is blocked by settings.json
1561
-
1562
- Process various types of events using intelligent pattern matching and historical context to maintain and evolve the testing system.
1563
-
1564
- ## Arguments
1565
- Arguments: $ARGUMENTS
1566
-
1567
- ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
1568
-
1569
- ## Process
1570
-
1571
- ### Step 1: Understand Event Context
704
+ steps: [
705
+ // Step 1: Overview (inline)
706
+ {
707
+ inline: true,
708
+ title: "Process Event Overview",
709
+ content: `# Process Event Command
1572
710
 
1573
- Events come from integrated external systems via webhooks or manual input. Common sources include:
711
+ Process various types of events using intelligent pattern matching and historical context to maintain and evolve the testing system.`
712
+ },
713
+ // Step 2: Security Notice (library)
714
+ "security-notice",
715
+ // Step 3: Arguments (inline)
716
+ {
717
+ inline: true,
718
+ title: "Arguments",
719
+ content: `Arguments: $ARGUMENTS`
720
+ },
721
+ // Step 4: Knowledge Base Read (library)
722
+ "read-knowledge-base",
723
+ // Step 5: Understand Event Context (inline)
724
+ {
725
+ inline: true,
726
+ title: "Understand Event Context",
727
+ content: `Events come from integrated external systems via webhooks or manual input. Common sources include:
1574
728
  - **Issue Trackers**: Jira, Linear, GitHub Issues
1575
729
  - **Source Control**: GitHub, GitLab
1576
730
  - **Communication Tools**: Slack
1577
731
 
1578
- **Event structure and semantics vary by source.** Do not interpret events based on generic assumptions. Instead, load the appropriate handler file (Step 2.4) for system-specific processing rules.
732
+ **Event structure and semantics vary by source.** Do not interpret events based on generic assumptions. Instead, load the appropriate handler file for system-specific processing rules.
1579
733
 
1580
734
  #### Event Context to Extract:
1581
735
  - **What happened**: The core event (test failed, PR merged, etc.)
1582
736
  - **Where**: Component, service, or area affected
1583
737
  - **Impact**: How this affects testing strategy
1584
- - **Action Required**: What needs to be done in response
1585
-
1586
- ### Step 1.5: Clarify Unclear Events
1587
-
1588
- If the event information is incomplete or ambiguous, seek clarification before processing:
738
+ - **Action Required**: What needs to be done in response`
739
+ },
740
+ // Step 6: Clarify Unclear Events (inline - task-specific)
741
+ {
742
+ inline: true,
743
+ title: "Clarify Unclear Events",
744
+ content: `If the event information is incomplete or ambiguous, seek clarification before processing:
1589
745
 
1590
746
  #### Detect Unclear Events
1591
747
 
@@ -1668,9 +824,13 @@ In event history, record:
1668
824
  - **Assumption made**: If proceeded with assumption
1669
825
  - **Resolution**: How ambiguity was resolved
1670
826
 
1671
- This ensures future similar events can reference past clarifications and avoid redundant questions.
1672
-
1673
- ### Step 2: Load Context and Memory
827
+ This ensures future similar events can reference past clarifications and avoid redundant questions.`
828
+ },
829
+ // Step 7: Load Context and Memory (inline)
830
+ {
831
+ inline: true,
832
+ title: "Load Context and Memory",
833
+ content: `### Step 2: Load Context and Memory
1674
834
 
1675
835
  #### 2.1 Check Event Processor Memory
1676
836
  Read \`.bugzy/runtime/memory/event-processor.md\` to:
@@ -1695,10 +855,10 @@ Read \`.bugzy/runtime/memory/event-history.md\` to:
1695
855
  Based on the event source, load the handler from \`.bugzy/runtime/handlers/\`:
1696
856
 
1697
857
  **Step 1: Detect Event Source from Payload:**
1698
- - \`com.jira-server.*\` event type prefix \u2192 \`.bugzy/runtime/handlers/jira.md\`
1699
- - \`github.*\` or GitHub webhook structure \u2192 \`.bugzy/runtime/handlers/github.md\`
1700
- - \`linear.*\` or Linear webhook \u2192 \`.bugzy/runtime/handlers/linear.md\`
1701
- - Other sources \u2192 Check for matching handler file by source name
858
+ - \`com.jira-server.*\` event type prefix -> \`.bugzy/runtime/handlers/jira.md\`
859
+ - \`github.*\` or GitHub webhook structure -> \`.bugzy/runtime/handlers/github.md\`
860
+ - \`linear.*\` or Linear webhook -> \`.bugzy/runtime/handlers/linear.md\`
861
+ - Other sources -> Check for matching handler file by source name
1702
862
 
1703
863
  **Step 2: Load and Read the Handler File:**
1704
864
  The handler file contains system-specific instructions for:
@@ -1724,9 +884,13 @@ Do NOT guess or apply generic logic. Instead:
1724
884
  Handlers reference \`.bugzy/runtime/project-context.md\` for project-specific rules like:
1725
885
  - Which status transitions trigger verify-changes
1726
886
  - Which resolutions should update the knowledge base
1727
- - Which transitions to ignore
1728
-
1729
- ### Step 3: Intelligent Event Analysis
887
+ - Which transitions to ignore`
888
+ },
889
+ // Step 8: Intelligent Event Analysis (inline)
890
+ {
891
+ inline: true,
892
+ title: "Intelligent Event Analysis",
893
+ content: `### Step 3: Intelligent Event Analysis
1730
894
 
1731
895
  #### 3.1 Contextual Pattern Analysis
1732
896
  Don't just match patterns - analyze the event within the full context:
@@ -1757,11 +921,28 @@ Based on event type and content, generate 3-5 specific search queries:
1757
921
  - Search for similar past events
1758
922
  - Look for related test cases
1759
923
  - Find relevant documentation
1760
- - Check for known issues
1761
-
1762
- {{DOCUMENTATION_RESEARCHER_INSTRUCTIONS}}
1763
-
1764
- ### Step 4: Task Planning with Reasoning
924
+ - Check for known issues`
925
+ },
926
+ // Step 9: Documentation Research (conditional inline)
927
+ {
928
+ inline: true,
929
+ title: "Use Documentation Researcher",
930
+ content: `#### 3.3 Use Documentation Researcher if Needed
931
+
932
+ {{INVOKE_DOCUMENTATION_RESEARCHER}} to find information about unknown features or components:
933
+
934
+ For events mentioning unknown features or components, ask the agent to explore project documentation and return:
935
+ - Feature specifications
936
+ - Related test cases
937
+ - Known issues or limitations
938
+ - Component dependencies`,
939
+ conditionalOnSubagent: "documentation-researcher"
940
+ },
941
+ // Step 10: Task Planning (inline)
942
+ {
943
+ inline: true,
944
+ title: "Task Planning with Reasoning",
945
+ content: `### Step 4: Task Planning with Reasoning
1765
946
 
1766
947
  Generate tasks based on event analysis, using examples from memory as reference.
1767
948
 
@@ -1777,11 +958,11 @@ Analyze the event in context of ALL available information to decide what actions
1777
958
 
1778
959
  **Contextual Decision Making**:
1779
960
  The same event type can require different actions based on context:
1780
- - If handler says this status triggers verification \u2192 Invoke /verify-changes
1781
- - If this issue was already processed (check event history) \u2192 Skip to avoid duplicates
1782
- - If related PR exists in knowledge base \u2192 Include PR context in actions
1783
- - If this is a recurring pattern from the same source \u2192 Consider flagging for review
1784
- - If handler has no rule for this event type \u2192 Ask user for guidance
961
+ - If handler says this status triggers verification -> Invoke /verify-changes
962
+ - If this issue was already processed (check event history) -> Skip to avoid duplicates
963
+ - If related PR exists in knowledge base -> Include PR context in actions
964
+ - If this is a recurring pattern from the same source -> Consider flagging for review
965
+ - If handler has no rule for this event type -> Ask user for guidance
1785
966
 
1786
967
  **Dynamic Task Selection**:
1787
968
  Based on the contextual analysis, decide which tasks make sense:
@@ -1799,15 +980,40 @@ For each task, document WHY it's being executed:
1799
980
  Task: extract_learning
1800
981
  Reasoning: This event reveals a pattern of login failures on Chrome that wasn't previously documented
1801
982
  Data: "Chrome-specific timeout issues with login button"
1802
- \`\`\`
983
+ \`\`\``
984
+ },
985
+ // Step 11: Issue Tracking (conditional inline)
986
+ {
987
+ inline: true,
988
+ title: "Issue Tracking",
989
+ content: `##### For Issue Tracking:
1803
990
 
1804
- ### Step 5: Execute Tasks with Memory Updates
991
+ When an issue needs to be tracked (task type: report_bug or update_story):
1805
992
 
1806
- #### 5.1 Execute Each Task
993
+ {{INVOKE_ISSUE_TRACKER}}
994
+
995
+ 1. Check for duplicate issues in the tracking system
996
+ 2. For bugs: Create detailed bug report with:
997
+ - Clear, descriptive title
998
+ - Detailed description with context
999
+ - Step-by-step reproduction instructions
1000
+ - Expected vs actual behavior
1001
+ - Environment and configuration details
1002
+ - Test case reference (if applicable)
1003
+ - Screenshots or error logs
1004
+ 3. For stories: Update status and add QA comments
1005
+ 4. Track issue lifecycle and maintain categorization
1807
1006
 
1808
- {{ISSUE_TRACKER_INSTRUCTIONS}}
1007
+ The issue-tracker agent will handle all aspects of issue tracking including duplicate detection, story management, QA workflow transitions, and integration with your project management system (Jira, Linear, Notion, etc.).`,
1008
+ conditionalOnSubagent: "issue-tracker"
1009
+ },
1010
+ // Step 12: Execute Tasks (inline)
1011
+ {
1012
+ inline: true,
1013
+ title: "Execute Tasks with Memory Updates",
1014
+ content: `### Step 5: Execute Tasks with Memory Updates
1809
1015
 
1810
- ##### For Other Tasks:
1016
+ #### 5.1 Execute Each Task
1811
1017
  Follow the standard execution logic with added context from memory.
1812
1018
 
1813
1019
  #### 5.2 Update Event Processor Memory
@@ -1844,9 +1050,13 @@ source: [source]
1844
1050
  **Outcome**: [Success/Partial/Failed]
1845
1051
  **Notes**: [Any additional context]
1846
1052
  ---
1847
- \`\`\`
1848
-
1849
- ### Step 6: Learning from Events
1053
+ \`\`\``
1054
+ },
1055
+ // Step 13: Learning and Maintenance (inline)
1056
+ {
1057
+ inline: true,
1058
+ title: "Learning from Events",
1059
+ content: `### Step 6: Learning from Events
1850
1060
 
1851
1061
  After processing, check if this event teaches us something new:
1852
1062
  1. Is this a new type of event we haven't seen?
@@ -1866,9 +1076,15 @@ mkdir -p ./test-cases .claude/memory
1866
1076
  Create files if they don't exist:
1867
1077
  - \`.bugzy/runtime/knowledge-base.md\`
1868
1078
  - \`.bugzy/runtime/memory/event-processor.md\`
1869
- - \`.bugzy/runtime/memory/event-history.md\`
1870
-
1871
- ## Important Considerations
1079
+ - \`.bugzy/runtime/memory/event-history.md\``
1080
+ },
1081
+ // Step 14: Knowledge Base Update (library)
1082
+ "update-knowledge-base",
1083
+ // Step 15: Important Considerations (inline)
1084
+ {
1085
+ inline: true,
1086
+ title: "Important Considerations",
1087
+ content: `## Important Considerations
1872
1088
 
1873
1089
  ### Contextual Intelligence
1874
1090
  - Never process events in isolation - always consider full context
@@ -1892,42 +1108,11 @@ Create files if they don't exist:
1892
1108
  - Each event adds to our understanding of the system
1893
1109
  - Update patterns when new correlations are discovered
1894
1110
  - Refine decision rules based on outcomes
1895
- - Build institutional memory through event history
1896
-
1897
- ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}`,
1898
- optionalSubagents: [
1899
- {
1900
- role: "documentation-researcher",
1901
- contentBlock: `#### 3.3 Use Documentation Researcher if Needed
1902
- For events mentioning unknown features or components:
1903
- \`\`\`
1904
- {{INVOKE_DOCUMENTATION_RESEARCHER}} to find information about: [component/feature]
1905
- \`\`\``
1906
- },
1907
- {
1908
- role: "issue-tracker",
1909
- contentBlock: `##### For Issue Tracking:
1910
-
1911
- When an issue needs to be tracked (task type: report_bug or update_story):
1912
- \`\`\`
1913
- {{INVOKE_ISSUE_TRACKER}}
1914
- 1. Check for duplicate issues in the tracking system
1915
- 2. For bugs: Create detailed bug report with:
1916
- - Clear, descriptive title
1917
- - Detailed description with context
1918
- - Step-by-step reproduction instructions
1919
- - Expected vs actual behavior
1920
- - Environment and configuration details
1921
- - Test case reference (if applicable)
1922
- - Screenshots or error logs
1923
- 3. For stories: Update status and add QA comments
1924
- 4. Track issue lifecycle and maintain categorization
1925
- \`\`\`
1926
-
1927
- The issue-tracker agent will handle all aspects of issue tracking including duplicate detection, story management, QA workflow transitions, and integration with your project management system (Jira, Linear, Notion, etc.).`
1111
+ - Build institutional memory through event history`
1928
1112
  }
1929
1113
  ],
1930
- requiredSubagents: [],
1114
+ requiredSubagents: ["team-communicator"],
1115
+ optionalSubagents: ["documentation-researcher", "issue-tracker"],
1931
1116
  dependentTasks: ["verify-changes"]
1932
1117
  };
1933
1118
 
@@ -1940,49 +1125,40 @@ var runTestsTask = {
1940
1125
  description: "Execute automated Playwright tests, analyze failures, and fix test issues automatically",
1941
1126
  "argument-hint": '[file-pattern|tag|all] (e.g., "auth", "@smoke", "tests/specs/login.spec.ts")'
1942
1127
  },
1943
- baseContent: `# Run Tests Command
1944
-
1945
- ## SECURITY NOTICE
1946
- **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
1947
- - **Read \`.env.testdata\`** for non-secret environment variables (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
1948
- - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
1949
- - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
1950
- - The \`.env\` file access is blocked by settings.json
1951
-
1952
- Execute automated Playwright tests, analyze failures using JSON reports, automatically fix test issues, and log product bugs.
1128
+ steps: [
1129
+ // Step 1: Overview (inline)
1130
+ {
1131
+ inline: true,
1132
+ title: "Run Tests Overview",
1133
+ content: `# Run Tests Command
1953
1134
 
1954
- ## Arguments
1955
- Arguments: $ARGUMENTS
1135
+ Execute automated Playwright tests, analyze failures using JSON reports, automatically fix test issues, and log product bugs.`
1136
+ },
1137
+ // Step 2: Security Notice (library)
1138
+ "security-notice",
1139
+ // Step 3: Arguments (inline)
1140
+ {
1141
+ inline: true,
1142
+ title: "Arguments",
1143
+ content: `Arguments: $ARGUMENTS
1956
1144
 
1957
- ## Parse Arguments
1145
+ **Parse Arguments:**
1958
1146
  Extract the following from arguments:
1959
1147
  - **selector**: Test selection criteria
1960
1148
  - File pattern: "auth" \u2192 finds tests/specs/**/*auth*.spec.ts
1961
1149
  - Tag: "@smoke" \u2192 runs tests with @smoke annotation
1962
1150
  - Specific file: "tests/specs/login.spec.ts"
1963
- - All tests: "all" or "" \u2192 runs entire test suite
1964
-
1965
- ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
1966
-
1967
- ## Test Execution Strategy
1968
-
1969
- **IMPORTANT**: Before selecting tests, read \`.bugzy/runtime/test-execution-strategy.md\` to understand:
1970
- - Available test tiers (Smoke, Component, Full Regression)
1971
- - When to use each tier (commit, PR, release, debug)
1972
- - Default behavior (default to @smoke unless user specifies otherwise)
1973
- - How to interpret user intent from context keywords
1974
- - Time/coverage trade-offs
1975
- - Tag taxonomy
1976
-
1977
- Apply the strategy guidance when determining which tests to run.
1978
-
1979
- ## Process
1980
-
1981
- **First**, consult \`.bugzy/runtime/test-execution-strategy.md\` decision tree to determine appropriate test tier based on user's selector and context.
1982
-
1983
- ### Step 1: Identify Automated Tests to Run
1984
-
1985
- #### 1.1 Understand Test Selection
1151
+ - All tests: "all" or "" \u2192 runs entire test suite`
1152
+ },
1153
+ // Step 4: Knowledge Base Read (library)
1154
+ "read-knowledge-base",
1155
+ // Step 5: Test Execution Strategy (library)
1156
+ "read-test-strategy",
1157
+ // Step 6: Identify Tests (inline - task-specific)
1158
+ {
1159
+ inline: true,
1160
+ title: "Identify Automated Tests to Run",
1161
+ content: `#### Understand Test Selection
1986
1162
  Parse the selector argument to determine which tests to run:
1987
1163
 
1988
1164
  **File Pattern** (e.g., "auth", "login"):
@@ -1999,7 +1175,7 @@ Parse the selector argument to determine which tests to run:
1999
1175
  **All Tests** ("all" or no selector):
2000
1176
  - Run entire test suite: \`tests/specs/**/*.spec.ts\`
2001
1177
 
2002
- #### 1.2 Find Matching Test Files
1178
+ #### Find Matching Test Files
2003
1179
  Use glob patterns to find test files:
2004
1180
  \`\`\`bash
2005
1181
  # For file pattern
@@ -2012,211 +1188,39 @@ ls tests/specs/auth/login.spec.ts
2012
1188
  ls tests/specs/**/*.spec.ts
2013
1189
  \`\`\`
2014
1190
 
2015
- #### 1.3 Validate Test Files Exist
1191
+ #### Validate Test Files Exist
2016
1192
  Check that at least one test file was found:
2017
1193
  - If no tests found, inform user and suggest available tests
2018
1194
  - List available test files if selection was unclear
2019
1195
 
2020
- ### Step 2: Execute Automated Playwright Tests
2021
-
2022
- #### 2.1 Build Playwright Command
2023
- Construct the Playwright test command based on the selector:
2024
-
2025
- **For file pattern or specific file**:
2026
- \`\`\`bash
2027
- npx playwright test [selector]
2028
- \`\`\`
2029
-
2030
- **For tag**:
2031
- \`\`\`bash
2032
- npx playwright test --grep "[tag]"
2033
- \`\`\`
2034
-
2035
- **For all tests**:
2036
- \`\`\`bash
2037
- npx playwright test
2038
- \`\`\`
2039
-
2040
- **Output**: Custom Bugzy reporter will create hierarchical test-runs/YYYYMMDD-HHMMSS/ structure with manifest.json
2041
-
2042
- #### 2.2 Execute Tests via Bash
2043
- Run the Playwright command:
2044
- \`\`\`bash
2045
- npx playwright test [selector]
2046
- \`\`\`
2047
-
2048
- Wait for execution to complete. This may take several minutes depending on test count.
2049
-
2050
- **Note**: The custom Bugzy reporter will automatically:
2051
- - Generate timestamp in YYYYMMDD-HHMMSS format
2052
- - Create test-runs/{timestamp}/ directory structure
2053
- - Record execution-id.txt with BUGZY_EXECUTION_ID
2054
- - Save results per test case in TC-{id}/exec-1/ folders
2055
- - Generate manifest.json with complete execution summary
2056
-
2057
- #### 2.3 Locate and Read Test Results
2058
- After execution completes, find and read the manifest:
2059
-
2060
- 1. Find the test run directory (most recent):
2061
- \`\`\`bash
2062
- ls -t test-runs/ | head -1
2063
- \`\`\`
2064
-
2065
- 2. Read the manifest.json file:
2066
- \`\`\`bash
2067
- cat test-runs/[timestamp]/manifest.json
2068
- \`\`\`
2069
-
2070
- 3. Store the timestamp for use in test-debugger-fixer if needed
2071
-
2072
- ### Step 3: Analyze Test Results from Manifest
2073
-
2074
- #### 3.1 Parse Manifest
2075
- The Bugzy custom reporter produces structured output in manifest.json:
2076
- \`\`\`json
2077
- {
2078
- "bugzyExecutionId": "70a59676-cfd0-4ffd-b8ad-69ceff25c31d",
2079
- "timestamp": "20251115-123456",
2080
- "startTime": "2025-11-15T12:34:56.789Z",
2081
- "endTime": "2025-11-15T12:45:23.456Z",
2082
- "status": "completed",
2083
- "stats": {
2084
- "totalTests": 10,
2085
- "passed": 8,
2086
- "failed": 2,
2087
- "totalExecutions": 10
2088
- },
2089
- "testCases": [
1196
+ #### Confirm Selection Before Execution
1197
+ Before running tests, confirm the selection with the user if ambiguous:
1198
+ - **Clear selection** (specific file or tag): Proceed immediately
1199
+ - **Pattern match** (multiple files): List matching files and ask for confirmation if count > 5
1200
+ - **No selector** (all tests): Confirm running full suite before executing`
1201
+ },
1202
+ // Step 7-10: Test Execution (library steps)
1203
+ "run-playwright-tests",
1204
+ "parse-test-results",
1205
+ "triage-failures",
1206
+ "fix-test-issues",
1207
+ // Step 11: Log Product Bugs (conditional - library step)
2090
1208
  {
2091
- "id": "TC-001-login",
2092
- "name": "Login functionality",
2093
- "totalExecutions": 1,
2094
- "finalStatus": "passed",
2095
- "executions": [
2096
- {
2097
- "number": 1,
2098
- "status": "passed",
2099
- "duration": 1234,
2100
- "videoFile": "video.webm",
2101
- "hasTrace": false,
2102
- "hasScreenshots": false,
2103
- "error": null
2104
- }
2105
- ]
1209
+ stepId: "log-product-bugs",
1210
+ conditionalOnSubagent: "issue-tracker"
2106
1211
  },
1212
+ // Step 12: Knowledge Base Update (library)
1213
+ "update-knowledge-base",
1214
+ // Step 13: Team Communication (conditional - library step)
2107
1215
  {
2108
- "id": "TC-002-invalid-credentials",
2109
- "name": "Invalid credentials error",
2110
- "totalExecutions": 1,
2111
- "finalStatus": "failed",
2112
- "executions": [
2113
- {
2114
- "number": 1,
2115
- "status": "failed",
2116
- "duration": 2345,
2117
- "videoFile": "video.webm",
2118
- "hasTrace": true,
2119
- "hasScreenshots": true,
2120
- "error": "expect(locator).toBeVisible()..."
2121
- }
2122
- ]
2123
- }
2124
- ]
2125
- }
2126
- \`\`\`
2127
-
2128
- #### 3.2 Extract Test Results
2129
- From the manifest, extract:
2130
- - **Total tests**: stats.totalTests
2131
- - **Passed tests**: stats.passed
2132
- - **Failed tests**: stats.failed
2133
- - **Total executions**: stats.totalExecutions (includes re-runs)
2134
- - **Duration**: Calculate from startTime and endTime
2135
-
2136
- For each failed test, collect from testCases array:
2137
- - Test ID (id field)
2138
- - Test name (name field)
2139
- - Final status (finalStatus field)
2140
- - Latest execution details:
2141
- - Error message (executions[last].error)
2142
- - Duration (executions[last].duration)
2143
- - Video file location (test-runs/{timestamp}/{id}/exec-{num}/{videoFile})
2144
- - Trace availability (executions[last].hasTrace)
2145
- - Screenshots availability (executions[last].hasScreenshots)
2146
-
2147
- #### 3.3 Generate Summary Statistics
2148
- \`\`\`markdown
2149
- ## Test Execution Summary
2150
- - Total Tests: [count]
2151
- - Passed: [count] ([percentage]%)
2152
- - Failed: [count] ([percentage]%)
2153
- - Skipped: [count] ([percentage]%)
2154
- - Total Duration: [time]
2155
- \`\`\`
2156
-
2157
- ### Step 5: Triage Failed Tests
2158
-
2159
- After analyzing test results, triage each failure to determine if it's a product bug or test issue:
2160
-
2161
- #### 5.1 Triage Failed Tests FIRST
2162
-
2163
- **\u26A0\uFE0F IMPORTANT: Do NOT report bugs without triaging first.**
2164
-
2165
- For each failed test:
2166
-
2167
- 1. **Read failure details** from JSON report (error message, stack trace)
2168
- 2. **Classify the failure:**
2169
- - **Product bug**: Application behaves incorrectly
2170
- - **Test issue**: Test code needs fixing (selector, timing, assertion)
2171
- 3. **Document classification** for next steps
2172
-
2173
- **Classification Guidelines:**
2174
- - **Product Bug**: Correct test code, unexpected application behavior
2175
- - **Test Issue**: Selector not found, timeout, race condition, wrong assertion
2176
-
2177
- #### 5.2 Fix Test Issues Automatically
2178
-
2179
- For each test classified as **[TEST ISSUE]**, use the test-debugger-fixer agent to automatically fix the test:
2180
-
2181
- \`\`\`
2182
- {{INVOKE_TEST_DEBUGGER_FIXER}}
2183
-
2184
- For each failed test classified as a test issue (not a product bug), provide:
2185
- - Test run timestamp: [from manifest.timestamp]
2186
- - Test case ID: [from testCases[].id in manifest]
2187
- - Test name/title: [from testCases[].name in manifest]
2188
- - Error message: [from testCases[].executions[last].error]
2189
- - Execution details path: test-runs/{timestamp}/{testCaseId}/exec-1/
2190
-
2191
- The agent will:
2192
- 1. Read the execution details from result.json
2193
- 2. Analyze the failure (error message, trace if available)
2194
- 3. Identify the root cause (brittle selector, missing wait, race condition, etc.)
2195
- 4. Apply appropriate fix to the test code
2196
- 5. Rerun the test
2197
- 6. The custom reporter will automatically create the next exec-N/ folder
2198
- 7. Repeat up to 3 times if needed (exec-1, exec-2, exec-3)
2199
- 8. Report success or escalate as likely product bug
2200
-
2201
- After test-debugger-fixer completes:
2202
- - If fix succeeded: Mark test as fixed, add to "Tests Fixed" list
2203
- - If still failing after 3 attempts: Reclassify as potential product bug for Step 5.3
2204
- \`\`\`
2205
-
2206
- **Track Fixed Tests:**
2207
- - Maintain list of tests fixed automatically
2208
- - Include fix description (e.g., "Updated selector from CSS to role-based")
2209
- - Note verification status (test now passes)
2210
-
2211
- {{ISSUE_TRACKER_INSTRUCTIONS}}
2212
-
2213
- ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
2214
-
2215
- {{TEAM_COMMUNICATOR_INSTRUCTIONS}}
2216
-
2217
- ### Step 6: Handle Special Cases
2218
-
2219
- #### If No Test Cases Found
1216
+ stepId: "notify-team",
1217
+ conditionalOnSubagent: "team-communicator"
1218
+ },
1219
+ // Step 14: Handle Special Cases (inline - task-specific)
1220
+ {
1221
+ inline: true,
1222
+ title: "Handle Special Cases",
1223
+ content: `#### If No Test Cases Found
2220
1224
  If no test cases match the selection criteria:
2221
1225
  1. Inform user that no matching test cases were found
2222
1226
  2. List available test cases or suggest running \`/generate-test-cases\` first
@@ -2259,128 +1263,12 @@ If selected test cases have formatting issues:
2259
1263
 
2260
1264
  **Related Documentation**:
2261
1265
  - \`.bugzy/runtime/test-execution-strategy.md\` - When and why to run specific tests
2262
- - \`.bugzy/runtime/testing-best-practices.md\` - How to write tests (patterns and anti-patterns)
2263
-
2264
- `,
2265
- optionalSubagents: [
2266
- {
2267
- role: "issue-tracker",
2268
- contentBlock: `
2269
-
2270
- #### 5.3 Log Product Bugs via Issue Tracker
2271
-
2272
- After triage in Step 5.1, for tests classified as **[PRODUCT BUG]**, use the issue-tracker agent to log bugs:
2273
-
2274
- For each bug to report, use the issue-tracker agent:
2275
-
2276
- \`\`\`
2277
- {{INVOKE_ISSUE_TRACKER}}
2278
- 1. Check for duplicate bugs in the tracking system
2279
- - The agent will automatically search for similar existing issues
2280
- - It maintains memory of recently reported issues
2281
- - Duplicate detection happens automatically - don't create manual checks
2282
-
2283
- 2. For each new bug (non-duplicate):
2284
- Create detailed bug report with:
2285
- - **Title**: Clear, descriptive summary (e.g., "Login button fails with timeout on checkout page")
2286
- - **Description**:
2287
- - What happened vs. what was expected
2288
- - Impact on users
2289
- - Test reference: [file path] \u203A [test title]
2290
- - **Reproduction Steps**:
2291
- - List steps from the failing test
2292
- - Include specific test data used
2293
- - Note any setup requirements from test file
2294
- - **Test Execution Details**:
2295
- - Test file: [file path from JSON report]
2296
- - Test name: [test title from JSON report]
2297
- - Error message: [from JSON report]
2298
- - Stack trace: [from JSON report]
2299
- - Trace file: [path if available]
2300
- - Screenshots: [paths if available]
2301
- - **Environment Details**:
2302
- - Browser and version (from Playwright config)
2303
- - Test environment URL (from .env.testdata BASE_URL)
2304
- - Timestamp of failure
2305
- - **Severity/Priority**: Based on:
2306
- - Test type (smoke tests = high priority)
2307
- - User impact
2308
- - Frequency (always fails vs flaky)
2309
- - **Additional Context**:
2310
- - Error messages or stack traces from JSON report
2311
- - Related test files (if part of test suite)
2312
- - Relevant knowledge from knowledge-base.md
2313
-
2314
- 3. Track created issues:
2315
- - Note the issue ID/number returned
2316
- - Update issue tracker memory with new bugs
2317
- - Prepare issue references for team communication
2318
- \`\`\`
2319
-
2320
- #### 6.3 Summary of Bug Reporting
2321
-
2322
- After issue tracker agent completes, create a summary:
2323
- \`\`\`markdown
2324
- ### Bug Reporting Summary
2325
- - Total bugs found: [count of FAIL tests]
2326
- - New bugs reported: [count of newly created issues]
2327
- - Duplicate bugs found: [count of duplicates detected]
2328
- - Issues not reported: [count of skipped/known issues]
2329
-
2330
- **New Bug Reports**:
2331
- - [Issue ID]: [Bug title] (Test: TC-XXX, Priority: [priority])
2332
- - [Issue ID]: [Bug title] (Test: TC-YYY, Priority: [priority])
2333
-
2334
- **Duplicate Bugs** (already tracked):
2335
- - [Existing Issue ID]: [Bug title] (Matches test: TC-XXX)
2336
-
2337
- **Not Reported** (skipped or known):
2338
- - TC-XXX: Skipped due to blocker failure
2339
- - TC-YYY: Known issue documented in knowledge base
2340
- \`\`\`
2341
-
2342
- **Note**: The issue tracker agent handles all duplicate detection and system integration automatically. Simply provide the bug details and let it manage the rest.`
2343
- },
2344
- {
2345
- role: "team-communicator",
2346
- contentBlock: `### Step 6: Team Communication
2347
-
2348
- {{INVOKE_TEAM_COMMUNICATOR}}
2349
-
2350
- Notify the product team about test execution:
2351
-
2352
- \`\`\`
2353
- 1. Post test execution summary with key statistics
2354
- 2. Highlight critical failures that need immediate attention
2355
- 3. Share important learnings about product behavior
2356
- 4. Report any potential bugs discovered during testing
2357
- 5. Ask for clarification on unexpected behaviors
2358
- 6. Provide recommendations for areas needing investigation
2359
- 7. Use appropriate urgency level based on failure severity
2360
- \`\`\`
2361
-
2362
- The team communication should include:
2363
- - **Execution summary**: Overall pass/fail statistics and timing
2364
- - **Critical issues**: High-priority failures that need immediate attention
2365
- - **Key learnings**: Important discoveries about product behavior
2366
- - **Potential bugs**: Issues that may require bug reports
2367
- - **Clarifications needed**: Unexpected behaviors requiring team input
2368
- - **Recommendations**: Suggested follow-up actions
2369
-
2370
- **Communication strategy based on results**:
2371
- - **All tests passed**: Brief positive update, highlight learnings
2372
- - **Minor failures**: Standard update with failure details and plans
2373
- - **Critical failures**: Urgent notification with detailed analysis
2374
- - **New discoveries**: Separate message highlighting interesting findings
2375
-
2376
- **Update team communicator memory**:
2377
- - Record test execution communication
2378
- - Track team response patterns to test results
2379
- - Document any clarifications provided by the team
2380
- - Note team priorities based on their responses`
1266
+ - \`.bugzy/runtime/testing-best-practices.md\` - How to write tests (patterns and anti-patterns)`
2381
1267
  }
2382
1268
  ],
2383
- requiredSubagents: ["test-runner", "test-debugger-fixer"]
1269
+ requiredSubagents: ["test-runner", "test-debugger-fixer"],
1270
+ optionalSubagents: ["issue-tracker", "team-communicator"],
1271
+ dependentTasks: []
2384
1272
  };
2385
1273
 
2386
1274
  // src/tasks/library/verify-changes.ts
@@ -2392,14 +1280,12 @@ var verifyChangesTask = {
2392
1280
  description: "Verify code changes with automated tests and manual verification checklists",
2393
1281
  "argument-hint": "[trigger-auto-detected]"
2394
1282
  },
2395
- baseContent: `# Verify Changes - Unified Multi-Trigger Workflow
2396
-
2397
- ## SECURITY NOTICE
2398
- **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
2399
- - **Read \`.env.testdata\`** for non-secret environment variables (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
2400
- - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
2401
- - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
2402
- - The \`.env\` file access is blocked by settings.json
1283
+ steps: [
1284
+ // Step 1: Overview (inline)
1285
+ {
1286
+ inline: true,
1287
+ title: "Verify Changes Overview",
1288
+ content: `# Verify Changes - Unified Multi-Trigger Workflow
2403
1289
 
2404
1290
  ## Overview
2405
1291
 
@@ -2407,21 +1293,27 @@ This task performs comprehensive change verification with:
2407
1293
  - **Automated testing**: Execute Playwright tests with automatic triage and fixing
2408
1294
  - **Manual verification checklists**: Generate role-specific checklists for non-automatable scenarios
2409
1295
  - **Multi-trigger support**: Works from manual CLI, Slack messages, GitHub PRs, and CI/CD
2410
- - **Smart output routing**: Results formatted and delivered to the appropriate channel
2411
-
2412
- ## Arguments
2413
-
2414
- **Input**: $ARGUMENTS
2415
-
2416
- The input format determines the trigger source and context extraction strategy.
2417
-
2418
- ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
2419
-
2420
- ## Step 1: Detect Trigger Source
1296
+ - **Smart output routing**: Results formatted and delivered to the appropriate channel`
1297
+ },
1298
+ // Step 2: Security Notice (library)
1299
+ "security-notice",
1300
+ // Step 3: Arguments (inline)
1301
+ {
1302
+ inline: true,
1303
+ title: "Arguments",
1304
+ content: `**Input**: $ARGUMENTS
2421
1305
 
2422
- Analyze the input format to determine how this task was invoked:
1306
+ The input format determines the trigger source and context extraction strategy.`
1307
+ },
1308
+ // Step 4: Knowledge Base Read (library)
1309
+ "read-knowledge-base",
1310
+ // Step 5: Detect Trigger Source (inline)
1311
+ {
1312
+ inline: true,
1313
+ title: "Detect Trigger Source",
1314
+ content: `Analyze the input format to determine how this task was invoked:
2423
1315
 
2424
- ### 1.1 Identify Trigger Type
1316
+ ### Identify Trigger Type
2425
1317
 
2426
1318
  **GitHub PR Webhook:**
2427
1319
  - Input contains \`pull_request\` object with structure:
@@ -2438,7 +1330,7 @@ Analyze the input format to determine how this task was invoked:
2438
1330
  }
2439
1331
  }
2440
1332
  \`\`\`
2441
- \u2192 **Trigger detected: GITHUB_PR**
1333
+ -> **Trigger detected: GITHUB_PR**
2442
1334
 
2443
1335
  **Slack Event:**
2444
1336
  - Input contains \`event\` object with structure:
@@ -2455,7 +1347,7 @@ Analyze the input format to determine how this task was invoked:
2455
1347
  }
2456
1348
  }
2457
1349
  \`\`\`
2458
- \u2192 **Trigger detected: SLACK_MESSAGE**
1350
+ -> **Trigger detected: SLACK_MESSAGE**
2459
1351
 
2460
1352
  **CI/CD Environment:**
2461
1353
  - Environment variables present:
@@ -2465,24 +1357,26 @@ Analyze the input format to determine how this task was invoked:
2465
1357
  - \`GITHUB_BASE_REF\` (base branch)
2466
1358
  - \`GITHUB_HEAD_REF\` (head branch)
2467
1359
  - Git context available via bash commands
2468
- \u2192 **Trigger detected: CI_CD**
1360
+ -> **Trigger detected: CI_CD**
2469
1361
 
2470
1362
  **Manual Invocation:**
2471
1363
  - Input is natural language, URL, or issue identifier
2472
1364
  - Patterns: "PR #123", GitHub URL, "PROJ-456", feature description
2473
- \u2192 **Trigger detected: MANUAL**
2474
-
2475
- ### 1.2 Store Trigger Context
2476
-
2477
- Store the detected trigger for use in Step 6 (output routing):
2478
- - Set variable: \`TRIGGER_SOURCE\` = [GITHUB_PR | SLACK_MESSAGE | CI_CD | MANUAL]
2479
- - This determines output formatting and delivery channel
1365
+ -> **Trigger detected: MANUAL**
2480
1366
 
2481
- ## Step 2: Extract Context Based on Trigger
1367
+ ### Store Trigger Context
2482
1368
 
2483
- Based on the detected trigger source, extract relevant context:
1369
+ Store the detected trigger for use in output routing:
1370
+ - Set variable: \`TRIGGER_SOURCE\` = [GITHUB_PR | SLACK_MESSAGE | CI_CD | MANUAL]
1371
+ - This determines output formatting and delivery channel`
1372
+ },
1373
+ // Step 6: Extract Context (inline)
1374
+ {
1375
+ inline: true,
1376
+ title: "Extract Context Based on Trigger",
1377
+ content: `Based on the detected trigger source, extract relevant context:
2484
1378
 
2485
- ### 2.1 GitHub PR Trigger - Extract PR Details
1379
+ ### GitHub PR Trigger - Extract PR Details
2486
1380
 
2487
1381
  If trigger is GITHUB_PR:
2488
1382
  - **PR number**: \`pull_request.number\`
@@ -2493,9 +1387,7 @@ If trigger is GITHUB_PR:
2493
1387
  - **Base branch**: \`pull_request.base.ref\`
2494
1388
  - **Head branch**: \`pull_request.head.ref\`
2495
1389
 
2496
- Optional: Fetch additional details via GitHub API if needed (PR comments, reviews)
2497
-
2498
- ### 2.2 Slack Message Trigger - Parse Natural Language
1390
+ ### Slack Message Trigger - Parse Natural Language
2499
1391
 
2500
1392
  If trigger is SLACK_MESSAGE:
2501
1393
  - **Message text**: \`event.text\`
@@ -2510,30 +1402,24 @@ If trigger is SLACK_MESSAGE:
2510
1402
  - Feature names: Quoted terms, capitalized phrases
2511
1403
  - Environments: "staging", "production", "preview"
2512
1404
 
2513
- ### 2.3 CI/CD Trigger - Read CI Environment
1405
+ ### CI/CD Trigger - Read CI Environment
2514
1406
 
2515
1407
  If trigger is CI_CD:
2516
1408
  - **CI platform**: Read \`CI\` env var
2517
- - **Branch**: \`GITHUB_REF\` \u2192 extract branch name
1409
+ - **Branch**: \`GITHUB_REF\` -> extract branch name
2518
1410
  - **Commit**: \`GITHUB_SHA\`
2519
1411
  - **Base branch**: \`GITHUB_BASE_REF\` (for PRs)
2520
1412
  - **Changed files**: Run \`git diff --name-only $BASE_SHA...$HEAD_SHA\`
2521
1413
 
2522
- If in PR context, can also fetch PR number from CI env vars (e.g., \`GITHUB_EVENT_PATH\`)
2523
-
2524
- ### 2.4 Manual Trigger - Parse User Input
1414
+ ### Manual Trigger - Parse User Input
2525
1415
 
2526
1416
  If trigger is MANUAL:
2527
1417
  - **GitHub PR URL**: Parse to extract PR number, then fetch details via API
2528
- - Pattern: \`https://github.com/owner/repo/pull/123\`
2529
- - Extract: owner, repo, PR number
2530
- - Fetch: PR details, diff, comments
2531
- - **Issue identifier**: Extract issue ID
2532
- - Patterns: "PROJ-123", "#456", "BUG-789"
1418
+ - **Issue identifier**: Extract issue ID (patterns: "PROJ-123", "#456", "BUG-789")
2533
1419
  - **Feature description**: Use text as-is for verification context
2534
1420
  - **Deployment URL**: Extract for testing environment
2535
1421
 
2536
- ### 2.5 Unified Context Structure
1422
+ ### Unified Context Structure
2537
1423
 
2538
1424
  After extraction, create unified context structure:
2539
1425
  \`\`\`
@@ -2546,21 +1432,21 @@ CHANGE_CONTEXT = {
2546
1432
  environment: "staging" | "production" | URL,
2547
1433
  prNumber: 123 (if available),
2548
1434
  issueId: "PROJ-456" (if available),
2549
-
2550
- // For output routing:
2551
1435
  slackChannel: "C123456" (if Slack trigger),
2552
1436
  slackThread: "1234567890.123456" (if Slack trigger),
2553
1437
  githubRepo: "owner/repo" (if GitHub trigger)
2554
1438
  }
2555
- \`\`\`
2556
-
2557
- ## Step 3: Determine Test Scope (Smart Selection)
2558
-
2559
- **IMPORTANT**: You do NOT have access to code files. Infer test scope from change **descriptions** only.
1439
+ \`\`\``
1440
+ },
1441
+ // Step 7: Determine Test Scope (inline)
1442
+ {
1443
+ inline: true,
1444
+ title: "Determine Test Scope (Smart Selection)",
1445
+ content: `**IMPORTANT**: You do NOT have access to code files. Infer test scope from change **descriptions** only.
2560
1446
 
2561
1447
  Based on PR title, description, and commit messages, intelligently select which tests to run:
2562
1448
 
2563
- ### 3.1 Infer Test Scope from Change Descriptions
1449
+ ### Infer Test Scope from Change Descriptions
2564
1450
 
2565
1451
  Analyze the change description to identify affected feature areas:
2566
1452
 
@@ -2568,32 +1454,19 @@ Analyze the change description to identify affected feature areas:
2568
1454
 
2569
1455
  | Description Keywords | Inferred Test Scope | Example |
2570
1456
  |---------------------|-------------------|---------|
2571
- | "login", "authentication", "sign in/up" | \`tests/specs/auth/\` | "Fix login page validation" \u2192 Auth tests |
2572
- | "checkout", "payment", "purchase" | \`tests/specs/checkout/\` | "Optimize checkout flow" \u2192 Checkout tests |
2573
- | "cart", "shopping cart", "add to cart" | \`tests/specs/cart/\` | "Update cart calculations" \u2192 Cart tests |
2574
- | "API", "endpoint", "backend" | API test suites | "Add new user API endpoint" \u2192 User API tests |
2575
- | "profile", "account", "settings" | \`tests/specs/profile/\` or \`tests/specs/settings/\` | "Profile page redesign" \u2192 Profile tests |
1457
+ | "login", "authentication", "sign in/up" | \`tests/specs/auth/\` | "Fix login page validation" -> Auth tests |
1458
+ | "checkout", "payment", "purchase" | \`tests/specs/checkout/\` | "Optimize checkout flow" -> Checkout tests |
1459
+ | "cart", "shopping cart", "add to cart" | \`tests/specs/cart/\` | "Update cart calculations" -> Cart tests |
1460
+ | "API", "endpoint", "backend" | API test suites | "Add new user API endpoint" -> User API tests |
1461
+ | "profile", "account", "settings" | \`tests/specs/profile/\` or \`tests/specs/settings/\` | "Profile page redesign" -> Profile tests |
2576
1462
 
2577
1463
  **Inference strategy:**
2578
1464
  1. **Extract feature keywords** from PR title and description
2579
- - PR title: "feat(checkout): Add PayPal payment option"
2580
- - Keywords: ["checkout", "payment"]
2581
- - Inferred scope: Checkout tests
2582
-
2583
1465
  2. **Analyze commit messages** for conventional commit scopes
2584
- - \`feat(auth): Add password reset flow\` \u2192 Auth tests
2585
- - \`fix(cart): Resolve quantity update bug\` \u2192 Cart tests
2586
-
2587
1466
  3. **Map keywords to test organization**
2588
- - Reference: Tests are organized by feature under \`tests/specs/\` (see \`.bugzy/runtime/testing-best-practices.md\`)
2589
- - Feature areas typically include: auth/, checkout/, cart/, profile/, api/, etc.
2590
-
2591
1467
  4. **Identify test scope breadth from description tone**
2592
- - "Fix typo in button label" \u2192 Narrow scope (smoke tests)
2593
- - "Refactor shared utility functions" \u2192 Wide scope (full suite)
2594
- - "Update single component styling" \u2192 Narrow scope (component tests)
2595
1468
 
2596
- ### 3.2 Fallback Strategies Based on Description Analysis
1469
+ ### Fallback Strategies Based on Description Analysis
2597
1470
 
2598
1471
  **Description patterns that indicate full suite:**
2599
1472
  - "Refactor shared/common utilities" (wide impact)
@@ -2608,30 +1481,13 @@ Analyze the change description to identify affected feature areas:
2608
1481
  - "Fix formatting" or "Linting fixes" (no logic change)
2609
1482
 
2610
1483
  **When description is vague or ambiguous:**
2611
- - Examples: "Updated several components", "Various bug fixes", "Improvements"
2612
1484
  - **ACTION REQUIRED**: Use AskUserQuestion tool to clarify test scope
2613
- - Provide options based on available test suites:
2614
- \`\`\`typescript
2615
- AskUserQuestion({
2616
- questions: [{
2617
- question: "The change description is broad. Which test suites should run?",
2618
- header: "Test Scope",
2619
- multiSelect: true,
2620
- options: [
2621
- { label: "Auth tests", description: "Login, signup, password reset" },
2622
- { label: "Checkout tests", description: "Purchase flow, payment processing" },
2623
- { label: "Full test suite", description: "Run all tests for comprehensive validation" },
2624
- { label: "Smoke tests only", description: "Quick validation of critical paths" }
2625
- ]
2626
- }]
2627
- })
2628
- \`\`\`
2629
1485
 
2630
1486
  **If specific test scope requested:**
2631
1487
  - User can override with: "only smoke tests", "full suite", specific test suite names
2632
1488
  - Honor user's explicit scope over smart selection
2633
1489
 
2634
- ### 3.3 Test Selection Summary
1490
+ ### Test Selection Summary
2635
1491
 
2636
1492
  Generate summary of test selection based on description analysis:
2637
1493
  \`\`\`markdown
@@ -2641,180 +1497,51 @@ Generate summary of test selection based on description analysis:
2641
1497
  - **Affected test suites**: [list inferred test suite paths or names]
2642
1498
  - **Scope reasoning**: [explain why this scope was selected]
2643
1499
  - **Execution strategy**: [smart selection | full suite | smoke tests | user-specified]
2644
- \`\`\`
2645
-
2646
- **Example summary:**
2647
- \`\`\`markdown
2648
- ### Test Scope Determined
2649
- - **Change description**: "feat(checkout): Add PayPal payment option"
2650
- - **Identified keywords**: checkout, payment, PayPal
2651
- - **Affected test suites**: tests/specs/checkout/payment.spec.ts, tests/specs/checkout/purchase-flow.spec.ts
2652
- - **Scope reasoning**: Change affects checkout payment processing; running all checkout tests to validate payment integration
2653
- - **Execution strategy**: Smart selection (checkout suite)
2654
- \`\`\`
2655
-
2656
- ## Step 4: Run Verification Workflow
2657
-
2658
- Execute comprehensive verification combining automated tests and manual checklists:
2659
-
2660
- ### 4A: Automated Testing (Integrated from /run-tests)
2661
-
2662
- Execute automated Playwright tests with full triage and fixing:
2663
-
2664
- #### 4A.1 Execute Tests
2665
-
2666
- Run the selected tests via Playwright:
2667
- \`\`\`bash
2668
- npx playwright test [scope] --reporter=json --output=test-results/
2669
- \`\`\`
2670
-
2671
- Wait for execution to complete. Capture JSON report from \`test-results/.last-run.json\`.
2672
-
2673
- #### 4A.2 Parse Test Results
2674
-
2675
- Read and analyze the JSON report:
2676
- - Extract: Total, passed, failed, skipped counts
2677
- - For each failed test: file path, test name, error message, stack trace, trace file
2678
- - Calculate: Pass rate, total duration
2679
-
2680
- #### 4A.3 Triage Failures (Classification)
2681
-
2682
- #### Automatic Test Issue Fixing
2683
-
2684
- For each test classified as **[TEST ISSUE]**, use the test-debugger-fixer agent to automatically fix the test:
2685
-
2686
- \`\`\`
2687
- {{INVOKE_TEST_DEBUGGER_FIXER}}
2688
-
2689
- For each failed test classified as a test issue (not a product bug), provide:
2690
- - Test file path: [from JSON report]
2691
- - Test name/title: [from JSON report]
2692
- - Error message: [from JSON report]
2693
- - Stack trace: [from JSON report]
2694
- - Trace file path: [if available]
2695
-
2696
- The agent will:
2697
- 1. Read the failing test file
2698
- 2. Analyze the failure details
2699
- 3. Open browser via Playwright MCP to debug if needed
2700
- 4. Identify the root cause (brittle selector, missing wait, race condition, etc.)
2701
- 5. Apply appropriate fix to the test code
2702
- 6. Rerun the test to verify the fix
2703
- 7. Repeat up to 3 times if needed
2704
- 8. Report success or escalate as likely product bug
2705
-
2706
- After test-debugger-fixer completes:
2707
- - If fix succeeded: Mark test as fixed, add to "Tests Fixed" list
2708
- - If still failing after 3 attempts: Reclassify as potential product bug
2709
- \`\`\`
2710
-
2711
- **Track Fixed Tests:**
2712
- - Maintain list of tests fixed automatically
2713
- - Include fix description (e.g., "Updated selector from CSS to role-based")
2714
- - Note verification status (test now passes)
2715
- - Reference .bugzy/runtime/testing-best-practices.md for best practices
2716
-
2717
- For each failed test, classify as:
2718
- - **[PRODUCT BUG]**: Correct test code, but application behaves incorrectly
2719
- - **[TEST ISSUE]**: Test code needs fixing (selector, timing, assertion)
2720
-
2721
- Classification guidelines:
2722
- - Product Bug: Expected behavior not met, functional issue
2723
- - Test Issue: Selector not found, timeout, race condition, brittle locator
2724
-
2725
- #### 4A.4 Fix Test Issues Automatically
2726
-
2727
- For tests classified as [TEST ISSUE]:
2728
- - {{INVOKE_TEST_DEBUGGER_FIXER}} to analyze and fix
2729
- - Agent debugs with browser if needed
2730
- - Applies fix (selector update, wait condition, assertion correction)
2731
- - Reruns test to verify fix (10x for flaky tests)
2732
- - Max 3 fix attempts, then reclassify as product bug
2733
-
2734
- Track fixed tests with:
2735
- - Test file path
2736
- - Fix description
2737
- - Verification status (now passes)
2738
-
2739
- #### 4A.5 Log Product Bugs
2740
-
2741
- {{ISSUE_TRACKER_INSTRUCTIONS}}
2742
-
2743
- For tests classified as [PRODUCT BUG]:
2744
- - {{INVOKE_ISSUE_TRACKER}} to create bug reports
2745
- - Agent checks for duplicates automatically
2746
- - Creates detailed report with:
2747
- - Title, description, reproduction steps
2748
- - Test reference, error details, stack trace
2749
- - Screenshots, traces, environment details
2750
- - Severity based on test type and impact
2751
- - Returns issue ID for tracking
2752
-
2753
- ### 4B: Manual Verification Checklist (NEW)
2754
-
2755
- Generate human-readable checklist for non-automatable scenarios:
2756
-
2757
- #### Generate Manual Verification Checklist
2758
-
2759
- Analyze the code changes and generate a manual verification checklist for scenarios that cannot be automated.
1500
+ \`\`\``
1501
+ },
1502
+ // Step 8-11: Test Execution (library steps)
1503
+ "run-playwright-tests",
1504
+ "parse-test-results",
1505
+ "triage-failures",
1506
+ "fix-test-issues",
1507
+ // Step 12: Log Product Bugs (conditional library step)
1508
+ {
1509
+ stepId: "log-product-bugs",
1510
+ conditionalOnSubagent: "issue-tracker"
1511
+ },
1512
+ // Step 13: Generate Manual Verification Checklist (inline)
1513
+ {
1514
+ inline: true,
1515
+ title: "Generate Manual Verification Checklist",
1516
+ content: `Generate human-readable checklist for non-automatable scenarios:
2760
1517
 
2761
- #### Analyze Change Context
1518
+ ### Analyze Change Context
2762
1519
 
2763
1520
  Review the provided context to understand what changed:
2764
1521
  - Read PR title, description, and commit messages
2765
1522
  - Identify change types from descriptions: visual, UX, forms, mobile, accessibility, edge cases
2766
1523
  - Understand the scope and impact of changes from the change descriptions
2767
1524
 
2768
- #### Identify Non-Automatable Scenarios
1525
+ ### Identify Non-Automatable Scenarios
2769
1526
 
2770
1527
  Based on the change analysis, identify scenarios that require human verification:
2771
1528
 
2772
1529
  **1. Visual Design Changes** (CSS, styling, design files, graphics)
2773
- - Color schemes, gradients, shadows
2774
- - Typography, font sizes, line heights
2775
- - Spacing, margins, padding, alignment
2776
- - Visual consistency across components
2777
- - Brand guideline compliance
2778
- \u2192 Add **Design Validation** checklist items
1530
+ -> Add **Design Validation** checklist items
2779
1531
 
2780
1532
  **2. UX Interaction Changes** (animations, transitions, gestures, micro-interactions)
2781
- - Animation smoothness (60fps expectation)
2782
- - Transition timing and easing
2783
- - Interaction responsiveness and feel
2784
- - Loading states and skeleton screens
2785
- - Hover effects, focus states
2786
- \u2192 Add **UX Feel** checklist items
1533
+ -> Add **UX Feel** checklist items
2787
1534
 
2788
1535
  **3. Form and Input Changes** (new form fields, input validation, user input)
2789
- - Screen reader compatibility
2790
- - Keyboard navigation (Tab order, Enter to submit)
2791
- - Error message clarity and placement
2792
- - Color contrast (WCAG 2.1 AA: 4.5:1 ratio for text)
2793
- - Focus indicators visibility
2794
- \u2192 Add **Accessibility** checklist items
1536
+ -> Add **Accessibility** checklist items
2795
1537
 
2796
1538
  **4. Mobile and Responsive Changes** (media queries, touch interactions, viewport)
2797
- - Touch target sizes (\u226544px iOS, \u226548dp Android)
2798
- - Responsive layout breakpoints
2799
- - Mobile keyboard behavior (doesn't obscure inputs)
2800
- - Swipe gestures and touch interactions
2801
- - Pinch-to-zoom functionality
2802
- \u2192 Add **Mobile Experience** checklist items
1539
+ -> Add **Mobile Experience** checklist items
2803
1540
 
2804
1541
  **5. Low ROI or Rare Scenarios** (edge cases, one-time migrations, rare user paths)
2805
- - Scenarios used by < 1% of users
2806
- - Complex multi-system integrations
2807
- - One-time data migrations
2808
- - Leap year, DST, timezone edge cases
2809
- \u2192 Add **Exploratory Testing** notes
2810
-
2811
- **6. Cross-Browser Visual Consistency** (layout rendering differences)
2812
- - Layout consistency across Chrome, Firefox, Safari
2813
- - CSS feature support differences
2814
- - Font rendering variations
2815
- \u2192 Add **Cross-Browser** checklist items (if significant visual changes)
1542
+ -> Add **Exploratory Testing** notes
2816
1543
 
2817
- #### Generate Role-Specific Checklist Items
1544
+ ### Generate Role-Specific Checklist Items
2818
1545
 
2819
1546
  For each identified scenario, create clear, actionable checklist items:
2820
1547
 
@@ -2823,113 +1550,24 @@ For each identified scenario, create clear, actionable checklist items:
2823
1550
  - Assigned role (@design-team, @qa-team, @a11y-team, @mobile-team)
2824
1551
  - Acceptance criteria (what constitutes pass/fail)
2825
1552
  - Reference to standards when applicable (WCAG, iOS HIG, Material Design)
2826
- - Priority indicator (\u{1F534} critical, \u{1F7E1} important, \u{1F7E2} nice-to-have)
1553
+ - Priority indicator (red circle critical, yellow circle important, green circle nice-to-have)
2827
1554
 
2828
1555
  **Example checklist items:**
2829
1556
 
2830
1557
  **Design Validation (@design-team)**
2831
- - [ ] \u{1F534} Login button color matches brand guidelines (#FF6B35)
2832
- - [ ] \u{1F7E1} Loading spinner animation smooth (60fps, no jank)
2833
- - [ ] \u{1F7E1} Card shadows match design system (elevation-2: 0 2px 4px rgba(0,0,0,0.1))
2834
- - [ ] \u{1F7E2} Hover states provide appropriate visual feedback
1558
+ - [ ] Login button color matches brand guidelines (#FF6B35)
1559
+ - [ ] Loading spinner animation smooth (60fps, no jank)
2835
1560
 
2836
1561
  **Accessibility (@a11y-team)**
2837
- - [ ] \u{1F534} Screen reader announces form errors clearly (tested with VoiceOver/NVDA)
2838
- - [ ] \u{1F534} Keyboard navigation: Tab through all interactive elements in logical order
2839
- - [ ] \u{1F534} Color contrast meets WCAG 2.1 AA (4.5:1 for body text, 3:1 for large text)
2840
- - [ ] \u{1F7E1} Focus indicators visible on all interactive elements
1562
+ - [ ] Screen reader announces form errors clearly (tested with VoiceOver/NVDA)
1563
+ - [ ] Keyboard navigation: Tab through all interactive elements in logical order
1564
+ - [ ] Color contrast meets WCAG 2.1 AA (4.5:1 for body text, 3:1 for large text)
2841
1565
 
2842
1566
  **Mobile Experience (@qa-team, @mobile-team)**
2843
- - [ ] \u{1F534} Touch targets \u226544px (iOS Human Interface Guidelines)
2844
- - [ ] \u{1F534} Mobile keyboard doesn't obscure input fields on iOS/Android
2845
- - [ ] \u{1F7E1} Swipe gestures work naturally without conflicts
2846
- - [ ] \u{1F7E1} Responsive layout adapts properly on iPhone SE (smallest screen)
2847
-
2848
- **UX Feel (@design-team, @qa-team)**
2849
- - [ ] \u{1F7E1} Page transitions smooth and not jarring
2850
- - [ ] \u{1F7E1} Button click feedback immediate (< 100ms perceived response)
2851
- - [ ] \u{1F7E2} Loading states prevent confusion during data fetch
2852
-
2853
- **Exploratory Testing (@qa-team)**
2854
- - [ ] \u{1F7E2} Test edge case: User submits form during network timeout
2855
- - [ ] \u{1F7E2} Test edge case: User navigates back during submission
2856
-
2857
- #### Format for Output Channel
2858
-
2859
- Adapt the checklist format based on the output channel (determined by trigger source):
1567
+ - [ ] Touch targets greater than or equal to 44px (iOS Human Interface Guidelines)
1568
+ - [ ] Mobile keyboard doesn't obscure input fields on iOS/Android
2860
1569
 
2861
- **Terminal (Manual Trigger):**
2862
- \`\`\`markdown
2863
- MANUAL VERIFICATION CHECKLIST:
2864
- Please verify the following before merging:
2865
-
2866
- Design Validation (@design-team):
2867
- [ ] \u{1F534} Checkout button colors match brand guidelines (#FF6B35)
2868
- [ ] \u{1F7E1} Loading spinner animation smooth (60fps)
2869
-
2870
- Accessibility (@a11y-team):
2871
- [ ] \u{1F534} Screen reader announces error messages
2872
- [ ] \u{1F534} Keyboard navigation works (Tab order logical)
2873
- [ ] \u{1F534} Color contrast meets WCAG 2.1 AA (4.5:1 ratio)
2874
-
2875
- Mobile Experience (@qa-team):
2876
- [ ] \u{1F534} Touch targets \u226544px (iOS HIG)
2877
- [ ] \u{1F7E1} Responsive layout works on iPhone SE
2878
- \`\`\`
2879
-
2880
- **Slack (Slack Trigger):**
2881
- \`\`\`markdown
2882
- *Manual Verification Needed:*
2883
- \u25A1 Visual: Button colors, animations (60fps)
2884
- \u25A1 Mobile: Touch targets \u226544px
2885
- \u25A1 A11y: Screen reader, keyboard nav, contrast
2886
-
2887
- cc @design-team @qa-team @a11y-team
2888
- \`\`\`
2889
-
2890
- **GitHub PR Comment (GitHub Trigger):**
2891
- \`\`\`markdown
2892
- ### Manual Verification Required
2893
-
2894
- The following scenarios require human verification before release:
2895
-
2896
- #### Design Validation (@design-team)
2897
- - [ ] \u{1F534} Checkout button colors match brand guidelines (#FF6B35)
2898
- - [ ] \u{1F7E1} Loading spinner animation smooth (60fps)
2899
- - [ ] \u{1F7E1} Card shadows match design system
2900
-
2901
- #### Accessibility (@a11y-team)
2902
- - [ ] \u{1F534} Screen reader announces error messages (VoiceOver/NVDA)
2903
- - [ ] \u{1F534} Keyboard navigation through all form fields (Tab order)
2904
- - [ ] \u{1F534} Color contrast meets WCAG 2.1 AA (4.5:1 for body text)
2905
-
2906
- #### Mobile Experience (@qa-team)
2907
- - [ ] \u{1F534} Touch targets \u226544px (iOS Human Interface Guidelines)
2908
- - [ ] \u{1F534} Mobile keyboard doesn't obscure input fields
2909
- - [ ] \u{1F7E1} Responsive layout works on iPhone SE (375x667)
2910
-
2911
- ---
2912
- *Legend: \u{1F534} Critical \u2022 \u{1F7E1} Important \u2022 \u{1F7E2} Nice-to-have*
2913
- \`\`\`
2914
-
2915
- #### Guidelines for Quality Checklists
2916
-
2917
- **DO:**
2918
- - Make each item verifiable (clear pass/fail criteria)
2919
- - Include context (why this needs manual verification)
2920
- - Reference standards (WCAG, iOS HIG, Material Design)
2921
- - Assign to specific roles
2922
- - Prioritize items (critical, important, nice-to-have)
2923
- - Be specific (not "check colors" but "Login button color matches #FF6B35")
2924
-
2925
- **DON'T:**
2926
- - Create vague items ("test thoroughly")
2927
- - List items that can be automated
2928
- - Skip role assignments
2929
- - Forget acceptance criteria
2930
- - Omit priority indicators
2931
-
2932
- #### When NO Manual Verification Needed
1570
+ ### When NO Manual Verification Needed
2933
1571
 
2934
1572
  If the changes are purely:
2935
1573
  - Backend logic (no UI changes)
@@ -2941,18 +1579,13 @@ Output:
2941
1579
  \`\`\`markdown
2942
1580
  **Manual Verification:** Not required for this change.
2943
1581
  All user-facing changes are fully covered by automated tests.
2944
- \`\`\`
2945
-
2946
- #### Summary
2947
-
2948
- After generating the checklist:
2949
- - Count total items by priority (\u{1F534} critical, \u{1F7E1} important, \u{1F7E2} nice-to-have)
2950
- - Estimate time needed (e.g., "~30 minutes for design QA, ~45 minutes for accessibility testing")
2951
- - Suggest who should perform each category of checks
2952
-
2953
- ### 4C: Aggregate Results
2954
-
2955
- Combine automated and manual verification results:
1582
+ \`\`\``
1583
+ },
1584
+ // Step 14: Aggregate Results (inline)
1585
+ {
1586
+ inline: true,
1587
+ title: "Aggregate Verification Results",
1588
+ content: `Combine automated and manual verification results:
2956
1589
 
2957
1590
  \`\`\`markdown
2958
1591
  ## Verification Results Summary
@@ -2966,231 +1599,99 @@ Combine automated and manual verification results:
2966
1599
  - Duration: [time]
2967
1600
 
2968
1601
  ### Manual Verification Required
2969
- [Checklist generated in 4B, or "Not required"]
1602
+ [Checklist generated in previous step, or "Not required"]
2970
1603
 
2971
1604
  ### Overall Recommendation
2972
- [\u2705 Safe to merge | \u26A0\uFE0F Review bugs before merging | \u274C Do not merge]
2973
- \`\`\`
2974
-
2975
- ## Step 5: Understanding the Change (Documentation Research)
2976
-
2977
- {{DOCUMENTATION_RESEARCHER_INSTRUCTIONS}}
2978
-
2979
- Before proceeding with test creation or execution, ensure requirements are clear through ambiguity detection and adaptive exploration.
2980
-
2981
- **Note**: For detailed exploration and clarification protocols, refer to the complete instructions below. Adapt the depth of exploration based on requirement clarity and use the clarification protocol to detect ambiguity, assess severity, and seek clarification when needed.
2982
-
2983
- After clarification and exploration, analyze the change to determine the verification approach:
2984
-
2985
- ### 5.1 Identify Test Scope
2986
- Based on the change description, exploration findings, and clarified requirements:
2987
- - **Direct impact**: Which features/functionality are directly modified
2988
- - **Indirect impact**: What else might be affected (dependencies, integrations)
2989
- - **Regression risk**: Existing functionality that should be retested
2990
- - **New functionality**: Features that need new test coverage
2991
-
2992
- ### 5.2 Determine Verification Strategy
2993
- Plan your testing approach based on validated requirements:
2994
- - **Priority areas**: Critical paths that must work
2995
- - **Test types needed**: Functional, regression, integration, UI/UX
2996
- - **Test data requirements**: What test accounts, data, or scenarios needed
2997
- - **Success criteria**: What determines the change is working correctly (now clearly defined)
2998
-
2999
- ## Step 6: Report Results (Multi-Channel Output)
3000
-
3001
- Route output based on trigger source (from Step 1):
3002
-
3003
- ### 6.1 MANUAL Trigger \u2192 Terminal Output
3004
-
3005
- Format as comprehensive markdown report for terminal display:
3006
-
3007
- \`\`\`markdown
3008
- # Test Verification Report
3009
-
3010
- ## Change Summary
3011
- - **What Changed**: [Brief description]
3012
- - **Scope**: [Affected features/areas]
3013
- - **Changed Files**: [count] files
3014
-
3015
- ## Automated Test Results
3016
- ### Statistics
3017
- - Total Tests: [count]
3018
- - Passed: [count] ([percentage]%)
3019
- - Failed: [count]
3020
- - Test Issues Fixed: [count]
3021
- - Product Bugs Logged: [count]
3022
- - Duration: [time]
3023
-
3024
- ### Tests Fixed Automatically
3025
- [For each fixed test:
3026
- - **Test**: [file path] \u203A [test name]
3027
- - **Issue**: [problem found]
3028
- - **Fix**: [what was changed]
3029
- - **Status**: \u2705 Now passing
3030
- ]
3031
-
3032
- ### Product Bugs Logged
3033
- [For each bug:
3034
- - **Issue**: [ISSUE-123] [Bug title]
3035
- - **Test**: [test file] \u203A [test name]
3036
- - **Severity**: [priority]
3037
- - **Link**: [issue tracker URL]
3038
- ]
3039
-
3040
- ## Manual Verification Checklist
3041
-
3042
- [Insert checklist from Step 4B]
3043
-
3044
- ## Recommendation
3045
- [\u2705 Safe to merge - all automated tests pass, complete manual checks before release]
3046
- [\u26A0\uFE0F Review bugs before merging - [X] bugs need attention]
3047
- [\u274C Do not merge - critical failures]
3048
-
3049
- ## Test Artifacts
3050
- - JSON Report: test-results/.last-run.json
3051
- - HTML Report: playwright-report/index.html
3052
- - Traces: test-results/[test-id]/trace.zip
3053
- - Screenshots: test-results/[test-id]/screenshots/
3054
- \`\`\`
3055
-
3056
- ### 6.2 SLACK_MESSAGE Trigger \u2192 Thread Reply
3057
-
3058
- {{TEAM_COMMUNICATOR_INSTRUCTIONS}}
3059
-
3060
- {{INVOKE_TEAM_COMMUNICATOR}} to post concise results to Slack thread:
3061
-
3062
- \`\`\`
3063
- Post verification results.
3064
-
3065
- **Channel**: [from CHANGE_CONTEXT.slackChannel]
3066
- **Thread**: [from CHANGE_CONTEXT.slackThread]
3067
-
3068
- **Message**:
3069
- \u{1F9EA} *Verification Results for [change title]*
3070
-
3071
- *Automated:* \u2705 [passed]/[total] tests passed ([duration])
3072
- [If test issues fixed:] \u{1F527} [count] test issues auto-fixed
3073
- [If bugs logged:] \u{1F41B} [count] bugs logged ([list issue IDs])
3074
-
3075
- *Manual Verification Needed:*
3076
- [Concise checklist summary - collapsed/expandable]
3077
- \u25A1 Visual: [key items]
3078
- \u25A1 Mobile: [key items]
3079
- \u25A1 A11y: [key items]
3080
-
3081
- *Recommendation:* [\u2705 Safe to merge | \u26A0\uFE0F Review bugs | \u274C Blocked]
3082
-
3083
- [If bugs logged:] cc @[relevant-team-members]
3084
- [Link to full test report if available]
3085
- \`\`\`
3086
-
3087
- ### 6.3 GITHUB_PR Trigger \u2192 PR Comment
3088
-
3089
- Use GitHub API to post comprehensive comment on PR:
3090
-
3091
- **Format as GitHub-flavored markdown:**
3092
- \`\`\`markdown
3093
- ## \u{1F9EA} Test Verification Results
3094
-
3095
- **Status:** [\u2705 All tests passed | \u26A0\uFE0F Issues found | \u274C Critical failures]
3096
-
3097
- ### Automated Tests
3098
- | Metric | Value |
3099
- |--------|-------|
3100
- | Total Tests | [count] |
3101
- | Passed | \u2705 [count] ([percentage]%) |
3102
- | Failed | \u274C [count] |
3103
- | Test Issues Fixed | \u{1F527} [count] |
3104
- | Product Bugs Logged | \u{1F41B} [count] |
3105
- | Duration | \u23F1\uFE0F [time] |
3106
-
3107
- ### Failed Tests (Triaged)
3108
-
3109
- [For each failure:]
3110
-
3111
- #### \u274C **[Test Name]**
3112
- - **File:** \`[test-file-path]\`
3113
- - **Cause:** [Product bug | Test issue]
3114
- - **Action:** [Bug logged: [ISSUE-123](url) | Fixed: [commit-hash](url)]
3115
- - **Details:**
3116
- \`\`\`
3117
- [Error message]
3118
- \`\`\`
3119
-
3120
- ### Tests Fixed Automatically
3121
-
3122
- [For each fixed test:]
3123
- - \u2705 **[Test Name]** (\`[file-path]\`)
3124
- - **Issue:** [brittle selector | missing wait | race condition]
3125
- - **Fix:** [description of fix applied]
3126
- - **Verified:** Passes 10/10 runs
3127
-
3128
- ### Product Bugs Logged
3129
-
3130
- [For each bug:]
3131
- - \u{1F41B} **[[ISSUE-123](url)]** [Bug title]
3132
- - **Test:** \`[test-file]\` \u203A [test name]
3133
- - **Severity:** [\u{1F534} Critical | \u{1F7E1} Important | \u{1F7E2} Minor]
3134
- - **Assignee:** @[backend-team | frontend-team]
3135
-
3136
- ### Manual Verification Required
3137
-
3138
- The following scenarios require human verification before release:
3139
-
3140
- #### Design Validation (@design-team)
3141
- - [ ] \u{1F534} [Critical design check]
3142
- - [ ] \u{1F7E1} [Important design check]
3143
-
3144
- #### Accessibility (@a11y-team)
3145
- - [ ] \u{1F534} [Critical a11y check]
3146
- - [ ] \u{1F7E1} [Important a11y check]
3147
-
3148
- #### Mobile Experience (@qa-team)
3149
- - [ ] \u{1F534} [Critical mobile check]
3150
- - [ ] \u{1F7E1} [Important mobile check]
3151
-
3152
- ---
3153
- *Legend: \u{1F534} Critical \u2022 \u{1F7E1} Important \u2022 \u{1F7E2} Nice-to-have*
3154
-
3155
- ### Test Artifacts
3156
- - [Full HTML Report](playwright-report/index.html)
3157
- - [Test Traces](test-results/)
3158
-
3159
- ### Recommendation
3160
- [\u2705 **Safe to merge** - All automated tests pass, complete manual checks before release]
3161
- [\u26A0\uFE0F **Review required** - [X] bugs need attention, complete manual checks]
3162
- [\u274C **Do not merge** - Critical failures must be resolved first]
3163
-
3164
- ---
3165
- *\u{1F916} Automated by Bugzy \u2022 [View Test Code](tests/specs/) \u2022 [Manual Test Cases](test-cases/)*
3166
- \`\`\`
1605
+ [Safe to merge | Review bugs before merging | Do not merge]
1606
+ \`\`\``
1607
+ },
1608
+ // Step 15: Documentation Research (conditional inline)
1609
+ {
1610
+ inline: true,
1611
+ title: "Understanding the Change (Documentation Research)",
1612
+ content: `{{INVOKE_DOCUMENTATION_RESEARCHER}} to gather comprehensive context about the changed features:
3167
1613
 
3168
- **Post comment via GitHub API:**
3169
- - Endpoint: \`POST /repos/{owner}/{repo}/issues/{pr_number}/comments\`
3170
- - Use GitHub MCP or bash with \`gh\` CLI
3171
- - Requires GITHUB_TOKEN from environment
1614
+ Explore project documentation related to the changes.
3172
1615
 
3173
- ### 6.4 CI_CD Trigger \u2192 Build Log + PR Comment
1616
+ Specifically gather:
1617
+ - Product specifications for affected features
1618
+ - User stories and acceptance criteria
1619
+ - Technical architecture documentation
1620
+ - API endpoints and contracts
1621
+ - User roles and permissions relevant to the change
1622
+ - Business rules and validations
1623
+ - UI/UX specifications
1624
+ - Known limitations or constraints
1625
+ - Related bug reports or known issues
1626
+ - Existing test documentation for this area
3174
1627
 
3175
- **Output to CI build log:**
3176
- - Print detailed results to stdout (captured by CI)
3177
- - Use ANSI colors if supported by CI platform
3178
- - Same format as MANUAL terminal output
1628
+ The agent will:
1629
+ 1. Check its memory for previously discovered documentation
1630
+ 2. Explore workspace for relevant pages and databases
1631
+ 3. Build comprehensive understanding of the affected features
1632
+ 4. Return synthesized information to inform testing strategy
3179
1633
 
3180
- **Exit with appropriate code:**
1634
+ Use this information to:
1635
+ - Better understand the change context
1636
+ - Identify comprehensive test scenarios
1637
+ - Recognize integration points and dependencies
1638
+ - Spot potential edge cases or risk areas
1639
+ - Enhance manual verification checklist generation`,
1640
+ conditionalOnSubagent: "documentation-researcher"
1641
+ },
1642
+ // Step 16: Report Results (inline)
1643
+ {
1644
+ inline: true,
1645
+ title: "Report Results (Multi-Channel Output)",
1646
+ content: `Route output based on trigger source:
1647
+
1648
+ ### MANUAL Trigger -> Terminal Output
1649
+
1650
+ Format as comprehensive markdown report for terminal display with:
1651
+ - Change Summary (what changed, scope, affected files)
1652
+ - Automated Test Results (statistics, tests fixed, bugs logged)
1653
+ - Manual Verification Checklist
1654
+ - Recommendation (safe to merge / review / do not merge)
1655
+ - Test Artifacts (JSON report, HTML report, traces, screenshots)
1656
+
1657
+ ### SLACK_MESSAGE Trigger -> Thread Reply
1658
+
1659
+ {{INVOKE_TEAM_COMMUNICATOR}} to post concise results to Slack thread with:
1660
+ - Verification results summary
1661
+ - Critical failures that need immediate attention
1662
+ - Bugs logged with issue tracker links
1663
+ - Manual verification checklist summary
1664
+ - Recommendation and next steps
1665
+ - Tag relevant team members for critical issues
1666
+
1667
+ ### GITHUB_PR Trigger -> PR Comment
1668
+
1669
+ Use GitHub API to post comprehensive comment on PR with:
1670
+ - Status (All tests passed / Issues found / Critical failures)
1671
+ - Automated Tests table (Total, Passed, Failed, Fixed, Bugs, Duration)
1672
+ - Failed Tests (triaged and with actions taken)
1673
+ - Tests Fixed Automatically (issue, fix, verified)
1674
+ - Product Bugs Logged (issue ID, title, test, severity)
1675
+ - Manual Verification Required (checklist)
1676
+ - Test Artifacts links
1677
+ - Recommendation
1678
+
1679
+ ### CI_CD Trigger -> Build Log + PR Comment
1680
+
1681
+ Output to CI build log (print detailed results to stdout) and exit with appropriate code:
3181
1682
  - Exit 0: All tests passed (safe to merge)
3182
1683
  - Exit 1: Tests failed or critical bugs found (block merge)
3183
1684
 
3184
- **Post PR comment if GitHub context available:**
3185
- - Check for PR number in CI environment
3186
- - If available: Post comment using 6.3 format
3187
- - Also notify team via Slack if critical failures
3188
-
3189
- ## Additional Steps
3190
-
3191
- ### Handle Special Cases
3192
-
3193
- **If no tests found for changed files:**
1685
+ Post PR comment if GitHub context available.`,
1686
+ conditionalOnSubagent: "team-communicator"
1687
+ },
1688
+ // Step 17: Knowledge Base Update (library)
1689
+ "update-knowledge-base",
1690
+ // Step 18: Handle Special Cases (inline)
1691
+ {
1692
+ inline: true,
1693
+ title: "Handle Special Cases",
1694
+ content: `**If no tests found for changed files:**
3194
1695
  - Inform user: "No automated tests found for changed files"
3195
1696
  - Recommend: "Run smoke test suite for basic validation"
3196
1697
  - Still generate manual verification checklist
@@ -3204,8 +1705,6 @@ The following scenarios require human verification before release:
3204
1705
  - Suggest troubleshooting steps
3205
1706
  - Don't proceed with triage if tests didn't run
3206
1707
 
3207
- ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
3208
-
3209
1708
  ## Important Notes
3210
1709
 
3211
1710
  - This task handles **all trigger sources** with a single unified workflow
@@ -3216,162 +1715,172 @@ ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
3216
1715
  - Product bugs are logged with **automatic duplicate detection**
3217
1716
  - Test issues are fixed automatically with **verification**
3218
1717
  - Results include both automated and manual verification items
3219
- - For best results, ensure:
3220
- - Playwright is installed (\`npx playwright install\`)
3221
- - Environment variables configured (copy \`.env.testdata\` to \`.env\`)
3222
- - GitHub token available for PR comments (if GitHub trigger)
3223
- - Slack integration configured (if Slack trigger)
3224
- - Issue tracker configured (Linear, Jira, etc.)
3225
1718
 
3226
1719
  ## Success Criteria
3227
1720
 
3228
1721
  A successful verification includes:
3229
- 1. \u2705 Trigger source correctly detected
3230
- 2. \u2705 Context extracted completely
3231
- 3. \u2705 Tests executed (or skipped with explanation)
3232
- 4. \u2705 All failures triaged (product bug vs test issue)
3233
- 5. \u2705 Test issues fixed automatically (when possible)
3234
- 6. \u2705 Product bugs logged to issue tracker
3235
- 7. \u2705 Manual verification checklist generated
3236
- 8. \u2705 Results formatted for output channel
3237
- 9. \u2705 Results delivered to appropriate destination
3238
- 10. \u2705 Clear recommendation provided (merge / review / block)`,
3239
- optionalSubagents: [
3240
- {
3241
- role: "documentation-researcher",
3242
- contentBlock: `#### Research Project Documentation
3243
-
3244
- {{INVOKE_DOCUMENTATION_RESEARCHER}} to gather comprehensive context about the changed features:
3245
-
3246
- \`\`\`
3247
- Explore project documentation related to the changes.
3248
-
3249
- Specifically gather:
3250
- - Product specifications for affected features
3251
- - User stories and acceptance criteria
3252
- - Technical architecture documentation
3253
- - API endpoints and contracts
3254
- - User roles and permissions relevant to the change
3255
- - Business rules and validations
3256
- - UI/UX specifications
3257
- - Known limitations or constraints
3258
- - Related bug reports or known issues
3259
- - Existing test documentation for this area
3260
- \`\`\`
3261
-
3262
- The agent will:
3263
- 1. Check its memory for previously discovered documentation
3264
- 2. Explore workspace for relevant pages and databases
3265
- 3. Build comprehensive understanding of the affected features
3266
- 4. Return synthesized information to inform testing strategy
1722
+ 1. Trigger source correctly detected
1723
+ 2. Context extracted completely
1724
+ 3. Tests executed (or skipped with explanation)
1725
+ 4. All failures triaged (product bug vs test issue)
1726
+ 5. Test issues fixed automatically (when possible)
1727
+ 6. Product bugs logged to issue tracker
1728
+ 7. Manual verification checklist generated
1729
+ 8. Results formatted for output channel
1730
+ 9. Results delivered to appropriate destination
1731
+ 10. Clear recommendation provided (merge / review / block)`
1732
+ }
1733
+ ],
1734
+ requiredSubagents: ["test-runner", "test-debugger-fixer"],
1735
+ optionalSubagents: ["documentation-researcher", "issue-tracker", "team-communicator"],
1736
+ dependentTasks: []
1737
+ };
3267
1738
 
3268
- Use this information to:
3269
- - Better understand the change context
3270
- - Identify comprehensive test scenarios
3271
- - Recognize integration points and dependencies
3272
- - Spot potential edge cases or risk areas
3273
- - Enhance manual verification checklist generation`
1739
+ // src/tasks/library/onboard-testing.ts
1740
+ var onboardTestingTask = {
1741
+ slug: TASK_SLUGS.ONBOARD_TESTING,
1742
+ name: "Onboard Testing",
1743
+ description: "Complete workflow: explore application, generate test plan, create test cases, run tests, fix issues, and report results",
1744
+ frontmatter: {
1745
+ description: "Complete test coverage workflow - from exploration to passing tests",
1746
+ "argument-hint": "<focus-area-or-feature-description>"
1747
+ },
1748
+ steps: [
1749
+ // Step 1: Overview (inline)
1750
+ {
1751
+ inline: true,
1752
+ title: "Onboard Testing Overview",
1753
+ content: `## Overview
1754
+
1755
+ This command orchestrates the complete test coverage workflow in a single execution:
1756
+ 1. **Phase 1**: Read project context and explore application
1757
+ 2. **Phase 2**: Generate lightweight test plan
1758
+ 3. **Phase 3**: Generate and verify test cases (create + fix until passing)
1759
+ 4. **Phase 4**: Triage failures and fix test issues
1760
+ 5. **Phase 5**: Log product bugs
1761
+ 6. **Phase 6**: Final report`
3274
1762
  },
1763
+ // Step 2: Security Notice (from library)
1764
+ "security-notice",
1765
+ // Step 3: Arguments (inline)
3275
1766
  {
3276
- role: "issue-tracker",
3277
- contentBlock: `#### Log Product Bugs
3278
-
3279
- For tests classified as **[PRODUCT BUG]**, {{INVOKE_ISSUE_TRACKER}} to log bugs:
3280
-
3281
- \`\`\`
3282
- 1. Check for duplicate bugs in the tracking system
3283
- - The agent will automatically search for similar existing issues
3284
- - It maintains memory of recently reported issues
3285
- - Duplicate detection happens automatically - don't create manual checks
3286
-
3287
- 2. For each new bug (non-duplicate):
3288
- Create detailed bug report with:
3289
- - **Title**: Clear, descriptive summary (e.g., "Login button fails with timeout on checkout page")
3290
- - **Description**:
3291
- - What happened vs. what was expected
3292
- - Impact on users
3293
- - Test reference: [file path] \u203A [test title]
3294
- - **Reproduction Steps**:
3295
- - List steps from the failing test
3296
- - Include specific test data used
3297
- - Note any setup requirements from test file
3298
- - **Test Execution Details**:
3299
- - Test file: [file path from JSON report]
3300
- - Test name: [test title from JSON report]
3301
- - Error message: [from JSON report]
3302
- - Stack trace: [from JSON report]
3303
- - Trace file: [path if available]
3304
- - Screenshots: [paths if available]
3305
- - **Environment Details**:
3306
- - Browser and version (from Playwright config)
3307
- - Test environment URL (from .env.testdata BASE_URL)
3308
- - Timestamp of failure
3309
- - **Severity/Priority**: Based on:
3310
- - Test type (smoke tests = high priority)
3311
- - User impact
3312
- - Frequency (always fails vs flaky)
3313
- - **Additional Context**:
3314
- - Error messages or stack traces from JSON report
3315
- - Related test files (if part of test suite)
3316
- - Relevant knowledge from knowledge-base.md
3317
-
3318
- 3. Track created issues:
3319
- - Note the issue ID/number returned
3320
- - Update issue tracker memory with new bugs
3321
- - Prepare issue references for team communication
3322
- \`\`\`
3323
-
3324
- **Note**: The issue tracker agent handles all duplicate detection and system integration automatically. Simply provide the bug details and let it manage the rest.`
1767
+ inline: true,
1768
+ title: "Arguments",
1769
+ content: `Focus area: $ARGUMENTS`
3325
1770
  },
1771
+ // Phase 1: Setup
1772
+ "read-knowledge-base",
1773
+ // Phase 2: Exploration Protocol
1774
+ "exploration-protocol",
1775
+ // Execute exploration via test-runner
1776
+ "create-exploration-test-case",
1777
+ "run-exploration",
1778
+ "process-exploration-results",
1779
+ // Phase 3: Test Plan Generation
1780
+ "generate-test-plan",
1781
+ "extract-env-variables",
1782
+ // Phase 4: Test Case Generation
1783
+ "generate-test-cases",
1784
+ "automate-test-cases",
1785
+ // Phase 5: Test Execution
1786
+ "run-playwright-tests",
1787
+ "parse-test-results",
1788
+ // Phase 6: Triage and Fix (NEW - was missing from full-test-coverage)
1789
+ "triage-failures",
1790
+ "fix-test-issues",
3326
1791
  {
3327
- role: "team-communicator",
3328
- contentBlock: `#### Team Communication
3329
-
3330
- {{INVOKE_TEAM_COMMUNICATOR}} to share verification results (primarily for Slack trigger, but can be used for other triggers):
3331
-
3332
- \`\`\`
3333
- 1. Post verification results summary
3334
- 2. Highlight critical failures that need immediate attention
3335
- 3. Share bugs logged with issue tracker links
3336
- 4. Provide manual verification checklist summary
3337
- 5. Recommend next steps based on results
3338
- 6. Tag relevant team members for critical issues
3339
- 7. Use appropriate urgency level based on failure severity
3340
- \`\`\`
1792
+ stepId: "log-product-bugs",
1793
+ conditionalOnSubagent: "issue-tracker"
1794
+ },
1795
+ // Phase 7: Reporting and Communication
1796
+ "update-knowledge-base",
1797
+ {
1798
+ stepId: "notify-team",
1799
+ conditionalOnSubagent: "team-communicator"
1800
+ },
1801
+ "generate-final-report"
1802
+ ],
1803
+ requiredSubagents: ["test-runner", "test-code-generator", "test-debugger-fixer"],
1804
+ optionalSubagents: ["documentation-researcher", "team-communicator", "issue-tracker"],
1805
+ dependentTasks: ["run-tests", "generate-test-cases"]
1806
+ };
3341
1807
 
3342
- The team communication should include:
3343
- - **Execution summary**: Overall pass/fail statistics and timing
3344
- - **Tests fixed**: Count of test issues fixed automatically
3345
- - **Bugs logged**: Product bugs reported to issue tracker
3346
- - **Manual checklist**: Summary of manual verification items
3347
- - **Recommendation**: Safe to merge / Review required / Do not merge
3348
- - **Test artifacts**: Links to reports, traces, screenshots
3349
-
3350
- **Communication strategy based on trigger**:
3351
- - **Slack**: Post concise message with expandable details in thread
3352
- - **Manual**: Full detailed report in terminal
3353
- - **GitHub PR**: Comprehensive PR comment with tables and checklists
3354
- - **CI/CD**: Build log output + optional Slack notification for critical failures
3355
-
3356
- **Update team communicator memory**:
3357
- - Record verification communication
3358
- - Track response patterns by trigger type
3359
- - Document team preferences for detail level
3360
- - Note which team members respond to which types of issues`
3361
- }
1808
+ // src/tasks/library/explore-application.ts
1809
+ var exploreApplicationTask = {
1810
+ slug: TASK_SLUGS.EXPLORE_APPLICATION,
1811
+ name: "Explore Application",
1812
+ description: "Systematically explore application to discover UI elements, workflows, and behaviors",
1813
+ frontmatter: {
1814
+ description: "Explore application to discover UI, workflows, and behaviors",
1815
+ "argument-hint": "--focus [area] --depth [shallow|deep] --system [name]"
1816
+ },
1817
+ steps: [
1818
+ // Step 1: Overview (inline)
1819
+ {
1820
+ inline: true,
1821
+ title: "Explore Application Overview",
1822
+ content: `Discover actual UI elements, workflows, and behaviors using the test-runner agent. Updates test plan and project documentation with findings.`
1823
+ },
1824
+ // Step 2: Security Notice (from library)
1825
+ "security-notice",
1826
+ // Step 3: Arguments (inline)
1827
+ {
1828
+ inline: true,
1829
+ title: "Arguments",
1830
+ content: `**Arguments**: $ARGUMENTS
1831
+
1832
+ **Parse:**
1833
+ - **focus**: auth, navigation, search, content, admin (default: comprehensive)
1834
+ - **depth**: shallow (15-20 min) or deep (45-60 min, default)
1835
+ - **system**: target system (optional for multi-system setups)`
1836
+ },
1837
+ // Setup
1838
+ "read-knowledge-base",
1839
+ "load-project-context",
1840
+ // Exploration Protocol (adaptive depth)
1841
+ "exploration-protocol",
1842
+ // Execute
1843
+ "create-exploration-test-case",
1844
+ "run-exploration",
1845
+ "process-exploration-results",
1846
+ // Update
1847
+ "update-exploration-artifacts",
1848
+ // Team Communication (conditional inline)
1849
+ {
1850
+ inline: true,
1851
+ title: "Team Communication",
1852
+ content: `{{INVOKE_TEAM_COMMUNICATOR}} to notify the product team about exploration findings:
1853
+
1854
+ \`\`\`
1855
+ 1. Post an update about exploration completion
1856
+ 2. Summarize key discoveries:
1857
+ - UI elements and workflows identified
1858
+ - Behaviors documented
1859
+ - Areas needing further investigation
1860
+ 3. Share exploration report location
1861
+ 4. Ask for team feedback on findings
1862
+ 5. Use appropriate channel and threading
1863
+ \`\`\``,
1864
+ conditionalOnSubagent: "team-communicator"
1865
+ },
1866
+ "cleanup-temp-files",
1867
+ "update-knowledge-base"
3362
1868
  ],
3363
- requiredSubagents: ["test-runner", "test-debugger-fixer"]
1869
+ requiredSubagents: ["test-runner"],
1870
+ optionalSubagents: ["team-communicator"],
1871
+ dependentTasks: []
3364
1872
  };
3365
1873
 
3366
1874
  // src/tasks/index.ts
3367
1875
  var TASK_TEMPLATES = {
3368
- [TASK_SLUGS.EXPLORE_APPLICATION]: exploreApplicationTask,
3369
1876
  [TASK_SLUGS.GENERATE_TEST_CASES]: generateTestCasesTask,
3370
1877
  [TASK_SLUGS.GENERATE_TEST_PLAN]: generateTestPlanTask,
3371
1878
  [TASK_SLUGS.HANDLE_MESSAGE]: handleMessageTask,
3372
1879
  [TASK_SLUGS.PROCESS_EVENT]: processEventTask,
3373
1880
  [TASK_SLUGS.RUN_TESTS]: runTestsTask,
3374
- [TASK_SLUGS.VERIFY_CHANGES]: verifyChangesTask
1881
+ [TASK_SLUGS.VERIFY_CHANGES]: verifyChangesTask,
1882
+ [TASK_SLUGS.ONBOARD_TESTING]: onboardTestingTask,
1883
+ [TASK_SLUGS.EXPLORE_APPLICATION]: exploreApplicationTask
3375
1884
  };
3376
1885
  function getTaskTemplate(slug) {
3377
1886
  return TASK_TEMPLATES[slug];
@@ -3382,49 +1891,13 @@ function getAllTaskSlugs() {
3382
1891
  function isTaskRegistered(slug) {
3383
1892
  return TASK_TEMPLATES[slug] !== void 0;
3384
1893
  }
3385
- function buildSlashCommandsConfig(slugs) {
3386
- const configs = {};
3387
- for (const slug of slugs) {
3388
- const task = TASK_TEMPLATES[slug];
3389
- if (!task) {
3390
- console.warn(`Unknown task slug: ${slug}, skipping`);
3391
- continue;
3392
- }
3393
- configs[slug] = {
3394
- frontmatter: task.frontmatter,
3395
- content: task.baseContent
3396
- };
3397
- console.log(`\u2713 Added slash command: /${slug}`);
3398
- }
3399
- return configs;
3400
- }
3401
- function getRequiredMCPsFromTasks(slugs) {
3402
- const mcps = /* @__PURE__ */ new Set();
3403
- for (const slug of slugs) {
3404
- const task = TASK_TEMPLATES[slug];
3405
- if (!task) continue;
3406
- for (const subagent of task.requiredSubagents) {
3407
- const mcpMap = {
3408
- "test-runner": "playwright",
3409
- "team-communicator": "slack",
3410
- "documentation-researcher": "notion",
3411
- "issue-tracker": "linear"
3412
- };
3413
- const mcp = mcpMap[subagent];
3414
- if (mcp) {
3415
- mcps.add(mcp);
3416
- }
3417
- }
3418
- }
3419
- return Array.from(mcps);
3420
- }
3421
1894
  export {
3422
1895
  TASK_SLUGS,
3423
1896
  TASK_TEMPLATES,
3424
- buildSlashCommandsConfig,
3425
1897
  getAllTaskSlugs,
3426
- getRequiredMCPsFromTasks,
3427
1898
  getTaskTemplate,
1899
+ isInlineStep,
1900
+ isStepReferenceObject,
3428
1901
  isTaskRegistered
3429
1902
  };
3430
1903
  //# sourceMappingURL=index.js.map