@bugzy-ai/bugzy 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/README.md +10 -7
  2. package/dist/cli/index.cjs +6168 -5848
  3. package/dist/cli/index.cjs.map +1 -1
  4. package/dist/cli/index.js +6168 -5848
  5. package/dist/cli/index.js.map +1 -1
  6. package/dist/index.cjs +5563 -5302
  7. package/dist/index.cjs.map +1 -1
  8. package/dist/index.d.cts +5 -4
  9. package/dist/index.d.ts +5 -4
  10. package/dist/index.js +5560 -5300
  11. package/dist/index.js.map +1 -1
  12. package/dist/subagents/index.cjs +368 -51
  13. package/dist/subagents/index.cjs.map +1 -1
  14. package/dist/subagents/index.js +368 -51
  15. package/dist/subagents/index.js.map +1 -1
  16. package/dist/subagents/metadata.cjs +10 -2
  17. package/dist/subagents/metadata.cjs.map +1 -1
  18. package/dist/subagents/metadata.js +10 -2
  19. package/dist/subagents/metadata.js.map +1 -1
  20. package/dist/tasks/index.cjs +864 -2391
  21. package/dist/tasks/index.cjs.map +1 -1
  22. package/dist/tasks/index.d.cts +48 -5
  23. package/dist/tasks/index.d.ts +48 -5
  24. package/dist/tasks/index.js +862 -2389
  25. package/dist/tasks/index.js.map +1 -1
  26. package/dist/templates/init/.bugzy/runtime/knowledge-base.md +61 -0
  27. package/dist/templates/init/.bugzy/runtime/knowledge-maintenance-guide.md +97 -0
  28. package/dist/templates/init/.bugzy/runtime/subagent-memory-guide.md +87 -0
  29. package/dist/templates/init/.bugzy/runtime/templates/test-plan-template.md +41 -16
  30. package/dist/templates/init/.bugzy/runtime/templates/test-result-schema.md +498 -0
  31. package/dist/templates/init/.bugzy/runtime/test-execution-strategy.md +535 -0
  32. package/dist/templates/init/.bugzy/runtime/testing-best-practices.md +368 -14
  33. package/dist/templates/init/.gitignore-template +23 -2
  34. package/package.json +1 -1
  35. package/templates/init/.bugzy/runtime/templates/test-plan-template.md +41 -16
  36. package/templates/init/.env.testdata +18 -0
@@ -22,800 +22,36 @@ var tasks_exports = {};
22
22
  __export(tasks_exports, {
23
23
  TASK_SLUGS: () => TASK_SLUGS,
24
24
  TASK_TEMPLATES: () => TASK_TEMPLATES,
25
- buildSlashCommandsConfig: () => buildSlashCommandsConfig,
26
25
  getAllTaskSlugs: () => getAllTaskSlugs,
27
- getRequiredMCPsFromTasks: () => getRequiredMCPsFromTasks,
28
26
  getTaskTemplate: () => getTaskTemplate,
27
+ isInlineStep: () => isInlineStep,
28
+ isStepReferenceObject: () => isStepReferenceObject,
29
29
  isTaskRegistered: () => isTaskRegistered
30
30
  });
31
31
  module.exports = __toCommonJS(tasks_exports);
32
32
 
33
+ // src/tasks/steps/types.ts
34
/**
 * Type guard: reports whether a step reference is an inline step.
 *
 * @param {unknown} ref - Step reference to inspect (any value is accepted).
 * @returns {boolean} `true` only when `ref` is a non-null object that has an
 *   `inline` property strictly equal to `true`; `false` otherwise.
 */
function isInlineStep(ref) {
  // `typeof null` is "object" and `"inline" in null` throws a TypeError,
  // so null must be ruled out before the `in` check.
  return ref !== null && typeof ref === "object" && "inline" in ref && ref.inline === true;
}
37
/**
 * Type guard: reports whether a step reference is an object carrying a
 * `stepId` property.
 *
 * @param {unknown} ref - Step reference to inspect (any value is accepted).
 * @returns {boolean} `true` when `ref` is a non-null object with a `stepId`
 *   property (own or inherited); `false` otherwise.
 */
function isStepReferenceObject(ref) {
  // `typeof null` is "object" and `"stepId" in null` throws a TypeError,
  // so null must be ruled out before the `in` check.
  return ref !== null && typeof ref === "object" && "stepId" in ref;
}
40
+
33
41
  // src/tasks/constants.ts
34
42
  // Maps task constant names to their slug strings. FULL_TEST_COVERAGE is a
  // deprecated alias that carries the same slug value as ONBOARD_TESTING.
  var TASK_SLUGS = {
35
43
  EXPLORE_APPLICATION: "explore-application",
44
+ ONBOARD_TESTING: "onboard-testing",
36
45
  GENERATE_TEST_CASES: "generate-test-cases",
37
46
  GENERATE_TEST_PLAN: "generate-test-plan",
38
47
  HANDLE_MESSAGE: "handle-message",
39
48
  PROCESS_EVENT: "process-event",
40
49
  RUN_TESTS: "run-tests",
41
- VERIFY_CHANGES: "verify-changes"
50
+ VERIFY_CHANGES: "verify-changes",
51
+ /** @deprecated Use ONBOARD_TESTING instead */
52
+ FULL_TEST_COVERAGE: "onboard-testing"
42
53
  };
43
54
 
44
- // src/tasks/templates/exploration-instructions.ts
45
- var EXPLORATION_INSTRUCTIONS = `
46
- ## Exploratory Testing Protocol
47
-
48
- Before creating or running formal tests, perform exploratory testing to validate requirements and understand actual system behavior. The depth of exploration should adapt to the clarity of requirements.
49
-
50
- ### Step {{STEP_NUMBER}}.1: Assess Requirement Clarity
51
-
52
- Determine exploration depth based on requirement quality:
53
-
54
- | Clarity | Indicators | Exploration Depth | Goal |
55
- |---------|-----------|-------------------|------|
56
- | **Clear** | Detailed acceptance criteria, screenshots/mockups, specific field names/URLs/roles, unambiguous behavior, consistent patterns | Quick (1-2 min) | Confirm feature exists, capture evidence |
57
- | **Vague** | General direction clear but specifics missing, incomplete examples, assumed details, relative terms ("fix", "better") | Moderate (3-5 min) | Document current behavior, identify ambiguities, generate clarification questions |
58
- | **Unclear** | Contradictory info, multiple interpretations, no examples/criteria, ambiguous scope ("the page"), critical details missing | Deep (5-10 min) | Systematically test scenarios, document patterns, identify all ambiguities, formulate comprehensive questions |
59
-
60
- **Examples:**
61
- - **Clear:** "Change 'Submit' button from blue (#007BFF) to green (#28A745) on /auth/login. Verify hover effect."
62
- - **Vague:** "Fix the sorting in todo list page. The items are mixed up for premium users."
63
- - **Unclear:** "Improve the dashboard performance. Users say it's slow."
64
-
65
- ### Step {{STEP_NUMBER}}.2: Quick Exploration (1-2 min)
66
-
67
- **When:** Requirements CLEAR
68
-
69
- **Steps:**
70
- 1. Navigate to feature (use provided URL), verify loads without errors
71
- 2. Verify key elements exist (buttons, fields, sections mentioned)
72
- 3. Capture screenshot of initial state
73
- 4. Document:
74
- \`\`\`markdown
75
- **Quick Exploration (1 min)**
76
- Feature: [Name] | URL: [Path]
77
- Status: \u2705 Accessible / \u274C Not found / \u26A0\uFE0F Different
78
- Screenshot: [filename]
79
- Notes: [Immediate observations]
80
- \`\`\`
81
- 5. **Decision:** \u2705 Matches \u2192 Test creation | \u274C/\u26A0\uFE0F Doesn't match \u2192 Moderate Exploration
82
-
83
- **Time Limit:** 1-2 minutes
84
-
85
- ### Step {{STEP_NUMBER}}.3: Moderate Exploration (3-5 min)
86
-
87
- **When:** Requirements VAGUE or Quick Exploration revealed discrepancies
88
-
89
- **Steps:**
90
- 1. Navigate using appropriate role(s), set up preconditions, ensure clean state
91
- 2. Test primary user flow, document steps and behavior, note unexpected behavior
92
- 3. Capture before/after screenshots, document field values/ordering/visibility
93
- 4. Compare to requirement: What matches? What differs? What's absent?
94
- 5. Identify specific ambiguities:
95
- \`\`\`markdown
96
- **Moderate Exploration (4 min)**
97
-
98
- **Explored:** Role: [Admin], Path: [Steps], Behavior: [What happened]
99
-
100
- **Current State:** [Specific observations with examples]
101
- - Example: "Admin view shows 8 sort options: By Title, By Due Date, By Priority..."
102
-
103
- **Requirement Says:** [What requirement expected]
104
-
105
- **Discrepancies:** [Specific differences]
106
- - Example: "Premium users see 5 fewer sorting options than admins"
107
-
108
- **Ambiguities:**
109
- 1. [First ambiguity with concrete example]
110
- 2. [Second if applicable]
111
-
112
- **Clarification Needed:** [Specific questions]
113
- \`\`\`
114
- 6. Assess severity using Clarification Protocol
115
- 7. **Decision:** \u{1F7E2} Minor \u2192 Proceed with assumptions | \u{1F7E1} Medium \u2192 Async clarification, proceed | \u{1F534} Critical \u2192 Stop, escalate
116
-
117
- **Time Limit:** 3-5 minutes
118
-
119
- ### Step {{STEP_NUMBER}}.4: Deep Exploration (5-10 min)
120
-
121
- **When:** Requirements UNCLEAR or critical ambiguities found
122
-
123
- **Steps:**
124
- 1. **Define Exploration Matrix:** Identify dimensions (user roles, feature states, input variations, browsers)
125
-
126
- 2. **Systematic Testing:** Test each matrix cell methodically
127
- \`\`\`
128
- Example for "Todo List Sorting":
129
- Matrix: User Roles \xD7 Feature Observations
130
-
131
- Test 1: Admin Role \u2192 Navigate, document sort options (count, names, order), screenshot
132
- Test 2: Basic User Role \u2192 Same todo list, document options, screenshot
133
- Test 3: Compare \u2192 Side-by-side table, identify missing/reordered options
134
- \`\`\`
135
-
136
- 3. **Document Patterns:** Consistent behavior? Role-based differences? What varies vs constant?
137
-
138
- 4. **Comprehensive Report:**
139
- \`\`\`markdown
140
- **Deep Exploration (8 min)**
141
-
142
- **Matrix:** [Dimensions] | **Tests:** [X combinations]
143
-
144
- **Findings:**
145
-
146
- ### Test 1: Admin
147
- - Setup: [Preconditions] | Steps: [Actions]
148
- - Observations: Sort options=8, Options=[list], Ordering=[sequence]
149
- - Screenshot: [filename-admin.png]
150
-
151
- ### Test 2: Basic User
152
- - Setup: [Preconditions] | Steps: [Actions]
153
- - Observations: Sort options=3, Missing vs Admin=[5 options], Ordering=[sequence]
154
- - Screenshot: [filename-user.png]
155
-
156
- **Comparison Table:**
157
- | Sort Option | Admin Pos | User Pos | Notes |
158
- |-------------|-----------|----------|-------|
159
- | By Title | 1 | 1 | Match |
160
- | By Priority | 3 | Not visible | Missing |
161
-
162
- **Patterns:**
163
- - Role-based feature visibility
164
- - Consistent relative ordering for visible fields
165
-
166
- **Critical Ambiguities:**
167
- 1. Option Visibility: Intentional basic users see 5 fewer sort options?
168
- 2. Sort Definition: (A) All roles see all options in same order, OR (B) Roles see permitted options in same relative order?
169
-
170
- **Clarification Questions:** [Specific, concrete based on findings]
171
- \`\`\`
172
-
173
- 5. **Next Action:** Critical ambiguities \u2192 STOP, clarify | Patterns suggest answer \u2192 Validate assumption | Behavior clear \u2192 Test creation
174
-
175
- **Time Limit:** 5-10 minutes
176
-
177
- ### Step {{STEP_NUMBER}}.5: Link Exploration to Clarification
178
-
179
- **Flow:** Requirement Analysis \u2192 Exploration \u2192 Clarification
180
-
181
- 1. Requirement analysis detects vague language \u2192 Triggers exploration
182
- 2. Exploration documents current behavior \u2192 Identifies discrepancies
183
- 3. Clarification uses findings \u2192 Asks specific questions referencing observations
184
-
185
- **Example:**
186
- \`\`\`
187
- "Fix the sorting in todo list"
188
- \u2193 Ambiguity: "sorting" = by date, priority, or completion status?
189
- \u2193 Moderate Exploration: Admin=8 sort options, User=3 sort options
190
- \u2193 Question: "Should basic users see all 8 sort options (bug) or only 3 with consistent sequence (correct)?"
191
- \`\`\`
192
-
193
- ### Step {{STEP_NUMBER}}.6: Document Exploration Results
194
-
195
- **Template:**
196
- \`\`\`markdown
197
- ## Exploration Summary
198
-
199
- **Date:** [YYYY-MM-DD] | **Explorer:** [Agent/User] | **Depth:** [Quick/Moderate/Deep] | **Duration:** [X min]
200
-
201
- ### Feature: [Name and description]
202
-
203
- ### Observations: [Key findings]
204
-
205
- ### Current Behavior: [What feature does today]
206
-
207
- ### Discrepancies: [Requirement vs observation differences]
208
-
209
- ### Assumptions Made: [If proceeding with assumptions]
210
-
211
- ### Artifacts: Screenshots: [list], Video: [if captured], Notes: [detailed]
212
- \`\`\`
213
-
214
- **Memory Storage:** Feature behavior patterns, common ambiguity types, resolution approaches
215
-
216
- ### Step {{STEP_NUMBER}}.7: Integration with Test Creation
217
-
218
- **Quick Exploration \u2192 Direct Test:**
219
- - Feature verified \u2192 Create test matching requirement \u2192 Reference screenshot
220
-
221
- **Moderate Exploration \u2192 Assumption-Based Test:**
222
- - Document behavior \u2192 Create test on best interpretation \u2192 Mark assumptions \u2192 Plan updates after clarification
223
-
224
- **Deep Exploration \u2192 Clarification-First:**
225
- - Block test creation until clarification \u2192 Use exploration as basis for questions \u2192 Create test after answer \u2192 Reference both exploration and clarification
226
-
227
- ---
228
-
229
- ## Adaptive Exploration Decision Tree
230
-
231
- \`\`\`
232
- Start: Requirement Received
233
- \u2193
234
- Are requirements clear with specifics?
235
- \u251C\u2500 YES \u2192 Quick Exploration (1-2 min)
236
- \u2502 \u2193
237
- \u2502 Does feature match description?
238
- \u2502 \u251C\u2500 YES \u2192 Proceed to Test Creation
239
- \u2502 \u2514\u2500 NO \u2192 Escalate to Moderate Exploration
240
- \u2502
241
- \u2514\u2500 NO \u2192 Is general direction clear but details missing?
242
- \u251C\u2500 YES \u2192 Moderate Exploration (3-5 min)
243
- \u2502 \u2193
244
- \u2502 Are ambiguities MEDIUM severity or lower?
245
- \u2502 \u251C\u2500 YES \u2192 Document assumptions, proceed with test creation
246
- \u2502 \u2514\u2500 NO \u2192 Escalate to Deep Exploration or Clarification
247
- \u2502
248
- \u2514\u2500 NO \u2192 Deep Exploration (5-10 min)
249
- \u2193
250
- Document comprehensive findings
251
- \u2193
252
- Assess ambiguity severity
253
- \u2193
254
- Seek clarification for CRITICAL/HIGH
255
- \`\`\`
256
-
257
- ---
258
-
259
- ## Remember:
260
-
261
- \u{1F50D} **Explore before assuming** | \u{1F4CA} **Concrete observations > abstract interpretation** | \u23F1\uFE0F **Adaptive depth: time \u221D uncertainty** | \u{1F3AF} **Exploration findings \u2192 specific clarifications** | \u{1F4DD} **Always document** | \u{1F517} **Link exploration \u2192 ambiguity \u2192 clarification**
262
- `;
263
-
264
- // src/tasks/templates/knowledge-base.ts
265
- var KNOWLEDGE_BASE_READ_INSTRUCTIONS = `
266
- ## Knowledge Base Context
267
-
268
- Before proceeding, read the curated knowledge base to inform your work:
269
-
270
- **Location:** \`.bugzy/runtime/knowledge-base.md\`
271
-
272
- **Purpose:** The knowledge base is a living collection of factual knowledge - what we currently know and believe to be true about this project, its patterns, and its context. This is NOT a historical log, but a curated snapshot that evolves as understanding improves.
273
-
274
- **How to Use:**
275
- 1. Read the knowledge base to understand:
276
- - Project-specific patterns and conventions
277
- - Known behaviors and system characteristics
278
- - Relevant context from past work
279
- - Documented decisions and approaches
280
-
281
- 2. Apply this knowledge to:
282
- - Make informed decisions aligned with project patterns
283
- - Avoid repeating past mistakes
284
- - Build on existing understanding
285
- - Maintain consistency with established practices
286
-
287
- **Note:** The knowledge base may not exist yet or may be empty. If it doesn't exist or is empty, proceed without this context and help build it as you work.
288
- `;
289
- var KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS = `
290
- ## Knowledge Base Maintenance
291
-
292
- After completing your work, update the knowledge base with new insights.
293
-
294
- **Location:** \`.bugzy/runtime/knowledge-base.md\`
295
-
296
- **Process:**
297
-
298
- 1. **Read the maintenance guide** at \`.bugzy/runtime/knowledge-maintenance-guide.md\` to understand when to ADD, UPDATE, or REMOVE entries and how to maintain a curated knowledge base (not an append-only log)
299
-
300
- 2. **Review the current knowledge base** to check for overlaps, contradictions, or opportunities to consolidate existing knowledge
301
-
302
- 3. **Update the knowledge base** following the maintenance guide principles: favor consolidation over addition, update rather than append, resolve contradictions immediately, and focus on quality over completeness
303
-
304
- **Remember:** Every entry should answer "Will this help someone working on this project in 6 months?"
305
- `;
306
-
307
- // src/tasks/library/explore-application.ts
308
- var exploreApplicationTask = {
309
- slug: TASK_SLUGS.EXPLORE_APPLICATION,
310
- name: "Explore Application",
311
- description: "Systematically explore application to discover UI elements, workflows, and behaviors",
312
- frontmatter: {
313
- description: "Systematically explore application to discover UI elements, workflows, and behaviors",
314
- "argument-hint": "--focus [area] --depth [shallow|deep] --system [system-name]"
315
- },
316
- baseContent: `# Explore Application Command
317
-
318
- ## SECURITY NOTICE
319
- **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
320
- - **Read \`.env.testdata\`** for non-secret environment variables (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
321
- - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
322
- - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
323
- - The \`.env\` file access is blocked by settings.json
324
-
325
- Systematically explore the application using the test-runner agent to discover actual UI elements, workflows, and behaviors. Updates test plan and project documentation with findings.
326
-
327
- ## Arguments
328
- Arguments: $ARGUMENTS
329
-
330
- ## Parse Arguments
331
- Extract the following from arguments:
332
- - **focus**: Specific area to explore (authentication, navigation, search, content, admin)
333
- - **depth**: Exploration depth - shallow (quick discovery) or deep (comprehensive) - defaults to deep
334
- - **system**: Which system to explore (optional for multi-system setups)
335
-
336
- ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
337
-
338
- ## Process
339
-
340
- ### Step 0: Understand Exploration Protocol
341
-
342
- This task implements the exploration protocol defined in the exploration-instructions template.
343
-
344
- **Purpose**: This task provides the infrastructure for systematic application exploration that is referenced by other tasks (generate-test-plan, generate-test-cases, verify-changes) when they need to explore features before proceeding.
345
-
346
- **Depth Alignment**: The depth levels in this task align with the exploration template:
347
- - **Shallow exploration (15-20 min)** implements the quick/moderate exploration from the template
348
- - **Deep exploration (45-60 min)** implements comprehensive deep exploration from the template
349
-
350
- The depth levels are extended for full application exploration compared to the focused feature exploration used in other tasks.
351
-
352
- **Full Exploration Protocol Reference**:
353
-
354
- ${EXPLORATION_INSTRUCTIONS}
355
-
356
- **Note**: This task extends the protocol for comprehensive application-wide exploration, while other tasks use abbreviated versions for targeted feature exploration.
357
-
358
- ### Step 1: Load Environment and Context
359
-
360
- #### 1.1 Check Environment Variables
361
- Read \`.env.testdata\` file to understand what variables are required:
362
- - TEST_BASE_URL or TEST_MOBILE_BASE_URL (base URL variable names)
363
- - [SYSTEM_NAME]_URL (if multi-system setup)
364
- - Authentication credential variable names for the selected system
365
- - Any test data variable names
366
-
367
- Note: The actual values will be read from the user's \`.env\` file at test execution time.
368
- Verify \`.env.testdata\` exists to understand variable structure. If it doesn't exist, notify user to create it based on test plan.
369
-
370
- #### 1.2 Read Current Test Plan
371
- Read \`test-plan.md\` to:
372
- - Identify sections marked with [TO BE EXPLORED]
373
- - Find features requiring discovery
374
- - Understand testing scope and priorities
375
-
376
- #### 1.3 Read Project Context
377
- Read \`.bugzy/runtime/project-context.md\` for:
378
- - System architecture understanding
379
- - Testing environment details
380
- - QA workflow requirements
381
-
382
- ### Step 2: Prepare Exploration Strategy
383
-
384
- Based on the arguments and context, prepare exploration instructions.
385
-
386
- #### 2.1 Focus Area Strategies
387
-
388
- **If focus is "authentication":**
389
- \`\`\`
390
- 1. Navigate to the application homepage
391
- 2. Locate and document all authentication entry points:
392
- - Login button/link location and selector
393
- - Registration option and flow
394
- - Social login options (Facebook, Google, etc.)
395
- 3. Test login flow:
396
- - Document form fields and validation
397
- - Test error states with invalid credentials
398
- - Verify successful login indicators
399
- 4. Test logout functionality:
400
- - Find logout option
401
- - Verify session termination
402
- - Check redirect behavior
403
- 5. Explore password recovery:
404
- - Locate forgot password link
405
- - Document recovery flow
406
- - Note email/SMS options
407
- 6. Check role-based access:
408
- - Identify user role indicators
409
- - Document permission differences
410
- - Test admin/moderator access if available
411
- 7. Test session persistence:
412
- - Check remember me functionality
413
- - Test timeout behavior
414
- - Verify multi-tab session handling
415
- \`\`\`
416
-
417
- **If focus is "navigation":**
418
- \`\`\`
419
- 1. Document main navigation structure:
420
- - Primary menu items and hierarchy
421
- - Mobile menu behavior
422
- - Footer navigation links
423
- 2. Map URL patterns:
424
- - Category URL structure
425
- - Parameter patterns
426
- - Deep linking support
427
- 3. Test breadcrumb navigation:
428
- - Availability on different pages
429
- - Clickability and accuracy
430
- - Mobile display
431
- 4. Explore category system:
432
- - Main categories and subcategories
433
- - Navigation between levels
434
- - Content organization
435
- 5. Document special sections:
436
- - User profiles
437
- - Admin areas
438
- - Help/Support sections
439
- 6. Test browser navigation:
440
- - Back/forward button behavior
441
- - History management
442
- - State preservation
443
- \`\`\`
444
-
445
- **If focus is "search":**
446
- \`\`\`
447
- 1. Locate search interfaces:
448
- - Main search bar
449
- - Advanced search options
450
- - Category-specific search
451
- 2. Document search features:
452
- - Autocomplete/suggestions
453
- - Search filters
454
- - Sort options
455
- 3. Test search functionality:
456
- - Special character handling
457
- - Empty/invalid queries
458
- 4. Analyze search results:
459
- - Result format and layout
460
- - Pagination
461
- - No results handling
462
- 5. Check search performance:
463
- - Response times
464
- - Result relevance
465
- - Load more/infinite scroll
466
- \`\`\`
467
-
468
- **If no focus specified:**
469
- Use comprehensive exploration covering all major areas.
470
-
471
- #### 2.2 Depth Configuration
472
-
473
- **Implementation Note**: These depths implement the exploration protocol defined in exploration-instructions.ts, extended for full application exploration.
474
-
475
- **Shallow exploration (--depth shallow):**
476
- - Quick discovery pass (15-20 minutes)
477
- - Focus on main features only
478
- - Basic screenshot capture
479
- - High-level findings
480
- - *Aligns with Quick/Moderate exploration from template*
481
-
482
- **Deep exploration (--depth deep or default):**
483
- - Comprehensive exploration (45-60 minutes)
484
- - Test edge cases and variations
485
- - Extensive screenshot documentation
486
- - Detailed technical findings
487
- - Performance observations
488
- - Accessibility notes
489
- - *Aligns with Deep exploration from template*
490
-
491
- ### Step 3: Execute Exploration
492
-
493
- #### 3.1 Create Exploration Test Case
494
- Generate a temporary exploration test case file at \`./test-cases/EXPLORATION-TEMP.md\`:
495
-
496
- \`\`\`markdown
497
- ---
498
- id: EXPLORATION-TEMP
499
- title: Application Exploration - [Focus Area or Comprehensive]
500
- type: exploratory
501
- priority: high
502
- ---
503
-
504
- ## Preconditions
505
- - Browser with cleared cookies and cache
506
- - Access to [system] environment
507
- - Credentials configured per .env.testdata template
508
-
509
- ## Test Steps
510
- [Generated exploration steps based on strategy]
511
-
512
- ## Expected Results
513
- Document all findings including:
514
- - UI element locations and selectors
515
- - Navigation patterns and URLs
516
- - Feature behaviors and workflows
517
- - Performance observations
518
- - Error states and edge cases
519
- - Screenshots of all key areas
520
- \`\`\`
521
-
522
- #### 3.2 Launch Test Runner Agent
523
- {{INVOKE_TEST_RUNNER}}
524
-
525
- Execute the exploration test case with special exploration instructions:
526
-
527
- \`\`\`
528
- Execute the exploration test case at ./test-cases/EXPLORATION-TEMP.md with focus on discovery and documentation.
529
-
530
- Special instructions for exploration mode:
531
- 1. Take screenshots of EVERY significant UI element and page
532
- 2. Document all clickable elements with their selectors
533
- 3. Note all URL patterns and parameters
534
- 4. Test variations and edge cases where possible
535
- 5. Document load times and performance observations
536
- 6. Create detailed findings report with structured data
537
- 7. Organize screenshots by functional area
538
- 8. Note any console errors or warnings
539
- 9. Document which features are accessible vs restricted
540
-
541
- Generate a comprehensive exploration report that can be used to update project documentation.
542
- \`\`\`
543
-
544
- ### Step 4: Process Exploration Results
545
-
546
- #### 4.1 Read Test Runner Output
547
- Read the generated test run files from \`./test-runs/[timestamp]/EXPLORATION-TEMP/\`:
548
- - \`findings.md\` - Main findings document
549
- - \`test-log.md\` - Detailed step execution
550
- - \`screenshots/\` - Visual documentation
551
- - \`summary.json\` - Execution summary
552
-
553
- #### 4.2 Parse and Structure Findings
554
- Extract and organize:
555
- - Discovered features and capabilities
556
- - UI element selectors and patterns
557
- - Navigation structure and URLs
558
- - Authentication flow details
559
- - Performance metrics
560
- - Technical observations
561
- - Areas requiring further investigation
562
-
563
- ### Step 5: Update Project Artifacts
564
-
565
- #### 5.1 Update Test Plan
566
- Read and update \`test-plan.md\`:
567
- - Replace [TO BE EXPLORED] markers with concrete findings
568
- - Add newly discovered features to test items
569
- - Update navigation patterns and URL structures
570
- - Document actual authentication methods
571
- - Update environment variables if new ones discovered
572
- - Refine pass/fail criteria based on actual behavior
573
-
574
- #### 5.2 Create Exploration Report
575
- Create \`./exploration-reports/[timestamp]-[focus]-exploration.md\`
576
-
577
- ### Step 6: Cleanup
578
-
579
- #### 6.1 Remove Temporary Files
580
- Delete the temporary exploration test case:
581
- \`\`\`bash
582
- rm ./test-cases/EXPLORATION-TEMP.md
583
- \`\`\`
584
-
585
- ### Step 7: Generate Summary Report
586
- Create a concise summary for the user
587
-
588
- ## Error Handling
589
-
590
- ### Environment Issues
591
- - If \`.env.testdata\` missing: Warn user and suggest creating it from test plan
592
- - If credentials invalid (at runtime): Document in report and continue with public areas
593
- - If system unreachable: Retry with exponential backoff, report if persistent
594
-
595
- ### Exploration Failures
596
- - If test-runner fails: Capture partial results and report
597
- - If specific area inaccessible: Note in findings and continue
598
- - If browser crashes: Attempt recovery and resume
599
- - If test-runner stops, but does not create files, inspect what it did and if it was not enough remove the test-run and start the test-runner agent again. If it has enough info, continue with what you have.
600
-
601
- ### Data Issues
602
- - If dynamic content prevents exploration: Note and try alternative approaches
603
- - If rate limited: Implement delays and retry
604
-
605
- ## Integration with Other Commands
606
-
607
- ### Feeds into /generate-test-cases
608
- - Provides actual UI elements for test steps
609
- - Documents real workflows for test scenarios
610
- - Identifies edge cases to test
611
-
612
- ### Updates from /process-event
613
- - New exploration findings can be processed as events
614
- - Discovered bugs trigger issue creation
615
- - Feature discoveries update test coverage
616
-
617
- ### Enhances /run-tests
618
- - Tests use discovered selectors
619
- - Validation based on actual behavior
620
- - More reliable test execution
621
-
622
- ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}`,
623
- optionalSubagents: [],
624
- requiredSubagents: ["test-runner"]
625
- };
626
-
627
- // src/tasks/templates/clarification-instructions.ts
628
- var CLARIFICATION_INSTRUCTIONS = `
629
- ## Clarification Protocol
630
-
631
- Before proceeding with test creation or execution, ensure requirements are clear and testable. Use this protocol to detect ambiguity, assess its severity, and determine the appropriate action.
632
-
633
- ### Step {{STEP_NUMBER}}.0: Check for Pending Clarification
634
-
635
- Before starting, check if this task is resuming from a blocked clarification:
636
-
637
- 1. **Check $ARGUMENTS for clarification data:**
638
- - If \`$ARGUMENTS.clarification\` exists, this task is resuming with a clarification response
639
- - Extract: \`clarification\` (the user's answer), \`originalArgs\` (original task parameters)
640
-
641
- 2. **If clarification is present:**
642
- - Read \`.bugzy/runtime/blocked-task-queue.md\`
643
- - Find and remove your task's entry from the queue (update the file)
644
- - Proceed using the clarification as if user just provided the answer
645
- - Skip ambiguity detection for the clarified aspect
646
-
647
- 3. **If no clarification in $ARGUMENTS:** Proceed normally with ambiguity detection below.
648
-
649
- ### Step {{STEP_NUMBER}}.1: Detect Ambiguity
650
-
651
- Scan for ambiguity signals:
652
-
653
- **Language:** Vague terms ("fix", "improve", "better", "like", "mixed up"), relative terms without reference ("faster", "more"), undefined scope ("the ordering", "the fields", "the page"), modal ambiguity ("should", "could" vs "must", "will")
654
-
655
- **Details:** Missing acceptance criteria (no clear PASS/FAIL), no examples/mockups, incomplete field/element lists, unclear role behavior differences, unspecified error scenarios
656
-
657
- **Interpretation:** Multiple valid interpretations, contradictory information (description vs comments), implied vs explicit requirements
658
-
659
- **Context:** No reference documentation, "RELEASE APPROVED" without criteria, quick ticket creation, assumes knowledge ("as you know...", "obviously...")
660
-
661
- **Quick Check:**
662
- - [ ] Success criteria explicitly defined? (PASS if X, FAIL if Y)
663
- - [ ] All affected elements specifically listed? (field names, URLs, roles)
664
- - [ ] Only ONE reasonable interpretation?
665
- - [ ] Examples, screenshots, or mockups provided?
666
- - [ ] Consistent with existing system patterns?
667
- - [ ] Can write test assertions without assumptions?
668
-
669
- ### Step {{STEP_NUMBER}}.2: Assess Severity
670
-
671
- If ambiguity is detected, assess its severity:
672
-
673
- | Severity | Characteristics | Examples | Action |
674
- |----------|----------------|----------|--------|
675
- | \u{1F534} **CRITICAL** | Expected behavior undefined/contradictory; test outcome unpredictable; core functionality unclear; success criteria missing; multiple interpretations = different strategies | "Fix the issue" (what issue?), "Improve performance" (which metrics?), "Fix sorting in todo list" (by date? priority? completion status?) | **STOP** - Seek clarification before proceeding |
676
- | \u{1F7E0} **HIGH** | Core underspecified but direction clear; affects majority of scenarios; vague success criteria; assumptions risky | "Fix ordering" (sequence OR visibility?), "Add validation" (what? messages?), "Update dashboard" (which widgets?) | **STOP** - Seek clarification before proceeding |
677
- | \u{1F7E1} **MEDIUM** | Specific details missing; general requirements clear; affects subset of cases; reasonable low-risk assumptions possible; wrong assumption = test updates not strategy overhaul | Missing field labels, unclear error message text, undefined timeouts, button placement not specified, date formats unclear | **PROCEED** - (1) Moderate exploration, (2) Document assumptions: "Assuming X because Y", (3) Proceed with creation/execution, (4) Async clarification (team-communicator), (5) Mark [ASSUMED: description] |
678
- | \u{1F7E2} **LOW** | Minor edge cases; documentation gaps don't affect execution; optional/cosmetic elements; minimal impact | Tooltip text, optional field validation, icon choice, placeholder text, tab order | **PROCEED** - (1) Mark [TO BE CLARIFIED: description], (2) Proceed, (3) Mention in report "Minor Details", (4) No blocking/async clarification |
679
-
680
- ### Step {{STEP_NUMBER}}.3: Check Memory for Similar Clarifications
681
-
682
- Before asking, check whether a similar question has already been answered:
683
-
684
- **Process:**
685
- 1. **Query team-communicator memory** - Search by feature name, ambiguity pattern, ticket keywords
686
- 2. **Review past Q&A** - Was a similar question asked? What was the answer? Is it applicable now?
687
- 3. **Assess reusability:**
688
- - Directly applicable \u2192 Use answer, no re-ask
689
- - Partially applicable \u2192 Adapt and reference ("Previously for X, clarified Y. Same here?")
690
- - Not applicable \u2192 Ask as new
691
- 4. **Update memory** - Store Q&A with task type, feature, pattern tags
692
-
693
- **Example:** Query "todo sorting priority" \u2192 Found 2025-01-15: "Should completed todos appear in main list?" \u2192 Answer: "No, move to separate archive view" \u2192 Directly applicable \u2192 Use, no re-ask needed
694
-
695
- ### Step {{STEP_NUMBER}}.4: Formulate Clarification Questions
696
-
697
- If clarification is needed (CRITICAL/HIGH severity), formulate specific, concrete questions:
698
-
699
- **Good Questions:** Specific and concrete, provide context, offer options, reference examples, tie to test strategy
700
-
701
- **Bad Questions:** Too vague/broad, assumptive, multiple questions in one, no context
702
-
703
- **Template:**
704
- \`\`\`
705
- **Context:** [Current understanding]
706
- **Ambiguity:** [Specific unclear aspect]
707
- **Question:** [Specific question with options]
708
- **Why Important:** [Testing strategy impact]
709
-
710
- Example:
711
- Context: TODO-456 "Fix the sorting in the todo list so items appear in the right order"
712
- Ambiguity: "sorting" = (A) by creation date, (B) by due date, (C) by priority level, or (D) custom user-defined order
713
- Question: Should todos be sorted by due date (soonest first) or priority (high to low)? Should completed items appear in the list or move to archive?
714
- Why Important: Different sort criteria require different test assertions. Current app shows 15 active todos + 8 completed in mixed order.
715
- \`\`\`
716
-
717
- ### Step {{STEP_NUMBER}}.5: Communicate Clarification Request
718
-
719
- **For Slack-Triggered Tasks:** Use team-communicator subagent:
720
- \`\`\`
721
- Ask clarification in Slack thread:
722
- Context: [From ticket/description]
723
- Ambiguity: [Describe ambiguity]
724
- Severity: [CRITICAL/HIGH]
725
- Questions:
726
- 1. [First specific question]
727
- 2. [Second if needed]
728
-
729
- Clarification needed to proceed. I'll wait for response before testing.
730
- \`\`\`
731
-
732
- **For Manual/API Triggers:** Include in task output:
733
- \`\`\`markdown
734
- ## \u26A0\uFE0F Clarification Required Before Testing
735
-
736
- **Ambiguity:** [Description]
737
- **Severity:** [CRITICAL/HIGH]
738
-
739
- ### Questions:
740
- 1. **Question:** [First question]
741
- - Context: [Provide context]
742
- - Options: [If applicable]
743
- - Impact: [Testing impact]
744
-
745
- **Action Required:** Provide clarification. Testing cannot proceed.
746
- **Current Observation:** [What exploration revealed - concrete examples]
747
- \`\`\`
748
-
749
- ### Step {{STEP_NUMBER}}.5.1: Register Blocked Task (CRITICAL/HIGH only)
750
-
751
- When asking a CRITICAL or HIGH severity question that blocks progress, register the task in the blocked queue so it can be automatically re-triggered when clarification arrives.
752
-
753
- **Update \`.bugzy/runtime/blocked-task-queue.md\`:**
754
-
755
- 1. Read the current file (create it if it doesn't exist)
756
- 2. Add a new row to the Queue table
757
-
758
- \`\`\`markdown
759
- # Blocked Task Queue
760
-
761
- Tasks waiting for clarification responses.
762
-
763
- | Task Slug | Question | Original Args |
764
- |-----------|----------|---------------|
765
- | generate-test-plan | Should todos be sorted by date or priority? | \`{"ticketId": "TODO-456"}\` |
766
- \`\`\`
767
-
768
- **Entry Fields:**
769
- - **Task Slug**: The task slug (e.g., \`generate-test-plan\`) - used for re-triggering
770
- - **Question**: The clarification question asked (so LLM can match responses)
771
- - **Original Args**: JSON-serialized \`$ARGUMENTS\` wrapped in backticks
772
-
773
- **Purpose**: The LLM processor reads this file and matches user responses to pending questions. When a match is found, it re-queues the task with the clarification.
774
-
775
- ### Step {{STEP_NUMBER}}.6: Wait or Proceed Based on Severity
776
-
777
- **CRITICAL/HIGH \u2192 STOP and Wait:**
778
- - Do NOT create tests, run tests, or make assumptions
779
- - Wait for clarification, resume after answer
780
- - *Rationale: Wrong assumptions = incorrect tests, false results, wasted time*
781
-
782
- **MEDIUM \u2192 Proceed with Documented Assumptions:**
783
- - Perform moderate exploration, document assumptions, proceed with creation/execution
784
- - Ask clarification async (team-communicator), mark results "based on assumptions"
785
- - Update tests after clarification received
786
- - *Rationale: Waiting blocks progress; documented assumptions allow forward movement with later corrections*
787
-
788
- **LOW \u2192 Proceed and Mark:**
789
- - Proceed with creation/execution, mark gaps [TO BE CLARIFIED] or [ASSUMED]
790
- - Mention in report but don't prioritize, no blocking
791
- - *Rationale: Details don't affect strategy/results significantly*
792
-
793
- ### Step {{STEP_NUMBER}}.7: Document Clarification in Results
794
-
795
- When reporting test results, always include an "Ambiguities" section if clarification occurred:
796
-
797
- \`\`\`markdown
798
- ## Ambiguities Encountered
799
-
800
- ### Clarification: [Topic]
801
- - **Severity:** [CRITICAL/HIGH/MEDIUM/LOW]
802
- - **Question Asked:** [What was asked]
803
- - **Response:** [Answer received, or "Awaiting response"]
804
- - **Impact:** [How this affected testing]
805
- - **Assumption Made:** [If proceeded with assumption]
806
- - **Risk:** [What could be wrong if assumption is incorrect]
807
-
808
- ### Resolution:
809
- [How the clarification was resolved and incorporated into testing]
810
- \`\`\`
811
-
812
- ---
813
-
814
- ## Remember:
815
-
816
- \u{1F6D1} **Block for CRITICAL/HIGH** | \u2705 **Ask correctly > guess poorly** | \u{1F4DD} **Document MEDIUM assumptions** | \u{1F50D} **Check memory first** | \u{1F3AF} **Specific questions \u2192 specific answers**
817
- `;
818
-
819
55
  // src/tasks/library/generate-test-cases.ts
820
56
  var generateTestCasesTask = {
821
57
  slug: TASK_SLUGS.GENERATE_TEST_CASES,
@@ -825,40 +61,39 @@ var generateTestCasesTask = {
825
61
  description: "Generate manual test case documentation AND automated Playwright test scripts from test plan",
826
62
  "argument-hint": "--type [exploratory|functional|regression|smoke] --focus [optional-feature]"
827
63
  },
828
- baseContent: `# Generate Test Cases Command
829
-
830
- ## SECURITY NOTICE
831
- **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
832
- - **Read \`.env.testdata\`** for non-secret test data (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
833
- - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
834
- - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
835
- - The \`.env\` file access is blocked by settings.json
836
-
837
- Generate comprehensive test artifacts including BOTH manual test case documentation AND automated Playwright test scripts.
838
-
839
- ## Overview
64
+ steps: [
65
+ // Step 1: Overview (inline)
66
+ {
67
+ inline: true,
68
+ title: "Generate Test Cases Overview",
69
+ content: `Generate comprehensive test artifacts including BOTH manual test case documentation AND automated Playwright test scripts.
840
70
 
841
71
  This command generates:
842
72
  1. **Manual Test Case Documentation** (in \`./test-cases/\`) - Human-readable test cases in markdown format
843
73
  2. **Automated Playwright Tests** (in \`./tests/specs/\`) - Executable TypeScript test scripts
844
74
  3. **Page Object Models** (in \`./tests/pages/\`) - Reusable page classes for automated tests
845
- 4. **Supporting Files** (fixtures, helpers, components) - As needed for test automation
846
-
847
- ## Arguments
848
- Arguments: $ARGUMENTS
75
+ 4. **Supporting Files** (fixtures, helpers, components) - As needed for test automation`
76
+ },
77
+ // Step 2: Security Notice (library)
78
+ "security-notice",
79
+ // Step 3: Arguments (inline)
80
+ {
81
+ inline: true,
82
+ title: "Arguments",
83
+ content: `Arguments: $ARGUMENTS
849
84
 
850
- ## Parse Arguments
85
+ **Parse Arguments:**
851
86
  Extract the following from arguments:
852
87
  - **type**: Test type (exploratory, functional, regression, smoke) - defaults to functional
853
- - **focus**: Optional specific feature or section to focus on
854
-
855
- ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
856
-
857
- ## Process
858
-
859
- ### Step 1: Gather Context
860
-
861
- #### 1.1 Read Test Plan
88
+ - **focus**: Optional specific feature or section to focus on`
89
+ },
90
+ // Step 4: Knowledge Base Read (library)
91
+ "read-knowledge-base",
92
+ // Step 5: Gather Context (inline)
93
+ {
94
+ inline: true,
95
+ title: "Gather Context",
96
+ content: `**1.1 Read Test Plan**
862
97
  Read the test plan from \`test-plan.md\` to understand:
863
98
  - Test items and features
864
99
  - Testing approach and automation strategy
@@ -867,35 +102,40 @@ Read the test plan from \`test-plan.md\` to understand:
867
102
  - Test environment and data requirements
868
103
  - Automation decision criteria
869
104
 
870
- #### 1.2 Check Existing Test Cases and Tests
105
+ **1.2 Check Existing Test Cases and Tests**
871
106
  - List all files in \`./test-cases/\` to understand existing manual test coverage
872
107
  - List all files in \`./tests/specs/\` to understand existing automated tests
873
108
  - Determine next test case ID (TC-XXX format)
874
109
  - Identify existing Page Objects in \`./tests/pages/\`
875
- - Avoid creating overlapping test cases or duplicate automation
876
-
877
- {{DOCUMENTATION_RESEARCHER_INSTRUCTIONS}}
878
-
879
- ### Step 1.4: Explore Features (If Needed)
880
-
881
- If documentation is insufficient or ambiguous, perform adaptive exploration to understand actual feature behavior before creating test cases.
882
-
883
- ${EXPLORATION_INSTRUCTIONS.replace(/{{STEP_NUMBER}}/g, "1.4")}
884
-
885
- ### Step 1.5: Clarify Ambiguities
886
-
887
- If exploration or documentation review reveals ambiguous requirements, use the clarification protocol to resolve them before generating test cases.
888
-
889
- ${CLARIFICATION_INSTRUCTIONS.replace(/{{STEP_NUMBER}}/g, "1.5")}
890
-
891
- **Important Notes:**
892
- - **CRITICAL/HIGH ambiguities:** STOP test case generation and seek clarification
893
- - **MEDIUM ambiguities:** Document assumptions explicitly in test case with [ASSUMED: reason]
894
- - **LOW ambiguities:** Mark with [TO BE CLARIFIED: detail] in test case notes section
895
-
896
- ### Step 1.6: Organize Test Scenarios by Area
110
+ - Avoid creating overlapping test cases or duplicate automation`
111
+ },
112
+ // Step 6: Documentation Researcher (conditional inline)
113
+ {
114
+ inline: true,
115
+ title: "Gather Product Documentation",
116
+ content: `{{INVOKE_DOCUMENTATION_RESEARCHER}} to gather comprehensive product documentation:
897
117
 
898
- Based on exploration and documentation, organize test scenarios by feature area/component:
118
+ \`\`\`
119
+ Explore all available product documentation, specifically focusing on:
120
+ - UI elements and workflows
121
+ - User interactions and navigation paths
122
+ - Form fields and validation rules
123
+ - Error messages and edge cases
124
+ - Authentication and authorization flows
125
+ - Business rules and constraints
126
+ - API endpoints for test data setup
127
+ \`\`\``,
128
+ conditionalOnSubagent: "documentation-researcher"
129
+ },
130
+ // Step 7: Exploration Protocol (from library)
131
+ "exploration-protocol",
132
+ // Step 8: Clarification Protocol (from library)
133
+ "clarification-protocol",
134
+ // Step 9: Organize Test Scenarios (inline - task-specific)
135
+ {
136
+ inline: true,
137
+ title: "Organize Test Scenarios by Area",
138
+ content: `Based on exploration and documentation, organize test scenarios by feature area/component:
899
139
 
900
140
  **Group scenarios into areas** (e.g., Authentication, Dashboard, Checkout, Profile Management):
901
141
  - Each area should be a logical feature grouping
@@ -930,19 +170,21 @@ Based on exploration and documentation, organize test scenarios by feature area/
930
170
 
931
171
  Example structure:
932
172
  - **Authentication**: TC-001 Valid login (smoke, automate), TC-002 Invalid password (automate), TC-003 Password reset (automate)
933
- - **Dashboard**: TC-004 View dashboard widgets (smoke, automate), TC-005 Filter data by date (automate), TC-006 Export data (manual - rare use)
934
-
935
- ### Step 1.7: Generate All Manual Test Case Files
936
-
937
- Generate ALL manual test case markdown files in the \`./test-cases/\` directory BEFORE invoking the test-code-generator agent.
173
+ - **Dashboard**: TC-004 View dashboard widgets (smoke, automate), TC-005 Filter data by date (automate), TC-006 Export data (manual - rare use)`
174
+ },
175
+ // Step 10: Generate Manual Test Cases (inline)
176
+ {
177
+ inline: true,
178
+ title: "Generate All Manual Test Case Files",
179
+ content: `Generate ALL manual test case markdown files in the \`./test-cases/\` directory BEFORE invoking the test-code-generator agent.
938
180
 
939
- **For each test scenario from Step 1.6:**
181
+ **For each test scenario from the previous step:**
940
182
 
941
183
  1. **Create test case file** in \`./test-cases/\` with format \`TC-XXX-feature-description.md\`
942
184
  2. **Include frontmatter** with:
943
185
  - \`id:\` TC-XXX (sequential ID)
944
186
  - \`title:\` Clear, descriptive title
945
- - \`automated:\` true/false (based on automation decision from Step 1.6)
187
+ - \`automated:\` true/false (based on automation decision)
946
188
  - \`automated_test:\` (leave empty - will be filled by subagent when automated)
947
189
  - \`type:\` exploratory/functional/regression/smoke
948
190
  - \`area:\` Feature area/component
@@ -954,30 +196,30 @@ Generate ALL manual test case markdown files in the \`./test-cases/\` directory
954
196
  - **Test Data**: Environment variables to use (e.g., \${TEST_BASE_URL}, \${TEST_OWNER_EMAIL})
955
197
  - **Notes**: Any assumptions, clarifications needed, or special considerations
956
198
 
957
- **Output**: All manual test case markdown files created in \`./test-cases/\` with automation flags set
958
-
959
- ### Step 2: Automate Test Cases Area by Area
960
-
961
- **IMPORTANT**: Process each feature area separately to enable incremental, focused test creation.
962
-
963
- **For each area from Step 1.6**, invoke the test-code-generator agent:
199
+ **Output**: All manual test case markdown files created in \`./test-cases/\` with automation flags set`
200
+ },
201
+ // Step 11: Automate Test Cases (inline - detailed instructions for test-code-generator)
202
+ {
203
+ inline: true,
204
+ title: "Automate Test Cases Area by Area",
205
+ content: `**IMPORTANT**: Process each feature area separately to enable incremental, focused test creation.
964
206
 
965
- #### Step 2.1: Prepare Area Context
207
+ **For each area**, invoke the test-code-generator agent:
966
208
 
209
+ **Prepare Area Context:**
967
210
  Before invoking the agent, identify the test cases for the current area:
968
211
  - Current area name
969
212
  - Test case files for this area (e.g., TC-001-valid-login.md, TC-002-invalid-password.md)
970
213
  - Which test cases are marked for automation (automated: true)
971
- - Test type: {type}
214
+ - Test type from arguments
972
215
  - Test plan reference: test-plan.md
973
216
  - Existing automated tests in ./tests/specs/
974
217
  - Existing Page Objects in ./tests/pages/
975
218
 
976
- #### Step 2.2: Invoke test-code-generator Agent
219
+ **Invoke test-code-generator Agent:**
977
220
 
978
221
  {{INVOKE_TEST_CODE_GENERATOR}} for the current area with the following context:
979
222
 
980
- **Agent Invocation:**
981
223
  "Automate test cases for the [AREA_NAME] area.
982
224
 
983
225
  **Context:**
@@ -998,63 +240,34 @@ Before invoking the agent, identify the test cases for the current area:
998
240
  - Create automated Playwright test in ./tests/specs/
999
241
  - Update the manual test case file to reference the automated test path
1000
242
  6. Run and iterate on each test until it passes or fails with a product bug
1001
- 8. Update .env.testdata with any new variables
243
+ 7. Update .env.testdata with any new variables
1002
244
 
1003
245
  **Focus only on the [AREA_NAME] area** - do not automate tests for other areas yet."
1004
246
 
1005
- #### Step 2.3: Verify Area Completion
1006
-
247
+ **Verify Area Completion:**
1007
248
  After the agent completes the area, verify:
1008
249
  - Manual test case files updated with automated_test references
1009
250
  - Automated tests created for all test cases marked automated: true
1010
251
  - Tests are passing (or failing with documented product bugs)
1011
252
  - Page Objects created/updated for the area
1012
253
 
1013
- #### Step 2.4: Repeat for Next Area
1014
-
1015
- Move to the next area and repeat Steps 2.1-2.3 until all areas are complete.
254
+ **Repeat for Next Area:**
255
+ Move to the next area and repeat until all areas are complete.
1016
256
 
1017
257
  **Benefits of area-by-area approach**:
1018
258
  - Agent focuses on one feature at a time
1019
259
  - POMs built incrementally as needed
1020
260
  - Tests verified before moving to next area
1021
261
  - Easier to manage and track progress
1022
- - Can pause/resume between areas if needed
1023
-
1024
- ### Step 2.5: Validate Generated Artifacts
1025
-
1026
- After the test-code-generator completes, verify:
1027
-
1028
- 1. **Manual Test Cases (in \`./test-cases/\`)**:
1029
- - Each has unique TC-XXX ID
1030
- - Frontmatter includes \`automated: true/false\` flag
1031
- - If automated, includes \`automated_test\` path reference
1032
- - Contains human-readable steps and expected results
1033
- - References environment variables for test data
1034
-
1035
- 2. **Automated Tests (in \`./tests/specs/\`)**:
1036
- - Organized by feature in subdirectories
1037
- - Each test file references manual test case ID in comments
1038
- - Uses Page Object Model pattern
1039
- - Follows role-based selector priority
1040
- - Uses environment variables for test data
1041
- - Includes proper TypeScript typing
1042
-
1043
- 3. **Page Objects (in \`./tests/pages/\`)**:
1044
- - Extend BasePage class
1045
- - Use semantic selectors (getByRole, getByLabel, getByText)
1046
- - Contain only actions, no assertions
1047
- - Properly typed with TypeScript
1048
-
1049
- 4. **Supporting Files**:
1050
- - Fixtures created for common setup (in \`./tests/fixtures/\`)
1051
- - Helper functions for data generation (in \`./tests/helpers/\`)
1052
- - Component objects for reusable UI elements (in \`./tests/components/\`)
1053
- - Types defined as needed (in \`./tests/types/\`)
1054
-
1055
- ### Step 3: Create Directories if Needed
1056
-
1057
- Ensure required directories exist:
262
+ - Can pause/resume between areas if needed`
263
+ },
264
+ // Step 12: Validate Artifacts (library)
265
+ "validate-test-artifacts",
266
+ // Step 13: Create Directories (inline)
267
+ {
268
+ inline: true,
269
+ title: "Create Directories if Needed",
270
+ content: `Ensure required directories exist:
1058
271
  \`\`\`bash
1059
272
  mkdir -p ./test-cases
1060
273
  mkdir -p ./tests/specs
@@ -1062,23 +275,53 @@ mkdir -p ./tests/pages
1062
275
  mkdir -p ./tests/components
1063
276
  mkdir -p ./tests/fixtures
1064
277
  mkdir -p ./tests/helpers
1065
- \`\`\`
1066
-
1067
- ### Step 4: Update .env.testdata (if needed)
1068
-
1069
- If new environment variables were introduced:
1070
- - Read current \`.env.testdata\`
1071
- - Add new TEST_* variables with empty values
1072
- - Group variables logically with comments
1073
- - Document what each variable is for
1074
-
1075
- ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
278
+ \`\`\``
279
+ },
280
+ // Step 14: Extract Env Variables (library)
281
+ "extract-env-variables",
282
+ // Step 15: Knowledge Base Update (library)
283
+ "update-knowledge-base",
284
+ // Step 16: Team Communication (conditional inline)
285
+ {
286
+ inline: true,
287
+ title: "Team Communication",
288
+ content: `{{INVOKE_TEAM_COMMUNICATOR}} to notify the product team about the new test cases and automated tests:
1076
289
 
1077
- {{TEAM_COMMUNICATOR_INSTRUCTIONS}}
290
+ \`\`\`
291
+ 1. Post an update about test case and automation creation
292
+ 2. Provide summary of coverage:
293
+ - Number of manual test cases created
294
+ - Number of automated tests created
295
+ - Features covered by automation
296
+ - Areas kept manual-only (and why)
297
+ 3. Highlight key automated test scenarios
298
+ 4. Share command to run automated tests: npx playwright test
299
+ 5. Ask for team review and validation
300
+ 6. Mention any areas needing exploration or clarification
301
+ 7. Use appropriate channel and threading for the update
302
+ \`\`\`
1078
303
 
1079
- ### Step 5: Final Summary
304
+ The team communication should include:
305
+ - **Test artifacts created**: Manual test cases + automated tests count
306
+ - **Automation coverage**: Which features are now automated
307
+ - **Manual-only areas**: Why some tests are kept manual (rare scenarios, exploratory)
308
+ - **Key automated scenarios**: Critical paths now covered by automation
309
+ - **Running tests**: Command to execute automated tests
310
+ - **Review request**: Ask team to validate scenarios and review test code
311
+ - **Next steps**: Plans for CI/CD integration or additional test coverage
1080
312
 
1081
- Provide a comprehensive summary showing:
313
+ **Update team communicator memory:**
314
+ - Record this communication
315
+ - Note test case and automation creation
316
+ - Track team feedback on automation approach
317
+ - Document any clarifications requested`,
318
+ conditionalOnSubagent: "team-communicator"
319
+ },
320
+ // Step 17: Final Summary (inline)
321
+ {
322
+ inline: true,
323
+ title: "Final Summary",
324
+ content: `Provide a comprehensive summary showing:
1082
325
 
1083
326
  **Manual Test Cases:**
1084
327
  - Number of manual test cases created
@@ -1102,245 +345,162 @@ Provide a comprehensive summary showing:
1102
345
  - Note about copying .env.testdata to .env
1103
346
  - Mention any exploration needed for edge cases
1104
347
 
1105
- ### Important Notes
1106
-
348
+ **Important Notes:**
1107
349
  - **Both Manual AND Automated**: Generate both artifacts - they serve different purposes
1108
350
  - **Manual Test Cases**: Documentation, reference, can be executed manually when needed
1109
351
  - **Automated Tests**: Fast, repeatable, for CI/CD and regression testing
1110
352
  - **Automation Decision**: Not all test cases need automation - rare edge cases can stay manual
1111
353
  - **Linking**: Manual test cases reference automated tests; automated tests reference manual test case IDs
1112
- - **Two-Phase Workflow**: First generate all manual test cases (Step 1.7), then automate area-by-area (Step 2)
1113
- - **Ambiguity Handling**: Use exploration (Step 1.4) and clarification (Step 1.5) protocols before generating
354
+ - **Two-Phase Workflow**: First generate all manual test cases, then automate area-by-area
355
+ - **Ambiguity Handling**: Use exploration and clarification protocols before generating
1114
356
  - **Environment Variables**: Use \`process.env.VAR_NAME\` in tests, update .env.testdata as needed
1115
- - **Test Independence**: Each test must be runnable in isolation and in parallel`,
1116
- optionalSubagents: [
1117
- {
1118
- role: "documentation-researcher",
1119
- contentBlock: `#### 1.4 Gather Product Documentation
1120
-
1121
- {{INVOKE_DOCUMENTATION_RESEARCHER}} to gather comprehensive product documentation:
1122
-
1123
- \`\`\`
1124
- Explore all available product documentation, specifically focusing on:
1125
- - UI elements and workflows
1126
- - User interactions and navigation paths
1127
- - Form fields and validation rules
1128
- - Error messages and edge cases
1129
- - Authentication and authorization flows
1130
- - Business rules and constraints
1131
- - API endpoints for test data setup
1132
- \`\`\``
1133
- },
1134
- {
1135
- role: "team-communicator",
1136
- contentBlock: `### Step 4.5: Team Communication
1137
-
1138
- {{INVOKE_TEAM_COMMUNICATOR}} to notify the product team about the new test cases and automated tests:
1139
-
1140
- \`\`\`
1141
- 1. Post an update about test case and automation creation
1142
- 2. Provide summary of coverage:
1143
- - Number of manual test cases created
1144
- - Number of automated tests created
1145
- - Features covered by automation
1146
- - Areas kept manual-only (and why)
1147
- 3. Highlight key automated test scenarios
1148
- 4. Share command to run automated tests: npx playwright test
1149
- 5. Ask for team review and validation
1150
- 6. Mention any areas needing exploration or clarification
1151
- 7. Use appropriate channel and threading for the update
1152
- \`\`\`
1153
-
1154
- The team communication should include:
1155
- - **Test artifacts created**: Manual test cases + automated tests count
1156
- - **Automation coverage**: Which features are now automated
1157
- - **Manual-only areas**: Why some tests are kept manual (rare scenarios, exploratory)
1158
- - **Key automated scenarios**: Critical paths now covered by automation
1159
- - **Running tests**: Command to execute automated tests
1160
- - **Review request**: Ask team to validate scenarios and review test code
1161
- - **Next steps**: Plans for CI/CD integration or additional test coverage
1162
-
1163
- **Update team communicator memory:**
1164
- - Record this communication
1165
- - Note test case and automation creation
1166
- - Track team feedback on automation approach
1167
- - Document any clarifications requested`
357
+ - **Test Independence**: Each test must be runnable in isolation and in parallel`
1168
358
  }
1169
359
  ],
1170
- requiredSubagents: ["test-runner", "test-code-generator"]
360
+ requiredSubagents: ["test-runner", "test-code-generator"],
361
+ optionalSubagents: ["documentation-researcher", "team-communicator"],
362
+ dependentTasks: []
1171
363
  };
1172
364
 
1173
365
  // src/tasks/library/generate-test-plan.ts
1174
366
  var generateTestPlanTask = {
1175
367
  slug: TASK_SLUGS.GENERATE_TEST_PLAN,
1176
368
  name: "Generate Test Plan",
1177
- description: "Generate a comprehensive test plan from product description",
369
+ description: "Generate a concise feature checklist test plan from product description",
1178
370
  frontmatter: {
1179
- description: "Generate a comprehensive test plan from product description",
371
+ description: "Generate a concise feature checklist test plan (~50-100 lines)",
1180
372
  "argument-hint": "<product-description>"
1181
373
  },
1182
- baseContent: `# Generate Test Plan Command
1183
-
1184
- ## SECURITY NOTICE
1185
- **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
1186
- - **Read \`.env.testdata\`** for non-secret test data (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
1187
- - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
1188
- - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
1189
- - The \`.env\` file access is blocked by settings.json
1190
-
1191
- Generate a comprehensive test plan from product description following the Brain Module specifications.
1192
-
1193
- ## Arguments
1194
- Product description: $ARGUMENTS
1195
-
1196
- ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
1197
-
1198
- ## Process
1199
-
1200
- ### Step 1: Load project context
1201
- Read \`.bugzy/runtime/project-context.md\` to understand:
1202
- - Project overview and key platform features
1203
- - SDLC methodology and sprint duration
1204
- - Testing environment and goals
1205
- - Technical stack and constraints
1206
- - QA workflow and processes
1207
-
1208
- ### Step 1.5: Process the product description
1209
- Use the product description provided directly in the arguments, enriched with project context understanding.
1210
-
1211
- ### Step 1.6: Initialize environment variables tracking
1212
- Create a list to track all TEST_ prefixed environment variables discovered throughout the process.
1213
-
1214
- {{DOCUMENTATION_RESEARCHER_INSTRUCTIONS}}
1215
-
1216
- ### Step 1.7: Explore Product (If Needed)
1217
-
1218
- If product description is vague or incomplete, perform adaptive exploration to understand actual product features and behavior.
1219
-
1220
- ${EXPLORATION_INSTRUCTIONS.replace(/{{STEP_NUMBER}}/g, "1.7")}
1221
-
1222
- ### Step 1.8: Clarify Ambiguities
1223
-
1224
- If exploration or product description reveals ambiguous requirements, use the clarification protocol before generating the test plan.
1225
-
1226
- ${CLARIFICATION_INSTRUCTIONS.replace(/{{STEP_NUMBER}}/g, "1.8")}
1227
-
1228
- **Important Notes:**
1229
- - **CRITICAL/HIGH ambiguities:** STOP test plan generation and seek clarification
1230
- - Examples: Undefined core features, unclear product scope, contradictory requirements
1231
- - **MEDIUM ambiguities:** Document assumptions in test plan with [ASSUMED: reason] and seek async clarification
1232
- - Examples: Missing field lists, unclear validation rules, vague user roles
1233
- - **LOW ambiguities:** Mark with [TO BE EXPLORED: detail] in test plan for future investigation
1234
- - Examples: Optional features, cosmetic details, non-critical edge cases
1235
-
1236
- ### Step 3: Prepare the test plan generation context
1237
-
1238
- **After ensuring requirements are clear through exploration and clarification:**
374
+ steps: [
375
+ // Step 1: Overview (inline)
376
+ {
377
+ inline: true,
378
+ title: "Generate Test Plan Overview",
379
+ content: `Generate a concise feature checklist test plan from the product description, following the Brain Module specifications.`
380
+ },
381
+ // Step 2: Security Notice (library)
382
+ "security-notice",
383
+ // Step 3: Arguments (inline)
384
+ {
385
+ inline: true,
386
+ title: "Arguments",
387
+ content: `Product description: $ARGUMENTS`
388
+ },
389
+ // Step 4: Knowledge Base Read (library)
390
+ "read-knowledge-base",
391
+ // Step 5: Load Project Context (library)
392
+ "load-project-context",
393
+ // Step 6: Process Description (inline)
394
+ {
395
+ inline: true,
396
+ title: "Process the Product Description",
397
+ content: `Use the product description provided directly in the arguments, enriched with project context understanding.`
398
+ },
399
+ // Step 7: Initialize Env Tracking (inline)
400
+ {
401
+ inline: true,
402
+ title: "Initialize Environment Variables Tracking",
403
+ content: `Create a list to track all TEST_ prefixed environment variables discovered throughout the process.`
404
+ },
405
+ // Step 8: Documentation Researcher (conditional inline)
406
+ {
407
+ inline: true,
408
+ title: "Gather Comprehensive Project Documentation",
409
+ content: `{{INVOKE_DOCUMENTATION_RESEARCHER}} to explore and gather all available project information and other documentation sources. This ensures the test plan is based on complete and current information.
1239
410
 
1240
- Based on the gathered information:
1241
- - **goal**: Extract the main purpose and objectives from all available documentation
1242
- - **knowledge**: Combine product description with discovered documentation insights
1243
- - **testPlan**: Use the standard test plan template structure, enriched with documentation findings
1244
- - **gaps**: Identify areas lacking documentation that will need exploration
1245
-
1246
- ### Step 4: Generate the test plan using the prompt template
1247
-
1248
- You are an expert QA Test Plan Writer with expertise in both manual and automated testing strategies. Using the gathered information and context from the product description provided, you will now produce a comprehensive test plan in Markdown format that includes an automation strategy.
1249
-
1250
- Writing Instructions:
1251
- - **Use Product Terminology:** Incorporate exact terms and labels from the product description for features and UI elements (to ensure the test plan uses official naming).
1252
- - **Testing Scope:** The plan covers both automated E2E testing via Playwright and exploratory manual testing. Focus on what a user can do and see in a browser.
1253
- - **Test Data - IMPORTANT:**
1254
- - DO NOT include test data values in the test plan body
1255
- - Test data goes ONLY to the \`.env.testdata\` file
1256
- - In the test plan, reference \`.env.testdata\` for test data requirements
1257
- - Define test data as environment variables prefixed with TEST_ (e.g., TEST_BASE_URL, TEST_USER_EMAIL, TEST_USER_PASSWORD)
1258
- - DO NOT GENERATE VALUES FOR THE ENV VARS, ONLY THE KEYS
1259
- - Track all TEST_ variables for extraction to .env.testdata in Step 7
1260
- - **DO NOT INCLUDE TEST SCENARIOS**
1261
- - **Incorporate All Relevant Info:** If the product description mentions specific requirements, constraints, or acceptance criteria (such as field validations, role-based access rules, important parameters), make sure these are reflected in the test plan. Do not add anything not supported by the given information.
1262
- - **Test Automation Strategy Section - REQUIRED:** Include a comprehensive "Test Automation Strategy" section with the following subsections:
1263
-
1264
- **## Test Automation Strategy**
1265
-
1266
- ### Automated Test Coverage
1267
- - Identify critical user paths to automate (login, checkout, core features)
1268
- - Define regression test scenarios for automation
1269
- - Specify API endpoints that need automated testing
1270
- - List smoke test scenarios for CI/CD pipeline
1271
-
1272
- ### Exploratory Testing Areas
1273
- - New features not yet automated
1274
- - Complex edge cases requiring human judgment
1275
- - Visual/UX validation requiring subjective assessment
1276
- - Scenarios that are not cost-effective to automate
1277
-
1278
- ### Test Data Management
1279
- - Environment variables strategy (which vars go in .env.example vs .env)
1280
- - Dynamic test data generation approach (use data generators)
1281
- - API-based test data setup (10-20x faster than UI)
1282
- - Test data isolation and cleanup strategy
1283
-
1284
- ### Automation Approach
1285
- - **Framework:** Playwright + TypeScript (already scaffolded)
1286
- - **Pattern:** Page Object Model for all pages
1287
- - **Selectors:** Prioritize role-based selectors (getByRole, getByLabel, getByText)
1288
- - **Components:** Reusable component objects for common UI elements
1289
- - **Fixtures:** Custom fixtures for authenticated sessions and common setup
1290
- - **API for Speed:** Use Playwright's request context to create test data via API
1291
- - **Best Practices:** Reference \`.bugzy/runtime/testing-best-practices.md\` for patterns
1292
-
1293
- ### Test Organization
1294
- - Automated tests location: \`./tests/specs/[feature]/\`
1295
- - Page Objects location: \`./tests/pages/\`
1296
- - Manual test cases location: \`./test-cases/\` (human-readable documentation)
1297
- - Test case naming: TC-XXX-feature-description.md
1298
- - Automated test naming: feature.spec.ts
1299
-
1300
- ### Automation Decision Criteria
1301
- Define which scenarios warrant automation:
1302
- - \u2705 Automate: Frequent execution, critical paths, regression tests, CI/CD integration
1303
- - \u274C Keep Manual: Rare edge cases, exploratory tests, visual validation, one-time checks
1304
-
1305
- ### Step 5: Create the test plan file
1306
-
1307
- Read the test plan template from \`.bugzy/runtime/templates/test-plan-template.md\` and use it as the base structure. Fill in the placeholders with information extracted from BOTH the product description AND documentation research:
1308
-
1309
- 1. Read the template file from \`.bugzy/runtime/templates/test-plan-template.md\`
1310
- 2. Replace placeholders like:
1311
- - \`[ProjectName]\` with the actual project name from the product description
1312
- - \`[Date]\` with the current date
1313
- - Feature sections with actual features identified from all documentation sources
1314
- - Test data requirements based on the product's needs and API documentation
1315
- - Risks based on the complexity, known issues, and technical constraints
1316
- 3. Add any product-specific sections that may be needed based on discovered documentation
1317
- 4. **Mark ambiguities based on severity:**
1318
- - CRITICAL/HIGH: Should be clarified before plan creation (see Step 1.8)
1319
- - MEDIUM: Mark with [ASSUMED: reason] and note assumption
1320
- - LOW: Mark with [TO BE EXPLORED: detail] for future investigation
1321
- 5. Include references to source documentation for traceability
1322
-
1323
- ### Step 6: Save the test plan
1324
-
1325
- Save the generated test plan to a file named \`test-plan.md\` in the project root with appropriate frontmatter:
411
+ \`\`\`
412
+ Explore all available project documentation related to: $ARGUMENTS
1326
413
 
1327
- \`\`\`yaml
1328
- ---
1329
- version: 1.0.0
1330
- lifecycle_phase: initial
1331
- created_at: [current date]
1332
- updated_at: [current date]
1333
- last_exploration: null
1334
- total_discoveries: 0
1335
- status: draft
1336
- author: claude
1337
- tags: [functional, security, performance]
1338
- ---
414
+ Specifically gather:
415
+ - Product specifications and requirements
416
+ - User stories and acceptance criteria
417
+ - Technical architecture documentation
418
+ - API documentation and endpoints
419
+ - User roles and permissions
420
+ - Business rules and validations
421
+ - UI/UX specifications
422
+ - Known limitations or constraints
423
+ - Existing test documentation
424
+ - Bug reports or known issues
1339
425
  \`\`\`
1340
426
 
1341
- ### Step 7: Extract and save environment variables
427
+ The agent will:
428
+ 1. Check its memory for previously discovered documentation
429
+ 2. Explore workspace for relevant pages and databases
430
+ 3. Build a comprehensive understanding of the product
431
+ 4. Return synthesized information about all discovered documentation`,
432
+ conditionalOnSubagent: "documentation-researcher"
433
+ },
434
+ // Step 9: Exploration Protocol (from library)
435
+ "exploration-protocol",
436
+ // Step 10: Clarification Protocol (from library)
437
+ "clarification-protocol",
438
+ // Step 11: Prepare Context (inline)
439
+ {
440
+ inline: true,
441
+ title: "Prepare Test Plan Generation Context",
442
+ content: `**After ensuring requirements are clear through exploration and clarification:**
1342
443
 
1343
- **CRITICAL**: Test data values must ONLY go to .env.testdata, NOT in the test plan document.
444
+ Based on the gathered information:
445
+ - **goal**: Extract the main purpose and objectives from all available documentation
446
+ - **knowledge**: Combine product description with discovered documentation insights
447
+ - **testPlan**: Use the standard test plan template structure, enriched with documentation findings
448
+ - **gaps**: Identify areas lacking documentation that will need exploration`
449
+ },
450
+ // Step 12: Generate Test Plan (inline - more detailed than library step)
451
+ {
452
+ inline: true,
453
+ title: "Generate Test Plan Using Simplified Format",
454
+ content: `You are an expert QA Test Plan Writer. Generate a **concise** test plan (~50-100 lines) that serves as a feature checklist for test case generation.
455
+
456
+ **CRITICAL - Keep it Simple:**
457
+ - The test plan is a **feature checklist**, NOT a comprehensive document
458
+ - Detailed UI elements and exploration findings go to \`./exploration-reports/\`
459
+ - Technical patterns and architecture go to \`.bugzy/runtime/knowledge-base.md\`
460
+ - Process documentation stays in \`.bugzy/runtime/project-context.md\`
461
+
462
+ **Writing Instructions:**
463
+ - **Use Product Terminology:** Use exact feature names from the product description
464
+ - **Feature Checklist Format:** Each feature is a checkbox item with brief description
465
+ - **Group by Feature Area:** Organize features into logical sections
466
+ - **NO detailed UI elements** - those belong in exploration reports
467
+ - **NO test scenarios** - those are generated in test cases
468
+ - **NO process documentation** - keep only what's needed for test generation
469
+
470
+ **Test Data Handling:**
471
+ - Test data goes ONLY to \`.env.testdata\` file
472
+ - In test plan, reference environment variable NAMES only (e.g., TEST_BASE_URL)
473
+ - DO NOT generate values for env vars, only keys
474
+ - Track all TEST_ variables for extraction to .env.testdata in the next step`
475
+ },
476
+ // Step 13: Create Test Plan File (inline)
477
+ {
478
+ inline: true,
479
+ title: "Create Test Plan File",
480
+ content: `Read the simplified template from \`.bugzy/runtime/templates/test-plan-template.md\` and fill it in:
481
+
482
+ 1. Read the template file
483
+ 2. Replace placeholders:
484
+ - \`[PROJECT_NAME]\` with the actual project name
485
+ - \`[DATE]\` with the current date
486
+ - Feature sections with actual features grouped by area
487
+ 3. Each feature is a **checkbox item** with brief description
488
+ 4. **Mark ambiguities:**
489
+ - MEDIUM: Mark with [ASSUMED: reason]
490
+ - LOW: Mark with [TO BE EXPLORED: detail]
491
+ 5. Keep total document under 100 lines`
492
+ },
493
+ // Step 14: Save Test Plan (inline)
494
+ {
495
+ inline: true,
496
+ title: "Save Test Plan",
497
+ content: `Save to \`test-plan.md\` in project root. The template already includes frontmatter - just fill in the dates.`
498
+ },
499
+ // Step 15: Extract Env Variables (inline - more detailed than library step)
500
+ {
501
+ inline: true,
502
+ title: "Extract and Save Environment Variables",
503
+ content: `**CRITICAL**: Test data values must ONLY go to .env.testdata, NOT in the test plan document.
1344
504
 
1345
505
  After saving the test plan:
1346
506
 
@@ -1377,53 +537,15 @@ After saving the test plan:
1377
537
  5. **Verify test plan references .env.testdata**:
1378
538
  - Ensure test plan DOES NOT contain test data values
1379
539
  - Ensure test plan references \`.env.testdata\` for test data requirements
1380
- - Add instruction: "Fill in actual values in .env.testdata before running tests"
1381
-
1382
- ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
1383
-
1384
- {{TEAM_COMMUNICATOR_INSTRUCTIONS}}
1385
-
1386
- ### Step 8: Final summary
1387
-
1388
- Provide a summary of:
1389
- - Test plan created successfully at \`test-plan.md\`
1390
- - Environment variables extracted to \`.env.testdata\`
1391
- - Number of TEST_ variables discovered
1392
- - Instructions for the user to fill in actual values in .env.testdata before running tests`,
1393
- optionalSubagents: [
1394
- {
1395
- role: "documentation-researcher",
1396
- contentBlock: `### Step 2: Gather comprehensive project documentation
1397
-
1398
- {{INVOKE_DOCUMENTATION_RESEARCHER}} to explore and gather all available project information and other documentation sources. This ensures the test plan is based on complete and current information.
1399
-
1400
- \`\`\`
1401
- Explore all available project documentation related to: $ARGUMENTS
1402
-
1403
- Specifically gather:
1404
- - Product specifications and requirements
1405
- - User stories and acceptance criteria
1406
- - Technical architecture documentation
1407
- - API documentation and endpoints
1408
- - User roles and permissions
1409
- - Business rules and validations
1410
- - UI/UX specifications
1411
- - Known limitations or constraints
1412
- - Existing test documentation
1413
- - Bug reports or known issues
1414
- \`\`\`
1415
-
1416
- The agent will:
1417
- 1. Check its memory for previously discovered documentation
1418
- 2. Explore workspace for relevant pages and databases
1419
- 3. Build a comprehensive understanding of the product
1420
- 4. Return synthesized information about all discovered documentation`
540
+ - Add instruction: "Fill in actual values in .env.testdata before running tests"`
1421
541
  },
542
+ // Step 16: Knowledge Base Update (library)
543
+ "update-knowledge-base",
544
+ // Step 17: Team Communication (conditional inline)
1422
545
  {
1423
- role: "team-communicator",
1424
- contentBlock: `### Step 7.5: Team Communication
1425
-
1426
- {{INVOKE_TEAM_COMMUNICATOR}} to notify the product team about the new test plan:
546
+ inline: true,
547
+ title: "Team Communication",
548
+ content: `{{INVOKE_TEAM_COMMUNICATOR}} to notify the product team about the new test plan:
1427
549
 
1428
550
  \`\`\`
1429
551
  1. Post an update about the test plan creation
@@ -1444,10 +566,23 @@ The team communication should include:
1444
566
  **Update team communicator memory:**
1445
567
  - Record this communication in the team-communicator memory
1446
568
  - Note this as a test plan creation communication
1447
- - Track team response to this type of update`
569
+ - Track team response to this type of update`,
570
+ conditionalOnSubagent: "team-communicator"
571
+ },
572
+ // Step 18: Final Summary (inline)
573
+ {
574
+ inline: true,
575
+ title: "Final Summary",
576
+ content: `Provide a summary of:
577
+ - Test plan created successfully at \`test-plan.md\`
578
+ - Environment variables extracted to \`.env.testdata\`
579
+ - Number of TEST_ variables discovered
580
+ - Instructions for the user to fill in actual values in .env.testdata before running tests`
1448
581
  }
1449
582
  ],
1450
- requiredSubagents: ["test-runner"]
583
+ requiredSubagents: ["test-runner"],
584
+ optionalSubagents: ["documentation-researcher", "team-communicator"],
585
+ dependentTasks: []
1451
586
  };
1452
587
 
1453
588
  // src/tasks/library/handle-message.ts
@@ -1459,27 +594,30 @@ var handleMessageTask = {
1459
594
  description: "Handle team responses and Slack communications, maintaining context for ongoing conversations",
1460
595
  "argument-hint": "[slack thread context or team message]"
1461
596
  },
1462
- baseContent: `# Handle Message Command
1463
-
1464
- ## SECURITY NOTICE
1465
- **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
1466
- - **Read \`.env.testdata\`** for non-secret environment variables (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
1467
- - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
1468
- - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
1469
- - The \`.env\` file access is blocked by settings.json
1470
-
1471
- Process team responses from Slack threads and handle multi-turn conversations with the product team about testing clarifications, ambiguities, and questions.
1472
-
1473
- ## Arguments
1474
- Team message/thread context: $ARGUMENTS
1475
-
1476
- ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
1477
-
1478
- ## Process
1479
-
1480
- ### Step 0: Detect Message Intent and Load Handler
597
+ steps: [
598
+ // Step 1: Overview (inline)
599
+ {
600
+ inline: true,
601
+ title: "Handle Message Overview",
602
+ content: `# Handle Message Command
1481
603
 
1482
- Before processing the message, identify the intent type to load the appropriate handler.
604
+ Process team responses from Slack threads and handle multi-turn conversations with the product team about testing clarifications, ambiguities, and questions.`
605
+ },
606
+ // Step 2: Security Notice (library)
607
+ "security-notice",
608
+ // Step 3: Arguments (inline)
609
+ {
610
+ inline: true,
611
+ title: "Arguments",
612
+ content: `Team message/thread context: $ARGUMENTS`
613
+ },
614
+ // Step 4: Knowledge Base Read (library)
615
+ "read-knowledge-base",
616
+ // Step 5: Detect Intent (inline - task-specific)
617
+ {
618
+ inline: true,
619
+ title: "Detect Message Intent and Load Handler",
620
+ content: `Before processing the message, identify the intent type to load the appropriate handler.
1483
621
 
1484
622
  #### 0.1 Extract Intent from Event Payload
1485
623
 
@@ -1522,11 +660,17 @@ The handler file contains all necessary processing logic for the detected intent
1522
660
  - Specific processing steps for that intent
1523
661
  - Context loading requirements
1524
662
  - Response guidelines
1525
- - Memory update instructions
1526
-
1527
- ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
1528
-
1529
- ## Key Principles
663
+ - Memory update instructions`
664
+ },
665
+ // Step 6: Clarification Protocol (for ambiguous intents)
666
+ "clarification-protocol",
667
+ // Step 7: Knowledge Base Update (library)
668
+ "update-knowledge-base",
669
+ // Step 8: Key Principles (inline)
670
+ {
671
+ inline: true,
672
+ title: "Key Principles",
673
+ content: `## Key Principles
1530
674
 
1531
675
  ### Context Preservation
1532
676
  - Always maintain full conversation context
@@ -1546,9 +690,13 @@ ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
1546
690
  ### Quality Communication
1547
691
  - Acknowledge team input appropriately
1548
692
  - Provide updates on actions taken
1549
- - Ask good follow-up questions when needed
1550
-
1551
- ## Important Considerations
693
+ - Ask good follow-up questions when needed`
694
+ },
695
+ // Step 9: Important Considerations (inline)
696
+ {
697
+ inline: true,
698
+ title: "Important Considerations",
699
+ content: `## Important Considerations
1552
700
 
1553
701
  ### Thread Organization
1554
702
  - Keep related discussions in same thread
@@ -1568,9 +716,12 @@ ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
1568
716
  ### Memory Maintenance
1569
717
  - Keep active conversations visible and current
1570
718
  - Archive resolved discussions appropriately
1571
- - Maintain searchable history of resolutions`,
719
+ - Maintain searchable history of resolutions`
720
+ }
721
+ ],
722
+ requiredSubagents: ["team-communicator"],
1572
723
  optionalSubagents: [],
1573
- requiredSubagents: ["team-communicator"]
724
+ dependentTasks: []
1574
725
  };
1575
726
 
1576
727
  // src/tasks/library/process-event.ts
@@ -1582,42 +733,47 @@ var processEventTask = {
1582
733
  description: "Process external system events (Jira, GitHub, Linear) using handler-defined rules to extract insights and track issues",
1583
734
  "argument-hint": "[event payload or description]"
1584
735
  },
1585
- baseContent: `# Process Event Command
1586
-
1587
- ## SECURITY NOTICE
1588
- **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
1589
- - **Read \`.env.testdata\`** for non-secret environment variables (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
1590
- - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
1591
- - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
1592
- - The \`.env\` file access is blocked by settings.json
1593
-
1594
- Process various types of events using intelligent pattern matching and historical context to maintain and evolve the testing system.
1595
-
1596
- ## Arguments
1597
- Arguments: $ARGUMENTS
1598
-
1599
- ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
1600
-
1601
- ## Process
1602
-
1603
- ### Step 1: Understand Event Context
736
+ steps: [
737
+ // Step 1: Overview (inline)
738
+ {
739
+ inline: true,
740
+ title: "Process Event Overview",
741
+ content: `# Process Event Command
1604
742
 
1605
- Events come from integrated external systems via webhooks or manual input. Common sources include:
743
+ Process various types of events using intelligent pattern matching and historical context to maintain and evolve the testing system.`
744
+ },
745
+ // Step 2: Security Notice (library)
746
+ "security-notice",
747
+ // Step 3: Arguments (inline)
748
+ {
749
+ inline: true,
750
+ title: "Arguments",
751
+ content: `Arguments: $ARGUMENTS`
752
+ },
753
+ // Step 4: Knowledge Base Read (library)
754
+ "read-knowledge-base",
755
+ // Step 5: Understand Event Context (inline)
756
+ {
757
+ inline: true,
758
+ title: "Understand Event Context",
759
+ content: `Events come from integrated external systems via webhooks or manual input. Common sources include:
1606
760
  - **Issue Trackers**: Jira, Linear, GitHub Issues
1607
761
  - **Source Control**: GitHub, GitLab
1608
762
  - **Communication Tools**: Slack
1609
763
 
1610
- **Event structure and semantics vary by source.** Do not interpret events based on generic assumptions. Instead, load the appropriate handler file (Step 2.4) for system-specific processing rules.
764
+ **Event structure and semantics vary by source.** Do not interpret events based on generic assumptions. Instead, load the appropriate handler file for system-specific processing rules.
1611
765
 
1612
766
  #### Event Context to Extract:
1613
767
  - **What happened**: The core event (test failed, PR merged, etc.)
1614
768
  - **Where**: Component, service, or area affected
1615
769
  - **Impact**: How this affects testing strategy
1616
- - **Action Required**: What needs to be done in response
1617
-
1618
- ### Step 1.5: Clarify Unclear Events
1619
-
1620
- If the event information is incomplete or ambiguous, seek clarification before processing:
770
+ - **Action Required**: What needs to be done in response`
771
+ },
772
+ // Step 6: Clarify Unclear Events (inline - task-specific)
773
+ {
774
+ inline: true,
775
+ title: "Clarify Unclear Events",
776
+ content: `If the event information is incomplete or ambiguous, seek clarification before processing:
1621
777
 
1622
778
  #### Detect Unclear Events
1623
779
 
@@ -1700,9 +856,13 @@ In event history, record:
1700
856
  - **Assumption made**: If proceeded with assumption
1701
857
  - **Resolution**: How ambiguity was resolved
1702
858
 
1703
- This ensures future similar events can reference past clarifications and avoid redundant questions.
1704
-
1705
- ### Step 2: Load Context and Memory
859
+ This ensures future similar events can reference past clarifications and avoid redundant questions.`
860
+ },
861
+ // Step 7: Load Context and Memory (inline)
862
+ {
863
+ inline: true,
864
+ title: "Load Context and Memory",
865
+ content: `### Step 2: Load Context and Memory
1706
866
 
1707
867
  #### 2.1 Check Event Processor Memory
1708
868
  Read \`.bugzy/runtime/memory/event-processor.md\` to:
@@ -1727,10 +887,10 @@ Read \`.bugzy/runtime/memory/event-history.md\` to:
1727
887
  Based on the event source, load the handler from \`.bugzy/runtime/handlers/\`:
1728
888
 
1729
889
  **Step 1: Detect Event Source from Payload:**
1730
- - \`com.jira-server.*\` event type prefix \u2192 \`.bugzy/runtime/handlers/jira.md\`
1731
- - \`github.*\` or GitHub webhook structure \u2192 \`.bugzy/runtime/handlers/github.md\`
1732
- - \`linear.*\` or Linear webhook \u2192 \`.bugzy/runtime/handlers/linear.md\`
1733
- - Other sources \u2192 Check for matching handler file by source name
890
+ - \`com.jira-server.*\` event type prefix -> \`.bugzy/runtime/handlers/jira.md\`
891
+ - \`github.*\` or GitHub webhook structure -> \`.bugzy/runtime/handlers/github.md\`
892
+ - \`linear.*\` or Linear webhook -> \`.bugzy/runtime/handlers/linear.md\`
893
+ - Other sources -> Check for matching handler file by source name
1734
894
 
1735
895
  **Step 2: Load and Read the Handler File:**
1736
896
  The handler file contains system-specific instructions for:
@@ -1756,9 +916,13 @@ Do NOT guess or apply generic logic. Instead:
1756
916
  Handlers reference \`.bugzy/runtime/project-context.md\` for project-specific rules like:
1757
917
  - Which status transitions trigger verify-changes
1758
918
  - Which resolutions should update the knowledge base
1759
- - Which transitions to ignore
1760
-
1761
- ### Step 3: Intelligent Event Analysis
919
+ - Which transitions to ignore`
920
+ },
921
+ // Step 8: Intelligent Event Analysis (inline)
922
+ {
923
+ inline: true,
924
+ title: "Intelligent Event Analysis",
925
+ content: `### Step 3: Intelligent Event Analysis
1762
926
 
1763
927
  #### 3.1 Contextual Pattern Analysis
1764
928
  Don't just match patterns - analyze the event within the full context:
@@ -1789,11 +953,28 @@ Based on event type and content, generate 3-5 specific search queries:
1789
953
  - Search for similar past events
1790
954
  - Look for related test cases
1791
955
  - Find relevant documentation
1792
- - Check for known issues
1793
-
1794
- {{DOCUMENTATION_RESEARCHER_INSTRUCTIONS}}
1795
-
1796
- ### Step 4: Task Planning with Reasoning
956
+ - Check for known issues`
957
+ },
958
+ // Step 9: Documentation Research (conditional inline)
959
+ {
960
+ inline: true,
961
+ title: "Use Documentation Researcher",
962
+ content: `#### 3.3 Use Documentation Researcher if Needed
963
+
964
+ {{INVOKE_DOCUMENTATION_RESEARCHER}} to find information about unknown features or components:
965
+
966
+ For events mentioning unknown features or components, ask the agent to explore project documentation and return:
967
+ - Feature specifications
968
+ - Related test cases
969
+ - Known issues or limitations
970
+ - Component dependencies`,
971
+ conditionalOnSubagent: "documentation-researcher"
972
+ },
973
+ // Step 10: Task Planning (inline)
974
+ {
975
+ inline: true,
976
+ title: "Task Planning with Reasoning",
977
+ content: `### Step 4: Task Planning with Reasoning
1797
978
 
1798
979
  Generate tasks based on event analysis, using examples from memory as reference.
1799
980
 
@@ -1809,11 +990,11 @@ Analyze the event in context of ALL available information to decide what actions
1809
990
 
1810
991
  **Contextual Decision Making**:
1811
992
  The same event type can require different actions based on context:
1812
- - If handler says this status triggers verification \u2192 Invoke /verify-changes
1813
- - If this issue was already processed (check event history) \u2192 Skip to avoid duplicates
1814
- - If related PR exists in knowledge base \u2192 Include PR context in actions
1815
- - If this is a recurring pattern from the same source \u2192 Consider flagging for review
1816
- - If handler has no rule for this event type \u2192 Ask user for guidance
993
+ - If handler says this status triggers verification -> Invoke /verify-changes
994
+ - If this issue was already processed (check event history) -> Skip to avoid duplicates
995
+ - If related PR exists in knowledge base -> Include PR context in actions
996
+ - If this is a recurring pattern from the same source -> Consider flagging for review
997
+ - If handler has no rule for this event type -> Ask user for guidance
1817
998
 
1818
999
  **Dynamic Task Selection**:
1819
1000
  Based on the contextual analysis, decide which tasks make sense:
@@ -1831,15 +1012,40 @@ For each task, document WHY it's being executed:
1831
1012
  Task: extract_learning
1832
1013
  Reasoning: This event reveals a pattern of login failures on Chrome that wasn't previously documented
1833
1014
  Data: "Chrome-specific timeout issues with login button"
1834
- \`\`\`
1015
+ \`\`\``
1016
+ },
1017
+ // Step 11: Issue Tracking (conditional inline)
1018
+ {
1019
+ inline: true,
1020
+ title: "Issue Tracking",
1021
+ content: `##### For Issue Tracking:
1835
1022
 
1836
- ### Step 5: Execute Tasks with Memory Updates
1023
+ When an issue needs to be tracked (task type: report_bug or update_story):
1837
1024
 
1838
- #### 5.1 Execute Each Task
1025
+ {{INVOKE_ISSUE_TRACKER}}
1026
+
1027
+ 1. Check for duplicate issues in the tracking system
1028
+ 2. For bugs: Create detailed bug report with:
1029
+ - Clear, descriptive title
1030
+ - Detailed description with context
1031
+ - Step-by-step reproduction instructions
1032
+ - Expected vs actual behavior
1033
+ - Environment and configuration details
1034
+ - Test case reference (if applicable)
1035
+ - Screenshots or error logs
1036
+ 3. For stories: Update status and add QA comments
1037
+ 4. Track issue lifecycle and maintain categorization
1839
1038
 
1840
- {{ISSUE_TRACKER_INSTRUCTIONS}}
1039
+ The issue-tracker agent will handle all aspects of issue tracking including duplicate detection, story management, QA workflow transitions, and integration with your project management system (Jira, Linear, Notion, etc.).`,
1040
+ conditionalOnSubagent: "issue-tracker"
1041
+ },
1042
+ // Step 12: Execute Tasks (inline)
1043
+ {
1044
+ inline: true,
1045
+ title: "Execute Tasks with Memory Updates",
1046
+ content: `### Step 5: Execute Tasks with Memory Updates
1841
1047
 
1842
- ##### For Other Tasks:
1048
+ #### 5.1 Execute Each Task
1843
1049
  Follow the standard execution logic with added context from memory.
1844
1050
 
1845
1051
  #### 5.2 Update Event Processor Memory
@@ -1876,9 +1082,13 @@ source: [source]
1876
1082
  **Outcome**: [Success/Partial/Failed]
1877
1083
  **Notes**: [Any additional context]
1878
1084
  ---
1879
- \`\`\`
1880
-
1881
- ### Step 6: Learning from Events
1085
+ \`\`\``
1086
+ },
1087
+ // Step 13: Learning and Maintenance (inline)
1088
+ {
1089
+ inline: true,
1090
+ title: "Learning from Events",
1091
+ content: `### Step 6: Learning from Events
1882
1092
 
1883
1093
  After processing, check if this event teaches us something new:
1884
1094
  1. Is this a new type of event we haven't seen?
@@ -1898,9 +1108,15 @@ mkdir -p ./test-cases .claude/memory
1898
1108
  Create files if they don't exist:
1899
1109
  - \`.bugzy/runtime/knowledge-base.md\`
1900
1110
  - \`.bugzy/runtime/memory/event-processor.md\`
1901
- - \`.bugzy/runtime/memory/event-history.md\`
1902
-
1903
- ## Important Considerations
1111
+ - \`.bugzy/runtime/memory/event-history.md\``
1112
+ },
1113
+ // Step 14: Knowledge Base Update (library)
1114
+ "update-knowledge-base",
1115
+ // Step 15: Important Considerations (inline)
1116
+ {
1117
+ inline: true,
1118
+ title: "Important Considerations",
1119
+ content: `## Important Considerations
1904
1120
 
1905
1121
  ### Contextual Intelligence
1906
1122
  - Never process events in isolation - always consider full context
@@ -1924,42 +1140,11 @@ Create files if they don't exist:
1924
1140
  - Each event adds to our understanding of the system
1925
1141
  - Update patterns when new correlations are discovered
1926
1142
  - Refine decision rules based on outcomes
1927
- - Build institutional memory through event history
1928
-
1929
- ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}`,
1930
- optionalSubagents: [
1931
- {
1932
- role: "documentation-researcher",
1933
- contentBlock: `#### 3.3 Use Documentation Researcher if Needed
1934
- For events mentioning unknown features or components:
1935
- \`\`\`
1936
- {{INVOKE_DOCUMENTATION_RESEARCHER}} to find information about: [component/feature]
1937
- \`\`\``
1938
- },
1939
- {
1940
- role: "issue-tracker",
1941
- contentBlock: `##### For Issue Tracking:
1942
-
1943
- When an issue needs to be tracked (task type: report_bug or update_story):
1944
- \`\`\`
1945
- {{INVOKE_ISSUE_TRACKER}}
1946
- 1. Check for duplicate issues in the tracking system
1947
- 2. For bugs: Create detailed bug report with:
1948
- - Clear, descriptive title
1949
- - Detailed description with context
1950
- - Step-by-step reproduction instructions
1951
- - Expected vs actual behavior
1952
- - Environment and configuration details
1953
- - Test case reference (if applicable)
1954
- - Screenshots or error logs
1955
- 3. For stories: Update status and add QA comments
1956
- 4. Track issue lifecycle and maintain categorization
1957
- \`\`\`
1958
-
1959
- The issue-tracker agent will handle all aspects of issue tracking including duplicate detection, story management, QA workflow transitions, and integration with your project management system (Jira, Linear, Notion, etc.).`
1143
+ - Build institutional memory through event history`
1960
1144
  }
1961
1145
  ],
1962
- requiredSubagents: [],
1146
+ requiredSubagents: ["team-communicator"],
1147
+ optionalSubagents: ["documentation-researcher", "issue-tracker"],
1963
1148
  dependentTasks: ["verify-changes"]
1964
1149
  };
1965
1150
 
@@ -1972,49 +1157,40 @@ var runTestsTask = {
1972
1157
  description: "Execute automated Playwright tests, analyze failures, and fix test issues automatically",
1973
1158
  "argument-hint": '[file-pattern|tag|all] (e.g., "auth", "@smoke", "tests/specs/login.spec.ts")'
1974
1159
  },
1975
- baseContent: `# Run Tests Command
1976
-
1977
- ## SECURITY NOTICE
1978
- **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
1979
- - **Read \`.env.testdata\`** for non-secret environment variables (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
1980
- - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
1981
- - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
1982
- - The \`.env\` file access is blocked by settings.json
1983
-
1984
- Execute automated Playwright tests, analyze failures using JSON reports, automatically fix test issues, and log product bugs.
1160
+ steps: [
1161
+ // Step 1: Overview (inline)
1162
+ {
1163
+ inline: true,
1164
+ title: "Run Tests Overview",
1165
+ content: `# Run Tests Command
1985
1166
 
1986
- ## Arguments
1987
- Arguments: $ARGUMENTS
1167
+ Execute automated Playwright tests, analyze failures using JSON reports, automatically fix test issues, and log product bugs.`
1168
+ },
1169
+ // Step 2: Security Notice (library)
1170
+ "security-notice",
1171
+ // Step 3: Arguments (inline)
1172
+ {
1173
+ inline: true,
1174
+ title: "Arguments",
1175
+ content: `Arguments: $ARGUMENTS
1988
1176
 
1989
- ## Parse Arguments
1177
+ **Parse Arguments:**
1990
1178
  Extract the following from arguments:
1991
1179
  - **selector**: Test selection criteria
1992
1180
  - File pattern: "auth" \u2192 finds tests/specs/**/*auth*.spec.ts
1993
1181
  - Tag: "@smoke" \u2192 runs tests with @smoke annotation
1994
1182
  - Specific file: "tests/specs/login.spec.ts"
1995
- - All tests: "all" or "" \u2192 runs entire test suite
1996
-
1997
- ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
1998
-
1999
- ## Test Execution Strategy
2000
-
2001
- **IMPORTANT**: Before selecting tests, read \`.bugzy/runtime/test-execution-strategy.md\` to understand:
2002
- - Available test tiers (Smoke, Component, Full Regression)
2003
- - When to use each tier (commit, PR, release, debug)
2004
- - Default behavior (default to @smoke unless user specifies otherwise)
2005
- - How to interpret user intent from context keywords
2006
- - Time/coverage trade-offs
2007
- - Tag taxonomy
2008
-
2009
- Apply the strategy guidance when determining which tests to run.
2010
-
2011
- ## Process
2012
-
2013
- **First**, consult \`.bugzy/runtime/test-execution-strategy.md\` decision tree to determine appropriate test tier based on user's selector and context.
2014
-
2015
- ### Step 1: Identify Automated Tests to Run
2016
-
2017
- #### 1.1 Understand Test Selection
1183
+ - All tests: "all" or "" \u2192 runs entire test suite`
1184
+ },
1185
+ // Step 4: Knowledge Base Read (library)
1186
+ "read-knowledge-base",
1187
+ // Step 5: Test Execution Strategy (library)
1188
+ "read-test-strategy",
1189
+ // Step 6: Identify Tests (inline - task-specific)
1190
+ {
1191
+ inline: true,
1192
+ title: "Identify Automated Tests to Run",
1193
+ content: `#### Understand Test Selection
2018
1194
  Parse the selector argument to determine which tests to run:
2019
1195
 
2020
1196
  **File Pattern** (e.g., "auth", "login"):
@@ -2031,7 +1207,7 @@ Parse the selector argument to determine which tests to run:
2031
1207
  **All Tests** ("all" or no selector):
2032
1208
  - Run entire test suite: \`tests/specs/**/*.spec.ts\`
2033
1209
 
2034
- #### 1.2 Find Matching Test Files
1210
+ #### Find Matching Test Files
2035
1211
  Use glob patterns to find test files:
2036
1212
  \`\`\`bash
2037
1213
  # For file pattern
@@ -2044,211 +1220,39 @@ ls tests/specs/auth/login.spec.ts
2044
1220
  ls tests/specs/**/*.spec.ts
2045
1221
  \`\`\`
2046
1222
 
2047
- #### 1.3 Validate Test Files Exist
1223
+ #### Validate Test Files Exist
2048
1224
  Check that at least one test file was found:
2049
1225
  - If no tests found, inform user and suggest available tests
2050
1226
  - List available test files if selection was unclear
2051
1227
 
2052
- ### Step 2: Execute Automated Playwright Tests
2053
-
2054
- #### 2.1 Build Playwright Command
2055
- Construct the Playwright test command based on the selector:
2056
-
2057
- **For file pattern or specific file**:
2058
- \`\`\`bash
2059
- npx playwright test [selector]
2060
- \`\`\`
2061
-
2062
- **For tag**:
2063
- \`\`\`bash
2064
- npx playwright test --grep "[tag]"
2065
- \`\`\`
2066
-
2067
- **For all tests**:
2068
- \`\`\`bash
2069
- npx playwright test
2070
- \`\`\`
2071
-
2072
- **Output**: Custom Bugzy reporter will create hierarchical test-runs/YYYYMMDD-HHMMSS/ structure with manifest.json
2073
-
2074
- #### 2.2 Execute Tests via Bash
2075
- Run the Playwright command:
2076
- \`\`\`bash
2077
- npx playwright test [selector]
2078
- \`\`\`
2079
-
2080
- Wait for execution to complete. This may take several minutes depending on test count.
2081
-
2082
- **Note**: The custom Bugzy reporter will automatically:
2083
- - Generate timestamp in YYYYMMDD-HHMMSS format
2084
- - Create test-runs/{timestamp}/ directory structure
2085
- - Record execution-id.txt with BUGZY_EXECUTION_ID
2086
- - Save results per test case in TC-{id}/exec-1/ folders
2087
- - Generate manifest.json with complete execution summary
2088
-
2089
- #### 2.3 Locate and Read Test Results
2090
- After execution completes, find and read the manifest:
2091
-
2092
- 1. Find the test run directory (most recent):
2093
- \`\`\`bash
2094
- ls -t test-runs/ | head -1
2095
- \`\`\`
2096
-
2097
- 2. Read the manifest.json file:
2098
- \`\`\`bash
2099
- cat test-runs/[timestamp]/manifest.json
2100
- \`\`\`
2101
-
2102
- 3. Store the timestamp for use in test-debugger-fixer if needed
2103
-
2104
- ### Step 3: Analyze Test Results from Manifest
2105
-
2106
- #### 3.1 Parse Manifest
2107
- The Bugzy custom reporter produces structured output in manifest.json:
2108
- \`\`\`json
2109
- {
2110
- "bugzyExecutionId": "70a59676-cfd0-4ffd-b8ad-69ceff25c31d",
2111
- "timestamp": "20251115-123456",
2112
- "startTime": "2025-11-15T12:34:56.789Z",
2113
- "endTime": "2025-11-15T12:45:23.456Z",
2114
- "status": "completed",
2115
- "stats": {
2116
- "totalTests": 10,
2117
- "passed": 8,
2118
- "failed": 2,
2119
- "totalExecutions": 10
2120
- },
2121
- "testCases": [
1228
+ #### Confirm Selection Before Execution
1229
+ Before running tests, confirm the selection with the user if ambiguous:
1230
+ - **Clear selection** (specific file or tag): Proceed immediately
1231
+ - **Pattern match** (multiple files): List matching files and ask for confirmation if count > 5
1232
+ - **No selector** (all tests): Confirm running full suite before executing`
1233
+ },
1234
+ // Step 7-10: Test Execution (library steps)
1235
+ "run-playwright-tests",
1236
+ "parse-test-results",
1237
+ "triage-failures",
1238
+ "fix-test-issues",
1239
+ // Step 11: Log Product Bugs (conditional - library step)
2122
1240
  {
2123
- "id": "TC-001-login",
2124
- "name": "Login functionality",
2125
- "totalExecutions": 1,
2126
- "finalStatus": "passed",
2127
- "executions": [
2128
- {
2129
- "number": 1,
2130
- "status": "passed",
2131
- "duration": 1234,
2132
- "videoFile": "video.webm",
2133
- "hasTrace": false,
2134
- "hasScreenshots": false,
2135
- "error": null
2136
- }
2137
- ]
1241
+ stepId: "log-product-bugs",
1242
+ conditionalOnSubagent: "issue-tracker"
2138
1243
  },
1244
+ // Step 12: Knowledge Base Update (library)
1245
+ "update-knowledge-base",
1246
+ // Step 13: Team Communication (conditional - library step)
2139
1247
  {
2140
- "id": "TC-002-invalid-credentials",
2141
- "name": "Invalid credentials error",
2142
- "totalExecutions": 1,
2143
- "finalStatus": "failed",
2144
- "executions": [
2145
- {
2146
- "number": 1,
2147
- "status": "failed",
2148
- "duration": 2345,
2149
- "videoFile": "video.webm",
2150
- "hasTrace": true,
2151
- "hasScreenshots": true,
2152
- "error": "expect(locator).toBeVisible()..."
2153
- }
2154
- ]
2155
- }
2156
- ]
2157
- }
2158
- \`\`\`
2159
-
2160
- #### 3.2 Extract Test Results
2161
- From the manifest, extract:
2162
- - **Total tests**: stats.totalTests
2163
- - **Passed tests**: stats.passed
2164
- - **Failed tests**: stats.failed
2165
- - **Total executions**: stats.totalExecutions (includes re-runs)
2166
- - **Duration**: Calculate from startTime and endTime
2167
-
2168
- For each failed test, collect from testCases array:
2169
- - Test ID (id field)
2170
- - Test name (name field)
2171
- - Final status (finalStatus field)
2172
- - Latest execution details:
2173
- - Error message (executions[last].error)
2174
- - Duration (executions[last].duration)
2175
- - Video file location (test-runs/{timestamp}/{id}/exec-{num}/{videoFile})
2176
- - Trace availability (executions[last].hasTrace)
2177
- - Screenshots availability (executions[last].hasScreenshots)
2178
-
2179
- #### 3.3 Generate Summary Statistics
2180
- \`\`\`markdown
2181
- ## Test Execution Summary
2182
- - Total Tests: [count]
2183
- - Passed: [count] ([percentage]%)
2184
- - Failed: [count] ([percentage]%)
2185
- - Skipped: [count] ([percentage]%)
2186
- - Total Duration: [time]
2187
- \`\`\`
2188
-
2189
- ### Step 5: Triage Failed Tests
2190
-
2191
- After analyzing test results, triage each failure to determine if it's a product bug or test issue:
2192
-
2193
- #### 5.1 Triage Failed Tests FIRST
2194
-
2195
- **\u26A0\uFE0F IMPORTANT: Do NOT report bugs without triaging first.**
2196
-
2197
- For each failed test:
2198
-
2199
- 1. **Read failure details** from JSON report (error message, stack trace)
2200
- 2. **Classify the failure:**
2201
- - **Product bug**: Application behaves incorrectly
2202
- - **Test issue**: Test code needs fixing (selector, timing, assertion)
2203
- 3. **Document classification** for next steps
2204
-
2205
- **Classification Guidelines:**
2206
- - **Product Bug**: Correct test code, unexpected application behavior
2207
- - **Test Issue**: Selector not found, timeout, race condition, wrong assertion
2208
-
2209
- #### 5.2 Fix Test Issues Automatically
2210
-
2211
- For each test classified as **[TEST ISSUE]**, use the test-debugger-fixer agent to automatically fix the test:
2212
-
2213
- \`\`\`
2214
- {{INVOKE_TEST_DEBUGGER_FIXER}}
2215
-
2216
- For each failed test classified as a test issue (not a product bug), provide:
2217
- - Test run timestamp: [from manifest.timestamp]
2218
- - Test case ID: [from testCases[].id in manifest]
2219
- - Test name/title: [from testCases[].name in manifest]
2220
- - Error message: [from testCases[].executions[last].error]
2221
- - Execution details path: test-runs/{timestamp}/{testCaseId}/exec-1/
2222
-
2223
- The agent will:
2224
- 1. Read the execution details from result.json
2225
- 2. Analyze the failure (error message, trace if available)
2226
- 3. Identify the root cause (brittle selector, missing wait, race condition, etc.)
2227
- 4. Apply appropriate fix to the test code
2228
- 5. Rerun the test
2229
- 6. The custom reporter will automatically create the next exec-N/ folder
2230
- 7. Repeat up to 3 times if needed (exec-1, exec-2, exec-3)
2231
- 8. Report success or escalate as likely product bug
2232
-
2233
- After test-debugger-fixer completes:
2234
- - If fix succeeded: Mark test as fixed, add to "Tests Fixed" list
2235
- - If still failing after 3 attempts: Reclassify as potential product bug for Step 5.3
2236
- \`\`\`
2237
-
2238
- **Track Fixed Tests:**
2239
- - Maintain list of tests fixed automatically
2240
- - Include fix description (e.g., "Updated selector from CSS to role-based")
2241
- - Note verification status (test now passes)
2242
-
2243
- {{ISSUE_TRACKER_INSTRUCTIONS}}
2244
-
2245
- ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
2246
-
2247
- {{TEAM_COMMUNICATOR_INSTRUCTIONS}}
2248
-
2249
- ### Step 6: Handle Special Cases
2250
-
2251
- #### If No Test Cases Found
1248
+ stepId: "notify-team",
1249
+ conditionalOnSubagent: "team-communicator"
1250
+ },
1251
+ // Step 14: Handle Special Cases (inline - task-specific)
1252
+ {
1253
+ inline: true,
1254
+ title: "Handle Special Cases",
1255
+ content: `#### If No Test Cases Found
2252
1256
  If no test cases match the selection criteria:
2253
1257
  1. Inform user that no matching test cases were found
2254
1258
  2. List available test cases or suggest running \`/generate-test-cases\` first
@@ -2291,128 +1295,12 @@ If selected test cases have formatting issues:
2291
1295
 
2292
1296
  **Related Documentation**:
2293
1297
  - \`.bugzy/runtime/test-execution-strategy.md\` - When and why to run specific tests
2294
- - \`.bugzy/runtime/testing-best-practices.md\` - How to write tests (patterns and anti-patterns)
2295
-
2296
- `,
2297
- optionalSubagents: [
2298
- {
2299
- role: "issue-tracker",
2300
- contentBlock: `
2301
-
2302
- #### 5.3 Log Product Bugs via Issue Tracker
2303
-
2304
- After triage in Step 5.1, for tests classified as **[PRODUCT BUG]**, use the issue-tracker agent to log bugs:
2305
-
2306
- For each bug to report, use the issue-tracker agent:
2307
-
2308
- \`\`\`
2309
- {{INVOKE_ISSUE_TRACKER}}
2310
- 1. Check for duplicate bugs in the tracking system
2311
- - The agent will automatically search for similar existing issues
2312
- - It maintains memory of recently reported issues
2313
- - Duplicate detection happens automatically - don't create manual checks
2314
-
2315
- 2. For each new bug (non-duplicate):
2316
- Create detailed bug report with:
2317
- - **Title**: Clear, descriptive summary (e.g., "Login button fails with timeout on checkout page")
2318
- - **Description**:
2319
- - What happened vs. what was expected
2320
- - Impact on users
2321
- - Test reference: [file path] \u203A [test title]
2322
- - **Reproduction Steps**:
2323
- - List steps from the failing test
2324
- - Include specific test data used
2325
- - Note any setup requirements from test file
2326
- - **Test Execution Details**:
2327
- - Test file: [file path from JSON report]
2328
- - Test name: [test title from JSON report]
2329
- - Error message: [from JSON report]
2330
- - Stack trace: [from JSON report]
2331
- - Trace file: [path if available]
2332
- - Screenshots: [paths if available]
2333
- - **Environment Details**:
2334
- - Browser and version (from Playwright config)
2335
- - Test environment URL (from .env.testdata BASE_URL)
2336
- - Timestamp of failure
2337
- - **Severity/Priority**: Based on:
2338
- - Test type (smoke tests = high priority)
2339
- - User impact
2340
- - Frequency (always fails vs flaky)
2341
- - **Additional Context**:
2342
- - Error messages or stack traces from JSON report
2343
- - Related test files (if part of test suite)
2344
- - Relevant knowledge from knowledge-base.md
2345
-
2346
- 3. Track created issues:
2347
- - Note the issue ID/number returned
2348
- - Update issue tracker memory with new bugs
2349
- - Prepare issue references for team communication
2350
- \`\`\`
2351
-
2352
- #### 6.3 Summary of Bug Reporting
2353
-
2354
- After issue tracker agent completes, create a summary:
2355
- \`\`\`markdown
2356
- ### Bug Reporting Summary
2357
- - Total bugs found: [count of FAIL tests]
2358
- - New bugs reported: [count of newly created issues]
2359
- - Duplicate bugs found: [count of duplicates detected]
2360
- - Issues not reported: [count of skipped/known issues]
2361
-
2362
- **New Bug Reports**:
2363
- - [Issue ID]: [Bug title] (Test: TC-XXX, Priority: [priority])
2364
- - [Issue ID]: [Bug title] (Test: TC-YYY, Priority: [priority])
2365
-
2366
- **Duplicate Bugs** (already tracked):
2367
- - [Existing Issue ID]: [Bug title] (Matches test: TC-XXX)
2368
-
2369
- **Not Reported** (skipped or known):
2370
- - TC-XXX: Skipped due to blocker failure
2371
- - TC-YYY: Known issue documented in knowledge base
2372
- \`\`\`
2373
-
2374
- **Note**: The issue tracker agent handles all duplicate detection and system integration automatically. Simply provide the bug details and let it manage the rest.`
2375
- },
2376
- {
2377
- role: "team-communicator",
2378
- contentBlock: `### Step 6: Team Communication
2379
-
2380
- {{INVOKE_TEAM_COMMUNICATOR}}
2381
-
2382
- Notify the product team about test execution:
2383
-
2384
- \`\`\`
2385
- 1. Post test execution summary with key statistics
2386
- 2. Highlight critical failures that need immediate attention
2387
- 3. Share important learnings about product behavior
2388
- 4. Report any potential bugs discovered during testing
2389
- 5. Ask for clarification on unexpected behaviors
2390
- 6. Provide recommendations for areas needing investigation
2391
- 7. Use appropriate urgency level based on failure severity
2392
- \`\`\`
2393
-
2394
- The team communication should include:
2395
- - **Execution summary**: Overall pass/fail statistics and timing
2396
- - **Critical issues**: High-priority failures that need immediate attention
2397
- - **Key learnings**: Important discoveries about product behavior
2398
- - **Potential bugs**: Issues that may require bug reports
2399
- - **Clarifications needed**: Unexpected behaviors requiring team input
2400
- - **Recommendations**: Suggested follow-up actions
2401
-
2402
- **Communication strategy based on results**:
2403
- - **All tests passed**: Brief positive update, highlight learnings
2404
- - **Minor failures**: Standard update with failure details and plans
2405
- - **Critical failures**: Urgent notification with detailed analysis
2406
- - **New discoveries**: Separate message highlighting interesting findings
2407
-
2408
- **Update team communicator memory**:
2409
- - Record test execution communication
2410
- - Track team response patterns to test results
2411
- - Document any clarifications provided by the team
2412
- - Note team priorities based on their responses`
1298
+ - \`.bugzy/runtime/testing-best-practices.md\` - How to write tests (patterns and anti-patterns)`
2413
1299
  }
2414
1300
  ],
2415
- requiredSubagents: ["test-runner", "test-debugger-fixer"]
1301
+ requiredSubagents: ["test-runner", "test-debugger-fixer"],
1302
+ optionalSubagents: ["issue-tracker", "team-communicator"],
1303
+ dependentTasks: []
2416
1304
  };
2417
1305
 
2418
1306
  // src/tasks/library/verify-changes.ts
@@ -2424,14 +1312,12 @@ var verifyChangesTask = {
2424
1312
  description: "Verify code changes with automated tests and manual verification checklists",
2425
1313
  "argument-hint": "[trigger-auto-detected]"
2426
1314
  },
2427
- baseContent: `# Verify Changes - Unified Multi-Trigger Workflow
2428
-
2429
- ## SECURITY NOTICE
2430
- **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
2431
- - **Read \`.env.testdata\`** for non-secret environment variables (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
2432
- - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
2433
- - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
2434
- - The \`.env\` file access is blocked by settings.json
1315
+ steps: [
1316
+ // Step 1: Overview (inline)
1317
+ {
1318
+ inline: true,
1319
+ title: "Verify Changes Overview",
1320
+ content: `# Verify Changes - Unified Multi-Trigger Workflow
2435
1321
 
2436
1322
  ## Overview
2437
1323
 
@@ -2439,21 +1325,27 @@ This task performs comprehensive change verification with:
2439
1325
  - **Automated testing**: Execute Playwright tests with automatic triage and fixing
2440
1326
  - **Manual verification checklists**: Generate role-specific checklists for non-automatable scenarios
2441
1327
  - **Multi-trigger support**: Works from manual CLI, Slack messages, GitHub PRs, and CI/CD
2442
- - **Smart output routing**: Results formatted and delivered to the appropriate channel
2443
-
2444
- ## Arguments
2445
-
2446
- **Input**: $ARGUMENTS
2447
-
2448
- The input format determines the trigger source and context extraction strategy.
2449
-
2450
- ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
2451
-
2452
- ## Step 1: Detect Trigger Source
1328
+ - **Smart output routing**: Results formatted and delivered to the appropriate channel`
1329
+ },
1330
+ // Step 2: Security Notice (library)
1331
+ "security-notice",
1332
+ // Step 3: Arguments (inline)
1333
+ {
1334
+ inline: true,
1335
+ title: "Arguments",
1336
+ content: `**Input**: $ARGUMENTS
2453
1337
 
2454
- Analyze the input format to determine how this task was invoked:
1338
+ The input format determines the trigger source and context extraction strategy.`
1339
+ },
1340
+ // Step 4: Knowledge Base Read (library)
1341
+ "read-knowledge-base",
1342
+ // Step 5: Detect Trigger Source (inline)
1343
+ {
1344
+ inline: true,
1345
+ title: "Detect Trigger Source",
1346
+ content: `Analyze the input format to determine how this task was invoked:
2455
1347
 
2456
- ### 1.1 Identify Trigger Type
1348
+ ### Identify Trigger Type
2457
1349
 
2458
1350
  **GitHub PR Webhook:**
2459
1351
  - Input contains \`pull_request\` object with structure:
@@ -2470,7 +1362,7 @@ Analyze the input format to determine how this task was invoked:
2470
1362
  }
2471
1363
  }
2472
1364
  \`\`\`
2473
- \u2192 **Trigger detected: GITHUB_PR**
1365
+ -> **Trigger detected: GITHUB_PR**
2474
1366
 
2475
1367
  **Slack Event:**
2476
1368
  - Input contains \`event\` object with structure:
@@ -2487,7 +1379,7 @@ Analyze the input format to determine how this task was invoked:
2487
1379
  }
2488
1380
  }
2489
1381
  \`\`\`
2490
- \u2192 **Trigger detected: SLACK_MESSAGE**
1382
+ -> **Trigger detected: SLACK_MESSAGE**
2491
1383
 
2492
1384
  **CI/CD Environment:**
2493
1385
  - Environment variables present:
@@ -2497,24 +1389,26 @@ Analyze the input format to determine how this task was invoked:
2497
1389
  - \`GITHUB_BASE_REF\` (base branch)
2498
1390
  - \`GITHUB_HEAD_REF\` (head branch)
2499
1391
  - Git context available via bash commands
2500
- \u2192 **Trigger detected: CI_CD**
1392
+ -> **Trigger detected: CI_CD**
2501
1393
 
2502
1394
  **Manual Invocation:**
2503
1395
  - Input is natural language, URL, or issue identifier
2504
1396
  - Patterns: "PR #123", GitHub URL, "PROJ-456", feature description
2505
- \u2192 **Trigger detected: MANUAL**
2506
-
2507
- ### 1.2 Store Trigger Context
2508
-
2509
- Store the detected trigger for use in Step 6 (output routing):
2510
- - Set variable: \`TRIGGER_SOURCE\` = [GITHUB_PR | SLACK_MESSAGE | CI_CD | MANUAL]
2511
- - This determines output formatting and delivery channel
1397
+ -> **Trigger detected: MANUAL**
2512
1398
 
2513
- ## Step 2: Extract Context Based on Trigger
1399
+ ### Store Trigger Context
2514
1400
 
2515
- Based on the detected trigger source, extract relevant context:
1401
+ Store the detected trigger for use in output routing:
1402
+ - Set variable: \`TRIGGER_SOURCE\` = [GITHUB_PR | SLACK_MESSAGE | CI_CD | MANUAL]
1403
+ - This determines output formatting and delivery channel`
1404
+ },
1405
+ // Step 6: Extract Context (inline)
1406
+ {
1407
+ inline: true,
1408
+ title: "Extract Context Based on Trigger",
1409
+ content: `Based on the detected trigger source, extract relevant context:
2516
1410
 
2517
- ### 2.1 GitHub PR Trigger - Extract PR Details
1411
+ ### GitHub PR Trigger - Extract PR Details
2518
1412
 
2519
1413
  If trigger is GITHUB_PR:
2520
1414
  - **PR number**: \`pull_request.number\`
@@ -2525,9 +1419,7 @@ If trigger is GITHUB_PR:
2525
1419
  - **Base branch**: \`pull_request.base.ref\`
2526
1420
  - **Head branch**: \`pull_request.head.ref\`
2527
1421
 
2528
- Optional: Fetch additional details via GitHub API if needed (PR comments, reviews)
2529
-
2530
- ### 2.2 Slack Message Trigger - Parse Natural Language
1422
+ ### Slack Message Trigger - Parse Natural Language
2531
1423
 
2532
1424
  If trigger is SLACK_MESSAGE:
2533
1425
  - **Message text**: \`event.text\`
@@ -2542,30 +1434,24 @@ If trigger is SLACK_MESSAGE:
2542
1434
  - Feature names: Quoted terms, capitalized phrases
2543
1435
  - Environments: "staging", "production", "preview"
2544
1436
 
2545
- ### 2.3 CI/CD Trigger - Read CI Environment
1437
+ ### CI/CD Trigger - Read CI Environment
2546
1438
 
2547
1439
  If trigger is CI_CD:
2548
1440
  - **CI platform**: Read \`CI\` env var
2549
- - **Branch**: \`GITHUB_REF\` \u2192 extract branch name
1441
+ - **Branch**: \`GITHUB_REF\` -> extract branch name
2550
1442
  - **Commit**: \`GITHUB_SHA\`
2551
1443
  - **Base branch**: \`GITHUB_BASE_REF\` (for PRs)
2552
1444
  - **Changed files**: Run \`git diff --name-only $BASE_SHA...$HEAD_SHA\`
2553
1445
 
2554
- If in PR context, can also fetch PR number from CI env vars (e.g., \`GITHUB_EVENT_PATH\`)
2555
-
2556
- ### 2.4 Manual Trigger - Parse User Input
1446
+ ### Manual Trigger - Parse User Input
2557
1447
 
2558
1448
  If trigger is MANUAL:
2559
1449
  - **GitHub PR URL**: Parse to extract PR number, then fetch details via API
2560
- - Pattern: \`https://github.com/owner/repo/pull/123\`
2561
- - Extract: owner, repo, PR number
2562
- - Fetch: PR details, diff, comments
2563
- - **Issue identifier**: Extract issue ID
2564
- - Patterns: "PROJ-123", "#456", "BUG-789"
1450
+ - **Issue identifier**: Extract issue ID (patterns: "PROJ-123", "#456", "BUG-789")
2565
1451
  - **Feature description**: Use text as-is for verification context
2566
1452
  - **Deployment URL**: Extract for testing environment
2567
1453
 
2568
- ### 2.5 Unified Context Structure
1454
+ ### Unified Context Structure
2569
1455
 
2570
1456
  After extraction, create unified context structure:
2571
1457
  \`\`\`
@@ -2578,21 +1464,21 @@ CHANGE_CONTEXT = {
2578
1464
  environment: "staging" | "production" | URL,
2579
1465
  prNumber: 123 (if available),
2580
1466
  issueId: "PROJ-456" (if available),
2581
-
2582
- // For output routing:
2583
1467
  slackChannel: "C123456" (if Slack trigger),
2584
1468
  slackThread: "1234567890.123456" (if Slack trigger),
2585
1469
  githubRepo: "owner/repo" (if GitHub trigger)
2586
1470
  }
2587
- \`\`\`
2588
-
2589
- ## Step 3: Determine Test Scope (Smart Selection)
2590
-
2591
- **IMPORTANT**: You do NOT have access to code files. Infer test scope from change **descriptions** only.
1471
+ \`\`\``
1472
+ },
1473
+ // Step 7: Determine Test Scope (inline)
1474
+ {
1475
+ inline: true,
1476
+ title: "Determine Test Scope (Smart Selection)",
1477
+ content: `**IMPORTANT**: You do NOT have access to code files. Infer test scope from change **descriptions** only.
2592
1478
 
2593
1479
  Based on PR title, description, and commit messages, intelligently select which tests to run:
2594
1480
 
2595
- ### 3.1 Infer Test Scope from Change Descriptions
1481
+ ### Infer Test Scope from Change Descriptions
2596
1482
 
2597
1483
  Analyze the change description to identify affected feature areas:
2598
1484
 
@@ -2600,32 +1486,19 @@ Analyze the change description to identify affected feature areas:
2600
1486
 
2601
1487
  | Description Keywords | Inferred Test Scope | Example |
2602
1488
  |---------------------|-------------------|---------|
2603
- | "login", "authentication", "sign in/up" | \`tests/specs/auth/\` | "Fix login page validation" \u2192 Auth tests |
2604
- | "checkout", "payment", "purchase" | \`tests/specs/checkout/\` | "Optimize checkout flow" \u2192 Checkout tests |
2605
- | "cart", "shopping cart", "add to cart" | \`tests/specs/cart/\` | "Update cart calculations" \u2192 Cart tests |
2606
- | "API", "endpoint", "backend" | API test suites | "Add new user API endpoint" \u2192 User API tests |
2607
- | "profile", "account", "settings" | \`tests/specs/profile/\` or \`tests/specs/settings/\` | "Profile page redesign" \u2192 Profile tests |
1489
+ | "login", "authentication", "sign in/up" | \`tests/specs/auth/\` | "Fix login page validation" -> Auth tests |
1490
+ | "checkout", "payment", "purchase" | \`tests/specs/checkout/\` | "Optimize checkout flow" -> Checkout tests |
1491
+ | "cart", "shopping cart", "add to cart" | \`tests/specs/cart/\` | "Update cart calculations" -> Cart tests |
1492
+ | "API", "endpoint", "backend" | API test suites | "Add new user API endpoint" -> User API tests |
1493
+ | "profile", "account", "settings" | \`tests/specs/profile/\` or \`tests/specs/settings/\` | "Profile page redesign" -> Profile tests |
2608
1494
 
2609
1495
  **Inference strategy:**
2610
1496
  1. **Extract feature keywords** from PR title and description
2611
- - PR title: "feat(checkout): Add PayPal payment option"
2612
- - Keywords: ["checkout", "payment"]
2613
- - Inferred scope: Checkout tests
2614
-
2615
1497
  2. **Analyze commit messages** for conventional commit scopes
2616
- - \`feat(auth): Add password reset flow\` \u2192 Auth tests
2617
- - \`fix(cart): Resolve quantity update bug\` \u2192 Cart tests
2618
-
2619
1498
  3. **Map keywords to test organization**
2620
- - Reference: Tests are organized by feature under \`tests/specs/\` (see \`.bugzy/runtime/testing-best-practices.md\`)
2621
- - Feature areas typically include: auth/, checkout/, cart/, profile/, api/, etc.
2622
-
2623
1499
  4. **Identify test scope breadth from description tone**
2624
- - "Fix typo in button label" \u2192 Narrow scope (smoke tests)
2625
- - "Refactor shared utility functions" \u2192 Wide scope (full suite)
2626
- - "Update single component styling" \u2192 Narrow scope (component tests)
2627
1500
 
2628
- ### 3.2 Fallback Strategies Based on Description Analysis
1501
+ ### Fallback Strategies Based on Description Analysis
2629
1502
 
2630
1503
  **Description patterns that indicate full suite:**
2631
1504
  - "Refactor shared/common utilities" (wide impact)
@@ -2640,30 +1513,13 @@ Analyze the change description to identify affected feature areas:
2640
1513
  - "Fix formatting" or "Linting fixes" (no logic change)
2641
1514
 
2642
1515
  **When description is vague or ambiguous:**
2643
- - Examples: "Updated several components", "Various bug fixes", "Improvements"
2644
1516
  - **ACTION REQUIRED**: Use AskUserQuestion tool to clarify test scope
2645
- - Provide options based on available test suites:
2646
- \`\`\`typescript
2647
- AskUserQuestion({
2648
- questions: [{
2649
- question: "The change description is broad. Which test suites should run?",
2650
- header: "Test Scope",
2651
- multiSelect: true,
2652
- options: [
2653
- { label: "Auth tests", description: "Login, signup, password reset" },
2654
- { label: "Checkout tests", description: "Purchase flow, payment processing" },
2655
- { label: "Full test suite", description: "Run all tests for comprehensive validation" },
2656
- { label: "Smoke tests only", description: "Quick validation of critical paths" }
2657
- ]
2658
- }]
2659
- })
2660
- \`\`\`
2661
1517
 
2662
1518
  **If specific test scope requested:**
2663
1519
  - User can override with: "only smoke tests", "full suite", specific test suite names
2664
1520
  - Honor user's explicit scope over smart selection
2665
1521
 
2666
- ### 3.3 Test Selection Summary
1522
+ ### Test Selection Summary
2667
1523
 
2668
1524
  Generate summary of test selection based on description analysis:
2669
1525
  \`\`\`markdown
@@ -2673,180 +1529,51 @@ Generate summary of test selection based on description analysis:
2673
1529
  - **Affected test suites**: [list inferred test suite paths or names]
2674
1530
  - **Scope reasoning**: [explain why this scope was selected]
2675
1531
  - **Execution strategy**: [smart selection | full suite | smoke tests | user-specified]
2676
- \`\`\`
2677
-
2678
- **Example summary:**
2679
- \`\`\`markdown
2680
- ### Test Scope Determined
2681
- - **Change description**: "feat(checkout): Add PayPal payment option"
2682
- - **Identified keywords**: checkout, payment, PayPal
2683
- - **Affected test suites**: tests/specs/checkout/payment.spec.ts, tests/specs/checkout/purchase-flow.spec.ts
2684
- - **Scope reasoning**: Change affects checkout payment processing; running all checkout tests to validate payment integration
2685
- - **Execution strategy**: Smart selection (checkout suite)
2686
- \`\`\`
2687
-
2688
- ## Step 4: Run Verification Workflow
2689
-
2690
- Execute comprehensive verification combining automated tests and manual checklists:
2691
-
2692
- ### 4A: Automated Testing (Integrated from /run-tests)
2693
-
2694
- Execute automated Playwright tests with full triage and fixing:
2695
-
2696
- #### 4A.1 Execute Tests
2697
-
2698
- Run the selected tests via Playwright:
2699
- \`\`\`bash
2700
- npx playwright test [scope] --reporter=json --output=test-results/
2701
- \`\`\`
2702
-
2703
- Wait for execution to complete. Capture JSON report from \`test-results/.last-run.json\`.
2704
-
2705
- #### 4A.2 Parse Test Results
2706
-
2707
- Read and analyze the JSON report:
2708
- - Extract: Total, passed, failed, skipped counts
2709
- - For each failed test: file path, test name, error message, stack trace, trace file
2710
- - Calculate: Pass rate, total duration
2711
-
2712
- #### 4A.3 Triage Failures (Classification)
2713
-
2714
- #### Automatic Test Issue Fixing
2715
-
2716
- For each test classified as **[TEST ISSUE]**, use the test-debugger-fixer agent to automatically fix the test:
2717
-
2718
- \`\`\`
2719
- {{INVOKE_TEST_DEBUGGER_FIXER}}
2720
-
2721
- For each failed test classified as a test issue (not a product bug), provide:
2722
- - Test file path: [from JSON report]
2723
- - Test name/title: [from JSON report]
2724
- - Error message: [from JSON report]
2725
- - Stack trace: [from JSON report]
2726
- - Trace file path: [if available]
2727
-
2728
- The agent will:
2729
- 1. Read the failing test file
2730
- 2. Analyze the failure details
2731
- 3. Open browser via Playwright MCP to debug if needed
2732
- 4. Identify the root cause (brittle selector, missing wait, race condition, etc.)
2733
- 5. Apply appropriate fix to the test code
2734
- 6. Rerun the test to verify the fix
2735
- 7. Repeat up to 3 times if needed
2736
- 8. Report success or escalate as likely product bug
2737
-
2738
- After test-debugger-fixer completes:
2739
- - If fix succeeded: Mark test as fixed, add to "Tests Fixed" list
2740
- - If still failing after 3 attempts: Reclassify as potential product bug
2741
- \`\`\`
2742
-
2743
- **Track Fixed Tests:**
2744
- - Maintain list of tests fixed automatically
2745
- - Include fix description (e.g., "Updated selector from CSS to role-based")
2746
- - Note verification status (test now passes)
2747
- - Reference .bugzy/runtime/testing-best-practices.md for best practices
2748
-
2749
- For each failed test, classify as:
2750
- - **[PRODUCT BUG]**: Correct test code, but application behaves incorrectly
2751
- - **[TEST ISSUE]**: Test code needs fixing (selector, timing, assertion)
2752
-
2753
- Classification guidelines:
2754
- - Product Bug: Expected behavior not met, functional issue
2755
- - Test Issue: Selector not found, timeout, race condition, brittle locator
2756
-
2757
- #### 4A.4 Fix Test Issues Automatically
2758
-
2759
- For tests classified as [TEST ISSUE]:
2760
- - {{INVOKE_TEST_DEBUGGER_FIXER}} to analyze and fix
2761
- - Agent debugs with browser if needed
2762
- - Applies fix (selector update, wait condition, assertion correction)
2763
- - Reruns test to verify fix (10x for flaky tests)
2764
- - Max 3 fix attempts, then reclassify as product bug
2765
-
2766
- Track fixed tests with:
2767
- - Test file path
2768
- - Fix description
2769
- - Verification status (now passes)
2770
-
2771
- #### 4A.5 Log Product Bugs
2772
-
2773
- {{ISSUE_TRACKER_INSTRUCTIONS}}
2774
-
2775
- For tests classified as [PRODUCT BUG]:
2776
- - {{INVOKE_ISSUE_TRACKER}} to create bug reports
2777
- - Agent checks for duplicates automatically
2778
- - Creates detailed report with:
2779
- - Title, description, reproduction steps
2780
- - Test reference, error details, stack trace
2781
- - Screenshots, traces, environment details
2782
- - Severity based on test type and impact
2783
- - Returns issue ID for tracking
2784
-
2785
- ### 4B: Manual Verification Checklist (NEW)
2786
-
2787
- Generate human-readable checklist for non-automatable scenarios:
2788
-
2789
- #### Generate Manual Verification Checklist
2790
-
2791
- Analyze the code changes and generate a manual verification checklist for scenarios that cannot be automated.
1532
+ \`\`\``
1533
+ },
1534
+ // Step 8-11: Test Execution (library steps)
1535
+ "run-playwright-tests",
1536
+ "parse-test-results",
1537
+ "triage-failures",
1538
+ "fix-test-issues",
1539
+ // Step 12: Log Product Bugs (conditional library step)
1540
+ {
1541
+ stepId: "log-product-bugs",
1542
+ conditionalOnSubagent: "issue-tracker"
1543
+ },
1544
+ // Step 13: Generate Manual Verification Checklist (inline)
1545
+ {
1546
+ inline: true,
1547
+ title: "Generate Manual Verification Checklist",
1548
+ content: `Generate human-readable checklist for non-automatable scenarios:
2792
1549
 
2793
- #### Analyze Change Context
1550
+ ### Analyze Change Context
2794
1551
 
2795
1552
  Review the provided context to understand what changed:
2796
1553
  - Read PR title, description, and commit messages
2797
1554
  - Identify change types from descriptions: visual, UX, forms, mobile, accessibility, edge cases
2798
1555
  - Understand the scope and impact of changes from the change descriptions
2799
1556
 
2800
- #### Identify Non-Automatable Scenarios
1557
+ ### Identify Non-Automatable Scenarios
2801
1558
 
2802
1559
  Based on the change analysis, identify scenarios that require human verification:
2803
1560
 
2804
1561
  **1. Visual Design Changes** (CSS, styling, design files, graphics)
2805
- - Color schemes, gradients, shadows
2806
- - Typography, font sizes, line heights
2807
- - Spacing, margins, padding, alignment
2808
- - Visual consistency across components
2809
- - Brand guideline compliance
2810
- \u2192 Add **Design Validation** checklist items
1562
+ -> Add **Design Validation** checklist items
2811
1563
 
2812
1564
  **2. UX Interaction Changes** (animations, transitions, gestures, micro-interactions)
2813
- - Animation smoothness (60fps expectation)
2814
- - Transition timing and easing
2815
- - Interaction responsiveness and feel
2816
- - Loading states and skeleton screens
2817
- - Hover effects, focus states
2818
- \u2192 Add **UX Feel** checklist items
1565
+ -> Add **UX Feel** checklist items
2819
1566
 
2820
1567
  **3. Form and Input Changes** (new form fields, input validation, user input)
2821
- - Screen reader compatibility
2822
- - Keyboard navigation (Tab order, Enter to submit)
2823
- - Error message clarity and placement
2824
- - Color contrast (WCAG 2.1 AA: 4.5:1 ratio for text)
2825
- - Focus indicators visibility
2826
- \u2192 Add **Accessibility** checklist items
1568
+ -> Add **Accessibility** checklist items
2827
1569
 
2828
1570
  **4. Mobile and Responsive Changes** (media queries, touch interactions, viewport)
2829
- - Touch target sizes (\u226544px iOS, \u226548dp Android)
2830
- - Responsive layout breakpoints
2831
- - Mobile keyboard behavior (doesn't obscure inputs)
2832
- - Swipe gestures and touch interactions
2833
- - Pinch-to-zoom functionality
2834
- \u2192 Add **Mobile Experience** checklist items
1571
+ -> Add **Mobile Experience** checklist items
2835
1572
 
2836
1573
  **5. Low ROI or Rare Scenarios** (edge cases, one-time migrations, rare user paths)
2837
- - Scenarios used by < 1% of users
2838
- - Complex multi-system integrations
2839
- - One-time data migrations
2840
- - Leap year, DST, timezone edge cases
2841
- \u2192 Add **Exploratory Testing** notes
2842
-
2843
- **6. Cross-Browser Visual Consistency** (layout rendering differences)
2844
- - Layout consistency across Chrome, Firefox, Safari
2845
- - CSS feature support differences
2846
- - Font rendering variations
2847
- \u2192 Add **Cross-Browser** checklist items (if significant visual changes)
1574
+ -> Add **Exploratory Testing** notes
2848
1575
 
2849
- #### Generate Role-Specific Checklist Items
1576
+ ### Generate Role-Specific Checklist Items
2850
1577
 
2851
1578
  For each identified scenario, create clear, actionable checklist items:
2852
1579
 
@@ -2855,113 +1582,24 @@ For each identified scenario, create clear, actionable checklist items:
2855
1582
  - Assigned role (@design-team, @qa-team, @a11y-team, @mobile-team)
2856
1583
  - Acceptance criteria (what constitutes pass/fail)
2857
1584
  - Reference to standards when applicable (WCAG, iOS HIG, Material Design)
2858
- - Priority indicator (\u{1F534} critical, \u{1F7E1} important, \u{1F7E2} nice-to-have)
1585
+ - Priority indicator (red circle critical, yellow circle important, green circle nice-to-have)
2859
1586
 
2860
1587
  **Example checklist items:**
2861
1588
 
2862
1589
  **Design Validation (@design-team)**
2863
- - [ ] \u{1F534} Login button color matches brand guidelines (#FF6B35)
2864
- - [ ] \u{1F7E1} Loading spinner animation smooth (60fps, no jank)
2865
- - [ ] \u{1F7E1} Card shadows match design system (elevation-2: 0 2px 4px rgba(0,0,0,0.1))
2866
- - [ ] \u{1F7E2} Hover states provide appropriate visual feedback
1590
+ - [ ] Login button color matches brand guidelines (#FF6B35)
1591
+ - [ ] Loading spinner animation smooth (60fps, no jank)
2867
1592
 
2868
1593
  **Accessibility (@a11y-team)**
2869
- - [ ] \u{1F534} Screen reader announces form errors clearly (tested with VoiceOver/NVDA)
2870
- - [ ] \u{1F534} Keyboard navigation: Tab through all interactive elements in logical order
2871
- - [ ] \u{1F534} Color contrast meets WCAG 2.1 AA (4.5:1 for body text, 3:1 for large text)
2872
- - [ ] \u{1F7E1} Focus indicators visible on all interactive elements
1594
+ - [ ] Screen reader announces form errors clearly (tested with VoiceOver/NVDA)
1595
+ - [ ] Keyboard navigation: Tab through all interactive elements in logical order
1596
+ - [ ] Color contrast meets WCAG 2.1 AA (4.5:1 for body text, 3:1 for large text)
2873
1597
 
2874
1598
  **Mobile Experience (@qa-team, @mobile-team)**
2875
- - [ ] \u{1F534} Touch targets \u226544px (iOS Human Interface Guidelines)
2876
- - [ ] \u{1F534} Mobile keyboard doesn't obscure input fields on iOS/Android
2877
- - [ ] \u{1F7E1} Swipe gestures work naturally without conflicts
2878
- - [ ] \u{1F7E1} Responsive layout adapts properly on iPhone SE (smallest screen)
2879
-
2880
- **UX Feel (@design-team, @qa-team)**
2881
- - [ ] \u{1F7E1} Page transitions smooth and not jarring
2882
- - [ ] \u{1F7E1} Button click feedback immediate (< 100ms perceived response)
2883
- - [ ] \u{1F7E2} Loading states prevent confusion during data fetch
2884
-
2885
- **Exploratory Testing (@qa-team)**
2886
- - [ ] \u{1F7E2} Test edge case: User submits form during network timeout
2887
- - [ ] \u{1F7E2} Test edge case: User navigates back during submission
2888
-
2889
- #### Format for Output Channel
2890
-
2891
- Adapt the checklist format based on the output channel (determined by trigger source):
1599
+ - [ ] Touch targets greater than or equal to 44px (iOS Human Interface Guidelines)
1600
+ - [ ] Mobile keyboard doesn't obscure input fields on iOS/Android
2892
1601
 
2893
- **Terminal (Manual Trigger):**
2894
- \`\`\`markdown
2895
- MANUAL VERIFICATION CHECKLIST:
2896
- Please verify the following before merging:
2897
-
2898
- Design Validation (@design-team):
2899
- [ ] \u{1F534} Checkout button colors match brand guidelines (#FF6B35)
2900
- [ ] \u{1F7E1} Loading spinner animation smooth (60fps)
2901
-
2902
- Accessibility (@a11y-team):
2903
- [ ] \u{1F534} Screen reader announces error messages
2904
- [ ] \u{1F534} Keyboard navigation works (Tab order logical)
2905
- [ ] \u{1F534} Color contrast meets WCAG 2.1 AA (4.5:1 ratio)
2906
-
2907
- Mobile Experience (@qa-team):
2908
- [ ] \u{1F534} Touch targets \u226544px (iOS HIG)
2909
- [ ] \u{1F7E1} Responsive layout works on iPhone SE
2910
- \`\`\`
2911
-
2912
- **Slack (Slack Trigger):**
2913
- \`\`\`markdown
2914
- *Manual Verification Needed:*
2915
- \u25A1 Visual: Button colors, animations (60fps)
2916
- \u25A1 Mobile: Touch targets \u226544px
2917
- \u25A1 A11y: Screen reader, keyboard nav, contrast
2918
-
2919
- cc @design-team @qa-team @a11y-team
2920
- \`\`\`
2921
-
2922
- **GitHub PR Comment (GitHub Trigger):**
2923
- \`\`\`markdown
2924
- ### Manual Verification Required
2925
-
2926
- The following scenarios require human verification before release:
2927
-
2928
- #### Design Validation (@design-team)
2929
- - [ ] \u{1F534} Checkout button colors match brand guidelines (#FF6B35)
2930
- - [ ] \u{1F7E1} Loading spinner animation smooth (60fps)
2931
- - [ ] \u{1F7E1} Card shadows match design system
2932
-
2933
- #### Accessibility (@a11y-team)
2934
- - [ ] \u{1F534} Screen reader announces error messages (VoiceOver/NVDA)
2935
- - [ ] \u{1F534} Keyboard navigation through all form fields (Tab order)
2936
- - [ ] \u{1F534} Color contrast meets WCAG 2.1 AA (4.5:1 for body text)
2937
-
2938
- #### Mobile Experience (@qa-team)
2939
- - [ ] \u{1F534} Touch targets \u226544px (iOS Human Interface Guidelines)
2940
- - [ ] \u{1F534} Mobile keyboard doesn't obscure input fields
2941
- - [ ] \u{1F7E1} Responsive layout works on iPhone SE (375x667)
2942
-
2943
- ---
2944
- *Legend: \u{1F534} Critical \u2022 \u{1F7E1} Important \u2022 \u{1F7E2} Nice-to-have*
2945
- \`\`\`
2946
-
2947
- #### Guidelines for Quality Checklists
2948
-
2949
- **DO:**
2950
- - Make each item verifiable (clear pass/fail criteria)
2951
- - Include context (why this needs manual verification)
2952
- - Reference standards (WCAG, iOS HIG, Material Design)
2953
- - Assign to specific roles
2954
- - Prioritize items (critical, important, nice-to-have)
2955
- - Be specific (not "check colors" but "Login button color matches #FF6B35")
2956
-
2957
- **DON'T:**
2958
- - Create vague items ("test thoroughly")
2959
- - List items that can be automated
2960
- - Skip role assignments
2961
- - Forget acceptance criteria
2962
- - Omit priority indicators
2963
-
2964
- #### When NO Manual Verification Needed
1602
+ ### When NO Manual Verification Needed
2965
1603
 
2966
1604
  If the changes are purely:
2967
1605
  - Backend logic (no UI changes)
@@ -2973,18 +1611,13 @@ Output:
2973
1611
  \`\`\`markdown
2974
1612
  **Manual Verification:** Not required for this change.
2975
1613
  All user-facing changes are fully covered by automated tests.
2976
- \`\`\`
2977
-
2978
- #### Summary
2979
-
2980
- After generating the checklist:
2981
- - Count total items by priority (\u{1F534} critical, \u{1F7E1} important, \u{1F7E2} nice-to-have)
2982
- - Estimate time needed (e.g., "~30 minutes for design QA, ~45 minutes for accessibility testing")
2983
- - Suggest who should perform each category of checks
2984
-
2985
- ### 4C: Aggregate Results
2986
-
2987
- Combine automated and manual verification results:
1614
+ \`\`\``
1615
+ },
1616
+ // Step 14: Aggregate Results (inline)
1617
+ {
1618
+ inline: true,
1619
+ title: "Aggregate Verification Results",
1620
+ content: `Combine automated and manual verification results:
2988
1621
 
2989
1622
  \`\`\`markdown
2990
1623
  ## Verification Results Summary
@@ -2998,231 +1631,99 @@ Combine automated and manual verification results:
2998
1631
  - Duration: [time]
2999
1632
 
3000
1633
  ### Manual Verification Required
3001
- [Checklist generated in 4B, or "Not required"]
1634
+ [Checklist generated in previous step, or "Not required"]
3002
1635
 
3003
1636
  ### Overall Recommendation
3004
- [\u2705 Safe to merge | \u26A0\uFE0F Review bugs before merging | \u274C Do not merge]
3005
- \`\`\`
3006
-
3007
- ## Step 5: Understanding the Change (Documentation Research)
3008
-
3009
- {{DOCUMENTATION_RESEARCHER_INSTRUCTIONS}}
3010
-
3011
- Before proceeding with test creation or execution, ensure requirements are clear through ambiguity detection and adaptive exploration.
3012
-
3013
- **Note**: For detailed exploration and clarification protocols, refer to the complete instructions below. Adapt the depth of exploration based on requirement clarity and use the clarification protocol to detect ambiguity, assess severity, and seek clarification when needed.
3014
-
3015
- After clarification and exploration, analyze the change to determine the verification approach:
3016
-
3017
- ### 5.1 Identify Test Scope
3018
- Based on the change description, exploration findings, and clarified requirements:
3019
- - **Direct impact**: Which features/functionality are directly modified
3020
- - **Indirect impact**: What else might be affected (dependencies, integrations)
3021
- - **Regression risk**: Existing functionality that should be retested
3022
- - **New functionality**: Features that need new test coverage
3023
-
3024
- ### 5.2 Determine Verification Strategy
3025
- Plan your testing approach based on validated requirements:
3026
- - **Priority areas**: Critical paths that must work
3027
- - **Test types needed**: Functional, regression, integration, UI/UX
3028
- - **Test data requirements**: What test accounts, data, or scenarios needed
3029
- - **Success criteria**: What determines the change is working correctly (now clearly defined)
3030
-
3031
- ## Step 6: Report Results (Multi-Channel Output)
3032
-
3033
- Route output based on trigger source (from Step 1):
3034
-
3035
- ### 6.1 MANUAL Trigger \u2192 Terminal Output
3036
-
3037
- Format as comprehensive markdown report for terminal display:
3038
-
3039
- \`\`\`markdown
3040
- # Test Verification Report
3041
-
3042
- ## Change Summary
3043
- - **What Changed**: [Brief description]
3044
- - **Scope**: [Affected features/areas]
3045
- - **Changed Files**: [count] files
3046
-
3047
- ## Automated Test Results
3048
- ### Statistics
3049
- - Total Tests: [count]
3050
- - Passed: [count] ([percentage]%)
3051
- - Failed: [count]
3052
- - Test Issues Fixed: [count]
3053
- - Product Bugs Logged: [count]
3054
- - Duration: [time]
3055
-
3056
- ### Tests Fixed Automatically
3057
- [For each fixed test:
3058
- - **Test**: [file path] \u203A [test name]
3059
- - **Issue**: [problem found]
3060
- - **Fix**: [what was changed]
3061
- - **Status**: \u2705 Now passing
3062
- ]
3063
-
3064
- ### Product Bugs Logged
3065
- [For each bug:
3066
- - **Issue**: [ISSUE-123] [Bug title]
3067
- - **Test**: [test file] \u203A [test name]
3068
- - **Severity**: [priority]
3069
- - **Link**: [issue tracker URL]
3070
- ]
3071
-
3072
- ## Manual Verification Checklist
3073
-
3074
- [Insert checklist from Step 4B]
3075
-
3076
- ## Recommendation
3077
- [\u2705 Safe to merge - all automated tests pass, complete manual checks before release]
3078
- [\u26A0\uFE0F Review bugs before merging - [X] bugs need attention]
3079
- [\u274C Do not merge - critical failures]
3080
-
3081
- ## Test Artifacts
3082
- - JSON Report: test-results/.last-run.json
3083
- - HTML Report: playwright-report/index.html
3084
- - Traces: test-results/[test-id]/trace.zip
3085
- - Screenshots: test-results/[test-id]/screenshots/
3086
- \`\`\`
3087
-
3088
- ### 6.2 SLACK_MESSAGE Trigger \u2192 Thread Reply
3089
-
3090
- {{TEAM_COMMUNICATOR_INSTRUCTIONS}}
3091
-
3092
- {{INVOKE_TEAM_COMMUNICATOR}} to post concise results to Slack thread:
3093
-
3094
- \`\`\`
3095
- Post verification results.
3096
-
3097
- **Channel**: [from CHANGE_CONTEXT.slackChannel]
3098
- **Thread**: [from CHANGE_CONTEXT.slackThread]
3099
-
3100
- **Message**:
3101
- \u{1F9EA} *Verification Results for [change title]*
3102
-
3103
- *Automated:* \u2705 [passed]/[total] tests passed ([duration])
3104
- [If test issues fixed:] \u{1F527} [count] test issues auto-fixed
3105
- [If bugs logged:] \u{1F41B} [count] bugs logged ([list issue IDs])
3106
-
3107
- *Manual Verification Needed:*
3108
- [Concise checklist summary - collapsed/expandable]
3109
- \u25A1 Visual: [key items]
3110
- \u25A1 Mobile: [key items]
3111
- \u25A1 A11y: [key items]
3112
-
3113
- *Recommendation:* [\u2705 Safe to merge | \u26A0\uFE0F Review bugs | \u274C Blocked]
3114
-
3115
- [If bugs logged:] cc @[relevant-team-members]
3116
- [Link to full test report if available]
3117
- \`\`\`
3118
-
3119
- ### 6.3 GITHUB_PR Trigger \u2192 PR Comment
3120
-
3121
- Use GitHub API to post comprehensive comment on PR:
3122
-
3123
- **Format as GitHub-flavored markdown:**
3124
- \`\`\`markdown
3125
- ## \u{1F9EA} Test Verification Results
3126
-
3127
- **Status:** [\u2705 All tests passed | \u26A0\uFE0F Issues found | \u274C Critical failures]
3128
-
3129
- ### Automated Tests
3130
- | Metric | Value |
3131
- |--------|-------|
3132
- | Total Tests | [count] |
3133
- | Passed | \u2705 [count] ([percentage]%) |
3134
- | Failed | \u274C [count] |
3135
- | Test Issues Fixed | \u{1F527} [count] |
3136
- | Product Bugs Logged | \u{1F41B} [count] |
3137
- | Duration | \u23F1\uFE0F [time] |
3138
-
3139
- ### Failed Tests (Triaged)
3140
-
3141
- [For each failure:]
3142
-
3143
- #### \u274C **[Test Name]**
3144
- - **File:** \`[test-file-path]\`
3145
- - **Cause:** [Product bug | Test issue]
3146
- - **Action:** [Bug logged: [ISSUE-123](url) | Fixed: [commit-hash](url)]
3147
- - **Details:**
3148
- \`\`\`
3149
- [Error message]
3150
- \`\`\`
3151
-
3152
- ### Tests Fixed Automatically
3153
-
3154
- [For each fixed test:]
3155
- - \u2705 **[Test Name]** (\`[file-path]\`)
3156
- - **Issue:** [brittle selector | missing wait | race condition]
3157
- - **Fix:** [description of fix applied]
3158
- - **Verified:** Passes 10/10 runs
3159
-
3160
- ### Product Bugs Logged
3161
-
3162
- [For each bug:]
3163
- - \u{1F41B} **[[ISSUE-123](url)]** [Bug title]
3164
- - **Test:** \`[test-file]\` \u203A [test name]
3165
- - **Severity:** [\u{1F534} Critical | \u{1F7E1} Important | \u{1F7E2} Minor]
3166
- - **Assignee:** @[backend-team | frontend-team]
3167
-
3168
- ### Manual Verification Required
3169
-
3170
- The following scenarios require human verification before release:
3171
-
3172
- #### Design Validation (@design-team)
3173
- - [ ] \u{1F534} [Critical design check]
3174
- - [ ] \u{1F7E1} [Important design check]
3175
-
3176
- #### Accessibility (@a11y-team)
3177
- - [ ] \u{1F534} [Critical a11y check]
3178
- - [ ] \u{1F7E1} [Important a11y check]
3179
-
3180
- #### Mobile Experience (@qa-team)
3181
- - [ ] \u{1F534} [Critical mobile check]
3182
- - [ ] \u{1F7E1} [Important mobile check]
3183
-
3184
- ---
3185
- *Legend: \u{1F534} Critical \u2022 \u{1F7E1} Important \u2022 \u{1F7E2} Nice-to-have*
3186
-
3187
- ### Test Artifacts
3188
- - [Full HTML Report](playwright-report/index.html)
3189
- - [Test Traces](test-results/)
3190
-
3191
- ### Recommendation
3192
- [\u2705 **Safe to merge** - All automated tests pass, complete manual checks before release]
3193
- [\u26A0\uFE0F **Review required** - [X] bugs need attention, complete manual checks]
3194
- [\u274C **Do not merge** - Critical failures must be resolved first]
3195
-
3196
- ---
3197
- *\u{1F916} Automated by Bugzy \u2022 [View Test Code](tests/specs/) \u2022 [Manual Test Cases](test-cases/)*
3198
- \`\`\`
1637
+ [Safe to merge | Review bugs before merging | Do not merge]
1638
+ \`\`\``
1639
+ },
1640
+ // Step 15: Documentation Research (conditional inline)
1641
+ {
1642
+ inline: true,
1643
+ title: "Understanding the Change (Documentation Research)",
1644
+ content: `{{INVOKE_DOCUMENTATION_RESEARCHER}} to gather comprehensive context about the changed features:
3199
1645
 
3200
- **Post comment via GitHub API:**
3201
- - Endpoint: \`POST /repos/{owner}/{repo}/issues/{pr_number}/comments\`
3202
- - Use GitHub MCP or bash with \`gh\` CLI
3203
- - Requires GITHUB_TOKEN from environment
1646
+ Explore project documentation related to the changes.
3204
1647
 
3205
- ### 6.4 CI_CD Trigger \u2192 Build Log + PR Comment
1648
+ Specifically gather:
1649
+ - Product specifications for affected features
1650
+ - User stories and acceptance criteria
1651
+ - Technical architecture documentation
1652
+ - API endpoints and contracts
1653
+ - User roles and permissions relevant to the change
1654
+ - Business rules and validations
1655
+ - UI/UX specifications
1656
+ - Known limitations or constraints
1657
+ - Related bug reports or known issues
1658
+ - Existing test documentation for this area
3206
1659
 
3207
- **Output to CI build log:**
3208
- - Print detailed results to stdout (captured by CI)
3209
- - Use ANSI colors if supported by CI platform
3210
- - Same format as MANUAL terminal output
1660
+ The agent will:
1661
+ 1. Check its memory for previously discovered documentation
1662
+ 2. Explore workspace for relevant pages and databases
1663
+ 3. Build comprehensive understanding of the affected features
1664
+ 4. Return synthesized information to inform testing strategy
3211
1665
 
3212
- **Exit with appropriate code:**
1666
+ Use this information to:
1667
+ - Better understand the change context
1668
+ - Identify comprehensive test scenarios
1669
+ - Recognize integration points and dependencies
1670
+ - Spot potential edge cases or risk areas
1671
+ - Enhance manual verification checklist generation`,
1672
+ conditionalOnSubagent: "documentation-researcher"
1673
+ },
1674
+ // Step 16: Report Results (inline)
1675
+ {
1676
+ inline: true,
1677
+ title: "Report Results (Multi-Channel Output)",
1678
+ content: `Route output based on trigger source:
1679
+
1680
+ ### MANUAL Trigger -> Terminal Output
1681
+
1682
+ Format as comprehensive markdown report for terminal display with:
1683
+ - Change Summary (what changed, scope, affected files)
1684
+ - Automated Test Results (statistics, tests fixed, bugs logged)
1685
+ - Manual Verification Checklist
1686
+ - Recommendation (safe to merge / review / do not merge)
1687
+ - Test Artifacts (JSON report, HTML report, traces, screenshots)
1688
+
1689
+ ### SLACK_MESSAGE Trigger -> Thread Reply
1690
+
1691
+ {{INVOKE_TEAM_COMMUNICATOR}} to post concise results to Slack thread with:
1692
+ - Verification results summary
1693
+ - Critical failures that need immediate attention
1694
+ - Bugs logged with issue tracker links
1695
+ - Manual verification checklist summary
1696
+ - Recommendation and next steps
1697
+ - Tag relevant team members for critical issues
1698
+
1699
+ ### GITHUB_PR Trigger -> PR Comment
1700
+
1701
+ Use GitHub API to post comprehensive comment on PR with:
1702
+ - Status (All tests passed / Issues found / Critical failures)
1703
+ - Automated Tests table (Total, Passed, Failed, Fixed, Bugs, Duration)
1704
+ - Failed Tests (triaged and with actions taken)
1705
+ - Tests Fixed Automatically (issue, fix, verified)
1706
+ - Product Bugs Logged (issue ID, title, test, severity)
1707
+ - Manual Verification Required (checklist)
1708
+ - Test Artifacts links
1709
+ - Recommendation
1710
+
1711
+ ### CI_CD Trigger -> Build Log + PR Comment
1712
+
1713
+ Output to CI build log (print detailed results to stdout) and exit with appropriate code:
3213
1714
  - Exit 0: All tests passed (safe to merge)
3214
1715
  - Exit 1: Tests failed or critical bugs found (block merge)
3215
1716
 
3216
- **Post PR comment if GitHub context available:**
3217
- - Check for PR number in CI environment
3218
- - If available: Post comment using 6.3 format
3219
- - Also notify team via Slack if critical failures
3220
-
3221
- ## Additional Steps
3222
-
3223
- ### Handle Special Cases
3224
-
3225
- **If no tests found for changed files:**
1717
+ Post PR comment if GitHub context available.`,
1718
+ conditionalOnSubagent: "team-communicator"
1719
+ },
1720
+ // Step 17: Knowledge Base Update (library)
1721
+ "update-knowledge-base",
1722
+ // Step 18: Handle Special Cases (inline)
1723
+ {
1724
+ inline: true,
1725
+ title: "Handle Special Cases",
1726
+ content: `**If no tests found for changed files:**
3226
1727
  - Inform user: "No automated tests found for changed files"
3227
1728
  - Recommend: "Run smoke test suite for basic validation"
3228
1729
  - Still generate manual verification checklist
@@ -3236,8 +1737,6 @@ The following scenarios require human verification before release:
3236
1737
  - Suggest troubleshooting steps
3237
1738
  - Don't proceed with triage if tests didn't run
3238
1739
 
3239
- ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
3240
-
3241
1740
  ## Important Notes
3242
1741
 
3243
1742
  - This task handles **all trigger sources** with a single unified workflow
@@ -3248,162 +1747,172 @@ ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
3248
1747
  - Product bugs are logged with **automatic duplicate detection**
3249
1748
  - Test issues are fixed automatically with **verification**
3250
1749
  - Results include both automated and manual verification items
3251
- - For best results, ensure:
3252
- - Playwright is installed (\`npx playwright install\`)
3253
- - Environment variables configured (copy \`.env.testdata\` to \`.env\`)
3254
- - GitHub token available for PR comments (if GitHub trigger)
3255
- - Slack integration configured (if Slack trigger)
3256
- - Issue tracker configured (Linear, Jira, etc.)
3257
1750
 
3258
1751
  ## Success Criteria
3259
1752
 
3260
1753
  A successful verification includes:
3261
- 1. \u2705 Trigger source correctly detected
3262
- 2. \u2705 Context extracted completely
3263
- 3. \u2705 Tests executed (or skipped with explanation)
3264
- 4. \u2705 All failures triaged (product bug vs test issue)
3265
- 5. \u2705 Test issues fixed automatically (when possible)
3266
- 6. \u2705 Product bugs logged to issue tracker
3267
- 7. \u2705 Manual verification checklist generated
3268
- 8. \u2705 Results formatted for output channel
3269
- 9. \u2705 Results delivered to appropriate destination
3270
- 10. \u2705 Clear recommendation provided (merge / review / block)`,
3271
- optionalSubagents: [
3272
- {
3273
- role: "documentation-researcher",
3274
- contentBlock: `#### Research Project Documentation
3275
-
3276
- {{INVOKE_DOCUMENTATION_RESEARCHER}} to gather comprehensive context about the changed features:
3277
-
3278
- \`\`\`
3279
- Explore project documentation related to the changes.
3280
-
3281
- Specifically gather:
3282
- - Product specifications for affected features
3283
- - User stories and acceptance criteria
3284
- - Technical architecture documentation
3285
- - API endpoints and contracts
3286
- - User roles and permissions relevant to the change
3287
- - Business rules and validations
3288
- - UI/UX specifications
3289
- - Known limitations or constraints
3290
- - Related bug reports or known issues
3291
- - Existing test documentation for this area
3292
- \`\`\`
3293
-
3294
- The agent will:
3295
- 1. Check its memory for previously discovered documentation
3296
- 2. Explore workspace for relevant pages and databases
3297
- 3. Build comprehensive understanding of the affected features
3298
- 4. Return synthesized information to inform testing strategy
1754
+ 1. Trigger source correctly detected
1755
+ 2. Context extracted completely
1756
+ 3. Tests executed (or skipped with explanation)
1757
+ 4. All failures triaged (product bug vs test issue)
1758
+ 5. Test issues fixed automatically (when possible)
1759
+ 6. Product bugs logged to issue tracker
1760
+ 7. Manual verification checklist generated
1761
+ 8. Results formatted for output channel
1762
+ 9. Results delivered to appropriate destination
1763
+ 10. Clear recommendation provided (merge / review / block)`
1764
+ }
1765
+ ],
1766
+ requiredSubagents: ["test-runner", "test-debugger-fixer"],
1767
+ optionalSubagents: ["documentation-researcher", "issue-tracker", "team-communicator"],
1768
+ dependentTasks: []
1769
+ };
3299
1770
 
3300
- Use this information to:
3301
- - Better understand the change context
3302
- - Identify comprehensive test scenarios
3303
- - Recognize integration points and dependencies
3304
- - Spot potential edge cases or risk areas
3305
- - Enhance manual verification checklist generation`
1771
+ // src/tasks/library/onboard-testing.ts
1772
+ var onboardTestingTask = {
1773
+ slug: TASK_SLUGS.ONBOARD_TESTING,
1774
+ name: "Onboard Testing",
1775
+ description: "Complete workflow: explore application, generate test plan, create test cases, run tests, fix issues, and report results",
1776
+ frontmatter: {
1777
+ description: "Complete test coverage workflow - from exploration to passing tests",
1778
+ "argument-hint": "<focus-area-or-feature-description>"
1779
+ },
1780
+ steps: [
1781
+ // Step 1: Overview (inline)
1782
+ {
1783
+ inline: true,
1784
+ title: "Onboard Testing Overview",
1785
+ content: `## Overview
1786
+
1787
+ This command orchestrates the complete test coverage workflow in a single execution:
1788
+ 1. **Phase 1**: Read project context and explore application
1789
+ 2. **Phase 2**: Generate lightweight test plan
1790
+ 3. **Phase 3**: Generate and verify test cases (create + fix until passing)
1791
+ 4. **Phase 4**: Triage failures and fix test issues
1792
+ 5. **Phase 5**: Log product bugs
1793
+ 6. **Phase 6**: Final report`
3306
1794
  },
1795
+ // Step 2: Security Notice (from library)
1796
+ "security-notice",
1797
+ // Step 3: Arguments (inline)
3307
1798
  {
3308
- role: "issue-tracker",
3309
- contentBlock: `#### Log Product Bugs
3310
-
3311
- For tests classified as **[PRODUCT BUG]**, {{INVOKE_ISSUE_TRACKER}} to log bugs:
3312
-
3313
- \`\`\`
3314
- 1. Check for duplicate bugs in the tracking system
3315
- - The agent will automatically search for similar existing issues
3316
- - It maintains memory of recently reported issues
3317
- - Duplicate detection happens automatically - don't create manual checks
3318
-
3319
- 2. For each new bug (non-duplicate):
3320
- Create detailed bug report with:
3321
- - **Title**: Clear, descriptive summary (e.g., "Login button fails with timeout on checkout page")
3322
- - **Description**:
3323
- - What happened vs. what was expected
3324
- - Impact on users
3325
- - Test reference: [file path] \u203A [test title]
3326
- - **Reproduction Steps**:
3327
- - List steps from the failing test
3328
- - Include specific test data used
3329
- - Note any setup requirements from test file
3330
- - **Test Execution Details**:
3331
- - Test file: [file path from JSON report]
3332
- - Test name: [test title from JSON report]
3333
- - Error message: [from JSON report]
3334
- - Stack trace: [from JSON report]
3335
- - Trace file: [path if available]
3336
- - Screenshots: [paths if available]
3337
- - **Environment Details**:
3338
- - Browser and version (from Playwright config)
3339
- - Test environment URL (from .env.testdata BASE_URL)
3340
- - Timestamp of failure
3341
- - **Severity/Priority**: Based on:
3342
- - Test type (smoke tests = high priority)
3343
- - User impact
3344
- - Frequency (always fails vs flaky)
3345
- - **Additional Context**:
3346
- - Error messages or stack traces from JSON report
3347
- - Related test files (if part of test suite)
3348
- - Relevant knowledge from knowledge-base.md
3349
-
3350
- 3. Track created issues:
3351
- - Note the issue ID/number returned
3352
- - Update issue tracker memory with new bugs
3353
- - Prepare issue references for team communication
3354
- \`\`\`
3355
-
3356
- **Note**: The issue tracker agent handles all duplicate detection and system integration automatically. Simply provide the bug details and let it manage the rest.`
1799
+ inline: true,
1800
+ title: "Arguments",
1801
+ content: `Focus area: $ARGUMENTS`
3357
1802
  },
1803
+ // Phase 1: Setup
1804
+ "read-knowledge-base",
1805
+ // Phase 2: Exploration Protocol
1806
+ "exploration-protocol",
1807
+ // Execute exploration via test-runner
1808
+ "create-exploration-test-case",
1809
+ "run-exploration",
1810
+ "process-exploration-results",
1811
+ // Phase 3: Test Plan Generation
1812
+ "generate-test-plan",
1813
+ "extract-env-variables",
1814
+ // Phase 4: Test Case Generation
1815
+ "generate-test-cases",
1816
+ "automate-test-cases",
1817
+ // Phase 5: Test Execution
1818
+ "run-playwright-tests",
1819
+ "parse-test-results",
1820
+ // Phase 6: Triage and Fix (NEW - was missing from full-test-coverage)
1821
+ "triage-failures",
1822
+ "fix-test-issues",
3358
1823
  {
3359
- role: "team-communicator",
3360
- contentBlock: `#### Team Communication
3361
-
3362
- {{INVOKE_TEAM_COMMUNICATOR}} to share verification results (primarily for Slack trigger, but can be used for other triggers):
3363
-
3364
- \`\`\`
3365
- 1. Post verification results summary
3366
- 2. Highlight critical failures that need immediate attention
3367
- 3. Share bugs logged with issue tracker links
3368
- 4. Provide manual verification checklist summary
3369
- 5. Recommend next steps based on results
3370
- 6. Tag relevant team members for critical issues
3371
- 7. Use appropriate urgency level based on failure severity
3372
- \`\`\`
1824
+ stepId: "log-product-bugs",
1825
+ conditionalOnSubagent: "issue-tracker"
1826
+ },
1827
+ // Phase 7: Reporting and Communication
1828
+ "update-knowledge-base",
1829
+ {
1830
+ stepId: "notify-team",
1831
+ conditionalOnSubagent: "team-communicator"
1832
+ },
1833
+ "generate-final-report"
1834
+ ],
1835
+ requiredSubagents: ["test-runner", "test-code-generator", "test-debugger-fixer"],
1836
+ optionalSubagents: ["documentation-researcher", "team-communicator", "issue-tracker"],
1837
+ dependentTasks: ["run-tests", "generate-test-cases"]
1838
+ };
3373
1839
 
3374
- The team communication should include:
3375
- - **Execution summary**: Overall pass/fail statistics and timing
3376
- - **Tests fixed**: Count of test issues fixed automatically
3377
- - **Bugs logged**: Product bugs reported to issue tracker
3378
- - **Manual checklist**: Summary of manual verification items
3379
- - **Recommendation**: Safe to merge / Review required / Do not merge
3380
- - **Test artifacts**: Links to reports, traces, screenshots
3381
-
3382
- **Communication strategy based on trigger**:
3383
- - **Slack**: Post concise message with expandable details in thread
3384
- - **Manual**: Full detailed report in terminal
3385
- - **GitHub PR**: Comprehensive PR comment with tables and checklists
3386
- - **CI/CD**: Build log output + optional Slack notification for critical failures
3387
-
3388
- **Update team communicator memory**:
3389
- - Record verification communication
3390
- - Track response patterns by trigger type
3391
- - Document team preferences for detail level
3392
- - Note which team members respond to which types of issues`
3393
- }
1840
+ // src/tasks/library/explore-application.ts
1841
+ var exploreApplicationTask = {
1842
+ slug: TASK_SLUGS.EXPLORE_APPLICATION,
1843
+ name: "Explore Application",
1844
+ description: "Systematically explore application to discover UI elements, workflows, and behaviors",
1845
+ frontmatter: {
1846
+ description: "Explore application to discover UI, workflows, and behaviors",
1847
+ "argument-hint": "--focus [area] --depth [shallow|deep] --system [name]"
1848
+ },
1849
+ steps: [
1850
+ // Step 1: Overview (inline)
1851
+ {
1852
+ inline: true,
1853
+ title: "Explore Application Overview",
1854
+ content: `Discover actual UI elements, workflows, and behaviors using the test-runner agent. Updates test plan and project documentation with findings.`
1855
+ },
1856
+ // Step 2: Security Notice (from library)
1857
+ "security-notice",
1858
+ // Step 3: Arguments (inline)
1859
+ {
1860
+ inline: true,
1861
+ title: "Arguments",
1862
+ content: `**Arguments**: $ARGUMENTS
1863
+
1864
+ **Parse:**
1865
+ - **focus**: auth, navigation, search, content, admin (default: comprehensive)
1866
+ - **depth**: shallow (15-20 min) or deep (45-60 min, default)
1867
+ - **system**: target system (optional for multi-system setups)`
1868
+ },
1869
+ // Setup
1870
+ "read-knowledge-base",
1871
+ "load-project-context",
1872
+ // Exploration Protocol (adaptive depth)
1873
+ "exploration-protocol",
1874
+ // Execute
1875
+ "create-exploration-test-case",
1876
+ "run-exploration",
1877
+ "process-exploration-results",
1878
+ // Update
1879
+ "update-exploration-artifacts",
1880
+ // Team Communication (conditional inline)
1881
+ {
1882
+ inline: true,
1883
+ title: "Team Communication",
1884
+ content: `{{INVOKE_TEAM_COMMUNICATOR}} to notify the product team about exploration findings:
1885
+
1886
+ \`\`\`
1887
+ 1. Post an update about exploration completion
1888
+ 2. Summarize key discoveries:
1889
+ - UI elements and workflows identified
1890
+ - Behaviors documented
1891
+ - Areas needing further investigation
1892
+ 3. Share exploration report location
1893
+ 4. Ask for team feedback on findings
1894
+ 5. Use appropriate channel and threading
1895
+ \`\`\``,
1896
+ conditionalOnSubagent: "team-communicator"
1897
+ },
1898
+ "cleanup-temp-files",
1899
+ "update-knowledge-base"
3394
1900
  ],
3395
- requiredSubagents: ["test-runner", "test-debugger-fixer"]
1901
+ requiredSubagents: ["test-runner"],
1902
+ optionalSubagents: ["team-communicator"],
1903
+ dependentTasks: []
3396
1904
  };
3397
1905
 
3398
1906
  // src/tasks/index.ts
3399
1907
  var TASK_TEMPLATES = {
3400
- [TASK_SLUGS.EXPLORE_APPLICATION]: exploreApplicationTask,
3401
1908
  [TASK_SLUGS.GENERATE_TEST_CASES]: generateTestCasesTask,
3402
1909
  [TASK_SLUGS.GENERATE_TEST_PLAN]: generateTestPlanTask,
3403
1910
  [TASK_SLUGS.HANDLE_MESSAGE]: handleMessageTask,
3404
1911
  [TASK_SLUGS.PROCESS_EVENT]: processEventTask,
3405
1912
  [TASK_SLUGS.RUN_TESTS]: runTestsTask,
3406
- [TASK_SLUGS.VERIFY_CHANGES]: verifyChangesTask
1913
+ [TASK_SLUGS.VERIFY_CHANGES]: verifyChangesTask,
1914
+ [TASK_SLUGS.ONBOARD_TESTING]: onboardTestingTask,
1915
+ [TASK_SLUGS.EXPLORE_APPLICATION]: exploreApplicationTask
3407
1916
  };
3408
1917
  function getTaskTemplate(slug) {
3409
1918
  return TASK_TEMPLATES[slug];
@@ -3414,50 +1923,14 @@ function getAllTaskSlugs() {
3414
1923
  function isTaskRegistered(slug) {
3415
1924
  return TASK_TEMPLATES[slug] !== void 0;
3416
1925
  }
3417
- function buildSlashCommandsConfig(slugs) {
3418
- const configs = {};
3419
- for (const slug of slugs) {
3420
- const task = TASK_TEMPLATES[slug];
3421
- if (!task) {
3422
- console.warn(`Unknown task slug: ${slug}, skipping`);
3423
- continue;
3424
- }
3425
- configs[slug] = {
3426
- frontmatter: task.frontmatter,
3427
- content: task.baseContent
3428
- };
3429
- console.log(`\u2713 Added slash command: /${slug}`);
3430
- }
3431
- return configs;
3432
- }
3433
- function getRequiredMCPsFromTasks(slugs) {
3434
- const mcps = /* @__PURE__ */ new Set();
3435
- for (const slug of slugs) {
3436
- const task = TASK_TEMPLATES[slug];
3437
- if (!task) continue;
3438
- for (const subagent of task.requiredSubagents) {
3439
- const mcpMap = {
3440
- "test-runner": "playwright",
3441
- "team-communicator": "slack",
3442
- "documentation-researcher": "notion",
3443
- "issue-tracker": "linear"
3444
- };
3445
- const mcp = mcpMap[subagent];
3446
- if (mcp) {
3447
- mcps.add(mcp);
3448
- }
3449
- }
3450
- }
3451
- return Array.from(mcps);
3452
- }
3453
1926
  // Annotate the CommonJS export names for ESM import in node:
3454
1927
  0 && (module.exports = {
3455
1928
  TASK_SLUGS,
3456
1929
  TASK_TEMPLATES,
3457
- buildSlashCommandsConfig,
3458
1930
  getAllTaskSlugs,
3459
- getRequiredMCPsFromTasks,
3460
1931
  getTaskTemplate,
1932
+ isInlineStep,
1933
+ isStepReferenceObject,
3461
1934
  isTaskRegistered
3462
1935
  });
3463
1936
  //# sourceMappingURL=index.cjs.map