@bugzy-ai/bugzy 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/LICENSE +21 -0
  2. package/README.md +248 -0
  3. package/dist/cli/index.cjs +7547 -0
  4. package/dist/cli/index.cjs.map +1 -0
  5. package/dist/cli/index.d.cts +1 -0
  6. package/dist/cli/index.d.ts +1 -0
  7. package/dist/cli/index.js +7539 -0
  8. package/dist/cli/index.js.map +1 -0
  9. package/dist/index.cjs +6439 -0
  10. package/dist/index.cjs.map +1 -0
  11. package/dist/index.d.cts +54 -0
  12. package/dist/index.d.ts +54 -0
  13. package/dist/index.js +6383 -0
  14. package/dist/index.js.map +1 -0
  15. package/dist/subagents/index.cjs +2703 -0
  16. package/dist/subagents/index.cjs.map +1 -0
  17. package/dist/subagents/index.d.cts +34 -0
  18. package/dist/subagents/index.d.ts +34 -0
  19. package/dist/subagents/index.js +2662 -0
  20. package/dist/subagents/index.js.map +1 -0
  21. package/dist/subagents/metadata.cjs +207 -0
  22. package/dist/subagents/metadata.cjs.map +1 -0
  23. package/dist/subagents/metadata.d.cts +31 -0
  24. package/dist/subagents/metadata.d.ts +31 -0
  25. package/dist/subagents/metadata.js +174 -0
  26. package/dist/subagents/metadata.js.map +1 -0
  27. package/dist/tasks/index.cjs +3464 -0
  28. package/dist/tasks/index.cjs.map +1 -0
  29. package/dist/tasks/index.d.cts +44 -0
  30. package/dist/tasks/index.d.ts +44 -0
  31. package/dist/tasks/index.js +3431 -0
  32. package/dist/tasks/index.js.map +1 -0
  33. package/dist/templates/init/.bugzy/runtime/project-context.md +35 -0
  34. package/dist/templates/init/.bugzy/runtime/templates/test-plan-template.md +25 -0
  35. package/dist/templates/init/.bugzy/runtime/testing-best-practices.md +278 -0
  36. package/dist/templates/init/.gitignore-template +4 -0
  37. package/package.json +95 -0
  38. package/templates/init/.bugzy/runtime/knowledge-base.md +61 -0
  39. package/templates/init/.bugzy/runtime/knowledge-maintenance-guide.md +97 -0
  40. package/templates/init/.bugzy/runtime/project-context.md +35 -0
  41. package/templates/init/.bugzy/runtime/subagent-memory-guide.md +87 -0
  42. package/templates/init/.bugzy/runtime/templates/test-plan-template.md +25 -0
  43. package/templates/init/.bugzy/runtime/templates/test-result-schema.md +498 -0
  44. package/templates/init/.bugzy/runtime/test-execution-strategy.md +535 -0
  45. package/templates/init/.bugzy/runtime/testing-best-practices.md +632 -0
  46. package/templates/init/.gitignore-template +25 -0
  47. package/templates/init/CLAUDE.md +157 -0
  48. package/templates/init/test-runs/README.md +45 -0
  49. package/templates/playwright/BasePage.template.ts +190 -0
  50. package/templates/playwright/auth.setup.template.ts +89 -0
  51. package/templates/playwright/dataGenerators.helper.template.ts +148 -0
  52. package/templates/playwright/dateUtils.helper.template.ts +96 -0
  53. package/templates/playwright/pages.fixture.template.ts +50 -0
  54. package/templates/playwright/playwright.config.template.ts +97 -0
  55. package/templates/playwright/reporters/bugzy-reporter.ts +454 -0
@@ -0,0 +1,3464 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+
20
+ // src/tasks/index.ts
21
+ var tasks_exports = {};
22
+ __export(tasks_exports, {
23
+ TASK_SLUGS: () => TASK_SLUGS,
24
+ TASK_TEMPLATES: () => TASK_TEMPLATES,
25
+ buildSlashCommandsConfig: () => buildSlashCommandsConfig,
26
+ getAllTaskSlugs: () => getAllTaskSlugs,
27
+ getRequiredMCPsFromTasks: () => getRequiredMCPsFromTasks,
28
+ getTaskTemplate: () => getTaskTemplate,
29
+ isTaskRegistered: () => isTaskRegistered
30
+ });
31
+ module.exports = __toCommonJS(tasks_exports);
32
+
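The exports above are the public surface of the tasks module. A minimal consumption sketch, assuming the package exposes this module through a `./tasks` subpath export (the `dist/tasks/*` build output suggests this, but the hunk does not show `package.json`'s exports map):

```ts
// Sketch only: the "./tasks" subpath and return types are assumptions;
// the authoritative declarations live in dist/tasks/index.d.ts.
import { TASK_SLUGS, getTaskTemplate, isTaskRegistered } from "@bugzy-ai/bugzy/tasks";

if (isTaskRegistered(TASK_SLUGS.RUN_TESTS)) {
  const template = getTaskTemplate(TASK_SLUGS.RUN_TESTS);
  console.log(template?.name, template?.requiredSubagents);
}
```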
33
+ // src/tasks/constants.ts
34
+ var TASK_SLUGS = {
35
+ EXPLORE_APPLICATION: "explore-application",
36
+ GENERATE_TEST_CASES: "generate-test-cases",
37
+ GENERATE_TEST_PLAN: "generate-test-plan",
38
+ HANDLE_MESSAGE: "handle-message",
39
+ PROCESS_EVENT: "process-event",
40
+ RUN_TESTS: "run-tests",
41
+ VERIFY_CHANGES: "verify-changes"
42
+ };
43
+
44
+ // src/tasks/templates/exploration-instructions.ts
45
+ var EXPLORATION_INSTRUCTIONS = `
46
+ ## Exploratory Testing Protocol
47
+
48
+ Before creating or running formal tests, perform exploratory testing to validate requirements and understand actual system behavior. The depth of exploration should adapt to the clarity of requirements.
49
+
50
+ ### Step {{STEP_NUMBER}}.1: Assess Requirement Clarity
51
+
52
+ Determine exploration depth based on requirement quality:
53
+
54
+ | Clarity | Indicators | Exploration Depth | Goal |
55
+ |---------|-----------|-------------------|------|
56
+ | **Clear** | Detailed acceptance criteria, screenshots/mockups, specific field names/URLs/roles, unambiguous behavior, consistent patterns | Quick (1-2 min) | Confirm feature exists, capture evidence |
57
+ | **Vague** | General direction clear but specifics missing, incomplete examples, assumed details, relative terms ("fix", "better") | Moderate (3-5 min) | Document current behavior, identify ambiguities, generate clarification questions |
58
+ | **Unclear** | Contradictory info, multiple interpretations, no examples/criteria, ambiguous scope ("the page"), critical details missing | Deep (5-10 min) | Systematically test scenarios, document patterns, identify all ambiguities, formulate comprehensive questions |
59
+
60
+ **Examples:**
61
+ - **Clear:** "Change 'Submit' button from blue (#007BFF) to green (#28A745) on /auth/login. Verify hover effect."
62
+ - **Vague:** "Fix the sorting in todo list page. The items are mixed up for premium users."
63
+ - **Unclear:** "Improve the dashboard performance. Users say it's slow."
64
+
65
+ ### Step {{STEP_NUMBER}}.2: Quick Exploration (1-2 min)
66
+
67
+ **When:** Requirements CLEAR
68
+
69
+ **Steps:**
70
+ 1. Navigate to feature (use provided URL), verify loads without errors
71
+ 2. Verify key elements exist (buttons, fields, sections mentioned)
72
+ 3. Capture screenshot of initial state
73
+ 4. Document:
74
+ \`\`\`markdown
75
+ **Quick Exploration (1 min)**
76
+ Feature: [Name] | URL: [Path]
77
+ Status: \u2705 Accessible / \u274C Not found / \u26A0\uFE0F Different
78
+ Screenshot: [filename]
79
+ Notes: [Immediate observations]
80
+ \`\`\`
81
+ 5. **Decision:** \u2705 Matches \u2192 Test creation | \u274C/\u26A0\uFE0F Doesn't match \u2192 Moderate Exploration
82
+
83
+ **Time Limit:** 1-2 minutes
84
+
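A quick-exploration pass like the one above maps onto a handful of Playwright calls. A hedged sketch, reusing the "Clear" example's `/auth/login` URL and Submit button (both illustrative placeholders, not part of the package):

```ts
import { test, expect } from "@playwright/test";

// Illustrative quick-exploration pass: navigate, verify a key element, screenshot.
test("quick exploration: login feature is accessible", async ({ page }) => {
  await page.goto(`${process.env.TEST_BASE_URL}/auth/login`);
  await expect(page.getByRole("button", { name: "Submit" })).toBeVisible();
  await page.screenshot({ path: "exploration-login-initial.png" });
});
```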
85
+ ### Step {{STEP_NUMBER}}.3: Moderate Exploration (3-5 min)
86
+
87
+ **When:** Requirements VAGUE or Quick Exploration revealed discrepancies
88
+
89
+ **Steps:**
90
+ 1. Navigate using appropriate role(s), set up preconditions, ensure clean state
91
+ 2. Test primary user flow, document steps and behavior, note unexpected behavior
92
+ 3. Capture before/after screenshots, document field values/ordering/visibility
93
+ 4. Compare to requirement: What matches? What differs? What's absent?
94
+ 5. Identify specific ambiguities:
95
+ \`\`\`markdown
96
+ **Moderate Exploration (4 min)**
97
+
98
+ **Explored:** Role: [Admin], Path: [Steps], Behavior: [What happened]
99
+
100
+ **Current State:** [Specific observations with examples]
101
+ - Example: "Admin view shows 8 sort options: By Title, By Due Date, By Priority..."
102
+
103
+ **Requirement Says:** [What requirement expected]
104
+
105
+ **Discrepancies:** [Specific differences]
106
+ - Example: "Premium users see 5 fewer sorting options than admins"
107
+
108
+ **Ambiguities:**
109
+ 1. [First ambiguity with concrete example]
110
+ 2. [Second if applicable]
111
+
112
+ **Clarification Needed:** [Specific questions]
113
+ \`\`\`
114
+ 6. Assess severity using Clarification Protocol
115
+ 7. **Decision:** \u{1F7E2} Minor \u2192 Proceed with assumptions | \u{1F7E1} Medium \u2192 Async clarification, proceed | \u{1F534} Critical \u2192 Stop, escalate
116
+
117
+ **Time Limit:** 3-5 minutes
118
+
119
+ ### Step {{STEP_NUMBER}}.4: Deep Exploration (5-10 min)
120
+
121
+ **When:** Requirements UNCLEAR or critical ambiguities found
122
+
123
+ **Steps:**
124
+ 1. **Define Exploration Matrix:** Identify dimensions (user roles, feature states, input variations, browsers)
125
+
126
+ 2. **Systematic Testing:** Test each matrix cell methodically (see the sketch after this section)
127
+ \`\`\`
128
+ Example for "Todo List Sorting":
129
+ Matrix: User Roles \xD7 Feature Observations
130
+
131
+ Test 1: Admin Role \u2192 Navigate, document sort options (count, names, order), screenshot
132
+ Test 2: Basic User Role \u2192 Same todo list, document options, screenshot
133
+ Test 3: Compare \u2192 Side-by-side table, identify missing/reordered options
134
+ \`\`\`
135
+
136
+ 3. **Document Patterns:** Consistent behavior? Role-based differences? What varies vs constant?
137
+
138
+ 4. **Comprehensive Report:**
139
+ \`\`\`markdown
140
+ **Deep Exploration (8 min)**
141
+
142
+ **Matrix:** [Dimensions] | **Tests:** [X combinations]
143
+
144
+ **Findings:**
145
+
146
+ ### Test 1: Admin
147
+ - Setup: [Preconditions] | Steps: [Actions]
148
+ - Observations: Sort options=8, Options=[list], Ordering=[sequence]
149
+ - Screenshot: [filename-admin.png]
150
+
151
+ ### Test 2: Basic User
152
+ - Setup: [Preconditions] | Steps: [Actions]
153
+ - Observations: Sort options=3, Missing vs Admin=[5 options], Ordering=[sequence]
154
+ - Screenshot: [filename-user.png]
155
+
156
+ **Comparison Table:**
157
+ | Sort Option | Admin Pos | User Pos | Notes |
158
+ |-------------|-----------|----------|-------|
159
+ | By Title | 1 | 1 | Match |
160
+ | By Priority | 3 | Not visible | Missing |
161
+
162
+ **Patterns:**
163
+ - Role-based feature visibility
164
+ - Consistent relative ordering for visible fields
165
+
166
+ **Critical Ambiguities:**
167
+ 1. Option Visibility: Intentional basic users see 5 fewer sort options?
168
+ 2. Sort Definition: (A) All roles see all options in same order, OR (B) Roles see permitted options in same relative order?
169
+
170
+ **Clarification Questions:** [Specific, concrete based on findings]
171
+ \`\`\`
172
+
173
+ 5. **Next Action:** Critical ambiguities \u2192 STOP, clarify | Patterns suggest answer \u2192 Validate assumption | Behavior clear \u2192 Test creation
174
+
175
+ **Time Limit:** 5-10 minutes
176
+
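The matrix enumeration from step 2 above can be made mechanical so no cell is skipped. A minimal sketch, with the role and observation lists taken from the todo-list example:

```ts
// Enumerate every cell of the exploration matrix.
const roles = ["Admin", "Basic User"];
const observations = ["sort option count", "option names", "relative ordering"];

for (const role of roles) {
  for (const observation of observations) {
    console.log(`Test: as ${role}, document ${observation}, capture a screenshot`);
  }
}
```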
177
+ ### Step {{STEP_NUMBER}}.5: Link Exploration to Clarification
178
+
179
+ **Flow:** Requirement Analysis \u2192 Exploration \u2192 Clarification
180
+
181
+ 1. Requirement analysis detects vague language \u2192 Triggers exploration
182
+ 2. Exploration documents current behavior \u2192 Identifies discrepancies
183
+ 3. Clarification uses findings \u2192 Asks specific questions referencing observations
184
+
185
+ **Example:**
186
+ \`\`\`
187
+ "Fix the sorting in todo list"
188
+ \u2193 Ambiguity: "sorting" = by date, priority, or completion status?
189
+ \u2193 Moderate Exploration: Admin=8 sort options, User=3 sort options
190
+ \u2193 Question: "Should basic users see all 8 sort options (bug) or only 3 with consistent sequence (correct)?"
191
+ \`\`\`
192
+
193
+ ### Step {{STEP_NUMBER}}.6: Document Exploration Results
194
+
195
+ **Template:**
196
+ \`\`\`markdown
197
+ ## Exploration Summary
198
+
199
+ **Date:** [YYYY-MM-DD] | **Explorer:** [Agent/User] | **Depth:** [Quick/Moderate/Deep] | **Duration:** [X min]
200
+
201
+ ### Feature: [Name and description]
202
+
203
+ ### Observations: [Key findings]
204
+
205
+ ### Current Behavior: [What feature does today]
206
+
207
+ ### Discrepancies: [Requirement vs observation differences]
208
+
209
+ ### Assumptions Made: [If proceeding with assumptions]
210
+
211
+ ### Artifacts: Screenshots: [list], Video: [if captured], Notes: [detailed]
212
+ \`\`\`
213
+
214
+ **Memory Storage:** Feature behavior patterns, common ambiguity types, resolution approaches
215
+
216
+ ### Step {{STEP_NUMBER}}.7: Integration with Test Creation
217
+
218
+ **Quick Exploration \u2192 Direct Test:**
219
+ - Feature verified \u2192 Create test matching requirement \u2192 Reference screenshot
220
+
221
+ **Moderate Exploration \u2192 Assumption-Based Test:**
222
+ - Document behavior \u2192 Create test on best interpretation \u2192 Mark assumptions \u2192 Plan updates after clarification
223
+
224
+ **Deep Exploration \u2192 Clarification-First:**
225
+ - Block test creation until clarification \u2192 Use exploration as basis for questions \u2192 Create test after answer \u2192 Reference both exploration and clarification
226
+
227
+ ---
228
+
229
+ ## Adaptive Exploration Decision Tree
230
+
231
+ \`\`\`
232
+ Start: Requirement Received
233
+ \u2193
234
+ Are requirements clear with specifics?
235
+ \u251C\u2500 YES \u2192 Quick Exploration (1-2 min)
236
+ \u2502 \u2193
237
+ \u2502 Does feature match description?
238
+ \u2502 \u251C\u2500 YES \u2192 Proceed to Test Creation
239
+ \u2502 \u2514\u2500 NO \u2192 Escalate to Moderate Exploration
240
+ \u2502
241
+ \u2514\u2500 NO \u2192 Is general direction clear but details missing?
242
+ \u251C\u2500 YES \u2192 Moderate Exploration (3-5 min)
243
+ \u2502 \u2193
244
+ \u2502 Are ambiguities MEDIUM severity or lower?
245
+ \u2502 \u251C\u2500 YES \u2192 Document assumptions, proceed with test creation
246
+ \u2502 \u2514\u2500 NO \u2192 Escalate to Deep Exploration or Clarification
247
+ \u2502
248
+ \u2514\u2500 NO \u2192 Deep Exploration (5-10 min)
249
+ \u2193
250
+ Document comprehensive findings
251
+ \u2193
252
+ Assess ambiguity severity
253
+ \u2193
254
+ Seek clarification for CRITICAL/HIGH
255
+ \`\`\`
256
+
257
+ ---
258
+
259
+ ## Remember:
260
+
261
+ \u{1F50D} **Explore before assuming** | \u{1F4CA} **Concrete observations > abstract interpretation** | \u23F1\uFE0F **Adaptive depth: time \u221D uncertainty** | \u{1F3AF} **Exploration findings \u2192 specific clarifications** | \u{1F4DD} **Always document** | \u{1F517} **Link exploration \u2192 ambiguity \u2192 clarification**
262
+ `;
263
+
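The `{{STEP_NUMBER}}` placeholder in the template above is resolved later in this same bundle with a global string replace, e.g. inside generateTestCasesTask's baseContent:

```ts
// As used later in this file: renumber the protocol's steps for the host task.
const section = EXPLORATION_INSTRUCTIONS.replace(/{{STEP_NUMBER}}/g, "1.4");
// "### Step {{STEP_NUMBER}}.1: Assess Requirement Clarity"
// becomes "### Step 1.4.1: Assess Requirement Clarity".
```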
264
+ // src/tasks/templates/knowledge-base.ts
265
+ var KNOWLEDGE_BASE_READ_INSTRUCTIONS = `
266
+ ## Knowledge Base Context
267
+
268
+ Before proceeding, read the curated knowledge base to inform your work:
269
+
270
+ **Location:** \`.bugzy/runtime/knowledge-base.md\`
271
+
272
+ **Purpose:** The knowledge base is a living collection of factual knowledge - what we currently know and believe to be true about this project, its patterns, and its context. This is NOT a historical log, but a curated snapshot that evolves as understanding improves.
273
+
274
+ **How to Use:**
275
+ 1. Read the knowledge base to understand:
276
+ - Project-specific patterns and conventions
277
+ - Known behaviors and system characteristics
278
+ - Relevant context from past work
279
+ - Documented decisions and approaches
280
+
281
+ 2. Apply this knowledge to:
282
+ - Make informed decisions aligned with project patterns
283
+ - Avoid repeating past mistakes
284
+ - Build on existing understanding
285
+ - Maintain consistency with established practices
286
+
287
+ **Note:** The knowledge base may not exist yet or may be empty. If it doesn't exist or is empty, proceed without this context and help build it as you work.
288
+ `;
289
+ var KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS = `
290
+ ## Knowledge Base Maintenance
291
+
292
+ After completing your work, update the knowledge base with new insights.
293
+
294
+ **Location:** \`.bugzy/runtime/knowledge-base.md\`
295
+
296
+ **Process:**
297
+
298
+ 1. **Read the maintenance guide** at \`.bugzy/runtime/knowledge-maintenance-guide.md\` to understand when to ADD, UPDATE, or REMOVE entries and how to maintain a curated knowledge base (not an append-only log)
299
+
300
+ 2. **Review the current knowledge base** to check for overlaps, contradictions, or opportunities to consolidate existing knowledge
301
+
302
+ 3. **Update the knowledge base** following the maintenance guide principles: favor consolidation over addition, update rather than append, resolve contradictions immediately, and focus on quality over completeness
303
+
304
+ **Remember:** Every entry should answer "Will this help someone working on this project in 6 months?"
305
+ `;
306
+
307
+ // src/tasks/library/explore-application.ts
308
+ var exploreApplicationTask = {
309
+ slug: TASK_SLUGS.EXPLORE_APPLICATION,
310
+ name: "Explore Application",
311
+ description: "Systematically explore application to discover UI elements, workflows, and behaviors",
312
+ frontmatter: {
313
+ description: "Systematically explore application to discover UI elements, workflows, and behaviors",
314
+ "argument-hint": "--focus [area] --depth [shallow|deep] --system [system-name]"
315
+ },
316
+ baseContent: `# Explore Application Command
317
+
318
+ ## SECURITY NOTICE
319
+ **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
320
+ - **Read \`.env.testdata\`** for non-secret environment variables (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
321
+ - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
322
+ - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
323
+ - The \`.env\` file access is blocked by settings.json
324
+
325
+ Systematically explore the application using the test-runner agent to discover actual UI elements, workflows, and behaviors, then update the test plan and project documentation with the findings.
326
+
327
+ ## Arguments
328
+ Arguments: $ARGUMENTS
329
+
330
+ ## Parse Arguments
331
+ Extract the following from arguments:
332
+ - **focus**: Specific area to explore (authentication, navigation, search, content, admin)
333
+ - **depth**: Exploration depth - shallow (quick discovery) or deep (comprehensive) - defaults to deep
334
+ - **system**: Which system to explore (optional for multi-system setups)
335
+
336
+ ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
337
+
338
+ ## Process
339
+
340
+ ### Step 0: Understand Exploration Protocol
341
+
342
+ This task implements the exploration protocol defined in the exploration-instructions template.
343
+
344
+ **Purpose**: This task provides the infrastructure for systematic application exploration that is referenced by other tasks (generate-test-plan, generate-test-cases, verify-changes) when they need to explore features before proceeding.
345
+
346
+ **Depth Alignment**: The depth levels in this task align with the exploration template:
347
+ - **Shallow exploration (15-20 min)** implements the quick/moderate exploration from the template
348
+ - **Deep exploration (45-60 min)** implements comprehensive deep exploration from the template
349
+
350
+ The depth levels are extended for full application exploration compared to the focused feature exploration used in other tasks.
351
+
352
+ **Full Exploration Protocol Reference**:
353
+
354
+ ${EXPLORATION_INSTRUCTIONS}
355
+
356
+ **Note**: This task extends the protocol for comprehensive application-wide exploration, while other tasks use abbreviated versions for targeted feature exploration.
357
+
358
+ ### Step 1: Load Environment and Context
359
+
360
+ #### 1.1 Check Environment Variables
361
+ Read \`.env.testdata\` file to understand what variables are required:
362
+ - TEST_BASE_URL or TEST_MOBILE_BASE_URL (base URL variable names)
363
+ - [SYSTEM_NAME]_URL (if multi-system setup)
364
+ - Authentication credential variable names for the selected system
365
+ - Any test data variable names
366
+
367
+ Note: The actual values will be read from the user's \`.env\` file at test execution time.
368
+ Verify \`.env.testdata\` exists to understand the variable structure. If it doesn't exist, notify the user to create it based on the test plan.
369
+
370
+ #### 1.2 Read Current Test Plan
371
+ Read \`test-plan.md\` to:
372
+ - Identify sections marked with [TO BE EXPLORED]
373
+ - Find features requiring discovery
374
+ - Understand testing scope and priorities
375
+
376
+ #### 1.3 Read Project Context
377
+ Read \`.bugzy/runtime/project-context.md\` for:
378
+ - System architecture understanding
379
+ - Testing environment details
380
+ - QA workflow requirements
381
+
382
+ ### Step 2: Prepare Exploration Strategy
383
+
384
+ Based on the arguments and context, prepare exploration instructions.
385
+
386
+ #### 2.1 Focus Area Strategies
387
+
388
+ **If focus is "authentication":**
389
+ \`\`\`
390
+ 1. Navigate to the application homepage
391
+ 2. Locate and document all authentication entry points:
392
+ - Login button/link location and selector
393
+ - Registration option and flow
394
+ - Social login options (Facebook, Google, etc.)
395
+ 3. Test login flow:
396
+ - Document form fields and validation
397
+ - Test error states with invalid credentials
398
+ - Verify successful login indicators
399
+ 4. Test logout functionality:
400
+ - Find logout option
401
+ - Verify session termination
402
+ - Check redirect behavior
403
+ 5. Explore password recovery:
404
+ - Locate forgot password link
405
+ - Document recovery flow
406
+ - Note email/SMS options
407
+ 6. Check role-based access:
408
+ - Identify user role indicators
409
+ - Document permission differences
410
+ - Test admin/moderator access if available
411
+ 7. Test session persistence:
412
+ - Check remember me functionality
413
+ - Test timeout behavior
414
+ - Verify multi-tab session handling
415
+ \`\`\`
416
+
417
+ **If focus is "navigation":**
418
+ \`\`\`
419
+ 1. Document main navigation structure:
420
+ - Primary menu items and hierarchy
421
+ - Mobile menu behavior
422
+ - Footer navigation links
423
+ 2. Map URL patterns:
424
+ - Category URL structure
425
+ - Parameter patterns
426
+ - Deep linking support
427
+ 3. Test breadcrumb navigation:
428
+ - Availability on different pages
429
+ - Clickability and accuracy
430
+ - Mobile display
431
+ 4. Explore category system:
432
+ - Main categories and subcategories
433
+ - Navigation between levels
434
+ - Content organization
435
+ 5. Document special sections:
436
+ - User profiles
437
+ - Admin areas
438
+ - Help/Support sections
439
+ 6. Test browser navigation:
440
+ - Back/forward button behavior
441
+ - History management
442
+ - State preservation
443
+ \`\`\`
444
+
445
+ **If focus is "search":**
446
+ \`\`\`
447
+ 1. Locate search interfaces:
448
+ - Main search bar
449
+ - Advanced search options
450
+ - Category-specific search
451
+ 2. Document search features:
452
+ - Autocomplete/suggestions
453
+ - Search filters
454
+ - Sort options
455
+ 3. Test search functionality:
456
+ - Special character handling
457
+ - Empty/invalid queries
458
+ 4. Analyze search results:
459
+ - Result format and layout
460
+ - Pagination
461
+ - No results handling
462
+ 5. Check search performance:
463
+ - Response times
464
+ - Result relevance
465
+ - Load more/infinite scroll
466
+ \`\`\`
467
+
468
+ **If no focus specified:**
469
+ Use comprehensive exploration covering all major areas.
470
+
471
+ #### 2.2 Depth Configuration
472
+
473
+ **Implementation Note**: These depths implement the exploration protocol defined in exploration-instructions.ts, extended for full application exploration.
474
+
475
+ **Shallow exploration (--depth shallow):**
476
+ - Quick discovery pass (15-20 minutes)
477
+ - Focus on main features only
478
+ - Basic screenshot capture
479
+ - High-level findings
480
+ - *Aligns with Quick/Moderate exploration from template*
481
+
482
+ **Deep exploration (--depth deep or default):**
483
+ - Comprehensive exploration (45-60 minutes)
484
+ - Test edge cases and variations
485
+ - Extensive screenshot documentation
486
+ - Detailed technical findings
487
+ - Performance observations
488
+ - Accessibility notes
489
+ - *Aligns with Deep exploration from template*
490
+
491
+ ### Step 3: Execute Exploration
492
+
493
+ #### 3.1 Create Exploration Test Case
494
+ Generate a temporary exploration test case file at \`./test-cases/EXPLORATION-TEMP.md\`:
495
+
496
+ \`\`\`markdown
497
+ ---
498
+ id: EXPLORATION-TEMP
499
+ title: Application Exploration - [Focus Area or Comprehensive]
500
+ type: exploratory
501
+ priority: high
502
+ ---
503
+
504
+ ## Preconditions
505
+ - Browser with cleared cookies and cache
506
+ - Access to [system] environment
507
+ - Credentials configured per .env.testdata template
508
+
509
+ ## Test Steps
510
+ [Generated exploration steps based on strategy]
511
+
512
+ ## Expected Results
513
+ Document all findings including:
514
+ - UI element locations and selectors
515
+ - Navigation patterns and URLs
516
+ - Feature behaviors and workflows
517
+ - Performance observations
518
+ - Error states and edge cases
519
+ - Screenshots of all key areas
520
+ \`\`\`
521
+
522
+ #### 3.2 Launch Test Runner Agent
523
+ Invoke the test-runner agent with special exploration instructions:
524
+
525
+ \`\`\`
526
+ Execute the exploration test case at ./test-cases/EXPLORATION-TEMP.md with focus on discovery and documentation.
527
+
528
+ Special instructions for exploration mode:
529
+ 1. Take screenshots of EVERY significant UI element and page
530
+ 2. Document all clickable elements with their selectors
531
+ 3. Note all URL patterns and parameters
532
+ 4. Test variations and edge cases where possible
533
+ 5. Document load times and performance observations
534
+ 6. Create detailed findings report with structured data
535
+ 7. Organize screenshots by functional area
536
+ 8. Note any console errors or warnings
537
+ 9. Document which features are accessible vs restricted
538
+
539
+ Generate a comprehensive exploration report that can be used to update project documentation.
540
+ \`\`\`
541
+
542
+ ### Step 4: Process Exploration Results
543
+
544
+ #### 4.1 Read Test Runner Output
545
+ Read the generated test run files from \`./test-runs/[timestamp]/EXPLORATION-TEMP/\`:
546
+ - \`findings.md\` - Main findings document
547
+ - \`test-log.md\` - Detailed step execution
548
+ - \`screenshots/\` - Visual documentation
549
+ - \`summary.json\` - Execution summary
550
+
551
+ #### 4.2 Parse and Structure Findings
552
+ Extract and organize:
553
+ - Discovered features and capabilities
554
+ - UI element selectors and patterns
555
+ - Navigation structure and URLs
556
+ - Authentication flow details
557
+ - Performance metrics
558
+ - Technical observations
559
+ - Areas requiring further investigation
560
+
561
+ ### Step 5: Update Project Artifacts
562
+
563
+ #### 5.1 Update Test Plan
564
+ Read and update \`test-plan.md\`:
565
+ - Replace [TO BE EXPLORED] markers with concrete findings
566
+ - Add newly discovered features to test items
567
+ - Update navigation patterns and URL structures
568
+ - Document actual authentication methods
569
+ - Update environment variables if new ones discovered
570
+ - Refine pass/fail criteria based on actual behavior
571
+
572
+ #### 5.2 Create Exploration Report
573
+ Create \`./exploration-reports/[timestamp]-[focus]-exploration.md\`
574
+
575
+ ### Step 6: Cleanup
576
+
577
+ #### 6.1 Remove Temporary Files
578
+ Delete the temporary exploration test case:
579
+ \`\`\`bash
580
+ rm ./test-cases/EXPLORATION-TEMP.md
581
+ \`\`\`
582
+
583
+ ### Step 7: Generate Summary Report
584
+ Create a concise summary for the user.
585
+
586
+ ## Error Handling
587
+
588
+ ### Environment Issues
589
+ - If \`.env.testdata\` missing: Warn user and suggest creating it from test plan
590
+ - If credentials invalid (at runtime): Document in report and continue with public areas
591
+ - If system unreachable: Retry with exponential backoff, report if persistent
592
+
593
+ ### Exploration Failures
594
+ - If test-runner fails: Capture partial results and report
595
+ - If specific area inaccessible: Note in findings and continue
596
+ - If browser crashes: Attempt recovery and resume
597
+ - If the test-runner stops without creating files, inspect what it did. If the output is insufficient, remove the test run and start the test-runner agent again; if it captured enough information, continue with what you have.
598
+
599
+ ### Data Issues
600
+ - If dynamic content prevents exploration: Note and try alternative approaches
601
+ - If rate limited: Implement delays and retry
602
+
603
+ ## Integration with Other Commands
604
+
605
+ ### Feeds into /generate-test-cases
606
+ - Provides actual UI elements for test steps
607
+ - Documents real workflows for test scenarios
608
+ - Identifies edge cases to test
609
+
610
+ ### Updates from /process-event
611
+ - New exploration findings can be processed as events
612
+ - Discovered bugs trigger issue creation
613
+ - Feature discoveries update test coverage
614
+
615
+ ### Enhances /run-tests
616
+ - Tests use discovered selectors
617
+ - Validation based on actual behavior
618
+ - More reliable test execution
619
+
620
+ ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}`,
621
+ optionalSubagents: [],
622
+ requiredSubagents: ["test-runner"]
623
+ };
624
+
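For reference, every task object in this file carries the same fields. A descriptive sketch inferred from the literals here (the published type lives in dist/tasks/index.d.ts, which is not shown):

```ts
// Inferred shape, not the package's published type:
interface TaskTemplateSketch {
  slug: string;
  name: string;
  description: string;
  frontmatter: Record<string, string>;
  baseContent: string; // markdown command body, with shared protocol templates interpolated
  optionalSubagents: Array<{ role: string; contentBlock: string }>;
  requiredSubagents: string[];
}
```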
625
+ // src/tasks/templates/clarification-instructions.ts
626
+ var CLARIFICATION_INSTRUCTIONS = `
627
+ ## Clarification Protocol
628
+
629
+ Before proceeding with test creation or execution, ensure requirements are clear and testable. Use this protocol to detect ambiguity, assess its severity, and determine the appropriate action.
630
+
631
+ ### Step {{STEP_NUMBER}}.0: Check for Pending Clarification
632
+
633
+ Before starting, check if this task is resuming from a blocked clarification:
634
+
635
+ 1. **Check $ARGUMENTS for clarification data:**
636
+ - If \`$ARGUMENTS.clarification\` exists, this task is resuming with a clarification response
637
+ - Extract: \`clarification\` (the user's answer), \`originalArgs\` (original task parameters)
638
+
639
+ 2. **If clarification is present:**
640
+ - Read \`.bugzy/runtime/blocked-task-queue.md\`
641
+ - Find and remove your task's entry from the queue (update the file)
642
+ - Proceed using the clarification as if user just provided the answer
643
+ - Skip ambiguity detection for the clarified aspect
644
+
645
+ 3. **If no clarification in $ARGUMENTS:** Proceed normally with ambiguity detection below.
646
+
647
+ ### Step {{STEP_NUMBER}}.1: Detect Ambiguity
648
+
649
+ Scan for ambiguity signals:
650
+
651
+ **Language:** Vague terms ("fix", "improve", "better", "like", "mixed up"), relative terms without reference ("faster", "more"), undefined scope ("the ordering", "the fields", "the page"), modal ambiguity ("should", "could" vs "must", "will")
652
+
653
+ **Details:** Missing acceptance criteria (no clear PASS/FAIL), no examples/mockups, incomplete field/element lists, unclear role behavior differences, unspecified error scenarios
654
+
655
+ **Interpretation:** Multiple valid interpretations, contradictory information (description vs comments), implied vs explicit requirements
656
+
657
+ **Context:** No reference documentation, "RELEASE APPROVED" without criteria, quick ticket creation, assumes knowledge ("as you know...", "obviously...")
658
+
659
+ **Quick Check:**
660
+ - [ ] Success criteria explicitly defined? (PASS if X, FAIL if Y)
661
+ - [ ] All affected elements specifically listed? (field names, URLs, roles)
662
+ - [ ] Only ONE reasonable interpretation?
663
+ - [ ] Examples, screenshots, or mockups provided?
664
+ - [ ] Consistent with existing system patterns?
665
+ - [ ] Can write test assertions without assumptions?
666
+
667
+ ### Step {{STEP_NUMBER}}.2: Assess Severity
668
+
669
+ If ambiguity is detected, assess its severity:
670
+
671
+ | Severity | Characteristics | Examples | Action |
672
+ |----------|----------------|----------|--------|
673
+ | \u{1F534} **CRITICAL** | Expected behavior undefined/contradictory; test outcome unpredictable; core functionality unclear; success criteria missing; multiple interpretations = different strategies | "Fix the issue" (what issue?), "Improve performance" (which metrics?), "Fix sorting in todo list" (by date? priority? completion status?) | **STOP** - Seek clarification before proceeding |
674
+ | \u{1F7E0} **HIGH** | Core underspecified but direction clear; affects majority of scenarios; vague success criteria; assumptions risky | "Fix ordering" (sequence OR visibility?), "Add validation" (what? messages?), "Update dashboard" (which widgets?) | **STOP** - Seek clarification before proceeding |
675
+ | \u{1F7E1} **MEDIUM** | Specific details missing; general requirements clear; affects subset of cases; reasonable low-risk assumptions possible; wrong assumption = test updates not strategy overhaul | Missing field labels, unclear error message text, undefined timeouts, button placement not specified, date formats unclear | **PROCEED** - (1) Moderate exploration, (2) Document assumptions: "Assuming X because Y", (3) Proceed with creation/execution, (4) Async clarification (team-communicator), (5) Mark [ASSUMED: description] |
676
+ | \u{1F7E2} **LOW** | Minor edge cases; documentation gaps don't affect execution; optional/cosmetic elements; minimal impact | Tooltip text, optional field validation, icon choice, placeholder text, tab order | **PROCEED** - (1) Mark [TO BE CLARIFIED: description], (2) Proceed, (3) Mention in report "Minor Details", (4) No blocking/async clarification |
677
+
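The Action column of the table above reduces to a simple dispatch. A hypothetical sketch (the function and its strings are illustrative, not a package API):

```ts
type Severity = "CRITICAL" | "HIGH" | "MEDIUM" | "LOW";

// Illustrative routing of the severity table's Action column.
function nextAction(severity: Severity): string {
  switch (severity) {
    case "CRITICAL":
    case "HIGH":
      return "STOP: ask for clarification and register the task in blocked-task-queue.md";
    case "MEDIUM":
      return "PROCEED: document [ASSUMED: ...] and request async clarification";
    case "LOW":
      return "PROCEED: mark [TO BE CLARIFIED: ...] and note it in the report";
  }
}
```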
678
+ ### Step {{STEP_NUMBER}}.3: Check Memory for Similar Clarifications
679
+
680
+ Before asking, check if similar question was answered:
681
+
682
+ **Process:**
683
+ 1. **Query team-communicator memory** - Search by feature name, ambiguity pattern, ticket keywords
684
+ 2. **Review past Q&A** - Similar question asked? What was answer? Applicable now?
685
+ 3. **Assess reusability:**
686
+ - Directly applicable \u2192 Use answer, no re-ask
687
+ - Partially applicable \u2192 Adapt and reference ("Previously for X, clarified Y. Same here?")
688
+ - Not applicable \u2192 Ask as new
689
+ 4. **Update memory** - Store Q&A with task type, feature, pattern tags
690
+
691
+ **Example:** Query "todo sorting priority" \u2192 Found 2025-01-15: "Should completed todos appear in main list?" \u2192 Answer: "No, move to separate archive view" \u2192 Directly applicable \u2192 Use, no re-ask needed
692
+
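The lookup itself could be as simple as a keyword match over stored Q&A records. A hypothetical sketch; the record shape and store are assumptions, since the team-communicator's actual memory format is not shown here:

```ts
interface ClarificationRecord {
  date: string;
  feature: string;
  question: string;
  answer: string;
  tags: string[];
}

// Hypothetical keyword search over past clarifications.
function findSimilar(
  records: ClarificationRecord[],
  keywords: string[]
): ClarificationRecord | undefined {
  return records.find((record) =>
    keywords.some(
      (k) => record.question.includes(k) || record.feature.includes(k) || record.tags.includes(k)
    )
  );
}
```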
693
+ ### Step {{STEP_NUMBER}}.4: Formulate Clarification Questions
694
+
695
+ If clarification needed (CRITICAL/HIGH severity), formulate specific, concrete questions:
696
+
697
+ **Good Questions:** Specific and concrete, provide context, offer options, reference examples, tie to test strategy
698
+
699
+ **Bad Questions:** Too vague/broad, assumptive, multiple questions in one, no context
700
+
701
+ **Template:**
702
+ \`\`\`
703
+ **Context:** [Current understanding]
704
+ **Ambiguity:** [Specific unclear aspect]
705
+ **Question:** [Specific question with options]
706
+ **Why Important:** [Testing strategy impact]
707
+
708
+ Example:
709
+ Context: TODO-456 "Fix the sorting in the todo list so items appear in the right order"
710
+ Ambiguity: "sorting" = (A) by creation date, (B) by due date, (C) by priority level, or (D) custom user-defined order
711
+ Question: Should todos be sorted by due date (soonest first) or priority (high to low)? Should completed items appear in the list or move to archive?
712
+ Why Important: Different sort criteria require different test assertions. Current app shows 15 active todos + 8 completed in mixed order.
713
+ \`\`\`
714
+
715
+ ### Step {{STEP_NUMBER}}.5: Communicate Clarification Request
716
+
717
+ **For Slack-Triggered Tasks:** Use team-communicator subagent:
718
+ \`\`\`
719
+ Ask clarification in Slack thread:
720
+ Context: [From ticket/description]
721
+ Ambiguity: [Describe ambiguity]
722
+ Severity: [CRITICAL/HIGH]
723
+ Questions:
724
+ 1. [First specific question]
725
+ 2. [Second if needed]
726
+
727
+ Clarification needed to proceed. I'll wait for response before testing.
728
+ \`\`\`
729
+
730
+ **For Manual/API Triggers:** Include in task output:
731
+ \`\`\`markdown
732
+ ## \u26A0\uFE0F Clarification Required Before Testing
733
+
734
+ **Ambiguity:** [Description]
735
+ **Severity:** [CRITICAL/HIGH]
736
+
737
+ ### Questions:
738
+ 1. **Question:** [First question]
739
+ - Context: [Provide context]
740
+ - Options: [If applicable]
741
+ - Impact: [Testing impact]
742
+
743
+ **Action Required:** Provide clarification. Testing cannot proceed.
744
+ **Current Observation:** [What exploration revealed - concrete examples]
745
+ \`\`\`
746
+
747
+ ### Step {{STEP_NUMBER}}.5.1: Register Blocked Task (CRITICAL/HIGH only)
748
+
749
+ When asking a CRITICAL or HIGH severity question that blocks progress, register the task in the blocked queue so it can be automatically re-triggered when clarification arrives.
750
+
751
+ **Update \`.bugzy/runtime/blocked-task-queue.md\`:**
752
+
753
+ 1. Read the current file (create if doesn't exist)
754
+ 2. Add a new row to the Queue table
755
+
756
+ \`\`\`markdown
757
+ # Blocked Task Queue
758
+
759
+ Tasks waiting for clarification responses.
760
+
761
+ | Task Slug | Question | Original Args |
762
+ |-----------|----------|---------------|
763
+ | generate-test-plan | Should todos be sorted by date or priority? | \`{"ticketId": "TODO-456"}\` |
764
+ \`\`\`
765
+
766
+ **Entry Fields:**
767
+ - **Task Slug**: The task slug (e.g., \`generate-test-plan\`) - used for re-triggering
768
+ - **Question**: The clarification question asked (so LLM can match responses)
769
+ - **Original Args**: JSON-serialized \`$ARGUMENTS\` wrapped in backticks
770
+
771
+ **Purpose**: The LLM processor reads this file and matches user responses to pending questions. When a match is found, it re-queues the task with the clarification.
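Registering an entry amounts to a read-append on the markdown table above. A minimal Node sketch (the helper name is illustrative; the file path and columns match the format shown):

```ts
import { appendFileSync, existsSync, writeFileSync } from "node:fs";

const QUEUE = ".bugzy/runtime/blocked-task-queue.md";
const HEADER =
  "# Blocked Task Queue\n\nTasks waiting for clarification responses.\n\n" +
  "| Task Slug | Question | Original Args |\n|-----------|----------|---------------|\n";

// Append a row in the format shown above, creating the file on first use.
function registerBlockedTask(slug: string, question: string, originalArgs: unknown): void {
  if (!existsSync(QUEUE)) writeFileSync(QUEUE, HEADER);
  appendFileSync(QUEUE, `| ${slug} | ${question} | \`${JSON.stringify(originalArgs)}\` |\n`);
}
```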
772
+
773
+ ### Step {{STEP_NUMBER}}.6: Wait or Proceed Based on Severity
774
+
775
+ **CRITICAL/HIGH \u2192 STOP and Wait:**
776
+ - Do NOT create tests, run tests, or make assumptions
777
+ - Wait for clarification, resume after answer
778
+ - *Rationale: Wrong assumptions = incorrect tests, false results, wasted time*
779
+
780
+ **MEDIUM \u2192 Proceed with Documented Assumptions:**
781
+ - Perform moderate exploration, document assumptions, proceed with creation/execution
782
+ - Ask clarification async (team-communicator), mark results "based on assumptions"
783
+ - Update tests after clarification received
784
+ - *Rationale: Waiting blocks progress; documented assumptions allow forward movement with later corrections*
785
+
786
+ **LOW \u2192 Proceed and Mark:**
787
+ - Proceed with creation/execution, mark gaps [TO BE CLARIFIED] or [ASSUMED]
788
+ - Mention in report but don't prioritize, no blocking
789
+ - *Rationale: Details don't affect strategy/results significantly*
790
+
791
+ ### Step {{STEP_NUMBER}}.7: Document Clarification in Results
792
+
793
+ When reporting test results, always include an "Ambiguities" section if clarification occurred:
794
+
795
+ \`\`\`markdown
796
+ ## Ambiguities Encountered
797
+
798
+ ### Clarification: [Topic]
799
+ - **Severity:** [CRITICAL/HIGH/MEDIUM/LOW]
800
+ - **Question Asked:** [What was asked]
801
+ - **Response:** [Answer received, or "Awaiting response"]
802
+ - **Impact:** [How this affected testing]
803
+ - **Assumption Made:** [If proceeded with assumption]
804
+ - **Risk:** [What could be wrong if assumption is incorrect]
805
+
806
+ ### Resolution:
807
+ [How the clarification was resolved and incorporated into testing]
808
+ \`\`\`
809
+
810
+ ---
811
+
812
+ ## Remember:
813
+
814
+ \u{1F6D1} **Block for CRITICAL/HIGH** | \u2705 **Ask correctly > guess poorly** | \u{1F4DD} **Document MEDIUM assumptions** | \u{1F50D} **Check memory first** | \u{1F3AF} **Specific questions \u2192 specific answers**
815
+ `;
816
+
817
+ // src/tasks/library/generate-test-cases.ts
818
+ var generateTestCasesTask = {
819
+ slug: TASK_SLUGS.GENERATE_TEST_CASES,
820
+ name: "Generate Test Cases",
821
+ description: "Generate manual test case documentation AND automated Playwright test scripts from test plan",
822
+ frontmatter: {
823
+ description: "Generate manual test case documentation AND automated Playwright test scripts from test plan",
824
+ "argument-hint": "--type [exploratory|functional|regression|smoke] --focus [optional-feature]"
825
+ },
826
+ baseContent: `# Generate Test Cases Command
827
+
828
+ ## SECURITY NOTICE
829
+ **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
830
+ - **Read \`.env.testdata\`** for non-secret test data (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
831
+ - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
832
+ - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
833
+ - The \`.env\` file access is blocked by settings.json
834
+
835
+ Generate comprehensive test artifacts including BOTH manual test case documentation AND automated Playwright test scripts.
836
+
837
+ ## Overview
838
+
839
+ This command generates:
840
+ 1. **Manual Test Case Documentation** (in \`./test-cases/\`) - Human-readable test cases in markdown format
841
+ 2. **Automated Playwright Tests** (in \`./tests/specs/\`) - Executable TypeScript test scripts
842
+ 3. **Page Object Models** (in \`./tests/pages/\`) - Reusable page classes for automated tests
843
+ 4. **Supporting Files** (fixtures, helpers, components) - As needed for test automation
844
+
845
+ ## Arguments
846
+ Arguments: $ARGUMENTS
847
+
848
+ ## Parse Arguments
849
+ Extract the following from arguments:
850
+ - **type**: Test type (exploratory, functional, regression, smoke) - defaults to functional
851
+ - **focus**: Optional specific feature or section to focus on
852
+
853
+ ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
854
+
855
+ ## Process
856
+
857
+ ### Step 1: Gather Context
858
+
859
+ #### 1.1 Read Test Plan
860
+ Read the test plan from \`test-plan.md\` to understand:
861
+ - Test items and features
862
+ - Testing approach and automation strategy
863
+ - Test Automation Strategy section (automated vs exploratory)
864
+ - Pass/fail criteria
865
+ - Test environment and data requirements
866
+ - Automation decision criteria
867
+
868
+ #### 1.2 Check Existing Test Cases and Tests
869
+ - List all files in \`./test-cases/\` to understand existing manual test coverage
870
+ - List all files in \`./tests/specs/\` to understand existing automated tests
871
+ - Determine next test case ID (TC-XXX format)
872
+ - Identify existing Page Objects in \`./tests/pages/\`
873
+ - Avoid creating overlapping test cases or duplicate automation
874
+
875
+ {{DOCUMENTATION_RESEARCHER_INSTRUCTIONS}}
876
+
877
+ ### Step 1.4: Explore Features (If Needed)
878
+
879
+ If documentation is insufficient or ambiguous, perform adaptive exploration to understand actual feature behavior before creating test cases.
880
+
881
+ ${EXPLORATION_INSTRUCTIONS.replace(/{{STEP_NUMBER}}/g, "1.4")}
882
+
883
+ ### Step 1.5: Clarify Ambiguities
884
+
885
+ If exploration or documentation review reveals ambiguous requirements, use the clarification protocol to resolve them before generating test cases.
886
+
887
+ ${CLARIFICATION_INSTRUCTIONS.replace(/{{STEP_NUMBER}}/g, "1.5")}
888
+
889
+ **Important Notes:**
890
+ - **CRITICAL/HIGH ambiguities:** STOP test case generation and seek clarification
891
+ - **MEDIUM ambiguities:** Document assumptions explicitly in test case with [ASSUMED: reason]
892
+ - **LOW ambiguities:** Mark with [TO BE CLARIFIED: detail] in test case notes section
893
+
894
+ ### Step 1.6: Organize Test Scenarios by Area
895
+
896
+ Based on exploration and documentation, organize test scenarios by feature area/component:
897
+
898
+ **Group scenarios into areas** (e.g., Authentication, Dashboard, Checkout, Profile Management):
899
+ - Each area should be a logical feature grouping
900
+ - Areas should be relatively independent for parallel test execution
901
+ - Consider the application's navigation structure and user flows
902
+
903
+ **For each area, identify scenarios**:
904
+
905
+ 1. **Critical User Paths** (must automate as smoke tests):
906
+ - Login/authentication flows
907
+ - Core feature workflows
908
+ - Data creation/modification flows
909
+ - Critical business transactions
910
+
911
+ 2. **Happy Path Scenarios** (automate for regression):
912
+ - Standard user workflows
913
+ - Common use cases
914
+ - Typical data entry patterns
915
+
916
+ 3. **Error Handling Scenarios** (evaluate automation ROI):
917
+ - Validation error messages
918
+ - Network error handling
919
+ - Permission/authorization errors
920
+
921
+ 4. **Edge Cases** (consider manual testing):
922
+ - Rare scenarios (<1% occurrence)
923
+ - Complex exploratory scenarios
924
+ - Visual/UX validation requiring judgment
925
+ - Features in heavy flux
926
+
927
+ **Output**: Test scenarios organized by area with automation decisions for each
928
+
929
+ Example structure:
930
+ - **Authentication**: TC-001 Valid login (smoke, automate), TC-002 Invalid password (automate), TC-003 Password reset (automate)
931
+ - **Dashboard**: TC-004 View dashboard widgets (smoke, automate), TC-005 Filter data by date (automate), TC-006 Export data (manual - rare use)
932
+
933
+ ### Step 1.7: Generate All Manual Test Case Files
934
+
935
+ Generate ALL manual test case markdown files in the \`./test-cases/\` directory BEFORE invoking the test-code-generator agent.
936
+
937
+ **For each test scenario from Step 1.6:**
938
+
939
+ 1. **Create test case file** in \`./test-cases/\` with format \`TC-XXX-feature-description.md\`
940
+ 2. **Include frontmatter** with:
941
+ - \`id:\` TC-XXX (sequential ID)
942
+ - \`title:\` Clear, descriptive title
943
+ - \`automated:\` true/false (based on automation decision from Step 1.6)
944
+ - \`automated_test:\` (leave empty - will be filled by subagent when automated)
945
+ - \`type:\` exploratory/functional/regression/smoke
946
+ - \`area:\` Feature area/component
947
+ 3. **Write test case content**:
948
+ - **Objective**: Clear description of what is being tested
949
+ - **Preconditions**: Setup requirements, test data needed
950
+ - **Test Steps**: Numbered, human-readable steps
951
+ - **Expected Results**: What should happen at each step
952
+ - **Test Data**: Environment variables to use (e.g., \${TEST_BASE_URL}, \${TEST_OWNER_EMAIL})
953
+ - **Notes**: Any assumptions, clarifications needed, or special considerations
954
+
955
+ **Output**: All manual test case markdown files created in \`./test-cases/\` with automation flags set
956
+
957
+ ### Step 2: Automate Test Cases Area by Area
958
+
959
+ **IMPORTANT**: Process each feature area separately to enable incremental, focused test creation.
960
+
961
+ **For each area from Step 1.6**, invoke the test-code-generator agent:
962
+
963
+ #### Step 2.1: Prepare Area Context
964
+
965
+ Before invoking the agent, identify the test cases for the current area:
966
+ - Current area name
967
+ - Test case files for this area (e.g., TC-001-valid-login.md, TC-002-invalid-password.md)
968
+ - Which test cases are marked for automation (automated: true)
969
+ - Test type: {type}
970
+ - Test plan reference: test-plan.md
971
+ - Existing automated tests in ./tests/specs/
972
+ - Existing Page Objects in ./tests/pages/
973
+
974
+ #### Step 2.2: Invoke test-code-generator Agent
975
+
976
+ Use the test-code-generator agent for the current area with the following context:
977
+
978
+ **Agent Invocation:**
979
+ "Use the test-code-generator agent to automate test cases for the [AREA_NAME] area.
980
+
981
+ **Context:**
982
+ - Area: [AREA_NAME]
983
+ - Manual test case files to automate: [list TC-XXX files marked with automated: true]
984
+ - Test type: {type}
985
+ - Test plan: test-plan.md
986
+ - Manual test cases directory: ./test-cases/
987
+ - Existing automated tests: ./tests/specs/
988
+ - Existing Page Objects: ./tests/pages/
989
+
990
+ **The agent should:**
991
+ 1. Read the manual test case files for this area
992
+ 2. Check existing Page Object infrastructure for this area
993
+ 3. Explore the feature area to understand implementation (gather selectors, URLs, flows)
994
+ 4. Build missing Page Objects and supporting code
995
+ 5. For each test case marked \`automated: true\`:
996
+ - Create automated Playwright test in ./tests/specs/
997
+ - Update the manual test case file to reference the automated test path
998
+ 6. Run and iterate on each test until it passes or fails with a product bug
999
+ 7. Update .env.testdata with any new variables
1000
+
1001
+ **Focus only on the [AREA_NAME] area** - do not automate tests for other areas yet."
1002
+
1003
+ #### Step 2.3: Verify Area Completion
1004
+
1005
+ After the agent completes the area, verify:
1006
+ - Manual test case files updated with automated_test references
1007
+ - Automated tests created for all test cases marked automated: true
1008
+ - Tests are passing (or failing with documented product bugs)
1009
+ - Page Objects created/updated for the area
1010
+
1011
+ #### Step 2.4: Repeat for Next Area
1012
+
1013
+ Move to the next area and repeat Steps 2.1-2.3 until all areas are complete.
1014
+
1015
+ **Benefits of area-by-area approach**:
1016
+ - Agent focuses on one feature at a time
1017
+ - POMs built incrementally as needed
1018
+ - Tests verified before moving to next area
1019
+ - Easier to manage and track progress
1020
+ - Can pause/resume between areas if needed
1021
+
1022
+ ### Step 2.5: Validate Generated Artifacts
1023
+
1024
+ After the test-code-generator completes, verify:
1025
+
1026
+ 1. **Manual Test Cases (in \`./test-cases/\`)**:
1027
+ - Each has unique TC-XXX ID
1028
+ - Frontmatter includes \`automated: true/false\` flag
1029
+ - If automated, includes \`automated_test\` path reference
1030
+ - Contains human-readable steps and expected results
1031
+ - References environment variables for test data
1032
+
1033
+ 2. **Automated Tests (in \`./tests/specs/\`)**:
1034
+ - Organized by feature in subdirectories
1035
+ - Each test file references manual test case ID in comments
1036
+ - Uses Page Object Model pattern
1037
+ - Follows role-based selector priority
1038
+ - Uses environment variables for test data
1039
+ - Includes proper TypeScript typing
1040
+
1041
+ 3. **Page Objects (in \`./tests/pages/\`)** (see the sketch after this list):
1042
+ - Extend BasePage class
1043
+ - Use semantic selectors (getByRole, getByLabel, getByText)
1044
+ - Contain only actions, no assertions
1045
+ - Properly typed with TypeScript
1046
+
1047
+ 4. **Supporting Files**:
1048
+ - Fixtures created for common setup (in \`./tests/fixtures/\`)
1049
+ - Helper functions for data generation (in \`./tests/helpers/\`)
1050
+ - Component objects for reusable UI elements (in \`./tests/components/\`)
1051
+ - Types defined as needed (in \`./tests/types/\`)
1052
+
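A Page Object meeting the conventions in item 3 might look like the following sketch. It assumes the shipped BasePage template takes the Playwright `Page` in its constructor and exposes it as `this.page`; the selectors are placeholders:

```ts
import type { Page } from "@playwright/test";
import { BasePage } from "../pages/BasePage";

// Actions only; assertions stay in the spec files.
export class LoginPage extends BasePage {
  constructor(page: Page) {
    super(page);
  }

  async login(email: string, password: string): Promise<void> {
    await this.page.getByLabel("Email").fill(email);
    await this.page.getByLabel("Password").fill(password);
    await this.page.getByRole("button", { name: "Sign in" }).click();
  }
}
```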
1053
+ ### Step 3: Create Directories if Needed
1054
+
1055
+ Ensure required directories exist:
1056
+ \`\`\`bash
1057
+ mkdir -p ./test-cases
1058
+ mkdir -p ./tests/specs
1059
+ mkdir -p ./tests/pages
1060
+ mkdir -p ./tests/components
1061
+ mkdir -p ./tests/fixtures
1062
+ mkdir -p ./tests/helpers
1063
+ \`\`\`
1064
+
1065
+ ### Step 4: Update .env.testdata (if needed)
1066
+
1067
+ If new environment variables were introduced:
1068
+ - Read current \`.env.testdata\`
1069
+ - Add new TEST_* variables with empty values
1070
+ - Group variables logically with comments
1071
+ - Document what each variable is for
1072
+
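In test code, the split described above looks like this. Variable names follow the TEST_ convention used throughout this file; only the password is a secret injected at runtime:

```ts
// Non-secret values come from .env.testdata; secrets are injected at runtime.
const baseUrl = process.env.TEST_BASE_URL;
const ownerEmail = process.env.TEST_OWNER_EMAIL;
const ownerPassword = process.env.TEST_OWNER_PASSWORD; // never read from .env directly

if (!baseUrl || !ownerEmail) {
  throw new Error("Missing test data: check .env.testdata for required TEST_* variables");
}
```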
1073
+ ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
1074
+
1075
+ {{TEAM_COMMUNICATOR_INSTRUCTIONS}}
1076
+
1077
+ ### Step 5: Final Summary
1078
+
1079
+ Provide a comprehensive summary showing:
1080
+
1081
+ **Manual Test Cases:**
1082
+ - Number of manual test cases created
1083
+ - List of test case files with IDs and titles
1084
+ - Automation status for each (automated: yes/no)
1085
+
1086
+ **Automated Tests:**
1087
+ - Number of automated test scripts created
1088
+ - List of spec files with test counts
1089
+ - Page Objects created or updated
1090
+ - Fixtures and helpers added
1091
+
1092
+ **Test Coverage:**
1093
+ - Features covered by manual tests
1094
+ - Features covered by automated tests
1095
+ - Areas kept manual-only (and why)
1096
+
1097
+ **Next Steps:**
1098
+ - Command to run automated tests: \`npx playwright test\`
1099
+ - Instructions to run specific test file
1100
+ - Note about copying .env.testdata to .env
1101
+ - Mention any exploration needed for edge cases
1102
+
1103
+ ### Important Notes
1104
+
1105
+ - **Both Manual AND Automated**: Generate both artifacts - they serve different purposes
1106
+ - **Manual Test Cases**: Documentation, reference, can be executed manually when needed
1107
+ - **Automated Tests**: Fast, repeatable, for CI/CD and regression testing
1108
+ - **Automation Decision**: Not all test cases need automation - rare edge cases can stay manual
1109
+ - **Linking**: Manual test cases reference automated tests; automated tests reference manual test case IDs
1110
+ - **Two-Phase Workflow**: First generate all manual test cases (Step 1.7), then automate area-by-area (Step 2)
1111
+ - **Ambiguity Handling**: Use exploration (Step 1.4) and clarification (Step 1.5) protocols before generating
1112
+ - **Environment Variables**: Use \`process.env.VAR_NAME\` in tests, update .env.testdata as needed
1113
+ - **Test Independence**: Each test must be runnable in isolation and in parallel`,
1114
+ optionalSubagents: [
1115
+ {
1116
+ role: "documentation-researcher",
1117
+ contentBlock: `#### 1.4 Gather Product Documentation
1118
+
1119
+ Use the documentation-researcher agent to gather comprehensive product documentation:
1120
+
1121
+ \`\`\`
1122
+ Use the documentation-researcher agent to explore all available product documentation, specifically focusing on:
1123
+ - UI elements and workflows
1124
+ - User interactions and navigation paths
1125
+ - Form fields and validation rules
1126
+ - Error messages and edge cases
1127
+ - Authentication and authorization flows
1128
+ - Business rules and constraints
1129
+ - API endpoints for test data setup
1130
+ \`\`\``
1131
+ },
1132
+ {
1133
+ role: "team-communicator",
1134
+ contentBlock: `### Step 4.5: Team Communication
1135
+
1136
+ Use the team-communicator agent to notify the product team about the new test cases and automated tests:
1137
+
1138
+ \`\`\`
1139
+ Use the team-communicator agent to:
1140
+ 1. Post an update about test case and automation creation
1141
+ 2. Provide summary of coverage:
1142
+ - Number of manual test cases created
1143
+ - Number of automated tests created
1144
+ - Features covered by automation
1145
+ - Areas kept manual-only (and why)
1146
+ 3. Highlight key automated test scenarios
1147
+ 4. Share command to run automated tests: npx playwright test
1148
+ 5. Ask for team review and validation
1149
+ 6. Mention any areas needing exploration or clarification
1150
+ 7. Use appropriate channel and threading for the update
1151
+ \`\`\`
1152
+
1153
+ The team communication should include:
1154
+ - **Test artifacts created**: Manual test cases + automated tests count
1155
+ - **Automation coverage**: Which features are now automated
1156
+ - **Manual-only areas**: Why some tests are kept manual (rare scenarios, exploratory)
1157
+ - **Key automated scenarios**: Critical paths now covered by automation
1158
+ - **Running tests**: Command to execute automated tests
1159
+ - **Review request**: Ask team to validate scenarios and review test code
1160
+ - **Next steps**: Plans for CI/CD integration or additional test coverage
1161
+
1162
+ **Update team communicator memory:**
1163
+ - Record this communication
1164
+ - Note test case and automation creation
1165
+ - Track team feedback on automation approach
1166
+ - Document any clarifications requested`
1167
+ }
1168
+ ],
1169
+ requiredSubagents: ["test-runner", "test-code-generator"]
1170
+ };
1171
+
1172
+ // src/tasks/library/generate-test-plan.ts
1173
+ var generateTestPlanTask = {
1174
+ slug: TASK_SLUGS.GENERATE_TEST_PLAN,
1175
+ name: "Generate Test Plan",
1176
+ description: "Generate a comprehensive test plan from product description",
1177
+ frontmatter: {
1178
+ description: "Generate a comprehensive test plan from product description",
1179
+ "argument-hint": "<product-description>"
1180
+ },
1181
+ baseContent: `# Generate Test Plan Command
1182
+
1183
+ ## SECURITY NOTICE
1184
+ **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
1185
+ - **Read \`.env.testdata\`** for non-secret test data (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
1186
+ - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
1187
+ - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
1188
+ - The \`.env\` file access is blocked by settings.json
1189
+
1190
+ Generate a comprehensive test plan from product description following the Brain Module specifications.
1191
+
1192
+ ## Arguments
1193
+ Product description: $ARGUMENTS
1194
+
1195
+ ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
1196
+
1197
+ ## Process
1198
+
1199
+ ### Step 1: Load project context
1200
+ Read \`.bugzy/runtime/project-context.md\` to understand:
1201
+ - Project overview and key platform features
1202
+ - SDLC methodology and sprint duration
1203
+ - Testing environment and goals
1204
+ - Technical stack and constraints
1205
+ - QA workflow and processes
1206
+
1207
+ ### Step 1.5: Process the product description
1208
+ Use the product description provided directly in the arguments, enriched with project context understanding.
1209
+
1210
+ ### Step 1.6: Initialize environment variables tracking
1211
+ Create a list to track all TEST_ prefixed environment variables discovered throughout the process.
1212
+
1213
+ {{DOCUMENTATION_RESEARCHER_INSTRUCTIONS}}
1214
+
1215
+ ### Step 1.7: Explore Product (If Needed)
1216
+
1217
+ If product description is vague or incomplete, perform adaptive exploration to understand actual product features and behavior.
1218
+
1219
+ ${EXPLORATION_INSTRUCTIONS.replace(/{{STEP_NUMBER}}/g, "1.7")}
1220
+
1221
+ ### Step 1.8: Clarify Ambiguities
1222
+
1223
+ If exploration or product description reveals ambiguous requirements, use the clarification protocol before generating the test plan.
1224
+
1225
+ ${CLARIFICATION_INSTRUCTIONS.replace(/{{STEP_NUMBER}}/g, "1.8")}
1226
+
1227
+ **Important Notes:**
1228
+ - **CRITICAL/HIGH ambiguities:** STOP test plan generation and seek clarification
1229
+ - Examples: Undefined core features, unclear product scope, contradictory requirements
1230
+ - **MEDIUM ambiguities:** Document assumptions in test plan with [ASSUMED: reason] and seek async clarification
1231
+ - Examples: Missing field lists, unclear validation rules, vague user roles
1232
+ - **LOW ambiguities:** Mark with [TO BE EXPLORED: detail] in test plan for future investigation
1233
+ - Examples: Optional features, cosmetic details, non-critical edge cases
1234
+
1235
+ ### Step 3: Prepare the test plan generation context
1236
+
1237
+ **After ensuring requirements are clear through exploration and clarification:**
1238
+
1239
+ Based on the gathered information:
1240
+ - **goal**: Extract the main purpose and objectives from all available documentation
1241
+ - **knowledge**: Combine product description with discovered documentation insights
1242
+ - **testPlan**: Use the standard test plan template structure, enriched with documentation findings
1243
+ - **gaps**: Identify areas lacking documentation that will need exploration
1244
+
1245
+ ### Step 4: Generate the test plan using the prompt template
1246
+
1247
+ You are an expert QA Test Plan Writer with expertise in both manual and automated testing strategies. Using the gathered information and context from the product description provided, you will now produce a comprehensive test plan in Markdown format that includes an automation strategy.
1248
+
1249
+ Writing Instructions:
1250
+ - **Use Product Terminology:** Incorporate exact terms and labels from the product description for features and UI elements (to ensure the test plan uses official naming).
1251
+ - **Testing Scope:** The plan covers both automated E2E testing via Playwright and exploratory manual testing. Focus on what a user can do and see in a browser.
1252
+ - **Test Data - IMPORTANT:**
1253
+ - DO NOT include test data values in the test plan body
1254
+ - Test data goes ONLY to the \`.env.testdata\` file
1255
+ - In the test plan, reference \`.env.testdata\` for test data requirements
1256
+ - Define test data as environment variables prefixed with TEST_ (e.g., TEST_BASE_URL, TEST_USER_EMAIL, TEST_USER_PASSWORD)
1257
+ - DO NOT GENERATE VALUES FOR THE ENV VARS, ONLY THE KEYS
1258
+ - Track all TEST_ variables for extraction to .env.testdata in Step 7
1259
+ - **DO NOT INCLUDE TEST SCENARIOS**
1260
+ - **Incorporate All Relevant Info:** If the product description mentions specific requirements, constraints, or acceptance criteria (such as field validations, role-based access rules, important parameters), make sure these are reflected in the test plan. Do not add anything not supported by the given information.
1261
+ - **Test Automation Strategy Section - REQUIRED:** Include a comprehensive "Test Automation Strategy" section with the following subsections:
1262
+
1263
+ **## Test Automation Strategy**
1264
+
1265
+ ### Automated Test Coverage
1266
+ - Identify critical user paths to automate (login, checkout, core features)
1267
+ - Define regression test scenarios for automation
1268
+ - Specify API endpoints that need automated testing
1269
+ - List smoke test scenarios for CI/CD pipeline
1270
+
1271
+ ### Exploratory Testing Areas
1272
+ - New features not yet automated
1273
+ - Complex edge cases requiring human judgment
1274
+ - Visual/UX validation requiring subjective assessment
1275
+ - Scenarios that are not cost-effective to automate
1276
+
1277
+ ### Test Data Management
1278
+ - Environment variables strategy (which vars go in .env.testdata vs .env)
1279
+ - Dynamic test data generation approach (use data generators)
1280
+ - API-based test data setup (10-20x faster than UI)
1281
+ - Test data isolation and cleanup strategy
1282
+
1283
+ ### Automation Approach
1284
+ - **Framework:** Playwright + TypeScript (already scaffolded)
1285
+ - **Pattern:** Page Object Model for all pages
1286
+ - **Selectors:** Prioritize role-based selectors (getByRole, getByLabel, getByText) - see the sketch after this list
1287
+ - **Components:** Reusable component objects for common UI elements
1288
+ - **Fixtures:** Custom fixtures for authenticated sessions and common setup
1289
+ - **API for Speed:** Use Playwright's request context to create test data via API
1290
+ - **Best Practices:** Reference \`.bugzy/runtime/testing-best-practices.md\` for patterns
1291
+
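+ A minimal sketch of these conventions, assuming a placeholder \`/api/projects\` endpoint and form labels (both illustrative - adapt to the product under test; the relative URL also assumes \`baseURL\` is set in the Playwright config):
+
+ \`\`\`typescript
+ import { test, expect } from '@playwright/test';
+
+ test('owner can see a newly created project', async ({ page, request }) => {
+   // API-based setup: create test data via the request context (far faster than the UI).
+   await request.post('/api/projects', { data: { name: 'Demo Project' } });
+
+   await page.goto('/login');
+   // Role-based selectors: tied to accessible semantics, resilient to markup changes.
+   await page.getByLabel('Email').fill(process.env.TEST_USER_EMAIL!);
+   await page.getByLabel('Password').fill(process.env.TEST_USER_PASSWORD!);
+   await page.getByRole('button', { name: 'Sign in' }).click();
+
+   await expect(page.getByText('Demo Project')).toBeVisible();
+ });
+ \`\`\`
+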
1292
+ ### Test Organization
1293
+ - Automated tests location: \`./tests/specs/[feature]/\`
1294
+ - Page Objects location: \`./tests/pages/\`
1295
+ - Manual test cases location: \`./test-cases/\` (human-readable documentation)
1296
+ - Test case naming: TC-XXX-feature-description.md
1297
+ - Automated test naming: feature.spec.ts
1298
+
1299
+ ### Automation Decision Criteria
1300
+ Define which scenarios warrant automation:
1301
+ - \u2705 Automate: Frequent execution, critical paths, regression tests, CI/CD integration
1302
+ - \u274C Keep Manual: Rare edge cases, exploratory tests, visual validation, one-time checks
1303
+
1304
+ ### Step 5: Create the test plan file
1305
+
1306
+ Read the test plan template from \`.bugzy/runtime/templates/test-plan-template.md\` and use it as the base structure. Fill in the placeholders with information extracted from BOTH the product description AND documentation research:
1307
+
1308
+ 1. Read the template file from \`.bugzy/runtime/templates/test-plan-template.md\`
1309
+ 2. Replace placeholders like:
1310
+ - \`[ProjectName]\` with the actual project name from the product description
1311
+ - \`[Date]\` with the current date
1312
+ - Feature sections with actual features identified from all documentation sources
1313
+ - Test data requirements based on the product's needs and API documentation
1314
+ - Risks based on the complexity, known issues, and technical constraints
1315
+ 3. Add any product-specific sections that may be needed based on discovered documentation
1316
+ 4. **Mark ambiguities based on severity:**
1317
+ - CRITICAL/HIGH: Should be clarified before plan creation (see Step 1.8)
1318
+ - MEDIUM: Mark with [ASSUMED: reason] and note assumption
1319
+ - LOW: Mark with [TO BE EXPLORED: detail] for future investigation
1320
+ 5. Include references to source documentation for traceability
1321
+
1322
+ ### Step 6: Save the test plan
1323
+
1324
+ Save the generated test plan to a file named \`test-plan.md\` in the project root with appropriate frontmatter:
1325
+
1326
+ \`\`\`yaml
1327
+ ---
1328
+ version: 1.0.0
1329
+ lifecycle_phase: initial
1330
+ created_at: [current date]
1331
+ updated_at: [current date]
1332
+ last_exploration: null
1333
+ total_discoveries: 0
1334
+ status: draft
1335
+ author: claude
1336
+ tags: [functional, security, performance]
1337
+ ---
1338
+ \`\`\`
1339
+
1340
+ ### Step 7: Extract and save environment variables
1341
+
1342
+ **CRITICAL**: Test data values must go ONLY in .env.testdata, NOT in the test plan document.
1343
+
1344
+ After saving the test plan:
1345
+
1346
+ 1. **Parse the test plan** to find all TEST_ prefixed environment variables mentioned (see the extraction sketch after this list):
1347
+ - Look in the Testing Environment section
1348
+ - Search for any TEST_ variables referenced
1349
+ - Extract variables from configuration or setup sections
1350
+ - Common patterns include: TEST_BASE_URL, TEST_USER_*, TEST_API_*, TEST_ADMIN_*, etc.
1351
+
1352
+ 2. **Create .env.testdata file** with all discovered variables:
1353
+ \`\`\`bash
1354
+ # Application Configuration
1355
+ TEST_BASE_URL=
1356
+
1357
+ # Test User Credentials
1358
+ TEST_USER_EMAIL=
1359
+ TEST_USER_PASSWORD=
1360
+ TEST_ADMIN_EMAIL=
1361
+ TEST_ADMIN_PASSWORD=
1362
+
1363
+ # API Configuration
1364
+ TEST_API_KEY=
1365
+ TEST_API_SECRET=
1366
+
1367
+ # Other Test Data
1368
+ TEST_DB_NAME=
1369
+ TEST_TIMEOUT=
1370
+ \`\`\`
1371
+
1372
+ 3. **Add helpful comments** for each variable group to guide users in filling values
1373
+
1374
+ 4. **Save the file** as \`.env.testdata\` in the project root
1375
+
1376
+ 5. **Verify test plan references .env.testdata**:
1377
+ - Ensure test plan DOES NOT contain test data values
1378
+ - Ensure test plan references \`.env.testdata\` for test data requirements
1379
+ - Add instruction: "Fill in actual values in .env.testdata before running tests"
1380
+
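+ A minimal sketch of this extraction, assuming the plan is plain Markdown and variable names follow the TEST_ prefix convention (the regex is a starting point, not a spec):
+
+ \`\`\`typescript
+ import { readFileSync, writeFileSync } from 'node:fs';
+ import { EOL } from 'node:os';
+
+ const plan = readFileSync('test-plan.md', 'utf8');
+ // Collect unique TEST_ variable names referenced anywhere in the plan.
+ const names = [...new Set(plan.match(/TEST_[A-Z0-9_]+/g) ?? [])].sort();
+
+ // Emit keys only - values are filled in by the user, never generated.
+ writeFileSync('.env.testdata', names.map((n) => n + '=').join(EOL) + EOL);
+ \`\`\`
+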
1381
+ ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
1382
+
1383
+ {{TEAM_COMMUNICATOR_INSTRUCTIONS}}
1384
+
1385
+ ### Step 8: Final summary
1386
+
1387
+ Provide a summary of:
1388
+ - Test plan created successfully at \`test-plan.md\`
1389
+ - Environment variables extracted to \`.env.testdata\`
1390
+ - Number of TEST_ variables discovered
1391
+ - Instructions for the user to fill in actual values in .env.testdata before running tests`,
1392
+ optionalSubagents: [
1393
+ {
1394
+ role: "documentation-researcher",
1395
+ contentBlock: `### Step 2: Gather comprehensive project documentation
1396
+
1397
+ Use the documentation-researcher agent to explore and gather all available project documentation and related information sources. This ensures the test plan is based on complete, current information.
1398
+
1399
+ \`\`\`
1400
+ Use the documentation-researcher agent to explore all available project documentation related to: $ARGUMENTS
1401
+
1402
+ Specifically gather:
1403
+ - Product specifications and requirements
1404
+ - User stories and acceptance criteria
1405
+ - Technical architecture documentation
1406
+ - API documentation and endpoints
1407
+ - User roles and permissions
1408
+ - Business rules and validations
1409
+ - UI/UX specifications
1410
+ - Known limitations or constraints
1411
+ - Existing test documentation
1412
+ - Bug reports or known issues
1413
+ \`\`\`
1414
+
1415
+ The agent will:
1416
+ 1. Check its memory for previously discovered documentation
1417
+ 2. Explore workspace for relevant pages and databases
1418
+ 3. Build a comprehensive understanding of the product
1419
+ 4. Return synthesized information about all discovered documentation`
1420
+ },
1421
+ {
1422
+ role: "team-communicator",
1423
+ contentBlock: `### Step 7.5: Team Communication
1424
+
1425
+ Use the team-communicator agent to notify the product team about the new test plan:
1426
+
1427
+ \`\`\`
1428
+ Use the team-communicator agent to:
1429
+ 1. Post an update about the test plan creation
1430
+ 2. Provide a brief summary of coverage areas and key features
1431
+ 3. Mention any areas that need exploration or clarification
1432
+ 4. Ask for team review and feedback on the test plan
1433
+ 5. Include a link or reference to the test-plan.md file
1434
+ 6. Use appropriate channel and threading for the update
1435
+ \`\`\`
1436
+
1437
+ The team communication should include:
1438
+ - **Test plan scope**: Brief overview of what will be tested
1439
+ - **Coverage highlights**: Key features and user flows included
1440
+ - **Areas needing clarification**: Any uncertainties discovered during documentation research
1441
+ - **Review request**: Ask team to review and provide feedback
1442
+ - **Next steps**: Mention plan to generate test cases after review
1443
+
1444
+ **Update team communicator memory:**
1445
+ - Record this communication in the team-communicator memory
1446
+ - Note this as a test plan creation communication
1447
+ - Track team response to this type of update`
1448
+ }
1449
+ ],
1450
+ requiredSubagents: ["test-runner"]
1451
+ };
1452
+
1453
+ // src/tasks/library/handle-message.ts
1454
+ var handleMessageTask = {
1455
+ slug: TASK_SLUGS.HANDLE_MESSAGE,
1456
+ name: "Handle Message",
1457
+ description: "Handle team responses and Slack communications, maintaining context for ongoing conversations (LLM-routed)",
1458
+ frontmatter: {
1459
+ description: "Handle team responses and Slack communications, maintaining context for ongoing conversations",
1460
+ "argument-hint": "[slack thread context or team message]"
1461
+ },
1462
+ baseContent: `# Handle Message Command
1463
+
1464
+ ## SECURITY NOTICE
1465
+ **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
1466
+ - **Read \`.env.testdata\`** for non-secret environment variables (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
1467
+ - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
1468
+ - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
1469
+ - The \`.env\` file access is blocked by settings.json
1470
+
1471
+ Process team responses from Slack threads and handle multi-turn conversations with the product team about testing clarifications, ambiguities, and questions.
1472
+
1473
+ ## Arguments
1474
+ Team message/thread context: $ARGUMENTS
1475
+
1476
+ ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
1477
+
1478
+ ## Process
1479
+
1480
+ ### Step 0: Detect Message Intent and Load Handler
1481
+
1482
+ Before processing the message, identify the intent type to load the appropriate handler.
1483
+
1484
+ #### 0.1 Extract Intent from Event Payload
1485
+
1486
+ Check the event payload for the \`intent\` field provided by the LLM layer:
1487
+ - If \`intent\` is present, use it directly
1488
+ - Valid intent values: \`question\`, \`feedback\`, \`status\`
1489
+
1490
+ #### 0.2 Fallback Intent Detection (if no intent provided)
1491
+
1492
+ If intent is not in the payload, detect it from message patterns (a keyword-matching sketch follows the table):
1493
+
1494
+ | Condition | Intent |
1495
+ |-----------|--------|
1496
+ | Keywords: "status", "progress", "how did", "results", "how many passed" | \`status\` |
1497
+ | Keywords: "bug", "issue", "broken", "doesn't work", "failed", "error" | \`feedback\` |
1498
+ | Question words: "what", "which", "do we have", "is there" about tests/project | \`question\` |
1499
+ | Default (none of above) | \`feedback\` |
1500
+
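+ A minimal sketch of this fallback, assuming plain keyword matching over the lowercased message (keyword lists mirror the table above and are deliberately not exhaustive):
+
+ \`\`\`typescript
+ type Intent = 'question' | 'feedback' | 'status';
+
+ const STATUS_WORDS = ['status', 'progress', 'how did', 'results', 'how many passed'];
+ const FEEDBACK_WORDS = ['bug', 'issue', 'broken', "doesn't work", 'failed', 'error'];
+ const QUESTION_WORDS = ['what', 'which', 'do we have', 'is there'];
+
+ function detectIntent(message: string): Intent {
+   const text = message.toLowerCase();
+   const hasAny = (words: string[]) => words.some((w) => text.includes(w));
+   // Order matters: status checks first, then bug-style feedback, then questions.
+   if (hasAny(STATUS_WORDS)) return 'status';
+   if (hasAny(FEEDBACK_WORDS)) return 'feedback';
+   if (hasAny(QUESTION_WORDS)) return 'question';
+   return 'feedback'; // default, per the table above
+ }
+ \`\`\`
+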
1501
+ #### 0.3 Load Handler File
1502
+
1503
+ Based on detected intent, load the handler from:
1504
+ \`.bugzy/runtime/handlers/messages/{intent}.md\`
1505
+
1506
+ **Handler files:**
1507
+ - \`question.md\` - Questions about tests, coverage, project details
1508
+ - \`feedback.md\` - Bug reports, test observations, general information
1509
+ - \`status.md\` - Status checks on test runs, task progress
1510
+
1511
+ #### 0.4 Follow Handler Instructions
1512
+
1513
+ **IMPORTANT**: The handler file is authoritative for this intent type.
1514
+
1515
+ 1. Read the handler file completely
1516
+ 2. Follow its processing steps in order
1517
+ 3. Apply its context loading requirements
1518
+ 4. Use its response guidelines
1519
+ 5. Perform any memory updates it specifies
1520
+
1521
+ The handler file contains all necessary processing logic for the detected intent type. Each handler includes:
1522
+ - Specific processing steps for that intent
1523
+ - Context loading requirements
1524
+ - Response guidelines
1525
+ - Memory update instructions
1526
+
1527
+ ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
1528
+
1529
+ ## Key Principles
1530
+
1531
+ ### Context Preservation
1532
+ - Always maintain full conversation context
1533
+ - Link responses back to original uncertainties
1534
+ - Preserve reasoning chain for future reference
1535
+
1536
+ ### Actionable Responses
1537
+ - Convert team input into concrete actions
1538
+ - Don't let clarifications sit without implementation
1539
+ - Follow through on commitments made to team
1540
+
1541
+ ### Learning Integration
1542
+ - Each interaction improves our understanding
1543
+ - Build knowledge base of team preferences
1544
+ - Refine communication approaches over time
1545
+
1546
+ ### Quality Communication
1547
+ - Acknowledge team input appropriately
1548
+ - Provide updates on actions taken
1549
+ - Ask good follow-up questions when needed
1550
+
1551
+ ## Important Considerations
1552
+
1553
+ ### Thread Organization
1554
+ - Keep related discussions in same thread
1555
+ - Start new threads for new topics
1556
+ - Maintain clear conversation boundaries
1557
+
1558
+ ### Response Timing
1559
+ - Acknowledge important messages promptly
1560
+ - Allow time for implementation before status updates
1561
+ - Don't spam team with excessive communications
1562
+
1563
+ ### Action Prioritization
1564
+ - Address urgent clarifications first
1565
+ - Batch related updates when possible
1566
+ - Focus on high-impact changes
1567
+
1568
+ ### Memory Maintenance
1569
+ - Keep active conversations visible and current
1570
+ - Archive resolved discussions appropriately
1571
+ - Maintain searchable history of resolutions`,
1572
+ optionalSubagents: [],
1573
+ requiredSubagents: ["team-communicator"]
1574
+ };
1575
+
1576
+ // src/tasks/library/process-event.ts
1577
+ var processEventTask = {
1578
+ slug: TASK_SLUGS.PROCESS_EVENT,
1579
+ name: "Process Event",
1580
+ description: "Process external system events (Jira, GitHub, Linear) using handler-defined rules to extract insights and track issues",
1581
+ frontmatter: {
1582
+ description: "Process external system events (Jira, GitHub, Linear) using handler-defined rules to extract insights and track issues",
1583
+ "argument-hint": "[event payload or description]"
1584
+ },
1585
+ baseContent: `# Process Event Command
1586
+
1587
+ ## SECURITY NOTICE
1588
+ **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
1589
+ - **Read \`.env.testdata\`** for non-secret environment variables (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
1590
+ - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
1591
+ - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
1592
+ - The \`.env\` file access is blocked by settings.json
1593
+
1594
+ Process various types of events using intelligent pattern matching and historical context to maintain and evolve the testing system.
1595
+
1596
+ ## Arguments
1597
+ Arguments: $ARGUMENTS
1598
+
1599
+ ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
1600
+
1601
+ ## Process
1602
+
1603
+ ### Step 1: Understand Event Context
1604
+
1605
+ Events come from integrated external systems via webhooks or manual input. Common sources include:
1606
+ - **Issue Trackers**: Jira, Linear, GitHub Issues
1607
+ - **Source Control**: GitHub, GitLab
1608
+ - **Communication Tools**: Slack
1609
+
1610
+ **Event structure and semantics vary by source.** Do not interpret events based on generic assumptions. Instead, load the appropriate handler file (Step 2.4) for system-specific processing rules.
1611
+
1612
+ #### Event Context to Extract:
1613
+ - **What happened**: The core event (test failed, PR merged, etc.)
1614
+ - **Where**: Component, service, or area affected
1615
+ - **Impact**: How this affects testing strategy
1616
+ - **Action Required**: What needs to be done in response
1617
+
1618
+ ### Step 1.5: Clarify Unclear Events
1619
+
1620
+ If the event information is incomplete or ambiguous, seek clarification before processing:
1621
+
1622
+ #### Detect Unclear Events
1623
+
1624
+ Events may be unclear in several ways:
1625
+ - **Vague description**: "Something broke", "issue with login" (what specifically?)
1626
+ - **Missing context**: Which component, which environment, which user?
1627
+ - **Contradictory information**: Event data conflicts with other sources
1628
+ - **Unknown references**: Mentions unfamiliar features, components, or systems
1629
+ - **Unclear severity**: Impact or priority is ambiguous
1630
+
1631
+ #### Assess Ambiguity Severity
1632
+
1633
+ Classify the ambiguity level to determine appropriate response:
1634
+
1635
+ **\u{1F534} CRITICAL - STOP and seek clarification:**
1636
+ - Cannot identify which component is affected
1637
+ - Event data is contradictory or nonsensical
1638
+ - Unknown system or feature mentioned
1639
+ - Cannot determine if this requires immediate action
1640
+ - Example: Event says "production is down" but unclear which service
1641
+
1642
+ **\u{1F7E0} HIGH - STOP and seek clarification:**
1643
+ - Vague problem description that could apply to multiple areas
1644
+ - Missing critical context needed for proper response
1645
+ - Unclear which team or system is responsible
1646
+ - Example: "Login issue reported" (login button? auth service? session? which page?)
1647
+
1648
+ **\u{1F7E1} MEDIUM - Proceed with documented assumptions:**
1649
+ - Some details missing but core event is clear
1650
+ - Can infer likely meaning from context
1651
+ - Can proceed but should clarify async
1652
+ - Example: "Test failed on staging" (can assume main staging, but clarify which one)
1653
+
1654
+ **\u{1F7E2} LOW - Mark and proceed:**
1655
+ - Minor details missing (optional context)
1656
+ - Cosmetic or non-critical information gaps
1657
+ - Can document gap and continue
1658
+ - Example: Missing timestamp or exact user who reported issue
1659
+
1660
+ #### Clarification Approach by Severity
1661
+
1662
+ **For CRITICAL/HIGH ambiguity:**
1663
+ 1. **Use team-communicator to ask specific questions**
1664
+ 2. **WAIT for response before proceeding**
1665
+ 3. **Document the clarification request in event history**
1666
+
1667
+ Example clarification messages:
1668
+ - "Event mentions 'login issue' - can you clarify if this is:
1669
+ \u2022 Login button not responding?
1670
+ \u2022 Authentication service failure?
1671
+ \u2022 Session management problem?
1672
+ \u2022 Specific page or global?"
1673
+
1674
+ - "Event references component 'XYZ' which is unknown. What system does this belong to?"
1675
+
1676
+ - "Event data shows contradictory information: status=success but error_count=15. Which is correct?"
1677
+
1678
+ **For MEDIUM ambiguity:**
1679
+ 1. **Document assumption** with reasoning
1680
+ 2. **Proceed with processing** based on assumption
1681
+ 3. **Ask for clarification async** (non-blocking)
1682
+ 4. **Mark in event history** for future reference
1683
+
1684
+ Example: [ASSUMED: "login issue" refers to login button based on recent similar events]
1685
+
1686
+ **For LOW ambiguity:**
1687
+ 1. **Mark with [TO BE CLARIFIED: detail]**
1688
+ 2. **Continue processing** normally
1689
+ 3. **Document gap** in event history
1690
+
1691
+ Example: [TO BE CLARIFIED: Exact timestamp of when issue was first observed]
1692
+
1693
+ #### Document Clarification Process
1694
+
1695
+ In event history, record:
1696
+ - **Ambiguity detected**: What was unclear
1697
+ - **Severity assessed**: CRITICAL/HIGH/MEDIUM/LOW
1698
+ - **Clarification requested**: Questions asked (if any)
1699
+ - **Response received**: Team's clarification
1700
+ - **Assumption made**: If proceeded with assumption
1701
+ - **Resolution**: How ambiguity was resolved
1702
+
1703
+ This ensures future similar events can reference past clarifications and avoid redundant questions.
1704
+
1705
+ ### Step 2: Load Context and Memory
1706
+
1707
+ #### 2.1 Check Event Processor Memory
1708
+ Read \`.bugzy/runtime/memory/event-processor.md\` to:
1709
+ - Find similar event patterns
1710
+ - Load example events with reasoning
1711
+ - Get system-specific rules
1712
+ - Retrieve task mapping patterns
1713
+
1714
+ #### 2.2 Check Event History
1715
+ Read \`.bugzy/runtime/memory/event-history.md\` to:
1716
+ - Ensure event hasn't been processed already (idempotency)
1717
+ - Find related recent events
1718
+ - Understand event patterns and trends
1719
+
1720
+ #### 2.3 Read Current State
1721
+ - Read \`test-plan.md\` for current coverage
1722
+ - List \`./test-cases/\` for existing tests
1723
+ - Check \`.bugzy/runtime/knowledge-base.md\` for past insights
1724
+
1725
+ #### 2.4 Load System-Specific Handler (REQUIRED)
1726
+
1727
+ Based on the event source, load the handler from \`.bugzy/runtime/handlers/\`:
1728
+
1729
+ **Step 1: Detect Event Source from Payload:**
1730
+ - \`com.jira-server.*\` event type prefix \u2192 \`.bugzy/runtime/handlers/jira.md\`
1731
+ - \`github.*\` or GitHub webhook structure \u2192 \`.bugzy/runtime/handlers/github.md\`
1732
+ - \`linear.*\` or Linear webhook \u2192 \`.bugzy/runtime/handlers/linear.md\`
1733
+ - Other sources \u2192 Check for matching handler file by source name (see the sketch below)
1734
+
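+ A minimal sketch of this routing, assuming the event type prefix is the only signal needed (real payloads may also require structural checks, and the fallback path must be verified to exist):
+
+ \`\`\`typescript
+ function resolveHandlerPath(eventType: string): string | null {
+   // Prefix-based routing mirrors the rules above; extend as new sources are integrated.
+   if (eventType.startsWith('com.jira-server.')) return '.bugzy/runtime/handlers/jira.md';
+   if (eventType.startsWith('github.')) return '.bugzy/runtime/handlers/github.md';
+   if (eventType.startsWith('linear.')) return '.bugzy/runtime/handlers/linear.md';
+   // Fallback: a handler named after the source, e.g. 'gitlab.push' -> handlers/gitlab.md.
+   const source = eventType.split('.')[0];
+   return source ? '.bugzy/runtime/handlers/' + source + '.md' : null;
+ }
+ \`\`\`
+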
1735
+ **Step 2: Load and Read the Handler File:**
1736
+ The handler file contains system-specific instructions for:
1737
+ - Event payload structure and field meanings
1738
+ - Which triggers (status changes, resolutions) require specific actions
1739
+ - How to interpret different event types
1740
+ - When to invoke \`/verify-changes\`
1741
+ - How to update the knowledge base
1742
+
1743
+ **Step 3: Follow Handler Instructions:**
1744
+ The handler file is authoritative for this event source. Follow its instructions for:
1745
+ - Interpreting the event payload
1746
+ - Determining what actions to take
1747
+ - Formatting responses and updates
1748
+
1749
+ **Step 4: If No Handler Exists:**
1750
+ Do NOT guess or apply generic logic. Instead:
1751
+ 1. Inform the user that no handler exists for this event source
1752
+ 2. Ask how this event type should be processed
1753
+ 3. Suggest creating a handler file at \`.bugzy/runtime/handlers/{source}.md\`
1754
+
1755
+ **Project-Specific Configuration:**
1756
+ Handlers reference \`.bugzy/runtime/project-context.md\` for project-specific rules like:
1757
+ - Which status transitions trigger verify-changes
1758
+ - Which resolutions should update the knowledge base
1759
+ - Which transitions to ignore
1760
+
1761
+ ### Step 3: Intelligent Event Analysis
1762
+
1763
+ #### 3.1 Contextual Pattern Analysis
1764
+ Don't just match patterns - analyze the event within the full context:
1765
+
1766
+ **Combine Multiple Signals**:
1767
+ - Event details + Historical patterns from memory
1768
+ - Current test plan state + Knowledge base
1769
+ - External system status + Team activity
1770
+ - Business priorities + Risk assessment
1771
+
1772
+ **Example Contextual Analysis**:
1773
+ \`\`\`
1774
+ Event: Jira issue PROJ-456 moved to "Ready for QA"
1775
+ + Handler: jira.md says "Ready for QA" triggers /verify-changes
1776
+ + History: This issue was previously in "In Progress" for 3 days
1777
+ + Knowledge: Related PR #123 merged yesterday
1778
+ = Decision: Invoke /verify-changes with issue context and PR reference
1779
+ \`\`\`
1780
+
1781
+ **Pattern Recognition with Context**:
1782
+ - An issue resolution depends on what the handler prescribes for that status
1783
+ - A duplicate event (same issue, same transition) should be skipped
1784
+ - Events from different sources about the same change should be correlated
1785
+ - Handler instructions take precedence over generic assumptions
1786
+
1787
+ #### 3.2 Generate Semantic Queries
1788
+ Based on event type and content, generate 3-5 specific search queries:
1789
+ - Search for similar past events
1790
+ - Look for related test cases
1791
+ - Find relevant documentation
1792
+ - Check for known issues
1793
+
1794
+ {{DOCUMENTATION_RESEARCHER_INSTRUCTIONS}}
1795
+
1796
+ ### Step 4: Task Planning with Reasoning
1797
+
1798
+ Generate tasks based on event analysis, using examples from memory as reference.
1799
+
1800
+ #### Task Generation Logic:
1801
+ Analyze the event in context of ALL available information to decide what actions to take:
1802
+
1803
+ **Consider the Full Context**:
1804
+ - What does the handler prescribe for this event type?
1805
+ - How does this relate to current knowledge?
1806
+ - What's the state of related issues in external systems?
1807
+ - Is this part of a larger pattern we've been seeing?
1808
+ - What's the business impact of this event?
1809
+
1810
+ **Contextual Decision Making**:
1811
+ The same event type can require different actions based on context:
1812
+ - If handler says this status triggers verification \u2192 Invoke /verify-changes
1813
+ - If this issue was already processed (check event history) \u2192 Skip to avoid duplicates
1814
+ - If related PR exists in knowledge base \u2192 Include PR context in actions
1815
+ - If this is a recurring pattern from the same source \u2192 Consider flagging for review
1816
+ - If handler has no rule for this event type \u2192 Ask user for guidance
1817
+
1818
+ **Dynamic Task Selection**:
1819
+ Based on the contextual analysis, decide which tasks make sense:
1820
+ - **extract_learning**: When the event reveals something new about the system
1821
+ - **update_test_plan**: When our understanding of what to test has changed
1822
+ - **update_test_cases**: When tests need to reflect new reality
1823
+ - **report_bug**: When we have a legitimate, impactful, reproducible issue
1824
+ - **skip_action**: When context shows no action needed (e.g., known issue, already fixed)
1825
+
1826
+ The key is to use ALL available context - not just react to the event type.
1827
+
1828
+ #### Document Reasoning:
1829
+ For each task, document WHY it's being executed:
1830
+ \`\`\`markdown
1831
+ Task: extract_learning
1832
+ Reasoning: This event reveals a pattern of login failures on Chrome that wasn't previously documented
1833
+ Data: "Chrome-specific timeout issues with login button"
1834
+ \`\`\`
1835
+
1836
+ ### Step 5: Execute Tasks with Memory Updates
1837
+
1838
+ #### 5.1 Execute Each Task
1839
+
1840
+ {{ISSUE_TRACKER_INSTRUCTIONS}}
1841
+
1842
+ ##### For Other Tasks:
1843
+ Follow the standard execution logic with added context from memory.
1844
+
1845
+ #### 5.2 Update Event Processor Memory
1846
+ If new patterns discovered, append to \`.bugzy/runtime/memory/event-processor.md\`:
1847
+ \`\`\`markdown
1848
+ ### Pattern: [New Pattern Name]
1849
+ **First Seen**: [Date]
1850
+ **Indicators**: [What identifies this pattern]
1851
+ **Typical Tasks**: [Common task responses]
1852
+ **Example**: [This event]
1853
+ \`\`\`
1854
+
1855
+ #### 5.3 Update Event History
1856
+ Append to \`.bugzy/runtime/memory/event-history.md\`:
1857
+ \`\`\`markdown
1858
+ ## [Timestamp] - Event #[ID]
1859
+
1860
+ **Original Input**: [Raw arguments provided]
1861
+ **Parsed Event**:
1862
+ \`\`\`yaml
1863
+ type: [type]
1864
+ source: [source]
1865
+ [other fields]
1866
+ \`\`\`
1867
+
1868
+ **Pattern Matched**: [Pattern name or "New Pattern"]
1869
+ **Tasks Executed**:
1870
+ 1. [Task 1] - Reasoning: [Why]
1871
+ 2. [Task 2] - Reasoning: [Why]
1872
+
1873
+ **Files Modified**:
1874
+ - [List of files]
1875
+
1876
+ **Outcome**: [Success/Partial/Failed]
1877
+ **Notes**: [Any additional context]
1878
+ ---
1879
+ \`\`\`
1880
+
1881
+ ### Step 6: Learning from Events
1882
+
1883
+ After processing, check if this event teaches us something new:
1884
+ 1. Is this a new type of event we haven't seen?
1885
+ 2. Did our task planning work well?
1886
+ 3. Should we update our patterns?
1887
+ 4. Are there trends across recent events?
1888
+
1889
+ If yes, update the event processor memory with new patterns or refined rules.
1890
+
1891
+ ### Step 7: Create Necessary Files
1892
+
1893
+ Ensure all required files and directories exist:
1894
+ \`\`\`bash
1895
+ mkdir -p ./test-cases .bugzy/runtime/memory
1896
+ \`\`\`
1897
+
1898
+ Create files if they don't exist:
1899
+ - \`.bugzy/runtime/knowledge-base.md\`
1900
+ - \`.bugzy/runtime/memory/event-processor.md\`
1901
+ - \`.bugzy/runtime/memory/event-history.md\`
1902
+
1903
+ ## Important Considerations
1904
+
1905
+ ### Contextual Intelligence
1906
+ - Never process events in isolation - always consider full context
1907
+ - Use knowledge base, history, and external system state to inform decisions
1908
+ - What seems like a bug might be expected behavior given the context
1909
+ - A minor event might be critical when seen as part of a pattern
1910
+
1911
+ ### Adaptive Response
1912
+ - Same event type can require different actions based on context
1913
+ - Learn from each event to improve future decision-making
1914
+ - Build understanding of system behavior over time
1915
+ - Adjust responses based on business priorities and risk
1916
+
1917
+ ### Smart Task Generation
1918
+ - Only take actions prescribed by the handler or confirmed by the user
1919
+ - Document why each decision was made with full context
1920
+ - Skip redundant actions (e.g., duplicate events, already-processed issues)
1921
+ - Escalate appropriately based on pattern recognition
1922
+
1923
+ ### Continuous Learning
1924
+ - Each event adds to our understanding of the system
1925
+ - Update patterns when new correlations are discovered
1926
+ - Refine decision rules based on outcomes
1927
+ - Build institutional memory through event history
1928
+
1929
+ ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}`,
1930
+ optionalSubagents: [
1931
+ {
1932
+ role: "documentation-researcher",
1933
+ contentBlock: `#### 3.3 Use Documentation Researcher if Needed
1934
+ For events mentioning unknown features or components:
1935
+ \`\`\`
1936
+ Use documentation-researcher agent to find information about: [component/feature]
1937
+ \`\`\``
1938
+ },
1939
+ {
1940
+ role: "issue-tracker",
1941
+ contentBlock: `##### For Issue Tracking:
1942
+
1943
+ When an issue needs to be tracked (task type: report_bug or update_story):
1944
+ \`\`\`
1945
+ Use issue-tracker agent to:
1946
+ 1. Check for duplicate issues in the tracking system
1947
+ 2. For bugs: Create detailed bug report with:
1948
+ - Clear, descriptive title
1949
+ - Detailed description with context
1950
+ - Step-by-step reproduction instructions
1951
+ - Expected vs actual behavior
1952
+ - Environment and configuration details
1953
+ - Test case reference (if applicable)
1954
+ - Screenshots or error logs
1955
+ 3. For stories: Update status and add QA comments
1956
+ 4. Track issue lifecycle and maintain categorization
1957
+ \`\`\`
1958
+
1959
+ The issue-tracker agent will handle all aspects of issue tracking including duplicate detection, story management, QA workflow transitions, and integration with your project management system (Jira, Linear, Notion, etc.).`
1960
+ }
1961
+ ],
1962
+ requiredSubagents: [],
1963
+ dependentTasks: ["verify-changes"]
1964
+ };
1965
+
1966
+ // src/tasks/library/run-tests.ts
1967
+ var runTestsTask = {
1968
+ slug: TASK_SLUGS.RUN_TESTS,
1969
+ name: "Run Tests",
1970
+ description: "Execute automated Playwright tests, analyze failures, and fix test issues automatically",
1971
+ frontmatter: {
1972
+ description: "Execute automated Playwright tests, analyze failures, and fix test issues automatically",
1973
+ "argument-hint": '[file-pattern|tag|all] (e.g., "auth", "@smoke", "tests/specs/login.spec.ts")'
1974
+ },
1975
+ baseContent: `# Run Tests Command
1976
+
1977
+ ## SECURITY NOTICE
1978
+ **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
1979
+ - **Read \`.env.testdata\`** for non-secret environment variables (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
1980
+ - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
1981
+ - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
1982
+ - The \`.env\` file access is blocked by settings.json
1983
+
1984
+ Execute automated Playwright tests, analyze failures using JSON reports, automatically fix test issues, and log product bugs.
1985
+
1986
+ ## Arguments
1987
+ Arguments: $ARGUMENTS
1988
+
1989
+ ## Parse Arguments
1990
+ Extract the following from arguments:
1991
+ - **selector**: Test selection criteria (a mapping sketch follows this list)
1992
+ - File pattern: "auth" \u2192 finds tests/specs/**/*auth*.spec.ts
1993
+ - Tag: "@smoke" \u2192 runs tests with @smoke annotation
1994
+ - Specific file: "tests/specs/login.spec.ts"
1995
+ - All tests: "all" or "" \u2192 runs entire test suite
1996
+
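+ A minimal sketch of this mapping, assuming the three selector shapes above (tags recognized by a leading "@"; Playwright already treats positional arguments as file-name filters):
+
+ \`\`\`typescript
+ // Map the user's selector to Playwright CLI arguments.
+ function buildPlaywrightArgs(selector: string): string[] {
+   const s = selector.trim();
+   if (s === '' || s === 'all') return ['test'];        // whole suite
+   if (s.startsWith('@')) return ['test', '--grep', s]; // tag, e.g. '@smoke'
+   return ['test', s];                                  // file pattern or specific file
+ }
+ \`\`\`
+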
1997
+ ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
1998
+
1999
+ ## Test Execution Strategy
2000
+
2001
+ **IMPORTANT**: Before selecting tests, read \`.bugzy/runtime/test-execution-strategy.md\` to understand:
2002
+ - Available test tiers (Smoke, Component, Full Regression)
2003
+ - When to use each tier (commit, PR, release, debug)
2004
+ - Default behavior (default to @smoke unless user specifies otherwise)
2005
+ - How to interpret user intent from context keywords
2006
+ - Time/coverage trade-offs
2007
+ - Tag taxonomy
2008
+
2009
+ Apply the strategy guidance when determining which tests to run.
2010
+
2011
+ ## Process
2012
+
2013
+ **First**, consult \`.bugzy/runtime/test-execution-strategy.md\` decision tree to determine appropriate test tier based on user's selector and context.
2014
+
2015
+ ### Step 1: Identify Automated Tests to Run
2016
+
2017
+ #### 1.1 Understand Test Selection
2018
+ Parse the selector argument to determine which tests to run:
2019
+
2020
+ **File Pattern** (e.g., "auth", "login"):
2021
+ - Find matching test files: \`tests/specs/**/*[pattern]*.spec.ts\`
2022
+ - Example: "auth" \u2192 finds all test files with "auth" in the name
2023
+
2024
+ **Tag** (e.g., "@smoke", "@regression"):
2025
+ - Run tests with specific Playwright tag annotation
2026
+ - Use Playwright's \`--grep\` option
2027
+
2028
+ **Specific File** (e.g., "tests/specs/auth/login.spec.ts"):
2029
+ - Run that specific test file
2030
+
2031
+ **All Tests** ("all" or no selector):
2032
+ - Run entire test suite: \`tests/specs/**/*.spec.ts\`
2033
+
2034
+ #### 1.2 Find Matching Test Files
2035
+ Use glob patterns to find test files:
2036
+ \`\`\`bash
2037
+ # For file pattern
2038
+ ls tests/specs/**/*[pattern]*.spec.ts
2039
+
2040
+ # For specific file
2041
+ ls tests/specs/auth/login.spec.ts
2042
+
2043
+ # For all tests
2044
+ ls tests/specs/**/*.spec.ts
2045
+ \`\`\`
2046
+
2047
+ #### 1.3 Validate Test Files Exist
2048
+ Check that at least one test file was found:
2049
+ - If no tests found, inform user and suggest available tests
2050
+ - List available test files if selection was unclear
2051
+
2052
+ ### Step 2: Execute Automated Playwright Tests
2053
+
2054
+ #### 2.1 Build Playwright Command
2055
+ Construct the Playwright test command based on the selector:
2056
+
2057
+ **For file pattern or specific file**:
2058
+ \`\`\`bash
2059
+ npx playwright test [selector]
2060
+ \`\`\`
2061
+
2062
+ **For tag**:
2063
+ \`\`\`bash
2064
+ npx playwright test --grep "[tag]"
2065
+ \`\`\`
2066
+
2067
+ **For all tests**:
2068
+ \`\`\`bash
2069
+ npx playwright test
2070
+ \`\`\`
2071
+
2072
+ **Output**: The custom Bugzy reporter will create a hierarchical test-runs/YYYYMMDD-HHMMSS/ structure with a manifest.json summary
2073
+
2074
+ #### 2.2 Execute Tests via Bash
2075
+ Run the Playwright command:
2076
+ \`\`\`bash
2077
+ npx playwright test [selector]
2078
+ \`\`\`
2079
+
2080
+ Wait for execution to complete. This may take several minutes depending on test count.
2081
+
2082
+ **Note**: The custom Bugzy reporter will automatically:
2083
+ - Generate timestamp in YYYYMMDD-HHMMSS format
2084
+ - Create test-runs/{timestamp}/ directory structure
2085
+ - Record execution-id.txt with BUGZY_EXECUTION_ID
2086
+ - Save results per test case in TC-{id}/exec-1/ folders
2087
+ - Generate manifest.json with complete execution summary
2088
+
2089
+ #### 2.3 Locate and Read Test Results
2090
+ After execution completes, find and read the manifest:
2091
+
2092
+ 1. Find the test run directory (most recent):
2093
+ \`\`\`bash
2094
+ ls -t test-runs/ | head -1
2095
+ \`\`\`
2096
+
2097
+ 2. Read the manifest.json file:
2098
+ \`\`\`bash
2099
+ cat test-runs/[timestamp]/manifest.json
2100
+ \`\`\`
2101
+
2102
+ 3. Store the timestamp for use in test-debugger-fixer if needed
2103
+
2104
+ ### Step 3: Analyze Test Results from Manifest
2105
+
2106
+ #### 3.1 Parse Manifest
2107
+ The Bugzy custom reporter produces structured output in manifest.json:
2108
+ \`\`\`json
2109
+ {
2110
+ "bugzyExecutionId": "70a59676-cfd0-4ffd-b8ad-69ceff25c31d",
2111
+ "timestamp": "20251115-123456",
2112
+ "startTime": "2025-11-15T12:34:56.789Z",
2113
+ "endTime": "2025-11-15T12:45:23.456Z",
2114
+ "status": "completed",
2115
+ "stats": {
2116
+ "totalTests": 10,
2117
+ "passed": 8,
2118
+ "failed": 2,
2119
+ "totalExecutions": 10
2120
+ },
2121
+ "testCases": [
2122
+ {
2123
+ "id": "TC-001-login",
2124
+ "name": "Login functionality",
2125
+ "totalExecutions": 1,
2126
+ "finalStatus": "passed",
2127
+ "executions": [
2128
+ {
2129
+ "number": 1,
2130
+ "status": "passed",
2131
+ "duration": 1234,
2132
+ "videoFile": "video.webm",
2133
+ "hasTrace": false,
2134
+ "hasScreenshots": false,
2135
+ "error": null
2136
+ }
2137
+ ]
2138
+ },
2139
+ {
2140
+ "id": "TC-002-invalid-credentials",
2141
+ "name": "Invalid credentials error",
2142
+ "totalExecutions": 1,
2143
+ "finalStatus": "failed",
2144
+ "executions": [
2145
+ {
2146
+ "number": 1,
2147
+ "status": "failed",
2148
+ "duration": 2345,
2149
+ "videoFile": "video.webm",
2150
+ "hasTrace": true,
2151
+ "hasScreenshots": true,
2152
+ "error": "expect(locator).toBeVisible()..."
2153
+ }
2154
+ ]
2155
+ }
2156
+ ]
2157
+ }
2158
+ \`\`\`
2159
+
2160
+ #### 3.2 Extract Test Results
2161
+ From the manifest, extract the following (a parsing sketch follows this list):
2162
+ - **Total tests**: stats.totalTests
2163
+ - **Passed tests**: stats.passed
2164
+ - **Failed tests**: stats.failed
2165
+ - **Total executions**: stats.totalExecutions (includes re-runs)
2166
+ - **Duration**: Calculate from startTime and endTime
2167
+
2168
+ For each failed test, collect from testCases array:
2169
+ - Test ID (id field)
2170
+ - Test name (name field)
2171
+ - Final status (finalStatus field)
2172
+ - Latest execution details:
2173
+ - Error message (executions[last].error)
2174
+ - Duration (executions[last].duration)
2175
+ - Video file location (test-runs/{timestamp}/{id}/exec-{num}/{videoFile})
2176
+ - Trace availability (executions[last].hasTrace)
2177
+ - Screenshots availability (executions[last].hasScreenshots)
2178
+
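+ A minimal sketch of this extraction, assuming the manifest shape shown above (field names are taken from the example and may evolve with the reporter):
+
+ \`\`\`typescript
+ import { readFileSync } from 'node:fs';
+
+ const timestamp = process.argv[2]; // e.g. '20251115-123456', the latest test-runs/ entry
+ const manifest = JSON.parse(readFileSync('test-runs/' + timestamp + '/manifest.json', 'utf8'));
+
+ const durationMs = Date.parse(manifest.endTime) - Date.parse(manifest.startTime);
+ console.log('Totals:', manifest.stats, '- duration (ms):', durationMs);
+
+ // Collect the latest execution of every failed test case for triage.
+ for (const tc of manifest.testCases) {
+   if (tc.finalStatus !== 'failed') continue;
+   const last = tc.executions[tc.executions.length - 1];
+   console.log(tc.id, '-', tc.name, '-', last.error);
+   console.log('  artifacts: test-runs/' + timestamp + '/' + tc.id + '/exec-' + last.number + '/');
+ }
+ \`\`\`
+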
2179
+ #### 3.3 Generate Summary Statistics
2180
+ \`\`\`markdown
2181
+ ## Test Execution Summary
2182
+ - Total Tests: [count]
2183
+ - Passed: [count] ([percentage]%)
2184
+ - Failed: [count] ([percentage]%)
2185
+ - Skipped: [count] ([percentage]%)
2186
+ - Total Duration: [time]
2187
+ \`\`\`
2188
+
2189
+ ### Step 5: Triage Failed Tests
2190
+
2191
+ After analyzing test results, triage each failure to determine if it's a product bug or test issue:
2192
+
2193
+ #### 5.1 Triage Failed Tests FIRST
2194
+
2195
+ **\u26A0\uFE0F IMPORTANT: Do NOT report bugs without triaging first.**
2196
+
2197
+ For each failed test:
2198
+
2199
+ 1. **Read failure details** from JSON report (error message, stack trace)
2200
+ 2. **Classify the failure:**
2201
+ - **Product bug**: Application behaves incorrectly
2202
+ - **Test issue**: Test code needs fixing (selector, timing, assertion)
2203
+ 3. **Document classification** for next steps
2204
+
2205
+ **Classification Guidelines:**
2206
+ - **Product Bug**: Correct test code, unexpected application behavior
2207
+ - **Test Issue**: Selector not found, timeout, race condition, wrong assertion (a first-pass heuristic is sketched below)
2208
+
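+ A first-pass heuristic for this classification, assuming the error text is the main signal (the signatures below are common Playwright failure messages; borderline cases still need human judgment):
+
+ \`\`\`typescript
+ type Triage = 'test-issue' | 'product-bug';
+
+ // Error signatures that usually point at the test code rather than the product.
+ const TEST_ISSUE_SIGNS = [
+   'locator resolved to 0 elements', // brittle or outdated selector
+   'Timeout',                        // missing wait or race condition
+   'strict mode violation',          // ambiguous selector
+ ];
+
+ function triageFailure(error: string): Triage {
+   const looksLikeTestIssue = TEST_ISSUE_SIGNS.some((sign) => error.includes(sign));
+   return looksLikeTestIssue ? 'test-issue' : 'product-bug';
+ }
+ \`\`\`
+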
2209
+ #### 5.2 Fix Test Issues Automatically
2210
+
2211
+ For each test classified as **[TEST ISSUE]**, use the test-debugger-fixer agent to automatically fix the test:
2212
+
2213
+ \`\`\`
2214
+ Use the test-debugger-fixer agent to fix test issues:
2215
+
2216
+ For each failed test classified as a test issue (not a product bug), provide:
2217
+ - Test run timestamp: [from manifest.timestamp]
2218
+ - Test case ID: [from testCases[].id in manifest]
2219
+ - Test name/title: [from testCases[].name in manifest]
2220
+ - Error message: [from testCases[].executions[last].error]
2221
+ - Execution details path: test-runs/{timestamp}/{testCaseId}/exec-1/
2222
+
2223
+ The agent will:
2224
+ 1. Read the execution details from result.json
2225
+ 2. Analyze the failure (error message, trace if available)
2226
+ 3. Identify the root cause (brittle selector, missing wait, race condition, etc.)
2227
+ 4. Apply appropriate fix to the test code
2228
+ 5. Rerun the test
2229
+ 6. The custom reporter will automatically create the next exec-N/ folder
2230
+ 7. Repeat up to 3 times if needed (exec-1, exec-2, exec-3)
2231
+ 8. Report success or escalate as likely product bug
2232
+
2233
+ After test-debugger-fixer completes:
2234
+ - If fix succeeded: Mark test as fixed, add to "Tests Fixed" list
2235
+ - If still failing after 3 attempts: Reclassify as potential product bug for Step 5.3
2236
+ \`\`\`
2237
+
2238
+ **Track Fixed Tests:**
2239
+ - Maintain list of tests fixed automatically
2240
+ - Include fix description (e.g., "Updated selector from CSS to role-based")
2241
+ - Note verification status (test now passes)
2242
+
2243
+ {{ISSUE_TRACKER_INSTRUCTIONS}}
2244
+
2245
+ ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
2246
+
2247
+ {{TEAM_COMMUNICATOR_INSTRUCTIONS}}
2248
+
2249
+ ### Step 6: Handle Special Cases
2250
+
2251
+ #### If No Test Cases Found
2252
+ If no test cases match the selection criteria:
2253
+ 1. Inform user that no matching test cases were found
2254
+ 2. List available test cases or suggest running \`/generate-test-cases\` first
2255
+ 3. Provide examples of valid selection criteria
2256
+
2257
+ #### If Test Runner Agent Fails
2258
+ If the test-runner agent encounters issues:
2259
+ 1. Report the specific error
2260
+ 2. Suggest troubleshooting steps
2261
+ 3. Offer to run tests individually if batch execution failed
2262
+
2263
+ #### If Test Cases Are Invalid
2264
+ If selected test cases have formatting issues:
2265
+ 1. Report which test cases are invalid
2266
+ 2. Specify what's missing or incorrect
2267
+ 3. Offer to fix the issues or skip invalid tests
2268
+
2269
+ ### Important Notes
2270
+
2271
+ **Test Selection Strategy**:
2272
+ - **Always read** \`.bugzy/runtime/test-execution-strategy.md\` before selecting tests
2273
+ - Default to \`@smoke\` tests for fast validation unless user explicitly requests otherwise
2274
+ - Smoke tests provide 100% manual test case coverage with zero redundancy (~2-5 min)
2275
+ - Full regression includes intentional redundancy for diagnostic value (~10-15 min)
2276
+ - Use context keywords from user request to choose appropriate tier
2277
+
2278
+ **Test Execution**:
2279
+ - Automated Playwright tests are executed via bash command, not through agents
2280
+ - Test execution time varies by tier (see strategy document for details)
2281
+ - JSON reports provide structured test results for analysis
2282
+ - Playwright automatically captures traces, screenshots, and videos on failures
2283
+ - Test artifacts are collected into the test-runs/{timestamp}/ directory structure by the custom reporter
2284
+
2285
+ **Failure Handling**:
2286
+ - Test failures are automatically triaged (product bugs vs test issues)
2287
+ - Test issues are automatically fixed by the test-debugger-fixer subagent
2288
+ - Product bugs are logged via issue tracker after triage
2289
+ - All results are analyzed for learning opportunities and team communication
2290
+ - Critical failures trigger immediate team notification
2291
+
2292
+ **Related Documentation**:
2293
+ - \`.bugzy/runtime/test-execution-strategy.md\` - When and why to run specific tests
2294
+ - \`.bugzy/runtime/testing-best-practices.md\` - How to write tests (patterns and anti-patterns)
2295
+
2296
+ `,
2297
+ optionalSubagents: [
2298
+ {
2299
+ role: "issue-tracker",
2300
+ contentBlock: `
2301
+
2302
+ #### 5.3 Log Product Bugs via Issue Tracker
2303
+
2304
+ After triage in Step 5.1, for tests classified as **[PRODUCT BUG]**, use the issue-tracker agent to log bugs:
2305
+
2306
+ For each bug to report, use the issue-tracker agent:
2307
+
2308
+ \`\`\`
2309
+ Use issue-tracker agent to:
2310
+ 1. Check for duplicate bugs in the tracking system
2311
+ - The agent will automatically search for similar existing issues
2312
+ - It maintains memory of recently reported issues
2313
+ - Duplicate detection happens automatically - don't create manual checks
2314
+
2315
+ 2. For each new bug (non-duplicate):
2316
+ Create detailed bug report with:
2317
+ - **Title**: Clear, descriptive summary (e.g., "Login button fails with timeout on checkout page")
2318
+ - **Description**:
2319
+ - What happened vs. what was expected
2320
+ - Impact on users
2321
+ - Test reference: [file path] \u203A [test title]
2322
+ - **Reproduction Steps**:
2323
+ - List steps from the failing test
2324
+ - Include specific test data used
2325
+ - Note any setup requirements from test file
2326
+ - **Test Execution Details**:
2327
+ - Test file: [file path from JSON report]
2328
+ - Test name: [test title from JSON report]
2329
+ - Error message: [from JSON report]
2330
+ - Stack trace: [from JSON report]
2331
+ - Trace file: [path if available]
2332
+ - Screenshots: [paths if available]
2333
+ - **Environment Details**:
2334
+ - Browser and version (from Playwright config)
2335
+ - Test environment URL (from .env.testdata TEST_BASE_URL)
2336
+ - Timestamp of failure
2337
+ - **Severity/Priority**: Based on:
2338
+ - Test type (smoke tests = high priority)
2339
+ - User impact
2340
+ - Frequency (always fails vs flaky)
2341
+ - **Additional Context**:
2342
+ - Error messages or stack traces from JSON report
2343
+ - Related test files (if part of test suite)
2344
+ - Relevant knowledge from knowledge-base.md
2345
+
2346
+ 3. Track created issues:
2347
+ - Note the issue ID/number returned
2348
+ - Update issue tracker memory with new bugs
2349
+ - Prepare issue references for team communication
2350
+ \`\`\`
2351
+
2352
+ #### 5.4 Summary of Bug Reporting
2353
+
2354
+ After issue tracker agent completes, create a summary:
2355
+ \`\`\`markdown
2356
+ ### Bug Reporting Summary
2357
+ - Total bugs found: [count of FAIL tests]
2358
+ - New bugs reported: [count of newly created issues]
2359
+ - Duplicate bugs found: [count of duplicates detected]
2360
+ - Issues not reported: [count of skipped/known issues]
2361
+
2362
+ **New Bug Reports**:
2363
+ - [Issue ID]: [Bug title] (Test: TC-XXX, Priority: [priority])
2364
+ - [Issue ID]: [Bug title] (Test: TC-YYY, Priority: [priority])
2365
+
2366
+ **Duplicate Bugs** (already tracked):
2367
+ - [Existing Issue ID]: [Bug title] (Matches test: TC-XXX)
2368
+
2369
+ **Not Reported** (skipped or known):
2370
+ - TC-XXX: Skipped due to blocker failure
2371
+ - TC-YYY: Known issue documented in knowledge base
2372
+ \`\`\`
2373
+
2374
+ **Note**: The issue tracker agent handles all duplicate detection and system integration automatically. Simply provide the bug details and let it manage the rest.`
2375
+ },
2376
+ {
2377
+ role: "team-communicator",
2378
+ contentBlock: `### Step 6: Team Communication
2379
+
2380
+ Use the team-communicator agent to notify the product team about test execution:
2381
+
2382
+ \`\`\`
2383
+ Use the team-communicator agent to:
2384
+ 1. Post test execution summary with key statistics
2385
+ 2. Highlight critical failures that need immediate attention
2386
+ 3. Share important learnings about product behavior
2387
+ 4. Report any potential bugs discovered during testing
2388
+ 5. Ask for clarification on unexpected behaviors
2389
+ 6. Provide recommendations for areas needing investigation
2390
+ 7. Use appropriate urgency level based on failure severity
2391
+ \`\`\`
2392
+
2393
+ The team communication should include:
2394
+ - **Execution summary**: Overall pass/fail statistics and timing
2395
+ - **Critical issues**: High-priority failures that need immediate attention
2396
+ - **Key learnings**: Important discoveries about product behavior
2397
+ - **Potential bugs**: Issues that may require bug reports
2398
+ - **Clarifications needed**: Unexpected behaviors requiring team input
2399
+ - **Recommendations**: Suggested follow-up actions
2400
+
2401
+ **Communication strategy based on results**:
2402
+ - **All tests passed**: Brief positive update, highlight learnings
2403
+ - **Minor failures**: Standard update with failure details and plans
2404
+ - **Critical failures**: Urgent notification with detailed analysis
2405
+ - **New discoveries**: Separate message highlighting interesting findings
2406
+
2407
+ **Update team communicator memory**:
2408
+ - Record test execution communication
2409
+ - Track team response patterns to test results
2410
+ - Document any clarifications provided by the team
2411
+ - Note team priorities based on their responses`
2412
+ }
2413
+ ],
2414
+ requiredSubagents: ["test-runner", "test-debugger-fixer"]
2415
+ };
2416
+
2417
+ // src/tasks/library/verify-changes.ts
2418
+ var verifyChangesTask = {
2419
+ slug: TASK_SLUGS.VERIFY_CHANGES,
2420
+ name: "Verify Changes",
2421
+ description: "Unified verification command for all trigger sources with automated tests and manual checklists",
2422
+ frontmatter: {
2423
+ description: "Verify code changes with automated tests and manual verification checklists",
2424
+ "argument-hint": "[trigger-auto-detected]"
2425
+ },
2426
+ baseContent: `# Verify Changes - Unified Multi-Trigger Workflow
2427
+
2428
+ ## SECURITY NOTICE
2429
+ **CRITICAL**: Never read the \`.env\` file. It contains ONLY secrets (passwords, API keys).
2430
+ - **Read \`.env.testdata\`** for non-secret environment variables (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
2431
+ - \`.env.testdata\` contains actual values for test data, URLs, and non-sensitive configuration
2432
+ - For secrets: Reference variable names only (TEST_OWNER_PASSWORD) - values are injected at runtime
2433
+ - The \`.env\` file access is blocked by settings.json
2434
+
2435
+ ## Overview
2436
+
2437
+ This task performs comprehensive change verification with:
2438
+ - **Automated testing**: Execute Playwright tests with automatic triage and fixing
2439
+ - **Manual verification checklists**: Generate role-specific checklists for non-automatable scenarios
2440
+ - **Multi-trigger support**: Works from manual CLI, Slack messages, GitHub PRs, and CI/CD
2441
+ - **Smart output routing**: Results formatted and delivered to the appropriate channel
2442
+
2443
+ ## Arguments
2444
+
2445
+ **Input**: $ARGUMENTS
2446
+
2447
+ The input format determines the trigger source and context extraction strategy.
2448
+
2449
+ ${KNOWLEDGE_BASE_READ_INSTRUCTIONS}
2450
+
2451
+ ## Step 1: Detect Trigger Source
2452
+
2453
+ Analyze the input format to determine how this task was invoked (a combined detection sketch follows the four cases below):
2454
+
2455
+ ### 1.1 Identify Trigger Type
2456
+
2457
+ **GitHub PR Webhook:**
2458
+ - Input contains \`pull_request\` object with structure:
2459
+ \`\`\`json
2460
+ {
2461
+ "pull_request": {
2462
+ "number": 123,
2463
+ "title": "...",
2464
+ "body": "...",
2465
+ "changed_files": [...],
2466
+ "base": { "ref": "main" },
2467
+ "head": { "ref": "feature-branch" },
2468
+ "user": { "login": "..." }
2469
+ }
2470
+ }
2471
+ \`\`\`
2472
+ \u2192 **Trigger detected: GITHUB_PR**
2473
+
2474
+ **Slack Event:**
2475
+ - Input contains \`event\` object with structure:
2476
+ \`\`\`json
2477
+ {
2478
+ "eventType": "com.slack.message" or "com.slack.app_mention",
2479
+ "event": {
2480
+ "type": "message",
2481
+ "channel": "C123456",
2482
+ "user": "U123456",
2483
+ "text": "message content",
2484
+ "ts": "1234567890.123456",
2485
+ "thread_ts": "..." (optional)
2486
+ }
2487
+ }
2488
+ \`\`\`
2489
+ \u2192 **Trigger detected: SLACK_MESSAGE**
2490
+
2491
+ **CI/CD Environment:**
2492
+ - Environment variables present:
2493
+ - \`CI=true\`
2494
+ - \`GITHUB_REF\` (e.g., "refs/heads/feature-branch")
2495
+ - \`GITHUB_SHA\` (commit hash)
2496
+ - \`GITHUB_BASE_REF\` (base branch)
2497
+ - \`GITHUB_HEAD_REF\` (head branch)
2498
+ - Git context available via bash commands
2499
+ \u2192 **Trigger detected: CI_CD**
2500
+
2501
+ **Manual Invocation:**
2502
+ - Input is natural language, URL, or issue identifier
2503
+ - Patterns: "PR #123", GitHub URL, "PROJ-456", feature description
2504
+ \u2192 **Trigger detected: MANUAL**
2505
+
2506
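+ A minimal TypeScript sketch of this detection logic (function and type names are illustrative, not part of the package):
+
+ \`\`\`typescript
+ type TriggerSource = "GITHUB_PR" | "SLACK_MESSAGE" | "CI_CD" | "MANUAL";
+
+ // Check the input shape first, then CI environment variables, else manual.
+ function detectTrigger(input: unknown): TriggerSource {
+   if (input !== null && typeof input === "object") {
+     if ("pull_request" in input) return "GITHUB_PR";
+     if ("event" in input) return "SLACK_MESSAGE";
+   }
+   if (process.env.CI === "true" && process.env.GITHUB_REF) return "CI_CD";
+   return "MANUAL";
+ }
+ \`\`\`
+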
+ ### 1.2 Store Trigger Context
2507
+
2508
+ Store the detected trigger for use in Step 6 (output routing):
2509
+ - Set variable: \`TRIGGER_SOURCE\` = [GITHUB_PR | SLACK_MESSAGE | CI_CD | MANUAL]
2510
+ - This determines output formatting and delivery channel
2511
+
2512
+ ## Step 2: Extract Context Based on Trigger
2513
+
2514
+ Based on the detected trigger source, extract relevant context:
2515
+
2516
+ ### 2.1 GitHub PR Trigger - Extract PR Details
2517
+
2518
+ If trigger is GITHUB_PR:
2519
+ - **PR number**: \`pull_request.number\`
2520
+ - **Title**: \`pull_request.title\`
2521
+ - **Description**: \`pull_request.body\`
2522
+ - **Changed files**: \`pull_request.changed_files\` (array of file paths)
2523
+ - **Author**: \`pull_request.user.login\`
2524
+ - **Base branch**: \`pull_request.base.ref\`
2525
+ - **Head branch**: \`pull_request.head.ref\`
2526
+
2527
+ Optional: Fetch additional details via GitHub API if needed (PR comments, reviews)
2528
+
2529
+ ### 2.2 Slack Message Trigger - Parse Natural Language
2530
+
2531
+ If trigger is SLACK_MESSAGE:
2532
+ - **Message text**: \`event.text\`
2533
+ - **Channel**: \`event.channel\` (for posting results)
2534
+ - **User**: \`event.user\` (requester)
2535
+ - **Thread**: \`event.thread_ts\` or \`event.ts\` (for threading replies)
2536
+
2537
+ **Extract references from text** (see the sketch after this list):
2538
+ - PR numbers: "#123", "PR 123", "pull request 123"
2539
+ - Issue IDs: "PROJ-456", "BUG-123"
2540
+ - URLs: GitHub PR links, deployment URLs
2541
+ - Feature names: Quoted terms, capitalized phrases
2542
+ - Environments: "staging", "production", "preview"
2543
+
2544
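+ A sketch of the reference extraction (regex patterns are illustrative; adapt them to your tracker's ID format):
+
+ \`\`\`typescript
+ // Returns whichever references were found in the Slack message text.
+ function extractRefs(text: string) {
+   const pr = text.match(/(?:#|\\bPR\\s*|pull request\\s*)(\\d+)/i);
+   const issue = text.match(/\\b[A-Z][A-Z0-9]+-\\d+\\b/); // e.g. PROJ-456
+   const url = text.match(/https?:\\/\\/\\S+/);
+   const env = text.match(/\\b(staging|production|preview)\\b/i);
+   return {
+     prNumber: pr ? Number(pr[1]) : undefined,
+     issueId: issue ? issue[0] : undefined,
+     url: url ? url[0] : undefined,
+     environment: env ? env[1].toLowerCase() : undefined,
+   };
+ }
+ \`\`\`
+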
+ ### 2.3 CI/CD Trigger - Read CI Environment
2545
+
2546
+ If trigger is CI_CD:
2547
+ - **CI platform**: Read \`CI\` env var
2548
+ - **Branch**: \`GITHUB_REF\` \u2192 extract branch name
2549
+ - **Commit**: \`GITHUB_SHA\`
2550
+ - **Base branch**: \`GITHUB_BASE_REF\` (for PRs)
2551
+ - **Changed files**: Run \`git diff --name-only $BASE_SHA...$HEAD_SHA\`
2552
+
2553
+ If in a PR context, the PR number can also be read from CI env vars (e.g., the event payload at \`GITHUB_EVENT_PATH\`)
2554
+
2555
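+ A sketch of reading this context in TypeScript (assumes the base branch has been fetched; variable names are illustrative):
+
+ \`\`\`typescript
+ import { execSync } from "node:child_process";
+
+ // Derive branch and changed files from standard GitHub Actions env vars.
+ const branch = (process.env.GITHUB_REF ?? "").replace("refs/heads/", "");
+ const baseRef = process.env.GITHUB_BASE_REF; // only set for pull_request events
+ const changedFiles = baseRef
+   ? execSync("git diff --name-only origin/" + baseRef + "...HEAD")
+       .toString()
+       .trim()
+       .split("\\n")
+   : [];
+ \`\`\`
+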
+ ### 2.4 Manual Trigger - Parse User Input
2556
+
2557
+ If trigger is MANUAL:
2558
+ - **GitHub PR URL**: Parse to extract PR number, then fetch details via API
2559
+ - Pattern: \`https://github.com/owner/repo/pull/123\`
2560
+ - Extract: owner, repo, PR number
2561
+ - Fetch: PR details, diff, comments
2562
+ - **Issue identifier**: Extract issue ID
2563
+ - Patterns: "PROJ-123", "#456", "BUG-789"
2564
+ - **Feature description**: Use text as-is for verification context
2565
+ - **Deployment URL**: Extract for testing environment
2566
+
2567
+ ### 2.5 Unified Context Structure
2568
+
2569
+ After extraction, create unified context structure:
2570
+ \`\`\`
2571
+ CHANGE_CONTEXT = {
2572
+ trigger: [GITHUB_PR | SLACK_MESSAGE | CI_CD | MANUAL],
2573
+ title: "...",
2574
+ description: "...",
2575
+ changedFiles: ["src/pages/Login.tsx", ...],
2576
+ author: "...",
2577
+ environment: "staging" | "production" | URL,
2578
+ prNumber: 123 (if available),
2579
+ issueId: "PROJ-456" (if available),
2580
+
2581
+ // For output routing:
2582
+ slackChannel: "C123456" (if Slack trigger),
2583
+ slackThread: "1234567890.123456" (if Slack trigger),
2584
+ githubRepo: "owner/repo" (if GitHub trigger)
2585
+ }
2586
+ \`\`\`
2587
+
2588
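+ A typed sketch of the same structure (hypothetical interface; field names mirror the outline above, and optional fields depend on the trigger):
+
+ \`\`\`typescript
+ interface ChangeContext {
+   trigger: "GITHUB_PR" | "SLACK_MESSAGE" | "CI_CD" | "MANUAL";
+   title: string;
+   description: string;
+   changedFiles: string[];
+   author: string;
+   environment: string; // "staging", "production", or a deployment URL
+   prNumber?: number;
+   issueId?: string;
+   // Output routing:
+   slackChannel?: string; // Slack trigger only
+   slackThread?: string; // Slack trigger only
+   githubRepo?: string; // "owner/repo", GitHub trigger only
+ }
+ \`\`\`
+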
+ ## Step 3: Determine Test Scope (Smart Selection)
2589
+
2590
+ **IMPORTANT**: You do NOT have access to code files. Infer test scope from change **descriptions** only.
2591
+
2592
+ Based on PR title, description, and commit messages, intelligently select which tests to run:
2593
+
2594
+ ### 3.1 Infer Test Scope from Change Descriptions
2595
+
2596
+ Analyze the change description to identify affected feature areas:
2597
+
2598
+ **Example mappings from descriptions to test suites:**
2599
+
2600
+ | Description Keywords | Inferred Test Scope | Example |
2601
+ |---------------------|-------------------|---------|
2602
+ | "login", "authentication", "sign in/up" | \`tests/specs/auth/\` | "Fix login page validation" \u2192 Auth tests |
2603
+ | "checkout", "payment", "purchase" | \`tests/specs/checkout/\` | "Optimize checkout flow" \u2192 Checkout tests |
2604
+ | "cart", "shopping cart", "add to cart" | \`tests/specs/cart/\` | "Update cart calculations" \u2192 Cart tests |
2605
+ | "API", "endpoint", "backend" | API test suites | "Add new user API endpoint" \u2192 User API tests |
2606
+ | "profile", "account", "settings" | \`tests/specs/profile/\` or \`tests/specs/settings/\` | "Profile page redesign" \u2192 Profile tests |
2607
+
2608
+ **Inference strategy** (see the sketch after this list):
2609
+ 1. **Extract feature keywords** from PR title and description
2610
+ - PR title: "feat(checkout): Add PayPal payment option"
2611
+ - Keywords: ["checkout", "payment"]
2612
+ - Inferred scope: Checkout tests
2613
+
2614
+ 2. **Analyze commit messages** for conventional commit scopes
2615
+ - \`feat(auth): Add password reset flow\` \u2192 Auth tests
2616
+ - \`fix(cart): Resolve quantity update bug\` \u2192 Cart tests
2617
+
2618
+ 3. **Map keywords to test organization**
2619
+ - Reference: Tests are organized by feature under \`tests/specs/\` (see \`.bugzy/runtime/testing-best-practices.md\`)
2620
+ - Feature areas typically include: auth/, checkout/, cart/, profile/, api/, etc.
2621
+
2622
+ 4. **Identify test scope breadth from description tone**
2623
+ - "Fix typo in button label" \u2192 Narrow scope (smoke tests)
2624
+ - "Refactor shared utility functions" \u2192 Wide scope (full suite)
2625
+ - "Update single component styling" \u2192 Narrow scope (component tests)
2626
+
2627
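+ A minimal sketch of the keyword-to-suite mapping (directory names assume the \`tests/specs/\` layout referenced above):
+
+ \`\`\`typescript
+ // Illustrative mapping; extend with your project's feature areas.
+ const KEYWORD_TO_SUITE: Record<string, string> = {
+   login: "tests/specs/auth/",
+   auth: "tests/specs/auth/",
+   checkout: "tests/specs/checkout/",
+   payment: "tests/specs/checkout/",
+   cart: "tests/specs/cart/",
+   profile: "tests/specs/profile/",
+ };
+
+ function inferSuites(title: string, description: string): string[] {
+   const text = (title + " " + description).toLowerCase();
+   const suites = Object.entries(KEYWORD_TO_SUITE)
+     .filter(([keyword]) => text.includes(keyword))
+     .map(([, suite]) => suite);
+   return [...new Set(suites)]; // dedupe overlapping keywords
+ }
+ \`\`\`
+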
+ ### 3.2 Fallback Strategies Based on Description Analysis
2628
+
2629
+ **Description patterns that indicate full suite:**
2630
+ - "Refactor shared/common utilities" (wide impact)
2631
+ - "Update dependencies" or "Upgrade framework" (safety validation)
2632
+ - "Merge main into feature" or "Sync with main" (comprehensive validation)
2633
+ - "Breaking changes" or "Major version update" (thorough testing)
2634
+ - "Database migration" or "Schema changes" (data integrity)
2635
+
2636
+ **Description patterns that indicate smoke tests only:**
2637
+ - "Fix typo" or "Update copy/text" (cosmetic change)
2638
+ - "Update README" or "Documentation only" (no functional change)
2639
+ - "Fix formatting" or "Linting fixes" (no logic change)
2640
+
2641
+ **When description is vague or ambiguous:**
2642
+ - Examples: "Updated several components", "Various bug fixes", "Improvements"
2643
+ - **ACTION REQUIRED**: Use AskUserQuestion tool to clarify test scope
2644
+ - Provide options based on available test suites:
2645
+ \`\`\`typescript
2646
+ AskUserQuestion({
2647
+ questions: [{
2648
+ question: "The change description is broad. Which test suites should run?",
2649
+ header: "Test Scope",
2650
+ multiSelect: true,
2651
+ options: [
2652
+ { label: "Auth tests", description: "Login, signup, password reset" },
2653
+ { label: "Checkout tests", description: "Purchase flow, payment processing" },
2654
+ { label: "Full test suite", description: "Run all tests for comprehensive validation" },
2655
+ { label: "Smoke tests only", description: "Quick validation of critical paths" }
2656
+ ]
2657
+ }]
2658
+ })
2659
+ \`\`\`
2660
+
2661
+ **If specific test scope requested:**
2662
+ - User can override with: "only smoke tests", "full suite", specific test suite names
2663
+ - Honor user's explicit scope over smart selection
2664
+
2665
+ ### 3.3 Test Selection Summary
2666
+
2667
+ Generate summary of test selection based on description analysis:
2668
+ \`\`\`markdown
2669
+ ### Test Scope Determined
2670
+ - **Change description**: [PR title or summary]
2671
+ - **Identified keywords**: [list extracted keywords: "auth", "checkout", etc.]
2672
+ - **Affected test suites**: [list inferred test suite paths or names]
2673
+ - **Scope reasoning**: [explain why this scope was selected]
2674
+ - **Execution strategy**: [smart selection | full suite | smoke tests | user-specified]
2675
+ \`\`\`
2676
+
2677
+ **Example summary:**
2678
+ \`\`\`markdown
2679
+ ### Test Scope Determined
2680
+ - **Change description**: "feat(checkout): Add PayPal payment option"
2681
+ - **Identified keywords**: checkout, payment, PayPal
2682
+ - **Affected test suites**: tests/specs/checkout/payment.spec.ts, tests/specs/checkout/purchase-flow.spec.ts
2683
+ - **Scope reasoning**: Change affects checkout payment processing; running all checkout tests to validate payment integration
2684
+ - **Execution strategy**: Smart selection (checkout suite)
2685
+ \`\`\`
2686
+
2687
+ ## Step 4: Run Verification Workflow
2688
+
2689
+ Execute comprehensive verification combining automated tests and manual checklists:
2690
+
2691
+ ### 4A: Automated Testing (Integrated from /run-tests)
2692
+
2693
+ Execute automated Playwright tests with full triage and fixing:
2694
+
2695
+ #### 4A.1 Execute Tests
2696
+
2697
+ Run the selected tests via Playwright:
2698
+ \`\`\`bash
2699
+ npx playwright test [scope] --reporter=json --output=test-results/
2700
+ \`\`\`
2701
+
2702
+ Wait for execution to complete. Note that \`test-results/.last-run.json\` holds only a last-run summary (status and failed test IDs); to capture the full JSON report, direct the JSON reporter to a file (e.g., via the \`PLAYWRIGHT_JSON_OUTPUT_NAME\` environment variable) and parse that file in the next step.
2703
+
2704
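+ A sketch of driving this from TypeScript (\`scope\` is an illustrative value from Step 3; \`PLAYWRIGHT_JSON_OUTPUT_NAME\` directs the JSON reporter to a file):
+
+ \`\`\`typescript
+ import { execSync } from "node:child_process";
+
+ const scope = "tests/specs/checkout/"; // from Step 3 (assumed)
+ try {
+   execSync("npx playwright test " + scope + " --reporter=json", {
+     env: { ...process.env, PLAYWRIGHT_JSON_OUTPUT_NAME: "test-results/report.json" },
+     stdio: "inherit",
+   });
+ } catch {
+   // Playwright exits non-zero when tests fail; the report is still written.
+ }
+ \`\`\`
+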
+ #### 4A.2 Parse Test Results
2705
+
2706
+ Read and analyze the JSON report (as sketched below):
2707
+ - Extract: Total, passed, failed, skipped counts
2708
+ - For each failed test: file path, test name, error message, stack trace, trace file
2709
+ - Calculate: Pass rate, total duration
2710
+
2711
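+ A parsing sketch (field names follow Playwright's JSON reporter \`stats\` block; verify against your Playwright version):
+
+ \`\`\`typescript
+ import { readFileSync } from "node:fs";
+
+ // Only the fields used here are typed.
+ interface JsonReport {
+   stats: { expected: number; unexpected: number; skipped: number; duration: number };
+ }
+
+ const report: JsonReport = JSON.parse(
+   readFileSync("test-results/report.json", "utf8"),
+ );
+ const { expected: passed, unexpected: failed, skipped, duration } = report.stats;
+ const total = passed + failed + skipped;
+ const passRate = total > 0 ? (100 * passed) / total : 0;
+ \`\`\`
+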
+ #### 4A.3 Triage Failures (Classification)
2712
+
2713
+ For each failed test, classify as:
+ - **[PRODUCT BUG]**: Correct test code, but the application behaves incorrectly
+ - **[TEST ISSUE]**: Test code needs fixing (selector, timing, assertion)
+
+ Classification guidelines:
+ - Product Bug: Expected behavior not met, functional issue
+ - Test Issue: Selector not found, timeout, race condition, brittle locator
+
+ #### Automatic Test Issue Fixing
2714
+
2715
+ For each test classified as **[TEST ISSUE]**, use the test-debugger-fixer agent to automatically fix the test:
2716
+
2717
+ \`\`\`
2718
+ Use the test-debugger-fixer agent to fix test issues:
2719
+
2720
+ For each failed test classified as a test issue (not a product bug), provide:
2721
+ - Test file path: [from JSON report]
2722
+ - Test name/title: [from JSON report]
2723
+ - Error message: [from JSON report]
2724
+ - Stack trace: [from JSON report]
2725
+ - Trace file path: [if available]
2726
+
2727
+ The agent will:
2728
+ 1. Read the failing test file
2729
+ 2. Analyze the failure details
2730
+ 3. Open browser via Playwright MCP to debug if needed
2731
+ 4. Identify the root cause (brittle selector, missing wait, race condition, etc.)
2732
+ 5. Apply appropriate fix to the test code
2733
+ 6. Rerun the test to verify the fix
2734
+ 7. Repeat up to 3 times if needed
2735
+ 8. Report success or escalate as likely product bug
2736
+
2737
+ After test-debugger-fixer completes:
2738
+ - If fix succeeded: Mark test as fixed, add to "Tests Fixed" list
2739
+ - If still failing after 3 attempts: Reclassify as potential product bug
2740
+ \`\`\`
2741
+
2742
+ **Track Fixed Tests:**
2743
+ - Maintain list of tests fixed automatically
2744
+ - Include fix description (e.g., "Updated selector from CSS to role-based")
2745
+ - Note verification status (test now passes)
2746
+ - Reference .bugzy/runtime/testing-best-practices.md for best practices
2747
+
2756
+ #### 4A.4 Fix Test Issues Automatically
2757
+
2758
+ For tests classified as [TEST ISSUE]:
2759
+ - Use test-debugger-fixer agent to analyze and fix
2760
+ - Agent debugs with browser if needed
2761
+ - Applies fix (selector update, wait condition, assertion correction)
2762
+ - Reruns test to verify fix (10x for flaky tests)
2763
+ - Max 3 fix attempts, then reclassify as product bug
2764
+
2765
+ Track fixed tests with:
2766
+ - Test file path
2767
+ - Fix description
2768
+ - Verification status (now passes)
2769
+
2770
+ #### 4A.5 Log Product Bugs
2771
+
2772
+ {{ISSUE_TRACKER_INSTRUCTIONS}}
2773
+
2774
+ For tests classified as [PRODUCT BUG]:
2775
+ - Use issue-tracker agent to create bug reports
2776
+ - Agent checks for duplicates automatically
2777
+ - Creates detailed report with:
2778
+ - Title, description, reproduction steps
2779
+ - Test reference, error details, stack trace
2780
+ - Screenshots, traces, environment details
2781
+ - Severity based on test type and impact
2782
+ - Returns issue ID for tracking
2783
+
2784
+ ### 4B: Manual Verification Checklist (NEW)
2785
+
2786
+ Generate human-readable checklist for non-automatable scenarios:
2787
+
2788
+ #### Generate Manual Verification Checklist
2789
+
2790
+ Analyze the described changes and generate a manual verification checklist for scenarios that cannot be automated.
2791
+
2792
+ #### Analyze Change Context
2793
+
2794
+ Review the provided context to understand what changed:
2795
+ - Read PR title, description, and commit messages
2796
+ - Identify change types from descriptions: visual, UX, forms, mobile, accessibility, edge cases
2797
+ - Understand the scope and impact of changes from the change descriptions
2798
+
2799
+ #### Identify Non-Automatable Scenarios
2800
+
2801
+ Based on the change analysis, identify scenarios that require human verification:
2802
+
2803
+ **1. Visual Design Changes** (CSS, styling, design files, graphics)
2804
+ - Color schemes, gradients, shadows
2805
+ - Typography, font sizes, line heights
2806
+ - Spacing, margins, padding, alignment
2807
+ - Visual consistency across components
2808
+ - Brand guideline compliance
2809
+ \u2192 Add **Design Validation** checklist items
2810
+
2811
+ **2. UX Interaction Changes** (animations, transitions, gestures, micro-interactions)
2812
+ - Animation smoothness (60fps expectation)
2813
+ - Transition timing and easing
2814
+ - Interaction responsiveness and feel
2815
+ - Loading states and skeleton screens
2816
+ - Hover effects, focus states
2817
+ \u2192 Add **UX Feel** checklist items
2818
+
2819
+ **3. Form and Input Changes** (new form fields, input validation, user input)
2820
+ - Screen reader compatibility
2821
+ - Keyboard navigation (Tab order, Enter to submit)
2822
+ - Error message clarity and placement
2823
+ - Color contrast (WCAG 2.1 AA: 4.5:1 ratio for text)
2824
+ - Focus indicators visibility
2825
+ \u2192 Add **Accessibility** checklist items
2826
+
2827
+ **4. Mobile and Responsive Changes** (media queries, touch interactions, viewport)
2828
+ - Touch target sizes (\u226544px iOS, \u226548dp Android)
2829
+ - Responsive layout breakpoints
2830
+ - Mobile keyboard behavior (doesn't obscure inputs)
2831
+ - Swipe gestures and touch interactions
2832
+ - Pinch-to-zoom functionality
2833
+ \u2192 Add **Mobile Experience** checklist items
2834
+
2835
+ **5. Low ROI or Rare Scenarios** (edge cases, one-time migrations, rare user paths)
2836
+ - Scenarios used by < 1% of users
2837
+ - Complex multi-system integrations
2838
+ - One-time data migrations
2839
+ - Leap year, DST, timezone edge cases
2840
+ \u2192 Add **Exploratory Testing** notes
2841
+
2842
+ **6. Cross-Browser Visual Consistency** (layout rendering differences)
2843
+ - Layout consistency across Chrome, Firefox, Safari
2844
+ - CSS feature support differences
2845
+ - Font rendering variations
2846
+ \u2192 Add **Cross-Browser** checklist items (if significant visual changes)
2847
+
2848
+ #### Generate Role-Specific Checklist Items
2849
+
2850
+ For each identified scenario, create clear, actionable checklist items:
2851
+
2852
+ **Format for each item:**
2853
+ - Clear, specific task description
2854
+ - Assigned role (@design-team, @qa-team, @a11y-team, @mobile-team)
2855
+ - Acceptance criteria (what constitutes pass/fail)
2856
+ - Reference to standards when applicable (WCAG, iOS HIG, Material Design)
2857
+ - Priority indicator (\u{1F534} critical, \u{1F7E1} important, \u{1F7E2} nice-to-have)
2858
+
2859
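+ A hypothetical shape for a checklist item, mirroring the fields above:
+
+ \`\`\`typescript
+ interface ChecklistItem {
+   task: string; // specific, verifiable action
+   role: "@design-team" | "@qa-team" | "@a11y-team" | "@mobile-team";
+   acceptanceCriteria: string; // what constitutes pass/fail
+   standard?: string; // e.g. "WCAG 2.1 AA", "iOS HIG"
+   priority: "critical" | "important" | "nice-to-have";
+ }
+ \`\`\`
+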
+ **Example checklist items:**
2860
+
2861
+ **Design Validation (@design-team)**
2862
+ - [ ] \u{1F534} Login button color matches brand guidelines (#FF6B35)
2863
+ - [ ] \u{1F7E1} Loading spinner animation smooth (60fps, no jank)
2864
+ - [ ] \u{1F7E1} Card shadows match design system (elevation-2: 0 2px 4px rgba(0,0,0,0.1))
2865
+ - [ ] \u{1F7E2} Hover states provide appropriate visual feedback
2866
+
2867
+ **Accessibility (@a11y-team)**
2868
+ - [ ] \u{1F534} Screen reader announces form errors clearly (tested with VoiceOver/NVDA)
2869
+ - [ ] \u{1F534} Keyboard navigation: Tab through all interactive elements in logical order
2870
+ - [ ] \u{1F534} Color contrast meets WCAG 2.1 AA (4.5:1 for body text, 3:1 for large text)
2871
+ - [ ] \u{1F7E1} Focus indicators visible on all interactive elements
2872
+
2873
+ **Mobile Experience (@qa-team, @mobile-team)**
2874
+ - [ ] \u{1F534} Touch targets \u226544px (iOS Human Interface Guidelines)
2875
+ - [ ] \u{1F534} Mobile keyboard doesn't obscure input fields on iOS/Android
2876
+ - [ ] \u{1F7E1} Swipe gestures work naturally without conflicts
2877
+ - [ ] \u{1F7E1} Responsive layout adapts properly on iPhone SE (smallest screen)
2878
+
2879
+ **UX Feel (@design-team, @qa-team)**
2880
+ - [ ] \u{1F7E1} Page transitions smooth and not jarring
2881
+ - [ ] \u{1F7E1} Button click feedback immediate (< 100ms perceived response)
2882
+ - [ ] \u{1F7E2} Loading states prevent confusion during data fetch
2883
+
2884
+ **Exploratory Testing (@qa-team)**
2885
+ - [ ] \u{1F7E2} Test edge case: User submits form during network timeout
2886
+ - [ ] \u{1F7E2} Test edge case: User navigates back during submission
2887
+
2888
+ #### Format for Output Channel
2889
+
2890
+ Adapt the checklist format based on the output channel (determined by trigger source):
2891
+
2892
+ **Terminal (Manual Trigger):**
2893
+ \`\`\`markdown
2894
+ MANUAL VERIFICATION CHECKLIST:
2895
+ Please verify the following before merging:
2896
+
2897
+ Design Validation (@design-team):
2898
+ [ ] \u{1F534} Checkout button colors match brand guidelines (#FF6B35)
2899
+ [ ] \u{1F7E1} Loading spinner animation smooth (60fps)
2900
+
2901
+ Accessibility (@a11y-team):
2902
+ [ ] \u{1F534} Screen reader announces error messages
2903
+ [ ] \u{1F534} Keyboard navigation works (Tab order logical)
2904
+ [ ] \u{1F534} Color contrast meets WCAG 2.1 AA (4.5:1 ratio)
2905
+
2906
+ Mobile Experience (@qa-team):
2907
+ [ ] \u{1F534} Touch targets \u226544px (iOS HIG)
2908
+ [ ] \u{1F7E1} Responsive layout works on iPhone SE
2909
+ \`\`\`
2910
+
2911
+ **Slack (Slack Trigger):**
2912
+ \`\`\`markdown
2913
+ *Manual Verification Needed:*
2914
+ \u25A1 Visual: Button colors, animations (60fps)
2915
+ \u25A1 Mobile: Touch targets \u226544px
2916
+ \u25A1 A11y: Screen reader, keyboard nav, contrast
2917
+
2918
+ cc @design-team @qa-team @a11y-team
2919
+ \`\`\`
2920
+
2921
+ **GitHub PR Comment (GitHub Trigger):**
2922
+ \`\`\`markdown
2923
+ ### Manual Verification Required
2924
+
2925
+ The following scenarios require human verification before release:
2926
+
2927
+ #### Design Validation (@design-team)
2928
+ - [ ] \u{1F534} Checkout button colors match brand guidelines (#FF6B35)
2929
+ - [ ] \u{1F7E1} Loading spinner animation smooth (60fps)
2930
+ - [ ] \u{1F7E1} Card shadows match design system
2931
+
2932
+ #### Accessibility (@a11y-team)
2933
+ - [ ] \u{1F534} Screen reader announces error messages (VoiceOver/NVDA)
2934
+ - [ ] \u{1F534} Keyboard navigation through all form fields (Tab order)
2935
+ - [ ] \u{1F534} Color contrast meets WCAG 2.1 AA (4.5:1 for body text)
2936
+
2937
+ #### Mobile Experience (@qa-team)
2938
+ - [ ] \u{1F534} Touch targets \u226544px (iOS Human Interface Guidelines)
2939
+ - [ ] \u{1F534} Mobile keyboard doesn't obscure input fields
2940
+ - [ ] \u{1F7E1} Responsive layout works on iPhone SE (375x667)
2941
+
2942
+ ---
2943
+ *Legend: \u{1F534} Critical \u2022 \u{1F7E1} Important \u2022 \u{1F7E2} Nice-to-have*
2944
+ \`\`\`
2945
+
2946
+ #### Guidelines for Quality Checklists
2947
+
2948
+ **DO:**
2949
+ - Make each item verifiable (clear pass/fail criteria)
2950
+ - Include context (why this needs manual verification)
2951
+ - Reference standards (WCAG, iOS HIG, Material Design)
2952
+ - Assign to specific roles
2953
+ - Prioritize items (critical, important, nice-to-have)
2954
+ - Be specific (not "check colors" but "Login button color matches #FF6B35")
2955
+
2956
+ **DON'T:**
2957
+ - Create vague items ("test thoroughly")
2958
+ - List items that can be automated
2959
+ - Skip role assignments
2960
+ - Forget acceptance criteria
2961
+ - Omit priority indicators
2962
+
2963
+ #### When NO Manual Verification Needed
2964
+
2965
+ If the changes are purely:
2966
+ - Backend logic (no UI changes)
2967
+ - Code refactoring (no behavior changes)
2968
+ - Configuration changes (no user-facing impact)
2969
+ - Fully covered by automated tests
2970
+
2971
+ Output:
2972
+ \`\`\`markdown
2973
+ **Manual Verification:** Not required for this change.
2974
+ All user-facing changes are fully covered by automated tests.
2975
+ \`\`\`
2976
+
2977
+ #### Summary
2978
+
2979
+ After generating the checklist:
2980
+ - Count total items by priority (\u{1F534} critical, \u{1F7E1} important, \u{1F7E2} nice-to-have)
2981
+ - Estimate time needed (e.g., "~30 minutes for design QA, ~45 minutes for accessibility testing")
2982
+ - Suggest who should perform each category of checks
2983
+
2984
+ ### 4C: Aggregate Results
2985
+
2986
+ Combine automated and manual verification results:
2987
+
2988
+ \`\`\`markdown
2989
+ ## Verification Results Summary
2990
+
2991
+ ### Automated Tests
2992
+ - Total tests: [count]
2993
+ - Passed: [count] ([percentage]%)
2994
+ - Failed: [count] ([percentage]%)
2995
+ - Test issues fixed: [count]
2996
+ - Product bugs logged: [count]
2997
+ - Duration: [time]
2998
+
2999
+ ### Manual Verification Required
3000
+ [Checklist generated in 4B, or "Not required"]
3001
+
3002
+ ### Overall Recommendation
3003
+ [\u2705 Safe to merge | \u26A0\uFE0F Review bugs before merging | \u274C Do not merge]
3004
+ \`\`\`
3005
+
3006
+ ## Step 5: Understanding the Change (Documentation Research)
3007
+
3008
+ {{DOCUMENTATION_RESEARCHER_INSTRUCTIONS}}
3009
+
3010
+ Before creating new tests or acting on ambiguous results, ensure requirements are clear through ambiguity detection and adaptive exploration.
3011
+
3012
+ **Note**: For detailed exploration and clarification protocols, refer to the complete instructions below. Adapt the depth of exploration based on requirement clarity and use the clarification protocol to detect ambiguity, assess severity, and seek clarification when needed.
3013
+
3014
+ After clarification and exploration, analyze the change to determine the verification approach:
3015
+
3016
+ ### 5.1 Identify Test Scope
3017
+ Based on the change description, exploration findings, and clarified requirements:
3018
+ - **Direct impact**: Which features/functionality are directly modified
3019
+ - **Indirect impact**: What else might be affected (dependencies, integrations)
3020
+ - **Regression risk**: Existing functionality that should be retested
3021
+ - **New functionality**: Features that need new test coverage
3022
+
3023
+ ### 5.2 Determine Verification Strategy
3024
+ Plan your testing approach based on validated requirements:
3025
+ - **Priority areas**: Critical paths that must work
3026
+ - **Test types needed**: Functional, regression, integration, UI/UX
3027
+ - **Test data requirements**: What test accounts, data, or scenarios needed
3028
+ - **Success criteria**: What determines the change is working correctly (now clearly defined)
3029
+
3030
+ ## Step 6: Report Results (Multi-Channel Output)
3031
+
3032
+ Route output based on trigger source (from Step 1):
3033
+
3034
+ ### 6.1 MANUAL Trigger \u2192 Terminal Output
3035
+
3036
+ Format as comprehensive markdown report for terminal display:
3037
+
3038
+ \`\`\`markdown
3039
+ # Test Verification Report
3040
+
3041
+ ## Change Summary
3042
+ - **What Changed**: [Brief description]
3043
+ - **Scope**: [Affected features/areas]
3044
+ - **Changed Files**: [count] files
3045
+
3046
+ ## Automated Test Results
3047
+ ### Statistics
3048
+ - Total Tests: [count]
3049
+ - Passed: [count] ([percentage]%)
3050
+ - Failed: [count]
3051
+ - Test Issues Fixed: [count]
3052
+ - Product Bugs Logged: [count]
3053
+ - Duration: [time]
3054
+
3055
+ ### Tests Fixed Automatically
3056
+ [For each fixed test:
3057
+ - **Test**: [file path] \u203A [test name]
3058
+ - **Issue**: [problem found]
3059
+ - **Fix**: [what was changed]
3060
+ - **Status**: \u2705 Now passing
3061
+ ]
3062
+
3063
+ ### Product Bugs Logged
3064
+ [For each bug:
3065
+ - **Issue**: [ISSUE-123] [Bug title]
3066
+ - **Test**: [test file] \u203A [test name]
3067
+ - **Severity**: [priority]
3068
+ - **Link**: [issue tracker URL]
3069
+ ]
3070
+
3071
+ ## Manual Verification Checklist
3072
+
3073
+ [Insert checklist from Step 4B]
3074
+
3075
+ ## Recommendation
3076
+ [\u2705 Safe to merge - all automated tests pass, complete manual checks before release]
3077
+ [\u26A0\uFE0F Review bugs before merging - [X] bugs need attention]
3078
+ [\u274C Do not merge - critical failures]
3079
+
3080
+ ## Test Artifacts
3081
+ - JSON Report: test-results/report.json (configured reporter output); last-run summary: test-results/.last-run.json
3082
+ - HTML Report: playwright-report/index.html
3083
+ - Traces: test-results/[test-id]/trace.zip
3084
+ - Screenshots: test-results/[test-id]/screenshots/
3085
+ \`\`\`
3086
+
3087
+ ### 6.2 SLACK_MESSAGE Trigger \u2192 Thread Reply
3088
+
3089
+ {{TEAM_COMMUNICATOR_INSTRUCTIONS}}
3090
+
3091
+ Use team-communicator agent to post concise results to Slack thread:
3092
+
3093
+ \`\`\`
3094
+ Use the team-communicator agent to post verification results.
3095
+
3096
+ **Channel**: [from CHANGE_CONTEXT.slackChannel]
3097
+ **Thread**: [from CHANGE_CONTEXT.slackThread]
3098
+
3099
+ **Message**:
3100
+ \u{1F9EA} *Verification Results for [change title]*
3101
+
3102
+ *Automated:* \u2705 [passed]/[total] tests passed ([duration])
3103
+ [If test issues fixed:] \u{1F527} [count] test issues auto-fixed
3104
+ [If bugs logged:] \u{1F41B} [count] bugs logged ([list issue IDs])
3105
+
3106
+ *Manual Verification Needed:*
3107
+ [Concise checklist summary - collapsed/expandable]
3108
+ \u25A1 Visual: [key items]
3109
+ \u25A1 Mobile: [key items]
3110
+ \u25A1 A11y: [key items]
3111
+
3112
+ *Recommendation:* [\u2705 Safe to merge | \u26A0\uFE0F Review bugs | \u274C Blocked]
3113
+
3114
+ [If bugs logged:] cc @[relevant-team-members]
3115
+ [Link to full test report if available]
3116
+ \`\`\`
3117
+
3118
+ ### 6.3 GITHUB_PR Trigger \u2192 PR Comment
3119
+
3120
+ Use GitHub API to post comprehensive comment on PR:
3121
+
3122
+ **Format as GitHub-flavored markdown:**
3123
+ \`\`\`markdown
3124
+ ## \u{1F9EA} Test Verification Results
3125
+
3126
+ **Status:** [\u2705 All tests passed | \u26A0\uFE0F Issues found | \u274C Critical failures]
3127
+
3128
+ ### Automated Tests
3129
+ | Metric | Value |
3130
+ |--------|-------|
3131
+ | Total Tests | [count] |
3132
+ | Passed | \u2705 [count] ([percentage]%) |
3133
+ | Failed | \u274C [count] |
3134
+ | Test Issues Fixed | \u{1F527} [count] |
3135
+ | Product Bugs Logged | \u{1F41B} [count] |
3136
+ | Duration | \u23F1\uFE0F [time] |
3137
+
3138
+ ### Failed Tests (Triaged)
3139
+
3140
+ [For each failure:]
3141
+
3142
+ #### \u274C **[Test Name]**
3143
+ - **File:** \`[test-file-path]\`
3144
+ - **Cause:** [Product bug | Test issue]
3145
+ - **Action:** [Bug logged: [ISSUE-123](url) | Fixed: [commit-hash](url)]
3146
+ - **Details:**
3147
+ \`\`\`
3148
+ [Error message]
3149
+ \`\`\`
3150
+
3151
+ ### Tests Fixed Automatically
3152
+
3153
+ [For each fixed test:]
3154
+ - \u2705 **[Test Name]** (\`[file-path]\`)
3155
+ - **Issue:** [brittle selector | missing wait | race condition]
3156
+ - **Fix:** [description of fix applied]
3157
+ - **Verified:** Passes 10/10 runs
3158
+
3159
+ ### Product Bugs Logged
3160
+
3161
+ [For each bug:]
3162
+ - \u{1F41B} **[[ISSUE-123](url)]** [Bug title]
3163
+ - **Test:** \`[test-file]\` \u203A [test name]
3164
+ - **Severity:** [\u{1F534} Critical | \u{1F7E1} Important | \u{1F7E2} Minor]
3165
+ - **Assignee:** @[backend-team | frontend-team]
3166
+
3167
+ ### Manual Verification Required
3168
+
3169
+ The following scenarios require human verification before release:
3170
+
3171
+ #### Design Validation (@design-team)
3172
+ - [ ] \u{1F534} [Critical design check]
3173
+ - [ ] \u{1F7E1} [Important design check]
3174
+
3175
+ #### Accessibility (@a11y-team)
3176
+ - [ ] \u{1F534} [Critical a11y check]
3177
+ - [ ] \u{1F7E1} [Important a11y check]
3178
+
3179
+ #### Mobile Experience (@qa-team)
3180
+ - [ ] \u{1F534} [Critical mobile check]
3181
+ - [ ] \u{1F7E1} [Important mobile check]
3182
+
3183
+ ---
3184
+ *Legend: \u{1F534} Critical \u2022 \u{1F7E1} Important \u2022 \u{1F7E2} Nice-to-have*
3185
+
3186
+ ### Test Artifacts
3187
+ - [Full HTML Report](playwright-report/index.html)
3188
+ - [Test Traces](test-results/)
3189
+
3190
+ ### Recommendation
3191
+ [\u2705 **Safe to merge** - All automated tests pass, complete manual checks before release]
3192
+ [\u26A0\uFE0F **Review required** - [X] bugs need attention, complete manual checks]
3193
+ [\u274C **Do not merge** - Critical failures must be resolved first]
3194
+
3195
+ ---
3196
+ *\u{1F916} Automated by Bugzy \u2022 [View Test Code](tests/specs/) \u2022 [Manual Test Cases](test-cases/)*
3197
+ \`\`\`
3198
+
3199
+ **Post comment via GitHub API** (sketch after this list):
3200
+ - Endpoint: \`POST /repos/{owner}/{repo}/issues/{pr_number}/comments\`
3201
+ - Use GitHub MCP or bash with \`gh\` CLI
3202
+ - Requires GITHUB_TOKEN from environment
3203
+
3204
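+ A sketch of the API call in TypeScript (\`postPrComment\` is hypothetical; \`ChangeContext\` refers to the typed sketch in Step 2.5, and the endpoint and headers are standard GitHub REST):
+
+ \`\`\`typescript
+ // Posts the formatted report as a comment on the PR.
+ async function postPrComment(context: ChangeContext, commentMarkdown: string) {
+   if (!context.githubRepo || !context.prNumber) {
+     throw new Error("Missing GitHub repo or PR number in context");
+   }
+   const url =
+     "https://api.github.com/repos/" + context.githubRepo +
+     "/issues/" + context.prNumber + "/comments";
+   const res = await fetch(url, {
+     method: "POST",
+     headers: {
+       Authorization: "Bearer " + (process.env.GITHUB_TOKEN ?? ""),
+       Accept: "application/vnd.github+json",
+     },
+     body: JSON.stringify({ body: commentMarkdown }),
+   });
+   if (!res.ok) throw new Error("Failed to post PR comment: " + res.status);
+ }
+ \`\`\`
+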
+ ### 6.4 CI_CD Trigger \u2192 Build Log + PR Comment
3205
+
3206
+ **Output to CI build log:**
3207
+ - Print detailed results to stdout (captured by CI)
3208
+ - Use ANSI colors if supported by CI platform
3209
+ - Same format as MANUAL terminal output
3210
+
3211
+ **Exit with appropriate code** (sketched below):
3212
+ - Exit 0: All tests passed (safe to merge)
3213
+ - Exit 1: Tests failed or critical bugs found (block merge)
3214
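+
+ A minimal sketch of the gate (\`failed\` and \`criticalBugs\` are assumed counters carried over from Step 4):
+
+ \`\`\`typescript
+ const failed = 0; // count of failed tests after triage (assumed)
+ const criticalBugs = 0; // count of critical product bugs logged (assumed)
+
+ // Non-zero exit blocks the merge in most CI systems.
+ process.exit(failed > 0 || criticalBugs > 0 ? 1 : 0);
+ \`\`\`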
+
3215
+ **Post PR comment if GitHub context available:**
3216
+ - Check for PR number in CI environment
3217
+ - If available: Post comment using 6.3 format
3218
+ - Also notify the team via Slack on critical failures
3219
+
3220
+ ## Additional Steps
3221
+
3222
+ ### Handle Special Cases
3223
+
3224
+ **If no tests found for changed files:**
3225
+ - Inform user: "No automated tests found for changed files"
3226
+ - Recommend: "Run smoke test suite for basic validation"
3227
+ - Still generate manual verification checklist
3228
+
3229
+ **If all tests skipped:**
3230
+ - Explain why (dependencies, environment issues)
3231
+ - Recommend: Check test configuration and prerequisites
3232
+
3233
+ **If test execution fails:**
3234
+ - Report specific error (Playwright not installed, env vars missing)
3235
+ - Suggest troubleshooting steps
3236
+ - Don't proceed with triage if tests didn't run
3237
+
3238
+ ${KNOWLEDGE_BASE_UPDATE_INSTRUCTIONS}
3239
+
3240
+ ## Important Notes
3241
+
3242
+ - This task handles **all trigger sources** with a single unified workflow
3243
+ - Trigger detection is automatic based on input format
3244
+ - Output is automatically routed to the appropriate channel
3245
+ - Automated tests are executed with **full triage and automatic fixing**
3246
+ - Manual verification checklists are generated for **non-automatable scenarios**
3247
+ - Product bugs are logged with **automatic duplicate detection**
3248
+ - Test issues are fixed automatically with **verification**
3249
+ - Results include both automated and manual verification items
3250
+ - For best results, ensure:
3251
+ - Playwright is installed (\`npx playwright install\`)
3252
+ - Environment variables configured (non-secret values in \`.env.testdata\`; secrets injected at runtime)
3253
+ - GitHub token available for PR comments (if GitHub trigger)
3254
+ - Slack integration configured (if Slack trigger)
3255
+ - Issue tracker configured (Linear, Jira, etc.)
3256
+
3257
+ ## Success Criteria
3258
+
3259
+ A successful verification includes:
3260
+ 1. \u2705 Trigger source correctly detected
3261
+ 2. \u2705 Context extracted completely
3262
+ 3. \u2705 Tests executed (or skipped with explanation)
3263
+ 4. \u2705 All failures triaged (product bug vs test issue)
3264
+ 5. \u2705 Test issues fixed automatically (when possible)
3265
+ 6. \u2705 Product bugs logged to issue tracker
3266
+ 7. \u2705 Manual verification checklist generated
3267
+ 8. \u2705 Results formatted for output channel
3268
+ 9. \u2705 Results delivered to appropriate destination
3269
+ 10. \u2705 Clear recommendation provided (merge / review / block)`,
3270
+ optionalSubagents: [
3271
+ {
3272
+ role: "documentation-researcher",
3273
+ contentBlock: `#### Research Project Documentation
3274
+
3275
+ Use the documentation-researcher agent to gather comprehensive context about the changed features:
3276
+
3277
+ \`\`\`
3278
+ Use the documentation-researcher agent to explore project documentation related to the changes.
3279
+
3280
+ Specifically gather:
3281
+ - Product specifications for affected features
3282
+ - User stories and acceptance criteria
3283
+ - Technical architecture documentation
3284
+ - API endpoints and contracts
3285
+ - User roles and permissions relevant to the change
3286
+ - Business rules and validations
3287
+ - UI/UX specifications
3288
+ - Known limitations or constraints
3289
+ - Related bug reports or known issues
3290
+ - Existing test documentation for this area
3291
+ \`\`\`
3292
+
3293
+ The agent will:
3294
+ 1. Check its memory for previously discovered documentation
3295
+ 2. Explore workspace for relevant pages and databases
3296
+ 3. Build comprehensive understanding of the affected features
3297
+ 4. Return synthesized information to inform testing strategy
3298
+
3299
+ Use this information to:
3300
+ - Better understand the change context
3301
+ - Identify comprehensive test scenarios
3302
+ - Recognize integration points and dependencies
3303
+ - Spot potential edge cases or risk areas
3304
+ - Enhance manual verification checklist generation`
3305
+ },
3306
+ {
3307
+ role: "issue-tracker",
3308
+ contentBlock: `#### Log Product Bugs
3309
+
3310
+ For tests classified as **[PRODUCT BUG]**, use the issue-tracker agent to log bugs:
3311
+
3312
+ \`\`\`
3313
+ Use issue-tracker agent to:
3314
+ 1. Check for duplicate bugs in the tracking system
3315
+ - The agent will automatically search for similar existing issues
3316
+ - It maintains memory of recently reported issues
3317
+ - Duplicate detection happens automatically - don't create manual checks
3318
+
3319
+ 2. For each new bug (non-duplicate):
3320
+ Create detailed bug report with:
3321
+ - **Title**: Clear, descriptive summary (e.g., "Login button fails with timeout on checkout page")
3322
+ - **Description**:
3323
+ - What happened vs. what was expected
3324
+ - Impact on users
3325
+ - Test reference: [file path] \u203A [test title]
3326
+ - **Reproduction Steps**:
3327
+ - List steps from the failing test
3328
+ - Include specific test data used
3329
+ - Note any setup requirements from test file
3330
+ - **Test Execution Details**:
3331
+ - Test file: [file path from JSON report]
3332
+ - Test name: [test title from JSON report]
3333
+ - Error message: [from JSON report]
3334
+ - Stack trace: [from JSON report]
3335
+ - Trace file: [path if available]
3336
+ - Screenshots: [paths if available]
3337
+ - **Environment Details**:
3338
+ - Browser and version (from Playwright config)
3339
+ - Test environment URL (from .env.testdata BASE_URL)
3340
+ - Timestamp of failure
3341
+ - **Severity/Priority**: Based on:
3342
+ - Test type (smoke tests = high priority)
3343
+ - User impact
3344
+ - Frequency (always fails vs flaky)
3345
+ - **Additional Context**:
3346
+ - Error messages or stack traces from JSON report
3347
+ - Related test files (if part of test suite)
3348
+ - Relevant knowledge from knowledge-base.md
3349
+
3350
+ 3. Track created issues:
3351
+ - Note the issue ID/number returned
3352
+ - Update issue tracker memory with new bugs
3353
+ - Prepare issue references for team communication
3354
+ \`\`\`
3355
+
3356
+ **Note**: The issue tracker agent handles all duplicate detection and system integration automatically. Simply provide the bug details and let it manage the rest.`
3357
+ },
3358
+ {
3359
+ role: "team-communicator",
3360
+ contentBlock: `#### Team Communication
3361
+
3362
+ Use the team-communicator agent to share verification results (primarily for Slack trigger, but can be used for other triggers):
3363
+
3364
+ \`\`\`
3365
+ Use the team-communicator agent to:
3366
+ 1. Post verification results summary
3367
+ 2. Highlight critical failures that need immediate attention
3368
+ 3. Share bugs logged with issue tracker links
3369
+ 4. Provide manual verification checklist summary
3370
+ 5. Recommend next steps based on results
3371
+ 6. Tag relevant team members for critical issues
3372
+ 7. Use appropriate urgency level based on failure severity
3373
+ \`\`\`
3374
+
3375
+ The team communication should include:
3376
+ - **Execution summary**: Overall pass/fail statistics and timing
3377
+ - **Tests fixed**: Count of test issues fixed automatically
3378
+ - **Bugs logged**: Product bugs reported to issue tracker
3379
+ - **Manual checklist**: Summary of manual verification items
3380
+ - **Recommendation**: Safe to merge / Review required / Do not merge
3381
+ - **Test artifacts**: Links to reports, traces, screenshots
3382
+
3383
+ **Communication strategy based on trigger**:
3384
+ - **Slack**: Post concise message with expandable details in thread
3385
+ - **Manual**: Full detailed report in terminal
3386
+ - **GitHub PR**: Comprehensive PR comment with tables and checklists
3387
+ - **CI/CD**: Build log output + optional Slack notification for critical failures
3388
+
3389
+ **Update team communicator memory**:
3390
+ - Record verification communication
3391
+ - Track response patterns by trigger type
3392
+ - Document team preferences for detail level
3393
+ - Note which team members respond to which types of issues`
3394
+ }
3395
+ ],
3396
+ requiredSubagents: ["test-runner", "test-debugger-fixer"]
3397
+ };
3398
+
3399
+ // src/tasks/index.ts
3400
+ var TASK_TEMPLATES = {
3401
+ [TASK_SLUGS.EXPLORE_APPLICATION]: exploreApplicationTask,
3402
+ [TASK_SLUGS.GENERATE_TEST_CASES]: generateTestCasesTask,
3403
+ [TASK_SLUGS.GENERATE_TEST_PLAN]: generateTestPlanTask,
3404
+ [TASK_SLUGS.HANDLE_MESSAGE]: handleMessageTask,
3405
+ [TASK_SLUGS.PROCESS_EVENT]: processEventTask,
3406
+ [TASK_SLUGS.RUN_TESTS]: runTestsTask,
3407
+ [TASK_SLUGS.VERIFY_CHANGES]: verifyChangesTask
3408
+ };
3409
+ function getTaskTemplate(slug) {
3410
+ return TASK_TEMPLATES[slug];
3411
+ }
3412
+ function getAllTaskSlugs() {
3413
+ return Object.keys(TASK_TEMPLATES);
3414
+ }
3415
+ function isTaskRegistered(slug) {
3416
+ return TASK_TEMPLATES[slug] !== void 0;
3417
+ }
3418
+ function buildSlashCommandsConfig(slugs) {
3419
+ const configs = {};
3420
+ for (const slug of slugs) {
3421
+ const task = TASK_TEMPLATES[slug];
3422
+ if (!task) {
3423
+ console.warn(`Unknown task slug: ${slug}, skipping`);
3424
+ continue;
3425
+ }
3426
+ configs[slug] = {
3427
+ frontmatter: task.frontmatter,
3428
+ content: task.baseContent
3429
+ };
3430
+ console.log(`\u2713 Added slash command: /${slug}`);
3431
+ }
3432
+ return configs;
3433
+ }
3434
+ function getRequiredMCPsFromTasks(slugs) {
3435
+ const mcps = /* @__PURE__ */ new Set();
3436
+ const mcpMap = {
3437
+ "test-runner": "playwright",
3438
+ "team-communicator": "slack",
3439
+ "documentation-researcher": "notion",
3440
+ "issue-tracker": "linear"
3441
+ };
3442
+ for (const slug of slugs) {
3443
+ const task = TASK_TEMPLATES[slug];
3444
+ if (!task) continue;
3445
+ for (const subagent of task.requiredSubagents) {
3446
+ const mcp = mcpMap[subagent];
3447
+ if (mcp) {
3448
+ mcps.add(mcp);
3449
+ }
3450
+ }
3451
+ }
3452
+ return Array.from(mcps);
3453
+ }
3454
+ // Annotate the CommonJS export names for ESM import in node:
3455
+ 0 && (module.exports = {
3456
+ TASK_SLUGS,
3457
+ TASK_TEMPLATES,
3458
+ buildSlashCommandsConfig,
3459
+ getAllTaskSlugs,
3460
+ getRequiredMCPsFromTasks,
3461
+ getTaskTemplate,
3462
+ isTaskRegistered
3463
+ });
3464
+ //# sourceMappingURL=index.cjs.map