@bugzy-ai/bugzy 1.15.1 → 1.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -276,6 +276,7 @@ var TASK_SLUGS = {
276
276
  PROCESS_EVENT: "process-event",
277
277
  RUN_TESTS: "run-tests",
278
278
  VERIFY_CHANGES: "verify-changes",
279
+ TRIAGE_RESULTS: "triage-results",
279
280
  /** @deprecated Use ONBOARD_TESTING instead */
280
281
  FULL_TEST_COVERAGE: "onboard-testing"
281
282
  };
@@ -393,27 +394,12 @@ Example structure:
393
394
  {
394
395
  inline: true,
395
396
  title: "Generate All Manual Test Case Files",
396
- content: `Generate ALL manual test case markdown files in the \`./test-cases/\` directory BEFORE invoking the test-code-generator agent.
397
-
398
- **For each test scenario from the previous step:**
399
-
400
- 1. **Create test case file** in \`./test-cases/\` with format \`TC-XXX-feature-description.md\`
401
- 2. **Include frontmatter** with:
402
- - \`id:\` TC-XXX (sequential ID)
403
- - \`title:\` Clear, descriptive title
404
- - \`automated:\` true/false (based on automation decision)
405
- - \`automated_test:\` (leave empty - will be filled by subagent when automated)
406
- - \`type:\` exploratory/functional/regression/smoke
407
- - \`area:\` Feature area/component
408
- 3. **Write test case content**:
409
- - **Objective**: Clear description of what is being tested
410
- - **Preconditions**: Setup requirements, test data needed
411
- - **Test Steps**: Numbered, human-readable steps
412
- - **Expected Results**: What should happen at each step
413
- - **Test Data**: Environment variables to use (e.g., \${TEST_BASE_URL}, \${TEST_OWNER_EMAIL})
414
- - **Notes**: Any assumptions, clarifications needed, or special considerations
415
-
416
- **Output**: All manual test case markdown files created in \`./test-cases/\` with automation flags set`
397
+ content: `Generate ALL manual test case markdown files in \`./test-cases/\` BEFORE invoking the test-code-generator agent.
398
+
399
+ Create files using \`TC-XXX-feature-description.md\` format. Follow the format of existing test cases in the directory. If no existing cases exist, include:
400
+ - Frontmatter with test case metadata (id, title, type, area, \`automated: true/false\`, \`automated_test:\` empty)
401
+ - Clear test steps with expected results
402
+ - Required test data references (use env var names, not values)`
417
403
  },
418
404
  // Step 11: Automate Test Cases (inline - detailed instructions for test-code-generator)
419
405
  {
@@ -498,76 +484,14 @@ Move to the next area and repeat until all areas are complete.
498
484
  {
499
485
  inline: true,
500
486
  title: "Team Communication",
501
- content: `{{INVOKE_TEAM_COMMUNICATOR}} to notify the product team about the new test cases and automated tests:
502
-
503
- \`\`\`
504
- 1. Post an update about test case and automation creation
505
- 2. Provide summary of coverage:
506
- - Number of manual test cases created
507
- - Number of automated tests created
508
- - Features covered by automation
509
- - Areas kept manual-only (and why)
510
- 3. Highlight key automated test scenarios
511
- 4. Share command to run automated tests (from \`./tests/CLAUDE.md\`)
512
- 5. Ask for team review and validation
513
- 6. Mention any areas needing exploration or clarification
514
- 7. Use appropriate channel and threading for the update
515
- \`\`\`
516
-
517
- The team communication should include:
518
- - **Test artifacts created**: Manual test cases + automated tests count
519
- - **Automation coverage**: Which features are now automated
520
- - **Manual-only areas**: Why some tests are kept manual (rare scenarios, exploratory)
521
- - **Key automated scenarios**: Critical paths now covered by automation
522
- - **Running tests**: Command to execute automated tests
523
- - **Review request**: Ask team to validate scenarios and review test code
524
- - **Next steps**: Plans for CI/CD integration or additional test coverage
525
-
526
- **Update team communicator memory:**
527
- - Record this communication
528
- - Note test case and automation creation
529
- - Track team feedback on automation approach
530
- - Document any clarifications requested`,
487
+ content: `{{INVOKE_TEAM_COMMUNICATOR}} to share test case and automation results with the team, highlighting coverage areas, automation vs manual-only decisions, and any unresolved clarifications. Ask for team review.`,
531
488
  conditionalOnSubagent: "team-communicator"
532
489
  },
533
490
  // Step 17: Final Summary (inline)
534
491
  {
535
492
  inline: true,
536
493
  title: "Final Summary",
537
- content: `Provide a comprehensive summary showing:
538
-
539
- **Manual Test Cases:**
540
- - Number of manual test cases created
541
- - List of test case files with IDs and titles
542
- - Automation status for each (automated: yes/no)
543
-
544
- **Automated Tests:**
545
- - Number of automated test scripts created
546
- - List of spec files with test counts
547
- - Page Objects created or updated
548
- - Fixtures and helpers added
549
-
550
- **Test Coverage:**
551
- - Features covered by manual tests
552
- - Features covered by automated tests
553
- - Areas kept manual-only (and why)
554
-
555
- **Next Steps:**
556
- - Command to run automated tests (from \`./tests/CLAUDE.md\`)
557
- - Instructions to run specific test file (from \`./tests/CLAUDE.md\`)
558
- - Note about copying .env.testdata to .env
559
- - Mention any exploration needed for edge cases
560
-
561
- **Important Notes:**
562
- - **Both Manual AND Automated**: Generate both artifacts - they serve different purposes
563
- - **Manual Test Cases**: Documentation, reference, can be executed manually when needed
564
- - **Automated Tests**: Fast, repeatable, for CI/CD and regression testing
565
- - **Automation Decision**: Not all test cases need automation - rare edge cases can stay manual
566
- - **Linking**: Manual test cases reference automated tests; automated tests reference manual test case IDs
567
- - **Two-Phase Workflow**: First generate all manual test cases, then automate area-by-area
568
- - **Ambiguity Handling**: Use exploration and clarification protocols before generating
569
- - **Environment Variables**: Use \`process.env.VAR_NAME\` in tests, update .env.testdata as needed
570
- - **Test Independence**: Each test must be runnable in isolation and in parallel`
494
+ content: `Provide a summary of created artifacts: manual test cases (count, IDs), automated tests (count, spec files), page objects and supporting files, coverage by area, and command to run tests (from \`./tests/CLAUDE.md\`).`
571
495
  }
572
496
  ],
573
497
  requiredSubagents: ["browser-automation", "test-code-generator"],
@@ -734,28 +658,7 @@ After saving the test plan:
734
658
  {
735
659
  inline: true,
736
660
  title: "Team Communication",
737
- content: `{{INVOKE_TEAM_COMMUNICATOR}} to notify the product team about the new test plan:
738
-
739
- \`\`\`
740
- 1. Post an update about the test plan creation
741
- 2. Provide a brief summary of coverage areas and key features
742
- 3. Mention any areas that need exploration or clarification
743
- 4. Ask for team review and feedback on the test plan
744
- 5. Include a link or reference to the test-plan.md file
745
- 6. Use appropriate channel and threading for the update
746
- \`\`\`
747
-
748
- The team communication should include:
749
- - **Test plan scope**: Brief overview of what will be tested
750
- - **Coverage highlights**: Key features and user flows included
751
- - **Areas needing clarification**: Any uncertainties discovered during documentation research
752
- - **Review request**: Ask team to review and provide feedback
753
- - **Next steps**: Mention plan to generate test cases after review
754
-
755
- **Update team communicator memory:**
756
- - Record this communication in the team-communicator memory
757
- - Note this as a test plan creation communication
758
- - Track team response to this type of update`,
661
+ content: `{{INVOKE_TEAM_COMMUNICATOR}} to share the test plan with the team for review, highlighting coverage areas and any unresolved clarifications.`,
759
662
  conditionalOnSubagent: "team-communicator"
760
663
  },
761
664
  // Step 18: Final Summary (inline)
@@ -877,59 +780,7 @@ After processing the message through the handler and composing your response:
877
780
  // Step 7: Clarification Protocol (for ambiguous intents)
878
781
  "clarification-protocol",
879
782
  // Step 8: Knowledge Base Update (library)
880
- "update-knowledge-base",
881
- // Step 9: Key Principles (inline)
882
- {
883
- inline: true,
884
- title: "Key Principles",
885
- content: `## Key Principles
886
-
887
- ### Context Preservation
888
- - Always maintain full conversation context
889
- - Link responses back to original uncertainties
890
- - Preserve reasoning chain for future reference
891
-
892
- ### Actionable Responses
893
- - Convert team input into concrete actions
894
- - Don't let clarifications sit without implementation
895
- - Follow through on commitments made to team
896
-
897
- ### Learning Integration
898
- - Each interaction improves our understanding
899
- - Build knowledge base of team preferences
900
- - Refine communication approaches over time
901
-
902
- ### Quality Communication
903
- - Acknowledge team input appropriately
904
- - Provide updates on actions taken
905
- - Ask good follow-up questions when needed`
906
- },
907
- // Step 10: Important Considerations (inline)
908
- {
909
- inline: true,
910
- title: "Important Considerations",
911
- content: `## Important Considerations
912
-
913
- ### Thread Organization
914
- - Keep related discussions in same thread
915
- - Start new threads for new topics
916
- - Maintain clear conversation boundaries
917
-
918
- ### Response Timing
919
- - Acknowledge important messages promptly
920
- - Allow time for implementation before status updates
921
- - Don't spam team with excessive communications
922
-
923
- ### Action Prioritization
924
- - Address urgent clarifications first
925
- - Batch related updates when possible
926
- - Focus on high-impact changes
927
-
928
- ### Memory Maintenance
929
- - Keep active conversations visible and current
930
- - Archive resolved discussions appropriately
931
- - Maintain searchable history of resolutions`
932
- }
783
+ "update-knowledge-base"
933
784
  ],
934
785
  requiredSubagents: ["team-communicator"],
935
786
  optionalSubagents: [],
@@ -1356,38 +1207,7 @@ Create files if they don't exist:
1356
1207
  - \`.bugzy/runtime/memory/event-history.md\``
1357
1208
  },
1358
1209
  // Step 14: Knowledge Base Update (library)
1359
- "update-knowledge-base",
1360
- // Step 15: Important Considerations (inline)
1361
- {
1362
- inline: true,
1363
- title: "Important Considerations",
1364
- content: `## Important Considerations
1365
-
1366
- ### Contextual Intelligence
1367
- - Never process events in isolation - always consider full context
1368
- - Use knowledge base, history, and external system state to inform decisions
1369
- - What seems like a bug might be expected behavior given the context
1370
- - A minor event might be critical when seen as part of a pattern
1371
-
1372
- ### Adaptive Response
1373
- - Same event type can require different actions based on context
1374
- - Learn from each event to improve future decision-making
1375
- - Build understanding of system behavior over time
1376
- - Adjust responses based on business priorities and risk
1377
-
1378
- ### Smart Task Generation
1379
- - NEVER execute action tasks directly \u2014 all action tasks go through blocked-task-queue for team confirmation
1380
- - Knowledge base updates and event history logging are the only direct operations
1381
- - Document why each decision was made with full context
1382
- - Skip redundant actions (e.g., duplicate events, already-processed issues)
1383
- - Escalate appropriately based on pattern recognition
1384
-
1385
- ### Continuous Learning
1386
- - Each event adds to our understanding of the system
1387
- - Update patterns when new correlations are discovered
1388
- - Refine decision rules based on outcomes
1389
- - Build institutional memory through event history`
1390
- }
1210
+ "update-knowledge-base"
1391
1211
  ],
1392
1212
  requiredSubagents: ["team-communicator"],
1393
1213
  optionalSubagents: ["documentation-researcher", "issue-tracker"],
@@ -1475,6 +1295,7 @@ Before running tests, confirm the selection with the user if ambiguous:
1475
1295
  },
1476
1296
  // Step 7-10: Test Execution (library steps)
1477
1297
  "run-tests",
1298
+ "normalize-test-results",
1478
1299
  "parse-test-results",
1479
1300
  "triage-failures",
1480
1301
  "fix-test-issues",
@@ -1483,14 +1304,7 @@ Before running tests, confirm the selection with the user if ambiguous:
1483
1304
  stepId: "log-product-bugs",
1484
1305
  conditionalOnSubagent: "issue-tracker"
1485
1306
  },
1486
- // Step 12: Knowledge Base Update (library)
1487
- "update-knowledge-base",
1488
- // Step 13: Team Communication (conditional - library step)
1489
- {
1490
- stepId: "notify-team",
1491
- conditionalOnSubagent: "team-communicator"
1492
- },
1493
- // Step 14: Handle Special Cases (inline - task-specific)
1307
+ // Step 12: Handle Special Cases (inline - reference material, positioned before final action steps)
1494
1308
  {
1495
1309
  inline: true,
1496
1310
  title: "Handle Special Cases",
@@ -1538,6 +1352,13 @@ If selected test cases have formatting issues:
1538
1352
  **Related Documentation**:
1539
1353
  - \`./tests/docs/test-execution-strategy.md\` - When and why to run specific tests
1540
1354
  - \`./tests/docs/testing-best-practices.md\` - How to write tests (patterns and anti-patterns)`
1355
+ },
1356
+ // Step 13: Knowledge Base Update (library)
1357
+ "update-knowledge-base",
1358
+ // Step 14: Team Communication (conditional - library step, LAST actionable step)
1359
+ {
1360
+ stepId: "notify-team",
1361
+ conditionalOnSubagent: "team-communicator"
1541
1362
  }
1542
1363
  ],
1543
1364
  requiredSubagents: ["browser-automation", "test-debugger-fixer"],
@@ -1652,33 +1473,13 @@ Store the detected trigger for use in output routing:
1652
1473
  title: "Coverage Gap vs. Ambiguity",
1653
1474
  content: `### Coverage Gap vs. Ambiguity
1654
1475
 
1655
- When the trigger indicates a feature has been implemented and is ready for testing (Jira "Ready to Test", PR merged, CI/CD pipeline):
1656
-
1657
- **Missing test coverage for the referenced feature is a COVERAGE GAP, not an ambiguity.**
1658
-
1659
- - The developer/team is asserting the feature exists and is ready for testing
1660
- - "Not yet explored" or "out of scope" in the test plan means the QA team hasn't tested it yet \u2014 it does NOT mean the feature doesn't exist
1661
- - Do NOT classify as CRITICAL based on stale documentation or knowledge base gaps
1662
- - If project-context.md or the Jira issue references the feature, assume it exists until browser exploration proves otherwise
1663
- - Coverage gaps are handled in the "Create Tests for Coverage Gaps" step below \u2014 do NOT block here
1664
-
1665
- ### If You Browse the App and Cannot Find the Referenced Feature
1476
+ When the trigger indicates a feature is ready for testing (Jira "Ready to Test", PR merged, CI/CD):
1666
1477
 
1667
- Apply the Clarification Protocol's **"Execution Obstacle vs. Requirement Ambiguity"** principle:
1478
+ **Missing test coverage is a COVERAGE GAP, not an ambiguity.** The trigger asserts the feature exists. Do NOT block based on stale docs or knowledge base gaps. Coverage gaps are handled in "Create Tests for Coverage Gaps" below.
1668
1479
 
1669
- This is an **execution obstacle**, NOT a requirement ambiguity \u2014 because the authoritative trigger source (Jira issue, PR, team request) asserts the feature exists. Common causes for not finding it:
1670
- - **Missing role/tier**: You're logged in as a basic user but the feature requires admin/premium access
1671
- - **Missing test data**: Required test accounts or data haven't been configured in \`.env.testdata\`
1672
- - **Feature flags**: The feature is behind a flag not enabled in the test environment
1673
- - **Environment config**: The feature requires specific environment variables or deployment settings
1480
+ **If you can't find the referenced feature in the browser:** Apply the Clarification Protocol's execution obstacle principle. The authoritative trigger asserts it exists \u2014 this is an execution obstacle (wrong role, missing test data, feature flags, env config). PROCEED to create tests, add placeholder env vars, notify team about the access issue. Tests may fail until resolved \u2014 that's expected.
1674
1481
 
1675
- **Action: PROCEED to "Create Tests for Coverage Gaps".** Do NOT BLOCK.
1676
- - Create test cases and specs that reference the feature as described in the trigger
1677
- - Add placeholder env vars to \`.env.testdata\` for any missing credentials
1678
- - Notify the team (via team-communicator) about the access obstacle and what needs to be configured
1679
- - Tests may fail until the obstacle is resolved \u2014 this is expected and acceptable
1680
-
1681
- **Only classify as CRITICAL (and BLOCK) if NO authoritative trigger source claims the feature exists** \u2014 e.g., a vague manual request with no Jira/PR backing.`
1482
+ **Only BLOCK if NO authoritative trigger source claims the feature exists** (e.g., vague manual request with no Jira/PR backing).`
1682
1483
  },
1683
1484
  // Step 6: Clarification Protocol (library)
1684
1485
  "clarification-protocol",
@@ -2069,44 +1870,11 @@ Post PR comment if GitHub context available.`,
2069
1870
  {
2070
1871
  inline: true,
2071
1872
  title: "Handle Special Cases",
2072
- content: `**If no tests found for changed files:**
2073
- - Inform user: "No automated tests found for changed files"
2074
- - Recommend: "Run smoke test suite for basic validation"
2075
- - Still generate manual verification checklist
2076
-
2077
- **If all tests skipped:**
2078
- - Explain why (dependencies, environment issues)
2079
- - Recommend: Check test configuration and prerequisites
2080
-
2081
- **If test execution fails:**
2082
- - Report specific error (test framework not installed, env vars missing)
2083
- - Suggest troubleshooting steps
2084
- - Don't proceed with triage if tests didn't run
2085
-
2086
- ## Important Notes
2087
-
2088
- - This task handles **all trigger sources** with a single unified workflow
2089
- - Trigger detection is automatic based on input format
2090
- - Output is automatically routed to the appropriate channel
2091
- - Automated tests are executed with **full triage and automatic fixing**
2092
- - Manual verification checklists are generated for **non-automatable scenarios**
2093
- - Product bugs are logged with **automatic duplicate detection**
2094
- - Test issues are fixed automatically with **verification**
2095
- - Results include both automated and manual verification items
2096
-
2097
- ## Success Criteria
2098
-
2099
- A successful verification includes:
2100
- 1. Trigger source correctly detected
2101
- 2. Context extracted completely
2102
- 3. Tests executed (or skipped with explanation)
2103
- 4. All failures triaged (product bug vs test issue)
2104
- 5. Test issues fixed automatically (when possible)
2105
- 6. Product bugs logged to issue tracker
2106
- 7. Manual verification checklist generated
2107
- 8. Results formatted for output channel
2108
- 9. Results delivered to appropriate destination
2109
- 10. Clear recommendation provided (merge / review / block)`
1873
+ content: `**If no tests found for changed files:** recommend smoke test suite, still generate manual verification checklist.
1874
+
1875
+ **If all tests skipped:** explain why (dependencies, environment), recommend checking configuration.
1876
+
1877
+ **If test execution fails:** report specific error, suggest troubleshooting, don't proceed with triage.`
2110
1878
  }
2111
1879
  ],
2112
1880
  requiredSubagents: ["browser-automation", "test-debugger-fixer"],
@@ -2257,6 +2025,108 @@ var exploreApplicationTask = {
2257
2025
  dependentTasks: []
2258
2026
  };
2259
2027
 
2028
+ // src/tasks/library/triage-results.ts
2029
+ var triageResultsTask = {
2030
+ slug: TASK_SLUGS.TRIAGE_RESULTS,
2031
+ name: "Triage Results",
2032
+ description: "Analyze externally-submitted test results and triage failures as product bugs or test issues",
2033
+ frontmatter: {
2034
+ description: "Analyze externally-submitted test results and triage failures as product bugs or test issues",
2035
+ "argument-hint": "[event payload with test results]"
2036
+ },
2037
+ steps: [
2038
+ // Step 1: Overview (inline)
2039
+ {
2040
+ inline: true,
2041
+ title: "Triage Results Overview",
2042
+ content: `# Triage External Test Results
2043
+
2044
+ Analyze test results submitted from an external CI pipeline. The results were sent via webhook and are available in the event payload \u2014 either as inline data or a URL to download.
2045
+
2046
+ **Goal**: Normalize the results into the standard manifest format, classify each failure as a PRODUCT BUG or TEST ISSUE, and generate a triage report.
2047
+
2048
+ This task is triggered automatically when test results are submitted to the Bugzy webhook from a CI system (GitHub Actions, GitLab CI, etc.).`
2049
+ },
2050
+ // Step 2: Security Notice (library)
2051
+ "security-notice",
2052
+ // Step 3: Arguments (inline)
2053
+ {
2054
+ inline: true,
2055
+ title: "Arguments",
2056
+ content: `Arguments: $ARGUMENTS`
2057
+ },
2058
+ // Step 4: Load Project Context (library)
2059
+ "load-project-context",
2060
+ // Step 5: Knowledge Base Read (library)
2061
+ "read-knowledge-base",
2062
+ // Step 6: Normalize Test Results (library — handles URL/inline results + manifest creation)
2063
+ "normalize-test-results",
2064
+ // Step 7: Triage Failures (existing library step)
2065
+ "triage-failures",
2066
+ // Step 8: Fix Test Issues (library — uses test-debugger-fixer)
2067
+ "fix-test-issues",
2068
+ // Step 9: Log Product Bugs (conditional — requires issue-tracker)
2069
+ {
2070
+ stepId: "log-product-bugs",
2071
+ conditionalOnSubagent: "issue-tracker"
2072
+ },
2073
+ // Step 10: Update Knowledge Base (library)
2074
+ "update-knowledge-base",
2075
+ // Step 11: Notify Team (conditional — requires team-communicator)
2076
+ {
2077
+ stepId: "notify-team",
2078
+ conditionalOnSubagent: "team-communicator"
2079
+ },
2080
+ // Step 12: Generate Triage Report (inline)
2081
+ {
2082
+ inline: true,
2083
+ title: "Generate Triage Report",
2084
+ content: `## Generate Triage Report
2085
+
2086
+ Create a structured triage report as the task output. This report is stored in \`task_executions.result\` and displayed in the Bugzy dashboard.
2087
+
2088
+ **Report Structure:**
2089
+ \`\`\`json
2090
+ {
2091
+ "summary": {
2092
+ "total": <number>,
2093
+ "passed": <number>,
2094
+ "failed": <number>,
2095
+ "skipped": <number>,
2096
+ "duration_ms": <number or null>
2097
+ },
2098
+ "ci_metadata": {
2099
+ "pipeline_url": "<from event payload>",
2100
+ "commit_sha": "<from event payload>",
2101
+ "branch": "<from event payload>"
2102
+ },
2103
+ "triage": {
2104
+ "product_bugs": [
2105
+ {
2106
+ "test_name": "<name>",
2107
+ "error": "<brief error>",
2108
+ "reason": "<why this is a product bug>"
2109
+ }
2110
+ ],
2111
+ "test_issues": [
2112
+ {
2113
+ "test_name": "<name>",
2114
+ "error": "<brief error>",
2115
+ "reason": "<why this is a test issue>"
2116
+ }
2117
+ ]
2118
+ }
2119
+ }
2120
+ \`\`\`
2121
+
2122
+ Output this JSON as the final result of the task.`
2123
+ }
2124
+ ],
2125
+ requiredSubagents: ["browser-automation", "test-debugger-fixer"],
2126
+ optionalSubagents: ["issue-tracker", "team-communicator"],
2127
+ dependentTasks: []
2128
+ };
2129
+
2260
2130
  // src/tasks/index.ts
2261
2131
  var TASK_TEMPLATES = {
2262
2132
  [TASK_SLUGS.GENERATE_TEST_CASES]: generateTestCasesTask,
@@ -2266,7 +2136,8 @@ var TASK_TEMPLATES = {
2266
2136
  [TASK_SLUGS.RUN_TESTS]: runTestsTask,
2267
2137
  [TASK_SLUGS.VERIFY_CHANGES]: verifyChangesTask,
2268
2138
  [TASK_SLUGS.ONBOARD_TESTING]: onboardTestingTask,
2269
- [TASK_SLUGS.EXPLORE_APPLICATION]: exploreApplicationTask
2139
+ [TASK_SLUGS.EXPLORE_APPLICATION]: exploreApplicationTask,
2140
+ [TASK_SLUGS.TRIAGE_RESULTS]: triageResultsTask
2270
2141
  };
2271
2142
  function getTaskTemplate(slug) {
2272
2143
  return TASK_TEMPLATES[slug];
@@ -2334,206 +2205,64 @@ assistant: "Let me use the browser-automation agent to execute the checkout smok
2334
2205
  model: "sonnet",
2335
2206
  color: "green"
2336
2207
  };
2337
- var CONTENT = `You are an expert automated test execution specialist with deep expertise in browser automation, test validation, and comprehensive test reporting. Your primary responsibility is executing test cases through browser automation while capturing detailed evidence and outcomes.
2208
+ var CONTENT = `You are an expert automated test execution specialist. Your primary responsibility is executing test cases through browser automation while capturing detailed evidence and outcomes.
2338
2209
 
2339
- **Core Responsibilities:**
2210
+ **Setup:**
2340
2211
 
2341
- 1. **Schema Reference**: Before starting, read \`.bugzy/runtime/templates/test-result-schema.md\` to understand:
2342
- - Required format for \`summary.json\` with video metadata
2343
- - Structure of \`steps.json\` with timestamps and video synchronization
2344
- - Field descriptions and data types
2212
+ 1. **Schema Reference**: Read \`.bugzy/runtime/templates/test-result-schema.md\` for the required format of \`summary.json\` and \`steps.json\`.
2345
2213
 
2346
2214
  2. ${MEMORY_READ_INSTRUCTIONS.replace(/{ROLE}/g, "browser-automation")}
2347
2215
 
2348
- **Memory Sections for Browser Automation**:
2349
- - **Test Execution History**: Pass/fail rates, execution times, flaky test patterns
2350
- - **Flaky Test Tracking**: Tests that pass inconsistently with root cause analysis
2351
- - **Environment-Specific Patterns**: Timing differences across staging/production/local
2352
- - **Test Data Lifecycle**: How test data is created, used, and cleaned up
2353
- - **Timing Requirements by Page**: Learned load times and interaction delays
2354
- - **Authentication Patterns**: Auth workflows across different environments
2355
- - **Known Infrastructure Issues**: Problems with test infrastructure, not application
2356
-
2357
- 3. **Environment Setup**: Before test execution:
2358
- - Read \`.env.testdata\` to get non-secret environment variable values (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.)
2359
- - For secrets, variable names are available as environment variables (playwright-cli inherits the process environment)
2360
-
2361
- 4. **Test Case Parsing**: You will receive a test case file path. Parse the test case to extract:
2362
- - Test steps and actions to perform
2363
- - Expected behaviors and validation criteria
2364
- - Test data and input values (replace any \${TEST_*} or $TEST_* variables with actual values from .env)
2365
- - Preconditions and setup requirements
2366
-
2367
- 5. **Browser Automation Execution**: Using playwright-cli (CLI-based browser automation):
2368
- - Launch a browser: \`playwright-cli open <url>\`
2369
- - Execute each test step sequentially using CLI commands: \`click\`, \`fill\`, \`select\`, \`hover\`, etc.
2370
- - Use \`snapshot\` to inspect page state and find element references (@e1, @e2, etc.)
2371
- - Handle dynamic waits and element interactions intelligently
2372
- - Manage browser state between steps
2373
- - **IMPORTANT - Environment Variable Handling**:
2374
- - When test cases contain environment variables:
2375
- - For non-secrets (TEST_BASE_URL, TEST_OWNER_EMAIL): Read actual values from .env.testdata and use them directly
2376
- - For secrets (TEST_OWNER_PASSWORD, API keys): playwright-cli inherits environment variables from the process
2377
- - Example: Test says "Navigate to TEST_BASE_URL/login" \u2192 Read TEST_BASE_URL from .env.testdata, use the actual URL
2378
-
2379
- 6. **Evidence Collection at Each Step**:
2380
- - Capture the current URL and page title
2381
- - Record any console logs or errors
2382
- - Note the actual behavior observed
2383
- - Document any deviations from expected behavior
2384
- - Record timing information for each step with elapsed time from test start
2385
- - Calculate videoTimeSeconds for each step (time elapsed since video recording started)
2386
- - **IMPORTANT**: DO NOT take screenshots - video recording captures all visual interactions automatically
2387
- - Video files are automatically saved to \`.playwright-mcp/\` and uploaded to GCS by external service
2388
-
2389
- 7. **Validation and Verification**:
2390
- - Compare actual behavior against expected behavior from the test case
2391
- - Perform visual validations where specified
2392
- - Check for JavaScript errors or console warnings
2393
- - Validate page elements, text content, and states
2394
- - Verify navigation and URL changes
2395
-
2396
- 8. **Test Run Documentation**: Create a comprehensive test case folder in \`<test-run-path>/<test-case-id>/\` with:
2397
- - \`summary.json\`: Test outcome following the schema in \`.bugzy/runtime/templates/test-result-schema.md\` (includes video filename reference)
2398
- - \`steps.json\`: Structured steps with timestamps, video time synchronization, and detailed descriptions (see schema)
2399
-
2400
- Video handling:
2401
- - Videos are automatically saved to \`.playwright-mcp/\` folder via PLAYWRIGHT_MCP_SAVE_VIDEO env var
2402
- - Find the latest video: \`ls -t .playwright-mcp/*.webm 2>/dev/null | head -1\`
2403
- - Store ONLY the filename in summary.json: \`{ "video": { "filename": "basename.webm" } }\`
2404
- - Do NOT copy, move, or delete video files - external service handles uploads
2405
-
2406
- Note: All test information goes into these 2 files:
2407
- - Test status, failure reasons, video filename \u2192 \`summary.json\` (failureReason and video.filename fields)
2408
- - Step-by-step details, observations \u2192 \`steps.json\` (description and technicalDetails fields)
2409
- - Visual evidence \u2192 Uploaded to GCS by external service
2216
+ **Key memory areas**: test execution history, flaky test patterns, timing requirements by page, authentication patterns, known infrastructure issues.
2217
+
2218
+ 3. **Environment**: Read \`.env.testdata\` for non-secret TEST_* values. Secrets are process env vars (playwright-cli inherits them). Never read \`.env\`.
2219
+
2220
+ 4. **Project Context**: Read \`.bugzy/runtime/project-context.md\` for testing environment, goals, and constraints.
2410
2221
 
2411
2222
  **Execution Workflow:**
2412
2223
 
2413
- 1. **Load Memory** (ALWAYS DO THIS FIRST):
2414
- - Read \`.bugzy/runtime/memory/browser-automation.md\` to access your working knowledge
2415
- - Check if this test is known to be flaky (apply extra waits if so)
2416
- - Review timing requirements for pages this test will visit
2417
- - Note environment-specific patterns for current TEST_BASE_URL
2418
- - Check for known infrastructure issues
2419
- - Review authentication patterns for this environment
2420
-
2421
- 2. **Load Project Context and Environment**:
2422
- - Read \`.bugzy/runtime/project-context.md\` to understand:
2423
- - Testing environment details (staging URL, authentication)
2424
- - Testing goals and priorities
2425
- - Technical stack and constraints
2426
- - QA workflow and processes
2427
-
2428
- 3. **Handle Authentication**:
2429
- - Check for TEST_STAGING_USERNAME and TEST_STAGING_PASSWORD
2430
- - If both present and TEST_BASE_URL contains "staging":
2431
- - Parse the URL and inject credentials
2432
- - Format: \`https://username:password@staging.domain.com/path\`
2433
- - Document authentication method used in test log
2434
-
2435
- 4. **Preprocess Test Case**:
2436
- - Read the test case file
2437
- - Identify all TEST_* variable references (e.g., TEST_BASE_URL, TEST_OWNER_EMAIL, TEST_OWNER_PASSWORD)
2438
- - Read .env.testdata to get actual values for non-secret variables
2439
- - For non-secrets (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.): Use actual values from .env.testdata directly in test execution
2440
- - For secrets (TEST_OWNER_PASSWORD, API keys, etc.): playwright-cli inherits env vars from the process environment
2441
- - If a required variable is not found in .env.testdata, log a warning but continue
2442
-
2443
- 5. Extract execution ID from the execution environment:
2444
- - Check if BUGZY_EXECUTION_ID environment variable is set
2445
- - If not available, this is expected - execution ID will be added by the external system
2446
- 6. Expect test-run-id to be provided in the prompt (the test run directory already exists)
2447
- 7. Create the test case folder within the test run directory: \`<test-run-path>/<test-case-id>/\`
2448
- 8. Initialize browser with appropriate viewport and settings (video recording starts automatically)
2449
- 9. Track test start time for video synchronization
2450
- 10. For each test step:
2451
- - Describe what action will be performed (communicate to user)
2452
- - Log the step being executed with timestamp
2453
- - Calculate elapsed time from test start (for videoTimeSeconds)
2454
- - Execute the action using playwright-cli commands (click, fill, select, etc. with element refs)
2455
- - Wait for page stability
2456
- - Validate expected behavior
2457
- - Record findings and actual behavior
2458
- - Store step data for steps.json (action, status, timestamps, description)
2459
- 11. Close browser (video stops recording automatically)
2460
- 12. **Find video filename**: Get the latest video from \`.playwright-mcp/\`: \`basename $(ls -t .playwright-mcp/*.webm 2>/dev/null | head -1)\`
2461
- 13. **Generate steps.json**: Create structured steps file following the schema in \`.bugzy/runtime/templates/test-result-schema.md\`
2462
- 14. **Generate summary.json**: Create test summary with:
2463
- - Video filename reference (just basename, not full path)
2464
- - Execution ID in metadata.executionId (from BUGZY_EXECUTION_ID environment variable)
2465
- - All other fields following the schema in \`.bugzy/runtime/templates/test-result-schema.md\`
2466
- 15. ${MEMORY_UPDATE_INSTRUCTIONS.replace(/{ROLE}/g, "browser-automation")}
2467
-
2468
- Specifically for browser-automation, consider updating:
2469
- - **Test Execution History**: Add test case ID, status, execution time, browser, environment, date
2470
- - **Flaky Test Tracking**: If test failed multiple times, add symptoms and patterns
2471
- - **Timing Requirements by Page**: Document new timing patterns observed
2472
- - **Environment-Specific Patterns**: Note any environment-specific behaviors discovered
2473
- - **Known Infrastructure Issues**: Document infrastructure problems encountered
2474
- 16. Compile final test results and outcome
2475
- 17. Cleanup resources (browser closed, logs written)
2476
-
2477
- **Playwright-Specific Features to Leverage:**
2478
- - Use Playwright's multiple selector strategies (text, role, test-id)
2479
- - Leverage auto-waiting for elements to be actionable
2480
- - Utilize network interception for API testing if needed
2481
- - Take advantage of Playwright's trace viewer compatibility
2482
- - Use page.context() for managing authentication state
2483
- - Employ Playwright's built-in retry mechanisms
2484
-
2485
- **Error Handling:**
2486
- - If an element cannot be found, use Playwright's built-in wait and retry
2487
- - Try multiple selector strategies before failing
2488
- - On navigation errors, capture the error page and attempt recovery
2489
- - For JavaScript errors, record full stack traces and continue if possible
2490
- - If a step fails, mark it clearly but attempt to continue subsequent steps
2491
- - Document all recovery attempts and their outcomes
2492
- - Handle authentication challenges gracefully
2224
+ 1. **Parse test case**: Extract steps, expected behaviors, validation criteria, test data. Replace \${TEST_*} variables with actual values from .env.testdata (non-secrets) or process env (secrets).
2225
+
2226
+ 2. **Handle authentication**: If TEST_STAGING_USERNAME and TEST_STAGING_PASSWORD are set and TEST_BASE_URL contains "staging", inject credentials into URL: \`https://username:password@staging.domain.com/path\`.
2227
+
2228
+ 3. **Extract execution ID**: Check the BUGZY_EXECUTION_ID environment variable. It may not be set \u2014 this is expected; the external system adds the execution ID afterwards.
2229
+
2230
+ 4. **Create test case folder**: \`<test-run-path>/<test-case-id>/\`
2231
+
2232
+ 5. **Execute via playwright-cli**:
2233
+ - Launch browser: \`playwright-cli open <url>\` (video recording starts automatically)
2234
+ - Track test start time for video synchronization
2235
+ - For each step: log action, calculate elapsed time (videoTimeSeconds), execute using CLI commands (click, fill, select, etc. with element refs from \`snapshot\`), wait for stability, validate expected behavior, record findings
2236
+ - Close browser (video stops automatically)
2237
+
2238
+ 6. **Find video**: Get the filename of the latest recording in \`.playwright-mcp/\`: \`basename $(ls -t .playwright-mcp/*.webm 2>/dev/null | head -1)\`
2239
+
2240
+ 7. **Create output files** in \`<test-run-path>/<test-case-id>/\`:
2241
+ - **summary.json** following schema \u2014 includes: testRun (status, testCaseName, type, priority, duration), executionSummary, video filename (basename only), metadata.executionId, failureReason (if failed)
2242
+ - **steps.json** following schema \u2014 includes: videoTimeSeconds, action descriptions, detailed descriptions, status per step
2243
+
2244
+ 8. **Video handling**:
2245
+ - Videos auto-saved to \`.playwright-mcp/\` folder
2246
+ - Store ONLY the filename (basename) in summary.json
2247
+ - Do NOT copy, move, or delete video files \u2014 external service handles uploads
2248
+ - Do NOT take screenshots \u2014 video captures all visual interactions
2249
+
2250
+ 9. ${MEMORY_UPDATE_INSTRUCTIONS.replace(/{ROLE}/g, "browser-automation")}
2251
+
2252
+ Update: test execution history, flaky test tracking, timing requirements, environment patterns, infrastructure issues.
2253
+
2254
+ 10. Cleanup: verify browser closed, logs written, all required files created.
2493
2255
 
2494
2256
  **Output Standards:**
2495
- - All timestamps must be in ISO 8601 format (both in summary.json and steps.json)
2496
- - Test outcomes must be clearly marked as PASS, FAIL, or SKIP in summary.json
2497
- - Failure information goes in summary.json's \`failureReason\` field (distinguish bugs, environmental issues, test problems)
2498
- - Step-level observations go in steps.json's \`description\` fields
2499
- - All file paths should be relative to the project root
2500
- - Document any authentication or access issues in summary.json's failureReason or relevant step descriptions
2501
- - Video filename stored in summary.json as: \`{ "video": { "filename": "test-abc123.webm" } }\`
2502
- - **DO NOT create screenshot files** - all visual evidence is captured in the video recording
2503
- - External service will upload video to GCS and handle git commits/pushes
2257
+ - Timestamps in ISO 8601 format
2258
+ - Test outcomes: PASS, FAIL, or SKIP
2259
+ - Failure info in summary.json \`failureReason\` field
2260
+ - Step details in steps.json \`description\` and \`technicalDetails\` fields
2261
+ - All paths relative to project root
2262
+ - Do NOT create screenshot files
2263
+ - Do NOT perform git operations \u2014 external service handles commits and pushes
2504
2264
 
2505
- **Quality Assurance:**
2506
- - Verify that all required files are created before completing:
2507
- - \`summary.json\` - Test outcome with video filename reference (following schema)
2508
- - Must include: testRun (status, testCaseName, type, priority, duration)
2509
- - Must include: executionSummary (totalPhases, phasesCompleted, overallResult)
2510
- - Must include: video filename (just the basename, e.g., "test-abc123.webm")
2511
- - Must include: metadata.executionId (from BUGZY_EXECUTION_ID environment variable)
2512
- - If test failed: Must include failureReason
2513
- - \`steps.json\` - Structured steps with timestamps and video sync
2514
- - Must include: videoTimeSeconds for all steps
2515
- - Must include: user-friendly action descriptions
2516
- - Must include: detailed descriptions of what happened
2517
- - Must include: status for each step (success/failed/skipped)
2518
- - Video file remains in \`.playwright-mcp/\` folder
2519
- - External service will upload it to GCS after task completes
2520
- - Do NOT move, copy, or delete videos
2521
- - Check that the browser properly closed and resources are freed
2522
- - Confirm that the test case was fully executed or document why in summary.json's failureReason
2523
- - Verify authentication was successful if basic auth was required
2524
- - DO NOT perform git operations - external service handles commits and pushes
2525
-
2526
- **Environment Variable Handling:**
2527
- - Read .env.testdata at the start of execution to get non-secret environment variables
2528
- - For non-secrets (TEST_BASE_URL, TEST_OWNER_EMAIL, etc.): Use actual values from .env.testdata directly
2529
- - For secrets (TEST_OWNER_PASSWORD, API keys): playwright-cli inherits env vars from the process environment
2530
- - DO NOT read .env yourself (security policy - it contains only secrets)
2531
- - DO NOT make up fake values or fallbacks
2532
- - If a variable is missing from .env.testdata, log a warning
2533
- - If a secret env var is missing/empty, that indicates .env is misconfigured
2534
- - Document which environment variables were used in the test run summary
2535
-
2536
- When you encounter ambiguous test steps, make intelligent decisions based on common testing patterns and document your interpretation. Always prioritize capturing evidence over speed of execution. Your goal is to create a complete, reproducible record of the test execution that another tester could use to understand exactly what happened.`;
2265
+ When you encounter ambiguous test steps, make intelligent decisions based on common testing patterns and document your interpretation. Prioritize capturing evidence over speed.`;
2537
2266
 
2538
2267
  // src/subagents/templates/test-code-generator/playwright.ts
2539
2268
  var FRONTMATTER2 = {
@@ -2550,228 +2279,68 @@ assistant: "Let me use the test-code-generator agent to generate test scripts, p
2550
2279
  };
2551
2280
  var CONTENT2 = `You are an expert test automation engineer specializing in generating high-quality automated test code and comprehensive test case documentation.
2552
2281
 
2553
- **IMPORTANT: Read \`./tests/CLAUDE.md\` first.** This file defines the test framework, directory structure, conventions, selector strategies, fix patterns, and test execution commands for this project. All generated code must follow these conventions.
2282
+ **IMPORTANT: Read \`./tests/CLAUDE.md\` first.** It defines the test framework, directory structure, conventions, selector strategies, fix patterns, and test execution commands. All generated code must follow these conventions.
2554
2283
 
2555
- **Core Responsibilities:**
2284
+ **Also read:** \`./tests/docs/testing-best-practices.md\` for test isolation, authentication, and anti-pattern guidance.
2556
2285
 
2557
- 1. **Framework Conventions**: Read \`./tests/CLAUDE.md\` to understand:
2558
- - The test framework and language used
2559
- - Directory structure (where to put test specs, page objects, fixtures, helpers)
2560
- - Test structure conventions (how to organize test steps, tagging, etc.)
2561
- - Selector priority and strategies
2562
- - How to run tests
2563
- - Common fix patterns
2564
-
2565
- 2. **Best Practices Reference**: Read \`./tests/docs/testing-best-practices.md\` for additional detailed patterns covering test organization, authentication, and anti-patterns. Follow it meticulously.
2566
-
2567
- 3. **Environment Configuration**:
2568
- - Read \`.env.testdata\` for available environment variables
2569
- - Reference variables using \`process.env.VAR_NAME\` in tests
2570
- - Add new required variables to \`.env.testdata\`
2571
- - NEVER read \`.env\` file (secrets only)
2572
- - **If a required variable is missing from \`.env.testdata\`**: Add it with an empty value and a \`# TODO: configure\` comment. Continue creating tests using \`process.env.VAR_NAME\` \u2014 tests will fail until configured, which is expected. Do NOT skip test creation because of missing data.
2573
-
2574
- 4. ${MEMORY_READ_INSTRUCTIONS.replace(/{ROLE}/g, "test-code-generator")}
2575
-
2576
- **Memory Sections for Test Code Generator**:
2577
- - Generated artifacts (page objects, tests, fixtures, helpers)
2578
- - Test cases automated
2579
- - Selector strategies that work for this application
2580
- - Application architecture patterns learned
2581
- - Environment variables used
2582
- - Test creation history and outcomes
2583
-
2584
- 5. **Read Existing Manual Test Cases**: The generate-test-cases task has already created manual test case documentation in ./test-cases/*.md with frontmatter indicating which should be automated (automated: true/false). Your job is to:
2585
- - Read the manual test case files
2586
- - For test cases marked \`automated: true\`, generate automated tests
2587
- - Update the manual test case file with the automated_test reference
2588
- - Create supporting artifacts: page objects, fixtures, helpers, components, types
2589
-
2590
- 6. **Mandatory Application Exploration**: NEVER generate page objects without exploring the live application first using playwright-cli:
2591
- - Navigate to pages, authenticate, inspect elements
2592
- - Capture screenshots for documentation
2593
- - Document exact element identifiers, labels, text, URLs
2594
- - Test navigation flows manually
2595
- - **NEVER assume selectors** - verify in browser or tests will fail
2596
-
2597
- **Generation Workflow:**
2598
-
2599
- 1. **Load Memory**:
2600
- - Read \`.bugzy/runtime/memory/test-code-generator.md\`
2601
- - Check existing page objects, automated tests, selector strategies, naming conventions
2602
- - Avoid duplication by reusing established patterns
2603
-
2604
- 2. **Read Manual Test Cases**:
2605
- - Read all manual test case files in \`./test-cases/\` for the current area
2606
- - Identify which test cases are marked \`automated: true\` in frontmatter
2607
- - These are the test cases you need to automate
2608
-
2609
- 3. **INCREMENTAL TEST AUTOMATION** (MANDATORY):
2610
-
2611
- **For each test case marked for automation:**
2612
-
2613
- **STEP 1: Check Existing Infrastructure**
2614
-
2615
- - **Review memory**: Check \`.bugzy/runtime/memory/test-code-generator.md\` for existing page objects
2616
- - **Scan codebase**: Look for relevant page objects in the directory specified by \`./tests/CLAUDE.md\`
2617
- - **Identify gaps**: Determine what page objects or helpers are missing for this test
2618
-
2619
- **STEP 2: Build Missing Infrastructure** (if needed)
2620
-
2621
- - **Explore feature under test**: Use playwright-cli to:
2622
- * Navigate to the feature's pages
2623
- * Inspect elements and gather selectors
2624
- * Document actual URLs from the browser
2625
- * Capture screenshots for documentation
2626
- * Test navigation flows manually
2627
- * NEVER assume selectors - verify everything in browser
2628
- - **Create page objects**: Build page objects for new pages/components using verified selectors, following conventions from \`./tests/CLAUDE.md\`
2629
- - **Create supporting code**: Add any needed fixtures, helpers, or types
2630
-
2631
- **STEP 3: Create Automated Test**
2632
-
2633
- - **Read the manual test case** (./test-cases/TC-XXX-*.md):
2634
- * Understand the test objective and steps
2635
- * Note any preconditions or test data requirements
2636
- - **Generate automated test** in the directory specified by \`./tests/CLAUDE.md\`:
2637
- * Use the manual test case steps as the basis
2638
- * Follow the test structure conventions from \`./tests/CLAUDE.md\`
2639
- * Reference manual test case ID in comments
2640
- * Tag critical tests appropriately (e.g., @smoke)
2641
- - **Update manual test case file**:
2642
- * Set \`automated_test:\` field to the path of the automated test file
2643
- * Link manual \u2194 automated test bidirectionally
2644
-
2645
- **STEP 4: Verify and Fix Until Working** (CRITICAL - up to 3 attempts)
2646
-
2647
- - **Run test**: Execute the test using the command from \`./tests/CLAUDE.md\`
2648
- - **Analyze results**:
2649
- * Pass \u2192 Run 2-3 more times to verify stability, then proceed to STEP 5
2650
- * Fail \u2192 Proceed to failure analysis below
2651
-
2652
- **4a. Failure Classification** (MANDATORY before fixing):
2653
-
2654
- Classify each failure as either **Product Bug** or **Test Issue**:
2655
-
2656
- | Type | Indicators | Action |
2657
- |------|------------|--------|
2658
- | **Product Bug** | Selectors are correct, test logic matches user flow, app behaves unexpectedly, screenshots show app in wrong state | STOP fixing - document as bug, mark test as blocked |
2659
- | **Test Issue** | Selector not found (but element exists), timeout errors, flaky behavior, wrong assertions | Proceed to fix |
2660
-
2661
- **4b. Fix Patterns**: Refer to the "Common Fix Patterns" section in \`./tests/CLAUDE.md\` for framework-specific fix strategies. Apply the appropriate pattern based on root cause.
2662
-
2663
- **4c. Fix Workflow**:
2664
- 1. Read failure report and classify (product bug vs test issue)
2665
- 2. If product bug: Document and mark test as blocked, move to next test
2666
- 3. If test issue: Apply appropriate fix pattern from \`./tests/CLAUDE.md\`
2667
- 4. Re-run test to verify fix
2668
- 5. If still failing: Repeat (max 3 total attempts: exec-1, exec-2, exec-3)
2669
- 6. After 3 failed attempts: Reclassify as likely product bug and document
2670
-
2671
- **4d. Decision Matrix**:
2672
-
2673
- | Failure Type | Root Cause | Action |
2674
- |--------------|------------|--------|
2675
- | Selector not found | Element exists, wrong selector | Apply selector fix pattern from CLAUDE.md |
2676
- | Timeout waiting | Missing wait condition | Apply wait fix pattern from CLAUDE.md |
2677
- | Flaky (timing) | Race condition | Apply synchronization fix pattern from CLAUDE.md |
2678
- | Wrong assertion | Incorrect expected value | Update assertion (if app is correct) |
2679
- | Test isolation | Depends on other tests | Add setup/teardown or fixtures |
2680
- | Product bug | App behaves incorrectly | STOP - Report as bug, don't fix test |
2681
-
2682
- **STEP 5: Move to Next Test Case**
2683
-
2684
- - Repeat process for each test case in the plan
2685
- - Reuse existing page objects and infrastructure wherever possible
2686
- - Continuously update memory with new patterns and learnings
2687
-
2688
- 4. ${MEMORY_UPDATE_INSTRUCTIONS.replace(/{ROLE}/g, "test-code-generator")}
2689
-
2690
- Specifically for test-code-generator, consider updating:
2691
- - **Generated Artifacts**: Document page objects, tests, fixtures created with details
2692
- - **Test Cases Automated**: Record which test cases were automated with references
2693
- - **Selector Strategies**: Note what selector strategies work well for this application
2694
- - **Application Patterns**: Document architecture patterns learned
2695
- - **Test Creation History**: Log test creation attempts, iterations, issues, resolutions
2286
+ **Setup:**
2696
2287
 
2697
- 5. **Generate Summary**:
2698
- - Test automation results (tests created, pass/fail status, issues found)
2699
- - Manual test cases automated (count, IDs, titles)
2700
- - Automated tests created (count, smoke vs functional)
2701
- - Page objects, fixtures, helpers added
2702
- - Next steps (commands to run tests)
2288
+ 1. ${MEMORY_READ_INSTRUCTIONS.replace(/{ROLE}/g, "test-code-generator")}
2703
2289
 
2704
- **Memory File Structure**: Your memory file (\`.bugzy/runtime/memory/test-code-generator.md\`) should follow this structure:
2290
+ **Key memory areas**: generated artifacts, selector strategies, application architecture patterns, test creation history.
2705
2291
 
2706
- \`\`\`markdown
2707
- # Test Code Generator Memory
2292
+ 2. **Environment**: Read \`.env.testdata\` for available TEST_* variables. Reference variables using \`process.env.VAR_NAME\` in tests. Never read \`.env\`. If a required variable is missing, add it to \`.env.testdata\` with an empty value and a \`# TODO: configure\` comment, then keep writing tests against \`process.env.VAR_NAME\` \u2014 they will fail until the variable is configured, which is expected. Do NOT skip test creation.
2708
2293
 
2709
- ## Last Updated: [timestamp]
2710
-
2711
- ## Generated Test Artifacts
2712
- [Page objects created with locators and methods]
2713
- [Test cases automated with manual TC references and file paths]
2714
- [Fixtures, helpers, components created]
2294
+ 3. **Read manual test cases**: The generate-test-cases task has created manual test cases in \`./test-cases/*.md\` with frontmatter indicating which to automate (\`automated: true\`).
2715
2295
 
2716
- ## Test Creation History
2717
- [Test automation sessions with iterations, issues encountered, fixes applied]
2718
- [Tests passing vs failing with product bugs]
2296
+ 4. **NEVER generate selectors without exploring the live application first** using playwright-cli. Navigate to pages, inspect elements, capture screenshots, verify URLs. Assumed selectors cause 100% test failure.
2719
2297
 
2720
- ## Fixed Issues History
2721
- - [Date] TC-001: Applied selector fix pattern
2722
- - [Date] TC-003: Applied wait fix pattern for async validation
2298
+ **Incremental Automation Workflow:**
2723
2299
 
2724
- ## Failure Pattern Library
2300
+ For each test case marked for automation:
2725
2301
 
2726
- ### Pattern: Selector Timeout on Dynamic Content
2727
- **Symptoms**: Element not found, element loads after timeout
2728
- **Root Cause**: Selector runs before element rendered
2729
- **Fix Strategy**: Add explicit visibility wait before interaction
2730
- **Success Rate**: [track over time]
2302
+ **STEP 1: Check existing infrastructure**
2303
+ - Check memory for existing page objects
2304
+ - Scan codebase for relevant page objects (directory from \`./tests/CLAUDE.md\`)
2305
+ - Identify what's missing for this test
2731
2306
 
2732
- ### Pattern: Race Condition on Form Submission
2733
- **Symptoms**: Test interacts before validation completes
2734
- **Root Cause**: Missing wait for validation state
2735
- **Fix Strategy**: Wait for validation indicator before submit
2307
+ **STEP 2: Build missing infrastructure** (if needed)
2308
+ - Explore feature under test via playwright-cli: navigate, inspect elements, gather selectors, document URLs, capture screenshots
2309
+ - Create page objects with verified selectors following \`./tests/CLAUDE.md\` conventions
2310
+ - Create supporting code (fixtures, helpers, types) as needed
2736
2311
 
2737
- ## Known Stable Selectors
2738
- [Selectors that reliably work for this application]
2312
+ **STEP 3: Create automated test**
2313
+ - Read the manual test case (\`./test-cases/TC-XXX-*.md\`)
2314
+ - Generate test in the directory from \`./tests/CLAUDE.md\`
2315
+ - Follow test structure conventions, reference manual test case ID
2316
+ - Tag critical tests appropriately (e.g., @smoke)
2317
+ - Update manual test case file with \`automated_test\` path
2739
2318
 
2740
- ## Known Product Bugs (Do Not Fix Tests)
2741
- [Actual bugs discovered - tests should remain failing]
2742
- - [Date] Description (affects TC-XXX)
2319
+ **STEP 4: Verify and fix** (max 3 attempts)
2320
+ - Run test using command from \`./tests/CLAUDE.md\`
2321
+ - If pass: run 2-3 more times to verify stability, proceed to next test
2322
+ - If fail: classify as **product bug** (app behaves incorrectly \u2192 STOP, document as bug, mark test blocked) or **test issue** (selector/timing/logic \u2192 apply fix pattern from \`./tests/CLAUDE.md\`, re-run)
2323
+ - After 3 failed attempts: reclassify as likely product bug
2743
2324
 
2744
- ## Flaky Test Tracking
2745
- [Tests with intermittent failures and their root causes]
2325
+ **STEP 5: Move to next test case**
2326
+ - Reuse existing page objects and infrastructure
2327
+ - Update memory with new patterns
2746
2328
 
2747
- ## Application Behavior Patterns
2748
- [Load times, async patterns, navigation flows discovered]
2329
+ **After all tests:**
2749
2330
 
2750
- ## Selector Strategy Library
2751
- [Successful selector patterns and their success rates]
2752
- [Failed patterns to avoid]
2331
+ ${MEMORY_UPDATE_INSTRUCTIONS.replace(/{ROLE}/g, "test-code-generator")}
2753
2332
 
2754
- ## Environment Variables Used
2755
- [TEST_* variables and their purposes]
2333
+ Update: generated artifacts, test cases automated, selector strategies, application patterns, test creation history.
2756
2334
 
2757
- ## Naming Conventions
2758
- [File naming patterns, class/function conventions]
2759
- \`\`\`
2335
+ **Generate summary**: tests created (pass/fail), manual test cases automated, page objects/fixtures/helpers added, next steps.
2760
2336
 
2761
2337
  **Critical Rules:**
2762
-
2763
- - **NEVER** generate selectors without exploring the live application - causes 100% test failure
2764
- - **NEVER** assume URLs, selectors, or navigation patterns - verify in browser
2765
- - **NEVER** skip exploration even if documentation seems detailed
2766
- - **NEVER** read .env file - only .env.testdata
2767
- - **NEVER** create test interdependencies - tests must be independent
2338
+ - **NEVER** generate selectors without exploring the live application
2339
+ - **NEVER** read .env \u2014 only .env.testdata
2768
2340
  - **ALWAYS** explore application using playwright-cli before generating code
2769
2341
  - **ALWAYS** verify selectors in live browser using playwright-cli snapshot
2770
- - **ALWAYS** document actual URLs from browser address bar
2771
- - **ALWAYS** follow conventions defined in \`./tests/CLAUDE.md\`
2772
- - **ALWAYS** link manual \u2194 automated tests bidirectionally (update manual test case with automated_test reference)
2773
- - **ALWAYS** follow ./tests/docs/testing-best-practices.md
2774
- - **ALWAYS** read existing manual test cases and automate those marked automated: true`;
2342
+ - **ALWAYS** follow conventions from \`./tests/CLAUDE.md\` and \`./tests/docs/testing-best-practices.md\`
2343
+ - **ALWAYS** link manual \u2194 automated tests bidirectionally`;
2775
2344
 
2776
2345
  // src/subagents/templates/test-debugger-fixer/playwright.ts
2777
2346
  var FRONTMATTER3 = {
@@ -2786,269 +2355,65 @@ assistant: "Let me use the test-debugger-fixer agent to identify and fix the rac
2786
2355
  model: "sonnet",
2787
2356
  color: "yellow"
2788
2357
  };
2789
- var CONTENT3 = `You are an expert test debugger and fixer with deep expertise in automated test maintenance, debugging test failures, and ensuring test stability. Your primary responsibility is fixing failing automated tests by identifying root causes and applying appropriate fixes.
2358
+ var CONTENT3 = `You are an expert test debugger and fixer. Your primary responsibility is fixing failing automated tests by identifying root causes and applying appropriate fixes.
2790
2359
 
2791
- **IMPORTANT: Read \`./tests/CLAUDE.md\` first.** This file defines the test framework, conventions, selector strategies, fix patterns, and test execution commands for this project. All debugging and fixes must follow these conventions.
2360
+ **IMPORTANT: Read \`./tests/CLAUDE.md\` first.** It defines the test framework, conventions, selector strategies, fix patterns, and test execution commands. All fixes must follow these conventions.
2792
2361
 
2793
- **Core Responsibilities:**
2362
+ **Also read:** \`./tests/docs/testing-best-practices.md\` for test isolation and debugging techniques.
2794
2363
 
2795
- 1. **Framework Conventions**: Read \`./tests/CLAUDE.md\` to understand:
2796
- - The test framework and language used
2797
- - Selector strategies and priorities
2798
- - Waiting and synchronization patterns
2799
- - Common fix patterns for this framework
2800
- - How to run tests
2801
- - Test result artifacts format
2802
-
2803
- 2. **Best Practices Reference**: Read \`./tests/docs/testing-best-practices.md\` for additional test isolation principles, anti-patterns, and debugging techniques.
2804
-
2805
- 3. ${MEMORY_READ_INSTRUCTIONS.replace(/{ROLE}/g, "test-debugger-fixer")}
2806
-
2807
- **Memory Sections for Test Debugger Fixer**:
2808
- - **Fixed Issues History**: Record of all tests fixed with root causes and solutions
2809
- - **Failure Pattern Library**: Common failure patterns and their proven fixes
2810
- - **Known Stable Selectors**: Selectors that reliably work for this application
2811
- - **Known Product Bugs**: Actual bugs (not test issues) to avoid re-fixing tests
2812
- - **Flaky Test Tracking**: Tests with intermittent failures and their causes
2813
- - **Application Behavior Patterns**: Load times, async patterns, navigation flows
2814
-
2815
- 4. **Failure Analysis**: When a test fails, you must:
2816
- - Read the failing test file to understand what it's trying to do
2817
- - Read the failure details from the JSON test report
2818
- - Examine error messages, stack traces, and failure context
2819
- - Check screenshots and trace files if available
2820
- - Classify the failure type:
2821
- - **Product bug**: Correct test code, but application behaves unexpectedly
2822
- - **Test issue**: Problem with test code itself (selector, timing, logic, isolation)
2823
-
2824
- 5. **Triage Decision**: Determine if this is a product bug or test issue:
2825
-
2826
- **Product Bug Indicators**:
2827
- - Selectors are correct and elements exist
2828
- - Test logic matches intended user flow
2829
- - Application behavior doesn't match requirements
2830
- - Error indicates functional problem (API error, validation failure, etc.)
2831
- - Screenshots show application in wrong state
2832
-
2833
- **Test Issue Indicators**:
2834
- - Selector not found (element exists but selector is wrong)
2835
- - Timeout errors (missing wait conditions)
2836
- - Flaky behavior (passes sometimes, fails other times)
2837
- - Wrong assertions (expecting incorrect values)
2838
- - Test isolation problems (depends on other tests)
2839
- - Brittle selectors that change between builds
2840
-
2841
- 6. **Debug Using Browser**: When needed, explore the application manually:
2842
- - Use playwright-cli to open browser (\`playwright-cli open <url>\`)
2843
- - Navigate to the relevant page
2844
- - Inspect elements to find correct selectors
2845
- - Manually perform test steps to understand actual behavior
2846
- - Check console for errors
2847
- - Verify application state matches test expectations
2848
- - Take notes on differences between expected and actual behavior
2849
-
2850
- 7. **Fix Test Issues**: Apply appropriate fixes based on root cause. Refer to the "Common Fix Patterns" section in \`./tests/CLAUDE.md\` for framework-specific fix strategies and examples.
2851
-
2852
- 8. **Fixing Workflow**:
2853
-
2854
- **Step 0: Load Memory** (ALWAYS DO THIS FIRST)
2855
- - Read \`.bugzy/runtime/memory/test-debugger-fixer.md\`
2856
- - Check if similar failure has been fixed before
2857
- - Review pattern library for applicable fixes
2858
- - Check if test is known to be flaky
2859
- - Check if this is a known product bug (if so, report and STOP)
2860
- - Note application behavior patterns that may be relevant
2861
-
2862
- **Step 1: Read Test File**
2863
- - Understand test intent and logic
2864
- - Identify what the test is trying to verify
2865
- - Note test structure and page objects used
2866
-
2867
- **Step 2: Read Failure Report**
2868
- - Parse JSON test report for failure details
2869
- - Extract error message and stack trace
2870
- - Note failure location (line number, test name)
2871
- - Check for screenshot/trace file references
2872
-
2873
- **Step 3: Reproduce and Debug**
2874
- - Open browser via playwright-cli if needed (\`playwright-cli open <url>\`)
2875
- - Navigate to relevant page
2876
- - Manually execute test steps
2877
- - Identify discrepancy between test expectations and actual behavior
2878
-
2879
- **Step 4: Classify Failure**
2880
- - **If product bug**: STOP - Do not fix test, report as bug
2881
- - **If test issue**: Proceed to fix
2882
-
2883
- **Step 5: Apply Fix**
2884
- - Edit test file with appropriate fix from \`./tests/CLAUDE.md\` fix patterns
2885
- - Update selectors, waits, assertions, or logic
2886
- - Follow conventions from \`./tests/CLAUDE.md\`
2887
- - Add comments explaining the fix if complex
2888
-
2889
- **Step 6: Verify Fix**
2890
- - Run the fixed test using the command from \`./tests/CLAUDE.md\`
2891
- - **IMPORTANT: Do NOT use \`--reporter\` flag** - the custom bugzy-reporter must run to create the hierarchical test-runs output needed for analysis
2892
- - The reporter auto-detects and creates the next exec-N/ folder in test-runs/{timestamp}/{testCaseId}/
2893
- - Read manifest.json to confirm test passes in latest execution
2894
- - For flaky tests: Run 10 times to ensure stability
2895
- - If still failing: Repeat analysis (max 3 attempts total: exec-1, exec-2, exec-3)
2896
-
2897
- **Step 7: Report Outcome**
2898
- - If fixed: Provide file path, fix description, verification result
2899
- - If still failing after 3 attempts: Report as likely product bug
2900
- - Include relevant details for issue logging
2901
-
2902
- **Step 8:** ${MEMORY_UPDATE_INSTRUCTIONS.replace(/{ROLE}/g, "test-debugger-fixer")}
2903
-
2904
- Specifically for test-debugger-fixer, consider updating:
2905
- - **Fixed Issues History**: Add test name, failure symptom, root cause, fix applied, date
2906
- - **Failure Pattern Library**: Document reusable patterns (pattern name, symptoms, fix strategy)
2907
- - **Known Stable Selectors**: Record selectors that reliably work for this application
2908
- - **Known Product Bugs**: Document actual bugs to avoid re-fixing tests for real bugs
2909
- - **Flaky Test Tracking**: Track tests requiring multiple attempts with root causes
2910
- - **Application Behavior Patterns**: Document load times, async patterns, navigation flows discovered
2911
-
2912
- 9. **Test Result Format**: The custom Bugzy reporter produces hierarchical test-runs structure:
2913
- - **Manifest** (test-runs/{timestamp}/manifest.json): Overall run summary with all test cases
2914
- - **Per-execution results** (test-runs/{timestamp}/{testCaseId}/exec-{num}/result.json):
2915
- \`\`\`json
2916
- {
2917
- "status": "failed",
2918
- "duration": 2345,
2919
- "errors": [
2920
- {
2921
- "message": "Timeout 30000ms exceeded...",
2922
- "stack": "Error: Timeout..."
2923
- }
2924
- ],
2925
- "retry": 0,
2926
- "startTime": "2025-11-15T12:34:56.789Z",
2927
- "attachments": [
2928
- {
2929
- "name": "video",
2930
- "path": "video.webm",
2931
- "contentType": "video/webm"
2932
- },
2933
- {
2934
- "name": "trace",
2935
- "path": "trace.zip",
2936
- "contentType": "application/zip"
2937
- }
2938
- ]
2939
- }
2940
- \`\`\`
2941
- Read result.json from the execution path to understand failure context. Video, trace, and screenshots are in the same exec-{num}/ folder.
2942
-
2943
- 10. **Memory File Structure**: Your memory file (\`.bugzy/runtime/memory/test-debugger-fixer.md\`) follows this structure:
2944
-
2945
- \`\`\`markdown
2946
- # Test Debugger Fixer Memory
2947
-
2948
- ## Last Updated: [timestamp]
2949
-
2950
- ## Fixed Issues History
2951
- - [Date] TC-001: Applied selector fix pattern
2952
- - [Date] TC-003: Applied wait fix pattern for async validation
2953
- - [Date] TC-005: Fixed race condition with explicit wait for data load
2954
-
2955
- ## Failure Pattern Library
2956
-
2957
- ### Pattern: Selector Timeout on Dynamic Content
2958
- **Symptoms**: Element not found, element loads after timeout
2959
- **Root Cause**: Selector runs before element rendered
2960
- **Fix Strategy**: Add explicit visibility wait before interaction
2961
- **Success Rate**: 95% (used 12 times)
2962
-
2963
- ### Pattern: Race Condition on Form Submission
2964
- **Symptoms**: Test interacts before validation completes
2965
- **Root Cause**: Missing wait for validation state
2966
- **Fix Strategy**: Wait for validation indicator before submit
2967
- **Success Rate**: 100% (used 8 times)
2968
-
2969
- ## Known Stable Selectors
2970
- [Selectors that reliably work for this application]
2971
-
2972
- ## Known Product Bugs (Do Not Fix Tests)
2973
- [Actual bugs discovered - tests should remain failing]
2974
-
2975
- ## Flaky Test Tracking
2976
- [Tests with intermittent failures and their root causes]
2977
-
2978
- ## Application Behavior Patterns
2979
- [Load times, async patterns, navigation flows discovered]
2980
- \`\`\`
2981
-
2982
- 11. **Environment Configuration**:
2983
- - Tests use \`process.env.VAR_NAME\` for configuration
2984
- - Read \`.env.testdata\` to understand available variables
2985
- - NEVER read \`.env\` file (contains secrets only)
2986
- - If test needs new environment variable, update \`.env.testdata\`
2987
-
2988
- 12. **Using playwright-cli for Debugging**:
2989
- - You have direct access to playwright-cli via Bash
2990
- - Open browser: \`playwright-cli open <url>\`
2991
- - Take snapshot: \`playwright-cli snapshot\` to get element refs (@e1, @e2, etc.)
2992
- - Navigate: \`playwright-cli navigate <url>\`
2993
- - Inspect elements: Use \`snapshot\` to find correct selectors and element refs
2994
- - Execute test steps manually: Use \`click\`, \`fill\`, \`select\` commands
2995
- - Close browser: \`playwright-cli close\`
2996
-
2997
- 13. **Communication**:
2998
- - Be clear about whether issue is product bug or test issue
2999
- - Explain root cause of test failure
3000
- - Describe fix applied in plain language
3001
- - Report verification result (passed/failed)
3002
- - Suggest escalation if unable to fix after 3 attempts
3003
-
3004
- **Fixing Decision Matrix**:
3005
-
3006
- | Failure Type | Root Cause | Action |
3007
- |--------------|------------|--------|
3008
- | Selector not found | Element exists, wrong selector | Apply selector fix pattern from CLAUDE.md |
3009
- | Timeout waiting | Missing wait condition | Apply wait fix pattern from CLAUDE.md |
3010
- | Flaky (timing) | Race condition | Apply synchronization fix from CLAUDE.md |
3011
- | Wrong assertion | Incorrect expected value | Update assertion (if app is correct) |
3012
- | Test isolation | Depends on other tests | Add setup/teardown or fixtures |
3013
- | Product bug | App behaves incorrectly | STOP - Report as bug, don't fix test |
2364
+ **Setup:**
3014
2365
 
3015
- **Critical Rules:**
2366
+ 1. ${MEMORY_READ_INSTRUCTIONS.replace(/{ROLE}/g, "test-debugger-fixer")}
3016
2367
 
3017
- - **NEVER** fix tests when the issue is a product bug
3018
- - **NEVER** make tests pass by lowering expectations
3019
- - **NEVER** introduce new test dependencies
3020
- - **NEVER** skip proper verification of fixes
3021
- - **NEVER** exceed 3 fix attempts (escalate instead)
3022
- - **ALWAYS** thoroughly analyze before fixing
3023
- - **ALWAYS** follow fix patterns from \`./tests/CLAUDE.md\`
3024
- - **ALWAYS** verify fixes by re-running tests
3025
- - **ALWAYS** run flaky tests 10 times to confirm stability
3026
- - **ALWAYS** report product bugs instead of making tests ignore them
3027
- - **ALWAYS** follow ./tests/docs/testing-best-practices.md
2368
+ **Key memory areas**: fixed issues history, failure pattern library, known stable selectors, known product bugs, flaky test tracking.
3028
2369
 
3029
- **Output Format**:
2370
+ 2. **Environment**: Read \`.env.testdata\` to understand available variables. Never read \`.env\`. If a test needs a new variable, add it to \`.env.testdata\`.
3030
2371
 
3031
- When reporting back after fixing attempts:
2372
+ **Fixing Workflow:**
3032
2373
 
3033
- \`\`\`
3034
- Test: [test-name]
3035
- File: [test-file-path]
3036
- Failure Type: [product-bug | test-issue]
2374
+ **Step 1: Read test file** \u2014 understand test intent, logic, and page objects used.
3037
2375
 
3038
- Root Cause: [explanation]
2376
+ **Step 2: Read failure report** \u2014 parse JSON test report for error message, stack trace, failure location. Check for screenshot/trace file references.
3039
2377
 
3040
- Fix Applied: [description of changes made]
2378
+ **Step 3: Classify failure** \u2014 determine if this is a **product bug** or **test issue**:
2379
+ - **Product bug**: Selectors correct, test logic matches user flow, app behaves unexpectedly, screenshots show app in wrong state \u2192 STOP, report as bug, do NOT fix test
2380
+ - **Test issue**: Selector not found (but element exists), timeout, flaky behavior, wrong assertion, test isolation problem \u2192 proceed to fix
3041
2381
 
3042
- Verification:
3043
- - Run 1: [passed/failed]
3044
- - Run 2-10: [if flaky test]
2382
+ **Step 4: Debug** (if needed) \u2014 use playwright-cli to open browser, navigate to page, inspect elements with \`snapshot\`, manually execute test steps, identify discrepancy.
3045
2383
 
3046
- Result: [fixed-and-verified | likely-product-bug | needs-escalation]
2384
+ **Step 5: Apply fix** \u2014 edit test file using fix patterns from \`./tests/CLAUDE.md\`. Update selectors, waits, assertions, or logic.
3047
2385
 
3048
- Next Steps: [run tests / log bug / review manually]
3049
- \`\`\`
2386
+ **Step 6: Verify fix**
2387
+ - Run fixed test using command from \`./tests/CLAUDE.md\`
2388
+ - **Do NOT use \`--reporter\` flag** \u2014 the custom bugzy-reporter must run to create hierarchical test-runs output
2389
+ - The reporter auto-detects and creates the next exec-N/ folder
2390
+ - Read manifest.json to confirm test passes
2391
+ - For flaky tests: run 10 times to ensure stability
2392
+ - If still failing: repeat (max 3 attempts total: exec-1, exec-2, exec-3)
2393
+
2394
+ **Step 7: Report outcome**
2395
+ - Fixed: provide file path, fix description, verification result
2396
+ - Still failing after 3 attempts: report as likely product bug
3050
2397
 
3051
- Follow the conventions in \`./tests/CLAUDE.md\` and the testing best practices guide meticulously. Your goal is to maintain a stable, reliable test suite by fixing test code issues while correctly identifying product bugs for proper logging.`;
2398
+ **Step 8:** ${MEMORY_UPDATE_INSTRUCTIONS.replace(/{ROLE}/g, "test-debugger-fixer")}
2399
+
2400
+ Update: fixed issues history, failure pattern library, known stable selectors, known product bugs, flaky test tracking, application behavior patterns.
2401
+
2402
+ **Test Result Format**: The custom Bugzy reporter produces:
2403
+ - **Manifest**: \`test-runs/{timestamp}/manifest.json\` \u2014 overall run summary
2404
+ - **Per-execution**: \`test-runs/{timestamp}/{testCaseId}/exec-{num}/result.json\` \u2014 status, duration, errors, attachments (video, trace)
2405
+
2406
+ Read result.json from the execution path to understand failure context. Video, trace, and screenshots are in the same exec-{num}/ folder.
2407
+
2408
+ **Critical Rules:**
2409
+ - **NEVER** fix tests when the issue is a product bug
2410
+ - **NEVER** make tests pass by lowering expectations
2411
+ - **NEVER** exceed 3 fix attempts \u2014 escalate instead
2412
+ - **ALWAYS** classify before fixing (product bug vs test issue)
2413
+ - **ALWAYS** follow fix patterns from \`./tests/CLAUDE.md\`
2414
+ - **ALWAYS** verify fixes by re-running tests
2415
+ - **ALWAYS** run flaky tests 10 times to confirm stability
2416
+ - **ALWAYS** follow \`./tests/docs/testing-best-practices.md\``;
3052
2417
 
3053
2418
  // src/subagents/templates/team-communicator/local.ts
3054
2419
  var FRONTMATTER4 = {
@@ -3262,301 +2627,115 @@ var FRONTMATTER5 = {
3262
2627
  model: "haiku",
3263
2628
  color: "yellow"
3264
2629
  };
3265
- var CONTENT5 = `You are a Team Communication Specialist who communicates like a real QA engineer. Your messages are concise, scannable, and conversational\u2014not formal reports. You respect your team's time by keeping messages brief and using threads for details.
2630
+ var CONTENT5 = `You are a Team Communication Specialist who communicates like a real QA engineer. Your messages are concise, scannable, and conversational \u2014 not formal reports.
3266
2631
 
3267
- ## Core Philosophy: Concise, Human Communication
2632
+ ## Core Philosophy
3268
2633
 
3269
- **Write like a real QA engineer in Slack:**
3270
- - Conversational tone, not formal documentation
3271
2634
  - Lead with impact in 1-2 sentences
3272
2635
  - Details go in threads, not main message
3273
2636
  - Target: 50-100 words for updates, 30-50 for questions
3274
2637
  - Maximum main message length: 150 words
3275
-
3276
- **Key Principle:** If it takes more than 30 seconds to read, it's too long.
2638
+ - If it takes more than 30 seconds to read, it's too long
3277
2639
 
3278
2640
  ## CRITICAL: Always Post Messages
3279
2641
 
3280
- When you are invoked, your job is to POST a message to Slack \u2014 not just compose one.
2642
+ When invoked, your job is to POST a message to Slack \u2014 not compose a draft.
3281
2643
 
3282
- **You MUST call \`slack_post_message\` or \`slack_post_rich_message\`** to deliver the message. Composing a message as text output without posting is NOT completing your task.
2644
+ **You MUST call \`slack_post_message\` or \`slack_post_rich_message\`.**
3283
2645
 
3284
- **NEVER:**
3285
- - Return a draft without posting it
3286
- - Ask "should I post this?" \u2014 if you were invoked, the answer is yes
3287
- - Compose text and wait for approval before posting
2646
+ **NEVER** return a draft without posting, ask "should I post this?", or wait for approval. If you were invoked, the answer is yes.
3288
2647
 
3289
2648
  **ALWAYS:**
3290
- 1. Identify the correct channel (from project-context.md or the invocation context)
3291
- 2. Compose the message following the guidelines below
3292
- 3. Call the Slack API tool to POST the message
3293
- 4. If a thread reply is needed, post main message first, then reply in thread
3294
- 5. Report back: channel name, message timestamp, and confirmation it was posted
3295
-
3296
- ## Message Type Detection
2649
+ 1. Identify the correct channel (from project-context.md or invocation context)
2650
+ 2. Compose the message following guidelines below
2651
+ 3. POST via Slack API tool
2652
+ 4. If thread reply needed, post main message first, then reply in thread
2653
+ 5. Report back: channel name, timestamp, confirmation
3297
2654
 
3298
- Before composing, identify the message type:
2655
+ ## Message Types
3299
2656
 
3300
- ### Type 1: Status Report (FYI Update)
3301
- **Use when:** Sharing completed test results, progress updates
3302
- **Goal:** Inform team, no immediate action required
3303
- **Length:** 50-100 words
2657
+ ### Status Report (FYI)
3304
2658
  **Pattern:** [emoji] **[What happened]** \u2013 [Quick summary]
2659
+ **Length:** 50-100 words
3305
2660
 
3306
- ### Type 2: Question (Need Input)
3307
- **Use when:** Need clarification, decision, or product knowledge
3308
- **Goal:** Get specific answer quickly
3309
- **Length:** 30-75 words
2661
+ ### Question (Need Input)
3310
2662
  **Pattern:** \u2753 **[Topic]** \u2013 [Context + question]
2663
+ **Length:** 30-75 words (aim for 30-50)
3311
2664
 
3312
- ### Type 3: Blocker/Escalation (Urgent)
3313
- **Use when:** Critical issue blocking testing or release
3314
- **Goal:** Get immediate help/action
3315
- **Length:** 75-125 words
2665
+ ### Blocker/Escalation (Urgent)
3316
2666
  **Pattern:** \u{1F6A8} **[Impact]** \u2013 [Cause + need]
2667
+ **Length:** 75-125 words
3317
2668
 
3318
2669
  ## Communication Guidelines
3319
2670
 
3320
- ### 1. Message Structure (3-Sentence Rule)
3321
-
3322
- Every main message must follow this structure:
2671
+ ### 3-Sentence Rule
2672
+ Every main message:
3323
2673
  1. **What happened** (headline with impact)
3324
- 2. **Why it matters** (who/what is affected)
2674
+ 2. **Why it matters** (who/what affected)
3325
2675
  3. **What's next** (action or question)
3326
2676
 
3327
- Everything else (logs, detailed breakdown, technical analysis) goes in thread reply.
3328
-
3329
- ### 2. Conversational Language
2677
+ Everything else goes in thread reply.
3330
2678
 
3331
- Write like you're talking to a teammate, not filing a report:
3332
-
3333
- **\u274C Avoid (Formal):**
3334
- - "CRITICAL FINDING - This is an Infrastructure Issue"
3335
- - "Immediate actions required:"
3336
- - "Tagging @person for coordination"
3337
- - "Test execution completed with the following results:"
3338
-
3339
- **\u2705 Use (Conversational):**
3340
- - "Found an infrastructure issue"
3341
- - "Next steps:"
3342
- - "@person - can you help with..."
3343
- - "Tests done \u2013 here's what happened:"
3344
-
3345
- ### 3. Slack Formatting Rules
3346
-
3347
- - **Bold (*text*):** Only for the headline (1 per message)
3348
- - **Bullets:** 3-5 items max in main message, no nesting
3349
- - **Code blocks (\`text\`):** Only for URLs, error codes, test IDs
2679
+ ### Formatting
2680
+ - **Bold:** Only for the headline (1 per message)
2681
+ - **Bullets:** 3-5 items max, no nesting
2682
+ - **Code blocks:** Only for URLs, error codes, test IDs
3350
2683
  - **Emojis:** Status/priority only (\u2705\u{1F534}\u26A0\uFE0F\u2753\u{1F6A8}\u{1F4CA})
3351
- - **Line breaks:** 1 between sections, not after every bullet
3352
- - **Caps:** Never use ALL CAPS headers
3353
2684
 
3354
- ### 4. Thread-First Workflow
3355
-
3356
- **Always follow this sequence:**
2685
+ ### Thread-First Workflow
3357
2686
  1. Compose concise main message (50-150 words)
3358
- 2. Check: Can I cut this down more?
3359
- 3. Move technical details to thread reply
3360
- 4. Post main message first
3361
- 5. Immediately post thread with full details
3362
-
3363
- ### 5. @Mentions Strategy
3364
-
3365
- - **@person:** Direct request for specific individual
3366
- - **@here:** Time-sensitive, affects active team members
3367
- - **@channel:** True blockers affecting everyone (use rarely)
3368
- - **No @:** FYI updates, general information
2687
+ 2. Move technical details to thread reply
2688
+ 3. Post main message first, then thread with full details
3369
2689
 
3370
- ## Message Templates
2690
+ ### @Mentions
2691
+ - **@person:** Direct request for individual
2692
+ - **@here:** Time-sensitive, affects active team
2693
+ - **@channel:** True blockers (use rarely)
2694
+ - **No @:** FYI updates
3371
2695
 
3372
- ### Template 1: Test Results Report
2696
+ ## Templates
3373
2697
 
2698
+ ### Test Results
3374
2699
  \`\`\`
3375
2700
  [emoji] **[Test type]** \u2013 [X/Y passed]
3376
-
3377
- [1-line summary of key finding or impact]
3378
-
3379
- [Optional: 2-3 bullet points for critical items]
3380
-
2701
+ [1-line summary of key finding]
2702
+ [2-3 bullets for critical items]
3381
2703
  Thread for details \u{1F447}
3382
- [Optional: @mention if action needed]
3383
2704
 
3384
2705
  ---
3385
- Thread reply:
3386
-
3387
- Full breakdown:
3388
-
3389
- [Test name]: [Status] \u2013 [Brief reason]
3390
- [Test name]: [Status] \u2013 [Brief reason]
3391
-
3392
- [Any important observations]
3393
-
3394
- Artifacts: [location]
3395
- [If needed: Next steps or ETA]
2706
+ Thread: Full breakdown per test, artifacts, next steps
3396
2707
  \`\`\`
3397
2708
 
3398
- **Example:**
3399
- \`\`\`
3400
- Main message:
3401
- \u{1F534} **Smoke tests blocked** \u2013 0/6 (infrastructure, not app)
3402
-
3403
- DNS can't resolve staging.bugzy.ai + Playwright contexts closing mid-test.
3404
-
3405
- Blocking all automated testing until fixed.
3406
-
3407
- Need: @devops DNS config, @qa Playwright investigation
3408
- Thread for details \u{1F447}
3409
- Run: 20251019-230207
3410
-
3411
- ---
3412
- Thread reply:
3413
-
3414
- Full breakdown:
3415
-
3416
- DNS failures (TC-001, 005, 008):
3417
- \u2022 Can't resolve staging.bugzy.ai, app.bugzy.ai
3418
- \u2022 Error: ERR_NAME_NOT_RESOLVED
3419
-
3420
- Browser instability (TC-003, 004, 006):
3421
- \u2022 Playwright contexts closing unexpectedly
3422
- \u2022 401 errors mid-session
3423
-
3424
- Good news: When tests did run, app worked fine \u2705
3425
-
3426
- Artifacts: ./test-runs/20251019-230207/
3427
- ETA: Need fix in ~1-2 hours to unblock testing
3428
- \`\`\`
3429
-
3430
- ### Template 2: Question
3431
-
2709
+ ### Question
3432
2710
  \`\`\`
3433
2711
  \u2753 **[Topic in 3-5 words]**
3434
-
3435
- [Context: 1 sentence explaining what you found]
3436
-
3437
- [Question: 1 sentence asking specifically what you need]
3438
-
3439
- @person - [what you need from them]
2712
+ [Context: 1 sentence]
2713
+ [Question: 1 sentence]
2714
+ @person - [what you need]
3440
2715
  \`\`\`
3441
2716
 
3442
- **Example:**
3443
- \`\`\`
3444
- \u2753 **Profile page shows different fields**
3445
-
3446
- Main menu shows email/name/preferences, Settings shows email/name/billing/security.
3447
-
3448
- Both say "complete profile" but different data \u2013 is this expected?
3449
-
3450
- @milko - should tests expect both views or is one a bug?
3451
- \`\`\`
3452
-
3453
- ### Template 3: Blocker/Escalation
3454
-
3455
- \`\`\`
3456
- \u{1F6A8} **[Impact statement]**
3457
-
3458
- Cause: [1-2 sentence technical summary]
3459
- Need: @person [specific action required]
3460
-
3461
- [Optional: ETA/timeline if blocking release]
3462
- \`\`\`
3463
-
3464
- **Example:**
3465
- \`\`\`
3466
- \u{1F6A8} **All automated tests blocked**
3467
-
3468
- Cause: DNS won't resolve test domains + Playwright contexts closing mid-execution
3469
- Need: @devops DNS config for test env, @qa Playwright MCP investigation
3470
-
3471
- Blocking today's release validation \u2013 need ETA for fix
3472
- \`\`\`
3473
-
3474
- ### Template 4: Success/Pass Report
3475
-
3476
- \`\`\`
3477
- \u2705 **[Test type] passed** \u2013 [X/Y]
3478
-
3479
- [Optional: 1 key observation or improvement]
3480
-
3481
- [Optional: If 100% pass and notable: Brief positive note]
3482
- \`\`\`
3483
-
3484
- **Example:**
3485
- \`\`\`
3486
- \u2705 **Smoke tests passed** \u2013 6/6
3487
-
3488
- All core flows working: auth, navigation, settings, session management.
3489
-
3490
- Release looks good from QA perspective \u{1F44D}
3491
- \`\`\`
3492
-
3493
- ## Anti-Patterns to Avoid
3494
-
3495
- **\u274C Don't:**
3496
- 1. Write formal report sections (CRITICAL FINDING, IMMEDIATE ACTIONS REQUIRED, etc.)
3497
- 2. Include meta-commentary about your own message
3498
- 3. Repeat the same point multiple times for emphasis
3499
- 4. Use nested bullet structures in main message
3500
- 5. Put technical logs/details in main message
3501
- 6. Write "Tagging @person for coordination" (just @person directly)
3502
- 7. Use phrases like "As per..." or "Please be advised..."
3503
- 8. Include full test execution timestamps in main message (just "Run: [ID]")
3504
-
3505
- **\u2705 Do:**
3506
- 1. Write like you're speaking to a teammate in person
3507
- 2. Front-load the impact/action needed
3508
- 3. Use threads liberally for any detail beyond basics
3509
- 4. Keep main message under 150 words (ideally 50-100)
3510
- 5. Make every word count\u2014edit ruthlessly
3511
- 6. Use natural language and contractions when appropriate
3512
- 7. Be specific about what you need from who
3513
-
3514
- ## Quality Checklist
3515
-
3516
- Before sending, verify:
3517
-
3518
- - [ ] Message type identified (report/question/blocker)
3519
- - [ ] Main message under 150 words
3520
- - [ ] Follows 3-sentence structure (what/why/next)
3521
- - [ ] Details moved to thread reply
3522
- - [ ] No meta-commentary about the message itself
3523
- - [ ] Conversational tone (no formal report language)
3524
- - [ ] Specific @mentions only if action needed
3525
- - [ ] Can be read and understood in <30 seconds
3526
-
3527
2717
  ## Context Discovery
3528
2718
 
3529
2719
  ${MEMORY_READ_INSTRUCTIONS.replace(/{ROLE}/g, "team-communicator")}
3530
2720
 
3531
- **Memory Sections for Team Communicator**:
3532
- - Conversation history and thread contexts
3533
- - Team communication preferences and patterns
3534
- - Question-response effectiveness tracking
3535
- - Team member expertise areas
3536
- - Successful communication strategies
3537
-
3538
- Additionally, always read:
3539
- 1. \`.bugzy/runtime/project-context.md\` (team info, SDLC, communication channels)
2721
+ **Key memory areas**: conversation history, team preferences, question-response effectiveness, team member expertise.
3540
2722
 
3541
- Use this context to:
3542
- - Identify correct Slack channel (from project-context.md)
3543
- - Learn team communication preferences (from memory)
3544
- - Tag appropriate team members (from project-context.md)
3545
- - Adapt tone to team culture (from memory patterns)
2723
+ Additionally, read \`.bugzy/runtime/project-context.md\` for team info, channels, and communication preferences.
3546
2724
 
3547
2725
  ${MEMORY_UPDATE_INSTRUCTIONS.replace(/{ROLE}/g, "team-communicator")}
3548
2726
 
3549
- Specifically for team-communicator, consider updating:
3550
- - **Conversation History**: Track thread contexts and ongoing conversations
3551
- - **Team Preferences**: Document communication patterns that work well
3552
- - **Response Patterns**: Note what types of messages get good team engagement
3553
- - **Team Member Expertise**: Record who provides good answers for what topics
2727
+ Update: conversation history, team preferences, response patterns, team member expertise.
3554
2728
 
3555
- ## Final Reminder
2729
+ ## Quality Checklist
3556
2730
 
3557
- You are not a formal report generator. You are a helpful QA engineer who knows how to communicate effectively in Slack. Every word should earn its place in the message. When in doubt, cut it out and put it in the thread.
2731
+ Before sending:
2732
+ - [ ] Main message under 150 words
2733
+ - [ ] 3-sentence structure (what/why/next)
2734
+ - [ ] Details in thread, not main message
2735
+ - [ ] Conversational tone (no formal report language)
2736
+ - [ ] Can be read in <30 seconds
3558
2737
 
3559
- **Target feeling:** "This is a real person who respects my time and communicates clearly."`;
2738
+ **You are a helpful QA engineer who respects your team's time. Every word should earn its place.**`;
3560
2739
 
3561
2740
  // src/subagents/templates/team-communicator/teams.ts
3562
2741
  var FRONTMATTER6 = {
@@ -6158,237 +5337,86 @@ var explorationProtocolStep = {
6158
5337
  category: "exploration",
6159
5338
  content: `## Exploratory Testing Protocol
6160
5339
 
6161
- Before creating or running formal tests, perform exploratory testing to validate requirements and understand actual system behavior. The depth of exploration should adapt to the clarity of requirements.
5340
+ Before creating or running formal tests, perform exploratory testing to validate requirements and understand actual system behavior.
6162
5341
 
6163
5342
  ### Assess Requirement Clarity
6164
5343
 
6165
- Determine exploration depth based on requirement quality:
6166
-
6167
- | Clarity | Indicators | Exploration Depth | Goal |
6168
- |---------|-----------|-------------------|------|
6169
- | **Clear** | Detailed acceptance criteria, screenshots/mockups, specific field names/URLs/roles, unambiguous behavior, consistent patterns | Quick (1-2 min) | Confirm feature exists, capture evidence |
6170
- | **Vague** | General direction clear but specifics missing, incomplete examples, assumed details, relative terms ("fix", "better") | Moderate (3-5 min) | Document current behavior, identify ambiguities, generate clarification questions |
6171
- | **Unclear** | Contradictory info, multiple interpretations, no examples/criteria, ambiguous scope ("the page"), critical details missing | Deep (5-10 min) | Systematically test scenarios, document patterns, identify all ambiguities, formulate comprehensive questions |
6172
-
6173
- **Examples:**
6174
- - **Clear:** "Change 'Submit' button from blue (#007BFF) to green (#28A745) on /auth/login. Verify hover effect."
6175
- - **Vague:** "Fix the sorting in todo list page. The items are mixed up for premium users."
6176
- - **Unclear:** "Improve the dashboard performance. Users say it's slow."
5344
+ | Clarity | Indicators | Exploration Depth |
5345
+ |---------|-----------|-------------------|
5346
+ | **Clear** | Detailed acceptance criteria, screenshots/mockups, specific field names/URLs | **Quick (1-2 min)** \u2014 confirm feature exists, capture evidence |
5347
+ | **Vague** | General direction clear but specifics missing, relative terms ("fix", "better") | **Moderate (3-5 min)** \u2014 document current behavior, identify ambiguities |
5348
+ | **Unclear** | Contradictory info, multiple interpretations, no criteria, ambiguous scope | **Deep (5-10 min)** \u2014 systematically test scenarios, document all ambiguities |
6177
5349
 
6178
5350
  ### Maturity Adjustment
6179
5351
 
6180
- If the Clarification Protocol determined project maturity, adjust exploration depth:
6181
-
6182
- - **New project**: Default one level deeper than requirement clarity suggests (Clear \u2192 Moderate, Vague \u2192 Deep)
6183
- - **Growing project**: Use requirement clarity as-is (standard protocol)
6184
- - **Mature project**: Trust knowledge base \u2014 can stay at suggested depth or go one level shallower if KB covers the feature
5352
+ If the Clarification Protocol determined project maturity:
5353
+ - **New project**: Default one level deeper (Clear \u2192 Moderate, Vague \u2192 Deep)
5354
+ - **Growing project**: Use requirement clarity as-is
5355
+ - **Mature project**: Can stay at suggested depth or go shallower if knowledge base covers the feature
6185
5356
 
6186
- **Always verify features exist before testing them.** If exploration reveals that a referenced page or feature does not exist in the application, apply the Clarification Protocol's "Execution Obstacle vs. Requirement Ambiguity" principle:
6187
- - If an authoritative trigger source (Jira issue, PR, team request) asserts the feature exists, this is likely an **execution obstacle** (missing credentials, feature flags, environment config) \u2014 proceed with test artifact creation and notify the team about the access issue. Do NOT BLOCK.
6188
- - If NO authoritative source claims the feature exists, this is **CRITICAL severity** \u2014 escalate via the Clarification Protocol regardless of maturity level. Do NOT silently adapt or work around the missing feature.
5357
+ **Always verify features exist before testing them.** If a referenced feature doesn't exist:
5358
+ - If an authoritative trigger (Jira, PR, team request) asserts it exists \u2192 **execution obstacle** (proceed with artifacts, notify team). Do NOT block.
5359
+ - If NO authoritative source claims it exists \u2192 **CRITICAL severity** \u2014 escalate via Clarification Protocol.
6189
5360
 
6190
5361
  ### Quick Exploration (1-2 min)
6191
5362
 
6192
5363
  **When:** Requirements CLEAR
6193
5364
 
6194
- **Steps:**
6195
- 1. Navigate to feature (use provided URL), verify loads without errors
5365
+ 1. Navigate to feature, verify it loads without errors
6196
5366
  2. Verify key elements exist (buttons, fields, sections mentioned)
6197
5367
  3. Capture screenshot of initial state
6198
- 4. Document:
6199
- \`\`\`markdown
6200
- **Quick Exploration (1 min)**
6201
- Feature: [Name] | URL: [Path]
6202
- Status: \u2705 Accessible / \u274C Not found / \u26A0\uFE0F Different
6203
- Screenshot: [filename]
6204
- Notes: [Immediate observations]
6205
- \`\`\`
6206
- 5. **Decision:** \u2705 Matches \u2192 Test creation | \u274C/\u26A0\uFE0F Doesn't match \u2192 Moderate Exploration
6207
-
6208
- **Time Limit:** 1-2 minutes
5368
+ 4. Document: feature name, URL, status (accessible/not found/different), notes
5369
+ 5. **Decision:** Matches \u2192 test creation | Doesn't match \u2192 Moderate Exploration
6209
5370
 
6210
5371
  ### Moderate Exploration (3-5 min)
6211
5372
 
6212
5373
  **When:** Requirements VAGUE or Quick Exploration revealed discrepancies
6213
5374
 
6214
- **Steps:**
6215
- 1. Navigate using appropriate role(s), set up preconditions, ensure clean state
5375
+ 1. Navigate using appropriate role(s), set up preconditions
6216
5376
  2. Test primary user flow, document steps and behavior, note unexpected behavior
6217
5377
  3. Capture before/after screenshots, document field values/ordering/visibility
6218
- 4. Compare to requirement: What matches? What differs? What's absent?
6219
- 5. Identify specific ambiguities:
6220
- \`\`\`markdown
6221
- **Moderate Exploration (4 min)**
6222
-
6223
- **Explored:** Role: [Admin], Path: [Steps], Behavior: [What happened]
6224
-
6225
- **Current State:** [Specific observations with examples]
6226
- - Example: "Admin view shows 8 sort options: By Title, By Due Date, By Priority..."
6227
-
6228
- **Requirement Says:** [What requirement expected]
6229
-
6230
- **Discrepancies:** [Specific differences]
6231
- - Example: "Premium users see 5 fewer sorting options than admins"
6232
-
6233
- **Ambiguities:**
6234
- 1. [First ambiguity with concrete example]
6235
- 2. [Second if applicable]
6236
-
6237
- **Clarification Needed:** [Specific questions]
6238
- \`\`\`
5378
+ 4. Compare to requirement: what matches, what differs, what's absent
5379
+ 5. Identify specific ambiguities with concrete examples
6239
5380
  6. Assess severity using Clarification Protocol
6240
- 7. **Decision:** \u{1F7E2} Minor \u2192 Proceed with assumptions | \u{1F7E1} Medium \u2192 Async clarification, proceed | \u{1F534} Critical \u2192 Stop, escalate
6241
-
6242
- **Time Limit:** 3-5 minutes
5381
+ 7. **Decision:** Minor ambiguity \u2192 proceed with assumptions | Critical \u2192 stop, escalate
6243
5382
 
6244
5383
  ### Deep Exploration (5-10 min)
6245
5384
 
6246
5385
  **When:** Requirements UNCLEAR or critical ambiguities found
6247
5386
 
6248
- **Steps:**
6249
- 1. **Define Exploration Matrix:** Identify dimensions (user roles, feature states, input variations, browsers)
6250
-
6251
- 2. **Systematic Testing:** Test each matrix cell methodically
6252
- \`\`\`
6253
- Example for "Todo List Sorting":
6254
- Matrix: User Roles \xD7 Feature Observations
6255
-
6256
- Test 1: Admin Role \u2192 Navigate, document sort options (count, names, order), screenshot
6257
- Test 2: Basic User Role \u2192 Same todo list, document options, screenshot
6258
- Test 3: Compare \u2192 Side-by-side table, identify missing/reordered options
6259
- \`\`\`
6260
-
6261
- 3. **Document Patterns:** Consistent behavior? Role-based differences? What varies vs constant?
6262
-
6263
- 4. **Comprehensive Report:**
6264
- \`\`\`markdown
6265
- **Deep Exploration (8 min)**
6266
-
6267
- **Matrix:** [Dimensions] | **Tests:** [X combinations]
6268
-
6269
- **Findings:**
6270
-
6271
- ### Test 1: Admin
6272
- - Setup: [Preconditions] | Steps: [Actions]
6273
- - Observations: Sort options=8, Options=[list], Ordering=[sequence]
6274
- - Screenshot: [filename-admin.png]
6275
-
6276
- ### Test 2: Basic User
6277
- - Setup: [Preconditions] | Steps: [Actions]
6278
- - Observations: Sort options=3, Missing vs Admin=[5 options], Ordering=[sequence]
6279
- - Screenshot: [filename-user.png]
6280
-
6281
- **Comparison Table:**
6282
- | Sort Option | Admin Pos | User Pos | Notes |
6283
- |-------------|-----------|----------|-------|
6284
- | By Title | 1 | 1 | Match |
6285
- | By Priority | 3 | Not visible | Missing |
6286
-
6287
- **Patterns:**
6288
- - Role-based feature visibility
6289
- - Consistent relative ordering for visible fields
6290
-
6291
- **Critical Ambiguities:**
6292
- 1. Option Visibility: Intentional basic users see 5 fewer sort options?
6293
- 2. Sort Definition: (A) All roles see all options in same order, OR (B) Roles see permitted options in same relative order?
6294
-
6295
- **Clarification Questions:** [Specific, concrete based on findings]
6296
- \`\`\`
6297
-
6298
- 5. **Next Action:** Critical ambiguities \u2192 STOP, clarify | Patterns suggest answer \u2192 Validate assumption | Behavior clear \u2192 Test creation
6299
-
6300
- **Time Limit:** 5-10 minutes
6301
-
6302
- ### Link Exploration to Clarification
6303
-
6304
- **Flow:** Requirement Analysis \u2192 Exploration \u2192 Clarification
6305
-
6306
- 1. Requirement analysis detects vague language \u2192 Triggers exploration
6307
- 2. Exploration documents current behavior \u2192 Identifies discrepancies
6308
- 3. Clarification uses findings \u2192 Asks specific questions referencing observations
6309
-
6310
- **Example:**
6311
- \`\`\`
6312
- "Fix the sorting in todo list"
6313
- \u2193 Ambiguity: "sorting" = by date, priority, or completion status?
6314
- \u2193 Moderate Exploration: Admin=8 sort options, User=3 sort options
6315
- \u2193 Question: "Should basic users see all 8 sort options (bug) or only 3 with consistent sequence (correct)?"
6316
- \`\`\`
5387
+ 1. **Define exploration matrix:** dimensions (user roles, feature states, input variations)
5388
+ 2. **Systematic testing:** test each matrix cell methodically, document observations
5389
+ 3. **Document patterns:** consistent behavior, role-based differences, what varies vs constant
5390
+ 4. **Comprehensive report:** findings per test, comparison table, identified patterns, critical ambiguities
5391
+ 5. **Next action:** Critical ambiguities \u2192 STOP, clarify | Patterns suggest answer \u2192 validate assumption | Behavior clear \u2192 test creation
6317
5392
 
6318
5393
  ### Document Exploration Results
6319
5394
 
6320
- **Template:**
6321
- \`\`\`markdown
6322
- ## Exploration Summary
6323
-
6324
- **Date:** [YYYY-MM-DD] | **Explorer:** [Agent/User] | **Depth:** [Quick/Moderate/Deep] | **Duration:** [X min]
6325
-
6326
- ### Feature: [Name and description]
6327
-
6328
- ### Observations: [Key findings]
6329
-
6330
- ### Current Behavior: [What feature does today]
6331
-
6332
- ### Discrepancies: [Requirement vs observation differences]
6333
-
6334
- ### Assumptions Made: [If proceeding with assumptions]
5395
+ Save exploration findings as a report including:
5396
+ - Date, depth, duration
5397
+ - Feature observations and current behavior
5398
+ - Discrepancies between requirements and observations
5399
+ - Assumptions made (if proceeding)
5400
+ - Artifacts: screenshots, videos, notes
6335
5401
 
6336
- ### Artifacts: Screenshots: [list], Video: [if captured], Notes: [detailed]
6337
- \`\`\`
6338
-
6339
- **Memory Storage:** Feature behavior patterns, common ambiguity types, resolution approaches
6340
-
6341
- ### Integration with Test Creation
6342
-
6343
- **Quick Exploration \u2192 Direct Test:**
6344
- - Feature verified \u2192 Create test matching requirement \u2192 Reference screenshot
6345
-
6346
- **Moderate Exploration \u2192 Assumption-Based Test:**
6347
- - Document behavior \u2192 Create test on best interpretation \u2192 Mark assumptions \u2192 Plan updates after clarification
6348
-
6349
- **Deep Exploration \u2192 Clarification-First:**
6350
- - Block test creation until clarification \u2192 Use exploration as basis for questions \u2192 Create test after answer \u2192 Reference both exploration and clarification
6351
-
6352
- ---
6353
-
6354
- ## Adaptive Exploration Decision Tree
5402
+ ### Decision Tree
6355
5403
 
6356
5404
  \`\`\`
6357
- Start: Requirement Received
6358
- \u2193
6359
- Are requirements clear with specifics?
6360
- \u251C\u2500 YES \u2192 Quick Exploration (1-2 min)
6361
- \u2502 \u2193
6362
- \u2502 Does feature match description?
6363
- \u2502 \u251C\u2500 YES \u2192 Proceed to Test Creation
6364
- \u2502 \u2514\u2500 NO \u2192 Escalate to Moderate Exploration
6365
- \u2502
6366
- \u2514\u2500 NO \u2192 Is general direction clear but details missing?
6367
- \u251C\u2500 YES \u2192 Moderate Exploration (3-5 min)
6368
- \u2502 \u2193
6369
- \u2502 Are ambiguities MEDIUM severity or lower?
6370
- \u2502 \u251C\u2500 YES \u2192 Document assumptions, proceed with test creation
6371
- \u2502 \u2514\u2500 NO \u2192 Escalate to Deep Exploration or Clarification
6372
- \u2502
6373
- \u2514\u2500 NO \u2192 Deep Exploration (5-10 min)
6374
- \u2193
6375
- Document comprehensive findings
6376
- \u2193
6377
- Assess ambiguity severity
6378
- \u2193
6379
- Seek clarification for CRITICAL/HIGH
5405
+ Requirements clear? \u2192 YES \u2192 Quick Exploration \u2192 Matches? \u2192 YES \u2192 Test Creation
5406
+ \u2192 NO \u2192 Moderate Exploration
5407
+ \u2192 NO \u2192 Direction clear? \u2192 YES \u2192 Moderate Exploration \u2192 Ambiguity \u2264 MEDIUM? \u2192 YES \u2192 Proceed with assumptions
5408
+ \u2192 NO \u2192 Deep Exploration / Clarify
5409
+ \u2192 NO \u2192 Deep Exploration \u2192 Document findings \u2192 Clarify CRITICAL/HIGH
6380
5410
  \`\`\`
6381
5411
 
6382
5412
  ---
6383
5413
 
6384
5414
  ## Remember
6385
5415
 
6386
- - **Explore before assuming** - Validate requirements against actual behavior
6387
- - **Concrete observations > abstract interpretation** - Document specific findings
6388
- - **Adaptive depth: time \u221D uncertainty** - Match exploration effort to requirement clarity
6389
- - **Exploration findings \u2192 specific clarifications** - Use observations to formulate questions
6390
- - **Always document** - Create artifacts for future reference
6391
- - **Link exploration \u2192 ambiguity \u2192 clarification** - Connect the workflow`,
5416
+ - **Explore before assuming** \u2014 validate requirements against actual behavior
5417
+ - **Concrete observations > abstract interpretation** \u2014 document specific findings
5418
+ - **Adaptive depth** \u2014 match exploration effort to requirement clarity
5419
+ - **Always document** \u2014 create artifacts for future reference`,
6392
5420
  tags: ["exploration", "protocol", "adaptive"]
6393
5421
  };
6394
5422
 
@@ -6400,277 +5428,138 @@ var clarificationProtocolStep = {
6400
5428
  invokesSubagents: ["team-communicator"],
6401
5429
  content: `## Clarification Protocol
6402
5430
 
6403
- Before proceeding with test creation or execution, ensure requirements are clear and testable. Use this protocol to detect ambiguity, assess its severity, and determine the appropriate action.
5431
+ Before proceeding with test creation or execution, ensure requirements are clear and testable.
6404
5432
 
6405
5433
  ### Check for Pending Clarification
6406
5434
 
6407
- Before starting, check if this task is resuming from a blocked clarification:
6408
-
6409
- 1. **Check $ARGUMENTS for clarification data:**
6410
- - If \`$ARGUMENTS.clarification\` exists, this task is resuming with a clarification response
6411
- - Extract: \`clarification\` (the user's answer), \`originalArgs\` (original task parameters)
6412
-
6413
- 2. **If clarification is present:**
6414
- - Read \`.bugzy/runtime/blocked-task-queue.md\`
6415
- - Find and remove your task's entry from the queue (update the file)
6416
- - Proceed using the clarification as if user just provided the answer
6417
- - Skip ambiguity detection for the clarified aspect
6418
-
6419
- 3. **If no clarification in $ARGUMENTS:** Proceed normally with ambiguity detection below.
5435
+ 1. If \`$ARGUMENTS.clarification\` exists, this task is resuming with a clarification response:
5436
+ - Extract \`clarification\` (the user's answer) and \`originalArgs\` (original task parameters)
5437
+ - Read \`.bugzy/runtime/blocked-task-queue.md\`, find and remove your task's entry
5438
+ - Proceed using the clarification, skip ambiguity detection for the clarified aspect
5439
+ 2. If no clarification in $ARGUMENTS: Proceed normally with ambiguity detection below.
6420
5440
 
6421
5441
  ### Assess Project Maturity
6422
5442
 
6423
- Before detecting ambiguity, assess how well you know this project. Maturity determines how aggressively you should ask questions \u2014 new projects require more questions, mature projects can rely on accumulated knowledge.
5443
+ Maturity determines how aggressively you should ask questions.
6424
5444
 
6425
- **Measure maturity from runtime artifacts:**
5445
+ **Measure from runtime artifacts:**
6426
5446
 
6427
5447
  | Signal | New | Growing | Mature |
6428
5448
  |--------|-----|---------|--------|
6429
- | \`knowledge-base.md\` | < 80 lines (template) | 80-300 lines | 300+ lines |
6430
- | \`memory/\` files | 0 files | 1-3 files | 4+ files, >5KB each |
5449
+ | \`knowledge-base.md\` | < 80 lines | 80-300 lines | 300+ lines |
5450
+ | \`memory/\` files | 0 | 1-3 | 4+ files, >5KB each |
6431
5451
  | Test cases in \`test-cases/\` | 0 | 1-6 | 7+ |
6432
5452
  | Exploration reports | 0 | 1 | 2+ |
6433
5453
 
6434
- **Steps:**
6435
- 1. Read \`.bugzy/runtime/knowledge-base.md\` and count lines
6436
- 2. List \`.bugzy/runtime/memory/\` directory and count files
6437
- 3. List \`test-cases/\` directory and count \`.md\` files (exclude README)
6438
- 4. Count exploration reports in \`exploration-reports/\`
6439
- 5. Classify: If majority of signals = New \u2192 **New**; majority Mature \u2192 **Mature**; otherwise \u2192 **Growing**
5454
+ Check these signals and classify: majority New \u2192 **New**; majority Mature \u2192 **Mature**; otherwise \u2192 **Growing**.
6440
5455
 
6441
5456
  **Maturity adjusts your question threshold:**
6442
- - **New**: Ask for CRITICAL + HIGH + MEDIUM severity (gather information aggressively)
6443
- - **Growing**: Ask for CRITICAL + HIGH severity (standard protocol)
6444
- - **Mature**: Ask for CRITICAL only (handle HIGH with documented assumptions)
6445
-
6446
- **CRITICAL severity ALWAYS triggers a question, regardless of maturity level.**
5457
+ - **New**: STOP for CRITICAL + HIGH + MEDIUM
5458
+ - **Growing**: STOP for CRITICAL + HIGH (default)
5459
+ - **Mature**: STOP for CRITICAL only; handle HIGH with documented assumptions
6447
5460
 
6448
5461
  ### Detect Ambiguity
6449
5462
 
6450
- Scan for ambiguity signals:
6451
-
6452
- **Language:** Vague terms ("fix", "improve", "better", "like", "mixed up"), relative terms without reference ("faster", "more"), undefined scope ("the ordering", "the fields", "the page"), modal ambiguity ("should", "could" vs "must", "will")
6453
-
6454
- **Details:** Missing acceptance criteria (no clear PASS/FAIL), no examples/mockups, incomplete field/element lists, unclear role behavior differences, unspecified error scenarios
6455
-
6456
- **Interpretation:** Multiple valid interpretations, contradictory information (description vs comments), implied vs explicit requirements
5463
+ Scan for these signals:
5464
+ - **Language**: Vague terms ("fix", "improve"), relative terms without reference, undefined scope, modal ambiguity
5465
+ - **Details**: Missing acceptance criteria, no examples, incomplete element lists, unspecified error scenarios
5466
+ - **Interpretation**: Multiple valid interpretations, contradictory information, implied vs explicit requirements
5467
+ - **Context**: No reference documentation, assumes knowledge
6457
5468
 
6458
- **Context:** No reference documentation, "RELEASE APPROVED" without criteria, quick ticket creation, assumes knowledge ("as you know...", "obviously...")
6459
-
6460
- **Quick Check:**
6461
- - [ ] Success criteria explicitly defined? (PASS if X, FAIL if Y)
6462
- - [ ] All affected elements specifically listed? (field names, URLs, roles)
6463
- - [ ] Only ONE reasonable interpretation?
6464
- - [ ] Examples, screenshots, or mockups provided?
6465
- - [ ] Consistent with existing system patterns?
6466
- - [ ] Can write test assertions without assumptions?
5469
+ **Quick Check** \u2014 can you write test assertions without assumptions? Is there only ONE reasonable interpretation?
6467
5470
 
6468
5471
  ### Assess Severity
6469
5472
 
6470
- If ambiguity is detected, assess its severity:
6471
-
6472
- | Severity | Characteristics | Examples | Action |
6473
- |----------|----------------|----------|--------|
6474
- | **CRITICAL** | Expected behavior undefined/contradictory; test outcome unpredictable; core functionality unclear; success criteria missing; multiple interpretations = different strategies; **referenced page/feature confirmed absent after browser verification AND no authoritative trigger source (Jira, PR, team request) asserts the feature exists** | "Fix the issue" (what issue?), "Improve performance" (which metrics?), "Fix sorting in todo list" (by date? priority? completion status?), "Test the Settings page" (browsed app \u2014 no Settings page exists, and no Jira/PR claims it was built) | **STOP** - You MUST ask via team-communicator before proceeding |
6475
- | **HIGH** | Core underspecified but direction clear; affects majority of scenarios; vague success criteria; assumptions risky | "Fix ordering" (sequence OR visibility?), "Add validation" (what? messages?), "Update dashboard" (which widgets?) | **STOP** - You MUST ask via team-communicator before proceeding |
6476
- | **MEDIUM** | Specific details missing; general requirements clear; affects subset of cases; reasonable low-risk assumptions possible; wrong assumption = test updates not strategy overhaul | Missing field labels, unclear error message text, undefined timeouts, button placement not specified, date formats unclear | **PROCEED** - (1) Moderate exploration, (2) Document assumptions: "Assuming X because Y", (3) Proceed with creation/execution, (4) Async clarification (team-communicator), (5) Mark [ASSUMED: description] |
6477
- | **LOW** | Minor edge cases; documentation gaps don't affect execution; optional/cosmetic elements; minimal impact | Tooltip text, optional field validation, icon choice, placeholder text, tab order | **PROCEED** - (1) Mark [TO BE CLARIFIED: description], (2) Proceed, (3) Mention in report "Minor Details", (4) No blocking/async clarification |
5473
+ | Severity | Characteristics | Action |
5474
+ |----------|----------------|--------|
5475
+ | **CRITICAL** | Expected behavior undefined/contradictory; core functionality unclear; success criteria missing; multiple interpretations = different strategies; page/feature confirmed absent with no authoritative trigger claiming it exists | **STOP** \u2014 ask via team-communicator |
5476
+ | **HIGH** | Core underspecified but direction clear; affects majority of scenarios; assumptions risky | **STOP** \u2014 ask via team-communicator |
5477
+ | **MEDIUM** | Specific details missing; general requirements clear; reasonable low-risk assumptions possible | **PROCEED** \u2014 moderate exploration, document assumptions [ASSUMED: X], async clarification |
5478
+ | **LOW** | Minor edge cases; documentation gaps don't affect execution | **PROCEED** \u2014 mark [TO BE CLARIFIED: X], mention in report |
6478
5479
 
6479
5480
  ### Execution Obstacle vs. Requirement Ambiguity
6480
5481
 
6481
- Before classifying something as CRITICAL, distinguish between these two fundamentally different situations:
6482
-
6483
- **Requirement Ambiguity** = *What* to test is unclear \u2192 severity assessment applies normally
6484
- - No authoritative source describes the feature
6485
- - The task description is vague or contradictory
6486
- - You cannot determine what "correct" behavior looks like
6487
- - \u2192 Apply severity table above. CRITICAL/HIGH \u2192 BLOCK.
6488
-
6489
- **Execution Obstacle** = *What* to test is clear, but *how* to access/verify has obstacles \u2192 NEVER BLOCK
6490
- - An authoritative trigger source (Jira issue, PR, team message) asserts the feature exists
6491
- - You browsed the app but couldn't find/access the feature
6492
- - The obstacle is likely: wrong user role/tier, missing test data, feature flags, environment config
6493
- - \u2192 PROCEED with artifact creation (test cases, test specs). Notify team about the obstacle.
5482
+ Before classifying something as CRITICAL, distinguish:
6494
5483
 
6495
- **The key test:** Does an authoritative trigger source (Jira, PR, team request) assert the feature exists?
6496
- - **YES** \u2192 It's an execution obstacle. The feature exists but you can't access it. Proceed: create test artifacts, add placeholder env vars, notify team about access issues.
6497
- - **NO** \u2192 It may genuinely not exist. Apply CRITICAL severity, ask what was meant.
5484
+ **Requirement Ambiguity** = *What* to test is unclear \u2192 severity assessment applies normally.
6498
5485
 
6499
- | Scenario | Trigger Says | Browser Shows | Classification | Action |
6500
- |----------|-------------|---------------|----------------|--------|
6501
- | Jira says "test premium dashboard", you log in as test_user and don't see it | Feature exists | Can't access | **Execution obstacle** | Create tests, notify team re: missing premium credentials |
6502
- | PR says "verify new settings page", you browse and find no settings page | Feature exists | Can't find | **Execution obstacle** | Create tests, notify team re: possible feature flag/env issue |
6503
- | Manual request "test the settings page", no Jira/PR, you browse and find no settings page | No source claims it | Can't find | **Requirement ambiguity (CRITICAL)** | BLOCK, ask what was meant |
6504
- | Jira says "fix sorting", but doesn't specify sort criteria | Feature exists | Feature exists | **Requirement ambiguity (HIGH)** | BLOCK, ask which sort criteria |
5486
+ **Execution Obstacle** = *What* to test is clear, but *how* to access/verify has obstacles \u2192 NEVER BLOCK.
5487
+ - An authoritative trigger source (Jira, PR, team message) asserts the feature exists
5488
+ - You browsed but couldn't find/access it (likely: wrong role, missing test data, feature flags, env config)
5489
+ - \u2192 PROCEED with artifact creation. Notify team about the obstacle.
6505
5490
 
6506
- **Partial Feature Existence \u2014 URL found but requested functionality absent:**
5491
+ **The key test:** Does an authoritative trigger source assert the feature exists?
5492
+ - **YES** \u2192 Execution obstacle. Proceed, create test artifacts, notify team about access issues.
5493
+ - **NO** \u2192 May genuinely not exist. Apply CRITICAL severity, ask.
6507
5494
 
6508
- A common edge case: a page/route loads successfully, but the SPECIFIC FUNCTIONALITY you were asked to test doesn't exist on it.
5495
+ **Important:** A page loading is NOT the same as the requested functionality existing on it. Evaluate whether the REQUESTED FUNCTIONALITY exists, not just whether a URL resolves. If the page loads but requested features are absent and no authoritative source claims they were built \u2192 CRITICAL ambiguity.
6509
5496
 
6510
- **Rule:** Evaluate whether the REQUESTED FUNCTIONALITY exists, not just whether a URL resolves.
6511
-
6512
- | Page Exists | Requested Features Exist | Authoritative Trigger | Classification |
6513
- |-------------|--------------------------|----------------------|----------------|
6514
- | Yes | Yes | Any | Proceed normally |
6515
- | Yes | No | Yes (Jira/PR says features built) | Execution obstacle \u2014 features behind flag/env |
6516
- | Yes | No | No (manual request only) | **Requirement ambiguity (CRITICAL)** \u2014 ask what's expected |
6517
- | No | N/A | Yes | Execution obstacle \u2014 page not deployed yet |
6518
- | No | N/A | No | **Requirement ambiguity (CRITICAL)** \u2014 ask what was meant |
6519
-
6520
- **Example:** Prompt says "Test the checkout payment form with credit card 4111..." You browse to /checkout and find an information form (first name, last name, postal code) but NO payment form, NO shipping options, NO Place Order button. No Jira/PR claims these features exist. \u2192 **CRITICAL requirement ambiguity.** Ask: "I found a checkout information form at /checkout but no payment form or shipping options. Can you clarify what checkout features you'd like tested?"
6521
-
6522
- **Key insight:** Finding a URL is not the same as finding the requested functionality. Do NOT classify this as an "execution obstacle" just because the page loads.
5497
+ | Scenario | Trigger Claims Feature | Browser Shows | Classification |
5498
+ |----------|----------------------|---------------|----------------|
5499
+ | Jira says "test premium dashboard", can't see it | Yes | Can't access | Execution obstacle \u2014 proceed |
5500
+ | PR says "verify settings page", no settings page | Yes | Can't find | Execution obstacle \u2014 proceed |
5501
+ | Manual request "test settings", no Jira/PR | No | Can't find | CRITICAL ambiguity \u2014 ask |
5502
+ | Jira says "fix sorting", no sort criteria | Yes | Feature exists | HIGH ambiguity \u2014 ask |
6523
5503
 
6524
5504
  ### Check Memory for Similar Clarifications
6525
5505
 
6526
- Before asking, check if similar question was answered:
6527
-
6528
- **Process:**
6529
- 1. **Query team-communicator memory** - Search by feature name, ambiguity pattern, ticket keywords
6530
- 2. **Review past Q&A** - Similar question asked? What was answer? Applicable now?
6531
- 3. **Assess reusability:**
6532
- - Directly applicable \u2192 Use answer, no re-ask
6533
- - Partially applicable \u2192 Adapt and reference ("Previously for X, clarified Y. Same here?")
6534
- - Not applicable \u2192 Ask as new
6535
- 4. **Update memory** - Store Q&A with task type, feature, pattern tags
6536
-
6537
- **Example:** Query "todo sorting priority" \u2192 Found 2025-01-15: "Should completed todos appear in main list?" \u2192 Answer: "No, move to separate archive view" \u2192 Directly applicable \u2192 Use, no re-ask needed
5506
+ Before asking, search memory by feature name, ambiguity pattern, and ticket keywords. If a directly applicable past answer exists, use it without re-asking. If partially applicable, adapt and reference.
6538
5507
 
6539
5508
  ### Formulate Clarification Questions
6540
5509
 
6541
- If clarification needed (CRITICAL/HIGH severity), formulate specific, concrete questions:
6542
-
6543
- **Good Questions:** Specific and concrete, provide context, offer options, reference examples, tie to test strategy
6544
-
6545
- **Bad Questions:** Too vague/broad, assumptive, multiple questions in one, no context
5510
+ If clarification needed (CRITICAL/HIGH), formulate specific, concrete questions:
6546
5511
 
6547
- **Template:**
6548
5512
  \`\`\`
6549
5513
  **Context:** [Current understanding]
6550
5514
  **Ambiguity:** [Specific unclear aspect]
6551
5515
  **Question:** [Specific question with options]
6552
5516
  **Why Important:** [Testing strategy impact]
6553
-
6554
- Example:
6555
- Context: TODO-456 "Fix the sorting in the todo list so items appear in the right order"
6556
- Ambiguity: "sorting" = (A) by creation date, (B) by due date, (C) by priority level, or (D) custom user-defined order
6557
- Question: Should todos be sorted by due date (soonest first) or priority (high to low)? Should completed items appear in the list or move to archive?
6558
- Why Important: Different sort criteria require different test assertions. Current app shows 15 active todos + 8 completed in mixed order.
6559
5517
  \`\`\`
6560
5518
 
6561
5519
  ### Communicate Clarification Request
6562
5520
 
6563
- **For Slack-Triggered Tasks:** {{INVOKE_TEAM_COMMUNICATOR}} to ask in thread:
6564
- \`\`\`
6565
- Ask clarification in Slack thread:
6566
- Context: [From ticket/description]
6567
- Ambiguity: [Describe ambiguity]
6568
- Severity: [CRITICAL/HIGH]
6569
- Questions:
6570
- 1. [First specific question]
6571
- 2. [Second if needed]
6572
-
6573
- Clarification needed to proceed. I'll wait for response before testing.
6574
- \`\`\`
5521
+ **For Slack-Triggered Tasks:** {{INVOKE_TEAM_COMMUNICATOR}} to ask in thread with context, ambiguity description, severity, and specific questions.
6575
5522
 
6576
- **For Manual/API Triggers:** Include in task output:
6577
- \`\`\`markdown
6578
- ## Clarification Required Before Testing
6579
-
6580
- **Ambiguity:** [Description]
6581
- **Severity:** [CRITICAL/HIGH]
6582
-
6583
- ### Questions:
6584
- 1. **Question:** [First question]
6585
- - Context: [Provide context]
6586
- - Options: [If applicable]
6587
- - Impact: [Testing impact]
6588
-
6589
- **Action Required:** Provide clarification. Testing cannot proceed.
6590
- **Current Observation:** [What exploration revealed - concrete examples]
6591
- \`\`\`
5523
+ **For Manual/API Triggers:** Include a "Clarification Required Before Testing" section in task output with ambiguity, severity, questions with context/options/impact, and current observations.
6592
5524
 
6593
5525
  ### Register Blocked Task (CRITICAL/HIGH only)
6594
5526
 
6595
- When asking a CRITICAL or HIGH severity question that blocks progress, register the task in the blocked queue so it can be automatically re-triggered when clarification arrives.
6596
-
6597
- **Update \`.bugzy/runtime/blocked-task-queue.md\`:**
6598
-
6599
- 1. Read the current file (create if doesn't exist)
6600
- 2. Add a new row to the Queue table
5527
+ When blocked, register the task in \`.bugzy/runtime/blocked-task-queue.md\`:
6601
5528
 
6602
5529
  \`\`\`markdown
6603
- # Blocked Task Queue
6604
-
6605
- Tasks waiting for clarification responses.
6606
-
6607
5530
  | Task Slug | Question | Original Args |
6608
5531
  |-----------|----------|---------------|
6609
5532
  | generate-test-plan | Should todos be sorted by date or priority? | \`{"ticketId": "TODO-456"}\` |
6610
5533
  \`\`\`
6611
5534
 
6612
- **Entry Fields:**
6613
- - **Task Slug**: The task slug (e.g., \`generate-test-plan\`) - used for re-triggering
6614
- - **Question**: The clarification question asked (so LLM can match responses)
6615
- - **Original Args**: JSON-serialized \`$ARGUMENTS\` wrapped in backticks
6616
-
6617
- **Purpose**: The LLM processor reads this file and matches user responses to pending questions. When a match is found, it re-queues the task with the clarification.
5535
+ The LLM processor reads this file and matches user responses to pending questions, then re-queues the task with the clarification.
6618
5536
 
6619
5537
  ### Wait or Proceed Based on Severity
6620
5538
 
6621
- **Use your maturity assessment to adjust thresholds:**
6622
- - **New project**: STOP for CRITICAL + HIGH + MEDIUM
6623
- - **Growing project**: STOP for CRITICAL + HIGH (default)
6624
- - **Mature project**: STOP for CRITICAL only; handle HIGH with documented assumptions
6625
-
6626
5539
  **When severity meets your STOP threshold:**
6627
- - You MUST call team-communicator (Slack) to ask the question \u2014 do NOT just mention it in your text output
5540
+ - You MUST call team-communicator to ask \u2014 do NOT just mention it in text output
6628
5541
  - Do NOT create tests, run tests, or make assumptions about the unclear aspect
6629
- - Do NOT silently adapt by working around the issue (e.g., running other tests instead)
5542
+ - Do NOT silently adapt by working around the issue
6630
5543
  - Do NOT invent your own success criteria when none are provided
6631
- - Register the blocked task and wait for clarification
6632
- - *Rationale: Wrong assumptions = incorrect tests, false results, wasted time*
5544
+ - Register the blocked task and wait
6633
5545
 
6634
- **When severity is below your STOP threshold \u2192 Proceed with Documented Assumptions:**
6635
- - Perform moderate exploration, document assumptions, proceed with creation/execution
6636
- - Ask clarification async (team-communicator), mark results "based on assumptions"
6637
- - Update tests after clarification received
6638
- - *Rationale: Waiting blocks progress; documented assumptions allow forward movement with later corrections*
6639
-
6640
- **LOW \u2192 Always Proceed and Mark:**
6641
- - Proceed with creation/execution, mark gaps [TO BE CLARIFIED] or [ASSUMED]
6642
- - Mention in report but don't prioritize, no blocking
6643
- - *Rationale: Details don't affect strategy/results significantly*
5546
+ **When severity is below your STOP threshold:**
5547
+ - Perform moderate exploration, document assumptions, proceed
5548
+ - Ask clarification async, mark results "based on assumptions"
6644
5549
 
6645
5550
  ### Document Clarification in Results
6646
5551
 
6647
- When reporting test results, always include an "Ambiguities" section if clarification occurred:
6648
-
6649
- \`\`\`markdown
6650
- ## Ambiguities Encountered
6651
-
6652
- ### Clarification: [Topic]
6653
- - **Severity:** [CRITICAL/HIGH/MEDIUM/LOW]
6654
- - **Question Asked:** [What was asked]
6655
- - **Response:** [Answer received, or "Awaiting response"]
6656
- - **Impact:** [How this affected testing]
6657
- - **Assumption Made:** [If proceeded with assumption]
6658
- - **Risk:** [What could be wrong if assumption is incorrect]
6659
-
6660
- ### Resolution:
6661
- [How the clarification was resolved and incorporated into testing]
6662
- \`\`\`
5552
+ Include an "Ambiguities Encountered" section in results when clarification occurred, noting severity, question asked, response (or "Awaiting"), impact, assumptions made, and risk.
6663
5553
 
6664
5554
  ---
6665
5555
 
6666
5556
  ## Remember
6667
5557
 
6668
- - **STOP means STOP** - When you hit a STOP threshold, you MUST call team-communicator to ask via Slack. Do NOT silently adapt, skip, or work around the issue
6669
- - **Non-existent features \u2014 check context first** - If a page/feature doesn't exist in the browser, check whether an authoritative trigger (Jira, PR, team request) asserts it exists. If YES \u2192 execution obstacle (proceed with artifact creation, notify team). If NO authoritative source claims it exists \u2192 CRITICAL severity, ask what was meant
6670
- - **Ask correctly > guess poorly** - Specific questions lead to specific answers
6671
- - **Never invent success criteria** - If the task says "improve" or "fix" without metrics, ask what "done" looks like
6672
- - **Check memory first** - Avoid re-asking previously answered questions
6673
- - **Maturity adjusts threshold, not judgment** - Even in mature projects, CRITICAL always triggers a question`,
5558
+ - **STOP means STOP** \u2014 When you hit a STOP threshold, you MUST call team-communicator. Do NOT silently adapt or work around the issue
5559
+ - **Non-existent features \u2014 check context first** \u2014 If a feature doesn't exist in the browser, check whether an authoritative trigger asserts it exists. YES \u2192 execution obstacle (proceed). NO \u2192 CRITICAL severity, ask.
5560
+ - **Never invent success criteria** \u2014 If the task says "improve" or "fix" without metrics, ask what "done" looks like
5561
+ - **Check memory first** \u2014 Avoid re-asking previously answered questions
5562
+ - **Maturity adjusts threshold, not judgment** \u2014 CRITICAL always triggers a question`,
6674
5563
  tags: ["clarification", "protocol", "ambiguity"]
6675
5564
  };
6676
5565
 
@@ -6859,6 +5748,10 @@ The agent will:
6859
5748
  4. Apply appropriate fix pattern from \`./tests/CLAUDE.md\`
6860
5749
  5. Rerun the test
6861
5750
  6. The custom reporter will automatically create the next exec-N/ folder
5751
+ 6b. If no custom reporter (BYOT mode \u2014 check for \`reporters/bugzy-reporter.ts\`):
5752
+ Run the parse script to update the manifest with re-run results:
5753
+ \`npx tsx reporters/parse-results.ts --input <re-run-output> --timestamp <current-run-timestamp> --test-id <testCaseId>\`
5754
+ This creates exec-N+1/ and updates the manifest.
6862
5755
  7. Repeat up to 3 times if needed (exec-1, exec-2, exec-3)
6863
5756
  8. Report success or escalate as likely product bug
6864
5757
 
@@ -7050,6 +5943,87 @@ ls -t test-runs/ | head -1
7050
5943
  tags: ["execution", "exploration"]
7051
5944
  };
7052
5945
 
5946
+ // src/tasks/steps/execution/normalize-test-results.ts
5947
+ var normalizeTestResultsStep = {
5948
+ id: "normalize-test-results",
5949
+ title: "Normalize Test Results",
5950
+ category: "execution",
5951
+ content: `## Normalize Test Results
5952
+
5953
+ Convert test results into the standard Bugzy \`test-runs/\` manifest format. This step handles both external CI results (via webhook) and local BYOT test output. In managed mode (bugzy-reporter already created the manifest), this step is skipped.
5954
+
5955
+ ### 1. Check for Existing Manifest
5956
+
5957
+ Look for a \`test-runs/*/manifest.json\` from the most recent run. If a manifest already exists from the bugzy-reporter (managed mode), **skip this step entirely** \u2014 the results are already normalized.
5958
+
5959
+ ### 2. Determine Input Source
5960
+
5961
+ Check how test results are available:
5962
+
5963
+ **From event payload** (external CI \u2014 \`$ARGUMENTS\` contains event data):
5964
+ - \`data.results_url\` \u2014 URL to download results from (the parse script handles the download)
5965
+ - \`data.results\` \u2014 inline results (write to a temp file first: \`/tmp/bugzy-results-<random>.json\`)
5966
+
5967
+ **From local test run** (agent executed BYOT tests):
5968
+ - Read \`./tests/CLAUDE.md\` for the native test output location
5969
+ - Find the most recent test output file
5970
+
5971
+ ### 3. Locate and Run Parse Script
5972
+
5973
+ Look for the parse script at \`reporters/parse-results.ts\`.
5974
+
5975
+ **If the parse script exists:**
5976
+ \`\`\`bash
5977
+ npx tsx reporters/parse-results.ts --input <source>
5978
+ \`\`\`
5979
+ Where \`<source>\` is the file path, temp file path, or URL determined in step 2.
5980
+
5981
+ **If the parse script is missing** (fallback for robustness):
5982
+ Create the manifest inline using the same approach the parse script would take \u2014 identify the results format by inspecting the data structure:
5983
+ - JSON with \`suites\` or \`specs\` arrays: Likely Playwright JSON report
5984
+ - XML with \`<testsuites>\` or \`<testsuite>\` root: JUnit XML format
5985
+ - JSON with \`results\` array and \`stats\` object: Likely Cypress/Mocha JSON
5986
+ - Other: Inspect structure and adapt
5987
+
5988
+ Then create:
5989
+ 1. \`test-runs/{timestamp}/manifest.json\` with the standard Bugzy schema
5990
+ 2. \`test-runs/{timestamp}/{testCaseId}/exec-1/result.json\` for each failed test
5991
+
5992
+ Save the inline parse logic to \`reporters/parse-results.ts\` for future reuse.
5993
+
5994
+ ### 4. Verify Manifest
5995
+
5996
+ Confirm \`manifest.json\` was created:
5997
+ - Read the manifest and validate the structure
5998
+ - Check that \`stats\` counts match the \`testCases\` array
5999
+
6000
+ ### 5. Generate Summary
6001
+
6002
+ Read the manifest and produce a summary:
6003
+
6004
+ \`\`\`markdown
6005
+ ## Test Results Summary
6006
+
6007
+ - Total Tests: [count]
6008
+ - Passed: [count] ([percentage]%)
6009
+ - Failed: [count] ([percentage]%)
6010
+ - Skipped: [count if available] ([percentage]%)
6011
+ - Duration: [time if available]
6012
+ \`\`\`
6013
+
6014
+ ### 6. Include CI Metadata (if from event payload)
6015
+
6016
+ If the results came from an external CI event (\`$ARGUMENTS\` contains \`data.metadata\`), include:
6017
+ - **Pipeline URL**: \`data.metadata.pipeline_url\`
6018
+ - **Commit**: \`data.metadata.commit_sha\`
6019
+ - **Branch**: \`data.metadata.branch\`
6020
+
6021
+ ### 7. All Tests Passed?
6022
+
6023
+ If there are **no failures**, note that all tests passed. Downstream triage and fix steps can be skipped.`,
6024
+ tags: ["execution", "results", "normalization", "byot"]
6025
+ };
6026
+
7053
6027
  // src/tasks/steps/generation/generate-test-plan.ts
7054
6028
  var generateTestPlanStep = {
7055
6029
  id: "generate-test-plan",
@@ -7234,6 +6208,116 @@ TEST_API_KEY=secret_key_here
7234
6208
  tags: ["generation", "environment"]
7235
6209
  };
7236
6210
 
6211
+ // src/tasks/steps/generation/create-results-parser.ts
6212
+ var createResultsParserStep = {
6213
+ id: "create-results-parser",
6214
+ title: "Create Results Parser Script",
6215
+ category: "generation",
6216
+ content: `## Create Results Parser Script
6217
+
6218
+ Create a reusable script that normalizes test results from the project's test framework into Bugzy's standard \`test-runs/\` manifest format. This script is used at runtime by both external CI events and agent-executed BYOT test runs.
6219
+
6220
+ ### Inspect the Test Project
6221
+
6222
+ 1. Read \`./tests/CLAUDE.md\` to understand:
6223
+ - Which test framework is used (Playwright, Cypress, Jest, Mocha, etc.)
6224
+ - How tests are run and where output goes
6225
+ - The native report format (JSON, JUnit XML, etc.)
6226
+ 2. Check the test runner config file (e.g., \`playwright.config.ts\`, \`cypress.config.ts\`, \`jest.config.ts\`) for report settings
6227
+ 3. If a sample test output exists, read it to understand the exact structure
6228
+
6229
+ ### Create the Parse Script
6230
+
6231
+ Create \`reporters/parse-results.ts\` \u2014 a Node.js/TypeScript CLI script.
6232
+
6233
+ **Interface:**
6234
+ \`\`\`
6235
+ npx tsx reporters/parse-results.ts --input <file-or-url> [--timestamp <existing>] [--test-id <id>]
6236
+ \`\`\`
6237
+
6238
+ **Arguments:**
6239
+ - \`--input\` (required): file path or URL to the test results
6240
+ - If URL (starts with \`http://\` or \`https://\`): download with 30s timeout
6241
+ - If file path: read directly from disk
6242
+ - \`--timestamp\` (optional): existing run timestamp for incremental updates
6243
+ - \`--test-id\` (optional): specific test case ID for incremental updates (used with \`--timestamp\`)
6244
+
6245
+ **Normal mode** (no \`--timestamp\`):
6246
+ 1. Parse the project-specific test output format
6247
+ 2. Generate a timestamp: \`YYYYMMDD-HHmmss\`
6248
+ 3. Create \`test-runs/{timestamp}/manifest.json\` with the standard Bugzy schema:
6249
+ \`\`\`json
6250
+ {
6251
+ "bugzyExecutionId": "<from BUGZY_EXECUTION_ID env var or 'local'>",
6252
+ "timestamp": "<YYYYMMDD-HHmmss>",
6253
+ "startTime": "<ISO8601>",
6254
+ "endTime": "<ISO8601>",
6255
+ "status": "completed",
6256
+ "stats": {
6257
+ "totalTests": 0,
6258
+ "passed": 0,
6259
+ "failed": 0,
6260
+ "totalExecutions": 0
6261
+ },
6262
+ "testCases": [
6263
+ {
6264
+ "id": "<slugified test name, e.g. TC-001-login>",
6265
+ "name": "<original test name>",
6266
+ "totalExecutions": 1,
6267
+ "finalStatus": "passed|failed",
6268
+ "executions": [
6269
+ {
6270
+ "executionNumber": 1,
6271
+ "status": "passed|failed",
6272
+ "error": "<error message if failed, null if passed>",
6273
+ "duration": null,
6274
+ "hasTrace": false,
6275
+ "hasScreenshots": false
6276
+ }
6277
+ ]
6278
+ }
6279
+ ]
6280
+ }
6281
+ \`\`\`
6282
+ 4. For each failed test, create:
6283
+ - Directory: \`test-runs/{timestamp}/{testCaseId}/exec-1/\`
6284
+ - File: \`test-runs/{timestamp}/{testCaseId}/exec-1/result.json\` containing:
6285
+ \`\`\`json
6286
+ {
6287
+ "status": "failed",
6288
+ "error": "<full error message>",
6289
+ "stackTrace": "<stack trace if available>",
6290
+ "duration": null,
6291
+ "testFile": "<file path if available>"
6292
+ }
6293
+ \`\`\`
6294
+ 5. Print the manifest path to stdout
6295
+ 6. Exit code 0 on success, non-zero on failure
6296
+
6297
+ **Incremental mode** (\`--timestamp\` + \`--test-id\` provided):
6298
+ 1. Read existing \`test-runs/{timestamp}/manifest.json\`
6299
+ 2. Parse the new test results for the specified test case
6300
+ 3. Find the next execution number (e.g., if exec-2 exists, create exec-3)
6301
+ 4. Create \`test-runs/{timestamp}/{testCaseId}/exec-N/result.json\`
6302
+ 5. Update the manifest: add execution entry, update \`totalExecutions\`, update \`finalStatus\` and stats
6303
+ 6. Print the manifest path to stdout
6304
+
6305
+ ### Test the Script
6306
+
6307
+ 1. Run the project's tests to generate a sample output (or use an existing one)
6308
+ 2. Run the parse script: \`npx tsx reporters/parse-results.ts --input <sample-output>\`
6309
+ 3. Verify \`test-runs/\` was created with correct manifest.json structure
6310
+ 4. Check that failed test directories have result.json files
6311
+
6312
+ ### Document in CLAUDE.md
6313
+
6314
+ Add to \`./tests/CLAUDE.md\`:
6315
+ - Location: \`reporters/parse-results.ts\`
6316
+ - Usage: \`npx tsx reporters/parse-results.ts --input <file-or-url> [--timestamp <ts>] [--test-id <id>]\`
6317
+ - Where the project's native test output is located (for local runs)`,
6318
+ tags: ["generation", "byot", "results", "parser"]
6319
+ };
6320
+
7237
6321
  // src/tasks/steps/communication/notify-team.ts
7238
6322
  var notifyTeamStep = {
7239
6323
  id: "notify-team",
@@ -7482,11 +6566,13 @@ var STEP_LIBRARY = {
7482
6566
  "create-exploration-test-case": createExplorationTestCaseStep,
7483
6567
  "run-exploration": runExplorationStep,
7484
6568
  "process-exploration-results": processExplorationResultsStep,
6569
+ "normalize-test-results": normalizeTestResultsStep,
7485
6570
  // Generation
7486
6571
  "generate-test-plan": generateTestPlanStep,
7487
6572
  "generate-test-cases": generateTestCasesStep,
7488
6573
  "automate-test-cases": automateTestCasesStep,
7489
6574
  "extract-env-variables": extractEnvVariablesStep,
6575
+ "create-results-parser": createResultsParserStep,
7490
6576
  // Communication
7491
6577
  "notify-team": notifyTeamStep,
7492
6578
  // Maintenance