@bugzy-ai/bugzy 1.15.0 → 1.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bugzy-ai/bugzy",
3
- "version": "1.15.0",
3
+ "version": "1.15.1",
4
4
  "description": "Open-source AI agent configuration for QA automation with Claude Code",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -0,0 +1,178 @@
1
+ # Feedback Message Handler
2
+
3
+ Instructions for processing bug reports, test observations, and user feedback.
4
+
5
+ ## Detection Criteria
6
+
7
+ This handler applies when:
8
+ - User reports an issue, bug, or unexpected behavior
9
+ - User shares test results or observations
10
+ - User provides information (not asking a question or requesting action)
11
+ - Keywords present: "found", "issue", "bug", "doesn't work", "broken", "observed", "noticed", "failed", "error"
12
+ - Intent field from LLM layer is `feedback`
13
+ - Message was re-routed from the clarification handler (a thread reply for which no blocked task exists)
14
+
15
+ ## Processing Steps
16
+
17
+ ### Step 1: Parse Feedback
18
+
19
+ Extract the following from the message:
20
+
21
+ | Field | Description | Examples |
22
+ |-------|-------------|----------|
23
+ | **Type** | Category of feedback | `bug_report`, `test_result`, `observation`, `suggestion`, `general` |
24
+ | **Severity** | Impact level | `critical`, `high`, `medium`, `low` |
25
+ | **Component** | Affected area | "login", "checkout", "search", etc. |
26
+ | **Description** | Core issue description | What happened |
27
+ | **Expected** | What should happen (if stated) | Expected behavior |
28
+ | **Steps** | How to reproduce (if provided) | Reproduction steps |
29
+
30
+ **Type Detection**:
31
+ - `bug_report`: "bug", "broken", "doesn't work", "error", "crash"
32
+ - `test_result`: "test passed", "test failed", "ran tests", "testing showed"
33
+ - `observation`: "noticed", "observed", "found that", "saw that"
34
+ - `suggestion`: "should", "could we", "what if", "idea"
35
+ - `general`: Default for unclassified feedback
36
+
37
+ ### Step 2: Check for Duplicates
38
+
39
+ Search the knowledge base for similar entries:
40
+
41
+ 1. Read `.bugzy/runtime/knowledge-base.md`
42
+ 2. Search for:
43
+ - Same component + similar symptoms
44
+ - Matching keywords from the description
45
+ - Recent entries (last 30 days) with similar patterns
46
+ 3. If duplicate found:
47
+ - Reference the existing entry
48
+ - Note any new information provided
49
+ - Update existing entry if new details are valuable
50
+
51
+ ### Step 3: Update Knowledge Base
52
+
53
+ Add or update entry in `.bugzy/runtime/knowledge-base.md`:
54
+
55
+ **For Bug Reports**:
56
+ ```markdown
57
+ ### Bug Report: [Brief Description]
58
+ **Reported**: [ISO date]
59
+ **Source**: Slack - [username if available]
60
+ **Component**: [component]
61
+ **Severity**: [severity]
62
+ **Status**: Under investigation
63
+
64
+ **Description**: [full description]
65
+
66
+ **Expected Behavior**: [if provided]
67
+
68
+ **Steps to Reproduce**: [if provided]
69
+ 1. Step one
70
+ 2. Step two
71
+
72
+ **Related**: [links to related issues/test cases if any]
73
+ ```
74
+
75
+ **For Observations**:
76
+ ```markdown
77
+ ### Observation: [Brief Description]
78
+ **Reported**: [ISO date]
79
+ **Source**: Slack - [username if available]
80
+ **Component**: [component]
81
+ **Context**: [what was being done when observed]
82
+
83
+ **Details**: [full observation]
84
+
85
+ **Impact**: [potential impact on testing]
86
+ ```
87
+
88
+ **For Test Results**:
89
+ ```markdown
90
+ ### Manual Test Result: [Test/Feature Name]
91
+ **Reported**: [ISO date]
92
+ **Source**: Slack - [username if available]
93
+ **Result**: [passed/failed]
94
+ **Component**: [component]
95
+
96
+ **Details**: [what was tested, outcome]
97
+
98
+ **Notes**: [any additional observations]
99
+ ```
100
+
101
+ ### Step 4: Determine Follow-up Actions
102
+
103
+ Based on feedback type, consider additional actions:
104
+
105
+ | Type | Potential Actions |
106
+ |------|-------------------|
107
+ | **bug_report (critical/high)** | Consider creating an issue via the issue-tracker agent, if configured |
108
+ | **bug_report (medium/low)** | Log in knowledge base, may inform future test cases |
109
+ | **test_result** | Update relevant test case status if identifiable |
110
+ | **observation** | May inform test plan updates |
111
+ | **suggestion** | Log for future consideration |
112
+
113
+ **Issue Tracker Integration** (if configured):
114
+ - For critical/high severity bugs, check if issue-tracker agent is available
115
+ - If so, create or link to an issue in the configured system
116
+ - Reference the issue in the knowledge base entry
117
+
118
+ ### Step 5: Acknowledge and Confirm
119
+
120
+ Respond to the user confirming:
121
+ 1. Feedback was received and understood
122
+ 2. Summary of what was captured
123
+ 3. What actions will be taken
124
+ 4. Any follow-up questions if needed
125
+
126
+ ## Response Guidelines
127
+
128
+ **Structure**:
129
+ ```
130
+ Thanks for reporting this. Here's what I've captured:
131
+
132
+ [Summary of the feedback]
133
+
134
+ I've logged this in the knowledge base under [category].
135
+ [Any follow-up actions being taken]
136
+
137
+ [Optional: Follow-up questions if clarification needed]
138
+ ```
139
+
140
+ **Examples**:
141
+
142
+ For bug report:
143
+ ```
144
+ Thanks for reporting this. I've logged the following:
145
+
146
+ Bug: Checkout fails when cart has more than 10 items
147
+ - Severity: High
148
+ - Component: Checkout
149
+ - Status: Under investigation
150
+
151
+ I've added this to the knowledge base. This may affect our checkout test coverage - I'll review TC-045 through TC-048 for related scenarios.
152
+
153
+ Can you confirm which browser this occurred in?
154
+ ```
155
+
156
+ For observation:
157
+ ```
158
+ Good catch - I've noted this observation:
159
+
160
+ The loading spinner on the dashboard takes longer than expected after the recent update.
161
+
162
+ I've added this to the knowledge base under performance observations. This might be worth adding to our performance test suite.
163
+ ```
164
+
165
+ ## Context Loading Requirements
166
+
167
+ Required:
168
+ - [x] Knowledge base (`.bugzy/runtime/knowledge-base.md`) - for duplicate check and updates
169
+
170
+ Conditional:
171
+ - [ ] Test cases (`./test-cases/`) - if feedback relates to specific test
172
+ - [ ] Test runs (`./test-runs/`) - if feedback relates to recent results
173
+
174
+ ## Memory Updates
175
+
176
+ Required updates:
177
+ - Knowledge base (`.bugzy/runtime/knowledge-base.md`) - add new entry or update existing
178
+ - Team communicator memory (`.bugzy/runtime/memory/team-communicator.md`) - optional, only if feedback sources are being tracked
@@ -0,0 +1,122 @@
1
+ # Question Message Handler
2
+
3
+ Instructions for processing questions about the project, tests, coverage, or testing status.
4
+
5
+ ## Detection Criteria
6
+
7
+ This handler applies when:
8
+ - Message contains question words (what, how, which, where, why, when, do, does, is, are, can)
9
+ - Question relates to tests, test plan, coverage, test results, or project artifacts
10
+ - User is seeking information, NOT requesting an action
11
+ - Intent field from LLM layer is `question`
12
+
13
+ ## Processing Steps
14
+
15
+ ### Step 1: Classify Question Type
16
+
17
+ Analyze the question to determine the primary type:
18
+
19
+ | Type | Indicators | Primary Context Sources |
20
+ |------|------------|------------------------|
21
+ | **Coverage** | "what tests", "do we have", "is there a test for", "covered" | test-cases/, test-plan.md |
22
+ | **Results** | "did tests pass", "what failed", "test results", "how many" | test-runs/ |
23
+ | **Knowledge** | "how does", "what is", "explain", feature/component questions | knowledge-base.md |
24
+ | **Plan** | "what's in scope", "test plan", "testing strategy", "priorities" | test-plan.md |
25
+ | **Process** | "how do I", "when should", "what's the workflow" | project-context.md |
26
+
27
+ ### Step 2: Load Relevant Context
28
+
29
+ Based on question type, load the appropriate files:
30
+
31
+ **For Coverage questions**:
32
+ 1. Read `test-plan.md` for overall test strategy
33
+ 2. List files in `./test-cases/` directory
34
+ 3. Search test case files for relevant keywords
35
+
36
+ **For Results questions**:
37
+ 1. List directories in `./test-runs/` (sorted by date, newest first)
38
+ 2. Read `summary.json` from relevant test run directories
39
+ 3. Extract pass/fail counts, failure reasons
40
+
41
+ **For Knowledge questions**:
42
+ 1. Read `.bugzy/runtime/knowledge-base.md`
43
+ 2. Search for relevant entries
44
+ 3. Also check test-plan.md for feature descriptions
45
+
46
+ **For Plan questions**:
47
+ 1. Read `test-plan.md`
48
+ 2. Extract relevant sections (scope, priorities, features)
49
+
50
+ **For Process questions**:
51
+ 1. Read `.bugzy/runtime/project-context.md`
52
+ 2. Check for workflow documentation
53
+
54
+ ### Step 3: Formulate Answer
55
+
56
+ Compose the answer following these guidelines:
57
+
58
+ 1. **Be specific**: Quote relevant sections from source files
59
+ 2. **Cite sources**: Mention which files contain the information
60
+ 3. **Structure clearly**: Use bullet points for multiple items
61
+ 4. **Quantify when possible**: "We have 12 test cases covering login..."
62
+ 5. **Acknowledge gaps**: If information is incomplete, say so
63
+
64
+ ### Step 4: Offer Follow-up
65
+
66
+ End responses with:
67
+ - Offer to provide more detail if needed
68
+ - Suggest related information that might be helpful
69
+ - For coverage gaps, offer to create test cases
70
+
71
+ ## Response Guidelines
72
+
73
+ **Structure**:
74
+ ```
75
+ [Direct answer to the question]
76
+
77
+ [Supporting details/evidence with file references]
78
+
79
+ [Optional: Related information or follow-up offer]
80
+ ```
81
+
82
+ **Examples**:
83
+
84
+ For "Do we have tests for login?":
85
+ ```
86
+ Yes, we have 4 test cases covering the login feature:
87
+ - TC-001: Successful login with valid credentials
88
+ - TC-002: Login failure with invalid password
89
+ - TC-003: Login with remember me option
90
+ - TC-004: Password reset flow
91
+
92
+ These are documented in ./test-cases/TC-001.md through TC-004.md.
93
+ Would you like details on any specific test case?
94
+ ```
95
+
96
+ For "How many tests passed in the last run?":
97
+ ```
98
+ The most recent test run (2024-01-15 14:30) results:
99
+ - Total: 24 tests
100
+ - Passed: 21 (87.5%)
101
+ - Failed: 3
102
+
103
+ Failed tests:
104
+ - TC-012: Checkout timeout (performance issue)
105
+ - TC-015: Image upload failed (file size validation)
106
+ - TC-018: Search pagination broken
107
+
108
+ Results are in ./test-runs/20240115-143000/summary.json
109
+ ```
110
+
111
+ ## Context Loading Requirements
112
+
113
+ Required (based on question type):
114
+ - [ ] Test plan (`test-plan.md`) - for coverage, plan, knowledge questions
115
+ - [ ] Test cases (`./test-cases/`) - for coverage questions
116
+ - [ ] Test runs (`./test-runs/`) - for results questions
117
+ - [ ] Knowledge base (`.bugzy/runtime/knowledge-base.md`) - for knowledge questions
118
+ - [ ] Project context (`.bugzy/runtime/project-context.md`) - for process questions
119
+
120
+ ## Memory Updates
121
+
122
+ None required - questions are read-only operations. No state changes needed.
@@ -0,0 +1,146 @@
1
+ # Status Message Handler
2
+
3
+ Instructions for processing status requests about tests, tasks, or executions.
4
+
5
+ ## Detection Criteria
6
+
7
+ This handler applies when:
8
+ - User asks about progress or status
9
+ - Keywords present: "status", "progress", "how is", "what happened", "results", "how did", "update on"
10
+ - Questions about test runs, task completion, or execution state
11
+ - Intent field from LLM layer is `status`
12
+
13
+ ## Processing Steps
14
+
15
+ ### Step 1: Identify Status Scope
16
+
17
+ Determine what the user is asking about:
18
+
19
+ | Scope | Indicators | Data Sources |
20
+ |-------|------------|--------------|
21
+ | **Latest test run** | "last run", "recent tests", "how did tests go" | Most recent test-runs/ directory |
22
+ | **Specific test** | Test ID mentioned (TC-XXX), specific feature name | test-runs/*/TC-XXX/, test-cases/TC-XXX.md |
23
+ | **All tests / Overall** | "overall", "all tests", "test coverage", "pass rate" | All test-runs/ summaries |
24
+ | **Specific feature** | Feature name mentioned | Filter test-runs by feature |
25
+ | **Task progress** | "is the task done", "what's happening with" | team-communicator memory |
26
+
27
+ ### Step 2: Gather Status Data
28
+
29
+ **For Latest Test Run**:
30
+ 1. List directories in `./test-runs/` sorted by name in descending order (timestamped names such as `20240115-143000` put the newest run first)
31
+ 2. Read `summary.json` from the most recent directory
32
+ 3. Extract: total tests, passed, failed, skipped, execution time
33
+ 4. For failures, extract brief failure reasons
34
+
35
+ **For Specific Test**:
36
+ 1. Find test case file in `./test-cases/TC-XXX.md`
37
+ 2. Search test-runs for directories containing this test ID
38
+ 3. Get most recent result for this specific test
39
+ 4. Include: last run date, result, failure reason if failed
40
+
41
+ **For Overall Status**:
42
+ 1. Read all `summary.json` files in test-runs/
43
+ 2. Calculate aggregate statistics:
44
+ - Total runs in period (last 7 days, 30 days, etc.)
45
+ - Overall pass rate
46
+ - Most commonly failing tests
47
+ - Trend (improving/declining)
48
+
49
+ **For Task Progress**:
50
+ 1. Read `.bugzy/runtime/memory/team-communicator.md`
51
+ 2. Check for active tasks, blocked tasks, recently completed tasks
52
+ 3. Extract relevant task status
53
+
54
+ ### Step 3: Format Status Report
55
+
56
+ Present status clearly and concisely:
57
+
58
+ **For Latest Test Run**:
59
+ ```
60
+ Test Run: [YYYYMMDD-HHMMSS]
61
+ Status: [Completed/In Progress]
62
+
63
+ Results:
64
+ - Total: [N] tests
65
+ - Passed: [N] ([%])
66
+ - Failed: [N] ([%])
67
+ - Skipped: [N]
68
+
69
+ [If failures exist:]
70
+ Failed Tests:
71
+ - [TC-XXX]: [Brief failure reason]
72
+ - [TC-YYY]: [Brief failure reason]
73
+
74
+ Duration: [X minutes]
75
+ ```
76
+
77
+ **For Specific Test**:
78
+ ```
79
+ Test: [TC-XXX] - [Test Name]
80
+
81
+ Latest Result: [Passed/Failed]
82
+ Run Date: [Date/Time]
83
+
84
+ [If failed:]
85
+ Failure Reason: [reason]
86
+ Last Successful: [date if known]
87
+
88
+ [If passed:]
89
+ Consecutive Passes: [N] (since [date])
90
+ ```
91
+
92
+ **For Overall Status**:
93
+ ```
94
+ Test Suite Overview (Last [N] Days)
95
+
96
+ Total Test Runs: [N]
97
+ Average Pass Rate: [%]
98
+
99
+ Trend: [Improving/Stable/Declining]
100
+
101
+ Most Reliable Tests:
102
+ - [TC-XXX]: [100%] pass rate
103
+ - [TC-YYY]: [100%] pass rate
104
+
105
+ Flaky/Failing Tests:
106
+ - [TC-ZZZ]: [40%] pass rate - [common failure reason]
107
+ - [TC-AAA]: [60%] pass rate - [common failure reason]
108
+
109
+ Last Run: [date/time] - [X/Y passed]
110
+ ```
111
+
112
+ ### Step 4: Provide Context and Recommendations
113
+
114
+ Based on the status:
115
+
116
+ **For failing tests**:
117
+ - Suggest reviewing the test case
118
+ - Mention if this is a new failure or recurring
119
+ - Link to relevant knowledge base entries if they exist
120
+
121
+ **For overall declining trends**:
122
+ - Highlight which tests are causing the decline
123
+ - Suggest investigation areas
124
+
125
+ **For good results**:
126
+ - Acknowledge the healthy state
127
+ - Mention any tests that were previously failing and are now passing
128
+
129
+ ## Response Guidelines
130
+
131
+ - Lead with the most important information (pass/fail summary)
132
+ - Use clear formatting (bullet points, percentages)
133
+ - Include timestamps so users know data freshness
134
+ - Offer to drill down into specifics if summary was given
135
+ - Keep responses scannable - use structure over paragraphs
136
+
137
+ ## Context Loading Requirements
138
+
139
+ Required (based on scope):
140
+ - [ ] Test runs (`./test-runs/`) - for any test status
141
+ - [ ] Test cases (`./test-cases/`) - for specific test details
142
+ - [ ] Team communicator memory (`.bugzy/runtime/memory/team-communicator.md`) - for task status
143
+
144
+ ## Memory Updates
145
+
146
+ None required - status checks are read-only operations. No state changes needed.
@@ -0,0 +1,195 @@
1
+ # Event Examples Template
2
+
3
+ This template provides examples of different event formats that can be processed by the `/process-event` command. Use these as references when triggering events.
4
+
5
+ ## Natural Language Events
6
+
7
+ ### Test Failures
8
+ ```bash
9
+ /process-event "Login test failed with timeout error on Chrome"
10
+ /process-event "The checkout process is broken - users can't complete payment"
11
+ /process-event "TC-001 failed: Element not found after waiting 10 seconds"
12
+ ```
13
+
14
+ ### Discoveries
15
+ ```bash
16
+ /process-event "Found new admin panel at /admin that's not documented"
17
+ /process-event "Discovered that users can bypass authentication by going directly to /dashboard"
18
+ /process-event "New feature: dark mode toggle in settings menu"
19
+ ```
20
+
21
+ ### User Feedback
22
+ ```bash
23
+ /process-event "Customer complaint: checkout process too complicated, abandoned cart"
24
+ /process-event "Support ticket: users reporting slow page loads on mobile"
25
+ /process-event "User suggestion: add keyboard shortcuts for common actions"
26
+ ```
27
+
28
+ ## Structured Events (Key-Value Pairs)
29
+
30
+ ### Test Event
31
+ ```bash
32
+ /process-event --type test.failed --test-id TC-001 --error "Button not clickable" --browser Chrome
33
+ /process-event --type test.passed --test-id TC-045 --duration 45s --previously-flaky true
34
+ ```
35
+
36
+ ### Bug Report
37
+ ```bash
38
+ /process-event --type bug.found --component auth --severity high --title "Login bypass vulnerability"
39
+ /process-event --type bug.fixed --bug-id BUG-123 --resolution "Updated validation logic"
40
+ ```
41
+
42
+ ### Feature Event
43
+ ```bash
44
+ /process-event --type feature.added --name "Quick Actions" --location "dashboard" --documented false
45
+ /process-event --type requirement.changed --feature "Password Policy" --change "Minimum 12 characters"
46
+ ```
47
+
48
+ ## JSON Format Events
49
+
50
+ ### Complex Test Failure
51
+ ```bash
52
+ /process-event '{
53
+ "type": "test.failed",
54
+ "test_id": "TC-001",
55
+ "title": "Login with valid credentials",
56
+ "error": {
57
+ "message": "Element not found",
58
+ "selector": ".login-button",
59
+ "timeout": 10000
60
+ },
61
+ "environment": {
62
+ "browser": "Chrome 120",
63
+ "os": "macOS",
64
+ "viewport": "1920x1080"
65
+ },
66
+ "timestamp": "2025-01-25T10:30:00Z"
67
+ }'
68
+ ```
69
+
70
+ ### User Feedback with Context
71
+ ```bash
72
+ /process-event '{
73
+ "type": "user.feedback",
74
+ "source": "support",
75
+ "ticket_id": "SUP-456",
76
+ "user_type": "premium",
77
+ "issue": {
78
+ "area": "checkout",
79
+ "description": "Payment method not saving",
80
+ "impact": "Cannot complete purchase",
81
+ "frequency": "Always"
82
+ }
83
+ }'
84
+ ```
85
+
86
+ ### Performance Issue
87
+ ```bash
88
+ /process-event '{
89
+ "type": "performance.issue",
90
+ "page": "/dashboard",
91
+ "metrics": {
92
+ "load_time": 8500,
93
+ "time_to_interactive": 12000,
94
+ "largest_contentful_paint": 6500
95
+ },
96
+ "threshold_exceeded": true
97
+ }'
98
+ ```
99
+
100
+ ## YAML-like Format
101
+
102
+ ### Simple Events
103
+ ```bash
104
+ /process-event "type: test.failed, test: TC-001, browser: Firefox"
105
+ /process-event "type: bug.found, severity: medium, component: search"
106
+ /process-event "type: discovery, feature: API endpoint, path: /api/v2/users"
107
+ ```
108
+
109
+ ## Batch Events
110
+
111
+ ### Multiple Related Issues
112
+ ```bash
113
+ /process-event "Multiple login failures today: TC-001, TC-002, TC-003 all failing with similar timeout errors. Seems to be a systematic issue with the authentication service."
114
+ ```
115
+
116
+ ### Exploratory Testing Results
117
+ ```bash
118
+ /process-event "Exploratory testing session results: Found 3 UI inconsistencies, 1 broken link, new feature in settings, and performance degradation on search page"
119
+ ```
120
+
121
+ ## Event Chains
122
+
123
+ Sometimes events are related and should reference each other:
124
+
125
+ ### Initial Event
126
+ ```bash
127
+ /process-event --type deployment --version 2.1.0 --environment staging
128
+ ```
129
+
130
+ ### Follow-up Event
131
+ ```bash
132
+ /process-event "After deployment 2.1.0: 5 tests failing that were passing before"
133
+ ```
134
+
135
+ ## Special Cases
136
+
137
+ ### Flaky Test Pattern
138
+ ```bash
139
+ /process-event "TC-089 failed 3 times out of 10 runs - appears to be flaky"
140
+ ```
141
+
142
+ ### Environment-Specific
143
+ ```bash
144
+ /process-event "All Safari tests failing but Chrome and Firefox pass"
145
+ ```
146
+
147
+ ### Data-Dependent
148
+ ```bash
149
+ /process-event "Tests pass with test data but fail with production data"
150
+ ```
151
+
152
+ ## Tips for Event Creation
153
+
154
+ 1. **Be Specific**: Include test IDs, error messages, and environment details
155
+ 2. **Add Context**: Mention if issue is new, recurring, or related to recent changes
156
+ 3. **Include Impact**: Describe how the issue affects users or testing
157
+ 4. **Provide Evidence**: Include screenshot paths, logs, or session IDs if available
158
+ 5. **Link Related Items**: Reference bug IDs, test cases, or previous events
159
+
160
+ ## Common Patterns to Trigger
161
+
162
+ ### Trigger Learning Extraction
163
+ ```bash
164
+ /process-event "Discovered that all form validations fail when browser language is not English"
165
+ ```
166
+
167
+ ### Trigger Test Plan Update
168
+ ```bash
169
+ /process-event "New payment provider integrated - Stripe checkout now available"
170
+ ```
171
+
172
+ ### Trigger Test Case Creation
173
+ ```bash
174
+ /process-event "Found undocumented admin features that need test coverage"
175
+ ```
176
+
177
+ ### Trigger Bug Report
178
+ ```bash
179
+ /process-event "Critical: Users lose data when session expires during form submission"
180
+ ```
181
+
182
+ ## Event Metadata
183
+
184
+ Events can include optional metadata:
185
+ - `priority`: high, medium, low
186
+ - `source`: automation, manual, support, monitoring
187
+ - `session_id`: For tracking related events
188
+ - `user`: Who reported or discovered
189
+ - `environment`: staging, production, development
190
+ - `tags`: Categories for filtering
191
+
192
+ Example with metadata:
193
+ ```bash
194
+ /process-event --type issue --priority high --source monitoring --environment production --message "Memory leak detected in checkout service"
195
+ ```
@@ -0,0 +1,28 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(jq:*)",
5
+ "mcp__notion__API-post-database-query",
6
+ "mcp__notion__API-retrieve-a-database",
7
+ "Bash(mkdir:*)",
8
+ "Bash(playwright-cli:*)",
9
+ "Bash(git grep:*)",
10
+ "mcp__slack__slack_list_channels",
11
+ "mcp__slack__slack_post_rich_message",
12
+ "Bash(git init:*)",
13
+ "Bash(git --no-pager status --porcelain)",
14
+ "Bash(git --no-pager diff --stat HEAD)",
15
+ "Bash(git --no-pager log --oneline -5)",
16
+ "Bash(git --no-pager status)",
17
+ "Bash(git --no-pager diff HEAD)"
18
+ ],
19
+ "deny": [
20
+ "Read(.env)"
21
+ ],
22
+ "ask": []
23
+ },
24
+ "enabledMcpjsonServers": [
25
+ "notion",
26
+ "slack"
27
+ ]
28
+ }
@@ -0,0 +1,329 @@
1
+ import { test, expect } from '@playwright/test';
2
+ import { mergeManifests } from '../bugzy-reporter';
3
+
4
+ function makeExecution(overrides: Partial<{
5
+ number: number;
6
+ status: string;
7
+ duration: number;
8
+ videoFile: string | null;
9
+ hasTrace: boolean;
10
+ hasScreenshots: boolean;
11
+ error: string | null;
12
+ }> = {}) {
13
+ return {
14
+ number: 1,
15
+ status: 'passed',
16
+ duration: 1000,
17
+ videoFile: 'video.webm',
18
+ hasTrace: false,
19
+ hasScreenshots: false,
20
+ error: null,
21
+ ...overrides,
22
+ };
23
+ }
24
+
25
+ function makeTestCase(id: string, executions: ReturnType<typeof makeExecution>[], finalStatus?: string) {
26
+ const lastExec = executions[executions.length - 1];
27
+ return {
28
+ id,
29
+ name: id.replace(/^TC-\d+-/, '').replace(/-/g, ' '),
30
+ totalExecutions: executions.length,
31
+ finalStatus: finalStatus ?? lastExec.status,
32
+ executions,
33
+ };
34
+ }
35
+
36
+ function makeManifest(overrides: Partial<{
37
+ bugzyExecutionId: string;
38
+ timestamp: string;
39
+ startTime: string;
40
+ endTime: string;
41
+ status: string;
42
+ stats: { totalTests: number; passed: number; failed: number; totalExecutions: number };
43
+ testCases: ReturnType<typeof makeTestCase>[];
44
+ }> = {}) {
45
+ const testCases = overrides.testCases ?? [];
46
+ const totalExecutions = testCases.reduce((sum, tc) => sum + tc.executions.length, 0);
47
+ const passed = testCases.filter(tc => tc.finalStatus === 'passed').length;
48
+ const failed = testCases.length - passed;
49
+
50
+ return {
51
+ bugzyExecutionId: 'local-20260127-060129',
52
+ timestamp: '20260127-060129',
53
+ startTime: '2026-01-27T06:01:29.000Z',
54
+ endTime: '2026-01-27T06:02:00.000Z',
55
+ status: 'passed',
56
+ stats: {
57
+ totalTests: testCases.length,
58
+ passed,
59
+ failed,
60
+ totalExecutions,
61
+ ...overrides.stats,
62
+ },
63
+ ...overrides,
64
+ testCases,
65
+ };
66
+ }
67
+
68
+ test.describe('mergeManifests', () => {
69
+ test('returns current manifest unchanged when existing is null', () => {
70
+ const current = makeManifest({
71
+ testCases: [makeTestCase('TC-001-login', [makeExecution()])],
72
+ });
73
+
74
+ const result = mergeManifests(null, current);
75
+
76
+ expect(result).toEqual(current);
77
+ });
78
+
79
+ test('merges test cases from both manifests', () => {
80
+ const existing = makeManifest({
81
+ testCases: [
82
+ makeTestCase('TC-001-login', [makeExecution({ number: 1 })]),
83
+ ],
84
+ });
85
+
86
+ const current = makeManifest({
87
+ startTime: '2026-01-27T06:05:00.000Z',
88
+ endTime: '2026-01-27T06:06:00.000Z',
89
+ testCases: [
90
+ makeTestCase('TC-002-checkout', [makeExecution({ number: 1 })]),
91
+ ],
92
+ });
93
+
94
+ const result = mergeManifests(existing, current);
95
+
96
+ expect(result.testCases).toHaveLength(2);
97
+ expect(result.testCases.map(tc => tc.id)).toContain('TC-001-login');
98
+ expect(result.testCases.map(tc => tc.id)).toContain('TC-002-checkout');
99
+ expect(result.stats.totalTests).toBe(2);
100
+ expect(result.stats.totalExecutions).toBe(2);
101
+ });
102
+
103
+ test('merges executions for the same test case across runs', () => {
104
+ const existing = makeManifest({
105
+ testCases: [
106
+ makeTestCase('TC-001-login', [
107
+ makeExecution({ number: 1, status: 'failed', error: 'timeout' }),
108
+ ], 'failed'),
109
+ ],
110
+ });
111
+
112
+ const current = makeManifest({
113
+ startTime: '2026-01-27T06:05:00.000Z',
114
+ endTime: '2026-01-27T06:06:00.000Z',
115
+ testCases: [
116
+ makeTestCase('TC-001-login', [
117
+ makeExecution({ number: 2, status: 'passed' }),
118
+ ]),
119
+ ],
120
+ });
121
+
122
+ const result = mergeManifests(existing, current);
123
+
124
+ expect(result.testCases).toHaveLength(1);
125
+ const tc = result.testCases[0];
126
+ expect(tc.executions).toHaveLength(2);
127
+ expect(tc.executions[0].number).toBe(1);
128
+ expect(tc.executions[0].status).toBe('failed');
129
+ expect(tc.executions[1].number).toBe(2);
130
+ expect(tc.executions[1].status).toBe('passed');
131
+ expect(tc.totalExecutions).toBe(2);
132
+ expect(tc.finalStatus).toBe('passed'); // Latest execution status
133
+ });
134
+
135
+ test('current run wins on execution number collision', () => {
136
+ const existing = makeManifest({
137
+ testCases: [
138
+ makeTestCase('TC-001-login', [
139
+ makeExecution({ number: 3, status: 'failed', duration: 500 }),
140
+ ], 'failed'),
141
+ ],
142
+ });
143
+
144
+ const current = makeManifest({
145
+ startTime: '2026-01-27T06:05:00.000Z',
146
+ endTime: '2026-01-27T06:06:00.000Z',
147
+ testCases: [
148
+ makeTestCase('TC-001-login', [
149
+ makeExecution({ number: 3, status: 'passed', duration: 1200 }),
150
+ ]),
151
+ ],
152
+ });
153
+
154
+ const result = mergeManifests(existing, current);
155
+
156
+ const tc = result.testCases[0];
157
+ expect(tc.executions).toHaveLength(1);
158
+ expect(tc.executions[0].status).toBe('passed');
159
+ expect(tc.executions[0].duration).toBe(1200);
160
+ });
161
+
162
+ test('preserves test cases that only exist in existing manifest', () => {
163
+ const existing = makeManifest({
164
+ testCases: [
165
+ makeTestCase('TC-001-login', [makeExecution({ number: 1 })]),
166
+ makeTestCase('TC-002-checkout', [makeExecution({ number: 1 })]),
167
+ ],
168
+ });
169
+
170
+ const current = makeManifest({
171
+ startTime: '2026-01-27T06:05:00.000Z',
172
+ endTime: '2026-01-27T06:06:00.000Z',
173
+ testCases: [
174
+ makeTestCase('TC-001-login', [makeExecution({ number: 2 })]),
175
+ ],
176
+ });
177
+
178
+ const result = mergeManifests(existing, current);
179
+
180
+ expect(result.testCases).toHaveLength(2);
181
+ const checkout = result.testCases.find(tc => tc.id === 'TC-002-checkout');
182
+ expect(checkout).toBeDefined();
183
+ expect(checkout!.executions).toHaveLength(1);
184
+ expect(checkout!.executions[0].number).toBe(1);
185
+ });
186
+
187
+ test('recalculates stats correctly from merged data', () => {
188
+ const existing = makeManifest({
189
+ testCases: [
190
+ makeTestCase('TC-001-login', [
191
+ makeExecution({ number: 1, status: 'failed' }),
192
+ ], 'failed'),
193
+ makeTestCase('TC-002-checkout', [
194
+ makeExecution({ number: 1, status: 'passed' }),
195
+ ]),
196
+ ],
197
+ });
198
+
199
+ const current = makeManifest({
200
+ startTime: '2026-01-27T06:05:00.000Z',
201
+ endTime: '2026-01-27T06:06:00.000Z',
202
+ testCases: [
203
+ makeTestCase('TC-001-login', [
204
+ makeExecution({ number: 2, status: 'passed' }),
205
+ ]),
206
+ makeTestCase('TC-003-profile', [
207
+ makeExecution({ number: 1, status: 'failed' }),
208
+ ], 'failed'),
209
+ ],
210
+ });
211
+
212
+ const result = mergeManifests(existing, current);
213
+
214
+ expect(result.stats.totalTests).toBe(3);
215
+ // TC-001: exec-1 (failed) + exec-2 (passed) = 2 execs, finalStatus=passed
216
+ // TC-002: exec-1 (passed) = 1 exec, finalStatus=passed
217
+ // TC-003: exec-1 (failed) = 1 exec, finalStatus=failed
218
+ expect(result.stats.totalExecutions).toBe(4);
219
+ expect(result.stats.passed).toBe(2); // TC-001 and TC-002
220
+ expect(result.stats.failed).toBe(1); // TC-003
221
+ });
222
+
223
+ test('uses earliest startTime and latest endTime', () => {
224
+ const existing = makeManifest({
225
+ startTime: '2026-01-27T06:01:00.000Z',
226
+ endTime: '2026-01-27T06:02:00.000Z',
227
+ testCases: [makeTestCase('TC-001-login', [makeExecution()])],
228
+ });
229
+
230
+ const current = makeManifest({
231
+ startTime: '2026-01-27T06:05:00.000Z',
232
+ endTime: '2026-01-27T06:06:00.000Z',
233
+ testCases: [makeTestCase('TC-001-login', [makeExecution({ number: 2 })])],
234
+ });
235
+
236
+ const result = mergeManifests(existing, current);
237
+
238
+ expect(result.startTime).toBe('2026-01-27T06:01:00.000Z');
239
+ expect(result.endTime).toBe('2026-01-27T06:06:00.000Z');
240
+ });
241
+
242
+ test('sets status to failed if any test case has failed finalStatus', () => {
243
+ const existing = makeManifest({
244
+ status: 'passed',
245
+ testCases: [
246
+ makeTestCase('TC-001-login', [makeExecution({ number: 1, status: 'passed' })]),
247
+ ],
248
+ });
249
+
250
+ const current = makeManifest({
251
+ status: 'passed',
252
+ startTime: '2026-01-27T06:05:00.000Z',
253
+ endTime: '2026-01-27T06:06:00.000Z',
254
+ testCases: [
255
+ makeTestCase('TC-002-checkout', [
256
+ makeExecution({ number: 1, status: 'failed' }),
257
+ ], 'failed'),
258
+ ],
259
+ });
260
+
261
+ const result = mergeManifests(existing, current);
262
+
263
+ expect(result.status).toBe('failed');
264
+ });
265
+
266
+ test('preserves original session timestamp from existing manifest', () => {
267
+ const existing = makeManifest({
268
+ timestamp: '20260127-060129',
269
+ testCases: [makeTestCase('TC-001-login', [makeExecution()])],
270
+ });
271
+
272
+ const current = makeManifest({
273
+ timestamp: '20260127-060500',
274
+ startTime: '2026-01-27T06:05:00.000Z',
275
+ endTime: '2026-01-27T06:06:00.000Z',
276
+ testCases: [makeTestCase('TC-001-login', [makeExecution({ number: 2 })])],
277
+ });
278
+
279
+ const result = mergeManifests(existing, current);
280
+
281
+ expect(result.timestamp).toBe('20260127-060129');
282
+ });
283
+
284
+ test('handles timedOut status as failure in merged status', () => {
285
+ const existing = makeManifest({
286
+ status: 'passed',
287
+ testCases: [
288
+ makeTestCase('TC-001-login', [
289
+ makeExecution({ number: 1, status: 'timedOut' }),
290
+ ], 'timedOut'),
291
+ ],
292
+ });
293
+
294
+ const current = makeManifest({
295
+ status: 'passed',
296
+ startTime: '2026-01-27T06:05:00.000Z',
297
+ endTime: '2026-01-27T06:06:00.000Z',
298
+ testCases: [
299
+ makeTestCase('TC-002-checkout', [makeExecution({ number: 1 })]),
300
+ ],
301
+ });
302
+
303
+ const result = mergeManifests(existing, current);
304
+
305
+ expect(result.status).toBe('failed');
306
+ });
307
+
308
+ test('does not mutate input manifests', () => {
309
+ const existingExec = makeExecution({ number: 1, status: 'failed' });
310
+ const existing = makeManifest({
311
+ testCases: [makeTestCase('TC-001-login', [existingExec], 'failed')],
312
+ });
313
+ const existingSnapshot = JSON.parse(JSON.stringify(existing));
314
+
315
+ const current = makeManifest({
316
+ startTime: '2026-01-27T06:05:00.000Z',
317
+ endTime: '2026-01-27T06:06:00.000Z',
318
+ testCases: [
319
+ makeTestCase('TC-001-login', [makeExecution({ number: 2, status: 'passed' })]),
320
+ ],
321
+ });
322
+ const currentSnapshot = JSON.parse(JSON.stringify(current));
323
+
324
+ mergeManifests(existing, current);
325
+
326
+ expect(existing).toEqual(existingSnapshot);
327
+ expect(current).toEqual(currentSnapshot);
328
+ });
329
+ });
@@ -0,0 +1,5 @@
1
+ import { defineConfig } from '@playwright/test';
2
+
3
// Playwright configuration for this directory; the only option set is
// `testDir`, which points at '.'.
const config = defineConfig({
  testDir: '.',
});

export default config;
@@ -24,6 +24,142 @@ interface StepData {
24
24
  duration?: number;
25
25
  }
26
26
 
27
/**
 * One recorded execution (attempt) of a test case inside a manifest.
 */
interface ManifestExecution {
  /** Execution number; used as the de-duplication key when manifests are merged. */
  number: number;
  /** Outcome of this execution (the merge logic recognises 'passed', 'failed', 'timedOut'). */
  status: string;
  duration: number;
  videoFile: string | null;
  hasTrace: boolean;
  hasScreenshots: boolean;
  error: string | null;
}

/**
 * A test case entry in a manifest together with all of its executions.
 */
interface ManifestTestCase {
  /** Identifier used to match the same test case across manifests. */
  id: string;
  name: string;
  totalExecutions: number;
  /** Status of the test case after its last execution. */
  finalStatus: string;
  executions: ManifestExecution[];
}

/**
 * Manifest structure for test run sessions.
 */
interface Manifest {
  bugzyExecutionId: string;
  /** Session timestamp; the merged manifest keeps the existing (original) one. */
  timestamp: string;
  /** Start time as a string parseable by `Date`. */
  startTime: string;
  /** End time as a string parseable by `Date`. */
  endTime: string;
  status: string;
  stats: {
    totalTests: number;
    passed: number;
    failed: number;
    totalExecutions: number;
  };
  testCases: ManifestTestCase[];
}

/**
 * Merge an existing manifest with the current run's manifest.
 *
 * - Test cases are matched by `id`; cases present in only one manifest are
 *   carried over unchanged (with a copied `executions` array).
 * - For a case present in both, executions are de-duplicated by `number`
 *   (the current run wins on collision) and sorted by `number`; the final
 *   status is the status of the highest-numbered execution.
 * - Stats are recalculated from the merged data: any case whose final status
 *   is not 'passed' is counted as failed.
 * - The earliest `startTime` and latest `endTime` are kept, along with the
 *   existing manifest's `timestamp` and the current run's `bugzyExecutionId`.
 * - Overall status is 'failed' if any merged case is 'failed' or 'timedOut';
 *   otherwise the current run's status is used.
 *
 * Neither input object is mutated.
 *
 * @param existing - Previously written manifest, or `null` if there is none.
 *   A value without a `testCases` array (e.g. a malformed file parsed from
 *   disk) is treated the same as `null`.
 * @param current - Manifest built from the current run.
 * @returns `current` itself when there is nothing usable to merge with,
 *   otherwise a new merged manifest.
 */
export function mergeManifests(existing: Manifest | null, current: Manifest): Manifest {
  // The existing manifest typically comes from an unvalidated JSON.parse, so a
  // structurally unusable value is handled like "no previous manifest" instead
  // of throwing and losing the current run's data.
  if (!existing || !Array.isArray(existing.testCases)) {
    return current;
  }

  // Seed the map with copies of the existing test cases, keyed by id.
  const testCaseMap = new Map<string, ManifestTestCase>();
  for (const tc of existing.testCases) {
    testCaseMap.set(tc.id, { ...tc, executions: [...tc.executions] });
  }

  // Fold in the current run's test cases.
  for (const tc of current.testCases) {
    const existingTc = testCaseMap.get(tc.id);
    if (!existingTc) {
      // New test case from the current run.
      testCaseMap.set(tc.id, { ...tc, executions: [...tc.executions] });
      continue;
    }

    // De-duplicate by execution number; later `set` calls (current run) win.
    const execMap = new Map<number, ManifestExecution>();
    for (const exec of existingTc.executions) {
      execMap.set(exec.number, exec);
    }
    for (const exec of tc.executions) {
      execMap.set(exec.number, exec);
    }

    const mergedExecs = Array.from(execMap.values()).sort((a, b) => a.number - b.number);

    // With no executions on either side there is no "last execution" to read
    // a status from; fall back to the status reported by the current run.
    const lastExec = mergedExecs.length > 0 ? mergedExecs[mergedExecs.length - 1] : undefined;
    const finalStatus = lastExec ? lastExec.status : tc.finalStatus;

    testCaseMap.set(tc.id, {
      id: tc.id,
      name: tc.name,
      totalExecutions: mergedExecs.length,
      finalStatus,
      executions: mergedExecs,
    });
  }

  const mergedTestCases = Array.from(testCaseMap.values());

  // Recalculate stats from the merged test cases.
  let totalTests = 0;
  let totalExecutions = 0;
  let passedTests = 0;
  let failedTests = 0;

  for (const tc of mergedTestCases) {
    totalTests++;
    totalExecutions += tc.executions.length;
    if (tc.finalStatus === 'passed') {
      passedTests++;
    } else {
      failedTests++;
    }
  }

  // Use earliest startTime, latest endTime.
  const startTime = new Date(existing.startTime) < new Date(current.startTime)
    ? existing.startTime
    : current.startTime;
  const endTime = new Date(existing.endTime) > new Date(current.endTime)
    ? existing.endTime
    : current.endTime;

  // Status: if any test case failed or timed out, overall is failed.
  const hasFailure = mergedTestCases.some(
    tc => tc.finalStatus === 'failed' || tc.finalStatus === 'timedOut',
  );
  const status = hasFailure ? 'failed' : current.status;

  return {
    bugzyExecutionId: current.bugzyExecutionId,
    timestamp: existing.timestamp, // Keep original session timestamp
    startTime,
    endTime,
    status,
    stats: {
      totalTests,
      passed: passedTests,
      failed: failedTests,
      totalExecutions,
    },
    testCases: mergedTestCases,
  };
}
162
+
27
163
  /**
28
164
  * Bugzy Custom Playwright Reporter
29
165
  *
@@ -393,8 +529,8 @@ class BugzyReporter implements Reporter {
393
529
  });
394
530
  }
395
531
 
396
- // Generate manifest.json
397
- const manifest = {
532
+ // Build current run's manifest
533
+ const currentManifest: Manifest = {
398
534
  bugzyExecutionId: this.bugzyExecutionId,
399
535
  timestamp: this.timestamp,
400
536
  startTime: this.startTime.toISOString(),
@@ -409,14 +545,37 @@ class BugzyReporter implements Reporter {
409
545
  testCases,
410
546
  };
411
547
 
548
+ // Read existing manifest for merge (if session is being reused)
412
549
  const manifestPath = path.join(this.testRunDir, 'manifest.json');
413
- fs.writeFileSync(manifestPath, JSON.stringify(manifest, null, 2));
550
+ let existingManifest: Manifest | null = null;
551
+ if (fs.existsSync(manifestPath)) {
552
+ try {
553
+ existingManifest = JSON.parse(fs.readFileSync(manifestPath, 'utf-8'));
554
+ } catch (err) {
555
+ console.warn(`⚠️ Could not parse existing manifest, will overwrite: ${err}`);
556
+ }
557
+ }
414
558
 
415
- console.log(`\n📊 Test Run Summary:`);
559
+ // Merge with existing manifest data
560
+ const merged = mergeManifests(existingManifest, currentManifest);
561
+
562
+ // Write atomically (temp file + rename)
563
+ const tmpPath = manifestPath + '.tmp';
564
+ fs.writeFileSync(tmpPath, JSON.stringify(merged, null, 2));
565
+ fs.renameSync(tmpPath, manifestPath);
566
+
567
+ console.log(`\n📊 Test Run Summary (this run):`);
416
568
  console.log(` Total tests: ${totalTests}`);
417
569
  console.log(` Passed: ${passedTests}`);
418
570
  console.log(` Failed: ${failedTests}`);
419
571
  console.log(` Total executions: ${totalExecutions}`);
572
+
573
+ if (existingManifest) {
574
+ console.log(`\n🔗 Merged with previous session data:`);
575
+ console.log(` Session total tests: ${merged.stats.totalTests}`);
576
+ console.log(` Session total executions: ${merged.stats.totalExecutions}`);
577
+ }
578
+
420
579
  console.log(` Manifest: ${manifestPath}\n`);
421
580
  }
422
581