@bugzy-ai/bugzy 1.16.0 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,299 @@
1
+ import { test, expect } from '@playwright/test';
2
+ import { classifyFailures } from '../bugzy-reporter';
3
+ import * as fs from 'fs';
4
+ import * as path from 'path';
5
+ import * as os from 'os';
6
+
/**
 * Builds a manifest fixture for the classifyFailures specs.
 *
 * Aggregate fields (`status`, `stats`) are derived from the supplied test
 * cases: a case counts as passed only when its `finalStatus` is `'passed'`,
 * every other status counts as failed, and `totalExecutions` is the sum of
 * the lengths of the `executions` arrays (not the per-case `totalExecutions`
 * field).
 *
 * @param overrides - Optional `timestamp` and `testCases`; anything omitted
 *   falls back to a fixed default.
 * @returns A plain manifest object ready to be serialised or passed to the
 *   code under test.
 */
function makeManifest(overrides: Partial<{
  timestamp: string;
  testCases: Array<{
    id: string;
    name: string;
    totalExecutions: number;
    finalStatus: string;
    executions: Array<{
      number: number;
      status: string;
      duration: number;
      videoFile: string | null;
      hasTrace: boolean;
      hasScreenshots: boolean;
      error: string | null;
    }>;
  }>;
}> = {}) {
  const cases = overrides.testCases ?? [];

  // Single pass over the cases to gather both counters.
  let executionCount = 0;
  let passCount = 0;
  for (const testCase of cases) {
    executionCount += testCase.executions.length;
    if (testCase.finalStatus === 'passed') {
      passCount += 1;
    }
  }
  const failCount = cases.length - passCount;

  return {
    bugzyExecutionId: 'local-test',
    timestamp: overrides.timestamp ?? '20260216-120000',
    startTime: '2026-02-16T12:00:00.000Z',
    endTime: '2026-02-16T12:01:00.000Z',
    status: failCount === 0 ? 'passed' : 'failed',
    stats: {
      totalTests: cases.length,
      passed: passCount,
      failed: failCount,
      totalExecutions: executionCount,
    },
    testCases: cases,
  };
}
40
+
/**
 * Builds a test-case fixture that ran exactly once.
 *
 * The display name is derived from the id by dropping a leading
 * `TC-<digits>-` prefix and turning the remaining dashes into spaces.
 *
 * @param id - Test case identifier, e.g. `TC-001-login`.
 * @param finalStatus - Used both as the case's final status and as the
 *   status of its single execution.
 * @param error - Error text for the execution; stored as `null` when omitted.
 * @returns A test-case object with one execution entry.
 */
function makeTestCase(id: string, finalStatus: string, error?: string) {
  const displayName = id.replace(/^TC-\d+-/, '').replace(/-/g, ' ');

  const onlyExecution = {
    number: 1,
    status: finalStatus,
    duration: 1000,
    videoFile: null,
    hasTrace: false,
    hasScreenshots: false,
    error: error ?? null,
  };

  return {
    id,
    name: displayName,
    totalExecutions: 1,
    finalStatus,
    executions: [onlyExecution],
  };
}
58
+
/**
 * Creates a fresh temporary `test-runs/` directory and writes one
 * `<timestamp>/manifest.json` file per entry.
 *
 * The directory lives under a new `bugzy-test-*` folder inside the OS temp
 * directory. Nothing here removes it again, so the caller is responsible for
 * deleting the parent of the returned path when it is no longer needed.
 *
 * @param manifests - Run folders to create. `manifest` is only serialised
 *   with `JSON.stringify`, so any JSON-serialisable value is accepted.
 * @returns Absolute path of the created `test-runs` directory.
 */
function setupTestRunsDir(manifests: Array<{ timestamp: string; manifest: unknown }>): string {
  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'bugzy-test-'));
  const testRunsRoot = path.join(tmpDir, 'test-runs');
  fs.mkdirSync(testRunsRoot, { recursive: true });

  for (const { timestamp, manifest } of manifests) {
    const runDir = path.join(testRunsRoot, timestamp);
    fs.mkdirSync(runDir, { recursive: true });
    fs.writeFileSync(
      path.join(runDir, 'manifest.json'),
      JSON.stringify(manifest, null, 2)
    );
  }

  return testRunsRoot;
}
75
+
/**
 * Specs for classifyFailures: how failures of the current run are split into
 * "new" and "known" based on the manifests of earlier runs on disk.
 */
test.describe('classifyFailures', () => {
  // Temp `test-runs` roots created by the running test; removed in afterEach
  // so the suite does not leave directories behind in the OS temp folder.
  const createdRoots: string[] = [];

  // Value of BUGZY_FAILURE_LOOKBACK before the test started, restored afterwards.
  let savedLookback: string | undefined;

  /** Same as setupTestRunsDir, but remembers the result for cleanup. */
  const createTestRuns = (manifests: Parameters<typeof setupTestRunsDir>[0]) => {
    const root = setupTestRunsDir(manifests);
    createdRoots.push(root);
    return root;
  };

  test.beforeEach(() => {
    // Every test starts from the default lookback, regardless of the
    // environment the suite happens to be launched from.
    savedLookback = process.env.BUGZY_FAILURE_LOOKBACK;
    delete process.env.BUGZY_FAILURE_LOOKBACK;
  });

  test.afterEach(() => {
    if (savedLookback === undefined) {
      delete process.env.BUGZY_FAILURE_LOOKBACK;
    } else {
      process.env.BUGZY_FAILURE_LOOKBACK = savedLookback;
    }

    for (const root of createdRoots.splice(0)) {
      // setupTestRunsDir nests `test-runs` inside a `bugzy-test-*` mkdtemp
      // folder; remove that wrapper when present, otherwise just the root.
      const parent = path.dirname(root);
      const target = path.basename(parent).startsWith('bugzy-test-') ? parent : root;
      fs.rmSync(target, { recursive: true, force: true });
    }
  });

  test('returns empty arrays when no failures', () => {
    const manifest = makeManifest({
      testCases: [makeTestCase('TC-001-login', 'passed')],
    });

    const result = classifyFailures(manifest, '/nonexistent');

    expect(result.newFailures).toHaveLength(0);
    expect(result.knownFailures).toHaveLength(0);
  });

  test('all failures are new when no previous runs exist', () => {
    const manifest = makeManifest({
      timestamp: '20260216-120000',
      testCases: [
        makeTestCase('TC-001-login', 'failed', 'timeout'),
        makeTestCase('TC-002-checkout', 'failed', 'element not found'),
      ],
    });

    const testRunsRoot = createTestRuns([]);

    const result = classifyFailures(manifest, testRunsRoot);

    expect(result.newFailures).toHaveLength(2);
    expect(result.knownFailures).toHaveLength(0);
    expect(result.newFailures[0].id).toBe('TC-001-login');
    expect(result.newFailures[0].error).toBe('timeout');
    expect(result.newFailures[1].id).toBe('TC-002-checkout');
  });

  test('failure is new when test passed in recent run', () => {
    const previousManifest = makeManifest({
      timestamp: '20260215-120000',
      testCases: [
        makeTestCase('TC-001-login', 'passed'),
        makeTestCase('TC-002-checkout', 'passed'),
      ],
    });

    const testRunsRoot = createTestRuns([
      { timestamp: '20260215-120000', manifest: previousManifest },
    ]);

    const currentManifest = makeManifest({
      timestamp: '20260216-120000',
      testCases: [
        makeTestCase('TC-001-login', 'failed', 'timeout'),
      ],
    });

    const result = classifyFailures(currentManifest, testRunsRoot);

    expect(result.newFailures).toHaveLength(1);
    expect(result.knownFailures).toHaveLength(0);
    expect(result.newFailures[0].id).toBe('TC-001-login');
    expect(result.newFailures[0].lastPassedRun).toBe('20260215-120000');
  });

  test('failure is known when test failed in all previous runs', () => {
    const prev1 = makeManifest({
      timestamp: '20260215-120000',
      testCases: [makeTestCase('TC-001-login', 'failed', 'timeout')],
    });
    const prev2 = makeManifest({
      timestamp: '20260214-120000',
      testCases: [makeTestCase('TC-001-login', 'failed', 'timeout')],
    });

    const testRunsRoot = createTestRuns([
      { timestamp: '20260215-120000', manifest: prev1 },
      { timestamp: '20260214-120000', manifest: prev2 },
    ]);

    const currentManifest = makeManifest({
      timestamp: '20260216-120000',
      testCases: [makeTestCase('TC-001-login', 'failed', 'timeout')],
    });

    const result = classifyFailures(currentManifest, testRunsRoot);

    expect(result.newFailures).toHaveLength(0);
    expect(result.knownFailures).toHaveLength(1);
    expect(result.knownFailures[0].id).toBe('TC-001-login');
  });

  test('mixed new and known failures', () => {
    const previousManifest = makeManifest({
      timestamp: '20260215-120000',
      testCases: [
        makeTestCase('TC-001-login', 'passed'),
        makeTestCase('TC-002-checkout', 'failed', 'always broken'),
      ],
    });

    const testRunsRoot = createTestRuns([
      { timestamp: '20260215-120000', manifest: previousManifest },
    ]);

    const currentManifest = makeManifest({
      timestamp: '20260216-120000',
      testCases: [
        makeTestCase('TC-001-login', 'failed', 'new regression'),
        makeTestCase('TC-002-checkout', 'failed', 'still broken'),
      ],
    });

    const result = classifyFailures(currentManifest, testRunsRoot);

    expect(result.newFailures).toHaveLength(1);
    expect(result.newFailures[0].id).toBe('TC-001-login');
    expect(result.newFailures[0].lastPassedRun).toBe('20260215-120000');

    expect(result.knownFailures).toHaveLength(1);
    expect(result.knownFailures[0].id).toBe('TC-002-checkout');
  });

  test('new test not in history is treated as new failure', () => {
    const previousManifest = makeManifest({
      timestamp: '20260215-120000',
      testCases: [makeTestCase('TC-001-login', 'passed')],
    });

    const testRunsRoot = createTestRuns([
      { timestamp: '20260215-120000', manifest: previousManifest },
    ]);

    const currentManifest = makeManifest({
      timestamp: '20260216-120000',
      testCases: [
        makeTestCase('TC-003-new-feature', 'failed', 'new test fails'),
      ],
    });

    const result = classifyFailures(currentManifest, testRunsRoot);

    expect(result.newFailures).toHaveLength(1);
    expect(result.newFailures[0].id).toBe('TC-003-new-feature');
    expect(result.newFailures[0].lastPassedRun).toBeNull();
  });

  test('respects BUGZY_FAILURE_LOOKBACK env var', () => {
    // The afterEach hook restores the original value, even if an assertion fails.
    process.env.BUGZY_FAILURE_LOOKBACK = '1';

    // Run 1: test passed
    // Run 2: test failed
    // Run 3 (current): test failed
    // With lookback=1, only run 2 is checked (most recent)
    const run1 = makeManifest({
      timestamp: '20260213-120000',
      testCases: [makeTestCase('TC-001-login', 'passed')],
    });
    const run2 = makeManifest({
      timestamp: '20260214-120000',
      testCases: [makeTestCase('TC-001-login', 'failed', 'broken')],
    });

    const testRunsRoot = createTestRuns([
      { timestamp: '20260213-120000', manifest: run1 },
      { timestamp: '20260214-120000', manifest: run2 },
    ]);

    const currentManifest = makeManifest({
      timestamp: '20260215-120000',
      testCases: [makeTestCase('TC-001-login', 'failed', 'still broken')],
    });

    const result = classifyFailures(currentManifest, testRunsRoot);

    // With lookback=1, only sees run2 where test failed → known failure
    expect(result.knownFailures).toHaveLength(1);
    expect(result.newFailures).toHaveLength(0);
  });

  test('handles timedOut status as failure', () => {
    const previousManifest = makeManifest({
      timestamp: '20260215-120000',
      testCases: [makeTestCase('TC-001-login', 'passed')],
    });

    const testRunsRoot = createTestRuns([
      { timestamp: '20260215-120000', manifest: previousManifest },
    ]);

    const currentManifest = makeManifest({
      timestamp: '20260216-120000',
      testCases: [makeTestCase('TC-001-login', 'timedOut', 'Test timeout')],
    });

    const result = classifyFailures(currentManifest, testRunsRoot);

    expect(result.newFailures).toHaveLength(1);
    expect(result.newFailures[0].id).toBe('TC-001-login');
  });

  test('skips current run timestamp when reading previous manifests', () => {
    // Only the current run exists - should be treated as first run
    const currentManifest = makeManifest({
      timestamp: '20260216-120000',
      testCases: [makeTestCase('TC-001-login', 'failed', 'error')],
    });

    const testRunsRoot = createTestRuns([
      { timestamp: '20260216-120000', manifest: currentManifest },
    ]);

    const result = classifyFailures(currentManifest, testRunsRoot);

    // First run - all failures are new
    expect(result.newFailures).toHaveLength(1);
    expect(result.knownFailures).toHaveLength(0);
  });
});
@@ -48,6 +48,16 @@ interface ManifestTestCase {
48
48
  executions: ManifestExecution[];
49
49
  }
50
50
 
51
+ /**
52
+ * One failed test case of the current run, as reported by classifyFailures in either its new-failures or known-failures list
53
+ */
54
+ interface FailureClassification {
55
+ id: string; // id of the failed test case, copied from the current manifest
56
+ name: string; // name of the failed test case, copied from the current manifest
57
+ error: string | null; // error of the test case's last execution; null when there is none
58
+ lastPassedRun: string | null; // timestamp of the most recent inspected previous run where the test passed; null if it passed in none
59
+ }
60
+
51
61
  /**
52
62
  * Manifest structure for test run sessions
53
63
  */
@@ -64,6 +74,132 @@ interface Manifest {
64
74
  totalExecutions: number;
65
75
  };
66
76
  testCases: ManifestTestCase[];
77
+ new_failures?: FailureClassification[];
78
+ known_failures?: FailureClassification[];
79
+ }
80
+
/**
 * Classify the current run's failures as new or known by checking previous
 * test run manifests under `testRunsRoot`.
 *
 * Only test cases whose `finalStatus` is `'failed'` or `'timedOut'` are
 * classified. For each of them, the last N previous runs are inspected
 * (latest first, by run directory name):
 *
 * - "new": the test passed in at least one of those runs (`lastPassedRun` is
 *   that run's timestamp), or the test does not appear in any of them, or
 *   there are no previous runs at all (`lastPassedRun` is `null`).
 * - "known": the test appears in those runs and passed in none of them.
 *
 * N is read from `BUGZY_FAILURE_LOOKBACK` and defaults to 5. A value that
 * does not parse as a number falls back to the default; zero or a negative
 * value means no history is consulted, so every failure is reported as new.
 *
 * The directory named like the current manifest's timestamp is skipped.
 * Run directories without a readable `manifest.json`, with invalid JSON, or
 * whose manifest has no `testCases` array are ignored and do not count
 * towards N.
 *
 * @param currentManifest - The current run's manifest
 * @param testRunsRoot - Path to the test-runs/ directory
 * @returns Object with newFailures and knownFailures arrays, each in the
 *   order the failed tests appear in the current manifest
 * @throws If `testRunsRoot` exists but its entries cannot be listed
 */
export function classifyFailures(
  currentManifest: Manifest,
  testRunsRoot: string
): { newFailures: FailureClassification[]; knownFailures: FailureClassification[] } {
  const newFailures: FailureClassification[] = [];
  const knownFailures: FailureClassification[] = [];

  const failedTests = currentManifest.testCases.filter(
    tc => tc.finalStatus === 'failed' || tc.finalStatus === 'timedOut'
  );

  // Nothing to classify: avoid touching the file system at all.
  if (failedTests.length === 0) {
    return { newFailures, knownFailures };
  }

  const previousManifests = loadPreviousManifests(
    testRunsRoot,
    currentManifest.timestamp,
    parseFailureLookback(process.env.BUGZY_FAILURE_LOOKBACK)
  );

  for (const tc of failedTests) {
    const lastExec = tc.executions[tc.executions.length - 1];

    let seenBefore = false;
    let passedRecently = false;
    let lastPassedRun: string | null = null;

    // previousManifests is ordered latest first, so the first passing run
    // found is the most recent one.
    for (const prevManifest of previousManifests) {
      const prevTc = prevManifest.testCases.find(ptc => ptc.id === tc.id);
      if (!prevTc) continue;

      seenBefore = true;
      if (prevTc.finalStatus === 'passed') {
        passedRecently = true;
        lastPassedRun = prevManifest.timestamp;
        break;
      }
    }

    const entry: FailureClassification = {
      id: tc.id,
      name: tc.name,
      error: lastExec?.error || null,
      lastPassedRun,
    };

    // A test with no history (including the very first run) is reported as
    // new; only a test that is present in history and never passed is known.
    if (passedRecently || !seenBefore) {
      newFailures.push(entry);
    } else {
      knownFailures.push(entry);
    }
  }

  return { newFailures, knownFailures };
}

/** Parses the lookback setting: unparsable input yields the default of 5, negatives are clamped to 0. */
function parseFailureLookback(raw: string | undefined): number {
  const defaultLookback = 5;
  const parsed = parseInt(raw || String(defaultLookback), 10);
  if (Number.isNaN(parsed)) {
    // NaN would make every `length >= lookback` comparison false and turn
    // the lookback window into "all runs ever recorded".
    return defaultLookback;
  }
  return Math.max(parsed, 0);
}

/** Reads up to `limit` usable manifests from run directories under `testRunsRoot`, latest first, skipping `currentTimestamp`. */
function loadPreviousManifests(
  testRunsRoot: string,
  currentTimestamp: string,
  limit: number
): Manifest[] {
  const manifests: Manifest[] = [];
  if (limit <= 0 || !fs.existsSync(testRunsRoot)) {
    return manifests;
  }

  const runDirs = fs.readdirSync(testRunsRoot)
    .filter(name => {
      try {
        return fs.statSync(path.join(testRunsRoot, name)).isDirectory();
      } catch {
        // Entry vanished or is unreadable: not a usable run directory.
        return false;
      }
    })
    .sort()
    .reverse(); // Latest first

  for (const dir of runDirs) {
    if (manifests.length >= limit) break;
    if (dir === currentTimestamp) continue; // Skip current run

    const manifestPath = path.join(testRunsRoot, dir, 'manifest.json');
    try {
      const parsed = JSON.parse(fs.readFileSync(manifestPath, 'utf-8')) as Partial<Manifest> | null;
      // Valid JSON is not necessarily a manifest; the classifier needs at
      // least a testCases array to search.
      if (parsed && Array.isArray(parsed.testCases)) {
        manifests.push(parsed as Manifest);
      }
    } catch {
      // Missing, unreadable or invalid manifest: skip this run.
    }
  }

  return manifests;
}
68
204
 
69
205
  /**
@@ -144,7 +280,7 @@ export function mergeManifests(existing: Manifest | null, current: Manifest): Ma
144
280
  const hasFailure = mergedTestCases.some(tc => tc.finalStatus === 'failed' || tc.finalStatus === 'timedOut');
145
281
  const status = hasFailure ? 'failed' : current.status;
146
282
 
147
- return {
283
+ const merged: Manifest = {
148
284
  bugzyExecutionId: current.bugzyExecutionId,
149
285
  timestamp: existing.timestamp, // Keep original session timestamp
150
286
  startTime,
@@ -158,6 +294,21 @@ export function mergeManifests(existing: Manifest | null, current: Manifest): Ma
158
294
  },
159
295
  testCases: mergedTestCases,
160
296
  };
297
+
298
+ // Preserve failure classification (current run's classification wins)
299
+ if (current.new_failures) {
300
+ merged.new_failures = current.new_failures;
301
+ } else if (existing.new_failures) {
302
+ merged.new_failures = existing.new_failures;
303
+ }
304
+
305
+ if (current.known_failures) {
306
+ merged.known_failures = current.known_failures;
307
+ } else if (existing.known_failures) {
308
+ merged.known_failures = existing.known_failures;
309
+ }
310
+
311
+ return merged;
161
312
  }
162
313
 
163
314
  /**
@@ -559,6 +710,26 @@ class BugzyReporter implements Reporter {
559
710
  // Merge with existing manifest data
560
711
  const merged = mergeManifests(existingManifest, currentManifest);
561
712
 
713
+ // Classify failures as new vs known
714
+ if (merged.stats.failed > 0) {
715
+ try {
716
+ const testRunsRoot = path.join(process.cwd(), 'test-runs');
717
+ const { newFailures, knownFailures } = classifyFailures(merged, testRunsRoot);
718
+ if (newFailures.length > 0) {
719
+ merged.new_failures = newFailures;
720
+ }
721
+ if (knownFailures.length > 0) {
722
+ merged.known_failures = knownFailures;
723
+ }
724
+
725
+ console.log(`\nšŸ” Failure Classification:`);
726
+ console.log(` New failures: ${newFailures.length}`);
727
+ console.log(` Known failures: ${knownFailures.length}`);
728
+ } catch (err) {
729
+ console.warn(`āš ļø Could not classify failures: ${err}`);
730
+ }
731
+ }
732
+
562
733
  // Write atomically (temp file + rename)
563
734
  const tmpPath = manifestPath + '.tmp';
564
735
  fs.writeFileSync(tmpPath, JSON.stringify(merged, null, 2));