agentic-team-templates 0.4.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,557 @@
1
+ # Test Reliability
2
+
3
+ Guidelines for preventing flaky tests and ensuring deterministic test execution.
4
+
5
+ ## What is a Flaky Test?
6
+
7
+ A flaky test passes and fails intermittently without code changes. Flaky tests:
8
+ - Erode confidence in the test suite
9
+ - Waste developer time investigating false failures
10
+ - Lead to ignored failures that hide real bugs
11
+ - Slow down CI/CD pipelines with retries
12
+
13
+ **Goal**: Zero flaky tests in CI.
14
+
15
+ ## Common Causes and Solutions
16
+
17
+ ### 1. Timing Dependencies
18
+
19
+ **Problem**: Tests depend on specific timing that varies between runs.
20
+
21
+ ```ts
22
+ // Bad: Fixed sleep that may not be enough
23
+ await page.click('button');
24
+ await sleep(1000);
25
+ expect(await page.textContent('h1')).toBe('Success');
26
+
27
+ // Bad: Checking too quickly
28
+ const result = startBackgroundJob();
29
+ expect(result.status).toBe('complete'); // Not done yet!
30
+ ```
31
+
32
+ **Solution**: Use explicit waits and polling.
33
+
34
+ ```ts
35
+ // Good: Wait for condition
36
+ await page.click('button');
37
+ await expect(page.locator('h1')).toHaveText('Success', { timeout: 5000 });
38
+
39
+ // Good: Poll for completion
40
+ const result = await waitFor(
41
+ async () => {
42
+ const job = await getJob(jobId);
43
+ return job.status === 'complete' ? job : null;
44
+ },
45
+ { timeout: 10000, interval: 100 }
46
+ );
47
+ expect(result.status).toBe('complete');
48
+ ```
49
+
50
+ ```ts
51
+ // Utility: Generic polling function
52
+ async function waitFor<T>(
53
+ condition: () => Promise<T | null>,
54
+ options: { timeout: number; interval: number }
55
+ ): Promise<T> {
56
+ const start = Date.now();
57
+
58
+ while (Date.now() - start < options.timeout) {
59
+ const result = await condition();
60
+ if (result !== null) return result;
61
+ await sleep(options.interval);
62
+ }
63
+
64
+ throw new Error(`Condition not met within ${options.timeout}ms`);
65
+ }
66
+ ```
67
+
68
+ ### 2. Shared State
69
+
70
+ **Problem**: Tests affect each other through shared resources.
71
+
72
+ ```ts
73
+ // Bad: Shared counter
74
+ let testCounter = 0;
75
+
76
+ it('test 1', () => {
77
+ testCounter++;
78
+ expect(testCounter).toBe(1);
79
+ });
80
+
81
+ it('test 2', () => {
82
+ expect(testCounter).toBe(0); // Fails!
83
+ });
84
+
85
+ // Bad: Shared database state
86
+ it('creates user', async () => {
87
+ await db.user.create({ data: { email: 'test@example.com' } });
88
+ });
89
+
90
+ it('gets all users', async () => {
91
+ const users = await db.user.findMany();
92
+ expect(users).toHaveLength(0); // Fails - leftover from previous test
93
+ });
94
+ ```
95
+
96
+ **Solution**: Isolate test state.
97
+
98
+ ```ts
99
+ // Good: Fresh state per test
100
+ describe('Counter', () => {
101
+ let counter: Counter;
102
+
103
+ beforeEach(() => {
104
+ counter = new Counter();
105
+ });
106
+
107
+ it('starts at zero', () => {
108
+ expect(counter.value).toBe(0);
109
+ });
110
+
111
+ it('increments', () => {
112
+ counter.increment();
113
+ expect(counter.value).toBe(1);
114
+ });
115
+ });
116
+
117
+ // Good: Clean database per test
118
+ beforeEach(async () => {
119
+ await db.$transaction([
120
+ db.user.deleteMany(),
121
+ db.order.deleteMany(),
122
+ ]);
123
+ });
124
+ ```
125
+
126
+ ### 3. Test Order Dependency
127
+
128
+ **Problem**: Tests must run in specific order to pass.
129
+
130
+ ```ts
131
+ // Bad: Test 2 depends on test 1
132
+ let createdUserId: string;
133
+
134
+ it('creates user', async () => {
135
+ const user = await createUser({ name: 'Test' });
136
+ createdUserId = user.id;
137
+ });
138
+
139
+ it('updates user', async () => {
140
+ await updateUser(createdUserId, { name: 'Updated' }); // Depends on previous test
141
+ });
142
+ ```
143
+
144
+ **Solution**: Each test is self-contained.
145
+
146
+ ```ts
147
+ // Good: Independent tests
148
+ it('creates user', async () => {
149
+ const user = await createUser({ name: 'Test' });
150
+ expect(user.id).toBeDefined();
151
+ });
152
+
153
+ it('updates user', async () => {
154
+ const user = await createUser({ name: 'Test' });
155
+ const updated = await updateUser(user.id, { name: 'Updated' });
156
+ expect(updated.name).toBe('Updated');
157
+ });
158
+ ```
159
+
160
+ ```ts
161
+ // Config: Randomize test order to catch dependencies
162
+ // vitest.config.ts
163
+ export default {
164
+ test: {
165
+ sequence: {
166
+ shuffle: true,
167
+ },
168
+ },
169
+ };
170
+ ```
171
+
172
+ ### 4. Time-Dependent Code
173
+
174
+ **Problem**: Tests depend on current time.
175
+
176
+ ```ts
177
+ // Bad: Depends on wall clock
178
+ it('generates expiry date', () => {
179
+ const token = createToken();
180
+ expect(token.expiresAt).toBeInstanceOf(Date); // Value differs every run, so only the type can be asserted
181
+ });
182
+
183
+ // Bad: Time-based logic
184
+ it('shows expired message', () => {
185
+ const subscription = createSubscription({ expiresAt: yesterday() });
186
+ expect(subscription.isExpired).toBe(true); // Depends on "now"
187
+ });
188
+ ```
189
+
190
+ **Solution**: Mock time.
191
+
192
+ ```ts
193
+ // Good: Controlled time
194
+ import { vi } from 'vitest';
195
+
196
+ it('generates expiry date', () => {
197
+ vi.useFakeTimers();
198
+ vi.setSystemTime(new Date('2025-01-01T00:00:00Z'));
199
+
200
+ const token = createToken({ expiresIn: '1h' });
201
+
202
+ expect(token.expiresAt).toEqual(new Date('2025-01-01T01:00:00Z'));
203
+
204
+ vi.useRealTimers(); // Skipped if the assertion above throws; restoring in afterEach is safer
205
+ });
206
+
207
+ // Good: Time as parameter
208
+ it('shows expired message', () => {
209
+ const now = new Date('2025-01-01');
210
+ const subscription = createSubscription({
211
+ expiresAt: new Date('2024-12-31')
212
+ });
213
+
214
+ expect(subscription.isExpiredAt(now)).toBe(true);
215
+ });
216
+ ```
217
+
218
+ ### 5. Random Data Without Seeds
219
+
220
+ **Problem**: Random test data causes inconsistent behavior.
221
+
222
+ ```ts
223
+ // Bad: Random failures
224
+ it('processes order', () => {
225
+ const order = {
226
+ items: Array.from({ length: Math.random() * 10 }, () => createItem()),
227
+ };
228
+ expect(processOrder(order).total).toBeGreaterThan(0);
229
+ // Sometimes 0 items!
230
+ });
231
+ ```
232
+
233
+ **Solution**: Seed random generators or use deterministic factories.
234
+
235
+ ```ts
236
+ // Good: Seeded randomness
237
+ import { faker } from '@faker-js/faker';
238
+
239
+ faker.seed(12345); // Same sequence every run; re-seed in beforeEach if tests run in shuffled order
240
+
241
+ it('processes order', () => {
242
+ const order = createOrder(); // Uses seeded faker
243
+ expect(processOrder(order).total).toBeGreaterThan(0);
244
+ });
245
+
246
+ // Good: Explicit data
247
+ it('processes order', () => {
248
+ const order = createOrder({
249
+ items: [
250
+ { price: 100, quantity: 2 },
251
+ { price: 50, quantity: 1 },
252
+ ],
253
+ });
254
+ expect(processOrder(order).total).toBe(250);
255
+ });
256
+ ```
257
+
258
+ ### 6. Network Dependencies
259
+
260
+ **Problem**: Tests depend on external services.
261
+
262
+ ```ts
263
+ // Bad: Hits real API
264
+ it('fetches weather', async () => {
265
+ const weather = await fetchWeather('NYC');
266
+ expect(weather.temp).toBeGreaterThan(-50); // API might be down
267
+ });
268
+ ```
269
+
270
+ **Solution**: Mock external services.
271
+
272
+ ```ts
273
+ // Good: Mock at network level with MSW
274
+ import { http, HttpResponse } from 'msw';
275
+ import { setupServer } from 'msw/node';
276
+
277
+ const server = setupServer(
278
+ http.get('https://api.weather.com/current', () => {
279
+ return HttpResponse.json({ temp: 72, condition: 'sunny' });
280
+ })
281
+ );
282
+
283
+ beforeAll(() => server.listen());
284
+ afterEach(() => server.resetHandlers());
285
+ afterAll(() => server.close());
286
+
287
+ it('fetches weather', async () => {
288
+ const weather = await fetchWeather('NYC');
289
+ expect(weather.temp).toBe(72);
290
+ });
291
+ ```
292
+
293
+ ### 7. Resource Leaks
294
+
295
+ **Problem**: Resources not cleaned up between tests.
296
+
297
+ ```ts
298
+ // Bad: Database connections accumulate
299
+ let connection: Connection;
300
+
301
+ beforeEach(() => {
302
+ connection = createConnection();
303
+ });
304
+
305
+ it('test 1', async () => {
306
+ await connection.query('SELECT 1');
307
+ });
308
+
309
+ // Connection leak! Never closed
310
+ ```
311
+
312
+ **Solution**: Proper cleanup.
313
+
314
+ ```ts
315
+ // Good: Cleanup in afterEach
316
+ let connection: Connection;
317
+
318
+ beforeEach(() => {
319
+ connection = createConnection();
320
+ });
321
+
322
+ afterEach(async () => {
323
+ await connection.close();
324
+ });
325
+
326
+ // Good: Use try/finally in test
327
+ it('test with resource', async () => {
328
+ const connection = createConnection();
329
+ try {
330
+ await connection.query('SELECT 1');
331
+ } finally {
332
+ await connection.close();
333
+ }
334
+ });
335
+
336
+ // Good: Use disposal pattern (TypeScript 5.2+; connection must implement Symbol.asyncDispose)
337
+ it('test with resource', async () => {
338
+ await using connection = createConnection();
339
+ await connection.query('SELECT 1');
340
+ // Automatically disposed
341
+ });
342
+ ```
343
+
344
+ ### 8. Race Conditions
345
+
346
+ **Problem**: Concurrent operations with non-deterministic ordering.
347
+
348
+ ```ts
349
+ // Bad: Parallel operations may interleave
350
+ it('counts correctly', async () => {
351
+ const counter = new AtomicCounter();
352
+
353
+ await Promise.all([
354
+ counter.increment(),
355
+ counter.increment(),
356
+ counter.increment(),
357
+ ]);
358
+
359
+ expect(counter.value).toBe(3); // May fail due to race
360
+ });
361
+ ```
362
+
363
+ **Solution**: Proper synchronization or sequential execution.
364
+
365
+ ```ts
366
+ // Good: Sequential operations
367
+ it('counts correctly', async () => {
368
+ const counter = new AtomicCounter();
369
+
370
+ await counter.increment();
371
+ await counter.increment();
372
+ await counter.increment();
373
+
374
+ expect(counter.value).toBe(3);
375
+ });
376
+
377
+ // Good: Proper atomic implementation
378
+ class AtomicCounter {
379
+ value = 0; // Not private: the tests above read counter.value
380
+ private mutex = new Mutex();
381
+
382
+ async increment(): Promise<void> {
383
+ await this.mutex.runExclusive(() => {
384
+ this.value++;
385
+ });
386
+ }
387
+ }
388
+ ```
389
+
390
+ ## Detecting Flaky Tests
391
+
392
+ ### Repeat Runs
393
+
394
+ ```bash
395
+ # Run tests multiple times
396
+ for i in {1..10}; do npm test; done
397
+
398
+ # Or with vitest
399
+ for i in {1..10}; do npx vitest run || break; done
400
+ ```
401
+
402
+ ### CI Configuration
403
+
404
+ ```yaml
405
+ # Retry and track
406
+ - name: Run Tests
407
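+ # Vitest reads retries from --retry (or test.retry in config), not from an env var
408
+ run: npm test -- --retry=2
409
+ # (npm forwards everything after "--" to the test script; assumes it runs vitest)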
410
+
411
+ - name: Upload Flaky Report
412
+ if: always()
413
+ uses: actions/upload-artifact@v4
414
+ with:
415
+ name: flaky-tests
416
+ path: flaky-tests.json
417
+ ```
418
+
419
+ ### Quarantine System
420
+
421
+ ```ts
422
+ // Quarantine flaky tests
423
+ it.skipIf(process.env.QUARANTINE === 'true')(
424
+ 'sometimes fails - JIRA-123',
425
+ async () => {
426
+ // Flaky test code
427
+ }
428
+ );
429
+
430
+ // Or use a custom decorator
431
+ describe('OrderService', () => {
432
+ quarantined('sometimes fails due to race condition', async () => {
433
+ // Known flaky test
434
+ });
435
+ });
436
+
437
+ function quarantined(name: string, fn: () => Promise<void>) {
438
+ if (process.env.RUN_QUARANTINED === 'true') {
439
+ it(name, fn);
440
+ } else {
441
+ it.skip(`[QUARANTINED] ${name}`, fn);
442
+ }
443
+ }
444
+ ```
445
+
446
+ ## Test Isolation Strategies
447
+
448
+ ### 1. Transaction Rollback Per Test
449
+
450
+ ```ts
451
+ // Use transactions that roll back (all queries must share the connection that ran BEGIN)
452
+ beforeEach(async () => {
453
+ await db.$executeRaw`BEGIN`;
454
+ });
455
+
456
+ afterEach(async () => {
457
+ await db.$executeRaw`ROLLBACK`;
458
+ });
459
+ ```
460
+
461
+ ### 2. Containerized Databases
462
+
463
+ ```ts
464
+ // testcontainers for isolated databases
465
+ import { PostgreSqlContainer, type StartedPostgreSqlContainer } from '@testcontainers/postgresql';
466
+
467
+ let container: StartedPostgreSqlContainer;
468
+
469
+ beforeAll(async () => {
470
+ container = await new PostgreSqlContainer().start();
471
+ process.env.DATABASE_URL = container.getConnectionUri();
472
+ });
473
+
474
+ afterAll(async () => {
475
+ await container.stop();
476
+ });
477
+ ```
478
+
479
+ ### 3. Parallel Test Workers
480
+
481
+ ```ts
482
+ // vitest.config.ts - isolate workers
483
+ export default {
484
+ test: {
485
+ pool: 'forks', // True process isolation
486
+ poolOptions: {
487
+ forks: {
488
+ singleFork: false,
489
+ },
490
+ },
491
+ // Or use threads with isolation
492
+ isolate: true,
493
+ },
494
+ };
495
+ ```
496
+
497
+ ## Debugging Flaky Tests
498
+
499
+ ### Add Logging
500
+
501
+ ```ts
502
+ it('sometimes fails', async () => {
503
+ console.log('Test started at:', new Date().toISOString());
504
+ console.log('Environment:', process.env.NODE_ENV);
505
+
506
+ const result = await doSomething();
507
+ console.log('Result:', JSON.stringify(result));
508
+
509
+ expect(result.success).toBe(true);
510
+ });
511
+ ```
512
+
513
+ ### Capture Screenshots (E2E)
514
+
515
+ ```ts
516
+ import { test } from '@playwright/test';
517
+
518
+ test('checkout flow', async ({ page }) => {
519
+ await page.goto('/checkout');
520
+
521
+ // Capture state before assertion
522
+ await page.screenshot({ path: 'debug/before-submit.png' });
523
+
524
+ await page.click('button[type="submit"]');
525
+
526
+ // Capture state after action
527
+ await page.screenshot({ path: 'debug/after-submit.png' });
528
+
529
+ await expect(page.locator('h1')).toHaveText('Success');
530
+ });
531
+ ```
532
+
533
+ ### Record Test Execution
534
+
535
+ ```ts
536
+ // Trace mode for debugging
537
+ // playwright.config.ts
538
+ export default {
539
+ use: {
540
+ trace: 'retain-on-failure',
541
+ video: 'retain-on-failure',
542
+ },
543
+ };
544
+ ```
545
+
546
+ ## Best Practices Summary
547
+
548
+ | Practice | Implementation |
549
+ |----------|----------------|
550
+ | Avoid sleep() | Use explicit waits |
551
+ | Isolate state | Fresh setup per test |
552
+ | Mock external services | Use MSW or similar |
553
+ | Control time | Use vi.useFakeTimers() |
554
+ | Seed random data | faker.seed() |
555
+ | Clean up resources | afterEach cleanup |
556
+ | Randomize order | sequence.shuffle: true |
557
+ | Track flaky tests | Quarantine and fix |