aigent-team 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +253 -0
  3. package/dist/chunk-N3RYHWTR.js +267 -0
  4. package/dist/cli.js +576 -0
  5. package/dist/index.d.ts +234 -0
  6. package/dist/index.js +27 -0
  7. package/package.json +67 -0
  8. package/templates/shared/git-workflow.md +44 -0
  9. package/templates/shared/project-conventions.md +48 -0
  10. package/templates/teams/ba/agent.yaml +25 -0
  11. package/templates/teams/ba/references/acceptance-criteria.md +87 -0
  12. package/templates/teams/ba/references/api-contract-design.md +110 -0
  13. package/templates/teams/ba/references/requirements-analysis.md +83 -0
  14. package/templates/teams/ba/references/user-story-mapping.md +73 -0
  15. package/templates/teams/ba/skill.md +85 -0
  16. package/templates/teams/be/agent.yaml +34 -0
  17. package/templates/teams/be/conventions.md +102 -0
  18. package/templates/teams/be/references/api-design.md +91 -0
  19. package/templates/teams/be/references/async-processing.md +86 -0
  20. package/templates/teams/be/references/auth-security.md +58 -0
  21. package/templates/teams/be/references/caching.md +79 -0
  22. package/templates/teams/be/references/database.md +65 -0
  23. package/templates/teams/be/references/error-handling.md +106 -0
  24. package/templates/teams/be/references/observability.md +83 -0
  25. package/templates/teams/be/references/review-checklist.md +50 -0
  26. package/templates/teams/be/references/testing.md +100 -0
  27. package/templates/teams/be/review-checklist.md +54 -0
  28. package/templates/teams/be/skill.md +71 -0
  29. package/templates/teams/devops/agent.yaml +35 -0
  30. package/templates/teams/devops/conventions.md +133 -0
  31. package/templates/teams/devops/references/ci-cd.md +218 -0
  32. package/templates/teams/devops/references/cost-optimization.md +218 -0
  33. package/templates/teams/devops/references/disaster-recovery.md +199 -0
  34. package/templates/teams/devops/references/docker.md +237 -0
  35. package/templates/teams/devops/references/infrastructure-as-code.md +238 -0
  36. package/templates/teams/devops/references/kubernetes.md +397 -0
  37. package/templates/teams/devops/references/monitoring.md +224 -0
  38. package/templates/teams/devops/references/review-checklist.md +149 -0
  39. package/templates/teams/devops/references/security.md +225 -0
  40. package/templates/teams/devops/review-checklist.md +72 -0
  41. package/templates/teams/devops/skill.md +131 -0
  42. package/templates/teams/fe/agent.yaml +28 -0
  43. package/templates/teams/fe/conventions.md +80 -0
  44. package/templates/teams/fe/references/accessibility.md +92 -0
  45. package/templates/teams/fe/references/component-architecture.md +87 -0
  46. package/templates/teams/fe/references/css-styling.md +89 -0
  47. package/templates/teams/fe/references/forms.md +73 -0
  48. package/templates/teams/fe/references/performance.md +104 -0
  49. package/templates/teams/fe/references/review-checklist.md +51 -0
  50. package/templates/teams/fe/references/security.md +90 -0
  51. package/templates/teams/fe/references/state-management.md +117 -0
  52. package/templates/teams/fe/references/testing.md +112 -0
  53. package/templates/teams/fe/review-checklist.md +53 -0
  54. package/templates/teams/fe/skill.md +68 -0
  55. package/templates/teams/lead/agent.yaml +18 -0
  56. package/templates/teams/lead/references/cross-team-coordination.md +68 -0
  57. package/templates/teams/lead/references/quality-gates.md +64 -0
  58. package/templates/teams/lead/references/task-decomposition.md +69 -0
  59. package/templates/teams/lead/skill.md +83 -0
  60. package/templates/teams/qa/agent.yaml +32 -0
  61. package/templates/teams/qa/conventions.md +130 -0
  62. package/templates/teams/qa/references/ci-integration.md +337 -0
  63. package/templates/teams/qa/references/e2e-testing.md +292 -0
  64. package/templates/teams/qa/references/mocking.md +249 -0
  65. package/templates/teams/qa/references/performance-testing.md +288 -0
  66. package/templates/teams/qa/references/review-checklist.md +143 -0
  67. package/templates/teams/qa/references/security-testing.md +271 -0
  68. package/templates/teams/qa/references/test-data.md +275 -0
  69. package/templates/teams/qa/references/test-strategy.md +192 -0
  70. package/templates/teams/qa/review-checklist.md +53 -0
  71. package/templates/teams/qa/skill.md +131 -0
@@ -0,0 +1,275 @@
1
+ # Test Data Reference
2
+
3
+ ## Factory Pattern
4
+
5
+ Factories produce valid test objects with sensible defaults. Override only
6
+ what matters for each test — this makes tests self-documenting.
7
+
8
+ ### TypeScript Factory (with Faker)
9
+
10
+ ```typescript
11
+ // tests/factories/user.factory.ts
12
+ import { faker } from '@faker-js/faker';
13
+
14
+ interface User {
15
+ id: string;
16
+ email: string;
17
+ name: string;
18
+ role: 'admin' | 'member' | 'viewer';
19
+ createdAt: Date;
20
+ }
21
+
22
+ export function buildUser(overrides: Partial<User> = {}): User {
23
+ return {
24
+ id: faker.string.uuid(),
25
+ email: faker.internet.email(),
26
+ name: faker.person.fullName(),
27
+ role: 'member',
28
+ createdAt: faker.date.recent({ days: 30 }),
29
+ ...overrides,
30
+ };
31
+ }
32
+
33
+ // Usage in test:
34
+ const admin = buildUser({ role: 'admin' });
35
+ const viewer = buildUser({ role: 'viewer', email: 'viewer@test.com' });
36
+ ```
37
+
38
+ ### Related Object Factories
39
+
40
+ ```typescript
41
+ // tests/factories/order.factory.ts
42
+ import { faker } from '@faker-js/faker';
43
+ import { buildUser } from './user.factory';
44
+
45
+ interface OrderItem {
46
+ sku: string;
47
+ name: string;
48
+ quantity: number;
49
+ unitPrice: number;
50
+ }
51
+
52
+ interface Order {
53
+ id: string;
54
+ userId: string;
55
+ items: OrderItem[];
56
+ total: number;
57
+ status: 'pending' | 'paid' | 'shipped';
58
+ }
59
+
60
+ export function buildOrderItem(overrides: Partial<OrderItem> = {}): OrderItem {
61
+ const quantity = overrides.quantity ?? faker.number.int({ min: 1, max: 5 });
62
+ const unitPrice = overrides.unitPrice ?? parseFloat(faker.commerce.price({ min: 5, max: 200 }));
63
+ return {
64
+ sku: faker.string.alphanumeric(8).toUpperCase(),
65
+ name: faker.commerce.productName(),
66
+ quantity,
67
+ unitPrice,
68
+ ...overrides,
69
+ };
70
+ }
71
+
72
+ export function buildOrder(overrides: Partial<Order> = {}): Order {
73
+ const items = overrides.items ?? [buildOrderItem(), buildOrderItem()];
74
+ return {
75
+ id: faker.string.uuid(),
76
+ userId: buildUser().id,
77
+ items,
78
+ total: items.reduce((sum, i) => sum + i.quantity * i.unitPrice, 0),
79
+ status: 'pending',
80
+ ...overrides,
81
+ };
82
+ }
83
+ ```
84
+
85
+ ### Python Factory (with Faker)
86
+
87
+ ```python
88
+ # tests/factories/user_factory.py
89
+ from dataclasses import dataclass, field
90
+ from datetime import datetime
91
+ from faker import Faker
92
+ from uuid import uuid4
93
+
94
+ fake = Faker()
95
+
96
+ @dataclass
97
+ class UserFactory:
98
+ id: str = field(default_factory=lambda: str(uuid4()))
99
+ email: str = field(default_factory=fake.email)
100
+ name: str = field(default_factory=fake.name)
101
+ role: str = "member"
102
+ created_at: datetime = field(default_factory=datetime.now)
103
+
104
+ # Usage:
105
+ user = UserFactory()
106
+ admin = UserFactory(role="admin")
107
+ ```
108
+
109
+ ---
110
+
111
+ ## Faker for Realistic Data
112
+
113
+ ### Why Faker Over Hardcoded Values
114
+
115
+ - Hardcoded values hide implicit assumptions (e.g., "John" assumes ASCII names).
116
+ - Faker generates edge cases naturally (long names, special characters, Unicode).
117
+ - Deterministic seed for reproducibility:
118
+
119
+ ```typescript
120
+ import { faker } from '@faker-js/faker';
121
+
122
+ // Set seed for reproducible test runs when debugging
123
+ faker.seed(12345);
124
+ ```
125
+
126
+ ### Common Faker Patterns
127
+
128
+ ```typescript
129
+ faker.string.uuid() // IDs
130
+ faker.internet.email() // Email addresses
131
+ faker.person.fullName() // Display names
132
+ faker.lorem.paragraph() // Long text fields
133
+ faker.date.between({ from: '2024-01-01', to: '2024-12-31' }) // Date ranges
134
+ faker.number.int({ min: 1, max: 100 }) // Bounded integers
135
+ faker.helpers.arrayElement(['a', 'b']) // Random from set
136
+ faker.string.alphanumeric(10) // Tokens / codes
137
+ ```
138
+
139
+ ### Avoid
140
+
141
+ - `faker.lorem.sentence()` for fields with validation (use realistic values).
142
+ - Random data for fields your test asserts on (override explicitly).
143
+
144
+ ---
145
+
146
+ ## Database Isolation Strategies
147
+
148
+ Tests that share a database must not leak state. Choose one strategy:
149
+
150
+ ### 1. Transaction Rollback (fastest)
151
+
152
+ Wrap each test in a transaction and roll back after. No cleanup needed.
153
+
154
+ ```typescript
155
+ // tests/helpers/db.ts
156
+ import { db, type Transaction } from '../../src/db';
157
+
158
+ export async function withTransaction<T>(
159
+ fn: (trx: Transaction) => Promise<T>,
160
+ ): Promise<T> {
161
+ const trx = await db.transaction();
162
+ try {
163
+ const result = await fn(trx);
164
+ return result;
165
+ } finally {
166
+ await trx.rollback();
167
+ }
168
+ }
169
+
170
+ // In test:
171
+ test('creates order', async () => {
172
+ await withTransaction(async (trx) => {
173
+ const order = await createOrder(trx, buildOrder());
174
+ expect(order.status).toBe('pending');
175
+ // Transaction rolls back — no cleanup
176
+ });
177
+ });
178
+ ```
179
+
180
+ **Caveat**: Does not work if code under test manages its own transactions.
181
+
182
+ ### 2. Truncate Between Tests
183
+
184
+ Clear all tables before each test. Slower but simpler.
185
+
186
+ ```typescript
187
+ // tests/helpers/db.ts
188
+ const tables = ['orders', 'order_items', 'users'];
189
+
190
+ export async function truncateAll() {
191
+ // Disable FK checks, truncate, re-enable — one round trip
192
+ await db.raw(`
193
+ SET session_replication_role = 'replica';
194
+ ${tables.map(t => `TRUNCATE TABLE "${t}" CASCADE;`).join('\n')}
195
+ SET session_replication_role = 'origin';
196
+ `);
197
+ }
198
+
199
+ // vitest.setup.ts
200
+ beforeEach(async () => {
201
+ await truncateAll();
202
+ });
203
+ ```
204
+
205
+ ### 3. Unique IDs Per Test (no cleanup)
206
+
207
+ Each test uses unique identifiers. Data from other tests is invisible
208
+ because queries filter by test-specific IDs.
209
+
210
+ ```typescript
211
+ test('fetches user orders', async () => {
212
+ const userId = faker.string.uuid(); // unique to this test
213
+ await seedOrder({ userId, status: 'paid' });
214
+ await seedOrder({ userId, status: 'pending' });
215
+
216
+ const orders = await getOrdersByUser(userId);
217
+ expect(orders).toHaveLength(2);
218
+ // Other tests' orders are not returned
219
+ });
220
+ ```
221
+
222
+ Best for read-heavy tests. Does not prevent table bloat in long-running suites.
223
+
224
+ ### Comparison
225
+
226
+ | Strategy | Speed | Isolation | Complexity |
227
+ |---|---|---|---|
228
+ | Transaction rollback | Fastest | Strong | Medium (transaction passthrough) |
229
+ | Truncate | Slow | Strong | Low |
230
+ | Unique IDs | Fast | Partial | Low |
231
+
232
+ **Recommendation**: Transaction rollback for unit/integration tests. Truncate
233
+ for E2E against a real database. Unique IDs as a supplementary technique.
234
+
235
+ ---
236
+
237
+ ## Test Environment Parity
238
+
239
+ Tests must run against an environment that matches production as closely
240
+ as possible.
241
+
242
+ ### Database
243
+
244
+ - Same engine and version (PostgreSQL 16 in prod = PostgreSQL 16 in test).
245
+ - Use Docker Compose or Testcontainers for local/CI parity.
246
+ - Never substitute SQLite for PostgreSQL — query behaviour differs.
247
+
248
+ ```typescript
249
+ // testcontainers example
250
+ import { PostgreSqlContainer, StartedPostgreSqlContainer } from '@testcontainers/postgresql';
251
+
252
+ let container: StartedPostgreSqlContainer;
253
+
254
+ beforeAll(async () => {
255
+ container = await new PostgreSqlContainer('postgres:16')
256
+ .withDatabase('test')
257
+ .start();
258
+ process.env.DATABASE_URL = container.getConnectionUri();
259
+ }, 60_000);
260
+
261
+ afterAll(async () => {
262
+ await container.stop();
263
+ });
264
+ ```
265
+
266
+ ### External Services
267
+
268
+ - Mock external APIs with MSW (see `mocking.md`).
269
+ - For services you own, use contract tests (see `test-strategy.md`).
270
+ - Never call real third-party APIs in automated tests.
271
+
272
+ ### Configuration
273
+
274
+ - Use a dedicated `.env.test` file. Never share `.env` between dev and test.
275
+ - Ensure secrets in test config are test-only credentials, not production keys.
@@ -0,0 +1,192 @@
1
+ # Test Strategy Reference
2
+
3
+ ## Test Pyramid Enforcement
4
+
5
+ ### Target Ratios (by test count)
6
+
7
+ | Level | Share | Max execution time | Typical tools |
8
+ |---|---|---|---|
9
+ | Unit | 70 % | < 10 s total suite | Vitest, Jest, Pytest |
10
+ | Integration | 20 % | < 2 min total suite | Testing Library, Supertest, MSW |
11
+ | E2E | 10 % | < 10 min smoke, < 30 min full | Playwright |
12
+
13
+ ### Time Budgets
14
+
15
+ Every CI run has a wall-clock budget. If a level exceeds its budget,
16
+ optimise before adding more tests:
17
+
18
+ - **Unit**: 10 s. If exceeded, check for I/O leaks (unmocked network, file
19
+ system) or heavy setup. Vitest's `--reporter=verbose --bail=1` helps triage.
20
+ - **Integration**: 2 min. Parallelise with `--pool=forks`. Ensure database
21
+ truncation is batched, not per-test.
22
+ - **E2E smoke**: 10 min. Run only critical journeys. Full suite runs nightly.
23
+
24
+ ### Pyramid Inversion Detection
25
+
26
+ Run a quarterly audit:
27
+
28
+ ```bash
29
+ # Count tests per level (adjust globs to your project)
30
+ echo "Unit: $(find . -name '*.test.ts' -not -path '*/e2e/*' | xargs grep -cE '\b(it|test)\(' | awk -F: '{s+=$2} END{print s}')"
31
+ echo "Integration: $(find . -path '*/__tests__/integration/*' -name '*.test.ts' | xargs grep -cE '\b(it|test)\(' | awk -F: '{s+=$2} END{print s}')"
32
+ echo "E2E: $(find . -path '*/e2e/*' -name '*.spec.ts' | xargs grep -cE '\b(it|test)\(' | awk -F: '{s+=$2} END{print s}')"
33
+ ```
34
+
35
+ If E2E > 15 % of total test count, investigate which E2E tests duplicate
36
+ integration-level coverage and demote them.
37
+
38
+ ---
39
+
40
+ ## Risk-Based Testing Prioritisation
41
+
42
+ Not all features carry equal risk. Allocate test effort proportionally:
43
+
44
+ ### Risk Matrix
45
+
46
+ | Factor | Weight | Examples |
47
+ |---|---|---|
48
+ | Revenue impact | High | Checkout, payment, subscription |
49
+ | User frequency | High | Login, search, navigation |
50
+ | Data sensitivity | High | PII handling, auth, export |
51
+ | Change velocity | Medium | Features under active development |
52
+ | Complexity | Medium | State machines, concurrent workflows |
53
+ | Blast radius | Medium | Shared libraries, core APIs |
54
+
55
+ ### Prioritisation Process
56
+
57
+ 1. List all features/modules.
58
+ 2. Score each on the factors above (1-3 scale).
59
+ 3. Weighted total determines test investment tier:
60
+ - **Tier 1** (score >= 12): Full coverage — unit, integration, E2E, contract,
61
+ performance, security.
62
+ - **Tier 2** (score 7-11): Unit + integration + smoke E2E.
63
+ - **Tier 3** (score <= 6): Unit tests only; integration on critical paths.
64
+
65
+ ---
66
+
67
+ ## Contract Testing (Microservices)
68
+
69
+ When services communicate over HTTP/gRPC, use consumer-driven contract tests
70
+ to prevent integration breakage without running all services.
71
+
72
+ ### Pact Workflow
73
+
74
+ ```
75
+ Consumer writes contract (Pact test)
76
+ → Publishes pact to Pact Broker
77
+ → Provider verifies contract in its own CI
78
+ → Broker records verification result
79
+ → Consumer deploy gated on "can-i-deploy" check
80
+ ```
81
+
82
+ ### Consumer Side (TypeScript + Pact)
83
+
84
+ ```typescript
85
+ import { PactV3 } from '@pact-foundation/pact';
86
+
87
+ const provider = new PactV3({
88
+ consumer: 'OrderService',
89
+ provider: 'InventoryService',
90
+ });
91
+
92
+ describe('Inventory API contract', () => {
93
+ it('returns stock for a product', async () => {
94
+ await provider
95
+ .given('product ABC exists with stock 42')
96
+ .uponReceiving('a request for product stock')
97
+ .withRequest({ method: 'GET', path: '/api/inventory/ABC' })
98
+ .willRespondWith({
99
+ status: 200,
100
+ body: { productId: 'ABC', stock: 42 },
101
+ })
102
+ .executeTest(async (mockServer) => {
103
+ const res = await fetch(`${mockServer.url}/api/inventory/ABC`);
104
+ const data = await res.json();
105
+ expect(data.stock).toBe(42);
106
+ });
107
+ });
108
+ });
109
+ ```
110
+
111
+ ### Provider Side
112
+
113
+ ```typescript
114
+ const { Verifier } = require('@pact-foundation/pact');
115
+
116
+ new Verifier({
117
+ providerBaseUrl: 'http://localhost:3001',
118
+ pactBrokerUrl: process.env.PACT_BROKER_URL,
119
+ provider: 'InventoryService',
120
+ providerStatesSetupUrl: 'http://localhost:3001/_pact/setup',
121
+ }).verifyProvider();
122
+ ```
123
+
124
+ ---
125
+
126
+ ## Visual Regression Testing
127
+
128
+ Use Chromatic (Storybook) or Playwright screenshots for UI drift detection.
129
+
130
+ ### Strategy
131
+
132
+ - **Component level** (Chromatic): Every Storybook story is a visual test.
133
+ Run on every PR. Chromatic handles diffing and approval workflow.
134
+ - **Page level** (Playwright): Capture full-page screenshots for critical
135
+ pages. Compare against baselines.
136
+
137
+ ### Playwright Visual Comparison
138
+
139
+ ```typescript
140
+ test('homepage visual regression', async ({ page }) => {
141
+ await page.goto('/');
142
+ await expect(page).toHaveScreenshot('homepage.png', {
143
+ maxDiffPixelRatio: 0.01,
144
+ });
145
+ });
146
+ ```
147
+
148
+ ### Guidelines
149
+
150
+ - Always set a `maxDiffPixelRatio` or `maxDiffPixels` — pixel-perfect
151
+ comparison is too brittle across environments.
152
+ - Run visual tests in a single browser/OS combination in CI for consistency.
153
+ - Use `toHaveScreenshot` with `fullPage: true` for layout regression.
154
+ - Hide dynamic content (timestamps, avatars) with CSS or masking.
155
+
156
+ ---
157
+
158
+ ## Mutation Testing
159
+
160
+ Code coverage tells you what code was *executed* by tests. Mutation testing
161
+ tells you what code is actually *verified* by assertions.
162
+
163
+ ### How It Works
164
+
165
+ Stryker (JS/TS) or mutmut (Python) injects small changes ("mutants") into
166
+ your source code — flipping operators, removing calls, changing return values.
167
+ If your tests still pass, the mutant "survived" and your tests are weak.
168
+
169
+ ### Setup (Stryker)
170
+
171
+ ```json
172
+ // stryker.conf.json
173
+ {
174
+ "mutate": ["src/**/*.ts", "!src/**/*.test.ts"],
175
+ "testRunner": "vitest",
176
+ "reporters": ["html", "clear-text", "progress"],
177
+ "thresholds": { "high": 80, "low": 60, "break": 50 }
178
+ }
179
+ ```
180
+
181
+ ### Interpretation
182
+
183
+ | Metric | Target | Action if below |
184
+ |---|---|---|
185
+ | Mutation score | >= 70 % | Add assertions; tests execute code but don't verify it |
186
+ | Survived mutants | Review top 10 | Prioritise mutants in Tier 1 modules |
187
+
188
+ ### When to Run
189
+
190
+ - **Not on every PR** — mutation testing is slow (minutes to hours).
191
+ - Run nightly or weekly on critical modules.
192
+ - Gate releases on mutation score for Tier 1 code.
@@ -0,0 +1,53 @@
1
+ ### Test Level & Strategy
2
+ - [ ] Tests are at the correct pyramid level (not E2E for logic that could be unit tested)
3
+ - [ ] Risk-proportional coverage: high-risk paths (auth, payment, data mutation) have more tests
4
+ - [ ] Both happy path and failure paths are tested (not just "it works" but "it fails gracefully")
5
+ - [ ] Edge cases covered: null, empty, max-length, unicode, concurrent access, expired session
6
+
7
+ ### Test Quality
8
+ - [ ] Tests follow AAA pattern (Arrange-Act-Assert) with clear separation
9
+ - [ ] Test names describe expected behavior: `should {outcome} when {condition}`
10
+ - [ ] One logical assertion per test (no mega-tests that assert 10 unrelated things)
11
+ - [ ] Tests are independent — pass when run alone, in any order, in parallel
12
+ - [ ] No snapshot tests for HTML/JSON output (use visual regression for UI instead)
13
+ - [ ] Parameterized tests (`test.each`) used for data-driven scenarios instead of copy-paste
14
+
15
+ ### Test Data
16
+ - [ ] Uses factories/fixtures with realistic data (Faker, not `"test"/"test"`)
17
+ - [ ] No hardcoded IDs or magic values that depend on external state
18
+ - [ ] Test data is self-contained — each test creates what it needs, cleans up after
19
+ - [ ] No dependency on test execution order or shared mutable state
20
+
21
+ ### Mocking
22
+ - [ ] Mocks only at system boundaries (HTTP APIs, time, randomness) — not internal modules
23
+ - [ ] Mock behavior matches real API (response format, error codes, edge cases)
24
+ - [ ] No over-mocking — if >3 mocks are needed, consider an integration test instead
25
+ - [ ] MSW/nock used for HTTP mocking (not monkey-patching fetch/axios)
26
+
27
+ ### E2E Specific
28
+ - [ ] Page Object Model or component abstraction used (no raw selectors in tests)
29
+ - [ ] Elements selected by role/label/text — not CSS class, not complex XPath
30
+ - [ ] Explicit waits for conditions — no `sleep()`/`wait()` with fixed duration
31
+ - [ ] Test data setup via API/seed — not through UI interactions
32
+ - [ ] Passes 10/10 runs locally before merge (flakiness check)
33
+ - [ ] Screenshot/trace captured on failure for debugging
34
+ - [ ] Tests parallelizable — no shared accounts, no port conflicts
35
+
36
+ ### Flakiness Prevention
37
+ - [ ] No timing-dependent assertions (exact timestamps, animation states)
38
+ - [ ] No shared state between tests (global variables, database records, file system)
39
+ - [ ] Async operations properly awaited — no "fire and forget" in test code
40
+ - [ ] Retry logic only for eventually-consistent operations, not to mask bugs
41
+ - [ ] Environment-agnostic — passes on macOS, Linux, CI (timezone, locale, filesystem)
42
+
43
+ ### Performance Tests (if applicable)
44
+ - [ ] Pass/fail thresholds defined in the test script (not just "observe the numbers")
45
+ - [ ] Tests run against production-like environment with realistic data volume
46
+ - [ ] Baseline recorded for comparison
47
+ - [ ] All four load types covered: smoke, load, stress, soak
48
+
49
+ ### CI Integration
50
+ - [ ] New tests added to the correct CI stage (smoke vs regression vs nightly)
51
+ - [ ] Test execution time within budget (unit <5ms, integration <500ms, E2E <30s)
52
+ - [ ] Test report generated (JUnit XML / Allure) for CI visibility
53
+ - [ ] No `@skip` / `.only` / `xit` left in committed code
@@ -0,0 +1,131 @@
1
+ # QA Engineer — Skill Index
2
+
3
+ You are a senior QA engineer. You think in terms of risk, confidence, and
4
+ feedback speed — not "more tests." Every test you write or review must justify
5
+ its existence by catching a class of bugs that no other test already covers.
6
+
7
+ ---
8
+
9
+ ## Core Principles
10
+
11
+ 1. **Quality is everyone's job, QA owns the strategy.**
12
+ You do not gate-keep releases. You design the safety net and teach others to
13
+ use it. If a bug escapes, the first question is "why didn't our automation
14
+ catch it?" — not "who wrote the code?"
15
+
16
+ 2. **Respect the test pyramid.**
17
+ 70 % unit, 20 % integration, 10 % E2E — by count. Inversions are a smell.
18
+ E2E tests are expensive; each one must protect a critical user journey that
19
+ lower levels cannot cover.
20
+
21
+ 3. **Flaky tests are bugs — zero tolerance.**
22
+ A flaky test is worse than no test: it trains the team to ignore failures.
23
+ Quarantine immediately, fix within 48 h, or delete. Track flakiness rate as
24
+ a first-class metric.
25
+
26
+ 4. **Coverage is a vanity metric.**
27
+ High coverage with weak assertions proves nothing. Focus on mutation testing
28
+ score and defect escape rate instead.
29
+
30
+ 5. **Shift left.**
31
+ The cheapest bug is the one caught in a unit test during local development.
32
+ Invest in fast feedback: < 10 s for unit suite, < 2 min for integration,
33
+ < 10 min for smoke E2E.
34
+
35
+ 6. **Tests are production code.**
36
+ They deserve the same care: clear naming, no duplication, proper
37
+ abstractions (factories, page objects), and code review.
38
+
39
+ 7. **Test behaviour, not implementation.**
40
+ If refactoring the source without changing behaviour breaks a test, that
41
+ test is coupled to implementation and must be rewritten.
42
+
43
+ ---
44
+
45
+ ## Anti-Patterns — Flag Immediately
46
+
47
+ | Anti-pattern | Why it's harmful | Fix |
48
+ |---|---|---|
49
+ | Testing implementation details | Brittle; breaks on refactor | Assert on outputs and side effects only |
50
+ | Over-mocking | Test proves nothing about real system | Mock at system boundaries only (HTTP, clock, random) |
51
+ | `sleep()` / fixed delays in E2E | Flaky, slow | Use explicit waits / `waitFor` / Playwright auto-wait |
52
+ | Snapshot tests for logic | Fail on any change, nobody reads diffs | Use targeted assertions |
53
+ | Shared mutable test state | Order-dependent failures | Isolate via `beforeEach`, factories, transactions |
54
+ | Test ordering dependency | Hidden coupling | Each test must pass in isolation; randomize order |
55
+ | Ignoring test performance | CI becomes bottleneck | Budget time per level; parallelize |
56
+ | Copy-paste test setup | Maintenance nightmare | Extract factories and helpers |
57
+
58
+ ---
59
+
60
+ ## Decision Framework — Choosing the Right Test Level
61
+
62
+ ```
63
+ Is the logic pure computation (no I/O, no DOM)?
64
+ YES → Unit test (Vitest / Jest / Pytest)
65
+
66
+ Does it involve multiple modules collaborating?
67
+ YES → Is there a network boundary?
68
+ YES → Contract test (Pact) + integration test with MSW
69
+ NO → Integration test (Testing Library + Vitest)
70
+
71
+ Is it a critical user journey (login, checkout, payment)?
72
+ YES → E2E test (Playwright)
73
+ NO → Can you cover it with an integration test?
74
+ YES → Integration test
75
+ NO → E2E test, but justify it in a comment
76
+ ```
77
+
78
+ When in doubt, start at the lowest level that can catch the bug.
79
+
80
+ ---
81
+
82
+ ## Reference Files
83
+
84
+ Read these on demand — not every task requires every file.
85
+
86
+ | Reference | When to read |
87
+ |---|---|
88
+ | [test-strategy.md](references/test-strategy.md) | Planning a new test suite, reviewing pyramid balance, setting up mutation testing or visual regression |
89
+ | [e2e-testing.md](references/e2e-testing.md) | Writing or reviewing Playwright/Cypress tests, debugging flakiness, setting up Page Object Model |
90
+ | [test-data.md](references/test-data.md) | Building factories, seeding databases, solving test isolation problems |
91
+ | [mocking.md](references/mocking.md) | Setting up MSW, deciding what to mock, verifying mock contracts |
92
+ | [performance-testing.md](references/performance-testing.md) | Writing k6 scripts, defining load scenarios, setting thresholds |
93
+ | [security-testing.md](references/security-testing.md) | OWASP checks, setting up ZAP, verifying rate limits and auth |
94
+ | [ci-integration.md](references/ci-integration.md) | Configuring pipeline stages, test reporting, flakiness tracking |
95
+ | [review-checklist.md](references/review-checklist.md) | Reviewing any PR that touches test code |
96
+
97
+ ---
98
+
99
+ ## Workflow Index
100
+
101
+ ### Writing a new test
102
+ 1. Determine test level using the decision framework above.
103
+ 2. Read the relevant reference file for that level.
104
+ 3. Use factories from `test-data.md` for data setup.
105
+ 4. Follow the review checklist before submitting.
106
+
107
+ ### Investigating a flaky test
108
+ 1. Read `e2e-testing.md` > Flakiness Prevention.
109
+ 2. Check for anti-patterns in the table above.
110
+ 3. Reproduce locally with `--repeat-each=20` (Playwright) or loop.
111
+ 4. Fix root cause; never add retries as a permanent solution.
112
+
113
+ ### Setting up CI test pipeline
114
+ 1. Read `ci-integration.md` for stage design and time budgets.
115
+ 2. Read `test-strategy.md` for pyramid enforcement.
116
+ 3. Configure reporting per `ci-integration.md` > Test Reporting.
117
+
118
+ ### Performance test planning
119
+ 1. Read `performance-testing.md` for scenario types and script templates.
120
+ 2. Define thresholds based on SLOs.
121
+ 3. Run against a production-like environment only.
122
+
123
+ ### Security audit
124
+ 1. Read `security-testing.md` for OWASP checklist.
125
+ 2. Run automated DAST scan with ZAP.
126
+ 3. Manual verification for auth bypass and IDOR.
127
+
128
+ ### Reviewing test code
129
+ 1. Open `review-checklist.md` and walk through every section.
130
+ 2. Flag any anti-pattern from the table above.
131
+ 3. Verify test level matches the decision framework.