aigent-team 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +253 -0
- package/dist/chunk-N3RYHWTR.js +267 -0
- package/dist/cli.js +576 -0
- package/dist/index.d.ts +234 -0
- package/dist/index.js +27 -0
- package/package.json +67 -0
- package/templates/shared/git-workflow.md +44 -0
- package/templates/shared/project-conventions.md +48 -0
- package/templates/teams/ba/agent.yaml +25 -0
- package/templates/teams/ba/references/acceptance-criteria.md +87 -0
- package/templates/teams/ba/references/api-contract-design.md +110 -0
- package/templates/teams/ba/references/requirements-analysis.md +83 -0
- package/templates/teams/ba/references/user-story-mapping.md +73 -0
- package/templates/teams/ba/skill.md +85 -0
- package/templates/teams/be/agent.yaml +34 -0
- package/templates/teams/be/conventions.md +102 -0
- package/templates/teams/be/references/api-design.md +91 -0
- package/templates/teams/be/references/async-processing.md +86 -0
- package/templates/teams/be/references/auth-security.md +58 -0
- package/templates/teams/be/references/caching.md +79 -0
- package/templates/teams/be/references/database.md +65 -0
- package/templates/teams/be/references/error-handling.md +106 -0
- package/templates/teams/be/references/observability.md +83 -0
- package/templates/teams/be/references/review-checklist.md +50 -0
- package/templates/teams/be/references/testing.md +100 -0
- package/templates/teams/be/review-checklist.md +54 -0
- package/templates/teams/be/skill.md +71 -0
- package/templates/teams/devops/agent.yaml +35 -0
- package/templates/teams/devops/conventions.md +133 -0
- package/templates/teams/devops/references/ci-cd.md +218 -0
- package/templates/teams/devops/references/cost-optimization.md +218 -0
- package/templates/teams/devops/references/disaster-recovery.md +199 -0
- package/templates/teams/devops/references/docker.md +237 -0
- package/templates/teams/devops/references/infrastructure-as-code.md +238 -0
- package/templates/teams/devops/references/kubernetes.md +397 -0
- package/templates/teams/devops/references/monitoring.md +224 -0
- package/templates/teams/devops/references/review-checklist.md +149 -0
- package/templates/teams/devops/references/security.md +225 -0
- package/templates/teams/devops/review-checklist.md +72 -0
- package/templates/teams/devops/skill.md +131 -0
- package/templates/teams/fe/agent.yaml +28 -0
- package/templates/teams/fe/conventions.md +80 -0
- package/templates/teams/fe/references/accessibility.md +92 -0
- package/templates/teams/fe/references/component-architecture.md +87 -0
- package/templates/teams/fe/references/css-styling.md +89 -0
- package/templates/teams/fe/references/forms.md +73 -0
- package/templates/teams/fe/references/performance.md +104 -0
- package/templates/teams/fe/references/review-checklist.md +51 -0
- package/templates/teams/fe/references/security.md +90 -0
- package/templates/teams/fe/references/state-management.md +117 -0
- package/templates/teams/fe/references/testing.md +112 -0
- package/templates/teams/fe/review-checklist.md +53 -0
- package/templates/teams/fe/skill.md +68 -0
- package/templates/teams/lead/agent.yaml +18 -0
- package/templates/teams/lead/references/cross-team-coordination.md +68 -0
- package/templates/teams/lead/references/quality-gates.md +64 -0
- package/templates/teams/lead/references/task-decomposition.md +69 -0
- package/templates/teams/lead/skill.md +83 -0
- package/templates/teams/qa/agent.yaml +32 -0
- package/templates/teams/qa/conventions.md +130 -0
- package/templates/teams/qa/references/ci-integration.md +337 -0
- package/templates/teams/qa/references/e2e-testing.md +292 -0
- package/templates/teams/qa/references/mocking.md +249 -0
- package/templates/teams/qa/references/performance-testing.md +288 -0
- package/templates/teams/qa/references/review-checklist.md +143 -0
- package/templates/teams/qa/references/security-testing.md +271 -0
- package/templates/teams/qa/references/test-data.md +275 -0
- package/templates/teams/qa/references/test-strategy.md +192 -0
- package/templates/teams/qa/review-checklist.md +53 -0
- package/templates/teams/qa/skill.md +131 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
id: qa
|
|
2
|
+
name: QA Agent
|
|
3
|
+
description: >
|
|
4
|
+
Senior QA engineer agent. Expert in test strategy, test pyramid optimization,
|
|
5
|
+
E2E automation, contract testing, performance testing, and shift-left quality.
|
|
6
|
+
role: qa
|
|
7
|
+
techStack:
|
|
8
|
+
languages: [TypeScript, JavaScript, Python]
|
|
9
|
+
frameworks: [Playwright, Cypress, Vitest, Jest, Pytest]
|
|
10
|
+
libraries: [MSW, Faker.js, Testing Library, Supertest, Pact, k6, Stryker, Chromatic]
|
|
11
|
+
buildTools: [Allure, GitHub Actions]
|
|
12
|
+
tools:
|
|
13
|
+
allowed: [Read, Write, Edit, Bash, Grep, Glob]
|
|
14
|
+
globs:
|
|
15
|
+
- "**/*.test.*"
|
|
16
|
+
- "**/*.spec.*"
|
|
17
|
+
- "**/*.e2e.*"
|
|
18
|
+
- "test/**/*"
|
|
19
|
+
- "tests/**/*"
|
|
20
|
+
- "e2e/**/*"
|
|
21
|
+
- "cypress/**/*"
|
|
22
|
+
- "playwright/**/*"
|
|
23
|
+
- "playwright.config.*"
|
|
24
|
+
- "__tests__/**/*"
|
|
25
|
+
- "**/fixtures/**/*"
|
|
26
|
+
- "**/factories/**/*"
|
|
27
|
+
- "vitest.config.*"
|
|
28
|
+
- "jest.config.*"
|
|
29
|
+
- "cypress.config.*"
|
|
30
|
+
sharedKnowledge:
|
|
31
|
+
- project-conventions
|
|
32
|
+
- git-workflow
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
## Test Pyramid Enforcement
|
|
2
|
+
|
|
3
|
+
- Ratio target: ~70% unit, ~20% integration, ~10% E2E. Measure quarterly.
|
|
4
|
+
- If adding an E2E test, ask: "Can this be caught at a lower level?" If yes, write the lower-level test instead.
|
|
5
|
+
- E2E tests are reserved for critical user journeys only — login, checkout, core CRUD flow. Not for edge cases.
|
|
6
|
+
- Each level has a time budget: unit tests <5ms each, integration <500ms, E2E <30s. Tests exceeding budget must be optimized or re-leveled.
|
|
7
|
+
|
|
8
|
+
## Test Structure
|
|
9
|
+
|
|
10
|
+
- Every test follows AAA (Arrange-Act-Assert) with clear visual separation:
|
|
11
|
+
```typescript
|
|
12
|
+
it('should return 404 when user does not exist', async () => {
|
|
13
|
+
// Arrange
|
|
14
|
+
const nonExistentId = 'user_999';
|
|
15
|
+
|
|
16
|
+
// Act
|
|
17
|
+
const response = await api.get(`/users/${nonExistentId}`);
|
|
18
|
+
|
|
19
|
+
// Assert
|
|
20
|
+
expect(response.status).toBe(404);
|
|
21
|
+
expect(response.body.error.code).toBe('USER_NOT_FOUND');
|
|
22
|
+
});
|
|
23
|
+
```
|
|
24
|
+
- One logical assertion per test. Multiple `expect` calls are fine if they assert on the same behavior (e.g., checking status code AND response body).
|
|
25
|
+
- Test names must describe expected behavior: `should {expected outcome} when {condition}`. Not `test1`, not `works correctly`, not `user endpoint`.
|
|
26
|
+
- Group related tests with `describe` blocks that name the unit under test:
|
|
27
|
+
```
|
|
28
|
+
describe('UserService.createUser', () => {
|
|
29
|
+
it('should create user and return profile', ...);
|
|
30
|
+
it('should throw ConflictError when email exists', ...);
|
|
31
|
+
it('should hash password before storing', ...);
|
|
32
|
+
});
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Test Data Management
|
|
36
|
+
|
|
37
|
+
- **Factories over fixtures**: Use factory functions that generate test data with sensible defaults and allow overrides:
|
|
38
|
+
```typescript
|
|
39
|
+
const user = createTestUser({ role: 'admin', verified: true });
|
|
40
|
+
```
|
|
41
|
+
- **Faker for realistic data**: Use `@faker-js/faker` for emails, names, addresses. Fixed strings like `"test@test.com"` hide bugs in validation logic.
|
|
42
|
+
- **Database isolation**: Each test owns its data. Options:
|
|
43
|
+
- Wrap each test in a transaction that rolls back after assertion
|
|
44
|
+
- Truncate tables in `beforeEach` (slower but simpler)
|
|
45
|
+
- Use unique identifiers per test to avoid collisions
|
|
46
|
+
- **Never depend on seed data**: If a test needs a user to exist, the test creates that user. Don't rely on data from migrations or other tests.
|
|
47
|
+
- **Test environment parity**: Use the same database engine as production (not SQLite when production is Postgres). Use testcontainers or Docker Compose.
|
|
48
|
+
|
|
49
|
+
## Mocking Strategy
|
|
50
|
+
|
|
51
|
+
- **Mock at system boundaries only**:
|
|
52
|
+
- External HTTP APIs → MSW (Mock Service Worker) or nock
|
|
53
|
+
- Database → real test database (not mocked)
|
|
54
|
+
- File system → temp directories
|
|
55
|
+
- Time → `vi.useFakeTimers()` / `freezegun`
|
|
56
|
+
- Random → seed-based PRNG
|
|
57
|
+
- **Never mock internal modules** — if you mock a service to test a controller, you're not testing the integration. Use the real service with a test database.
|
|
58
|
+
- **Mock behaviors, not implementations**: Mock what the API returns, not how it's called internally.
|
|
59
|
+
- **Verify mock contracts**: If you mock an external API, add a contract test that periodically validates your mock matches the real API.
|
|
60
|
+
|
|
61
|
+
## E2E Testing Standards
|
|
62
|
+
|
|
63
|
+
- **Page Object Model** — every page/component gets a class that encapsulates selectors and actions:
|
|
64
|
+
```typescript
|
|
65
|
+
class LoginPage {
|
|
66
|
+
async login(email: string, password: string) {
|
|
67
|
+
await this.page.getByLabel('Email').fill(email);
|
|
68
|
+
await this.page.getByLabel('Password').fill(password);
|
|
69
|
+
await this.page.getByRole('button', { name: 'Sign in' }).click();
|
|
70
|
+
await this.page.waitForURL('/dashboard');
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
```
|
|
74
|
+
- **Element selection priority**: `getByRole` > `getByLabel` > `getByPlaceholder` > `getByText` > `data-testid`. CSS classes and XPath are banned.
|
|
75
|
+
- **Wait strategy**: Always wait for a specific condition, never a fixed duration.
|
|
76
|
+
- Wait for element: `page.waitForSelector('.result-item')`
|
|
77
|
+
- Wait for network: `page.waitForResponse('/api/search')`
|
|
78
|
+
- Wait for navigation: `page.waitForURL('/dashboard')`
|
|
79
|
+
- Timeout: max 10 seconds. If it needs more, the feature is too slow.
|
|
80
|
+
- **Test isolation**: Each test starts with a clean state. Use API calls to create/cleanup test data, not UI interactions.
|
|
81
|
+
- **Parallel execution**: Tests must run in parallel safely. No shared accounts, no shared database records, no port conflicts.
|
|
82
|
+
- **Retry policy**: Flaky tests get 1 automatic retry in CI. If a test needs >1 retry to pass consistently, it has a bug — fix it.
|
|
83
|
+
- **Screenshot on failure**: Automatically capture screenshot + trace on test failure for debugging.
|
|
84
|
+
|
|
85
|
+
## Flakiness Management
|
|
86
|
+
|
|
87
|
+
- **Zero tolerance**: A flaky test is a broken test. Fix within 48 hours or quarantine (move to a separate suite that doesn't block PRs).
|
|
88
|
+
- **Root cause categories** and fixes:
|
|
89
|
+
- **Timing**: Replace `sleep` with explicit waits. Add retry logic for eventually-consistent operations.
|
|
90
|
+
- **Shared state**: Isolate test data. Use unique identifiers. Don't depend on test execution order.
|
|
91
|
+
- **Environment**: Pin dependency versions. Use Docker for consistent environments. Handle timezone differences.
|
|
92
|
+
- **Race conditions**: Await all async operations. Use `waitForResponse` instead of assuming API is instant.
|
|
93
|
+
- **Resource contention**: Don't share browser instances across tests. Don't share database connections.
|
|
94
|
+
- **Detection**: Run full suite 5x nightly with test order randomized. Any inconsistent result = flaky. Track flakiness rate as a team metric (target: <1%).
|
|
95
|
+
|
|
96
|
+
## Performance Testing Standards
|
|
97
|
+
|
|
98
|
+
- Performance tests live in a separate directory (`perf/` or `load/`) — not mixed with functional tests.
|
|
99
|
+
- Every performance test defines pass/fail thresholds in the script, not just "see what happens":
|
|
100
|
+
```javascript
|
|
101
|
+
export const options = {
|
|
102
|
+
thresholds: {
|
|
103
|
+
http_req_duration: ['p(95)<500', 'p(99)<1000'],
|
|
104
|
+
http_req_failed: ['rate<0.01'],
|
|
105
|
+
},
|
|
106
|
+
};
|
|
107
|
+
```
|
|
108
|
+
- Run against a production-like environment with production-like data volume. Testing against empty databases is meaningless.
|
|
109
|
+
- Results tracked over time — store in CI artifacts or a dashboard. Detect regressions by comparing against last 5 runs.
|
|
110
|
+
- Types of load tests (must have all four):
|
|
111
|
+
- **Smoke**: 1-5 users, verify the test script works and the system handles minimal load
|
|
112
|
+
- **Load**: Expected peak traffic for 10 minutes
|
|
113
|
+
- **Stress**: 2-3x peak traffic, find the breaking point
|
|
114
|
+
- **Soak**: Normal traffic for 2-4 hours, detect memory leaks and connection exhaustion
|
|
115
|
+
|
|
116
|
+
## Security Testing
|
|
117
|
+
|
|
118
|
+
- Run OWASP ZAP or similar DAST tool against staging environment in CI pipeline.
|
|
119
|
+
- SQL injection: test all input fields with `'; DROP TABLE users; --` and similar payloads.
|
|
120
|
+
- XSS: test all input fields that render output with `<script>alert('xss')</script>` and event handlers.
|
|
121
|
+
- IDOR: for every API endpoint that takes a resource ID, verify that user A cannot access user B's resources.
|
|
122
|
+
- Auth bypass: test every protected endpoint without a token, with an expired token, with a token for a different role.
|
|
123
|
+
- Rate limiting: verify rate limits are enforced by making requests at 2x the limit and confirming 429 responses.
|
|
124
|
+
|
|
125
|
+
## CI Integration
|
|
126
|
+
|
|
127
|
+
- **Smoke tests** (<5 min): Run on every PR. Blocks merge. Includes unit tests + critical integration tests.
|
|
128
|
+
- **Full regression** (<30 min): Run on merge to main. Includes all unit, integration, and E2E tests.
|
|
129
|
+
- **Nightly suite** (<2 hours): Run at midnight. Includes performance tests, security scans, visual regression, mutation testing.
|
|
130
|
+
- **Test reports**: Every CI run produces a report (Allure, JUnit XML) with pass/fail, duration, flakiness history. Link in PR comment.
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
# CI Integration Reference
|
|
2
|
+
|
|
3
|
+
## Pipeline Stages
|
|
4
|
+
|
|
5
|
+
Design your CI pipeline with distinct test stages. Each stage has a purpose,
|
|
6
|
+
a time budget, and a failure response.
|
|
7
|
+
|
|
8
|
+
### Stage Design
|
|
9
|
+
|
|
10
|
+
```
|
|
11
|
+
PR opened / push to branch
|
|
12
|
+
│
|
|
13
|
+
├─ Stage 1: Lint + Type Check (< 1 min)
|
|
14
|
+
│ Fail fast on syntax and type errors.
|
|
15
|
+
│
|
|
16
|
+
├─ Stage 2: Unit Tests (< 2 min)
|
|
17
|
+
│ Run full unit suite in parallel.
|
|
18
|
+
│ Gate: must pass to proceed.
|
|
19
|
+
│
|
|
20
|
+
├─ Stage 3: Integration Tests (< 5 min)
|
|
21
|
+
│ Run with test database (Docker).
|
|
22
|
+
│ Gate: must pass to proceed.
|
|
23
|
+
│
|
|
24
|
+
├─ Stage 4: E2E Smoke (< 10 min)
|
|
25
|
+
│ Critical user journeys only.
|
|
26
|
+
│ Gate: must pass to merge.
|
|
27
|
+
│
|
|
28
|
+
└─ Stage 5: Performance Smoke (< 2 min)
|
|
29
|
+
k6 smoke test against ephemeral environment.
|
|
30
|
+
Gate: warn on regression, block on threshold breach.
|
|
31
|
+
|
|
32
|
+
Nightly (schedule: '0 2 * * *')
|
|
33
|
+
│
|
|
34
|
+
├─ Full E2E Suite (< 30 min)
|
|
35
|
+
│ All E2E tests, all browsers.
|
|
36
|
+
│
|
|
37
|
+
├─ Full Performance Suite (< 45 min)
|
|
38
|
+
│ Load + stress scenarios.
|
|
39
|
+
│
|
|
40
|
+
└─ Security Scan (< 15 min)
|
|
41
|
+
ZAP baseline scan.
|
|
42
|
+
|
|
43
|
+
Weekly
|
|
44
|
+
│
|
|
45
|
+
├─ Soak Test (2-4 hours)
|
|
46
|
+
│
|
|
47
|
+
└─ Mutation Testing (varies)
|
|
48
|
+
Stryker on Tier 1 modules.
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Time Budgets
|
|
52
|
+
|
|
53
|
+
| Stage | Budget | Action if exceeded |
|
|
54
|
+
|---|---|---|
|
|
55
|
+
| Unit tests | 2 min | Profile slow tests; check for I/O leaks |
|
|
56
|
+
| Integration tests | 5 min | Parallelise; batch DB operations |
|
|
57
|
+
| E2E smoke | 10 min | Reduce test count; check for slow waits |
|
|
58
|
+
| Full E2E | 30 min | Shard across machines; remove redundant tests |
|
|
59
|
+
| Performance smoke | 2 min | Reduce VUs / duration |
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## GitHub Actions Examples
|
|
64
|
+
|
|
65
|
+
### PR Pipeline
|
|
66
|
+
|
|
67
|
+
```yaml
|
|
68
|
+
# .github/workflows/test.yml
|
|
69
|
+
name: Test
|
|
70
|
+
on:
|
|
71
|
+
pull_request:
|
|
72
|
+
branches: [main]
|
|
73
|
+
push:
|
|
74
|
+
branches: [main]
|
|
75
|
+
|
|
76
|
+
concurrency:
|
|
77
|
+
group: test-${{ github.ref }}
|
|
78
|
+
cancel-in-progress: true
|
|
79
|
+
|
|
80
|
+
jobs:
|
|
81
|
+
lint-and-typecheck:
|
|
82
|
+
runs-on: ubuntu-latest
|
|
83
|
+
timeout-minutes: 5
|
|
84
|
+
steps:
|
|
85
|
+
- uses: actions/checkout@v4
|
|
86
|
+
- uses: actions/setup-node@v4
|
|
87
|
+
with: { node-version: 20, cache: 'npm' }
|
|
88
|
+
- run: npm ci
|
|
89
|
+
- run: npm run lint
|
|
90
|
+
- run: npm run typecheck
|
|
91
|
+
|
|
92
|
+
unit:
|
|
93
|
+
runs-on: ubuntu-latest
|
|
94
|
+
timeout-minutes: 5
|
|
95
|
+
needs: lint-and-typecheck
|
|
96
|
+
steps:
|
|
97
|
+
- uses: actions/checkout@v4
|
|
98
|
+
- uses: actions/setup-node@v4
|
|
99
|
+
with: { node-version: 20, cache: 'npm' }
|
|
100
|
+
- run: npm ci
|
|
101
|
+
- run: npx vitest run --reporter=junit --outputFile=results/unit.xml
|
|
102
|
+
- uses: actions/upload-artifact@v4
|
|
103
|
+
if: always()
|
|
104
|
+
with:
|
|
105
|
+
name: unit-results
|
|
106
|
+
path: results/
|
|
107
|
+
|
|
108
|
+
integration:
|
|
109
|
+
runs-on: ubuntu-latest
|
|
110
|
+
timeout-minutes: 10
|
|
111
|
+
needs: unit
|
|
112
|
+
services:
|
|
113
|
+
postgres:
|
|
114
|
+
image: postgres:16
|
|
115
|
+
env:
|
|
116
|
+
POSTGRES_DB: test
|
|
117
|
+
POSTGRES_USER: test
|
|
118
|
+
POSTGRES_PASSWORD: test
|
|
119
|
+
ports: ['5432:5432']
|
|
120
|
+
options: >-
|
|
121
|
+
--health-cmd pg_isready
|
|
122
|
+
--health-interval 5s
|
|
123
|
+
--health-timeout 5s
|
|
124
|
+
--health-retries 5
|
|
125
|
+
steps:
|
|
126
|
+
- uses: actions/checkout@v4
|
|
127
|
+
- uses: actions/setup-node@v4
|
|
128
|
+
with: { node-version: 20, cache: 'npm' }
|
|
129
|
+
- run: npm ci
|
|
130
|
+
- run: npx vitest run --project=integration --reporter=junit --outputFile=results/integration.xml
|
|
131
|
+
env:
|
|
132
|
+
DATABASE_URL: postgresql://test:test@localhost:5432/test
|
|
133
|
+
- uses: actions/upload-artifact@v4
|
|
134
|
+
if: always()
|
|
135
|
+
with:
|
|
136
|
+
name: integration-results
|
|
137
|
+
path: results/
|
|
138
|
+
|
|
139
|
+
e2e-smoke:
|
|
140
|
+
runs-on: ubuntu-latest
|
|
141
|
+
timeout-minutes: 15
|
|
142
|
+
needs: integration
|
|
143
|
+
steps:
|
|
144
|
+
- uses: actions/checkout@v4
|
|
145
|
+
- uses: actions/setup-node@v4
|
|
146
|
+
with: { node-version: 20, cache: 'npm' }
|
|
147
|
+
- run: npm ci
|
|
148
|
+
- run: npx playwright install --with-deps chromium
|
|
149
|
+
- run: npm run build
|
|
150
|
+
- run: npx playwright test --project=smoke --reporter=junit,html
|
|
151
|
+
- uses: actions/upload-artifact@v4
|
|
152
|
+
if: always()
|
|
153
|
+
with:
|
|
154
|
+
name: e2e-results
|
|
155
|
+
path: |
|
|
156
|
+
playwright-report/
|
|
157
|
+
test-results/
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### Nightly Pipeline
|
|
161
|
+
|
|
162
|
+
```yaml
|
|
163
|
+
# .github/workflows/nightly.yml
|
|
164
|
+
name: Nightly Tests
|
|
165
|
+
on:
|
|
166
|
+
schedule:
|
|
167
|
+
- cron: '0 2 * * *'
|
|
168
|
+
workflow_dispatch:
|
|
169
|
+
|
|
170
|
+
jobs:
|
|
171
|
+
e2e-full:
|
|
172
|
+
runs-on: ubuntu-latest
|
|
173
|
+
timeout-minutes: 45
|
|
174
|
+
strategy:
|
|
175
|
+
matrix:
|
|
176
|
+
shard: [1, 2, 3, 4]
|
|
177
|
+
steps:
|
|
178
|
+
- uses: actions/checkout@v4
|
|
179
|
+
- uses: actions/setup-node@v4
|
|
180
|
+
with: { node-version: 20, cache: 'npm' }
|
|
181
|
+
- run: npm ci
|
|
182
|
+
- run: npx playwright install --with-deps
|
|
183
|
+
- run: npm run build
|
|
184
|
+
- run: npx playwright test --shard=${{ matrix.shard }}/4 --reporter=junit,html
|
|
185
|
+
- uses: actions/upload-artifact@v4
|
|
186
|
+
if: always()
|
|
187
|
+
with:
|
|
188
|
+
name: e2e-shard-${{ matrix.shard }}
|
|
189
|
+
path: |
|
|
190
|
+
playwright-report/
|
|
191
|
+
test-results/
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
---
|
|
195
|
+
|
|
196
|
+
## Test Reporting
|
|
197
|
+
|
|
198
|
+
### JUnit XML (Universal)
|
|
199
|
+
|
|
200
|
+
All major CI systems parse JUnit XML. Configure reporters:
|
|
201
|
+
|
|
202
|
+
```typescript
|
|
203
|
+
// vitest.config.ts
|
|
204
|
+
export default defineConfig({
|
|
205
|
+
test: {
|
|
206
|
+
reporters: ['default', 'junit'],
|
|
207
|
+
outputFile: { junit: 'results/vitest.xml' },
|
|
208
|
+
},
|
|
209
|
+
});
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
```typescript
|
|
213
|
+
// playwright.config.ts
|
|
214
|
+
export default defineConfig({
|
|
215
|
+
reporter: [
|
|
216
|
+
['list'],
|
|
217
|
+
['junit', { outputFile: 'results/playwright.xml' }],
|
|
218
|
+
['html', { open: 'never' }],
|
|
219
|
+
],
|
|
220
|
+
});
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
### Allure Reporting
|
|
224
|
+
|
|
225
|
+
Allure provides rich HTML reports with history, categories, and trends.
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
# Install
|
|
229
|
+
npm install -D allure-vitest allure-playwright
|
|
230
|
+
|
|
231
|
+
# Vitest config
|
|
232
|
+
# vitest.config.ts
|
|
233
|
+
export default defineConfig({
|
|
234
|
+
test: {
|
|
235
|
+
reporters: ['default', 'allure-vitest'],
|
|
236
|
+
},
|
|
237
|
+
});
|
|
238
|
+
|
|
239
|
+
# Playwright config
|
|
240
|
+
# playwright.config.ts
|
|
241
|
+
export default defineConfig({
|
|
242
|
+
reporter: [['list'], ['allure-playwright']],
|
|
243
|
+
});
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
```yaml
|
|
247
|
+
# CI step to generate and upload Allure report
|
|
248
|
+
- run: npx allure generate allure-results --clean -o allure-report
|
|
249
|
+
- uses: actions/upload-artifact@v4
|
|
250
|
+
if: always()
|
|
251
|
+
with:
|
|
252
|
+
name: allure-report
|
|
253
|
+
path: allure-report/
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
### PR Comment with Results
|
|
257
|
+
|
|
258
|
+
```yaml
|
|
259
|
+
- name: Test Report Summary
|
|
260
|
+
uses: dorny/test-reporter@v1
|
|
261
|
+
if: always()
|
|
262
|
+
with:
|
|
263
|
+
name: Test Results
|
|
264
|
+
path: 'results/*.xml'
|
|
265
|
+
reporter: java-junit
|
|
266
|
+
fail-on-error: true
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
---
|
|
270
|
+
|
|
271
|
+
## Flakiness Tracking
|
|
272
|
+
|
|
273
|
+
### Detection
|
|
274
|
+
|
|
275
|
+
1. **Retry-based**: If a test passes on retry but failed initially, it is
|
|
276
|
+
flaky. Playwright and Vitest both support `--retries`.
|
|
277
|
+
|
|
278
|
+
2. **Repeat-based**: Run tests with `--repeat-each=5` in nightly CI. Any
|
|
279
|
+
test that fails at least once is flaky.
|
|
280
|
+
|
|
281
|
+
3. **Historical**: Track pass/fail per test across runs. A test that fails
|
|
282
|
+
more than 1% of the time is flaky.
|
|
283
|
+
|
|
284
|
+
### Tracking Process
|
|
285
|
+
|
|
286
|
+
```
|
|
287
|
+
Test fails in CI
|
|
288
|
+
│
|
|
289
|
+
├─ Passed on retry?
|
|
290
|
+
│ YES → Flag as flaky, add to tracking board
|
|
291
|
+
│ NO → Genuine failure, investigate normally
|
|
292
|
+
│
|
|
293
|
+
Flaky test flagged
|
|
294
|
+
│
|
|
295
|
+
├─ Add label: "flaky-test" in issue tracker
|
|
296
|
+
├─ Quarantine: move to `@flaky` tag, run separately
|
|
297
|
+
├─ Set SLA: fix within 48 hours
|
|
298
|
+
│
|
|
299
|
+
Fixed?
|
|
300
|
+
│
|
|
301
|
+
├─ YES → Remove quarantine, monitor for 1 week
|
|
302
|
+
└─ NO within 48h → Delete the test (a flaky test is worse than no test)
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
### Quarantine Implementation (Playwright)
|
|
306
|
+
|
|
307
|
+
```typescript
|
|
308
|
+
// Tag flaky tests for separate execution
|
|
309
|
+
test('sometimes fails due to animation @flaky', async ({ page }) => {
|
|
310
|
+
// ...
|
|
311
|
+
});
|
|
312
|
+
|
|
313
|
+
// playwright.config.ts — run flaky tests in a separate project
|
|
314
|
+
export default defineConfig({
|
|
315
|
+
projects: [
|
|
316
|
+
{
|
|
317
|
+
name: 'stable',
|
|
318
|
+
testMatch: /.*\.spec\.ts/,
|
|
319
|
+
grepInvert: /@flaky/,
|
|
320
|
+
},
|
|
321
|
+
{
|
|
322
|
+
name: 'quarantine',
|
|
323
|
+
grep: /@flaky/,
|
|
324
|
+
retries: 3,
|
|
325
|
+
},
|
|
326
|
+
],
|
|
327
|
+
});
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
### Metrics to Track
|
|
331
|
+
|
|
332
|
+
| Metric | Target | Frequency |
|
|
333
|
+
|---|---|---|
|
|
334
|
+
| Flaky test count | 0 | Daily |
|
|
335
|
+
| Flakiness rate (flaky runs / total runs) | < 1 % | Weekly |
|
|
336
|
+
| Mean time to fix flaky test | < 48 h | Per incident |
|
|
337
|
+
| Tests in quarantine | 0 | Daily |
|