@keber/qa-framework 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +53 -0
- package/README.md +233 -0
- package/agent-instructions/00-module-analysis.md +263 -0
- package/agent-instructions/01-spec-generation.md +278 -0
- package/agent-instructions/02-test-plan-generation.md +202 -0
- package/agent-instructions/03-test-case-generation.md +147 -0
- package/agent-instructions/04-automation-generation.md +310 -0
- package/agent-instructions/04b-test-stabilization.md +306 -0
- package/agent-instructions/05-ado-integration.md +244 -0
- package/agent-instructions/06-maintenance.md +125 -0
- package/docs/architecture.md +227 -0
- package/docs/comparison-matrix.md +131 -0
- package/docs/final-report.md +279 -0
- package/docs/folder-structure-guide.md +291 -0
- package/docs/generalization-decisions.md +203 -0
- package/docs/installation.md +239 -0
- package/docs/spec-driven-philosophy.md +170 -0
- package/docs/usage-with-agent.md +203 -0
- package/examples/module-example/README.md +34 -0
- package/examples/module-example/suppliers/00-inventory.md +56 -0
- package/examples/module-example/suppliers/suppliers-create.spec.ts +148 -0
- package/integrations/ado-powershell/README.md +75 -0
- package/integrations/ado-powershell/pipelines/azure-pipeline-qa.yml +133 -0
- package/integrations/ado-powershell/scripts/create-testplan-from-mapping.ps1 +114 -0
- package/integrations/ado-powershell/scripts/inject-ado-ids.ps1 +96 -0
- package/integrations/ado-powershell/scripts/sync-ado-titles.ps1 +93 -0
- package/integrations/playwright/README.md +68 -0
- package/integrations/playwright-azure-reporter/README.md +88 -0
- package/package.json +57 -0
- package/qa-framework.config.json +87 -0
- package/scripts/cli.js +74 -0
- package/scripts/generate.js +92 -0
- package/scripts/init.js +322 -0
- package/scripts/validate.js +184 -0
- package/templates/automation-scaffold/.env.example +56 -0
- package/templates/automation-scaffold/fixtures/auth.ts +77 -0
- package/templates/automation-scaffold/fixtures/test-helpers.ts +85 -0
- package/templates/automation-scaffold/global-setup.ts +106 -0
- package/templates/automation-scaffold/package.json +24 -0
- package/templates/automation-scaffold/playwright.config.ts +85 -0
- package/templates/defect-report.md +101 -0
- package/templates/execution-report.md +116 -0
- package/templates/session-summary.md +73 -0
- package/templates/specification/00-inventory.md +81 -0
- package/templates/specification/01-business-rules.md +90 -0
- package/templates/specification/02-workflows.md +114 -0
- package/templates/specification/03-roles-permissions.md +49 -0
- package/templates/specification/04-test-data.md +104 -0
- package/templates/specification/05-test-scenarios.md +226 -0
- package/templates/test-case.md +81 -0
- package/templates/test-plan.md +130 -0
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
# Agent Instructions: Automation Generation
|
|
2
|
+
|
|
3
|
+
**File**: `agent-instructions/04-automation-generation.md`
|
|
4
|
+
**Purpose**: Instructions for writing Playwright E2E automation tests that trace back to approved test cases in `01-specifications/`.
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
> **Prerequisite**: Specifications must be reviewed and approved before writing automation. Do not automate TCs that are `PENDING-CODE` or `BLOCKED-PERMISSIONS`.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Structure: How to Organize Spec Files
|
|
13
|
+
|
|
14
|
+
Each module gets a directory under `qa/07-automation/e2e/tests/`:
|
|
15
|
+
|
|
16
|
+
```
|
|
17
|
+
qa/07-automation/e2e/tests/
|
|
18
|
+
└── {module-kebab}/
|
|
19
|
+
├── COVERAGE-MAPPING.md ← TC-ID → spec location map
|
|
20
|
+
├── {suite-name}.spec.ts ← P0 suite
|
|
21
|
+
├── {suite-name}-p1.spec.ts ← P1 suite
|
|
22
|
+
└── integration/
|
|
23
|
+
└── cross-module.spec.ts ← Cross-module dependencies
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Spec file naming: `{description}.spec.ts` in kebab-case.
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Spec File Template
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
/**
|
|
34
|
+
* @module {MODULE_CODE}
|
|
35
|
+
* @submodule {SUBMODULE_CODE}
|
|
36
|
+
* @spec qa/01-specifications/module-{name}/submodule-{name}/05-test-scenarios.md
|
|
37
|
+
* @priority P0
|
|
38
|
+
*/
|
|
39
|
+
|
|
40
|
+
import { test, expect } from '@playwright/test';
|
|
41
|
+
|
|
42
|
+
// EXEC_IDX: changes every 60 seconds — unique across runs started ≥60s apart; add a random salt when two runs may start within the same minute
|
|
43
|
+
const EXEC_IDX = Math.floor(Date.now() / 60_000) % 100_000;
|
|
44
|
+
|
|
45
|
+
test.describe('{Feature Suite Name}', () => {
|
|
46
|
+
|
|
47
|
+
test.beforeAll(async ({ browser }) => {
|
|
48
|
+
// Provision data needed by tests in this suite
|
|
49
|
+
// Rule: provision AT LEAST as many records as tests that consume them
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
test('[TC-{MODULE}-{SUB}-001] {TC title} @P0', async ({ page }) => {
|
|
53
|
+
// Arrange
|
|
54
|
+
// ...
|
|
55
|
+
|
|
56
|
+
// Act
|
|
57
|
+
// ...
|
|
58
|
+
|
|
59
|
+
// Assert
|
|
60
|
+
// ...
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
test('[TC-{MODULE}-{SUB}-002] {TC title} @P1', async ({ page }) => {
|
|
64
|
+
// ...
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
test.skip('[TC-{MODULE}-{SUB}-003] {TC title} @P1 — DEF-{NNN}: {description}. Reactivate when {ADO WI or DEF ID} is resolved.', async ({ page }) => {
|
|
68
|
+
// Skip body: write the test as if the bug were fixed
|
|
69
|
+
// This documents the expected behavior, not the buggy state
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
});
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## Required Patterns
|
|
78
|
+
|
|
79
|
+
### Pattern 1: EXEC_IDX — Unique Test Data
|
|
80
|
+
|
|
81
|
+
Use EXEC_IDX everywhere you create records to avoid collisions between consecutive test runs:
|
|
82
|
+
|
|
83
|
+
```typescript
|
|
84
|
+
const EXEC_IDX = Math.floor(Date.now() / 60_000) % 100_000;
|
|
85
|
+
|
|
86
|
+
// For titles/names
|
|
87
|
+
const entityName = `Test-Record-${EXEC_IDX}`;
|
|
88
|
+
|
|
89
|
+
// For dates (future-dated to avoid prod data collisions)
|
|
90
|
+
const year = 2120 + (EXEC_IDX % 10);
|
|
91
|
+
const month = String((EXEC_IDX % 12) + 1).padStart(2, '0');
|
|
92
|
+
const day = String((EXEC_IDX % 28) + 1).padStart(2, '0');
|
|
93
|
+
const testDate = `${year}-${month}-${day}`;
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Why year 2120+: avoids any overlap with real production data.
|
|
97
|
+
Why modular arithmetic: the date always represents a valid date, never day 32 or month 13.
|
|
98
|
+
|
|
99
|
+
### Pattern 2: Multi-Role Auth with storageState
|
|
100
|
+
|
|
101
|
+
Use `global-setup.ts` to log in once per role and save storageState:
|
|
102
|
+
|
|
103
|
+
```typescript
|
|
104
|
+
// playwright.config.ts
|
|
105
|
+
use: {
|
|
106
|
+
storageState: '.auth/session.json', // single role
|
|
107
|
+
  // per-role state: point each project or fixture at its own file, e.g. '.auth/role-admin.json' — `use.storageState` accepts a path or object, not a function
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// global-setup.ts — runs once before all tests
|
|
111
|
+
async function globalSetup(config) {
|
|
112
|
+
const browser = await chromium.launch();
|
|
113
|
+
const page = await browser.newPage();
|
|
114
|
+
|
|
115
|
+
// Log in and save session
|
|
116
|
+
await page.goto(process.env.QA_BASE_URL + process.env.QA_LOGIN_PATH);
|
|
117
|
+
await page.fill(process.env.QA_LOGIN_EMAIL_SELECTOR, process.env.QA_USER_EMAIL);
|
|
118
|
+
// Use evaluate() for password — excluded from traces
|
|
119
|
+
await page.evaluate(
|
|
120
|
+
([sel, pwd]) => { (document.querySelector(sel) as HTMLInputElement).value = pwd; },
|
|
121
|
+
[process.env.QA_LOGIN_PASSWORD_SELECTOR, process.env.QA_USER_PASSWORD]
|
|
122
|
+
);
|
|
123
|
+
await page.click(process.env.QA_LOGIN_SUBMIT_SELECTOR);
|
|
124
|
+
  await page.waitForURL(url => !url.toString().includes(process.env.QA_LOGIN_PATH));
|
|
125
|
+
|
|
126
|
+
await page.context().storageState({ path: '.auth/session.json' });
|
|
127
|
+
await browser.close();
|
|
128
|
+
}
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
For multi-role: call login sequence sequentially for each role (apps often reject concurrent sessions).
|
|
132
|
+
|
|
133
|
+
### Pattern 3: Fast-Fail for Status Checks
|
|
134
|
+
|
|
135
|
+
When checking the text content of status indicators, use a short timeout to prevent hanging:
|
|
136
|
+
|
|
137
|
+
```typescript
|
|
138
|
+
// BAD: inherits the full test timeout (30s or more)
|
|
139
|
+
const status = await page.locator('.status-badge').textContent();
|
|
140
|
+
|
|
141
|
+
// GOOD: fails fast if not available in 3 seconds
|
|
142
|
+
const status = await page.locator('.status-badge').textContent({ timeout: 3_000 });
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### Pattern 4: Email Validation — 3-Layer Strategy
|
|
146
|
+
|
|
147
|
+
When testing email notification behavior without a real inbox:
|
|
148
|
+
|
|
149
|
+
```typescript
|
|
150
|
+
// Layer 1: HTTP route interception (most reliable)
|
|
151
|
+
const emailRequests: string[] = [];
|
|
152
|
+
await page.route('**/*mail*', (route) => {
|
|
153
|
+
emailRequests.push(route.request().url());
|
|
154
|
+
route.continue();
|
|
155
|
+
});
|
|
156
|
+
await page.route('**/*correo*', (route) => {
|
|
157
|
+
emailRequests.push(route.request().url());
|
|
158
|
+
route.continue();
|
|
159
|
+
});
|
|
160
|
+
// ... perform the action ...
|
|
161
|
+
expect(emailRequests.length).toBeGreaterThan(0);
|
|
162
|
+
|
|
163
|
+
// Layer 2: UI confirmation (toast, banner, badge)
|
|
164
|
+
await expect(page.locator('.email-sent-indicator')).toBeVisible();
|
|
165
|
+
|
|
166
|
+
// Layer 3: test.fixme() — if inbox access is needed and not available
|
|
167
|
+
test.fixme(process.env.QA_MAILBOX_ACCESS !== 'true', 'Requires mailbox access');
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Pattern 5: Skip Pattern for Known Bugs
|
|
171
|
+
|
|
172
|
+
```typescript
|
|
173
|
+
test.skip(true,
|
|
174
|
+
'DEF-001: {Description of bug}. ' +
|
|
175
|
+
'Expected: {expected behavior}. ' +
|
|
176
|
+
'Actual: {buggy behavior}. ' +
|
|
177
|
+
'Reactivate when ADO #{WI_ID} / DEF-001 is resolved.'
|
|
178
|
+
);
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
**Never** make a test artificially "pass" around a bug. Document with exact expected behavior.
|
|
182
|
+
|
|
183
|
+
### Pattern 6: beforeAll Provisioning Ratio
|
|
184
|
+
|
|
185
|
+
In `beforeAll`, create **at least as many records as the number of tests that consume them**:
|
|
186
|
+
|
|
187
|
+
```typescript
|
|
188
|
+
test.beforeAll(async ({ browser }) => {
|
|
189
|
+
const page = await browser.newPage();
|
|
190
|
+
// If 3 tests each need 1 different pending record:
|
|
191
|
+
const records: string[] = [];
|
|
192
|
+
for (let i = 0; i < 3; i++) {
|
|
193
|
+
const id = await createRecord(page, `Record-${EXEC_IDX}-${i}`);
|
|
194
|
+
records.push(id);
|
|
195
|
+
}
|
|
196
|
+
// Store in closure or page object for use in tests
|
|
197
|
+
});
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### Pattern 7: Password Injection (Trace Safety)
|
|
201
|
+
|
|
202
|
+
```typescript
|
|
203
|
+
// Passwords must be set via evaluate() to exclude from Playwright traces
|
|
204
|
+
await page.evaluate(
|
|
205
|
+
([selector, pwd]) => {
|
|
206
|
+
(document.querySelector(selector) as HTMLInputElement).value = pwd;
|
|
207
|
+
},
|
|
208
|
+
[process.env.QA_LOGIN_PASSWORD_SELECTOR, process.env.QA_USER_PASSWORD]
|
|
209
|
+
);
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
---
|
|
213
|
+
|
|
214
|
+
## `playwright.config.ts` Checklist
|
|
215
|
+
|
|
216
|
+
Every project Playwright config should configure:
|
|
217
|
+
|
|
218
|
+
```typescript
|
|
219
|
+
export default defineConfig({
|
|
220
|
+
testDir: './tests',
|
|
221
|
+
  timeout: 30_000,             // Per-test timeout
  retries: process.env.CI ? 2 : 0,
  workers: process.env.CI ? 1 : 2,

  // Exclude seeds and debug scripts from normal runs
  testIgnore: ['**/seeds/**', '**/debug-*.spec.ts', '**/seed.spec.ts'],

  use: {
    // actionTimeout and navigationTimeout are TestOptions, so they belong inside `use`
    actionTimeout: 10_000,     // Per-action timeout (REQUIRED — prevents silent hangs)
    navigationTimeout: 60_000, // Per-navigation timeout
    baseURL: process.env.QA_BASE_URL,
    storageState: '.auth/session.json',
    locale: 'en-US',           // or 'es-CL' for Chilean apps
    screenshot: 'only-on-failure',
    video: 'retain-on-failure',
    trace: 'on-first-retry',
  },
|
|
238
|
+
|
|
239
|
+
reporter: [
|
|
240
|
+
['html', { outputFolder: 'playwright-report' }],
|
|
241
|
+
// ADO reporter: only when integration enabled
|
|
242
|
+
// ['@alex_neo/playwright-azure-reporter', { ... }],
|
|
243
|
+
],
|
|
244
|
+
|
|
245
|
+
projects: [
|
|
246
|
+
{ name: 'chromium', use: { ...devices['Desktop Chrome'] } },
|
|
247
|
+
// Add firefox, webkit when cross-browser coverage is needed
|
|
248
|
+
],
|
|
249
|
+
|
|
250
|
+
globalSetup: './global-setup.ts',
|
|
251
|
+
outputDir: '../../05-test-execution/automated/test-results',
|
|
252
|
+
});
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
`actionTimeout` is **not optional**. Without it, `textContent()`, `getAttribute()`, and similar calls inherit the full test timeout and can silently hang if the element is slow.
|
|
256
|
+
|
|
257
|
+
---
|
|
258
|
+
|
|
259
|
+
## Stability Criteria
|
|
260
|
+
|
|
261
|
+
A test is only considered stable when:
|
|
262
|
+
|
|
263
|
+
1. It passes ≥ 2 consecutive runs with **different** `EXEC_IDX` values (i.e., at least 60 seconds apart)
|
|
264
|
+
2. It does not rely on another test's side effects
|
|
265
|
+
3. It produces the same result across 3+ independent runs
|
|
266
|
+
4. No `waitForTimeout(N)` longer than 2000ms is used (use `waitForSelector` or `waitForLoadState` instead)
|
|
267
|
+
|
|
268
|
+
---
|
|
269
|
+
|
|
270
|
+
## Debugging Failed Tests
|
|
271
|
+
|
|
272
|
+
Use this 5-layer methodology:
|
|
273
|
+
|
|
274
|
+
1. **Environment layer**: Is QA_BASE_URL reachable? Are credentials valid? Run login test first.
|
|
275
|
+
2. **Timeout layer**: Did the test fail with "timeout" or "Timeout exceeded"? Check `actionTimeout`, increase if justified.
|
|
276
|
+
3. **Data layer**: Did `beforeAll` create the expected records? Add `console.log` per step in `beforeAll`.
|
|
277
|
+
4. **Code layer**: Is the selector correct? Use page.pause() or Playwright UI mode to inspect.
|
|
278
|
+
5. **System defect layer**: Is the failure due to a real bug? File a defect and add `test.skip()`.
|
|
279
|
+
|
|
280
|
+
**Diagnostic script pattern**: For complex data discovery, create a diagnostic spec in `helpers/debug/`:
|
|
281
|
+
|
|
282
|
+
```typescript
|
|
283
|
+
// helpers/debug/inspect-{feature}.spec.ts
|
|
284
|
+
test('diagnostic: dump form options', async ({ page }) => {
|
|
285
|
+
await page.goto(`${process.env.QA_BASE_URL}/my-form`);
|
|
286
|
+
const options = await page.$$eval('select option', els =>
|
|
287
|
+
els.map(el => ({ value: el.value, text: el.textContent }))
|
|
288
|
+
);
|
|
289
|
+
console.log(JSON.stringify(options, null, 2));
|
|
290
|
+
});
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
This is not part of the test suite (excluded via `testIgnore`). Use it to discover valid option values.
|
|
294
|
+
|
|
295
|
+
---
|
|
296
|
+
|
|
297
|
+
## Coverage Mapping Document
|
|
298
|
+
|
|
299
|
+
After writing automation for a module, create `COVERAGE-MAPPING.md` in the test directory:
|
|
300
|
+
|
|
301
|
+
```markdown
|
|
302
|
+
# Coverage Mapping — {Module}
|
|
303
|
+
|
|
304
|
+
| TC ID | Title | Spec file | Playwright spec | Status |
|
|
305
|
+
|-------|-------|-----------|----------------|--------|
|
|
306
|
+
| TC-OPER-CAT-001 | Access catalog list | 01-specifications/.../05-test-scenarios.md | tests/operacion/catalogos/critical.spec.ts | ✅ Automated |
|
|
307
|
+
| TC-OPER-CAT-002 | Create labor type | ... | tests/operacion/... | ✅ Automated |
|
|
308
|
+
| TC-OPER-CAT-003 | Duplicate rejection | ... | tests/operacion/... | ⚠️ Partially (UI only) |
|
|
309
|
+
| TC-OPER-ASI-001 | Access assistance list | ... | — | ⛔ Blocked (BLOCKED-PERMISSIONS) |
|
|
310
|
+
```
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
# Agent Instructions: Test Stabilization
|
|
2
|
+
|
|
3
|
+
**File**: `agent-instructions/04b-test-stabilization.md`
|
|
4
|
+
**Purpose**: Instructions for exhaustively reviewing and stabilizing generated Playwright tests to ensure they are logically correct, technically sound, and free of false positives and false negatives before ADO integration.
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Stage Context
|
|
9
|
+
|
|
10
|
+
| Field | Value |
|
|
11
|
+
|-------|-------|
|
|
12
|
+
| Stage number | 4.5 |
|
|
13
|
+
| Stage name | Test Stabilization |
|
|
14
|
+
| Preceding stage | Stage 4 — Automation Generation (`04-automation-generation.md`) |
|
|
15
|
+
| Following stage | Stage 5 — ADO Integration (`05-ado-integration.md`) |
|
|
16
|
+
| Can be skipped? | No — ADO integration with unreliable tests produces misleading test run history |
|
|
17
|
+
| Required inputs | Generated `.spec.ts` files in `qa/07-automation/e2e/`; approved `05-test-scenarios.md` for the module |
|
|
18
|
+
| Produced outputs | Stabilized spec files; `STABILIZATION-REPORT.md` per submodule |
|
|
19
|
+
| Exit criterion | Confidence ≥ 90% that every test produces a true positive on failure and a true negative on pass |
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## When to use
|
|
24
|
+
|
|
25
|
+
Run this stage immediately after **Stage 4 — Automation Generation** and before any ADO
|
|
26
|
+
integration or CI pipeline registration. This stage is mandatory regardless of whether tests
|
|
27
|
+
are passing or failing after generation.
|
|
28
|
+
|
|
29
|
+
Also re-run this stage when:
|
|
30
|
+
- Sprint changes introduce significant new test code (more than 3 new spec files)
|
|
31
|
+
- A test suite starts flaking in CI after a previously stable period
|
|
32
|
+
- A test is suspected of being a false positive (passing despite the feature being broken)
|
|
33
|
+
- A test is suspected of being a false negative (failing despite the feature being correct)
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
> **Scope boundary**: This stage operates exclusively on test code (`.spec.ts`, fixtures,
|
|
38
|
+
> helpers, `global-setup.ts`, `playwright.config.ts`). The application source code is
|
|
39
|
+
> strictly out of scope. Do not modify, patch, or work around application behavior to make
|
|
40
|
+
> tests pass — document the mismatch as a defect instead.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
> **Coverage boundary**: Do not add new tests or increase coverage during this stage. If a
|
|
45
|
+
> gap in coverage is identified, note it in the STABILIZATION-REPORT.md for Stage 6
|
|
46
|
+
> (Maintenance) to address. The goal is correctness of existing tests, not completeness.
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Step 1 — Establish the Reference Baseline
|
|
51
|
+
|
|
52
|
+
Before running any tests, collect the ground truth against which correctness will be judged.
|
|
53
|
+
|
|
54
|
+
1. Open `05-test-scenarios.md` for the target module/submodule. For each TC row, note:
|
|
55
|
+
- TC-ID and title
|
|
56
|
+
- Priority (P0/P1/P2/P3)
|
|
57
|
+
- Expected result (acceptance criterion)
|
|
58
|
+
- Automatable status (Yes / Partial / No)
|
|
59
|
+
|
|
60
|
+
2. Open `COVERAGE-MAPPING.md` for the module. For each row:
|
|
61
|
+
- Confirm the `.spec.ts` file and test function name exist
|
|
62
|
+
- Flag any rows where the spec or function is missing (these are generation gaps, not
|
|
63
|
+
stabilization issues — document in the report but do not create new tests)
|
|
64
|
+
|
|
65
|
+
3. If `05-test-scenarios.md` or `COVERAGE-MAPPING.md` are absent or incomplete, stop and
|
|
66
|
+
complete Stage 4 before proceeding.
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Step 2 — First Run: Collect Baseline Results
|
|
71
|
+
|
|
72
|
+
Run the full suite for the target module without any modifications:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
npx playwright test qa/07-automation/e2e/tests/{module-kebab}/ --reporter=list
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Record the outcome of every test:
|
|
79
|
+
|
|
80
|
+
| TC-ID | Test title | Result | Notes |
|
|
81
|
+
|-------|-----------|--------|-------|
|
|
82
|
+
| TC-MOD-SUB-001 | ... | ✅ pass / ❌ fail / ⚠️ flaky / ⏭ skip | Error message if fail |
|
|
83
|
+
|
|
84
|
+
Collect trace artifacts for every failing test:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
npx playwright test ... --trace=on
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Do NOT make any changes yet. The first run result is the diagnostic baseline.
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## Step 3 — Classify Every Failing or Skipped Test
|
|
95
|
+
|
|
96
|
+
For each non-passing test, determine its failure category before writing any code.
|
|
97
|
+
|
|
98
|
+
### Failure Categories
|
|
99
|
+
|
|
100
|
+
| Category | Definition | Correct action |
|
|
101
|
+
|----------|-----------|----------------|
|
|
102
|
+
| **A — Test Bug: Wrong selector** | The selector no longer matches the UI element | Fix selector in `.spec.ts` |
|
|
103
|
+
| **B — Test Bug: Wrong assertion** | The assertion does not match the specified acceptance criterion | Fix assertion to match spec |
|
|
104
|
+
| **C — Test Bug: Wrong flow** | The test steps don't reflect the actual user flow described in the TC | Rewrite steps to match TC |
|
|
105
|
+
| **D — Test Bug: Fragile timing** | Test fails intermittently due to missing `await`, race conditions, or missing `waitFor` | Add proper async handling |
|
|
106
|
+
| **E — Test Bug: Incorrect data** | Test data is malformed, not unique, or conflicts with QA environment state | Fix data generation or provisioning |
|
|
107
|
+
| **F — App Bug** | The test is correct and the application is not behaving as the TC specifies | Do not fix the test; file a defect |
|
|
108
|
+
| **G — TC Mismatch** | The spec TC is ambiguous or incorrect vs the actual implemented behavior | Do not fix the test or the app; update `05-test-scenarios.md` and note as a spec defect |
|
|
109
|
+
| **H — Infra / Environment** | Test fails due to QA environment unavailability, network, or credentials | Retry in a clean environment; do not classify as a test bug until confirmed |
|
|
110
|
+
| **I — Intentional skip** | Test is marked `test.skip()` due to a known defect or pending feature | Verify skip reason is still valid; update `06-defects/open/` reference |
|
|
111
|
+
|
|
112
|
+
For each failing test, write your classification in the working STABILIZATION-REPORT.md
|
|
113
|
+
before making any change.
|
|
114
|
+
|
|
115
|
+
### Decision Protocol
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
Is the test step sequence and assertion derived correctly from 05-test-scenarios.md?
|
|
119
|
+
No → Category G (TC Mismatch — fix the spec, not the test)
|
|
120
|
+
Yes ↓
|
|
121
|
+
Does the application behave as the TC acceptance criterion describes?
|
|
122
|
+
No → Category F (App Bug — file a defect, skip the test with a DEF reference)
|
|
123
|
+
Yes ↓
|
|
124
|
+
Does the failure appear in every run (deterministic)?
|
|
125
|
+
No → Category D or H (timing or environment — diagnose further)
|
|
126
|
+
Yes ↓
|
|
127
|
+
Is the failure caused by a selector, assertion, data, or flow issue in the test code?
|
|
128
|
+
→ Category A, B, C, or E (Test Bug — fix the test code)
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## Step 4 — Fix Iterations
|
|
134
|
+
|
|
135
|
+
Apply fixes one category at a time, in this order: E → A → C → B → D.
|
|
136
|
+
(Data problems mask selector problems; flow problems mask assertion problems.)
|
|
137
|
+
|
|
138
|
+
After each batch of fixes, re-run the affected tests:
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
npx playwright test qa/07-automation/e2e/tests/{module-kebab}/{file}.spec.ts --reporter=list
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
Do not move to the next category until the current category's tests are stable across
|
|
145
|
+
**at least two consecutive runs**.
|
|
146
|
+
|
|
147
|
+
### Rules for fixing test code
|
|
148
|
+
|
|
149
|
+
- **Selectors**: prefer `getByRole`, `getByLabel`, `getByTestId` — avoid CSS/XPath selectors
|
|
150
|
+
that depend on position or class names that aren't semantic
|
|
151
|
+
- **Assertions**: every assertion must trace back to a specific acceptance criterion in the TC;
|
|
152
|
+
remove or replace any `expect().toBeVisible()` that does not correspond to a TC requirement
|
|
153
|
+
- **Waits**: prefer `await expect(locator).toBeVisible()` over `page.waitForTimeout()`;
|
|
154
|
+
never use `waitForTimeout` as a fix for timing — diagnose the actual cause
|
|
155
|
+
- **Data**: use `EXEC_IDX + RUN_SALT` for unique identifiers (see ISSUE-02 note below);
|
|
156
|
+
provision data in `beforeAll`, not inside individual tests
|
|
157
|
+
- **Skips**: when skipping for a Category F (App Bug), always include the defect reference:
|
|
158
|
+
```typescript
|
|
159
|
+
test.skip(true, 'DEF-{NNN}: {one-line bug description}. Filed {YYYY-MM-DD}.');
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
> ⚠️ **EXEC_IDX collision**: Two runs within the same 60-second window produce identical
|
|
163
|
+
> `EXEC_IDX` values. For fields requiring guaranteed uniqueness, add a salt:
|
|
164
|
+
> ```typescript
|
|
165
|
+
> const EXEC_IDX = Math.floor(Date.now() / 60_000) % 100_000;
|
|
166
|
+
> const RUN_SALT = Math.floor(Math.random() * 9999).toString().padStart(4, '0');
|
|
167
|
+
> ```
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## Step 5 — Validate Passing Tests for False Positives
|
|
172
|
+
|
|
173
|
+
A passing test is not necessarily a correct test. After fixing failing tests, audit every
|
|
174
|
+
**passing** test for false positives.
|
|
175
|
+
|
|
176
|
+
For each passing test, perform one of the following checks:
|
|
177
|
+
|
|
178
|
+
### Negation check (preferred)
|
|
179
|
+
Temporarily modify the test to assert the opposite of the expected result, then run it.
|
|
180
|
+
If it still passes — the test is a false positive (asserting nothing meaningful).
|
|
181
|
+
|
|
182
|
+
```typescript
|
|
183
|
+
// Original: await expect(toast).toHaveText('Guardado exitosamente');
|
|
184
|
+
// Negation check: replace temporarily with:
|
|
185
|
+
await expect(toast).toHaveText('TEXTO_QUE_NO_EXISTE_JAMAS');
|
|
186
|
+
// If this passes → the original assertion is not actually evaluating the toast content
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Restore the original assertion after the check.
|
|
190
|
+
|
|
191
|
+
### Trace review (secondary)
|
|
192
|
+
Open the Playwright trace for the passing test and step through it. Verify:
|
|
193
|
+
- Every action in the `test()` body corresponds to a step in the TC
|
|
194
|
+
- Every `expect()` is actually being evaluated (no skipped assertions due to early returns
|
|
195
|
+
or unreachable code)
|
|
196
|
+
- The test does not pass because an early `expect()` short-circuits before the meaningful one
|
|
197
|
+
|
|
198
|
+
### API/network verification (for tests touching data persistence)
|
|
199
|
+
For tests that create, update, or delete records: after the UI action, verify the result
|
|
200
|
+
is persisted, not just that a success toast appeared. A test that only checks the toast
|
|
201
|
+
is a false positive if the actual save fails silently.
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## Step 6 — Iteration and Convergence
|
|
206
|
+
|
|
207
|
+
Repeat Steps 4 and 5 until:
|
|
208
|
+
|
|
209
|
+
1. All deterministic failures are resolved (either fixed or legitimately skipped with a
|
|
210
|
+
defect reference)
|
|
211
|
+
2. All passing tests have passed the negation check or trace review
|
|
212
|
+
3. No test produces intermittent results across 3 consecutive runs
|
|
213
|
+
|
|
214
|
+
**Confidence scoring**: after each iteration, estimate confidence per test:
|
|
215
|
+
|
|
216
|
+
| Confidence | Criteria |
|
|
217
|
+
|-----------|---------|
|
|
218
|
+
| ✅ High (≥90%) | Passed negation check AND matches TC acceptance criterion exactly |
|
|
219
|
+
| ⚠️ Medium (70–89%) | Passes consistently; trace reviewed; no negation check yet |
|
|
220
|
+
| ❌ Low (<70%) | Intermittent, unclassified failure, or negation check not attempted |
|
|
221
|
+
|
|
222
|
+
The stage is complete when **all tests are ✅ High or legitimately skipped** (Category F
|
|
223
|
+
with a defect reference or Category I with a valid skip reason).
|
|
224
|
+
|
|
225
|
+
---
|
|
226
|
+
|
|
227
|
+
## Step 7 — Document Findings
|
|
228
|
+
|
|
229
|
+
For each submodule, create or update `STABILIZATION-REPORT.md` in the module's automation
|
|
230
|
+
directory:
|
|
231
|
+
|
|
232
|
+
```
|
|
233
|
+
qa/07-automation/e2e/tests/{module-kebab}/STABILIZATION-REPORT.md
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
### STABILIZATION-REPORT.md structure
|
|
237
|
+
|
|
238
|
+
```markdown
|
|
239
|
+
# Stabilization Report — {MODULE} > {SUBMODULE}
|
|
240
|
+
|
|
241
|
+
**Date**: YYYY-MM-DD
|
|
242
|
+
**Sprint**: {sprint number or label}
|
|
243
|
+
**Analyst**: {agent session name or human name}
|
|
244
|
+
**Spec reference**: `qa/{module-key}/{submodule-key}/05-test-scenarios.md`
|
|
245
|
+
|
|
246
|
+
## Summary
|
|
247
|
+
|
|
248
|
+
| Metric | Value |
|
|
249
|
+
|--------|-------|
|
|
250
|
+
| Tests evaluated | N |
|
|
251
|
+
| Tests stabilized (now ✅ High confidence) | N |
|
|
252
|
+
| Tests skipped — App Bug (Category F) | N |
|
|
253
|
+
| Tests skipped — Intentional (Category I) | N |
|
|
254
|
+
| Remaining low-confidence tests | N |
|
|
255
|
+
| Overall confidence | NN% |
|
|
256
|
+
|
|
257
|
+
## Test-by-Test Record
|
|
258
|
+
|
|
259
|
+
| TC-ID | Test title | Baseline result | Final result | Category | Changes made | Confidence |
|
|
260
|
+
|-------|-----------|-----------------|-------------|----------|-------------|-----------|
|
|
261
|
+
| TC-... | ... | ❌ fail | ✅ pass | A | Fixed selector `#old` → `getByRole('button', { name: '...' })` | ✅ High |
|
|
262
|
+
|
|
263
|
+
## App Bugs Filed
|
|
264
|
+
|
|
265
|
+
| DEF-ID | TC-ID | Description | Filed date | ADO WI (if created) |
|
|
266
|
+
|--------|-------|-------------|-----------|---------------------|
|
|
267
|
+
|
|
268
|
+
## Spec Defects (Category G)
|
|
269
|
+
|
|
270
|
+
| TC-ID | Issue | Recommended `05-test-scenarios.md` update |
|
|
271
|
+
|-------|-------|------------------------------------------|
|
|
272
|
+
|
|
273
|
+
## Coverage Gaps Identified (do not fix in this stage)
|
|
274
|
+
|
|
275
|
+
- {TC-ID}: test mapped in COVERAGE-MAPPING.md but `.spec.ts` function not found
|
|
276
|
+
- ...
|
|
277
|
+
|
|
278
|
+
## Decisions Log
|
|
279
|
+
|
|
280
|
+
Chronological record of non-trivial decisions made during stabilization:
|
|
281
|
+
|
|
282
|
+
- YYYY-MM-DD: Classified TC-XXX failure as Category F after verifying the save endpoint
|
|
283
|
+
returns 500 on duplicate RUTs. App bug, not test bug. DEF-001 filed.
|
|
284
|
+
- ...
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
---
|
|
288
|
+
|
|
289
|
+
## Step 8 — Update Upstream Artifacts
|
|
290
|
+
|
|
291
|
+
After completing STABILIZATION-REPORT.md:
|
|
292
|
+
|
|
293
|
+
1. **If any Category G issues were found** (TC Mismatch): update `05-test-scenarios.md`
|
|
294
|
+
with the corrected acceptance criteria. Note the change with `[REVISED - {YYYY-MM-DD}]`.
|
|
295
|
+
|
|
296
|
+
2. **If any Category F issues were found** (App Bug): create defect files in
|
|
297
|
+
`qa/{module-key}/{submodule-key}/06-defects/open/DEF-{NNN}.md` for each bug filed.
|
|
298
|
+
|
|
299
|
+
3. **Update `COVERAGE-MAPPING.md`**: set the `Status` column for each TC:
|
|
300
|
+
- `Automated` — test is ✅ High confidence
|
|
301
|
+
- `Skipped-Defect` — test is skipped via `test.skip()` with a DEF reference
|
|
302
|
+
- `Skipped-Infra` — test is skipped due to environment limitation
|
|
303
|
+
- `Manual` — TC is intentionally not automated
|
|
304
|
+
|
|
305
|
+
4. **Update session-summary.md** for the submodule: mark Stage 4.5 (Test Stabilization) as ✅ Complete and
|
|
306
|
+
record the overall confidence percentage.
|