ai-or-die 0.1.43 → 0.1.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +175 -13
- package/docs/agent-instructions/02-testing-and-validation.md +55 -0
- package/docs/agent-instructions/06-ci-first-testing.md +14 -0
- package/docs/agent-instructions/09-copilot-agent-testing.md +523 -0
- package/docs/audits/SUMMARY.md +151 -0
- package/docs/history/mobile-ux-overhaul-deferrals.md +107 -0
- package/docs/planning/qol2-handoff.md +119 -0
- package/e2e/playwright.config.js +16 -1
- package/package.json +1 -1
- package/src/base-bridge.js +21 -1
- package/src/public/app.js +378 -70
- package/src/public/base.css +2 -0
- package/src/public/components/bottom-nav.css +1 -1
- package/src/public/components/buttons.css +6 -0
- package/src/public/components/extra-keys.css +31 -15
- package/src/public/components/menus.css +13 -1
- package/src/public/components/modals.css +14 -0
- package/src/public/components/tabs.css +16 -4
- package/src/public/extra-keys.js +148 -18
- package/src/public/index.html +4 -4
- package/src/public/mobile.css +40 -10
- package/src/server.js +37 -6
package/.github/workflows/ci.yml
CHANGED
|
@@ -7,14 +7,15 @@ on:
|
|
|
7
7
|
branches: [main]
|
|
8
8
|
|
|
9
9
|
concurrency:
|
|
10
|
-
group: ci-${{ github.event.pull_request.number || github.sha }}
|
|
10
|
+
group: ci-main-${{ github.event.pull_request.number || github.sha }}
|
|
11
11
|
cancel-in-progress: true
|
|
12
12
|
|
|
13
13
|
jobs:
|
|
14
14
|
test:
|
|
15
15
|
runs-on: ${{ matrix.os }}
|
|
16
|
-
timeout-minutes:
|
|
16
|
+
timeout-minutes: 12
|
|
17
17
|
strategy:
|
|
18
|
+
fail-fast: false
|
|
18
19
|
matrix:
|
|
19
20
|
os: [ubuntu-latest, windows-latest]
|
|
20
21
|
node-version: [22]
|
|
@@ -23,6 +24,7 @@ jobs:
|
|
|
23
24
|
- uses: actions/setup-node@v4
|
|
24
25
|
with:
|
|
25
26
|
node-version: ${{ matrix.node-version }}
|
|
27
|
+
cache: 'npm'
|
|
26
28
|
- run: npm ci
|
|
27
29
|
- run: npm test
|
|
28
30
|
- run: npm audit --audit-level=moderate
|
|
@@ -30,8 +32,9 @@ jobs:
|
|
|
30
32
|
|
|
31
33
|
test-browser-golden:
|
|
32
34
|
runs-on: ${{ matrix.os }}
|
|
33
|
-
timeout-minutes:
|
|
35
|
+
timeout-minutes: 12
|
|
34
36
|
strategy:
|
|
37
|
+
fail-fast: false
|
|
35
38
|
matrix:
|
|
36
39
|
os: [ubuntu-latest, windows-latest]
|
|
37
40
|
steps:
|
|
@@ -39,7 +42,15 @@ jobs:
|
|
|
39
42
|
- uses: actions/setup-node@v4
|
|
40
43
|
with:
|
|
41
44
|
node-version: '22'
|
|
45
|
+
cache: 'npm'
|
|
42
46
|
- run: npm ci
|
|
47
|
+
- name: Cache Playwright browsers
|
|
48
|
+
uses: actions/cache@v4
|
|
49
|
+
with:
|
|
50
|
+
path: |
|
|
51
|
+
~/.cache/ms-playwright
|
|
52
|
+
~/AppData/Local/ms-playwright
|
|
53
|
+
key: playwright-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
|
43
54
|
- name: Install Playwright browsers
|
|
44
55
|
run: npx playwright install chromium --with-deps
|
|
45
56
|
- name: Run golden path test
|
|
@@ -56,8 +67,9 @@ jobs:
|
|
|
56
67
|
|
|
57
68
|
test-browser-functional-core:
|
|
58
69
|
runs-on: ${{ matrix.os }}
|
|
59
|
-
timeout-minutes:
|
|
70
|
+
timeout-minutes: 12
|
|
60
71
|
strategy:
|
|
72
|
+
fail-fast: false
|
|
61
73
|
matrix:
|
|
62
74
|
os: [ubuntu-latest, windows-latest]
|
|
63
75
|
steps:
|
|
@@ -65,7 +77,15 @@ jobs:
|
|
|
65
77
|
- uses: actions/setup-node@v4
|
|
66
78
|
with:
|
|
67
79
|
node-version: '22'
|
|
80
|
+
cache: 'npm'
|
|
68
81
|
- run: npm ci
|
|
82
|
+
- name: Cache Playwright browsers
|
|
83
|
+
uses: actions/cache@v4
|
|
84
|
+
with:
|
|
85
|
+
path: |
|
|
86
|
+
~/.cache/ms-playwright
|
|
87
|
+
~/AppData/Local/ms-playwright
|
|
88
|
+
key: playwright-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
|
69
89
|
- name: Install Playwright browsers
|
|
70
90
|
run: npx playwright install chromium --with-deps
|
|
71
91
|
- name: Run functional core tests
|
|
@@ -82,8 +102,9 @@ jobs:
|
|
|
82
102
|
|
|
83
103
|
test-browser-functional-extended:
|
|
84
104
|
runs-on: ${{ matrix.os }}
|
|
85
|
-
timeout-minutes:
|
|
105
|
+
timeout-minutes: 12
|
|
86
106
|
strategy:
|
|
107
|
+
fail-fast: false
|
|
87
108
|
matrix:
|
|
88
109
|
os: [ubuntu-latest, windows-latest]
|
|
89
110
|
steps:
|
|
@@ -91,7 +112,15 @@ jobs:
|
|
|
91
112
|
- uses: actions/setup-node@v4
|
|
92
113
|
with:
|
|
93
114
|
node-version: '22'
|
|
115
|
+
cache: 'npm'
|
|
94
116
|
- run: npm ci
|
|
117
|
+
- name: Cache Playwright browsers
|
|
118
|
+
uses: actions/cache@v4
|
|
119
|
+
with:
|
|
120
|
+
path: |
|
|
121
|
+
~/.cache/ms-playwright
|
|
122
|
+
~/AppData/Local/ms-playwright
|
|
123
|
+
key: playwright-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
|
95
124
|
- name: Install Playwright browsers
|
|
96
125
|
run: npx playwright install chromium --with-deps
|
|
97
126
|
- name: Run functional extended tests
|
|
@@ -108,8 +137,9 @@ jobs:
|
|
|
108
137
|
|
|
109
138
|
test-browser-mobile:
|
|
110
139
|
runs-on: ${{ matrix.os }}
|
|
111
|
-
timeout-minutes:
|
|
140
|
+
timeout-minutes: 12
|
|
112
141
|
strategy:
|
|
142
|
+
fail-fast: false
|
|
113
143
|
matrix:
|
|
114
144
|
os: [ubuntu-latest, windows-latest]
|
|
115
145
|
steps:
|
|
@@ -117,7 +147,15 @@ jobs:
|
|
|
117
147
|
- uses: actions/setup-node@v4
|
|
118
148
|
with:
|
|
119
149
|
node-version: '22'
|
|
150
|
+
cache: 'npm'
|
|
120
151
|
- run: npm ci
|
|
152
|
+
- name: Cache Playwright browsers
|
|
153
|
+
uses: actions/cache@v4
|
|
154
|
+
with:
|
|
155
|
+
path: |
|
|
156
|
+
~/.cache/ms-playwright
|
|
157
|
+
~/AppData/Local/ms-playwright
|
|
158
|
+
key: playwright-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
|
121
159
|
- name: Install Playwright browsers
|
|
122
160
|
run: npx playwright install chromium --with-deps
|
|
123
161
|
- name: Run mobile portrait tests (iPhone 14)
|
|
@@ -136,7 +174,7 @@ jobs:
|
|
|
136
174
|
|
|
137
175
|
test-browser-visual:
|
|
138
176
|
runs-on: ${{ matrix.os }}
|
|
139
|
-
timeout-minutes:
|
|
177
|
+
timeout-minutes: 12
|
|
140
178
|
strategy:
|
|
141
179
|
fail-fast: false
|
|
142
180
|
matrix:
|
|
@@ -146,7 +184,15 @@ jobs:
|
|
|
146
184
|
- uses: actions/setup-node@v4
|
|
147
185
|
with:
|
|
148
186
|
node-version: '22'
|
|
187
|
+
cache: 'npm'
|
|
149
188
|
- run: npm ci
|
|
189
|
+
- name: Cache Playwright browsers
|
|
190
|
+
uses: actions/cache@v4
|
|
191
|
+
with:
|
|
192
|
+
path: |
|
|
193
|
+
~/.cache/ms-playwright
|
|
194
|
+
~/AppData/Local/ms-playwright
|
|
195
|
+
key: playwright-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
|
150
196
|
- name: Install Playwright browsers
|
|
151
197
|
run: npx playwright install chromium --with-deps
|
|
152
198
|
- name: Run visual regression tests
|
|
@@ -177,8 +223,9 @@ jobs:
|
|
|
177
223
|
|
|
178
224
|
test-browser-new-features:
|
|
179
225
|
runs-on: ${{ matrix.os }}
|
|
180
|
-
timeout-minutes:
|
|
226
|
+
timeout-minutes: 12
|
|
181
227
|
strategy:
|
|
228
|
+
fail-fast: false
|
|
182
229
|
matrix:
|
|
183
230
|
os: [ubuntu-latest, windows-latest]
|
|
184
231
|
steps:
|
|
@@ -186,7 +233,15 @@ jobs:
|
|
|
186
233
|
- uses: actions/setup-node@v4
|
|
187
234
|
with:
|
|
188
235
|
node-version: '22'
|
|
236
|
+
cache: 'npm'
|
|
189
237
|
- run: npm ci
|
|
238
|
+
- name: Cache Playwright browsers
|
|
239
|
+
uses: actions/cache@v4
|
|
240
|
+
with:
|
|
241
|
+
path: |
|
|
242
|
+
~/.cache/ms-playwright
|
|
243
|
+
~/AppData/Local/ms-playwright
|
|
244
|
+
key: playwright-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
|
190
245
|
- name: Install Playwright browsers
|
|
191
246
|
run: npx playwright install chromium --with-deps
|
|
192
247
|
- name: Run new feature tests
|
|
@@ -203,8 +258,9 @@ jobs:
|
|
|
203
258
|
|
|
204
259
|
test-browser-integrations:
|
|
205
260
|
runs-on: ${{ matrix.os }}
|
|
206
|
-
timeout-minutes:
|
|
261
|
+
timeout-minutes: 12
|
|
207
262
|
strategy:
|
|
263
|
+
fail-fast: false
|
|
208
264
|
matrix:
|
|
209
265
|
os: [ubuntu-latest, windows-latest]
|
|
210
266
|
steps:
|
|
@@ -212,7 +268,15 @@ jobs:
|
|
|
212
268
|
- uses: actions/setup-node@v4
|
|
213
269
|
with:
|
|
214
270
|
node-version: '22'
|
|
271
|
+
cache: 'npm'
|
|
215
272
|
- run: npm ci
|
|
273
|
+
- name: Cache Playwright browsers
|
|
274
|
+
uses: actions/cache@v4
|
|
275
|
+
with:
|
|
276
|
+
path: |
|
|
277
|
+
~/.cache/ms-playwright
|
|
278
|
+
~/AppData/Local/ms-playwright
|
|
279
|
+
key: playwright-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
|
216
280
|
- name: Install Playwright browsers
|
|
217
281
|
run: npx playwright install chromium --with-deps
|
|
218
282
|
- name: Run integration tests
|
|
@@ -230,7 +294,7 @@ jobs:
|
|
|
230
294
|
test-browser-power-user:
|
|
231
295
|
runs-on: ${{ matrix.os }}
|
|
232
296
|
needs: test
|
|
233
|
-
timeout-minutes:
|
|
297
|
+
timeout-minutes: 12
|
|
234
298
|
strategy:
|
|
235
299
|
fail-fast: false
|
|
236
300
|
matrix:
|
|
@@ -240,7 +304,15 @@ jobs:
|
|
|
240
304
|
- uses: actions/setup-node@v4
|
|
241
305
|
with:
|
|
242
306
|
node-version: '22'
|
|
307
|
+
cache: 'npm'
|
|
243
308
|
- run: npm ci
|
|
309
|
+
- name: Cache Playwright browsers
|
|
310
|
+
uses: actions/cache@v4
|
|
311
|
+
with:
|
|
312
|
+
path: |
|
|
313
|
+
~/.cache/ms-playwright
|
|
314
|
+
~/AppData/Local/ms-playwright
|
|
315
|
+
key: playwright-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
|
244
316
|
- name: Install Playwright browsers
|
|
245
317
|
run: npx playwright install chromium --with-deps
|
|
246
318
|
- name: Run power user flow tests
|
|
@@ -258,7 +330,7 @@ jobs:
|
|
|
258
330
|
test-browser-mobile-flows:
|
|
259
331
|
runs-on: ${{ matrix.os }}
|
|
260
332
|
needs: test
|
|
261
|
-
timeout-minutes:
|
|
333
|
+
timeout-minutes: 12
|
|
262
334
|
strategy:
|
|
263
335
|
fail-fast: false
|
|
264
336
|
matrix:
|
|
@@ -268,7 +340,15 @@ jobs:
|
|
|
268
340
|
- uses: actions/setup-node@v4
|
|
269
341
|
with:
|
|
270
342
|
node-version: '22'
|
|
343
|
+
cache: 'npm'
|
|
271
344
|
- run: npm ci
|
|
345
|
+
- name: Cache Playwright browsers
|
|
346
|
+
uses: actions/cache@v4
|
|
347
|
+
with:
|
|
348
|
+
path: |
|
|
349
|
+
~/.cache/ms-playwright
|
|
350
|
+
~/AppData/Local/ms-playwright
|
|
351
|
+
key: playwright-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
|
272
352
|
- name: Install Playwright browsers
|
|
273
353
|
run: npx playwright install chromium --with-deps
|
|
274
354
|
- name: Run mobile flow tests
|
|
@@ -286,7 +366,7 @@ jobs:
|
|
|
286
366
|
test-browser-ui-features:
|
|
287
367
|
runs-on: ${{ matrix.os }}
|
|
288
368
|
needs: test
|
|
289
|
-
timeout-minutes:
|
|
369
|
+
timeout-minutes: 12
|
|
290
370
|
strategy:
|
|
291
371
|
fail-fast: false
|
|
292
372
|
matrix:
|
|
@@ -296,7 +376,15 @@ jobs:
|
|
|
296
376
|
- uses: actions/setup-node@v4
|
|
297
377
|
with:
|
|
298
378
|
node-version: '22'
|
|
379
|
+
cache: 'npm'
|
|
299
380
|
- run: npm ci
|
|
381
|
+
- name: Cache Playwright browsers
|
|
382
|
+
uses: actions/cache@v4
|
|
383
|
+
with:
|
|
384
|
+
path: |
|
|
385
|
+
~/.cache/ms-playwright
|
|
386
|
+
~/AppData/Local/ms-playwright
|
|
387
|
+
key: playwright-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
|
300
388
|
- name: Install Playwright browsers
|
|
301
389
|
run: npx playwright install chromium --with-deps
|
|
302
390
|
- name: Run UI feature tests
|
|
@@ -311,10 +399,83 @@ jobs:
|
|
|
311
399
|
playwright-report/
|
|
312
400
|
retention-days: 14
|
|
313
401
|
|
|
402
|
+
test-browser-mobile-sprint1:
|
|
403
|
+
runs-on: ${{ matrix.os }}
|
|
404
|
+
needs: test
|
|
405
|
+
timeout-minutes: 12
|
|
406
|
+
strategy:
|
|
407
|
+
fail-fast: false
|
|
408
|
+
matrix:
|
|
409
|
+
os: [ubuntu-latest, windows-latest]
|
|
410
|
+
steps:
|
|
411
|
+
- uses: actions/checkout@v4
|
|
412
|
+
- uses: actions/setup-node@v4
|
|
413
|
+
with:
|
|
414
|
+
node-version: '22'
|
|
415
|
+
cache: 'npm'
|
|
416
|
+
- run: npm ci
|
|
417
|
+
- name: Cache Playwright browsers
|
|
418
|
+
uses: actions/cache@v4
|
|
419
|
+
with:
|
|
420
|
+
path: |
|
|
421
|
+
~/.cache/ms-playwright
|
|
422
|
+
~/AppData/Local/ms-playwright
|
|
423
|
+
key: playwright-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
|
424
|
+
- name: Install Playwright browsers
|
|
425
|
+
run: npx playwright install chromium --with-deps
|
|
426
|
+
- name: Run mobile sprint1 tests
|
|
427
|
+
run: npx playwright test --config e2e/playwright.config.js --project mobile-sprint1
|
|
428
|
+
- name: Upload Playwright report
|
|
429
|
+
uses: actions/upload-artifact@v4
|
|
430
|
+
if: ${{ !cancelled() }}
|
|
431
|
+
with:
|
|
432
|
+
name: playwright-mobile-sprint1-${{ matrix.os }}
|
|
433
|
+
path: |
|
|
434
|
+
e2e/test-results/
|
|
435
|
+
playwright-report/
|
|
436
|
+
retention-days: 14
|
|
437
|
+
|
|
438
|
+
test-browser-mobile-sprint23:
|
|
439
|
+
runs-on: ${{ matrix.os }}
|
|
440
|
+
needs: test
|
|
441
|
+
timeout-minutes: 12
|
|
442
|
+
strategy:
|
|
443
|
+
fail-fast: false
|
|
444
|
+
matrix:
|
|
445
|
+
os: [ubuntu-latest, windows-latest]
|
|
446
|
+
steps:
|
|
447
|
+
- uses: actions/checkout@v4
|
|
448
|
+
- uses: actions/setup-node@v4
|
|
449
|
+
with:
|
|
450
|
+
node-version: '22'
|
|
451
|
+
cache: 'npm'
|
|
452
|
+
- run: npm ci
|
|
453
|
+
- name: Cache Playwright browsers
|
|
454
|
+
uses: actions/cache@v4
|
|
455
|
+
with:
|
|
456
|
+
path: |
|
|
457
|
+
~/.cache/ms-playwright
|
|
458
|
+
~/AppData/Local/ms-playwright
|
|
459
|
+
key: playwright-${{ runner.os }}-${{ hashFiles('package-lock.json') }}
|
|
460
|
+
- name: Install Playwright browsers
|
|
461
|
+
run: npx playwright install chromium --with-deps
|
|
462
|
+
- name: Run mobile sprint23 tests
|
|
463
|
+
run: npx playwright test --config e2e/playwright.config.js --project mobile-sprint23
|
|
464
|
+
- name: Upload Playwright report
|
|
465
|
+
uses: actions/upload-artifact@v4
|
|
466
|
+
if: ${{ !cancelled() }}
|
|
467
|
+
with:
|
|
468
|
+
name: playwright-mobile-sprint23-${{ matrix.os }}
|
|
469
|
+
path: |
|
|
470
|
+
e2e/test-results/
|
|
471
|
+
playwright-report/
|
|
472
|
+
retention-days: 14
|
|
473
|
+
|
|
314
474
|
build-binary:
|
|
315
475
|
runs-on: ${{ matrix.os }}
|
|
316
|
-
timeout-minutes:
|
|
476
|
+
timeout-minutes: 12
|
|
317
477
|
strategy:
|
|
478
|
+
fail-fast: false
|
|
318
479
|
matrix:
|
|
319
480
|
include:
|
|
320
481
|
- os: ubuntu-latest
|
|
@@ -328,6 +489,7 @@ jobs:
|
|
|
328
489
|
- uses: actions/setup-node@v4
|
|
329
490
|
with:
|
|
330
491
|
node-version: '22'
|
|
492
|
+
cache: 'npm'
|
|
331
493
|
- run: npm ci
|
|
332
494
|
- name: Build SEA binary
|
|
333
495
|
run: node scripts/build-sea.js
|
|
@@ -1,5 +1,60 @@
|
|
|
1
1
|
# Testing and Validation
|
|
2
2
|
|
|
3
|
+
## Core Philosophy
|
|
4
|
+
|
|
5
|
+
Validate like a user would use the product. Every test — from unit to E2E to exploratory — must ultimately answer the question: "Does this work the way a real person expects?" Tests that verify internal implementation details without connecting to user-observable behavior are maintenance liabilities, not safety nets.
|
|
6
|
+
|
|
7
|
+
## Testing Hierarchy
|
|
8
|
+
|
|
9
|
+
The project uses three testing tiers. Each tier serves a distinct purpose. Using the wrong tier for a given problem wastes time or misses bugs.
|
|
10
|
+
|
|
11
|
+
### Tier 1: True E2E Tests (Deterministic, CI)
|
|
12
|
+
|
|
13
|
+
Playwright tests that run on every PR across both Ubuntu and Windows. These are the source of truth for whether the product works. They simulate real user actions — clicking, typing, navigating — against the full running system (server, WebSocket, terminal, browser UI).
|
|
14
|
+
|
|
15
|
+
- **Run frequency**: Every commit, every PR
|
|
16
|
+
- **Authority**: If E2E passes on CI, the feature works. If it fails, the feature is broken.
|
|
17
|
+
- **Finds**: Regressions in known behavior, cross-platform breakage, integration failures
|
|
18
|
+
- **Owns**: The regression contract. Once an E2E test exists for a behavior, that behavior cannot break without CI catching it.
|
|
19
|
+
|
|
20
|
+
Every new feature requires E2E coverage. Every bug fix requires a regression E2E test. No exceptions.
|
|
21
|
+
|
|
22
|
+
See `docs/agent-instructions/06-ci-first-testing.md` for the complete CI workflow, job map, and debugging playbook.
|
|
23
|
+
|
|
24
|
+
### Tier 2: Copilot Agent Exploratory Testing (LLM, Periodic)
|
|
25
|
+
|
|
26
|
+
Copilot coding agents with Playwright MCP acting as human-like testers. They browse the app, interact with it at various viewports, and produce structured audit reports. This is a "bug bash" — run per feature, per release, or per major UI change. Not on every commit.
|
|
27
|
+
|
|
28
|
+
- **Run frequency**: Per feature or per release (~50 minutes per run)
|
|
29
|
+
- **Authority**: Findings require expert validation before action (~15% false-positive rate from emulation gaps)
|
|
30
|
+
- **Finds**: Unknown-unknowns, UX issues, accessibility gaps, mobile layout problems, edge cases nobody anticipated
|
|
31
|
+
- **Owns**: Discovery. These tests find the things you forgot to test.
|
|
32
|
+
|
|
33
|
+
Validated findings become fix tasks. Fixes include Tier 1 E2E regression tests that prevent recurrence.
|
|
34
|
+
|
|
35
|
+
See `docs/agent-instructions/09-copilot-agent-testing.md` for the full setup, issue templates, and validation process.
|
|
36
|
+
|
|
37
|
+
### Tier 3: Manual Device Testing (Real Hardware, Edge Cases)
|
|
38
|
+
|
|
39
|
+
Real devices, real keyboards, real network conditions. For issues that Playwright emulation cannot catch.
|
|
40
|
+
|
|
41
|
+
- **Run frequency**: As needed, for findings flagged "Needs Real Device Testing" during Tier 2 validation
|
|
42
|
+
- **Authority**: Final word on device-specific behavior
|
|
43
|
+
- **Finds**: `visualViewport` timing, `pointer: coarse` media query behavior, virtual keyboard overlays, PWA install flows, touch physics, real network latency
|
|
44
|
+
- **Owns**: The gap between emulation and reality
|
|
45
|
+
|
|
46
|
+
Any Tier 2 finding that depends on real device behavior must be verified on Tier 3 before the fix ships.
|
|
47
|
+
|
|
48
|
+
### How the Tiers Work Together
|
|
49
|
+
|
|
50
|
+
1. **Tier 2 discovers issues** during feature development or before a release
|
|
51
|
+
2. **Expert validation** removes false positives and confirms real bugs
|
|
52
|
+
3. **Tier 3 verifies** any finding that depends on real device behavior
|
|
53
|
+
4. **Fixes ship with Tier 1 E2E regression tests** that run on every future commit
|
|
54
|
+
5. **Tier 1 prevents recurrence** permanently
|
|
55
|
+
|
|
56
|
+
The tiers are complementary, not competing. Tier 1 catches what you know about. Tier 2 finds what you missed. Tier 3 confirms what emulation cannot.
|
|
57
|
+
|
|
3
58
|
## Coverage Target
|
|
4
59
|
|
|
5
60
|
Target 90% code coverage for all new code. This is not optional for new features or refactors. Existing code without tests should be covered when modified.
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# CI-First Testing
|
|
2
2
|
|
|
3
|
+
This document covers Tier 1 (True E2E Tests) of the project's testing hierarchy. E2E tests on CI are the source of truth for regression. For the full three-tier testing hierarchy — E2E, Copilot agent exploratory testing, and manual device testing — see `docs/agent-instructions/02-testing-and-validation.md`.
|
|
4
|
+
|
|
3
5
|
## E2E Tests Are the Source of Truth
|
|
4
6
|
|
|
5
7
|
End-to-end tests are the only true way to validate that the system works. Unit tests verify isolated logic. E2E tests prove the whole system -- server, WebSocket, terminal, browser UI -- actually functions as a user would experience it.
|
|
@@ -8,6 +10,18 @@ A feature is not done until its E2E tests pass on GitHub runners. If unit tests
|
|
|
8
10
|
|
|
9
11
|
Every new feature must have E2E test coverage. Every bug fix must have a regression E2E test. The E2E suite is the contract that tells the next agent "this is what working looks like."
|
|
10
12
|
|
|
13
|
+
### Performance budget: 5-minute target, 7-minute max
|
|
14
|
+
|
|
15
|
+
The entire CI pipeline must complete within 5 minutes wall-clock time. 7 minutes is the absolute maximum acceptable. The per-job timeout is set to 12 minutes (`timeout-minutes: 12` in `.github/workflows/ci.yml`) as a safety net for runner queue delays, but any job consistently hitting 7+ minutes must be investigated and optimized.
|
|
16
|
+
|
|
17
|
+
To hit this budget:
|
|
18
|
+
- **Parallelize aggressively**: All independent Playwright projects run in separate parallel jobs. Never run projects sequentially within a single job unless they share expensive state.
|
|
19
|
+
- **Minimize setup overhead**: Each CI job spends 2-3 minutes on checkout, npm ci, and Playwright install. Consolidate small test projects into fewer jobs to reduce redundant setup.
|
|
20
|
+
- **No unnecessary dependencies**: Do not add `needs:` between jobs unless one job consumes artifacts from another. Unit tests and browser tests run in parallel from the start.
|
|
21
|
+
- **Increase Playwright workers**: Use `--workers=2` or more within each job for parallel test execution.
|
|
22
|
+
|
|
23
|
+
When adding new E2E tests, verify the pipeline still completes under 5 minutes. If it doesn't, split the slowest job or consolidate the smallest ones.
|
|
24
|
+
|
|
11
25
|
### Long E2E waits indicate bugs
|
|
12
26
|
|
|
13
27
|
If an E2E test requires long waits or generous timeouts to pass, that is a signal of a bug in the product code, not a test timing issue. No real user is going to wait 30 seconds for a terminal to respond or 10 seconds for a WebSocket to connect. If the test needs that much patience, the code is too slow and must be fixed. Tightening test timeouts is a legitimate way to catch performance regressions -- the test should reflect realistic user expectations, not compensate for sluggish code.
|