codebyplan 1.13.55 → 1.13.56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +64 -13
- package/package.json +1 -1
- package/templates/agents/cbp-e2e-maestro.md +97 -8
- package/templates/agents/cbp-e2e-playwright.md +118 -15
- package/templates/agents/cbp-verify-reviewer.md +8 -0
- package/templates/context/testing/e2e.md +43 -2
- package/templates/github-workflows/ci.yml +19 -14
- package/templates/rules/e2e-mandatory.md +21 -0
- package/templates/rules/two-tier-ci.md +14 -6
package/dist/cli.js
CHANGED
|
@@ -39,7 +39,7 @@ var VERSION, PACKAGE_NAME;
|
|
|
39
39
|
var init_version = __esm({
|
|
40
40
|
"src/lib/version.ts"() {
|
|
41
41
|
"use strict";
|
|
42
|
-
VERSION = "1.13.55";
|
|
42
|
+
VERSION = "1.13.56";
|
|
43
43
|
PACKAGE_NAME = "codebyplan";
|
|
44
44
|
}
|
|
45
45
|
});
|
|
@@ -31701,6 +31701,16 @@ function detectPnpmVersionFromPackageJson(projectDir) {
|
|
|
31701
31701
|
return "10";
|
|
31702
31702
|
}
|
|
31703
31703
|
}
|
|
31704
|
+
function detectStrictEnforcedFromCiJson(projectDir) {
|
|
31705
|
+
try {
|
|
31706
|
+
const ciJsonPath = path9.join(projectDir, ".codebyplan", "ci.json");
|
|
31707
|
+
const raw = fs9.readFileSync(ciJsonPath, "utf-8");
|
|
31708
|
+
const parsed = JSON.parse(raw);
|
|
31709
|
+
return parsed.workflow?.strict_check_enforced === true;
|
|
31710
|
+
} catch {
|
|
31711
|
+
return false;
|
|
31712
|
+
}
|
|
31713
|
+
}
|
|
31704
31714
|
async function runScaffoldCiWorkflow(opts) {
|
|
31705
31715
|
await Promise.resolve();
|
|
31706
31716
|
const dryRun = opts?.dryRun ?? false;
|
|
@@ -31708,6 +31718,7 @@ async function runScaffoldCiWorkflow(opts) {
|
|
|
31708
31718
|
const projectDir = path9.resolve(opts?.projectDir ?? process.cwd());
|
|
31709
31719
|
const pnpmVersion = opts?.pnpmVersion ?? detectPnpmVersionFromPackageJson(projectDir);
|
|
31710
31720
|
const nodeVersion = opts?.nodeVersion ?? "22";
|
|
31721
|
+
const strictEnforced = opts?.strictEnforced ?? detectStrictEnforcedFromCiJson(projectDir);
|
|
31711
31722
|
const templatesDir = opts?.templatesDir ?? resolveTemplatesDir();
|
|
31712
31723
|
const templatePath = path9.join(templatesDir, "github-workflows", "ci.yml");
|
|
31713
31724
|
if (!fs9.existsSync(templatePath)) {
|
|
@@ -31718,7 +31729,9 @@ async function runScaffoldCiWorkflow(opts) {
|
|
|
31718
31729
|
const rawTemplate = fs9.readFileSync(templatePath, "utf-8");
|
|
31719
31730
|
const renderedContent = substituteTokens(rawTemplate, {
|
|
31720
31731
|
PNPM_VERSION: pnpmVersion,
|
|
31721
|
-
NODE_VERSION: nodeVersion
|
|
31732
|
+
NODE_VERSION: nodeVersion,
|
|
31733
|
+
STRICT_NAME_SUFFIX: strictEnforced ? "" : " (report-only)",
|
|
31734
|
+
STRICT_CONTINUE_ON_ERROR_LINE: strictEnforced ? "" : "\n continue-on-error: true"
|
|
31722
31735
|
});
|
|
31723
31736
|
const targetPath = path9.join(projectDir, ".github", "workflows", "ci.yml");
|
|
31724
31737
|
if (dryRun) {
|
|
@@ -31963,7 +31976,7 @@ function parseFlagsFromArgs2(args) {
|
|
|
31963
31976
|
}
|
|
31964
31977
|
function printHelp3() {
|
|
31965
31978
|
process.stdout.write(
|
|
31966
|
-
'\n codebyplan ci\n\n CI configuration management \u2014 detect platforms, scaffold workflow, enforce required check.\n\n Subcommands:\n init Detect platforms and write/update .codebyplan/ci.json\n scaffold-workflow Write .github/workflows/ci.yml from the bundled template\n enforce-check Enforce the required CI status check on a GitHub branch\n resolve <category> Resolve the shell command for a CI check category (unit_test|typecheck|build|lint|e2e|audit)\n\n Flags (all subcommands):\n --dry-run Preview the operation without writing any files\n --force Overwrite existing content that differs\n --project-dir <p> Target project root (default: current directory)\n --json Emit structured JSON to stdout\n\n Flags (scaffold-workflow only):\n --pnpm-version <v> pnpm version for {{PNPM_VERSION}} token (default: auto-detected from package.json packageManager, falls back to "10")\n --node-version <v> Node.js version for {{NODE_VERSION}} token (default: "22")\n\n Flags (enforce-check only):\n --branch <b> Branch to enforce (default: "main")\n --check-name <n> Status check name (default: "Lint + typecheck + test + build")\n\n Flags (resolve only):\n --platform <slug> Platform slug to resolve against (e.g. next_js, nestjs, package)\n --json Emit the full CiResolveResult as JSON\n\n'
|
|
31979
|
+
'\n codebyplan ci\n\n CI configuration management \u2014 detect platforms, scaffold workflow, enforce required check.\n\n Subcommands:\n init Detect platforms and write/update .codebyplan/ci.json\n scaffold-workflow Write .github/workflows/ci.yml from the bundled template\n enforce-check Enforce the required CI status check on a GitHub branch\n resolve <category> Resolve the shell command for a CI check category (unit_test|typecheck|build|lint|e2e|audit)\n\n Flags (all subcommands):\n --dry-run Preview the operation without writing any files\n --force Overwrite existing content that differs\n --project-dir <p> Target project root (default: current directory)\n --json Emit structured JSON to stdout\n\n Flags (scaffold-workflow only):\n --pnpm-version <v> pnpm version for {{PNPM_VERSION}} token (default: auto-detected from package.json packageManager, falls back to "10")\n --node-version <v> Node.js version for {{NODE_VERSION}} token (default: "22")\n --strict-enforced Emit ci-strict as an enforced gate (continue-on-error:false, no\n report-only suffix). Default: read .codebyplan/ci.json\n workflow.strict_check_enforced (falls back to report-only)\n\n Flags (enforce-check only):\n --branch <b> Branch to enforce (default: "main")\n --check-name <n> Status check name (default: "Lint + typecheck + test + build")\n\n Flags (resolve only):\n --platform <slug> Platform slug to resolve against (e.g. next_js, nestjs, package)\n --json Emit the full CiResolveResult as JSON\n\n'
|
|
31967
31980
|
);
|
|
31968
31981
|
}
|
|
31969
31982
|
function printInitResult(result) {
|
|
@@ -32062,6 +32075,7 @@ async function runCiCommand(args) {
|
|
|
32062
32075
|
if (subcommand === "scaffold-workflow") {
|
|
32063
32076
|
const pnpmVersion = flags["pnpm-version"];
|
|
32064
32077
|
const nodeVersion = flags["node-version"];
|
|
32078
|
+
const strictEnforced = flags["strict-enforced"] ? true : void 0;
|
|
32065
32079
|
let result;
|
|
32066
32080
|
try {
|
|
32067
32081
|
result = await runScaffoldCiWorkflow({
|
|
@@ -32069,7 +32083,8 @@ async function runCiCommand(args) {
|
|
|
32069
32083
|
force,
|
|
32070
32084
|
projectDir,
|
|
32071
32085
|
pnpmVersion,
|
|
32072
|
-
nodeVersion
|
|
32086
|
+
nodeVersion,
|
|
32087
|
+
strictEnforced
|
|
32073
32088
|
});
|
|
32074
32089
|
} catch (err) {
|
|
32075
32090
|
process.stderr.write(
|
|
@@ -36023,9 +36038,10 @@ function resolveTurboBin(projectRoot) {
|
|
|
36023
36038
|
if (existsSync14(workspaceRootBin)) return workspaceRootBin;
|
|
36024
36039
|
return TURBO_NOT_FOUND_SENTINEL;
|
|
36025
36040
|
}
|
|
36026
|
-
function runTurboWithSummary(task, projectRoot, spawnFn) {
|
|
36041
|
+
function runTurboWithSummary(task, projectRoot, spawnFn, concurrency) {
|
|
36027
36042
|
const turboBin = resolveTurboBin(projectRoot);
|
|
36028
|
-
const command = `${turboBin} run ${task} --summarize`;
|
|
36043
|
+
const concurrencyFlag = concurrency !== void 0 ? ` --concurrency=${concurrency}` : "";
|
|
36044
|
+
const command = `${turboBin} run ${task}${concurrencyFlag} --summarize`;
|
|
36029
36045
|
let spawnResult;
|
|
36030
36046
|
try {
|
|
36031
36047
|
spawnResult = spawnFn(command, { cwd: projectRoot });
|
|
@@ -36059,6 +36075,7 @@ function runCheck(opts) {
|
|
|
36059
36075
|
spawnFn = defaultSpawnFn,
|
|
36060
36076
|
updateBaseline: updateBaselineOpt = false,
|
|
36061
36077
|
noBaseline = false,
|
|
36078
|
+
concurrency,
|
|
36062
36079
|
loadBaselineFn = loadBaseline,
|
|
36063
36080
|
saveBaselineFn = saveBaseline
|
|
36064
36081
|
} = opts;
|
|
@@ -36107,7 +36124,7 @@ function runCheck(opts) {
|
|
|
36107
36124
|
spawnResult,
|
|
36108
36125
|
failingPackages,
|
|
36109
36126
|
command: lintCommand
|
|
36110
|
-
} = runTurboWithSummary("lint", projectRoot, spawnFn);
|
|
36127
|
+
} = runTurboWithSummary("lint", projectRoot, spawnFn, concurrency);
|
|
36111
36128
|
currentFailing.lint = failingPackages;
|
|
36112
36129
|
const newFailures = resolveNewFailures(
|
|
36113
36130
|
"lint",
|
|
@@ -36132,7 +36149,7 @@ function runCheck(opts) {
|
|
|
36132
36149
|
spawnResult,
|
|
36133
36150
|
failingPackages,
|
|
36134
36151
|
command: typecheckCommand
|
|
36135
|
-
} = runTurboWithSummary("typecheck", projectRoot, spawnFn);
|
|
36152
|
+
} = runTurboWithSummary("typecheck", projectRoot, spawnFn, concurrency);
|
|
36136
36153
|
currentFailing.typecheck = failingPackages;
|
|
36137
36154
|
const newFailures = resolveNewFailures(
|
|
36138
36155
|
"typecheck",
|
|
@@ -36157,7 +36174,7 @@ function runCheck(opts) {
|
|
|
36157
36174
|
spawnResult,
|
|
36158
36175
|
failingPackages,
|
|
36159
36176
|
command: testsCommand
|
|
36160
|
-
} = runTurboWithSummary("test", projectRoot, spawnFn);
|
|
36177
|
+
} = runTurboWithSummary("test", projectRoot, spawnFn, concurrency);
|
|
36161
36178
|
currentFailing.tests = failingPackages;
|
|
36162
36179
|
const newFailures = resolveNewFailures(
|
|
36163
36180
|
"tests",
|
|
@@ -36358,9 +36375,24 @@ function parseCheckArgs(args) {
|
|
|
36358
36375
|
let files;
|
|
36359
36376
|
let updateBaseline = false;
|
|
36360
36377
|
let noBaseline = false;
|
|
36378
|
+
let concurrency;
|
|
36361
36379
|
for (let i = 0; i < args.length; i++) {
|
|
36362
36380
|
const arg = args[i];
|
|
36363
|
-
if (arg === "--scope") {
|
|
36381
|
+
if (arg === "--concurrency") {
|
|
36382
|
+
const val = args[i + 1];
|
|
36383
|
+
const parsed = val !== void 0 ? Number(val) : NaN;
|
|
36384
|
+
if (Number.isInteger(parsed) && parsed > 0) {
|
|
36385
|
+
concurrency = parsed;
|
|
36386
|
+
i++;
|
|
36387
|
+
} else {
|
|
36388
|
+
process.stderr.write(
|
|
36389
|
+
`check: --concurrency value must be a positive integer (got: '${val ?? ""}').
|
|
36390
|
+
`
|
|
36391
|
+
);
|
|
36392
|
+
process.exitCode = 1;
|
|
36393
|
+
return null;
|
|
36394
|
+
}
|
|
36395
|
+
} else if (arg === "--scope") {
|
|
36364
36396
|
const val = args[i + 1];
|
|
36365
36397
|
if (val === "round" || val === "task" || val === "merged") {
|
|
36366
36398
|
scope = val;
|
|
@@ -36392,7 +36424,7 @@ function parseCheckArgs(args) {
|
|
|
36392
36424
|
}
|
|
36393
36425
|
}
|
|
36394
36426
|
}
|
|
36395
|
-
return { scope, json, files, updateBaseline, noBaseline };
|
|
36427
|
+
return { scope, json, files, updateBaseline, noBaseline, concurrency };
|
|
36396
36428
|
}
|
|
36397
36429
|
function emitTable(result) {
|
|
36398
36430
|
const strict = result.no_baseline === true;
|
|
@@ -36465,13 +36497,14 @@ function runCheckCommand(args) {
|
|
|
36465
36497
|
if (parsed === null) {
|
|
36466
36498
|
return;
|
|
36467
36499
|
}
|
|
36468
|
-
const { scope, json, files, updateBaseline, noBaseline } = parsed;
|
|
36500
|
+
const { scope, json, files, updateBaseline, noBaseline, concurrency } = parsed;
|
|
36469
36501
|
const result = runCheck({
|
|
36470
36502
|
scope,
|
|
36471
36503
|
changedFiles: files,
|
|
36472
36504
|
// NO-OP in whole-repo mode; notice emitted by runCheck
|
|
36473
36505
|
updateBaseline,
|
|
36474
|
-
noBaseline
|
|
36506
|
+
noBaseline,
|
|
36507
|
+
concurrency
|
|
36475
36508
|
});
|
|
36476
36509
|
if (json) {
|
|
36477
36510
|
process.stdout.write(JSON.stringify(result, null, 2) + "\n");
|
|
@@ -41006,6 +41039,24 @@ function verifyRound(input) {
|
|
|
41006
41039
|
}
|
|
41007
41040
|
}
|
|
41008
41041
|
}
|
|
41042
|
+
if (output.status === "completed") {
|
|
41043
|
+
const consoleErrors = output.console_errors;
|
|
41044
|
+
if (Array.isArray(consoleErrors) && consoleErrors.length > 0) {
|
|
41045
|
+
failed_checks.push({
|
|
41046
|
+
check: "console_errors_reported",
|
|
41047
|
+
framework,
|
|
41048
|
+
detail: `Framework "${framework}" completed but reported ${consoleErrors.length} console/page error(s) \u2014 a completed run must be console-clean per rules/e2e-mandatory.md.`
|
|
41049
|
+
});
|
|
41050
|
+
}
|
|
41051
|
+
const violations = output.a11y?.violations;
|
|
41052
|
+
if (Array.isArray(violations) && violations.length > 0) {
|
|
41053
|
+
failed_checks.push({
|
|
41054
|
+
check: "a11y_violations_reported",
|
|
41055
|
+
framework,
|
|
41056
|
+
detail: `Framework "${framework}" completed but reported ${violations.length} accessibility violation(s) \u2014 fix in-scope or classify as category 'a11y' failures per rules/e2e-mandatory.md.`
|
|
41057
|
+
});
|
|
41058
|
+
}
|
|
41059
|
+
}
|
|
41009
41060
|
}
|
|
41010
41061
|
return {
|
|
41011
41062
|
pass: failed_checks.length === 0,
|
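package/package.json
CHANGED
|
[hunk not captured in this extract — the file list above records a +1 -1 change, presumably the version field 1.13.55 → 1.13.56]
package/templates/agents/cbp-e2e-maestro.md
CHANGED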
|
@@ -132,7 +132,8 @@ One subdirectory per app module. Shared flows under `_shared/`. Probe under `_pr
|
|
|
132
132
|
|
|
133
133
|
## Spec-Writing Patterns
|
|
134
134
|
|
|
135
|
-
**One flow per screen/feature.**
|
|
135
|
+
**One flow per screen/feature.** A flow that only taps and asserts visibility is NOT done —
|
|
136
|
+
prove behavior:
|
|
136
137
|
|
|
137
138
|
```yaml
|
|
138
139
|
appId: ${APP_ID}
|
|
@@ -141,15 +142,97 @@ tags:
|
|
|
141
142
|
---
|
|
142
143
|
- runFlow: _shared/login.yaml
|
|
143
144
|
- assertVisible: "Dashboard"
|
|
144
|
-
-
|
|
145
|
-
-
|
|
145
|
+
- waitForAnimationToEnd
|
|
146
|
+
- assertNoDefectsWithAI: # AI visual-defect check — see AI Assertions below
|
|
147
|
+
optional: false
|
|
148
|
+
- takeScreenshot: "dashboard-loaded" # NEW states only — see Visual Baselines
|
|
149
|
+
- tapOn:
|
|
150
|
+
text: "Create"
|
|
151
|
+
enabled: true # state selector — waits for interactivity, catches broken gating
|
|
146
152
|
- assertVisible: "New item"
|
|
147
|
-
- takeScreenshot: "create-modal-open"
|
|
148
153
|
```
|
|
149
154
|
|
|
150
155
|
Use text-based targeting first (`tapOn: "Button"`); use testID when ambiguous
|
|
151
|
-
(`tapOn: { id: "btn" }`).
|
|
152
|
-
|
|
156
|
+
(`tapOn: { id: "btn" }`). `text`/`id` are REGEX by default — escape `$` and `[`; quote
|
|
157
|
+
`YES`/`NO`/`ON`/`OFF` (unquoted they parse as YAML booleans).
|
|
158
|
+
|
|
159
|
+
**Assertion depth requirements**:
|
|
160
|
+
|
|
161
|
+
- **State selectors prove logic**: `enabled`, `checked`, `focused`, `selected` — e.g. assert
|
|
162
|
+
Submit is `enabled: false` before required fields are filled, `enabled: true` after.
|
|
163
|
+
- **Data round-trips** via `copyTextFrom` + `assertTrue`: copy the value on screen A
|
|
164
|
+
(snapshot into `output.*` via `evalScript` before the next copy overwrites
|
|
165
|
+
`maestro.copiedText`), navigate, assert screen B shows the same value.
|
|
166
|
+
- **Persistence proof** for create/edit flows — after the UI reports success, verify via
|
|
167
|
+
`runScript` `http.get` against the backend API (`json()` parse + `assertTrue` on the
|
|
168
|
+
field), or at minimum kill + relaunch and re-assert:
|
|
169
|
+
|
|
170
|
+
```yaml
|
|
171
|
+
- killApp
|
|
172
|
+
- launchApp: { stopApp: false }
|
|
173
|
+
- assertVisible: ${output.createdTitle}
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
- For CRUD: create + verify (round-trip); edit + verify updated; delete + confirm + verify
|
|
177
|
+
removed.
|
|
178
|
+
|
|
179
|
+
## Visual Baselines (assertScreenshot)
|
|
180
|
+
|
|
181
|
+
Committed PNGs under `e2e/screenshots/maestro/` are BASELINES, not run artifacts.
|
|
182
|
+
|
|
183
|
+
- **New state** (`git ls-files --error-unmatch <path>` exits non-zero): `waitForAnimationToEnd`,
|
|
184
|
+
then `takeScreenshot: "{flow}-{state}"` and `git add` the PNG (auto-new model).
|
|
185
|
+
- **Existing baseline**: do NOT retake/overwrite. Assert against it:
|
|
186
|
+
|
|
187
|
+
```yaml
|
|
188
|
+
- waitForAnimationToEnd
|
|
189
|
+
- assertScreenshot:
|
|
190
|
+
path: e2e/screenshots/maestro/{flow}-{state}.png
|
|
191
|
+
thresholdPercentage: 95
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
On failure classify `visual_regression`: capture the live screen under a transient
|
|
195
|
+
diagnostic name (`{flow}-{state}-actual`, written to `--test-output-dir`), report it in
|
|
196
|
+
`screenshots[]`, and NEVER overwrite the committed baseline. The user accepts the change at
|
|
197
|
+
`/cbp-verify`; only then is the baseline re-captured and re-added.
|
|
198
|
+
- `baseline_diff_pct` stays `null` (Maestro reports threshold pass/fail, not a percentage);
|
|
199
|
+
set `is_new` per git tracking as before.
|
|
200
|
+
|
|
201
|
+
## AI Assertions (assertNoDefectsWithAI / assertWithAI)
|
|
202
|
+
|
|
203
|
+
Maestro's AI commands screenshot the current screen and detect rendering defects (cut-off
|
|
204
|
+
text, overlapping elements, mis-centered content). Run `assertNoDefectsWithAI` at every
|
|
205
|
+
primary screen state; use `assertWithAI` for states selectors can't express:
|
|
206
|
+
|
|
207
|
+
```yaml
|
|
208
|
+
- assertNoDefectsWithAI:
|
|
209
|
+
optional: false
|
|
210
|
+
- assertWithAI:
|
|
211
|
+
assertion: The 6-digit verification input is visible with all six boxes empty.
|
|
212
|
+
optional: false
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
**Critical**: AI commands default to `optional: true` (warn-only — a detected defect does
|
|
216
|
+
NOT fail the flow). ALWAYS set `optional: false`.
|
|
217
|
+
|
|
218
|
+
**Auth preflight (Step 6.5.1 addition)**: AI commands require Maestro auth — a `maestro login`
|
|
219
|
+
session or `MAESTRO_CLOUD_API_KEY` (a free account suffices; the legacy `MAESTRO_CLI_AI_KEY`
|
|
220
|
+
BYO-key path is retired). Probe before authoring AI steps. When unavailable, ask the user once
|
|
221
|
+
(provide key / skip AI), record `ai_checks: 'unavailable'` in the output, omit AI commands,
|
|
222
|
+
and rely on `assertScreenshot` baselines — never let an AI step fail a run on a missing key.
|
|
223
|
+
AI artifacts (`ai-report-*.html`, `ai-*.json`) land under `--test-output-dir`; reference them
|
|
224
|
+
in `critical_issues[].reason` when a defect is found.
|
|
225
|
+
|
|
226
|
+
## Anti-Patterns
|
|
227
|
+
|
|
228
|
+
- `waitForAnimationToEnd` is NOT an assertion — it succeeds even on timeout; always pair it
|
|
229
|
+
with a real assert or screenshot.
|
|
230
|
+
- Don't wrap whole flows in `retry` (hides product flakiness); bound `repeat` loops with
|
|
231
|
+
`times` + `while` together.
|
|
232
|
+
- No `point:` coordinate taps — device-dependent; combine attribute + relational selectors instead.
|
|
233
|
+
- Don't max out timeouts ("60s everywhere") — defaults catch performance regressions.
|
|
234
|
+
- Platform limits: `back` is Android/Web only; airplane-mode commands are Android-only;
|
|
235
|
+
Android `inputText` is ASCII-only; system biometric/HealthKit dialogs need XCUITest.
|
|
153
236
|
|
|
154
237
|
## Screenshot Capture
|
|
155
238
|
|
|
@@ -161,7 +244,9 @@ Screenshots written to `e2e/screenshots/maestro/` (via `screenshotsDir` in `conf
|
|
|
161
244
|
Committed path convention: `e2e/screenshots/maestro/{flow}-{state}.png` (repo root).
|
|
162
245
|
This path is intentionally outside `apps/web/e2e/screenshots/` (which is gitignored).
|
|
163
246
|
|
|
164
|
-
After the flow completes, `git add
|
|
247
|
+
After the flow completes, `git add` each NEW PNG individually — never `git add` the whole
|
|
248
|
+
directory (that silently stages drifted baselines; existing states are gated by
|
|
249
|
+
`assertScreenshot`, see Visual Baselines).
|
|
165
250
|
|
|
166
251
|
**`is_new` detection**: `git ls-files --error-unmatch <path>` exits non-zero → `is_new: true`.
|
|
167
252
|
|
|
@@ -186,9 +271,13 @@ Include this in the specialist output alongside `screenshots[]`.
|
|
|
186
271
|
## Run Command
|
|
187
272
|
|
|
188
273
|
```bash
|
|
189
|
-
maestro test maestro/flows/{module}/{flow}.yaml --format
|
|
274
|
+
maestro test maestro/flows/{module}/{flow}.yaml --format junit --output maestro/results.xml \
|
|
275
|
+
--test-output-dir maestro/output
|
|
190
276
|
```
|
|
191
277
|
|
|
278
|
+
`maestro/output/` holds transient diagnostics (AI reports, `-actual` regression captures) —
|
|
279
|
+
gitignore it; committed baselines live only under `e2e/screenshots/maestro/`.
|
|
280
|
+
|
|
192
281
|
## pnpm Scripts
|
|
193
282
|
|
|
194
283
|
```json
|
|
package/templates/agents/cbp-e2e-playwright.md
CHANGED
|
@@ -21,7 +21,7 @@ accordingly.
|
|
|
21
21
|
## Install
|
|
22
22
|
|
|
23
23
|
```bash
|
|
24
|
-
pnpm add -D @playwright/test
|
|
24
|
+
pnpm add -D @playwright/test @axe-core/playwright
|
|
25
25
|
pnpm exec playwright install chromium
|
|
26
26
|
# CI with system deps:
|
|
27
27
|
pnpm exec playwright install --with-deps chromium
|
|
@@ -265,32 +265,123 @@ port from `.codebyplan/server.local.json` (worktree overlay, checked first) then
|
|
|
265
265
|
`.codebyplan/server.json` (committed base). On mismatch ask which is correct, then propose
|
|
266
266
|
an Edit to align them.
|
|
267
267
|
|
|
268
|
+
## Quality Fixture (MANDATORY)
|
|
269
|
+
|
|
270
|
+
`apps/{app}/e2e/fixtures.ts` — the single `test` source for ALL specs. It auto-enforces the
|
|
271
|
+
console-clean mandate (an `{ auto: true }` fixture runs in every test with zero per-spec
|
|
272
|
+
opt-in) and provides the axe builder. Create it if absent; when touching an existing spec
|
|
273
|
+
that still imports from `@playwright/test`, migrate its import.
|
|
274
|
+
|
|
275
|
+
```ts
|
|
276
|
+
import { test as base, expect } from "@playwright/test";
|
|
277
|
+
import AxeBuilder from "@axe-core/playwright";
|
|
278
|
+
|
|
279
|
+
// Known, triaged errors only — every entry needs a comment linking its fix task.
|
|
280
|
+
const ALLOWED_CONSOLE: RegExp[] = [];
|
|
281
|
+
|
|
282
|
+
type QualityFixtures = {
|
|
283
|
+
consoleGuard: void;
|
|
284
|
+
makeAxeBuilder: () => AxeBuilder;
|
|
285
|
+
};
|
|
286
|
+
|
|
287
|
+
export const test = base.extend<QualityFixtures>({
|
|
288
|
+
consoleGuard: [
|
|
289
|
+
async ({ page, baseURL }, use) => {
|
|
290
|
+
const errors: string[] = [];
|
|
291
|
+
page.on("console", (msg) => {
|
|
292
|
+
if (msg.type() === "error" && !ALLOWED_CONSOLE.some((re) => re.test(msg.text())))
|
|
293
|
+
errors.push(`console.error: ${msg.text()}`);
|
|
294
|
+
});
|
|
295
|
+
page.on("pageerror", (err) => errors.push(`pageerror: ${err.message}`));
|
|
296
|
+
page.on("requestfailed", (req) => {
|
|
297
|
+
// Own-origin, non-aborted failures only (cancelled prefetches are noise)
|
|
298
|
+
if (baseURL && req.url().startsWith(baseURL) && req.failure()?.errorText !== "net::ERR_ABORTED")
|
|
299
|
+
errors.push(`requestfailed: ${req.method()} ${req.url()} — ${req.failure()?.errorText}`);
|
|
300
|
+
});
|
|
301
|
+
await use();
|
|
302
|
+
expect(errors, "console/page errors captured during test").toEqual([]);
|
|
303
|
+
},
|
|
304
|
+
{ auto: true },
|
|
305
|
+
],
|
|
306
|
+
makeAxeBuilder: async ({ page }, use) => {
|
|
307
|
+
await use(() => new AxeBuilder({ page }).withTags(["wcag2a", "wcag2aa", "wcag21a", "wcag21aa"]));
|
|
308
|
+
},
|
|
309
|
+
});
|
|
310
|
+
export { expect };
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
Collected errors from failing tests feed the `console_errors[]` output field (see Output
|
|
314
|
+
Additions below).
|
|
315
|
+
|
|
268
316
|
## Spec-Writing Patterns
|
|
269
317
|
|
|
270
|
-
**One spec file per page/flow.**
|
|
318
|
+
**One spec file per page/flow.** Specs import `{ test, expect }` from the quality fixture
|
|
319
|
+
(`./fixtures` or relative path) — NEVER directly from `@playwright/test`.
|
|
271
320
|
|
|
272
|
-
|
|
273
|
-
|
|
321
|
+
Mandatory per spec — a spec that only proves elements are visible is NOT done:
|
|
322
|
+
|
|
323
|
+
- Smoke test: loads, title correct (the console guard fails it on any console/page error).
|
|
324
|
+
- Primary user flow: main interaction **with a behavior proof** (below).
|
|
274
325
|
- Visual regression: `toHaveScreenshot` at every primary state.
|
|
326
|
+
- Structure: `toMatchAriaSnapshot` on the primary state — catches hierarchy/label/role
|
|
327
|
+
breakage without pixel fragility.
|
|
328
|
+
- Accessibility: one axe scan per page state, zero violations.
|
|
329
|
+
|
|
330
|
+
### Functional Proof (mutations)
|
|
275
331
|
|
|
276
|
-
|
|
277
|
-
|
|
332
|
+
Every flow that mutates state MUST prove the mutation happened — asserting the optimistic UI
|
|
333
|
+
is not proof:
|
|
278
334
|
|
|
279
335
|
```ts
|
|
280
|
-
|
|
336
|
+
// 1. Prove the API call succeeded
|
|
337
|
+
const resp = page.waitForResponse((r) => r.url().includes("/api/items") && r.request().method() === "POST");
|
|
338
|
+
await page.getByRole("button", { name: "Create" }).click();
|
|
339
|
+
expect((await resp).status()).toBeLessThan(400);
|
|
340
|
+
|
|
341
|
+
// 2. Prove persistence — reload and re-assert (or poll the API for eventual consistency)
|
|
342
|
+
await page.reload();
|
|
343
|
+
await expect(page.getByRole("listitem").filter({ hasText: itemName })).toBeVisible();
|
|
344
|
+
// await expect.poll(async () => (await page.request.get(`/api/items/${id}`)).status()).toBe(200);
|
|
345
|
+
```
|
|
281
346
|
|
|
282
|
-
|
|
283
|
-
test.beforeEach(async ({ page }) => {
|
|
284
|
-
await page.goto("/");
|
|
285
|
-
});
|
|
347
|
+
### Error-State Proof (forms / CRUD)
|
|
286
348
|
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
349
|
+
At least one test per form/CRUD spec injects a failure and asserts the rendered error UI —
|
|
350
|
+
error paths are where untested UIs break in production:
|
|
351
|
+
|
|
352
|
+
```ts
|
|
353
|
+
await page.route("**/api/items", (r) => r.fulfill({ status: 500 }));
|
|
354
|
+
await page.getByRole("button", { name: "Create" }).click();
|
|
355
|
+
await expect(page.getByRole("alert")).toContainText(/failed|went wrong/i);
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
### Permission / RLS Proof
|
|
359
|
+
|
|
360
|
+
When the route is role-gated, include one denial test (lower-privilege storage state or
|
|
361
|
+
seeded non-member): assert the explicit denial UI or redirect — a blank render is a bug,
|
|
362
|
+
not a pass.
|
|
363
|
+
|
|
364
|
+
### Accessibility Scan
|
|
365
|
+
|
|
366
|
+
```ts
|
|
367
|
+
test("a11y: dashboard has no WCAG A/AA violations", async ({ page, makeAxeBuilder }) => {
|
|
368
|
+
await page.goto("/dashboard");
|
|
369
|
+
const results = await makeAxeBuilder().analyze();
|
|
370
|
+
expect(results.violations).toEqual([]);
|
|
291
371
|
});
|
|
292
372
|
```
|
|
293
373
|
|
|
374
|
+
Known issues are excluded via `.disableRules([...])` with a comment linking the fix task —
|
|
375
|
+
never by deleting the scan.
|
|
376
|
+
|
|
377
|
+
### Anti-Patterns (reject in review)
|
|
378
|
+
|
|
379
|
+
- `page.waitForTimeout(...)` — web-first assertions auto-retry; hard sleeps mask races.
|
|
380
|
+
- `expect(await locator.isVisible()).toBe(true)` — one-shot, no retry; use `await expect(locator).toBeVisible()`.
|
|
381
|
+
- `.nth(n)` / `.first()` positional selection — except the documented SCSS-module fallback.
|
|
382
|
+
- In-spec env skips (`test.skip(!process.env.X, ...)`) — forbidden per `rules/e2e-mandatory.md`.
|
|
383
|
+
- Visibility-only assertions after a mutation — see Functional Proof.
|
|
384
|
+
|
|
294
385
|
## Screenshot Capture
|
|
295
386
|
|
|
296
387
|
**Baseline regression** (preferred):
|
|
@@ -332,6 +423,18 @@ when the playwright.config project/device emulation indicates a mobile viewport
|
|
|
332
423
|
|
|
333
424
|
Include this in the specialist output alongside `screenshots[]`.
|
|
334
425
|
|
|
426
|
+
## Output Additions (Playwright)
|
|
427
|
+
|
|
428
|
+
Beyond the shared contract, ALWAYS report:
|
|
429
|
+
|
|
430
|
+
- `console_errors[]` — every entry the console guard collected on failed tests
|
|
431
|
+
(`{test_name, type: 'console' | 'pageerror' | 'requestfailed', text}`). Empty array on a
|
|
432
|
+
clean run — never omit the field.
|
|
433
|
+
- `a11y` — `{scanned_pages: string[], violations: [{rule, impact, page}]}` aggregated from
|
|
434
|
+
the axe scans. A `status: 'completed'` output with non-empty `violations` is inconsistent —
|
|
435
|
+
fix in-scope or classify the failures as category `a11y`; `codebyplan e2e verify-round`
|
|
436
|
+
hard-fails the inconsistency.
|
|
437
|
+
|
|
335
438
|
## Run Command
|
|
336
439
|
|
|
337
440
|
```bash
|
|
package/templates/agents/cbp-verify-reviewer.md
CHANGED
|
@@ -202,6 +202,14 @@ The deterministic e2e gate (`codebyplan e2e verify-round`) and the unit/lint/typ
|
|
|
202
202
|
here). If the diff touches an e2e-eligible UI surface, note it in `summary` so the orchestrator
|
|
203
203
|
confirms its gate ran — but do not assert a build/test result this agent did not run.
|
|
204
204
|
|
|
205
|
+
E2E verdict gates (refuse `READY` per `rules/e2e-mandatory.md`): a zero-assertion run
|
|
206
|
+
(`passed === 0 && skipped > 0` on a touched path); an empty `e2e_gallery[]` when the round
|
|
207
|
+
touched UI for an eligible framework (sole exception: `vscode-test`-only rounds with explicit
|
|
208
|
+
`e2e_gallery: []`); a `status: 'completed'` e2e output carrying non-empty `console_errors[]`
|
|
209
|
+
or `a11y.violations[]`. Treat a `{type: 'shallow_coverage'}` critical issue on a mutation
|
|
210
|
+
surface as a real finding (visibility-only specs prove rendering, not behavior) — severity
|
|
211
|
+
`medium` minimum, routed to a follow-up round.
|
|
212
|
+
|
|
205
213
|
### Phase 6: Build Findings, Verdict & Routing
|
|
206
214
|
|
|
207
215
|
Assign severity by impact: `critical` (runtime error / data corruption / security), `high`
|
|
package/templates/context/testing/e2e.md
CHANGED
|
@@ -54,7 +54,7 @@ output:
|
|
|
54
54
|
- test_name: string
|
|
55
55
|
error: string
|
|
56
56
|
file: string
|
|
57
|
-
category: 'env' | 'auth' | 'access' | 'flake' | 'real' | 'visual_regression'
|
|
57
|
+
category: 'env' | 'auth' | 'access' | 'flake' | 'real' | 'visual_regression' | 'console_error' | 'a11y'
|
|
58
58
|
classification_reason: string
|
|
59
59
|
framework_configured: boolean
|
|
60
60
|
preflight:
|
|
@@ -77,6 +77,17 @@ output:
|
|
|
77
77
|
committed_path: string # repo-relative; MUST be git-tracked after the run
|
|
78
78
|
is_new: boolean # true => no prior baseline; auto-captured+committed this run
|
|
79
79
|
baseline_diff_pct: number | null # null for non-playwright frameworks
|
|
80
|
+
console_errors: # REQUIRED for playwright (empty array on a clean run);
|
|
81
|
+
- test_name: string # null/omitted for frameworks without console capture
|
|
82
|
+
type: 'console' | 'pageerror' | 'requestfailed'
|
|
83
|
+
text: string
|
|
84
|
+
a11y: # REQUIRED for playwright; null/omitted otherwise
|
|
85
|
+
scanned_pages: string[]
|
|
86
|
+
violations:
|
|
87
|
+
- rule: string # axe rule id (e.g. color-contrast)
|
|
88
|
+
impact: string # critical | serious | moderate | minor
|
|
89
|
+
page: string
|
|
90
|
+
ai_checks: 'ran' | 'unavailable' | null # maestro only — AI assertion availability (see agent body)
|
|
80
91
|
user_interactions: [{question, answer}]
|
|
81
92
|
tech_stack_reconciliation:
|
|
82
93
|
db_framework: string | null
|
|
@@ -177,12 +188,32 @@ For each failed test, assign exactly one category:
|
|
|
177
188
|
| `auth` | Login-page redirect, 401 after credential submit, `invalid_grant`, `email_not_confirmed` | AskUserQuestion per Step 6.5.3 |
|
|
178
189
|
| `access` | 403/404 on an accessible route, RLS denial text, missing seed data | AskUserQuestion: "Test failed with access error: `{error}`. Options: (1) fix + reply steps, (2) abort." |
|
|
179
190
|
| `flake` | Timeout on first run, passes on immediate retry, network jitter | Retry up to 3 times before reclassifying to `real` |
|
|
180
|
-
| `visual_regression` | `toHaveScreenshot`
|
|
191
|
+
| `visual_regression` | `toHaveScreenshot` / `assertScreenshot` diff exceeded threshold | Do NOT retry. Include baseline + actual paths in `screenshots[]` with `baseline_diff_pct`. Do NOT auto-accept baselines. |
|
|
192
|
+
| `console_error` | Console guard collected console/page/request errors during the flow | App defect — fix in-scope or report; never allowlist without a linked fix task |
|
|
193
|
+
| `a11y` | Axe scan reported WCAG A/AA violations | Do NOT retry. Report rule ids in `a11y.violations`; fix in-scope or surface at `/cbp-verify` |
|
|
181
194
|
| `real` | Assertion failure on app behavior (wrong text, state, navigation) | Attempt fix (selector, timeout, assertion), max 3 attempts, then report |
|
|
182
195
|
|
|
183
196
|
`env`, `auth`, `access` failures MUST NOT count toward `test_results.failed` until
|
|
184
197
|
preflight passes — they block the run instead.
|
|
185
198
|
|
|
199
|
+
## Functional Assertion Mandate
|
|
200
|
+
|
|
201
|
+
Visibility-only specs are NOT sufficient coverage — they prove rendering, not behavior.
|
|
202
|
+
Every spec/flow covering a mutation (create / edit / delete / submit) MUST include at
|
|
203
|
+
least one behavior proof (network and/or persistence), plus an error-state test per surface:
|
|
204
|
+
|
|
205
|
+
- **network success proof** — response-status assertion on the mutating call
|
|
206
|
+
(`waitForResponse` in Playwright; `runScript` `http.*` in Maestro), AND/OR
|
|
207
|
+
- **persistence proof** — reload / kill-and-relaunch / direct API re-read showing the
|
|
208
|
+
change survived, PLUS
|
|
209
|
+
- **one error-state test per form/CRUD surface** — inject a failure (`page.route` 500 in
|
|
210
|
+
Playwright) and assert the rendered error UI.
|
|
211
|
+
|
|
212
|
+
When a suite's assertions are entirely visibility/navigation-level, the specialist MUST
|
|
213
|
+
report a `critical_issues[]` entry `{type: 'shallow_coverage', ...}` — the run may pass, but
|
|
214
|
+
the gap is flagged for the next round. `cbp-verify-reviewer` treats `shallow_coverage` on a
|
|
215
|
+
mutation surface as a finding, not noise.
|
|
216
|
+
|
|
186
217
|
## Committed-Screenshot Mandate
|
|
187
218
|
|
|
188
219
|
Every eligible e2e run MUST persist relevant screenshots to the framework's committed
|
|
@@ -215,6 +246,11 @@ classify as `visual_regression`. Do NOT auto-update. Surface as a blocking accep
|
|
|
215
246
|
at `/cbp-verify` (round scope). The user must explicitly approve (`--update-snapshots`) or open a
|
|
216
247
|
fix task. This relaxes the prior always-manual contract ONLY for new screens.
|
|
217
248
|
|
|
249
|
+
The model applies to ALL screenshot-capable frameworks, not just Playwright: Maestro gates
|
|
250
|
+
existing baselines with `assertScreenshot` against the committed PNG (the agent never
|
|
251
|
+
retakes/overwrites an existing baseline without approval; acceptance = re-capture + `git add` after user
|
|
252
|
+
approval at `/cbp-verify`).
|
|
253
|
+
|
|
218
254
|
## Screenshot Collection Rule
|
|
219
255
|
|
|
220
256
|
After every run, enumerate all committed PNGs and populate BOTH `screenshots[]` and
|
|
@@ -242,6 +278,11 @@ New-screen auto-capture (above) is the only exception to the always-manual contr
|
|
|
242
278
|
- `tests_run === true`
|
|
243
279
|
- `preflight.*.ok === true` for every required prerequisite
|
|
244
280
|
- Every failure has `category` other than `env`, `auth`, or `access`
|
|
281
|
+
- `console_errors[]` is empty and `a11y.violations[]` is empty (where the framework reports
|
|
282
|
+
them — Playwright always does). Non-empty values with `status: 'completed'` are
|
|
283
|
+
inconsistent and hard-fail `codebyplan e2e verify-round` (`console_errors_reported`,
|
|
284
|
+
`a11y_violations_reported`); either fix in-scope or return `status: 'failed'` with the
|
|
285
|
+
matching failure category.
|
|
245
286
|
|
|
246
287
|
Otherwise return `status: 'failed'`.
|
|
247
288
|
|
|
@@ -17,9 +17,10 @@
|
|
|
17
17
|
# Two jobs:
|
|
18
18
|
# ci SOFT tier (authoritative required check) — the baseline-tolerant
|
|
19
19
|
# inner loop: lint, typecheck, test, build across the repo.
|
|
20
|
-
# ci-strict HARDCORE tier
|
|
21
|
-
# `codebyplan check --scope merged --no-baseline`.
|
|
22
|
-
#
|
|
20
|
+
# ci-strict HARDCORE tier — whole-repo ABSOLUTE GREEN via
|
|
21
|
+
# `codebyplan check --scope merged --no-baseline`. Report-only by
|
|
22
|
+
# default; set `workflow.strict_check_enforced: true` in
|
|
23
|
+
# `.codebyplan/ci.json` to make it a real gate (then run `codebyplan ci enforce-check`).
|
|
23
24
|
|
|
24
25
|
name: CI
|
|
25
26
|
|
|
@@ -69,19 +70,21 @@ jobs:
|
|
|
69
70
|
- name: Build
|
|
70
71
|
run: pnpm turbo build
|
|
71
72
|
|
|
72
|
-
# ── HARDCORE strict tier
|
|
73
|
+
# ── HARDCORE strict tier ────────────────────────────────────────────────────
|
|
73
74
|
# Whole-repo ABSOLUTE GREEN: `codebyplan check --scope merged --no-baseline`
|
|
74
75
|
# ignores .check-baseline.json entirely, so ANY failing package (lint,
|
|
75
|
-
# typecheck, test) fails this job. This is the
|
|
76
|
+
# typecheck, test) fails this job. This is the checkpoint→main gate.
|
|
76
77
|
#
|
|
77
|
-
# report-only
|
|
78
|
-
# `
|
|
79
|
-
#
|
|
80
|
-
#
|
|
78
|
+
# report-only vs enforced is driven by `.codebyplan/ci.json`
|
|
79
|
+
# `workflow.strict_check_enforced` (scaffold-ci-workflow substitutes the
|
|
80
|
+
# tokens below): when false (default) the job name carries " (report-only)"
|
|
81
|
+
# and `continue-on-error: true` keeps it non-blocking; when true the suffix is
|
|
82
|
+
# dropped and `continue-on-error` is omitted (defaults to false), making it a real gate. Only flip the
|
|
83
|
+
# flag once the whole repo is absolute-green AND the job has run green in CI,
|
|
84
|
+
# then add it to branch protection via `codebyplan ci enforce-check`.
|
|
81
85
|
ci-strict:
|
|
82
|
-
name: Strict whole-repo green
|
|
83
|
-
runs-on: ubuntu-latest
|
|
84
|
-
continue-on-error: true
|
|
86
|
+
name: Strict whole-repo green{{STRICT_NAME_SUFFIX}}
|
|
87
|
+
runs-on: ubuntu-latest{{STRICT_CONTINUE_ON_ERROR_LINE}}
|
|
85
88
|
steps:
|
|
86
89
|
- name: Checkout
|
|
87
90
|
uses: actions/checkout@v4
|
|
@@ -112,10 +115,12 @@ jobs:
|
|
|
112
115
|
# In the monorepo run the freshly-built bundle directly (the bin shim may
|
|
113
116
|
# be missing because dist/cli.js did not exist at install time); in a
|
|
114
117
|
# consumer repo that path is absent, so fall back to the installed bin.
|
|
118
|
+
# --concurrency=1 serializes turbo so the whole-repo matrix does not
|
|
119
|
+
# CPU-starve timing-sensitive test suites into flaky timeouts on the runner.
|
|
115
120
|
- name: Strict check (no baseline)
|
|
116
121
|
run: |
|
|
117
122
|
if [ -f packages/codebyplan-package/dist/cli.js ]; then
|
|
118
|
-
node packages/codebyplan-package/dist/cli.js check --scope merged --no-baseline
|
|
123
|
+
node packages/codebyplan-package/dist/cli.js check --scope merged --no-baseline --concurrency=1
|
|
119
124
|
else
|
|
120
|
-
pnpm exec codebyplan check --scope merged --no-baseline
|
|
125
|
+
pnpm exec codebyplan check --scope merged --no-baseline --concurrency=1
|
|
121
126
|
fi
|
|
@@ -73,6 +73,27 @@ The sole exception is `vscode-test`: the committed dir may be empty when the ext
|
|
|
73
73
|
has no visual output (behavior-only tests). Agents must still define the dir and report
|
|
74
74
|
`e2e_gallery: []` explicitly — not omit the field.
|
|
75
75
|
|
|
76
|
+
## Quality-Capture Mandates
|
|
77
|
+
|
|
78
|
+
A green run that captured no quality signals is not evidence. Per framework:
|
|
79
|
+
|
|
80
|
+
- **playwright**: every spec imports `test` from the shared quality fixture
|
|
81
|
+
(`e2e/fixtures.ts`) — console/pageerror guard auto-active in every test; one axe WCAG A/AA
|
|
82
|
+
scan per page state. The output MUST carry `console_errors[]` (empty on clean) and `a11y`
|
|
83
|
+
per `context/testing/e2e.md`.
|
|
84
|
+
- **maestro**: existing committed screenshots are baselines — gate them with
|
|
85
|
+
`assertScreenshot` (never retake/overwrite); run `assertNoDefectsWithAI` with
|
|
86
|
+
`optional: false` at primary states when Maestro auth is available (the default
|
|
87
|
+
`optional: true` is warn-only and forbidden; record `ai_checks: 'unavailable'` when auth
|
|
88
|
+
is absent).
|
|
89
|
+
- A `status: 'completed'` output carrying non-empty `console_errors[]` or
|
|
90
|
+
`a11y.violations[]` is inconsistent — `codebyplan e2e verify-round` hard-fails it
|
|
91
|
+
(`console_errors_reported`, `a11y_violations_reported`).
|
|
92
|
+
|
|
93
|
+
Mutation flows MUST carry a behavior proof per `context/testing/e2e.md` § Functional
|
|
94
|
+
Assertion Mandate (network success proof / persistence proof / error-state test);
|
|
95
|
+
visibility-only suites are flagged `{type: 'shallow_coverage'}` in `critical_issues[]`.
|
|
96
|
+
|
|
76
97
|
## Cross-References
|
|
77
98
|
|
|
78
99
|
- `context/testing/e2e.md` — Input/Output contract, pre-flight loop, failure classification,
|
|
@@ -47,13 +47,21 @@ The branch model is **feat→main direct**; `.codebyplan/git.json` has `integrat
|
|
|
47
47
|
IS the per-checkpoint feat branch. The hardcore tier runs against that feat branch's merged
|
|
48
48
|
state before it lands on main; do not assume a staging/integration hop exists.
|
|
49
49
|
|
|
50
|
-
##
|
|
50
|
+
## Strict-Tier Enforcement (report-only ⇄ enforced)
|
|
51
51
|
|
|
52
|
-
The whole-repo hardcore CI **job**
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
52
|
+
The whole-repo hardcore CI **job** (`ci-strict`) is config-driven via `.codebyplan/ci.json`
|
|
53
|
+
`workflow.strict_check_enforced`, which `codebyplan ci scaffold-workflow` substitutes into the
|
|
54
|
+
generated `.github/workflows/ci.yml`:
|
|
55
|
+
|
|
56
|
+
- **`false` (default)** — report-only: the job carries the " (report-only)" name suffix and
|
|
57
|
+
`continue-on-error: true`, so `--scope merged --no-baseline` is advisory in CI — surfaced, not
|
|
58
|
+
enforced. A repo whose baseline is still red keeps merging while it pays the baseline down.
|
|
59
|
+
- **`true`** — enforced: the suffix is dropped and `continue-on-error` is omitted (defaults to
|
|
60
|
+
`false`), making the job a real gate. Flip ONLY after the whole repo is absolute-green AND the
|
|
61
|
+
job has already run green in CI, then wire the check name `Strict whole-repo green` into branch
|
|
62
|
+
protection via `codebyplan ci enforce-check --check-name "Strict whole-repo green"`.
|
|
63
|
+
|
|
64
|
+
Locally, `cbp-verify` runs and reports the same check regardless of the flag.
|
|
57
65
|
|
|
58
66
|
## Cross-References
|
|
59
67
|
|