qfai 1.8.2 → 1.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/README.md +9 -4
  2. package/assets/init/.qfai/assistant/agents/product-experience-architect.md +2 -1
  3. package/assets/init/.qfai/assistant/skills/qfai-atdd/SKILL.md +4 -4
  4. package/assets/init/.qfai/assistant/skills/qfai-configure/SKILL.md +1 -1
  5. package/assets/init/.qfai/assistant/skills/qfai-discussion/SKILL.md +1 -0
  6. package/assets/init/.qfai/assistant/skills/qfai-discussion/references/rcp_footer.md +1 -1
  7. package/assets/init/.qfai/assistant/skills/qfai-implement/SKILL.md +3 -1
  8. package/assets/init/.qfai/assistant/skills/qfai-prototyping/SKILL.md +121 -62
  9. package/assets/init/.qfai/assistant/skills/qfai-prototyping/references/evidence-requirements.md +43 -12
  10. package/assets/init/.qfai/assistant/skills/qfai-prototyping/references/iteration-cycle.md +46 -14
  11. package/assets/init/.qfai/assistant/skills/qfai-prototyping/references/l1-review-guide.md +13 -12
  12. package/assets/init/.qfai/assistant/skills/qfai-prototyping/references/l2-review-guide.md +16 -10
  13. package/assets/init/.qfai/assistant/skills/qfai-prototyping/references/reviewer-gate.md +25 -4
  14. package/assets/init/.qfai/assistant/skills/qfai-sdd/SKILL.md +3 -3
  15. package/assets/init/.qfai/assistant/skills/qfai-sdd/references/rcp_footer.md +1 -1
  16. package/assets/init/.qfai/assistant/skills/qfai-sdd/references/sdd-quality-gate.md +1 -1
  17. package/assets/init/.qfai/assistant/skills/qfai-sdd/templates/contracts/absorption-policy.sample.yaml +7 -0
  18. package/assets/init/.qfai/assistant/skills/qfai-sdd/templates/contracts/evaluation-rubric.sample.yaml +20 -3
  19. package/assets/init/.qfai/assistant/skills/qfai-sdd/templates/contracts/evaluator-calibration.sample.yaml +6 -0
  20. package/assets/init/.qfai/assistant/skills/qfai-sdd/templates/contracts/exploration-brief.sample.yaml +9 -0
  21. package/assets/init/.qfai/assistant/skills/qfai-verify/SKILL.md +6 -6
  22. package/assets/init/.qfai/contracts/design/README.md +6 -1
  23. package/assets/init/.qfai/contracts/ui/README.md +2 -0
  24. package/assets/init/.qfai/discussion/README.md +14 -9
  25. package/assets/init/.qfai/evidence/README.md +66 -46
  26. package/assets/init/root/.github/workflows/qfai-validate.yml +39 -0
  27. package/assets/init/root/qfai.config.yaml +1 -2
  28. package/dist/cli/index.cjs +2539 -927
  29. package/dist/cli/index.cjs.map +1 -1
  30. package/dist/cli/index.mjs +2624 -1012
  31. package/dist/cli/index.mjs.map +1 -1
  32. package/dist/index.cjs +1120 -421
  33. package/dist/index.cjs.map +1 -1
  34. package/dist/index.d.cts +95 -23
  35. package/dist/index.d.ts +95 -23
  36. package/dist/index.mjs +1114 -414
  37. package/dist/index.mjs.map +1 -1
  38. package/package.json +3 -2
  39. package/assets/scripts/capture-screenshots.js +0 -128
@@ -2,10 +2,12 @@
2
2
 
3
3
  L1 checks implementation fidelity.
4
4
 
5
- ## Inputs
5
+ ## Inputs (read from review-bundle.json)
6
6
 
7
- - screenshots
8
- - HTML snapshots
7
+ - screenshots (round/candidate path, per declared screen)
8
+ - HTML snapshots (round/candidate path, per declared screen)
9
+ - accessibility snapshots (round/candidate path, per declared screen)
10
+ - Playwright CLI command log (round/candidate path, per declared screen)
9
11
  - canonical UI contracts from `.qfai/contracts/ui/*.yaml`
10
12
  - latest code state
11
13
 
@@ -13,24 +15,23 @@ L1 checks implementation fidelity.
13
15
 
14
16
  For each declared screen:
15
17
 
16
- - the screen is reachable/rendered
17
- - screenshot exists
18
- - HTML snapshot exists
19
- - required elements are visibly present
20
- - required actions are wired or explicitly marked missing
18
+ - the screen is reachable/rendered (confirm via goto log in command log)
19
+ - screenshot, HTML, accessibility snapshot, and command log all exist at the round/candidate path
20
+ - required elements are visibly present (cross-check screenshot + HTML + snapshot)
21
+ - required actions are wired or explicitly marked missing (cross-check interaction commands in the command log vs `primaryTasks`)
21
22
  - blocking UI failures are identified
22
23
 
23
24
  ## Failure handling
24
25
 
25
- - Missing screenshot or HTML => score `0`, rerun required
26
+ - Missing any of the 4 per-screen artifacts => score `0`, rerun required
26
27
  - Missing primary action wiring => blocking finding
27
28
  - Severe route/render failure => blocking finding
28
29
 
29
30
  ## Output
30
31
 
31
- Return:
32
+ Write `evaluator-reviews/<candidate-id>.json` with:
32
33
 
33
34
  - per-screen findings
34
35
  - blocking/immediate-fix classification
35
- - a numeric score per axis in the range `0.0..1.0`
36
- - rationale tied to screenshot/HTML evidence
36
+ - a numeric score per axis in the range `0..100`
37
+ - rationale tied to screenshot / HTML / snapshot / command log refs (all entries in `evidenceRefs[]` MUST be concrete paths to existing artifacts)
@@ -2,14 +2,19 @@
2
2
 
3
3
  L2 checks product experience and design alignment.
4
4
 
5
- ## Inputs
6
-
7
- - screenshots
8
- - HTML snapshots
9
- - `.qfai/contracts/design/evaluation-axes.yaml`
10
- - `.qfai/contracts/design/anchor-selection.yaml`
5
+ ## Inputs (read from review-bundle.json)
6
+
7
+ - screenshots (round/candidate path, per declared screen)
8
+ - HTML snapshots (round/candidate path, per declared screen)
9
+ - accessibility snapshots (round/candidate path, per declared screen)
10
+ - Playwright CLI command log (round/candidate path, per declared screen)
11
+ - `.qfai/contracts/design/evaluation-rubric.yaml`
12
+ - `.qfai/contracts/design/selected-direction.yaml`
11
13
  - `.qfai/contracts/design/design-system.yaml`
12
- - previous iteration score
14
+ - legacy inputs, if present (skip if absent):
15
+ - `.qfai/contracts/design/evaluation-axes.yaml`
16
+ - `.qfai/contracts/design/anchor-selection.yaml`
17
+ - previous round score
13
18
 
14
19
  ## 3-layer evaluation family
15
20
 
@@ -27,13 +32,14 @@ L2 must explicitly use all of:
27
32
  - product-specific differentiation is visible
28
33
  - selected anchor direction is reflected in the current UI
29
34
  - design system checklist is respected
35
+ - interaction outcomes in the command log are consistent with the experience the designer intended
30
36
  - experience findings are recorded separately from blocking L1 findings
31
37
 
32
38
  ## Output
33
39
 
34
- Return:
40
+ Write `evaluator-reviews/<candidate-id>.json` with:
35
41
 
36
42
  - per-axis findings
37
43
  - revise/manual-review classification
38
- - a numeric score per axis in the range `0.0..1.0`
39
- - rationale tied to screenshot/HTML evidence and axis refs
44
+ - a numeric score per axis in the range `0..100`
45
+ - rationale tied to screenshot / HTML / snapshot / command log refs and axis refs (all entries in `evidenceRefs[]` MUST be concrete paths to existing artifacts)
@@ -1,15 +1,26 @@
1
1
  # Reviewer Gate
2
2
 
3
- The reviewer is an independent gate, not the implementation author.
3
+ The reviewer is an independent gate, not the implementation author. The reviewer gate applies identically to all modes (spec-0012); modes differ only in `maxCycles`.
4
4
 
5
5
  ## Reviewer must verify
6
6
 
7
- - all declared screens have screenshot evidence
8
- - all declared screens have HTML snapshot evidence
7
+ - all declared screens have all 4 per-screen artifacts for every active candidate in every round (screenshot, HTML, accessibility snapshot, command log)
8
+ - canonical latest paths mirror the newest accepted winner/polish state
9
+ - every round has `command-plans.json`, `review-bundle.json`, and per-candidate evaluator reviews
10
+ - `review-bundle.json` contains all required fields (candidates, axisDefs, designSystemChecklist, commandPlanRef)
11
+ - evaluator review `evidenceRefs[]` entries are concrete artifact refs (no placeholders)
9
12
  - L1 and L2 evaluators used the required inputs
10
13
  - the 3-layer evaluation family was referenced
11
14
  - missing evidence triggered rerun rather than waiver
12
- - `qfai validate --fail-on error` passed
15
+ - `qfai validate --profile prototyping --fail-on error` passed
16
+ - `prototyping.json` `maxCycles` matches the mode (no mode invariant violations)
17
+ - winner_selected is true
18
+ - post_selection_polish_completed is true
19
+ - breakthrough_checked is true
20
+ - best_of_history_present is true
21
+ - all_reviewer_axes_perfect_100 is true
22
+ - completion_eligible is true only after the completion certificate is valid
23
+ - no completion claim is based on a 95-point threshold
13
24
 
14
25
  ## Reviewer output
15
26
 
@@ -21,4 +32,14 @@ Required fixes:
21
32
  - ...
22
33
  Evidence checked:
23
34
  - ...
35
+ Gate fields:
36
+ - mode: low-cost|standard|full-harness
37
+ - maxCycles: <number matching mode>
38
+ - winner_selected: true|false
39
+ - post_selection_polish_completed: true|false
40
+ - breakthrough_checked: true|false
41
+ - best_of_history_present: true|false
42
+ - all_reviewer_axes_perfect_100: true|false
43
+ - completion_eligible: true|false
44
+ - completion_certificate_valid: true|false
24
45
  ```
@@ -200,7 +200,7 @@ Follow `.qfai/assistant/instructions/shared-skill-operating-baseline.md#delta-re
200
200
  - `05_Examples.md` must include `EX-ID` and `BR-Ref` mappings.
201
201
  - `06_Test-Cases.md` must include `TC-ID`, `EX-Ref`, `AC-Refs`, and `Type`.
202
202
  - `06_Test-Cases.md` quality depth must include normal-path plus error or boundary coverage.
203
- - Do not complete the stage until `qfai validate --fail-on error --format github | tee .qfai/report/validate.log` exits with `error=0`.
203
+ - Do not complete the stage until `qfai validate --profile sdd --fail-on error --format github | tee .qfai/report/validate.log` exits with `error=0`.
204
204
  - Reference direction rules from `.qfai/specs/README.md` must be enforced.
205
205
  - Keep `specs/` definition-only and operational status under `.qfai/report/run-*`.
206
206
  - Traceability depth and density-smell review rules live in:
@@ -246,7 +246,7 @@ The canonical file set is defined by skill templates under `.qfai/assistant/skil
246
246
  8. Execute Phase 2 (Slice) and pass slice gate for each target spec.
247
247
  9. Execute Phase 3 (Plan finalize) after at least one slice gate passes.
248
248
  10. Execute Phase 4 (Delta update).
249
- 11. Run `qfai validate --fail-on error --format github | tee .qfai/report/validate.log`.
249
+ 11. Run `qfai validate --profile sdd --fail-on error --format github | tee .qfai/report/validate.log`.
250
250
  12. Review `.qfai/report/specs-coverage/spec-*.md` and triage density-smell warnings.
251
251
  13. If validate fails, fix source-layer artifacts and repeat until `error=0`.
252
252
 
@@ -314,7 +314,7 @@ When declaring DONE, include:
314
314
  - [ ] `10_Plan.md` is finalized as How-only.
315
315
  - [ ] `specs/plan.md` was not created.
316
316
  - [ ] `09_delta.md` (or `*_delta.md`) contains adoption/rejection rationale.
317
- - [ ] `qfai validate --fail-on error --format github` ran and produced `error=0`.
317
+ - [ ] `qfai validate --profile sdd --fail-on error --format github` ran and produced `error=0`.
318
318
  - [ ] `.qfai/report/specs-coverage/spec-*.md` was reviewed.
319
319
  - [ ] Quality gate checks are recorded in evidence.
320
320
  - [ ] Evidence file exists and is complete.
@@ -31,7 +31,7 @@
31
31
 
32
32
  ## Validate Hard Gate(必須)
33
33
 
34
- - 各 review cycle で `qfai validate --fail-on error --format github` を実行していること
34
+ - 各 review cycle で `qfai validate --profile sdd --fail-on error --format github` を実行していること
35
35
  - `.qfai/report/validate.log` が存在し、最新の成果物に対応していること
36
36
 
37
37
  ---
@@ -20,7 +20,7 @@ Use this file for the full quality gate checklist behind `/qfai-sdd`.
20
20
 
21
21
  ## Validation Checks
22
22
 
23
- - `qfai validate --fail-on error --format github | tee .qfai/report/validate.log`
23
+ - `qfai validate --profile sdd --fail-on error --format github | tee .qfai/report/validate.log`
24
24
  - `error=0`
25
25
  - `.qfai/report/specs-coverage/spec-*.md` reviewed
26
26
  - Density-smell warnings triaged
@@ -0,0 +1,7 @@
1
+ # Downstream absorption policy generated by /qfai-sdd
2
+ minAbsorptionsPerSurvivor: 2
3
+ require_rejected_reason: true
4
+ allow_adapt_required: true
5
+ coherence_review:
6
+ block_on_regression_alert: true
7
+ block_on_blocking_findings: true
@@ -8,9 +8,26 @@ axes:
8
8
  weight: 1
9
9
  - id: functionality
10
10
  weight: 1
11
- hard_floors:
12
- - functionality
13
- - accessibility-risk
11
+ - id: accessibility-risk
12
+ weight: 1
13
+ - id: implementation-plausibility
14
+ weight: 1
14
15
  weighted_axes:
15
16
  - design-quality
16
17
  - originality
18
+ hard_floors:
19
+ - id: functionality
20
+ min_score: 80
21
+ - id: accessibility-risk
22
+ min_score: 80
23
+ - id: conceptFit
24
+ min_score: 85
25
+ absorbable_categories:
26
+ - layout
27
+ - interaction
28
+ - content-hierarchy
29
+ - visual-language
30
+ - navigation
31
+ - motion
32
+ coherence:
33
+ regression_threshold: 5
@@ -1,9 +1,15 @@
1
1
  # Downstream evaluator calibration generated by /qfai-sdd
2
2
  good_critique_examples:
3
3
  - Skeptical, specific, and actionable feedback
4
+ - Names which candidate strengths should be harvested and why
4
5
  too_lenient_examples:
5
6
  - Praise that ignores blandness
6
7
  blandness_fail_examples:
7
8
  - Technically correct but generic
8
9
  originality_fail_examples:
9
10
  - Library defaults with no product-specific decisions
11
+ concept_fit_fail_examples:
12
+ - A refinement that improves novelty but breaks the exploration brief anchors
13
+ coherence_regression_red_flags:
14
+ - Newly absorbed ideas overpower the selected concept
15
+ - Navigation or hierarchy changes make the surface feel like a different product
@@ -8,3 +8,12 @@ brand_signals:
8
8
  - Confident
9
9
  differentiation_targets:
10
10
  - Avoid generic dashboard defaults
11
+ parallel_candidate_routing: required
12
+ concept_anchors:
13
+ - id: CONCEPT-0001
14
+ statement: Calm operational confidence
15
+ - id: CONCEPT-0002
16
+ statement: Guided depth without clutter
17
+ non_goals:
18
+ - Loud novelty-first visuals
19
+ - Route switching that depends on hidden runtime state
@@ -95,7 +95,7 @@ Use the shared schema.
95
95
  - Follow `.qfai/assistant/instructions/shared-skill-delegation-baseline.md#reviewer-gate-baseline`.
96
96
  - Reviewer checks:
97
97
  - required roles were delegated;
98
- - validate evidence exists: `qfai validate --fail-on error` completed with `error=0`;
98
+ - validate evidence exists: `qfai validate --profile verify --fail-on error` completed with `error=0`;
99
99
  - declared screens have mandatory screenshot and HTML evidence under `.qfai/evidence/prototyping/`;
100
100
  - Drift Protocol enforced;
101
101
  - test-layer policy enforced against `test-layers.md`.
@@ -132,7 +132,7 @@ Follow `.qfai/assistant/instructions/shared-skill-operating-baseline.md#delta-re
132
132
  - `.qfai/evidence/` is intentionally NOT tracked by Git (it ships with a local `.gitignore`).
133
133
  - Do NOT commit evidence files; summarize key outcomes in the PR description instead.
134
134
  - You MUST run the mandatory checks listed below and record outcomes.
135
- - In CI, you MUST keep QFAI validation on default/full mode (`qfai validate --fail-on error`). Do NOT use `--phase refinement`.
135
+ - In CI, you MUST keep QFAI validation on full-scan mode (`qfai validate --profile verify --fail-on error` or default `qfai validate --fail-on error`). Do NOT use partial profiles.
136
136
  - Waivers are only for `warning` / `info` findings. If a waiver attempts to suppress an `error`, treat it as a failure and fix the root cause.
137
137
  - You MUST stop and escalate if any gate fails without an actionable fix list.
138
138
  - Completion must be approved by a reviewer who did not run the gates.
@@ -148,7 +148,7 @@ Run quality gates and produce evidence that the change is correct and safe.
148
148
  ## Success Criteria (Definition of Done)
149
149
 
150
150
  - Repo quality gates PASS (format/lint/type/test/build/etc).
151
- - QFAI checks PASS (at minimum: `qfai validate`, and optionally `qfai report`).
151
+ - QFAI checks PASS (at minimum: `qfai validate --profile verify`, and optionally `qfai report`).
152
152
  - Declared screens have mandatory screenshot and HTML evidence.
153
153
  - A concise evidence summary exists (copy‑paste for PR).
154
154
  - The PR-ready summary includes **Change Classification (Primary/Tags)** per `.qfai/assistant/instructions/change-classification.md`.
@@ -341,12 +341,12 @@ If unknown, propose defaults and mark assumptions.
341
341
 
342
342
  Run (adjust as needed):
343
343
 
344
- - `qfai validate --fail-on error`
344
+ - `qfai validate --profile verify --fail-on error`
345
345
  - `qfai report` (if used in this repo)
346
346
 
347
347
  Notes:
348
348
 
349
- - CI must run default/full validation only. `--phase refinement` is local-only.
349
+ - CI must run full-scan validation only (`--profile verify` or the default profile). Partial profiles are for local skill checks only.
350
350
  - If `QFAI-WAIVER-002` appears, remove the invalid waiver and resolve the underlying `error` finding.
351
351
 
352
352
  Capture:
@@ -454,7 +454,7 @@ Evidence must include:
454
454
  1. QFAI validation:
455
455
 
456
456
  ```bash
457
- qfai validate --fail-on error
457
+ qfai validate --profile verify --fail-on error
458
458
  ```
459
459
 
460
460
  2. Repository standard gates (discover from package.json/CI/docs):
@@ -6,6 +6,8 @@ Provide the downstream execution truth for exploration-first prototyping and fin
6
6
 
7
7
  These files are version-managed and may be read directly by `/qfai-prototyping`, `/qfai-implement`, `/qfai-atdd`, and `qfai validate`.
8
8
 
9
+ > **Prototyping harness (spec-0012)**: `evaluation-rubric.yaml` is the source of evaluator axes, absorbable categories, and concept-fit hard floors. `absorption-policy.yaml` defines minimum absorption and curation expectations between rounds. `design-system.yaml` remains the downstream checklist for winner extraction and polish.
10
+
9
11
  ## Status After Init
10
12
 
11
13
  After `qfai init`, this directory contains only this README. This is the normal initial state. `/qfai-sdd` creates design files when a UI-bearing capability is normalized for downstream execution.
@@ -17,8 +19,9 @@ The absence of design files is not a defect for non-UI capabilities. For UI-bear
17
19
  Typical files:
18
20
 
19
21
  - `exploration-brief.yaml` — machine-readable exploration brief generated from discussion
20
- - `evaluation-rubric.yaml` — machine-readable evaluator rubric with weighted originality/design criteria
22
+ - `evaluation-rubric.yaml` — machine-readable evaluator rubric with weighted axes, hard floors, and absorbable categories
21
23
  - `evaluator-calibration.yaml` — evaluator alignment examples and anti-leniency guidance
24
+ - `absorption-policy.yaml` — round-to-round absorption thresholds and curation rules
22
25
  - `selected-direction.yaml` — current winning direction, rationale, and carry-forward rules
23
26
  - `design-system.yaml` — extracted final design system produced after direction convergence
24
27
  - `design-tokens*.yaml` — optional token definitions
@@ -28,6 +31,7 @@ Typical files:
28
31
  - `exploration-brief.yaml`
29
32
  - `evaluation-rubric.yaml`
30
33
  - `evaluator-calibration.yaml`
34
+ - `absorption-policy.yaml`
31
35
  - `selected-direction.yaml`
32
36
  - `design-system.yaml`
33
37
  - `design-tokens.yaml`
@@ -38,3 +42,4 @@ Typical files:
38
42
  - **Not** a replacement for specs or UI contracts
39
43
  - **Not** an excuse for downstream skills to read discussion-side artifacts directly
40
44
  - **Not** a place to finalize a winner before prototyping convergence
45
+ - **Not** a place to store round evidence; that belongs under `.qfai/evidence/prototyping/`
@@ -7,6 +7,8 @@ The contract must describe screen structure, action coverage targets, and stable
7
7
 
8
8
  > **Note:** UI contracts are the downstream execution truth for screen obligations. `/qfai-sdd` may derive them from discussion-side exploration, but `/qfai-prototyping`, `/qfai-implement`, and `/qfai-atdd` must read `contracts/ui/*.yaml` instead of reading `discussion-*/uiux/40_screen_contracts.md` directly.
9
9
 
10
+ > **Prototyping harness (spec-0012)**: `screens[].id`, `screens[].route`, and `screens[].primary_tasks` (snake_case in YAML; surfaced as `primaryTasks` in the parsed `CanonicalScreenContract`) feed the round-based command-plan builders consumed by the AI evaluator sub-agent. Changes to screen IDs or routes must propagate to `.qfai/evidence/prototyping/rounds/<rN>/candidates/<candidate-id>/<screen-id>.*` evidence.
11
+
10
12
  ## File rules
11
13
 
12
14
  - File name: `ui-XXXX-<slug>.yaml`
@@ -123,11 +123,15 @@ UI-bearing discussion packs (`ui_bearing: true`) may include a `prototyping.yaml
123
123
 
124
124
  ### Canonical namespaced schema (when present)
125
125
 
126
+ The three modes (`low-cost`, `standard`, `full-harness`) share the same evidence obligations and validator gates; the only mode-dependent value is `maxCycles` (1 / 3 / 20). Choose `recommended_mode` based on the project's iteration budget, and list every mode the project allows under `allowed_modes`.
127
+
126
128
  ```yaml
127
129
  prototyping:
128
- recommended_mode: full-harness
129
- rationale: Exploration-first prototyping requires the full-harness runtime loop in packages/qfai.
130
+ recommended_mode: standard
131
+ rationale: Standard mode (maxCycles=3) matches our review cadence.
130
132
  allowed_modes:
133
+ - low-cost
134
+ - standard
131
135
  - full-harness
132
136
  surface: web
133
137
  ```
@@ -136,18 +140,19 @@ prototyping:
136
140
 
137
141
  If you create this artifact, populate all 4 fields.
138
142
 
139
- | Field | Required | Description |
140
- | ------------------ | -------- | ---------------------------------------------- |
141
- | `recommended_mode` | yes | `full-harness` |
142
- | `rationale` | yes | Non-empty string explaining the recommendation |
143
- | `allowed_modes` | yes | Unique array; must contain only `full-harness` |
144
- | `surface` | yes | `web`, `mobile`, `desktop`, or `mixed` |
143
+ | Field | Required | Description |
144
+ | ------------------ | -------- | -------------------------------------------------------------------------- |
145
+ | `recommended_mode` | yes | One of `low-cost`, `standard`, `full-harness` |
146
+ | `rationale` | yes | Non-empty string explaining the recommendation |
147
+ | `allowed_modes` | yes | Unique non-empty array drawn from `low-cost` / `standard` / `full-harness` |
148
+ | `surface` | yes | `web`, `mobile`, `desktop`, or `mixed` |
145
149
 
146
150
  ### Current behavior
147
151
 
148
152
  - Current discussion-pack readiness does not block on missing `prototyping.yaml`.
149
153
  - When `prototyping.yaml` is present, prefer the canonical namespaced schema under the `prototyping:` key.
150
- - `recommended_mode` should be included in `allowed_modes`. In packages/qfai, this means `recommended_mode` should be `full-harness` and `allowed_modes` should contain only `full-harness`.
154
+ - `recommended_mode` MUST be included in `allowed_modes`.
155
+ - Mode invariant (spec-0017 REQ-0001): the three modes share obligations except for `maxCycles` (1 for `low-cost`, 3 for `standard`, 20 for `full-harness`). Picking a different mode does not relax any other gate.
151
156
 
152
157
  ## Suggested naming
153
158
 
@@ -3,75 +3,95 @@
3
3
  ## Purpose
4
4
 
5
5
  Evidence files record what was actually executed and observed.
6
- `packages/qfai` v1.7.15 treats prototyping as `full-harness` only and UI-only.
6
+ `packages/qfai` treats prototyping as a Playwright CLI + AI evaluator harness with unified obligations across all modes (spec-0012).
7
7
 
8
8
  ## Prototyping artifacts
9
9
 
10
- Canonical files:
10
+ Round-scoped artifacts (for each round `<rN>`):
11
11
 
12
- - `.qfai/evidence/prototyping.md`
13
- - `.qfai/evidence/prototyping.json`
14
- - `.qfai/evidence/render.json`
15
- - `.qfai/evidence/browser-qa.json`
16
- - `.qfai/evidence/fullHarness.exit.json`
17
- - `.qfai/evidence/fullHarness.handoff.json`
18
- - `.qfai/evidence/fullHarness.fakeUiDetection.json`
12
+ - `.qfai/evidence/prototyping/rounds/<rN>/command-plans.json` — candidate-aware Playwright CLI command plans
13
+ - `.qfai/evidence/prototyping/rounds/<rN>/review-bundle.json` — evaluator input bundle
14
+ - `.qfai/evidence/prototyping/rounds/<rN>/evaluator-reviews/<candidate-id>.json` — evaluator output per candidate
15
+ - `.qfai/evidence/prototyping/rounds/<rN>/harvest.json` — harvest template for `r5|r3|r2`
16
+ - `.qfai/evidence/prototyping/rounds/<rN>/narrow-decision.json` — survivor decision for `r5|r3|r2`
17
+ - `.qfai/evidence/prototyping/rounds/<rN>/absorption-plan.json` — absorption plan for `r3|r2|r1`
18
+ - `.qfai/evidence/prototyping/rounds/<rN>/reimplementation.json` — reimplementation record for `r3|r2|r1`
19
+ - `.qfai/evidence/prototyping/rounds/<rN>/candidates/<candidate-id>/<screen-id>.png` — screenshot per declared screen
20
+ - `.qfai/evidence/prototyping/rounds/<rN>/candidates/<candidate-id>/<screen-id>.html` — HTML snapshot per declared screen
21
+ - `.qfai/evidence/prototyping/rounds/<rN>/candidates/<candidate-id>/<screen-id>.snapshot.txt` — accessibility snapshot per declared screen
22
+ - `.qfai/evidence/prototyping/rounds/<rN>/candidates/<candidate-id>/<screen-id>.commands.json` — executed command log per declared screen
23
+
24
+ Cross-round rollups:
25
+
26
+ - `.qfai/evidence/prototyping.json` — `rounds[]` / `polishCycles[]` rollup with best-of-history / breakthrough / reviewer gate sections
27
+ - `.qfai/evidence/prototyping.md` — reviewer-readable summary
28
+ - `.qfai/evidence/breakthrough.json` — breakthrough decisions
29
+
30
+ Canonical latest paths (mirror the newest accepted winner/polish state):
31
+
32
+ - `.qfai/evidence/prototyping/screenshots/<screen-id>.png`
33
+ - `.qfai/evidence/prototyping/html/<screen-id>.html`
19
34
 
20
35
  ## Execution contract
21
36
 
22
37
  Supported prototyping surfaces are `web`, `mobile`, `desktop`, and `mixed`.
23
38
  `cli`, API-only, backend-only, and `ui_bearing: false` classifications are not prototyping execution targets.
24
39
 
25
- ## Obligation matrix
40
+ Browser tool: `playwright-cli` (the only supported value per spec-0012).
26
41
 
27
- | surface / mode | specs | runtimeGate | uiFidelity | render evidence | browser QA | fullHarness |
28
- | ---------------------- | -------- | ----------- | ---------- | --------------- | ---------- | ----------- |
29
- | web / full-harness | required | required | required | required | required | required |
30
- | mobile / full-harness | required | required | required | required | required | required |
31
- | desktop / full-harness | required | required | required | required | required | required |
32
- | mixed / full-harness | required | required | required | required | required | required |
42
+ ## Obligation matrix (spec-0012)
33
43
 
34
- `low-cost` and `standard` are unsupported in `packages/qfai` v1.7.15.
44
+ Mode invariant: every row below is identical except for the `maxCycles` column. `maxCycles` is the only mode-dependent field.
45
+
46
+ | surface / mode | specs | runtimeGate | uiFidelity | playwright evidence | reviewBundle | evaluatorReview | bestOfHistory | breakthrough | reviewerGate | maxCycles |
47
+ | ---------------------- | -------- | ----------- | ---------- | ------------------- | ------------ | --------------- | ------------- | ------------ | ------------ | --------- |
48
+ | web / low-cost | required | required | required | required | required | required | required | required | required | 1 |
49
+ | web / standard | required | required | required | required | required | required | required | required | required | 3 |
50
+ | web / full-harness | required | required | required | required | required | required | required | required | required | 20 |
51
+ | mobile / low-cost | required | required | required | required | required | required | required | required | required | 1 |
52
+ | mobile / standard | required | required | required | required | required | required | required | required | required | 3 |
53
+ | mobile / full-harness | required | required | required | required | required | required | required | required | required | 20 |
54
+ | desktop / low-cost | required | required | required | required | required | required | required | required | required | 1 |
55
+ | desktop / standard | required | required | required | required | required | required | required | required | required | 3 |
56
+ | desktop / full-harness | required | required | required | required | required | required | required | required | required | 20 |
57
+ | mixed / low-cost | required | required | required | required | required | required | required | required | required | 1 |
58
+ | mixed / standard | required | required | required | required | required | required | required | required | required | 3 |
59
+ | mixed / full-harness | required | required | required | required | required | required | required | required | required | 20 |
60
+
61
+ Choosing a lower mode buys fewer cycles, not a weaker gate.
35
62
 
36
63
  ## Truthfulness rules
37
64
 
38
- - `mode.effective` must be `full-harness`.
39
- - `uiFidelity.mode` must be `interactive`.
40
- - Canonical screen contracts in `discussion-*/uiux/40_screen_contracts.md` are mandatory.
41
- - Browser QA is mandatory per screen.
42
- - Calibration is resolved from `fullHarness.calibrationRef.packPath`; scalar caller overrides are invalid.
43
- - `runtimeGate.evidenceRefs` must contain concrete render/browser QA/spec refs only.
44
- - `specCoverage` refs must use concrete declared refs plus concrete observed refs. Self-reference and synthetic strings are invalid.
65
+ - `mode.effective` must be one of `low-cost`, `standard`, `full-harness`.
66
+ - `maxCycles` must match `PROTOTYPING_MAX_CYCLES[mode]` or `QFAI-PROT-MODE-001` is raised.
67
+ - Browser tool must be `playwright-cli`.
68
+ - `uiFidelity.mode` must be `interactive` (captured via the Playwright CLI command plans).
69
+ - Evidence capture is performed by the AI evaluator sub-agent via the Playwright CLI command plans generated by QFAI.
70
+ - Canonical screen contracts in `.qfai/contracts/ui/*.yaml` are mandatory.
71
+ - evaluator review `evidenceRefs[]` entries must contain concrete artifact refs that point to existing files; placeholders (`""`, `"tbd"`, `"TBD"`) are rejected.
72
+ - Canonical latest paths must mirror the newest accepted winner/polish artifacts.
45
73
  - `mockPaths` is a negative-only ledger. Allowed values are `fail|finding` only.
46
74
 
47
- ## fullHarness semantics
48
-
49
- Required fields:
75
+ ## Prototyping completion gate (spec-0012)
50
76
 
51
- - `enabled = true`
52
- - `runId`
53
- - `calibrationRef.configPath`
54
- - `calibrationRef.packPath`
55
- - `calibrationRef.packVersion`
56
- - `iterationCount`
57
- - `bestIteration`
58
- - `status`
59
- - `reviewerSignoff`
60
- - `reviewerLogs`
61
- - `iterations`
62
- - `scoringTrace`
63
- - `limitations`
77
+ Completion requires all of the following for every mode:
64
78
 
65
- Review semantics:
79
+ - all 4 per-screen artifacts present for every declared screen in the completion round / polish cycle
80
+ - at least one `polish` cycle completed after winner selection
81
+ - `bestOfHistory` present
82
+ - `breakthrough` present
83
+ - independent reviewer gate returned `PASS`
84
+ - every reviewer sub-agent scored every evaluation axis at `100/100`
85
+ - `qfai validate --profile prototyping --fail-on error` passes
66
86
 
67
- - `finalDecision = accepted` -> `reviewerSignoff.status = approved`
68
- - `finalDecision = rejected` -> `reviewerSignoff.status = rejected`
69
- - `finalDecision = abandoned` -> `reviewerSignoff.status = abandoned`
70
- - `reviewerLogs[last].verdict` must align with the final decision and termination semantics.
87
+ If the polish-cycle budget is exhausted before the gate is satisfied, the run does not complete and returns `REVISE`.
71
88
 
72
89
  ## Prohibited patterns
73
90
 
74
- - `low-cost` or `standard` prototyping metadata
91
+ - `browserProvider` or `renderProvider` config keys (rejected per spec-0012)
92
+ - `playwright-mcp` as the standard browser tool
93
+ - Node Playwright direct invocation for evidence capture
94
+ - Mode differences other than `maxCycles`
75
95
  - `cli` prototyping execution
76
96
  - self-reference such as `prototyping.json#/runtimeGate`
77
97
  - synthetic refs such as `specs: ...`
@@ -0,0 +1,39 @@
1
+ # QFAI validate CI workflow
2
+ #
3
+ # Generated by `qfai init` (spec-0017 REQ-0009). Runs `qfai validate --profile
4
+ # full --fail-on error` on every push to main/master and every pull request.
5
+ #
6
+ # The `full` profile includes the QFAI-TEST-001 test-todo stub gate, so any
7
+ # `it.todo` / `test.todo` / `describe.todo` in your test suite will fail CI
8
+ # (you can opt out via `validation.testStrategy.forbidTestTodoStubs: false`
9
+ # in qfai.config.yaml).
10
+ #
11
+ # If your project uses pnpm / yarn instead of npm, replace the `npm ci` step
12
+ # with your package manager's install command (e.g. `pnpm install
13
+ # --frozen-lockfile` + `pnpm/action-setup@v4`). `npx qfai` works with any
14
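+ # Node package manager. Also change `cache: npm` in the setup-node step to match your package manager (e.g. `cache: pnpm`).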
15
+
16
+ name: qfai validate
17
+
18
+ on:
19
+ push:
20
+ branches: [main, master]
21
+ pull_request:
22
+
23
+ jobs:
24
+ validate:
25
+ name: qfai validate (full profile, fail on error)
26
+ runs-on: ubuntu-latest
27
+ timeout-minutes: 10
28
+ steps:
29
+ - uses: actions/checkout@v4
30
+ - uses: actions/setup-node@v4
31
+ with:
32
+ # Match the QFAI repo's CI baseline (Node 20 LTS) and the package
33
+ # `engines: ">=18.0.0"`. Bump deliberately when QFAI raises its
34
+ # supported floor.
35
+ node-version: "20"
36
+ cache: npm
37
+ - run: npm ci
38
+ - name: qfai validate
39
+ run: npx qfai validate --profile full --fail-on error
@@ -40,5 +40,4 @@ prototyping:
40
40
  packPath: .qfai/evidence/calibration.yaml
41
41
  execution:
42
42
  targetUrl: null
43
- browserProvider: playwright
44
- renderProvider: playwright
43
+ browserTool: playwright-cli