qfai 1.8.0 → 1.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/README.md +1 -1
  2. package/assets/init/.qfai/assistant/agents/frontend-engineer.md +2 -2
  3. package/assets/init/.qfai/assistant/agents/product-experience-architect.md +2 -2
  4. package/assets/init/.qfai/assistant/skills/qfai-atdd/SKILL.md +4 -0
  5. package/assets/init/.qfai/assistant/skills/qfai-configure/SKILL.md +2 -1
  6. package/assets/init/.qfai/assistant/skills/qfai-discussion/SKILL.md +60 -330
  7. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/14_Review-Request.md +15 -16
  8. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/00_index.md +13 -21
  9. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/30_exploration_brief.md +29 -0
  10. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/31_reference_pool.md +13 -0
  11. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/32_design_anti_goals.md +10 -0
  12. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/33_exploration_rubric.md +27 -0
  13. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/34_evaluator_calibration.md +17 -0
  14. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/50_review_input_bundle.md +16 -22
  15. package/assets/init/.qfai/assistant/skills/qfai-implement/SKILL.md +7 -5
  16. package/assets/init/.qfai/assistant/skills/qfai-prototyping/SKILL.md +187 -132
  17. package/assets/init/.qfai/assistant/skills/qfai-prototyping/references/design-system-compliance.md +22 -0
  18. package/assets/init/.qfai/assistant/skills/qfai-prototyping/references/evidence-requirements.md +31 -0
  19. package/assets/init/.qfai/assistant/skills/qfai-prototyping/references/iteration-cycle.md +25 -0
  20. package/assets/init/.qfai/assistant/skills/qfai-prototyping/references/l1-review-guide.md +36 -0
  21. package/assets/init/.qfai/assistant/skills/qfai-prototyping/references/l2-review-guide.md +39 -0
  22. package/assets/init/.qfai/assistant/skills/qfai-prototyping/references/reviewer-gate.md +24 -0
  23. package/assets/init/.qfai/assistant/skills/qfai-sdd/SKILL.md +18 -9
  24. package/assets/init/.qfai/assistant/skills/qfai-sdd/templates/contracts/design-system.sample.yaml +22 -0
  25. package/assets/init/.qfai/assistant/skills/qfai-sdd/templates/contracts/evaluation-rubric.sample.yaml +16 -0
  26. package/assets/init/.qfai/assistant/skills/qfai-sdd/templates/contracts/evaluator-calibration.sample.yaml +9 -0
  27. package/assets/init/.qfai/assistant/skills/qfai-sdd/templates/contracts/exploration-brief.sample.yaml +10 -0
  28. package/assets/init/.qfai/assistant/skills/qfai-sdd/templates/contracts/selected-direction.sample.yaml +7 -0
  29. package/assets/init/.qfai/assistant/skills/qfai-verify/SKILL.md +3 -0
  30. package/assets/init/.qfai/assistant/steering/agent-catalog.yml +1 -1
  31. package/assets/init/.qfai/assistant/steering/ui-definition-protocol.md +6 -6
  32. package/assets/init/.qfai/contracts/README.md +17 -10
  33. package/assets/init/.qfai/contracts/design/README.md +23 -15
  34. package/assets/init/.qfai/contracts/ui/README.md +9 -8
  35. package/assets/init/.qfai/discussion/README.md +18 -18
  36. package/assets/uix-rev/comparison-review.md +8 -10
  37. package/assets/uix-rev/contracts-review.md +1 -1
  38. package/assets/uix-rev/scoring-review.md +20 -46
  39. package/assets/uix-rev/strategy-review.md +11 -16
  40. package/dist/cli/index.cjs +7709 -16321
  41. package/dist/cli/index.cjs.map +1 -1
  42. package/dist/cli/index.mjs +7776 -16388
  43. package/dist/cli/index.mjs.map +1 -1
  44. package/dist/index.cjs +13589 -20963
  45. package/dist/index.cjs.map +1 -1
  46. package/dist/index.d.cts +196 -589
  47. package/dist/index.d.ts +196 -589
  48. package/dist/index.mjs +10289 -17651
  49. package/dist/index.mjs.map +1 -1
  50. package/package.json +1 -1
  51. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/10_implementation_strategy.md +0 -38
  52. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/11_design_taste_interview.md +0 -45
  53. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/12_design_system.md +0 -115
  54. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/20_design_eval_invariant.md +0 -68
  55. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/21_design_eval_trend_derived.md +0 -130
  56. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/22_design_eval_product_specific.md +0 -68
  57. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/23_design_eval_aggregate.md +0 -53
  58. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/24_design_eval_dynamic_overrides.md +0 -28
  59. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/30_option_comparison.md +0 -56
  60. package/assets/init/.qfai/assistant/skills/qfai-discussion/templates/uiux/31_selected_anchor_screen.md +0 -42
@@ -0,0 +1,29 @@
1
+ # 30 Exploration Brief
2
+
3
+ ## Product Intent
4
+
5
+ - What the product should feel like:
6
+ - What users should immediately understand:
7
+
8
+ ## Must-preserve Interactions
9
+
10
+ - Primary task:
11
+ - Secondary task:
12
+ - Critical state changes:
13
+
14
+ ## Brand Signals
15
+
16
+ - Desired tone:
17
+ - Desired visual character:
18
+ - Must-avoid brand signals:
19
+
20
+ ## Differentiation Targets
21
+
22
+ - How this surface should avoid generic layouts:
23
+ - Where deliberate originality should show up:
24
+
25
+ ## Implementation Constraints
26
+
27
+ - Technical constraints:
28
+ - Accessibility constraints:
29
+ - Operational constraints:
@@ -0,0 +1,13 @@
1
+ # 31 Reference Pool
2
+
3
+ ## Exploration References
4
+
5
+ | Ref | Type | Why it matters | Adopted points | Rejected points | Local translation |
6
+ | ------- | ------- | -------------- | -------------- | --------------- | ----------------- |
7
+ | REF-001 | Product | [why] | [adopted] | [rejected] | [translation] |
8
+
9
+ ## Design Guideline Research
10
+
11
+ | Ref | Guideline | Rule refs | Why it matters | Local translation |
12
+ | ------ | ----------- | ----------- | -------------- | ----------------- |
13
+ | GL-001 | [guideline] | [rule refs] | [why] | [translation] |
@@ -0,0 +1,10 @@
1
+ # 32 Design Anti-goals
2
+
3
+ ## Anti-goals
4
+
5
+ - Avoid generic library-default dashboards with no product character.
6
+ - Avoid AI-slop patterns such as purple gradients over white cards without product rationale.
7
+
8
+ ## Recurrence Prevention
9
+
10
+ - If a later iteration drifts toward a rejected direction, log the trigger and explicitly restate why the pattern is banned.
@@ -0,0 +1,27 @@
1
+ # 33 Exploration Rubric
2
+
3
+ ## Design Quality
4
+
5
+ - What counts as coherent:
6
+ - What breaks coherence:
7
+
8
+ ## Originality
9
+
10
+ - What counts as deliberate design:
11
+ - What counts as generic or AI-slop:
12
+
13
+ ## Craft
14
+
15
+ - Typography, spacing, color, and contrast competence:
16
+
17
+ ## Functionality
18
+
19
+ - Whether users can understand and complete the core task:
20
+
21
+ ## Accessibility Risk
22
+
23
+ - Which issues are hard fails:
24
+
25
+ ## Implementation Plausibility
26
+
27
+ - What level of complexity is acceptable for the current slice:
@@ -0,0 +1,17 @@
1
+ # 34 Evaluator Calibration
2
+
3
+ ## Good Critique
4
+
5
+ - Example of a skeptical but actionable critique:
6
+
7
+ ## Too Lenient
8
+
9
+ - Example of praise that should be rejected because it ignores obvious blandness or usability issues:
10
+
11
+ ## Blandness Fail
12
+
13
+ - Example of a design that is technically competent but too generic to pass:
14
+
15
+ ## Originality Fail
16
+
17
+ - Example of a design that copies defaults without deliberate product-specific choices:
@@ -6,33 +6,27 @@ Consolidate all sidecar artifacts into a review-ready bundle for design reviewer
6
6
 
7
7
  ## Bundle Contents
8
8
 
9
- | Artifact | Path | Status |
10
- | -------------------------- | ------------------------------------------ | ------------------------- |
11
- | Strategy | `uiux/10_implementation_strategy.md` | [draft/reviewed/approved] |
12
- | Taste interview | `uiux/11_design_taste_interview.md` | [draft/reviewed/approved] |
13
- | Trend scan | `04_Sources.md#Trend Scan` | [draft/reviewed/approved] |
14
- | Invariant layer | `uiux/20_design_eval_invariant.md` | [draft/reviewed/approved] |
15
- | Trend-derived layer | `uiux/21_design_eval_trend_derived.md` | [draft/reviewed/approved] |
16
- | Product-specific layer | `uiux/22_design_eval_product_specific.md` | [draft/reviewed/approved] |
17
- | Aggregate layer | `uiux/23_design_eval_aggregate.md` | [draft/reviewed/approved] |
18
- | Dynamic overrides | `uiux/24_design_eval_dynamic_overrides.md` | [optional] |
19
- | Option comparison | `uiux/30_option_comparison.md` | [draft/reviewed/approved] |
20
- | Selected anchor | `uiux/31_selected_anchor_screen.md` | [draft/reviewed/approved] |
21
- | Screen contracts | `uiux/40_screen_contracts.md` | [draft/reviewed/approved] |
22
- | Prototyping recommendation | `../prototyping.yaml` | [draft/reviewed/approved] |
9
+ | Artifact | Path | Status |
10
+ | -------------------------- | ---------------------------------- | ------------------------- |
11
+ | Exploration brief | `uiux/30_exploration_brief.md` | [draft/reviewed/approved] |
12
+ | Reference pool | `uiux/31_reference_pool.md` | [draft/reviewed/approved] |
13
+ | Design anti-goals | `uiux/32_design_anti_goals.md` | [draft/reviewed/approved] |
14
+ | Exploration rubric | `uiux/33_exploration_rubric.md` | [draft/reviewed/approved] |
15
+ | Evaluator calibration | `uiux/34_evaluator_calibration.md` | [draft/reviewed/approved] |
16
+ | Screen contracts | `uiux/40_screen_contracts.md` | [draft/reviewed/approved] |
17
+ | Prototyping recommendation | `../prototyping.yaml` | [draft/reviewed/approved] |
23
18
 
24
19
  ## Trend-derived review focus
25
20
 
26
- - Required trend categories are all present and complete.
21
+ - Required references are all present and complete.
27
22
  - Stale / overused AI slop patterns are explicitly avoided.
28
- - Trend research is translated into scoring, comparison, and selected anchor decisions.
29
- - Scoring-ready axes use canonical fields: `origin`, `layer`, `source_refs`, `goal_refs`, `evidence_required`, `review_questions`.
23
+ - Reference research is translated into exploration and evaluator calibration inputs.
24
+ - Later iterations are not automatically preferred over stronger middle iterations.
30
25
 
31
26
  ## Review Checklist
32
27
 
33
- - [ ] Strategy aligns with surface type and project constraints
34
- - [ ] Trend categories are complete and translated into local design decisions
35
- - [ ] Competitive references include adopted_points, rejected_points, and local_translation
36
- - [ ] Scoring-ready axes expose canonical fields including origin/source_refs/goal_refs/evidence_required/review_questions
37
- - [ ] Selected anchor clearly documents rationale and downstream implications
28
+ - [ ] Exploration brief aligns with surface type and project constraints
29
+ - [ ] Reference pool is complete and translated into local design decisions
30
+ - [ ] Evaluator calibration includes skeptical critique examples
31
+ - [ ] Best-of-history handling is explicit
38
32
  - [ ] Screen contracts cover all required states
@@ -75,11 +75,13 @@ Execute the TDD micro-cycle for each pending item in `test-list.md`, transitioni
75
75
  ## Visual Review Guard
76
76
 
77
77
  - Review rendered output, screenshot evidence, or HTML output before closing any UI-affecting item.
78
- - Read the sidecar family first (selected anchor, strategy, screen contracts) whenever implementation touches UI or critique-driven behavior.
79
- - Read order: option comparison (30_option_comparison.md) → selected anchor screen (31_selected_anchor_screen.md) →
80
- strategy (10_implementation_strategy.md) → taste interview (11_design_taste_interview.md) →
81
- trend scan (04_Sources.md#Trend Scan) → 3-layer evaluation family (20/21/22/23 + optional 24) →
82
- screen contracts (40_screen_contracts.md) → review input bundle (50_review_input_bundle.md) →
78
+ - Read spec + contract inputs first whenever implementation touches UI or critique-driven behavior.
79
+ - Read order: `01_Spec.md` → `03_Acceptance-Criteria.md` → `05_Examples.md` →
80
+ `.qfai/contracts/design/exploration-brief.yaml` →
81
+ `.qfai/contracts/design/anchor-selection.yaml` (legacy alias, when present) →
82
+ `.qfai/contracts/design/evaluation-axes.yaml` (legacy alias, when present) →
83
+ `.qfai/contracts/design/evaluation-rubric.yaml` → `.qfai/contracts/design/evaluator-calibration.yaml` →
84
+ `.qfai/contracts/design/selected-direction.yaml` → `.qfai/contracts/design/design-system.yaml` → `.qfai/contracts/ui/*.yaml` →
83
85
  optional design tokens → optional fallback mock → mermaid flows.
84
86
  - If code intent and rendered output diverge, treat the rendered/HTML result as the blocking review input and reconcile before DONE.
85
87
 
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: qfai-prototyping
3
- title: QFAI Prototyping (Full-Harness Only)
4
- description: "Build a contract-aligned UI prototype and block completion until full-harness evidence and validate gate pass."
3
+ title: QFAI Prototyping (Exploration-First Harness)
4
+ description: "Run a planner/generator/evaluator UI harness with a 5→3→2→1 direction funnel, breakthrough detection, and final design-system extraction."
5
5
  argument-hint: "[--auto]"
6
6
  allowed-tools: [Read, Glob, Write, TodoWrite, Task, Bash]
7
7
  roles:
@@ -24,173 +24,225 @@ mode: execution-focused
24
24
 
25
25
  [DRIFT-PROTOCOL:MANDATORY]
26
26
 
27
- This skill is static-first for planning and file review, but the package execution contract is `full-harness` only.
28
- Do not default or downgrade prototyping modes.
27
+ This skill owns prototyping orchestration directly.
28
+ Do not rely on a CLI entrypoint or package runtime loop.
29
29
 
30
30
  ## CRITICAL CONSTRAINTS (Read First)
31
31
 
32
32
  - Scope is all specs from `.qfai/specs/spec-*`.
33
- - Evidence is mandatory in markdown + json under `.qfai/evidence/`.
34
- - DONE is forbidden until prototyping evidence, reviewer gate, and `qfai validate --fail-on error` pass.
35
- - Supported prototyping surfaces are `web`, `mobile`, `desktop`, and `mixed`.
33
+ - Screenshot evidence and HTML snapshot evidence are mandatory.
34
+ - Screenshot evidence path: `.qfai/evidence/prototyping/screenshots/<screen-id>.png`
35
+ - HTML snapshot path: `.qfai/evidence/prototyping/html/<screen-id>.html`
36
+ - If either screenshot or HTML is missing for a declared screen, that screen scores `0` and the run is incomplete.
37
+ - Optional evidence is abolished. Missing mandatory evidence must trigger rerun, not waiver.
38
+ - DONE is forbidden until `qfai validate --fail-on error` passes and `/qfai-verify` can approve the run.
39
+ - Supported UI prototyping surfaces are `web`, `mobile`, `desktop`, and `mixed`.
36
40
  - `cli`, API-only, backend-only, and `ui_bearing: false` classifications are not prototyping execution targets.
37
- - Canonical screen contracts in `discussion-*/uiux/40_screen_contracts.md` are mandatory.
38
- - Browser QA, render evidence, runtimeGate, uiFidelity, specCoverage, and `fullHarness` are mandatory.
39
- - `uiFidelity` is screen-level and must be built from real render/browser evidence.
40
- - `mockPaths` is a negative-only issue ledger with `fail|finding` only.
41
- - Calibration pack is the SSOT. Runtime and validator both resolve from `calibrationRef.packPath`.
42
- - `--reviewer <id>` is mandatory and placeholder reviewer ids are rejected.
43
- - L1 and L2 findings must be fixed or dispositioned before PASS.
41
+ - `cli` is not supported and is not an execution target for prototyping.
42
+ - Evaluation is performed by sub-agents; machine checks are limited to schema/evidence validation and breakthrough trigger detection.
43
+ - Shared evidence vocabulary includes `render.json`, `browser-qa.json`, `prototyping.json`, and `breakthrough.json`.
44
+ - static-first evidence capture remains mandatory even when interactive review is used.
44
45
 
45
46
  ## Goal
46
47
 
47
- Build the minimum runnable slice for all specs and produce canonical `full-harness` evidence under `.qfai/evidence/`.
48
+ Generate multiple design directions, converge on a winner, extract the selected direction and final design system, and keep the winner open to breakthrough pivots during later polish iterations.
48
49
 
49
- ## Mode
50
+ ## Surface / Mode
50
51
 
51
- ### Full-harness
52
+ - surface / mode routing uses `standard` as the default execution path.
53
+ - `standard` is the default when no explicit escalation to `full-harness` is requested.
54
+ - `full-harness` is reserved for explicit escalation and review-heavy obligations.
52
55
 
53
- - Full-harness is the package default when prototyping execution is valid.
54
- - Each `qfai prototyping run --mode full-harness --reviewer <id>` invocation records exactly one measured iteration.
55
- - Multiple iterations are formed only by real code changes between runs.
56
- - The runtime does not self-modify code and does not fabricate evidence.
56
+ ## Required References
57
57
 
58
- ## Obligation matrix
58
+ Read and follow these references before execution:
59
59
 
60
- | surface / mode | specs | runtimeGate | uiFidelity | render evidence | browser QA | fullHarness |
61
- | ---------------------- | -------- | ----------- | ---------- | --------------- | ---------- | ----------- |
62
- | web / full-harness | required | required | required | required | required | required |
63
- | mobile / full-harness | required | required | required | required | required | required |
64
- | desktop / full-harness | required | required | required | required | required | required |
65
- | mixed / full-harness | required | required | required | required | required | required |
60
+ - `.qfai/assistant/skills/qfai-prototyping/references/evidence-requirements.md`
61
+ - `.qfai/assistant/skills/qfai-prototyping/references/iteration-cycle.md`
62
+ - `.qfai/assistant/skills/qfai-prototyping/references/l1-review-guide.md`
63
+ - `.qfai/assistant/skills/qfai-prototyping/references/l2-review-guide.md`
64
+ - `.qfai/contracts/design/anchor-selection.yaml` when legacy validator slices are exercised
65
+ - `.qfai/contracts/design/evaluation-axes.yaml` when legacy validator slices are exercised
66
+ - `.qfai/assistant/skills/qfai-prototyping/references/design-system-compliance.md`
67
+ - `.qfai/assistant/skills/qfai-prototyping/references/reviewer-gate.md`
68
+ - `.qfai/assistant/steering/test-layers.md`
66
69
 
67
- ## Required evidence
70
+ ## Delegation Scope Table
68
71
 
69
- ## Evidence (MANDATORY)
72
+ All sub-agent delegation in this skill MUST follow the category-to-role mapping below.
73
+ Assigning a task to a role not listed for the category is a violation and MUST be flagged.
74
+ Evaluation scoring and screenshot capture must use only the allowed roles below.
70
75
 
71
- - `.qfai/evidence/prototyping.md`
72
- - `.qfai/evidence/prototyping.json`
73
- - `.qfai/evidence/render.json`
74
- - `.qfai/evidence/browser-qa.json`
75
- - `.qfai/evidence/fullHarness.exit.json`
76
- - `.qfai/evidence/fullHarness.handoff.json`
77
- - `.qfai/evidence/fullHarness.fakeUiDetection.json`
76
+ | Category | Allowed Role(s) |
77
+ | --------------------- | ------------------------------------------------------ |
78
+ | UI implementation | frontend-engineer, product-experience-architect |
79
+ | Screenshot capture | devops-ci-engineer |
80
+ | Evaluation scoring | product-surface-reviewer, product-experience-architect |
81
+ | Build | devops-ci-engineer, backend-engineer |
82
+ | Breakthrough planning | product-experience-architect, frontend-engineer |
78
83
 
79
- ## Truthfulness rules
84
+ Any delegation map entry that assigns a category to an undefined or unlisted role MUST produce a violation finding naming the undefined role and the category.
80
85
 
81
- - `mode.effective` must be `full-harness`.
82
- - `runtimeGate` is observed-only. Synthetic status codes are invalid.
83
- - `runtimeGate.evidenceRefs` must contain concrete render/browser QA/spec refs only.
84
- - `specCoverage` must use concrete declared refs and concrete observed refs only.
85
- - Browser QA evidence must be preserved per screen.
86
- - `actionsWired` must reflect actionable control coverage, not finding counts.
87
- - `reviewerSignoff.status` represents final decision, not mere completion.
88
- - `reviewerLogs[].verdict` must align with decision/termination semantics.
86
+ ## Required Process
89
87
 
90
- ## Review semantics
88
+ ### Step 0 — Execution Plan
91
89
 
92
- - `accepted` -> `approved`
93
- - `rejected` -> `rejected`
94
- - `abandoned` -> `abandoned`
95
- - Plateau stop or max-iterations stop must not produce `approved`.
90
+ Before any code is written, create an execution plan record in the work evidence.
96
91
 
97
- ## Delegation Scope Table
92
+ Required fields:
98
93
 
99
- All sub-agent delegation in this skill MUST follow the category-to-role mapping below.
100
- Assigning a task to a role not listed for the category is a violation and MUST be flagged.
94
+ - `targetIterations`: integer; minimum 2
95
+ - `funnelPolicy`: `5->3->2->1`
96
+ - `evaluationAxesSource`: ref to `.qfai/contracts/design/evaluation-rubric.yaml`
97
+ - `delegationMap`: category-to-role assignments per Delegation Scope Table
98
+ - `plannedAt`: ISO-8601 timestamp
101
99
 
102
- | Category | Allowed Role(s) |
103
- | ------------------ | ------------------------------------------------------ |
104
- | UI implementation | frontend-engineer, product-experience-architect |
105
- | Screenshot capture | devops-ci-engineer |
106
- | Evaluation L1-L2 | product-surface-reviewer, product-experience-architect |
107
- | Build | devops-ci-engineer, backend-engineer |
100
+ ### Step 1 — Read Inputs
108
101
 
109
- Any delegation map entry that assigns a category to an undefined or unlisted role (e.g., `"generic-code-writer"`) MUST produce a violation finding naming the undefined role and the category.
102
+ Read the downstream-ready spec/contract inputs and verify:
110
103
 
111
- ## Required process
104
+ - `.qfai/specs/<spec-id>/01_Spec.md`
105
+ - `.qfai/specs/<spec-id>/03_Acceptance-Criteria.md`
106
+ - `.qfai/contracts/design/exploration-brief.yaml`
107
+ - `.qfai/contracts/design/evaluation-rubric.yaml`
108
+ - `.qfai/contracts/design/evaluator-calibration.yaml`
109
+ - `.qfai/contracts/design/anchor-selection.yaml` when legacy validator slices are exercised
110
+ - `.qfai/contracts/design/evaluation-axes.yaml` when legacy validator slices are exercised
111
+ - `.qfai/contracts/design/selected-direction.yaml` when already created
112
+ - `.qfai/contracts/design/design-system.yaml` when already created
113
+ - `.qfai/contracts/ui/*.yaml`
112
114
 
113
- ### Step 0 — Execution Plan (executionPlan)
115
+ Read order:
114
116
 
115
- Before any code is written, create an `executionPlan` record with the following fields:
117
+ 1. `.qfai/specs/<spec-id>/01_Spec.md`
118
+ 2. `.qfai/specs/<spec-id>/03_Acceptance-Criteria.md`
119
+ 3. `.qfai/contracts/design/exploration-brief.yaml`
120
+ 4. `.qfai/contracts/design/evaluation-rubric.yaml`
121
+ 5. `.qfai/contracts/design/evaluator-calibration.yaml`
122
+ 6. `.qfai/contracts/design/anchor-selection.yaml` (legacy validator alias, when present)
123
+ 7. `.qfai/contracts/design/evaluation-axes.yaml` (legacy validator alias, when present)
124
+ 8. `.qfai/contracts/design/selected-direction.yaml`
125
+ 9. `.qfai/contracts/design/design-system.yaml`
126
+ 10. `.qfai/contracts/ui/*.yaml`
116
127
 
117
- - `targetIterations`: integer; minimum 2 for full-harness
118
- - `evaluationAxesSource`: reference to the discussion pack evaluation-family files (20/21/22/23)
119
- - `delegationMap`: category-to-role assignments per Delegation Scope Table above
120
- - `plannedAt`: ISO-8601 timestamp
128
+ ### Step 2 — Verify Execution Preconditions
121
129
 
122
- The executionPlan MUST be present in `prototyping.json` when `mode=full-harness`. A validator MUST reject any full-harness record without an executionPlan.
130
+ Confirm all of the following before any evaluation:
123
131
 
124
- ### Iteration Gate
132
+ - classification is UI-bearing
133
+ - surface is `web`, `mobile`, `desktop`, or `mixed`
134
+ - every declared screen has a stable `screen-id`
135
+ - the exploration brief, evaluation rubric, and evaluator calibration contracts satisfy the required schema
125
136
 
126
- - full-harness convergence requires a minimum of 2 iterations.
127
- - A single-iteration run that reports `converged=true` is invalid; the iteration gate MUST raise an error with message "minimum 2 iterations required before convergence".
128
- - The phase transition from iteration N to N+1 is blocked until `terminationCondition` is met or the gate explicitly authorizes continuation.
137
+ ### Step 3 — Generate Divergent Directions
129
138
 
130
- ### 5-Step Iteration Cycle
139
+ Generate 5 clearly distinct design directions before selecting a winner.
140
+ Do not begin with a single incumbent direction.
131
141
 
132
- Each full-harness iteration follows this fixed sequence:
142
+ ### Step 4 — Capture Mandatory Evidence
133
143
 
134
- 1. **Capture** — Run `packages/qfai/assets/scripts/capture-screenshots.js --url <url> --out <dir>` and record screenshot paths with timestamps under `scoringTrace[i].screenshotDir`.
135
- 2. **Evaluate** — Launch L1 and L2 evaluator sub-agents with full context bundle: (a) screenshots from Step 1, (b) axisDefs from evaluation-family 20/21/22/23, (c) previousScore from prior iteration, (d) designSystemChecklist from `uiux/12_design_system.md`.
136
- 3. **Identify** — Aggregate L1 + L2 findings; flag immediate-fix items.
137
- 4. **Fix** — Apply fixes per finding disposition; do not close items without evidence.
138
- 5. **Re-evaluate** — Re-run Steps 1–4; compare new score to prior score to check plateau.
144
+ For every declared screen and every active direction:
139
145
 
140
- The sequence MUST NOT be permuted. Parallel execution of Capture+Evaluate is prohibited.
146
+ - capture one screenshot and store it at the canonical screenshot path
147
+ - capture one HTML snapshot and store it at the canonical HTML path
148
+ - record missing evidence immediately; do not continue as if capture succeeded
141
149
 
142
- ### Evaluator Input — 4 Required Elements
150
+ ### Step 5 — Launch Evaluation Reviewers
143
151
 
144
- When launching any L1 or L2 evaluator sub-agent, all 4 elements MUST be present in the input:
152
+ Launch evaluation reviewer sub-agents with the full context bundle:
145
153
 
146
- (a) screenshots — paths produced by capture-screenshots.js for the current iteration
147
- (b) axisDefs — scoring axes from discussion-pack evaluation-family (20/21/22/23)
148
- (c) previousScore — aggregate score from the prior iteration (null for iteration 1)
149
- (d) designSystemChecklist — the compliance checklist derived from `uiux/12_design_system.md`
154
+ - screenshots from Step 4
155
+ - HTML snapshots from Step 4
156
+ - `axisDefs` from `.qfai/contracts/design/evaluation-rubric.yaml`
157
+ - `previousScore` from the prior iteration (`null` for iteration 1)
158
+ - `designSystemChecklist` from `.qfai/contracts/design/design-system.yaml`
150
159
 
151
- If any element is missing, a reviewer check MUST raise a finding naming the missing element.
152
- Missing element (d) is a common error when `uiux/12_design_system.md` is absent; the reviewer MUST still flag it.
160
+ ### Step 6 — Direction Funnel
153
161
 
154
- ### Visual Quality Structural Checklist
162
+ Run the mandatory convergence funnel:
155
163
 
156
- Each iteration evaluation MUST score all 6 visual categories:
164
+ - 5 directions -> top 3
165
+ - top 3 remixed -> top 2
166
+ - top 2 -> selected winner 1
167
+
168
+ ### Step 7 — Extract Winner Contracts
169
+
170
+ After the first winner is selected:
171
+
172
+ - write `.qfai/contracts/design/selected-direction.yaml`
173
+ - extract `.qfai/contracts/design/design-system.yaml`
174
+
175
+ ### Step 8 — Polish the Winner
176
+
177
+ Iterate on the selected winner with normal critique/rework loops.
178
+ Do not assume the latest iteration is automatically best; keep best-of-history in evidence.
157
179
 
158
- 1. Color — color palette adherence to design system tokens
159
- 2. Typography — type scale, weight, line-height compliance
160
- 3. Spacing — spacing scale and grid alignment
161
- 4. Border radius — border-radius consistency across components
162
- 5. Shadow — shadow elevation and opacity standards
163
- 6. Do's&Don'ts — adherence to explicit do/don't rules from `uiux/12_design_system.md`
180
+ ## Iteration Gate
164
181
 
165
- ### Lighthouse Gate (MUST for web full-harness)
182
+ - Minimum 2 iterations are required before any terminal phase transition is allowed.
183
+ - Do not mark the run as converged or complete after a single iteration.
184
+ - Any phase transition to completion must pass through the iteration gate and reviewer gate.
166
185
 
167
- When `surface=web` and `mode=full-harness`, a Lighthouse performance/accessibility report MUST be captured and attached to the evidence. The reviewer gate MUST raise an error "Lighthouse Gate is MUST for full-harness + web surface" when the report is absent.
186
+ ### Step 9 — Breakthrough Detection
168
187
 
169
- ### Steps (continued)
188
+ After each polish iteration, run the mechanical breakthrough detector.
189
+ If `allItemsPass95` is false and score improvement is below the configured plateau threshold and code change is below the configured diff threshold, trigger breakthrough branching.
170
190
 
171
- 1. Read the latest discussion pack and verify `prototyping.yaml`, `04_Sources.md`, `20/21/22/23`, and `40_screen_contracts.md`.
172
- Read order: option comparison / `30_option_comparison.md` -> selected anchor screen / `31_selected_anchor_screen.md` -> strategy / `10_implementation_strategy.md` -> taste interview / `11_design_taste_interview.md` -> trend scan / `04_Sources.md` -> 3-layer evaluation family (`20/21/22/23`) -> screen contracts / `40_screen_contracts.md`.
173
- 2. Verify the classification is UI-bearing and the surface is `web`, `mobile`, `desktop`, or `mixed`.
174
- 3. Create the executionPlan (Step 0 above).
175
- 4. Implement the minimum runnable slice for all specs.
176
- 5. Run `qfai prototyping run --mode full-harness --reviewer <id>` — this executes the 5-Step Iteration Cycle per iteration.
177
- 6. Review render evidence, HTML snapshots, Browser QA, runtimeGate, uiFidelity, and specCoverage for every declared screen.
178
- 7. Fix findings and rerun until the evidence is coherent.
179
- 8. Run `qfai validate --fail-on error`.
180
- 9. Route an independent reviewer and do not declare completion until the result is `PASS`.
191
+ ### Step 10 — Breakthrough Branch Loop
181
192
 
182
- ## Reviewer gate
193
+ When breakthrough is triggered:
194
+
195
+ - generate exactly 2 branch directions
196
+ - compare incumbent + 2 branches
197
+ - replace the mainline if a branch wins
198
+ - refresh selected-direction/design-system if the winner changes
199
+ - record the decision in `.qfai/evidence/breakthrough.json`
200
+
201
+ ### Step 11 — Validate and Verify
202
+
203
+ - Run `qfai validate --fail-on error`.
204
+ - Route `/qfai-verify` or its equivalent gate workflow for final quality approval.
205
+ - Do not declare completion until the reviewer result is `PASS`.
206
+
207
+ ## Evaluator Inputs (Mandatory)
208
+
209
+ When launching any evaluation reviewer sub-agent, all 5 elements MUST be present:
210
+
211
+ 1. screenshots
212
+ 2. HTML snapshots
213
+ 3. axisDefs
214
+ 4. previousScore
215
+ 5. designSystemChecklist
216
+
217
+ ## Visual Quality Structural Checklist
218
+
219
+ Each iteration evaluation MUST score all 6 visual categories:
220
+
221
+ 1. Design quality
222
+ 2. Originality
223
+ 3. Craft
224
+ 4. Functionality
225
+ 5. Accessibility risk
226
+ 6. Implementation plausibility
183
227
 
184
228
  ### Reviewer Gate (MUST)
185
229
 
186
- - Reviewer must verify full-harness evidence completeness.
187
- - Reviewer response must include `Result: PASS | REVISE` (matching shared-skill-delegation-baseline.md#reviewer-response-template).
188
- - Reviewer must verify calibration pack usage via `calibrationRef`.
189
- - Reviewer must reject self-reference, synthetic refs, and `mockPaths.status="pass"`.
190
- - Reviewer must verify `reviewerSignoff`, `reviewerLogs`, `terminationReason`, and `finalDecision` are semantically aligned.
191
- - Reviewer must verify Drift Protocol compliance and alignment with `test-layers.md`.
192
- - Review volume guidance remains signals, not gates.
193
- - Reviewer returns PASS or REVISE only.
230
+ Reviewer checks are defined in:
231
+
232
+ - `.qfai/assistant/skills/qfai-prototyping/references/reviewer-gate.md`
233
+ - `.qfai/assistant/steering/test-layers.md`
234
+
235
+ Minimum reviewer responsibilities:
236
+
237
+ - enforce the Drift Protocol before approving a completion transition
238
+ - verify mandatory screenshot/HTML evidence exists for every declared screen
239
+ - verify exploration brief, evaluation rubric, and evaluator calibration were used
240
+ - verify missing evidence caused rerun rather than waiver
241
+ - verify `qfai validate --fail-on error` completed successfully
242
+ - verify breakthrough trigger evidence is present
243
+ - verify best-of-history handling is documented
244
+ - treat score/volume heuristics as signals, not gates
245
+ - return `Result: PASS | REVISE`
194
246
 
195
247
  ## Sub-agent Delegation (MANDATORY)
196
248
 
@@ -198,9 +250,9 @@ Follow `.qfai/assistant/instructions/shared-skill-delegation-baseline.md`.
198
250
 
199
251
  ### Orchestrator Protocol (MUST)
200
252
 
201
- - Additional prototyping-specific overrides:
202
- - do not self-approve;
203
- - keep evidence paths canonical and integrate delegated results only.
253
+ - do not self-approve
254
+ - keep evidence paths canonical
255
+ - integrate delegated results only
204
256
 
205
257
  ### Capability Probe (MUST)
206
258
 
@@ -221,19 +273,22 @@ Follow `.qfai/assistant/instructions/shared-skill-operating-baseline.md#completi
221
273
 
222
274
  Prototyping-specific additions:
223
275
 
224
- - all specs are covered;
225
- - full-harness evidence is complete and truthful;
226
- - `qfai validate --fail-on error` passes;
227
- - reviewer returns `PASS`.
276
+ - all specs are covered
277
+ - all declared screens have screenshot + HTML evidence
278
+ - `selected-direction.yaml` exists
279
+ - `design-system.yaml` exists
280
+ - `breakthrough.json` exists
281
+ - `qfai validate --fail-on error` passes
282
+ - reviewer returns `PASS`
228
283
 
229
284
  ## FINAL CHECKLIST (Check Last)
230
285
 
231
- ### Completion Checklist (MUST)
232
-
233
286
  - All specs are covered in the Coverage Matrix.
234
- - Required full-harness evidence is present.
235
- - 404 findings are resolved or the run is not complete.
236
- - uiFidelity is present when required.
287
+ - Every declared screen has screenshot evidence.
288
+ - Every declared screen has HTML evidence.
289
+ - Missing evidence triggered rerun instead of waiver.
290
+ - Direction funnel `5->3->2->1` completed.
291
+ - Breakthrough detector ran after polish iterations.
237
292
  - Reviewer returned PASS; otherwise status is REVISE.
238
293
 
239
294
  ## Completion Message & Next Actions (MUST)
@@ -242,4 +297,4 @@ Action:
242
297
 
243
298
  - Proceed: `/qfai-atdd`
244
299
  - Quality gate: `/qfai-verify`
245
- - Rework prototyping: rerun `/qfai-prototyping` with corrected evidence
300
+ - Rework prototyping: rerun `/qfai-prototyping` with corrected screenshot/HTML evidence
@@ -0,0 +1,22 @@
1
+ # Design System Compliance
2
+
3
+ When `.qfai/contracts/design/design-system.yaml` exists and is required, evaluators must compare the implementation against:
4
+
5
+ - color palette
6
+ - typography scale and weights
7
+ - spacing scale
8
+ - border radius
9
+ - shadow usage
10
+ - explicit do/don't rules
11
+
12
+ ## Rule
13
+
14
+ If the implementation clearly contradicts the design system on a primary screen, record an immediate-fix finding.
15
+
16
+ ## Evidence
17
+
18
+ Support each finding with:
19
+
20
+ - screenshot evidence
21
+ - HTML snapshot evidence
22
+ - the specific design-system clause or checklist item
@@ -0,0 +1,31 @@
1
+ # Evidence Requirements
2
+
3
+ ## Mandatory evidence
4
+
5
+ For every declared screen in `.qfai/contracts/ui/*.yaml`, collect both:
6
+
7
+ - screenshot: `.qfai/evidence/prototyping/screenshots/<screen-id>.png`
8
+ - HTML snapshot: `.qfai/evidence/prototyping/html/<screen-id>.html`
9
+
10
+ If either artifact is missing:
11
+
12
+ - the screen is scored `0`
13
+ - the run is incomplete
14
+ - rerun is mandatory
15
+
16
+ Neither artifact is optional: evidence for a declared screen may not be skipped or waived.
17
+
18
+ ## Capture rules
19
+
20
+ - Use stable `screen-id` names from the canonical UI contracts.
21
+ - Overwrite stale evidence with fresh evidence from the current iteration.
22
+ - Do not reuse an older screenshot or HTML snapshot after a fix.
23
+ - If capture fails, record the failure in work evidence and do not treat the screen as evaluated.
24
+
25
+ ## Validate gate expectations
26
+
27
+ `qfai validate --fail-on error` must be able to confirm:
28
+
29
+ - every declared screen has a screenshot file
30
+ - every declared screen has an HTML snapshot file
31
+ - the file paths follow the canonical directories above