claudecode-omc 5.6.6 → 5.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/.local/skills/h5-to-swiftui/SKILL.md +201 -0
  2. package/.local/skills/h5-to-swiftui/assets/calibration/README.md +176 -0
  3. package/.local/skills/h5-to-swiftui/assets/calibration/h5-twin/index.html +52 -0
  4. package/.local/skills/h5-to-swiftui/assets/calibration/h5-twin/style.css +133 -0
  5. package/.local/skills/h5-to-swiftui/assets/calibration/swiftui-twin/Package.swift +26 -0
  6. package/.local/skills/h5-to-swiftui/assets/calibration/swiftui-twin/Sources/CalibrationScreen/CalibrationScreen.swift +142 -0
  7. package/.local/skills/h5-to-swiftui/assets/calibration/swiftui-twin-divergent/Package.swift +32 -0
  8. package/.local/skills/h5-to-swiftui/assets/calibration/swiftui-twin-divergent/Sources/CalibrationScreenDivergent/CalibrationScreenDivergent.swift +122 -0
  9. package/.local/skills/h5-to-swiftui/assets/calibration/tokens.json +42 -0
  10. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/index.html +14 -0
  11. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/package.json +20 -0
  12. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/public/api/articles/001.json +96 -0
  13. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/public/api/articles/index.json +89 -0
  14. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/App.jsx +22 -0
  15. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/App.module.css +11 -0
  16. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/components/ArticleCard.jsx +53 -0
  17. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/components/ArticleCard.module.css +139 -0
  18. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/components/NavBar.jsx +37 -0
  19. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/components/NavBar.module.css +72 -0
  20. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/components/TagCloud.jsx +30 -0
  21. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/components/TagCloud.module.css +50 -0
  22. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/components/TrendChart.jsx +159 -0
  23. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/components/TrendChart.module.css +21 -0
  24. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/main.jsx +12 -0
  25. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/screens/ArticleScreen.jsx +182 -0
  26. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/screens/ArticleScreen.module.css +294 -0
  27. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/screens/FeedScreen.jsx +147 -0
  28. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/screens/FeedScreen.module.css +161 -0
  29. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/styles/global.css +50 -0
  30. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/styles/tokens.css +103 -0
  31. package/.local/skills/h5-to-swiftui/assets/sample-h5-react/vite.config.js +6 -0
  32. package/.local/skills/h5-to-swiftui/assets/sample-h5-vanilla/data/tasks.js +67 -0
  33. package/.local/skills/h5-to-swiftui/assets/sample-h5-vanilla/index.html +26 -0
  34. package/.local/skills/h5-to-swiftui/assets/sample-h5-vanilla/router.js +73 -0
  35. package/.local/skills/h5-to-swiftui/assets/sample-h5-vanilla/screens/detail.js +164 -0
  36. package/.local/skills/h5-to-swiftui/assets/sample-h5-vanilla/screens/home.js +53 -0
  37. package/.local/skills/h5-to-swiftui/assets/sample-h5-vanilla/screens/list.js +87 -0
  38. package/.local/skills/h5-to-swiftui/assets/sample-h5-vanilla/styles/app.css +342 -0
  39. package/.local/skills/h5-to-swiftui/assets/sample-h5-vanilla/styles/tokens.css +68 -0
  40. package/.local/skills/h5-to-swiftui/references/css-to-swiftui-map.md +205 -0
  41. package/.local/skills/h5-to-swiftui/references/design-token-extraction.md +209 -0
  42. package/.local/skills/h5-to-swiftui/references/high-risk-triage.md +209 -0
  43. package/.local/skills/h5-to-swiftui/references/render-equivalence-calibration.md +193 -0
  44. package/.local/skills/h5-to-swiftui/references/stack-detection.md +160 -0
  45. package/.local/skills/h5-to-swiftui/references/visual-diff-loop-protocol.md +365 -0
  46. package/.local/skills/h5-to-swiftui/scripts/_calib-consts.mjs +150 -0
  47. package/.local/skills/h5-to-swiftui/scripts/_imglib.mjs +547 -0
  48. package/.local/skills/h5-to-swiftui/scripts/_provenance.mjs +123 -0
  49. package/.local/skills/h5-to-swiftui/scripts/calibrate-render.mjs +625 -0
  50. package/.local/skills/h5-to-swiftui/scripts/capture-reference.mjs +386 -0
  51. package/.local/skills/h5-to-swiftui/scripts/detect-stack.mjs +305 -0
  52. package/.local/skills/h5-to-swiftui/scripts/evaluate-convergence.mjs +1093 -0
  53. package/.local/skills/h5-to-swiftui/scripts/extract-tokens.mjs +600 -0
  54. package/.local/skills/h5-to-swiftui/scripts/mark-overlay.mjs +379 -0
  55. package/.local/skills/h5-to-swiftui/scripts/pixel-diff.mjs +530 -0
  56. package/.local/skills/h5-to-swiftui/scripts/sim-screenshot.sh +544 -0
  57. package/bundled/manifest.json +1 -1
  58. package/package.json +1 -1
@@ -0,0 +1,160 @@
1
+ # Stack Detection — Stage 0
2
+
3
+ Used by `scripts/detect-stack.mjs`. Runs **before any other stage**. Determines
4
+ whether the project is in v1 scope; out-of-scope projects stop here with an
5
+ explicit written report — they are never guessed past.
6
+
7
+ ---
8
+
9
+ ## Why native rewrite beats WebView shell and mechanical transpilation
10
+
11
+ Condensed from findings.md RQ1 (sources: kean.blog; Apple "Composing custom
12
+ layouts" WWDC22-10056; MDN Specificity; LogRocket / Yoga 3.0; dbushell; bjango;
13
+ fatbobman; MDN animations; tonsky.me; Android→iOS pilot 2507.16037).
14
+
15
+ | Route | Fidelity ceiling | Core failure mechanism |
16
+ |---|---|---|
17
+ | WebView shell (Capacitor / Cordova / Ionic) | ~70–80% | Renders in WebKit, not Core Animation/Metal; ProMotion 120 Hz unavailable to WebKit-rendered content; Dynamic Type needs full reload; rubber-band scroll constant diverges; haptics absent; 2 extra OS processes per instance |
18
+ | React Native / NativeScript | ~85–92% | Yoga bridge drops 5 CSS flex properties (`flex-basis`, `flex` shorthand, `wrap-reverse`, `order`, column/row-gap individually); bridge latency; not SwiftUI semantics |
19
+ | Flutter (Impeller) | ~80–85% | Own GPU renderer independent of UIKit; iOS conventions require manual Cupertino override throughout |
20
+ | Mechanical transpilation (DOM→view tree) | **<60%** | Six structural incompatibilities listed below — errors compound per nesting level |
21
+ | **Native rewrite** (this skill) | **100% platform ceiling** | Requires a render-measure-feedback loop; see `visual-diff-loop-protocol.md` |
22
+
23
+ ### Six reasons mechanical transpilation cannot reach pixel-level
24
+
25
+ 1. **Inverted layout protocol.** CSS: parent establishes a containing block;
26
+ child size = content + padding + border + margin. SwiftUI: parent _proposes_
27
+ a size; child _chooses_; parent must honor it. The information-flow direction
28
+ is reversed. Reconstructing it from CSS is underdetermined for nested
29
+ layouts. (kean.blog; Apple "Composing custom layouts".)
30
+
31
+ 2. **No cascade.** CSS resolves styles by global selector specificity +
32
+ inheritance + `!important`. SwiftUI styling is local modifier-chain order +
33
+ downward `environment`. A transpiler needs a full cascade engine; any cascade
34
+ error is a visible defect. (MDN Specificity.)
35
+
36
+ 3. **No 1:1 flex mapping.** Yoga (the most mature CSS-flex bridge) drops
37
+ `flex-basis`, the `flex` shorthand, `wrap-reverse`, `order`, and individual
38
+ `row-gap`/`column-gap`. Errors compound per nesting level. (LogRocket /
39
+ Yoga 3.0.)
40
+
41
+ 4. **Font metric divergence.** WebKit implements `line-height: normal ≈ 1.2`
42
+ against the CSS font spec; SwiftUI uses CoreText metrics directly. Identical
43
+ `16px` declarations yield different line height, advance width, and
44
+ baseline — so text wraps at a different column and all subsequent layout shifts. (dbushell;
45
+ bjango.)
46
+
47
+ 5. **Animation model mismatch.** CSS = timeline keyframes on absolute time;
48
+ SwiftUI = state-diff interpolation on Core Animation. Mechanical
49
+ `@keyframes` → `.animation()` diverges in timing, easing, and
50
+ interruptibility. (fatbobman; MDN animations.)
51
+
52
+ 6. **Non-deterministic adaptive spacing.** SwiftUI inserts type-dependent
53
+ spacing and context-sensitive control styling with no CSS source to
54
+ translate from. (tonsky.me.)
55
+
56
+ **Takeaway:** native rewrite is the only route to the 100% ceiling; transpilation
57
+ is at best a draft generator; a render-diff feedback loop is mandatory because
58
+ these divergences are invisible to static code diffing.
59
+
60
+ ---
61
+
62
+ ## Detection heuristics table
63
+
64
+ `detect-stack.mjs` reads `package.json` and runs a shallow source scan
65
+ (`.js/.ts/.jsx/.tsx/.vue/.svelte` files in `src/` or root, max 500 files). No network calls.
66
+
67
+ ### Framework detection
68
+
69
+ | Framework | `package.json` deps (any of) | Source signatures |
70
+ |---|---|---|
71
+ | React | `react`, `react-dom`, `@types/react` | `import React`, `JSX.Element`, `.jsx`/`.tsx` extensions, `ReactDOM.render`, `createRoot` |
72
+ | Vue | `vue`, `@vue/core`, `nuxt` | `.vue` files, `<template>` + `<script setup>`, `createApp` |
73
+ | Svelte | `svelte`, `@sveltejs/kit` | `.svelte` files, `<script>` + `<style>` co-located, `$:` reactivity |
74
+ | Angular | `@angular/core`, `@angular/common` | `@Component`, `@NgModule`, `.component.ts` suffix pattern |
75
+ | Solid | `solid-js`, `@solidjs/router` | `createSignal`, `createEffect`, `.jsx` with no `react` dep |
76
+ | Vanilla | none of the above in deps | Bare `addEventListener`, no framework imports, `.js`/`.ts` only |
77
+
78
+ Confidence scoring: `high` = dep present + source signature found; `medium` =
79
+ dep only (source scan inconclusive); `low` = source signature only (dep absent,
80
+ e.g. CDN-loaded). At `low` confidence, `detect-stack.mjs` logs a warning and
81
+ continues; `in_v1_scope` reflects the most-likely classification.
82
+
83
+ ### Build tool detection
84
+
85
+ | Build tool | Primary signal | Secondary signal |
86
+ |---|---|---|
87
+ | Vite | `vite` in deps or devDeps; `vite.config.{js,ts}` present | `"dev": "vite"` in scripts |
88
+ | webpack | `webpack` in deps; `webpack.config.{js,cjs,mjs}` present | `"build": "webpack"` in scripts |
89
+ | Next.js | `next` in deps; `next.config.{js,mjs,ts}` present | `pages/` or `app/` dir with `page.{tsx,jsx}` |
90
+ | Nuxt | `nuxt` in deps; `nuxt.config.{ts,js}` present | `pages/` dir with `.vue` files |
91
+ | CRA | `react-scripts` in deps | `"start": "react-scripts start"` in scripts |
92
+ | None | none of the above | Single `index.html` + inline `<script>` or `<script type=module>` |
93
+
94
+ ### Styling detection
95
+
96
+ | Styling system | Detection signal |
97
+ |---|---|
98
+ | Tailwind v3 | `tailwindcss@^3` in deps; `tailwind.config.{js,ts}` present; class names like `text-sm`, `flex`, `bg-gray-100` in source |
99
+ | Tailwind v4 | `tailwindcss@^4` in deps; `@import "tailwindcss"` or `@theme` block in a `.css` file; no `tailwind.config.*` |
100
+ | CSS Modules | `.module.css` / `.module.scss` files present; `import styles from '…module.css'` in source |
101
+ | Sass / SCSS | `.scss` or `.sass` files; `sass` or `node-sass` in deps |
102
+ | CSS-in-JS | `styled-components`, `@emotion/react`, `@emotion/styled`, `@stitches/react`, or `linaria` in deps |
103
+ | Plain CSS | None of the above; `.css` files imported directly |
104
+
105
+ ### Router detection
106
+
107
+ | Router | Detection signal |
108
+ |---|---|
109
+ | React Router | `react-router-dom` or `react-router` in deps |
110
+ | TanStack Router | `@tanstack/react-router` in deps |
111
+ | Next.js router | Next.js detected (built-in) |
112
+ | Vue Router | `vue-router` in deps |
113
+ | Wouter | `wouter` in deps |
114
+ | None | Single-page with no router dep |
115
+
116
+ ---
117
+
118
+ ## v1 scope gate
119
+
120
+ **v1 supports: framework ∈ {vanilla, React}.**
121
+
122
+ All other detected frameworks (Vue, Svelte, Angular, Solid) are **out of v1 scope**.
123
+ `detect-stack.mjs` writes `stack-report.json` and **exits with code 2** (not 0,
124
+ not 1 — pipeline scripts detect this as a scope-stop, not an error).
125
+
126
+ The pipeline does **not** attempt to convert an out-of-scope project. It does not
127
+ guess. It does not warn and continue. It stops cleanly so the caller knows exactly
128
+ what happened.
129
+
130
+ ### `stack-report.json` schema
131
+
132
+ ```json
133
+ {
134
+ "schema": "h5-to-swiftui/stack-report@1",
135
+ "framework": "vue",
136
+ "buildTool": "vite",
137
+ "styling": ["css-modules", "tailwind-v3"],
138
+ "router": "vue-router",
139
+ "confidence": "high",
140
+ "in_v1_scope": false,
141
+ "stop_reason": "detected=vue, out of v1 scope; v1 supports vanilla|React only",
142
+ "detected_at": "2026-05-19T12:00:00Z"
143
+ }
144
+ ```
145
+
146
+ | Field | Type | Notes |
147
+ |---|---|---|
148
+ | `framework` | string | `vanilla` \| `react` \| `vue` \| `svelte` \| `angular` \| `solid` \| `unknown` |
149
+ | `buildTool` | string | `vite` \| `webpack` \| `next` \| `nuxt` \| `cra` \| `none` \| `unknown` |
150
+ | `styling` | string[] | Array; a project may use multiple (e.g. `["tailwind-v3","css-modules"]`) |
151
+ | `router` | string | Primary detected router or `none` |
152
+ | `confidence` | string | `high` \| `medium` \| `low` |
153
+ | `in_v1_scope` | boolean | `true` only when `framework` is `vanilla` or `react` |
154
+ | `stop_reason` | string | Present only when `in_v1_scope: false` |
155
+ | `detected_at` | string | ISO-8601 timestamp |
156
+
157
+ When `in_v1_scope: true`, the pipeline proceeds to Stage 1. The `stack-report.json`
158
+ is retained as a Stage 0 artifact and its fields inform Stage 1 (token extraction
159
+ strategy differs for plain CSS vs Tailwind v4 vs CSS-in-JS) and Stage 4 (component
160
+ pattern differs for class components vs function+hooks vs vanilla DOM).
@@ -0,0 +1,365 @@
1
+ # Stage 5 — Visual-Diff Convergence Loop Protocol
2
+
3
+ The core mechanism. Operates **per component** (component granularity is the
4
+ primary anti-oscillation defense — a fix to one component cannot break
5
+ another). Consumes `calibration.json` (Stage 2.5) and the per-component
6
+ **snapshot host** (Stage 4 hard output contract).
7
+
8
+ ## One iteration
9
+
10
+ 1. **Render** the component via its snapshot host in the simulator
11
+ (`sim-screenshot.sh`). Normalize per `calibration.json.transform`
12
+ (crop → resample → P3→sRGB) so it co-registers with the Stage-2
13
+ reference crop for that component (`reference/<screen>/<component>.png`).
14
+ 2. **Diff cascade** (`pixel-diff.mjs`):
15
+ - pHash Hamming is recorded as raw data plus a `phash_fast_candidate`
16
+ boolean (true when the Hamming distance is ≤ 5). **It is necessary-not-sufficient and NEVER a
17
+ short-circuit to `converged`** — `pixel-diff.mjs` does not decide the
18
+ verdict; `evaluate-convergence.mjs` does, and it always requires the
19
+ region gate to pass regardless of pHash.
20
+ - split component into **text regions** vs **non-text regions** using the
21
+ DOM bbox map:
22
+ - text → score by layout-box **IoU** + resolved **token-color ΔE**
23
+ (foreground/background), **never glyph-raster SSIM** (cross-renderer
24
+ glyph rasters are not comparable — Stage 2.5). IoU is the **real**
25
+ `bboxIoU(refBox, genBox)` only when `--gen-bbox-map` supplies the
26
+ generated bbox; otherwise `iou` is **`null`** (never a fabricated
27
+ 1.0), and a `null` iou counts as a gate **FAIL**.
28
+ - non-text → SSIM + CIEDE2000 + AA-tolerant diff mask.
29
+ - **inter-component spacing delta** is `null` unless generated positions
30
+ are supplied via `--gen-bbox-map` (never fabricated `{top:0,leading:0}`).
31
+ - Inputs MUST be **co-registered** (same dimensions) — `pixel-diff.mjs`
32
+ does not normalize and **hard-errors (exit 1)** on a size mismatch;
33
+ pre-normalize via Stage 2.5 `calibration.transform` first.
34
+ 3. **Feedback payload** to the corrector LLM:
35
+ - reference + generated, both with **identical Set-of-Mark** numbered
36
+ overlays (`mark-overlay.mjs`),
37
+ - red diff-mask overlay on the reference,
38
+ - structured JSON delta (schema below),
39
+ - **two-stage critique**: (a) visual/perceptual NL critique, then (b)
40
+ code-level NL→SwiftUI-patch recommendation (separation of modalities).
41
+ 4. **Patch**: corrector emits a **structured per-file diff** (NOT a
42
+ whole-file rewrite — enables reversion control), constrained to the
43
+ `tokens.json` vocabulary; prior-iteration correction history is injected
44
+ ("you changed X which worsened Y; do not revert").
45
+ 5. **Recompile.** Compile-failure branch: revert working tree to the best
46
+ gate-passing iteration, **consume one iteration**; if the cap is hit with
47
+ no buildable+passing iteration ⇒ `needs-human` (never `converged`).
48
+ 6. **Re-measure** (step 2). Retain best **only among gate-passing
49
+ iterations** (monotone-or-fail: a non-passing run is never presented as a
50
+ result).
51
+
52
+ Cap = `--max-iter` (default 3). Diminishing returns past 3 are documented in
53
+ `../../../.omc/conductor/tracks/h5-to-swiftui/research/findings.md` RQ4.
54
+
55
+ ## The verdict is emitted ONLY by `scripts/evaluate-convergence.mjs`
56
+
57
+ `convergence/<component>.json` is **never hand-written** by the orchestrator.
58
+ The orchestrator runs `pixel-diff.mjs` per iteration, records each
59
+ iteration's `built` flag, gathers masks + the independent-judge result, then
60
+ calls `scripts/evaluate-convergence.mjs`, which **mechanically** decides the
61
+ tier and **exits non-zero on any guard violation** (exit 3 =
62
+ needs-human/guard violation, exit 4 = blocked, exit 1 = rejected input or provenance failure such as `gate-floor-mismatch`, exit 0 = converged/close) so a
63
+ pipeline cannot ignore a failed gate. Every guard below is enforced *in that
64
+ script's code*, not by prose. The script — not the caller — chooses
65
+ `best_iteration` and computes `gate_passed` per iteration; caller-supplied
66
+ values for those are ignored.
67
+
68
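+ Invocation (the trailing `# …` annotations are explanatory only — remove them before running, because a comment placed after a `\` line continuation breaks the shell command):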
69
+
70
+ ```
71
+ node scripts/evaluate-convergence.mjs \
72
+ --iterations iterations.json \ # [{i,diff_json_path,built}, ...]
73
+ --calibration calibration.json \ # STRUCTURED numeric gate (not string DSL)
74
+ --judge judge.json \ # {negative_control,framing,differences,verdict}
75
+ --masks masks.json \ # [{x,y,w,h,reason}] (reason required)
76
+ --component-area 320x140 \ # mask-fraction denominator
77
+ --component ProductCard \
78
+ --out .h5-to-swiftui/convergence/ProductCard.json
79
+ ```
80
+
81
+ ## Trust model & residual (honest disclosure — spec §1.1)
82
+
83
+ The skill's own thesis (§1.1) is *report the residual, do not pretend it is
84
+ zero*. Applied to the grader's **own** trust model:
85
+
86
+ **Mechanically bound (fail-closed, in `evaluate-convergence.mjs` code):**
87
+
88
+ - the structured `gate` is **recomputed from `calib.floor`** and a deviating
89
+ gate is rejected (`gate-floor-mismatch`, exit 1) — this binds the *gate to
90
+ the floor* (a hand-loosened gate is rejected unless the floor is loosened);
91
+ it does not bind the floor's *value*;
92
+ - the **identity of the bundled twin source files** (excluding build
93
+ output/dotfiles) is bound via `calibration_source` source-tree hashes
94
+ recomputed from the actual `assets/calibration/{h5-twin,swiftui-twin}`
95
+ (`calibration-twin-mismatch`, exit 1) — this binds the twin *source
96
+ identity*, NOT the measured `floor` *value* (the real public twin hashes
97
+ can be copied alongside a loose floor);
98
+ - the **`floor` value** is asserted to satisfy `calibrate-render.mjs`'s OWN
99
+ sanity envelope via the shared `scripts/_calib-consts.mjs`
100
+ (`ssim_nontext ≥ 0.95`, non-null `text_iou ≥ 0.9`, metric-valid
101
+ `deltaE_p95`); a floor calibrate-render could not have emitted (it writes
102
+ `blocked.json` below this) is rejected `floor-implausible`, exit 1 — this
103
+ kills the absurd-floor attack but does NOT re-measure the floor;
104
+ - the judge negative control is bound to the **shipped, hash-pinned**
105
+ `assets/calibration/swiftui-twin-divergent` source files (structured +
106
+ rejected + framed; `negative-control-unbound` VOIDs any `YES`, exit 3);
107
+ - pHash never short-circuits; a `null` text IoU is a FAIL; best-of-N is
108
+ monotone-or-fail; the mask budget is verified or the run refuses to pass;
109
+ `calibration.json.schema` is asserted.
110
+
111
+ **Named, irreducible residuals (NOT zero — BOTH stated, neither hidden):** a
112
+ fully zero-trust verdict is impossible because *something must run the
113
+ renders and something must measure the floor*.
114
+
115
+ 1. **The grader cannot re-execute the simulator renders.** It trusts the
116
+ per-iteration `pixel-diff.mjs` JSONs were produced by running the **real
117
+ `pixel-diff.mjs` on real `sim-screenshot.sh` renders** (bounded by
118
+ `sim-screenshot.sh`'s no-fake build/env spine — no simulator / no build ⇒
119
+ `blocked`/`needs-human`, never converged).
120
+ 2. **The grader cannot re-measure the calibration floor.** It asserts the
121
+ supplied `floor` satisfies `calibrate-render.mjs`'s own sanity envelope
122
+ and recomputes the gate from it, but a `floor` *within* that envelope yet
123
+ looser than the TRUE measured floor is trusted (it cannot re-render the
124
+ bundled twins to re-derive the real number). Mitigated by the
125
+ orchestrator's contractual obligation to run the real,
126
+ sanity/flat-image-spined `calibrate-render.mjs` and by the human-readable
127
+ `calibration_provenance` recorded in the convergence artifact.
128
+
129
+ These are the deliberate, documented boundaries: the deliverable is
130
+ **"maximally provenance-bound + honestly disclosed residual"**, explicitly
131
+ *not* "zero-trust". The **whole-assembled-screen trend check remains a
132
+ Stage-7 manual cross-check**, not an automated Stage-5 guard (unchanged;
133
+ still honestly scoped).
134
+
135
+ ## Tiered verdict (honest)
136
+
137
+ `evaluate-convergence.mjs` reads the **structured** `calibration.json.gate`
138
+ (numeric `gate.converged` / `gate.close` objects — a legacy string-DSL gate
139
+ is rejected as un-enforceable) and evaluates it against each iteration's
140
+ `pixel-diff.mjs` output:
141
+
142
+ - **`converged`** — the *script's* chosen best iteration (built AND
143
+ gate-passing) passes the converged gate vs the *measured* floor AND the
144
+ independent judge returns `YES` with a valid negative control.
145
+ - **`close`** — structural metrics within the `close` band AND judge returns
146
+ `visually-equivalent-residual-subperceptual`. This is an **honest accept**,
147
+ explicitly distinct from faking — "as good as this renderer pair allows".
148
+ - **`needs-human`** — anything else; recorded with full evidence + the
149
+ machine-readable guard reason. **`blocked`** — a `blocked.json` is present
150
+ for the component (never converged).
151
+
152
+ ## Independent judge (anti-collusion)
153
+
154
+ - Separate sub-agent, separate lane (OMC: never self-approve in same
155
+ context). Dispatch via `Agent` (use `verifier` or `qa-tester`), NOT the
156
+ corrector.
157
+ - **Negative control (BOUND to the shipped divergent twin)**: before its
158
+ verdict is trusted, feed the judge the known-divergent pair
159
+ `h5-twin` vs the bundled `assets/calibration/swiftui-twin-divergent`; it
160
+ MUST return a rejection. This is **not** a free `"passed"` string — the old
161
+ bare form is rejected as unbound (it asserts a judge run that nothing verified).
162
+ `judge.negative_control` MUST be the structured artifact below, and
163
+ `scripts/evaluate-convergence.mjs` recomputes the divergent twin's
164
+ source-tree hash from the shipped asset and fails closed
165
+ (`negative-control-unbound`, any `YES` VOID, exit 3) unless every field
166
+ matches:
167
+
168
+ ```json
169
+ "negative_control": {
170
+ "stimulus_source_hash": "<sha256 source-tree hash of the bundled assets/calibration/swiftui-twin-divergent>",
171
+ "divergent_pair": "h5-twin vs swiftui-twin-divergent",
172
+ "rejected": true,
173
+ "differences": [ { "desc": "green bg vs light", "severity": "major" }, … ]
174
+ }
175
+ ```
176
+
177
+ `stimulus_source_hash` ≠ the recomputed bundled divergent hash, OR
178
+ `rejected !== true`, OR an empty/unstructured `differences`, OR
179
+ `framing !== "forced-difference-3"` ⇒ the negative control is **unbound**,
180
+ any `YES` is **VOID**, tier forced `needs-human`, exit non-zero
181
+ (`reason: negative-control-unbound`). This ties "the judge really rejected
182
+ the *known-divergent* pair" to the shipped, hash-pinned asset rather than a
183
+ free string.
184
+ - **Adversarial framing**: prompt = *"enumerate the 3 most significant
185
+ visual differences between A and B and rate each severity"*, never
186
+ "do these match?". A `converged` verdict requires all 3 to be
187
+ sub-perceptual / cross-renderer-irreducible. `judge.framing` MUST be
188
+ `"forced-difference-3"` (enforced; absence/mismatch VOIDs `YES`).
189
+
190
+ ## Anti-gaming guards
191
+
192
+ **Enforced in code by `scripts/evaluate-convergence.mjs`** (violation ⇒ tier
193
+ downgrade + non-zero exit + a machine-readable reason — never a silent pass):
194
+
195
+ - **Mask budget**: `sum(mask area) / component area` must be ≤ 0.10; every
196
+ mask must carry a non-empty `reason`. Over budget or a reason-less mask ⇒
197
+ forced `needs-human`, exit 3. If masks are supplied but the component area
198
+ is unknown, the script **refuses to proceed (exit 1)** rather than silently
199
+ pass an unverifiable budget.
200
+ - **Gate eval in code**: the structured `calibration.json` thresholds are
201
+ evaluated per iteration to produce `gate_passed` (the caller cannot supply
202
+ it). A text-region `iou` of `null` is a **FAIL**. pHash never short-circuits.
203
+ - **Monotone-or-fail best-of-N**: `best_iteration` is the script's choice
204
+ among iterations that are **both** `built==true` **and** `gate_passed==true`;
205
+ if none qualify ⇒ `needs-human` (recorded). The caller's `best_iteration`
206
+ is ignored.
207
+ - **Negative-control voids judge (bound to the shipped divergent twin)**:
208
+ `judge.negative_control` MUST be the structured object
209
+ `{stimulus_source_hash, divergent_pair, rejected, differences}` whose
210
+ `stimulus_source_hash` equals the **source-tree hash of the shipped
211
+ `assets/calibration/swiftui-twin-divergent`** recomputed by the grader,
212
+ with `rejected === true`, a non-empty structured `differences`, and
213
+ `judge.framing === "forced-difference-3"`. The legacy bare
214
+ `negative_control:"passed"` string is **rejected as unbound**. Any
215
+ deviation ⇒ any `verdict:"YES"` is **VOID** ⇒ cannot be `converged`
216
+ (downgraded to `needs-human`, recorded `negative_control: failed`,
217
+ `reason: negative-control-unbound`, exit 3).
218
+ - **Calibration provenance (gate ⇐ floor; twin source identity; floor ⇐
219
+ sanity envelope)**: the grader asserts
220
+ `calibration.json.schema == "h5-to-swiftui/calibration@1"`; **recomputes**
221
+ the structured gate from `floor` and rejects a deviating `gate`
222
+ (`gate-floor-mismatch`, exit 1 — binds the gate TO the floor); recomputes
223
+ the bundled `h5-twin`/`swiftui-twin` **source-file** tree hashes from the
224
+ shipped assets and rejects a mismatching `calibration_source`
225
+ (`calibration-twin-mismatch`, exit 1 — binds the twin source IDENTITY, not
226
+ the floor value); and asserts the `floor` *value* satisfies
227
+ `calibrate-render.mjs`'s own sanity envelope via the shared
228
+ `scripts/_calib-consts.mjs` (`floor-implausible`, exit 1 — a floor
229
+ calibrate-render could not have emitted is rejected, killing the
230
+ absurd-floor attack). **Residual:** a `floor` *within* that envelope yet
231
+ looser than the true measured floor is trusted — the grader cannot
232
+ re-render the bundled twins to re-measure it (named residual (2) under
233
+ "Trust model & residual").
234
+ - **Build accounting**: a present `blocked.json` for the component, or no
235
+ built+gate-passing iteration ⇒ `blocked`/`needs-human`, **never**
236
+ `converged`.
237
+
238
+ **Enforced elsewhere / by process (NOT by `evaluate-convergence.mjs`):**
239
+
240
+ - **Idiomatic lint** (Stage 4): a component whose layout is predominantly
241
+ `.position()`/`.offset()` absolute pinning is rejected at rewrite time —
242
+ pixel-pushed WebView-in-SwiftUI, not a native rewrite.
243
+ - **Whole-assembled-screen trend check — Stage-7 manual cross-check, NOT an
244
+ automated Stage-5 guard.** No executable component diffs the assembled
245
+ screen during the loop, so it is **not** advertised as active automation.
246
+ After assembly (Stage 7) a human / separate verification pass compares the
247
+ assembled-screen capture vs the reference so per-component `converged`
248
+ cannot mask a broken composition. This is a documented **known
249
+ limitation**: until that Stage-7 check runs, per-component verdicts are
250
+ authoritative only at component granularity.
251
+ - **Determinism**: artifact header pins sim/browser/model/seed (passed
252
+ through from `calibration.json`); the dry-run runs twice and must yield the
253
+ **same verdict** (not same pixels).
254
+
255
+ ## `pixel-diff.mjs` output schema (`h5-to-swiftui/diff@1`)
256
+
257
+ `phash_hamming` is raw data; `phash_fast_candidate` is necessary-not-
258
+ sufficient (NOT a converged signal — there is **no** `phash_converged`
259
+ field). `iou` is `null` unless `--gen-bbox-map` supplied the generated bbox
260
+ (a `null` iou is a gate FAIL, never an assumed 1.0).
261
+ `inter_component_spacing_delta_pt` is `null` (with an explanatory
262
+ `inter_component_spacing_delta_note`) when generated positions are unknown —
263
+ never fabricated zeros.
264
+
265
+ ```json
266
+ {
267
+ "schema": "h5-to-swiftui/diff@1",
268
+ "component": "ProductCard",
269
+ "phash_hamming": 7,
270
+ "phash_fast_candidate": false,
271
+ "regions": {
272
+ "text": [{"mark": 2, "iou": 0.95, "fg_deltaE": 1.2, "bg_deltaE": 0.6}],
273
+ "nontext": [{"mark": 1, "ssim": 0.991, "deltaE_p95": 1.4}]
274
+ },
275
+ "inter_component_spacing_delta_pt": null,
276
+ "inter_component_spacing_delta_note": "null: generated component positions unknown (--gen-bbox-map absent); reporting null, NOT zeros",
277
+ "diff_mask_png": ".h5-to-swiftui/diff/ProductCard.iter2.mask.png",
278
+ "global_ssim": 0.984,
279
+ "diff_pixel_fraction": 0.0142
280
+ }
281
+ ```
282
+
283
+ When no generated bbox is supplied, a text region is instead:
284
+ `{"mark": 2, "iou": null, "iou_note": "no generated bbox supplied (--gen-bbox-map absent or no match) — IoU is null, NOT assumed 1.0", "fg_deltaE": 1.2, "bg_deltaE": 0.6}`.
285
+
286
+ ## `convergence/<component>.json` schema (`h5-to-swiftui/convergence@1`)
287
+
288
+ Emitted **only** by `scripts/evaluate-convergence.mjs` (pinned-version header
289
+ passed through from `calibration.json`):
290
+
291
+ ```json
292
+ {
293
+ "schema": "h5-to-swiftui/convergence@1",
294
+ "component": "ProductCard",
295
+ "pinned": { "sim_runtime": "...", "browser": "...", "model_id": "...", "temperature": 0 },
296
+ "calibration_floor": { "ssim_nontext": 0.991, "deltaE_p95": 1.6, "text_iou": 0.94 },
297
+ "gate": {
298
+ "converged": {"ssim_nontext_min": 0.986, "deltaE_p95_max": 2.0, "text_iou_min": 0.91, "require_judge_yes": true},
299
+ "close": {"ssim_nontext_min": 0.981, "deltaE_p95_max": 2.4, "text_iou_min": 0.88, "require_judge_equiv": true}
300
+ },
301
+ "iterations": [
302
+ {"i": 1, "diff": {"global_ssim": 0.90, "phash_hamming": 20, "diff_pixel_fraction": 0.2},
303
+ "built": true, "gate_passed": false, "close_band_passed": false,
304
+ "phash_fast_candidate": false, "gate_detail": "nontext mark 1 ssim 0.9 < 0.986"},
305
+ {"i": 2, "diff": {"global_ssim": 0.993, "phash_hamming": 3, "diff_pixel_fraction": 0.02},
306
+ "built": true, "gate_passed": true, "close_band_passed": true,
307
+ "phash_fast_candidate": true, "gate_detail": "all sub-gates passed"},
308
+ {"i": 3, "diff": {"global_ssim": 0.999, "phash_hamming": 2, "diff_pixel_fraction": 0.001},
309
+ "built": false, "gate_passed": false, "close_band_passed": false,
310
+ "phash_fast_candidate": true, "gate_detail": "all sub-gates passed"}
311
+ ],
312
+ "masks": [{"x":0,"y":0,"w":12,"h":12,"reason":"live timestamp"}],
313
+ "mask_fraction": 0.0032,
314
+ "mask_budget": 0.10,
315
+ "component_area_px": 44800,
316
+ "component_area_source": "flag 320x140",
317
+ "judge": {
318
+ "negative_control": "passed",
319
+ "negative_control_binding": {
320
+ "expected_divergent_source_sha256": "…64-hex (recomputed from shipped swiftui-twin-divergent)…",
321
+ "claimed_stimulus_source_hash": "…64-hex (from judge.json)…",
322
+ "rejected": true,
323
+ "bound": true,
324
+ "reasons": []
325
+ },
326
+ "framing": "forced-difference-3",
327
+ "differences": [
328
+ {"desc": "1px baseline shift on price label", "severity": "sub-perceptual"},
329
+ {"desc": "shadow blur 0.5pt softer", "severity": "sub-perceptual"},
330
+ {"desc": "—", "severity": "none"}
331
+ ],
332
+ "verdict": "YES",
333
+ "verdict_honored": true
334
+ },
335
+ "calibration_provenance": {
336
+ "schema_ok": true,
337
+ "gate_recomputed_from_floor": true,
338
+ "calibration_source": {
339
+ "h5_twin_source_sha256": "…64-hex…",
340
+ "swiftui_twin_source_sha256": "…64-hex…",
341
+ "verified_against_bundled": true
342
+ }
343
+ },
344
+ "guard_violations": [],
345
+ "best_iteration": 2,
346
+ "tier": "converged",
347
+ "tier_reason": "gate_passed(best i=2) AND judge YES with valid negative control",
348
+ "residual": {"ssim_nontext": 0.993, "deltaE_p95": 1.4, "text_iou": 0.96},
349
+ "evaluated_at": "ISO8601"
350
+ }
351
+ ```
352
+
353
+ `best_iteration` is the script's choice (built AND gate-passing only); a
354
+ non-empty `guard_violations` array and/or a `tier` of `needs-human`/`blocked`
355
+ corresponds to a non-zero process exit (3 / 4) so the verdict cannot be
356
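+ silently ignored. Note that in this output artifact `judge.negative_control` is the grader's recorded status (`passed` / `failed`), with the binding evidence in `negative_control_binding`; it is not the structured `negative_control` input object that `judge.json` must supply.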
357
+
358
+ ## Stop conditions
359
+
360
+ - `converged` or `close` reached ⇒ accept best gate-passing iteration.
361
+ - Cap hit, no gate-passing+buildable iteration ⇒ `needs-human`.
362
+ - Mask budget exceeded / negative control failed / idiomatic-lint failed ⇒
363
+ `needs-human` regardless of metrics.
364
+ - No simulator or persistent build failure ⇒ `blocked` (Stage 5 skipped for
365
+ that component; counted as `needs-human` in the summary, never success).