claudecode-omc 5.6.6 → 5.6.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.local/skills/h5-to-swiftui/SKILL.md +201 -0
- package/.local/skills/h5-to-swiftui/assets/calibration/README.md +176 -0
- package/.local/skills/h5-to-swiftui/assets/calibration/h5-twin/index.html +52 -0
- package/.local/skills/h5-to-swiftui/assets/calibration/h5-twin/style.css +133 -0
- package/.local/skills/h5-to-swiftui/assets/calibration/swiftui-twin/Package.swift +26 -0
- package/.local/skills/h5-to-swiftui/assets/calibration/swiftui-twin/Sources/CalibrationScreen/CalibrationScreen.swift +142 -0
- package/.local/skills/h5-to-swiftui/assets/calibration/swiftui-twin-divergent/Package.swift +32 -0
- package/.local/skills/h5-to-swiftui/assets/calibration/swiftui-twin-divergent/Sources/CalibrationScreenDivergent/CalibrationScreenDivergent.swift +122 -0
- package/.local/skills/h5-to-swiftui/assets/calibration/tokens.json +42 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/index.html +14 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/package.json +20 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/public/api/articles/001.json +96 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/public/api/articles/index.json +89 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/App.jsx +22 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/App.module.css +11 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/components/ArticleCard.jsx +53 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/components/ArticleCard.module.css +139 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/components/NavBar.jsx +37 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/components/NavBar.module.css +72 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/components/TagCloud.jsx +30 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/components/TagCloud.module.css +50 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/components/TrendChart.jsx +159 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/components/TrendChart.module.css +21 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/main.jsx +12 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/screens/ArticleScreen.jsx +182 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/screens/ArticleScreen.module.css +294 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/screens/FeedScreen.jsx +147 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/screens/FeedScreen.module.css +161 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/styles/global.css +50 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/src/styles/tokens.css +103 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-react/vite.config.js +6 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-vanilla/data/tasks.js +67 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-vanilla/index.html +26 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-vanilla/router.js +73 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-vanilla/screens/detail.js +164 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-vanilla/screens/home.js +53 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-vanilla/screens/list.js +87 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-vanilla/styles/app.css +342 -0
- package/.local/skills/h5-to-swiftui/assets/sample-h5-vanilla/styles/tokens.css +68 -0
- package/.local/skills/h5-to-swiftui/references/css-to-swiftui-map.md +205 -0
- package/.local/skills/h5-to-swiftui/references/design-token-extraction.md +209 -0
- package/.local/skills/h5-to-swiftui/references/high-risk-triage.md +209 -0
- package/.local/skills/h5-to-swiftui/references/render-equivalence-calibration.md +193 -0
- package/.local/skills/h5-to-swiftui/references/stack-detection.md +160 -0
- package/.local/skills/h5-to-swiftui/references/visual-diff-loop-protocol.md +365 -0
- package/.local/skills/h5-to-swiftui/scripts/_calib-consts.mjs +150 -0
- package/.local/skills/h5-to-swiftui/scripts/_imglib.mjs +547 -0
- package/.local/skills/h5-to-swiftui/scripts/_provenance.mjs +123 -0
- package/.local/skills/h5-to-swiftui/scripts/calibrate-render.mjs +625 -0
- package/.local/skills/h5-to-swiftui/scripts/capture-reference.mjs +386 -0
- package/.local/skills/h5-to-swiftui/scripts/detect-stack.mjs +305 -0
- package/.local/skills/h5-to-swiftui/scripts/evaluate-convergence.mjs +1093 -0
- package/.local/skills/h5-to-swiftui/scripts/extract-tokens.mjs +600 -0
- package/.local/skills/h5-to-swiftui/scripts/mark-overlay.mjs +379 -0
- package/.local/skills/h5-to-swiftui/scripts/pixel-diff.mjs +530 -0
- package/.local/skills/h5-to-swiftui/scripts/sim-screenshot.sh +544 -0
- package/bundled/manifest.json +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# Stack Detection — Stage 0
|
|
2
|
+
|
|
3
|
+
Used by `scripts/detect-stack.mjs`. Runs **before any other stage**. Determines
|
|
4
|
+
whether the project is in v1 scope; out-of-scope projects stop here with an
|
|
5
|
+
explicit written report — they are never guessed past.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Why native rewrite beats WebView shell and mechanical transpilation
|
|
10
|
+
|
|
11
|
+
Condensed from findings.md RQ1 (sources: kean.blog; Apple "Composing custom
|
|
12
|
+
layouts" WWDC22-10056; MDN Specificity; LogRocket / Yoga 3.0; dbushell; bjango;
|
|
13
|
+
fatbobman; MDN animations; tonsky.me; Android→iOS pilot 2507.16037).
|
|
14
|
+
|
|
15
|
+
| Route | Fidelity ceiling | Core failure mechanism |
|
|
16
|
+
|---|---|---|
|
|
17
|
+
| WebView shell (Capacitor / Cordova / Ionic) | ~70–80% | Renders in WebKit, not Core Animation/Metal; ProMotion 120 Hz unavailable to WebKit-rendered content; Dynamic Type needs full reload; rubber-band scroll constant diverges; haptics absent; 2 extra OS processes per instance |
|
|
18
|
+
| React Native / NativeScript | ~85–92% | Yoga bridge drops 5 CSS flex properties (`flex-basis`, `flex` shorthand, `wrap-reverse`, `order`, column/row-gap individually); bridge latency; not SwiftUI semantics |
|
|
19
|
+
| Flutter (Impeller) | ~80–85% | Own GPU renderer independent of UIKit; iOS conventions require manual Cupertino override throughout |
|
|
20
|
+
| Mechanical transpilation (DOM→view tree) | **<60%** | Six structural incompatibilities listed below — errors compound per nesting level |
|
|
21
|
+
| **Native rewrite** (this skill) | **100% platform ceiling** | Requires a render-measure-feedback loop; see `visual-diff-loop-protocol.md` |
|
|
22
|
+
|
|
23
|
+
### Six reasons mechanical transpilation cannot reach pixel-level fidelity
|
|
24
|
+
|
|
25
|
+
1. **Inverted layout protocol.** CSS: parent establishes a containing block;
|
|
26
|
+
child size = content + padding + border + margin. SwiftUI: parent _proposes_
|
|
27
|
+
a size; child _chooses_; parent must honor it. The information-flow direction
|
|
28
|
+
is reversed. Reconstructing it from CSS is underdetermined for nested
|
|
29
|
+
layouts. (kean.blog; Apple "Composing custom layouts".)
|
|
30
|
+
|
|
31
|
+
2. **No cascade.** CSS resolves styles by global selector specificity +
|
|
32
|
+
inheritance + `!important`. SwiftUI styling is local modifier-chain order +
|
|
33
|
+
downward `environment`. A transpiler needs a full cascade engine; any cascade
|
|
34
|
+
error is a visible defect. (MDN Specificity.)
|
|
35
|
+
|
|
36
|
+
3. **No 1:1 flex mapping.** Yoga (the most mature CSS-flex bridge) drops
|
|
37
|
+
`flex-basis`, the `flex` shorthand, `wrap-reverse`, `order`, and individual
|
|
38
|
+
`row-gap`/`column-gap`. Errors compound per nesting level. (LogRocket /
|
|
39
|
+
Yoga 3.0.)
|
|
40
|
+
|
|
41
|
+
4. **Font metric divergence.** WebKit implements `line-height: normal ≈ 1.2`
|
|
42
|
+
against the CSS font spec; SwiftUI uses CoreText metrics directly. Identical
|
|
43
|
+
`16px` declarations yield different line height, advance width, and
|
|
44
|
+
baseline, which moves the wrap column, which in turn shifts all subsequent layout. (dbushell;
|
|
45
|
+
bjango.)
|
|
46
|
+
|
|
47
|
+
5. **Animation model mismatch.** CSS = timeline keyframes on absolute time;
|
|
48
|
+
SwiftUI = state-diff interpolation on Core Animation. Mechanical
|
|
49
|
+
`@keyframes` → `.animation()` diverges in timing, easing, and
|
|
50
|
+
interruptibility. (fatbobman; MDN animations.)
|
|
51
|
+
|
|
52
|
+
6. **Non-deterministic adaptive spacing.** SwiftUI inserts type-dependent
|
|
53
|
+
spacing and context-sensitive control styling with no CSS source to
|
|
54
|
+
translate from. (tonsky.me.)
|
|
55
|
+
|
|
56
|
+
**Takeaway:** native rewrite is the only route to the 100% ceiling; transpilation
|
|
57
|
+
is at best a draft generator; a render-diff feedback loop is mandatory because
|
|
58
|
+
these divergences are invisible to static code diffing.
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Detection heuristics table
|
|
63
|
+
|
|
64
|
+
`detect-stack.mjs` reads `package.json` + a shallow source scan (`.js/.ts/.jsx/
|
|
65
|
+
.tsx/.vue/.svelte` in `src/` or root, max 500 files). No network calls.
|
|
66
|
+
|
|
67
|
+
### Framework detection
|
|
68
|
+
|
|
69
|
+
| Framework | `package.json` deps (any of) | Source signatures |
|
|
70
|
+
|---|---|---|
|
|
71
|
+
| React | `react`, `react-dom`, `@types/react` | `import React`, `JSX.Element`, `.jsx`/`.tsx` extensions, `ReactDOM.render`, `createRoot` |
|
|
72
|
+
| Vue | `vue`, `@vue/core`, `nuxt` | `.vue` files, `<template>` + `<script setup>`, `createApp` |
|
|
73
|
+
| Svelte | `svelte`, `@sveltejs/kit` | `.svelte` files, `<script>` + `<style>` co-located, `$:` reactivity |
|
|
74
|
+
| Angular | `@angular/core`, `@angular/common` | `@Component`, `@NgModule`, `.component.ts` suffix pattern |
|
|
75
|
+
| Solid | `solid-js`, `@solidjs/router` | `createSignal`, `createEffect`, `.jsx` with no `react` dep |
|
|
76
|
+
| Vanilla | none of the above in deps | Bare `addEventListener`, no framework imports, `.js`/`.ts` only |
|
|
77
|
+
|
|
78
|
+
Confidence scoring: `high` = dep present + source signature found; `medium` =
|
|
79
|
+
dep only (source scan inconclusive); `low` = source signature only (dep absent,
|
|
80
|
+
e.g. CDN-loaded). At `low` confidence, `detect-stack.mjs` logs a warning and
|
|
81
|
+
continues; `in_v1_scope` reflects the most-likely classification.
|
|
82
|
+
|
|
83
|
+
### Build tool detection
|
|
84
|
+
|
|
85
|
+
| Build tool | Primary signal | Secondary signal |
|
|
86
|
+
|---|---|---|
|
|
87
|
+
| Vite | `vite` in deps or devDeps; `vite.config.{js,ts}` present | `"dev": "vite"` in scripts |
|
|
88
|
+
| webpack | `webpack` in deps; `webpack.config.{js,cjs,mjs}` present | `"build": "webpack"` in scripts |
|
|
89
|
+
| Next.js | `next` in deps; `next.config.{js,mjs,ts}` present | `pages/` or `app/` dir with `page.{tsx,jsx}` |
|
|
90
|
+
| Nuxt | `nuxt` in deps; `nuxt.config.{ts,js}` present | `pages/` dir with `.vue` files |
|
|
91
|
+
| CRA | `react-scripts` in deps | `"start": "react-scripts start"` in scripts |
|
|
92
|
+
| None | none of the above | Single `index.html` + inline `<script>` or `<script type=module>` |
|
|
93
|
+
|
|
94
|
+
### Styling detection
|
|
95
|
+
|
|
96
|
+
| Styling system | Detection signal |
|
|
97
|
+
|---|---|
|
|
98
|
+
| Tailwind v3 | `tailwindcss@^3` in deps; `tailwind.config.{js,ts}` present; class names like `text-sm`, `flex`, `bg-gray-100` in source |
|
|
99
|
+
| Tailwind v4 | `tailwindcss@^4` in deps; `@import "tailwindcss"` or `@theme` block in a `.css` file; no `tailwind.config.*` |
|
|
100
|
+
| CSS Modules | `.module.css` / `.module.scss` files present; `import styles from '…module.css'` in source |
|
|
101
|
+
| Sass / SCSS | `.scss` or `.sass` files; `sass` or `node-sass` in deps |
|
|
102
|
+
| CSS-in-JS | `styled-components`, `@emotion/react`, `@emotion/styled`, `@stitches/react`, or `linaria` in deps |
|
|
103
|
+
| Plain CSS | None of the above; `.css` files imported directly |
|
|
104
|
+
|
|
105
|
+
### Router detection
|
|
106
|
+
|
|
107
|
+
| Router | Detection signal |
|
|
108
|
+
|---|---|
|
|
109
|
+
| React Router | `react-router-dom` or `react-router` in deps |
|
|
110
|
+
| TanStack Router | `@tanstack/react-router` in deps |
|
|
111
|
+
| Next.js router | Next.js detected (built-in) |
|
|
112
|
+
| Vue Router | `vue-router` in deps |
|
|
113
|
+
| Wouter | `wouter` in deps |
|
|
114
|
+
| None | Single-page with no router dep |
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## v1 scope gate
|
|
119
|
+
|
|
120
|
+
**v1 supports: framework ∈ {vanilla, React}.**
|
|
121
|
+
|
|
122
|
+
All other classifications (Vue, Svelte, Angular, Solid, or `unknown`) are **out of v1 scope**.
|
|
123
|
+
For an out-of-scope project, `detect-stack.mjs` writes `stack-report.json` and **exits with code 2** (not 0,
|
|
124
|
+
not 1 — pipeline scripts detect this as a scope-stop, not an error).
|
|
125
|
+
|
|
126
|
+
The pipeline does **not** attempt to convert an out-of-scope project. It does not
|
|
127
|
+
guess. It does not warn and continue. It stops cleanly so the caller knows exactly
|
|
128
|
+
what happened.
|
|
129
|
+
|
|
130
|
+
### `stack-report.json` schema
|
|
131
|
+
|
|
132
|
+
```json
|
|
133
|
+
{
|
|
134
|
+
"schema": "h5-to-swiftui/stack-report@1",
|
|
135
|
+
"framework": "vue",
|
|
136
|
+
"buildTool": "vite",
|
|
137
|
+
"styling": ["css-modules", "tailwind-v3"],
|
|
138
|
+
"router": "vue-router",
|
|
139
|
+
"confidence": "high",
|
|
140
|
+
"in_v1_scope": false,
|
|
141
|
+
"stop_reason": "detected=vue, out of v1 scope; v1 supports vanilla|React only",
|
|
142
|
+
"detected_at": "2026-05-19T12:00:00Z"
|
|
143
|
+
}
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
| Field | Type | Notes |
|
|
147
|
+
|---|---|---|
|
|
148
|
+
| `framework` | string | `vanilla` \| `react` \| `vue` \| `svelte` \| `angular` \| `solid` \| `unknown` |
|
|
149
|
+
| `buildTool` | string | `vite` \| `webpack` \| `next` \| `nuxt` \| `cra` \| `none` \| `unknown` |
|
|
150
|
+
| `styling` | string[] | Array; a project may use multiple (e.g. `["tailwind-v3","css-modules"]`) |
|
|
151
|
+
| `router` | string | Primary detected router or `none` |
|
|
152
|
+
| `confidence` | string | `high` \| `medium` \| `low` |
|
|
153
|
+
| `in_v1_scope` | boolean | `true` only when `framework` is `vanilla` or `react` |
|
|
154
|
+
| `stop_reason` | string | Present only when `in_v1_scope: false` |
|
|
155
|
+
| `detected_at` | string | ISO-8601 timestamp |
|
|
156
|
+
|
|
157
|
+
When `in_v1_scope: true`, the pipeline proceeds to Stage 1. The `stack-report.json`
|
|
158
|
+
is retained as a Stage 0 artifact and its fields inform Stage 1 (token extraction
|
|
159
|
+
strategy differs for plain CSS vs Tailwind v4 vs CSS-in-JS) and Stage 4 (component
|
|
160
|
+
pattern differs for class components vs function+hooks vs vanilla DOM).
|
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
# Stage 5 — Visual-Diff Convergence Loop Protocol
|
|
2
|
+
|
|
3
|
+
The core mechanism. Operates **per component** (component granularity is the
|
|
4
|
+
primary anti-oscillation defense — a fix to one component cannot break
|
|
5
|
+
another). Consumes `calibration.json` (Stage 2.5) and the per-component
|
|
6
|
+
**snapshot host** (Stage 4 hard output contract).
|
|
7
|
+
|
|
8
|
+
## One iteration
|
|
9
|
+
|
|
10
|
+
1. **Render** the component via its snapshot host in the simulator
|
|
11
|
+
(`sim-screenshot.sh`). Normalize per `calibration.json.transform`
|
|
12
|
+
(crop → resample → P3→sRGB) so it co-registers with the Stage-2
|
|
13
|
+
reference crop for that component (`reference/<screen>/<component>.png`).
|
|
14
|
+
2. **Diff cascade** (`pixel-diff.mjs`):
|
|
15
|
+
- pHash Hamming is recorded as raw data plus a `phash_fast_candidate`
|
|
16
|
+
boolean (true when the Hamming distance is ≤ 5). **It is necessary-not-sufficient and NEVER a
|
|
17
|
+
short-circuit to `converged`** — `pixel-diff.mjs` does not decide the
|
|
18
|
+
verdict; `evaluate-convergence.mjs` does, and it always requires the
|
|
19
|
+
region gate to pass regardless of pHash.
|
|
20
|
+
- split component into **text regions** vs **non-text regions** using the
|
|
21
|
+
DOM bbox map:
|
|
22
|
+
- text → score by layout-box **IoU** + resolved **token-color ΔE**
|
|
23
|
+
(foreground/background), **never glyph-raster SSIM** (cross-renderer
|
|
24
|
+
glyph rasters are not comparable — Stage 2.5). IoU is the **real**
|
|
25
|
+
`bboxIoU(refBox, genBox)` only when `--gen-bbox-map` supplies the
|
|
26
|
+
generated bbox; otherwise `iou` is **`null`** (never a fabricated
|
|
27
|
+
1.0), and a `null` iou counts as a gate **FAIL**.
|
|
28
|
+
- non-text → SSIM + CIEDE2000 + AA-tolerant diff mask.
|
|
29
|
+
- **inter-component spacing delta** is `null` unless generated positions
|
|
30
|
+
are supplied via `--gen-bbox-map` (never fabricated `{top:0,leading:0}`).
|
|
31
|
+
- Inputs MUST be **co-registered** (same dimensions) — `pixel-diff.mjs`
|
|
32
|
+
does not normalize and **hard-errors (exit 1)** on a size mismatch;
|
|
33
|
+
pre-normalize via Stage 2.5 `calibration.transform` first.
|
|
34
|
+
3. **Feedback payload** to the corrector LLM:
|
|
35
|
+
- reference + generated, both with **identical Set-of-Mark** numbered
|
|
36
|
+
overlays (`mark-overlay.mjs`),
|
|
37
|
+
- red diff-mask overlay on the reference,
|
|
38
|
+
- structured JSON delta (schema below),
|
|
39
|
+
- **two-stage critique**: (a) visual/perceptual NL critique, then (b)
|
|
40
|
+
code-level NL→SwiftUI-patch recommendation (separation of modalities).
|
|
41
|
+
4. **Patch**: corrector emits a **structured per-file diff** (NOT a
|
|
42
|
+
whole-file rewrite — enables reversion control), constrained to the
|
|
43
|
+
`tokens.json` vocabulary; prior-iteration correction history is injected
|
|
44
|
+
("you changed X which worsened Y; do not revert").
|
|
45
|
+
5. **Recompile.** Compile-failure branch: revert working tree to the best
|
|
46
|
+
gate-passing iteration, **consume one iteration**; if the cap is hit with
|
|
47
|
+
no buildable+passing iteration ⇒ `needs-human` (never `converged`).
|
|
48
|
+
6. **Re-measure** (step 2). Retain best **only among gate-passing
|
|
49
|
+
iterations** (monotone-or-fail: a non-passing run is never presented as a
|
|
50
|
+
result).
|
|
51
|
+
|
|
52
|
+
Cap = `--max-iter` (default 3). Diminishing returns past 3 are documented in
|
|
53
|
+
`../../../.omc/conductor/tracks/h5-to-swiftui/research/findings.md` RQ4.
|
|
54
|
+
|
|
55
|
+
## The verdict is emitted ONLY by `scripts/evaluate-convergence.mjs`
|
|
56
|
+
|
|
57
|
+
`convergence/<component>.json` is **never hand-written** by the orchestrator.
|
|
58
|
+
The orchestrator runs `pixel-diff.mjs` per iteration, records each
|
|
59
|
+
iteration's `built` flag, gathers masks + the independent-judge result, then
|
|
60
|
+
calls `scripts/evaluate-convergence.mjs`, which **mechanically** decides the
|
|
61
|
+
tier and **exits non-zero on any guard violation** (exit 3 =
|
|
62
|
+
needs-human/guard violation, exit 4 = blocked, exit 1 = rejected input/provenance, exit 0 = converged/close) so a
|
|
63
|
+
pipeline cannot ignore a failed gate. Every guard below is enforced *in that
|
|
64
|
+
script's code*, not by prose. The script — not the caller — chooses
|
|
65
|
+
`best_iteration` and computes `gate_passed` per iteration; caller-supplied
|
|
66
|
+
values for those are ignored.
|
|
67
|
+
|
|
68
|
+
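Invocation (the trailing `# …` annotations are explanatory only; remove them before running, because a comment after a `\` breaks shell line continuation):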
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
node scripts/evaluate-convergence.mjs \
|
|
72
|
+
--iterations iterations.json \ # [{i,diff_json_path,built}, ...]
|
|
73
|
+
--calibration calibration.json \ # STRUCTURED numeric gate (not string DSL)
|
|
74
|
+
--judge judge.json \ # {negative_control,framing,differences,verdict}
|
|
75
|
+
--masks masks.json \ # [{x,y,w,h,reason}] (reason required)
|
|
76
|
+
--component-area 320x140 \ # mask-fraction denominator
|
|
77
|
+
--component ProductCard \
|
|
78
|
+
--out .h5-to-swiftui/convergence/ProductCard.json
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Trust model & residual (honest disclosure — spec §1.1)
|
|
82
|
+
|
|
83
|
+
The skill's own thesis (§1.1) is *report the residual, do not pretend it is
|
|
84
|
+
zero*. Applied to the grader's **own** trust model:
|
|
85
|
+
|
|
86
|
+
**Mechanically bound (fail-closed, in `evaluate-convergence.mjs` code):**
|
|
87
|
+
|
|
88
|
+
- the structured `gate` is **recomputed from `calib.floor`** and a deviating
|
|
89
|
+
gate is rejected (`gate-floor-mismatch`, exit 1) — this binds the *gate to
|
|
90
|
+
the floor* (a hand-loosened gate is rejected unless the floor is loosened);
|
|
91
|
+
it does not bind the floor's *value*;
|
|
92
|
+
- the **identity of the bundled twin source files** (excluding build
|
|
93
|
+
output/dotfiles) is bound via `calibration_source` source-tree hashes
|
|
94
|
+
recomputed from the actual `assets/calibration/{h5-twin,swiftui-twin}`
|
|
95
|
+
(`calibration-twin-mismatch`, exit 1) — this binds the twin *source
|
|
96
|
+
identity*, NOT the measured `floor` *value* (the real public twin hashes
|
|
97
|
+
can be copied alongside a loose floor);
|
|
98
|
+
- the **`floor` value** is asserted to satisfy `calibrate-render.mjs`'s OWN
|
|
99
|
+
sanity envelope via the shared `scripts/_calib-consts.mjs`
|
|
100
|
+
(`ssim_nontext ≥ 0.95`, non-null `text_iou ≥ 0.9`, metric-valid
|
|
101
|
+
`deltaE_p95`); a floor calibrate-render could not have emitted (it writes
|
|
102
|
+
`blocked.json` below this) is rejected (`floor-implausible`, exit 1) — this
|
|
103
|
+
kills the absurd-floor attack but does NOT re-measure the floor;
|
|
104
|
+
- the judge negative control is bound to the **shipped, hash-pinned**
|
|
105
|
+
`assets/calibration/swiftui-twin-divergent` source files (structured +
|
|
106
|
+
rejected + framed; `negative-control-unbound` VOIDs any `YES`, exit 3);
|
|
107
|
+
- pHash never short-circuits; a `null` text IoU is a FAIL; best-of-N is
|
|
108
|
+
monotone-or-fail; the mask budget is verified or the run refuses to pass;
|
|
109
|
+
`calibration.json.schema` is asserted.
|
|
110
|
+
|
|
111
|
+
**Named, irreducible residuals (NOT zero — BOTH stated, neither hidden):** a
|
|
112
|
+
fully zero-trust verdict is impossible because *something must run the
|
|
113
|
+
renders and something must measure the floor*.
|
|
114
|
+
|
|
115
|
+
1. **The grader cannot re-execute the simulator renders.** It trusts the
|
|
116
|
+
per-iteration `pixel-diff.mjs` JSONs were produced by running the **real
|
|
117
|
+
`pixel-diff.mjs` on real `sim-screenshot.sh` renders** (bounded by
|
|
118
|
+
`sim-screenshot.sh`'s no-fake build/env spine — no simulator / no build ⇒
|
|
119
|
+
`blocked`/`needs-human`, never converged).
|
|
120
|
+
2. **The grader cannot re-measure the calibration floor.** It asserts the
|
|
121
|
+
supplied `floor` satisfies `calibrate-render.mjs`'s own sanity envelope
|
|
122
|
+
and recomputes the gate from it, but a `floor` *within* that envelope yet
|
|
123
|
+
looser than the TRUE measured floor is trusted (it cannot re-render the
|
|
124
|
+
bundled twins to re-derive the real number). Mitigated by the
|
|
125
|
+
orchestrator's contractual obligation to run the real,
|
|
126
|
+
sanity/flat-image-spined `calibrate-render.mjs` and by the human-readable
|
|
127
|
+
`calibration_provenance` recorded in the convergence artifact.
|
|
128
|
+
|
|
129
|
+
These are the deliberate, documented boundaries: the deliverable is
|
|
130
|
+
**"maximally provenance-bound + honestly disclosed residual"**, explicitly
|
|
131
|
+
*not* "zero-trust". The **whole-assembled-screen trend check remains a
|
|
132
|
+
Stage-7 manual cross-check**, not an automated Stage-5 guard (unchanged;
|
|
133
|
+
still honestly scoped).
|
|
134
|
+
|
|
135
|
+
## Tiered verdict (honest)
|
|
136
|
+
|
|
137
|
+
`evaluate-convergence.mjs` reads the **structured** `calibration.json.gate`
|
|
138
|
+
(numeric `gate.converged` / `gate.close` objects — a legacy string-DSL gate
|
|
139
|
+
is rejected as un-enforceable) and evaluates it against each iteration's
|
|
140
|
+
`pixel-diff.mjs` output:
|
|
141
|
+
|
|
142
|
+
- **`converged`** — the *script's* chosen best iteration (built AND
|
|
143
|
+
gate-passing) passes the converged gate vs the *measured* floor AND the
|
|
144
|
+
independent judge returns `YES` with a valid negative control.
|
|
145
|
+
- **`close`** — structural metrics within the `close` band AND judge returns
|
|
146
|
+
`visually-equivalent-residual-subperceptual`. This is an **honest accept**,
|
|
147
|
+
explicitly distinct from faking — "as good as this renderer pair allows".
|
|
148
|
+
- **`needs-human`** — anything else; recorded with full evidence + the
|
|
149
|
+
machine-readable guard reason. A separate fourth tier, **`blocked`**, applies when a `blocked.json` is present
|
|
150
|
+
for the component (never converged).
|
|
151
|
+
|
|
152
|
+
## Independent judge (anti-collusion)
|
|
153
|
+
|
|
154
|
+
- Separate sub-agent, separate lane (OMC: never self-approve in same
|
|
155
|
+
context). Dispatch via `Agent` (use `verifier` or `qa-tester`), NOT the
|
|
156
|
+
corrector.
|
|
157
|
+
- **Negative control (BOUND to the shipped divergent twin)**: before its
|
|
158
|
+
verdict is trusted, feed the judge the known-divergent pair
|
|
159
|
+
`h5-twin` vs the bundled `assets/calibration/swiftui-twin-divergent`; it
|
|
160
|
+
MUST return a rejection. This is **not** a free `"passed"` string — the old
|
|
161
|
+
bare form is rejected as unbound (it asserts a judge run nothing verified).
|
|
162
|
+
`judge.negative_control` MUST be the structured artifact below, and
|
|
163
|
+
`scripts/evaluate-convergence.mjs` recomputes the divergent twin's
|
|
164
|
+
source-tree hash from the shipped asset and fails closed
|
|
165
|
+
(`negative-control-unbound`, any `YES` VOID, exit 3) unless every field
|
|
166
|
+
matches:
|
|
167
|
+
|
|
168
|
+
```json
|
|
169
|
+
"negative_control": {
|
|
170
|
+
"stimulus_source_hash": "<sha256 source-tree hash of the bundled assets/calibration/swiftui-twin-divergent>",
|
|
171
|
+
"divergent_pair": "h5-twin vs swiftui-twin-divergent",
|
|
172
|
+
"rejected": true,
|
|
173
|
+
"differences": [ { "desc": "green bg vs light", "severity": "major" }, … ]
|
|
174
|
+
}
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
`stimulus_source_hash` ≠ the recomputed bundled divergent hash, OR
|
|
178
|
+
`rejected !== true`, OR an empty/unstructured `differences`, OR
|
|
179
|
+
`judge.framing !== "forced-difference-3"` ⇒ the negative control is **unbound**,
|
|
180
|
+
any `YES` is **VOID**, tier forced `needs-human`, exit 3
|
|
181
|
+
(`reason: negative-control-unbound`). This ties "the judge really rejected
|
|
182
|
+
the *known-divergent* pair" to the shipped, hash-pinned asset rather than a
|
|
183
|
+
free string.
|
|
184
|
+
- **Adversarial framing**: prompt = *"enumerate the 3 most significant
|
|
185
|
+
visual differences between A and B and rate each severity"*, never
|
|
186
|
+
"do these match?". A `converged` verdict requires all 3 to be
|
|
187
|
+
sub-perceptual / cross-renderer-irreducible. `judge.framing` MUST be
|
|
188
|
+
`"forced-difference-3"` (enforced; absence/mismatch VOIDs `YES`).
|
|
189
|
+
|
|
190
|
+
## Anti-gaming guards
|
|
191
|
+
|
|
192
|
+
**Enforced in code by `scripts/evaluate-convergence.mjs`** (violation ⇒ tier
|
|
193
|
+
downgrade + non-zero exit + a machine-readable reason — never a silent pass):
|
|
194
|
+
|
|
195
|
+
- **Mask budget**: `sum(mask area) / component area` must be ≤ 0.10; every
|
|
196
|
+
mask must carry a non-empty `reason`. Over budget or a reason-less mask ⇒
|
|
197
|
+
forced `needs-human`, exit 3. If masks are supplied but the component area
|
|
198
|
+
is unknown, the script **refuses to proceed (exit 1)** rather than silently
|
|
199
|
+
pass an unverifiable budget.
|
|
200
|
+
- **Gate eval in code**: the structured `calibration.json` thresholds are
|
|
201
|
+
evaluated per iteration to produce `gate_passed` (the caller cannot supply
|
|
202
|
+
it). A text-region `iou` of `null` is a **FAIL**. pHash never short-circuits.
|
|
203
|
+
- **Monotone-or-fail best-of-N**: `best_iteration` is the script's choice
|
|
204
|
+
among iterations that are **both** `built==true` **and** `gate_passed==true`;
|
|
205
|
+
if none qualify ⇒ `needs-human` (recorded). The caller's `best_iteration`
|
|
206
|
+
is ignored.
|
|
207
|
+
- **Negative-control voids judge (bound to the shipped divergent twin)**:
|
|
208
|
+
`judge.negative_control` MUST be the structured object
|
|
209
|
+
`{stimulus_source_hash, divergent_pair, rejected, differences}` whose
|
|
210
|
+
`stimulus_source_hash` equals the **source-tree hash of the shipped
|
|
211
|
+
`assets/calibration/swiftui-twin-divergent`** recomputed by the grader,
|
|
212
|
+
with `rejected === true`, a non-empty structured `differences`, and
|
|
213
|
+
`judge.framing === "forced-difference-3"`. The legacy bare
|
|
214
|
+
`negative_control:"passed"` string is **rejected as unbound**. Any
|
|
215
|
+
deviation ⇒ any `verdict:"YES"` is **VOID** ⇒ cannot be `converged`
|
|
216
|
+
(downgraded to `needs-human`, recorded `negative_control: failed`,
|
|
217
|
+
`reason: negative-control-unbound`, exit 3).
|
|
218
|
+
- **Calibration provenance (gate ⇐ floor; twin source identity; floor ⇐
|
|
219
|
+
sanity envelope)**: the grader asserts
|
|
220
|
+
`calibration.json.schema == "h5-to-swiftui/calibration@1"`; **recomputes**
|
|
221
|
+
the structured gate from `floor` and rejects a deviating `gate`
|
|
222
|
+
(`gate-floor-mismatch`, exit 1 — binds the gate TO the floor); recomputes
|
|
223
|
+
the bundled `h5-twin`/`swiftui-twin` **source-file** tree hashes from the
|
|
224
|
+
shipped assets and rejects a mismatching `calibration_source`
|
|
225
|
+
(`calibration-twin-mismatch`, exit 1 — binds the twin source IDENTITY, not
|
|
226
|
+
the floor value); and asserts the `floor` *value* satisfies
|
|
227
|
+
`calibrate-render.mjs`'s own sanity envelope via the shared
|
|
228
|
+
`scripts/_calib-consts.mjs` (`floor-implausible`, exit 1 — a floor
|
|
229
|
+
calibrate-render could not have emitted is rejected, killing the
|
|
230
|
+
absurd-floor attack). **Residual:** a `floor` *within* that envelope yet
|
|
231
|
+
looser than the true measured floor is trusted — the grader cannot
|
|
232
|
+
re-render the bundled twins to re-measure it (named residual (2) under
|
|
233
|
+
"Trust model & residual").
|
|
234
|
+
- **Build accounting**: a present `blocked.json` for the component, or no
|
|
235
|
+
built+gate-passing iteration ⇒ `blocked`/`needs-human`, **never**
|
|
236
|
+
`converged`.
|
|
237
|
+
|
|
238
|
+
**Enforced elsewhere / by process (NOT by `evaluate-convergence.mjs`):**
|
|
239
|
+
|
|
240
|
+
- **Idiomatic lint** (Stage 4): a component whose layout is predominantly
|
|
241
|
+
`.position()`/`.offset()` absolute pinning is rejected at rewrite time —
|
|
242
|
+
such output is pixel-pushed WebView-in-SwiftUI, not a native rewrite.
|
|
243
|
+
- **Whole-assembled-screen trend check — Stage-7 manual cross-check, NOT an
|
|
244
|
+
automated Stage-5 guard.** No script or other executable diffs the assembled
|
|
245
|
+
screen during the loop, so it is **not** advertised as active automation.
|
|
246
|
+
After assembly (Stage 7) a human / separate verification pass compares the
|
|
247
|
+
assembled-screen capture vs the reference so per-component `converged`
|
|
248
|
+
cannot mask a broken composition. This is a documented **known
|
|
249
|
+
limitation**: until that Stage-7 check runs, per-component verdicts are
|
|
250
|
+
authoritative only at component granularity.
|
|
251
|
+
- **Determinism**: artifact header pins sim/browser/model/seed (passed
|
|
252
|
+
through from `calibration.json`); the dry-run runs twice and must yield the
|
|
253
|
+
**same verdict** (not same pixels).
|
|
254
|
+
|
|
255
|
+
## `pixel-diff.mjs` output schema (`h5-to-swiftui/diff@1`)
|
|
256
|
+
|
|
257
|
+
`phash_hamming` is raw data; `phash_fast_candidate` is
|
|
258
|
+
necessary-not-sufficient (NOT a converged signal — there is **no** `phash_converged`
|
|
259
|
+
field). `iou` is `null` unless `--gen-bbox-map` supplied the generated bbox
|
|
260
|
+
(a `null` iou is a gate FAIL, never an assumed 1.0).
|
|
261
|
+
`inter_component_spacing_delta_pt` is `null` (with an explanatory
|
|
262
|
+
`inter_component_spacing_delta_note`) when generated positions are unknown —
|
|
263
|
+
never fabricated zeros.
|
|
264
|
+
|
|
265
|
+
```json
|
|
266
|
+
{
|
|
267
|
+
"schema": "h5-to-swiftui/diff@1",
|
|
268
|
+
"component": "ProductCard",
|
|
269
|
+
"phash_hamming": 7,
|
|
270
|
+
"phash_fast_candidate": false,
|
|
271
|
+
"regions": {
|
|
272
|
+
"text": [{"mark": 2, "iou": 0.95, "fg_deltaE": 1.2, "bg_deltaE": 0.6}],
|
|
273
|
+
"nontext": [{"mark": 1, "ssim": 0.991, "deltaE_p95": 1.4}]
|
|
274
|
+
},
|
|
275
|
+
"inter_component_spacing_delta_pt": null,
|
|
276
|
+
"inter_component_spacing_delta_note": "null: generated component positions unknown (--gen-bbox-map absent); reporting null, NOT zeros",
|
|
277
|
+
"diff_mask_png": ".h5-to-swiftui/diff/ProductCard.iter2.mask.png",
|
|
278
|
+
"global_ssim": 0.984,
|
|
279
|
+
"diff_pixel_fraction": 0.0142
|
|
280
|
+
}
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
When no generated bbox is supplied, a text region is instead:
|
|
284
|
+
`{"mark": 2, "iou": null, "iou_note": "no generated bbox supplied (--gen-bbox-map absent or no match) — IoU is null, NOT assumed 1.0", "fg_deltaE": 1.2, "bg_deltaE": 0.6}`.
|
|
285
|
+
|
|
286
|
+
## `convergence/<component>.json` schema (`h5-to-swiftui/convergence@1`)
|
|
287
|
+
|
|
288
|
+
Emitted **only** by `scripts/evaluate-convergence.mjs` (pinned-version header
|
|
289
|
+
passed through from `calibration.json`):
|
|
290
|
+
|
|
291
|
+
```json
|
|
292
|
+
{
|
|
293
|
+
"schema": "h5-to-swiftui/convergence@1",
|
|
294
|
+
"component": "ProductCard",
|
|
295
|
+
"pinned": { "sim_runtime": "...", "browser": "...", "model_id": "...", "temperature": 0 },
|
|
296
|
+
"calibration_floor": { "ssim_nontext": 0.991, "deltaE_p95": 1.6, "text_iou": 0.94 },
|
|
297
|
+
"gate": {
|
|
298
|
+
"converged": {"ssim_nontext_min": 0.986, "deltaE_p95_max": 2.0, "text_iou_min": 0.91, "require_judge_yes": true},
|
|
299
|
+
"close": {"ssim_nontext_min": 0.981, "deltaE_p95_max": 2.4, "text_iou_min": 0.88, "require_judge_equiv": true}
|
|
300
|
+
},
|
|
301
|
+
"iterations": [
|
|
302
|
+
{"i": 1, "diff": {"global_ssim": 0.90, "phash_hamming": 20, "diff_pixel_fraction": 0.2},
|
|
303
|
+
"built": true, "gate_passed": false, "close_band_passed": false,
|
|
304
|
+
"phash_fast_candidate": false, "gate_detail": "nontext mark 1 ssim 0.9 < 0.986"},
|
|
305
|
+
{"i": 2, "diff": {"global_ssim": 0.993, "phash_hamming": 3, "diff_pixel_fraction": 0.02},
|
|
306
|
+
"built": true, "gate_passed": true, "close_band_passed": true,
|
|
307
|
+
"phash_fast_candidate": true, "gate_detail": "all sub-gates passed"},
|
|
308
|
+
{"i": 3, "diff": {"global_ssim": 0.999, "phash_hamming": 2, "diff_pixel_fraction": 0.001},
|
|
309
|
+
"built": false, "gate_passed": false, "close_band_passed": false,
|
|
310
|
+
"phash_fast_candidate": true, "gate_detail": "all sub-gates passed"}
|
|
311
|
+
],
|
|
312
|
+
"masks": [{"x":0,"y":0,"w":12,"h":12,"reason":"live timestamp"}],
|
|
313
|
+
"mask_fraction": 0.0032,
|
|
314
|
+
"mask_budget": 0.10,
|
|
315
|
+
"component_area_px": 44800,
|
|
316
|
+
"component_area_source": "flag 320x140",
|
|
317
|
+
"judge": {
|
|
318
|
+
"negative_control": "passed",
|
|
319
|
+
"negative_control_binding": {
|
|
320
|
+
"expected_divergent_source_sha256": "…64-hex (recomputed from shipped swiftui-twin-divergent)…",
|
|
321
|
+
"claimed_stimulus_source_hash": "…64-hex (from judge.json)…",
|
|
322
|
+
"rejected": true,
|
|
323
|
+
"bound": true,
|
|
324
|
+
"reasons": []
|
|
325
|
+
},
|
|
326
|
+
"framing": "forced-difference-3",
|
|
327
|
+
"differences": [
|
|
328
|
+
{"desc": "1px baseline shift on price label", "severity": "sub-perceptual"},
|
|
329
|
+
{"desc": "shadow blur 0.5pt softer", "severity": "sub-perceptual"},
|
|
330
|
+
{"desc": "—", "severity": "none"}
|
|
331
|
+
],
|
|
332
|
+
"verdict": "YES",
|
|
333
|
+
"verdict_honored": true
|
|
334
|
+
},
|
|
335
|
+
"calibration_provenance": {
|
|
336
|
+
"schema_ok": true,
|
|
337
|
+
"gate_recomputed_from_floor": true,
|
|
338
|
+
"calibration_source": {
|
|
339
|
+
"h5_twin_source_sha256": "…64-hex…",
|
|
340
|
+
"swiftui_twin_source_sha256": "…64-hex…",
|
|
341
|
+
"verified_against_bundled": true
|
|
342
|
+
}
|
|
343
|
+
},
|
|
344
|
+
"guard_violations": [],
|
|
345
|
+
"best_iteration": 2,
|
|
346
|
+
"tier": "converged",
|
|
347
|
+
"tier_reason": "gate_passed(best i=2) AND judge YES with valid negative control",
|
|
348
|
+
"residual": {"ssim_nontext": 0.993, "deltaE_p95": 1.4, "text_iou": 0.96},
|
|
349
|
+
"evaluated_at": "ISO8601"
|
|
350
|
+
}
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
`best_iteration` is the script's choice (built AND gate-passing only); a
|
|
354
|
+
non-empty `guard_violations` array and/or a `tier` of `needs-human`/`blocked`
|
|
355
|
+
corresponds to a non-zero process exit (3 / 4) so the verdict cannot be
|
|
356
|
+
silently ignored.
|
|
357
|
+
|
|
358
|
+
## Stop conditions
|
|
359
|
+
|
|
360
|
+
- `converged` or `close` reached ⇒ accept best gate-passing iteration.
|
|
361
|
+
- Cap hit, no gate-passing+buildable iteration ⇒ `needs-human`.
|
|
362
|
+
- Mask budget exceeded / negative control failed / idiomatic-lint failed ⇒
|
|
363
|
+
`needs-human` regardless of metrics.
|
|
364
|
+
- No simulator or persistent build failure ⇒ `blocked` (Stage 5 skipped for
|
|
365
|
+
that component; counted as `needs-human` in the summary, never success).
|