npm - @rolepod/uiproof - Versions diffs - 0.4.1 → 0.6.0 - Mend

@rolepod/uiproof 0.4.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/.claude-plugin/marketplace.json +3 -3
package/.claude-plugin/plugin.json +2 -2
package/.codex-plugin/plugin.json +3 -3
package/.cursor-plugin/plugin.json +2 -2
package/CHANGELOG.md +168 -0
package/README.md +26 -7
package/dist/bin/rolepod-uiproof.js +1678 -59
package/dist/bin/rolepod-uiproof.js.map +1 -1
package/dist/index.d.ts +635 -10
package/dist/index.js +1716 -73
package/dist/index.js.map +1 -1
package/dist/schemas/tools.json +34 -1
package/package.json +1 -1
package/skills/audit-a11y/SKILL.md +9 -0
package/skills/check-errors/SKILL.md +123 -0
package/skills/scaffold-e2e/SKILL.md +23 -0
package/skills/verify-ui/SKILL.md +146 -70
package/skills/visual-diff/SKILL.md +9 -0

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "$schema": "https://anthropic.com/claude-code/marketplace.schema.json",
   "name": "rolepod-uiproof",
-  "description": "Multi-platform UI / mobile automation MCP server + 4 shipped skills (verify-ui, audit-a11y, visual-diff, scaffold-e2e) for AI coding agents.",
+  "description": "Multi-platform UI / mobile automation MCP server + 5 shipped skills (verify-ui, audit-a11y, visual-diff, scaffold-e2e, check-errors) for AI coding agents.",
   "owner": {
     "name": "nuttaruj",
     "url": "https://github.com/nuttaruj"
@@ -10,8 +10,8 @@
     {
       "name": "rolepod-uiproof",
       "source": "./",
-      "description": "15 MCP tools (10 atomic browser/mobile primitives + 5 composite workflows) + 4 user-invocable skills. Web production-ready via Playwright; mobile (iOS/Android) via Appium scaffolded — see `rolepod-uiproof doctor` for readiness.",
-      "version": "0.4.0",
+      "description": "26 MCP tools (21 atomic browser/mobile primitives + 5 composite workflows) + 5 user-invocable skills. v0.6 adds Extension Protocol v1 support — works standalone today, becomes the verify-phase UI provider when installed alongside the `rolepod` parent plugin (evidence routes to `.rolepod/evidence/` with `manifest.json`). Replaces chrome-devtools-mcp and playwright-mcp for UI testing. Web production-ready via Playwright; mobile (iOS/Android) via Appium scaffolded — see `rolepod-uiproof doctor` for readiness.",
+      "version": "0.6.0",
       "author": {
         "name": "nuttaruj"
       },

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "rolepod-uiproof",
-  "version": "0.4.1",
-  "description": "Multi-platform UI/mobile automation for AI agents — 4 shipped skills (verify-ui, audit-a11y, visual-diff, scaffold-e2e) + MCP server with 15 tools. v0.3 adds AppiumEngine scaffolding for iOS/Android, scope={ref} audit, replay CLI, ddmin minimization, doctor + install:mobile.",
+  "version": "0.6.0",
+  "description": "Multi-platform UI/mobile automation for AI agents — 5 shipped skills (verify-ui, audit-a11y, visual-diff, scaffold-e2e, check-errors) + MCP server with 26 tools. Works standalone OR with the `rolepod` parent plugin: when ROLEPOD_PARENT=1 is set, evidence routes to `.rolepod/evidence/` with a `manifest.json` per Extension Protocol v1, so parent's `check-work` skill can aggregate UI verify results into its phase report. v0.5 completed the UI verification surface (console + network observability, hover/drag/fill_form/upload/dialog, runtime emulation, multi-page, gated JS eval).",
   "author": {
     "name": "nuttaruj",
     "url": "https://github.com/nuttaruj"

package/.codex-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "rolepod-uiproof",
-  "version": "0.4.0",
-  "description": "Multi-platform UI/mobile automation for AI agents — 4 shipped skills (verify-ui, audit-a11y, visual-diff, scaffold-e2e) + MCP server with 15 tools. v0.3 adds AppiumEngine scaffolding for iOS/Android; web is production-ready via Playwright.",
+  "version": "0.6.0",
+  "description": "Multi-platform UI/mobile automation for AI agents — 5 shipped skills (verify-ui, audit-a11y, visual-diff, scaffold-e2e, check-errors) + MCP server with 26 tools. v0.6 adds Extension Protocol v1 — works standalone today, becomes the verify-phase UI provider when paired with the `rolepod` parent plugin.",
   "author": {
     "name": "nuttaruj",
     "url": "https://github.com/nuttaruj"
@@ -25,7 +25,7 @@
   "interface": {
     "displayName": "Rolepod UIProof",
     "shortDescription": "UI verification, a11y audits, visual diff, e2e scaffolding — for AI coding agents.",
-    "longDescription": "rolepod-uiproof ships an MCP server with 15 tools (10 atomic + 5 composite) and 4 user-invocable skills (/verify-ui, /audit-a11y, /visual-diff, /scaffold-e2e). Web is fully supported via Playwright; mobile (iOS/Android via Appium) lands in v0.3.",
+    "longDescription": "rolepod-uiproof ships an MCP server with 26 tools (21 atomic + 5 composite) and 5 user-invocable skills (/verify-ui, /audit-a11y, /visual-diff, /scaffold-e2e, /check-errors). Web is fully supported via Playwright; mobile (iOS/Android via Appium) supports basic input. v0.6: pair with the `rolepod` parent plugin (v2.7+) and uiproof becomes the verify-phase UI provider — evidence routes to `.rolepod/evidence/` with a `manifest.json` per Extension Protocol v1.",
     "developerName": "nuttaruj",
     "category": "Productivity",
     "capabilities": ["Read", "Write", "Bash"],

package/.cursor-plugin/plugin.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "name": "rolepod-uiproof",
   "displayName": "Rolepod UIProof",
-  "version": "0.4.0",
-  "description": "Multi-platform UI / mobile automation MCP server + 4 shipped skills (verify-ui, audit-a11y, visual-diff, scaffold-e2e) for AI coding agents. Web production-ready via Playwright; mobile (iOS/Android via Appium) scaffolded.",
+  "version": "0.6.0",
+  "description": "Multi-platform UI / mobile automation MCP server + 5 shipped skills (verify-ui, audit-a11y, visual-diff, scaffold-e2e, check-errors) for AI coding agents. v0.6 adds Extension Protocol v1 — works standalone today, becomes the verify-phase UI provider when paired with the `rolepod` parent plugin (evidence routes to `.rolepod/evidence/` with `manifest.json`). Replaces chrome-devtools-mcp and playwright-mcp.",
   "author": {
     "name": "nuttaruj"
   },

package/CHANGELOG.md CHANGED Viewed

@@ -7,6 +7,174 @@ release.
 ## [Unreleased]
+## [0.6.0] — 2026-05-27
+**Extension Protocol v1 — `uiproof` becomes parent-aware. Standalone
+behavior unchanged.**
+When the parent `rolepod` plugin (v2.7+) sets `ROLEPOD_PARENT=1` via
+its SessionStart hook, uiproof routes evidence to the shared
+`.rolepod/evidence/` tree and emits a `manifest.json` per spec so the
+parent's `check-work` skill can aggregate UI verify results into its
+phase report. With no parent installed the v0.5 behavior is preserved
+exactly — same artifact path, same tool output, plus a `manifest.json`
+in each run dir as a bonus.
+### Added
+- **Env-aware evidence path** in `ArtifactStore`. Detected at
+  construction from `process.env.ROLEPOD_PARENT === "1"`.
+  - standalone: `.rolepod-uiproof/artifacts/{prefix}_{ts}_{uuid}/`
+  - with-parent: `.rolepod/evidence/{ts}-rolepod-uiproof-{skill}/`
+- **`manifest.json`** written by every composite that starts a run
+  (`verify_ui_flow`, `audit_a11y`, `visual_diff`, `scaffold_e2e`).
+  Schema follows Extension Protocol v1: `protocol`, `plugin`, `skill`,
+  `phase`, `status`, `summary`, `started_at`, `finished_at`,
+  `artifacts: [{type, path}]`, `metadata`. Best-effort: any IO failure
+  is logged but never thrown.
+- **Graduated a11y status**. `audit_a11y` manifest carries `status`:
+  `critical/serious > 0 → fail`, `moderate/minor > 0 → warn`, no
+  issues → `pass`. Keeps the `warn` signal that a strict pass/fail
+  check would discard.
+- **Protocol version check**. When `ROLEPOD_PROTOCOL` is set but
+  does not equal `v1`, `buildServer()` logs a one-shot warning. Does
+  not block; manifest is still written in v1 shape.
+- **`/check-errors` evidence routing doc** alongside the other 4
+  skills.
+### Changed
+- `ArtifactStore.startRun(prefix, opts?)` — `opts.skill` is new and
+  optional. Provides the canonical skill name for both the
+  with-parent dirname and the manifest's `skill` field. Return shape
+  extended with `skill` and `mode` (back-compat: existing destructuring
+  of `{ runId, runDir }` keeps working).
+- `buildServer()` log line surfaces `protocol: "v1"` and
+  `mode: "standalone" | "with-parent"` alongside the existing version
+  + tools list.
+- All 5 shipped skills' SKILL.md gained an "Evidence routing" section
+  between "Process" / "Outputs" and "If the tool is unavailable".
+  Mirrored to `plugins/rolepod-uiproof/skills/`.
+- README "Standalone vs Combined" section added explaining the two
+  modes.
+### Behavior
+- **Standalone:** unchanged. Evidence still written to
+  `.rolepod-uiproof/artifacts/`. New: a `manifest.json` appears in each
+  run dir. Tool return values gain an optional `manifest: "<path>"`
+  field; everything else is byte-for-byte identical.
+- **With rolepod parent:** evidence written to
+  `.rolepod/evidence/<ts>-rolepod-uiproof-<skill>/` with `manifest.json`
+  per protocol spec. Visual baselines stay in
+  `.rolepod-uiproof/baselines/` regardless of mode.
+### Non-goals (kept out of v0.6)
+- Dynamic capabilities registry (`.claude-plugin/capabilities.json`)
+- Protocol version negotiation beyond a single warn
+- Cross-child coordination (uiproof ↔ wplab handoff inside one run)
+- Mobile platform support stays at the v0.5 partial level
+## [0.5.0] — 2026-05-27
+**Complete UI verification surface — one MCP replaces chrome-devtools-mcp
+and playwright-mcp for UI testing use cases.**
+Tool count: 15 → 26 (atomic 10 → 21, composite 5 unchanged). The five
+"out of scope for `uiproof`" families (Lighthouse, performance traces,
+heap snapshots, extensions, third-party page tools) are intentionally
+**not** added — those belong to future `rolepod-perfproof` and
+`rolepod-secproof` MCPs.
+### Added — 11 new atomic tools
+Cross-platform (work on chromium/firefox/webkit; mobile stubs throw
+`engine_error` until gestures land):
+- `rolepod_browser_hover` — `locator.hover()`; refs stay valid
+- `rolepod_browser_drag` — `locator.dragTo()`
+- `rolepod_browser_fill_form` — batch input/select/checkbox/radio
+- `rolepod_browser_upload_file` — `locator.setInputFiles()`, abs path required
+Web-only (cast to `PlaywrightEngine`):
+- `rolepod_browser_handle_dialog` — pre-arm one-shot accept/dismiss
+- `rolepod_browser_console` — list/filter/clear ring-buffered console
+  messages (1000-entry cap, errors+warnings default)
+- `rolepod_browser_network` — list/filter network requests, optional HAR export
+- `rolepod_browser_set_env` — runtime viewport / offline / geolocation /
+  color_scheme / reduced_motion / extra_headers / network_throttle (CDP) /
+  cpu_throttle (CDP)
+- `rolepod_browser_evaluate` — arbitrary JS in page context.
+  **Disabled by default** — opt in via `ROLEPOD_ALLOW_EVAL=1` env var
+- `rolepod_browser_pages` — list pages in active context (popups,
+  target=_blank, OAuth windows)
+- `rolepod_browser_switch_page` — set active page index
+### Added — verify_ui_flow capture lifecycle (implementation)
+The `capture` array has accepted these values since v0.1, but only
+`screenshot` was wired. v0.5 fills in the rest:
+- `console` → `{runDir}/console.json`
+- `har` → `{runDir}/network.har`
+- `video` → `{runDir}/videos/*.webm`
+- `trace` → `{runDir}/trace.zip` (view with `npx playwright show-trace`)
+- `a11y_tree` → `{runDir}/a11y_tree.json`
+### Added — 8 new verify_ui_flow step kinds
+`hover`, `drag`, `fill_form`, `upload`, `dialog`, `set_env`,
+`switch_page`, `evaluate`. All get first-class codegen in
+`scaffold_e2e` for playwright-test and pytest+selenium.
+### Added — 4 new verify_ui_flow expect kinds
+- `no_console_errors` — filter level=error, drop excludes, count must be 0
+- `no_failed_requests` — filter `failure || status>=400` (or `>=500`
+  when `allow_4xx`), drop excludes, count must be 0
+- `request_made` — URL regex + optional method must match at least
+  `min_count` (default 1) times
+- `response_status` — URL regex + exact status code must match
+### Added — multi-page support
+A session is now a `context` (was a single page). Popups and
+`target="_blank"` links are auto-tracked. Use `browser_pages` to list,
+`browser_switch_page` to activate. Default active = page 0.
+### Added — new skill `/check-errors`
+Thin wrapper over `rolepod_verify_ui_flow` with strict assertions baked
+in. Use case: PR-gate or post-merge smoke check.
+### Changed — `/verify-ui` and `/scaffold-e2e` skills
+Documented every new step / expect / capture kind. Default suggestion
+in `/verify-ui`: include `no_console_errors` and `no_failed_requests`
+in `expect` for any user-visible flow.
+### Changed — Engine interface
+Adds four cross-platform input methods: `hover`, `drag`, `fillForm`,
+`uploadFile`. `OpenOptions.capture` accepts `{ har, video, trace }`.
+`WebSession.page` renamed to `mainPage`; internal call sites go through
+`activePage(s)`.
+### Non-changes (intentional)
+- `screencast_*` not added — Playwright `trace.zip` is strictly better.
+- `click_at` not added — use refs from `snapshot`.
+- Lighthouse not added — axe-core covers a11y.
+- Performance traces / heap snapshots not added — `rolepod-perfproof` scope.
+- Extension management not added — out of scope.
+### Migration from 0.4
+Pure additions; no behavioral changes on existing tools or
+step/expect/capture kinds. Existing replay bundles play back unchanged.
 ## [0.4.1] — 2026-05-27
 ### Fixed

package/README.md CHANGED Viewed

@@ -1,28 +1,47 @@
 # rolepod-uiproof
-**rolepod-uiproof gives Claude Code, Cursor, Codex CLI, and Gemini CLI a real browser/mobile driver — so the AI can actually click through your UI, audit accessibility, diff screenshots, and scaffold e2e tests instead of guessing.**
+**rolepod-uiproof gives Claude Code, Cursor, Codex CLI, and Gemini CLI a real browser/mobile driver — so the AI can actually click through your UI, audit accessibility, check console errors, inspect network requests, diff screenshots, and scaffold e2e tests instead of guessing.**
-One MCP server, one tool surface, four skills you invoke from chat. Web is production-ready via Playwright; iOS and Android use Appium (same client as alumnium — needs a local Appium daemon + simulator/emulator, or a real device). No internal LLM — your Lead agent drives every action.
+One MCP server, one tool surface, five skills you invoke from chat. Web is production-ready via Playwright; iOS and Android use Appium (same client as alumnium — needs a local Appium daemon + simulator/emulator, or a real device). No internal LLM — your Lead agent drives every action.
+**v0.5 completes the UI verification surface — replacing `chrome-devtools-mcp` and `playwright-mcp` for UI testing.** 26 tools total (21 atomic + 5 composite). New in v0.5: console + network observability, hover / drag / fill_form / upload / dialog, runtime emulation (resize / offline / geolocation / color_scheme / network + CPU throttle), multi-page support, gated JS eval, and implementation of HAR / video / trace capture in `/verify-ui`.
 ## What it helps with
-- **Verify a UI change in seconds.** `/verify-ui` opens a real browser, runs your steps, checks your assertions, saves a screenshot + replay bundle.
+- **Verify a UI change in seconds.** `/verify-ui` opens a real browser, runs your steps, checks your assertions, saves a screenshot + replay bundle (optionally HAR + video + trace + console logs).
+- **Gate merges on "no regressions during this flow".** `/check-errors` runs a flow with strict `no_console_errors` + `no_failed_requests` assertions baked in. PR-gate or post-merge smoke check.
 - **Catch a11y regressions before merge.** `/audit-a11y` runs axe-core against WCAG-A / AA / AAA and returns issues grouped by severity, with WCAG references and fix links.
 - **Lock down the visual contract.** `/visual-diff` captures a screenshot and compares against a named baseline under `./.rolepod-uiproof/baselines/`. First call seeds; subsequent calls diff.
-- **Turn an interactive verify run into a real test file.** `/scaffold-e2e` transcribes a replay bundle into Playwright Test, Vitest+Playwright, or pytest+selenium.
+- **Turn an interactive verify run into a real test file.** `/scaffold-e2e` transcribes a replay bundle into Playwright Test, Vitest+Playwright, or pytest+selenium — with first-class codegen for every step + expect kind.
 - **Reproduce + minimize a bug deterministically.** `/verify-ui` with `mode: "reproduce"` runs ddmin step-elimination to find the shortest still-reproducing sequence.
-## The four skills
+## The five skills
 | Skill | Wraps | What it does |
 |---|---|---|
-| `/verify-ui` | `rolepod_verify_ui_flow` | Drive a session through steps, evaluate assertions, save evidence + replay bundle. `mode: assert` (default) or `reproduce` with optional ddmin minimization. |
+| `/verify-ui` | `rolepod_verify_ui_flow` | Drive a session through steps, evaluate assertions (incl. console errors / failed requests / specific request made / response status), save evidence (screenshot / console / HAR / video / trace / a11y_tree) + replay bundle. `mode: assert` or `reproduce` with optional ddmin minimization. |
+| `/check-errors` | `rolepod_verify_ui_flow` | Thin wrapper with strict `no_console_errors` + `no_failed_requests` baked in. Use as a PR gate or post-merge smoke check. |
 | `/audit-a11y` | `rolepod_audit_a11y` | axe-core audit at WCAG-A / AA / AAA. `scope: "page"` or `scope: { ref }`. Markdown or JSON report. |
 | `/visual-diff` | `rolepod_visual_diff` | Pixel diff against a named baseline. Auto-seeds on first call. Configurable threshold + pixelmatch sensitivity. |
-| `/scaffold-e2e` | `rolepod_scaffold_e2e` | Generate a runnable test file from a scenario + optional replay bundle. Three target frameworks. |
+| `/scaffold-e2e` | `rolepod_scaffold_e2e` | Generate a runnable test file from a scenario + optional replay bundle. Three target frameworks. v0.5 codegen handles every step + expect kind. |
 Every skill is **single-backend** (D-024) — it calls the rolepod-uiproof server and only the rolepod-uiproof server. If the server is unavailable, the skill fails with a clear diagnostic. Multi-backend routing belongs in the parent [`rolepod`](https://github.com/nuttaruj/rolepod) plugin's phase skills, not here.
+## Standalone vs Combined
+`rolepod-uiproof` works either as a **standalone** browser MCP for any project, or **combined** with the [`rolepod`](https://github.com/nuttaruj/rolepod) parent plugin (v2.7+) where it becomes the Verify phase provider for UI artifacts.
+**Standalone** (default): use the 5 skills directly as atomic browser tools. Evidence saved under `./.rolepod-uiproof/artifacts/<run>/` with a `manifest.json` per Extension Protocol v1.
+**Combined with rolepod parent**: when the parent's SessionStart hook sets `ROLEPOD_PARENT=1`, uiproof writes evidence to `./.rolepod/evidence/<ts>-rolepod-uiproof-<skill>/` instead, where the parent's `check-work` skill auto-aggregates manifests into the verify report. No skill changes — same 26 tools, same 5 skills, smarter routing.
+| Install | Unlocks |
+|---|---|
+| uiproof alone | Browser test, a11y audit, visual diff, e2e scaffold, error gate |
+| uiproof + rolepod parent | + verify-phase aggregation, evidence handoff to `check-work` |
+The `manifest.json` is written in BOTH modes, so artifacts from earlier standalone runs can still be picked up if the parent is installed later. Baselines for `/visual-diff` always live in `./.rolepod-uiproof/baselines/` regardless of mode — they are user-curated configuration, not per-run evidence.
 ## Install
 Pick your CLI. All install paths share the same MCP server (`@rolepod/uiproof` on npm) and the same skill set.