npm - @rolepod/uiproof - Versions diffs - 0.4.1 → 0.6.0 - Mend

@rolepod/uiproof 0.4.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/.claude-plugin/marketplace.json +3 -3
package/.claude-plugin/plugin.json +2 -2
package/.codex-plugin/plugin.json +3 -3
package/.cursor-plugin/plugin.json +2 -2
package/CHANGELOG.md +168 -0
package/README.md +26 -7
package/dist/bin/rolepod-uiproof.js +1678 -59
package/dist/bin/rolepod-uiproof.js.map +1 -1
package/dist/index.d.ts +635 -10
package/dist/index.js +1716 -73
package/dist/index.js.map +1 -1
package/dist/schemas/tools.json +34 -1
package/package.json +1 -1
package/skills/audit-a11y/SKILL.md +9 -0
package/skills/check-errors/SKILL.md +123 -0
package/skills/scaffold-e2e/SKILL.md +23 -0
package/skills/verify-ui/SKILL.md +146 -70
package/skills/visual-diff/SKILL.md +9 -0

package/dist/schemas/tools.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "$schema": "https://json-schema.org/draft/2019-09/schema",
-  "rolepod_mcp_version": "0.4.1",
+  "rolepod_mcp_version": "0.6.0",
   "tools": {
     "rolepod_browser_open": {
       "$schema": "https://json-schema.org/draft/2019-09/schema#"
@@ -32,6 +32,39 @@
     "rolepod_browser_navigate": {
       "$schema": "https://json-schema.org/draft/2019-09/schema#"
     },
+    "rolepod_browser_hover": {
+      "$schema": "https://json-schema.org/draft/2019-09/schema#"
+    },
+    "rolepod_browser_drag": {
+      "$schema": "https://json-schema.org/draft/2019-09/schema#"
+    },
+    "rolepod_browser_fill_form": {
+      "$schema": "https://json-schema.org/draft/2019-09/schema#"
+    },
+    "rolepod_browser_upload_file": {
+      "$schema": "https://json-schema.org/draft/2019-09/schema#"
+    },
+    "rolepod_browser_handle_dialog": {
+      "$schema": "https://json-schema.org/draft/2019-09/schema#"
+    },
+    "rolepod_browser_console": {
+      "$schema": "https://json-schema.org/draft/2019-09/schema#"
+    },
+    "rolepod_browser_network": {
+      "$schema": "https://json-schema.org/draft/2019-09/schema#"
+    },
+    "rolepod_browser_set_env": {
+      "$schema": "https://json-schema.org/draft/2019-09/schema#"
+    },
+    "rolepod_browser_evaluate": {
+      "$schema": "https://json-schema.org/draft/2019-09/schema#"
+    },
+    "rolepod_browser_pages": {
+      "$schema": "https://json-schema.org/draft/2019-09/schema#"
+    },
+    "rolepod_browser_switch_page": {
+      "$schema": "https://json-schema.org/draft/2019-09/schema#"
+    },
     "rolepod_verify_ui_flow": {
       "$schema": "https://json-schema.org/draft/2019-09/schema#"
     },

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@rolepod/uiproof",
-  "version": "0.4.1",
+  "version": "0.6.0",
   "description": "Multi-platform UI/mobile automation for AI agents — MCP server + shipped skills.",
   "keywords": [
     "mcp",

package/skills/audit-a11y/SKILL.md CHANGED Viewed

@@ -43,6 +43,15 @@ MCP server. No fallback (D-024).
 3. Surface counts + critical/serious issues inline; reference the report
    path for the full list.
+## Evidence routing
+Run artifacts are saved under:
+- **Standalone:** `.rolepod-uiproof/artifacts/<prefix>_<ts>_<uuid>/`
+- **With `rolepod` parent** (when `ROLEPOD_PARENT=1` is set by the parent's SessionStart hook): `.rolepod/evidence/<ts>-rolepod-uiproof-<skill>/`
+Either way the run directory contains a `manifest.json` per Extension Protocol v1, so the parent's `check-work` skill can aggregate results into the verify phase report. Standalone users can read the manifest themselves — same shape.
 ## If the tool is unavailable
 Surface plainly:

package/skills/check-errors/SKILL.md ADDED Viewed

@@ -0,0 +1,123 @@
+---
+name: check-errors
+description: Drive a flow and fail if any console error or failed network request occurs. Thin wrapper over verify_ui_flow with strict error-only assertions. Use to gate merges on "no regressions during this flow".
+---
+# /check-errors
+Thin wrapper over **`rolepod_verify_ui_flow`** focused on the question:
+> Does this flow run cleanly — no console errors, no failed requests?
+Use after `/verify-ui` confirms the feature works, OR as a fast smoke
+check before merging.
+## When to use
+- After feature work, to gate "did I introduce a regression somewhere?"
+- During PR review, to confirm the happy path doesn't spew errors.
+- After dependency upgrades, to catch a quiet console break.
+- After CSP / CORS / API auth changes — a common cause of silent 4xx/5xx responses.
+## When NOT to use
+- You want to assert specific UI text — use `/verify-ui` instead.
+- You only care about visual regression — use `/visual-diff`.
+- You want a11y compliance — use `/audit-a11y`.
+- Backend-only diff with no UI surface.
+## Inputs
+- `url` — entry point.
+- `steps` *(optional)* — drive the flow. Same shape as `/verify-ui` steps.
+- `exclude_console_patterns` *(optional)* — substrings; matching console
+  errors are ignored. Useful for third-party SDKs that always log
+  noise (e.g. `["facebook.com", "googletagmanager"]`).
+- `exclude_request_patterns` *(optional)* — same idea for URLs.
+- `allow_4xx` *(optional, default false)* — if true, only 5xx counts as
+  a failure. Useful when 4xx is part of the auth happy path.
+## Process
+Call `rolepod_verify_ui_flow` with:
+```json
+{
+  "mode": "assert",
+  "open": { "platform": "web", "url": "<url>" },
+  "steps": [ ...user-provided... ],
+  "expect": [
+    { "kind": "no_console_errors", "exclude_patterns": [...] },
+    { "kind": "no_failed_requests", "exclude_patterns": [...], "allow_4xx": false }
+  ],
+  "capture": ["screenshot", "console", "har"]
+}
+```
+Surface the result. On `passed: false`, point the user at `console.json`
+and `network.har` in `evidence_paths` so they can drill in.
+## Outputs
+Same shape as `/verify-ui`:
+- `passed: boolean`
+- `failure_reason` — e.g. `Expectations failed: expect[0] no_console_errors`
+- `evidence_paths.console` — JSON dump of console messages
+- `evidence_paths.har` — full HAR file
+## Examples
+### Smoke check — landing page
+User: "Open https://app.example.com and confirm no errors fire."
+```json
+{
+  "open": { "platform": "web", "url": "https://app.example.com" },
+  "steps": [],
+  "expect": [
+    { "kind": "no_console_errors" },
+    { "kind": "no_failed_requests" }
+  ],
+  "capture": ["screenshot", "console", "har"]
+}
+```
+### Drive a flow then assert clean
+User: "Sign in then dashboard — make sure no console errors."
+```json
+{
+  "open": { "platform": "web", "url": "https://app.example.com/login" },
+  "steps": [
+    { "kind": "fill_form", "fields": [
+      { "query": "Email", "value": "test@example.com" },
+      { "query": "Password", "value": "..." }
+    ]},
+    { "kind": "click", "query": "Sign in" },
+    { "kind": "wait_for", "condition": { "kind": "url_matches", "pattern": "dashboard" } }
+  ],
+  "expect": [
+    { "kind": "no_console_errors", "exclude_patterns": ["sentry.io"] },
+    { "kind": "no_failed_requests", "exclude_patterns": ["/analytics"] }
+  ],
+  "capture": ["screenshot", "console", "har"]
+}
+```
+## Evidence routing
+Run artifacts are saved under:
+- **Standalone:** `.rolepod-uiproof/artifacts/<prefix>_<ts>_<uuid>/`
+- **With `rolepod` parent** (when `ROLEPOD_PARENT=1` is set by the parent's SessionStart hook): `.rolepod/evidence/<ts>-rolepod-uiproof-<skill>/`
+Either way the run directory contains a `manifest.json` per Extension Protocol v1. Because `/check-errors` wraps `rolepod_verify_ui_flow`, the manifest is written by the underlying composite tool — same shape, same fields.
+## If the tool is unavailable
+> The `/check-errors` skill needs the **rolepod-uiproof** MCP server,
+> which is not currently available. Confirm the plugin is installed and
+> try again, or check that `npx -y rolepod-uiproof` is reachable.

package/skills/scaffold-e2e/SKILL.md CHANGED Viewed

@@ -22,6 +22,20 @@ MCP server. No fallback (D-024).
 - The scenario is too vague to scaffold — ask the user to clarify before
   calling.
+## Coverage
+The codegen handles every step kind and expect kind supported by
+`/verify-ui` (click, type, key, navigate, wait_for, hover, drag,
+fill_form, upload, dialog, set_env, switch_page, evaluate; text_visible,
+text_absent, url_matches, ref_in_state, no_console_errors,
+no_failed_requests, request_made, response_status).
+Playwright-test gets first-class translation for everything that has a
+direct Playwright API. Pytest+selenium covers the basics; expect kinds
+that need network introspection (no_failed_requests, request_made,
+response_status) emit a TODO referencing `selenium-wire` or BiDi, since
+upstream Selenium has no network-capture primitive.
 ## Inputs
 - `framework` — `playwright-test` | `vitest+playwright` | `pytest+selenium`.
@@ -48,6 +62,15 @@ MCP server. No fallback (D-024).
 3. Print the generated file path and the setup steps. Surface
    `dependencies` as an install command.
+## Evidence routing
+Run artifacts (the generated test file) are saved under:
+- **Standalone:** `.rolepod-uiproof/artifacts/<prefix>_<ts>_<uuid>/`
+- **With `rolepod` parent** (when `ROLEPOD_PARENT=1` is set by the parent's SessionStart hook): `.rolepod/evidence/<ts>-rolepod-uiproof-<skill>/`
+Either way the run directory contains a `manifest.json` per Extension Protocol v1 (with `phase: "build"` for this skill).
 ## If the tool is unavailable
 Surface plainly:

package/skills/verify-ui/SKILL.md CHANGED Viewed

@@ -1,83 +1,125 @@
 ---
 name: verify-ui
-description: Drive a real browser session through steps and assert expected outcomes; save evidence under ./.rolepod-uiproof/artifacts/. Use when a diff changes visible behavior and code-level tests do not prove it. v0.1 web only.
+description: Drive a real browser session through steps and assert expected outcomes — including console errors, network failures, and visual state. Save evidence under ./.rolepod-uiproof/artifacts/. Full feature set on web; mobile supports basic input only.
 ---
 # /verify-ui
 Single-backend skill. Calls **`rolepod_verify_ui_flow`** on the rolepod-uiproof
 MCP server and surfaces the structured result. No fallback (D-024) — if the
-tool is unavailable, this skill fails with a clear diagnostic so the caller
-(typically the user, or the parent `rolepod` plugin's `check-work` skill)
-can decide what to do next.
+tool is unavailable, this skill fails with a clear diagnostic.
 ## When to use
 - A diff changes user-visible behavior on a web target.
-- A URL is reachable (dev server is running, or the target is a deployed URL).
-- Code-level tests (unit, type-check, lint) do not prove the visible
-  outcome.
+- A URL is reachable (dev server is running, or the target is deployed).
+- You want to prove the UI works AND has no console errors / failed
+  requests / regressed visuals — code-level tests can't do that.
 ## When NOT to use
 - Backend-only diffs (no UI change).
 - Doc, config, or build-tool changes with no behavior surface.
-- No dev server / target available — ask the user to spin one up first
-  before invoking.
-- iOS / Android targets — mobile ships in v0.3 (`platform: 'ios' | 'android'`).
+- No dev server / target available — ask the user to spin one up first.
+- iOS / Android targets — mobile is partially supported (basic input);
+  console / network / set_env / evaluate are web-only.
 ## Modes
-- `mode: 'assert'` (default) — the assertions describe what the **feature
+- `mode: 'assert'` (default) — assertions describe what the **feature
   should do**; pass = feature works.
-- `mode: 'reproduce'` — the assertions describe what the **bug looks like**;
+- `mode: 'reproduce'` — assertions describe what the **bug looks like**;
   pass = bug reproduces. When `minimize: true` (default) the tool then
   removes steps one-by-one to find the shortest still-reproducing sequence
-  and writes a `replay-minimized.json` bundle next to `replay.json`.
+  and writes `replay-minimized.json` next to `replay.json`.
 ## Inputs
-- `target` — the URL to open (web only in v0.1).
-- `steps` — ordered UI actions. Each is one of:
-  - `{ kind: 'click', query: <accessible name substring> }`
-  - `{ kind: 'type', query: <accessible name substring>, text: <string>, clear_first?: boolean }`
-  - `{ kind: 'key', key: <e.g. 'Enter'> }`
-  - `{ kind: 'wait_for', condition: { kind, ... } }`
-  - `{ kind: 'navigate', url: <string> }`
-- `expect` — ordered assertions. Each is one of:
-  - `{ kind: 'text_visible', text: <string> }`
-  - `{ kind: 'text_absent', text: <string> }`
-  - `{ kind: 'url_matches', pattern: <regex string> }`
-  - `{ kind: 'ref_in_state', query: <accessible name substring>, state: 'visible' | 'enabled' | 'focused' }`
-- `capture` *(optional)* — defaults to `['screenshot']`. v0.1 only emits
-  screenshots and a replay bundle; HAR / console / video land in later
-  milestones.
-- `close_on_finish` *(optional)* — defaults to `true`.
+### `open` — context setup
+```json
+{ "platform": "web", "url": "https://...", "browser": "chromium" }
+```
+Optional: `viewport`, `headless`, `user_agent`, `locale`. UA / locale /
+timezone MUST be set here — they cannot change mid-session.
+### `steps` — UI actions in order
+Each step is one of:
+- `{ "kind": "click", "query": "Submit" }`
+- `{ "kind": "type", "query": "Email", "text": "x@y.com", "clear_first": true }`
+- `{ "kind": "key", "key": "Enter" }`
+- `{ "kind": "wait_for", "condition": { ... } }`
+- `{ "kind": "navigate", "url": "https://..." }`
+- `{ "kind": "hover", "query": "More" }`
+- `{ "kind": "drag", "from_query": "Card A", "to_query": "Column 2" }`
+- `{ "kind": "fill_form", "fields": [ { "query": "Name", "value": "Alice" }, { "query": "Subscribe", "value": true, "kind": "checkbox" } ] }`
+- `{ "kind": "upload", "query": "Avatar", "file_path": "/abs/path/to/file.png" }`
+- `{ "kind": "dialog", "action": "accept" }` — **place BEFORE the action that triggers the dialog**
+- `{ "kind": "set_env", "viewport": { "width": 375, "height": 812 } }` — also accepts offline, geolocation, color_scheme, reduced_motion, extra_headers, network_throttle, cpu_throttle
+- `{ "kind": "switch_page", "index": 1 }` — multi-page (popups, target=_blank)
+- `{ "kind": "evaluate", "script": "return document.title" }` — gated by `ROLEPOD_ALLOW_EVAL=1`
+### `expect` — assertions
+- `{ "kind": "text_visible", "text": "..." }`
+- `{ "kind": "text_absent", "text": "..." }`
+- `{ "kind": "url_matches", "pattern": "regex" }`
+- `{ "kind": "ref_in_state", "query": "Submit", "state": "enabled" }`
+- `{ "kind": "no_console_errors", "exclude_patterns": ["3rd-party.com"] }`
+- `{ "kind": "no_failed_requests", "exclude_patterns": ["/analytics"], "allow_4xx": false }`
+- `{ "kind": "request_made", "url_pattern": "/api/checkout", "method": "POST", "min_count": 1 }`
+- `{ "kind": "response_status", "url_pattern": "/api/me", "status": 200 }`
+### `capture` — evidence
+Default: `["screenshot"]`. Available:
+- `screenshot` — `final.png`
+- `console` — `console.json` (filtered to errors and warnings; ring buffer of up to 1000 entries)
+- `har` — `network.har` (full HAR)
+- `video` — `videos/*.webm`
+- `trace` — `trace.zip` (Playwright trace; view with `npx playwright show-trace`)
+- `a11y_tree` — `a11y_tree.json` (final snapshot)
+### Defaults
+- `close_on_finish: true`
+- `minimize: true` (only consulted when `mode: 'reproduce'`)
 ## Outputs
-- `run_id` — folder name under `./.rolepod-uiproof/artifacts/`.
-- `passed` — boolean.
-- `failed_at_step` *(when not passed)* — 0-based step index.
-- `failure_reason` *(when not passed)* — human-readable explanation.
-- `evidence_paths` — `{ screenshots: string[], replay_bundle?: string }`.
-- `final_url_or_screen` — page URL at the end of the run.
+- `run_id`, `passed`, `failed_at_step`, `failure_reason`,
+  `final_url_or_screen`
+- `evidence_paths: { screenshots, replay_bundle, console?, a11y_tree?, har?, trace?, video? }`
+- `minimized` (only on `mode: 'reproduce'` + `passed: true` + `minimize: true`)
 ## Process
-1. Construct a `rolepod_verify_ui_flow` input from the user's intent:
-   - `mode: 'assert'`
-   - `open: { platform: 'web', url: <target> }`
-   - `steps`, `expect`, `capture`, `close_on_finish` per inputs above.
+1. Build the `rolepod_verify_ui_flow` input.
 2. Call the tool.
-3. Report the structured result. If `passed: false`, include
-   `failed_at_step`, `failure_reason`, and the screenshot path so the user
-   can inspect the failure.
+3. Report the structured result. On failure include `failed_at_step` +
+   `failure_reason` + relevant evidence paths (screenshot, console.json
+   if console errors caused the failure).
-## If the tool is unavailable
+## Default suggestion
+For ANY user-visible flow, default-include `no_console_errors` and
+`no_failed_requests` in `expect`. Real UI bugs surface as console errors
+or 5xx responses far more often than as wrong text.
+## Evidence routing
+Run artifacts are saved under:
-The rolepod-uiproof MCP server is not registered or is not responding. Surface
-this plainly:
+- **Standalone:** `.rolepod-uiproof/artifacts/<prefix>_<ts>_<uuid>/`
+- **With `rolepod` parent** (when `ROLEPOD_PARENT=1` is set by the parent's SessionStart hook): `.rolepod/evidence/<ts>-rolepod-uiproof-<skill>/`
+Either way the run directory contains a `manifest.json` per Extension Protocol v1, so the parent's `check-work` skill can aggregate results into the verify phase report. Standalone users can read the manifest themselves — same shape.
+## If the tool is unavailable
 > The `/verify-ui` skill needs the **rolepod-uiproof** MCP server, which is
 > not currently available. Confirm the plugin is installed and try again,
@@ -85,50 +127,84 @@ this plainly:
 Do **not** attempt this work via Playwright MCP, Chrome DevTools MCP, or
 any other backend from inside this skill. Multi-backend routing is the
-job of the parent `rolepod` plugin's `check-work` / `debug-issue` skills
-(D-024).
+job of the parent `rolepod` plugin's `check-work` / `debug-issue` skills.
 ## Examples
-### Success — verify a search result on example.com
+### Success — verify checkout flow with no errors
-User: "Verify that opening https://example.com shows the heading 'Example
-Domain' and links to iana.org."
-Skill invokes `rolepod_verify_ui_flow` with:
+User: "Verify https://shop.example.com/checkout works — fill the form,
+submit, expect a success page and no errors."
 ```json
 {
   "mode": "assert",
-  "open": { "platform": "web", "url": "https://example.com" },
-  "steps": [],
+  "open": { "platform": "web", "url": "https://shop.example.com/checkout" },
+  "steps": [
+    { "kind": "fill_form", "fields": [
+      { "query": "Name", "value": "Alice" },
+      { "query": "Email", "value": "alice@example.com" },
+      { "query": "Card", "value": "4242 4242 4242 4242" }
+    ]},
+    { "kind": "click", "query": "Pay" },
+    { "kind": "wait_for", "condition": { "kind": "text_visible", "text": "Thank you" } }
+  ],
   "expect": [
-    { "kind": "text_visible", "text": "Example Domain" },
-    { "kind": "text_visible", "text": "More information" }
-  ]
+    { "kind": "text_visible", "text": "Thank you" },
+    { "kind": "no_console_errors" },
+    { "kind": "no_failed_requests", "exclude_patterns": ["/analytics"] },
+    { "kind": "response_status", "url_pattern": "/api/checkout", "status": 200 }
+  ],
+  "capture": ["screenshot", "console", "har"]
 }
 ```
-Returns:
+### Failure with evidence
+When `no_console_errors` fails, the result surfaces:
 ```json
 {
-  "run_id": "verify_20260524T101512_a1b2c3d4",
-  "passed": true,
+  "passed": false,
+  "failure_reason": "Expectations failed: expect[1] no_console_errors",
   "evidence_paths": {
-    "screenshots": [".rolepod-uiproof/artifacts/verify_…/final.png"],
-    "replay_bundle": ".rolepod-uiproof/artifacts/verify_…/replay.json"
-  },
-  "final_url_or_screen": "https://example.com/"
+    "screenshots": ["…/final.png"],
+    "console": "…/console.json"
+  }
 }
 ```
-### Failure — MCP server not available
+Open `console.json` to inspect the errors.
-The MCP server is not registered. The skill returns:
+### Dialog handling
-> The `/verify-ui` skill needs the **rolepod-uiproof** MCP server, which is
-> not currently available. Confirm the plugin is installed and try again.
+User: "When the user clicks Delete, a confirm dialog appears. Verify
+that accepting it deletes the row."
+```json
+{
+  "steps": [
+    { "kind": "dialog", "action": "accept" },
+    { "kind": "click", "query": "Delete" },
+    { "kind": "wait_for", "condition": { "kind": "text_absent", "text": "Row A" } }
+  ],
+  "expect": [ { "kind": "text_absent", "text": "Row A" } ]
+}
+```
+The `dialog` step arms a one-shot handler; the *next* trigger (the click)
+fires it. Un-armed dialogs are auto-dismissed.
+### Responsive + dark mode
+User: "Verify mobile dark-mode layout."
-No other backend is attempted. The caller decides whether to escalate to
-the parent rolepod plugin's `check-work` skill.
+```json
+{
+  "steps": [
+    { "kind": "set_env", "viewport": { "width": 375, "height": 812 }, "color_scheme": "dark" }
+  ],
+  "expect": [ { "kind": "text_visible", "text": "Menu" } ],
+  "capture": ["screenshot"]
+}
+```

package/skills/visual-diff/SKILL.md CHANGED Viewed

@@ -44,6 +44,15 @@ MCP server. No fallback (D-024).
 3. Report `diff_pct`, `passed`, and the three image paths. If the baseline
    was just seeded, say so explicitly.
+## Evidence routing
+Run artifacts are saved under:
+- **Standalone:** `.rolepod-uiproof/artifacts/<prefix>_<ts>_<uuid>/`
+- **With `rolepod` parent** (when `ROLEPOD_PARENT=1` is set by the parent's SessionStart hook): `.rolepod/evidence/<ts>-rolepod-uiproof-<skill>/`
+Baselines always live under `.rolepod-uiproof/baselines/`, regardless of mode — they are user-curated config, not per-run evidence. In both modes, the run directory contains a `manifest.json` per Extension Protocol v1.
 ## If the tool is unavailable
 Surface plainly: