npm - @bastani/atomic - Versions diffs - 0.8.31-alpha.1 → 0.8.31-alpha.2 - Mend

@bastani/atomic 0.8.31-alpha.1 → 0.8.31-alpha.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (145)

package/CHANGELOG.md +14 -3
package/README.md +12 -10
package/dist/builtin/cursor/CHANGELOG.md +1 -1
package/dist/builtin/cursor/package.json +2 -2
package/dist/builtin/intercom/CHANGELOG.md +1 -1
package/dist/builtin/intercom/package.json +2 -2
package/dist/builtin/mcp/CHANGELOG.md +1 -1
package/dist/builtin/mcp/package.json +3 -3
package/dist/builtin/subagents/CHANGELOG.md +10 -1
package/dist/builtin/subagents/agents/codebase-online-researcher.md +8 -8
package/dist/builtin/subagents/agents/debugger.md +6 -6
package/dist/builtin/subagents/package.json +4 -4
package/dist/builtin/subagents/skills/effective-liteparse/SKILL.md +118 -0
package/dist/builtin/subagents/skills/effective-liteparse/scripts/search.py +128 -0
package/dist/builtin/subagents/skills/playwright-cli/SKILL.md +404 -0
package/dist/builtin/subagents/skills/playwright-cli/references/element-attributes.md +23 -0
package/dist/builtin/subagents/skills/playwright-cli/references/playwright-tests.md +39 -0
package/dist/builtin/subagents/skills/playwright-cli/references/request-mocking.md +87 -0
package/dist/builtin/subagents/skills/playwright-cli/references/running-code.md +241 -0
package/dist/builtin/subagents/skills/playwright-cli/references/session-management.md +225 -0
package/dist/builtin/subagents/skills/playwright-cli/references/spec-driven-testing.md +305 -0
package/dist/builtin/subagents/skills/playwright-cli/references/storage-state.md +275 -0
package/dist/builtin/subagents/skills/playwright-cli/references/test-generation.md +134 -0
package/dist/builtin/subagents/skills/playwright-cli/references/tracing.md +139 -0
package/dist/builtin/subagents/skills/playwright-cli/references/video-recording.md +143 -0
package/dist/builtin/web-access/CHANGELOG.md +1 -1
package/dist/builtin/web-access/package.json +2 -2
package/dist/builtin/workflows/CHANGELOG.md +7 -1
package/dist/builtin/workflows/README.md +4 -4
package/dist/builtin/workflows/builtin/open-claude-design.ts +59 -56
package/dist/builtin/workflows/builtin/ralph.ts +56 -3
package/dist/builtin/workflows/builtin/shared-prompts.ts +1 -1
package/dist/builtin/workflows/package.json +2 -2
package/dist/builtin/workflows/skills/research-codebase/SKILL.md +1 -1
package/dist/cli/args.d.ts.map +1 -1
package/dist/cli/args.js +1 -1
package/dist/cli/args.js.map +1 -1
package/dist/core/agent-session.d.ts +1 -0
package/dist/core/agent-session.d.ts.map +1 -1
package/dist/core/agent-session.js +38 -18
package/dist/core/agent-session.js.map +1 -1
package/dist/core/context-window.d.ts +11 -1
package/dist/core/context-window.d.ts.map +1 -1
package/dist/core/context-window.js +19 -6
package/dist/core/context-window.js.map +1 -1
package/dist/core/copilot-model-catalog.d.ts +19 -16
package/dist/core/copilot-model-catalog.d.ts.map +1 -1
package/dist/core/copilot-model-catalog.js +14 -11
package/dist/core/copilot-model-catalog.js.map +1 -1
package/dist/core/project-trust.d.ts.map +1 -1
package/dist/core/project-trust.js +2 -1
package/dist/core/project-trust.js.map +1 -1
package/dist/core/sdk.d.ts.map +1 -1
package/dist/core/sdk.js +18 -7
package/dist/core/sdk.js.map +1 -1
package/dist/core/settings-manager.d.ts +11 -2
package/dist/core/settings-manager.d.ts.map +1 -1
package/dist/core/settings-manager.js +62 -8
package/dist/core/settings-manager.js.map +1 -1
package/dist/core/system-prompt.d.ts.map +1 -1
package/dist/core/system-prompt.js +1 -0
package/dist/core/system-prompt.js.map +1 -1
package/dist/core/tools/edit-diff.d.ts +1 -2
package/dist/core/tools/edit-diff.d.ts.map +1 -1
package/dist/core/tools/edit-diff.js +1 -2
package/dist/core/tools/edit-diff.js.map +1 -1
package/dist/index.d.ts +2 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1 -0
package/dist/index.js.map +1 -1
package/dist/modes/interactive/components/config-selector.d.ts.map +1 -1
package/dist/modes/interactive/components/config-selector.js +5 -7
package/dist/modes/interactive/components/config-selector.js.map +1 -1
package/dist/modes/interactive/components/model-selector.d.ts.map +1 -1
package/dist/modes/interactive/components/model-selector.js +2 -1
package/dist/modes/interactive/components/model-selector.js.map +1 -1
package/dist/modes/interactive/components/scoped-models-selector.d.ts.map +1 -1
package/dist/modes/interactive/components/scoped-models-selector.js +4 -1
package/dist/modes/interactive/components/scoped-models-selector.js.map +1 -1
package/dist/modes/interactive/components/settings-selector.d.ts +2 -0
package/dist/modes/interactive/components/settings-selector.d.ts.map +1 -1
package/dist/modes/interactive/components/settings-selector.js +165 -15
package/dist/modes/interactive/components/settings-selector.js.map +1 -1
package/dist/modes/interactive/components/tree-selector.d.ts.map +1 -1
package/dist/modes/interactive/components/tree-selector.js +44 -4
package/dist/modes/interactive/components/tree-selector.js.map +1 -1
package/dist/modes/interactive/interactive-mode.d.ts +1 -1
package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
package/dist/modes/interactive/interactive-mode.js +24 -54
package/dist/modes/interactive/interactive-mode.js.map +1 -1
package/dist/modes/interactive/model-search.d.ts +7 -0
package/dist/modes/interactive/model-search.d.ts.map +1 -0
package/dist/modes/interactive/model-search.js +6 -0
package/dist/modes/interactive/model-search.js.map +1 -0
package/dist/modes/interactive/theme/theme-controller.d.ts +30 -0
package/dist/modes/interactive/theme/theme-controller.d.ts.map +1 -0
package/dist/modes/interactive/theme/theme-controller.js +108 -0
package/dist/modes/interactive/theme/theme-controller.js.map +1 -0
package/dist/modes/interactive/theme/theme-schema.json +2 -1
package/dist/modes/interactive/theme/theme.d.ts +5 -0
package/dist/modes/interactive/theme/theme.d.ts.map +1 -1
package/dist/modes/interactive/theme/theme.js +70 -29
package/dist/modes/interactive/theme/theme.js.map +1 -1
package/dist/modes/rpc/rpc-client.d.ts +1 -1
package/dist/modes/rpc/rpc-client.d.ts.map +1 -1
package/dist/modes/rpc/rpc-client.js +1 -1
package/dist/modes/rpc/rpc-client.js.map +1 -1
package/dist/modes/rpc/rpc-mode.d.ts.map +1 -1
package/dist/modes/rpc/rpc-mode.js +1 -1
package/dist/modes/rpc/rpc-mode.js.map +1 -1
package/dist/package-manager-cli.d.ts.map +1 -1
package/dist/package-manager-cli.js +39 -9
package/dist/package-manager-cli.js.map +1 -1
package/docs/extensions.md +21 -0
package/docs/models.md +3 -3
package/docs/packages.md +13 -9
package/docs/providers.md +2 -2
package/docs/quickstart.md +14 -0
package/docs/rpc.md +3 -3
package/docs/sdk.md +15 -11
package/docs/session-format.md +1 -1
package/docs/settings.md +8 -3
package/docs/themes.md +3 -1
package/docs/tui.md +1 -1
package/docs/usage.md +12 -9
package/docs/workflows.md +9 -7
package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
package/examples/extensions/custom-provider-anthropic/package.json +1 -1
package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
package/examples/extensions/gondolin/package-lock.json +2 -2
package/examples/extensions/gondolin/package.json +1 -1
package/examples/extensions/preset.ts +10 -4
package/examples/extensions/provider-payload.ts +5 -5
package/examples/extensions/sandbox/index.ts +2 -2
package/examples/extensions/sandbox/package-lock.json +3 -3
package/examples/extensions/sandbox/package.json +2 -2
package/examples/extensions/subagent/agents.ts +2 -2
package/examples/extensions/subagent/index.ts +4 -2
package/examples/extensions/with-deps/package-lock.json +2 -2
package/examples/extensions/with-deps/package.json +1 -1
package/package.json +5 -5
package/dist/builtin/subagents/skills/browser/EXAMPLES.md +0 -151
package/dist/builtin/subagents/skills/browser/LICENSE.txt +0 -21
package/dist/builtin/subagents/skills/browser/REFERENCE.md +0 -451
package/dist/builtin/subagents/skills/browser/SKILL.md +0 -170

package/dist/builtin/workflows/builtin/open-claude-design.ts CHANGED Viewed

@@ -21,12 +21,13 @@
  *
  * The refinement loop has been re-shaped so that the artifact under review is
  * a real HTML page on disk (`preview.html`). The workflow attempts to open it
- * through the `browser` skill so the user can interactively review;
+ * through the `playwright-cli` skill so the user can interactively review;
  * when browser automation is unavailable, the file path is surfaced so the user
  * can open it manually. Before any stage runs, an initial deterministic setup
- * step ensures the browser skill's `browse` CLI is available (`which browse`,
- * then `npm install -g browse` when missing); it is best-effort and never
- * blocks the run. The final exporter produces a rich `spec.html` that
+ * step ensures the playwright-cli skill's `playwright-cli` command is available
+ * (PATH probe via `which`/`where playwright-cli`, then
+ * `npm install -g @playwright/cli@latest` when missing); it is best-effort and
+ * never blocks the run. The final exporter produces a rich `spec.html` that
  * embeds the agreed-upon design alongside the implementation handoff.
  */
@@ -222,12 +223,12 @@ const ANTI_SLOP_RULES = [
   "Commit to a specific aesthetic direction; do not hedge with generic SaaS defaults.",
 ].join("\n");
-type BrowseCliStatus = {
-  /** Whether the `browse` CLI is expected to be available to downstream stages. */
+type PlaywrightCliStatus = {
+  /** Whether the `playwright-cli` command is expected to be available to downstream stages. */
   readonly available: boolean;
-  /** True when the CLI was already on PATH and no install was attempted. */
+  /** True when the command was already on PATH and no install was attempted. */
   readonly alreadyPresent: boolean;
-  /** True when this step installed the CLI via `npm install -g browse`. */
+  /** True when this step installed the command via `npm install -g @playwright/cli@latest`. */
   readonly installed: boolean;
   /** Human-readable, single-line outcome surfaced as a workflow output. */
   readonly summary: string;
@@ -236,23 +237,24 @@ type BrowseCliStatus = {
 };
 /**
- * Initial deterministic setup step (no LLM): ensure the browser skill's `browse`
- * CLI is available before any design stage runs. Mirrors the browser skill's
- * documented bootstrap (`which browse || npm install -g browse`) but performs it
- * once, deterministically, instead of relying on each stage to probe/install it.
+ * Initial deterministic setup step (no LLM): ensure the playwright-cli skill's
+ * `playwright-cli` command is available before any design stage runs. Mirrors the
+ * playwright-cli skill's documented bootstrap (`npx --no-install playwright-cli
+ * --version` || `npm install -g @playwright/cli@latest`) but performs it once,
+ * deterministically, instead of relying on each stage to probe/install it.
  * The PATH probe always runs, but the actual global install is skipped under
  * automated tests (`NODE_ENV=test`) to avoid slow, networked, environment-
  * mutating side effects.
  *
  * Best-effort by contract: it never throws and never blocks the workflow. When
- * the CLI cannot be located or installed, downstream stages keep their graceful
+ * the command cannot be located or installed, downstream stages keep their graceful
  * degradation path (surface the manual preview path / URL).
  */
-function ensureBrowseCli(): BrowseCliStatus {
+function ensurePlaywrightCli(): PlaywrightCliStatus {
   const isWindows = process.platform === "win32";
   const onPath = (): boolean => {
     try {
-      const probe = spawnSync(isWindows ? "where" : "which", ["browse"], {
+      const probe = spawnSync(isWindows ? "where" : "which", ["playwright-cli"], {
         stdio: "ignore",
         timeout: 15_000,
         shell: isWindows,
@@ -268,7 +270,7 @@ function ensureBrowseCli(): BrowseCliStatus {
       available: true,
       alreadyPresent: true,
       installed: false,
-      summary: "browse CLI already on PATH; skipped install.",
+      summary: "playwright-cli already on PATH; skipped install.",
     };
   }
@@ -282,13 +284,13 @@ function ensureBrowseCli(): BrowseCliStatus {
       alreadyPresent: false,
       installed: false,
       summary:
-        "browse CLI not found; skipped global install under the test environment.",
+        "playwright-cli not found; skipped global install under the test environment.",
       error: "global install skipped during tests",
     };
   }
   try {
-    const install = spawnSync("npm", ["install", "-g", "browse"], {
+    const install = spawnSync("npm", ["install", "-g", "@playwright/cli@latest"], {
       stdio: "ignore",
       timeout: 180_000,
       shell: isWindows,
@@ -298,19 +300,19 @@ function ensureBrowseCli(): BrowseCliStatus {
         available: true,
         alreadyPresent: false,
         installed: true,
-        summary: "Installed browse CLI via `npm install -g browse`.",
+        summary: "Installed playwright-cli via `npm install -g @playwright/cli@latest`.",
       };
     }
     const reason =
       install.error?.message ??
       (typeof install.status === "number"
-        ? `npm install -g browse exited with code ${install.status}`
-        : "npm install -g browse did not complete");
+        ? `npm install -g @playwright/cli@latest exited with code ${install.status}`
+        : "npm install -g @playwright/cli@latest did not complete");
     return {
       available: false,
       alreadyPresent: false,
       installed: false,
-      summary: `Could not install browse CLI (${reason}); stages will degrade gracefully.`,
+      summary: `Could not install playwright-cli (${reason}); stages will degrade gracefully.`,
       error: reason,
     };
   } catch (error) {
@@ -320,7 +322,7 @@ function ensureBrowseCli(): BrowseCliStatus {
       available: false,
       alreadyPresent: false,
       installed: false,
-      summary: `Could not install browse CLI (${reason}); stages will degrade gracefully.`,
+      summary: `Could not install playwright-cli (${reason}); stages will degrade gracefully.`,
       error: reason,
     };
   }
@@ -328,24 +330,25 @@ function ensureBrowseCli(): BrowseCliStatus {
 /**
  * Build the per-run browser bootstrap guidance injected into stage prompts.
- * When the deterministic setup step already ensured `browse` is installed, the
- * guidance tells stages to assume availability and not waste turns reinstalling;
- * otherwise it retains the original probe-and-install fallback.
+ * When the deterministic setup step already ensured `playwright-cli` is installed,
+ * the guidance tells stages to assume availability and not waste turns
+ * reinstalling; otherwise it retains the original probe-and-install fallback.
  */
-function buildBrowserBootstrapRules(status: BrowseCliStatus): string {
+function buildPlaywrightCliBootstrapRules(status: PlaywrightCliStatus): string {
   const probeRule = status.available
-    ? "The workflow's deterministic setup step already ensured the browser skill's `browse` CLI is installed and on PATH; assume it is available and do NOT reinstall it. Only if a `browse` command reports the executable as missing should you re-probe with `which browse` and run `npm install -g browse` once before retrying. Do not add project dependencies."
-    : `The workflow's deterministic setup step attempted to install the browser skill's \`browse\` CLI but it FAILED with: "${status.error ?? "unknown error"}". Treat this as a known starting condition to work around, not a hard blocker. Probe with \`which browse\` and retry once with \`npm install -g browse\`; if it still fails, use the error above to diagnose a workaround (for example: EACCES/permission errors → retry with a user-writable global prefix; missing npm/Node → report it plainly; network/registry errors → surface them). If the CLI still cannot be made available, degrade gracefully and surface the manual file path / URL. Do not add project dependencies.`;
+    ? "The workflow's deterministic setup step already ensured the playwright-cli skill's `playwright-cli` command is installed and on PATH; assume it is available and do NOT reinstall it. Only if a `playwright-cli` command reports it is missing should you re-probe with `which playwright-cli` (or `npx --no-install playwright-cli --version`) and run `npm install -g @playwright/cli@latest` once before retrying. Do not add project dependencies."
+    : `The workflow's deterministic setup step attempted to install the playwright-cli skill's \`playwright-cli\` command but it FAILED with: "${status.error ?? "unknown error"}". Treat this as a known starting condition to work around, not a hard blocker. Probe with \`which playwright-cli\` (or \`npx --no-install playwright-cli --version\`) and retry once with \`npm install -g @playwright/cli@latest\`; if it still fails, use the error above to diagnose a workaround (for example: EACCES/permission errors → retry with a user-writable global prefix; missing npm/Node → report it plainly; network/registry errors → surface them). If the command still cannot be made available, degrade gracefully and surface the manual file path / URL. Do not add project dependencies.`;
   return [
     probeRule,
-    "Use `browse open <url> --local --headed` when a generated local preview should be visible to the user, and use `browse snapshot` plus `browse screenshot --path <file>` for review evidence.",
-    "If `browse` is unavailable after three attempts or the browser runtime still fails, degrade gracefully and surface the manual file path / URL.",
+    "Use `playwright-cli open <url>` when a generated local preview should be visible to the user, and use `playwright-cli snapshot` plus `playwright-cli screenshot --filename=<file>` for review evidence.",
+    "If a `playwright-cli` command reports a missing browser executable, install the browser once with `npx playwright install chromium` and retry.",
+    "If `playwright-cli` is unavailable after three attempts or the browser runtime still fails, degrade gracefully and surface the manual file path / URL.",
   ].join("\n");
 }
 export default defineWorkflow("open-claude-design")
   .description(
-    "AI-powered design workflow: design-system onboarding → reference import → HTML generation → impeccable-driven refinement → quality gate → rich HTML handoff. Each stage delegates to a specific impeccable sub-skill; the user can iteratively review the generated HTML through the browser skill.",
+    "AI-powered design workflow: design-system onboarding → reference import → HTML generation → impeccable-driven refinement → quality gate → rich HTML handoff. Each stage delegates to a specific impeccable sub-skill; the user can iteratively review the generated HTML through the playwright-cli skill.",
   )
   .input("prompt", Type.String({
     description: "What to design (for example, a dashboard, page, component, or prototype).",
@@ -378,14 +381,14 @@ export default defineWorkflow("open-claude-design")
   .output("preview_file_url", Type.Optional(Type.String({ description: "file:// URL for the generated preview.html file." })))
   .output("spec_path", Type.Optional(Type.String({ description: "Absolute path to the generated spec.html file." })))
   .output("spec_file_url", Type.Optional(Type.String({ description: "file:// URL for the generated spec.html file." })))
-  .output("browse_cli_status", Type.Optional(Type.String({ description: "Outcome of the initial deterministic step that ensures the browser skill's `browse` CLI is installed." })))
+  .output("playwright_cli_status", Type.Optional(Type.String({ description: "Outcome of the initial deterministic step that ensures the playwright-cli skill's `playwright-cli` command is installed." })))
   .run(async (ctx) => {
-    // Initial deterministic setup step (no LLM): ensure the browser skill's
-    // `browse` CLI is installed before any design stage runs. Best-effort —
+    // Initial deterministic setup step (no LLM): ensure the playwright-cli skill's
+    // `playwright-cli` command is installed before any design stage runs. Best-effort —
     // a failed install never blocks the workflow; downstream stages keep their
     // graceful-degradation fallback (surface the manual preview path / URL).
-    const browseCli = ensureBrowseCli();
-    const browserBootstrapRules = buildBrowserBootstrapRules(browseCli);
+    const playwrightCli = ensurePlaywrightCli();
+    const browserBootstrapRules = buildPlaywrightCliBootstrapRules(playwrightCli);
     const inputs = ctx.inputs;
@@ -626,8 +629,8 @@ export default defineWorkflow("open-claude-design")
           [
             "instructions",
             [
-              "1. Use browser/screenshot tooling (for example the browser skill's `browse` CLI) if available; cite observable evidence rather than guessing.",
-              "2. If `browse` is available but opening the reference URL reports a missing browser executable, follow the bootstrap rules and retry once.",
+              "1. Use browser/screenshot tooling (for example the playwright-cli skill's `playwright-cli` command) if available; cite observable evidence rather than guessing.",
+              "2. If `playwright-cli` is available but opening the reference URL reports a missing browser executable, follow the bootstrap rules and retry once.",
               "3. Analyze: layout, visual hierarchy, navigation, color, typography, spacing, states, interactions, responsive behavior.",
               "4. Separate reference-specific styling from requirements that should transfer to this project's design system.",
               "5. If the URL is inaccessible or browser bootstrap fails, state that and provide a best-effort fallback based only on available information — never fabricate observations.",
@@ -738,7 +741,7 @@ export default defineWorkflow("open-claude-design")
           ],
           [
             "objective",
-            "Your job is to make the just-generated HTML artifact visible to the user so they can give feedback. Open the HTML preview file using the browser skill's `browse` CLI when available, then prompt the user for feedback. Gracefully degrade if browser automation is unavailable.",
+            "Your job is to make the just-generated HTML artifact visible to the user so they can give feedback. Open the HTML preview file using the playwright-cli skill's `playwright-cli` command when available, then prompt the user for feedback. Gracefully degrade if browser automation is unavailable.",
           ],
           ["preview_path", previewPath],
           ["preview_file_url", previewFileUrl],
@@ -746,11 +749,11 @@ export default defineWorkflow("open-claude-design")
           [
             "instructions",
             [
-              "1. Probe for `browse` availability using the bootstrap rules above.",
-              `2. If available, run: \`browse open ${previewFileUrl} --local --headed\`. If that reports a missing browser executable, follow the bootstrap rules and retry once.`,
-              "3. Then run `browse snapshot` and use any available annotation/review flow from the active browser environment; if none exists, ask the user to review the visible page or manual file path and provide notes inline.",
+              "1. Probe for `playwright-cli` availability using the bootstrap rules above.",
+              `2. If available, run: \`playwright-cli open ${previewFileUrl}\`. If that reports a missing browser executable, follow the bootstrap rules and retry once.`,
+              "3. Then run `playwright-cli snapshot` and, for interactive review, `playwright-cli show --annotate` so the user can draw on the page and add notes; if interactive review is unavailable, ask the user to review the visible page or manual file path and provide notes inline.",
               "4. Capture any annotation artifact path, screenshot path, or user notes and surface them in your output.",
-              `5. If \`browse\` is NOT available or browser bootstrap fails, print a clear instruction block telling the user to open the file manually at: ${previewPath} (or via the URL ${previewFileUrl}).`,
+              `5. If \`playwright-cli\` is NOT available or browser bootstrap fails, print a clear instruction block telling the user to open the file manually at: ${previewPath} (or via the URL ${previewFileUrl}).`,
               "6. Never block the workflow on unavailable tooling; always exit with a non-empty status string.",
             ].join("\n"),
           ],
@@ -869,10 +872,10 @@ export default defineWorkflow("open-claude-design")
               [
                 "instructions",
                 [
-                  `1. Attempt rendering verification via the browser skill: \`browse open ${previewFileUrl} --local\`. If that reports a missing browser executable, follow the bootstrap rules and retry once.`,
-                  `2. Then run \`browse viewport 360 800\`, \`browse screenshot --path ${join(artifactDir, `mobile-${iteration}.png`)}\`, \`browse viewport 1440 900\`, \`browse screenshot --path ${join(artifactDir, `desktop-${iteration}.png`)}\`.`,
+                  `1. Attempt rendering verification via the playwright-cli skill: \`playwright-cli open ${previewFileUrl}\`. If that reports a missing browser executable, follow the bootstrap rules and retry once.`,
+                  `2. Then run \`playwright-cli resize 360 800\`, \`playwright-cli screenshot --filename=${join(artifactDir, `mobile-${iteration}.png`)}\`, \`playwright-cli resize 1440 900\`, \`playwright-cli screenshot --filename=${join(artifactDir, `desktop-${iteration}.png`)}\`.`,
                   "3. Check: contrast (WCAG AA), overflow, spacing rhythm, alignment, breakpoint behavior, empty/loading/error states, keyboard/pointer affordances, focus rings, prefers-reduced-motion.",
-                  "4. If `browse` is unavailable or browser bootstrap fails, perform a static design review of the HTML source and mark every finding as `needs-rendering-verification`.",
+                  "4. If `playwright-cli` is unavailable or browser bootstrap fails, perform a static design review of the HTML source and mark every finding as `needs-rendering-verification`.",
                   "5. Distinguish confirmed visual issues from risks that need rendering verification. Never fabricate rendered evidence.",
                 ].join("\n"),
               ],
@@ -956,9 +959,9 @@ export default defineWorkflow("open-claude-design")
             [
               "instructions",
               [
-                `1. If \`browse\` is available, run \`browse open ${previewFileUrl} --local --headed\`. If that reports a missing browser executable, follow the bootstrap rules and retry once.`,
-                "2. Then run `browse snapshot` and use any available annotation/review flow from the active browser environment; otherwise ask the user to provide feedback inline.",
-                `3. If \`browse\` is unavailable or browser bootstrap fails, surface the path clearly: ${previewPath} (URL: ${previewFileUrl}).`,
+                `1. If \`playwright-cli\` is available, run \`playwright-cli open ${previewFileUrl}\`. If that reports a missing browser executable, follow the bootstrap rules and retry once.`,
+                "2. Then run `playwright-cli snapshot` and, for interactive review, `playwright-cli show --annotate`; otherwise ask the user to provide feedback inline.",
+                `3. If \`playwright-cli\` is unavailable or browser bootstrap fails, surface the path clearly: ${previewPath} (URL: ${previewFileUrl}).`,
                 "4. Return any captured annotations as structured notes the next user-feedback step can read.",
                 "5. Do not block on unavailable tooling.",
               ].join("\n"),
@@ -1083,7 +1086,7 @@ export default defineWorkflow("open-claude-design")
             "Return markdown with headings (NOT the HTML):",
             "1. Spec written to (absolute path)",
             "2. Sections included",
-            "3. How to open the spec (browse command + manual fallback path)",
+            "3. How to open the spec (playwright-cli command + manual fallback path)",
             "4. Recommended files and components",
             "5. Implementation steps",
             "6. Usage example",
@@ -1107,7 +1110,7 @@ export default defineWorkflow("open-claude-design")
           ],
           [
             "objective",
-            "Make the rich HTML spec visible to the user. Open the final spec.html with the browser skill's `browse` CLI so the user can review the agreed design and implementation handoff. Degrade gracefully if browser automation is unavailable.",
+            "Make the rich HTML spec visible to the user. Open the final spec.html with the playwright-cli skill's `playwright-cli` command so the user can review the agreed design and implementation handoff. Degrade gracefully if browser automation is unavailable.",
           ],
           ["spec_path", specPath],
           ["spec_file_url", specFileUrl],
@@ -1117,9 +1120,9 @@ export default defineWorkflow("open-claude-design")
           [
             "instructions",
             [
-              "1. Probe for `browse` availability using the bootstrap rules above.",
-              `2. If available, run \`browse open ${specFileUrl} --local --headed\`. If that reports a missing browser executable, follow the bootstrap rules and retry once.`,
-              "3. Then run `browse snapshot` and use any available annotation/review flow from the active browser environment so the user can capture any final notes.",
+              "1. Probe for `playwright-cli` availability using the bootstrap rules above.",
+              `2. If available, run \`playwright-cli open ${specFileUrl}\`. If that reports a missing browser executable, follow the bootstrap rules and retry once.`,
+              "3. Then run `playwright-cli snapshot` and, for interactive review, `playwright-cli show --annotate` so the user can capture any final notes.",
               `4. Always print, prominently, the absolute paths so the user can open them manually:\n   - Final spec: ${specPath}\n   - Approved preview: ${previewPath}`,
               "5. Do not block the workflow; return a structured summary even if no tooling worked.",
             ].join("\n"),
@@ -1147,7 +1150,7 @@ export default defineWorkflow("open-claude-design")
       preview_file_url: previewFileUrl,
       spec_path: specPath,
       spec_file_url: specFileUrl,
-      browse_cli_status: browseCli.summary,
+      playwright_cli_status: playwrightCli.summary,
     };
   })
   .compile();

package/dist/builtin/workflows/builtin/ralph.ts CHANGED Viewed

@@ -7,6 +7,7 @@
  * findings into the next research pass with ctx.task().
  */
+import { existsSync } from "node:fs";
 import { mkdir, mkdtemp, writeFile } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { dirname, join, resolve } from "node:path";
@@ -21,6 +22,7 @@ import { E2E_VERIFICATION_GUIDANCE, WORKER_PREFLIGHT_CONTRACT } from "./shared-p
 const DEFAULT_MAX_LOOPS = 10;
 const DEFAULT_RESEARCH_DIR = "research";
 const IMPLEMENTATION_NOTES_FILENAME = "implementation-notes.md";
+const QA_E2E_VIDEO_FILENAME = "qa-e2e-evidence.webm";
 const MAX_RESEARCH_SLUG_LENGTH = 80;
 // Reviewer fan-out launches three independent reviewers; the loop stops only when
 // all three reviewers independently approve (find no issues). Requiring unanimous
@@ -194,6 +196,26 @@ async function createImplementationNotesFile(prompt: string): Promise<string> {
   return notesPath;
 }
+// Reserves the location where the orchestrator's QA pass records its end-to-end
+// proof video. A fresh `atomic-ralph-qa-*` directory is created under the OS temp
+// dir right away so `playwright-cli video-start <path>` has somewhere to write;
+// only the directory is created here — the video file itself is produced later
+// by the QA pass (and overwritten on each iteration). Resolves to the video
+// path inside that directory, which the final pull-request stage attaches when
+// the file exists.
+async function createQaEvidenceVideoPath(): Promise<string> {
+  const tempDirPrefix = join(tmpdir(), "atomic-ralph-qa-");
+  const evidenceDir = await mkdtemp(tempDirPrefix);
+  return join(evidenceDir, QA_E2E_VIDEO_FILENAME);
+}
+// Builds the text of the `qa_e2e_video` prompt section for the given recording
+// path. The result is five newline-separated instructions: when to run the QA
+// pass, how to record it to `qaVideoPath`, how to reference the video in the
+// implementation notes, what to do when no UI scenario applies, and how to
+// handle missing tooling. `qaVideoPath` is interpolated verbatim (no quoting).
+function renderQaE2eVideoGuidance(qaVideoPath: string): string {
+  const whenToRunQa =
+    "QA the change end-to-end whenever it touches user-visible UI behavior, including full-stack changes whose UI correctness depends on backend/API behavior. Use the `playwright-cli` skill (or delegate to a subagent with `skill: \"playwright-cli\"`) to drive the running application like a user and prove the implemented scenario actually works.";
+  const howToRecord =
+    `Record that QA E2E pass as a reviewable video so the user can watch the feature working. After \`playwright-cli open\`, start recording with \`playwright-cli video-start ${qaVideoPath}\`, annotate the scenario with \`playwright-cli video-chapter\` / \`playwright-cli video-show-actions\`, exercise the full user scenario, then \`playwright-cli video-stop\`. Write the video to exactly this path and overwrite any prior recording so it always reflects the latest implemented state: ${qaVideoPath}`;
+  const notesReference =
+    `After recording, add the video to the implementation notes as a reference: include a \`## QA E2E Video\` entry with the absolute path ${qaVideoPath} and a one-line description of the proven scenario, so the user can review the proof when this stage finishes.`;
+  const noUiScenario =
+    "If the change has no user-visible UI scenario (pure refactor, docs, infra, or non-UI library code), do not fabricate a video; record in the implementation notes that no QA E2E video applies and why.";
+  const toolingUnavailable =
+    "If `playwright-cli` or a browser runtime is unavailable, install it once per the skill (`npm install -g @playwright/cli@latest`, then `npx playwright install chromium` for a missing browser executable). If it still cannot run, record the smallest validation actually performed and note that the QA E2E video could not be produced — never claim a video exists when it does not.";
+  const sections = [whenToRunQa, howToRecord, notesReference, noUiScenario, toolingUnavailable];
+  return sections.join("\n");
+}
 function reviewDecisionFromResult(result: WorkflowTaskResult): ReviewDecision | undefined {
   return result.structured as ReviewDecision | undefined;
 }
@@ -360,6 +382,7 @@ function renderForkedOrchestratorPrompt(args: {
   readonly workflowCwdContext: PromptSection;
   readonly researchPath: string;
   readonly implementationNotesPath: string;
+  readonly qaVideoPath: string;
 }): string {
   return taggedPrompt([
     [
@@ -385,6 +408,7 @@ function renderForkedOrchestratorPrompt(args: {
       ].join("\n"),
     ],
     ["e2e_verification", E2E_VERIFICATION_GUIDANCE],
+    ["qa_e2e_video", renderQaE2eVideoGuidance(args.qaVideoPath)],
     [
       "output_format",
       [
@@ -396,6 +420,7 @@ function renderForkedOrchestratorPrompt(args: {
         "5. Validation run / recommended",
         "6. Deferred work or blockers",
         "7. Implementation notes — confirm the OS temp notes path was updated",
+        "8. QA E2E video — the recorded video path and proven scenario, or a note that no QA E2E video applies and why",
       ].join("\n"),
     ],
   ]);
@@ -424,6 +449,7 @@ type RalphWorkflowResult = {
   readonly research: string;
   readonly research_path: string;
   readonly implementation_notes_path: string;
+  readonly qa_video_path?: string;
   readonly pr_report?: string;
   readonly approved: boolean;
   readonly iterations_completed: number;
@@ -455,6 +481,7 @@ async function runRalphWorkflow(
   // worktree cwd so research stage writes land in the same checkout.
   const workflowResearchPath = resolve(workflowStartCwd, defaultResearchPath(prompt));
   const implementationNotesPath = await createImplementationNotesFile(prompt);
+  const qaVideoPath = await createQaEvidenceVideoPath();
   const artifactDir = await mkdtemp(join(tmpdir(), "atomic-ralph-run-"));
   const workflowCwdContext = workflowCwdContextSection(workflowStartCwd);
   let approved = false;
@@ -612,6 +639,7 @@ async function runRalphWorkflow(
         ],
         ["project_setup", WORKER_PREFLIGHT_CONTRACT],
         ["e2e_verification", E2E_VERIFICATION_GUIDANCE],
+        ["qa_e2e_video", renderQaE2eVideoGuidance(qaVideoPath)],
         [
           "orchestration_guidance",
           [
@@ -654,7 +682,8 @@ async function runRalphWorkflow(
             "Pass each subagent the relevant task, constraints, files, validation expectations, unresolved reviewer findings covered by the research, and instructions to report implementation-note-worthy decisions or tradeoffs.",
             "Coordinate subagent results into the smallest coherent set of changes that satisfies the researched implementation guidance and original user prompt.",
             "Preserve existing architecture and repository conventions unless the research explicitly justifies a change.",
-            "Run or delegate the most relevant validation commands available in the repository, including end-to-end browser or tmux validation when the change has an executable user scenario.",
+            "Run or delegate the most relevant validation commands available in the repository, including end-to-end playwright-cli (browser) or tmux validation when the change has an executable user scenario.",
+            "For UI-applicable or full-stack changes, ensure the QA E2E pass described in <qa_e2e_video> runs and records the reviewable proof video before you finalize this iteration.",
             `Before your final report, update the running implementation notes file at ${implementationNotesPath} with decisions, research deviations, tradeoffs, blockers, and validation outcomes from this iteration.`,
             "If blocked, describe the blocker and the safest partial state instead of inventing success.",
             "Do not hide failures; reviewers need accurate status.",
@@ -671,6 +700,7 @@ async function runRalphWorkflow(
             "5. Validation run / recommended",
             "6. Deferred work or blockers",
             "7. Implementation notes — confirm the OS temp notes path was updated",
+            "8. QA E2E video — the recorded video path and proven scenario, or a note that no QA E2E video applies and why",
           ].join("\n"),
         ],
       ])
@@ -681,6 +711,7 @@ async function runRalphWorkflow(
           workflowCwdContext,
           researchPath,
           implementationNotesPath,
+          qaVideoPath,
         });
     const orchestrator = await ctx.task(`orchestrator-${iteration}`, {
       prompt: orchestratorPrompt,
@@ -735,7 +766,7 @@ async function runRalphWorkflow(
         "validation_expectations",
         [
           "Inspect the actual diff/repository state rather than trusting stage summaries.",
-          "Run or delegate focused validation when it is necessary to distinguish a real bug from a hunch, including end-to-end browser or tmux validation when a user scenario can prove the outcome.",
+          "Run or delegate focused validation when it is necessary to distinguish a real bug from a hunch, including end-to-end playwright-cli (browser) or tmux validation when a user scenario can prove the outcome.",
           "If tests or typechecks fail because dependencies are missing, install/download the missing dependencies with the repo's documented package manager instead of bypassing the check.",
           "If validation cannot be completed after reasonable recovery, record the limitation in overall_explanation and reviewer_error; do not use missing dependencies as a reason to approve.",
         ].join("\n"),
@@ -790,7 +821,7 @@ async function runRalphWorkflow(
         [
           "1. Identify the changed files or diff under review.",
           "2. Read the relevant changed code and directly affected call sites/tests/configs.",
-          "3. Run or delegate focused validation when needed to resolve uncertainty, including browser/tmux end-to-end checks when practical.",
+          "3. Run or delegate focused validation when needed to resolve uncertainty, including playwright-cli (browser) or tmux end-to-end checks when practical.",
           "4. If you cannot inspect or validate enough to approve safely, populate reviewer_error and set stop_review_loop=false.",
         ].join("\n"),
       ],
@@ -888,6 +919,10 @@ async function runRalphWorkflow(
     if (approved) break;
   }
+  // The orchestrator writes the QA end-to-end proof video to this stable path
+  // when the change has a UI-applicable scenario; the final PR stage attaches it.
+  const qaVideoAvailable = existsSync(qaVideoPath);
   if (createPr === true) {
     const prResult = await ctx.task("pull-request", {
       prompt: taggedPrompt([
@@ -914,6 +949,21 @@ async function runRalphWorkflow(
             "If multiple accounts, hosts, or providers are available, use the remote URL and git config username/email as heuristics to choose the most likely identity, but try each available credential/account that can read the repository and create the provider-appropriate review request.",
           ].join("\n"),
         ],
+        [
+          "qa_video_attachment",
+          qaVideoAvailable
+            ? [
+                `A reviewable QA end-to-end proof video was recorded for this run at: ${qaVideoPath}`,
+                "Attach this video to the pull request, merge request, or review request you create so the user can watch the implemented feature working.",
+                "Prefer embedding or linking it in the PR/MR/review description. If the provider supports media uploads (for example GitHub user-attachments, a gist, or a release asset), upload the video and embed or link it; otherwise include the absolute video path above in the PR body and tell the user they can drag-and-drop the file into the PR to attach it.",
+                "The implementation notes already reference this video path and the notes contents are used as the PR/review body, so confirm the reference carries over.",
+                "Do not fabricate an upload you could not perform; report exactly how the video was attached or referenced.",
+              ].join("\n")
+            : [
+                "No QA end-to-end proof video was produced for this run (no UI-applicable scenario, or the browser runtime was unavailable).",
+                "Do not invent or attach a video. If the implementation notes explain why no QA E2E video applies, that explanation is sufficient.",
+              ].join("\n"),
+        ],
         [
           "pr_policy",
           [
@@ -936,6 +986,7 @@ async function runRalphWorkflow(
             "3. Implementation notes comment — whether the provider-appropriate comment was created as the last action, or why it could not be created",
             "4. Commands run — include exit status or clear outcome",
             "5. Follow-up for the user — exact next steps if credentials or repository state blocked PR creation",
+            "6. QA E2E video — how the proof video was attached or linked to the review request, or that no QA E2E video applies",
           ].join("\n"),
         ],
       ]),
@@ -956,6 +1007,7 @@ async function runRalphWorkflow(
     research: finalResearch,
     research_path: finalResearchPath,
     implementation_notes_path: implementationNotesPath,
+    ...(qaVideoAvailable ? { qa_video_path: qaVideoPath } : {}),
     ...(finalPrReport === undefined ? {} : { pr_report: finalPrReport }),
     approved,
     iterations_completed: iterationsCompleted,
@@ -997,6 +1049,7 @@ export default defineWorkflow("ralph")
   .output("research", Type.Optional(Type.String({ description: "Latest research report text or artifact reference." })))
   .output("research_path", Type.Optional(Type.String({ description: "Path to the latest generated research artifact under research/." })))
   .output("implementation_notes_path", Type.Optional(Type.String({ description: "OS-temp notes file containing decisions, deviations, blockers, and validation notes." })))
+  .output("qa_video_path", Type.Optional(Type.String({ description: "Absolute path to the reviewable QA end-to-end proof video recorded with playwright-cli for UI-applicable changes, when one was produced." })))
   .output("pr_report", Type.Optional(Type.String({ description: "Pull-request report emitted only when create_pr=true and the final pull-request stage runs." })))
   .output("approved", Type.Optional(Type.Boolean({ description: "Whether the reviewer loop approved before completion or optional final handoff." })))
   .output("iterations_completed", Type.Optional(Type.Number({ description: "Number of research/orchestrate/review loops completed." })))

package/dist/builtin/workflows/builtin/shared-prompts.ts CHANGED Viewed

@@ -12,7 +12,7 @@ export const WORKER_PREFLIGHT_CONTRACT = [
 export const E2E_VERIFICATION_GUIDANCE = [
   "Verify correctness end-to-end whenever practical for user-visible behavior; do not rely only on code inspection, unit tests, or stage summaries when an executable user scenario can prove the outcome.",
-  "For web or frontend flows — including frontend changes whose correctness depends on backend/API behavior — use the browser skill, or delegate to a subagent with `skill: \"browser\"`, to drive the application like a user and capture screenshot, DOM, or network evidence when that proves the objective.",
+  "For web or frontend flows — including frontend changes whose correctness depends on backend/API behavior — use the playwright-cli skill, or delegate to a subagent with `skill: \"playwright-cli\"`, to drive the application like a user and capture snapshot, screenshot, DOM, or network evidence when that proves the objective.",
   "For TUI or terminal-app flows, use the tmux skill, or delegate to a subagent with `skill: \"tmux\"`, to launch the app in an isolated tmux session, send keys, capture pane output, and simulate the scenario end to end.",
   "If end-to-end verification is not practical in this checkout, record what was attempted, the smallest missing prerequisite, and the narrower validation that was run instead; do not claim end-to-end proof when it was not performed.",
 ].join("\n");

package/dist/builtin/workflows/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@bastani/workflows",
-  "version": "0.8.31-alpha.1",
+  "version": "0.8.31-alpha.2",
   "private": true,
   "description": "Atomic extension for multi-stage workflow authoring and execution.",
   "contributors": [
@@ -83,7 +83,7 @@
   },
   "peerDependencies": {
     "@bastani/atomic": "*",
-    "@earendil-works/pi-tui": "^0.79.6"
+    "@earendil-works/pi-tui": "^0.79.7"
   },
   "peerDependenciesMeta": {
     "@bastani/atomic": {

package/dist/builtin/workflows/skills/research-codebase/SKILL.md CHANGED Viewed

@@ -62,7 +62,7 @@ The user's research question/request is: **$ARGUMENTS**
     **For online search:**
     - VERY IMPORTANT: In case you discover external libraries as dependencies, use the **codebase-online-researcher** agent for external documentation and resources
-        - The agent fetches live web content using the **browser** skill's `browse` CLI (or `npx browse` / `curl`). Instruct it to apply the token-efficient fetch order: (1) try `curl https://<site>/llms.txt` for an AI-friendly index (see [llmstxt.org](https://llmstxt.org/llms.txt)), (2) try `curl <url> -H "Accept: text/markdown"` to get pre-converted Markdown (supported on Cloudflare-hosted docs via [Markdown for Agents](https://developers.cloudflare.com/fundamentals/reference/markdown-for-agents/)), (3) fall back to HTML parsing via `browse`
+        - The agent fetches live web content using the **playwright-cli** skill's `playwright-cli` command (or `npx playwright-cli` / `curl`). Instruct it to apply the token-efficient fetch order: (1) try `curl https://<site>/llms.txt` for an AI-friendly index (see [llmstxt.org](https://llmstxt.org/llms.txt)), (2) try `curl <url> -H "Accept: text/markdown"` to get pre-converted Markdown (supported on Cloudflare-hosted docs via [Markdown for Agents](https://developers.cloudflare.com/fundamentals/reference/markdown-for-agents/)), (3) fall back to HTML parsing via `playwright-cli`
         - Instruct the agent to return LINKS with their findings and INCLUDE those links in the research document
         - The agent should persist reusable source documents under `research/web/<YYYY-MM-DD>-<kebab-case-topic>.md` (with frontmatter noting `source_url`, `fetched_at`, and `fetch_method`) so future research can reuse them without re-fetching
         - Output directory for the synthesized web research artifacts: `research/web/`: