@bastani/atomic 0.8.31-alpha.1 → 0.8.31-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. package/CHANGELOG.md +14 -3
  2. package/README.md +12 -10
  3. package/dist/builtin/cursor/CHANGELOG.md +1 -1
  4. package/dist/builtin/cursor/package.json +2 -2
  5. package/dist/builtin/intercom/CHANGELOG.md +1 -1
  6. package/dist/builtin/intercom/package.json +2 -2
  7. package/dist/builtin/mcp/CHANGELOG.md +1 -1
  8. package/dist/builtin/mcp/package.json +3 -3
  9. package/dist/builtin/subagents/CHANGELOG.md +10 -1
  10. package/dist/builtin/subagents/agents/codebase-online-researcher.md +8 -8
  11. package/dist/builtin/subagents/agents/debugger.md +6 -6
  12. package/dist/builtin/subagents/package.json +4 -4
  13. package/dist/builtin/subagents/skills/effective-liteparse/SKILL.md +118 -0
  14. package/dist/builtin/subagents/skills/effective-liteparse/scripts/search.py +128 -0
  15. package/dist/builtin/subagents/skills/playwright-cli/SKILL.md +404 -0
  16. package/dist/builtin/subagents/skills/playwright-cli/references/element-attributes.md +23 -0
  17. package/dist/builtin/subagents/skills/playwright-cli/references/playwright-tests.md +39 -0
  18. package/dist/builtin/subagents/skills/playwright-cli/references/request-mocking.md +87 -0
  19. package/dist/builtin/subagents/skills/playwright-cli/references/running-code.md +241 -0
  20. package/dist/builtin/subagents/skills/playwright-cli/references/session-management.md +225 -0
  21. package/dist/builtin/subagents/skills/playwright-cli/references/spec-driven-testing.md +305 -0
  22. package/dist/builtin/subagents/skills/playwright-cli/references/storage-state.md +275 -0
  23. package/dist/builtin/subagents/skills/playwright-cli/references/test-generation.md +134 -0
  24. package/dist/builtin/subagents/skills/playwright-cli/references/tracing.md +139 -0
  25. package/dist/builtin/subagents/skills/playwright-cli/references/video-recording.md +143 -0
  26. package/dist/builtin/web-access/CHANGELOG.md +1 -1
  27. package/dist/builtin/web-access/package.json +2 -2
  28. package/dist/builtin/workflows/CHANGELOG.md +7 -1
  29. package/dist/builtin/workflows/README.md +4 -4
  30. package/dist/builtin/workflows/builtin/open-claude-design.ts +59 -56
  31. package/dist/builtin/workflows/builtin/ralph.ts +56 -3
  32. package/dist/builtin/workflows/builtin/shared-prompts.ts +1 -1
  33. package/dist/builtin/workflows/package.json +2 -2
  34. package/dist/builtin/workflows/skills/research-codebase/SKILL.md +1 -1
  35. package/dist/cli/args.d.ts.map +1 -1
  36. package/dist/cli/args.js +1 -1
  37. package/dist/cli/args.js.map +1 -1
  38. package/dist/core/agent-session.d.ts +1 -0
  39. package/dist/core/agent-session.d.ts.map +1 -1
  40. package/dist/core/agent-session.js +38 -18
  41. package/dist/core/agent-session.js.map +1 -1
  42. package/dist/core/context-window.d.ts +11 -1
  43. package/dist/core/context-window.d.ts.map +1 -1
  44. package/dist/core/context-window.js +19 -6
  45. package/dist/core/context-window.js.map +1 -1
  46. package/dist/core/copilot-model-catalog.d.ts +19 -16
  47. package/dist/core/copilot-model-catalog.d.ts.map +1 -1
  48. package/dist/core/copilot-model-catalog.js +14 -11
  49. package/dist/core/copilot-model-catalog.js.map +1 -1
  50. package/dist/core/project-trust.d.ts.map +1 -1
  51. package/dist/core/project-trust.js +2 -1
  52. package/dist/core/project-trust.js.map +1 -1
  53. package/dist/core/sdk.d.ts.map +1 -1
  54. package/dist/core/sdk.js +18 -7
  55. package/dist/core/sdk.js.map +1 -1
  56. package/dist/core/settings-manager.d.ts +11 -2
  57. package/dist/core/settings-manager.d.ts.map +1 -1
  58. package/dist/core/settings-manager.js +62 -8
  59. package/dist/core/settings-manager.js.map +1 -1
  60. package/dist/core/system-prompt.d.ts.map +1 -1
  61. package/dist/core/system-prompt.js +1 -0
  62. package/dist/core/system-prompt.js.map +1 -1
  63. package/dist/core/tools/edit-diff.d.ts +1 -2
  64. package/dist/core/tools/edit-diff.d.ts.map +1 -1
  65. package/dist/core/tools/edit-diff.js +1 -2
  66. package/dist/core/tools/edit-diff.js.map +1 -1
  67. package/dist/index.d.ts +2 -1
  68. package/dist/index.d.ts.map +1 -1
  69. package/dist/index.js +1 -0
  70. package/dist/index.js.map +1 -1
  71. package/dist/modes/interactive/components/config-selector.d.ts.map +1 -1
  72. package/dist/modes/interactive/components/config-selector.js +5 -7
  73. package/dist/modes/interactive/components/config-selector.js.map +1 -1
  74. package/dist/modes/interactive/components/model-selector.d.ts.map +1 -1
  75. package/dist/modes/interactive/components/model-selector.js +2 -1
  76. package/dist/modes/interactive/components/model-selector.js.map +1 -1
  77. package/dist/modes/interactive/components/scoped-models-selector.d.ts.map +1 -1
  78. package/dist/modes/interactive/components/scoped-models-selector.js +4 -1
  79. package/dist/modes/interactive/components/scoped-models-selector.js.map +1 -1
  80. package/dist/modes/interactive/components/settings-selector.d.ts +2 -0
  81. package/dist/modes/interactive/components/settings-selector.d.ts.map +1 -1
  82. package/dist/modes/interactive/components/settings-selector.js +165 -15
  83. package/dist/modes/interactive/components/settings-selector.js.map +1 -1
  84. package/dist/modes/interactive/components/tree-selector.d.ts.map +1 -1
  85. package/dist/modes/interactive/components/tree-selector.js +44 -4
  86. package/dist/modes/interactive/components/tree-selector.js.map +1 -1
  87. package/dist/modes/interactive/interactive-mode.d.ts +1 -1
  88. package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
  89. package/dist/modes/interactive/interactive-mode.js +24 -54
  90. package/dist/modes/interactive/interactive-mode.js.map +1 -1
  91. package/dist/modes/interactive/model-search.d.ts +7 -0
  92. package/dist/modes/interactive/model-search.d.ts.map +1 -0
  93. package/dist/modes/interactive/model-search.js +6 -0
  94. package/dist/modes/interactive/model-search.js.map +1 -0
  95. package/dist/modes/interactive/theme/theme-controller.d.ts +30 -0
  96. package/dist/modes/interactive/theme/theme-controller.d.ts.map +1 -0
  97. package/dist/modes/interactive/theme/theme-controller.js +108 -0
  98. package/dist/modes/interactive/theme/theme-controller.js.map +1 -0
  99. package/dist/modes/interactive/theme/theme-schema.json +2 -1
  100. package/dist/modes/interactive/theme/theme.d.ts +5 -0
  101. package/dist/modes/interactive/theme/theme.d.ts.map +1 -1
  102. package/dist/modes/interactive/theme/theme.js +70 -29
  103. package/dist/modes/interactive/theme/theme.js.map +1 -1
  104. package/dist/modes/rpc/rpc-client.d.ts +1 -1
  105. package/dist/modes/rpc/rpc-client.d.ts.map +1 -1
  106. package/dist/modes/rpc/rpc-client.js +1 -1
  107. package/dist/modes/rpc/rpc-client.js.map +1 -1
  108. package/dist/modes/rpc/rpc-mode.d.ts.map +1 -1
  109. package/dist/modes/rpc/rpc-mode.js +1 -1
  110. package/dist/modes/rpc/rpc-mode.js.map +1 -1
  111. package/dist/package-manager-cli.d.ts.map +1 -1
  112. package/dist/package-manager-cli.js +39 -9
  113. package/dist/package-manager-cli.js.map +1 -1
  114. package/docs/extensions.md +21 -0
  115. package/docs/models.md +3 -3
  116. package/docs/packages.md +13 -9
  117. package/docs/providers.md +2 -2
  118. package/docs/quickstart.md +14 -0
  119. package/docs/rpc.md +3 -3
  120. package/docs/sdk.md +15 -11
  121. package/docs/session-format.md +1 -1
  122. package/docs/settings.md +8 -3
  123. package/docs/themes.md +3 -1
  124. package/docs/tui.md +1 -1
  125. package/docs/usage.md +12 -9
  126. package/docs/workflows.md +9 -7
  127. package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
  128. package/examples/extensions/custom-provider-anthropic/package.json +1 -1
  129. package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
  130. package/examples/extensions/gondolin/package-lock.json +2 -2
  131. package/examples/extensions/gondolin/package.json +1 -1
  132. package/examples/extensions/preset.ts +10 -4
  133. package/examples/extensions/provider-payload.ts +5 -5
  134. package/examples/extensions/sandbox/index.ts +2 -2
  135. package/examples/extensions/sandbox/package-lock.json +3 -3
  136. package/examples/extensions/sandbox/package.json +2 -2
  137. package/examples/extensions/subagent/agents.ts +2 -2
  138. package/examples/extensions/subagent/index.ts +4 -2
  139. package/examples/extensions/with-deps/package-lock.json +2 -2
  140. package/examples/extensions/with-deps/package.json +1 -1
  141. package/package.json +5 -5
  142. package/dist/builtin/subagents/skills/browser/EXAMPLES.md +0 -151
  143. package/dist/builtin/subagents/skills/browser/LICENSE.txt +0 -21
  144. package/dist/builtin/subagents/skills/browser/REFERENCE.md +0 -451
  145. package/dist/builtin/subagents/skills/browser/SKILL.md +0 -170
@@ -21,12 +21,13 @@
21
21
  *
22
22
  * The refinement loop has been re-shaped so that the artifact under review is
23
23
  * a real HTML page on disk (`preview.html`). The workflow attempts to open it
24
- * through the `browser` skill so the user can interactively review;
24
+ * through the `playwright-cli` skill so the user can interactively review;
25
25
  * when browser automation is unavailable, the file path is surfaced so the user
26
26
  * can open it manually. Before any stage runs, an initial deterministic setup
27
- * step ensures the browser skill's `browse` CLI is available (`which browse`,
28
- * then `npm install -g browse` when missing); it is best-effort and never
29
- * blocks the run. The final exporter produces a rich `spec.html` that
27
+ * step ensures the playwright-cli skill's `playwright-cli` command is available
28
+ * (`npx --no-install playwright-cli --version`, then
29
+ * `npm install -g @playwright/cli@latest` when missing); it is best-effort and
30
+ * never blocks the run. The final exporter produces a rich `spec.html` that
30
31
  * embeds the agreed-upon design alongside the implementation handoff.
31
32
  */
32
33
 
@@ -222,12 +223,12 @@ const ANTI_SLOP_RULES = [
222
223
  "Commit to a specific aesthetic direction; do not hedge with generic SaaS defaults.",
223
224
  ].join("\n");
224
225
 
225
- type BrowseCliStatus = {
226
- /** Whether the `browse` CLI is expected to be available to downstream stages. */
226
+ type PlaywrightCliStatus = {
227
+ /** Whether the `playwright-cli` command is expected to be available to downstream stages. */
227
228
  readonly available: boolean;
228
- /** True when the CLI was already on PATH and no install was attempted. */
229
+ /** True when the command was already on PATH and no install was attempted. */
229
230
  readonly alreadyPresent: boolean;
230
- /** True when this step installed the CLI via `npm install -g browse`. */
231
+ /** True when this step installed the command via `npm install -g @playwright/cli@latest`. */
231
232
  readonly installed: boolean;
232
233
  /** Human-readable, single-line outcome surfaced as a workflow output. */
233
234
  readonly summary: string;
@@ -236,23 +237,24 @@ type BrowseCliStatus = {
236
237
  };
237
238
 
238
239
  /**
239
- * Initial deterministic setup step (no LLM): ensure the browser skill's `browse`
240
- * CLI is available before any design stage runs. Mirrors the browser skill's
241
- * documented bootstrap (`which browse || npm install -g browse`) but performs it
242
- * once, deterministically, instead of relying on each stage to probe/install it.
240
+ * Initial deterministic setup step (no LLM): ensure the playwright-cli skill's
241
+ * `playwright-cli` command is available before any design stage runs. Mirrors the
242
+ * playwright-cli skill's documented bootstrap (`npx --no-install playwright-cli
243
+ * --version` || `npm install -g @playwright/cli@latest`) but performs it once,
244
+ * deterministically, instead of relying on each stage to probe/install it.
243
245
  * The PATH probe always runs, but the actual global install is skipped under
244
246
  * automated tests (`NODE_ENV=test`) to avoid slow, networked, environment-
245
247
  * mutating side effects.
246
248
  *
247
249
  * Best-effort by contract: it never throws and never blocks the workflow. When
248
- * the CLI cannot be located or installed, downstream stages keep their graceful
250
+ * the command cannot be located or installed, downstream stages keep their graceful
249
251
  * degradation path (surface the manual preview path / URL).
250
252
  */
251
- function ensureBrowseCli(): BrowseCliStatus {
253
+ function ensurePlaywrightCli(): PlaywrightCliStatus {
252
254
  const isWindows = process.platform === "win32";
253
255
  const onPath = (): boolean => {
254
256
  try {
255
- const probe = spawnSync(isWindows ? "where" : "which", ["browse"], {
257
+ const probe = spawnSync(isWindows ? "where" : "which", ["playwright-cli"], {
256
258
  stdio: "ignore",
257
259
  timeout: 15_000,
258
260
  shell: isWindows,
@@ -268,7 +270,7 @@ function ensureBrowseCli(): BrowseCliStatus {
268
270
  available: true,
269
271
  alreadyPresent: true,
270
272
  installed: false,
271
- summary: "browse CLI already on PATH; skipped install.",
273
+ summary: "playwright-cli already on PATH; skipped install.",
272
274
  };
273
275
  }
274
276
 
@@ -282,13 +284,13 @@ function ensureBrowseCli(): BrowseCliStatus {
282
284
  alreadyPresent: false,
283
285
  installed: false,
284
286
  summary:
285
- "browse CLI not found; skipped global install under the test environment.",
287
+ "playwright-cli not found; skipped global install under the test environment.",
286
288
  error: "global install skipped during tests",
287
289
  };
288
290
  }
289
291
 
290
292
  try {
291
- const install = spawnSync("npm", ["install", "-g", "browse"], {
293
+ const install = spawnSync("npm", ["install", "-g", "@playwright/cli@latest"], {
292
294
  stdio: "ignore",
293
295
  timeout: 180_000,
294
296
  shell: isWindows,
@@ -298,19 +300,19 @@ function ensureBrowseCli(): BrowseCliStatus {
298
300
  available: true,
299
301
  alreadyPresent: false,
300
302
  installed: true,
301
- summary: "Installed browse CLI via `npm install -g browse`.",
303
+ summary: "Installed playwright-cli via `npm install -g @playwright/cli@latest`.",
302
304
  };
303
305
  }
304
306
  const reason =
305
307
  install.error?.message ??
306
308
  (typeof install.status === "number"
307
- ? `npm install -g browse exited with code ${install.status}`
308
- : "npm install -g browse did not complete");
309
+ ? `npm install -g @playwright/cli@latest exited with code ${install.status}`
310
+ : "npm install -g @playwright/cli@latest did not complete");
309
311
  return {
310
312
  available: false,
311
313
  alreadyPresent: false,
312
314
  installed: false,
313
- summary: `Could not install browse CLI (${reason}); stages will degrade gracefully.`,
315
+ summary: `Could not install playwright-cli (${reason}); stages will degrade gracefully.`,
314
316
  error: reason,
315
317
  };
316
318
  } catch (error) {
@@ -320,7 +322,7 @@ function ensureBrowseCli(): BrowseCliStatus {
320
322
  available: false,
321
323
  alreadyPresent: false,
322
324
  installed: false,
323
- summary: `Could not install browse CLI (${reason}); stages will degrade gracefully.`,
325
+ summary: `Could not install playwright-cli (${reason}); stages will degrade gracefully.`,
324
326
  error: reason,
325
327
  };
326
328
  }
@@ -328,24 +330,25 @@ function ensureBrowseCli(): BrowseCliStatus {
328
330
 
329
331
  /**
330
332
  * Build the per-run browser bootstrap guidance injected into stage prompts.
331
- * When the deterministic setup step already ensured `browse` is installed, the
332
- * guidance tells stages to assume availability and not waste turns reinstalling;
333
- * otherwise it retains the original probe-and-install fallback.
333
+ * When the deterministic setup step already ensured `playwright-cli` is installed,
334
+ * the guidance tells stages to assume availability and not waste turns
335
+ * reinstalling; otherwise it retains the original probe-and-install fallback.
334
336
  */
335
- function buildBrowserBootstrapRules(status: BrowseCliStatus): string {
337
+ function buildPlaywrightCliBootstrapRules(status: PlaywrightCliStatus): string {
336
338
  const probeRule = status.available
337
- ? "The workflow's deterministic setup step already ensured the browser skill's `browse` CLI is installed and on PATH; assume it is available and do NOT reinstall it. Only if a `browse` command reports the executable as missing should you re-probe with `which browse` and run `npm install -g browse` once before retrying. Do not add project dependencies."
338
- : `The workflow's deterministic setup step attempted to install the browser skill's \`browse\` CLI but it FAILED with: "${status.error ?? "unknown error"}". Treat this as a known starting condition to work around, not a hard blocker. Probe with \`which browse\` and retry once with \`npm install -g browse\`; if it still fails, use the error above to diagnose a workaround (for example: EACCES/permission errors → retry with a user-writable global prefix; missing npm/Node → report it plainly; network/registry errors → surface them). If the CLI still cannot be made available, degrade gracefully and surface the manual file path / URL. Do not add project dependencies.`;
339
+ ? "The workflow's deterministic setup step already ensured the playwright-cli skill's `playwright-cli` command is installed and on PATH; assume it is available and do NOT reinstall it. Only if a `playwright-cli` command reports it is missing should you re-probe with `which playwright-cli` (or `npx --no-install playwright-cli --version`) and run `npm install -g @playwright/cli@latest` once before retrying. Do not add project dependencies."
340
+ : `The workflow's deterministic setup step attempted to install the playwright-cli skill's \`playwright-cli\` command but it FAILED with: "${status.error ?? "unknown error"}". Treat this as a known starting condition to work around, not a hard blocker. Probe with \`which playwright-cli\` (or \`npx --no-install playwright-cli --version\`) and retry once with \`npm install -g @playwright/cli@latest\`; if it still fails, use the error above to diagnose a workaround (for example: EACCES/permission errors → retry with a user-writable global prefix; missing npm/Node → report it plainly; network/registry errors → surface them). If the command still cannot be made available, degrade gracefully and surface the manual file path / URL. Do not add project dependencies.`;
339
341
  return [
340
342
  probeRule,
341
- "Use `browse open <url> --local --headed` when a generated local preview should be visible to the user, and use `browse snapshot` plus `browse screenshot --path <file>` for review evidence.",
342
- "If `browse` is unavailable after three attempts or the browser runtime still fails, degrade gracefully and surface the manual file path / URL.",
343
+ "Use `playwright-cli open <url>` when a generated local preview should be visible to the user, and use `playwright-cli snapshot` plus `playwright-cli screenshot --filename=<file>` for review evidence.",
344
+ "If a `playwright-cli` command reports a missing browser executable, install the browser once with `npx playwright install chromium` and retry.",
345
+ "If `playwright-cli` is unavailable after three attempts or the browser runtime still fails, degrade gracefully and surface the manual file path / URL.",
343
346
  ].join("\n");
344
347
  }
345
348
 
346
349
  export default defineWorkflow("open-claude-design")
347
350
  .description(
348
- "AI-powered design workflow: design-system onboarding → reference import → HTML generation → impeccable-driven refinement → quality gate → rich HTML handoff. Each stage delegates to a specific impeccable sub-skill; the user can iteratively review the generated HTML through the browser skill.",
351
+ "AI-powered design workflow: design-system onboarding → reference import → HTML generation → impeccable-driven refinement → quality gate → rich HTML handoff. Each stage delegates to a specific impeccable sub-skill; the user can iteratively review the generated HTML through the playwright-cli skill.",
349
352
  )
350
353
  .input("prompt", Type.String({
351
354
  description: "What to design (for example, a dashboard, page, component, or prototype).",
@@ -378,14 +381,14 @@ export default defineWorkflow("open-claude-design")
378
381
  .output("preview_file_url", Type.Optional(Type.String({ description: "file:// URL for the generated preview.html file." })))
379
382
  .output("spec_path", Type.Optional(Type.String({ description: "Absolute path to the generated spec.html file." })))
380
383
  .output("spec_file_url", Type.Optional(Type.String({ description: "file:// URL for the generated spec.html file." })))
381
- .output("browse_cli_status", Type.Optional(Type.String({ description: "Outcome of the initial deterministic step that ensures the browser skill's `browse` CLI is installed." })))
384
+ .output("playwright_cli_status", Type.Optional(Type.String({ description: "Outcome of the initial deterministic step that ensures the playwright-cli skill's `playwright-cli` command is installed." })))
382
385
  .run(async (ctx) => {
383
- // Initial deterministic setup step (no LLM): ensure the browser skill's
384
- // `browse` CLI is installed before any design stage runs. Best-effort —
386
+ // Initial deterministic setup step (no LLM): ensure the playwright-cli skill's
387
+ // `playwright-cli` command is installed before any design stage runs. Best-effort —
385
388
  // a failed install never blocks the workflow; downstream stages keep their
386
389
  // graceful-degradation fallback (surface the manual preview path / URL).
387
- const browseCli = ensureBrowseCli();
388
- const browserBootstrapRules = buildBrowserBootstrapRules(browseCli);
390
+ const playwrightCli = ensurePlaywrightCli();
391
+ const browserBootstrapRules = buildPlaywrightCliBootstrapRules(playwrightCli);
389
392
 
390
393
  const inputs = ctx.inputs;
391
394
 
@@ -626,8 +629,8 @@ export default defineWorkflow("open-claude-design")
626
629
  [
627
630
  "instructions",
628
631
  [
629
- "1. Use browser/screenshot tooling (for example the browser skill's `browse` CLI) if available; cite observable evidence rather than guessing.",
630
- "2. If `browse` is available but opening the reference URL reports a missing browser executable, follow the bootstrap rules and retry once.",
632
+ "1. Use browser/screenshot tooling (for example the playwright-cli skill's `playwright-cli` command) if available; cite observable evidence rather than guessing.",
633
+ "2. If `playwright-cli` is available but opening the reference URL reports a missing browser executable, follow the bootstrap rules and retry once.",
631
634
  "3. Analyze: layout, visual hierarchy, navigation, color, typography, spacing, states, interactions, responsive behavior.",
632
635
  "4. Separate reference-specific styling from requirements that should transfer to this project's design system.",
633
636
  "5. If the URL is inaccessible or browser bootstrap fails, state that and provide a best-effort fallback based only on available information — never fabricate observations.",
@@ -738,7 +741,7 @@ export default defineWorkflow("open-claude-design")
738
741
  ],
739
742
  [
740
743
  "objective",
741
- "Your job is to make the just-generated HTML artifact visible to the user so they can give feedback. Open the HTML preview file using the browser skill's `browse` CLI when available, then prompt the user for feedback. Gracefully degrade if browser automation is unavailable.",
744
+ "Your job is to make the just-generated HTML artifact visible to the user so they can give feedback. Open the HTML preview file using the playwright-cli skill's `playwright-cli` command when available, then prompt the user for feedback. Gracefully degrade if browser automation is unavailable.",
742
745
  ],
743
746
  ["preview_path", previewPath],
744
747
  ["preview_file_url", previewFileUrl],
@@ -746,11 +749,11 @@ export default defineWorkflow("open-claude-design")
746
749
  [
747
750
  "instructions",
748
751
  [
749
- "1. Probe for `browse` availability using the bootstrap rules above.",
750
- `2. If available, run: \`browse open ${previewFileUrl} --local --headed\`. If that reports a missing browser executable, follow the bootstrap rules and retry once.`,
751
- "3. Then run `browse snapshot` and use any available annotation/review flow from the active browser environment; if none exists, ask the user to review the visible page or manual file path and provide notes inline.",
752
+ "1. Probe for `playwright-cli` availability using the bootstrap rules above.",
753
+ `2. If available, run: \`playwright-cli open ${previewFileUrl}\`. If that reports a missing browser executable, follow the bootstrap rules and retry once.`,
754
+ "3. Then run `playwright-cli snapshot` and, for interactive review, `playwright-cli show --annotate` so the user can draw on the page and add notes; if interactive review is unavailable, ask the user to review the visible page or manual file path and provide notes inline.",
752
755
  "4. Capture any annotation artifact path, screenshot path, or user notes and surface them in your output.",
753
- `5. If \`browse\` is NOT available or browser bootstrap fails, print a clear instruction block telling the user to open the file manually at: ${previewPath} (or via the URL ${previewFileUrl}).`,
756
+ `5. If \`playwright-cli\` is NOT available or browser bootstrap fails, print a clear instruction block telling the user to open the file manually at: ${previewPath} (or via the URL ${previewFileUrl}).`,
754
757
  "6. Never block the workflow on unavailable tooling; always exit with a non-empty status string.",
755
758
  ].join("\n"),
756
759
  ],
@@ -869,10 +872,10 @@ export default defineWorkflow("open-claude-design")
869
872
  [
870
873
  "instructions",
871
874
  [
872
- `1. Attempt rendering verification via the browser skill: \`browse open ${previewFileUrl} --local\`. If that reports a missing browser executable, follow the bootstrap rules and retry once.`,
873
- `2. Then run \`browse viewport 360 800\`, \`browse screenshot --path ${join(artifactDir, `mobile-${iteration}.png`)}\`, \`browse viewport 1440 900\`, \`browse screenshot --path ${join(artifactDir, `desktop-${iteration}.png`)}\`.`,
875
+ `1. Attempt rendering verification via the playwright-cli skill: \`playwright-cli open ${previewFileUrl}\`. If that reports a missing browser executable, follow the bootstrap rules and retry once.`,
876
+ `2. Then run \`playwright-cli resize 360 800\`, \`playwright-cli screenshot --filename=${join(artifactDir, `mobile-${iteration}.png`)}\`, \`playwright-cli resize 1440 900\`, \`playwright-cli screenshot --filename=${join(artifactDir, `desktop-${iteration}.png`)}\`.`,
874
877
  "3. Check: contrast (WCAG AA), overflow, spacing rhythm, alignment, breakpoint behavior, empty/loading/error states, keyboard/pointer affordances, focus rings, prefers-reduced-motion.",
875
- "4. If `browse` is unavailable or browser bootstrap fails, perform a static design review of the HTML source and mark every finding as `needs-rendering-verification`.",
878
+ "4. If `playwright-cli` is unavailable or browser bootstrap fails, perform a static design review of the HTML source and mark every finding as `needs-rendering-verification`.",
876
879
  "5. Distinguish confirmed visual issues from risks that need rendering verification. Never fabricate rendered evidence.",
877
880
  ].join("\n"),
878
881
  ],
@@ -956,9 +959,9 @@ export default defineWorkflow("open-claude-design")
956
959
  [
957
960
  "instructions",
958
961
  [
959
- `1. If \`browse\` is available, run \`browse open ${previewFileUrl} --local --headed\`. If that reports a missing browser executable, follow the bootstrap rules and retry once.`,
960
- "2. Then run `browse snapshot` and use any available annotation/review flow from the active browser environment; otherwise ask the user to provide feedback inline.",
961
- `3. If \`browse\` is unavailable or browser bootstrap fails, surface the path clearly: ${previewPath} (URL: ${previewFileUrl}).`,
962
+ `1. If \`playwright-cli\` is available, run \`playwright-cli open ${previewFileUrl}\`. If that reports a missing browser executable, follow the bootstrap rules and retry once.`,
963
+ "2. Then run `playwright-cli snapshot` and, for interactive review, `playwright-cli show --annotate`; otherwise ask the user to provide feedback inline.",
964
+ `3. If \`playwright-cli\` is unavailable or browser bootstrap fails, surface the path clearly: ${previewPath} (URL: ${previewFileUrl}).`,
962
965
  "4. Return any captured annotations as structured notes the next user-feedback step can read.",
963
966
  "5. Do not block on unavailable tooling.",
964
967
  ].join("\n"),
@@ -1083,7 +1086,7 @@ export default defineWorkflow("open-claude-design")
1083
1086
  "Return markdown with headings (NOT the HTML):",
1084
1087
  "1. Spec written to (absolute path)",
1085
1088
  "2. Sections included",
1086
- "3. How to open the spec (browse command + manual fallback path)",
1089
+ "3. How to open the spec (playwright-cli command + manual fallback path)",
1087
1090
  "4. Recommended files and components",
1088
1091
  "5. Implementation steps",
1089
1092
  "6. Usage example",
@@ -1107,7 +1110,7 @@ export default defineWorkflow("open-claude-design")
1107
1110
  ],
1108
1111
  [
1109
1112
  "objective",
1110
- "Make the rich HTML spec visible to the user. Open the final spec.html with the browser skill's `browse` CLI so the user can review the agreed design and implementation handoff. Degrade gracefully if browser automation is unavailable.",
1113
+ "Make the rich HTML spec visible to the user. Open the final spec.html with the playwright-cli skill's `playwright-cli` command so the user can review the agreed design and implementation handoff. Degrade gracefully if browser automation is unavailable.",
1111
1114
  ],
1112
1115
  ["spec_path", specPath],
1113
1116
  ["spec_file_url", specFileUrl],
@@ -1117,9 +1120,9 @@ export default defineWorkflow("open-claude-design")
1117
1120
  [
1118
1121
  "instructions",
1119
1122
  [
1120
- "1. Probe for `browse` availability using the bootstrap rules above.",
1121
- `2. If available, run \`browse open ${specFileUrl} --local --headed\`. If that reports a missing browser executable, follow the bootstrap rules and retry once.`,
1122
- "3. Then run `browse snapshot` and use any available annotation/review flow from the active browser environment so the user can capture any final notes.",
1123
+ "1. Probe for `playwright-cli` availability using the bootstrap rules above.",
1124
+ `2. If available, run \`playwright-cli open ${specFileUrl}\`. If that reports a missing browser executable, follow the bootstrap rules and retry once.`,
1125
+ "3. Then run `playwright-cli snapshot` and, for interactive review, `playwright-cli show --annotate` so the user can capture any final notes.",
1123
1126
  `4. Always print, prominently, the absolute paths so the user can open them manually:\n - Final spec: ${specPath}\n - Approved preview: ${previewPath}`,
1124
1127
  "5. Do not block the workflow; return a structured summary even if no tooling worked.",
1125
1128
  ].join("\n"),
@@ -1147,7 +1150,7 @@ export default defineWorkflow("open-claude-design")
1147
1150
  preview_file_url: previewFileUrl,
1148
1151
  spec_path: specPath,
1149
1152
  spec_file_url: specFileUrl,
1150
- browse_cli_status: browseCli.summary,
1153
+ playwright_cli_status: playwrightCli.summary,
1151
1154
  };
1152
1155
  })
1153
1156
  .compile();
@@ -7,6 +7,7 @@
7
7
  * findings into the next research pass with ctx.task().
8
8
  */
9
9
 
10
+ import { existsSync } from "node:fs";
10
11
  import { mkdir, mkdtemp, writeFile } from "node:fs/promises";
11
12
  import { tmpdir } from "node:os";
12
13
  import { dirname, join, resolve } from "node:path";
@@ -21,6 +22,7 @@ import { E2E_VERIFICATION_GUIDANCE, WORKER_PREFLIGHT_CONTRACT } from "./shared-p
21
22
  const DEFAULT_MAX_LOOPS = 10;
22
23
  const DEFAULT_RESEARCH_DIR = "research";
23
24
  const IMPLEMENTATION_NOTES_FILENAME = "implementation-notes.md";
25
+ const QA_E2E_VIDEO_FILENAME = "qa-e2e-evidence.webm";
24
26
  const MAX_RESEARCH_SLUG_LENGTH = 80;
25
27
  // Reviewer fan-out launches three independent reviewers; the loop stops only when
26
28
  // all three reviewers independently approve (find no issues). Requiring unanimous
@@ -194,6 +196,26 @@ async function createImplementationNotesFile(prompt: string): Promise<string> {
194
196
  return notesPath;
195
197
  }
196
198
 
199
/**
 * Reserve the stable absolute path for the QA end-to-end proof video.
 *
 * A fresh directory is created under the OS temp dir right away so that
 * `playwright-cli video-start <path>` has an existing directory to write into.
 * The video file itself is not created here: the orchestrator's QA pass records
 * it (overwriting it each iteration so it reflects the latest state), and the
 * final pull-request stage attaches it when it exists.
 *
 * @returns Absolute path of the video file inside the newly created directory.
 */
async function createQaEvidenceVideoPath(): Promise<string> {
  const evidenceDirPrefix = join(tmpdir(), "atomic-ralph-qa-");
  const evidenceDir = await mkdtemp(evidenceDirPrefix);
  const videoPath = join(evidenceDir, QA_E2E_VIDEO_FILENAME);
  return videoPath;
}
208
+
209
/**
 * Render the guidance text used as the `qa_e2e_video` prompt section.
 *
 * The text tells the orchestrator when to run a QA end-to-end pass with
 * `playwright-cli`, how to record it as a video at `qaVideoPath`, how to
 * reference the video in the implementation notes, and what to report when no
 * video applies or the tooling cannot run.
 *
 * @param qaVideoPath - Absolute path the proof video must be written to.
 * @returns The guidance paragraphs joined with newlines.
 */
function renderQaE2eVideoGuidance(qaVideoPath: string): string {
  // The path lives under the OS temp dir, which can contain spaces (for example
  // under a Windows user profile). Quote it inside the shell command so it is
  // passed to `video-start` as a single argument. The descriptive mentions of
  // the path below stay unquoted because they are not commands.
  const quotedVideoPath = `"${qaVideoPath}"`;

  return [
    "QA the change end-to-end whenever it touches user-visible UI behavior, including full-stack changes whose UI correctness depends on backend/API behavior. Use the `playwright-cli` skill (or delegate to a subagent with `skill: \"playwright-cli\"`) to drive the running application like a user and prove the implemented scenario actually works.",
    `Record that QA E2E pass as a reviewable video so the user can watch the feature working. After \`playwright-cli open\`, start recording with \`playwright-cli video-start ${quotedVideoPath}\`, annotate the scenario with \`playwright-cli video-chapter\` / \`playwright-cli video-show-actions\`, exercise the full user scenario, then \`playwright-cli video-stop\`. Write the video to exactly this path and overwrite any prior recording so it always reflects the latest implemented state: ${qaVideoPath}`,
    `After recording, add the video to the implementation notes as a reference: include a \`## QA E2E Video\` entry with the absolute path ${qaVideoPath} and a one-line description of the proven scenario, so the user can review the proof when this stage finishes.`,
    "If the change has no user-visible UI scenario (pure refactor, docs, infra, or non-UI library code), do not fabricate a video; record in the implementation notes that no QA E2E video applies and why.",
    "If `playwright-cli` or a browser runtime is unavailable, install it once per the skill (`npm install -g @playwright/cli@latest`, then `npx playwright install chromium` for a missing browser executable). If it still cannot run, record the smallest validation actually performed and note that the QA E2E video could not be produced — never claim a video exists when it does not.",
  ].join("\n");
}
218
+
197
219
  function reviewDecisionFromResult(result: WorkflowTaskResult): ReviewDecision | undefined {
198
220
  return result.structured as ReviewDecision | undefined;
199
221
  }
@@ -360,6 +382,7 @@ function renderForkedOrchestratorPrompt(args: {
360
382
  readonly workflowCwdContext: PromptSection;
361
383
  readonly researchPath: string;
362
384
  readonly implementationNotesPath: string;
385
+ readonly qaVideoPath: string;
363
386
  }): string {
364
387
  return taggedPrompt([
365
388
  [
@@ -385,6 +408,7 @@ function renderForkedOrchestratorPrompt(args: {
385
408
  ].join("\n"),
386
409
  ],
387
410
  ["e2e_verification", E2E_VERIFICATION_GUIDANCE],
411
+ ["qa_e2e_video", renderQaE2eVideoGuidance(args.qaVideoPath)],
388
412
  [
389
413
  "output_format",
390
414
  [
@@ -396,6 +420,7 @@ function renderForkedOrchestratorPrompt(args: {
396
420
  "5. Validation run / recommended",
397
421
  "6. Deferred work or blockers",
398
422
  "7. Implementation notes — confirm the OS temp notes path was updated",
423
+ "8. QA E2E video — the recorded video path and proven scenario, or a note that no QA E2E video applies and why",
399
424
  ].join("\n"),
400
425
  ],
401
426
  ]);
@@ -424,6 +449,7 @@ type RalphWorkflowResult = {
424
449
  readonly research: string;
425
450
  readonly research_path: string;
426
451
  readonly implementation_notes_path: string;
452
+ readonly qa_video_path?: string;
427
453
  readonly pr_report?: string;
428
454
  readonly approved: boolean;
429
455
  readonly iterations_completed: number;
@@ -455,6 +481,7 @@ async function runRalphWorkflow(
455
481
  // worktree cwd so research stage writes land in the same checkout.
456
482
  const workflowResearchPath = resolve(workflowStartCwd, defaultResearchPath(prompt));
457
483
  const implementationNotesPath = await createImplementationNotesFile(prompt);
484
+ const qaVideoPath = await createQaEvidenceVideoPath();
458
485
  const artifactDir = await mkdtemp(join(tmpdir(), "atomic-ralph-run-"));
459
486
  const workflowCwdContext = workflowCwdContextSection(workflowStartCwd);
460
487
  let approved = false;
@@ -612,6 +639,7 @@ async function runRalphWorkflow(
612
639
  ],
613
640
  ["project_setup", WORKER_PREFLIGHT_CONTRACT],
614
641
  ["e2e_verification", E2E_VERIFICATION_GUIDANCE],
642
+ ["qa_e2e_video", renderQaE2eVideoGuidance(qaVideoPath)],
615
643
  [
616
644
  "orchestration_guidance",
617
645
  [
@@ -654,7 +682,8 @@ async function runRalphWorkflow(
654
682
  "Pass each subagent the relevant task, constraints, files, validation expectations, unresolved reviewer findings covered by the research, and instructions to report implementation-note-worthy decisions or tradeoffs.",
655
683
  "Coordinate subagent results into the smallest coherent set of changes that satisfies the researched implementation guidance and original user prompt.",
656
684
  "Preserve existing architecture and repository conventions unless the research explicitly justifies a change.",
657
- "Run or delegate the most relevant validation commands available in the repository, including end-to-end browser or tmux validation when the change has an executable user scenario.",
685
+ "Run or delegate the most relevant validation commands available in the repository, including end-to-end playwright-cli (browser) or tmux validation when the change has an executable user scenario.",
686
+ "For UI-applicable or full-stack changes, ensure the QA E2E pass described in <qa_e2e_video> runs and records the reviewable proof video before you finalize this iteration.",
658
687
  `Before your final report, update the running implementation notes file at ${implementationNotesPath} with decisions, research deviations, tradeoffs, blockers, and validation outcomes from this iteration.`,
659
688
  "If blocked, describe the blocker and the safest partial state instead of inventing success.",
660
689
  "Do not hide failures; reviewers need accurate status.",
@@ -671,6 +700,7 @@ async function runRalphWorkflow(
671
700
  "5. Validation run / recommended",
672
701
  "6. Deferred work or blockers",
673
702
  "7. Implementation notes — confirm the OS temp notes path was updated",
703
+ "8. QA E2E video — the recorded video path and proven scenario, or a note that no QA E2E video applies and why",
674
704
  ].join("\n"),
675
705
  ],
676
706
  ])
@@ -681,6 +711,7 @@ async function runRalphWorkflow(
681
711
  workflowCwdContext,
682
712
  researchPath,
683
713
  implementationNotesPath,
714
+ qaVideoPath,
684
715
  });
685
716
  const orchestrator = await ctx.task(`orchestrator-${iteration}`, {
686
717
  prompt: orchestratorPrompt,
@@ -735,7 +766,7 @@ async function runRalphWorkflow(
735
766
  "validation_expectations",
736
767
  [
737
768
  "Inspect the actual diff/repository state rather than trusting stage summaries.",
738
- "Run or delegate focused validation when it is necessary to distinguish a real bug from a hunch, including end-to-end browser or tmux validation when a user scenario can prove the outcome.",
769
+ "Run or delegate focused validation when it is necessary to distinguish a real bug from a hunch, including end-to-end playwright-cli (browser) or tmux validation when a user scenario can prove the outcome.",
739
770
  "If tests or typechecks fail because dependencies are missing, install/download the missing dependencies with the repo's documented package manager instead of bypassing the check.",
740
771
  "If validation cannot be completed after reasonable recovery, record the limitation in overall_explanation and reviewer_error; do not use missing dependencies as a reason to approve.",
741
772
  ].join("\n"),
@@ -790,7 +821,7 @@ async function runRalphWorkflow(
790
821
  [
791
822
  "1. Identify the changed files or diff under review.",
792
823
  "2. Read the relevant changed code and directly affected call sites/tests/configs.",
793
- "3. Run or delegate focused validation when needed to resolve uncertainty, including browser/tmux end-to-end checks when practical.",
824
+ "3. Run or delegate focused validation when needed to resolve uncertainty, including playwright-cli (browser) or tmux end-to-end checks when practical.",
794
825
  "4. If you cannot inspect or validate enough to approve safely, populate reviewer_error and set stop_review_loop=false.",
795
826
  ].join("\n"),
796
827
  ],
@@ -888,6 +919,10 @@ async function runRalphWorkflow(
888
919
  if (approved) break;
889
920
  }
890
921
 
922
+ // The orchestrator writes the QA end-to-end proof video to this stable path
923
+ // when the change has a UI-applicable scenario; the final PR stage attaches it.
924
+ const qaVideoAvailable = existsSync(qaVideoPath);
925
+
891
926
  if (createPr === true) {
892
927
  const prResult = await ctx.task("pull-request", {
893
928
  prompt: taggedPrompt([
@@ -914,6 +949,21 @@ async function runRalphWorkflow(
914
949
  "If multiple accounts, hosts, or providers are available, use the remote URL and git config username/email as heuristics to choose the most likely identity, but try each available credential/account that can read the repository and create the provider-appropriate review request.",
915
950
  ].join("\n"),
916
951
  ],
952
+ [
953
+ "qa_video_attachment",
954
+ qaVideoAvailable
955
+ ? [
956
+ `A reviewable QA end-to-end proof video was recorded for this run at: ${qaVideoPath}`,
957
+ "Attach this video to the pull request, merge request, or review request you create so the user can watch the implemented feature working.",
958
+ "Prefer embedding or linking it in the PR/MR/review description. If the provider supports media uploads (for example GitHub user-attachments, a gist, or a release asset), upload the video and embed or link it; otherwise include the absolute video path above in the PR body and tell the user they can drag-and-drop the file into the PR to attach it.",
959
+ "The implementation notes already reference this video path and the notes contents are used as the PR/review body, so confirm the reference carries over.",
960
+ "Do not fabricate an upload you could not perform; report exactly how the video was attached or referenced.",
961
+ ].join("\n")
962
+ : [
963
+ "No QA end-to-end proof video was produced for this run (no UI-applicable scenario, or the browser runtime was unavailable).",
964
+ "Do not invent or attach a video. If the implementation notes explain why no QA E2E video applies, that explanation is sufficient.",
965
+ ].join("\n"),
966
+ ],
917
967
  [
918
968
  "pr_policy",
919
969
  [
@@ -936,6 +986,7 @@ async function runRalphWorkflow(
936
986
  "3. Implementation notes comment — whether the provider-appropriate comment was created as the last action, or why it could not be created",
937
987
  "4. Commands run — include exit status or clear outcome",
938
988
  "5. Follow-up for the user — exact next steps if credentials or repository state blocked PR creation",
989
+ "6. QA E2E video — how the proof video was attached or linked to the review request, or that no QA E2E video applies",
939
990
  ].join("\n"),
940
991
  ],
941
992
  ]),
@@ -956,6 +1007,7 @@ async function runRalphWorkflow(
956
1007
  research: finalResearch,
957
1008
  research_path: finalResearchPath,
958
1009
  implementation_notes_path: implementationNotesPath,
1010
+ ...(qaVideoAvailable ? { qa_video_path: qaVideoPath } : {}),
959
1011
  ...(finalPrReport === undefined ? {} : { pr_report: finalPrReport }),
960
1012
  approved,
961
1013
  iterations_completed: iterationsCompleted,
@@ -997,6 +1049,7 @@ export default defineWorkflow("ralph")
997
1049
  .output("research", Type.Optional(Type.String({ description: "Latest research report text or artifact reference." })))
998
1050
  .output("research_path", Type.Optional(Type.String({ description: "Path to the latest generated research artifact under research/." })))
999
1051
  .output("implementation_notes_path", Type.Optional(Type.String({ description: "OS-temp notes file containing decisions, deviations, blockers, and validation notes." })))
1052
+ .output("qa_video_path", Type.Optional(Type.String({ description: "Absolute path to the reviewable QA end-to-end proof video recorded with playwright-cli for UI-applicable changes, when one was produced." })))
1000
1053
  .output("pr_report", Type.Optional(Type.String({ description: "Pull-request report emitted only when create_pr=true and the final pull-request stage runs." })))
1001
1054
  .output("approved", Type.Optional(Type.Boolean({ description: "Whether the reviewer loop approved before completion or optional final handoff." })))
1002
1055
  .output("iterations_completed", Type.Optional(Type.Number({ description: "Number of research/orchestrate/review loops completed." })))
@@ -12,7 +12,7 @@ export const WORKER_PREFLIGHT_CONTRACT = [
12
12
 
13
13
  export const E2E_VERIFICATION_GUIDANCE = [
14
14
  "Verify correctness end-to-end whenever practical for user-visible behavior; do not rely only on code inspection, unit tests, or stage summaries when an executable user scenario can prove the outcome.",
15
- "For web or frontend flows — including frontend changes whose correctness depends on backend/API behavior — use the browser skill, or delegate to a subagent with `skill: \"browser\"`, to drive the application like a user and capture screenshot, DOM, or network evidence when that proves the objective.",
15
+ "For web or frontend flows — including frontend changes whose correctness depends on backend/API behavior — use the playwright-cli skill, or delegate to a subagent with `skill: \"playwright-cli\"`, to drive the application like a user and capture snapshot, screenshot, DOM, or network evidence when that proves the objective.",
16
16
  "For TUI or terminal-app flows, use the tmux skill, or delegate to a subagent with `skill: \"tmux\"`, to launch the app in an isolated tmux session, send keys, capture pane output, and simulate the scenario end to end.",
17
17
  "If end-to-end verification is not practical in this checkout, record what was attempted, the smallest missing prerequisite, and the narrower validation that was run instead; do not claim end-to-end proof when it was not performed.",
18
18
  ].join("\n");
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bastani/workflows",
3
- "version": "0.8.31-alpha.1",
3
+ "version": "0.8.31-alpha.2",
4
4
  "private": true,
5
5
  "description": "Atomic extension for multi-stage workflow authoring and execution.",
6
6
  "contributors": [
@@ -83,7 +83,7 @@
83
83
  },
84
84
  "peerDependencies": {
85
85
  "@bastani/atomic": "*",
86
- "@earendil-works/pi-tui": "^0.79.6"
86
+ "@earendil-works/pi-tui": "^0.79.7"
87
87
  },
88
88
  "peerDependenciesMeta": {
89
89
  "@bastani/atomic": {
@@ -62,7 +62,7 @@ The user's research question/request is: **$ARGUMENTS**
62
62
 
63
63
  **For online search:**
64
64
  - VERY IMPORTANT: In case you discover external libraries as dependencies, use the **codebase-online-researcher** agent for external documentation and resources
65
- - The agent fetches live web content using the **browser** skill's `browse` CLI (or `npx browse` / `curl`). Instruct it to apply the token-efficient fetch order: (1) try `curl https://<site>/llms.txt` for an AI-friendly index (see [llmstxt.org](https://llmstxt.org/llms.txt)), (2) try `curl <url> -H "Accept: text/markdown"` to get pre-converted Markdown (supported on Cloudflare-hosted docs via [Markdown for Agents](https://developers.cloudflare.com/fundamentals/reference/markdown-for-agents/)), (3) fall back to HTML parsing via `browse`
65
+ - The agent fetches live web content using the **playwright-cli** skill's `playwright-cli` command (or `npx playwright-cli` / `curl`). Instruct it to apply the token-efficient fetch order: (1) try `curl https://<site>/llms.txt` for an AI-friendly index (see [llmstxt.org](https://llmstxt.org/llms.txt)), (2) try `curl <url> -H "Accept: text/markdown"` to get pre-converted Markdown (supported on Cloudflare-hosted docs via [Markdown for Agents](https://developers.cloudflare.com/fundamentals/reference/markdown-for-agents/)), (3) fall back to HTML parsing via `playwright-cli`
66
66
  - Instruct the agent to return LINKS with their findings and INCLUDE those links in the research document
67
67
  - The agent should persist reusable source documents under `research/web/<YYYY-MM-DD>-<kebab-case-topic>.md` (with frontmatter noting `source_url`, `fetched_at`, and `fetch_method`) so future research can reuse them without re-fetching
68
68
  - Output directory for the synthesized web research artifacts: `research/web/`: