npm - codebyplan - Versions diffs - 1.13.14 → 1.13.15 - Mend

codebyplan 1.13.14 → 1.13.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/dist/cli.js +319 -63
package/package.json +1 -1
package/templates/agents/cbp-e2e-maestro.md +26 -3
package/templates/agents/cbp-e2e-playwright.md +24 -3
package/templates/agents/cbp-e2e-tauri.md +25 -2
package/templates/agents/cbp-e2e-vscode.md +28 -3
package/templates/agents/cbp-e2e-xcuitest.md +40 -4
package/templates/agents/cbp-task-check.md +2 -0
package/templates/context/testing/e2e.md +57 -9
package/templates/hooks/validate-structure-patterns.sh +1 -1
package/templates/rules/e2e-mandatory.md +19 -2
package/templates/settings.project.base.json +8 -1
package/templates/skills/cbp-checkpoint-end/SKILL.md +18 -1
package/templates/skills/cbp-frontend-ui/SKILL.md +9 -7
package/templates/skills/cbp-round-execute/SKILL.md +31 -7

package/dist/cli.js CHANGED Viewed

@@ -14,7 +14,7 @@ var VERSION, PACKAGE_NAME;
 var init_version = __esm({
   "src/lib/version.ts"() {
     "use strict";
-    VERSION = "1.13.14";
+    VERSION = "1.13.15";
     PACKAGE_NAME = "codebyplan";
   }
 });
@@ -697,7 +697,7 @@ function isRetryable(err) {
   return false;
 }
 function delay(ms) {
-  return new Promise((resolve7) => setTimeout(resolve7, ms));
+  return new Promise((resolve8) => setTimeout(resolve8, ms));
 }
 async function request(method, path8, options) {
   const url = buildUrl(path8, options?.params);
@@ -1055,7 +1055,7 @@ var init_device_flow = __esm({
         this.name = "OAuthInvalidClientError";
       }
     };
-    defaultSleep = (ms) => new Promise((resolve7) => setTimeout(resolve7, ms));
+    defaultSleep = (ms) => new Promise((resolve8) => setTimeout(resolve8, ms));
   }
 });
@@ -1880,9 +1880,9 @@ async function writeMcpConfig(scope) {
   return configPath;
 }
 async function fetchRepos(auth) {
-  const baseUrl2 = (process.env.CODEBYPLAN_API_URL ?? "https://www.codebyplan.com").replace(/\/$/, "");
+  const baseUrl3 = (process.env.CODEBYPLAN_API_URL ?? "https://www.codebyplan.com").replace(/\/$/, "");
   const headers = auth.kind === "oauth" ? { Authorization: `Bearer ${await getAccessToken()}` } : { "x-api-key": auth.apiKey };
-  const res = await fetch(`${baseUrl2}/api/repos`, {
+  const res = await fetch(`${baseUrl3}/api/repos`, {
     headers,
     signal: AbortSignal.timeout(1e4)
   });
@@ -2081,8 +2081,8 @@ async function runSetup() {
     const deviceId = await getOrCreateDeviceId(projectPath);
     let branch = "main";
     try {
-      const { execSync: execSync8 } = await import("node:child_process");
-      branch = execSync8("git symbolic-ref --short HEAD", {
+      const { execSync: execSync9 } = await import("node:child_process");
+      branch = execSync9("git symbolic-ref --short HEAD", {
         cwd: projectPath,
         encoding: "utf-8"
       }).trim();
@@ -3720,9 +3720,9 @@ async function eslintInit(repoId, projectPath) {
   Install ${missingPkgs.length} missing packages? [Y/n] `
     );
     if (confirmed) {
-      const { execSync: execSync8 } = await import("node:child_process");
+      const { execSync: execSync9 } = await import("node:child_process");
       try {
-        execSync8(installCmd, { cwd: projectPath, stdio: "inherit" });
+        execSync9(installCmd, { cwd: projectPath, stdio: "inherit" });
         console.log("  Packages installed.\n");
       } catch (err) {
         console.error(
@@ -4157,7 +4157,7 @@ function setRetryDelayMs(ms) {
   RETRY_DELAY_MS = ms;
 }
 function sleep(ms) {
-  return new Promise((resolve7) => setTimeout(resolve7, ms));
+  return new Promise((resolve8) => setTimeout(resolve8, ms));
 }
 function isTransientMcpError(err) {
   if (!(err instanceof McpError)) return false;
@@ -6374,13 +6374,262 @@ var init_version_status = __esm({
   }
 });
+// src/cli/upload-e2e-images.ts
+// Export table for this bundled module. The getter defers the lookup of
+// `runUploadE2eImagesCommand` until the property is actually read.
+var upload_e2e_images_exports = {};
+__export(upload_e2e_images_exports, {
+  runUploadE2eImagesCommand: () => runUploadE2eImagesCommand
+});
+import { readFile as readFile15 } from "node:fs/promises";
+import { join as join21, basename, resolve as resolve5 } from "node:path";
+import { execSync as execSync7, execFileSync as execFileSyncE2e } from "node:child_process";
+/**
+ * Resolve the API origin used by the upload command.
+ *
+ * Reads `CODEBYPLAN_API_URL` from the environment, falling back to the public
+ * site when it is unset, and strips one trailing slash so callers can append
+ * `/api/...` paths directly.
+ *
+ * @returns {string} Base URL without a trailing slash.
+ */
+function baseUrl2() {
+  const configuredOrigin = process.env.CODEBYPLAN_API_URL ?? "https://www.codebyplan.com";
+  const trailingSlash = /\/$/;
+  return configuredOrigin.replace(trailingSlash, "");
+}
+/**
+ * Parse the arguments that follow `codebyplan upload-e2e-images`.
+ *
+ * Recognised options: `--repo-id <uuid>`, `--base-branch <name>` (default "main"),
+ * and the switches `--json` and `--dry-run`. The first non-option argument is
+ * the checkpoint id.
+ *
+ * @param {string[]} args - Raw CLI arguments after the sub-command name.
+ * @returns {{ checkpointId: string | undefined, repoId: string | undefined,
+ *   baseBranch: string, json: boolean, dryRun: boolean }}
+ */
+function parseArgs(args) {
+  // Switches never take a value. Without this list `--json chk-1` would consume
+  // "chk-1" as the value of `--json`, leaving both `json` and `checkpointId` unset.
+  const switchNames = new Set(["json", "dry-run"]);
+  const flags = {};
+  const booleans = new Set();
+  const positionals = [];
+  for (let i = 0; i < args.length; i++) {
+    const arg = args[i];
+    if (!arg.startsWith("--")) {
+      positionals.push(arg);
+      continue;
+    }
+    const key = arg.slice(2);
+    const next = args[i + 1];
+    const takesValue = !switchNames.has(key) && next !== void 0 && !next.startsWith("--");
+    if (takesValue) {
+      flags[key] = next;
+      i++; // skip the value we just consumed
+    } else {
+      booleans.add(key);
+    }
+  }
+  return {
+    checkpointId: positionals[0],
+    repoId: flags["repo-id"],
+    baseBranch: flags["base-branch"] ?? "main",
+    json: booleans.has("json"),
+    dryRun: booleans.has("dry-run")
+  };
+}
+/**
+ * Load `<projectPath>/.codebyplan/e2e.json`.
+ *
+ * Best-effort: a missing file, an unreadable file, or invalid JSON all yield an
+ * empty object rather than an error.
+ *
+ * @param {string} projectPath - Project root containing the `.codebyplan` directory.
+ * @returns {Promise<object>} Parsed config, or `{}` on any failure.
+ */
+async function readE2eConfig(projectPath) {
+  let config = {};
+  try {
+    const configFile = join21(projectPath, ".codebyplan", "e2e.json");
+    const text = await readFile15(configFile, "utf-8");
+    config = JSON.parse(text);
+  } catch {
+    // Deliberate fallback: treat any read/parse problem as "no e2e config".
+    config = {};
+  }
+  return config;
+}
+/**
+ * List PNG screenshots added or modified between `baseBranch` and HEAD for one framework.
+ *
+ * Runs `git diff --name-status --diff-filter=AM <baseBranch>...HEAD -- <pathspec>` in
+ * `projectPath`, where the pathspec is `frameworkConfig.test_dir`, falling back to
+ * `frameworkConfig.app`.
+ *
+ * @param {string} projectPath - Directory git runs in; returned paths are joined onto it.
+ * @param {string} frameworkName - Framework key; for "playwright" only files under a
+ *   `.spec.ts-snapshots/` directory are kept.
+ * @param {{ test_dir?: string, app?: string }} frameworkConfig - Framework entry from e2e.json.
+ * @param {string} baseBranch - Revision to diff against.
+ * @returns {Array<{ absolutePath: string, filename: string, framework: string, is_new: boolean }>}
+ *   Empty when no pathspec is configured or when git fails for any reason.
+ */
+function collectPngsFromGitDiff(projectPath, frameworkName, frameworkConfig, baseBranch) {
+  const pathspec = frameworkConfig.test_dir ?? frameworkConfig.app;
+  if (!pathspec) {
+    return [];
+  }
+  // A revision that starts with "-" would be parsed by git as an option.
+  if (String(baseBranch).startsWith("-")) {
+    process.stderr.write(`upload-e2e-images: invalid base branch: ${baseBranch}\n`);
+    return [];
+  }
+  let stdout7;
+  try {
+    // Arguments are passed as an array (no shell), so a branch name or configured
+    // path containing quotes, `;`, `$` or backticks cannot be run as shell syntax.
+    stdout7 = execFileSyncE2e(
+      "git",
+      ["diff", "--name-status", "--diff-filter=AM", `${baseBranch}...HEAD`, "--", pathspec],
+      { cwd: projectPath, encoding: "utf-8" }
+    );
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    // These failures are expected (no such branch / not a repo) and stay silent.
+    const expectedFailures = ["unknown revision", "ambiguous argument", "not a git repository"];
+    if (!expectedFailures.some((needle) => msg.includes(needle))) {
+      process.stderr.write(`upload-e2e-images: git diff failed: ${msg}\n`);
+    }
+    return [];
+  }
+  const results = [];
+  for (const line of stdout7.split("\n")) {
+    const trimmed = line.trim();
+    if (!trimmed) continue;
+    // --name-status lines are "<status>\t<path>".
+    const tab = trimmed.indexOf("\t");
+    if (tab === -1) continue;
+    const status = trimmed.slice(0, tab).trim();
+    const filePath = trimmed.slice(tab + 1).trim();
+    if (!filePath.endsWith(".png")) continue;
+    if (frameworkName === "playwright" && !filePath.includes(".spec.ts-snapshots/")) continue;
+    results.push({
+      absolutePath: join21(projectPath, filePath),
+      filename: basename(filePath),
+      framework: frameworkName,
+      is_new: status === "A"
+    });
+  }
+  return results;
+}
+/**
+ * Derive a test name from a screenshot path.
+ *
+ * If any parent directory is named `<spec>.spec.ts-snapshots`, the innermost
+ * such directory supplies the name (with that marker removed). Otherwise the
+ * PNG's own file name without the `.png` extension is used.
+ *
+ * @param {string} absolutePath - Path to the PNG; `\` separators are accepted.
+ * @returns {string} Test name.
+ */
+function deriveTestName(absolutePath) {
+  const snapshotMarker = ".spec.ts-snapshots";
+  // Directory segments only: the final segment is the file itself.
+  const directories = absolutePath.replace(/\\/g, "/").split("/").slice(0, -1);
+  // Search from the innermost directory outwards.
+  const snapshotDir = directories.reverse().find((dir) => dir.endsWith(snapshotMarker));
+  if (snapshotDir === undefined) {
+    return basename(absolutePath, ".png");
+  }
+  return snapshotDir.replace(snapshotMarker, "");
+}
+/**
+ * Convert one collected PNG record into the manifest entry sent with the upload.
+ *
+ * @param {{ absolutePath: string, filename: string, framework: string, is_new: boolean }} png
+ * @returns {{ filename: string, test_name: string, page_or_screen: string,
+ *   framework: string, is_new: boolean, baseline_diff_pct: null }}
+ *   `baseline_diff_pct` is always null here.
+ */
+function buildManifestItem(png) {
+  const { absolutePath, filename, framework, is_new } = png;
+  return {
+    filename,
+    test_name: deriveTestName(absolutePath),
+    page_or_screen: basename(absolutePath, ".png"),
+    framework,
+    is_new,
+    baseline_diff_pct: null
+  };
+}
+/**
+ * Entry point for `codebyplan upload-e2e-images <checkpointId>`.
+ *
+ * Steps:
+ *   1. Parse arguments; exit 1 if the checkpoint id is missing.
+ *   2. Resolve the repo id from `--repo-id` or the discovered codebyplan config; exit 1 if absent.
+ *   3. For every framework in `.codebyplan/e2e.json` with both `enabled` and `auto_run` set,
+ *      collect PNGs added/modified relative to the base branch.
+ *   4. Exit 0 when there is nothing to upload, or after printing the manifest in `--dry-run` mode.
+ *   5. POST the manifest and file contents as multipart form data to `/api/checkpoint-images`.
+ *   6. Print the raw JSON response (`--json`) or a summary with any stored paths.
+ *
+ * Terminates the process via `process.exit` on every failure path.
+ *
+ * @param {string[]} args - CLI arguments after the sub-command name.
+ * @returns {Promise<void>}
+ */
+async function runUploadE2eImagesCommand(args) {
+  const parsed = parseArgs(args);
+  if (!parsed.checkpointId) {
+    process.stderr.write(
+      "upload-e2e-images: missing required argument <checkpointId>\n\nUsage: codebyplan upload-e2e-images <checkpointId> [--repo-id <uuid>]\n       [--base-branch <name>] [--json] [--dry-run]\n\nExample: codebyplan upload-e2e-images chk-abc-123 --base-branch main\n"
+    );
+    process.exit(1);
+  }
+  const checkpointId = parsed.checkpointId;
+  const projectPath = resolve5(process.cwd());
+  let repoId = parsed.repoId;
+  if (!repoId) {
+    // Fall back to the repo id recorded in the project's codebyplan config.
+    const found = await findCodebyplanConfig(projectPath);
+    repoId = found?.contents.repo_id;
+  }
+  if (!repoId) {
+    process.stderr.write(
+      "upload-e2e-images: could not determine repo_id.\nPass --repo-id <uuid> or ensure .codebyplan/repo.json exists.\n"
+    );
+    process.exit(1);
+  }
+  const e2eConfig = await readE2eConfig(projectPath);
+  const frameworks = e2eConfig.frameworks ?? {};
+  const allPngs = [];
+  for (const [name, cfg] of Object.entries(frameworks)) {
+    // Only frameworks that are both enabled and auto-run contribute screenshots.
+    if (!cfg.enabled || !cfg.auto_run) continue;
+    const pngs = collectPngsFromGitDiff(
+      projectPath,
+      name,
+      cfg,
+      parsed.baseBranch
+    );
+    allPngs.push(...pngs);
+  }
+  if (allPngs.length === 0) {
+    process.stdout.write(
+      `No new/changed e2e screenshots found for ${checkpointId}\n`
+    );
+    process.exit(0);
+  }
+  const manifest = allPngs.map(buildManifestItem);
+  if (parsed.dryRun) {
+    if (parsed.json) {
+      process.stdout.write(JSON.stringify(manifest, null, 2) + "\n");
+    } else {
+      process.stdout.write(
+        `[dry-run] Would upload ${manifest.length} screenshot(s) for checkpoint ${checkpointId}:\n`
+      );
+      for (const item of manifest) {
+        const label = item.is_new ? "NEW" : "CHANGED";
+        process.stdout.write(
+          `  [${label}] ${item.filename} (${item.framework}, test: ${item.test_name})\n`
+        );
+      }
+    }
+    process.exit(0);
+  }
+  const formData = new FormData();
+  formData.append("checkpointId", checkpointId);
+  formData.append("repoId", repoId);
+  formData.append("manifest", JSON.stringify(manifest));
+  for (const png of allPngs) {
+    let bytes;
+    try {
+      bytes = await readFile15(png.absolutePath);
+    } catch {
+      process.stderr.write(
+        `upload-e2e-images: could not read file: ${png.absolutePath}\n`
+      );
+      process.exit(1);
+    }
+    const blob = new Blob([bytes], { type: "image/png" });
+    formData.append("files", blob, png.filename);
+  }
+  const auth = await getAuthHeaders();
+  const url = `${baseUrl2()}/api/checkpoint-images`;
+  let res;
+  try {
+    res = await fetch(url, {
+      method: "POST",
+      headers: auth.headers,
+      body: formData
+    });
+  } catch (err) {
+    process.stderr.write(
+      `upload-e2e-images: network error: ${err instanceof Error ? err.message : String(err)}\n`
+    );
+    process.exit(1);
+  }
+  if (!res.ok) {
+    let bodyText = "";
+    try {
+      bodyText = await res.text();
+    } catch {
+      // Best-effort: the status code alone is still reported below.
+    }
+    process.stderr.write(
+      `upload-e2e-images: API returned ${res.status}: ${bodyText}\n`
+    );
+    process.exit(1);
+  }
+  let result;
+  try {
+    result = await res.json();
+  } catch {
+    process.stderr.write(
+      "upload-e2e-images: failed to parse API response as JSON\n"
+    );
+    process.exit(1);
+  }
+  if (parsed.json) {
+    process.stdout.write(JSON.stringify(result, null, 2) + "\n");
+    return;
+  }
+  process.stdout.write(
+    `Uploaded ${manifest.length} screenshot(s) for checkpoint ${checkpointId}\n`
+  );
+  // The upload already succeeded; a response without `data.stored_paths` (or with a
+  // non-array value) must not crash the command, so read it defensively.
+  const storedPaths = result?.data?.stored_paths;
+  const paths = Array.isArray(storedPaths) ? storedPaths : [];
+  for (const p of paths) {
+    process.stdout.write(`  ${p}\n`);
+  }
+}
+// Initializer for the upload-e2e-images module, created through the bundler's
+// __esm helper. Its body runs init_api() and init_flags().
+// NOTE(review): presumably those modules provide getAuthHeaders /
+// findCodebyplanConfig used by the command — confirm against their definitions.
+var init_upload_e2e_images = __esm({
+  "src/cli/upload-e2e-images.ts"() {
+    "use strict";
+    init_api();
+    init_flags();
+  }
+});
 // src/cli/cmux-sync.ts
 var cmux_sync_exports = {};
 __export(cmux_sync_exports, {
   runCmuxSync: () => runCmuxSync
 });
-import { execSync as execSync7, execFileSync as execFileSync2 } from "node:child_process";
-import { basename } from "node:path";
+import { execSync as execSync8, execFileSync as execFileSync2 } from "node:child_process";
+import { basename as basename2 } from "node:path";
 async function runCmuxSync() {
   try {
     if (!process.env.CMUX_WORKSPACE_ID) {
@@ -6389,17 +6638,17 @@ async function runCmuxSync() {
     const bin = process.env.CMUX_BUNDLED_CLI_PATH || process.env.CMUX_CLAUDE_HOOK_CMUX_BIN || "cmux";
     let branch = "";
     try {
-      branch = execSync7("git rev-parse --abbrev-ref HEAD", {
+      branch = execSync8("git rev-parse --abbrev-ref HEAD", {
         encoding: "utf8"
       }).trim();
     } catch {
     }
     let folder = "";
     try {
-      const toplevel = execSync7("git rev-parse --show-toplevel", {
+      const toplevel = execSync8("git rev-parse --show-toplevel", {
         encoding: "utf8"
       }).trim();
-      folder = basename(toplevel);
+      folder = basename2(toplevel);
     } catch {
     }
     if (branch) {
@@ -6440,19 +6689,19 @@ var init_cmux_sync = __esm({
 });
 // src/lib/migrate-local-config.ts
-import { mkdir as mkdir6, readFile as readFile15, unlink as unlink2, writeFile as writeFile12 } from "node:fs/promises";
-import { join as join21 } from "node:path";
+import { mkdir as mkdir6, readFile as readFile16, unlink as unlink2, writeFile as writeFile12 } from "node:fs/promises";
+import { join as join22 } from "node:path";
 function legacySharedPath(projectPath) {
-  return join21(projectPath, ".codebyplan.json");
+  return join22(projectPath, ".codebyplan.json");
 }
 function legacyLocalPath(projectPath) {
-  return join21(projectPath, ".codebyplan.local.json");
+  return join22(projectPath, ".codebyplan.local.json");
 }
 function newDirPath(projectPath) {
-  return join21(projectPath, ".codebyplan");
+  return join22(projectPath, ".codebyplan");
 }
 function sentinelPath(projectPath) {
-  return join21(projectPath, ".codebyplan", "repo.json");
+  return join22(projectPath, ".codebyplan", "repo.json");
 }
 async function statSafe(p) {
   const { stat: stat2 } = await import("node:fs/promises");
@@ -6491,7 +6740,7 @@ async function runLocalMigration(projectPath) {
   }
   let legacyRaw;
   try {
-    legacyRaw = await readFile15(legacySharedPath(projectPath), "utf-8");
+    legacyRaw = await readFile16(legacySharedPath(projectPath), "utf-8");
   } catch {
     return {
       migrated: true,
@@ -6518,7 +6767,7 @@ async function runLocalMigration(projectPath) {
   let deviceId;
   let deviceWrittenByHelper = false;
   try {
-    const localRaw = await readFile15(legacyLocalPath(projectPath), "utf-8");
+    const localRaw = await readFile16(legacyLocalPath(projectPath), "utf-8");
     const localParsed = JSON.parse(localRaw);
     if (typeof localParsed.device_id === "string") {
       deviceId = localParsed.device_id;
@@ -6546,7 +6795,7 @@ async function runLocalMigration(projectPath) {
   if ("organization_id" in cfg) repoJson.organization_id = cfg.organization_id;
   if ("project_id" in cfg) repoJson.project_id = cfg.project_id;
   await writeFile12(
-    join21(projectPath, ".codebyplan", "repo.json"),
+    join22(projectPath, ".codebyplan", "repo.json"),
     JSON.stringify(repoJson, null, 2) + "\n",
     "utf-8"
   );
@@ -6559,7 +6808,7 @@ async function runLocalMigration(projectPath) {
   if ("port_allocations" in cfg)
     serverJson.port_allocations = cfg.port_allocations;
   await writeFile12(
-    join21(projectPath, ".codebyplan", "server.json"),
+    join22(projectPath, ".codebyplan", "server.json"),
     JSON.stringify(serverJson, null, 2) + "\n",
     "utf-8"
   );
@@ -6568,7 +6817,7 @@ async function runLocalMigration(projectPath) {
   if ("git_branch" in cfg) gitJson.git_branch = cfg.git_branch;
   if ("branch_config" in cfg) gitJson.branch_config = cfg.branch_config;
   await writeFile12(
-    join21(projectPath, ".codebyplan", "git.json"),
+    join22(projectPath, ".codebyplan", "git.json"),
     JSON.stringify(gitJson, null, 2) + "\n",
     "utf-8"
   );
@@ -6576,35 +6825,35 @@ async function runLocalMigration(projectPath) {
   const shipmentJson = {};
   if ("shipment" in cfg) shipmentJson.shipment = cfg.shipment;
   await writeFile12(
-    join21(projectPath, ".codebyplan", "shipment.json"),
+    join22(projectPath, ".codebyplan", "shipment.json"),
     JSON.stringify(shipmentJson, null, 2) + "\n",
     "utf-8"
   );
   filesChanged.push(".codebyplan/shipment.json");
   const vendorJson = {};
   await writeFile12(
-    join21(projectPath, ".codebyplan", "vendor.json"),
+    join22(projectPath, ".codebyplan", "vendor.json"),
     JSON.stringify(vendorJson, null, 2) + "\n",
     "utf-8"
   );
   filesChanged.push(".codebyplan/vendor.json");
   const e2eJson = {};
   await writeFile12(
-    join21(projectPath, ".codebyplan", "e2e.json"),
+    join22(projectPath, ".codebyplan", "e2e.json"),
     JSON.stringify(e2eJson, null, 2) + "\n",
     "utf-8"
   );
   filesChanged.push(".codebyplan/e2e.json");
   const eslintJson = {};
   await writeFile12(
-    join21(projectPath, ".codebyplan", "eslint.json"),
+    join22(projectPath, ".codebyplan", "eslint.json"),
     JSON.stringify(eslintJson, null, 2) + "\n",
     "utf-8"
   );
   filesChanged.push(".codebyplan/eslint.json");
   if (!deviceWrittenByHelper) {
     await writeFile12(
-      join21(projectPath, ".codebyplan", "device.local.json"),
+      join22(projectPath, ".codebyplan", "device.local.json"),
       JSON.stringify({ device_id: deviceId }, null, 2) + "\n",
       "utf-8"
     );
@@ -6616,9 +6865,9 @@ async function runLocalMigration(projectPath) {
       "Migration write incomplete: .codebyplan/repo.json was not persisted. Re-run migration to retry from a clean state."
     );
   }
-  const gitignorePath = join21(projectPath, ".gitignore");
+  const gitignorePath = join22(projectPath, ".gitignore");
   try {
-    const gitignoreContent = await readFile15(gitignorePath, "utf-8");
+    const gitignoreContent = await readFile16(gitignorePath, "utf-8");
     const legacyLine = ".codebyplan.local.json";
     const newLine = ".codebyplan/device.local.json";
     const hasLegacy = gitignoreContent.split("\n").some((l) => l.trimEnd() === legacyLine);
@@ -6669,7 +6918,7 @@ var init_migrate_local_config = __esm({
 // src/cli/config.ts
 var config_exports = {};
 __export(config_exports, {
-  readE2eConfig: () => readE2eConfig,
+  readE2eConfig: () => readE2eConfig2,
   readGitConfig: () => readGitConfig,
   readRepoConfig: () => readRepoConfig,
   readServerConfig: () => readServerConfig,
@@ -6677,8 +6926,8 @@ __export(config_exports, {
   readVendorConfig: () => readVendorConfig,
   runConfig: () => runConfig
 });
-import { mkdir as mkdir7, readFile as readFile16, writeFile as writeFile13 } from "node:fs/promises";
-import { join as join22 } from "node:path";
+import { mkdir as mkdir7, readFile as readFile17, writeFile as writeFile13 } from "node:fs/promises";
+import { join as join23 } from "node:path";
 async function runConfig() {
   const flags = parseFlags(3);
   const dryRun = hasFlag("dry-run", 3);
@@ -6711,14 +6960,14 @@ async function runConfig() {
   console.log("\n  Config complete.\n");
 }
 async function syncConfigToFile(repoId, projectPath, dryRun) {
-  const codebyplanDir = join22(projectPath, ".codebyplan");
+  const codebyplanDir = join23(projectPath, ".codebyplan");
   let resolvedWorktreeId;
   try {
     const deviceId = await getOrCreateDeviceId(projectPath);
     let branch = "main";
     try {
-      const { execSync: execSync8 } = await import("node:child_process");
-      branch = execSync8("git symbolic-ref --short HEAD", {
+      const { execSync: execSync9 } = await import("node:child_process");
+      branch = execSync9("git symbolic-ref --short HEAD", {
         cwd: projectPath,
         encoding: "utf-8"
       }).trim();
@@ -6854,11 +7103,11 @@ async function syncConfigToFile(repoId, projectPath, dryRun) {
   ];
   let anyUpdated = false;
   for (const { name, payload, createOnly } of files) {
-    const filePath = join22(codebyplanDir, name);
+    const filePath = join23(codebyplanDir, name);
     const newJson = JSON.stringify(payload, null, 2) + "\n";
     let currentJson = "";
     try {
-      currentJson = await readFile16(filePath, "utf-8");
+      currentJson = await readFile17(filePath, "utf-8");
     } catch {
     }
     if (createOnly && currentJson !== "") continue;
@@ -6873,8 +7122,8 @@ async function syncConfigToFile(repoId, projectPath, dryRun) {
 }
 async function readRepoConfig(projectPath) {
   try {
-    const raw = await readFile16(
-      join22(projectPath, ".codebyplan", "repo.json"),
+    const raw = await readFile17(
+      join23(projectPath, ".codebyplan", "repo.json"),
       "utf-8"
     );
     return JSON.parse(raw);
@@ -6884,8 +7133,8 @@ async function readRepoConfig(projectPath) {
 }
 async function readServerConfig(projectPath) {
   try {
-    const raw = await readFile16(
-      join22(projectPath, ".codebyplan", "server.json"),
+    const raw = await readFile17(
+      join23(projectPath, ".codebyplan", "server.json"),
       "utf-8"
     );
     return JSON.parse(raw);
@@ -6895,8 +7144,8 @@ async function readServerConfig(projectPath) {
 }
 async function readGitConfig(projectPath) {
   try {
-    const raw = await readFile16(
-      join22(projectPath, ".codebyplan", "git.json"),
+    const raw = await readFile17(
+      join23(projectPath, ".codebyplan", "git.json"),
       "utf-8"
     );
     return JSON.parse(raw);
@@ -6906,8 +7155,8 @@ async function readGitConfig(projectPath) {
 }
 async function readShipmentConfig(projectPath) {
   try {
-    const raw = await readFile16(
-      join22(projectPath, ".codebyplan", "shipment.json"),
+    const raw = await readFile17(
+      join23(projectPath, ".codebyplan", "shipment.json"),
       "utf-8"
     );
     return JSON.parse(raw);
@@ -6917,8 +7166,8 @@ async function readShipmentConfig(projectPath) {
 }
 async function readVendorConfig(projectPath) {
   try {
-    const raw = await readFile16(
-      join22(projectPath, ".codebyplan", "vendor.json"),
+    const raw = await readFile17(
+      join23(projectPath, ".codebyplan", "vendor.json"),
       "utf-8"
     );
     return JSON.parse(raw);
@@ -6926,10 +7175,10 @@ async function readVendorConfig(projectPath) {
     return null;
   }
 }
-async function readE2eConfig(projectPath) {
+async function readE2eConfig2(projectPath) {
   try {
-    const raw = await readFile16(
-      join22(projectPath, ".codebyplan", "e2e.json"),
+    const raw = await readFile17(
+      join23(projectPath, ".codebyplan", "e2e.json"),
       "utf-8"
     );
     return JSON.parse(raw);
@@ -6985,14 +7234,14 @@ var init_server_detect = __esm({
 });
 // src/lib/port-verify.ts
-import { readFile as readFile17 } from "node:fs/promises";
+import { readFile as readFile18 } from "node:fs/promises";
 async function verifyPorts(projectPath, portAllocations) {
   const mismatches = [];
   const allocatedPorts = new Set(portAllocations.map((a) => a.port));
   const packageJsonPaths = await findPackageJsonFiles(projectPath, projectPath);
   for (const pkgPath of packageJsonPaths) {
     try {
-      const raw = await readFile17(pkgPath, "utf-8");
+      const raw = await readFile18(pkgPath, "utf-8");
       const pkg = JSON.parse(raw);
       const scriptPort = detectPortFromScripts(pkg);
       if (scriptPort !== null && !allocatedPorts.has(scriptPort)) {
@@ -7055,7 +7304,7 @@ async function findUnallocatedApps(projectPath, portAllocations) {
     }
     let pkg;
     try {
-      const raw = await readFile17(`${app.absPath}/package.json`, "utf-8");
+      const raw = await readFile18(`${app.absPath}/package.json`, "utf-8");
       pkg = JSON.parse(raw);
     } catch {
       continue;
@@ -7263,10 +7512,10 @@ async function runTechStack() {
         );
       }
       try {
-        const { execSync: execSync8 } = await import("node:child_process");
+        const { execSync: execSync9 } = await import("node:child_process");
         let branch = "main";
         try {
-          branch = execSync8("git symbolic-ref --short HEAD", {
+          branch = execSync9("git symbolic-ref --short HEAD", {
             cwd: projectPath,
             encoding: "utf-8"
           }).trim();
@@ -7429,11 +7678,11 @@ async function ask(q, opts) {
   try {
     while (true) {
       const choices = q.choices.map((c) => `[${c.key}] ${c.label}`).join("  ");
-      const answer = await new Promise((resolve7) => {
+      const answer = await new Promise((resolve8) => {
         rl.question(`${q.message}
   ${choices}
 > `, (input) => {
-          resolve7(input.trim().toLowerCase());
+          resolve8(input.trim().toLowerCase());
         });
       });
       const match = q.choices.find(
@@ -8066,11 +8315,11 @@ var init_uninstall = __esm({
 // src/index.ts
 init_version();
 import { readFileSync as readFileSync8 } from "node:fs";
-import { resolve as resolve6 } from "node:path";
+import { resolve as resolve7 } from "node:path";
 void (async () => {
   if (!process.env.CODEBYPLAN_API_KEY) {
     try {
-      const envPath = resolve6(process.cwd(), ".env.local");
+      const envPath = resolve7(process.cwd(), ".env.local");
       const content = readFileSync8(envPath, "utf-8");
       for (const line of content.split("\n")) {
         const trimmed = line.trim();
@@ -8203,6 +8452,12 @@ void (async () => {
     await runVersionStatus2();
     process.exit(0);
   }
+  if (arg === "upload-e2e-images") {
+    const { runUploadE2eImagesCommand: runUploadE2eImagesCommand2 } = await Promise.resolve().then(() => (init_upload_e2e_images(), upload_e2e_images_exports));
+    const rest = process.argv.slice(3);
+    await runUploadE2eImagesCommand2(rest);
+    process.exit(0);
+  }
   if (arg === "cmux-sync") {
     const { runCmuxSync: runCmuxSync2 } = await Promise.resolve().then(() => (init_cmux_sync(), cmux_sync_exports));
     await runCmuxSync2();
@@ -8304,6 +8559,7 @@ void (async () => {
     codebyplan round sync-approvals  Sync git diff and approvals with round/task state
     codebyplan bump                  Detect changed packages and patch-bump versions
     codebyplan ship                  Ship current feat branch to production via PR
+    codebyplan upload-e2e-images     Upload new/changed committed e2e PNGs for a checkpoint
     codebyplan scaffold-publish-workflow   Write the publish-on-main GitHub workflow into ./.github/workflows/
     codebyplan branch migrate        Rewrite branch_config from 3-branch to 2-tier model
     codebyplan claude                Claude asset management (install/update/uninstall)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "codebyplan",
-  "version": "1.13.14",
+  "version": "1.13.15",
   "description": "CLI for CodeByPlan — AI-powered development planning and tracking",
   "type": "module",
   "bin": {

package/templates/agents/cbp-e2e-maestro.md CHANGED Viewed

@@ -38,7 +38,7 @@ env:
   TEST_EMAIL: ${TEST_EMAIL}
   TEST_PASSWORD: ${TEST_PASSWORD}
   APP_ID: com.yourorg.yourapp
-screenshotsDir: maestro/screenshots
+screenshotsDir: e2e/screenshots/maestro
 ```
 ## Shared Login Flow
@@ -158,8 +158,31 @@ delete + confirm + verify removed.
 - takeScreenshot: "flow-name-after-state"
 ```
-Screenshots written to `maestro/screenshots/` (via `screenshotsDir` in `config.yaml`).
-Enumerate: `maestro/screenshots/*.png`.
+Screenshots written to `e2e/screenshots/maestro/` (via `screenshotsDir` in `config.yaml`).
+Committed path convention: `e2e/screenshots/maestro/{flow}-{state}.png` (repo root).
+This path is intentionally outside `apps/web/e2e/screenshots/` (which is gitignored).
+After the flow completes, `git add e2e/screenshots/maestro/` to track new PNGs.
+**`is_new` detection**: `git ls-files --error-unmatch <path>` exits non-zero → `is_new: true`.
+Enumerate committed PNGs: `e2e/screenshots/maestro/*.png`.
+## e2e_gallery Population
+After the run, for each committed PNG in `e2e/screenshots/maestro/*.png`, emit one
+`e2e_gallery[]` entry:
+```yaml
+- test_name: string          # Maestro flow filename (e.g. "dashboard")
+  page_or_screen: string     # screen / flow name
+  framework: maestro
+  committed_path: string     # repo-relative: e2e/screenshots/maestro/{flow}-{state}.png
+  is_new: boolean            # detected via git ls-files
+  baseline_diff_pct: null    # Maestro does not produce pixel diffs
+```
+Include this in the specialist output alongside `screenshots[]`.
 ## Run Command

package/templates/agents/cbp-e2e-playwright.md CHANGED Viewed

@@ -189,7 +189,8 @@ test.describe("Home page", () => {
 ```ts
 await expect(page).toHaveScreenshot("state-name.png", { maxDiffPixelRatio: 0.001 });
 ```
-Baselines live beside spec under `{spec}.spec.ts-snapshots/`. Committed to git.
+Baselines live beside spec under `{spec}.spec.ts-snapshots/`. Committed path:
+`apps/{app}/e2e/{spec}.spec.ts-snapshots/{name}-{browser}.png`.
 **Diagnostic** (intermediate states):
 ```ts
@@ -199,9 +200,29 @@ await page.screenshot({
 });
 ```
-Enumerate PNGs: `test-results/**/*.png` and `{spec}.spec.ts-snapshots/`.
+Enumerate committed PNGs: `{spec}.spec.ts-snapshots/**/*.png` (NOT `test-results/` — those are transient).
-**Never run `--update-snapshots` automatically.** A diff is a `visual_regression` failure.
+**`is_new` detection**: `git ls-files --error-unmatch <committed_path>` exits non-zero →
+`is_new: true` (auto-committed first-run baseline; `git add` the file). Exit zero → `is_new: false`.
+**Never run `--update-snapshots` automatically.** A diff on an existing baseline is a `visual_regression` failure.
+## e2e_gallery Population
+After the run, for each committed PNG in `{spec}.spec.ts-snapshots/**/*.png`, emit one
+`e2e_gallery[]` entry. For `screenshots[].viewport`: default to `'desktop'`; set `'mobile'`
+when the playwright.config project/device emulation indicates a mobile viewport (e.g. `devices['iPhone 14']`).
+```yaml
+- test_name: string          # test title from test.info().title
+  page_or_screen: string     # route / screen name
+  framework: playwright
+  committed_path: string     # repo-relative path to the .spec.ts-snapshots PNG
+  is_new: boolean            # detected via git ls-files (see above)
+  baseline_diff_pct: number | null  # from Playwright diff output; null when is_new
+```
+Include this in the specialist output alongside `screenshots[]`.
 ## Run Command

package/templates/agents/cbp-e2e-tauri.md CHANGED Viewed

@@ -148,10 +148,33 @@ For CRUD: create + verify visible; edit + verify; delete + confirm + verify remo
 ## Screenshot Capture
 ```ts
-await browser.saveScreenshot(`./e2e/screenshots/${testName}-${state}.png`);
+await browser.saveScreenshot(
+  `./e2e/screenshots/webdriverio/${testName}-${state}.png`
+);
 ```
-Enumerate: `e2e/screenshots/*.png`.
+Committed path convention: `{app-dir}/e2e/screenshots/webdriverio/{spec}-{state}.png`.
+After the run, `git add {app-dir}/e2e/screenshots/webdriverio/` to track new PNGs.
+**`is_new` detection**: `git ls-files --error-unmatch <path>` exits non-zero → `is_new: true`.
+Enumerate committed PNGs: `{app-dir}/e2e/screenshots/webdriverio/**/*.png`.
+## e2e_gallery Population
+After the run, for each committed PNG under `{app-dir}/e2e/screenshots/webdriverio/`, emit one
+`e2e_gallery[]` entry:
+```yaml
+- test_name: string          # spec describe/it label
+  page_or_screen: string     # window / view name
+  framework: webdriverio
+  committed_path: string     # repo-relative: {app-dir}/e2e/screenshots/webdriverio/{spec}-{state}.png
+  is_new: boolean            # detected via git ls-files
+  baseline_diff_pct: null    # WebDriverIO does not produce pixel diffs
+```
+Include this in the specialist output alongside `screenshots[]`.
 ## Run Command

package/templates/agents/cbp-e2e-vscode.md CHANGED Viewed

@@ -162,10 +162,35 @@ snapshots to `test-fixtures/`.
 ## Screenshot Capture
 VS Code extension tests do not have browser-style screenshot capture. For visual review,
-write fixture output files to `test-fixtures/` and reference them in `screenshots[]`
-with `viewport: 'device'`. `baseline_diff_pct: null` for all entries.
+write fixture output PNGs to the committed dir:
-Enumerate screenshots: `apps/vscode/test-fixtures/**/*.png`.
+Committed path convention: `{app-dir}/e2e/screenshots/vscode/{suite}-{test}.png`.
+This dir **may be empty** for behavior-only tests that produce no visual output (SD-3).
+When capturing PNGs is possible, write them there and `git add` them.
+Enumerate committed PNGs: `{app-dir}/e2e/screenshots/vscode/**/*.png` (may be empty).
+## e2e_gallery Population
+Always emit `e2e_gallery[]` in the specialist output — even when empty (never omit the field):
+```yaml
+e2e_gallery: []   # empty for behavior-only extensions with no PNG output
+```
+When committed PNGs do exist, emit one entry per PNG:
+```yaml
+- test_name: string          # suite/test name
+  page_or_screen: string     # VS Code view / panel name
+  framework: vscode-test
+  committed_path: string     # repo-relative: {app-dir}/e2e/screenshots/vscode/{suite}-{test}.png
+  is_new: boolean            # detected via git ls-files
+  baseline_diff_pct: null    # vscode-test does not produce pixel diffs
+```
+Include this in the specialist output alongside `screenshots[]`.
 ## Run Command

package/templates/agents/cbp-e2e-xcuitest.md CHANGED Viewed

@@ -168,11 +168,40 @@ screenshot.lifetime = .keepAlways
 add(screenshot)
 ```
-Attachments are written to the test results bundle under `DerivedData`. Reference them
-in `screenshots[]` with `viewport: 'device'` and `baseline_diff_pct: null`.
+Attachments are written to the test results bundle under `DerivedData`. After the run,
+export them to the committed path using `xcrun xcresulttool`:
-Enumerate: `~/Library/Developer/Xcode/DerivedData/**/Attachments/*.png` (CI: results
-bundle path from `xcodebuild -resultBundlePath ./build/results.xcresult`).
+```bash
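+# Export all attachments from the result bundle to committed dir (Xcode 16+ syntax)
+xcrun xcresulttool export attachments \
+  --path ./build/results.xcresult \
+  --output-path {app-dir}/e2e/screenshots/xcuitest
+# The export writes a manifest.json describing each attachment; use it to rename/move
+# the exported PNGs so they match the committed path convention below.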
+```
+Committed path convention: `{app-dir}/e2e/screenshots/xcuitest/{TestClass}/{testMethod}-{n}.png`.
+After export, `git add {app-dir}/e2e/screenshots/xcuitest/` to track new PNGs.
+**`is_new` detection** (run this BEFORE the `git add` above — once a PNG is staged it is in the index and always matches): `git ls-files --error-unmatch <path>` exits non-zero → `is_new: true`.
+Enumerate committed PNGs: `{app-dir}/e2e/screenshots/xcuitest/**/*.png`.
+## e2e_gallery Population
+After export, for each committed PNG under `{app-dir}/e2e/screenshots/xcuitest/`, emit one
+`e2e_gallery[]` entry:
+```yaml
+- test_name: string          # TestClass/testMethod
+  page_or_screen: string     # screen name inferred from the attachment name
+  framework: xcuitest
+  committed_path: string     # repo-relative: {app-dir}/e2e/screenshots/xcuitest/{TestClass}/{testMethod}-{n}.png
+  is_new: boolean            # detected via git ls-files
+  baseline_diff_pct: null    # XCUITest does not produce pixel diffs
+```
+Include this in the specialist output alongside `screenshots[]` (which retains the
+`DerivedData` transient path for diagnostic reference).
 ## Run Command
@@ -181,9 +210,16 @@ xcodebuild test \
   -workspace ios/YourApp.xcworkspace \
   -scheme YourApp \
   -destination 'platform=iOS Simulator,name=iPhone 16,OS=latest' \
+  -resultBundlePath ./build/results.xcresult \
   TEST_EMAIL="$TEST_EMAIL" \
   TEST_PASSWORD="$TEST_PASSWORD" \
   | xcbeautify
+# Then export attachments to committed dir:
+xcrun xcresulttool export attachments \
+  --path ./build/results.xcresult \
+  --output-path {app-dir}/e2e/screenshots/xcuitest
+git add {app-dir}/e2e/screenshots/xcuitest/
 ```
 ## pnpm Script

package/templates/agents/cbp-task-check.md CHANGED Viewed

@@ -84,6 +84,8 @@ Review all QA items across all rounds:
 **E2E pass vs skipped distinction**: When reading `auto_qa.items[]` for `check: 'e2e'`, do NOT conflate `status: 'pass'` with `status: 'skipped'`. A spec that ran with `passed === 0 && skipped > 0` for any path touching `files_changed` is a hard fail, not a pass — verdict text MUST explicitly call this out: "E2E spec authored but assertions did not execute (skip-gated)." Do NOT issue a READY verdict on a zero-assertion e2e run; route to a fix round per `rules/e2e-mandatory.md`.
+**Committed-screenshot check**: For any round where `round.context.e2e_eligible[]` is non-empty, verify `round.context.e2e_gallery[]` is non-empty. Refuse a READY verdict when it is empty — verdict text: "E2E ran but produced zero committed screenshots — open a fix round per `rules/e2e-mandatory.md` § Committed-Screenshot Enforcement." Sole exception: when `vscode-test` is the ONLY eligible framework, an empty `e2e_gallery[]` is allowed (SD-3, behavior-only extensions).
 List any pending or failed items. Determine if they are blockers.
 ### Phase 5: File Approval Check

package/templates/context/testing/e2e.md CHANGED Viewed

@@ -70,6 +70,13 @@ output:
       viewport: 'desktop' | 'mobile' | 'tablet' | 'device'
       is_new: bool
       baseline_diff_pct: number | null
+  e2e_gallery:                           # ADDITIVE alongside screenshots[]; consumed by TASK-3 / checkpoint-end
+    - test_name: string
+      page_or_screen: string
+      framework: string                  # playwright | maestro | xcuitest | webdriverio | vscode-test
+      committed_path: string             # repo-relative; MUST be git-tracked after the run
+      is_new: boolean                    # true => no prior baseline; auto-captured+committed this run
+      baseline_diff_pct: number | null   # null when is_new, and always null for non-playwright frameworks
   user_interactions: [{question, answer}]
   tech_stack_reconciliation:
     db_framework: string | null
@@ -174,18 +181,57 @@ For each failed test, assign exactly one category:
 `env`, `auth`, `access` failures MUST NOT count toward `test_results.failed` until
 preflight passes — they block the run instead.
+## Committed-Screenshot Mandate
+Every eligible e2e run MUST persist relevant screenshots to the framework's committed
+directory (tracked in git). Transient dirs (e.g. `test-results/`, `DerivedData`) are for
+diagnostics only — they are NOT the committed path.
+| Framework | Committed path |
+|---|---|
+| playwright | `apps/{app}/e2e/{spec}.spec.ts-snapshots/{name}-{browser}.png` |
+| maestro | `e2e/screenshots/maestro/{flow}-{state}.png` (repo root) |
+| xcuitest | `{app-dir}/e2e/screenshots/xcuitest/{TestClass}/{testMethod}-{n}.png` |
+| webdriverio | `{app-dir}/e2e/screenshots/webdriverio/{spec}-{state}.png` |
+| vscode-test | `{app-dir}/e2e/screenshots/vscode/{suite}-{test}.png` (SD-3: may be empty for behavior-only extensions) |
+SD-3: the vscode-test committed dir may be empty for behavior-only extensions (no visual surface); the agent must still emit `e2e_gallery: []` explicitly. `cbp-task-check` Phase 4 treats an empty `e2e_gallery[]` as allowed when `vscode-test` is the ONLY eligible framework.
+**Gitignore caution**: root `.gitignore` ignores `apps/web/e2e/screenshots/`. For the `{app-dir}`-relative frameworks (xcuitest, webdriverio, vscode-test), `{app-dir}` MUST NOT resolve to `apps/web` while using the default `e2e/screenshots/` layout — `git add` skips ignored paths, so PNGs written there would never be tracked. Remedy: if the app lives in `apps/web`, use a non-ignored subdir (e.g. `apps/web/e2e/baselines/<framework>/`). A `.gitignore` negation (`!apps/web/e2e/screenshots/<framework>/`) does NOT work — git cannot re-include a path whose parent directory is excluded, so PNGs in that subdir would still be ignored and never reach the commit. Maestro (repo-root `e2e/screenshots/maestro/`) is already safe.
+`is_new` detection: `git ls-files --error-unmatch <path>` exits non-zero → `is_new: true`
+(no committed baseline exists yet; auto-capture and `git add`). Exit zero → `is_new: false`.
+## Auto-New / Gated-Changed Update Model
+**NEW screens** (`is_new === true`): the specialist auto-captures the PNG and runs
+`git add <committed_path>`. The test passes; `cbp-frontend-ui` Step 5b reviews semantically.
+No user gate required for first-run capture.
+**EXISTING baselines that visually diff** (`is_new === false`, `baseline_diff_pct > threshold`):
+classify as `visual_regression`. Do NOT auto-update. Surface as a blocking accept-or-fix gate
+at `/cbp-round-end` Step 7. The user must explicitly approve (`--update-snapshots`) or open a
+fix task. This relaxes the prior always-manual contract ONLY for new screens.
 ## Screenshot Collection Rule
-After every run, enumerate all PNGs produced and populate `screenshots[]`. Framework-
-specific paths are in each agent's body. Every entry requires:
-`{test_name, path, page_or_screen, viewport, is_new, baseline_diff_pct}`.
+After every run, enumerate all committed PNGs and populate BOTH `screenshots[]` and
+`e2e_gallery[]`. Framework-specific paths are in each agent's body. Every `screenshots[]`
+entry requires: `{test_name, path, page_or_screen, viewport, is_new, baseline_diff_pct}`.
+Every `e2e_gallery[]` entry requires: `{test_name, page_or_screen, framework, committed_path,
+is_new, baseline_diff_pct}`. `committed_path` MUST be a git-tracked path after the run.
+`/cbp-round-execute` Step 5b aggregates `e2e_gallery[]` across all specialists and stores it
+in `round.context.e2e_gallery`. TASK-3 / checkpoint-end consumes this aggregated gallery to
+upload images to the DB.
 Screenshots flow to `cbp-frontend-ui` invoked by `/cbp-round-execute` Step 5b with
 `phase: 'screenshot_review'` — NOT inline by `round-executor` Step 3.8 (which runs
 `phase: 'style_only'` without e2e output).
-**Baselines are never auto-accepted.** A `toHaveScreenshot` diff is `visual_regression`;
-the user decides via QA whether to update baselines.
+**Changed baselines are never auto-accepted.** A `toHaveScreenshot` diff on an existing
+baseline is `visual_regression`; the user decides via QA whether to update baselines.
+New-screen auto-capture (above) is the only exception to the always-manual contract.
 ## Completion Rule
@@ -237,7 +283,9 @@ An agent is NOT spawned when ANY of the following hold:
 spawn multiple specialists in the same round (one per eligible framework). Agents run in
 parallel with `cbp-testing-qa-agent`. Each specialist's output is stored under
 `round.context.e2e_outputs[framework]` (a framework-keyed map); `/cbp-round-execute` Step 5b
-aggregates `screenshots[]` across all entries before the `cbp-frontend-ui` review.
+aggregates `screenshots[]` and `e2e_gallery[]` across all entries before the
+`cbp-frontend-ui` review. The aggregated `e2e_gallery[]` is persisted separately to
+`round.context.e2e_gallery` for consumption by TASK-3 / checkpoint-end.
 **whole_checkpoint_mode dispatch** (`/cbp-checkpoint-check` Step 5b and `/cbp-checkpoint-plan`
 Step 4): pass `round_number: 0`, `whole_checkpoint_mode: true`, and the aggregated
@@ -298,6 +346,6 @@ a loop, snapshot text/href BEFORE navigation rather than holding stale `Locator`
 | Situation | What happens |
 |---|---|
-| No baseline (new screen) | Playwright creates on first run; test passes; `cbp-frontend-ui` at Step 5b reviews semantically. |
-| Baseline exists, diff ≤ threshold | Test passes. |
-| Baseline exists, diff > threshold | `visual_regression` failure. Agent does NOT retry. `cbp-frontend-ui` at Step 5b flags it; `/cbp-round-end` Step 3b constructs user QA item. User decides: fix-task or `--update-snapshots`. |
+| No baseline (new screen, `is_new: true`) | Playwright creates on first run; auto-committed; `git add` runs; `e2e_gallery[].is_new: true`; `cbp-frontend-ui` Step 5b reviews semantically. No user gate. |
+| Baseline exists, diff ≤ threshold | Test passes; `is_new: false`; `baseline_diff_pct` recorded. |
+| Baseline exists, diff > threshold | `visual_regression` failure; `is_new: false`. Agent does NOT retry. `cbp-frontend-ui` Step 5b flags it; `/cbp-round-end` Step 3b constructs user QA item. User decides: fix-task or `--update-snapshots`. |

package/templates/hooks/validate-structure-patterns.sh CHANGED Viewed

@@ -12,7 +12,7 @@
 _SUB='(templates|examples|reference|scripts)/[a-z0-9.-]+\.(md|sh|json|ya?ml)'
 enforce_path_pattern '^/\.claude/skills/' "^/\.claude/skills/[a-z0-9-]+/(SKILL\.md|[a-z0-9-]+\.md|${_SUB})$" 'Invalid skill path' "Pattern: /.claude/skills/{name}/SKILL.md | /.claude/skills/{name}/{file}.md | /.claude/skills/{name}/(templates|examples|reference|scripts)/{file}.{md,sh,json,yaml}"
 enforce_path_pattern '^/\.claude/agents/' "^/\.claude/agents/([a-z0-9-]+\.md|[a-z0-9-]+/(AGENT\.md|[a-z0-9-]+\.md|${_SUB}))$" 'Invalid agent path' "Pattern: /.claude/agents/{name}.md | /.claude/agents/{name}/AGENT.md | /.claude/agents/{name}/{file}.md | /.claude/agents/{name}/(templates|examples|reference|scripts)/{file}.{md,sh,json,yaml}"
-enforce_path_pattern '^/\.claude/rules/' '^/\.claude/rules/[a-z-]+\.md$' 'Invalid native rule path' 'Pattern: /.claude/rules/{name}.md'
+enforce_path_pattern '^/\.claude/rules/' '^/\.claude/rules/[a-z0-9-]+\.md$' 'Invalid native rule path' 'Pattern: /.claude/rules/{name}.md'
 if match_path '^/\.claude/hooks/' && ! match_path '^/\.claude/hooks/__test-fixtures__/'; then
   if ! match_path '^/\.claude/hooks/[a-z-]+\.sh$'; then
     block 'Invalid hook path' 'Pattern: /.claude/hooks/{name}.sh'

package/templates/rules/e2e-mandatory.md CHANGED Viewed

@@ -61,10 +61,27 @@ A spec that ran with `passed === 0 && skipped > 0` for any path touching `files_
 **hard fail**, not a pass — `cbp-task-check` (`agents/cbp-task-check.md`) refuses a READY
 verdict on a zero-assertion e2e run and routes to a fix round per this rule.
+## Committed-Screenshot Enforcement
+An eligible e2e run that produces **zero committed screenshots** for any `pages_affected`
+path it touched is a defect — not a valid pass. Every framework except `vscode-test`
+(see the exception below) must write at least one PNG to its committed dir (per the table
+in `context/testing/e2e.md` § Committed-Screenshot Mandate) and `git add` it before
+reporting `status: 'completed'`.
+`cbp-task-check` refuses a READY verdict when `e2e_gallery[]` is empty AND the round
+touched UI source paths for an eligible framework — sole exception: `vscode-test`-only
+rounds (SD-3, behavior-only extensions; see below). The fix path is the same as for a
+zero-assertion run: open a fix round that captures the missing committed screenshots.
+The sole exception is `vscode-test`: the committed dir may be empty when the extension
+has no visual output (behavior-only tests). Agents must still define the dir and report
+`e2e_gallery: []` explicitly — not omit the field.
 ## Cross-References
 - `context/testing/e2e.md` — Input/Output contract, pre-flight loop, failure classification,
-  and the dispatch routing table (framework → agent).
-- `agents/cbp-task-check.md` — enforces the zero-assertion hard-fail at verdict time.
+  committed-screenshot mandate, auto-new/gated-changed model, and dispatch routing table.
+- `agents/cbp-task-check.md` — enforces the zero-assertion hard-fail and the empty
+  `e2e_gallery[]` hard-fail at verdict time.
 - `skills/cbp-round-execute/SKILL.md` Step 5/6, `skills/cbp-checkpoint-check/SKILL.md` Step 5b
   — the config-driven dispatch and `e2e_eligible_skipped` gate implementations.

package/templates/settings.project.base.json CHANGED Viewed

@@ -88,7 +88,9 @@
       "Bash(codebyplan ship:*)",
       "Bash(npx codebyplan ship:*)",
       "Bash(codebyplan claude:*)",
-      "Bash(npx codebyplan claude:*)"
+      "Bash(npx codebyplan claude:*)",
+      "Bash(codebyplan upload-e2e-images:*)",
+      "Bash(npx codebyplan upload-e2e-images:*)"
     ],
     "allow": [
       "Skill(cbp-build-cc-agent)",
@@ -125,6 +127,11 @@
       "Skill(cbp-setup-e2e)",
       "Skill(cbp-setup-eslint)",
       "Skill(cbp-ship-configure)",
+      "Skill(cbp-standalone-task-check)",
+      "Skill(cbp-standalone-task-complete)",
+      "Skill(cbp-standalone-task-create)",
+      "Skill(cbp-standalone-task-start)",
+      "Skill(cbp-standalone-task-testing)",
       "Skill(cbp-supabase-branch-check)",
       "Skill(cbp-supabase-migrate)",
       "Skill(cbp-supabase-setup)",

package/templates/skills/cbp-checkpoint-end/SKILL.md CHANGED Viewed

@@ -113,6 +113,22 @@ If `/cbp-ship` reports `aborted_at` (user aborted) or any surface failed verific
 If the repo has zero configured surfaces (very early-stage), `/cbp-ship` exits with `## No deployable surfaces configured` — that's a success state, continue to cleanup.
+### Step 7.5: Upload E2E Screenshots to DB (best-effort)
+After `/cbp-ship` completes successfully, upload the checkpoint's new/changed committed E2E screenshots to the CodeByPlan DB so they can be reviewed per-checkpoint in the web UI (CHK-171). This step is **best-effort and non-blocking** — a failure here MUST NOT halt shipment or cleanup.
+```bash
+npx codebyplan upload-e2e-images "$CHECKPOINT_ID" --repo-id "$REPO_ID" --base-branch "$BASE" --json
+```
+The command collects only the PNGs added/changed on the feat branch vs `$BASE` (this checkpoint's own e2e work — not the whole baseline set) and sends them to `POST /api/checkpoint-images`; the endpoint stores each file and patches `checkpoints.e2e_screenshots`. Capture the outcome into `E2E_IMAGES_UPLOADED` for Step 10:
+- Exit 0 with uploaded paths → `{ count: <n>, stored_paths: [...], skipped: false }`.
+- Exit 0 with `No new/changed e2e screenshots found` → `{ count: 0, stored_paths: [], skipped: true }`.
+- Non-zero exit → `{ count: 0, stored_paths: [], skipped: true, error: "<stderr summary>" }`; emit a non-blocking warning and continue to Step 8.
+`--repo-id` defaults to `repo_id` from `.codebyplan/repo.json` when omitted.
 ### Step 8: Stale Feat Branch Cleanup
 After successful shipment, identify stale remote feat branches:
@@ -202,7 +218,8 @@ context.shipment: {
   skipped: [...],           // populated by /cbp-ship — surfaces explicitly skipped
   stale_branches_cleaned: [list of deleted git branches],
   feat_branch_deleted: true/false,
-  supabase_branches_deleted: [list of Supabase preview branch names removed in Steps 8–9]
+  supabase_branches_deleted: [list of Supabase preview branch names removed in Steps 8–9],
+  e2e_images_uploaded: E2E_IMAGES_UPLOADED   // from Step 7.5 — { count, stored_paths, skipped, error? } (CHK-171)
 }
 ```

package/templates/skills/cbp-frontend-ui/SKILL.md CHANGED Viewed

@@ -41,8 +41,8 @@ input:
       path: string                          # Repo-relative or absolute path to PNG
       page_or_screen: string
       viewport: 'desktop' | 'mobile' | 'tablet' | 'device'
-      is_new: bool                          # No baseline existed (new screen)
-      baseline_diff_pct: number | null      # Pixel-diff % vs Playwright baseline
+      is_new: bool                          # true = no prior committed baseline; auto-captured+committed this run
+      baseline_diff_pct: number | null      # Pixel-diff % vs committed baseline (null for non-playwright frameworks)
 ```
 ## Output Contract
@@ -167,9 +167,10 @@ If no design source PNGs exist for the changed pages, skip this phase.
 For each screenshot in `e2e_screenshots[]`:
 1. **Read the PNG via the Read tool** (Claude multimodal — the PNG is shown to the model directly). Do not use Bash to inspect bytes.
-2. **Check baseline regression** (`baseline_diff_pct != null`):
-   - If `baseline_diff_pct > 0.1%`: emit finding `{category: 'baseline_regression', severity: 'critical', file: {path}, screenshot: {path}, issue: 'Pixel diff vs committed baseline: {diff_pct}%', suggestion: 'Inspect the diff PNG (same folder, -diff suffix). Either fix the regression or, if intentional, run `playwright test --update-snapshots` and commit the new baseline.'}`
-   - Do NOT auto-update baselines. The user must explicitly approve via QA.
+2. **Check new vs changed baseline**:
+   - `is_new === true`: screenshot was auto-captured and committed this run (no prior baseline). Review semantically only — no regression to flag. Populate `screenshot_review.new_screens_reviewed`.
+   - `is_new === false` AND `baseline_diff_pct > 0.1%`: emit finding `{category: 'baseline_regression', severity: 'critical', file: {path}, screenshot: {path}, issue: 'Pixel diff vs committed baseline: {diff_pct}%', suggestion: 'Inspect the diff PNG (same folder, -diff suffix). Either fix the regression or, if intentional, run `playwright test --update-snapshots` and commit the new baseline.'}`
+   - Do NOT auto-update changed baselines. The user must explicitly approve via QA.
 3. **Semantic review of rendered output** (both new screens and existing):
    - **Text overflow / truncation** — text clipped, ellipsis in unintended places, buttons cut off
    - **Unstyled elements** — unbranded default fonts, missing styles (flash of unstyled content captured), default blue links
@@ -237,7 +238,8 @@ Go beyond fixing violations — actively improve visual quality. If spacing coul
 - Token compliance checked
 - Spacing consistency verified
 - **All `e2e_screenshots` reviewed** (when provided) — rendered output checked for overflow, unstyled elements, missing imagery, contrast, layout breaks, loading/error artifacts, design-source fidelity
-- Baseline regressions surfaced (never auto-accepted)
+- New-screen baselines reviewed semantically (`is_new === true` — auto-committed, no user gate)
+- Changed-baseline regressions surfaced (never auto-accepted; `is_new === false` AND diff > threshold)
 - Critical/warning issues auto-fixed where possible (styling only, in-scope only)
 - Findings categorized by severity
@@ -258,5 +260,5 @@ Go beyond fixing violations — actively improve visual quality. If spacing coul
 - **Also invoked by**: `/cbp-checkpoint-check` with screenshots aggregated from a whole-checkpoint e2e run
 - **Consumes**: `e2e_screenshots[]` aggregated from `round.context.e2e_outputs[*].screenshots` (populated by the `cbp-e2e-*` specialists at `/cbp-round-execute` Step 5)
 - **Output written to**: `round.context.frontend_ui_review` — when invoked twice per round, the second invocation merges with the first
-- **Downstream gate**: this skill emits `findings[]` only. Baseline-regression findings surface as a BLOCKING gate at `/cbp-round-end` Step 7 (baselines never auto-accepted); rendered-visual critical findings are surfaced in the Step 7 findings presentation.
+- **Downstream gate**: this skill emits `findings[]` only. Changed-baseline-regression findings (`is_new === false`) surface as a BLOCKING gate at `/cbp-round-end` Step 7 (never auto-accepted); new-screen baselines (`is_new === true`) are auto-committed and reviewed semantically only; rendered-visual critical findings are surfaced in the Step 7 findings presentation.
 - **Paired with**: `frontend-design` (pre-implementation aesthetic decision), `frontend-ux` (interaction-quality self-review, also Step 3.8)

package/templates/skills/cbp-round-execute/SKILL.md CHANGED Viewed

@@ -184,11 +184,35 @@ Input contracts: `cbp-testing-qa-agent` receives `executor_output`, `testing_pro
 ### Step 5b: Post-E2E Screenshot Review (cbp-frontend-ui Phase 6.5)
-Aggregate screenshots across ALL specialists that ran: `screenshots = Object.values(round.context.e2e_outputs ?? {}).flatMap(o => o.screenshots ?? [])`. When the aggregated list is non-empty, invoke the `cbp-frontend-ui` skill with `phase: 'screenshot_review'` (input: `files_changed`, `e2e_screenshots: <aggregated screenshots>`, `context: { checkpoint_goal, round_requirements }`). Under this phase the skill runs only Phase 6.5 (Rendered-Output Visual Review) + 7 + 8 — Phases 1-6 (style) already ran inline at executor Step 3.8 with `phase: 'style_only'`.
+Aggregate across ALL specialists that ran:
-Persist findings to `round.context.frontend_ui_review` (merge with Step 3.8's style-only output if present). Baseline-regression findings surface as a BLOCKING gate at `/cbp-round-end` Step 7 (an explicit accept-or-fix user decision; baselines are NEVER auto-accepted); rendered_visual critical findings are surfaced in the Step 7 findings presentation. Neither auto-fails the round. cbp-testing-qa-agent does NOT read these findings (full independence per Step 5).
+```js
+screenshots = Object.values(round.context.e2e_outputs ?? {}).flatMap(o => o.screenshots ?? []);
+e2e_gallery  = Object.values(round.context.e2e_outputs ?? {}).flatMap(o => o.e2e_gallery  ?? []);
+```
+**Auto-new baseline handling**: for each entry in `e2e_gallery` where `is_new === true`, the
+specialist has already run `git add <committed_path>`. No additional user gate is needed.
+**Changed-baseline handling**: entries where `is_new === false` AND `baseline_diff_pct > threshold`
+are `visual_regression` — do NOT auto-accept; surface as blocking gate at Step 7.
+Persist `e2e_gallery` to `round.context.e2e_gallery` (additive alongside existing
+`round.context.e2e_outputs`). This field is consumed by TASK-3 / checkpoint-end for DB upload.
+Note: `e2e_gallery[]` is aggregated and persisted regardless of whether `cbp-frontend-ui` runs — the empty-gallery enforcement lives in `cbp-task-check` Phase 4, while the `screenshots[]` visual review (frontend-ui Phase 6.5) is a separate concern gated on `screenshots[]` being non-empty.
+When the aggregated `screenshots` list is non-empty, invoke the `cbp-frontend-ui` skill with
+`phase: 'screenshot_review'` (input: `files_changed`, `e2e_screenshots: <aggregated screenshots>`,
+`context: { checkpoint_goal, round_requirements }`). Under this phase the skill runs only
+Phase 6.5 (Rendered-Output Visual Review) + 7 + 8 — Phases 1-6 (style) already ran at Step 3.8.
+Persist findings to `round.context.frontend_ui_review` (merge with Step 3.8's style-only output
+if present). Baseline-regression findings surface as a BLOCKING gate at `/cbp-round-end` Step 7
+(an explicit accept-or-fix user decision; changed baselines are NEVER auto-accepted);
+rendered_visual critical findings are surfaced in the Step 7 findings presentation. Neither
+auto-fails the round. cbp-testing-qa-agent does NOT read these findings (full independence).
-**Skip** when `round.context.e2e_outputs` is absent/empty, the aggregated `screenshots` list is empty, or `testing_profile === 'claude_only'`.
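+**Skip the `cbp-frontend-ui` invocation** when `round.context.e2e_outputs` is absent/empty, the
+aggregated `screenshots` list is empty, or `testing_profile === 'claude_only'`. The `e2e_gallery`
+aggregation and persistence described above still run whenever any specialist produced output.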
 ### Step 6: Hard-Fail Routing
@@ -215,9 +239,9 @@ When `cbp-testing-qa-agent` spawn fails OR the resolved `testing_profile` is `cl
 Update round context via MCP `update_round` / `update_standalone_round` per KIND:
-- `context`: { ...existing, executor_output, testing_qa_output, e2e_eligible, e2e_outputs, frontend_ui_review }
+- `context`: { ...existing, executor_output, testing_qa_output, e2e_eligible, e2e_outputs, e2e_gallery, frontend_ui_review }
-`e2e_outputs` (a framework-keyed map of specialist outputs, e.g. `{ playwright: {...}, maestro: {...} }`) and `frontend_ui_review` are present only when the gates above admitted them (≥1 eligible framework ran AND Step 5b ran). `e2e_eligible[]` records which frameworks were eligible this round and drives the Step 6 `e2e_eligible_skipped` check.
+`e2e_outputs` (a framework-keyed map of specialist outputs, e.g. `{ playwright: {...}, maestro: {...} }`) and `e2e_gallery` (aggregated flat array of committed-PNG entries across all specialists — consumed by TASK-3 / checkpoint-end for DB upload) are present whenever ≥1 eligible framework ran; `e2e_gallery` is persisted even when the Step 5b `cbp-frontend-ui` review is skipped. `frontend_ui_review` is present only when the gates above admitted it. `e2e_eligible[]` records which frameworks were eligible this round and drives the Step 6 `e2e_eligible_skipped` check.
 ### Step 8: Auto-trigger Round End
@@ -234,13 +258,13 @@ Trigger `/cbp-round-end`.
 - `testing_profile` from `task.context` governs which checks run — read it once in Step 2; pass to every testing-qa + e2e specialist spawn
 - `claude_only` profile skips all agent spawns (testing-qa AND `cbp-e2e-*`); runs hook syntax and skill structure checks inline
 - E2E dispatch is **config-driven and opt-out** (`.codebyplan/e2e.json`), not gated on `has_ui_work`/`testing_profile` — an eligible framework that silently does not run is an `e2e_eligible_skipped` hard-fail (`rules/e2e-mandatory.md`)
-- Step 5b (cbp-frontend-ui Phase 6.5) runs only when e2e produced screenshots — gated on the aggregated `e2e_outputs[*].screenshots[]` being non-empty
+- Step 5b (cbp-frontend-ui Phase 6.5) runs only when e2e produced screenshots — gated on the aggregated `e2e_outputs[*].screenshots[]` being non-empty; `e2e_gallery[]` is always aggregated and persisted when any specialist ran
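 - Claude NEVER git adds files in round commands — the sole exception is the `cbp-e2e-*` specialists staging their committed screenshot PNGs (`git add <committed_path>`)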
 ## Integration
 - **Reads**: MCP `get_current_task` / `get_current_standalone_task`, `get_rounds` / `get_standalone_rounds` (per KIND)
-- **Writes**: MCP `update_round` / `update_standalone_round` (context with executor_output + testing_qa_output + e2e_eligible + e2e_outputs + frontend_ui_review) — per KIND
+- **Writes**: MCP `update_round` / `update_standalone_round` (context with executor_output + testing_qa_output + e2e_eligible + e2e_outputs + e2e_gallery + frontend_ui_review) — per KIND
 - **Spawns**: `cbp-round-executor` (per wave or single), `cbp-testing-qa-agent` (per wave, parallel sibling of the `cbp-e2e-*` specialists), the `cbp-e2e-*` specialists (config-driven dispatch per `context/testing/e2e.md`, one per eligible framework in `.codebyplan/e2e.json`), `cbp-database-agent` (if DB work), `cbp-security-agent` (if security review needed)
 - **Skill invocations**: `cbp-frontend-ui` at Step 5b with `phase: 'screenshot_review'` (post-e2e)
 - **Triggers**: `/cbp-round-end` (auto)