npm - pi-agent-browser-native - Versions diffs - 0.2.37 → 0.2.39 - Mend

pi-agent-browser-native 0.2.37 → 0.2.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/CHANGELOG.md +22 -0
package/README.md +19 -6
package/docs/ARCHITECTURE.md +4 -4
package/docs/COMMAND_REFERENCE.md +15 -12
package/docs/RELEASE.md +21 -9
package/docs/SUPPORT_MATRIX.md +10 -8
package/docs/TOOL_CONTRACT.md +1 -1
package/docs/platform-smoke.md +176 -0
package/extensions/agent-browser/lib/playbook.ts +2 -2
package/extensions/agent-browser/lib/process.ts +72 -13
package/package.json +16 -4
package/platform-smoke.config.mjs +18 -0
package/scripts/agent-browser-capability-baseline.mjs +9 -6
package/scripts/platform-smoke/artifacts.mjs +94 -0
package/scripts/platform-smoke/browser-dogfood-windows.ps1 +110 -0
package/scripts/platform-smoke/crabbox-runner.mjs +149 -0
package/scripts/platform-smoke/doctor.mjs +307 -0
package/scripts/platform-smoke/linux-image/Dockerfile +23 -0
package/scripts/platform-smoke/platform-build-windows.ps1 +103 -0
package/scripts/platform-smoke/targets.mjs +471 -0
package/scripts/platform-smoke.mjs +161 -0

package/docs/platform-smoke.md ADDED Viewed

@@ -0,0 +1,176 @@
+# Platform smoke testing
+`pi-agent-browser-native` uses a Crabbox-backed local platform smoke gate to prove the package on macOS, Ubuntu Linux, and native Windows before release.
+This is a release-blocking gate. If Crabbox, Docker, macOS SSH access, the native Windows template, upstream `agent-browser`, or a browser runtime dependency is missing, the release setup is blocked; a missing prerequisite never counts as a skipped pass.
+## Required release gate
+Run the cheap harness checks first, then the full matrix:
+```sh
+npm run check:platform-smoke
+npm run smoke:platform:ubuntu-image
+npm run smoke:platform:all
+```
+`smoke:platform:all` runs `smoke:platform:doctor` before any target suite starts. The canonical `npm run verify -- release` gate also runs the same platform doctor and full `macos,ubuntu,windows-native` matrix after default verification and packaged Pi smoke, so `npm publish` cannot pass `prepublishOnly` without the platform gate.
+Per-target commands are for diagnosis:
+```sh
+npm run smoke:platform:macos
+npm run smoke:platform:ubuntu
+npm run smoke:platform:windows-native
+npm run verify -- platform-smoke run --target ubuntu --suite platform-build
+```
+## Targets
+| Target | Crabbox provider | Shell contract | Release status |
+| --- | --- | --- | --- |
+| `macos` | `ssh` static localhost | POSIX shell on macOS | Required |
+| `ubuntu` | `local-container` | POSIX shell in a Docker-compatible local container | Required |
+| `windows-native` | `parallels` | native Windows PowerShell over OpenSSH | Required |
+## Required environment
+Install Crabbox on the macOS maintainer host and keep it on `PATH`:
+```sh
+brew install openclaw/tap/crabbox
+crabbox --version
+crabbox providers
+```
+Use `PLATFORM_SMOKE_CRABBOX=/path/to/crabbox` only when testing a non-default Crabbox binary.
+Standard configuration knobs:
+```sh
+PLATFORM_SMOKE_MAC_HOST=localhost
+PLATFORM_SMOKE_MAC_USER="$USER"
+PLATFORM_SMOKE_MAC_WORK_ROOT="/Users/$USER/crabbox/pi-agent-browser-native"
+# Default local image built by npm run smoke:platform:ubuntu-image.
+PLATFORM_SMOKE_UBUNTU_IMAGE="pi-agent-browser-native-platform:node24-agent-browser0.27.1"
+PLATFORM_SMOKE_WINDOWS_VM="pi-extension-windows-template"
+PLATFORM_SMOKE_WINDOWS_SNAPSHOT="crabbox-ready"
+PLATFORM_SMOKE_WINDOWS_USER="<windows-ssh-user>"
+PLATFORM_SMOKE_WINDOWS_WORK_ROOT="C:\\crabbox\\pi-agent-browser-native"
+# Optional: names of secret env vars to redact/forward if future live suites need them.
+PLATFORM_SMOKE_AUTH_ENV=""
+```
+The Ubuntu target image is derived from `node:24-bookworm`, installs `agent-browser@0.27.1`, installs Debian Chromium through apt, creates a non-root `circleci` user, and sets `AGENT_BROWSER_EXECUTABLE_PATH=/usr/bin/chromium`. Rebuild it after upstream rebaselining, or override `PLATFORM_SMOKE_UBUNTU_IMAGE` with an equivalent prepared local image. Do not install `agent-browser` ad hoc inside the Ubuntu smoke command.
+The configured upstream `agent-browser` baseline is imported from [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs). Target-local browser suites verify that exact `agent-browser` version before running. Bake the exact upstream CLI and browser runtime into the Windows template/snapshot for speed and reproducibility; missing or stale Windows `agent-browser` / browser readiness is a blocked setup, not something the smoke command repairs. The Windows browser suite checks the preinstalled browser cache and prewarms one short local file URL before the extension harness runs.
+## Target setup expectations
+Crabbox does not install project runtime tools. The macOS host, Ubuntu image, and Windows template must already provide:
+- Node/npm at or above the configured Node major baseline in [`platform-smoke.config.mjs`](../platform-smoke.config.mjs).
+- Git and `tar`.
+- Upstream `agent-browser` matching this wrapper’s capability baseline. The Ubuntu target gets it from [`scripts/platform-smoke/linux-image/Dockerfile`](../scripts/platform-smoke/linux-image/Dockerfile); the Windows template gets it from the shared `pi-extension-windows-template` / `crabbox-ready` snapshot.
+- Browser/runtime dependencies needed by upstream `agent-browser`.
+- Native PowerShell and OpenSSH Server on Windows.
+For Windows, reuse `pi-extension-windows-template` with the shared canonical `crabbox-ready` power-off snapshot. Do not create one-off project VMs. If a reusable tool is missing, update the shared template, verify from a fresh SSH session, remove caches/secrets/checkouts, shut down cleanly, and promote a known-good power-off snapshot.
+## What the suites prove
+Each required target runs `platform-build` and `browser-dogfood-smoke` on one Crabbox lease, serially.
+### `platform-build`
+1. Verify the target Node major version.
+2. Run `npm ci` in the synced checkout.
+3. Run `npm run verify -- platform-target`, a fast target-local gate covering generated docs, TypeScript, package/platform harness tests, and runtime planning. The full unit/fake suite still runs once in the host default gate before the release matrix starts; target-local smoke must not duplicate that full suite on every OS. Browser subprocess behavior is then exercised by the target-local `browser-dogfood-smoke` suite against the real upstream binary.
+4. Run `npm pack`.
+5. Create a clean target-local Pi project.
+6. Install the packed tarball with `npm install --no-save`.
+7. Run `pi install -l ./node_modules/pi-agent-browser-native` from the clean project.
+8. Run `pi list` and assert the package is registered from the packed install.
+9. Assert the release proof did not use `pi -e .` or `pi --extension .`.
+### `browser-dogfood-smoke`
+1. Run `npm ci` in the synced checkout if needed.
+2. Run the deterministic model-free browser smoke through `scripts/verify-agent-browser-dogfood.ts`.
+3. Exercise native wrapper surfaces against the deterministic local file fixture from `scripts/verify-agent-browser-dogfood.ts`: top-level `qa`, `semanticAction`, constrained `job`, screenshot artifact verification, and session close.
+4. Persist the dogfood JSON report and stdout/stderr evidence.
+5. Fail on missing browser artifacts, failed tool calls, leaked secrets, or unclosed sessions.
+The dogfood suite intentionally uses the checkout harness while `platform-build` proves packed Pi installation. Together they catch OS-specific packaging, install, path, process, browser, and wrapper bugs without using an LLM.
+## Artifact contract
+Every target suite writes host-side evidence under:
+```text
+.artifacts/platform-smoke/<run-id>/<target>/<suite>/
+```
+Required files include:
+```text
+summary.json
+artifact-manifest.json
+target.json
+suite.json
+command.txt
+exit-code.txt
+crabbox.stdout.txt
+crabbox.stderr.txt
+crabbox.timing.json
+assertions.json
+failures.md            # only when assertions fail
+```
+`platform-build` also writes:
+```text
+node-version.txt
+packed-tarball.txt
+packed-node-install.stdout.txt
+packed-node-install.stderr.txt
+pi-install.stdout.txt
+pi-install.stderr.txt
+pi-list.stdout.txt
+pi-list.stderr.txt
+```
+`browser-dogfood-smoke` also writes:
+```text
+node-version.txt
+dogfood-artifacts.txt
+dogfood.stdout.txt
+dogfood.stderr.txt
+dogfood-report.json
+```
+Each target also writes a `lease-cleanup` artifact directory with `crabbox.stop.*` files. Cleanup failures are failing test results. Ubuntu and Windows runs also invoke Crabbox cleanup for stale direct-provider state after stopping the owned lease.
+Passing suites must satisfy:
+```text
+summary.ok === assertions.ok
+artifact-manifest.missing.length === 0
+```
+The harness redacts configured secret values and token-like text from persisted artifacts, then fails if a redaction scan still finds raw secrets.
+## Source of truth
+- Config: [`platform-smoke.config.mjs`](../platform-smoke.config.mjs)
+- CLI: [`scripts/platform-smoke.mjs`](../scripts/platform-smoke.mjs)
+- Crabbox wrapper: [`scripts/platform-smoke/crabbox-runner.mjs`](../scripts/platform-smoke/crabbox-runner.mjs)
+- Target commands/assertions: [`scripts/platform-smoke/targets.mjs`](../scripts/platform-smoke/targets.mjs)
+- Platform doctor: [`scripts/platform-smoke/doctor.mjs`](../scripts/platform-smoke/doctor.mjs)
+- Artifact helpers: [`scripts/platform-smoke/artifacts.mjs`](../scripts/platform-smoke/artifacts.mjs)
+- Windows build suite: [`scripts/platform-smoke/platform-build-windows.ps1`](../scripts/platform-smoke/platform-build-windows.ps1)
+- Windows browser suite: [`scripts/platform-smoke/browser-dogfood-windows.ps1`](../scripts/platform-smoke/browser-dogfood-windows.ps1)

package/extensions/agent-browser/lib/playbook.ts CHANGED Viewed

@@ -30,7 +30,7 @@ export const QUICK_START_GUIDELINES = [
 ] as const;
 export const BRAVE_SEARCH_PROMPT_GUIDELINE =
-	"When a non-empty BRAVE_API_KEY is available in the current environment, prefer the Brave Search API via bash/curl to discover specific destination URLs, then open the chosen URL with agent_browser instead of browsing a search engine results page just to find the target.";
+	"With BRAVE_API_KEY set, use Brave Search via bash/curl to find exact destination URLs, then open the chosen URL with agent_browser; do not browse search results just to locate a target.";
 export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
 	"Standard workflow: open the page, snapshot -i, interact using current @refs from that snapshot, and re-snapshot after navigation, scrolling, rerendering, or other major DOM changes because refs are page-scoped; the wrapper fails mutation-prone stale/recycled refs before upstream can silently target a different current-page element.",
@@ -51,7 +51,7 @@ export const SHARED_BROWSER_PLAYBOOK_GUIDELINES = [
 	"For Electron desktop apps, prefer top-level electron for wrapper-owned discovery, isolated launch, status, compact probe, and cleanup: list first, treat likely-sensitive annotations as hints rather than enforcement, launch with the default snapshot handoff unless handoff: \"tabs\" is the safer diagnostic starting point, use electron.probe or snapshot -i/qa.attached for current-session state, and always cleanup the returned launchId when done. electron.launch uses an isolated temporary profile; it does not reuse the app's normal signed-in profile or attach to an already-running authenticated app. For signed-in local app state, host-launch the normal app with --remote-debugging-port when appropriate, then use raw args connect <port|url>; after connect, inspect tab list, select the stable tab id such as tab t2, then run a condition wait or snapshot -i before using refs. close commands (`close`, `quit`, or `exit`) only close the browser/CDP session; leave manually launched app shutdown, profile cleanup, and explicit artifacts to the host owner.",
 	"For provider or specialized app workflows, load version-matched upstream guidance with skills get agentcore|electron|slack|dogfood|vercel-sandbox through the native tool; add --full when you need references/templates, and use skills get --all only for broad skill audits. Provider launches such as -p ios, --provider browserbase/kernel/browseruse/browserless/agentcore, and iOS --device are upstream-owned setup paths; use sessionMode fresh when switching providers and expect external credentials or local Appium/Xcode setup to be required.",
 	"For dialogs and frames, use dialog status/accept/dismiss and frame <selector|main> through native args; when --confirm-actions produces a pending confirmation, use details.nextActions or exact confirm <id> / deny <id> calls instead of inventing ids.",
-	"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.27.0, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort, must stay below the wrapper IPC budget (wait 30000 is intentionally blocked), and a successful payload like \"waited\":\"timeout\" means elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.",
+	"If a session lands on the wrong page or tab, an interaction changes origin unexpectedly, or an open call returns blocked, blank, or otherwise unexpected results, use tab list / tab <tab-id-or-label> / snapshot -i to recover state before retrying different URLs or fallback strategies. For headed demos, put --headed on the first launch with sessionMode=fresh and verify with screenshot/tab/get-url evidence because tool success cannot prove the OS window is visible to the user. For desktop readiness, prefer real conditions first: wait --text, wait --url, wait --fn, wait --load <state>, wait --download, or qa.attached; for disappearance checks in agent-browser 0.27.1, use wait --fn predicates instead of stale upstream-help examples like wait <selector> --state hidden. Use electron.probe/status for wrapper-owned launch health or target mismatch. Fixed waits are a last resort, must stay below the wrapper IPC budget (wait 30000 is intentionally blocked), and a successful payload like \"waited\":\"timeout\" means elapsed time only—verify completion with an observed condition, fresh snapshot, or screenshot.",
 	"For feed, timeline, or inbox reading tasks, focus on the main timeline/list region and read the first item there rather than unrelated composer or sidebar content.",
 	"For read-only browsing tasks, prefer extracting the answer from the current snapshot, structured ref labels, or eval --stdin on the current page before navigating away. Only click into media viewers, detail routes, or new pages when the current view does not contain the needed information.",
 	"For downloads, prefer download <selector> <path> when an element click should save a file. Do not rely on click alone when you need the downloaded file on disk.",

package/extensions/agent-browser/lib/process.ts CHANGED Viewed

@@ -1,15 +1,16 @@
 /**
  * Purpose: Execute the upstream agent-browser binary for the pi-agent-browser extension.
- * Responsibilities: Spawn the agent-browser subprocess without a shell, forward a curated environment surface, stream optional stdin, bound in-memory output buffering, spill oversized stdout safely to a private temp file under a disk budget, and honor abort signals.
+ * Responsibilities: Spawn the agent-browser subprocess, forward a curated environment surface, stream optional stdin, bound in-memory output buffering, spill oversized stdout safely to a private temp file under a disk budget, and honor abort signals.
  * Scope: Process execution only; argument planning, output formatting, and pi tool registration live elsewhere.
  * Usage: Called by the extension tool after argument validation and session planning are complete.
- * Invariants/Assumptions: The binary name is always `agent-browser`, the wrapper never shells out, and callers handle semantic success/error interpretation.
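+ * Invariants/Assumptions: The binary name is always `agent-browser`; on Windows the wrapper launches it through `powershell.exe -Command` so npm launchers can be invoked, with each argument passed as a single-quoted, quote-escaped PowerShell string; callers handle semantic success/error interpretation.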
  */
 import { type ChildProcessWithoutNullStreams, spawn } from "node:child_process";
 import { chmod, mkdir } from "node:fs/promises";
 import { env as processEnv, platform as processPlatform } from "node:process";
+import { GLOBAL_BOOLEAN_FLAGS_WITH_OPTIONAL_VALUES, GLOBAL_VALUE_FLAGS, getFlagName } from "./argv-grammar.js";
 import { openSecureTempFile, writeSecureTempChunk } from "./temp.js";
 const MAX_BUFFERED_STDOUT_BYTES = 512 * 1_024;
@@ -107,6 +108,52 @@ function appendTail(text: string, addition: string, maxChars: number): string {
 	return combined.length <= maxChars ? combined : combined.slice(combined.length - maxChars);
 }
+function quoteWindowsPowerShellArg(value: string): string {
+	return `'${value.replace(/'/g, "''")}'`;
+}
+const WINDOWS_LEADING_GLOBAL_VALUE_FLAGS = new Set<string>(GLOBAL_VALUE_FLAGS);
+/** Exported for unit tests that lock Windows launcher argv ordering. */
+export function reorderWindowsLeadingGlobalArgs(args: string[]): string[] {
+	const leadingGlobals: string[] = [];
+	let index = 0;
+	while (index < args.length && args[index]?.startsWith("-")) {
+		const token = args[index];
+		const flagName = getFlagName(token);
+		leadingGlobals.push(token);
+		index += 1;
+		if (WINDOWS_LEADING_GLOBAL_VALUE_FLAGS.has(flagName) && !token.includes("=") && index < args.length) {
+			leadingGlobals.push(args[index]);
+			index += 1;
+			continue;
+		}
+		if (GLOBAL_BOOLEAN_FLAGS_WITH_OPTIONAL_VALUES.has(flagName) && ["true", "false"].includes(args[index] ?? "")) {
+			leadingGlobals.push(args[index]);
+			index += 1;
+		}
+	}
+	if (leadingGlobals.length === 0 || index >= args.length) return args;
+	return [args[index], ...leadingGlobals, ...args.slice(index + 1)];
+}
+function buildAgentBrowserSpawnCommand(args: string[]): { command: string; args: string[] } {
+	if (processPlatform !== "win32") {
+		return { command: "agent-browser", args };
+	}
+	const commandLine = ["&", "agent-browser", ...reorderWindowsLeadingGlobalArgs(args).map(quoteWindowsPowerShellArg)].join(" ");
+	return { command: "powershell.exe", args: ["-NoLogo", "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", commandLine] };
+}
+function terminateSpawnedChild(child: ChildProcessWithoutNullStreams, signal: NodeJS.Signals): void {
+	if (processPlatform === "win32" && child.pid) {
+		const killer = spawn("taskkill.exe", ["/PID", String(child.pid), "/T", "/F"], { stdio: "ignore" });
+		killer.on("error", () => undefined);
+		killer.unref();
+	}
+	child.kill(signal);
+}
 /** Exported for unit tests that lock subprocess exit-code precedence. */
 export function resolveSpawnedChildExitCode(input: {
 	closeCode?: number | null;
@@ -234,17 +281,27 @@ async function ensureAgentBrowserSocketDir(socketDir: string): Promise<boolean>
 	}
 }
+function getChildEnvName(name: string): string | undefined {
+	if (processPlatform === "win32") {
+		const upperName = name.toUpperCase();
+		if (INHERITED_ENV_NAMES.has(upperName)) return upperName;
+		return INHERITED_ENV_PREFIXES.some((prefix) => upperName.startsWith(prefix)) ? upperName : undefined;
+	}
+	if (INHERITED_ENV_NAMES.has(name) || INHERITED_ENV_PREFIXES.some((prefix) => name.startsWith(prefix))) {
+		return name;
+	}
+	return undefined;
+}
 export function buildAgentBrowserProcessEnv(
 	baseEnv: NodeJS.ProcessEnv = processEnv,
 	overrides: NodeJS.ProcessEnv | undefined = undefined,
 ): NodeJS.ProcessEnv {
 	const childEnv: NodeJS.ProcessEnv = {};
 	for (const [name, value] of Object.entries(baseEnv)) {
-		if (
-			value !== undefined &&
-			(INHERITED_ENV_NAMES.has(name) || INHERITED_ENV_PREFIXES.some((prefix) => name.startsWith(prefix)))
-		) {
-			childEnv[name] = value;
+		const childName = getChildEnvName(name);
+		if (value !== undefined && childName) {
+			childEnv[childName] = value;
 		}
 	}
@@ -254,10 +311,11 @@ export function buildAgentBrowserProcessEnv(
 	}
 	for (const [name, value] of Object.entries(overrides)) {
+		const childName = getChildEnvName(name) ?? name;
 		if (value === undefined) {
-			delete childEnv[name];
+			delete childEnv[childName];
 		} else {
-			childEnv[name] = value;
+			childEnv[childName] = value;
 		}
 	}
 	clampUpstreamDefaultTimeout(childEnv);
@@ -371,7 +429,8 @@ export async function runAgentBrowserProcess(options: {
 			});
 		};
-		const child = spawn("agent-browser", args, {
+		const spawnCommand = buildAgentBrowserSpawnCommand(args);
+		const child = spawn(spawnCommand.command, spawnCommand.args, {
 			cwd,
 			env: buildAgentBrowserProcessEnv(processEnv, effectiveEnv),
 			stdio: ["pipe", "pipe", "pipe"],
@@ -384,15 +443,15 @@ export async function runAgentBrowserProcess(options: {
 			} else {
 				timedOut = true;
 			}
-			child.kill("SIGTERM");
+			terminateSpawnedChild(child, "SIGTERM");
 			killTimer = setTimeout(() => {
-				child.kill("SIGKILL");
+				terminateSpawnedChild(child, "SIGKILL");
 			}, 2_000);
 		};
 		const recordStdinError = (error: unknown) => {
 			const stdinError = error instanceof Error ? error : new Error(String(error));
 			const errorCode = (stdinError as NodeJS.ErrnoException).code;
-			if (errorCode === "EPIPE" || errorCode === "ERR_STREAM_DESTROYED") {
+			if (errorCode === "EPIPE" || errorCode === "EOF" || errorCode === "ERR_STREAM_DESTROYED") {
 				return;
 			}
 			if (!spawnError) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-agent-browser-native",
-  "version": "0.2.37",
+  "version": "0.2.39",
   "description": "pi extension that exposes agent-browser as a native tool for browser automation",
   "type": "module",
   "author": "Mitch Fultz (https://github.com/fitchmultz)",
@@ -31,8 +31,11 @@
   },
   "files": [
     "extensions",
+    "platform-smoke.config.mjs",
     "scripts/doctor.mjs",
     "scripts/agent-browser-capability-baseline.mjs",
+    "scripts/platform-smoke.mjs",
+    "scripts/platform-smoke",
     "README.md",
     "CHANGELOG.md",
     "LICENSE",
@@ -40,6 +43,7 @@
     "docs/COMMAND_REFERENCE.md",
     "docs/ELECTRON.md",
     "docs/RELEASE.md",
+    "docs/platform-smoke.md",
     "docs/REQUIREMENTS.md",
     "docs/SUPPORT_MATRIX.md",
     "docs/TOOL_CONTRACT.md"
@@ -56,9 +60,9 @@
     "typebox": "*"
   },
   "devDependencies": {
-    "@earendil-works/pi-ai": "^0.76.0",
-    "@earendil-works/pi-coding-agent": "^0.76.0",
-    "@earendil-works/pi-tui": "^0.76.0",
+    "@earendil-works/pi-ai": "^0.78.0",
+    "@earendil-works/pi-coding-agent": "^0.78.0",
+    "@earendil-works/pi-tui": "^0.78.0",
     "@types/node": "^25.6.1",
     "tsx": "^4.21.0",
     "typebox": "^1.1.38",
@@ -71,6 +75,14 @@
     "docs": "node ./scripts/project.mjs docs",
     "doctor": "node ./scripts/doctor.mjs",
     "benchmark:agent-browser": "node ./scripts/agent-browser-efficiency-benchmark.mjs",
+    "check:platform-smoke": "node --check platform-smoke.config.mjs && node --check scripts/platform-smoke.mjs && node --check scripts/platform-smoke/doctor.mjs && node --check scripts/platform-smoke/crabbox-runner.mjs && node --check scripts/platform-smoke/targets.mjs && node --check scripts/platform-smoke/artifacts.mjs && tsx --test test/platform-smoke.test.ts",
+    "smoke:platform": "node scripts/platform-smoke.mjs",
+    "smoke:platform:doctor": "node scripts/platform-smoke.mjs doctor",
+    "smoke:platform:ubuntu-image": "docker build -t pi-agent-browser-native-platform:node24-agent-browser0.27.1 --build-arg AGENT_BROWSER_VERSION=0.27.1 -f scripts/platform-smoke/linux-image/Dockerfile .",
+    "smoke:platform:macos": "node scripts/platform-smoke.mjs run --target macos",
+    "smoke:platform:ubuntu": "node scripts/platform-smoke.mjs run --target ubuntu",
+    "smoke:platform:windows-native": "node scripts/platform-smoke.mjs run --target windows-native",
+    "smoke:platform:all": "npm run smoke:platform:doctor && node scripts/platform-smoke.mjs run --target macos,ubuntu,windows-native",
     "typecheck": "node ./scripts/project.mjs verify typecheck",
     "test": "tsx --test test/**/*.test.ts",
     "verify": "node ./scripts/project.mjs verify",

package/platform-smoke.config.mjs ADDED Viewed

@@ -0,0 +1,18 @@
+// Platform smoke configuration for pi-agent-browser-native.
+// Crabbox owns the target lease/sync loop; this file is the project source of truth for release-blocking platform coverage.
+import { CAPABILITY_BASELINE } from "./scripts/agent-browser-capability-baseline.mjs";
+export default {
+	packageName: "pi-agent-browser-native",
+	artifactRoot: ".artifacts/platform-smoke",
+	requiredTargets: ["macos", "ubuntu", "windows-native"],
+	requiredSuites: ["platform-build", "browser-dogfood-smoke"],
+	requiredCrabbox: {
+		install: "Homebrew package or PLATFORM_SMOKE_CRABBOX override",
+		minVersion: "0.24.0",
+	},
+	ubuntuContainerImage: "pi-agent-browser-native-platform:node24-agent-browser0.27.1",
+	nodeValidationMajor: 22,
+	agentBrowserVersion: CAPABILITY_BASELINE.targetVersion,
+};

package/scripts/agent-browser-capability-baseline.mjs CHANGED Viewed

@@ -14,8 +14,8 @@ export const COMMAND_REFERENCE_BASELINE_BLOCK_IDS = Object.freeze(["upstream-bas
 const sourceEvidence = Object.freeze({
   repository: "vercel-labs/agent-browser",
-  upstreamHead: "4ad284890cb59564af603e6de403dd75dd19e832",
-  upstreamPackageVersion: "0.27.0",
+  upstreamHead: "90050f2913159875e2c3719e424746396ccb3cbf",
+  upstreamPackageVersion: "0.27.1",
   inspectedSources: Object.freeze([
     "agent-browser --version",
     "agent-browser --help",
@@ -349,7 +349,8 @@ const inventorySections = Object.freeze([
       "diff screenshot --baseline <file> --output <file> --threshold <0-1> --selector <sel> --full",
       "diff url <u1> <u2>",
       "diff url <u1> <u2> --screenshot --wait-until <strategy> --selector <sel> --compact --depth <n>",
-      "trace start|stop [path]",
+      "trace start",
+      "trace stop [path]",
       "profiler start|stop [path]",
       "record start <path> [url]",
       "record restart <path> [url]",
@@ -386,7 +387,8 @@ const inventorySections = Object.freeze([
       root("storage <local|session>"),
       root("diff snapshot"),
       root("diff screenshot --baseline"),
-      root("trace start|stop [path]"),
+      root("trace start"),
+      root("trace stop [path]"),
       root("profiler start|stop [path]"),
       root("record start <path> [url]"),
       root("record stop"),
@@ -422,7 +424,8 @@ const inventorySections = Object.freeze([
       ["diff help", "--threshold <0-1>"],
       ["diff help", "--wait-until <strategy>"],
       ["diff help", "diff screenshot --baseline <f>"],
-      ["trace help", "trace <operation> [path]"],
+      ["trace help", "trace start"],
+      ["trace help", "trace stop [path]"],
       ["profiler help", "--categories <list>"],
       ["record help", "record restart <path.webm> [url]"],
       ["console help", "--clear"],
@@ -703,7 +706,7 @@ const inventorySections = Object.freeze([
 ]);
 export const CAPABILITY_BASELINE = Object.freeze({
-  targetVersion: "0.27.0",
+  targetVersion: "0.27.1",
   sourceEvidence,
   helpCommands,
   inventorySections,

package/scripts/platform-smoke/artifacts.mjs ADDED Viewed

@@ -0,0 +1,94 @@
+/** Artifact helpers for platform smoke suites. */
+import { existsSync, mkdirSync, readdirSync, readFileSync, writeFileSync } from "node:fs";
+import { relative, resolve } from "node:path";
+export function createSuiteDir(artifactRoot, runId, targetName, suiteName) {
+	const dir = resolve(process.cwd(), artifactRoot, runId, targetName, suiteName);
+	mkdirSync(dir, { recursive: true });
+	return dir;
+}
+export function writeCommand(dir, command) {
+	writeFileSync(resolve(dir, "command.txt"), `${command}\n`);
+}
+export function writeExitCode(dir, code, signal) {
+	writeFileSync(resolve(dir, "exit-code.txt"), `code=${code}\nsignal=${signal ?? "none"}\n`);
+}
+export function writeSummary(dir, data) {
+	writeFileSync(resolve(dir, "summary.json"), JSON.stringify({ ...data, writtenAt: new Date().toISOString() }, null, 2));
+}
+export function writeManifest(dir, expectedFiles) {
+	const present = [];
+	function walk(current) {
+		for (const entry of readdirSync(current, { withFileTypes: true })) {
+			const path = resolve(current, entry.name);
+			if (entry.isDirectory()) walk(path);
+			else if (entry.isFile()) present.push(relative(dir, path));
+		}
+	}
+	if (existsSync(dir)) walk(dir);
+	const allPresent = [...new Set([...present, "artifact-manifest.json"])].sort();
+	const manifest = {
+		expected: expectedFiles,
+		present: allPresent,
+		missing: expectedFiles.filter((file) => !allPresent.includes(file)),
+		writtenAt: new Date().toISOString(),
+	};
+	writeFileSync(resolve(dir, "artifact-manifest.json"), JSON.stringify(manifest, null, 2));
+	return manifest;
+}
+export function collectSecretValues(envNames, env = process.env) {
+	return [...new Set(envNames.map((name) => env[name]).filter((value) => typeof value === "string" && value.length >= 8))];
+}
+export function redactSecrets(text, secretValues = []) {
+	let redacted = String(text ?? "");
+	for (const secret of secretValues) {
+		redacted = redacted.split(secret).join("[REDACTED_SECRET]");
+	}
+	return redacted;
+}
+export function scanForSecrets(text, secretValues = []) {
+	const content = String(text ?? "");
+	const violations = [];
+	for (const secret of secretValues) {
+		if (secret && content.includes(secret)) violations.push("raw forwarded secret value");
+	}
+	for (const [pattern, label] of [
+		[/bearer\s+[A-Za-z0-9\-._~+/]{20,}=*/gi, "bearer token"],
+		[/Authorization:\s*Bearer\s+[A-Za-z0-9\-._~+/]{20,}=*/gi, "authorization header"],
+		[/(?:api[_-]?key|access[_-]?token|refresh[_-]?token|cookie)\s*[:=]\s*["']?[A-Za-z0-9_./+\-=]{20,}/gi, "token-like field"],
+	]) {
+		if (pattern.test(content)) violations.push(label);
+	}
+	return [...new Set(violations)];
+}
+export function scanArtifactTextFiles(dir, secretValues = []) {
+	const findings = [];
+	function walk(current) {
+		for (const entry of readdirSync(current, { withFileTypes: true })) {
+			const path = resolve(current, entry.name);
+			if (entry.isDirectory()) {
+				walk(path);
+				continue;
+			}
+			if (!entry.isFile()) continue;
+			if (!/\.(?:txt|json|jsonl|md|log|ps1|mjs|js)$/i.test(entry.name)) continue;
+			try {
+				const text = readFileSync(path, "utf8");
+				for (const violation of scanForSecrets(text, secretValues)) findings.push({ file: relative(dir, path), violation });
+			} catch {
+				// Ignore unreadable or non-text files.
+			}
+		}
+	}
+	walk(dir);
+	return findings;
+}