pi-agent-browser-native 0.2.43 → 0.2.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/README.md +6 -1
- package/docs/RELEASE.md +10 -4
- package/docs/SUPPORT_MATRIX.md +1 -1
- package/docs/platform-smoke.md +13 -8
- package/package.json +4 -4
- package/platform-smoke.config.mjs +10 -1
- package/scripts/doctor.mjs +70 -1
- package/scripts/platform-smoke/crabbox-runner.mjs +57 -29
- package/scripts/platform-smoke/doctor.mjs +22 -9
- package/scripts/platform-smoke/targets.mjs +58 -21
- package/scripts/platform-smoke.mjs +1 -0
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,17 @@
|
|
|
4
4
|
|
|
5
5
|
No changes yet.
|
|
6
6
|
|
|
7
|
+
## 0.2.44 - 2026-06-04
|
|
8
|
+
|
|
9
|
+
### Changed
|
|
10
|
+
|
|
11
|
+
- Updated the local Pi development baseline to `@earendil-works/*` `0.78.1` after reviewing the installed Pi 0.78.1 changelog, docs, examples, and extension source. The audit found no runtime migration needed for `ctx.mode` or command-only `ctx.getSystemPromptOptions()`, and kept the public peer dependency ranges non-pinning.
|
|
12
|
+
- Extended the read-only package doctor with a warning-only `pi --version` check so release validation can catch a Pi CLI older than the audited 0.78.1 floor without making Pi 0.78.1 a hard runtime requirement.
|
|
13
|
+
|
|
14
|
+
### Validation
|
|
15
|
+
|
|
16
|
+
- Ran checkout-based interactive `tmux` Pi dogfood with `pi --no-extensions --no-skills -e .` on Pi 0.78.1: `agent_browser` opened and snapshotted `https://example.com`, ran a QA preset against `https://react.dev` expecting `React`, saved and verified a screenshot, reported no console/network/page errors, closed the browser session, and cleaned the temp artifact directory.
|
|
17
|
+
|
|
7
18
|
## 0.2.43 - 2026-06-04
|
|
8
19
|
|
|
9
20
|
### Added
|
package/README.md
CHANGED
|
@@ -74,6 +74,8 @@ The result is optimized for agent work:
|
|
|
74
74
|
|
|
75
75
|
## Fastest way to try it
|
|
76
76
|
|
|
77
|
+
Use Pi 0.78.1 or newer when possible. This package does not hard-pin Pi 0.78.1 as a runtime requirement, but the current release is audited and validated against that extension/package baseline.
|
|
78
|
+
|
|
77
79
|
Install upstream `agent-browser` first and make sure it is on `PATH`:
|
|
78
80
|
|
|
79
81
|
- https://agent-browser.dev/
|
|
@@ -142,6 +144,7 @@ The doctor checks:
|
|
|
142
144
|
|
|
143
145
|
- upstream `agent-browser` exists on `PATH`
|
|
144
146
|
- the installed upstream version matches this wrapper's command-reference baseline
|
|
147
|
+
- `pi --version` meets the recommended Pi floor for this release, as a warning rather than a hard failure
|
|
145
148
|
- Pi settings do not point at multiple active `pi-agent-browser-native` sources
|
|
146
149
|
|
|
147
150
|
It does **not** edit Pi settings and does **not** run upstream `agent-browser doctor --fix`.
|
|
@@ -575,10 +578,11 @@ Cross-platform release coverage uses Crabbox to run macOS, Ubuntu Linux, and nat
|
|
|
575
578
|
```bash
|
|
576
579
|
npm run check:platform-smoke
|
|
577
580
|
npm run smoke:platform:ubuntu-image
|
|
581
|
+
npm run smoke:platform:doctor
|
|
578
582
|
npm run smoke:platform:all
|
|
579
583
|
```
|
|
580
584
|
|
|
581
|
-
The required matrix is documented in [`docs/platform-smoke.md`](docs/platform-smoke.md). It runs `platform-build` (fast target-local verify, pack, clean packed Pi install, `pi list`) and `browser-dogfood-smoke` (real `agent-browser`/browser wrapper smoke) on every target.
|
|
585
|
+
The required matrix is documented in [`docs/platform-smoke.md`](docs/platform-smoke.md). It runs `platform-build` (fast target-local verify, pack, clean packed Pi install, `pi list`) and `browser-dogfood-smoke` (real `agent-browser`/browser wrapper smoke) on every target. Inspect `.artifacts/platform-smoke/` and check `crabbox list --provider local-container` plus `crabbox list --provider parallels` after release runs so cleanup proof is not chat-only.
|
|
582
586
|
|
|
583
587
|
For package release confidence, follow [`docs/RELEASE.md`](docs/RELEASE.md). The release gate is:
|
|
584
588
|
|
|
@@ -586,6 +590,7 @@ For package release confidence, follow [`docs/RELEASE.md`](docs/RELEASE.md). The
|
|
|
586
590
|
npm run doctor
|
|
587
591
|
npm run check:platform-smoke
|
|
588
592
|
npm run smoke:platform:ubuntu-image
|
|
593
|
+
npm run smoke:platform:doctor
|
|
589
594
|
npm run verify -- release
|
|
590
595
|
```
|
|
591
596
|
|
package/docs/RELEASE.md
CHANGED
|
@@ -26,10 +26,11 @@ npm install
|
|
|
26
26
|
npm run doctor
|
|
27
27
|
npm run check:platform-smoke
|
|
28
28
|
npm run smoke:platform:ubuntu-image
|
|
29
|
+
npm run smoke:platform:doctor
|
|
29
30
|
npm run verify -- release
|
|
30
31
|
```
|
|
31
32
|
|
|
32
|
-
`npm run doctor` is a read-only first-run diagnostic for PATH, targeted upstream version, and duplicate package/checkout source conflicts. It does not replace upstream `agent-browser doctor` for browser runtime health and does not edit Pi settings.
|
|
33
|
+
`npm run doctor` is a read-only first-run diagnostic for PATH, targeted upstream version, the recommended Pi release floor, and duplicate package/checkout source conflicts. The Pi version check is a warning, not a hard runtime requirement. It does not replace upstream `agent-browser doctor` for browser runtime health and does not edit Pi settings.
|
|
33
34
|
|
|
34
35
|
`npm run verify -- release` runs:
|
|
35
36
|
|
|
@@ -50,12 +51,17 @@ npm run verify -- dogfood
|
|
|
50
51
|
For direct Crabbox diagnostics outside the full release compose, run:
|
|
51
52
|
|
|
52
53
|
```bash
|
|
53
|
-
npm run
|
|
54
|
+
npm run check:platform-smoke
|
|
54
55
|
npm run smoke:platform:ubuntu-image
|
|
56
|
+
npm run smoke:platform:doctor
|
|
55
57
|
npm run smoke:platform:all
|
|
58
|
+
crabbox list --provider local-container
|
|
59
|
+
crabbox list --provider parallels
|
|
56
60
|
```
|
|
57
61
|
|
|
58
|
-
|
|
62
|
+
The Crabbox gate is only green when suite assertions and artifact manifests under `.artifacts/platform-smoke/` are green and no unexpected lease/clone remains.
|
|
63
|
+
|
|
64
|
+
The deterministic dogfood mode uses the extension harness and the real `agent-browser` on `PATH` against a deterministic local file fixture, then verifies top-level `qa`, `semanticAction`, constrained `job`, screenshot artifact verification, and session close. Use `npm run verify -- dogfood --keep-artifacts` or `--artifact-dir <path>` only while debugging, then delete retained screenshots. This smoke complements, but does not replace, human-readable interactive transcript evidence.
|
|
59
65
|
|
|
60
66
|
Every release also requires interactive `tmux`-driven Pi dogfood with the native `agent_browser` tool against real sites. For extension-focused release smokes, use `pi --no-extensions --no-skills -e .` from the checkout before publish so auto-loaded dogfood/QA skills cannot replace the bounded smoke workflow; run separate skill-enabled dogfood only when validating skill routing or report-generation behavior. Drive prompts with `tmux send-keys`, exercise at least one simple static site and one real documentation/product site, include the higher-level `qa` or `job`/`batch` surfaces when they changed, close every opened browser session, remove screenshots/temp artifacts, and record the outcome in the release notes or support-matrix evidence. Automated localhost, fake-upstream, and deterministic dogfood gates do not replace this human-readable live-site transcript evidence. When `agent_browser_web_search` or package config changed, add one key-free smoke proving the optional tool is absent without config, one fake/unit-backed smoke in the default suite, and one opt-in live Exa or Brave Search check with a real key while confirming the key does not appear in transcripts, stdout/stderr, config status, PR text, or artifacts. When `electron.*` surfaces, attached-session diagnostics, or `qa.attached` changed, add a local Electron pass: `electron.list` → `electron.launch` (expect isolated profile behavior) → `snapshot -i` or `electron.probe` / `qa.attached` → `electron.cleanup` with the returned `launchId`, verifying status/mismatch guidance if you simulate a dead renderer or stale refs. For dense-dashboard stress coverage, use the [public Grafana stress checklist](#public-grafana-stress-checklist) below; it is a maintainer workflow, not bundled product skill or recipe runtime.
|
|
61
67
|
|
|
@@ -227,7 +233,7 @@ These show up often in cloud dev boxes and scripted smokes; they are maintainer
|
|
|
227
233
|
|
|
228
234
|
| Topic | What to watch for | Mitigation |
|
|
229
235
|
| --- | --- | --- |
|
|
230
|
-
| **Pi CLI vs repo devDependencies** | Global `pi` older than the
|
|
236
|
+
| **Pi CLI vs repo devDependencies** | Global `pi` older than the recommended Pi floor for the release can change TUI behavior, `/reload`, package installs, and tool routing during lifecycle or checkout smokes. | Run `npm run doctor` and align `pi` with the current audited baseline before release gates (`pi update` or install the matching version). The published peer range stays non-pinning; the local release gate should use the audited Pi version. |
|
|
231
237
|
| **npm lockfile (`packageManager`)** | `package.json` pins **npm@11**. npm 10 may only strip optional `libc` metadata on `@esbuild/*` platform entries in `package-lock.json` (no dependency version change). | Prefer `npx -y npm@11.14.0 install` when refreshing the lockfile; do not commit npm-10-only lockfile churn. |
|
|
232
238
|
| **`pi -p` / print mode** | Non-interactive `pi -p` may hang or emit no stdout for long real-browser smokes without a TTY. | Use **tmux**-driven interactive `pi` for release evidence and checkout smokes; reserve `-p` for short, non-browser checks. |
|
|
233
239
|
| **Real-browser cleanup** | `real-upstream`, Sauce Demo, and live-site runs can leave defunct Chrome/`agent-browser` children if a session aborts mid-flow. | Close via `agent_browser` / `agent-browser` `close`, kill stray tmux sessions, and remove temp screenshots/HARs under `/tmp` or your chosen artifact dirs. |
|
package/docs/SUPPORT_MATRIX.md
CHANGED
|
@@ -64,7 +64,7 @@ Re-run the gates below before each release; this table records what the closure
|
|
|
64
64
|
| Packaged Pi smoke | `npm run verify -- package-pi` validates package contents, loads the packaged `agent_browser` tool without requiring optional Brave config, and executes fake-upstream `--version`. | Pass on 2026-06-03 as part of `npm run verify -- release` (`npm run verify -- package-pi` slice). |
|
|
65
65
|
| Deterministic dogfood smoke | `npm run verify -- dogfood` (`scripts/verify-agent-browser-dogfood.ts`) drives the native wrapper against a local file fixture through top-level `qa`, `semanticAction`, constrained `job`, screenshot artifact verification, and session close with the real `agent-browser` on `PATH`. | Pass on 2026-06-03 (`npm run verify -- dogfood`, `agent-browser 0.27.1`; artifacts cleaned by the harness). |
|
|
66
66
|
| Efficiency benchmark | `npm run verify -- benchmark` runs deterministic browser workflow accounting plus focused benchmark tests, including JSONL sampling fixtures and job/qa/sourceLookup/networkSourceLookup/Electron scenario coverage. | Pass on 2026-05-29 (`npm run verify -- benchmark`). |
|
|
67
|
-
| Crabbox platform smoke | `npm run check:platform-smoke` syntax-checks the harness and cheap invariants. `npm run smoke:platform:all` runs doctor first, then fast target-local `platform-build` (`npm run verify -- platform-target`, pack, clean Pi install) plus `browser-dogfood-smoke` on Crabbox `macos`, `ubuntu`, and `windows-native`; see [`platform-smoke.md`](platform-smoke.md). | Pass on 2026-06-03 (`npm run check:platform-smoke`, `npm run smoke:platform:ubuntu-image`, and `npm run verify -- release`, whose platform slice ran the macOS/Ubuntu/native-Windows Crabbox matrix; artifacts cleaned after evidence capture). |
|
|
67
|
+
| Crabbox platform smoke | `npm run check:platform-smoke` syntax-checks the harness and cheap invariants. `npm run smoke:platform:ubuntu-image` builds the project-owned Linux image, `npm run smoke:platform:doctor` checks Crabbox 0.26.0+ and local target readiness, and `npm run smoke:platform:all` runs doctor first, then fast target-local `platform-build` (`npm run verify -- platform-target`, pack, clean Pi install) plus `browser-dogfood-smoke` on Crabbox `macos`, `ubuntu`, and `windows-native`; see [`platform-smoke.md`](platform-smoke.md). Target artifacts include Crabbox/provider/work-root metadata, and release review also checks provider-specific `crabbox list` commands for leftover leases/clones. | Pass on 2026-06-03 (`npm run check:platform-smoke`, `npm run smoke:platform:ubuntu-image`, and `npm run verify -- release`, whose platform slice ran the macOS/Ubuntu/native-Windows Crabbox matrix; artifacts cleaned after evidence capture). |
|
|
68
68
|
| `verify -- release` / `prepublishOnly` | `npm run verify -- release` chains the default gate with packaged Pi smoke and the release-blocking Crabbox platform matrix (`verifySteps` `release` in [`scripts/project.mjs`](../scripts/project.mjs)). `package.json` `prepublishOnly` runs that compose before `npm pack --dry-run` during `npm publish`. It intentionally omits standalone lifecycle, real-upstream, host-only dogfood, and benchmark modes—see [`RELEASE.md`](RELEASE.md#pre-release-checks). | Pass on 2026-06-03 (`npm run verify -- release`, including macOS/Ubuntu/native-Windows Crabbox matrix). |
|
|
69
69
|
| Configured-source lifecycle | `npm run verify -- lifecycle` (`scripts/verify-lifecycle.mjs`) drives `/reload`, closes and relaunches Pi with the same exact `--session-id`, checks the JSONL session header id, session continuity, slash-command sentinel tokens (`v1` then `v2` after rewriting the packaged extension to simulate pickup), persisted spill reachability, and real Pi `tool_result` failure-patch semantics for a QA reclassification with a fake upstream on `PATH`. Default Pi model is `zai/glm-5.1`; default per-step wait is **180000 ms** (`DEFAULT_TIMEOUT_MS`); override model with `--model <id>` and waits with `--timeout-ms <ms>`. Passthrough flags in [`scripts/project.mjs`](../scripts/project.mjs): `--keep-artifacts`, `--model`, `--verbose`, and `--timeout-ms` plus a value (for example `npm run verify -- lifecycle --model openai-codex/gpt-5.5:minimal --keep-artifacts --verbose --timeout-ms 600000`). | Pass on 2026-06-03 (`npm run verify -- lifecycle`). Treat any future unexplained red lifecycle gate as a release blocker. |
|
|
70
70
|
| Quick isolated Pi smoke | `pi --no-extensions --no-skills -e . --tools agent_browser` from repo root; native `agent_browser` only. | Last interactive tmux checkout smoke pass on 2026-05-29 (`agent-browser 0.27.0` at the time). The 2026-06-03 Crabbox matrix now covers clean packed Pi install plus deterministic wrapper dogfood on all required platforms for `agent-browser 0.27.1`; run a new manual tmux smoke before publish when human-readable transcript evidence is required. Broader historical coverage also includes version/help/skills, open/snapshot/click, eval stdin, batch stdin, screenshot, explicit session, `sessionMode: "fresh"`, network requests, console/errors, diff snapshot, stream status/disable, dashboard start/stop, and chat credential-failure pass-through during RQ-0055. |
|
package/docs/platform-smoke.md
CHANGED
|
@@ -6,15 +6,18 @@ This is a release-blocking gate. Missing Crabbox setup, Docker, macOS SSH, the n
|
|
|
6
6
|
|
|
7
7
|
## Required release gate
|
|
8
8
|
|
|
9
|
-
Run the cheap harness checks first, then the full matrix:
|
|
9
|
+
Run the cheap harness checks first, build the project-owned Ubuntu image, run doctor explicitly, then run the full matrix and inspect the evidence:
|
|
10
10
|
|
|
11
11
|
```sh
|
|
12
12
|
npm run check:platform-smoke
|
|
13
13
|
npm run smoke:platform:ubuntu-image
|
|
14
|
+
npm run smoke:platform:doctor
|
|
14
15
|
npm run smoke:platform:all
|
|
16
|
+
crabbox list --provider local-container
|
|
17
|
+
crabbox list --provider parallels
|
|
15
18
|
```
|
|
16
19
|
|
|
17
|
-
`smoke:platform:all` runs `smoke:platform:doctor` before any target suite starts. The canonical `npm run verify -- release` gate also runs the same platform doctor and full `macos,ubuntu,windows-native` matrix after default verification and packaged Pi smoke, so `npm publish` cannot pass `prepublishOnly` without the platform gate.
|
|
20
|
+
`smoke:platform:all` also runs `smoke:platform:doctor` before any target suite starts, so the explicit doctor step is a readable release checklist step rather than a hidden precondition. The canonical `npm run verify -- release` gate also runs the same platform doctor and full `macos,ubuntu,windows-native` matrix after default verification and packaged Pi smoke, so `npm publish` cannot pass `prepublishOnly` without the platform gate. After the matrix, inspect `.artifacts/platform-smoke/<run-id>/...` summaries and manifests; a green Crabbox exit without matching suite assertions is not release proof. Use provider-specific `crabbox list` commands for cleanup review because this host may have unrelated Crabbox providers configured that require credentials.
|
|
18
21
|
|
|
19
22
|
Per-target commands are for diagnosis:
|
|
20
23
|
|
|
@@ -43,7 +46,7 @@ crabbox --version
|
|
|
43
46
|
crabbox providers
|
|
44
47
|
```
|
|
45
48
|
|
|
46
|
-
Use `PLATFORM_SMOKE_CRABBOX=/path/to/crabbox` only when testing a non-default Crabbox binary.
|
|
49
|
+
Use Crabbox `0.26.0` or newer. Use `PLATFORM_SMOKE_CRABBOX=/path/to/crabbox` only when testing a non-default Crabbox binary.
|
|
47
50
|
|
|
48
51
|
Standard configuration knobs:
|
|
49
52
|
|
|
@@ -51,6 +54,8 @@ Standard configuration knobs:
|
|
|
51
54
|
PLATFORM_SMOKE_MAC_HOST=localhost
|
|
52
55
|
PLATFORM_SMOKE_MAC_USER="$USER"
|
|
53
56
|
PLATFORM_SMOKE_MAC_WORK_ROOT="/Users/$USER/crabbox/pi-agent-browser-native"
|
|
57
|
+
# Optional only when localhost SSH does not use port 22.
|
|
58
|
+
PLATFORM_SMOKE_MAC_PORT=22
|
|
54
59
|
|
|
55
60
|
# Default local image built by npm run smoke:platform:ubuntu-image.
|
|
56
61
|
PLATFORM_SMOKE_UBUNTU_IMAGE="pi-agent-browser-native-platform:node24-agent-browser0.27.1"
|
|
@@ -64,7 +69,7 @@ PLATFORM_SMOKE_WINDOWS_WORK_ROOT="C:\\crabbox\\pi-agent-browser-native"
|
|
|
64
69
|
PLATFORM_SMOKE_AUTH_ENV=""
|
|
65
70
|
```
|
|
66
71
|
|
|
67
|
-
The Ubuntu target image is derived from `node:24-bookworm`, installs `agent-browser@0.27.1`, installs Debian Chromium through apt, creates a non-root `circleci` user, and sets `AGENT_BROWSER_EXECUTABLE_PATH=/usr/bin/chromium`. Rebuild it after upstream rebaselining, or override `PLATFORM_SMOKE_UBUNTU_IMAGE` with an equivalent prepared local image. Do not install `agent-browser` ad hoc inside the Ubuntu smoke command.
|
|
72
|
+
The Ubuntu target image is derived from `node:24-bookworm`, installs `agent-browser@0.27.1`, installs Debian Chromium through apt, creates a non-root `circleci` user, and sets `AGENT_BROWSER_EXECUTABLE_PATH=/usr/bin/chromium`. Rebuild it after upstream rebaselining, or override `PLATFORM_SMOKE_UBUNTU_IMAGE` with an equivalent prepared local image. Do not install `agent-browser` ad hoc inside the Ubuntu smoke command; a missing tool is image/template drift.
|
|
68
73
|
|
|
69
74
|
The configured upstream `agent-browser` baseline is imported from [`scripts/agent-browser-capability-baseline.mjs`](../scripts/agent-browser-capability-baseline.mjs). Target-local browser suites verify that exact `agent-browser` version before running. Bake the exact upstream CLI and browser runtime into the Windows template/snapshot for speed and reproducibility; missing or stale Windows `agent-browser` / browser readiness is a blocked setup, not something the smoke command repairs. The Windows browser suite checks the preinstalled browser cache and prewarms one short local file URL before the extension harness runs.
|
|
70
75
|
|
|
@@ -78,7 +83,7 @@ Crabbox does not install project runtime tools. The macOS host, Ubuntu image, an
|
|
|
78
83
|
- Browser/runtime dependencies needed by upstream `agent-browser`.
|
|
79
84
|
- Native PowerShell and OpenSSH Server on Windows.
|
|
80
85
|
|
|
81
|
-
For Windows, reuse `pi-extension-windows-template` with the shared canonical `crabbox-ready` power-off snapshot. Do not create one-off project VMs. If a reusable tool is missing, update the shared template, verify from a fresh SSH session, remove caches/secrets/checkouts, shut down cleanly, and promote a known-good power-off snapshot.
|
|
86
|
+
For Windows, reuse `pi-extension-windows-template` with the shared canonical `crabbox-ready` power-off snapshot configured in [`platform-smoke.config.mjs`](../platform-smoke.config.mjs). Do not create one-off project VMs or run tests directly on the source VM. If a reusable tool is missing, update the shared template, verify from a fresh SSH session, remove caches/secrets/checkouts, shut down cleanly, and promote a known-good power-off snapshot.
|
|
82
87
|
|
|
83
88
|
## What the suites prove
|
|
84
89
|
|
|
@@ -108,7 +113,7 @@ The dogfood suite intentionally uses the checkout harness while `platform-build`
|
|
|
108
113
|
|
|
109
114
|
## Artifact contract
|
|
110
115
|
|
|
111
|
-
Every target suite writes host-side evidence under:
|
|
116
|
+
Every target run writes host-side evidence under one run id shared by that target’s suites:
|
|
112
117
|
|
|
113
118
|
```text
|
|
114
119
|
.artifacts/platform-smoke/<run-id>/<target>/<suite>/
|
|
@@ -117,9 +122,9 @@ Every target suite writes host-side evidence under:
|
|
|
117
122
|
Required files include:
|
|
118
123
|
|
|
119
124
|
```text
|
|
120
|
-
summary.json
|
|
125
|
+
summary.json # includes ok, target, suite, exit code, elapsed time, writtenAt
|
|
121
126
|
artifact-manifest.json
|
|
122
|
-
target.json
|
|
127
|
+
target.json # package, package version, Crabbox binary/version, provider, work root/image/template
|
|
123
128
|
suite.json
|
|
124
129
|
command.txt
|
|
125
130
|
exit-code.txt
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-agent-browser-native",
|
|
3
|
-
"version": "0.2.43",
|
|
3
|
+
"version": "0.2.44",
|
|
4
4
|
"description": "pi extension that exposes agent-browser as a native tool for browser automation",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"author": "Mitch Fultz (https://github.com/fitchmultz)",
|
|
@@ -62,9 +62,9 @@
|
|
|
62
62
|
"typebox": "*"
|
|
63
63
|
},
|
|
64
64
|
"devDependencies": {
|
|
65
|
-
"@earendil-works/pi-ai": "^0.78.
|
|
66
|
-
"@earendil-works/pi-coding-agent": "^0.78.
|
|
67
|
-
"@earendil-works/pi-tui": "^0.78.
|
|
65
|
+
"@earendil-works/pi-ai": "^0.78.1",
|
|
66
|
+
"@earendil-works/pi-coding-agent": "^0.78.1",
|
|
67
|
+
"@earendil-works/pi-tui": "^0.78.1",
|
|
68
68
|
"@types/node": "^25.6.1",
|
|
69
69
|
"tsx": "^4.21.0",
|
|
70
70
|
"typebox": "^1.1.38",
|
|
package/platform-smoke.config.mjs
CHANGED
|
@@ -8,11 +8,20 @@ export default {
|
|
|
8
8
|
artifactRoot: ".artifacts/platform-smoke",
|
|
9
9
|
requiredTargets: ["macos", "ubuntu", "windows-native"],
|
|
10
10
|
requiredSuites: ["platform-build", "browser-dogfood-smoke"],
|
|
11
|
+
supportedTargets: ["macos", "ubuntu", "windows-native"],
|
|
11
12
|
requiredCrabbox: {
|
|
12
13
|
install: "Homebrew package or PLATFORM_SMOKE_CRABBOX override",
|
|
13
|
-
minVersion: "0.
|
|
14
|
+
minVersion: "0.26.0",
|
|
15
|
+
},
|
|
16
|
+
macos: {
|
|
17
|
+
host: "localhost",
|
|
18
|
+
port: 22,
|
|
14
19
|
},
|
|
15
20
|
ubuntuContainerImage: "pi-agent-browser-native-platform:node24-agent-browser0.27.1",
|
|
21
|
+
windowsParallels: {
|
|
22
|
+
sourceVm: "pi-extension-windows-template",
|
|
23
|
+
snapshot: "crabbox-ready",
|
|
24
|
+
},
|
|
16
25
|
nodeValidationMajor: 22,
|
|
17
26
|
agentBrowserVersion: CAPABILITY_BASELINE.targetVersion,
|
|
18
27
|
};
|
package/scripts/doctor.mjs
CHANGED
|
@@ -22,6 +22,7 @@ const PACKAGE_NAME = "pi-agent-browser-native";
|
|
|
22
22
|
const REPO_URL_FRAGMENT = "github.com/fitchmultz/pi-agent-browser-native";
|
|
23
23
|
const EXTENSION_ENTRYPOINT = "extensions/agent-browser/index.ts";
|
|
24
24
|
const EXPECTED_VERSION = CAPABILITY_BASELINE.targetVersion;
|
|
25
|
+
const RECOMMENDED_PI_VERSION = "0.78.1";
|
|
25
26
|
const DEFAULT_AGENT_DIR = resolve(homedir(), ".pi/agent");
|
|
26
27
|
const THIS_PACKAGE_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "..");
|
|
27
28
|
|
|
@@ -29,6 +30,27 @@ export function normalizeAgentBrowserVersion(output) {
|
|
|
29
30
|
return String(output ?? "").trim().replace(/^agent-browser\s+/, "");
|
|
30
31
|
}
|
|
31
32
|
|
|
33
|
+
export function normalizePiVersion(output) {
|
|
34
|
+
return String(output ?? "").trim().replace(/^pi\s+/, "");
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
function parseVersionParts(version) {
|
|
38
|
+
const match = String(version ?? "").match(/^(\d+)\.(\d+)\.(\d+)(?:\b|[-+])/);
|
|
39
|
+
if (!match) return undefined;
|
|
40
|
+
return match.slice(1).map((part) => Number.parseInt(part, 10));
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
function versionAtLeast(actual, minimum) {
|
|
44
|
+
const actualParts = parseVersionParts(actual);
|
|
45
|
+
const minimumParts = parseVersionParts(minimum);
|
|
46
|
+
if (!actualParts || !minimumParts) return undefined;
|
|
47
|
+
for (let index = 0; index < minimumParts.length; index += 1) {
|
|
48
|
+
if (actualParts[index] > minimumParts[index]) return true;
|
|
49
|
+
if (actualParts[index] < minimumParts[index]) return false;
|
|
50
|
+
}
|
|
51
|
+
return true;
|
|
52
|
+
}
|
|
53
|
+
|
|
32
54
|
function printHelp() {
|
|
33
55
|
console.log(`pi-agent-browser-doctor
|
|
34
56
|
|
|
@@ -45,7 +67,8 @@ Options:
|
|
|
45
67
|
Checks:
|
|
46
68
|
1. agent-browser is installed on PATH.
|
|
47
69
|
2. agent-browser --version matches the package capability baseline.
|
|
48
|
-
3.
|
|
70
|
+
3. pi --version is at least the recommended Pi floor for this release.
|
|
71
|
+
4. Pi settings and repo-local autoload locations do not point at multiple active pi-agent-browser-native sources.
|
|
49
72
|
|
|
50
73
|
Examples:
|
|
51
74
|
pi-agent-browser-doctor
|
|
@@ -101,6 +124,11 @@ async function defaultRunAgentBrowser(args) {
|
|
|
101
124
|
return `${stdout}${stderr}`;
|
|
102
125
|
}
|
|
103
126
|
|
|
127
|
+
async function defaultRunPi(args) {
|
|
128
|
+
const { stdout, stderr } = await execFile("pi", args, { maxBuffer: 1024 * 1024 });
|
|
129
|
+
return `${stdout}${stderr}`;
|
|
130
|
+
}
|
|
131
|
+
|
|
104
132
|
async function defaultPathExists(path) {
|
|
105
133
|
try {
|
|
106
134
|
await access(path);
|
|
@@ -270,6 +298,43 @@ async function collectRepoLocalSources({ cwd, pathExists }) {
|
|
|
270
298
|
return sources;
|
|
271
299
|
}
|
|
272
300
|
|
|
301
|
+
async function checkPiVersion({ runPi }) {
|
|
302
|
+
try {
|
|
303
|
+
const rawOutput = await runPi(["--version"]);
|
|
304
|
+
const version = normalizePiVersion(rawOutput);
|
|
305
|
+
const supported = versionAtLeast(version, RECOMMENDED_PI_VERSION);
|
|
306
|
+
if (supported === false) {
|
|
307
|
+
return {
|
|
308
|
+
status: "warn",
|
|
309
|
+
title: `Pi ${RECOMMENDED_PI_VERSION} or newer is recommended; found ${version || "<empty>"}.`,
|
|
310
|
+
lines: [
|
|
311
|
+
"This package does not hard-pin Pi 0.78.1, but this release was audited against Pi 0.78.1 extension/package behavior.",
|
|
312
|
+
"Update Pi before release validation or lifecycle debugging if you see tool routing, /reload, exact-session, or package-install differences.",
|
|
313
|
+
],
|
|
314
|
+
};
|
|
315
|
+
}
|
|
316
|
+
if (supported === undefined) {
|
|
317
|
+
return {
|
|
318
|
+
status: "warn",
|
|
319
|
+
title: `Could not parse pi --version output: ${version || "<empty>"}.`,
|
|
320
|
+
lines: [`Pi ${RECOMMENDED_PI_VERSION} or newer is recommended for this release's validation baseline.`],
|
|
321
|
+
};
|
|
322
|
+
}
|
|
323
|
+
return { status: "pass", title: `Pi version is within the recommended baseline: ${version}`, lines: [] };
|
|
324
|
+
} catch (error) {
|
|
325
|
+
const code = error && typeof error === "object" ? error.code : undefined;
|
|
326
|
+
return {
|
|
327
|
+
status: "warn",
|
|
328
|
+
title: "Could not inspect pi --version.",
|
|
329
|
+
lines: [
|
|
330
|
+
`Pi ${RECOMMENDED_PI_VERSION} or newer is recommended for this release's validation baseline, but it is not hard-pinned as a runtime requirement.`,
|
|
331
|
+
"Make sure the same shell that launches pi can run `pi --version` when debugging lifecycle or package-install behavior.",
|
|
332
|
+
code && code !== "ENOENT" ? `Spawn error: ${String(code)}` : undefined,
|
|
333
|
+
].filter(Boolean),
|
|
334
|
+
};
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
|
|
273
338
|
async function checkAgentBrowserVersion({ runAgentBrowser }) {
|
|
274
339
|
try {
|
|
275
340
|
const rawOutput = await runAgentBrowser(["--version"]);
|
|
@@ -358,6 +423,7 @@ export async function evaluateDoctor(options = {}) {
|
|
|
358
423
|
const readText = options.readText ?? ((path) => readFile(path, "utf8"));
|
|
359
424
|
const pathExists = options.pathExists ?? defaultPathExists;
|
|
360
425
|
const runAgentBrowser = options.runAgentBrowser ?? defaultRunAgentBrowser;
|
|
426
|
+
const runPi = options.runPi ?? defaultRunPi;
|
|
361
427
|
const checks = [];
|
|
362
428
|
const failures = [];
|
|
363
429
|
const warnings = [];
|
|
@@ -366,6 +432,9 @@ export async function evaluateDoctor(options = {}) {
|
|
|
366
432
|
checks.push(versionCheck);
|
|
367
433
|
if (versionCheck.status === "fail") failures.push(versionCheck);
|
|
368
434
|
|
|
435
|
+
const piVersionCheck = await checkPiVersion({ runPi });
|
|
436
|
+
checks.push(piVersionCheck);
|
|
437
|
+
|
|
369
438
|
if (!options.skipSourceCheck) {
|
|
370
439
|
const sourceCheck = await checkPiSources({ cwd, agentDir, settingsPaths, readText, pathExists });
|
|
371
440
|
checks.push(sourceCheck);
|
|
package/scripts/platform-smoke/crabbox-runner.mjs
CHANGED
|
@@ -14,49 +14,77 @@ function packageSlug(config = {}) {
|
|
|
14
14
|
return process.env.PLATFORM_SMOKE_PACKAGE_SLUG || config.packageName || "pi-agent-browser-native";
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
-
export function
|
|
17
|
+
export function describeTarget(targetName, config = {}) {
|
|
18
|
+
const slug = packageSlug(config);
|
|
18
19
|
switch (targetName) {
|
|
19
20
|
case "macos": {
|
|
20
21
|
const user = env("PLATFORM_SMOKE_MAC_USER") || env("USER");
|
|
21
|
-
const host = env("PLATFORM_SMOKE_MAC_HOST") || "localhost";
|
|
22
|
-
const
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
"
|
|
27
|
-
"
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
22
|
+
const host = env("PLATFORM_SMOKE_MAC_HOST") || config.macos?.host || "localhost";
|
|
23
|
+
const port = String(env("PLATFORM_SMOKE_MAC_PORT") || config.macos?.port || 22);
|
|
24
|
+
const workRoot = env("PLATFORM_SMOKE_MAC_WORK_ROOT") || config.macos?.workRoot || `/Users/${user}/crabbox/${slug}`;
|
|
25
|
+
return {
|
|
26
|
+
provider: "ssh",
|
|
27
|
+
crabboxTarget: "macos",
|
|
28
|
+
shell: "posix",
|
|
29
|
+
workRoot,
|
|
30
|
+
args: [
|
|
31
|
+
"--provider", "ssh",
|
|
32
|
+
"--target", "macos",
|
|
33
|
+
"--static-host", host,
|
|
34
|
+
"--static-user", user,
|
|
35
|
+
"--static-port", port,
|
|
36
|
+
"--static-work-root", workRoot,
|
|
37
|
+
],
|
|
38
|
+
};
|
|
31
39
|
}
|
|
32
40
|
case "ubuntu": {
|
|
33
41
|
const image = env("PLATFORM_SMOKE_UBUNTU_IMAGE") || config.ubuntuContainerImage || "pi-agent-browser-native-platform:node24-agent-browser0.27.1";
|
|
34
|
-
return
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
"
|
|
38
|
-
|
|
42
|
+
return {
|
|
43
|
+
provider: "local-container",
|
|
44
|
+
crabboxTarget: "linux",
|
|
45
|
+
shell: "posix",
|
|
46
|
+
image,
|
|
47
|
+
workRoot: config.localContainer?.workRoot || "/work/crabbox",
|
|
48
|
+
args: [
|
|
49
|
+
"--provider", "local-container",
|
|
50
|
+
"--target", "linux",
|
|
51
|
+
"--local-container-image", image,
|
|
52
|
+
],
|
|
53
|
+
};
|
|
39
54
|
}
|
|
40
55
|
case "windows-native": {
|
|
41
|
-
const vm = env("PLATFORM_SMOKE_WINDOWS_VM") || "pi-extension-windows-template";
|
|
42
|
-
const snapshot = env("PLATFORM_SMOKE_WINDOWS_SNAPSHOT") || "crabbox-ready";
|
|
43
|
-
const user = env("PLATFORM_SMOKE_WINDOWS_USER") || env("USER");
|
|
44
|
-
const workRoot = env("PLATFORM_SMOKE_WINDOWS_WORK_ROOT") || `C:\\crabbox\\${
|
|
45
|
-
return
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
"
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
56
|
+
const vm = env("PLATFORM_SMOKE_WINDOWS_VM") || config.windowsParallels?.sourceVm || "pi-extension-windows-template";
|
|
57
|
+
const snapshot = env("PLATFORM_SMOKE_WINDOWS_SNAPSHOT") || config.windowsParallels?.snapshot || "crabbox-ready";
|
|
58
|
+
const user = env("PLATFORM_SMOKE_WINDOWS_USER") || config.windowsParallels?.user || env("USER");
|
|
59
|
+
const workRoot = env("PLATFORM_SMOKE_WINDOWS_WORK_ROOT") || config.windowsParallels?.workRoot || `C:\\crabbox\\${slug}`;
|
|
60
|
+
return {
|
|
61
|
+
provider: "parallels",
|
|
62
|
+
crabboxTarget: "windows",
|
|
63
|
+
shell: "powershell",
|
|
64
|
+
workRoot,
|
|
65
|
+
windowsMode: "normal",
|
|
66
|
+
sourceVm: vm,
|
|
67
|
+
snapshot,
|
|
68
|
+
args: [
|
|
69
|
+
"--provider", "parallels",
|
|
70
|
+
"--target", "windows",
|
|
71
|
+
"--windows-mode", "normal",
|
|
72
|
+
"--parallels-source", vm,
|
|
73
|
+
"--parallels-source-snapshot", snapshot,
|
|
74
|
+
"--parallels-user", user,
|
|
75
|
+
"--parallels-work-root", workRoot,
|
|
76
|
+
],
|
|
77
|
+
};
|
|
54
78
|
}
|
|
55
79
|
default:
|
|
56
80
|
throw new Error(`unknown platform smoke target: ${targetName}`);
|
|
57
81
|
}
|
|
58
82
|
}
|
|
59
83
|
|
|
84
|
+
export function buildTargetBaseArgs(targetName, config = {}) {
|
|
85
|
+
return describeTarget(targetName, config).args;
|
|
86
|
+
}
|
|
87
|
+
|
|
60
88
|
export function leaseIdFor(targetName, slug) {
|
|
61
89
|
if (targetName === "macos") return "static_localhost";
|
|
62
90
|
return slug;
|
|
@@ -105,6 +105,17 @@ function checkForbiddenProjectFiles(failures) {
|
|
|
105
105
|
}
|
|
106
106
|
|
|
107
107
|
function crabboxProviders(cbox) {
|
|
108
|
+
const jsonOutput = silent(cbox, ["providers", "--json"]);
|
|
109
|
+
if (jsonOutput) {
|
|
110
|
+
try {
|
|
111
|
+
const parsed = JSON.parse(jsonOutput);
|
|
112
|
+
if (Array.isArray(parsed)) return parsed.map((provider) => provider.name ?? provider.id ?? provider.provider).filter(Boolean);
|
|
113
|
+
if (Array.isArray(parsed.providers)) return parsed.providers.map((provider) => provider.name ?? provider.id ?? provider.provider).filter(Boolean);
|
|
114
|
+
if (typeof parsed === "object" && parsed) return Object.keys(parsed.providers ?? parsed);
|
|
115
|
+
} catch {
|
|
116
|
+
// Fall through to text parsing for older or non-JSON provider output.
|
|
117
|
+
}
|
|
118
|
+
}
|
|
108
119
|
const output = silent(cbox, ["providers"]);
|
|
109
120
|
if (!output) return [];
|
|
110
121
|
return output.split(/\r?\n/)
|
|
@@ -212,9 +223,10 @@ export async function runDoctor(config) {
|
|
|
212
223
|
const ubuntuImage = env("PLATFORM_SMOKE_UBUNTU_IMAGE") || config?.ubuntuContainerImage || "pi-agent-browser-native-platform:node24-agent-browser0.27.1";
|
|
213
224
|
checkCrabboxProvider(cbox, ["--provider", "local-container", "--local-container-image", ubuntuImage], "ubuntu local-container", failures);
|
|
214
225
|
const macUser = env("PLATFORM_SMOKE_MAC_USER") || env("USER");
|
|
215
|
-
const macHost = env("PLATFORM_SMOKE_MAC_HOST") || "localhost";
|
|
216
|
-
const
|
|
217
|
-
|
|
226
|
+
const macHost = env("PLATFORM_SMOKE_MAC_HOST") || config?.macos?.host || "localhost";
|
|
227
|
+
const macPort = String(env("PLATFORM_SMOKE_MAC_PORT") || config?.macos?.port || 22);
|
|
228
|
+
const macRoot = env("PLATFORM_SMOKE_MAC_WORK_ROOT") || config?.macos?.workRoot || `/Users/${macUser}/crabbox/${packageName}`;
|
|
229
|
+
checkCrabboxProvider(cbox, ["--provider", "ssh", "--target", "macos", "--static-host", macHost, "--static-user", macUser, "--static-port", macPort, "--static-work-root", macRoot], "macOS ssh", failures);
|
|
218
230
|
}
|
|
219
231
|
|
|
220
232
|
console.log("\n── Docker / Ubuntu ──");
|
|
@@ -226,8 +238,9 @@ export async function runDoctor(config) {
|
|
|
226
238
|
|
|
227
239
|
console.log("\n── macOS SSH ──");
|
|
228
240
|
const sshUser = env("PLATFORM_SMOKE_MAC_USER") || env("USER");
|
|
229
|
-
const sshHost = env("PLATFORM_SMOKE_MAC_HOST") || "localhost";
|
|
230
|
-
const
|
|
241
|
+
const sshHost = env("PLATFORM_SMOKE_MAC_HOST") || config?.macos?.host || "localhost";
|
|
242
|
+
const sshPort = String(env("PLATFORM_SMOKE_MAC_PORT") || config?.macos?.port || 22);
|
|
243
|
+
const sshProbe = shell(`ssh -o BatchMode=yes -o ConnectTimeout=5 -o StrictHostKeyChecking=no -p ${sshPort} ${sshUser}@${sshHost} 'node --version && npm --version && git --version && agent-browser --version'`);
|
|
231
244
|
if (sshProbe) {
|
|
232
245
|
ok(`SSH ${sshUser}@${sshHost}: ${sshProbe.split(/\r?\n/).join(" | ")}`);
|
|
233
246
|
if (agentBrowserVersion && !sshProbe.includes(agentBrowserVersion)) fail(`macOS SSH agent-browser does not match expected ${agentBrowserVersion}`, failures);
|
|
@@ -241,10 +254,10 @@ export async function runDoctor(config) {
|
|
|
241
254
|
fail("prlctl not found", failures);
|
|
242
255
|
} else {
|
|
243
256
|
ok("prlctl found");
|
|
244
|
-
const vmName = env("PLATFORM_SMOKE_WINDOWS_VM") || "pi-extension-windows-template";
|
|
245
|
-
const snapshot = env("PLATFORM_SMOKE_WINDOWS_SNAPSHOT") || "crabbox-ready";
|
|
246
|
-
const user = env("PLATFORM_SMOKE_WINDOWS_USER") || env("USER");
|
|
247
|
-
const workRoot = env("PLATFORM_SMOKE_WINDOWS_WORK_ROOT") || `C:\\crabbox\\${packageName}`;
|
|
257
|
+
const vmName = env("PLATFORM_SMOKE_WINDOWS_VM") || config?.windowsParallels?.sourceVm || "pi-extension-windows-template";
|
|
258
|
+
const snapshot = env("PLATFORM_SMOKE_WINDOWS_SNAPSHOT") || config?.windowsParallels?.snapshot || "crabbox-ready";
|
|
259
|
+
const user = env("PLATFORM_SMOKE_WINDOWS_USER") || config?.windowsParallels?.user || env("USER");
|
|
260
|
+
const workRoot = env("PLATFORM_SMOKE_WINDOWS_WORK_ROOT") || config?.windowsParallels?.workRoot || `C:\\crabbox\\${packageName}`;
|
|
248
261
|
const list = shell("prlctl list -a --no-header 2>/dev/null");
|
|
249
262
|
if (!list) {
|
|
250
263
|
fail("prlctl list returned no VMs", failures);
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
/** Target/suite runner for pi-agent-browser-native platform smoke. */
|
|
2
2
|
|
|
3
|
+
import { execFileSync } from "node:child_process";
|
|
3
4
|
import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
4
5
|
import { dirname, resolve } from "node:path";
|
|
5
6
|
|
|
@@ -14,7 +15,7 @@ import {
|
|
|
14
15
|
writeManifest,
|
|
15
16
|
writeSummary,
|
|
16
17
|
} from "./artifacts.mjs";
|
|
17
|
-
import { cleanupStaleTargetState, runOnLease, stopLease, warmupLease } from "./crabbox-runner.mjs";
|
|
18
|
+
import { cleanupStaleTargetState, crabboxBin, describeTarget, runOnLease, stopLease, warmupLease } from "./crabbox-runner.mjs";
|
|
18
19
|
|
|
19
20
|
export function platformFor(targetName) {
|
|
20
21
|
return targetName === "windows-native" ? "powershell" : "posix";
|
|
@@ -38,6 +39,45 @@ function authEnvAllowList(config = {}) {
|
|
|
38
39
|
return names.map((name) => String(name).trim()).filter(Boolean);
|
|
39
40
|
}
|
|
40
41
|
|
|
42
|
+
function packageVersion() {
|
|
43
|
+
try {
|
|
44
|
+
return JSON.parse(readFileSync("package.json", "utf8")).version ?? null;
|
|
45
|
+
} catch {
|
|
46
|
+
return null;
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
function crabboxVersion() {
|
|
51
|
+
try {
|
|
52
|
+
return execFileSync(crabboxBin(), ["--version"], { encoding: "utf8", stdio: "pipe", timeout: 10_000 }).trim().split(/\r?\n/)[0] ?? null;
|
|
53
|
+
} catch {
|
|
54
|
+
return null;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
function targetEvidence(config, targetName, runId, slug) {
|
|
59
|
+
const target = describeTarget(targetName, config);
|
|
60
|
+
return {
|
|
61
|
+
targetName,
|
|
62
|
+
platform: platformFor(targetName),
|
|
63
|
+
runId,
|
|
64
|
+
slug,
|
|
65
|
+
packageName: config.packageName,
|
|
66
|
+
packageVersion: packageVersion(),
|
|
67
|
+
crabbox: {
|
|
68
|
+
binary: crabboxBin(),
|
|
69
|
+
version: crabboxVersion(),
|
|
70
|
+
provider: target.provider,
|
|
71
|
+
target: target.crabboxTarget,
|
|
72
|
+
workRoot: target.workRoot,
|
|
73
|
+
image: target.image,
|
|
74
|
+
windowsMode: target.windowsMode,
|
|
75
|
+
sourceVm: target.sourceVm,
|
|
76
|
+
snapshot: target.snapshot,
|
|
77
|
+
},
|
|
78
|
+
};
|
|
79
|
+
}
|
|
80
|
+
|
|
41
81
|
function writeRedacted(path, text, secretValues) {
|
|
42
82
|
writeFileSync(path, redactSecrets(text ?? "", secretValues));
|
|
43
83
|
}
|
|
@@ -141,12 +181,11 @@ function finalizeSuite(suiteDir, checks, summary, expectedFiles) {
|
|
|
141
181
|
return { assertions: finalAssertions, manifest: writeManifest(suiteDir, [...expectedFiles, "failures.md"]) };
|
|
142
182
|
}
|
|
143
183
|
|
|
144
|
-
export function createLeaseCleanupResult(config, targetName, leaseId, stopResult, staleCleanupResult = null) {
|
|
184
|
+
export function createLeaseCleanupResult(config, targetName, leaseId, stopResult, staleCleanupResult = null, runId = makeRunId()) {
|
|
145
185
|
const suiteName = "lease-cleanup";
|
|
146
|
-
const runId = makeRunId();
|
|
147
186
|
const suiteDir = createSuiteDir(config.artifactRoot, runId, targetName, suiteName);
|
|
148
187
|
const secretValues = collectSecretValues(authEnvAllowList(config));
|
|
149
|
-
writeFileSync(resolve(suiteDir, "target.json"), JSON.stringify(
|
|
188
|
+
writeFileSync(resolve(suiteDir, "target.json"), JSON.stringify(targetEvidence(config, targetName, runId, `${config.packageName}-${targetName}`), null, 2));
|
|
150
189
|
writeFileSync(resolve(suiteDir, "suite.json"), JSON.stringify({ suiteName, leaseId, modelCalls: 0 }, null, 2));
|
|
151
190
|
writeCommand(suiteDir, `crabbox stop ${targetName} --id ${leaseId}`);
|
|
152
191
|
writeExitCode(suiteDir, stopResult.code, stopResult.signal);
|
|
@@ -180,16 +219,15 @@ export function createLeaseCleanupResult(config, targetName, leaseId, stopResult
|
|
|
180
219
|
return { ok: assertions.ok, suiteDir, assertions };
|
|
181
220
|
}
|
|
182
221
|
|
|
183
|
-
export function createLeaseCleanupFailureResult(config, targetName, leaseId, stopResult) {
|
|
184
|
-
return createLeaseCleanupResult(config, targetName, leaseId, stopResult);
|
|
222
|
+
export function createLeaseCleanupFailureResult(config, targetName, leaseId, stopResult, runId) {
|
|
223
|
+
return createLeaseCleanupResult(config, targetName, leaseId, stopResult, null, runId);
|
|
185
224
|
}
|
|
186
225
|
|
|
187
|
-
export function createLeaseWarmupFailureResult(config, targetName, warmupResult) {
|
|
226
|
+
export function createLeaseWarmupFailureResult(config, targetName, warmupResult, runId = makeRunId()) {
|
|
188
227
|
const suiteName = "lease-warmup";
|
|
189
|
-
const runId = makeRunId();
|
|
190
228
|
const suiteDir = createSuiteDir(config.artifactRoot, runId, targetName, suiteName);
|
|
191
229
|
const secretValues = collectSecretValues(authEnvAllowList(config));
|
|
192
|
-
writeFileSync(resolve(suiteDir, "target.json"), JSON.stringify(
|
|
230
|
+
writeFileSync(resolve(suiteDir, "target.json"), JSON.stringify(targetEvidence(config, targetName, runId, `${config.packageName}-${targetName}`), null, 2));
|
|
193
231
|
writeFileSync(resolve(suiteDir, "suite.json"), JSON.stringify({ suiteName, modelCalls: 0 }, null, 2));
|
|
194
232
|
writeCommand(suiteDir, `crabbox warmup ${targetName}`);
|
|
195
233
|
writeExitCode(suiteDir, warmupResult.code, warmupResult.signal);
|
|
@@ -301,14 +339,13 @@ export function buildBrowserDogfoodCommand(targetName, agentBrowserVersion = "0.
|
|
|
301
339
|
return lines.join("\n");
|
|
302
340
|
}
|
|
303
341
|
|
|
304
|
-
async function runBrowserDogfoodSuite(config, targetName, suiteName, leaseSession) {
|
|
305
|
-
const runId = makeRunId();
|
|
342
|
+
async function runBrowserDogfoodSuite(config, targetName, suiteName, leaseSession, runId = makeRunId()) {
|
|
306
343
|
const suiteDir = createSuiteDir(config.artifactRoot, runId, targetName, suiteName);
|
|
307
344
|
const startedAt = Date.now();
|
|
308
345
|
const platform = platformFor(targetName);
|
|
309
346
|
const slug = `${config.packageName}-${targetName}`;
|
|
310
347
|
const command = buildBrowserDogfoodCommand(targetName, config.agentBrowserVersion);
|
|
311
|
-
writeFileSync(resolve(suiteDir, "target.json"), JSON.stringify(
|
|
348
|
+
writeFileSync(resolve(suiteDir, "target.json"), JSON.stringify(targetEvidence(config, targetName, runId, slug), null, 2));
|
|
312
349
|
writeFileSync(resolve(suiteDir, "suite.json"), JSON.stringify({ suiteName, modelCalls: 0, realBrowser: true }, null, 2));
|
|
313
350
|
writeCommand(suiteDir, command);
|
|
314
351
|
|
|
@@ -367,15 +404,14 @@ async function runBrowserDogfoodSuite(config, targetName, suiteName, leaseSessio
|
|
|
367
404
|
return { ok: assertions.ok, suiteDir, assertions };
|
|
368
405
|
}
|
|
369
406
|
|
|
370
|
-
async function runPlatformBuildSuite(config, targetName, suiteName, leaseSession) {
|
|
371
|
-
const runId = makeRunId();
|
|
407
|
+
async function runPlatformBuildSuite(config, targetName, suiteName, leaseSession, runId = makeRunId()) {
|
|
372
408
|
const suiteDir = createSuiteDir(config.artifactRoot, runId, targetName, suiteName);
|
|
373
409
|
const startedAt = Date.now();
|
|
374
410
|
const platform = platformFor(targetName);
|
|
375
411
|
const slug = `${config.packageName}-${targetName}`;
|
|
376
412
|
const command = buildPlatformBuildCommand(targetName, config.packageName, config.nodeValidationMajor);
|
|
377
413
|
mkdirSync(dirname(suiteDir), { recursive: true });
|
|
378
|
-
writeFileSync(resolve(suiteDir, "target.json"), JSON.stringify(
|
|
414
|
+
writeFileSync(resolve(suiteDir, "target.json"), JSON.stringify(targetEvidence(config, targetName, runId, slug), null, 2));
|
|
379
415
|
writeFileSync(resolve(suiteDir, "suite.json"), JSON.stringify({ suiteName, modelCalls: 0 }, null, 2));
|
|
380
416
|
writeCommand(suiteDir, command);
|
|
381
417
|
|
|
@@ -436,17 +472,18 @@ async function runPlatformBuildSuite(config, targetName, suiteName, leaseSession
|
|
|
436
472
|
return { ok: assertions.ok, suiteDir, assertions };
|
|
437
473
|
}
|
|
438
474
|
|
|
439
|
-
export async function runTargetSuite(config, targetName, suiteName, leaseSession) {
|
|
440
|
-
if (suiteName === "platform-build") return await runPlatformBuildSuite(config, targetName, suiteName, leaseSession);
|
|
441
|
-
if (suiteName === "browser-dogfood-smoke") return await runBrowserDogfoodSuite(config, targetName, suiteName, leaseSession);
|
|
475
|
+
export async function runTargetSuite(config, targetName, suiteName, leaseSession, runId) {
|
|
476
|
+
if (suiteName === "platform-build") return await runPlatformBuildSuite(config, targetName, suiteName, leaseSession, runId);
|
|
477
|
+
if (suiteName === "browser-dogfood-smoke") return await runBrowserDogfoodSuite(config, targetName, suiteName, leaseSession, runId);
|
|
442
478
|
throw new Error(`unknown suite: ${suiteName}`);
|
|
443
479
|
}
|
|
444
480
|
|
|
445
481
|
export async function runTargetSuites(config, targetName, suiteNames) {
|
|
446
482
|
const slug = `${config.packageName}-${targetName}`;
|
|
483
|
+
const runId = makeRunId();
|
|
447
484
|
const lease = await warmupLease(targetName, slug, config);
|
|
448
485
|
if (!lease.ok) {
|
|
449
|
-
const warmupFailure = createLeaseWarmupFailureResult(config, targetName, lease);
|
|
486
|
+
const warmupFailure = createLeaseWarmupFailureResult(config, targetName, lease, runId);
|
|
450
487
|
return { ok: false, results: [warmupFailure] };
|
|
451
488
|
}
|
|
452
489
|
const results = [];
|
|
@@ -455,7 +492,7 @@ export async function runTargetSuites(config, targetName, suiteNames) {
|
|
|
455
492
|
try {
|
|
456
493
|
let sync = true;
|
|
457
494
|
for (const suiteName of suiteNames) {
|
|
458
|
-
const result = await runTargetSuite(config, targetName, suiteName, { ...lease, sync });
|
|
495
|
+
const result = await runTargetSuite(config, targetName, suiteName, { ...lease, sync }, runId);
|
|
459
496
|
results.push(result);
|
|
460
497
|
sync = false;
|
|
461
498
|
if (!result.ok) break;
|
|
@@ -465,7 +502,7 @@ export async function runTargetSuites(config, targetName, suiteNames) {
|
|
|
465
502
|
staleCleanupResult = await cleanupStaleTargetState(targetName, config);
|
|
466
503
|
}
|
|
467
504
|
if (stopResult) {
|
|
468
|
-
results.push(createLeaseCleanupResult(config, targetName, lease.leaseId, stopResult, staleCleanupResult));
|
|
505
|
+
results.push(createLeaseCleanupResult(config, targetName, lease.leaseId, stopResult, staleCleanupResult, runId));
|
|
469
506
|
}
|
|
470
507
|
return { ok: results.every((result) => result.ok), results };
|
|
471
508
|
}
|
|
@@ -61,6 +61,7 @@ Environment:
|
|
|
61
61
|
PLATFORM_SMOKE_MAC_HOST macOS SSH host; default localhost
|
|
62
62
|
PLATFORM_SMOKE_MAC_USER macOS SSH user; default $USER
|
|
63
63
|
PLATFORM_SMOKE_MAC_WORK_ROOT macOS Crabbox work root
|
|
64
|
+
PLATFORM_SMOKE_MAC_PORT macOS SSH port; default 22
|
|
64
65
|
PLATFORM_SMOKE_UBUNTU_IMAGE Ubuntu local-container image; default pi-agent-browser-native-platform:node24-agent-browser0.27.1
|
|
65
66
|
PLATFORM_SMOKE_WINDOWS_VM Parallels Windows template VM
|
|
66
67
|
PLATFORM_SMOKE_WINDOWS_SNAPSHOT Parallels snapshot name
|