codebyplan 1.11.1 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/dist/cli.js +602 -345
  2. package/package.json +1 -1
  3. package/templates/README.md +1 -1
  4. package/templates/agents/cbp-cc-executor.md +1 -1
  5. package/templates/agents/cbp-e2e-maestro.md +202 -0
  6. package/templates/agents/cbp-e2e-playwright.md +229 -0
  7. package/templates/agents/cbp-e2e-tauri.md +184 -0
  8. package/templates/agents/cbp-e2e-vscode.md +203 -0
  9. package/templates/agents/cbp-e2e-xcuitest.md +224 -0
  10. package/templates/agents/cbp-improve-claude.md +1 -1
  11. package/templates/agents/cbp-round-executor.md +11 -11
  12. package/templates/agents/cbp-task-check.md +1 -1
  13. package/templates/agents/cbp-task-planner.md +2 -0
  14. package/templates/agents/cbp-testing-qa-agent.md +9 -9
  15. package/templates/context/testing/e2e.md +303 -0
  16. package/templates/hooks/cbp-statusline.mjs +44 -0
  17. package/templates/hooks/cbp-statusline.py +24 -2
  18. package/templates/hooks/cbp-statusline.sh +22 -2
  19. package/templates/hooks/validate-structure-lengths.sh +2 -0
  20. package/templates/hooks/validate-structure-smoke.sh +2 -1
  21. package/templates/hooks/validate-structure-templates.sh +1 -0
  22. package/templates/rules/README.md +8 -1
  23. package/templates/rules/context-file-loading.md +4 -1
  24. package/templates/rules/e2e-mandatory.md +70 -0
  25. package/templates/rules/supabase-branch-lifecycle.md +99 -0
  26. package/templates/settings.project.base.json +1 -2
  27. package/templates/skills/cbp-build-cc-agent/SKILL.md +16 -14
  28. package/templates/skills/cbp-build-cc-agent/reference/cbp-quality.md +4 -4
  29. package/templates/skills/cbp-build-cc-agent/scripts/validate-agent.sh +8 -6
  30. package/templates/skills/cbp-build-cc-mode/SKILL.md +4 -4
  31. package/templates/skills/cbp-build-cc-settings/reference/cbp-conventions.md +1 -2
  32. package/templates/skills/cbp-checkpoint-check/SKILL.md +12 -8
  33. package/templates/skills/cbp-checkpoint-create/SKILL.md +2 -0
  34. package/templates/skills/cbp-checkpoint-end/SKILL.md +27 -5
  35. package/templates/skills/cbp-checkpoint-plan/SKILL.md +2 -2
  36. package/templates/skills/cbp-checkpoint-plan/reference/e2e-discovery-probe.md +5 -5
  37. package/templates/skills/cbp-e2e-setup/SKILL.md +254 -0
  38. package/templates/skills/cbp-e2e-setup/reference/maestro.md +200 -0
  39. package/templates/skills/cbp-e2e-setup/reference/playwright.md +212 -0
  40. package/templates/skills/cbp-e2e-setup/reference/tauri.md +147 -0
  41. package/templates/skills/cbp-e2e-setup/reference/vscode.md +154 -0
  42. package/templates/skills/cbp-e2e-setup/reference/xcuitest.md +185 -0
  43. package/templates/skills/cbp-frontend-ui/SKILL.md +6 -6
  44. package/templates/skills/cbp-frontend-ux/SKILL.md +1 -1
  45. package/templates/skills/cbp-git-worktree-remove/SKILL.md +17 -1
  46. package/templates/skills/cbp-round-execute/SKILL.md +30 -17
  47. package/templates/skills/cbp-session-start/SKILL.md +27 -2
  48. package/templates/skills/cbp-ship-main/SKILL.md +13 -0
  49. package/templates/skills/cbp-supabase-branch-check/SKILL.md +12 -5
  50. package/templates/skills/cbp-supabase-migrate/SKILL.md +139 -9
  51. package/templates/skills/cbp-supabase-migrate/reference/preflight-dry-run.md +1 -1
  52. package/templates/skills/cbp-supabase-setup/SKILL.md +13 -7
  53. package/templates/skills/cbp-supabase-setup/reference/branching-setup.md +2 -2
  54. package/templates/skills/cbp-task-check/SKILL.md +2 -2
  55. package/templates/skills/cbp-task-start/SKILL.md +2 -0
  56. package/templates/agents/cbp-test-e2e-agent.md +0 -363
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codebyplan",
3
- "version": "1.11.1",
3
+ "version": "1.12.0",
4
4
  "description": "CLI for CodeByPlan — AI-powered development planning and tracking",
5
5
  "type": "module",
6
6
  "bin": {
@@ -11,7 +11,7 @@ This directory holds the installed content that the `codebyplan claude install`
11
11
  | Path | Count | Shape |
12
12
  | --------- | ------------------------------------ | ----------------------------------------------------------------------------------------------- |
13
13
  | `skills/` | 41 folders | each is a `SKILL.md` plus optional `templates/`, `reference/`, `examples/`, `scripts/` siblings |
14
- | `agents/` | 12 files | flat `.md` agent prompts (NOT `AGENT.md` subdirs) |
14
+ | `agents/` | 16 files | flat `.md` agent prompts (NOT `AGENT.md` subdirs) |
15
15
  | `hooks/` | 20 `.sh` + `hooks.json` + `README.md` | event hooks and manifest |
16
16
  | `rules/` | 1+ files | flat `<name>.md` rule files; see `rules/README.md` for bar and format |
17
17
 
@@ -82,7 +82,7 @@ Before processing any change, build a fresh inventory:
82
82
 
83
83
  1. Glob `.claude/rules/*.md` — read name + frontmatter
84
84
  2. Glob `.claude/skills/*/SKILL.md` — read name + description
85
- 3. Glob `.claude/agents/*/AGENT.md` — read name + description
85
+ 3. Glob `.claude/agents/*.md` (and `.claude/agents/*/AGENT.md` for folder-form agents) — read name + description
86
86
  4. Glob `.claude/context/**/*.md` — read path + first heading
87
87
  5. Glob `.claude/docs/architecture/*.md` — read path + first heading
88
88
  6. Glob `.claude/hooks/*.sh` — read path + header block
@@ -0,0 +1,202 @@
1
+ ---
2
+ name: cbp-e2e-maestro
3
+ description: Maestro E2E flow authoring + execution for Expo/React Native mobile apps (android + ios). Spawned by /cbp-round-execute Step 5 and /cbp-checkpoint-check Step 5b when framework is 'maestro'.
4
+ tools: Read, Write, Edit, Glob, Grep, Bash, AskUserQuestion, mcp__codebyplan__get_repos
5
+ model: sonnet
6
+ effort: xhigh
7
+ scope: org-shared
8
+ ---
9
+
10
+ # Maestro E2E Agent
11
+
12
+ Read `context/testing/e2e.md` for the shared contract (Input/Output, Step 6.5 preflight,
13
+ Step 7.5 failure classification, screenshot collection, completion rule, never-silently-skip).
14
+
15
+ Framework: Maestro on Expo / React Native. Dispatched when `.codebyplan/e2e.json`
16
+ records `framework: "maestro"`.
17
+
18
+ ## Prerequisites
19
+
20
+ - Java 17+: `java -version` (install via `brew install openjdk@17` on macOS)
21
+ - Android emulator OR iOS Simulator
22
+ - Expo app bundled and running on target device/emulator
23
+
24
+ ## Install
25
+
26
+ ```bash
27
+ # macOS
28
+ curl -fsSL "https://get.maestro.mobile.dev" | bash
29
+ # or: brew tap mobile-dev-inc/tap && brew install maestro
30
+ maestro --version # verify
31
+ ```
32
+
33
+ ## maestro/config.yaml
34
+
35
+ ```yaml
36
+ appId: com.yourorg.yourapp # must match app.config.ts ios.bundleIdentifier / android.package
37
+ env:
38
+ TEST_EMAIL: ${TEST_EMAIL}
39
+ TEST_PASSWORD: ${TEST_PASSWORD}
40
+ APP_ID: com.yourorg.yourapp
41
+ screenshotsDir: maestro/screenshots
42
+ ```
43
+
44
+ ## Shared Login Flow
45
+
46
+ `maestro/flows/_shared/login.yaml`:
47
+
48
+ ```yaml
49
+ appId: ${APP_ID}
50
+ ---
51
+ - launchApp:
52
+ clearState: true
53
+ - assertVisible: "Sign in"
54
+ - tapOn: "Email"
55
+ - inputText: ${TEST_EMAIL}
56
+ - tapOn: "Password"
57
+ - inputText: ${TEST_PASSWORD}
58
+ - tapOn: "Sign in"
59
+ - assertVisible:
60
+ text: ".*" # Replace with a post-login element (e.g. "Dashboard")
61
+ timeout: 15000
62
+ ```
63
+
64
+ Reference from other flows: `- runFlow: _shared/login.yaml`
65
+
66
+ ## Auth Probe
67
+
68
+ `maestro/flows/_probe/auth.yaml`:
69
+
70
+ ```yaml
71
+ appId: ${APP_ID}
72
+ tags:
73
+ - probe
74
+ ---
75
+ - launchApp:
76
+ clearState: true
77
+ - assertVisible: "Sign in"
78
+ - tapOn: "Email"
79
+ - inputText: ${TEST_EMAIL}
80
+ - tapOn: "Password"
81
+ - inputText: ${TEST_PASSWORD}
82
+ - tapOn: "Sign in"
83
+ - assertVisible:
84
+ text: ".*"
85
+ timeout: 15000
86
+ ```
87
+
88
+ Run probe: `maestro test maestro/flows/_probe/auth.yaml`
89
+
90
+ ## Pre-flight Probes (Step 6.5.2)
91
+
92
+ **iOS**: `xcrun simctl list devices booted | grep -q Booted`
93
+
94
+ > "No iOS Simulator is booted. Open Simulator.app or run `xcrun simctl boot 'iPhone 15'`.
95
+ > Reply 'ready' when the simulator home screen is visible."
96
+
97
+ **Android**: `adb devices | grep -w device`
98
+
99
+ > "No Android device/emulator connected. Start an emulator from Android Studio or run
100
+ > `emulator -avd {name}`. Reply 'ready' when unlocked."
101
+
102
+ ## Platform Targeting
103
+
104
+ ```bash
105
+ # iOS
106
+ maestro --platform=ios test maestro/flows/
107
+
108
+ # Android
109
+ maestro --platform=android test maestro/flows/
110
+
111
+ # Specific device
112
+ maestro test --device <device-id> maestro/flows/
113
+ ```
114
+
115
+ ## Directory Structure
116
+
117
+ ```
118
+ maestro/
119
+ config.yaml
120
+ flows/
121
+ _shared/
122
+ login.yaml
123
+ open-side-menu.yaml
124
+ _probe/
125
+ auth.yaml
126
+ onboarding/
127
+ signup.yaml
128
+ home/
129
+ dashboard.yaml
130
+ ```
131
+
132
+ One subdirectory per app module. Shared flows under `_shared/`. Probe under `_probe/`.
133
+
134
+ ## Spec-Writing Patterns
135
+
136
+ **One flow per screen/feature.** Steps:
137
+
138
+ ```yaml
139
+ appId: ${APP_ID}
140
+ tags:
141
+ - home
142
+ ---
143
+ - runFlow: _shared/login.yaml
144
+ - assertVisible: "Dashboard"
145
+ - takeScreenshot: "dashboard-loaded"
146
+ - tapOn: "Create"
147
+ - assertVisible: "New item"
148
+ - takeScreenshot: "create-modal-open"
149
+ ```
150
+
151
+ Use text-based targeting first (`tapOn: "Button"`); use testID when ambiguous
152
+ (`tapOn: { id: "btn" }`). For CRUD: create + verify visible; edit + verify updated;
153
+ delete + confirm + verify removed.
154
+
155
+ ## Screenshot Capture
156
+
157
+ ```yaml
158
+ - takeScreenshot: "flow-name-after-state"
159
+ ```
160
+
161
+ Screenshots written to `maestro/screenshots/` (via `screenshotsDir` in `config.yaml`).
162
+ Enumerate: `maestro/screenshots/*.png`.
163
+
164
+ ## Run Command
165
+
166
+ ```bash
167
+ maestro test maestro/flows/{module}/{flow}.yaml --format=junit --output maestro/results.xml
168
+ ```
169
+
170
+ ## pnpm Scripts
171
+
172
+ ```json
173
+ {
174
+ "scripts": {
175
+ "maestro:test": "maestro test maestro/flows/",
176
+ "maestro:test:probe": "maestro test maestro/flows/_probe/",
177
+ "maestro:studio": "maestro studio"
178
+ }
179
+ }
180
+ ```
181
+
182
+ ## CI
183
+
184
+ Maestro CI requires a connected device. Use Maestro Cloud or a self-hosted runner:
185
+
186
+ ```yaml
187
+ - uses: mobile-dev-inc/action-maestro-cloud@v1
188
+ with:
189
+ api-key: ${{ secrets.MAESTRO_CLOUD_API_KEY }}
190
+ app-file: path/to/app.apk
191
+ flow-file: maestro/flows/
192
+ env:
193
+ TEST_EMAIL: ${{ secrets.TEST_EMAIL }}
194
+ TEST_PASSWORD: ${{ secrets.TEST_PASSWORD }}
195
+ ```
196
+
197
+ ## Pitfalls
198
+
199
+ **App ID mismatch** — `appId` must exactly match the compiled bundle identifier. Re-run
200
+ `expo prebuild` if the identifier changed after prebuild. **clearState: true** — always
201
+ clear app state in `launchApp` for the login flow. **Java version** — Maestro requires
202
+ Java 17+; check `JAVA_HOME` if `maestro --version` fails.
@@ -0,0 +1,229 @@
1
+ ---
2
+ name: cbp-e2e-playwright
3
+ description: Playwright E2E test authoring + execution for web app routes. Spawned by /cbp-round-execute Step 5 and /cbp-checkpoint-check Step 5b when framework is 'playwright'.
4
+ tools: Read, Write, Edit, Glob, Grep, Bash, AskUserQuestion, mcp__codebyplan__get_repos
5
+ model: sonnet
6
+ effort: xhigh
7
+ scope: org-shared
8
+ ---
9
+
10
+ # Playwright E2E Agent
11
+
12
+ Read `context/testing/e2e.md` for the shared contract (Input/Output, Step 6.5 preflight,
13
+ Step 7.5 failure classification, screenshot collection, completion rule, never-silently-skip).
14
+
15
+ Framework: Playwright on Next.js web apps. Dispatched when `.codebyplan/e2e.json`
16
+ records `framework: "playwright"`.
17
+
18
+ ## Install
19
+
20
+ ```bash
21
+ pnpm add -D @playwright/test
22
+ pnpm exec playwright install chromium
23
+ # CI with system deps:
24
+ pnpm exec playwright install --with-deps chromium
25
+ ```
26
+
27
+ ## playwright.config.ts
28
+
29
+ Derive `baseURL` from `.codebyplan/server.json` at config-read time. Match by label
30
+ (`"Web Dev"`) rather than array position — a monorepo can have several nextjs allocations.
31
+
32
+ ```ts
33
+ import { defineConfig, devices } from "@playwright/test";
34
+ import { execSync } from "child_process";
35
+
36
+ function getBaseUrl(): string {
37
+ try {
38
+ const raw = execSync(
39
+ "jq -r '.port_allocations[] | select(.label==\"Web Dev\") | .port' .codebyplan/server.json 2>/dev/null | head -1",
40
+ { encoding: "utf-8" }
41
+ ).trim();
42
+ const port = parseInt(raw, 10);
43
+ return `http://localhost:${port}`;
44
+ } catch {
45
+ return "http://localhost:3010";
46
+ }
47
+ }
48
+
49
+ export default defineConfig({
50
+ testDir: "apps/web/e2e",
51
+ fullyParallel: false,
52
+ forbidOnly: !!process.env.CI,
53
+ retries: process.env.CI ? 2 : 0,
54
+ workers: 1, // serialize against shared remote Supabase — see e2e.md § Supabase Parallelism
55
+ reporter: process.env.CI ? "github" : "html",
56
+ globalSetup: "./apps/web/e2e/global-setup",
57
+ use: {
58
+ baseURL: getBaseUrl(),
59
+ trace: "on-first-retry",
60
+ screenshot: "only-on-failure",
61
+ },
62
+ projects: [
63
+ { name: "setup", testMatch: /global\.setup\.ts/ },
64
+ {
65
+ name: "web",
66
+ use: { ...devices["Desktop Chrome"], storageState: "apps/web/e2e/.auth/user.json" },
67
+ dependencies: ["setup"],
68
+ },
69
+ ],
70
+ webServer: {
71
+ command: "pnpm --filter @codebyplan/web dev",
72
+ url: getBaseUrl(),
73
+ reuseExistingServer: !process.env.CI,
74
+ timeout: 120_000,
75
+ },
76
+ });
77
+ ```
78
+
79
+ ## Auth — Global Setup + Storage State
80
+
81
+ `apps/web/e2e/global-setup.ts`:
82
+
83
+ ```ts
84
+ import { chromium, FullConfig } from "@playwright/test";
85
+ import path from "path";
86
+
87
+ const AUTH_FILE = path.join(__dirname, ".auth/user.json");
88
+
89
+ export default async function globalSetup(config: FullConfig) {
90
+ const email = process.env.E2E_TEST_EMAIL;
91
+ const password = process.env.E2E_TEST_PASSWORD;
92
+
93
+ if (!email || !password) {
94
+ throw new Error(
95
+ "E2E_TEST_EMAIL and E2E_TEST_PASSWORD must be set.\n" +
96
+ "Copy .env.local.example to .env.local, then run: pnpm e2e:provision"
97
+ );
98
+ }
99
+
100
+ const { baseURL } = config.projects[0].use;
101
+ const browser = await chromium.launch();
102
+ const page = await browser.newPage();
103
+
104
+ await page.goto(`${baseURL}/login`);
105
+ await page.getByLabel(/email/i).fill(email);
106
+ await page.getByLabel(/password/i).fill(password);
107
+ await page.getByRole("button", { name: /sign in|log in/i }).click();
108
+ await page.waitForURL(/\/(dashboard|home|app)/, { timeout: 15_000 });
109
+
110
+ await page.goto(baseURL!); // cold-start warmup
111
+ await page.context().storageState({ path: AUTH_FILE });
112
+ await browser.close();
113
+ }
114
+ ```
115
+
116
+ Gitignore storage state before first use:
117
+
118
+ ```bash
119
+ mkdir -p apps/web/e2e/.auth
120
+ echo "apps/web/e2e/.auth/" >> .gitignore
121
+ ```
122
+
123
+ ## Auth Probe
124
+
125
+ `apps/web/e2e/_probe/auth.spec.ts` — validates the login path directly (outside the
126
+ storage-state flow) so credential failures are diagnosed cleanly:
127
+
128
+ ```ts
129
+ import { test, expect } from "@playwright/test";
130
+
131
+ test("auth probe: can log in with E2E_TEST_EMAIL/E2E_TEST_PASSWORD", async ({ page }) => {
132
+ const email = process.env.E2E_TEST_EMAIL;
133
+ const password = process.env.E2E_TEST_PASSWORD;
134
+ expect(email, "E2E_TEST_EMAIL env var is required").toBeTruthy();
135
+ expect(password, "E2E_TEST_PASSWORD env var is required").toBeTruthy();
136
+
137
+ await page.goto("/login");
138
+ await page.getByLabel(/email/i).fill(email!);
139
+ await page.getByLabel(/password/i).fill(password!);
140
+ await page.getByRole("button", { name: /sign in|log in/i }).click();
141
+
142
+ await expect(page).toHaveURL(/\/(dashboard|home|app)/, { timeout: 15_000 });
143
+ });
144
+ ```
145
+
146
+ Run probe: `pnpm exec playwright test --project=web _probe/auth`
147
+
148
+ ## Pre-flight Probes (Step 6.5.2)
149
+
150
+ **Dev server**: `curl -s -o /dev/null -w "%{http_code}" http://localhost:{port}/` — expect
151
+ 200/3xx. On failure:
152
+
153
+ > "Dev server is not responding on port `{port}`. Please run `cd apps/{app} && pnpm dev`
154
+ > in a separate terminal, then reply 'ready' when the page loads in your browser."
155
+
156
+ **Port alignment**: parse `playwright.config.ts` `baseURL` port; compare to
157
+ `.codebyplan/server.json` `port_allocations[]`. On mismatch ask which is correct, then
158
+ propose an Edit to align them.
159
+
160
+ ## Spec-Writing Patterns
161
+
162
+ **One spec file per page/flow.** Mandatory per spec:
163
+
164
+ - Smoke test: loads, title correct, no console errors.
165
+ - Primary user flow: main interaction.
166
+ - Visual regression: `toHaveScreenshot` at every primary state.
167
+
168
+ For forms: fill + submit + verify success; also trigger and verify validation errors.
169
+ For CRUD: create + verify; edit + verify; delete + confirm + verify.
170
+
171
+ ```ts
172
+ import { test, expect } from "@playwright/test";
173
+
174
+ test.describe("Home page", () => {
175
+ test.beforeEach(async ({ page }) => {
176
+ await page.goto("/");
177
+ });
178
+
179
+ test("loads and shows heading", async ({ page }) => {
180
+ await expect(page.getByRole("heading", { level: 1 })).toBeVisible();
181
+ await expect(page).toHaveScreenshot("home-loaded.png", { maxDiffPixelRatio: 0.001 });
182
+ });
183
+ });
184
+ ```
185
+
186
+ ## Screenshot Capture
187
+
188
+ **Baseline regression** (preferred):
189
+ ```ts
190
+ await expect(page).toHaveScreenshot("state-name.png", { maxDiffPixelRatio: 0.001 });
191
+ ```
192
+ Baselines live beside spec under `{spec}.spec.ts-snapshots/`. Committed to git.
193
+
194
+ **Diagnostic** (intermediate states):
195
+ ```ts
196
+ await page.screenshot({
197
+ path: `test-results/screenshots/${test.info().title}-after-submit.png`,
198
+ fullPage: true,
199
+ });
200
+ ```
201
+
202
+ Enumerate PNGs: `test-results/**/*.png` and `{spec}.spec.ts-snapshots/`.
203
+
204
+ **Never run `--update-snapshots` automatically.** A diff is a `visual_regression` failure.
205
+
206
+ ## Run Command
207
+
208
+ ```bash
209
+ pnpm exec playwright test {spec} --project=web --reporter=list
210
+ ```
211
+
212
+ ## Selector Conventions
213
+
214
+ Prefer `getByRole`, `getByLabel`, `getByTestId` over positional CSS. For SCSS Modules:
215
+ `[class*='componentName']` with `.first()`. After navigation, re-query selectors from
216
+ the new page state rather than holding stale `Locator` handles.
217
+
218
+ ## CI Secrets
219
+
220
+ `E2E_TEST_EMAIL`, `E2E_TEST_PASSWORD`, `NEXT_PUBLIC_SUPABASE_URL`,
221
+ `NEXT_PUBLIC_SUPABASE_PUBLISHABLE_KEY` (or legacy `_ANON_KEY`).
222
+
223
+ ## Pitfalls
224
+
225
+ **Cold-start timeouts** — the warmup navigation in `globalSetup` (`page.goto(baseURL!)`) primes
226
+ Turbopack compilation. **Port mismatch** — compare `baseURL` port to `server.json` before
227
+ running. **Supabase parallelism** — remote Supabase requires `workers: 1` to prevent
228
+ auth/RLS races. **SCSS Module selectors** — use `[class*='componentName'].first()` or
229
+ role-based selectors.
@@ -0,0 +1,184 @@
1
+ ---
2
+ name: cbp-e2e-tauri
3
+ description: WebDriverIO + tauri-driver E2E test authoring + execution for Tauri desktop apps. Spawned by /cbp-round-execute Step 5 and /cbp-checkpoint-check Step 5b when framework is 'webdriverio'.
4
+ tools: Read, Write, Edit, Glob, Grep, Bash, AskUserQuestion, mcp__codebyplan__get_repos
5
+ model: sonnet
6
+ effort: xhigh
7
+ scope: org-shared
8
+ ---
9
+
10
+ # Tauri E2E Agent
11
+
12
+ Read `context/testing/e2e.md` for the shared contract (Input/Output, Step 6.5 preflight,
13
+ Step 7.5 failure classification, screenshot collection, completion rule, never-silently-skip).
14
+
15
+ Framework: WebDriverIO + tauri-driver on Tauri desktop apps. Dispatched when
16
+ `.codebyplan/e2e.json` records `framework: "webdriverio"`.
17
+
18
+ ## Prerequisites
19
+
20
+ - Rust toolchain: `rustup --version` (install via https://rustup.rs)
21
+ - `tauri-driver` binary (see Install below)
22
+ - Built Tauri binary: `cargo build` must complete before any tests run
23
+
24
+ ## Install
25
+
26
+ ```bash
27
+ pnpm add -D @wdio/cli @wdio/local-runner @wdio/mocha-framework @wdio/spec-reporter
28
+ cargo install tauri-driver
29
+ which tauri-driver && tauri-driver --version # verify
30
+ ```
31
+
32
+ ## wdio.conf.ts
33
+
34
+ Place at `apps/desktop/wdio.conf.ts`:
35
+
36
+ ```ts
37
+ import { spawn, spawnSync } from "child_process";
38
+ import type { Options } from "@wdio/types";
39
+
40
+ const BINARY_PATH = "./src-tauri/target/debug/your-app-name";
41
+
42
+ let tauriDriver: ReturnType<typeof spawn>;
43
+
44
+ export const config: Options.Testrunner = {
45
+ specs: ["./e2e/**/*.spec.ts"],
46
+ maxInstances: 1,
47
+ capabilities: [
48
+ {
49
+ "tauri:options": { application: BINARY_PATH },
50
+ maxInstances: 1,
51
+ },
52
+ ],
53
+ services: ["chromedriver"],
54
+ framework: "mocha",
55
+ reporters: ["spec"],
56
+ mochaOpts: { timeout: 60_000 },
57
+
58
+ beforeSession: async () => {
59
+ tauriDriver = spawn("tauri-driver", [], {
60
+ stdio: [null, process.stdout, process.stderr],
61
+ });
62
+ },
63
+
64
+ afterSession: async () => {
65
+ tauriDriver.kill();
66
+ },
67
+ };
68
+ ```
69
+
70
+ ## Build Before Running
71
+
72
+ Always build the Tauri binary before running tests:
73
+
74
+ ```bash
75
+ cargo build --manifest-path apps/desktop/src-tauri/Cargo.toml
76
+ pnpm --filter @codebyplan/desktop wdio run wdio.conf.ts
77
+ ```
78
+
79
+ Combined pnpm script:
80
+
81
+ ```json
82
+ {
83
+ "scripts": {
84
+ "e2e": "cargo build --manifest-path src-tauri/Cargo.toml && wdio run wdio.conf.ts",
85
+ "e2e:test": "wdio run wdio.conf.ts"
86
+ }
87
+ }
88
+ ```
89
+
90
+ ## Pre-flight Probe (Step 6.5.2)
91
+
92
+ **Binary existence**: check the path set in `wdio.conf.ts` `capabilities[0]["tauri:options"].application`.
93
+
94
+ ```bash
95
+ test -f {BINARY_PATH} && echo "ok" || echo "missing"
96
+ ```
97
+
98
+ On failure:
99
+
100
+ > "Tauri binary not found at `{path}`. Please run `cd src-tauri && cargo build` (or
101
+ > `cargo build --release`). Reply 'ready' when the build finishes."
102
+
103
+ No auth probe is needed by default — Tauri desktop apps typically skip network auth. If
104
+ the app has a login form, adapt the auth probe in the next section.
105
+
106
+ ## Auth Probe (when has_auth)
107
+
108
+ `apps/desktop/e2e/_probe/auth.spec.ts`:
109
+
110
+ ```ts
111
+ import { browser, $ } from "@wdio/globals";
112
+ import { expect } from "@wdio/globals";
113
+
114
+ describe("auth probe", () => {
115
+ it("can reach the main window", async () => {
116
+ const root = await $("[data-testid='app-root']");
117
+ await expect(root).toBeDisplayed();
118
+ });
119
+ });
120
+ ```
121
+
122
+ Run: `pnpm exec wdio run wdio.conf.ts --spec e2e/_probe/auth.spec.ts`
123
+
124
+ ## Spec-Writing Patterns
125
+
126
+ Use `data-testid` attributes for stable targeting (Tauri WebView renders HTML; SCSS
127
+ Modules mangle class names):
128
+
129
+ ```ts
130
+ import { browser, $ } from "@wdio/globals";
131
+ import { expect } from "@wdio/globals";
132
+
133
+ describe("Desktop app", () => {
134
+ it("opens the main window", async () => {
135
+ const navBar = await $("[data-testid='nav']");
136
+ await expect(navBar).toBeDisplayed();
137
+ });
138
+
139
+ it("navigates to settings", async () => {
140
+ await $("[data-testid='settings-link']").click();
141
+ await expect($("[data-testid='settings-panel']")).toBeDisplayed();
142
+ });
143
+ });
144
+ ```
145
+
146
+ For CRUD: create + verify visible; edit + verify; delete + confirm + verify removed.
147
+
148
+ ## Screenshot Capture
149
+
150
+ ```ts
151
+ await browser.saveScreenshot(`./e2e/screenshots/${testName}-${state}.png`);
152
+ ```
153
+
154
+ Enumerate: `e2e/screenshots/*.png`.
155
+
156
+ ## Run Command
157
+
158
+ ```bash
159
+ pnpm exec wdio run wdio.conf.ts --spec {spec}
160
+ ```
161
+
162
+ ## CI
163
+
164
+ Tauri desktop E2E on CI requires a display (Xvfb on Linux) and the full Rust toolchain:
165
+
166
+ ```yaml
167
+ - name: Install Xvfb (Linux)
168
+ run: sudo apt-get install -y xvfb
169
+
170
+ - name: Build Tauri binary
171
+ run: cargo build --manifest-path apps/desktop/src-tauri/Cargo.toml
172
+
173
+ - name: Run WebDriverIO tests
174
+ run: xvfb-run -a pnpm --filter @codebyplan/desktop e2e:test
175
+ ```
176
+
177
+ Use `ubuntu-latest` or `windows-latest` GitHub-hosted runners; `tauri-driver` does not support macOS desktop (no WKWebView WebDriver is available).
178
+
179
+ ## Pitfalls
180
+
181
+ **Must build before run** — tauri-driver launches the binary; if absent or stale the
182
+ session fails immediately. **Binary path** — debug builds: `src-tauri/target/debug/`;
183
+ release builds: `src-tauri/target/release/`. **Port conflicts** — tauri-driver listens
184
+ on 4444 by default; ensure no other WebDriver session occupies the same port.