npm - @ishlabs/cli - Versions diffs - 0.27.0 → 0.27.1 - Mend

@ishlabs/cli 0.27.0 → 0.27.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

package/README.md +4 -0
package/dist/commands/doctor.js +21 -11
package/dist/commands/iteration.js +13 -4
package/dist/commands/study-run.js +12 -12
package/dist/commands/study-screenshots.js +15 -12
package/dist/commands/study.js +22 -3
package/dist/lib/docs.js +139 -7
package/dist/lib/local-sim/adb.d.ts +19 -2
package/dist/lib/local-sim/adb.js +71 -23
package/dist/lib/local-sim/device-pool.d.ts +85 -0
package/dist/lib/local-sim/device-pool.js +316 -0
package/dist/lib/local-sim/device.d.ts +4 -0
package/dist/lib/local-sim/device.js +19 -1
package/dist/lib/local-sim/emulator.d.ts +50 -0
package/dist/lib/local-sim/emulator.js +189 -0
package/dist/lib/local-sim/install.js +23 -3
package/dist/lib/local-sim/ios.d.ts +26 -1
package/dist/lib/local-sim/ios.js +51 -11
package/dist/lib/local-sim/loop.js +112 -9
package/dist/lib/local-sim/screen-signature.js +4 -0
package/dist/lib/local-sim/simctl-provision.d.ts +49 -0
package/dist/lib/local-sim/simctl-provision.js +89 -0
package/dist/lib/local-sim/simctl.d.ts +6 -4
package/dist/lib/local-sim/simctl.js +18 -5
package/dist/lib/local-sim/xcuitest.d.ts +15 -1
package/dist/lib/local-sim/xcuitest.js +22 -6
package/dist/lib/paths.d.ts +1 -0
package/dist/lib/paths.js +3 -0
package/dist/lib/skill-content.js +5 -2
package/dist/lib/upload.d.ts +27 -0
package/dist/lib/upload.js +108 -11
package/package.json +2 -2

package/README.md CHANGED Viewed

@@ -45,6 +45,10 @@ The CLI resolves your auth token in this order:
 Test plan is available at `/Users/felixweiland/ish-cli-test-plan.md`.
+## Experiments
+Durable records of engineering experiments (including reverted ones, so we don't re-run them) live in [`docs/experiments/`](docs/experiments/README.md).
 ---
 ## Concepts

package/dist/commands/doctor.js CHANGED Viewed

@@ -125,13 +125,16 @@ async function checkSimulator() {
     catch {
         return { key: "ios_simulator", name: "iOS simulator", group: "iOS", status: "fail", message: "could not parse simctl output" };
     }
-    if (booted.length === 1) {
-        return { key: "ios_simulator", name: "iOS simulator", group: "iOS", status: "pass", message: `${booted[0].name} (booted)` };
+    if (booted.length >= 1) {
+        // >1 booted is fine now: a parallel run (`--parallel N`) reuses booted
+        // simulators and clones the shortfall, so extras are a head start, not a
+        // problem. A single-device (non-parallel) run still needs exactly one.
+        const extra = booted.length > 1
+            ? ` (+${booted.length - 1} more — parallel runs pool them; a single-device run needs exactly one)`
+            : "";
+        return { key: "ios_simulator", name: "iOS simulator", group: "iOS", status: "pass", message: `${booted[0].name} (booted)${extra}` };
     }
-    if (booted.length === 0) {
-        return { key: "ios_simulator", name: "iOS simulator", group: "iOS", status: "warn", message: "none booted", fix: "Open Simulator.app (Xcode ships simulators) or `xcrun simctl boot <udid>`" };
-    }
-    return { key: "ios_simulator", name: "iOS simulator", group: "iOS", status: "warn", message: `${booted.length} booted — native runs drive exactly one`, fix: "Shut down the extras" };
+    return { key: "ios_simulator", name: "iOS simulator", group: "iOS", status: "warn", message: "none booted", fix: "Open Simulator.app (Xcode ships simulators) or `xcrun simctl boot <udid>`" };
 }
 /** True when the prebuilt XCUITest runner (WebDriverAgent `.app`) is present. */
 function wdaBundlePresent() {
@@ -172,11 +175,18 @@ async function checkAdb() {
         .slice(1)
         .map((l) => l.trim())
         .filter((l) => l.endsWith("\tdevice"));
-    const emulator = devices.length === 1
-        ? { key: "android_emulator", name: "Android emulator", group: "Android", status: "pass", message: devices[0].split("\t")[0] }
-        : devices.length === 0
-            ? { key: "android_emulator", name: "Android emulator", group: "Android", status: "warn", message: "none online", fix: "Create + boot an AVD in Android Studio > Device Manager (or `emulator -avd <name>`)" }
-            : { key: "android_emulator", name: "Android emulator", group: "Android", status: "warn", message: `${devices.length} online — native runs drive exactly one`, fix: "Stop the extras" };
+    // >1 online is fine now: `--parallel` pools emulators (and auto-launches more
+    // from your AVDs). A single-device (non-parallel) run still uses exactly one.
+    const emulator = devices.length >= 1
+        ? {
+            key: "android_emulator",
+            name: "Android emulator",
+            group: "Android",
+            status: "pass",
+            message: devices[0].split("\t")[0] +
+                (devices.length > 1 ? ` (+${devices.length - 1} more — parallel runs pool them)` : ""),
+        }
+        : { key: "android_emulator", name: "Android emulator", group: "Android", status: "warn", message: "none online (parallel runs auto-launch your AVDs)", fix: "Create an AVD in Android Studio > Device Manager (or `emulator -avd <name>`)" };
     return { adb, emulator };
 }
 async function checkChromium() {

package/dist/commands/iteration.js CHANGED Viewed

@@ -9,7 +9,7 @@ import { readFileSync } from "node:fs";
 import { withClient, resolveStudy, resolveWorkspace, readFileOrStdin, collectIds } from "../lib/command-helpers.js";
 import { resolveId, tagAlias, ALIAS_PREFIX } from "../lib/alias-store.js";
 import { output, formatIterationList, ValidationError } from "../lib/output.js";
-import { resolveContentUrl, resolveContentUrls, resolveTextContent } from "../lib/upload.js";
+import { resolveContentUrl, resolveContentUrls, resolveTextContent, archiveHtmlImages } from "../lib/upload.js";
 import { isMediaModality, validateIterationDetails, normalizeChatMode, validateRoleCriteria } from "../lib/modality.js";
 import { validateSegmentation, warnIfOverSegmented } from "../lib/segmentation.js";
 import { normalizeEnumValue, SCREEN_FORMATS } from "../lib/enums.js";
@@ -297,7 +297,9 @@ function buildIterationDetails(modality, opts) {
             if (opts.platform === "figma" && (!opts.fileKey || !opts.startNodeId)) {
                 throw new Error("Figma interactive iterations require both --file-key and --start-node-id.");
             }
-            let screenFormat = "desktop";
+            // Native (ios/android) targets are phones — default to mobile_portrait
+            // rather than desktop. An explicit --screen-format still wins below.
+            let screenFormat = isNativePlatform(opts.platform) ? "mobile_portrait" : "desktop";
             if (opts.screenFormat !== undefined) {
                 const normalized = normalizeEnumValue(opts.screenFormat, SCREEN_FORMATS);
                 if (normalized === null) {
@@ -385,7 +387,7 @@ Concept pages: ish docs get-page concepts/iteration
         .option("--platform <platform>", "Platform (browser, android, ios, figma, code) — interactive only")
         .option("--url <url>", "URL to test — interactive only (optional for ios/android native apps)")
         .option("--app <id>", "Native app bundle id (or .app/.apk path) — ios/android; supplies the iteration target so --url isn't required")
-        .option("--screen-format <format>", "Screen format (mobile_portrait, desktop) — interactive only; hyphen/underscore variants accepted")
+        .option("--screen-format <format>", "Screen format (mobile_portrait, desktop) — interactive only; hyphen/underscore variants accepted. Default: desktop, or mobile_portrait for native ios/android")
         .option("--locale <locale>", "Locale code (e.g. en-US) — interactive only")
         .option("--file-key <key>", "Figma file key — required when --platform=figma")
         .option("--start-node-id <id>", "Figma start node id — required when --platform=figma")
@@ -622,8 +624,15 @@ Next: \`ish study run\` to dispatch simulations against this iteration.`)
                 if (isMedia) {
                     if (resolved.contentText)
                         resolved.contentText = resolveTextContent(resolved.contentText);
-                    if (resolved.contentHtml)
+                    if (resolved.contentHtml) {
                         resolved.contentHtml = resolveTextContent(resolved.contentHtml);
+                        // Archive external <img> images onto workspace storage so the
+                        // render-to-image worker (egress-denied to other origins) can
+                        // fetch them. Mirrors the FE paste pipeline; text modality only.
+                        if (modality === "text") {
+                            resolved.contentHtml = await archiveHtmlImages(client, studyId, resolved.contentHtml, { quiet: globals.quiet });
+                        }
+                    }
                     if (resolved.copyText)
                         resolved.copyText = resolveTextContent(resolved.copyText);
                     if (resolved.copyHtml)

package/dist/commands/study-run.js CHANGED Viewed

@@ -16,13 +16,10 @@ import { fetchStudyParticipants } from "../lib/study-participants.js";
 import { streamStudyEvents } from "../lib/study-events.js";
 import { isMediaModality, isChatModality, iterationHasContent, describeRequiredContentFlag, readChatMode, readParticipantPairConfig, summarizeRoleCriteria, toModality, } from "../lib/modality.js";
 // NOTE: local-sim modules are loaded via dynamic import at the `--local`
-// branch below, NOT statically here. `local-sim/install.ts` deep-imports
-// `playwright-core/lib/server/registry/index`, which is not exposed by
-// playwright-core's `exports` map — Node refuses to resolve it during
-// module load (ERR_PACKAGE_PATH_NOT_EXPORTED), so a static import here
-// would crash *every* `ish` invocation on the npm-installed CLI, not
-// just `study run --local`. The bun-compiled binary bundles the deep
-// path so it doesn't hit Node's resolver; only the npm path is sensitive.
+// branch below, NOT statically here, so that plain API commands never pay
+// for (or crash on) playwright-core. The registry deep import inside
+// `local-sim/install.ts` is itself lazy for the same reason — see the
+// comment in `installBrowser()`.
 import { estimateChatPair, estimateChatSolo, estimateMediaRun } from "../lib/billing.js";
 import { reportReadiness } from "../lib/report-readiness.js";
 import { runChecks, scopeChecks, overall } from "./doctor.js";
@@ -324,7 +321,7 @@ export function attachStudyRunCommands(study) {
         .option("--slow-mo <ms>", "Slow down actions by ms (local mode only)")
         .option("--devtools", "Open Chrome DevTools (local mode only)")
         .option("--debug", "Enable detailed debug logging to stderr and ~/.ish/local-sim.log")
-        .option("--parallel <n>", "Run N participants in parallel (local mode only, default: all)")
+        .option("--parallel <n>", "Run N participants in parallel (local mode only). Browser: default all. Native iOS/Android: pools N auto-provisioned devices — simulators (iOS) / headless emulators from your AVDs (Android) — default 1, capped at 5, auto-sized to host RAM (and AVD count).")
         .option("--platform <platform>", "Local target platform: 'web' (Playwright), 'android' (adb emulator), or 'ios' (simctl+idb simulator). Defaults to the iteration's platform.")
         .option("--app <path>", "Native local mode: path to an .apk (android) / .app (ios) to install, or an installed package/bundle id to launch. The extension implies --platform.")
         .addHelpText("after", `
@@ -737,10 +734,6 @@ Examples:
                 }
                 log("");
             }
-            if (opts.local) {
-                const { ensureBrowser } = await import("../lib/local-sim/install.js");
-                await ensureBrowser({ quiet: globals.quiet, skipPrompt: globals.json });
-            }
             // Step 5: Either reuse the iteration's participants or batch-create new ones
             let createdParticipants;
             // Pair-mode bookkeeping: the dispatch endpoint takes
@@ -763,6 +756,13 @@ Examples:
                 ?? platformFromApp
                 ?? detailsView.platform
                 ?? "browser";
+            // Chromium is only needed for the browser local path. iOS/Android
+            // local runs drive a simulator/emulator and must not block on (or
+            // prompt for) a browser download.
+            if (opts.local && normalizePlatform(resolvedPlatform) === "browser") {
+                const { ensureBrowser } = await import("../lib/local-sim/install.js");
+                await ensureBrowser({ quiet: globals.quiet, skipPrompt: globals.json });
+            }
             // Best-effort native-readiness report. When this is a LOCAL native run
             // (iOS/Android driven on this developer's machine), fire-and-forget a
             // fresh, platform-scoped `runChecks()` to the backend so the web app

package/dist/commands/study-screenshots.js CHANGED Viewed

@@ -23,14 +23,16 @@ import { resolveId } from "../lib/alias-store.js";
 import { output, printTable } from "../lib/output.js";
 import { ApiError } from "../lib/api-client.js";
 /**
- * Server-side screenshots are produced by remote interactive runs only. A
- * study whose only runs were local (`ish study run --local`) has none — and the
- * grouped endpoint currently 500s instead of returning an empty index. Tag this
- * hint onto the error so the bare 500 points the user at the local debug report.
+ * The frame-grouped screenshot INDEX (`/screenshots/grouped`) is a remote-run
+ * artifact — it groups by frame_version_id, which local runs don't create — and
+ * the endpoint currently 500s for a local-only study instead of returning an
+ * empty index. Local (`--local`) runs DO still capture per-interaction
+ * screenshots; they just live on the participant rows, not in this index. Tag
+ * this hint onto the error so the bare 500 points the user at where they ARE.
  */
 const LOCAL_RUN_SCREENSHOT_HINT = [
-    "Screenshots are produced by remote runs only.",
-    "Ran this study locally (--local)? The per-step screenshots are in the HTML debug report under ~/.ish/debug/ (path printed at the end of each local run).",
+    "The frame-grouped screenshot index is a remote-run artifact (this endpoint may 500 for local-only studies).",
+    "Ran this study locally (--local)? Per-interaction screenshots ARE captured — read them via `ish study get <id>` (each interaction carries a screenshot_url), or open the per-step HTML debug report under ~/.ish/debug/ (path printed at the end of each local run).",
 ];
 /**
  * GET the frame-grouped screenshot index, tagging the local-run hint onto any
@@ -136,12 +138,13 @@ Examples:
   $ ish study screenshots download <study-id> --id <scid> --out shot.png
   $ ish study screenshots download <study-id> --all --out ./shots/
-Screenshots are produced server-side by remote interactive runs only — chat /
-video / text studies don't have them, and neither do local runs
-(\`ish study run --local\`), which instead write a per-step HTML debug report to
-~/.ish/debug/ (the path is printed at the end of each local run). Each row's
-storage URL is self-credentialed, so the CLI fetches bytes without forwarding
-your bearer.`);
+This frame-grouped index is built by remote interactive runs — chat / video /
+text studies don't populate it, and neither do local runs (\`ish study run
+--local\`). Local runs still CAPTURE per-interaction screenshots: read them via
+\`ish study get <id>\` (each interaction carries a screenshot_url) or the per-step
+HTML debug report under ~/.ish/debug/ (path printed at the end of each local
+run). Each row's storage URL is self-credentialed, so the CLI fetches bytes
+without forwarding your bearer.`);
     screenshots
         .command("list", { isDefault: true })
         .description("List screenshots for a study (frame-grouped).")

package/dist/commands/study.js CHANGED Viewed

@@ -350,8 +350,20 @@ Next: configure a run with \`ish iteration create --study <id>\`,
                 validateSegmentation(inlineMediaExtras.segmentation);
                 warnIfOverSegmented(inlineMediaExtras.segmentation, { quiet: globals.quietExplicit });
             }
+            let inlineContentHtml;
+            if (opts.contentHtml) {
+                inlineContentHtml = opts.contentHtml.startsWith("@")
+                    ? readFileSync(opts.contentHtml.slice(1), "utf8")
+                    : opts.contentHtml;
+                // The study does not exist yet here, so we cannot archive remote
+                // images onto workspace storage (the render worker egress-denies
+                // other origins). Point the operator at the archive-capable flow.
+                if (/<img\b[^>]*\bsrc\s*=\s*["']https?:\/\//i.test(inlineContentHtml) && !globals.quietExplicit) {
+                    process.stderr.write("Note: --content-html has remote <img> images, which `study create` cannot archive (the study does not exist yet) — they will not render. To archive them, run `ish study create` without content, then `ish iteration create --content-html ...`.\n");
+                }
+            }
             const inlineEmailExtras = {
-                ...(opts.contentHtml && { content_html: opts.contentHtml.startsWith("@") ? readFileSync(opts.contentHtml.slice(1), "utf8") : opts.contentHtml }),
+                ...(inlineContentHtml !== undefined && { content_html: inlineContentHtml }),
                 ...(opts.senderName && { sender_name: opts.senderName }),
                 ...(opts.senderEmail && { sender_email: opts.senderEmail }),
                 ...(opts.featuredImageUrl && { featured_image_url: opts.featuredImageUrl }),
@@ -1244,13 +1256,20 @@ checklists ("steps") ride along when present in the JSON forms
         if (!id) {
             throw new Error("Provide a study alias or UUID, or use --clear.");
         }
-        await withClient(cmd, async (client) => {
+        await withClient(cmd, async (client, globals) => {
             const rid = resolveId(id);
             const data = await client.get(`/studies/${rid}`);
             const config = loadConfig();
             config.study = rid;
             saveConfig(config);
-            console.error(`Active study set to "${data.name || rid}".`);
+            // stdout = data: emit a JSON object so `study use --json` is capturable
+            // (e.g. `--get alias`); the human confirmation stays on stderr.
+            if (globals.json) {
+                output({ id: rid, alias: tagAlias(ALIAS_PREFIX.study, rid), name: data.name ?? null, active: true }, true, { writePath: true });
+            }
+            else {
+                console.error(`Active study set to "${data.name || rid}".`);
+            }
         });
     });
     attachStudyRunCommands(study);

package/dist/lib/docs.js CHANGED Viewed

@@ -381,12 +381,19 @@ ish iteration create --platform figma --url https://figma.com/proto \\
     --flow-name "Onboarding A"
 # Native app (ios / android): --app names the target, stored as app_artifact (no URL).
+# screen_format defaults to mobile_portrait for native (vs desktop for browser).
 ish iteration create --platform ios --app com.example.app
 ish iteration create --platform ios            # --app optional; "chosen at run time"
 # drive it locally against a booted simulator / emulator — the iteration
 # remembers the app, so no --app needed on reruns:
 ish study run --local
 ish study run --local --app ./Build.app        # override with a fresh local build
+# State reset between participants: with a local .app build the runner does a
+# clean uninstall+reinstall before each participant, so state one participant
+# creates (a reminder, a saved record) does NOT leak into the next. A bare
+# bundle-id / system-app target can't be reinstalled — it relaunches and warns
+# that state may persist; pass --app <.app> or run one participant per study for
+# a guaranteed clean start. See guides/native-app.
 # Text/email content from a file:
 ish iteration create --content-text @./email.html --title "Newsletter"
@@ -2000,14 +2007,22 @@ Interactive study runs produce per-frame screenshots server-side. They
 let you (or an agent) see what participants actually saw alongside the
 sentiment summary.
-## Screenshots — remote interactive studies only
+## Screenshots — the grouped index vs. per-interaction frames
-Screenshots are produced by remote interactive runs only — chat / video /
-text studies don't have them. **Local runs** (\`ish study run --local\`,
-including ios/android) don't push screenshots to the server either; they
-write a per-step HTML debug report to \`~/.ish/debug/sim-*.html\` (the path is
-printed at the end of the run). \`ish study screenshots list\` on a local-only
-study therefore returns nothing useful — open the debug report instead.
+\`ish study screenshots list\` reads the **frame-grouped index**
+(\`/screenshots/grouped\`), a remote-interactive-run artifact keyed by frame —
+chat / video / text studies don't populate it, and neither do local runs. On a
+local-only study this endpoint currently 500s rather than returning an empty
+index, so \`screenshots list\` isn't the way to view a local run.
+**Local runs DO still capture per-interaction screenshots** (\`ish study run
+--local\`, including ios/android). They live on the participant interaction rows,
+not in the grouped index — read them two ways:
+- \`ish study get <id>\` — each interaction carries a \`screenshot_url\` (a public
+  storage URL you can fetch directly).
+- the per-step HTML debug report at \`~/.ish/debug/sim-*.html\` (path printed at
+  the end of each local run).
 ### CLI
@@ -2180,6 +2195,12 @@ The CLI guarantees these contracts so agents can chain safely:
   \`--fields\` set, you can identify the affected resource. Default
   write-path JSON is compact (\`{id, alias, name, updated_at,
   ...changed_fields}\`); pass \`--verbose\` for the full server payload.
+- **Active-context setters emit a capturable object on stdout.** The
+  \`use\` commands (\`study use\`, \`workspace use\`, \`ask use\`) write their
+  human "Active … set to …" confirmation to **stderr** and, under
+  \`--json\`, an \`{id, alias, name, active}\` object to **stdout** — so
+  \`ish study use s-b2c --json --get alias\` is capturable. (\`--clear\`
+  is a stderr-only confirmation.)
 - **Write-path echoes keep collection arrays even when empty.** On a
   create/update echo (e.g. \`study create\`/\`study update\`), entity
   collections like \`assignments\`, \`interview_questions\`, and
@@ -3211,6 +3232,7 @@ request time, for any client, is the backend's \`TIER_LIMITS\` dict in
 | \`maxCustomPersons\`          | 3    | 10    | 10      | ∞   | ∞          |
 | \`maxConcurrentParticipants\` | 3    | 3     | 10      | 50  | ∞          |
 | \`maxWorkspaceMembers\`       | 1    | 1     | 1       | 10  | ∞          |
+| \`maxSeats\`                  | 1    | 1     | 1       | 10  | ∞          |
 Commands that may hit a limit: \`ish workspace create\`,
 \`ish study create\`, \`ish study generate\`, \`ish iteration create\`,
@@ -4340,6 +4362,110 @@ The viewer is only as good as the run behind it. Before sharing, make sure:
 - \`concepts/active-context\` — \`ish study share\` defaults to the active study.
 - \`reference/json-mode\` — the \`{ token, share_url, … }\` envelope.
 `;
+const GUIDE_NATIVE_APP = `# guide: native app studies (ios / android)
+Run an interactive study against a **native iOS or Android app** on a local
+simulator/emulator, driven step-by-step by AI participants — the native
+counterpart of a browser (URL) interactive study.
+## 1. Check the local toolchain
+\`\`\`
+ish check ios        # Xcode/xcrun, a booted simulator, the WDA runner, auth
+ish check android    # adb, a running emulator
+ish setup            # fetch/install whatever's missing (WDA runner, etc.)
+\`\`\`
+\`check\` must be green before a run — it verifies the whole chain (simulator
+booted → automation runner installed → logged in), so you don't discover a
+missing piece mid-run.
+## 2. Create the study, then a native iteration
+The study (assignments + questionnaire) is platform-agnostic. The **iteration**
+names the platform and the app:
+\`\`\`
+ish study create --name "Onboarding" --modality interactive \\
+    --assignment "Explore:Open the app and look around" \\
+    --question "How clear was it?"
+# --app is a bundle id (already-installed / system app) OR a local .app/.apk path.
+ish iteration create --platform ios --app com.acme.app          # installed bundle id
+ish iteration create --platform ios --app ./Build/MyApp.app     # local build (installed for you)
+ish iteration create --platform android --app ./app-debug.apk
+\`\`\`
+The target is stored as \`app_artifact\`; no \`--url\` is needed. \`screen_format\`
+defaults to **mobile_portrait** for native (vs desktop for browser).
+## 3. Run locally
+\`\`\`
+ish study run --local --platform ios --person p-913 --wait
+ish study run --local --platform ios --all --wait        # whole matching cohort
+\`\`\`
+The platform defaults to the iteration's; \`--app\` on \`study run\` overrides the
+stored target with a fresh local build. The WebDriverAgent runner cold-starts
+slowly the first time (~30-60s) and is then reused across participants.
+## 3b. Parallel runs — \`--parallel N\` (iOS + Android)
+\`\`\`
+ish study run --local --platform ios     --all --parallel 5 --wait
+ish study run --local --platform android --all --parallel 5 --wait
+\`\`\`
+Native runs can drive a **pool of N devices** at once, one participant per
+device:
+- **iOS** reuses any booted simulators and **auto-creates + boots** the
+  shortfall, then deletes the simulators it created (reused ones are left alone).
+- **Android** reuses online emulators and **auto-launches headless emulators**
+  (tuned low-RAM), then stops the ones it started. You only need **one AVD**: the
+  pool **clones it** (a fast file-copy — no avdmanager/JDK needed) to as many as
+  it needs, and deletes the clones afterward. Make one AVD in Android Studio ›
+  Device Manager.
+N is **auto-sized to the host's RAM** —
+default 1, capped at 5. A small machine runs fewer concurrently and queues the
+rest rather than erroring, so the same command works everywhere, scaled to the
+machine. Each participant still gets a clean device per the reset rules below.
+## 4. State reset between participants (important)
+Without \`--parallel\`, participants share one simulator and run
+**sequentially**; in a pool, each device still takes one participant at a
+time. A terminate+relaunch alone does NOT clear app data, so:
+- **Local \`.app\`/\`.apk\` build** → the runner does a clean **uninstall+reinstall**
+  before each participant. State one participant creates (a saved record, a new
+  reminder) does NOT leak into the next.
+- **Bare bundle-id / system app** (e.g. \`com.apple.reminders\`) → can't be
+  reinstalled, so it relaunches and **warns once** that earlier-participant state
+  may persist. For a guaranteed clean start, pass \`--app <.app>\` or run one
+  participant per study.
+## 5. Locale / keyboard
+The simulator uses the host machine's keyboard locale. A non-English keyboard
+can derail text entry — pin a locale on the iteration (\`--locale en-US\`) for
+reproducible runs.
+## 6. Results, screenshots, transcripts
+- \`ish study results <id>\` — sentiment + interview answers, same as any study.
+- **Per-interaction screenshots** are captured even for local runs — read them
+  via \`ish study get <id>\` (each interaction carries a \`screenshot_url\`) or the
+  per-step HTML debug report at \`~/.ish/debug/sim-*.html\` (path printed at the
+  end of the run). Note \`ish study screenshots list\` reads the *remote-run*
+  frame index and won't show local frames — see reference/screenshots.
+## Related
+- \`concepts/iteration\` — \`app_artifact\`, screen_format, platforms.
+- \`reference/screenshots\` — grouped index vs per-interaction frames.
+- \`guides/first-study\` — the browser-URL version of this flow.
+`;
 const PAGES = [
     {
         slug: "overview",
@@ -4503,6 +4629,12 @@ const PAGES = [
         description: "Iterative probe loop for one specific persona: person suggest-scenarios returns LLM probes; answer them locally; person evidence add persists answers; person evidence list reads them back.",
         body: GUIDE_BUILD_SPECIFIC_PERSON,
     },
+    {
+        slug: "guides/native-app",
+        title: "guide: native app studies (ios / android)",
+        description: "Run an interactive study against a native iOS or Android app on a local simulator/emulator: check ios/android, create a --platform ios/android iteration with --app (bundle id or .app/.apk), run --local, per-participant state reset, locale/keyboard, and where local screenshots live.",
+        body: GUIDE_NATIVE_APP,
+    },
     {
         slug: "guides/mcp-add",
         title: "guide: wire ish into your AI clients (`ish mcp add`)",

package/dist/lib/local-sim/adb.d.ts CHANGED Viewed

@@ -10,12 +10,16 @@
  * backend's 0-1000 coordinates against the screencap pixel size and taps
  * directly. (Verified by the Layer-1 driver smoke; see scripts/mobile-e2e.)
  */
+/** Run `fn` with all its adb calls pinned to `serial` (parallel pool path). */
+export declare function withAdbSerial<T>(serial: string | undefined, fn: () => Promise<T>): Promise<T>;
+/** `["-s", serial]` or `[]` — the device-targeting prefix. Pure (tested). */
+export declare function serialArgs(serial: string | undefined): string[];
 /** Resolve adb, downloading Google's platform-tools on first use if not found. */
 export declare function ensureAdb(): Promise<string>;
 export declare class AdbError extends Error {
     constructor(message: string);
 }
-/** Run `adb <args>` and return trimmed stdout. Throws AdbError on failure. */
+/** Run `adb [-s serial] <args>` and return trimmed stdout. Throws AdbError on failure. */
 export declare function adb(args: string[], timeoutMs?: number): Promise<string>;
 /** Run `adb shell <args>` and return trimmed stdout. */
 export declare function adbShell(args: string[], timeoutMs?: number): Promise<string>;
@@ -62,7 +66,20 @@ export declare function currentActivity(): Promise<string>;
  * output. Returns the PNG buffer at full device resolution.
  */
 export declare function screencapPng(): Promise<Buffer>;
-/** Assert exactly one device/emulator is in the `device` state. */
+/**
+ * Parse `adb devices` output into {serial, state} rows. Pure (tested). Skips the
+ * "List of devices attached" header and blank lines.
+ */
+export declare function parseAdbDevices(out: string): Array<{
+    serial: string;
+    state: string;
+}>;
+/** List online (state==="device") serials. */
+export declare function listOnlineSerials(): Promise<string[]>;
+/**
+ * Assert the target device is online. With a serial in effect (pool path or
+ * ANDROID_SERIAL), confirm THAT serial is online. Otherwise require exactly one.
+ */
 export declare function requireOneDevice(): Promise<void>;
 export declare function inputTap(x: number, y: number): Promise<void>;
 export declare function inputSwipe(x1: number, y1: number, x2: number, y2: number, durationMs?: number): Promise<void>;

package/dist/lib/local-sim/adb.js CHANGED Viewed

@@ -14,8 +14,32 @@ import { execFile, execFileSync } from "node:child_process";
 import { existsSync, mkdirSync, writeFileSync, rmSync } from "node:fs";
 import { join } from "node:path";
 import { promisify } from "node:util";
+import { AsyncLocalStorage } from "node:async_hooks";
 import { binDir, adbBin } from "../paths.js";
 const execFileAsync = promisify(execFile);
+/**
+ * The adb serial to target for the current async call chain. A parallel run
+ * drives N emulators in ONE process; every adb call must hit the right device,
+ * but the CLI targets devices via the `adb -s <serial>` prefix, not a per-call
+ * argument threaded through ~25 functions. AsyncLocalStorage carries the serial
+ * implicitly through the call stack so `adb()` / `screencapPng()` pick it up,
+ * and two concurrent `withAdbSerial(A, …)` / `withAdbSerial(B, …)` chains stay
+ * isolated. Single-device runs leave the store empty and fall back to
+ * ANDROID_SERIAL / the one online device (unchanged behavior).
+ *
+ * The value held in the store is the trimmed serial string, or `undefined`
+ * when `withAdbSerial` was called without a usable serial.
+ */
+const serialStore = new AsyncLocalStorage();
+/**
+ * Run `fn` with all its adb calls pinned to `serial` (parallel pool path).
+ *
+ * The serial is trimmed before it is stored. A missing, empty, or
+ * whitespace-only serial is stored as `undefined`, in which case
+ * `activeSerial()` falls back to ANDROID_SERIAL for this call chain.
+ *
+ * @param serial adb device serial to target, or `undefined` for no pinning.
+ * @param fn callback executed inside the AsyncLocalStorage context.
+ * @returns the value returned by `fn`.
+ */
+export function withAdbSerial(serial, fn) {
+    return serialStore.run(serial?.trim() || undefined, fn);
+}
+/**
+ * The serial in effect for this call chain: store → ANDROID_SERIAL → none.
+ *
+ * The environment variable is only consulted when the store holds no value
+ * (`null`/`undefined`). It is trimmed, and an empty result counts as unset.
+ *
+ * @returns the serial string to target, or `undefined` when none applies.
+ */
+function activeSerial() {
+    return serialStore.getStore() ?? (process.env.ANDROID_SERIAL?.trim() || undefined);
+}
+/**
+ * `["-s", serial]` or `[]` — the device-targeting prefix. Pure (tested).
+ *
+ * @param serial device serial; any falsy value (including `""`) means "no prefix".
+ * @returns a fresh array to spread in front of the adb arguments.
+ */
+export function serialArgs(serial) {
+    return serial ? ["-s", serial] : [];
+}
 // Resolve adb without depending on the caller's PATH: ISH_ADB/ADB override → the
 // Android SDK → Homebrew → our own download cache → PATH. If none is found,
 // ensureAdb() fetches Google's standalone platform-tools (a small zip) into
@@ -105,11 +129,12 @@ export class AdbError extends Error {
         this.name = "AdbError";
     }
 }
-/** Run `adb <args>` and return trimmed stdout. Throws AdbError on failure. */
+/** Run `adb [-s serial] <args>` and return trimmed stdout. Throws AdbError on failure. */
 export async function adb(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
     const bin = await ensureAdb();
+    const full = [...serialArgs(activeSerial()), ...args];
     try {
-        const { stdout } = await execFileAsync(bin, args, {
+        const { stdout } = await execFileAsync(bin, full, {
             timeout: timeoutMs,
             maxBuffer: 4 * 1024 * 1024,
         });
@@ -117,7 +142,7 @@ export async function adb(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
     }
     catch (err) {
         const msg = err instanceof Error ? err.message : String(err);
-        throw new AdbError(`adb ${args.join(" ")} failed: ${msg}`);
+        throw new AdbError(`adb ${full.join(" ")} failed: ${msg}`);
     }
 }
 /** Run `adb shell <args>` and return trimmed stdout. */
@@ -191,8 +216,9 @@ export async function currentActivity() {
  */
 export async function screencapPng() {
     const bin = await ensureAdb();
+    const full = [...serialArgs(activeSerial()), "exec-out", "screencap", "-p"];
     try {
-        const { stdout } = await execFileAsync(bin, ["exec-out", "screencap", "-p"], {
+        const { stdout } = await execFileAsync(bin, full, {
             timeout: SCREENCAP_TIMEOUT_MS,
             maxBuffer: SCREENCAP_MAX_BUFFER,
             encoding: "buffer",
@@ -204,40 +230,62 @@ export async function screencapPng() {
         throw new AdbError(`adb exec-out screencap failed: ${msg}`);
     }
 }
-/** Assert exactly one device/emulator is in the `device` state. */
+/**
+ * Parse `adb devices` output into {serial, state} rows. Pure (tested). Skips the
+ * "List of devices attached" header and blank lines.
+ *
+ * The header is skipped by position: the first line of `out` is always
+ * dropped, whatever it contains. Each remaining non-blank line is split on
+ * tabs; the first field becomes `serial` and the second `state`. A line
+ * without a tab yields `state: ""`. The final filter drops any row whose
+ * serial is empty.
+ *
+ * @param out raw stdout of `adb devices`.
+ * @returns one `{ serial, state }` object per parsed line, in input order.
+ */
+export function parseAdbDevices(out) {
+    return out
+        .split("\n")
+        // Drop the first line (expected to be the header).
+        .slice(1)
+        // Trimming also strips a trailing "\r" from CRLF output.
+        .map((l) => l.trim())
+        .filter(Boolean)
+        .map((l) => {
+        const [serial, state] = l.split("\t");
+        return { serial: serial ?? "", state: state ?? "" };
+    })
+        .filter((d) => d.serial);
+}
+/**
+ * `adb devices` WITHOUT a serial prefix (the list is global, not per-device).
+ *
+ * Invokes the adb binary directly rather than going through `adb()`, so no
+ * `-s <serial>` arguments are prepended. Failures from `ensureAdb()` or from
+ * the child process are not caught here; they propagate to the caller as-is.
+ *
+ * @returns trimmed stdout of `adb devices`.
+ */
+async function devicesRaw() {
+    const bin = await ensureAdb();
+    const { stdout } = await execFileAsync(bin, ["devices"], {
+        timeout: DEFAULT_TIMEOUT_MS,
+        // 1 MiB output cap.
+        maxBuffer: 1024 * 1024,
+    });
+    return stdout.trim();
+}
+/**
+ * List online (state==="device") serials.
+ *
+ * Runs `adb devices`, parses it, and keeps only rows whose state is exactly
+ * `"device"`; rows in any other state are excluded.
+ *
+ * @returns the serials of all online devices, in the order adb listed them.
+ */
+export async function listOnlineSerials() {
+    return parseAdbDevices(await devicesRaw())
+        .filter((d) => d.state === "device")
+        .map((d) => d.serial);
+}
+/**
+ * Assert the target device is online. With a serial in effect (pool path or
+ * ANDROID_SERIAL), confirm THAT serial is online. Otherwise require exactly one.
+ */
 export async function requireOneDevice() {
-    let out;
+    let online;
     try {
-        out = await adb(["devices"]);
+        online = await listOnlineSerials();
     }
     catch (err) {
         const msg = err instanceof Error ? err.message : String(err);
         throw new AdbError(`Could not run adb (looked for "${findAdb() ?? "adb"}"). Run \`ish check android\` to check your setup. ${msg}`);
     }
-    // Output: "List of devices attached\n<serial>\tdevice\n..."
-    const online = out
-        .split("\n")
-        .slice(1)
-        .map((l) => l.trim())
-        .filter((l) => l && l.endsWith("\tdevice"));
     if (online.length === 0) {
         throw new AdbError("No Android device/emulator online. Run `ish check android` to check your setup and how to boot one.");
     }
-    // Honor ANDROID_SERIAL (the standard adb convention): when it names an online
-    // device, pin to it instead of failing on "more than one device". The adb
-    // wrapper inherits process.env, so every subsequent `adb` call already targets
-    // that serial — this lets multiple emulators run in parallel, each driven by a
-    // CLI invocation with its own ANDROID_SERIAL.
-    const pinned = process.env.ANDROID_SERIAL?.trim();
+    const pinned = activeSerial();
     if (pinned) {
-        if (online.some((l) => l.startsWith(`${pinned}\t`)))
+        if (online.includes(pinned))
             return;
-        throw new AdbError(`ANDROID_SERIAL=${pinned} is set but that device is not online. ` +
-            `Online: ${online.map((l) => l.split("\t")[0]).join(", ") || "none"}.`);
+        throw new AdbError(`Android device ${pinned} is not online. Online: ${online.join(", ") || "none"}.`);
     }
     if (online.length > 1) {
         throw new AdbError(`Expected exactly one Android device, found ${online.length}. ` +
-            `Stop the extras, or set ANDROID_SERIAL=<serial> to pin one (parallel runs).`);
+            `Stop the extras, or run with --parallel to pool them.`);
     }
 }
 // --- Input gestures (all in screencap pixel space) ---