@ishlabs/cli 0.27.0 → 0.27.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -45,6 +45,10 @@ The CLI resolves your auth token in this order:
45
45
 
46
46
  Test plan is available at `/Users/felixweiland/ish-cli-test-plan.md`.
47
47
 
48
+ ## Experiments
49
+
50
+ Durable records of engineering experiments (including reverted ones, so we don't re-run them) live in [`docs/experiments/`](docs/experiments/README.md).
51
+
48
52
  ---
49
53
 
50
54
  ## Concepts
@@ -125,13 +125,16 @@ async function checkSimulator() {
125
125
  catch {
126
126
  return { key: "ios_simulator", name: "iOS simulator", group: "iOS", status: "fail", message: "could not parse simctl output" };
127
127
  }
128
- if (booted.length === 1) {
129
- return { key: "ios_simulator", name: "iOS simulator", group: "iOS", status: "pass", message: `${booted[0].name} (booted)` };
128
+ if (booted.length >= 1) {
129
+ // >1 booted is fine now: a parallel run (`--parallel N`) reuses booted
130
+ // simulators and clones the shortfall, so extras are a head start, not a
131
+ // problem. A single-device (non-parallel) run still needs exactly one.
132
+ const extra = booted.length > 1
133
+ ? ` (+${booted.length - 1} more — parallel runs pool them; a single-device run needs exactly one)`
134
+ : "";
135
+ return { key: "ios_simulator", name: "iOS simulator", group: "iOS", status: "pass", message: `${booted[0].name} (booted)${extra}` };
130
136
  }
131
- if (booted.length === 0) {
132
- return { key: "ios_simulator", name: "iOS simulator", group: "iOS", status: "warn", message: "none booted", fix: "Open Simulator.app (Xcode ships simulators) or `xcrun simctl boot <udid>`" };
133
- }
134
- return { key: "ios_simulator", name: "iOS simulator", group: "iOS", status: "warn", message: `${booted.length} booted — native runs drive exactly one`, fix: "Shut down the extras" };
137
+ return { key: "ios_simulator", name: "iOS simulator", group: "iOS", status: "warn", message: "none booted", fix: "Open Simulator.app (Xcode ships simulators) or `xcrun simctl boot <udid>`" };
135
138
  }
136
139
  /** True when the prebuilt XCUITest runner (WebDriverAgent `.app`) is present. */
137
140
  function wdaBundlePresent() {
@@ -172,11 +175,18 @@ async function checkAdb() {
172
175
  .slice(1)
173
176
  .map((l) => l.trim())
174
177
  .filter((l) => l.endsWith("\tdevice"));
175
- const emulator = devices.length === 1
176
- ? { key: "android_emulator", name: "Android emulator", group: "Android", status: "pass", message: devices[0].split("\t")[0] }
177
- : devices.length === 0
178
- ? { key: "android_emulator", name: "Android emulator", group: "Android", status: "warn", message: "none online", fix: "Create + boot an AVD in Android Studio > Device Manager (or `emulator -avd <name>`)" }
179
- : { key: "android_emulator", name: "Android emulator", group: "Android", status: "warn", message: `${devices.length} online — native runs drive exactly one`, fix: "Stop the extras" };
178
+ // >1 online is fine now: `--parallel` pools emulators (and auto-launches more
179
+ // from your AVDs). A single-device (non-parallel) run still uses exactly one.
180
+ const emulator = devices.length >= 1
181
+ ? {
182
+ key: "android_emulator",
183
+ name: "Android emulator",
184
+ group: "Android",
185
+ status: "pass",
186
+ message: devices[0].split("\t")[0] +
187
+ (devices.length > 1 ? ` (+${devices.length - 1} more — parallel runs pool them)` : ""),
188
+ }
189
+ : { key: "android_emulator", name: "Android emulator", group: "Android", status: "warn", message: "none online (parallel runs auto-launch your AVDs)", fix: "Create an AVD in Android Studio > Device Manager (or `emulator -avd <name>`)" };
180
190
  return { adb, emulator };
181
191
  }
182
192
  async function checkChromium() {
@@ -9,7 +9,7 @@ import { readFileSync } from "node:fs";
9
9
  import { withClient, resolveStudy, resolveWorkspace, readFileOrStdin, collectIds } from "../lib/command-helpers.js";
10
10
  import { resolveId, tagAlias, ALIAS_PREFIX } from "../lib/alias-store.js";
11
11
  import { output, formatIterationList, ValidationError } from "../lib/output.js";
12
- import { resolveContentUrl, resolveContentUrls, resolveTextContent } from "../lib/upload.js";
12
+ import { resolveContentUrl, resolveContentUrls, resolveTextContent, archiveHtmlImages } from "../lib/upload.js";
13
13
  import { isMediaModality, validateIterationDetails, normalizeChatMode, validateRoleCriteria } from "../lib/modality.js";
14
14
  import { validateSegmentation, warnIfOverSegmented } from "../lib/segmentation.js";
15
15
  import { normalizeEnumValue, SCREEN_FORMATS } from "../lib/enums.js";
@@ -297,7 +297,9 @@ function buildIterationDetails(modality, opts) {
297
297
  if (opts.platform === "figma" && (!opts.fileKey || !opts.startNodeId)) {
298
298
  throw new Error("Figma interactive iterations require both --file-key and --start-node-id.");
299
299
  }
300
- let screenFormat = "desktop";
300
+ // Native (ios/android) targets are phones — default to mobile_portrait
301
+ // rather than desktop. An explicit --screen-format still wins below.
302
+ let screenFormat = isNativePlatform(opts.platform) ? "mobile_portrait" : "desktop";
301
303
  if (opts.screenFormat !== undefined) {
302
304
  const normalized = normalizeEnumValue(opts.screenFormat, SCREEN_FORMATS);
303
305
  if (normalized === null) {
@@ -385,7 +387,7 @@ Concept pages: ish docs get-page concepts/iteration
385
387
  .option("--platform <platform>", "Platform (browser, android, ios, figma, code) — interactive only")
386
388
  .option("--url <url>", "URL to test — interactive only (optional for ios/android native apps)")
387
389
  .option("--app <id>", "Native app bundle id (or .app/.apk path) — ios/android; supplies the iteration target so --url isn't required")
388
- .option("--screen-format <format>", "Screen format (mobile_portrait, desktop) — interactive only; hyphen/underscore variants accepted")
390
+ .option("--screen-format <format>", "Screen format (mobile_portrait, desktop) — interactive only; hyphen/underscore variants accepted. Default: desktop, or mobile_portrait for native ios/android")
389
391
  .option("--locale <locale>", "Locale code (e.g. en-US) — interactive only")
390
392
  .option("--file-key <key>", "Figma file key — required when --platform=figma")
391
393
  .option("--start-node-id <id>", "Figma start node id — required when --platform=figma")
@@ -622,8 +624,15 @@ Next: \`ish study run\` to dispatch simulations against this iteration.`)
622
624
  if (isMedia) {
623
625
  if (resolved.contentText)
624
626
  resolved.contentText = resolveTextContent(resolved.contentText);
625
- if (resolved.contentHtml)
627
+ if (resolved.contentHtml) {
626
628
  resolved.contentHtml = resolveTextContent(resolved.contentHtml);
629
+ // Archive external <img> images onto workspace storage so the
630
+ // render-to-image worker (egress-denied to other origins) can
631
+ // fetch them. Mirrors the FE paste pipeline; text modality only.
632
+ if (modality === "text") {
633
+ resolved.contentHtml = await archiveHtmlImages(client, studyId, resolved.contentHtml, { quiet: globals.quiet });
634
+ }
635
+ }
627
636
  if (resolved.copyText)
628
637
  resolved.copyText = resolveTextContent(resolved.copyText);
629
638
  if (resolved.copyHtml)
@@ -16,13 +16,10 @@ import { fetchStudyParticipants } from "../lib/study-participants.js";
16
16
  import { streamStudyEvents } from "../lib/study-events.js";
17
17
  import { isMediaModality, isChatModality, iterationHasContent, describeRequiredContentFlag, readChatMode, readParticipantPairConfig, summarizeRoleCriteria, toModality, } from "../lib/modality.js";
18
18
  // NOTE: local-sim modules are loaded via dynamic import at the `--local`
19
- // branch below, NOT statically here. `local-sim/install.ts` deep-imports
20
- // `playwright-core/lib/server/registry/index`, which is not exposed by
21
- // playwright-core's `exports` map Node refuses to resolve it during
22
- // module load (ERR_PACKAGE_PATH_NOT_EXPORTED), so a static import here
23
- // would crash *every* `ish` invocation on the npm-installed CLI, not
24
- // just `study run --local`. The bun-compiled binary bundles the deep
25
- // path so it doesn't hit Node's resolver; only the npm path is sensitive.
19
+ // branch below, NOT statically here, so that plain API commands never pay
20
+ // for (or crash on) playwright-core. The registry deep import inside
21
+ // `local-sim/install.ts` is itself lazy for the same reason — see the
22
+ // comment in `installBrowser()`.
26
23
  import { estimateChatPair, estimateChatSolo, estimateMediaRun } from "../lib/billing.js";
27
24
  import { reportReadiness } from "../lib/report-readiness.js";
28
25
  import { runChecks, scopeChecks, overall } from "./doctor.js";
@@ -324,7 +321,7 @@ export function attachStudyRunCommands(study) {
324
321
  .option("--slow-mo <ms>", "Slow down actions by ms (local mode only)")
325
322
  .option("--devtools", "Open Chrome DevTools (local mode only)")
326
323
  .option("--debug", "Enable detailed debug logging to stderr and ~/.ish/local-sim.log")
327
- .option("--parallel <n>", "Run N participants in parallel (local mode only, default: all)")
324
+ .option("--parallel <n>", "Run N participants in parallel (local mode only). Browser: default all. Native iOS/Android: pools N auto-provisioned devices — simulators (iOS) / headless emulators from your AVDs (Android) — default 1, capped at 5, auto-sized to host RAM (and AVD count).")
328
325
  .option("--platform <platform>", "Local target platform: 'web' (Playwright), 'android' (adb emulator), or 'ios' (simctl+idb simulator). Defaults to the iteration's platform.")
329
326
  .option("--app <path>", "Native local mode: path to an .apk (android) / .app (ios) to install, or an installed package/bundle id to launch. The extension implies --platform.")
330
327
  .addHelpText("after", `
@@ -737,10 +734,6 @@ Examples:
737
734
  }
738
735
  log("");
739
736
  }
740
- if (opts.local) {
741
- const { ensureBrowser } = await import("../lib/local-sim/install.js");
742
- await ensureBrowser({ quiet: globals.quiet, skipPrompt: globals.json });
743
- }
744
737
  // Step 5: Either reuse the iteration's participants or batch-create new ones
745
738
  let createdParticipants;
746
739
  // Pair-mode bookkeeping: the dispatch endpoint takes
@@ -763,6 +756,13 @@ Examples:
763
756
  ?? platformFromApp
764
757
  ?? detailsView.platform
765
758
  ?? "browser";
759
+ // Chromium is only needed for the browser local path. iOS/Android
760
+ // local runs drive a simulator/emulator and must not block on (or
761
+ // prompt for) a browser download.
762
+ if (opts.local && normalizePlatform(resolvedPlatform) === "browser") {
763
+ const { ensureBrowser } = await import("../lib/local-sim/install.js");
764
+ await ensureBrowser({ quiet: globals.quiet, skipPrompt: globals.json });
765
+ }
766
766
  // Best-effort native-readiness report. When this is a LOCAL native run
767
767
  // (iOS/Android driven on this developer's machine), fire-and-forget a
768
768
  // fresh, platform-scoped `runChecks()` to the backend so the web app
@@ -23,14 +23,16 @@ import { resolveId } from "../lib/alias-store.js";
23
23
  import { output, printTable } from "../lib/output.js";
24
24
  import { ApiError } from "../lib/api-client.js";
25
25
  /**
26
- * Server-side screenshots are produced by remote interactive runs only. A
27
- * study whose only runs were local (`ish study run --local`) has none — and the
28
- * grouped endpoint currently 500s instead of returning an empty index. Tag this
29
- * hint onto the error so the bare 500 points the user at the local debug report.
26
+ * The frame-grouped screenshot INDEX (`/screenshots/grouped`) is a remote-run
27
+ * artifact: it groups by frame_version_id, which local runs don't create — and
28
+ * the endpoint currently 500s for a local-only study instead of returning an
29
+ * empty index. Local (`--local`) runs DO still capture per-interaction
30
+ * screenshots; they just live on the participant rows, not in this index. Tag
31
+ * this hint onto the error so the bare 500 points the user at where they ARE.
30
32
  */
31
33
  const LOCAL_RUN_SCREENSHOT_HINT = [
32
- "Screenshots are produced by remote runs only.",
33
- "Ran this study locally (--local)? The per-step screenshots are in the HTML debug report under ~/.ish/debug/ (path printed at the end of each local run).",
34
+ "The frame-grouped screenshot index is a remote-run artifact (this endpoint may 500 for local-only studies).",
35
+ "Ran this study locally (--local)? Per-interaction screenshots ARE captured — read them via `ish study get <id>` (each interaction carries a screenshot_url), or open the per-step HTML debug report under ~/.ish/debug/ (path printed at the end of each local run).",
34
36
  ];
35
37
  /**
36
38
  * GET the frame-grouped screenshot index, tagging the local-run hint onto any
@@ -136,12 +138,13 @@ Examples:
136
138
  $ ish study screenshots download <study-id> --id <scid> --out shot.png
137
139
  $ ish study screenshots download <study-id> --all --out ./shots/
138
140
 
139
- Screenshots are produced server-side by remote interactive runs only — chat /
140
- video / text studies don't have them, and neither do local runs
141
- (\`ish study run --local\`), which instead write a per-step HTML debug report to
142
- ~/.ish/debug/ (the path is printed at the end of each local run). Each row's
143
- storage URL is self-credentialed, so the CLI fetches bytes without forwarding
144
- your bearer.`);
141
+ This frame-grouped index is built by remote interactive runs — chat / video /
142
+ text studies don't populate it, and neither do local runs (\`ish study run
143
+ --local\`). Local runs still CAPTURE per-interaction screenshots: read them via
144
+ \`ish study get <id>\` (each interaction carries a screenshot_url) or the per-step
145
+ HTML debug report under ~/.ish/debug/ (path printed at the end of each local
146
+ run). Each row's storage URL is self-credentialed, so the CLI fetches bytes
147
+ without forwarding your bearer.`);
145
148
  screenshots
146
149
  .command("list", { isDefault: true })
147
150
  .description("List screenshots for a study (frame-grouped).")
@@ -350,8 +350,20 @@ Next: configure a run with \`ish iteration create --study <id>\`,
350
350
  validateSegmentation(inlineMediaExtras.segmentation);
351
351
  warnIfOverSegmented(inlineMediaExtras.segmentation, { quiet: globals.quietExplicit });
352
352
  }
353
+ let inlineContentHtml;
354
+ if (opts.contentHtml) {
355
+ inlineContentHtml = opts.contentHtml.startsWith("@")
356
+ ? readFileSync(opts.contentHtml.slice(1), "utf8")
357
+ : opts.contentHtml;
358
+ // The study does not exist yet here, so we cannot archive remote
359
+ // images onto workspace storage (the render worker egress-denies
360
+ // other origins). Point the operator at the archive-capable flow.
361
+ if (/<img\b[^>]*\bsrc\s*=\s*["']https?:\/\//i.test(inlineContentHtml) && !globals.quietExplicit) {
362
+ process.stderr.write("Note: --content-html has remote <img> images, which `study create` cannot archive (the study does not exist yet) — they will not render. To archive them, run `ish study create` without content, then `ish iteration create --content-html ...`.\n");
363
+ }
364
+ }
353
365
  const inlineEmailExtras = {
354
- ...(opts.contentHtml && { content_html: opts.contentHtml.startsWith("@") ? readFileSync(opts.contentHtml.slice(1), "utf8") : opts.contentHtml }),
366
+ ...(inlineContentHtml !== undefined && { content_html: inlineContentHtml }),
355
367
  ...(opts.senderName && { sender_name: opts.senderName }),
356
368
  ...(opts.senderEmail && { sender_email: opts.senderEmail }),
357
369
  ...(opts.featuredImageUrl && { featured_image_url: opts.featuredImageUrl }),
@@ -1244,13 +1256,20 @@ checklists ("steps") ride along when present in the JSON forms
1244
1256
  if (!id) {
1245
1257
  throw new Error("Provide a study alias or UUID, or use --clear.");
1246
1258
  }
1247
- await withClient(cmd, async (client) => {
1259
+ await withClient(cmd, async (client, globals) => {
1248
1260
  const rid = resolveId(id);
1249
1261
  const data = await client.get(`/studies/${rid}`);
1250
1262
  const config = loadConfig();
1251
1263
  config.study = rid;
1252
1264
  saveConfig(config);
1253
- console.error(`Active study set to "${data.name || rid}".`);
1265
+ // stdout = data: emit a JSON object so `study use --json` is capturable
1266
+ // (e.g. `--get alias`); the human confirmation stays on stderr.
1267
+ if (globals.json) {
1268
+ output({ id: rid, alias: tagAlias(ALIAS_PREFIX.study, rid), name: data.name ?? null, active: true }, true, { writePath: true });
1269
+ }
1270
+ else {
1271
+ console.error(`Active study set to "${data.name || rid}".`);
1272
+ }
1254
1273
  });
1255
1274
  });
1256
1275
  attachStudyRunCommands(study);
package/dist/lib/docs.js CHANGED
@@ -381,12 +381,19 @@ ish iteration create --platform figma --url https://figma.com/proto \\
381
381
  --flow-name "Onboarding A"
382
382
 
383
383
  # Native app (ios / android): --app names the target, stored as app_artifact (no URL).
384
+ # screen_format defaults to mobile_portrait for native (vs desktop for browser).
384
385
  ish iteration create --platform ios --app com.example.app
385
386
  ish iteration create --platform ios # --app optional; "chosen at run time"
386
387
  # drive it locally against a booted simulator / emulator — the iteration
387
388
  # remembers the app, so no --app needed on reruns:
388
389
  ish study run --local
389
390
  ish study run --local --app ./Build.app # override with a fresh local build
391
+ # State reset between participants: with a local .app build the runner does a
392
+ # clean uninstall+reinstall before each participant, so state one participant
393
+ # creates (a reminder, a saved record) does NOT leak into the next. A bare
394
+ # bundle-id / system-app target can't be reinstalled — it relaunches and warns
395
+ # that state may persist; pass --app <.app> or run one participant per study for
396
+ # a guaranteed clean start. See guides/native-app.
390
397
 
391
398
  # Text/email content from a file:
392
399
  ish iteration create --content-text @./email.html --title "Newsletter"
@@ -2000,14 +2007,22 @@ Interactive study runs produce per-frame screenshots server-side. They
2000
2007
  let you (or an agent) see what participants actually saw alongside the
2001
2008
  sentiment summary.
2002
2009
 
2003
- ## Screenshots — remote interactive studies only
2010
+ ## Screenshots — the grouped index vs. per-interaction frames
2004
2011
 
2005
- Screenshots are produced by remote interactive runs only — chat / video /
2006
- text studies don't have them. **Local runs** (\`ish study run --local\`,
2007
- including ios/android) don't push screenshots to the server either; they
2008
- write a per-step HTML debug report to \`~/.ish/debug/sim-*.html\` (the path is
2009
- printed at the end of the run). \`ish study screenshots list\` on a local-only
2010
- study therefore returns nothing useful — open the debug report instead.
2012
+ \`ish study screenshots list\` reads the **frame-grouped index**
2013
+ (\`/screenshots/grouped\`), a remote-interactive-run artifact keyed by frame —
2014
+ chat / video / text studies don't populate it, and neither do local runs. On a
2015
+ local-only study this endpoint currently 500s rather than returning an empty
2016
+ index, so \`screenshots list\` isn't the way to view a local run.
2017
+
2018
+ **Local runs DO still capture per-interaction screenshots** (\`ish study run
2019
+ --local\`, including ios/android). They live on the participant interaction rows,
2020
+ not in the grouped index — read them two ways:
2021
+
2022
+ - \`ish study get <id>\` — each interaction carries a \`screenshot_url\` (a public
2023
+ storage URL you can fetch directly).
2024
+ - the per-step HTML debug report at \`~/.ish/debug/sim-*.html\` (path printed at
2025
+ the end of each local run).
2011
2026
 
2012
2027
  ### CLI
2013
2028
 
@@ -2180,6 +2195,12 @@ The CLI guarantees these contracts so agents can chain safely:
2180
2195
  \`--fields\` set, you can identify the affected resource. Default
2181
2196
  write-path JSON is compact (\`{id, alias, name, updated_at,
2182
2197
  ...changed_fields}\`); pass \`--verbose\` for the full server payload.
2198
+ - **Active-context setters emit a capturable object on stdout.** The
2199
+ \`use\` commands (\`study use\`, \`workspace use\`, \`ask use\`) write their
2200
+ human "Active … set to …" confirmation to **stderr** and, under
2201
+ \`--json\`, an \`{id, alias, name, active}\` object to **stdout** — so
2202
+ \`ish study use s-b2c --json --get alias\` is capturable. (\`--clear\`
2203
+ is a stderr-only confirmation.)
2183
2204
  - **Write-path echoes keep collection arrays even when empty.** On a
2184
2205
  create/update echo (e.g. \`study create\`/\`study update\`), entity
2185
2206
  collections like \`assignments\`, \`interview_questions\`, and
@@ -3211,6 +3232,7 @@ request time, for any client, is the backend's \`TIER_LIMITS\` dict in
3211
3232
  | \`maxCustomPersons\` | 3 | 10 | 10 | ∞ | ∞ |
3212
3233
  | \`maxConcurrentParticipants\` | 3 | 3 | 10 | 50 | ∞ |
3213
3234
  | \`maxWorkspaceMembers\` | 1 | 1 | 1 | 10 | ∞ |
3235
+ | \`maxSeats\` | 1 | 1 | 1 | 10 | ∞ |
3214
3236
 
3215
3237
  Commands that may hit a limit: \`ish workspace create\`,
3216
3238
  \`ish study create\`, \`ish study generate\`, \`ish iteration create\`,
@@ -4340,6 +4362,110 @@ The viewer is only as good as the run behind it. Before sharing, make sure:
4340
4362
  - \`concepts/active-context\` — \`ish study share\` defaults to the active study.
4341
4363
  - \`reference/json-mode\` — the \`{ token, share_url, … }\` envelope.
4342
4364
  `;
4365
+ const GUIDE_NATIVE_APP = `# guide: native app studies (ios / android)
4366
+
4367
+ Run an interactive study against a **native iOS or Android app** on a local
4368
+ simulator/emulator, driven step-by-step by AI participants — the native
4369
+ counterpart of a browser (URL) interactive study.
4370
+
4371
+ ## 1. Check the local toolchain
4372
+
4373
+ \`\`\`
4374
+ ish check ios # Xcode/xcrun, a booted simulator, the WDA runner, auth
4375
+ ish check android # adb, a running emulator
4376
+ ish setup # fetch/install whatever's missing (WDA runner, etc.)
4377
+ \`\`\`
4378
+
4379
+ \`check\` must be green before a run — it verifies the whole chain (simulator
4380
+ booted → automation runner installed → logged in), so you don't discover a
4381
+ missing piece mid-run.
4382
+
4383
+ ## 2. Create the study, then a native iteration
4384
+
4385
+ The study (assignments + questionnaire) is platform-agnostic. The **iteration**
4386
+ names the platform and the app:
4387
+
4388
+ \`\`\`
4389
+ ish study create --name "Onboarding" --modality interactive \\
4390
+ --assignment "Explore:Open the app and look around" \\
4391
+ --question "How clear was it?"
4392
+
4393
+ # --app is a bundle id (already-installed / system app) OR a local .app/.apk path.
4394
+ ish iteration create --platform ios --app com.acme.app # installed bundle id
4395
+ ish iteration create --platform ios --app ./Build/MyApp.app # local build (installed for you)
4396
+ ish iteration create --platform android --app ./app-debug.apk
4397
+ \`\`\`
4398
+
4399
+ The target is stored as \`app_artifact\`; no \`--url\` is needed. \`screen_format\`
4400
+ defaults to **mobile_portrait** for native (vs desktop for browser).
4401
+
4402
+ ## 3. Run locally
4403
+
4404
+ \`\`\`
4405
+ ish study run --local --platform ios --person p-913 --wait
4406
+ ish study run --local --platform ios --all --wait # whole matching cohort
4407
+ \`\`\`
4408
+
4409
+ The platform defaults to the iteration's; \`--app\` on \`study run\` overrides the
4410
+ stored target with a fresh local build. The WebDriverAgent runner cold-starts
4411
+ slowly the first time (~30-60s) and is then reused across participants.
4412
+
4413
+ ## 3b. Parallel runs — \`--parallel N\` (iOS + Android)
4414
+
4415
+ \`\`\`
4416
+ ish study run --local --platform ios --all --parallel 5 --wait
4417
+ ish study run --local --platform android --all --parallel 5 --wait
4418
+ \`\`\`
4419
+
4420
+ Native runs can drive a **pool of N devices** at once, one participant per
4421
+ device:
4422
+ - **iOS** reuses any booted simulators and **auto-creates + boots** the
4423
+ shortfall, then deletes the simulators it created (reused ones are left alone).
4424
+ - **Android** reuses online emulators and **auto-launches headless emulators**
4425
+ (tuned low-RAM), then stops the ones it started. You only need **one AVD**: the
4426
+ pool **clones it** (a fast file-copy — no avdmanager/JDK needed) to as many as
4427
+ it needs, and deletes the clones afterward. Make one AVD in Android Studio ›
4428
+ Device Manager.
4429
+
4430
+ N is **auto-sized to the host's RAM** —
4431
+ default 1, capped at 5. A small machine runs fewer concurrently and queues the
4432
+ rest rather than erroring, so the same command works everywhere, scaled to the
4433
+ machine. Each participant still gets a clean device per the reset rules below.
4434
+
4435
+ ## 4. State reset between participants (important)
4436
+
4437
+ Without \`--parallel\`, native participants share one simulator and run **sequentially**. A
4438
+ terminate+relaunch alone does NOT clear app data, so:
4439
+
4440
+ - **Local \`.app\`/\`.apk\` build** → the runner does a clean **uninstall+reinstall**
4441
+ before each participant. State one participant creates (a saved record, a new
4442
+ reminder) does NOT leak into the next.
4443
+ - **Bare bundle-id / system app** (e.g. \`com.apple.reminders\`) → can't be
4444
+ reinstalled, so it relaunches and **warns once** that earlier-participant state
4445
+ may persist. For a guaranteed clean start, pass \`--app <.app>\` or run one
4446
+ participant per study.
4447
+
4448
+ ## 5. Locale / keyboard
4449
+
4450
+ The simulator uses the host machine's keyboard locale. A non-English keyboard
4451
+ can derail text entry — pin a locale on the iteration (\`--locale en-US\`) for
4452
+ reproducible runs.
4453
+
4454
+ ## 6. Results, screenshots, transcripts
4455
+
4456
+ - \`ish study results <id>\` — sentiment + interview answers, same as any study.
4457
+ - **Per-interaction screenshots** are captured even for local runs — read them
4458
+ via \`ish study get <id>\` (each interaction carries a \`screenshot_url\`) or the
4459
+ per-step HTML debug report at \`~/.ish/debug/sim-*.html\` (path printed at the
4460
+ end of the run). Note \`ish study screenshots list\` reads the *remote-run*
4461
+ frame index and won't show local frames — see reference/screenshots.
4462
+
4463
+ ## Related
4464
+
4465
+ - \`concepts/iteration\` — \`app_artifact\`, screen_format, platforms.
4466
+ - \`reference/screenshots\` — grouped index vs per-interaction frames.
4467
+ - \`guides/first-study\` — the browser-URL version of this flow.
4468
+ `;
4343
4469
  const PAGES = [
4344
4470
  {
4345
4471
  slug: "overview",
@@ -4503,6 +4629,12 @@ const PAGES = [
4503
4629
  description: "Iterative probe loop for one specific persona: person suggest-scenarios returns LLM probes; answer them locally; person evidence add persists answers; person evidence list reads them back.",
4504
4630
  body: GUIDE_BUILD_SPECIFIC_PERSON,
4505
4631
  },
4632
+ {
4633
+ slug: "guides/native-app",
4634
+ title: "guide: native app studies (ios / android)",
4635
+ description: "Run an interactive study against a native iOS or Android app on a local simulator/emulator: check ios/android, create a --platform ios/android iteration with --app (bundle id or .app/.apk), run --local, per-participant state reset, locale/keyboard, and where local screenshots live.",
4636
+ body: GUIDE_NATIVE_APP,
4637
+ },
4506
4638
  {
4507
4639
  slug: "guides/mcp-add",
4508
4640
  title: "guide: wire ish into your AI clients (`ish mcp add`)",
@@ -10,12 +10,16 @@
10
10
  * backend's 0-1000 coordinates against the screencap pixel size and taps
11
11
  * directly. (Verified by the Layer-1 driver smoke; see scripts/mobile-e2e.)
12
12
  */
13
+ /** Run `fn` with all its adb calls pinned to `serial` (parallel pool path). */
14
+ export declare function withAdbSerial<T>(serial: string | undefined, fn: () => Promise<T>): Promise<T>;
15
+ /** `["-s", serial]` or `[]` — the device-targeting prefix. Pure (tested). */
16
+ export declare function serialArgs(serial: string | undefined): string[];
13
17
  /** Resolve adb, downloading Google's platform-tools on first use if not found. */
14
18
  export declare function ensureAdb(): Promise<string>;
15
19
  export declare class AdbError extends Error {
16
20
  constructor(message: string);
17
21
  }
18
- /** Run `adb <args>` and return trimmed stdout. Throws AdbError on failure. */
22
+ /** Run `adb [-s serial] <args>` and return trimmed stdout. Throws AdbError on failure. */
19
23
  export declare function adb(args: string[], timeoutMs?: number): Promise<string>;
20
24
  /** Run `adb shell <args>` and return trimmed stdout. */
21
25
  export declare function adbShell(args: string[], timeoutMs?: number): Promise<string>;
@@ -62,7 +66,20 @@ export declare function currentActivity(): Promise<string>;
62
66
  * output. Returns the PNG buffer at full device resolution.
63
67
  */
64
68
  export declare function screencapPng(): Promise<Buffer>;
65
- /** Assert exactly one device/emulator is in the `device` state. */
69
+ /**
70
+ * Parse `adb devices` output into {serial, state} rows. Pure (tested). Skips the
71
+ * "List of devices attached" header and blank lines.
72
+ */
73
+ export declare function parseAdbDevices(out: string): Array<{
74
+ serial: string;
75
+ state: string;
76
+ }>;
77
+ /** List online (state==="device") serials. */
78
+ export declare function listOnlineSerials(): Promise<string[]>;
79
+ /**
80
+ * Assert the target device is online. With a serial in effect (pool path or
81
+ * ANDROID_SERIAL), confirm THAT serial is online. Otherwise require exactly one.
82
+ */
66
83
  export declare function requireOneDevice(): Promise<void>;
67
84
  export declare function inputTap(x: number, y: number): Promise<void>;
68
85
  export declare function inputSwipe(x1: number, y1: number, x2: number, y2: number, durationMs?: number): Promise<void>;
@@ -14,8 +14,32 @@ import { execFile, execFileSync } from "node:child_process";
14
14
  import { existsSync, mkdirSync, writeFileSync, rmSync } from "node:fs";
15
15
  import { join } from "node:path";
16
16
  import { promisify } from "node:util";
17
+ import { AsyncLocalStorage } from "node:async_hooks";
17
18
  import { binDir, adbBin } from "../paths.js";
18
19
  const execFileAsync = promisify(execFile);
20
+ /**
21
+ * The adb serial to target for the current async call chain. A parallel run
22
+ * drives N emulators in ONE process; every adb call must hit the right device,
23
+ * but the CLI targets devices via the `adb -s <serial>` prefix, not a per-call
24
+ * argument threaded through ~25 functions. AsyncLocalStorage carries the serial
25
+ * implicitly through the call stack so `adb()` / `screencapPng()` pick it up,
26
+ * and two concurrent `withAdbSerial(A, …)` / `withAdbSerial(B, …)` chains stay
27
+ * isolated. Single-device runs leave the store empty and fall back to
28
+ * ANDROID_SERIAL / the one online device (unchanged behavior).
29
+ */
30
+ const serialStore = new AsyncLocalStorage();
31
+ /** Run `fn` with all its adb calls pinned to `serial` (parallel pool path). */
32
+ export function withAdbSerial(serial, fn) {
33
+ return serialStore.run(serial?.trim() || undefined, fn);
34
+ }
35
+ /** The serial in effect for this call chain: store → ANDROID_SERIAL → none. */
36
+ function activeSerial() {
37
+ return serialStore.getStore() ?? (process.env.ANDROID_SERIAL?.trim() || undefined);
38
+ }
39
+ /** `["-s", serial]` or `[]` — the device-targeting prefix. Pure (tested). */
40
+ export function serialArgs(serial) {
41
+ return serial ? ["-s", serial] : [];
42
+ }
19
43
  // Resolve adb without depending on the caller's PATH: ISH_ADB/ADB override → the
20
44
  // Android SDK → Homebrew → our own download cache → PATH. If none is found,
21
45
  // ensureAdb() fetches Google's standalone platform-tools (a small zip) into
@@ -105,11 +129,12 @@ export class AdbError extends Error {
105
129
  this.name = "AdbError";
106
130
  }
107
131
  }
108
- /** Run `adb <args>` and return trimmed stdout. Throws AdbError on failure. */
132
+ /** Run `adb [-s serial] <args>` and return trimmed stdout. Throws AdbError on failure. */
109
133
  export async function adb(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
110
134
  const bin = await ensureAdb();
135
+ const full = [...serialArgs(activeSerial()), ...args];
111
136
  try {
112
- const { stdout } = await execFileAsync(bin, args, {
137
+ const { stdout } = await execFileAsync(bin, full, {
113
138
  timeout: timeoutMs,
114
139
  maxBuffer: 4 * 1024 * 1024,
115
140
  });
@@ -117,7 +142,7 @@ export async function adb(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
117
142
  }
118
143
  catch (err) {
119
144
  const msg = err instanceof Error ? err.message : String(err);
120
- throw new AdbError(`adb ${args.join(" ")} failed: ${msg}`);
145
+ throw new AdbError(`adb ${full.join(" ")} failed: ${msg}`);
121
146
  }
122
147
  }
123
148
  /** Run `adb shell <args>` and return trimmed stdout. */
@@ -191,8 +216,9 @@ export async function currentActivity() {
191
216
  */
192
217
  export async function screencapPng() {
193
218
  const bin = await ensureAdb();
219
+ const full = [...serialArgs(activeSerial()), "exec-out", "screencap", "-p"];
194
220
  try {
195
- const { stdout } = await execFileAsync(bin, ["exec-out", "screencap", "-p"], {
221
+ const { stdout } = await execFileAsync(bin, full, {
196
222
  timeout: SCREENCAP_TIMEOUT_MS,
197
223
  maxBuffer: SCREENCAP_MAX_BUFFER,
198
224
  encoding: "buffer",
@@ -204,40 +230,62 @@ export async function screencapPng() {
204
230
  throw new AdbError(`adb exec-out screencap failed: ${msg}`);
205
231
  }
206
232
  }
207
- /** Assert exactly one device/emulator is in the `device` state. */
233
+ /**
234
+ * Parse `adb devices` output into {serial, state} rows. Pure (tested). Skips the
235
+ * "List of devices attached" header and blank lines.
236
+ */
237
+ export function parseAdbDevices(out) {
238
+ return out
239
+ .split("\n")
240
+ .slice(1)
241
+ .map((l) => l.trim())
242
+ .filter(Boolean)
243
+ .map((l) => {
244
+ const [serial, state] = l.split("\t");
245
+ return { serial: serial ?? "", state: state ?? "" };
246
+ })
247
+ .filter((d) => d.serial);
248
+ }
249
+ /** `adb devices` WITHOUT a serial prefix (the list is global, not per-device). */
250
+ async function devicesRaw() {
251
+ const bin = await ensureAdb();
252
+ const { stdout } = await execFileAsync(bin, ["devices"], {
253
+ timeout: DEFAULT_TIMEOUT_MS,
254
+ maxBuffer: 1024 * 1024,
255
+ });
256
+ return stdout.trim();
257
+ }
258
+ /** List online (state==="device") serials. */
259
+ export async function listOnlineSerials() {
260
+ return parseAdbDevices(await devicesRaw())
261
+ .filter((d) => d.state === "device")
262
+ .map((d) => d.serial);
263
+ }
264
+ /**
265
+ * Assert the target device is online. With a serial in effect (pool path or
266
+ * ANDROID_SERIAL), confirm THAT serial is online. Otherwise require exactly one.
267
+ */
208
268
  export async function requireOneDevice() {
209
- let out;
269
+ let online;
210
270
  try {
211
- out = await adb(["devices"]);
271
+ online = await listOnlineSerials();
212
272
  }
213
273
  catch (err) {
214
274
  const msg = err instanceof Error ? err.message : String(err);
215
275
  throw new AdbError(`Could not run adb (looked for "${findAdb() ?? "adb"}"). Run \`ish check android\` to check your setup. ${msg}`);
216
276
  }
217
- // Output: "List of devices attached\n<serial>\tdevice\n..."
218
- const online = out
219
- .split("\n")
220
- .slice(1)
221
- .map((l) => l.trim())
222
- .filter((l) => l && l.endsWith("\tdevice"));
223
277
  if (online.length === 0) {
224
278
  throw new AdbError("No Android device/emulator online. Run `ish check android` to check your setup and how to boot one.");
225
279
  }
226
- // Honor ANDROID_SERIAL (the standard adb convention): when it names an online
227
- // device, pin to it instead of failing on "more than one device". The adb
228
- // wrapper inherits process.env, so every subsequent `adb` call already targets
229
- // that serial — this lets multiple emulators run in parallel, each driven by a
230
- // CLI invocation with its own ANDROID_SERIAL.
231
- const pinned = process.env.ANDROID_SERIAL?.trim();
280
+ const pinned = activeSerial();
232
281
  if (pinned) {
233
- if (online.some((l) => l.startsWith(`${pinned}\t`)))
282
+ if (online.includes(pinned))
234
283
  return;
235
- throw new AdbError(`ANDROID_SERIAL=${pinned} is set but that device is not online. ` +
236
- `Online: ${online.map((l) => l.split("\t")[0]).join(", ") || "none"}.`);
284
+ throw new AdbError(`Android device ${pinned} is not online. Online: ${online.join(", ") || "none"}.`);
237
285
  }
238
286
  if (online.length > 1) {
239
287
  throw new AdbError(`Expected exactly one Android device, found ${online.length}. ` +
240
- `Stop the extras, or set ANDROID_SERIAL=<serial> to pin one (parallel runs).`);
288
+ `Stop the extras, or run with --parallel to pool them.`);
241
289
  }
242
290
  }
243
291
  // --- Input gestures (all in screencap pixel space) ---