@ishlabs/cli 0.24.1 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/dist/commands/ask.js +3 -3
  2. package/dist/commands/iteration.js +1 -1
  3. package/dist/commands/study-analyze.js +1 -1
  4. package/dist/commands/study-run.js +80 -12
  5. package/dist/commands/study.js +11 -7
  6. package/dist/lib/alias-store.js +1 -1
  7. package/dist/lib/api-client.d.ts +2 -0
  8. package/dist/lib/docs.js +57 -42
  9. package/dist/lib/local-sim/actions.d.ts +10 -2
  10. package/dist/lib/local-sim/actions.js +16 -11
  11. package/dist/lib/local-sim/adb.d.ts +103 -0
  12. package/dist/lib/local-sim/adb.js +352 -0
  13. package/dist/lib/local-sim/android.d.ts +111 -0
  14. package/dist/lib/local-sim/android.js +499 -0
  15. package/dist/lib/local-sim/apk-manifest.d.ts +22 -0
  16. package/dist/lib/local-sim/apk-manifest.js +210 -0
  17. package/dist/lib/local-sim/browser.d.ts +22 -0
  18. package/dist/lib/local-sim/browser.js +65 -0
  19. package/dist/lib/local-sim/coordinates.d.ts +69 -0
  20. package/dist/lib/local-sim/coordinates.js +59 -0
  21. package/dist/lib/local-sim/device.d.ts +143 -0
  22. package/dist/lib/local-sim/device.js +152 -0
  23. package/dist/lib/local-sim/ios.d.ts +168 -0
  24. package/dist/lib/local-sim/ios.js +546 -0
  25. package/dist/lib/local-sim/loop.d.ts +14 -2
  26. package/dist/lib/local-sim/loop.js +166 -73
  27. package/dist/lib/local-sim/native-a11y.d.ts +97 -0
  28. package/dist/lib/local-sim/native-a11y.js +384 -0
  29. package/dist/lib/local-sim/simctl.d.ts +85 -0
  30. package/dist/lib/local-sim/simctl.js +273 -0
  31. package/dist/lib/local-sim/types.d.ts +37 -2
  32. package/dist/lib/local-sim/upload.d.ts +1 -1
  33. package/dist/lib/local-sim/upload.js +9 -6
  34. package/dist/lib/output.js +58 -12
  35. package/dist/lib/skill-content.js +10 -9
  36. package/package.json +2 -1
@@ -0,0 +1,273 @@
1
+ /**
2
+ * Thin async wrappers over `xcrun simctl` + `idb` for the native-iOS sim path.
3
+ *
4
+ * Two tools, two jobs:
5
+ * - `xcrun simctl` drives the simulator LIFECYCLE (boot detection, install,
6
+ * terminate, launch) and the SCREENSHOT.
7
+ * - `idb` drives UI INPUT (tap/swipe/text/key) and reports the screen
8
+ * geometry (pixels, points, and the scale between them).
9
+ *
10
+ * COORDINATE SPACES (the key difference from Android, where screencap and tap
11
+ * share one pixel space):
12
+ * - `simctl io booted screenshot` writes a PNG in PIXELS (e.g. 1179x2556 @3x).
13
+ * - `idb ui tap/swipe` take POINTS (e.g. 393x852) — pixels / scale.
14
+ * The native sim TAPS in points (de-normalize 0-1000 against the POINT size)
15
+ * but RECORDS in PIXELS: dimensions() returns the pixel size so the loop's
16
+ * round-trip is exact. Recording in points would drift — the point grid (393)
17
+ * is coarser than the 0-1000 normalized grid, so it double-rounds. See
18
+ * IOSDevice for the full derivation.
19
+ */
20
+ import { execFile } from "node:child_process";
21
+ import { existsSync } from "node:fs";
22
+ import { mkdtemp, readFile, rm } from "node:fs/promises";
23
+ import { tmpdir } from "node:os";
24
+ import { join } from "node:path";
25
+ import { promisify } from "node:util";
26
+ const execFileAsync = promisify(execFile);
27
/**
 * Locate the `idb` executable without depending on the caller's PATH.
 *
 * Candidates are probed in order and the first one that exists on disk wins:
 *   1. `$ISH_IDB` — explicit override;
 *   2. `$HOME/.local/bin/idb` — where a pip install places it;
 *   3. `/opt/homebrew/bin/idb`.
 * When none exists, the bare command name is returned so the lookup falls back
 * to PATH at spawn time.
 *
 * @returns {string} Path to the idb binary, or `"idb"` if no candidate exists.
 */
function resolveIdb() {
    const candidates = [];
    const override = process.env.ISH_IDB;
    if (override) {
        candidates.push(override);
    }
    // Only probe the per-user location when HOME is set; an unset HOME would
    // otherwise turn the candidate into the root-level path "/.local/bin/idb".
    const home = process.env.HOME;
    if (home) {
        candidates.push(`${home}/.local/bin/idb`);
    }
    candidates.push("/opt/homebrew/bin/idb");
    return candidates.find((candidate) => existsSync(candidate)) ?? "idb";
}
41
+ const XCRUN = "/usr/bin/xcrun";
42
+ const IDB = resolveIdb();
43
+ const PLUTIL = "/usr/bin/plutil";
44
+ const DEFAULT_TIMEOUT_MS = 30_000;
45
+ const SCREENSHOT_TIMEOUT_MS = 30_000;
46
/**
 * Error type thrown by the simctl/idb wrappers in this module.
 *
 * @param {string} message Human-readable failure description.
 * @param {{ cause?: unknown }} [options] Standard `Error` options; pass
 *   `{ cause }` to keep the underlying error reachable from the wrapper.
 */
export class IosError extends Error {
    constructor(message, options) {
        // Forward options so callers can chain the original failure via `cause`.
        super(message, options);
        this.name = "IosError";
    }
}
52
/**
 * Run `xcrun simctl <args>` and return its stdout with surrounding whitespace
 * trimmed.
 *
 * @param {string[]} args Arguments placed after `simctl`.
 * @param {number} [timeoutMs=DEFAULT_TIMEOUT_MS] Timeout handed to execFile.
 * @returns {Promise<string>} Trimmed stdout.
 * @throws {IosError} When the command cannot be run or fails. The message
 *   embeds the underlying message and the original error is kept on `cause`.
 */
export async function simctl(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
    try {
        const { stdout } = await execFileAsync(XCRUN, ["simctl", ...args], {
            timeout: timeoutMs,
            maxBuffer: 4 * 1024 * 1024,
        });
        return stdout.trim();
    } catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        const wrapped = new IosError(`xcrun simctl ${args.join(" ")} failed: ${msg}`);
        // Keep the original error (exit code, signal, stderr) reachable for callers
        // and debuggers instead of flattening it into the message only.
        wrapped.cause = err;
        throw wrapped;
    }
}
66
/**
 * Run `idb <args>` and return its stdout with surrounding whitespace trimmed.
 *
 * @param {string[]} args Arguments passed to the idb executable.
 * @param {number} [timeoutMs=DEFAULT_TIMEOUT_MS] Timeout handed to execFile.
 * @returns {Promise<string>} Trimmed stdout.
 * @throws {IosError} When the command cannot be run or fails. The message
 *   embeds the underlying message and the original error is kept on `cause`.
 */
export async function idb(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
    try {
        const { stdout } = await execFileAsync(IDB, args, {
            timeout: timeoutMs,
            maxBuffer: 8 * 1024 * 1024,
        });
        return stdout.trim();
    } catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        const wrapped = new IosError(`idb ${args.join(" ")} failed: ${msg}`);
        // Keep the original error (exit code, signal, stderr) reachable for callers
        // and debuggers instead of flattening it into the message only.
        wrapped.cause = err;
        throw wrapped;
    }
}
80
+ // --- Device state ---
81
/**
 * Ensure that exactly one simulator is in the Booted state and hand back its
 * udid. Later idb/simctl calls (and the screenshot) target "booted", so more
 * than one booted simulator would be ambiguous and is rejected.
 *
 * @returns {Promise<string>} udid of the single booted simulator.
 * @throws {IosError} If simctl cannot be run, its JSON cannot be parsed, no
 *   simulator is booted, or several are.
 */
export async function requireOneBootedSimulator() {
    let listing;
    try {
        listing = await simctl(["list", "devices", "booted", "-j"]);
    } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new IosError(`Could not run xcrun simctl. Is Xcode installed and a simulator booted? ${reason}`);
    }

    const running = [];
    try {
        // `devices` maps a group key to a list of devices; flatten across groups.
        const everyDevice = Object.values(JSON.parse(listing).devices).flat();
        for (const device of everyDevice) {
            if (device.state === "Booted") {
                running.push({ udid: device.udid, name: device.name });
            }
        }
    } catch {
        throw new IosError("Could not parse `simctl list devices booted -j` output.");
    }

    if (running.length === 1) {
        return running[0].udid;
    }
    if (running.length === 0) {
        throw new IosError("No iOS simulator booted. Boot one first (e.g. `xcrun simctl boot <udid>` or open Simulator.app).");
    }
    const names = running.map((device) => device.name).join(", ");
    throw new IosError(`Expected exactly one booted simulator, found ${running.length} (${names}). ` +
        "Shut down the extras (the sim drives a single device).");
}
115
/**
 * Read the screen geometry of a simulator from `idb describe --json`.
 *
 * @param {string} udid Target simulator.
 * @returns {Promise<{pixelWidth: number, pixelHeight: number, pointWidth: number, pointHeight: number, density: number}>}
 *   Pixel size, point size, and the scale between them. When idb reports no
 *   `density`, it is derived as pixel width divided by point width.
 * @throws {IosError} If the output is not JSON or lacks usable dimensions.
 */
export async function describeScreen(udid) {
    const raw = await idb(["describe", "--json", "--udid", udid]);

    let screen;
    try {
        screen = JSON.parse(raw).screen_dimensions;
    } catch {
        throw new IosError("Could not parse `idb describe --json` output.");
    }

    // All four measurements must be present and non-zero to be usable.
    const usable = Boolean(screen) &&
        [screen.width_points, screen.height_points, screen.width, screen.height].every(Boolean);
    if (!usable) {
        throw new IosError(`idb describe returned no usable screen_dimensions: ${raw.slice(0, 200)}`);
    }

    const { width, height, width_points: pointWidth, height_points: pointHeight } = screen;
    return {
        pixelWidth: width,
        pixelHeight: height,
        pointWidth,
        pointHeight,
        density: screen.density ?? width / pointWidth,
    };
}
136
+ // --- Screenshot (PIXELS) ---
137
/**
 * Grab the booted simulator's screen as PNG bytes using
 * `simctl io booted screenshot`. simctl writes to a file path (no reliable
 * stdout in current Xcode), so the image goes through a temporary file that is
 * removed afterwards.
 *
 * @returns {Promise<Buffer>} Contents of the PNG file simctl wrote.
 */
export async function screenshotPng() {
    const scratchDir = await mkdtemp(join(tmpdir(), "ish-ios-shot-"));
    try {
        const target = join(scratchDir, "shot.png");
        await simctl(["io", "booted", "screenshot", target], SCREENSHOT_TIMEOUT_MS);
        return await readFile(target);
    } finally {
        // Cleanup is best-effort: a failed removal must not mask the result or
        // the original error.
        try {
            await rm(scratchDir, { recursive: true, force: true });
        } catch {
            // ignore
        }
    }
}
153
+ // --- UI input via idb (POINTS) ---
154
/**
 * Tap once at (x, y) through `idb ui tap`. Coordinates are rounded to whole
 * numbers before being passed on the command line.
 *
 * @param {string} udid Target simulator.
 * @param {number} x Horizontal position (this module drives idb in points).
 * @param {number} y Vertical position.
 * @returns {Promise<void>}
 */
export async function uiTap(udid, x, y) {
    const [col, row] = [x, y].map((value) => String(Math.round(value)));
    await idb(["ui", "tap", "--udid", udid, col, row]);
}
157
/**
 * Press and hold at (x, y) by issuing `idb ui tap` with a `--duration`.
 *
 * @param {string} udid Target simulator.
 * @param {number} x Horizontal position, rounded to a whole number.
 * @param {number} y Vertical position, rounded to a whole number.
 * @param {number} [durationMs=600] Hold time in milliseconds.
 * @returns {Promise<void>}
 */
export async function uiLongPress(udid, x, y, durationMs = 600) {
    // idb expects the duration in seconds, so convert from milliseconds.
    const holdSeconds = (durationMs / 1000).toFixed(2);
    const position = [String(Math.round(x)), String(Math.round(y))];
    await idb(["ui", "tap", "--udid", udid, "--duration", holdSeconds, ...position]);
}
165
/**
 * Swipe from (x1, y1) to (x2, y2) through `idb ui swipe`.
 *
 * @param {string} udid Target simulator.
 * @param {number} x1 Start x, rounded to a whole number.
 * @param {number} y1 Start y, rounded to a whole number.
 * @param {number} x2 End x, rounded to a whole number.
 * @param {number} y2 End y, rounded to a whole number.
 * @param {number} [durationMs=300] Swipe time in milliseconds; passed to idb
 *   as seconds with two decimals.
 * @returns {Promise<void>}
 */
export async function uiSwipe(udid, x1, y1, x2, y2, durationMs = 300) {
    const seconds = (durationMs / 1000).toFixed(2);
    const path = [x1, y1, x2, y2].map((value) => String(Math.round(value)));
    await idb(["ui", "swipe", "--udid", udid, "--duration", seconds, ...path]);
}
173
/**
 * Send text to the currently focused field through `idb ui text`. In contrast
 * to Android's `adb shell input text`, `idb ui text` copes with spaces,
 * unicode and quotes, so no helper IME is required.
 *
 * NOTE(review): the text is passed as a plain positional argument; how idb
 * treats text beginning with "-" is not visible from here — confirm.
 *
 * @param {string} udid Target simulator.
 * @param {string} text Text to type, passed through unchanged.
 * @returns {Promise<void>}
 */
export async function uiText(udid, text) {
    const command = ["ui", "text", "--udid", udid, text];
    await idb(command);
}
181
/**
 * Press a hardware key identified by its HID usage code, via `idb ui key`.
 * For example, code 40 is Return/Enter, used to submit a text field.
 *
 * @param {string} udid Target simulator.
 * @param {number} keycode HID usage code; stringified for the command line.
 * @returns {Promise<void>}
 */
export async function uiKey(udid, keycode) {
    const command = ["ui", "key", "--udid", udid, String(keycode)];
    await idb(command);
}
188
+ /** HID usage code for Return/Enter. */
189
+ export const HID_KEY_RETURN = 40;
190
+ // --- Accessibility tree (idb describe-all) ---
191
/**
 * Capture the current accessibility tree as `idb ui describe-all` JSON (a flat
 * array of elements, each with a POINT frame) and return the raw JSON string.
 * Mirrors the oracle's `ios_describe`: right after a tap the tree can be
 * mid-transition and come back empty/partial, so the call is retried until the
 * result is an array with more than just the root application node.
 *
 * @param {string} udid Target simulator.
 * @param {{ attempts?: number, retryDelayMs?: number }} [options]
 *   `attempts` is the maximum number of idb calls (default 5); `retryDelayMs`
 *   is the pause between consecutive attempts (default 800).
 * @returns {Promise<string>} JSON text of a tree with at least two elements.
 * @throws {IosError} If every attempt yields a trivial tree or fails, so the
 *   caller can degrade to the vision path. The message carries the first 200
 *   characters of the last output or error message.
 */
export async function describeAll(udid, { attempts = 5, retryDelayMs = 800 } = {}) {
    let lastSeen = "";
    for (let attempt = 1; attempt <= attempts; attempt++) {
        try {
            const json = await idb(["ui", "describe-all", "--udid", udid]);
            lastSeen = json;
            // A valid non-trivial tree has more than just the root application node.
            if (countJsonArray(json) >= 2) {
                return json;
            }
        } catch (err) {
            lastSeen = err instanceof Error ? err.message : String(err);
        }
        // Pause only when another attempt follows; sleeping after the final
        // attempt would just postpone the failure.
        if (attempt < attempts) {
            await delay(retryDelayMs);
        }
    }
    throw new IosError(`idb ui describe-all returned a trivial/empty tree after retries (last: ${lastSeen.slice(0, 200)})`);
}
216
/**
 * Count the elements of a JSON-encoded array.
 *
 * @param {string} json Text expected to hold a JSON array.
 * @returns {number} The array's length, or 0 when the text does not parse or
 *   parses to something other than an array.
 */
function countJsonArray(json) {
    let value;
    try {
        value = JSON.parse(json);
    } catch {
        return 0;
    }
    if (!Array.isArray(value)) {
        return 0;
    }
    return value.length;
}
226
/**
 * Promise-based sleep.
 *
 * @param {number} ms Milliseconds handed to `setTimeout`.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function delay(ms) {
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
}
229
+ // --- App lifecycle (simctl) ---
230
/**
 * Stop an app on the simulator via `simctl terminate`, ignoring any failure.
 * Terminating an app that isn't running exits non-zero ("found nothing to
 * terminate"), which is acceptable for a reset, so errors are swallowed on
 * purpose.
 *
 * @param {string} udid Target simulator.
 * @param {string} bundleId Bundle identifier of the app to stop.
 * @returns {Promise<void>} Always resolves.
 */
export async function terminateApp(udid, bundleId) {
    const command = ["terminate", udid, bundleId];
    try {
        await simctl(command);
    } catch {
        // Deliberate best-effort: nothing was running, so there is nothing to stop.
    }
}
240
/**
 * Start an app on the simulator via `simctl launch`.
 *
 * @param {string} udid Target simulator.
 * @param {string} bundleId Bundle identifier of the app to start.
 * @returns {Promise<void>}
 * @throws {IosError} Propagated from `simctl`. simctl launch exits non-zero
 *   with a clear message when the bundle id isn't installed, so the wrapper's
 *   error is already a loud failure and is not intercepted here.
 */
export async function launchApp(udid, bundleId) {
    const command = ["launch", udid, bundleId];
    await simctl(command);
}
245
/**
 * Install a `.app` bundle onto the simulator via `simctl install`, allowing up
 * to 180 seconds. Simulator builds aren't code-signed; `simctl install` just
 * stages the .app.
 *
 * @param {string} udid Target simulator.
 * @param {string} appPath Path to the `.app` to install.
 * @returns {Promise<void>}
 */
export async function installApp(udid, appPath) {
    const installTimeoutMs = 180_000;
    await simctl(["install", udid, appPath], installTimeoutMs);
}
249
/**
 * Report whether a bundle id shows up in `simctl listapps` for the simulator.
 * `simctl listapps` emits a plist of installed bundles; a substring check on
 * the quoted bundle id is treated as sufficient evidence of presence.
 *
 * NOTE(review): the second needle contains the first, so it can never match
 * on its own — kept for parity with the original check.
 *
 * @param {string} udid Target simulator.
 * @param {string} bundleId Bundle identifier to look for.
 * @returns {Promise<boolean>} True when either quoted form appears in the output.
 */
export async function isAppInstalled(udid, bundleId) {
    const listing = await simctl(["listapps", udid], 60_000);
    const needles = [`"${bundleId}"`, `CFBundleIdentifier = "${bundleId}"`];
    return needles.some((needle) => listing.includes(needle));
}
255
/**
 * Look up CFBundleIdentifier in a local `.app`'s Info.plist using `plutil`.
 * This lets a just-installed app be terminated and launched without diffing
 * the installed-app list.
 *
 * @param {string} appPath Path to the `.app` directory.
 * @returns {Promise<string|null>} The bundle id, or null when Info.plist is
 *   missing, plutil fails, or it prints nothing.
 */
export async function bundleIdFromApp(appPath) {
    const infoPlist = join(appPath, "Info.plist");
    if (existsSync(infoPlist)) {
        try {
            const result = await execFileAsync(PLUTIL, ["-extract", "CFBundleIdentifier", "raw", "-o", "-", infoPlist], {
                timeout: 10_000,
            });
            const bundleId = result.stdout.trim();
            if (bundleId) {
                return bundleId;
            }
        } catch {
            // Any plutil failure is reported as "no bundle id".
        }
    }
    return null;
}
@@ -44,6 +44,15 @@ export interface ContextValue {
44
44
  value: string | null;
45
45
  description?: string;
46
46
  }
47
+ /**
48
+ * Per-turn assignment status the agent can emit. Mirrors the backend's
49
+ * AssignmentStatus enum (app/db/schemas/enums/study.py) restricted to the
50
+ * LLM-emittable values (_LLM_EMITTABLE_STATUSES in
51
+ * app/interactive/instructions/output.py). The harness-only values
52
+ * (pending / max_steps_reached / failed) are NOT emittable per turn; the
53
+ * run-level AssignmentStatusUpdate sends those terminal values instead.
54
+ */
55
+ export type AssignmentStatus = "in_progress" | "completed" | "abandoned";
47
56
  export interface HistoryEntry {
48
57
  comment: string;
49
58
  action_description: string;
@@ -104,6 +113,18 @@ export interface LocalStepAction {
104
113
  modifiers: string[] | null;
105
114
  key: string | null;
106
115
  tab_id: string | null;
116
+ orientation: string | null;
117
+ scale: number | null;
118
+ coordinates: {
119
+ x: number;
120
+ y: number;
121
+ } | null;
122
+ drag: {
123
+ startX: number;
124
+ startY: number;
125
+ endX: number;
126
+ endY: number;
127
+ } | null;
107
128
  }
108
129
  /** Raw backend step response — output is nested, actions are separate. */
109
130
  export interface LocalSimStepResponseRaw {
@@ -114,7 +135,7 @@ export interface LocalSimStepResponseRaw {
114
135
  sentiment_intensity?: number;
115
136
  current_location: string;
116
137
  effort_seconds: number;
117
- assignment_completed: boolean;
138
+ assignment_status: AssignmentStatus;
118
139
  action: {
119
140
  actions: Array<{
120
141
  type: string;
@@ -135,6 +156,17 @@ export interface LocalSimStepResponseRaw {
135
156
  modifiers?: string[];
136
157
  key?: string;
137
158
  tab_id?: string;
159
+ orientation?: string;
160
+ scale?: number;
161
+ coordinates?: {
162
+ x: number;
163
+ y: number;
164
+ } | {
165
+ startX: number;
166
+ startY: number;
167
+ endX: number;
168
+ endY: number;
169
+ };
138
170
  }>;
139
171
  };
140
172
  };
@@ -149,6 +181,7 @@ export interface LocalSimStepResponse {
149
181
  sentiment_intensity: number;
150
182
  current_location: string;
151
183
  effort_seconds: number;
184
+ assignment_status: AssignmentStatus;
152
185
  assignment_completed: boolean;
153
186
  actions: LocalStepAction[];
154
187
  loop_detected: boolean;
@@ -174,6 +207,8 @@ export interface RecordInteraction {
174
207
  assignment_id: string;
175
208
  screenshot_base64?: string;
176
209
  screenshot_url?: string;
210
+ screen_width?: number;
211
+ screen_height?: number;
177
212
  frame_version_id?: string;
178
213
  timestamp_ms: number;
179
214
  comment: string | null;
@@ -181,7 +216,7 @@ export interface RecordInteraction {
181
216
  sentiment: SentimentData;
182
217
  actions: ActionData[];
183
218
  current_location: string | null;
184
- assignment_completed: boolean;
219
+ assignment_status: AssignmentStatus;
185
220
  tabs?: LocalTabInfo[];
186
221
  }
187
222
  export interface AssignmentStatusUpdate {
@@ -3,4 +3,4 @@ export interface ScreenshotUploadResult {
3
3
  screenshotUrl: string;
4
4
  screenshotId: string;
5
5
  }
6
- export declare function uploadScreenshot(client: ApiClient, productId: string, jpegBuffer: Buffer): Promise<ScreenshotUploadResult>;
6
+ export declare function uploadScreenshot(client: ApiClient, productId: string, imageBuffer: Buffer, contentType?: "image/jpeg" | "image/png"): Promise<ScreenshotUploadResult>;
@@ -1,20 +1,23 @@
1
1
  import { randomUUID } from "node:crypto";
2
- export async function uploadScreenshot(client, productId, jpegBuffer) {
2
+ export async function uploadScreenshot(client, productId, imageBuffer,
3
+ // Browser captures JPEG; native (adb screencap) emits PNG. The signed-URL
4
+ // request and the PUT header MUST agree so storage serves correct bytes.
5
+ contentType = "image/jpeg") {
3
6
  const screenshotId = randomUUID();
4
7
  // Step 1: Get signed URL from backend
5
8
  const resp = await client.localSimScreenshotUpload({
6
9
  product_id: productId,
7
10
  screenshot_id: screenshotId,
8
- content_type: "image/jpeg",
11
+ content_type: contentType,
9
12
  });
10
- // Step 2: PUT raw JPEG bytes directly to Supabase Storage
13
+ // Step 2: PUT raw image bytes directly to Supabase Storage
11
14
  const putResp = await fetch(resp.upload_info.signed_upload_url, {
12
15
  method: "PUT",
13
16
  headers: {
14
- "Content-Type": "image/jpeg",
15
- "Content-Length": String(jpegBuffer.byteLength),
17
+ "Content-Type": contentType,
18
+ "Content-Length": String(imageBuffer.byteLength),
16
19
  },
17
- body: jpegBuffer,
20
+ body: imageBuffer,
18
21
  signal: AbortSignal.timeout(30_000),
19
22
  });
20
23
  if (!putResp.ok) {
@@ -503,7 +503,7 @@ function suggestionsForError(err) {
503
503
  "If you didn't pass the resource explicitly, your saved active workspace/study/ask may be stale — run `ish status` to check, then `ish workspace use --clear` (or `ish study use --clear` / `ish ask use --clear`) to reset.",
504
504
  ];
505
505
  case "insufficient_credits":
506
- return ["Purchase more credits at https://app.ishlabs.io"];
506
+ return ["Get more credits at https://app.ishlabs.io"];
507
507
  case "usage_limit_reached": {
508
508
  const d = structuredDetail(err);
509
509
  const upgradeUrl = typeof d?.upgrade_url === "string" ? d.upgrade_url : "https://app.ishlabs.io/billing";
@@ -625,7 +625,7 @@ export function outputError(err, json) {
625
625
  }
626
626
  else {
627
627
  if (err.status === 402) {
628
- console.error("Error: Insufficient credits. Purchase more at https://app.ishlabs.io");
628
+ console.error("Error: Insufficient credits. Get more credits at https://app.ishlabs.io");
629
629
  }
630
630
  else {
631
631
  console.error(`Error: ${remapEntityName(err.message)}`);
@@ -1222,7 +1222,7 @@ export function formatStudyResults(study, participants, json) {
1222
1222
  console.log(` ${alias} (${t.name}): ${truncate(t.errorMessage, 200)}`);
1223
1223
  }
1224
1224
  }
1225
- console.log("\nRun `ish participant get <id> --json` for full interaction details.");
1225
+ console.log("\nRun `ish study participant <id> --json` for full interaction details.");
1226
1226
  }
1227
1227
  }
1228
1228
  /**
@@ -1668,7 +1668,7 @@ export function formatSimulationPoll(results, json, isMedia = false) {
1668
1668
  // Pattern A (cli half): list per-participant error_message under the table so
1669
1669
  // agents see why a simulation failed without re-fetching every participant.
1670
1670
  // Truncate to 200 chars; full text is available via --json or
1671
- // `ish study participant get <id>`.
1671
+ // `ish study participant <id>`.
1672
1672
  const failedRows = results.filter((r) => {
1673
1673
  const status = String(r.status || "").toLowerCase();
1674
1674
  return (status === "failed" || status === "errored") && r.error_message;
@@ -1689,6 +1689,43 @@ function variantLetter(index) {
1689
1689
  return String.fromCharCode(65 + index);
1690
1690
  return `V${index + 1}`;
1691
1691
  }
1692
/**
 * Give every variant id one stable letter across all rounds of an ask — the
 * CLI counterpart of the backend's
 * `app/asks/variant_loader.py:build_ask_label_map`.
 *
 * D2 fix: the LLM letters variants GLOBALLY across the whole ask. Rounds are
 * walked in `order_index` order and each variant id receives the next letter
 * (`A, B, C…`) on its *first appearance*, so participant comments and round
 * summaries say `[[A]]/[[B]]` in round 1 and `[[C]]/[[D]]` in round 2. The CLI
 * table previously re-lettered every round positionally
 * (`variantLetter(localIndex)` → A/B each round), which made a comment's
 * `[[C]]` point at a row labeled `A`. Using the same id-keyed map here keeps
 * the table, the picks/winner/ratings aggregates, and the cross-round columns
 * consistent with the `[[token]]` letters.
 *
 * Identity is the variant `id` (persisted variants always carry one). A
 * variant without a non-empty string id is left out of the map; the call site
 * then falls back to the round-local positional letter, matching the backend's
 * documented fallback for direct callers with no cross-round context.
 *
 * @param {Array<object>} rounds Rounds of one ask; a non-numeric
 *   `order_index` is treated as 0 and a non-array `variants` as empty.
 * @returns {Map<string, string>} Variant id → letter.
 */
function buildAskLabelMap(rounds) {
    const orderOf = (round) => (typeof round.order_index === "number" ? round.order_index : 0);
    const lettersById = new Map();
    // Sort a copy so the caller's array keeps its order.
    const byOrder = Array.from(rounds).sort((left, right) => orderOf(left) - orderOf(right));
    for (const round of byOrder) {
        if (!Array.isArray(round.variants)) {
            continue;
        }
        for (const variant of round.variants) {
            const { id } = variant;
            const usable = typeof id === "string" && id.length > 0;
            if (usable && !lettersById.has(id)) {
                // The next letter index is the number of ids lettered so far.
                lettersById.set(id, variantLetter(lettersById.size));
            }
        }
    }
    return lettersById;
}
1692
1729
  export function formatAskList(asks, json) {
1693
1730
  injectAliases(asks, ALIAS_PREFIX.ask);
1694
1731
  if (json) {
@@ -1852,13 +1889,17 @@ export function formatRoundDetail(round, json) {
1852
1889
  console.log(` ${summary.comment}`);
1853
1890
  }
1854
1891
  }
1855
- function computeVariantStats(round) {
1892
+ function computeVariantStats(round, labelMap) {
1856
1893
  const variants = Array.isArray(round.variants) ? round.variants : [];
1857
1894
  const responses = Array.isArray(round.responses) ? round.responses : [];
1858
1895
  const stats = variants.map((v, i) => {
1859
1896
  const variant = v;
1897
+ const id = typeof variant.id === "string" ? variant.id : undefined;
1860
1898
  return {
1861
- letter: variantLetter(i),
1899
+ // D2: prefer the ask-global letter (id-keyed, matches the LLM's
1900
+ // `[[token]]` letters) and only fall back to the round-local positional
1901
+ // letter when no map entry exists (single-round / mapless callers).
1902
+ letter: (id && labelMap?.get(id)) || variantLetter(i),
1862
1903
  label: variant.label ? String(variant.label) : undefined,
1863
1904
  kind: String(variant.kind || "-"),
1864
1905
  pickCount: 0,
@@ -2087,13 +2128,13 @@ export function deriveWinnerConfidence(args) {
2087
2128
  return "medium";
2088
2129
  return "high";
2089
2130
  }
2090
- function buildCrossRoundSummary(rounds) {
2131
+ function buildCrossRoundSummary(rounds, labelMap) {
2091
2132
  if (rounds.length < 2)
2092
2133
  return undefined;
2093
2134
  const entries = [];
2094
2135
  for (const round of rounds) {
2095
2136
  const idx = typeof round.order_index === "number" ? round.order_index : 0;
2096
- const stats = computeVariantStats(round);
2137
+ const stats = computeVariantStats(round, labelMap);
2097
2138
  const aggregates = buildAggregates(round, stats);
2098
2139
  const entry = {
2099
2140
  round_number: idx + 1,
@@ -2128,12 +2169,17 @@ export function formatAskResults(ask, json, roundFilter) {
2128
2169
  const filtered = roundFilter !== undefined
2129
2170
  ? rounds.filter((r) => (typeof r.order_index === "number" ? r.order_index : 0) === roundFilter - 1)
2130
2171
  : rounds;
2172
+ // D2: build the ask-global variant→letter map from the FULL round list (not
2173
+ // `filtered`) so that even `--round 2` still letters its variants C/D — the
2174
+ // letter a variant earned when it debuted, matching the `[[token]]` letters
2175
+ // in the LLM's comments and round summaries.
2176
+ const labelMap = buildAskLabelMap(rounds);
2131
2177
  if (json) {
2132
2178
  let total = 0;
2133
2179
  let complete = 0;
2134
2180
  let errored = 0;
2135
2181
  const enrichedRounds = filtered.map((round) => {
2136
- const stats = computeVariantStats(round);
2182
+ const stats = computeVariantStats(round, labelMap);
2137
2183
  const aggregates = buildAggregates(round, stats);
2138
2184
  const decorated = denormalizeRoundCounts(round);
2139
2185
  total += decorated.responses_total ?? 0;
@@ -2158,7 +2204,7 @@ export function formatAskResults(ask, json, roundFilter) {
2158
2204
  }
2159
2205
  // Pattern H2: include cross-round summary when 2+ rounds exist so agents
2160
2206
  // don't have to diff two `ask results` calls themselves.
2161
- const crossRound = buildCrossRoundSummary(filtered);
2207
+ const crossRound = buildCrossRoundSummary(filtered, labelMap);
2162
2208
  if (crossRound)
2163
2209
  payload.cross_round_summary = crossRound;
2164
2210
  console.log(jsonOutput(payload));
@@ -2175,7 +2221,7 @@ export function formatAskResults(ask, json, roundFilter) {
2175
2221
  const completed = responses.filter((r) => r.status === "completed");
2176
2222
  console.log(`\nRound ${idx + 1} [${round.status || "-"}] · ${completed.length}/${responses.length} responded`);
2177
2223
  console.log(` Prompt: "${truncate(String(round.prompt || ""), 100)}"`);
2178
- const stats = computeVariantStats(round);
2224
+ const stats = computeVariantStats(round, labelMap);
2179
2225
  if (stats.length > 0 && (round.wants_pick || round.wants_ratings)) {
2180
2226
  const hasPick = !!round.wants_pick;
2181
2227
  const hasRatings = !!round.wants_ratings;
@@ -2222,7 +2268,7 @@ export function formatAskResults(ask, json, roundFilter) {
2222
2268
  }
2223
2269
  // Pattern H2: cross-round picks comparison when 2+ rounds exist. Saves
2224
2270
  // agents from re-running results twice and diffing aggregates by hand.
2225
- const crossRound = buildCrossRoundSummary(filtered);
2271
+ const crossRound = buildCrossRoundSummary(filtered, labelMap);
2226
2272
  if (crossRound) {
2227
2273
  console.log("\nCross-round summary:");
2228
2274
  const letters = new Set();
@@ -174,7 +174,7 @@ Examples below use MCP shape; for CLI, kebab-case the tool name (\`ask_run\` →
174
174
 
175
175
  - **Input**: a \`description\`, a \`count\`, and optionally \`sources\` (transcripts / audio / images / docs that seed persona generation — for "make profiles that feel like these real customers"). Local files force CLI (binary upload constraint).
176
176
  - **Output**: a list of \`person_ids\` to pass into \`ask_run\` or \`study_run\`.
177
- - **Cost**: slow (~30-120s) + credit-bearing. Reuse profiles via \`profile_list\` when possible. Sensible defaults: \`count: 5-10\` for ad-hoc tests, \`count: 20+\` for studies where you want statistical signal.
177
+ - **Usage**: slow (~30-120s) + draws credits. Reuse profiles via \`profile_list\` when possible. Sensible defaults: \`count: 5-10\` for ad-hoc tests, \`count: 20+\` for studies where you want statistical signal.
178
178
  - **Growing a group of people**: build only the delta — don't rebuild. Concat the new \`person_ids\` with the existing ones for the next run. The "audience is a query" framing means there's no audience entity to update.
179
179
  - **Shapes**:
180
180
  \`\`\`
@@ -216,7 +216,7 @@ To hand a study to someone **without an ish account** — a prospect, a stakehol
216
216
  - **Cold start on free plan**: \`workspace_create\` returns \`usage_limit_reached\` at the free-plan cap (1 workspace). Always inspect with \`workspace_list\` first. **MCP-only recipe** (no \`--ensure\` available): \`workspace_list\` → if non-empty, use the first; if empty, \`workspace_create\`; if \`workspace_create\` returns \`usage_limit_reached\`, re-call \`workspace_list\` (a workspace exists you didn't see — possibly created by another session). **CLI shortcut**: \`ish workspace create --name <name> --ensure\` is idempotent by name.
217
217
  - **Ask participants vs variants** — see Lifecycle table for the re-use vs new-ask decision.
218
218
  - **Study iterations are immutable once they have results** — see Lifecycle table for new-iteration vs new-study.
219
- - **Credit costs**: \`ask_run\`, \`study_run\`, and \`group_build\` consume credits. Check \`workspace_get\`'s \`credits\` headroom before dispatching large runs. For free-plan ad-hoc tests, default \`count: 5-8\` participants + 2 variants is usually within budget.
219
+ - **Credit usage**: \`ask_run\`, \`study_run\`, and \`group_build\` draw credits — this is the normal, expected way to use ish, so run them without hesitation. Credits are a usage allowance (paid plans refill monthly; the free tier is a one-time signup grant), not a per-call bill. Check \`workspace_get\`'s \`credits\` headroom before dispatching large runs. For free-plan ad-hoc tests, default \`count: 5-8\` participants + 2 variants comfortably fits the signup grant.
220
220
  - **\`group_build\` may return fewer profiles than requested** if the description is over-constrained. Always read the returned \`person_ids\` count, don't trust the requested \`count\` blindly.
221
221
  - **Variants of wildly different length** (one-line vs paragraph) can skew picks toward the longer one. Keep variants comparable in shape.
222
222
  - **Chatbot endpoint response-shape mismatch**: \`chat_endpoint_test\` succeeds shallowly if the bot responds at all, but a wrong response path (e.g. bot returns \`{ data: { reply } }\` instead of \`{ reply }\`) produces empty transcripts on the actual run. Inspect one full test response before dispatching participants.
@@ -869,12 +869,13 @@ ish study results <study-id> --transcript <participant-id> --json
869
869
 
870
870
  ## 9. Stage an ask for human review, then dispatch
871
871
 
872
- Goal: prepare a billable A/B but let the user inspect and approve the
873
- people + prompt before any credits are spent. Two-step flow with a
874
- DRAFT status in between.
872
+ Goal: prepare an A/B but let the user inspect and approve the
873
+ people + prompt before any credits are drawn. Two-step flow with a
874
+ DRAFT status in between. (Drawing credits to run an ask is normal — the
875
+ draft step is for human review, not to avoid the credit usage.)
875
876
 
876
877
  \`\`\`bash
877
- # 1. Stage. No worker enqueued, no bill. Audience flags are still
878
+ # 1. Stage. No worker enqueued, no credits drawn. Audience flags are still
878
879
  # required — participants materialize at create time.
879
880
  ASK=$(ish ask create --name "tagline AB" \\
880
881
  --prompt "Which sounds better?" \\
@@ -888,7 +889,7 @@ ASK=$(ish ask create --name "tagline AB" \\
888
889
  # ish ask get "$ASK" # status: draft
889
890
  # ish ask get "$ASK" --json | jq '.participants | length'
890
891
 
891
- # 2. Dispatch once approved (BILLABLE). Idempotent: a non-DRAFT ask
892
+ # 2. Dispatch once approved (draws credits). Idempotent: a non-DRAFT ask
892
893
  # returns 409 mapped to exit 2, so re-running is safe.
893
894
  ish ask dispatch "$ASK" --wait
894
895
  \`\`\`
@@ -971,7 +972,7 @@ Rules to remember:
971
972
  untouched. Get the new id from \`.participant_id\` / \`.participant_alias\` on
972
973
  \`--json\`.
973
974
  - \`--add-steps\` is **only** the extra budget; it does NOT include the
974
- source's original cap. Credits debit per
975
+ source's original cap. Credits draw per
975
976
  \`max(1, round(additional_steps / 10))\` — same formula as
976
977
  \`study run\` interactive, just scoped to the extension.
977
978
  - \`--instruction\` accepts three input shapes (matching the rest of
@@ -982,7 +983,7 @@ Rules to remember:
982
983
  \`study run\`. Extend always inherits the source's iteration config.
983
984
 
984
985
  See \`ish docs get-page concepts/extending-a-simulation\` for the full
985
- mental model (cancel + extend as a pair, error envelopes, cost model).
986
+ mental model (cancel + extend as a pair, error envelopes, credit model).
986
987
 
987
988
  ## 12. Slice study results by frame / segment / turn / sentiment
988
989
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ishlabs/cli",
3
- "version": "0.24.1",
3
+ "version": "0.25.0",
4
4
  "description": "The command-line interface for ish",
5
5
  "type": "module",
6
6
  "bin": {
@@ -14,6 +14,7 @@
14
14
  "verify:skills-parity": "npm run build && node scripts/verify-skills-parity.mjs",
15
15
  "dev": "tsc --watch",
16
16
  "test": "npm run build && node --test --test-concurrency=1 tests/*.test.mjs",
17
+ "mobile-e2e": "./scripts/mobile-e2e/run.sh all",
17
18
  "prepublishOnly": "npm test"
18
19
  },
19
20
  "engines": {