npm - @ishlabs/cli - Versions diffs - 0.24.1 → 0.25.0 - Mend

@ishlabs/cli 0.24.1 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

package/dist/commands/ask.js +3 -3
package/dist/commands/iteration.js +1 -1
package/dist/commands/study-analyze.js +1 -1
package/dist/commands/study-run.js +80 -12
package/dist/commands/study.js +11 -7
package/dist/lib/alias-store.js +1 -1
package/dist/lib/api-client.d.ts +2 -0
package/dist/lib/docs.js +57 -42
package/dist/lib/local-sim/actions.d.ts +10 -2
package/dist/lib/local-sim/actions.js +16 -11
package/dist/lib/local-sim/adb.d.ts +103 -0
package/dist/lib/local-sim/adb.js +352 -0
package/dist/lib/local-sim/android.d.ts +111 -0
package/dist/lib/local-sim/android.js +499 -0
package/dist/lib/local-sim/apk-manifest.d.ts +22 -0
package/dist/lib/local-sim/apk-manifest.js +210 -0
package/dist/lib/local-sim/browser.d.ts +22 -0
package/dist/lib/local-sim/browser.js +65 -0
package/dist/lib/local-sim/coordinates.d.ts +69 -0
package/dist/lib/local-sim/coordinates.js +59 -0
package/dist/lib/local-sim/device.d.ts +143 -0
package/dist/lib/local-sim/device.js +152 -0
package/dist/lib/local-sim/ios.d.ts +168 -0
package/dist/lib/local-sim/ios.js +546 -0
package/dist/lib/local-sim/loop.d.ts +14 -2
package/dist/lib/local-sim/loop.js +166 -73
package/dist/lib/local-sim/native-a11y.d.ts +97 -0
package/dist/lib/local-sim/native-a11y.js +384 -0
package/dist/lib/local-sim/simctl.d.ts +85 -0
package/dist/lib/local-sim/simctl.js +273 -0
package/dist/lib/local-sim/types.d.ts +37 -2
package/dist/lib/local-sim/upload.d.ts +1 -1
package/dist/lib/local-sim/upload.js +9 -6
package/dist/lib/output.js +58 -12
package/dist/lib/skill-content.js +10 -9
package/package.json +2 -1

package/dist/lib/local-sim/simctl.js ADDED Viewed

@@ -0,0 +1,273 @@
+/**
+ * Thin async wrappers over `xcrun simctl` + `idb` for the native-iOS sim path.
+ *
+ * Two tools, two jobs:
+ *  - `xcrun simctl` drives the simulator LIFECYCLE (boot detection, install,
+ *    terminate, launch) and the SCREENSHOT.
+ *  - `idb` drives UI INPUT (tap/swipe/text/key) and reports the screen
+ *    geometry (pixels, points, and the scale between them).
+ *
+ * COORDINATE SPACES (the key difference from Android, where screencap and tap
+ * share one pixel space):
+ *  - `simctl io booted screenshot` writes a PNG in PIXELS (e.g. 1179x2556 @3x).
+ *  - `idb ui tap/swipe` take POINTS (e.g. 393x852) — pixels / scale.
+ * The native sim TAPS in points (de-normalize 0-1000 against the POINT size)
+ * but RECORDS in PIXELS: dimensions() returns the pixel size so the loop's
+ * round-trip is exact. Recording in points would drift — the point grid (393)
+ * is coarser than the 0-1000 normalized grid, so it double-rounds. See
+ * IOSDevice for the full derivation.
+ */
+import { execFile } from "node:child_process";
+import { existsSync } from "node:fs";
+import { mkdtemp, readFile, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { promisify } from "node:util";
+// Promise-returning form of execFile; the wrappers below await it and read
+// `stdout` from the resolved value.
+const execFileAsync = promisify(execFile);
+/**
+ * Pick the `idb` executable to run.
+ *
+ * idb installs to ~/.local/bin via pip; resolve an explicit path so we don't
+ * depend on the caller's PATH. Candidates are probed in order and the first
+ * one that exists on disk wins:
+ *  1. `$ISH_IDB` (explicit override; skipped when unset or the file is missing),
+ *  2. `$HOME/.local/bin/idb` (only when HOME is set),
+ *  3. `/opt/homebrew/bin/idb`.
+ * When none exists the bare name "idb" is returned.
+ *
+ * @returns {string} Path of the first existing candidate, or "idb".
+ */
+function resolveIdb() {
+    const candidates = [process.env.ISH_IDB];
+    // Only probe the per-user install when HOME is known: with HOME unset the
+    // path would otherwise degrade to the root-level "/.local/bin/idb".
+    const home = process.env.HOME;
+    if (home)
+        candidates.push(`${home}/.local/bin/idb`);
+    candidates.push("/opt/homebrew/bin/idb");
+    return candidates.find((candidate) => candidate && existsSync(candidate)) ?? "idb";
+}
+// Fixed absolute path to xcrun, used for every simctl invocation.
+const XCRUN = "/usr/bin/xcrun";
+// Resolved once at module load; later changes to ISH_IDB/HOME are not picked up.
+const IDB = resolveIdb();
+// Fixed absolute path to plutil, used to read Info.plist values.
+const PLUTIL = "/usr/bin/plutil";
+// Default per-command timeout for simctl/idb calls, in milliseconds.
+const DEFAULT_TIMEOUT_MS = 30_000;
+// Timeout for `simctl io booted screenshot`, in milliseconds.
+const SCREENSHOT_TIMEOUT_MS = 30_000;
+/**
+ * Error type thrown by the simctl/idb helpers in this module, so callers can
+ * tell simulator-tooling failures apart from other errors via `instanceof` or
+ * `name === "IosError"`.
+ *
+ * @param {string} message - Human-readable description of the failure.
+ * @param {{ cause?: unknown }} [options] - Standard Error options; pass
+ *   `{ cause }` to keep the underlying error attached.
+ */
+export class IosError extends Error {
+    constructor(message, options) {
+        super(message, options);
+        this.name = "IosError";
+    }
+}
+/**
+ * Run `xcrun simctl <args>` and return trimmed stdout.
+ *
+ * @param {string[]} args - Arguments placed after `simctl`.
+ * @param {number} [timeoutMs=DEFAULT_TIMEOUT_MS] - Passed to execFile as `timeout` (ms).
+ * @returns {Promise<string>} stdout with surrounding whitespace removed.
+ * @throws {IosError} When the command cannot be run or fails; the original
+ *   error is available on `.cause`.
+ */
+export async function simctl(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
+    try {
+        const { stdout } = await execFileAsync(XCRUN, ["simctl", ...args], {
+            timeout: timeoutMs,
+            maxBuffer: 4 * 1024 * 1024,
+        });
+        return stdout.trim();
+    }
+    catch (err) {
+        const msg = err instanceof Error ? err.message : String(err);
+        const wrapped = new IosError(`xcrun simctl ${args.join(" ")} failed: ${msg}`);
+        // Keep the underlying error reachable instead of flattening it to text.
+        wrapped.cause = err;
+        throw wrapped;
+    }
+}
+/**
+ * Run `idb <args>` and return trimmed stdout.
+ *
+ * @param {string[]} args - Arguments passed to the resolved idb executable.
+ * @param {number} [timeoutMs=DEFAULT_TIMEOUT_MS] - Passed to execFile as `timeout` (ms).
+ * @returns {Promise<string>} stdout with surrounding whitespace removed.
+ * @throws {IosError} When the command cannot be run or fails; the original
+ *   error is available on `.cause`.
+ */
+export async function idb(args, timeoutMs = DEFAULT_TIMEOUT_MS) {
+    try {
+        const { stdout } = await execFileAsync(IDB, args, {
+            timeout: timeoutMs,
+            maxBuffer: 8 * 1024 * 1024,
+        });
+        return stdout.trim();
+    }
+    catch (err) {
+        const msg = err instanceof Error ? err.message : String(err);
+        const wrapped = new IosError(`idb ${args.join(" ")} failed: ${msg}`);
+        // Keep the underlying error reachable instead of flattening it to text.
+        wrapped.cause = err;
+        throw wrapped;
+    }
+}
+// --- Device state ---
+/**
+ * Return the udid of the single Booted simulator.
+ *
+ * Every later idb/simctl call (and the screenshot) is pinned to "booted", so
+ * having zero or several booted simulators is ambiguous and is rejected.
+ *
+ * @returns {Promise<string>} udid of the one booted simulator.
+ * @throws {IosError} If simctl cannot be run, its JSON cannot be parsed, or the
+ *   number of booted simulators is not exactly one.
+ */
+export async function requireOneBootedSimulator() {
+    const listing = await simctl(["list", "devices", "booted", "-j"]).catch((err) => {
+        const msg = err instanceof Error ? err.message : String(err);
+        throw new IosError(`Could not run xcrun simctl. Is Xcode installed and a simulator booted? ${msg}`);
+    });
+    const running = [];
+    try {
+        // `devices` groups device records under one key each; flatten the groups
+        // and keep only the records reported as Booted.
+        const { devices } = JSON.parse(listing);
+        for (const device of Object.values(devices).flat()) {
+            if (device.state === "Booted")
+                running.push({ udid: device.udid, name: device.name });
+        }
+    }
+    catch {
+        throw new IosError("Could not parse `simctl list devices booted -j` output.");
+    }
+    if (running.length === 1)
+        return running[0].udid;
+    if (running.length === 0) {
+        throw new IosError("No iOS simulator booted. Boot one first (e.g. `xcrun simctl boot <udid>` or open Simulator.app).");
+    }
+    const names = running.map((device) => device.name).join(", ");
+    throw new IosError(`Expected exactly one booted simulator, found ${running.length} (${names}). Shut down the extras (the sim drives a single device).`);
+}
+/**
+ * Read the simulator's screen geometry from `idb describe --json`.
+ *
+ * @param {string} udid - Simulator to query.
+ * @returns {Promise<{pixelWidth: number, pixelHeight: number, pointWidth: number, pointHeight: number, density: number}>}
+ *   Pixel size, point size, and the scale between them. `density` falls back
+ *   to pixelWidth / pointWidth when idb does not report one.
+ * @throws {IosError} If the output is not parseable JSON or lacks any of the
+ *   four size fields.
+ */
+export async function describeScreen(udid) {
+    const raw = await idb(["describe", "--json", "--udid", udid]);
+    let screen;
+    try {
+        ({ screen_dimensions: screen } = JSON.parse(raw));
+    }
+    catch {
+        throw new IosError("Could not parse `idb describe --json` output.");
+    }
+    const required = [screen?.width_points, screen?.height_points, screen?.width, screen?.height];
+    if (required.some((value) => !value)) {
+        throw new IosError(`idb describe returned no usable screen_dimensions: ${raw.slice(0, 200)}`);
+    }
+    const { width, height, width_points: pointWidth, height_points: pointHeight, density } = screen;
+    return {
+        pixelWidth: width,
+        pixelHeight: height,
+        pointWidth,
+        pointHeight,
+        density: density ?? width / pointWidth,
+    };
+}
+// --- Screenshot (PIXELS) ---
+/**
+ * Grab the booted simulator's screen as PNG bytes.
+ *
+ * `simctl io booted screenshot` writes to a file path (no reliable stdout in
+ * current Xcode), so the image goes through a throwaway temp directory that is
+ * removed afterwards whether or not the capture succeeded.
+ *
+ * @returns {Promise<Buffer>} Contents of the PNG file written by simctl.
+ */
+export async function screenshotPng() {
+    const scratchDir = await mkdtemp(join(tmpdir(), "ish-ios-shot-"));
+    try {
+        const target = join(scratchDir, "shot.png");
+        await simctl(["io", "booted", "screenshot", target], SCREENSHOT_TIMEOUT_MS);
+        const png = await readFile(target);
+        return png;
+    }
+    finally {
+        // Best-effort cleanup: a failed delete must not mask the capture result.
+        await rm(scratchDir, { recursive: true, force: true }).catch(() => { });
+    }
+}
+// --- UI input via idb (POINTS) ---
+/** Tap once at (x, y); both coordinates are rounded to integers before being handed to `idb ui tap`. */
+export async function uiTap(udid, x, y) {
+    const [tapX, tapY] = [x, y].map((value) => String(Math.round(value)));
+    await idb(["ui", "tap", "--udid", udid, tapX, tapY]);
+}
+/**
+ * Press and hold at (x, y) using `idb ui tap --duration`.
+ *
+ * @param {string} udid - Target simulator.
+ * @param {number} x - Horizontal coordinate (rounded to an integer).
+ * @param {number} y - Vertical coordinate (rounded to an integer).
+ * @param {number} [durationMs=600] - Hold time in milliseconds.
+ */
+export async function uiLongPress(udid, x, y, durationMs = 600) {
+    // idb takes the press duration in SECONDS.
+    const holdSeconds = (durationMs / 1000).toFixed(2);
+    const pressX = String(Math.round(x));
+    const pressY = String(Math.round(y));
+    await idb(["ui", "tap", "--udid", udid, "--duration", holdSeconds, pressX, pressY]);
+}
+/**
+ * Swipe from (x1, y1) to (x2, y2) using `idb ui swipe`.
+ *
+ * @param {string} udid - Target simulator.
+ * @param {number} x1 - Start x (rounded to an integer).
+ * @param {number} y1 - Start y (rounded to an integer).
+ * @param {number} x2 - End x (rounded to an integer).
+ * @param {number} y2 - End y (rounded to an integer).
+ * @param {number} [durationMs=300] - Swipe time in milliseconds, sent to idb as seconds.
+ */
+export async function uiSwipe(udid, x1, y1, x2, y2, durationMs = 300) {
+    const swipeSeconds = (durationMs / 1000).toFixed(2);
+    const path = [x1, y1, x2, y2].map((value) => String(Math.round(value)));
+    await idb(["ui", "swipe", "--udid", udid, "--duration", swipeSeconds, ...path]);
+}
+/**
+ * Type `text` into whichever field currently has focus. Unlike Android's
+ * `adb shell input text`, `idb ui text` handles spaces/unicode/quotes
+ * correctly, so no helper IME is needed.
+ *
+ * NOTE(review): `text` is passed as a bare trailing argument; a value that
+ * starts with `-` might be read as an option by idb's CLI — confirm.
+ */
+export async function uiText(udid, text) {
+    const argv = ["ui", "text", "--udid", udid, text];
+    await idb(argv);
+}
+/**
+ * Send one hardware key press, identified by its HID usage code, through
+ * `idb ui key`. Code 40 is Return/Enter (used to submit a text field).
+ *
+ * @param {string} udid - Target simulator.
+ * @param {number} keycode - HID usage code; converted to a string for the CLI.
+ */
+export async function uiKey(udid, keycode) {
+    const argv = ["ui", "key", "--udid", udid, String(keycode)];
+    await idb(argv);
+}
+/** HID usage code for Return/Enter. */
+export const HID_KEY_RETURN = 40;
+// --- Accessibility tree (idb describe-all) ---
+/**
+ * Capture the current accessibility tree as `idb ui describe-all` JSON (a flat
+ * array of elements, each with a POINT frame) and return it. Mirrors the
+ * oracle's `ios_describe`: right after a tap the tree can be mid-transition and
+ * come back empty/partial, so we retry until we get an array with more than just
+ * the root application node. Throws IosError if every attempt yields a trivial
+ * tree so the caller can degrade to the vision path.
+ *
+ * @param {string} udid - Simulator to inspect.
+ * @returns {Promise<string>} The raw JSON text of the first non-trivial tree.
+ * @throws {IosError} After 5 attempts without a tree of at least two elements;
+ *   the message carries the first 200 characters of the last output or error.
+ */
+export async function describeAll(udid) {
+    const MAX_ATTEMPTS = 5;
+    const RETRY_DELAY_MS = 800;
+    let lastJson = "";
+    for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
+        try {
+            const json = await idb(["ui", "describe-all", "--udid", udid]);
+            lastJson = json;
+            // A valid non-trivial tree has more than just the root application node.
+            if (countJsonArray(json) >= 2)
+                return json;
+        }
+        catch (err) {
+            // An idb failure counts as a failed attempt; keep its text for the final message.
+            lastJson = err instanceof Error ? err.message : String(err);
+        }
+        // Pause only BETWEEN attempts: sleeping after the last one would just
+        // postpone the failure without another try.
+        if (attempt < MAX_ATTEMPTS)
+            await delay(RETRY_DELAY_MS);
+    }
+    throw new IosError(`idb ui describe-all returned a trivial/empty tree after retries (last: ${lastJson.slice(0, 200)})`);
+}
+/**
+ * Count the elements of a JSON-encoded array.
+ *
+ * @param {string} json - Text expected to hold a JSON array.
+ * @returns {number} The array length, or 0 when the text does not parse or
+ *   parses to something other than an array.
+ */
+function countJsonArray(json) {
+    let value;
+    try {
+        value = JSON.parse(json);
+    }
+    catch {
+        return 0;
+    }
+    if (!Array.isArray(value))
+        return 0;
+    return value.length;
+}
+/** Resolve (with no value) once `ms` milliseconds have elapsed. */
+function delay(ms) {
+    return new Promise((resolve) => {
+        setTimeout(resolve, ms);
+    });
+}
+// --- App lifecycle (simctl) ---
+/**
+ * Stop `bundleId` on the simulator, ignoring any failure.
+ *
+ * Terminating an app that isn't running exits non-zero ("found nothing to
+ * terminate"); that's fine for a reset, so the rejection is deliberately
+ * discarded. Note this also hides every other simctl failure.
+ */
+export async function terminateApp(udid, bundleId) {
+    await simctl(["terminate", udid, bundleId]).catch(() => {
+        // not running — nothing to stop
+    });
+}
+/**
+ * Launch `bundleId` on the simulator via `simctl launch`.
+ *
+ * No extra handling here: simctl launch exits non-zero with a clear message if
+ * the bundle id isn't installed, so the wrapper's IosError is already a loud
+ * failure.
+ */
+export async function launchApp(udid, bundleId) {
+    const argv = ["launch", udid, bundleId];
+    await simctl(argv);
+}
+/**
+ * Install the `.app` at `appPath` onto the simulator via `simctl install`,
+ * allowing up to 180 seconds. Simulator builds aren't code-signed;
+ * `simctl install` just stages the .app.
+ */
+export async function installApp(udid, appPath) {
+    const INSTALL_TIMEOUT_MS = 180_000;
+    await simctl(["install", udid, appPath], INSTALL_TIMEOUT_MS);
+}
+/**
+ * Report whether `bundleId` appears in `simctl listapps` for the simulator.
+ *
+ * `simctl listapps` emits a plist of installed bundles; a substring check on
+ * the quoted bundle id is enough to confirm presence. The second needle is a
+ * longer form of the first, so it can only match when the first already does.
+ *
+ * @returns {Promise<boolean>} true when either needle occurs in the output.
+ */
+export async function isAppInstalled(udid, bundleId) {
+    const listing = await simctl(["listapps", udid], 60_000);
+    const needles = [`"${bundleId}"`, `CFBundleIdentifier = "${bundleId}"`];
+    return needles.some((needle) => listing.includes(needle));
+}
+/**
+ * Look up CFBundleIdentifier in a local `.app`'s Info.plist via `plutil`. Lets
+ * us terminate+launch a just-installed app without diffing the app list.
+ *
+ * @param {string} appPath - Path to the `.app` directory.
+ * @returns {Promise<string|null>} The bundle id, or null when Info.plist is
+ *   missing, plutil fails, or it prints nothing.
+ */
+export async function bundleIdFromApp(appPath) {
+    const infoPlist = join(appPath, "Info.plist");
+    if (!existsSync(infoPlist))
+        return null;
+    const argv = ["-extract", "CFBundleIdentifier", "raw", "-o", "-", infoPlist];
+    try {
+        const result = await execFileAsync(PLUTIL, argv, { timeout: 10_000 });
+        const bundleId = result.stdout.trim();
+        return bundleId === "" ? null : bundleId;
+    }
+    catch {
+        // Any plutil failure is reported as "unknown bundle id".
+        return null;
+    }
+}

package/dist/lib/local-sim/types.d.ts CHANGED Viewed

@@ -44,6 +44,15 @@ export interface ContextValue {
     value: string | null;
     description?: string;
 }
+/**
+ * Per-turn assignment status the agent can emit. Mirrors the backend's
+ * AssignmentStatus enum (app/db/schemas/enums/study.py) restricted to the
+ * LLM-emittable values (_LLM_EMITTABLE_STATUSES in
+ * app/interactive/instructions/output.py). The harness-only values
+ * (pending / max_steps_reached / failed) are NOT emittable per turn; the
+ * run-level AssignmentStatusUpdate sends those terminal values instead.
+ */
+export type AssignmentStatus = "in_progress" | "completed" | "abandoned";
 export interface HistoryEntry {
     comment: string;
     action_description: string;
@@ -104,6 +113,18 @@ export interface LocalStepAction {
     modifiers: string[] | null;
     key: string | null;
     tab_id: string | null;
+    orientation: string | null;
+    scale: number | null;
+    coordinates: {
+        x: number;
+        y: number;
+    } | null;
+    drag: {
+        startX: number;
+        startY: number;
+        endX: number;
+        endY: number;
+    } | null;
 }
 /** Raw backend step response — output is nested, actions are separate. */
 export interface LocalSimStepResponseRaw {
@@ -114,7 +135,7 @@ export interface LocalSimStepResponseRaw {
         sentiment_intensity?: number;
         current_location: string;
         effort_seconds: number;
-        assignment_completed: boolean;
+        assignment_status: AssignmentStatus;
         action: {
             actions: Array<{
                 type: string;
@@ -135,6 +156,17 @@ export interface LocalSimStepResponseRaw {
                 modifiers?: string[];
                 key?: string;
                 tab_id?: string;
+                orientation?: string;
+                scale?: number;
+                coordinates?: {
+                    x: number;
+                    y: number;
+                } | {
+                    startX: number;
+                    startY: number;
+                    endX: number;
+                    endY: number;
+                };
             }>;
         };
     };
@@ -149,6 +181,7 @@ export interface LocalSimStepResponse {
     sentiment_intensity: number;
     current_location: string;
     effort_seconds: number;
+    assignment_status: AssignmentStatus;
     assignment_completed: boolean;
     actions: LocalStepAction[];
     loop_detected: boolean;
@@ -174,6 +207,8 @@ export interface RecordInteraction {
     assignment_id: string;
     screenshot_base64?: string;
     screenshot_url?: string;
+    screen_width?: number;
+    screen_height?: number;
     frame_version_id?: string;
     timestamp_ms: number;
     comment: string | null;
@@ -181,7 +216,7 @@ export interface RecordInteraction {
     sentiment: SentimentData;
     actions: ActionData[];
     current_location: string | null;
-    assignment_completed: boolean;
+    assignment_status: AssignmentStatus;
     tabs?: LocalTabInfo[];
 }
 export interface AssignmentStatusUpdate {

package/dist/lib/local-sim/upload.d.ts CHANGED Viewed

@@ -3,4 +3,4 @@ export interface ScreenshotUploadResult {
     screenshotUrl: string;
     screenshotId: string;
 }
-export declare function uploadScreenshot(client: ApiClient, productId: string, jpegBuffer: Buffer): Promise<ScreenshotUploadResult>;
+export declare function uploadScreenshot(client: ApiClient, productId: string, imageBuffer: Buffer, contentType?: "image/jpeg" | "image/png"): Promise<ScreenshotUploadResult>;

package/dist/lib/local-sim/upload.js CHANGED Viewed

@@ -1,20 +1,23 @@
 import { randomUUID } from "node:crypto";
-export async function uploadScreenshot(client, productId, jpegBuffer) {
+export async function uploadScreenshot(client, productId, imageBuffer,
+// Browser captures JPEG; native (adb screencap) emits PNG. The signed-URL
+// request and the PUT header MUST agree so storage serves correct bytes.
+contentType = "image/jpeg") {
     const screenshotId = randomUUID();
     // Step 1: Get signed URL from backend
     const resp = await client.localSimScreenshotUpload({
         product_id: productId,
         screenshot_id: screenshotId,
-        content_type: "image/jpeg",
+        content_type: contentType,
     });
-    // Step 2: PUT raw JPEG bytes directly to Supabase Storage
+    // Step 2: PUT raw image bytes directly to Supabase Storage
     const putResp = await fetch(resp.upload_info.signed_upload_url, {
         method: "PUT",
         headers: {
-            "Content-Type": "image/jpeg",
-            "Content-Length": String(jpegBuffer.byteLength),
+            "Content-Type": contentType,
+            "Content-Length": String(imageBuffer.byteLength),
         },
-        body: jpegBuffer,
+        body: imageBuffer,
         signal: AbortSignal.timeout(30_000),
     });
     if (!putResp.ok) {

package/dist/lib/output.js CHANGED Viewed

@@ -503,7 +503,7 @@ function suggestionsForError(err) {
                     "If you didn't pass the resource explicitly, your saved active workspace/study/ask may be stale — run `ish status` to check, then `ish workspace use --clear` (or `ish study use --clear` / `ish ask use --clear`) to reset.",
                 ];
             case "insufficient_credits":
-                return ["Purchase more credits at https://app.ishlabs.io"];
+                return ["Get more credits at https://app.ishlabs.io"];
             case "usage_limit_reached": {
                 const d = structuredDetail(err);
                 const upgradeUrl = typeof d?.upgrade_url === "string" ? d.upgrade_url : "https://app.ishlabs.io/billing";
@@ -625,7 +625,7 @@ export function outputError(err, json) {
         }
         else {
             if (err.status === 402) {
-                console.error("Error: Insufficient credits. Purchase more at https://app.ishlabs.io");
+                console.error("Error: Insufficient credits. Get more credits at https://app.ishlabs.io");
             }
             else {
                 console.error(`Error: ${remapEntityName(err.message)}`);
@@ -1222,7 +1222,7 @@ export function formatStudyResults(study, participants, json) {
                 console.log(`  ${alias} (${t.name}): ${truncate(t.errorMessage, 200)}`);
             }
         }
-        console.log("\nRun `ish participant get <id> --json` for full interaction details.");
+        console.log("\nRun `ish study participant <id> --json` for full interaction details.");
     }
 }
 /**
@@ -1668,7 +1668,7 @@ export function formatSimulationPoll(results, json, isMedia = false) {
     // Pattern A (cli half): list per-participant error_message under the table so
     // agents see why a simulation failed without re-fetching every participant.
     // Truncate to 200 chars; full text is available via --json or
-    // `ish study participant get <id>`.
+    // `ish study participant <id>`.
     const failedRows = results.filter((r) => {
         const status = String(r.status || "").toLowerCase();
         return (status === "failed" || status === "errored") && r.error_message;
@@ -1689,6 +1689,43 @@ function variantLetter(index) {
         return String.fromCharCode(65 + index);
     return `V${index + 1}`;
 }
+/**
+ * Build the ask-wide `variant id → letter` map — the CLI mirror of the
+ * backend's `app/asks/variant_loader.py:build_ask_label_map`.
+ *
+ * D2 fix: the LLM letters variants GLOBALLY across the whole ask. It walks the
+ * rounds in `order_index` order and hands out `A, B, C…` on the *first
+ * appearance* of each stable variant id, so participant comments and round
+ * summaries say `[[A]]/[[B]]` in round 1 and `[[C]]/[[D]]` in round 2. The CLI
+ * table used to re-letter each round positionally (`variantLetter(localIndex)`
+ * → A/B every round), so a comment's `[[C]]` pointed at a row labeled `A`.
+ * Building the same id-keyed map here makes the table, the
+ * picks/winner/ratings aggregates, and the cross-round columns all agree with
+ * the `[[token]]` letters.
+ *
+ * Identity is the variant `id` (persisted variants always carry one). A
+ * variant whose id is not a non-empty string is left out of the map — the call
+ * site then falls back to the round-local positional letter, matching the
+ * backend's documented fallback for direct callers with no cross-round context.
+ *
+ * Rounds without a numeric `order_index` sort as 0; the input array is copied
+ * before sorting, so the caller's order is untouched.
+ */
+function buildAskLabelMap(rounds) {
+    const orderOf = (round) => (typeof round.order_index === "number" ? round.order_index : 0);
+    const byOrder = [...rounds].sort((a, b) => orderOf(a) - orderOf(b));
+    const letters = new Map();
+    for (const { variants } of byOrder) {
+        if (!Array.isArray(variants))
+            continue;
+        for (const { id } of variants) {
+            const usable = typeof id === "string" && id.length > 0;
+            // The next letter index is the number of ids already lettered.
+            if (usable && !letters.has(id))
+                letters.set(id, variantLetter(letters.size));
+        }
+    }
+    return letters;
+}
 export function formatAskList(asks, json) {
     injectAliases(asks, ALIAS_PREFIX.ask);
     if (json) {
@@ -1852,13 +1889,17 @@ export function formatRoundDetail(round, json) {
         console.log(`  ${summary.comment}`);
     }
 }
-function computeVariantStats(round) {
+function computeVariantStats(round, labelMap) {
     const variants = Array.isArray(round.variants) ? round.variants : [];
     const responses = Array.isArray(round.responses) ? round.responses : [];
     const stats = variants.map((v, i) => {
         const variant = v;
+        const id = typeof variant.id === "string" ? variant.id : undefined;
         return {
-            letter: variantLetter(i),
+            // D2: prefer the ask-global letter (id-keyed, matches the LLM's
+            // `[[token]]` letters) and only fall back to the round-local positional
+            // letter when no map entry exists (single-round / mapless callers).
+            letter: (id && labelMap?.get(id)) || variantLetter(i),
             label: variant.label ? String(variant.label) : undefined,
             kind: String(variant.kind || "-"),
             pickCount: 0,
@@ -2087,13 +2128,13 @@ export function deriveWinnerConfidence(args) {
         return "medium";
     return "high";
 }
-function buildCrossRoundSummary(rounds) {
+function buildCrossRoundSummary(rounds, labelMap) {
     if (rounds.length < 2)
         return undefined;
     const entries = [];
     for (const round of rounds) {
         const idx = typeof round.order_index === "number" ? round.order_index : 0;
-        const stats = computeVariantStats(round);
+        const stats = computeVariantStats(round, labelMap);
         const aggregates = buildAggregates(round, stats);
         const entry = {
             round_number: idx + 1,
@@ -2128,12 +2169,17 @@ export function formatAskResults(ask, json, roundFilter) {
     const filtered = roundFilter !== undefined
         ? rounds.filter((r) => (typeof r.order_index === "number" ? r.order_index : 0) === roundFilter - 1)
         : rounds;
+    // D2: build the ask-global variant→letter map from the FULL round list (not
+    // `filtered`) so that even `--round 2` still letters its variants C/D — the
+    // letter a variant earned when it debuted, matching the `[[token]]` letters
+    // in the LLM's comments and round summaries.
+    const labelMap = buildAskLabelMap(rounds);
     if (json) {
         let total = 0;
         let complete = 0;
         let errored = 0;
         const enrichedRounds = filtered.map((round) => {
-            const stats = computeVariantStats(round);
+            const stats = computeVariantStats(round, labelMap);
             const aggregates = buildAggregates(round, stats);
             const decorated = denormalizeRoundCounts(round);
             total += decorated.responses_total ?? 0;
@@ -2158,7 +2204,7 @@ export function formatAskResults(ask, json, roundFilter) {
         }
         // Pattern H2: include cross-round summary when 2+ rounds exist so agents
         // don't have to diff two `ask results` calls themselves.
-        const crossRound = buildCrossRoundSummary(filtered);
+        const crossRound = buildCrossRoundSummary(filtered, labelMap);
         if (crossRound)
             payload.cross_round_summary = crossRound;
         console.log(jsonOutput(payload));
@@ -2175,7 +2221,7 @@ export function formatAskResults(ask, json, roundFilter) {
         const completed = responses.filter((r) => r.status === "completed");
         console.log(`\nRound ${idx + 1} [${round.status || "-"}] · ${completed.length}/${responses.length} responded`);
         console.log(`  Prompt: "${truncate(String(round.prompt || ""), 100)}"`);
-        const stats = computeVariantStats(round);
+        const stats = computeVariantStats(round, labelMap);
         if (stats.length > 0 && (round.wants_pick || round.wants_ratings)) {
             const hasPick = !!round.wants_pick;
             const hasRatings = !!round.wants_ratings;
@@ -2222,7 +2268,7 @@ export function formatAskResults(ask, json, roundFilter) {
     }
     // Pattern H2: cross-round picks comparison when 2+ rounds exist. Saves
     // agents from re-running results twice and diffing aggregates by hand.
-    const crossRound = buildCrossRoundSummary(filtered);
+    const crossRound = buildCrossRoundSummary(filtered, labelMap);
     if (crossRound) {
         console.log("\nCross-round summary:");
         const letters = new Set();

package/dist/lib/skill-content.js CHANGED Viewed

@@ -174,7 +174,7 @@ Examples below use MCP shape; for CLI, kebab-case the tool name (\`ask_run\` →
 - **Input**: a \`description\`, a \`count\`, and optionally \`sources\` (transcripts / audio / images / docs that seed persona generation — for "make profiles that feel like these real customers"). Local files force CLI (binary upload constraint).
 - **Output**: a list of \`person_ids\` to pass into \`ask_run\` or \`study_run\`.
-- **Cost**: slow (~30-120s) + credit-bearing. Reuse profiles via \`profile_list\` when possible. Sensible defaults: \`count: 5-10\` for ad-hoc tests, \`count: 20+\` for studies where you want statistical signal.
+- **Usage**: slow (~30-120s) + draws credits. Reuse profiles via \`profile_list\` when possible. Sensible defaults: \`count: 5-10\` for ad-hoc tests, \`count: 20+\` for studies where you want statistical signal.
 - **Growing a group of people**: build only the delta — don't rebuild. Concat the new \`person_ids\` with the existing ones for the next run. The "audience is a query" framing means there's no audience entity to update.
 - **Shapes**:
   \`\`\`
@@ -216,7 +216,7 @@ To hand a study to someone **without an ish account** — a prospect, a stakehol
 - **Cold start on free plan**: \`workspace_create\` returns \`usage_limit_reached\` at the free-plan cap (1 workspace). Always inspect with \`workspace_list\` first. **MCP-only recipe** (no \`--ensure\` available): \`workspace_list\` → if non-empty, use the first; if empty, \`workspace_create\`; if \`workspace_create\` returns \`usage_limit_reached\`, re-call \`workspace_list\` (a workspace exists you didn't see — possibly created by another session). **CLI shortcut**: \`ish workspace create --name <name> --ensure\` is idempotent by name.
 - **Ask participants vs variants** — see Lifecycle table for the re-use vs new-ask decision.
 - **Study iterations are immutable once they have results** — see Lifecycle table for new-iteration vs new-study.
-- **Credit costs**: \`ask_run\`, \`study_run\`, and \`group_build\` consume credits. Check \`workspace_get\`'s \`credits\` headroom before dispatching large runs. For free-plan ad-hoc tests, default \`count: 5-8\` participants + 2 variants is usually within budget.
+- **Credit usage**: \`ask_run\`, \`study_run\`, and \`group_build\` draw credits — this is the normal, expected way to use ish, so run them without hesitation. Credits are a usage allowance (paid plans refill monthly; the free tier is a one-time signup grant), not a per-call bill. Check \`workspace_get\`'s \`credits\` headroom before dispatching large runs. For free-plan ad-hoc tests, default \`count: 5-8\` participants + 2 variants comfortably fits the signup grant.
 - **\`group_build\` may return fewer profiles than requested** if the description is over-constrained. Always read the returned \`person_ids\` count, don't trust the requested \`count\` blindly.
 - **Variants of wildly different length** (one-line vs paragraph) can skew picks toward the longer one. Keep variants comparable in shape.
 - **Chatbot endpoint response-shape mismatch**: \`chat_endpoint_test\` succeeds shallowly if the bot responds at all, but a wrong response path (e.g. bot returns \`{ data: { reply } }\` instead of \`{ reply }\`) produces empty transcripts on the actual run. Inspect one full test response before dispatching participants.
@@ -869,12 +869,13 @@ ish study results <study-id> --transcript <participant-id> --json
 ## 9. Stage an ask for human review, then dispatch
-Goal: prepare a billable A/B but let the user inspect and approve the
-people + prompt before any credits are spent. Two-step flow with a
-DRAFT status in between.
+Goal: prepare an A/B but let the user inspect and approve the
+people + prompt before any credits are drawn. Two-step flow with a
+DRAFT status in between. (Drawing credits to run an ask is normal — the
+draft step is for human review, not to avoid the credit usage.)
 \`\`\`bash
-# 1. Stage. No worker enqueued, no bill. Audience flags are still
+# 1. Stage. No worker enqueued, no credits drawn. Audience flags are still
 #    required — participants materialize at create time.
 ASK=$(ish ask create --name "tagline AB" \\
         --prompt "Which sounds better?" \\
@@ -888,7 +889,7 @@ ASK=$(ish ask create --name "tagline AB" \\
 #   ish ask get "$ASK"            # status: draft
 #   ish ask get "$ASK" --json | jq '.participants | length'
-# 2. Dispatch once approved (BILLABLE). Idempotent: a non-DRAFT ask
+# 2. Dispatch once approved (draws credits). Idempotent: a non-DRAFT ask
 #    returns 409 mapped to exit 2, so re-running is safe.
 ish ask dispatch "$ASK" --wait
 \`\`\`
@@ -971,7 +972,7 @@ Rules to remember:
   untouched. Get the new id from \`.participant_id\` / \`.participant_alias\` on
   \`--json\`.
 - \`--add-steps\` is **only** the extra budget; it does NOT include the
-  source's original cap. Credits debit per
+  source's original cap. Credits draw per
   \`max(1, round(additional_steps / 10))\` — same formula as
   \`study run\` interactive, just scoped to the extension.
 - \`--instruction\` accepts three input shapes (matching the rest of
@@ -982,7 +983,7 @@ Rules to remember:
   \`study run\`. Extend always inherits the source's iteration config.
 See \`ish docs get-page concepts/extending-a-simulation\` for the full
-mental model (cancel + extend as a pair, error envelopes, cost model).
+mental model (cancel + extend as a pair, error envelopes, credit model).
 ## 12. Slice study results by frame / segment / turn / sentiment

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ishlabs/cli",
-  "version": "0.24.1",
+  "version": "0.25.0",
   "description": "The command-line interface for ish",
   "type": "module",
   "bin": {
@@ -14,6 +14,7 @@
     "verify:skills-parity": "npm run build && node scripts/verify-skills-parity.mjs",
     "dev": "tsc --watch",
     "test": "npm run build && node --test --test-concurrency=1 tests/*.test.mjs",
+    "mobile-e2e": "./scripts/mobile-e2e/run.sh all",
     "prepublishOnly": "npm test"
   },
   "engines": {