npm - @ishlabs/cli - Versions diffs - 0.13.0 → 0.14.1 - Mend

@ishlabs/cli 0.13.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/dist/commands/iteration.js +219 -22
package/dist/commands/profile.js +75 -9
package/dist/commands/source.js +6 -4
package/dist/commands/study-run.js +382 -34
package/dist/commands/study.js +170 -9
package/dist/commands/workspace.js +35 -2
package/dist/lib/accessibility-profile.d.ts +12 -0
package/dist/lib/accessibility-profile.js +136 -0
package/dist/lib/ask-questions.js +9 -0
package/dist/lib/billing.d.ts +55 -0
package/dist/lib/billing.js +77 -0
package/dist/lib/docs.js +1106 -36
package/dist/lib/enums.d.ts +54 -0
package/dist/lib/enums.js +100 -0
package/dist/lib/local-sim/actions.d.ts +2 -1
package/dist/lib/local-sim/actions.js +88 -13
package/dist/lib/local-sim/loop.js +49 -19
package/dist/lib/local-sim/tabs.d.ts +27 -0
package/dist/lib/local-sim/tabs.js +157 -0
package/dist/lib/local-sim/types.d.ts +15 -0
package/dist/lib/modality.d.ts +70 -1
package/dist/lib/modality.js +323 -17
package/dist/lib/output.js +61 -4
package/dist/lib/skill-content.js +382 -19
package/dist/lib/types.d.ts +6 -1
package/package.json +1 -1

package/dist/commands/study-run.js CHANGED Viewed

@@ -11,15 +11,33 @@ import * as readline from "node:readline/promises";
 import { withClient, getWebUrl, terminalLink, resolveWorkspace, resolveStudy, parseWaitTimeout, resolveAudienceProfileIds, addAudienceFilterFlags, hasAudienceFlags, } from "../lib/command-helpers.js";
 import { resolveId, tagAlias, ALIAS_PREFIX } from "../lib/alias-store.js";
 import { output, formatSimulationPoll } from "../lib/output.js";
-import { isMediaModality, isChatModality, iterationHasContent, describeRequiredContentFlag, } from "../lib/modality.js";
+import { isMediaModality, isChatModality, iterationHasContent, describeRequiredContentFlag, readChatMode, readTesterPairConfig, summarizeRoleCriteria, } from "../lib/modality.js";
 import { runLocalSimulations } from "../lib/local-sim/loop.js";
 import { ensureBrowser } from "../lib/local-sim/install.js";
+import { estimateChatPair, estimateChatSolo, estimateMediaRun } from "../lib/billing.js";
 /**
  * Convert the raw `--max-interactions` flag text into a number.
  *
  * Parsing follows `parseInt` radix-10 rules. Anything that does not parse
  * to a number, or parses to a value below 1, is rejected.
  *
  * @param {string} value - Raw flag value as received from the command line.
  * @returns {number} The parsed interaction cap (>= 1).
  * @throws {Error} When the value is not a number or is less than 1.
  */
 function parseMaxInteractions(value) {
     const parsed = Number.parseInt(value, 10);
     const isUsable = !Number.isNaN(parsed) && parsed >= 1;
     if (!isUsable) {
         throw new Error(`Invalid --max-interactions value: ${value}`);
     }
     return parsed;
 }
+/**
+ * Default cap the CLI sends when neither `--max-interactions` nor the
+ * iteration carries its own value. Picked to match the frontend's
+ * conservative interactive launchers and to prevent runaway spend when an
+ * iteration runs against a broken or non-responsive surface — without a
+ * cap, a stuck tester can rack up hundreds of steps before the SDK gives
+ * up.
+ */
+const DEFAULT_MAX_INTERACTIONS = 20;
+function resolveMaxInteractions(optsValue, iterationDetails) {
+    if (optsValue)
+        return parseMaxInteractions(optsValue);
+    if (typeof iterationDetails?.max_interactions === "number") {
+        return iterationDetails.max_interactions;
+    }
+    return DEFAULT_MAX_INTERACTIONS;
+}
 function parseSlowMo(value) {
     const n = parseInt(value, 10);
     if (isNaN(n) || n < 0)
@@ -161,7 +179,7 @@ export function attachStudyRunCommands(study) {
         allFlagDescription: "Use every AI profile matching the filters (workspace-wide if no filters set)",
     })
         .option("--config <id>", "Simulation config ID (required for media unless every profile has one)")
-        .option("--max-interactions <n>", "Max interactions per tester")
+        .option("--max-interactions <n>", `Max interactions per tester (interactive / media only). Precedence: flag > iteration's stored value > CLI default (${DEFAULT_MAX_INTERACTIONS}).`)
         .option("--max-turns <n>", "Max conversation turns per tester (chat studies only)")
         .option("--early-termination", "Allow chat agent to end the conversation early when goals are met (chat studies only)")
         .option("--language <lang>", "Language code (e.g. en, sv)")
@@ -207,6 +225,10 @@ Examples:
   # Override the simulation config (e.g. for a media study):
   $ ish study run --config c-c3c
+  # Cap interactions per tester (default 20 — pass higher to allow deeper
+  # exploration, lower to cap spend on a known-broken surface):
+  $ ish study run --max-interactions 30
   # Block until all simulations finish (or timeout):
   $ ish study run --wait
   $ ish study run --wait --timeout 600
@@ -262,6 +284,10 @@ Examples:
             const modality = study.modality || "interactive";
             const isMedia = isMediaModality(modality);
             const isChat = isChatModality(modality);
+            // Pair-mode (tester_pair) is read off the iteration once we've
+            // resolved it below; set defaults here so the value is in scope.
+            let chatMode = "external_chatbot";
+            let isPair = false;
             if (!study.assignments || study.assignments.length === 0) {
                 throw new Error("Study has no assignments. Add tasks with --assignments when creating the study, or use `ish study generate`.");
             }
@@ -288,24 +314,57 @@ Examples:
             // auto-creates an empty iteration A; agents who don't pass
             // --iteration silently dispatch against it. Detect and refuse with
             // a clear suggestion rather than masking the problem.
+            if (isChat) {
+                chatMode = readChatMode(iteration.details);
+                isPair = chatMode === "tester_pair";
+            }
             if (!iterationHasContent(iteration.details, modality)) {
-                const flagHint = describeRequiredContentFlag(modality);
+                const flagHint = describeRequiredContentFlag(modality, isPair ? "tester_pair" : undefined);
                 const iterAlias = tagAlias(ALIAS_PREFIX.iteration, iterationId);
-                throw new Error(`Iteration "${iterationLabel}" (${iterAlias}) has no ${isMedia ? "content" : "URL"} configured yet. ` +
-                    `Add ${isMedia ? "content" : "a URL"} with ` +
+                throw new Error(`Iteration "${iterationLabel}" (${iterAlias}) has no ${isMedia ? "content" : isPair ? "audiences/scenarios" : isChat ? "endpoint" : "URL"} configured yet. ` +
+                    `Add ${isMedia ? "content" : isPair ? "the pair-mode payload" : isChat ? "an endpoint" : "a URL"} with ` +
                     `\`ish iteration create --study ${resolvedStudy} ${flagHint}\` ` +
                     `(or update the existing iteration via \`ish iteration update ${iterAlias} --details-json '{...}'\`), then retry.`);
             }
             const detailsView = readIterationDetails(iteration.details);
+            const pairConfig = isPair ? readTesterPairConfig(iteration.details) : undefined;
             // Step 2: Resolve audience.
             // - If any audience flag is set (--profile / --sample / --all / filter flags),
             //   resolve a fresh ID list from the workspace pool via the shared helper.
             // - Otherwise reuse the iteration's existing testers.
+            // - For chat tester_pair iterations, audiences live inside the
+            //   iteration's mode_details and are authoritative; run-time
+            //   overrides are refused.
             const profileNames = new Map();
             const profileIds = [];
             const existingTesters = [];
             const audienceSet = hasAudienceFlags(opts);
-            if (audienceSet) {
+            if (isPair) {
+                if (audienceSet) {
+                    throw new Error("tester_pair chat iterations carry their own audiences inside mode_details; run-time audience overrides (--profile / --sample / --all / --country / --gender / --min-age / --max-age / --search / --visibility) are not supported. " +
+                        "To change the audiences, update the iteration via `ish iteration update <id> --details-json '{...}'`.");
+                }
+                if (!pairConfig) {
+                    throw new Error("Pair-mode iteration is missing mode_details; cannot dispatch.");
+                }
+                // Surface a flat profileIds[] (a then b) so downstream
+                // bookkeeping (config resolution, output) still has something to
+                // chew on. The pair-batch tester-provisioning POST below uses
+                // the split lists, not this flat one.
+                for (const pid of pairConfig.audience_a) {
+                    if (!profileNames.has(pid)) {
+                        profileNames.set(pid, "");
+                        profileIds.push(pid);
+                    }
+                }
+                for (const pid of pairConfig.audience_b) {
+                    if (!profileNames.has(pid)) {
+                        profileNames.set(pid, "");
+                        profileIds.push(pid);
+                    }
+                }
+            }
+            else if (audienceSet) {
                 const resolved = await resolveAudienceProfileIds(client, resolvedWorkspace, opts, { requireSimulatable: false, allFlagName: "--all" });
                 profileIds.push(...resolved);
             }
@@ -322,16 +381,28 @@ Examples:
                     }
                 }
             }
-            const reuseExistingTesters = !audienceSet && existingTesters.length > 0;
-            if (profileIds.length === 0) {
+            // Pair iterations always seed fresh testers via the pair-batch
+            // endpoint; never reuse a stale tester roster from a prior run.
+            const reuseExistingTesters = !isPair && !audienceSet && existingTesters.length > 0;
+            // Pair iterations with criteria-only audiences will have empty
+            // profileIds at this stage if the backend deferred resolution past
+            // iteration create. That's a valid state — skip the
+            // "no audience flags" guard for them and let dispatch surface any
+            // backend-side resolution errors (e.g. pool too small).
+            const pairCriteriaOnly = isPair && !!pairConfig && profileIds.length === 0
+                && (!!pairConfig.role_criteria_a || !!pairConfig.role_criteria_b);
+            if (profileIds.length === 0 && !pairCriteriaOnly) {
                 throw new Error(`Iteration "${iterationLabel}" has no testers and no audience flags were given. ` +
                     "Pass --profile <ids>, or filter flags (--country, --gender, --min-age, --max-age, --search, --visibility) with --sample <N> or --all.");
             }
             // Step 3: Resolve simulation config (per-profile fallback for
-            // media + chat, both of which require a config_id per batch item)
+            // media + chat external_chatbot, both of which require a config_id
+            // per batch item). Pair-mode chat dispatch is per-conversation,
+            // not per-tester; the backend resolves configs via the tester rows
+            // it creates on /testers/pair-batch, so the CLI doesn't pre-fetch.
             const resolvedConfigOverride = opts.config ? resolveId(opts.config) : undefined;
             const profileConfigMap = new Map();
-            if ((isMedia || isChat) && !resolvedConfigOverride) {
+            if ((isMedia || (isChat && !isPair)) && !resolvedConfigOverride) {
                 for (const pid of profileIds) {
                     const profile = await client.get(`/tester-profiles/${pid}`);
                     if (profile.simulation_config_id) {
@@ -352,9 +423,63 @@ Examples:
                 log(`    Modality:       ${modality}`);
                 if (study.content_type)
                     log(`    Content type:   ${study.content_type}`);
-                if (isChat) {
-                    const epId = typeof iteration.details?.chatbot_endpoint_id === "string"
-                        ? iteration.details.chatbot_endpoint_id : undefined;
+                if (isPair && pairConfig) {
+                    log(`    Chat mode:      tester_pair`);
+                    // Audience description per side: prefer explicit count when
+                    // present; otherwise show the criteria filter that the backend
+                    // will resolve into a pool.
+                    const describeSide = (audLen, crit) => {
+                        if (audLen > 0)
+                            return `${audLen} profile(s)${crit ? ` (criteria validates list)` : ""}`;
+                        const summary = summarizeRoleCriteria(crit);
+                        return summary ? `criteria (${summary}) — pool resolved server-side` : "—";
+                    };
+                    log(`    Audience A:     ${describeSide(pairConfig.audience_a.length, pairConfig.role_criteria_a)}`);
+                    log(`    Audience B:     ${describeSide(pairConfig.audience_b.length, pairConfig.role_criteria_b)}`);
+                    const explicitConvs = Math.min(pairConfig.audience_a.length, pairConfig.audience_b.length);
+                    const criteriaResolved = !!pairConfig.role_criteria_a || !!pairConfig.role_criteria_b;
+                    if (explicitConvs > 0 && !criteriaResolved) {
+                        log(`    Conversations:  ${explicitConvs} (1:1 by index)`);
+                    }
+                    else {
+                        log(`    Conversations:  resolved server-side from criteria`);
+                    }
+                    // Scale preview: rough LLM-call estimate so the user knows
+                    // what they're committing to before --yes lands. Formula
+                    // matches the backend's billing pre-flight
+                    // (chat_credit_cost(turns) * 2 * conv_count, where the *2
+                    // accounts for one LLM call per side per turn). Doesn't
+                    // claim exact credit cost — just shape + magnitude.
+                    const turnsEstimate = opts.maxTurns
+                        ? parseInt(opts.maxTurns, 10)
+                        : (typeof iteration.details?.max_turns === "number"
+                            ? iteration.details.max_turns
+                            : 14);
+                    if (explicitConvs > 0 && !criteriaResolved && Number.isFinite(turnsEstimate)) {
+                        const est = estimateChatPair({ conversationCount: explicitConvs, maxTurns: turnsEstimate });
+                        log(`    Scale:          ${explicitConvs} conv × ${turnsEstimate} turns × 2 sides ≈ ${explicitConvs * turnsEstimate * 2} LLM calls (upper bound — early-termination may shorten)`);
+                        log(`    Credits (est):  ≈ ${est.upper_bound} credit(s) upper bound — see \`ish docs get-page reference/credits\``);
+                    }
+                    else if (criteriaResolved) {
+                        log(`    Scale:          ~N conv × ${turnsEstimate} turns × 2 sides — N resolved server-side`);
+                        log(`    Credits (est):  N × max(1, round(${turnsEstimate}/10)) × 2 — N resolved server-side`);
+                    }
+                    log(`    Initiator:      side ${pairConfig.initiator_side}`);
+                    const scenAPreview = pairConfig.scenario_a.replace(/\s+/g, " ").trim().slice(0, 60);
+                    const scenBPreview = pairConfig.scenario_b.replace(/\s+/g, " ").trim().slice(0, 60);
+                    log(`    Scenario A:     ${scenAPreview}${pairConfig.scenario_a.length > 60 ? "…" : ""}`);
+                    log(`    Scenario B:     ${scenBPreview}${pairConfig.scenario_b.length > 60 ? "…" : ""}`);
+                    if (opts.maxTurns)
+                        log(`    Max turns:      ${opts.maxTurns}`);
+                    if (opts.earlyTermination)
+                        log(`    Early term:     enabled`);
+                }
+                else if (isChat) {
+                    const md = iteration.details?.mode_details;
+                    const epId = (typeof md?.chatbot_endpoint_id === "string" && md.chatbot_endpoint_id)
+                        || (typeof iteration.details?.chatbot_endpoint_id === "string"
+                            ? iteration.details.chatbot_endpoint_id
+                            : undefined);
                     if (epId)
                         log(`    Endpoint:       ${epId}`);
                     if (opts.maxTurns)
@@ -375,10 +500,40 @@ Examples:
                     log(`    Config:         ${resolvedConfigOverride}`);
                 if (opts.language)
                     log(`    Language:       ${opts.language}`);
-                log(`    Profiles (${profileIds.length}):`);
-                for (const pid of profileIds) {
-                    const name = profileNames.get(pid);
-                    log(`      - ${name ? `${name} (${pid})` : pid}`);
+                if (!isPair) {
+                    log(`    Profiles (${profileIds.length}):`);
+                    for (const pid of profileIds) {
+                        const name = profileNames.get(pid);
+                        log(`      - ${name ? `${name} (${pid})` : pid}`);
+                    }
+                    const testerCount = profileIds.length;
+                    if (testerCount > 0) {
+                        if (isChat) {
+                            const turnsForChat = opts.maxTurns
+                                ? parseInt(opts.maxTurns, 10)
+                                : (typeof iteration.details?.max_turns === "number"
+                                    ? iteration.details.max_turns
+                                    : 14);
+                            if (Number.isFinite(turnsForChat)) {
+                                const est = estimateChatSolo({ testerCount, maxTurns: turnsForChat });
+                                log(`    Credits (est):  ≈ ${est.upper_bound} credit(s) upper bound — ${est.breakdown}`);
+                            }
+                        }
+                        else {
+                            const stepsForMedia = resolveMaxInteractions(opts.maxInteractions, iteration.details);
+                            const source = opts.maxInteractions
+                                ? "from --max-interactions"
+                                : typeof iteration.details?.max_interactions === "number"
+                                    ? "from iteration"
+                                    : `CLI default — pass --max-interactions to override`;
+                            log(`    Max steps:      ${stepsForMedia} (${source})`);
+                            if (Number.isFinite(stepsForMedia)) {
+                                const est = estimateMediaRun({ testerCount, maxInteractions: stepsForMedia });
+                                log(`    Credits (est):  ≈ ${est.upper_bound} credit(s) upper bound — ${est.breakdown}`);
+                            }
+                        }
+                        log(`                    See \`ish docs get-page reference/credits\` for formula.`);
+                    }
                 }
                 log("");
                 const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
@@ -395,7 +550,83 @@ Examples:
             }
             // Step 5: Either reuse the iteration's testers or batch-create new ones
             let createdTesters;
-            if (reuseExistingTesters && existingTesters.length > 0) {
+            // Pair-mode bookkeeping: the dispatch endpoint takes
+            // `conversation_ids`, not tester ids. We populate this list either
+            // by reusing the iteration's existing Conversation rows or by
+            // calling pair-batch.
+            let pairConversationIds = [];
+            if (isPair && pairConfig) {
+                // Pair-mode flow mirrors the MCP (`ish-mcp` `_run_pair_mode`):
+                //   1. If the iteration already carries `conversations[]` from a
+                //      prior dispatch, reuse them — skip pair-batch entirely.
+                //   2. Otherwise call pair-batch with the resolved
+                //      audience UUID lists. Criteria-only iterations should
+                //      already have audiences materialised at iteration-create
+                //      time; if they're still empty here, the backend's
+                //      `PairAudienceResolutionError` is the authoritative
+                //      failure mode — refuse before hitting pair-batch.
+                //
+                // Wire shapes per backend `app/api/iterations/routers`:
+                //   POST /iterations/{id}/testers/pair-batch
+                //     body  : { side_a: UUID[1..20], side_b: UUID[1..20] (equal len),
+                //               language?: str }
+                //     reply : { conversations: [{ conversation_id, pair_index,
+                //                              tester_a_id, tester_b_id }] }
+                const existingConvs = iteration.conversations ?? [];
+                const reusable = [];
+                for (const c of existingConvs) {
+                    const cid = c.conversation_id || c.id;
+                    if (cid && c.tester_a_id && c.tester_b_id) {
+                        reusable.push({ conversation_id: cid, tester_a_id: c.tester_a_id, tester_b_id: c.tester_b_id });
+                    }
+                }
+                let pairRows;
+                if (reusable.length > 0) {
+                    pairRows = reusable;
+                    log(`Reusing ${reusable.length} existing conversation${reusable.length > 1 ? "s" : ""} on iteration "${iterationLabel}"`);
+                }
+                else {
+                    if (pairConfig.audience_a.length === 0 || pairConfig.audience_b.length === 0) {
+                        throw new Error("Pair-mode iteration has empty audience_a / audience_b and no conversations yet. " +
+                            "If this iteration was created with --role-criteria-a/-b, the backend should have " +
+                            "resolved a profile pool at create time — try `ish iteration get <id>` to fetch a " +
+                            "fresh shape, or recreate with explicit --profile-a/-b.");
+                    }
+                    log(`Provisioning ${pairConfig.audience_a.length} pair conversation${pairConfig.audience_a.length > 1 ? "s" : ""}...`);
+                    const pairBatchResult = await client.post(`/iterations/${iterationId}/testers/pair-batch`, {
+                        side_a: pairConfig.audience_a,
+                        side_b: pairConfig.audience_b,
+                        ...(opts.language && { language: opts.language }),
+                    }, { timeout: dispatchTimeoutMs });
+                    pairRows = (pairBatchResult.conversations ?? []).map((c) => ({
+                        conversation_id: c.conversation_id,
+                        tester_a_id: c.tester_a_id,
+                        tester_b_id: c.tester_b_id,
+                    }));
+                    if (pairRows.length === 0) {
+                        throw new Error("Pair-batch returned no conversations. The backend response did not include any conversation IDs.");
+                    }
+                    log(`Created ${pairRows.length * 2} testers (${pairRows.length} conversation${pairRows.length > 1 ? "s" : ""})`);
+                }
+                pairConversationIds = pairRows.map((r) => r.conversation_id);
+                // Flatten both sides' tester IDs for downstream bookkeeping:
+                // error-tagging (`seeded_but_not_dispatched_ids`), poll filtering,
+                // and JSON output. Names aren't returned by pair-batch; agents
+                // who care can correlate via `ish iteration get <id>`.
+                createdTesters = [];
+                for (let i = 0; i < pairRows.length; i++) {
+                    const row = pairRows[i];
+                    createdTesters.push({
+                        id: row.tester_a_id,
+                        tester_profile: { name: `pair ${i} side A` },
+                    });
+                    createdTesters.push({
+                        id: row.tester_b_id,
+                        tester_profile: { name: `pair ${i} side B` },
+                    });
+                }
+            }
+            else if (reuseExistingTesters && existingTesters.length > 0) {
                 createdTesters = existingTesters;
                 log(`Reusing ${createdTesters.length} existing tester${createdTesters.length > 1 ? "s" : ""} from iteration "${iterationLabel}"`);
             }
@@ -430,7 +661,7 @@ Examples:
                     url: detailsView.url,
                     screenFormat: detailsView.screenFormat,
                     locale: detailsView.locale,
-                    maxInteractions: opts.maxInteractions ? parseMaxInteractions(opts.maxInteractions) : undefined,
+                    maxInteractions: resolveMaxInteractions(opts.maxInteractions, iteration.details),
                     headed: !!opts.headed,
                     slowMo: opts.slowMo ? parseSlowMo(opts.slowMo) : undefined,
                     devtools: opts.devtools,
@@ -479,23 +710,66 @@ Examples:
                 }
             };
             if (isChat) {
-                const chatBatchItems = createdTesters.map((t, i) => ({
-                    study_id: resolvedStudy,
-                    tester_id: t.id,
-                    config_id: resolvedConfigOverride || profileConfigMap.get(profileIds[i]),
-                    ...(opts.language && { language: opts.language }),
-                }));
                 const maxTurns = opts.maxTurns ? parseInt(opts.maxTurns, 10) : undefined;
                 if (opts.maxTurns !== undefined && (Number.isNaN(maxTurns) || maxTurns < 1)) {
                     throw new Error(`Invalid --max-turns value: ${opts.maxTurns}`);
                 }
-                const simResult = await dispatchAttempt(() => client.post("/simulation/chat/start/batch", {
-                    product_id: resolvedWorkspace,
-                    simulations: chatBatchItems,
-                    ...(maxTurns !== undefined && { max_turns: maxTurns }),
-                    ...(opts.earlyTermination && { early_termination: true }),
-                }, { timeout: dispatchTimeoutMs }));
-                simResults = simResult.results;
+                if (isPair) {
+                    if (!pairConfig || pairConversationIds.length === 0) {
+                        throw new Error("Pair-mode dispatch reached without provisioned conversations — internal invariant violation.");
+                    }
+                    // Pair-mode dispatch (backend
+                    // `app/api/simulation/routers/chat.py`):
+                    //   POST /simulation/chat/pair/start/batch
+                    //     body : { product_id, study_id,
+                    //              conversation_ids: UUID[1..20],
+                    //              config_id,                  # singular per batch
+                    //              max_turns?, language?, config_overrides? }
+                    // One Cloud Task per conversation_id. Billing is
+                    // chat_credit_cost(max_turns) * 2 * len(conversation_ids).
+                    let pairConfigId = resolvedConfigOverride;
+                    if (!pairConfigId) {
+                        // Fall back to the first audience_a profile's
+                        // simulation_config_id. Pair dispatch takes a single config
+                        // for the whole batch, so we don't need the per-profile map
+                        // the external_chatbot path builds.
+                        const fallbackProfileId = pairConfig.audience_a[0];
+                        if (!fallbackProfileId) {
+                            throw new Error("Pair-mode dispatch requires --config <id>: the iteration has no audience profile to draw a default config_id from.");
+                        }
+                        const fallbackProfile = await client.get(`/tester-profiles/${fallbackProfileId}`);
+                        if (!fallbackProfile.simulation_config_id) {
+                            throw new Error(`Pair-mode dispatch requires a config_id. Profile ${fallbackProfileId} has no simulation config assigned and --config was not passed.\n` +
+                                "Use --config <id> to specify one, or assign a config to the profile.\n" +
+                                "List configs with: ish config list");
+                        }
+                        pairConfigId = fallbackProfile.simulation_config_id;
+                    }
+                    const simResult = await dispatchAttempt(() => client.post("/simulation/chat/pair/start/batch", {
+                        product_id: resolvedWorkspace,
+                        study_id: resolvedStudy,
+                        conversation_ids: pairConversationIds,
+                        config_id: pairConfigId,
+                        ...(maxTurns !== undefined && { max_turns: maxTurns }),
+                        ...(opts.language && { language: opts.language }),
+                    }, { timeout: dispatchTimeoutMs }));
+                    simResults = simResult.results;
+                }
+                else {
+                    const chatBatchItems = createdTesters.map((t, i) => ({
+                        study_id: resolvedStudy,
+                        tester_id: t.id,
+                        config_id: resolvedConfigOverride || profileConfigMap.get(profileIds[i]),
+                        ...(opts.language && { language: opts.language }),
+                    }));
+                    const simResult = await dispatchAttempt(() => client.post("/simulation/chat/start/batch", {
+                        product_id: resolvedWorkspace,
+                        simulations: chatBatchItems,
+                        ...(maxTurns !== undefined && { max_turns: maxTurns }),
+                        ...(opts.earlyTermination && { early_termination: true }),
+                    }, { timeout: dispatchTimeoutMs }));
+                    simResults = simResult.results;
+                }
             }
             else if (isMedia) {
                 const mediaBatchItems = createdTesters.map((t, i) => ({
@@ -507,7 +781,7 @@ Examples:
                 const simResult = await dispatchAttempt(() => client.post("/simulation/media/start/batch", {
                     product_id: resolvedWorkspace,
                     simulations: mediaBatchItems,
-                    ...(opts.maxInteractions && { max_interactions: parseMaxInteractions(opts.maxInteractions) }),
+                    max_interactions: resolveMaxInteractions(opts.maxInteractions, iteration.details),
                 }, { timeout: dispatchTimeoutMs }));
                 simResults = simResult.results;
             }
@@ -525,10 +799,78 @@ Examples:
                     platform: detailsView.platform || "browser",
                     ...(detailsView.url && { url: detailsView.url }),
                     screen_format: detailsView.screenFormat || "desktop",
-                    ...(opts.maxInteractions && { max_interactions: parseMaxInteractions(opts.maxInteractions) }),
+                    max_interactions: resolveMaxInteractions(opts.maxInteractions, iteration.details),
                 }, { timeout: dispatchTimeoutMs }));
                 simResults = simResult.results;
             }
+            // Pair-mode preview block: surface the audience sizes + scenario
+            // previews + initiator in the JSON envelope so agents can verify
+            // what they just dispatched without needing a follow-up
+            // `iteration get`. Mirrors the human confirmation block (which is
+            // skipped under -y or --json).
+            const pairPreviewTurns = opts.maxTurns
+                ? parseInt(opts.maxTurns, 10)
+                : (typeof iteration.details?.max_turns === "number"
+                    ? iteration.details.max_turns
+                    : 14);
+            const pairPreview = isPair && pairConfig ? {
+                mode: "tester_pair",
+                audience_a_size: pairConfig.audience_a.length,
+                audience_b_size: pairConfig.audience_b.length,
+                // Post-dispatch we know the actual conversation count from the
+                // pair-batch (or reuse) result. This is the authoritative number
+                // — better than guessing from audience length, which may diverge
+                // when the backend trims to the smaller side.
+                conversation_count: pairConversationIds.length,
+                conversation_ids: pairConversationIds,
+                // Scale preview: matches the backend's billing-preflight
+                // formula (chat_credit_cost(turns) * 2 * conv_count). Upper
+                // bound — early-termination may shorten actual turns. This is
+                // not an exact credit cost, only the magnitude of LLM calls.
+                max_turns: Number.isFinite(pairPreviewTurns) ? pairPreviewTurns : null,
+                llm_calls_upper_bound: Number.isFinite(pairPreviewTurns)
+                    ? pairConversationIds.length * pairPreviewTurns * 2
+                    : null,
+                // Credit cost upper bound — mirrors backend's chat_credit_cost × 2 × conv.
+                // Don't claim exactness; surface formula key so agents can branch
+                // on shape. Live rates will move to `GET /billing/rates` later.
+                credit_estimate: Number.isFinite(pairPreviewTurns)
+                    ? estimateChatPair({
+                        conversationCount: pairConversationIds.length,
+                        maxTurns: pairPreviewTurns,
+                    })
+                    : null,
+                initiator_side: pairConfig.initiator_side,
+                scenario_a_preview: pairConfig.scenario_a.replace(/\s+/g, " ").trim().slice(0, 200),
+                scenario_b_preview: pairConfig.scenario_b.replace(/\s+/g, " ").trim().slice(0, 200),
+                ...(pairConfig.role_criteria_a && { role_criteria_a: pairConfig.role_criteria_a }),
+                ...(pairConfig.role_criteria_b && { role_criteria_b: pairConfig.role_criteria_b }),
+            } : undefined;
+            // Non-pair credit estimate — a top-level JSON envelope field; the
+            // non-pair counterpart of `pair_preview.credit_estimate`. Mirrors backend
+            // formulas (`media_credit_cost` / `chat_credit_cost`). null — and so omitted
+            // from the envelope — for pair runs or when we can't estimate (criteria-only audience, etc.).
+            const nonPairCreditEstimate = (() => {
+                if (isPair)
+                    return null;
+                const testerCount = createdTesters.length || profileIds.length;
+                if (testerCount <= 0)
+                    return null;
+                if (isChat) {
+                    const turns = opts.maxTurns
+                        ? parseInt(opts.maxTurns, 10)
+                        : (typeof iteration.details?.max_turns === "number"
+                            ? iteration.details.max_turns
+                            : 14);
+                    if (!Number.isFinite(turns))
+                        return null;
+                    return estimateChatSolo({ testerCount, maxTurns: turns });
+                }
+                const steps = resolveMaxInteractions(opts.maxInteractions, iteration.details);
+                if (!Number.isFinite(steps))
+                    return null;
+                return estimateMediaRun({ testerCount, maxInteractions: steps });
+            })();
             if (!opts.wait) {
                 if (globals.json) {
                     const testersOut = createdTesters.map((t) => ({
@@ -541,6 +883,9 @@ Examples:
                         testers: testersOut,
                         tester_ids: testersOut.map((t) => t.id),
                         tester_aliases: testersOut.map((t) => t.alias),
+                        url: getWebUrl(globals, `/${resolvedWorkspace}/${resolvedStudy}/timeline`),
+                        ...(pairPreview && { pair_preview: pairPreview }),
+                        ...(nonPairCreditEstimate && { credit_estimate: nonPairCreditEstimate }),
                         simulations: dedupeSimulations(simResults),
                     }, true);
                 }
@@ -577,6 +922,9 @@ Examples:
                     testers: testersOut,
                     tester_ids: testersOut.map((t) => t.id),
                     tester_aliases: testersOut.map((t) => t.alias),
+                    url: getWebUrl(globals, `/${resolvedWorkspace}/${resolvedStudy}/timeline`),
+                    ...(pairPreview && { pair_preview: pairPreview }),
+                    ...(nonPairCreditEstimate && { credit_estimate: nonPairCreditEstimate }),
                     simulations: dedupeSimulations(simResults),
                     results: rows,
                 }, true);