@ishlabs/cli 0.13.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,15 +11,33 @@ import * as readline from "node:readline/promises";
11
11
  import { withClient, getWebUrl, terminalLink, resolveWorkspace, resolveStudy, parseWaitTimeout, resolveAudienceProfileIds, addAudienceFilterFlags, hasAudienceFlags, } from "../lib/command-helpers.js";
12
12
  import { resolveId, tagAlias, ALIAS_PREFIX } from "../lib/alias-store.js";
13
13
  import { output, formatSimulationPoll } from "../lib/output.js";
14
- import { isMediaModality, isChatModality, iterationHasContent, describeRequiredContentFlag, } from "../lib/modality.js";
14
+ import { isMediaModality, isChatModality, iterationHasContent, describeRequiredContentFlag, readChatMode, readTesterPairConfig, summarizeRoleCriteria, } from "../lib/modality.js";
15
15
  import { runLocalSimulations } from "../lib/local-sim/loop.js";
16
16
  import { ensureBrowser } from "../lib/local-sim/install.js";
17
+ import { estimateChatPair, estimateChatSolo, estimateMediaRun } from "../lib/billing.js";
17
18
  function parseMaxInteractions(value) {
18
19
  const n = parseInt(value, 10);
19
20
  if (isNaN(n) || n < 1)
20
21
  throw new Error(`Invalid --max-interactions value: ${value}`);
21
22
  return n;
22
23
  }
24
/**
 * Default cap the CLI sends when neither `--max-interactions` nor the
 * iteration carries its own value. Picked to match the frontend's
 * conservative interactive launchers and to prevent runaway spend when an
 * iteration runs against a broken or non-responsive surface — without a
 * cap, a stuck tester can rack up hundreds of steps before the SDK gives
 * up.
 */
const DEFAULT_MAX_INTERACTIONS = 20;

/**
 * Resolve the interaction cap for a run.
 *
 * Precedence: explicit flag value > iteration's stored `max_interactions`
 * > `DEFAULT_MAX_INTERACTIONS`.
 *
 * @param {string|undefined} optsValue - Raw `--max-interactions` value; a
 *   falsy value is treated as "flag not supplied".
 * @param {object|null|undefined} iterationDetails - Iteration details that
 *   may carry a numeric `max_interactions`.
 * @returns {number} A finite integer >= 1.
 * @throws {Error} When the flag is supplied but `parseMaxInteractions`
 *   rejects it.
 */
function resolveMaxInteractions(optsValue, iterationDetails) {
  if (optsValue) {
    return parseMaxInteractions(optsValue);
  }
  const stored = iterationDetails?.max_interactions;
  // Only honour a usable stored cap. NaN / Infinity (which JSON-serialise to
  // null), zero and negatives would defeat the cap this function exists to
  // guarantee, so they fall through to the default; fractions are floored.
  if (typeof stored === "number" && Number.isFinite(stored) && stored >= 1) {
    return Math.floor(stored);
  }
  return DEFAULT_MAX_INTERACTIONS;
}
23
41
  function parseSlowMo(value) {
24
42
  const n = parseInt(value, 10);
25
43
  if (isNaN(n) || n < 0)
@@ -161,7 +179,7 @@ export function attachStudyRunCommands(study) {
161
179
  allFlagDescription: "Use every AI profile matching the filters (workspace-wide if no filters set)",
162
180
  })
163
181
  .option("--config <id>", "Simulation config ID (required for media unless every profile has one)")
164
- .option("--max-interactions <n>", "Max interactions per tester")
182
+ .option("--max-interactions <n>", `Max interactions per tester (interactive / media only). Precedence: flag > iteration's stored value > CLI default (${DEFAULT_MAX_INTERACTIONS}).`)
165
183
  .option("--max-turns <n>", "Max conversation turns per tester (chat studies only)")
166
184
  .option("--early-termination", "Allow chat agent to end the conversation early when goals are met (chat studies only)")
167
185
  .option("--language <lang>", "Language code (e.g. en, sv)")
@@ -207,6 +225,10 @@ Examples:
207
225
  # Override the simulation config (e.g. for a media study):
208
226
  $ ish study run --config c-c3c
209
227
 
228
+ # Cap interactions per tester (default 20 — pass higher to allow deeper
229
+ # exploration, lower to cap spend on a known-broken surface):
230
+ $ ish study run --max-interactions 30
231
+
210
232
  # Block until all simulations finish (or timeout):
211
233
  $ ish study run --wait
212
234
  $ ish study run --wait --timeout 600
@@ -262,6 +284,10 @@ Examples:
262
284
  const modality = study.modality || "interactive";
263
285
  const isMedia = isMediaModality(modality);
264
286
  const isChat = isChatModality(modality);
287
+ // Pair-mode (tester_pair) is read off the iteration once we've
288
+ // resolved it below; set defaults here so the value is in scope.
289
+ let chatMode = "external_chatbot";
290
+ let isPair = false;
265
291
  if (!study.assignments || study.assignments.length === 0) {
266
292
  throw new Error("Study has no assignments. Add tasks with --assignments when creating the study, or use `ish study generate`.");
267
293
  }
@@ -288,24 +314,57 @@ Examples:
288
314
  // auto-creates an empty iteration A; agents who don't pass
289
315
  // --iteration silently dispatch against it. Detect and refuse with
290
316
  // a clear suggestion rather than masking the problem.
317
+ if (isChat) {
318
+ chatMode = readChatMode(iteration.details);
319
+ isPair = chatMode === "tester_pair";
320
+ }
291
321
  if (!iterationHasContent(iteration.details, modality)) {
292
- const flagHint = describeRequiredContentFlag(modality);
322
+ const flagHint = describeRequiredContentFlag(modality, isPair ? "tester_pair" : undefined);
293
323
  const iterAlias = tagAlias(ALIAS_PREFIX.iteration, iterationId);
294
- throw new Error(`Iteration "${iterationLabel}" (${iterAlias}) has no ${isMedia ? "content" : "URL"} configured yet. ` +
295
- `Add ${isMedia ? "content" : "a URL"} with ` +
324
+ throw new Error(`Iteration "${iterationLabel}" (${iterAlias}) has no ${isMedia ? "content" : isPair ? "audiences/scenarios" : isChat ? "endpoint" : "URL"} configured yet. ` +
325
+ `Add ${isMedia ? "content" : isPair ? "the pair-mode payload" : isChat ? "an endpoint" : "a URL"} with ` +
296
326
  `\`ish iteration create --study ${resolvedStudy} ${flagHint}\` ` +
297
327
  `(or update the existing iteration via \`ish iteration update ${iterAlias} --details-json '{...}'\`), then retry.`);
298
328
  }
299
329
  const detailsView = readIterationDetails(iteration.details);
330
+ const pairConfig = isPair ? readTesterPairConfig(iteration.details) : undefined;
300
331
  // Step 2: Resolve audience.
301
332
  // - If any audience flag is set (--profile / --sample / --all / filter flags),
302
333
  // resolve a fresh ID list from the workspace pool via the shared helper.
303
334
  // - Otherwise reuse the iteration's existing testers.
335
+ // - For chat tester_pair iterations, audiences live inside the
336
+ // iteration's mode_details and are authoritative; run-time
337
+ // overrides are refused.
304
338
  const profileNames = new Map();
305
339
  const profileIds = [];
306
340
  const existingTesters = [];
307
341
  const audienceSet = hasAudienceFlags(opts);
308
- if (audienceSet) {
342
+ if (isPair) {
343
+ if (audienceSet) {
344
+ throw new Error("tester_pair chat iterations carry their own audiences inside mode_details; run-time audience overrides (--profile / --sample / --all / --country / --gender / --min-age / --max-age / --search / --visibility) are not supported. " +
345
+ "To change the audiences, update the iteration via `ish iteration update <id> --details-json '{...}'`.");
346
+ }
347
+ if (!pairConfig) {
348
+ throw new Error("Pair-mode iteration is missing mode_details; cannot dispatch.");
349
+ }
350
+ // Surface a flat profileIds[] (a then b) so downstream
351
+ // bookkeeping (config resolution, output) still has something to
352
+ // chew on. The pair-batch tester-provisioning POST below uses
353
+ // the split lists, not this flat one.
354
+ for (const pid of pairConfig.audience_a) {
355
+ if (!profileNames.has(pid)) {
356
+ profileNames.set(pid, "");
357
+ profileIds.push(pid);
358
+ }
359
+ }
360
+ for (const pid of pairConfig.audience_b) {
361
+ if (!profileNames.has(pid)) {
362
+ profileNames.set(pid, "");
363
+ profileIds.push(pid);
364
+ }
365
+ }
366
+ }
367
+ else if (audienceSet) {
309
368
  const resolved = await resolveAudienceProfileIds(client, resolvedWorkspace, opts, { requireSimulatable: false, allFlagName: "--all" });
310
369
  profileIds.push(...resolved);
311
370
  }
@@ -322,16 +381,28 @@ Examples:
322
381
  }
323
382
  }
324
383
  }
325
- const reuseExistingTesters = !audienceSet && existingTesters.length > 0;
326
- if (profileIds.length === 0) {
384
+ // Pair iterations always seed fresh testers via the pair-batch
385
+ // endpoint; never reuse a stale tester roster from a prior run.
386
+ const reuseExistingTesters = !isPair && !audienceSet && existingTesters.length > 0;
387
+ // Pair iterations with criteria-only audiences will have empty
388
+ // profileIds at this stage if the backend deferred resolution past
389
+ // iteration create. That's a valid state — skip the
390
+ // "no audience flags" guard for them and let dispatch surface any
391
+ // backend-side resolution errors (e.g. pool too small).
392
+ const pairCriteriaOnly = isPair && !!pairConfig && profileIds.length === 0
393
+ && (!!pairConfig.role_criteria_a || !!pairConfig.role_criteria_b);
394
+ if (profileIds.length === 0 && !pairCriteriaOnly) {
327
395
  throw new Error(`Iteration "${iterationLabel}" has no testers and no audience flags were given. ` +
328
396
  "Pass --profile <ids>, or filter flags (--country, --gender, --min-age, --max-age, --search, --visibility) with --sample <N> or --all.");
329
397
  }
330
398
  // Step 3: Resolve simulation config (per-profile fallback for
331
- // media + chat, both of which require a config_id per batch item)
399
+ // media + chat external_chatbot, both of which require a config_id
400
+ // per batch item). Pair-mode chat dispatch is per-conversation,
401
+ // not per-tester; the backend resolves configs via the tester rows
402
+ // it creates on /testers/pair-batch, so the CLI doesn't pre-fetch.
332
403
  const resolvedConfigOverride = opts.config ? resolveId(opts.config) : undefined;
333
404
  const profileConfigMap = new Map();
334
- if ((isMedia || isChat) && !resolvedConfigOverride) {
405
+ if ((isMedia || (isChat && !isPair)) && !resolvedConfigOverride) {
335
406
  for (const pid of profileIds) {
336
407
  const profile = await client.get(`/tester-profiles/${pid}`);
337
408
  if (profile.simulation_config_id) {
@@ -352,9 +423,63 @@ Examples:
352
423
  log(` Modality: ${modality}`);
353
424
  if (study.content_type)
354
425
  log(` Content type: ${study.content_type}`);
355
- if (isChat) {
356
- const epId = typeof iteration.details?.chatbot_endpoint_id === "string"
357
- ? iteration.details.chatbot_endpoint_id : undefined;
426
+ if (isPair && pairConfig) {
427
+ log(` Chat mode: tester_pair`);
428
+ // Audience description per side: prefer explicit count when
429
+ // present; otherwise show the criteria filter that the backend
430
+ // will resolve into a pool.
431
+ const describeSide = (audLen, crit) => {
432
+ if (audLen > 0)
433
+ return `${audLen} profile(s)${crit ? ` (criteria validates list)` : ""}`;
434
+ const summary = summarizeRoleCriteria(crit);
435
+ return summary ? `criteria (${summary}) — pool resolved server-side` : "—";
436
+ };
437
+ log(` Audience A: ${describeSide(pairConfig.audience_a.length, pairConfig.role_criteria_a)}`);
438
+ log(` Audience B: ${describeSide(pairConfig.audience_b.length, pairConfig.role_criteria_b)}`);
439
+ const explicitConvs = Math.min(pairConfig.audience_a.length, pairConfig.audience_b.length);
440
+ const criteriaResolved = !!pairConfig.role_criteria_a || !!pairConfig.role_criteria_b;
441
+ if (explicitConvs > 0 && !criteriaResolved) {
442
+ log(` Conversations: ${explicitConvs} (1:1 by index)`);
443
+ }
444
+ else {
445
+ log(` Conversations: resolved server-side from criteria`);
446
+ }
447
+ // Scale preview: rough LLM-call estimate so the user knows
448
+ // what they're committing to before --yes lands. Formula
449
+ // matches the backend's billing pre-flight
450
+ // (chat_credit_cost(turns) * 2 * conv_count, where the *2
451
+ // accounts for one LLM call per side per turn). Doesn't
452
+ // claim exact credit cost — just shape + magnitude.
453
+ const turnsEstimate = opts.maxTurns
454
+ ? parseInt(opts.maxTurns, 10)
455
+ : (typeof iteration.details?.max_turns === "number"
456
+ ? iteration.details.max_turns
457
+ : 14);
458
+ if (explicitConvs > 0 && !criteriaResolved && Number.isFinite(turnsEstimate)) {
459
+ const est = estimateChatPair({ conversationCount: explicitConvs, maxTurns: turnsEstimate });
460
+ log(` Scale: ${explicitConvs} conv × ${turnsEstimate} turns × 2 sides ≈ ${explicitConvs * turnsEstimate * 2} LLM calls (upper bound — early-termination may shorten)`);
461
+ log(` Credits (est): ≈ ${est.upper_bound} credit(s) upper bound — see \`ish docs get-page reference/credits\``);
462
+ }
463
+ else if (criteriaResolved) {
464
+ log(` Scale: ~N conv × ${turnsEstimate} turns × 2 sides — N resolved server-side`);
465
+ log(` Credits (est): N × max(1, round(${turnsEstimate}/10)) × 2 — N resolved server-side`);
466
+ }
467
+ log(` Initiator: side ${pairConfig.initiator_side}`);
468
+ const scenAPreview = pairConfig.scenario_a.replace(/\s+/g, " ").trim().slice(0, 60);
469
+ const scenBPreview = pairConfig.scenario_b.replace(/\s+/g, " ").trim().slice(0, 60);
470
+ log(` Scenario A: ${scenAPreview}${pairConfig.scenario_a.length > 60 ? "…" : ""}`);
471
+ log(` Scenario B: ${scenBPreview}${pairConfig.scenario_b.length > 60 ? "…" : ""}`);
472
+ if (opts.maxTurns)
473
+ log(` Max turns: ${opts.maxTurns}`);
474
+ if (opts.earlyTermination)
475
+ log(` Early term: enabled`);
476
+ }
477
+ else if (isChat) {
478
+ const md = iteration.details?.mode_details;
479
+ const epId = (typeof md?.chatbot_endpoint_id === "string" && md.chatbot_endpoint_id)
480
+ || (typeof iteration.details?.chatbot_endpoint_id === "string"
481
+ ? iteration.details.chatbot_endpoint_id
482
+ : undefined);
358
483
  if (epId)
359
484
  log(` Endpoint: ${epId}`);
360
485
  if (opts.maxTurns)
@@ -375,10 +500,40 @@ Examples:
375
500
  log(` Config: ${resolvedConfigOverride}`);
376
501
  if (opts.language)
377
502
  log(` Language: ${opts.language}`);
378
- log(` Profiles (${profileIds.length}):`);
379
- for (const pid of profileIds) {
380
- const name = profileNames.get(pid);
381
- log(` - ${name ? `${name} (${pid})` : pid}`);
503
+ if (!isPair) {
504
+ log(` Profiles (${profileIds.length}):`);
505
+ for (const pid of profileIds) {
506
+ const name = profileNames.get(pid);
507
+ log(` - ${name ? `${name} (${pid})` : pid}`);
508
+ }
509
+ const testerCount = profileIds.length;
510
+ if (testerCount > 0) {
511
+ if (isChat) {
512
+ const turnsForChat = opts.maxTurns
513
+ ? parseInt(opts.maxTurns, 10)
514
+ : (typeof iteration.details?.max_turns === "number"
515
+ ? iteration.details.max_turns
516
+ : 14);
517
+ if (Number.isFinite(turnsForChat)) {
518
+ const est = estimateChatSolo({ testerCount, maxTurns: turnsForChat });
519
+ log(` Credits (est): ≈ ${est.upper_bound} credit(s) upper bound — ${est.breakdown}`);
520
+ }
521
+ }
522
+ else {
523
+ const stepsForMedia = resolveMaxInteractions(opts.maxInteractions, iteration.details);
524
+ const source = opts.maxInteractions
525
+ ? "from --max-interactions"
526
+ : typeof iteration.details?.max_interactions === "number"
527
+ ? "from iteration"
528
+ : `CLI default — pass --max-interactions to override`;
529
+ log(` Max steps: ${stepsForMedia} (${source})`);
530
+ if (Number.isFinite(stepsForMedia)) {
531
+ const est = estimateMediaRun({ testerCount, maxInteractions: stepsForMedia });
532
+ log(` Credits (est): ≈ ${est.upper_bound} credit(s) upper bound — ${est.breakdown}`);
533
+ }
534
+ }
535
+ log(` See \`ish docs get-page reference/credits\` for formula.`);
536
+ }
382
537
  }
383
538
  log("");
384
539
  const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
@@ -395,7 +550,83 @@ Examples:
395
550
  }
396
551
  // Step 5: Either reuse the iteration's testers or batch-create new ones
397
552
  let createdTesters;
398
- if (reuseExistingTesters && existingTesters.length > 0) {
553
+ // Pair-mode bookkeeping: the dispatch endpoint takes
554
+ // `conversation_ids`, not tester ids. We populate this list either
555
+ // by reusing the iteration's existing Conversation rows or by
556
+ // calling pair-batch.
557
+ let pairConversationIds = [];
558
+ if (isPair && pairConfig) {
559
+ // Pair-mode flow mirrors the MCP (`ish-mcp` `_run_pair_mode`):
560
+ // 1. If the iteration already carries `conversations[]` from a
561
+ // prior dispatch, reuse them — skip pair-batch entirely.
562
+ // 2. Otherwise call pair-batch with the resolved
563
+ // audience UUID lists. Criteria-only iterations should
564
+ // already have audiences materialised at iteration-create
565
+ // time; if they're still empty here, the backend's
566
+ // `PairAudienceResolutionError` is the authoritative
567
+ // failure mode — refuse before hitting pair-batch.
568
+ //
569
+ // Wire shapes per backend `app/api/iterations/routers`:
570
+ // POST /iterations/{id}/testers/pair-batch
571
+ // body : { side_a: UUID[1..20], side_b: UUID[1..20] (equal len),
572
+ // language?: str }
573
+ // reply : { conversations: [{ conversation_id, pair_index,
574
+ // tester_a_id, tester_b_id }] }
575
+ const existingConvs = iteration.conversations ?? [];
576
+ const reusable = [];
577
+ for (const c of existingConvs) {
578
+ const cid = c.conversation_id || c.id;
579
+ if (cid && c.tester_a_id && c.tester_b_id) {
580
+ reusable.push({ conversation_id: cid, tester_a_id: c.tester_a_id, tester_b_id: c.tester_b_id });
581
+ }
582
+ }
583
+ let pairRows;
584
+ if (reusable.length > 0) {
585
+ pairRows = reusable;
586
+ log(`Reusing ${reusable.length} existing conversation${reusable.length > 1 ? "s" : ""} on iteration "${iterationLabel}"`);
587
+ }
588
+ else {
589
+ if (pairConfig.audience_a.length === 0 || pairConfig.audience_b.length === 0) {
590
+ throw new Error("Pair-mode iteration has empty audience_a / audience_b and no conversations yet. " +
591
+ "If this iteration was created with --role-criteria-a/-b, the backend should have " +
592
+ "resolved a profile pool at create time — try `ish iteration get <id>` to fetch a " +
593
+ "fresh shape, or recreate with explicit --profile-a/-b.");
594
+ }
595
+ log(`Provisioning ${pairConfig.audience_a.length} pair conversation${pairConfig.audience_a.length > 1 ? "s" : ""}...`);
596
+ const pairBatchResult = await client.post(`/iterations/${iterationId}/testers/pair-batch`, {
597
+ side_a: pairConfig.audience_a,
598
+ side_b: pairConfig.audience_b,
599
+ ...(opts.language && { language: opts.language }),
600
+ }, { timeout: dispatchTimeoutMs });
601
+ pairRows = (pairBatchResult.conversations ?? []).map((c) => ({
602
+ conversation_id: c.conversation_id,
603
+ tester_a_id: c.tester_a_id,
604
+ tester_b_id: c.tester_b_id,
605
+ }));
606
+ if (pairRows.length === 0) {
607
+ throw new Error("Pair-batch returned no conversations. The backend response did not include any conversation IDs.");
608
+ }
609
+ log(`Created ${pairRows.length * 2} testers (${pairRows.length} conversation${pairRows.length > 1 ? "s" : ""})`);
610
+ }
611
+ pairConversationIds = pairRows.map((r) => r.conversation_id);
612
+ // Flatten both sides' tester IDs for downstream bookkeeping:
613
+ // error-tagging (`seeded_but_not_dispatched_ids`), poll filtering,
614
+ // and JSON output. Names aren't returned by pair-batch; agents
615
+ // who care can correlate via `ish iteration get <id>`.
616
+ createdTesters = [];
617
+ for (let i = 0; i < pairRows.length; i++) {
618
+ const row = pairRows[i];
619
+ createdTesters.push({
620
+ id: row.tester_a_id,
621
+ tester_profile: { name: `pair ${i} side A` },
622
+ });
623
+ createdTesters.push({
624
+ id: row.tester_b_id,
625
+ tester_profile: { name: `pair ${i} side B` },
626
+ });
627
+ }
628
+ }
629
+ else if (reuseExistingTesters && existingTesters.length > 0) {
399
630
  createdTesters = existingTesters;
400
631
  log(`Reusing ${createdTesters.length} existing tester${createdTesters.length > 1 ? "s" : ""} from iteration "${iterationLabel}"`);
401
632
  }
@@ -430,7 +661,7 @@ Examples:
430
661
  url: detailsView.url,
431
662
  screenFormat: detailsView.screenFormat,
432
663
  locale: detailsView.locale,
433
- maxInteractions: opts.maxInteractions ? parseMaxInteractions(opts.maxInteractions) : undefined,
664
+ maxInteractions: resolveMaxInteractions(opts.maxInteractions, iteration.details),
434
665
  headed: !!opts.headed,
435
666
  slowMo: opts.slowMo ? parseSlowMo(opts.slowMo) : undefined,
436
667
  devtools: opts.devtools,
@@ -479,23 +710,66 @@ Examples:
479
710
  }
480
711
  };
481
712
  if (isChat) {
482
- const chatBatchItems = createdTesters.map((t, i) => ({
483
- study_id: resolvedStudy,
484
- tester_id: t.id,
485
- config_id: resolvedConfigOverride || profileConfigMap.get(profileIds[i]),
486
- ...(opts.language && { language: opts.language }),
487
- }));
488
713
  const maxTurns = opts.maxTurns ? parseInt(opts.maxTurns, 10) : undefined;
489
714
  if (opts.maxTurns !== undefined && (Number.isNaN(maxTurns) || maxTurns < 1)) {
490
715
  throw new Error(`Invalid --max-turns value: ${opts.maxTurns}`);
491
716
  }
492
- const simResult = await dispatchAttempt(() => client.post("/simulation/chat/start/batch", {
493
- product_id: resolvedWorkspace,
494
- simulations: chatBatchItems,
495
- ...(maxTurns !== undefined && { max_turns: maxTurns }),
496
- ...(opts.earlyTermination && { early_termination: true }),
497
- }, { timeout: dispatchTimeoutMs }));
498
- simResults = simResult.results;
717
+ if (isPair) {
718
+ if (!pairConfig || pairConversationIds.length === 0) {
719
+ throw new Error("Pair-mode dispatch reached without provisioned conversations — internal invariant violation.");
720
+ }
721
+ // Pair-mode dispatch (backend
722
+ // `app/api/simulation/routers/chat.py`):
723
+ // POST /simulation/chat/pair/start/batch
724
+ // body : { product_id, study_id,
725
+ // conversation_ids: UUID[1..20],
726
+ // config_id, # singular per batch
727
+ // max_turns?, language?, config_overrides? }
728
+ // One Cloud Task per conversation_id. Billing is
729
+ // chat_credit_cost(max_turns) * 2 * len(conversation_ids).
730
+ let pairConfigId = resolvedConfigOverride;
731
+ if (!pairConfigId) {
732
+ // Fall back to the first audience_a profile's
733
+ // simulation_config_id. Pair dispatch takes a single config
734
+ // for the whole batch, so we don't need the per-profile map
735
+ // the external_chatbot path builds.
736
+ const fallbackProfileId = pairConfig.audience_a[0];
737
+ if (!fallbackProfileId) {
738
+ throw new Error("Pair-mode dispatch requires --config <id>: the iteration has no audience profile to draw a default config_id from.");
739
+ }
740
+ const fallbackProfile = await client.get(`/tester-profiles/${fallbackProfileId}`);
741
+ if (!fallbackProfile.simulation_config_id) {
742
+ throw new Error(`Pair-mode dispatch requires a config_id. Profile ${fallbackProfileId} has no simulation config assigned and --config was not passed.\n` +
743
+ "Use --config <id> to specify one, or assign a config to the profile.\n" +
744
+ "List configs with: ish config list");
745
+ }
746
+ pairConfigId = fallbackProfile.simulation_config_id;
747
+ }
748
+ const simResult = await dispatchAttempt(() => client.post("/simulation/chat/pair/start/batch", {
749
+ product_id: resolvedWorkspace,
750
+ study_id: resolvedStudy,
751
+ conversation_ids: pairConversationIds,
752
+ config_id: pairConfigId,
753
+ ...(maxTurns !== undefined && { max_turns: maxTurns }),
754
+ ...(opts.language && { language: opts.language }),
755
+ }, { timeout: dispatchTimeoutMs }));
756
+ simResults = simResult.results;
757
+ }
758
+ else {
759
+ const chatBatchItems = createdTesters.map((t, i) => ({
760
+ study_id: resolvedStudy,
761
+ tester_id: t.id,
762
+ config_id: resolvedConfigOverride || profileConfigMap.get(profileIds[i]),
763
+ ...(opts.language && { language: opts.language }),
764
+ }));
765
+ const simResult = await dispatchAttempt(() => client.post("/simulation/chat/start/batch", {
766
+ product_id: resolvedWorkspace,
767
+ simulations: chatBatchItems,
768
+ ...(maxTurns !== undefined && { max_turns: maxTurns }),
769
+ ...(opts.earlyTermination && { early_termination: true }),
770
+ }, { timeout: dispatchTimeoutMs }));
771
+ simResults = simResult.results;
772
+ }
499
773
  }
500
774
  else if (isMedia) {
501
775
  const mediaBatchItems = createdTesters.map((t, i) => ({
@@ -507,7 +781,7 @@ Examples:
507
781
  const simResult = await dispatchAttempt(() => client.post("/simulation/media/start/batch", {
508
782
  product_id: resolvedWorkspace,
509
783
  simulations: mediaBatchItems,
510
- ...(opts.maxInteractions && { max_interactions: parseMaxInteractions(opts.maxInteractions) }),
784
+ max_interactions: resolveMaxInteractions(opts.maxInteractions, iteration.details),
511
785
  }, { timeout: dispatchTimeoutMs }));
512
786
  simResults = simResult.results;
513
787
  }
@@ -525,10 +799,78 @@ Examples:
525
799
  platform: detailsView.platform || "browser",
526
800
  ...(detailsView.url && { url: detailsView.url }),
527
801
  screen_format: detailsView.screenFormat || "desktop",
528
- ...(opts.maxInteractions && { max_interactions: parseMaxInteractions(opts.maxInteractions) }),
802
+ max_interactions: resolveMaxInteractions(opts.maxInteractions, iteration.details),
529
803
  }, { timeout: dispatchTimeoutMs }));
530
804
  simResults = simResult.results;
531
805
  }
806
+ // Pair-mode preview block: surface the audience sizes + scenario
807
+ // previews + initiator in the JSON envelope so agents can verify
808
+ // what they just dispatched without needing a follow-up
809
+ // `iteration get`. Mirrors the human confirmation block (which is
810
+ // skipped under -y or --json).
811
+ const pairPreviewTurns = opts.maxTurns
812
+ ? parseInt(opts.maxTurns, 10)
813
+ : (typeof iteration.details?.max_turns === "number"
814
+ ? iteration.details.max_turns
815
+ : 14);
816
+ const pairPreview = isPair && pairConfig ? {
817
+ mode: "tester_pair",
818
+ audience_a_size: pairConfig.audience_a.length,
819
+ audience_b_size: pairConfig.audience_b.length,
820
+ // Post-dispatch we know the actual conversation count from the
821
+ // pair-batch (or reuse) result. This is the authoritative number
822
+ // — better than guessing from audience length, which may diverge
823
+ // when the backend trims to the smaller side.
824
+ conversation_count: pairConversationIds.length,
825
+ conversation_ids: pairConversationIds,
826
+ // Scale preview: matches the backend's billing-preflight
827
+ // formula (chat_credit_cost(turns) * 2 * conv_count). Upper
828
+ // bound — early-termination may shorten actual turns. The CLI
829
+ // doesn't claim exact credit cost; just call magnitude.
830
+ max_turns: Number.isFinite(pairPreviewTurns) ? pairPreviewTurns : null,
831
+ llm_calls_upper_bound: Number.isFinite(pairPreviewTurns)
832
+ ? pairConversationIds.length * pairPreviewTurns * 2
833
+ : null,
834
+ // Credit cost upper bound — mirrors backend's chat_credit_cost × 2 × conv.
835
+ // Don't claim exactness; surface formula key so agents can branch
836
+ // on shape. Live rates will move to `GET /billing/rates` later.
837
+ credit_estimate: Number.isFinite(pairPreviewTurns)
838
+ ? estimateChatPair({
839
+ conversationCount: pairConversationIds.length,
840
+ maxTurns: pairPreviewTurns,
841
+ })
842
+ : null,
843
+ initiator_side: pairConfig.initiator_side,
844
+ scenario_a_preview: pairConfig.scenario_a.replace(/\s+/g, " ").trim().slice(0, 200),
845
+ scenario_b_preview: pairConfig.scenario_b.replace(/\s+/g, " ").trim().slice(0, 200),
846
+ ...(pairConfig.role_criteria_a && { role_criteria_a: pairConfig.role_criteria_a }),
847
+ ...(pairConfig.role_criteria_b && { role_criteria_b: pairConfig.role_criteria_b }),
848
+ } : undefined;
849
+ // Non-pair credit estimate — surfaced as a top-level field in the
850
+ // JSON envelope alongside `pair_preview.credit_estimate`. Mirrors
851
+ // backend formulas (`media_credit_cost` / `chat_credit_cost`).
852
+ // null when we can't estimate (criteria-only audience, etc.).
853
+ const nonPairCreditEstimate = (() => {
854
+ if (isPair)
855
+ return null;
856
+ const testerCount = createdTesters.length || profileIds.length;
857
+ if (testerCount <= 0)
858
+ return null;
859
+ if (isChat) {
860
+ const turns = opts.maxTurns
861
+ ? parseInt(opts.maxTurns, 10)
862
+ : (typeof iteration.details?.max_turns === "number"
863
+ ? iteration.details.max_turns
864
+ : 14);
865
+ if (!Number.isFinite(turns))
866
+ return null;
867
+ return estimateChatSolo({ testerCount, maxTurns: turns });
868
+ }
869
+ const steps = resolveMaxInteractions(opts.maxInteractions, iteration.details);
870
+ if (!Number.isFinite(steps))
871
+ return null;
872
+ return estimateMediaRun({ testerCount, maxInteractions: steps });
873
+ })();
532
874
  if (!opts.wait) {
533
875
  if (globals.json) {
534
876
  const testersOut = createdTesters.map((t) => ({
@@ -541,6 +883,9 @@ Examples:
541
883
  testers: testersOut,
542
884
  tester_ids: testersOut.map((t) => t.id),
543
885
  tester_aliases: testersOut.map((t) => t.alias),
886
+ url: getWebUrl(globals, `/${resolvedWorkspace}/${resolvedStudy}/timeline`),
887
+ ...(pairPreview && { pair_preview: pairPreview }),
888
+ ...(nonPairCreditEstimate && { credit_estimate: nonPairCreditEstimate }),
544
889
  simulations: dedupeSimulations(simResults),
545
890
  }, true);
546
891
  }
@@ -577,6 +922,9 @@ Examples:
577
922
  testers: testersOut,
578
923
  tester_ids: testersOut.map((t) => t.id),
579
924
  tester_aliases: testersOut.map((t) => t.alias),
925
+ url: getWebUrl(globals, `/${resolvedWorkspace}/${resolvedStudy}/timeline`),
926
+ ...(pairPreview && { pair_preview: pairPreview }),
927
+ ...(nonPairCreditEstimate && { credit_estimate: nonPairCreditEstimate }),
580
928
  simulations: dedupeSimulations(simResults),
581
929
  results: rows,
582
930
  }, true);