@ishlabs/cli 0.13.0 → 0.14.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -11,9 +11,10 @@ import * as readline from "node:readline/promises";
11
11
  import { withClient, getWebUrl, terminalLink, resolveWorkspace, resolveStudy, parseWaitTimeout, resolveAudienceProfileIds, addAudienceFilterFlags, hasAudienceFlags, } from "../lib/command-helpers.js";
12
12
  import { resolveId, tagAlias, ALIAS_PREFIX } from "../lib/alias-store.js";
13
13
  import { output, formatSimulationPoll } from "../lib/output.js";
14
- import { isMediaModality, isChatModality, iterationHasContent, describeRequiredContentFlag, } from "../lib/modality.js";
14
+ import { isMediaModality, isChatModality, iterationHasContent, describeRequiredContentFlag, readChatMode, readTesterPairConfig, summarizeRoleCriteria, } from "../lib/modality.js";
15
15
  import { runLocalSimulations } from "../lib/local-sim/loop.js";
16
16
  import { ensureBrowser } from "../lib/local-sim/install.js";
17
+ import { estimateChatPair, estimateChatSolo, estimateMediaRun } from "../lib/billing.js";
17
18
  function parseMaxInteractions(value) {
18
19
  const n = parseInt(value, 10);
19
20
  if (isNaN(n) || n < 1)
@@ -262,6 +263,10 @@ Examples:
262
263
  const modality = study.modality || "interactive";
263
264
  const isMedia = isMediaModality(modality);
264
265
  const isChat = isChatModality(modality);
266
+ // Pair-mode (tester_pair) is read off the iteration once we've
267
+ // resolved it below; set defaults here so the value is in scope.
268
+ let chatMode = "external_chatbot";
269
+ let isPair = false;
265
270
  if (!study.assignments || study.assignments.length === 0) {
266
271
  throw new Error("Study has no assignments. Add tasks with --assignments when creating the study, or use `ish study generate`.");
267
272
  }
@@ -288,24 +293,57 @@ Examples:
288
293
  // auto-creates an empty iteration A; agents who don't pass
289
294
  // --iteration silently dispatch against it. Detect and refuse with
290
295
  // a clear suggestion rather than masking the problem.
296
+ if (isChat) {
297
+ chatMode = readChatMode(iteration.details);
298
+ isPair = chatMode === "tester_pair";
299
+ }
291
300
  if (!iterationHasContent(iteration.details, modality)) {
292
- const flagHint = describeRequiredContentFlag(modality);
301
+ const flagHint = describeRequiredContentFlag(modality, isPair ? "tester_pair" : undefined);
293
302
  const iterAlias = tagAlias(ALIAS_PREFIX.iteration, iterationId);
294
- throw new Error(`Iteration "${iterationLabel}" (${iterAlias}) has no ${isMedia ? "content" : "URL"} configured yet. ` +
295
- `Add ${isMedia ? "content" : "a URL"} with ` +
303
+ throw new Error(`Iteration "${iterationLabel}" (${iterAlias}) has no ${isMedia ? "content" : isPair ? "audiences/scenarios" : isChat ? "endpoint" : "URL"} configured yet. ` +
304
+ `Add ${isMedia ? "content" : isPair ? "the pair-mode payload" : isChat ? "an endpoint" : "a URL"} with ` +
296
305
  `\`ish iteration create --study ${resolvedStudy} ${flagHint}\` ` +
297
306
  `(or update the existing iteration via \`ish iteration update ${iterAlias} --details-json '{...}'\`), then retry.`);
298
307
  }
299
308
  const detailsView = readIterationDetails(iteration.details);
309
+ const pairConfig = isPair ? readTesterPairConfig(iteration.details) : undefined;
300
310
  // Step 2: Resolve audience.
301
311
  // - If any audience flag is set (--profile / --sample / --all / filter flags),
302
312
  // resolve a fresh ID list from the workspace pool via the shared helper.
303
313
  // - Otherwise reuse the iteration's existing testers.
314
+ // - For chat tester_pair iterations, audiences live inside the
315
+ // iteration's mode_details and are authoritative; run-time
316
+ // overrides are refused.
304
317
  const profileNames = new Map();
305
318
  const profileIds = [];
306
319
  const existingTesters = [];
307
320
  const audienceSet = hasAudienceFlags(opts);
308
- if (audienceSet) {
321
+ if (isPair) {
322
+ if (audienceSet) {
323
+ throw new Error("tester_pair chat iterations carry their own audiences inside mode_details; run-time audience overrides (--profile / --sample / --all / --country / --gender / --min-age / --max-age / --search / --visibility) are not supported. " +
324
+ "To change the audiences, update the iteration via `ish iteration update <id> --details-json '{...}'`.");
325
+ }
326
+ if (!pairConfig) {
327
+ throw new Error("Pair-mode iteration is missing mode_details; cannot dispatch.");
328
+ }
329
+ // Surface a flat profileIds[] (a then b) so downstream
330
+ // bookkeeping (config resolution, output) still has something to
331
+ // chew on. The pair-batch tester-provisioning POST below uses
332
+ // the split lists, not this flat one.
333
+ for (const pid of pairConfig.audience_a) {
334
+ if (!profileNames.has(pid)) {
335
+ profileNames.set(pid, "");
336
+ profileIds.push(pid);
337
+ }
338
+ }
339
+ for (const pid of pairConfig.audience_b) {
340
+ if (!profileNames.has(pid)) {
341
+ profileNames.set(pid, "");
342
+ profileIds.push(pid);
343
+ }
344
+ }
345
+ }
346
+ else if (audienceSet) {
309
347
  const resolved = await resolveAudienceProfileIds(client, resolvedWorkspace, opts, { requireSimulatable: false, allFlagName: "--all" });
310
348
  profileIds.push(...resolved);
311
349
  }
@@ -322,16 +360,28 @@ Examples:
322
360
  }
323
361
  }
324
362
  }
325
- const reuseExistingTesters = !audienceSet && existingTesters.length > 0;
326
- if (profileIds.length === 0) {
363
+ // Pair iterations always seed fresh testers via the pair-batch
364
+ // endpoint; never reuse a stale tester roster from a prior run.
365
+ const reuseExistingTesters = !isPair && !audienceSet && existingTesters.length > 0;
366
+ // Pair iterations with criteria-only audiences will have empty
367
+ // profileIds at this stage if the backend deferred resolution past
368
+ // iteration create. That's a valid state — skip the
369
+ // "no audience flags" guard for them and let dispatch surface any
370
+ // backend-side resolution errors (e.g. pool too small).
371
+ const pairCriteriaOnly = isPair && !!pairConfig && profileIds.length === 0
372
+ && (!!pairConfig.role_criteria_a || !!pairConfig.role_criteria_b);
373
+ if (profileIds.length === 0 && !pairCriteriaOnly) {
327
374
  throw new Error(`Iteration "${iterationLabel}" has no testers and no audience flags were given. ` +
328
375
  "Pass --profile <ids>, or filter flags (--country, --gender, --min-age, --max-age, --search, --visibility) with --sample <N> or --all.");
329
376
  }
330
377
  // Step 3: Resolve simulation config (per-profile fallback for
331
- // media + chat, both of which require a config_id per batch item)
378
+ // media + chat external_chatbot, both of which require a config_id
379
+ // per batch item). Pair-mode chat dispatch is per-conversation,
380
+ // not per-tester; the backend resolves configs via the tester rows
381
+ // it creates on /testers/pair-batch, so the CLI doesn't pre-fetch.
332
382
  const resolvedConfigOverride = opts.config ? resolveId(opts.config) : undefined;
333
383
  const profileConfigMap = new Map();
334
- if ((isMedia || isChat) && !resolvedConfigOverride) {
384
+ if ((isMedia || (isChat && !isPair)) && !resolvedConfigOverride) {
335
385
  for (const pid of profileIds) {
336
386
  const profile = await client.get(`/tester-profiles/${pid}`);
337
387
  if (profile.simulation_config_id) {
@@ -352,9 +402,63 @@ Examples:
352
402
  log(` Modality: ${modality}`);
353
403
  if (study.content_type)
354
404
  log(` Content type: ${study.content_type}`);
355
- if (isChat) {
356
- const epId = typeof iteration.details?.chatbot_endpoint_id === "string"
357
- ? iteration.details.chatbot_endpoint_id : undefined;
405
+ if (isPair && pairConfig) {
406
+ log(` Chat mode: tester_pair`);
407
+ // Audience description per side: prefer explicit count when
408
+ // present; otherwise show the criteria filter that the backend
409
+ // will resolve into a pool.
410
+ const describeSide = (audLen, crit) => {
411
+ if (audLen > 0)
412
+ return `${audLen} profile(s)${crit ? ` (criteria validates list)` : ""}`;
413
+ const summary = summarizeRoleCriteria(crit);
414
+ return summary ? `criteria (${summary}) — pool resolved server-side` : "—";
415
+ };
416
+ log(` Audience A: ${describeSide(pairConfig.audience_a.length, pairConfig.role_criteria_a)}`);
417
+ log(` Audience B: ${describeSide(pairConfig.audience_b.length, pairConfig.role_criteria_b)}`);
418
+ const explicitConvs = Math.min(pairConfig.audience_a.length, pairConfig.audience_b.length);
419
+ const criteriaResolved = !!pairConfig.role_criteria_a || !!pairConfig.role_criteria_b;
420
+ if (explicitConvs > 0 && !criteriaResolved) {
421
+ log(` Conversations: ${explicitConvs} (1:1 by index)`);
422
+ }
423
+ else {
424
+ log(` Conversations: resolved server-side from criteria`);
425
+ }
426
+ // Scale preview: rough LLM-call estimate so the user knows
427
+ // what they're committing to before --yes lands. Formula
428
+ // matches the backend's billing pre-flight
429
+ // (chat_credit_cost(turns) * 2 * conv_count, where the *2
430
+ // accounts for one LLM call per side per turn). Doesn't
431
+ // claim exact credit cost — just shape + magnitude.
432
+ const turnsEstimate = opts.maxTurns
433
+ ? parseInt(opts.maxTurns, 10)
434
+ : (typeof iteration.details?.max_turns === "number"
435
+ ? iteration.details.max_turns
436
+ : 14);
437
+ if (explicitConvs > 0 && !criteriaResolved && Number.isFinite(turnsEstimate)) {
438
+ const est = estimateChatPair({ conversationCount: explicitConvs, maxTurns: turnsEstimate });
439
+ log(` Scale: ${explicitConvs} conv × ${turnsEstimate} turns × 2 sides ≈ ${explicitConvs * turnsEstimate * 2} LLM calls (upper bound — early-termination may shorten)`);
440
+ log(` Credits (est): ≈ ${est.upper_bound} credit(s) upper bound — see \`ish docs get-page reference/credits\``);
441
+ }
442
+ else if (criteriaResolved) {
443
+ log(` Scale: ~N conv × ${turnsEstimate} turns × 2 sides — N resolved server-side`);
444
+ log(` Credits (est): N × max(1, round(${turnsEstimate}/10)) × 2 — N resolved server-side`);
445
+ }
446
+ log(` Initiator: side ${pairConfig.initiator_side}`);
447
+ const scenAPreview = pairConfig.scenario_a.replace(/\s+/g, " ").trim().slice(0, 60);
448
+ const scenBPreview = pairConfig.scenario_b.replace(/\s+/g, " ").trim().slice(0, 60);
449
+ log(` Scenario A: ${scenAPreview}${pairConfig.scenario_a.length > 60 ? "…" : ""}`);
450
+ log(` Scenario B: ${scenBPreview}${pairConfig.scenario_b.length > 60 ? "…" : ""}`);
451
+ if (opts.maxTurns)
452
+ log(` Max turns: ${opts.maxTurns}`);
453
+ if (opts.earlyTermination)
454
+ log(` Early term: enabled`);
455
+ }
456
+ else if (isChat) {
457
+ const md = iteration.details?.mode_details;
458
+ const epId = (typeof md?.chatbot_endpoint_id === "string" && md.chatbot_endpoint_id)
459
+ || (typeof iteration.details?.chatbot_endpoint_id === "string"
460
+ ? iteration.details.chatbot_endpoint_id
461
+ : undefined);
358
462
  if (epId)
359
463
  log(` Endpoint: ${epId}`);
360
464
  if (opts.maxTurns)
@@ -375,10 +479,38 @@ Examples:
375
479
  log(` Config: ${resolvedConfigOverride}`);
376
480
  if (opts.language)
377
481
  log(` Language: ${opts.language}`);
378
- log(` Profiles (${profileIds.length}):`);
379
- for (const pid of profileIds) {
380
- const name = profileNames.get(pid);
381
- log(` - ${name ? `${name} (${pid})` : pid}`);
482
+ if (!isPair) {
483
+ log(` Profiles (${profileIds.length}):`);
484
+ for (const pid of profileIds) {
485
+ const name = profileNames.get(pid);
486
+ log(` - ${name ? `${name} (${pid})` : pid}`);
487
+ }
488
+ const testerCount = profileIds.length;
489
+ if (testerCount > 0) {
490
+ if (isChat) {
491
+ const turnsForChat = opts.maxTurns
492
+ ? parseInt(opts.maxTurns, 10)
493
+ : (typeof iteration.details?.max_turns === "number"
494
+ ? iteration.details.max_turns
495
+ : 14);
496
+ if (Number.isFinite(turnsForChat)) {
497
+ const est = estimateChatSolo({ testerCount, maxTurns: turnsForChat });
498
+ log(` Credits (est): ≈ ${est.upper_bound} credit(s) upper bound — ${est.breakdown}`);
499
+ }
500
+ }
501
+ else {
502
+ const stepsForMedia = opts.maxInteractions
503
+ ? parseMaxInteractions(opts.maxInteractions)
504
+ : (typeof iteration.details?.max_interactions === "number"
505
+ ? iteration.details.max_interactions
506
+ : 30);
507
+ if (Number.isFinite(stepsForMedia)) {
508
+ const est = estimateMediaRun({ testerCount, maxInteractions: stepsForMedia });
509
+ log(` Credits (est): ≈ ${est.upper_bound} credit(s) upper bound — ${est.breakdown}`);
510
+ }
511
+ }
512
+ log(` See \`ish docs get-page reference/credits\` for formula.`);
513
+ }
382
514
  }
383
515
  log("");
384
516
  const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
@@ -395,7 +527,83 @@ Examples:
395
527
  }
396
528
  // Step 5: Either reuse the iteration's testers or batch-create new ones
397
529
  let createdTesters;
398
- if (reuseExistingTesters && existingTesters.length > 0) {
530
+ // Pair-mode bookkeeping: the dispatch endpoint takes
531
+ // `conversation_ids`, not tester ids. We populate this list either
532
+ // by reusing the iteration's existing Conversation rows or by
533
+ // calling pair-batch.
534
+ let pairConversationIds = [];
535
+ if (isPair && pairConfig) {
536
+ // Pair-mode flow mirrors the MCP (`ish-mcp` `_run_pair_mode`):
537
+ // 1. If the iteration already carries `conversations[]` from a
538
+ // prior dispatch, reuse them — skip pair-batch entirely.
539
+ // 2. Otherwise call pair-batch with the resolved
540
+ // audience UUID lists. Criteria-only iterations should
541
+ // already have audiences materialised at iteration-create
542
+ // time; if they're still empty here, the backend's
543
+ // `PairAudienceResolutionError` is the authoritative
544
+ // failure mode — refuse before hitting pair-batch.
545
+ //
546
+ // Wire shapes per backend `app/api/iterations/routers`:
547
+ // POST /iterations/{id}/testers/pair-batch
548
+ // body : { side_a: UUID[1..20], side_b: UUID[1..20] (equal len),
549
+ // language?: str }
550
+ // reply : { conversations: [{ conversation_id, pair_index,
551
+ // tester_a_id, tester_b_id }] }
552
+ const existingConvs = iteration.conversations ?? [];
553
+ const reusable = [];
554
+ for (const c of existingConvs) {
555
+ const cid = c.conversation_id || c.id;
556
+ if (cid && c.tester_a_id && c.tester_b_id) {
557
+ reusable.push({ conversation_id: cid, tester_a_id: c.tester_a_id, tester_b_id: c.tester_b_id });
558
+ }
559
+ }
560
+ let pairRows;
561
+ if (reusable.length > 0) {
562
+ pairRows = reusable;
563
+ log(`Reusing ${reusable.length} existing conversation${reusable.length > 1 ? "s" : ""} on iteration "${iterationLabel}"`);
564
+ }
565
+ else {
566
+ if (pairConfig.audience_a.length === 0 || pairConfig.audience_b.length === 0) {
567
+ throw new Error("Pair-mode iteration has empty audience_a / audience_b and no conversations yet. " +
568
+ "If this iteration was created with --role-criteria-a/-b, the backend should have " +
569
+ "resolved a profile pool at create time — try `ish iteration get <id>` to fetch a " +
570
+ "fresh shape, or recreate with explicit --profile-a/-b.");
571
+ }
572
+ log(`Provisioning ${pairConfig.audience_a.length} pair conversation${pairConfig.audience_a.length > 1 ? "s" : ""}...`);
573
+ const pairBatchResult = await client.post(`/iterations/${iterationId}/testers/pair-batch`, {
574
+ side_a: pairConfig.audience_a,
575
+ side_b: pairConfig.audience_b,
576
+ ...(opts.language && { language: opts.language }),
577
+ }, { timeout: dispatchTimeoutMs });
578
+ pairRows = (pairBatchResult.conversations ?? []).map((c) => ({
579
+ conversation_id: c.conversation_id,
580
+ tester_a_id: c.tester_a_id,
581
+ tester_b_id: c.tester_b_id,
582
+ }));
583
+ if (pairRows.length === 0) {
584
+ throw new Error("Pair-batch returned no conversations. The backend response did not include any conversation IDs.");
585
+ }
586
+ log(`Created ${pairRows.length * 2} testers (${pairRows.length} conversation${pairRows.length > 1 ? "s" : ""})`);
587
+ }
588
+ pairConversationIds = pairRows.map((r) => r.conversation_id);
589
+ // Flatten both sides' tester IDs for downstream bookkeeping:
590
+ // error-tagging (`seeded_but_not_dispatched_ids`), poll filtering,
591
+ // and JSON output. Names aren't returned by pair-batch; agents
592
+ // who care can correlate via `ish iteration get <id>`.
593
+ createdTesters = [];
594
+ for (let i = 0; i < pairRows.length; i++) {
595
+ const row = pairRows[i];
596
+ createdTesters.push({
597
+ id: row.tester_a_id,
598
+ tester_profile: { name: `pair ${i} side A` },
599
+ });
600
+ createdTesters.push({
601
+ id: row.tester_b_id,
602
+ tester_profile: { name: `pair ${i} side B` },
603
+ });
604
+ }
605
+ }
606
+ else if (reuseExistingTesters && existingTesters.length > 0) {
399
607
  createdTesters = existingTesters;
400
608
  log(`Reusing ${createdTesters.length} existing tester${createdTesters.length > 1 ? "s" : ""} from iteration "${iterationLabel}"`);
401
609
  }
@@ -479,23 +687,66 @@ Examples:
479
687
  }
480
688
  };
481
689
  if (isChat) {
482
- const chatBatchItems = createdTesters.map((t, i) => ({
483
- study_id: resolvedStudy,
484
- tester_id: t.id,
485
- config_id: resolvedConfigOverride || profileConfigMap.get(profileIds[i]),
486
- ...(opts.language && { language: opts.language }),
487
- }));
488
690
  const maxTurns = opts.maxTurns ? parseInt(opts.maxTurns, 10) : undefined;
489
691
  if (opts.maxTurns !== undefined && (Number.isNaN(maxTurns) || maxTurns < 1)) {
490
692
  throw new Error(`Invalid --max-turns value: ${opts.maxTurns}`);
491
693
  }
492
- const simResult = await dispatchAttempt(() => client.post("/simulation/chat/start/batch", {
493
- product_id: resolvedWorkspace,
494
- simulations: chatBatchItems,
495
- ...(maxTurns !== undefined && { max_turns: maxTurns }),
496
- ...(opts.earlyTermination && { early_termination: true }),
497
- }, { timeout: dispatchTimeoutMs }));
498
- simResults = simResult.results;
694
+ if (isPair) {
695
+ if (!pairConfig || pairConversationIds.length === 0) {
696
+ throw new Error("Pair-mode dispatch reached without provisioned conversations — internal invariant violation.");
697
+ }
698
+ // Pair-mode dispatch (backend
699
+ // `app/api/simulation/routers/chat.py`):
700
+ // POST /simulation/chat/pair/start/batch
701
+ // body : { product_id, study_id,
702
+ // conversation_ids: UUID[1..20],
703
+ // config_id, # singular per batch
704
+ // max_turns?, language?, config_overrides? }
705
+ // One Cloud Task per conversation_id. Billing is
706
+ // chat_credit_cost(max_turns) * 2 * len(conversation_ids).
707
+ let pairConfigId = resolvedConfigOverride;
708
+ if (!pairConfigId) {
709
+ // Fall back to the first audience_a profile's
710
+ // simulation_config_id. Pair dispatch takes a single config
711
+ // for the whole batch, so we don't need the per-profile map
712
+ // the external_chatbot path builds.
713
+ const fallbackProfileId = pairConfig.audience_a[0];
714
+ if (!fallbackProfileId) {
715
+ throw new Error("Pair-mode dispatch requires --config <id>: the iteration has no audience profile to draw a default config_id from.");
716
+ }
717
+ const fallbackProfile = await client.get(`/tester-profiles/${fallbackProfileId}`);
718
+ if (!fallbackProfile.simulation_config_id) {
719
+ throw new Error(`Pair-mode dispatch requires a config_id. Profile ${fallbackProfileId} has no simulation config assigned and --config was not passed.\n` +
720
+ "Use --config <id> to specify one, or assign a config to the profile.\n" +
721
+ "List configs with: ish config list");
722
+ }
723
+ pairConfigId = fallbackProfile.simulation_config_id;
724
+ }
725
+ const simResult = await dispatchAttempt(() => client.post("/simulation/chat/pair/start/batch", {
726
+ product_id: resolvedWorkspace,
727
+ study_id: resolvedStudy,
728
+ conversation_ids: pairConversationIds,
729
+ config_id: pairConfigId,
730
+ ...(maxTurns !== undefined && { max_turns: maxTurns }),
731
+ ...(opts.language && { language: opts.language }),
732
+ }, { timeout: dispatchTimeoutMs }));
733
+ simResults = simResult.results;
734
+ }
735
+ else {
736
+ const chatBatchItems = createdTesters.map((t, i) => ({
737
+ study_id: resolvedStudy,
738
+ tester_id: t.id,
739
+ config_id: resolvedConfigOverride || profileConfigMap.get(profileIds[i]),
740
+ ...(opts.language && { language: opts.language }),
741
+ }));
742
+ const simResult = await dispatchAttempt(() => client.post("/simulation/chat/start/batch", {
743
+ product_id: resolvedWorkspace,
744
+ simulations: chatBatchItems,
745
+ ...(maxTurns !== undefined && { max_turns: maxTurns }),
746
+ ...(opts.earlyTermination && { early_termination: true }),
747
+ }, { timeout: dispatchTimeoutMs }));
748
+ simResults = simResult.results;
749
+ }
499
750
  }
500
751
  else if (isMedia) {
501
752
  const mediaBatchItems = createdTesters.map((t, i) => ({
@@ -529,6 +780,78 @@ Examples:
529
780
  }, { timeout: dispatchTimeoutMs }));
530
781
  simResults = simResult.results;
531
782
  }
783
+ // Pair-mode preview block: surface the audience sizes + scenario
784
+ // previews + initiator in the JSON envelope so agents can verify
785
+ // what they just dispatched without needing a follow-up
786
+ // `iteration get`. Mirrors the human confirmation block (which is
787
+ // skipped under -y or --json).
788
+ const pairPreviewTurns = opts.maxTurns
789
+ ? parseInt(opts.maxTurns, 10)
790
+ : (typeof iteration.details?.max_turns === "number"
791
+ ? iteration.details.max_turns
792
+ : 14);
793
+ const pairPreview = isPair && pairConfig ? {
794
+ mode: "tester_pair",
795
+ audience_a_size: pairConfig.audience_a.length,
796
+ audience_b_size: pairConfig.audience_b.length,
797
+ // Post-dispatch we know the actual conversation count from the
798
+ // pair-batch (or reuse) result. This is the authoritative number
799
+ // — better than guessing from audience length, which may diverge
800
+ // when the backend trims to the smaller side.
801
+ conversation_count: pairConversationIds.length,
802
+ conversation_ids: pairConversationIds,
803
+ // Scale preview: matches the backend's billing-preflight
804
+ // formula (chat_credit_cost(turns) * 2 * conv_count). Upper
805
+ // bound — early-termination may shorten actual turns. The CLI
806
+ // doesn't claim exact credit cost; just call magnitude.
807
+ max_turns: Number.isFinite(pairPreviewTurns) ? pairPreviewTurns : null,
808
+ llm_calls_upper_bound: Number.isFinite(pairPreviewTurns)
809
+ ? pairConversationIds.length * pairPreviewTurns * 2
810
+ : null,
811
+ // Credit cost upper bound — mirrors backend's chat_credit_cost × 2 × conv.
812
+ // Don't claim exactness; surface formula key so agents can branch
813
+ // on shape. Live rates will move to `GET /billing/rates` later.
814
+ credit_estimate: Number.isFinite(pairPreviewTurns)
815
+ ? estimateChatPair({
816
+ conversationCount: pairConversationIds.length,
817
+ maxTurns: pairPreviewTurns,
818
+ })
819
+ : null,
820
+ initiator_side: pairConfig.initiator_side,
821
+ scenario_a_preview: pairConfig.scenario_a.replace(/\s+/g, " ").trim().slice(0, 200),
822
+ scenario_b_preview: pairConfig.scenario_b.replace(/\s+/g, " ").trim().slice(0, 200),
823
+ ...(pairConfig.role_criteria_a && { role_criteria_a: pairConfig.role_criteria_a }),
824
+ ...(pairConfig.role_criteria_b && { role_criteria_b: pairConfig.role_criteria_b }),
825
+ } : undefined;
826
+ // Non-pair credit estimate — surfaced as a top-level field in the
827
+ // JSON envelope alongside `pair_preview.credit_estimate`. Mirrors
828
+ // backend formulas (`media_credit_cost` / `chat_credit_cost`).
829
+ // null when we can't estimate (criteria-only audience, etc.).
830
+ const nonPairCreditEstimate = (() => {
831
+ if (isPair)
832
+ return null;
833
+ const testerCount = createdTesters.length || profileIds.length;
834
+ if (testerCount <= 0)
835
+ return null;
836
+ if (isChat) {
837
+ const turns = opts.maxTurns
838
+ ? parseInt(opts.maxTurns, 10)
839
+ : (typeof iteration.details?.max_turns === "number"
840
+ ? iteration.details.max_turns
841
+ : 14);
842
+ if (!Number.isFinite(turns))
843
+ return null;
844
+ return estimateChatSolo({ testerCount, maxTurns: turns });
845
+ }
846
+ const steps = opts.maxInteractions
847
+ ? parseMaxInteractions(opts.maxInteractions)
848
+ : (typeof iteration.details?.max_interactions === "number"
849
+ ? iteration.details.max_interactions
850
+ : 30);
851
+ if (!Number.isFinite(steps))
852
+ return null;
853
+ return estimateMediaRun({ testerCount, maxInteractions: steps });
854
+ })();
532
855
  if (!opts.wait) {
533
856
  if (globals.json) {
534
857
  const testersOut = createdTesters.map((t) => ({
@@ -541,6 +864,9 @@ Examples:
541
864
  testers: testersOut,
542
865
  tester_ids: testersOut.map((t) => t.id),
543
866
  tester_aliases: testersOut.map((t) => t.alias),
867
+ url: getWebUrl(globals, `/${resolvedWorkspace}/${resolvedStudy}/timeline`),
868
+ ...(pairPreview && { pair_preview: pairPreview }),
869
+ ...(nonPairCreditEstimate && { credit_estimate: nonPairCreditEstimate }),
544
870
  simulations: dedupeSimulations(simResults),
545
871
  }, true);
546
872
  }
@@ -577,6 +903,9 @@ Examples:
577
903
  testers: testersOut,
578
904
  tester_ids: testersOut.map((t) => t.id),
579
905
  tester_aliases: testersOut.map((t) => t.alias),
906
+ url: getWebUrl(globals, `/${resolvedWorkspace}/${resolvedStudy}/timeline`),
907
+ ...(pairPreview && { pair_preview: pairPreview }),
908
+ ...(nonPairCreditEstimate && { credit_estimate: nonPairCreditEstimate }),
580
909
  simulations: dedupeSimulations(simResults),
581
910
  results: rows,
582
911
  }, true);