@ishlabs/cli 0.13.0 → 0.14.0
This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/commands/iteration.js +219 -22
- package/dist/commands/profile.js +75 -9
- package/dist/commands/source.js +6 -4
- package/dist/commands/study-run.js +359 -30
- package/dist/commands/study.js +170 -9
- package/dist/commands/workspace.js +35 -2
- package/dist/lib/accessibility-profile.d.ts +12 -0
- package/dist/lib/accessibility-profile.js +136 -0
- package/dist/lib/ask-questions.js +9 -0
- package/dist/lib/billing.d.ts +55 -0
- package/dist/lib/billing.js +77 -0
- package/dist/lib/docs.js +1106 -36
- package/dist/lib/enums.d.ts +54 -0
- package/dist/lib/enums.js +100 -0
- package/dist/lib/local-sim/actions.d.ts +2 -1
- package/dist/lib/local-sim/actions.js +88 -13
- package/dist/lib/local-sim/loop.js +49 -19
- package/dist/lib/local-sim/tabs.d.ts +27 -0
- package/dist/lib/local-sim/tabs.js +157 -0
- package/dist/lib/local-sim/types.d.ts +15 -0
- package/dist/lib/modality.d.ts +70 -1
- package/dist/lib/modality.js +323 -17
- package/dist/lib/output.js +61 -4
- package/dist/lib/skill-content.js +382 -19
- package/dist/lib/types.d.ts +6 -1
- package/package.json +1 -1
|
@@ -11,9 +11,10 @@ import * as readline from "node:readline/promises";
|
|
|
11
11
|
import { withClient, getWebUrl, terminalLink, resolveWorkspace, resolveStudy, parseWaitTimeout, resolveAudienceProfileIds, addAudienceFilterFlags, hasAudienceFlags, } from "../lib/command-helpers.js";
|
|
12
12
|
import { resolveId, tagAlias, ALIAS_PREFIX } from "../lib/alias-store.js";
|
|
13
13
|
import { output, formatSimulationPoll } from "../lib/output.js";
|
|
14
|
-
import { isMediaModality, isChatModality, iterationHasContent, describeRequiredContentFlag, } from "../lib/modality.js";
|
|
14
|
+
import { isMediaModality, isChatModality, iterationHasContent, describeRequiredContentFlag, readChatMode, readTesterPairConfig, summarizeRoleCriteria, } from "../lib/modality.js";
|
|
15
15
|
import { runLocalSimulations } from "../lib/local-sim/loop.js";
|
|
16
16
|
import { ensureBrowser } from "../lib/local-sim/install.js";
|
|
17
|
+
import { estimateChatPair, estimateChatSolo, estimateMediaRun } from "../lib/billing.js";
|
|
17
18
|
function parseMaxInteractions(value) {
|
|
18
19
|
const n = parseInt(value, 10);
|
|
19
20
|
if (isNaN(n) || n < 1)
|
|
@@ -262,6 +263,10 @@ Examples:
|
|
|
262
263
|
const modality = study.modality || "interactive";
|
|
263
264
|
const isMedia = isMediaModality(modality);
|
|
264
265
|
const isChat = isChatModality(modality);
|
|
266
|
+
// Pair-mode (tester_pair) is read off the iteration once we've
|
|
267
|
+
// resolved it below; set defaults here so the value is in scope.
|
|
268
|
+
let chatMode = "external_chatbot";
|
|
269
|
+
let isPair = false;
|
|
265
270
|
if (!study.assignments || study.assignments.length === 0) {
|
|
266
271
|
throw new Error("Study has no assignments. Add tasks with --assignments when creating the study, or use `ish study generate`.");
|
|
267
272
|
}
|
|
@@ -288,24 +293,57 @@ Examples:
|
|
|
288
293
|
// auto-creates an empty iteration A; agents who don't pass
|
|
289
294
|
// --iteration silently dispatch against it. Detect and refuse with
|
|
290
295
|
// a clear suggestion rather than masking the problem.
|
|
296
|
+
if (isChat) {
|
|
297
|
+
chatMode = readChatMode(iteration.details);
|
|
298
|
+
isPair = chatMode === "tester_pair";
|
|
299
|
+
}
|
|
291
300
|
if (!iterationHasContent(iteration.details, modality)) {
|
|
292
|
-
const flagHint = describeRequiredContentFlag(modality);
|
|
301
|
+
const flagHint = describeRequiredContentFlag(modality, isPair ? "tester_pair" : undefined);
|
|
293
302
|
const iterAlias = tagAlias(ALIAS_PREFIX.iteration, iterationId);
|
|
294
|
-
throw new Error(`Iteration "${iterationLabel}" (${iterAlias}) has no ${isMedia ? "content" : "URL"} configured yet. ` +
|
|
295
|
-
`Add ${isMedia ? "content" : "a URL"} with ` +
|
|
303
|
+
throw new Error(`Iteration "${iterationLabel}" (${iterAlias}) has no ${isMedia ? "content" : isPair ? "audiences/scenarios" : isChat ? "endpoint" : "URL"} configured yet. ` +
|
|
304
|
+
`Add ${isMedia ? "content" : isPair ? "the pair-mode payload" : isChat ? "an endpoint" : "a URL"} with ` +
|
|
296
305
|
`\`ish iteration create --study ${resolvedStudy} ${flagHint}\` ` +
|
|
297
306
|
`(or update the existing iteration via \`ish iteration update ${iterAlias} --details-json '{...}'\`), then retry.`);
|
|
298
307
|
}
|
|
299
308
|
const detailsView = readIterationDetails(iteration.details);
|
|
309
|
+
const pairConfig = isPair ? readTesterPairConfig(iteration.details) : undefined;
|
|
300
310
|
// Step 2: Resolve audience.
|
|
301
311
|
// - If any audience flag is set (--profile / --sample / --all / filter flags),
|
|
302
312
|
// resolve a fresh ID list from the workspace pool via the shared helper.
|
|
303
313
|
// - Otherwise reuse the iteration's existing testers.
|
|
314
|
+
// - For chat tester_pair iterations, audiences live inside the
|
|
315
|
+
// iteration's mode_details and are authoritative; run-time
|
|
316
|
+
// overrides are refused.
|
|
304
317
|
const profileNames = new Map();
|
|
305
318
|
const profileIds = [];
|
|
306
319
|
const existingTesters = [];
|
|
307
320
|
const audienceSet = hasAudienceFlags(opts);
|
|
308
|
-
if (
|
|
321
|
+
if (isPair) {
|
|
322
|
+
if (audienceSet) {
|
|
323
|
+
throw new Error("tester_pair chat iterations carry their own audiences inside mode_details; run-time audience overrides (--profile / --sample / --all / --country / --gender / --min-age / --max-age / --search / --visibility) are not supported. " +
|
|
324
|
+
"To change the audiences, update the iteration via `ish iteration update <id> --details-json '{...}'`.");
|
|
325
|
+
}
|
|
326
|
+
if (!pairConfig) {
|
|
327
|
+
throw new Error("Pair-mode iteration is missing mode_details; cannot dispatch.");
|
|
328
|
+
}
|
|
329
|
+
// Surface a flat profileIds[] (a then b) so downstream
|
|
330
|
+
// bookkeeping (config resolution, output) still has something to
|
|
331
|
+
// chew on. The pair-batch tester-provisioning POST below uses
|
|
332
|
+
// the split lists, not this flat one.
|
|
333
|
+
for (const pid of pairConfig.audience_a) {
|
|
334
|
+
if (!profileNames.has(pid)) {
|
|
335
|
+
profileNames.set(pid, "");
|
|
336
|
+
profileIds.push(pid);
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
for (const pid of pairConfig.audience_b) {
|
|
340
|
+
if (!profileNames.has(pid)) {
|
|
341
|
+
profileNames.set(pid, "");
|
|
342
|
+
profileIds.push(pid);
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
else if (audienceSet) {
|
|
309
347
|
const resolved = await resolveAudienceProfileIds(client, resolvedWorkspace, opts, { requireSimulatable: false, allFlagName: "--all" });
|
|
310
348
|
profileIds.push(...resolved);
|
|
311
349
|
}
|
|
@@ -322,16 +360,28 @@ Examples:
|
|
|
322
360
|
}
|
|
323
361
|
}
|
|
324
362
|
}
|
|
325
|
-
|
|
326
|
-
|
|
363
|
+
// Pair iterations always seed fresh testers via the pair-batch
|
|
364
|
+
// endpoint; never reuse a stale tester roster from a prior run.
|
|
365
|
+
const reuseExistingTesters = !isPair && !audienceSet && existingTesters.length > 0;
|
|
366
|
+
// Pair iterations with criteria-only audiences will have empty
|
|
367
|
+
// profileIds at this stage if the backend deferred resolution past
|
|
368
|
+
// iteration create. That's a valid state — skip the
|
|
369
|
+
// "no audience flags" guard for them here; the provisioning step
|
|
370
|
+
// (Step 5) then reuses existing conversations or refuses to dispatch.
|
|
371
|
+
const pairCriteriaOnly = isPair && !!pairConfig && profileIds.length === 0
|
|
372
|
+
&& (!!pairConfig.role_criteria_a || !!pairConfig.role_criteria_b);
|
|
373
|
+
if (profileIds.length === 0 && !pairCriteriaOnly) {
|
|
327
374
|
throw new Error(`Iteration "${iterationLabel}" has no testers and no audience flags were given. ` +
|
|
328
375
|
"Pass --profile <ids>, or filter flags (--country, --gender, --min-age, --max-age, --search, --visibility) with --sample <N> or --all.");
|
|
329
376
|
}
|
|
330
377
|
// Step 3: Resolve simulation config (per-profile fallback for
|
|
331
|
-
// media + chat, both of which require a config_id
|
|
378
|
+
// media + chat external_chatbot, both of which require a config_id
|
|
379
|
+
// per batch item). Pair-mode chat dispatch is per-conversation,
|
|
380
|
+
// not per-tester; the backend resolves configs via the tester rows
|
|
381
|
+
// it creates on /testers/pair-batch, so the CLI doesn't pre-fetch.
|
|
332
382
|
const resolvedConfigOverride = opts.config ? resolveId(opts.config) : undefined;
|
|
333
383
|
const profileConfigMap = new Map();
|
|
334
|
-
if ((isMedia || isChat) && !resolvedConfigOverride) {
|
|
384
|
+
if ((isMedia || (isChat && !isPair)) && !resolvedConfigOverride) {
|
|
335
385
|
for (const pid of profileIds) {
|
|
336
386
|
const profile = await client.get(`/tester-profiles/${pid}`);
|
|
337
387
|
if (profile.simulation_config_id) {
|
|
@@ -352,9 +402,63 @@ Examples:
|
|
|
352
402
|
log(` Modality: ${modality}`);
|
|
353
403
|
if (study.content_type)
|
|
354
404
|
log(` Content type: ${study.content_type}`);
|
|
355
|
-
if (
|
|
356
|
-
|
|
357
|
-
|
|
405
|
+
if (isPair && pairConfig) {
|
|
406
|
+
log(` Chat mode: tester_pair`);
|
|
407
|
+
// Audience description per side: prefer explicit count when
|
|
408
|
+
// present; otherwise show the criteria filter that the backend
|
|
409
|
+
// will resolve into a pool.
|
|
410
|
+
const describeSide = (audLen, crit) => {
|
|
411
|
+
if (audLen > 0)
|
|
412
|
+
return `${audLen} profile(s)${crit ? ` (criteria validates list)` : ""}`;
|
|
413
|
+
const summary = summarizeRoleCriteria(crit);
|
|
414
|
+
return summary ? `criteria (${summary}) — pool resolved server-side` : "—";
|
|
415
|
+
};
|
|
416
|
+
log(` Audience A: ${describeSide(pairConfig.audience_a.length, pairConfig.role_criteria_a)}`);
|
|
417
|
+
log(` Audience B: ${describeSide(pairConfig.audience_b.length, pairConfig.role_criteria_b)}`);
|
|
418
|
+
const explicitConvs = Math.min(pairConfig.audience_a.length, pairConfig.audience_b.length);
|
|
419
|
+
const criteriaResolved = !!pairConfig.role_criteria_a || !!pairConfig.role_criteria_b;
|
|
420
|
+
if (explicitConvs > 0 && !criteriaResolved) {
|
|
421
|
+
log(` Conversations: ${explicitConvs} (1:1 by index)`);
|
|
422
|
+
}
|
|
423
|
+
else {
|
|
424
|
+
log(` Conversations: resolved server-side from criteria`);
|
|
425
|
+
}
|
|
426
|
+
// Scale preview: rough LLM-call estimate so the user knows
|
|
427
|
+
// what they're committing to before --yes lands. Formula
|
|
428
|
+
// matches the backend's billing pre-flight
|
|
429
|
+
// (chat_credit_cost(turns) * 2 * conv_count, where the *2
|
|
430
|
+
// accounts for one LLM call per side per turn). Doesn't
|
|
431
|
+
// claim exact credit cost — just shape + magnitude.
|
|
432
|
+
const turnsEstimate = opts.maxTurns
|
|
433
|
+
? parseInt(opts.maxTurns, 10)
|
|
434
|
+
: (typeof iteration.details?.max_turns === "number"
|
|
435
|
+
? iteration.details.max_turns
|
|
436
|
+
: 14);
|
|
437
|
+
if (explicitConvs > 0 && !criteriaResolved && Number.isFinite(turnsEstimate)) {
|
|
438
|
+
const est = estimateChatPair({ conversationCount: explicitConvs, maxTurns: turnsEstimate });
|
|
439
|
+
log(` Scale: ${explicitConvs} conv × ${turnsEstimate} turns × 2 sides ≈ ${explicitConvs * turnsEstimate * 2} LLM calls (upper bound — early-termination may shorten)`);
|
|
440
|
+
log(` Credits (est): ≈ ${est.upper_bound} credit(s) upper bound — see \`ish docs get-page reference/credits\``);
|
|
441
|
+
}
|
|
442
|
+
else if (criteriaResolved) {
|
|
443
|
+
log(` Scale: ~N conv × ${turnsEstimate} turns × 2 sides — N resolved server-side`);
|
|
444
|
+
log(` Credits (est): N × max(1, round(${turnsEstimate}/10)) × 2 — N resolved server-side`);
|
|
445
|
+
}
|
|
446
|
+
log(` Initiator: side ${pairConfig.initiator_side}`);
|
|
447
|
+
const scenAPreview = pairConfig.scenario_a.replace(/\s+/g, " ").trim().slice(0, 60);
|
|
448
|
+
const scenBPreview = pairConfig.scenario_b.replace(/\s+/g, " ").trim().slice(0, 60);
|
|
449
|
+
log(` Scenario A: ${scenAPreview}${pairConfig.scenario_a.length > 60 ? "…" : ""}`);
|
|
450
|
+
log(` Scenario B: ${scenBPreview}${pairConfig.scenario_b.length > 60 ? "…" : ""}`);
|
|
451
|
+
if (opts.maxTurns)
|
|
452
|
+
log(` Max turns: ${opts.maxTurns}`);
|
|
453
|
+
if (opts.earlyTermination)
|
|
454
|
+
log(` Early term: enabled`);
|
|
455
|
+
}
|
|
456
|
+
else if (isChat) {
|
|
457
|
+
const md = iteration.details?.mode_details;
|
|
458
|
+
const epId = (typeof md?.chatbot_endpoint_id === "string" && md.chatbot_endpoint_id)
|
|
459
|
+
|| (typeof iteration.details?.chatbot_endpoint_id === "string"
|
|
460
|
+
? iteration.details.chatbot_endpoint_id
|
|
461
|
+
: undefined);
|
|
358
462
|
if (epId)
|
|
359
463
|
log(` Endpoint: ${epId}`);
|
|
360
464
|
if (opts.maxTurns)
|
|
@@ -375,10 +479,38 @@ Examples:
|
|
|
375
479
|
log(` Config: ${resolvedConfigOverride}`);
|
|
376
480
|
if (opts.language)
|
|
377
481
|
log(` Language: ${opts.language}`);
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
const
|
|
381
|
-
|
|
482
|
+
if (!isPair) {
|
|
483
|
+
log(` Profiles (${profileIds.length}):`);
|
|
484
|
+
for (const pid of profileIds) {
|
|
485
|
+
const name = profileNames.get(pid);
|
|
486
|
+
log(` - ${name ? `${name} (${pid})` : pid}`);
|
|
487
|
+
}
|
|
488
|
+
const testerCount = profileIds.length;
|
|
489
|
+
if (testerCount > 0) {
|
|
490
|
+
if (isChat) {
|
|
491
|
+
const turnsForChat = opts.maxTurns
|
|
492
|
+
? parseInt(opts.maxTurns, 10)
|
|
493
|
+
: (typeof iteration.details?.max_turns === "number"
|
|
494
|
+
? iteration.details.max_turns
|
|
495
|
+
: 14);
|
|
496
|
+
if (Number.isFinite(turnsForChat)) {
|
|
497
|
+
const est = estimateChatSolo({ testerCount, maxTurns: turnsForChat });
|
|
498
|
+
log(` Credits (est): ≈ ${est.upper_bound} credit(s) upper bound — ${est.breakdown}`);
|
|
499
|
+
}
|
|
500
|
+
}
|
|
501
|
+
else {
|
|
502
|
+
const stepsForMedia = opts.maxInteractions
|
|
503
|
+
? parseMaxInteractions(opts.maxInteractions)
|
|
504
|
+
: (typeof iteration.details?.max_interactions === "number"
|
|
505
|
+
? iteration.details.max_interactions
|
|
506
|
+
: 30);
|
|
507
|
+
if (Number.isFinite(stepsForMedia)) {
|
|
508
|
+
const est = estimateMediaRun({ testerCount, maxInteractions: stepsForMedia });
|
|
509
|
+
log(` Credits (est): ≈ ${est.upper_bound} credit(s) upper bound — ${est.breakdown}`);
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
log(` See \`ish docs get-page reference/credits\` for formula.`);
|
|
513
|
+
}
|
|
382
514
|
}
|
|
383
515
|
log("");
|
|
384
516
|
const rl = readline.createInterface({ input: process.stdin, output: process.stderr });
|
|
@@ -395,7 +527,83 @@ Examples:
|
|
|
395
527
|
}
|
|
396
528
|
// Step 5: Either reuse the iteration's testers or batch-create new ones
|
|
397
529
|
let createdTesters;
|
|
398
|
-
|
|
530
|
+
// Pair-mode bookkeeping: the dispatch endpoint takes
|
|
531
|
+
// `conversation_ids`, not tester ids. We populate this list either
|
|
532
|
+
// by reusing the iteration's existing Conversation rows or by
|
|
533
|
+
// calling pair-batch.
|
|
534
|
+
let pairConversationIds = [];
|
|
535
|
+
if (isPair && pairConfig) {
|
|
536
|
+
// Pair-mode flow mirrors the MCP (`ish-mcp` `_run_pair_mode`):
|
|
537
|
+
// 1. If the iteration already carries `conversations[]` from a
|
|
538
|
+
// prior dispatch, reuse them — skip pair-batch entirely.
|
|
539
|
+
// 2. Otherwise call pair-batch with the resolved
|
|
540
|
+
// audience UUID lists. Criteria-only iterations should
|
|
541
|
+
// already have audiences materialised at iteration-create
|
|
542
|
+
// time; if they're still empty here, the backend's
|
|
543
|
+
// `PairAudienceResolutionError` is the authoritative
|
|
544
|
+
// failure mode — refuse before hitting pair-batch.
|
|
545
|
+
//
|
|
546
|
+
// Wire shapes per backend `app/api/iterations/routers`:
|
|
547
|
+
// POST /iterations/{id}/testers/pair-batch
|
|
548
|
+
// body : { side_a: UUID[1..20], side_b: UUID[1..20] (equal len),
|
|
549
|
+
// language?: str }
|
|
550
|
+
// reply : { conversations: [{ conversation_id, pair_index,
|
|
551
|
+
// tester_a_id, tester_b_id }] }
|
|
552
|
+
const existingConvs = iteration.conversations ?? [];
|
|
553
|
+
const reusable = [];
|
|
554
|
+
for (const c of existingConvs) {
|
|
555
|
+
const cid = c.conversation_id || c.id;
|
|
556
|
+
if (cid && c.tester_a_id && c.tester_b_id) {
|
|
557
|
+
reusable.push({ conversation_id: cid, tester_a_id: c.tester_a_id, tester_b_id: c.tester_b_id });
|
|
558
|
+
}
|
|
559
|
+
}
|
|
560
|
+
let pairRows;
|
|
561
|
+
if (reusable.length > 0) {
|
|
562
|
+
pairRows = reusable;
|
|
563
|
+
log(`Reusing ${reusable.length} existing conversation${reusable.length > 1 ? "s" : ""} on iteration "${iterationLabel}"`);
|
|
564
|
+
}
|
|
565
|
+
else {
|
|
566
|
+
if (pairConfig.audience_a.length === 0 || pairConfig.audience_b.length === 0) {
|
|
567
|
+
throw new Error("Pair-mode iteration has empty audience_a / audience_b and no conversations yet. " +
|
|
568
|
+
"If this iteration was created with --role-criteria-a/-b, the backend should have " +
|
|
569
|
+
"resolved a profile pool at create time — try `ish iteration get <id>` to fetch a " +
|
|
570
|
+
"fresh shape, or recreate with explicit --profile-a/-b.");
|
|
571
|
+
}
|
|
572
|
+
log(`Provisioning ${pairConfig.audience_a.length} pair conversation${pairConfig.audience_a.length > 1 ? "s" : ""}...`);
|
|
573
|
+
const pairBatchResult = await client.post(`/iterations/${iterationId}/testers/pair-batch`, {
|
|
574
|
+
side_a: pairConfig.audience_a,
|
|
575
|
+
side_b: pairConfig.audience_b,
|
|
576
|
+
...(opts.language && { language: opts.language }),
|
|
577
|
+
}, { timeout: dispatchTimeoutMs });
|
|
578
|
+
pairRows = (pairBatchResult.conversations ?? []).map((c) => ({
|
|
579
|
+
conversation_id: c.conversation_id,
|
|
580
|
+
tester_a_id: c.tester_a_id,
|
|
581
|
+
tester_b_id: c.tester_b_id,
|
|
582
|
+
}));
|
|
583
|
+
if (pairRows.length === 0) {
|
|
584
|
+
throw new Error("Pair-batch returned no conversations. The backend response did not include any conversation IDs.");
|
|
585
|
+
}
|
|
586
|
+
log(`Created ${pairRows.length * 2} testers (${pairRows.length} conversation${pairRows.length > 1 ? "s" : ""})`);
|
|
587
|
+
}
|
|
588
|
+
pairConversationIds = pairRows.map((r) => r.conversation_id);
|
|
589
|
+
// Flatten both sides' tester IDs for downstream bookkeeping:
|
|
590
|
+
// error-tagging (`seeded_but_not_dispatched_ids`), poll filtering,
|
|
591
|
+
// and JSON output. Names aren't returned by pair-batch; agents
|
|
592
|
+
// who care can correlate via `ish iteration get <id>`.
|
|
593
|
+
createdTesters = [];
|
|
594
|
+
for (let i = 0; i < pairRows.length; i++) {
|
|
595
|
+
const row = pairRows[i];
|
|
596
|
+
createdTesters.push({
|
|
597
|
+
id: row.tester_a_id,
|
|
598
|
+
tester_profile: { name: `pair ${i} side A` },
|
|
599
|
+
});
|
|
600
|
+
createdTesters.push({
|
|
601
|
+
id: row.tester_b_id,
|
|
602
|
+
tester_profile: { name: `pair ${i} side B` },
|
|
603
|
+
});
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
else if (reuseExistingTesters && existingTesters.length > 0) {
|
|
399
607
|
createdTesters = existingTesters;
|
|
400
608
|
log(`Reusing ${createdTesters.length} existing tester${createdTesters.length > 1 ? "s" : ""} from iteration "${iterationLabel}"`);
|
|
401
609
|
}
|
|
@@ -479,23 +687,66 @@ Examples:
|
|
|
479
687
|
}
|
|
480
688
|
};
|
|
481
689
|
if (isChat) {
|
|
482
|
-
const chatBatchItems = createdTesters.map((t, i) => ({
|
|
483
|
-
study_id: resolvedStudy,
|
|
484
|
-
tester_id: t.id,
|
|
485
|
-
config_id: resolvedConfigOverride || profileConfigMap.get(profileIds[i]),
|
|
486
|
-
...(opts.language && { language: opts.language }),
|
|
487
|
-
}));
|
|
488
690
|
const maxTurns = opts.maxTurns ? parseInt(opts.maxTurns, 10) : undefined;
|
|
489
691
|
if (opts.maxTurns !== undefined && (Number.isNaN(maxTurns) || maxTurns < 1)) {
|
|
490
692
|
throw new Error(`Invalid --max-turns value: ${opts.maxTurns}`);
|
|
491
693
|
}
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
694
|
+
if (isPair) {
|
|
695
|
+
if (!pairConfig || pairConversationIds.length === 0) {
|
|
696
|
+
throw new Error("Pair-mode dispatch reached without provisioned conversations — internal invariant violation.");
|
|
697
|
+
}
|
|
698
|
+
// Pair-mode dispatch (backend
|
|
699
|
+
// `app/api/simulation/routers/chat.py`):
|
|
700
|
+
// POST /simulation/chat/pair/start/batch
|
|
701
|
+
// body : { product_id, study_id,
|
|
702
|
+
// conversation_ids: UUID[1..20],
|
|
703
|
+
// config_id, # singular per batch
|
|
704
|
+
// max_turns?, language?, config_overrides? }
|
|
705
|
+
// One Cloud Task per conversation_id. Billing is
|
|
706
|
+
// chat_credit_cost(max_turns) * 2 * len(conversation_ids).
|
|
707
|
+
let pairConfigId = resolvedConfigOverride;
|
|
708
|
+
if (!pairConfigId) {
|
|
709
|
+
// Fall back to the first audience_a profile's
|
|
710
|
+
// simulation_config_id. Pair dispatch takes a single config
|
|
711
|
+
// for the whole batch, so we don't need the per-profile map
|
|
712
|
+
// the external_chatbot path builds.
|
|
713
|
+
const fallbackProfileId = pairConfig.audience_a[0];
|
|
714
|
+
if (!fallbackProfileId) {
|
|
715
|
+
throw new Error("Pair-mode dispatch requires --config <id>: the iteration has no audience profile to draw a default config_id from.");
|
|
716
|
+
}
|
|
717
|
+
const fallbackProfile = await client.get(`/tester-profiles/${fallbackProfileId}`);
|
|
718
|
+
if (!fallbackProfile.simulation_config_id) {
|
|
719
|
+
throw new Error(`Pair-mode dispatch requires a config_id. Profile ${fallbackProfileId} has no simulation config assigned and --config was not passed.\n` +
|
|
720
|
+
"Use --config <id> to specify one, or assign a config to the profile.\n" +
|
|
721
|
+
"List configs with: ish config list");
|
|
722
|
+
}
|
|
723
|
+
pairConfigId = fallbackProfile.simulation_config_id;
|
|
724
|
+
}
|
|
725
|
+
const simResult = await dispatchAttempt(() => client.post("/simulation/chat/pair/start/batch", {
|
|
726
|
+
product_id: resolvedWorkspace,
|
|
727
|
+
study_id: resolvedStudy,
|
|
728
|
+
conversation_ids: pairConversationIds,
|
|
729
|
+
config_id: pairConfigId,
|
|
730
|
+
...(maxTurns !== undefined && { max_turns: maxTurns }),
|
|
731
|
+
...(opts.language && { language: opts.language }),
|
|
732
|
+
}, { timeout: dispatchTimeoutMs }));
|
|
733
|
+
simResults = simResult.results;
|
|
734
|
+
}
|
|
735
|
+
else {
|
|
736
|
+
const chatBatchItems = createdTesters.map((t, i) => ({
|
|
737
|
+
study_id: resolvedStudy,
|
|
738
|
+
tester_id: t.id,
|
|
739
|
+
config_id: resolvedConfigOverride || profileConfigMap.get(profileIds[i]),
|
|
740
|
+
...(opts.language && { language: opts.language }),
|
|
741
|
+
}));
|
|
742
|
+
const simResult = await dispatchAttempt(() => client.post("/simulation/chat/start/batch", {
|
|
743
|
+
product_id: resolvedWorkspace,
|
|
744
|
+
simulations: chatBatchItems,
|
|
745
|
+
...(maxTurns !== undefined && { max_turns: maxTurns }),
|
|
746
|
+
...(opts.earlyTermination && { early_termination: true }),
|
|
747
|
+
}, { timeout: dispatchTimeoutMs }));
|
|
748
|
+
simResults = simResult.results;
|
|
749
|
+
}
|
|
499
750
|
}
|
|
500
751
|
else if (isMedia) {
|
|
501
752
|
const mediaBatchItems = createdTesters.map((t, i) => ({
|
|
@@ -529,6 +780,78 @@ Examples:
|
|
|
529
780
|
}, { timeout: dispatchTimeoutMs }));
|
|
530
781
|
simResults = simResult.results;
|
|
531
782
|
}
|
|
783
|
+
// Pair-mode preview block: surface the audience sizes + scenario
|
|
784
|
+
// previews + initiator in the JSON envelope so agents can verify
|
|
785
|
+
// what they just dispatched without needing a follow-up
|
|
786
|
+
// `iteration get`. Mirrors the human confirmation block (which is
|
|
787
|
+
// skipped under -y or --json).
|
|
788
|
+
const pairPreviewTurns = opts.maxTurns
|
|
789
|
+
? parseInt(opts.maxTurns, 10)
|
|
790
|
+
: (typeof iteration.details?.max_turns === "number"
|
|
791
|
+
? iteration.details.max_turns
|
|
792
|
+
: 14);
|
|
793
|
+
const pairPreview = isPair && pairConfig ? {
|
|
794
|
+
mode: "tester_pair",
|
|
795
|
+
audience_a_size: pairConfig.audience_a.length,
|
|
796
|
+
audience_b_size: pairConfig.audience_b.length,
|
|
797
|
+
// Post-dispatch we know the actual conversation count from the
|
|
798
|
+
// pair-batch (or reuse) result. This is the authoritative number
|
|
799
|
+
// — better than guessing from audience length, which may diverge
|
|
800
|
+
// when the backend trims to the smaller side.
|
|
801
|
+
conversation_count: pairConversationIds.length,
|
|
802
|
+
conversation_ids: pairConversationIds,
|
|
803
|
+
// Scale preview: matches the backend's billing-preflight
|
|
804
|
+
// formula (chat_credit_cost(turns) * 2 * conv_count). Upper
|
|
805
|
+
// bound — early-termination may shorten actual turns. The CLI
|
|
806
|
+
// doesn't claim exact credit cost; just call magnitude.
|
|
807
|
+
max_turns: Number.isFinite(pairPreviewTurns) ? pairPreviewTurns : null,
|
|
808
|
+
llm_calls_upper_bound: Number.isFinite(pairPreviewTurns)
|
|
809
|
+
? pairConversationIds.length * pairPreviewTurns * 2
|
|
810
|
+
: null,
|
|
811
|
+
// Credit cost upper bound — mirrors backend's chat_credit_cost × 2 × conv.
|
|
812
|
+
// Don't claim exactness; surface formula key so agents can branch
|
|
813
|
+
// on shape. Live rates will move to `GET /billing/rates` later.
|
|
814
|
+
credit_estimate: Number.isFinite(pairPreviewTurns)
|
|
815
|
+
? estimateChatPair({
|
|
816
|
+
conversationCount: pairConversationIds.length,
|
|
817
|
+
maxTurns: pairPreviewTurns,
|
|
818
|
+
})
|
|
819
|
+
: null,
|
|
820
|
+
initiator_side: pairConfig.initiator_side,
|
|
821
|
+
scenario_a_preview: pairConfig.scenario_a.replace(/\s+/g, " ").trim().slice(0, 200),
|
|
822
|
+
scenario_b_preview: pairConfig.scenario_b.replace(/\s+/g, " ").trim().slice(0, 200),
|
|
823
|
+
...(pairConfig.role_criteria_a && { role_criteria_a: pairConfig.role_criteria_a }),
|
|
824
|
+
...(pairConfig.role_criteria_b && { role_criteria_b: pairConfig.role_criteria_b }),
|
|
825
|
+
} : undefined;
|
|
826
|
+
// Non-pair credit estimate — surfaced as a top-level field in the
|
|
827
|
+
// JSON envelope alongside `pair_preview.credit_estimate`. Mirrors
|
|
828
|
+
// backend formulas (`media_credit_cost` / `chat_credit_cost`).
|
|
829
|
+
// null when we can't estimate (criteria-only audience, etc.).
|
|
830
|
+
const nonPairCreditEstimate = (() => {
|
|
831
|
+
if (isPair)
|
|
832
|
+
return null;
|
|
833
|
+
const testerCount = createdTesters.length || profileIds.length;
|
|
834
|
+
if (testerCount <= 0)
|
|
835
|
+
return null;
|
|
836
|
+
if (isChat) {
|
|
837
|
+
const turns = opts.maxTurns
|
|
838
|
+
? parseInt(opts.maxTurns, 10)
|
|
839
|
+
: (typeof iteration.details?.max_turns === "number"
|
|
840
|
+
? iteration.details.max_turns
|
|
841
|
+
: 14);
|
|
842
|
+
if (!Number.isFinite(turns))
|
|
843
|
+
return null;
|
|
844
|
+
return estimateChatSolo({ testerCount, maxTurns: turns });
|
|
845
|
+
}
|
|
846
|
+
const steps = opts.maxInteractions
|
|
847
|
+
? parseMaxInteractions(opts.maxInteractions)
|
|
848
|
+
: (typeof iteration.details?.max_interactions === "number"
|
|
849
|
+
? iteration.details.max_interactions
|
|
850
|
+
: 30);
|
|
851
|
+
if (!Number.isFinite(steps))
|
|
852
|
+
return null;
|
|
853
|
+
return estimateMediaRun({ testerCount, maxInteractions: steps });
|
|
854
|
+
})();
|
|
532
855
|
if (!opts.wait) {
|
|
533
856
|
if (globals.json) {
|
|
534
857
|
const testersOut = createdTesters.map((t) => ({
|
|
@@ -541,6 +864,9 @@ Examples:
|
|
|
541
864
|
testers: testersOut,
|
|
542
865
|
tester_ids: testersOut.map((t) => t.id),
|
|
543
866
|
tester_aliases: testersOut.map((t) => t.alias),
|
|
867
|
+
url: getWebUrl(globals, `/${resolvedWorkspace}/${resolvedStudy}/timeline`),
|
|
868
|
+
...(pairPreview && { pair_preview: pairPreview }),
|
|
869
|
+
...(nonPairCreditEstimate && { credit_estimate: nonPairCreditEstimate }),
|
|
544
870
|
simulations: dedupeSimulations(simResults),
|
|
545
871
|
}, true);
|
|
546
872
|
}
|
|
@@ -577,6 +903,9 @@ Examples:
|
|
|
577
903
|
testers: testersOut,
|
|
578
904
|
tester_ids: testersOut.map((t) => t.id),
|
|
579
905
|
tester_aliases: testersOut.map((t) => t.alias),
|
|
906
|
+
url: getWebUrl(globals, `/${resolvedWorkspace}/${resolvedStudy}/timeline`),
|
|
907
|
+
...(pairPreview && { pair_preview: pairPreview }),
|
|
908
|
+
...(nonPairCreditEstimate && { credit_estimate: nonPairCreditEstimate }),
|
|
580
909
|
simulations: dedupeSimulations(simResults),
|
|
581
910
|
results: rows,
|
|
582
911
|
}, true);
|