@ishlabs/cli 0.14.1 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ import { resolveId, tagAlias, ALIAS_PREFIX } from "../lib/alias-store.js";
7
7
  import { formatTesterProfileList, formatGeneratedProfileList, output, } from "../lib/output.js";
8
8
  import { resolveTextContent } from "../lib/upload.js";
9
9
  import { isUuid, resolveSourceRef } from "../lib/profile-sources.js";
10
- import { assertEnumValue, EDUCATION_LEVELS, HOUSEHOLDS, LOCALE_TYPES, INCOME_LEVELS, EMPLOYMENT_STATUSES, } from "../lib/enums.js";
10
+ import { assertEnumValue, EDUCATION_LEVELS, EVIDENCE_SOURCES, HOUSEHOLDS, LOCALE_TYPES, INCOME_LEVELS, EMPLOYMENT_STATUSES, } from "../lib/enums.js";
11
11
  import { validateAccessibilityProfile } from "../lib/accessibility-profile.js";
12
12
  function collect(value, prev) {
13
13
  return prev.concat(value);
@@ -393,6 +393,242 @@ Schema: https://ishlabs.io/spec/accessibility-profile-schema.v1.json`)
393
393
  output({ id: rid, alias: tagAlias(ALIAS_PREFIX.testerProfile, rid), message: "Profile deleted" }, globals.json, { writePath: true });
394
394
  });
395
395
  });
396
+ profile
397
+ .command("suggest-scenarios")
398
+ .description("Ask the LLM for scenario probes to craft a specific simulated tester")
399
+ .option("--workspace <id>", "Workspace (product) ID; falls back to active workspace")
400
+ .option("--context <text>", "What you already know about this tester. Use @path to read from file.")
401
+ .option("--context-file <path>", "Read --context from a file")
402
+ .option("--count <n>", "Number of scenarios to return (1-10, default 5)")
403
+ .option("--previous-answers <json-or-@path>", "Answers already collected this session. Inline JSON, @/path/to.json, or - for stdin. Array of {type, prompt, answer}; max 40.")
404
+ .option("--already-surfaced <json-or-@path>", "Prompt labels already shown this session, so the LLM avoids paraphrasing them. Inline JSON, @/path, or -. Array of strings; max 40.")
405
+ .addHelpText("after", `
406
+ Examples:
407
+ # Bare invocation: 5 scenarios from a free-form context blob
408
+ $ ish profile suggest-scenarios --context "Mid-career engineer who handles oncall for a Stripe-using fintech"
409
+
410
+ # Load context from a file, ask for 3 scenarios
411
+ $ ish profile suggest-scenarios --context-file ./persona-notes.md --count 3
412
+
413
+ # Follow-up probe: skip prompts already shown, build on prior answers
414
+ $ ish profile suggest-scenarios \\
415
+ --context "$(cat notes.md)" \\
416
+ --count 3 \\
417
+ --already-surfaced '["How do you triage 02:00 pages?"]' \\
418
+ --previous-answers @./answers.json
419
+
420
+ # Capture just the first scenario's type
421
+ $ ish profile suggest-scenarios --context "..." --count 1 --get scenarios[0].type
422
+
423
+ The loop: suggest → answer locally → persist via \`ish profile evidence add <id>\` (read back with \`evidence list\`).
424
+ See \`ish docs get-page guides/build-specific-tester\` for the full workflow.`)
425
+ .action(async (opts, cmd) => {
426
+ await withClient(cmd, async (client, globals) => {
427
+ const productId = resolveWorkspace(opts.workspace);
428
+ let context;
429
+ if (opts.context)
430
+ context = resolveTextContent(opts.context);
431
+ if (opts.contextFile)
432
+ context = resolveTextContent(`@${opts.contextFile}`);
433
+ if (!context) {
434
+ throw new Error("Provide --context (text or @path) or --context-file.");
435
+ }
436
+ const trimmed = context.trim();
437
+ if (trimmed.length === 0) {
438
+ throw new Error("--context cannot be empty.");
439
+ }
440
+ if (trimmed.length > 20_000) {
441
+ throw new Error(`--context is ${trimmed.length} chars; backend max is 20000. Trim before retrying.`);
442
+ }
443
+ const body = {
444
+ product_id: productId,
445
+ context: trimmed,
446
+ };
447
+ if (opts.count !== undefined) {
448
+ const n = parseInt(opts.count, 10);
449
+ if (Number.isNaN(n) || n < 1 || n > 10) {
450
+ throw new Error("--count must be an integer between 1 and 10.");
451
+ }
452
+ body.count = n;
453
+ }
454
+ if (opts.previousAnswers) {
455
+ const parsed = await parseJsonFlag(opts.previousAnswers, "--previous-answers");
456
+ if (!Array.isArray(parsed)) {
457
+ throw new Error("--previous-answers must be a JSON array of {type, prompt, answer}.");
458
+ }
459
+ if (parsed.length > 40) {
460
+ throw new Error(`--previous-answers max 40 entries (got ${parsed.length}).`);
461
+ }
462
+ for (let i = 0; i < parsed.length; i++) {
463
+ const row = parsed[i];
464
+ if (!row || typeof row !== "object") {
465
+ throw new Error(`--previous-answers[${i}]: expected object {type, prompt, answer}.`);
466
+ }
467
+ if (typeof row.type !== "string" || typeof row.prompt !== "string" || typeof row.answer !== "string") {
468
+ throw new Error(`--previous-answers[${i}]: missing or non-string {type, prompt, answer}.`);
469
+ }
470
+ assertEnumValue(row.type, EVIDENCE_SOURCES, `--previous-answers[${i}].type`);
471
+ }
472
+ body.previous_answers = parsed;
473
+ }
474
+ if (opts.alreadySurfaced) {
475
+ const parsed = await parseJsonFlag(opts.alreadySurfaced, "--already-surfaced");
476
+ if (!Array.isArray(parsed) || parsed.some((s) => typeof s !== "string")) {
477
+ throw new Error("--already-surfaced must be a JSON array of strings.");
478
+ }
479
+ if (parsed.length > 40) {
480
+ throw new Error(`--already-surfaced max 40 entries (got ${parsed.length}).`);
481
+ }
482
+ body.already_surfaced_prompts = parsed;
483
+ }
484
+ if (!globals.quiet) {
485
+ const target = body.count ?? 5;
486
+ console.error(` suggesting ${target} scenario${target === 1 ? "" : "s"}...`);
487
+ }
488
+ const data = await client.post("/tester-profiles/suggest-scenarios", body, { timeout: 180_000 });
489
+ output(data, globals.json);
490
+ });
491
+ });
492
+ const evidence = profile
493
+ .command("evidence")
494
+ .description("Manage scenario-answer evidence on a tester profile")
495
+ .addHelpText("after", `
496
+ Evidence rows persist answers to \`suggest-scenarios\` probes onto a
497
+ specific profile. The \`source\` field on every trace is the same enum
498
+ as the \`type\` field on a suggested scenario — copy verbatim when
499
+ building a traces.json.
500
+
501
+ Guide: ish docs get-page guides/build-specific-tester`);
502
+ evidence
503
+ .command("add")
504
+ .description("Persist scenario answers as structured evidence on a profile")
505
+ .argument("<id>", "Profile ID (alias or UUID)")
506
+ .option("--traces <json-or-@path>", `Array of {text, source, scenario_prompt?, raw_response?} where source ∈ ${EVIDENCE_SOURCES.join("|")}. Inline JSON, @/path/to.json, or - for stdin.`)
507
+ .option("--traces-file <path>", "Read --traces from a JSON file")
508
+ .addHelpText("after", `
509
+ Examples:
510
+ # Inline JSON for a single trace
511
+ $ ish profile evidence add tp-d4e --traces '[{"text":"I would page my staff engineer first.","source":"situation","scenario_prompt":"PagerDuty fires at 02:00."}]'
512
+
513
+ # From a file
514
+ $ ish profile evidence add tp-d4e --traces-file ./answers.json
515
+
516
+ # From stdin (pipe-friendly)
517
+ $ jq -c '.traces' session.json | ish profile evidence add tp-d4e --traces -
518
+
519
+ # Project the response
520
+ $ ish profile evidence add tp-d4e --traces-file ./answers.json --fields id,source,created_at
521
+
522
+ Valid source values: ${EVIDENCE_SOURCES.join(", ")}.
523
+ \`source\` on a trace = \`type\` on a suggested scenario — same enum.
524
+ Pair with \`ish profile suggest-scenarios\` to drive the iterative probe → answer loop.
525
+ Verify with \`ish profile evidence list <id>\`.`)
526
+ .action(async (id, opts, cmd) => {
527
+ await withClient(cmd, async (client, globals) => {
528
+ if (opts.traces && opts.tracesFile) {
529
+ throw new Error("Pass either --traces or --traces-file, not both.");
530
+ }
531
+ if (!opts.traces && !opts.tracesFile) {
532
+ throw new Error("Provide --traces (JSON, @path, or -) or --traces-file <path>.");
533
+ }
534
+ const parsed = opts.tracesFile
535
+ ? await readJsonFileOrStdin(opts.tracesFile)
536
+ : await parseJsonFlag(opts.traces, "--traces");
537
+ if (!Array.isArray(parsed) || parsed.length === 0) {
538
+ throw new Error("traces must be a non-empty JSON array.");
539
+ }
540
+ const traces = [];
541
+ for (let i = 0; i < parsed.length; i++) {
542
+ const row = parsed[i];
543
+ if (!row || typeof row !== "object") {
544
+ throw new Error(`traces[${i}]: expected object {text, source, ...}.`);
545
+ }
546
+ if (typeof row.text !== "string" || row.text.length === 0) {
547
+ throw new Error(`traces[${i}].text: required non-empty string.`);
548
+ }
549
+ if (typeof row.source !== "string") {
550
+ throw new Error(`traces[${i}].source: required string.`);
551
+ }
552
+ assertEnumValue(row.source, EVIDENCE_SOURCES, `traces[${i}].source`);
553
+ if (row.scenario_prompt !== undefined && typeof row.scenario_prompt !== "string") {
554
+ throw new Error(`traces[${i}].scenario_prompt: must be a string if provided.`);
555
+ }
556
+ traces.push({
557
+ text: row.text,
558
+ source: row.source,
559
+ scenario_prompt: row.scenario_prompt ?? "",
560
+ ...(row.raw_response !== undefined ? { raw_response: row.raw_response } : {}),
561
+ });
562
+ }
563
+ const rid = resolveId(id);
564
+ const body = { traces };
565
+ if (!globals.quiet) {
566
+ console.error(` persisting ${traces.length} evidence trace${traces.length === 1 ? "" : "s"} on ${tagAlias(ALIAS_PREFIX.testerProfile, rid)}...`);
567
+ }
568
+ const data = await client.post(`/tester-profiles/${rid}/scenarios`, body);
569
+ if (globals.json) {
570
+ output({ items: data, total: data.length }, true);
571
+ }
572
+ else {
573
+ output(data, false);
574
+ }
575
+ });
576
+ });
577
+ evidence
578
+ .command("list")
579
+ .description("List evidence traces persisted on a tester profile (newest first)")
580
+ .argument("<id>", "Profile ID (alias or UUID)")
581
+ .addHelpText("after", `
582
+ Examples:
583
+ # Read back every trace on a profile
584
+ $ ish profile evidence list tp-d4e
585
+
586
+ # Project per-row fields
587
+ $ ish profile evidence list tp-d4e --fields id,source,scenario_prompt
588
+
589
+ # Capture just the sources, one per line (auto-descends into items)
590
+ $ ish profile evidence list tp-d4e --get source
591
+
592
+ Returns a {items, total} envelope. Use the same id you passed to \`evidence add\`.`)
593
+ .action(async (id, _opts, cmd) => {
594
+ await withClient(cmd, async (client, globals) => {
595
+ const rid = resolveId(id);
596
+ const data = await client.get(`/tester-profiles/${rid}/scenarios`);
597
+ if (globals.json) {
598
+ output({ items: data, total: data.length }, true);
599
+ }
600
+ else {
601
+ output(data, false);
602
+ }
603
+ });
604
+ });
605
+ }
606
+ /**
607
+ * Parse a flag that accepts inline JSON, an @path reference, or `-` for stdin.
608
+ * Mirrors the @path convention on --description (resolveTextContent) and the
609
+ * stdin convention on --file. Used by --traces, --previous-answers, and --already-surfaced.
610
+ */
611
+ async function parseJsonFlag(value, flagName) {
612
+ if (value === "-") {
613
+ return readJsonFileOrStdin();
614
+ }
615
+ if (value.startsWith("@")) {
616
+ const path = value.slice(1);
617
+ if (!path) {
618
+ throw new Error(`Missing file path after @ in ${flagName}. Usage: ${flagName} @/path/to/file.json`);
619
+ }
620
+ return readJsonFileOrStdin(path);
621
+ }
622
+ const trimmed = value.trim();
623
+ if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
624
+ try {
625
+ return JSON.parse(trimmed);
626
+ }
627
+ catch (e) {
628
+ throw new Error(`Invalid JSON in ${flagName}: ${e.message}`);
629
+ }
630
+ }
631
+ throw new Error(`${flagName} expects inline JSON (starting with { or [), an @path reference (@/path/to/file.json), or '-' for stdin.`);
396
632
  }
397
633
  /**
398
634
  * If the value matches the testerProfileSource alias pattern (e.g. "tps-3a4"),
@@ -8,7 +8,7 @@
8
8
  * Lower-level: `study poll`, `study cancel`.
9
9
  */
10
10
  import * as readline from "node:readline/promises";
11
- import { withClient, getWebUrl, terminalLink, resolveWorkspace, resolveStudy, parseWaitTimeout, resolveAudienceProfileIds, addAudienceFilterFlags, hasAudienceFlags, } from "../lib/command-helpers.js";
11
+ import { withClient, getWebUrl, terminalLink, resolveWorkspace, resolveStudy, parseWaitTimeout, resolveAudienceProfileIds, addAudienceFilterFlags, hasAudienceFlags, readFileOrStdin, } from "../lib/command-helpers.js";
12
12
  import { resolveId, tagAlias, ALIAS_PREFIX } from "../lib/alias-store.js";
13
13
  import { output, formatSimulationPoll } from "../lib/output.js";
14
14
  import { isMediaModality, isChatModality, iterationHasContent, describeRequiredContentFlag, readChatMode, readTesterPairConfig, summarizeRoleCriteria, } from "../lib/modality.js";
@@ -1065,4 +1065,183 @@ Examples:
1065
1065
  output(data, globals.json);
1066
1066
  });
1067
1067
  });
1068
+ // --- Extend ---
1069
+ //
1070
+ // Resume a terminal tester with `additional_steps` more turns — the
1071
+ // "start" half of the cancel + extend pair. The backend spawns a NEW
1072
+ // tester under the same iteration, branched from the source's last
1073
+ // interaction; the source row is left untouched. When --instruction is
1074
+ // set, the new tester treats it as overriding direction (the backend
1075
+ // surfaces it in a dedicated <user_added_instructions> block on every
1076
+ // prompt — see app-simulation Fix 1).
1077
+ study
1078
+ .command("extend")
1079
+ .description("Extend a terminal tester with more steps (and optionally a mid-run instruction)")
1080
+ .argument("<tester_id>", "Tester to extend (alias or UUID). Must be in a terminal state (completed/failed/cancelled).")
1081
+ .option("--add-steps <n>", "Extra interactions past the source's original cap (1-50; backend caps server-side)", "10")
1082
+ .option("--instruction <text>", "User message to inject as the new tester resumes. Accepts inline text, `@/path/to/file`, or `-` for stdin.")
1083
+ .option("--wait", "Block until the new tester reaches a terminal state")
1084
+ .option("--timeout <s>", "Wait timeout in seconds (default 300; only with --wait)")
1085
+ .option("--dispatch-timeout <s>", "Per-POST timeout in seconds for the dispatch call (default 120)")
1086
+ .option("--workspace <id>", "Workspace ID; accepted for consistency (workspace is inferred from <tester_id>)")
1087
+ .addHelpText("after", `
1088
+ The source tester is left untouched; a new tester is spawned under the
1089
+ same iteration and branched from the source's last interaction. Get the
1090
+ new tester ID from \`.tester_id\` / \`.tester_alias\` on the JSON output.
1091
+
1092
+ Examples:
1093
+ # Add 5 more steps to a completed run (no new instruction):
1094
+ $ ish study extend t-072 --add-steps 5
1095
+
1096
+ # Inject a mid-run instruction and wait for completion:
1097
+ $ ish study extend t-072 \\
1098
+ --instruction "Open the language selector and switch to German." \\
1099
+ --wait
1100
+
1101
+ # Long instruction from a file:
1102
+ $ ish study extend t-072 --instruction @/tmp/prompt.txt --wait --timeout 600
1103
+
1104
+ # Instruction from stdin (pipe-friendly):
1105
+ $ echo "Try the search bar instead." | ish study extend t-072 --instruction -
1106
+
1107
+ Get tester IDs from \`ish study run --json\` (.tester_aliases[] / .tester_ids[]).
1108
+ See \`ish docs get-page concepts/extending-a-simulation\` for the full mental model.`)
1109
+ .action(async (testerId, opts, cmd) => {
1110
+ await withClient(cmd, async (client, globals) => {
1111
+ // --add-steps: client-side parser fails fast before the network
1112
+ // call. Bound mirrors the backend's `le=50` cap. If the backend lowers
1113
+ // the cap, its 422 is surfaced verbatim; if it raises the cap, this
1114
+ // check must be updated too, or values above 50 stay rejected here.
1115
+ const addStepsRaw = opts.addSteps ?? "10";
1116
+ const addSteps = parseInt(addStepsRaw, 10);
1117
+ if (Number.isNaN(addSteps) || addSteps < 1 || addSteps > 50) {
1118
+ throw new Error(`--add-steps must be an integer between 1 and 50, got "${addStepsRaw}".`);
1119
+ }
1120
+ // --instruction: inline text | `@path` | `-` (stdin).
1121
+ let instruction;
1122
+ if (opts.instruction !== undefined) {
1123
+ let raw;
1124
+ if (opts.instruction === "-") {
1125
+ raw = await readFileOrStdin("-");
1126
+ }
1127
+ else if (opts.instruction.startsWith("@")) {
1128
+ raw = await readFileOrStdin(opts.instruction.slice(1));
1129
+ }
1130
+ else {
1131
+ raw = opts.instruction;
1132
+ }
1133
+ const trimmed = raw.trim();
1134
+ if (trimmed.length === 0) {
1135
+ throw new Error("--instruction must be non-empty (after trimming).");
1136
+ }
1137
+ instruction = trimmed;
1138
+ }
1139
+ const dispatchTimeoutMs = opts.dispatchTimeout
1140
+ ? Math.max(1, parseInt(opts.dispatchTimeout, 10)) * 1000
1141
+ : 120_000;
1142
+ if (opts.dispatchTimeout &&
1143
+ (Number.isNaN(parseInt(opts.dispatchTimeout, 10)) ||
1144
+ parseInt(opts.dispatchTimeout, 10) < 1)) {
1145
+ throw new Error(`--dispatch-timeout must be a positive integer (seconds), got "${opts.dispatchTimeout}".`);
1146
+ }
1147
+ const sourceId = resolveId(testerId);
1148
+ const sourceAlias = tagAlias(ALIAS_PREFIX.tester, sourceId);
1149
+ if (!globals.quiet) {
1150
+ const stepNote = `${addSteps} step${addSteps === 1 ? "" : "s"}`;
1151
+ const instrNote = instruction ? " and a new instruction" : "";
1152
+ console.error(`Extending ${sourceAlias} with ${stepNote}${instrNote}...`);
1153
+ }
1154
+ const body = {
1155
+ source_tester_id: sourceId,
1156
+ additional_steps: addSteps,
1157
+ };
1158
+ if (instruction)
1159
+ body.user_message = instruction;
1160
+ const data = await client.post("/simulation/interactive/extend", body, { timeout: dispatchTimeoutMs });
1161
+ const newTesterId = String(data.tester_id);
1162
+ const newAlias = tagAlias(ALIAS_PREFIX.tester, newTesterId);
1163
+ // UUIDs preserved on the output — `study extend` is a write-path
1164
+ // dispatch command and the new `tester_id` is the load-bearing
1165
+ // return value (mirrors how `study run` keeps tester_ids in lean
1166
+ // output via the writePath option).
1167
+ const baseEnvelope = {
1168
+ tester_id: newTesterId,
1169
+ tester_alias: newAlias,
1170
+ source_tester_id: sourceId,
1171
+ source_alias: sourceAlias,
1172
+ study_id: data.study_id,
1173
+ job_id: data.job_id,
1174
+ additional_steps: addSteps,
1175
+ ...(instruction && { instruction }),
1176
+ message: data.message,
1177
+ };
1178
+ if (!opts.wait) {
1179
+ if (globals.json) {
1180
+ output(baseEnvelope, true, { writePath: true });
1181
+ }
1182
+ else {
1183
+ console.error(` New tester: ${newAlias}`);
1184
+ if (data.message)
1185
+ console.error(` ${data.message}`);
1186
+ console.error(` Run \`ish study wait ${newAlias} --timeout 600\` to block until it finishes.`);
1187
+ }
1188
+ return;
1189
+ }
1190
+ // --wait: poll the new tester until it reaches a terminal state.
1191
+ // Mirrors the per-tester wait block in `study wait <tester_id>`
1192
+ // above — same WaitTimeoutError shape (exit 5, retryable) so the
1193
+ // failure envelope is consistent across commands.
1194
+ const timeoutMs = parseWaitTimeout(opts.timeout);
1195
+ if (!globals.quiet) {
1196
+ console.error(`Waiting for ${newAlias} to finish...`);
1197
+ }
1198
+ const start = Date.now();
1199
+ let lastStatus = "";
1200
+ while (true) {
1201
+ const status = await client.get(`/simulation/status/${newTesterId}`, undefined, { timeout: 60_000 });
1202
+ const s = String(status.status ?? "unknown");
1203
+ if (!globals.quiet && s !== lastStatus) {
1204
+ process.stderr.write(` ${s}\n`);
1205
+ lastStatus = s;
1206
+ }
1207
+ if (TERMINAL_STATUSES.has(s)) {
1208
+ const result = {
1209
+ status: s,
1210
+ ...(typeof status.interaction_count === "number" && {
1211
+ interaction_count: status.interaction_count,
1212
+ }),
1213
+ ...(status.tester_name && { tester_name: status.tester_name }),
1214
+ ...(status.error && { error: status.error }),
1215
+ };
1216
+ if (globals.json) {
1217
+ output({ ...baseEnvelope, result }, true, { writePath: true });
1218
+ }
1219
+ else {
1220
+ console.error(` ${newAlias} finished: ${s}`);
1221
+ if (status.error)
1222
+ console.error(` error: ${status.error}`);
1223
+ }
1224
+ return;
1225
+ }
1226
+ if (Date.now() - start > timeoutMs) {
1227
+ throw new WaitTimeoutError(`Timed out after ${Math.round(timeoutMs / 1000)}s waiting for tester ${newAlias}. Last status: ${s}.`, {
1228
+ study_id: newTesterId,
1229
+ timeout_seconds: Math.round(timeoutMs / 1000),
1230
+ done: 0,
1231
+ total: 1,
1232
+ pending: 1,
1233
+ rows: [
1234
+ {
1235
+ id: newTesterId,
1236
+ status: s,
1237
+ tester_name: String(status.tester_name ?? "Unknown"),
1238
+ interaction_count: typeof status.interaction_count === "number" ? status.interaction_count : 0,
1239
+ },
1240
+ ],
1241
+ });
1242
+ }
1243
+ await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
1244
+ }
1245
+ });
1246
+ });
1068
1247
  }
package/dist/lib/docs.js CHANGED
@@ -1213,6 +1213,10 @@ The legacy \`--tech-savviness\` flag was removed in
1213
1213
 
1214
1214
  - \`concepts/source\` — the inputs to \`profile generate\`.
1215
1215
  - \`concepts/audience\` — how profiles get selected into a run.
1216
+ - \`guides/build-specific-tester\` — iterative probe loop
1217
+ (\`profile suggest-scenarios\` + \`profile evidence add\`/\`list\`)
1218
+ for crafting one specific persona, distinct from the
1219
+ audience-generation flow.
1216
1220
  - \`reference/billing-limits\` — \`maxCustomTesterProfiles\` cap on profile creation.
1217
1221
  `;
1218
1222
  const CONCEPT_SOURCE = `# concept: source
@@ -1526,18 +1530,200 @@ ish study run --study s-b2c -y --json | jq -r '.tester_aliases[]' # → t-072,
1526
1530
  ish study poll <tester_id> # one-shot status for one tester
1527
1531
  ish study wait <tester_id> --timeout 600 # block until that tester finishes
1528
1532
  ish study cancel <tester_id> # cancel a running simulation
1533
+ ish study extend <tester_id> --add-steps 10 # resume a terminal tester with N more steps
1529
1534
  \`\`\`
1530
1535
 
1531
1536
  \`<tester_id>\` accepts a tester alias (\`t-…\`) or a full UUID. The
1532
1537
  study-level \`poll\`/\`wait\` forms also exist (\`--study <id>\` /
1533
1538
  \`--iteration <id>\`) for whole-batch progress.
1534
1539
 
1540
+ \`cancel\` and \`extend\` form a reversible stop/start pair. \`cancel\`
1541
+ walks a running tester to a terminal \`cancelled\` status (no row
1542
+ removed); \`extend\` then spawns a fresh tester branched from the
1543
+ cancelled tester's last interaction. See
1544
+ \`concepts/extending-a-simulation\` for the full mental model.
1545
+
1535
1546
  ## Related
1536
1547
 
1537
1548
  - \`reference/json-mode\` — output modes (display vs capture vs chain).
1538
1549
  Use \`--get tester_aliases\` to capture the run's testers without
1539
1550
  piping through \`jq\`. \`--human\` forces table output even through
1540
1551
  \`tee\`/redirection.
1552
+ - \`concepts/extending-a-simulation\` — \`study extend\` flow, when to
1553
+ use it, and the mid-run \`--instruction\` UX.
1554
+ `;
1555
+ const CONCEPT_EXTENDING_SIMULATION = `# concept: extending a simulation
1556
+
1557
+ \`ish study extend <tester_id>\` resumes a **terminal** tester with
1558
+ more interactions — and optionally a mid-run instruction. The source
1559
+ tester is left untouched; a **new** tester row is spawned under the
1560
+ same iteration, branched from the source's last interaction. Use it
1561
+ when a run hits the \`--max-interactions\` cap before the tester
1562
+ finished, or when you want to probe a "what if I had told them X
1563
+ mid-run?" scenario without restarting from scratch.
1564
+
1565
+ ## When extend is the right verb
1566
+
1567
+ - Run hit the step cap (\`--max-interactions\`) before the tester
1568
+ completed the assignment — give it 10 more steps to push through.
1569
+ - Tester veered off into a dead-end — cancel it, then extend with an
1570
+ instruction redirecting it ("Stop browsing the blog. Open the pricing
1571
+ page and try to add a seat.").
1572
+ - You want to test how a tester reacts to a mid-run change you didn't
1573
+ capture in the original assignment — without re-running the whole
1574
+ cohort.
1575
+
1576
+ When extend is **not** the right verb:
1577
+
1578
+ - Source tester is still RUNNING. \`cancel\` it first, then extend.
1579
+ Extend refuses non-terminal sources server-side.
1580
+ - You want a fresh cohort with new audience flags. Use \`study run\`
1581
+ with \`--profile\` / \`--sample\` / \`--all\` instead — extend is a
1582
+ per-tester resume, not a batch op.
1583
+ - You want to change the iteration's URL or content. Edit the iteration
1584
+ itself (\`iteration update\` or a fresh iteration) — extend always
1585
+ inherits the source's iteration config.
1586
+
1587
+ ## Mental model — cancel + extend are a reversible pair
1588
+
1589
+ \`cancel\` and \`extend\` are siblings in the tester lifecycle:
1590
+
1591
+ \`\`\`
1592
+ RUNNING ──(cancel)──▶ CANCELLED ──(extend)──▶ new RUNNING tester
1593
+ (branched from the
1594
+ cancelled tester's
1595
+ last interaction)
1596
+
1597
+ COMPLETED / FAILED ──(extend)──▶ new RUNNING tester
1598
+ \`\`\`
1599
+
1600
+ \`cancel\` is non-destructive — the tester row, every interaction, every
1601
+ screenshot, and the questionnaire answers all survive. \`extend\` then
1602
+ forks from the last interaction to keep the new tester's history
1603
+ seamlessly continuous.
1604
+
1605
+ ## Flags
1606
+
1607
+ \`\`\`
1608
+ ish study extend <tester_id>
1609
+ [--add-steps <n>] # extra steps, 1-50, default 10
1610
+ [--instruction <text|@path|->] # optional mid-run user message
1611
+ [--wait] # block until terminal
1612
+ [--timeout <s>] # wait timeout (default 300)
1613
+ [--dispatch-timeout <s>] # POST timeout (default 120)
1614
+ \`\`\`
1615
+
1616
+ \`--instruction\` accepts three input shapes, matching the rest of the
1617
+ CLI:
1618
+
1619
+ \`\`\`bash
1620
+ # Inline:
1621
+ ish study extend t-072 --instruction "Switch to the German pricing page."
1622
+
1623
+ # From a file (long-form prompts, version-controlled):
1624
+ ish study extend t-072 --instruction @/tmp/redirect.md
1625
+
1626
+ # From stdin (pipe-friendly):
1627
+ echo "Try the search bar instead." | ish study extend t-072 --instruction -
1628
+ \`\`\`
1629
+
1630
+ The instruction is sent to the backend as \`user_message\`. The new
1631
+ tester treats it as **overriding direction** for the rest of the run —
1632
+ the backend surfaces it in a dedicated \`<user_added_instructions>\`
1633
+ block on every prompt, not just the first turn, so the LLM doesn't
1634
+ forget about it as the run goes on.
1635
+
1636
+ ## JSON output (lean, write-path)
1637
+
1638
+ Default (no \`--wait\`):
1639
+
1640
+ \`\`\`json
1641
+ {
1642
+ "tester_id": "<new-uuid>",
1643
+ "tester_alias": "t-xyz",
1644
+ "source_tester_id": "<source-uuid>",
1645
+ "source_alias": "t-abc",
1646
+ "study_id": "<study-uuid>",
1647
+ "job_id": "<job-uuid>",
1648
+ "additional_steps": 10,
1649
+ "instruction": "Switch to the German pricing page.",
1650
+ "message": "Simulation queued"
1651
+ }
1652
+ \`\`\`
1653
+
1654
+ With \`--wait\`, a \`result\` field is appended once the new tester
1655
+ reaches a terminal status:
1656
+
1657
+ \`\`\`json
1658
+ {
1659
+ ...,
1660
+ "result": {
1661
+ "status": "completed",
1662
+ "interaction_count": 14,
1663
+ "tester_name": "Anna, 34, Munich"
1664
+ }
1665
+ }
1666
+ \`\`\`
1667
+
1668
+ UUID fields (\`tester_id\`, \`source_tester_id\`, \`study_id\`, \`job_id\`)
1669
+ are preserved in lean output because the new \`tester_id\` is the
1670
+ load-bearing return value — same exception \`study run\` makes.
1671
+
1672
+ ## Errors
1673
+
1674
+ | Backend | CLI behavior | Exit |
1675
+ |---|---|---|
1676
+ | Source not terminal (RUNNING / QUEUED) | \`Tester is still running — cancel it first or wait for completion.\` | 2 |
1677
+ | Source tester not found | \`Tester not found: <id>\` | 4 |
1678
+ | \`additional_steps\` out of range | Client-side parser rejects before the network call | 2 |
1679
+ | Insufficient credits | Bubbles the server message; retry only after topping up | 5 |
1680
+ | Wait timed out (\`--wait\` only) | \`WaitTimeoutError\` envelope with current status under \`progress.rows[0]\` — the run keeps going server-side; resume with \`study wait <new-tester>\` | 5 |
1681
+
1682
+ ## Cost model
1683
+
1684
+ \`extend\` charges credits for **only \`additional_steps\`**, not for
1685
+ the source's original \`max_interactions\` cap. The formula is the same
1686
+ as \`study run\` for interactive runs: \`max(1, round(N / 10))\` per
1687
+ tester. So \`--add-steps 10\` costs **1 credit**; \`--add-steps 50\`
1688
+ costs **5 credits**. See \`reference/credits\` for the full table.
1689
+
1690
+ ## Worked example — push past the step cap
1691
+
1692
+ \`\`\`bash
1693
+ # 1. Run a study with a small step cap to feel the limit:
1694
+ ish study run --sample 1 --max-interactions 5 --wait
1695
+ # → tester t-072 (status: completed_with_errors, hit cap on step 5)
1696
+
1697
+ # 2. Inspect what happened:
1698
+ ish study tester t-072 --summary
1699
+
1700
+ # 3. Give it 15 more steps:
1701
+ ish study extend t-072 --add-steps 15 --wait --timeout 600
1702
+ # → new tester t-9af, status: completed, 18 interactions total
1703
+
1704
+ # 4. Read the new tester's transcript:
1705
+ ish study tester t-9af --summary
1706
+ \`\`\`
1707
+
1708
+ ## Worked example — redirect mid-run
1709
+
1710
+ \`\`\`bash
1711
+ # Tester wandered into the wrong flow. Cancel, then redirect:
1712
+ ish study cancel t-072
1713
+ ish study extend t-072 \\
1714
+ --instruction "Stop browsing the blog. Open the pricing page and try to upgrade to Pro." \\
1715
+ --add-steps 10 --wait
1716
+ \`\`\`
1717
+
1718
+ ## Related
1719
+
1720
+ - \`concepts/run-verbs\` — the top-level decision rule (\`study run\` vs
1721
+ \`ask run\`); extend is a lifecycle verb downstream of either.
1722
+ - \`reference/credits\` — per-modality cost formulas. \`extend\` follows
1723
+ the interactive formula scaled to \`additional_steps\`.
1724
+ - \`reference/aliases\` — the \`t-…\` prefix and how aliases resolve.
1725
+ - \`reference/json-mode\` — capture-mode (\`--get tester_alias\`) for
1726
+ chaining the new tester into the next call.
1541
1727
  `;
1542
1728
  const REFERENCE_ALIASES = `# reference: aliases
1543
1729
 
@@ -3236,6 +3422,122 @@ without a second round-trip.
3236
3422
  - \`reference/json-mode\` — error envelope shape and exit code mapping
3237
3423
  (\`usage_limit_reached\` is HTTP 403, exit 1, non-retryable).
3238
3424
  `;
3425
+ const GUIDE_BUILD_SPECIFIC_TESTER = `# guide: build a specific simulated tester from notes
3426
+
3427
+ \`profile generate\` is the right tool for *audiences* (many profiles
3428
+ from a description or interview sources). When you want **one specific
3429
+ tester** — modelling a real prospect, rebuilding a persona from a
3430
+ single interview, or simulating a named stakeholder for a pitch
3431
+ rehearsal — use the iterative probe loop:
3432
+
3433
+ 1. \`ish profile suggest-scenarios\` — describe what you already
3434
+ know; the LLM returns 1–10 scenario probes designed to expose what
3435
+ you don't.
3436
+ 2. Answer the probes locally (in chat, with the user, or from
3437
+ transcripts).
3438
+ 3. \`ish profile create --file ...\` — save the profile shell.
3439
+ 4. \`ish profile evidence add <id>\` — persist the answered probes
3440
+ as structured evidence on the profile so they survive into runtime
3441
+ persona injection.
3442
+ 5. \`ish profile evidence list <id>\` — read back what's saved,
3443
+ newest first. Useful for verifying a session or branching on prior
3444
+ state before the next probe round.
3445
+
3446
+ ## Probe types
3447
+
3448
+ \`suggest-scenarios\` returns four discriminated shapes. Each is meant
3449
+ to surface a different facet of the persona:
3450
+
3451
+ - \`situation\` — \`{situation, options[2..4]}\`: "you're in scenario
3452
+ X; which option fits?" Multiple-choice, lets the persona pick
3453
+ behavior.
3454
+ - \`voice\` — \`{situation, options[2..4]}\`: same shape as situation
3455
+ but framed around tone/phrasing the tester would actually use.
3456
+ - \`binary\` — \`{description, option_a, option_b}\`: forced choice
3457
+ between two competing values or trade-offs.
3458
+ - \`micro-story\` — \`{prompt}\`: open-ended; the persona narrates a
3459
+ short story. Answer with a multi-sentence free-text reply.
3460
+
3461
+ The wire format keeps \`option_a\` / \`option_b\` (snake_case). The
3462
+ CLI passes them through verbatim — don't transform to camelCase.
3463
+
3464
+ **Identity rule** — when building the traces file (\`answers.json\` in the worked example below) after answering a
3465
+ probe, copy the scenario's \`type\` straight into the trace's
3466
+ \`source\`. Same enum, two field names. The mechanical mapping:
3467
+
3468
+ | Suggested scenario field | Trace field |
3469
+ |--------------------------|------------------|
3470
+ | \`scenario.type\` | \`trace.source\` |
3471
+ | \`scenario.situation\` / \`scenario.description\` / \`scenario.prompt\` | \`trace.scenario_prompt\` (one line, whatever question label the user actually answered) |
3472
+ | (user's answer) | \`trace.text\` |
3473
+
3474
+ ## Worked example
3475
+
3476
+ \`\`\`
3477
+ # 1. Suggest 5 probes from a context blob
3478
+ ish profile suggest-scenarios \\
3479
+ --context "Staff platform engineer at a Stripe-using fintech. \\
3480
+ Owns on-call for the payments edge. Burned by a Black Friday \\
3481
+ outage last year." \\
3482
+ --count 5
3483
+ # → {scenarios: [{type: "situation", ...}, {type: "binary", ...}, ...]}
3484
+
3485
+ # 2. (offline) answer the probes — build a local answers.json:
3486
+ # [
3487
+ # {"text": "Page the staff engineer first, then start the runbook.",
3488
+ # "source": "situation",
3489
+ # "scenario_prompt": "PagerDuty fires at 02:00 on payments edge."},
3490
+ # {"text": "Option A — cut the rollout, take the revenue hit.",
3491
+ # "source": "binary",
3492
+ # "scenario_prompt": "Ship the migration or hold for incident review?"}
3493
+ # ]
3494
+
3495
+ # 3. Create the profile shell
3496
+ ish profile create --file ./persona.json
3497
+ # → tp-d4e
3498
+
3499
+ # 4. Persist the answered probes as evidence
3500
+ ish profile evidence add tp-d4e --traces-file ./answers.json
3501
+ # → {items: [{id, text, source, scenario_prompt, created_at}, ...], total: N}
3502
+
3503
+ # 5. Read back what got saved (also useful before the next probe round)
3504
+ ish profile evidence list tp-d4e
3505
+ ish profile evidence list tp-d4e --get source # one source per line
3506
+ \`\`\`
3507
+
3508
+ ## Iterating the probe loop
3509
+
3510
+ To go deeper on a follow-up pass, feed the prior round back in so the
3511
+ LLM doesn't paraphrase what you already asked:
3512
+
3513
+ \`\`\`
3514
+ ish profile suggest-scenarios \\
3515
+ --context-file ./notes.md \\
3516
+ --count 3 \\
3517
+ --already-surfaced '["PagerDuty fires at 02:00 on payments edge."]' \\
3518
+ --previous-answers @./previous-answers.json
3519
+ \`\`\`
3520
+
3521
+ \`--previous-answers\` is the array of \`{type, prompt, answer}\` rows
3522
+ already collected. \`--already-surfaced\` is the array of prompt
3523
+ labels already shown — the LLM uses these to avoid re-asking. Both
3524
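+ cap at 40 entries. The \`--previous-answers\` rows are not the evidence-trace shape (\`{text, source, scenario_prompt}\`) that \`evidence add\` takes — when reusing a traces file, remap \`source\` → \`type\`, \`scenario_prompt\` → \`prompt\`, and \`text\` → \`answer\` first.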
3525
+
3526
+ ## When to reach for which command
3527
+
3528
+ | Need | Command |
3529
+ |---|---|
3530
+ | Many profiles from a description or interview | \`ish profile generate\` |
3531
+ | One specific persona, iterative probe loop | \`ish profile suggest-scenarios\` + \`evidence add\`/\`list\` |
3532
+ | Exact profile from a JSON spec, no LLM | \`ish profile create --file\` |
3533
+
3534
+ ## Related
3535
+
3536
+ - \`concepts/profile\` — what a tester profile is; structured fields.
3537
+ - \`concepts/source\` — interview transcripts / audio / PDF inputs
3538
+ for the audience-generation flow.
3539
+ - \`reference/aliases\` — \`tp-…\` is the profile alias prefix.
3540
+ `;
3239
3541
  const PAGES = [
3240
3542
  {
3241
3543
  slug: "overview",
@@ -3321,6 +3623,12 @@ const PAGES = [
3321
3623
  description: "Side-by-side; decision rule for choosing one over the other.",
3322
3624
  body: CONCEPT_RUN_VERBS,
3323
3625
  },
3626
+ {
3627
+ slug: "concepts/extending-a-simulation",
3628
+ title: "concept: extending a simulation (study extend)",
3629
+ description: "Resume a terminal tester with more steps and an optional mid-run instruction. Cancel + extend as a reversible stop/start pair.",
3630
+ body: CONCEPT_EXTENDING_SIMULATION,
3631
+ },
3324
3632
  {
3325
3633
  slug: "concepts/active-context",
3326
3634
  title: "concept: active context",
@@ -3375,6 +3683,12 @@ const PAGES = [
3375
3683
  description: "What to do when workspace_create returns usage_limit_reached on a saturated account. Inspect workspace_get (has_headroom / child_counts / last_activity_at), pick a reuse target, or call ish workspace create --ensure name.",
3376
3684
  body: GUIDE_COLD_START,
3377
3685
  },
3686
+ {
3687
+ slug: "guides/build-specific-tester",
3688
+ title: "guide: build a specific simulated tester from notes",
3689
+ description: "Iterative probe loop for one specific persona: profile suggest-scenarios returns LLM probes; answer them locally; profile evidence add persists answers; profile evidence list reads them back.",
3690
+ body: GUIDE_BUILD_SPECIFIC_TESTER,
3691
+ },
3378
3692
  ];
3379
3693
  const PAGES_BY_SLUG = new Map(PAGES.map((p) => [p.slug, p]));
3380
3694
  export function listPages() {
@@ -44,6 +44,14 @@ export declare const LOCALE_TYPES: readonly ["urban", "suburban", "small_town",
44
44
  export type LocaleType = typeof LOCALE_TYPES[number];
45
45
  export declare const INCOME_LEVELS: readonly ["lower", "lower_middle", "middle", "upper_middle", "upper", "prefer_not_to_say"];
46
46
  export type IncomeLevel = typeof INCOME_LEVELS[number];
47
+ /**
48
+ * Source kinds for a persisted scenario answer (EvidenceTrace.source). Matches
49
+ * the backend `EvidenceSource` literal union — one value is hyphenated
50
+ * (`micro-story`) so the wire format is mixed; `assertEnumValue` is strict
51
+ * about this and does not fold hyphens to underscores.
52
+ */
53
+ export declare const EVIDENCE_SOURCES: readonly ["situation", "voice", "binary", "micro-story"];
54
+ export type EvidenceSourceEnum = typeof EVIDENCE_SOURCES[number];
47
55
  export declare const EMPLOYMENT_STATUSES: readonly ["employed_full_time", "employed_part_time", "self_employed", "unemployed_seeking", "student", "homemaker", "retired", "unable_to_work", "other"];
48
56
  export type EmploymentStatus = typeof EMPLOYMENT_STATUSES[number];
49
57
  /**
package/dist/lib/enums.js CHANGED
@@ -76,6 +76,18 @@ export const INCOME_LEVELS = [
76
76
  "upper",
77
77
  "prefer_not_to_say",
78
78
  ];
79
+ /**
80
+ * Source kinds for a persisted scenario answer (EvidenceTrace.source). Matches
81
+ * the backend `EvidenceSource` literal union — one value is hyphenated
82
+ * (`micro-story`) so the wire format is mixed; `assertEnumValue` is strict
83
+ * about this and does not fold hyphens to underscores.
84
+ */
85
+ export const EVIDENCE_SOURCES = [
86
+ "situation",
87
+ "voice",
88
+ "binary",
89
+ "micro-story",
90
+ ];
79
91
  export const EMPLOYMENT_STATUSES = [
80
92
  "employed_full_time",
81
93
  "employed_part_time",
@@ -213,6 +213,7 @@ See \`references/workflows.md\` in this skill for end-to-end transcripts:
213
213
  - Generating profiles from a transcript or audio source
214
214
  - Targeting a gated URL (basic auth, session cookie, login form)
215
215
  - Re-running a study with a fresh audience
216
+ - Extending a tester past its step cap (or redirecting mid-run with \`study extend\`)
216
217
 
217
218
  ## Display vs. capture: the right output mode
218
219
 
@@ -358,6 +359,13 @@ implies \`--quiet\` so the bare value is the only thing on stdout.
358
359
  - **\`ask add-questions\` supports \`--wait\` / \`--timeout\`.** Match
359
360
  the parity of \`ask create\` and \`ask run\`. Without \`--wait\` the
360
361
  command returns after dispatch (round still running).
362
+ - **\`study extend <tester>\` resumes a terminal tester.** Use it when
363
+ a run hit \`--max-interactions\` before finishing, or pair with
364
+ \`study cancel\` to redirect mid-run via \`--instruction\` (inline,
365
+ \`@path\`, or stdin via \`-\`). Spawns a **new** tester branched from
366
+ the source's last interaction — source row untouched. Credits debit
367
+ per \`max(1, round(additional_steps / 10))\`. See workflow #11 and
368
+ \`ish docs get-page concepts/extending-a-simulation\`.
361
369
  - **\`pick_confidence\` (0..1) is on every \`--wants-pick\` response.**
362
370
  The model's self-reported confidence in its variant choice. Use it
363
371
  to break ties when nominal pick counts are close. See
@@ -607,7 +615,50 @@ ish profile generate --source tps-3a4 --propose-count
607
615
  ish profile generate --source tps-3a4 --count 4
608
616
  \`\`\`
609
617
 
610
- ## 4. Target a gated URL (Vercel preview / staging gate / login form)
618
+ ## 4. Build a specific simulated tester from notes
619
+
620
+ Goal: rebuild one named persona (a real prospect, a stakeholder for
621
+ a pitch rehearsal) via the iterative probe loop — distinct from
622
+ \`profile generate\`, which is for audiences.
623
+
624
+ \`\`\`bash
625
+ # 1. Suggest 5 probes from a context blob
626
+ ish profile suggest-scenarios \\
627
+ --context "Staff platform engineer at a Stripe-using fintech. \\
628
+ Owns oncall for the payments edge. Burned by a Black Friday \\
629
+ outage last year." \\
630
+ --count 5
631
+ # → {scenarios: [{type:"situation",...},{type:"binary",...},...]}
632
+
633
+ # 2. (offline) Answer the probes — build answers.json:
634
+ # [{"text":"...","source":"situation","scenario_prompt":"..."}, ...]
635
+ # Valid source values: situation, voice, binary, micro-story
636
+
637
+ # 3. Save the profile shell
638
+ ish profile create --file ./persona.json
639
+ # → tp-d4e
640
+
641
+ # 4. Persist the answers as structured evidence
642
+ ish profile evidence add tp-d4e --traces-file ./answers.json
643
+
644
+ # 5. Read back what's saved (also useful before the next probe round)
645
+ ish profile evidence list tp-d4e
646
+ \`\`\`
647
+
648
+ To iterate, feed prior prompts/answers back in so the LLM doesn't
649
+ paraphrase what you already asked:
650
+
651
+ \`\`\`bash
652
+ ish profile suggest-scenarios \\
653
+ --context-file ./notes.md --count 3 \\
654
+ --already-surfaced '["PagerDuty fires at 02:00."]' \\
655
+ --previous-answers @./previous-answers.json   # rows of {type, prompt, answer}, not the trace-shaped answers.json
656
+ \`\`\`
657
+
658
+ See \`ish docs get-page guides/build-specific-tester\` for the full
659
+ walkthrough including the four probe-type shapes.
660
+
661
+ ## 5. Target a gated URL (Vercel preview / staging gate / login form)
611
662
 
612
663
  Configure credentials once on the workspace; testers reuse them.
613
664
 
@@ -633,7 +684,7 @@ printf %s "$STAGING_PW" | ish workspace site-access basic-auth \\
633
684
  --username alice --password -
634
685
  \`\`\`
635
686
 
636
- ## 5. Re-run a study with a fresh audience
687
+ ## 6. Re-run a study with a fresh audience
637
688
 
638
689
  Goal: same study, same iteration, but compare audiences.
639
690
 
@@ -649,7 +700,7 @@ If you don't pass any audience flags, \`ish study run\` reuses the
649
700
  iteration's existing testers — useful for re-running after fixing the
650
701
  target page.
651
702
 
652
- ## 6. Localhost target (dev environment)
703
+ ## 7. Localhost target (dev environment)
653
704
 
654
705
  Expose a port via a Cloudflare tunnel; \`ish connect\` prints the public
655
706
  URL the study iteration can point at. \`connect\` is foreground and
@@ -675,7 +726,7 @@ URL=$(jq -r 'select(.status=="connected") | .tunnel_url' /tmp/ish-tunnel.log | h
675
726
  ish iteration create --url "$URL"
676
727
  \`\`\`
677
728
 
678
- ## 7. Chat-modality study (drive a chatbot endpoint)
729
+ ## 8. Chat-modality study (drive a chatbot endpoint)
679
730
 
680
731
  The chat modality has **two modes**, picked by
681
732
  \`iteration.details.mode_details.mode\`:
@@ -991,7 +1042,7 @@ ish iteration get <iter-id> --json \\
991
1042
  ish study results <study-id> --transcript <tester-id> --json
992
1043
  \`\`\`
993
1044
 
994
- ## 8. Stage an ask for human review, then dispatch
1045
+ ## 9. Stage an ask for human review, then dispatch
995
1046
 
996
1047
  Goal: prepare a billable A/B but let the user inspect and approve the
997
1048
  audience + prompt before any credits are spent. Two-step flow with a
@@ -1025,7 +1076,7 @@ status as a column.
1025
1076
  wait for. Pass \`--wait\` to \`ish ask dispatch\` instead if you want to
1026
1077
  block until the round settles.
1027
1078
 
1028
- ## 9. Display-vs-capture: a script that does both
1079
+ ## 10. Display-vs-capture: a script that does both
1029
1080
 
1030
1081
  Goal: drive an A/B in a script, capture aliases without \`jq\`, and
1031
1082
  still show the human a readable result table at the end.
@@ -1054,6 +1105,60 @@ The mental rule: **\`--get\` is for capture, bare commands / \`--human\`
1054
1105
  are for display, \`--json\` is for chaining (multiple fields at once).**
1055
1106
  If you find yourself reaching for \`jq -r .x\`, you wanted \`--get x\`.
1056
1107
 
1108
+ ## 11. Extend a tester past its step cap (or redirect mid-run)
1109
+
1110
+ Goal: a tester hit the \`--max-interactions\` cap before finishing, or
1111
+ veered off into the wrong flow. Resume it with more steps and an
1112
+ optional mid-run instruction — without re-running the whole cohort.
1113
+
1114
+ \`\`\`bash
1115
+ # 1. Source run with a small cap to feel the limit:
1116
+ # (run it once — capture the first tester alias straight from the run, so credits are only spent once)
1117
+ SRC=$(ish study run --sample 1 --max-interactions 5 --wait \\
1118
+ --get tester_aliases | head -1)
1119
+
1120
+ # 2. Inspect what stopped (optional, useful for the LLM to choose
1121
+ # a redirect instruction):
1122
+ ish study tester "$SRC" --summary
1123
+
1124
+ # 3a. Add 15 more steps, no new instruction — let the tester continue:
1125
+ ish study extend "$SRC" --add-steps 15 --wait --timeout 600
1126
+
1127
+ # 3b. OR redirect with a mid-run instruction (captured as user_message;
1128
+ # the backend surfaces it on every prompt for the rest of the run):
1129
+ ish study extend "$SRC" \\
1130
+ --instruction "Stop browsing the blog. Open the pricing page and try to upgrade to Pro." \\
1131
+ --add-steps 10 --wait
1132
+
1133
+ # 4. Capture the new tester alias to chain into results:
1134
+ NEW=$(ish study extend "$SRC" --add-steps 10 --get tester_alias)
1135
+ ish study tester "$NEW" --summary
1136
+ \`\`\`
1137
+
1138
+ Rules to remember:
1139
+ - Source tester must be **terminal** (\`completed\` / \`failed\` /
1140
+ \`cancelled\`). If it's still running, \`ish study cancel <src>\` first.
1141
+ \`cancel\` is non-destructive — every interaction, screenshot, and
1142
+ questionnaire answer survives. \`cancel\` + \`extend\` form a
1143
+ reversible stop/start pair.
1144
+ - A **new** tester id is created under the same iteration (the backend
1145
+ branches from the source's last interaction). The source row is left
1146
+ untouched. Get the new id from \`.tester_id\` / \`.tester_alias\` on
1147
+ \`--json\`.
1148
+ - \`--add-steps\` is **only** the extra budget; it does NOT include the
1149
+ source's original cap. Credits debit per
1150
+ \`max(1, round(additional_steps / 10))\` — same formula as
1151
+ \`study run\` interactive, just scoped to the extension.
1152
+ - \`--instruction\` accepts three input shapes (matching the rest of
1153
+ the CLI): inline text, \`@/path/to/file\`, or \`-\` for stdin. Empty
1154
+ values after trimming are rejected client-side.
1155
+ - Don't use \`extend\` to change the iteration's URL / content. Edit
1156
+ the iteration directly (\`iteration update\`) or run a fresh
1157
+ \`study run\`. Extend always inherits the source's iteration config.
1158
+
1159
+ See \`ish docs get-page concepts/extending-a-simulation\` for the full
1160
+ mental model (cancel + extend as a pair, error envelopes, cost model).
1161
+
1057
1162
  ## Tips for chaining commands as an agent
1058
1163
 
1059
1164
  - Capture aliases from JSON: \`ITER=$(ish iteration create --url … --json | jq -r .alias)\`
@@ -1174,7 +1279,7 @@ ish <command> --help
1174
1279
  | \`study\` | Persistent research artifact | concepts/study |
1175
1280
  | \`iteration\` | One configured run of a study (URL or media) | concepts/iteration |
1176
1281
  | \`ask\` | Lightweight reaction artifact | concepts/ask |
1177
- | \`profile\` | Tester profiles + audience generation | concepts/profile |
1282
+ | \`profile\` | Tester profiles, audience generation, and the \`suggest-scenarios\` + \`evidence add\`/\`list\` probe loop for crafting one specific persona | concepts/profile |
1178
1283
  | \`source\` | Upload sources for profile generation | concepts/source |
1179
1284
  | \`config\` | Simulation configs (model, timing, retries) | (run \`ish config --help\`) |
1180
1285
  | \`chat\` | Chat endpoint CRUD + smoke test (external_chatbot mode); pair-mode iterations created via \`iteration create --chat-mode tester_pair\` | guides/chat |
@@ -188,6 +188,62 @@ export interface GeneratedProfile {
188
188
  custom_field_values?: Record<string, unknown>;
189
189
  [key: string]: unknown;
190
190
  }
191
+ export type EvidenceSource = "situation" | "voice" | "binary" | "micro-story";
192
+ export interface SessionAnswer {
193
+ type: EvidenceSource;
194
+ prompt: string;
195
+ answer: string;
196
+ }
197
+ export interface SuggestScenariosRequest {
198
+ product_id: string;
199
+ context: string;
200
+ count?: number;
201
+ already_surfaced_prompts?: string[];
202
+ previous_answers?: SessionAnswer[];
203
+ }
204
+ /**
205
+ * Discriminated union — wire shape matches the backend (snake_case
206
+ * `option_a`/`option_b` for binary scenarios). The CLI is wire-honest
207
+ * in JSON output; FE-side camelCase transforms are deliberately not
208
+ * applied here.
209
+ */
210
+ export type SuggestedScenario = {
211
+ type: "situation";
212
+ situation: string;
213
+ options: string[];
214
+ } | {
215
+ type: "voice";
216
+ situation: string;
217
+ options: string[];
218
+ } | {
219
+ type: "binary";
220
+ description: string;
221
+ option_a: string;
222
+ option_b: string;
223
+ } | {
224
+ type: "micro-story";
225
+ prompt: string;
226
+ };
227
+ export interface SuggestScenariosResponse {
228
+ scenarios: SuggestedScenario[];
229
+ }
230
+ export interface EvidenceTraceInput {
231
+ text: string;
232
+ source: EvidenceSource;
233
+ scenario_prompt?: string;
234
+ raw_response?: Record<string, unknown> | null;
235
+ }
236
+ export interface AddEvidenceRequest {
237
+ traces: EvidenceTraceInput[];
238
+ }
239
+ export interface EvidenceTraceResponse {
240
+ id: string;
241
+ text: string;
242
+ source: EvidenceSource;
243
+ scenario_prompt: string;
244
+ raw_response: Record<string, unknown> | null;
245
+ created_at: string;
246
+ }
191
247
  export interface Tester {
192
248
  id: string;
193
249
  iteration_id: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ishlabs/cli",
3
- "version": "0.14.1",
3
+ "version": "0.15.0",
4
4
  "description": "The command-line interface for ish",
5
5
  "type": "module",
6
6
  "bin": {