@ishlabs/cli 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -278,6 +278,53 @@ function pickFields(data, fields) {
278
278
  }
279
279
  return data;
280
280
  }
281
/**
 * Pattern A: warn on stderr when `--fields` names keys that are not present
 * on the response, so a typo or wrong projection is not mistaken for "this
 * field does not exist on the wire".
 *
 * The response shape is probed as follows:
 *   - bare array: the first element, when it is a non-null object;
 *   - list wrapper (per `isListWrapper`) with a non-empty `items` array whose
 *     first element is a non-null object: that first element;
 *   - any other non-null object: the object itself.
 * When nothing can be probed (empty array, primitive, null) no warning is
 * emitted. The caller invokes this from jsonOutput before pickFields.
 *
 * Only OWN properties of the probe count as "present". The `in` operator also
 * walks the prototype chain, which made names such as `constructor` or
 * `toString` look like real response fields and suppressed the warning.
 *
 * @param data   The response payload about to be projected.
 * @param fields The field names requested via `--fields`.
 */
function warnOnUnknownFields(data, fields) {
    const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);
    const isObject = (value) => typeof value === "object" && value !== null;
    let probe = null;
    if (Array.isArray(data)) {
        if (data.length > 0 && isObject(data[0])) {
            probe = data[0];
        }
    }
    else if (data && typeof data === "object") {
        const wrapsItems = isListWrapper(data) && Array.isArray(data.items) && data.items.length > 0
            && isObject(data.items[0]);
        probe = wrapsItems ? data.items[0] : data;
    }
    if (!probe)
        return;
    const missing = fields.filter((f) => !hasOwn(probe, f));
    if (missing.length === 0)
        return;
    // Pattern DD: surface↔backend rename hints. The agent-friendly noun is
    // "workspace" but the backend stores `product_id`; agents who guess the
    // surface name need a did-you-mean to find the actual response key.
    const RENAME_MAP = {
        workspace_id: "product_id",
        workspace: "product",
    };
    // Own-property lookup on the map too, so a requested name like
    // `constructor` cannot resolve to something inherited from Object.prototype.
    const renameHints = missing
        .filter((m) => hasOwn(RENAME_MAP, m) && hasOwn(probe, RENAME_MAP[m]))
        .map((m) => `${m} → ${RENAME_MAP[m]}`);
    // List at most 12 keys so the warning stays a single readable line.
    const keys = Object.keys(probe);
    const available = keys.slice(0, 12).join(", ");
    const more = keys.length > 12 ? `, … (${keys.length - 12} more)` : "";
    const didYouMean = renameHints.length > 0 ? ` Did you mean: ${renameHints.join(", ")}?` : "";
    console.error(`warning: --fields requested ${missing.length === 1 ? "name" : "names"} not on the response: ${missing.join(", ")}.${didYouMean} Available: ${available}${more}.`);
}
281
328
  /** Serialize data as JSON, applying lean transform and field selection. */
282
329
  function jsonOutput(data, options = {}) {
283
330
  let out;
@@ -297,6 +344,7 @@ function jsonOutput(data, options = {}) {
297
344
  out = leanJson(data, options.writePath);
298
345
  }
299
346
  if (_fields && _fields.length > 0) {
347
+ warnOnUnknownFields(out, _fields);
300
348
  out = pickFields(out, _fields);
301
349
  }
302
350
  // Pattern Ω capture mode: --get <field> returns bare values instead of
@@ -396,12 +444,19 @@ export function outputList(rows, json) {
396
444
  /**
397
445
  * Error with valid options — used for content_type and similar validation.
398
446
  * Surfaces valid_options in JSON so agents can self-correct.
447
+ *
448
+ * Optional `hint` is the agent's *actionable next step* (e.g. for a wrong
449
+ * --group-by axis on the current modality, the axis that DOES apply). Distinct
450
+ * from `valid_options`, which describes where the supplied value WOULD be
451
+ * valid. Both serialize into the error envelope when present.
399
452
  */
400
453
  export class ValidationError extends Error {
401
454
  valid_options;
402
- constructor(message, valid_options) {
455
+ hint;
456
+ constructor(message, valid_options, hint) {
403
457
  super(message);
404
458
  this.valid_options = valid_options;
459
+ this.hint = hint;
405
460
  this.name = "ValidationError";
406
461
  }
407
462
  }
@@ -434,6 +489,11 @@ function suggestionsForError(err) {
434
489
  return [
435
490
  "Run a list command to see available resources",
436
491
  "Check that the alias or ID is correct",
492
+ // Pattern R: an active workspace / study / ask saved in config can
493
+ // outlive the resource on the server. Implicit lookups then 404
494
+ // with no indication that the ID came from config. `ish status`
495
+ // flags orphans; `<entity> use --clear` resets the active value.
496
+ "If you didn't pass the resource explicitly, your saved active workspace/study/ask may be stale — run `ish status` to check, then `ish workspace use --clear` (or `ish study use --clear` / `ish ask use --clear`) to reset.",
437
497
  ];
438
498
  case "insufficient_credits":
439
499
  return ["Purchase more credits at https://app.ishlabs.io"];
@@ -593,11 +653,14 @@ export function outputError(err, json) {
593
653
  error_code: "validation_error",
594
654
  retryable: false,
595
655
  valid_options: err.valid_options,
656
+ ...(err.hint && { hint: err.hint }),
596
657
  ...(suggestions.length > 0 && { suggestions }),
597
658
  }));
598
659
  }
599
660
  else {
600
661
  console.error(`Error: ${err.message}`);
662
+ if (err.hint)
663
+ console.error(` hint: ${err.hint}`);
601
664
  for (const s of suggestions)
602
665
  console.error(` → ${s}`);
603
666
  }
@@ -635,6 +698,9 @@ export function outputError(err, json) {
635
698
  ? tagged.suggestions.filter((s) => typeof s === "string")
636
699
  : [];
637
700
  const mergedSuggestions = [...new Set([...suggestions, ...taggedSuggestions])];
701
+ const availableValues = Array.isArray(tagged.available_values)
702
+ ? tagged.available_values.filter((s) => typeof s === "string")
703
+ : undefined;
638
704
  if (json) {
639
705
  console.error(JSON.stringify({
640
706
  // Generic Error: CLI-thrown (we control the message), so we don't
@@ -647,6 +713,7 @@ export function outputError(err, json) {
647
713
  ...(errorKind && { error_kind: errorKind }),
648
714
  ...(example && { example }),
649
715
  ...(progress !== undefined && { progress }),
716
+ ...(availableValues && availableValues.length > 0 && { available_values: availableValues }),
650
717
  ...(seededIds && { seeded_but_not_dispatched_ids: seededIds }),
651
718
  ...(seededAliases && { seeded_but_not_dispatched_aliases: seededAliases }),
652
719
  ...(mergedSuggestions.length > 0 && { suggestions: mergedSuggestions }),
@@ -992,12 +1059,20 @@ export function formatStudyDetail(study, json, options = {}, participants) {
992
1059
  * study state — fields default to `null`, `0`, or `[]` when nothing has run.
993
1060
  * Agents can rely on the keys always being present (M4).
994
1061
  */
995
- function buildStudyResultsEnvelope(study, participants) {
1062
+ export function buildStudyResultsEnvelope(study, participants) {
996
1063
  const allParticipants = collectParticipants(participants, Array.isArray(study.iterations) ? study.iterations : []);
997
1064
  const studyAlias = study.id
998
1065
  ? deterministicAlias(ALIAS_PREFIX.study, String(study.id))
999
1066
  : null;
1000
1067
  const completedCount = allParticipants.filter((t) => t.status === "completed" || t.status === "complete").length;
1068
+ // Pattern N: per-status breakdown so callers can distinguish running /
1069
+ // pending / cancelled from terminal completed/failed. Additive — the
1070
+ // aggregate counts (`completed_count` / `failed_count`) stay alongside.
1071
+ const participantStatusCounts = {};
1072
+ for (const t of allParticipants) {
1073
+ const key = (t.status || "unknown").toLowerCase();
1074
+ participantStatusCounts[key] = (participantStatusCounts[key] || 0) + 1;
1075
+ }
1001
1076
  // Aggregate sentiment across all interactions on all participants.
1002
1077
  const sentimentCounts = {};
1003
1078
  let sentimentTotal = 0;
@@ -1066,6 +1141,7 @@ function buildStudyResultsEnvelope(study, participants) {
1066
1141
  participant_count: allParticipants.length,
1067
1142
  completed_count: completedCount,
1068
1143
  failed_count: failedCount,
1144
+ participant_status_counts: participantStatusCounts,
1069
1145
  sentiment,
1070
1146
  interview_answers: interviewAnswers,
1071
1147
  participants: participantRows,
@@ -2226,3 +2302,215 @@ function formatDate(value) {
2226
2302
  return str.slice(0, 10);
2227
2303
  }
2228
2304
  }
2305
/** Lower-cased sentiment labels rendered with `c.green`. */
const POSITIVE_SENTIMENT = new Set(["satisfied", "curious", "engaged", "confident", "delighted"]);
/** Lower-cased sentiment labels rendered with `c.red`. */
const NEGATIVE_SENTIMENT = new Set(["frustrated", "confused", "blocked", "anxious", "disappointed"]);
/**
 * Choose the colour used to draw a sentiment label.
 *
 * Matching is case-insensitive. Labels in neither set fall back to `c.dim`.
 * NOTE(review): `c` is defined elsewhere in this file — presumably the
 * terminal colour-code table; confirm.
 *
 * @param label Sentiment label as it appears on the response.
 * @returns `c.green`, `c.red`, or `c.dim`.
 */
function sentimentColor(label) {
    const normalized = label.toLowerCase();
    return POSITIVE_SENTIMENT.has(normalized)
        ? c.green
        : NEGATIVE_SENTIMENT.has(normalized)
            ? c.red
            : c.dim;
}
2315
/**
 * Turn a `{ label: count }` map into printable bar-chart lines.
 *
 * Entries whose count is not greater than zero are dropped; an empty result
 * means there was nothing to draw. Lines are ordered by count (largest
 * first), ties broken by label via `localeCompare`. Every surviving entry
 * gets at least one bar cell, and the largest entry gets `width` cells.
 *
 * @param hist    Map of label to count.
 * @param options `width` — bar length of the largest entry (default 20);
 *                `indent` — prefix for every line (default " ").
 * @returns One formatted string per surviving entry.
 */
function asciiHistogram(hist, options = {}) {
    const barWidth = options.width ?? 20;
    const prefix = options.indent ?? " ";
    const rows = Object.entries(hist).filter((entry) => entry[1] > 0);
    if (rows.length === 0) {
        return [];
    }
    // Single pass for both the peak count and the widest label.
    let peak = 0;
    let widestLabel = 0;
    for (const [name, value] of rows) {
        if (value > peak) {
            peak = value;
        }
        if (name.length > widestLabel) {
            widestLabel = name.length;
        }
    }
    rows.sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0]));
    const lines = [];
    for (const [label, count] of rows) {
        const cells = peak > 0 ? Math.max(1, Math.round((count / peak) * barWidth)) : 0;
        const tint = sentimentColor(label);
        lines.push(`${prefix}${label.padEnd(widestLabel)} ${tint}${"█".repeat(cells)}${c.reset} ${count}`);
    }
    return lines;
}
2331
/**
 * Extract the slice objects from a `--group-by` projection.
 *
 * Every axis is wrapped in the uniform SliceResponse envelope
 * `{ axis, rows, totals_unfiltered, modality_warnings, study_id, modality }`;
 * the slices live under `rows`.
 *
 * @param projection The envelope as received (any value is tolerated).
 * @returns The entries of `rows` that are non-null, non-array objects, or an
 *          empty array when the envelope or its `rows` array is missing.
 */
function slicesFromProjection(projection) {
    const isPlainObject = (value) => Boolean(value) && typeof value === "object" && !Array.isArray(value);
    if (!isPlainObject(projection) || !Array.isArray(projection.rows)) {
        return [];
    }
    return projection.rows.filter((row) => isPlainObject(row));
}
2343
/**
 * Sum `interaction_count` across slices.
 *
 * A slice contributes only when its `interaction_count` is of type number;
 * anything else (missing, string, null) counts as zero.
 *
 * @param slices Slice objects, e.g. the result of `slicesFromProjection`.
 * @returns The summed interaction count.
 */
function totalInteractionsFromSlices(slices) {
    return slices.reduce((sum, slice) => sum + (typeof slice.interaction_count === "number" ? slice.interaction_count : 0), 0);
}
2351
/**
 * Read the `totals_unfiltered` object off a projection envelope.
 *
 * @param projection The envelope as received (any value is tolerated).
 * @returns The `totals_unfiltered` value when both the envelope and that
 *          value are non-null, non-array objects; otherwise `null`.
 */
function totalsUnfilteredFromProjection(projection) {
    const isPlainObject = (value) => Boolean(value) && typeof value === "object" && !Array.isArray(value);
    if (!isPlainObject(projection)) {
        return null;
    }
    const totals = projection.totals_unfiltered;
    return isPlainObject(totals) ? totals : null;
}
2360
/**
 * Print one `--group-by iteration` slice: a heading with participant and
 * interaction counts, the sentiment histogram, the top actions (when any),
 * and each sample comment in quotes.
 *
 * Reads the histogram from `slice.sentiment`.
 *
 * @param slice One row of the iteration projection.
 */
function renderIterationSlice(slice) {
    const counted = (n, noun) => `${n} ${noun}${n !== 1 ? "s" : ""}`;
    const title = String(slice.iteration_label ?? slice.iteration_id ?? "?");
    const participants = Number(slice.participant_count ?? 0);
    const interactions = Number(slice.interaction_count ?? 0);
    console.log(`\n ${c.bold}Iteration ${title}${c.reset} ${c.dim}${counted(participants, "participant")} · ${counted(interactions, "interaction")}${c.reset}`);
    asciiHistogram(slice.sentiment ?? {}, { indent: " " }).forEach((row) => {
        console.log(row);
    });
    const actions = Array.isArray(slice.top_actions) ? slice.top_actions : [];
    if (actions.length > 0) {
        const summary = actions.map((a) => `${a.action_type} ×${a.count}`).join(", ");
        console.log(` ${c.dim}Top actions:${c.reset} ${summary}`);
    }
    if (Array.isArray(slice.sample_comments)) {
        for (const quote of slice.sample_comments) {
            console.log(` ${c.dim}"${quote}"${c.reset}`);
        }
    }
}
2378
/**
 * Print one `--group-by frame` slice: a heading with the frame label plus
 * interaction and participant counts, the sentiment histogram, and each
 * sample comment in quotes.
 *
 * The heading uses `frame_label` when it is truthy, otherwise `frame_id`,
 * otherwise "?". The "?" fallback matches the other slice renderers and
 * keeps the literal text "undefined" out of the heading when a row carries
 * neither field.
 *
 * @param slice One row of the frame projection.
 */
function renderFrameSlice(slice) {
    const label = slice.frame_label
        ? String(slice.frame_label)
        : String(slice.frame_id ?? "?");
    const iCount = Number(slice.interaction_count ?? 0);
    // The participant count is derived from the alias list, not a count field.
    const aliases = Array.isArray(slice.participant_aliases) ? slice.participant_aliases : [];
    console.log(`\n ${c.bold}${label}${c.reset} ${c.dim}${iCount} interaction${iCount !== 1 ? "s" : ""} · ${aliases.length} participant${aliases.length !== 1 ? "s" : ""}${c.reset}`);
    const hist = slice.sentiment_histogram ?? {};
    for (const line of asciiHistogram(hist, { indent: " " }))
        console.log(line);
    const comments = Array.isArray(slice.sample_comments) ? slice.sample_comments : [];
    for (const comment of comments) {
        console.log(` ${c.dim}"${comment}"${c.reset}`);
    }
}
2391
/**
 * Print one `--group-by segment` slice: a heading, the sentiment histogram,
 * an engagement line (when the engagement histogram has any keys), and each
 * sample comment in quotes.
 *
 * Heading rules: with a segment index it reads "Segment <idx>", followed by
 * " — <label>" when a label is set; without an index it is the label alone,
 * or "Segment ?" when there is no label either.
 *
 * @param slice One row of the segment projection.
 */
function renderSegmentSlice(slice) {
    const index = slice.segment_index;
    const name = slice.segment_label ? String(slice.segment_label) : null;
    let heading;
    if (index === null || index === undefined) {
        heading = name ?? "Segment ?";
    }
    else {
        heading = `Segment ${index}${name ? ` — ${name}` : ""}`;
    }
    const interactions = Number(slice.interaction_count ?? 0);
    console.log(`\n ${c.bold}${heading}${c.reset} ${c.dim}${interactions} interaction${interactions !== 1 ? "s" : ""}${c.reset}`);
    asciiHistogram(slice.sentiment_histogram ?? {}, { indent: " " }).forEach((row) => {
        console.log(row);
    });
    const engagementPairs = Object.entries(slice.engagement_histogram ?? {});
    if (engagementPairs.length > 0) {
        const summary = engagementPairs.map(([level, count]) => `${count} ${level}`).join(", ");
        console.log(` ${c.dim}Engagement:${c.reset} ${summary}`);
    }
    if (Array.isArray(slice.sample_comments)) {
        for (const quote of slice.sample_comments) {
            console.log(` ${c.dim}"${quote}"${c.reset}`);
        }
    }
}
2412
/**
 * Print one `--group-by turn` slice: a heading with the turn index and
 * interaction count (plus a red failure count when failures > 0), the
 * sentiment histogram, and each sample reply in quotes.
 *
 * @param slice One row of the turn projection.
 */
function renderTurnSlice(slice) {
    const turnIndex = Number(slice.turn_index ?? 0);
    const interactions = Number(slice.interaction_count ?? 0);
    const failureCount = Number(slice.failures ?? 0);
    let failureSuffix = "";
    if (failureCount > 0) {
        failureSuffix = ` ${c.red}${failureCount} failure${failureCount !== 1 ? "s" : ""}${c.reset}`;
    }
    console.log(`\n ${c.bold}Turn ${turnIndex}${c.reset} ${c.dim}${interactions} interaction${interactions !== 1 ? "s" : ""}${c.reset}${failureSuffix}`);
    asciiHistogram(slice.sentiment_histogram ?? {}, { indent: " " }).forEach((row) => {
        console.log(row);
    });
    if (Array.isArray(slice.sample_replies)) {
        for (const reply of slice.sample_replies) {
            console.log(` ${c.dim}"${reply}"${c.reset}`);
        }
    }
}
2426
/**
 * Print one `--group-by assignment` slice: a heading with the assignment
 * name and interaction count, the sentiment histogram, and — when
 * `step_completion` has entries — a table of per-step pass / inconclusive /
 * fail counts and rate.
 *
 * @param slice One row of the assignment projection.
 */
function renderAssignmentSlice(slice) {
    const title = String(slice.assignment_name ?? slice.assignment_id ?? "?");
    const interactions = Number(slice.interaction_count ?? 0);
    console.log(`\n ${c.bold}${title}${c.reset} ${c.dim}${interactions} interaction${interactions !== 1 ? "s" : ""}${c.reset}`);
    asciiHistogram(slice.sentiment_histogram ?? {}, { indent: " " }).forEach((row) => {
        console.log(row);
    });
    const steps = Array.isArray(slice.step_completion) ? slice.step_completion : [];
    if (steps.length === 0) {
        return;
    }
    const tableRows = [];
    for (const step of steps) {
        // A non-numeric rate is shown as "-" rather than formatted.
        const rate = typeof step.rate === "number" ? step.rate.toFixed(2) : "-";
        tableRows.push([
            String(step.name ?? step.step_id ?? "?"),
            String(step.passed ?? 0),
            String(step.inconclusive ?? 0),
            String(step.failed ?? 0),
            rate,
        ]);
    }
    console.log(` ${c.dim}Steps:${c.reset}`);
    printTable(["STEP", "PASSED", "INCONCLUSIVE", "FAILED", "RATE"], tableRows);
}
2446
/**
 * Print one `--group-by step` slice: a heading "<assignment> › <step>" with
 * the pass ratio and the inconclusive / failed / rate breakdown, followed by
 * a table of per-participant verdicts when any are present.
 *
 * The pass ratio is red when failures outnumber passes, green when passes
 * outnumber failures, and dim when they are equal. Verdict reasons are
 * passed through `truncate(..., 60)` before display.
 *
 * @param slice One row of the step projection.
 */
function renderStepSlice(slice) {
    const stepName = String(slice.step_name ?? slice.step_id ?? "?");
    const assignmentName = String(slice.assignment_name ?? "?");
    const totalCount = Number(slice.total ?? 0);
    const passCount = Number(slice.passed ?? 0);
    const inconclusiveCount = Number(slice.inconclusive ?? 0);
    const failCount = Number(slice.failed ?? 0);
    const rateText = typeof slice.rate === "number" ? slice.rate.toFixed(2) : "-";
    let ratioColor = c.dim;
    if (failCount > passCount) {
        ratioColor = c.red;
    }
    else if (passCount > failCount) {
        ratioColor = c.green;
    }
    console.log(`\n ${c.bold}${assignmentName} › ${stepName}${c.reset} ${ratioColor}${passCount}/${totalCount} passed${c.reset} ${c.dim}(${inconclusiveCount} inconclusive, ${failCount} failed, rate ${rateText})${c.reset}`);
    const verdictList = Array.isArray(slice.participant_verdicts) ? slice.participant_verdicts : [];
    if (verdictList.length === 0) {
        return;
    }
    const tableRows = verdictList.map((entry) => {
        const reason = entry.reason ? truncate(String(entry.reason), 60) : "-";
        return [String(entry.participant_alias ?? "-"), String(entry.verdict ?? "-"), reason];
    });
    printTable(["PARTICIPANT", "VERDICT", "REASON"], tableRows);
}
2468
/**
 * Render a `--group-by <kind>` projection delivered in the uniform
 * `SliceResponse` envelope (`{ axis, rows, totals_unfiltered,
 * modality_warnings, study_id, modality }`).
 *
 * JSON mode prints `jsonOutput(projection, { preProjected: true })` and
 * returns; `preProjected` is there so the lean transform leaves the stable
 * empty values in place. Human mode prints a headline ("Sliced by <kind>:
 * N groups (matched[/unfiltered] interactions)") and then one section per
 * entry of `rows`, using the renderer for that kind. A kind with no renderer
 * prints the headline only.
 *
 * @param projection The SliceResponse envelope.
 * @param kind       The group-by axis: "iteration", "frame", "segment",
 *                   "turn", "assignment" or "step".
 * @param json       When truthy, emit JSON instead of the human rendering.
 */
export function formatStudyResultsGroupBy(projection, kind, json) {
    if (json) {
        console.log(jsonOutput(projection, { preProjected: true }));
        return;
    }
    const slices = slicesFromProjection(projection);
    const matchedInteractions = totalInteractionsFromSlices(slices);
    const unfilteredTotals = totalsUnfilteredFromProjection(projection);
    let unfilteredInteractions = null;
    if (unfilteredTotals && typeof unfilteredTotals.interaction_count === "number") {
        unfilteredInteractions = unfilteredTotals.interaction_count;
    }
    const groupWord = `group${slices.length !== 1 ? "s" : ""}`;
    const ofTotal = unfilteredInteractions !== null ? `/${unfilteredInteractions}` : "";
    const interactionWord = `interaction${matchedInteractions !== 1 ? "s" : ""}`;
    const headline = `Sliced by ${kind}: ${slices.length} ${groupWord} (${matchedInteractions}${ofTotal} ${interactionWord})`;
    console.log(`${c.bold}${headline}${c.reset}`);
    if (slices.length === 0) {
        console.log(` ${c.dim}(no groups matched)${c.reset}`);
        return;
    }
    // A Map rather than an object literal, so an unexpected `kind` can never
    // resolve to something inherited from Object.prototype.
    const renderers = new Map([
        ["iteration", renderIterationSlice],
        ["frame", renderFrameSlice],
        ["segment", renderSegmentSlice],
        ["turn", renderTurnSlice],
        ["assignment", renderAssignmentSlice],
        ["step", renderStepSlice],
    ]);
    const render = renderers.get(kind);
    if (!render) {
        return;
    }
    for (const slice of slices) {
        render(slice);
    }
}
@@ -917,6 +917,77 @@ Rules to remember:
917
917
  See \`ish docs get-page concepts/extending-a-simulation\` for the full
918
918
  mental model (cancel + extend as a pair, error envelopes, cost model).
919
919
 
920
+ ## 12. Slice study results by frame / segment / turn / sentiment
921
+
922
+ Goal: ask narrower questions of a finished run than the kitchen-sink
923
+ \`ish study results\` envelope answers. The canonical use case:
924
+ **"what differed on the login screen across these five iterations?"**.
925
+
926
+ \`\`\`bash
927
+ # 12a. Across-iterations comparison on one frame (the canonical question).
928
+ # --frame matches frame names by case-insensitive substring; pass
929
+ # a full Frame UUID or an f-… alias when the name is ambiguous.
930
+ ish study results s-b2c --frame login --group-by iteration --json
931
+
932
+ # 12b. Frustrated reactions to one segment of a video study:
933
+ ish study results s-b2c --segment 3 --sentiment Frustrated
934
+
935
+ # 12c. Who failed the "verify email" step, and why?
936
+ # --group-by step exposes per-participant verdicts inline so you
937
+ # don't fan out across participants.
938
+ ish study results s-b2c --assignment "Sign up" --step verify-email \\
939
+ --group-by step --json
940
+
941
+ # 12d. Pair-mode chat: only side A turn 4.
942
+ ish study results s-b2c --side a --turn 4
943
+
944
+ # 12e. Sanity-check coverage when a filter narrows the slice:
945
+ ish study results s-b2c --frame checkout --json \\
946
+ | jq '{matched: .participant_count, total: .totals_unfiltered.participant_count}'
947
+
948
+ # 12f. A filter that resolves but matches zero interactions still returns
949
+ # the stable envelope (participant_count: 0, totals_unfiltered populated,
950
+ # exit code 0, not 4). A --frame value that resolves to NO frame differs:
951
+ ish study results s-b2c --frame doesnotexist --json
952
+ # → ValidationError because "doesnotexist" matches no frame names; pass
953
+ # --include-unmatched only when --frame DID resolve and you want the
954
+ # degraded captures (frame_version_id: null) back.
955
+ \`\`\`
956
+
957
+ Every \`--group-by <axis>\` call returns the same envelope:
958
+ \`{axis, rows, totals_unfiltered, modality_warnings, study_id, modality}\`.
959
+ The \`rows\` array holds axis-specific slice objects. The envelope is
960
+ uniform across all six axes — agents can code one shape and key on
961
+ \`axis\` / \`modality\` to dispatch on what's inside \`rows\`.
962
+
963
+ Rules to remember:
964
+ - **Filters compose with AND across flags; OR within \`--sentiment\`.**
965
+ \`--frame login --sentiment Frustrated,Confused\` keeps only login-frame
966
+ interactions whose sentiment is Frustrated OR Confused.
967
+ - **Modality mismatch is not an error.** \`--segment 0\` on an
968
+ interactive study emits a stderr warning and is ignored. The
969
+ exception is **\`--group-by\`** — \`--group-by frame\` on a chat study,
970
+ \`--group-by turn\` on a video study, etc. error at the router (exit 2).
971
+ - **Empty-slice contract: exit 0, not 4.** Zero matches return a
972
+ stable envelope with \`participant_count: 0\` and
973
+ \`totals_unfiltered\` populated. Agents key on
974
+ \`totals_unfiltered.participant_count\` to ask "is the filter too
975
+ tight, or did the run not produce data?".
976
+ - \`--frame\` accepts a name substring, a Frame UUID, an \`f-…\` alias,
977
+ or a \`frame_version_id\` UUID. Ambiguous substring (matches >1
978
+ frame) errors with the candidate list.
979
+ - \`--summary\` composes with filters: the summary is computed over
980
+ the filtered set. \`--transcript\` is single-participant and errors
981
+ (exit 2) when **any** filter or \`--group-by\` is set.
982
+ - Per-step output exposes \`participant_verdicts: [{participant_alias,
983
+ verdict, reason, evidence_interaction_ids}]\` on **each row of
984
+ \`rows[]\`** (one per \`(assignment, step)\` pair) — not
985
+ \`per_participant_verdicts\`. The verdict enum is \`passed\` /
986
+ \`inconclusive\` / \`failed\`.
987
+
988
+ See \`ish docs get-page guides/slicing-results\` for the full filter
989
+ table, projection shapes, and the defensive null-handling rules.
990
+
920
991
  ## Tips for chaining commands as an agent
921
992
 
922
993
  - Capture aliases from JSON: \`ITER=$(ish iteration create --url … --json | jq -r .alias)\`
@@ -1010,6 +1081,11 @@ mental model (cancel + extend as a pair, error envelopes, cost model).
1010
1081
  | List of participants from \`study run\` | \`--json \\| jq '.participants[].id'\` | \`--get participant_aliases\` (or \`participant_ids\` for UUIDs) |
1011
1082
  | Per-answer sentiment | \`--json \\| jq '...'\` per participant | \`ish study results <id> --json\` (sentiment is on every answer row) |
1012
1083
  | "Did this run land?" headline | \`study results --json\` + jq filtering | \`ish study results <id> --summary --json\` |
1084
+ | Across-iterations comparison on one frame | \`study results --json\` + jq per iteration | \`ish study results <id> --frame login --group-by iteration --json\` |
1085
+ | Per-step pass/fail with reasons inline | \`study participant --json\` per participant + jq | \`ish study results <id> --step verify-email --group-by step --json\` |
1086
+ | Frustrated reactions to one media segment | \`study results --json\` + jq | \`ish study results <id> --segment 3 --sentiment Frustrated --json\` |
1087
+ | Sanity-check filter coverage | hand-count \`.participants\` vs total | \`--get totals_unfiltered.participant_count\` (set on every sliced envelope) |
1088
+ | Know the sliced-results envelope shape | guess per axis | \`{axis, rows[], totals_unfiltered, modality_warnings, study_id, modality}\` — every \`--group-by\` axis |
1013
1089
  | Chat transcript for one participant (external_chatbot) | \`study participant --json\` + jq | \`ish study results <id> --transcript <participant_id> --json\` |
1014
1090
  | Pair-mode conversation transcripts | \`study participant --json\` per participant | \`ish iteration get <iter-id> --json \\| jq '.conversations[]'\` |
1015
1091
  | Participant headline only (no action timeline) | \`study participant --json\` + jq | \`ish study participant <id> --summary --json\` |
@@ -6,6 +6,19 @@
6
6
  * (person, interactions[], participant_summary, interview_answers, …) that
7
7
  * used to be embedded under `study.iterations[*].participants[*]` on the
8
8
  * legacy `GET /studies/{id}` response.
9
+ *
10
+ * Audit (study-results-slice plan, T4): the flat endpoint already returns
11
+ * everything the new `ish study results --frame/--segment/--step/...` filter
12
+ * pipeline needs in a single round-trip — no per-participant fan-out:
13
+ * - `interactions[]` (modality-discriminated via `ParticipantWithAttributesPublicResponse`)
14
+ * - `participant_assignments[].step_results[]` with `{step_id, name,
15
+ * description, verdict, reason, evidence_interaction_ids[]}`, hydrated
16
+ * by `attach_participant_step_results_flat` in the study repository
17
+ * before serialisation (`ish-backend/app/api/study/repository.py:315`)
18
+ * - `participant_summary`, `interview_answers`
19
+ * If a future filter ever needs `conversation_id` on each interaction (for
20
+ * `--group-by conversation`), that's a backend-side addition on
21
+ * `_InteractionResponseBase`, not a CLI change.
9
22
  */
10
23
  import type { ApiClient } from "./api-client.js";
11
24
  import type { Participant } from "./types.js";
@@ -6,6 +6,19 @@
6
6
  * (person, interactions[], participant_summary, interview_answers, …) that
7
7
  * used to be embedded under `study.iterations[*].participants[*]` on the
8
8
  * legacy `GET /studies/{id}` response.
9
+ *
10
+ * Audit (study-results-slice plan, T4): the flat endpoint already returns
11
+ * everything the new `ish study results --frame/--segment/--step/...` filter
12
+ * pipeline needs in a single round-trip — no per-participant fan-out:
13
+ * - `interactions[]` (modality-discriminated via `ParticipantWithAttributesPublicResponse`)
14
+ * - `participant_assignments[].step_results[]` with `{step_id, name,
15
+ * description, verdict, reason, evidence_interaction_ids[]}`, hydrated
16
+ * by `attach_participant_step_results_flat` in the study repository
17
+ * before serialisation (`ish-backend/app/api/study/repository.py:315`)
18
+ * - `participant_summary`, `interview_answers`
19
+ * If a future filter ever needs `conversation_id` on each interaction (for
20
+ * `--group-by conversation`), that's a backend-side addition on
21
+ * `_InteractionResponseBase`, not a CLI change.
9
22
  */
10
23
  export async function fetchStudyParticipants(client, studyId, opts) {
11
24
  return await client.get(`/studies/${studyId}/participants`, undefined, opts);
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Pure filter pipeline for `ish study results`.
3
+ *
4
+ * Input : the raw `GET /studies/{id}` payload, the raw
5
+ * `GET /studies/{id}/participants` payload, the raw
6
+ * `GET /studies/{id}/frames` payload (or [] when --frame wasn't
7
+ * passed), and a `ResultsFilters` struct from the command surface.
8
+ * Output : a `FilteredResults` struct — the trimmed participant graph,
9
+ * pre-filter counts on `totals_unfiltered`, and a `warnings[]`
10
+ * list of modality-mismatch notes for the surface to surface on
11
+ * stderr.
12
+ *
13
+ * Has no IO and no console side-effects — the caller (study results action)
14
+ * owns network calls and stderr; we just compute. That keeps the function
15
+ * trivially unit-testable and lets the projection builders (T3) consume the
16
+ * same shape without re-walking the graph.
17
+ *
18
+ * Defensive null handling is the load-bearing piece. See the plan's
19
+ * "Defensive handling of nullable fields" section — read it before editing
20
+ * any predicate.
21
+ */
22
/**
 * Filter selections for `ish study results`, as collected by the command
 * surface. Every member is optional; an absent member means "no filter on
 * that dimension".
 */
export interface ResultsFilters {
    /**
     * Frame selector, resolved against the study's frames list. Accepts a
     * frame name (case-insensitive substring), a Frame UUID, a frame alias
     * (`f-...`), or a `frame_version_id` UUID.
     */
    frame?: string;
    /**
     * Segment selector: either a segment index (a string that parses as an
     * int) or a substring matched against `actions[0].data.segment_label` on
     * each interaction.
     */
    segment?: string;
    /** Chat turn index, compared with `actions[0].data.turn_index`. */
    turn?: number;
    /** participant_pair side, compared with the parent assignment's `side`. */
    side?: "a" | "b";
    /**
     * Assignment selector: an assignment UUID, or a substring matched
     * against `study.assignments[].name`.
     */
    assignment?: string;
    /**
     * Step selector: a step id, or a case-insensitive substring of the step
     * `name`. Evaluated over `participant_assignments[].step_results[]`.
     */
    step?: string;
    /** Sentiment labels (comma-separated or repeated flag), case-insensitive. */
    sentiment?: string[];
    /** Actor, matched case-insensitively against `interaction.actor`. */
    actor?: "ai" | "human" | "user";
    /** Iteration UUID or iteration `label`. */
    iteration?: string;
    /** Participant UUID or participant alias (`pt-...`). */
    participant?: string;
    /**
     * Only meaningful together with `frame`: keep interactions whose
     * `frame_version_id` is null under a synthetic `_unmatched` bucket
     * instead of dropping them.
     */
    includeUnmatched?: boolean;
    /**
     * Only meaningful together with `step`: additionally drop interactions
     * whose id does not appear in any surviving
     * `step_results[].evidence_interaction_ids[]`.
     */
    includeEvidence?: boolean;
}
54
/**
 * Result of running the `ish study results` filter pipeline: the trimmed
 * participant graph plus the bookkeeping the projection builders and the
 * command surface need.
 */
export interface FilteredResults {
    /**
     * Shallow copy of the study payload, same shape as the raw response.
     * Participants are NOT embedded here — they travel separately on
     * `participants`.
     */
    study: Record<string, unknown>;
    /**
     * Participants whose `interactions[]` survived the predicate walk.
     * Participants left with no interactions are dropped only when an
     * interaction-level filter was set, so an unsliced "who ran?" call keeps
     * the stable schema.
     */
    participants: Array<Record<string, unknown>>;
    /**
     * The frame list returned by the surface, with each frame's
     * `frame_version_ids[]` flattened onto the row for downstream
     * enrichment. Empty when --frame was not passed or the modality is not
     * interactive.
     */
    frames: Array<Record<string, unknown>>;
    /**
     * Participant and interaction counts taken BEFORE filtering, so callers
     * can report "matched X / Y".
     */
    totals_unfiltered: {
        participant_count: number;
        interaction_count: number;
    };
    /**
     * Modality-mismatch notes (e.g. "--segment ignored on interactive").
     * The command surface writes these to stderr.
     */
    warnings: string[];
    /**
     * When --frame was set: the resolved frame_version_ids that passed. The
     * projection builders (T3) use it to enrich surviving interactions with
     * frame_id / frame_label without resolving the frame again.
     */
    matchedFrameVersionIds: Set<string>;
    /** Lookup from frame_version_id to `{frame_id, frame_label}` for enrichment. */
    frameVersionLookup: Map<string, {
        frame_id: string;
        frame_label: string | null;
    }>;
}
88
/**
 * Pure entry point of the results filter pipeline; the file-level comment
 * holds the full input/output contract.
 *
 * @param study        Raw `GET /studies/{id}` payload.
 * @param participants Raw `GET /studies/{id}/participants` payload.
 * @param rawFrames    Raw `GET /studies/{id}/frames` payload, or `[]` when
 *                     --frame was not passed.
 * @param filters      Filter selections from the command surface.
 * @returns The filtered participant graph with pre-filter totals and warnings.
 */
export declare function applyResultsFilters(
    study: Record<string, unknown>,
    participants: Record<string, unknown>[],
    rawFrames: Record<string, unknown>[],
    filters: ResultsFilters,
): FilteredResults;