@ishlabs/cli 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,10 +3,12 @@
3
3
  */
4
4
  import { readFileSync } from "node:fs";
5
5
  import { Option } from "commander";
6
- import { withClient, getWebUrl, terminalLink, resolveWorkspace, confirmDestructive, readFileOrStdin } from "../lib/command-helpers.js";
6
+ import { withClient, getWebUrl, terminalLink, resolveWorkspace, confirmDestructive, readFileOrStdin, collectIds } from "../lib/command-helpers.js";
7
7
  import { resolveId, tagAlias, ALIAS_PREFIX } from "../lib/alias-store.js";
8
8
  import { loadConfig, saveConfig } from "../config.js";
9
- import { formatStudyList, formatStudyDetail, formatStudyResults, buildStudyResultsSummary, buildChatTranscript, output, ValidationError, } from "../lib/output.js";
9
+ import { formatStudyList, formatStudyDetail, formatStudyResults, buildStudyResultsEnvelope, buildStudyResultsSummary, buildChatTranscript, formatStudyResultsGroupBy, output, ValidationError, } from "../lib/output.js";
10
+ import { applyResultsFilters } from "../lib/study-results-filters.js";
11
+ import { buildStudyResultsPerIteration, buildStudyResultsPerFrame, buildStudyResultsPerSegment, buildStudyResultsPerTurn, buildStudyResultsPerAssignment, buildStudyResultsPerStep, } from "../lib/study-results-projections.js";
10
12
  import { VALID_CONTENT_TYPES } from "../lib/types.js";
11
13
  import { fetchStudyParticipants } from "../lib/study-participants.js";
12
14
  import { parseAssignment, loadAssignmentsFile, validateAssignmentsArray, parseQuestion } from "../lib/study-inputs.js";
@@ -669,22 +671,41 @@ list table layout in human mode.`)
669
671
  });
670
672
  study
671
673
  .command("results")
672
- .description("View aggregated results: participant counts, sentiment, interview answers. Returns a stable envelope with empty fields when no runs have completed.")
674
+ .description("View aggregated results: participant counts, sentiment, interview answers. Returns a stable envelope with empty fields when no runs have completed. Slice with filter flags (--frame, --segment, --turn, --side, --assignment, --step, --sentiment, --actor, --iteration, --participant) or project with --group-by (iteration|frame|segment|turn|assignment|step).")
673
675
  .argument("<id>", "Study ID")
674
676
  .option("--workspace <id>", "Workspace ID; accepted for consistency (workspace is inferred from the study)")
675
- .option("--summary", "Lean summary projection: counts + sentiment + per-participant {alias, status, sentiment, comment}. Drops interview_answers + per-interaction breakdowns.")
677
+ .option("--summary", "Lean summary projection: counts + sentiment + per-participant {alias, status, sentiment, comment}. Drops interview_answers + per-interaction breakdowns. Composes with filters: `--summary --frame login` narrows the summary to the login-screen interactions.")
676
678
  // PC-N4: agents reach for `--summarize` (verb) by analogy with the MCP
677
679
  // `summarize` action; accept it as a hidden alias of --summary so the
678
680
  // canonical flag stays the documented one but the muscle-memory variant
679
681
  // works without a round-trip.
680
682
  .addOption(new Option("--summarize", "Hidden alias for --summary").hideHelp())
681
- .option("--transcript <participant_id>", "Chat transcript projection for one participant: flat role/text/turn-index array (chat-modality only). Mirrors the MCP `get_chat_transcript` shape.")
683
+ .option("--transcript <participant_id>", "Chat transcript projection for one participant: flat role/text/turn-index array (chat-modality only). Mirrors the MCP `get_chat_transcript` shape. Cannot combine with filters or --group-by (transcript is a single-participant projection).")
684
+ // --- Slice / projection flags (T5) ---
685
+ .option("--frame <ref>", "Filter to interactions whose Frame name contains <ref> (case-insensitive), or whose Frame UUID / `f-…` alias / frame_version_id matches. Interactive only — warned and ignored on other modalities.")
686
+ .option("--segment <ref>", "Filter media studies (video/audio/text/document) by segment index (integer) or segment label (substring). Image and other modalities: warned and ignored.")
687
+ .option("--turn <n>", "Filter chat interactions to a single `actions[0].data.turn_index`. Non-chat modalities: warned and ignored.")
688
+ .option("--side <a|b>", "Filter participant_pair chat interactions by assignment side. Other modalities: warned and ignored.")
689
+ .option("--assignment <ref>", "Filter to a single assignment by UUID or name (substring, case-insensitive).")
690
+ .option("--step <ref>", "Filter `participant_assignments[].step_results[]` to a single step by step-id or name (substring). Pair with --include-evidence to also drop non-evidence interactions.")
691
+ .option("--sentiment <labels>", "Filter to interactions whose sentiment.label is in the comma-separated list (case-insensitive; repeatable). Drops null-sentiment rows.", collectIds, [])
692
+ .option("--actor <actor>", "Filter to interactions whose actor is `ai`, `human`, or `user` (case-insensitive).")
693
+ .option("--iteration <ref>", "Restrict to a single iteration by UUID or label.")
694
+ .option("--participant <ref>", "Restrict to a single participant by UUID or `pt-…` alias.")
695
+ .option("--include-unmatched", "When --frame is set, keep interactions with null frame_version_id under a synthetic `_unmatched` bucket instead of dropping them.")
696
+ .option("--include-evidence", "When --step is set, also drop interactions not listed in any surviving step_results[].evidence_interaction_ids[].")
697
+ .option("--group-by <axis>", "Project results into per-axis slices: iteration | frame | segment | turn | assignment | step. Mutually exclusive with --summary and --transcript.")
682
698
  .addHelpText("after", `
683
699
  Examples:
684
700
  $ ish study results <id>
685
701
  $ ish study results <id> --json
686
702
  $ ish study results <id> --summary --json
687
703
  $ ish study results <id> --transcript pt-d4e --json
704
+ # Slice (filters compose: AND across flags, OR within --sentiment)
705
+ $ ish study results <id> --frame login --group-by iteration
706
+ $ ish study results <id> --segment 3 --sentiment Frustrated
707
+ $ ish study results <id> --assignment "Sign up" --step verify-email --group-by step
708
+ $ ish study results <id> --side a --turn 4
688
709
 
689
710
  Default --json envelope (M10: per-answer sentiment now included):
690
711
  {
@@ -707,6 +728,11 @@ Default --json envelope (M10: per-answer sentiment now included):
707
728
  ]
708
729
  }
709
730
 
731
+ When any filter flag is passed, the envelope gains a \`totals_unfiltered\` field
732
+ ({ participant_count, interaction_count }) so callers can sanity-check coverage
733
+ ("matched 12 / 80 participants"). A zero-match filter returns the stable
734
+ envelope with participant_count=0 and exit code 0 (not 4).
735
+
710
736
  --summary projection (M2-friction-7: drops the interview_answers payload):
711
737
  { study, participant_count, completed_count, failed_count, sentiment, participants: [...] }
712
738
 
@@ -723,6 +749,24 @@ Default --json envelope (M10: per-answer sentiment now included):
723
749
  "participant_summary": { "comment": "...", "sentiment": {...} }
724
750
  }
725
751
 
752
+ --group-by iteration projection:
753
+ { study, slices: [{ iteration_id, iteration_label, participant_count, interaction_count, sentiment, sample_comments, top_actions }, ...], totals_unfiltered, warnings }
754
+
755
+ --group-by frame projection (interactive only):
756
+ [{ frame_id, frame_label, interaction_count, sentiment_histogram, sample_comments, participant_aliases }, ...]
757
+
758
+ --group-by segment projection (video/audio/text/document):
759
+ [{ segment_index, segment_label, interaction_count, sentiment_histogram, engagement_histogram, sample_comments }, ...]
760
+
761
+ --group-by turn projection (chat only):
762
+ [{ turn_index, interaction_count, sentiment_histogram, sample_replies, failures }, ...]
763
+
764
+ --group-by assignment projection:
765
+ [{ assignment_id, assignment_name, interaction_count, sentiment_histogram, step_completion }, ...]
766
+
767
+ --group-by step projection:
768
+ [{ assignment_id, assignment_name, step_id, step_name, total, passed, inconclusive, failed, rate, participant_verdicts: [{ participant_alias, verdict, reason, evidence_interaction_ids }, ...] }, ...]
769
+
726
770
  Tips:
727
771
  Use \`--get <path>\` for a single value (e.g. \`--get participant_count\`),
728
772
  \`--fields a,b,c\` to project the JSON output further.
@@ -741,6 +785,7 @@ Common --get paths (default envelope):
741
785
  --get interview_answers # full per-question payload
742
786
  --get interview_answers.0.question # text of the first question
743
787
  --get interview_answers.0.answers.0.answer # first answer to the first question
788
+ --get totals_unfiltered.participant_count # pre-filter participant count (when slicing)
744
789
 
745
790
  Common --get paths (--transcript <participant_id> envelope):
746
791
  --get transcript # full role/text/turn array
@@ -749,6 +794,18 @@ Common --get paths (--transcript <participant_id> envelope):
749
794
  --get participant_summary.sentiment # aggregate sentiment map
750
795
  --get unique_bot_replies # bot-side message count
751
796
 
797
+ Common --get paths (--group-by projections):
798
+ --get slices.iteration_label # per-iteration: one label per line
799
+ --get slices.0.participant_count # per-iteration: first slice's count
800
+ --get 0.frame_label # per-frame: first frame's label
801
+ --get 0.sentiment_histogram # per-frame/segment/turn: first slice's sentiment map
802
+ --get 0.segment_index # per-segment: first segment's index
803
+ --get 0.turn_index # per-turn: first turn's index
804
+ --get 0.assignment_name # per-assignment/step: first slice's assignment
805
+ --get 0.step_name # per-step: first slice's step
806
+ --get 0.rate # per-step: first step's pass-rate
807
+ --get 0.participant_verdicts.verdict # per-step: verdict per participant
808
+
752
809
  When no runs have completed, the default envelope is returned with zero counts and empty arrays.`)
753
810
  .action(async (id, opts, cmd) => {
754
811
  await withClient(cmd, async (client, globals) => {
@@ -756,10 +813,76 @@ When no runs have completed, the default envelope is returned with zero counts a
756
813
  // into a single boolean before validation so the rest of the
757
814
  // handler reads only `summary`.
758
815
  const wantsSummary = !!(opts.summary || opts.summarize);
816
+ // T5: detect whether any filter flag was passed. Interaction-level
817
+ // and participant-level flags both count — they all narrow the
818
+ // result set. `--include-unmatched`/`--include-evidence` are
819
+ // modifiers that only make sense alongside --frame/--step but
820
+ // count as "filter intent" for the transcript/conflict check.
821
+ const hasFilter = opts.frame !== undefined ||
822
+ opts.segment !== undefined ||
823
+ opts.turn !== undefined ||
824
+ opts.side !== undefined ||
825
+ opts.assignment !== undefined ||
826
+ opts.step !== undefined ||
827
+ (opts.sentiment !== undefined && opts.sentiment.length > 0) ||
828
+ opts.actor !== undefined ||
829
+ opts.iteration !== undefined ||
830
+ opts.participant !== undefined ||
831
+ opts.includeUnmatched === true ||
832
+ opts.includeEvidence === true;
833
+ const hasGroupBy = opts.groupBy !== undefined;
834
+ // --- Conflict validation (no IO yet) ---
759
835
  if (wantsSummary && opts.transcript) {
760
836
  throw new ValidationError("Pass only one of: --summary, --transcript.", ["--summary", "--transcript"]);
761
837
  }
838
+ if (opts.transcript && (hasFilter || hasGroupBy)) {
839
+ // --transcript is a single-participant chat projection — slicing
840
+ // doesn't make sense.
841
+ throw new ValidationError("--transcript is a single-participant projection; cannot combine with filter flags or --group-by.", ["--transcript"]);
842
+ }
843
+ if (wantsSummary && hasGroupBy) {
844
+ throw new ValidationError("Pass only one of: --summary, --group-by.", ["--summary", "--group-by"]);
845
+ }
846
+ // --side validation: must be exactly "a" or "b" (case-insensitive).
847
+ const sideNormalised = opts.side ? opts.side.toLowerCase() : undefined;
848
+ if (sideNormalised !== undefined && sideNormalised !== "a" && sideNormalised !== "b") {
849
+ throw new ValidationError(`--side must be "a" or "b", got "${opts.side}".`, ["a", "b"]);
850
+ }
851
+ // --actor validation: must be one of ai|human|user (case-insensitive).
852
+ const actorNormalised = opts.actor ? opts.actor.toLowerCase() : undefined;
853
+ if (actorNormalised !== undefined &&
854
+ actorNormalised !== "ai" &&
855
+ actorNormalised !== "human" &&
856
+ actorNormalised !== "user") {
857
+ throw new ValidationError(`--actor must be "ai", "human", or "user", got "${opts.actor}".`, ["ai", "human", "user"]);
858
+ }
859
+ // --turn validation: must parse as a non-negative integer.
860
+ let turnNum;
861
+ if (opts.turn !== undefined) {
862
+ const n = parseInt(opts.turn, 10);
863
+ if (Number.isNaN(n) || n < 0 || String(n) !== opts.turn.trim()) {
864
+ throw new ValidationError(`--turn must be a non-negative integer, got "${opts.turn}".`, []);
865
+ }
866
+ turnNum = n;
867
+ }
868
+ // --group-by axis whitelist.
869
+ const VALID_GROUP_BY = [
870
+ "iteration",
871
+ "frame",
872
+ "segment",
873
+ "turn",
874
+ "assignment",
875
+ "step",
876
+ ];
877
+ let groupByKind;
878
+ if (opts.groupBy !== undefined) {
879
+ if (!VALID_GROUP_BY.includes(opts.groupBy)) {
880
+ throw new ValidationError(`--group-by must be one of: ${VALID_GROUP_BY.join(", ")}. Got "${opts.groupBy}".`, VALID_GROUP_BY);
881
+ }
882
+ groupByKind = opts.groupBy;
883
+ }
762
884
  const rid = resolveId(id);
885
+ // --- --transcript fast path (no fetch of study payload) ---
763
886
  if (opts.transcript) {
764
887
  // --transcript <participant_id>: bypass the study aggregator; fetch
765
888
  // the named participant directly. Cheaper (one GET, no nested
@@ -769,20 +892,134 @@ When no runs have completed, the default envelope is returned with zero counts a
769
892
  output(buildChatTranscript(participant), globals.json, { preProjected: true });
770
893
  return;
771
894
  }
772
- const [data, participants] = await Promise.all([
895
+ // --- Default-fast path: no filter, no group-by ---
896
+ if (!hasFilter && !hasGroupBy) {
897
+ const [data, participants] = await Promise.all([
898
+ client.get(`/studies/${rid}`),
899
+ fetchStudyParticipants(client, rid),
900
+ ]);
901
+ if (wantsSummary) {
902
+ output(buildStudyResultsSummary(data, participants), globals.json, { preProjected: true });
903
+ }
904
+ else {
905
+ formatStudyResults(data, participants, globals.json);
906
+ }
907
+ if (!globals.json && data.product_id) {
908
+ const url = getWebUrl(globals, `/${data.product_id}/${rid}/overview`);
909
+ console.error(`\n ${terminalLink(url, "Open in browser ↗")}\n`);
910
+ }
911
+ return;
912
+ }
913
+ // --- Slice / projection path: fetch in parallel, then filter+project ---
914
+ //
915
+ // Modality gating for --group-by happens AFTER the study fetch
916
+ // (we need study.modality), but BEFORE any further work — see the
917
+ // post-fetch validation block below. Pre-fetch validation above is
918
+ // limited to checks that don't need wire data.
919
+ const fetchFrames = opts.frame !== undefined;
920
+ const [study, participants, framesPayload] = await Promise.all([
773
921
  client.get(`/studies/${rid}`),
774
922
  fetchStudyParticipants(client, rid),
923
+ fetchFrames
924
+ ? client.get(`/studies/${rid}/frames`)
925
+ : Promise.resolve([]),
775
926
  ]);
776
- if (wantsSummary) {
777
- output(buildStudyResultsSummary(data, participants), globals.json, { preProjected: true });
927
+ const studyRec = study;
928
+ const modality = typeof studyRec.modality === "string" ? studyRec.modality : "unknown";
929
+ // Modality gating for --group-by — router-level, NOT projection-level
930
+ // (devon's T7 note: projection builders are intentionally
931
+ // modality-agnostic and bucket non-matching rows into `_unmatched`;
932
+ // the surface is responsible for refusing nonsensical axes up front).
933
+ if (groupByKind === "frame" && modality !== "interactive") {
934
+ throw new ValidationError(`--group-by frame requires modality=interactive; this study is "${modality}".`, ["interactive"]);
778
935
  }
779
- else {
780
- formatStudyResults(data, participants, globals.json);
936
+ const SEGMENT_MODALITIES = ["video", "audio", "text", "document"];
937
+ if (groupByKind === "segment" && !SEGMENT_MODALITIES.includes(modality)) {
938
+ throw new ValidationError(`--group-by segment requires modality ∈ {${SEGMENT_MODALITIES.join(", ")}}; this study is "${modality}".`, SEGMENT_MODALITIES);
781
939
  }
782
- if (!globals.json && data.product_id) {
783
- const url = getWebUrl(globals, `/${data.product_id}/${rid}/overview`);
784
- console.error(`\n ${terminalLink(url, "Open in browser ↗")}\n`);
940
+ if (groupByKind === "turn" && modality !== "chat") {
941
+ throw new ValidationError(`--group-by turn requires modality=chat; this study is "${modality}".`, ["chat"]);
785
942
  }
943
+ // Coerce the frames payload to a plain array of records (the API
944
+ // returns a bare array). Tolerate `{items: [...]}` shape in case the
945
+ // endpoint ever normalises.
946
+ const rawFrames = Array.isArray(framesPayload)
947
+ ? framesPayload
948
+ : Array.isArray(framesPayload?.items)
949
+ ? (framesPayload.items)
950
+ : [];
951
+ const filters = {
952
+ frame: opts.frame,
953
+ segment: opts.segment,
954
+ turn: turnNum,
955
+ side: sideNormalised,
956
+ assignment: opts.assignment,
957
+ step: opts.step,
958
+ sentiment: opts.sentiment && opts.sentiment.length > 0 ? opts.sentiment : undefined,
959
+ actor: actorNormalised,
960
+ iteration: opts.iteration,
961
+ participant: opts.participant,
962
+ includeUnmatched: opts.includeUnmatched === true ? true : undefined,
963
+ includeEvidence: opts.includeEvidence === true ? true : undefined,
964
+ };
965
+ const filtered = applyResultsFilters(studyRec, participants, rawFrames, filters);
966
+ // Surface modality-mismatch warnings (and any other diagnostics from
967
+ // applyResultsFilters) on stderr so JSON output stays clean. The
968
+ // filter pipeline downgrades mismatched flags to no-ops; the warnings
969
+ // tell the agent which flags were ignored and why.
970
+ if (filtered.warnings.length > 0 && !globals.quiet) {
971
+ for (const w of filtered.warnings) {
972
+ console.error(`warning: ${w}`);
973
+ }
974
+ }
975
+ // --- Dispatch: --group-by projection > --summary on filtered > filtered envelope ---
976
+ if (groupByKind !== undefined) {
977
+ let projection;
978
+ switch (groupByKind) {
979
+ case "iteration":
980
+ projection = buildStudyResultsPerIteration(filtered);
981
+ break;
982
+ case "frame":
983
+ projection = buildStudyResultsPerFrame(filtered);
984
+ break;
985
+ case "segment":
986
+ projection = buildStudyResultsPerSegment(filtered);
987
+ break;
988
+ case "turn":
989
+ projection = buildStudyResultsPerTurn(filtered);
990
+ break;
991
+ case "assignment":
992
+ projection = buildStudyResultsPerAssignment(filtered);
993
+ break;
994
+ case "step":
995
+ projection = buildStudyResultsPerStep(filtered);
996
+ break;
997
+ }
998
+ formatStudyResultsGroupBy(projection, groupByKind, globals.json);
999
+ return;
1000
+ }
1001
+ if (wantsSummary) {
1002
+ // --summary on filtered participants: narrowed summary projection.
1003
+ // Attach totals_unfiltered so callers can still see the pre-filter
1004
+ // denominator (e.g. "12 / 80 participants matched").
1005
+ const summary = buildStudyResultsSummary(filtered.study, filtered.participants);
1006
+ const summaryOut = {
1007
+ ...summary,
1008
+ totals_unfiltered: filtered.totals_unfiltered,
1009
+ };
1010
+ output(summaryOut, globals.json, { preProjected: true });
1011
+ return;
1012
+ }
1013
+ // Default (no --group-by, no --summary) but filters set: stable
1014
+ // envelope on the filtered participants + totals_unfiltered. Empty
1015
+ // slice contract: zero matches yields participant_count=0 and exit
1016
+ // 0, never a 4/not-found.
1017
+ const envelope = buildStudyResultsEnvelope(filtered.study, filtered.participants);
1018
+ const envelopeOut = {
1019
+ ...envelope,
1020
+ totals_unfiltered: filtered.totals_unfiltered,
1021
+ };
1022
+ output(envelopeOut, globals.json, { preProjected: true });
786
1023
  });
787
1024
  });
788
1025
  study
@@ -19,6 +19,7 @@ export declare const ALIAS_PREFIX: {
19
19
  readonly askRound: "r";
20
20
  readonly chatEndpoint: "ep";
21
21
  readonly chatConfig: "cc";
22
+ readonly frame: "f";
22
23
  };
23
24
  /**
24
25
  * Save aliases for a list of IDs under the given prefix.
@@ -22,6 +22,7 @@ export const ALIAS_PREFIX = {
22
22
  askRound: "r",
23
23
  chatEndpoint: "ep",
24
24
  chatConfig: "cc",
25
+ frame: "f",
25
26
  };
26
27
  /** Format a number with zero-padding (minimum 2 digits). */
27
28
  function padNum(n) {
@@ -133,6 +134,7 @@ const HYDRATE_HINT = {
133
134
  a: "ish ask list",
134
135
  r: "ish ask get <ask-id>",
135
136
  ep: "ish chat endpoint list",
137
+ f: "ish study results <study-id> --frame <name> # frames are discovered via the study's frames endpoint",
136
138
  // Legacy two-letter prefixes the deterministic generator may have
137
139
  // produced before; defaults below cover anything else.
138
140
  };
package/dist/lib/docs.js CHANGED
@@ -315,6 +315,8 @@ pick was wrong.
315
315
  - \`concepts/assignment\` — task definition syntax.
316
316
  - \`concepts/questionnaire\` — question types and timing.
317
317
  - \`concepts/run-verbs\` — when to use \`study run\` vs \`ask run\`.
318
+ - \`guides/slicing-results\` — filter / project \`study results\` by frame,
319
+ segment, turn, sentiment, assignment, step.
318
320
  - \`reference/billing-limits\` — \`maxStudiesPerProduct\` cap on study creation.
319
321
  - \`reference/credits\` — per-run credit cost & how to preview before dispatch.
320
322
  `;
@@ -851,6 +853,9 @@ ride along when present in the JSON forms.
851
853
 
852
854
  - \`concepts/study\` — assignments are immutable to the run; questionnaire is too.
853
855
  - \`concepts/questionnaire\` — the other half of the study definition.
856
+ - \`guides/slicing-results\` — slice the post-run envelope by step
857
+ (\`--step verify-email --group-by step\`), surface per-participant verdicts
858
+ inline, or restrict to the evidence interactions with \`--include-evidence\`.
854
859
  - \`reference/json-mode\` — how \`step_completion\` renders in lean vs --verbose.
855
860
  `;
856
861
  const CONCEPT_QUESTIONNAIRE = `# concept: questionnaire
@@ -1127,7 +1132,7 @@ deleted ask was the active one.
1127
1132
  - \`concepts/round\` — what a round is and how it executes.
1128
1133
  - \`concepts/people\` — how participants are chosen at ask creation.
1129
1134
  - \`concepts/run-verbs\` — \`ish ask run\` vs \`ish study run\`.
1130
- - \`reference/credits\` — ask rounds bill \`n_participants * (1 + len(questions))\` credits per round; \`questions\` follow-ups bill *per participant* on top of the base response, so a 3-person panel with 2 follow-up questions costs \`3 * (1 + 2) = 9\` credits when all complete (not 3).
1135
+ - \`reference/credits\` — ask rounds bill **one credit per successful participant per round**, regardless of how many \`questions\` were included. The backend's asks worker bills \`amount=succeeded\` once per round dispatch; questions and round-summary synthesis don't trigger separate debits. A 3-person panel with 2 follow-up questions costs \`3\` credits when all complete, the same as a no-questions run. Failed participant responses (pre-flight errors, refusals) don't bill.
1131
1136
  `;
1132
1137
  const CONCEPT_ROUND = `# concept: round
1133
1138
 
@@ -2461,6 +2466,184 @@ ish study results --human
2461
2466
  When you genuinely need multiple fields in one parse pass, \`--json\` is
2462
2467
  still the right tool — \`--get\` is for single-value capture, not for
2463
2468
  reshaping output.
2469
+
2470
+ ## Slicing study results
2471
+
2472
+ \`ish study results <id>\` accepts filter flags (\`--frame\`, \`--segment\`,
2473
+ \`--turn\`, \`--side\`, \`--assignment\`, \`--step\`, \`--sentiment\`,
2474
+ \`--actor\`, \`--iteration\`, \`--participant\`) and projection flags
2475
+ (\`--group-by iteration|frame|segment|turn|assignment|step\`). When any
2476
+ filter is passed, the envelope gains a \`totals_unfiltered\` field
2477
+ (\`{participant_count, interaction_count}\`) so an agent can sanity-check
2478
+ coverage: "matched 12 / 80 participants". A zero-match filter returns
2479
+ the stable envelope with \`participant_count: 0\` and exit code **0**
2480
+ (not 4) — slicing never errors on no-match.
2481
+
2482
+ \`--group-by\` is **router-gated by modality**: \`frame\` requires
2483
+ interactive, \`segment\` requires media (video / audio / text / document),
2484
+ \`turn\` requires chat. Mismatched filter flags (e.g. \`--segment 0\` on
2485
+ an interactive study) emit a stderr warning and are ignored — they
2486
+ don't error. Full worked examples in \`guides/slicing-results\`.
2487
+ `;
2488
+ const GUIDE_SLICING_RESULTS = `# guide: slicing study results
2489
+
2490
+ \`ish study results <id>\` returns a kitchen-sink envelope by default
2491
+ (every participant, every interaction, every interview answer). For
2492
+ narrower questions — *"what differed on the login screen across these
2493
+ five iterations?"*, *"who failed verify-email, and why?"*, *"frustrated
2494
+ reactions to segment 3 of the video"* — \`ish study results\` accepts
2495
+ **filter flags** (which interactions to keep) and **projection flags**
2496
+ (how to roll up what survives). Filters compose with AND across flags
2497
+ and OR within \`--sentiment\`. Filters and projections are pure
2498
+ client-side; the only request beyond the standard study fetch is \`GET /studies/{id}/frames\`, issued in parallel when \`--frame\` is set.
2499
+
2500
+ ## Filter flags
2501
+
2502
+ | Flag | Matches | Where it applies |
2503
+ |-------------------------------|-----------------------------------------------------------------------------------------------|------------------------------------------------------------------|
2504
+ | \`--frame <ref>\` | Interactions whose Frame name contains \`<ref>\` (case-insensitive). Also accepts a full Frame UUID, an \`f-…\` alias, or a \`frame_version_id\` UUID. | interactive — warn + ignore on chat / media |
2505
+ | \`--segment <ref>\` | Integer matches \`actions[0].data.segment_index\`; non-integer is a substring match against \`segment_label\`. | video, audio, text, document — warn + ignore elsewhere |
2506
+ | \`--turn <n>\` | Interactions whose \`actions[0].data.turn_index == n\`. | chat (external_chatbot + participant_pair) |
2507
+ | \`--side <a\|b>\` | Interactions whose parent assignment has \`side == a\` or \`side == b\`. | chat participant_pair — warn + ignore on other chat / non-chat |
2508
+ | \`--assignment <ref>\` | Assignment UUID, or substring match against the assignment name. | all |
2509
+ | \`--step <ref>\` | Filters \`participant_assignments[].step_results[]\` to verdicts matching the step id or name. | interactive + external_chatbot chat (steps live there) |
2510
+ | \`--sentiment <labels>\` | Comma-separated, case-insensitive label list (repeatable). Drops null-sentiment rows. | all |
2511
+ | \`--actor <ai\|human\|user>\` | Restrict by actor. | all |
2512
+ | \`--iteration <ref>\` | Iteration UUID or label (\`A\`, \`B\`, … case-insensitive). | all |
2513
+ | \`--participant <ref>\` | Participant UUID or \`pt-…\` alias. | all |
2514
+ | \`--include-unmatched\` | With \`--frame\`, keep degraded captures (\`frame_version_id: null\`) under a synthetic \`_unmatched\` bucket instead of dropping them. | interactive |
2515
+ | \`--include-evidence\` | With \`--step\`, also drop interactions not listed in any surviving \`step_results[].evidence_interaction_ids[]\`. | interactive + external_chatbot chat |
2516
+
2517
+ **Modality mismatch is not an error.** Pass \`--segment 0\` on an
2518
+ interactive study and the filter is ignored with a stderr warning.
2519
+ The exception is \`--group-by\` — see below.
2520
+
2521
+ ## Projection flags (--group-by)
2522
+
2523
+ | Axis | Output shape | Modality |
2524
+ |-------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
2525
+ | \`iteration\` | \`{study, slices: [{iteration_id, iteration_label, participant_count, interaction_count, sentiment, sample_comments, top_actions}, ...], totals_unfiltered, warnings}\` | all |
2526
+ | \`frame\` | \`[{frame_id, frame_label, interaction_count, sentiment_histogram, sample_comments, participant_aliases}, ...]\` | interactive (router errors on non-interactive) |
2527
+ | \`segment\` | \`[{segment_index, segment_label, interaction_count, sentiment_histogram, engagement_histogram, sample_comments}, ...]\` | media (router errors on non-media) |
2528
+ | \`turn\` | \`[{turn_index, interaction_count, sentiment_histogram, sample_replies, failures}, ...]\` | chat (router errors on non-chat) |
2529
+ | \`assignment\` | \`[{assignment_id, assignment_name, interaction_count, sentiment_histogram, step_completion}, ...]\` | all |
2530
+ | \`step\` | \`[{assignment_id, assignment_name, step_id, step_name, total, passed, inconclusive, failed, rate, participant_verdicts: [{participant_alias, verdict, reason, evidence_interaction_ids}, ...]}, ...]\` | interactive + external_chatbot chat |
2531
+
2532
+ \`--group-by\` is **mutually exclusive with \`--summary\` and
2533
+ \`--transcript\`**. \`--group-by frame\` on a chat study, \`--group-by
2534
+ turn\` on a video study, etc. error at the surface (exit 2) with a
2535
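+ clear message as soon as the study record has been fetched (the check needs the study's modality), before any filtering or projection runs.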
2536
+
2537
+ ## The empty-slice contract
2538
+
2539
+ A filter combination that matches zero interactions returns the
2540
+ **stable envelope shape** with:
2541
+
2542
+ - \`participant_count: 0\`
2543
+ - \`totals_unfiltered: {participant_count: <N>, interaction_count: <M>}\` populated
2544
+ - exit code **0** (not 4)
2545
+
2546
+ \`totals_unfiltered\` is the agent's sanity check: *"my filter matched
2547
+ 0 of 80 participants — is the filter too tight, or did the run not
2548
+ produce data?"*. The shape never collapses to \`null\` or a different
2549
+ envelope; \`--get participant_count\` is always safe.
2550
+
2551
+ ## Worked examples
2552
+
2553
+ \`\`\`bash
2554
+ # What differed on the login screen across the five iterations?
2555
+ ish study results s-b2c --frame login --group-by iteration
2556
+
2557
+ # Frustrated reactions to segment 3 of the video
2558
+ ish study results s-b2c --segment 3 --sentiment Frustrated
2559
+
2560
+ # Who failed the "verify email" step, and why?
2561
+ ish study results s-b2c --assignment "Sign up" --step verify-email --group-by step
2562
+
2563
+ # Chat participant_pair: only side A turn 4
2564
+ ish study results s-b2c --side a --turn 4
2565
+
2566
+ # Surface degraded captures (frame_version_id: null) under a "_unmatched" bucket:
2567
+ ish study results s-b2c --frame login --include-unmatched --group-by frame
2568
+
2569
+ # Narrow the lean summary to a slice:
2570
+ ish study results s-b2c --summary --frame checkout --json
2571
+ \`\`\`
2572
+
2573
+ ## Combining filters
2574
+
2575
+ Filters compose with **AND across flags** and **OR within
2576
+ \`--sentiment\`**. \`--frame login --sentiment Frustrated,Confused\`
2577
+ means "interactions on the login frame whose sentiment is Frustrated
2578
+ OR Confused". \`--summary\` is orthogonal to filters and narrows the
2579
+ summary over the filtered set. \`--transcript\` is single-participant
2580
+ and **errors when any filter or \`--group-by\` is set** (exit 2).
2581
+
2582
+ ## Defensive handling of nullable fields
2583
+
2584
+ - \`interaction.sentiment\` is nullable (chat failure stubs,
2585
+ pre-sentiment rows). Dropped **only** when \`--sentiment\` is set; kept
2586
+ by every other filter.
2587
+ - \`interaction.frame_version_id\` is nullable on interactive studies
2588
+ (degraded captures, ~12% on a failing iteration). Dropped by
2589
+ \`--frame\` unless \`--include-unmatched\` is passed; surfaced as a
2590
+ \`_unmatched\` bucket in \`--group-by frame\`.
2591
+ - Chat \`bot_reply.failure\` rows are kept in the default envelope,
2592
+ dropped by \`--sentiment\` (they have \`sentiment: null\`), kept by
2593
+ \`--actor\`, visible in \`--group-by turn\` under a \`failures\`
2594
+ counter.
2595
+
2596
+ ## --frame resolution
2597
+
2598
+ \`--frame login\` walks the frame list returned by
2599
+ \`GET /studies/{id}/frames\` and matches **case-insensitive substring**
2600
+ against the frame name. Other accepted shapes:
2601
+
2602
+ - \`--frame 6ec…\` — full Frame UUID (exact match)
2603
+ - \`--frame f-6ec\` — short alias resolved via \`alias-store\`
2604
+ - \`--frame 7ec…\` — a \`frame_version_id\` UUID (matches only that version)
2605
+
2606
+ Ambiguous substring (matches >1 frame) errors with the candidate list:
2607
+
2608
+ \`\`\`
2609
+ ish study results s-b2c --frame log
2610
+ # Error: --frame "log" is ambiguous — matched 2 frames: Login, Logout.
2611
+ # Use a more specific substring, a full Frame UUID, or an \`f-…\` alias.
2612
+ \`\`\`
2613
+
2614
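+ No match at all errors and lists the available frame names. This is a reference-resolution error, distinct from the empty-slice contract: that contract (exit 0) covers a \`--frame\` that resolves to a frame but matches zero interactions.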
2615
+
2616
+ ## Common --get paths on a sliced envelope
2617
+
2618
+ \`\`\`
2619
+ # Sanity-check coverage:
2620
+ --get totals_unfiltered.participant_count
2621
+ --get totals_unfiltered.interaction_count
2622
+
2623
+ # Per-iteration projection:
2624
+ --get slices.iteration_label # one label per line
2625
+ --get slices.0.participant_count
2626
+ --get slices.0.sentiment
2627
+
2628
+ # Per-frame / per-segment / per-turn (bare array):
2629
+ --get 0.frame_label
2630
+ --get 0.segment_index
2631
+ --get 0.sentiment_histogram
2632
+
2633
+ # Per-step:
2634
+ --get 0.rate
2635
+ --get 0.participant_verdicts.verdict # one verdict per participant
2636
+ \`\`\`
2637
+
2638
+ ## Related
2639
+
2640
+ - \`concepts/study\` — the parent artifact whose results are being sliced.
2641
+ - \`concepts/assignment\` — defines the steps that \`--step\` and
2642
+ \`--group-by step\` filter against.
2643
+ - \`reference/json-mode\` — display vs capture vs chain output rules
2644
+ (\`--get\`, \`--fields\`, exit codes).
2645
+ - \`reference/aliases\` — \`s-…\` for studies, \`pt-…\` for participants,
2646
+ \`f-…\` for frames. Any UUID-accepting flag also accepts the alias.
2464
2647
  `;
2465
2648
  const GUIDE_FIRST_STUDY = `# guide: your first study, end to end
2466
2649
 
@@ -4053,6 +4236,12 @@ const PAGES = [
4053
4236
  description: "Login → workspace → people → study → iteration → run → results.",
4054
4237
  body: GUIDE_FIRST_STUDY,
4055
4238
  },
4239
+ {
4240
+ slug: "guides/slicing-results",
4241
+ title: "guide: slicing study results by frame / segment / turn / sentiment",
4242
+ description: "Filter and project `ish study results` — --frame, --segment, --turn, --side, --assignment, --step, --sentiment, --actor, --iteration, --participant; --group-by iteration|frame|segment|turn|assignment|step; totals_unfiltered + empty-slice contract.",
4243
+ body: GUIDE_SLICING_RESULTS,
4244
+ },
4056
4245
  {
4057
4246
  slug: "guides/chat",
4058
4247
  title: "guide: chat-modality studies",
@@ -48,6 +48,12 @@ export declare function formatWorkspaceDetail(workspace: Record<string, unknown>
48
48
  export declare function formatSiteAccessStatus(summary: import("./site-access.js").SiteAccessSummary, json: boolean): void;
49
49
  export declare function formatStudyList(studies: Record<string, unknown>[], json: boolean): void;
50
50
  export declare function formatStudyDetail(study: Record<string, unknown>, json: boolean, options?: OutputOptions, participants?: ReadonlyArray<Record<string, unknown>>): void;
51
+ /**
52
+ * Stable JSON envelope for `study results`. Schema is fixed regardless of
53
+ * study state — fields default to `null`, `0`, or `[]` when nothing has run.
54
+ * Agents can rely on the keys always being present (M4).
55
+ */
56
+ export declare function buildStudyResultsEnvelope(study: Record<string, unknown>, participants: ReadonlyArray<Record<string, unknown>>): Record<string, unknown>;
51
57
  export declare function formatStudyResults(study: Record<string, unknown>, participants: ReadonlyArray<Record<string, unknown>>, json: boolean): void;
52
58
  /**
53
59
  * `study results --summary` projection. Drops interview_answers + per-participant
@@ -102,3 +108,15 @@ export declare function deriveWinnerConfidence(args: {
102
108
  }): "low" | "medium" | "high";
103
109
  export declare function formatAskResults(ask: Record<string, unknown>, json: boolean, roundFilter?: number): void;
104
110
  export declare function formatConfigList(configs: Record<string, unknown>[], json: boolean): void;
111
+ export type StudyResultsGroupByKind = "iteration" | "frame" | "segment" | "turn" | "assignment" | "step";
112
+ /**
113
+ * Render a `--group-by <kind>` projection. JSON mode is a thin pass-through
114
+ * to jsonOutput with `preProjected: true` so the lean transform doesn't
115
+ * strip our stable empties. Human mode renders one section per slice plus
116
+ * a small ASCII sentiment histogram.
117
+ *
118
+ * The renderer accepts both the wrapped `{study, slices, ...}` shape (per-
119
+ * iteration) and the bare-array shape (every other --group-by); the
120
+ * surface (T5) doesn't need to know the difference.
121
+ */
122
+ export declare function formatStudyResultsGroupBy(projection: unknown, kind: StudyResultsGroupByKind, json: boolean): void;