npm - @ishlabs/cli - Versions diffs - 0.20.0 → 0.21.0 - Mend

@ishlabs/cli 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/dist/commands/study.js +250 -13
package/dist/lib/alias-store.d.ts +1 -0
package/dist/lib/alias-store.js +2 -0
package/dist/lib/docs.js +190 -1
package/dist/lib/output.d.ts +18 -0
package/dist/lib/output.js +217 -1
package/dist/lib/skill-content.js +68 -0
package/dist/lib/study-participants.d.ts +13 -0
package/dist/lib/study-participants.js +13 -0
package/dist/lib/study-results-filters.d.ts +91 -0
package/dist/lib/study-results-filters.js +538 -0
package/dist/lib/study-results-projections.d.ts +122 -0
package/dist/lib/study-results-projections.js +577 -0
package/package.json +1 -1

package/dist/commands/study.js CHANGED Viewed

@@ -3,10 +3,12 @@
  */
 import { readFileSync } from "node:fs";
 import { Option } from "commander";
-import { withClient, getWebUrl, terminalLink, resolveWorkspace, confirmDestructive, readFileOrStdin } from "../lib/command-helpers.js";
+import { withClient, getWebUrl, terminalLink, resolveWorkspace, confirmDestructive, readFileOrStdin, collectIds } from "../lib/command-helpers.js";
 import { resolveId, tagAlias, ALIAS_PREFIX } from "../lib/alias-store.js";
 import { loadConfig, saveConfig } from "../config.js";
-import { formatStudyList, formatStudyDetail, formatStudyResults, buildStudyResultsSummary, buildChatTranscript, output, ValidationError, } from "../lib/output.js";
+import { formatStudyList, formatStudyDetail, formatStudyResults, buildStudyResultsEnvelope, buildStudyResultsSummary, buildChatTranscript, formatStudyResultsGroupBy, output, ValidationError, } from "../lib/output.js";
+import { applyResultsFilters } from "../lib/study-results-filters.js";
+import { buildStudyResultsPerIteration, buildStudyResultsPerFrame, buildStudyResultsPerSegment, buildStudyResultsPerTurn, buildStudyResultsPerAssignment, buildStudyResultsPerStep, } from "../lib/study-results-projections.js";
 import { VALID_CONTENT_TYPES } from "../lib/types.js";
 import { fetchStudyParticipants } from "../lib/study-participants.js";
 import { parseAssignment, loadAssignmentsFile, validateAssignmentsArray, parseQuestion } from "../lib/study-inputs.js";
@@ -669,22 +671,41 @@ list table layout in human mode.`)
     });
     study
         .command("results")
-        .description("View aggregated results: participant counts, sentiment, interview answers. Returns a stable envelope with empty fields when no runs have completed.")
+        .description("View aggregated results: participant counts, sentiment, interview answers. Returns a stable envelope with empty fields when no runs have completed. Slice with filter flags (--frame, --segment, --turn, --side, --assignment, --step, --sentiment, --actor, --iteration, --participant) or project with --group-by (iteration|frame|segment|turn|assignment|step).")
         .argument("<id>", "Study ID")
         .option("--workspace <id>", "Workspace ID; accepted for consistency (workspace is inferred from the study)")
-        .option("--summary", "Lean summary projection: counts + sentiment + per-participant {alias, status, sentiment, comment}. Drops interview_answers + per-interaction breakdowns.")
+        .option("--summary", "Lean summary projection: counts + sentiment + per-participant {alias, status, sentiment, comment}. Drops interview_answers + per-interaction breakdowns. Composes with filters: `--summary --frame login` narrows the summary to the login-screen interactions.")
         // PC-N4: agents reach for `--summarize` (verb) by analogy with the MCP
         // `summarize` action; accept it as a hidden alias of --summary so the
         // canonical flag stays the documented one but the muscle-memory variant
         // works without a round-trip.
         .addOption(new Option("--summarize", "Hidden alias for --summary").hideHelp())
-        .option("--transcript <participant_id>", "Chat transcript projection for one participant: flat role/text/turn-index array (chat-modality only). Mirrors the MCP `get_chat_transcript` shape.")
+        .option("--transcript <participant_id>", "Chat transcript projection for one participant: flat role/text/turn-index array (chat-modality only). Mirrors the MCP `get_chat_transcript` shape. Cannot combine with filters or --group-by (transcript is a single-participant projection).")
+        // --- Slice / projection flags (T5) ---
+        .option("--frame <ref>", "Filter to interactions whose Frame name contains <ref> (case-insensitive), or whose Frame UUID / `f-…` alias / frame_version_id matches. Interactive only — warned and ignored on other modalities.")
+        .option("--segment <ref>", "Filter media studies (video/audio/text/document) by segment index (integer) or segment label (substring). Image and other modalities: warned and ignored.")
+        .option("--turn <n>", "Filter chat interactions to a single `actions[0].data.turn_index`. Non-chat modalities: warned and ignored.")
+        .option("--side <a|b>", "Filter participant_pair chat interactions by assignment side. Other modalities: warned and ignored.")
+        .option("--assignment <ref>", "Filter to a single assignment by UUID or name (substring, case-insensitive).")
+        .option("--step <ref>", "Filter `participant_assignments[].step_results[]` to a single step by step-id or name (substring). Pair with --include-evidence to also drop non-evidence interactions.")
+        .option("--sentiment <labels>", "Filter to interactions whose sentiment.label is in the comma-separated list (case-insensitive; repeatable). Drops null-sentiment rows.", collectIds, [])
+        .option("--actor <actor>", "Filter to interactions whose actor is `ai`, `human`, or `user` (case-insensitive).")
+        .option("--iteration <ref>", "Restrict to a single iteration by UUID or label.")
+        .option("--participant <ref>", "Restrict to a single participant by UUID or `pt-…` alias.")
+        .option("--include-unmatched", "When --frame is set, keep interactions with null frame_version_id under a synthetic `_unmatched` bucket instead of dropping them.")
+        .option("--include-evidence", "When --step is set, also drop interactions not listed in any surviving step_results[].evidence_interaction_ids[].")
+        .option("--group-by <axis>", "Project results into per-axis slices: iteration | frame | segment | turn | assignment | step. Mutually exclusive with --summary and --transcript.")
         .addHelpText("after", `
 Examples:
   $ ish study results <id>
   $ ish study results <id> --json
   $ ish study results <id> --summary --json
   $ ish study results <id> --transcript pt-d4e --json
+  # Slice (filters compose: AND across flags, OR within --sentiment)
+  $ ish study results <id> --frame login --group-by iteration
+  $ ish study results <id> --segment 3 --sentiment Frustrated
+  $ ish study results <id> --assignment "Sign up" --step verify-email --group-by step
+  $ ish study results <id> --side a --turn 4
 Default --json envelope (M10: per-answer sentiment now included):
   {
@@ -707,6 +728,11 @@ Default --json envelope (M10: per-answer sentiment now included):
     ]
   }
+When any filter flag is passed, the envelope gains a \`totals_unfiltered\` field
+({ participant_count, interaction_count }) so callers can sanity-check coverage
+("matched 12 / 80 participants"). A zero-match filter returns the stable
+envelope with participant_count=0 and exit code 0 (not 4).
 --summary projection (M2-friction-7: drops the interview_answers payload):
   { study, participant_count, completed_count, failed_count, sentiment, participants: [...] }
@@ -723,6 +749,24 @@ Default --json envelope (M10: per-answer sentiment now included):
     "participant_summary": { "comment": "...", "sentiment": {...} }
   }
+--group-by iteration projection:
+  { study, slices: [{ iteration_id, iteration_label, participant_count, interaction_count, sentiment, sample_comments, top_actions }, ...], totals_unfiltered, warnings }
+--group-by frame projection (interactive only):
+  [{ frame_id, frame_label, interaction_count, sentiment_histogram, sample_comments, participant_aliases }, ...]
+--group-by segment projection (video/audio/text/document):
+  [{ segment_index, segment_label, interaction_count, sentiment_histogram, engagement_histogram, sample_comments }, ...]
+--group-by turn projection (chat only):
+  [{ turn_index, interaction_count, sentiment_histogram, sample_replies, failures }, ...]
+--group-by assignment projection:
+  [{ assignment_id, assignment_name, interaction_count, sentiment_histogram, step_completion }, ...]
+--group-by step projection:
+  [{ assignment_id, assignment_name, step_id, step_name, total, passed, inconclusive, failed, rate, participant_verdicts: [{ participant_alias, verdict, reason, evidence_interaction_ids }, ...] }, ...]
 Tips:
   Use \`--get <path>\` for a single value (e.g. \`--get participant_count\`),
   \`--fields a,b,c\` to project the JSON output further.
@@ -741,6 +785,7 @@ Common --get paths (default envelope):
   --get interview_answers                       # full per-question payload
   --get interview_answers.0.question            # text of the first question
   --get interview_answers.0.answers.0.answer    # first answer to the first question
+  --get totals_unfiltered.participant_count          # pre-filter participant count (when slicing)
 Common --get paths (--transcript <participant_id> envelope):
   --get transcript                              # full role/text/turn array
@@ -749,6 +794,18 @@ Common --get paths (--transcript <participant_id> envelope):
   --get participant_summary.sentiment                # aggregate sentiment map
   --get unique_bot_replies                      # bot-side message count
+Common --get paths (--group-by projections):
+  --get slices.iteration_label                       # per-iteration: one label per line
+  --get slices.0.participant_count                   # per-iteration: first slice's count
+  --get 0.frame_label                                # per-frame: first frame's label
+  --get 0.sentiment_histogram                        # per-frame/segment/turn: first slice's sentiment map
+  --get 0.segment_index                              # per-segment: first segment's index
+  --get 0.turn_index                                 # per-turn: first turn's index
+  --get 0.assignment_name                            # per-assignment/step: first slice's assignment
+  --get 0.step_name                                  # per-step: first slice's step
+  --get 0.rate                                       # per-step: first step's pass-rate
+  --get 0.participant_verdicts.verdict               # per-step: verdict per participant
 When no runs have completed, the default envelope is returned with zero counts and empty arrays.`)
         .action(async (id, opts, cmd) => {
         await withClient(cmd, async (client, globals) => {
@@ -756,10 +813,76 @@ When no runs have completed, the default envelope is returned with zero counts a
             // into a single boolean before validation so the rest of the
             // handler reads only `summary`.
             const wantsSummary = !!(opts.summary || opts.summarize);
+            // T5: detect whether any filter flag was passed. Interaction-level
+            // and participant-level flags both count — they all narrow the
+            // result set. `--include-unmatched`/`--include-evidence` are
+            // modifiers that only make sense alongside --frame/--step, but
+            // they still count as "filter intent": they trigger the
+            // --transcript conflict check and route into the slice path.
+            const hasFilter = opts.frame !== undefined ||
+                opts.segment !== undefined ||
+                opts.turn !== undefined ||
+                opts.side !== undefined ||
+                opts.assignment !== undefined ||
+                opts.step !== undefined ||
+                (opts.sentiment !== undefined && opts.sentiment.length > 0) ||
+                opts.actor !== undefined ||
+                opts.iteration !== undefined ||
+                opts.participant !== undefined ||
+                opts.includeUnmatched === true ||
+                opts.includeEvidence === true;
+            const hasGroupBy = opts.groupBy !== undefined;
+            // --- Conflict validation (no IO yet) ---
             if (wantsSummary && opts.transcript) {
                 throw new ValidationError("Pass only one of: --summary, --transcript.", ["--summary", "--transcript"]);
             }
+            if (opts.transcript && (hasFilter || hasGroupBy)) {
+                // --transcript is a single-participant chat projection — slicing
+                // doesn't make sense.
+                throw new ValidationError("--transcript is a single-participant projection; cannot combine with filter flags or --group-by.", ["--transcript"]);
+            }
+            if (wantsSummary && hasGroupBy) {
+                throw new ValidationError("Pass only one of: --summary, --group-by.", ["--summary", "--group-by"]);
+            }
+            // --side validation: must be exactly "a" or "b" (case-insensitive).
+            const sideNormalised = opts.side ? opts.side.toLowerCase() : undefined;
+            if (sideNormalised !== undefined && sideNormalised !== "a" && sideNormalised !== "b") {
+                throw new ValidationError(`--side must be "a" or "b", got "${opts.side}".`, ["a", "b"]);
+            }
+            // --actor validation: must be one of ai|human|user (case-insensitive).
+            const actorNormalised = opts.actor ? opts.actor.toLowerCase() : undefined;
+            if (actorNormalised !== undefined &&
+                actorNormalised !== "ai" &&
+                actorNormalised !== "human" &&
+                actorNormalised !== "user") {
+                throw new ValidationError(`--actor must be "ai", "human", or "user", got "${opts.actor}".`, ["ai", "human", "user"]);
+            }
+            // --turn validation: must parse as a non-negative integer.
+            let turnNum;
+            if (opts.turn !== undefined) {
+                const n = parseInt(opts.turn, 10);
+                if (Number.isNaN(n) || n < 0 || String(n) !== opts.turn.trim()) {
+                    throw new ValidationError(`--turn must be a non-negative integer, got "${opts.turn}".`, []);
+                }
+                turnNum = n;
+            }
+            // --group-by axis whitelist.
+            const VALID_GROUP_BY = [
+                "iteration",
+                "frame",
+                "segment",
+                "turn",
+                "assignment",
+                "step",
+            ];
+            let groupByKind;
+            if (opts.groupBy !== undefined) {
+                if (!VALID_GROUP_BY.includes(opts.groupBy)) {
+                    throw new ValidationError(`--group-by must be one of: ${VALID_GROUP_BY.join(", ")}. Got "${opts.groupBy}".`, VALID_GROUP_BY);
+                }
+                groupByKind = opts.groupBy;
+            }
             const rid = resolveId(id);
+            // --- --transcript fast path (no fetch of study payload) ---
             if (opts.transcript) {
                 // --transcript <participant_id>: bypass the study aggregator; fetch
                 // the named participant directly. Cheaper (one GET, no nested
@@ -769,20 +892,134 @@ When no runs have completed, the default envelope is returned with zero counts a
                 output(buildChatTranscript(participant), globals.json, { preProjected: true });
                 return;
             }
-            const [data, participants] = await Promise.all([
+            // --- Default-fast path: no filter, no group-by ---
+            if (!hasFilter && !hasGroupBy) {
+                const [data, participants] = await Promise.all([
+                    client.get(`/studies/${rid}`),
+                    fetchStudyParticipants(client, rid),
+                ]);
+                if (wantsSummary) {
+                    output(buildStudyResultsSummary(data, participants), globals.json, { preProjected: true });
+                }
+                else {
+                    formatStudyResults(data, participants, globals.json);
+                }
+                if (!globals.json && data.product_id) {
+                    const url = getWebUrl(globals, `/${data.product_id}/${rid}/overview`);
+                    console.error(`\n  ${terminalLink(url, "Open in browser ↗")}\n`);
+                }
+                return;
+            }
+            // --- Slice / projection path: fetch in parallel, then filter+project ---
+            //
+            // Modality gating for --group-by happens AFTER the study fetch
+            // (we need study.modality), but BEFORE any further work — see the
+            // post-fetch validation block below. Pre-fetch validation above is
+            // limited to checks that don't need wire data.
+            const fetchFrames = opts.frame !== undefined;
+            const [study, participants, framesPayload] = await Promise.all([
                 client.get(`/studies/${rid}`),
                 fetchStudyParticipants(client, rid),
+                fetchFrames
+                    ? client.get(`/studies/${rid}/frames`)
+                    : Promise.resolve([]),
             ]);
-            if (wantsSummary) {
-                output(buildStudyResultsSummary(data, participants), globals.json, { preProjected: true });
+            const studyRec = study;
+            const modality = typeof studyRec.modality === "string" ? studyRec.modality : "unknown";
+            // Modality gating for --group-by — router-level, NOT projection-level
+            // (devon's T7 note: projection builders are intentionally
+            // modality-agnostic and bucket non-matching rows into `_unmatched`;
+            // the surface is responsible for refusing nonsensical axes up front).
+            if (groupByKind === "frame" && modality !== "interactive") {
+                throw new ValidationError(`--group-by frame requires modality=interactive; this study is "${modality}".`, ["interactive"]);
             }
-            else {
-                formatStudyResults(data, participants, globals.json);
+            const SEGMENT_MODALITIES = ["video", "audio", "text", "document"];
+            if (groupByKind === "segment" && !SEGMENT_MODALITIES.includes(modality)) {
+                throw new ValidationError(`--group-by segment requires modality ∈ {${SEGMENT_MODALITIES.join(", ")}}; this study is "${modality}".`, SEGMENT_MODALITIES);
             }
-            if (!globals.json && data.product_id) {
-                const url = getWebUrl(globals, `/${data.product_id}/${rid}/overview`);
-                console.error(`\n  ${terminalLink(url, "Open in browser ↗")}\n`);
+            if (groupByKind === "turn" && modality !== "chat") {
+                throw new ValidationError(`--group-by turn requires modality=chat; this study is "${modality}".`, ["chat"]);
             }
+            // Coerce the frames payload to a plain array of records (the API
+            // returns a bare array). Tolerate `{items: [...]}` shape in case the
+            // endpoint ever normalises.
+            const rawFrames = Array.isArray(framesPayload)
+                ? framesPayload
+                : Array.isArray(framesPayload?.items)
+                    ? (framesPayload.items)
+                    : [];
+            const filters = {
+                frame: opts.frame,
+                segment: opts.segment,
+                turn: turnNum,
+                side: sideNormalised,
+                assignment: opts.assignment,
+                step: opts.step,
+                sentiment: opts.sentiment && opts.sentiment.length > 0 ? opts.sentiment : undefined,
+                actor: actorNormalised,
+                iteration: opts.iteration,
+                participant: opts.participant,
+                includeUnmatched: opts.includeUnmatched === true ? true : undefined,
+                includeEvidence: opts.includeEvidence === true ? true : undefined,
+            };
+            const filtered = applyResultsFilters(studyRec, participants, rawFrames, filters);
+            // Surface modality-mismatch warnings (and any other diagnostics from
+            // applyResultsFilters) on stderr so JSON output stays clean. The
+            // filter pipeline downgrades mismatched flags to no-ops; the warnings
+            // tell the agent which flags were ignored and why.
+            if (filtered.warnings.length > 0 && !globals.quiet) {
+                for (const w of filtered.warnings) {
+                    console.error(`warning: ${w}`);
+                }
+            }
+            // --- Dispatch: --group-by projection > --summary on filtered > filtered envelope ---
+            if (groupByKind !== undefined) {
+                let projection;
+                switch (groupByKind) {
+                    case "iteration":
+                        projection = buildStudyResultsPerIteration(filtered);
+                        break;
+                    case "frame":
+                        projection = buildStudyResultsPerFrame(filtered);
+                        break;
+                    case "segment":
+                        projection = buildStudyResultsPerSegment(filtered);
+                        break;
+                    case "turn":
+                        projection = buildStudyResultsPerTurn(filtered);
+                        break;
+                    case "assignment":
+                        projection = buildStudyResultsPerAssignment(filtered);
+                        break;
+                    case "step":
+                        projection = buildStudyResultsPerStep(filtered);
+                        break;
+                }
+                formatStudyResultsGroupBy(projection, groupByKind, globals.json);
+                return;
+            }
+            if (wantsSummary) {
+                // --summary on filtered participants: narrowed summary projection.
+                // Attach totals_unfiltered so callers can still see the pre-filter
+                // denominator (e.g. "12 / 80 participants matched").
+                const summary = buildStudyResultsSummary(filtered.study, filtered.participants);
+                const summaryOut = {
+                    ...summary,
+                    totals_unfiltered: filtered.totals_unfiltered,
+                };
+                output(summaryOut, globals.json, { preProjected: true });
+                return;
+            }
+            // Default (no --group-by, no --summary) but filters set: stable
+            // envelope on the filtered participants + totals_unfiltered. Empty
+            // slice contract: zero matches yields participant_count=0 and exit
+            // 0, never a 4/not-found.
+            const envelope = buildStudyResultsEnvelope(filtered.study, filtered.participants);
+            const envelopeOut = {
+                ...envelope,
+                totals_unfiltered: filtered.totals_unfiltered,
+            };
+            output(envelopeOut, globals.json, { preProjected: true });
         });
     });
     study

package/dist/lib/alias-store.d.ts CHANGED Viewed

@@ -19,6 +19,7 @@ export declare const ALIAS_PREFIX: {
     readonly askRound: "r";
     readonly chatEndpoint: "ep";
     readonly chatConfig: "cc";
+    readonly frame: "f";
 };
 /**
  * Save aliases for a list of IDs under the given prefix.

package/dist/lib/alias-store.js CHANGED Viewed

@@ -22,6 +22,7 @@ export const ALIAS_PREFIX = {
     askRound: "r",
     chatEndpoint: "ep",
     chatConfig: "cc",
+    frame: "f",
 };
 /** Format a number with zero-padding (minimum 2 digits). */
 function padNum(n) {
@@ -133,6 +134,7 @@ const HYDRATE_HINT = {
     a: "ish ask list",
     r: "ish ask get <ask-id>",
     ep: "ish chat endpoint list",
+    f: "ish study results <study-id> --frame <name>  # frames are discovered via the study's frames endpoint",
     // Legacy two-letter prefixes the deterministic generator may have
     // produced before; defaults below cover anything else.
 };

package/dist/lib/docs.js CHANGED Viewed

@@ -315,6 +315,8 @@ pick was wrong.
 - \`concepts/assignment\` — task definition syntax.
 - \`concepts/questionnaire\` — question types and timing.
 - \`concepts/run-verbs\` — when to use \`study run\` vs \`ask run\`.
+- \`guides/slicing-results\` — filter / project \`study results\` by frame,
+  segment, turn, sentiment, assignment, step.
 - \`reference/billing-limits\` — \`maxStudiesPerProduct\` cap on study creation.
 - \`reference/credits\` — per-run credit cost & how to preview before dispatch.
 `;
@@ -851,6 +853,9 @@ ride along when present in the JSON forms.
 - \`concepts/study\` — assignments are immutable to the run; questionnaire is too.
 - \`concepts/questionnaire\` — the other half of the study definition.
+- \`guides/slicing-results\` — slice the post-run envelope by step
+  (\`--step verify-email --group-by step\`), surface per-participant verdicts
+  inline, or restrict to the evidence interactions with \`--include-evidence\`.
 - \`reference/json-mode\` — how \`step_completion\` renders in lean vs --verbose.
 `;
 const CONCEPT_QUESTIONNAIRE = `# concept: questionnaire
@@ -1127,7 +1132,7 @@ deleted ask was the active one.
 - \`concepts/round\` — what a round is and how it executes.
 - \`concepts/people\` — how participants are chosen at ask creation.
 - \`concepts/run-verbs\` — \`ish ask run\` vs \`ish study run\`.
-- \`reference/credits\` — ask rounds bill \`n_participants * (1 + len(questions))\` credits per round; \`questions\` follow-ups bill *per participant* on top of the base response, so a 3-person panel with 2 follow-up questions costs \`3 * (1 + 2) = 9\` credits when all complete (not 3).
+- \`reference/credits\` — ask rounds bill **one credit per successful participant per round**, regardless of how many \`questions\` were included. The backend's asks worker bills \`amount=succeeded\` once per round dispatch; questions and round-summary synthesis don't trigger separate debits. A 3-person panel with 2 follow-up questions costs \`3\` credits when all complete, the same as a no-questions run. Failed participant responses (pre-flight errors, refusals) don't bill.
 `;
 const CONCEPT_ROUND = `# concept: round
@@ -2461,6 +2466,184 @@ ish study results --human
 When you genuinely need multiple fields in one parse pass, \`--json\` is
 still the right tool — \`--get\` is for single-value capture, not for
 reshaping output.
+## Slicing study results
+\`ish study results <id>\` accepts filter flags (\`--frame\`, \`--segment\`,
+\`--turn\`, \`--side\`, \`--assignment\`, \`--step\`, \`--sentiment\`,
+\`--actor\`, \`--iteration\`, \`--participant\`) and projection flags
+(\`--group-by iteration|frame|segment|turn|assignment|step\`). When any
+filter is passed, the envelope gains a \`totals_unfiltered\` field
+(\`{participant_count, interaction_count}\`) so an agent can sanity-check
+coverage: "matched 12 / 80 participants". A zero-match filter returns
+the stable envelope with \`participant_count: 0\` and exit code **0**
+(not 4) — slicing never errors on no-match.
+\`--group-by\` is **router-gated by modality**: \`frame\` requires
+interactive, \`segment\` requires media (video / audio / text / document),
+\`turn\` requires chat. Mismatched filter flags (e.g. \`--segment 0\` on
+an interactive study) emit a stderr warning and are ignored — they
+don't error. Full worked examples in \`guides/slicing-results\`.
+`;
+const GUIDE_SLICING_RESULTS = `# guide: slicing study results
+\`ish study results <id>\` returns a kitchen-sink envelope by default
+(every participant, every interaction, every interview answer). For
+narrower questions — *"what differed on the login screen across these
+five iterations?"*, *"who failed verify-email, and why?"*, *"frustrated
+reactions to segment 3 of the video"* — \`ish study results\` accepts
+**filter flags** (which interactions to keep) and **projection flags**
+(how to roll up what survives). Filters compose with AND across flags
+and OR within \`--sentiment\`. Filters and projections are purely
+client-side; they add no extra round trip beyond the standard study fetch.
+## Filter flags
+| Flag                          | Matches                                                                                       | Where it applies                                                |
+|-------------------------------|-----------------------------------------------------------------------------------------------|------------------------------------------------------------------|
+| \`--frame <ref>\`             | Interactions whose Frame name contains \`<ref>\` (case-insensitive). Also accepts a full Frame UUID, an \`f-…\` alias, or a \`frame_version_id\` UUID. | interactive — warn + ignore on chat / media                      |
+| \`--segment <ref>\`           | Integer matches \`actions[0].data.segment_index\`; non-integer is a substring match against \`segment_label\`. | video, audio, text, document — warn + ignore elsewhere           |
+| \`--turn <n>\`                | Interactions whose \`actions[0].data.turn_index == n\`.                                       | chat (external_chatbot + participant_pair)                       |
+| \`--side <a\|b>\`             | Interactions whose parent assignment has \`side == a\` or \`side == b\`.                       | chat participant_pair — warn + ignore on other chat / non-chat   |
+| \`--assignment <ref>\`        | Assignment UUID, or substring match against the assignment name.                              | all                                                              |
+| \`--step <ref>\`              | Filters \`participant_assignments[].step_results[]\` to verdicts matching the step id or name. | interactive + external_chatbot chat (steps live there)           |
+| \`--sentiment <labels>\`      | Comma-separated, case-insensitive label list (repeatable). Drops null-sentiment rows.         | all                                                              |
+| \`--actor <ai\|human\|user>\` | Restrict by actor.                                                                            | all                                                              |
+| \`--iteration <ref>\`         | Iteration UUID or label (\`A\`, \`B\`, … case-insensitive).                                    | all                                                              |
+| \`--participant <ref>\`       | Participant UUID or \`pt-…\` alias.                                                            | all                                                              |
+| \`--include-unmatched\`       | With \`--frame\`, keep degraded captures (\`frame_version_id: null\`) under a synthetic \`_unmatched\` bucket instead of dropping them. | interactive                                                      |
+| \`--include-evidence\`        | With \`--step\`, also drop interactions not listed in any surviving \`step_results[].evidence_interaction_ids[]\`. | interactive + external_chatbot chat                              |
+**Modality mismatch is not an error.** Pass \`--segment 0\` on an
+interactive study and the filter is ignored with a stderr warning.
+The exception is \`--group-by\` — see below.
+## Projection flags (--group-by)
+| Axis        | Output shape                                                                                                                                                              | Modality |
+|-------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
+| \`iteration\` | \`{study, slices: [{iteration_id, iteration_label, participant_count, interaction_count, sentiment, sample_comments, top_actions}, ...], totals_unfiltered, warnings}\` | all      |
+| \`frame\`   | \`[{frame_id, frame_label, interaction_count, sentiment_histogram, sample_comments, participant_aliases}, ...]\`                                                            | interactive (router errors on non-interactive) |
+| \`segment\` | \`[{segment_index, segment_label, interaction_count, sentiment_histogram, engagement_histogram, sample_comments}, ...]\`                                                    | media (router errors on non-media)             |
+| \`turn\`    | \`[{turn_index, interaction_count, sentiment_histogram, sample_replies, failures}, ...]\`                                                                                   | chat (router errors on non-chat)               |
+| \`assignment\` | \`[{assignment_id, assignment_name, interaction_count, sentiment_histogram, step_completion}, ...]\`                                                                      | all      |
+| \`step\`    | \`[{assignment_id, assignment_name, step_id, step_name, total, passed, inconclusive, failed, rate, participant_verdicts: [{participant_alias, verdict, reason, evidence_interaction_ids}, ...]}, ...]\` | interactive + external_chatbot chat            |
+\`--group-by\` is **mutually exclusive with \`--summary\` and
+\`--transcript\`**. \`--group-by frame\` on a chat study, \`--group-by
+turn\` on a video study, etc. error at the surface (exit 2) with a
+clear message before any IO.
+## The empty-slice contract
+A filter combination that matches zero interactions returns the
+**stable envelope shape** with:
+- \`participant_count: 0\`
+- \`totals_unfiltered: {participant_count: <N>, interaction_count: <M>}\` populated
+- exit code **0** (not 4)
+\`totals_unfiltered\` is the agent's sanity check: *"my filter matched
+0 of 80 participants — is the filter too tight, or did the run not
+produce data?"*. The shape never collapses to \`null\` or a different
+envelope; \`--get participant_count\` is always safe.
+## Worked examples
+\`\`\`bash
+# What differed on the login screen across the five iterations?
+ish study results s-b2c --frame login --group-by iteration
+# Frustrated reactions to segment 3 of the video
+ish study results s-b2c --segment 3 --sentiment Frustrated
+# Who failed the "verify email" step, and why?
+ish study results s-b2c --assignment "Sign up" --step verify-email --group-by step
+# Chat participant_pair: only side A turn 4
+ish study results s-b2c --side a --turn 4
+# Surface degraded captures (frame_version_id: null) under a "_unmatched" bucket:
+ish study results s-b2c --frame login --include-unmatched --group-by frame
+# Narrow the lean summary to a slice:
+ish study results s-b2c --summary --frame checkout --json
+\`\`\`
+## Combining filters
+Filters compose with **AND across flags** and **OR within
+\`--sentiment\`**. \`--frame login --sentiment Frustrated,Confused\`
+means "interactions on the login frame whose sentiment is Frustrated
+OR Confused". \`--summary\` composes with filters: the summary is
+computed over the filtered set. \`--transcript\` is single-participant
+and **errors when any filter or \`--group-by\` is set** (exit 2).
+## Defensive handling of nullable fields
+- \`interaction.sentiment\` is nullable (chat failure stubs,
+  pre-sentiment rows). Dropped **only** when \`--sentiment\` is set; kept
+  by every other filter.
+- \`interaction.frame_version_id\` is nullable on interactive studies
+  (degraded captures, ~12% on a failing iteration). Dropped by
+  \`--frame\` unless \`--include-unmatched\` is passed; surfaced as a
+  \`_unmatched\` bucket in \`--group-by frame\`.
+- Chat \`bot_reply.failure\` rows are kept in the default envelope,
+  dropped by \`--sentiment\` (they have \`sentiment: null\`), kept by
+  \`--actor\`, visible in \`--group-by turn\` under a \`failures\`
+  counter.
+## --frame resolution
+\`--frame login\` walks the frame list returned by
+\`GET /studies/{id}/frames\` and matches **case-insensitive substring**
+against the frame name. Other accepted shapes:
+- \`--frame 6ec…\` — full Frame UUID (exact match)
+- \`--frame f-6ec\` — short alias resolved via \`alias-store\`
+- \`--frame 7ec…\` — a \`frame_version_id\` UUID (matches only that version)
+An ambiguous substring (one matching more than one frame) errors with the candidate list:
+\`\`\`
+ish study results s-b2c --frame log
+# Error: --frame "log" is ambiguous — matched 2 frames: Login, Logout.
+# Use a more specific substring, a full Frame UUID, or an \`f-…\` alias.
+\`\`\`
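+A reference that resolves to no frame at all also errors and lists the available frame names; this is a resolution failure, distinct from the exit-0 empty-slice contract for filters that match zero interactions.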
+## Common --get paths on a sliced envelope
+\`\`\`
+# Sanity-check coverage:
+--get totals_unfiltered.participant_count
+--get totals_unfiltered.interaction_count
+# Per-iteration projection:
+--get slices.iteration_label             # one label per line
+--get slices.0.participant_count
+--get slices.0.sentiment
+# Per-frame / per-segment / per-turn (bare array):
+--get 0.frame_label
+--get 0.segment_index
+--get 0.sentiment_histogram
+# Per-step:
+--get 0.rate
+--get 0.participant_verdicts.verdict     # one verdict per participant
+\`\`\`
+## Related
+- \`concepts/study\` — the parent artifact whose results are being sliced.
+- \`concepts/assignment\` — defines the steps that \`--step\` and
+  \`--group-by step\` filter against.
+- \`reference/json-mode\` — display vs capture vs chain output rules
+  (\`--get\`, \`--fields\`, exit codes).
+- \`reference/aliases\` — \`s-…\` for studies, \`pt-…\` for participants,
+  \`f-…\` for frames. Any UUID-accepting flag also accepts the alias.
 `;
 const GUIDE_FIRST_STUDY = `# guide: your first study, end to end
@@ -4053,6 +4236,12 @@ const PAGES = [
         description: "Login → workspace → people → study → iteration → run → results.",
         body: GUIDE_FIRST_STUDY,
     },
+    {
+        slug: "guides/slicing-results",
+        title: "guide: slicing study results by frame / segment / turn / sentiment",
+        description: "Filter and project `ish study results` — --frame, --segment, --turn, --side, --assignment, --step, --sentiment, --actor, --iteration, --participant; --group-by iteration|frame|segment|turn|assignment|step; totals_unfiltered + empty-slice contract.",
+        body: GUIDE_SLICING_RESULTS,
+    },
     {
         slug: "guides/chat",
         title: "guide: chat-modality studies",

package/dist/lib/output.d.ts CHANGED Viewed

@@ -48,6 +48,12 @@ export declare function formatWorkspaceDetail(workspace: Record<string, unknown>
 export declare function formatSiteAccessStatus(summary: import("./site-access.js").SiteAccessSummary, json: boolean): void;
 export declare function formatStudyList(studies: Record<string, unknown>[], json: boolean): void;
 export declare function formatStudyDetail(study: Record<string, unknown>, json: boolean, options?: OutputOptions, participants?: ReadonlyArray<Record<string, unknown>>): void;
+/**
+ * Stable JSON envelope for `study results`. Schema is fixed regardless of
+ * study state — fields default to `null`, `0`, or `[]` when nothing has run.
+ * Agents can rely on the keys always being present (M4).
+ */
+export declare function buildStudyResultsEnvelope(study: Record<string, unknown>, participants: ReadonlyArray<Record<string, unknown>>): Record<string, unknown>;
 export declare function formatStudyResults(study: Record<string, unknown>, participants: ReadonlyArray<Record<string, unknown>>, json: boolean): void;
 /**
  * `study results --summary` projection. Drops interview_answers + per-participant
@@ -102,3 +108,15 @@ export declare function deriveWinnerConfidence(args: {
 }): "low" | "medium" | "high";
 export declare function formatAskResults(ask: Record<string, unknown>, json: boolean, roundFilter?: number): void;
 export declare function formatConfigList(configs: Record<string, unknown>[], json: boolean): void;
+export type StudyResultsGroupByKind = "iteration" | "frame" | "segment" | "turn" | "assignment" | "step";
+/**
+ * Render a `--group-by <kind>` projection. JSON mode is a thin pass-through
+ * to jsonOutput with `preProjected: true` so the lean transform doesn't
+ * strip our stable empties. Human mode renders one section per slice plus
+ * a small ASCII sentiment histogram.
+ *
+ * The renderer accepts both the wrapped `{study, slices, ...}` shape (per-
+ * iteration) and the bare-array shape (every other --group-by); the
+ * surface (T5) doesn't need to know the difference.
+ */
+export declare function formatStudyResultsGroupBy(projection: unknown, kind: StudyResultsGroupByKind, json: boolean): void;