@ishlabs/cli 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/study.js +250 -13
- package/dist/lib/alias-store.d.ts +1 -0
- package/dist/lib/alias-store.js +2 -0
- package/dist/lib/docs.js +190 -1
- package/dist/lib/output.d.ts +18 -0
- package/dist/lib/output.js +217 -1
- package/dist/lib/skill-content.js +68 -0
- package/dist/lib/study-participants.d.ts +13 -0
- package/dist/lib/study-participants.js +13 -0
- package/dist/lib/study-results-filters.d.ts +91 -0
- package/dist/lib/study-results-filters.js +538 -0
- package/dist/lib/study-results-projections.d.ts +122 -0
- package/dist/lib/study-results-projections.js +577 -0
- package/package.json +1 -1
package/dist/commands/study.js
CHANGED
|
@@ -3,10 +3,12 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import { readFileSync } from "node:fs";
|
|
5
5
|
import { Option } from "commander";
|
|
6
|
-
import { withClient, getWebUrl, terminalLink, resolveWorkspace, confirmDestructive, readFileOrStdin } from "../lib/command-helpers.js";
|
|
6
|
+
import { withClient, getWebUrl, terminalLink, resolveWorkspace, confirmDestructive, readFileOrStdin, collectIds } from "../lib/command-helpers.js";
|
|
7
7
|
import { resolveId, tagAlias, ALIAS_PREFIX } from "../lib/alias-store.js";
|
|
8
8
|
import { loadConfig, saveConfig } from "../config.js";
|
|
9
|
-
import { formatStudyList, formatStudyDetail, formatStudyResults, buildStudyResultsSummary, buildChatTranscript, output, ValidationError, } from "../lib/output.js";
|
|
9
|
+
import { formatStudyList, formatStudyDetail, formatStudyResults, buildStudyResultsEnvelope, buildStudyResultsSummary, buildChatTranscript, formatStudyResultsGroupBy, output, ValidationError, } from "../lib/output.js";
|
|
10
|
+
import { applyResultsFilters } from "../lib/study-results-filters.js";
|
|
11
|
+
import { buildStudyResultsPerIteration, buildStudyResultsPerFrame, buildStudyResultsPerSegment, buildStudyResultsPerTurn, buildStudyResultsPerAssignment, buildStudyResultsPerStep, } from "../lib/study-results-projections.js";
|
|
10
12
|
import { VALID_CONTENT_TYPES } from "../lib/types.js";
|
|
11
13
|
import { fetchStudyParticipants } from "../lib/study-participants.js";
|
|
12
14
|
import { parseAssignment, loadAssignmentsFile, validateAssignmentsArray, parseQuestion } from "../lib/study-inputs.js";
|
|
@@ -669,22 +671,41 @@ list table layout in human mode.`)
|
|
|
669
671
|
});
|
|
670
672
|
study
|
|
671
673
|
.command("results")
|
|
672
|
-
.description("View aggregated results: participant counts, sentiment, interview answers. Returns a stable envelope with empty fields when no runs have completed.")
|
|
674
|
+
.description("View aggregated results: participant counts, sentiment, interview answers. Returns a stable envelope with empty fields when no runs have completed. Slice with filter flags (--frame, --segment, --turn, --side, --assignment, --step, --sentiment, --actor, --iteration, --participant) or project with --group-by (iteration|frame|segment|turn|assignment|step).")
|
|
673
675
|
.argument("<id>", "Study ID")
|
|
674
676
|
.option("--workspace <id>", "Workspace ID; accepted for consistency (workspace is inferred from the study)")
|
|
675
|
-
.option("--summary", "Lean summary projection: counts + sentiment + per-participant {alias, status, sentiment, comment}. Drops interview_answers + per-interaction breakdowns.")
|
|
677
|
+
.option("--summary", "Lean summary projection: counts + sentiment + per-participant {alias, status, sentiment, comment}. Drops interview_answers + per-interaction breakdowns. Composes with filters: `--summary --frame login` narrows the summary to the login-screen interactions.")
|
|
676
678
|
// PC-N4: agents reach for `--summarize` (verb) by analogy with the MCP
|
|
677
679
|
// `summarize` action; accept it as a hidden alias of --summary so the
|
|
678
680
|
// canonical flag stays the documented one but the muscle-memory variant
|
|
679
681
|
// works without a round-trip.
|
|
680
682
|
.addOption(new Option("--summarize", "Hidden alias for --summary").hideHelp())
|
|
681
|
-
.option("--transcript <participant_id>", "Chat transcript projection for one participant: flat role/text/turn-index array (chat-modality only). Mirrors the MCP `get_chat_transcript` shape.")
|
|
683
|
+
.option("--transcript <participant_id>", "Chat transcript projection for one participant: flat role/text/turn-index array (chat-modality only). Mirrors the MCP `get_chat_transcript` shape. Cannot combine with filters or --group-by (transcript is a single-participant projection).")
|
|
684
|
+
// --- Slice / projection flags (T5) ---
|
|
685
|
+
.option("--frame <ref>", "Filter to interactions whose Frame name contains <ref> (case-insensitive), or whose Frame UUID / `f-…` alias / frame_version_id matches. Interactive only — warned and ignored on other modalities.")
|
|
686
|
+
.option("--segment <ref>", "Filter media studies (video/audio/text/document) by segment index (integer) or segment label (substring). Image and other modalities: warned and ignored.")
|
|
687
|
+
.option("--turn <n>", "Filter chat interactions to a single `actions[0].data.turn_index`. Non-chat modalities: warned and ignored.")
|
|
688
|
+
.option("--side <a|b>", "Filter participant_pair chat interactions by assignment side. Other modalities: warned and ignored.")
|
|
689
|
+
.option("--assignment <ref>", "Filter to a single assignment by UUID or name (substring, case-insensitive).")
|
|
690
|
+
.option("--step <ref>", "Filter `participant_assignments[].step_results[]` to a single step by step-id or name (substring). Pair with --include-evidence to also drop non-evidence interactions.")
|
|
691
|
+
.option("--sentiment <labels>", "Filter to interactions whose sentiment.label is in the comma-separated list (case-insensitive; repeatable). Drops null-sentiment rows.", collectIds, [])
|
|
692
|
+
.option("--actor <actor>", "Filter to interactions whose actor is `ai`, `human`, or `user` (case-insensitive).")
|
|
693
|
+
.option("--iteration <ref>", "Restrict to a single iteration by UUID or label.")
|
|
694
|
+
.option("--participant <ref>", "Restrict to a single participant by UUID or `pt-…` alias.")
|
|
695
|
+
.option("--include-unmatched", "When --frame is set, keep interactions with null frame_version_id under a synthetic `_unmatched` bucket instead of dropping them.")
|
|
696
|
+
.option("--include-evidence", "When --step is set, also drop interactions not listed in any surviving step_results[].evidence_interaction_ids[].")
|
|
697
|
+
.option("--group-by <axis>", "Project results into per-axis slices: iteration | frame | segment | turn | assignment | step. Mutually exclusive with --summary and --transcript.")
|
|
682
698
|
.addHelpText("after", `
|
|
683
699
|
Examples:
|
|
684
700
|
$ ish study results <id>
|
|
685
701
|
$ ish study results <id> --json
|
|
686
702
|
$ ish study results <id> --summary --json
|
|
687
703
|
$ ish study results <id> --transcript pt-d4e --json
|
|
704
|
+
# Slice (filters compose: AND across flags, OR within --sentiment)
|
|
705
|
+
$ ish study results <id> --frame login --group-by iteration
|
|
706
|
+
$ ish study results <id> --segment 3 --sentiment Frustrated
|
|
707
|
+
$ ish study results <id> --assignment "Sign up" --step verify-email --group-by step
|
|
708
|
+
$ ish study results <id> --side a --turn 4
|
|
688
709
|
|
|
689
710
|
Default --json envelope (M10: per-answer sentiment now included):
|
|
690
711
|
{
|
|
@@ -707,6 +728,11 @@ Default --json envelope (M10: per-answer sentiment now included):
|
|
|
707
728
|
]
|
|
708
729
|
}
|
|
709
730
|
|
|
731
|
+
When any filter flag is passed, the envelope gains a \`totals_unfiltered\` field
|
|
732
|
+
({ participant_count, interaction_count }) so callers can sanity-check coverage
|
|
733
|
+
("matched 12 / 80 participants"). A zero-match filter returns the stable
|
|
734
|
+
envelope with participant_count=0 and exit code 0 (not 4).
|
|
735
|
+
|
|
710
736
|
--summary projection (M2-friction-7: drops the interview_answers payload):
|
|
711
737
|
{ study, participant_count, completed_count, failed_count, sentiment, participants: [...] }
|
|
712
738
|
|
|
@@ -723,6 +749,24 @@ Default --json envelope (M10: per-answer sentiment now included):
|
|
|
723
749
|
"participant_summary": { "comment": "...", "sentiment": {...} }
|
|
724
750
|
}
|
|
725
751
|
|
|
752
|
+
--group-by iteration projection:
|
|
753
|
+
{ study, slices: [{ iteration_id, iteration_label, participant_count, interaction_count, sentiment, sample_comments, top_actions }, ...], totals_unfiltered, warnings }
|
|
754
|
+
|
|
755
|
+
--group-by frame projection (interactive only):
|
|
756
|
+
[{ frame_id, frame_label, interaction_count, sentiment_histogram, sample_comments, participant_aliases }, ...]
|
|
757
|
+
|
|
758
|
+
--group-by segment projection (video/audio/text/document):
|
|
759
|
+
[{ segment_index, segment_label, interaction_count, sentiment_histogram, engagement_histogram, sample_comments }, ...]
|
|
760
|
+
|
|
761
|
+
--group-by turn projection (chat only):
|
|
762
|
+
[{ turn_index, interaction_count, sentiment_histogram, sample_replies, failures }, ...]
|
|
763
|
+
|
|
764
|
+
--group-by assignment projection:
|
|
765
|
+
[{ assignment_id, assignment_name, interaction_count, sentiment_histogram, step_completion }, ...]
|
|
766
|
+
|
|
767
|
+
--group-by step projection:
|
|
768
|
+
[{ assignment_id, assignment_name, step_id, step_name, total, passed, inconclusive, failed, rate, participant_verdicts: [{ participant_alias, verdict, reason, evidence_interaction_ids }, ...] }, ...]
|
|
769
|
+
|
|
726
770
|
Tips:
|
|
727
771
|
Use \`--get <path>\` for a single value (e.g. \`--get participant_count\`),
|
|
728
772
|
\`--fields a,b,c\` to project the JSON output further.
|
|
@@ -741,6 +785,7 @@ Common --get paths (default envelope):
|
|
|
741
785
|
--get interview_answers # full per-question payload
|
|
742
786
|
--get interview_answers.0.question # text of the first question
|
|
743
787
|
--get interview_answers.0.answers.0.answer # first answer to the first question
|
|
788
|
+
--get totals_unfiltered.participant_count # pre-filter participant count (when slicing)
|
|
744
789
|
|
|
745
790
|
Common --get paths (--transcript <participant_id> envelope):
|
|
746
791
|
--get transcript # full role/text/turn array
|
|
@@ -749,6 +794,18 @@ Common --get paths (--transcript <participant_id> envelope):
|
|
|
749
794
|
--get participant_summary.sentiment # aggregate sentiment map
|
|
750
795
|
--get unique_bot_replies # bot-side message count
|
|
751
796
|
|
|
797
|
+
Common --get paths (--group-by projections):
|
|
798
|
+
--get slices.iteration_label # per-iteration: one label per line
|
|
799
|
+
--get slices.0.participant_count # per-iteration: first slice's count
|
|
800
|
+
--get 0.frame_label # per-frame: first frame's label
|
|
801
|
+
--get 0.sentiment_histogram # per-frame/segment/turn: first slice's sentiment map
|
|
802
|
+
--get 0.segment_index # per-segment: first segment's index
|
|
803
|
+
--get 0.turn_index # per-turn: first turn's index
|
|
804
|
+
--get 0.assignment_name # per-assignment/step: first slice's assignment
|
|
805
|
+
--get 0.step_name # per-step: first slice's step
|
|
806
|
+
--get 0.rate # per-step: first step's pass-rate
|
|
807
|
+
--get 0.participant_verdicts.verdict # per-step: verdict per participant
|
|
808
|
+
|
|
752
809
|
When no runs have completed, the default envelope is returned with zero counts and empty arrays.`)
|
|
753
810
|
.action(async (id, opts, cmd) => {
|
|
754
811
|
await withClient(cmd, async (client, globals) => {
|
|
@@ -756,10 +813,76 @@ When no runs have completed, the default envelope is returned with zero counts a
|
|
|
756
813
|
// into a single boolean before validation so the rest of the
|
|
757
814
|
// handler reads only `summary`.
|
|
758
815
|
const wantsSummary = !!(opts.summary || opts.summarize);
|
|
816
|
+
// T5: detect whether any filter flag was passed. Interaction-level
|
|
817
|
+
// and participant-level flags both count — they all narrow the
|
|
818
|
+
// result set. `--include-unmatched`/`--include-evidence` are
|
|
819
|
+
// modifiers that only make sense alongside --frame/--step but
|
|
820
|
+
// count as "filter intent" for the transcript/conflict check.
|
|
821
|
+
const hasFilter = opts.frame !== undefined ||
|
|
822
|
+
opts.segment !== undefined ||
|
|
823
|
+
opts.turn !== undefined ||
|
|
824
|
+
opts.side !== undefined ||
|
|
825
|
+
opts.assignment !== undefined ||
|
|
826
|
+
opts.step !== undefined ||
|
|
827
|
+
(opts.sentiment !== undefined && opts.sentiment.length > 0) ||
|
|
828
|
+
opts.actor !== undefined ||
|
|
829
|
+
opts.iteration !== undefined ||
|
|
830
|
+
opts.participant !== undefined ||
|
|
831
|
+
opts.includeUnmatched === true ||
|
|
832
|
+
opts.includeEvidence === true;
|
|
833
|
+
const hasGroupBy = opts.groupBy !== undefined;
|
|
834
|
+
// --- Conflict validation (no IO yet) ---
|
|
759
835
|
if (wantsSummary && opts.transcript) {
|
|
760
836
|
throw new ValidationError("Pass only one of: --summary, --transcript.", ["--summary", "--transcript"]);
|
|
761
837
|
}
|
|
838
|
+
if (opts.transcript && (hasFilter || hasGroupBy)) {
|
|
839
|
+
// --transcript is a single-participant chat projection — slicing
|
|
840
|
+
// doesn't make sense.
|
|
841
|
+
throw new ValidationError("--transcript is a single-participant projection; cannot combine with filter flags or --group-by.", ["--transcript"]);
|
|
842
|
+
}
|
|
843
|
+
if (wantsSummary && hasGroupBy) {
|
|
844
|
+
throw new ValidationError("Pass only one of: --summary, --group-by.", ["--summary", "--group-by"]);
|
|
845
|
+
}
|
|
846
|
+
// --side validation: must be exactly "a" or "b" (case-insensitive).
|
|
847
|
+
const sideNormalised = opts.side ? opts.side.toLowerCase() : undefined;
|
|
848
|
+
if (sideNormalised !== undefined && sideNormalised !== "a" && sideNormalised !== "b") {
|
|
849
|
+
throw new ValidationError(`--side must be "a" or "b", got "${opts.side}".`, ["a", "b"]);
|
|
850
|
+
}
|
|
851
|
+
// --actor validation: must be one of ai|human|user (case-insensitive).
|
|
852
|
+
const actorNormalised = opts.actor ? opts.actor.toLowerCase() : undefined;
|
|
853
|
+
if (actorNormalised !== undefined &&
|
|
854
|
+
actorNormalised !== "ai" &&
|
|
855
|
+
actorNormalised !== "human" &&
|
|
856
|
+
actorNormalised !== "user") {
|
|
857
|
+
throw new ValidationError(`--actor must be "ai", "human", or "user", got "${opts.actor}".`, ["ai", "human", "user"]);
|
|
858
|
+
}
|
|
859
|
+
// --turn validation: must parse as a non-negative integer.
|
|
860
|
+
let turnNum;
|
|
861
|
+
if (opts.turn !== undefined) {
|
|
862
|
+
const n = parseInt(opts.turn, 10);
|
|
863
|
+
if (Number.isNaN(n) || n < 0 || String(n) !== opts.turn.trim()) {
|
|
864
|
+
throw new ValidationError(`--turn must be a non-negative integer, got "${opts.turn}".`, []);
|
|
865
|
+
}
|
|
866
|
+
turnNum = n;
|
|
867
|
+
}
|
|
868
|
+
// --group-by axis whitelist.
|
|
869
|
+
const VALID_GROUP_BY = [
|
|
870
|
+
"iteration",
|
|
871
|
+
"frame",
|
|
872
|
+
"segment",
|
|
873
|
+
"turn",
|
|
874
|
+
"assignment",
|
|
875
|
+
"step",
|
|
876
|
+
];
|
|
877
|
+
let groupByKind;
|
|
878
|
+
if (opts.groupBy !== undefined) {
|
|
879
|
+
if (!VALID_GROUP_BY.includes(opts.groupBy)) {
|
|
880
|
+
throw new ValidationError(`--group-by must be one of: ${VALID_GROUP_BY.join(", ")}. Got "${opts.groupBy}".`, VALID_GROUP_BY);
|
|
881
|
+
}
|
|
882
|
+
groupByKind = opts.groupBy;
|
|
883
|
+
}
|
|
762
884
|
const rid = resolveId(id);
|
|
885
|
+
// --- --transcript fast path (no fetch of study payload) ---
|
|
763
886
|
if (opts.transcript) {
|
|
764
887
|
// --transcript <participant_id>: bypass the study aggregator; fetch
|
|
765
888
|
// the named participant directly. Cheaper (one GET, no nested
|
|
@@ -769,20 +892,134 @@ When no runs have completed, the default envelope is returned with zero counts a
|
|
|
769
892
|
output(buildChatTranscript(participant), globals.json, { preProjected: true });
|
|
770
893
|
return;
|
|
771
894
|
}
|
|
772
|
-
|
|
895
|
+
// --- Default-fast path: no filter, no group-by ---
|
|
896
|
+
if (!hasFilter && !hasGroupBy) {
|
|
897
|
+
const [data, participants] = await Promise.all([
|
|
898
|
+
client.get(`/studies/${rid}`),
|
|
899
|
+
fetchStudyParticipants(client, rid),
|
|
900
|
+
]);
|
|
901
|
+
if (wantsSummary) {
|
|
902
|
+
output(buildStudyResultsSummary(data, participants), globals.json, { preProjected: true });
|
|
903
|
+
}
|
|
904
|
+
else {
|
|
905
|
+
formatStudyResults(data, participants, globals.json);
|
|
906
|
+
}
|
|
907
|
+
if (!globals.json && data.product_id) {
|
|
908
|
+
const url = getWebUrl(globals, `/${data.product_id}/${rid}/overview`);
|
|
909
|
+
console.error(`\n ${terminalLink(url, "Open in browser ↗")}\n`);
|
|
910
|
+
}
|
|
911
|
+
return;
|
|
912
|
+
}
|
|
913
|
+
// --- Slice / projection path: fetch in parallel, then filter+project ---
|
|
914
|
+
//
|
|
915
|
+
// Modality gating for --group-by happens AFTER the study fetch
|
|
916
|
+
// (we need study.modality), but BEFORE any further work — see the
|
|
917
|
+
// post-fetch validation block below. Pre-fetch validation above is
|
|
918
|
+
// limited to checks that don't need wire data.
|
|
919
|
+
const fetchFrames = opts.frame !== undefined;
|
|
920
|
+
const [study, participants, framesPayload] = await Promise.all([
|
|
773
921
|
client.get(`/studies/${rid}`),
|
|
774
922
|
fetchStudyParticipants(client, rid),
|
|
923
|
+
fetchFrames
|
|
924
|
+
? client.get(`/studies/${rid}/frames`)
|
|
925
|
+
: Promise.resolve([]),
|
|
775
926
|
]);
|
|
776
|
-
|
|
777
|
-
|
|
927
|
+
const studyRec = study;
|
|
928
|
+
const modality = typeof studyRec.modality === "string" ? studyRec.modality : "unknown";
|
|
929
|
+
// Modality gating for --group-by — router-level, NOT projection-level
|
|
930
|
+
// (devon's T7 note: projection builders are intentionally
|
|
931
|
+
// modality-agnostic and bucket non-matching rows into `_unmatched`;
|
|
932
|
+
// the surface is responsible for refusing nonsensical axes up front).
|
|
933
|
+
if (groupByKind === "frame" && modality !== "interactive") {
|
|
934
|
+
throw new ValidationError(`--group-by frame requires modality=interactive; this study is "${modality}".`, ["interactive"]);
|
|
778
935
|
}
|
|
779
|
-
|
|
780
|
-
|
|
936
|
+
const SEGMENT_MODALITIES = ["video", "audio", "text", "document"];
|
|
937
|
+
if (groupByKind === "segment" && !SEGMENT_MODALITIES.includes(modality)) {
|
|
938
|
+
throw new ValidationError(`--group-by segment requires modality ∈ {${SEGMENT_MODALITIES.join(", ")}}; this study is "${modality}".`, SEGMENT_MODALITIES);
|
|
781
939
|
}
|
|
782
|
-
if (
|
|
783
|
-
|
|
784
|
-
console.error(`\n ${terminalLink(url, "Open in browser ↗")}\n`);
|
|
940
|
+
if (groupByKind === "turn" && modality !== "chat") {
|
|
941
|
+
throw new ValidationError(`--group-by turn requires modality=chat; this study is "${modality}".`, ["chat"]);
|
|
785
942
|
}
|
|
943
|
+
// Coerce the frames payload to a plain array of records (the API
|
|
944
|
+
// returns a bare array). Tolerate `{items: [...]}` shape in case the
|
|
945
|
+
// endpoint ever normalises.
|
|
946
|
+
const rawFrames = Array.isArray(framesPayload)
|
|
947
|
+
? framesPayload
|
|
948
|
+
: Array.isArray(framesPayload?.items)
|
|
949
|
+
? (framesPayload.items)
|
|
950
|
+
: [];
|
|
951
|
+
const filters = {
|
|
952
|
+
frame: opts.frame,
|
|
953
|
+
segment: opts.segment,
|
|
954
|
+
turn: turnNum,
|
|
955
|
+
side: sideNormalised,
|
|
956
|
+
assignment: opts.assignment,
|
|
957
|
+
step: opts.step,
|
|
958
|
+
sentiment: opts.sentiment && opts.sentiment.length > 0 ? opts.sentiment : undefined,
|
|
959
|
+
actor: actorNormalised,
|
|
960
|
+
iteration: opts.iteration,
|
|
961
|
+
participant: opts.participant,
|
|
962
|
+
includeUnmatched: opts.includeUnmatched === true ? true : undefined,
|
|
963
|
+
includeEvidence: opts.includeEvidence === true ? true : undefined,
|
|
964
|
+
};
|
|
965
|
+
const filtered = applyResultsFilters(studyRec, participants, rawFrames, filters);
|
|
966
|
+
// Surface modality-mismatch warnings (and any other diagnostics from
|
|
967
|
+
// applyResultsFilters) on stderr so JSON output stays clean. The
|
|
968
|
+
// filter pipeline downgrades mismatched flags to no-ops; the warnings
|
|
969
|
+
// tell the agent which flags were ignored and why.
|
|
970
|
+
if (filtered.warnings.length > 0 && !globals.quiet) {
|
|
971
|
+
for (const w of filtered.warnings) {
|
|
972
|
+
console.error(`warning: ${w}`);
|
|
973
|
+
}
|
|
974
|
+
}
|
|
975
|
+
// --- Dispatch: --group-by projection > --summary on filtered > filtered envelope ---
|
|
976
|
+
if (groupByKind !== undefined) {
|
|
977
|
+
let projection;
|
|
978
|
+
switch (groupByKind) {
|
|
979
|
+
case "iteration":
|
|
980
|
+
projection = buildStudyResultsPerIteration(filtered);
|
|
981
|
+
break;
|
|
982
|
+
case "frame":
|
|
983
|
+
projection = buildStudyResultsPerFrame(filtered);
|
|
984
|
+
break;
|
|
985
|
+
case "segment":
|
|
986
|
+
projection = buildStudyResultsPerSegment(filtered);
|
|
987
|
+
break;
|
|
988
|
+
case "turn":
|
|
989
|
+
projection = buildStudyResultsPerTurn(filtered);
|
|
990
|
+
break;
|
|
991
|
+
case "assignment":
|
|
992
|
+
projection = buildStudyResultsPerAssignment(filtered);
|
|
993
|
+
break;
|
|
994
|
+
case "step":
|
|
995
|
+
projection = buildStudyResultsPerStep(filtered);
|
|
996
|
+
break;
|
|
997
|
+
}
|
|
998
|
+
formatStudyResultsGroupBy(projection, groupByKind, globals.json);
|
|
999
|
+
return;
|
|
1000
|
+
}
|
|
1001
|
+
if (wantsSummary) {
|
|
1002
|
+
// --summary on filtered participants: narrowed summary projection.
|
|
1003
|
+
// Attach totals_unfiltered so callers can still see the pre-filter
|
|
1004
|
+
// denominator (e.g. "12 / 80 participants matched").
|
|
1005
|
+
const summary = buildStudyResultsSummary(filtered.study, filtered.participants);
|
|
1006
|
+
const summaryOut = {
|
|
1007
|
+
...summary,
|
|
1008
|
+
totals_unfiltered: filtered.totals_unfiltered,
|
|
1009
|
+
};
|
|
1010
|
+
output(summaryOut, globals.json, { preProjected: true });
|
|
1011
|
+
return;
|
|
1012
|
+
}
|
|
1013
|
+
// Default (no --group-by, no --summary) but filters set: stable
|
|
1014
|
+
// envelope on the filtered participants + totals_unfiltered. Empty
|
|
1015
|
+
// slice contract: zero matches yields participant_count=0 and exit
|
|
1016
|
+
// 0, never a 4/not-found.
|
|
1017
|
+
const envelope = buildStudyResultsEnvelope(filtered.study, filtered.participants);
|
|
1018
|
+
const envelopeOut = {
|
|
1019
|
+
...envelope,
|
|
1020
|
+
totals_unfiltered: filtered.totals_unfiltered,
|
|
1021
|
+
};
|
|
1022
|
+
output(envelopeOut, globals.json, { preProjected: true });
|
|
786
1023
|
});
|
|
787
1024
|
});
|
|
788
1025
|
study
|
package/dist/lib/alias-store.js
CHANGED
|
@@ -22,6 +22,7 @@ export const ALIAS_PREFIX = {
|
|
|
22
22
|
askRound: "r",
|
|
23
23
|
chatEndpoint: "ep",
|
|
24
24
|
chatConfig: "cc",
|
|
25
|
+
frame: "f",
|
|
25
26
|
};
|
|
26
27
|
/** Format a number with zero-padding (minimum 2 digits). */
|
|
27
28
|
function padNum(n) {
|
|
@@ -133,6 +134,7 @@ const HYDRATE_HINT = {
|
|
|
133
134
|
a: "ish ask list",
|
|
134
135
|
r: "ish ask get <ask-id>",
|
|
135
136
|
ep: "ish chat endpoint list",
|
|
137
|
+
f: "ish study results <study-id> --frame <name> # frames are discovered via the study's frames endpoint",
|
|
136
138
|
// Legacy two-letter prefixes the deterministic generator may have
|
|
137
139
|
// produced before; defaults below cover anything else.
|
|
138
140
|
};
|
package/dist/lib/docs.js
CHANGED
|
@@ -315,6 +315,8 @@ pick was wrong.
|
|
|
315
315
|
- \`concepts/assignment\` — task definition syntax.
|
|
316
316
|
- \`concepts/questionnaire\` — question types and timing.
|
|
317
317
|
- \`concepts/run-verbs\` — when to use \`study run\` vs \`ask run\`.
|
|
318
|
+
- \`guides/slicing-results\` — filter / project \`study results\` by frame,
|
|
319
|
+
segment, turn, sentiment, assignment, step.
|
|
318
320
|
- \`reference/billing-limits\` — \`maxStudiesPerProduct\` cap on study creation.
|
|
319
321
|
- \`reference/credits\` — per-run credit cost & how to preview before dispatch.
|
|
320
322
|
`;
|
|
@@ -851,6 +853,9 @@ ride along when present in the JSON forms.
|
|
|
851
853
|
|
|
852
854
|
- \`concepts/study\` — assignments are immutable to the run; questionnaire is too.
|
|
853
855
|
- \`concepts/questionnaire\` — the other half of the study definition.
|
|
856
|
+
- \`guides/slicing-results\` — slice the post-run envelope by step
|
|
857
|
+
(\`--step verify-email --group-by step\`), surface per-participant verdicts
|
|
858
|
+
inline, or restrict to the evidence interactions with \`--include-evidence\`.
|
|
854
859
|
- \`reference/json-mode\` — how \`step_completion\` renders in lean vs --verbose.
|
|
855
860
|
`;
|
|
856
861
|
const CONCEPT_QUESTIONNAIRE = `# concept: questionnaire
|
|
@@ -1127,7 +1132,7 @@ deleted ask was the active one.
|
|
|
1127
1132
|
- \`concepts/round\` — what a round is and how it executes.
|
|
1128
1133
|
- \`concepts/people\` — how participants are chosen at ask creation.
|
|
1129
1134
|
- \`concepts/run-verbs\` — \`ish ask run\` vs \`ish study run\`.
|
|
1130
|
-
- \`reference/credits\` — ask rounds bill
|
|
1135
|
+
- \`reference/credits\` — ask rounds bill **one credit per successful participant per round**, regardless of how many \`questions\` were included. The backend's asks worker bills \`amount=succeeded\` once per round dispatch; questions and round-summary synthesis don't trigger separate debits. A 3-person panel with 2 follow-up questions costs \`3\` credits when all complete, the same as a no-questions run. Failed participant responses (pre-flight errors, refusals) don't bill.
|
|
1131
1136
|
`;
|
|
1132
1137
|
const CONCEPT_ROUND = `# concept: round
|
|
1133
1138
|
|
|
@@ -2461,6 +2466,184 @@ ish study results --human
|
|
|
2461
2466
|
When you genuinely need multiple fields in one parse pass, \`--json\` is
|
|
2462
2467
|
still the right tool — \`--get\` is for single-value capture, not for
|
|
2463
2468
|
reshaping output.
|
|
2469
|
+
|
|
2470
|
+
## Slicing study results
|
|
2471
|
+
|
|
2472
|
+
\`ish study results <id>\` accepts filter flags (\`--frame\`, \`--segment\`,
|
|
2473
|
+
\`--turn\`, \`--side\`, \`--assignment\`, \`--step\`, \`--sentiment\`,
|
|
2474
|
+
\`--actor\`, \`--iteration\`, \`--participant\`) and projection flags
|
|
2475
|
+
(\`--group-by iteration|frame|segment|turn|assignment|step\`). When any
|
|
2476
|
+
filter is passed, the envelope gains a \`totals_unfiltered\` field
|
|
2477
|
+
(\`{participant_count, interaction_count}\`) so an agent can sanity-check
|
|
2478
|
+
coverage: "matched 12 / 80 participants". A zero-match filter returns
|
|
2479
|
+
the stable envelope with \`participant_count: 0\` and exit code **0**
|
|
2480
|
+
(not 4) — slicing never errors on no-match.
|
|
2481
|
+
|
|
2482
|
+
\`--group-by\` is **router-gated by modality**: \`frame\` requires
|
|
2483
|
+
interactive, \`segment\` requires media (video / audio / text / document),
|
|
2484
|
+
\`turn\` requires chat. Mismatched filter flags (e.g. \`--segment 0\` on
|
|
2485
|
+
an interactive study) emit a stderr warning and are ignored — they
|
|
2486
|
+
don't error. Full worked examples in \`guides/slicing-results\`.
|
|
2487
|
+
`;
|
|
2488
|
+
const GUIDE_SLICING_RESULTS = `# guide: slicing study results
|
|
2489
|
+
|
|
2490
|
+
\`ish study results <id>\` returns a kitchen-sink envelope by default
|
|
2491
|
+
(every participant, every interaction, every interview answer). For
|
|
2492
|
+
narrower questions — *"what differed on the login screen across these
|
|
2493
|
+
five iterations?"*, *"who failed verify-email, and why?"*, *"frustrated
|
|
2494
|
+
reactions to segment 3 of the video"* — \`ish study results\` accepts
|
|
2495
|
+
**filter flags** (which interactions to keep) and **projection flags**
|
|
2496
|
+
(how to roll up what survives). Filters compose with AND across flags
|
|
2497
|
+
and OR within \`--sentiment\`. Filters and projections are purely
|
|
2498
|
+
client-side; no extra round trip beyond the standard study fetch.
|
|
2499
|
+
|
|
2500
|
+
## Filter flags
|
|
2501
|
+
|
|
2502
|
+
| Flag | Matches | Where it applies |
|
|
2503
|
+
|-------------------------------|-----------------------------------------------------------------------------------------------|------------------------------------------------------------------|
|
|
2504
|
+
| \`--frame <ref>\` | Interactions whose Frame name contains \`<ref>\` (case-insensitive). Also accepts a full Frame UUID, an \`f-…\` alias, or a \`frame_version_id\` UUID. | interactive — warn + ignore on chat / media |
|
|
2505
|
+
| \`--segment <ref>\` | Integer matches \`actions[0].data.segment_index\`; non-integer is a substring match against \`segment_label\`. | video, audio, text, document — warn + ignore elsewhere |
|
|
2506
|
+
| \`--turn <n>\` | Interactions whose \`actions[0].data.turn_index == n\`. | chat (external_chatbot + participant_pair) |
|
|
2507
|
+
| \`--side <a\|b>\` | Interactions whose parent assignment has \`side == a\` or \`side == b\`. | chat participant_pair — warn + ignore on other chat / non-chat |
|
|
2508
|
+
| \`--assignment <ref>\` | Assignment UUID, or substring match against the assignment name. | all |
|
|
2509
|
+
| \`--step <ref>\` | Filters \`participant_assignments[].step_results[]\` to verdicts matching the step id or name. | interactive + external_chatbot chat (steps live there) |
|
|
2510
|
+
| \`--sentiment <labels>\` | Comma-separated, case-insensitive label list (repeatable). Drops null-sentiment rows. | all |
|
|
2511
|
+
| \`--actor <ai\|human\|user>\` | Restrict by actor. | all |
|
|
2512
|
+
| \`--iteration <ref>\` | Iteration UUID or label (\`A\`, \`B\`, … case-insensitive). | all |
|
|
2513
|
+
| \`--participant <ref>\` | Participant UUID or \`pt-…\` alias. | all |
|
|
2514
|
+
| \`--include-unmatched\` | With \`--frame\`, keep degraded captures (\`frame_version_id: null\`) under a synthetic \`_unmatched\` bucket instead of dropping them. | interactive |
|
|
2515
|
+
| \`--include-evidence\`        | With \`--step\`, keep only the interactions cited as evidence: any interaction not listed in a surviving \`step_results[].evidence_interaction_ids[]\` is dropped as well. | interactive + external_chatbot chat |
|
|
2516
|
+
|
|
2517
|
+
**Modality mismatch is not an error.** Pass \`--segment 0\` on an
|
|
2518
|
+
interactive study and the filter is ignored with a stderr warning.
|
|
2519
|
+
The exception is \`--group-by\` — see below.
|
|
2520
|
+
|
|
2521
|
+
## Projection flags (--group-by)
|
|
2522
|
+
|
|
2523
|
+
| Axis | Output shape | Modality |
|
|
2524
|
+
|-------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
|
|
2525
|
+
| \`iteration\` | \`{study, slices: [{iteration_id, iteration_label, participant_count, interaction_count, sentiment, sample_comments, top_actions}, ...], totals_unfiltered, warnings}\` | all |
|
|
2526
|
+
| \`frame\` | \`[{frame_id, frame_label, interaction_count, sentiment_histogram, sample_comments, participant_aliases}, ...]\` | interactive (router errors on non-interactive) |
|
|
2527
|
+
| \`segment\` | \`[{segment_index, segment_label, interaction_count, sentiment_histogram, engagement_histogram, sample_comments}, ...]\` | media (router errors on non-media) |
|
|
2528
|
+
| \`turn\` | \`[{turn_index, interaction_count, sentiment_histogram, sample_replies, failures}, ...]\` | chat (router errors on non-chat) |
|
|
2529
|
+
| \`assignment\` | \`[{assignment_id, assignment_name, interaction_count, sentiment_histogram, step_completion}, ...]\` | all |
|
|
2530
|
+
| \`step\` | \`[{assignment_id, assignment_name, step_id, step_name, total, passed, inconclusive, failed, rate, participant_verdicts: [{participant_alias, verdict, reason, evidence_interaction_ids}, ...]}, ...]\` | interactive + external_chatbot chat |
|
|
2531
|
+
|
|
2532
|
+
\`--group-by\` is **mutually exclusive with \`--summary\` and
|
|
2533
|
+
\`--transcript\`**. \`--group-by frame\` on a chat study, \`--group-by
|
|
2534
|
+
turn\` on a video study, etc. error at the surface (exit 2) with a
|
|
2535
|
+
clear message before any IO.
|
|
2536
|
+
|
|
2537
|
+
## The empty-slice contract
|
|
2538
|
+
|
|
2539
|
+
A filter combination that matches zero interactions returns the
|
|
2540
|
+
**stable envelope shape** with:
|
|
2541
|
+
|
|
2542
|
+
- \`participant_count: 0\`
|
|
2543
|
+
- \`totals_unfiltered: {participant_count: <N>, interaction_count: <M>}\` populated
|
|
2544
|
+
- exit code **0** (not 4)
|
|
2545
|
+
|
|
2546
|
+
\`totals_unfiltered\` is the agent's sanity check: *"my filter matched
|
|
2547
|
+
0 of 80 participants — is the filter too tight, or did the run not
|
|
2548
|
+
produce data?"*. The shape never collapses to \`null\` or a different
|
|
2549
|
+
envelope; \`--get participant_count\` is always safe.
|
|
2550
|
+
|
|
2551
|
+
## Worked examples
|
|
2552
|
+
|
|
2553
|
+
\`\`\`bash
|
|
2554
|
+
# What differed on the login screen across the five iterations?
|
|
2555
|
+
ish study results s-b2c --frame login --group-by iteration
|
|
2556
|
+
|
|
2557
|
+
# Frustrated reactions to segment 3 of the video
|
|
2558
|
+
ish study results s-b2c --segment 3 --sentiment Frustrated
|
|
2559
|
+
|
|
2560
|
+
# Who failed the "verify email" step, and why?
|
|
2561
|
+
ish study results s-b2c --assignment "Sign up" --step verify-email --group-by step
|
|
2562
|
+
|
|
2563
|
+
# Chat participant_pair: only side A turn 4
|
|
2564
|
+
ish study results s-b2c --side a --turn 4
|
|
2565
|
+
|
|
2566
|
+
# Surface degraded captures (frame_version_id: null) under a "_unmatched" bucket:
|
|
2567
|
+
ish study results s-b2c --frame login --include-unmatched --group-by frame
|
|
2568
|
+
|
|
2569
|
+
# Narrow the lean summary to a slice:
|
|
2570
|
+
ish study results s-b2c --summary --frame checkout --json
|
|
2571
|
+
\`\`\`
|
|
2572
|
+
|
|
2573
|
+
## Combining filters
|
|
2574
|
+
|
|
2575
|
+
Filters compose with **AND across flags** and **OR within
|
|
2576
|
+
\`--sentiment\`**. \`--frame login --sentiment Frustrated,Confused\`
|
|
2577
|
+
means "interactions on the login frame whose sentiment is Frustrated
|
|
2578
|
+
OR Confused". \`--summary\` is orthogonal to filters and narrows the
|
|
2579
|
+
summary over the filtered set. \`--transcript\` is single-participant
|
|
2580
|
+
and **errors when any filter or \`--group-by\` is set** (exit 2).
|
|
2581
|
+
|
|
2582
|
+
## Defensive handling of nullable fields
|
|
2583
|
+
|
|
2584
|
+
- \`interaction.sentiment\` is nullable (chat failure stubs,
|
|
2585
|
+
pre-sentiment rows). Dropped **only** when \`--sentiment\` is set; kept
|
|
2586
|
+
by every other filter.
|
|
2587
|
+
- \`interaction.frame_version_id\` is nullable on interactive studies
|
|
2588
|
+
(degraded captures, ~12% on a failing iteration). Dropped by
|
|
2589
|
+
\`--frame\` unless \`--include-unmatched\` is passed; surfaced as a
|
|
2590
|
+
\`_unmatched\` bucket in \`--group-by frame\`.
|
|
2591
|
+
- Chat \`bot_reply.failure\` rows are kept in the default envelope,
|
|
2592
|
+
dropped by \`--sentiment\` (they have \`sentiment: null\`), kept by
|
|
2593
|
+
\`--actor\`, visible in \`--group-by turn\` under a \`failures\`
|
|
2594
|
+
counter.
|
|
2595
|
+
|
|
2596
|
+
## --frame resolution
|
|
2597
|
+
|
|
2598
|
+
\`--frame login\` walks the frame list returned by
|
|
2599
|
+
\`GET /studies/{id}/frames\` and matches **case-insensitive substring**
|
|
2600
|
+
against the frame name. Other accepted shapes:
|
|
2601
|
+
|
|
2602
|
+
- \`--frame 6ec…\` — full Frame UUID (exact match)
|
|
2603
|
+
- \`--frame f-6ec\` — short alias resolved via \`alias-store\`
|
|
2604
|
+
- \`--frame 7ec…\` — a \`frame_version_id\` UUID (matches only that version)
|
|
2605
|
+
|
|
2606
|
+
An ambiguous substring (one that matches more than one frame) is an error that lists the candidates:
|
|
2607
|
+
|
|
2608
|
+
\`\`\`
|
|
2609
|
+
ish study results s-b2c --frame log
|
|
2610
|
+
# Error: --frame "log" is ambiguous — matched 2 frames: Login, Logout.
|
|
2611
|
+
# Use a more specific substring, a full Frame UUID, or an \`f-…\` alias.
|
|
2612
|
+
\`\`\`
|
|
2613
|
+
|
|
2614
|
+
A reference that matches no frame at all is also an error; the message lists the available frame names.
|
|
2615
|
+
|
|
2616
|
+
## Common --get paths on a sliced envelope
|
|
2617
|
+
|
|
2618
|
+
\`\`\`
|
|
2619
|
+
# Sanity-check coverage:
|
|
2620
|
+
--get totals_unfiltered.participant_count
|
|
2621
|
+
--get totals_unfiltered.interaction_count
|
|
2622
|
+
|
|
2623
|
+
# Per-iteration projection:
|
|
2624
|
+
--get slices.iteration_label # one label per line
|
|
2625
|
+
--get slices.0.participant_count
|
|
2626
|
+
--get slices.0.sentiment
|
|
2627
|
+
|
|
2628
|
+
# Per-frame / per-segment / per-turn (bare array):
|
|
2629
|
+
--get 0.frame_label
|
|
2630
|
+
--get 0.segment_index
|
|
2631
|
+
--get 0.sentiment_histogram
|
|
2632
|
+
|
|
2633
|
+
# Per-step:
|
|
2634
|
+
--get 0.rate
|
|
2635
|
+
--get 0.participant_verdicts.verdict # one verdict per participant
|
|
2636
|
+
\`\`\`
|
|
2637
|
+
|
|
2638
|
+
## Related
|
|
2639
|
+
|
|
2640
|
+
- \`concepts/study\` — the parent artifact whose results are being sliced.
|
|
2641
|
+
- \`concepts/assignment\` — defines the steps that \`--step\` and
|
|
2642
|
+
\`--group-by step\` filter against.
|
|
2643
|
+
- \`reference/json-mode\` — display vs capture vs chain output rules
|
|
2644
|
+
(\`--get\`, \`--fields\`, exit codes).
|
|
2645
|
+
- \`reference/aliases\` — \`s-…\` for studies, \`pt-…\` for participants,
|
|
2646
|
+
\`f-…\` for frames. Any UUID-accepting flag also accepts the alias.
|
|
2464
2647
|
`;
|
|
2465
2648
|
const GUIDE_FIRST_STUDY = `# guide: your first study, end to end
|
|
2466
2649
|
|
|
@@ -4053,6 +4236,12 @@ const PAGES = [
|
|
|
4053
4236
|
description: "Login → workspace → people → study → iteration → run → results.",
|
|
4054
4237
|
body: GUIDE_FIRST_STUDY,
|
|
4055
4238
|
},
|
|
4239
|
+
{
|
|
4240
|
+
slug: "guides/slicing-results",
|
|
4241
|
+
title: "guide: slicing study results by frame / segment / turn / sentiment",
|
|
4242
|
+
description: "Filter and project `ish study results` — --frame, --segment, --turn, --side, --assignment, --step, --sentiment, --actor, --iteration, --participant; --group-by iteration|frame|segment|turn|assignment|step; totals_unfiltered + empty-slice contract.",
|
|
4243
|
+
body: GUIDE_SLICING_RESULTS,
|
|
4244
|
+
},
|
|
4056
4245
|
{
|
|
4057
4246
|
slug: "guides/chat",
|
|
4058
4247
|
title: "guide: chat-modality studies",
|
package/dist/lib/output.d.ts
CHANGED
|
@@ -48,6 +48,12 @@ export declare function formatWorkspaceDetail(workspace: Record<string, unknown>
|
|
|
48
48
|
export declare function formatSiteAccessStatus(summary: import("./site-access.js").SiteAccessSummary, json: boolean): void;
|
|
49
49
|
export declare function formatStudyList(studies: Record<string, unknown>[], json: boolean): void;
|
|
50
50
|
export declare function formatStudyDetail(study: Record<string, unknown>, json: boolean, options?: OutputOptions, participants?: ReadonlyArray<Record<string, unknown>>): void;
|
|
51
|
+
/**
|
|
52
|
+
* Stable JSON envelope for `study results`. Schema is fixed regardless of
|
|
53
|
+
* study state — fields default to `null`, `0`, or `[]` when nothing has run.
|
|
54
|
+
* Agents can rely on the keys always being present (M4).
|
|
55
|
+
*/
|
|
56
|
+
export declare function buildStudyResultsEnvelope(study: Record<string, unknown>, participants: ReadonlyArray<Record<string, unknown>>): Record<string, unknown>;
|
|
51
57
|
export declare function formatStudyResults(study: Record<string, unknown>, participants: ReadonlyArray<Record<string, unknown>>, json: boolean): void;
|
|
52
58
|
/**
|
|
53
59
|
* `study results --summary` projection. Drops interview_answers + per-participant
|
|
@@ -102,3 +108,15 @@ export declare function deriveWinnerConfidence(args: {
|
|
|
102
108
|
}): "low" | "medium" | "high";
|
|
103
109
|
export declare function formatAskResults(ask: Record<string, unknown>, json: boolean, roundFilter?: number): void;
|
|
104
110
|
export declare function formatConfigList(configs: Record<string, unknown>[], json: boolean): void;
|
|
111
|
+
export type StudyResultsGroupByKind = "iteration" | "frame" | "segment" | "turn" | "assignment" | "step";
|
|
112
|
+
/**
|
|
113
|
+
* Render a `--group-by <kind>` projection. JSON mode is a thin pass-through
|
|
114
|
+
* to jsonOutput with `preProjected: true` so the lean transform doesn't
|
|
115
|
+
* strip our stable empties. Human mode renders one section per slice plus
|
|
116
|
+
* a small ASCII sentiment histogram.
|
|
117
|
+
*
|
|
118
|
+
* The renderer accepts both the wrapped `{study, slices, ...}` shape (per-
|
|
119
|
+
* iteration) and the bare-array shape (every other --group-by); the
|
|
120
|
+
* surface (T5) doesn't need to know the difference.
|
|
121
|
+
*/
|
|
122
|
+
export declare function formatStudyResultsGroupBy(projection: unknown, kind: StudyResultsGroupByKind, json: boolean): void;
|