@ishlabs/cli 0.20.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/chat.js +2 -2
- package/dist/commands/config.js +17 -3
- package/dist/commands/source.js +1 -1
- package/dist/commands/study-analyze.js +15 -2
- package/dist/commands/study-participant.js +19 -0
- package/dist/commands/study.js +313 -14
- package/dist/lib/alias-store.d.ts +1 -0
- package/dist/lib/alias-store.js +2 -0
- package/dist/lib/command-helpers.js +4 -3
- package/dist/lib/docs.js +232 -15
- package/dist/lib/output.d.ts +24 -1
- package/dist/lib/output.js +290 -2
- package/dist/lib/skill-content.js +76 -0
- package/dist/lib/study-participants.d.ts +13 -0
- package/dist/lib/study-participants.js +13 -0
- package/dist/lib/study-results-filters.d.ts +91 -0
- package/dist/lib/study-results-filters.js +559 -0
- package/dist/lib/study-results-projections.d.ts +152 -0
- package/dist/lib/study-results-projections.js +580 -0
- package/package.json +1 -1
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure projection builders for `ish study results --group-by <kind>`.
|
|
3
|
+
*
|
|
4
|
+
* Each `buildStudyResultsPer<Kind>` consumes a `FilteredResults` (the output
|
|
5
|
+
* of `applyResultsFilters` in `study-results-filters.ts`) and returns a
|
|
6
|
+
* bare array of slice objects. The surface (`commands/study.ts`) wraps the
|
|
7
|
+
* array uniformly in a `SliceResponse` envelope alongside `totals_unfiltered`,
|
|
8
|
+
* `modality_warnings`, `study_id`, and `modality` before handing it off to
|
|
9
|
+
* `formatStudyResultsGroupBy` for JSON or human rendering.
|
|
10
|
+
*
|
|
11
|
+
* Conventions mirror `buildStudyResultsEnvelope` (`output.ts:1081`) and
|
|
12
|
+
* `buildStudyResultsSummary` (`output.ts:1292`):
|
|
13
|
+
* - deterministic field order (object literals are emitted in source order)
|
|
14
|
+
* - stable empties: empty arrays, never `null` for "no rows yet"
|
|
15
|
+
* - sample_comments capped at 5 per group, truncated to 200 chars
|
|
16
|
+
* - sentiment histograms are { label → count } records
|
|
17
|
+
* - participant_aliases capped at 10 per group
|
|
18
|
+
*
|
|
19
|
+
* Has no IO and no console side-effects.
|
|
20
|
+
*/
|
|
21
|
+
import type { FilteredResults } from "./study-results-filters.js";
|
|
22
|
+
import type { StudyResultsGroupByKind } from "./output.js";
|
|
23
|
+
export type { FilteredResults } from "./study-results-filters.js";
|
|
24
|
+
/**
|
|
25
|
+
* Uniform envelope emitted for every `ish study results --group-by <axis>`
|
|
26
|
+
* call, mirroring the MCP backend's `SliceResponse[T]`. Six top-level keys,
|
|
27
|
+
* stable across all six axes — `rows` carries the bare slice array returned
|
|
28
|
+
* by the matching `buildStudyResultsPer<Kind>` builder.
*
* @typeParam T - Row (slice) type for the chosen axis, i.e. the element type
* of the array returned by the matching `buildStudyResultsPer<Kind>` builder.
|
|
29
|
+
*/
|
|
30
|
+
export interface SliceResponse<T> {
|
|
31
|
+
/** Grouping axis that `rows` was built for. */
axis: StudyResultsGroupByKind;
|
|
32
|
+
/** Bare slice array for `axis`; element shape depends on the axis. */
rows: T[];
|
|
33
|
+
/**
 * Participant / interaction counts.
 * NOTE(review): the name suggests these are taken before any result filter
 * is applied — confirm against the `wrapSliceProjection` implementation.
 */
totals_unfiltered: {
|
|
34
|
+
participant_count: number;
|
|
35
|
+
interaction_count: number;
|
|
36
|
+
};
|
|
37
|
+
/** Warning strings carried with the envelope; how they are produced is not visible in this declaration file. */
modality_warnings: string[];
|
|
38
|
+
/** Study identifier; presumably the `studyId` argument of `wrapSliceProjection` — confirm. */
study_id: string;
|
|
39
|
+
/** Study modality; presumably the `modality` argument of `wrapSliceProjection` — confirm. */
modality: string;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Wrap a bare projection array in the uniform `SliceResponse` envelope.
|
|
43
|
+
* The surface calls this once after dispatching to one of the six
|
|
44
|
+
* `buildStudyResultsPer<Kind>` builders, then hands the envelope to
|
|
45
|
+
* `formatStudyResultsGroupBy`.
*
* @typeParam T - Slice type of the rows being wrapped.
* @param filtered - Filtered results the rows were built from. Presumably the
* source of `totals_unfiltered` and `modality_warnings` — the implementation
* is not visible in this declaration file, so confirm.
* @param axis - Grouping axis the rows were built for.
* @param rows - Bare slice array returned by the matching builder.
* @param studyId - Study identifier to report in the envelope.
* @param modality - Study modality to report in the envelope.
* @returns The `SliceResponse` envelope carrying `rows`.
|
|
46
|
+
*/
|
|
47
|
+
export declare function wrapSliceProjection<T>(filtered: FilteredResults, axis: StudyResultsGroupByKind, rows: T[], studyId: string, modality: string): SliceResponse<T>;
|
|
48
|
+
/**
 * Row shape returned by `buildStudyResultsPerIteration` (one entry per
 * iteration). Not exported, so consumers can only reach it through that
 * function's return type.
 *
 * NOTE(review): the histogram field here is named `sentiment`, whereas the
 * frame / segment / turn / assignment slices call it `sentiment_histogram`.
 * Renaming would change the emitted JSON, so it is only flagged here.
 */
interface IterationSlice {
|
|
49
|
+
iteration_id: string;
|
|
50
|
+
/** `null` when no label is available for the iteration. */
iteration_label: string | null;
|
|
51
|
+
/** May be 0: the builder's doc says iterations with no surviving participants still appear. */
participant_count: number;
|
|
52
|
+
interaction_count: number;
|
|
53
|
+
/** Label-to-count record, per the `{ label → count }` convention in the file header. */
sentiment: Record<string, number>;
|
|
54
|
+
/** Per the file header conventions: at most 5 comments, each truncated to 200 chars. */
sample_comments: string[];
|
|
55
|
+
/** Action types with their counts; ordering and any cap are not visible here — TODO confirm. */
top_actions: Array<{
|
|
56
|
+
action_type: string;
|
|
57
|
+
count: number;
|
|
58
|
+
}>;
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* `--group-by iteration` — one slice per declared iteration, in the same
|
|
62
|
+
* order as `ish study get`. Iterations with zero surviving participants
|
|
63
|
+
* still appear with `participant_count: 0` so the consumer sees the full
|
|
64
|
+
* matrix at stable size. Returns a bare array; the surface wraps it in
|
|
65
|
+
* the uniform `SliceResponse` envelope.
*
* @param filtered - Filtered results to project (output of
* `applyResultsFilters`, per the file header).
* @returns One `IterationSlice` per declared iteration.
|
|
66
|
+
*/
|
|
67
|
+
export declare function buildStudyResultsPerIteration(filtered: FilteredResults): IterationSlice[];
|
|
68
|
+
/**
 * Row shape returned by `buildStudyResultsPerFrame` (one entry per Frame
 * that had a surviving interaction). Not exported.
 */
interface FrameSlice {
|
|
69
|
+
/**
 * Frame identifier.
 * NOTE(review): the builder's doc mentions a synthetic `_unmatched` bucket;
 * presumably it is reported through this field — confirm.
 */
frame_id: string;
|
|
70
|
+
/** `null` when no label is available for the frame. */
frame_label: string | null;
|
|
71
|
+
interaction_count: number;
|
|
72
|
+
/** Label-to-count record, per the `{ label → count }` convention in the file header. */
sentiment_histogram: Record<string, number>;
|
|
73
|
+
/** Per the file header conventions: at most 5 comments, each truncated to 200 chars. */
sample_comments: string[];
|
|
74
|
+
/** Per the file header conventions: at most 10 aliases per group. */
participant_aliases: string[];
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* `--group-by frame` — one slice per Frame that had a surviving interaction.
|
|
78
|
+
* Interactive only — the surface (T5) errors before reaching here when the
|
|
79
|
+
* study isn't interactive. Includes a synthetic `_unmatched` bucket when
|
|
80
|
+
* `--include-unmatched` was set and null-frame_version_id rows survived.
|
|
81
|
+
*
|
|
82
|
+
* Returns a bare array (no wrapper) — callers attach totals_unfiltered.
*
* NOTE(review): `T5` above reads like an internal task label; it is not
* defined anywhere in this file — confirm what it refers to or drop it.
*
* @param filtered - Filtered results to project (output of
* `applyResultsFilters`, per the file header).
* @returns One `FrameSlice` per Frame with a surviving interaction.
|
|
83
|
+
*/
|
|
84
|
+
export declare function buildStudyResultsPerFrame(filtered: FilteredResults): FrameSlice[];
|
|
85
|
+
/**
 * Row shape returned by `buildStudyResultsPerSegment` (media studies).
 * Not exported.
 */
interface SegmentSlice {
|
|
86
|
+
/**
 * Segment index the group was keyed on.
 * NOTE(review): presumably `null` when the builder fell back to grouping by
 * `segment_label` because no index was present — confirm.
 */
segment_index: number | null;
|
|
87
|
+
/** `null` when no label is available for the segment. */
segment_label: string | null;
|
|
88
|
+
interaction_count: number;
|
|
89
|
+
/** Label-to-count record, per the `{ label → count }` convention in the file header. */
sentiment_histogram: Record<string, number>;
|
|
90
|
+
/** Keyed record of counts; presumably engagement label to count, mirroring `sentiment_histogram` — confirm. */
engagement_histogram: Record<string, number>;
|
|
91
|
+
/** Per the file header conventions: at most 5 comments, each truncated to 200 chars. */
sample_comments: string[];
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* `--group-by segment` — media studies (video / audio / text / document).
|
|
95
|
+
* Groups by `actions[0].data.segment_index`, falling back to `segment_label`
|
|
96
|
+
* when the index isn't present.
*
* @param filtered - Filtered results to project (output of
* `applyResultsFilters`, per the file header).
* @returns One `SegmentSlice` per segment group; ordering is not visible in
* this declaration file.
|
|
97
|
+
*/
|
|
98
|
+
export declare function buildStudyResultsPerSegment(filtered: FilteredResults): SegmentSlice[];
|
|
99
|
+
/**
 * Row shape returned by `buildStudyResultsPerTurn` (chat studies).
 * Not exported.
 */
interface TurnSlice {
|
|
100
|
+
/** Turn index the group was keyed on (`actions[0].data.turn_index`, per the builder's doc). */
turn_index: number;
|
|
101
|
+
interaction_count: number;
|
|
102
|
+
/** Label-to-count record, per the `{ label → count }` convention in the file header. */
sentiment_histogram: Record<string, number>;
|
|
103
|
+
/** Sample bot replies; the builder's doc says up to 5 per turn. */
sample_replies: string[];
|
|
104
|
+
/** Count of bot-failure stubs (`bot_reply.failure` populated), per the builder's doc. */
failures: number;
|
|
105
|
+
}
|
|
106
|
+
/**
|
|
107
|
+
* `--group-by turn` — chat studies. Groups by `actions[0].data.turn_index`
|
|
108
|
+
* and surfaces both a count of bot-failure stubs (`bot_reply.failure`
|
|
109
|
+
* populated) and up to 5 sample bot replies per turn.
*
* @param filtered - Filtered results to project (output of
* `applyResultsFilters`, per the file header).
* @returns One `TurnSlice` per turn index; ordering is not visible in this
* declaration file.
|
|
110
|
+
*/
|
|
111
|
+
export declare function buildStudyResultsPerTurn(filtered: FilteredResults): TurnSlice[];
|
|
112
|
+
/**
 * Row shape returned by `buildStudyResultsPerAssignment` (one entry per
 * study assignment). Not exported.
 */
interface AssignmentSlice {
|
|
113
|
+
assignment_id: string;
|
|
114
|
+
/** `null` when no name is available for the assignment. */
assignment_name: string | null;
|
|
115
|
+
interaction_count: number;
|
|
116
|
+
/** Label-to-count record, per the `{ label → count }` convention in the file header. */
sentiment_histogram: Record<string, number>;
|
|
117
|
+
/**
 * The assignment's `step_completion[]` taken from the study payload (per the
 * builder's doc). Typed `unknown[]`, so entries must be narrowed before use.
 */
step_completion: unknown[];
|
|
118
|
+
}
|
|
119
|
+
/**
|
|
120
|
+
* `--group-by assignment` — one slice per study assignment, with each
|
|
121
|
+
* assignment's `step_completion[]` (from the study payload) attached so the
|
|
122
|
+
* caller can see pass / inconclusive / fail rollups inline.
*
* @param filtered - Filtered results to project (output of
* `applyResultsFilters`, per the file header).
* @returns One `AssignmentSlice` per study assignment.
|
|
123
|
+
*/
|
|
124
|
+
export declare function buildStudyResultsPerAssignment(filtered: FilteredResults): AssignmentSlice[];
|
|
125
|
+
/**
 * One participant's verdict for a step; element type of
 * `StepSlice.participant_verdicts`. Not exported.
 */
interface StepVerdict {
|
|
126
|
+
participant_alias: string | null;
|
|
127
|
+
/** Verdict label; the set of possible values is not visible in this file — TODO confirm (the step builder's doc mentions pass / inconclusive / fail style rollups). */
verdict: string | null;
|
|
128
|
+
reason: string | null;
|
|
129
|
+
/** Interaction ids cited as evidence; typed as a non-nullable array. */
evidence_interaction_ids: string[];
|
|
130
|
+
}
|
|
131
|
+
/**
 * Row shape returned by `buildStudyResultsPerStep`: one entry per
 * `(assignment, step_id)` pair with verdict totals and the per-participant
 * verdict rows inline. Not exported.
 */
interface StepSlice {
|
|
132
|
+
assignment_id: string;
|
|
133
|
+
/** `null` when no name is available for the assignment. */
assignment_name: string | null;
|
|
134
|
+
step_id: string;
|
|
135
|
+
/** `null` when no name is available for the step. */
step_name: string | null;
|
|
136
|
+
/** Verdict totals; per the builder's doc these are re-derived from surviving participants. */
total: number;
|
|
137
|
+
passed: number;
|
|
138
|
+
inconclusive: number;
|
|
139
|
+
failed: number;
|
|
140
|
+
/** NOTE(review): presumably `passed / total`; the formula, range and zero-total behavior are not visible here — confirm. */
rate: number;
|
|
141
|
+
participant_verdicts: StepVerdict[];
|
|
142
|
+
}
|
|
143
|
+
/**
|
|
144
|
+
* `--group-by step` — one slice per `(assignment, step_id)` pair with verdict
|
|
145
|
+
* totals (re-derived from surviving participants, NOT the pre-computed
|
|
146
|
+
* step_completion) and per-participant verdict rows inline.
|
|
147
|
+
*
|
|
148
|
+
* Re-deriving totals matters when filters are applied: e.g. a caller asking
|
|
149
|
+
* for `--iteration B --group-by step` wants verdict counts for iteration B
|
|
150
|
+
* only, not the study-wide rollup.
*
* @param filtered - Filtered results to project (output of
* `applyResultsFilters`, per the file header).
* @returns One `StepSlice` per `(assignment, step_id)` pair.
|
|
151
|
+
*/
|
|
152
|
+
export declare function buildStudyResultsPerStep(filtered: FilteredResults): StepSlice[];
|