npm - @ishlabs/cli - Versions diffs - 0.20.0 → 0.21.0 - Mend

@ishlabs/cli 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/dist/commands/study.js +250 -13
package/dist/lib/alias-store.d.ts +1 -0
package/dist/lib/alias-store.js +2 -0
package/dist/lib/docs.js +190 -1
package/dist/lib/output.d.ts +18 -0
package/dist/lib/output.js +217 -1
package/dist/lib/skill-content.js +68 -0
package/dist/lib/study-participants.d.ts +13 -0
package/dist/lib/study-participants.js +13 -0
package/dist/lib/study-results-filters.d.ts +91 -0
package/dist/lib/study-results-filters.js +538 -0
package/dist/lib/study-results-projections.d.ts +122 -0
package/dist/lib/study-results-projections.js +577 -0
package/package.json +1 -1

package/dist/lib/study-results-projections.js ADDED Viewed

@@ -0,0 +1,577 @@
+/**
+ * Pure projection builders for `ish study results --group-by <kind>`.
+ *
+ * Each `buildStudyResultsPer<Kind>` consumes a `FilteredResults` (the output
+ * of `applyResultsFilters` in `study-results-filters.ts`) and returns a
+ * plain JSON-serialisable value. The surface (T5) hands the result to
+ * `output(..., json, { preProjected: true })` for JSON, or to
+ * `formatStudyResultsGroupBy` (T6) for human mode.
+ *
+ * Per-iteration is the only projection that wraps a `{study, slices, ...}`
+ * envelope; the others return plain arrays of slice objects. The surface
+ * attaches `totals_unfiltered` and `warnings` from the same `FilteredResults`
+ * for the array projections.
+ *
+ * Conventions mirror `buildStudyResultsEnvelope` (`output.ts:1081`) and
+ * `buildStudyResultsSummary` (`output.ts:1292`):
+ *   - deterministic field order (object literals are emitted in source order)
+ *   - stable empties: empty arrays, never `null` for "no rows yet"
+ *   - sample_comments capped at 5 per group, truncated to 200 chars
+ *   - sentiment histograms are { label → count } records
+ *   - participant_aliases capped at 10 per group
+ *
+ * Has no IO and no console side-effects.
+ */
+import { deterministicAlias, ALIAS_PREFIX } from "./alias-store.js";
+// Maximum number of entries `collectComment` puts into one `sample_comments` bucket.
+const SAMPLE_COMMENT_CAP = 5;
+// Length limit handed to `truncate` for each sampled comment.
+const SAMPLE_COMMENT_MAX_LEN = 200;
+// Maximum number of entries `collectParticipantAlias` puts into one alias bucket.
+const PARTICIPANT_ALIAS_CAP = 10;
+// Maximum number of bot replies sampled per turn slice.
+const SAMPLE_REPLY_CAP = 5;
+// Length limit handed to `truncate` for each sampled bot reply.
+const SAMPLE_REPLY_MAX_LEN = 200;
+// Synthetic `frame_id` used by the per-frame projection for interactions
+// that carry no `frame_version_id`.
+const UNMATCHED_BUCKET = "_unmatched";
+// ---------- helpers ---------------------------------------------------------
+function asRecord(v) {
+    return v && typeof v === "object" && !Array.isArray(v)
+        ? v
+        : null;
+}
+function asArray(v) {
+    return Array.isArray(v) ? v : [];
+}
+function asString(v) {
+    return typeof v === "string" && v.length > 0 ? v : null;
+}
+function truncate(str, maxLen) {
+    if (str.length <= maxLen)
+        return str;
+    return str.slice(0, maxLen - 3) + "...";
+}
+function participantAlias(participant) {
+    const id = asString(participant.id);
+    return id ? deterministicAlias(ALIAS_PREFIX.participant, id) : null;
+}
+function studyHeader(filtered) {
+    const study = filtered.study;
+    const id = asString(study.id);
+    return {
+        alias: id ? deterministicAlias(ALIAS_PREFIX.study, id) : null,
+        name: asString(study.name) ?? null,
+        modality: asString(study.modality) ?? null,
+    };
+}
+function readSentimentLabel(interaction) {
+    const s = asRecord(interaction.sentiment);
+    return s ? asString(s.label) : null;
+}
+function readEngagement(interaction) {
+    // Media interactions carry `engagement` either as a top-level string
+    // ("engaged" | "drifted" | "abandoned") or as an object wrapping a `level`.
+    const e = interaction.engagement;
+    if (typeof e === "string" && e.length > 0)
+        return e;
+    const er = asRecord(e);
+    if (!er)
+        return null;
+    return asString(er.level) ?? asString(er.label);
+}
+function firstActionData(interaction) {
+    const actions = asArray(interaction.actions);
+    if (actions.length === 0)
+        return {};
+    const first = asRecord(actions[0]);
+    if (!first)
+        return {};
+    return asRecord(first.data) ?? {};
+}
+function pushSentiment(hist, label) {
+    if (!label)
+        return;
+    hist[label] = (hist[label] ?? 0) + 1;
+}
+function pushEngagement(hist, label) {
+    if (!label)
+        return;
+    hist[label] = (hist[label] ?? 0) + 1;
+}
+function collectComment(bucket, interaction) {
+    if (bucket.length >= SAMPLE_COMMENT_CAP)
+        return;
+    const c = asString(interaction.comment);
+    if (!c)
+        return;
+    bucket.push(truncate(c, SAMPLE_COMMENT_MAX_LEN));
+}
+function collectParticipantAlias(bucket, seen, participant) {
+    if (bucket.length >= PARTICIPANT_ALIAS_CAP)
+        return;
+    const alias = participantAlias(participant);
+    if (!alias || seen.has(alias))
+        return;
+    seen.add(alias);
+    bucket.push(alias);
+}
+/**
+ * `--group-by iteration` — one slice per iteration that has any surviving
+ * participants. Slices are ordered by the iteration order on the study
+ * (so callers see them in the same order as `ish study get`).
+ *
+ * Unlike the array-returning projections, this one wraps a stable envelope
+ * with `totals_unfiltered` + `warnings` because per-iteration is the
+ * "default agent slice" — the one most likely to be piped directly without
+ * the surface re-wrapping.
+ */
+export function buildStudyResultsPerIteration(filtered) {
+    const iterations = asArray(filtered.study.iterations);
+    const order = [];
+    for (const raw of iterations) {
+        const r = asRecord(raw);
+        const id = r ? asString(r.id) : null;
+        if (!id)
+            continue;
+        order.push({ id, label: r ? asString(r.label) : null });
+    }
+    const byIteration = new Map();
+    const actionCounts = new Map();
+    for (const o of order) {
+        byIteration.set(o.id, {
+            iteration_id: o.id,
+            iteration_label: o.label,
+            participant_count: 0,
+            interaction_count: 0,
+            sentiment: {},
+            sample_comments: [],
+            top_actions: [],
+        });
+        actionCounts.set(o.id, new Map());
+    }
+    for (const p of filtered.participants) {
+        const iterId = asString(p.iteration_id);
+        if (!iterId)
+            continue;
+        const slice = byIteration.get(iterId);
+        if (!slice)
+            continue;
+        slice.participant_count += 1;
+        for (const raw of asArray(p.interactions)) {
+            const ix = asRecord(raw);
+            if (!ix)
+                continue;
+            slice.interaction_count += 1;
+            pushSentiment(slice.sentiment, readSentimentLabel(ix));
+            collectComment(slice.sample_comments, ix);
+            const counter = actionCounts.get(iterId);
+            for (const araw of asArray(ix.actions)) {
+                const a = asRecord(araw);
+                const at = a ? asString(a.action_type) : null;
+                if (!at)
+                    continue;
+                counter.set(at, (counter.get(at) ?? 0) + 1);
+            }
+        }
+    }
+    for (const [iterId, counter] of actionCounts) {
+        const rows = Array.from(counter.entries())
+            .sort((a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0]))
+            .slice(0, 5)
+            .map(([action_type, count]) => ({ action_type, count }));
+        byIteration.get(iterId).top_actions = rows;
+    }
+    // Keep every declared iteration as a slice so the consumer sees the full
+    // matrix at stable size. Iterations with zero surviving rows still appear
+    // with `participant_count: 0` — useful for "matched X / Y" framing.
+    const slices = order.map((o) => byIteration.get(o.id));
+    return {
+        study: studyHeader(filtered),
+        slices,
+        totals_unfiltered: filtered.totals_unfiltered,
+        warnings: filtered.warnings,
+    };
+}
+/**
+ * `--group-by frame` — one slice per Frame that had a surviving interaction.
+ * Interactive only — the surface (T5) errors before reaching here when the
+ * study isn't interactive. Includes a synthetic `_unmatched` bucket when
+ * `--include-unmatched` was set and null-frame_version_id rows survived.
+ *
+ * Returns a bare array (no wrapper) — callers attach totals_unfiltered.
+ */
+export function buildStudyResultsPerFrame(filtered) {
+    const byFrame = new Map();
+    const seenAliasesPerFrame = new Map();
+    const ensureSlice = (frameId, label) => {
+        let slice = byFrame.get(frameId);
+        if (!slice) {
+            slice = {
+                frame_id: frameId,
+                frame_label: label,
+                interaction_count: 0,
+                sentiment_histogram: {},
+                sample_comments: [],
+                participant_aliases: [],
+            };
+            byFrame.set(frameId, slice);
+            seenAliasesPerFrame.set(frameId, new Set());
+        }
+        else if (slice.frame_label === null && label !== null) {
+            slice.frame_label = label;
+        }
+        return slice;
+    };
+    for (const p of filtered.participants) {
+        for (const raw of asArray(p.interactions)) {
+            const ix = asRecord(raw);
+            if (!ix)
+                continue;
+            const fvId = asString(ix.frame_version_id);
+            let frameId;
+            let label;
+            if (!fvId) {
+                frameId = UNMATCHED_BUCKET;
+                label = null;
+            }
+            else {
+                const lookup = filtered.frameVersionLookup.get(fvId);
+                if (lookup) {
+                    frameId = lookup.frame_id;
+                    label = lookup.frame_label;
+                }
+                else {
+                    // Defensive: a surviving fv_id with no lookup entry (when no
+                    // --frame was passed but a caller still asks for --group-by
+                    // frame). Bucket by frame_version_id so grouping stays meaningful.
+                    frameId = fvId;
+                    label = null;
+                }
+            }
+            const slice = ensureSlice(frameId, label);
+            slice.interaction_count += 1;
+            pushSentiment(slice.sentiment_histogram, readSentimentLabel(ix));
+            collectComment(slice.sample_comments, ix);
+            collectParticipantAlias(slice.participant_aliases, seenAliasesPerFrame.get(frameId), p);
+        }
+    }
+    // Stable ordering: named frames first (alphabetical by label, then frame_id
+    // for label-less), then `_unmatched` at the end.
+    return Array.from(byFrame.values()).sort((a, b) => {
+        if (a.frame_id === UNMATCHED_BUCKET)
+            return 1;
+        if (b.frame_id === UNMATCHED_BUCKET)
+            return -1;
+        const al = a.frame_label ?? "";
+        const bl = b.frame_label ?? "";
+        if (al !== bl)
+            return al.localeCompare(bl);
+        return a.frame_id.localeCompare(b.frame_id);
+    });
+}
+/**
+ * `--group-by segment` — media studies (video / audio / text / document).
+ * Groups by `actions[0].data.segment_index`, falling back to `segment_label`
+ * when the index isn't present.
+ */
+export function buildStudyResultsPerSegment(filtered) {
+    const byKey = new Map();
+    for (const p of filtered.participants) {
+        for (const raw of asArray(p.interactions)) {
+            const ix = asRecord(raw);
+            if (!ix)
+                continue;
+            const data = firstActionData(ix);
+            const idx = typeof data.segment_index === "number" ? data.segment_index : null;
+            const label = asString(data.segment_label);
+            if (idx === null && label === null)
+                continue;
+            const key = `${idx ?? "_"}|${label ?? "_"}`;
+            let slice = byKey.get(key);
+            if (!slice) {
+                slice = {
+                    segment_index: idx,
+                    segment_label: label,
+                    interaction_count: 0,
+                    sentiment_histogram: {},
+                    engagement_histogram: {},
+                    sample_comments: [],
+                };
+                byKey.set(key, slice);
+            }
+            slice.interaction_count += 1;
+            pushSentiment(slice.sentiment_histogram, readSentimentLabel(ix));
+            pushEngagement(slice.engagement_histogram, readEngagement(ix));
+            collectComment(slice.sample_comments, ix);
+        }
+    }
+    return Array.from(byKey.values()).sort((a, b) => {
+        if (a.segment_index === null && b.segment_index === null) {
+            return (a.segment_label ?? "").localeCompare(b.segment_label ?? "");
+        }
+        if (a.segment_index === null)
+            return 1;
+        if (b.segment_index === null)
+            return -1;
+        if (a.segment_index !== b.segment_index) {
+            return a.segment_index - b.segment_index;
+        }
+        return (a.segment_label ?? "").localeCompare(b.segment_label ?? "");
+    });
+}
+/**
+ * `--group-by turn` — chat studies. Groups by `actions[0].data.turn_index`
+ * and surfaces both a count of bot-failure stubs (`bot_reply.failure`
+ * populated) and up to 5 sample bot replies per turn.
+ */
+export function buildStudyResultsPerTurn(filtered) {
+    const byTurn = new Map();
+    for (const p of filtered.participants) {
+        for (const raw of asArray(p.interactions)) {
+            const ix = asRecord(raw);
+            if (!ix)
+                continue;
+            const data = firstActionData(ix);
+            if (typeof data.turn_index !== "number")
+                continue;
+            const turn = data.turn_index;
+            let slice = byTurn.get(turn);
+            if (!slice) {
+                slice = {
+                    turn_index: turn,
+                    interaction_count: 0,
+                    sentiment_histogram: {},
+                    sample_replies: [],
+                    failures: 0,
+                };
+                byTurn.set(turn, slice);
+            }
+            slice.interaction_count += 1;
+            pushSentiment(slice.sentiment_histogram, readSentimentLabel(ix));
+            const botReply = asRecord(ix.bot_reply);
+            if (botReply) {
+                if (asRecord(botReply.failure)) {
+                    slice.failures += 1;
+                }
+                else {
+                    const text = asString(botReply.text);
+                    if (text && slice.sample_replies.length < SAMPLE_REPLY_CAP) {
+                        slice.sample_replies.push(truncate(text, SAMPLE_REPLY_MAX_LEN));
+                    }
+                }
+            }
+        }
+    }
+    return Array.from(byTurn.values()).sort((a, b) => a.turn_index - b.turn_index);
+}
+/**
+ * `--group-by assignment` — one slice per study assignment, with each
+ * assignment's `step_completion[]` (from the study payload) attached so the
+ * caller can see pass / inconclusive / fail rollups inline.
+ */
+export function buildStudyResultsPerAssignment(filtered) {
+    const assignments = asArray(filtered.study.assignments);
+    const order = [];
+    const stepCompletionById = new Map();
+    const nameById = new Map();
+    for (const raw of assignments) {
+        const a = asRecord(raw);
+        if (!a)
+            continue;
+        const id = asString(a.id);
+        if (!id)
+            continue;
+        const name = asString(a.name);
+        const sc = asArray(a.step_completion);
+        order.push({ id, name, step_completion: sc });
+        stepCompletionById.set(id, sc);
+        nameById.set(id, name);
+    }
+    const byAssignment = new Map();
+    const ensure = (id) => {
+        let slice = byAssignment.get(id);
+        if (!slice) {
+            slice = {
+                assignment_id: id,
+                assignment_name: nameById.get(id) ?? null,
+                interaction_count: 0,
+                sentiment_histogram: {},
+                step_completion: stepCompletionById.get(id) ?? [],
+            };
+            byAssignment.set(id, slice);
+        }
+        return slice;
+    };
+    // Seed every declared assignment so the caller sees the full matrix even
+    // when a filter wipes some out (interaction_count: 0, but step_completion
+    // still visible for context). This matches the per-iteration convention.
+    for (const o of order)
+        ensure(o.id);
+    for (const p of filtered.participants) {
+        for (const raw of asArray(p.interactions)) {
+            const ix = asRecord(raw);
+            if (!ix)
+                continue;
+            const aid = asString(ix.assignment_id);
+            if (!aid)
+                continue;
+            const slice = ensure(aid);
+            slice.interaction_count += 1;
+            pushSentiment(slice.sentiment_histogram, readSentimentLabel(ix));
+        }
+    }
+    const ordered = [];
+    const consumed = new Set();
+    for (const o of order) {
+        const slice = byAssignment.get(o.id);
+        if (slice) {
+            ordered.push(slice);
+            consumed.add(o.id);
+        }
+    }
+    for (const [id, slice] of byAssignment) {
+        if (!consumed.has(id))
+            ordered.push(slice);
+    }
+    return ordered;
+}
+/**
+ * `--group-by step` — one slice per `(assignment, step_id)` pair with verdict
+ * totals (re-derived from surviving participants, NOT the pre-computed
+ * step_completion) and per-participant verdict rows inline.
+ *
+ * Re-deriving totals matters when filters are applied: e.g. a caller asking
+ * for `--iteration B --group-by step` wants verdict counts for iteration B
+ * only, not the study-wide rollup.
+ */
+export function buildStudyResultsPerStep(filtered) {
+    const assignmentNameById = new Map();
+    const stepNameByKey = new Map();
+    for (const raw of asArray(filtered.study.assignments)) {
+        const a = asRecord(raw);
+        if (!a)
+            continue;
+        const aid = asString(a.id);
+        if (!aid)
+            continue;
+        assignmentNameById.set(aid, asString(a.name));
+        for (const sraw of asArray(a.steps)) {
+            const s = asRecord(sraw);
+            if (!s)
+                continue;
+            const sid = asString(s.id);
+            if (!sid)
+                continue;
+            stepNameByKey.set(`${aid}|${sid}`, asString(s.name));
+        }
+    }
+    const byKey = new Map();
+    const ensureSlice = (aid, sid, fallbackStepName) => {
+        const key = `${aid}|${sid}`;
+        let slice = byKey.get(key);
+        if (!slice) {
+            slice = {
+                assignment_id: aid,
+                assignment_name: assignmentNameById.get(aid) ?? null,
+                step_id: sid,
+                step_name: stepNameByKey.get(key) ?? fallbackStepName,
+                total: 0,
+                passed: 0,
+                inconclusive: 0,
+                failed: 0,
+                rate: 0,
+                participant_verdicts: [],
+            };
+            byKey.set(key, slice);
+        }
+        return slice;
+    };
+    // Seed slices from the declared study so steps with zero surviving
+    // verdicts still surface — gives "0/0 passed" rather than missing rows.
+    for (const raw of asArray(filtered.study.assignments)) {
+        const a = asRecord(raw);
+        const aid = a ? asString(a.id) : null;
+        if (!aid)
+            continue;
+        for (const sraw of asArray(a?.steps)) {
+            const s = asRecord(sraw);
+            const sid = s ? asString(s.id) : null;
+            if (!sid)
+                continue;
+            ensureSlice(aid, sid, s ? asString(s.name) : null);
+        }
+    }
+    for (const p of filtered.participants) {
+        const alias = participantAlias(p);
+        for (const paRaw of asArray(p.participant_assignments)) {
+            const pa = asRecord(paRaw);
+            if (!pa)
+                continue;
+            const aid = asString(pa.assignment_id);
+            if (!aid)
+                continue;
+            for (const srRaw of asArray(pa.step_results)) {
+                const sr = asRecord(srRaw);
+                if (!sr)
+                    continue;
+                const sid = asString(sr.step_id);
+                if (!sid)
+                    continue;
+                const slice = ensureSlice(aid, sid, asString(sr.name));
+                const verdict = asString(sr.verdict);
+                slice.total += 1;
+                if (verdict === "passed" || verdict === "pass")
+                    slice.passed += 1;
+                else if (verdict === "inconclusive")
+                    slice.inconclusive += 1;
+                else if (verdict === "failed" || verdict === "fail")
+                    slice.failed += 1;
+                const evidence = [];
+                for (const eid of asArray(sr.evidence_interaction_ids)) {
+                    const eidStr = asString(eid);
+                    if (eidStr)
+                        evidence.push(eidStr);
+                }
+                slice.participant_verdicts.push({
+                    participant_alias: alias,
+                    verdict,
+                    reason: asString(sr.reason),
+                    evidence_interaction_ids: evidence,
+                });
+            }
+        }
+    }
+    const declaredOrder = [];
+    for (const raw of asArray(filtered.study.assignments)) {
+        const a = asRecord(raw);
+        if (!a)
+            continue;
+        const aid = asString(a.id);
+        if (!aid)
+            continue;
+        for (const sraw of asArray(a.steps)) {
+            const s = asRecord(sraw);
+            const sid = s ? asString(s.id) : null;
+            if (sid)
+                declaredOrder.push(`${aid}|${sid}`);
+        }
+    }
+    const ordered = [];
+    const consumed = new Set();
+    for (const key of declaredOrder) {
+        const slice = byKey.get(key);
+        if (slice) {
+            slice.rate = slice.total > 0
+                ? Math.round((slice.passed / slice.total) * 100) / 100
+                : 0;
+            slice.participant_verdicts.sort((a, b) => (a.participant_alias ?? "").localeCompare(b.participant_alias ?? ""));
+            ordered.push(slice);
+            consumed.add(key);
+        }
+    }
+    for (const [key, slice] of byKey) {
+        if (consumed.has(key))
+            continue;
+        slice.rate = slice.total > 0
+            ? Math.round((slice.passed / slice.total) * 100) / 100
+            : 0;
+        slice.participant_verdicts.sort((a, b) => (a.participant_alias ?? "").localeCompare(b.participant_alias ?? ""));
+        ordered.push(slice);
+    }
+    return ordered;
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ishlabs/cli",
-  "version": "0.20.0",
+  "version": "0.21.0",
   "description": "The command-line interface for ish",
   "type": "module",
   "bin": {