npm - @ishlabs/cli - Versions diffs - 0.20.0 → 0.22.0 - Mend

@ishlabs/cli 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/dist/commands/chat.js +2 -2
package/dist/commands/config.js +17 -3
package/dist/commands/source.js +1 -1
package/dist/commands/study-analyze.js +15 -2
package/dist/commands/study-participant.js +19 -0
package/dist/commands/study.js +313 -14
package/dist/lib/alias-store.d.ts +1 -0
package/dist/lib/alias-store.js +2 -0
package/dist/lib/command-helpers.js +4 -3
package/dist/lib/docs.js +232 -15
package/dist/lib/output.d.ts +24 -1
package/dist/lib/output.js +290 -2
package/dist/lib/skill-content.js +76 -0
package/dist/lib/study-participants.d.ts +13 -0
package/dist/lib/study-participants.js +13 -0
package/dist/lib/study-results-filters.d.ts +91 -0
package/dist/lib/study-results-filters.js +559 -0
package/dist/lib/study-results-projections.d.ts +152 -0
package/dist/lib/study-results-projections.js +580 -0
package/package.json +1 -1

package/dist/lib/study-results-projections.js ADDED Viewed

@@ -0,0 +1,580 @@
+/**
+ * Pure projection builders for `ish study results --group-by <kind>`.
+ *
+ * Each `buildStudyResultsPer<Kind>` consumes a `FilteredResults` (the output
+ * of `applyResultsFilters` in `study-results-filters.ts`) and returns a
+ * bare array of slice objects. The surface (`commands/study.ts`) wraps the
+ * array uniformly in a `SliceResponse` envelope alongside `totals_unfiltered`,
+ * `modality_warnings`, `study_id`, and `modality` before handing it off to
+ * `formatStudyResultsGroupBy` for JSON or human rendering.
+ *
+ * Conventions mirror `buildStudyResultsEnvelope` (`output.ts:1081`) and
+ * `buildStudyResultsSummary` (`output.ts:1292`):
+ *   - deterministic field order (object literals are emitted in source order)
+ *   - stable empties: empty arrays, never `null` for "no rows yet"
+ *   - sample_comments capped at 5 per group, truncated to 200 chars
+ *   - sentiment histograms are { label → count } records
+ *   - participant_aliases capped at 10 per group
+ *
+ * Has no IO and no console side-effects.
+ */
+import { deterministicAlias, ALIAS_PREFIX } from "./alias-store.js";
/**
 * Assemble the uniform `SliceResponse` envelope around a bare projection array.
 *
 * @param filtered  Filter output; only `totals_unfiltered` and `warnings` are read.
 * @param axis      The group-by axis the rows were built for (passed through).
 * @param rows      The slice array produced by one of the `buildStudyResultsPer<Kind>` builders.
 * @param studyId   Raw study id; it is exposed only as a deterministic alias.
 * @param modality  Study modality (passed through).
 * @returns `{ axis, rows, totals_unfiltered, modality_warnings, study_id, modality }`
 *          with the keys in exactly that order.
 */
export function wrapSliceProjection(filtered, axis, rows, studyId, modality) {
    const { totals_unfiltered, warnings: modality_warnings } = filtered;
    const study_id = deterministicAlias(ALIAS_PREFIX.study, studyId);
    return { axis, rows, totals_unfiltered, modality_warnings, study_id, modality };
}
+const SAMPLE_COMMENT_CAP = 5; // max entries collectComment keeps in one sample_comments bucket
+const SAMPLE_COMMENT_MAX_LEN = 200; // length cap handed to truncate() for each collected comment
+const PARTICIPANT_ALIAS_CAP = 10; // max entries collectParticipantAlias keeps in one alias bucket
+const SAMPLE_REPLY_CAP = 5; // max sample_replies kept per turn slice
+const SAMPLE_REPLY_MAX_LEN = 200; // length cap handed to truncate() for each sampled bot reply
+const UNMATCHED_BUCKET = "_unmatched"; // synthetic frame_id for interactions lacking a frame_version_id
+// ---------- helpers ---------------------------------------------------------
/**
 * Narrow an unknown value to a plain "record": any non-null, non-array object.
 * @returns the same reference when it qualifies, otherwise `null`.
 */
function asRecord(v) {
    if (v === null || typeof v !== "object" || Array.isArray(v)) {
        return null;
    }
    return v;
}
/**
 * Pass arrays through untouched; anything else becomes a fresh empty array,
 * so callers can iterate without a null check.
 */
function asArray(v) {
    if (Array.isArray(v)) {
        return v;
    }
    return [];
}
/**
 * Accept only non-empty strings; every other value (including "") maps to `null`.
 */
function asString(v) {
    if (typeof v !== "string") {
        return null;
    }
    return v.length === 0 ? null : v;
}
/**
 * Shorten `str` to at most `maxLen` UTF-16 code units, ending in "…".
 *
 * Strings that already fit are returned unchanged. Otherwise the first
 * `maxLen - 3` code units are kept and the cut is moved back to, in order
 * of preference:
 *   1. the last sentence terminator (". ", "! ", "? "),
 *   2. the last space,
 * provided that break lies in the second half of the budget. With no such
 * break (one long unbroken token) the text is hard-cut.
 *
 * @param {string} str     Text to shorten.
 * @param {number} maxLen  Maximum length of the result, in UTF-16 code units.
 * @returns {string} `str` itself, or a shortened copy ending in "…"
 *                   ("" when `maxLen` is zero or negative).
 */
function truncate(str, maxLen) {
    if (str.length <= maxLen) {
        return str;
    }
    // Nothing fits, not even the ellipsis.
    if (maxLen <= 0) {
        return "";
    }
    // Clamp so a tiny cap cannot turn into a negative slice end, which would
    // keep almost the whole string and return more than the cap.
    let head = str.slice(0, Math.max(0, maxLen - 3));
    // Pattern H: trim at a word boundary so quoted `sample_comments` /
    // `sample_replies` don't end mid-word.
    const sentenceBreak = Math.max(head.lastIndexOf(". "), head.lastIndexOf("! "), head.lastIndexOf("? "));
    if (sentenceBreak >= maxLen / 2) {
        return head.slice(0, sentenceBreak + 1) + "…";
    }
    const spaceBreak = head.lastIndexOf(" ");
    if (spaceBreak >= maxLen / 2) {
        return head.slice(0, spaceBreak) + " …";
    }
    // Hard cut: if the slice ended between the two halves of a surrogate pair
    // (e.g. an emoji), drop the dangling high surrogate rather than emit an
    // ill-formed string.
    const last = head.charCodeAt(head.length - 1);
    if (last >= 0xd800 && last <= 0xdbff) {
        head = head.slice(0, -1);
    }
    return head + "…";
}
/**
 * Deterministic alias for a participant, derived from its `id`.
 * @returns the alias, or `null` when the participant has no usable string id.
 */
function participantAlias(participant) {
    const id = asString(participant.id);
    if (id === null) {
        return null;
    }
    return deterministicAlias(ALIAS_PREFIX.participant, id);
}
/**
 * Read `interaction.sentiment.label`.
 * @returns the label, or `null` when `sentiment` is not a record or the label
 *          is not a non-empty string.
 */
function readSentimentLabel(interaction) {
    const sentiment = asRecord(interaction.sentiment);
    if (!sentiment) {
        return null;
    }
    return asString(sentiment.label);
}
/**
 * Read the engagement label of an interaction.
 *
 * `engagement` is accepted in two shapes: a bare non-empty string, or a
 * record whose `level` (preferred) or `label` holds the string.
 * @returns the engagement string, or `null` when neither shape matches.
 */
function readEngagement(interaction) {
    const raw = interaction.engagement;
    const isBareLabel = typeof raw === "string" && raw.length > 0;
    if (isBareLabel) {
        return raw;
    }
    const wrapper = asRecord(raw);
    return wrapper ? (asString(wrapper.level) ?? asString(wrapper.label)) : null;
}
/**
 * Return the `data` record of the interaction's first action.
 * @returns `actions[0].data` when it is a record, otherwise a fresh `{}`
 *          (no actions, a non-record first action, or non-record data).
 */
function firstActionData(interaction) {
    const [head] = asArray(interaction.actions);
    const firstAction = asRecord(head);
    const data = firstAction ? asRecord(firstAction.data) : null;
    return data ?? {};
}
/**
 * Increment the count for `label` in a `{ label → count }` sentiment histogram.
 * A missing or empty label is ignored.
 *
 * @param {object} hist          Histogram object, mutated in place.
 * @param {string|null} label    Sentiment label to count.
 */
function pushSentiment(hist, label) {
    if (!label) {
        return;
    }
    // Only trust the histogram's own keys. A plain lookup also sees members
    // inherited from Object.prototype, so a label such as "constructor" or
    // "toString" would start from a function instead of 0.
    const seen = Object.prototype.hasOwnProperty.call(hist, label);
    const next = (seen ? (hist[label] ?? 0) : 0) + 1;
    // defineProperty rather than assignment, so that even "__proto__" lands as
    // an ordinary own, enumerable count instead of hitting the prototype setter.
    Object.defineProperty(hist, label, {
        value: next,
        writable: true,
        enumerable: true,
        configurable: true,
    });
}
/**
 * Increment the count for `label` in a `{ label → count }` engagement histogram.
 * A missing or empty label is ignored.
 *
 * @param {object} hist          Histogram object, mutated in place.
 * @param {string|null} label    Engagement label to count.
 */
function pushEngagement(hist, label) {
    if (!label) {
        return;
    }
    // Only trust the histogram's own keys. A plain lookup also sees members
    // inherited from Object.prototype, so a label such as "constructor" or
    // "valueOf" would start from a function instead of 0.
    const seen = Object.prototype.hasOwnProperty.call(hist, label);
    const next = (seen ? (hist[label] ?? 0) : 0) + 1;
    // defineProperty rather than assignment, so that even "__proto__" lands as
    // an ordinary own, enumerable count instead of hitting the prototype setter.
    Object.defineProperty(hist, label, {
        value: next,
        writable: true,
        enumerable: true,
        configurable: true,
    });
}
/**
 * Append the interaction's comment to `bucket`, truncated to
 * SAMPLE_COMMENT_MAX_LEN, unless the bucket already holds
 * SAMPLE_COMMENT_CAP entries or there is no non-empty comment.
 */
function collectComment(bucket, interaction) {
    const hasRoom = bucket.length < SAMPLE_COMMENT_CAP;
    if (!hasRoom) {
        return;
    }
    const comment = asString(interaction.comment);
    if (comment !== null) {
        bucket.push(truncate(comment, SAMPLE_COMMENT_MAX_LEN));
    }
}
/**
 * Record the participant's alias in `bucket` once per distinct alias, up to
 * PARTICIPANT_ALIAS_CAP entries. `seen` is the companion set used for
 * de-duplication; both arguments are mutated in place.
 */
function collectParticipantAlias(bucket, seen, participant) {
    if (bucket.length < PARTICIPANT_ALIAS_CAP) {
        const alias = participantAlias(participant);
        if (alias && !seen.has(alias)) {
            seen.add(alias);
            bucket.push(alias);
        }
    }
}
/**
 * `--group-by iteration` — one slice per iteration declared on
 * `filtered.study.iterations`, in declaration order.
 *
 * Every declared iteration is seeded up front, so iterations with no
 * surviving participants still appear with zero counts and empty
 * collections. Participants whose `iteration_id` is missing or not declared
 * are skipped. Each slice carries participant and interaction counts, a
 * sentiment histogram, sample comments and the five most frequent action
 * types (ties broken alphabetically).
 *
 * Returns a bare array; the caller wraps it in the response envelope.
 */
export function buildStudyResultsPerIteration(filtered) {
    const TOP_ACTION_LIMIT = 5;
    // Declared iterations in payload order; entries without a usable id are dropped.
    const declared = [];
    asArray(filtered.study.iterations).forEach((entry) => {
        const record = asRecord(entry);
        const id = record ? asString(record.id) : null;
        if (id === null) {
            return;
        }
        declared.push({ id, label: asString(record.label) });
    });
    const slices = new Map();
    const actionTallies = new Map();
    for (const { id, label } of declared) {
        slices.set(id, {
            iteration_id: id,
            iteration_label: label,
            participant_count: 0,
            interaction_count: 0,
            sentiment: {},
            sample_comments: [],
            top_actions: [],
        });
        actionTallies.set(id, new Map());
    }
    for (const participant of filtered.participants) {
        const iterationId = asString(participant.iteration_id);
        const slice = iterationId === null ? undefined : slices.get(iterationId);
        if (!slice) {
            continue;
        }
        const tally = actionTallies.get(iterationId);
        slice.participant_count += 1;
        for (const entry of asArray(participant.interactions)) {
            const interaction = asRecord(entry);
            if (!interaction) {
                continue;
            }
            slice.interaction_count += 1;
            pushSentiment(slice.sentiment, readSentimentLabel(interaction));
            collectComment(slice.sample_comments, interaction);
            for (const actionEntry of asArray(interaction.actions)) {
                const action = asRecord(actionEntry);
                const actionType = action ? asString(action.action_type) : null;
                if (actionType) {
                    tally.set(actionType, (tally.get(actionType) ?? 0) + 1);
                }
            }
        }
    }
    // Rank action types: highest count first, then alphabetically.
    for (const [id, tally] of actionTallies) {
        const ranked = [...tally].sort(([nameA, countA], [nameB, countB]) => {
            const byCount = countB - countA;
            return byCount !== 0 ? byCount : nameA.localeCompare(nameB);
        });
        slices.get(id).top_actions = ranked
            .slice(0, TOP_ACTION_LIMIT)
            .map(([action_type, count]) => ({ action_type, count }));
    }
    return declared.map(({ id }) => slices.get(id));
}
/**
 * `--group-by frame` — one slice per frame that received at least one
 * surviving interaction. (Per the module's notes this axis is meant for
 * interactive studies, with the calling surface rejecting other modalities
 * before it gets here.)
 *
 * Frame resolution per interaction:
 *   - no usable `frame_version_id`            → synthetic `_unmatched` bucket
 *   - id present in `filtered.frameVersionLookup` → that entry's frame id / label
 *   - id absent from the lookup               → bucketed under the raw id, no label
 *
 * Output order: by label (a missing label compares as ""), ties broken by
 * frame_id, with `_unmatched` always last. Returns a bare array; the caller
 * attaches `totals_unfiltered` and the rest of the envelope.
 */
export function buildStudyResultsPerFrame(filtered) {
    const slices = new Map();
    const aliasSets = new Map();
    // Fetch the slice for a frame, creating it on first sight. A label that
    // shows up later back-fills a slice that was created without one.
    function sliceFor(frameId, label) {
        const existing = slices.get(frameId);
        if (existing) {
            if (existing.frame_label === null && label !== null) {
                existing.frame_label = label;
            }
            return existing;
        }
        const created = {
            frame_id: frameId,
            frame_label: label,
            interaction_count: 0,
            sentiment_histogram: {},
            sample_comments: [],
            participant_aliases: [],
        };
        slices.set(frameId, created);
        aliasSets.set(frameId, new Set());
        return created;
    }
    // Map an interaction to the (frameId, label) pair it should be counted under.
    function resolveFrame(interaction) {
        const versionId = asString(interaction.frame_version_id);
        if (!versionId) {
            return { frameId: UNMATCHED_BUCKET, label: null };
        }
        const hit = filtered.frameVersionLookup.get(versionId);
        if (hit) {
            return { frameId: hit.frame_id, label: hit.frame_label };
        }
        // Defensive: a surviving version id without a lookup entry is grouped
        // under the version id itself so the grouping stays meaningful.
        return { frameId: versionId, label: null };
    }
    for (const participant of filtered.participants) {
        for (const entry of asArray(participant.interactions)) {
            const interaction = asRecord(entry);
            if (!interaction) {
                continue;
            }
            const { frameId, label } = resolveFrame(interaction);
            const slice = sliceFor(frameId, label);
            slice.interaction_count += 1;
            pushSentiment(slice.sentiment_histogram, readSentimentLabel(interaction));
            collectComment(slice.sample_comments, interaction);
            collectParticipantAlias(slice.participant_aliases, aliasSets.get(frameId), participant);
        }
    }
    return [...slices.values()].sort((left, right) => {
        if (left.frame_id === UNMATCHED_BUCKET) {
            return 1;
        }
        if (right.frame_id === UNMATCHED_BUCKET) {
            return -1;
        }
        const leftLabel = left.frame_label ?? "";
        const rightLabel = right.frame_label ?? "";
        return leftLabel === rightLabel
            ? left.frame_id.localeCompare(right.frame_id)
            : leftLabel.localeCompare(rightLabel);
    });
}
/**
 * `--group-by segment` — groups interactions by the segment recorded on their
 * first action (`actions[0].data.segment_index` / `segment_label`).
 *
 * An interaction with neither a numeric index nor a non-empty label is
 * skipped. Distinct (index, label) pairs get distinct slices. Each slice
 * carries an interaction count, sentiment and engagement histograms and
 * sample comments.
 *
 * Output order: slices with an index first, ascending by index and then by
 * label; index-less slices follow, ordered by label.
 */
export function buildStudyResultsPerSegment(filtered) {
    const slices = new Map();
    for (const participant of filtered.participants) {
        for (const entry of asArray(participant.interactions)) {
            const interaction = asRecord(entry);
            if (interaction === null) {
                continue;
            }
            const { segment_index: rawIndex, segment_label: rawLabel } = firstActionData(interaction);
            const segmentIndex = typeof rawIndex === "number" ? rawIndex : null;
            const segmentLabel = asString(rawLabel);
            if (segmentIndex === null && segmentLabel === null) {
                continue;
            }
            const key = `${segmentIndex ?? "_"}|${segmentLabel ?? "_"}`;
            if (!slices.has(key)) {
                slices.set(key, {
                    segment_index: segmentIndex,
                    segment_label: segmentLabel,
                    interaction_count: 0,
                    sentiment_histogram: {},
                    engagement_histogram: {},
                    sample_comments: [],
                });
            }
            const slice = slices.get(key);
            slice.interaction_count += 1;
            pushSentiment(slice.sentiment_histogram, readSentimentLabel(interaction));
            pushEngagement(slice.engagement_histogram, readEngagement(interaction));
            collectComment(slice.sample_comments, interaction);
        }
    }
    const compareLabels = (left, right) => (left.segment_label ?? "").localeCompare(right.segment_label ?? "");
    return [...slices.values()].sort((left, right) => {
        const leftIndex = left.segment_index;
        const rightIndex = right.segment_index;
        if (leftIndex === null || rightIndex === null) {
            if (leftIndex === rightIndex) {
                // Both index-less: fall back to the label.
                return compareLabels(left, right);
            }
            return leftIndex === null ? 1 : -1;
        }
        return leftIndex !== rightIndex ? leftIndex - rightIndex : compareLabels(left, right);
    });
}
/**
 * `--group-by turn` — groups interactions by the numeric
 * `actions[0].data.turn_index`; interactions without one are skipped.
 *
 * For each turn the slice reports the interaction count, a sentiment
 * histogram, how many bot replies carried a `failure` record, and up to
 * SAMPLE_REPLY_CAP truncated reply texts taken from the non-failed replies.
 * Slices are returned in ascending turn order.
 */
export function buildStudyResultsPerTurn(filtered) {
    const slices = new Map();
    for (const participant of filtered.participants) {
        for (const entry of asArray(participant.interactions)) {
            const interaction = asRecord(entry);
            if (interaction === null) {
                continue;
            }
            const { turn_index: turnIndex } = firstActionData(interaction);
            if (typeof turnIndex !== "number") {
                continue;
            }
            let slice = slices.get(turnIndex);
            if (slice === undefined) {
                slice = {
                    turn_index: turnIndex,
                    interaction_count: 0,
                    sentiment_histogram: {},
                    sample_replies: [],
                    failures: 0,
                };
                slices.set(turnIndex, slice);
            }
            slice.interaction_count += 1;
            pushSentiment(slice.sentiment_histogram, readSentimentLabel(interaction));
            const botReply = asRecord(interaction.bot_reply);
            if (botReply === null) {
                continue;
            }
            // A failure stub is counted but never sampled.
            if (asRecord(botReply.failure) !== null) {
                slice.failures += 1;
                continue;
            }
            const text = asString(botReply.text);
            if (text !== null && slice.sample_replies.length < SAMPLE_REPLY_CAP) {
                slice.sample_replies.push(truncate(text, SAMPLE_REPLY_MAX_LEN));
            }
        }
    }
    return [...slices.values()].sort((left, right) => left.turn_index - right.turn_index);
}
/**
 * `--group-by assignment` — one slice per study assignment, with each
 * assignment's `step_completion[]` (taken from the study payload) attached so
 * the caller can see the rollup inline.
 *
 * Every declared assignment is seeded first, so assignments left without
 * surviving interactions still appear with `interaction_count: 0`.
 * Interactions that reference an undeclared `assignment_id` get a slice of
 * their own (no name, empty `step_completion`) after the declared ones, in
 * first-seen order. Interactions without an `assignment_id` are skipped.
 *
 * Each assignment id yields exactly one row, even if the payload declares it
 * more than once.
 *
 * @param filtered  Filter output; reads `study.assignments` and `participants`.
 * @returns bare array of assignment slices.
 */
export function buildStudyResultsPerAssignment(filtered) {
    const declaredIds = [];
    const nameById = new Map();
    const stepCompletionById = new Map();
    for (const raw of asArray(filtered.study.assignments)) {
        const a = asRecord(raw);
        const id = a ? asString(a.id) : null;
        if (!id) {
            continue;
        }
        declaredIds.push(id);
        nameById.set(id, asString(a.name));
        stepCompletionById.set(id, asArray(a.step_completion));
    }
    const byAssignment = new Map();
    const ensure = (id) => {
        let slice = byAssignment.get(id);
        if (!slice) {
            slice = {
                assignment_id: id,
                assignment_name: nameById.get(id) ?? null,
                interaction_count: 0,
                sentiment_histogram: {},
                step_completion: stepCompletionById.get(id) ?? [],
            };
            byAssignment.set(id, slice);
        }
        return slice;
    };
    // Seed every declared assignment so the caller sees the full matrix even
    // when a filter wipes some out. Seeding before counting also fixes the
    // output order: a Map iterates in insertion order, so declared slices
    // come first and undeclared ones follow in first-seen order.
    for (const id of declaredIds) {
        ensure(id);
    }
    for (const p of filtered.participants) {
        for (const raw of asArray(p.interactions)) {
            const ix = asRecord(raw);
            if (!ix) {
                continue;
            }
            const aid = asString(ix.assignment_id);
            if (!aid) {
                continue;
            }
            const slice = ensure(aid);
            slice.interaction_count += 1;
            pushSentiment(slice.sentiment_histogram, readSentimentLabel(ix));
        }
    }
    return Array.from(byAssignment.values());
}
/**
 * `--group-by step` — one slice per `(assignment, step_id)` pair with verdict
 * totals and the per-participant verdict rows inline.
 *
 * Totals are re-derived from the surviving participants' `step_results`, not
 * copied from the study's pre-computed `step_completion`, so they reflect
 * whatever filters produced `filtered`.
 *
 * Declared steps are seeded first and therefore always appear, with zero
 * totals when no verdict survived, in declaration order. Steps that only
 * show up in participant results follow in first-seen order, named from the
 * result's own `name`. Each pair yields exactly one row.
 *
 * `total` counts every step result; only "passed"/"pass", "inconclusive" and
 * "failed"/"fail" feed the specific counters. `rate` is passed / total
 * rounded to two decimals, or 0 when there are no results.
 *
 * @param filtered  Filter output; reads `study.assignments` and `participants`.
 * @returns bare array of step slices.
 */
export function buildStudyResultsPerStep(filtered) {
    const assignmentNameById = new Map();
    const byKey = new Map();
    const ensureSlice = (aid, sid, stepName) => {
        const key = `${aid}|${sid}`;
        let slice = byKey.get(key);
        if (!slice) {
            slice = {
                assignment_id: aid,
                assignment_name: assignmentNameById.get(aid) ?? null,
                step_id: sid,
                step_name: stepName,
                total: 0,
                passed: 0,
                inconclusive: 0,
                failed: 0,
                rate: 0,
                participant_verdicts: [],
            };
            byKey.set(key, slice);
        }
        return slice;
    };
    // One pass over the declared study: remember assignment names and seed a
    // slice per declared step so steps with zero surviving verdicts still
    // surface ("0/0 passed" rather than a missing row). Seeding before any
    // participant is processed also fixes the output order, because a Map
    // iterates in insertion order.
    for (const raw of asArray(filtered.study.assignments)) {
        const a = asRecord(raw);
        const aid = a ? asString(a.id) : null;
        if (!aid) {
            continue;
        }
        assignmentNameById.set(aid, asString(a.name));
        for (const sraw of asArray(a.steps)) {
            const s = asRecord(sraw);
            const sid = s ? asString(s.id) : null;
            if (!sid) {
                continue;
            }
            ensureSlice(aid, sid, asString(s.name));
        }
    }
    for (const p of filtered.participants) {
        const alias = participantAlias(p);
        for (const paRaw of asArray(p.participant_assignments)) {
            const pa = asRecord(paRaw);
            const aid = pa ? asString(pa.assignment_id) : null;
            if (!aid) {
                continue;
            }
            for (const srRaw of asArray(pa.step_results)) {
                const sr = asRecord(srRaw);
                const sid = sr ? asString(sr.step_id) : null;
                if (!sid) {
                    continue;
                }
                const slice = ensureSlice(aid, sid, asString(sr.name));
                const verdict = asString(sr.verdict);
                slice.total += 1;
                if (verdict === "passed" || verdict === "pass") {
                    slice.passed += 1;
                }
                else if (verdict === "inconclusive") {
                    slice.inconclusive += 1;
                }
                else if (verdict === "failed" || verdict === "fail") {
                    slice.failed += 1;
                }
                // Keep only usable (non-empty string) evidence ids.
                const evidence = [];
                for (const eid of asArray(sr.evidence_interaction_ids)) {
                    const eidStr = asString(eid);
                    if (eidStr) {
                        evidence.push(eidStr);
                    }
                }
                slice.participant_verdicts.push({
                    participant_alias: alias,
                    verdict,
                    reason: asString(sr.reason),
                    evidence_interaction_ids: evidence,
                });
            }
        }
    }
    const rows = Array.from(byKey.values());
    for (const slice of rows) {
        finalizeStepSlice(slice);
    }
    return rows;
}
/** Compute the pass rate and put the verdict rows into stable alias order. */
function finalizeStepSlice(slice) {
    slice.rate = slice.total > 0
        ? Math.round((slice.passed / slice.total) * 100) / 100
        : 0;
    slice.participant_verdicts.sort((a, b) => (a.participant_alias ?? "").localeCompare(b.participant_alias ?? ""));
}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ishlabs/cli",
-  "version": "0.20.0",
+  "version": "0.22.0",
   "description": "The command-line interface for ish",
   "type": "module",
   "bin": {