@ishlabs/cli 0.20.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/chat.js +2 -2
- package/dist/commands/config.js +17 -3
- package/dist/commands/source.js +1 -1
- package/dist/commands/study-analyze.js +15 -2
- package/dist/commands/study-participant.js +19 -0
- package/dist/commands/study.js +313 -14
- package/dist/lib/alias-store.d.ts +1 -0
- package/dist/lib/alias-store.js +2 -0
- package/dist/lib/command-helpers.js +4 -3
- package/dist/lib/docs.js +232 -15
- package/dist/lib/output.d.ts +24 -1
- package/dist/lib/output.js +290 -2
- package/dist/lib/skill-content.js +76 -0
- package/dist/lib/study-participants.d.ts +13 -0
- package/dist/lib/study-participants.js +13 -0
- package/dist/lib/study-results-filters.d.ts +91 -0
- package/dist/lib/study-results-filters.js +559 -0
- package/dist/lib/study-results-projections.d.ts +152 -0
- package/dist/lib/study-results-projections.js +580 -0
- package/package.json +1 -1
package/dist/lib/output.js
CHANGED
|
@@ -278,6 +278,53 @@ function pickFields(data, fields) {
|
|
|
278
278
|
}
|
|
279
279
|
return data;
|
|
280
280
|
}
|
|
281
|
+
/**
|
|
282
|
+
* Pattern A: when an agent passes `--fields foo,bar` and one of those names
|
|
283
|
+
* doesn't exist on the response, emit a one-line stderr warning naming the
|
|
284
|
+
* missing fields plus a sample of what IS available. Otherwise unknown names
|
|
285
|
+
* silently drop and the agent assumes the field doesn't exist on the wire,
|
|
286
|
+
* when the more common cause is a typo or the wrong projection.
|
|
287
|
+
*
|
|
288
|
+
* Probes the response shape: for an object response, the top-level keys;
|
|
289
|
+
* for a list-wrapper response, the keys of `items[0]`; for a bare array,
|
|
290
|
+
* the keys of element 0. Warns at most once per command invocation
|
|
291
|
+
* (the caller invokes this from jsonOutput before pickFields).
|
|
292
|
+
*/
|
|
293
|
+
function warnOnUnknownFields(data, fields) {
|
|
294
|
+
let probe = null;
|
|
295
|
+
if (Array.isArray(data) && data.length > 0 && typeof data[0] === "object" && data[0] !== null) {
|
|
296
|
+
probe = data[0];
|
|
297
|
+
}
|
|
298
|
+
else if (data && typeof data === "object" && !Array.isArray(data)) {
|
|
299
|
+
const obj = data;
|
|
300
|
+
if (isListWrapper(obj) && Array.isArray(obj.items) && obj.items.length > 0
|
|
301
|
+
&& typeof obj.items[0] === "object" && obj.items[0] !== null) {
|
|
302
|
+
probe = obj.items[0];
|
|
303
|
+
}
|
|
304
|
+
else {
|
|
305
|
+
probe = obj;
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
if (!probe)
|
|
309
|
+
return;
|
|
310
|
+
const missing = fields.filter((f) => !(f in probe));
|
|
311
|
+
if (missing.length === 0)
|
|
312
|
+
return;
|
|
313
|
+
// Pattern DD: surface↔backend rename hints. The agent-friendly noun is
|
|
314
|
+
// "workspace" but the backend stores `product_id`; agents who guess the
|
|
315
|
+
// surface name need a did-you-mean to find the actual response key.
|
|
316
|
+
const RENAME_MAP = {
|
|
317
|
+
workspace_id: "product_id",
|
|
318
|
+
workspace: "product",
|
|
319
|
+
};
|
|
320
|
+
const renameHints = missing
|
|
321
|
+
.filter((m) => RENAME_MAP[m] && RENAME_MAP[m] in probe)
|
|
322
|
+
.map((m) => `${m} → ${RENAME_MAP[m]}`);
|
|
323
|
+
const available = Object.keys(probe).slice(0, 12).join(", ");
|
|
324
|
+
const more = Object.keys(probe).length > 12 ? `, … (${Object.keys(probe).length - 12} more)` : "";
|
|
325
|
+
const didYouMean = renameHints.length > 0 ? ` Did you mean: ${renameHints.join(", ")}?` : "";
|
|
326
|
+
console.error(`warning: --fields requested ${missing.length === 1 ? "name" : "names"} not on the response: ${missing.join(", ")}.${didYouMean} Available: ${available}${more}.`);
|
|
327
|
+
}
|
|
281
328
|
/** Serialize data as JSON, applying lean transform and field selection. */
|
|
282
329
|
function jsonOutput(data, options = {}) {
|
|
283
330
|
let out;
|
|
@@ -297,6 +344,7 @@ function jsonOutput(data, options = {}) {
|
|
|
297
344
|
out = leanJson(data, options.writePath);
|
|
298
345
|
}
|
|
299
346
|
if (_fields && _fields.length > 0) {
|
|
347
|
+
warnOnUnknownFields(out, _fields);
|
|
300
348
|
out = pickFields(out, _fields);
|
|
301
349
|
}
|
|
302
350
|
// Pattern Ω capture mode: --get <field> returns bare values instead of
|
|
@@ -396,12 +444,19 @@ export function outputList(rows, json) {
|
|
|
396
444
|
/**
|
|
397
445
|
* Error with valid options — used for content_type and similar validation.
|
|
398
446
|
* Surfaces valid_options in JSON so agents can self-correct.
|
|
447
|
+
*
|
|
448
|
+
* Optional `hint` is the agent's *actionable next step* (e.g. for a wrong
|
|
449
|
+
* --group-by axis on the current modality, the axis that DOES apply). Distinct
|
|
450
|
+
* from `valid_options`, which describes where the supplied value WOULD be
|
|
451
|
+
* valid. Both serialize into the error envelope when present.
|
|
399
452
|
*/
|
|
400
453
|
export class ValidationError extends Error {
    /** Where the supplied value WOULD be valid; serialized as `valid_options`. */
    valid_options;
    /** Optional actionable next step; serialized as `hint` when present. */
    hint;
    /**
     * @param message       Human-readable description of the failure.
     * @param valid_options Accepted alternatives for the rejected value.
     * @param hint          Optional next step the caller should try.
     */
    constructor(message, valid_options, hint) {
        super(message);
        Object.assign(this, { valid_options, hint, name: "ValidationError" });
    }
}
|
|
@@ -434,6 +489,11 @@ function suggestionsForError(err) {
|
|
|
434
489
|
return [
|
|
435
490
|
"Run a list command to see available resources",
|
|
436
491
|
"Check that the alias or ID is correct",
|
|
492
|
+
// Pattern R: an active workspace / study / ask saved in config can
|
|
493
|
+
// outlive the resource on the server. Implicit lookups then 404
|
|
494
|
+
// with no indication that the ID came from config. `ish status`
|
|
495
|
+
// flags orphans; `<entity> use --clear` resets the active value.
|
|
496
|
+
"If you didn't pass the resource explicitly, your saved active workspace/study/ask may be stale — run `ish status` to check, then `ish workspace use --clear` (or `ish study use --clear` / `ish ask use --clear`) to reset.",
|
|
437
497
|
];
|
|
438
498
|
case "insufficient_credits":
|
|
439
499
|
return ["Purchase more credits at https://app.ishlabs.io"];
|
|
@@ -593,11 +653,14 @@ export function outputError(err, json) {
|
|
|
593
653
|
error_code: "validation_error",
|
|
594
654
|
retryable: false,
|
|
595
655
|
valid_options: err.valid_options,
|
|
656
|
+
...(err.hint && { hint: err.hint }),
|
|
596
657
|
...(suggestions.length > 0 && { suggestions }),
|
|
597
658
|
}));
|
|
598
659
|
}
|
|
599
660
|
else {
|
|
600
661
|
console.error(`Error: ${err.message}`);
|
|
662
|
+
if (err.hint)
|
|
663
|
+
console.error(` hint: ${err.hint}`);
|
|
601
664
|
for (const s of suggestions)
|
|
602
665
|
console.error(` → ${s}`);
|
|
603
666
|
}
|
|
@@ -635,6 +698,9 @@ export function outputError(err, json) {
|
|
|
635
698
|
? tagged.suggestions.filter((s) => typeof s === "string")
|
|
636
699
|
: [];
|
|
637
700
|
const mergedSuggestions = [...new Set([...suggestions, ...taggedSuggestions])];
|
|
701
|
+
const availableValues = Array.isArray(tagged.available_values)
|
|
702
|
+
? tagged.available_values.filter((s) => typeof s === "string")
|
|
703
|
+
: undefined;
|
|
638
704
|
if (json) {
|
|
639
705
|
console.error(JSON.stringify({
|
|
640
706
|
// Generic Error: CLI-thrown (we control the message), so we don't
|
|
@@ -647,6 +713,7 @@ export function outputError(err, json) {
|
|
|
647
713
|
...(errorKind && { error_kind: errorKind }),
|
|
648
714
|
...(example && { example }),
|
|
649
715
|
...(progress !== undefined && { progress }),
|
|
716
|
+
...(availableValues && availableValues.length > 0 && { available_values: availableValues }),
|
|
650
717
|
...(seededIds && { seeded_but_not_dispatched_ids: seededIds }),
|
|
651
718
|
...(seededAliases && { seeded_but_not_dispatched_aliases: seededAliases }),
|
|
652
719
|
...(mergedSuggestions.length > 0 && { suggestions: mergedSuggestions }),
|
|
@@ -992,12 +1059,20 @@ export function formatStudyDetail(study, json, options = {}, participants) {
|
|
|
992
1059
|
* study state — fields default to `null`, `0`, or `[]` when nothing has run.
|
|
993
1060
|
* Agents can rely on the keys always being present (M4).
|
|
994
1061
|
*/
|
|
995
|
-
function buildStudyResultsEnvelope(study, participants) {
|
|
1062
|
+
export function buildStudyResultsEnvelope(study, participants) {
|
|
996
1063
|
const allParticipants = collectParticipants(participants, Array.isArray(study.iterations) ? study.iterations : []);
|
|
997
1064
|
const studyAlias = study.id
|
|
998
1065
|
? deterministicAlias(ALIAS_PREFIX.study, String(study.id))
|
|
999
1066
|
: null;
|
|
1000
1067
|
const completedCount = allParticipants.filter((t) => t.status === "completed" || t.status === "complete").length;
|
|
1068
|
+
// Pattern N: per-status breakdown so callers can distinguish running /
|
|
1069
|
+
// pending / cancelled from terminal completed/failed. Additive — the
|
|
1070
|
+
// aggregate counts (`completed_count` / `failed_count`) stay alongside.
|
|
1071
|
+
const participantStatusCounts = {};
|
|
1072
|
+
for (const t of allParticipants) {
|
|
1073
|
+
const key = (t.status || "unknown").toLowerCase();
|
|
1074
|
+
participantStatusCounts[key] = (participantStatusCounts[key] || 0) + 1;
|
|
1075
|
+
}
|
|
1001
1076
|
// Aggregate sentiment across all interactions on all participants.
|
|
1002
1077
|
const sentimentCounts = {};
|
|
1003
1078
|
let sentimentTotal = 0;
|
|
@@ -1066,6 +1141,7 @@ function buildStudyResultsEnvelope(study, participants) {
|
|
|
1066
1141
|
participant_count: allParticipants.length,
|
|
1067
1142
|
completed_count: completedCount,
|
|
1068
1143
|
failed_count: failedCount,
|
|
1144
|
+
participant_status_counts: participantStatusCounts,
|
|
1069
1145
|
sentiment,
|
|
1070
1146
|
interview_answers: interviewAnswers,
|
|
1071
1147
|
participants: participantRows,
|
|
@@ -2226,3 +2302,215 @@ function formatDate(value) {
|
|
|
2226
2302
|
return str.slice(0, 10);
|
|
2227
2303
|
}
|
|
2228
2304
|
}
|
|
2305
|
+
const POSITIVE_SENTIMENT = new Set(["satisfied", "curious", "engaged", "confident", "delighted"]);
|
|
2306
|
+
const NEGATIVE_SENTIMENT = new Set(["frustrated", "confused", "blocked", "anxious", "disappointed"]);
|
|
2307
|
+
function sentimentColor(label) {
|
|
2308
|
+
const l = label.toLowerCase();
|
|
2309
|
+
if (POSITIVE_SENTIMENT.has(l))
|
|
2310
|
+
return c.green;
|
|
2311
|
+
if (NEGATIVE_SENTIMENT.has(l))
|
|
2312
|
+
return c.red;
|
|
2313
|
+
return c.dim;
|
|
2314
|
+
}
|
|
2315
|
+
function asciiHistogram(hist, options = {}) {
|
|
2316
|
+
const width = options.width ?? 20;
|
|
2317
|
+
const indent = options.indent ?? " ";
|
|
2318
|
+
const entries = Object.entries(hist).filter(([, v]) => v > 0);
|
|
2319
|
+
if (entries.length === 0)
|
|
2320
|
+
return [];
|
|
2321
|
+
const max = entries.reduce((acc, [, v]) => (v > acc ? v : acc), 0);
|
|
2322
|
+
const labelWidth = entries.reduce((acc, [k]) => (k.length > acc ? k.length : acc), 0);
|
|
2323
|
+
return entries
|
|
2324
|
+
.sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
|
|
2325
|
+
.map(([label, count]) => {
|
|
2326
|
+
const bars = max > 0 ? Math.max(1, Math.round((count / max) * width)) : 0;
|
|
2327
|
+
const color = sentimentColor(label);
|
|
2328
|
+
return `${indent}${label.padEnd(labelWidth)} ${color}${"█".repeat(bars)}${c.reset} ${count}`;
|
|
2329
|
+
});
|
|
2330
|
+
}
|
|
2331
|
+
/**
 * Extract the slice objects from a `--group-by` projection.
 *
 * Surface wraps every --group-by axis in the uniform SliceResponse envelope
 * `{ axis, rows, totals_unfiltered, modality_warnings, study_id, modality }`;
 * slices live under `rows`. Anything in `rows` that is not a plain
 * (non-array) object is discarded.
 *
 * @returns The slice objects, or `[]` when the projection is not an object
 *          or has no `rows` array.
 */
function slicesFromProjection(projection) {
    const isPlainObject = (value) => Boolean(value) && typeof value === "object" && !Array.isArray(value);
    if (!isPlainObject(projection)) {
        return [];
    }
    const { rows } = projection;
    if (!Array.isArray(rows)) {
        return [];
    }
    return rows.filter((candidate) => isPlainObject(candidate));
}
|
|
2343
|
+
/**
 * Sum `interaction_count` over all slices. Slices whose `interaction_count`
 * is not a number contribute nothing.
 */
function totalInteractionsFromSlices(slices) {
    let sum = 0;
    for (const { interaction_count: count } of slices) {
        if (typeof count === "number") {
            sum += count;
        }
    }
    return sum;
}
|
|
2351
|
+
/**
 * Read `totals_unfiltered` off a projection envelope.
 *
 * @returns The `totals_unfiltered` object, or `null` when the projection or
 *          that property is not a plain (non-array) object.
 */
function totalsUnfilteredFromProjection(projection) {
    const isPlainObject = (value) => Boolean(value) && typeof value === "object" && !Array.isArray(value);
    if (!isPlainObject(projection)) {
        return null;
    }
    const totals = projection.totals_unfiltered;
    return isPlainObject(totals) ? totals : null;
}
|
|
2360
|
+
function renderIterationSlice(slice) {
|
|
2361
|
+
const label = String(slice.iteration_label ?? slice.iteration_id ?? "?");
|
|
2362
|
+
const pCount = Number(slice.participant_count ?? 0);
|
|
2363
|
+
const iCount = Number(slice.interaction_count ?? 0);
|
|
2364
|
+
console.log(`\n ${c.bold}Iteration ${label}${c.reset} ${c.dim}${pCount} participant${pCount !== 1 ? "s" : ""} · ${iCount} interaction${iCount !== 1 ? "s" : ""}${c.reset}`);
|
|
2365
|
+
const hist = slice.sentiment ?? {};
|
|
2366
|
+
for (const line of asciiHistogram(hist, { indent: " " }))
|
|
2367
|
+
console.log(line);
|
|
2368
|
+
const top = Array.isArray(slice.top_actions) ? slice.top_actions : [];
|
|
2369
|
+
if (top.length > 0) {
|
|
2370
|
+
const parts = top.map((a) => `${a.action_type} ×${a.count}`);
|
|
2371
|
+
console.log(` ${c.dim}Top actions:${c.reset} ${parts.join(", ")}`);
|
|
2372
|
+
}
|
|
2373
|
+
const comments = Array.isArray(slice.sample_comments) ? slice.sample_comments : [];
|
|
2374
|
+
for (const ccomment of comments) {
|
|
2375
|
+
console.log(` ${c.dim}"${ccomment}"${c.reset}`);
|
|
2376
|
+
}
|
|
2377
|
+
}
|
|
2378
|
+
function renderFrameSlice(slice) {
|
|
2379
|
+
const label = slice.frame_label ? String(slice.frame_label) : String(slice.frame_id);
|
|
2380
|
+
const iCount = Number(slice.interaction_count ?? 0);
|
|
2381
|
+
const aliases = Array.isArray(slice.participant_aliases) ? slice.participant_aliases : [];
|
|
2382
|
+
console.log(`\n ${c.bold}${label}${c.reset} ${c.dim}${iCount} interaction${iCount !== 1 ? "s" : ""} · ${aliases.length} participant${aliases.length !== 1 ? "s" : ""}${c.reset}`);
|
|
2383
|
+
const hist = slice.sentiment_histogram ?? {};
|
|
2384
|
+
for (const line of asciiHistogram(hist, { indent: " " }))
|
|
2385
|
+
console.log(line);
|
|
2386
|
+
const comments = Array.isArray(slice.sample_comments) ? slice.sample_comments : [];
|
|
2387
|
+
for (const ccomment of comments) {
|
|
2388
|
+
console.log(` ${c.dim}"${ccomment}"${c.reset}`);
|
|
2389
|
+
}
|
|
2390
|
+
}
|
|
2391
|
+
function renderSegmentSlice(slice) {
|
|
2392
|
+
const idx = slice.segment_index;
|
|
2393
|
+
const label = slice.segment_label ? String(slice.segment_label) : null;
|
|
2394
|
+
const header = idx !== null && idx !== undefined
|
|
2395
|
+
? `Segment ${idx}${label ? ` — ${label}` : ""}`
|
|
2396
|
+
: (label ?? "Segment ?");
|
|
2397
|
+
const iCount = Number(slice.interaction_count ?? 0);
|
|
2398
|
+
console.log(`\n ${c.bold}${header}${c.reset} ${c.dim}${iCount} interaction${iCount !== 1 ? "s" : ""}${c.reset}`);
|
|
2399
|
+
const hist = slice.sentiment_histogram ?? {};
|
|
2400
|
+
for (const line of asciiHistogram(hist, { indent: " " }))
|
|
2401
|
+
console.log(line);
|
|
2402
|
+
const engagement = slice.engagement_histogram ?? {};
|
|
2403
|
+
if (Object.keys(engagement).length > 0) {
|
|
2404
|
+
const parts = Object.entries(engagement).map(([k, v]) => `${v} ${k}`);
|
|
2405
|
+
console.log(` ${c.dim}Engagement:${c.reset} ${parts.join(", ")}`);
|
|
2406
|
+
}
|
|
2407
|
+
const comments = Array.isArray(slice.sample_comments) ? slice.sample_comments : [];
|
|
2408
|
+
for (const ccomment of comments) {
|
|
2409
|
+
console.log(` ${c.dim}"${ccomment}"${c.reset}`);
|
|
2410
|
+
}
|
|
2411
|
+
}
|
|
2412
|
+
function renderTurnSlice(slice) {
|
|
2413
|
+
const turn = Number(slice.turn_index ?? 0);
|
|
2414
|
+
const iCount = Number(slice.interaction_count ?? 0);
|
|
2415
|
+
const failures = Number(slice.failures ?? 0);
|
|
2416
|
+
const failPart = failures > 0 ? ` ${c.red}${failures} failure${failures !== 1 ? "s" : ""}${c.reset}` : "";
|
|
2417
|
+
console.log(`\n ${c.bold}Turn ${turn}${c.reset} ${c.dim}${iCount} interaction${iCount !== 1 ? "s" : ""}${c.reset}${failPart}`);
|
|
2418
|
+
const hist = slice.sentiment_histogram ?? {};
|
|
2419
|
+
for (const line of asciiHistogram(hist, { indent: " " }))
|
|
2420
|
+
console.log(line);
|
|
2421
|
+
const replies = Array.isArray(slice.sample_replies) ? slice.sample_replies : [];
|
|
2422
|
+
for (const r of replies) {
|
|
2423
|
+
console.log(` ${c.dim}"${r}"${c.reset}`);
|
|
2424
|
+
}
|
|
2425
|
+
}
|
|
2426
|
+
function renderAssignmentSlice(slice) {
|
|
2427
|
+
const name = String(slice.assignment_name ?? slice.assignment_id ?? "?");
|
|
2428
|
+
const iCount = Number(slice.interaction_count ?? 0);
|
|
2429
|
+
console.log(`\n ${c.bold}${name}${c.reset} ${c.dim}${iCount} interaction${iCount !== 1 ? "s" : ""}${c.reset}`);
|
|
2430
|
+
const hist = slice.sentiment_histogram ?? {};
|
|
2431
|
+
for (const line of asciiHistogram(hist, { indent: " " }))
|
|
2432
|
+
console.log(line);
|
|
2433
|
+
const sc = Array.isArray(slice.step_completion) ? slice.step_completion : [];
|
|
2434
|
+
if (sc.length > 0) {
|
|
2435
|
+
const rows = sc.map((s) => [
|
|
2436
|
+
String(s.name ?? s.step_id ?? "?"),
|
|
2437
|
+
String(s.passed ?? 0),
|
|
2438
|
+
String(s.inconclusive ?? 0),
|
|
2439
|
+
String(s.failed ?? 0),
|
|
2440
|
+
typeof s.rate === "number" ? s.rate.toFixed(2) : "-",
|
|
2441
|
+
]);
|
|
2442
|
+
console.log(` ${c.dim}Steps:${c.reset}`);
|
|
2443
|
+
printTable(["STEP", "PASSED", "INCONCLUSIVE", "FAILED", "RATE"], rows);
|
|
2444
|
+
}
|
|
2445
|
+
}
|
|
2446
|
+
function renderStepSlice(slice) {
|
|
2447
|
+
const name = String(slice.step_name ?? slice.step_id ?? "?");
|
|
2448
|
+
const assignment = String(slice.assignment_name ?? "?");
|
|
2449
|
+
const total = Number(slice.total ?? 0);
|
|
2450
|
+
const passed = Number(slice.passed ?? 0);
|
|
2451
|
+
const inconclusive = Number(slice.inconclusive ?? 0);
|
|
2452
|
+
const failed = Number(slice.failed ?? 0);
|
|
2453
|
+
const rate = typeof slice.rate === "number" ? slice.rate.toFixed(2) : "-";
|
|
2454
|
+
const rateColor = failed > passed ? c.red : (passed > failed ? c.green : c.dim);
|
|
2455
|
+
console.log(`\n ${c.bold}${assignment} › ${name}${c.reset} ${rateColor}${passed}/${total} passed${c.reset} ${c.dim}(${inconclusive} inconclusive, ${failed} failed, rate ${rate})${c.reset}`);
|
|
2456
|
+
const verdicts = Array.isArray(slice.participant_verdicts)
|
|
2457
|
+
? slice.participant_verdicts
|
|
2458
|
+
: [];
|
|
2459
|
+
if (verdicts.length > 0) {
|
|
2460
|
+
const rows = verdicts.map((v) => [
|
|
2461
|
+
String(v.participant_alias ?? "-"),
|
|
2462
|
+
String(v.verdict ?? "-"),
|
|
2463
|
+
v.reason ? truncate(String(v.reason), 60) : "-",
|
|
2464
|
+
]);
|
|
2465
|
+
printTable(["PARTICIPANT", "VERDICT", "REASON"], rows);
|
|
2466
|
+
}
|
|
2467
|
+
}
|
|
2468
|
+
/**
|
|
2469
|
+
* Render a `--group-by <kind>` projection wrapped in the uniform
|
|
2470
|
+
* `SliceResponse` envelope (`{ axis, rows, totals_unfiltered,
|
|
2471
|
+
* modality_warnings, study_id, modality }`). JSON mode is a thin
|
|
2472
|
+
* pass-through to jsonOutput with `preProjected: true` so the lean
|
|
2473
|
+
* transform doesn't strip our stable empties. Human mode pulls slices
|
|
2474
|
+
* out of `rows` and renders one section per slice plus a small ASCII
|
|
2475
|
+
* sentiment histogram.
|
|
2476
|
+
*/
|
|
2477
|
+
export function formatStudyResultsGroupBy(projection, kind, json) {
|
|
2478
|
+
if (json) {
|
|
2479
|
+
console.log(jsonOutput(projection, { preProjected: true }));
|
|
2480
|
+
return;
|
|
2481
|
+
}
|
|
2482
|
+
const slices = slicesFromProjection(projection);
|
|
2483
|
+
const totalInteractions = totalInteractionsFromSlices(slices);
|
|
2484
|
+
const unfiltered = totalsUnfilteredFromProjection(projection);
|
|
2485
|
+
const totalUnfiltered = unfiltered && typeof unfiltered.interaction_count === "number"
|
|
2486
|
+
? unfiltered.interaction_count
|
|
2487
|
+
: null;
|
|
2488
|
+
const headline = `Sliced by ${kind}: ${slices.length} group${slices.length !== 1 ? "s" : ""} (${totalInteractions}${totalUnfiltered !== null ? `/${totalUnfiltered}` : ""} interaction${totalInteractions !== 1 ? "s" : ""})`;
|
|
2489
|
+
console.log(`${c.bold}${headline}${c.reset}`);
|
|
2490
|
+
if (slices.length === 0) {
|
|
2491
|
+
console.log(` ${c.dim}(no groups matched)${c.reset}`);
|
|
2492
|
+
return;
|
|
2493
|
+
}
|
|
2494
|
+
for (const slice of slices) {
|
|
2495
|
+
switch (kind) {
|
|
2496
|
+
case "iteration":
|
|
2497
|
+
renderIterationSlice(slice);
|
|
2498
|
+
break;
|
|
2499
|
+
case "frame":
|
|
2500
|
+
renderFrameSlice(slice);
|
|
2501
|
+
break;
|
|
2502
|
+
case "segment":
|
|
2503
|
+
renderSegmentSlice(slice);
|
|
2504
|
+
break;
|
|
2505
|
+
case "turn":
|
|
2506
|
+
renderTurnSlice(slice);
|
|
2507
|
+
break;
|
|
2508
|
+
case "assignment":
|
|
2509
|
+
renderAssignmentSlice(slice);
|
|
2510
|
+
break;
|
|
2511
|
+
case "step":
|
|
2512
|
+
renderStepSlice(slice);
|
|
2513
|
+
break;
|
|
2514
|
+
}
|
|
2515
|
+
}
|
|
2516
|
+
}
|
|
@@ -917,6 +917,77 @@ Rules to remember:
|
|
|
917
917
|
See \`ish docs get-page concepts/extending-a-simulation\` for the full
|
|
918
918
|
mental model (cancel + extend as a pair, error envelopes, cost model).
|
|
919
919
|
|
|
920
|
+
## 12. Slice study results by frame / segment / turn / sentiment
|
|
921
|
+
|
|
922
|
+
Goal: ask narrower questions of a finished run than the kitchen-sink
|
|
923
|
+
\`ish study results\` envelope answers. The canonical use case:
|
|
924
|
+
**"what differed on the login screen across these five iterations?"**.
|
|
925
|
+
|
|
926
|
+
\`\`\`bash
|
|
927
|
+
# 12a. Across-iterations comparison on one frame (the canonical question).
|
|
928
|
+
# --frame matches frame names by case-insensitive substring; pass
|
|
929
|
+
# a full Frame UUID or an f-… alias when the name is ambiguous.
|
|
930
|
+
ish study results s-b2c --frame login --group-by iteration --json
|
|
931
|
+
|
|
932
|
+
# 12b. Frustrated reactions to one segment of a video study:
|
|
933
|
+
ish study results s-b2c --segment 3 --sentiment Frustrated
|
|
934
|
+
|
|
935
|
+
# 12c. Who failed the "verify email" step, and why?
|
|
936
|
+
# --group-by step exposes per-participant verdicts inline so you
|
|
937
|
+
# don't fan out across participants.
|
|
938
|
+
ish study results s-b2c --assignment "Sign up" --step verify-email \\
|
|
939
|
+
--group-by step --json
|
|
940
|
+
|
|
941
|
+
# 12d. Pair-mode chat: only side A turn 4.
|
|
942
|
+
ish study results s-b2c --side a --turn 4
|
|
943
|
+
|
|
944
|
+
# 12e. Sanity-check coverage when a filter narrows the slice:
|
|
945
|
+
ish study results s-b2c --frame checkout --json \\
|
|
946
|
+
| jq '{matched: .participant_count, total: .totals_unfiltered.participant_count}'
|
|
947
|
+
|
|
948
|
+
# 12f. A filter that matches zero interactions still returns the stable
|
|
949
|
+
# envelope shape — participant_count: 0, totals_unfiltered populated,
|
|
950
|
+
# exit code 0 (not 4). Never error on no-match.
|
|
951
|
+
ish study results s-b2c --frame doesnotexist --json
|
|
952
|
+
# → Exception: an unresolvable --frame value is a ValidationError, because "doesnotexist" matches no frame names; pass
|
|
953
|
+
# --include-unmatched only when --frame DID resolve and you want the
|
|
954
|
+
# degraded captures (frame_version_id: null) back.
|
|
955
|
+
\`\`\`
|
|
956
|
+
|
|
957
|
+
Every \`--group-by <axis>\` call returns the same envelope:
|
|
958
|
+
\`{axis, rows, totals_unfiltered, modality_warnings, study_id, modality}\`.
|
|
959
|
+
The \`rows\` array holds axis-specific slice objects. The envelope is
|
|
960
|
+
uniform across all six axes — agents can code one shape and key on
|
|
961
|
+
\`axis\` / \`modality\` to dispatch on what's inside \`rows\`.
|
|
962
|
+
|
|
963
|
+
Rules to remember:
|
|
964
|
+
- **Filters compose with AND across flags; OR within \`--sentiment\`.**
|
|
965
|
+
\`--frame login --sentiment Frustrated,Confused\` keeps only login-frame
|
|
966
|
+
interactions whose sentiment is Frustrated OR Confused.
|
|
967
|
+
- **Modality mismatch is not an error.** \`--segment 0\` on an
|
|
968
|
+
interactive study emits a stderr warning and is ignored. The
|
|
969
|
+
exception is **\`--group-by\`** — \`--group-by frame\` on a chat study,
|
|
970
|
+
\`--group-by turn\` on a video study, etc. error at the router (exit 2).
|
|
971
|
+
- **Empty-slice contract: exit 0, not 4.** Zero matches return a
|
|
972
|
+
stable envelope with \`participant_count: 0\` and
|
|
973
|
+
\`totals_unfiltered\` populated. Agents key on
|
|
974
|
+
\`totals_unfiltered.participant_count\` to ask "is the filter too
|
|
975
|
+
tight, or did the run not produce data?".
|
|
976
|
+
- \`--frame\` accepts a name substring, a Frame UUID, an \`f-…\` alias,
|
|
977
|
+
or a \`frame_version_id\` UUID. Ambiguous substring (matches >1
|
|
978
|
+
frame) errors with the candidate list.
|
|
979
|
+
- \`--summary\` is orthogonal to filters and narrows the summary over
|
|
980
|
+
the filtered set. \`--transcript\` is single-participant and errors
|
|
981
|
+
(exit 2) when **any** filter or \`--group-by\` is set.
|
|
982
|
+
- Per-step output exposes \`participant_verdicts: [{participant_alias,
|
|
983
|
+
verdict, reason, evidence_interaction_ids}]\` on **each row of
|
|
984
|
+
\`rows[]\`** (one per \`(assignment, step)\` pair) — not
|
|
985
|
+
\`per_participant_verdicts\`. The verdict enum is \`passed\` /
|
|
986
|
+
\`inconclusive\` / \`failed\`.
|
|
987
|
+
|
|
988
|
+
See \`ish docs get-page guides/slicing-results\` for the full filter
|
|
989
|
+
table, projection shapes, and the defensive null-handling rules.
|
|
990
|
+
|
|
920
991
|
## Tips for chaining commands as an agent
|
|
921
992
|
|
|
922
993
|
- Capture aliases from JSON: \`ITER=$(ish iteration create --url … --json | jq -r .alias)\`
|
|
@@ -1010,6 +1081,11 @@ mental model (cancel + extend as a pair, error envelopes, cost model).
|
|
|
1010
1081
|
| List of participants from \`study run\` | \`--json \\| jq '.participants[].id'\` | \`--get participant_aliases\` (or \`participant_ids\` for UUIDs) |
|
|
1011
1082
|
| Per-answer sentiment | \`--json \\| jq '...'\` per participant | \`ish study results <id> --json\` (sentiment is on every answer row) |
|
|
1012
1083
|
| "Did this run land?" headline | \`study results --json\` + jq filtering | \`ish study results <id> --summary --json\` |
|
|
1084
|
+
| Across-iterations comparison on one frame | \`study results --json\` + jq per iteration | \`ish study results <id> --frame login --group-by iteration --json\` |
|
|
1085
|
+
| Per-step pass/fail with reasons inline | \`study participant --json\` per participant + jq | \`ish study results <id> --step verify-email --group-by step --json\` |
|
|
1086
|
+
| Frustrated reactions to one media segment | \`study results --json\` + jq | \`ish study results <id> --segment 3 --sentiment Frustrated --json\` |
|
|
1087
|
+
| Sanity-check filter coverage | hand-count \`.participants\` vs total | \`--get totals_unfiltered.participant_count\` (set on every sliced envelope) |
|
|
1088
|
+
| Know the sliced-results envelope shape | guess per axis | \`{axis, rows[], totals_unfiltered, modality_warnings, study_id, modality}\` — every \`--group-by\` axis |
|
|
1013
1089
|
| Chat transcript for one participant (external_chatbot) | \`study participant --json\` + jq | \`ish study results <id> --transcript <participant_id> --json\` |
|
|
1014
1090
|
| Pair-mode conversation transcripts | \`study participant --json\` per participant | \`ish iteration get <iter-id> --json \\| jq '.conversations[]'\` |
|
|
1015
1091
|
| Participant headline only (no action timeline) | \`study participant --json\` + jq | \`ish study participant <id> --summary --json\` |
|
|
@@ -6,6 +6,19 @@
|
|
|
6
6
|
* (person, interactions[], participant_summary, interview_answers, …) that
|
|
7
7
|
* used to be embedded under `study.iterations[*].participants[*]` on the
|
|
8
8
|
* legacy `GET /studies/{id}` response.
|
|
9
|
+
*
|
|
10
|
+
* Audit (study-results-slice plan, T4): the flat endpoint already returns
|
|
11
|
+
* everything the new `ish study results --frame/--segment/--step/...` filter
|
|
12
|
+
* pipeline needs in a single round-trip — no per-participant fan-out:
|
|
13
|
+
* - `interactions[]` (modality-discriminated via `ParticipantWithAttributesPublicResponse`)
|
|
14
|
+
* - `participant_assignments[].step_results[]` with `{step_id, name,
|
|
15
|
+
* description, verdict, reason, evidence_interaction_ids[]}`, hydrated
|
|
16
|
+
* by `attach_participant_step_results_flat` in the study repository
|
|
17
|
+
* before serialisation (`ish-backend/app/api/study/repository.py:315`)
|
|
18
|
+
* - `participant_summary`, `interview_answers`
|
|
19
|
+
* If a future filter ever needs `conversation_id` on each interaction (for
|
|
20
|
+
* `--group-by conversation`), that's a backend-side addition on
|
|
21
|
+
* `_InteractionResponseBase`, not a CLI change.
|
|
9
22
|
*/
|
|
10
23
|
import type { ApiClient } from "./api-client.js";
|
|
11
24
|
import type { Participant } from "./types.js";
|
|
@@ -6,6 +6,19 @@
|
|
|
6
6
|
* (person, interactions[], participant_summary, interview_answers, …) that
|
|
7
7
|
* used to be embedded under `study.iterations[*].participants[*]` on the
|
|
8
8
|
* legacy `GET /studies/{id}` response.
|
|
9
|
+
*
|
|
10
|
+
* Audit (study-results-slice plan, T4): the flat endpoint already returns
|
|
11
|
+
* everything the new `ish study results --frame/--segment/--step/...` filter
|
|
12
|
+
* pipeline needs in a single round-trip — no per-participant fan-out:
|
|
13
|
+
* - `interactions[]` (modality-discriminated via `ParticipantWithAttributesPublicResponse`)
|
|
14
|
+
* - `participant_assignments[].step_results[]` with `{step_id, name,
|
|
15
|
+
* description, verdict, reason, evidence_interaction_ids[]}`, hydrated
|
|
16
|
+
* by `attach_participant_step_results_flat` in the study repository
|
|
17
|
+
* before serialisation (`ish-backend/app/api/study/repository.py:315`)
|
|
18
|
+
* - `participant_summary`, `interview_answers`
|
|
19
|
+
* If a future filter ever needs `conversation_id` on each interaction (for
|
|
20
|
+
* `--group-by conversation`), that's a backend-side addition on
|
|
21
|
+
* `_InteractionResponseBase`, not a CLI change.
|
|
9
22
|
*/
|
|
10
23
|
export async function fetchStudyParticipants(client, studyId, opts) {
|
|
11
24
|
return await client.get(`/studies/${studyId}/participants`, undefined, opts);
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure filter pipeline for `ish study results`.
|
|
3
|
+
*
|
|
4
|
+
* Input : the raw `GET /studies/{id}` payload, the raw
|
|
5
|
+
* `GET /studies/{id}/participants` payload, the raw
|
|
6
|
+
* `GET /studies/{id}/frames` payload (or [] when --frame wasn't
|
|
7
|
+
* passed), and a `ResultsFilters` struct from the command surface.
|
|
8
|
+
* Output : a `FilteredResults` struct — the trimmed participant graph,
|
|
9
|
+
* pre-filter counts on `totals_unfiltered`, and a `warnings[]`
|
|
10
|
+
* list of modality-mismatch notes for the command surface to print on
|
|
11
|
+
* stderr.
|
|
12
|
+
*
|
|
13
|
+
* Has no IO and no console side-effects — the caller (study results action)
|
|
14
|
+
* owns network calls and stderr; we just compute. That keeps the function
|
|
15
|
+
* trivially unit-testable and lets the projection builders (T3) consume the
|
|
16
|
+
* same shape without re-walking the graph.
|
|
17
|
+
*
|
|
18
|
+
* Defensive null handling is the load-bearing piece. See the plan's
|
|
19
|
+
* "Defensive handling of nullable fields" section — read it before editing
|
|
20
|
+
* any predicate.
|
|
21
|
+
*/
|
|
22
|
+
export interface ResultsFilters {
|
|
23
|
+
/** Frame name (case-insensitive substring), Frame UUID, frame alias `f-...`,
|
|
24
|
+
* or a `frame_version_id` UUID. Resolved against the study's frames list. */
|
|
25
|
+
frame?: string;
|
|
26
|
+
/** Segment index (parseable int) OR a substring matched against
|
|
27
|
+
* `actions[0].data.segment_label` on each interaction. */
|
|
28
|
+
segment?: string;
|
|
29
|
+
/** Chat turn index — matched against `actions[0].data.turn_index`. */
|
|
30
|
+
turn?: number;
|
|
31
|
+
/** participant_pair side — matched against the parent assignment's `side`. */
|
|
32
|
+
side?: "a" | "b";
|
|
33
|
+
/** Assignment UUID, OR a substring matched against
|
|
34
|
+
* `study.assignments[].name`. */
|
|
35
|
+
assignment?: string;
|
|
36
|
+
/** Step id OR a case-insensitive substring against step `name`. Walks
|
|
37
|
+
* `participant_assignments[].step_results[]`. */
|
|
38
|
+
step?: string;
|
|
39
|
+
/** Comma-or-repeat list of sentiment labels (case-insensitive). */
|
|
40
|
+
sentiment?: string[];
|
|
41
|
+
/** Actor field — case-insensitive match against `interaction.actor`. */
|
|
42
|
+
actor?: "ai" | "human" | "user";
|
|
43
|
+
/** Iteration UUID or `label`. */
|
|
44
|
+
iteration?: string;
|
|
45
|
+
/** Participant UUID or alias (`pt-...`). */
|
|
46
|
+
participant?: string;
|
|
47
|
+
/** When --frame is set, keep interactions with null frame_version_id
|
|
48
|
+
* under a synthetic `_unmatched` bucket instead of dropping them. */
|
|
49
|
+
includeUnmatched?: boolean;
|
|
50
|
+
/** Pair with --step: also drop interactions whose id is not in any
|
|
51
|
+
* surviving `step_results[].evidence_interaction_ids[]`. */
|
|
52
|
+
includeEvidence?: boolean;
|
|
53
|
+
}
|
|
54
|
+
export interface FilteredResults {
|
|
55
|
+
/** Shallow copy of the study payload — same shape as the raw response.
|
|
56
|
+
* Participants are NOT embedded here; they're carried alongside on
|
|
57
|
+
* `participants`. */
|
|
58
|
+
study: Record<string, unknown>;
|
|
59
|
+
/** Participants whose interactions[] survived the predicate walk.
|
|
60
|
+
* Empty participants are dropped only when an interaction-level filter
|
|
61
|
+
* was set (preserves the stable schema when the caller just asked
|
|
62
|
+
* "who ran?" without slicing). */
|
|
63
|
+
participants: Record<string, unknown>[];
|
|
64
|
+
/** The frame list returned by the surface, with each frame's
|
|
65
|
+
* `frame_version_ids[]` flattened onto the row for downstream
|
|
66
|
+
* enrichment. Empty when --frame wasn't passed or the modality isn't
|
|
67
|
+
* interactive. */
|
|
68
|
+
frames: Record<string, unknown>[];
|
|
69
|
+
/** Pre-filter participant + interaction counts, so callers can see
|
|
70
|
+
* "matched X / Y". */
|
|
71
|
+
totals_unfiltered: {
|
|
72
|
+
participant_count: number;
|
|
73
|
+
interaction_count: number;
|
|
74
|
+
};
|
|
75
|
+
/** Modality-mismatch notes (e.g. "--segment ignored on interactive").
|
|
76
|
+
* The surface emits these on stderr. */
|
|
77
|
+
warnings: string[];
|
|
78
|
+
/** When --frame was set, the resolved set of frame_version_ids that
|
|
79
|
+
* passed. Used by the projection builders (T3) to enrich surviving
|
|
80
|
+
* interactions with frame_id / frame_label without re-resolving. */
|
|
81
|
+
matchedFrameVersionIds: Set<string>;
|
|
82
|
+
/** Maps frame_version_id → {frame_id, frame_label} for enrichment. */
|
|
83
|
+
frameVersionLookup: Map<string, {
|
|
84
|
+
frame_id: string;
|
|
85
|
+
frame_label: string | null;
|
|
86
|
+
}>;
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Pure entry point. See file-level comment for input/output contract.
|
|
90
|
+
*/
|
|
91
|
+
export declare function applyResultsFilters(study: Record<string, unknown>, participants: Record<string, unknown>[], rawFrames: Record<string, unknown>[], filters: ResultsFilters): FilteredResults;
|