@ishlabs/cli 0.8.2 → 0.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/dist/auth.d.ts +1 -0
- package/dist/auth.js +12 -3
- package/dist/commands/ask.js +59 -16
- package/dist/commands/iteration.js +45 -11
- package/dist/commands/profile.js +65 -12
- package/dist/commands/study-run.js +49 -0
- package/dist/commands/study-tester.js +5 -2
- package/dist/commands/study.js +71 -16
- package/dist/connect.js +7 -7
- package/dist/index.js +119 -2
- package/dist/lib/api-client.js +29 -7
- package/dist/lib/command-helpers.d.ts +14 -0
- package/dist/lib/command-helpers.js +40 -0
- package/dist/lib/docs.js +430 -13
- package/dist/lib/local-sim/install.d.ts +0 -7
- package/dist/lib/local-sim/install.js +20 -13
- package/dist/lib/output.js +437 -63
- package/dist/lib/skill-content.js +102 -9
- package/dist/lib/types.d.ts +3 -1
- package/dist/upgrade.js +3 -3
- package/package.json +3 -2
package/dist/lib/output.js
CHANGED
|
@@ -16,7 +16,7 @@ export function setVerbose(v) { _verbose = v; }
|
|
|
16
16
|
export function setFields(fields) { _fields = fields; }
|
|
17
17
|
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
18
18
|
const TIMESTAMP_KEYS = new Set(["created_at", "updated_at"]);
|
|
19
|
-
const PAGINATION_KEYS = new Set(["items", "total", "limit", "offset"]);
|
|
19
|
+
const PAGINATION_KEYS = new Set(["items", "total", "returned", "limit", "offset", "has_more"]);
|
|
20
20
|
/**
|
|
21
21
|
* Strip UUID-valued fields, null/undefined values, and timestamps.
|
|
22
22
|
* Preserves alias, name, label, status, and other meaningful fields.
|
|
@@ -69,7 +69,31 @@ function leanJson(data, keepIds = false) {
|
|
|
69
69
|
return Object.keys(result).length > 0 ? result : undefined;
|
|
70
70
|
}
|
|
71
71
|
/**
 * Build the standard list envelope
 * `{items, total, returned, limit, offset, has_more}` for JSON list output.
 *
 * `total`, `limit` and `offset` are copied from `existing` when they are
 * numbers; otherwise `total` and `limit` fall back to the number of items and
 * `offset` falls back to 0. `returned` and `has_more` are always computed
 * here, so consumers can detect truncation without counting items.
 *
 * Items go through `leanJson` unless verbose mode is on or the caller marks
 * them as already projected; a nullish `leanJson` result is replaced by `[]`
 * so the `items` key is always an array.
 *
 * @param {Array} items - Rows being returned.
 * @param {object} [existing] - Optional wrapper to take pagination numbers from.
 * @param {{preProjectedItems?: boolean}} [opts] - Set `preProjectedItems` to skip `leanJson`.
 * @returns {{items: Array, total: number, returned: number, limit: number, offset: number, has_more: boolean}}
 */
function wrapList(items, existing, opts = {}) {
    // Use the wrapper's value only when it is a real number.
    const numberOr = (value, fallback) => (typeof value === "number" ? value : fallback);
    const returned = items.length;
    const total = numberOr(existing?.total, returned);
    const limit = numberOr(existing?.limit, returned);
    const offset = numberOr(existing?.offset, 0);
    let leanItems = items;
    if (!_verbose && !opts.preProjectedItems) {
        leanItems = leanJson(items) ?? [];
    }
    return {
        items: leanItems,
        total,
        returned,
        limit,
        offset,
        has_more: total > offset + returned,
    };
}
|
|
95
|
+
/**
|
|
96
|
+
* Detect a paginated list wrapper: `{items, total?, returned?, limit?, offset?, has_more?}`.
|
|
73
97
|
* Used so `--fields` filters per-item shape without dropping pagination metadata.
|
|
74
98
|
*/
|
|
75
99
|
function isListWrapper(data) {
|
|
@@ -198,6 +222,21 @@ export class ValidationError extends Error {
|
|
|
198
222
|
this.name = "ValidationError";
|
|
199
223
|
}
|
|
200
224
|
}
|
|
225
|
+
/**
 * Extract a typed-error detail object from an error's response body.
 *
 * Reads `err.body.detail` and returns it only when it is a non-array object
 * that carries an `error_code` key, i.e. a body shaped like
 * `{"detail": {error_code, ...fields}}`. Any other shape (missing or
 * non-object body, plain string detail, array detail, object without
 * `error_code`) yields `undefined`.
 *
 * @param {{body?: unknown}} err - Error whose `body` may hold a typed detail.
 * @returns {object | undefined} The detail object, or `undefined`.
 */
function structuredDetail(err) {
    const body = err.body;
    const hasObjectBody = Boolean(body) && typeof body === "object";
    if (!hasObjectBody) {
        return undefined;
    }
    const { detail } = body;
    // Short-circuiting keeps the `in` check away from primitives.
    const isTypedDetail = Boolean(detail)
        && typeof detail === "object"
        && !Array.isArray(detail)
        && "error_code" in detail;
    return isTypedDetail ? detail : undefined;
}
|
|
201
240
|
/**
|
|
202
241
|
* Map error codes to actionable suggestions so agents can self-recover.
|
|
203
242
|
*/
|
|
@@ -215,6 +254,14 @@ function suggestionsForError(err) {
|
|
|
215
254
|
];
|
|
216
255
|
case "insufficient_credits":
|
|
217
256
|
return ["Purchase more credits at https://app.ishlabs.io"];
|
|
257
|
+
case "usage_limit_reached": {
|
|
258
|
+
const d = structuredDetail(err);
|
|
259
|
+
const upgradeUrl = typeof d?.upgrade_url === "string" ? d.upgrade_url : "https://app.ishlabs.io/billing";
|
|
260
|
+
return [
|
|
261
|
+
`Upgrade your plan at ${upgradeUrl}`,
|
|
262
|
+
"Run `ish docs get-page reference/billing-limits` for the tier table",
|
|
263
|
+
];
|
|
264
|
+
}
|
|
218
265
|
case "validation_error":
|
|
219
266
|
return ["Check the command help: add --help to see required options"];
|
|
220
267
|
case "rate_limited":
|
|
@@ -262,12 +309,20 @@ export function outputError(err, json) {
|
|
|
262
309
|
const mergedSuggestions = bodySuggestions
|
|
263
310
|
? Array.from(new Set([...bodySuggestions.map(String), ...suggestions]))
|
|
264
311
|
: suggestions;
|
|
312
|
+
const limitDetail = err.error_code === "usage_limit_reached" ? structuredDetail(err) : undefined;
|
|
265
313
|
if (json) {
|
|
266
314
|
console.error(JSON.stringify({
|
|
267
315
|
error: err.message,
|
|
268
316
|
error_code: err.error_code,
|
|
269
317
|
status: err.status,
|
|
270
318
|
retryable: err.retryable,
|
|
319
|
+
...(limitDetail && {
|
|
320
|
+
tier: limitDetail.tier,
|
|
321
|
+
limit: limitDetail.limit,
|
|
322
|
+
current: limitDetail.current,
|
|
323
|
+
max: limitDetail.max,
|
|
324
|
+
upgrade_url: limitDetail.upgrade_url,
|
|
325
|
+
}),
|
|
271
326
|
...(bodyErrors !== undefined && { errors: bodyErrors }),
|
|
272
327
|
...(mergedSuggestions.length > 0 && { suggestions: mergedSuggestions }),
|
|
273
328
|
}));
|
|
@@ -380,22 +435,20 @@ function projectWorkspace(workspace, options = {}) {
|
|
|
380
435
|
return result;
|
|
381
436
|
}
|
|
382
437
|
export function formatWorkspaceList(workspaces, json) {
|
|
383
|
-
if (workspaces.length === 0) {
|
|
384
|
-
if (json)
|
|
385
|
-
console.log("[]");
|
|
386
|
-
else
|
|
387
|
-
console.log("No workspaces.");
|
|
388
|
-
return;
|
|
389
|
-
}
|
|
390
438
|
injectAliases(workspaces, ALIAS_PREFIX.workspace);
|
|
391
439
|
if (json) {
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
440
|
+
// Synthesize pagination metadata: backend returns a flat array, so
|
|
441
|
+
// total/limit/offset reflect what we actually shipped.
|
|
442
|
+
const projected = _verbose
|
|
443
|
+
? workspaces
|
|
444
|
+
: workspaces.map((w) => projectWorkspace(w));
|
|
445
|
+
// preProjectedItems: workspaces went through projectWorkspace which already
|
|
446
|
+
// chose the field set; skip the inner leanJson so created_at survives.
|
|
447
|
+
console.log(jsonOutput(wrapList(projected, undefined, { preProjectedItems: !_verbose }), { preProjected: true }));
|
|
448
|
+
return;
|
|
449
|
+
}
|
|
450
|
+
if (workspaces.length === 0) {
|
|
451
|
+
console.log("No workspaces.");
|
|
399
452
|
return;
|
|
400
453
|
}
|
|
401
454
|
const aliasMap = getAliasMap(ALIAS_PREFIX.workspace);
|
|
@@ -441,16 +494,14 @@ export function formatSiteAccessStatus(summary, json) {
|
|
|
441
494
|
}
|
|
442
495
|
// --- Study formatting ---
|
|
443
496
|
export function formatStudyList(studies, json) {
|
|
444
|
-
if (studies.length === 0) {
|
|
445
|
-
if (json)
|
|
446
|
-
console.log("[]");
|
|
447
|
-
else
|
|
448
|
-
console.log("No studies.");
|
|
449
|
-
return;
|
|
450
|
-
}
|
|
451
497
|
injectAliases(studies, ALIAS_PREFIX.study);
|
|
452
498
|
if (json) {
|
|
453
|
-
|
|
499
|
+
// Backend returns a flat array; synthesize pagination metadata.
|
|
500
|
+
console.log(jsonOutput(wrapList(studies), { preProjected: true }));
|
|
501
|
+
return;
|
|
502
|
+
}
|
|
503
|
+
if (studies.length === 0) {
|
|
504
|
+
console.log("No studies.");
|
|
454
505
|
return;
|
|
455
506
|
}
|
|
456
507
|
const aliasMap = getAliasMap(ALIAS_PREFIX.study);
|
|
@@ -463,9 +514,47 @@ export function formatStudyList(studies, json) {
|
|
|
463
514
|
String(s.tester_count ?? "0"),
|
|
464
515
|
]));
|
|
465
516
|
}
|
|
517
|
+
/**
 * CLI-side sanity check for ALL-ISSUES Issue #2 / backend Pattern Bk2:
 * a study reported as `status: "failed"` that nevertheless has data.
 *
 * Only studies whose status is exactly "failed" are inspected. The testers
 * returned by `collectTesters(study)` are scanned for a status of
 * "completed" or "complete", and their interaction counts are summed. If
 * either number is non-zero the study is treated as inconsistent.
 *
 * This function only detects; callers decide how to surface the result
 * (a `status_inferred` JSON field and/or a stderr warning).
 *
 * @param {object} study - Study payload.
 * @returns {{inferred: string, reason: string} | null} Inference plus a
 *   human-readable reason, or null when the status looks consistent.
 */
function detectStudyStatusInconsistency(study) {
    if (study.status === "failed") {
        const testers = collectTesters(study);
        let completedCount = 0;
        let totalInteractions = 0;
        for (const tester of testers) {
            if (tester.status === "completed" || tester.status === "complete") {
                completedCount += 1;
            }
            totalInteractions = totalInteractions + tester.interactionCount;
        }
        const hasData = !(completedCount === 0 && totalInteractions === 0);
        if (hasData) {
            return {
                inferred: "completed_with_errors",
                reason: `${completedCount}/${testers.length} testers completed, ${totalInteractions} total interactions`,
            };
        }
    }
    return null;
}
|
|
544
|
+
/**
 * Write a single-line warning to stderr describing a failed-with-data study.
 *
 * @param {{inferred: string, reason: string}} inconsistency - Result of the
 *   status consistency check.
 */
function emitStatusInconsistencyWarning(inconsistency) {
    const { reason, inferred } = inconsistency;
    const sentences = [
        `Warning: study reports status="failed" but ${reason}.`,
        `CLI inferring status_inferred="${inferred}".`,
        "Backend root-cause tracked as Issue #2 (Pattern Bk2).",
    ];
    process.stderr.write(`${sentences.join(" ")}\n`);
}
|
|
466
549
|
export function formatStudyDetail(study, json, options = {}) {
|
|
550
|
+
const inconsistency = detectStudyStatusInconsistency(study);
|
|
551
|
+
if (inconsistency)
|
|
552
|
+
emitStatusInconsistencyWarning(inconsistency);
|
|
467
553
|
if (json) {
|
|
468
|
-
|
|
554
|
+
const payload = inconsistency
|
|
555
|
+
? { ...study, status_inferred: inconsistency.inferred }
|
|
556
|
+
: study;
|
|
557
|
+
console.log(jsonOutput(payload, options));
|
|
469
558
|
return;
|
|
470
559
|
}
|
|
471
560
|
// Header
|
|
@@ -477,6 +566,12 @@ export function formatStudyDetail(study, json, options = {}) {
|
|
|
477
566
|
modalityParts.push(String(study.content_type));
|
|
478
567
|
modalityParts.push(String(study.status || "draft"), formatDate(study.created_at));
|
|
479
568
|
console.log(modalityParts.join(" · "));
|
|
569
|
+
// Pattern C-followup: surface the modality rationale on `study generate`
|
|
570
|
+
// so agents (and humans) can spot misclassification without re-reading the
|
|
571
|
+
// brief. The field is only set on the immediate generate response.
|
|
572
|
+
if (study.modality_rationale) {
|
|
573
|
+
console.log(`\n Modality rationale: ${String(study.modality_rationale)}`);
|
|
574
|
+
}
|
|
480
575
|
// Assignments
|
|
481
576
|
const assignments = Array.isArray(study.assignments) ? study.assignments : [];
|
|
482
577
|
if (assignments.length > 0) {
|
|
@@ -560,20 +655,41 @@ function buildStudyResultsEnvelope(study) {
|
|
|
560
655
|
answers,
|
|
561
656
|
};
|
|
562
657
|
});
|
|
658
|
+
// CLI-side sanity check (Pattern E / Issue #2). Surface a status_inferred
|
|
659
|
+
// field when the backend reports failed-with-data; agents can branch on
|
|
660
|
+
// either the original status or status_inferred.
|
|
661
|
+
const inconsistency = detectStudyStatusInconsistency(study);
|
|
662
|
+
// Pattern B2 (cli half): per-tester rows expose status + error_message so
|
|
663
|
+
// agents can act on a failed run without re-fetching every tester.
|
|
664
|
+
const failedCount = allTesters.filter((t) => t.status.toLowerCase() === "failed").length;
|
|
665
|
+
const testerRows = allTesters.map((t) => ({
|
|
666
|
+
alias: t.id ? deterministicAlias(ALIAS_PREFIX.tester, t.id) : null,
|
|
667
|
+
name: t.name,
|
|
668
|
+
iteration: t.iterationLabel,
|
|
669
|
+
status: t.status,
|
|
670
|
+
interaction_count: t.interactionCount,
|
|
671
|
+
...(t.errorMessage && { error_message: t.errorMessage }),
|
|
672
|
+
}));
|
|
563
673
|
return {
|
|
564
674
|
study: {
|
|
565
675
|
alias: studyAlias,
|
|
566
676
|
name: study.name || null,
|
|
567
677
|
status: study.status || null,
|
|
678
|
+
...(inconsistency && { status_inferred: inconsistency.inferred }),
|
|
568
679
|
modality: study.modality || null,
|
|
569
680
|
},
|
|
570
681
|
tester_count: allTesters.length,
|
|
571
682
|
completed_count: completedCount,
|
|
683
|
+
failed_count: failedCount,
|
|
572
684
|
sentiment,
|
|
573
685
|
interview_answers: interviewAnswers,
|
|
686
|
+
testers: testerRows,
|
|
574
687
|
};
|
|
575
688
|
}
|
|
576
689
|
export function formatStudyResults(study, json) {
|
|
690
|
+
const inconsistency = detectStudyStatusInconsistency(study);
|
|
691
|
+
if (inconsistency)
|
|
692
|
+
emitStatusInconsistencyWarning(inconsistency);
|
|
577
693
|
if (json) {
|
|
578
694
|
// preProjected: bypass leanJson so the stable envelope keeps documented
|
|
579
695
|
// empty defaults (sentiment: null, interview_answers[].answers: []) rather
|
|
@@ -628,6 +744,16 @@ export function formatStudyResults(study, json) {
|
|
|
628
744
|
parts.length > 0 ? parts.join(", ") : "-",
|
|
629
745
|
];
|
|
630
746
|
}));
|
|
747
|
+
// Pattern B2: list any failure reasons under the table so agents see why
|
|
748
|
+
// a run failed without drilling into `study tester <id>`.
|
|
749
|
+
const failedRows = allTesters.filter((t) => t.status.toLowerCase() === "failed" && t.errorMessage);
|
|
750
|
+
if (failedRows.length > 0) {
|
|
751
|
+
console.log("\nFailed testers:");
|
|
752
|
+
for (const t of failedRows) {
|
|
753
|
+
const alias = t.id ? deterministicAlias(ALIAS_PREFIX.tester, t.id) : t.id;
|
|
754
|
+
console.log(` ${alias} (${t.name}): ${truncate(t.errorMessage, 200)}`);
|
|
755
|
+
}
|
|
756
|
+
}
|
|
631
757
|
console.log("\nRun `ish tester get <id> --json` for full interaction details.");
|
|
632
758
|
}
|
|
633
759
|
}
|
|
@@ -657,6 +783,7 @@ function collectTesters(study) {
|
|
|
657
783
|
name: String(profile?.name || t.instance_name || "Unknown"),
|
|
658
784
|
iterationLabel: iterLabel,
|
|
659
785
|
status: String(t.status || "-"),
|
|
786
|
+
errorMessage: t.error_message ? String(t.error_message) : null,
|
|
660
787
|
interactionCount: interactions.length,
|
|
661
788
|
sentimentCounts,
|
|
662
789
|
interviewAnswers: answers.map((a) => ({
|
|
@@ -683,16 +810,14 @@ function truncate(str, maxLen) {
|
|
|
683
810
|
}
|
|
684
811
|
// --- Iteration formatting ---
|
|
685
812
|
export function formatIterationList(iterations, json) {
|
|
686
|
-
if (iterations.length === 0) {
|
|
687
|
-
if (json)
|
|
688
|
-
console.log("[]");
|
|
689
|
-
else
|
|
690
|
-
console.log("No iterations.");
|
|
691
|
-
return;
|
|
692
|
-
}
|
|
693
813
|
injectAliases(iterations, ALIAS_PREFIX.iteration);
|
|
694
814
|
if (json) {
|
|
695
|
-
|
|
815
|
+
// Backend returns a flat array; synthesize pagination metadata.
|
|
816
|
+
console.log(jsonOutput(wrapList(iterations), { preProjected: true }));
|
|
817
|
+
return;
|
|
818
|
+
}
|
|
819
|
+
if (iterations.length === 0) {
|
|
820
|
+
console.log("No iterations.");
|
|
696
821
|
return;
|
|
697
822
|
}
|
|
698
823
|
const aliasMap = getAliasMap(ALIAS_PREFIX.iteration);
|
|
@@ -727,10 +852,15 @@ export function formatTesterDetail(tester, json) {
|
|
|
727
852
|
}
|
|
728
853
|
}
|
|
729
854
|
const sentimentParts = Object.entries(sentimentCounts).map(([label, count]) => `${count} ${label.toLowerCase()}`);
|
|
855
|
+
const status = String(tester.status || "-");
|
|
856
|
+
const errorMessage = tester.error_message ? String(tester.error_message) : null;
|
|
730
857
|
const display = {
|
|
731
858
|
ID: tester.id || "-",
|
|
732
859
|
Profile: profileName,
|
|
733
|
-
Status:
|
|
860
|
+
Status: status,
|
|
861
|
+
...(errorMessage && status.toLowerCase() === "failed" && {
|
|
862
|
+
Error: errorMessage,
|
|
863
|
+
}),
|
|
734
864
|
Platform: tester.platform || "-",
|
|
735
865
|
Language: tester.language || "-",
|
|
736
866
|
Interactions: `${interactions.length} interactions`,
|
|
@@ -742,24 +872,27 @@ export function formatTesterDetail(tester, json) {
|
|
|
742
872
|
}
|
|
743
873
|
// --- Tester Profile formatting ---
|
|
744
874
|
export function formatTesterProfileList(profiles, json, limit) {
|
|
745
|
-
// The API may return { items: [...], total, limit, offset } or a flat array
|
|
875
|
+
// The API may return { items: [...], total, limit, offset } or a flat array.
|
|
746
876
|
const wrapper = profiles;
|
|
877
|
+
const wasWrapper = !Array.isArray(profiles)
|
|
878
|
+
&& profiles !== null
|
|
879
|
+
&& typeof profiles === "object"
|
|
880
|
+
&& (Array.isArray(wrapper?.items) || Array.isArray(wrapper?.profiles));
|
|
747
881
|
const fullList = Array.isArray(profiles) ? profiles
|
|
748
882
|
: Array.isArray(wrapper?.items) ? wrapper.items
|
|
749
883
|
: Array.isArray(wrapper?.profiles) ? wrapper.profiles
|
|
750
|
-
:
|
|
751
|
-
if (!Array.isArray(fullList) || fullList.length === 0) {
|
|
752
|
-
if (json)
|
|
753
|
-
console.log(JSON.stringify(profiles, null, 2));
|
|
754
|
-
else
|
|
755
|
-
console.log("No tester profiles.");
|
|
756
|
-
return;
|
|
757
|
-
}
|
|
884
|
+
: [];
|
|
758
885
|
// Client-side limit (server may not enforce it)
|
|
759
886
|
const list = limit ? fullList.slice(0, limit) : fullList;
|
|
760
887
|
injectAliases(list, ALIAS_PREFIX.testerProfile);
|
|
761
888
|
if (json) {
|
|
762
|
-
|
|
889
|
+
// Pass through server-provided pagination when present; otherwise synthesize.
|
|
890
|
+
const existing = wasWrapper ? wrapper : undefined;
|
|
891
|
+
console.log(jsonOutput(wrapList(list, existing), { preProjected: true }));
|
|
892
|
+
return;
|
|
893
|
+
}
|
|
894
|
+
if (list.length === 0) {
|
|
895
|
+
console.log("No tester profiles.");
|
|
763
896
|
return;
|
|
764
897
|
}
|
|
765
898
|
printTable(["#", "NAME", "OCCUPATION", "COUNTRY", "GENDER", "AGE"], list.map((p) => [
|
|
@@ -855,16 +988,14 @@ function variantLetter(index) {
|
|
|
855
988
|
return `V${index + 1}`;
|
|
856
989
|
}
|
|
857
990
|
export function formatAskList(asks, json) {
|
|
858
|
-
if (asks.length === 0) {
|
|
859
|
-
if (json)
|
|
860
|
-
console.log("[]");
|
|
861
|
-
else
|
|
862
|
-
console.log("No asks.");
|
|
863
|
-
return;
|
|
864
|
-
}
|
|
865
991
|
injectAliases(asks, ALIAS_PREFIX.ask);
|
|
866
992
|
if (json) {
|
|
867
|
-
|
|
993
|
+
// Backend returns a flat array; synthesize pagination metadata.
|
|
994
|
+
console.log(jsonOutput(wrapList(asks), { preProjected: true }));
|
|
995
|
+
return;
|
|
996
|
+
}
|
|
997
|
+
if (asks.length === 0) {
|
|
998
|
+
console.log("No asks.");
|
|
868
999
|
return;
|
|
869
1000
|
}
|
|
870
1001
|
const aliasMap = getAliasMap(ALIAS_PREFIX.ask);
|
|
@@ -877,9 +1008,72 @@ export function formatAskList(asks, json) {
|
|
|
877
1008
|
a.is_archived ? "yes" : "no",
|
|
878
1009
|
]));
|
|
879
1010
|
}
|
|
1011
|
+
/**
 * Add denormalized response counts to a round so consumers don't have to
 * count `responses[]` themselves:
 *   - responses_total:    responses.length
 *   - responses_complete: entries whose status === "completed"
 *   - responses_errored:  entries whose status === "errored" (only if > 0)
 *
 * The input is returned unchanged (same reference) when it has no
 * `responses` array, including when `round` itself is null/undefined.
 * Otherwise a shallow copy with the extra keys is returned; the input is
 * never mutated.
 *
 * @param {object | null | undefined} round - Round payload.
 * @returns {object | null | undefined} Decorated copy, or the input as-is.
 */
function denormalizeRoundCounts(round) {
    // Optional chaining: a nullish round must not crash a decoration helper.
    const responses = Array.isArray(round?.responses) ? round.responses : null;
    if (!responses) {
        return round;
    }
    let complete = 0;
    let errored = 0;
    for (const response of responses) {
        // A null/primitive entry still counts toward the total but has no
        // status to classify, so read it defensively instead of throwing.
        const status = response?.status;
        if (status === "completed") {
            complete++;
        } else if (status === "errored") {
            errored++;
        }
    }
    return {
        ...round,
        responses_total: responses.length,
        responses_complete: complete,
        ...(errored > 0 && { responses_errored: errored }),
    };
}
|
|
1038
|
+
/**
 * Layer denormalized counts onto an ask detail so consumers don't need to
 * count nested arrays. Works on a shallow copy; the input is not mutated.
 *
 *   - testers_count:      ask.testers.length (only when `testers` is an array)
 *   - rounds[i]:          each round decorated by `denormalizeRoundCounts`
 *   - responses_total:    sum across rounds (only when > 0)
 *   - responses_complete: sum across rounds (only when responses_total > 0)
 *   - responses_errored:  sum across rounds (only when > 0)
 *
 * @param {object} ask - Ask payload.
 * @returns {object} Shallow copy of `ask` with the count fields added.
 */
function denormalizeAskCounts(ask) {
    const enriched = { ...ask };
    if (Array.isArray(ask.testers)) {
        enriched.testers_count = ask.testers.length;
    }
    if (!Array.isArray(ask.rounds)) {
        return enriched;
    }
    const decoratedRounds = ask.rounds.map((round) => denormalizeRoundCounts(round));
    enriched.rounds = decoratedRounds;
    // Rounds without a responses array carry no count keys; treat them as 0.
    const sumOf = (key) => decoratedRounds.reduce((acc, round) => acc + (round[key] ?? 0), 0);
    const total = sumOf("responses_total");
    if (total > 0) {
        const errored = sumOf("responses_errored");
        enriched.responses_total = total;
        enriched.responses_complete = sumOf("responses_complete");
        if (errored > 0) {
            enriched.responses_errored = errored;
        }
    }
    return enriched;
}
|
|
880
1074
|
export function formatAskDetail(ask, json) {
|
|
881
1075
|
if (json) {
|
|
882
|
-
console.log(jsonOutput(ask));
|
|
1076
|
+
console.log(jsonOutput(denormalizeAskCounts(ask)));
|
|
883
1077
|
return;
|
|
884
1078
|
}
|
|
885
1079
|
console.log(`${ask.name || "Untitled"} (${ask.id || ""})`);
|
|
@@ -923,7 +1117,7 @@ export function formatAskDetail(ask, json) {
|
|
|
923
1117
|
}
|
|
924
1118
|
export function formatRoundDetail(round, json) {
|
|
925
1119
|
if (json) {
|
|
926
|
-
console.log(jsonOutput(round));
|
|
1120
|
+
console.log(jsonOutput(denormalizeRoundCounts(round)));
|
|
927
1121
|
return;
|
|
928
1122
|
}
|
|
929
1123
|
const variants = Array.isArray(round.variants) ? round.variants : [];
|
|
@@ -999,13 +1193,166 @@ function computeVariantStats(round) {
|
|
|
999
1193
|
}
|
|
1000
1194
|
return stats;
|
|
1001
1195
|
}
|
|
1196
|
+
/**
 * Drop redundant content from `tester.tester_profile_snapshot` when it agrees
 * with `tester.tester_profile` on every key the two have in common.
 *
 * Two values agree when both are "empty" (null, undefined, `[]` or `{}`) or
 * when their `JSON.stringify` output is identical. On full agreement the
 * snapshot is replaced by its snapshot-only keys plus
 * `_matches_tester_profile: true`; the input object is never mutated.
 *
 * The tester is returned unchanged (same reference) when:
 *   - the tester, profile or snapshot is not a plain (non-array) object,
 *     e.g. a missing value or an unexpanded ID string;
 *   - the two objects share no own keys;
 *   - any shared key differs.
 *
 * @param {object} tester - Tester payload.
 * @returns {object} The original tester, or a shallow copy with a slimmed snapshot.
 */
function dedupeTesterSnapshot(tester) {
    const isRecord = (v) => v !== null && typeof v === "object" && !Array.isArray(v);
    // Own-property check: `in` would also match inherited keys such as
    // "constructor", and throws a TypeError on primitive right-hand sides.
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    if (!isRecord(tester)) {
        return tester;
    }
    const profile = tester.tester_profile;
    const snapshot = tester.tester_profile_snapshot;
    if (!isRecord(profile) || !isRecord(snapshot)) {
        return tester;
    }
    const snapshotKeys = Object.keys(snapshot);
    const shared = snapshotKeys.filter((k) => hasOwn(profile, k));
    if (shared.length === 0) {
        return tester;
    }
    const isEmpty = (v) => {
        if (v === null || v === undefined) {
            return true;
        }
        if (Array.isArray(v)) {
            return v.length === 0;
        }
        if (typeof v === "object") {
            return Object.keys(v).length === 0;
        }
        return false;
    };
    const allMatch = shared.every((k) => {
        const a = profile[k];
        const b = snapshot[k];
        // null vs [] vs {} are all treated as "nothing here".
        if (isEmpty(a) && isEmpty(b)) {
            return true;
        }
        return JSON.stringify(a) === JSON.stringify(b);
    });
    if (!allMatch) {
        return tester;
    }
    const snapshotOnly = {};
    for (const k of snapshotKeys) {
        if (!hasOwn(profile, k)) {
            snapshotOnly[k] = snapshot[k];
        }
    }
    return {
        ...tester,
        tester_profile_snapshot: { ...snapshotOnly, _matches_tester_profile: true },
    };
}
|
|
1236
|
+
/**
 * Turn per-variant stats into a machine-readable aggregates object so the
 * outcome of a round can be read without parsing prose.
 *
 * Output keys (each present only when applicable):
 *   - picks:   `{letter: pickCount}` for every variant, when `round.wants_pick`
 *   - winner:  `{letter, count, tied}` for the first variant with the highest
 *              pick count, when that count is above 0; `tied` is true if a
 *              later variant has the same count
 *   - ratings: `{letter: {mean, n}}` for variants with at least one rating,
 *              when `round.wants_ratings`; `mean` is rounded to 3 decimals
 *
 * @param {object} round - Round payload; `wants_pick` / `wants_ratings` are read.
 * @param {Array<{letter: string, pickCount: number, ratingCount: number, ratingTotal: number}>} stats
 * @returns {object | undefined} Aggregates, or undefined when there are no
 *   stats or the round asks for neither picks nor ratings.
 */
function buildAggregates(round, stats) {
    if (stats.length === 0) {
        return undefined;
    }
    const wantsPick = Boolean(round.wants_pick);
    const wantsRatings = Boolean(round.wants_ratings);
    if (!(wantsPick || wantsRatings)) {
        return undefined;
    }
    const out = {};
    if (wantsPick) {
        const picks = {};
        // Running leader; count starts below any real pick count.
        const leader = { letter: "", count: -1, tied: false };
        for (const { letter, pickCount } of stats) {
            picks[letter] = pickCount;
            if (pickCount > leader.count) {
                leader.count = pickCount;
                leader.letter = letter;
                leader.tied = false;
            } else if (pickCount === leader.count && leader.count > 0) {
                leader.tied = true;
            }
        }
        out.picks = picks;
        if (leader.count > 0) {
            out.winner = { letter: leader.letter, count: leader.count, tied: leader.tied };
        }
    }
    if (wantsRatings) {
        const ratings = {};
        for (const variant of stats) {
            if (!(variant.ratingCount > 0)) {
                continue;
            }
            const mean = Number((variant.ratingTotal / variant.ratingCount).toFixed(3));
            ratings[variant.letter] = { mean, n: variant.ratingCount };
        }
        if (Object.keys(ratings).length > 0) {
            out.ratings = ratings;
        }
    }
    return out;
}
|
|
1282
|
+
/**
 * Summarise picks, winners and ratings across rounds so consumers don't have
 * to diff separate per-round results themselves.
 *
 * Each entry holds `round_number` (`order_index + 1`, or 1 when `order_index`
 * is not a number), an 80-character `prompt_preview`, and whichever of
 * `picks` / `winner` / `ratings` the round's aggregates provide. When both
 * the first and the last entry (in input order) have picks, `picks_delta`
 * gives last minus first per letter, treating a missing letter as 0.
 *
 * @param {Array<object>} rounds - Rounds to summarise.
 * @returns {{rounds: Array<object>, picks_delta?: object} | undefined}
 *   Undefined when fewer than two rounds are supplied.
 */
function buildCrossRoundSummary(rounds) {
    if (rounds.length < 2) {
        return undefined;
    }
    const entries = rounds.map((round) => {
        const orderIndex = typeof round.order_index === "number" ? round.order_index : 0;
        const aggregates = buildAggregates(round, computeVariantStats(round));
        const entry = {
            round_number: orderIndex + 1,
            prompt_preview: truncate(String(round.prompt || ""), 80),
        };
        // Copy only the aggregate sections that exist, in a fixed key order.
        for (const key of ["picks", "winner", "ratings"]) {
            if (aggregates?.[key]) {
                entry[key] = aggregates[key];
            }
        }
        return entry;
    });
    const firstPicks = entries[0]?.picks;
    const lastPicks = entries[entries.length - 1]?.picks;
    if (!firstPicks || !lastPicks) {
        return { rounds: entries };
    }
    // Per-letter delta from the first entry to the last.
    const picks_delta = {};
    const letters = new Set([...Object.keys(firstPicks), ...Object.keys(lastPicks)]);
    for (const letter of letters) {
        picks_delta[letter] = (lastPicks[letter] ?? 0) - (firstPicks[letter] ?? 0);
    }
    return { rounds: entries, picks_delta };
}
|
|
1002
1318
|
export function formatAskResults(ask, json, roundFilter) {
|
|
1003
1319
|
const rounds = (Array.isArray(ask.rounds) ? ask.rounds : []);
|
|
1004
1320
|
const filtered = roundFilter !== undefined
|
|
1005
1321
|
? rounds.filter((r) => (typeof r.order_index === "number" ? r.order_index : 0) === roundFilter - 1)
|
|
1006
1322
|
: rounds;
|
|
1007
1323
|
if (json) {
|
|
1008
|
-
|
|
1324
|
+
let total = 0;
|
|
1325
|
+
let complete = 0;
|
|
1326
|
+
let errored = 0;
|
|
1327
|
+
const enrichedRounds = filtered.map((round) => {
|
|
1328
|
+
const stats = computeVariantStats(round);
|
|
1329
|
+
const aggregates = buildAggregates(round, stats);
|
|
1330
|
+
const decorated = denormalizeRoundCounts(round);
|
|
1331
|
+
total += decorated.responses_total ?? 0;
|
|
1332
|
+
complete += decorated.responses_complete ?? 0;
|
|
1333
|
+
errored += decorated.responses_errored ?? 0;
|
|
1334
|
+
return aggregates ? { ...decorated, aggregates } : decorated;
|
|
1335
|
+
});
|
|
1336
|
+
const testers = Array.isArray(ask.testers) ? ask.testers : undefined;
|
|
1337
|
+
const dedupedTesters = testers
|
|
1338
|
+
? testers.map((t) => dedupeTesterSnapshot(t))
|
|
1339
|
+
: undefined;
|
|
1340
|
+
const payload = { ...ask, rounds: enrichedRounds };
|
|
1341
|
+
if (dedupedTesters)
|
|
1342
|
+
payload.testers = dedupedTesters;
|
|
1343
|
+
if (testers)
|
|
1344
|
+
payload.testers_count = testers.length;
|
|
1345
|
+
if (total > 0) {
|
|
1346
|
+
payload.responses_total = total;
|
|
1347
|
+
payload.responses_complete = complete;
|
|
1348
|
+
if (errored > 0)
|
|
1349
|
+
payload.responses_errored = errored;
|
|
1350
|
+
}
|
|
1351
|
+
// Pattern H2: include cross-round summary when 2+ rounds exist so agents
|
|
1352
|
+
// don't have to diff two `ask results` calls themselves.
|
|
1353
|
+
const crossRound = buildCrossRoundSummary(filtered);
|
|
1354
|
+
if (crossRound)
|
|
1355
|
+
payload.cross_round_summary = crossRound;
|
|
1009
1356
|
console.log(jsonOutput(payload));
|
|
1010
1357
|
return;
|
|
1011
1358
|
}
|
|
@@ -1065,19 +1412,46 @@ export function formatAskResults(ask, json, roundFilter) {
|
|
|
1065
1412
|
console.log(` ${summary.comment}`);
|
|
1066
1413
|
}
|
|
1067
1414
|
}
|
|
1415
|
+
// Pattern H2: cross-round picks comparison when 2+ rounds exist. Saves
|
|
1416
|
+
// agents from re-running results twice and diffing aggregates by hand.
|
|
1417
|
+
const crossRound = buildCrossRoundSummary(filtered);
|
|
1418
|
+
if (crossRound) {
|
|
1419
|
+
console.log("\nCross-round summary:");
|
|
1420
|
+
const letters = new Set();
|
|
1421
|
+
for (const entry of crossRound.rounds) {
|
|
1422
|
+
for (const letter of Object.keys(entry.picks ?? {}))
|
|
1423
|
+
letters.add(letter);
|
|
1424
|
+
}
|
|
1425
|
+
const headers = ["ROUND", "WINNER", ...Array.from(letters).sort()];
|
|
1426
|
+
const rows = crossRound.rounds.map((entry) => {
|
|
1427
|
+
const winnerCell = entry.winner
|
|
1428
|
+
? entry.winner.tied
|
|
1429
|
+
? `${entry.winner.letter} (tied)`
|
|
1430
|
+
: entry.winner.letter
|
|
1431
|
+
: "-";
|
|
1432
|
+
return [
|
|
1433
|
+
`R${entry.round_number}`,
|
|
1434
|
+
winnerCell,
|
|
1435
|
+
...Array.from(letters).sort().map((letter) => String(entry.picks?.[letter] ?? 0)),
|
|
1436
|
+
];
|
|
1437
|
+
});
|
|
1438
|
+
printTable(headers, rows);
|
|
1439
|
+
if (crossRound.picks_delta) {
|
|
1440
|
+
const deltaParts = Object.entries(crossRound.picks_delta).map(([letter, d]) => `${letter}: ${d > 0 ? "+" : ""}${d}`);
|
|
1441
|
+
console.log(` Δ picks (R1→R${crossRound.rounds.length}): ${deltaParts.join(", ")}`);
|
|
1442
|
+
}
|
|
1443
|
+
}
|
|
1068
1444
|
}
|
|
1069
1445
|
// --- Config formatting ---
|
|
1070
1446
|
export function formatConfigList(configs, json) {
|
|
1071
|
-
if (configs.length === 0) {
|
|
1072
|
-
if (json)
|
|
1073
|
-
console.log("[]");
|
|
1074
|
-
else
|
|
1075
|
-
console.log("No simulation configs.");
|
|
1076
|
-
return;
|
|
1077
|
-
}
|
|
1078
1447
|
injectAliases(configs, ALIAS_PREFIX.config);
|
|
1079
1448
|
if (json) {
|
|
1080
|
-
|
|
1449
|
+
// Backend returns a flat array; synthesize pagination metadata.
|
|
1450
|
+
console.log(jsonOutput(wrapList(configs), { preProjected: true }));
|
|
1451
|
+
return;
|
|
1452
|
+
}
|
|
1453
|
+
if (configs.length === 0) {
|
|
1454
|
+
console.log("No simulation configs.");
|
|
1081
1455
|
return;
|
|
1082
1456
|
}
|
|
1083
1457
|
const aliasMap = getAliasMap(ALIAS_PREFIX.config);
|