@ishlabs/cli 0.12.2 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/chat-config.d.ts +23 -0
- package/dist/commands/chat-config.js +289 -0
- package/dist/commands/chat.js +26 -37
- package/dist/commands/iteration.js +219 -22
- package/dist/commands/profile.js +75 -9
- package/dist/commands/source.js +6 -4
- package/dist/commands/study-analyze.d.ts +41 -0
- package/dist/commands/study-analyze.js +187 -0
- package/dist/commands/study-run.js +359 -30
- package/dist/commands/study-screenshots.d.ts +20 -0
- package/dist/commands/study-screenshots.js +216 -0
- package/dist/commands/study.js +174 -9
- package/dist/commands/workspace.js +35 -2
- package/dist/lib/accessibility-profile.d.ts +12 -0
- package/dist/lib/accessibility-profile.js +136 -0
- package/dist/lib/alias-store.d.ts +1 -0
- package/dist/lib/alias-store.js +1 -0
- package/dist/lib/ask-questions.js +9 -0
- package/dist/lib/billing.d.ts +55 -0
- package/dist/lib/billing.js +77 -0
- package/dist/lib/command-helpers.d.ts +6 -0
- package/dist/lib/command-helpers.js +12 -0
- package/dist/lib/docs.js +1181 -38
- package/dist/lib/enums.d.ts +54 -0
- package/dist/lib/enums.js +100 -0
- package/dist/lib/local-sim/actions.d.ts +2 -1
- package/dist/lib/local-sim/actions.js +88 -13
- package/dist/lib/local-sim/loop.js +49 -19
- package/dist/lib/local-sim/tabs.d.ts +27 -0
- package/dist/lib/local-sim/tabs.js +157 -0
- package/dist/lib/local-sim/types.d.ts +15 -0
- package/dist/lib/modality.d.ts +70 -1
- package/dist/lib/modality.js +323 -17
- package/dist/lib/output.js +61 -4
- package/dist/lib/skill-content.js +397 -19
- package/dist/lib/types.d.ts +6 -1
- package/dist/lib/types.js +1 -1
- package/package.json +1 -1
|
@@ -24,11 +24,13 @@ const VERSION = pkg.version;
|
|
|
24
24
|
* "ish". Hard cap is 1024 chars. Front-load the use case.
|
|
25
25
|
*/
|
|
26
26
|
const SKILL_DESCRIPTION = "Use this skill whenever the user mentions ish, a study, a tester profile, " +
|
|
27
|
-
"a simulation run, an \"ask\", an audience,
|
|
28
|
-
"
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"
|
|
27
|
+
"a simulation run, an \"ask\", an audience, wants to dispatch tests against AI testers, " +
|
|
28
|
+
"or wants to rehearse a conversation between two AI personas (e.g. sales rep vs. " +
|
|
29
|
+
"skeptical buyer, founder vs. investor archetype). Wraps the `ish` CLI for managing " +
|
|
30
|
+
"studies, asks, iterations, tester profiles, chatbot endpoints, and simulation runs " +
|
|
31
|
+
"against the Ish platform. Always start by running `ish docs overview` to load the " +
|
|
32
|
+
"domain model, then `ish docs list` and `ish docs get-page <slug>` for specifics. " +
|
|
33
|
+
"Prefer this skill over guessing flags from `ish --help`.";
|
|
32
34
|
const SKILL_BODY = `# ish
|
|
33
35
|
|
|
34
36
|
A CLI for the Ish platform — run user-research studies and quick "ask"
|
|
@@ -78,6 +80,8 @@ Workspace (= product)
|
|
|
78
80
|
│ └── Sources (tps-…) transcripts/audio/images that seed generation
|
|
79
81
|
├── Study (s-…) persistent research artifact
|
|
80
82
|
│ ├── modality interactive | text | video | audio | image | document | chat
|
|
83
|
+
│ │ chat has two modes: external_chatbot (probe a customer bot)
|
|
84
|
+
│ │ and tester_pair (two AI personas converse — rehearsal)
|
|
81
85
|
│ ├── assignments tasks the tester does
|
|
82
86
|
│ ├── questionnaire questions the tester answers
|
|
83
87
|
│ └── Iterations (i-…) one configured run; carries the URL or media
|
|
@@ -93,6 +97,16 @@ Two run verbs:
|
|
|
93
97
|
Use **study** when the tester must *do* something on a real surface;
|
|
94
98
|
use **ask** for quick reactions to text/image variants.
|
|
95
99
|
|
|
100
|
+
**Cold-start caveat — "create a fresh workspace" is conditional on
|
|
101
|
+
quota headroom.** \`workspace_create\` returns
|
|
102
|
+
\`error_code: usage_limit_reached\` the instant the account is at
|
|
103
|
+
\`maxProducts\` (FREE caps at 1). Always inspect with \`workspace_get\`
|
|
104
|
+
first and check the \`has_headroom\` flag per row, or use
|
|
105
|
+
\`ish workspace create --name <name> --ensure\` — idempotent: returns
|
|
106
|
+
the existing workspace by name when one exists, otherwise creates. See
|
|
107
|
+
\`ish docs get-page guides/cold-start\` before producing a
|
|
108
|
+
workspace_create call on a session you haven't already probed.
|
|
109
|
+
|
|
96
110
|
## High-frequency commands
|
|
97
111
|
|
|
98
112
|
\`\`\`bash
|
|
@@ -106,6 +120,11 @@ ish workspace use w-6ec
|
|
|
106
120
|
ish study use s-b2c
|
|
107
121
|
ish ask use a-6ec
|
|
108
122
|
|
|
123
|
+
# Idempotent workspace create — returns existing if name matches.
|
|
124
|
+
# Use this on cold-start instead of a blind workspace_create that may
|
|
125
|
+
# hit usage_limit_reached. See \`ish docs get-page guides/cold-start\`.
|
|
126
|
+
ish workspace create --name "Acme — onboarding" --ensure
|
|
127
|
+
|
|
109
128
|
# Inspect
|
|
110
129
|
ish workspace list
|
|
111
130
|
ish study list
|
|
@@ -127,13 +146,31 @@ ish iteration create --url https://example.com # auto-uploads local files
|
|
|
127
146
|
|
|
128
147
|
ish profile generate --description "..." --count 5
|
|
129
148
|
|
|
130
|
-
# Chat modality (talk to a customer chatbot).
|
|
131
|
-
# study run; study create defines the persistent shape only.
|
|
149
|
+
# Chat modality (external_chatbot — talk to a customer chatbot).
|
|
150
|
+
# Audience size lives on study run; study create defines the persistent shape only.
|
|
132
151
|
ish chat endpoint init --from-curl ./bot.curl --name my-bot
|
|
133
152
|
ish chat endpoint test my-bot -m "Hello"
|
|
134
153
|
ish study create --modality chat --endpoint my-bot --assignment "Sign up:Try to sign up"
|
|
135
154
|
# (then) ish study run --sample 5 --wait
|
|
136
155
|
|
|
156
|
+
# Chat modality (tester_pair — rehearse a conversation between two AI personas).
|
|
157
|
+
# Audiences are pinned to the iteration; study run refuses run-time audience
|
|
158
|
+
# overrides. Each side accepts EITHER explicit profiles OR a role-criteria
|
|
159
|
+
# filter (or both — criteria validates the explicit list).
|
|
160
|
+
ish study create --modality chat --chat-mode tester_pair --name "Pitch rehearsal" \\
|
|
161
|
+
--audience-a tp-sales-1,tp-sales-2 --audience-b tp-cto-skeptic-1,tp-cto-skeptic-2 \\
|
|
162
|
+
--scenario-a @./sales_rep.md --scenario-b @./skeptical_cto.md \\
|
|
163
|
+
--assignment "Pitch:Try to win the meeting"
|
|
164
|
+
# (then) ish study run -y
|
|
165
|
+
|
|
166
|
+
# Criteria-driven variant — backend resolves the eligible pool per side.
|
|
167
|
+
# Persona-first: the persona is sacred, criteria filter who plays the role.
|
|
168
|
+
ish study create --modality chat --chat-mode tester_pair --name "Pitch rehearsal" \\
|
|
169
|
+
--role-criteria-a '{"occupation":["sales"],"min_age":28}' \\
|
|
170
|
+
--role-criteria-b '{"occupation":["cto","vp engineering"],"country":["US","SE"]}' \\
|
|
171
|
+
--scenario-a @./sales_rep.md --scenario-b @./skeptical_cto.md \\
|
|
172
|
+
--assignment "Pitch:Try to land a pilot"
|
|
173
|
+
|
|
137
174
|
# Run
|
|
138
175
|
ish study run --sample 5 --country SE --wait
|
|
139
176
|
ish ask run --new --name "..." --prompt "..." --variant text:"A" --variant text:"B" --sample 30 --wants-pick --wait
|
|
@@ -147,6 +184,21 @@ ish ask dispatch a-6ec --wait
|
|
|
147
184
|
ish study results
|
|
148
185
|
ish ask results a-6ec --round 1
|
|
149
186
|
|
|
187
|
+
# AI summary + key insights (any modality with completed testers)
|
|
188
|
+
ish study analyze --wait # trigger + block
|
|
189
|
+
ish study insights # read latest
|
|
190
|
+
|
|
191
|
+
# Screenshots (interactive studies — see what testers actually saw)
|
|
192
|
+
ish study screenshots # list, frame-grouped
|
|
193
|
+
ish study screenshots download <study-id> --id <scid> --out shot.png
|
|
194
|
+
ish study screenshots download <study-id> --all --out ./shots/
|
|
195
|
+
|
|
196
|
+
# Chat configurations (model + system prompt + tools per chatbot endpoint)
|
|
197
|
+
ish chat config list # active endpoint
|
|
198
|
+
ish chat config set --name v1 --model claude-sonnet-4-6 \\
|
|
199
|
+
--system-prompt-file ./prompt.txt --default
|
|
200
|
+
ish chat config get cc-abc --view iterations # cross-study use
|
|
201
|
+
|
|
150
202
|
# Read offline docs
|
|
151
203
|
ish docs overview
|
|
152
204
|
ish docs get-page <slug>
|
|
@@ -222,6 +274,14 @@ implies \`--quiet\` so the bare value is the only thing on stdout.
|
|
|
222
274
|
- **List responses are a six-key envelope:** \`{items, total, returned,
|
|
223
275
|
limit, offset, has_more}\`. Use \`has_more\` to detect truncation;
|
|
224
276
|
don't count items yourself.
|
|
277
|
+
- **\`study\` JSON includes a \`url\` field.** \`study create / generate /
|
|
278
|
+
get / list / run\` each return a top-level \`url\` (per item on
|
|
279
|
+
\`list\`) pointing to the study in the web app — \`overview\` for
|
|
280
|
+
read/write commands, \`timeline\` for \`study run\`. Surface it to
|
|
281
|
+
the user instead of composing \`<host>/<workspace>/<study>/...\`
|
|
282
|
+
yourself. Host follows the active backend (\`app.ishlabs.io\` on
|
|
283
|
+
production, \`localhost:3000\` under \`--dev\`); override with the
|
|
284
|
+
\`ISH_APP_URL\` env var.
|
|
225
285
|
- **Use \`runtime_status\`, not \`status\`, on study responses.** Values:
|
|
226
286
|
\`draft | running | completed | completed_with_errors | cancelled\`.
|
|
227
287
|
Derived from iteration testers' actual state — never reports
|
|
@@ -314,6 +374,33 @@ implies \`--quiet\` so the bare value is the only thing on stdout.
|
|
|
314
374
|
are accepted anywhere a UUID is. See
|
|
315
375
|
\`ish docs get-page reference/aliases\`.
|
|
316
376
|
|
|
377
|
+
## Credits & cost preview
|
|
378
|
+
|
|
379
|
+
Every dispatched run costs **credits**. The CLI surfaces an upper-bound
|
|
380
|
+
estimate *before* you dispatch so you can budget:
|
|
381
|
+
|
|
382
|
+
- **Human output** — \`study run\` shows a \`Scale:\` + \`Credits (est):\`
|
|
383
|
+
line in the confirmation block (skipped under \`--yes\` or \`--json\`).
|
|
384
|
+
- **JSON output** — \`study run --json\` includes a \`credit_estimate\`
|
|
385
|
+
field. For tester-pair chat it nests under \`pair_preview\`; for
|
|
386
|
+
solo/media runs it's top-level. Shape:
|
|
387
|
+
\`{ upper_bound: number, formula: "media_per_tester" | "chat_solo" |
|
|
388
|
+
"chat_pair" | "ask_per_response", breakdown: string, unit: "credits" }\`.
|
|
389
|
+
- **\`formula\` is stable** — agents can branch on it.
|
|
390
|
+
|
|
391
|
+
Today every modality uses \`max(1, round(N / 10))\` per principal
|
|
392
|
+
(per tester for media/interactive, per side per conversation for chat,
|
|
393
|
+
×2 for tester-pair). Asks bill flat **1 credit per successful response**.
|
|
394
|
+
Insights cost **10 credits flat** (the first insights run per study is free).
|
|
395
|
+
|
|
396
|
+
If you exceed the available budget at dispatch time, the backend rejects
|
|
397
|
+
with HTTP 402 / \`error_code: "insufficient_credits"\`. The envelope
|
|
398
|
+
carries \`required\`, \`available\`, \`upgrade_url\`. Don't retry — surface
|
|
399
|
+
the upgrade link.
|
|
400
|
+
|
|
401
|
+
The full table (per-modality rates, tier allotments, error envelope)
|
|
402
|
+
lives in \`ish docs get-page reference/credits\`.
|
|
403
|
+
|
|
317
404
|
## Common pitfalls (don't do these)
|
|
318
405
|
|
|
319
406
|
1. **Don't paste flags from memory.** The CLI evolves; flags change.
|
|
@@ -348,12 +435,70 @@ implies \`--quiet\` so the bare value is the only thing on stdout.
|
|
|
348
435
|
See \`ish docs get-page concepts/site-access\`.
|
|
349
436
|
7. **Don't commit \`~/.ish/config.json\`** — it stores tokens and active
|
|
350
437
|
workspace/study/ask selections. It lives in \`$HOME\`, not the repo.
|
|
351
|
-
8. **Don't
|
|
352
|
-
(\`
|
|
353
|
-
\`
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
438
|
+
8. **Don't pass run-time audience flags to a tester_pair chat iteration.**
|
|
439
|
+
Pair iterations carry their own audiences (\`audience_a\` /
|
|
440
|
+
\`audience_b\` inside \`details.mode_details\`); \`ish study run\`
|
|
441
|
+
refuses \`--profile\` / \`--sample\` / \`--all\` / demographic filters
|
|
442
|
+
on them. To change audiences, update the iteration via
|
|
443
|
+
\`ish iteration update <id> --details-json '{...}'\`. When both sides
|
|
444
|
+
ship explicit \`--audience-a\` / \`--audience-b\` lists, lengths must
|
|
445
|
+
match (1:1 by index) unless one side is a single profile (1×N broadcast) — or use \`--role-criteria-a/-b\` and let the
|
|
446
|
+
backend resolve a pool.
|
|
447
|
+
9. **Don't cram demographic constraints into \`scenario_a/_b\` text.**
|
|
448
|
+
Demographics (occupation, age, country, gender) belong in
|
|
449
|
+
\`--role-criteria-a/-b\` so the persona stays sacred — filtering
|
|
450
|
+
happens upstream of the prompt. Scenario text is for voice, goal,
|
|
451
|
+
and knowledge of the role, not for who plays it. Mixing the two
|
|
452
|
+
breaks the asymmetry contract and produces incoherent characters.
|
|
453
|
+
10. **Don't retry \`usage_limit_reached\` errors.** Tier caps
|
|
454
|
+
(\`maxProducts\`, \`maxStudiesPerProduct\`, \`maxIterationsPerStudy\`,
|
|
455
|
+
\`maxCustomTesterProfiles\`) are enforced server-side. The error body
|
|
456
|
+
carries \`tier\`, \`limit\`, \`current\`, \`max\`, \`upgrade_url\` — show
|
|
457
|
+
the upgrade link or delete an existing resource to free headroom.
|
|
458
|
+
See \`ish docs get-page reference/billing-limits\` for the table.
|
|
459
|
+
11. **Don't retry \`insufficient_credits\` errors either.** HTTP 402,
|
|
460
|
+
non-retryable. Read the \`credit_estimate\` field on \`study run --json\`
|
|
461
|
+
*before* dispatching to know what you'll spend; if the error fires
|
|
462
|
+
after, surface \`required\` / \`available\` / \`upgrade_url\` to the
|
|
463
|
+
human. See \`ish docs get-page reference/credits\`.
|
|
464
|
+
12. **Don't dispatch interactive/media runs without thinking about
|
|
465
|
+
\`--max-interactions\`.** \`ish study run\` defaults to a 20-step
|
|
466
|
+
cap (precedence: flag, then the iteration's stored value, then 20), which is the right
|
|
467
|
+
answer for most onboarding/landing-page probes. Raise it
|
|
468
|
+
(\`--max-interactions 50\`) when testers genuinely need to roam
|
|
469
|
+
further; lower it (\`--max-interactions 5\`) for a smoke probe
|
|
470
|
+
against a surface you suspect is broken — a stuck tester on a
|
|
471
|
+
non-responsive page will otherwise burn the full cap before the
|
|
472
|
+
SDK gives up. The confirmation block prints the resolved value
|
|
473
|
+
and where it came from. Credits are debited at
|
|
474
|
+
\`max(1, round(steps/10))\` per tester; see
|
|
475
|
+
\`ish docs get-page reference/credits\`.
|
|
476
|
+
13. **Don't call \`workspace_create\` blind on a cold start.** On a
|
|
477
|
+
saturated account it returns \`error_code: usage_limit_reached\`
|
|
478
|
+
immediately — the dogfood account hits this on the first call.
|
|
479
|
+
Always call \`workspace_get\` (or \`ish workspace list --json\`)
|
|
480
|
+
first and inspect \`has_headroom\` per row; if any existing
|
|
481
|
+
workspace fits the work, use it via \`ish workspace use <id>\`.
|
|
482
|
+
To programmatically reuse-or-create idempotently, prefer
|
|
483
|
+
\`ish workspace create --name <name> --ensure\` — returns the existing
|
|
484
|
+
workspace owned by the caller when the name matches, otherwise
|
|
485
|
+
creates a fresh one. Same response shape either way, so the
|
|
486
|
+
agent doesn't branch on success vs. reuse. See
|
|
487
|
+
\`ish docs get-page guides/cold-start\`.
|
|
488
|
+
14. **Don't trust \`occupation\` filters as whole-token matches.**
|
|
489
|
+
\`audience_build\` treats \`occupation\` as a **loose,
|
|
490
|
+
case-insensitive substring** — \`occupation=["manager"]\` matches
|
|
491
|
+
hotel managers, retail managers, bank branch managers, not just
|
|
492
|
+
the engineering managers you probably wanted. Two recovery
|
|
493
|
+
paths: enumerate the role surface explicitly
|
|
494
|
+
(\`occupation=["engineering manager", "software engineering
|
|
495
|
+
manager", "vp engineering", "tech lead"]\`) or read
|
|
496
|
+
\`match_preview\` on the \`audience_build\` response and iterate
|
|
497
|
+
on the filter before \`ask_run\` / \`study_run\`. The public
|
|
498
|
+
profile pool skews non-tech / non-Western, so even a precise
|
|
499
|
+
filter may resolve to a small count — preview before dispatching
|
|
500
|
+
a run that depends on reaching N matches. See
|
|
501
|
+
\`ish docs get-page concepts/audience\`.
|
|
357
502
|
|
|
358
503
|
## Authentication
|
|
359
504
|
|
|
@@ -532,6 +677,21 @@ ish iteration create --url "$URL"
|
|
|
532
677
|
|
|
533
678
|
## 7. Chat-modality study (drive a chatbot endpoint)
|
|
534
679
|
|
|
680
|
+
The chat modality has **two modes**, picked by
|
|
681
|
+
\`iteration.details.mode_details.mode\`:
|
|
682
|
+
|
|
683
|
+
- **\`external_chatbot\`** — testers probe a customer chatbot endpoint
|
|
684
|
+
(the original chat behaviour). Audience size is set on \`study run\`.
|
|
685
|
+
- **\`tester_pair\`** — two AI tester audiences converse with each
|
|
686
|
+
other. Each side has its own scenario + goal; the other side does
|
|
687
|
+
not see it (asymmetry contract). Audiences are pinned to the
|
|
688
|
+
iteration: equal counts zip 1:1 by index, or one side of 1
|
|
689
|
+
broadcasts across the other (1 × N → N conversations). Useful for rehearsing
|
|
690
|
+
a sales call, a fundraising chat, a difficult conversation, or any
|
|
691
|
+
two-role scenario before it happens. See section 7b below.
|
|
692
|
+
|
|
693
|
+
### 7a. external_chatbot — drive a customer chatbot endpoint
|
|
694
|
+
|
|
535
695
|
Goal: configure a customer chatbot endpoint, smoke test it, and run
|
|
536
696
|
a chat-modality study end to end. The CLI talks to the endpoint
|
|
537
697
|
through whatever transport it's configured for (sync / async-poll /
|
|
@@ -622,6 +782,215 @@ you can branch on plan caps before \`study create\` returns
|
|
|
622
782
|
The full reference is at \`ish docs get-page guides/chat\`,
|
|
623
783
|
secrets are at \`ish docs get-page concepts/secret\`.
|
|
624
784
|
|
|
785
|
+
### 7b. tester_pair — rehearse a two-AI conversation
|
|
786
|
+
|
|
787
|
+
Goal: pit two AI tester audiences against each other to see how a
|
|
788
|
+
two-role conversation unfolds — a sales rep vs. a skeptical CTO, a
|
|
789
|
+
founder vs. an investor archetype, a manager vs. a direct report
|
|
790
|
+
ahead of a difficult conversation. Each side has its own scenario
|
|
791
|
+
and goal; the other side does NOT see it (the asymmetry contract is
|
|
792
|
+
what makes the rehearsal credible).
|
|
793
|
+
|
|
794
|
+
One-shot study + iteration:
|
|
795
|
+
|
|
796
|
+
\`\`\`bash
|
|
797
|
+
ish study create --modality chat --chat-mode tester_pair \\
|
|
798
|
+
--name "Pitch rehearsal" \\
|
|
799
|
+
--audience-a tp-sales-1,tp-sales-2 \\
|
|
800
|
+
--audience-b tp-cto-skeptic-1,tp-cto-skeptic-2 \\
|
|
801
|
+
--scenario-a "You are a senior sales rep pitching ish to a new prospect." \\
|
|
802
|
+
--scenario-b "You are a skeptical CTO; surface risks before agreeing to a pilot." \\
|
|
803
|
+
--assignment "Pitch:Try to land a pilot"
|
|
804
|
+
|
|
805
|
+
ish study run -y
|
|
806
|
+
\`\`\`
|
|
807
|
+
|
|
808
|
+
Or add a pair iteration to an existing chat study:
|
|
809
|
+
|
|
810
|
+
\`\`\`bash
|
|
811
|
+
ish iteration create --study s-... --chat-mode tester_pair \\
|
|
812
|
+
--audience-a tp-a1,tp-a2 --audience-b tp-b1,tp-b2 \\
|
|
813
|
+
--scenario-a @./scenario_a.md --scenario-b @./scenario_b.md \\
|
|
814
|
+
--max-turns 14
|
|
815
|
+
\`\`\`
|
|
816
|
+
|
|
817
|
+
Rules to remember:
|
|
818
|
+
- Each side needs **either** \`--audience-*\` (explicit IDs) **or**
|
|
819
|
+
\`--role-criteria-*\` (a filter the backend resolves). They can also
|
|
820
|
+
be combined — criteria then validates the explicit list.
|
|
821
|
+
- When **both sides** use explicit \`--audience-a\` / \`--audience-b\`, they
|
|
822
|
+
must be the same length (≥ 1) unless one side is a singleton (1×N broadcast). Pairs run 1:1 by index. Same profile
|
|
823
|
+
on both sides is allowed (self-talk rehearsal).
|
|
824
|
+
- **1×N broadcast**: pass exactly one profile on one side and N on
|
|
825
|
+
the other to rehearse one fixed side against N variations. The CLI
|
|
826
|
+
auto-broadcasts the singleton to match. E.g.
|
|
827
|
+
\`--audience-a tp-rep --audience-b tp-cto1,tp-cto2,tp-cto3\` → 3
|
|
828
|
+
conversations, same rep, three different CTOs. Stderr notice fires
|
|
829
|
+
when broadcasting kicks in.
|
|
830
|
+
- Both \`--scenario-a\` and \`--scenario-b\` are required and asymmetric.
|
|
831
|
+
Use \`@./file.md\` to read from disk.
|
|
832
|
+
- \`--initiator-side\` (\`a\` default) picks who speaks first.
|
|
833
|
+
- \`--chat-mode\` accepts both \`tester_pair\` and \`tester-pair\`.
|
|
834
|
+
The same hyphen/underscore tolerance applies to \`--screen-format\`,
|
|
835
|
+
\`--kind\` on \`source upload\`, and the question \`type\` field in
|
|
836
|
+
\`--questionnaire\` / \`--questions\` manifests.
|
|
837
|
+
- Audiences are **authoritative on the iteration**.
|
|
838
|
+
\`ish study run\` refuses \`--profile\` / \`--sample\` / \`--all\` /
|
|
839
|
+
demographic filters on a pair iteration with a clear error. To
|
|
840
|
+
change audiences, update the iteration via
|
|
841
|
+
\`ish iteration update <id> --details-json '{...}'\`.
|
|
842
|
+
- \`--max-turns\` / \`--early-termination\` on \`study run\` override the
|
|
843
|
+
iteration's saved values for that single dispatch (they don't
|
|
844
|
+
persist back to the iteration).
|
|
845
|
+
- Dispatch is per-Conversation (one task per pair). Per-Conversation
|
|
846
|
+
summaries (\`end_reason\`, \`dominant_dynamic\`, \`who_steered\`) land on
|
|
847
|
+
\`iteration.conversations[]\`. Per-tester summaries land on
|
|
848
|
+
\`tester.summary\` as before.
|
|
849
|
+
|
|
850
|
+
### Filtering audiences with role criteria (persona-first)
|
|
851
|
+
|
|
852
|
+
\`--role-criteria-a\` / \`--role-criteria-b\` accept a JSON object (or
|
|
853
|
+
\`@./file.json\`) describing who's eligible for that side. The
|
|
854
|
+
backend resolves the matching tester-profile pool and persists the
|
|
855
|
+
IDs on the iteration. Keys (all optional):
|
|
856
|
+
|
|
857
|
+
\`\`\`json
|
|
858
|
+
{
|
|
859
|
+
"occupation": ["founder", "ceo"],
|
|
860
|
+
"min_age": 28, "max_age": 55,
|
|
861
|
+
"gender": ["female", "male"],
|
|
862
|
+
"country": ["US", "SE"],
|
|
863
|
+
"education_level_in": ["bachelor", "graduate"],
|
|
864
|
+
"household_in": ["couple_with_kids", "single_parent"],
|
|
865
|
+
"locale_type_in": ["urban", "suburban"],
|
|
866
|
+
"income_level_in": ["middle", "upper_middle", "upper"],
|
|
867
|
+
"employment_status_in": ["employed_full_time", "self_employed"],
|
|
868
|
+
"requires_captions": false,
|
|
869
|
+
"uses_screen_reader": false,
|
|
870
|
+
"prefers_reduced_motion": false,
|
|
871
|
+
"prefers_high_contrast": false,
|
|
872
|
+
"has_any_accessibility_need": false
|
|
873
|
+
}
|
|
874
|
+
\`\`\`
|
|
875
|
+
|
|
876
|
+
The five \`*_in\` arrays accept snake_case spec values verbatim
|
|
877
|
+
(see \`https://ishlabs.io/spec/profile-enums.v1.json\`). The five
|
|
878
|
+
accessibility filters are coarse booleans over each tester's
|
|
879
|
+
\`accessibility_profile\` JSONB.
|
|
880
|
+
|
|
881
|
+
MECE rules for the list filters:
|
|
882
|
+
- \`household_in\`: \`couple_with_kids\` covers couples raising
|
|
883
|
+
children; \`couple_no_kids\` is strictly child-free. \`single\` means
|
|
884
|
+
lives alone with no partner, roommates, parents, or children
|
|
885
|
+
sharing the household.
|
|
886
|
+
- \`employment_status_in\`: pick the tester's primary daytime
|
|
887
|
+
activity. A student who works 15 hrs/week is \`student\`; a retiree
|
|
888
|
+
who freelances is \`retired\`.
|
|
889
|
+
|
|
890
|
+
The **persona-first** principle: the tester's persona is sacred and
|
|
891
|
+
the LLM prompt construction does not change. Criteria filter the
|
|
892
|
+
*eligible pool* upstream so that by the time a tester reaches the
|
|
893
|
+
prompt, their persona is already plausible for the role described
|
|
894
|
+
in \`scenario_*\`. Don't cram demographic constraints into the
|
|
895
|
+
scenario text — that breaks the asymmetry contract and produces
|
|
896
|
+
incoherent characters (a retired farmer suddenly "pitching a
|
|
897
|
+
Series A"). Scenarios describe voice / goal / knowledge; criteria
|
|
898
|
+
pick who plays the role.
|
|
899
|
+
|
|
900
|
+
If the resolved pool is smaller than the requested count for a side,
|
|
901
|
+
\`ish study run\` exits 2 with the backend's pool-too-small error
|
|
902
|
+
intact. Broaden the criteria, generate more profiles
|
|
903
|
+
(\`ish profile generate\`), or fall back to explicit \`--audience-*\`.
|
|
904
|
+
|
|
905
|
+
### Rehearsing against N variations of one side (1×N)
|
|
906
|
+
|
|
907
|
+
The most common rehearsal shape: fix one side, vary the other.
|
|
908
|
+
"Pitch this once and see how 3 different CTOs respond." Step-by-step:
|
|
909
|
+
|
|
910
|
+
\`\`\`bash
|
|
911
|
+
# 1. Generate N distinct profiles for the varying side (or pick
|
|
912
|
+
# existing ones via \`ish profile list\`).
|
|
913
|
+
ish profile generate \\
|
|
914
|
+
--description "Skeptical CTO at a Series B SaaS startup" \\
|
|
915
|
+
--count 3 --json | jq -r '.items[].alias'
|
|
916
|
+
# → tp-cto1, tp-cto2, tp-cto3
|
|
917
|
+
|
|
918
|
+
# 2. Write the two scenarios as separate files. Each is a system
|
|
919
|
+
# prompt for ONE role; the partner never sees it. Cover voice,
|
|
920
|
+
# knowledge, asymmetry, success criteria. NO demographics in the
|
|
921
|
+
# text — that's --role-criteria-*'s job. See "Writing scenarios
|
|
922
|
+
# that produce signal" below for the template.
|
|
923
|
+
#
|
|
924
|
+
# ./sales_rep.md — the user's pitch + goals
|
|
925
|
+
# ./skeptical_cto.md — CTO's posture + concerns
|
|
926
|
+
|
|
927
|
+
# 3. Create the iteration with ONE profile on the fixed side and
|
|
928
|
+
# N on the varying side. CLI auto-broadcasts the singleton and
|
|
929
|
+
# prints a stderr notice ("Broadcasting --audience-a (1 profile)
|
|
930
|
+
# to length 3…") so you see the expansion.
|
|
931
|
+
ish study create \\
|
|
932
|
+
--modality chat --chat-mode tester_pair \\
|
|
933
|
+
--name "Pitch rehearsal — 3 CTO variants" \\
|
|
934
|
+
--audience-a tp-rep \\
|
|
935
|
+
--audience-b tp-cto1,tp-cto2,tp-cto3 \\
|
|
936
|
+
--scenario-a @./sales_rep.md \\
|
|
937
|
+
--scenario-b @./skeptical_cto.md \\
|
|
938
|
+
--assignment "Pitch:Land a pilot or a clear next step"
|
|
939
|
+
|
|
940
|
+
# 4. Dispatch + wait.
|
|
941
|
+
ish study run -y --wait
|
|
942
|
+
|
|
943
|
+
# 5. Compare per-conversation outcomes:
|
|
944
|
+
ish iteration get <iter-id> --json \\
|
|
945
|
+
| jq '.conversations[] | {pair_index, end_reason,
|
|
946
|
+
dynamic: .summary.dominant_dynamic}'
|
|
947
|
+
\`\`\`
|
|
948
|
+
|
|
949
|
+
The CLI emits a stderr notice when it broadcasts ("Broadcasting
|
|
950
|
+
--audience-a (1 profile) to length 3…") so you can see the
|
|
951
|
+
expansion happen.
|
|
952
|
+
|
|
953
|
+
**Criteria alternative**: \`--role-criteria-b '{"occupation":["cto"]}'\`
|
|
954
|
+
on a single \`--audience-a tp-rep\` lets the backend pick the CTOs.
|
|
955
|
+
Less control over distinctness — for guaranteed variety, generate
|
|
956
|
+
explicit profiles first.
|
|
957
|
+
|
|
958
|
+
### Writing scenarios that produce signal
|
|
959
|
+
|
|
960
|
+
Thin scenarios produce thin rehearsals. Each scenario is injected as
|
|
961
|
+
role-playing context for **its own side only** — the partner does NOT
|
|
962
|
+
see the other side's scenario or goal. Cover five things in each:
|
|
963
|
+
|
|
964
|
+
1. **Role / identity** — who is this person?
|
|
965
|
+
2. **Voice** — how do they speak? Formal, casual, technical, blunt?
|
|
966
|
+
3. **What they know** — context they came in with.
|
|
967
|
+
4. **What they don't know** — the asymmetry that makes it interesting.
|
|
968
|
+
5. **Goal** — what counts as success *for them*.
|
|
969
|
+
|
|
970
|
+
Bad: \`scenario_a: "you are a sales rep"\`. Good (~80 words):
|
|
971
|
+
|
|
972
|
+
\`\`\`
|
|
973
|
+
You are Maya, a senior AE at ish (3 years experience). You speak in
|
|
974
|
+
plain sentences, push back when you disagree, and quantify claims.
|
|
975
|
+
You know this is a 30-min discovery call and you've read the
|
|
976
|
+
prospect's LinkedIn — that's it. You do NOT know their current
|
|
977
|
+
tooling, budget, or politics. Success = leave with a concrete next
|
|
978
|
+
step (pilot, follow-up demo, or a firm "no, because X"). A polite
|
|
979
|
+
"we'll get back to you" is not success.
|
|
980
|
+
\`\`\`
|
|
981
|
+
|
|
982
|
+
Keep each scenario under ~250 words — past that, persona drift
|
|
983
|
+
dominates. Get the full rationale at
|
|
984
|
+
\`ish docs get-page concepts/iteration\` ("Writing a good scenario").
|
|
985
|
+
|
|
986
|
+
Inspect after running:
|
|
987
|
+
|
|
988
|
+
\`\`\`bash
|
|
989
|
+
ish iteration get <iter-id> --json \\
|
|
990
|
+
| jq '.details.mode_details.mode, .conversations[]'
|
|
991
|
+
ish study results <study-id> --transcript <tester-id> --json
|
|
992
|
+
\`\`\`
|
|
993
|
+
|
|
625
994
|
## 8. Stage an ask for human review, then dispatch
|
|
626
995
|
|
|
627
996
|
Goal: prepare a billable A/B but let the user inspect and approve the
|
|
@@ -706,10 +1075,18 @@ If you find yourself reaching for \`jq -r .x\`, you wanted \`--get x\`.
|
|
|
706
1075
|
{alias, status, sentiment, comment}. The cheapest "did this run land?"
|
|
707
1076
|
shape.
|
|
708
1077
|
- \`ish study results --transcript <tester_id> --json\` is the
|
|
709
|
-
chat-modality projection
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
1078
|
+
chat-modality projection — **external_chatbot mode only**. Returns
|
|
1079
|
+
a flat \`transcript[]\` of {role, text, turn_index, action_type?,
|
|
1080
|
+
option_label?, sentiment?, failure?} with a \`unique_bot_replies\`
|
|
1081
|
+
count (a value of 1 on a multi-turn run means the bot repeated one reply — the M2 loop signature). Same shape
|
|
1082
|
+
as the MCP \`get_chat_transcript\` tool. For tester_pair
|
|
1083
|
+
conversations, fetch \`.conversations[]\` from
|
|
1084
|
+
\`ish iteration get <iter-id> --json\` instead — bot/tester roles
|
|
1085
|
+
don't apply when both speakers are testers.
|
|
1086
|
+
- \`ish study run --json\` on a pair iteration includes a
|
|
1087
|
+
\`pair_preview\` block (audience sizes, conversation count,
|
|
1088
|
+
initiator side, scenario previews) so agents can confirm what
|
|
1089
|
+
they just dispatched without a follow-up \`iteration get\`.
|
|
713
1090
|
- \`ish study tester <id> --summary --json\` drops the action timeline
|
|
714
1091
|
and returns just {tester, sentiment, comment, error_message}.
|
|
715
1092
|
- \`ish ask results --json\` keeps \`variant_pick_id\` on every
|
|
@@ -768,7 +1145,8 @@ If you find yourself reaching for \`jq -r .x\`, you wanted \`--get x\`.
|
|
|
768
1145
|
| List of testers from \`study run\` | \`--json \\| jq '.testers[].id'\` | \`--get tester_aliases\` (or \`tester_ids\` for UUIDs) |
|
|
769
1146
|
| Per-answer sentiment | \`--json \\| jq '...'\` per tester | \`ish study results <id> --json\` (sentiment is on every answer row) |
|
|
770
1147
|
| "Did this run land?" headline | \`study results --json\` + jq filtering | \`ish study results <id> --summary --json\` |
|
|
771
|
-
| Chat transcript for one tester
|
|
1148
|
+
| Chat transcript for one tester (external_chatbot) | \`study tester --json\` + jq | \`ish study results <id> --transcript <tester_id> --json\` |
|
|
1149
|
+
| Pair-mode conversation transcripts | \`study tester --json\` per tester | \`ish iteration get <iter-id> --json \\| jq '.conversations[]'\` |
|
|
772
1150
|
| Tester headline only (no action timeline) | \`study tester --json\` + jq | \`ish study tester <id> --summary --json\` |
|
|
773
1151
|
| Variant pick id on an ask response | \`ask results --json --verbose\` | \`ish ask results a-… --json\` (variant_pick_id is preserved) |
|
|
774
1152
|
|
|
@@ -799,7 +1177,7 @@ ish <command> --help
|
|
|
799
1177
|
| \`profile\` | Tester profiles + audience generation | concepts/profile |
|
|
800
1178
|
| \`source\` | Upload sources for profile generation | concepts/source |
|
|
801
1179
|
| \`config\` | Simulation configs (model, timing, retries) | (run \`ish config --help\`) |
|
|
802
|
-
| \`chat\` | Chat endpoint CRUD + smoke test (
|
|
1180
|
+
| \`chat\` | Chat endpoint CRUD + smoke test (external_chatbot mode); pair-mode iterations created via \`iteration create --chat-mode tester_pair\` | guides/chat |
|
|
803
1181
|
| \`secret\` | Per-workspace secrets (\`{{secret:KEY}}\` resolver) | concepts/secret |
|
|
804
1182
|
| \`docs\` | Offline docs for agents | (run \`ish docs --help\`) |
|
|
805
1183
|
| \`init\` | Drop this skill into a Claude Code / Codex / | (run \`ish init --help\`) |
|
package/dist/lib/types.d.ts
CHANGED
|
@@ -178,7 +178,12 @@ export interface GeneratedProfile {
|
|
|
178
178
|
country?: string | null;
|
|
179
179
|
city?: string | null;
|
|
180
180
|
occupation?: string | null;
|
|
181
|
-
|
|
181
|
+
education_level?: string | null;
|
|
182
|
+
household?: string | null;
|
|
183
|
+
locale_type?: string | null;
|
|
184
|
+
income_level?: string | null;
|
|
185
|
+
employment_status?: string | null;
|
|
186
|
+
accessibility_profile?: Record<string, unknown> | null;
|
|
182
187
|
product_id?: string | null;
|
|
183
188
|
custom_field_values?: Record<string, unknown>;
|
|
184
189
|
[key: string]: unknown;
|
package/dist/lib/types.js
CHANGED
|
@@ -7,7 +7,7 @@ export const VALID_CONTENT_TYPES = {
|
|
|
7
7
|
text: ["narrative", "informational", "commercial", "editorial", "reference", "email", "news"],
|
|
8
8
|
video: ["tutorial", "documentary", "entertainment", "review", "lifestyle", "news", "social_post", "ad"],
|
|
9
9
|
audio: ["music", "narration", "conversation", "speech", "soundscape", "news", "ad"],
|
|
10
|
-
image: ["product", "photography", "infographic", "artwork", "interface", "social_post", "ad"],
|
|
10
|
+
image: ["product", "photography", "infographic", "artwork", "interface", "visual_assets", "social_post", "ad"],
|
|
11
11
|
document: ["deck", "presentation", "report", "brochure", "guide"],
|
|
12
12
|
};
|
|
13
13
|
export const ASK_VARIANT_KINDS = [
|