@ishlabs/cli 0.17.7 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -54
- package/dist/commands/ask.d.ts +4 -4
- package/dist/commands/ask.js +66 -66
- package/dist/commands/chat.js +10 -10
- package/dist/commands/config.js +1 -1
- package/dist/commands/docs.js +1 -1
- package/dist/commands/iteration.js +57 -57
- package/dist/commands/mcp.d.ts +23 -0
- package/dist/commands/mcp.js +676 -0
- package/dist/commands/person.d.ts +5 -0
- package/dist/commands/{profile.js → person.js} +197 -162
- package/dist/commands/source.d.ts +6 -2
- package/dist/commands/source.js +35 -30
- package/dist/commands/study-analyze.d.ts +1 -1
- package/dist/commands/study-analyze.js +3 -3
- package/dist/commands/study-participant.d.ts +8 -0
- package/dist/commands/{study-tester.js → study-participant.js} +50 -50
- package/dist/commands/study-run.d.ts +6 -6
- package/dist/commands/study-run.js +341 -290
- package/dist/commands/study.js +106 -72
- package/dist/commands/workspace.js +13 -13
- package/dist/connect.js +5 -5
- package/dist/index.js +6 -4
- package/dist/lib/accessibility-profile.d.ts +1 -1
- package/dist/lib/accessibility-profile.js +1 -1
- package/dist/lib/alias-hydrate.js +4 -4
- package/dist/lib/alias-store.d.ts +5 -5
- package/dist/lib/alias-store.js +8 -8
- package/dist/lib/api-client.d.ts +1 -1
- package/dist/lib/api-client.js +1 -1
- package/dist/lib/billing.d.ts +11 -11
- package/dist/lib/billing.js +16 -16
- package/dist/lib/chat-endpoint-templates.js +1 -1
- package/dist/lib/command-helpers.d.ts +18 -18
- package/dist/lib/command-helpers.js +49 -37
- package/dist/lib/docs.js +570 -387
- package/dist/lib/enums.d.ts +2 -2
- package/dist/lib/enums.js +2 -2
- package/dist/lib/local-sim/browser.d.ts +1 -1
- package/dist/lib/local-sim/browser.js +1 -1
- package/dist/lib/local-sim/debug-report.d.ts +2 -2
- package/dist/lib/local-sim/debug-report.js +3 -3
- package/dist/lib/local-sim/loop.d.ts +5 -5
- package/dist/lib/local-sim/loop.js +38 -38
- package/dist/lib/local-sim/types.d.ts +12 -12
- package/dist/lib/mcp-clients.d.ts +51 -0
- package/dist/lib/mcp-clients.js +175 -0
- package/dist/lib/modality.d.ts +10 -10
- package/dist/lib/modality.js +46 -46
- package/dist/lib/output.d.ts +16 -15
- package/dist/lib/output.js +291 -226
- package/dist/lib/profile-sources.d.ts +64 -16
- package/dist/lib/profile-sources.js +91 -30
- package/dist/lib/skill-content.js +216 -168
- package/dist/lib/study-events.d.ts +3 -3
- package/dist/lib/study-events.js +1 -1
- package/dist/lib/study-inputs.d.ts +11 -1
- package/dist/lib/study-inputs.js +68 -17
- package/dist/lib/study-participants.d.ts +32 -0
- package/dist/lib/study-participants.js +12 -0
- package/dist/lib/types.d.ts +104 -34
- package/package.json +1 -1
- package/dist/commands/profile.d.ts +0 -5
- package/dist/commands/study-tester.d.ts +0 -8
|
@@ -23,9 +23,9 @@ const VERSION = pkg.version;
|
|
|
23
23
|
* the description with verbs the user is likely to say plus the noun
|
|
24
24
|
* "ish". Hard cap is 1024 chars. Front-load the use case.
|
|
25
25
|
*/
|
|
26
|
-
const SKILL_DESCRIPTION = "Use this skill whenever the user mentions ish, a study, a
|
|
27
|
-
"a simulation run, an \"ask\",
|
|
28
|
-
"dispatch tests against AI
|
|
26
|
+
const SKILL_DESCRIPTION = "Use this skill whenever the user mentions ish, a study, a person, " +
|
|
27
|
+
"a simulation run, an \"ask\", a group of people, a chatbot probe, wants to " +
|
|
28
|
+
"dispatch tests against AI participants, or wants to rehearse a conversation " +
|
|
29
29
|
"between two AI personas (e.g. sales rep vs. skeptical buyer). Covers both " +
|
|
30
30
|
"the `ish` CLI (via Bash) and the hosted ish MCP server " +
|
|
31
31
|
"(`mcp__claude_ai_ish__*` on claude.ai) — same operations, pick whichever " +
|
|
@@ -37,7 +37,7 @@ ish runs user-research simulations: simulated people experience your draft (page
|
|
|
37
37
|
|
|
38
38
|
## When to invoke
|
|
39
39
|
|
|
40
|
-
The user mentioned \`ish\`, a study, an "ask", a
|
|
40
|
+
The user mentioned \`ish\`, a study, an "ask", a person, a group of people, a simulation, "rehearse", "compare variants", "test before shipping", "probe a chatbot".
|
|
41
41
|
|
|
42
42
|
## Drivers
|
|
43
43
|
|
|
@@ -48,8 +48,10 @@ ish has two surfaces; pick whichever your environment has:
|
|
|
48
48
|
|
|
49
49
|
Both wrap the same operations. If neither is present, tell the user: \`npm i -g @ishlabs/cli\`, or enable the ish connector on claude.ai. Don't try to drive ish without a driver.
|
|
50
50
|
|
|
51
|
+
**Bridging CLI → MCP for the user's editor / desktop agent**: if the user has the CLI but their editor or desktop agent (Cursor, VS Code, Claude Code, Claude Desktop, Windsurf) isn't yet wired to call ish, one command does it: \`ish mcp add --all --yes\`. It writes the per-client MCP config block, never embeds a token (OAuth happens on first connect), and is idempotent, so it is safe to re-run. See \`ish docs get-page guides/mcp-add\`.
|
|
52
|
+
|
|
51
53
|
**When both are available, pick by op:**
|
|
52
|
-
- Streaming results to a watching user → **CLI** with \`--wait\` (per-
|
|
54
|
+
- Streaming results to a watching user → **CLI** with \`--wait\` (per-participant output as participants complete).
|
|
53
55
|
- Structured one-shot reads or run dispatch → **MCP** (JSON in, JSON out, no shell).
|
|
54
56
|
- Idempotent setup (e.g. cold-start workspace) → **CLI** has \`--ensure\`; MCP doesn't.
|
|
55
57
|
- Local file uploads (images, video, docs) → **CLI** only — MCP doesn't accept binaries.
|
|
@@ -60,16 +62,16 @@ Both wrap the same operations. If neither is present, tell the user: \`npm i -g
|
|
|
60
62
|
|
|
61
63
|
\`\`\`
|
|
62
64
|
Workspace (= product)
|
|
63
|
-
├──
|
|
65
|
+
├── Person (p-…) reusable AI persona
|
|
64
66
|
├── Study (s-…) persistent artifact for testing a real surface
|
|
65
67
|
│ └── Iteration (i-…) one configured run; carries the URL or media
|
|
66
68
|
├── Ask (a-…) lightweight artifact for reactions to text/image variants
|
|
67
|
-
│ └── Round unit of execution;
|
|
69
|
+
│ └── Round unit of execution; participants fixed at ask creation
|
|
68
70
|
└── Chat Endpoint workspace-level definition of an external chatbot
|
|
69
71
|
(referenced by study modality: chat, mode: external_chatbot)
|
|
70
72
|
\`\`\`
|
|
71
73
|
|
|
72
|
-
**Audience is a query, not an entity.** Both \`ask_run\` and \`study_run\` take an \`audience\` argument shaped as \`{
|
|
74
|
+
**Audience is a query, not an entity.** Both \`ask_run\` and \`study_run\` take an \`audience\` argument shaped as \`{ person_ids: [...] }\` (explicit) or \`{ sample: N, filters: {...} }\` (sampled from an existing pool). There is no \`audience\` resource to create — you build profiles via \`group_build\` (or reuse existing ones via \`profile_list\`) and pass them in.
|
|
73
75
|
|
|
74
76
|
Two run verbs:
|
|
75
77
|
- **study run** — simulate on a real surface (URL, media, document, chat endpoint).
|
|
@@ -81,11 +83,11 @@ Heuristic: **study** for "test this prototype/page/flow"; **ask** for "which cop
|
|
|
81
83
|
|
|
82
84
|
Each shape names the verb, the *required precursors*, and the **load-bearing knobs** — the arguments that change output quality, not just behavior. Look up the full schema in the MCP tool description or \`ish <command> --help\` once you've picked the shape.
|
|
83
85
|
|
|
84
|
-
Examples below use MCP shape; for CLI, kebab-case the tool name (\`ask_run\` → \`ish ask run\`) and pass equivalent flags (\`
|
|
86
|
+
Examples below use MCP shape; for CLI, split the tool name on underscores into space-separated subcommands (\`ask_run\` → \`ish ask run\`) and pass equivalent flags (\`person_ids: [...]\` → \`--person-id p-… --person-id p-…\`).
|
|
85
87
|
|
|
86
88
|
### Compare text or image variants → \`ask_run\`
|
|
87
89
|
|
|
88
|
-
- **Precursor**:
|
|
90
|
+
- **Precursor**: a group of people (see "Audience is a query" above). If you don't already have suitable people, build them first via \`group_build\`; reuse via \`profile_list\` when possible.
|
|
89
91
|
- **Load-bearing knobs**:
|
|
90
92
|
- \`wants_pick: true\` — adds an aggregate winner verdict. Without it you get prose reactions but no clear answer.
|
|
91
93
|
- \`wants_ratings: true\` — adds per-variant numeric scores.
|
|
@@ -96,22 +98,23 @@ Examples below use MCP shape; for CLI, kebab-case the tool name (\`ask_run\` →
|
|
|
96
98
|
\`\`\`
|
|
97
99
|
ask_run({
|
|
98
100
|
variants: [ { label: "A", content: "..." }, { label: "B", content: "..." } ],
|
|
99
|
-
audience: {
|
|
101
|
+
audience: { person_ids: ["p-…", ...] }, // or { sample: 10 }
|
|
100
102
|
wants_pick: true,
|
|
101
103
|
wants_ratings: true,
|
|
102
104
|
wait: true,
|
|
103
105
|
})
|
|
104
106
|
\`\`\`
|
|
105
|
-
- **Output**: per-
|
|
107
|
+
- **Output**: per-participant reasoning + (if \`wants_pick\`) aggregate winner with confidence.
|
|
106
108
|
|
|
107
109
|
### Test a live page or prototype → \`study_run\` (modality: interactive)
|
|
108
110
|
|
|
109
|
-
- **Precursor**: a study with a URL. Either inline at create-time (\`study_create({ modality: "interactive", url: "..." })\`) or as a separate iteration (\`iteration_create({ study_id, url })\`) when you want to A/B iterations later or upload local files. An **assignment** is required — what the
|
|
110
|
-
- **Audience**: pass \`audience: {
|
|
111
|
+
- **Precursor**: a study with a URL. Either inline at create-time (\`study_create({ modality: "interactive", url: "..." })\`) or as a separate iteration (\`iteration_create({ study_id, url })\`) when you want to A/B iterations later or upload local files. An **assignment** is required — what the participant is supposed to attempt.
|
|
112
|
+
- **Audience**: pass \`audience: { person_ids: [...] }\` or \`{ sample: N }\` to \`study_run\`, same contract as \`ask_run\`. Audience is set on the *run*, not the study.
|
|
111
113
|
- **Load-bearing knobs**:
|
|
112
|
-
- \`assignment\` (on \`study_create\`) — what the
|
|
113
|
-
- \`
|
|
114
|
-
- \`
|
|
114
|
+
- \`assignment\` (on \`study_create\`) — what the participant is supposed to do. Format: \`"<label>:<instruction>"\`. The whole run hinges on this being clear.
|
|
115
|
+
- **steps (optional checklist)** — an assignment can carry an ordered \`steps\` list of atomic actions (\`{name, description?}\`), authored via the CLI JSON forms (\`--assignments-file\` / \`--assignments\`) — not the \`"<label>:<instruction>"\` shorthand. Honored for **interactive** and **external_chatbot chat** only. After a run, \`study get\` reports a per-step \`step_completion\` rollup (pass rate + sample failures). Use steps when "did they finish?" is a checklist, not a single yes/no.
|
|
116
|
+
- \`wait\` (MCP) / \`--wait\` (CLI) — streams per-participant results as they complete. CLI streams to stdout in real-time; MCP blocks until the whole run finishes. For a watching user, prefer the CLI here.
|
|
117
|
+
- \`count\` (on \`study_run\`) — how many participants.
|
|
115
118
|
- **Shape**:
|
|
116
119
|
\`\`\`
|
|
117
120
|
study_create({
|
|
@@ -119,18 +122,18 @@ Examples below use MCP shape; for CLI, kebab-case the tool name (\`ask_run\` →
|
|
|
119
122
|
url: "https://staging.acme.io/welcome",
|
|
120
123
|
assignment: "Complete signup:Go through the 4-step wizard end-to-end",
|
|
121
124
|
})
|
|
122
|
-
study_run({ study_id: "s-…", audience: {
|
|
125
|
+
study_run({ study_id: "s-…", audience: { person_ids: [...] }, count: 15, wait: true })
|
|
123
126
|
\`\`\`
|
|
124
|
-
- **Output**: per-
|
|
127
|
+
- **Output**: per-participant journey transcripts + aggregate friction / blocker / positive-moment counts.
|
|
125
128
|
|
|
126
129
|
### Probe a customer chatbot → \`study_run\` (modality: chat, mode: external_chatbot)
|
|
127
130
|
|
|
128
131
|
- **Precursors**:
|
|
129
|
-
1. A **chat endpoint** definition at the workspace level. \`chat_endpoint_init\` from a curl spec (handles auth headers, request/response shape; **upsert-by-name** — safe to re-call with the same \`name\` to rotate auth or change the request shape) → \`chat_endpoint_test\` to confirm it responds correctly before dispatching simulated
|
|
132
|
+
1. A **chat endpoint** definition at the workspace level. \`chat_endpoint_init\` from a curl spec (handles auth headers, request/response shape; **upsert-by-name** — safe to re-call with the same \`name\` to rotate auth or change the request shape) → \`chat_endpoint_test\` to confirm it responds correctly before dispatching simulated participants.
|
|
130
133
|
2. A study with \`modality: "chat"\`, \`mode: "external_chatbot"\`, the endpoint reference, and an \`assignment\`.
|
|
131
|
-
- **Audience**: same \`{
|
|
134
|
+
- **Audience**: same \`{ person_ids } | { sample }\` contract; pass to \`study_run\`. For custom personas (e.g. "frustrated vs polite"), \`group_build\` first.
|
|
132
135
|
- **Load-bearing knobs**:
|
|
133
|
-
- \`assignment\` — what the
|
|
136
|
+
- \`assignment\` — what the participant tries to do (\`"Cancel:Try to cancel your subscription"\`).
|
|
134
137
|
- \`count\` on the run.
|
|
135
138
|
- **Shape**:
|
|
136
139
|
\`\`\`
|
|
@@ -138,52 +141,52 @@ Examples below use MCP shape; for CLI, kebab-case the tool name (\`ask_run\` →
|
|
|
138
141
|
chat_endpoint_test({ endpoint: "support-bot", message: "hi" })
|
|
139
142
|
study_create({ modality: "chat", mode: "external_chatbot", endpoint: "support-bot",
|
|
140
143
|
assignment: "Cancel:Try to cancel your subscription" })
|
|
141
|
-
study_run({ study_id: "s-…", audience: {
|
|
144
|
+
study_run({ study_id: "s-…", audience: { person_ids: [...] }, count: 8, wait: true })
|
|
142
145
|
\`\`\`
|
|
143
|
-
- **Output**: full conversation transcripts per
|
|
146
|
+
- **Output**: full conversation transcripts per participant + aggregate success / blocker analysis.
|
|
144
147
|
|
|
145
148
|
### Test a media artifact (document, image, video, audio) → \`study_run\`
|
|
146
149
|
|
|
147
150
|
- **Precursors**:
|
|
148
151
|
1. A study with the chosen modality: \`study_create({ modality: "document" | "image" | "video" | "audio", assignment: "..." })\`.
|
|
149
152
|
2. An **iteration** carrying the media. For local files, **CLI only** — \`ish iteration create --study s-… --media @./deck.pdf\` (the \`@\` prefix triggers upload). For hosted URLs, either driver works: \`iteration_create({ study_id, content_url: "https://..." })\`.
|
|
150
|
-
- **Audience**: same \`{
|
|
153
|
+
- **Audience**: same \`{ person_ids } | { sample }\` contract; pass to \`study_run\`. Reusable across runs (see "Lifecycle" below).
|
|
151
154
|
- **Load-bearing knobs**:
|
|
152
155
|
- \`assignment\` on \`study_create\` — for review-style media (decks, ad creative), frame as decision: \`"Take a first meeting:Review this Series A deck and decide whether you'd take a first meeting"\`. Page/timestamp-level attribution depends on the assignment asking for it explicitly.
|
|
153
156
|
- \`wait\` / \`--wait\` — same streaming story as interactive.
|
|
154
157
|
- \`count\` on \`study_run\`.
|
|
155
|
-
- **Iterating on the artifact** (v2 deck, v3 deck): create a **new iteration** on the same study (\`iteration_create\`), reuse the
|
|
156
|
-
- **Output**: per-
|
|
158
|
+
- **Iterating on the artifact** (v2 deck, v3 deck): create a **new iteration** on the same study (\`iteration_create\`), reuse the people's \`person_ids\`. See "Lifecycle".
|
|
159
|
+
- **Output**: per-participant reactions to the artifact + aggregate themes.
|
|
157
160
|
|
|
158
|
-
### Rehearse a conversation between two AI personas → \`study_run\` (modality: chat, mode:
|
|
161
|
+
### Rehearse a conversation between two AI personas → \`study_run\` (modality: chat, mode: participant_pair)
|
|
159
162
|
|
|
160
163
|
**If the user might want the same persona across multiple turns, pin profiles up-front — you can't retro-pin after a run.** Without pinning, personas are re-synthesized from the assignment text each time, so "the same VC from earlier" becomes prose-only continuity.
|
|
161
164
|
|
|
162
|
-
- **Precursor**: a workspace and (optionally) one or two
|
|
163
|
-
- **Audience**: optional. For persona continuity across iterations, build profiles via \`
|
|
165
|
+
- **Precursor**: a workspace and (optionally) one or two people for persona pinning. If you skip the people, ish synthesizes both personas from the \`assignment\` text per-run — fine for one-shot rehearsals, but the personas drift between iterations.
|
|
166
|
+
- **Audience**: optional. For persona continuity across iterations, build profiles via \`group_build\` (or reuse via \`profile_list\`) and pass \`audience: { person_ids: [...] }\` to \`study_run\` — the same profiles play the same roles each time.
|
|
164
167
|
- **Load-bearing knobs**:
|
|
165
168
|
- \`assignment\` — encodes BOTH personas and what each is trying to do. More prose-heavy than other assignments; be specific. Example: \`"Founder pitches Series A to skeptical VC. Founder: defends AI customer-support startup, $2M ARR, 15% MoM. VC: thinks SaaS-for-SaaS is saturated, probes moat and unit economics."\`
|
|
166
169
|
- \`count\` — typically 1 per run; set higher to generate variations.
|
|
167
|
-
- **Iterating the scenario** (turn-by-turn refinement): create a **new iteration** with a revised assignment; reuse the same \`
|
|
170
|
+
- **Iterating the scenario** (turn-by-turn refinement): create a **new iteration** with a revised assignment; reuse the same \`person_ids\` if you pinned personas. See "Lifecycle".
|
|
168
171
|
- **Output**: a full transcript per rehearsal.
|
|
169
172
|
|
|
170
|
-
### Generate a fresh
|
|
173
|
+
### Generate a fresh group → \`group_build\`
|
|
171
174
|
|
|
172
175
|
- **Input**: a \`description\`, a \`count\`, and optionally \`sources\` (transcripts / audio / images / docs that seed persona generation — for "make profiles that feel like these real customers"). Local files force CLI (binary upload constraint).
|
|
173
|
-
- **Output**: a list of \`
|
|
176
|
+
- **Output**: a list of \`person_ids\` to pass into \`ask_run\` or \`study_run\`.
|
|
174
177
|
- **Cost**: slow (~30-120s) + credit-bearing. Reuse profiles via \`profile_list\` when possible. Sensible defaults: \`count: 5-10\` for ad-hoc tests, \`count: 20+\` for studies where you want statistical signal.
|
|
175
|
-
- **Growing
|
|
178
|
+
- **Growing a group of people**: build only the delta — don't rebuild. Concatenate the new \`person_ids\` with the existing ones for the next run. Because audience is a query, there's no audience entity to update.
|
|
176
179
|
- **Shapes**:
|
|
177
180
|
\`\`\`
|
|
178
181
|
// Simple — description only
|
|
179
|
-
|
|
182
|
+
group_build({
|
|
180
183
|
description: "Parents of toddlers (ages 1-3), US, evening-routine focused",
|
|
181
184
|
count: 8,
|
|
182
185
|
})
|
|
183
|
-
// → {
|
|
186
|
+
// → { person_ids: ["p-…", ...] }
|
|
184
187
|
|
|
185
188
|
// Seeded from real transcripts (CLI only for local files)
|
|
186
|
-
// ish
|
|
189
|
+
// ish person generate --description "..." --count 10 \\
|
|
187
190
|
// --source @./interviews/customer-1.md \\
|
|
188
191
|
// --source @./interviews/customer-2.md
|
|
189
192
|
\`\`\`
|
|
@@ -194,28 +197,29 @@ The most common multi-turn question: "user wants to change X — re-use the exis
|
|
|
194
197
|
|
|
195
198
|
| Change you want | What to do |
|
|
196
199
|
|---|---|
|
|
197
|
-
| Same ask, **same
|
|
198
|
-
| Same ask, **different
|
|
200
|
+
| Same ask, **same participants**, new variants | Pass \`ask_id\` (MCP) or \`--ask\` (CLI) on \`ask_run\` — re-uses the locked participants. |
|
|
201
|
+
| Same ask, **different participants** | New ask: omit \`ask_id\` (MCP) or pass \`--new\` (CLI). Participants are locked at ask creation. |
|
|
199
202
|
| Same study, **new media** (v2 deck, new image) | New **iteration** on the same study (\`iteration_create({ study_id, content_url \\| --media @path })\`). Iterations are immutable once they have results — never edit. |
|
|
200
|
-
| Same study, **new assignment** | **New study.** Assignment lives on the study; there's no in-place edit. Keep the old study's id for side-by-side comparison. *(
|
|
201
|
-
| Same
|
|
203
|
+
| Same study, **new assignment** | **New study.** Assignment lives on the study; there's no in-place edit. Keep the old study's id for side-by-side comparison. *(Participant-pair exception: the assignment IS the content there — use a new **iteration** on the same study, not a new study.)* |
|
|
204
|
+
| Same people across multiple runs / studies | Reuse the \`person_ids\` array. Profiles are workspace-scoped resources (\`p-…\`) — they live independently of any ask or study. |
|
|
202
205
|
| Chat endpoint definition needs to change (auth rotate, URL change) | \`chat_endpoint_init\` is **upsert-by-name** — re-init with the same \`name\` and a new \`from_curl\` spec. Re-run \`chat_endpoint_test\` to confirm. |
|
|
203
|
-
| Persona reuse in
|
|
206
|
+
| Persona reuse in participant-pair | Pin via \`person_ids\` on the first \`study_run\`; pass the same ids on subsequent runs. Without pinning, personas are re-synthesized from the assignment per run. |
|
|
204
207
|
|
|
205
208
|
When in doubt: side-by-side comparison usually beats in-place edits. Ids are cheap; result history isn't.
|
|
206
209
|
|
|
207
210
|
## Pitfalls
|
|
208
211
|
|
|
209
212
|
- **Cold start on free plan**: \`workspace_create\` returns \`usage_limit_reached\` at the free-plan cap (1 workspace). Always inspect with \`workspace_list\` first. **MCP-only recipe** (no \`--ensure\` available): \`workspace_list\` → if non-empty, use the first; if empty, \`workspace_create\`; if \`workspace_create\` returns \`usage_limit_reached\`, re-call \`workspace_list\` (a workspace exists you didn't see — possibly created by another session). **CLI shortcut**: \`ish workspace create --name <name> --ensure\` is idempotent by name.
|
|
210
|
-
- **Ask
|
|
213
|
+
- **Ask participants vs variants** — see Lifecycle table for the re-use vs new-ask decision.
|
|
211
214
|
- **Study iterations are immutable once they have results** — see Lifecycle table for new-iteration vs new-study.
|
|
212
|
-
- **Credit costs**: \`ask_run\`, \`study_run\`, and \`
|
|
213
|
-
- **\`
|
|
215
|
+
- **Credit costs**: \`ask_run\`, \`study_run\`, and \`group_build\` consume credits. Check \`workspace_get\`'s \`credits\` headroom before dispatching large runs. For free-plan ad-hoc tests, default \`count: 5-8\` participants + 2 variants is usually within budget.
|
|
216
|
+
- **\`group_build\` may return fewer profiles than requested** if the description is over-constrained. Always read the returned \`person_ids\` count, don't trust the requested \`count\` blindly.
|
|
214
217
|
- **Variants of wildly different length** (one-line vs paragraph) can skew picks toward the longer one. Keep variants comparable in shape.
|
|
215
|
-
- **Chatbot endpoint response-shape mismatch**: \`chat_endpoint_test\` succeeds shallowly if the bot responds at all, but a wrong response path (e.g. bot returns \`{ data: { reply } }\` instead of \`{ reply }\`) produces empty transcripts on the actual run. Inspect one full test response before dispatching
|
|
218
|
+
- **Chatbot endpoint response-shape mismatch**: \`chat_endpoint_test\` succeeds shallowly if the bot responds at all, but a wrong response path (e.g. bot returns \`{ data: { reply } }\` instead of \`{ reply }\`) produces empty transcripts on the actual run. Inspect one full test response before dispatching participants.
|
|
216
219
|
- **Chatbot auth drift**: tokens/sessions baked into \`--from-curl\` expire. If transcripts come back as identical short error strings, re-run \`chat_endpoint_test\` and refresh the curl spec.
|
|
217
|
-
- **401 surfaces as fake blocker**: an unauthenticated endpoint produces "
|
|
220
|
+
- **401 surfaces as fake blocker**: an unauthenticated endpoint produces "participant got stuck on auth screen" — looks like a UX blocker but is config. Always confirm endpoint auth before reading transcripts as user-research data.
|
|
218
221
|
- **No per-page/per-timestamp scoping for media**: there's no "evaluate just slide 14" or "react to seconds 0-30" API. State the focus explicitly in the \`assignment\` text, or pre-stitch the artifact (e.g. replace one slide locally, upload as a new iteration).
|
|
222
|
+
- **\`study get --json\` participants live at the top level**, not nested under \`iterations[*].participants\`. The backend split made \`/studies/{id}\` lite (metadata + iteration shells, no participant graph) and added \`/studies/{id}/participants\`; the CLI joins them so \`study get --json\` carries a flat \`participants[]\` with \`iteration_id\` on each row. Read \`.participants[]\`, not \`.iterations[].participants[]\`.
|
|
219
223
|
|
|
220
224
|
## When in doubt
|
|
221
225
|
|
|
@@ -232,7 +236,7 @@ Each workflow below is a complete transcript an agent can adapt. Run
|
|
|
232
236
|
## 1. First study from zero
|
|
233
237
|
|
|
234
238
|
Goal: from a fresh install to a finished interactive study with 3
|
|
235
|
-
|
|
239
|
+
participants and one question.
|
|
236
240
|
|
|
237
241
|
\`\`\`bash
|
|
238
242
|
# 1. Authenticate (browser flow, saves tokens to ~/.ish/config.json)
|
|
@@ -242,8 +246,8 @@ ish login
|
|
|
242
246
|
ish workspace create --name "Demo" --base-url https://example.com
|
|
243
247
|
ish workspace use w-…
|
|
244
248
|
|
|
245
|
-
# 3. Generate a small
|
|
246
|
-
ish
|
|
249
|
+
# 3. Generate a small group of people
|
|
250
|
+
ish person generate \\
|
|
247
251
|
--description "Tech-savvy millennials in the US who use mobile banking" \\
|
|
248
252
|
--count 3
|
|
249
253
|
|
|
@@ -266,6 +270,33 @@ ish study run --all --wait
|
|
|
266
270
|
ish study results --json | jq .
|
|
267
271
|
\`\`\`
|
|
268
272
|
|
|
273
|
+
### 1a. Give the assignment a step-by-step checklist
|
|
274
|
+
|
|
275
|
+
When "did they finish?" is a checklist rather than a single yes/no, attach
|
|
276
|
+
\`steps\` to the assignment. Steps are JSON-only (no inline shorthand) and
|
|
277
|
+
honored for **interactive** + **external_chatbot chat** modalities only.
|
|
278
|
+
|
|
279
|
+
\`\`\`bash
|
|
280
|
+
# assignments.json
|
|
281
|
+
# [
|
|
282
|
+
# { "name": "Buy", "instructions": "Add an item to cart and check out",
|
|
283
|
+
# "steps": [
|
|
284
|
+
# { "name": "Find a product", "description": "Browse to any item" },
|
|
285
|
+
# { "name": "Add to cart" },
|
|
286
|
+
# { "name": "Complete checkout" }
|
|
287
|
+
# ] }
|
|
288
|
+
# ]
|
|
289
|
+
ish study create --name "Checkout" --modality interactive \\
|
|
290
|
+
--url https://shop.example.com \\
|
|
291
|
+
--assignments-file ./assignments.json
|
|
292
|
+
ish study use s-…
|
|
293
|
+
ish study run --all --wait
|
|
294
|
+
|
|
295
|
+
# After the run, each step gets a pass-rate rollup:
|
|
296
|
+
ish study get s-… # human: "✓ Add to cart 4/5 (80%)" per step
|
|
297
|
+
ish study get s-… --json --verbose # step_completion[] incl. sample_failures[].participant_id
|
|
298
|
+
\`\`\`
|
|
299
|
+
|
|
269
300
|
## 2. Quick A/B ask with image variants
|
|
270
301
|
|
|
271
302
|
Goal: ship 30 simulated reactions to two hero images, with a "which do
|
|
@@ -289,7 +320,7 @@ adds an \`aggregates\` field per round with \`picks\`, \`ratings\` (mean
|
|
|
289
320
|
+ n per variant), and a \`winner\`. See \`ish docs get-page
|
|
290
321
|
reference/json-mode\` for the full shape.
|
|
291
322
|
|
|
292
|
-
Add a follow-up round with no
|
|
323
|
+
Add a follow-up round with no participant change:
|
|
293
324
|
|
|
294
325
|
\`\`\`bash
|
|
295
326
|
ish ask run --prompt "Which one would you click on?" \\
|
|
@@ -300,29 +331,41 @@ ish ask run --prompt "Which one would you click on?" \\
|
|
|
300
331
|
|
|
301
332
|
## 3. Generate profiles from a real source
|
|
302
333
|
|
|
303
|
-
Goal: turn a customer interview transcript into a 4-
|
|
334
|
+
Goal: turn a customer interview transcript into a 4-person group.
|
|
335
|
+
|
|
336
|
+
\`person generate\` is an async agentic job: it reads your brief and any
|
|
337
|
+
uploaded sources (transcripts, emails, PDFs, audio, images) describing how
|
|
338
|
+
real people reacted, then produces profiles PLUS scenarios grounded in those
|
|
339
|
+
reactions. It enqueues, polls ~30-60s, then prints the profiles (with
|
|
340
|
+
scenarios attached unless \`--no-scenarios\`). \`--json\` returns
|
|
341
|
+
\`{job: {person_ids}, profiles: [...]}\`.
|
|
304
342
|
|
|
305
343
|
\`\`\`bash
|
|
306
344
|
# Inline — auto-uploads the file:
|
|
307
|
-
ish
|
|
345
|
+
ish person generate --source ./interviews/sarah.txt --count 4
|
|
346
|
+
|
|
347
|
+
# The per-source --description is the researcher's own note: how the person reacted to THAT file.
|
|
348
|
+
ish source upload ./proposal.eml --description "called this proposal lazy and vague"
|
|
349
|
+
# → ps-3a4 (status: processed)
|
|
350
|
+
ish person generate --description "Skeptical enterprise buyer" --source ps-3a4 --count 1 --json
|
|
308
351
|
|
|
309
352
|
# Or upload once and reuse the source alias:
|
|
310
353
|
ish source upload ./call.mp3 --diarize
|
|
311
|
-
# →
|
|
312
|
-
ish
|
|
354
|
+
# → ps-3a4 (status: processed)
|
|
355
|
+
ish person generate --source ps-3a4 --propose-count
|
|
313
356
|
# → { proposed_count: 4, rationale: "..." }
|
|
314
|
-
ish
|
|
357
|
+
ish person generate --source ps-3a4 --count 4
|
|
315
358
|
\`\`\`
|
|
316
359
|
|
|
317
|
-
## 4. Build a specific simulated
|
|
360
|
+
## 4. Build a specific simulated person from notes
|
|
318
361
|
|
|
319
362
|
Goal: rebuild one named persona (a real prospect, a stakeholder for
|
|
320
363
|
a pitch rehearsal) via the iterative probe loop — distinct from
|
|
321
|
-
\`
|
|
364
|
+
\`person generate\`, which is for groups.
|
|
322
365
|
|
|
323
366
|
\`\`\`bash
|
|
324
367
|
# 1. Suggest 5 probes from a context blob
|
|
325
|
-
ish
|
|
368
|
+
ish person suggest-scenarios \\
|
|
326
369
|
--context "Staff platform engineer at a Stripe-using fintech. \\
|
|
327
370
|
Owns oncall for the payments edge. Burned by a Black Friday \\
|
|
328
371
|
outage last year." \\
|
|
@@ -333,33 +376,33 @@ ish profile suggest-scenarios \\
|
|
|
333
376
|
# [{"text":"...","source":"situation","scenario_prompt":"..."}, ...]
|
|
334
377
|
# Valid source values: situation, voice, binary, micro-story
|
|
335
378
|
|
|
336
|
-
# 3. Save the
|
|
337
|
-
ish
|
|
338
|
-
# →
|
|
379
|
+
# 3. Save the person shell
|
|
380
|
+
ish person create --file ./persona.json
|
|
381
|
+
# → p-d4e
|
|
339
382
|
|
|
340
383
|
# 4. Persist the answers as structured evidence
|
|
341
|
-
ish
|
|
384
|
+
ish person evidence add p-d4e --traces-file ./answers.json
|
|
342
385
|
|
|
343
386
|
# 5. Read back what's saved (also useful before the next probe round)
|
|
344
|
-
ish
|
|
387
|
+
ish person evidence list p-d4e
|
|
345
388
|
\`\`\`
|
|
346
389
|
|
|
347
390
|
To iterate, feed prior prompts/answers back in so the LLM doesn't
|
|
348
391
|
paraphrase what you already asked:
|
|
349
392
|
|
|
350
393
|
\`\`\`bash
|
|
351
|
-
ish
|
|
394
|
+
ish person suggest-scenarios \\
|
|
352
395
|
--context-file ./notes.md --count 3 \\
|
|
353
396
|
--already-surfaced '["PagerDuty fires at 02:00."]' \\
|
|
354
397
|
--previous-answers @./answers.json
|
|
355
398
|
\`\`\`
|
|
356
399
|
|
|
357
|
-
See \`ish docs get-page guides/build-specific-
|
|
400
|
+
See \`ish docs get-page guides/build-specific-person\` for the full
|
|
358
401
|
walkthrough including the four probe-type shapes.
|
|
359
402
|
|
|
360
403
|
## 5. Target a gated URL (Vercel preview / staging gate / login form)
|
|
361
404
|
|
|
362
|
-
Configure credentials once on the workspace;
|
|
405
|
+
Configure credentials once on the workspace; participants reuse them.
|
|
363
406
|
|
|
364
407
|
\`\`\`bash
|
|
365
408
|
# Show what's configured:
|
|
@@ -371,7 +414,7 @@ ish workspace site-access basic-auth --username alice --password hunter2
|
|
|
371
414
|
# Session cookie (Vercel preview, Lovable, etc.):
|
|
372
415
|
ish workspace site-access cookie --name session --value abc123
|
|
373
416
|
|
|
374
|
-
# Login form (typed by the
|
|
417
|
+
# Login form (typed by the participant into the page):
|
|
375
418
|
ish workspace site-access login --username demo --password demo
|
|
376
419
|
\`\`\`
|
|
377
420
|
|
|
@@ -383,28 +426,28 @@ printf %s "$STAGING_PW" | ish workspace site-access basic-auth \\
|
|
|
383
426
|
--username alice --password -
|
|
384
427
|
\`\`\`
|
|
385
428
|
|
|
386
|
-
## 6. Re-run a study with a fresh
|
|
429
|
+
## 6. Re-run a study with a fresh group
|
|
387
430
|
|
|
388
|
-
Goal: same study, same iteration, but compare
|
|
431
|
+
Goal: same study, same iteration, but compare groups.
|
|
389
432
|
|
|
390
433
|
\`\`\`bash
|
|
391
434
|
# First run — Swedish 35-50:
|
|
392
435
|
ish study run --country SE --min-age 35 --max-age 50 --sample 5 --wait
|
|
393
436
|
|
|
394
|
-
# Second run — every female
|
|
437
|
+
# Second run — every female person in the workspace, same iteration:
|
|
395
438
|
ish study run --gender female --all --wait
|
|
396
439
|
|
|
397
|
-
# Free-text filters: --search matches the
|
|
398
|
-
# matches the
|
|
440
|
+
# Free-text filters: --search matches the person **name**, --bio
|
|
441
|
+
# matches the person **bio**, --occupation matches the person
|
|
399
442
|
# **occupation** (repeatable, OR-joined). All are case-insensitive
|
|
400
|
-
# substrings — the same flag set works on \`ish
|
|
401
|
-
# \`ish ask run\`, \`ish ask add-
|
|
443
|
+
# substrings — the same flag set works on \`ish person list\`,
|
|
444
|
+
# \`ish ask run\`, \`ish ask add-people\`, and \`ish ask create\`.
|
|
402
445
|
ish study run --bio "screen reader" --all --wait
|
|
403
446
|
ish study run --occupation founder --occupation designer --sample 6 --wait
|
|
404
447
|
\`\`\`
|
|
405
448
|
|
|
406
|
-
If you don't pass any
|
|
407
|
-
iteration's existing
|
|
449
|
+
If you don't pass any people flags, \`ish study run\` reuses the
|
|
450
|
+
iteration's existing participants — useful for re-running after fixing the
|
|
408
451
|
target page.
|
|
409
452
|
|
|
410
453
|
## 7. Localhost target (dev environment)
|
|
@@ -438,9 +481,9 @@ ish iteration create --url "$URL"
|
|
|
438
481
|
The chat modality has **two modes**, picked by
|
|
439
482
|
\`iteration.details.mode_details.mode\`:
|
|
440
483
|
|
|
441
|
-
- **\`external_chatbot\`** —
|
|
484
|
+
- **\`external_chatbot\`** — participants probe a customer chatbot endpoint
|
|
442
485
|
(the original chat behaviour). Audience size is set on \`study run\`.
|
|
443
|
-
- **\`
|
|
486
|
+
- **\`participant_pair\`** — two AI people converse with each
|
|
444
487
|
other. Each side has its own scenario + goal; the other side does
|
|
445
488
|
not see it (asymmetry contract). Audiences are pinned to the
|
|
446
489
|
iteration: equal counts zip 1:1 by index, or one side of 1
|
|
@@ -488,12 +531,12 @@ ish chat endpoint get "$ID" --verbose \\
|
|
|
488
531
|
| ish chat endpoint update "$ID" --endpoint-config -
|
|
489
532
|
|
|
490
533
|
# 4. Run a chat-modality study referencing the endpoint. Audience size
|
|
491
|
-
# is set on study run, not study create (--sample, --all, --
|
|
534
|
+
# is set on study run, not study create (--sample, --all, --person).
|
|
492
535
|
STUDY=$(ish study create --modality chat --endpoint "$ID" \\
|
|
493
536
|
--name "Sign-up Q1" --assignment "Sign up:Try to sign up" \\
|
|
494
537
|
| jq -r .id)
|
|
495
538
|
ish study run --study "$STUDY" --sample 5 --wait
|
|
496
|
-
ish study results "$STUDY" --json | jq '.
|
|
539
|
+
ish study results "$STUDY" --json | jq '.participants'
|
|
497
540
|
\`\`\`
|
|
498
541
|
|
|
499
542
|
For stateful bots, thread \`conversation_id\` across single-turn
|
|
@@ -528,21 +571,21 @@ into \`update --endpoint-config -\`. Field-shorthand flags
|
|
|
528
571
|
without round-tripping.
|
|
529
572
|
|
|
530
573
|
Failed chat workers surface their error in
|
|
531
|
-
\`study results --json\` under \`
|
|
574
|
+
\`study results --json\` under \`participants[].error_message\` and
|
|
532
575
|
also in \`study poll --json\`. Branch on it instead of treating
|
|
533
576
|
\`interaction_count: 0\` as a generic failure.
|
|
534
577
|
|
|
535
578
|
Pre-flight tip: \`ish workspace info\` exposes
|
|
536
|
-
\`{studies_used, studies_max,
|
|
579
|
+
\`{studies_used, studies_max, participants_used, participants_max, tier}\` so
|
|
537
580
|
you can branch on plan caps before \`study create\` returns
|
|
538
581
|
\`error_code: usage_limit_reached\`.
|
|
539
582
|
|
|
540
583
|
The full reference is at \`ish docs get-page guides/chat\`,
|
|
541
584
|
secrets are at \`ish docs get-page concepts/secret\`.
|
|
542
585
|
|
|
543
|
-
### 7b.
|
|
586
|
+
### 7b. participant_pair — rehearse a two-AI conversation
|
|
544
587
|
|
|
545
|
-
Goal: pit two AI
|
|
588
|
+
Goal: pit two AI people against each other to see how a
|
|
546
589
|
two-role conversation unfolds — a sales rep vs. a skeptical CTO, a
|
|
547
590
|
founder vs. an investor archetype, a manager vs. a direct report
|
|
548
591
|
ahead of a difficult conversation. Each side has its own scenario
|
|
@@ -552,10 +595,10 @@ what makes the rehearsal credible).
|
|
|
552
595
|
One-shot study + iteration:
|
|
553
596
|
|
|
554
597
|
\`\`\`bash
|
|
555
|
-
ish study create --modality chat --chat-mode
|
|
598
|
+
ish study create --modality chat --chat-mode participant_pair \\
|
|
556
599
|
--name "Pitch rehearsal" \\
|
|
557
|
-
--
|
|
558
|
-
--
|
|
600
|
+
--group-a p-sales-1,p-sales-2 \\
|
|
601
|
+
--group-b p-cto-skeptic-1,p-cto-skeptic-2 \\
|
|
559
602
|
--scenario-a "You are a senior sales rep pitching ish to a new prospect." \\
|
|
560
603
|
--scenario-b "You are a skeptical CTO; surface risks before agreeing to a pilot." \\
|
|
561
604
|
--assignment "Pitch:Try to land a pilot"
|
|
@@ -566,50 +609,50 @@ ish study run -y
|
|
|
566
609
|
Or add a pair iteration to an existing chat study:
|
|
567
610
|
|
|
568
611
|
\`\`\`bash
|
|
569
|
-
ish iteration create --study s-... --chat-mode
|
|
570
|
-
--
|
|
612
|
+
ish iteration create --study s-... --chat-mode participant_pair \\
|
|
613
|
+
--group-a p-a1,p-a2 --group-b p-b1,p-b2 \\
|
|
571
614
|
--scenario-a @./scenario_a.md --scenario-b @./scenario_b.md \\
|
|
572
615
|
--max-turns 14
|
|
573
616
|
\`\`\`
|
|
574
617
|
|
|
575
618
|
Rules to remember:
|
|
576
|
-
- Each side needs **either** \`--
|
|
619
|
+
- Each side needs **either** \`--group-*\` (explicit IDs) **or**
|
|
577
620
|
\`--role-criteria-*\` (a filter the backend resolves). They can also
|
|
578
621
|
be combined — criteria then validates the explicit list.
|
|
579
|
-
- When **both sides** use explicit \`--
|
|
580
|
-
must be the same length (≥ 1). Pairs run 1:1 by index. Same
|
|
622
|
+
- When **both sides** use explicit \`--group-a\` / \`--group-b\`, they
|
|
623
|
+
must be the same length (≥ 1). Pairs run 1:1 by index. Same person
|
|
581
624
|
on both sides is allowed (self-talk rehearsal).
|
|
582
|
-
- **1×N broadcast**: pass exactly one
|
|
625
|
+
- **1×N broadcast**: pass exactly one person on one side and N on
|
|
583
626
|
the other to rehearse one fixed side against N variations. The CLI
|
|
584
627
|
auto-broadcasts the singleton to match. E.g.
|
|
585
|
-
\`--
|
|
628
|
+
\`--group-a p-rep --group-b p-cto1,p-cto2,p-cto3\` → 3
|
|
586
629
|
conversations, same rep, three different CTOs. Stderr notice fires
|
|
587
630
|
when broadcasting kicks in.
|
|
588
631
|
- Both \`--scenario-a\` and \`--scenario-b\` are required and asymmetric.
|
|
589
632
|
Use \`@./file.md\` to read from disk.
|
|
590
633
|
- \`--initiator-side\` (\`a\` default) picks who speaks first.
|
|
591
|
-
- \`--chat-mode\` accepts both \`
|
|
634
|
+
- \`--chat-mode\` accepts both \`participant_pair\` and \`participant-pair\`.
|
|
592
635
|
The same hyphen/underscore tolerance applies to \`--screen-format\`,
|
|
593
636
|
\`--kind\` on \`source upload\`, and the question \`type\` field in
|
|
594
637
|
\`--questionnaire\` / \`--questions\` manifests.
|
|
595
638
|
- Audiences are **authoritative on the iteration**.
|
|
596
|
-
\`ish study run\` refuses \`--
|
|
639
|
+
\`ish study run\` refuses \`--person\` / \`--sample\` / \`--all\` /
|
|
597
640
|
demographic filters on a pair iteration with a clear error. To
|
|
598
|
-
change
|
|
641
|
+
change groups, update the iteration via
|
|
599
642
|
\`ish iteration update <id> --details-json '{...}'\`.
|
|
600
643
|
- \`--max-turns\` / \`--early-termination\` on \`study run\` override the
|
|
601
644
|
iteration's saved values for that single dispatch (they don't
|
|
602
645
|
persist back to the iteration).
|
|
603
646
|
- Dispatch is per-Conversation (one task per pair). Per-Conversation
|
|
604
647
|
summaries (\`end_reason\`, \`dominant_dynamic\`, \`who_steered\`) land on
|
|
605
|
-
\`iteration.conversations[]\`. Per-
|
|
606
|
-
\`
|
|
648
|
+
\`iteration.conversations[]\`. Per-participant summaries land on
|
|
649
|
+
\`participant.summary\` as before.
|
|
607
650
|
|
|
608
|
-
### Filtering
|
|
651
|
+
### Filtering groups with role criteria (persona-first)
|
|
609
652
|
|
|
610
653
|
\`--role-criteria-a\` / \`--role-criteria-b\` accept a JSON object (or
|
|
611
654
|
\`@./file.json\`) describing who's eligible for that side. The
|
|
612
|
-
backend resolves the matching
|
|
655
|
+
backend resolves the matching person pool and persists the
|
|
613
656
|
IDs on the iteration. Keys (all optional):
|
|
614
657
|
|
|
615
658
|
\`\`\`json
|
|
@@ -632,8 +675,8 @@ IDs on the iteration. Keys (all optional):
|
|
|
632
675
|
\`\`\`
|
|
633
676
|
|
|
634
677
|
The five \`*_in\` arrays accept snake_case spec values verbatim
|
|
635
|
-
(see \`https://ishlabs.io/spec/
|
|
636
|
-
accessibility filters are coarse booleans over each
|
|
678
|
+
(see \`https://ishlabs.io/spec/person-enums.v1.json\`). The five
|
|
679
|
+
accessibility filters are coarse booleans over each participant's
|
|
637
680
|
\`accessibility_profile\` JSONB.
|
|
638
681
|
|
|
639
682
|
MECE rules for the list filters:
|
|
@@ -641,13 +684,13 @@ MECE rules for the list filters:
|
|
|
641
684
|
children; \`couple_no_kids\` is strictly child-free. \`single\` means
|
|
642
685
|
lives alone with no partner, roommates, parents, or children
|
|
643
686
|
sharing the household.
|
|
644
|
-
- \`employment_status_in\`: pick the
|
|
687
|
+
- \`employment_status_in\`: pick the participant's primary daytime
|
|
645
688
|
activity. A student who works 15 hrs/week is \`student\`; a retiree
|
|
646
689
|
who freelances is \`retired\`.
|
|
647
690
|
|
|
648
|
-
The **persona-first** principle: the
|
|
691
|
+
The **persona-first** principle: the participant's persona is sacred and
|
|
649
692
|
the LLM prompt construction does not change. Criteria filter the
|
|
650
|
-
*eligible pool* upstream so that by the time a
|
|
693
|
+
*eligible pool* upstream so that by the time a participant reaches the
|
|
651
694
|
prompt, their persona is already plausible for the role described
|
|
652
695
|
in \`scenario_*\`. Don't cram demographic constraints into the
|
|
653
696
|
scenario text — that breaks the asymmetry contract and produces
|
|
@@ -658,7 +701,7 @@ pick who plays the role.
|
|
|
658
701
|
If the resolved pool is smaller than the requested count for a side,
|
|
659
702
|
\`ish study run\` exits 2 with the backend's pool-too-small error
|
|
660
703
|
intact. Broaden the criteria, generate more profiles
|
|
661
|
-
(\`ish
|
|
704
|
+
(\`ish person generate\`), or fall back to explicit \`--group-*\`.
|
|
662
705
|
|
|
663
706
|
### Rehearsing against N variations of one side (1×N)
|
|
664
707
|
|
|
@@ -667,11 +710,11 @@ The most common rehearsal shape: fix one side, vary the other.
|
|
|
667
710
|
|
|
668
711
|
\`\`\`bash
|
|
669
712
|
# 1. Generate N distinct profiles for the varying side (or pick
|
|
670
|
-
# existing ones via \`ish
|
|
671
|
-
ish
|
|
713
|
+
# existing ones via \`ish person list\`).
|
|
714
|
+
ish person generate \\
|
|
672
715
|
--description "Skeptical CTO at a Series B SaaS startup" \\
|
|
673
716
|
--count 3 --json | jq -r '.items[].alias'
|
|
674
|
-
# →
|
|
717
|
+
# → p-cto1, p-cto2, p-cto3
|
|
675
718
|
|
|
676
719
|
# 2. Write the two scenarios as separate files. Each is a system
|
|
677
720
|
# prompt for ONE role; the partner never sees it. Cover voice,
|
|
@@ -682,15 +725,15 @@ ish profile generate \\
|
|
|
682
725
|
# ./sales_rep.md — the user's pitch + goals
|
|
683
726
|
# ./skeptical_cto.md — CTO's posture + concerns
|
|
684
727
|
|
|
685
|
-
# 3. Create the iteration with ONE
|
|
728
|
+
# 3. Create the iteration with ONE person on the fixed side and
|
|
686
729
|
# N on the varying side. CLI auto-broadcasts the singleton and
|
|
687
|
-
# prints a stderr notice ("Broadcasting --
|
|
730
|
+
# prints a stderr notice ("Broadcasting --group-a (1 person)
|
|
688
731
|
# to length 3…") so you see the expansion.
|
|
689
732
|
ish study create \\
|
|
690
|
-
--modality chat --chat-mode
|
|
733
|
+
--modality chat --chat-mode participant_pair \\
|
|
691
734
|
--name "Pitch rehearsal — 3 CTO variants" \\
|
|
692
|
-
--
|
|
693
|
-
--
|
|
735
|
+
--group-a p-rep \\
|
|
736
|
+
--group-b p-cto1,p-cto2,p-cto3 \\
|
|
694
737
|
--scenario-a @./sales_rep.md \\
|
|
695
738
|
--scenario-b @./skeptical_cto.md \\
|
|
696
739
|
--assignment "Pitch:Land a pilot or a clear next step"
|
|
@@ -705,11 +748,11 @@ ish iteration get <iter-id> --json \\
|
|
|
705
748
|
\`\`\`
|
|
706
749
|
|
|
707
750
|
The CLI emits a stderr notice when it broadcasts ("Broadcasting
|
|
708
|
-
--
|
|
751
|
+
--group-a (1 person) to length 3…") so you can see the
|
|
709
752
|
expansion happen.
|
|
710
753
|
|
|
711
754
|
**Criteria alternative**: \`--role-criteria-b '{"occupation":["cto"]}'\`
|
|
712
|
-
on a single \`--
|
|
755
|
+
on a single \`--group-a p-rep\` lets the backend pick the CTOs.
|
|
713
756
|
Less control over distinctness — for guaranteed variety, generate
|
|
714
757
|
explicit profiles first.
|
|
715
758
|
|
|
@@ -746,18 +789,18 @@ Inspect after running:
|
|
|
746
789
|
\`\`\`bash
|
|
747
790
|
ish iteration get <iter-id> --json \\
|
|
748
791
|
| jq '.details.mode_details.mode, .conversations[]'
|
|
749
|
-
ish study results <study-id> --transcript <
|
|
792
|
+
ish study results <study-id> --transcript <participant-id> --json
|
|
750
793
|
\`\`\`
|
|
751
794
|
|
|
752
795
|
## 9. Stage an ask for human review, then dispatch
|
|
753
796
|
|
|
754
797
|
Goal: prepare a billable A/B but let the user inspect and approve the
|
|
755
|
-
|
|
798
|
+
people + prompt before any credits are spent. Two-step flow with a
|
|
756
799
|
DRAFT status in between.
|
|
757
800
|
|
|
758
801
|
\`\`\`bash
|
|
759
802
|
# 1. Stage. No worker enqueued, no bill. Audience flags are still
|
|
760
|
-
# required —
|
|
803
|
+
# required — participants materialize at create time.
|
|
761
804
|
ASK=$(ish ask create --name "tagline AB" \\
|
|
762
805
|
--prompt "Which sounds better?" \\
|
|
763
806
|
--variant text:"Short and punchy." \\
|
|
@@ -768,7 +811,7 @@ ASK=$(ish ask create --name "tagline AB" \\
|
|
|
768
811
|
|
|
769
812
|
# Hand the alias back to the user. They can inspect it:
|
|
770
813
|
# ish ask get "$ASK" # status: draft
|
|
771
|
-
# ish ask get "$ASK" --json | jq '.
|
|
814
|
+
# ish ask get "$ASK" --json | jq '.participants | length'
|
|
772
815
|
|
|
773
816
|
# 2. Dispatch once approved (BILLABLE). Idempotent: a non-DRAFT ask
|
|
774
817
|
# returns 409 mapped to exit 2, so re-running is safe.
|
|
@@ -812,9 +855,9 @@ The mental rule: **\`--get\` is for capture, bare commands / \`--human\`
|
|
|
812
855
|
are for display, \`--json\` is for chaining (multiple fields at once).**
|
|
813
856
|
If you find yourself reaching for \`jq -r .x\`, you wanted \`--get x\`.
|
|
814
857
|
|
|
815
|
-
## 11. Extend a
|
|
858
|
+
## 11. Extend a participant past its step cap (or redirect mid-run)
|
|
816
859
|
|
|
817
|
-
Goal: a
|
|
860
|
+
Goal: a participant hit the \`--max-interactions\` cap before finishing, or
|
|
818
861
|
veered off into the wrong flow. Resume it with more steps and an
|
|
819
862
|
optional mid-run instruction — without re-running the whole cohort.
|
|
820
863
|
|
|
@@ -822,13 +865,13 @@ optional mid-run instruction — without re-running the whole cohort.
|
|
|
822
865
|
# 1. Source run with a small cap to feel the limit:
|
|
823
866
|
# (dispatch once and capture the alias in the same call — a separate bare run would dispatch and bill a second time)
|
|
824
867
|
SRC=$(ish study run --sample 1 --max-interactions 5 --wait \\
|
|
825
|
-
--get
|
|
868
|
+
--get participant_aliases | head -1)
|
|
826
869
|
|
|
827
870
|
# 2. Inspect what stopped (optional, useful for the LLM to choose
|
|
828
871
|
# a redirect instruction):
|
|
829
|
-
ish study
|
|
872
|
+
ish study participant "$SRC" --summary
|
|
830
873
|
|
|
831
|
-
# 3a. Add 15 more steps, no new instruction — let the
|
|
874
|
+
# 3a. Add 15 more steps, no new instruction — let the participant continue:
|
|
832
875
|
ish study extend "$SRC" --add-steps 15 --wait --timeout 600
|
|
833
876
|
|
|
834
877
|
# 3b. OR redirect with a mid-run instruction (captured as user_message;
|
|
@@ -837,20 +880,20 @@ ish study extend "$SRC" \\
|
|
|
837
880
|
--instruction "Stop browsing the blog. Open the pricing page and try to upgrade to Pro." \\
|
|
838
881
|
--add-steps 10 --wait
|
|
839
882
|
|
|
840
|
-
# 4. Capture the new
|
|
841
|
-
NEW=$(ish study extend "$SRC" --add-steps 10 --get
|
|
842
|
-
ish study
|
|
883
|
+
# 4. Capture the new participant alias to chain into results:
|
|
884
|
+
NEW=$(ish study extend "$SRC" --add-steps 10 --get participant_alias)
|
|
885
|
+
ish study participant "$NEW" --summary
|
|
843
886
|
\`\`\`
|
|
844
887
|
|
|
845
888
|
Rules to remember:
|
|
846
|
-
- Source
|
|
889
|
+
- Source participant must be **terminal** (\`completed\` / \`failed\` /
|
|
847
890
|
\`cancelled\`). If it's still running, \`ish study cancel <src>\` first.
|
|
848
891
|
\`cancel\` is non-destructive — every interaction, screenshot, and
|
|
849
892
|
questionnaire answer survives. \`cancel\` + \`extend\` form a
|
|
850
893
|
reversible stop/start pair.
|
|
851
|
-
- A **new**
|
|
894
|
+
- A **new** participant id is created under the same iteration (the backend
|
|
852
895
|
branches from the source's last interaction). The source row is left
|
|
853
|
-
untouched. Get the new id from \`.
|
|
896
|
+
untouched. Get the new id from \`.participant_id\` / \`.participant_alias\` on
|
|
854
897
|
\`--json\`.
|
|
855
898
|
- \`--add-steps\` is **only** the extra budget; it does NOT include the
|
|
856
899
|
source's original cap. Credits debit per
|
|
@@ -869,54 +912,56 @@ mental model (cancel + extend as a pair, error envelopes, cost model).
|
|
|
869
912
|
## Tips for chaining commands as an agent
|
|
870
913
|
|
|
871
914
|
- Capture aliases with \`--get\`: \`ITER=$(ish iteration create --url … --get alias)\`
|
|
872
|
-
- After \`ish study run --json\`, the
|
|
873
|
-
\`.
|
|
874
|
-
\`ish study poll/wait/cancel <
|
|
915
|
+
- After \`ish study run --json\`, the participants you just dispatched are at
|
|
916
|
+
\`.participant_aliases[]\` (and \`.participant_ids[]\` for UUIDs). Pass these to
|
|
917
|
+
\`ish study poll/wait/cancel <participant_id>\`. The \`simulations[]\` array
|
|
875
918
|
is collapsed to one batch entry per study with nested
|
|
876
|
-
\`
|
|
919
|
+
\`participant_ids[]\` / \`participant_aliases[]\` / \`job_ids[]\` so an N-sample
|
|
877
920
|
batch is a single row, not N near-duplicate rows.
|
|
878
921
|
- \`ish study poll\` honors the active study set by \`ish study use\` —
|
|
879
922
|
pass no \`--study\` flag and it polls the active study (parity with
|
|
880
923
|
\`study results\` / \`study wait\` / \`study run\`).
|
|
881
924
|
- \`ish study results --json\` includes per-answer \`sentiment\` (the
|
|
882
|
-
|
|
925
|
+
participant's session-level sentiment label) on every \`interview_answers[]
|
|
883
926
|
.answers[]\` row, plus \`sentiment\` + \`comment\` on every
|
|
884
|
-
\`
|
|
927
|
+
\`participants[]\` row. No need to fetch \`study participant <id>\` per row.
|
|
885
928
|
- \`ish study results --summary --json\` drops the interview_answers
|
|
886
|
-
payload and gives you counts + sentiment + per-
|
|
929
|
+
payload and gives you counts + sentiment + per-participant
|
|
887
930
|
{alias, status, sentiment, comment}. The cheapest "did this run land?"
|
|
888
931
|
shape.
|
|
889
|
-
- \`ish study results --transcript <
|
|
932
|
+
- \`ish study results --transcript <participant_id> --json\` is the
|
|
890
933
|
chat-modality projection — **external_chatbot mode only**. Returns
|
|
891
934
|
a flat \`transcript[]\` of {role, text, turn_index, action_type?,
|
|
892
935
|
option_label?, sentiment?, failure?} with a \`unique_bot_replies\`
|
|
893
936
|
count (1 on a multi-turn run = the M2 loop signature). Same shape
|
|
894
|
-
as the MCP \`get_chat_transcript\` tool. For
|
|
937
|
+
as the MCP \`get_chat_transcript\` tool. For participant_pair
|
|
895
938
|
conversations, fetch \`.conversations[]\` from
|
|
896
|
-
\`ish iteration get <iter-id> --json\` instead — bot/
|
|
897
|
-
don't apply when both speakers are
|
|
939
|
+
\`ish iteration get <iter-id> --json\` instead — bot/participant roles
|
|
940
|
+
don't apply when both speakers are participants.
|
|
898
941
|
- \`ish study run --json\` on a pair iteration includes a
|
|
899
|
-
\`pair_preview\` block (
|
|
942
|
+
\`pair_preview\` block (group sizes, conversation count,
|
|
900
943
|
initiator side, scenario previews) so agents can confirm what
|
|
901
944
|
they just dispatched without a follow-up \`iteration get\`.
|
|
902
|
-
- \`ish study
|
|
903
|
-
and returns just {
|
|
945
|
+
- \`ish study participant <id> --summary --json\` drops the action timeline
|
|
946
|
+
and returns just {participant, sentiment, comment, error_message}.
|
|
904
947
|
- \`ish ask results --json\` keeps \`variant_pick_id\` on every
|
|
905
948
|
response without needing \`--verbose\` — it's the load-bearing field
|
|
906
949
|
for "who picked what". Same logic on \`ask get\`.
|
|
907
|
-
- \`ish iteration get --json\`
|
|
950
|
+
- \`ish iteration get --json\` participants carry \`alias\` + \`name\` (M12
|
|
908
951
|
parity with \`study results --json\`).
|
|
909
952
|
- Use \`--fields\` to keep JSON tight: \`ish study list --fields alias,name,status\`
|
|
910
953
|
- Always pass \`--wait\` (or \`ish study wait\`) before reading
|
|
911
954
|
\`ish study results\` — without it you may read partial data.
|
|
912
|
-
- For \`ask\` write-paths (update/archive/wait/add-questions/add-
|
|
955
|
+
- For \`ask\` write-paths (update/archive/wait/add-questions/add-people),
|
|
913
956
|
default JSON is compact (changed fields + alias). Pass \`--verbose\` for
|
|
914
957
|
the full Ask payload.
|
|
915
|
-
-
|
|
916
|
-
|
|
958
|
+
- \`person generate --json\` returns \`{job: {id, status, person_ids},
|
|
959
|
+
profiles: [...]}\`; each person is the lean person shape with its
|
|
960
|
+
evidence-grounded \`scenarios\` attached (\`--no-scenarios\` to omit,
|
|
961
|
+
\`--verbose\` for the full record incl. \`simulation_config\`).
|
|
917
962
|
- On \`error_code: "usage_limit_reached"\` (HTTP 403), don't retry —
|
|
918
963
|
read \`tier\`, \`limit\`, \`current\`, \`max\`, and \`upgrade_url\` from
|
|
919
|
-
the JSON body to construct a recovery message. \`
|
|
964
|
+
the JSON body to construct a recovery message. \`person generate\` /
|
|
920
965
|
\`study generate\` refuse the entire batch when the post-generation
|
|
921
966
|
count would exceed the cap; re-issue with a smaller \`--count\`.
|
|
922
967
|
- Every verb's \`--help\` ends with a "Tips:" footer naming \`--get\`
|
|
@@ -925,12 +970,12 @@ mental model (cancel + extend as a pair, error envelopes, cost model).
|
|
|
925
970
|
- \`ish study run --wait\` returns \`error_code: "wait_timeout"\`
|
|
926
971
|
on wait expiry (exit 5, retryable) — distinct from network /
|
|
927
972
|
server timeouts. The envelope carries \`progress\` so you can
|
|
928
|
-
resume by polling the listed
|
|
929
|
-
Same envelope on \`ish study wait\` and per-
|
|
973
|
+
resume by polling the listed participants instead of re-dispatching.
|
|
974
|
+
Same envelope on \`ish study wait\` and per-participant \`study wait\`.
|
|
930
975
|
- \`ish study run\` accepts \`--dispatch-timeout <s>\` (default 120)
|
|
931
976
|
for the per-POST budget. On dispatch failure the error envelope
|
|
932
977
|
includes \`seeded_but_not_dispatched_ids[]\` /
|
|
933
|
-
\`seeded_but_not_dispatched_aliases[]\` —
|
|
978
|
+
\`seeded_but_not_dispatched_aliases[]\` — participants exist
|
|
934
979
|
server-side; resume by polling them, don't re-run \`study run\`.
|
|
935
980
|
- \`ish ask run --new\` is non-idempotent and marked
|
|
936
981
|
\`retryable: false\` on any failure. If you do see one, run
|
|
@@ -946,20 +991,20 @@ mental model (cancel + extend as a pair, error envelopes, cost model).
|
|
|
946
991
|
| You want to… | Don't | Do |
|
|
947
992
|
|-------------------------------------------|----------------------------------------|--------------------------------------------------------------------|
|
|
948
993
|
| Capture a single value (alias, id, …) | \`--json \\| jq -r .alias\` | \`--get alias\` |
|
|
949
|
-
| Capture a nested value | \`--json \\| jq -r .
|
|
994
|
+
| Capture a nested value | \`--json \\| jq -r .person.name\` | \`--get person.name\` |
|
|
950
995
|
| Capture every alias from a list | \`--json \\| jq -r '.items[].alias'\` | \`--get alias\` (auto-descends into \`items\`, one per line) |
|
|
951
996
|
| Force human output through tee/redirect | none, output silently became JSON | \`--human\` |
|
|
952
|
-
| Look up 2-3 specific profiles | \`
|
|
997
|
+
| Look up 2-3 specific profiles | \`person list --json \\| jq '.items[] \\| select(...)'\` | \`ish person get p-1b9 p-fc1 p-2fc\` |
|
|
953
998
|
| Show only some fields | \`--json \\| jq '{alias, name, country}'\` | \`--fields alias,name,country\` |
|
|
954
|
-
| Count
|
|
999
|
+
| Count participants on an ask | \`--json \\| jq '.participants \\| length'\` | \`ish ask get a-… --fields alias,participants_count\` |
|
|
955
1000
|
| Count responses on a round | \`--json \\| jq '.rounds[0].responses \\| length'\` | \`ish ask get a-… --fields alias,rounds,responses_complete,responses_total\` |
|
|
956
1001
|
| Pick the A/B winner | \`--json \\| jq '.rounds[0].responses…'\` | \`ish ask results a-… --json\` then read \`.rounds[].aggregates.winner\` |
|
|
957
|
-
| List of
|
|
958
|
-
| Per-answer sentiment | \`--json \\| jq '...'\` per
|
|
1002
|
+
| List of participants from \`study run\` | \`--json \\| jq '.participants[].id'\` | \`--get participant_aliases\` (or \`participant_ids\` for UUIDs) |
|
|
1003
|
+
| Per-answer sentiment | \`--json \\| jq '...'\` per participant | \`ish study results <id> --json\` (sentiment is on every answer row) |
|
|
959
1004
|
| "Did this run land?" headline | \`study results --json\` + jq filtering | \`ish study results <id> --summary --json\` |
|
|
960
|
-
| Chat transcript for one
|
|
961
|
-
| Pair-mode conversation transcripts | \`study
|
|
962
|
-
|
|
|
1005
|
+
| Chat transcript for one participant (external_chatbot) | \`study participant --json\` + jq | \`ish study results <id> --transcript <participant_id> --json\` |
|
|
1006
|
+
| Pair-mode conversation transcripts | \`study participant --json\` per participant | \`ish iteration get <iter-id> --json \\| jq '.conversations[]'\` |
|
|
1007
|
+
| Participant headline only (no action timeline) | \`study participant --json\` + jq | \`ish study participant <id> --summary --json\` |
|
|
963
1008
|
| Variant pick id on an ask response | \`ask results --json --verbose\` | \`ish ask results a-… --json\` (variant_pick_id is preserved) |
|
|
964
1009
|
|
|
965
1010
|
The bias here is intentional: \`ish\` ships shapes designed for agent
|
|
@@ -986,14 +1031,17 @@ ish <command> --help
|
|
|
986
1031
|
| \`study\` | Persistent research artifact | concepts/study |
|
|
987
1032
|
| \`iteration\` | One configured run of a study (URL or media) | concepts/iteration |
|
|
988
1033
|
| \`ask\` | Lightweight reaction artifact | concepts/ask |
|
|
989
|
-
| \`
|
|
990
|
-
| \`source\` | Upload sources for
|
|
1034
|
+
| \`person\` | People, people generation, and the \`suggest-scenarios\` + \`evidence add\`/\`list\` probe loop for crafting one specific persona | concepts/person |
|
|
1035
|
+
| \`source\` | Upload sources for person generation | concepts/source |
|
|
991
1036
|
| \`config\` | Simulation configs (model, timing, retries) | (run \`ish config --help\`) |
|
|
992
|
-
| \`chat\` | Chat endpoint CRUD + smoke test (external_chatbot mode); pair-mode iterations created via \`iteration create --chat-mode
|
|
1037
|
+
| \`chat\` | Chat endpoint CRUD + smoke test (external_chatbot mode); pair-mode iterations created via \`iteration create --chat-mode participant_pair\` | guides/chat |
|
|
993
1038
|
| \`secret\` | Per-workspace secrets (\`{{secret:KEY}}\` resolver) | concepts/secret |
|
|
994
1039
|
| \`docs\` | Offline docs for agents | (run \`ish docs --help\`) |
|
|
995
1040
|
| \`init\` | Drop this skill into a Claude Code / Codex / | (run \`ish init --help\`) |
|
|
996
1041
|
| | Cursor / Cline / Roo project | |
|
|
1042
|
+
| \`mcp\` | Wire the hosted ish MCP server into local AI | guides/mcp-add |
|
|
1043
|
+
| | clients (Cursor, VS Code, Claude Code, | |
|
|
1044
|
+
| | Claude Desktop, Windsurf). Idempotent. | |
|
|
997
1045
|
| \`login\` | Browser-based auth | — |
|
|
998
1046
|
| \`logout\` | Clear saved credentials | — |
|
|
999
1047
|
| \`status\` | Show active session (user, workspace, | concepts/active-context |
|
|
@@ -1016,8 +1064,8 @@ the right \`ish docs get-page <slug>\` to read deep context.
|
|
|
1016
1064
|
|
|
1017
1065
|
## Aliases
|
|
1018
1066
|
|
|
1019
|
-
Short prefixed IDs (e.g. \`s-b2c\`, \`
|
|
1020
|
-
\`t-a17\`, \`
|
|
1067
|
+
Short prefixed IDs (e.g. \`s-b2c\`, \`p-795\`, \`a-6ec\`, \`i-d4e\`,
|
|
1068
|
+
\`t-a17\`, \`ps-3a4\`, \`w-6ec\`, \`c-c3c\`) are accepted anywhere a UUID
|
|
1021
1069
|
is expected. Full UUIDs always work too. See
|
|
1022
1070
|
\`ish docs get-page reference/aliases\`.
|
|
1023
1071
|
|