@ishlabs/cli 0.17.6 → 0.18.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +54 -54
- package/dist/commands/ask.d.ts +4 -4
- package/dist/commands/ask.js +66 -66
- package/dist/commands/chat.js +10 -10
- package/dist/commands/config.js +1 -1
- package/dist/commands/docs.js +1 -1
- package/dist/commands/iteration.js +57 -57
- package/dist/commands/mcp.d.ts +23 -0
- package/dist/commands/mcp.js +676 -0
- package/dist/commands/person.d.ts +5 -0
- package/dist/commands/{profile.js → person.js} +197 -162
- package/dist/commands/source.d.ts +6 -2
- package/dist/commands/source.js +35 -30
- package/dist/commands/study-analyze.d.ts +1 -1
- package/dist/commands/study-analyze.js +3 -3
- package/dist/commands/study-participant.d.ts +8 -0
- package/dist/commands/{study-tester.js → study-participant.js} +50 -50
- package/dist/commands/study-run.d.ts +6 -6
- package/dist/commands/study-run.js +295 -271
- package/dist/commands/study.js +89 -66
- package/dist/commands/workspace.js +13 -13
- package/dist/connect.js +5 -5
- package/dist/index.js +6 -4
- package/dist/lib/accessibility-profile.d.ts +1 -1
- package/dist/lib/accessibility-profile.js +1 -1
- package/dist/lib/alias-hydrate.js +4 -4
- package/dist/lib/alias-store.d.ts +5 -5
- package/dist/lib/alias-store.js +8 -8
- package/dist/lib/api-client.d.ts +1 -1
- package/dist/lib/api-client.js +1 -1
- package/dist/lib/billing.d.ts +11 -11
- package/dist/lib/billing.js +16 -16
- package/dist/lib/chat-endpoint-templates.js +1 -1
- package/dist/lib/command-helpers.d.ts +18 -18
- package/dist/lib/command-helpers.js +83 -53
- package/dist/lib/docs.js +560 -386
- package/dist/lib/enums.d.ts +2 -2
- package/dist/lib/enums.js +2 -2
- package/dist/lib/local-sim/browser.d.ts +1 -1
- package/dist/lib/local-sim/browser.js +1 -1
- package/dist/lib/local-sim/debug-report.d.ts +2 -2
- package/dist/lib/local-sim/debug-report.js +3 -3
- package/dist/lib/local-sim/loop.d.ts +5 -5
- package/dist/lib/local-sim/loop.js +38 -38
- package/dist/lib/local-sim/types.d.ts +12 -12
- package/dist/lib/mcp-clients.d.ts +51 -0
- package/dist/lib/mcp-clients.js +175 -0
- package/dist/lib/modality.d.ts +10 -10
- package/dist/lib/modality.js +46 -46
- package/dist/lib/observability.d.ts +11 -0
- package/dist/lib/observability.js +16 -3
- package/dist/lib/output.d.ts +13 -12
- package/dist/lib/output.js +244 -184
- package/dist/lib/profile-sources.d.ts +64 -16
- package/dist/lib/profile-sources.js +91 -30
- package/dist/lib/skill-content.js +215 -168
- package/dist/lib/study-events.d.ts +3 -3
- package/dist/lib/study-events.js +1 -1
- package/dist/lib/study-inputs.d.ts +11 -1
- package/dist/lib/study-inputs.js +68 -17
- package/dist/lib/types.d.ts +105 -34
- package/package.json +1 -1
- package/dist/commands/profile.d.ts +0 -5
- package/dist/commands/study-tester.d.ts +0 -8
|
@@ -23,9 +23,9 @@ const VERSION = pkg.version;
|
|
|
23
23
|
* the description with verbs the user is likely to say plus the noun
|
|
24
24
|
* "ish". Hard cap is 1024 chars. Front-load the use case.
|
|
25
25
|
*/
|
|
26
|
-
const SKILL_DESCRIPTION = "Use this skill whenever the user mentions ish, a study, a
|
|
27
|
-
"a simulation run, an \"ask\",
|
|
28
|
-
"dispatch tests against AI
|
|
26
|
+
const SKILL_DESCRIPTION = "Use this skill whenever the user mentions ish, a study, a person, " +
|
|
27
|
+
"a simulation run, an \"ask\", a group of people, a chatbot probe, wants to " +
|
|
28
|
+
"dispatch tests against AI participants, or wants to rehearse a conversation " +
|
|
29
29
|
"between two AI personas (e.g. sales rep vs. skeptical buyer). Covers both " +
|
|
30
30
|
"the `ish` CLI (via Bash) and the hosted ish MCP server " +
|
|
31
31
|
"(`mcp__claude_ai_ish__*` on claude.ai) — same operations, pick whichever " +
|
|
@@ -37,7 +37,7 @@ ish runs user-research simulations: simulated people experience your draft (page
|
|
|
37
37
|
|
|
38
38
|
## When to invoke
|
|
39
39
|
|
|
40
|
-
The user mentioned \`ish\`, a study, an "ask", a
|
|
40
|
+
The user mentioned \`ish\`, a study, an "ask", a person, a group of people, a simulation, "rehearse", "compare variants", "test before shipping", "probe a chatbot".
|
|
41
41
|
|
|
42
42
|
## Drivers
|
|
43
43
|
|
|
@@ -48,8 +48,10 @@ ish has two surfaces; pick whichever your environment has:
|
|
|
48
48
|
|
|
49
49
|
Both wrap the same operations. If neither is present, tell the user: \`npm i -g @ishlabs/cli\`, or enable the ish connector on claude.ai. Don't try to drive ish without a driver.
|
|
50
50
|
|
|
51
|
+
**Bridging CLI → MCP for the user's editor / desktop agent**: if the user has the CLI but their editor or desktop agent (Cursor, VS Code, Claude Code, Claude Desktop, Windsurf) isn't yet wired to call ish, one command does it: \`ish mcp add --all --yes\`. It writes the per-client MCP config block, never embeds a token (OAuth happens on first connect), and is idempotent. See \`ish docs get-page guides/mcp-add\`.
|
|
52
|
+
|
|
51
53
|
**When both are available, pick by op:**
|
|
52
|
-
- Streaming results to a watching user → **CLI** with \`--wait\` (per-
|
|
54
|
+
- Streaming results to a watching user → **CLI** with \`--wait\` (per-participant output as participants complete).
|
|
53
55
|
- Structured one-shot reads or run dispatch → **MCP** (JSON in, JSON out, no shell).
|
|
54
56
|
- Idempotent setup (e.g. cold-start workspace) → **CLI** has \`--ensure\`; MCP doesn't.
|
|
55
57
|
- Local file uploads (images, video, docs) → **CLI** only — MCP doesn't accept binaries.
|
|
@@ -60,16 +62,16 @@ Both wrap the same operations. If neither is present, tell the user: \`npm i -g
|
|
|
60
62
|
|
|
61
63
|
\`\`\`
|
|
62
64
|
Workspace (= product)
|
|
63
|
-
├──
|
|
65
|
+
├── Person (p-…) reusable AI persona
|
|
64
66
|
├── Study (s-…) persistent artifact for testing a real surface
|
|
65
67
|
│ └── Iteration (i-…) one configured run; carries the URL or media
|
|
66
68
|
├── Ask (a-…) lightweight artifact for reactions to text/image variants
|
|
67
|
-
│ └── Round unit of execution;
|
|
69
|
+
│ └── Round unit of execution; participants fixed at ask creation
|
|
68
70
|
└── Chat Endpoint workspace-level definition of an external chatbot
|
|
69
71
|
(referenced by study modality: chat, mode: external_chatbot)
|
|
70
72
|
\`\`\`
|
|
71
73
|
|
|
72
|
-
**Audience is a query, not an entity.** Both \`ask_run\` and \`study_run\` take an \`audience\` argument shaped as \`{
|
|
74
|
+
**Audience is a query, not an entity.** Both \`ask_run\` and \`study_run\` take an \`audience\` argument shaped as \`{ person_ids: [...] }\` (explicit) or \`{ sample: N, filters: {...} }\` (sampled from an existing pool). There is no \`audience\` resource to create — you build profiles via \`group_build\` (or reuse existing ones via \`profile_list\`) and pass their \`person_ids\` in.
|
|
73
75
|
|
|
74
76
|
Two run verbs:
|
|
75
77
|
- **study run** — simulate on a real surface (URL, media, document, chat endpoint).
|
|
@@ -81,11 +83,11 @@ Heuristic: **study** for "test this prototype/page/flow"; **ask** for "which cop
|
|
|
81
83
|
|
|
82
84
|
Each shape names the verb, the *required precursors*, and the **load-bearing knobs** — the arguments that change output quality, not just behavior. Look up the full schema in the MCP tool description or \`ish <command> --help\` once you've picked the shape.
|
|
83
85
|
|
|
84
|
-
Examples below use MCP shape; for CLI, kebab-case the tool name (\`ask_run\` → \`ish ask run\`) and pass equivalent flags (\`
|
|
86
|
+
Examples below use MCP shape; for CLI, kebab-case the tool name (\`ask_run\` → \`ish ask run\`) and pass equivalent flags (\`person_ids: [...]\` → \`--person-id p-… --person-id p-…\`).
|
|
85
87
|
|
|
86
88
|
### Compare text or image variants → \`ask_run\`
|
|
87
89
|
|
|
88
|
-
- **Precursor**:
|
|
90
|
+
- **Precursor**: a group of people (see "Audience is a query" above). If you don't already have suitable people, build them first via \`group_build\`; reuse via \`profile_list\` when possible.
|
|
89
91
|
- **Load-bearing knobs**:
|
|
90
92
|
- \`wants_pick: true\` — adds an aggregate winner verdict. Without it you get prose reactions but no clear answer.
|
|
91
93
|
- \`wants_ratings: true\` — adds per-variant numeric scores.
|
|
@@ -96,22 +98,23 @@ Examples below use MCP shape; for CLI, kebab-case the tool name (\`ask_run\` →
|
|
|
96
98
|
\`\`\`
|
|
97
99
|
ask_run({
|
|
98
100
|
variants: [ { label: "A", content: "..." }, { label: "B", content: "..." } ],
|
|
99
|
-
audience: {
|
|
101
|
+
audience: { person_ids: ["p-…", ...] }, // or { sample: 10 }
|
|
100
102
|
wants_pick: true,
|
|
101
103
|
wants_ratings: true,
|
|
102
104
|
wait: true,
|
|
103
105
|
})
|
|
104
106
|
\`\`\`
|
|
105
|
-
- **Output**: per-
|
|
107
|
+
- **Output**: per-participant reasoning + (if \`wants_pick\`) aggregate winner with confidence.
|
|
106
108
|
|
|
107
109
|
### Test a live page or prototype → \`study_run\` (modality: interactive)
|
|
108
110
|
|
|
109
|
-
- **Precursor**: a study with a URL. Either inline at create-time (\`study_create({ modality: "interactive", url: "..." })\`) or as a separate iteration (\`iteration_create({ study_id, url })\`) when you want to A/B iterations later or upload local files. An **assignment** is required — what the
|
|
110
|
-
- **Audience**: pass \`audience: {
|
|
111
|
+
- **Precursor**: a study with a URL. Either inline at create-time (\`study_create({ modality: "interactive", url: "..." })\`) or as a separate iteration (\`iteration_create({ study_id, url })\`) when you want to A/B iterations later or upload local files. An **assignment** is required — what the participant is supposed to attempt.
|
|
112
|
+
- **Audience**: pass \`audience: { person_ids: [...] }\` or \`{ sample: N }\` to \`study_run\`, same contract as \`ask_run\`. Audience is set on the *run*, not the study.
|
|
111
113
|
- **Load-bearing knobs**:
|
|
112
|
-
- \`assignment\` (on \`study_create\`) — what the
|
|
113
|
-
- \`
|
|
114
|
-
- \`
|
|
114
|
+
- \`assignment\` (on \`study_create\`) — what the participant is supposed to do. Format: \`"<label>:<instruction>"\`. The whole run hinges on this being clear.
|
|
115
|
+
- **steps (optional checklist)** — an assignment can carry an ordered \`steps\` list of atomic actions (\`{name, description?}\`), authored via the CLI JSON forms (\`--assignments-file\` / \`--assignments\`) — not the \`"<label>:<instruction>"\` shorthand. Honored for **interactive** and **external_chatbot chat** only. After a run, \`study get\` reports a per-step \`step_completion\` rollup (pass rate + sample failures). Use steps when "did they finish?" is a checklist, not a single yes/no.
|
|
116
|
+
- \`wait\` (MCP) / \`--wait\` (CLI) — streams per-participant results as they complete. CLI streams to stdout in real-time; MCP blocks until the whole run finishes. For a watching user, prefer the CLI here.
|
|
117
|
+
- \`count\` (on \`study_run\`) — how many participants.
|
|
115
118
|
- **Shape**:
|
|
116
119
|
\`\`\`
|
|
117
120
|
study_create({
|
|
@@ -119,18 +122,18 @@ Examples below use MCP shape; for CLI, kebab-case the tool name (\`ask_run\` →
|
|
|
119
122
|
url: "https://staging.acme.io/welcome",
|
|
120
123
|
assignment: "Complete signup:Go through the 4-step wizard end-to-end",
|
|
121
124
|
})
|
|
122
|
-
study_run({ study_id: "s-…", audience: {
|
|
125
|
+
study_run({ study_id: "s-…", audience: { person_ids: [...] }, count: 15, wait: true })
|
|
123
126
|
\`\`\`
|
|
124
|
-
- **Output**: per-
|
|
127
|
+
- **Output**: per-participant journey transcripts + aggregate friction / blocker / positive-moment counts.
|
|
125
128
|
|
|
126
129
|
### Probe a customer chatbot → \`study_run\` (modality: chat, mode: external_chatbot)
|
|
127
130
|
|
|
128
131
|
- **Precursors**:
|
|
129
|
-
1. A **chat endpoint** definition at the workspace level. \`chat_endpoint_init\` from a curl spec (handles auth headers, request/response shape; **upsert-by-name** — safe to re-call with the same \`name\` to rotate auth or change the request shape) → \`chat_endpoint_test\` to confirm it responds correctly before dispatching simulated
|
|
132
|
+
1. A **chat endpoint** definition at the workspace level. \`chat_endpoint_init\` from a curl spec (handles auth headers, request/response shape; **upsert-by-name** — safe to re-call with the same \`name\` to rotate auth or change the request shape) → \`chat_endpoint_test\` to confirm it responds correctly before dispatching simulated participants.
|
|
130
133
|
2. A study with \`modality: "chat"\`, \`mode: "external_chatbot"\`, the endpoint reference, and an \`assignment\`.
|
|
131
|
-
- **Audience**: same \`{
|
|
134
|
+
- **Audience**: same \`{ person_ids } | { sample }\` contract; pass to \`study_run\`. For custom personas (e.g. "frustrated vs polite"), \`group_build\` first.
|
|
132
135
|
- **Load-bearing knobs**:
|
|
133
|
-
- \`assignment\` — what the
|
|
136
|
+
- \`assignment\` — what the participant tries to do (\`"Cancel:Try to cancel your subscription"\`).
|
|
134
137
|
- \`count\` on the run.
|
|
135
138
|
- **Shape**:
|
|
136
139
|
\`\`\`
|
|
@@ -138,52 +141,52 @@ Examples below use MCP shape; for CLI, kebab-case the tool name (\`ask_run\` →
|
|
|
138
141
|
chat_endpoint_test({ endpoint: "support-bot", message: "hi" })
|
|
139
142
|
study_create({ modality: "chat", mode: "external_chatbot", endpoint: "support-bot",
|
|
140
143
|
assignment: "Cancel:Try to cancel your subscription" })
|
|
141
|
-
study_run({ study_id: "s-…", audience: {
|
|
144
|
+
study_run({ study_id: "s-…", audience: { person_ids: [...] }, count: 8, wait: true })
|
|
142
145
|
\`\`\`
|
|
143
|
-
- **Output**: full conversation transcripts per
|
|
146
|
+
- **Output**: full conversation transcripts per participant + aggregate success / blocker analysis.
|
|
144
147
|
|
|
145
148
|
### Test a media artifact (document, image, video, audio) → \`study_run\`
|
|
146
149
|
|
|
147
150
|
- **Precursors**:
|
|
148
151
|
1. A study with the chosen modality: \`study_create({ modality: "document" | "image" | "video" | "audio", assignment: "..." })\`.
|
|
149
152
|
2. An **iteration** carrying the media. For local files, **CLI only** — \`ish iteration create --study s-… --media @./deck.pdf\` (the \`@\` prefix triggers upload). For hosted URLs, either driver works: \`iteration_create({ study_id, content_url: "https://..." })\`.
|
|
150
|
-
- **Audience**: same \`{
|
|
153
|
+
- **Audience**: same \`{ person_ids } | { sample }\` contract; pass to \`study_run\`. Reusable across runs (see "Lifecycle" below).
|
|
151
154
|
- **Load-bearing knobs**:
|
|
152
155
|
- \`assignment\` on \`study_create\` — for review-style media (decks, ad creative), frame as decision: \`"Take a first meeting:Review this Series A deck and decide whether you'd take a first meeting"\`. Page/timestamp-level attribution depends on the assignment asking for it explicitly.
|
|
153
156
|
- \`wait\` / \`--wait\` — same streaming story as interactive.
|
|
154
157
|
- \`count\` on \`study_run\`.
|
|
155
|
-
- **Iterating on the artifact** (v2 deck, v3 deck): create a **new iteration** on the same study (\`iteration_create\`), reuse the
|
|
156
|
-
- **Output**: per-
|
|
158
|
+
- **Iterating on the artifact** (v2 deck, v3 deck): create a **new iteration** on the same study (\`iteration_create\`), reuse the people's \`person_ids\`. See "Lifecycle".
|
|
159
|
+
- **Output**: per-participant reactions to the artifact + aggregate themes.
|
|
157
160
|
|
|
158
|
-
### Rehearse a conversation between two AI personas → \`study_run\` (modality: chat, mode:
|
|
161
|
+
### Rehearse a conversation between two AI personas → \`study_run\` (modality: chat, mode: participant_pair)
|
|
159
162
|
|
|
160
163
|
**If the user might want the same persona across multiple turns, pin profiles up-front — you can't retro-pin after a run.** Without pinning, personas are re-synthesized from the assignment text each time, so "the same VC from earlier" becomes prose-only continuity.
|
|
161
164
|
|
|
162
|
-
- **Precursor**: a workspace and (optionally) one or two
|
|
163
|
-
- **Audience**: optional. For persona continuity across iterations, build profiles via \`
|
|
165
|
+
- **Precursor**: a workspace and (optionally) one or two people for persona pinning. If you skip the people, ish synthesizes both personas from the \`assignment\` text per-run — fine for one-shot rehearsals, drifts between iterations.
|
|
166
|
+
- **Audience**: optional. For persona continuity across iterations, build profiles via \`group_build\` (or reuse via \`profile_list\`) and pass \`audience: { person_ids: [...] }\` to \`study_run\` — the same profiles play the same roles each time.
|
|
164
167
|
- **Load-bearing knobs**:
|
|
165
168
|
- \`assignment\` — encodes BOTH personas and what each is trying to do. More prose-heavy than other assignments; be specific. Example: \`"Founder pitches Series A to skeptical VC. Founder: defends AI customer-support startup, $2M ARR, 15% MoM. VC: thinks SaaS-for-SaaS is saturated, probes moat and unit economics."\`
|
|
166
169
|
- \`count\` — typically 1 per run; set higher to generate variations.
|
|
167
|
-
- **Iterating the scenario** (turn-by-turn refinement): create a **new iteration** with a revised assignment; reuse the same \`
|
|
170
|
+
- **Iterating the scenario** (turn-by-turn refinement): create a **new iteration** with a revised assignment; reuse the same \`person_ids\` if you pinned personas. See "Lifecycle".
|
|
168
171
|
- **Output**: a full transcript per rehearsal.
|
|
169
172
|
|
|
170
|
-
### Generate a fresh
|
|
173
|
+
### Generate a fresh group → \`group_build\`
|
|
171
174
|
|
|
172
175
|
- **Input**: a \`description\`, a \`count\`, and optionally \`sources\` (transcripts / audio / images / docs that seed persona generation — for "make profiles that feel like these real customers"). Local files force CLI (binary upload constraint).
|
|
173
|
-
- **Output**: a list of \`
|
|
176
|
+
- **Output**: a list of \`person_ids\` to pass into \`ask_run\` or \`study_run\`.
|
|
174
177
|
- **Cost**: slow (~30-120s) + credit-bearing. Reuse profiles via \`profile_list\` when possible. Sensible defaults: \`count: 5-10\` for ad-hoc tests, \`count: 20+\` for studies where you want statistical signal.
|
|
175
|
-
- **Growing
|
|
178
|
+
- **Growing a group of people**: build only the delta — don't rebuild. Concatenate the new \`person_ids\` with the existing ones for the next run. The "audience is a query" framing means there's no audience entity to update.
|
|
176
179
|
- **Shapes**:
|
|
177
180
|
\`\`\`
|
|
178
181
|
// Simple — description only
|
|
179
|
-
|
|
182
|
+
group_build({
|
|
180
183
|
description: "Parents of toddlers (ages 1-3), US, evening-routine focused",
|
|
181
184
|
count: 8,
|
|
182
185
|
})
|
|
183
|
-
// → {
|
|
186
|
+
// → { person_ids: ["p-…", ...] }
|
|
184
187
|
|
|
185
188
|
// Seeded from real transcripts (CLI only for local files)
|
|
186
|
-
// ish
|
|
189
|
+
// ish person generate --description "..." --count 10 \\
|
|
187
190
|
// --source @./interviews/customer-1.md \\
|
|
188
191
|
// --source @./interviews/customer-2.md
|
|
189
192
|
\`\`\`
|
|
@@ -194,27 +197,27 @@ The most common multi-turn question: "user wants to change X — re-use the exis
|
|
|
194
197
|
|
|
195
198
|
| Change you want | What to do |
|
|
196
199
|
|---|---|
|
|
197
|
-
| Same ask, **same
|
|
198
|
-
| Same ask, **different
|
|
200
|
+
| Same ask, **same participants**, new variants | Pass \`ask_id\` (MCP) or \`--ask\` (CLI) on \`ask_run\` — re-uses the locked participants. |
|
|
201
|
+
| Same ask, **different participants** | New ask: omit \`ask_id\` (MCP) or pass \`--new\` (CLI). Participants are locked at ask creation. |
|
|
199
202
|
| Same study, **new media** (v2 deck, new image) | New **iteration** on the same study (\`iteration_create({ study_id, content_url \\| --media @path })\`). Iterations are immutable once they have results — never edit. |
|
|
200
|
-
| Same study, **new assignment** | **New study.** Assignment lives on the study; there's no in-place edit. Keep the old study's id for side-by-side comparison. *(
|
|
201
|
-
| Same
|
|
203
|
+
| Same study, **new assignment** | **New study.** Assignment lives on the study; there's no in-place edit. Keep the old study's id for side-by-side comparison. *(Participant-pair exception: the assignment IS the content there — use a new **iteration** on the same study, not a new study.)* |
|
|
204
|
+
| Same people across multiple runs / studies | Reuse the \`person_ids\` array. Profiles are workspace-scoped resources (\`p-…\`) — they live independently of any ask or study. |
|
|
202
205
|
| Chat endpoint definition needs to change (auth rotate, URL change) | \`chat_endpoint_init\` is **upsert-by-name** — re-init with the same \`name\` and a new \`from_curl\` spec. Re-run \`chat_endpoint_test\` to confirm. |
|
|
203
|
-
| Persona reuse in
|
|
206
|
+
| Persona reuse in participant-pair | Pin via \`person_ids\` on the first \`study_run\`; pass the same ids on subsequent runs. Without pinning, personas are re-synthesized from the assignment per run. |
|
|
204
207
|
|
|
205
208
|
When in doubt: side-by-side comparison usually beats in-place edits. Ids are cheap; result history isn't.
|
|
206
209
|
|
|
207
210
|
## Pitfalls
|
|
208
211
|
|
|
209
212
|
- **Cold start on free plan**: \`workspace_create\` returns \`usage_limit_reached\` at the free-plan cap (1 workspace). Always inspect with \`workspace_list\` first. **MCP-only recipe** (no \`--ensure\` available): \`workspace_list\` → if non-empty, use the first; if empty, \`workspace_create\`; if \`workspace_create\` returns \`usage_limit_reached\`, re-call \`workspace_list\` (a workspace exists you didn't see — possibly created by another session). **CLI shortcut**: \`ish workspace create --name <name> --ensure\` is idempotent by name.
|
|
210
|
-
- **Ask
|
|
213
|
+
- **Ask participants vs variants** — see Lifecycle table for the re-use vs new-ask decision.
|
|
211
214
|
- **Study iterations are immutable once they have results** — see Lifecycle table for new-iteration vs new-study.
|
|
212
|
-
- **Credit costs**: \`ask_run\`, \`study_run\`, and \`
|
|
213
|
-
- **\`
|
|
215
|
+
- **Credit costs**: \`ask_run\`, \`study_run\`, and \`group_build\` consume credits. Check \`workspace_get\`'s \`credits\` headroom before dispatching large runs. For free-plan ad-hoc tests, default \`count: 5-8\` participants + 2 variants is usually within budget.
|
|
216
|
+
- **\`group_build\` may return fewer profiles than requested** if the description is over-constrained. Always read the returned \`person_ids\` count, don't trust the requested \`count\` blindly.
|
|
214
217
|
- **Variants of wildly different length** (one-line vs paragraph) can skew picks toward the longer one. Keep variants comparable in shape.
|
|
215
|
-
- **Chatbot endpoint response-shape mismatch**: \`chat_endpoint_test\` succeeds shallowly if the bot responds at all, but a wrong response path (e.g. bot returns \`{ data: { reply } }\` instead of \`{ reply }\`) produces empty transcripts on the actual run. Inspect one full test response before dispatching
|
|
218
|
+
- **Chatbot endpoint response-shape mismatch**: \`chat_endpoint_test\` succeeds shallowly if the bot responds at all, but a wrong response path (e.g. bot returns \`{ data: { reply } }\` instead of \`{ reply }\`) produces empty transcripts on the actual run. Inspect one full test response before dispatching participants.
|
|
216
219
|
- **Chatbot auth drift**: tokens/sessions baked into \`--from-curl\` expire. If transcripts come back as identical short error strings, re-run \`chat_endpoint_test\` and refresh the curl spec.
|
|
217
|
-
- **401 surfaces as fake blocker**: an unauthenticated endpoint produces "
|
|
220
|
+
- **401 surfaces as fake blocker**: an unauthenticated endpoint produces "participant got stuck on auth screen" — looks like a UX blocker but is config. Always confirm endpoint auth before reading transcripts as user-research data.
|
|
218
221
|
- **No per-page/per-timestamp scoping for media**: there's no "evaluate just slide 14" or "react to seconds 0-30" API. State the focus explicitly in the \`assignment\` text, or pre-stitch the artifact (e.g. replace one slide locally, upload as a new iteration).
|
|
219
222
|
|
|
220
223
|
## When in doubt
|
|
@@ -232,7 +235,7 @@ Each workflow below is a complete transcript an agent can adapt. Run
|
|
|
232
235
|
## 1. First study from zero
|
|
233
236
|
|
|
234
237
|
Goal: from a fresh install to a finished interactive study with 3
|
|
235
|
-
|
|
238
|
+
participants and one question.
|
|
236
239
|
|
|
237
240
|
\`\`\`bash
|
|
238
241
|
# 1. Authenticate (browser flow, saves tokens to ~/.ish/config.json)
|
|
@@ -242,8 +245,8 @@ ish login
|
|
|
242
245
|
ish workspace create --name "Demo" --base-url https://example.com
|
|
243
246
|
ish workspace use w-…
|
|
244
247
|
|
|
245
|
-
# 3. Generate a small
|
|
246
|
-
ish
|
|
248
|
+
# 3. Generate a small group of people
|
|
249
|
+
ish person generate \\
|
|
247
250
|
--description "Tech-savvy millennials in the US who use mobile banking" \\
|
|
248
251
|
--count 3
|
|
249
252
|
|
|
@@ -266,6 +269,33 @@ ish study run --all --wait
|
|
|
266
269
|
ish study results --json | jq .
|
|
267
270
|
\`\`\`
|
|
268
271
|
|
|
272
|
+
### 1a. Give the assignment a step-by-step checklist
|
|
273
|
+
|
|
274
|
+
When "did they finish?" is a checklist rather than a single yes/no, attach
|
|
275
|
+
\`steps\` to the assignment. Steps are JSON-only (no inline shorthand) and
|
|
276
|
+
honored for **interactive** + **external_chatbot chat** modalities only.
|
|
277
|
+
|
|
278
|
+
\`\`\`bash
|
|
279
|
+
# assignments.json
|
|
280
|
+
# [
|
|
281
|
+
# { "name": "Buy", "instructions": "Add an item to cart and check out",
|
|
282
|
+
# "steps": [
|
|
283
|
+
# { "name": "Find a product", "description": "Browse to any item" },
|
|
284
|
+
# { "name": "Add to cart" },
|
|
285
|
+
# { "name": "Complete checkout" }
|
|
286
|
+
# ] }
|
|
287
|
+
# ]
|
|
288
|
+
ish study create --name "Checkout" --modality interactive \\
|
|
289
|
+
--url https://shop.example.com \\
|
|
290
|
+
--assignments-file ./assignments.json
|
|
291
|
+
ish study use s-…
|
|
292
|
+
ish study run --all --wait
|
|
293
|
+
|
|
294
|
+
# After the run, each step gets a pass-rate rollup:
|
|
295
|
+
ish study get s-… # human: "✓ Add to cart 4/5 (80%)" per step
|
|
296
|
+
ish study get s-… --json --verbose # step_completion[] incl. sample_failures[].participant_id
|
|
297
|
+
\`\`\`
|
|
298
|
+
|
|
269
299
|
## 2. Quick A/B ask with image variants
|
|
270
300
|
|
|
271
301
|
Goal: ship 30 simulated reactions to two hero images, with a "which do
|
|
@@ -289,7 +319,7 @@ adds an \`aggregates\` field per round with \`picks\`, \`ratings\` (mean
|
|
|
289
319
|
+ n per variant), and a \`winner\`. See \`ish docs get-page
|
|
290
320
|
reference/json-mode\` for the full shape.
|
|
291
321
|
|
|
292
|
-
Add a follow-up round with no
|
|
322
|
+
Add a follow-up round with no participant change:
|
|
293
323
|
|
|
294
324
|
\`\`\`bash
|
|
295
325
|
ish ask run --prompt "Which one would you click on?" \\
|
|
@@ -300,29 +330,41 @@ ish ask run --prompt "Which one would you click on?" \\
|
|
|
300
330
|
|
|
301
331
|
## 3. Generate profiles from a real source
|
|
302
332
|
|
|
303
|
-
Goal: turn a customer interview transcript into a 4-
|
|
333
|
+
Goal: turn a customer interview transcript into a 4-person group.
|
|
334
|
+
|
|
335
|
+
\`person generate\` is an async agentic job: it reads your brief and any
|
|
336
|
+
uploaded sources (transcripts, emails, PDFs, audio, images) describing how
|
|
337
|
+
real people reacted, then produces profiles PLUS scenarios grounded in those
|
|
338
|
+
reactions. It enqueues, polls ~30-60s, then prints the profiles (with
|
|
339
|
+
scenarios attached unless \`--no-scenarios\`). \`--json\` returns
|
|
340
|
+
\`{job: {person_ids}, profiles: [...]}\`.
|
|
304
341
|
|
|
305
342
|
\`\`\`bash
|
|
306
343
|
# Inline — auto-uploads the file:
|
|
307
|
-
ish
|
|
344
|
+
ish person generate --source ./interviews/sarah.txt --count 4
|
|
345
|
+
|
|
346
|
+
# The per-source note is the researcher's: how the person reacted to THAT file.
|
|
347
|
+
ish source upload ./proposal.eml --description "called this proposal lazy and vague"
|
|
348
|
+
# → ps-3a4 (status: processed)
|
|
349
|
+
ish person generate --description "Skeptical enterprise buyer" --source ps-3a4 --count 1 --json
|
|
308
350
|
|
|
309
351
|
# Or upload once and reuse the source alias:
|
|
310
352
|
ish source upload ./call.mp3 --diarize
|
|
311
|
-
# →
|
|
312
|
-
ish
|
|
353
|
+
# → ps-3a4 (status: processed)
|
|
354
|
+
ish person generate --source ps-3a4 --propose-count
|
|
313
355
|
# → { proposed_count: 4, rationale: "..." }
|
|
314
|
-
ish
|
|
356
|
+
ish person generate --source ps-3a4 --count 4
|
|
315
357
|
\`\`\`
|
|
316
358
|
|
|
317
|
-
## 4. Build a specific simulated
|
|
359
|
+
## 4. Build a specific simulated person from notes
|
|
318
360
|
|
|
319
361
|
Goal: rebuild one named persona (a real prospect, a stakeholder for
|
|
320
362
|
a pitch rehearsal) via the iterative probe loop — distinct from
|
|
321
|
-
\`
|
|
363
|
+
\`person generate\`, which is for groups.
|
|
322
364
|
|
|
323
365
|
\`\`\`bash
|
|
324
366
|
# 1. Suggest 5 probes from a context blob
|
|
325
|
-
ish
|
|
367
|
+
ish person suggest-scenarios \\
|
|
326
368
|
--context "Staff platform engineer at a Stripe-using fintech. \\
|
|
327
369
|
Owns oncall for the payments edge. Burned by a Black Friday \\
|
|
328
370
|
outage last year." \\
|
|
@@ -333,33 +375,33 @@ ish profile suggest-scenarios \\
|
|
|
333
375
|
# [{"text":"...","source":"situation","scenario_prompt":"..."}, ...]
|
|
334
376
|
# Valid source values: situation, voice, binary, micro-story
|
|
335
377
|
|
|
336
|
-
# 3. Save the
|
|
337
|
-
ish
|
|
338
|
-
# →
|
|
378
|
+
# 3. Save the person shell
|
|
379
|
+
ish person create --file ./persona.json
|
|
380
|
+
# → p-d4e
|
|
339
381
|
|
|
340
382
|
# 4. Persist the answers as structured evidence
|
|
341
|
-
ish
|
|
383
|
+
ish person evidence add p-d4e --traces-file ./answers.json
|
|
342
384
|
|
|
343
385
|
# 5. Read back what's saved (also useful before the next probe round)
|
|
344
|
-
ish
|
|
386
|
+
ish person evidence list p-d4e
|
|
345
387
|
\`\`\`
|
|
346
388
|
|
|
347
389
|
To iterate, feed prior prompts/answers back in so the LLM doesn't
|
|
348
390
|
paraphrase what you already asked:
|
|
349
391
|
|
|
350
392
|
\`\`\`bash
|
|
351
|
-
ish
|
|
393
|
+
ish person suggest-scenarios \\
|
|
352
394
|
--context-file ./notes.md --count 3 \\
|
|
353
395
|
--already-surfaced '["PagerDuty fires at 02:00."]' \\
|
|
354
396
|
--previous-answers @./answers.json
|
|
355
397
|
\`\`\`
|
|
356
398
|
|
|
357
|
-
See \`ish docs get-page guides/build-specific-
|
|
399
|
+
See \`ish docs get-page guides/build-specific-person\` for the full
|
|
358
400
|
walkthrough including the four probe-type shapes.
|
|
359
401
|
|
|
360
402
|
## 5. Target a gated URL (Vercel preview / staging gate / login form)
|
|
361
403
|
|
|
362
|
-
Configure credentials once on the workspace;
|
|
404
|
+
Configure credentials once on the workspace; participants reuse them.
|
|
363
405
|
|
|
364
406
|
\`\`\`bash
|
|
365
407
|
# Show what's configured:
|
|
@@ -371,7 +413,7 @@ ish workspace site-access basic-auth --username alice --password hunter2
|
|
|
371
413
|
# Session cookie (Vercel preview, Lovable, etc.):
|
|
372
414
|
ish workspace site-access cookie --name session --value abc123
|
|
373
415
|
|
|
374
|
-
# Login form (typed by the
|
|
416
|
+
# Login form (typed by the participant into the page):
|
|
375
417
|
ish workspace site-access login --username demo --password demo
|
|
376
418
|
\`\`\`
|
|
377
419
|
|
|
@@ -383,28 +425,28 @@ printf %s "$STAGING_PW" | ish workspace site-access basic-auth \\
|
|
|
383
425
|
--username alice --password -
|
|
384
426
|
\`\`\`
|
|
385
427
|
|
|
386
|
-
## 6. Re-run a study with a fresh
|
|
428
|
+
## 6. Re-run a study with a fresh group
|
|
387
429
|
|
|
388
|
-
Goal: same study, same iteration, but compare
|
|
430
|
+
Goal: same study, same iteration, but compare groups.
|
|
389
431
|
|
|
390
432
|
\`\`\`bash
|
|
391
433
|
# First run — Swedish 35-50:
|
|
392
434
|
ish study run --country SE --min-age 35 --max-age 50 --sample 5 --wait
|
|
393
435
|
|
|
394
|
-
# Second run — every female
|
|
436
|
+
# Second run — every female person in the workspace, same iteration:
|
|
395
437
|
ish study run --gender female --all --wait
|
|
396
438
|
|
|
397
|
-
# Free-text filters: --search matches the
|
|
398
|
-
# matches the
|
|
439
|
+
# Free-text filters: --search matches the person **name**, --bio
|
|
440
|
+
# matches the person **bio**, --occupation matches the person
|
|
399
441
|
# **occupation** (repeatable, OR-joined). All are case-insensitive
|
|
400
|
-
# substrings — the same flag set works on \`ish
|
|
401
|
-
# \`ish ask run\`, \`ish ask add-
|
|
442
|
+
# substrings — the same flag set works on \`ish person list\`,
|
|
443
|
+
# \`ish ask run\`, \`ish ask add-people\`, and \`ish ask create\`.
|
|
402
444
|
ish study run --bio "screen reader" --all --wait
|
|
403
445
|
ish study run --occupation founder --occupation designer --sample 6 --wait
|
|
404
446
|
\`\`\`
|
|
405
447
|
|
|
406
|
-
If you don't pass any
|
|
407
|
-
iteration's existing
|
|
448
|
+
If you don't pass any people flags, \`ish study run\` reuses the
|
|
449
|
+
iteration's existing participants — useful for re-running after fixing the
|
|
408
450
|
target page.
|
|
409
451
|
|
|
410
452
|
## 7. Localhost target (dev environment)
|
|
@@ -438,9 +480,9 @@ ish iteration create --url "$URL"
|
|
|
438
480
|
The chat modality has **two modes**, picked by
|
|
439
481
|
\`iteration.details.mode_details.mode\`:
|
|
440
482
|
|
|
441
|
-
- **\`external_chatbot\`** —
|
|
483
|
+
- **\`external_chatbot\`** — participants probe a customer chatbot endpoint
|
|
442
484
|
(the original chat behaviour). Audience size is set on \`study run\`.
|
|
443
|
-
- **\`
|
|
485
|
+
- **\`participant_pair\`** — two AI people converse with each
|
|
444
486
|
other. Each side has its own scenario + goal; the other side does
|
|
445
487
|
not see it (asymmetry contract). Audiences are pinned to the
|
|
446
488
|
iteration: equal counts zip 1:1 by index, or one side of 1
|
|
@@ -488,12 +530,12 @@ ish chat endpoint get "$ID" --verbose \\
|
|
|
488
530
|
| ish chat endpoint update "$ID" --endpoint-config -
|
|
489
531
|
|
|
490
532
|
# 4. Run a chat-modality study referencing the endpoint. Audience size
|
|
491
|
-
# is set on study run, not study create (--sample, --all, --
|
|
533
|
+
# is set on study run, not study create (--sample, --all, --person).
|
|
492
534
|
STUDY=$(ish study create --modality chat --endpoint "$ID" \\
|
|
493
535
|
--name "Sign-up Q1" --assignment "Sign up:Try to sign up" \\
|
|
494
536
|
| jq -r .id)
|
|
495
537
|
ish study run --study "$STUDY" --sample 5 --wait
|
|
496
|
-
ish study results "$STUDY" --json | jq '.
|
|
538
|
+
ish study results "$STUDY" --json | jq '.participants'
|
|
497
539
|
\`\`\`
|
|
498
540
|
|
|
499
541
|
For stateful bots, thread \`conversation_id\` across single-turn
|
|
@@ -528,21 +570,21 @@ into \`update --endpoint-config -\`. Field-shorthand flags
|
|
|
528
570
|
without round-tripping.
|
|
529
571
|
|
|
530
572
|
Failed chat workers surface their error in
|
|
531
|
-
\`study results --json\` under \`
|
|
573
|
+
\`study results --json\` under \`participants[].error_message\` and
|
|
532
574
|
also in \`study poll --json\`. Branch on it instead of treating
|
|
533
575
|
\`interaction_count: 0\` as a generic failure.
|
|
534
576
|
|
|
535
577
|
Pre-flight tip: \`ish workspace info\` exposes
|
|
536
|
-
\`{studies_used, studies_max,
|
|
578
|
+
\`{studies_used, studies_max, participants_used, participants_max, tier}\` so
|
|
537
579
|
you can branch on plan caps before \`study create\` returns
|
|
538
580
|
\`error_code: usage_limit_reached\`.
|
|
539
581
|
|
|
540
582
|
The full reference is at \`ish docs get-page guides/chat\`,
|
|
541
583
|
secrets are at \`ish docs get-page concepts/secret\`.
|
|
542
584
|
|
|
543
|
-
### 7b.
|
|
585
|
+
### 7b. participant_pair — rehearse a two-AI conversation
|
|
544
586
|
|
|
545
|
-
Goal: pit two AI
|
|
587
|
+
Goal: pit two AI people against each other to see how a
|
|
546
588
|
two-role conversation unfolds — a sales rep vs. a skeptical CTO, a
|
|
547
589
|
founder vs. an investor archetype, a manager vs. a direct report
|
|
548
590
|
ahead of a difficult conversation. Each side has its own scenario
|
|
@@ -552,10 +594,10 @@ what makes the rehearsal credible).
|
|
|
552
594
|
One-shot study + iteration:
|
|
553
595
|
|
|
554
596
|
\`\`\`bash
|
|
555
|
-
ish study create --modality chat --chat-mode
|
|
597
|
+
ish study create --modality chat --chat-mode participant_pair \\
|
|
556
598
|
--name "Pitch rehearsal" \\
|
|
557
|
-
--
|
|
558
|
-
--
|
|
599
|
+
--group-a p-sales-1,p-sales-2 \\
|
|
600
|
+
--group-b p-cto-skeptic-1,p-cto-skeptic-2 \\
|
|
559
601
|
--scenario-a "You are a senior sales rep pitching ish to a new prospect." \\
|
|
560
602
|
--scenario-b "You are a skeptical CTO; surface risks before agreeing to a pilot." \\
|
|
561
603
|
--assignment "Pitch:Try to land a pilot"
|
|
@@ -566,50 +608,50 @@ ish study run -y
|
|
|
566
608
|
Or add a pair iteration to an existing chat study:
|
|
567
609
|
|
|
568
610
|
\`\`\`bash
|
|
569
|
-
ish iteration create --study s-... --chat-mode
|
|
570
|
-
--
|
|
611
|
+
ish iteration create --study s-... --chat-mode participant_pair \\
|
|
612
|
+
--group-a p-a1,p-a2 --group-b p-b1,p-b2 \\
|
|
571
613
|
--scenario-a @./scenario_a.md --scenario-b @./scenario_b.md \\
|
|
572
614
|
--max-turns 14
|
|
573
615
|
\`\`\`
|
|
574
616
|
|
|
575
617
|
Rules to remember:
|
|
576
|
-
- Each side needs **either** \`--
|
|
618
|
+
- Each side needs **either** \`--group-*\` (explicit IDs) **or**
|
|
577
619
|
\`--role-criteria-*\` (a filter the backend resolves). They can also
|
|
578
620
|
be combined — criteria then validates the explicit list.
|
|
579
|
-
- When **both sides** use explicit \`--
|
|
580
|
-
must be the same length (≥ 1). Pairs run 1:1 by index. Same
|
|
621
|
+
- When **both sides** use explicit \`--group-a\` / \`--group-b\`, they
|
|
622
|
+
must be the same length (≥ 1). Pairs run 1:1 by index. Same person
|
|
581
623
|
on both sides is allowed (self-talk rehearsal).
|
|
582
|
-
- **1×N broadcast**: pass exactly one
|
|
624
|
+
- **1×N broadcast**: pass exactly one person on one side and N on
|
|
583
625
|
the other to rehearse one fixed side against N variations. The CLI
|
|
584
626
|
auto-broadcasts the singleton to match. E.g.
|
|
585
|
-
\`--
|
|
627
|
+
\`--group-a p-rep --group-b p-cto1,p-cto2,p-cto3\` → 3
|
|
586
628
|
conversations, same rep, three different CTOs. Stderr notice fires
|
|
587
629
|
when broadcasting kicks in.
|
|
588
630
|
- Both \`--scenario-a\` and \`--scenario-b\` are required and asymmetric.
|
|
589
631
|
Use \`@./file.md\` to read from disk.
|
|
590
632
|
- \`--initiator-side\` (\`a\` default) picks who speaks first.
|
|
591
|
-
- \`--chat-mode\` accepts both \`
|
|
633
|
+
- \`--chat-mode\` accepts both \`participant_pair\` and \`participant-pair\`.
|
|
592
634
|
The same hyphen/underscore tolerance applies to \`--screen-format\`,
|
|
593
635
|
\`--kind\` on \`source upload\`, and the question \`type\` field in
|
|
594
636
|
\`--questionnaire\` / \`--questions\` manifests.
|
|
595
637
|
- Audiences are **authoritative on the iteration**.
|
|
596
|
-
\`ish study run\` refuses \`--
|
|
638
|
+
\`ish study run\` refuses \`--person\` / \`--sample\` / \`--all\` /
|
|
597
639
|
demographic filters on a pair iteration with a clear error. To
|
|
598
|
-
change
|
|
640
|
+
change groups, update the iteration via
|
|
599
641
|
\`ish iteration update <id> --details-json '{...}'\`.
|
|
600
642
|
- \`--max-turns\` / \`--early-termination\` on \`study run\` override the
|
|
601
643
|
iteration's saved values for that single dispatch (they don't
|
|
602
644
|
persist back to the iteration).
|
|
603
645
|
- Dispatch is per-Conversation (one task per pair). Per-Conversation
|
|
604
646
|
summaries (\`end_reason\`, \`dominant_dynamic\`, \`who_steered\`) land on
|
|
605
|
-
\`iteration.conversations[]\`. Per-
|
|
606
|
-
\`
|
|
647
|
+
\`iteration.conversations[]\`. Per-participant summaries land on
|
|
648
|
+
\`participant.summary\` as before.
|
|
607
649
|
|
|
608
|
-
### Filtering
|
|
650
|
+
### Filtering groups with role criteria (persona-first)
|
|
609
651
|
|
|
610
652
|
\`--role-criteria-a\` / \`--role-criteria-b\` accept a JSON object (or
|
|
611
653
|
\`@./file.json\`) describing who's eligible for that side. The
|
|
612
|
-
backend resolves the matching
|
|
654
|
+
backend resolves the matching person pool and persists the
|
|
613
655
|
IDs on the iteration. Keys (all optional):
|
|
614
656
|
|
|
615
657
|
\`\`\`json
|
|
@@ -632,8 +674,8 @@ IDs on the iteration. Keys (all optional):
|
|
|
632
674
|
\`\`\`
|
|
633
675
|
|
|
634
676
|
The five \`*_in\` arrays accept snake_case spec values verbatim
|
|
635
|
-
(see \`https://ishlabs.io/spec/
|
|
636
|
-
accessibility filters are coarse booleans over each
|
|
677
|
+
(see \`https://ishlabs.io/spec/person-enums.v1.json\`). The five
|
|
678
|
+
accessibility filters are coarse booleans over each participant's
|
|
637
679
|
\`accessibility_profile\` JSONB.
|
|
638
680
|
|
|
639
681
|
MECE rules for the list filters:
|
|
@@ -641,13 +683,13 @@ MECE rules for the list filters:
|
|
|
641
683
|
children; \`couple_no_kids\` is strictly child-free. \`single\` means
|
|
642
684
|
lives alone with no partner, roommates, parents, or children
|
|
643
685
|
sharing the household.
|
|
644
|
-
- \`employment_status_in\`: pick the
|
|
686
|
+
- \`employment_status_in\`: pick the participant's primary daytime
|
|
645
687
|
activity. A student who works 15 hrs/week is \`student\`; a retiree
|
|
646
688
|
who freelances is \`retired\`.
|
|
647
689
|
|
|
648
|
-
The **persona-first** principle: the
|
|
690
|
+
The **persona-first** principle: the participant's persona is sacred and
|
|
649
691
|
the LLM prompt construction does not change. Criteria filter the
|
|
650
|
-
*eligible pool* upstream so that by the time a
|
|
692
|
+
*eligible pool* upstream so that by the time a participant reaches the
|
|
651
693
|
prompt, their persona is already plausible for the role described
|
|
652
694
|
in \`scenario_*\`. Don't cram demographic constraints into the
|
|
653
695
|
scenario text — that breaks the asymmetry contract and produces
|
|
@@ -658,7 +700,7 @@ pick who plays the role.
|
|
|
658
700
|
If the resolved pool is smaller than the requested count for a side,
|
|
659
701
|
\`ish study run\` exits 2 with the backend's pool-too-small error
|
|
660
702
|
intact. Broaden the criteria, generate more profiles
|
|
661
|
-
(\`ish
|
|
703
|
+
(\`ish person generate\`), or fall back to explicit \`--group-a\` / \`--group-b\`.
|
|
662
704
|
|
|
663
705
|
### Rehearsing against N variations of one side (1×N)
|
|
664
706
|
|
|
@@ -667,11 +709,11 @@ The most common rehearsal shape: fix one side, vary the other.
|
|
|
667
709
|
|
|
668
710
|
\`\`\`bash
|
|
669
711
|
# 1. Generate N distinct profiles for the varying side (or pick
|
|
670
|
-
# existing ones via \`ish
|
|
671
|
-
ish
|
|
712
|
+
# existing ones via \`ish person list\`).
|
|
713
|
+
ish person generate \\
|
|
672
714
|
--description "Skeptical CTO at a Series B SaaS startup" \\
|
|
673
715
|
--count 3 --json | jq -r '.items[].alias'
|
|
674
|
-
# →
|
|
716
|
+
# → p-cto1, p-cto2, p-cto3
|
|
675
717
|
|
|
676
718
|
# 2. Write the two scenarios as separate files. Each is a system
|
|
677
719
|
# prompt for ONE role; the partner never sees it. Cover voice,
|
|
@@ -682,15 +724,15 @@ ish profile generate \\
|
|
|
682
724
|
# ./sales_rep.md — the user's pitch + goals
|
|
683
725
|
# ./skeptical_cto.md — CTO's posture + concerns
|
|
684
726
|
|
|
685
|
-
# 3. Create the iteration with ONE
|
|
727
|
+
# 3. Create the iteration with ONE person on the fixed side and
|
|
686
728
|
# N on the varying side. CLI auto-broadcasts the singleton and
|
|
687
|
-
# prints a stderr notice ("Broadcasting --
|
|
729
|
+
# prints a stderr notice ("Broadcasting --group-a (1 person)
|
|
688
730
|
# to length 3…") so you see the expansion.
|
|
689
731
|
ish study create \\
|
|
690
|
-
--modality chat --chat-mode
|
|
732
|
+
--modality chat --chat-mode participant_pair \\
|
|
691
733
|
--name "Pitch rehearsal — 3 CTO variants" \\
|
|
692
|
-
--
|
|
693
|
-
--
|
|
734
|
+
--group-a p-rep \\
|
|
735
|
+
--group-b p-cto1,p-cto2,p-cto3 \\
|
|
694
736
|
--scenario-a @./sales_rep.md \\
|
|
695
737
|
--scenario-b @./skeptical_cto.md \\
|
|
696
738
|
--assignment "Pitch:Land a pilot or a clear next step"
|
|
@@ -705,11 +747,11 @@ ish iteration get <iter-id> --json \\
|
|
|
705
747
|
\`\`\`
|
|
706
748
|
|
|
707
749
|
The CLI emits a stderr notice when it broadcasts ("Broadcasting
|
|
708
|
-
--
|
|
750
|
+
--group-a (1 person) to length 3…") so you can see the
|
|
709
751
|
expansion happen.
|
|
710
752
|
|
|
711
753
|
**Criteria alternative**: \`--role-criteria-b '{"occupation":["cto"]}'\`
|
|
712
|
-
on a single \`--
|
|
754
|
+
on a single \`--group-a p-rep\` lets the backend pick the CTOs.
|
|
713
755
|
Less control over distinctness — for guaranteed variety, generate
|
|
714
756
|
explicit profiles first.
|
|
715
757
|
|
|
@@ -746,18 +788,18 @@ Inspect after running:
|
|
|
746
788
|
\`\`\`bash
|
|
747
789
|
ish iteration get <iter-id> --json \\
|
|
748
790
|
| jq '.details.mode_details.mode, .conversations[]'
|
|
749
|
-
ish study results <study-id> --transcript <
|
|
791
|
+
ish study results <study-id> --transcript <participant-id> --json
|
|
750
792
|
\`\`\`
|
|
751
793
|
|
|
752
794
|
## 9. Stage an ask for human review, then dispatch
|
|
753
795
|
|
|
754
796
|
Goal: prepare a billable A/B but let the user inspect and approve the
|
|
755
|
-
|
|
797
|
+
people + prompt before any credits are spent. Two-step flow with a
|
|
756
798
|
DRAFT status in between.
|
|
757
799
|
|
|
758
800
|
\`\`\`bash
|
|
759
801
|
# 1. Stage. No worker enqueued, no bill. Audience flags are still
|
|
760
|
-
# required —
|
|
802
|
+
# required — participants materialize at create time.
|
|
761
803
|
ASK=$(ish ask create --name "tagline AB" \\
|
|
762
804
|
--prompt "Which sounds better?" \\
|
|
763
805
|
--variant text:"Short and punchy." \\
|
|
@@ -768,7 +810,7 @@ ASK=$(ish ask create --name "tagline AB" \\
|
|
|
768
810
|
|
|
769
811
|
# Hand the alias back to the user. They can inspect it:
|
|
770
812
|
# ish ask get "$ASK" # status: draft
|
|
771
|
-
# ish ask get "$ASK" --json | jq '.
|
|
813
|
+
# ish ask get "$ASK" --json | jq '.participants | length'
|
|
772
814
|
|
|
773
815
|
# 2. Dispatch once approved (BILLABLE). Idempotent: a non-DRAFT ask
|
|
774
816
|
# returns 409 mapped to exit 2, so re-running is safe.
|
|
@@ -812,9 +854,9 @@ The mental rule: **\`--get\` is for capture, bare commands / \`--human\`
|
|
|
812
854
|
are for display, \`--json\` is for chaining (multiple fields at once).**
|
|
813
855
|
If you find yourself reaching for \`jq -r .x\`, you wanted \`--get x\`.
|
|
814
856
|
|
|
815
|
-
## 11. Extend a
|
|
857
|
+
## 11. Extend a participant past its step cap (or redirect mid-run)
|
|
816
858
|
|
|
817
|
-
Goal: a
|
|
859
|
+
Goal: a participant hit the \`--max-interactions\` cap before finishing, or
|
|
818
860
|
veered off into the wrong flow. Resume it with more steps and an
|
|
819
861
|
optional mid-run instruction — without re-running the whole cohort.
|
|
820
862
|
|
|
@@ -822,13 +864,13 @@ optional mid-run instruction — without re-running the whole cohort.
|
|
|
822
864
|
# 1. Source run with a small cap to feel the limit:
|
|
823
865
|
ish study run --sample 1 --max-interactions 5 --wait
|
|
824
866
|
SRC=$(ish study run --sample 1 --max-interactions 5 --wait \\
|
|
825
|
-
--get
|
|
867
|
+
--get participant_aliases | head -1)
|
|
826
868
|
|
|
827
869
|
# 2. Inspect what stopped (optional, useful for the LLM to choose
|
|
828
870
|
# a redirect instruction):
|
|
829
|
-
ish study
|
|
871
|
+
ish study participant "$SRC" --summary
|
|
830
872
|
|
|
831
|
-
# 3a. Add 15 more steps, no new instruction — let the
|
|
873
|
+
# 3a. Add 15 more steps, no new instruction — let the participant continue:
|
|
832
874
|
ish study extend "$SRC" --add-steps 15 --wait --timeout 600
|
|
833
875
|
|
|
834
876
|
# 3b. OR redirect with a mid-run instruction (captured as user_message;
|
|
@@ -837,20 +879,20 @@ ish study extend "$SRC" \\
|
|
|
837
879
|
--instruction "Stop browsing the blog. Open the pricing page and try to upgrade to Pro." \\
|
|
838
880
|
--add-steps 10 --wait
|
|
839
881
|
|
|
840
|
-
# 4. Capture the new
|
|
841
|
-
NEW=$(ish study extend "$SRC" --add-steps 10 --get
|
|
842
|
-
ish study
|
|
882
|
+
# 4. Capture the new participant alias to chain into results:
|
|
883
|
+
NEW=$(ish study extend "$SRC" --add-steps 10 --get participant_alias)
|
|
884
|
+
ish study participant "$NEW" --summary
|
|
843
885
|
\`\`\`
|
|
844
886
|
|
|
845
887
|
Rules to remember:
|
|
846
|
-
- Source
|
|
888
|
+
- Source participant must be **terminal** (\`completed\` / \`failed\` /
|
|
847
889
|
\`cancelled\`). If it's still running, \`ish study cancel <src>\` first.
|
|
848
890
|
\`cancel\` is non-destructive — every interaction, screenshot, and
|
|
849
891
|
questionnaire answer survives. \`cancel\` + \`extend\` form a
|
|
850
892
|
reversible stop/start pair.
|
|
851
|
-
- A **new**
|
|
893
|
+
- A **new** participant id is created under the same iteration (the backend
|
|
852
894
|
branches from the source's last interaction). The source row is left
|
|
853
|
-
untouched. Get the new id from \`.
|
|
895
|
+
untouched. Get the new id from \`.participant_id\` / \`.participant_alias\` on
|
|
854
896
|
\`--json\`.
|
|
855
897
|
- \`--add-steps\` is **only** the extra budget; it does NOT include the
|
|
856
898
|
source's original cap. Credits debit per
|
|
@@ -869,54 +911,56 @@ mental model (cancel + extend as a pair, error envelopes, cost model).
|
|
|
869
911
|
## Tips for chaining commands as an agent
|
|
870
912
|
|
|
871
913
|
- Capture aliases from JSON: \`ITER=$(ish iteration create --url … --json | jq -r .alias)\`
|
|
872
|
-
- After \`ish study run --json\`, the
|
|
873
|
-
\`.
|
|
874
|
-
\`ish study poll/wait/cancel <
|
|
914
|
+
- After \`ish study run --json\`, the participants you just dispatched are at
|
|
915
|
+
\`.participant_aliases[]\` (and \`.participant_ids[]\` for UUIDs). Pass these to
|
|
916
|
+
\`ish study poll/wait/cancel <participant_id>\`. The \`simulations[]\` array
|
|
875
917
|
is collapsed to one batch entry per study with nested
|
|
876
|
-
\`
|
|
918
|
+
\`participant_ids[]\` / \`participant_aliases[]\` / \`job_ids[]\` so an N-sample
|
|
877
919
|
batch is a single row, not N near-duplicate rows.
|
|
878
920
|
- \`ish study poll\` honors the active study set by \`ish study use\` —
|
|
879
921
|
pass no \`--study\` flag and it polls the active study (parity with
|
|
880
922
|
\`study results\` / \`study wait\` / \`study run\`).
|
|
881
923
|
- \`ish study results --json\` includes per-answer \`sentiment\` (the
|
|
882
|
-
|
|
924
|
+
participant's session-level sentiment label) on every \`interview_answers[]
|
|
883
925
|
.answers[]\` row, plus \`sentiment\` + \`comment\` on every
|
|
884
|
-
\`
|
|
926
|
+
\`participants[]\` row. No need to fetch \`study participant <id>\` per row.
|
|
885
927
|
- \`ish study results --summary --json\` drops the interview_answers
|
|
886
|
-
payload and gives you counts + sentiment + per-
|
|
928
|
+
payload and gives you counts + sentiment + per-participant
|
|
887
929
|
{alias, status, sentiment, comment}. The cheapest "did this run land?"
|
|
888
930
|
shape.
|
|
889
|
-
- \`ish study results --transcript <
|
|
931
|
+
- \`ish study results --transcript <participant_id> --json\` is the
|
|
890
932
|
chat-modality projection — **external_chatbot mode only**. Returns
|
|
891
933
|
a flat \`transcript[]\` of {role, text, turn_index, action_type?,
|
|
892
934
|
option_label?, sentiment?, failure?} with a \`unique_bot_replies\`
|
|
893
935
|
count (1 on a multi-turn run = the M2 loop signature). Same shape
|
|
894
|
-
as the MCP \`get_chat_transcript\` tool. For
|
|
936
|
+
as the MCP \`get_chat_transcript\` tool. For participant_pair
|
|
895
937
|
conversations, fetch \`.conversations[]\` from
|
|
896
|
-
\`ish iteration get <iter-id> --json\` instead — bot/
|
|
897
|
-
don't apply when both speakers are
|
|
938
|
+
\`ish iteration get <iter-id> --json\` instead — bot/participant roles
|
|
939
|
+
don't apply when both speakers are participants.
|
|
898
940
|
- \`ish study run --json\` on a pair iteration includes a
|
|
899
|
-
\`pair_preview\` block (
|
|
941
|
+
\`pair_preview\` block (group sizes, conversation count,
|
|
900
942
|
initiator side, scenario previews) so agents can confirm what
|
|
901
943
|
they just dispatched without a follow-up \`iteration get\`.
|
|
902
|
-
- \`ish study
|
|
903
|
-
and returns just {
|
|
944
|
+
- \`ish study participant <id> --summary --json\` drops the action timeline
|
|
945
|
+
and returns just {participant, sentiment, comment, error_message}.
|
|
904
946
|
- \`ish ask results --json\` keeps \`variant_pick_id\` on every
|
|
905
947
|
response without needing \`--verbose\` — it's the load-bearing field
|
|
906
948
|
for "who picked what". Same logic on \`ask get\`.
|
|
907
|
-
- \`ish iteration get --json\`
|
|
949
|
+
- \`ish iteration get --json\` participants carry \`alias\` + \`name\` (M12
|
|
908
950
|
parity with \`study results --json\`).
|
|
909
951
|
- Use \`--fields\` to keep JSON tight: \`ish study list --fields alias,name,status\`
|
|
910
952
|
- Always pass \`--wait\` (or \`ish study wait\`) before reading
|
|
911
953
|
\`ish study results\` — without it you may read partial data.
|
|
912
|
-
- For \`ask\` write-paths (update/archive/wait/add-questions/add-
|
|
954
|
+
- For \`ask\` write-paths (update/archive/wait/add-questions/add-people),
|
|
913
955
|
default JSON is compact (changed fields + alias). Pass \`--verbose\` for
|
|
914
956
|
the full Ask payload.
|
|
915
|
-
-
|
|
916
|
-
|
|
957
|
+
- \`person generate --json\` returns \`{job: {id, status, person_ids},
|
|
958
|
+
profiles: [...]}\`; each person is the lean person shape with its
|
|
959
|
+
evidence-grounded \`scenarios\` attached (\`--no-scenarios\` to omit,
|
|
960
|
+
\`--verbose\` for the full record incl. \`simulation_config\`).
|
|
917
961
|
- On \`error_code: "usage_limit_reached"\` (HTTP 403), don't retry —
|
|
918
962
|
read \`tier\`, \`limit\`, \`current\`, \`max\`, and \`upgrade_url\` from
|
|
919
|
-
the JSON body to construct a recovery message. \`
|
|
963
|
+
the JSON body to construct a recovery message. \`person generate\` /
|
|
920
964
|
\`study generate\` refuse the entire batch when the post-generation
|
|
921
965
|
count would exceed the cap; re-issue with a smaller \`--count\`.
|
|
922
966
|
- Every verb's \`--help\` ends with a "Tips:" footer naming \`--get\`
|
|
@@ -925,12 +969,12 @@ mental model (cancel + extend as a pair, error envelopes, cost model).
|
|
|
925
969
|
- \`ish study run --wait\` returns \`error_code: "wait_timeout"\`
|
|
926
970
|
on wait expiry (exit 5, retryable) — distinct from network /
|
|
927
971
|
server timeouts. The envelope carries \`progress\` so you can
|
|
928
|
-
resume by polling the listed
|
|
929
|
-
Same envelope on \`ish study wait\` and per-
|
|
972
|
+
resume by polling the listed participants instead of re-dispatching.
|
|
973
|
+
Same envelope on \`ish study wait\` and per-participant \`study wait\`.
|
|
930
974
|
- \`ish study run\` accepts \`--dispatch-timeout <s>\` (default 120)
|
|
931
975
|
for the per-POST budget. On dispatch failure the error envelope
|
|
932
976
|
includes \`seeded_but_not_dispatched_ids[]\` /
|
|
933
|
-
\`seeded_but_not_dispatched_aliases[]\` —
|
|
977
|
+
\`seeded_but_not_dispatched_aliases[]\` — participants exist
|
|
934
978
|
server-side; resume by polling them, don't re-run \`study run\`.
|
|
935
979
|
- \`ish ask run --new\` is non-idempotent and marked
|
|
936
980
|
\`retryable: false\` on any failure. If you do see one, run
|
|
@@ -946,20 +990,20 @@ mental model (cancel + extend as a pair, error envelopes, cost model).
|
|
|
946
990
|
| You want to… | Don't | Do |
|
|
947
991
|
|-------------------------------------------|----------------------------------------|--------------------------------------------------------------------|
|
|
948
992
|
| Capture a single value (alias, id, …) | \`--json \\| jq -r .alias\` | \`--get alias\` |
|
|
949
|
-
| Capture a nested value | \`--json \\| jq -r .
|
|
993
|
+
| Capture a nested value | \`--json \\| jq -r .person.name\` | \`--get person.name\` |
|
|
950
994
|
| Capture every alias from a list | \`--json \\| jq -r '.items[].alias'\` | \`--get alias\` (auto-descends into \`items\`, one per line) |
|
|
951
995
|
| Force human output through tee/redirect | none, output silently became JSON | \`--human\` |
|
|
952
|
-
| Look up 2-3 specific profiles | \`
|
|
996
|
+
| Look up 2-3 specific profiles | \`person list --json \\| jq '.items[] \\| select(...)'\` | \`ish person get p-1b9 p-fc1 p-2fc\` |
|
|
953
997
|
| Show only some fields | \`--json \\| jq '{alias, name, country}'\` | \`--fields alias,name,country\` |
|
|
954
|
-
| Count
|
|
998
|
+
| Count participants on an ask | \`--json \\| jq '.participants \\| length'\` | \`ish ask get a-… --fields alias,participants_count\` |
|
|
955
999
|
| Count responses on a round | \`--json \\| jq '.rounds[0].responses \\| length'\` | \`ish ask get a-… --fields alias,rounds,responses_complete,responses_total\` |
|
|
956
1000
|
| Pick the A/B winner | \`--json \\| jq '.rounds[0].responses…'\` | \`ish ask results a-… --json\` then read \`.rounds[].aggregates.winner\` |
|
|
957
|
-
| List of
|
|
958
|
-
| Per-answer sentiment | \`--json \\| jq '...'\` per
|
|
1001
|
+
| List of participants from \`study run\` | \`--json \\| jq '.participants[].id'\` | \`--get participant_aliases\` (or \`participant_ids\` for UUIDs) |
|
|
1002
|
+
| Per-answer sentiment | \`--json \\| jq '...'\` per participant | \`ish study results <id> --json\` (sentiment is on every answer row) |
|
|
959
1003
|
| "Did this run land?" headline | \`study results --json\` + jq filtering | \`ish study results <id> --summary --json\` |
|
|
960
|
-
| Chat transcript for one
|
|
961
|
-
| Pair-mode conversation transcripts | \`study
|
|
962
|
-
|
|
|
1004
|
+
| Chat transcript for one participant (external_chatbot) | \`study participant --json\` + jq | \`ish study results <id> --transcript <participant_id> --json\` |
|
|
1005
|
+
| Pair-mode conversation transcripts | \`study participant --json\` per participant | \`ish iteration get <iter-id> --json \\| jq '.conversations[]'\` |
|
|
1006
|
+
| Participant headline only (no action timeline) | \`study participant --json\` + jq | \`ish study participant <id> --summary --json\` |
|
|
963
1007
|
| Variant pick id on an ask response | \`ask results --json --verbose\` | \`ish ask results a-… --json\` (variant_pick_id is preserved) |
|
|
964
1008
|
|
|
965
1009
|
The bias here is intentional: \`ish\` ships shapes designed for agent
|
|
@@ -986,14 +1030,17 @@ ish <command> --help
|
|
|
986
1030
|
| \`study\` | Persistent research artifact | concepts/study |
|
|
987
1031
|
| \`iteration\` | One configured run of a study (URL or media) | concepts/iteration |
|
|
988
1032
|
| \`ask\` | Lightweight reaction artifact | concepts/ask |
|
|
989
|
-
| \`
|
|
990
|
-
| \`source\` | Upload sources for
|
|
1033
|
+
| \`person\` | People, people generation, and the \`suggest-scenarios\` + \`evidence add\`/\`list\` probe loop for crafting one specific persona | concepts/person |
|
|
1034
|
+
| \`source\` | Upload sources for person generation | concepts/source |
|
|
991
1035
|
| \`config\` | Simulation configs (model, timing, retries) | (run \`ish config --help\`) |
|
|
992
|
-
| \`chat\` | Chat endpoint CRUD + smoke test (external_chatbot mode); pair-mode iterations created via \`iteration create --chat-mode
|
|
1036
|
+
| \`chat\` | Chat endpoint CRUD + smoke test (external_chatbot mode); pair-mode iterations created via \`iteration create --chat-mode participant_pair\` | guides/chat |
|
|
993
1037
|
| \`secret\` | Per-workspace secrets (\`{{secret:KEY}}\` resolver) | concepts/secret |
|
|
994
1038
|
| \`docs\` | Offline docs for agents | (run \`ish docs --help\`) |
|
|
995
1039
|
| \`init\` | Drop this skill into a Claude Code / Codex / | (run \`ish init --help\`) |
|
|
996
1040
|
| | Cursor / Cline / Roo project | |
|
|
1041
|
+
| \`mcp\` | Wire the hosted ish MCP server into local AI | guides/mcp-add |
|
|
1042
|
+
| | clients (Cursor, VS Code, Claude Code, | |
|
|
1043
|
+
| | Claude Desktop, Windsurf). Idempotent. | |
|
|
997
1044
|
| \`login\` | Browser-based auth | — |
|
|
998
1045
|
| \`logout\` | Clear saved credentials | — |
|
|
999
1046
|
| \`status\` | Show active session (user, workspace, | concepts/active-context |
|
|
@@ -1016,8 +1063,8 @@ the right \`ish docs get-page <slug>\` to read deep context.
|
|
|
1016
1063
|
|
|
1017
1064
|
## Aliases
|
|
1018
1065
|
|
|
1019
|
-
Short prefixed IDs (e.g. \`s-b2c\`, \`
|
|
1020
|
-
\`t-a17\`, \`
|
|
1066
|
+
Short prefixed IDs (e.g. \`s-b2c\`, \`p-795\`, \`a-6ec\`, \`i-d4e\`,
|
|
1067
|
+
\`t-a17\`, \`ps-3a4\`, \`w-6ec\`, \`c-c3c\`) are accepted anywhere a UUID
|
|
1021
1068
|
is expected. Full UUIDs always work too. See
|
|
1022
1069
|
\`ish docs get-page reference/aliases\`.
|
|
1023
1070
|
|