npm - @ishlabs/cli - Versions diffs - 0.12.2 → 0.14.0 - Mend

@ishlabs/cli 0.12.2 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

package/dist/commands/chat-config.d.ts +23 -0
package/dist/commands/chat-config.js +289 -0
package/dist/commands/chat.js +26 -37
package/dist/commands/iteration.js +219 -22
package/dist/commands/profile.js +75 -9
package/dist/commands/source.js +6 -4
package/dist/commands/study-analyze.d.ts +41 -0
package/dist/commands/study-analyze.js +187 -0
package/dist/commands/study-run.js +359 -30
package/dist/commands/study-screenshots.d.ts +20 -0
package/dist/commands/study-screenshots.js +216 -0
package/dist/commands/study.js +174 -9
package/dist/commands/workspace.js +35 -2
package/dist/lib/accessibility-profile.d.ts +12 -0
package/dist/lib/accessibility-profile.js +136 -0
package/dist/lib/alias-store.d.ts +1 -0
package/dist/lib/alias-store.js +1 -0
package/dist/lib/ask-questions.js +9 -0
package/dist/lib/billing.d.ts +55 -0
package/dist/lib/billing.js +77 -0
package/dist/lib/command-helpers.d.ts +6 -0
package/dist/lib/command-helpers.js +12 -0
package/dist/lib/docs.js +1181 -38
package/dist/lib/enums.d.ts +54 -0
package/dist/lib/enums.js +100 -0
package/dist/lib/local-sim/actions.d.ts +2 -1
package/dist/lib/local-sim/actions.js +88 -13
package/dist/lib/local-sim/loop.js +49 -19
package/dist/lib/local-sim/tabs.d.ts +27 -0
package/dist/lib/local-sim/tabs.js +157 -0
package/dist/lib/local-sim/types.d.ts +15 -0
package/dist/lib/modality.d.ts +70 -1
package/dist/lib/modality.js +323 -17
package/dist/lib/output.js +61 -4
package/dist/lib/skill-content.js +397 -19
package/dist/lib/types.d.ts +6 -1
package/dist/lib/types.js +1 -1
package/package.json +1 -1

package/dist/lib/skill-content.js CHANGED Viewed

@@ -24,11 +24,13 @@ const VERSION = pkg.version;
  * "ish". Hard cap is 1024 chars. Front-load the use case.
  */
 const SKILL_DESCRIPTION = "Use this skill whenever the user mentions ish, a study, a tester profile, " +
-    "a simulation run, an \"ask\", an audience, or wants to dispatch tests against AI testers. " +
-    "Wraps the `ish` CLI for managing studies, asks, iterations, tester profiles, and simulation " +
-    "runs against the Ish platform. Always start by running `ish docs overview` to load the " +
-    "domain model, then `ish docs list` and `ish docs get-page <slug>` for specifics. Prefer " +
-    "this skill over guessing flags from `ish --help`.";
+    "a simulation run, an \"ask\", an audience, wants to dispatch tests against AI testers, " +
+    "or wants to rehearse a conversation between two AI personas (e.g. sales rep vs. " +
+    "skeptical buyer, founder vs. investor archetype). Wraps the `ish` CLI for managing " +
+    "studies, asks, iterations, tester profiles, chatbot endpoints, and simulation runs " +
+    "against the Ish platform. Always start by running `ish docs overview` to load the " +
+    "domain model, then `ish docs list` and `ish docs get-page <slug>` for specifics. " +
+    "Prefer this skill over guessing flags from `ish --help`.";
 const SKILL_BODY = `# ish
 A CLI for the Ish platform — run user-research studies and quick "ask"
@@ -78,6 +80,8 @@ Workspace (= product)
 │     └── Sources (tps-…)    transcripts/audio/images that seed generation
 ├── Study (s-…)              persistent research artifact
 │     ├── modality           interactive | text | video | audio | image | document | chat
+│     │                       chat has two modes: external_chatbot (probe a customer bot)
+│     │                       and tester_pair (two AI personas converse — rehearsal)
 │     ├── assignments        tasks the tester does
 │     ├── questionnaire      questions the tester answers
 │     └── Iterations (i-…)   one configured run; carries the URL or media
@@ -93,6 +97,16 @@ Two run verbs:
 Use **study** when the tester must *do* something on a real surface;
 use **ask** for quick reactions to text/image variants.
+**Cold-start caveat — "create a fresh workspace" is conditional on
+quota headroom.** \`workspace_create\` returns
+\`error_code: usage_limit_reached\` the instant the account is at
+\`maxProducts\` (FREE caps at 1). Always inspect with \`workspace_get\`
+first and check the \`has_headroom\` flag per row, or use
+\`ish workspace create --name <name> --ensure\` — idempotent: returns
+the existing workspace by name when one exists, otherwise creates. See
+\`ish docs get-page guides/cold-start\` before producing a
+workspace_create call on a session you haven't already probed.
 ## High-frequency commands
 \`\`\`bash
@@ -106,6 +120,11 @@ ish workspace use w-6ec
 ish study use s-b2c
 ish ask use a-6ec
+# Idempotent workspace create — returns existing if name matches.
+# Use this on cold-start instead of a blind workspace_create that may
+# hit usage_limit_reached. See \`ish docs get-page guides/cold-start\`.
+ish workspace create --name "Acme — onboarding" --ensure
 # Inspect
 ish workspace list
 ish study list
@@ -127,13 +146,31 @@ ish iteration create --url https://example.com  # auto-uploads local files
 ish profile generate --description "..." --count 5
-# Chat modality (talk to a customer chatbot). Audience size lives on
-# study run; study create defines the persistent shape only.
+# Chat modality (external_chatbot — talk to a customer chatbot).
+# Audience size lives on study run; study create defines the persistent shape only.
 ish chat endpoint init --from-curl ./bot.curl --name my-bot
 ish chat endpoint test my-bot -m "Hello"
 ish study create --modality chat --endpoint my-bot --assignment "Sign up:Try to sign up"
 # (then) ish study run --sample 5 --wait
+# Chat modality (tester_pair — rehearse a conversation between two AI personas).
+# Audiences are pinned to the iteration; study run refuses run-time audience
+# overrides. Each side accepts EITHER explicit profiles OR a role-criteria
+# filter (or both — criteria validates the explicit list).
+ish study create --modality chat --chat-mode tester_pair --name "Pitch rehearsal" \\
+    --audience-a tp-sales-1,tp-sales-2 --audience-b tp-cto-skeptic-1,tp-cto-skeptic-2 \\
+    --scenario-a @./sales_rep.md --scenario-b @./skeptical_cto.md \\
+    --assignment "Pitch:Try to win the meeting"
+# (then) ish study run -y
+# Criteria-driven variant — backend resolves the eligible pool per side.
+# Persona-first: the persona is sacred, criteria filter who plays the role.
+ish study create --modality chat --chat-mode tester_pair --name "Pitch rehearsal" \\
+    --role-criteria-a '{"occupation":["sales"],"min_age":28}' \\
+    --role-criteria-b '{"occupation":["cto","vp engineering"],"country":["US","SE"]}' \\
+    --scenario-a @./sales_rep.md --scenario-b @./skeptical_cto.md \\
+    --assignment "Pitch:Try to land a pilot"
 # Run
 ish study run --sample 5 --country SE --wait
 ish ask run --new --name "..." --prompt "..." --variant text:"A" --variant text:"B" --sample 30 --wants-pick --wait
@@ -147,6 +184,21 @@ ish ask dispatch a-6ec --wait
 ish study results
 ish ask results a-6ec --round 1
+# AI summary + key insights (any modality with completed testers)
+ish study analyze --wait                                       # trigger + block
+ish study insights                                             # read latest
+# Screenshots (interactive studies — see what testers actually saw)
+ish study screenshots                                          # list, frame-grouped
+ish study screenshots download <study-id> --id <scid> --out shot.png
+ish study screenshots download <study-id> --all --out ./shots/
+# Chat configurations (model + system prompt + tools per chatbot endpoint)
+ish chat config list                                           # active endpoint
+ish chat config set --name v1 --model claude-sonnet-4-6 \\
+    --system-prompt-file ./prompt.txt --default
+ish chat config get cc-abc --view iterations                   # cross-study use
 # Read offline docs
 ish docs overview
 ish docs get-page <slug>
@@ -222,6 +274,14 @@ implies \`--quiet\` so the bare value is the only thing on stdout.
 - **List responses are a six-key envelope:** \`{items, total, returned,
   limit, offset, has_more}\`. Use \`has_more\` to detect truncation;
   don't count items yourself.
+- **\`study\` JSON includes a \`url\` field.** \`study create / generate /
+  get / list / run\` each return a top-level \`url\` (per item on
+  \`list\`) pointing to the study in the web app — \`overview\` for
+  read/write commands, \`timeline\` for \`study run\`. Surface it to
+  the user instead of composing \`<host>/<workspace>/<study>/...\`
+  yourself. Host follows the active backend (\`app.ishlabs.io\` on
+  production, \`localhost:3000\` under \`--dev\`); override with the
+  \`ISH_APP_URL\` env var.
 - **Use \`runtime_status\`, not \`status\`, on study responses.** Values:
   \`draft | running | completed | completed_with_errors | cancelled\`.
   Derived from iteration testers' actual state — never reports
@@ -314,6 +374,33 @@ implies \`--quiet\` so the bare value is the only thing on stdout.
   are accepted anywhere a UUID is. See
   \`ish docs get-page reference/aliases\`.
+## Credits & cost preview
+Every dispatched run costs **credits**. The CLI surfaces an upper-bound
+estimate *before* you dispatch so you can budget:
+- **Human output** — \`study run\` shows a \`Scale:\` + \`Credits (est):\`
+  line in the confirmation block (skipped under \`--yes\` or \`--json\`).
+- **JSON output** — \`study run --json\` includes a \`credit_estimate\`
+  field. For tester-pair chat it nests under \`pair_preview\`; for
+  solo/media runs it's top-level. Shape:
+  \`{ upper_bound: number, formula: "media_per_tester" | "chat_solo" |
+  "chat_pair" | "ask_per_response", breakdown: string, unit: "credits" }\`.
+- **\`formula\` is stable** — agents can branch on it.
+Today every modality uses \`max(1, round(N / 10))\` per principal
+(per tester for media/interactive, per side per conversation for chat,
+×2 for tester-pair). Asks bill flat **1 credit per successful response**.
+Insights cost **10 credits flat** (the first one for each study is free).
+If you exceed the available budget at dispatch time, the backend rejects
+with HTTP 402 / \`error_code: "insufficient_credits"\`. The envelope
+carries \`required\`, \`available\`, \`upgrade_url\`. Don't retry — surface
+the upgrade link.
+The full table (per-modality rates, tier allotments, error envelope)
+lives in \`ish docs get-page reference/credits\`.
 ## Common pitfalls (don't do these)
 1. **Don't paste flags from memory.** The CLI evolves; flags change.
@@ -348,12 +435,70 @@ implies \`--quiet\` so the bare value is the only thing on stdout.
    See \`ish docs get-page concepts/site-access\`.
 7. **Don't commit \`~/.ish/config.json\`** — it stores tokens and active
    workspace/study/ask selections. It lives in \`$HOME\`, not the repo.
-8. **Don't retry \`usage_limit_reached\` errors.** Tier caps
-   (\`maxProducts\`, \`maxStudiesPerProduct\`, \`maxIterationsPerStudy\`,
-   \`maxCustomTesterProfiles\`) are enforced server-side. The error body
-   carries \`tier\`, \`limit\`, \`current\`, \`max\`, \`upgrade_url\` — show
-   the upgrade link or delete an existing resource to free headroom.
-   See \`ish docs get-page reference/billing-limits\` for the table.
+8. **Don't pass run-time audience flags to a tester_pair chat iteration.**
+   Pair iterations carry their own audiences (\`audience_a\` /
+   \`audience_b\` inside \`details.mode_details\`); \`ish study run\`
+   refuses \`--profile\` / \`--sample\` / \`--all\` / demographic filters
+   on them. To change audiences, update the iteration via
+   \`ish iteration update <id> --details-json '{...}'\`. When both sides
+   ship explicit \`--audience-a\` / \`--audience-b\` lists, lengths must
+   match (1:1 by index) unless one side is a single profile (1×N
+   broadcast) — or use \`--role-criteria-a/-b\` and let the backend
+   resolve a pool.
+9. **Don't cram demographic constraints into \`scenario_a/_b\` text.**
+   Demographics (occupation, age, country, gender) belong in
+   \`--role-criteria-a/-b\` so the persona stays sacred — filtering
+   happens upstream of the prompt. Scenario text is for voice, goal,
+   and knowledge of the role, not for who plays it. Mixing the two
+   breaks the asymmetry contract and produces incoherent characters.
+10. **Don't retry \`usage_limit_reached\` errors.** Tier caps
+    (\`maxProducts\`, \`maxStudiesPerProduct\`, \`maxIterationsPerStudy\`,
+    \`maxCustomTesterProfiles\`) are enforced server-side. The error body
+    carries \`tier\`, \`limit\`, \`current\`, \`max\`, \`upgrade_url\` — show
+    the upgrade link or delete an existing resource to free headroom.
+    See \`ish docs get-page reference/billing-limits\` for the table.
+11. **Don't retry \`insufficient_credits\` errors either.** HTTP 402,
+    non-retryable. Read the \`credit_estimate\` field on \`study run --json\`
+    *before* dispatching to know what you'll spend; if the error fires
+    after, surface \`required\` / \`available\` / \`upgrade_url\` to the
+    human. See \`ish docs get-page reference/credits\`.
+12. **Don't dispatch interactive/media runs without thinking about
+    \`--max-interactions\`.** \`ish study run\` defaults to a 20-step
+    cap (flag > iteration's stored value > 20), which is the right
+    answer for most onboarding/landing-page probes. Raise it
+    (\`--max-interactions 50\`) when testers genuinely need to roam
+    further; lower it (\`--max-interactions 5\`) for a smoke probe
+    against a surface you suspect is broken — a stuck tester on a
+    non-responsive page will otherwise burn the full cap before the
+    SDK gives up. The confirmation block prints the resolved value
+    and where it came from. Credits debit per
+    \`max(1, round(steps/10))\` per tester; see
+    \`ish docs get-page reference/credits\`.
+13. **Don't call \`workspace_create\` blind on a cold start.** On a
+    saturated account it returns \`error_code: usage_limit_reached\`
+    immediately — the dogfood account hits this on the first call.
+    Always call \`workspace_get\` (or \`ish workspace list --json\`)
+    first and inspect \`has_headroom\` per row; if any existing
+    workspace fits the work, use it via \`ish workspace use <id>\`.
+    To programmatically reuse-or-create idempotently, prefer
+    \`ish workspace create --name <name> --ensure\` — returns the existing
+    workspace owned by the caller when the name matches, otherwise
+    creates a fresh one. Same response shape either way, so the
+    agent doesn't branch on success vs. reuse. See
+    \`ish docs get-page guides/cold-start\`.
+14. **Don't trust \`occupation\` filters as whole-token matches.**
+    \`audience_build\` treats \`occupation\` as a **loose,
+    case-insensitive substring** — \`occupation=["manager"]\` matches
+    hotel managers, retail managers, bank branch managers, not just
+    the engineering managers you probably wanted. Two recovery
+    paths: enumerate the role surface explicitly
+    (\`occupation=["engineering manager", "software engineering
+    manager", "vp engineering", "tech lead"]\`) or read
+    \`match_preview\` on the \`audience_build\` response and iterate
+    on the filter before \`ask_run\` / \`study_run\`. The public
+    profile pool skews non-tech / non-Western, so even a precise
+    filter may resolve to a small count — preview before dispatching
+    a run that depends on reaching N matches. See
+    \`ish docs get-page concepts/audience\`.
 ## Authentication
@@ -532,6 +677,21 @@ ish iteration create --url "$URL"
 ## 7. Chat-modality study (drive a chatbot endpoint)
+The chat modality has **two modes**, picked by
+\`iteration.details.mode_details.mode\`:
+- **\`external_chatbot\`** — testers probe a customer chatbot endpoint
+  (the original chat behaviour). Audience size is set on \`study run\`.
+- **\`tester_pair\`** — two AI tester audiences converse with each
+  other. Each side has its own scenario + goal; the other side does
+  not see it (asymmetry contract). Audiences are pinned to the
+  iteration: equal counts zip 1:1 by index, or one side of 1
+  broadcasts across the other (1 × N → N conversations). Useful for rehearsing
+  a sales call, a fundraising chat, a difficult conversation, or any
+  two-role scenario before it happens. See section 7b below.
+### 7a. external_chatbot — drive a customer chatbot endpoint
 Goal: configure a customer chatbot endpoint, smoke test it, and run
 a chat-modality study end to end. The CLI talks to the endpoint
 through whatever transport it's configured for (sync / async-poll /
@@ -622,6 +782,215 @@ you can branch on plan caps before \`study create\` returns
 The full reference is at \`ish docs get-page guides/chat\`,
 secrets are at \`ish docs get-page concepts/secret\`.
+### 7b. tester_pair — rehearse a two-AI conversation
+Goal: pit two AI tester audiences against each other to see how a
+two-role conversation unfolds — a sales rep vs. a skeptical CTO, a
+founder vs. an investor archetype, a manager vs. a direct report
+ahead of a difficult conversation. Each side has its own scenario
+and goal; the other side does NOT see it (the asymmetry contract is
+what makes the rehearsal credible).
+One-shot study + iteration:
+\`\`\`bash
+ish study create --modality chat --chat-mode tester_pair \\
+    --name "Pitch rehearsal" \\
+    --audience-a tp-sales-1,tp-sales-2 \\
+    --audience-b tp-cto-skeptic-1,tp-cto-skeptic-2 \\
+    --scenario-a "You are a senior sales rep pitching ish to a new prospect." \\
+    --scenario-b "You are a skeptical CTO; surface risks before agreeing to a pilot." \\
+    --assignment "Pitch:Try to land a pilot"
+ish study run -y
+\`\`\`
+Or add a pair iteration to an existing chat study:
+\`\`\`bash
+ish iteration create --study s-... --chat-mode tester_pair \\
+    --audience-a tp-a1,tp-a2 --audience-b tp-b1,tp-b2 \\
+    --scenario-a @./scenario_a.md --scenario-b @./scenario_b.md \\
+    --max-turns 14
+\`\`\`
+Rules to remember:
+- Each side needs **either** \`--audience-*\` (explicit profile IDs) **or**
+  \`--role-criteria-*\` (a filter the backend resolves). They can also
+  be combined — criteria then validates the explicit list.
+- When **both sides** use explicit \`--audience-a\` / \`--audience-b\`, they
+  must be the same length (≥ 1), except for the 1×N broadcast case
+  described in the next rule. Pairs run 1:1 by index. Same profile
+  on both sides is allowed (self-talk rehearsal).
+- **1×N broadcast**: pass exactly one profile on one side and N on
+  the other to rehearse one fixed side against N variations. The CLI
+  auto-broadcasts the singleton to match. E.g.
+  \`--audience-a tp-rep --audience-b tp-cto1,tp-cto2,tp-cto3\` → 3
+  conversations, same rep, three different CTOs. Stderr notice fires
+  when broadcasting kicks in.
+- Both \`--scenario-a\` and \`--scenario-b\` are required and asymmetric.
+  Use \`@./file.md\` to read from disk.
+- \`--initiator-side\` (\`a\` default) picks who speaks first.
+- \`--chat-mode\` accepts both \`tester_pair\` and \`tester-pair\`.
+  The same hyphen/underscore tolerance applies to \`--screen-format\`,
+  \`--kind\` on \`source upload\`, and the question \`type\` field in
+  \`--questionnaire\` / \`--questions\` manifests.
+- Audiences are **authoritative on the iteration**.
+  \`ish study run\` refuses \`--profile\` / \`--sample\` / \`--all\` /
+  demographic filters on a pair iteration with a clear error. To
+  change audiences, update the iteration via
+  \`ish iteration update <id> --details-json '{...}'\`.
+- \`--max-turns\` / \`--early-termination\` on \`study run\` override the
+  iteration's saved values for that single dispatch (they don't
+  persist back to the iteration).
+- Dispatch is per-Conversation (one task per pair). Per-Conversation
+  summaries (\`end_reason\`, \`dominant_dynamic\`, \`who_steered\`) land on
+  \`iteration.conversations[]\`. Per-tester summaries land on
+  \`tester.summary\` as before.
+### Filtering audiences with role criteria (persona-first)
+\`--role-criteria-a\` / \`--role-criteria-b\` accept a JSON object (or
+\`@./file.json\`) describing who's eligible for that side. The
+backend resolves the matching tester-profile pool and persists the
+IDs on the iteration. Keys (all optional):
+\`\`\`json
+{
+  "occupation": ["founder", "ceo"],
+  "min_age": 28, "max_age": 55,
+  "gender": ["female", "male"],
+  "country": ["US", "SE"],
+  "education_level_in": ["bachelor", "graduate"],
+  "household_in": ["couple_with_kids", "single_parent"],
+  "locale_type_in": ["urban", "suburban"],
+  "income_level_in": ["middle", "upper_middle", "upper"],
+  "employment_status_in": ["employed_full_time", "self_employed"],
+  "requires_captions": false,
+  "uses_screen_reader": false,
+  "prefers_reduced_motion": false,
+  "prefers_high_contrast": false,
+  "has_any_accessibility_need": false
+}
+\`\`\`
+The five \`*_in\` arrays accept snake_case spec values verbatim
+(see \`https://ishlabs.io/spec/profile-enums.v1.json\`). The five
+accessibility filters are coarse booleans over each tester's
+\`accessibility_profile\` JSONB.
+MECE rules for the list filters:
+- \`household_in\`: \`couple_with_kids\` covers couples raising
+  children; \`couple_no_kids\` is strictly child-free. \`single\` means
+  lives alone with no partner, roommates, parents, or children
+  sharing the household.
+- \`employment_status_in\`: pick the tester's primary daytime
+  activity. A student who works 15 hrs/week is \`student\`; a retiree
+  who freelances is \`retired\`.
+The **persona-first** principle: the tester's persona is sacred and
+the LLM prompt construction does not change. Criteria filter the
+*eligible pool* upstream so that by the time a tester reaches the
+prompt, their persona is already plausible for the role described
+in \`scenario_*\`. Don't cram demographic constraints into the
+scenario text — that breaks the asymmetry contract and produces
+incoherent characters (a retired farmer suddenly "pitching a
+Series A"). Scenarios describe voice / goal / knowledge; criteria
+pick who plays the role.
+If the resolved pool is smaller than the requested count for a side,
+\`ish study run\` exits 2 with the backend's pool-too-small error
+intact. Broaden the criteria, generate more profiles
+(\`ish profile generate\`), or fall back to explicit \`--audience-a\` / \`--audience-b\`.
+### Rehearsing against N variations of one side (1×N)
+The most common rehearsal shape: fix one side, vary the other.
+"Pitch this once and see how 3 different CTOs respond." Step-by-step:
+\`\`\`bash
+# 1. Generate N distinct profiles for the varying side (or pick
+#    existing ones via \`ish profile list\`).
+ish profile generate \\
+    --description "Skeptical CTO at a Series B SaaS startup" \\
+    --count 3 --json | jq -r '.items[].alias'
+# → tp-cto1, tp-cto2, tp-cto3
+# 2. Write the two scenarios as separate files. Each is a system
+#    prompt for ONE role; the partner never sees it. Cover voice,
+#    knowledge, asymmetry, success criteria. NO demographics in the
+#    text — that's --role-criteria-*'s job. See "Writing scenarios
+#    that produce signal" below for the template.
+#
+#    ./sales_rep.md       — the user's pitch + goals
+#    ./skeptical_cto.md   — CTO's posture + concerns
+# 3. Create the iteration with ONE profile on the fixed side and
+#    N on the varying side. CLI auto-broadcasts the singleton and
+#    prints a stderr notice ("Broadcasting --audience-a (1 profile)
+#    to length 3…") so you see the expansion.
+ish study create \\
+    --modality chat --chat-mode tester_pair \\
+    --name "Pitch rehearsal — 3 CTO variants" \\
+    --audience-a tp-rep \\
+    --audience-b tp-cto1,tp-cto2,tp-cto3 \\
+    --scenario-a @./sales_rep.md \\
+    --scenario-b @./skeptical_cto.md \\
+    --assignment "Pitch:Land a pilot or a clear next step"
+# 4. Dispatch + wait.
+ish study run -y --wait
+# 5. Compare per-conversation outcomes:
+ish iteration get <iter-id> --json \\
+    | jq '.conversations[] | {pair_index, end_reason,
+          dynamic: .summary.dominant_dynamic}'
+\`\`\`
+The CLI emits a stderr notice when it broadcasts ("Broadcasting
+--audience-a (1 profile) to length 3…") so you can see the
+expansion happen.
+**Criteria alternative**: \`--role-criteria-b '{"occupation":["cto"]}'\`
+on a single \`--audience-a tp-rep\` lets the backend pick the CTOs.
+Less control over distinctness — for guaranteed variety, generate
+explicit profiles first.
+### Writing scenarios that produce signal
+Thin scenarios produce thin rehearsals. Each scenario is injected as
+role-playing context for **its own side only** — the partner does NOT
+see the other side's scenario or goal. Cover five things in each:
+1. **Role / identity** — who is this person?
+2. **Voice** — how do they speak? Formal, casual, technical, blunt?
+3. **What they know** — context they came in with.
+4. **What they don't know** — the asymmetry that makes it interesting.
+5. **Goal** — what counts as success *for them*.
+Bad: \`scenario_a: "you are a sales rep"\`. Good (~80 words):
+\`\`\`
+You are Maya, a senior AE at ish (3 years experience). You speak in
+plain sentences, push back when you disagree, and quantify claims.
+You know this is a 30-min discovery call and you've read the
+prospect's LinkedIn — that's it. You do NOT know their current
+tooling, budget, or politics. Success = leave with a concrete next
+step (pilot, follow-up demo, or a firm "no, because X"). A polite
+"we'll get back to you" is not success.
+\`\`\`
+Keep each scenario under ~250 words — past that, persona drift
+dominates. Get the full rationale at
+\`ish docs get-page concepts/iteration\` ("Writing a good scenario").
+Inspect after running:
+\`\`\`bash
+ish iteration get <iter-id> --json \\
+    | jq '.details.mode_details.mode, .conversations[]'
+ish study results <study-id> --transcript <tester-id> --json
+\`\`\`
 ## 8. Stage an ask for human review, then dispatch
 Goal: prepare a billable A/B but let the user inspect and approve the
@@ -706,10 +1075,18 @@ If you find yourself reaching for \`jq -r .x\`, you wanted \`--get x\`.
   {alias, status, sentiment, comment}. The cheapest "did this run land?"
   shape.
 - \`ish study results --transcript <tester_id> --json\` is the
-  chat-modality projection: a flat \`transcript[]\` of {role, text,
-  turn_index, action_type?, option_label?, sentiment?, failure?} with a
-  \`unique_bot_replies\` count (1 on a multi-turn run = the M2 loop
-  signature). Same shape as the MCP \`get_chat_transcript\` tool.
+  chat-modality projection — **external_chatbot mode only**. Returns
+  a flat \`transcript[]\` of {role, text, turn_index, action_type?,
+  option_label?, sentiment?, failure?} with a \`unique_bot_replies\`
+  count (1 on a multi-turn run = the M2 loop signature). Same shape
+  as the MCP \`get_chat_transcript\` tool. For tester_pair
+  conversations, fetch \`.conversations[]\` from
+  \`ish iteration get <iter-id> --json\` instead — bot/tester roles
+  don't apply when both speakers are testers.
+- \`ish study run --json\` on a pair iteration includes a
+  \`pair_preview\` block (audience sizes, conversation count,
+  initiator side, scenario previews) so agents can confirm what
+  they just dispatched without a follow-up \`iteration get\`.
 - \`ish study tester <id> --summary --json\` drops the action timeline
   and returns just {tester, sentiment, comment, error_message}.
 - \`ish ask results --json\` keeps \`variant_pick_id\` on every
@@ -768,7 +1145,8 @@ If you find yourself reaching for \`jq -r .x\`, you wanted \`--get x\`.
 | List of testers from \`study run\`        | \`--json \\| jq '.testers[].id'\`        | \`--get tester_aliases\` (or \`tester_ids\` for UUIDs)                |
 | Per-answer sentiment                      | \`--json \\| jq '...'\` per tester       | \`ish study results <id> --json\` (sentiment is on every answer row) |
 | "Did this run land?" headline             | \`study results --json\` + jq filtering | \`ish study results <id> --summary --json\`                          |
-| Chat transcript for one tester            | \`study tester --json\` + jq            | \`ish study results <id> --transcript <tester_id> --json\`           |
+| Chat transcript for one tester (external_chatbot) | \`study tester --json\` + jq      | \`ish study results <id> --transcript <tester_id> --json\`           |
+| Pair-mode conversation transcripts        | \`study tester --json\` per tester       | \`ish iteration get <iter-id> --json \\| jq '.conversations[]'\`     |
 | Tester headline only (no action timeline) | \`study tester --json\` + jq            | \`ish study tester <id> --summary --json\`                           |
 | Variant pick id on an ask response        | \`ask results --json --verbose\`        | \`ish ask results a-… --json\` (variant_pick_id is preserved)        |
@@ -799,7 +1177,7 @@ ish <command> --help
 | \`profile\`   | Tester profiles + audience generation           | concepts/profile            |
 | \`source\`    | Upload sources for profile generation           | concepts/source             |
 | \`config\`    | Simulation configs (model, timing, retries)     | (run \`ish config --help\`)   |
-| \`chat\`      | Chat endpoint CRUD + smoke test (chat modality) | guides/chat                 |
+| \`chat\`      | Chat endpoint CRUD + smoke test (external_chatbot mode); pair-mode iterations created via \`iteration create --chat-mode tester_pair\` | guides/chat                 |
 | \`secret\`    | Per-workspace secrets (\`{{secret:KEY}}\` resolver) | concepts/secret           |
 | \`docs\`      | Offline docs for agents                         | (run \`ish docs --help\`)     |
 | \`init\`      | Drop this skill into a Claude Code / Codex /    | (run \`ish init --help\`)     |

package/dist/lib/types.d.ts CHANGED Viewed

@@ -178,7 +178,12 @@ export interface GeneratedProfile {
     country?: string | null;
     city?: string | null;
     occupation?: string | null;
-    tech_savviness?: string | null;
+    education_level?: string | null;
+    household?: string | null;
+    locale_type?: string | null;
+    income_level?: string | null;
+    employment_status?: string | null;
+    accessibility_profile?: Record<string, unknown> | null;
     product_id?: string | null;
     custom_field_values?: Record<string, unknown>;
     [key: string]: unknown;

package/dist/lib/types.js CHANGED Viewed

@@ -7,7 +7,7 @@ export const VALID_CONTENT_TYPES = {
     text: ["narrative", "informational", "commercial", "editorial", "reference", "email", "news"],
     video: ["tutorial", "documentary", "entertainment", "review", "lifestyle", "news", "social_post", "ad"],
     audio: ["music", "narration", "conversation", "speech", "soundscape", "news", "ad"],
-    image: ["product", "photography", "infographic", "artwork", "interface", "social_post", "ad"],
+    image: ["product", "photography", "infographic", "artwork", "interface", "visual_assets", "social_post", "ad"],
     document: ["deck", "presentation", "report", "brochure", "guide"],
 };
 export const ASK_VARIANT_KINDS = [

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ishlabs/cli",
-  "version": "0.12.2",
+  "version": "0.14.0",
   "description": "The command-line interface for ish",
   "type": "module",
   "bin": {