@ishlabs/cli 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,11 +24,13 @@ const VERSION = pkg.version;
24
24
  * "ish". Hard cap is 1024 chars. Front-load the use case.
25
25
  */
26
26
  const SKILL_DESCRIPTION = "Use this skill whenever the user mentions ish, a study, a tester profile, " +
27
- "a simulation run, an \"ask\", an audience, or wants to dispatch tests against AI testers. " +
28
- "Wraps the `ish` CLI for managing studies, asks, iterations, tester profiles, and simulation " +
29
- "runs against the Ish platform. Always start by running `ish docs overview` to load the " +
30
- "domain model, then `ish docs list` and `ish docs get-page <slug>` for specifics. Prefer " +
31
- "this skill over guessing flags from `ish --help`.";
27
+ "a simulation run, an \"ask\", an audience, wants to dispatch tests against AI testers, " +
28
+ "or wants to rehearse a conversation between two AI personas (e.g. sales rep vs. " +
29
+ "skeptical buyer, founder vs. investor archetype). Wraps the `ish` CLI for managing " +
30
+ "studies, asks, iterations, tester profiles, chatbot endpoints, and simulation runs " +
31
+ "against the Ish platform. Always start by running `ish docs overview` to load the " +
32
+ "domain model, then `ish docs list` and `ish docs get-page <slug>` for specifics. " +
33
+ "Prefer this skill over guessing flags from `ish --help`.";
32
34
  const SKILL_BODY = `# ish
33
35
 
34
36
  A CLI for the Ish platform — run user-research studies and quick "ask"
@@ -78,6 +80,8 @@ Workspace (= product)
78
80
  │ └── Sources (tps-…) transcripts/audio/images that seed generation
79
81
  ├── Study (s-…) persistent research artifact
80
82
  │ ├── modality interactive | text | video | audio | image | document | chat
83
+ │ │ chat has two modes: external_chatbot (probe a customer bot)
84
+ │ │ and tester_pair (two AI personas converse — rehearsal)
81
85
  │ ├── assignments tasks the tester does
82
86
  │ ├── questionnaire questions the tester answers
83
87
  │ └── Iterations (i-…) one configured run; carries the URL or media
@@ -93,6 +97,16 @@ Two run verbs:
93
97
  Use **study** when the tester must *do* something on a real surface;
94
98
  use **ask** for quick reactions to text/image variants.
95
99
 
100
+ **Cold-start caveat — "create a fresh workspace" is conditional on
101
+ quota headroom.** \`workspace_create\` returns
102
+ \`error_code: usage_limit_reached\` the instant the account is at
103
+ \`maxProducts\` (FREE caps at 1). Always inspect with \`workspace_get\`
104
+ first and check the \`has_headroom\` flag per row, or use
105
+ \`ish workspace create --name <name> --ensure\` — idempotent: returns
106
+ the existing workspace by name when one exists, otherwise creates. See
107
+ \`ish docs get-page guides/cold-start\` before producing a
108
+ workspace_create call on a session you haven't already probed.
109
+
96
110
  ## High-frequency commands
97
111
 
98
112
  \`\`\`bash
@@ -106,6 +120,11 @@ ish workspace use w-6ec
106
120
  ish study use s-b2c
107
121
  ish ask use a-6ec
108
122
 
123
+ # Idempotent workspace create — returns existing if name matches.
124
+ # Use this on cold-start instead of a blind workspace_create that may
125
+ # hit usage_limit_reached. See \`ish docs get-page guides/cold-start\`.
126
+ ish workspace create --name "Acme — onboarding" --ensure
127
+
109
128
  # Inspect
110
129
  ish workspace list
111
130
  ish study list
@@ -127,13 +146,31 @@ ish iteration create --url https://example.com # auto-uploads local files
127
146
 
128
147
  ish profile generate --description "..." --count 5
129
148
 
130
- # Chat modality (talk to a customer chatbot). Audience size lives on
131
- # study run; study create defines the persistent shape only.
149
+ # Chat modality (external_chatbot — talk to a customer chatbot).
150
+ # Audience size lives on study run; study create defines the persistent shape only.
132
151
  ish chat endpoint init --from-curl ./bot.curl --name my-bot
133
152
  ish chat endpoint test my-bot -m "Hello"
134
153
  ish study create --modality chat --endpoint my-bot --assignment "Sign up:Try to sign up"
135
154
  # (then) ish study run --sample 5 --wait
136
155
 
156
+ # Chat modality (tester_pair — rehearse a conversation between two AI personas).
157
+ # Audiences are pinned to the iteration; study run refuses run-time audience
158
+ # overrides. Each side accepts EITHER explicit profiles OR a role-criteria
159
+ # filter (or both — criteria validates the explicit list).
160
+ ish study create --modality chat --chat-mode tester_pair --name "Pitch rehearsal" \\
161
+ --audience-a tp-sales-1,tp-sales-2 --audience-b tp-cto-skeptic-1,tp-cto-skeptic-2 \\
162
+ --scenario-a @./sales_rep.md --scenario-b @./skeptical_cto.md \\
163
+ --assignment "Pitch:Try to win the meeting"
164
+ # (then) ish study run -y
165
+
166
+ # Criteria-driven variant — backend resolves the eligible pool per side.
167
+ # Persona-first: the persona is sacred, criteria filter who plays the role.
168
+ ish study create --modality chat --chat-mode tester_pair --name "Pitch rehearsal" \\
169
+ --role-criteria-a '{"occupation":["sales"],"min_age":28}' \\
170
+ --role-criteria-b '{"occupation":["cto","vp engineering"],"country":["US","SE"]}' \\
171
+ --scenario-a @./sales_rep.md --scenario-b @./skeptical_cto.md \\
172
+ --assignment "Pitch:Try to land a pilot"
173
+
137
174
  # Run
138
175
  ish study run --sample 5 --country SE --wait
139
176
  ish ask run --new --name "..." --prompt "..." --variant text:"A" --variant text:"B" --sample 30 --wants-pick --wait
@@ -237,6 +274,14 @@ implies \`--quiet\` so the bare value is the only thing on stdout.
237
274
  - **List responses are a six-key envelope:** \`{items, total, returned,
238
275
  limit, offset, has_more}\`. Use \`has_more\` to detect truncation;
239
276
  don't count items yourself.
277
+ - **\`study\` JSON includes a \`url\` field.** \`study create / generate /
278
+ get / list / run\` each return a top-level \`url\` (per item on
279
+ \`list\`) pointing to the study in the web app — \`overview\` for
280
+ read/write commands, \`timeline\` for \`study run\`. Surface it to
281
+ the user instead of composing \`<host>/<workspace>/<study>/...\`
282
+ yourself. Host follows the active backend (\`app.ishlabs.io\` on
283
+ production, \`localhost:3000\` under \`--dev\`); override with the
284
+ \`ISH_APP_URL\` env var.
240
285
  - **Use \`runtime_status\`, not \`status\`, on study responses.** Values:
241
286
  \`draft | running | completed | completed_with_errors | cancelled\`.
242
287
  Derived from iteration testers' actual state — never reports
@@ -329,6 +374,33 @@ implies \`--quiet\` so the bare value is the only thing on stdout.
329
374
  are accepted anywhere a UUID is. See
330
375
  \`ish docs get-page reference/aliases\`.
331
376
 
377
+ ## Credits & cost preview
378
+
379
+ Every dispatched run costs **credits**. The CLI surfaces an upper-bound
380
+ estimate *before* you dispatch so you can budget:
381
+
382
+ - **Human output** — \`study run\` shows a \`Scale:\` + \`Credits (est):\`
383
+ line in the confirmation block (skipped under \`--yes\` or \`--json\`).
384
+ - **JSON output** — \`study run --json\` includes a \`credit_estimate\`
385
+ field. For tester-pair chat it nests under \`pair_preview\`; for
386
+ solo/media runs it's top-level. Shape:
387
+ \`{ upper_bound: number, formula: "media_per_tester" | "chat_solo" |
388
+ "chat_pair" | "ask_per_response", breakdown: string, unit: "credits" }\`.
389
+ - **\`formula\` is stable** — agents can branch on it.
390
+
391
+ Today every modality uses \`max(1, round(N / 10))\` per principal
392
+ (per tester for media/interactive, per side per conversation for chat,
393
+ ×2 for tester-pair). Asks bill flat **1 credit per successful response**.
394
+ Insights cost **10 credits flat** (first per-study is free).
395
+
396
+ If you exceed the available budget at dispatch time, the backend rejects
397
+ with HTTP 402 / \`error_code: "insufficient_credits"\`. The envelope
398
+ carries \`required\`, \`available\`, \`upgrade_url\`. Don't retry — surface
399
+ the upgrade link.
400
+
401
+ The full table (per-modality rates, tier allotments, error envelope)
402
+ lives in \`ish docs get-page reference/credits\`.
403
+
332
404
  ## Common pitfalls (don't do these)
333
405
 
334
406
  1. **Don't paste flags from memory.** The CLI evolves; flags change.
@@ -363,12 +435,70 @@ implies \`--quiet\` so the bare value is the only thing on stdout.
363
435
  See \`ish docs get-page concepts/site-access\`.
364
436
  7. **Don't commit \`~/.ish/config.json\`** — it stores tokens and active
365
437
  workspace/study/ask selections. It lives in \`$HOME\`, not the repo.
366
- 8. **Don't retry \`usage_limit_reached\` errors.** Tier caps
367
- (\`maxProducts\`, \`maxStudiesPerProduct\`, \`maxIterationsPerStudy\`,
368
- \`maxCustomTesterProfiles\`) are enforced server-side. The error body
369
- carries \`tier\`, \`limit\`, \`current\`, \`max\`, \`upgrade_url\` show
370
- the upgrade link or delete an existing resource to free headroom.
371
- See \`ish docs get-page reference/billing-limits\` for the table.
438
+ 8. **Don't pass run-time audience flags to a tester_pair chat iteration.**
439
+ Pair iterations carry their own audiences (\`audience_a\` /
440
+ \`audience_b\` inside \`details.mode_details\`); \`ish study run\`
441
+ refuses \`--profile\` / \`--sample\` / \`--all\` / demographic filters
442
+ on them. To change audiences, update the iteration via
443
+ \`ish iteration update <id> --details-json '{...}'\`. When both sides
444
+ ship explicit \`--audience-a\` / \`--audience-b\` lists, lengths must
445
+ match (1:1 by index) — or use \`--role-criteria-a/-b\` and let the
446
+ backend resolve a pool.
447
+ 9. **Don't cram demographic constraints into \`scenario_a/_b\` text.**
448
+ Demographics (occupation, age, country, gender) belong in
449
+ \`--role-criteria-a/-b\` so the persona stays sacred — filtering
450
+ happens upstream of the prompt. Scenario text is for voice, goal,
451
+ and knowledge of the role, not for who plays it. Mixing the two
452
+ breaks the asymmetry contract and produces incoherent characters.
453
+ 10. **Don't retry \`usage_limit_reached\` errors.** Tier caps
454
+ (\`maxProducts\`, \`maxStudiesPerProduct\`, \`maxIterationsPerStudy\`,
455
+ \`maxCustomTesterProfiles\`) are enforced server-side. The error body
456
+ carries \`tier\`, \`limit\`, \`current\`, \`max\`, \`upgrade_url\` — show
457
+ the upgrade link or delete an existing resource to free headroom.
458
+ See \`ish docs get-page reference/billing-limits\` for the table.
459
+ 11. **Don't retry \`insufficient_credits\` errors either.** HTTP 402,
460
+ non-retryable. Read the \`credit_estimate\` field on \`study run --json\`
461
+ *before* dispatching to know what you'll spend; if the error fires
462
+ after, surface \`required\` / \`available\` / \`upgrade_url\` to the
463
+ human. See \`ish docs get-page reference/credits\`.
464
+ 12. **Don't dispatch interactive/media runs without thinking about
465
+ \`--max-interactions\`.** \`ish study run\` defaults to a 20-step
466
+ cap (flag > iteration's stored value > 20), which is the right
467
+ answer for most onboarding/landing-page probes. Raise it
468
+ (\`--max-interactions 50\`) when testers genuinely need to roam
469
+ further; lower it (\`--max-interactions 5\`) for a smoke probe
470
+ against a surface you suspect is broken — a stuck tester on a
471
+ non-responsive page will otherwise burn the full cap before the
472
+ SDK gives up. The confirmation block prints the resolved value
473
+ and where it came from. Credits debit per
474
+ \`max(1, round(steps/10))\` per tester; see
475
+ \`ish docs get-page reference/credits\`.
476
+ 13. **Don't call \`workspace_create\` blind on a cold start.** On a
477
+ saturated account it returns \`error_code: usage_limit_reached\`
478
+ immediately — the dogfood account hits this on the first call.
479
+ Always call \`workspace_get\` (or \`ish workspace list --json\`)
480
+ first and inspect \`has_headroom\` per row; if any existing
481
+ workspace fits the work, use it via \`ish workspace use <id>\`.
482
+ To programmatically reuse-or-create idempotently, prefer
483
+ \`ish workspace create --name <name> --ensure\` — returns the existing
484
+ workspace owned by the caller when the name matches, otherwise
485
+ creates a fresh one. Same response shape either way, so the
486
+ agent doesn't branch on success vs. reuse. See
487
+ \`ish docs get-page guides/cold-start\`.
488
+ 14. **Don't trust \`occupation\` filters as whole-token matches.**
489
+ \`audience_build\` treats \`occupation\` as a **loose,
490
+ case-insensitive substring** — \`occupation=["manager"]\` matches
491
+ hotel managers, retail managers, bank branch managers, not just
492
+ the engineering managers you probably wanted. Two recovery
493
+ paths: enumerate the role surface explicitly
494
+ (\`occupation=["engineering manager", "software engineering
495
+ manager", "vp engineering", "tech lead"]\`) or read
496
+ \`match_preview\` on the \`audience_build\` response and iterate
497
+ on the filter before \`ask_run\` / \`study_run\`. The public
498
+ profile pool skews non-tech / non-Western, so even a precise
499
+ filter may resolve to a small count — preview before dispatching
500
+ a run that depends on reaching N matches. See
501
+ \`ish docs get-page concepts/audience\`.
372
502
 
373
503
  ## Authentication
374
504
 
@@ -547,6 +677,21 @@ ish iteration create --url "$URL"
547
677
 
548
678
  ## 7. Chat-modality study (drive a chatbot endpoint)
549
679
 
680
+ The chat modality has **two modes**, picked by
681
+ \`iteration.details.mode_details.mode\`:
682
+
683
+ - **\`external_chatbot\`** — testers probe a customer chatbot endpoint
684
+ (the original chat behaviour). Audience size is set on \`study run\`.
685
+ - **\`tester_pair\`** — two AI tester audiences converse with each
686
+ other. Each side has its own scenario + goal; the other side does
687
+ not see it (asymmetry contract). Audiences are pinned to the
688
+ iteration: equal counts zip 1:1 by index, or one side of 1
689
+ broadcasts across the other (1 × N → N conversations). Useful for rehearsing
690
+ a sales call, a fundraising chat, a difficult conversation, or any
691
+ two-role scenario before it happens. See section 7b below.
692
+
693
+ ### 7a. external_chatbot — drive a customer chatbot endpoint
694
+
550
695
  Goal: configure a customer chatbot endpoint, smoke test it, and run
551
696
  a chat-modality study end to end. The CLI talks to the endpoint
552
697
  through whatever transport it's configured for (sync / async-poll /
@@ -637,6 +782,215 @@ you can branch on plan caps before \`study create\` returns
637
782
  The full reference is at \`ish docs get-page guides/chat\`,
638
783
  secrets are at \`ish docs get-page concepts/secret\`.
639
784
 
785
+ ### 7b. tester_pair — rehearse a two-AI conversation
786
+
787
+ Goal: pit two AI tester audiences against each other to see how a
788
+ two-role conversation unfolds — a sales rep vs. a skeptical CTO, a
789
+ founder vs. an investor archetype, a manager vs. a direct report
790
+ ahead of a difficult conversation. Each side has its own scenario
791
+ and goal; the other side does NOT see it (the asymmetry contract is
792
+ what makes the rehearsal credible).
793
+
794
+ One-shot study + iteration:
795
+
796
+ \`\`\`bash
797
+ ish study create --modality chat --chat-mode tester_pair \\
798
+ --name "Pitch rehearsal" \\
799
+ --audience-a tp-sales-1,tp-sales-2 \\
800
+ --audience-b tp-cto-skeptic-1,tp-cto-skeptic-2 \\
801
+ --scenario-a "You are a senior sales rep pitching ish to a new prospect." \\
802
+ --scenario-b "You are a skeptical CTO; surface risks before agreeing to a pilot." \\
803
+ --assignment "Pitch:Try to land a pilot"
804
+
805
+ ish study run -y
806
+ \`\`\`
807
+
808
+ Or add a pair iteration to an existing chat study:
809
+
810
+ \`\`\`bash
811
+ ish iteration create --study s-... --chat-mode tester_pair \\
812
+ --audience-a tp-a1,tp-a2 --audience-b tp-b1,tp-b2 \\
813
+ --scenario-a @./scenario_a.md --scenario-b @./scenario_b.md \\
814
+ --max-turns 14
815
+ \`\`\`
816
+
817
+ Rules to remember:
818
+ - Each side needs **either** \`--profile-*\` (explicit IDs) **or**
819
+ \`--role-criteria-*\` (a filter the backend resolves). They can also
820
+ be combined — criteria then validates the explicit list.
821
+ - When **both sides** use explicit \`--audience-a\` / \`--audience-b\`, they
822
+ must be the same length (≥ 1). Pairs run 1:1 by index. Same profile
823
+ on both sides is allowed (self-talk rehearsal).
824
+ - **1×N broadcast**: pass exactly one profile on one side and N on
825
+ the other to rehearse one fixed side against N variations. The CLI
826
+ auto-broadcasts the singleton to match. E.g.
827
+ \`--audience-a tp-rep --audience-b tp-cto1,tp-cto2,tp-cto3\` → 3
828
+ conversations, same rep, three different CTOs. Stderr notice fires
829
+ when broadcasting kicks in.
830
+ - Both \`--scenario-a\` and \`--scenario-b\` are required and asymmetric.
831
+ Use \`@./file.md\` to read from disk.
832
+ - \`--initiator-side\` (\`a\` default) picks who speaks first.
833
+ - \`--chat-mode\` accepts both \`tester_pair\` and \`tester-pair\`.
834
+ The same hyphen/underscore tolerance applies to \`--screen-format\`,
835
+ \`--kind\` on \`source upload\`, and the question \`type\` field in
836
+ \`--questionnaire\` / \`--questions\` manifests.
837
+ - Audiences are **authoritative on the iteration**.
838
+ \`ish study run\` refuses \`--profile\` / \`--sample\` / \`--all\` /
839
+ demographic filters on a pair iteration with a clear error. To
840
+ change audiences, update the iteration via
841
+ \`ish iteration update <id> --details-json '{...}'\`.
842
+ - \`--max-turns\` / \`--early-termination\` on \`study run\` override the
843
+ iteration's saved values for that single dispatch (they don't
844
+ persist back to the iteration).
845
+ - Dispatch is per-Conversation (one task per pair). Per-Conversation
846
+ summaries (\`end_reason\`, \`dominant_dynamic\`, \`who_steered\`) land on
847
+ \`iteration.conversations[]\`. Per-tester summaries land on
848
+ \`tester.summary\` as before.
849
+
850
+ ### Filtering audiences with role criteria (persona-first)
851
+
852
+ \`--role-criteria-a\` / \`--role-criteria-b\` accept a JSON object (or
853
+ \`@./file.json\`) describing who's eligible for that side. The
854
+ backend resolves the matching tester-profile pool and persists the
855
+ IDs on the iteration. Keys (all optional):
856
+
857
+ \`\`\`json
858
+ {
859
+ "occupation": ["founder", "ceo"],
860
+ "min_age": 28, "max_age": 55,
861
+ "gender": ["female", "male"],
862
+ "country": ["US", "SE"],
863
+ "education_level_in": ["bachelor", "graduate"],
864
+ "household_in": ["couple_with_kids", "single_parent"],
865
+ "locale_type_in": ["urban", "suburban"],
866
+ "income_level_in": ["middle", "upper_middle", "upper"],
867
+ "employment_status_in": ["employed_full_time", "self_employed"],
868
+ "requires_captions": false,
869
+ "uses_screen_reader": false,
870
+ "prefers_reduced_motion": false,
871
+ "prefers_high_contrast": false,
872
+ "has_any_accessibility_need": false
873
+ }
874
+ \`\`\`
875
+
876
+ The five \`*_in\` arrays accept snake_case spec values verbatim
877
+ (see \`https://ishlabs.io/spec/profile-enums.v1.json\`). The five
878
+ accessibility filters are coarse booleans over each tester's
879
+ \`accessibility_profile\` JSONB.
880
+
881
+ MECE rules for the list filters:
882
+ - \`household_in\`: \`couple_with_kids\` covers couples raising
883
+ children; \`couple_no_kids\` is strictly child-free. \`single\` means
884
+ lives alone with no partner, roommates, parents, or children
885
+ sharing the household.
886
+ - \`employment_status_in\`: pick the tester's primary daytime
887
+ activity. A student who works 15 hrs/week is \`student\`; a retiree
888
+ who freelances is \`retired\`.
889
+
890
+ The **persona-first** principle: the tester's persona is sacred and
891
+ the LLM prompt construction does not change. Criteria filter the
892
+ *eligible pool* upstream so that by the time a tester reaches the
893
+ prompt, their persona is already plausible for the role described
894
+ in \`scenario_*\`. Don't cram demographic constraints into the
895
+ scenario text — that breaks the asymmetry contract and produces
896
+ incoherent characters (a retired farmer suddenly "pitching a
897
+ Series A"). Scenarios describe voice / goal / knowledge; criteria
898
+ pick who plays the role.
899
+
900
+ If the resolved pool is smaller than the requested count for a side,
901
+ \`ish study run\` exits 2 with the backend's pool-too-small error
902
+ intact. Broaden the criteria, generate more profiles
903
+ (\`ish profile generate\`), or fall back to explicit \`--profile-*\`.
904
+
905
+ ### Rehearsing against N variations of one side (1×N)
906
+
907
+ The most common rehearsal shape: fix one side, vary the other.
908
+ "Pitch this once and see how 3 different CTOs respond." Step-by-step:
909
+
910
+ \`\`\`bash
911
+ # 1. Generate N distinct profiles for the varying side (or pick
912
+ # existing ones via \`ish profile list\`).
913
+ ish profile generate \\
914
+ --description "Skeptical CTO at a Series B SaaS startup" \\
915
+ --count 3 --json | jq -r '.items[].alias'
916
+ # → tp-cto1, tp-cto2, tp-cto3
917
+
918
+ # 2. Write the two scenarios as separate files. Each is a system
919
+ # prompt for ONE role; the partner never sees it. Cover voice,
920
+ # knowledge, asymmetry, success criteria. NO demographics in the
921
+ # text — that's --role-criteria-*'s job. See "Writing scenarios
922
+ # that produce signal" below for the template.
923
+ #
924
+ # ./sales_rep.md — the user's pitch + goals
925
+ # ./skeptical_cto.md — CTO's posture + concerns
926
+
927
+ # 3. Create the iteration with ONE profile on the fixed side and
928
+ # N on the varying side. CLI auto-broadcasts the singleton and
929
+ # prints a stderr notice ("Broadcasting --audience-a (1 profile)
930
+ # to length 3…") so you see the expansion.
931
+ ish study create \\
932
+ --modality chat --chat-mode tester_pair \\
933
+ --name "Pitch rehearsal — 3 CTO variants" \\
934
+ --audience-a tp-rep \\
935
+ --audience-b tp-cto1,tp-cto2,tp-cto3 \\
936
+ --scenario-a @./sales_rep.md \\
937
+ --scenario-b @./skeptical_cto.md \\
938
+ --assignment "Pitch:Land a pilot or a clear next step"
939
+
940
+ # 4. Dispatch + wait.
941
+ ish study run -y --wait
942
+
943
+ # 5. Compare per-conversation outcomes:
944
+ ish iteration get <iter-id> --json \\
945
+ | jq '.conversations[] | {pair_index, end_reason,
946
+ dynamic: .summary.dominant_dynamic}'
947
+ \`\`\`
948
+
949
+ The CLI emits a stderr notice when it broadcasts ("Broadcasting
950
+ --audience-a (1 profile) to length 3…") so you can see the
951
+ expansion happen.
952
+
953
+ **Criteria alternative**: \`--role-criteria-b '{"occupation":["cto"]}'\`
954
+ on a single \`--audience-a tp-rep\` lets the backend pick the CTOs.
955
+ Less control over distinctness — for guaranteed variety, generate
956
+ explicit profiles first.
957
+
958
+ ### Writing scenarios that produce signal
959
+
960
+ Thin scenarios produce thin rehearsals. Each scenario is injected as
961
+ role-playing context for **its own side only** — the partner does NOT
962
+ see the other side's scenario or goal. Cover five things in each:
963
+
964
+ 1. **Role / identity** — who is this person?
965
+ 2. **Voice** — how do they speak? Formal, casual, technical, blunt?
966
+ 3. **What they know** — context they came in with.
967
+ 4. **What they don't know** — the asymmetry that makes it interesting.
968
+ 5. **Goal** — what counts as success *for them*.
969
+
970
+ Bad: \`scenario_a: "you are a sales rep"\`. Good (~150 words):
971
+
972
+ \`\`\`
973
+ You are Maya, a senior AE at ish (3 years experience). You speak in
974
+ plain sentences, push back when you disagree, and quantify claims.
975
+ You know this is a 30-min discovery call and you've read the
976
+ prospect's LinkedIn — that's it. You do NOT know their current
977
+ tooling, budget, or politics. Success = leave with a concrete next
978
+ step (pilot, follow-up demo, or a firm "no, because X"). A polite
979
+ "we'll get back to you" is not success.
980
+ \`\`\`
981
+
982
+ Keep each scenario under ~250 words — past that, persona drift
983
+ dominates. Get the full rationale at
984
+ \`ish docs get-page concepts/iteration\` ("Writing a good scenario").
985
+
986
+ Inspect after running:
987
+
988
+ \`\`\`bash
989
+ ish iteration get <iter-id> --json \\
990
+ | jq '.details.mode_details.mode, .conversations[]'
991
+ ish study results <study-id> --transcript <tester-id> --json
992
+ \`\`\`
993
+
640
994
  ## 8. Stage an ask for human review, then dispatch
641
995
 
642
996
  Goal: prepare a billable A/B but let the user inspect and approve the
@@ -721,10 +1075,18 @@ If you find yourself reaching for \`jq -r .x\`, you wanted \`--get x\`.
721
1075
  {alias, status, sentiment, comment}. The cheapest "did this run land?"
722
1076
  shape.
723
1077
  - \`ish study results --transcript <tester_id> --json\` is the
724
- chat-modality projection: a flat \`transcript[]\` of {role, text,
725
- turn_index, action_type?, option_label?, sentiment?, failure?} with a
726
- \`unique_bot_replies\` count (1 on a multi-turn run = the M2 loop
727
- signature). Same shape as the MCP \`get_chat_transcript\` tool.
1078
+ chat-modality projection **external_chatbot mode only**. Returns
1079
+ a flat \`transcript[]\` of {role, text, turn_index, action_type?,
1080
+ option_label?, sentiment?, failure?} with a \`unique_bot_replies\`
1081
+ count (1 on a multi-turn run = the M2 loop signature). Same shape
1082
+ as the MCP \`get_chat_transcript\` tool. For tester_pair
1083
+ conversations, fetch \`.conversations[]\` from
1084
+ \`ish iteration get <iter-id> --json\` instead — bot/tester roles
1085
+ don't apply when both speakers are testers.
1086
+ - \`ish study run --json\` on a pair iteration includes a
1087
+ \`pair_preview\` block (audience sizes, conversation count,
1088
+ initiator side, scenario previews) so agents can confirm what
1089
+ they just dispatched without a follow-up \`iteration get\`.
728
1090
  - \`ish study tester <id> --summary --json\` drops the action timeline
729
1091
  and returns just {tester, sentiment, comment, error_message}.
730
1092
  - \`ish ask results --json\` keeps \`variant_pick_id\` on every
@@ -783,7 +1145,8 @@ If you find yourself reaching for \`jq -r .x\`, you wanted \`--get x\`.
783
1145
  | List of testers from \`study run\` | \`--json \\| jq '.testers[].id'\` | \`--get tester_aliases\` (or \`tester_ids\` for UUIDs) |
784
1146
  | Per-answer sentiment | \`--json \\| jq '...'\` per tester | \`ish study results <id> --json\` (sentiment is on every answer row) |
785
1147
  | "Did this run land?" headline | \`study results --json\` + jq filtering | \`ish study results <id> --summary --json\` |
786
- | Chat transcript for one tester | \`study tester --json\` + jq | \`ish study results <id> --transcript <tester_id> --json\` |
1148
+ | Chat transcript for one tester (external_chatbot) | \`study tester --json\` + jq | \`ish study results <id> --transcript <tester_id> --json\` |
1149
+ | Pair-mode conversation transcripts | \`study tester --json\` per tester | \`ish iteration get <iter-id> --json \\| jq '.conversations[]'\` |
787
1150
  | Tester headline only (no action timeline) | \`study tester --json\` + jq | \`ish study tester <id> --summary --json\` |
788
1151
  | Variant pick id on an ask response | \`ask results --json --verbose\` | \`ish ask results a-… --json\` (variant_pick_id is preserved) |
789
1152
 
@@ -814,7 +1177,7 @@ ish <command> --help
814
1177
  | \`profile\` | Tester profiles + audience generation | concepts/profile |
815
1178
  | \`source\` | Upload sources for profile generation | concepts/source |
816
1179
  | \`config\` | Simulation configs (model, timing, retries) | (run \`ish config --help\`) |
817
- | \`chat\` | Chat endpoint CRUD + smoke test (chat modality) | guides/chat |
1180
+ | \`chat\` | Chat endpoint CRUD + smoke test (external_chatbot mode); pair-mode iterations created via \`iteration create --chat-mode tester_pair\` | guides/chat |
818
1181
  | \`secret\` | Per-workspace secrets (\`{{secret:KEY}}\` resolver) | concepts/secret |
819
1182
  | \`docs\` | Offline docs for agents | (run \`ish docs --help\`) |
820
1183
  | \`init\` | Drop this skill into a Claude Code / Codex / | (run \`ish init --help\`) |
@@ -178,7 +178,12 @@ export interface GeneratedProfile {
178
178
  country?: string | null;
179
179
  city?: string | null;
180
180
  occupation?: string | null;
181
- tech_savviness?: string | null;
181
+ education_level?: string | null;
182
+ household?: string | null;
183
+ locale_type?: string | null;
184
+ income_level?: string | null;
185
+ employment_status?: string | null;
186
+ accessibility_profile?: Record<string, unknown> | null;
182
187
  product_id?: string | null;
183
188
  custom_field_values?: Record<string, unknown>;
184
189
  [key: string]: unknown;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ishlabs/cli",
3
- "version": "0.13.0",
3
+ "version": "0.14.0",
4
4
  "description": "The command-line interface for ish",
5
5
  "type": "module",
6
6
  "bin": {